From: Chuck Lever <chuck.lever@oracle.com>
To: bfields@fieldses.org
Cc: linux-nfs@vger.kernel.org, linux-rdma@vger.kernel.org
Subject: [PATCH v3 01/32] SUNRPC: Move xpt_mutex into socket xpo_sendto methods
Date: Thu, 21 May 2020 10:33:48 -0400 [thread overview]
Message-ID: <20200521143348.3557.45005.stgit@klimt.1015granger.net> (raw)
In-Reply-To: <20200521141100.3557.17098.stgit@klimt.1015granger.net>
It appears that the RPC/RDMA transport does not need serialization
of calls to its xpo_sendto method. Move the mutex into the socket
methods that still need that serialization.
Tail latencies are unambiguously better with this patch applied.
fio randrw 8KB 70/30 on NFSv3, smaller numbers are better:
clat percentiles (usec):
With xpt_mutex:
r | 99.99th=[ 8848]
w | 99.99th=[ 9634]
Without xpt_mutex:
r | 99.99th=[ 8586]
w | 99.99th=[ 8979]
Serializing the construction of RPC/RDMA transport headers is not
really necessary at this point, because the Linux NFS server
implementation never changes its credit grant on a connection. If
that should change, then svc_rdma_sendto will need to serialize
access to the transport's credit grant fields.
Reported-by: kbuild test robot <lkp@intel.com>
[ cel: fix uninitialized variable warning ]
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
include/linux/sunrpc/svc_xprt.h | 6 +++++
net/sunrpc/svc_xprt.c | 12 ++--------
net/sunrpc/svcsock.c | 25 ++++++++++++++++++++
net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 35 +++++++++++++---------------
net/sunrpc/xprtrdma/svc_rdma_sendto.c | 10 +++-----
net/sunrpc/xprtsock.c | 12 ++++++++--
6 files changed, 64 insertions(+), 36 deletions(-)
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 9e1e046de176..aca35ab5cff2 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -117,6 +117,12 @@ static inline int register_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u
return 0;
}
+static inline bool svc_xprt_is_dead(const struct svc_xprt *xprt)
+{
+ return (test_bit(XPT_DEAD, &xprt->xpt_flags) != 0) ||
+ (test_bit(XPT_CLOSE, &xprt->xpt_flags) != 0);
+}
+
int svc_reg_xprt_class(struct svc_xprt_class *);
void svc_unreg_xprt_class(struct svc_xprt_class *);
void svc_xprt_init(struct net *, struct svc_xprt_class *, struct svc_xprt *,
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 2284ff038dad..07cdbf7d5764 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -914,16 +914,10 @@ int svc_send(struct svc_rqst *rqstp)
xb->page_len +
xb->tail[0].iov_len;
trace_svc_sendto(xb);
-
- /* Grab mutex to serialize outgoing data. */
- mutex_lock(&xprt->xpt_mutex);
trace_svc_stats_latency(rqstp);
- if (test_bit(XPT_DEAD, &xprt->xpt_flags)
- || test_bit(XPT_CLOSE, &xprt->xpt_flags))
- len = -ENOTCONN;
- else
- len = xprt->xpt_ops->xpo_sendto(rqstp);
- mutex_unlock(&xprt->xpt_mutex);
+
+ len = xprt->xpt_ops->xpo_sendto(rqstp);
+
trace_svc_send(rqstp, len);
svc_xprt_release(rqstp);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 023514e392b3..3e7b6445e317 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -506,6 +506,9 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
* svc_udp_sendto - Send out a reply on a UDP socket
* @rqstp: completed svc_rqst
*
+ * xpt_mutex ensures @rqstp's whole message is written to the socket
+ * without interruption.
+ *
* Returns the number of bytes sent, or a negative errno.
*/
static int svc_udp_sendto(struct svc_rqst *rqstp)
@@ -531,6 +534,11 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
svc_set_cmsg_data(rqstp, cmh);
+ mutex_lock(&xprt->xpt_mutex);
+
+ if (svc_xprt_is_dead(xprt))
+ goto out_notconn;
+
err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent);
xdr_free_bvec(xdr);
if (err == -ECONNREFUSED) {
@@ -538,9 +546,15 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent);
xdr_free_bvec(xdr);
}
+
+ mutex_unlock(&xprt->xpt_mutex);
if (err < 0)
return err;
return sent;
+
+out_notconn:
+ mutex_unlock(&xprt->xpt_mutex);
+ return -ENOTCONN;
}
static int svc_udp_has_wspace(struct svc_xprt *xprt)
@@ -1063,6 +1077,9 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
* svc_tcp_sendto - Send out a reply on a TCP socket
* @rqstp: completed svc_rqst
*
+ * xpt_mutex ensures @rqstp's whole message is written to the socket
+ * without interruption.
+ *
* Returns the number of bytes sent, or a negative errno.
*/
static int svc_tcp_sendto(struct svc_rqst *rqstp)
@@ -1080,12 +1097,19 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
svc_release_skb(rqstp);
+ mutex_lock(&xprt->xpt_mutex);
+ if (svc_xprt_is_dead(xprt))
+ goto out_notconn;
err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, marker, &sent);
xdr_free_bvec(xdr);
if (err < 0 || sent != (xdr->len + sizeof(marker)))
goto out_close;
+ mutex_unlock(&xprt->xpt_mutex);
return sent;
+out_notconn:
+ mutex_unlock(&xprt->xpt_mutex);
+ return -ENOTCONN;
out_close:
pr_notice("rpc-srv/tcp: %s: %s %d when sending %d bytes - shutting down socket\n",
xprt->xpt_server->sv_name,
@@ -1093,6 +1117,7 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
(err < 0) ? err : sent, xdr->len);
set_bit(XPT_CLOSE, &xprt->xpt_flags);
svc_xprt_enqueue(xprt);
+ mutex_unlock(&xprt->xpt_mutex);
return -EAGAIN;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index af7eb8d202ae..d9aab4504f2c 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -210,34 +210,31 @@ rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst)
return -ENOTCONN;
}
-/* Send an RPC call on the passive end of a transport
- * connection.
+/**
+ * xprt_rdma_bc_send_request - Send a reverse-direction Call
+ * @rqst: rpc_rqst containing Call message to be sent
+ *
+ * Return values:
+ * %0 if the message was sent successfully
+ * %ENOTCONN if the message was not sent
*/
-static int
-xprt_rdma_bc_send_request(struct rpc_rqst *rqst)
+static int xprt_rdma_bc_send_request(struct rpc_rqst *rqst)
{
struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt;
- struct svcxprt_rdma *rdma;
+ struct svcxprt_rdma *rdma =
+ container_of(sxprt, struct svcxprt_rdma, sc_xprt);
int ret;
dprintk("svcrdma: sending bc call with xid: %08x\n",
be32_to_cpu(rqst->rq_xid));
- mutex_lock(&sxprt->xpt_mutex);
-
- ret = -ENOTCONN;
- rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt);
- if (!test_bit(XPT_DEAD, &sxprt->xpt_flags)) {
- ret = rpcrdma_bc_send_request(rdma, rqst);
- if (ret == -ENOTCONN)
- svc_close_xprt(sxprt);
- }
+ if (test_bit(XPT_DEAD, &sxprt->xpt_flags))
+ return -ENOTCONN;
- mutex_unlock(&sxprt->xpt_mutex);
-
- if (ret < 0)
- return ret;
- return 0;
+ ret = rpcrdma_bc_send_request(rdma, rqst);
+ if (ret == -ENOTCONN)
+ svc_close_xprt(sxprt);
+ return ret;
}
static void
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index b6c8643867f2..38e7c3c8c4a9 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -868,12 +868,10 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
__be32 *p;
int ret;
- /* Create the RDMA response header. xprt->xpt_mutex,
- * acquired in svc_send(), serializes RPC replies. The
- * code path below that inserts the credit grant value
- * into each transport header runs only inside this
- * critical section.
- */
+ ret = -ENOTCONN;
+ if (svc_xprt_is_dead(xprt))
+ goto err0;
+
ret = -ENOMEM;
sctxt = svc_rdma_send_ctxt_get(rdma);
if (!sctxt)
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 845d0be805ec..839c49330785 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2548,8 +2548,16 @@ static int bc_sendto(struct rpc_rqst *req)
return sent;
}
-/*
- * The send routine. Borrows from svc_send
+/**
+ * bc_send_request - Send a backchannel Call on a TCP socket
+ * @req: rpc_rqst containing Call message to be sent
+ *
+ * xpt_mutex ensures @rqstp's whole message is written to the socket
+ * without interruption.
+ *
+ * Return values:
+ * %0 if the message was sent successfully
+ * %ENOTCONN if the message was not sent
*/
static int bc_send_request(struct rpc_rqst *req)
{
next prev parent reply other threads:[~2020-05-21 14:33 UTC|newest]
Thread overview: 33+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-05-21 14:33 [PATCH v3 00/32] Possible NFSD patches for v5.8 Chuck Lever
2020-05-21 14:33 ` Chuck Lever [this message]
2020-05-21 14:33 ` [PATCH v3 02/32] svcrdma: Clean up the tracing for rw_ctx_init errors Chuck Lever
2020-05-21 14:33 ` [PATCH v3 03/32] svcrdma: Clean up handling of get_rw_ctx errors Chuck Lever
2020-05-21 14:34 ` [PATCH v3 04/32] svcrdma: Trace page overruns when constructing RDMA Reads Chuck Lever
2020-05-21 14:34 ` [PATCH v3 05/32] svcrdma: trace undersized Write chunks Chuck Lever
2020-05-21 14:34 ` [PATCH v3 06/32] svcrdma: Fix backchannel return code Chuck Lever
2020-05-21 14:34 ` [PATCH v3 07/32] svcrdma: Remove backchannel dprintk call sites Chuck Lever
2020-05-21 14:34 ` [PATCH v3 08/32] svcrdma: Rename tracepoints that record header decoding errors Chuck Lever
2020-05-21 14:34 ` [PATCH v3 09/32] svcrdma: Remove the SVCRDMA_DEBUG macro Chuck Lever
2020-05-21 14:34 ` [PATCH v3 10/32] svcrdma: Displayed remote IP address should match stored address Chuck Lever
2020-05-21 14:34 ` [PATCH v3 11/32] svcrdma: Add tracepoints to report ->xpo_accept failures Chuck Lever
2020-05-21 14:34 ` [PATCH v3 12/32] SUNRPC: Remove kernel memory address from svc_xprt tracepoints Chuck Lever
2020-05-21 14:34 ` [PATCH v3 13/32] SUNRPC: Tracepoint to record errors in svc_xpo_create() Chuck Lever
2020-05-21 14:34 ` [PATCH v3 14/32] SUNRPC: Trace a few more generic svc_xprt events Chuck Lever
2020-05-21 14:35 ` [PATCH v3 15/32] SUNRPC: Remove "#include <trace/events/skb.h>" Chuck Lever
2020-05-21 14:35 ` [PATCH v3 16/32] SUNRPC: Add more svcsock tracepoints Chuck Lever
2020-05-21 14:35 ` [PATCH v3 17/32] SUNRPC: Replace dprintk call sites in TCP state change callouts Chuck Lever
2020-05-21 14:35 ` [PATCH v3 18/32] SUNRPC: Trace server-side rpcbind registration events Chuck Lever
2020-05-21 14:35 ` [PATCH v3 19/32] SUNRPC: Rename svc_sock::sk_reclen Chuck Lever
2020-05-21 14:35 ` [PATCH v3 20/32] SUNRPC: Restructure svc_tcp_recv_record() Chuck Lever
2020-05-21 14:35 ` [PATCH v3 21/32] SUNRPC: Replace dprintk() call sites in TCP receive path Chuck Lever
2020-05-21 14:35 ` [PATCH v3 22/32] SUNRPC: Refactor recvfrom path dealing with incomplete TCP receives Chuck Lever
2020-05-21 14:35 ` [PATCH v3 23/32] SUNRPC: Clean up svc_release_skb() functions Chuck Lever
2020-05-21 14:35 ` [PATCH v3 24/32] SUNRPC: Refactor svc_recvfrom() Chuck Lever
2020-05-21 14:35 ` [PATCH v3 25/32] SUNRPC: Restructure svc_udp_recvfrom() Chuck Lever
2020-05-21 14:36 ` [PATCH v3 26/32] SUNRPC: svc_show_status() macro should have enum definitions Chuck Lever
2020-05-21 14:36 ` [PATCH v3 27/32] NFSD: Add tracepoints to NFSD's duplicate reply cache Chuck Lever
2020-05-21 14:36 ` [PATCH v3 28/32] NFSD: Add tracepoints to the NFSD state management code Chuck Lever
2020-05-21 14:36 ` [PATCH v3 29/32] NFSD: Add tracepoints for monitoring NFSD callbacks Chuck Lever
2020-05-21 14:36 ` [PATCH v3 30/32] SUNRPC: Clean up request deferral tracepoints Chuck Lever
2020-05-21 14:36 ` [PATCH v3 31/32] NFSD: Squash an annoying compiler warning Chuck Lever
2020-05-21 14:36 ` [PATCH v3 32/32] NFSD: Fix improperly-formatted Doxygen comments Chuck Lever
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200521143348.3557.45005.stgit@klimt.1015granger.net \
--to=chuck.lever@oracle.com \
--cc=bfields@fieldses.org \
--cc=linux-nfs@vger.kernel.org \
--cc=linux-rdma@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).