All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH RFC 0/3] Prepare NFS server for RPC-on-TLS
@ 2020-02-12  0:42 Chuck Lever
  2020-02-12  0:42 ` [PATCH RFC 1/3] SUNRPC: Move xs_stream_record_marker Chuck Lever
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Chuck Lever @ 2020-02-12  0:42 UTC (permalink / raw)
  To: trondmy; +Cc: linux-nfs

The following series implements changes to the Linux NFS server's
socket transport that are pre-requisite to supporting RPC on TLS.
The socket transport is made to use the iov_iter-based kernel
socket APIs. These changes are modeled after similar changes made
to the NFS client-side socket transport.

This is a draft patch series. I have tried it out and it is working,
but as it has changed only the Reply-send side, it is incomplete.
This posting is a request for comments -- show your work.

One additional change I might make in this series is moving the
xpt_mutex into the socket sendto methods. This is because the
RDMA sendto method doesn't appear to require serialization. That
is still down the road.


---

Chuck Lever (3):
      SUNRPC: Move xs_stream_record_marker
      SUNRPC: Refactor xs_sendpages()
      SUNRPC: Teach server to use xprt_sock_sendmsg for socket sends


 include/linux/sunrpc/msg_prot.h |   13 ++
 include/linux/sunrpc/svcauth.h  |    1 
 include/linux/sunrpc/xdr.h      |   14 ---
 net/sunrpc/socklib.c            |  136 ++++++++++++++++++++++++++
 net/sunrpc/socklib.h            |   15 +++
 net/sunrpc/sunrpc.h             |    4 -
 net/sunrpc/svc.c                |    4 -
 net/sunrpc/svcsock.c            |  203 +++++++++++++--------------------------
 net/sunrpc/xprtsock.c           |  203 ++++++---------------------------------
 9 files changed, 261 insertions(+), 332 deletions(-)
 create mode 100644 net/sunrpc/socklib.h

--
Chuck Lever

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH RFC 1/3] SUNRPC: Move xs_stream_record_marker
  2020-02-12  0:42 [PATCH RFC 0/3] Prepare NFS server for RPC-on-TLS Chuck Lever
@ 2020-02-12  0:42 ` Chuck Lever
  2020-02-12  0:42 ` [PATCH RFC 2/3] SUNRPC: Refactor xs_sendpages() Chuck Lever
  2020-02-12  0:42 ` [PATCH RFC 3/3] SUNRPC: Teach server to use xprt_sock_sendmsg for socket sends Chuck Lever
  2 siblings, 0 replies; 4+ messages in thread
From: Chuck Lever @ 2020-02-12  0:42 UTC (permalink / raw)
  To: trondmy; +Cc: linux-nfs

Refactor: Allow the client and server to share this helper function.

msg_prot.h now needs linux/kernel.h for cpu_to_be32. #include
<msg_prot.h> is removed anywhere it is not required and causes a
compilation failure due to the new header dependency.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/msg_prot.h |   13 +++++++++++++
 include/linux/sunrpc/svcauth.h  |    1 -
 net/sunrpc/xprtsock.c           |   18 +++---------------
 3 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h
index bea40d9f03a1..cceb5a3e39f5 100644
--- a/include/linux/sunrpc/msg_prot.h
+++ b/include/linux/sunrpc/msg_prot.h
@@ -104,6 +104,19 @@ enum rpc_auth_stat {
 #define	RPC_FRAGMENT_SIZE_MASK		(~RPC_LAST_STREAM_FRAGMENT)
 #define	RPC_MAX_FRAGMENT_SIZE		((1U << 31) - 1)
 
+/**
+ * rpc_stream_record_marker - Construct a 4-octet stream record marker
+ * @size: number of octets in the RPC message to send
+ *
+ * Returns the stream record marker field for a record of length < 2^31-1
+ */
+static inline rpc_fraghdr rpc_stream_record_marker(u32 size)
+{
+	if (!size)
+		return 0;
+	return cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | size);
+}
+
 /*
  * RPC call and reply header size as number of 32bit words (verifier
  * size computed separately, see below)
diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h
index b0003866a249..aeac5a955cce 100644
--- a/include/linux/sunrpc/svcauth.h
+++ b/include/linux/sunrpc/svcauth.h
@@ -11,7 +11,6 @@
 #define _LINUX_SUNRPC_SVCAUTH_H_
 
 #include <linux/string.h>
-#include <linux/sunrpc/msg_prot.h>
 #include <linux/sunrpc/cache.h>
 #include <linux/sunrpc/gss_api.h>
 #include <linux/hash.h>
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index d86c664ea6af..f940179db510 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -929,17 +929,6 @@ static int xs_nospace(struct rpc_rqst *req)
 	return transport->xmit.offset != 0 && req->rq_bytes_sent == 0;
 }
 
-/*
- * Return the stream record marker field for a record of length < 2^31-1
- */
-static rpc_fraghdr
-xs_stream_record_marker(struct xdr_buf *xdr)
-{
-	if (!xdr->len)
-		return 0;
-	return cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | (u32)xdr->len);
-}
-
 /**
  * xs_local_send_request - write an RPC request to an AF_LOCAL socket
  * @req: pointer to RPC request
@@ -957,7 +946,7 @@ static int xs_local_send_request(struct rpc_rqst *req)
 	struct sock_xprt *transport =
 				container_of(xprt, struct sock_xprt, xprt);
 	struct xdr_buf *xdr = &req->rq_snd_buf;
-	rpc_fraghdr rm = xs_stream_record_marker(xdr);
+	rpc_fraghdr rm = rpc_stream_record_marker(xdr->len);
 	unsigned int msglen = rm ? req->rq_slen + sizeof(rm) : req->rq_slen;
 	int status;
 	int sent = 0;
@@ -1104,7 +1093,7 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
 	struct rpc_xprt *xprt = req->rq_xprt;
 	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
 	struct xdr_buf *xdr = &req->rq_snd_buf;
-	rpc_fraghdr rm = xs_stream_record_marker(xdr);
+	rpc_fraghdr rm = rpc_stream_record_marker(xdr->len);
 	unsigned int msglen = rm ? req->rq_slen + sizeof(rm) : req->rq_slen;
 	bool vm_wait = false;
 	int status;
@@ -2652,8 +2641,7 @@ static int bc_sendto(struct rpc_rqst *req)
 	struct msghdr msg = {
 		.msg_flags	= MSG_MORE
 	};
-	rpc_fraghdr marker = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT |
-					 (u32)xbufp->len);
+	rpc_fraghdr marker = rpc_stream_record_marker(xbufp->len);
 	struct kvec iov = {
 		.iov_base	= &marker,
 		.iov_len	= sizeof(marker),


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH RFC 2/3] SUNRPC: Refactor xs_sendpages()
  2020-02-12  0:42 [PATCH RFC 0/3] Prepare NFS server for RPC-on-TLS Chuck Lever
  2020-02-12  0:42 ` [PATCH RFC 1/3] SUNRPC: Move xs_stream_record_marker Chuck Lever
@ 2020-02-12  0:42 ` Chuck Lever
  2020-02-12  0:42 ` [PATCH RFC 3/3] SUNRPC: Teach server to use xprt_sock_sendmsg for socket sends Chuck Lever
  2 siblings, 0 replies; 4+ messages in thread
From: Chuck Lever @ 2020-02-12  0:42 UTC (permalink / raw)
  To: trondmy; +Cc: linux-nfs

Refactor: Re-locate xs_sendpages() so that it can be shared with
server code.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/xdr.h |   14 ----
 net/sunrpc/socklib.c       |  136 ++++++++++++++++++++++++++++++++++++++++
 net/sunrpc/socklib.h       |   15 ++++
 net/sunrpc/svcsock.c       |    1 
 net/sunrpc/xprtsock.c      |  148 ++++++--------------------------------------
 5 files changed, 172 insertions(+), 142 deletions(-)
 create mode 100644 net/sunrpc/socklib.h

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index b41f34977995..a1264b19b34c 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -188,20 +188,6 @@ static inline void xdr_netobj_dup(struct xdr_netobj *dst,
 extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int);
 extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int);
 
-/*
- * Helper structure for copying from an sk_buff.
- */
-struct xdr_skb_reader {
-	struct sk_buff	*skb;
-	unsigned int	offset;
-	size_t		count;
-	__wsum		csum;
-};
-
-typedef size_t (*xdr_skb_read_actor)(struct xdr_skb_reader *desc, void *to, size_t len);
-
-extern int csum_partial_copy_to_xdr(struct xdr_buf *, struct sk_buff *);
-
 extern int xdr_encode_word(struct xdr_buf *, unsigned int, u32);
 extern int xdr_decode_word(struct xdr_buf *, unsigned int, u32 *);
 
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
index 1a864f1ed119..7ccc83046af5 100644
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c
@@ -14,9 +14,24 @@
 #include <linux/types.h>
 #include <linux/pagemap.h>
 #include <linux/udp.h>
+#include <linux/sunrpc/msg_prot.h>
 #include <linux/sunrpc/xdr.h>
 #include <linux/export.h>
 
+#include "socklib.h"
+
+/*
+ * Helper structure for copying from an sk_buff.
+ */
+struct xdr_skb_reader {
+	struct sk_buff	*skb;
+	unsigned int	offset;
+	size_t		count;
+	__wsum		csum;
+};
+
+typedef size_t (*xdr_skb_read_actor)(struct xdr_skb_reader *desc, void *to,
+				     size_t len);
 
 /**
  * xdr_skb_read_bits - copy some data bits from skb to internal buffer
@@ -186,3 +201,124 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
 	return 0;
 }
 EXPORT_SYMBOL_GPL(csum_partial_copy_to_xdr);
+
+static inline int xprt_sendmsg(struct socket *sock, struct msghdr *msg,
+			       size_t seek)
+{
+	if (seek)
+		iov_iter_advance(&msg->msg_iter, seek);
+	return sock_sendmsg(sock, msg);
+}
+
+static int xprt_send_kvec(struct socket *sock, struct msghdr *msg,
+			  struct kvec *vec, size_t seek)
+{
+	iov_iter_kvec(&msg->msg_iter, WRITE, vec, 1, vec->iov_len);
+	return xprt_sendmsg(sock, msg, seek);
+}
+
+static int xprt_send_pagedata(struct socket *sock, struct msghdr *msg,
+			      struct xdr_buf *xdr, size_t base)
+{
+	int err;
+
+	err = xdr_alloc_bvec(xdr, GFP_KERNEL);
+	if (err < 0)
+		return err;
+
+	iov_iter_bvec(&msg->msg_iter, WRITE, xdr->bvec,
+			xdr_buf_pagecount(xdr),
+			xdr->page_len + xdr->page_base);
+	return xprt_sendmsg(sock, msg, base + xdr->page_base);
+}
+
+/* Common case:
+ *  - stream transport
+ *  - sending from byte 0 of the message
+ *  - the message is wholly contained in @xdr's head iovec
+ */
+static int xprt_send_rm_and_kvec(struct socket *sock, struct msghdr *msg,
+				 rpc_fraghdr marker, struct kvec *vec,
+				 size_t base)
+{
+	struct kvec iov[2] = {
+		[0] = {
+			.iov_base	= &marker,
+			.iov_len	= sizeof(marker)
+		},
+		[1] = *vec,
+	};
+	size_t len = iov[0].iov_len + iov[1].iov_len;
+
+	iov_iter_kvec(&msg->msg_iter, WRITE, iov, 2, len);
+	return xprt_sendmsg(sock, msg, base);
+}
+
+/**
+ * xprt_sock_sendmsg - write an xdr_buf directly to a socket
+ * @sock: open socket to send on
+ * @msg: socket message metadata
+ * @xdr: xdr_buf containing this request
+ * @base: starting position in the buffer
+ * @marker: stream record marker field
+ * @sent_p: return the total number of bytes successfully queued for sending
+ *
+ * Return values:
+ *   On success, returns zero and fills in @sent_p.
+ *   %-ENOTSOCK if  @sock is not a struct socket.
+ */
+int xprt_sock_sendmsg(struct socket *sock, struct msghdr *msg,
+		      struct xdr_buf *xdr, unsigned int base,
+		      rpc_fraghdr marker, unsigned int *sent_p)
+{
+	unsigned int rmsize = marker ? sizeof(marker) : 0;
+	unsigned int remainder = rmsize + xdr->len - base;
+	unsigned int want;
+	int err = 0;
+
+	if (unlikely(!sock))
+		return -ENOTSOCK;
+
+	msg->msg_flags |= MSG_MORE;
+	want = xdr->head[0].iov_len + rmsize;
+	if (base < want) {
+		unsigned int len = want - base;
+		remainder -= len;
+		if (remainder == 0)
+			msg->msg_flags &= ~MSG_MORE;
+		if (rmsize)
+			err = xprt_send_rm_and_kvec(sock, msg, marker,
+						    &xdr->head[0], base);
+		else
+			err = xprt_send_kvec(sock, msg, &xdr->head[0], base);
+		if (remainder == 0 || err != len)
+			goto out;
+		*sent_p += err;
+		base = 0;
+	} else
+		base -= want;
+
+	if (base < xdr->page_len) {
+		unsigned int len = xdr->page_len - base;
+		remainder -= len;
+		if (remainder == 0)
+			msg->msg_flags &= ~MSG_MORE;
+		err = xprt_send_pagedata(sock, msg, xdr, base);
+		if (remainder == 0 || err != len)
+			goto out;
+		*sent_p += err;
+		base = 0;
+	} else
+		base -= xdr->page_len;
+
+	if (base >= xdr->tail[0].iov_len)
+		return 0;
+	msg->msg_flags &= ~MSG_MORE;
+	err = xprt_send_kvec(sock, msg, &xdr->tail[0], base);
+out:
+	if (err > 0) {
+		*sent_p += err;
+		err = 0;
+	}
+	return err;
+}
diff --git a/net/sunrpc/socklib.h b/net/sunrpc/socklib.h
new file mode 100644
index 000000000000..c48114ad6f00
--- /dev/null
+++ b/net/sunrpc/socklib.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de>
+ * Copyright (C) 2020, Oracle.
+ */
+
+#ifndef _NET_SUNRPC_SOCKLIB_H_
+#define _NET_SUNRPC_SOCKLIB_H_
+
+int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb);
+int xprt_sock_sendmsg(struct socket *sock, struct msghdr *msg,
+		      struct xdr_buf *xdr, unsigned int base,
+		      rpc_fraghdr marker, unsigned int *sent_p);
+
+#endif /* _NET_SUNRPC_SOCKLIB_H_ */
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index fe045b3e7e08..47957088d7d3 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -55,6 +55,7 @@
 #include <linux/sunrpc/stats.h>
 #include <linux/sunrpc/xprt.h>
 
+#include "socklib.h"
 #include "sunrpc.h"
 
 #define RPCDBG_FACILITY	RPCDBG_SVCXPRT
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index f940179db510..0380d72d8485 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -54,6 +54,7 @@
 
 #include <trace/events/sunrpc.h>
 
+#include "socklib.h"
 #include "sunrpc.h"
 
 static void xs_close(struct rpc_xprt *xprt);
@@ -749,125 +750,6 @@ static void xs_stream_data_receive_workfn(struct work_struct *work)
 
 #define XS_SENDMSG_FLAGS	(MSG_DONTWAIT | MSG_NOSIGNAL)
 
-static int xs_sendmsg(struct socket *sock, struct msghdr *msg, size_t seek)
-{
-	if (seek)
-		iov_iter_advance(&msg->msg_iter, seek);
-	return sock_sendmsg(sock, msg);
-}
-
-static int xs_send_kvec(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t seek)
-{
-	iov_iter_kvec(&msg->msg_iter, WRITE, vec, 1, vec->iov_len);
-	return xs_sendmsg(sock, msg, seek);
-}
-
-static int xs_send_pagedata(struct socket *sock, struct msghdr *msg, struct xdr_buf *xdr, size_t base)
-{
-	int err;
-
-	err = xdr_alloc_bvec(xdr, GFP_KERNEL);
-	if (err < 0)
-		return err;
-
-	iov_iter_bvec(&msg->msg_iter, WRITE, xdr->bvec,
-			xdr_buf_pagecount(xdr),
-			xdr->page_len + xdr->page_base);
-	return xs_sendmsg(sock, msg, base + xdr->page_base);
-}
-
-#define xs_record_marker_len() sizeof(rpc_fraghdr)
-
-/* Common case:
- *  - stream transport
- *  - sending from byte 0 of the message
- *  - the message is wholly contained in @xdr's head iovec
- */
-static int xs_send_rm_and_kvec(struct socket *sock, struct msghdr *msg,
-		rpc_fraghdr marker, struct kvec *vec, size_t base)
-{
-	struct kvec iov[2] = {
-		[0] = {
-			.iov_base	= &marker,
-			.iov_len	= sizeof(marker)
-		},
-		[1] = *vec,
-	};
-	size_t len = iov[0].iov_len + iov[1].iov_len;
-
-	iov_iter_kvec(&msg->msg_iter, WRITE, iov, 2, len);
-	return xs_sendmsg(sock, msg, base);
-}
-
-/**
- * xs_sendpages - write pages directly to a socket
- * @sock: socket to send on
- * @addr: UDP only -- address of destination
- * @addrlen: UDP only -- length of destination address
- * @xdr: buffer containing this request
- * @base: starting position in the buffer
- * @rm: stream record marker field
- * @sent_p: return the total number of bytes successfully queued for sending
- *
- */
-static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, rpc_fraghdr rm, int *sent_p)
-{
-	struct msghdr msg = {
-		.msg_name = addr,
-		.msg_namelen = addrlen,
-		.msg_flags = XS_SENDMSG_FLAGS | MSG_MORE,
-	};
-	unsigned int rmsize = rm ? sizeof(rm) : 0;
-	unsigned int remainder = rmsize + xdr->len - base;
-	unsigned int want;
-	int err = 0;
-
-	if (unlikely(!sock))
-		return -ENOTSOCK;
-
-	want = xdr->head[0].iov_len + rmsize;
-	if (base < want) {
-		unsigned int len = want - base;
-		remainder -= len;
-		if (remainder == 0)
-			msg.msg_flags &= ~MSG_MORE;
-		if (rmsize)
-			err = xs_send_rm_and_kvec(sock, &msg, rm,
-					&xdr->head[0], base);
-		else
-			err = xs_send_kvec(sock, &msg, &xdr->head[0], base);
-		if (remainder == 0 || err != len)
-			goto out;
-		*sent_p += err;
-		base = 0;
-	} else
-		base -= want;
-
-	if (base < xdr->page_len) {
-		unsigned int len = xdr->page_len - base;
-		remainder -= len;
-		if (remainder == 0)
-			msg.msg_flags &= ~MSG_MORE;
-		err = xs_send_pagedata(sock, &msg, xdr, base);
-		if (remainder == 0 || err != len)
-			goto out;
-		*sent_p += err;
-		base = 0;
-	} else
-		base -= xdr->page_len;
-
-	if (base >= xdr->tail[0].iov_len)
-		return 0;
-	msg.msg_flags &= ~MSG_MORE;
-	err = xs_send_kvec(sock, &msg, &xdr->tail[0], base);
-out:
-	if (err > 0) {
-		*sent_p += err;
-		err = 0;
-	}
-	return err;
-}
-
 /**
  * xs_nospace - handle transmit was incomplete
  * @req: pointer to RPC request
@@ -948,8 +830,11 @@ static int xs_local_send_request(struct rpc_rqst *req)
 	struct xdr_buf *xdr = &req->rq_snd_buf;
 	rpc_fraghdr rm = rpc_stream_record_marker(xdr->len);
 	unsigned int msglen = rm ? req->rq_slen + sizeof(rm) : req->rq_slen;
+	struct msghdr msg = {
+		.msg_flags	= XS_SENDMSG_FLAGS,
+	};
+	unsigned int sent = 0;
 	int status;
-	int sent = 0;
 
 	/* Close the stream if the previous transmission was incomplete */
 	if (xs_send_request_was_aborted(transport, req)) {
@@ -961,8 +846,8 @@ static int xs_local_send_request(struct rpc_rqst *req)
 			req->rq_svec->iov_base, req->rq_svec->iov_len);
 
 	req->rq_xtime = ktime_get();
-	status = xs_sendpages(transport->sock, NULL, 0, xdr,
-			      transport->xmit.offset, rm, &sent);
+	status = xprt_sock_sendmsg(transport->sock, &msg, xdr,
+				   transport->xmit.offset, rm, &sent);
 	dprintk("RPC:       %s(%u) = %d\n",
 			__func__, xdr->len - transport->xmit.offset, status);
 
@@ -1014,7 +899,12 @@ static int xs_udp_send_request(struct rpc_rqst *req)
 	struct rpc_xprt *xprt = req->rq_xprt;
 	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
 	struct xdr_buf *xdr = &req->rq_snd_buf;
-	int sent = 0;
+	struct msghdr msg = {
+		.msg_name	= xs_addr(xprt),
+		.msg_namelen	= xprt->addrlen,
+		.msg_flags	= XS_SENDMSG_FLAGS,
+	};
+	unsigned int sent = 0;
 	int status;
 
 	xs_pktdump("packet data:",
@@ -1028,8 +918,7 @@ static int xs_udp_send_request(struct rpc_rqst *req)
 		return -EBADSLT;
 
 	req->rq_xtime = ktime_get();
-	status = xs_sendpages(transport->sock, xs_addr(xprt), xprt->addrlen,
-			      xdr, 0, 0, &sent);
+	status = xprt_sock_sendmsg(transport->sock, &msg, xdr, 0, 0, &sent);
 
 	dprintk("RPC:       xs_udp_send_request(%u) = %d\n",
 			xdr->len, status);
@@ -1095,9 +984,12 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
 	struct xdr_buf *xdr = &req->rq_snd_buf;
 	rpc_fraghdr rm = rpc_stream_record_marker(xdr->len);
 	unsigned int msglen = rm ? req->rq_slen + sizeof(rm) : req->rq_slen;
+	struct msghdr msg = {
+		.msg_flags	= XS_SENDMSG_FLAGS,
+	};
 	bool vm_wait = false;
+	unsigned int sent;
 	int status;
-	int sent;
 
 	/* Close the stream if the previous transmission was incomplete */
 	if (xs_send_request_was_aborted(transport, req)) {
@@ -1119,8 +1011,8 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
 	req->rq_xtime = ktime_get();
 	while (1) {
 		sent = 0;
-		status = xs_sendpages(transport->sock, NULL, 0, xdr,
-				      transport->xmit.offset, rm, &sent);
+		status = xprt_sock_sendmsg(transport->sock, &msg, xdr,
+					   transport->xmit.offset, rm, &sent);
 
 		dprintk("RPC:       xs_tcp_send_request(%u) = %d\n",
 				xdr->len - transport->xmit.offset, status);


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH RFC 3/3] SUNRPC: Teach server to use xprt_sock_sendmsg for socket sends
  2020-02-12  0:42 [PATCH RFC 0/3] Prepare NFS server for RPC-on-TLS Chuck Lever
  2020-02-12  0:42 ` [PATCH RFC 1/3] SUNRPC: Move xs_stream_record_marker Chuck Lever
  2020-02-12  0:42 ` [PATCH RFC 2/3] SUNRPC: Refactor xs_sendpages() Chuck Lever
@ 2020-02-12  0:42 ` Chuck Lever
  2 siblings, 0 replies; 4+ messages in thread
From: Chuck Lever @ 2020-02-12  0:42 UTC (permalink / raw)
  To: trondmy; +Cc: linux-nfs

xprt_sock_sendmsg uses the more efficient iov_iter-enabled kernel
socket API, and is a pre-requisite for server send-side support for
TLS.

Note that svc_process no longer needs to reserve a word for the
stream record marker, since the TCP transport now provides the
record marker automatically in a separate buffer.

The dprintk() in svc_send_common is also removed. It didn't seem
crucial for field troubleshooting. If more is needed there, a trace
point could be added in xprt_sock_sendmsg().

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 net/sunrpc/sunrpc.h   |    4 -
 net/sunrpc/svc.c      |    4 -
 net/sunrpc/svcsock.c  |  202 ++++++++++++++++---------------------------------
 net/sunrpc/xprtsock.c |   39 ++-------
 4 files changed, 74 insertions(+), 175 deletions(-)

diff --git a/net/sunrpc/sunrpc.h b/net/sunrpc/sunrpc.h
index c9bacb3c930f..47a756503d11 100644
--- a/net/sunrpc/sunrpc.h
+++ b/net/sunrpc/sunrpc.h
@@ -50,10 +50,6 @@ static inline int sock_is_loopback(struct sock *sk)
 	return loopback;
 }
 
-int svc_send_common(struct socket *sock, struct xdr_buf *xdr,
-		    struct page *headpage, unsigned long headoffset,
-		    struct page *tailpage, unsigned long tailoffset);
-
 int rpc_clients_notifier_register(void);
 void rpc_clients_notifier_unregister(void);
 #endif /* _NET_SUNRPC_SUNRPC_H */
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index d4a0134f1ca1..73b536cdb818 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1529,10 +1529,6 @@ static __printf(2,3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ..
 		goto out_drop;
 	}
 
-	/* Reserve space for the record marker */
-	if (rqstp->rq_prot == IPPROTO_TCP)
-		svc_putnl(resv, 0);
-
 	/* Returns 1 for send, 0 for drop */
 	if (likely(svc_process_common(rqstp, argv, resv)))
 		return svc_send(rqstp);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 47957088d7d3..4d2bdce48b64 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -175,111 +175,6 @@ static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
 	}
 }
 
-/*
- * send routine intended to be shared by the fore- and back-channel
- */
-int svc_send_common(struct socket *sock, struct xdr_buf *xdr,
-		    struct page *headpage, unsigned long headoffset,
-		    struct page *tailpage, unsigned long tailoffset)
-{
-	int		result;
-	int		size;
-	struct page	**ppage = xdr->pages;
-	size_t		base = xdr->page_base;
-	unsigned int	pglen = xdr->page_len;
-	unsigned int	flags = MSG_MORE | MSG_SENDPAGE_NOTLAST;
-	int		slen;
-	int		len = 0;
-
-	slen = xdr->len;
-
-	/* send head */
-	if (slen == xdr->head[0].iov_len)
-		flags = 0;
-	len = kernel_sendpage(sock, headpage, headoffset,
-				  xdr->head[0].iov_len, flags);
-	if (len != xdr->head[0].iov_len)
-		goto out;
-	slen -= xdr->head[0].iov_len;
-	if (slen == 0)
-		goto out;
-
-	/* send page data */
-	size = PAGE_SIZE - base < pglen ? PAGE_SIZE - base : pglen;
-	while (pglen > 0) {
-		if (slen == size)
-			flags = 0;
-		result = kernel_sendpage(sock, *ppage, base, size, flags);
-		if (result > 0)
-			len += result;
-		if (result != size)
-			goto out;
-		slen -= size;
-		pglen -= size;
-		size = PAGE_SIZE < pglen ? PAGE_SIZE : pglen;
-		base = 0;
-		ppage++;
-	}
-
-	/* send tail */
-	if (xdr->tail[0].iov_len) {
-		result = kernel_sendpage(sock, tailpage, tailoffset,
-				   xdr->tail[0].iov_len, 0);
-		if (result > 0)
-			len += result;
-	}
-
-out:
-	return len;
-}
-
-
-/*
- * Generic sendto routine
- */
-static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
-{
-	struct svc_sock	*svsk =
-		container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
-	struct socket	*sock = svsk->sk_sock;
-	union {
-		struct cmsghdr	hdr;
-		long		all[SVC_PKTINFO_SPACE / sizeof(long)];
-	} buffer;
-	struct cmsghdr *cmh = &buffer.hdr;
-	int		len = 0;
-	unsigned long tailoff;
-	unsigned long headoff;
-	RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
-
-	if (rqstp->rq_prot == IPPROTO_UDP) {
-		struct msghdr msg = {
-			.msg_name	= &rqstp->rq_addr,
-			.msg_namelen	= rqstp->rq_addrlen,
-			.msg_control	= cmh,
-			.msg_controllen	= sizeof(buffer),
-			.msg_flags	= MSG_MORE,
-		};
-
-		svc_set_cmsg_data(rqstp, cmh);
-
-		if (sock_sendmsg(sock, &msg) < 0)
-			goto out;
-	}
-
-	tailoff = ((unsigned long)xdr->tail[0].iov_base) & (PAGE_SIZE-1);
-	headoff = 0;
-	len = svc_send_common(sock, xdr, rqstp->rq_respages[0], headoff,
-			       rqstp->rq_respages[0], tailoff);
-
-out:
-	dprintk("svc: socket %p sendto([%p %zu... ], %d) = %d (addr %s)\n",
-		svsk, xdr->head[0].iov_base, xdr->head[0].iov_len,
-		xdr->len, len, svc_print_addr(rqstp, buf, sizeof(buf)));
-
-	return len;
-}
-
 static void svc_sock_read_payload(struct svc_rqst *rqstp, unsigned int pos,
 				  unsigned long length)
 {
@@ -606,17 +501,45 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
 	return 0;
 }
 
-static int
-svc_udp_sendto(struct svc_rqst *rqstp)
+/**
+ * svc_udp_sendto - Send out a reply on a UDP socket
+ * @rqstp: completed svc_rqst
+ *
+ * Returns the number of bytes sent, or a negative errno.
+ */
+static int svc_udp_sendto(struct svc_rqst *rqstp)
 {
-	int		error;
+	struct svc_xprt *xprt = rqstp->rq_xprt;
+	struct svc_sock	*svsk = container_of(xprt, struct svc_sock, sk_xprt);
+	struct xdr_buf *xdr = &rqstp->rq_res;
+	union {
+		struct cmsghdr	hdr;
+		long		all[SVC_PKTINFO_SPACE / sizeof(long)];
+	} buffer;
+	struct cmsghdr *cmh = &buffer.hdr;
+	struct msghdr msg = {
+		.msg_name	= &rqstp->rq_addr,
+		.msg_namelen	= rqstp->rq_addrlen,
+		.msg_control	= cmh,
+		.msg_controllen	= sizeof(buffer),
+	};
+	unsigned int sent;
+	int err;
 
-	error = svc_sendto(rqstp, &rqstp->rq_res);
-	if (error == -ECONNREFUSED)
-		/* ICMP error on earlier request. */
-		error = svc_sendto(rqstp, &rqstp->rq_res);
+	svc_set_cmsg_data(rqstp, cmh);
 
-	return error;
+	sent = 0;
+	err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent);
+	xdr_free_bvec(xdr);
+	if (err == -ECONNREFUSED) {
+		/* ICMP error on earlier request. */
+		sent = 0;
+		err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent);
+		xdr_free_bvec(xdr);
+	}
+	if (err < 0)
+		return err;
+	return sent;
 }
 
 static int svc_udp_has_wspace(struct svc_xprt *xprt)
@@ -1135,35 +1058,40 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 	return 0;	/* record not complete */
 }
 
-/*
- * Send out data on TCP socket.
+/**
+ * svc_tcp_sendto - Send out a reply on a TCP socket
+ * @rqstp: completed svc_rqst
+ *
+ * Returns the number of bytes sent, or a negative errno.
  */
 static int svc_tcp_sendto(struct svc_rqst *rqstp)
 {
-	struct xdr_buf	*xbufp = &rqstp->rq_res;
-	int sent;
-	__be32 reclen;
+	struct svc_xprt *xprt = rqstp->rq_xprt;
+	struct svc_sock	*svsk = container_of(xprt, struct svc_sock, sk_xprt);
+	struct xdr_buf *xdr = &rqstp->rq_res;
+	rpc_fraghdr marker = rpc_stream_record_marker(xdr->len);
+	struct msghdr msg = {
+		.msg_flags	= 0,
+	};
+	unsigned int sent;
+	int err;
 
-	/* Set up the first element of the reply kvec.
-	 * Any other kvecs that may be in use have been taken
-	 * care of by the server implementation itself.
-	 */
-	reclen = htonl(0x80000000|((xbufp->len ) - 4));
-	memcpy(xbufp->head[0].iov_base, &reclen, 4);
-
-	sent = svc_sendto(rqstp, &rqstp->rq_res);
-	if (sent != xbufp->len) {
-		printk(KERN_NOTICE
-		       "rpc-srv/tcp: %s: %s %d when sending %d bytes "
-		       "- shutting down socket\n",
-		       rqstp->rq_xprt->xpt_server->sv_name,
-		       (sent<0)?"got error":"sent only",
-		       sent, xbufp->len);
-		set_bit(XPT_CLOSE, &rqstp->rq_xprt->xpt_flags);
-		svc_xprt_enqueue(rqstp->rq_xprt);
-		sent = -EAGAIN;
-	}
+	sent = 0;
+	err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, marker, &sent);
+	xdr_free_bvec(xdr);
+	if (err < 0 || sent != (xdr->len + sizeof(marker)))
+		goto out_close;
 	return sent;
+
+out_close:
+	pr_notice("rpc-srv/tcp: %s: %s %d when sending %d bytes "
+		  "- shutting down socket\n",
+		  xprt->xpt_server->sv_name,
+		  (err < 0) ? "got error" : "sent",
+		  (err < 0) ? err : sent, xdr->len);
+	set_bit(XPT_CLOSE, &xprt->xpt_flags);
+	svc_xprt_enqueue(xprt);
+	return -EAGAIN;
 }
 
 static struct svc_xprt *svc_tcp_create(struct svc_serv *serv,
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 0380d72d8485..4fc38bd2f41e 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2517,45 +2517,24 @@ static void bc_free(struct rpc_task *task)
 	free_page((unsigned long)buf);
 }
 
-/*
- * Use the svc_sock to send the callback. Must be called with svsk->sk_mutex
- * held. Borrows heavily from svc_tcp_sendto and xs_tcp_send_request.
- */
 static int bc_sendto(struct rpc_rqst *req)
 {
-	int len;
-	struct xdr_buf *xbufp = &req->rq_snd_buf;
+	struct xdr_buf *xdr = &req->rq_snd_buf;
 	struct sock_xprt *transport =
 			container_of(req->rq_xprt, struct sock_xprt, xprt);
-	unsigned long headoff;
-	unsigned long tailoff;
-	struct page *tailpage;
 	struct msghdr msg = {
-		.msg_flags	= MSG_MORE
-	};
-	rpc_fraghdr marker = rpc_stream_record_marker(xbufp->len);
-	struct kvec iov = {
-		.iov_base	= &marker,
-		.iov_len	= sizeof(marker),
+		.msg_flags	= 0,
 	};
+	rpc_fraghdr marker = rpc_stream_record_marker(xdr->len);
+	unsigned int sent = 0;
+	int err;
 
 	req->rq_xtime = ktime_get();
-
-	len = kernel_sendmsg(transport->sock, &msg, &iov, 1, iov.iov_len);
-	if (len != iov.iov_len)
+	err = xprt_sock_sendmsg(transport->sock, &msg, xdr, 0, marker, &sent);
+	xdr_free_bvec(xdr);
+	if (err < 0 || sent != (xdr->len + sizeof(marker)))
 		return -EAGAIN;
-
-	tailpage = NULL;
-	if (xbufp->tail[0].iov_len)
-		tailpage = virt_to_page(xbufp->tail[0].iov_base);
-	tailoff = (unsigned long)xbufp->tail[0].iov_base & ~PAGE_MASK;
-	headoff = (unsigned long)xbufp->head[0].iov_base & ~PAGE_MASK;
-	len = svc_send_common(transport->sock, xbufp,
-			      virt_to_page(xbufp->head[0].iov_base), headoff,
-			      tailpage, tailoff);
-	if (len != xbufp->len)
-		return -EAGAIN;
-	return len;
+	return sent;
 }
 
 /*


^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2020-02-12  0:42 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-12  0:42 [PATCH RFC 0/3] Prepare NFS server for RPC-on-TLS Chuck Lever
2020-02-12  0:42 ` [PATCH RFC 1/3] SUNRPC: Move xs_stream_record_marker Chuck Lever
2020-02-12  0:42 ` [PATCH RFC 2/3] SUNRPC: Refactor xs_sendpages() Chuck Lever
2020-02-12  0:42 ` [PATCH RFC 3/3] SUNRPC: Teach server to use xprt_sock_sendmsg for socket sends Chuck Lever

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.