All of lore.kernel.org
 help / color / mirror / Atom feed
From: Pavel Begunkov <asml.silence@gmail.com>
To: io-uring@vger.kernel.org, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org
Cc: "David S . Miller" <davem@davemloft.net>,
	Jakub Kicinski <kuba@kernel.org>,
	Jonathan Lemon <jonathan.lemon@gmail.com>,
	Willem de Bruijn <willemb@google.com>,
	Jens Axboe <axboe@kernel.dk>,
	kernel-team@fb.com, Pavel Begunkov <asml.silence@gmail.com>
Subject: [RFC net-next v3 21/29] io_uring: wire send zc request type
Date: Tue, 28 Jun 2022 19:56:43 +0100	[thread overview]
Message-ID: <b8af7329752169ad0fef9e5d8d41c40327a91fa1.1653992701.git.asml.silence@gmail.com> (raw)
In-Reply-To: <cover.1653992701.git.asml.silence@gmail.com>

Add a new io_uring opcode IORING_OP_SENDZC. The main distinction from
IORING_OP_SEND is that the user should specify a notification slot
index in sqe::notification_idx and the buffers are safe to reuse only
when the used notification is flushed and completes.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c                 | 103 +++++++++++++++++++++++++++++++++-
 include/uapi/linux/io_uring.h |   5 ++
 2 files changed, 106 insertions(+), 2 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index a88c9c73ed1d..4a1a1d43e9b3 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -716,6 +716,14 @@ struct io_sr_msg {
 	unsigned int			flags;
 };
 
+struct io_sendzc {
+	struct file			*file;
+	void __user			*buf;
+	size_t				len;
+	u16				slot_idx;
+	int				msg_flags;
+};
+
 struct io_open {
 	struct file			*file;
 	int				dfd;
@@ -1044,6 +1052,7 @@ struct io_kiocb {
 		struct io_socket	sock;
 		struct io_nop		nop;
 		struct io_uring_cmd	uring_cmd;
+		struct io_sendzc	msgzc;
 	};
 
 	u8				opcode;
@@ -1384,6 +1393,13 @@ static const struct io_op_def io_op_defs[] = {
 		.needs_async_setup	= 1,
 		.async_size		= uring_cmd_pdu_size(1),
 	},
+	[IORING_OP_SENDZC] = {
+		.needs_file		= 1,
+		.unbound_nonreg_file	= 1,
+		.pollout		= 1,
+		.audit_skip		= 1,
+		.ioprio			= 1,
+	},
 };
 
 /* requests with any of those set should undergo io_disarm_next() */
@@ -1525,6 +1541,8 @@ const char *io_uring_get_opcode(u8 opcode)
 		return "SOCKET";
 	case IORING_OP_URING_CMD:
 		return "URING_CMD";
+	case IORING_OP_SENDZC:
+		return "URING_SENDZC";
 	case IORING_OP_LAST:
 		return "INVALID";
 	}
@@ -2920,7 +2938,6 @@ static struct io_notif *io_alloc_notif(struct io_ring_ctx *ctx,
 	return notif;
 }
 
-__attribute__((unused))
 static inline struct io_notif *io_get_notif(struct io_ring_ctx *ctx,
 					    struct io_notif_slot *slot)
 {
@@ -2929,7 +2946,6 @@ static inline struct io_notif *io_get_notif(struct io_ring_ctx *ctx,
 	return slot->notif;
 }
 
-__attribute__((unused))
 static inline struct io_notif_slot *io_get_notif_slot(struct io_ring_ctx *ctx,
 						      int idx)
 	__must_hold(&ctx->uring_lock)
@@ -6546,6 +6562,83 @@ static int io_send(struct io_kiocb *req, unsigned int issue_flags)
 	return 0;
 }
 
+static int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+	struct io_sendzc *zc = &req->msgzc;
+
+	if (READ_ONCE(sqe->ioprio) || READ_ONCE(sqe->addr2) || READ_ONCE(sqe->__pad2[0]))
+		return -EINVAL;
+
+	zc->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
+	zc->len = READ_ONCE(sqe->len);
+	zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
+	zc->slot_idx = READ_ONCE(sqe->notification_idx);
+	if (zc->msg_flags & MSG_DONTWAIT)
+		req->flags |= REQ_F_NOWAIT;
+#ifdef CONFIG_COMPAT
+	if (req->ctx->compat)
+		zc->msg_flags |= MSG_CMSG_COMPAT;
+#endif
+	return 0;
+}
+
+static int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
+{
+	struct io_ring_ctx *ctx = req->ctx;
+	struct io_sendzc *zc = &req->msgzc;
+	struct io_notif_slot *notif_slot;
+	struct io_notif *notif;
+	struct msghdr msg;
+	struct iovec iov;
+	struct socket *sock;
+	unsigned msg_flags;
+	int ret, min_ret = 0;
+
+	if (issue_flags & IO_URING_F_UNLOCKED)
+		return -EAGAIN;
+	sock = sock_from_file(req->file);
+	if (unlikely(!sock))
+		return -ENOTSOCK;
+
+	notif_slot = io_get_notif_slot(ctx, zc->slot_idx);
+	if (!notif_slot)
+		return -EINVAL;
+	notif = io_get_notif(ctx, notif_slot);
+	if (!notif)
+		return -ENOMEM;
+
+	msg.msg_name = NULL;
+	msg.msg_control = NULL;
+	msg.msg_controllen = 0;
+	msg.msg_namelen = 0;
+	msg.msg_managed_data = 0;
+
+	ret = import_single_range(WRITE, zc->buf, zc->len, &iov, &msg.msg_iter);
+	if (unlikely(ret))
+		return ret;
+
+	msg_flags = zc->msg_flags | MSG_ZEROCOPY;
+	if (issue_flags & IO_URING_F_NONBLOCK)
+		msg_flags |= MSG_DONTWAIT;
+	if (msg_flags & MSG_WAITALL)
+		min_ret = iov_iter_count(&msg.msg_iter);
+
+	msg.msg_flags = msg_flags;
+	msg.msg_ubuf = &notif->uarg;
+	ret = sock_sendmsg(sock, &msg);
+
+	if (unlikely(ret < min_ret)) {
+		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
+			return -EAGAIN;
+		if (ret == -ERESTARTSYS)
+			ret = -EINTR;
+		req_set_fail(req);
+	}
+
+	__io_req_complete(req, issue_flags, ret, 0);
+	return 0;
+}
+
 static int __io_recvmsg_copy_hdr(struct io_kiocb *req,
 				 struct io_async_msghdr *iomsg)
 {
@@ -7064,6 +7157,7 @@ IO_NETOP_PREP_ASYNC(connect);
 IO_NETOP_PREP(accept);
 IO_NETOP_PREP(socket);
 IO_NETOP_PREP(shutdown);
+IO_NETOP_PREP(sendzc);
 IO_NETOP_FN(send);
 IO_NETOP_FN(recv);
 #endif /* CONFIG_NET */
@@ -8389,6 +8483,8 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	case IORING_OP_SENDMSG:
 	case IORING_OP_SEND:
 		return io_sendmsg_prep(req, sqe);
+	case IORING_OP_SENDZC:
+		return io_sendzc_prep(req, sqe);
 	case IORING_OP_RECVMSG:
 	case IORING_OP_RECV:
 		return io_recvmsg_prep(req, sqe);
@@ -8689,6 +8785,9 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
 	case IORING_OP_SEND:
 		ret = io_send(req, issue_flags);
 		break;
+	case IORING_OP_SENDZC:
+		ret = io_sendzc(req, issue_flags);
+		break;
 	case IORING_OP_RECVMSG:
 		ret = io_recvmsg(req, issue_flags);
 		break;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 19b9d7a2da29..6c6f20ae5a95 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -61,6 +61,10 @@ struct io_uring_sqe {
 	union {
 		__s32	splice_fd_in;
 		__u32	file_index;
+		struct {
+			__u16	notification_idx;
+			__u16	__pad;
+		} __attribute__((packed));
 	};
 	union {
 		struct {
@@ -190,6 +194,7 @@ enum io_uring_op {
 	IORING_OP_GETXATTR,
 	IORING_OP_SOCKET,
 	IORING_OP_URING_CMD,
+	IORING_OP_SENDZC,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
-- 
2.36.1


  parent reply	other threads:[~2022-06-28 19:04 UTC|newest]

Thread overview: 47+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-06-28 18:56 [RFC net-next v3 00/29] io_uring zerocopy send Pavel Begunkov
2022-06-28 18:56 ` [RFC net-next v3 01/29] ipv4: avoid partial copy for zc Pavel Begunkov
2022-06-28 18:56 ` [RFC net-next v3 02/29] ipv6: " Pavel Begunkov
2022-06-28 18:56 ` [RFC net-next v3 03/29] skbuff: add SKBFL_DONT_ORPHAN flag Pavel Begunkov
2022-06-28 18:56 ` [RFC net-next v3 04/29] skbuff: carry external ubuf_info in msghdr Pavel Begunkov
2022-06-28 18:56 ` [RFC net-next v3 05/29] net: bvec specific path in zerocopy_sg_from_iter Pavel Begunkov
2022-06-28 20:06   ` Al Viro
2022-06-28 21:33     ` Pavel Begunkov
2022-06-28 22:52   ` David Ahern
2022-07-04 13:31     ` Pavel Begunkov
2022-07-05  2:28       ` David Ahern
2022-07-05 14:03         ` Pavel Begunkov
2022-07-05 22:09           ` Pavel Begunkov
2022-07-06 15:11             ` David Ahern
2022-06-28 18:56 ` [RFC net-next v3 06/29] net: optimise bvec-based zc page referencing Pavel Begunkov
2022-06-28 18:56 ` [RFC net-next v3 07/29] net: don't track pfmemalloc for managed frags Pavel Begunkov
2022-06-28 18:56 ` [RFC net-next v3 08/29] skbuff: don't mix ubuf_info of different types Pavel Begunkov
2022-06-28 18:56 ` [RFC net-next v3 09/29] ipv4/udp: support zc with managed data Pavel Begunkov
2022-06-28 18:56 ` [RFC net-next v3 10/29] ipv6/udp: " Pavel Begunkov
2022-06-28 18:56 ` [RFC net-next v3 11/29] tcp: " Pavel Begunkov
2022-06-28 18:56 ` [RFC net-next v3 12/29] tcp: kill extra io_uring's uarg refcounting Pavel Begunkov
2022-06-28 18:56 ` [RFC net-next v3 13/29] net: let callers provide extra ubuf_info refs Pavel Begunkov
2022-06-28 18:56 ` [RFC net-next v3 14/29] io_uring: opcode independent fixed buf import Pavel Begunkov
2022-06-28 18:56 ` [RFC net-next v3 15/29] io_uring: add zc notification infrastructure Pavel Begunkov
2022-06-28 18:56 ` [RFC net-next v3 16/29] io_uring: cache struct io_notif Pavel Begunkov
2022-06-28 18:56 ` [RFC net-next v3 17/29] io_uring: complete notifiers in tw Pavel Begunkov
2022-06-28 18:56 ` [RFC net-next v3 18/29] io_uring: add notification slot registration Pavel Begunkov
2022-06-28 18:56 ` [RFC net-next v3 19/29] io_uring: rename IORING_OP_FILES_UPDATE Pavel Begunkov
2022-06-28 18:56 ` [RFC net-next v3 20/29] io_uring: add zc notification flush requests Pavel Begunkov
2022-06-28 18:56 ` Pavel Begunkov [this message]
2022-06-28 18:56 ` [RFC net-next v3 22/29] io_uring: account locked pages for non-fixed zc Pavel Begunkov
2022-06-28 18:56 ` [RFC net-next v3 23/29] io_uring: allow to pass addr into sendzc Pavel Begunkov
2022-06-29  7:42   ` Stefan Metzmacher
2022-06-29  9:53     ` Pavel Begunkov
2022-08-13  8:45       ` Stefan Metzmacher
2022-08-15  9:46         ` Pavel Begunkov
2022-08-15 11:40           ` Stefan Metzmacher
2022-08-15 12:19             ` Pavel Begunkov
2022-08-15 13:30               ` Stefan Metzmacher
2022-08-15 14:09                 ` Pavel Begunkov
2022-06-28 18:56 ` [RFC net-next v3 24/29] io_uring: add rsrc referencing for notifiers Pavel Begunkov
2022-06-28 18:56 ` [RFC net-next v3 25/29] io_uring: sendzc with fixed buffers Pavel Begunkov
2022-06-28 18:56 ` [RFC net-next v3 26/29] io_uring: flush notifiers after sendzc Pavel Begunkov
2022-06-28 18:56 ` [RFC net-next v3 27/29] io_uring: allow to override zc tag on flush Pavel Begunkov
2022-06-28 18:56 ` [RFC net-next v3 28/29] io_uring: batch submission notif referencing Pavel Begunkov
2022-06-28 18:56 ` [RFC net-next v3 29/29] selftests/io_uring: test zerocopy send Pavel Begunkov
2022-06-28 19:03 ` [RFC net-next v3 00/29] io_uring " Pavel Begunkov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=b8af7329752169ad0fef9e5d8d41c40327a91fa1.1653992701.git.asml.silence@gmail.com \
    --to=asml.silence@gmail.com \
    --cc=axboe@kernel.dk \
    --cc=davem@davemloft.net \
    --cc=io-uring@vger.kernel.org \
    --cc=jonathan.lemon@gmail.com \
    --cc=kernel-team@fb.com \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=willemb@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.