All of lore.kernel.org
 help / color / mirror / Atom feed
From: Pavel Begunkov <asml.silence@gmail.com>
To: io-uring@vger.kernel.org, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org
Cc: Jakub Kicinski <kuba@kernel.org>,
	Jonathan Lemon <jonathan.lemon@gmail.com>,
	"David S . Miller" <davem@davemloft.net>,
	Willem de Bruijn <willemb@google.com>,
	Eric Dumazet <edumazet@google.com>,
	David Ahern <dsahern@kernel.org>, Jens Axboe <axboe@kernel.dk>,
	Pavel Begunkov <asml.silence@gmail.com>
Subject: [RFC v2 10/19] io_uring: add send notifiers registration
Date: Tue, 21 Dec 2021 15:35:32 +0000	[thread overview]
Message-ID: <4cfeb88dc07fcdff2c0c864c031bb32b61439674.1640029579.git.asml.silence@gmail.com> (raw)
In-Reply-To: <cover.1640029579.git.asml.silence@gmail.com>

Add IORING_REGISTER_TX_CTX and IORING_UNREGISTER_TX_CTX. Transmission
(i.e. send) context will serve be used to notify the userspace when
fixed buffers used for zerocopy sends are released by the kernel.

Notification of a single tx context lives in generations, where each
generation posts one CQE with ->user_data equal to the specified tag and
->res is a generation number starting from 0. All requests issued
against a ctx will get attached to the current generation of
notifications. Then, the userspace will be able to request to flush the
notification allowing it to post a CQE when all buffers of all requests
attached to it are released by the kernel. It'll also switch the
generation to a new one with a sequence number incremented by one.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c                 | 72 +++++++++++++++++++++++++++++++++++
 include/uapi/linux/io_uring.h |  7 ++++
 2 files changed, 79 insertions(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 59380e3454ad..a01f91e70fa5 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -94,6 +94,8 @@
 #define IORING_MAX_CQ_ENTRIES	(2 * IORING_MAX_ENTRIES)
 #define IORING_SQPOLL_CAP_ENTRIES_VALUE 8
 
+#define IORING_MAX_TX_NOTIFIERS	(1U << 10)
+
 /* only define max */
 #define IORING_MAX_FIXED_FILES	(1U << 15)
 #define IORING_MAX_RESTRICTIONS	(IORING_RESTRICTION_LAST + \
@@ -326,6 +328,15 @@ struct io_submit_state {
 	struct blk_plug		plug;
 };
 
+struct io_tx_notifier {
+};
+
+struct io_tx_ctx {
+	struct io_tx_notifier	*notifier;
+	u64			tag;
+	u32			seq;
+};
+
 struct io_ring_ctx {
 	/* const or read-mostly hot data */
 	struct {
@@ -373,6 +384,8 @@ struct io_ring_ctx {
 		unsigned		nr_user_files;
 		unsigned		nr_user_bufs;
 		struct io_mapped_ubuf	**user_bufs;
+		struct io_tx_ctx	*tx_ctxs;
+		unsigned		nr_tx_ctxs;
 
 		struct io_submit_state	submit_state;
 		struct list_head	timeout_list;
@@ -9199,6 +9212,55 @@ static int io_buffer_validate(struct iovec *iov)
 	return 0;
 }
 
+static int io_sqe_tx_ctx_unregister(struct io_ring_ctx *ctx)
+{
+	if (!ctx->nr_tx_ctxs)
+		return -ENXIO;
+
+	kvfree(ctx->tx_ctxs);
+	ctx->tx_ctxs = NULL;
+	ctx->nr_tx_ctxs = 0;
+	return 0;
+}
+
+static int io_sqe_tx_ctx_register(struct io_ring_ctx *ctx,
+				  void __user *arg, unsigned int nr_args)
+{
+	struct io_uring_tx_ctx_register __user *tx_args = arg;
+	struct io_uring_tx_ctx_register tx_arg;
+	unsigned i;
+	int ret;
+
+	if (ctx->nr_tx_ctxs)
+		return -EBUSY;
+	if (!nr_args)
+		return -EINVAL;
+	if (nr_args > IORING_MAX_TX_NOTIFIERS)
+		return -EMFILE;
+
+	ctx->tx_ctxs = kvcalloc(nr_args, sizeof(ctx->tx_ctxs[0]),
+				GFP_KERNEL_ACCOUNT);
+	if (!ctx->tx_ctxs)
+		return -ENOMEM;
+
+	for (i = 0; i < nr_args; i++, ctx->nr_tx_ctxs++) {
+		struct io_tx_ctx *tx_ctx = &ctx->tx_ctxs[i];
+
+		if (copy_from_user(&tx_arg, &tx_args[i], sizeof(tx_arg))) {
+			ret = -EFAULT;
+			goto out_fput;
+		}
+		tx_ctx->tag = tx_arg.tag;
+	}
+	return 0;
+
+out_fput:
+	kvfree(ctx->tx_ctxs);
+	ctx->tx_ctxs = NULL;
+	ctx->nr_tx_ctxs = 0;
+	return ret;
+}
+
 static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
 				   unsigned int nr_args, u64 __user *tags)
 {
@@ -9429,6 +9491,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
 #endif
 	WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list));
 
+	io_sqe_tx_ctx_unregister(ctx);
 	io_mem_free(ctx->rings);
 	io_mem_free(ctx->sq_sqes);
 
@@ -11104,6 +11167,15 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 			break;
 		ret = io_register_iowq_max_workers(ctx, arg);
 		break;
+	case IORING_REGISTER_TX_CTX:
+		ret = io_sqe_tx_ctx_register(ctx, arg, nr_args);
+		break;
+	case IORING_UNREGISTER_TX_CTX:
+		ret = -EINVAL;
+		if (arg || nr_args)
+			break;
+		ret = io_sqe_tx_ctx_unregister(ctx);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 787f491f0d2a..f2e8d18e40e0 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -325,6 +325,9 @@ enum {
 	/* set/get max number of io-wq workers */
 	IORING_REGISTER_IOWQ_MAX_WORKERS	= 19,
 
+	IORING_REGISTER_TX_CTX			= 20,
+	IORING_UNREGISTER_TX_CTX		= 21,
+
 	/* this goes last */
 	IORING_REGISTER_LAST
 };
@@ -365,6 +368,10 @@ struct io_uring_rsrc_update2 {
 	__u32 resv2;
 };
 
+struct io_uring_tx_ctx_register {
+	__u64 tag;
+};
+
 /* Skip updating fd indexes set to this value in the fd table */
 #define IORING_REGISTER_FILES_SKIP	(-2)
 
-- 
2.34.1


  parent reply	other threads:[~2021-12-21 15:36 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-12-21 15:35 [RFC v2 00/19] io_uring zerocopy tx Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 01/19] skbuff: add SKBFL_DONT_ORPHAN flag Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 02/19] skbuff: pass a struct ubuf_info in msghdr Pavel Begunkov
2022-01-11 13:51   ` Hao Xu
2022-01-11 15:50     ` Pavel Begunkov
2022-01-12  3:39       ` Hao Xu
2022-01-12 16:53         ` Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 03/19] net: add zerocopy_sg_from_iter for bvec Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 04/19] net: optimise page get/free for bvec zc Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 05/19] net: don't track pfmemalloc for zc registered mem Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 06/19] ipv4/udp: add support msgdr::msg_ubuf Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 07/19] ipv6/udp: " Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 08/19] ipv4: avoid partial copy for zc Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 09/19] ipv6: " Pavel Begunkov
2021-12-21 15:35 ` Pavel Begunkov [this message]
2021-12-21 15:35 ` [RFC v2 11/19] io_uring: infrastructure for send zc notifications Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 12/19] io_uring: wire send zc request type Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 13/19] io_uring: add an option to flush zc notifications Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 14/19] io_uring: opcode independent fixed buf import Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 15/19] io_uring: sendzc with fixed buffers Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 16/19] io_uring: cache struct ubuf_info Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 17/19] io_uring: unclog ctx refs waiting with zc notifiers Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 18/19] io_uring: task_work for notification delivery Pavel Begunkov
2021-12-21 15:35 ` [RFC v2 19/19] io_uring: optimise task referencing by notifiers Pavel Begunkov
2021-12-21 15:43 ` [RFC v2 00/19] io_uring zerocopy tx Pavel Begunkov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4cfeb88dc07fcdff2c0c864c031bb32b61439674.1640029579.git.asml.silence@gmail.com \
    --to=asml.silence@gmail.com \
    --cc=axboe@kernel.dk \
    --cc=davem@davemloft.net \
    --cc=dsahern@kernel.org \
    --cc=edumazet@google.com \
    --cc=io-uring@vger.kernel.org \
    --cc=jonathan.lemon@gmail.com \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=willemb@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.