All of lore.kernel.org
 help / color / mirror / Atom feed
From: Pavel Begunkov <asml.silence@gmail.com>
To: io-uring@vger.kernel.org, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org
Cc: Jakub Kicinski <kuba@kernel.org>,
	Jonathan Lemon <jonathan.lemon@gmail.com>,
	"David S . Miller" <davem@davemloft.net>,
	Willem de Bruijn <willemb@google.com>,
	Eric Dumazet <edumazet@google.com>,
	Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>,
	David Ahern <dsahern@kernel.org>, Jens Axboe <axboe@kernel.dk>,
	Pavel Begunkov <asml.silence@gmail.com>
Subject: [RFC 07/12] io_uring: infrastructure for send zc notifications
Date: Tue, 30 Nov 2021 15:18:55 +0000	[thread overview]
Message-ID: <5c2b751d6c29c02f1d0a3b0e0b220de321bc3e2d.1638282789.git.asml.silence@gmail.com> (raw)
In-Reply-To: <cover.1638282789.git.asml.silence@gmail.com>

Add a new ubuf_info callback io_uring_tx_zerocopy_callback(), which
should post a CQE when it completes. Also, implement some
infrastructure for allocating and managing struct ubuf_info.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 114 +++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 108 insertions(+), 6 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index a01f91e70fa5..6ca02e60fa48 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -329,6 +329,11 @@ struct io_submit_state {
 };
 
 struct io_tx_notifier {
+	struct ubuf_info	uarg;
+	struct work_struct	commit_work;
+	struct percpu_ref	*fixed_rsrc_refs;
+	u64			tag;
+	u32			seq;
 };
 
 struct io_tx_ctx {
@@ -1275,15 +1280,20 @@ static void io_rsrc_refs_refill(struct io_ring_ctx *ctx)
 	percpu_ref_get_many(&ctx->rsrc_node->refs, IO_RSRC_REF_BATCH);
 }
 
+static inline void io_set_rsrc_node(struct percpu_ref **rsrc_refs,
+				    struct io_ring_ctx *ctx)
+{
+	*rsrc_refs = &ctx->rsrc_node->refs;
+	ctx->rsrc_cached_refs--;
+	if (unlikely(ctx->rsrc_cached_refs < 0))
+		io_rsrc_refs_refill(ctx);
+}
+
 static inline void io_req_set_rsrc_node(struct io_kiocb *req,
 					struct io_ring_ctx *ctx)
 {
-	if (!req->fixed_rsrc_refs) {
-		req->fixed_rsrc_refs = &ctx->rsrc_node->refs;
-		ctx->rsrc_cached_refs--;
-		if (unlikely(ctx->rsrc_cached_refs < 0))
-			io_rsrc_refs_refill(ctx);
-	}
+	if (!req->fixed_rsrc_refs)
+		io_set_rsrc_node(&req->fixed_rsrc_refs, ctx);
 }
 
 static void io_refs_resurrect(struct percpu_ref *ref, struct completion *compl)
@@ -1930,6 +1940,76 @@ static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data,
 	return __io_fill_cqe(ctx, user_data, res, cflags);
 }
 
+static void io_zc_tx_work_callback(struct work_struct *work)
+{
+	struct io_tx_notifier *notifier = container_of(work, struct io_tx_notifier,
+						       commit_work);
+	struct io_ring_ctx *ctx = notifier->uarg.ctx;
+
+	spin_lock(&ctx->completion_lock);
+	io_fill_cqe_aux(ctx, notifier->tag, notifier->seq, 0);
+	io_commit_cqring(ctx);
+	spin_unlock(&ctx->completion_lock);
+	io_cqring_ev_posted(ctx);
+
+	percpu_ref_put(notifier->fixed_rsrc_refs);
+	percpu_ref_put(&ctx->refs);
+	kfree(notifier);
+}
+
+static void io_uring_tx_zerocopy_callback(struct sk_buff *skb,
+					  struct ubuf_info *uarg,
+					  bool success)
+{
+	struct io_tx_notifier *notifier;
+
+	notifier = container_of(uarg, struct io_tx_notifier, uarg);
+	if (!refcount_dec_and_test(&uarg->refcnt))
+		return;
+
+	if (in_interrupt()) {
+		INIT_WORK(&notifier->commit_work, io_zc_tx_work_callback);
+		queue_work(system_unbound_wq, &notifier->commit_work);
+	} else {
+		io_zc_tx_work_callback(&notifier->commit_work);
+	}
+}
+
+static struct io_tx_notifier *io_alloc_tx_notifier(struct io_ring_ctx *ctx,
+						   struct io_tx_ctx *tx_ctx)
+{
+	struct io_tx_notifier *notifier;
+	struct ubuf_info *uarg;
+
+	notifier = kmalloc(sizeof(*notifier), GFP_ATOMIC);
+	if (!notifier)
+		return NULL;
+
+	WARN_ON_ONCE(!current->io_uring);
+	notifier->seq = tx_ctx->seq++;
+	notifier->tag = tx_ctx->tag;
+	io_set_rsrc_node(&notifier->fixed_rsrc_refs, ctx);
+
+	uarg = &notifier->uarg;
+	uarg->ctx = ctx;
+	uarg->flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN;
+	uarg->callback = io_uring_tx_zerocopy_callback;
+	refcount_set(&uarg->refcnt, 1);
+	percpu_ref_get(&ctx->refs);
+	return notifier;
+}
+
+__attribute__((unused))
+static inline struct io_tx_notifier *io_get_tx_notifier(struct io_ring_ctx *ctx,
+							struct io_tx_ctx *tx_ctx)
+{
+	if (tx_ctx->notifier)
+		return tx_ctx->notifier;
+
+	tx_ctx->notifier = io_alloc_tx_notifier(ctx, tx_ctx);
+	return tx_ctx->notifier;
+}
+
 static void io_req_complete_post(struct io_kiocb *req, s32 res,
 				 u32 cflags)
 {
@@ -9212,11 +9292,27 @@ static int io_buffer_validate(struct iovec *iov)
 	return 0;
 }
 
+static void io_sqe_tx_ctx_kill_ubufs(struct io_ring_ctx *ctx)
+{
+	struct io_tx_ctx *tx_ctx;
+	int i;
+
+	for (i = 0; i < ctx->nr_tx_ctxs; i++) {
+		tx_ctx = &ctx->tx_ctxs[i];
+		if (!tx_ctx->notifier)
+			continue;
+		io_uring_tx_zerocopy_callback(NULL, &tx_ctx->notifier->uarg,
+					      true);
+		tx_ctx->notifier = NULL;
+	}
+}
+
 static int io_sqe_tx_ctx_unregister(struct io_ring_ctx *ctx)
 {
 	if (!ctx->nr_tx_ctxs)
 		return -ENXIO;
 
+	io_sqe_tx_ctx_kill_ubufs(ctx);
 	kvfree(ctx->tx_ctxs);
 	ctx->tx_ctxs = NULL;
 	ctx->nr_tx_ctxs = 0;
@@ -9608,6 +9704,12 @@ static __cold void io_ring_exit_work(struct work_struct *work)
 			io_sq_thread_unpark(sqd);
 		}
 
+		if (READ_ONCE(ctx->nr_tx_ctxs)) {
+			mutex_lock(&ctx->uring_lock);
+			io_sqe_tx_ctx_kill_ubufs(ctx);
+			mutex_unlock(&ctx->uring_lock);
+		}
+
 		io_req_caches_free(ctx);
 
 		if (WARN_ON_ONCE(time_after(jiffies, timeout))) {
-- 
2.34.0


  parent reply	other threads:[~2021-11-30 15:21 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-11-30 15:18 [RFC 00/12] io_uring zerocopy send Pavel Begunkov
2021-11-30 15:18 ` [RFC 01/12] skbuff: add SKBFL_DONT_ORPHAN flag Pavel Begunkov
2021-11-30 15:18 ` [RFC 02/12] skbuff: pass a struct ubuf_info in msghdr Pavel Begunkov
2021-11-30 15:18 ` [RFC 03/12] net/udp: add support msgdr::msg_ubuf Pavel Begunkov
2021-11-30 15:18 ` [RFC 04/12] net: add zerocopy_sg_from_iter for bvec Pavel Begunkov
2021-11-30 15:18 ` [RFC 05/12] net: optimise page get/free for bvec zc Pavel Begunkov
2021-12-01 19:20   ` Jonathan Lemon
2021-12-01 20:17     ` Pavel Begunkov
2021-11-30 15:18 ` [RFC 06/12] io_uring: add send notifiers registration Pavel Begunkov
2021-11-30 15:18 ` Pavel Begunkov [this message]
2021-11-30 15:18 ` [RFC 08/12] io_uring: wire send zc request type Pavel Begunkov
2021-11-30 15:18 ` [RFC 09/12] io_uring: add an option to flush zc notifications Pavel Begunkov
2021-11-30 15:18 ` [RFC 10/12] io_uring: opcode independent fixed buf import Pavel Begunkov
2021-11-30 15:18 ` [RFC 11/12] io_uring: sendzc with fixed buffers Pavel Begunkov
2021-11-30 23:22   ` kernel test robot
2021-12-01  9:18   ` kernel test robot
2021-11-30 15:19 ` [RFC 12/12] io_uring: cache struct ubuf_info Pavel Begunkov
2021-12-01  3:10 ` [RFC 00/12] io_uring zerocopy send David Ahern
2021-12-01 15:32   ` Pavel Begunkov
2021-12-01 17:57     ` David Ahern
2021-12-01 19:11       ` Pavel Begunkov
2021-12-01 19:20         ` David Ahern
2021-12-01 20:15           ` Pavel Begunkov
2021-12-01 21:51             ` Martin KaFai Lau
2021-12-01 22:35               ` David Ahern
2021-12-01 23:07                 ` Martin KaFai Lau
2021-12-01 23:18                   ` Pavel Begunkov
2021-12-02 15:48               ` Pavel Begunkov
2021-12-02 17:40                 ` Martin KaFai Lau
2021-12-01 20:42       ` Pavel Begunkov
2021-12-01 14:31 ` Pavel Begunkov
2021-12-01 17:49   ` David Ahern
2021-12-01 19:59     ` Pavel Begunkov
2021-12-01 18:10 ` Willem de Bruijn
2021-12-01 19:59   ` Pavel Begunkov
2021-12-01 20:29     ` Pavel Begunkov
2021-12-02  0:36       ` Willem de Bruijn
2021-12-02 16:25         ` Pavel Begunkov
2021-12-02  0:32     ` Willem de Bruijn
2021-12-02 16:45       ` Pavel Begunkov
2021-12-02 21:25         ` Willem de Bruijn
2021-12-03 16:19           ` Pavel Begunkov
2021-12-03 16:30             ` Willem de Bruijn

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5c2b751d6c29c02f1d0a3b0e0b220de321bc3e2d.1638282789.git.asml.silence@gmail.com \
    --to=asml.silence@gmail.com \
    --cc=axboe@kernel.dk \
    --cc=davem@davemloft.net \
    --cc=dsahern@kernel.org \
    --cc=edumazet@google.com \
    --cc=io-uring@vger.kernel.org \
    --cc=jonathan.lemon@gmail.com \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=willemb@google.com \
    --cc=yoshfuji@linux-ipv6.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.