From: Pavel Begunkov <asml.silence@gmail.com>
To: io-uring@vger.kernel.org, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org
Cc: "David S . Miller" <davem@davemloft.net>,
	Jakub Kicinski <kuba@kernel.org>,
	Jonathan Lemon <jonathan.lemon@gmail.com>,
	Willem de Bruijn <willemb@google.com>,
	Jens Axboe <axboe@kernel.dk>, David Ahern <dsahern@kernel.org>,
	kernel-team@fb.com, Pavel Begunkov <asml.silence@gmail.com>
Subject: [PATCH net-next v3 14/25] io_uring: cache struct io_notif
Date: Tue,  5 Jul 2022 16:01:14 +0100	[thread overview]
Message-ID: <e31a9d109bfd0cf7926c79ad95a548e905ae033b.1656318994.git.asml.silence@gmail.com> (raw)
In-Reply-To: <cover.1656318994.git.asml.silence@gmail.com>

kmalloc'ing struct io_notif is too expensive when done frequently, so cache
notifiers as we do with many other io_uring resources. Keep two lists: the
first one, protected by ->uring_lock, is the one we take notifiers from; the
second, protected by ->completion_lock, is where released notifiers are
queued. When the first list runs empty, we splice the second one into it.
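To make the scheme concrete, here is a minimal userspace sketch of the same
two-list cache, assuming pthread mutexes in place of ->uring_lock and
->completion_lock and a relaxed atomic load in place of the
data_race(READ_ONCE()) check; struct node, cache_get(), cache_put() and
SPLICE_BATCH are illustrative names, not kernel API:

/*
 * Sketch only: models the two-list cache from this patch in userspace.
 * alloc_lock stands in for ->uring_lock, free_lock for ->completion_lock.
 */
#include <pthread.h>
#include <stdlib.h>

#define SPLICE_BATCH	32

struct node {
	struct node *next;
};

static struct node *alloc_list;		/* consumer side, "uring_lock" list */
static pthread_mutex_t alloc_lock = PTHREAD_MUTEX_INITIALIZER;

static struct node *free_list;		/* producer side, "completion_lock" list */
static unsigned int free_nr;
static pthread_mutex_t free_lock = PTHREAD_MUTEX_INITIALIZER;

/* Release path: queue the object on the locked list. */
static void cache_put(struct node *n)
{
	pthread_mutex_lock(&free_lock);
	n->next = free_list;
	free_list = n;
	free_nr++;			/* written only under free_lock */
	pthread_mutex_unlock(&free_lock);
}

/* Move everything from the locked list over; caller holds alloc_lock. */
static void cache_splice(void)
{
	struct node *head;

	pthread_mutex_lock(&free_lock);
	head = free_list;
	free_list = NULL;
	free_nr = 0;
	pthread_mutex_unlock(&free_lock);

	while (head) {			/* prepend spliced nodes to alloc_list */
		struct node *next = head->next;

		head->next = alloc_list;
		alloc_list = head;
		head = next;
	}
}

/* Allocation path: take from the cache, refill in batches, else allocate. */
static struct node *cache_get(void)
{
	struct node *n;

	pthread_mutex_lock(&alloc_lock);
	/*
	 * Racy read by design, mirroring the data_race(READ_ONCE()) check:
	 * a stale value only delays the splice until a later allocation.
	 */
	if (!alloc_list &&
	    __atomic_load_n(&free_nr, __ATOMIC_RELAXED) > SPLICE_BATCH)
		cache_splice();
	n = alloc_list;
	if (n)
		alloc_list = n->next;
	pthread_mutex_unlock(&alloc_lock);
	return n ? n : calloc(1, sizeof(*n));
}

Splicing only once more than a batch of objects has accumulated amortises the
cost of taking the producer-side lock over many allocations, rather than
bouncing it on every cache miss.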

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 include/linux/io_uring_types.h |  7 +++++
 io_uring/io_uring.c            |  3 ++
 io_uring/notif.c               | 57 +++++++++++++++++++++++++++++-----
 io_uring/notif.h               |  5 +++
 4 files changed, 65 insertions(+), 7 deletions(-)

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 26a1504ad24c..0a3fdaa368a4 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -244,6 +244,9 @@ struct io_ring_ctx {
 		struct xarray		io_bl_xa;
 		struct list_head	io_buffers_cache;
 
+		/* struct io_notif cache, protected by uring_lock */
+		struct list_head	notif_list;
+
 		struct io_hash_table	cancel_table_locked;
 		struct list_head	cq_overflow_list;
 		struct list_head	apoll_cache;
@@ -255,6 +258,10 @@ struct io_ring_ctx {
 	struct io_wq_work_list	locked_free_list;
 	unsigned int		locked_free_nr;
 
+	/* struct io_notif cache protected by completion_lock */
+	struct list_head	notif_list_locked;
+	unsigned int		notif_locked_nr;
+
 	const struct cred	*sq_creds;	/* cred used for __io_sq_thread() */
 	struct io_sq_data	*sq_data;	/* if using sq thread polling */
 
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 5fbbdcad14fa..6054e71e6ade 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -318,6 +318,8 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	INIT_WQ_LIST(&ctx->locked_free_list);
 	INIT_DELAYED_WORK(&ctx->fallback_work, io_fallback_req_func);
 	INIT_WQ_LIST(&ctx->submit_state.compl_reqs);
+	INIT_LIST_HEAD(&ctx->notif_list);
+	INIT_LIST_HEAD(&ctx->notif_list_locked);
 	return ctx;
 err:
 	kfree(ctx->dummy_ubuf);
@@ -2491,6 +2493,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
 	WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list));
 	WARN_ON_ONCE(ctx->notif_slots || ctx->nr_notif_slots);
 
+	io_notif_cache_purge(ctx);
 	io_mem_free(ctx->rings);
 	io_mem_free(ctx->sq_sqes);
 
diff --git a/io_uring/notif.c b/io_uring/notif.c
index e9e0c5566c4a..ffbd5ce03c36 100644
--- a/io_uring/notif.c
+++ b/io_uring/notif.c
@@ -15,10 +15,12 @@ static void __io_notif_complete_tw(struct callback_head *cb)
 
 	io_cq_lock(ctx);
 	io_fill_cqe_aux(ctx, notif->tag, 0, notif->seq);
+
+	list_add(&notif->cache_node, &ctx->notif_list_locked);
+	ctx->notif_locked_nr++;
 	io_cq_unlock_post(ctx);
 
 	percpu_ref_put(&ctx->refs);
-	kfree(notif);
 }
 
 static inline void io_notif_complete(struct io_notif *notif)
@@ -45,21 +47,62 @@ static void io_uring_tx_zerocopy_callback(struct sk_buff *skb,
 	queue_work(system_unbound_wq, &notif->commit_work);
 }
 
+static void io_notif_splice_cached(struct io_ring_ctx *ctx)
+	__must_hold(&ctx->uring_lock)
+{
+	spin_lock(&ctx->completion_lock);
+	list_splice_init(&ctx->notif_list_locked, &ctx->notif_list);
+	ctx->notif_locked_nr = 0;
+	spin_unlock(&ctx->completion_lock);
+}
+
+void io_notif_cache_purge(struct io_ring_ctx *ctx)
+	__must_hold(&ctx->uring_lock)
+{
+	io_notif_splice_cached(ctx);
+
+	while (!list_empty(&ctx->notif_list)) {
+		struct io_notif *notif = list_first_entry(&ctx->notif_list,
+						struct io_notif, cache_node);
+
+		list_del(&notif->cache_node);
+		kfree(notif);
+	}
+}
+
+static inline bool io_notif_has_cached(struct io_ring_ctx *ctx)
+	__must_hold(&ctx->uring_lock)
+{
+	if (likely(!list_empty(&ctx->notif_list)))
+		return true;
+	if (data_race(READ_ONCE(ctx->notif_locked_nr) <= IO_NOTIF_SPLICE_BATCH))
+		return false;
+	io_notif_splice_cached(ctx);
+	return !list_empty(&ctx->notif_list);
+}
+
 struct io_notif *io_alloc_notif(struct io_ring_ctx *ctx,
 				struct io_notif_slot *slot)
 	__must_hold(&ctx->uring_lock)
 {
 	struct io_notif *notif;
 
-	notif = kzalloc(sizeof(*notif), GFP_ATOMIC | __GFP_ACCOUNT);
-	if (!notif)
-		return NULL;
+	if (likely(io_notif_has_cached(ctx))) {
+		notif = list_first_entry(&ctx->notif_list,
+					 struct io_notif, cache_node);
+		list_del(&notif->cache_node);
+	} else {
+		notif = kzalloc(sizeof(*notif), GFP_ATOMIC | __GFP_ACCOUNT);
+		if (!notif)
+			return NULL;
+		/* pre-initialise some fields */
+		notif->ctx = ctx;
+		notif->uarg.flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN;
+		notif->uarg.callback = io_uring_tx_zerocopy_callback;
+	}
 
 	notif->seq = slot->seq++;
 	notif->tag = slot->tag;
-	notif->ctx = ctx;
-	notif->uarg.flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN;
-	notif->uarg.callback = io_uring_tx_zerocopy_callback;
 	/* master ref owned by io_notif_slot, will be dropped on flush */
 	refcount_set(&notif->uarg.refcnt, 1);
 	percpu_ref_get(&ctx->refs);
diff --git a/io_uring/notif.h b/io_uring/notif.h
index 3d7a1d242e17..b23c9c0515bb 100644
--- a/io_uring/notif.h
+++ b/io_uring/notif.h
@@ -5,6 +5,8 @@
 #include <net/sock.h>
 #include <linux/nospec.h>
 
+#define IO_NOTIF_SPLICE_BATCH	32
+
 struct io_notif {
 	struct ubuf_info	uarg;
 	struct io_ring_ctx	*ctx;
@@ -13,6 +15,8 @@ struct io_notif {
 	u64			tag;
 	/* see struct io_notif_slot::seq */
 	u32			seq;
+	/* hook into ctx->notif_list and ctx->notif_list_locked */
+	struct list_head	cache_node;
 
 	union {
 		struct callback_head	task_work;
@@ -41,6 +45,7 @@ struct io_notif_slot {
 };
 
 int io_notif_unregister(struct io_ring_ctx *ctx);
+void io_notif_cache_purge(struct io_ring_ctx *ctx);
 
 struct io_notif *io_alloc_notif(struct io_ring_ctx *ctx,
 				struct io_notif_slot *slot);
-- 
2.36.1


Thread overview: 27+ messages
2022-07-05 15:01 [PATCH net-next v3 00/25] io_uring zerocopy send Pavel Begunkov
2022-07-05 15:01 ` [PATCH net-next v3 01/25] ipv4: avoid partial copy for zc Pavel Begunkov
2022-07-05 15:01 ` [PATCH net-next v3 02/25] ipv6: " Pavel Begunkov
2022-07-05 15:01 ` [PATCH net-next v3 03/25] skbuff: add SKBFL_DONT_ORPHAN flag Pavel Begunkov
2022-07-05 15:01 ` [PATCH net-next v3 04/25] skbuff: carry external ubuf_info in msghdr Pavel Begunkov
2022-07-05 15:01 ` [PATCH net-next v3 05/25] net: bvec specific path in zerocopy_sg_from_iter Pavel Begunkov
2022-07-05 15:01 ` [PATCH net-next v3 06/25] net: optimise bvec-based zc page referencing Pavel Begunkov
2022-07-05 15:01 ` [PATCH net-next v3 07/25] net: don't track pfmemalloc for managed frags Pavel Begunkov
2022-07-05 15:01 ` [PATCH net-next v3 08/25] skbuff: don't mix ubuf_info of different types Pavel Begunkov
2022-07-05 15:01 ` [PATCH net-next v3 09/25] ipv4/udp: support zc with managed data Pavel Begunkov
2022-07-05 15:01 ` [PATCH net-next v3 10/25] ipv6/udp: " Pavel Begunkov
2022-07-05 15:01 ` [PATCH net-next v3 11/25] tcp: " Pavel Begunkov
2022-07-05 15:01 ` [PATCH net-next v3 12/25] io_uring: add zc notification infrastructure Pavel Begunkov
2022-07-05 15:01 ` [PATCH net-next v3 13/25] io_uring: export task put Pavel Begunkov
2022-07-05 15:01 ` [PATCH net-next v3 14/25] io_uring: cache struct io_notif Pavel Begunkov [this message]
2022-07-05 15:01 ` [PATCH net-next v3 15/25] io_uring: complete notifiers in tw Pavel Begunkov
2022-07-05 15:01 ` [PATCH net-next v3 16/25] io_uring: add notification slot registration Pavel Begunkov
2022-07-05 15:01 ` [PATCH net-next v3 17/25] io_uring: wire send zc request type Pavel Begunkov
2022-07-05 15:01 ` [PATCH net-next v3 18/25] io_uring: account locked pages for non-fixed zc Pavel Begunkov
2022-07-05 15:01 ` [PATCH net-next v3 19/25] io_uring: allow to pass addr into sendzc Pavel Begunkov
2022-07-05 15:01 ` [PATCH net-next v3 20/25] io_uring: add rsrc referencing for notifiers Pavel Begunkov
2022-07-05 15:01 ` [PATCH net-next v3 21/25] io_uring: sendzc with fixed buffers Pavel Begunkov
2022-07-05 15:01 ` [PATCH net-next v3 22/25] io_uring: flush notifiers after sendzc Pavel Begunkov
2022-07-05 15:01 ` [PATCH net-next v3 23/25] io_uring: rename IORING_OP_FILES_UPDATE Pavel Begunkov
2022-07-05 15:01 ` [PATCH net-next v3 24/25] io_uring: add zc notification flush requests Pavel Begunkov
2022-07-05 15:01 ` [PATCH net-next v3 25/25] selftests/io_uring: test zerocopy send Pavel Begunkov
2022-07-05 15:04 ` [PATCH net-next v3 00/25] io_uring " Pavel Begunkov
