io-uring.vger.kernel.org archive mirror
* [PATCHSET 0/3] req alloc and task_work speedups
@ 2021-02-09 19:04 Jens Axboe
  2021-02-09 19:04 ` [PATCH 1/3] io_uring: use persistent request cache Jens Axboe
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Jens Axboe @ 2021-02-09 19:04 UTC (permalink / raw)
  To: io-uring

Hi,

This small series makes req allocations persistent across invocations
of io_uring_enter(). It sits on top of the 4 patches from Pavel that
move the submit/completion state cache into the ctx itself.

-- 
Jens Axboe




* [PATCH 1/3] io_uring: use persistent request cache
  2021-02-09 19:04 [PATCHSET 0/3] req alloc and task_work speedups Jens Axboe
@ 2021-02-09 19:04 ` Jens Axboe
  2021-02-09 19:04 ` [PATCH 2/3] io_uring: provide FIFO ordering for task_work Jens Axboe
  2021-02-09 19:04 ` [PATCH 3/3] io_uring: enable req cache for task_work items Jens Axboe
  2 siblings, 0 replies; 4+ messages in thread
From: Jens Axboe @ 2021-02-09 19:04 UTC (permalink / raw)
  To: io-uring; +Cc: Jens Axboe

Now that we have the submit_state in the ring ctx itself, we can have
io_kiocb allocations that persist across invocations of
io_uring_enter(). This reduces the time spent doing slab allocations
and frees.
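
To illustrate the scheme (a simplified sketch, not the exact code in
the diff below -- the function name is made up, and the real slow path
does bulk slab allocation and keeps a fallback request), the allocation
side now prefers the per-ring free list and only falls back to the slab
when that list is empty:

static struct io_kiocb *io_alloc_req_sketch(struct io_ring_ctx *ctx)
{
	struct io_comp_state *cs = &ctx->submit_state.comp;
	struct io_kiocb *req;

	if (!list_empty(&cs->free_list)) {
		/* reuse a request recycled by a previous completion flush */
		req = list_first_entry(&cs->free_list, struct io_kiocb,
				       compl.list);
		list_del(&req->compl.list);
		return req;
	}
	/* cache empty: fall back to the slab, as before */
	return kmem_cache_alloc(req_cachep, GFP_KERNEL | __GFP_NOWARN);
}

The free side correspondingly parks batched requests back on
cs->free_list instead of handing them to kmem_cache_free_bulk().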

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 58 +++++++++++++++++++++++++++++++--------------------
 1 file changed, 35 insertions(+), 23 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index ab3f842869dd..502bdef41460 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -263,8 +263,9 @@ struct io_sq_data {
 #define IO_COMPL_BATCH			32
 
 struct io_comp_state {
-	unsigned int		nr;
 	struct io_kiocb		*reqs[IO_COMPL_BATCH];
+	unsigned int		nr;
+	struct list_head	free_list;
 };
 
 struct io_submit_state {
@@ -1290,7 +1291,6 @@ static inline bool io_is_timeout_noseq(struct io_kiocb *req)
 
 static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 {
-	struct io_submit_state *submit_state;
 	struct io_ring_ctx *ctx;
 	int hash_bits;
 
@@ -1343,10 +1343,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	INIT_DELAYED_WORK(&ctx->rsrc_put_work, io_rsrc_put_work);
 	init_llist_head(&ctx->rsrc_put_llist);
 
-	submit_state = &ctx->submit_state;
-	submit_state->comp.nr = 0;
-	submit_state->file_refs = 0;
-	submit_state->free_reqs = 0;
+	INIT_LIST_HEAD(&ctx->submit_state.comp.free_list);
 	return ctx;
 err:
 	if (ctx->fallback_req)
@@ -1969,6 +1966,14 @@ static struct io_kiocb *io_alloc_req(struct io_ring_ctx *ctx)
 {
 	struct io_submit_state *state = &ctx->submit_state;
 
+	if (!list_empty(&state->comp.free_list)) {
+		struct io_kiocb *req;
+
+		req = list_first_entry(&state->comp.free_list, struct io_kiocb,
+					compl.list);
+		list_del(&req->compl.list);
+		return req;
+	}
 	if (!state->free_reqs) {
 		gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
 		size_t sz;
@@ -2255,33 +2260,25 @@ static void io_free_req(struct io_kiocb *req)
 }
 
 struct req_batch {
-	void *reqs[IO_IOPOLL_BATCH];
-	int to_free;
-
 	struct task_struct	*task;
 	int			task_refs;
+	int			ctx_refs;
 };
 
 static inline void io_init_req_batch(struct req_batch *rb)
 {
-	rb->to_free = 0;
+	rb->ctx_refs = 0;
 	rb->task_refs = 0;
 	rb->task = NULL;
 }
 
-static void __io_req_free_batch_flush(struct io_ring_ctx *ctx,
-				      struct req_batch *rb)
-{
-	kmem_cache_free_bulk(req_cachep, rb->to_free, rb->reqs);
-	percpu_ref_put_many(&ctx->refs, rb->to_free);
-	rb->to_free = 0;
-}
-
 static void io_req_free_batch_finish(struct io_ring_ctx *ctx,
 				     struct req_batch *rb)
 {
-	if (rb->to_free)
-		__io_req_free_batch_flush(ctx, rb);
+	if (rb->ctx_refs) {
+		percpu_ref_put_many(&ctx->refs, rb->ctx_refs);
+		rb->ctx_refs = 0;
+	}
 	if (rb->task) {
 		io_put_task(rb->task, rb->task_refs);
 		rb->task = NULL;
@@ -2290,6 +2287,8 @@ static void io_req_free_batch_finish(struct io_ring_ctx *ctx,
 
 static void io_req_free_batch(struct req_batch *rb, struct io_kiocb *req)
 {
+	struct io_comp_state *cs = &req->ctx->submit_state.comp;
+
 	if (unlikely(io_is_fallback_req(req))) {
 		io_free_req(req);
 		return;
@@ -2305,9 +2304,8 @@ static void io_req_free_batch(struct req_batch *rb, struct io_kiocb *req)
 	rb->task_refs++;
 
 	io_dismantle_req(req);
-	rb->reqs[rb->to_free++] = req;
-	if (unlikely(rb->to_free == ARRAY_SIZE(rb->reqs)))
-		__io_req_free_batch_flush(req->ctx, rb);
+	rb->ctx_refs++;
+	list_add(&req->compl.list, &cs->free_list);
 }
 
 static void io_submit_flush_completions(struct io_comp_state *cs,
@@ -8668,6 +8666,19 @@ static void io_destroy_buffers(struct io_ring_ctx *ctx)
 	idr_destroy(&ctx->io_buffer_idr);
 }
 
+static void io_req_cache_free(struct io_ring_ctx *ctx)
+{
+	struct io_comp_state *cs = &ctx->submit_state.comp;
+
+	while (!list_empty(&cs->free_list)) {
+		struct io_kiocb *req;
+
+		req = list_first_entry(&cs->free_list, struct io_kiocb, compl.list);
+		list_del(&req->compl.list);
+		kmem_cache_free(req_cachep, req);
+	}
+}
+
 static void io_ring_ctx_free(struct io_ring_ctx *ctx)
 {
 	io_finish_async(ctx);
@@ -8705,6 +8716,7 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
 	put_cred(ctx->creds);
 	kfree(ctx->cancel_hash);
 	kmem_cache_free(req_cachep, ctx->fallback_req);
+	io_req_cache_free(ctx);
 	kfree(ctx);
 }
 
-- 
2.30.0



* [PATCH 2/3] io_uring: provide FIFO ordering for task_work
  2021-02-09 19:04 [PATCHSET 0/3] req alloc and task_work speedups Jens Axboe
  2021-02-09 19:04 ` [PATCH 1/3] io_uring: use persistent request cache Jens Axboe
@ 2021-02-09 19:04 ` Jens Axboe
  2021-02-09 19:04 ` [PATCH 3/3] io_uring: enable req cache for task_work items Jens Axboe
  2 siblings, 0 replies; 4+ messages in thread
From: Jens Axboe @ 2021-02-09 19:04 UTC (permalink / raw)
  To: io-uring; +Cc: Jens Axboe

task_work is a LIFO list, due to how it's implemented as a lockless
list. For long chains of task_work, this can be problematic, as the
first entry added is the last one processed. Reversing the list
ourselves to restore the ordering would likewise waste a lot of CPU
cycles.

Wrap the task_work so we have a single task_work entry per task, queue
requests on a per-task list, and use that single callback to run them
in the order they were added.
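
A rough sketch of the queueing side (simplified -- the helper name is
made up, and the notify-mode selection and the error handling for a
failed task_work_add() are left out; see the diff for the real
version):

static void io_queue_tw_sketch(struct task_struct *tsk, struct io_kiocb *req)
{
	struct io_uring_task *tctx = tsk->io_uring;

	/* append to the per-task list, preserving queueing order */
	spin_lock(&tctx->task_lock);
	wq_list_add_tail(&req->io_task_work.node, &tctx->task_list);
	spin_unlock(&tctx->task_lock);

	/* arm the real (LIFO) task_work just once per batch */
	if (!test_and_set_bit(0, &tctx->task_state))
		task_work_add(tsk, &tctx->task_work, TWA_SIGNAL);
}

The single tctx_task_work() callback then splices the list under the
lock and walks it front to back, so entries run in the order they were
queued.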

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io-wq.h               |   9 ----
 fs/io_uring.c            | 101 ++++++++++++++++++++++++++++++++++++---
 include/linux/io_uring.h |  14 ++++++
 3 files changed, 108 insertions(+), 16 deletions(-)

diff --git a/fs/io-wq.h b/fs/io-wq.h
index e37a0f217cc8..096f1021018e 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -27,15 +27,6 @@ enum io_wq_cancel {
 	IO_WQ_CANCEL_NOTFOUND,	/* work not found */
 };
 
-struct io_wq_work_node {
-	struct io_wq_work_node *next;
-};
-
-struct io_wq_work_list {
-	struct io_wq_work_node *first;
-	struct io_wq_work_node *last;
-};
-
 static inline void wq_list_add_after(struct io_wq_work_node *node,
 				     struct io_wq_work_node *pos,
 				     struct io_wq_work_list *list)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 502bdef41460..5700b2f75364 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -717,6 +717,11 @@ struct async_poll {
 	struct io_poll_iocb	*double_poll;
 };
 
+struct io_task_work {
+	struct io_wq_work_node	node;
+	task_work_func_t	func;
+};
+
 /*
  * NOTE! Each of the iocb union members has the file pointer
  * as the first entry in their struct definition. So you can
@@ -775,7 +780,10 @@ struct io_kiocb {
 	 * 2. to track reqs with ->files (see io_op_def::file_table)
 	 */
 	struct list_head		inflight_entry;
-	struct callback_head		task_work;
+	union {
+		struct io_task_work	io_task_work;
+		struct callback_head	task_work;
+	};
 	/* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */
 	struct hlist_node		hash_node;
 	struct async_poll		*apoll;
@@ -2150,6 +2158,81 @@ static inline struct io_kiocb *io_req_find_next(struct io_kiocb *req)
 	return __io_req_find_next(req);
 }
 
+static bool __tctx_task_work(struct io_uring_task *tctx)
+{
+	struct io_wq_work_list list;
+	struct io_wq_work_node *node;
+
+	if (wq_list_empty(&tctx->task_list))
+		return false;
+
+	spin_lock(&tctx->task_lock);
+	list = tctx->task_list;
+	INIT_WQ_LIST(&tctx->task_list);
+	spin_unlock(&tctx->task_lock);
+
+	node = list.first;
+	while (node) {
+		struct io_wq_work_node *next = node->next;
+		struct io_kiocb *req;
+
+		req = container_of(node, struct io_kiocb, io_task_work.node);
+		req->task_work.func(&req->task_work);
+		node = next;
+	}
+
+	return list.first != NULL;
+}
+
+static void tctx_task_work(struct callback_head *cb)
+{
+	struct io_uring_task *tctx = container_of(cb, struct io_uring_task, task_work);
+
+	while (__tctx_task_work(tctx))
+		cond_resched();
+
+	clear_bit(0, &tctx->task_state);
+}
+
+static int io_task_work_add(struct task_struct *tsk, struct io_kiocb *req,
+			    enum task_work_notify_mode notify)
+{
+	struct io_uring_task *tctx = tsk->io_uring;
+	struct io_wq_work_node *node, *prev;
+	int ret;
+
+	WARN_ON_ONCE(!tctx);
+
+	spin_lock(&tctx->task_lock);
+	wq_list_add_tail(&req->io_task_work.node, &tctx->task_list);
+	spin_unlock(&tctx->task_lock);
+
+	/* task_work already pending, we're done */
+	if (test_bit(0, &tctx->task_state) ||
+	    test_and_set_bit(0, &tctx->task_state))
+		return 0;
+
+	if (!task_work_add(tsk, &tctx->task_work, notify))
+		return 0;
+
+	/*
+	 * Slow path - we failed, find and delete work. if the work is not
+	 * in the list, it got run and we're fine.
+	 */
+	ret = 0;
+	spin_lock(&tctx->task_lock);
+	wq_list_for_each(node, prev, &tctx->task_list) {
+		if (&req->io_task_work.node == node) {
+			wq_list_del(&tctx->task_list, node, prev);
+			ret = 1;
+			break;
+		}
+	}
+	spin_unlock(&tctx->task_lock);
+	clear_bit(0, &tctx->task_state);
+	return ret;
+}
+
 static int io_req_task_work_add(struct io_kiocb *req)
 {
 	struct task_struct *tsk = req->task;
@@ -2170,7 +2253,7 @@ static int io_req_task_work_add(struct io_kiocb *req)
 	if (!(ctx->flags & IORING_SETUP_SQPOLL))
 		notify = TWA_SIGNAL;
 
-	ret = task_work_add(tsk, &req->task_work, notify);
+	ret = io_task_work_add(tsk, req, notify);
 	if (!ret)
 		wake_up_process(tsk);
 
@@ -2178,7 +2261,7 @@ static int io_req_task_work_add(struct io_kiocb *req)
 }
 
 static void io_req_task_work_add_fallback(struct io_kiocb *req,
-					  void (*cb)(struct callback_head *))
+					  task_work_func_t cb)
 {
 	struct task_struct *tsk = io_wq_get_task(req->ctx->io_wq);
 
@@ -2237,7 +2320,7 @@ static void io_req_task_queue(struct io_kiocb *req)
 {
 	int ret;
 
-	init_task_work(&req->task_work, io_req_task_submit);
+	req->task_work.func = io_req_task_submit;
 	percpu_ref_get(&req->ctx->refs);
 
 	ret = io_req_task_work_add(req);
@@ -2369,7 +2452,7 @@ static void io_free_req_deferred(struct io_kiocb *req)
 {
 	int ret;
 
-	init_task_work(&req->task_work, io_put_req_deferred_cb);
+	req->task_work.func = io_put_req_deferred_cb;
 	ret = io_req_task_work_add(req);
 	if (unlikely(ret))
 		io_req_task_work_add_fallback(req, io_put_req_deferred_cb);
@@ -3414,7 +3497,7 @@ static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode,
 	req->rw.kiocb.ki_flags &= ~IOCB_WAITQ;
 	list_del_init(&wait->entry);
 
-	init_task_work(&req->task_work, io_req_task_submit);
+	req->task_work.func = io_req_task_submit;
 	percpu_ref_get(&req->ctx->refs);
 
 	/* submit ref gets dropped, acquire a new one */
@@ -5113,7 +5196,7 @@ static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
 	list_del_init(&poll->wait.entry);
 
 	req->result = mask;
-	init_task_work(&req->task_work, func);
+	req->task_work.func = func;
 	percpu_ref_get(&req->ctx->refs);
 
 	/*
@@ -8120,6 +8203,10 @@ static int io_uring_alloc_task_context(struct task_struct *task)
 	io_init_identity(&tctx->__identity);
 	tctx->identity = &tctx->__identity;
 	task->io_uring = tctx;
+	spin_lock_init(&tctx->task_lock);
+	INIT_WQ_LIST(&tctx->task_list);
+	tctx->task_state = 0;
+	init_task_work(&tctx->task_work, tctx_task_work);
 	return 0;
 }
 
diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
index 35b2d845704d..2eb6d19de336 100644
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -22,6 +22,15 @@ struct io_identity {
 	refcount_t			count;
 };
 
+struct io_wq_work_node {
+	struct io_wq_work_node *next;
+};
+
+struct io_wq_work_list {
+	struct io_wq_work_node *first;
+	struct io_wq_work_node *last;
+};
+
 struct io_uring_task {
 	/* submission side */
 	struct xarray		xa;
@@ -32,6 +41,11 @@ struct io_uring_task {
 	struct io_identity	*identity;
 	atomic_t		in_idle;
 	bool			sqpoll;
+
+	spinlock_t		task_lock;
+	struct io_wq_work_list	task_list;
+	unsigned long		task_state;
+	struct callback_head	task_work;
 };
 
 #if defined(CONFIG_IO_URING)
-- 
2.30.0



* [PATCH 3/3] io_uring: enable req cache for task_work items
  2021-02-09 19:04 [PATCHSET 0/3] req alloc and task_work speedups Jens Axboe
  2021-02-09 19:04 ` [PATCH 1/3] io_uring: use persistent request cache Jens Axboe
  2021-02-09 19:04 ` [PATCH 2/3] io_uring: provide FIFO ordering for task_work Jens Axboe
@ 2021-02-09 19:04 ` Jens Axboe
  2 siblings, 0 replies; 4+ messages in thread
From: Jens Axboe @ 2021-02-09 19:04 UTC (permalink / raw)
  To: io-uring; +Cc: Jens Axboe

task_work is run without utilizing the req alloc cache, so any deferred
items don't get to take advantage of either the alloc or free side of
it. With task_work now being wrapped by io_uring, we can use the ctx
completion state to get both the req cache and the completion flush
batching.

With this, the only request type that cannot take advantage of the req
cache is IRQ driven IO for regular files / block devices. Anything
else, including IOPOLL polled IO to those same types, will take
advantage of it.
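
A simplified sketch of what the drain loop turns into (flush_ctx() here
is just a stand-in for the mutex_lock(&ctx->uring_lock) +
io_submit_flush_completions() pair in the diff, not a real function):

static void io_drain_tw_sketch(struct io_wq_work_node *node)
{
	struct io_ring_ctx *ctx = NULL;

	while (node) {
		struct io_wq_work_node *next = node->next;
		struct io_kiocb *req = container_of(node, struct io_kiocb,
						    io_task_work.node);

		/* switching rings: flush the previous ring's batch */
		if (ctx && ctx != req->ctx)
			flush_ctx(ctx);
		ctx = req->ctx;
		req->task_work.func(&req->task_work);
		node = next;
	}
	/* flush whatever the last ring accumulated */
	if (ctx)
		flush_ctx(ctx);
}

Completions queued by the individual task_work handlers are thus posted
in one batch per ring rather than one at a time.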

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5700b2f75364..8e9a492f548f 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1047,6 +1047,8 @@ static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec,
 			     const struct iovec *fast_iov,
 			     struct iov_iter *iter, bool force);
 static void io_req_task_queue(struct io_kiocb *req);
+static void io_submit_flush_completions(struct io_comp_state *cs,
+					struct io_ring_ctx *ctx);
 
 static struct kmem_cache *req_cachep;
 
@@ -2160,6 +2162,7 @@ static inline struct io_kiocb *io_req_find_next(struct io_kiocb *req)
 
 static bool __tctx_task_work(struct io_uring_task *tctx)
 {
+	struct io_ring_ctx *ctx = NULL;
 	struct io_wq_work_list list;
 	struct io_wq_work_node *node;
 
@@ -2174,11 +2177,28 @@ static bool __tctx_task_work(struct io_uring_task *tctx)
 	node = list.first;
 	while (node) {
 		struct io_wq_work_node *next = node->next;
+		struct io_ring_ctx *this_ctx;
 		struct io_kiocb *req;
 
 		req = container_of(node, struct io_kiocb, io_task_work.node);
+		this_ctx = req->ctx;
 		req->task_work.func(&req->task_work);
 		node = next;
+
+		if (!ctx) {
+			ctx = this_ctx;
+		} else if (ctx != this_ctx) {
+			mutex_lock(&ctx->uring_lock);
+			io_submit_flush_completions(&ctx->submit_state.comp, ctx);
+			mutex_unlock(&ctx->uring_lock);
+			ctx = this_ctx;
+		}
+	}
+
+	if (ctx && ctx->submit_state.comp.nr) {
+		mutex_lock(&ctx->uring_lock);
+		io_submit_flush_completions(&ctx->submit_state.comp, ctx);
+		mutex_unlock(&ctx->uring_lock);
 	}
 
 	return list.first != NULL;
@@ -2301,7 +2321,7 @@ static void __io_req_task_submit(struct io_kiocb *req)
 	if (!ctx->sqo_dead &&
 	    !__io_sq_thread_acquire_mm(ctx) &&
 	    !__io_sq_thread_acquire_files(ctx))
-		__io_queue_sqe(req, NULL);
+		__io_queue_sqe(req, &ctx->submit_state.comp);
 	else
 		__io_req_task_cancel(req, -EFAULT);
 	mutex_unlock(&ctx->uring_lock);
-- 
2.30.0


