IO-Uring Archive on lore.kernel.org
 help / color / Atom feed
* [PATCHSET 0/5] Allow batching of inline completions
@ 2020-06-23 15:16 Jens Axboe
  2020-06-23 15:16 ` [PATCH 1/5] io_uring: provide generic io_req_complete() helper Jens Axboe
                   ` (4 more replies)
  0 siblings, 5 replies; 6+ messages in thread
From: Jens Axboe @ 2020-06-23 15:16 UTC (permalink / raw)
  To: io-uring; +Cc: xuanzhuo, Dust.li

As Xuan Zhuo <xuanzhuo@linux.alibaba.com> reported here:

https://lore.kernel.org/io-uring/34ecb5c9-5822-827f-6e7b-973bea543569@kernel.dk/T/#me32d6897f976e8268284ff5cbdb3696010c2b7ba

we can do a bit better when dealing with inline completions from the
submission path. This patchset cleans up the standard completion
logic, then builds on top of that to allow collecting completions done
at submission time. This allows io_uring to amortize the cost of needing
to grab the completion lock, and updating the CQ ring as well.

On a silly t/io_uring NOP test on my laptop, this brings about a 20%
increase in performance. Xuan Zhuo reports that it changes his SQPOLL
based UDP processing (running at 800K PPS) profile from:

17.97% [kernel] [k] copy_user_generic_unrolled
13.92% [kernel] [k] io_commit_cqring
11.04% [kernel] [k] __io_cqring_fill_event
10.33% [kernel] [k] udp_recvmsg
 5.94% [kernel] [k] skb_release_data
 4.31% [kernel] [k] udp_rmem_release
 2.68% [kernel] [k] __check_object_size
 2.24% [kernel] [k] __slab_free
 2.22% [kernel] [k] _raw_spin_lock_bh
 2.21% [kernel] [k] kmem_cache_free
 2.13% [kernel] [k] free_pcppages_bulk
 1.83% [kernel] [k] io_submit_sqes
 1.38% [kernel] [k] page_frag_free
 1.31% [kernel] [k] inet_recvmsg

to

19.99% [kernel] [k] copy_user_generic_unrolled
11.63% [kernel] [k] skb_release_data
 9.36% [kernel] [k] udp_rmem_release
 8.64% [kernel] [k] udp_recvmsg
 6.21% [kernel] [k] __slab_free
 4.39% [kernel] [k] __check_object_size
 3.64% [kernel] [k] free_pcppages_bulk
 2.41% [kernel] [k] kmem_cache_free
 2.00% [kernel] [k] io_submit_sqes
 1.95% [kernel] [k] page_frag_free
 1.54% [kernel] [k] io_put_req
[...]
 0.07% [kernel] [k] io_commit_cqring
 0.44% [kernel] [k] __io_cqring_fill_event

which looks much nicer.

Patches are against my for-5.9/io_uring branch.

-- 
Jens Axboe



^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 1/5] io_uring: provide generic io_req_complete() helper
  2020-06-23 15:16 [PATCHSET 0/5] Allow batching of inline completions Jens Axboe
@ 2020-06-23 15:16 ` Jens Axboe
  2020-06-23 15:16 ` [PATCH 2/5] io_uring: add 'io_comp_state' to struct io_submit_state Jens Axboe
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Jens Axboe @ 2020-06-23 15:16 UTC (permalink / raw)
  To: io-uring; +Cc: xuanzhuo, Dust.li, Jens Axboe

We have lots of callers of:

io_cqring_add_event(req, result);
io_put_req(req);

Provide a helper that does this for us. It helps clean up the code, and
also provides a more convenient location for us to change the completion
handling.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 106 ++++++++++++++++++++------------------------------
 1 file changed, 43 insertions(+), 63 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index c686061c3762..0cdf088c56cd 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1335,7 +1335,7 @@ static void io_cqring_fill_event(struct io_kiocb *req, long res)
 	__io_cqring_fill_event(req, res, 0);
 }
 
-static void __io_cqring_add_event(struct io_kiocb *req, long res, long cflags)
+static void io_cqring_add_event(struct io_kiocb *req, long res, long cflags)
 {
 	struct io_ring_ctx *ctx = req->ctx;
 	unsigned long flags;
@@ -1348,9 +1348,15 @@ static void __io_cqring_add_event(struct io_kiocb *req, long res, long cflags)
 	io_cqring_ev_posted(ctx);
 }
 
-static void io_cqring_add_event(struct io_kiocb *req, long res)
+static void __io_req_complete(struct io_kiocb *req, long res, unsigned cflags)
 {
-	__io_cqring_add_event(req, res, 0);
+	io_cqring_add_event(req, res, cflags);
+	io_put_req(req);
+}
+
+static void io_req_complete(struct io_kiocb *req, long res)
+{
+	__io_req_complete(req, res, 0);
 }
 
 static inline bool io_is_fallback_req(struct io_kiocb *req)
@@ -1978,7 +1984,7 @@ static void io_complete_rw_common(struct kiocb *kiocb, long res)
 		req_set_fail_links(req);
 	if (req->flags & REQ_F_BUFFER_SELECTED)
 		cflags = io_put_kbuf(req);
-	__io_cqring_add_event(req, res, cflags);
+	io_cqring_add_event(req, res, cflags);
 }
 
 static void io_sq_thread_drop_mm(struct io_ring_ctx *ctx)
@@ -2041,9 +2047,8 @@ static bool io_resubmit_prep(struct io_kiocb *req, int error)
 		return true;
 	kfree(iovec);
 end_req:
-	io_cqring_add_event(req, ret);
 	req_set_fail_links(req);
-	io_put_req(req);
+	io_req_complete(req, ret);
 	return false;
 }
 
@@ -3110,10 +3115,9 @@ static int io_tee(struct io_kiocb *req, bool force_nonblock)
 	io_put_file(req, in, (sp->flags & SPLICE_F_FD_IN_FIXED));
 	req->flags &= ~REQ_F_NEED_CLEANUP;
 
-	io_cqring_add_event(req, ret);
 	if (ret != sp->len)
 		req_set_fail_links(req);
-	io_put_req(req);
+	io_req_complete(req, ret);
 	return 0;
 }
 
@@ -3147,10 +3151,9 @@ static int io_splice(struct io_kiocb *req, bool force_nonblock)
 	io_put_file(req, in, (sp->flags & SPLICE_F_FD_IN_FIXED));
 	req->flags &= ~REQ_F_NEED_CLEANUP;
 
-	io_cqring_add_event(req, ret);
 	if (ret != sp->len)
 		req_set_fail_links(req);
-	io_put_req(req);
+	io_req_complete(req, ret);
 	return 0;
 }
 
@@ -3164,8 +3167,7 @@ static int io_nop(struct io_kiocb *req)
 	if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
 
-	io_cqring_add_event(req, 0);
-	io_put_req(req);
+	io_req_complete(req, 0);
 	return 0;
 }
 
@@ -3204,8 +3206,7 @@ static int io_fsync(struct io_kiocb *req, bool force_nonblock)
 				req->sync.flags & IORING_FSYNC_DATASYNC);
 	if (ret < 0)
 		req_set_fail_links(req);
-	io_cqring_add_event(req, ret);
-	io_put_req(req);
+	io_req_complete(req, ret);
 	return 0;
 }
 
@@ -3238,8 +3239,7 @@ static int io_fallocate(struct io_kiocb *req, bool force_nonblock)
 	current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
 	if (ret < 0)
 		req_set_fail_links(req);
-	io_cqring_add_event(req, ret);
-	io_put_req(req);
+	io_req_complete(req, ret);
 	return 0;
 }
 
@@ -3335,8 +3335,7 @@ static int io_openat2(struct io_kiocb *req, bool force_nonblock)
 	req->flags &= ~REQ_F_NEED_CLEANUP;
 	if (ret < 0)
 		req_set_fail_links(req);
-	io_cqring_add_event(req, ret);
-	io_put_req(req);
+	io_req_complete(req, ret);
 	return 0;
 }
 
@@ -3409,8 +3408,7 @@ static int io_remove_buffers(struct io_kiocb *req, bool force_nonblock)
 	io_ring_submit_lock(ctx, !force_nonblock);
 	if (ret < 0)
 		req_set_fail_links(req);
-	io_cqring_add_event(req, ret);
-	io_put_req(req);
+	io_req_complete(req, ret);
 	return 0;
 }
 
@@ -3497,8 +3495,7 @@ static int io_provide_buffers(struct io_kiocb *req, bool force_nonblock)
 	io_ring_submit_unlock(ctx, !force_nonblock);
 	if (ret < 0)
 		req_set_fail_links(req);
-	io_cqring_add_event(req, ret);
-	io_put_req(req);
+	io_req_complete(req, ret);
 	return 0;
 }
 
@@ -3541,8 +3538,7 @@ static int io_epoll_ctl(struct io_kiocb *req, bool force_nonblock)
 
 	if (ret < 0)
 		req_set_fail_links(req);
-	io_cqring_add_event(req, ret);
-	io_put_req(req);
+	io_req_complete(req, ret);
 	return 0;
 #else
 	return -EOPNOTSUPP;
@@ -3578,8 +3574,7 @@ static int io_madvise(struct io_kiocb *req, bool force_nonblock)
 	ret = do_madvise(ma->addr, ma->len, ma->advice);
 	if (ret < 0)
 		req_set_fail_links(req);
-	io_cqring_add_event(req, ret);
-	io_put_req(req);
+	io_req_complete(req, ret);
 	return 0;
 #else
 	return -EOPNOTSUPP;
@@ -3618,8 +3613,7 @@ static int io_fadvise(struct io_kiocb *req, bool force_nonblock)
 	ret = vfs_fadvise(req->file, fa->offset, fa->len, fa->advice);
 	if (ret < 0)
 		req_set_fail_links(req);
-	io_cqring_add_event(req, ret);
-	io_put_req(req);
+	io_req_complete(req, ret);
 	return 0;
 }
 
@@ -3658,8 +3652,7 @@ static int io_statx(struct io_kiocb *req, bool force_nonblock)
 
 	if (ret < 0)
 		req_set_fail_links(req);
-	io_cqring_add_event(req, ret);
-	io_put_req(req);
+	io_req_complete(req, ret);
 	return 0;
 }
 
@@ -3715,10 +3708,9 @@ static int io_close(struct io_kiocb *req, bool force_nonblock)
 	ret = filp_close(close->put_file, req->work.files);
 	if (ret < 0)
 		req_set_fail_links(req);
-	io_cqring_add_event(req, ret);
 	fput(close->put_file);
 	close->put_file = NULL;
-	io_put_req(req);
+	io_req_complete(req, ret);
 	return 0;
 }
 
@@ -3752,8 +3744,7 @@ static int io_sync_file_range(struct io_kiocb *req, bool force_nonblock)
 				req->sync.flags);
 	if (ret < 0)
 		req_set_fail_links(req);
-	io_cqring_add_event(req, ret);
-	io_put_req(req);
+	io_req_complete(req, ret);
 	return 0;
 }
 
@@ -3852,10 +3843,9 @@ static int io_sendmsg(struct io_kiocb *req, bool force_nonblock)
 	if (kmsg && kmsg->iov != kmsg->fast_iov)
 		kfree(kmsg->iov);
 	req->flags &= ~REQ_F_NEED_CLEANUP;
-	io_cqring_add_event(req, ret);
 	if (ret < 0)
 		req_set_fail_links(req);
-	io_put_req(req);
+	io_req_complete(req, ret);
 	return 0;
 }
 
@@ -3895,10 +3885,9 @@ static int io_send(struct io_kiocb *req, bool force_nonblock)
 			ret = -EINTR;
 	}
 
-	io_cqring_add_event(req, ret);
 	if (ret < 0)
 		req_set_fail_links(req);
-	io_put_req(req);
+	io_req_complete(req, ret);
 	return 0;
 }
 
@@ -4095,10 +4084,9 @@ static int io_recvmsg(struct io_kiocb *req, bool force_nonblock)
 	if (kmsg && kmsg->iov != kmsg->fast_iov)
 		kfree(kmsg->iov);
 	req->flags &= ~REQ_F_NEED_CLEANUP;
-	__io_cqring_add_event(req, ret, cflags);
 	if (ret < 0)
 		req_set_fail_links(req);
-	io_put_req(req);
+	__io_req_complete(req, ret, cflags);
 	return 0;
 }
 
@@ -4152,10 +4140,9 @@ static int io_recv(struct io_kiocb *req, bool force_nonblock)
 
 	kfree(kbuf);
 	req->flags &= ~REQ_F_NEED_CLEANUP;
-	__io_cqring_add_event(req, ret, cflags);
 	if (ret < 0)
 		req_set_fail_links(req);
-	io_put_req(req);
+	__io_req_complete(req, ret, cflags);
 	return 0;
 }
 
@@ -4194,8 +4181,7 @@ static int io_accept(struct io_kiocb *req, bool force_nonblock)
 			ret = -EINTR;
 		req_set_fail_links(req);
 	}
-	io_cqring_add_event(req, ret);
-	io_put_req(req);
+	io_req_complete(req, ret);
 	return 0;
 }
 
@@ -4255,8 +4241,7 @@ static int io_connect(struct io_kiocb *req, bool force_nonblock)
 out:
 	if (ret < 0)
 		req_set_fail_links(req);
-	io_cqring_add_event(req, ret);
-	io_put_req(req);
+	io_req_complete(req, ret);
 	return 0;
 }
 #else /* !CONFIG_NET */
@@ -4548,7 +4533,7 @@ static void io_async_task_func(struct callback_head *cb)
 	if (!canceled) {
 		__set_current_state(TASK_RUNNING);
 		if (io_sq_thread_acquire_mm(ctx, req)) {
-			io_cqring_add_event(req, -EFAULT);
+			io_cqring_add_event(req, -EFAULT, 0);
 			goto end_req;
 		}
 		mutex_lock(&ctx->uring_lock);
@@ -4797,10 +4782,9 @@ static int io_poll_remove(struct io_kiocb *req)
 	ret = io_poll_cancel(ctx, addr);
 	spin_unlock_irq(&ctx->completion_lock);
 
-	io_cqring_add_event(req, ret);
 	if (ret < 0)
 		req_set_fail_links(req);
-	io_put_req(req);
+	io_req_complete(req, ret);
 	return 0;
 }
 
@@ -5156,8 +5140,7 @@ static int io_files_update(struct io_kiocb *req, bool force_nonblock)
 
 	if (ret < 0)
 		req_set_fail_links(req);
-	io_cqring_add_event(req, ret);
-	io_put_req(req);
+	io_req_complete(req, ret);
 	return 0;
 }
 
@@ -5650,8 +5633,7 @@ static void io_wq_submit_work(struct io_wq_work **workptr)
 
 	if (ret) {
 		req_set_fail_links(req);
-		io_cqring_add_event(req, ret);
-		io_put_req(req);
+		io_req_complete(req, ret);
 	}
 
 	io_steal_work(req, workptr);
@@ -5768,8 +5750,7 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
 		io_async_find_and_cancel(ctx, req, prev->user_data, -ETIME);
 		io_put_req(prev);
 	} else {
-		io_cqring_add_event(req, -ETIME);
-		io_put_req(req);
+		io_req_complete(req, -ETIME);
 	}
 	return HRTIMER_NORESTART;
 }
@@ -5878,9 +5859,8 @@ static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 
 	/* and drop final reference, if we failed */
 	if (ret) {
-		io_cqring_add_event(req, ret);
 		req_set_fail_links(req);
-		io_put_req(req);
+		io_req_complete(req, ret);
 	}
 	if (nxt) {
 		req = nxt;
@@ -5902,9 +5882,9 @@ static void io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	if (ret) {
 		if (ret != -EIOCBQUEUED) {
 fail_req:
-			io_cqring_add_event(req, ret);
 			req_set_fail_links(req);
-			io_double_put_req(req);
+			io_put_req(req);
+			io_req_complete(req, ret);
 		}
 	} else if (req->flags & REQ_F_FORCE_ASYNC) {
 		if (!req->io) {
@@ -5930,8 +5910,8 @@ static void io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 static inline void io_queue_link_head(struct io_kiocb *req)
 {
 	if (unlikely(req->flags & REQ_F_FAIL_LINK)) {
-		io_cqring_add_event(req, -ECANCELED);
-		io_double_put_req(req);
+		io_put_req(req);
+		io_req_complete(req, -ECANCELED);
 	} else
 		io_queue_sqe(req, NULL);
 }
@@ -6188,8 +6168,8 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 
 		if (unlikely(err)) {
 fail_req:
-			io_cqring_add_event(req, err);
-			io_double_put_req(req);
+			io_put_req(req);
+			io_req_complete(req, err);
 			break;
 		}
 
-- 
2.27.0


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 2/5] io_uring: add 'io_comp_state' to struct io_submit_state
  2020-06-23 15:16 [PATCHSET 0/5] Allow batching of inline completions Jens Axboe
  2020-06-23 15:16 ` [PATCH 1/5] io_uring: provide generic io_req_complete() helper Jens Axboe
@ 2020-06-23 15:16 ` Jens Axboe
  2020-06-23 15:16 ` [PATCH 3/5] io_uring: pass down completion state on the issue side Jens Axboe
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Jens Axboe @ 2020-06-23 15:16 UTC (permalink / raw)
  To: io-uring; +Cc: xuanzhuo, Dust.li, Jens Axboe

No functional changes in this patch, just in preparation for passing back
pending completions to the caller and completing them in a batched
fashion.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 0cdf088c56cd..1da5269c9ef7 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -678,6 +678,12 @@ struct io_kiocb {
 
 #define IO_IOPOLL_BATCH			8
 
+struct io_comp_state {
+	unsigned int		nr;
+	struct list_head	list;
+	struct io_ring_ctx	*ctx;
+};
+
 struct io_submit_state {
 	struct blk_plug		plug;
 
@@ -687,6 +693,11 @@ struct io_submit_state {
 	void			*reqs[IO_IOPOLL_BATCH];
 	unsigned int		free_reqs;
 
+	/*
+	 * Batch completion logic
+	 */
+	struct io_comp_state	comp;
+
 	/*
 	 * File reference cache
 	 */
@@ -5999,12 +6010,15 @@ static void io_submit_state_end(struct io_submit_state *state)
  * Start submission side cache.
  */
 static void io_submit_state_start(struct io_submit_state *state,
-				  unsigned int max_ios)
+				  struct io_ring_ctx *ctx, unsigned int max_ios)
 {
 	blk_start_plug(&state->plug);
 #ifdef CONFIG_BLOCK
 	state->plug.nowait = true;
 #endif
+	state->comp.nr = 0;
+	INIT_LIST_HEAD(&state->comp.list);
+	state->comp.ctx = ctx;
 	state->free_reqs = 0;
 	state->file = NULL;
 	state->ios_left = max_ios;
@@ -6139,7 +6153,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 	if (!percpu_ref_tryget_many(&ctx->refs, nr))
 		return -EAGAIN;
 
-	io_submit_state_start(&state, nr);
+	io_submit_state_start(&state, ctx, nr);
 
 	ctx->ring_fd = ring_fd;
 	ctx->ring_file = ring_file;
-- 
2.27.0


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 3/5] io_uring: pass down completion state on the issue side
  2020-06-23 15:16 [PATCHSET 0/5] Allow batching of inline completions Jens Axboe
  2020-06-23 15:16 ` [PATCH 1/5] io_uring: provide generic io_req_complete() helper Jens Axboe
  2020-06-23 15:16 ` [PATCH 2/5] io_uring: add 'io_comp_state' to struct io_submit_state Jens Axboe
@ 2020-06-23 15:16 ` Jens Axboe
  2020-06-23 15:16 ` [PATCH 4/5] io_uring: pass in completion state to appropriate issue side handlers Jens Axboe
  2020-06-23 15:16 ` [PATCH 5/5] io_uring: enable READ/WRITE to use deferred completions Jens Axboe
  4 siblings, 0 replies; 6+ messages in thread
From: Jens Axboe @ 2020-06-23 15:16 UTC (permalink / raw)
  To: io-uring; +Cc: xuanzhuo, Dust.li, Jens Axboe

No functional changes in this patch, just in preparation for having the
completion state be available on the issue side. Later on, this will
allow requests that complete inline to be completed in batches.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 67 ++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 50 insertions(+), 17 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 1da5269c9ef7..d6eb608c105a 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -909,7 +909,8 @@ static void io_cleanup_req(struct io_kiocb *req);
 static int io_file_get(struct io_submit_state *state, struct io_kiocb *req,
 		       int fd, struct file **out_file, bool fixed);
 static void __io_queue_sqe(struct io_kiocb *req,
-			   const struct io_uring_sqe *sqe);
+			   const struct io_uring_sqe *sqe,
+			   struct io_comp_state *cs);
 
 static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
 			       struct iovec **iovec, struct iov_iter *iter,
@@ -2799,7 +2800,7 @@ static void io_async_buf_retry(struct callback_head *cb)
 	__set_current_state(TASK_RUNNING);
 	if (!io_sq_thread_acquire_mm(ctx, req)) {
 		mutex_lock(&ctx->uring_lock);
-		__io_queue_sqe(req, NULL);
+		__io_queue_sqe(req, NULL, NULL);
 		mutex_unlock(&ctx->uring_lock);
 	} else {
 		__io_async_buf_error(req, -EFAULT);
@@ -4423,7 +4424,7 @@ static void io_poll_task_func(struct callback_head *cb)
 		struct io_ring_ctx *ctx = nxt->ctx;
 
 		mutex_lock(&ctx->uring_lock);
-		__io_queue_sqe(nxt, NULL);
+		__io_queue_sqe(nxt, NULL, NULL);
 		mutex_unlock(&ctx->uring_lock);
 	}
 }
@@ -4548,7 +4549,7 @@ static void io_async_task_func(struct callback_head *cb)
 			goto end_req;
 		}
 		mutex_lock(&ctx->uring_lock);
-		__io_queue_sqe(req, NULL);
+		__io_queue_sqe(req, NULL, NULL);
 		mutex_unlock(&ctx->uring_lock);
 	} else {
 		io_cqring_ev_posted(ctx);
@@ -5345,7 +5346,7 @@ static void io_cleanup_req(struct io_kiocb *req)
 }
 
 static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
-			bool force_nonblock)
+			bool force_nonblock, struct io_comp_state *cs)
 {
 	struct io_ring_ctx *ctx = req->ctx;
 	int ret;
@@ -5630,7 +5631,7 @@ static void io_wq_submit_work(struct io_wq_work **workptr)
 
 	if (!ret) {
 		do {
-			ret = io_issue_sqe(req, NULL, false);
+			ret = io_issue_sqe(req, NULL, false, NULL);
 			/*
 			 * We can get EAGAIN for polled IO even though we're
 			 * forcing a sync submission from here, since we can't
@@ -5807,7 +5808,8 @@ static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req)
 	return nxt;
 }
 
-static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+			   struct io_comp_state *cs)
 {
 	struct io_kiocb *linked_timeout;
 	struct io_kiocb *nxt;
@@ -5827,7 +5829,7 @@ static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 			old_creds = override_creds(req->work.creds);
 	}
 
-	ret = io_issue_sqe(req, sqe, true);
+	ret = io_issue_sqe(req, sqe, true, cs);
 
 	/*
 	 * We async punt it if the file wasn't marked NOWAIT, or if the file
@@ -5885,7 +5887,8 @@ static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		revert_creds(old_creds);
 }
 
-static void io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+static void io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+			 struct io_comp_state *cs)
 {
 	int ret;
 
@@ -5914,21 +5917,22 @@ static void io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		req->work.flags |= IO_WQ_WORK_CONCURRENT;
 		io_queue_async_work(req);
 	} else {
-		__io_queue_sqe(req, sqe);
+		__io_queue_sqe(req, sqe, cs);
 	}
 }
 
-static inline void io_queue_link_head(struct io_kiocb *req)
+static inline void io_queue_link_head(struct io_kiocb *req,
+				      struct io_comp_state *cs)
 {
 	if (unlikely(req->flags & REQ_F_FAIL_LINK)) {
 		io_put_req(req);
 		io_req_complete(req, -ECANCELED);
 	} else
-		io_queue_sqe(req, NULL);
+		io_queue_sqe(req, NULL, cs);
 }
 
 static int io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
-			 struct io_kiocb **link)
+			 struct io_kiocb **link, struct io_comp_state *cs)
 {
 	struct io_ring_ctx *ctx = req->ctx;
 	int ret;
@@ -5968,7 +5972,7 @@ static int io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 
 		/* last request of a link, enqueue the link */
 		if (!(req->flags & (REQ_F_LINK | REQ_F_HARDLINK))) {
-			io_queue_link_head(head);
+			io_queue_link_head(head, cs);
 			*link = NULL;
 		}
 	} else {
@@ -5988,18 +5992,47 @@ static int io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 				req->flags |= REQ_F_FAIL_LINK;
 			*link = req;
 		} else {
-			io_queue_sqe(req, sqe);
+			io_queue_sqe(req, sqe, cs);
 		}
 	}
 
 	return 0;
 }
 
+static void io_submit_flush_completions(struct io_comp_state *cs)
+{
+	struct io_ring_ctx *ctx = cs->ctx;
+
+	spin_lock_irq(&ctx->completion_lock);
+	while (!list_empty(&cs->list)) {
+		struct io_kiocb *req;
+
+		req = list_first_entry(&cs->list, struct io_kiocb, list);
+		list_del(&req->list);
+		io_cqring_fill_event(req, req->result);
+		if (!(req->flags & REQ_F_LINK_HEAD)) {
+			req->flags |= REQ_F_COMP_LOCKED;
+			io_put_req(req);
+		} else {
+			spin_unlock_irq(&ctx->completion_lock);
+			io_put_req(req);
+			spin_lock_irq(&ctx->completion_lock);
+		}
+	}
+	io_commit_cqring(ctx);
+	spin_unlock_irq(&ctx->completion_lock);
+
+	io_cqring_ev_posted(ctx);
+	cs->nr = 0;
+}
+
 /*
  * Batched submission is done, ensure local IO is flushed out.
  */
 static void io_submit_state_end(struct io_submit_state *state)
 {
+	if (!list_empty(&state->comp.list))
+		io_submit_flush_completions(&state->comp);
 	blk_finish_plug(&state->plug);
 	io_state_file_put(state);
 	if (state->free_reqs)
@@ -6189,7 +6222,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 
 		trace_io_uring_submit_sqe(ctx, req->opcode, req->user_data,
 						true, io_async_submit(ctx));
-		err = io_submit_sqe(req, sqe, &link);
+		err = io_submit_sqe(req, sqe, &link, &state.comp);
 		if (err)
 			goto fail_req;
 	}
@@ -6200,7 +6233,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 		percpu_ref_put_many(&ctx->refs, nr - ref_used);
 	}
 	if (link)
-		io_queue_link_head(link);
+		io_queue_link_head(link, &state.comp);
 	io_submit_state_end(&state);
 
 	 /* Commit SQ ring head once we've consumed and submitted all SQEs */
-- 
2.27.0


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 4/5] io_uring: pass in completion state to appropriate issue side handlers
  2020-06-23 15:16 [PATCHSET 0/5] Allow batching of inline completions Jens Axboe
                   ` (2 preceding siblings ...)
  2020-06-23 15:16 ` [PATCH 3/5] io_uring: pass down completion state on the issue side Jens Axboe
@ 2020-06-23 15:16 ` Jens Axboe
  2020-06-23 15:16 ` [PATCH 5/5] io_uring: enable READ/WRITE to use deferred completions Jens Axboe
  4 siblings, 0 replies; 6+ messages in thread
From: Jens Axboe @ 2020-06-23 15:16 UTC (permalink / raw)
  To: io-uring; +Cc: xuanzhuo, Dust.li, Jens Axboe

Provide the completion state to the handlers that we know can complete
inline, so they can utilize this for batching completions.

Cap the max batch count at 32. This should be enough to provide a good
amortization of the cost of the lock+commit dance for completions, while
still being low enough not to cause any real latency issues for SQPOLL
applications.

Xuan Zhuo <xuanzhuo@linux.alibaba.com> reports that this changes his
profile from:

17.97% [kernel] [k] copy_user_generic_unrolled
13.92% [kernel] [k] io_commit_cqring
11.04% [kernel] [k] __io_cqring_fill_event
10.33% [kernel] [k] udp_recvmsg
 5.94% [kernel] [k] skb_release_data
 4.31% [kernel] [k] udp_rmem_release
 2.68% [kernel] [k] __check_object_size
 2.24% [kernel] [k] __slab_free
 2.22% [kernel] [k] _raw_spin_lock_bh
 2.21% [kernel] [k] kmem_cache_free
 2.13% [kernel] [k] free_pcppages_bulk
 1.83% [kernel] [k] io_submit_sqes
 1.38% [kernel] [k] page_frag_free
 1.31% [kernel] [k] inet_recvmsg

to

19.99% [kernel] [k] copy_user_generic_unrolled
11.63% [kernel] [k] skb_release_data
 9.36% [kernel] [k] udp_rmem_release
 8.64% [kernel] [k] udp_recvmsg
 6.21% [kernel] [k] __slab_free
 4.39% [kernel] [k] __check_object_size
 3.64% [kernel] [k] free_pcppages_bulk
 2.41% [kernel] [k] kmem_cache_free
 2.00% [kernel] [k] io_submit_sqes
 1.95% [kernel] [k] page_frag_free
 1.54% [kernel] [k] io_put_req
[...]
 0.07% [kernel] [k] io_commit_cqring
 0.44% [kernel] [k] __io_cqring_fill_event

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 153 ++++++++++++++++++++++++++++----------------------
 1 file changed, 86 insertions(+), 67 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index d6eb608c105a..0939f5c1a681 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1360,15 +1360,50 @@ static void io_cqring_add_event(struct io_kiocb *req, long res, long cflags)
 	io_cqring_ev_posted(ctx);
 }
 
-static void __io_req_complete(struct io_kiocb *req, long res, unsigned cflags)
+static void io_submit_flush_completions(struct io_comp_state *cs)
 {
-	io_cqring_add_event(req, res, cflags);
-	io_put_req(req);
+	struct io_ring_ctx *ctx = cs->ctx;
+
+	spin_lock_irq(&ctx->completion_lock);
+	while (!list_empty(&cs->list)) {
+		struct io_kiocb *req;
+
+		req = list_first_entry(&cs->list, struct io_kiocb, list);
+		list_del(&req->list);
+		io_cqring_fill_event(req, req->result);
+		if (!(req->flags & REQ_F_LINK_HEAD)) {
+			req->flags |= REQ_F_COMP_LOCKED;
+			io_put_req(req);
+		} else {
+			spin_unlock_irq(&ctx->completion_lock);
+			io_put_req(req);
+			spin_lock_irq(&ctx->completion_lock);
+		}
+	}
+	io_commit_cqring(ctx);
+	spin_unlock_irq(&ctx->completion_lock);
+
+	io_cqring_ev_posted(ctx);
+	cs->nr = 0;
+}
+
+static void __io_req_complete(struct io_kiocb *req, long res, unsigned cflags,
+			      struct io_comp_state *cs)
+{
+	if (!cs) {
+		io_cqring_add_event(req, res, cflags);
+		io_put_req(req);
+	} else {
+		req->result = res;
+		list_add_tail(&req->list, &cs->list);
+		if (++cs->nr >= 32)
+			io_submit_flush_completions(cs);
+	}
 }
 
 static void io_req_complete(struct io_kiocb *req, long res)
 {
-	__io_req_complete(req, res, 0);
+	__io_req_complete(req, res, 0, NULL);
 }
 
 static inline bool io_is_fallback_req(struct io_kiocb *req)
@@ -3172,14 +3207,14 @@ static int io_splice(struct io_kiocb *req, bool force_nonblock)
 /*
  * IORING_OP_NOP just posts a completion event, nothing else.
  */
-static int io_nop(struct io_kiocb *req)
+static int io_nop(struct io_kiocb *req, struct io_comp_state *cs)
 {
 	struct io_ring_ctx *ctx = req->ctx;
 
 	if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
 
-	io_req_complete(req, 0);
+	__io_req_complete(req, 0, 0, cs);
 	return 0;
 }
 
@@ -3401,7 +3436,8 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer *buf,
 	return i;
 }
 
-static int io_remove_buffers(struct io_kiocb *req, bool force_nonblock)
+static int io_remove_buffers(struct io_kiocb *req, bool force_nonblock,
+			     struct io_comp_state *cs)
 {
 	struct io_provide_buf *p = &req->pbuf;
 	struct io_ring_ctx *ctx = req->ctx;
@@ -3420,7 +3456,7 @@ static int io_remove_buffers(struct io_kiocb *req, bool force_nonblock)
 	io_ring_submit_lock(ctx, !force_nonblock);
 	if (ret < 0)
 		req_set_fail_links(req);
-	io_req_complete(req, ret);
+	__io_req_complete(req, ret, 0, cs);
 	return 0;
 }
 
@@ -3478,7 +3514,8 @@ static int io_add_buffers(struct io_provide_buf *pbuf, struct io_buffer **head)
 	return i ? i : -ENOMEM;
 }
 
-static int io_provide_buffers(struct io_kiocb *req, bool force_nonblock)
+static int io_provide_buffers(struct io_kiocb *req, bool force_nonblock,
+			      struct io_comp_state *cs)
 {
 	struct io_provide_buf *p = &req->pbuf;
 	struct io_ring_ctx *ctx = req->ctx;
@@ -3507,7 +3544,7 @@ static int io_provide_buffers(struct io_kiocb *req, bool force_nonblock)
 	io_ring_submit_unlock(ctx, !force_nonblock);
 	if (ret < 0)
 		req_set_fail_links(req);
-	io_req_complete(req, ret);
+	__io_req_complete(req, ret, 0, cs);
 	return 0;
 }
 
@@ -3538,7 +3575,8 @@ static int io_epoll_ctl_prep(struct io_kiocb *req,
 #endif
 }
 
-static int io_epoll_ctl(struct io_kiocb *req, bool force_nonblock)
+static int io_epoll_ctl(struct io_kiocb *req, bool force_nonblock,
+			struct io_comp_state *cs)
 {
 #if defined(CONFIG_EPOLL)
 	struct io_epoll *ie = &req->epoll;
@@ -3550,7 +3588,7 @@ static int io_epoll_ctl(struct io_kiocb *req, bool force_nonblock)
 
 	if (ret < 0)
 		req_set_fail_links(req);
-	io_req_complete(req, ret);
+	__io_req_complete(req, ret, 0, cs);
 	return 0;
 #else
 	return -EOPNOTSUPP;
@@ -3695,7 +3733,8 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	return 0;
 }
 
-static int io_close(struct io_kiocb *req, bool force_nonblock)
+static int io_close(struct io_kiocb *req, bool force_nonblock,
+		    struct io_comp_state *cs)
 {
 	struct io_close *close = &req->close;
 	int ret;
@@ -3722,7 +3761,7 @@ static int io_close(struct io_kiocb *req, bool force_nonblock)
 		req_set_fail_links(req);
 	fput(close->put_file);
 	close->put_file = NULL;
-	io_req_complete(req, ret);
+	__io_req_complete(req, ret, 0, cs);
 	return 0;
 }
 
@@ -3808,7 +3847,8 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	return ret;
 }
 
-static int io_sendmsg(struct io_kiocb *req, bool force_nonblock)
+static int io_sendmsg(struct io_kiocb *req, bool force_nonblock,
+		      struct io_comp_state *cs)
 {
 	struct io_async_msghdr *kmsg = NULL;
 	struct socket *sock;
@@ -3857,11 +3897,12 @@ static int io_sendmsg(struct io_kiocb *req, bool force_nonblock)
 	req->flags &= ~REQ_F_NEED_CLEANUP;
 	if (ret < 0)
 		req_set_fail_links(req);
-	io_req_complete(req, ret);
+	__io_req_complete(req, ret, 0, cs);
 	return 0;
 }
 
-static int io_send(struct io_kiocb *req, bool force_nonblock)
+static int io_send(struct io_kiocb *req, bool force_nonblock,
+		   struct io_comp_state *cs)
 {
 	struct socket *sock;
 	int ret;
@@ -3899,7 +3940,7 @@ static int io_send(struct io_kiocb *req, bool force_nonblock)
 
 	if (ret < 0)
 		req_set_fail_links(req);
-	io_req_complete(req, ret);
+	__io_req_complete(req, ret, 0, cs);
 	return 0;
 }
 
@@ -4042,7 +4083,8 @@ static int io_recvmsg_prep(struct io_kiocb *req,
 	return ret;
 }
 
-static int io_recvmsg(struct io_kiocb *req, bool force_nonblock)
+static int io_recvmsg(struct io_kiocb *req, bool force_nonblock,
+		      struct io_comp_state *cs)
 {
 	struct io_async_msghdr *kmsg = NULL;
 	struct socket *sock;
@@ -4098,11 +4140,12 @@ static int io_recvmsg(struct io_kiocb *req, bool force_nonblock)
 	req->flags &= ~REQ_F_NEED_CLEANUP;
 	if (ret < 0)
 		req_set_fail_links(req);
-	__io_req_complete(req, ret, cflags);
+	__io_req_complete(req, ret, cflags, cs);
 	return 0;
 }
 
-static int io_recv(struct io_kiocb *req, bool force_nonblock)
+static int io_recv(struct io_kiocb *req, bool force_nonblock,
+		   struct io_comp_state *cs)
 {
 	struct io_buffer *kbuf = NULL;
 	struct socket *sock;
@@ -4154,7 +4197,7 @@ static int io_recv(struct io_kiocb *req, bool force_nonblock)
 	req->flags &= ~REQ_F_NEED_CLEANUP;
 	if (ret < 0)
 		req_set_fail_links(req);
-	__io_req_complete(req, ret, cflags);
+	__io_req_complete(req, ret, cflags, cs);
 	return 0;
 }
 
@@ -4174,7 +4217,8 @@ static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	return 0;
 }
 
-static int io_accept(struct io_kiocb *req, bool force_nonblock)
+static int io_accept(struct io_kiocb *req, bool force_nonblock,
+		     struct io_comp_state *cs)
 {
 	struct io_accept *accept = &req->accept;
 	unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
@@ -4193,7 +4237,7 @@ static int io_accept(struct io_kiocb *req, bool force_nonblock)
 			ret = -EINTR;
 		req_set_fail_links(req);
 	}
-	io_req_complete(req, ret);
+	__io_req_complete(req, ret, 0, cs);
 	return 0;
 }
 
@@ -4217,7 +4261,8 @@ static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 					&io->connect.address);
 }
 
-static int io_connect(struct io_kiocb *req, bool force_nonblock)
+static int io_connect(struct io_kiocb *req, bool force_nonblock,
+		      struct io_comp_state *cs)
 {
 	struct io_async_ctx __io, *io;
 	unsigned file_flags;
@@ -4253,7 +4298,7 @@ static int io_connect(struct io_kiocb *req, bool force_nonblock)
 out:
 	if (ret < 0)
 		req_set_fail_links(req);
-	io_req_complete(req, ret);
+	__io_req_complete(req, ret, 0, cs);
 	return 0;
 }
 #else /* !CONFIG_NET */
@@ -5134,7 +5179,8 @@ static int io_files_update_prep(struct io_kiocb *req,
 	return 0;
 }
 
-static int io_files_update(struct io_kiocb *req, bool force_nonblock)
+static int io_files_update(struct io_kiocb *req, bool force_nonblock,
+			   struct io_comp_state *cs)
 {
 	struct io_ring_ctx *ctx = req->ctx;
 	struct io_uring_files_update up;
@@ -5152,7 +5198,7 @@ static int io_files_update(struct io_kiocb *req, bool force_nonblock)
 
 	if (ret < 0)
 		req_set_fail_links(req);
-	io_req_complete(req, ret);
+	__io_req_complete(req, ret, 0, cs);
 	return 0;
 }
 
@@ -5353,7 +5399,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 
 	switch (req->opcode) {
 	case IORING_OP_NOP:
-		ret = io_nop(req);
+		ret = io_nop(req, cs);
 		break;
 	case IORING_OP_READV:
 	case IORING_OP_READ_FIXED:
@@ -5415,9 +5461,9 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 				break;
 		}
 		if (req->opcode == IORING_OP_SENDMSG)
-			ret = io_sendmsg(req, force_nonblock);
+			ret = io_sendmsg(req, force_nonblock, cs);
 		else
-			ret = io_send(req, force_nonblock);
+			ret = io_send(req, force_nonblock, cs);
 		break;
 	case IORING_OP_RECVMSG:
 	case IORING_OP_RECV:
@@ -5427,9 +5473,9 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 				break;
 		}
 		if (req->opcode == IORING_OP_RECVMSG)
-			ret = io_recvmsg(req, force_nonblock);
+			ret = io_recvmsg(req, force_nonblock, cs);
 		else
-			ret = io_recv(req, force_nonblock);
+			ret = io_recv(req, force_nonblock, cs);
 		break;
 	case IORING_OP_TIMEOUT:
 		if (sqe) {
@@ -5453,7 +5499,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 			if (ret)
 				break;
 		}
-		ret = io_accept(req, force_nonblock);
+		ret = io_accept(req, force_nonblock, cs);
 		break;
 	case IORING_OP_CONNECT:
 		if (sqe) {
@@ -5461,7 +5507,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 			if (ret)
 				break;
 		}
-		ret = io_connect(req, force_nonblock);
+		ret = io_connect(req, force_nonblock, cs);
 		break;
 	case IORING_OP_ASYNC_CANCEL:
 		if (sqe) {
@@ -5493,7 +5539,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 			if (ret)
 				break;
 		}
-		ret = io_close(req, force_nonblock);
+		ret = io_close(req, force_nonblock, cs);
 		break;
 	case IORING_OP_FILES_UPDATE:
 		if (sqe) {
@@ -5501,7 +5547,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 			if (ret)
 				break;
 		}
-		ret = io_files_update(req, force_nonblock);
+		ret = io_files_update(req, force_nonblock, cs);
 		break;
 	case IORING_OP_STATX:
 		if (sqe) {
@@ -5541,7 +5587,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 			if (ret)
 				break;
 		}
-		ret = io_epoll_ctl(req, force_nonblock);
+		ret = io_epoll_ctl(req, force_nonblock, cs);
 		break;
 	case IORING_OP_SPLICE:
 		if (sqe) {
@@ -5557,7 +5603,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 			if (ret)
 				break;
 		}
-		ret = io_provide_buffers(req, force_nonblock);
+		ret = io_provide_buffers(req, force_nonblock, cs);
 		break;
 	case IORING_OP_REMOVE_BUFFERS:
 		if (sqe) {
@@ -5565,7 +5611,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 			if (ret)
 				break;
 		}
-		ret = io_remove_buffers(req, force_nonblock);
+		ret = io_remove_buffers(req, force_nonblock, cs);
 		break;
 	case IORING_OP_TEE:
 		if (sqe) {
@@ -5999,33 +6045,6 @@ static int io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 	return 0;
 }
 
-static void io_submit_flush_completions(struct io_comp_state *cs)
-{
-	struct io_ring_ctx *ctx = cs->ctx;
-
-	spin_lock_irq(&ctx->completion_lock);
-	while (!list_empty(&cs->list)) {
-		struct io_kiocb *req;
-
-		req = list_first_entry(&cs->list, struct io_kiocb, list);
-		list_del(&req->list);
-		io_cqring_fill_event(req, req->result);
-		if (!(req->flags & REQ_F_LINK_HEAD)) {
-			req->flags |= REQ_F_COMP_LOCKED;
-			io_put_req(req);
-		} else {
-			spin_unlock_irq(&ctx->completion_lock);
-			io_put_req(req);
-			spin_lock_irq(&ctx->completion_lock);
-		}
-	}
-	io_commit_cqring(ctx);
-	spin_unlock_irq(&ctx->completion_lock);
-
-	io_cqring_ev_posted(ctx);
-	cs->nr = 0;
-}
-
 /*
  * Batched submission is done, ensure local IO is flushed out.
  */
-- 
2.27.0


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 5/5] io_uring: enable READ/WRITE to use deferred completions
  2020-06-23 15:16 [PATCHSET 0/5] Allow batching of inline completions Jens Axboe
                   ` (3 preceding siblings ...)
  2020-06-23 15:16 ` [PATCH 4/5] io_uring: pass in completion state to appropriate issue side handlers Jens Axboe
@ 2020-06-23 15:16 ` Jens Axboe
  4 siblings, 0 replies; 6+ messages in thread
From: Jens Axboe @ 2020-06-23 15:16 UTC (permalink / raw)
  To: io-uring; +Cc: xuanzhuo, Dust.li, Jens Axboe

A bit more surgery required here, as completions are generally done
through the kiocb->ki_complete() callback, even if they complete inline.
This enables the regular read/write path to use the io_comp_state
logic to batch inline completions.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 38 +++++++++++++++++++++++---------------
 1 file changed, 23 insertions(+), 15 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 0939f5c1a681..a4936b94988c 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2019,7 +2019,8 @@ static inline void req_set_fail_links(struct io_kiocb *req)
 		req->flags |= REQ_F_FAIL_LINK;
 }
 
-static void io_complete_rw_common(struct kiocb *kiocb, long res)
+static void io_complete_rw_common(struct kiocb *kiocb, long res,
+				  struct io_comp_state *cs)
 {
 	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
 	int cflags = 0;
@@ -2031,7 +2032,7 @@ static void io_complete_rw_common(struct kiocb *kiocb, long res)
 		req_set_fail_links(req);
 	if (req->flags & REQ_F_BUFFER_SELECTED)
 		cflags = io_put_kbuf(req);
-	io_cqring_add_event(req, res, cflags);
+	__io_req_complete(req, res, cflags, cs);
 }
 
 static void io_sq_thread_drop_mm(struct io_ring_ctx *ctx)
@@ -2134,14 +2135,18 @@ static bool io_rw_reissue(struct io_kiocb *req, long res)
 	return false;
 }
 
+static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
+			     struct io_comp_state *cs)
+{
+	if (!io_rw_reissue(req, res))
+		io_complete_rw_common(&req->rw.kiocb, res, cs);
+}
+
 static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
 {
 	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
 
-	if (!io_rw_reissue(req, res)) {
-		io_complete_rw_common(kiocb, res);
-		io_put_req(req);
-	}
+	__io_complete_rw(req, res, res2, NULL);
 }
 
 static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
@@ -2375,14 +2380,15 @@ static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret)
 	}
 }
 
-static void kiocb_done(struct kiocb *kiocb, ssize_t ret)
+static void kiocb_done(struct kiocb *kiocb, ssize_t ret,
+		       struct io_comp_state *cs)
 {
 	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
 
 	if (req->flags & REQ_F_CUR_POS)
 		req->file->f_pos = kiocb->ki_pos;
 	if (ret >= 0 && kiocb->ki_complete == io_complete_rw)
-		io_complete_rw(kiocb, ret, 0);
+		__io_complete_rw(req, ret, 0, cs);
 	else
 		io_rw_done(kiocb, ret);
 }
@@ -2918,7 +2924,8 @@ static int io_iter_do_read(struct io_kiocb *req, struct iov_iter *iter)
 	return loop_rw_iter(READ, req->file, &req->rw.kiocb, iter);
 }
 
-static int io_read(struct io_kiocb *req, bool force_nonblock)
+static int io_read(struct io_kiocb *req, bool force_nonblock,
+		   struct io_comp_state *cs)
 {
 	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
 	struct kiocb *kiocb = &req->rw.kiocb;
@@ -2953,7 +2960,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock)
 
 		/* Catch -EAGAIN return for forced non-blocking submission */
 		if (!force_nonblock || (ret2 != -EAGAIN && ret2 != -EIO)) {
-			kiocb_done(kiocb, ret2);
+			kiocb_done(kiocb, ret2, cs);
 		} else {
 			iter.count = iov_count;
 			iter.nr_segs = nr_segs;
@@ -2968,7 +2975,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock)
 				if (ret2 == -EIOCBQUEUED) {
 					goto out_free;
 				} else if (ret2 != -EAGAIN) {
-					kiocb_done(kiocb, ret2);
+					kiocb_done(kiocb, ret2, cs);
 					goto out_free;
 				}
 			}
@@ -3014,7 +3021,8 @@ static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 	return 0;
 }
 
-static int io_write(struct io_kiocb *req, bool force_nonblock)
+static int io_write(struct io_kiocb *req, bool force_nonblock,
+		    struct io_comp_state *cs)
 {
 	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
 	struct kiocb *kiocb = &req->rw.kiocb;
@@ -3083,7 +3091,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock)
 		if (ret2 == -EOPNOTSUPP && (kiocb->ki_flags & IOCB_NOWAIT))
 			ret2 = -EAGAIN;
 		if (!force_nonblock || ret2 != -EAGAIN) {
-			kiocb_done(kiocb, ret2);
+			kiocb_done(kiocb, ret2, cs);
 		} else {
 			iter.count = iov_count;
 			iter.nr_segs = nr_segs;
@@ -5409,7 +5417,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 			if (ret < 0)
 				break;
 		}
-		ret = io_read(req, force_nonblock);
+		ret = io_read(req, force_nonblock, cs);
 		break;
 	case IORING_OP_WRITEV:
 	case IORING_OP_WRITE_FIXED:
@@ -5419,7 +5427,7 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 			if (ret < 0)
 				break;
 		}
-		ret = io_write(req, force_nonblock);
+		ret = io_write(req, force_nonblock, cs);
 		break;
 	case IORING_OP_FSYNC:
 		if (sqe) {
-- 
2.27.0


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, back to index

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-06-23 15:16 [PATCHSET 0/5] Allow batching of inline completions Jens Axboe
2020-06-23 15:16 ` [PATCH 1/5] io_uring: provide generic io_req_complete() helper Jens Axboe
2020-06-23 15:16 ` [PATCH 2/5] io_uring: add 'io_comp_state' to struct io_submit_state Jens Axboe
2020-06-23 15:16 ` [PATCH 3/5] io_uring: pass down completion state on the issue side Jens Axboe
2020-06-23 15:16 ` [PATCH 4/5] io_uring: pass in completion state to appropriate issue side handlers Jens Axboe
2020-06-23 15:16 ` [PATCH 5/5] io_uring: enable READ/WRITE to use deferred completions Jens Axboe

IO-Uring Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/io-uring/0 io-uring/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 io-uring io-uring/ https://lore.kernel.org/io-uring \
		io-uring@vger.kernel.org
	public-inbox-index io-uring

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.io-uring


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git