io-uring.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC 0/3] improvements for poll requests
@ 2021-09-22 12:34 Xiaoguang Wang
  2021-09-22 12:34 ` [RFC 1/3] io_uring: reduce frequent add_wait_queue() overhead for multi-shot poll request Xiaoguang Wang
                   ` (3 more replies)
  0 siblings, 4 replies; 11+ messages in thread
From: Xiaoguang Wang @ 2021-09-22 12:34 UTC (permalink / raw)
  To: io-uring; +Cc: axboe, asml.silence, Xiaoguang Wang

This patchset tries to improve echo_server model based on io_uring's
IORING_POLL_ADD_MULTI feature.

Xiaoguang Wang (3):
  io_uring: reduce frequent add_wait_queue() overhead for multi-shot
    poll request
  io_uring: don't get completion_lock in io_poll_rewait()
  io_uring: try to batch poll request completion

 fs/io_uring.c | 116 ++++++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 92 insertions(+), 24 deletions(-)

-- 
2.14.4.44.g2045bb6


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [RFC 1/3] io_uring: reduce frequent add_wait_queue() overhead for multi-shot poll request
  2021-09-22 12:34 [RFC 0/3] improvements for poll requests Xiaoguang Wang
@ 2021-09-22 12:34 ` Xiaoguang Wang
  2021-09-22 17:52   ` Hao Xu
  2021-09-22 12:34 ` [RFC 2/3] io_uring: don't get completion_lock in io_poll_rewait() Xiaoguang Wang
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 11+ messages in thread
From: Xiaoguang Wang @ 2021-09-22 12:34 UTC (permalink / raw)
  To: io-uring; +Cc: axboe, asml.silence, Xiaoguang Wang

Run echo_server to evaluate io_uring's multi-shot poll performance, perf
shows that add_wait_queue() has obvious overhead. Introduce a new state
'active' in io_poll_iocb to indicate whether io_poll_wake() should queue
a task_work. This new state will be set to true initially, be set to false
when starting to queue a task work, and be set to true again when a poll
cqe has been committed. One concern is that this method may lose wake-up
events, but it seems to be ok.

  io_poll_wake                io_poll_task_func
t1                       |
t2                       |    WRITE_ONCE(req->poll.active, true);
t3                       |
t4                       |    io_commit_cqring(ctx);
t5                       |
t6                       |

If wake-up events happen before or at t4, it's ok, the user app will always
see a cqe. If wake-up events happen after t4 and IIUC, io_poll_wake()
will see the new req->poll.active value by using READ_ONCE().

With this patch, a pure echo_server(1000 connections, packet is 16 bytes)
shows about 1.6% reqs improvement.

Signed-off-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
---
 fs/io_uring.c | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 1294b1ef4acb..ca4464a75c7b 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -487,6 +487,7 @@ struct io_poll_iocb {
 	__poll_t			events;
 	bool				done;
 	bool				canceled;
+	bool				active;
 	struct wait_queue_entry		wait;
 };
 
@@ -5025,8 +5026,6 @@ static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
 
 	trace_io_uring_task_add(req->ctx, req->opcode, req->user_data, mask);
 
-	list_del_init(&poll->wait.entry);
-
 	req->result = mask;
 	req->io_task_work.func = func;
 
@@ -5057,7 +5056,10 @@ static bool io_poll_rewait(struct io_kiocb *req, struct io_poll_iocb *poll)
 
 	spin_lock(&ctx->completion_lock);
 	if (!req->result && !READ_ONCE(poll->canceled)) {
-		add_wait_queue(poll->head, &poll->wait);
+		if (req->opcode == IORING_OP_POLL_ADD)
+			WRITE_ONCE(req->poll.active, true);
+		else
+			add_wait_queue(poll->head, &poll->wait);
 		return true;
 	}
 
@@ -5133,6 +5135,9 @@ static inline bool io_poll_complete(struct io_kiocb *req, __poll_t mask)
 	return done;
 }
 
+static bool __io_poll_remove_one(struct io_kiocb *req,
+				 struct io_poll_iocb *poll, bool do_cancel);
+
 static void io_poll_task_func(struct io_kiocb *req, bool *locked)
 {
 	struct io_ring_ctx *ctx = req->ctx;
@@ -5146,10 +5151,11 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked)
 		done = __io_poll_complete(req, req->result);
 		if (done) {
 			io_poll_remove_double(req);
+			__io_poll_remove_one(req, io_poll_get_single(req), true);
 			hash_del(&req->hash_node);
 		} else {
 			req->result = 0;
-			add_wait_queue(req->poll.head, &req->poll.wait);
+			WRITE_ONCE(req->poll.active, true);
 		}
 		io_commit_cqring(ctx);
 		spin_unlock(&ctx->completion_lock);
@@ -5204,6 +5210,7 @@ static void io_init_poll_iocb(struct io_poll_iocb *poll, __poll_t events,
 	poll->head = NULL;
 	poll->done = false;
 	poll->canceled = false;
+	poll->active = true;
 #define IO_POLL_UNMASK	(EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP)
 	/* mask in events that we always want/need */
 	poll->events = events | IO_POLL_UNMASK;
@@ -5301,6 +5308,7 @@ static int io_async_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 	trace_io_uring_poll_wake(req->ctx, req->opcode, req->user_data,
 					key_to_poll(key));
 
+	list_del_init(&poll->wait.entry);
 	return __io_async_wake(req, poll, key_to_poll(key), io_async_task_func);
 }
 
@@ -5569,6 +5577,10 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 	struct io_kiocb *req = wait->private;
 	struct io_poll_iocb *poll = &req->poll;
 
+	if (!READ_ONCE(poll->active))
+		return 0;
+
+	WRITE_ONCE(poll->active, false);
 	return __io_async_wake(req, poll, key_to_poll(key), io_poll_task_func);
 }
 
-- 
2.14.4.44.g2045bb6


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [RFC 2/3] io_uring: don't get completion_lock in io_poll_rewait()
  2021-09-22 12:34 [RFC 0/3] improvements for poll requests Xiaoguang Wang
  2021-09-22 12:34 ` [RFC 1/3] io_uring: reduce frequent add_wait_queue() overhead for multi-shot poll request Xiaoguang Wang
@ 2021-09-22 12:34 ` Xiaoguang Wang
  2021-09-22 12:34 ` [RFC 3/3] io_uring: try to batch poll request completion Xiaoguang Wang
  2021-09-22 13:00 ` [RFC 0/3] improvements for poll requests Jens Axboe
  3 siblings, 0 replies; 11+ messages in thread
From: Xiaoguang Wang @ 2021-09-22 12:34 UTC (permalink / raw)
  To: io-uring; +Cc: axboe, asml.silence, Xiaoguang Wang

In current implementation, if there are not available events,
io_poll_rewait() just gets completion_lock, and unlocks it in
io_poll_task_func() or io_async_task_func(), which isn't necessary.

Change this logic to let io_poll_task_func() or io_async_task_func()
get the completion_lock lock, this is also a preparation for
later patch, which will batch poll request completion.

Signed-off-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
---
 fs/io_uring.c | 50 ++++++++++++++++++++++----------------------------
 1 file changed, 22 insertions(+), 28 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index ca4464a75c7b..6fdfb688cf91 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5040,10 +5040,7 @@ static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
 }
 
 static bool io_poll_rewait(struct io_kiocb *req, struct io_poll_iocb *poll)
-	__acquires(&req->ctx->completion_lock)
 {
-	struct io_ring_ctx *ctx = req->ctx;
-
 	/* req->task == current here, checking PF_EXITING is safe */
 	if (unlikely(req->task->flags & PF_EXITING))
 		WRITE_ONCE(poll->canceled, true);
@@ -5054,7 +5051,6 @@ static bool io_poll_rewait(struct io_kiocb *req, struct io_poll_iocb *poll)
 		req->result = vfs_poll(req->file, &pt) & poll->events;
 	}
 
-	spin_lock(&ctx->completion_lock);
 	if (!req->result && !READ_ONCE(poll->canceled)) {
 		if (req->opcode == IORING_OP_POLL_ADD)
 			WRITE_ONCE(req->poll.active, true);
@@ -5142,30 +5138,29 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked)
 {
 	struct io_ring_ctx *ctx = req->ctx;
 	struct io_kiocb *nxt;
+	bool done;
 
-	if (io_poll_rewait(req, &req->poll)) {
-		spin_unlock(&ctx->completion_lock);
-	} else {
-		bool done;
+	if (io_poll_rewait(req, &req->poll))
+		return;
 
-		done = __io_poll_complete(req, req->result);
-		if (done) {
-			io_poll_remove_double(req);
-			__io_poll_remove_one(req, io_poll_get_single(req), true);
-			hash_del(&req->hash_node);
-		} else {
-			req->result = 0;
-			WRITE_ONCE(req->poll.active, true);
-		}
-		io_commit_cqring(ctx);
-		spin_unlock(&ctx->completion_lock);
-		io_cqring_ev_posted(ctx);
+	spin_lock(&ctx->completion_lock);
+	done = __io_poll_complete(req, req->result);
+	if (done) {
+		io_poll_remove_double(req);
+		__io_poll_remove_one(req, io_poll_get_single(req), true);
+		hash_del(&req->hash_node);
+	} else {
+		req->result = 0;
+		WRITE_ONCE(req->poll.active, true);
+	}
+	io_commit_cqring(ctx);
+	spin_unlock(&ctx->completion_lock);
+	io_cqring_ev_posted(ctx);
 
-		if (done) {
-			nxt = io_put_req_find_next(req);
-			if (nxt)
-				io_req_task_submit(nxt, locked);
-		}
+	if (done) {
+		nxt = io_put_req_find_next(req);
+		if (nxt)
+			io_req_task_submit(nxt, locked);
 	}
 }
 
@@ -5284,11 +5279,10 @@ static void io_async_task_func(struct io_kiocb *req, bool *locked)
 
 	trace_io_uring_task_run(req->ctx, req, req->opcode, req->user_data);
 
-	if (io_poll_rewait(req, &apoll->poll)) {
-		spin_unlock(&ctx->completion_lock);
+	if (io_poll_rewait(req, &apoll->poll))
 		return;
-	}
 
+	spin_lock(&ctx->completion_lock);
 	hash_del(&req->hash_node);
 	io_poll_remove_double(req);
 	spin_unlock(&ctx->completion_lock);
-- 
2.14.4.44.g2045bb6


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [RFC 3/3] io_uring: try to batch poll request completion
  2021-09-22 12:34 [RFC 0/3] improvements for poll requests Xiaoguang Wang
  2021-09-22 12:34 ` [RFC 1/3] io_uring: reduce frequent add_wait_queue() overhead for multi-shot poll request Xiaoguang Wang
  2021-09-22 12:34 ` [RFC 2/3] io_uring: don't get completion_lock in io_poll_rewait() Xiaoguang Wang
@ 2021-09-22 12:34 ` Xiaoguang Wang
  2021-09-22 16:24   ` Pavel Begunkov
  2021-09-22 17:00   ` Hao Xu
  2021-09-22 13:00 ` [RFC 0/3] improvements for poll requests Jens Axboe
  3 siblings, 2 replies; 11+ messages in thread
From: Xiaoguang Wang @ 2021-09-22 12:34 UTC (permalink / raw)
  To: io-uring; +Cc: axboe, asml.silence, Xiaoguang Wang

For an echo-server based on io_uring's IORING_POLL_ADD_MULTI feature,
only poll request are completed in task work, normal read/write
requests are issued when user app sees cqes on corresponding poll
requests, and they will mostly read/write data successfully, which
don't need task work. So at least for echo-server model, batching
poll request completion properly will give benefits.

Currently I don't find any appropriate place to store batched poll
requests, so put them in struct io_submit_state temporarily, which I
think will need rework in the future.

Signed-off-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
---
 fs/io_uring.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 63 insertions(+), 1 deletion(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 6fdfb688cf91..14118388bfc6 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -321,6 +321,11 @@ struct io_submit_state {
 	 */
 	struct io_kiocb		*compl_reqs[IO_COMPL_BATCH];
 	unsigned int		compl_nr;
+
+	struct io_kiocb		*poll_compl_reqs[IO_COMPL_BATCH];
+	bool			poll_req_status[IO_COMPL_BATCH];
+	unsigned int		poll_compl_nr;
+
 	/* inline/task_work completion list, under ->uring_lock */
 	struct list_head	free_list;
 
@@ -2093,6 +2098,8 @@ static void ctx_flush_and_put(struct io_ring_ctx *ctx, bool *locked)
 	percpu_ref_put(&ctx->refs);
 }
 
+static void io_poll_flush_completions(struct io_ring_ctx *ctx, bool *locked);
+
 static void tctx_task_work(struct callback_head *cb)
 {
 	bool locked = false;
@@ -2103,8 +2110,11 @@ static void tctx_task_work(struct callback_head *cb)
 	while (1) {
 		struct io_wq_work_node *node;
 
-		if (!tctx->task_list.first && locked && ctx->submit_state.compl_nr)
+		if (!tctx->task_list.first && locked && (ctx->submit_state.compl_nr ||
+		    ctx->submit_state.poll_compl_nr)) {
 			io_submit_flush_completions(ctx);
+			io_poll_flush_completions(ctx, &locked);
+		}
 
 		spin_lock_irq(&tctx->task_lock);
 		node = tctx->task_list.first;
@@ -5134,6 +5144,49 @@ static inline bool io_poll_complete(struct io_kiocb *req, __poll_t mask)
 static bool __io_poll_remove_one(struct io_kiocb *req,
 				 struct io_poll_iocb *poll, bool do_cancel);
 
+static void io_poll_flush_completions(struct io_ring_ctx *ctx, bool *locked)
+	__must_hold(&ctx->uring_lock)
+{
+	struct io_submit_state *state = &ctx->submit_state;
+	struct io_kiocb *req, *nxt;
+	int i, nr = state->poll_compl_nr;
+	bool done, skip_done = true;
+
+	spin_lock(&ctx->completion_lock);
+	for (i = 0; i < nr; i++) {
+		req = state->poll_compl_reqs[i];
+		done = __io_poll_complete(req, req->result);
+		if (done) {
+			io_poll_remove_double(req);
+			__io_poll_remove_one(req, io_poll_get_single(req), true);
+			hash_del(&req->hash_node);
+			state->poll_req_status[i] = true;
+			if (skip_done)
+				skip_done = false;
+		} else {
+			req->result = 0;
+			state->poll_req_status[i] = false;
+			WRITE_ONCE(req->poll.active, true);
+		}
+	}
+	io_commit_cqring(ctx);
+	spin_unlock(&ctx->completion_lock);
+	io_cqring_ev_posted(ctx);
+	state->poll_compl_nr = 0;
+
+	if (skip_done)
+		return;
+
+	for (i = 0; i < nr; i++) {
+		if (state->poll_req_status[i]) {
+			req = state->poll_compl_reqs[i];
+			nxt = io_put_req_find_next(req);
+			if (nxt)
+				io_req_task_submit(nxt, locked);
+		}
+	}
+}
+
 static void io_poll_task_func(struct io_kiocb *req, bool *locked)
 {
 	struct io_ring_ctx *ctx = req->ctx;
@@ -5143,6 +5196,15 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked)
 	if (io_poll_rewait(req, &req->poll))
 		return;
 
+	if (*locked) {
+		struct io_submit_state *state = &ctx->submit_state;
+
+		state->poll_compl_reqs[state->poll_compl_nr++] = req;
+		if (state->poll_compl_nr == ARRAY_SIZE(state->poll_compl_reqs))
+			io_poll_flush_completions(ctx, locked);
+		return;
+	}
+
 	spin_lock(&ctx->completion_lock);
 	done = __io_poll_complete(req, req->result);
 	if (done) {
-- 
2.14.4.44.g2045bb6


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC 0/3] improvements for poll requests
  2021-09-22 12:34 [RFC 0/3] improvements for poll requests Xiaoguang Wang
                   ` (2 preceding siblings ...)
  2021-09-22 12:34 ` [RFC 3/3] io_uring: try to batch poll request completion Xiaoguang Wang
@ 2021-09-22 13:00 ` Jens Axboe
  3 siblings, 0 replies; 11+ messages in thread
From: Jens Axboe @ 2021-09-22 13:00 UTC (permalink / raw)
  To: Xiaoguang Wang, io-uring; +Cc: asml.silence

On 9/22/21 6:34 AM, Xiaoguang Wang wrote:
> This patchset tries to improve echo_server model based on io_uring's
> IORING_POLL_ADD_MULTI feature.

Nifty, I'll take a look. Can you put the echo server using multishot
somewhere so others can test it too? Maybe it already is, but would be
nice to detail exactly what was run.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC 3/3] io_uring: try to batch poll request completion
  2021-09-22 12:34 ` [RFC 3/3] io_uring: try to batch poll request completion Xiaoguang Wang
@ 2021-09-22 16:24   ` Pavel Begunkov
  2021-09-24  4:28     ` Xiaoguang Wang
  2021-09-22 17:00   ` Hao Xu
  1 sibling, 1 reply; 11+ messages in thread
From: Pavel Begunkov @ 2021-09-22 16:24 UTC (permalink / raw)
  To: Xiaoguang Wang, io-uring; +Cc: axboe

On 9/22/21 1:34 PM, Xiaoguang Wang wrote:
> For an echo-server based on io_uring's IORING_POLL_ADD_MULTI feature,
> only poll request are completed in task work, normal read/write
> requests are issued when user app sees cqes on corresponding poll
> requests, and they will mostly read/write data successfully, which
> don't need task work. So at least for echo-server model, batching
> poll request completion properly will give benefits.
> 
> Currently don't find any appropriate place to store batched poll
> requests, put them in struct io_submit_state temporarily, which I
> think it'll need rework in future.
> 
> Signed-off-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
> ---
>  fs/io_uring.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 63 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 6fdfb688cf91..14118388bfc6 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -321,6 +321,11 @@ struct io_submit_state {
>  	 */
>  	struct io_kiocb		*compl_reqs[IO_COMPL_BATCH];
>  	unsigned int		compl_nr;
> +
> +	struct io_kiocb		*poll_compl_reqs[IO_COMPL_BATCH];
> +	bool			poll_req_status[IO_COMPL_BATCH];
> +	unsigned int		poll_compl_nr;
> +
>  	/* inline/task_work completion list, under ->uring_lock */
>  	struct list_head	free_list;
>  
> @@ -2093,6 +2098,8 @@ static void ctx_flush_and_put(struct io_ring_ctx *ctx, bool *locked)
>  	percpu_ref_put(&ctx->refs);
>  }
>  
> +static void io_poll_flush_completions(struct io_ring_ctx *ctx, bool *locked);
> +
>  static void tctx_task_work(struct callback_head *cb)
>  {
>  	bool locked = false;
> @@ -2103,8 +2110,11 @@ static void tctx_task_work(struct callback_head *cb)
>  	while (1) {
>  		struct io_wq_work_node *node;
>  
> -		if (!tctx->task_list.first && locked && ctx->submit_state.compl_nr)
> +		if (!tctx->task_list.first && locked && (ctx->submit_state.compl_nr ||
> +		    ctx->submit_state.poll_compl_nr)) {

io_submit_flush_completions() shouldn't be called if there are no requests... And the
check is already inside for-next, will be 

if (... && locked) {
	io_submit_flush_completions();
	if (poll_compl_nr)
		io_poll_flush_completions();
}

>  			io_submit_flush_completions(ctx);
> +			io_poll_flush_completions(ctx, &locked);
> +		}
>  
>  		spin_lock_irq(&tctx->task_lock);
>  		node = tctx->task_list.first;
> @@ -5134,6 +5144,49 @@ static inline bool io_poll_complete(struct io_kiocb *req, __poll_t mask)
>  static bool __io_poll_remove_one(struct io_kiocb *req,
>  				 struct io_poll_iocb *poll, bool do_cancel);
>  
> +static void io_poll_flush_completions(struct io_ring_ctx *ctx, bool *locked)
> +	__must_hold(&ctx->uring_lock)
> +{
> +	struct io_submit_state *state = &ctx->submit_state;
> +	struct io_kiocb *req, *nxt;
> +	int i, nr = state->poll_compl_nr;
> +	bool done, skip_done = true;
> +
> +	spin_lock(&ctx->completion_lock);
> +	for (i = 0; i < nr; i++) {
> +		req = state->poll_compl_reqs[i];
> +		done = __io_poll_complete(req, req->result);

I believe we first need to fix all the poll problems and lay out something more intuitive
than the current implementation, or it'd be pure hell to do afterwards.

Can be a nice addition, curious about numbers as well.


-- 
Pavel Begunkov

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC 3/3] io_uring: try to batch poll request completion
  2021-09-22 12:34 ` [RFC 3/3] io_uring: try to batch poll request completion Xiaoguang Wang
  2021-09-22 16:24   ` Pavel Begunkov
@ 2021-09-22 17:00   ` Hao Xu
  2021-09-22 17:01     ` Hao Xu
  1 sibling, 1 reply; 11+ messages in thread
From: Hao Xu @ 2021-09-22 17:00 UTC (permalink / raw)
  To: Xiaoguang Wang, io-uring; +Cc: axboe, asml.silence

在 2021/9/22 下午8:34, Xiaoguang Wang 写道:
> For an echo-server based on io_uring's IORING_POLL_ADD_MULTI feature,
> only poll request are completed in task work, normal read/write
> requests are issued when user app sees cqes on corresponding poll
> requests, and they will mostly read/write data successfully, which
> don't need task work. So at least for echo-server model, batching
> poll request completion properly will give benefits.
> 
> Currently don't find any appropriate place to store batched poll
> requests, put them in struct io_submit_state temporarily, which I
> think it'll need rework in future.
> 
> Signed-off-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
We may need to flush completions at io_submit_end as well; there may be
situations where pure poll reqs are sparse. For instance, users may
just use pure poll to do accept, and fast poll for read/write,
send/recv, so latency for pure poll reqs may be amplified.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC 3/3] io_uring: try to batch poll request completion
  2021-09-22 17:00   ` Hao Xu
@ 2021-09-22 17:01     ` Hao Xu
  2021-09-22 17:09       ` Hao Xu
  0 siblings, 1 reply; 11+ messages in thread
From: Hao Xu @ 2021-09-22 17:01 UTC (permalink / raw)
  To: Xiaoguang Wang, io-uring; +Cc: axboe, asml.silence

在 2021/9/23 上午1:00, Hao Xu 写道:
> 在 2021/9/22 下午8:34, Xiaoguang Wang 写道:
>> For an echo-server based on io_uring's IORING_POLL_ADD_MULTI feature,
>> only poll request are completed in task work, normal read/write
>> requests are issued when user app sees cqes on corresponding poll
>> requests, and they will mostly read/write data successfully, which
>> don't need task work. So at least for echo-server model, batching
>> poll request completion properly will give benefits.
>>
>> Currently don't find any appropriate place to store batched poll
>> requests, put them in struct io_submit_state temporarily, which I
>> think it'll need rework in future.
>>
>> Signed-off-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
> We may need flush completion when io_submit_end as well, there may be
> situations where pure poll reqs are sparse. For instance, users may
> just use pure poll to do accept, and fast poll for read/write,
I mean in persistent connection model.
> send/recv, latency for pure poll reqs may amplify.


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC 3/3] io_uring: try to batch poll request completion
  2021-09-22 17:01     ` Hao Xu
@ 2021-09-22 17:09       ` Hao Xu
  0 siblings, 0 replies; 11+ messages in thread
From: Hao Xu @ 2021-09-22 17:09 UTC (permalink / raw)
  To: Xiaoguang Wang, io-uring; +Cc: axboe, asml.silence

在 2021/9/23 上午1:01, Hao Xu 写道:
> 在 2021/9/23 上午1:00, Hao Xu 写道:
>> 在 2021/9/22 下午8:34, Xiaoguang Wang 写道:
>>> For an echo-server based on io_uring's IORING_POLL_ADD_MULTI feature,
>>> only poll request are completed in task work, normal read/write
>>> requests are issued when user app sees cqes on corresponding poll
>>> requests, and they will mostly read/write data successfully, which
>>> don't need task work. So at least for echo-server model, batching
>>> poll request completion properly will give benefits.
>>>
>>> Currently don't find any appropriate place to store batched poll
>>> requests, put them in struct io_submit_state temporarily, which I
>>> think it'll need rework in future.
>>>
>>> Signed-off-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
>> We may need flush completion when io_submit_end as well, there may be
>> situations where pure poll reqs are sparse. For instance, users may
>> just use pure poll to do accept, and fast poll for read/write,
> I mean in persistent connection model.
>> send/recv, latency for pure poll reqs may amplify.
I actually think flushing them in io_submit_state_end doesn't work well
either, since it is a multishot req.


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC 1/3] io_uring: reduce frequent add_wait_queue() overhead for multi-shot poll request
  2021-09-22 12:34 ` [RFC 1/3] io_uring: reduce frequent add_wait_queue() overhead for multi-shot poll request Xiaoguang Wang
@ 2021-09-22 17:52   ` Hao Xu
  0 siblings, 0 replies; 11+ messages in thread
From: Hao Xu @ 2021-09-22 17:52 UTC (permalink / raw)
  To: Xiaoguang Wang, io-uring; +Cc: axboe, asml.silence, Joseph Qi

在 2021/9/22 下午8:34, Xiaoguang Wang 写道:
> Run echo_server to evaluate io_uring's multi-shot poll performance, perf
> shows that add_wait_queue() has obvious overhead. Introduce a new state
> 'active' in io_poll_iocb to indicate whether io_poll_wake() should queue
> a task_work. This new state will be set to true initially, be set to false
> when starting to queue a task work, and be set to true again when a poll
> cqe has been committed. One concern is that this method may lose wake-up
> events, but it seems to be ok.
> 
>    io_poll_wake                io_poll_task_func
> t1                       |
> t2                       |    WRITE_ONCE(req->poll.active, true);
> t3                       |
> t4                       |    io_commit_cqring(ctx);
> t5                       |
> t6                       |
> 
> If wake-up events happen before or at t4, it's ok, the user app will always
> see a cqe. If wake-up events happen after t4 and IIUC, io_poll_wake()
> will see the new req->poll.active value by using READ_ONCE().
> 
> With this patch, a pure echo_server(1000 connections, packet is 16 bytes)
> shows about 1.6% reqs improvement.
> 
> Signed-off-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
> ---
>   fs/io_uring.c | 20 ++++++++++++++++----
>   1 file changed, 16 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 1294b1ef4acb..ca4464a75c7b 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -487,6 +487,7 @@ struct io_poll_iocb {
>   	__poll_t			events;
>   	bool				done;
>   	bool				canceled;
> +	bool				active;
>   	struct wait_queue_entry		wait;
>   };
>   
> @@ -5025,8 +5026,6 @@ static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
>   
>   	trace_io_uring_task_add(req->ctx, req->opcode, req->user_data, mask);
>   
> -	list_del_init(&poll->wait.entry);
> -
>   	req->result = mask;
>   	req->io_task_work.func = func;
>   
> @@ -5057,7 +5056,10 @@ static bool io_poll_rewait(struct io_kiocb *req, struct io_poll_iocb *poll)
>   
>   	spin_lock(&ctx->completion_lock);
>   	if (!req->result && !READ_ONCE(poll->canceled)) {
> -		add_wait_queue(poll->head, &poll->wait);
> +		if (req->opcode == IORING_OP_POLL_ADD)
> +			WRITE_ONCE(req->poll.active, true);
> +		else
> +			add_wait_queue(poll->head, &poll->wait);
>   		return true;
>   	}
>   
> @@ -5133,6 +5135,9 @@ static inline bool io_poll_complete(struct io_kiocb *req, __poll_t mask)
>   	return done;
>   }
>   
> +static bool __io_poll_remove_one(struct io_kiocb *req,
> +				 struct io_poll_iocb *poll, bool do_cancel);
> +
>   static void io_poll_task_func(struct io_kiocb *req, bool *locked)
>   {
>   	struct io_ring_ctx *ctx = req->ctx;
> @@ -5146,10 +5151,11 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked)
>   		done = __io_poll_complete(req, req->result);
>   		if (done) {
>   			io_poll_remove_double(req);
> +			__io_poll_remove_one(req, io_poll_get_single(req), true);
This may cause race problems, e.g. there may be multiple cancelled cqes
considering io_poll_add() running in parallel. hash_del is redundant too.
__io_poll_remove_one may not be the best choice here, and since we now
don't del the wait entry in between, code in _arm_poll should probably be
tweaked as well (not very sure, will dive into it tomorrow).

Regards,
Hao
>   			hash_del(&req->hash_node);
>   		} else {
>   			req->result = 0;
> -			add_wait_queue(req->poll.head, &req->poll.wait);
> +			WRITE_ONCE(req->poll.active, true);
>   		}
>   		io_commit_cqring(ctx);
>   		spin_unlock(&ctx->completion_lock);
> @@ -5204,6 +5210,7 @@ static void io_init_poll_iocb(struct io_poll_iocb *poll, __poll_t events,
>   	poll->head = NULL;
>   	poll->done = false;
>   	poll->canceled = false;
> +	poll->active = true;
>   #define IO_POLL_UNMASK	(EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP)
>   	/* mask in events that we always want/need */
>   	poll->events = events | IO_POLL_UNMASK;
> @@ -5301,6 +5308,7 @@ static int io_async_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
>   	trace_io_uring_poll_wake(req->ctx, req->opcode, req->user_data,
>   					key_to_poll(key));
>   
> +	list_del_init(&poll->wait.entry);
>   	return __io_async_wake(req, poll, key_to_poll(key), io_async_task_func);
>   }
>   
> @@ -5569,6 +5577,10 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
>   	struct io_kiocb *req = wait->private;
>   	struct io_poll_iocb *poll = &req->poll;
>   
> +	if (!READ_ONCE(poll->active))
> +		return 0;
> +
> +	WRITE_ONCE(poll->active, false);
>   	return __io_async_wake(req, poll, key_to_poll(key), io_poll_task_func);
>   }
>   
> 


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC 3/3] io_uring: try to batch poll request completion
  2021-09-22 16:24   ` Pavel Begunkov
@ 2021-09-24  4:28     ` Xiaoguang Wang
  0 siblings, 0 replies; 11+ messages in thread
From: Xiaoguang Wang @ 2021-09-24  4:28 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring; +Cc: axboe

hi,


> On 9/22/21 1:34 PM, Xiaoguang Wang wrote:
>> For an echo-server based on io_uring's IORING_POLL_ADD_MULTI feature,
>> only poll request are completed in task work, normal read/write
>> requests are issued when user app sees cqes on corresponding poll
>> requests, and they will mostly read/write data successfully, which
>> don't need task work. So at least for echo-server model, batching
>> poll request completion properly will give benefits.
>>
>> Currently don't find any appropriate place to store batched poll
>> requests, put them in struct io_submit_state temporarily, which I
>> think it'll need rework in future.
>>
>> Signed-off-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
>> ---
>>   fs/io_uring.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>>   1 file changed, 63 insertions(+), 1 deletion(-)
>>
>> diff --git a/fs/io_uring.c b/fs/io_uring.c
>> index 6fdfb688cf91..14118388bfc6 100644
>> --- a/fs/io_uring.c
>> +++ b/fs/io_uring.c
>> @@ -321,6 +321,11 @@ struct io_submit_state {
>>   	 */
>>   	struct io_kiocb		*compl_reqs[IO_COMPL_BATCH];
>>   	unsigned int		compl_nr;
>> +
>> +	struct io_kiocb		*poll_compl_reqs[IO_COMPL_BATCH];
>> +	bool			poll_req_status[IO_COMPL_BATCH];
>> +	unsigned int		poll_compl_nr;
>> +
>>   	/* inline/task_work completion list, under ->uring_lock */
>>   	struct list_head	free_list;
>>   
>> @@ -2093,6 +2098,8 @@ static void ctx_flush_and_put(struct io_ring_ctx *ctx, bool *locked)
>>   	percpu_ref_put(&ctx->refs);
>>   }
>>   
>> +static void io_poll_flush_completions(struct io_ring_ctx *ctx, bool *locked);
>> +
>>   static void tctx_task_work(struct callback_head *cb)
>>   {
>>   	bool locked = false;
>> @@ -2103,8 +2110,11 @@ static void tctx_task_work(struct callback_head *cb)
>>   	while (1) {
>>   		struct io_wq_work_node *node;
>>   
>> -		if (!tctx->task_list.first && locked && ctx->submit_state.compl_nr)
>> +		if (!tctx->task_list.first && locked && (ctx->submit_state.compl_nr ||
>> +		    ctx->submit_state.poll_compl_nr)) {
> io_submit_flush_completions() shouldn't be called if there are no requests... And the
> check is already inside for-next, will be
>
> if (... && locked) {
> 	io_submit_flush_completions();
> 	if (poll_compl_nr)
> 		io_poll_flush_completions();

OK, thanks for pointing this, and I have dropped the poll request 
completion batching patch, since

it shows performance fluctuations, hard to say whether it's useful.


Regards,

Xiaoguang Wang


> }
>
>>   			io_submit_flush_completions(ctx);
>> +			io_poll_flush_completions(ctx, &locked);
>> +		}
>>   
>>   		spin_lock_irq(&tctx->task_lock);
>>   		node = tctx->task_list.first;
>> @@ -5134,6 +5144,49 @@ static inline bool io_poll_complete(struct io_kiocb *req, __poll_t mask)
>>   static bool __io_poll_remove_one(struct io_kiocb *req,
>>   				 struct io_poll_iocb *poll, bool do_cancel);
>>   
>> +static void io_poll_flush_completions(struct io_ring_ctx *ctx, bool *locked)
>> +	__must_hold(&ctx->uring_lock)
>> +{
>> +	struct io_submit_state *state = &ctx->submit_state;
>> +	struct io_kiocb *req, *nxt;
>> +	int i, nr = state->poll_compl_nr;
>> +	bool done, skip_done = true;
>> +
>> +	spin_lock(&ctx->completion_lock);
>> +	for (i = 0; i < nr; i++) {
>> +		req = state->poll_compl_reqs[i];
>> +		done = __io_poll_complete(req, req->result);
> I believe we first need to fix all the poll problems and lay out something more intuitive
> than the current implementation, or it'd be pure hell to do afterwards.
>
> Can be a nice addition, curious about numbers as well.
>
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2021-09-24  4:28 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-22 12:34 [RFC 0/3] improvements for poll requests Xiaoguang Wang
2021-09-22 12:34 ` [RFC 1/3] io_uring: reduce frequent add_wait_queue() overhead for multi-shot poll request Xiaoguang Wang
2021-09-22 17:52   ` Hao Xu
2021-09-22 12:34 ` [RFC 2/3] io_uring: don't get completion_lock in io_poll_rewait() Xiaoguang Wang
2021-09-22 12:34 ` [RFC 3/3] io_uring: try to batch poll request completion Xiaoguang Wang
2021-09-22 16:24   ` Pavel Begunkov
2021-09-24  4:28     ` Xiaoguang Wang
2021-09-22 17:00   ` Hao Xu
2021-09-22 17:01     ` Hao Xu
2021-09-22 17:09       ` Hao Xu
2021-09-22 13:00 ` [RFC 0/3] improvements for poll requests Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).