* [PATCH 5.13 0/9] another 5.13 pack
From: Pavel Begunkov @ 2021-04-13  1:58 UTC (permalink / raw)
  To: Jens Axboe, io-uring

1-2 are fixes

7/9 is about nicer overflow handling while someone exits

8-9 change how we do iopoll when iopoll_list is empty, saving us from
burning CPU for nothing.

Pavel Begunkov (9):
  io_uring: fix leaking reg files on exit
  io_uring: fix uninit old data for poll event upd
  io_uring: split poll and poll update structures
  io_uring: add timeout completion_lock annotation
  io_uring: refactor hrtimer_try_to_cancel uses
  io_uring: clean up io_poll_remove_waitqs()
  io_uring: don't fail overflow on in_idle
  io_uring: skip futile iopoll iterations
  io_uring: inline io_iopoll_getevents()

 fs/io_uring.c | 236 ++++++++++++++++++++++----------------------------
 1 file changed, 104 insertions(+), 132 deletions(-)

-- 
2.24.0



* [PATCH 1/9] io_uring: fix leaking reg files on exit
From: Pavel Begunkov @ 2021-04-13  1:58 UTC (permalink / raw)
  To: Jens Axboe, io-uring; +Cc: stable

If io_rsrc_ref_quiesce() fails inside io_sqe_files_unregister(), the
unregister is aborted and the files are left referenced. That may well
happen because of a stray signal, or simply because quiesce does
allocations inside.

In io_ring_ctx_free(), do an unsafe version of unregister instead: by that
point there are guaranteed to be no requests left, so quiescing is pointless.
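
For context, a rough userspace-side sketch of the registration whose file
references could be leaked (raw syscall; ring_fd and the opened paths are
made up for illustration, this is not part of the patch):

    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/io_uring.h>

    static int register_two_files(int ring_fd)
    {
            int fds[2];

            fds[0] = open("/etc/hostname", O_RDONLY);
            fds[1] = open("/etc/hostname", O_RDONLY);
            /* the kernel takes a reference on each struct file here; before
             * this fix those references could be left behind if the quiesce
             * step of unregister failed during ring teardown */
            return syscall(__NR_io_uring_register, ring_fd,
                           IORING_REGISTER_FILES, fds, 2);
    }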

Cc: stable@vger.kernel.org
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 257eddd4cd82..44342ff5c4e1 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -7094,6 +7094,10 @@ static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
 			fput(file);
 	}
 #endif
+	io_free_file_tables(&ctx->file_table, ctx->nr_user_files);
+	kfree(ctx->file_data);
+	ctx->file_data = NULL;
+	ctx->nr_user_files = 0;
 }
 
 static inline void io_rsrc_ref_lock(struct io_ring_ctx *ctx)
@@ -7200,21 +7204,14 @@ static struct io_rsrc_data *io_rsrc_data_alloc(struct io_ring_ctx *ctx,
 
 static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
 {
-	struct io_rsrc_data *data = ctx->file_data;
 	int ret;
 
-	if (!data)
+	if (!ctx->file_data)
 		return -ENXIO;
-	ret = io_rsrc_ref_quiesce(data, ctx);
-	if (ret)
-		return ret;
-
-	__io_sqe_files_unregister(ctx);
-	io_free_file_tables(&ctx->file_table, ctx->nr_user_files);
-	kfree(data);
-	ctx->file_data = NULL;
-	ctx->nr_user_files = 0;
-	return 0;
+	ret = io_rsrc_ref_quiesce(ctx->file_data, ctx);
+	if (!ret)
+		__io_sqe_files_unregister(ctx);
+	return ret;
 }
 
 static void io_sq_thread_unpark(struct io_sq_data *sqd)
@@ -7664,7 +7661,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
 
 	ret = io_sqe_files_scm(ctx);
 	if (ret) {
-		io_sqe_files_unregister(ctx);
+		__io_sqe_files_unregister(ctx);
 		return ret;
 	}
 
@@ -8465,7 +8462,11 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
 	}
 
 	mutex_lock(&ctx->uring_lock);
-	io_sqe_files_unregister(ctx);
+	if (ctx->file_data) {
+		if (!atomic_dec_and_test(&ctx->file_data->refs))
+			wait_for_completion(&ctx->file_data->done);
+		__io_sqe_files_unregister(ctx);
+	}
 	if (ctx->rings)
 		__io_cqring_overflow_flush(ctx, true);
 	mutex_unlock(&ctx->uring_lock);
-- 
2.24.0



* [PATCH 2/9] io_uring: fix uninit old data for poll event upd
From: Pavel Begunkov @ 2021-04-13  1:58 UTC (permalink / raw)
  To: Jens Axboe, io-uring

Both IORING_POLL_UPDATE_EVENTS and IORING_POLL_UPDATE_USER_DATA need
old_user_data to find/cancel a poll request, but it's set only for the
first one.
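
To illustrate the case that trips this, here is a hedged userspace-side
sketch of a poll update that only rewrites user_data. The field placement
(old user_data in sqe->addr, new one in sqe->off) is what this prep code
reads; the update flags living in sqe->len is assumed from the surrounding
mainline layout and is not visible in the hunk below:

    #include <string.h>
    #include <linux/io_uring.h>

    static void prep_poll_udata_update(struct io_uring_sqe *sqe,
                                       __u64 old_udata, __u64 new_udata)
    {
            memset(sqe, 0, sizeof(*sqe));
            sqe->opcode = IORING_OP_POLL_ADD;
            sqe->len    = IORING_POLL_UPDATE_USER_DATA;  /* UPDATE_EVENTS not set */
            sqe->addr   = old_udata;  /* before this fix: never copied into the req */
            sqe->off    = new_udata;
    }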

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 44342ff5c4e1..429ee5fd9044 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5384,17 +5384,17 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
 	if (!(flags & IORING_POLL_ADD_MULTI))
 		events |= EPOLLONESHOT;
 	poll->update_events = poll->update_user_data = false;
-	if (flags & IORING_POLL_UPDATE_EVENTS) {
-		poll->update_events = true;
+
+	if (flags & (IORING_POLL_UPDATE_EVENTS|IORING_POLL_UPDATE_USER_DATA)) {
 		poll->old_user_data = READ_ONCE(sqe->addr);
+		poll->update_events = flags & IORING_POLL_UPDATE_EVENTS;
+		poll->update_user_data = flags & IORING_POLL_UPDATE_USER_DATA;
+		if (poll->update_user_data)
+			poll->new_user_data = READ_ONCE(sqe->off);
+	} else {
+		if (sqe->off || sqe->addr)
+			return -EINVAL;
 	}
-	if (flags & IORING_POLL_UPDATE_USER_DATA) {
-		poll->update_user_data = true;
-		poll->new_user_data = READ_ONCE(sqe->off);
-	}
-	if (!(poll->update_events || poll->update_user_data) &&
-	     (sqe->off || sqe->addr))
-		return -EINVAL;
 	poll->events = demangle_poll(events) |
 				(events & (EPOLLEXCLUSIVE|EPOLLONESHOT));
 	return 0;
-- 
2.24.0



* [PATCH 3/9] io_uring: split poll and poll update structures
From: Pavel Begunkov @ 2021-04-13  1:58 UTC (permalink / raw)
  To: Jens Axboe, io-uring

struct io_poll_iocb became pretty nasty by also combining the update
fields. Split them out into a separate structure for clarity.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 55 ++++++++++++++++++++++++++++++---------------------
 1 file changed, 32 insertions(+), 23 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 429ee5fd9044..a0f207e62e32 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -490,15 +490,16 @@ struct io_poll_iocb {
 	__poll_t			events;
 	bool				done;
 	bool				canceled;
+	struct wait_queue_entry		wait;
+};
+
+struct io_poll_update {
+	struct file			*file;
+	u64				old_user_data;
+	u64				new_user_data;
+	__poll_t			events;
 	bool				update_events;
 	bool				update_user_data;
-	union {
-		struct wait_queue_entry	wait;
-		struct {
-			u64		old_user_data;
-			u64		new_user_data;
-		};
-	};
 };
 
 struct io_poll_remove {
@@ -715,6 +716,7 @@ enum {
 	REQ_F_COMPLETE_INLINE_BIT,
 	REQ_F_REISSUE_BIT,
 	REQ_F_DONT_REISSUE_BIT,
+	REQ_F_POLL_UPDATE_BIT,
 	/* keep async read/write and isreg together and in order */
 	REQ_F_ASYNC_READ_BIT,
 	REQ_F_ASYNC_WRITE_BIT,
@@ -762,6 +764,8 @@ enum {
 	REQ_F_REISSUE		= BIT(REQ_F_REISSUE_BIT),
 	/* don't attempt request reissue, see io_rw_reissue() */
 	REQ_F_DONT_REISSUE	= BIT(REQ_F_DONT_REISSUE_BIT),
+	/* switches between poll and poll update */
+	REQ_F_POLL_UPDATE	= BIT(REQ_F_POLL_UPDATE_BIT),
 	/* supports async reads */
 	REQ_F_ASYNC_READ	= BIT(REQ_F_ASYNC_READ_BIT),
 	/* supports async writes */
@@ -791,6 +795,7 @@ struct io_kiocb {
 		struct file		*file;
 		struct io_rw		rw;
 		struct io_poll_iocb	poll;
+		struct io_poll_update	poll_update;
 		struct io_poll_remove	poll_remove;
 		struct io_accept	accept;
 		struct io_sync		sync;
@@ -4989,7 +4994,6 @@ static void io_init_poll_iocb(struct io_poll_iocb *poll, __poll_t events,
 	poll->head = NULL;
 	poll->done = false;
 	poll->canceled = false;
-	poll->update_events = poll->update_user_data = false;
 #define IO_POLL_UNMASK	(EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP)
 	/* mask in events that we always want/need */
 	poll->events = events | IO_POLL_UNMASK;
@@ -5366,7 +5370,6 @@ static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
 
 static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
-	struct io_poll_iocb *poll = &req->poll;
 	u32 events, flags;
 
 	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
@@ -5383,20 +5386,26 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
 #endif
 	if (!(flags & IORING_POLL_ADD_MULTI))
 		events |= EPOLLONESHOT;
-	poll->update_events = poll->update_user_data = false;
+	events = demangle_poll(events) |
+				(events & (EPOLLEXCLUSIVE|EPOLLONESHOT));
 
 	if (flags & (IORING_POLL_UPDATE_EVENTS|IORING_POLL_UPDATE_USER_DATA)) {
-		poll->old_user_data = READ_ONCE(sqe->addr);
-		poll->update_events = flags & IORING_POLL_UPDATE_EVENTS;
-		poll->update_user_data = flags & IORING_POLL_UPDATE_USER_DATA;
-		if (poll->update_user_data)
-			poll->new_user_data = READ_ONCE(sqe->off);
+		struct io_poll_update *poll_upd = &req->poll_update;
+
+		req->flags |= REQ_F_POLL_UPDATE;
+		poll_upd->events = events;
+		poll_upd->old_user_data = READ_ONCE(sqe->addr);
+		poll_upd->update_events = flags & IORING_POLL_UPDATE_EVENTS;
+		poll_upd->update_user_data = flags & IORING_POLL_UPDATE_USER_DATA;
+		if (poll_upd->update_user_data)
+			poll_upd->new_user_data = READ_ONCE(sqe->off);
 	} else {
+		struct io_poll_iocb *poll = &req->poll;
+
+		poll->events = events;
 		if (sqe->off || sqe->addr)
 			return -EINVAL;
 	}
-	poll->events = demangle_poll(events) |
-				(events & (EPOLLEXCLUSIVE|EPOLLONESHOT));
 	return 0;
 }
 
@@ -5434,7 +5443,7 @@ static int io_poll_update(struct io_kiocb *req)
 	int ret;
 
 	spin_lock_irq(&ctx->completion_lock);
-	preq = io_poll_find(ctx, req->poll.old_user_data);
+	preq = io_poll_find(ctx, req->poll_update.old_user_data);
 	if (!preq) {
 		ret = -ENOENT;
 		goto err;
@@ -5464,13 +5473,13 @@ static int io_poll_update(struct io_kiocb *req)
 		return 0;
 	}
 	/* only mask one event flags, keep behavior flags */
-	if (req->poll.update_events) {
+	if (req->poll_update.update_events) {
 		preq->poll.events &= ~0xffff;
-		preq->poll.events |= req->poll.events & 0xffff;
+		preq->poll.events |= req->poll_update.events & 0xffff;
 		preq->poll.events |= IO_POLL_UNMASK;
 	}
-	if (req->poll.update_user_data)
-		preq->user_data = req->poll.new_user_data;
+	if (req->poll_update.update_user_data)
+		preq->user_data = req->poll_update.new_user_data;
 
 	spin_unlock_irq(&ctx->completion_lock);
 
@@ -5489,7 +5498,7 @@ static int io_poll_update(struct io_kiocb *req)
 
 static int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
 {
-	if (!req->poll.update_events && !req->poll.update_user_data)
+	if (!(req->flags & REQ_F_POLL_UPDATE))
 		return __io_poll_add(req);
 	return io_poll_update(req);
 }
-- 
2.24.0



* [PATCH 4/9] io_uring: add timeout completion_lock annotation
From: Pavel Begunkov @ 2021-04-13  1:58 UTC (permalink / raw)
  To: Jens Axboe, io-uring

Add one more sparse locking annotation for readability in
io_kill_timeout().
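
For readers not familiar with the annotation, a minimal illustrative use of
__must_hold() (hypothetical types, not from this patch): it documents that
the function must be entered with the lock held, and lets sparse's context
checking verify the lock stays balanced across the function.

    #include <linux/spinlock.h>
    #include <linux/list.h>

    struct foo_ctx {
            spinlock_t              lock;
            struct list_head        queue;
    };

    static void foo_erase_first(struct foo_ctx *ctx)
            __must_hold(&ctx->lock)
    {
            /* safe without taking the lock here: the caller already holds it */
            if (!list_empty(&ctx->queue))
                    list_del_init(ctx->queue.next);
    }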

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index a0f207e62e32..eaa8f1af29cc 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1269,6 +1269,7 @@ static void io_queue_async_work(struct io_kiocb *req)
 }
 
 static void io_kill_timeout(struct io_kiocb *req, int status)
+	__must_hold(&req->ctx->completion_lock)
 {
 	struct io_timeout_data *io = req->async_data;
 	int ret;
-- 
2.24.0



* [PATCH 5/9] io_uring: refactor hrtimer_try_to_cancel uses
From: Pavel Begunkov @ 2021-04-13  1:58 UTC (permalink / raw)
  To: Jens Axboe, io-uring

Don't save the return value of hrtimer_try_to_cancel() in a variable, use
it right away. In general it's safer not to keep an intermediate variable,
which may be reused and passed on wrongly, so contract it out. Also clean
up io_timeout_extract().
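
For reference, the return convention being open-coded into the conditions
below (per the hrtimer_try_to_cancel() kerneldoc in kernel/time/hrtimer.c):

    /*
     *  0 - the timer was not active (never armed, or it already fired)
     *  1 - the timer was active and has now been cancelled
     * -1 - the callback is currently running and cannot be stopped
     */
    if (hrtimer_try_to_cancel(&io->timer) != -1) {
            /* the timeout can no longer fire, safe to complete the request */
    }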

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 23 ++++++++---------------
 1 file changed, 8 insertions(+), 15 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index eaa8f1af29cc..4f4b4f4bff2d 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1272,10 +1272,8 @@ static void io_kill_timeout(struct io_kiocb *req, int status)
 	__must_hold(&req->ctx->completion_lock)
 {
 	struct io_timeout_data *io = req->async_data;
-	int ret;
 
-	ret = hrtimer_try_to_cancel(&io->timer);
-	if (ret != -1) {
+	if (hrtimer_try_to_cancel(&io->timer) != -1) {
 		atomic_set(&req->ctx->cq_timeouts,
 			atomic_read(&req->ctx->cq_timeouts) + 1);
 		list_del_init(&req->timeout.list);
@@ -1792,12 +1790,10 @@ static bool io_kill_linked_timeout(struct io_kiocb *req)
 	 */
 	if (link && (link->flags & REQ_F_LTIMEOUT_ACTIVE)) {
 		struct io_timeout_data *io = link->async_data;
-		int ret;
 
 		io_remove_next_linked(req);
 		link->timeout.head = NULL;
-		ret = hrtimer_try_to_cancel(&io->timer);
-		if (ret != -1) {
+		if (hrtimer_try_to_cancel(&io->timer) != -1) {
 			io_cqring_fill_event(link, -ECANCELED, 0);
 			io_put_req_deferred(link, 1);
 			return true;
@@ -5533,21 +5529,18 @@ static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx,
 {
 	struct io_timeout_data *io;
 	struct io_kiocb *req;
-	int ret = -ENOENT;
+	bool found = false;
 
 	list_for_each_entry(req, &ctx->timeout_list, timeout.list) {
-		if (user_data == req->user_data) {
-			ret = 0;
+		found = user_data == req->user_data;
+		if (found)
 			break;
-		}
 	}
-
-	if (ret == -ENOENT)
-		return ERR_PTR(ret);
+	if (!found)
+		return ERR_PTR(-ENOENT);
 
 	io = req->async_data;
-	ret = hrtimer_try_to_cancel(&io->timer);
-	if (ret == -1)
+	if (hrtimer_try_to_cancel(&io->timer) == -1)
 		return ERR_PTR(-EALREADY);
 	list_del_init(&req->timeout.list);
 	return req;
-- 
2.24.0



* [PATCH 6/9] io_uring: clean up io_poll_remove_waitqs()
From: Pavel Begunkov @ 2021-04-13  1:58 UTC (permalink / raw)
  To: Jens Axboe, io-uring

Move the opcode-independent parts of io_poll_remove_waitqs() out of the
per-opcode branches. It looks better and stresses that both paths do
__io_poll_remove_one().

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 4f4b4f4bff2d..4407ebc8f8d3 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5224,21 +5224,16 @@ static bool io_poll_remove_waitqs(struct io_kiocb *req)
 	bool do_complete;
 
 	io_poll_remove_double(req);
+	do_complete = __io_poll_remove_one(req, io_poll_get_single(req), true);
 
-	if (req->opcode == IORING_OP_POLL_ADD) {
-		do_complete = __io_poll_remove_one(req, &req->poll, true);
-	} else {
+	if (req->opcode != IORING_OP_POLL_ADD && do_complete) {
 		struct async_poll *apoll = req->apoll;
 
 		/* non-poll requests have submit ref still */
-		do_complete = __io_poll_remove_one(req, &apoll->poll, true);
-		if (do_complete) {
-			req_ref_put(req);
-			kfree(apoll->double_poll);
-			kfree(apoll);
-		}
+		req_ref_put(req);
+		kfree(apoll->double_poll);
+		kfree(apoll);
 	}
-
 	return do_complete;
 }
 
-- 
2.24.0



* [PATCH 7/9] io_uring: don't fail overflow on in_idle
From: Pavel Begunkov @ 2021-04-13  1:58 UTC (permalink / raw)
  To: Jens Axboe, io-uring

As CQE overflows are now untied from requests and so don't hold any
references, we don't need to handle the exiting/exec'ing cases there
anymore. Moreover, it's much nicer towards userspace to save overflowed
CQEs whenever possible, so stop failing the overflow on in_idle.
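
For the userspace side of "save overflowed CQEs whenever possible", a hedged
sketch of how an application notices and flushes the overflow. The ring mmap
plumbing is assumed; sq_flags is taken to point at the SQ ring flags word
(params.sq_off.flags from io_uring_setup()):

    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/io_uring.h>

    static void flush_cq_overflow_if_needed(int ring_fd, const unsigned *sq_flags)
    {
            if (!(__atomic_load_n(sq_flags, __ATOMIC_ACQUIRE) & IORING_SQ_CQ_OVERFLOW))
                    return;
            /* entering the kernel lets it move the saved overflow CQEs back
             * into the CQ ring once there is room for them */
            syscall(__NR_io_uring_enter, ring_fd, 0, 0,
                    IORING_ENTER_GETEVENTS, NULL, 0);
    }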

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 44 ++++++++++++++++++++------------------------
 1 file changed, 20 insertions(+), 24 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 4407ebc8f8d3..cc6a44533802 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1515,32 +1515,28 @@ static bool io_cqring_event_overflow(struct io_kiocb *req, long res,
 				     unsigned int cflags)
 {
 	struct io_ring_ctx *ctx = req->ctx;
+	struct io_overflow_cqe *ocqe;
 
-	if (!atomic_read(&req->task->io_uring->in_idle)) {
-		struct io_overflow_cqe *ocqe;
-
-		ocqe = kmalloc(sizeof(*ocqe), GFP_ATOMIC | __GFP_ACCOUNT);
-		if (!ocqe)
-			goto overflow;
-		if (list_empty(&ctx->cq_overflow_list)) {
-			set_bit(0, &ctx->sq_check_overflow);
-			set_bit(0, &ctx->cq_check_overflow);
-			ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW;
-		}
-		ocqe->cqe.user_data = req->user_data;
-		ocqe->cqe.res = res;
-		ocqe->cqe.flags = cflags;
-		list_add_tail(&ocqe->list, &ctx->cq_overflow_list);
-		return true;
+	ocqe = kmalloc(sizeof(*ocqe), GFP_ATOMIC | __GFP_ACCOUNT);
+	if (!ocqe) {
+		/*
+		 * If we're in ring overflow flush mode, or in task cancel mode,
+		 * or cannot allocate an overflow entry, then we need to drop it
+		 * on the floor.
+		 */
+		WRITE_ONCE(ctx->rings->cq_overflow, ++ctx->cached_cq_overflow);
+		return false;
 	}
-overflow:
-	/*
-	 * If we're in ring overflow flush mode, or in task cancel mode,
-	 * or cannot allocate an overflow entry, then we need to drop it
-	 * on the floor.
-	 */
-	WRITE_ONCE(ctx->rings->cq_overflow, ++ctx->cached_cq_overflow);
-	return false;
+	if (list_empty(&ctx->cq_overflow_list)) {
+		set_bit(0, &ctx->sq_check_overflow);
+		set_bit(0, &ctx->cq_check_overflow);
+		ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW;
+	}
+	ocqe->cqe.user_data = req->user_data;
+	ocqe->cqe.res = res;
+	ocqe->cqe.flags = cflags;
+	list_add_tail(&ocqe->list, &ctx->cq_overflow_list);
+	return true;
 }
 
 static inline bool __io_cqring_fill_event(struct io_kiocb *req, long res,
-- 
2.24.0



* [PATCH 8/9] io_uring: skip futile iopoll iterations
From: Pavel Begunkov @ 2021-04-13  1:58 UTC (permalink / raw)
  To: Jens Axboe, io-uring

The only way to get out of io_iopoll_getevents() and keep iterating is to
have an empty iopoll_list, otherwise the main loop would just exit. So,
instead of the unlock-on-every-8th-iteration heuristic, drop the lock
based on iopoll_list being empty.

Also, since no one can add new requests to iopoll_list while
io_iopoll_check() holds uring_lock, it's useless to spin with the list
empty; return in that case.
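
As a reminder of what feeds iopoll_list in the first place, a hedged sketch
of the ring setup this path requires (raw syscall; entry count arbitrary):

    #include <string.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/io_uring.h>

    static int setup_iopoll_ring(void)
    {
            struct io_uring_params p;

            memset(&p, 0, sizeof(p));
            /* only rings created with IORING_SETUP_IOPOLL ever reach
             * io_iopoll_check(); their completions are reaped by actively
             * polling the block layer instead of waiting for irq-driven CQEs */
            p.flags = IORING_SETUP_IOPOLL;
            return syscall(__NR_io_uring_setup, 64, &p);
    }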

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index cc6a44533802..1111968bbe7f 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2387,7 +2387,7 @@ static void io_iopoll_try_reap_events(struct io_ring_ctx *ctx)
 static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
 {
 	unsigned int nr_events = 0;
-	int iters = 0, ret = 0;
+	int ret = 0;
 
 	/*
 	 * We disallow the app entering submit/complete with polling, but we
@@ -2416,10 +2416,13 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
 		 * forever, while the workqueue is stuck trying to acquire the
 		 * very same mutex.
 		 */
-		if (!(++iters & 7)) {
+		if (list_empty(&ctx->iopoll_list)) {
 			mutex_unlock(&ctx->uring_lock);
 			io_run_task_work();
 			mutex_lock(&ctx->uring_lock);
+
+			if (list_empty(&ctx->iopoll_list))
+				break;
 		}
 
 		ret = io_iopoll_getevents(ctx, &nr_events, min);
-- 
2.24.0



* [PATCH 9/9] io_uring: inline io_iopoll_getevents()
From: Pavel Begunkov @ 2021-04-13  1:58 UTC (permalink / raw)
  To: Jens Axboe, io-uring

io_iopoll_getevents() is of no use to us anymore; io_iopoll_check()
handles all the cases.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 52 +++++++++++++--------------------------------------
 1 file changed, 13 insertions(+), 39 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 1111968bbe7f..05c67ebeabd5 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2331,27 +2331,6 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
 	return ret;
 }
 
-/*
- * Poll for a minimum of 'min' events. Note that if min == 0 we consider that a
- * non-spinning poll check - we'll still enter the driver poll loop, but only
- * as a non-spinning completion check.
- */
-static int io_iopoll_getevents(struct io_ring_ctx *ctx, unsigned int *nr_events,
-				long min)
-{
-	while (!list_empty(&ctx->iopoll_list) && !need_resched()) {
-		int ret;
-
-		ret = io_do_iopoll(ctx, nr_events, min);
-		if (ret < 0)
-			return ret;
-		if (*nr_events >= min)
-			return 0;
-	}
-
-	return 1;
-}
-
 /*
  * We can't just wait for polled events to come to us, we have to actively
  * find and complete them.
@@ -2395,17 +2374,16 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
 	 * that got punted to a workqueue.
 	 */
 	mutex_lock(&ctx->uring_lock);
+	/*
+	 * Don't enter poll loop if we already have events pending.
+	 * If we do, we can potentially be spinning for commands that
+	 * already triggered a CQE (eg in error).
+	 */
+	if (test_bit(0, &ctx->cq_check_overflow))
+		__io_cqring_overflow_flush(ctx, false);
+	if (io_cqring_events(ctx))
+		goto out;
 	do {
-		/*
-		 * Don't enter poll loop if we already have events pending.
-		 * If we do, we can potentially be spinning for commands that
-		 * already triggered a CQE (eg in error).
-		 */
-		if (test_bit(0, &ctx->cq_check_overflow))
-			__io_cqring_overflow_flush(ctx, false);
-		if (io_cqring_events(ctx))
-			break;
-
 		/*
 		 * If a submit got punted to a workqueue, we can have the
 		 * application entering polling for a command before it gets
@@ -2424,13 +2402,9 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
 			if (list_empty(&ctx->iopoll_list))
 				break;
 		}
-
-		ret = io_iopoll_getevents(ctx, &nr_events, min);
-		if (ret <= 0)
-			break;
-		ret = 0;
-	} while (min && !nr_events && !need_resched());
-
+		ret = io_do_iopoll(ctx, &nr_events, min);
+	} while (!ret && nr_events < min && !need_resched());
+out:
 	mutex_unlock(&ctx->uring_lock);
 	return ret;
 }
@@ -2543,7 +2517,7 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
 /*
  * After the iocb has been issued, it's safe to be found on the poll list.
  * Adding the kiocb to the list AFTER submission ensures that we don't
- * find it from a io_iopoll_getevents() thread before the issuer is done
+ * find it from a io_do_iopoll() thread before the issuer is done
  * accessing the kiocb cookie.
  */
 static void io_iopoll_req_issued(struct io_kiocb *req, bool in_async)
-- 
2.24.0



* Re: [PATCH 5.13 0/9] another 5.13 pack
From: Jens Axboe @ 2021-04-13 15:38 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring

On 4/12/21 7:58 PM, Pavel Begunkov wrote:
> 1-2 are fixes
> 
> 7/9 is about nicer overflow handling while someone exits
> 
> 8-9 change how we do iopoll when iopoll_list is empty, saving us from
> burning CPU for nothing.
> 
> Pavel Begunkov (9):
>   io_uring: fix leaking reg files on exit
>   io_uring: fix uninit old data for poll event upd
>   io_uring: split poll and poll update structures
>   io_uring: add timeout completion_lock annotation
>   io_uring: refactor hrtimer_try_to_cancel uses
>   io_uring: clean up io_poll_remove_waitqs()
>   io_uring: don't fail overflow on in_idle
>   io_uring: skip futile iopoll iterations
>   io_uring: inline io_iopoll_getevents()
> 
>  fs/io_uring.c | 236 ++++++++++++++++++++++----------------------------
>  1 file changed, 104 insertions(+), 132 deletions(-)

Applied, thanks.

-- 
Jens Axboe



* Re: [PATCH 3/9] io_uring: split poll and poll update structures
From: Pavel Begunkov @ 2021-04-13 17:14 UTC (permalink / raw)
  To: Jens Axboe, io-uring

On 13/04/2021 02:58, Pavel Begunkov wrote:
> struct io_poll_iocb became pretty nasty by also combining the update
> fields. Split them out into a separate structure for clarity.

I think we should move update into IORING_OP_POLL_REMOVE before it's
too late. That would give better struct layouts and wouldn't get in the
way of POLL_ADD, which should be the more popular of the two.

Another thing: it looks like IORING_OP_POLL_REMOVE may cancel apoll,
which doesn't sound great. IMHO we need to limit it to poll requests
only, it's rather confusing otherwise. Note: it can't be used reliably
from userspace, so we probably don't care about the change in behaviour.


-- 
Pavel Begunkov

