IO-Uring Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH 0/6] 5.9 small cleanups/fixes
@ 2020-07-30 15:43 Pavel Begunkov
  2020-07-30 15:43 ` [PATCH 1/6] io_uring: de-unionise io_kiocb Pavel Begunkov
                   ` (6 more replies)
  0 siblings, 7 replies; 13+ messages in thread
From: Pavel Begunkov @ 2020-07-30 15:43 UTC (permalink / raw)
  To: Jens Axboe, io-uring

[1/1] takes apart the union, too much trouble and there is no reason
left for keeping it. Probably for 5.10 we can reshuffle the layout as
discussed.

Pavel Begunkov (6):
  io_uring: de-unionise io_kiocb
  io_uring: deduplicate __io_complete_rw()
  io_uring: fix racy overflow count reporting
  io_uring: fix stalled deferred requests
  io_uring: consolidate *_check_overflow accounting
  io_uring: get rid of atomic FAA for cq_timeouts

 fs/io_uring.c | 100 ++++++++++++++++++--------------------------------
 1 file changed, 36 insertions(+), 64 deletions(-)

-- 
2.24.0


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 1/6] io_uring: de-unionise io_kiocb
  2020-07-30 15:43 [PATCH 0/6] 5.9 small cleanups/fixes Pavel Begunkov
@ 2020-07-30 15:43 ` Pavel Begunkov
  2020-07-30 15:43 ` [PATCH 2/6] io_uring: deduplicate __io_complete_rw() Pavel Begunkov
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 13+ messages in thread
From: Pavel Begunkov @ 2020-07-30 15:43 UTC (permalink / raw)
  To: Jens Axboe, io-uring

As io_kiocb have enough space, move ->work out of a union. It's safer
this way and removes ->work memcpy bouncing.
By the way make tabulation in struct io_kiocb consistent.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 59 ++++++++++++---------------------------------------
 1 file changed, 14 insertions(+), 45 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 3e406bc1f855..86ec5669fe50 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -600,7 +600,6 @@ enum {
 struct async_poll {
 	struct io_poll_iocb	poll;
 	struct io_poll_iocb	*double_poll;
-	struct io_wq_work	work;
 };
 
 /*
@@ -641,36 +640,26 @@ struct io_kiocb {
 	u16				buf_index;
 	u32				result;
 
-	struct io_ring_ctx	*ctx;
-	unsigned int		flags;
-	refcount_t		refs;
-	struct task_struct	*task;
-	u64			user_data;
+	struct io_ring_ctx		*ctx;
+	unsigned int			flags;
+	refcount_t			refs;
+	struct task_struct		*task;
+	u64				user_data;
 
-	struct list_head	link_list;
+	struct list_head		link_list;
 
 	/*
 	 * 1. used with ctx->iopoll_list with reads/writes
 	 * 2. to track reqs with ->files (see io_op_def::file_table)
 	 */
-	struct list_head	inflight_entry;
-
-	struct percpu_ref	*fixed_file_refs;
-
-	union {
-		/*
-		 * Only commands that never go async can use the below fields,
-		 * obviously. Right now only IORING_OP_POLL_ADD uses them, and
-		 * async armed poll handlers for regular commands. The latter
-		 * restore the work, if needed.
-		 */
-		struct {
-			struct hlist_node	hash_node;
-			struct async_poll	*apoll;
-		};
-		struct io_wq_work	work;
-	};
-	struct callback_head	task_work;
+	struct list_head		inflight_entry;
+
+	struct percpu_ref		*fixed_file_refs;
+	struct callback_head		task_work;
+	/* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */
+	struct hlist_node		hash_node;
+	struct async_poll		*apoll;
+	struct io_wq_work		work;
 };
 
 struct io_defer_entry {
@@ -4668,10 +4657,6 @@ static void io_async_task_func(struct callback_head *cb)
 	io_poll_remove_double(req, apoll->double_poll);
 	spin_unlock_irq(&ctx->completion_lock);
 
-	/* restore ->work in case we need to retry again */
-	if (req->flags & REQ_F_WORK_INITIALIZED)
-		memcpy(&req->work, &apoll->work, sizeof(req->work));
-
 	if (!READ_ONCE(apoll->poll.canceled))
 		__io_req_task_submit(req);
 	else
@@ -4763,9 +4748,6 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
 	apoll->double_poll = NULL;
 
 	req->flags |= REQ_F_POLLED;
-	if (req->flags & REQ_F_WORK_INITIALIZED)
-		memcpy(&apoll->work, &req->work, sizeof(req->work));
-
 	io_get_req_task(req);
 	req->apoll = apoll;
 	INIT_HLIST_NODE(&req->hash_node);
@@ -4784,8 +4766,6 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
 	if (ret) {
 		io_poll_remove_double(req, apoll->double_poll);
 		spin_unlock_irq(&ctx->completion_lock);
-		if (req->flags & REQ_F_WORK_INITIALIZED)
-			memcpy(&req->work, &apoll->work, sizeof(req->work));
 		kfree(apoll->double_poll);
 		kfree(apoll);
 		return false;
@@ -4828,14 +4808,6 @@ static bool io_poll_remove_one(struct io_kiocb *req)
 		do_complete = __io_poll_remove_one(req, &apoll->poll);
 		if (do_complete) {
 			io_put_req(req);
-			/*
-			 * restore ->work because we will call
-			 * io_req_clean_work below when dropping the
-			 * final reference.
-			 */
-			if (req->flags & REQ_F_WORK_INITIALIZED)
-				memcpy(&req->work, &apoll->work,
-				       sizeof(req->work));
 			kfree(apoll->double_poll);
 			kfree(apoll);
 		}
@@ -4969,9 +4941,6 @@ static int io_poll_add(struct io_kiocb *req)
 	struct io_poll_table ipt;
 	__poll_t mask;
 
-	/* ->work is in union with hash_node and others */
-	io_req_clean_work(req);
-
 	INIT_HLIST_NODE(&req->hash_node);
 	ipt.pt._qproc = io_poll_queue_proc;
 
-- 
2.24.0


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 2/6] io_uring: deduplicate __io_complete_rw()
  2020-07-30 15:43 [PATCH 0/6] 5.9 small cleanups/fixes Pavel Begunkov
  2020-07-30 15:43 ` [PATCH 1/6] io_uring: de-unionise io_kiocb Pavel Begunkov
@ 2020-07-30 15:43 ` Pavel Begunkov
  2020-07-30 15:43 ` [PATCH 3/6] io_uring: fix racy overflow count reporting Pavel Begunkov
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 13+ messages in thread
From: Pavel Begunkov @ 2020-07-30 15:43 UTC (permalink / raw)
  To: Jens Axboe, io-uring

Call __io_complete_rw() in io_iopoll_queue() instead of hand coding it.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 86ec5669fe50..11f4ab87e08f 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -891,7 +891,8 @@ enum io_mem_account {
 	ACCT_PINNED,
 };
 
-static bool io_rw_reissue(struct io_kiocb *req, long res);
+static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
+			     struct io_comp_state *cs);
 static void io_cqring_fill_event(struct io_kiocb *req, long res);
 static void io_put_req(struct io_kiocb *req);
 static void io_double_put_req(struct io_kiocb *req);
@@ -902,8 +903,6 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
 				 struct io_uring_files_update *ip,
 				 unsigned nr_args);
 static int io_prep_work_files(struct io_kiocb *req);
-static void io_complete_rw_common(struct kiocb *kiocb, long res,
-				  struct io_comp_state *cs);
 static void __io_clean_op(struct io_kiocb *req);
 static int io_file_get(struct io_submit_state *state, struct io_kiocb *req,
 		       int fd, struct file **out_file, bool fixed);
@@ -1976,8 +1975,7 @@ static void io_iopoll_queue(struct list_head *again)
 	do {
 		req = list_first_entry(again, struct io_kiocb, inflight_entry);
 		list_del(&req->inflight_entry);
-		if (!io_rw_reissue(req, -EAGAIN))
-			io_complete_rw_common(&req->rw.kiocb, -EAGAIN, NULL);
+		__io_complete_rw(req, -EAGAIN, 0, NULL);
 	} while (!list_empty(again));
 }
 
-- 
2.24.0


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 3/6] io_uring: fix racy overflow count reporting
  2020-07-30 15:43 [PATCH 0/6] 5.9 small cleanups/fixes Pavel Begunkov
  2020-07-30 15:43 ` [PATCH 1/6] io_uring: de-unionise io_kiocb Pavel Begunkov
  2020-07-30 15:43 ` [PATCH 2/6] io_uring: deduplicate __io_complete_rw() Pavel Begunkov
@ 2020-07-30 15:43 ` Pavel Begunkov
  2020-07-30 17:18   ` Jens Axboe
  2020-07-30 15:43 ` [PATCH 4/6] io_uring: fix stalled deferred requests Pavel Begunkov
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 13+ messages in thread
From: Pavel Begunkov @ 2020-07-30 15:43 UTC (permalink / raw)
  To: Jens Axboe, io-uring

All ->cq_overflow modifications should be under completion_lock,
otherwise it can report a wrong number to the userspace. Fix it in
io_uring_cancel_files().

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 11f4ab87e08f..6e2322525da6 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -7847,10 +7847,9 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
 				clear_bit(0, &ctx->cq_check_overflow);
 				ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW;
 			}
-			spin_unlock_irq(&ctx->completion_lock);
-
 			WRITE_ONCE(ctx->rings->cq_overflow,
 				atomic_inc_return(&ctx->cached_cq_overflow));
+			spin_unlock_irq(&ctx->completion_lock);
 
 			/*
 			 * Put inflight ref and overflow ref. If that's
-- 
2.24.0


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 4/6] io_uring: fix stalled deferred requests
  2020-07-30 15:43 [PATCH 0/6] 5.9 small cleanups/fixes Pavel Begunkov
                   ` (2 preceding siblings ...)
  2020-07-30 15:43 ` [PATCH 3/6] io_uring: fix racy overflow count reporting Pavel Begunkov
@ 2020-07-30 15:43 ` Pavel Begunkov
  2020-07-30 15:43 ` [PATCH 5/6] io_uring: consolidate *_check_overflow accounting Pavel Begunkov
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 13+ messages in thread
From: Pavel Begunkov @ 2020-07-30 15:43 UTC (permalink / raw)
  To: Jens Axboe, io-uring

Always do io_commit_cqring() after completing a request, even if it was
accounted as overflowed on the CQ side. Failing to do that may lead to
not to pushing deferred requests when needed, and so stalling the whole
ring.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 6e2322525da6..11c1abe8bd1a 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -7849,6 +7849,7 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
 			}
 			WRITE_ONCE(ctx->rings->cq_overflow,
 				atomic_inc_return(&ctx->cached_cq_overflow));
+			io_commit_cqring(ctx);
 			spin_unlock_irq(&ctx->completion_lock);
 
 			/*
-- 
2.24.0


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 5/6] io_uring: consolidate *_check_overflow accounting
  2020-07-30 15:43 [PATCH 0/6] 5.9 small cleanups/fixes Pavel Begunkov
                   ` (3 preceding siblings ...)
  2020-07-30 15:43 ` [PATCH 4/6] io_uring: fix stalled deferred requests Pavel Begunkov
@ 2020-07-30 15:43 ` Pavel Begunkov
  2020-07-30 15:43 ` [PATCH 6/6] io_uring: get rid of atomic FAA for cq_timeouts Pavel Begunkov
  2020-07-30 18:08 ` [PATCH 0/6] 5.9 small cleanups/fixes Jens Axboe
  6 siblings, 0 replies; 13+ messages in thread
From: Pavel Begunkov @ 2020-07-30 15:43 UTC (permalink / raw)
  To: Jens Axboe, io-uring

Add a helper to mark ctx->{cq,sq}_check_overflow to get rid of
duplicates, and it's clearer to check cq_overflow_list directly anyway.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 11c1abe8bd1a..efec290c6b08 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1303,6 +1303,15 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
 		eventfd_signal(ctx->cq_ev_fd, 1);
 }
 
+static void io_cqring_mark_overflow(struct io_ring_ctx *ctx)
+{
+	if (list_empty(&ctx->cq_overflow_list)) {
+		clear_bit(0, &ctx->sq_check_overflow);
+		clear_bit(0, &ctx->cq_check_overflow);
+		ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW;
+	}
+}
+
 /* Returns true if there are no backlogged entries after the flush */
 static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
 {
@@ -1347,11 +1356,8 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
 	}
 
 	io_commit_cqring(ctx);
-	if (cqe) {
-		clear_bit(0, &ctx->sq_check_overflow);
-		clear_bit(0, &ctx->cq_check_overflow);
-		ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW;
-	}
+	io_cqring_mark_overflow(ctx);
+
 	spin_unlock_irqrestore(&ctx->completion_lock, flags);
 	io_cqring_ev_posted(ctx);
 
@@ -7842,11 +7848,8 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
 			spin_lock_irq(&ctx->completion_lock);
 			list_del(&cancel_req->compl.list);
 			cancel_req->flags &= ~REQ_F_OVERFLOW;
-			if (list_empty(&ctx->cq_overflow_list)) {
-				clear_bit(0, &ctx->sq_check_overflow);
-				clear_bit(0, &ctx->cq_check_overflow);
-				ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW;
-			}
+
+			io_cqring_mark_overflow(ctx);
 			WRITE_ONCE(ctx->rings->cq_overflow,
 				atomic_inc_return(&ctx->cached_cq_overflow));
 			io_commit_cqring(ctx);
-- 
2.24.0


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 6/6] io_uring: get rid of atomic FAA for cq_timeouts
  2020-07-30 15:43 [PATCH 0/6] 5.9 small cleanups/fixes Pavel Begunkov
                   ` (4 preceding siblings ...)
  2020-07-30 15:43 ` [PATCH 5/6] io_uring: consolidate *_check_overflow accounting Pavel Begunkov
@ 2020-07-30 15:43 ` Pavel Begunkov
  2020-07-30 18:08 ` [PATCH 0/6] 5.9 small cleanups/fixes Jens Axboe
  6 siblings, 0 replies; 13+ messages in thread
From: Pavel Begunkov @ 2020-07-30 15:43 UTC (permalink / raw)
  To: Jens Axboe, io-uring

If ->cq_timeouts modifications are done under ->completion_lock, we
don't really nee any fetch-and-add and other complex atomics. Replace it
with non-atomic FAA, that saves an implicit full memory barrier.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 fs/io_uring.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index efec290c6b08..fabf0b692384 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1205,7 +1205,8 @@ static void io_kill_timeout(struct io_kiocb *req)
 
 	ret = hrtimer_try_to_cancel(&req->io->timeout.timer);
 	if (ret != -1) {
-		atomic_inc(&req->ctx->cq_timeouts);
+		atomic_set(&req->ctx->cq_timeouts,
+			atomic_read(&req->ctx->cq_timeouts) + 1);
 		list_del_init(&req->timeout.list);
 		req->flags |= REQ_F_COMP_LOCKED;
 		io_cqring_fill_event(req, 0);
@@ -4972,9 +4973,10 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
 	struct io_ring_ctx *ctx = req->ctx;
 	unsigned long flags;
 
-	atomic_inc(&ctx->cq_timeouts);
-
 	spin_lock_irqsave(&ctx->completion_lock, flags);
+	atomic_set(&req->ctx->cq_timeouts,
+		atomic_read(&req->ctx->cq_timeouts) + 1);
+
 	/*
 	 * We could be racing with timeout deletion. If the list is empty,
 	 * then timeout lookup already found it and will be handling it.
-- 
2.24.0


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/6] io_uring: fix racy overflow count reporting
  2020-07-30 15:43 ` [PATCH 3/6] io_uring: fix racy overflow count reporting Pavel Begunkov
@ 2020-07-30 17:18   ` Jens Axboe
  2020-07-30 17:33     ` Pavel Begunkov
  0 siblings, 1 reply; 13+ messages in thread
From: Jens Axboe @ 2020-07-30 17:18 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring

On 7/30/20 9:43 AM, Pavel Begunkov wrote:
> All ->cq_overflow modifications should be under completion_lock,
> otherwise it can report a wrong number to the userspace. Fix it in
> io_uring_cancel_files().
> 
> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
> ---
>  fs/io_uring.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
> 
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 11f4ab87e08f..6e2322525da6 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -7847,10 +7847,9 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
>  				clear_bit(0, &ctx->cq_check_overflow);
>  				ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW;
>  			}
> -			spin_unlock_irq(&ctx->completion_lock);
> -
>  			WRITE_ONCE(ctx->rings->cq_overflow,
>  				atomic_inc_return(&ctx->cached_cq_overflow));
> +			spin_unlock_irq(&ctx->completion_lock);

Torn writes? Not sure I see what the issue here, can you expand?

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/6] io_uring: fix racy overflow count reporting
  2020-07-30 17:18   ` Jens Axboe
@ 2020-07-30 17:33     ` Pavel Begunkov
  2020-07-30 17:41       ` Jens Axboe
  0 siblings, 1 reply; 13+ messages in thread
From: Pavel Begunkov @ 2020-07-30 17:33 UTC (permalink / raw)
  To: Jens Axboe, io-uring

On 30/07/2020 20:18, Jens Axboe wrote:
> On 7/30/20 9:43 AM, Pavel Begunkov wrote:
>> All ->cq_overflow modifications should be under completion_lock,
>> otherwise it can report a wrong number to the userspace. Fix it in
>> io_uring_cancel_files().
>>
>> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
>> ---
>>  fs/io_uring.c | 3 +--
>>  1 file changed, 1 insertion(+), 2 deletions(-)
>>
>> diff --git a/fs/io_uring.c b/fs/io_uring.c
>> index 11f4ab87e08f..6e2322525da6 100644
>> --- a/fs/io_uring.c
>> +++ b/fs/io_uring.c
>> @@ -7847,10 +7847,9 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
>>  				clear_bit(0, &ctx->cq_check_overflow);
>>  				ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW;
>>  			}
>> -			spin_unlock_irq(&ctx->completion_lock);
>> -
>>  			WRITE_ONCE(ctx->rings->cq_overflow,
>>  				atomic_inc_return(&ctx->cached_cq_overflow));
>> +			spin_unlock_irq(&ctx->completion_lock);
> 
> Torn writes? Not sure I see what the issue here, can you expand?

No, just off-by-one(many). E.g.

let: cached_overflow = 0;

        CPU 1                   |               CPU 2
====================================================================
t = ++cached_overflow // t == 1 |
                                | t2 = ++cached_overflow // t2 == 2
                                | WRITE_ONCE(cq_overflow, t2)
WRITE_ONCE(cq_overflow, t1) 	|


So, ctx->rings->cq_overflow == 1, but ctx->cached_cq_overflow == 2.
A minor problem and easy to fix.

-- 
Pavel Begunkov

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/6] io_uring: fix racy overflow count reporting
  2020-07-30 17:33     ` Pavel Begunkov
@ 2020-07-30 17:41       ` Jens Axboe
  0 siblings, 0 replies; 13+ messages in thread
From: Jens Axboe @ 2020-07-30 17:41 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring

On 7/30/20 11:33 AM, Pavel Begunkov wrote:
> On 30/07/2020 20:18, Jens Axboe wrote:
>> On 7/30/20 9:43 AM, Pavel Begunkov wrote:
>>> All ->cq_overflow modifications should be under completion_lock,
>>> otherwise it can report a wrong number to the userspace. Fix it in
>>> io_uring_cancel_files().
>>>
>>> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
>>> ---
>>>  fs/io_uring.c | 3 +--
>>>  1 file changed, 1 insertion(+), 2 deletions(-)
>>>
>>> diff --git a/fs/io_uring.c b/fs/io_uring.c
>>> index 11f4ab87e08f..6e2322525da6 100644
>>> --- a/fs/io_uring.c
>>> +++ b/fs/io_uring.c
>>> @@ -7847,10 +7847,9 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
>>>  				clear_bit(0, &ctx->cq_check_overflow);
>>>  				ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW;
>>>  			}
>>> -			spin_unlock_irq(&ctx->completion_lock);
>>> -
>>>  			WRITE_ONCE(ctx->rings->cq_overflow,
>>>  				atomic_inc_return(&ctx->cached_cq_overflow));
>>> +			spin_unlock_irq(&ctx->completion_lock);
>>
>> Torn writes? Not sure I see what the issue here, can you expand?
> 
> No, just off-by-one(many). E.g.
> 
> let: cached_overflow = 0;
> 
>         CPU 1                   |               CPU 2
> ====================================================================
> t = ++cached_overflow // t == 1 |
>                                 | t2 = ++cached_overflow // t2 == 2
>                                 | WRITE_ONCE(cq_overflow, t2)
> WRITE_ONCE(cq_overflow, t1) 	|
> 
> 
> So, ctx->rings->cq_overflow == 1, but ctx->cached_cq_overflow == 2.
> A minor problem and easy to fix.

Ah yes, good point.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 0/6] 5.9 small cleanups/fixes
  2020-07-30 15:43 [PATCH 0/6] 5.9 small cleanups/fixes Pavel Begunkov
                   ` (5 preceding siblings ...)
  2020-07-30 15:43 ` [PATCH 6/6] io_uring: get rid of atomic FAA for cq_timeouts Pavel Begunkov
@ 2020-07-30 18:08 ` Jens Axboe
  2020-07-31 14:28   ` Pavel Begunkov
  6 siblings, 1 reply; 13+ messages in thread
From: Jens Axboe @ 2020-07-30 18:08 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring

On 7/30/20 9:43 AM, Pavel Begunkov wrote:
> [1/1] takes apart the union, too much trouble and there is no reason
> left for keeping it. Probably for 5.10 we can reshuffle the layout as
> discussed.

Let's hope so, because I do think this is the safest option, but it does
incur a 5% drop for me.

We could probably make this work for 5.9 as well, depending on if there
is an -rc8 or not. Seems like there will be, hence we're a week and a
half out from the merge window.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 0/6] 5.9 small cleanups/fixes
  2020-07-30 18:08 ` [PATCH 0/6] 5.9 small cleanups/fixes Jens Axboe
@ 2020-07-31 14:28   ` Pavel Begunkov
  2020-07-31 15:10     ` Jens Axboe
  0 siblings, 1 reply; 13+ messages in thread
From: Pavel Begunkov @ 2020-07-31 14:28 UTC (permalink / raw)
  To: Jens Axboe, io-uring

On 30/07/2020 21:08, Jens Axboe wrote:
> On 7/30/20 9:43 AM, Pavel Begunkov wrote:
>> [1/1] takes apart the union, too much trouble and there is no reason
>> left for keeping it. Probably for 5.10 we can reshuffle the layout as
>> discussed.
> 
> Let's hope so, because I do think this is the safest option, but it does
> incur a 5% drop for me.

Hmm, any ideas why? If your test doesn't use neither apoll/poll/io-wq,
then it looks like either something related to slab (e.g. partial zeroing),
or magic with @task_work moved from 4th to 3rd cacheline.

> 
> We could probably make this work for 5.9 as well, depending on if there
> is an -rc8 or not. Seems like there will be, hence we're a week and a
> half out from the merge window.
> 

-- 
Pavel Begunkov

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 0/6] 5.9 small cleanups/fixes
  2020-07-31 14:28   ` Pavel Begunkov
@ 2020-07-31 15:10     ` Jens Axboe
  0 siblings, 0 replies; 13+ messages in thread
From: Jens Axboe @ 2020-07-31 15:10 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring

On 7/31/20 8:28 AM, Pavel Begunkov wrote:
> On 30/07/2020 21:08, Jens Axboe wrote:
>> On 7/30/20 9:43 AM, Pavel Begunkov wrote:
>>> [1/1] takes apart the union, too much trouble and there is no reason
>>> left for keeping it. Probably for 5.10 we can reshuffle the layout as
>>> discussed.
>>
>> Let's hope so, because I do think this is the safest option, but it does
>> incur a 5% drop for me.
> 
> Hmm, any ideas why? If your test doesn't use neither apoll/poll/io-wq,
> then it looks like either something related to slab (e.g. partial zeroing),
> or magic with @task_work moved from 4th to 3rd cacheline.

Was looking into it just now, but I think it was just a bad test run. Checked
out and ran a new test, and then just with #1 applied which would be the only
one that should impact things. And it's pretty close, so I'm going to call
it a wash.

So nothing to worry about I think.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, back to index

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-30 15:43 [PATCH 0/6] 5.9 small cleanups/fixes Pavel Begunkov
2020-07-30 15:43 ` [PATCH 1/6] io_uring: de-unionise io_kiocb Pavel Begunkov
2020-07-30 15:43 ` [PATCH 2/6] io_uring: deduplicate __io_complete_rw() Pavel Begunkov
2020-07-30 15:43 ` [PATCH 3/6] io_uring: fix racy overflow count reporting Pavel Begunkov
2020-07-30 17:18   ` Jens Axboe
2020-07-30 17:33     ` Pavel Begunkov
2020-07-30 17:41       ` Jens Axboe
2020-07-30 15:43 ` [PATCH 4/6] io_uring: fix stalled deferred requests Pavel Begunkov
2020-07-30 15:43 ` [PATCH 5/6] io_uring: consolidate *_check_overflow accounting Pavel Begunkov
2020-07-30 15:43 ` [PATCH 6/6] io_uring: get rid of atomic FAA for cq_timeouts Pavel Begunkov
2020-07-30 18:08 ` [PATCH 0/6] 5.9 small cleanups/fixes Jens Axboe
2020-07-31 14:28   ` Pavel Begunkov
2020-07-31 15:10     ` Jens Axboe

IO-Uring Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/io-uring/0 io-uring/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 io-uring io-uring/ https://lore.kernel.org/io-uring \
		io-uring@vger.kernel.org
	public-inbox-index io-uring

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.io-uring


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git