io-uring.vger.kernel.org archive mirror
* [PATCH v3 0/1] io_uring: fix skipping of old timeout events
@ 2021-01-14 15:50 Marcelo Diop-Gonzalez
  2021-01-14 15:50 ` [PATCH v3 1/1] io_uring: flush timeouts that should already have expired Marcelo Diop-Gonzalez
  2021-01-14 21:42 ` [PATCH v3 0/1] io_uring: fix skipping of old timeout events Pavel Begunkov
  0 siblings, 2 replies; 6+ messages in thread
From: Marcelo Diop-Gonzalez @ 2021-01-14 15:50 UTC (permalink / raw)
  To: axboe, asml.silence; +Cc: io-uring, Marcelo Diop-Gonzalez

This patch tries to fix a problem with IORING_OP_TIMEOUT events
not being flushed if they should already have expired. The test below
hangs before this change (unless you run with $ ./a.out ~/somefile 1):

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <liburing.h>

int main(int argc, char **argv) {
	if (argc < 2)
		return 1;

	int fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	struct io_uring ring;
	io_uring_queue_init(4, &ring, 0);

	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);

	struct __kernel_timespec ts = { .tv_sec = 9999999 };
	io_uring_prep_timeout(sqe, &ts, 1, 0);
	sqe->user_data = 123;
	int ret = io_uring_submit(&ring);
	if (ret < 0) {
		fprintf(stderr, "submit(timeout_sqe): %d\n", ret);
		return 1;
	}

	int n = 2;
	if (argc > 2)
		n = atoi(argv[2]);

	char buf;
	for (int i = 0; i < n; i++) {
		sqe = io_uring_get_sqe(&ring);
		if (!sqe) {
			fprintf(stderr, "too many\n");
			exit(1);
		}
		io_uring_prep_read(sqe, fd, &buf, 1, 0);
	}
	ret = io_uring_submit(&ring);
	if (ret < 0) {
		fprintf(stderr, "submit(read_sqe): %d\n", ret);
		exit(1);
	}

	struct io_uring_cqe *cqe;
	for (int i = 0; i < n+1; i++) {
		struct io_uring_cqe *cqe;
		int ret = io_uring_wait_cqe(&ring, &cqe);
		if (ret < 0) {
			fprintf(stderr, "wait_cqe(): %d\n", ret);
			return 1;
		}
		if (cqe->user_data == 123)
			printf("timeout found\n");
		io_uring_cqe_seen(&ring, cqe);
	}
}
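
For reference, one way to build and run the test above (assuming liburing is
installed; the file names here are arbitrary):

$ gcc -o timeout-test timeout-test.c -luring
$ ./timeout-test ~/somefile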

v3: Add ->last_flush member to handle more overflow issues
v2: Properly handle u32 overflow issues

Marcelo Diop-Gonzalez (1):
  io_uring: flush timeouts that should already have expired

 fs/io_uring.c | 29 +++++++++++++++++++++++++----
 1 file changed, 25 insertions(+), 4 deletions(-)

-- 
2.20.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v3 1/1] io_uring: flush timeouts that should already have expired
  2021-01-14 15:50 [PATCH v3 0/1] io_uring: fix skipping of old timeout events Marcelo Diop-Gonzalez
@ 2021-01-14 15:50 ` Marcelo Diop-Gonzalez
  2021-01-14 21:40   ` Pavel Begunkov
  2021-01-14 21:42 ` [PATCH v3 0/1] io_uring: fix skipping of old timeout events Pavel Begunkov
  1 sibling, 1 reply; 6+ messages in thread
From: Marcelo Diop-Gonzalez @ 2021-01-14 15:50 UTC (permalink / raw)
  To: axboe, asml.silence; +Cc: io-uring, Marcelo Diop-Gonzalez

Right now io_flush_timeouts() checks if the current number of events
is equal to ->timeout.target_seq, but this will miss some timeouts if
there have been more than 1 event added since the last time they were
flushed (possible in io_submit_flush_completions(), for example). Fix
it by recording the last sequence at which timeouts were flushed so
that the number of events seen can be compared to the number of events
needed without overflow.
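
To make the arithmetic concrete, here is a minimal userspace sketch
(illustrative numbers only, not the kernel code) of the old equality check
versus the windowed comparison used below:

/* Illustrative numbers only; unsigned 32-bit arithmetic as in the kernel. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t target = 5;		/* timeout should fire at 5 events */
	uint32_t last_flush = 3;	/* seq at the previous flush */
	uint32_t seq = 7;		/* two events completed in one batch */

	/* old check: equality only, so jumping from 3 straight to 7 skips 5 */
	printf("old check fires: %d\n", seq == target);

	/* new check: fires whenever target falls inside [last_flush, seq] */
	uint32_t events_needed = target - last_flush;
	uint32_t events_got = seq - last_flush;
	printf("new check fires: %d\n", events_got >= events_needed);

	return 0;
}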

Signed-off-by: Marcelo Diop-Gonzalez <marcelo827@gmail.com>
---
 fs/io_uring.c | 29 +++++++++++++++++++++++++----
 1 file changed, 25 insertions(+), 4 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 372be9caf340..71d8fa0733ad 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -354,6 +354,7 @@ struct io_ring_ctx {
 		unsigned		cq_entries;
 		unsigned		cq_mask;
 		atomic_t		cq_timeouts;
+		unsigned		cq_last_tm_flush;
 		unsigned long		cq_check_overflow;
 		struct wait_queue_head	cq_wait;
 		struct fasync_struct	*cq_fasync;
@@ -1639,19 +1640,36 @@ static void __io_queue_deferred(struct io_ring_ctx *ctx)
 
 static void io_flush_timeouts(struct io_ring_ctx *ctx)
 {
-	while (!list_empty(&ctx->timeout_list)) {
+	u32 seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
+
+	if (list_empty(&ctx->timeout_list))
+		return;
+
+	do {
+		u32 events_needed, events_got;
 		struct io_kiocb *req = list_first_entry(&ctx->timeout_list,
 						struct io_kiocb, timeout.list);
 
 		if (io_is_timeout_noseq(req))
 			break;
-		if (req->timeout.target_seq != ctx->cached_cq_tail
-					- atomic_read(&ctx->cq_timeouts))
+
+		/*
+		 * Since seq can easily wrap around over time, subtract
+		 * the last seq at which timeouts were flushed before comparing.
+		 * Assuming not more than 2^31-1 events have happened since,
+		 * these subtractions won't have wrapped, so we can check if
+		 * target is in [last_seq, current_seq] by comparing the two.
+		 */
+		events_needed = req->timeout.target_seq - ctx->cq_last_tm_flush;
+		events_got = seq - ctx->cq_last_tm_flush;
+		if (events_got < events_needed)
 			break;
 
 		list_del_init(&req->timeout.list);
 		io_kill_timeout(req);
-	}
+	} while (!list_empty(&ctx->timeout_list));
+
+	ctx->cq_last_tm_flush = seq;
 }
 
 static void io_commit_cqring(struct io_ring_ctx *ctx)
@@ -5837,6 +5855,9 @@ static int io_timeout(struct io_kiocb *req)
 	tail = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
 	req->timeout.target_seq = tail + off;
 
+	/* Update the last seq here in case io_flush_timeouts() hasn't */
+	ctx->cq_last_tm_flush = tail;
+
 	/*
 	 * Insertion sort, ensuring the first entry in the list is always
 	 * the one we need first.
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH v3 1/1] io_uring: flush timeouts that should already have expired
  2021-01-14 15:50 ` [PATCH v3 1/1] io_uring: flush timeouts that should already have expired Marcelo Diop-Gonzalez
@ 2021-01-14 21:40   ` Pavel Begunkov
  2021-01-15 14:45     ` Pavel Begunkov
  0 siblings, 1 reply; 6+ messages in thread
From: Pavel Begunkov @ 2021-01-14 21:40 UTC (permalink / raw)
  To: Marcelo Diop-Gonzalez, axboe; +Cc: io-uring

On 14/01/2021 15:50, Marcelo Diop-Gonzalez wrote:
> Right now io_flush_timeouts() checks if the current number of events
> is equal to ->timeout.target_seq, but this will miss some timeouts if
> there have been more than 1 event added since the last time they were
> flushed (possible in io_submit_flush_completions(), for example). Fix
> it by recording the last sequence at which timeouts were flushed so
> that the number of events seen can be compared to the number of events
> needed without overflow.

Looks good, but there is a little change I'll ask you to make (see
below). In the meanwhile I'll test it, so the patch is on the fast track.

> 
> Signed-off-by: Marcelo Diop-Gonzalez <marcelo827@gmail.com>
> ---
>  fs/io_uring.c | 29 +++++++++++++++++++++++++----
>  1 file changed, 25 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 372be9caf340..71d8fa0733ad 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -354,6 +354,7 @@ struct io_ring_ctx {
>  		unsigned		cq_entries;
>  		unsigned		cq_mask;
>  		atomic_t		cq_timeouts;
> +		unsigned		cq_last_tm_flush;
>  		unsigned long		cq_check_overflow;
>  		struct wait_queue_head	cq_wait;
>  		struct fasync_struct	*cq_fasync;
> @@ -1639,19 +1640,36 @@ static void __io_queue_deferred(struct io_ring_ctx *ctx)
>  
>  static void io_flush_timeouts(struct io_ring_ctx *ctx)
>  {
> -	while (!list_empty(&ctx->timeout_list)) {
> +	u32 seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);

This assignment should go after the list_empty() check -- because of the
atomic read, the compiler can't reshuffle them itself.
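
I.e. something along these lines (untested, just a sketch of the reordering,
not the final patch):

static void io_flush_timeouts(struct io_ring_ctx *ctx)
{
	u32 seq;

	if (list_empty(&ctx->timeout_list))
		return;

	seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);

	/* ... the do { } while loop stays as in your patch ... */
}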

> +
> +	if (list_empty(&ctx->timeout_list))
> +		return;
[...]
>  static void io_commit_cqring(struct io_ring_ctx *ctx)
> @@ -5837,6 +5855,9 @@ static int io_timeout(struct io_kiocb *req)
>  	tail = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
>  	req->timeout.target_seq = tail + off;
>  
> +	/* Update the last seq here in case io_flush_timeouts() hasn't */
> +	ctx->cq_last_tm_flush = tail;

Have to note that this is ok to do because we don't mix submissions and
completions, so io_timeout should never fall under the same completion_lock
section as the cq commit,

but otherwise some future locked version of io_timeout would be cutting
off a part of the current flush window (i.e. this [last, cur] thing).

-- 
Pavel Begunkov

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v3 0/1] io_uring: fix skipping of old timeout events
  2021-01-14 15:50 [PATCH v3 0/1] io_uring: fix skipping of old timeout events Marcelo Diop-Gonzalez
  2021-01-14 15:50 ` [PATCH v3 1/1] io_uring: flush timeouts that should already have expired Marcelo Diop-Gonzalez
@ 2021-01-14 21:42 ` Pavel Begunkov
  2021-01-15 16:37   ` Marcelo Diop-Gonzalez
  1 sibling, 1 reply; 6+ messages in thread
From: Pavel Begunkov @ 2021-01-14 21:42 UTC (permalink / raw)
  To: Marcelo Diop-Gonzalez, axboe; +Cc: io-uring

On 14/01/2021 15:50, Marcelo Diop-Gonzalez wrote:
> This patch tries to fix a problem with IORING_OP_TIMEOUT events
> not being flushed if they should already have expired. The test below
> hangs before this change (unless you run with $ ./a.out ~/somefile 1):

How about sending it as a liburing test?

BTW, there was a test that triggered this issue before, but it was shut off
with "return 0" at some point -- though I'm not sure about that.

> 
> #include <fcntl.h>
> #include <stdio.h>
> #include <stdlib.h>
> #include <string.h>
> #include <unistd.h>
> 
> #include <liburing.h>
> 
> int main(int argc, char **argv) {
> 	if (argc < 2)
> 		return 1;
> 
> 	int fd = open(argv[1], O_RDONLY);
> 	if (fd < 0) {
> 		perror("open");
> 		return 1;
> 	}
> 
> 	struct io_uring ring;
> 	io_uring_queue_init(4, &ring, 0);
> 
> 	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
> 
> 	struct __kernel_timespec ts = { .tv_sec = 9999999 };
> 	io_uring_prep_timeout(sqe, &ts, 1, 0);
> 	sqe->user_data = 123;
> 	int ret = io_uring_submit(&ring);
> 	if (ret < 0) {
> 		fprintf(stderr, "submit(timeout_sqe): %d\n", ret);
> 		return 1;
> 	}
> 
> 	int n = 2;
> 	if (argc > 2)
> 		n = atoi(argv[2]);
> 
> 	char buf;
> 	for (int i = 0; i < n; i++) {
> 		sqe = io_uring_get_sqe(&ring);
> 		if (!sqe) {
> 			fprintf(stderr, "too many\n");
> 			exit(1);
> 		}
> 		io_uring_prep_read(sqe, fd, &buf, 1, 0);
> 	}
> 	ret = io_uring_submit(&ring);
> 	if (ret < 0) {
> 		fprintf(stderr, "submit(read_sqe): %d\n", ret);
> 		exit(1);
> 	}
> 
> 	struct io_uring_cqe *cqe;
> 	for (int i = 0; i < n+1; i++) {
> 		struct io_uring_cqe *cqe;
> 		int ret = io_uring_wait_cqe(&ring, &cqe);
> 		if (ret < 0) {
> 			fprintf(stderr, "wait_cqe(): %d\n", ret);
> 			return 1;
> 		}
> 		if (cqe->user_data == 123)
> 			printf("timeout found\n");
> 		io_uring_cqe_seen(&ring, cqe);
> 	}
> }
> 
> v3: Add ->last_flush member to handle more overflow issues
> v2: Properly handle u32 overflow issues
> 
> Marcelo Diop-Gonzalez (1):
>   io_uring: flush timeouts that should already have expired
> 
>  fs/io_uring.c | 29 +++++++++++++++++++++++++----
>  1 file changed, 25 insertions(+), 4 deletions(-)
> 

-- 
Pavel Begunkov

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v3 1/1] io_uring: flush timeouts that should already have expired
  2021-01-14 21:40   ` Pavel Begunkov
@ 2021-01-15 14:45     ` Pavel Begunkov
  0 siblings, 0 replies; 6+ messages in thread
From: Pavel Begunkov @ 2021-01-15 14:45 UTC (permalink / raw)
  To: Marcelo Diop-Gonzalez, axboe; +Cc: io-uring

On 14/01/2021 21:40, Pavel Begunkov wrote:
> On 14/01/2021 15:50, Marcelo Diop-Gonzalez wrote:
>> Right now io_flush_timeouts() checks if the current number of events
>> is equal to ->timeout.target_seq, but this will miss some timeouts if
>> there have been more than 1 event added since the last time they were
>> flushed (possible in io_submit_flush_completions(), for example). Fix
>> it by recording the last sequence at which timeouts were flushed so
>> that the number of events seen can be compared to the number of events
>> needed without overflow.
> 
> Looks good, but there is a little change I'll ask you to make (see
> below). In the meanwhile I'll test it, so the patch is on the fast track.
Tested, works well for me.

Apart from the small change I described before
Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>

> 
>>
>> Signed-off-by: Marcelo Diop-Gonzalez <marcelo827@gmail.com>
>> ---
>>  fs/io_uring.c | 29 +++++++++++++++++++++++++----
>>  1 file changed, 25 insertions(+), 4 deletions(-)
>>
>> diff --git a/fs/io_uring.c b/fs/io_uring.c
>> index 372be9caf340..71d8fa0733ad 100644
>> --- a/fs/io_uring.c
>> +++ b/fs/io_uring.c
>> @@ -354,6 +354,7 @@ struct io_ring_ctx {
>>  		unsigned		cq_entries;
>>  		unsigned		cq_mask;
>>  		atomic_t		cq_timeouts;
>> +		unsigned		cq_last_tm_flush;
>>  		unsigned long		cq_check_overflow;
>>  		struct wait_queue_head	cq_wait;
>>  		struct fasync_struct	*cq_fasync;
>> @@ -1639,19 +1640,36 @@ static void __io_queue_deferred(struct io_ring_ctx *ctx)
>>  
>>  static void io_flush_timeouts(struct io_ring_ctx *ctx)
>>  {
>> -	while (!list_empty(&ctx->timeout_list)) {
>> +	u32 seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
> 
> This assignment should go after the list_empty() check -- because of the
> atomic read, the compiler can't reshuffle them itself.
> 
>> +
>> +	if (list_empty(&ctx->timeout_list))
>> +		return;
> [...]
>>  static void io_commit_cqring(struct io_ring_ctx *ctx)
>> @@ -5837,6 +5855,9 @@ static int io_timeout(struct io_kiocb *req)
>>  	tail = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
>>  	req->timeout.target_seq = tail + off;
>>  
>> +	/* Update the last seq here in case io_flush_timeouts() hasn't */
>> +	ctx->cq_last_tm_flush = tail;
> 
> Have to note that this is ok to do because we don't mix submissions and
> completions, so io_timeout should never fall under the same completion_lock
> section as the cq commit,
> 
> but otherwise some future locked version of io_timeout would be cutting
> off a part of the current flush window (i.e. this [last, cur] thing).
> 

-- 
Pavel Begunkov

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v3 0/1] io_uring: fix skipping of old timeout events
  2021-01-14 21:42 ` [PATCH v3 0/1] io_uring: fix skipping of old timeout events Pavel Begunkov
@ 2021-01-15 16:37   ` Marcelo Diop-Gonzalez
  0 siblings, 0 replies; 6+ messages in thread
From: Marcelo Diop-Gonzalez @ 2021-01-15 16:37 UTC (permalink / raw)
  To: Pavel Begunkov; +Cc: axboe, io-uring

On Thu, Jan 14, 2021 at 09:42:36PM +0000, Pavel Begunkov wrote:
> On 14/01/2021 15:50, Marcelo Diop-Gonzalez wrote:
> > This patch tries to fix a problem with IORING_OP_TIMEOUT events
> > not being flushed if they should already have expired. The test below
> > hangs before this change (unless you run with $ ./a.out ~/somefile 1):
> 
> How about sending it as a liburing test?

Oh yeah, I didn't even think of that. I can clean it up some and send a patch.

-Marcelo

> 
> BTW, there was a test that triggered this issue before, but it was shut off
> with "return 0" at some point -- though I'm not sure about that.
> 
> > 
> > #include <fcntl.h>
> > #include <stdio.h>
> > #include <stdlib.h>
> > #include <string.h>
> > #include <unistd.h>
> > 
> > #include <liburing.h>
> > 
> > int main(int argc, char **argv) {
> > 	if (argc < 2)
> > 		return 1;
> > 
> > 	int fd = open(argv[1], O_RDONLY);
> > 	if (fd < 0) {
> > 		perror("open");
> > 		return 1;
> > 	}
> > 
> > 	struct io_uring ring;
> > 	io_uring_queue_init(4, &ring, 0);
> > 
> > 	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
> > 
> > 	struct __kernel_timespec ts = { .tv_sec = 9999999 };
> > 	io_uring_prep_timeout(sqe, &ts, 1, 0);
> > 	sqe->user_data = 123;
> > 	int ret = io_uring_submit(&ring);
> > 	if (ret < 0) {
> > 		fprintf(stderr, "submit(timeout_sqe): %d\n", ret);
> > 		return 1;
> > 	}
> > 
> > 	int n = 2;
> > 	if (argc > 2)
> > 		n = atoi(argv[2]);
> > 
> > 	char buf;
> > 	for (int i = 0; i < n; i++) {
> > 		sqe = io_uring_get_sqe(&ring);
> > 		if (!sqe) {
> > 			fprintf(stderr, "too many\n");
> > 			exit(1);
> > 		}
> > 		io_uring_prep_read(sqe, fd, &buf, 1, 0);
> > 	}
> > 	ret = io_uring_submit(&ring);
> > 	if (ret < 0) {
> > 		fprintf(stderr, "submit(read_sqe): %d\n", ret);
> > 		exit(1);
> > 	}
> > 
> > 	struct io_uring_cqe *cqe;
> > 	for (int i = 0; i < n+1; i++) {
> > 		struct io_uring_cqe *cqe;
> > 		int ret = io_uring_wait_cqe(&ring, &cqe);
> > 		if (ret < 0) {
> > 			fprintf(stderr, "wait_cqe(): %d\n", ret);
> > 			return 1;
> > 		}
> > 		if (cqe->user_data == 123)
> > 			printf("timeout found\n");
> > 		io_uring_cqe_seen(&ring, cqe);
> > 	}
> > }
> > 
> > v3: Add ->last_flush member to handle more overflow issues
> > v2: Properly handle u32 overflow issues
> > 
> > Marcelo Diop-Gonzalez (1):
> >   io_uring: flush timeouts that should already have expired
> > 
> >  fs/io_uring.c | 29 +++++++++++++++++++++++++----
> >  1 file changed, 25 insertions(+), 4 deletions(-)
> > 
> 
> -- 
> Pavel Begunkov

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2021-01-15 16:38 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-01-14 15:50 [PATCH v3 0/1] io_uring: fix skipping of old timeout events Marcelo Diop-Gonzalez
2021-01-14 15:50 ` [PATCH v3 1/1] io_uring: flush timeouts that should already have expired Marcelo Diop-Gonzalez
2021-01-14 21:40   ` Pavel Begunkov
2021-01-15 14:45     ` Pavel Begunkov
2021-01-14 21:42 ` [PATCH v3 0/1] io_uring: fix skipping of old timeout events Pavel Begunkov
2021-01-15 16:37   ` Marcelo Diop-Gonzalez
