io-uring.vger.kernel.org archive mirror
* [PATCH liburing RESEND] Update link_drain with new kernel method
@ 2019-11-22  6:01 Jackie Liu
  2019-11-22  6:01 ` [PATCH RESEND] io_uring: a small optimization for REQ_F_DRAIN_LINK Jackie Liu
  0 siblings, 1 reply; 5+ messages in thread
From: Jackie Liu @ 2019-11-22  6:01 UTC (permalink / raw)
  To: axboe; +Cc: asml.silence, io-uring, liuyun01

From: Jackie Liu <liuyun01@kylinos.cn>

The kernel now handles link-drain in a much simpler way, so the test
program is updated to match.

Also fix a bug where the fd was not closed when an error occurred,
and rework some dead code, as commit e1420b89c does.

Signed-off-by: Jackie Liu <liuyun01@kylinos.cn>
---
 Resending this patch because it was rejected by the mailing list.

 test/link_drain.c | 133 ++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 115 insertions(+), 18 deletions(-)

diff --git a/test/link_drain.c b/test/link_drain.c
index c192a5d..ebc000b 100644
--- a/test/link_drain.c
+++ b/test/link_drain.c
@@ -11,13 +11,7 @@
 
 #include "liburing.h"
 
-char expect[3][5] = {
-	{ 0, 1, 2, 3, 4 },
-	{ 0, 1, 2, 4, 3 },
-	{ 0, 1, 4, 2, 3 }
-};
-
-static int test_link_drain(struct io_uring *ring)
+static int test_link_drain_one(struct io_uring *ring)
 {
 	struct io_uring_cqe *cqe;
 	struct io_uring_sqe *sqe[5];
@@ -25,6 +19,7 @@ static int test_link_drain(struct io_uring *ring)
 	int i, fd, ret;
 	off_t off = 0;
 	char data[5] = {0};
+	char expect[5] = {0, 1, 2, 3, 4};
 
 	fd = open("testfile", O_WRONLY | O_CREAT, 0644);
 	if (fd < 0) {
@@ -67,10 +62,10 @@ static int test_link_drain(struct io_uring *ring)
 
 	ret = io_uring_submit(ring);
-	if (ret < 5) {
-		printf("Submitted only %d\n", ret);
+	if (ret < 0) {
+		printf("sqe submit failed\n");
 		goto err;
-	} else if (ret < 0) {
-		printf("sqe submit failed\n");
+	} else if (ret < 5) {
+		printf("Submitted only %d\n", ret);
 		goto err;
 	}
 
@@ -85,21 +80,121 @@ static int test_link_drain(struct io_uring *ring)
 		io_uring_cqe_seen(ring, cqe);
 	}
 
+	if (memcmp(data, expect, 5) != 0)
+		goto err;
+
 	free(iovecs.iov_base);
 	close(fd);
+	unlink("testfile");
+	return 0;
+err:
+	free(iovecs.iov_base);
+	close(fd);
+	unlink("testfile");
+	return 1;
+}
+
+static int test_link_drain_multi(struct io_uring *ring)
+{
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe[9];
+	struct iovec iovecs;
+	int i, fd, ret;
+	off_t off = 0;
+	char data[9] = {0};
+	char expect[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
 
-	for (i = 0; i < 3; i++) {
-		if (memcmp(data, expect[i], 5) == 0)
-			break;
+	fd = open("testfile", O_WRONLY | O_CREAT, 0644);
+	if (fd < 0) {
+		perror("open");
+		return 1;
+	}
+
+	iovecs.iov_base = malloc(4096);
+	iovecs.iov_len = 4096;
+
+	for (i = 0; i < 9; i++) {
+		sqe[i] = io_uring_get_sqe(ring);
+		if (!sqe[i]) {
+			printf("get sqe failed\n");
+			goto err;
+		}
 	}
-	if (i == 3)
+
+	/* normal heavy io */
+	io_uring_prep_writev(sqe[0], fd, &iovecs, 1, off);
+	sqe[0]->user_data = 0;
+
+	/* link1 io head */
+	io_uring_prep_nop(sqe[1]);
+	sqe[1]->flags |= IOSQE_IO_LINK;
+	sqe[1]->user_data = 1;
+
+	/* link1 drain io */
+	io_uring_prep_nop(sqe[2]);
+	sqe[2]->flags |= (IOSQE_IO_LINK | IOSQE_IO_DRAIN);
+	sqe[2]->user_data = 2;
+
+	/* link1 io end */
+	io_uring_prep_nop(sqe[3]);
+	sqe[3]->user_data = 3;
+
+	/* link2 io head */
+	io_uring_prep_nop(sqe[4]);
+	sqe[4]->flags |= IOSQE_IO_LINK;
+	sqe[4]->user_data = 4;
+
+	/* link2 io */
+	io_uring_prep_nop(sqe[5]);
+	sqe[5]->flags |= IOSQE_IO_LINK;
+	sqe[5]->user_data = 5;
+
+	/* link2 drain io */
+	io_uring_prep_writev(sqe[6], fd, &iovecs, 1, off);
+	sqe[6]->flags |= (IOSQE_IO_LINK | IOSQE_IO_DRAIN);
+	sqe[6]->user_data = 6;
+
+	/* link2 io end */
+	io_uring_prep_nop(sqe[7]);
+	sqe[7]->user_data = 7;
+
+	/* normal io */
+	io_uring_prep_nop(sqe[8]);
+	sqe[8]->user_data = 8;
+
+	ret = io_uring_submit(ring);
+	if (ret < 0) {
+		printf("sqe submit failed\n");
+		goto err;
+	} else if (ret < 9) {
+		printf("Submitted only %d\n", ret);
+		goto err;
+	}
+
+	for (i = 0; i < 9; i++) {
+		ret = io_uring_wait_cqe(ring, &cqe);
+		if (ret < 0) {
+			printf("child: wait completion %d\n", ret);
+			goto err;
+		}
+
+		data[i] = cqe->user_data;
+		io_uring_cqe_seen(ring, cqe);
+	}
+
+	if (memcmp(data, expect, 9) != 0)
 		goto err;
 
+	free(iovecs.iov_base);
+	close(fd);
 	unlink("testfile");
 	return 0;
 err:
+	free(iovecs.iov_base);
+	close(fd);
 	unlink("testfile");
 	return 1;
+
 }
 
 int main(int argc, char *argv[])
@@ -107,14 +202,16 @@ int main(int argc, char *argv[])
 	struct io_uring ring;
 	int i, ret;
 
-	ret = io_uring_queue_init(5, &ring, 0);
+	ret = io_uring_queue_init(100, &ring, 0);
 	if (ret) {
 		printf("ring setup failed\n");
 		return 1;
 	}
 
-	for (i = 0; i < 1000; i++)
-		ret |= test_link_drain(&ring);
+	for (i = 0; i < 1000; i++) {
+		ret |= test_link_drain_one(&ring);
+		ret |= test_link_drain_multi(&ring);
+	}
 
 	if (ret)
 		return ret;
-- 
2.17.1
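
For readers following along, here is a minimal sketch of the pattern the
updated test exercises (illustrative only, not part of the patch; ring
setup and error reporting are omitted). With the new kernel method, a
link chain containing an IOSQE_IO_DRAIN member completes strictly in
submission order, which is why the test can compare against a single
expect[] array:

#include <stdio.h>
#include "liburing.h"

/* Sketch: a 3-SQE chain whose middle request also drains all
 * previously submitted I/O; completions are expected back in
 * submission order 0, 1, 2. */
static int demo_link_drain(struct io_uring *ring)
{
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int i;

	for (i = 0; i < 3; i++) {
		sqe = io_uring_get_sqe(ring);
		if (!sqe)
			return 1;
		io_uring_prep_nop(sqe);
		if (i == 0)
			sqe->flags |= IOSQE_IO_LINK;
		if (i == 1)
			sqe->flags |= IOSQE_IO_LINK | IOSQE_IO_DRAIN;
		sqe->user_data = i;
	}

	if (io_uring_submit(ring) < 3)
		return 1;

	for (i = 0; i < 3; i++) {
		if (io_uring_wait_cqe(ring, &cqe) < 0)
			return 1;
		printf("completed %llu\n", (unsigned long long) cqe->user_data);
		io_uring_cqe_seen(ring, cqe);
	}
	return 0;
}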



* [PATCH RESEND] io_uring: a small optimization for REQ_F_DRAIN_LINK
  2019-11-22  6:01 [PATCH liburing RESEND] Update link_drain with new kernel method Jackie Liu
@ 2019-11-22  6:01 ` Jackie Liu
  2019-11-22 10:05   ` Pavel Begunkov
  0 siblings, 1 reply; 5+ messages in thread
From: Jackie Liu @ 2019-11-22  6:01 UTC (permalink / raw)
  To: axboe; +Cc: asml.silence, io-uring, liuyun01

From: Jackie Liu <liuyun01@kylinos.cn>

We don't need to set drain_next every time. Add a small check and mark
the branches unlikely; this looks like it should give a small
optimization.

Signed-off-by: Jackie Liu <liuyun01@kylinos.cn>
---
 Resending this patch because it was rejected by the mailing list.

 fs/io_uring.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 013e5ed..f4ec44a 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2938,12 +2938,14 @@ static void __io_queue_sqe(struct io_kiocb *req)
 static void io_queue_sqe(struct io_kiocb *req)
 {
 	int ret;
+	bool drain_link = req->flags & REQ_F_DRAIN_LINK;
 
-	if (unlikely(req->ctx->drain_next)) {
+	if (unlikely(req->ctx->drain_next && !drain_link)) {
 		req->flags |= REQ_F_IO_DRAIN;
 		req->ctx->drain_next = false;
+	} else if (unlikely(drain_link)) {
+		req->ctx->drain_next = true;
 	}
-	req->ctx->drain_next = (req->flags & REQ_F_DRAIN_LINK);
 
 	ret = io_req_defer(req);
 	if (ret) {
-- 
2.7.4
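
To make the control-flow change easier to compare, here is a small
user-space model of the two versions (a sketch: the flag and field
names mirror the kernel code, but the struct definitions and the model
itself are simplified and hypothetical):

#include <stdbool.h>

#define REQ_F_DRAIN_LINK	1U
#define REQ_F_IO_DRAIN		2U

struct model_ctx { bool drain_next; };
struct model_req { unsigned int flags; struct model_ctx *ctx; };

/* Before the patch: drain_next is stored on every call. */
static void queue_sqe_old(struct model_req *req)
{
	if (req->ctx->drain_next) {
		req->flags |= REQ_F_IO_DRAIN;
		req->ctx->drain_next = false;	/* overwritten just below */
	}
	req->ctx->drain_next = (req->flags & REQ_F_DRAIN_LINK);
}

/* After the patch: when both flags are clear (the common case),
 * drain_next is not written at all. */
static void queue_sqe_new(struct model_req *req)
{
	bool drain_link = req->flags & REQ_F_DRAIN_LINK;

	if (req->ctx->drain_next && !drain_link) {
		req->flags |= REQ_F_IO_DRAIN;
		req->ctx->drain_next = false;
	} else if (drain_link) {
		req->ctx->drain_next = true;
	}
}

The review below asks whether skipping that store is actually a win on
the hot path.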



* Re: [PATCH RESEND] io_uring: a small optimization for REQ_F_DRAIN_LINK
  2019-11-22  6:01 ` [PATCH RESEND] io_uring: a small optimization for REQ_F_DRAIN_LINK Jackie Liu
@ 2019-11-22 10:05   ` Pavel Begunkov
  2019-11-22 10:26     ` JackieLiu
  0 siblings, 1 reply; 5+ messages in thread
From: Pavel Begunkov @ 2019-11-22 10:05 UTC (permalink / raw)
  To: Jackie Liu, axboe; +Cc: io-uring, liuyun01

On 11/22/2019 9:01 AM, Jackie Liu wrote:
> From: Jackie Liu <liuyun01@kylinos.cn>
> 
> We don't need to set drain_next every time. Add a small check and mark
> the branches unlikely; this looks like it should give a small
> optimization.
> 
Not sure about that. It's 1 CMP + 1 SETcc/STORE, which works pretty fast
as @drain_next is hot (especially after a read) and there is no
write-read dependency close by. With yours, there are likely always 3
CMPs in the way.

Did you benchmark it somehow or compare the assembly?
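
(For reference, one concrete way to make such a comparison -- a sketch,
not something done in this thread -- is to drop both variants into a
standalone file, as in the model shown after the patch above, compile
it with, e.g.,

	gcc -O2 -S drain_model.c -o drain_model.s

and count the compares, SETcc and stores on the common path of each
function; drain_model.c here is a hypothetical file name.)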

> Signed-off-by: Jackie Liu <liuyun01@kylinos.cn>
> ---
>  Resending this patch because it was rejected by the mailing list.
> 
>  fs/io_uring.c | 6 ++++--
>  1 file changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 013e5ed..f4ec44a 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -2938,12 +2938,14 @@ static void __io_queue_sqe(struct io_kiocb *req)
>  static void io_queue_sqe(struct io_kiocb *req)
>  {
>  	int ret;
> +	bool drain_link = req->flags & REQ_F_DRAIN_LINK;
>  
> -	if (unlikely(req->ctx->drain_next)) {
> +	if (unlikely(req->ctx->drain_next && !drain_link)) {
>  		req->flags |= REQ_F_IO_DRAIN;
>  		req->ctx->drain_next = false;
> +	} else if (unlikely(drain_link)) {
> +		req->ctx->drain_next = true;
>  	}
> -	req->ctx->drain_next = (req->flags & REQ_F_DRAIN_LINK);
>  
>  	ret = io_req_defer(req);
>  	if (ret) {
> 

-- 
Pavel Begunkov


* Re: [PATCH RESEND] io_uring: a small optimization for REQ_F_DRAIN_LINK
  2019-11-22 10:05   ` Pavel Begunkov
@ 2019-11-22 10:26     ` JackieLiu
  2019-11-22 10:54       ` Pavel Begunkov
  0 siblings, 1 reply; 5+ messages in thread
From: JackieLiu @ 2019-11-22 10:26 UTC (permalink / raw)
  To: Pavel Begunkov; +Cc: axboe, io-uring



> On 22 Nov 2019, at 18:05, Pavel Begunkov <asml.silence@gmail.com> wrote:
> 
> On 11/22/2019 9:01 AM, Jackie Liu wrote:
>> From: Jackie Liu <liuyun01@kylinos.cn>
>> 
>> We don't need to set drain_next every time. Add a small check and mark
>> the branches unlikely; this looks like it should give a small
>> optimization.
>> 
> Not sure about that. It's 1 CMP + 1 SETcc/STORE, which works pretty fast
> as @drain_next is hot (especially after a read) and there is no
> write-read dependency close by. With yours, there are likely always 3
> CMPs in the way.
> 
> Did you benchmark it somehow or compare the assembly?

It is only theoretically possible. In most cases, our drain_link
and drain_next are both false, so only two CMPs are needed, and modern
CPUs have branch prediction. Perhaps these judgments can be optimized.

Your code is very nice; while reading and trying to understand it,
I wanted to see whether there is any other way to optimize it.

Sometimes you don't need to reset drain_next: for example, when
drain_link == true and drain_next == true, there is no need to set it
again below.
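
These cases can be checked mechanically by appending a small harness to
the hypothetical model file shown earlier in the thread (a sketch;
queue_sqe_old()/queue_sqe_new() and the model structs come from there):

#include <stdio.h>

int main(void)
{
	int next, link;

	/* Drive both variants through all four (drain_next, drain_link)
	 * states and print the resulting flags and ring state, so the
	 * common case and the corner cases are easy to compare. */
	for (next = 0; next <= 1; next++) {
		for (link = 0; link <= 1; link++) {
			struct model_ctx c_old = { next != 0 };
			struct model_ctx c_new = { next != 0 };
			struct model_req r_old = { link ? REQ_F_DRAIN_LINK : 0, &c_old };
			struct model_req r_new = { link ? REQ_F_DRAIN_LINK : 0, &c_new };

			queue_sqe_old(&r_old);
			queue_sqe_new(&r_new);
			printf("next=%d link=%d | old: flags=%u dn=%d | new: flags=%u dn=%d\n",
			       next, link, r_old.flags, (int)c_old.drain_next,
			       r_new.flags, (int)c_new.drain_next);
		}
	}
	return 0;
}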

--
Jackie Liu

> 
>> Signed-off-by: Jackie Liu <liuyun01@kylinos.cn>
>> ---
>> Resending this patch because it was rejected by the mailing list.
>> 
>> fs/io_uring.c | 6 ++++--
>> 1 file changed, 4 insertions(+), 2 deletions(-)
>> 
>> diff --git a/fs/io_uring.c b/fs/io_uring.c
>> index 013e5ed..f4ec44a 100644
>> --- a/fs/io_uring.c
>> +++ b/fs/io_uring.c
>> @@ -2938,12 +2938,14 @@ static void __io_queue_sqe(struct io_kiocb *req)
>> static void io_queue_sqe(struct io_kiocb *req)
>> {
>> 	int ret;
>> +	bool drain_link = req->flags & REQ_F_DRAIN_LINK;
>> 
>> -	if (unlikely(req->ctx->drain_next)) {
>> +	if (unlikely(req->ctx->drain_next && !drain_link)) {
>> 		req->flags |= REQ_F_IO_DRAIN;
>> 		req->ctx->drain_next = false;
>> +	} else if (unlikely(drain_link)) {
>> +		req->ctx->drain_next = true;
>> 	}
>> -	req->ctx->drain_next = (req->flags & REQ_F_DRAIN_LINK);
>> 
>> 	ret = io_req_defer(req);
>> 	if (ret) {
>> 
> 
> -- 
> Pavel Begunkov



* Re: [PATCH RESEND] io_uring: a small optimization for REQ_F_DRAIN_LINK
  2019-11-22 10:26     ` JackieLiu
@ 2019-11-22 10:54       ` Pavel Begunkov
  0 siblings, 0 replies; 5+ messages in thread
From: Pavel Begunkov @ 2019-11-22 10:54 UTC (permalink / raw)
  To: JackieLiu; +Cc: axboe, io-uring

On 11/22/2019 1:26 PM, JackieLiu wrote:
>> Not sure about that. It's 1 CMP + 1 SETcc/STORE, which works pretty fast
>> as @drain_next is hot (especially after a read) and there is no
>> write-read dependency close by. With yours, there are likely always 3
>> CMPs in the way.
>>
>> Did you benchmark it somehow or compare the assembly?
> 
> It is only theoretically possible. In most cases, our drain_link
> and drain_next are both false, so only two CMPs are needed, and modern
> CPUs have branch prediction. Perhaps these judgments can be optimized.
> 
My bad, right: 2 CMPs on the common path.

> Your code is very nice; while reading and trying to understand it,
> I wanted to see whether there is any other way to optimize it.
> 
> Sometimes you don't need to reset drain_next: for example, when
> drain_link == true and drain_next == true, there is no need to set it
> again below.

We could consider a change like the one below, but I'd rather rely on
the compiler to optimise it for us (i.e. it knows the target
architecture). Everything else is a really rare/slow path in my opinion,
so it shouldn't be a concern.

-	req->ctx->drain_next = (req->flags & REQ_F_DRAIN_LINK);
+	if (req->flags & REQ_F_DRAIN_LINK)
+		req->ctx->drain_next = true;
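
Expressed in the same hypothetical model as earlier in the thread, that
suggestion would read (a sketch):

/* Conditional store: keeps the original semantics for every state,
 * but skips the write to drain_next when both flags are clear.
 * Whether the second test becomes a branch or a SETcc is left to
 * the compiler. */
static void queue_sqe_suggested(struct model_req *req)
{
	if (req->ctx->drain_next) {
		req->flags |= REQ_F_IO_DRAIN;
		req->ctx->drain_next = false;
	}
	if (req->flags & REQ_F_DRAIN_LINK)
		req->ctx->drain_next = true;
}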

If the goal is to micro-optimise things, it's better to think about how
to rework the whole scheme to reduce the number of CMPs and memory
reads/writes in the hot path, including the setting of REQ_F_DRAIN_LINK
in submit_sqe().

Though, there are still heavier things happening around.

-- 
Pavel Begunkov

