io-uring.vger.kernel.org archive mirror
* [RFC] single cqe per link
@ 2020-02-25  0:39 Pavel Begunkov
  2020-02-25  2:14 ` Carter Li 李通洲
  2020-02-25  2:24 ` [RFC] single cqe per link Jens Axboe
  0 siblings, 2 replies; 10+ messages in thread
From: Pavel Begunkov @ 2020-02-25  0:39 UTC (permalink / raw)
  To: Jens Axboe, io-uring, 李通洲

I got curious about the performance of the idea of having only 1 CQE per link
(for the failed or last one). Tested it with a quick-and-dirty patch doing
submit-and-reap of a link of nops (patched for inline execution).

1) link size: 100
old: 206 ns per nop
new: 144 ns per nop

2) link size: 10
old: 234 ns per nop
new: 181 ns per nop

3) link size: 10, FORCE_ASYNC
old: 667 ns per nop
new: 569 ns per nop


The patch below breaks sequences, linked_timeout and who knows what else.
The first one requires synchronisation/atomics, so it's a bit in the way. I've
been wondering whether IOSQE_IO_DRAIN is popular and how much it's used. We can
try to find a tradeoff, or even disable it with this feature.
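
For reference, here's roughly how userspace would drive this. A minimal
liburing sketch, assuming the IORING_SETUP_BOXED_CQE flag from the patch
below (and link_len <= ring size):

#include <liburing.h>

static int run_boxed_link(unsigned link_len)
{
	struct io_uring ring;
	struct io_uring_params p = { };
	struct io_uring_cqe *cqe;
	unsigned i;
	int ret;

	/* opt in: only the last (or first failed) request of a link posts a cqe */
	p.flags = IORING_SETUP_BOXED_CQE;
	ret = io_uring_queue_init_params(128, &ring, &p);
	if (ret < 0)
		return ret;

	for (i = 0; i < link_len; i++) {
		struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);

		io_uring_prep_nop(sqe);
		/* all but the last sqe continue the link */
		if (i + 1 < link_len)
			sqe->flags |= IOSQE_IO_LINK;
	}
	io_uring_submit(&ring);

	/* the whole link is reaped as a single completion */
	ret = io_uring_wait_cqe(&ring, &cqe);
	if (!ret)
		io_uring_cqe_seen(&ring, cqe);
	io_uring_queue_exit(&ring);
	return ret;
}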


diff --git a/fs/io_uring.c b/fs/io_uring.c
index 65a61b8b37c4..9ec29f01cfda 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1164,7 +1164,7 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
 	return cqe != NULL;
 }

-static void io_cqring_fill_event(struct io_kiocb *req, long res)
+static void __io_cqring_fill_event(struct io_kiocb *req, long res)
 {
 	struct io_ring_ctx *ctx = req->ctx;
 	struct io_uring_cqe *cqe;
@@ -1196,13 +1196,31 @@ static void io_cqring_fill_event(struct io_kiocb *req, long res)
 	}
 }

+static inline bool io_ignore_cqe(struct io_kiocb *req)
+{
+	if (!(req->ctx->flags & IORING_SETUP_BOXED_CQE))
+		return false;
+
+	return (req->flags & (REQ_F_LINK|REQ_F_FAIL_LINK)) == REQ_F_LINK;
+}
+
+static void io_cqring_fill_event(struct io_kiocb *req, long res)
+{
+	if (io_ignore_cqe(req))
+		return;
+	__io_cqring_fill_event(req, res);
+}
+
 static void io_cqring_add_event(struct io_kiocb *req, long res)
 {
 	struct io_ring_ctx *ctx = req->ctx;
 	unsigned long flags;

+	if (io_ignore_cqe(req))
+		return;
+
 	spin_lock_irqsave(&ctx->completion_lock, flags);
-	io_cqring_fill_event(req, res);
+	__io_cqring_fill_event(req, res);
 	io_commit_cqring(ctx);
 	spin_unlock_irqrestore(&ctx->completion_lock, flags);

@@ -7084,7 +7102,8 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)

 	if (p.flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL |
 			IORING_SETUP_SQ_AFF | IORING_SETUP_CQSIZE |
-			IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ))
+			IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ |
+			IORING_SETUP_BOXED_CQE))
 		return -EINVAL;

 	ret = io_uring_create(entries, &p);
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 08891cc1c1e7..3d69369e252c 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -86,6 +86,7 @@ enum {
 #define IORING_SETUP_CQSIZE	(1U << 3)	/* app defines CQ size */
 #define IORING_SETUP_CLAMP	(1U << 4)	/* clamp SQ/CQ ring sizes */
 #define IORING_SETUP_ATTACH_WQ	(1U << 5)	/* attach to existing wq */
+#define IORING_SETUP_BOXED_CQE	(1U << 6)	/* single cqe per link */

 enum {
 	IORING_OP_NOP,


-- 
Pavel Begunkov


* Re: [RFC] single cqe per link
  2020-02-25  0:39 [RFC] single cqe per link Pavel Begunkov
@ 2020-02-25  2:14 ` Carter Li 李通洲
  2020-02-25  2:36   ` Jens Axboe
  2020-02-25  2:24 ` [RFC] single cqe per link Jens Axboe
  1 sibling, 1 reply; 10+ messages in thread
From: Carter Li 李通洲 @ 2020-02-25  2:14 UTC (permalink / raw)
  To: Pavel Begunkov; +Cc: Jens Axboe, io-uring

Hello Pavel,

I still think flags tagged on sqes could be a better choice, which gives users the ability to decide whether they want to ignore the cqes, not only for links, but also for normal sqes.

In addition, boxed cqes couldn’t resolve the issue of IORING_IO_TIMEOUT.
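
Something like this would do. A sketch, assuming a hypothetical per-sqe
flag (say, IOSQE_NO_CQE):

#include <liburing.h>

/* fire-and-forget: suppress the cqe for this one request, linked or not */
static void queue_silent_writev(struct io_uring *ring, int fd,
				const struct iovec *iov)
{
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);

	io_uring_prep_writev(sqe, fd, iov, 1, 0);
	sqe->flags |= IOSQE_NO_CQE;	/* hypothetical: no completion posted */
}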

Carter

> On Feb 25, 2020, at 8:39 AM, Pavel Begunkov <asml.silence@gmail.com> wrote:
> 
> I got curious about the performance of the idea of having only 1 CQE per link
> (for the failed or last one). Tested it with a quick-and-dirty patch doing
> submit-and-reap of a link of nops (patched for inline execution).
> 
> 1) link size: 100
> old: 206 ns per nop
> new: 144 ns per nop
> 
> 2) link size: 10
> old: 234 ns per nop
> new: 181 ns per nop
> 
> 3) link size: 10, FORCE_ASYNC
> old: 667 ns per nop
> new: 569 ns per nop
> 
> 
> The patch below breaks sequences, linked_timeout and who knows what else.
> The first one requires synchronisation/atomics, so it's a bit in the way. I've
> been wondering whether IOSQE_IO_DRAIN is popular and how much it's used. We can
> try to find a tradeoff, or even disable it with this feature.
> 
> 
> [patch snipped]



* Re: [RFC] single cqe per link
  2020-02-25  0:39 [RFC] single cqe per link Pavel Begunkov
  2020-02-25  2:14 ` Carter Li 李通洲
@ 2020-02-25  2:24 ` Jens Axboe
  1 sibling, 0 replies; 10+ messages in thread
From: Jens Axboe @ 2020-02-25  2:24 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring, 李通洲

On 2/24/20 5:39 PM, Pavel Begunkov wrote:
> I got curious about the performance of the idea of having only 1 CQE per link
> (for the failed or last one). Tested it with a quick-and-dirty patch doing
> submit-and-reap of a link of nops (patched for inline execution).
> 
> 1) link size: 100
> old: 206 ns per nop
> new: 144 ns per nop
> 
> 2) link size: 10
> old: 234 ns per nop
> new: 181 ns per nop
> 
> 3) link size: 10, FORCE_ASYNC
> old: 667 ns per nop
> new: 569 ns per nop
> 
> 
> The patch below breaks sequences, linked_timeout and who knows what else.
> The first one requires synchronisation/atomics, so it's a bit in the way. I've
> been wondering whether IOSQE_IO_DRAIN is popular and how much it's used. We can
> try to find a tradeoff, or even disable it with this feature.

For a more realistic workload, I can try running a random read workload
on a fast device. If I just make the QD the link count, then we'll
have the same amount in parallel, just with 1/link-depth as many
CQEs. I'd be curious to see what that does.

-- 
Jens Axboe



* Re: [RFC] single cqe per link
  2020-02-25  2:14 ` Carter Li 李通洲
@ 2020-02-25  2:36   ` Jens Axboe
  2020-02-25  3:13     ` Jens Axboe
  0 siblings, 1 reply; 10+ messages in thread
From: Jens Axboe @ 2020-02-25  2:36 UTC (permalink / raw)
  To: Carter Li 李通洲, Pavel Begunkov; +Cc: io-uring

On 2/24/20 7:14 PM, Carter Li 李通洲 wrote:
>> On Feb 25, 2020, at 8:39 AM, Pavel Begunkov <asml.silence@gmail.com> wrote:
>>
>> I got curious about the performance of the idea of having only 1 CQE per link
>> (for the failed or last one). Tested it with a quick-and-dirty patch doing
>> submit-and-reap of a link of nops (patched for inline execution).
>>
>> 1) link size: 100
>> old: 206 ns per nop
>> new: 144 ns per nop
>>
>> 2) link size: 10
>> old: 234 ns per nop
>> new: 181 ns per nop
>>
>> 3) link size: 10, FORCE_ASYNC
>> old: 667 ns per nop
>> new: 569 ns per nop
>>
>>
>> The patch below breaks sequences, linked_timeout and who knows what else.
>> The first one requires synchronisation/atomics, so it's a bit in the way. I've
>> been wondering whether IOSQE_IO_DRAIN is popular and how much it's used. We can
>> try to find a tradeoff, or even disable it with this feature.
>
> Hello Pavel,
> 
> I still think flags tagged on sqes could be a better choice, which
> gives users the ability to decide whether they want to ignore the cqes,
> not only for links, but also for normal sqes.
> 
> In addition, boxed cqes couldn’t resolve the issue of
> IORING_IO_TIMEOUT.

I would tend to agree, and it'd be trivial to just set the flag on
whatever SQEs in the chain you don't care about. Or even an individual
SQE, though that's probably a bit more of a reach in terms of use case.
Maybe nop with drain + ignore?

In any case it's definitely more flexible.

-- 
Jens Axboe



* Re: [RFC] single cqe per link
  2020-02-25  2:36   ` Jens Axboe
@ 2020-02-25  3:13     ` Jens Axboe
  2020-02-25 10:12       ` Pavel Begunkov
  0 siblings, 1 reply; 10+ messages in thread
From: Jens Axboe @ 2020-02-25  3:13 UTC (permalink / raw)
  To: Carter Li 李通洲, Pavel Begunkov; +Cc: io-uring

On 2/24/20 7:36 PM, Jens Axboe wrote:
> On 2/24/20 7:14 PM, Carter Li 李通洲 wrote:
>>> On Feb 25, 2020, at 8:39 AM, Pavel Begunkov <asml.silence@gmail.com> wrote:
>>>
>>> I got curious about the performance of the idea of having only 1 CQE per link
>>> (for the failed or last one). Tested it with a quick-and-dirty patch doing
>>> submit-and-reap of a link of nops (patched for inline execution).
>>>
>>> 1) link size: 100
>>> old: 206 ns per nop
>>> new: 144 ns per nop
>>>
>>> 2) link size: 10
>>> old: 234 ns per nop
>>> new: 181 ns per nop
>>>
>>> 3) link size: 10, FORCE_ASYNC
>>> old: 667 ns per nop
>>> new: 569 ns per nop
>>>
>>>
>>> The patch below breaks sequences, linked_timeout and who knows what else.
>>> The first one requires synchronisation/atomics, so it's a bit in the way. I've
>>> been wondering whether IOSQE_IO_DRAIN is popular and how much it's used. We can
>>> try to find a tradeoff, or even disable it with this feature.
>>
>> Hello Pavel,
>>
>> I still think flags tagged on sqes could be a better choice, which
>> gives users the ability to decide whether they want to ignore the cqes,
>> not only for links, but also for normal sqes.
>>
>> In addition, boxed cqes couldn’t resolve the issue of
>> IORING_IO_TIMEOUT.
> 
> I would tend to agree, and it'd be trivial to just set the flag on
> whatever SQEs in the chain you don't care about. Or even an individual
> SQE, though that's probably a bit more of a reach in terms of use case.
> Maybe nop with drain + ignore?
> 
> In any case it's definitely more flexible.

In the interest of taking this to the extreme, I tried a nop benchmark
on my laptop (qemu/kvm). Granted, this setup is particularly sensitive
to spinlocks; they are a lot more expensive there than on a real host.

Anyway, regular nops run at about 9.5M/sec with a single thread.
Flagging all SQEs with IOSQE_NO_CQE nets me about 14M/sec. So a handy
improvement. Looking at the top of profiles:

cqe-per-sqe:

+   28.45%  io_uring  [kernel.kallsyms]  [k] _raw_spin_unlock_irqrestore
+   14.38%  io_uring  [kernel.kallsyms]  [k] io_submit_sqes
+    9.38%  io_uring  [kernel.kallsyms]  [k] io_put_req
+    7.25%  io_uring  libc-2.31.so       [.] syscall
+    6.12%  io_uring  [kernel.kallsyms]  [k] kmem_cache_free

no-cqes:

+   19.72%  io_uring  [kernel.kallsyms]  [k] io_put_req
+   11.93%  io_uring  [kernel.kallsyms]  [k] io_submit_sqes
+   10.14%  io_uring  [kernel.kallsyms]  [k] kmem_cache_free
+    9.55%  io_uring  libc-2.31.so       [.] syscall
+    7.48%  io_uring  [kernel.kallsyms]  [k] __io_queue_sqe

I'll try the real disk IO tomorrow, using polled IO.

-- 
Jens Axboe



* Re: [RFC] single cqe per link
  2020-02-25  3:13     ` Jens Axboe
@ 2020-02-25 10:12       ` Pavel Begunkov
  2020-02-25 20:20         ` Jens Axboe
  0 siblings, 1 reply; 10+ messages in thread
From: Pavel Begunkov @ 2020-02-25 10:12 UTC (permalink / raw)
  To: Jens Axboe, Carter Li 李通洲; +Cc: io-uring

On 2/25/2020 6:13 AM, Jens Axboe wrote:
>>> I still think flags tagged on sqes could be a better choice, which
>>> gives users the ability to decide whether they want to ignore the cqes,
>>> not only for links, but also for normal sqes.
>>>
>>> In addition, boxed cqes couldn’t resolve the issue of
>>> IORING_IO_TIMEOUT.
>>
>> I would tend to agree, and it'd be trivial to just set the flag on
>> whatever SQEs in the chain you don't care about. Or even an individual
>> SQE, though that's probably a bit more of a reach in terms of use case.
>> Maybe nop with drain + ignore?

Flexible, but not performant. The existence of drain already makes
io_uring do a lot of extra stuff, and it's even worse when it's actually used.

>>
>> In any case it's definitely more flexible.

That's a different thing. Knowing how requests behave (e.g. if
nbytes != res, then fail the link), one would want to get a cqe for the
last executed sqe, whether that last one is an error or a success.

It lets a link be handled as a single entity. I don't see a way to
emulate similar behaviour with unconditional masking. Probably, we
will need them both.
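
To illustrate, the reaping side could then treat each link as a unit. A
sketch assuming these semantics, with handle_link_failure()/handle_link_done()
as hypothetical user callbacks:

static void reap_one_link(struct io_uring *ring)
{
	struct io_uring_cqe *cqe;

	if (io_uring_wait_cqe(ring, &cqe) == 0) {
		if (cqe->res < 0)
			/* some sqe in the link failed; user_data/res say which and why */
			handle_link_failure(cqe->user_data, cqe->res);
		else
			/* the last sqe completed, so the whole link succeeded */
			handle_link_done(cqe->user_data, cqe->res);
		io_uring_cqe_seen(ring, cqe);
	}
}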

> In the interest of taking this to the extreme, I tried a nop benchmark
> on my laptop (qemu/kvm). Granted, this setup is particularly sensitive
> to spinlocks; they are a lot more expensive there than on a real host.
> 
> Anyway, regular nops run at about 9.5M/sec with a single thread.
> Flagging all SQEs with IOSQE_NO_CQE nets me about 14M/sec. So a handy
> improvement. Looking at the top of profiles:
> 
> cqe-per-sqe:
> 
> +   28.45%  io_uring  [kernel.kallsyms]  [k] _raw_spin_unlock_irqrestore
> +   14.38%  io_uring  [kernel.kallsyms]  [k] io_submit_sqes
> +    9.38%  io_uring  [kernel.kallsyms]  [k] io_put_req
> +    7.25%  io_uring  libc-2.31.so       [.] syscall
> +    6.12%  io_uring  [kernel.kallsyms]  [k] kmem_cache_free
> 
> no-cqes:
> 
> +   19.72%  io_uring  [kernel.kallsyms]  [k] io_put_req
> +   11.93%  io_uring  [kernel.kallsyms]  [k] io_submit_sqes
> +   10.14%  io_uring  [kernel.kallsyms]  [k] kmem_cache_free
> +    9.55%  io_uring  libc-2.31.so       [.] syscall
> +    7.48%  io_uring  [kernel.kallsyms]  [k] __io_queue_sqe
> 
> I'll try the real disk IO tomorrow, using polled IO.

Great, would love to see that.


-- 
Pavel Begunkov


* Re: [RFC] single cqe per link
  2020-02-25 10:12       ` Pavel Begunkov
@ 2020-02-25 20:20         ` Jens Axboe
  2020-02-25 21:13           ` Pavel Begunkov
  0 siblings, 1 reply; 10+ messages in thread
From: Jens Axboe @ 2020-02-25 20:20 UTC (permalink / raw)
  To: Pavel Begunkov, Carter Li 李通洲; +Cc: io-uring

On 2/25/20 3:12 AM, Pavel Begunkov wrote:
> On 2/25/2020 6:13 AM, Jens Axboe wrote:
>>>> I still think flags tagged on sqes could be a better choice, which
>>>> gives users the ability to decide whether they want to ignore the cqes,
>>>> not only for links, but also for normal sqes.
>>>>
>>>> In addition, boxed cqes couldn’t resolve the issue of
>>>> IORING_IO_TIMEOUT.
>>>
>>> I would tend to agree, and it'd be trivial to just set the flag on
>>> whatever SQEs in the chain you don't care about. Or even an individual
>>> SQE, though that's probably a bit more of a reach in terms of use case.
>>> Maybe nop with drain + ignore?
> 
> Flexible, but not performant. The existence of drain already makes
> io_uring do a lot of extra stuff, and it's even worse when it's actually used.

Yeah I agree, that's assuming we can make the drain more efficient. Just
hand waving on possible use cases :-)

>>> In any case it's definitely more flexible.
> 
> That's a different thing. Knowing how requests behave (e.g. if
> nbytes != res, then fail the link), one would want to get a cqe for the
> last executed sqe, whether that last one is an error or a success.
> 
> It lets a link be handled as a single entity. I don't see a way to
> emulate similar behaviour with unconditional masking. Probably, we
> will need them both.

But you can easily do that with IOSQE_NO_CQE; in fact, that's what I did
to test this. The chain will have IOSQE_NO_CQE | IOSQE_IO_LINK set on
all but the last request.
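
I.e. roughly this (a sketch; IOSQE_NO_CQE being the flag from my test
patch, with ring/chain_len assumed from context):

	struct io_uring_sqe *sqe;
	unsigned i;

	for (i = 0; i < chain_len; i++) {
		sqe = io_uring_get_sqe(&ring);
		io_uring_prep_nop(sqe);
		/* all but the last: keep linking and suppress the cqe */
		if (i + 1 < chain_len)
			sqe->flags |= IOSQE_NO_CQE | IOSQE_IO_LINK;
	}
	io_uring_submit(&ring);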

>> In the interest of taking this to the extreme, I tried a nop benchmark
>> on my laptop (qemu/kvm). Granted, this setup is particularly sensitive
>> to spinlocks; they are a lot more expensive there than on a real host.
>>
>> Anyway, regular nops run at about 9.5M/sec with a single thread.
>> Flagging all SQEs with IOSQE_NO_CQE nets me about 14M/sec. So a handy
>> improvement. Looking at the top of profiles:
>>
>> cqe-per-sqe:
>>
>> +   28.45%  io_uring  [kernel.kallsyms]  [k] _raw_spin_unlock_irqrestore
>> +   14.38%  io_uring  [kernel.kallsyms]  [k] io_submit_sqes
>> +    9.38%  io_uring  [kernel.kallsyms]  [k] io_put_req
>> +    7.25%  io_uring  libc-2.31.so       [.] syscall
>> +    6.12%  io_uring  [kernel.kallsyms]  [k] kmem_cache_free
>>
>> no-cqes:
>>
>> +   19.72%  io_uring  [kernel.kallsyms]  [k] io_put_req
>> +   11.93%  io_uring  [kernel.kallsyms]  [k] io_submit_sqes
>> +   10.14%  io_uring  [kernel.kallsyms]  [k] kmem_cache_free
>> +    9.55%  io_uring  libc-2.31.so       [.] syscall
>> +    7.48%  io_uring  [kernel.kallsyms]  [k] __io_queue_sqe
>>
>> I'll try the real disk IO tomorrow, using polled IO.
> 
> Great, would love to see

My box with the optane2 is apparently out of commission; I cannot get it
going today. So I had to make do with my laptop, which does about 600K
random read IOPS. I don't see any difference there, using polled IO,
using 4-link-deep chains (so 1/4th the CQEs). Both run at around
611-613K IOPS.

-- 
Jens Axboe



* Re: [RFC] single cqe per link
  2020-02-25 20:20         ` Jens Axboe
@ 2020-02-25 21:13           ` Pavel Begunkov
  2020-08-21  5:17             ` Questions about IORING_OP_ASYNC_CANCEL usage Carter Li 李通洲
  0 siblings, 1 reply; 10+ messages in thread
From: Pavel Begunkov @ 2020-02-25 21:13 UTC (permalink / raw)
  To: Jens Axboe, Carter Li 李通洲; +Cc: io-uring

On 25/02/2020 23:20, Jens Axboe wrote:
> On 2/25/20 3:12 AM, Pavel Begunkov wrote:
>> Flexible, but not performant. The existence of drain already makes
>> io_uring do a lot of extra stuff, and it's even worse when it's actually used.
> 
> Yeah I agree, that's assuming we can make the drain more efficient. Just
> hand waving on possible use cases :-)

I don't even know what to do with sequences and drains when we get to in-kernel
sqe generation. And the current linear numbering won't be the case at all.

E.g. req1 -> DRAIN, where req1 infinitely generates req2, req3, etc. Should they
go before DRAIN? Or at any time? What would the performance burden of that be?..

I'd rather forbid them from being used with some new features. And that's the
reason behind the question about how widely drain is used.

>>
>> That's a different thing. Knowing how requests behave (e.g. if
>> nbytes != res, then fail the link), one would want to get a cqe for the
>> last executed sqe, whether that last one is an error or a success.
>>
>> It lets a link be handled as a single entity. I don't see a way to
>> emulate similar behaviour with unconditional masking. Probably, we
>> will need them both.
> 
> But you can easily do that with IOSQE_NO_CQE; in fact, that's what I did
> to test this. The chain will have IOSQE_NO_CQE | IOSQE_IO_LINK set on
> all but the last request.

It's fine if you don't expect it to fail. Otherwise, there will be only
-ECANCELED for the last one, so you know neither the error code nor the failed
req/user_data. Forcing an IOSQE_NO_CQE request to still emit a cqe in case of
an error is not really better.

I know it's hard to judge based on a performance-testing-only patch, but the
whole idea is to greatly simplify userspace cqe handling, including errors. And
I'd like to find something better/faster that does the same favor.


> 
> My box with the optane2 is apparently out of commission; I cannot get it
> going today. So I had to make do with my laptop, which does about 600K
> random read IOPS. I don't see any difference there, using polled IO,
> using 4-link-deep chains (so 1/4th the CQEs). Both run at around
> 611-613K IOPS.

-- 
Pavel Begunkov


* Questions about IORING_OP_ASYNC_CANCEL usage
  2020-02-25 21:13           ` Pavel Begunkov
@ 2020-08-21  5:17             ` Carter Li 李通洲
  2020-08-21  5:20               ` Carter Li 李通洲
  0 siblings, 1 reply; 10+ messages in thread
From: Carter Li 李通洲 @ 2020-08-21  5:17 UTC (permalink / raw)
  To: Jens Axboe; +Cc: io-uring

Hi Jens,

I have some (maybe stupid) questions about IORING_OP_ASYNC_CANCEL:

1. Does IORING_OP_ASYNC_CANCEL always complete inline (i.e. finish before the io_uring_enter syscall returns)?

2. Do the recent changes for async buffered reads have any impact on cancelation? Can we cancel a buffered IORING_OP_READV operation after it's started? Although the disk->kernel DMA operation is not cancelable, can we cancel the kernel->userland data copy?

3. I heard that all buffered writes are serialized on the inode mutex. If a buffered IORING_OP_WRITEV is blocked on the inode mutex, can we cancel it?
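
For context, this is roughly how I issue the cancel. A sketch using
liburing's io_uring_prep_cancel(), where TAG_READ/TAG_CANCEL are
hypothetical user_data tags:

/* try to cancel the in-flight request that was tagged TAG_READ */
struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
io_uring_prep_cancel(sqe, (void *)(uintptr_t)TAG_READ, 0);
sqe->user_data = TAG_CANCEL;
io_uring_submit(&ring);
/*
 * The cancel's own cqe->res: 0 if found and canceled, -ENOENT if no
 * matching request was found, -EALREADY if it was already running.
 */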

Thanks in advance,
Carter


* Re: Questions about IORING_OP_ASYNC_CANCEL usage
  2020-08-21  5:17             ` Questions about IORING_OP_ASYNC_CANCEL usage Carter Li 李通洲
@ 2020-08-21  5:20               ` Carter Li 李通洲
  0 siblings, 0 replies; 10+ messages in thread
From: Carter Li 李通洲 @ 2020-08-21  5:20 UTC (permalink / raw)
  To: Jens Axboe; +Cc: io-uring

Sorry, please ignore this mail; I'll send a new one.

> On Aug 21, 2020, at 1:17 PM, Carter Li 李通洲 <carter.li@eoitek.com> wrote:
> 
> [original message snipped]



end of thread

Thread overview: 10+ messages
2020-02-25  0:39 [RFC] single cqe per link Pavel Begunkov
2020-02-25  2:14 ` Carter Li 李通洲
2020-02-25  2:36   ` Jens Axboe
2020-02-25  3:13     ` Jens Axboe
2020-02-25 10:12       ` Pavel Begunkov
2020-02-25 20:20         ` Jens Axboe
2020-02-25 21:13           ` Pavel Begunkov
2020-08-21  5:17             ` Questions about IORING_OP_ASYNC_CANCEL usage Carter Li 李通洲
2020-08-21  5:20               ` Carter Li 李通洲
2020-02-25  2:24 ` [RFC] single cqe per link Jens Axboe
