io-uring.vger.kernel.org archive mirror
* [PATCH RFC v2] io_uring: limit inflight IO
@ 2019-11-08 21:10 Jens Axboe
  2019-11-09 11:01 ` Pavel Begunkov
  0 siblings, 1 reply; 4+ messages in thread
From: Jens Axboe @ 2019-11-08 21:10 UTC (permalink / raw)
  To: io-uring, Pavel Begunkov (Silence)

Here's a modified version for discussion. Instead of sizing this on the
specific ring, just size it based on the max allowable CQ ring size. I
think this should be safer, and won't risk breaking existing use cases
out there.

The reasoning here is that we already limit the ring sizes globally;
they are under ulimit memlock protection. With this on top, we have some
sort of safeguard for the system as a whole as well, whereas before we
had none. Even a small ring size can keep queuing IO.

Let me know what you guys think...

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 29ea1106132d..0d8c3b1612af 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -737,6 +737,25 @@ static struct io_kiocb *io_get_fallback_req(struct io_ring_ctx *ctx)
 	return NULL;
 }
 
+static bool io_req_over_limit(struct io_ring_ctx *ctx)
+{
+	unsigned inflight;
+
+	/*
+	 * This doesn't need to be super precise, so only check every once
+	 * in a while.
+	 */
+	if (ctx->cached_sq_head & ctx->sq_mask)
+		return false;
+
+	/*
+	 * Use 2x the max CQ ring size
+	 */
+	inflight = ctx->cached_sq_head -
+		  (ctx->cached_cq_tail + atomic_read(&ctx->cached_cq_overflow));
+	return inflight >= 2 * IORING_MAX_CQ_ENTRIES;
+}
+
 static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
 				   struct io_submit_state *state)
 {
@@ -744,9 +763,11 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
 	struct io_kiocb *req;
 
 	if (!percpu_ref_tryget(&ctx->refs))
-		return NULL;
+		return ERR_PTR(-ENXIO);
 
 	if (!state) {
+		if (unlikely(io_req_over_limit(ctx)))
+			goto out_limit;
 		req = kmem_cache_alloc(req_cachep, gfp);
 		if (unlikely(!req))
 			goto fallback;
@@ -754,6 +775,8 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
 		size_t sz;
 		int ret;
 
+		if (unlikely(io_req_over_limit(ctx)))
+			goto out_limit;
 		sz = min_t(size_t, state->ios_left, ARRAY_SIZE(state->reqs));
 		ret = kmem_cache_alloc_bulk(req_cachep, gfp, sz, state->reqs);
 
@@ -789,8 +812,9 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
 	req = io_get_fallback_req(ctx);
 	if (req)
 		goto got_it;
+out_limit:
 	percpu_ref_put(&ctx->refs);
-	return NULL;
+	return ERR_PTR(-EBUSY);
 }
 
 static void io_free_req_many(struct io_ring_ctx *ctx, void **reqs, int *nr)
@@ -3016,9 +3040,9 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 		unsigned int sqe_flags;
 
 		req = io_get_req(ctx, statep);
-		if (unlikely(!req)) {
+		if (unlikely(IS_ERR(req))) {
 			if (!submitted)
-				submitted = -EAGAIN;
+				submitted = PTR_ERR(req);
 			break;
 		}
 		if (!io_get_sqring(ctx, &req->submit)) {
@@ -3039,8 +3063,10 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 		if (link && (sqe_flags & IOSQE_IO_DRAIN)) {
 			if (!shadow_req) {
 				shadow_req = io_get_req(ctx, NULL);
-				if (unlikely(!shadow_req))
+				if (unlikely(IS_ERR(shadow_req))) {
+					shadow_req = NULL;
 					goto out;
+				}
 				shadow_req->flags |= (REQ_F_IO_DRAIN | REQ_F_SHADOW_DRAIN);
 				refcount_dec(&shadow_req->refs);
 			}
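
Not part of the patch, just to illustrate what submitters will see: with
this cap in place, io_uring_enter() can now come back with -EBUSY once
roughly 2x IORING_MAX_CQ_ENTRIES requests are inflight on a ring. A
minimal userspace sketch of how a submitter might back off, assuming
liburing (submit_with_backoff() is a made-up helper name):

#include <errno.h>
#include <liburing.h>

/* Hypothetical helper, not part of liburing or this patch. */
static int submit_with_backoff(struct io_uring *ring)
{
	struct io_uring_cqe *cqe;
	int ret;

	for (;;) {
		ret = io_uring_submit(ring);
		if (ret != -EBUSY)
			return ret;
		/*
		 * Inflight cap hit: reap one completion so that
		 * sq_head - (cq_tail + overflow) drops, then retry.
		 */
		ret = io_uring_wait_cqe(ring, &cqe);
		if (ret < 0)
			return ret;
		io_uring_cqe_seen(ring, cqe);
	}
}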

-- 
Jens Axboe



* Re: [PATCH RFC v2] io_uring: limit inflight IO
  2019-11-08 21:10 [PATCH RFC v2] io_uring: limit inflight IO Jens Axboe
@ 2019-11-09 11:01 ` Pavel Begunkov
  2019-11-09 12:28   ` Pavel Begunkov
  0 siblings, 1 reply; 4+ messages in thread
From: Pavel Begunkov @ 2019-11-09 11:01 UTC (permalink / raw)
  To: Jens Axboe, io-uring



On 09/11/2019 00:10, Jens Axboe wrote:
> Here's a modified version for discussion. Instead of sizing this on the
> specific ring, just size it based on the max allowable CQ ring size. I
> think this should be safer, and won't risk breaking existing use cases
> out there.
> 
> The reasoning here is that we already limit the ring sizes globally;
> they are under ulimit memlock protection. With this on top, we have some
> sort of safeguard for the system as a whole as well, whereas before we
> had none. Even a small ring size can keep queuing IO.
> 
> Let me know what you guys think...
> 
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 29ea1106132d..0d8c3b1612af 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -737,6 +737,25 @@ static struct io_kiocb *io_get_fallback_req(struct io_ring_ctx *ctx)
>  	return NULL;
>  }
>  
> +static bool io_req_over_limit(struct io_ring_ctx *ctx)
> +{
> +	unsigned inflight;
> +
> +	/*
> +	 * This doesn't need to be super precise, so only check every once
> +	 * in a while.
> +	 */
> +	if (ctx->cached_sq_head & ctx->sq_mask)
> +		return false;
> +
> +	/*
> +	 * Use 2x the max CQ ring size
> +	 */
> +	inflight = ctx->cached_sq_head -
> +		  (ctx->cached_cq_tail + atomic_read(&ctx->cached_cq_overflow));
> +	return inflight >= 2 * IORING_MAX_CQ_ENTRIES;
> +}

ctx->cached_cq_tail is protected by ctx->completion_lock and can be
changed asynchronously. That's an unsynchronised access, so it
formally (probably) breaks the memory model.

Stale values shouldn't be a problem here, but anyway.
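
Purely as an illustration (this is not something the patch does), the
read could be annotated with READ_ONCE() to make the racy access
explicit without taking the lock:

	inflight = ctx->cached_sq_head -
		  (READ_ONCE(ctx->cached_cq_tail) +
		   atomic_read(&ctx->cached_cq_overflow));
	return inflight >= 2 * IORING_MAX_CQ_ENTRIES;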


> +
>  static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
>  				   struct io_submit_state *state)
>  {
> @@ -744,9 +763,11 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
>  	struct io_kiocb *req;
>  
>  	if (!percpu_ref_tryget(&ctx->refs))
> -		return NULL;
> +		return ERR_PTR(-ENXIO);
>  
>  	if (!state) {
> +		if (unlikely(io_req_over_limit(ctx)))
> +			goto out_limit;
>  		req = kmem_cache_alloc(req_cachep, gfp);
>  		if (unlikely(!req))
>  			goto fallback;
> @@ -754,6 +775,8 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
>  		size_t sz;
>  		int ret;
>  
> +		if (unlikely(io_req_over_limit(ctx)))
> +			goto out_limit;
>  		sz = min_t(size_t, state->ios_left, ARRAY_SIZE(state->reqs));
>  		ret = kmem_cache_alloc_bulk(req_cachep, gfp, sz, state->reqs);
>  
> @@ -789,8 +812,9 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
>  	req = io_get_fallback_req(ctx);
>  	if (req)
>  		goto got_it;
> +out_limit:
>  	percpu_ref_put(&ctx->refs);
> -	return NULL;
> +	return ERR_PTR(-EBUSY);
>  }
>  
>  static void io_free_req_many(struct io_ring_ctx *ctx, void **reqs, int *nr)
> @@ -3016,9 +3040,9 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
>  		unsigned int sqe_flags;
>  
>  		req = io_get_req(ctx, statep);
> -		if (unlikely(!req)) {
> +		if (unlikely(IS_ERR(req))) {
>  			if (!submitted)
> -				submitted = -EAGAIN;
> +				submitted = PTR_ERR(req);
>  			break;
>  		}
>  		if (!io_get_sqring(ctx, &req->submit)) {
> @@ -3039,8 +3063,10 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
>  		if (link && (sqe_flags & IOSQE_IO_DRAIN)) {
>  			if (!shadow_req) {
>  				shadow_req = io_get_req(ctx, NULL);
> -				if (unlikely(!shadow_req))
> +				if (unlikely(IS_ERR(shadow_req))) {
> +					shadow_req = NULL;
>  					goto out;
> +				}
>  				shadow_req->flags |= (REQ_F_IO_DRAIN | REQ_F_SHADOW_DRAIN);
>  				refcount_dec(&shadow_req->refs);
>  			}
> 

-- 
Pavel Begunkov



* Re: [PATCH RFC v2] io_uring: limit inflight IO
  2019-11-09 11:01 ` Pavel Begunkov
@ 2019-11-09 12:28   ` Pavel Begunkov
  2019-11-09 14:16     ` Jens Axboe
  0 siblings, 1 reply; 4+ messages in thread
From: Pavel Begunkov @ 2019-11-09 12:28 UTC (permalink / raw)
  To: Jens Axboe, io-uring

On 11/9/2019 2:01 PM, Pavel Begunkov wrote:
> On 09/11/2019 00:10, Jens Axboe wrote:
>> Here's a modified version for discussion. Instead of sizing this on the
>> specific ring, just size it based on the max allowable CQ ring size. I
>> think this should be safer, and won't risk breaking existing use cases
>> out there.
>>
>> The reasoning here is that we already limit the ring sizes globally;
>> they are under ulimit memlock protection. With this on top, we have some
>> sort of safeguard for the system as a whole as well, whereas before we
>> had none. Even a small ring size can keep queuing IO.
>>
>> Let me know what you guys think...
>>
>> diff --git a/fs/io_uring.c b/fs/io_uring.c
>> index 29ea1106132d..0d8c3b1612af 100644
>> --- a/fs/io_uring.c
>> +++ b/fs/io_uring.c
>> @@ -737,6 +737,25 @@ static struct io_kiocb *io_get_fallback_req(struct io_ring_ctx *ctx)
>>  	return NULL;
>>  }
>>  
>> +static bool io_req_over_limit(struct io_ring_ctx *ctx)
>> +{
>> +	unsigned inflight;
>> +
>> +	/*
>> +	 * This doesn't need to be super precise, so only check every once
>> +	 * in a while.
>> +	 */
>> +	if (ctx->cached_sq_head & ctx->sq_mask)
>> +		return false;
>> +
>> +	/*
>> +	 * Use 2x the max CQ ring size
>> +	 */
>> +	inflight = ctx->cached_sq_head -
>> +		  (ctx->cached_cq_tail + atomic_read(&ctx->cached_cq_overflow));
>> +	return inflight >= 2 * IORING_MAX_CQ_ENTRIES;
>> +}
> 
> ctx->cached_cq_tail is protected by ctx->completion_lock and can be
> changed asynchronously. That's an unsynchronised access, so it
> formally (probably) breaks the memory model.
> 
> Stale values shouldn't be a problem here, but anyway.
> 

I took a glance, and it seems access to cached_cq_tail is already
unsynchronised in other places. Am I missing something?

> 
>> +
>>  static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
>>  				   struct io_submit_state *state)
>>  {
>> @@ -744,9 +763,11 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
>>  	struct io_kiocb *req;
>>  
>>  	if (!percpu_ref_tryget(&ctx->refs))
>> -		return NULL;
>> +		return ERR_PTR(-ENXIO);
>>  
>>  	if (!state) {
>> +		if (unlikely(io_req_over_limit(ctx)))
>> +			goto out_limit;
>>  		req = kmem_cache_alloc(req_cachep, gfp);
>>  		if (unlikely(!req))
>>  			goto fallback;
>> @@ -754,6 +775,8 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
>>  		size_t sz;
>>  		int ret;
>>  
>> +		if (unlikely(io_req_over_limit(ctx)))
>> +			goto out_limit;
>>  		sz = min_t(size_t, state->ios_left, ARRAY_SIZE(state->reqs));
>>  		ret = kmem_cache_alloc_bulk(req_cachep, gfp, sz, state->reqs);
>>  
>> @@ -789,8 +812,9 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
>>  	req = io_get_fallback_req(ctx);
>>  	if (req)
>>  		goto got_it;
>> +out_limit:
>>  	percpu_ref_put(&ctx->refs);
>> -	return NULL;
>> +	return ERR_PTR(-EBUSY);
>>  }
>>  
>>  static void io_free_req_many(struct io_ring_ctx *ctx, void **reqs, int *nr)
>> @@ -3016,9 +3040,9 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
>>  		unsigned int sqe_flags;
>>  
>>  		req = io_get_req(ctx, statep);
>> -		if (unlikely(!req)) {
>> +		if (unlikely(IS_ERR(req))) {
>>  			if (!submitted)
>> -				submitted = -EAGAIN;
>> +				submitted = PTR_ERR(req);
>>  			break;
>>  		}
>>  		if (!io_get_sqring(ctx, &req->submit)) {
>> @@ -3039,8 +3063,10 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
>>  		if (link && (sqe_flags & IOSQE_IO_DRAIN)) {
>>  			if (!shadow_req) {
>>  				shadow_req = io_get_req(ctx, NULL);
>> -				if (unlikely(!shadow_req))
>> +				if (unlikely(IS_ERR(shadow_req))) {
>> +					shadow_req = NULL;
>>  					goto out;
>> +				}
>>  				shadow_req->flags |= (REQ_F_IO_DRAIN | REQ_F_SHADOW_DRAIN);
>>  				refcount_dec(&shadow_req->refs);
>>  			}
>>
> 


* Re: [PATCH RFC v2] io_uring: limit inflight IO
  2019-11-09 12:28   ` Pavel Begunkov
@ 2019-11-09 14:16     ` Jens Axboe
  0 siblings, 0 replies; 4+ messages in thread
From: Jens Axboe @ 2019-11-09 14:16 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring

On 11/9/19 5:28 AM, Pavel Begunkov wrote:
> On 11/9/2019 2:01 PM, Pavel Begunkov wrote:
>> On 09/11/2019 00:10, Jens Axboe wrote:
>>> Here's a modified version for discussion. Instead of sizing this on the
>>> specific ring, just size it based on the max allowable CQ ring size. I
>>> think this should be safer, and won't risk breaking existing use cases
>>> out there.
>>>
>>> The reasoning here is that we already limit the ring sizes globally;
>>> they are under ulimit memlock protection. With this on top, we have some
>>> sort of safeguard for the system as a whole as well, whereas before we
>>> had none. Even a small ring size can keep queuing IO.
>>>
>>> Let me know what you guys think...
>>>
>>> diff --git a/fs/io_uring.c b/fs/io_uring.c
>>> index 29ea1106132d..0d8c3b1612af 100644
>>> --- a/fs/io_uring.c
>>> +++ b/fs/io_uring.c
>>> @@ -737,6 +737,25 @@ static struct io_kiocb *io_get_fallback_req(struct io_ring_ctx *ctx)
>>>   	return NULL;
>>>   }
>>>   
>>> +static bool io_req_over_limit(struct io_ring_ctx *ctx)
>>> +{
>>> +	unsigned inflight;
>>> +
>>> +	/*
>>> +	 * This doesn't need to be super precise, so only check every once
>>> +	 * in a while.
>>> +	 */
>>> +	if (ctx->cached_sq_head & ctx->sq_mask)
>>> +		return false;
>>> +
>>> +	/*
>>> +	 * Use 2x the max CQ ring size
>>> +	 */
>>> +	inflight = ctx->cached_sq_head -
>>> +		  (ctx->cached_cq_tail + atomic_read(&ctx->cached_cq_overflow));
>>> +	return inflight >= 2 * IORING_MAX_CQ_ENTRIES;
>>> +}
>>
>> ctx->cached_cq_tail is protected by ctx->completion_lock and can be
>> changed asynchronously. That's an unsynchronised access, so it
>> formally (probably) breaks the memory model.
>>
>> Stale values shouldn't be a problem here, but anyway.
>>
> 
> I took a glance, and it seems access to cached_cq_tail is already
> unsynchronised in other places. Am I missing something?

It doesn't really matter for cases that don't need a stable value,
like this one right here. It's an integer, so it's not like we'll
ever see a fractured value; at most it'll be a bit stale.
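
To make that concrete with a toy model (userspace sketch, not kernel
code): the cached counters only move forward, so a stale read can only
under-count completions. That over-estimates inflight, so the check can
only trip early, never late:

#include <assert.h>

/* Toy model of io_req_over_limit()'s arithmetic, for reasoning only. */
static int over_limit(unsigned int sq_head, unsigned int cq_tail,
		      unsigned int overflow, unsigned int limit)
{
	unsigned int inflight = sq_head - (cq_tail + overflow);

	return inflight >= limit;
}

int main(void)
{
	/*
	 * Stale view: cq_tail read as 100 while the real tail is 110.
	 * The stale view sees inflight 200 and throttles; the real view
	 * sees 190 and would not -- the error is in the safe direction.
	 */
	assert(over_limit(300, 100, 0, 200));
	assert(!over_limit(300, 110, 0, 200));
	return 0;
}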

-- 
Jens Axboe

