io-uring.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jackie Liu <liuyun01@kylinos.cn>
To: Jens Axboe <axboe@kernel.dk>
Cc: io-uring@vger.kernel.org
Subject: Re: [PATCH] io_uring: fix race with shadow drain deferrals
Date: Thu, 21 Nov 2019 09:35:22 +0800	[thread overview]
Message-ID: <890E4F5B-DDA2-40EF-B7AD-3C63EFA20D93@kylinos.cn> (raw)
In-Reply-To: <1C6D35BF-C89B-4AB9-83CD-0A6B676E4752@kylinos.cn>



> 2019年11月21日 09:32,Jackie Liu <liuyun01@kylinos.cn> 写道:
> 
> 2019年11月21日 07:58,Jens Axboe <axboe@kernel.dk> 写道:
> 
>> 
>> On 11/20/19 4:07 PM, Jens Axboe wrote:
>>> When we go and queue requests with drain, we check if we need to defer
>>> based on sequence. This is done safely under the lock, but then we drop
>>> the lock before actually inserting the shadow. If the original request
>>> is found on the deferred list by another completion in the mean time,
>>> it could have been started AND completed by the time we insert the
>>> shadow, which will stall the queue.
>>> 
>>> After re-grabbing the completion lock, check if the original request is
>>> still in the deferred list. If it isn't, then we know that someone else
>>> already found and issued it. If that happened, then our job is done, we
>>> can simply free the shadow.
>>> 
>>> Cc: Jackie Liu <liuyun01@kylinos.cn>
>>> Fixes: 4fe2c963154c ("io_uring: add support for link with drain")
>>> Signed-off-by: Jens Axboe <axboe@kernel.dk>
>> 
>> BTW, the other solution here is to not release the completion_lock if
>> we're going to return -EIOCBQUEUED, and let the caller do what it needs
>> before releasing it. That'd look something like this, with some sparse
>> annotations to keep things happy.
>> 
>> I think the original I posted here is easier to follow, and the
>> deferral list is going to be tiny in general so it won't really add
>> any extra overhead.
>> 
>> Let me know what you think and prefer.
>> 
>> diff --git a/fs/io_uring.c b/fs/io_uring.c
>> index 6175e2e195c0..0d1f33bcedc0 100644
>> --- a/fs/io_uring.c
>> +++ b/fs/io_uring.c
>> @@ -2552,6 +2552,11 @@ static int io_async_cancel(struct io_kiocb *req, const struct io_uring_sqe *sqe,
>> 	return 0;
>> }
>> 
>> +/*
>> + * Returns with ctx->completion_lock held if -EIOCBQUEUED is returned, so
>> + * the caller can make decisions based on the deferral without worrying about
>> + * the request being found and issued in the mean time.
>> + */
>> static int io_req_defer(struct io_kiocb *req)
>> {
>> 	const struct io_uring_sqe *sqe = req->submit.sqe;
>> @@ -2579,7 +2584,7 @@ static int io_req_defer(struct io_kiocb *req)
>> 
>> 	trace_io_uring_defer(ctx, req, false);
>> 	list_add_tail(&req->list, &ctx->defer_list);
>> -	spin_unlock_irq(&ctx->completion_lock);
>> +	__release(&ctx->completion_lock);
>> 	return -EIOCBQUEUED;
>> }
>> 
>> @@ -2954,6 +2959,7 @@ static void __io_queue_sqe(struct io_kiocb *req)
>> 
>> static void io_queue_sqe(struct io_kiocb *req)
>> {
>> +	struct io_ring_ctx *ctx = req->ctx;
>> 	int ret;
>> 
>> 	ret = io_req_defer(req);
>> @@ -2963,6 +2969,9 @@ static void io_queue_sqe(struct io_kiocb *req)
>> 			if (req->flags & REQ_F_LINK)
>> 				req->flags |= REQ_F_FAIL_LINK;
>> 			io_double_put_req(req);
>> +		} else {
>> +			__acquire(&ctx->completion_lock);
>> +			spin_unlock_irq(&ctx->completion_lock);
>> 		}
>> 	} else
>> 		__io_queue_sqe(req);
>> @@ -3001,16 +3010,17 @@ static void io_queue_link_head(struct io_kiocb *req, struct io_kiocb *shadow)
>> 				__io_free_req(shadow);
>> 			return;
>> 		}
>> +		__acquire(&ctx->completion_lock);
>> 	} else {
>> 		/*
>> 		 * If ret == 0 means that all IOs in front of link io are
>> 		 * running done. let's queue link head.
>> 		 */
>> 		need_submit = true;
>> +		spin_lock_irq(&ctx->completion_lock);
>> 	}
>> 
>> 	/* Insert shadow req to defer_list, blocking next IOs */
>> -	spin_lock_irq(&ctx->completion_lock);
>> 	trace_io_uring_defer(ctx, shadow, true);
>> 	list_add_tail(&shadow->list, &ctx->defer_list);
>> 	spin_unlock_irq(&ctx->completion_lock);
> 
> This is indeed a potential lock issue, thanks. I prefer this solution; it is clearer than the first one.
> But it may be a bit difficult for other people who read the code; maybe use 'io_req_defer_may_lock'?
> 
> How about this?
> 
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 5ad652f..6fdaeb1 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -2469,7 +2469,7 @@ static int io_async_cancel(struct io_kiocb *req, const struct io_uring_sqe *sqe,
>        return 0;
> }
> 
> -static int io_req_defer(struct io_kiocb *req)
> +static int __io_req_defer(struct io_kiocb *req)
> {
>        const struct io_uring_sqe *sqe = req->submit.sqe;
>        struct io_uring_sqe *sqe_copy;
> @@ -2495,8 +2495,21 @@ static int io_req_defer(struct io_kiocb *req)
> 
>        trace_io_uring_defer(ctx, req, false);
>        list_add_tail(&req->list, &ctx->defer_list);
> +
> +       return -EIOCBQUEUED;
> +}
> +
> +static int io_req_defer(struct io_kiocb *req)
> +{
> +       int ret = __io_req_defer(req);

There is a problem here that needs fixing: the lock should only be dropped when -EIOCBQUEUED is returned.

static int io_req_defer(struct io_kiocb *req)
{
	int ret = __io_req_defer(req);
	if (ret == -EIOCBQUEUED)
		spin_unlock_irq(&ctx->completion_lock);
	return ret;
}

>        spin_unlock_irq(&ctx->completion_lock);
> -       return-EIOCBQUEUED;
> +       return ret;
> +}
> +
> +static int io_req_defer_may_lock(struct io_kiocb *req)
> +{
> +       return __io_req_defer(req);
> +
> }
> 
> static int __io_submit_sqe(struct io_kiocb *req, struct io_kiocb **nxt,
> @@ -2927,7 +2940,7 @@ static int io_queue_link_head(struct io_kiocb *req, struct io_kiocb *shadow)
>         * list.
>         */
>        req->flags |= REQ_F_IO_DRAIN;
> -       ret = io_req_defer(req);
> +       ret = io_req_defer_may_lock(req);
>        if (ret) {
>                if (ret != -EIOCBQUEUED) {
>                        io_cqring_add_event(req, ret);
> @@ -2941,10 +2954,10 @@ static int io_queue_link_head(struct io_kiocb *req, struct io_kiocb *shadow)
>                 * running done. let's queue link head.
>                 */
>                need_submit = true;
> +               spin_lock_irq(&ctx->completion_lock);
>        }
> 
>        /* Insert shadow req to defer_list, blocking next IOs */
> -       spin_lock_irq(&ctx->completion_lock);
>        trace_io_uring_defer(ctx, shadow, true);
>        list_add_tail(&shadow->list, &ctx->defer_list);
>        spin_unlock_irq(&ctx->completion_lock);
> 
> --
> BR, Jackie Liu


--
BR, Jackie Liu




  reply	other threads:[~2019-11-21  1:35 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-11-20 23:07 [PATCH] io_uring: fix race with shadow drain deferrals Jens Axboe
2019-11-20 23:58 ` Jens Axboe
2019-11-21  1:32   ` Jackie Liu
2019-11-21  1:35     ` Jackie Liu [this message]
2019-11-21  1:40       ` Jens Axboe
2019-11-21  1:49         ` Jens Axboe
2019-11-21  1:57           ` Jackie Liu
2019-11-20 23:14             ` Jens Axboe
     [not found]               ` <57EF3B0C-A6D3-45D5-A689-B8090F750C1E@kylinos.cn>
2019-11-20 23:03                 ` Jens Axboe
2019-11-21  8:54           ` [PATCH] io_uring: drain next sqe instead of shadowing Pavel Begunkov
     [not found]             ` <A12FD0FF-3C4F-46BE-8ABB-AA732002A9CA@kylinos.cn>
2019-11-21  9:43               ` Pavel Begunkov
     [not found]                 ` <5dd68282.1c69fb81.110a.43a7SMTPIN_ADDED_BROKEN@mx.google.com>
2019-11-21 12:40                   ` Pavel Begunkov
     [not found]                     ` <5dd68820.1c69fb81.64e0b.4340SMTPIN_ADDED_BROKEN@mx.google.com>
2019-11-21 13:47                       ` Jens Axboe
     [not found]                         ` <5dd69c7f.1c69fb81.8868.e3c2SMTPIN_ADDED_BROKEN@mx.google.com>
2019-11-21 13:54                           ` Jens Axboe
     [not found]                         ` <5dd69c43.1c69fb81.6589a.b4f1SMTPIN_ADDED_BROKEN@mx.google.com>
2019-11-21 14:28                           ` Pavel Begunkov
2019-11-21 13:53                             ` Jens Axboe
2019-11-21 15:23                               ` Pavel Begunkov
2019-11-21 13:50                                 ` Jens Axboe
2019-11-21  1:39     ` [PATCH] io_uring: fix race with shadow drain deferrals Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=890E4F5B-DDA2-40EF-B7AD-3C63EFA20D93@kylinos.cn \
    --to=liuyun01@kylinos.cn \
    --cc=axboe@kernel.dk \
    --cc=io-uring@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).