From: Pavel Begunkov <asml.silence@gmail.com>
To: Jens Axboe <axboe@kernel.dk>,
	linux-block@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: Re: [PATCH 2/3] io_uring: Fix broken links with offloading
Date: Fri, 25 Oct 2019 12:36:14 +0300	[thread overview]
Message-ID: <dd88f5be-930b-53e5-c3b6-12927d6634b1@gmail.com> (raw)
In-Reply-To: <bd0eaa7729e3b8b599a25167df0a4ee583da69cc.1571991701.git.asml.silence@gmail.com>


On 25/10/2019 12:31, Pavel Begunkov (Silence) wrote:
> From: Pavel Begunkov <asml.silence@gmail.com>
> 
> io_sq_thread() processes sqes in batches of 8 without considering links.
> As a result, links will be randomly subdivided.
> 
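To illustrate the breakage, here is a minimal userspace sketch (assuming
liburing's API; the helper name is made up and error handling is elided).
The two SQEs below form one chain via IOSQE_IO_LINK, but nothing stops the
fixed batch of 8 in io_sq_thread() from cutting the chain between them:

#include <liburing.h>
#include <sys/uio.h>

static int submit_linked_pair(struct io_uring *ring, int fd,
			      struct iovec *iov)
{
	struct io_uring_sqe *sqe;

	/* first link in the chain: write the buffer */
	sqe = io_uring_get_sqe(ring);
	io_uring_prep_writev(sqe, fd, iov, 1, 0);
	sqe->flags |= IOSQE_IO_LINK;	/* the next SQE depends on this one */

	/* second link: fsync must run only after the write completes */
	sqe = io_uring_get_sqe(ring);
	io_uring_prep_fsync(sqe, fd, 0);

	/* with IORING_SETUP_SQPOLL this publishes the new tail and wakes
	 * the SQ thread if needed */
	return io_uring_submit(ring);
}

If the old code's batch boundary fell between the writev and the fsync,
the second half started a fresh chain instead of continuing the first one.
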
> The easiest way to fix it is to call io_get_sqring() inside
> io_submit_sqes(), as io_ring_submit() does.
> 
> Downsides:
> 1. It removes the optimisation of not grabbing mm_struct for fixed files.
> 2. It submits all sqes in one go, without finer-grained scheduling
> with CQ processing.
> 
Is the optimisation of not grabbing the mm for fixed files critical?
I'd like to put it back later, after some cleanup.
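One way it could come back after the cleanup is to grab the mm lazily,
per SQE, inside the new io_submit_sqes() loop. A rough, untested sketch
(reusing io_sqe_needs_user() from the hunk removed below):

	for (i = 0; i < nr; i++) {
		struct sqe_submit s;

		if (!io_get_sqring(ctx, &s))
			break;

		/* take the mm only once an SQE actually needs user memory */
		if (io_sqe_needs_user(s.sqe) && !cur_mm) {
			mm_fault = !mmget_not_zero(ctx->sqo_mm);
			if (!mm_fault) {
				use_mm(ctx->sqo_mm);
				cur_mm = ctx->sqo_mm;
			}
		}

		/* rest of the loop unchanged */
	}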

> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
> ---
>  fs/io_uring.c | 62 +++++++++++++++++++++++++++------------------------
>  1 file changed, 33 insertions(+), 29 deletions(-)
> 
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 0e141d905a5b..949c82a40d16 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -735,6 +735,14 @@ static unsigned io_cqring_events(struct io_rings *rings)
>  	return READ_ONCE(rings->cq.tail) - READ_ONCE(rings->cq.head);
>  }
>  
> +static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx)
> +{
> +	struct io_rings *rings = ctx->rings;
> +
> +	/* make sure SQ entry isn't read before tail */
> +	return smp_load_acquire(&rings->sq.tail) - ctx->cached_sq_head;
> +}
> +
>  /*
>   * Find and free completed poll iocbs
>   */
> @@ -2560,8 +2568,8 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s)
>  	return false;
>  }
>  
> -static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
> -			  unsigned int nr, bool has_user, bool mm_fault)
> +static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
> +			  bool has_user, bool mm_fault)
>  {
>  	struct io_submit_state state, *statep = NULL;
>  	struct io_kiocb *link = NULL;
> @@ -2575,6 +2583,11 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
>  	}
>  
>  	for (i = 0; i < nr; i++) {
> +		struct sqe_submit s;
> +
> +		if (!io_get_sqring(ctx, &s))
> +			break;
> +
>  		/*
>  		 * If previous wasn't linked and we have a linked command,
>  		 * that's the end of the chain. Submit the previous link.
> @@ -2584,9 +2597,9 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
>  			link = NULL;
>  			shadow_req = NULL;
>  		}
> -		prev_was_link = (sqes[i].sqe->flags & IOSQE_IO_LINK) != 0;
> +		prev_was_link = (s.sqe->flags & IOSQE_IO_LINK) != 0;
>  
> -		if (link && (sqes[i].sqe->flags & IOSQE_IO_DRAIN)) {
> +		if (link && (s.sqe->flags & IOSQE_IO_DRAIN)) {
>  			if (!shadow_req) {
>  				shadow_req = io_get_req(ctx, NULL);
>  				if (unlikely(!shadow_req))
> @@ -2594,18 +2607,18 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
>  				shadow_req->flags |= (REQ_F_IO_DRAIN | REQ_F_SHADOW_DRAIN);
>  				refcount_dec(&shadow_req->refs);
>  			}
> -			shadow_req->sequence = sqes[i].sequence;
> +			shadow_req->sequence = s.sequence;
>  		}
>  
>  out:
>  		if (unlikely(mm_fault)) {
> -			io_cqring_add_event(ctx, sqes[i].sqe->user_data,
> +			io_cqring_add_event(ctx, s.sqe->user_data,
>  						-EFAULT);
>  		} else {
> -			sqes[i].has_user = has_user;
> -			sqes[i].needs_lock = true;
> -			sqes[i].needs_fixed_file = true;
> -			io_submit_sqe(ctx, &sqes[i], statep, &link);
> +			s.has_user = has_user;
> +			s.needs_lock = true;
> +			s.needs_fixed_file = true;
> +			io_submit_sqe(ctx, &s, statep, &link);
>  			submitted++;
>  		}
>  	}
> @@ -2620,7 +2633,6 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
>  
>  static int io_sq_thread(void *data)
>  {
> -	struct sqe_submit sqes[IO_IOPOLL_BATCH];
>  	struct io_ring_ctx *ctx = data;
>  	struct mm_struct *cur_mm = NULL;
>  	mm_segment_t old_fs;
> @@ -2635,8 +2647,8 @@ static int io_sq_thread(void *data)
>  
>  	timeout = inflight = 0;
>  	while (!kthread_should_park()) {
> -		bool all_fixed, mm_fault = false;
> -		int i;
> +		bool mm_fault = false;
> +		unsigned int to_submit;
>  
>  		if (inflight) {
>  			unsigned nr_events = 0;
> @@ -2656,7 +2668,8 @@ static int io_sq_thread(void *data)
>  				timeout = jiffies + ctx->sq_thread_idle;
>  		}
>  
> -		if (!io_get_sqring(ctx, &sqes[0])) {
> +		to_submit = io_sqring_entries(ctx);
> +		if (!to_submit) {
>  			/*
>  			 * We're polling. If we're within the defined idle
>  			 * period, then let us spin without work before going
> @@ -2687,7 +2700,8 @@ static int io_sq_thread(void *data)
>  			/* make sure to read SQ tail after writing flags */
>  			smp_mb();
>  
> -			if (!io_get_sqring(ctx, &sqes[0])) {
> +			to_submit = io_sqring_entries(ctx);
> +			if (!to_submit) {
>  				if (kthread_should_park()) {
>  					finish_wait(&ctx->sqo_wait, &wait);
>  					break;
> @@ -2705,19 +2719,8 @@ static int io_sq_thread(void *data)
>  			ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP;
>  		}
>  
> -		i = 0;
> -		all_fixed = true;
> -		do {
> -			if (all_fixed && io_sqe_needs_user(sqes[i].sqe))
> -				all_fixed = false;
> -
> -			i++;
> -			if (i == ARRAY_SIZE(sqes))
> -				break;
> -		} while (io_get_sqring(ctx, &sqes[i]));
> -
>  		/* Unless all new commands are FIXED regions, grab mm */
> -		if (!all_fixed && !cur_mm) {
> +		if (!cur_mm) {
>  			mm_fault = !mmget_not_zero(ctx->sqo_mm);
>  			if (!mm_fault) {
>  				use_mm(ctx->sqo_mm);
> @@ -2725,8 +2728,9 @@ static int io_sq_thread(void *data)
>  			}
>  		}
>  
> -		inflight += io_submit_sqes(ctx, sqes, i, cur_mm != NULL,
> -						mm_fault);
> +		to_submit = min(to_submit, ctx->sq_entries);
> +		inflight += io_submit_sqes(ctx, to_submit, cur_mm != NULL,
> +					   mm_fault);
>  
>  		/* Commit SQ ring head once we've consumed all SQEs */
>  		io_commit_sqring(ctx);
> 
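
One more note on io_sqring_entries(): the smp_load_acquire() on sq.tail
pairs with the release store the producer does when it publishes new
entries, so every SQE counted here is fully written before we look at it.
The userspace side, in rough C11 terms (names illustrative; liburing does
the equivalent when it bumps the tail):

#include <linux/io_uring.h>
#include <stdatomic.h>

static void publish_sqe(struct io_uring_sqe *sq_array,
			_Atomic unsigned *ktail, unsigned mask,
			const struct io_uring_sqe *src)
{
	/* only the producer writes the tail, so a relaxed load is enough */
	unsigned tail = atomic_load_explicit(ktail, memory_order_relaxed);

	sq_array[tail & mask] = *src;	/* fill the entry first... */
	/* ...then publish it; pairs with the kernel's smp_load_acquire() */
	atomic_store_explicit(ktail, tail + 1, memory_order_release);
}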

-- 
Yours sincerely,
Pavel Begunkov

