All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ming Lei <ming.lei@redhat.com>
To: Salman Qazi <sqazi@google.com>
Cc: Jens Axboe <axboe@kernel.dk>,
	Bart Van Assche <bvanassche@acm.org>,
	linux-block@vger.kernel.org, linux-kernel@vger.kernel.org,
	Jesse Barnes <jsbarnes@google.com>,
	Gwendal Grignou <gwendal@google.com>,
	Hannes Reinecke <hare@suse.com>, Christoph Hellwig <hch@lst.de>
Subject: Re: [PATCH] block: Limit number of items taken from the I/O scheduler in one go
Date: Thu, 6 Feb 2020 18:18:33 +0800	[thread overview]
Message-ID: <20200206101833.GA20943@ming.t460p> (raw)
In-Reply-To: <20200205195706.49438-1-sqazi@google.com>

On Wed, Feb 05, 2020 at 11:57:06AM -0800, Salman Qazi wrote:
> Flushes bypass the I/O scheduler and get added to hctx->dispatch
> in blk_mq_sched_bypass_insert.  This can happen while a kworker is running
> hctx->run_work work item and is past the point in
> blk_mq_sched_dispatch_requests where hctx->dispatch is checked.
> 
> The blk_mq_do_dispatch_sched call is not guaranteed to end in bounded time,
> because the I/O scheduler can feed an arbitrary number of commands.
> 
> Since we have only one hctx->run_work, the commands waiting in
> hctx->dispatch will wait an arbitrary length of time for run_work to be
> rerun.
> 
> A similar phenomenon exists with dispatches from the software queue.
> 
> The solution is to poll hctx->dispatch in blk_mq_do_dispatch_sched and
> blk_mq_do_dispatch_ctx and return from the run_work handler and let it
> rerun.
> 
> Signed-off-by: Salman Qazi <sqazi@google.com>
> ---
>  block/blk-mq-sched.c | 37 +++++++++++++++++++++++++++++++------
>  1 file changed, 31 insertions(+), 6 deletions(-)
> 
> diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
> index ca22afd47b3d..52249fddeb66 100644
> --- a/block/blk-mq-sched.c
> +++ b/block/blk-mq-sched.c
> @@ -84,12 +84,16 @@ void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx)
>   * Only SCSI implements .get_budget and .put_budget, and SCSI restarts
>   * its queue by itself in its completion handler, so we don't need to
>   * restart queue if .get_budget() returns BLK_STS_NO_RESOURCE.
> + *
> + * Returns true if hctx->dispatch was found non-empty and
> + * run_work has to be run again.
>   */
> -static void blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
> +static bool blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
>  {
>  	struct request_queue *q = hctx->queue;
>  	struct elevator_queue *e = q->elevator;
>  	LIST_HEAD(rq_list);
> +	bool ret = false;
>  
>  	do {
>  		struct request *rq;
> @@ -97,6 +101,11 @@ static void blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
>  		if (e->type->ops.has_work && !e->type->ops.has_work(hctx))
>  			break;
>  
> +		if (!list_empty_careful(&hctx->dispatch)) {
> +			ret = true;
> +			break;
> +		}
> +
>  		if (!blk_mq_get_dispatch_budget(hctx))
>  			break;
>  
> @@ -113,6 +122,8 @@ static void blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
>  		 */
>  		list_add(&rq->queuelist, &rq_list);
>  	} while (blk_mq_dispatch_rq_list(q, &rq_list, true));
> +
> +	return ret;
>  }
>  
>  static struct blk_mq_ctx *blk_mq_next_ctx(struct blk_mq_hw_ctx *hctx,
> @@ -130,16 +141,25 @@ static struct blk_mq_ctx *blk_mq_next_ctx(struct blk_mq_hw_ctx *hctx,
>   * Only SCSI implements .get_budget and .put_budget, and SCSI restarts
>   * its queue by itself in its completion handler, so we don't need to
>   * restart queue if .get_budget() returns BLK_STS_NO_RESOURCE.
> + *
> + * Returns true if hctx->dispatch was found non-empty and
> + * run_work has to be run again.
>   */
> -static void blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
> +static bool blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
>  {
>  	struct request_queue *q = hctx->queue;
>  	LIST_HEAD(rq_list);
>  	struct blk_mq_ctx *ctx = READ_ONCE(hctx->dispatch_from);
> +	bool ret = false;
>  
>  	do {
>  		struct request *rq;
>  
> +		if (!list_empty_careful(&hctx->dispatch)) {
> +			ret = true;
> +			break;
> +		}
> +
>  		if (!sbitmap_any_bit_set(&hctx->ctx_map))
>  			break;
>  
> @@ -165,6 +185,7 @@ static void blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
>  	} while (blk_mq_dispatch_rq_list(q, &rq_list, true));
>  
>  	WRITE_ONCE(hctx->dispatch_from, ctx);
> +	return ret;
>  }
>  
>  void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
> @@ -172,6 +193,7 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
>  	struct request_queue *q = hctx->queue;
>  	struct elevator_queue *e = q->elevator;
>  	const bool has_sched_dispatch = e && e->type->ops.dispatch_request;
> +	bool run_again = false;
>  	LIST_HEAD(rq_list);
>  
>  	/* RCU or SRCU read lock is needed before checking quiesced flag */
> @@ -208,19 +230,22 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
>  		blk_mq_sched_mark_restart_hctx(hctx);
>  		if (blk_mq_dispatch_rq_list(q, &rq_list, false)) {
>  			if (has_sched_dispatch)
> -				blk_mq_do_dispatch_sched(hctx);
> +				run_again = blk_mq_do_dispatch_sched(hctx);
>  			else
> -				blk_mq_do_dispatch_ctx(hctx);
> +				run_again = blk_mq_do_dispatch_ctx(hctx);
>  		}
>  	} else if (has_sched_dispatch) {
> -		blk_mq_do_dispatch_sched(hctx);
> +		run_again = blk_mq_do_dispatch_sched(hctx);
>  	} else if (hctx->dispatch_busy) {
>  		/* dequeue request one by one from sw queue if queue is busy */
> -		blk_mq_do_dispatch_ctx(hctx);
> +		run_again = blk_mq_do_dispatch_ctx(hctx);
>  	} else {
>  		blk_mq_flush_busy_ctxs(hctx, &rq_list);
>  		blk_mq_dispatch_rq_list(q, &rq_list, false);
>  	}
> +
> +	if (run_again)
> +		blk_mq_run_hw_queue(hctx, true);

One improvement may be to run again locally in this function by
limited times(such as 1) first, then switch to blk_mq_run_hw_queue()
if run again is still needed.

This way may save one async run hw queue.

Thanks,
Ming


  reply	other threads:[~2020-02-06 10:18 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-01-30 19:34 Hung tasks with multiple partitions Salman Qazi
2020-01-30 20:49 ` Bart Van Assche
2020-01-30 21:02   ` Salman Qazi
     [not found]     ` <20200203204554.119849-1-sqazi@google.com>
2020-02-03 20:59       ` [PATCH] block: Limit number of items taken from the I/O scheduler in one go Salman Qazi
2020-02-04  3:47         ` Bart Van Assche
2020-02-04  9:20         ` Ming Lei
2020-02-04 18:26           ` Salman Qazi
2020-02-04 19:37             ` Salman Qazi
2020-02-05  4:55               ` Ming Lei
2020-02-05 19:57                 ` Salman Qazi
2020-02-06 10:18                   ` Ming Lei [this message]
2020-02-06 21:12                     ` Salman Qazi
2020-02-07  2:07                       ` Ming Lei
2020-02-07 15:26                       ` Bart Van Assche
2020-02-07 18:45                         ` Salman Qazi
2020-02-07 19:04                           ` Salman Qazi
2020-02-07 20:19                           ` Bart Van Assche
2020-02-07 20:37                             ` Salman Qazi
2020-04-20 16:42                               ` Doug Anderson
2020-04-23 20:13                                 ` Jesse Barnes
2020-04-23 20:34                                   ` Jens Axboe
2020-04-23 20:40                                     ` Salman Qazi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200206101833.GA20943@ming.t460p \
    --to=ming.lei@redhat.com \
    --cc=axboe@kernel.dk \
    --cc=bvanassche@acm.org \
    --cc=gwendal@google.com \
    --cc=hare@suse.com \
    --cc=hch@lst.de \
    --cc=jsbarnes@google.com \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=sqazi@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.