Linux-Block Archive on lore.kernel.org
 help / color / Atom feed
From: Christoph Hellwig <hch@infradead.org>
To: Ming Lei <ming.lei@redhat.com>
Cc: Jens Axboe <axboe@kernel.dk>,
	linux-block@vger.kernel.org, Sagi Grimberg <sagi@grimberg.me>,
	Baolin Wang <baolin.wang7@gmail.com>,
	Christoph Hellwig <hch@infradead.org>
Subject: Re: [PATCH V6 6/6] blk-mq: support batching dispatch in case of io scheduler
Date: Mon, 29 Jun 2020 10:04:52 +0100
Message-ID: <20200629090452.GA4892@infradead.org> (raw)
In-Reply-To: <20200624230349.1046821-7-ming.lei@redhat.com>

> +/*
> + * We know bfq and deadline apply single scheduler queue instead of multi
> + * queue. However, the two are often used on single queue devices, also
> + * the current @hctx should affect the real device status most of times
> + * because of locality principle.
> + *
> + * So use current hctx->dispatch_busy directly for figuring out batching
> + * dispatch count.
> + */

I don't really understand this comment.  Also I think the code might
be cleaner if this function is inlined as an if/else in the only
caller.

> +static inline bool blk_mq_do_dispatch_rq_lists(struct blk_mq_hw_ctx *hctx,
> +		struct list_head *lists, bool multi_hctxs, unsigned count)
> +{
> +	bool ret;
> +
> +	if (!count)
> +		return false;
> +
> +	if (likely(!multi_hctxs))
> +		return blk_mq_dispatch_rq_list(hctx, lists, count);

Keeping these checks in the callers would keep things a little cleaner,
especially as the multi hctx case only really needs the lists argument.

> +		LIST_HEAD(list);
> +		struct request *new, *rq = list_first_entry(lists,
> +				struct request, queuelist);
> +		unsigned cnt = 0;
> +
> +		list_for_each_entry(new, lists, queuelist) {
> +			if (new->mq_hctx != rq->mq_hctx)
> +				break;
> +			cnt++;
> +		}
> +
> +		if (new->mq_hctx == rq->mq_hctx)
> +			list_splice_tail_init(lists, &list);
> +		else
> +			list_cut_before(&list, lists, &new->queuelist);
> +
> +		ret = blk_mq_dispatch_rq_list(rq->mq_hctx, &list, cnt);
> +	}

I think this has two issues:  for one ret should be ORed as any dispatch
or error should leave ret set.  Also we need to splice the dispatch
list back onto the main list here, otherwise we can lose those requests.

FYI, while reviewing this I ended up editing things into a shape I
could better understand.  Let me know what you think of this version?


diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 4c72073830f3cb..466dce99699ae4 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -7,6 +7,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/blk-mq.h>
+#include <linux/list_sort.h>
 
 #include <trace/events/block.h>
 
@@ -80,6 +81,38 @@ void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx)
 	blk_mq_run_hw_queue(hctx, true);
 }
 
+static int sched_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
+{
+	struct request *rqa = container_of(a, struct request, queuelist);
+	struct request *rqb = container_of(b, struct request, queuelist);
+
+	return rqa->mq_hctx > rqb->mq_hctx;
+}
+
+static bool blk_mq_dispatch_hctx_list(struct list_head *rq_list)
+{
+	struct blk_mq_hw_ctx *hctx =
+		list_first_entry(rq_list, struct request, queuelist)->mq_hctx;
+	struct request *rq;
+	LIST_HEAD(hctx_list);
+	unsigned int count = 0;
+	bool ret;
+
+	list_for_each_entry(rq, rq_list, queuelist) {
+		if (rq->mq_hctx != hctx) {
+			list_cut_before(&hctx_list, rq_list, &rq->queuelist);
+			goto dispatch;
+		}
+		count++;
+	}
+	list_splice_tail_init(rq_list, &hctx_list);
+
+dispatch:
+	ret = blk_mq_dispatch_rq_list(hctx, &hctx_list, count);
+	list_splice(&hctx_list, rq_list);
+	return ret;
+}
+
 #define BLK_MQ_BUDGET_DELAY	3		/* ms units */
 
 /*
@@ -90,20 +123,29 @@ void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx)
  * Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to
  * be run again.  This is necessary to avoid starving flushes.
  */
-static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
+static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
 {
 	struct request_queue *q = hctx->queue;
 	struct elevator_queue *e = q->elevator;
+	bool multi_hctxs = false, run_queue = false;
+	bool dispatched = false, busy = false;
+	unsigned int max_dispatch;
 	LIST_HEAD(rq_list);
-	int ret = 0;
-	struct request *rq;
+	int count = 0;
+
+	if (hctx->dispatch_busy)
+		max_dispatch = 1;
+	else
+		max_dispatch = hctx->queue->nr_requests;
 
 	do {
+		struct request *rq;
+
 		if (e->type->ops.has_work && !e->type->ops.has_work(hctx))
 			break;
 
 		if (!list_empty_careful(&hctx->dispatch)) {
-			ret = -EAGAIN;
+			busy = true;
 			break;
 		}
 
@@ -120,7 +162,7 @@ static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
 			 * no guarantee anyone will kick the queue.  Kick it
 			 * ourselves.
 			 */
-			blk_mq_delay_run_hw_queues(q, BLK_MQ_BUDGET_DELAY);
+			run_queue = true;
 			break;
 		}
 
@@ -130,7 +172,43 @@ static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
 		 * in blk_mq_dispatch_rq_list().
 		 */
 		list_add(&rq->queuelist, &rq_list);
-	} while (blk_mq_dispatch_rq_list(rq->mq_hctx, &rq_list, 1));
+		if (rq->mq_hctx != hctx)
+			multi_hctxs = true;
+	} while (++count < max_dispatch);
+
+	if (!count) {
+		if (run_queue)
+			blk_mq_delay_run_hw_queues(q, BLK_MQ_BUDGET_DELAY);
+	} else if (multi_hctxs) {
+		/*
+		 * Requests from different hctx may be dequeued from some
+		 * schedulers, such as bfq and deadline.
+		 *
+		 * Sort the requests in the list according to their hctx,
+		 * dispatch batching requests from same hctx at a time.
+		 */
+		list_sort(NULL, &rq_list, sched_rq_cmp);
+		do {
+			dispatched |= blk_mq_dispatch_hctx_list(&rq_list);
+		} while (!list_empty(&rq_list));
+	} else {
+		dispatched = blk_mq_dispatch_rq_list(hctx, &rq_list, count);
+	}
+
+	if (busy)
+		return -EAGAIN;
+	if (dispatched)
+		return 1;
+	return 0;
+}
+
+static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
+{
+	int ret;
+
+	do {
+		ret = __blk_mq_do_dispatch_sched(hctx);
+	} while (ret == 1);
 
 	return ret;
 }
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 29e0afc2612c6b..3d8c259b0f8c58 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1321,8 +1321,6 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
 	if (list_empty(list))
 		return false;
 
-	WARN_ON(!list_is_singular(list) && nr_budgets);
-
 	/*
 	 * Now process all the entries, sending them to the driver.
 	 */

  reply index

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-06-24 23:03 [PATCH V6 0/6] blk-mq: support batching dispatch from scheduler Ming Lei
2020-06-24 23:03 ` [PATCH V6 1/6] blk-mq: pass request queue into get/put budget callback Ming Lei
2020-06-24 23:03 ` [PATCH V6 2/6] blk-mq: pass hctx to blk_mq_dispatch_rq_list Ming Lei
2020-06-24 23:03 ` [PATCH V6 3/6] blk-mq: move getting driver tag and budget into one helper Ming Lei
2020-06-24 23:03 ` [PATCH V6 4/6] blk-mq: remove dead check from blk_mq_dispatch_rq_list Ming Lei
2020-06-25  7:42   ` Johannes Thumshirn
2020-06-24 23:03 ` [PATCH V6 5/6] blk-mq: pass obtained budget count to blk_mq_dispatch_rq_list Ming Lei
2020-06-29  8:11   ` Christoph Hellwig
2020-06-24 23:03 ` [PATCH V6 6/6] blk-mq: support batching dispatch in case of io scheduler Ming Lei
2020-06-29  9:04   ` Christoph Hellwig [this message]
2020-06-29 10:32     ` Ming Lei
2020-06-30  0:57       ` Ming Lei
2020-06-30  4:56         ` Christoph Hellwig
2020-06-30  4:55       ` Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200629090452.GA4892@infradead.org \
    --to=hch@infradead.org \
    --cc=axboe@kernel.dk \
    --cc=baolin.wang7@gmail.com \
    --cc=linux-block@vger.kernel.org \
    --cc=ming.lei@redhat.com \
    --cc=sagi@grimberg.me \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-Block Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-block/0 linux-block/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-block linux-block/ https://lore.kernel.org/linux-block \
		linux-block@vger.kernel.org
	public-inbox-index linux-block

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-block


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git