linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: John Garry <john.garry@huawei.com>
To: <axboe@kernel.dk>
Cc: <linux-block@vger.kernel.org>, <linux-kernel@vger.kernel.org>,
	<ming.lei@redhat.com>, <kashyap.desai@broadcom.com>,
	<hare@suse.de>, "John Garry" <john.garry@huawei.com>
Subject: [PATCH RFT 3/3] blk-mq: Optimise blk_mq_queue_tag_busy_iter() for shared tags
Date: Tue, 2 Nov 2021 19:27:35 +0800	[thread overview]
Message-ID: <1635852455-39935-4-git-send-email-john.garry@huawei.com> (raw)
In-Reply-To: <1635852455-39935-1-git-send-email-john.garry@huawei.com>

Kashyap reports high CPU usage in blk_mq_queue_tag_busy_iter() and callees
using megaraid SAS RAID card since moving to shared tags [0].

Previously, when shared tags was shared sbitmap, this function was less
than optimum since we would iter through all tags for all hctx's,
yet only ever match upto tagset depth number of rqs.

Since the change to shared tags, things are even less efficient if we have
parallel callers of blk_mq_queue_tag_busy_iter(). This is because in
bt_iter() -> blk_mq_find_and_get_req() there would be more contention on
accessing each request ref and tags->lock since they are now shared among
all HW queues.

Optimise by having separate calls to bt_for_each() for when we're using
shared tags. In this case no longer pass a hctx, as it is no longer
relevant, and teach bt_iter() about this.

Ming suggested something along the lines of this change, apart from a
different implementation.

[0] https://lore.kernel.org/linux-block/e4e92abbe9d52bcba6b8cc6c91c442cc@mail.gmail.com/

Signed-off-by: John Garry <john.garry@huawei.com>
---
 block/blk-mq-tag.c | 52 +++++++++++++++++++++++++++++++---------------
 1 file changed, 35 insertions(+), 17 deletions(-)

diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index bc233ea92adf..00515933c8a8 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -215,6 +215,7 @@ void blk_mq_put_tags(struct blk_mq_tags *tags, int *tag_array, int nr_tags)
 
 struct bt_iter_data {
 	struct blk_mq_hw_ctx *hctx;
+	struct request_queue *q;
 	busy_tag_iter_fn *fn;
 	void *data;
 	bool reserved;
@@ -238,11 +239,18 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
 {
 	struct bt_iter_data *iter_data = data;
 	struct blk_mq_hw_ctx *hctx = iter_data->hctx;
-	struct blk_mq_tags *tags = hctx->tags;
+	struct request_queue *q = iter_data->q;
 	bool reserved = iter_data->reserved;
+	struct blk_mq_tag_set *set = q->tag_set;
+	struct blk_mq_tags *tags;
 	struct request *rq;
 	bool ret = true;
 
+	if (blk_mq_is_shared_tags(set->flags))
+		tags = set->shared_tags;
+	else
+		tags = hctx->tags;
+
 	if (!reserved)
 		bitnr += tags->nr_reserved_tags;
 	/*
@@ -253,7 +261,7 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
 	if (!rq)
 		return true;
 
-	if (rq->q == hctx->queue && rq->mq_hctx == hctx)
+	if (rq->q == q && (!hctx || rq->mq_hctx == hctx))
 		ret = iter_data->fn(rq, iter_data->data, reserved);
 	blk_mq_put_rq_ref(rq);
 	return ret;
@@ -274,13 +282,15 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
  *		bitmap_tags member of struct blk_mq_tags.
  */
 static void bt_for_each(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt,
-			busy_tag_iter_fn *fn, void *data, bool reserved)
+			busy_tag_iter_fn *fn, void *data, bool reserved,
+			struct request_queue *q)
 {
 	struct bt_iter_data iter_data = {
 		.hctx = hctx,
 		.fn = fn,
 		.data = data,
 		.reserved = reserved,
+		.q = q,
 	};
 
 	sbitmap_for_each_set(&bt->sb, bt_iter, &iter_data);
@@ -460,9 +470,6 @@ EXPORT_SYMBOL(blk_mq_tagset_wait_completed_request);
 void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_tag_iter_fn *fn,
 		void *priv)
 {
-	struct blk_mq_hw_ctx *hctx;
-	int i;
-
 	/*
 	 * __blk_mq_update_nr_hw_queues() updates nr_hw_queues and queue_hw_ctx
 	 * while the queue is frozen. So we can use q_usage_counter to avoid
@@ -471,19 +478,30 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_tag_iter_fn *fn,
 	if (!percpu_ref_tryget(&q->q_usage_counter))
 		return;
 
-	queue_for_each_hw_ctx(q, hctx, i) {
-		struct blk_mq_tags *tags = hctx->tags;
-
-		/*
-		 * If no software queues are currently mapped to this
-		 * hardware queue, there's nothing to check
-		 */
-		if (!blk_mq_hw_queue_mapped(hctx))
-			continue;
+	if (blk_mq_is_shared_tags(q->tag_set->flags)) {
+		struct blk_mq_tags *tags = q->tag_set->shared_tags;
 
 		if (tags->nr_reserved_tags)
-			bt_for_each(hctx, &tags->breserved_tags, fn, priv, true);
-		bt_for_each(hctx, &tags->bitmap_tags, fn, priv, false);
+			bt_for_each(NULL, &tags->breserved_tags, fn, priv, true, q);
+		bt_for_each(NULL, &tags->bitmap_tags, fn, priv, false, q);
+	} else {
+		struct blk_mq_hw_ctx *hctx;
+		int i;
+
+		queue_for_each_hw_ctx(q, hctx, i) {
+			struct blk_mq_tags *tags = hctx->tags;
+
+			/*
+			 * If no software queues are currently mapped to this
+			 * hardware queue, there's nothing to check
+			 */
+			if (!blk_mq_hw_queue_mapped(hctx))
+				continue;
+
+			if (tags->nr_reserved_tags)
+				bt_for_each(hctx, &tags->breserved_tags, fn, priv, true, q);
+			bt_for_each(hctx, &tags->bitmap_tags, fn, priv, false, q);
+		}
 	}
 	blk_queue_exit(q);
 }
-- 
2.17.1


  parent reply	other threads:[~2021-11-02 11:33 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-11-02 11:27 [PATCH RFT 0/3] blk-mq: Optimise blk_mq_queue_tag_busy_iter() for shared tags John Garry
2021-11-02 11:27 ` [PATCH RFT 1/3] blk-mq: Drop busy_iter_fn blk_mq_hw_ctx argument John Garry
2021-11-09  3:02   ` Ming Lei
2021-11-28 10:39   ` Hannes Reinecke
2021-11-02 11:27 ` [PATCH RFT 2/3] blk-mq: Delete busy_iter_fn John Garry
2021-11-09  3:05   ` Ming Lei
2021-11-28 10:40   ` Hannes Reinecke
2021-11-02 11:27 ` John Garry [this message]
2021-11-28 10:43   ` [PATCH RFT 3/3] blk-mq: Optimise blk_mq_queue_tag_busy_iter() for shared tags Hannes Reinecke
2021-11-29  2:19   ` Ming Lei
2021-11-15 15:56 ` [PATCH RFT 0/3] " John Garry
2021-11-15 16:31   ` Kashyap Desai
2021-11-25 13:46   ` Kashyap Desai
2021-11-25 14:09     ` John Garry
2021-11-26 11:25       ` Kashyap Desai
2021-11-26 11:51         ` John Garry
2021-12-06  9:57           ` John Garry
2021-12-06 10:03             ` Kashyap Desai
2021-12-08 13:04               ` John Garry

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1635852455-39935-4-git-send-email-john.garry@huawei.com \
    --to=john.garry@huawei.com \
    --cc=axboe@kernel.dk \
    --cc=hare@suse.de \
    --cc=kashyap.desai@broadcom.com \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=ming.lei@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).