From: Sagi Grimberg <sagi@grimberg.me>
To: linux-nvme@lists.infradead.org, Christoph Hellwig <hch@lst.de>,
	Keith Busch <kbusch@kernel.org>, Jens Axboe <axboe@kernel.dk>
Cc: linux-block@vger.kernel.org, Ming Lin <mlin@kernel.org>,
	Chao Leng <lengchao@huawei.com>
Subject: [PATCH v5 1/2] blk-mq: add tagset quiesce interface
Date: Mon, 27 Jul 2020 16:10:21 -0700
Message-ID: <20200727231022.307602-2-sagi@grimberg.me>
In-Reply-To: <20200727231022.307602-1-sagi@grimberg.me>

Drivers that have shared tagsets may need to quiesce a potentially
large number of request queues that all share a single tagset (e.g.
nvme). Add an interface to quiesce all the queues on a given tagset.
This speeds up the quiesce considerably because the queues are
quiesced in parallel rather than serially.

For tagsets that have BLK_MQ_F_BLOCKING set, issue call_srcu() on all
hctxs in parallel so that all of them wait for the same SRCU grace
period, using a per-hctx heap-allocated rcu_synchronize. For tagsets
that don't have BLK_MQ_F_BLOCKING set, a single synchronize_rcu() is
sufficient.
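
For example, a driver can then quiesce and resume every queue that
shares its tag set with a single pair of calls. A minimal sketch,
assuming a controller structure that holds the tag set (patch 2/2
converts nvme along these lines):

	static void nvme_stop_queues(struct nvme_ctrl *ctrl)
	{
		/* quiesce all namespace queues sharing the tagset at once */
		blk_mq_quiesce_tagset(ctrl->tagset);
	}

	static void nvme_start_queues(struct nvme_ctrl *ctrl)
	{
		blk_mq_unquiesce_tagset(ctrl->tagset);
	}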

Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 block/blk-mq.c         | 69 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/blk-mq.h |  4 +++
 2 files changed, 73 insertions(+)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index abcf590f6238..c37e37354330 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -209,6 +209,45 @@ void blk_mq_quiesce_queue_nowait(struct request_queue *q)
 }
 EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait);
 
+static void blk_mq_quiesce_blocking_queue_async(struct request_queue *q)
+{
+	struct blk_mq_hw_ctx *hctx;
+	unsigned int i;
+
+	blk_mq_quiesce_queue_nowait(q);
+
+	queue_for_each_hw_ctx(q, hctx, i) {
+		WARN_ON_ONCE(!(hctx->flags & BLK_MQ_F_BLOCKING));
+		hctx->rcu_sync = kmalloc(sizeof(*hctx->rcu_sync), GFP_KERNEL);
+		if (!hctx->rcu_sync)
+			continue;	/* wait will fall back to synchronize_srcu() */
+
+		init_completion(&hctx->rcu_sync->completion);
+		init_rcu_head(&hctx->rcu_sync->head);
+		call_srcu(hctx->srcu, &hctx->rcu_sync->head,
+				wakeme_after_rcu);
+	}
+}
+
+static void blk_mq_quiesce_blocking_queue_async_wait(struct request_queue *q)
+{
+	struct blk_mq_hw_ctx *hctx;
+	unsigned int i;
+
+	queue_for_each_hw_ctx(q, hctx, i) {
+		WARN_ON_ONCE(!(hctx->flags & BLK_MQ_F_BLOCKING));
+		if (!hctx->rcu_sync) {
+			/* allocation failed, fall back to a synchronous wait */
+			synchronize_srcu(hctx->srcu);
+			continue;
+		}
+		wait_for_completion(&hctx->rcu_sync->completion);
+		destroy_rcu_head(&hctx->rcu_sync->head);
+		kfree(hctx->rcu_sync);
+		hctx->rcu_sync = NULL;
+	}
+}
+
 /**
  * blk_mq_quiesce_queue() - wait until all ongoing dispatches have finished
  * @q: request queue.
@@ -2884,6 +2923,36 @@ static void queue_set_hctx_shared(struct request_queue *q, bool shared)
 	}
 }
 
+void blk_mq_quiesce_tagset(struct blk_mq_tag_set *set)
+{
+	struct request_queue *q;
+
+	mutex_lock(&set->tag_list_lock);
+	if (set->flags & BLK_MQ_F_BLOCKING) {
+		list_for_each_entry(q, &set->tag_list, tag_set_list)
+			blk_mq_quiesce_blocking_queue_async(q);
+		list_for_each_entry(q, &set->tag_list, tag_set_list)
+			blk_mq_quiesce_blocking_queue_async_wait(q);
+	} else {
+		list_for_each_entry(q, &set->tag_list, tag_set_list)
+			blk_mq_quiesce_queue_nowait(q);
+		synchronize_rcu();
+	}
+	mutex_unlock(&set->tag_list_lock);
+}
+EXPORT_SYMBOL_GPL(blk_mq_quiesce_tagset);
+
+void blk_mq_unquiesce_tagset(struct blk_mq_tag_set *set)
+{
+	struct request_queue *q;
+
+	mutex_lock(&set->tag_list_lock);
+	list_for_each_entry(q, &set->tag_list, tag_set_list)
+		blk_mq_unquiesce_queue(q);
+	mutex_unlock(&set->tag_list_lock);
+}
+EXPORT_SYMBOL_GPL(blk_mq_unquiesce_tagset);
+
 static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set,
 					bool shared)
 {
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 23230c1d031e..a85f2dedc947 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -5,6 +5,7 @@
 #include <linux/blkdev.h>
 #include <linux/sbitmap.h>
 #include <linux/srcu.h>
+#include <linux/rcupdate_wait.h>
 
 struct blk_mq_tags;
 struct blk_flush_queue;
@@ -170,6 +171,7 @@ struct blk_mq_hw_ctx {
 	 */
 	struct list_head	hctx_list;
 
+	struct rcu_synchronize	*rcu_sync;
 	/**
 	 * @srcu: Sleepable RCU. Use as lock when type of the hardware queue is
 	 * blocking (BLK_MQ_F_BLOCKING). Must be the last member - see also
@@ -532,6 +534,8 @@ int blk_mq_map_queues(struct blk_mq_queue_map *qmap);
 void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
 
 void blk_mq_quiesce_queue_nowait(struct request_queue *q);
+void blk_mq_quiesce_tagset(struct blk_mq_tag_set *set);
+void blk_mq_unquiesce_tagset(struct blk_mq_tag_set *set);
 
 unsigned int blk_mq_rq_cpu(struct request *rq);
 
-- 
2.25.1


Thread overview: 40 messages

2020-07-27 23:10 [PATCH v5 0/2] improve nvme quiesce time for large amount of namespaces Sagi Grimberg
2020-07-27 23:10 ` [PATCH v5 1/2] blk-mq: add tagset quiesce interface Sagi Grimberg [this message]
2020-07-27 23:32   ` Keith Busch
2020-07-28  0:12     ` Sagi Grimberg
2020-07-28  1:40   ` Ming Lei
2020-07-28  1:51     ` Jens Axboe
2020-07-28  2:17       ` Ming Lei
2020-07-28  2:23         ` Jens Axboe
2020-07-28  2:28           ` Ming Lei
2020-07-28  2:32             ` Jens Axboe
2020-07-28  3:29               ` Sagi Grimberg
2020-07-28  3:25     ` Sagi Grimberg
2020-07-28  7:18   ` Christoph Hellwig
2020-07-28  7:48     ` Sagi Grimberg
2020-07-28  9:16     ` Ming Lei
2020-07-28  9:24       ` Sagi Grimberg
2020-07-28  9:33         ` Ming Lei
2020-07-28  9:37           ` Sagi Grimberg
2020-07-28  9:43             ` Sagi Grimberg
2020-07-28 10:10               ` Ming Lei
2020-07-28 10:57                 ` Christoph Hellwig
2020-07-28 14:13                 ` Paul E. McKenney
2020-07-28 10:58             ` Christoph Hellwig
2020-07-28 16:25               ` Sagi Grimberg
2020-07-28 13:54         ` Paul E. McKenney
2020-07-28 23:46           ` Sagi Grimberg
2020-07-29  0:31             ` Paul E. McKenney
2020-07-29  0:43               ` Sagi Grimberg
2020-07-29  0:59                 ` Keith Busch
2020-07-29  4:39                   ` Sagi Grimberg
2020-08-07  9:04                     ` Chao Leng
2020-08-07  9:24                       ` Ming Lei
2020-08-07  9:35                         ` Chao Leng
2020-07-29  4:10                 ` Paul E. McKenney
2020-07-29  4:37                   ` Sagi Grimberg
2020-07-27 23:10 ` [PATCH v5 2/2] nvme: use blk_mq_[un]quiesce_tagset Sagi Grimberg
2020-07-28  0:54   ` Sagi Grimberg
2020-07-28  3:21     ` Chao Leng
2020-07-28  3:34       ` Sagi Grimberg
2020-07-28  3:51         ` Chao Leng
