From: Sagi Grimberg <sagi@grimberg.me> To: Jens Axboe <axboe@kernel.dk>, linux-nvme@lists.infradead.org, Christoph Hellwig <hch@lst.de>, Keith Busch <kbusch@kernel.org> Cc: linux-block@vger.kernel.org, Ming Lin <mlin@kernel.org>, Chao Leng <lengchao@huawei.com> Subject: Re: [PATCH v4 1/2] blk-mq: add async quiesce interface Date: Mon, 27 Jul 2020 15:37:16 -0700 [thread overview] Message-ID: <e22c2f2f-6924-0090-d841-da2d84c606ae@grimberg.me> (raw) In-Reply-To: <fe247bae-8428-bca8-81b5-a7015bc39591@kernel.dk> >> +void blk_mq_quiesce_queue_async(struct request_queue *q) >> +{ >> + struct blk_mq_hw_ctx *hctx; >> + unsigned int i; >> + int rcu = false; >> + >> + blk_mq_quiesce_queue_nowait(q); >> + >> + queue_for_each_hw_ctx(q, hctx, i) { >> + hctx->rcu_sync = kmalloc(sizeof(*hctx->rcu_sync), GFP_KERNEL); >> + if (!hctx->rcu_sync) { >> + /* fallback to serial rcu sync */ >> + if (hctx->flags & BLK_MQ_F_BLOCKING) >> + synchronize_srcu(hctx->srcu); >> + else >> + rcu = true; >> + } else { >> + init_completion(&hctx->rcu_sync->completion); >> + init_rcu_head(&hctx->rcu_sync->head); >> + if (hctx->flags & BLK_MQ_F_BLOCKING) >> + call_srcu(hctx->srcu, &hctx->rcu_sync->head, >> + wakeme_after_rcu); >> + else >> + call_rcu(&hctx->rcu_sync->head, >> + wakeme_after_rcu); >> + } >> + } >> + if (rcu) >> + synchronize_rcu(); >> +} >> +EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_async); > > This won't always be async, and that might matter to some users. I think > it'd be better to put the fallback path into the _wait() part instead, > since the caller should expect that to be blocking/waiting as the name > implies. > > Nit picking, but... Makes sense.. I thought more about Keith suggestion for an interface that accepts a tagset. It allows us to decide what we do based on the tagset itself which is now passed in the interface. 
What do you think about: -- diff --git a/block/blk-mq.c b/block/blk-mq.c index abcf590f6238..d4b24aa1a766 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -209,6 +209,43 @@ void blk_mq_quiesce_queue_nowait(struct request_queue *q) } EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait); +static void blk_mq_quiesce_queue_async(struct request_queue *q) +{ + struct blk_mq_hw_ctx *hctx; + unsigned int i; + + blk_mq_quiesce_queue_nowait(q); + + queue_for_each_hw_ctx(q, hctx, i) { + if (!(hctx->flags & BLK_MQ_F_BLOCKING)) + continue; + + hctx->rcu_sync = kmalloc(sizeof(*hctx->rcu_sync), GFP_KERNEL); + if (!hctx->rcu_sync) + continue; + + init_completion(&hctx->rcu_sync->completion); + init_rcu_head(&hctx->rcu_sync->head); + call_srcu(hctx->srcu, &hctx->rcu_sync->head, + wakeme_after_rcu); + } +} + +static void blk_mq_quiesce_queue_async_wait(struct request_queue *q) +{ + struct blk_mq_hw_ctx *hctx; + unsigned int i; + + queue_for_each_hw_ctx(q, hctx, i) { + if (!hctx->rcu_sync) { + synchronize_srcu(hctx->srcu); + continue; + } + wait_for_completion(&hctx->rcu_sync->completion); + destroy_rcu_head(&hctx->rcu_sync->head); + } +} + /** * blk_mq_quiesce_queue() - wait until all ongoing dispatches have finished * @q: request queue. 
@@ -2884,6 +2921,39 @@ static void queue_set_hctx_shared(struct request_queue *q, bool shared) } } +void blk_mq_quiesce_tagset(struct blk_mq_tag_set *set) +{ + struct request_queue *q; + + mutex_lock(&set->tag_list_lock); + list_for_each_entry(q, &set->tag_list, tag_set_list) { + if (!(set->flags & BLK_MQ_F_BLOCKING)) + blk_mq_quiesce_queue_nowait(q); + else + blk_mq_quiesce_queue_async(q); + } + + if (!(set->flags & BLK_MQ_F_BLOCKING)) { + synchronize_rcu(); + } else { + list_for_each_entry(q, &set->tag_list, tag_set_list) + blk_mq_quiesce_queue_async_wait(q); + } + mutex_unlock(&set->tag_list_lock); +} +EXPORT_SYMBOL_GPL(blk_mq_quiesce_tagset); + +void blk_mq_unquiesce_tagset(struct blk_mq_tag_set *set) +{ + struct request_queue *q; + + mutex_lock(&set->tag_list_lock); + list_for_each_entry(q, &set->tag_list, tag_set_list) + blk_mq_unquiesce_queue(q); + mutex_unlock(&set->tag_list_lock); +} +EXPORT_SYMBOL_GPL(blk_mq_unquiesce_tagset); + static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set, bool shared) { diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 23230c1d031e..a85f2dedc947 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -5,6 +5,7 @@ #include <linux/blkdev.h> #include <linux/sbitmap.h> #include <linux/srcu.h> +#include <linux/rcupdate_wait.h> struct blk_mq_tags; struct blk_flush_queue; @@ -170,6 +171,7 @@ struct blk_mq_hw_ctx { */ struct list_head hctx_list; + struct rcu_synchronize *rcu_sync; /** * @srcu: Sleepable RCU. Use as lock when type of the hardware queue is * blocking (BLK_MQ_F_BLOCKING). 
Must be the last member - see also @@ -532,6 +534,8 @@ int blk_mq_map_queues(struct blk_mq_queue_map *qmap); void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues); void blk_mq_quiesce_queue_nowait(struct request_queue *q); +void blk_mq_quiesce_tagset(struct blk_mq_tag_set *set); +void blk_mq_unquiesce_tagset(struct blk_mq_tag_set *set); unsigned int blk_mq_rq_cpu(struct request *rq); -- And then nvme will use it: -- diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 05aa568a60af..c41df20996d7 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4557,23 +4557,13 @@ EXPORT_SYMBOL_GPL(nvme_start_freeze); void nvme_stop_queues(struct nvme_ctrl *ctrl) { - struct nvme_ns *ns; - - down_read(&ctrl->namespaces_rwsem); - list_for_each_entry(ns, &ctrl->namespaces, list) - blk_mq_quiesce_queue(ns->queue); - up_read(&ctrl->namespaces_rwsem); + blk_mq_quiesce_tagset(ctrl->tagset); } EXPORT_SYMBOL_GPL(nvme_stop_queues); void nvme_start_queues(struct nvme_ctrl *ctrl) { - struct nvme_ns *ns; - - down_read(&ctrl->namespaces_rwsem); - list_for_each_entry(ns, &ctrl->namespaces, list) - blk_mq_unquiesce_queue(ns->queue); - up_read(&ctrl->namespaces_rwsem); + blk_mq_unquiesce_tagset(ctrl->tagset); } EXPORT_SYMBOL_GPL(nvme_start_queues); -- Thoughts?
WARNING: multiple messages have this Message-ID (diff)
From: Sagi Grimberg <sagi@grimberg.me> To: Jens Axboe <axboe@kernel.dk>, linux-nvme@lists.infradead.org, Christoph Hellwig <hch@lst.de>, Keith Busch <kbusch@kernel.org> Cc: linux-block@vger.kernel.org, Ming Lin <mlin@kernel.org>, Chao Leng <lengchao@huawei.com> Subject: Re: [PATCH v4 1/2] blk-mq: add async quiesce interface Date: Mon, 27 Jul 2020 15:37:16 -0700 [thread overview] Message-ID: <e22c2f2f-6924-0090-d841-da2d84c606ae@grimberg.me> (raw) In-Reply-To: <fe247bae-8428-bca8-81b5-a7015bc39591@kernel.dk> >> +void blk_mq_quiesce_queue_async(struct request_queue *q) >> +{ >> + struct blk_mq_hw_ctx *hctx; >> + unsigned int i; >> + int rcu = false; >> + >> + blk_mq_quiesce_queue_nowait(q); >> + >> + queue_for_each_hw_ctx(q, hctx, i) { >> + hctx->rcu_sync = kmalloc(sizeof(*hctx->rcu_sync), GFP_KERNEL); >> + if (!hctx->rcu_sync) { >> + /* fallback to serial rcu sync */ >> + if (hctx->flags & BLK_MQ_F_BLOCKING) >> + synchronize_srcu(hctx->srcu); >> + else >> + rcu = true; >> + } else { >> + init_completion(&hctx->rcu_sync->completion); >> + init_rcu_head(&hctx->rcu_sync->head); >> + if (hctx->flags & BLK_MQ_F_BLOCKING) >> + call_srcu(hctx->srcu, &hctx->rcu_sync->head, >> + wakeme_after_rcu); >> + else >> + call_rcu(&hctx->rcu_sync->head, >> + wakeme_after_rcu); >> + } >> + } >> + if (rcu) >> + synchronize_rcu(); >> +} >> +EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_async); > > This won't always be async, and that might matter to some users. I think > it'd be better to put the fallback path into the _wait() part instead, > since the caller should expect that to be blocking/waiting as the name > implies. > > Nit picking, but... Makes sense.. I thought more about Keith suggestion for an interface that accepts a tagset. It allows us to decide what we do based on the tagset itself which is now passed in the interface. 
What do you think about: -- diff --git a/block/blk-mq.c b/block/blk-mq.c index abcf590f6238..d4b24aa1a766 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -209,6 +209,43 @@ void blk_mq_quiesce_queue_nowait(struct request_queue *q) } EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait); +static void blk_mq_quiesce_queue_async(struct request_queue *q) +{ + struct blk_mq_hw_ctx *hctx; + unsigned int i; + + blk_mq_quiesce_queue_nowait(q); + + queue_for_each_hw_ctx(q, hctx, i) { + if (!(hctx->flags & BLK_MQ_F_BLOCKING)) + continue; + + hctx->rcu_sync = kmalloc(sizeof(*hctx->rcu_sync), GFP_KERNEL); + if (!hctx->rcu_sync) + continue; + + init_completion(&hctx->rcu_sync->completion); + init_rcu_head(&hctx->rcu_sync->head); + call_srcu(hctx->srcu, &hctx->rcu_sync->head, + wakeme_after_rcu); + } +} + +static void blk_mq_quiesce_queue_async_wait(struct request_queue *q) +{ + struct blk_mq_hw_ctx *hctx; + unsigned int i; + + queue_for_each_hw_ctx(q, hctx, i) { + if (!hctx->rcu_sync) { + synchronize_srcu(hctx->srcu); + continue; + } + wait_for_completion(&hctx->rcu_sync->completion); + destroy_rcu_head(&hctx->rcu_sync->head); + } +} + /** * blk_mq_quiesce_queue() - wait until all ongoing dispatches have finished * @q: request queue. 
@@ -2884,6 +2921,39 @@ static void queue_set_hctx_shared(struct request_queue *q, bool shared) } } +void blk_mq_quiesce_tagset(struct blk_mq_tag_set *set) +{ + struct request_queue *q; + + mutex_lock(&set->tag_list_lock); + list_for_each_entry(q, &set->tag_list, tag_set_list) { + if (!(set->flags & BLK_MQ_F_BLOCKING)) + blk_mq_quiesce_queue_nowait(q); + else + blk_mq_quiesce_queue_async(q); + } + + if (!(set->flags & BLK_MQ_F_BLOCKING)) { + synchronize_rcu(); + } else { + list_for_each_entry(q, &set->tag_list, tag_set_list) + blk_mq_quiesce_queue_async_wait(q); + } + mutex_unlock(&set->tag_list_lock); +} +EXPORT_SYMBOL_GPL(blk_mq_quiesce_tagset); + +void blk_mq_unquiesce_tagset(struct blk_mq_tag_set *set) +{ + struct request_queue *q; + + mutex_lock(&set->tag_list_lock); + list_for_each_entry(q, &set->tag_list, tag_set_list) + blk_mq_unquiesce_queue(q); + mutex_unlock(&set->tag_list_lock); +} +EXPORT_SYMBOL_GPL(blk_mq_unquiesce_tagset); + static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set, bool shared) { diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 23230c1d031e..a85f2dedc947 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -5,6 +5,7 @@ #include <linux/blkdev.h> #include <linux/sbitmap.h> #include <linux/srcu.h> +#include <linux/rcupdate_wait.h> struct blk_mq_tags; struct blk_flush_queue; @@ -170,6 +171,7 @@ struct blk_mq_hw_ctx { */ struct list_head hctx_list; + struct rcu_synchronize *rcu_sync; /** * @srcu: Sleepable RCU. Use as lock when type of the hardware queue is * blocking (BLK_MQ_F_BLOCKING). 
Must be the last member - see also @@ -532,6 +534,8 @@ int blk_mq_map_queues(struct blk_mq_queue_map *qmap); void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues); void blk_mq_quiesce_queue_nowait(struct request_queue *q); +void blk_mq_quiesce_tagset(struct blk_mq_tag_set *set); +void blk_mq_unquiesce_tagset(struct blk_mq_tag_set *set); unsigned int blk_mq_rq_cpu(struct request *rq); -- And then nvme will use it: -- diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 05aa568a60af..c41df20996d7 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4557,23 +4557,13 @@ EXPORT_SYMBOL_GPL(nvme_start_freeze); void nvme_stop_queues(struct nvme_ctrl *ctrl) { - struct nvme_ns *ns; - - down_read(&ctrl->namespaces_rwsem); - list_for_each_entry(ns, &ctrl->namespaces, list) - blk_mq_quiesce_queue(ns->queue); - up_read(&ctrl->namespaces_rwsem); + blk_mq_quiesce_tagset(ctrl->tagset); } EXPORT_SYMBOL_GPL(nvme_stop_queues); void nvme_start_queues(struct nvme_ctrl *ctrl) { - struct nvme_ns *ns; - - down_read(&ctrl->namespaces_rwsem); - list_for_each_entry(ns, &ctrl->namespaces, list) - blk_mq_unquiesce_queue(ns->queue); - up_read(&ctrl->namespaces_rwsem); + blk_mq_unquiesce_tagset(ctrl->tagset); } EXPORT_SYMBOL_GPL(nvme_start_queues); -- Thoughts? _______________________________________________ Linux-nvme mailing list Linux-nvme@lists.infradead.org http://lists.infradead.org/mailman/listinfo/linux-nvme
next prev parent reply other threads:[~2020-07-27 22:37 UTC|newest] Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top 2020-07-27 22:07 [PATCH v4 0/2] improve quiesce time for large amount of namespaces Sagi Grimberg 2020-07-27 22:07 ` Sagi Grimberg 2020-07-27 22:07 ` [PATCH v4 1/2] blk-mq: add async quiesce interface Sagi Grimberg 2020-07-27 22:07 ` Sagi Grimberg 2020-07-27 22:13 ` Jens Axboe 2020-07-27 22:13 ` Jens Axboe 2020-07-27 22:37 ` Sagi Grimberg [this message] 2020-07-27 22:37 ` Sagi Grimberg 2020-07-27 22:07 ` [PATCH v4 2/2] nvme: improve quiesce time for large amount of namespaces Sagi Grimberg 2020-07-27 22:07 ` Sagi Grimberg
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=e22c2f2f-6924-0090-d841-da2d84c606ae@grimberg.me \ --to=sagi@grimberg.me \ --cc=axboe@kernel.dk \ --cc=hch@lst.de \ --cc=kbusch@kernel.org \ --cc=lengchao@huawei.com \ --cc=linux-block@vger.kernel.org \ --cc=linux-nvme@lists.infradead.org \ --cc=mlin@kernel.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.