From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: 
Received: from mx1.redhat.com ([209.132.183.28]:60542 "EHLO mx1.redhat.com"
	rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751101AbdLNCcI
	(ORCPT ); Wed, 13 Dec 2017 21:32:08 -0500
From: Ming Lei <ming.lei@redhat.com>
To: linux-nvme@lists.infradead.org, Christoph Hellwig, Jens Axboe,
	linux-block@vger.kernel.org
Cc: Bart Van Assche, Keith Busch, Sagi Grimberg, Yi Zhang,
	Johannes Thumshirn, Ming Lei
Subject: [PATCH V2 2/6] blk-mq: support concurrent blk_mq_quiesce_queue()
Date: Thu, 14 Dec 2017 10:30:59 +0800
Message-Id: <20171214023103.18272-3-ming.lei@redhat.com>
In-Reply-To: <20171214023103.18272-1-ming.lei@redhat.com>
References: <20171214023103.18272-1-ming.lei@redhat.com>
Sender: linux-block-owner@vger.kernel.org
List-Id: linux-block@vger.kernel.org

It turns out that blk_mq_freeze_queue() isn't stronger[1] than
blk_mq_quiesce_queue(): dispatch may still be in progress after the
queue is frozen. So in several cases, such as switching the io
scheduler or updating the number of hw queues, we still need to
quiesce the queue as a supplement to freezing it.

As we extend the uses of blk_mq_quiesce_queue(), we inevitably need to
support concurrent quiesce; in particular, an unquiesce from one
context must not take effect while a quiesce from another context is
still pending.

This patch introduces q->quiesce_depth to handle concurrent quiesce,
and the queue is only really unquiesced by the last unquiesce call
among all contexts.

[1] https://marc.info/?l=linux-block&m=150993988115872&w=2

Suggested-by: Bart Van Assche
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 block/blk-mq.c           | 21 ++++++++++++++++-----
 drivers/nvme/host/core.c |  4 ++--
 include/linux/blk-mq.h   |  7 ++++++-
 include/linux/blkdev.h   |  2 ++
 4 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 11097477eeab..5d69c8075339 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -196,7 +196,8 @@ void blk_mq_quiesce_queue_nowait(struct request_queue *q)
 	unsigned long flags;
 
 	spin_lock_irqsave(q->queue_lock, flags);
-	queue_flag_set(QUEUE_FLAG_QUIESCED, q);
+	if (!q->quiesce_depth++)
+		queue_flag_set(QUEUE_FLAG_QUIESCED, q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait);
@@ -232,22 +233,32 @@ EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);
 /*
  * blk_mq_unquiesce_queue() - counterpart of blk_mq_quiesce_queue()
  * @q: request queue.
+ * @force: force to unquiesce if set
  *
  * This function recovers queue into the state before quiescing
  * which is done by blk_mq_quiesce_queue.
+ *
+ * Note: @force should only be passed as true right before cleaning up
+ * the queue; please don't use it in other cases.
  */
-void blk_mq_unquiesce_queue(struct request_queue *q)
+void __blk_mq_unquiesce_queue(struct request_queue *q, bool force)
 {
 	unsigned long flags;
+	int depth;
 
 	spin_lock_irqsave(q->queue_lock, flags);
-	queue_flag_clear(QUEUE_FLAG_QUIESCED, q);
+	if (q->quiesce_depth > 0)
+		q->quiesce_depth--;
+	depth = q->quiesce_depth;
+	if (!depth || force)
+		queue_flag_clear(QUEUE_FLAG_QUIESCED, q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 
 	/* dispatch requests which are inserted during quiescing */
-	blk_mq_run_hw_queues(q, true);
+	if (!depth || force)
+		blk_mq_run_hw_queues(q, true);
 }
-EXPORT_SYMBOL_GPL(blk_mq_unquiesce_queue);
+EXPORT_SYMBOL_GPL(__blk_mq_unquiesce_queue);
 
 void blk_mq_wake_waiters(struct request_queue *q)
 {
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index f837d666cbd4..1ab1168cd46a 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3371,7 +3371,7 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
 
 	/* Forcibly unquiesce queues to avoid blocking dispatch */
 	if (ctrl->admin_q)
-		blk_mq_unquiesce_queue(ctrl->admin_q);
+		__blk_mq_unquiesce_queue(ctrl->admin_q, true);
 
 	list_for_each_entry(ns, &ctrl->namespaces, list) {
 		/*
@@ -3384,7 +3384,7 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
 		blk_set_queue_dying(ns->queue);
 
 		/* Forcibly unquiesce queues to avoid blocking dispatch */
-		blk_mq_unquiesce_queue(ns->queue);
+		__blk_mq_unquiesce_queue(ns->queue, true);
 	}
 	mutex_unlock(&ctrl->namespaces_mutex);
 }
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 95c9a5c862e2..8a01822dc09e 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -265,7 +265,7 @@ void blk_mq_start_hw_queues(struct request_queue *q);
 void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
 void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
 void blk_mq_quiesce_queue(struct request_queue *q);
-void blk_mq_unquiesce_queue(struct request_queue *q);
+void __blk_mq_unquiesce_queue(struct request_queue *q, bool force);
 void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
 bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
 void blk_mq_run_hw_queues(struct request_queue *q, bool async);
@@ -286,6 +286,11 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
 
 void blk_mq_quiesce_queue_nowait(struct request_queue *q);
 
+static inline void blk_mq_unquiesce_queue(struct request_queue *q)
+{
+	__blk_mq_unquiesce_queue(q, false);
+}
+
 /*
  * Driver command data is immediately after the request. So subtract request
  * size to get back to the original request, add request size to get the PDU.
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8089ca17db9a..ee3731f417c0 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -570,6 +570,8 @@ struct request_queue {
 	int			bypass_depth;
 	atomic_t		mq_freeze_depth;
 
+	int			quiesce_depth;
+
 #if defined(CONFIG_BLK_DEV_BSG)
 	bsg_job_fn		*bsg_job_fn;
 	struct bsg_class_device bsg_dev;
-- 
2.9.5
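
For readers who want to see the depth-counting semantics of this patch in
isolation, below is a minimal userspace sketch of the same pattern. It is
only an illustration and not part of the patch: fake_queue, fake_quiesce()
and fake_unquiesce() are invented names, a pthread mutex stands in for
q->queue_lock, the quiesced flag models QUEUE_FLAG_QUIESCED, and the
printf() stands in for blk_mq_run_hw_queues(). Build with: cc -pthread.

/*
 * Userspace model of the quiesce-depth pattern: the first quiescer sets
 * the flag, and only the last (or a forced) unquiesce clears it and
 * re-runs the queues.
 */
#include <stdbool.h>
#include <stdio.h>
#include <pthread.h>

struct fake_queue {
	pthread_mutex_t lock;		/* stands in for q->queue_lock */
	int quiesce_depth;		/* models q->quiesce_depth */
	bool quiesced;			/* models QUEUE_FLAG_QUIESCED */
};

static void fake_quiesce(struct fake_queue *q)
{
	pthread_mutex_lock(&q->lock);
	if (!q->quiesce_depth++)
		q->quiesced = true;	/* only the first quiescer sets the flag */
	pthread_mutex_unlock(&q->lock);
}

static void fake_unquiesce(struct fake_queue *q, bool force)
{
	int depth;

	pthread_mutex_lock(&q->lock);
	if (q->quiesce_depth > 0)
		q->quiesce_depth--;
	depth = q->quiesce_depth;
	if (!depth || force)
		q->quiesced = false;	/* last or forced unquiesce clears it */
	pthread_mutex_unlock(&q->lock);

	if (!depth || force)
		printf("re-run hw queues\n");	/* blk_mq_run_hw_queues() stand-in */
}

int main(void)
{
	struct fake_queue q = { .lock = PTHREAD_MUTEX_INITIALIZER };

	fake_quiesce(&q);		/* e.g. io scheduler switch */
	fake_quiesce(&q);		/* e.g. concurrent hw queue update */
	fake_unquiesce(&q, false);	/* depth is still 1: stays quiesced */
	fake_unquiesce(&q, false);	/* depth hits 0: flag cleared, queues run */
	return 0;
}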