All of lore.kernel.org
 help / color / mirror / Atom feed
From: marc@merlins.org (Marc MERLIN)
Subject: [PATCH 5/5] nvme/pci: Complete all stuck requests
Date: Wed, 15 Feb 2017 10:14:35 -0800	[thread overview]
Message-ID: <20170215181435.36saagrln25im7ml@merlins.org> (raw)
In-Reply-To: <1486768553-13738-6-git-send-email-keith.busch@intel.com>

On Fri, Feb 10, 2017 at 06:15:53PM -0500, Keith Busch wrote:
> If the nvme driver is shutting down, it will not start the queues back
> up until asked to resume. If the block layer has entered requests and
> gets a CPU hot plug event prior to the resume event, it will wait for
> those requests to exit. Those requests will never exit since the NVMe
> driver is quiesced, creating a deadlock.
> 
> This patch fixes that by freezing the queue and flushing all entered
> requests to either their natural completion, or forcing their demise. We
> only need to do this when requesting to shutdown the controller since
> we will not be starting the IO queues back up again.
> 
> Signed-off-by: Keith Busch <keith.busch at intel.com>

Tested-by: Marc MERLIN <marc at merlins.org>

> ---
>  drivers/nvme/host/core.c | 33 +++++++++++++++++++++++++++++++++
>  drivers/nvme/host/nvme.h |  3 +++
>  drivers/nvme/host/pci.c  | 34 +++++++++++++++++++++++++++++++++-
>  3 files changed, 69 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index c302270..1888451 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -2125,6 +2125,39 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
>  }
>  EXPORT_SYMBOL_GPL(nvme_kill_queues);
>  
> +void nvme_unfreeze(struct nvme_ctrl *ctrl)
> +{
> +	struct nvme_ns *ns;
> +
> +	mutex_lock(&ctrl->namespaces_mutex);
> +	list_for_each_entry(ns, &ctrl->namespaces, list)
> +		blk_mq_unfreeze_queue(ns->queue);
> +	mutex_unlock(&ctrl->namespaces_mutex);
> +}
> +EXPORT_SYMBOL_GPL(nvme_unfreeze);
> +
> +void nvme_wait_freeze(struct nvme_ctrl *ctrl)
> +{
> +	struct nvme_ns *ns;
> +
> +	mutex_lock(&ctrl->namespaces_mutex);
> +	list_for_each_entry(ns, &ctrl->namespaces, list)
> +		blk_mq_freeze_queue(ns->queue);
> +	mutex_unlock(&ctrl->namespaces_mutex);
> +}
> +EXPORT_SYMBOL_GPL(nvme_wait_freeze);
> +
> +void nvme_start_freeze(struct nvme_ctrl *ctrl)
> +{
> +	struct nvme_ns *ns;
> +
> +	mutex_lock(&ctrl->namespaces_mutex);
> +	list_for_each_entry(ns, &ctrl->namespaces, list)
> +		blk_mq_freeze_queue_start(ns->queue);
> +	mutex_unlock(&ctrl->namespaces_mutex);
> +}
> +EXPORT_SYMBOL_GPL(nvme_start_freeze);
> +
>  void nvme_stop_queues(struct nvme_ctrl *ctrl)
>  {
>  	struct nvme_ns *ns;
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index 569cba1..7408373 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -289,6 +289,9 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
>  void nvme_stop_queues(struct nvme_ctrl *ctrl);
>  void nvme_start_queues(struct nvme_ctrl *ctrl);
>  void nvme_kill_queues(struct nvme_ctrl *ctrl);
> +void nvme_unfreeze(struct nvme_ctrl *ctrl);
> +void nvme_wait_freeze(struct nvme_ctrl *ctrl);
> +void nvme_start_freeze(struct nvme_ctrl *ctrl);
>  
>  #define NVME_QID_ANY -1
>  struct request *nvme_alloc_request(struct request_queue *q,
> diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
> index 92010fd..b6451d8 100644
> --- a/drivers/nvme/host/pci.c
> +++ b/drivers/nvme/host/pci.c
> @@ -1672,12 +1672,15 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
>  {
>  	int i, queues;
>  	u32 csts = -1;
> +	bool drain_queue = pci_is_enabled(to_pci_dev(dev->dev));
>  
>  	del_timer_sync(&dev->watchdog_timer);
>  	cancel_work_sync(&dev->reset_work);
>  
>  	mutex_lock(&dev->shutdown_lock);
> -	if (pci_is_enabled(to_pci_dev(dev->dev))) {
> +	if (drain_queue) {
> +		if (shutdown)
> +			nvme_start_freeze(&dev->ctrl);
>  		nvme_stop_queues(&dev->ctrl);
>  		csts = readl(dev->bar + NVME_REG_CSTS);
>  	}
> @@ -1701,6 +1704,25 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
>  
>  	blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl);
>  	blk_mq_tagset_busy_iter(&dev->admin_tagset, nvme_cancel_request, &dev->ctrl);
> +
> +	/*
> +	 * If shutting down, the driver will not be starting up queues again,
> +	 * so must drain all entered requests to their demise to avoid
> +	 * deadlocking blk-mq hot-cpu notifier.
> +	 */
> +	if (drain_queue && shutdown) {
> +		nvme_start_queues(&dev->ctrl);
> +		/*
> +		 * Waiting for frozen increases the freeze depth. Since we
> +		 * already start the freeze earlier in this function to stop
> +	 * incoming requests, we have to unfreeze after the freeze completes
> +	 * to get the depth back to the desired level.
> +		 */
> +		nvme_wait_freeze(&dev->ctrl);
> +		nvme_unfreeze(&dev->ctrl);
> +		nvme_stop_queues(&dev->ctrl);
> +	}
> +
>  	mutex_unlock(&dev->shutdown_lock);
>  }
>  
> @@ -1817,6 +1839,16 @@ static void nvme_reset_work(struct work_struct *work)
>  	} else {
>  		nvme_start_queues(&dev->ctrl);
>  		nvme_dev_add(dev);
> +
> +		/*
> +		 * If we are resuming from suspend, the queue was set to freeze
> +		 * to prevent blk-mq's hot CPU notifier from getting stuck on
> +		 * requests that entered the queue that NVMe had quiesced. Now
> +		 * that we are resuming and have notified blk-mq of the new h/w
> +		 * context queue count, it is safe to unfreeze the queues.
> +		 */
> +		if (was_suspend)
> +			nvme_unfreeze(&dev->ctrl);
>  	}
>  
>  	if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_LIVE)) {
> -- 
> 1.8.3.1
> 
> 

-- 
"A mouse is a device used to point at the xterm you want to type in" - A.S.R.
Microsoft is to operating systems ....
                                      .... what McDonalds is to gourmet cooking
Home page: http://marc.merlins.org/                         | PGP 1024R/763BE901

  parent reply	other threads:[~2017-02-15 18:14 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-02-10 23:15 [PATCH 0/5] NVMe pci fixes, for-4.11 Keith Busch
2017-02-10 23:15 ` [PATCH 1/5] nvme/pci: Disable on removal when disconnected Keith Busch
2017-02-13 10:18   ` Johannes Thumshirn
2017-02-13 13:51   ` Christoph Hellwig
2017-02-10 23:15 ` [PATCH 2/5] nvme/pci: Cancel work after watchdog disabled Keith Busch
2017-02-13 10:25   ` Johannes Thumshirn
2017-02-13 13:51   ` Christoph Hellwig
2017-02-10 23:15 ` [PATCH 3/5] nvme/core: Fix race kicking freed request_queue Keith Busch
2017-02-13 10:33   ` Johannes Thumshirn
2017-02-13 13:53   ` Christoph Hellwig
2017-02-10 23:15 ` [PATCH 4/5] nvme/pci: No special case for queue busy on IO Keith Busch
2017-02-13 13:53   ` Christoph Hellwig
2017-02-10 23:15 ` [PATCH 5/5] nvme/pci: Complete all stuck requests Keith Busch
2017-02-15  9:50   ` Sagi Grimberg
2017-02-15 15:46     ` Keith Busch
2017-02-15 16:04       ` Marc MERLIN
2017-02-15 17:36         ` J Freyensee
2017-02-16  9:12         ` Sagi Grimberg
2017-02-16 22:51           ` Keith Busch
2017-02-17  8:25             ` Christoph Hellwig
2017-02-15 18:14   ` Marc MERLIN [this message]
2017-12-14  3:36     ` Marc MERLIN
2018-02-28  2:22       ` Marc MERLIN
2017-02-17 15:27   ` Christoph Hellwig
2017-02-17 16:33     ` Keith Busch
2017-02-20 10:05       ` Christoph Hellwig
2017-02-21 15:57         ` Keith Busch
2017-02-22  7:17           ` Christoph Hellwig
2017-02-22 14:45             ` Keith Busch
2017-02-23 15:06               ` Christoph Hellwig
2017-02-23 15:21                 ` Keith Busch
2017-02-23 15:16                   ` Christoph Hellwig
2017-02-21 21:55       ` Sagi Grimberg
2017-02-21 23:26         ` Keith Busch
2017-02-15  9:40 ` [PATCH 0/5] NVMe pci fixes, for-4.11 Sagi Grimberg
     [not found] <20170313153319.fmy6ww72fjtx74xq@merlins.org>
     [not found] ` <20170313143649.GC6994@localhost.localdomain>

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170215181435.36saagrln25im7ml@merlins.org \
    --to=marc@merlins.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.