All of lore.kernel.org
 help / color / mirror / Atom feed
From: Keith Busch <keith.busch@intel.com>
To: Jens Axboe <axboe@kernel.dk>, Christoph Hellwig <hch@lst.de>,
	Sagi Grimberg <sagi@grimberg.me>,
	linux-nvme@lists.infradead.org, linux-block@vger.kernel.org
Cc: Keith Busch <keith.busch@intel.com>
Subject: [PATCH 2/2] nvme: Remove queue flushing hack
Date: Fri, 30 Nov 2018 13:26:35 -0700	[thread overview]
Message-ID: <20181130202635.11145-2-keith.busch@intel.com> (raw)
In-Reply-To: <20181130202635.11145-1-keith.busch@intel.com>

The nvme driver checked the queue state on every IO so the path could
drain requests. The code however declares "We should not need to do this",
so let's not do it. Instead, use blk-mq's tag iterator to terminate
entered requests on dying queues so the IO path doesn't have to deal
with these conditions.

Signed-off-by: Keith Busch <keith.busch@intel.com>
---
 drivers/nvme/host/core.c | 10 ++++++++--
 drivers/nvme/host/pci.c  | 43 +++++++++++++++++++++++++++----------------
 2 files changed, 35 insertions(+), 18 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 91474b3c566c..af84c4d3c20e 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -103,6 +103,13 @@ static void nvme_put_subsystem(struct nvme_subsystem *subsys);
 static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
 					   unsigned nsid);
 
+static bool nvme_fail_request(struct blk_mq_hw_ctx *hctx, struct request *req,
+			      void *data, bool reserved)
+{
+	blk_mq_end_request(req, BLK_STS_IOERR);
+	return true;
+}
+
 static void nvme_set_queue_dying(struct nvme_ns *ns)
 {
 	/*
@@ -113,8 +120,7 @@ static void nvme_set_queue_dying(struct nvme_ns *ns)
 		return;
 	revalidate_disk(ns->disk);
 	blk_set_queue_dying(ns->queue);
-	/* Forcibly unquiesce queues to avoid blocking dispatch */
-	blk_mq_unquiesce_queue(ns->queue);
+	blk_mq_queue_tag_busy_iter(ns->queue, nvme_fail_request, NULL);
 }
 
 static void nvme_queue_scan(struct nvme_ctrl *ctrl)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 3ecc0bf75a62..ec830aa52842 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -926,13 +926,6 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 	struct nvme_command cmnd;
 	blk_status_t ret;
 
-	/*
-	 * We should not need to do this, but we're still using this to
-	 * ensure we can drain requests on a dying queue.
-	 */
-	if (unlikely(!test_bit(NVMEQ_ENABLED, &nvmeq->flags)))
-		return BLK_STS_IOERR;
-
 	ret = nvme_setup_cmd(ns, req, &cmnd);
 	if (ret)
 		return ret;
@@ -1408,10 +1401,6 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
 {
 	if (!test_and_clear_bit(NVMEQ_ENABLED, &nvmeq->flags))
 		return 1;
-
-	/* ensure that nvme_queue_rq() sees NVMEQ_ENABLED cleared */
-	mb();
-
 	nvmeq->dev->online_queues--;
 	if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q)
 		blk_mq_quiesce_queue(nvmeq->dev->ctrl.admin_q);
@@ -1611,15 +1600,30 @@ static const struct blk_mq_ops nvme_mq_ops = {
 	.poll		= nvme_poll,
 };
 
+static bool nvme_fail_queue_request(struct request *req, void *data, bool reserved)
+{
+	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+	struct nvme_queue *nvmeq = iod->nvmeq;
+
+	if (test_bit(NVMEQ_ENABLED, &nvmeq->flags))
+		return true;
+	blk_mq_end_request(req, BLK_STS_IOERR);
+	return true;
+}
+
 static void nvme_dev_remove_admin(struct nvme_dev *dev)
 {
 	if (dev->ctrl.admin_q && !blk_queue_dying(dev->ctrl.admin_q)) {
 		/*
 		 * If the controller was reset during removal, it's possible
-		 * user requests may be waiting on a stopped queue. Start the
-		 * queue to flush these to completion.
+		 * user requests may be waiting on a stopped queue. End all
+		 * entered requests after preventing new requests from
+		 * entering.
 		 */
-		blk_mq_unquiesce_queue(dev->ctrl.admin_q);
+		blk_set_queue_dying(dev->ctrl.admin_q);
+		blk_mq_tagset_all_iter(&dev->admin_tagset,
+				       nvme_fail_queue_request,
+				       NULL);
 		blk_cleanup_queue(dev->ctrl.admin_q);
 		blk_mq_free_tag_set(&dev->admin_tagset);
 	}
@@ -2411,6 +2415,11 @@ static void nvme_pci_disable(struct nvme_dev *dev)
 	}
 }
 
+static void nvme_fail_requests(struct nvme_dev *dev)
+{
+	blk_mq_tagset_all_iter(&dev->tagset, nvme_fail_queue_request, NULL);
+}
+
 static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 {
 	int i;
@@ -2454,11 +2463,11 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 
 	/*
 	 * The driver will not be starting up queues again if shutting down so
-	 * must flush all entered requests to their failed completion to avoid
+	 * must end all entered requests to their failed completion to avoid
 	 * deadlocking blk-mq hot-cpu notifier.
 	 */
 	if (shutdown)
-		nvme_start_queues(&dev->ctrl);
+		nvme_fail_requests(dev);
 	mutex_unlock(&dev->shutdown_lock);
 }
 
@@ -2601,6 +2610,8 @@ static void nvme_reset_work(struct work_struct *work)
 		nvme_remove_namespaces(&dev->ctrl);
 		new_state = NVME_CTRL_ADMIN_ONLY;
 	} else {
+		/* Fail requests that entered an hctx that no longer exists */
+		nvme_fail_requests(dev);
 		nvme_start_queues(&dev->ctrl);
 		nvme_wait_freeze(&dev->ctrl);
 		/* hit this only when allocate tagset fails */
-- 
2.14.4


WARNING: multiple messages have this Message-ID (diff)
From: keith.busch@intel.com (Keith Busch)
Subject: [PATCH 2/2] nvme: Remove queue flushing hack
Date: Fri, 30 Nov 2018 13:26:35 -0700	[thread overview]
Message-ID: <20181130202635.11145-2-keith.busch@intel.com> (raw)
In-Reply-To: <20181130202635.11145-1-keith.busch@intel.com>

The nvme driver checked the queue state on every IO so the path could
drain requests. The code however declares "We should not need to do this",
so let's not do it. Instead, use blk-mq's tag iterator to terminate
entered requests on dying queues so the IO path doesn't have to deal
with these conditions.

Signed-off-by: Keith Busch <keith.busch at intel.com>
---
 drivers/nvme/host/core.c | 10 ++++++++--
 drivers/nvme/host/pci.c  | 43 +++++++++++++++++++++++++++----------------
 2 files changed, 35 insertions(+), 18 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 91474b3c566c..af84c4d3c20e 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -103,6 +103,13 @@ static void nvme_put_subsystem(struct nvme_subsystem *subsys);
 static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
 					   unsigned nsid);
 
+static bool nvme_fail_request(struct blk_mq_hw_ctx *hctx, struct request *req,
+			      void *data, bool reserved)
+{
+	blk_mq_end_request(req, BLK_STS_IOERR);
+	return true;
+}
+
 static void nvme_set_queue_dying(struct nvme_ns *ns)
 {
 	/*
@@ -113,8 +120,7 @@ static void nvme_set_queue_dying(struct nvme_ns *ns)
 		return;
 	revalidate_disk(ns->disk);
 	blk_set_queue_dying(ns->queue);
-	/* Forcibly unquiesce queues to avoid blocking dispatch */
-	blk_mq_unquiesce_queue(ns->queue);
+	blk_mq_queue_tag_busy_iter(ns->queue, nvme_fail_request, NULL);
 }
 
 static void nvme_queue_scan(struct nvme_ctrl *ctrl)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 3ecc0bf75a62..ec830aa52842 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -926,13 +926,6 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 	struct nvme_command cmnd;
 	blk_status_t ret;
 
-	/*
-	 * We should not need to do this, but we're still using this to
-	 * ensure we can drain requests on a dying queue.
-	 */
-	if (unlikely(!test_bit(NVMEQ_ENABLED, &nvmeq->flags)))
-		return BLK_STS_IOERR;
-
 	ret = nvme_setup_cmd(ns, req, &cmnd);
 	if (ret)
 		return ret;
@@ -1408,10 +1401,6 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
 {
 	if (!test_and_clear_bit(NVMEQ_ENABLED, &nvmeq->flags))
 		return 1;
-
-	/* ensure that nvme_queue_rq() sees NVMEQ_ENABLED cleared */
-	mb();
-
 	nvmeq->dev->online_queues--;
 	if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q)
 		blk_mq_quiesce_queue(nvmeq->dev->ctrl.admin_q);
@@ -1611,15 +1600,30 @@ static const struct blk_mq_ops nvme_mq_ops = {
 	.poll		= nvme_poll,
 };
 
+static bool nvme_fail_queue_request(struct request *req, void *data, bool reserved)
+{
+	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+	struct nvme_queue *nvmeq = iod->nvmeq;
+
+	if (test_bit(NVMEQ_ENABLED, &nvmeq->flags))
+		return true;
+	blk_mq_end_request(req, BLK_STS_IOERR);
+	return true;
+}
+
 static void nvme_dev_remove_admin(struct nvme_dev *dev)
 {
 	if (dev->ctrl.admin_q && !blk_queue_dying(dev->ctrl.admin_q)) {
 		/*
 		 * If the controller was reset during removal, it's possible
-		 * user requests may be waiting on a stopped queue. Start the
-		 * queue to flush these to completion.
+		 * user requests may be waiting on a stopped queue. End all
+		 * entered requests after preventing new requests from
+		 * entering.
 		 */
-		blk_mq_unquiesce_queue(dev->ctrl.admin_q);
+		blk_set_queue_dying(dev->ctrl.admin_q);
+		blk_mq_tagset_all_iter(&dev->admin_tagset,
+				       nvme_fail_queue_request,
+				       NULL);
 		blk_cleanup_queue(dev->ctrl.admin_q);
 		blk_mq_free_tag_set(&dev->admin_tagset);
 	}
@@ -2411,6 +2415,11 @@ static void nvme_pci_disable(struct nvme_dev *dev)
 	}
 }
 
+static void nvme_fail_requests(struct nvme_dev *dev)
+{
+	blk_mq_tagset_all_iter(&dev->tagset, nvme_fail_queue_request, NULL);
+}
+
 static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 {
 	int i;
@@ -2454,11 +2463,11 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 
 	/*
 	 * The driver will not be starting up queues again if shutting down so
-	 * must flush all entered requests to their failed completion to avoid
+	 * must end all entered requests to their failed completion to avoid
 	 * deadlocking blk-mq hot-cpu notifier.
 	 */
 	if (shutdown)
-		nvme_start_queues(&dev->ctrl);
+		nvme_fail_requests(dev);
 	mutex_unlock(&dev->shutdown_lock);
 }
 
@@ -2601,6 +2610,8 @@ static void nvme_reset_work(struct work_struct *work)
 		nvme_remove_namespaces(&dev->ctrl);
 		new_state = NVME_CTRL_ADMIN_ONLY;
 	} else {
+		/* Fail requests that entered an hctx that no longer exists */
+		nvme_fail_requests(dev);
 		nvme_start_queues(&dev->ctrl);
 		nvme_wait_freeze(&dev->ctrl);
 		/* hit this only when allocate tagset fails */
-- 
2.14.4

  reply	other threads:[~2018-11-30 20:29 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-11-30 20:26 [PATCH 1/2] blk-mq: Export iterating all tagged requests Keith Busch
2018-11-30 20:26 ` Keith Busch
2018-11-30 20:26 ` Keith Busch [this message]
2018-11-30 20:26   ` [PATCH 2/2] nvme: Remove queue flushing hack Keith Busch
2018-11-30 20:36 ` [PATCH 1/2] blk-mq: Export iterating all tagged requests Jens Axboe
2018-11-30 20:36   ` Jens Axboe
2018-11-30 20:39   ` Keith Busch
2018-11-30 20:39     ` Keith Busch
2018-12-01 16:48   ` Christoph Hellwig
2018-12-01 16:48     ` Christoph Hellwig
2018-12-01 17:11     ` Hannes Reinecke
2018-12-01 17:11       ` Hannes Reinecke
2018-12-01 18:32       ` Bart Van Assche
2018-12-01 18:32         ` Bart Van Assche
2018-12-03 18:57         ` James Smart
2018-12-03 18:57           ` James Smart
2018-12-04  1:33       ` Sagi Grimberg
2018-12-04  1:33         ` Sagi Grimberg
2018-12-04 15:46         ` Keith Busch
2018-12-04 15:46           ` Keith Busch
2018-12-04 16:26           ` James Smart
2018-12-04 16:26             ` James Smart
2018-12-04 17:23             ` Sagi Grimberg
2018-12-04 17:23               ` Sagi Grimberg
2018-12-04 19:13               ` James Smart
2018-12-04 19:13                 ` James Smart
2018-12-04 17:38           ` Sagi Grimberg
2018-12-04 17:38             ` Sagi Grimberg
2018-12-04 17:48             ` Keith Busch
2018-12-04 17:48               ` Keith Busch
2018-12-04 19:33               ` James Smart
2018-12-04 19:33                 ` James Smart
2018-12-04 21:21                 ` Keith Busch
2018-12-04 21:21                   ` Keith Busch
2018-12-04 21:43                   ` Keith Busch
2018-12-04 21:43                     ` Keith Busch
2018-12-04 22:09                   ` James Smart
2018-12-04 22:09                     ` James Smart
2018-12-03  7:44     ` Ming Lei
2018-12-03  7:44       ` Ming Lei

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181130202635.11145-2-keith.busch@intel.com \
    --to=keith.busch@intel.com \
    --cc=axboe@kernel.dk \
    --cc=hch@lst.de \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=sagi@grimberg.me \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.