linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jianchao Wang <jianchao.w.wang@oracle.com>
To: keith.busch@intel.com, axboe@fb.com, hch@lst.de, sagi@grimberg.me
Cc: linux-nvme@lists.infradead.org, linux-kernel@vger.kernel.org
Subject: [PATCH V4 2/5] nvme: add helper interface to flush in-flight requests
Date: Thu,  8 Mar 2018 14:19:28 +0800	[thread overview]
Message-ID: <1520489971-31174-3-git-send-email-jianchao.w.wang@oracle.com> (raw)
In-Reply-To: <1520489971-31174-1-git-send-email-jianchao.w.wang@oracle.com>

Currently, we use nvme_cancel_request to forcibly complete requests.
This has the following defects:
 - It is not safe to race with the normal completion path.
   blk_mq_complete_request is OK to race with the timeout path,
   but not with itself.
 - It cannot ensure that all the requests have been handled. The
   timeout path may grab some expired requests, and
   blk_mq_complete_request cannot touch them.

Add two helper interfaces to flush in-flight requests more safely.
- nvme_abort_requests_sync
Uses nvme_abort_req to time out all the in-flight requests and waits
until the timeout work and the irq completion path complete. For more
details, please refer to the comment on this interface.
- nvme_flush_aborted_requests
Completes the requests 'aborted' by nvme_abort_requests_sync. It will
be invoked after the controller is disabled/shutdown.

Signed-off-by: Jianchao Wang <jianchao.w.wang@oracle.com>
---
 drivers/nvme/host/core.c | 96 ++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/nvme/host/nvme.h |  4 +-
 2 files changed, 99 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 7b8df47..e26759b 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3567,6 +3567,102 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
 }
 EXPORT_SYMBOL_GPL(nvme_start_queues);
 
+static void nvme_abort_req(struct request *req, void *data, bool reserved)
+{
+	if (!blk_mq_request_started(req))
+		return;
+
+	dev_dbg_ratelimited(((struct nvme_ctrl *) data)->device,
+				"Abort I/O %d", req->tag);
+
+	/* The timeout path needs to recognize this flag and return
+	 * BLK_EH_NOT_HANDLED so that the request is not completed there.
+	 * The completion is deferred until after the controller has been
+	 * disabled or shut down.
+	 */
+	set_bit(NVME_REQ_ABORTED, &nvme_req(req)->flags);
+	blk_abort_request(req);
+}
+
+/*
+ * Ensure that every in-flight request on the controller has been
+ * handled by either the timeout path or the irq completion path.
+ * Must be paired with nvme_flush_aborted_requests, which is invoked
+ * after the controller has been disabled/shut down and completes
+ * the requests 'aborted' by nvme_abort_req.
+ *
+ * Note: the driver layer is not quiescent until the controller is
+ * disabled, because the requests aborted by blk_abort_request are
+ * still active and an irq may fire at any time. Such an irq cannot
+ * enter the completion path for them, however, because the requests
+ * have already been timed out.
+ */
+void nvme_abort_requests_sync(struct nvme_ctrl *ctrl)
+{
+	struct nvme_ns *ns;
+
+	blk_mq_tagset_busy_iter(ctrl->tagset, nvme_abort_req, ctrl);
+	blk_mq_tagset_busy_iter(ctrl->admin_tagset, nvme_abort_req, ctrl);
+	/*
+	 * Queue timeout_work explicitly so the timer need not be synced.
+	 * NOTE(review): admin_q's timeout_work is never flushed - confirm.
+	 */
+	kblockd_schedule_work(&ctrl->admin_q->timeout_work);
+
+	down_read(&ctrl->namespaces_rwsem);
+
+	list_for_each_entry(ns, &ctrl->namespaces, list)
+		kblockd_schedule_work(&ns->queue->timeout_work);
+
+	list_for_each_entry(ns, &ctrl->namespaces, list)
+		flush_work(&ns->queue->timeout_work);
+
+	up_read(&ctrl->namespaces_rwsem);
+	/* This ensures that all nvme irq completion paths have exited */
+	synchronize_sched();
+}
+EXPORT_SYMBOL_GPL(nvme_abort_requests_sync);
+
+static void nvme_comp_req(struct request *req, void *data, bool reserved)
+{
+	struct nvme_ctrl *ctrl = (struct nvme_ctrl *)data;
+
+	if (!test_bit(NVME_REQ_ABORTED, &nvme_req(req)->flags))
+		return;
+
+	WARN_ON(!blk_mq_request_started(req));
+
+	if (ctrl->tagset && ctrl->tagset->ops->complete) {
+		clear_bit(NVME_REQ_ABORTED, &nvme_req(req)->flags);
+		/*
+		 * Set the status to NVME_SC_ABORT_REQ so that an ioq request
+		 * will be requeued and an adminq request will be failed.
+		 */
+		nvme_req(req)->status = NVME_SC_ABORT_REQ;
+		/*
+		 * For an ioq request, blk_mq_requeue_request would be
+		 * preferable here, but the nvme code still sets up the cmd
+		 * even if RQF_DONTPREP is set. We therefore have to use
+		 * .complete to free the cmd and then requeue the request.
+		 *
+		 * For an adminq request, invoking .complete directly skips
+		 * blk_mq_sched_completed_request, but this is OK because
+		 * there is no io scheduler for the adminq.
+		 */
+		ctrl->tagset->ops->complete(req);
+	}
+}
+
+/*
+ * Should be paired with a preceding nvme_abort_requests_sync call.
+ */
+void nvme_flush_aborted_requests(struct nvme_ctrl *ctrl)
+{
+	blk_mq_tagset_busy_iter(ctrl->tagset, nvme_comp_req, ctrl);
+	blk_mq_tagset_busy_iter(ctrl->admin_tagset, nvme_comp_req, ctrl);
+}
+EXPORT_SYMBOL_GPL(nvme_flush_aborted_requests);
+
 int nvme_reinit_tagset(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set)
 {
 	if (!ctrl->ops->reinit_request)
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 02097e8..3c71c73 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -104,6 +104,7 @@ struct nvme_request {
 
 enum {
 	NVME_REQ_CANCELLED		= 0,
+	NVME_REQ_ABORTED,            /* cmd is aborted by nvme_abort_req */
 };
 
 static inline struct nvme_request *nvme_req(struct request *req)
@@ -381,7 +382,8 @@ void nvme_wait_freeze(struct nvme_ctrl *ctrl);
 void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout);
 void nvme_start_freeze(struct nvme_ctrl *ctrl);
 int nvme_reinit_tagset(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set);
-
+void nvme_abort_requests_sync(struct nvme_ctrl *ctrl);
+void nvme_flush_aborted_requests(struct nvme_ctrl *ctrl);
 #define NVME_QID_ANY -1
 struct request *nvme_alloc_request(struct request_queue *q,
 		struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid);
-- 
2.7.4

  parent reply	other threads:[~2018-03-08  6:20 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-08  6:19 PATCH V4 0/5 nvme-pci: fixes on nvme_timeout and nvme_dev_disable Jianchao Wang
2018-03-08  6:19 ` [PATCH V4 1/5] nvme: do atomically bit operations on nvme_request.flags Jianchao Wang
2018-03-08  7:57   ` Christoph Hellwig
2018-03-08 14:32     ` jianchao.wang
2018-03-08  6:19 ` Jianchao Wang [this message]
2018-03-08 13:11   ` [PATCH V4 2/5] nvme: add helper interface to flush in-flight requests Ming Lei
2018-03-08 14:44     ` jianchao.wang
2018-03-08 18:21   ` Sagi Grimberg
2018-03-09  1:59     ` jianchao.wang
2018-03-08  6:19 ` [PATCH V4 3/5] nvme-pci: avoid nvme_dev_disable to be invoked in nvme_timeout Jianchao Wang
2018-03-09  2:01   ` jianchao.wang
2018-03-13 13:29     ` jianchao.wang
2018-03-08  6:19 ` [PATCH V4 4/5] nvme-pci: discard wait timeout when delete cq/sq Jianchao Wang
2018-03-08  6:19 ` [PATCH V4 5/5] nvme-pci: add the timeout case for DELETEING state Jianchao Wang
2018-04-17 15:17 ` PATCH V4 0/5 nvme-pci: fixes on nvme_timeout and nvme_dev_disable Ming Lei
2018-04-18 14:24   ` jianchao.wang
2018-04-18 15:40     ` Ming Lei
2018-04-19  1:51       ` jianchao.wang
2018-04-19  2:27         ` Ming Lei

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1520489971-31174-3-git-send-email-jianchao.w.wang@oracle.com \
    --to=jianchao.w.wang@oracle.com \
    --cc=axboe@fb.com \
    --cc=hch@lst.de \
    --cc=keith.busch@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=sagi@grimberg.me \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).