linux-nvme.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
From: Keith Busch <kbusch@kernel.org>
To: Hannes Reinecke <hare@suse.de>
Cc: "hch@lst.de" <hch@lst.de>, Sagi Grimberg <sagi@grimberg.me>,
	"linux-nvme@lists.infradead.org" <linux-nvme@lists.infradead.org>,
	"Meneghini, John" <John.Meneghini@netapp.com>
Subject: Re: [PATCH] nvme: Translate more status codes to blk_status_t
Date: Tue, 17 Dec 2019 00:30:19 +0900	[thread overview]
Message-ID: <20191216153019.GA11424@redsun51.ssa.fujisawa.hgst.com> (raw)
In-Reply-To: <1db1bb81-f2e6-4ec2-ea43-b5e609be1a4e@suse.de>

On Mon, Dec 16, 2019 at 09:02:28AM +0100, Hannes Reinecke wrote:
> On 12/13/19 10:02 PM, Sagi Grimberg wrote:
> I never liked the reset here in the first place; I/O errors, okay, but reset
> does imply that something unrecoverable happened.
> Which clearly it didn't as the controller did provide us with a reply, and
> it's just us being too daft to understand it.
> 
> So I do agree with Sagi here; we should restrict controller reset to real
> communication failures, not failed error handling.
> We did that in SCSI, and it turned out to be a mess.

I also think resetting as the default is too harsh a recovery on the
first escalation to a potential path related error. If we're really having
path communication problems, the timeout work will come in to clean up,
or we can white-list some errors if we agree reset is an appropriate
action. We mentioned this very early in the native mpath development:

  https://patchwork.kernel.org/patch/9918145/#20876583

Beyond reducing the errors that trigger reset, it would be good to make
mpath also delay with ACRE's CRD values. It should work if we retype the
requeue work to a delayed_work like the below (untested) patch:

---
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index e6ee34376c5e..a39c125d6305 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -264,17 +264,11 @@ static inline bool nvme_req_needs_retry(struct request *req)
 static void nvme_retry_req(struct request *req)
 {
 	struct nvme_ns *ns = req->q->queuedata;
-	unsigned long delay = 0;
-	u16 crd;
-
-	/* The mask and shift result must be <= 3 */
-	crd = (nvme_req(req)->status & NVME_SC_CRD) >> 11;
-	if (ns && crd)
-		delay = ns->ctrl->crdt[crd - 1] * 100;
+	u16 status = nvme_req(req)->status;
 
 	nvme_req(req)->retries++;
 	blk_mq_requeue_request(req, false);
-	blk_mq_delay_kick_requeue_list(req->q, delay);
+	blk_mq_delay_kick_requeue_list(req->q, nvme_retry_delay(status, ns));
 }
 
 void nvme_complete_rq(struct request *req)
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 797c18337d96..40b33f2a19ad 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -111,7 +111,8 @@ void nvme_failover_req(struct request *req)
 		break;
 	}
 
-	kblockd_schedule_work(&ns->head->requeue_work);
+	kblockd_mod_delayed_work_on(WORK_CPU_UNBOUND, &ns->head->requeue_work,
+				    nvme_retry_delay(status, ns));
 }
 
 void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
@@ -121,7 +122,7 @@ void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
 	down_read(&ctrl->namespaces_rwsem);
 	list_for_each_entry(ns, &ctrl->namespaces, list) {
 		if (ns->head->disk)
-			kblockd_schedule_work(&ns->head->requeue_work);
+			kblockd_schedule_work(&ns->head->requeue_work.work);
 	}
 	up_read(&ctrl->namespaces_rwsem);
 }
@@ -162,7 +163,7 @@ void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
 	down_read(&ctrl->namespaces_rwsem);
 	list_for_each_entry(ns, &ctrl->namespaces, list)
 		if (nvme_mpath_clear_current_path(ns))
-			kblockd_schedule_work(&ns->head->requeue_work);
+			kblockd_schedule_work(&ns->head->requeue_work.work);
 	up_read(&ctrl->namespaces_rwsem);
 	mutex_unlock(&ctrl->scan_lock);
 }
@@ -339,7 +340,7 @@ static blk_qc_t nvme_ns_head_make_request(struct request_queue *q,
 static void nvme_requeue_work(struct work_struct *work)
 {
 	struct nvme_ns_head *head =
-		container_of(work, struct nvme_ns_head, requeue_work);
+		container_of(work, struct nvme_ns_head, requeue_work.work);
 	struct bio *bio, *next;
 
 	spin_lock_irq(&head->requeue_lock);
@@ -367,7 +368,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
 	mutex_init(&head->lock);
 	bio_list_init(&head->requeue_list);
 	spin_lock_init(&head->requeue_lock);
-	INIT_WORK(&head->requeue_work, nvme_requeue_work);
+	INIT_DELAYED_WORK(&head->requeue_work, nvme_requeue_work);
 
 	/*
 	 * Add a multipath node if the subsystems supports multiple controllers.
@@ -432,7 +433,7 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
 	}
 
 	synchronize_srcu(&ns->head->srcu);
-	kblockd_schedule_work(&ns->head->requeue_work);
+	kblockd_schedule_work(&ns->head->requeue_work.work);
 }
 
 static int nvme_parse_ana_log(struct nvme_ctrl *ctrl, void *data,
@@ -680,8 +681,8 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
 		del_gendisk(head->disk);
 	blk_set_queue_dying(head->disk->queue);
 	/* make sure all pending bios are cleaned up */
-	kblockd_schedule_work(&head->requeue_work);
-	flush_work(&head->requeue_work);
+	kblockd_schedule_work(&head->requeue_work.work);
+	flush_work(&head->requeue_work.work);
 	blk_cleanup_queue(head->disk->queue);
 	put_disk(head->disk);
 }
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 1024fec7914c..929427defbcb 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -356,7 +356,7 @@ struct nvme_ns_head {
 	struct gendisk		*disk;
 	struct bio_list		requeue_list;
 	spinlock_t		requeue_lock;
-	struct work_struct	requeue_work;
+	struct delayed_work	requeue_work;
 	struct mutex		lock;
 	struct nvme_ns __rcu	*current_path[];
 #endif
@@ -393,6 +393,15 @@ struct nvme_ns {
 
 };
 
+static inline unsigned long nvme_retry_delay(u16 status, struct nvme_ns *ns)
+{
+	/* The mask and shift result must be <= 3 */
+	u16 crd = (status & NVME_SC_CRD) >> 11;
+	if (ns && crd)
+		return ns->ctrl->crdt[crd - 1] * 100;
+	return 0;
+}
+
 struct nvme_ctrl_ops {
 	const char *name;
 	struct module *module;
@@ -567,7 +576,7 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
 	struct nvme_ns_head *head = ns->head;
 
 	if (head->disk && list_empty(&head->list))
-		kblockd_schedule_work(&head->requeue_work);
+		kblockd_schedule_work(&head->requeue_work.work);
 }
 
 static inline void nvme_trace_bio_complete(struct request *req,
--

_______________________________________________
linux-nvme mailing list
linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

      reply	other threads:[~2019-12-16 15:30 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-12-05 19:57 Keith Busch
2019-12-12  9:20 ` Christoph Hellwig
2019-12-12 19:41 ` Meneghini, John
2019-12-13  7:32   ` Meneghini, John
2019-12-13 21:02     ` Sagi Grimberg
2019-12-16  8:02       ` Hannes Reinecke
2019-12-16 15:30         ` Keith Busch [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191216153019.GA11424@redsun51.ssa.fujisawa.hgst.com \
    --to=kbusch@kernel.org \
    --cc=John.Meneghini@netapp.com \
    --cc=hare@suse.de \
    --cc=hch@lst.de \
    --cc=linux-nvme@lists.infradead.org \
    --cc=sagi@grimberg.me \
    --subject='Re: [PATCH] nvme: Translate more status codes to blk_status_t' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).