* [PATCH v3] nvme: fix controller removal race with scan work
@ 2019-07-25 18:56 Sagi Grimberg
  2019-07-26 15:42 ` Hannes Reinecke
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Sagi Grimberg @ 2019-07-25 18:56 UTC (permalink / raw)


With multipath enabled, nvme_scan_work() can read from the device
(through nvme_mpath_add_disk()) and hang [1]. However, with fabrics,
once ctrl->state is set to NVME_CTRL_DELETING, the reads will hang
(see nvmf_check_ready()) and the mpath stack device make_request
will block if head->list is not empty. However, when the head->list
consists only of DELETING/DEAD controllers, we should actually not
block, but rather fail immediately.

In addition, before we go ahead and remove the namespaces, make sure
to clear the current path and kick the requeue list so that
requests will fail fast upon requeuing.

[1]:
--
  INFO: task kworker/u4:3:166 blocked for more than 120 seconds.
        Not tainted 5.2.0-rc6-vmlocalyes-00005-g808c8c2dc0cf #316
  "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
  kworker/u4:3    D    0   166      2 0x80004000
  Workqueue: nvme-wq nvme_scan_work
  Call Trace:
   __schedule+0x851/0x1400
   schedule+0x99/0x210
   io_schedule+0x21/0x70
   do_read_cache_page+0xa57/0x1330
   read_cache_page+0x4a/0x70
   read_dev_sector+0xbf/0x380
   amiga_partition+0xc4/0x1230
   check_partition+0x30f/0x630
   rescan_partitions+0x19a/0x980
   __blkdev_get+0x85a/0x12f0
   blkdev_get+0x2a5/0x790
   __device_add_disk+0xe25/0x1250
   device_add_disk+0x13/0x20
   nvme_mpath_set_live+0x172/0x2b0
   nvme_update_ns_ana_state+0x130/0x180
   nvme_set_ns_ana_state+0x9a/0xb0
   nvme_parse_ana_log+0x1c3/0x4a0
   nvme_mpath_add_disk+0x157/0x290
   nvme_validate_ns+0x1017/0x1bd0
   nvme_scan_work+0x44d/0x6a0
   process_one_work+0x7d7/0x1240
   worker_thread+0x8e/0xff0
   kthread+0x2c3/0x3b0
   ret_from_fork+0x35/0x40

   INFO: task kworker/u4:1:1034 blocked for more than 120 seconds.
        Not tainted 5.2.0-rc6-vmlocalyes-00005-g808c8c2dc0cf #316
  "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
  kworker/u4:1    D    0  1034      2 0x80004000
  Workqueue: nvme-delete-wq nvme_delete_ctrl_work
  Call Trace:
   __schedule+0x851/0x1400
   schedule+0x99/0x210
   schedule_timeout+0x390/0x830
   wait_for_completion+0x1a7/0x310
   __flush_work+0x241/0x5d0
   flush_work+0x10/0x20
   nvme_remove_namespaces+0x85/0x3d0
   nvme_do_delete_ctrl+0xb4/0x1e0
   nvme_delete_ctrl_work+0x15/0x20
   process_one_work+0x7d7/0x1240
   worker_thread+0x8e/0xff0
   kthread+0x2c3/0x3b0
   ret_from_fork+0x35/0x40
--

Reported-by: Logan Gunthorpe <logang at deltatee.com>
Signed-off-by: Sagi Grimberg <sagi at grimberg.me>
---
Changes from v2:
- fix silly compilation error (undeclared variable)

Changes from v1:
- fix compilation with CONFIG_NVME_MULTIPATH=n (Logan)

 drivers/nvme/host/core.c      |  7 ++++++
 drivers/nvme/host/multipath.c | 46 ++++++++++++++++++++++++++++++-----
 drivers/nvme/host/nvme.h      |  9 +++++--
 3 files changed, 54 insertions(+), 8 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 1c2863216082..c657e38f0554 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3577,6 +3577,13 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
 	struct nvme_ns *ns, *next;
 	LIST_HEAD(ns_list);
 
+	/*
+	 * make sure to requeue I/O to all namespaces as these
+	 * might result from the scan itself and must complete
+	 * for the scan_work to make progress
+	 */
+	nvme_mpath_clear_ctrl_paths(ctrl);
+
 	/* prevent racing with ns scanning */
 	flush_work(&ctrl->scan_work);
 
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index a9a927677970..45ffa17f844e 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -109,18 +109,34 @@ static const char *nvme_ana_state_names[] = {
 	[NVME_ANA_CHANGE]		= "change",
 };
 
-void nvme_mpath_clear_current_path(struct nvme_ns *ns)
+bool nvme_mpath_clear_current_path(struct nvme_ns *ns)
 {
 	struct nvme_ns_head *head = ns->head;
+	bool changed = false;
 	int node;
 
 	if (!head)
-		return;
+		goto out;
 
 	for_each_node(node) {
-		if (ns == rcu_access_pointer(head->current_path[node]))
+		if (ns == rcu_access_pointer(head->current_path[node])) {
 			rcu_assign_pointer(head->current_path[node], NULL);
+			changed = true;
+		}
 	}
+out:
+	return changed;
+}
+
+void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
+{
+	struct nvme_ns *ns;
+
+	mutex_lock(&ctrl->scan_lock);
+	list_for_each_entry(ns, &ctrl->namespaces, list)
+		if (nvme_mpath_clear_current_path(ns))
+			kblockd_schedule_work(&ns->head->requeue_work);
+	mutex_unlock(&ctrl->scan_lock);
 }
 
 static bool nvme_path_is_disabled(struct nvme_ns *ns)
@@ -231,6 +247,24 @@ inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head)
 	return ns;
 }
 
+static bool nvme_available_path(struct nvme_ns_head *head)
+{
+	struct nvme_ns *ns;
+
+	list_for_each_entry_rcu(ns, &head->list, siblings) {
+		switch (ns->ctrl->state) {
+		case NVME_CTRL_LIVE:
+		case NVME_CTRL_RESETTING:
+		case NVME_CTRL_CONNECTING:
+			/* fallthru */
+			return true;
+		default:
+			break;
+		}
+	}
+	return false;
+}
+
 static blk_qc_t nvme_ns_head_make_request(struct request_queue *q,
 		struct bio *bio)
 {
@@ -257,14 +291,14 @@ static blk_qc_t nvme_ns_head_make_request(struct request_queue *q,
 				      disk_devt(ns->head->disk),
 				      bio->bi_iter.bi_sector);
 		ret = direct_make_request(bio);
-	} else if (!list_empty_careful(&head->list)) {
-		dev_warn_ratelimited(dev, "no path available - requeuing I/O\n");
+	} else if (nvme_available_path(head)) {
+		dev_warn_ratelimited(dev, "no usable path - requeuing I/O\n");
 
 		spin_lock_irq(&head->requeue_lock);
 		bio_list_add(&head->requeue_list, bio);
 		spin_unlock_irq(&head->requeue_lock);
 	} else {
-		dev_warn_ratelimited(dev, "no path - failing I/O\n");
+		dev_warn_ratelimited(dev, "no available path - failing I/O\n");
 
 		bio->bi_status = BLK_STS_IOERR;
 		bio_endio(bio);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 740241e84d29..a730c6b3dfba 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -496,7 +496,8 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head);
 int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id);
 void nvme_mpath_uninit(struct nvme_ctrl *ctrl);
 void nvme_mpath_stop(struct nvme_ctrl *ctrl);
-void nvme_mpath_clear_current_path(struct nvme_ns *ns);
+bool nvme_mpath_clear_current_path(struct nvme_ns *ns);
+void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl);
 struct nvme_ns *nvme_find_path(struct nvme_ns_head *head);
 
 static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
@@ -544,7 +545,11 @@ static inline void nvme_mpath_add_disk(struct nvme_ns *ns,
 static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head)
 {
 }
-static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
+static inline bool nvme_mpath_clear_current_path(struct nvme_ns *ns)
+{
+	return false;
+}
+static inline void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
 {
 }
 static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
-- 
2.17.1


* [PATCH v3] nvme: fix controller removal race with scan work
  2019-07-25 18:56 [PATCH v3] nvme: fix controller removal race with scan work Sagi Grimberg
@ 2019-07-26 15:42 ` Hannes Reinecke
  2019-07-26 17:39   ` Sagi Grimberg
  2019-07-26 18:50 ` Logan Gunthorpe
  2019-07-30  1:46 ` Ming Lei
  2 siblings, 1 reply; 10+ messages in thread
From: Hannes Reinecke @ 2019-07-26 15:42 UTC (permalink / raw)


On 7/25/19 8:56 PM, Sagi Grimberg wrote:
> With multipath enabled, nvme_scan_work() can read from the device
> (through nvme_mpath_add_disk()) and hang [1]. However, with fabrics,
> once ctrl->state is set to NVME_CTRL_DELETING, the reads will hang
> (see nvmf_check_ready()) and the mpath stack device make_request
> will block if head->list is not empty. However, when the head->list
> consists only of DELETING/DEAD controllers, we should actually not
> block, but rather fail immediately.
> 
> In addition, before we go ahead and remove the namespaces, make sure
> to clear the current path and kick the requeue list so that the
> request will fast fail upon requeuing.
> 
> [1]:
> --
>    INFO: task kworker/u4:3:166 blocked for more than 120 seconds.
>          Not tainted 5.2.0-rc6-vmlocalyes-00005-g808c8c2dc0cf #316
>    "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
>    kworker/u4:3    D    0   166      2 0x80004000
>    Workqueue: nvme-wq nvme_scan_work
>    Call Trace:
>     __schedule+0x851/0x1400
>     schedule+0x99/0x210
>     io_schedule+0x21/0x70
>     do_read_cache_page+0xa57/0x1330
>     read_cache_page+0x4a/0x70
>     read_dev_sector+0xbf/0x380
>     amiga_partition+0xc4/0x1230
>     check_partition+0x30f/0x630
>     rescan_partitions+0x19a/0x980
>     __blkdev_get+0x85a/0x12f0
>     blkdev_get+0x2a5/0x790
>     __device_add_disk+0xe25/0x1250
>     device_add_disk+0x13/0x20
>     nvme_mpath_set_live+0x172/0x2b0
>     nvme_update_ns_ana_state+0x130/0x180
>     nvme_set_ns_ana_state+0x9a/0xb0
>     nvme_parse_ana_log+0x1c3/0x4a0
>     nvme_mpath_add_disk+0x157/0x290
>     nvme_validate_ns+0x1017/0x1bd0
>     nvme_scan_work+0x44d/0x6a0
>     process_one_work+0x7d7/0x1240
>     worker_thread+0x8e/0xff0
>     kthread+0x2c3/0x3b0
>     ret_from_fork+0x35/0x40
> 
>     INFO: task kworker/u4:1:1034 blocked for more than 120 seconds.
>          Not tainted 5.2.0-rc6-vmlocalyes-00005-g808c8c2dc0cf #316
>    "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
>    kworker/u4:1    D    0  1034      2 0x80004000
>    Workqueue: nvme-delete-wq nvme_delete_ctrl_work
>    Call Trace:
>     __schedule+0x851/0x1400
>     schedule+0x99/0x210
>     schedule_timeout+0x390/0x830
>     wait_for_completion+0x1a7/0x310
>     __flush_work+0x241/0x5d0
>     flush_work+0x10/0x20
>     nvme_remove_namespaces+0x85/0x3d0
>     nvme_do_delete_ctrl+0xb4/0x1e0
>     nvme_delete_ctrl_work+0x15/0x20
>     process_one_work+0x7d7/0x1240
>     worker_thread+0x8e/0xff0
>     kthread+0x2c3/0x3b0
>     ret_from_fork+0x35/0x40
> --
> 
> Reported-by: Logan Gunthorpe <logang at deltatee.com>
> Signed-off-by: Sagi Grimberg <sagi at grimberg.me>
> ---
> Changes from v2:
> - fix silly compilation error (undeclared variable)
> 
> Changes from v1:
> - fix compilation with CONFIG_NVME_MULTIPATH=n (Logan)
> 
>   drivers/nvme/host/core.c      |  7 ++++++
>   drivers/nvme/host/multipath.c | 46 ++++++++++++++++++++++++++++++-----
>   drivers/nvme/host/nvme.h      |  9 +++++--
>   3 files changed, 54 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 1c2863216082..c657e38f0554 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -3577,6 +3577,13 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
>   	struct nvme_ns *ns, *next;
>   	LIST_HEAD(ns_list);
>   
> +	/*
> +	 * make sure to requeue I/O to all namespaces as these
> +	 * might result from the scan itself and must complete
> +	 * for the scan_work to make progress
> +	 */
> +	nvme_mpath_clear_ctrl_paths(ctrl);
> +
>   	/* prevent racing with ns scanning */
>   	flush_work(&ctrl->scan_work);
>   
> diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
> index a9a927677970..45ffa17f844e 100644
> --- a/drivers/nvme/host/multipath.c
> +++ b/drivers/nvme/host/multipath.c
> @@ -109,18 +109,34 @@ static const char *nvme_ana_state_names[] = {
>   	[NVME_ANA_CHANGE]		= "change",
>   };
>   
> -void nvme_mpath_clear_current_path(struct nvme_ns *ns)
> +bool nvme_mpath_clear_current_path(struct nvme_ns *ns)
>   {
>   	struct nvme_ns_head *head = ns->head;
> +	bool changed = false;
>   	int node;
>   
>   	if (!head)
> -		return;
> +		goto out;
>   
>   	for_each_node(node) {
> -		if (ns == rcu_access_pointer(head->current_path[node]))
> +		if (ns == rcu_access_pointer(head->current_path[node])) {
>   			rcu_assign_pointer(head->current_path[node], NULL);
> +			changed = true;
> +		}
>   	}
> +out:
> +	return changed;
> +}
> +
> +void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
> +{
> +	struct nvme_ns *ns;
> +
> +	mutex_lock(&ctrl->scan_lock);
> +	list_for_each_entry(ns, &ctrl->namespaces, list)
> +		if (nvme_mpath_clear_current_path(ns))
> +			kblockd_schedule_work(&ns->head->requeue_work);
> +	mutex_unlock(&ctrl->scan_lock);
>   }
>   
>   static bool nvme_path_is_disabled(struct nvme_ns *ns)
> @@ -231,6 +247,24 @@ inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head)
>   	return ns;
>   }
>   
> +static bool nvme_available_path(struct nvme_ns_head *head)
> +{
> +	struct nvme_ns *ns;
> +
> +	list_for_each_entry_rcu(ns, &head->list, siblings) {
> +		switch (ns->ctrl->state) {
> +		case NVME_CTRL_LIVE:
> +		case NVME_CTRL_RESETTING:
> +		case NVME_CTRL_CONNECTING:
> +			/* fallthru */
> +			return true;
> +		default:
> +			break;
> +		}
> +	}
> +	return false;
> +}
> +
>   static blk_qc_t nvme_ns_head_make_request(struct request_queue *q,
>   		struct bio *bio)
>   {
> @@ -257,14 +291,14 @@ static blk_qc_t nvme_ns_head_make_request(struct request_queue *q,
>   				      disk_devt(ns->head->disk),
>   				      bio->bi_iter.bi_sector);
>   		ret = direct_make_request(bio);
> -	} else if (!list_empty_careful(&head->list)) {
> -		dev_warn_ratelimited(dev, "no path available - requeuing I/O\n");
> +	} else if (nvme_available_path(head)) {
> +		dev_warn_ratelimited(dev, "no usable path - requeuing I/O\n");
>   
>   		spin_lock_irq(&head->requeue_lock);
>   		bio_list_add(&head->requeue_list, bio);
>   		spin_unlock_irq(&head->requeue_lock);
>   	} else {
> -		dev_warn_ratelimited(dev, "no path - failing I/O\n");
> +		dev_warn_ratelimited(dev, "no available path - failing I/O\n");
>   
>   		bio->bi_status = BLK_STS_IOERR;
>   		bio_endio(bio);
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index 740241e84d29..a730c6b3dfba 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -496,7 +496,8 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head);
>   int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id);
>   void nvme_mpath_uninit(struct nvme_ctrl *ctrl);
>   void nvme_mpath_stop(struct nvme_ctrl *ctrl);
> -void nvme_mpath_clear_current_path(struct nvme_ns *ns);
> +bool nvme_mpath_clear_current_path(struct nvme_ns *ns);
> +void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl);
>   struct nvme_ns *nvme_find_path(struct nvme_ns_head *head);
>   
>   static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
> @@ -544,7 +545,11 @@ static inline void nvme_mpath_add_disk(struct nvme_ns *ns,
>   static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head)
>   {
>   }
> -static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
> +static inline bool nvme_mpath_clear_current_path(struct nvme_ns *ns)
> +{
> +	return false;
> +}
> +static inline void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
>   {
>   }
>   static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
> 
Weellll ... I'm not sure if I totally agree with this one.

As you might know I'm chasing down the mysterious reset/rescan issue, 
too, and still haven't been able to resolve it properly.

But looking at this patch, the hunk for clearing the current path on
removing namespaces looks like a good idea in itself.
Care to separate this out?

Thing is, I had been notified of a regression caused by commit
525aa5a705d86e ("nvme-multipath: split bios with the ns_head..."), which
manifests itself as a spurious I/O error during failover.
This is hitting precisely the code path in nvme multipath; we're seeing 
the message 'No path, failing I/O' during failover despite paths having 
been reconnected previously.
So your patch would indeed increase the likelihood here, and I'm not
sure if that's the correct way to go.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke            Teamlead Storage & Networking
hare at suse.de                              +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Mary Higgins, Sri Rasiah
HRB 21284 (AG Nürnberg)


* [PATCH v3] nvme: fix controller removal race with scan work
  2019-07-26 15:42 ` Hannes Reinecke
@ 2019-07-26 17:39   ` Sagi Grimberg
  2019-07-26 17:50     ` Logan Gunthorpe
  2019-07-29  8:31     ` Hannes Reinecke
  0 siblings, 2 replies; 10+ messages in thread
From: Sagi Grimberg @ 2019-07-26 17:39 UTC (permalink / raw)



> Weellll ... I'm not sure if I totally agree with this one.

That's fair...

> As you might know I'm chasing down the mysterious reset/rescan issue, 
> too, and still haven't been able to resolve it properly.

Have you tried the patch that I proposed to you?

This is a different problem than the one you reported: it addresses
the scenario where the host connects and quickly disconnects from
the target before the scan is completed AND multipath is enabled.

> But looking at this patch the hunk for clearing the current path on 
> removing namespaces looks like a good idea on itself.
> Care to separate this out?

Sure, although it's designed to address this issue and not very
effective without the other part.

> Thing is, I had been notified on a regression caused by commit 
> 525aa5a705d86e ("nvme-multipath: split bios with the ns_head..."), which 
> manifests itself by a spurious I/O error during failover.

Hmm, this is interesting. Not sure how this specific commit would
introduce such an issue.

> This is hitting precisely the code path in nvme multipath; we're seeing 
> the message 'No path, failing I/O' during failover despite paths having 
> been reconnected previously.

Can we understand what the controller states are in this scenario?

> So your patch would indeed increase the likelihood here, and I'm not
> sure if that's the correct way to go.

Well, we are clearly doing the wrong thing if we're queueing I/O when all
the paths we have are on either DELETING or DEAD controllers. That
is precisely why we suffer from I/O hangs. This patch corrects this
wrong behavior. We must only requeue I/O if we have an available path
that is expected to resume at some point (i.e. RESETTING/CONNECTING).

If we get I/O errors because all our paths are DELETING/DEAD then these
are not spurious, but rather expected.
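
To spell that policy out against the helper the patch adds (a trimmed
copy of nvme_available_path() with the per-state reasoning as comments):
--
static bool nvme_available_path(struct nvme_ns_head *head)
{
	struct nvme_ns *ns;

	list_for_each_entry_rcu(ns, &head->list, siblings) {
		switch (ns->ctrl->state) {
		case NVME_CTRL_LIVE:		/* usable right now */
		case NVME_CTRL_RESETTING:	/* expected to resume */
		case NVME_CTRL_CONNECTING:	/* expected to resume */
			return true;
		default:			/* e.g. DELETING/DEAD */
			break;
		}
	}
	/* nothing but DELETING/DEAD paths (or no paths at all) */
	return false;
}
--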


* [PATCH v3] nvme: fix controller removal race with scan work
  2019-07-26 17:39   ` Sagi Grimberg
@ 2019-07-26 17:50     ` Logan Gunthorpe
  2019-07-29  8:31     ` Hannes Reinecke
  1 sibling, 0 replies; 10+ messages in thread
From: Logan Gunthorpe @ 2019-07-26 17:50 UTC (permalink / raw)




On 2019-07-26 11:39 a.m., Sagi Grimberg wrote:
> 
>> Weellll ... I'm not sure if I totally agree with this one.
> 
> That's fair...
> 
>> As you might know I'm chasing down the mysterious reset/rescan issue,
>> too, and still haven't been able to resolve it properly.
> 
> Have you tried the patch that I proposed to you?
> 
> This is a different problem than what you reported, this is
> addressing the scenario that the host connects and quickly
> disconnects from a client before the scan is completed AND
> multipath is enabled.
> 
>> But looking at this patch the hunk for clearing the current path on
>> removing namespaces looks like a good idea on itself.
>> Care to separate this out?
> 
> Sure, although its designed to address this issue and not very
> effective without the other part.
> 
>> Thing is, I had been notified on a regression caused by commit
>> 525aa5a705d86e ("nvme-multipath: split bios with the ns_head..."),
>> which manifests itself by a spurious I/O error during failover.
> 
> Hmm, this is interesting. Not sure how this specific commit would
> introduce such an issue.
> 
>> This is hitting precisely the code path in nvme multipath; we're
>> seeing the message 'No path, failing I/O' during failover despite
>> paths having been reconnected previously.
> 
> Can we understand what are the controller states in this scenario?
> 
>> So your patch would indeed increase the likelihood here, and I'm not
>> sure if that's the correct way to go.
> 
> Well, we are clearly doing the wrong thing if we're queueing I/O if all
> the paths that we have are either DELETING or DEAD controllers. That
> is precisely why we suffer from I/O hangs. This patch corrects this
> wrong behavior. We must only requeue I/O if we have an available path
> that is expected to sometime to resume (i.e. RESETTING/CONNECTING).

Agreed. I think this is the key point and why this patch is the
right thing to do.

Logan


* [PATCH v3] nvme: fix controller removal race with scan work
  2019-07-25 18:56 [PATCH v3] nvme: fix controller removal race with scan work Sagi Grimberg
  2019-07-26 15:42 ` Hannes Reinecke
@ 2019-07-26 18:50 ` Logan Gunthorpe
  2019-07-30  1:46 ` Ming Lei
  2 siblings, 0 replies; 10+ messages in thread
From: Logan Gunthorpe @ 2019-07-26 18:50 UTC (permalink / raw)




On 2019-07-25 12:56 p.m., Sagi Grimberg wrote:
> With multipath enabled, nvme_scan_work() can read from the device
> (through nvme_mpath_add_disk()) and hang [1]. However, with fabrics,
> once ctrl->state is set to NVME_CTRL_DELETING, the reads will hang
> (see nvmf_check_ready()) and the mpath stack device make_request
> will block if head->list is not empty. However, when the head->list
> consists only of DELETING/DEAD controllers, we should actually not
> block, but rather fail immediately.
> 
> In addition, before we go ahead and remove the namespaces, make sure
> to clear the current path and kick the requeue list so that the
> request will fast fail upon requeuing.
> 
> [1]:
> --
>   INFO: task kworker/u4:3:166 blocked for more than 120 seconds.
>         Not tainted 5.2.0-rc6-vmlocalyes-00005-g808c8c2dc0cf #316
>   "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
>   kworker/u4:3    D    0   166      2 0x80004000
>   Workqueue: nvme-wq nvme_scan_work
>   Call Trace:
>    __schedule+0x851/0x1400
>    schedule+0x99/0x210
>    io_schedule+0x21/0x70
>    do_read_cache_page+0xa57/0x1330
>    read_cache_page+0x4a/0x70
>    read_dev_sector+0xbf/0x380
>    amiga_partition+0xc4/0x1230
>    check_partition+0x30f/0x630
>    rescan_partitions+0x19a/0x980
>    __blkdev_get+0x85a/0x12f0
>    blkdev_get+0x2a5/0x790
>    __device_add_disk+0xe25/0x1250
>    device_add_disk+0x13/0x20
>    nvme_mpath_set_live+0x172/0x2b0
>    nvme_update_ns_ana_state+0x130/0x180
>    nvme_set_ns_ana_state+0x9a/0xb0
>    nvme_parse_ana_log+0x1c3/0x4a0
>    nvme_mpath_add_disk+0x157/0x290
>    nvme_validate_ns+0x1017/0x1bd0
>    nvme_scan_work+0x44d/0x6a0
>    process_one_work+0x7d7/0x1240
>    worker_thread+0x8e/0xff0
>    kthread+0x2c3/0x3b0
>    ret_from_fork+0x35/0x40
> 
>    INFO: task kworker/u4:1:1034 blocked for more than 120 seconds.
>         Not tainted 5.2.0-rc6-vmlocalyes-00005-g808c8c2dc0cf #316
>   "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
>   kworker/u4:1    D    0  1034      2 0x80004000
>   Workqueue: nvme-delete-wq nvme_delete_ctrl_work
>   Call Trace:
>    __schedule+0x851/0x1400
>    schedule+0x99/0x210
>    schedule_timeout+0x390/0x830
>    wait_for_completion+0x1a7/0x310
>    __flush_work+0x241/0x5d0
>    flush_work+0x10/0x20
>    nvme_remove_namespaces+0x85/0x3d0
>    nvme_do_delete_ctrl+0xb4/0x1e0
>    nvme_delete_ctrl_work+0x15/0x20
>    process_one_work+0x7d7/0x1240
>    worker_thread+0x8e/0xff0
>    kthread+0x2c3/0x3b0
>    ret_from_fork+0x35/0x40
> --
> 
> Reported-by: Logan Gunthorpe <logang at deltatee.com>
> Signed-off-by: Sagi Grimberg <sagi at grimberg.me>

Tested-by: Logan Gunthorpe <logang at deltatee.com>
Reviewed-by: Logan Gunthorpe <logang at deltatee.com>

Looks good to me. Thanks!

Logan


* [PATCH v3] nvme: fix controller removal race with scan work
  2019-07-26 17:39   ` Sagi Grimberg
  2019-07-26 17:50     ` Logan Gunthorpe
@ 2019-07-29  8:31     ` Hannes Reinecke
  2019-07-29 16:11       ` Sagi Grimberg
  1 sibling, 1 reply; 10+ messages in thread
From: Hannes Reinecke @ 2019-07-29  8:31 UTC (permalink / raw)


On 7/26/19 7:39 PM, Sagi Grimberg wrote:
> 
>> Weellll ... I'm not sure if I totally agree with this one.
> 
> That's fair...
> 
>> As you might know I'm chasing down the mysterious reset/rescan issue,
>> too, and still haven't been able to resolve it properly.
> 
> Have you tried the patch that I proposed to you?
> 
Yeah. But reports are still inconclusive due to the second problem.
And

> This is a different problem than what you reported, this is
> addressing the scenario that the host connects and quickly
> disconnects from a client before the scan is completed AND
> multipath is enabled.
> 
.... Which is precisely the failure scenario for my problem, too ...

>> But looking at this patch the hunk for clearing the current path on
>> removing namespaces looks like a good idea on itself.
>> Care to separate this out?
> 
> Sure, although its designed to address this issue and not very
> effective without the other part.
> 
>> Thing is, I had been notified on a regression caused by commit
>> 525aa5a705d86e ("nvme-multipath: split bios with the ns_head..."),
>> which manifests itself by a spurious I/O error during failover.
> 
> Hmm, this is interesting. Not sure how this specific commit would
> introduce such an issue.
> 
See? Me neither. Sadly they _have_ bisected it, and this really is the
patch introducing the regression.
Current thinking is that we're dropping all paths during reset, causing
ns_head to be deleted, too. The ns_head will get recreated immediately
afterwards, so userspace wouldn't even notice that.
But if we have a non-empty requeue list, it will be flushed only when
doing the final put on the ns_head structure, causing all I/O to fail.

So looking at it that way, it _might_ actually be that your patch helps
for that situation, too, seeing that we can't reliably enumerate
namespaces during reset.

I'll give it a try and will get back to you with the results.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		   Teamlead Storage & Networking
hare at suse.de			               +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Mary Higgins, Sri Rasiah
HRB 21284 (AG Nürnberg)


* [PATCH v3] nvme: fix controller removal race with scan work
  2019-07-29  8:31     ` Hannes Reinecke
@ 2019-07-29 16:11       ` Sagi Grimberg
  2019-07-29 16:36         ` Hannes Reinecke
  0 siblings, 1 reply; 10+ messages in thread
From: Sagi Grimberg @ 2019-07-29 16:11 UTC (permalink / raw)



>>> As you might know I'm chasing down the mysterious reset/rescan issue,
>>> too, and still haven't been able to resolve it properly.
>>
>> Have you tried the patch that I proposed to you?
>>
> Yeah. But reports are still inconclusive due to the second problem.
> And

OK, looking forward to understanding the root-cause and if/how
we can address this.

>> This is a different problem than what you reported, this is
>> addressing the scenario that the host connects and quickly
>> disconnects from a client before the scan is completed AND
>> multipath is enabled.
>>
> .... Which is precisely the failure scenario for my problem, too ...

Hannes, I'm a bit confused again. I was under the impression that
your issues were around controller reset race with scan, not controller
removal (disconnect).

>>> Thing is, I had been notified on a regression caused by commit
>>> 525aa5a705d86e ("nvme-multipath: split bios with the ns_head..."),
>>> which manifests itself by a spurious I/O error during failover.
>>
>> Hmm, this is interesting. Not sure how this specific commit would
>> introduce such an issue.
>>
> See? Me neither. Sadly they _have_ bisected it, and this really is the
> patch introducing the regression.
> Current thinking is that we're dropping all paths during reset, causing
> ns_head to be deleted, too.

That is impossible; the only things that would drop paths are the user
removing a controller or the ctrl_loss_tmo expiring, and both of those
are controller removal, not controller reset.

> The ns_head will get recreated immediately
> afterwards, so userspace wouldn't even notice that.
> But if we have a non-empty requeue list that will be flushed only when
> doing the final put on the ns_head structure, causing all I/O to fail.

Again, we need to understand what the exact scenario is, because if we
are losing all the paths, I/O failure is the expected behavior.

> So looking at it that way, it _might_ actually be that your patch helps
> for that situation, too, seeing that we can't reliably enumerate
> namespaces during reset.

Can we define clearly what you mean when you say "enumerate
namespaces during reset"?

> I'll give it a try and will get back to you with the results.

Thanks Hannes.


* [PATCH v3] nvme: fix controller removal race with scan work
  2019-07-29 16:11       ` Sagi Grimberg
@ 2019-07-29 16:36         ` Hannes Reinecke
  2019-07-29 20:31           ` Sagi Grimberg
  0 siblings, 1 reply; 10+ messages in thread
From: Hannes Reinecke @ 2019-07-29 16:36 UTC (permalink / raw)


On 7/29/19 6:11 PM, Sagi Grimberg wrote:
> 
>>>> As you might know I'm chasing down the mysterious reset/rescan issue,
>>>> too, and still haven't been able to resolve it properly.
>>>
>>> Have you tried the patch that I proposed to you?
>>>
>> Yeah. But reports are still inconclusive due to the second problem.
>> And
> 
> OK, looking forward to understanding the root-cause and if/how
> we can address this.
> 
>>> This is a different problem than what you reported, this is
>>> addressing the scenario that the host connects and quickly
>>> disconnects from a client before the scan is completed AND
>>> multipath is enabled.
>>>
>> .... Which is precisely the failure scenario for my problem, too ...
> 
> Hannes, I'm a bit confused again. I was under the impression that
> your issues were around controller reset race with scan, not controller
> removal (disconnect).
> 
>>>> Thing is, I had been notified on a regression caused by commit
>>>> 525aa5a705d86e ("nvme-multipath: split bios with the ns_head..."),
>>>> which manifests itself by a spurious I/O error during failover.
>>>
>>> Hmm, this is interesting. Not sure how this specific commit would
>>> introduce such an issue.
>>>
>> See? Me neither. Sadly they _have_ bisected it, and this really is the
>> patch introducing the regression.
>> Current thinking is that we're dropping all paths during reset, causing
>> ns_head to be deleted, too.
> 
> That is impossible, the only thing that would drop paths is either
> if the user is removing a controller, or the ctrl_loss_tmo expired, both
> are controller removal and not controller reset.
> 
Well ... the problem seems to arise if we get a Namespace Changed
AEN during reset.
In that case we'll call nvme_scan_ns_list(), where the initial
identify will fail for all namespaces, and all of them will be disconnected.
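
Roughly, this is the chain I mean (a hand-written sketch around the
current nvme_validate_ns(), simplified, so take it with a grain of salt):
--
/*
 *  Namespace Changed AEN (while the controller is resetting)
 *    -> nvme_scan_work() -> nvme_scan_ns_list() -> nvme_validate_ns()
 */
static void nvme_validate_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{
	struct nvme_ns *ns;

	ns = nvme_find_get_ns(ctrl, nsid);
	if (ns) {
		/*
		 * revalidate_disk() has to talk to the controller; during
		 * reset that fails, so every existing namespace is removed
		 * even though nothing actually changed on the target.
		 */
		if (ns->disk && revalidate_disk(ns->disk))
			nvme_ns_remove(ns);
		nvme_put_ns(ns);
	} else
		nvme_alloc_ns(ctrl, nsid);
}
--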

>> The ns_head will get recreated immediately
>> afterwards, so userspace wouldn't even notice that.
>> But if we have a non-empty requeue list that will be flushed only when
>> doing the final put on the ns_head structure, causing all I/O to fail.
> 
> Again, we need to understand what is the exact scenario, because if we
> are losing all the paths, I/O failure is the expected behavior.
> 
... Unless we're disconnecting those paths during controller reset, 
which actually is expected to happen, and will be corrected after reset 
completes.

>> So looking at it that way, it _might_ actually be that your patch helps
>> for that situation, too, seeing that we can't reliably enumerate
>> namespaces during reset.
> 
> Can we define clearly what do you mean when you say "enumerate 
> namespaces during reset"?
> 
Any call which walks the list of namespaces, either the controller list 
or the ns_head siblings list.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke            Teamlead Storage & Networking
hare at suse.de                              +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Mary Higgins, Sri Rasiah
HRB 21284 (AG Nürnberg)


* [PATCH v3] nvme: fix controller removal race with scan work
  2019-07-29 16:36         ` Hannes Reinecke
@ 2019-07-29 20:31           ` Sagi Grimberg
  0 siblings, 0 replies; 10+ messages in thread
From: Sagi Grimberg @ 2019-07-29 20:31 UTC (permalink / raw)



>>>>> Thing is, I had been notified on a regression caused by commit
>>>>> 525aa5a705d86e ("nvme-multipath: split bios with the ns_head..."),
>>>>> which manifests itself by a spurious I/O error during failover.
>>>>
>>>> Hmm, this is interesting. Not sure how this specific commit would
>>>> introduce such an issue.
>>>>
>>> See? Me neither. Sadly they _have_ bisected it, and this really is the
>>> patch introducing the regression.
>>> Current thinking is that we're dropping all paths during reset, causing
>>> ns_head to be deleted, too.
>>
>> That is impossible, the only thing that would drop paths is either
>> if the user is removing a controller, or the ctrl_loss_tmo expired, both
>> are controller removal and not controller reset.
>>
> Well ... the problem seem to be arising if we get an Namespace Changed 
> AEN during reset.
> In that case we'll call nvme_scan_ns_list(), on which the initial 
> identify will fail for all namespaces, and all of them will be 
> disconnected.

Well that sounds like the wrong behavior. If we are not able to scan
the ns list because we are reconnecting, we should not remove the
namespaces.

What exactly fails? And what leads us to removing the namespaces?
Is nvme_identify_ns_list() not returning a failure status code?

Something is still not clear to me.

Perhaps we need this protection in place such that we are not removing
a namespace if we failed namespace revalidation because we lost
connectivity:
--
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index fa31da0762b9..5f6970e7ba73 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3428,7 +3428,8 @@ static void nvme_validate_ns(struct nvme_ctrl *ctrl, unsigned nsid)

         ns = nvme_find_get_ns(ctrl, nsid);
         if (ns) {
-               if (ns->disk && revalidate_disk(ns->disk))
+               if (ns->disk && revalidate_disk(ns->disk) &&
+                   ctrl->state == NVME_CTRL_LIVE)
                         nvme_ns_remove(ns);
                 nvme_put_ns(ns);
         } else
--

>>> The ns_head will get recreated immediately
>>> afterwards, so userspace wouldn't even notice that.
>>> But if we have a non-empty requeue list that will be flushed only when
>>> doing the final put on the ns_head structure, causing all I/O to fail.
>>
>> Again, we need to understand what is the exact scenario, because if we
>> are losing all the paths, I/O failure is the expected behavior.
>>
> ... Unless we're disconnecting those paths during controller reset, 
> which actually is expected to happen, and will be corrected after reset 
> completes.

Why is that the correct behavior? Doesn't sound like that to me...
If we don't have a communication channel to the target, ns scanning will
fail for sure; we should not remove these namespaces because we don't
have a way to get the answer during this time.

>>> So looking at it that way, it _might_ actually be that your patch helps
>>> for that situation, too, seeing that we can't reliably enumerate
>>> namespaces during reset.
>>
>> Can we define clearly what do you mean when you say "enumerate 
>> namespaces during reset"?
>>
> Any call which walks the list of namespaces, either the controller list 
> or the ns_head siblings list.

If we have a case where the sibling list gets emptied during the
controller reset, this patch will not help because nvme_available_path()
traverses it. I think something else is causing the issues
you are seeing.
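
To be explicit about why (trimmed from the make_request hunk of this
patch; the comment marks the branch an emptied sibling list ends up in):
--
	} else if (nvme_available_path(head)) {
		dev_warn_ratelimited(dev, "no usable path - requeuing I/O\n");

		spin_lock_irq(&head->requeue_lock);
		bio_list_add(&head->requeue_list, bio);
		spin_unlock_irq(&head->requeue_lock);
	} else {
		/*
		 * An empty head->list (or one holding only DELETING/DEAD
		 * paths) makes nvme_available_path() return false, so the
		 * bio is failed here rather than requeued.
		 */
		dev_warn_ratelimited(dev, "no available path - failing I/O\n");

		bio->bi_status = BLK_STS_IOERR;
		bio_endio(bio);
	}
--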


* [PATCH v3] nvme: fix controller removal race with scan work
  2019-07-25 18:56 [PATCH v3] nvme: fix controller removal race with scan work Sagi Grimberg
  2019-07-26 15:42 ` Hannes Reinecke
  2019-07-26 18:50 ` Logan Gunthorpe
@ 2019-07-30  1:46 ` Ming Lei
  2 siblings, 0 replies; 10+ messages in thread
From: Ming Lei @ 2019-07-30  1:46 UTC (permalink / raw)


On Fri, Jul 26, 2019 at 2:58 AM Sagi Grimberg <sagi@grimberg.me> wrote:
>
> With multipath enabled, nvme_scan_work() can read from the device
> (through nvme_mpath_add_disk()) and hang [1]. However, with fabrics,
> once ctrl->state is set to NVME_CTRL_DELETING, the reads will hang
> (see nvmf_check_ready()) and the mpath stack device make_request
> will block if head->list is not empty. However, when the head->list
> consists only of DELETING/DEAD controllers, we should actually not
> block, but rather fail immediately.
>
> In addition, before we go ahead and remove the namespaces, make sure
> to clear the current path and kick the requeue list so that the
> request will fast fail upon requeuing.
>
> [1]:
> --
>   INFO: task kworker/u4:3:166 blocked for more than 120 seconds.
>         Not tainted 5.2.0-rc6-vmlocalyes-00005-g808c8c2dc0cf #316
>   "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
>   kworker/u4:3    D    0   166      2 0x80004000
>   Workqueue: nvme-wq nvme_scan_work
>   Call Trace:
>    __schedule+0x851/0x1400
>    schedule+0x99/0x210
>    io_schedule+0x21/0x70
>    do_read_cache_page+0xa57/0x1330
>    read_cache_page+0x4a/0x70
>    read_dev_sector+0xbf/0x380
>    amiga_partition+0xc4/0x1230
>    check_partition+0x30f/0x630
>    rescan_partitions+0x19a/0x980
>    __blkdev_get+0x85a/0x12f0
>    blkdev_get+0x2a5/0x790
>    __device_add_disk+0xe25/0x1250
>    device_add_disk+0x13/0x20
>    nvme_mpath_set_live+0x172/0x2b0
>    nvme_update_ns_ana_state+0x130/0x180
>    nvme_set_ns_ana_state+0x9a/0xb0
>    nvme_parse_ana_log+0x1c3/0x4a0
>    nvme_mpath_add_disk+0x157/0x290
>    nvme_validate_ns+0x1017/0x1bd0
>    nvme_scan_work+0x44d/0x6a0
>    process_one_work+0x7d7/0x1240
>    worker_thread+0x8e/0xff0
>    kthread+0x2c3/0x3b0
>    ret_from_fork+0x35/0x40
>
>    INFO: task kworker/u4:1:1034 blocked for more than 120 seconds.
>         Not tainted 5.2.0-rc6-vmlocalyes-00005-g808c8c2dc0cf #316
>   "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
>   kworker/u4:1    D    0  1034      2 0x80004000
>   Workqueue: nvme-delete-wq nvme_delete_ctrl_work
>   Call Trace:
>    __schedule+0x851/0x1400
>    schedule+0x99/0x210
>    schedule_timeout+0x390/0x830
>    wait_for_completion+0x1a7/0x310
>    __flush_work+0x241/0x5d0
>    flush_work+0x10/0x20
>    nvme_remove_namespaces+0x85/0x3d0
>    nvme_do_delete_ctrl+0xb4/0x1e0
>    nvme_delete_ctrl_work+0x15/0x20
>    process_one_work+0x7d7/0x1240
>    worker_thread+0x8e/0xff0
>    kthread+0x2c3/0x3b0
>    ret_from_fork+0x35/0x40
> --
>
> Reported-by: Logan Gunthorpe <logang at deltatee.com>
> Signed-off-by: Sagi Grimberg <sagi at grimberg.me>
> ---
> Changes from v2:
> - fix silly compilation error (undeclared variable)
>
> Changes from v1:
> - fix compilation with CONFIG_NVME_MULTIPATH=n (Logan)
>
>  drivers/nvme/host/core.c      |  7 ++++++
>  drivers/nvme/host/multipath.c | 46 ++++++++++++++++++++++++++++++-----
>  drivers/nvme/host/nvme.h      |  9 +++++--
>  3 files changed, 54 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 1c2863216082..c657e38f0554 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -3577,6 +3577,13 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
>         struct nvme_ns *ns, *next;
>         LIST_HEAD(ns_list);
>
> +       /*
> +        * make sure to requeue I/O to all namespaces as these
> +        * might result from the scan itself and must complete
> +        * for the scan_work to make progress
> +        */
> +       nvme_mpath_clear_ctrl_paths(ctrl);
> +
>         /* prevent racing with ns scanning */
>         flush_work(&ctrl->scan_work);
>
> diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
> index a9a927677970..45ffa17f844e 100644
> --- a/drivers/nvme/host/multipath.c
> +++ b/drivers/nvme/host/multipath.c
> @@ -109,18 +109,34 @@ static const char *nvme_ana_state_names[] = {
>         [NVME_ANA_CHANGE]               = "change",
>  };
>
> -void nvme_mpath_clear_current_path(struct nvme_ns *ns)
> +bool nvme_mpath_clear_current_path(struct nvme_ns *ns)
>  {
>         struct nvme_ns_head *head = ns->head;
> +       bool changed = false;
>         int node;
>
>         if (!head)
> -               return;
> +               goto out;
>
>         for_each_node(node) {
> -               if (ns == rcu_access_pointer(head->current_path[node]))
> +               if (ns == rcu_access_pointer(head->current_path[node])) {
>                         rcu_assign_pointer(head->current_path[node], NULL);
> +                       changed = true;
> +               }
>         }
> +out:
> +       return changed;
> +}
> +
> +void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
> +{
> +       struct nvme_ns *ns;
> +
> +       mutex_lock(&ctrl->scan_lock);
> +       list_for_each_entry(ns, &ctrl->namespaces, list)
> +               if (nvme_mpath_clear_current_path(ns))
> +                       kblockd_schedule_work(&ns->head->requeue_work);
> +       mutex_unlock(&ctrl->scan_lock);
>  }
>
>  static bool nvme_path_is_disabled(struct nvme_ns *ns)
> @@ -231,6 +247,24 @@ inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head)
>         return ns;
>  }
>
> +static bool nvme_available_path(struct nvme_ns_head *head)
> +{
> +       struct nvme_ns *ns;
> +
> +       list_for_each_entry_rcu(ns, &head->list, siblings) {
> +               switch (ns->ctrl->state) {
> +               case NVME_CTRL_LIVE:
> +               case NVME_CTRL_RESETTING:
> +               case NVME_CTRL_CONNECTING:
> +                       /* fallthru */
> +                       return true;
> +               default:
> +                       break;
> +               }
> +       }
> +       return false;
> +}
> +
>  static blk_qc_t nvme_ns_head_make_request(struct request_queue *q,
>                 struct bio *bio)
>  {
> @@ -257,14 +291,14 @@ static blk_qc_t nvme_ns_head_make_request(struct request_queue *q,
>                                       disk_devt(ns->head->disk),
>                                       bio->bi_iter.bi_sector);
>                 ret = direct_make_request(bio);
> -       } else if (!list_empty_careful(&head->list)) {
> -               dev_warn_ratelimited(dev, "no path available - requeuing I/O\n");
> +       } else if (nvme_available_path(head)) {
> +               dev_warn_ratelimited(dev, "no usable path - requeuing I/O\n");
>
>                 spin_lock_irq(&head->requeue_lock);
>                 bio_list_add(&head->requeue_list, bio);
>                 spin_unlock_irq(&head->requeue_lock);
>         } else {
> -               dev_warn_ratelimited(dev, "no path - failing I/O\n");
> +               dev_warn_ratelimited(dev, "no available path - failing I/O\n");
>
>                 bio->bi_status = BLK_STS_IOERR;
>                 bio_endio(bio);
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index 740241e84d29..a730c6b3dfba 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -496,7 +496,8 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head);
>  int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id);
>  void nvme_mpath_uninit(struct nvme_ctrl *ctrl);
>  void nvme_mpath_stop(struct nvme_ctrl *ctrl);
> -void nvme_mpath_clear_current_path(struct nvme_ns *ns);
> +bool nvme_mpath_clear_current_path(struct nvme_ns *ns);
> +void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl);
>  struct nvme_ns *nvme_find_path(struct nvme_ns_head *head);
>
>  static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
> @@ -544,7 +545,11 @@ static inline void nvme_mpath_add_disk(struct nvme_ns *ns,
>  static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head)
>  {
>  }
> -static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
> +static inline bool nvme_mpath_clear_current_path(struct nvme_ns *ns)
> +{
> +       return false;
> +}
> +static inline void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
>  {
>  }
>  static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
> --

Wrt. multipath, the patch does the correct thing by failing bios
(instead of requeueing them) when there is no available path, so it
looks fine:

Reviewed-by: Ming Lei <ming.lei at redhat.com>


Thanks,
Ming Lei

