* [RFC PATCH] nvme: add 'queue_if_no_path' semantics
@ 2020-10-02 16:06 Hannes Reinecke
  2020-10-05  9:24 ` Christoph Hellwig
  0 siblings, 1 reply; 3+ messages in thread
From: Hannes Reinecke @ 2020-10-02 16:06 UTC
  To: Christoph Hellwig; +Cc: linux-nvme, Sagi Grimberg, Keith Busch, Hannes Reinecke

Currently namespaces behave differently depending on the 'CMIC'
setting. If CMIC is zero, the device is removed once the last path
goes away. If CMIC has the multipath bit set, the device is retained
even if the last path is removed.
This is okay for fabrics, where one can do an explicit disconnect
to remove the device, but for nvme-pci this induces a regression
with PCI hotplug.
When the NVMe device is opened (e.g. by MD), it is not removed
after a PCI hot-remove. Hence MD will not be notified about the
event and will continue to consider this device as operational.
Consequently, upon PCI hot-add the device shows up as a new NVMe
device, and MD will fail to reattach the device.
So this patch adds an NVME_NSHEAD_QUEUE_IF_NO_PATH flag to the
nshead to restore the original behaviour for non-fabrics NVMe devices.

Suggested-by: Keith Busch <keith.busch@wdc.com>
Signed-off-by: Hannes Reinecke <hare@suse.de>
---
 drivers/nvme/host/core.c      | 42 ++++++++++++++++++++++++++++++++++-
 drivers/nvme/host/multipath.c |  5 ++++-
 drivers/nvme/host/nvme.h      |  3 ++-
 3 files changed, 47 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 385b10317873..87589f0adea3 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -474,7 +474,11 @@ static void nvme_free_ns_head(struct kref *ref)
 	struct nvme_ns_head *head =
 		container_of(ref, struct nvme_ns_head, ref);
 
-	nvme_mpath_remove_disk(head);
+	if (head->disk) {
+		if (test_bit(NVME_NSHEAD_QUEUE_IF_NO_PATH, &head->flags))
+			nvme_mpath_remove_disk(head);
+		put_disk(head->disk);
+	}
 	ida_simple_remove(&head->subsys->ns_ida, head->instance);
 	cleanup_srcu_struct(&head->srcu);
 	nvme_put_subsystem(head->subsys);
@@ -3345,12 +3349,42 @@ static ssize_t nsid_show(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_RO(nsid);
 
+static ssize_t queue_if_no_path_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nvme_ns_head *head = dev_to_ns_head(dev);
+
+	return sprintf(buf, "%s\n",
+		       test_bit(NVME_NSHEAD_QUEUE_IF_NO_PATH, &head->flags) ?
+		       "on" : "off");
+}
+
+static ssize_t queue_if_no_path_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct nvme_ns_head *head = dev_to_ns_head(dev);
+	int queue_if_no_path, err;
+
+	err = kstrtoint(buf, 10, &queue_if_no_path);
+	if (err)
+		return -EINVAL;
+
+	if (queue_if_no_path <= 0)
+		clear_bit(NVME_NSHEAD_QUEUE_IF_NO_PATH, &head->flags);
+	else
+		set_bit(NVME_NSHEAD_QUEUE_IF_NO_PATH, &head->flags);
+	return count;
+}
+static DEVICE_ATTR(queue_if_no_path, S_IRUGO | S_IWUSR,
+	queue_if_no_path_show, queue_if_no_path_store);
+
 static struct attribute *nvme_ns_id_attrs[] = {
 	&dev_attr_wwid.attr,
 	&dev_attr_uuid.attr,
 	&dev_attr_nguid.attr,
 	&dev_attr_eui.attr,
 	&dev_attr_nsid.attr,
+	&dev_attr_queue_if_no_path.attr,
 #ifdef CONFIG_NVME_MULTIPATH
 	&dev_attr_ana_grpid.attr,
 	&dev_attr_ana_state.attr,
@@ -3377,6 +3411,10 @@ static umode_t nvme_ns_id_attrs_are_visible(struct kobject *kobj,
 		if (!memchr_inv(ids->eui64, 0, sizeof(ids->eui64)))
 			return 0;
 	}
+	if (a == &dev_attr_queue_if_no_path.attr) {
+		if (dev_to_disk(dev)->fops == &nvme_fops) /* per-path device */
+			return 0;
+	}
 #ifdef CONFIG_NVME_MULTIPATH
 	if (a == &dev_attr_ana_grpid.attr || a == &dev_attr_ana_state.attr) {
 		if (dev_to_disk(dev)->fops != &nvme_fops) /* per-path attr */
@@ -3685,6 +3723,8 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
 	head->subsys = ctrl->subsys;
 	head->ns_id = nsid;
 	head->ids = *ids;
+	if (ctrl->ops->flags & NVME_F_FABRICS)
+		set_bit(NVME_NSHEAD_QUEUE_IF_NO_PATH, &head->flags);
 	kref_init(&head->ref);
 
 	ret = __nvme_check_ids(ctrl->subsys, head);
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 74896be40c17..05c4fbc35bb2 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -682,6 +682,10 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
 {
 	if (!head->disk)
 		return;
+	if (test_bit(NVME_NSHEAD_QUEUE_IF_NO_PATH, &head->flags)) {
+		kblockd_schedule_work(&head->requeue_work);
+		return;
+	}
 	if (head->disk->flags & GENHD_FL_UP)
 		del_gendisk(head->disk);
 	blk_set_queue_dying(head->disk->queue);
@@ -697,7 +701,6 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
 		 */
 		head->disk->queue = NULL;
 	}
-	put_disk(head->disk);
 }
 
 int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 566776100126..baf4a84918d8 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -411,6 +411,7 @@ struct nvme_ns_head {
 	struct mutex		lock;
 	unsigned long		flags;
 #define NVME_NSHEAD_DISK_LIVE	0
+#define NVME_NSHEAD_QUEUE_IF_NO_PATH 1
 	struct nvme_ns __rcu	*current_path[];
 #endif
 };
@@ -670,7 +671,7 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
 	struct nvme_ns_head *head = ns->head;
 
 	if (head->disk && list_empty(&head->list))
-		kblockd_schedule_work(&head->requeue_work);
+		nvme_mpath_remove_disk(head);
 }
 
 static inline void nvme_trace_bio_complete(struct request *req,
-- 
2.26.2
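
For illustration: the new attribute is registered alongside the other
namespace identity attributes (wwid, uuid, nsid), so it should appear
in the namespace's sysfs directory. Below is a minimal userspace
sketch for reading and then clearing it; the /sys/block/nvme0n1 path
and disk name are assumptions for illustration, not taken from the
patch:

	#include <stdio.h>

	/* Hypothetical sysfs path; substitute the actual disk name. */
	#define ATTR "/sys/block/nvme0n1/queue_if_no_path"

	int main(void)
	{
		char state[8];
		FILE *f = fopen(ATTR, "r");

		if (!f) {
			perror(ATTR);
			return 1;
		}
		/* The show side prints "on" or "off". */
		if (fscanf(f, "%7s", state) == 1)
			printf("queue_if_no_path: %s\n", state);
		fclose(f);

		f = fopen(ATTR, "w");
		if (!f) {
			perror(ATTR);
			return 1;
		}
		/*
		 * The store side parses an integer: <= 0 clears the
		 * flag, > 0 sets it.  Clearing restores removal of the
		 * device once the last path goes away.
		 */
		fputs("0\n", f);
		fclose(f);
		return 0;
	}

The name mirrors the queue_if_no_path feature of dm-multipath, where
queueing I/O while no path is available is likewise a per-device,
runtime-togglable policy.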


* Re: [RFC PATCH] nvme: add 'queue_if_no_path' semantics
  2020-10-02 16:06 [RFC PATCH] nvme: add 'queue_if_no_path' semantics Hannes Reinecke
@ 2020-10-05  9:24 ` Christoph Hellwig
  2020-10-05  9:30   ` Hannes Reinecke
  0 siblings, 1 reply; 3+ messages in thread
From: Christoph Hellwig @ 2020-10-05  9:24 UTC
  To: Hannes Reinecke; +Cc: linux-nvme, Christoph Hellwig, Keith Busch, Sagi Grimberg

On Fri, Oct 02, 2020 at 06:06:34PM +0200, Hannes Reinecke wrote:
> Currently namespaces behave differently depending on the 'CMIC'
> setting. If CMIC is zero, the device is removed once the last path
> goes away. If CMIC has the multipath bit set, the device is retained
> even if the last path is removed.
> This is okay for fabrics, where one can do an explicit disconnect
> to remove the device, but for nvme-pci this induces a regression
> with PCI hotplug.
> When the NVMe device is opened (e.g. by MD), it is not removed
> after a PCI hot-remove. Hence MD will not be notified about the
> event and will continue to consider this device as operational.
> Consequently, upon PCI hot-add the device shows up as a new NVMe
> device, and MD will fail to reattach the device.
> So this patch adds an NVME_NSHEAD_QUEUE_IF_NO_PATH flag to the
> nshead to restore the original behaviour for non-fabrics NVMe devices.
> 
> Suggested-by: Keith Busch <keith.busch@wdc.com>
> Signed-off-by: Hannes Reinecke <hare@suse.de>

This seems to include the patch from Keith to tear down the disk
earlier. I think that needs to stay a separate prep patch, otherwise
it will be very hard to review and reason about.


* Re: [RFC PATCH] nvme: add 'queue_if_no_path' semantics
  2020-10-05  9:24 ` Christoph Hellwig
@ 2020-10-05  9:30   ` Hannes Reinecke
  0 siblings, 0 replies; 3+ messages in thread
From: Hannes Reinecke @ 2020-10-05  9:30 UTC
  To: Christoph Hellwig; +Cc: linux-nvme, Sagi Grimberg, Keith Busch

On 10/5/20 11:24 AM, Christoph Hellwig wrote:
> On Fri, Oct 02, 2020 at 06:06:34PM +0200, Hannes Reinecke wrote:
>> Currently namespaces behave differently depending on the 'CMIC'
>> setting. If CMIC is zero, the device is removed once the last path
>> goes away. If CMIC has the multipath bit set, the device is retained
>> even if the last path is removed.
>> This is okay for fabrics, where one can do an explicit disconnect
>> to remove the device, but for nvme-pci this induces a regression
>> with PCI hotplug.
>> When the NVMe device is opened (e.g. by MD), it is not removed
>> after a PCI hot-remove. Hence MD will not be notified about the
>> event and will continue to consider this device as operational.
>> Consequently, upon PCI hot-add the device shows up as a new NVMe
>> device, and MD will fail to reattach the device.
>> So this patch adds an NVME_NSHEAD_QUEUE_IF_NO_PATH flag to the
>> nshead to restore the original behaviour for non-fabrics NVMe devices.
>>
>> Suggested-by: Keith Busch <keith.busch@wdc.com>
>> Signed-off-by: Hannes Reinecke <hare@suse.de>
> 
> This seems to include the patch from Keith to tear down the disk
> earlier. I think that needs to stay a separate prep patch, otherwise
> it will be very hard to review and reason about.
> 
Wasn't sure how best to approach this, but yes, sure, I can split it
off into separate patches, with the one from Keith being the first.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                Kernel Storage Architect
hare@suse.de                              +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer
