From: Minwoo Im <minwoo.im.dev@gmail.com>
To: linux-nvme@lists.infradead.org
Cc: Keith Busch <kbusch@kernel.org>, Jens Axboe <axboe@fb.com>,
	Minwoo Im <minwoo.im.dev@gmail.com>,
	Christoph Hellwig <hch@lst.de>, Sagi Grimberg <sagi@grimberg.me>
Subject: [PATCH] nvme: add support for namespace management to sysfs
Date: Sun, 21 Feb 2021 00:36:10 +0900
Message-ID: <20210220153610.237288-1-minwoo.im.dev@gmail.com>

Namespaces are generally managed by scan_work.  If an admin wants to
attach a namespace to a controller, the admin command has to be issued
from user-space (e.g. via `nvme-cli`), followed by a rescan of the
controller.

If an admin issues a Namespace Attachment command with the detach
option against a namespace through the IOCTL passthru interface, the
kernel is not aware of it.  In that case the block device for the
namespace becomes meaningless, but it is still exposed by the kernel as
if it were valid.  Only after the controller is rescanned manually is
the namespace node removed from the driver, so that user-space can no
longer issue I/O to the namespace.

Add support for namespace management (attach/detach) to the sysfs of
the controller instance, so that namespaces can be attached and
detached in a kernel-aware manner instead of through the
non-kernel-aware IOCTL passthru.  This also keeps the kernel's view of
the namespace more tightly coupled with the device.

Signed-off-by: Minwoo Im <minwoo.im.dev@gmail.com>
---
 drivers/nvme/host/core.c | 189 +++++++++++++++++++++++++++++++++++++++
 include/linux/nvme.h     |  12 +++
 2 files changed, 201 insertions(+)
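
The Namespace Attachment command issued by the new __nvme_ns_attach()
and __nvme_ns_detach() helpers below carries a Controller List as its
data payload.  A rough sketch of that layout, assuming the 4KB
Controller List format described in the NVMe Base Specification (the
struct below is only an illustration and not part of this patch):

  #include <linux/types.h>

  /*
   * Controller List payload: entry 0 holds the number of controller
   * identifiers that follow; entries 1..N hold the CNTLIDs as
   * little-endian 16-bit values.  2 + 2 * 2047 = 4096 bytes.
   */
  struct nvme_ctrl_list_sketch {
          __le16 num;             /* number of identifiers, here: 1 */
          __le16 cntlid[2047];    /* cntlid[0]: controller to attach/detach */
  };

The helpers fill exactly this shape into a 4096-byte buffer: the first
16-bit entry holds the count (one) and the second holds the target
controller's CNTLID.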

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index d77f3f26d8d3..71dad1b5ffdc 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -92,6 +92,9 @@ static struct class *nvme_subsys_class;
 static void nvme_put_subsystem(struct nvme_subsystem *subsys);
 static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
 					   unsigned nsid);
+static void nvme_ns_remove(struct nvme_ns *ns);
+static struct nvme_ns *nvme_find_get_ns_by_disk_name(struct nvme_ctrl *ctrl,
+						     const char *disk_name);
 
 /*
  * Prepare a queue for teardown.
@@ -3454,6 +3457,28 @@ static ssize_t nsid_show(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_RO(nsid);
 
+#ifdef CONFIG_NVME_MULTIPATH
+static ssize_t path_show(struct device *dev, struct device_attribute *attr,
+		char *buf)
+{
+	struct nvme_ns_head *head = dev_to_ns_head(dev);
+	struct nvme_ns *ns;
+	int node = numa_node_id();
+	int srcu_idx;
+
+	srcu_idx = srcu_read_lock(&head->srcu);
+	ns = srcu_dereference(head->current_path[node], &head->srcu);
+	if (!ns) {
+		srcu_read_unlock(&head->srcu, srcu_idx);
+		return sprintf(buf, "none\n");
+	}
+
+	srcu_read_unlock(&head->srcu, srcu_idx);
+	return sprintf(buf, "%s\n", ns->disk->disk_name);
+}
+static DEVICE_ATTR_RO(path);
+#endif
+
 static struct attribute *nvme_ns_id_attrs[] = {
 	&dev_attr_wwid.attr,
 	&dev_attr_uuid.attr,
@@ -3463,6 +3488,7 @@ static struct attribute *nvme_ns_id_attrs[] = {
 #ifdef CONFIG_NVME_MULTIPATH
 	&dev_attr_ana_grpid.attr,
 	&dev_attr_ana_state.attr,
+	&dev_attr_path.attr,
 #endif
 	NULL,
 };
@@ -3493,6 +3519,11 @@ static umode_t nvme_ns_id_attrs_are_visible(struct kobject *kobj,
 		if (!nvme_ctrl_use_ana(nvme_get_ns_from_dev(dev)->ctrl))
 			return 0;
 	}
+
+	if (a == &dev_attr_path.attr) {
+		if (dev_to_disk(dev)->fops == &nvme_bdev_ops)
+			return 0;
+	}
 #endif
 	return a->mode;
 }
@@ -3684,6 +3715,142 @@ static ssize_t nvme_ctrl_reconnect_delay_store(struct device *dev,
 static DEVICE_ATTR(reconnect_delay, S_IRUGO | S_IWUSR,
 	nvme_ctrl_reconnect_delay_show, nvme_ctrl_reconnect_delay_store);
 
+static int __nvme_ns_detach(struct nvme_ctrl *ctrl, unsigned int nsid)
+{
+	struct nvme_command c = { };
+	int err;
+	__le16 *buf;
+
+	c.ns_attach.opcode = nvme_admin_ns_attach;
+	c.ns_attach.nsid = cpu_to_le32(nsid);
+	c.ns_attach.sel = cpu_to_le32(0x1);
+
+	buf = kzalloc(4096, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	buf[0] = cpu_to_le16(1);
+	buf[1] = cpu_to_le16(ctrl->cntlid);
+
+	err = nvme_submit_sync_cmd(ctrl->admin_q, &c, buf, 4096);
+	if (err) {
+		kfree(buf);
+		return err;
+	}
+
+	kfree(buf);
+	return 0;
+}
+
+static int nvme_ns_detach(struct nvme_ctrl *ctrl, struct nvme_ns *ns)
+{
+	int err;
+
+	blk_mq_quiesce_queue(ns->queue);
+
+	err = __nvme_ns_detach(ctrl, ns->head->ns_id);
+	if (err) {
+		blk_mq_unquiesce_queue(ns->queue);
+		return err;
+	}
+
+	nvme_set_queue_dying(ns);
+	nvme_ns_remove(ns);
+
+	return 0;
+}
+
+static int __nvme_ns_attach(struct nvme_ctrl *ctrl, unsigned int nsid)
+{
+	struct nvme_command c = { };
+	int err;
+	__le16 *buf;
+
+	c.ns_attach.opcode = nvme_admin_ns_attach;
+	c.ns_attach.nsid = cpu_to_le32(nsid);
+	c.ns_attach.sel = cpu_to_le32(0x0);
+
+	buf = kzalloc(4096, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	buf[0] = cpu_to_le16(1);
+	buf[1] = cpu_to_le16(ctrl->cntlid);
+
+	err = nvme_submit_sync_cmd(ctrl->admin_q, &c, buf, 4096);
+	if (err) {
+		kfree(buf);
+		return err;
+	}
+
+	kfree(buf);
+	return 0;
+}
+
+static int nvme_ns_attach(struct nvme_ctrl *ctrl, unsigned int nsid, bool scan)
+{
+	int err;
+
+	if (!(ctrl->oacs & NVME_CTRL_OACS_NS_MANAGEMENT))
+		return -EOPNOTSUPP;
+
+	err = __nvme_ns_attach(ctrl, nsid);
+	if (err)
+		return err;
+
+	if (scan)
+		nvme_queue_scan(ctrl);
+
+	return 0;
+}
+
+static ssize_t detach_ns_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+	struct nvme_ns *ns;
+	int err;
+
+	if (!(ctrl->oacs & NVME_CTRL_OACS_NS_MANAGEMENT))
+		return -EOPNOTSUPP;
+
+	ns = nvme_find_get_ns_by_disk_name(ctrl, buf);
+	if (!ns)
+		return -EINVAL;
+
+	err = nvme_ns_detach(ctrl, ns);
+	if (err) {
+		nvme_put_ns(ns);
+		return err;
+	}
+
+	nvme_put_ns(ns);
+	return count;
+}
+static DEVICE_ATTR_WO(detach_ns);
+
+static ssize_t attach_ns_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+	unsigned int nsid;
+	int err;
+
+	/*
+	 * 'nsid' is the namespace identifier reported by the NVMe controller.
+	 */
+	err = kstrtou32(buf, 10, &nsid);
+	if (err)
+		return err;
+
+	err = nvme_ns_attach(ctrl, nsid, true);
+	if (err)
+		return err;
+
+	return count;
+}
+static DEVICE_ATTR_WO(attach_ns);
+
 static struct attribute *nvme_dev_attrs[] = {
 	&dev_attr_reset_controller.attr,
 	&dev_attr_rescan_controller.attr,
@@ -3703,6 +3870,8 @@ static struct attribute *nvme_dev_attrs[] = {
 	&dev_attr_hostid.attr,
 	&dev_attr_ctrl_loss_tmo.attr,
 	&dev_attr_reconnect_delay.attr,
+	&dev_attr_detach_ns.attr,
+	&dev_attr_attach_ns.attr,
 	NULL
 };
 
@@ -3902,6 +4071,25 @@ struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 }
 EXPORT_SYMBOL_NS_GPL(nvme_find_get_ns, NVME_TARGET_PASSTHRU);
 
+static struct nvme_ns *nvme_find_get_ns_by_disk_name(struct nvme_ctrl *ctrl,
+		const char *disk_name)
+{
+	struct nvme_ns *ns, *ret = NULL;
+
+	down_read(&ctrl->namespaces_rwsem);
+	list_for_each_entry(ns, &ctrl->namespaces, list) {
+		if (!strcmp(ns->disk->disk_name, disk_name)) {
+			if (!kref_get_unless_zero(&ns->kref))
+				continue;
+			ret = ns;
+			break;
+		}
+	}
+	up_read(&ctrl->namespaces_rwsem);
+
+	return ret;
+}
+
 static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
 		struct nvme_ns_ids *ids)
 {
@@ -4751,6 +4939,7 @@ static inline void _nvme_check_size(void)
 	BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
 	BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_directive_cmd) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_ns_attach) != 64);
 }
 
 
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index b08787cd0881..bc6c2a162bbb 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -322,6 +322,7 @@ enum {
 	NVME_CTRL_ONCS_TIMESTAMP		= 1 << 6,
 	NVME_CTRL_VWC_PRESENT			= 1 << 0,
 	NVME_CTRL_OACS_SEC_SUPP                 = 1 << 0,
+	NVME_CTRL_OACS_NS_MANAGEMENT		= 1 << 3,
 	NVME_CTRL_OACS_DIRECTIVES		= 1 << 5,
 	NVME_CTRL_OACS_DBBUF_SUPP		= 1 << 8,
 	NVME_CTRL_LPA_CMD_EFFECTS_LOG		= 1 << 1,
@@ -1398,6 +1399,16 @@ struct streams_directive_params {
 	__u8	rsvd2[6];
 };
 
+struct nvme_ns_attach {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__le32			nsid;
+	__u32			rsvd2[8];
+	__le32			sel;
+	__u32			rsvd11[5];
+};
+
 struct nvme_command {
 	union {
 		struct nvme_common_command common;
@@ -1421,6 +1432,7 @@ struct nvme_command {
 		struct nvmf_property_get_command prop_get;
 		struct nvme_dbbuf dbbuf;
 		struct nvme_directive_cmd directive;
+		struct nvme_ns_attach ns_attach;
 	};
 };
 
-- 
2.25.1

