Linux-NVME Archive on lore.kernel.org
 help / color / Atom feed
* nvme generic per-namespace char device
@ 2021-04-20 10:07 Christoph Hellwig
  2021-04-20 10:07 ` [PATCH] nvme: introduce generic per-namespace chardev Christoph Hellwig
  0 siblings, 1 reply; 4+ messages in thread
From: Christoph Hellwig @ 2021-04-20 10:07 UTC (permalink / raw)
  To: Minwoo Im, Keith Busch, Sagi Grimberg
  Cc: Kanchan Joshi, Javier Gonz??lez, linux-nvme

Hi all,

this series picks up the generic per-namespace character device
from Minwoo and Javier with a few small fixes.

Changes since v1:
 - ensure the cdev idea is freed
 - dropped the already merged ioctl cleanup

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH] nvme: introduce generic per-namespace chardev
  2021-04-20 10:07 nvme generic per-namespace char device Christoph Hellwig
@ 2021-04-20 10:07 ` Christoph Hellwig
  2021-04-20 18:13   ` Kanchan Joshi
  0 siblings, 1 reply; 4+ messages in thread
From: Christoph Hellwig @ 2021-04-20 10:07 UTC (permalink / raw)
  To: Minwoo Im, Keith Busch, Sagi Grimberg
  Cc: Kanchan Joshi, Javier Gonz??lez, linux-nvme

From: Minwoo Im <minwoo.im.dev@gmail.com>

Userspace has not been allowed to I/O to device that's failed to
be initialized.  This patch introduces generic per-namespace character
device to allow userspace to I/O regardless the block device is there or
not.

The chardev naming convention will similar to the existing blkdev naming,
using a ng prefix instead of nvme, i.e.

	- /dev/ngXnY

It also supports multipath which means it will not expose chardev for the
hidden namespace blkdevs (e.g., nvmeXcYnZ).  If /dev/ngXnY is created for
a ns_head, then I/O request will be routed to a specific controller
selected by the iopolicy of the subsystem.

Signed-off-by: Minwoo Im <minwoo.im.dev@gmail.com>
Signed-off-by: Javier González <javier.gonz@samsung.com>
[hch: rebased on top of the ioctl cleanups, split the ns_head and ns
 implementations]
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c      | 87 +++++++++++++++++++++++++++++++++++
 drivers/nvme/host/ioctl.c     | 17 +++++++
 drivers/nvme/host/multipath.c | 51 ++++++++++++++++++--
 drivers/nvme/host/nvme.h      | 13 ++++++
 4 files changed, 165 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index b905f91f14eba6..2f45e8fcdd7cbd 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -89,6 +89,10 @@ static dev_t nvme_ctrl_base_chr_devt;
 static struct class *nvme_class;
 static struct class *nvme_subsys_class;
 
+static DEFINE_IDA(nvme_ns_chr_minor_ida);
+static dev_t nvme_ns_chr_devt;
+static struct class *nvme_ns_chr_class;
+
 static void nvme_put_subsystem(struct nvme_subsystem *subsys);
 static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
 					   unsigned nsid);
@@ -3429,6 +3433,66 @@ static int __nvme_check_ids(struct nvme_subsystem *subsys,
 	return 0;
 }
 
+void nvme_cdev_del(struct cdev *cdev, struct device *cdev_device)
+{
+	cdev_device_del(cdev, cdev_device);
+	ida_simple_remove(&nvme_ns_chr_minor_ida, MINOR(cdev_device->devt));
+}
+
+int nvme_cdev_add(struct cdev *cdev, struct device *cdev_device,
+		const struct file_operations *fops, struct module *owner)
+{
+	int minor, ret;
+
+	minor = ida_simple_get(&nvme_ns_chr_minor_ida, 0, 0, GFP_KERNEL);
+	if (minor < 0)
+		return minor;
+	cdev_device->devt = MKDEV(MAJOR(nvme_ns_chr_devt), minor);
+	cdev_device->class = nvme_ns_chr_class;
+	device_initialize(cdev_device);
+	cdev_init(cdev, fops);
+	cdev->owner = owner;
+	ret = cdev_device_add(cdev, cdev_device);
+	if (ret)
+		ida_simple_remove(&nvme_ns_chr_minor_ida, minor);
+	return ret;
+}
+
+static int nvme_ns_chr_open(struct inode *inode, struct file *file)
+{
+	return nvme_ns_open(container_of(inode->i_cdev, struct nvme_ns, cdev));
+}
+
+static int nvme_ns_chr_release(struct inode *inode, struct file *file)
+{
+	nvme_ns_release(container_of(inode->i_cdev, struct nvme_ns, cdev));
+	return 0;
+}
+
+static const struct file_operations nvme_ns_chr_fops = {
+	.owner		= THIS_MODULE,
+	.open		= nvme_ns_chr_open,
+	.release	= nvme_ns_chr_release,
+	.unlocked_ioctl	= nvme_ns_chr_ioctl,
+	.compat_ioctl	= compat_ptr_ioctl,
+};
+
+static int nvme_add_ns_cdev(struct nvme_ns *ns)
+{
+	int ret;
+
+	ns->cdev_device.parent = ns->ctrl->device;
+	ret = dev_set_name(&ns->cdev_device, "ng%dn%d",
+			   ns->ctrl->instance, ns->head->instance);
+	if (ret)
+		return ret;
+	ret = nvme_cdev_add(&ns->cdev, &ns->cdev_device, &nvme_ns_chr_fops,
+			    ns->ctrl->ops->module);
+	if (ret)
+		kfree_const(ns->cdev_device.kobj.name);
+	return ret;
+}
+
 static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
 		unsigned nsid, struct nvme_ns_ids *ids)
 {
@@ -3630,6 +3694,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
 	nvme_get_ctrl(ctrl);
 
 	device_add_disk(ctrl->device, ns->disk, nvme_ns_id_attr_groups);
+	if (!nvme_ns_head_multipath(ns->head))
+		nvme_add_ns_cdev(ns);
 
 	nvme_mpath_add_disk(ns, id);
 	nvme_fault_inject_init(&ns->fault_inject, ns->disk->disk_name);
@@ -3674,6 +3740,8 @@ static void nvme_ns_remove(struct nvme_ns *ns)
 	synchronize_srcu(&ns->head->srcu); /* wait for concurrent submissions */
 
 	if (ns->disk->flags & GENHD_FL_UP) {
+		if (!nvme_ns_head_multipath(ns->head))
+			nvme_cdev_del(&ns->cdev, &ns->cdev_device);
 		del_gendisk(ns->disk);
 		blk_cleanup_queue(ns->queue);
 		if (blk_get_integrity(ns->disk))
@@ -4464,8 +4532,24 @@ static int __init nvme_core_init(void)
 		result = PTR_ERR(nvme_subsys_class);
 		goto destroy_class;
 	}
+
+	result = alloc_chrdev_region(&nvme_ns_chr_devt, 0, NVME_MINORS,
+				     "nvme-generic");
+	if (result < 0)
+		goto destroy_subsys_class;
+
+	nvme_ns_chr_class = class_create(THIS_MODULE, "nvme-generic");
+	if (IS_ERR(nvme_ns_chr_class)) {
+		result = PTR_ERR(nvme_ns_chr_class);
+		goto unregister_generic_ns;
+	}
+
 	return 0;
 
+unregister_generic_ns:
+	unregister_chrdev_region(nvme_ns_chr_devt, NVME_MINORS);
+destroy_subsys_class:
+	class_destroy(nvme_subsys_class);
 destroy_class:
 	class_destroy(nvme_class);
 unregister_chrdev:
@@ -4482,12 +4566,15 @@ static int __init nvme_core_init(void)
 
 static void __exit nvme_core_exit(void)
 {
+	class_destroy(nvme_ns_chr_class);
 	class_destroy(nvme_subsys_class);
 	class_destroy(nvme_class);
+	unregister_chrdev_region(nvme_ns_chr_devt, NVME_MINORS);
 	unregister_chrdev_region(nvme_ctrl_base_chr_devt, NVME_MINORS);
 	destroy_workqueue(nvme_delete_wq);
 	destroy_workqueue(nvme_reset_wq);
 	destroy_workqueue(nvme_wq);
+	ida_destroy(&nvme_ns_chr_minor_ida);
 	ida_destroy(&nvme_instance_ida);
 }
 
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 8e05d65c9e9340..a95b7eef9e62b2 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -357,6 +357,14 @@ int nvme_ioctl(struct block_device *bdev, fmode_t mode,
 	return nvme_ns_ioctl(ns, cmd, argp);
 }
 
+long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct nvme_ns *ns =
+		container_of(file_inode(file)->i_cdev, struct nvme_ns, cdev);
+
+	return nvme_ns_ioctl(ns, cmd, (void __user *)arg);
+}
+
 #ifdef CONFIG_NVME_MULTIPATH
 static int nvme_ns_head_ctrl_ioctl(struct nvme_ns_head *head,
 		unsigned int cmd, void __user *argp)
@@ -393,6 +401,15 @@ int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode,
 		return nvme_ns_head_ctrl_ioctl(head, cmd, (void __user *)arg);
 	return nvme_ns_head_ns_ioctl(head, cmd, (void __user *)arg);
 }
+
+long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,
+		unsigned long arg)
+{
+	struct cdev *cdev = file_inode(file)->i_cdev;
+	struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev);
+
+	return nvme_ns_head_ns_ioctl(head, cmd, (void __user *)arg);
+}
 #endif /* CONFIG_NVME_MULTIPATH */
 
 static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 68918ea1d3d098..0d0de3433f3776 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -357,6 +357,48 @@ const struct block_device_operations nvme_ns_head_ops = {
 	.pr_ops		= &nvme_pr_ops,
 };
 
+static inline struct nvme_ns_head *cdev_to_ns_head(struct cdev *cdev)
+{
+	return container_of(cdev, struct nvme_ns_head, cdev);
+}
+
+static int nvme_ns_head_chr_open(struct inode *inode, struct file *file)
+{
+	if (!nvme_tryget_ns_head(cdev_to_ns_head(inode->i_cdev)))
+		return -ENXIO;
+	return 0;
+}
+
+static int nvme_ns_head_chr_release(struct inode *inode, struct file *file)
+{
+	nvme_put_ns_head(cdev_to_ns_head(inode->i_cdev));
+	return 0;
+}
+
+static const struct file_operations nvme_ns_head_chr_fops = {
+	.owner		= THIS_MODULE,
+	.open		= nvme_ns_head_chr_open,
+	.release	= nvme_ns_head_chr_release,
+	.unlocked_ioctl	= nvme_ns_head_chr_ioctl,
+	.compat_ioctl	= compat_ptr_ioctl,
+};
+
+static int nvme_add_ns_head_cdev(struct nvme_ns_head *head)
+{
+	int ret;
+
+	head->cdev_device.parent = &head->subsys->dev;
+	ret = dev_set_name(&head->cdev_device, "ng%dn%d",
+			   head->subsys->instance, head->instance);
+	if (ret)
+		return ret;
+	ret = nvme_cdev_add(&head->cdev, &head->cdev_device,
+			    &nvme_ns_head_chr_fops, THIS_MODULE);
+	if (ret)
+		kfree_const(head->cdev_device.kobj.name);
+	return ret;
+}
+
 static void nvme_requeue_work(struct work_struct *work)
 {
 	struct nvme_ns_head *head =
@@ -435,9 +477,11 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
 	if (!head->disk)
 		return;
 
-	if (!test_and_set_bit(NVME_NSHEAD_DISK_LIVE, &head->flags))
+	if (!test_and_set_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
 		device_add_disk(&head->subsys->dev, head->disk,
 				nvme_ns_id_attr_groups);
+		nvme_add_ns_head_cdev(head);
+	}
 
 	mutex_lock(&head->lock);
 	if (nvme_path_is_optimized(ns)) {
@@ -714,8 +758,10 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
 {
 	if (!head->disk)
 		return;
-	if (head->disk->flags & GENHD_FL_UP)
+	if (head->disk->flags & GENHD_FL_UP) {
+		nvme_cdev_del(&head->cdev, &head->cdev_device);
 		del_gendisk(head->disk);
+	}
 	blk_set_queue_dying(head->disk->queue);
 	/* make sure all pending bios are cleaned up */
 	kblockd_schedule_work(&head->requeue_work);
@@ -785,4 +831,3 @@ void nvme_mpath_uninit(struct nvme_ctrl *ctrl)
 	kfree(ctrl->ana_log_buf);
 	ctrl->ana_log_buf = NULL;
 }
-
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 49276186d5bd6e..773dde5b231dab 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -412,6 +412,10 @@ struct nvme_ns_head {
 	bool			shared;
 	int			instance;
 	struct nvme_effects_log *effects;
+
+	struct cdev		cdev;
+	struct device		cdev_device;
+
 	struct gendisk		*disk;
 #ifdef CONFIG_NVME_MULTIPATH
 	struct bio_list		requeue_list;
@@ -464,6 +468,9 @@ struct nvme_ns {
 #define NVME_NS_ANA_PENDING	2
 #define NVME_NS_FORCE_RO	3
 
+	struct cdev		cdev;
+	struct device		cdev_device;
+
 	struct nvme_fault_inject fault_inject;
 
 };
@@ -658,10 +665,16 @@ void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx);
 bool nvme_tryget_ns_head(struct nvme_ns_head *head);
 void nvme_put_ns_head(struct nvme_ns_head *head);
 struct nvme_ctrl *nvme_find_get_live_ctrl(struct nvme_subsystem *subsys);
+int nvme_cdev_add(struct cdev *cdev, struct device *cdev_device,
+		const struct file_operations *fops, struct module *owner);
+void nvme_cdev_del(struct cdev *cdev, struct device *cdev_device);
 int nvme_ioctl(struct block_device *bdev, fmode_t mode,
 		unsigned int cmd, unsigned long arg);
+long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
 int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode,
 		unsigned int cmd, unsigned long arg);
+long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,
+		unsigned long arg);
 long nvme_dev_ioctl(struct file *file, unsigned int cmd,
 		unsigned long arg);
 int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo);
-- 
2.30.1


_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] nvme: introduce generic per-namespace chardev
  2021-04-20 10:07 ` [PATCH] nvme: introduce generic per-namespace chardev Christoph Hellwig
@ 2021-04-20 18:13   ` Kanchan Joshi
  2021-04-20 18:58     ` Keith Busch
  0 siblings, 1 reply; 4+ messages in thread
From: Kanchan Joshi @ 2021-04-20 18:13 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Minwoo Im, Keith Busch, Sagi Grimberg, Javier Gonz??lez, linux-nvme

On Tue, Apr 20, 2021 at 3:37 PM Christoph Hellwig <hch@lst.de> wrote:
>
> From: Minwoo Im <minwoo.im.dev@gmail.com>
>
> Userspace has not been allowed to I/O to device that's failed to
> be initialized.  This patch introduces generic per-namespace character
> device to allow userspace to I/O regardless the block device is there or
> not.

When an application uses this interface to issue passthrough IO, it
needs to know the lba size (at least) to form the command correctly.
But determining lba size would require an admin ioctl.
Do you see there is a way to go about this issue?


-- 
Joshi

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] nvme: introduce generic per-namespace chardev
  2021-04-20 18:13   ` Kanchan Joshi
@ 2021-04-20 18:58     ` Keith Busch
  0 siblings, 0 replies; 4+ messages in thread
From: Keith Busch @ 2021-04-20 18:58 UTC (permalink / raw)
  To: Kanchan Joshi
  Cc: Christoph Hellwig, Minwoo Im, Sagi Grimberg, Javier Gonz??lez,
	linux-nvme

On Tue, Apr 20, 2021 at 11:43:51PM +0530, Kanchan Joshi wrote:
> On Tue, Apr 20, 2021 at 3:37 PM Christoph Hellwig <hch@lst.de> wrote:
> >
> > From: Minwoo Im <minwoo.im.dev@gmail.com>
> >
> > Userspace has not been allowed to I/O to device that's failed to
> > be initialized.  This patch introduces generic per-namespace character
> > device to allow userspace to I/O regardless the block device is there or
> > not.
> 
> When an application uses this interface to issue passthrough IO, it
> needs to know the lba size (at least) to form the command correctly.
> But determining lba size would require an admin ioctl.
> Do you see there is a way to go about this issue?

You'd have to look up the parent controller and issue the command
through that.

If your application doesn't have permissions to open the controller,
then it wouldn't work.

So I guess all applications that can open generic namespaces would need
access to the controller handle. But if you do that, then is there a
point to this indirection anymore?

I have to agree, the ability to query your namespaces settings through
the namespace's handle really does simplify things...

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, back to index

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-20 10:07 nvme generic per-namespace char device Christoph Hellwig
2021-04-20 10:07 ` [PATCH] nvme: introduce generic per-namespace chardev Christoph Hellwig
2021-04-20 18:13   ` Kanchan Joshi
2021-04-20 18:58     ` Keith Busch

Linux-NVME Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-nvme/0 linux-nvme/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-nvme linux-nvme/ https://lore.kernel.org/linux-nvme \
		linux-nvme@lists.infradead.org
	public-inbox-index linux-nvme

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.infradead.lists.linux-nvme


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git