All of lore.kernel.org
 help / color / mirror / Atom feed
From: Keith Busch <keith.busch@intel.com>
To: Christoph Hellwig <hch@infradead.org>
Cc: Yan Liu <yan@purestorage.com>,
	Matthew Wilcox <willy@linux.intel.com>,
	linux-kernel@vger.kernel.org, linux-nvme@lists.infradead.org
Subject: Re: [PATCH 1/1] NVMe: Do not take nsid while a passthrough IO command is being issued via a block device file descriptor
Date: Fri, 23 Jan 2015 16:22:02 +0000 (UTC)	[thread overview]
Message-ID: <alpine.LNX.2.00.1501231451380.15481@localhost.lm.intel.com> (raw)
In-Reply-To: <20150123075708.GA17232@infradead.org>

On Thu, 22 Jan 2015, Christoph Hellwig wrote:
> On Thu, Jan 22, 2015 at 04:02:08PM -0800, Yan Liu wrote:
>> When a passthrough IO command is issued with a specific block device file descriptor, it should be applied to
>> the namespace which is associated with that block device file descriptor. This patch makes such a passthrough
>> command ignore the nsid in the nvme_passthru_cmd structure. Instead it takes the namespace ID associated with the
>> block device descriptor.
>>
>> Signed-off-by: Yan Liu <yan@purestorage.com>
>
> Please move the code to find the ns into the caller, or even better a
> separate helper used by the caller, instead of adding another argument to
> nvme_user_cmd.

The namespace id should be enforced on block devices, but is there a
problem allowing arbitrary commands through the management char device?
I have a need for a pure passthrough, but the proposed patch requires
a matching namespace id all the time.

I wrote and tested the one below to override nsid on block devices,
but it doesn't require a visible namespace through the management device.

---
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index cb529e9..bdec1d7 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -1682,7 +1682,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
  	return status;
  }

-static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns,
+static int nvme_user_cmd(struct nvme_dev *dev, struct request_queue *q,
  			struct nvme_passthru_cmd __user *ucmd)
  {
  	struct nvme_passthru_cmd cmd;
@@ -1690,6 +1690,8 @@ static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns,
  	int status, length;
  	struct nvme_iod *uninitialized_var(iod);
  	unsigned timeout;
+	struct request *req;
+	struct nvme_ns *ns = q->queuedata;

  	if (!capable(CAP_SYS_ADMIN))
  		return -EACCES;
@@ -1699,7 +1701,7 @@ static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns,
  	memset(&c, 0, sizeof(c));
  	c.common.opcode = cmd.opcode;
  	c.common.flags = cmd.flags;
-	c.common.nsid = cpu_to_le32(cmd.nsid);
+	c.common.nsid = ns ? cpu_to_le32(ns->ns_id) : cpu_to_le32(cmd.nsid);
  	c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
  	c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
  	c.common.cdw10[0] = cpu_to_le32(cmd.cdw10);
@@ -1725,21 +1727,15 @@ static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns,

  	if (length != cmd.data_len)
  		status = -ENOMEM;
-	else if (ns) {
-		struct request *req;
-
-		req = blk_mq_alloc_request(ns->queue, WRITE,
-						(GFP_KERNEL|__GFP_WAIT), false);
-		if (IS_ERR(req))
-			status = PTR_ERR(req);
-		else {
-			status = nvme_submit_sync_cmd(req, &c, &cmd.result,
-								timeout);
-			blk_mq_free_request(req);
-		}
-	} else
-		status = __nvme_submit_admin_cmd(dev, &c, &cmd.result, timeout);

+	req = blk_mq_alloc_request(q, WRITE, (GFP_KERNEL|__GFP_WAIT), false);
+	if (IS_ERR(req)) {
+		status = PTR_ERR(req);
+		goto out;
+	}
+	status = nvme_submit_sync_cmd(req, &c, &cmd.result, timeout);
+	blk_mq_free_request(req);
+ out:
  	if (cmd.data_len) {
  		nvme_unmap_user_pages(dev, cmd.opcode & 1, iod);
  		nvme_free_iod(dev, iod);
@@ -1762,9 +1758,9 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
  		force_successful_syscall_return();
  		return ns->ns_id;
  	case NVME_IOCTL_ADMIN_CMD:
-		return nvme_user_cmd(ns->dev, NULL, (void __user *)arg);
+		return nvme_user_cmd(ns->dev, ns->dev->admin_q, (void __user *)arg);
  	case NVME_IOCTL_IO_CMD:
-		return nvme_user_cmd(ns->dev, ns, (void __user *)arg);
+		return nvme_user_cmd(ns->dev, ns->queue, (void __user *)arg);
  	case NVME_IOCTL_SUBMIT_IO:
  		return nvme_submit_io(ns, (void __user *)arg);
  	case SG_GET_VERSION_NUM:
@@ -2155,6 +2151,17 @@ static int nvme_dev_add(struct nvme_dev *dev)
  	if (blk_mq_alloc_tag_set(&dev->tagset))
  		goto out;

+	dev->io_q = blk_mq_init_queue(&dev->tagset);
+	if (IS_ERR(dev->io_q)) {
+		blk_mq_free_tag_set(&dev->tagset);
+		goto out;
+	}
+	if (!blk_get_queue(dev->io_q)) {
+		blk_cleanup_queue(dev->io_q);
+		blk_mq_free_tag_set(&dev->tagset);
+		goto out;
+	}
+
  	id_ns = mem;
  	for (i = 1; i <= nn; i++) {
  		res = nvme_identify(dev, i, 0, dma_addr);
@@ -2565,6 +2572,7 @@ static void nvme_free_dev(struct kref *kref)
  	nvme_release_instance(dev);
  	blk_mq_free_tag_set(&dev->tagset);
  	blk_put_queue(dev->admin_q);
+	blk_put_queue(dev->io_q);
  	kfree(dev->queues);
  	kfree(dev->entry);
  	kfree(dev);
@@ -2589,16 +2597,12 @@ static int nvme_dev_release(struct inode *inode, struct file *f)
  static long nvme_dev_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
  {
  	struct nvme_dev *dev = f->private_data;
-	struct nvme_ns *ns;

  	switch (cmd) {
  	case NVME_IOCTL_ADMIN_CMD:
-		return nvme_user_cmd(dev, NULL, (void __user *)arg);
+		return nvme_user_cmd(dev, dev->admin_q, (void __user *)arg);
  	case NVME_IOCTL_IO_CMD:
-		if (list_empty(&dev->namespaces))
-			return -ENOTTY;
-		ns = list_first_entry(&dev->namespaces, struct nvme_ns, list);
-		return nvme_user_cmd(dev, ns, (void __user *)arg);
+		return nvme_user_cmd(dev, dev->io_q, (void __user *)arg);
  	default:
  		return -ENOTTY;
  	}
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 258945f..d3b467b 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -74,6 +74,7 @@ struct nvme_dev {
  	struct list_head node;
  	struct nvme_queue **queues;
  	struct request_queue *admin_q;
+	struct request_queue *io_q;
  	struct blk_mq_tag_set tagset;
  	struct blk_mq_tag_set admin_tagset;
  	u32 __iomem *dbs;
--

WARNING: multiple messages have this Message-ID (diff)
From: keith.busch@intel.com (Keith Busch)
Subject: [PATCH 1/1] NVMe: Do not take nsid while a passthrough IO command is being issued via a block device file descriptor
Date: Fri, 23 Jan 2015 16:22:02 +0000 (UTC)	[thread overview]
Message-ID: <alpine.LNX.2.00.1501231451380.15481@localhost.lm.intel.com> (raw)
In-Reply-To: <20150123075708.GA17232@infradead.org>

On Thu, 22 Jan 2015, Christoph Hellwig wrote:
> On Thu, Jan 22, 2015 at 04:02:08PM -0800, Yan Liu wrote:
>> When a passthrough IO command is issued with a specific block device file descriptor, it should be applied to
>> the namespace which is associated with that block device file descriptor. This patch makes such a passthrough
>> command ignore the nsid in the nvme_passthru_cmd structure. Instead it takes the namespace ID associated with the
>> block device descriptor.
>>
>> Signed-off-by: Yan Liu <yan at purestorage.com>
>
> Please move the code to find the ns into the caller, or even better a
> separate helper used by the caller, instead of adding another argument to
> nvme_user_cmd.

The namespace id should be enforced on block devices, but is there a
problem allowing arbitrary commands through the management char device?
I have a need for a pure passthrough, but the proposed patch requires
a matching namespace id all the time.

I wrote and tested the one below to override nsid on block devices,
but it doesn't require a visible namespace through the management device.

---
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index cb529e9..bdec1d7 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -1682,7 +1682,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
  	return status;
  }

-static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns,
+static int nvme_user_cmd(struct nvme_dev *dev, struct request_queue *q,
  			struct nvme_passthru_cmd __user *ucmd)
  {
  	struct nvme_passthru_cmd cmd;
@@ -1690,6 +1690,8 @@ static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns,
  	int status, length;
  	struct nvme_iod *uninitialized_var(iod);
  	unsigned timeout;
+	struct request *req;
+	struct nvme_ns *ns = q->queuedata;

  	if (!capable(CAP_SYS_ADMIN))
  		return -EACCES;
@@ -1699,7 +1701,7 @@ static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns,
  	memset(&c, 0, sizeof(c));
  	c.common.opcode = cmd.opcode;
  	c.common.flags = cmd.flags;
-	c.common.nsid = cpu_to_le32(cmd.nsid);
+	c.common.nsid = ns ? cpu_to_le32(ns->ns_id) : cpu_to_le32(cmd.nsid);
  	c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
  	c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
  	c.common.cdw10[0] = cpu_to_le32(cmd.cdw10);
@@ -1725,21 +1727,15 @@ static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns,

  	if (length != cmd.data_len)
  		status = -ENOMEM;
-	else if (ns) {
-		struct request *req;
-
-		req = blk_mq_alloc_request(ns->queue, WRITE,
-						(GFP_KERNEL|__GFP_WAIT), false);
-		if (IS_ERR(req))
-			status = PTR_ERR(req);
-		else {
-			status = nvme_submit_sync_cmd(req, &c, &cmd.result,
-								timeout);
-			blk_mq_free_request(req);
-		}
-	} else
-		status = __nvme_submit_admin_cmd(dev, &c, &cmd.result, timeout);

+	req = blk_mq_alloc_request(q, WRITE, (GFP_KERNEL|__GFP_WAIT), false);
+	if (IS_ERR(req)) {
+		status = PTR_ERR(req);
+		goto out;
+	}
+	status = nvme_submit_sync_cmd(req, &c, &cmd.result, timeout);
+	blk_mq_free_request(req);
+ out:
  	if (cmd.data_len) {
  		nvme_unmap_user_pages(dev, cmd.opcode & 1, iod);
  		nvme_free_iod(dev, iod);
@@ -1762,9 +1758,9 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
  		force_successful_syscall_return();
  		return ns->ns_id;
  	case NVME_IOCTL_ADMIN_CMD:
-		return nvme_user_cmd(ns->dev, NULL, (void __user *)arg);
+		return nvme_user_cmd(ns->dev, ns->dev->admin_q, (void __user *)arg);
  	case NVME_IOCTL_IO_CMD:
-		return nvme_user_cmd(ns->dev, ns, (void __user *)arg);
+		return nvme_user_cmd(ns->dev, ns->queue, (void __user *)arg);
  	case NVME_IOCTL_SUBMIT_IO:
  		return nvme_submit_io(ns, (void __user *)arg);
  	case SG_GET_VERSION_NUM:
@@ -2155,6 +2151,17 @@ static int nvme_dev_add(struct nvme_dev *dev)
  	if (blk_mq_alloc_tag_set(&dev->tagset))
  		goto out;

+	dev->io_q = blk_mq_init_queue(&dev->tagset);
+	if (IS_ERR(dev->io_q)) {
+		blk_mq_free_tag_set(&dev->tagset);
+		goto out;
+	}
+	if (!blk_get_queue(dev->io_q)) {
+		blk_cleanup_queue(dev->io_q);
+		blk_mq_free_tag_set(&dev->tagset);
+		goto out;
+	}
+
  	id_ns = mem;
  	for (i = 1; i <= nn; i++) {
  		res = nvme_identify(dev, i, 0, dma_addr);
@@ -2565,6 +2572,7 @@ static void nvme_free_dev(struct kref *kref)
  	nvme_release_instance(dev);
  	blk_mq_free_tag_set(&dev->tagset);
  	blk_put_queue(dev->admin_q);
+	blk_put_queue(dev->io_q);
  	kfree(dev->queues);
  	kfree(dev->entry);
  	kfree(dev);
@@ -2589,16 +2597,12 @@ static int nvme_dev_release(struct inode *inode, struct file *f)
  static long nvme_dev_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
  {
  	struct nvme_dev *dev = f->private_data;
-	struct nvme_ns *ns;

  	switch (cmd) {
  	case NVME_IOCTL_ADMIN_CMD:
-		return nvme_user_cmd(dev, NULL, (void __user *)arg);
+		return nvme_user_cmd(dev, dev->admin_q, (void __user *)arg);
  	case NVME_IOCTL_IO_CMD:
-		if (list_empty(&dev->namespaces))
-			return -ENOTTY;
-		ns = list_first_entry(&dev->namespaces, struct nvme_ns, list);
-		return nvme_user_cmd(dev, ns, (void __user *)arg);
+		return nvme_user_cmd(dev, dev->io_q, (void __user *)arg);
  	default:
  		return -ENOTTY;
  	}
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 258945f..d3b467b 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -74,6 +74,7 @@ struct nvme_dev {
  	struct list_head node;
  	struct nvme_queue **queues;
  	struct request_queue *admin_q;
+	struct request_queue *io_q;
  	struct blk_mq_tag_set tagset;
  	struct blk_mq_tag_set admin_tagset;
  	u32 __iomem *dbs;
--

  reply	other threads:[~2015-01-23 16:22 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-01-23  0:02 [PATCH 1/1] NVMe: Do not take nsid while a passthrough IO command is being issued via a block device file descriptor Yan Liu
2015-01-23  0:02 ` Yan Liu
2015-01-23  7:57 ` Christoph Hellwig
2015-01-23  7:57   ` Christoph Hellwig
2015-01-23 16:22   ` Keith Busch [this message]
2015-01-23 16:22     ` Keith Busch
2015-01-23 17:27     ` Christoph Hellwig
2015-01-23 17:27       ` Christoph Hellwig
2015-01-23 17:50       ` Keith Busch
2015-01-23 17:50         ` Keith Busch
2015-01-25 14:41         ` Christoph Hellwig
2015-01-25 14:41           ` Christoph Hellwig
  -- strict thread matches above, loose matches on Subject: below --
2015-01-23 23:57 Yan Liu
2015-01-23 23:57 ` Yan Liu
2015-01-25 14:59 ` Christoph Hellwig
2015-01-25 14:59   ` Christoph Hellwig
2015-01-26 18:02   ` Keith Busch
2015-01-26 18:02     ` Keith Busch
2015-01-22  0:28 Yan Liu
2015-01-22  0:28 ` Yan Liu
2015-01-22  0:47 ` Keith Busch
2015-01-22  0:47   ` Keith Busch
2015-01-22  8:45   ` Christoph Hellwig
2015-01-22  8:45     ` Christoph Hellwig
2015-01-22 15:21     ` Keith Busch
2015-01-22 15:21       ` Keith Busch
2015-01-22 15:49       ` Christoph Hellwig
2015-01-22 15:49         ` Christoph Hellwig
2015-01-22 16:58         ` Keith Busch
2015-01-22 16:58           ` Keith Busch
     [not found]   ` <CADMsRTZjajAj682a5FH-AmpphoQ4vw5QxqnJiGEQ+Jg_f7TvoA@mail.gmail.com>
2015-01-22 14:22     ` Keith Busch
2015-01-22 14:22       ` Keith Busch

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=alpine.LNX.2.00.1501231451380.15481@localhost.lm.intel.com \
    --to=keith.busch@intel.com \
    --cc=hch@infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=willy@linux.intel.com \
    --cc=yan@purestorage.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.