linux-nvme.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] nvme: remove unprivileged passthrough support
       [not found] <CGME20231016061151epcas5p1a0e18162b362ffbea754157e99f88995@epcas5p1.samsung.com>
@ 2023-10-16  6:05 ` Kanchan Joshi
  2023-10-16 18:41   ` Keith Busch
  2023-10-27  7:06   ` Shinichiro Kawasaki
  0 siblings, 2 replies; 13+ messages in thread
From: Kanchan Joshi @ 2023-10-16  6:05 UTC (permalink / raw)
  To: hch, kbusch, axboe, sagi
  Cc: linux-nvme, gost.dev, vincentfu, Kanchan Joshi, stable

Passthrough has got a hole that can be exploited to cause kernel memory
corruption. This is about making the device do larger DMA into
short meta/data buffer owned by kernel [1].

As a stopgap measure, disable the support of unprivileged passthrough.

This patch brings back coarse-granular CAP_SYS_ADMIN checks by reverting
following patches:

- 7d9d7d59d44 ("nvme: replace the fmode_t argument to the nvme ioctl handlers with a simple bool")
- 313c08c72ee ("nvme: don't allow unprivileged passthrough on partitions")
- 6f99ac04c46 ("nvme: consult the CSE log page for unprivileged passthrough")
- ea43fceea41 ("nvme: allow unprivileged passthrough of Identify Controller")
- e4fbcf32c86 ("nvme: identify-namespace without CAP_SYS_ADMIN")
- 855b7717f44 ("nvme: fine-granular CAP_SYS_ADMIN for nvme io commands")

[1] https://lore.kernel.org/linux-nvme/20231013051458.39987-1-joshi.k@samsung.com/

CC: stable@vger.kernel.org # 6.2
Fixes: 855b7717f44b1 ("nvme: fine-granular CAP_SYS_ADMIN for nvme io commands")

Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
Changes since v1:
- Fix the way "Fixes:" was written before (Greg)

 drivers/nvme/host/ioctl.c | 159 ++++++++------------------------------
 include/linux/nvme.h      |   2 -
 2 files changed, 34 insertions(+), 127 deletions(-)

diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index d8ff796fd5f2..788b36e7915a 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -10,80 +10,8 @@
 
 enum {
 	NVME_IOCTL_VEC		= (1 << 0),
-	NVME_IOCTL_PARTITION	= (1 << 1),
 };
 
-static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c,
-		unsigned int flags, bool open_for_write)
-{
-	u32 effects;
-
-	if (capable(CAP_SYS_ADMIN))
-		return true;
-
-	/*
-	 * Do not allow unprivileged passthrough on partitions, as that allows an
-	 * escape from the containment of the partition.
-	 */
-	if (flags & NVME_IOCTL_PARTITION)
-		return false;
-
-	/*
-	 * Do not allow unprivileged processes to send vendor specific or fabrics
-	 * commands as we can't be sure about their effects.
-	 */
-	if (c->common.opcode >= nvme_cmd_vendor_start ||
-	    c->common.opcode == nvme_fabrics_command)
-		return false;
-
-	/*
-	 * Do not allow unprivileged passthrough of admin commands except
-	 * for a subset of identify commands that contain information required
-	 * to form proper I/O commands in userspace and do not expose any
-	 * potentially sensitive information.
-	 */
-	if (!ns) {
-		if (c->common.opcode == nvme_admin_identify) {
-			switch (c->identify.cns) {
-			case NVME_ID_CNS_NS:
-			case NVME_ID_CNS_CS_NS:
-			case NVME_ID_CNS_NS_CS_INDEP:
-			case NVME_ID_CNS_CS_CTRL:
-			case NVME_ID_CNS_CTRL:
-				return true;
-			}
-		}
-		return false;
-	}
-
-	/*
-	 * Check if the controller provides a Commands Supported and Effects log
-	 * and marks this command as supported.  If not reject unprivileged
-	 * passthrough.
-	 */
-	effects = nvme_command_effects(ns->ctrl, ns, c->common.opcode);
-	if (!(effects & NVME_CMD_EFFECTS_CSUPP))
-		return false;
-
-	/*
-	 * Don't allow passthrough for command that have intrusive (or unknown)
-	 * effects.
-	 */
-	if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC |
-			NVME_CMD_EFFECTS_UUID_SEL |
-			NVME_CMD_EFFECTS_SCOPE_MASK))
-		return false;
-
-	/*
-	 * Only allow I/O commands that transfer data to the controller or that
-	 * change the logical block contents if the file descriptor is open for
-	 * writing.
-	 */
-	if (nvme_is_write(c) || (effects & NVME_CMD_EFFECTS_LBCC))
-		return open_for_write;
-	return true;
-}
-
 /*
  * Convert integer values from ioctl structures to user pointers, silently
  * ignoring the upper bits in the compat case to match behaviour of 32-bit
@@ -335,8 +263,7 @@ static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl,
 }
 
 static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
-		struct nvme_passthru_cmd __user *ucmd, unsigned int flags,
-		bool open_for_write)
+			struct nvme_passthru_cmd __user *ucmd)
 {
 	struct nvme_passthru_cmd cmd;
 	struct nvme_command c;
@@ -344,6 +271,8 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 	u64 result;
 	int status;
 
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
 	if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
 		return -EFAULT;
 	if (cmd.flags)
@@ -364,9 +293,6 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 	c.common.cdw14 = cpu_to_le32(cmd.cdw14);
 	c.common.cdw15 = cpu_to_le32(cmd.cdw15);
 
-	if (!nvme_cmd_allowed(ns, &c, 0, open_for_write))
-		return -EACCES;
-
 	if (cmd.timeout_ms)
 		timeout = msecs_to_jiffies(cmd.timeout_ms);
 
@@ -383,14 +309,16 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 }
 
 static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
-		struct nvme_passthru_cmd64 __user *ucmd, unsigned int flags,
-		bool open_for_write)
+			struct nvme_passthru_cmd64 __user *ucmd,
+			unsigned int flags)
 {
 	struct nvme_passthru_cmd64 cmd;
 	struct nvme_command c;
 	unsigned timeout = 0;
 	int status;
 
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
 	if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
 		return -EFAULT;
 	if (cmd.flags)
@@ -411,9 +339,6 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 	c.common.cdw14 = cpu_to_le32(cmd.cdw14);
 	c.common.cdw15 = cpu_to_le32(cmd.cdw15);
 
-	if (!nvme_cmd_allowed(ns, &c, flags, open_for_write))
-		return -EACCES;
-
 	if (cmd.timeout_ms)
 		timeout = msecs_to_jiffies(cmd.timeout_ms);
 
@@ -563,6 +488,9 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 	void *meta = NULL;
 	int ret;
 
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
 	c.common.opcode = READ_ONCE(cmd->opcode);
 	c.common.flags = READ_ONCE(cmd->flags);
 	if (c.common.flags)
@@ -584,9 +512,6 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 	c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14));
 	c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15));
 
-	if (!nvme_cmd_allowed(ns, &c, 0, ioucmd->file->f_mode & FMODE_WRITE))
-		return -EACCES;
-
 	d.metadata = READ_ONCE(cmd->metadata);
 	d.addr = READ_ONCE(cmd->addr);
 	d.data_len = READ_ONCE(cmd->data_len);
@@ -643,13 +568,13 @@ static bool is_ctrl_ioctl(unsigned int cmd)
 }
 
 static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd,
-		void __user *argp, bool open_for_write)
+		void __user *argp)
 {
 	switch (cmd) {
 	case NVME_IOCTL_ADMIN_CMD:
-		return nvme_user_cmd(ctrl, NULL, argp, 0, open_for_write);
+		return nvme_user_cmd(ctrl, NULL, argp);
 	case NVME_IOCTL_ADMIN64_CMD:
-		return nvme_user_cmd64(ctrl, NULL, argp, 0, open_for_write);
+		return nvme_user_cmd64(ctrl, NULL, argp, 0);
 	default:
 		return sed_ioctl(ctrl->opal_dev, cmd, argp);
 	}
@@ -674,14 +599,16 @@ struct nvme_user_io32 {
 #endif /* COMPAT_FOR_U64_ALIGNMENT */
 
 static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd,
-		void __user *argp, unsigned int flags, bool open_for_write)
+		void __user *argp)
 {
+	unsigned int flags = 0;
+
 	switch (cmd) {
 	case NVME_IOCTL_ID:
 		force_successful_syscall_return();
 		return ns->head->ns_id;
 	case NVME_IOCTL_IO_CMD:
-		return nvme_user_cmd(ns->ctrl, ns, argp, flags, open_for_write);
+		return nvme_user_cmd(ns->ctrl, ns, argp);
 	/*
 	 * struct nvme_user_io can have different padding on some 32-bit ABIs.
 	 * Just accept the compat version as all fields that are used are the
@@ -696,39 +623,32 @@ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd,
 		flags |= NVME_IOCTL_VEC;
 		fallthrough;
 	case NVME_IOCTL_IO64_CMD:
-		return nvme_user_cmd64(ns->ctrl, ns, argp, flags,
-				       open_for_write);
+		return nvme_user_cmd64(ns->ctrl, ns, argp, flags);
 	default:
 		return -ENOTTY;
 	}
 }
 
-int nvme_ioctl(struct block_device *bdev, blk_mode_t mode,
+int nvme_ioctl(struct block_device *bdev, fmode_t mode,
 		unsigned int cmd, unsigned long arg)
 {
 	struct nvme_ns *ns = bdev->bd_disk->private_data;
-	bool open_for_write = mode & BLK_OPEN_WRITE;
 	void __user *argp = (void __user *)arg;
-	unsigned int flags = 0;
-
-	if (bdev_is_partition(bdev))
-		flags |= NVME_IOCTL_PARTITION;
 
 	if (is_ctrl_ioctl(cmd))
-		return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write);
-	return nvme_ns_ioctl(ns, cmd, argp, flags, open_for_write);
+		return nvme_ctrl_ioctl(ns->ctrl, cmd, argp);
+	return nvme_ns_ioctl(ns, cmd, argp);
 }
 
 long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	struct nvme_ns *ns =
 		container_of(file_inode(file)->i_cdev, struct nvme_ns, cdev);
-	bool open_for_write = file->f_mode & FMODE_WRITE;
 	void __user *argp = (void __user *)arg;
 
 	if (is_ctrl_ioctl(cmd))
-		return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write);
-	return nvme_ns_ioctl(ns, cmd, argp, 0, open_for_write);
+		return nvme_ctrl_ioctl(ns->ctrl, cmd, argp);
+	return nvme_ns_ioctl(ns, cmd, argp);
 }
 
 static int nvme_uring_cmd_checks(unsigned int issue_flags)
@@ -792,8 +712,7 @@ int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
 }
 #ifdef CONFIG_NVME_MULTIPATH
 static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
-		void __user *argp, struct nvme_ns_head *head, int srcu_idx,
-		bool open_for_write)
+		void __user *argp, struct nvme_ns_head *head, int srcu_idx)
 	__releases(&head->srcu)
 {
 	struct nvme_ctrl *ctrl = ns->ctrl;
@@ -801,7 +720,7 @@ static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
 
 	nvme_get_ctrl(ns->ctrl);
 	srcu_read_unlock(&head->srcu, srcu_idx);
-	ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write);
+	ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp);
 
 	nvme_put_ctrl(ctrl);
 	return ret;
@@ -811,14 +730,9 @@ int nvme_ns_head_ioctl(struct block_device *bdev, blk_mode_t mode,
 		unsigned int cmd, unsigned long arg)
 {
 	struct nvme_ns_head *head = bdev->bd_disk->private_data;
-	bool open_for_write = mode & BLK_OPEN_WRITE;
 	void __user *argp = (void __user *)arg;
 	struct nvme_ns *ns;
 	int srcu_idx, ret = -EWOULDBLOCK;
-	unsigned int flags = 0;
-
-	if (bdev_is_partition(bdev))
-		flags |= NVME_IOCTL_PARTITION;
 
 	srcu_idx = srcu_read_lock(&head->srcu);
 	ns = nvme_find_path(head);
@@ -831,10 +745,9 @@ int nvme_ns_head_ioctl(struct block_device *bdev, blk_mode_t mode,
 	 * deadlock when deleting namespaces using the passthrough interface.
 	 */
 	if (is_ctrl_ioctl(cmd))
-		return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx,
-					       open_for_write);
+		return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx);
 
-	ret = nvme_ns_ioctl(ns, cmd, argp, flags, open_for_write);
+	ret = nvme_ns_ioctl(ns, cmd, argp);
 out_unlock:
 	srcu_read_unlock(&head->srcu, srcu_idx);
 	return ret;
@@ -843,7 +756,6 @@ int nvme_ns_head_ioctl(struct block_device *bdev, blk_mode_t mode,
 long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,
 		unsigned long arg)
 {
-	bool open_for_write = file->f_mode & FMODE_WRITE;
 	struct cdev *cdev = file_inode(file)->i_cdev;
 	struct nvme_ns_head *head =
 		container_of(cdev, struct nvme_ns_head, cdev);
@@ -857,10 +769,9 @@ long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,
 		goto out_unlock;
 
 	if (is_ctrl_ioctl(cmd))
-		return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx,
-				open_for_write);
+		return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx);
 
-	ret = nvme_ns_ioctl(ns, cmd, argp, 0, open_for_write);
+	ret = nvme_ns_ioctl(ns, cmd, argp);
 out_unlock:
 	srcu_read_unlock(&head->srcu, srcu_idx);
 	return ret;
@@ -909,8 +820,7 @@ int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
 	return ret;
 }
 
-static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp,
-		bool open_for_write)
+static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
 {
 	struct nvme_ns *ns;
 	int ret;
@@ -934,7 +844,7 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp,
 	kref_get(&ns->kref);
 	up_read(&ctrl->namespaces_rwsem);
 
-	ret = nvme_user_cmd(ctrl, ns, argp, 0, open_for_write);
+	ret = nvme_user_cmd(ctrl, ns, argp);
 	nvme_put_ns(ns);
 	return ret;
 
@@ -946,17 +856,16 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp,
 long nvme_dev_ioctl(struct file *file, unsigned int cmd,
 		unsigned long arg)
 {
-	bool open_for_write = file->f_mode & FMODE_WRITE;
 	struct nvme_ctrl *ctrl = file->private_data;
 	void __user *argp = (void __user *)arg;
 
 	switch (cmd) {
 	case NVME_IOCTL_ADMIN_CMD:
-		return nvme_user_cmd(ctrl, NULL, argp, 0, open_for_write);
+		return nvme_user_cmd(ctrl, NULL, argp);
 	case NVME_IOCTL_ADMIN64_CMD:
-		return nvme_user_cmd64(ctrl, NULL, argp, 0, open_for_write);
+		return nvme_user_cmd64(ctrl, NULL, argp, 0);
 	case NVME_IOCTL_IO_CMD:
-		return nvme_dev_user_cmd(ctrl, argp, open_for_write);
+		return nvme_dev_user_cmd(ctrl, argp);
 	case NVME_IOCTL_RESET:
 		if (!capable(CAP_SYS_ADMIN))
 			return -EACCES;
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 26dd3f859d9d..df3e2c619448 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -642,7 +642,6 @@ enum {
 	NVME_CMD_EFFECTS_CCC		= 1 << 4,
 	NVME_CMD_EFFECTS_CSE_MASK	= GENMASK(18, 16),
 	NVME_CMD_EFFECTS_UUID_SEL	= 1 << 19,
-	NVME_CMD_EFFECTS_SCOPE_MASK	= GENMASK(31, 20),
 };
 
 struct nvme_effects_log {
@@ -834,7 +833,6 @@ enum nvme_opcode {
 	nvme_cmd_zone_mgmt_send	= 0x79,
 	nvme_cmd_zone_mgmt_recv	= 0x7a,
 	nvme_cmd_zone_append	= 0x7d,
-	nvme_cmd_vendor_start	= 0x80,
 };
 
 #define nvme_opcode_name(opcode)	{ opcode, #opcode }
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH v2] nvme: remove unprivileged passthrough support
  2023-10-16  6:05 ` [PATCH v2] nvme: remove unprivileged passthrough support Kanchan Joshi
@ 2023-10-16 18:41   ` Keith Busch
  2023-10-18 21:26     ` Keith Busch
  2023-10-27  7:06   ` Shinichiro Kawasaki
  1 sibling, 1 reply; 13+ messages in thread
From: Keith Busch @ 2023-10-16 18:41 UTC (permalink / raw)
  To: Kanchan Joshi; +Cc: hch, axboe, sagi, linux-nvme, gost.dev, vincentfu, stable

On Mon, Oct 16, 2023 at 11:35:19AM +0530, Kanchan Joshi wrote:
> Passthrough has got a hole that can be exploited to cause kernel memory
> corruption. This is about making the device do larger DMA into
> short meta/data buffer owned by kernel [1].
> 
> As a stopgap measure, disable the support of unprivileged passthrough.
> 
> This patch brings back coarse-granular CAP_SYS_ADMIN checks by reverting
> following patches:
> 
> - 7d9d7d59d44 ("nvme: replace the fmode_t argument to the nvme ioctl handlers with a simple bool")
> - 313c08c72ee ("nvme: don't allow unprivileged passthrough on partitions")
> - 6f99ac04c46 ("nvme: consult the CSE log page for unprivileged passthrough")
> - ea43fceea41 ("nvme: allow unprivileged passthrough of Identify Controller")
> - e4fbcf32c86 ("nvme: identify-namespace without CAP_SYS_ADMIN")
> - 855b7717f44 ("nvme: fine-granular CAP_SYS_ADMIN for nvme io commands")
> 
> [1] https://lore.kernel.org/linux-nvme/20231013051458.39987-1-joshi.k@samsung.com/
> 
> CC: stable@vger.kernel.org # 6.2
> Fixes: 855b7717f44b1 ("nvme: fine-granular CAP_SYS_ADMIN for nvme io commands")
> 
> Suggested-by: Christoph Hellwig <hch@lst.de>
> Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
> Reviewed-by: Christoph Hellwig <hch@lst.de>


Applied for nvme-6.6.


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2] nvme: remove unprivileged passthrough support
  2023-10-16 18:41   ` Keith Busch
@ 2023-10-18 21:26     ` Keith Busch
  2023-10-19  5:04       ` Christoph Hellwig
  0 siblings, 1 reply; 13+ messages in thread
From: Keith Busch @ 2023-10-18 21:26 UTC (permalink / raw)
  To: Kanchan Joshi; +Cc: hch, axboe, sagi, linux-nvme, gost.dev, vincentfu, stable

On further consideration and some offline chats, I believe this large
change is a bit too late for 6.6. I think this should wait for 6.7 (and
stable), hopefully preserving non-root access in some sane capacity.
It's backed out now, and current nvme-6.6 PR does not include this
patch.


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2] nvme: remove unprivileged passthrough support
  2023-10-18 21:26     ` Keith Busch
@ 2023-10-19  5:04       ` Christoph Hellwig
  2023-10-20 14:25         ` Keith Busch
  0 siblings, 1 reply; 13+ messages in thread
From: Christoph Hellwig @ 2023-10-19  5:04 UTC (permalink / raw)
  To: Keith Busch
  Cc: Kanchan Joshi, hch, axboe, sagi, linux-nvme, gost.dev, vincentfu, stable

On Wed, Oct 18, 2023 at 03:26:20PM -0600, Keith Busch wrote:
> On further consideration and some offline chats, I believe this large
> change is a bit too late for 6.6. I think this should wait for 6.7 (and
> stable), hopefully preserving non-root access in some sane capacity.
> It's backed out now, and current nvme-6.6 PR does not include this
> patch.

Umm, what are the offlist discussions?  We leave an exploitable hole
in, so I don't think waiting any longer is an option.


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2] nvme: remove unprivileged passthrough support
  2023-10-19  5:04       ` Christoph Hellwig
@ 2023-10-20 14:25         ` Keith Busch
  2023-10-23  5:44           ` Christoph Hellwig
  0 siblings, 1 reply; 13+ messages in thread
From: Keith Busch @ 2023-10-20 14:25 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Kanchan Joshi, axboe, sagi, linux-nvme, gost.dev, vincentfu, stable

On Thu, Oct 19, 2023 at 07:04:11AM +0200, Christoph Hellwig wrote:
> On Wed, Oct 18, 2023 at 03:26:20PM -0600, Keith Busch wrote:
> > On further consideration and some offline chats, I believe this large
> > change is a bit too late for 6.6. I think this should wait for 6.7 (and
> > stable), hopefully preserving non-root access in some sane capacity.
> > It's backed out now, and current nvme-6.6 PR does not include this
> > patch.
> 
> Umm, what are the offlist discussions?  We leave an exploitable hole
> in, so I don't think waiting any longer is an option.

Jens repeated what he told me offline on this thread here, and dropped
the pull request that contained this patch:

  https://lists.infradead.org/pipermail/linux-nvme/2023-October/042684.html

BTW, don't you still need someone with root access to change the
permissions on the device handle in order for an unpriveledged user to
reach this hole? It's not open access by default, you still have to
opt-in.


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2] nvme: remove unprivileged passthrough support
  2023-10-20 14:25         ` Keith Busch
@ 2023-10-23  5:44           ` Christoph Hellwig
  2023-10-23 15:18             ` Keith Busch
  0 siblings, 1 reply; 13+ messages in thread
From: Christoph Hellwig @ 2023-10-23  5:44 UTC (permalink / raw)
  To: Keith Busch
  Cc: Christoph Hellwig, Kanchan Joshi, axboe, sagi, linux-nvme,
	gost.dev, vincentfu, stable

On Fri, Oct 20, 2023 at 08:25:49AM -0600, Keith Busch wrote:
> Jens repeated what he told me offline on this thread here, and dropped
> the pull request that contained this patch:
> 
>   https://lists.infradead.org/pipermail/linux-nvme/2023-October/042684.html
> 
> BTW, don't you still need someone with root access to change the
> permissions on the device handle in order for an unpriveledged user to
> reach this hole? It's not open access by default, you still have to
> opt-in.

Yes, you need someone with root access to change the device node
persmissions.  But we allowed that under the assumption it is safe
to do so, which it turns out it is not.


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2] nvme: remove unprivileged passthrough support
  2023-10-23  5:44           ` Christoph Hellwig
@ 2023-10-23 15:18             ` Keith Busch
  2023-10-24  7:07               ` Christoph Hellwig
  0 siblings, 1 reply; 13+ messages in thread
From: Keith Busch @ 2023-10-23 15:18 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Kanchan Joshi, axboe, sagi, linux-nvme, gost.dev, vincentfu, stable

On Mon, Oct 23, 2023 at 07:44:56AM +0200, Christoph Hellwig wrote:
> Yes, you need someone with root access to change the device node
> persmissions.  But we allowed that under the assumption it is safe
> to do so, which it turns out it is not.

Okay, iiuc, while we have to opt-in to allow this hole, we need another
option for users to set to allow this usage because it's not safe.

Here are two options I have considered for unpriveledged access, please
let me know if you have others or thoughts.

  Restrict access for processes with CAP_SYS_RAWIO, which can be granted
  to non-root users. This cap is already used in scsi subsystem, too.

  A per nvme-generic namespace sysfs attribute that only root can toggle
  that would override any caps and just rely on access permissions.


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2] nvme: remove unprivileged passthrough support
  2023-10-23 15:18             ` Keith Busch
@ 2023-10-24  7:07               ` Christoph Hellwig
  2023-10-26 14:31                 ` Kanchan Joshi
  0 siblings, 1 reply; 13+ messages in thread
From: Christoph Hellwig @ 2023-10-24  7:07 UTC (permalink / raw)
  To: Keith Busch
  Cc: Christoph Hellwig, Kanchan Joshi, axboe, sagi, linux-nvme,
	gost.dev, vincentfu, stable

On Mon, Oct 23, 2023 at 09:18:36AM -0600, Keith Busch wrote:
> On Mon, Oct 23, 2023 at 07:44:56AM +0200, Christoph Hellwig wrote:
> > Yes, you need someone with root access to change the device node
> > persmissions.  But we allowed that under the assumption it is safe
> > to do so, which it turns out it is not.
> 
> Okay, iiuc, while we have to opt-in to allow this hole, we need another
> option for users to set to allow this usage because it's not safe.
> 
> Here are two options I have considered for unpriveledged access, please
> let me know if you have others or thoughts.
> 
>   Restrict access for processes with CAP_SYS_RAWIO, which can be granted
>   to non-root users. This cap is already used in scsi subsystem, too.

Well, that's sensible in general.

>   A per nvme-generic namespace sysfs attribute that only root can toggle
>   that would override any caps and just rely on access permissions.

And that I'm not confident about as long as we can only use the broken
PRP scheme on NVMe.


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2] nvme: remove unprivileged passthrough support
  2023-10-24  7:07               ` Christoph Hellwig
@ 2023-10-26 14:31                 ` Kanchan Joshi
  2023-10-26 15:15                   ` Keith Busch
  0 siblings, 1 reply; 13+ messages in thread
From: Kanchan Joshi @ 2023-10-26 14:31 UTC (permalink / raw)
  To: Christoph Hellwig, Keith Busch
  Cc: axboe, sagi, linux-nvme, gost.dev, vincentfu, stable

On 10/24/2023 12:37 PM, Christoph Hellwig wrote:
> On Mon, Oct 23, 2023 at 09:18:36AM -0600, Keith Busch wrote:
>> On Mon, Oct 23, 2023 at 07:44:56AM +0200, Christoph Hellwig wrote:
>>> Yes, you need someone with root access to change the device node
>>> persmissions.  But we allowed that under the assumption it is safe
>>> to do so, which it turns out it is not.
>>
>> Okay, iiuc, while we have to opt-in to allow this hole, we need another
>> option for users to set to allow this usage because it's not safe.
>>
>> Here are two options I have considered for unpriveledged access, please
>> let me know if you have others or thoughts.
>>
>>    Restrict access for processes with CAP_SYS_RAWIO, which can be granted
>>    to non-root users. This cap is already used in scsi subsystem, too.
> 
> Well, that's sensible in general.

With that someone needs to make each binary (that wants to use 
passthrough) capability-aware by doing:

setcap "CAP_SYS_RAWIO=ep" <binary>

Seems extra work for admins (or distros if they need to ship the binary 
that way).


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2] nvme: remove unprivileged passthrough support
  2023-10-26 14:31                 ` Kanchan Joshi
@ 2023-10-26 15:15                   ` Keith Busch
  0 siblings, 0 replies; 13+ messages in thread
From: Keith Busch @ 2023-10-26 15:15 UTC (permalink / raw)
  To: Kanchan Joshi
  Cc: Christoph Hellwig, axboe, sagi, linux-nvme, gost.dev, vincentfu, stable

On Thu, Oct 26, 2023 at 08:01:36PM +0530, Kanchan Joshi wrote:
> On 10/24/2023 12:37 PM, Christoph Hellwig wrote:
> > On Mon, Oct 23, 2023 at 09:18:36AM -0600, Keith Busch wrote:
> >> On Mon, Oct 23, 2023 at 07:44:56AM +0200, Christoph Hellwig wrote:
> >>> Yes, you need someone with root access to change the device node
> >>> persmissions.  But we allowed that under the assumption it is safe
> >>> to do so, which it turns out it is not.
> >>
> >> Okay, iiuc, while we have to opt-in to allow this hole, we need another
> >> option for users to set to allow this usage because it's not safe.
> >>
> >> Here are two options I have considered for unpriveledged access, please
> >> let me know if you have others or thoughts.
> >>
> >>    Restrict access for processes with CAP_SYS_RAWIO, which can be granted
> >>    to non-root users. This cap is already used in scsi subsystem, too.
> > 
> > Well, that's sensible in general.
> 
> With that someone needs to make each binary (that wants to use 
> passthrough) capability-aware by doing:
> 
> setcap "CAP_SYS_RAWIO=ep" <binary>
> 
> Seems extra work for admins (or distros if they need to ship the binary 
> that way).

The way I usually see it done is add the capability to the user so any
binary executed by that user already has the caps.


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2] nvme: remove unprivileged passthrough support
  2023-10-16  6:05 ` [PATCH v2] nvme: remove unprivileged passthrough support Kanchan Joshi
  2023-10-16 18:41   ` Keith Busch
@ 2023-10-27  7:06   ` Shinichiro Kawasaki
  2023-10-27  7:15     ` Kanchan Joshi
  1 sibling, 1 reply; 13+ messages in thread
From: Shinichiro Kawasaki @ 2023-10-27  7:06 UTC (permalink / raw)
  To: Kanchan Joshi
  Cc: hch, kbusch, axboe, sagi, linux-nvme, gost.dev, vincentfu, stable

On Oct 16, 2023 / 11:35, Kanchan Joshi wrote:
> Passthrough has got a hole that can be exploited to cause kernel memory
> corruption. This is about making the device do larger DMA into
> short meta/data buffer owned by kernel [1].
> 
> As a stopgap measure, disable the support of unprivileged passthrough.
> 
> This patch brings back coarse-granular CAP_SYS_ADMIN checks by reverting
> following patches:
> 
> - 7d9d7d59d44 ("nvme: replace the fmode_t argument to the nvme ioctl handlers with a simple bool")
> - 313c08c72ee ("nvme: don't allow unprivileged passthrough on partitions")
> - 6f99ac04c46 ("nvme: consult the CSE log page for unprivileged passthrough")
> - ea43fceea41 ("nvme: allow unprivileged passthrough of Identify Controller")
> - e4fbcf32c86 ("nvme: identify-namespace without CAP_SYS_ADMIN")
> - 855b7717f44 ("nvme: fine-granular CAP_SYS_ADMIN for nvme io commands")
> 
> [1] https://lore.kernel.org/linux-nvme/20231013051458.39987-1-joshi.k@samsung.com/

This change looks affecting the blktests test case nvme/046. Should we adjust
the test case for the coarse-granular CAP_SYS_ADMIN checks?

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2] nvme: remove unprivileged passthrough support
  2023-10-27  7:06   ` Shinichiro Kawasaki
@ 2023-10-27  7:15     ` Kanchan Joshi
  2023-10-27  7:49       ` Shinichiro Kawasaki
  0 siblings, 1 reply; 13+ messages in thread
From: Kanchan Joshi @ 2023-10-27  7:15 UTC (permalink / raw)
  To: Shinichiro Kawasaki
  Cc: hch, kbusch, axboe, sagi, linux-nvme, gost.dev, vincentfu, stable

On 10/27/2023 12:36 PM, Shinichiro Kawasaki wrote:
> On Oct 16, 2023 / 11:35, Kanchan Joshi wrote:
>> Passthrough has got a hole that can be exploited to cause kernel memory
>> corruption. This is about making the device do larger DMA into
>> short meta/data buffer owned by kernel [1].
>>
>> As a stopgap measure, disable the support of unprivileged passthrough.
>>
>> This patch brings back coarse-granular CAP_SYS_ADMIN checks by reverting
>> following patches:
>>
>> - 7d9d7d59d44 ("nvme: replace the fmode_t argument to the nvme ioctl handlers with a simple bool")
>> - 313c08c72ee ("nvme: don't allow unprivileged passthrough on partitions")
>> - 6f99ac04c46 ("nvme: consult the CSE log page for unprivileged passthrough")
>> - ea43fceea41 ("nvme: allow unprivileged passthrough of Identify Controller")
>> - e4fbcf32c86 ("nvme: identify-namespace without CAP_SYS_ADMIN")
>> - 855b7717f44 ("nvme: fine-granular CAP_SYS_ADMIN for nvme io commands")
>>
>> [1] https://lore.kernel.org/linux-nvme/20231013051458.39987-1-joshi.k@samsung.com/
> 
> This change looks affecting the blktests test case nvme/046. Should we adjust
> the test case for the coarse-granular CAP_SYS_ADMIN checks?

Nothing to adjust in the test, as there is no change in the kernel (at 
this point). I have made a note to revisit the test if anything changes.


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2] nvme: remove unprivileged passthrough support
  2023-10-27  7:15     ` Kanchan Joshi
@ 2023-10-27  7:49       ` Shinichiro Kawasaki
  0 siblings, 0 replies; 13+ messages in thread
From: Shinichiro Kawasaki @ 2023-10-27  7:49 UTC (permalink / raw)
  To: Kanchan Joshi
  Cc: hch, kbusch, axboe, sagi, linux-nvme, gost.dev, vincentfu, stable

On Oct 27, 2023 / 12:45, Kanchan Joshi wrote:
> On 10/27/2023 12:36 PM, Shinichiro Kawasaki wrote:
> > On Oct 16, 2023 / 11:35, Kanchan Joshi wrote:
> >> Passthrough has got a hole that can be exploited to cause kernel memory
> >> corruption. This is about making the device do larger DMA into
> >> short meta/data buffer owned by kernel [1].
> >>
> >> As a stopgap measure, disable the support of unprivileged passthrough.
> >>
> >> This patch brings back coarse-granular CAP_SYS_ADMIN checks by reverting
> >> following patches:
> >>
> >> - 7d9d7d59d44 ("nvme: replace the fmode_t argument to the nvme ioctl handlers with a simple bool")
> >> - 313c08c72ee ("nvme: don't allow unprivileged passthrough on partitions")
> >> - 6f99ac04c46 ("nvme: consult the CSE log page for unprivileged passthrough")
> >> - ea43fceea41 ("nvme: allow unprivileged passthrough of Identify Controller")
> >> - e4fbcf32c86 ("nvme: identify-namespace without CAP_SYS_ADMIN")
> >> - 855b7717f44 ("nvme: fine-granular CAP_SYS_ADMIN for nvme io commands")
> >>
> >> [1] https://lore.kernel.org/linux-nvme/20231013051458.39987-1-joshi.k@samsung.com/
> > 
> > This change looks affecting the blktests test case nvme/046. Should we adjust
> > the test case for the coarse-granular CAP_SYS_ADMIN checks?
> 
> Nothing to adjust in the test, as there is no change in the kernel (at 
> this point). I have made a note to revisit the test if anything changes.

Alright, thanks for the care.


^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2023-10-27  7:49 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <CGME20231016061151epcas5p1a0e18162b362ffbea754157e99f88995@epcas5p1.samsung.com>
2023-10-16  6:05 ` [PATCH v2] nvme: remove unprivileged passthrough support Kanchan Joshi
2023-10-16 18:41   ` Keith Busch
2023-10-18 21:26     ` Keith Busch
2023-10-19  5:04       ` Christoph Hellwig
2023-10-20 14:25         ` Keith Busch
2023-10-23  5:44           ` Christoph Hellwig
2023-10-23 15:18             ` Keith Busch
2023-10-24  7:07               ` Christoph Hellwig
2023-10-26 14:31                 ` Kanchan Joshi
2023-10-26 15:15                   ` Keith Busch
2023-10-27  7:06   ` Shinichiro Kawasaki
2023-10-27  7:15     ` Kanchan Joshi
2023-10-27  7:49       ` Shinichiro Kawasaki

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).