linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH RFC 0/1] VFIO: Region-specific file descriptors
@ 2019-09-30 23:55 Shawn Anastasio
  2019-09-30 23:55 ` [PATCH RFC 1/1] vfio/pci: Introduce region " Shawn Anastasio
  2019-10-01 15:38 ` [PATCH RFC 0/1] VFIO: Region-specific " Alex Williamson
  0 siblings, 2 replies; 4+ messages in thread
From: Shawn Anastasio @ 2019-09-30 23:55 UTC (permalink / raw)
  To: alex.williamson, kvm; +Cc: cohuck, linux-kernel

This patch adds region file descriptors to VFIO, a simple file descriptor type
that allows read/write/mmap operations on a single region of a VFIO device.

This feature is particularly useful for privileged applications that use VFIO
and wish to share file descriptors with unprivileged applications without
handing over full control of the device. It also allows applications to use
regular offsets in read/write/mmap instead of the region index + offset that
must be used with device file descriptors.

The current implementation is very raw (PCI only, no reference counting which
is probably wrong), but I wanted to get a sense of whether this feature is
desired. If it is, tips on how to implement this more correctly are
appreciated.

Comments welcome!


Shawn Anastasio (1):
  vfio/pci: Introduce region file descriptors

 drivers/vfio/pci/vfio_pci.c         | 105 ++++++++++++++++++++++++++++
 drivers/vfio/pci/vfio_pci_private.h |   5 ++
 include/uapi/linux/vfio.h           |  14 ++++
 3 files changed, 124 insertions(+)

-- 
2.20.1


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH RFC 1/1] vfio/pci: Introduce region file descriptors
  2019-09-30 23:55 [PATCH RFC 0/1] VFIO: Region-specific file descriptors Shawn Anastasio
@ 2019-09-30 23:55 ` Shawn Anastasio
  2019-10-01 15:38 ` [PATCH RFC 0/1] VFIO: Region-specific " Alex Williamson
  1 sibling, 0 replies; 4+ messages in thread
From: Shawn Anastasio @ 2019-09-30 23:55 UTC (permalink / raw)
  To: alex.williamson, kvm; +Cc: cohuck, linux-kernel

Introduce a new type of VFIO file descriptor that allows
memfd-style semantics for regions of a VFIO device.

Unlike VFIO device file descriptors, region file descriptors
are limited to a single region, and all offsets (read, etc.)
are relative to the start of the region.

This allows for finer granularity when sharing VFIO fds,
as applications can now choose to only share specific regions.

Signed-off-by: Shawn Anastasio <shawn@anastas.io>
---
 drivers/vfio/pci/vfio_pci.c         | 105 ++++++++++++++++++++++++++++
 drivers/vfio/pci/vfio_pci_private.h |   5 ++
 include/uapi/linux/vfio.h           |  14 ++++
 3 files changed, 124 insertions(+)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 02206162eaa9..132ed245cd68 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -27,6 +27,7 @@
 #include <linux/vfio.h>
 #include <linux/vgaarb.h>
 #include <linux/nospec.h>
+#include <linux/anon_inodes.h>
 
 #include "vfio_pci_private.h"
 
@@ -688,6 +689,9 @@ int vfio_pci_register_dev_region(struct vfio_pci_device *vdev,
 	return 0;
 }
 
+
+static const struct file_operations vfio_region_fops;
+
 static long vfio_pci_ioctl(void *device_data,
 			   unsigned int cmd, unsigned long arg)
 {
@@ -1137,6 +1141,54 @@ static long vfio_pci_ioctl(void *device_data,
 
 		return vfio_pci_ioeventfd(vdev, ioeventfd.offset,
 					  ioeventfd.data, count, ioeventfd.fd);
+	} else if (cmd == VFIO_DEVICE_GET_REGION_FD) {
+		struct pci_dev *pdev = vdev->pdev;
+		u32 index;
+		u32 len;
+		int ret;
+		struct file *filep;
+		struct vfio_pci_region_info *info;
+
+		if (copy_from_user(&index, (void __user *)arg, sizeof(u32)))
+			return -EFAULT;
+
+		/* Don't support non-BAR regions */
+		if (index > VFIO_PCI_BAR5_REGION_INDEX)
+			return -EINVAL;
+
+		len = pci_resource_len(pdev, index);
+		if (!len)
+			return -EINVAL;
+
+		if (!vdev->bar_mmap_supported[index])
+			return -EINVAL;
+
+		info = kzalloc(sizeof(*info), GFP_KERNEL);
+		if (!info)
+			return -ENOMEM;
+
+		info->index = index;
+		info->vdev = vdev;
+
+		ret = get_unused_fd_flags(O_CLOEXEC);
+		if (ret < 0) {
+			kfree(info);
+			return ret;
+		}
+
+		filep = anon_inode_getfile("[vfio-region]", &vfio_region_fops,
+					   info, O_RDWR);
+		if (IS_ERR(filep)) {
+			put_unused_fd(ret);
+			ret = PTR_ERR(filep);
+			kfree(info);
+			return ret;
+		}
+		filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
+
+		fd_install(ret, filep);
+
+		return ret;
 	}
 
 	return -ENOTTY;
@@ -1286,6 +1338,59 @@ static const struct vfio_device_ops vfio_pci_ops = {
 	.request	= vfio_pci_request,
 };
 
+static int vfio_region_fops_release(struct inode *inode, struct file *filep)
+{
+	kfree(filep->private_data);
+	return 0;
+}
+
+static ssize_t vfio_region_fops_read(struct file *filep, char __user *buf,
+				     size_t count, loff_t *ppos)
+{
+	struct vfio_pci_region_info *info = filep->private_data;
+
+	if (*ppos > VFIO_PCI_OFFSET_MASK)
+		return -EINVAL;
+
+	*ppos |= VFIO_PCI_INDEX_TO_OFFSET(info->index);
+
+	return vfio_pci_rw(info->vdev, buf, count, ppos, false);
+}
+
+static ssize_t vfio_region_fops_write(struct file *filep,
+				      const char __user *buf,
+				      size_t count, loff_t *ppos)
+{
+	struct vfio_pci_region_info *info = filep->private_data;
+
+	if (*ppos > VFIO_PCI_OFFSET_MASK)
+		return -EINVAL;
+
+	*ppos |= VFIO_PCI_INDEX_TO_OFFSET(info->index);
+
+	return vfio_pci_rw(info->vdev, (char __user *)buf, count, ppos, true);
+}
+
+static int vfio_region_fops_mmap(struct file *filep, struct vm_area_struct *vma)
+{
+	struct vfio_pci_region_info *info = filep->private_data;
+
+	if (vma->vm_pgoff > ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1))
+		return -EINVAL;
+
+	vma->vm_pgoff |= info->index << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
+
+	return vfio_pci_mmap(info->vdev, vma);
+}
+
+static const struct file_operations vfio_region_fops = {
+	.owner = THIS_MODULE,
+	.release = vfio_region_fops_release,
+	.read = vfio_region_fops_read,
+	.write = vfio_region_fops_write,
+	.mmap = vfio_region_fops_mmap
+};
+
 static int vfio_pci_reflck_attach(struct vfio_pci_device *vdev);
 static void vfio_pci_reflck_put(struct vfio_pci_reflck *reflck);
 
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index ee6ee91718a4..318f42e9faa0 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -124,6 +124,11 @@ struct vfio_pci_device {
 	struct list_head	ioeventfds_list;
 };
 
+struct vfio_pci_region_info {
+	u32 index;
+	struct vfio_pci_device *vdev;
+};
+
 #define is_intx(vdev) (vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX)
 #define is_msi(vdev) (vdev->irq_type == VFIO_PCI_MSI_IRQ_INDEX)
 #define is_msix(vdev) (vdev->irq_type == VFIO_PCI_MSIX_IRQ_INDEX)
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 9e843a147ead..9f9bafd41093 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -707,6 +707,20 @@ struct vfio_device_ioeventfd {
 
 #define VFIO_DEVICE_IOEVENTFD		_IO(VFIO_TYPE, VFIO_BASE + 16)
 
+/**
+ * VFIO_DEVICE_GET_REGION_FD - _IOW(VFIO_TYPE, VFIO_BASE + 17,
+ *				    __u32)
+ *
+ * Return a new file descriptor for the region specified by the provided
+ * index. The region must have a non-zero size and support mmap.
+ * The returned file descriptor may be used with standard read, write,
+ * and mmap operations. Provided offsets are relative to the region,
+ * unlike device file descriptors.
+ * Return: new file descriptor on success, -errno on failure.
+ */
+#define VFIO_DEVICE_GET_REGION_FD	_IO(VFIO_TYPE, VFIO_BASE + 17)
+
+
 /* -------- API for Type1 VFIO IOMMU -------- */
 
 /**
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH RFC 0/1] VFIO: Region-specific file descriptors
  2019-09-30 23:55 [PATCH RFC 0/1] VFIO: Region-specific file descriptors Shawn Anastasio
  2019-09-30 23:55 ` [PATCH RFC 1/1] vfio/pci: Introduce region " Shawn Anastasio
@ 2019-10-01 15:38 ` Alex Williamson
  2019-10-02 21:57   ` Shawn Anastasio
  1 sibling, 1 reply; 4+ messages in thread
From: Alex Williamson @ 2019-10-01 15:38 UTC (permalink / raw)
  To: Shawn Anastasio; +Cc: kvm, cohuck, linux-kernel, Donald Dutile

On Mon, 30 Sep 2019 18:55:32 -0500
Shawn Anastasio <shawn@anastas.io> wrote:

> This patch adds region file descriptors to VFIO, a simple file descriptor type
> that allows read/write/mmap operations on a single region of a VFIO device.
> 
> This feature is particularly useful for privileged applications that use VFIO
> and wish to share file descriptors with unprivileged applications without
> handing over full control of the device.

Such as?  How do we define "privileged"?  VFIO already allows
"unprivileged applications" to own a device, only file permissions are
necessary for the VFIO group.  Does region level granularity really
allow us to claim that the consumer of this fd doesn't have full
control of the device?  Clearly device ioctls, including interrupts,
and DMA mappings are not granted with only access to a region, but said
unprivileged application may have absolute full control of the device
itself via that region.

> It also allows applications to use
> regular offsets in read/write/mmap instead of the region index + offset that
> must be used with device file descriptors.

How is this actually an issue that needs a solution?

> The current implementation is very raw (PCI only, no reference counting which
> is probably wrong), but I wanted to get a sense of whether this feature is
> desired. If it is, tips on how to implement this more correctly are
> appreciated.

Handling the ownership and life cycle of the region fds is the more
complicated problem.  If an unprivileged user has an mmap to a device
owned by a privileged user, how does it get revoked by the privileged
part of this equation?  How do we decide which regions merit this
support, for instance a device specific region could have just as
viable a use case as a BAR.  Why does this code limit support to
regions supporting mmap but then support read/write as well?

Technically, isn't the extent of functionality provided in this RFC
already available via the PCI resource files in sysfs?

Without a concrete use case, this looks like a solution in search of a
problem.  Thanks,

Alex

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH RFC 0/1] VFIO: Region-specific file descriptors
  2019-10-01 15:38 ` [PATCH RFC 0/1] VFIO: Region-specific " Alex Williamson
@ 2019-10-02 21:57   ` Shawn Anastasio
  0 siblings, 0 replies; 4+ messages in thread
From: Shawn Anastasio @ 2019-10-02 21:57 UTC (permalink / raw)
  To: Alex Williamson; +Cc: kvm, cohuck, linux-kernel, Donald Dutile

On 10/1/19 10:38 AM, Alex Williamson wrote:
> On Mon, 30 Sep 2019 18:55:32 -0500 Shawn Anastasio <shawn@anastas.io>
> wrote:
> 
>> This patch adds region file descriptors to VFIO, a simple file
>> descriptor type that allows read/write/mmap operations on a single
>> region of a VFIO device.
>> 
>> This feature is particularly useful for privileged applications
>> that use VFIO and wish to share file descriptors with unprivileged
>> applications without handing over full control of the device.
> 
> Such as?  How do we define "privileged"?  VFIO already allows
> "unprivileged applications" to own a device, only file permissions
> are necessary for the VFIO group.  Does region level granularity
> really allow us to claim that the consumer of this fd doesn't have
> full control of the device?  Clearly device ioctls, including
> interrupts, and DMA mappings are not granted with only access to a
> region, but said unprivileged application may have absolute full
> control of the device itself via that region.

Yes, that's true - determining whether any control was restricted will
depend on the specifics of the device and region shared.

The use case I had in mind when implementing this was QEMU's ivshmem
device. I'm writing a daemon that uses VFIO to establish a shared
memory channel with the host via ivshmem devices and then passes
the shared memory region to unprivileged clients over unix domain
sockets. In this case, it is beneficial to have a way to only share
BAR 2 of the ivshmem device (the shared memory region) without giving
control over device configuration and interrupts.

It would also perhaps be useful to restrict the read/write/mmap
abilities of a region fd at time of creation, though the patch
as-is doesn't implement that.

>> It also allows applications to use regular offsets in
>> read/write/mmap instead of the region index + offset that must be
>> used with device file descriptors.
> 
> How is this actually an issue that needs a solution?

It allows applications that expect memfd/shm style semantics to
work without modification. In the use case I mentioned, it allows
the unprivileged clients to use any received shared memory fds without
knowledge of VFIO-specific semantics. This means that the code paths for
the host, where regular memfds are passed, and the guest, where VFIO
region fds are passed, can be the same.

>> The current implementation is very raw (PCI only, no reference
>> counting which is probably wrong), but I wanted to get a sense of
>> whether this feature is desired. If it is, tips on how to implement
>> this more correctly are appreciated.
> 
> Handling the ownership and life cycle of the region fds is the more 
> complicated problem.  If an unprivileged user has an mmap to a
> device owned by a privileged user, how does it get revoked by the
> privileged part of this equation?

Yes, this is something that I've been thinking about. IIUC, the current
patch results in all region fds being invalidated when the privileged
process drops the device fd, but this may not be the best solution.

Perhaps having region fds bump the device struct reference counts
so that region fds can outlive device fds makes more sense. This
wouldn't allow the privileged process to revoke region access, though.

> How do we decide which regions merit this support, for instance a
> device specific region could have just as viable a use case as a BAR.
> Why does this code limit support to regions supporting mmap but then
> support read/write as well?

This was an arbitrary decision I made while testing and not necessarily
the behavior I wish to keep. Perhaps it would make sense to allow
region fd creation for regions that support any of r, w, mmap.

> Technically, isn't the extent of functionality provided in this RFC 
> already available via the PCI resource files in sysfs?

That's a good point, though having the functionality within the
VFIO framework would be nice as well. There's plenty of functionality
already duplicated between sysfs, UIO, and VFIO.

> Without a concrete use case, this looks like a solution in search of
> a problem.  Thanks,

Hopefully the use case I described above makes sense. This is my first
time writing a PCI driver, so I don't know if this use case is a bit
contrived and not applicable outside my specific application.

I don't think it's too unreasonable to think that there are some
other applications where restricting access to specific regions
would be useful, though.

Thanks,
Shawn

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2019-10-02 21:57 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-09-30 23:55 [PATCH RFC 0/1] VFIO: Region-specific file descriptors Shawn Anastasio
2019-09-30 23:55 ` [PATCH RFC 1/1] vfio/pci: Introduce region " Shawn Anastasio
2019-10-01 15:38 ` [PATCH RFC 0/1] VFIO: Region-specific " Alex Williamson
2019-10-02 21:57   ` Shawn Anastasio

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).