* [PATCH RFC 1/1] vfio/pci: Introduce region file descriptors
2019-09-30 23:55 [PATCH RFC 0/1] VFIO: Region-specific file descriptors Shawn Anastasio
@ 2019-09-30 23:55 ` Shawn Anastasio
2019-10-01 15:38 ` [PATCH RFC 0/1] VFIO: Region-specific " Alex Williamson
1 sibling, 0 replies; 4+ messages in thread
From: Shawn Anastasio @ 2019-09-30 23:55 UTC (permalink / raw)
To: alex.williamson, kvm; +Cc: cohuck, linux-kernel
Introduce a new type of VFIO file descriptor that allows
memfd-style semantics for regions of a VFIO device.
Unlike VFIO device file descriptors, region file descriptors
are limited to a single region, and all offsets (read, write, mmap)
are relative to the start of the region.
This allows for finer granularity when sharing VFIO fds,
as applications can now choose to only share specific regions.
Signed-off-by: Shawn Anastasio <shawn@anastas.io>
---
drivers/vfio/pci/vfio_pci.c | 105 ++++++++++++++++++++++++++++
drivers/vfio/pci/vfio_pci_private.h | 5 ++
include/uapi/linux/vfio.h | 14 ++++
3 files changed, 124 insertions(+)
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 02206162eaa9..132ed245cd68 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -27,6 +27,7 @@
#include <linux/vfio.h>
#include <linux/vgaarb.h>
#include <linux/nospec.h>
+#include <linux/anon_inodes.h>
#include "vfio_pci_private.h"
@@ -688,6 +689,9 @@ int vfio_pci_register_dev_region(struct vfio_pci_device *vdev,
return 0;
}
+
+static const struct file_operations vfio_region_fops;
+
static long vfio_pci_ioctl(void *device_data,
unsigned int cmd, unsigned long arg)
{
@@ -1137,6 +1141,54 @@ static long vfio_pci_ioctl(void *device_data,
return vfio_pci_ioeventfd(vdev, ioeventfd.offset,
ioeventfd.data, count, ioeventfd.fd);
+ } else if (cmd == VFIO_DEVICE_GET_REGION_FD) {
+ struct pci_dev *pdev = vdev->pdev;
+ u32 index;
+ u32 len;
+ int ret;
+ struct file *filep;
+ struct vfio_pci_region_info *info;
+
+ if (copy_from_user(&index, (void __user *)arg, sizeof(u32)))
+ return -EFAULT;
+
+ /* Don't support non-BAR regions */
+ if (index > VFIO_PCI_BAR5_REGION_INDEX)
+ return -EINVAL;
+
+ len = pci_resource_len(pdev, index);
+ if (!len)
+ return -EINVAL;
+
+ if (!vdev->bar_mmap_supported[index])
+ return -EINVAL;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+
+ info->index = index;
+ info->vdev = vdev;
+
+ ret = get_unused_fd_flags(O_CLOEXEC);
+ if (ret < 0) {
+ kfree(info);
+ return ret;
+ }
+
+ filep = anon_inode_getfile("[vfio-region]", &vfio_region_fops,
+ info, O_RDWR);
+ if (IS_ERR(filep)) {
+ put_unused_fd(ret);
+ ret = PTR_ERR(filep);
+ kfree(info);
+ return ret;
+ }
+ filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
+
+ fd_install(ret, filep);
+
+ return ret;
}
return -ENOTTY;
@@ -1286,6 +1338,59 @@ static const struct vfio_device_ops vfio_pci_ops = {
.request = vfio_pci_request,
};
+static int vfio_region_fops_release(struct inode *inode, struct file *filep)
+{
+ kfree(filep->private_data);
+ return 0;
+}
+
+static ssize_t vfio_region_fops_read(struct file *filep, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct vfio_pci_region_info *info = filep->private_data;
+
+ if (*ppos > VFIO_PCI_OFFSET_MASK)
+ return -EINVAL;
+
+ *ppos |= VFIO_PCI_INDEX_TO_OFFSET(info->index);
+
+ return vfio_pci_rw(info->vdev, buf, count, ppos, false);
+}
+
+static ssize_t vfio_region_fops_write(struct file *filep,
+ const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct vfio_pci_region_info *info = filep->private_data;
+
+ if (*ppos > VFIO_PCI_OFFSET_MASK)
+ return -EINVAL;
+
+ *ppos |= VFIO_PCI_INDEX_TO_OFFSET(info->index);
+
+ return vfio_pci_rw(info->vdev, (char __user *)buf, count, ppos, true);
+}
+
+static int vfio_region_fops_mmap(struct file *filep, struct vm_area_struct *vma)
+{
+ struct vfio_pci_region_info *info = filep->private_data;
+
+ if (vma->vm_pgoff > ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1))
+ return -EINVAL;
+
+ vma->vm_pgoff |= info->index << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
+
+ return vfio_pci_mmap(info->vdev, vma);
+}
+
+static const struct file_operations vfio_region_fops = {
+ .owner = THIS_MODULE,
+ .release = vfio_region_fops_release,
+ .read = vfio_region_fops_read,
+ .write = vfio_region_fops_write,
+ .mmap = vfio_region_fops_mmap
+};
+
static int vfio_pci_reflck_attach(struct vfio_pci_device *vdev);
static void vfio_pci_reflck_put(struct vfio_pci_reflck *reflck);
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index ee6ee91718a4..318f42e9faa0 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -124,6 +124,11 @@ struct vfio_pci_device {
struct list_head ioeventfds_list;
};
+struct vfio_pci_region_info {
+ u32 index;
+ struct vfio_pci_device *vdev;
+};
+
#define is_intx(vdev) (vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX)
#define is_msi(vdev) (vdev->irq_type == VFIO_PCI_MSI_IRQ_INDEX)
#define is_msix(vdev) (vdev->irq_type == VFIO_PCI_MSIX_IRQ_INDEX)
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 9e843a147ead..9f9bafd41093 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -707,6 +707,20 @@ struct vfio_device_ioeventfd {
#define VFIO_DEVICE_IOEVENTFD _IO(VFIO_TYPE, VFIO_BASE + 16)
+/**
+ * VFIO_DEVICE_GET_REGION_FD - _IOW(VFIO_TYPE, VFIO_BASE + 17,
+ * __u32)
+ *
+ * Return a new file descriptor for the region specified by the provided
+ * index. The region must have a non-zero size and support mmap.
+ * The returned file descriptor may be used with standard read, write,
+ * and mmap operations. Provided offsets are relative to the region,
+ * unlike device file descriptors.
+ * Return: new file descriptor on success, -errno on failure.
+ */
+#define VFIO_DEVICE_GET_REGION_FD _IO(VFIO_TYPE, VFIO_BASE + 17)
+
+
/* -------- API for Type1 VFIO IOMMU -------- */
/**
--
2.20.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH RFC 0/1] VFIO: Region-specific file descriptors
2019-09-30 23:55 [PATCH RFC 0/1] VFIO: Region-specific file descriptors Shawn Anastasio
2019-09-30 23:55 ` [PATCH RFC 1/1] vfio/pci: Introduce region " Shawn Anastasio
@ 2019-10-01 15:38 ` Alex Williamson
2019-10-02 21:57 ` Shawn Anastasio
1 sibling, 1 reply; 4+ messages in thread
From: Alex Williamson @ 2019-10-01 15:38 UTC (permalink / raw)
To: Shawn Anastasio; +Cc: kvm, cohuck, linux-kernel, Donald Dutile
On Mon, 30 Sep 2019 18:55:32 -0500
Shawn Anastasio <shawn@anastas.io> wrote:
> This patch adds region file descriptors to VFIO, a simple file descriptor type
> that allows read/write/mmap operations on a single region of a VFIO device.
>
> This feature is particularly useful for privileged applications that use VFIO
> and wish to share file descriptors with unprivileged applications without
> handing over full control of the device.
Such as? How do we define "privileged"? VFIO already allows
"unprivileged applications" to own a device, only file permissions are
necessary for the VFIO group. Does region level granularity really
allow us to claim that the consumer of this fd doesn't have full
control of the device? Clearly device ioctls, including interrupts,
and DMA mappings are not granted with only access to a region, but said
unprivileged application may have absolute full control of the device
itself via that region.
> It also allows applications to use
> regular offsets in read/write/mmap instead of the region index + offset that
> must be used with device file descriptors.
How is this actually an issue that needs a solution?
> The current implementation is very raw (PCI only, no reference counting which
> is probably wrong), but I wanted to get a sense to see if this feature is
> desired. If it is, tips on how to implement this more correctly are
> appreciated.
Handling the ownership and life cycle of the region fds is the more
complicated problem. If an unprivileged user has an mmap to a device
owned by a privileged user, how does it get revoked by the privileged
part of this equation? How do we decide which regions merit this
support? For instance, a device-specific region could have just as
viable a use case as a BAR. Why does this code limit support to
regions supporting mmap but then support read/write as well?
Technically, isn't the extent of functionality provided in this RFC
already available via the PCI resource files in sysfs?
Without a concrete use case, this looks like a solution in search of a
problem. Thanks,
Alex
^ permalink raw reply [flat|nested] 4+ messages in thread