[RFC PATCH kernel] vfio-pci: Allow mapping MSIX BAR

* [RFC PATCH kernel] vfio-pci: Allow mapping MSIX BAR
@ 2017-11-22  4:09 Alexey Kardashevskiy
  2017-11-22  4:25 ` David Gibson
  2017-11-22  4:28 ` Alex Williamson
  0 siblings, 2 replies; 16+ messages in thread
From: Alexey Kardashevskiy @ 2017-11-22  4:09 UTC (permalink / raw)
  To: David Gibson
  Cc: Alexey Kardashevskiy, kvm, Yongji Xie, Eric Auger, Alex Williamson

By default VFIO disables mapping of MSIX BAR to the userspace as
the userspace may program it in a way allowing spurious interrupts;
instead the userspace uses the VFIO_DEVICE_SET_IRQS ioctl.

This works fine as long as the system page size equals to the MSIX
alignment requirement which is 4KB. However with a bigger page size
the existing code prohibits mapping non-MSIX parts of a page with MSIX
structures so these parts have to be emulated via slow reads/writes on
a VFIO device fd. If these emulated bits are accessed often, this has
serious impact on performance.

This adds an ioctl to the vfio-pci device which hides the sparse
capability and allows the userspace to map a BAR with MSIX structures.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
 drivers/vfio/pci/vfio_pci_private.h |  1 +
 include/uapi/linux/vfio.h           | 15 +++++++++++++++
 drivers/vfio/pci/vfio_pci.c         | 19 +++++++++++++++++--
 3 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index f561ac1..058fa9b 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -90,6 +90,7 @@ struct vfio_pci_device {
 	bool			has_vga;
 	bool			needs_reset;
 	bool			nointx;
+	bool			msix_mmap;
 	struct pci_saved_state	*pci_saved_state;
 	int			refcnt;
 	struct eventfd_ctx	*err_trigger;
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index ae46105..60cd4fd 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -502,6 +502,21 @@ struct vfio_pci_hot_reset {
 
 #define VFIO_DEVICE_PCI_HOT_RESET	_IO(VFIO_TYPE, VFIO_BASE + 13)
 
+/**
+ * VFIO_DEVICE_PCI_ALLOW_MSIX_MMAP - _IOW(VFIO_TYPE, VFIO_BASE + 14,
+ *				    struct vfio_pci_allow_msix_mmap)
+ *
+ * Return: 0 on success, -errno on failure.
+ */
+#define VFIO_PCI_ALLOW_MSIX_MMAP	(1 << 0) /* mmap of entire MSIX BAR */
+
+struct vfio_pci_allow_msix_mmap {
+	__u32	argsz;
+	__u32	flags;
+};
+
+#define VFIO_DEVICE_PCI_ALLOW_MSIX_MMAP	_IO(VFIO_TYPE, VFIO_BASE + 14)
+
 /* -------- API for Type1 VFIO IOMMU -------- */
 
 /**
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index f041b1a..adba5ab 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -691,7 +691,8 @@ static long vfio_pci_ioctl(void *device_data,
 				     VFIO_REGION_INFO_FLAG_WRITE;
 			if (vdev->bar_mmap_supported[info.index]) {
 				info.flags |= VFIO_REGION_INFO_FLAG_MMAP;
-				if (info.index == vdev->msix_bar) {
+				if (info.index == vdev->msix_bar &&
+						!vdev->msix_mmap) {
 					ret = msix_sparse_mmap_cap(vdev, &caps);
 					if (ret)
 						return ret;
@@ -1039,6 +1040,20 @@ static long vfio_pci_ioctl(void *device_data,
 
 		kfree(groups);
 		return ret;
+	} else if (cmd == VFIO_DEVICE_PCI_ALLOW_MSIX_MMAP) {
+		struct vfio_pci_allow_msix_mmap hdr;
+
+		minsz = offsetofend(struct vfio_pci_allow_msix_mmap, flags);
+
+		if (copy_from_user(&hdr, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (hdr.argsz < minsz || hdr.flags & ~VFIO_PCI_ALLOW_MSIX_MMAP)
+			return -EINVAL;
+
+		vdev->msix_mmap = !!(hdr.flags & VFIO_PCI_ALLOW_MSIX_MMAP);
+
+		return 0;
 	}
 
 	return -ENOTTY;
@@ -1122,7 +1137,7 @@ static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma)
 	if (req_start + req_len > phys_len)
 		return -EINVAL;
 
-	if (index == vdev->msix_bar) {
+	if (!vdev->msix_mmap && index == vdev->msix_bar) {
 		/*
 		 * Disallow mmaps overlapping the MSI-X table; users don't
 		 * get to touch this directly.  We could find somewhere
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 16+ messages in thread