qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [RFC PATCH 0/2] QEMU: Dynamic trap/untrap of VFIO PCI BARs
@ 2019-12-05  3:44 Yan Zhao
  2019-12-05  3:45 ` [RFC PATCH 1/2] hw/vfio: add a 'disablable' flag to sparse mmaped region Yan Zhao
  2019-12-05  3:45 ` [RFC PATCH 2/2] hw/vfio/pci: init dynamic-trap-bar-info region Yan Zhao
  0 siblings, 2 replies; 3+ messages in thread
From: Yan Zhao @ 2019-12-05  3:44 UTC (permalink / raw)
  To: alex.williamson
  Cc: kevin.tian, Yan Zhao, cohuck, qemu-devel, zhenyuw, shaopeng.he,
	zhi.a.wang

This patchset enables PCI BARs to be dynamically trapped or passed through
in response to the vendor driver's needs.

To dynamically trap/untrap PCI BARs, three pieces of information are required:
(1) which parts of the PCI BARs are to be trapped or passed through
(2) when to do the trap/passthrough transition
(3) whether to trap or to pass through

Patch 1 lets the vendor driver specify which sparse mmapped subregions are
disablable, thereby providing the first piece of information.

Patch 2 probes and creates a dynamic trap BAR info region, whose
"dt_fd" field provides the second piece of information and whose
"trap" field provides the third.

The corresponding kernel implementation is at
https://www.spinics.net/lists/kernel/msg3337337.html.


Yan Zhao (2):
  hw/vfio: add a 'disablable' flag to sparse mmaped region
  hw/vfio/pci: init dynamic-trap-bar-info region

 hw/vfio/common.c              |  28 +++++++-
 hw/vfio/pci.c                 | 117 ++++++++++++++++++++++++++++++++++
 hw/vfio/pci.h                 |   5 ++
 hw/vfio/trace-events          |   4 +-
 include/hw/vfio/vfio-common.h |   2 +
 linux-headers/linux/vfio.h    |  13 ++++
 6 files changed, 167 insertions(+), 2 deletions(-)

-- 
2.17.1



^ permalink raw reply	[flat|nested] 3+ messages in thread

* [RFC PATCH 1/2] hw/vfio: add a 'disablable' flag to sparse mmaped region
  2019-12-05  3:44 [RFC PATCH 0/2] QEMU: Dynamic trap/untrap of VFIO PCI BARs Yan Zhao
@ 2019-12-05  3:45 ` Yan Zhao
  2019-12-05  3:45 ` [RFC PATCH 2/2] hw/vfio/pci: init dynamic-trap-bar-info region Yan Zhao
  1 sibling, 0 replies; 3+ messages in thread
From: Yan Zhao @ 2019-12-05  3:45 UTC (permalink / raw)
  To: alex.williamson
  Cc: kevin.tian, Yan Zhao, cohuck, qemu-devel, zhenyuw, shaopeng.he,
	zhi.a.wang

Add a 'disablable' flag to each sparse mmapped region. This flag is off by
default.

vfio_region_disablable_mmaps_set_enabled() enables/disables only those mmapped
subregions whose 'disablable' flag is on.

Cc: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Yan Zhao <yan.y.zhao@intel.com>
---
 hw/vfio/common.c              | 28 +++++++++++++++++++++++++++-
 hw/vfio/trace-events          |  3 ++-
 include/hw/vfio/vfio-common.h |  2 ++
 linux-headers/linux/vfio.h    |  2 ++
 4 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 6f36b02e3e..79f694dd19 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -883,11 +883,13 @@ static int vfio_setup_region_sparse_mmaps(VFIORegion *region,
     for (i = 0, j = 0; i < sparse->nr_areas; i++) {
         trace_vfio_region_sparse_mmap_entry(i, sparse->areas[i].offset,
                                             sparse->areas[i].offset +
-                                            sparse->areas[i].size);
+                                            sparse->areas[i].size,
+                                            sparse->areas[i].disablable);
 
         if (sparse->areas[i].size) {
             region->mmaps[j].offset = sparse->areas[i].offset;
             region->mmaps[j].size = sparse->areas[i].size;
+            region->mmaps[j].disablable = sparse->areas[i].disablable;
             j++;
         }
     }
@@ -1084,6 +1086,30 @@ void vfio_region_mmaps_set_enabled(VFIORegion *region, bool enabled)
                                         enabled);
 }
 
+/**
+ * Enable/disable the mmapped subregions of a vfio region.
+ * Only mmapped subregions with the 'disablable' flag on are affected.
+ */
+void vfio_region_disablable_mmaps_set_enabled(VFIORegion *region, bool enabled)
+{
+    int i;
+
+    if (!region->mem) {
+        return;
+    }
+
+    for (i = 0; i < region->nr_mmaps; i++) {
+        if (region->mmaps[i].mmap && region->mmaps[i].disablable) {
+            memory_region_set_enabled(&region->mmaps[i].mem, enabled);
+            trace_vfio_region_disablable_mmaps_set_enabled(
+                    memory_region_name(region->mem),
+                    region->mmaps[i].offset,
+                    region->mmaps[i].offset + region->mmaps[i].size,
+                    enabled);
+        }
+    }
+}
+
 void vfio_reset_handler(void *opaque)
 {
     VFIOGroup *group;
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index 414a5e69ec..7b2d07529e 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -113,7 +113,7 @@ vfio_region_finalize(const char *name, int index) "Device %s, region %d"
 vfio_region_mmaps_set_enabled(const char *name, bool enabled) "Region %s mmaps enabled: %d"
 vfio_region_unmap(const char *name, unsigned long offset, unsigned long end) "Region %s unmap [0x%lx - 0x%lx]"
 vfio_region_sparse_mmap_header(const char *name, int index, int nr_areas) "Device %s region %d: %d sparse mmap entries"
-vfio_region_sparse_mmap_entry(int i, unsigned long start, unsigned long end) "sparse entry %d [0x%lx - 0x%lx]"
+vfio_region_sparse_mmap_entry(int i, unsigned long start, unsigned long end, bool disablable) "sparse entry %d [0x%lx - 0x%lx] disablable %d"
 vfio_get_dev_region(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%0x8"
 vfio_dma_unmap_overflow_workaround(void) ""
 
@@ -161,3 +161,4 @@ vfio_load_device_config_state(char *name) " (%s)"
 vfio_load_state(char *name, uint64_t data) " (%s) data 0x%"PRIx64
 vfio_load_state_device_data(char *name, uint64_t data_offset, uint64_t data_size) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64
 vfio_get_dirty_page_list(char *name, uint64_t start, uint64_t pfn_count, uint64_t page_size) " (%s) start 0x%"PRIx64" pfn_count 0x%"PRIx64 " page size 0x%"PRIx64
+vfio_region_disablable_mmaps_set_enabled(const char *name, unsigned long offset, unsigned long end, bool enabled) "Region %s mmaps [0x%lx - 0x%lx] set to %d"
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 41ff5ebba2..8cfe46c681 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -45,6 +45,7 @@ typedef struct VFIOMmap {
     void *mmap;
     off_t offset;
     size_t size;
+    bool disablable; /* whether this region is able to get diabled */
 } VFIOMmap;
 
 typedef struct VFIORegion {
@@ -187,6 +188,7 @@ int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
                       int index, const char *name);
 int vfio_region_mmap(VFIORegion *region);
 void vfio_region_mmaps_set_enabled(VFIORegion *region, bool enabled);
+void vfio_region_disablable_mmaps_set_enabled(VFIORegion *region, bool enabled);
 void vfio_region_unmap(VFIORegion *region);
 void vfio_region_exit(VFIORegion *region);
 void vfio_region_finalize(VFIORegion *region);
diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
index 4bc0236b08..f9f0ea8eda 100644
--- a/linux-headers/linux/vfio.h
+++ b/linux-headers/linux/vfio.h
@@ -258,6 +258,8 @@ struct vfio_region_info {
 struct vfio_region_sparse_mmap_area {
 	__u64	offset;	/* Offset of mmap'able area within region */
 	__u64	size;	/* Size of mmap'able area */
+	__u32   disablable; /* whether this mmap'able are able to
+					be dynamically disbled */
 };
 
 struct vfio_region_info_cap_sparse_mmap {
-- 
2.17.1



^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [RFC PATCH 2/2] hw/vfio/pci: init dynamic-trap-bar-info region
  2019-12-05  3:44 [RFC PATCH 0/2] QEMU: Dynamic trap/untrap of VFIO PCI BARs Yan Zhao
  2019-12-05  3:45 ` [RFC PATCH 1/2] hw/vfio: add a 'disablable' flag to sparse mmaped region Yan Zhao
@ 2019-12-05  3:45 ` Yan Zhao
  1 sibling, 0 replies; 3+ messages in thread
From: Yan Zhao @ 2019-12-05  3:45 UTC (permalink / raw)
  To: alex.williamson
  Cc: kevin.tian, Yan Zhao, cohuck, qemu-devel, zhenyuw, shaopeng.he,
	zhi.a.wang

For devices that support a device region of type
VFIO_REGION_TYPE_DYNAMIC_TRAP_BAR_INFO and subtype
VFIO_REGION_SUBTYPE_DYNAMIC_TRAP_BAR_INFO, probe and init a
dynamic-trap-bar-info region which holds
(1) the fd of an eventfd, and
(2) whether to trap or untrap the sparse mmapped PCI BARs.

The vendor driver should first specify which device PCI BARs are sparse mmapped,
which means those BARs are sparsely passed through.
If it wants certain subregions to be dynamically trapped, it should
also set the 'disablable' flag for those subregions.

When the vendor driver signals the eventfd, QEMU reads the 'trap' field of this
dynamic trap BAR info region, then disables/enables the disablable subregions
of the PCI BAR regions.

Cc: Kevin Tian <kevin.tian@intel.com>

Signed-off-by: Yan Zhao <yan.y.zhao@intel.com>
---
 hw/vfio/pci.c              | 117 +++++++++++++++++++++++++++++++++++++
 hw/vfio/pci.h              |   5 ++
 hw/vfio/trace-events       |   1 +
 linux-headers/linux/vfio.h |  11 ++++
 4 files changed, 134 insertions(+)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index c04f4bcfb8..3837f77185 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2638,6 +2638,120 @@ int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp)
     return 0;
 }
 
+static void vfio_dt_notifier_handler(void *opaque)
+{
+    VFIOPCIDevice *vdev = opaque;
+    int i;
+    __u32 dt_state;
+
+    if (!event_notifier_test_and_clear(&vdev->dt_notifier)) {
+        return;
+    }
+
+    if (vdev->dt_offset < 0) {
+        return;
+    }
+
+    if (pread(vdev->vbasedev.fd, &dt_state,
+                sizeof(dt_state),
+                vdev->dt_offset +
+                offsetof(struct vfio_device_dt_bar_info_region, trap))
+            != sizeof(dt_state)) {
+        error_report("vfio failed to read from dt region");
+        return;
+    }
+
+    if (dt_state == vdev->dt_state) {
+        return;
+    }
+
+    for (i = VFIO_PCI_BAR0_REGION_INDEX; i < VFIO_PCI_ROM_REGION_INDEX; i++) {
+        vfio_region_disablable_mmaps_set_enabled(&vdev->bars[i].region,
+                                                 !dt_state);
+    }
+
+    vdev->dt_state = dt_state;
+
+}
+
+static void vfio_register_dt_notifier(VFIOPCIDevice *vdev)
+{
+    if (vdev->enable_dt) {
+        return;
+    }
+
+    if (event_notifier_init(&vdev->dt_notifier, 0)) {
+        error_report("vfio: unable to init event notifier for dynamic trap");
+        return;
+    }
+
+    qemu_set_fd_handler(event_notifier_get_fd(&vdev->dt_notifier),
+            vfio_dt_notifier_handler, NULL, vdev);
+}
+
+static void vfio_unregister_dt_notifier(VFIOPCIDevice *vdev)
+{
+    if (!vdev->enable_dt) {
+        return;
+    }
+
+    qemu_set_fd_handler(event_notifier_get_fd(&vdev->dt_notifier),
+            NULL, NULL, vdev);
+    event_notifier_cleanup(&vdev->dt_notifier);
+    vdev->enable_dt = false;
+    vdev->dt_offset = -1;
+    vdev->dt_state = false;
+}
+
+/**
+ * Init a dynamic trap BAR info region.
+ * This region is used by QEMU to communicate with the vendor driver.
+ *
+ * If the vendor driver reports this device region, QEMU creates
+ * an eventfd and writes its fd to the dt_fd field of
+ * this region.
+ *
+ * When the vendor driver signals this dt_fd, QEMU first reads the trap field
+ * of this region to get the trap/untrap info. QEMU then disables/enables the
+ * disablable mmapped subregions of the PCI BAR regions accordingly.
+ *
+ */
+static void init_dt_region(VFIOPCIDevice *vdev)
+{
+    struct vfio_region_info *reg_info;
+    int ret;
+    uint32_t dt_fd;
+    vdev->dt_state = false;
+
+    ret = vfio_get_dev_region_info(&vdev->vbasedev,
+            VFIO_REGION_TYPE_DYNAMIC_TRAP_BAR_INFO,
+            VFIO_REGION_SUBTYPE_DYNAMIC_TRAP_BAR_INFO,
+            &reg_info);
+    if (ret || reg_info->size < sizeof(dt_fd)) {
+        goto out;
+    }
+
+    vdev->dt_offset = reg_info->offset;
+
+    vfio_register_dt_notifier(vdev);
+    dt_fd = event_notifier_get_fd(&vdev->dt_notifier);
+
+    trace_vfio_init_dt_region(vdev->vbasedev.name, vdev->vendor_id,
+                              vdev->device_id, reg_info->offset,
+                              reg_info->offset + reg_info->size - 1, dt_fd);
+
+    if (pwrite(vdev->vbasedev.fd, &dt_fd,
+                sizeof(dt_fd),
+                vdev->dt_offset) != sizeof(dt_fd)) {
+        error_report("vfio failed to write to dt region");
+        vfio_unregister_dt_notifier(vdev);
+    }
+    vdev->enable_dt = true;
+out:
+    g_free(reg_info);
+}
+
+
 static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
 {
     VFIODevice *vbasedev = &vdev->vbasedev;
@@ -3173,6 +3287,8 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
                          vdev->vbasedev.name);
     }
 
+    init_dt_region(vdev);
+
     vfio_register_err_notifier(vdev);
     vfio_register_req_notifier(vdev);
     vfio_setup_resetfn_quirk(vdev);
@@ -3214,6 +3330,7 @@ static void vfio_exitfn(PCIDevice *pdev)
 
     vfio_unregister_req_notifier(vdev);
     vfio_unregister_err_notifier(vdev);
+    vfio_unregister_dt_notifier(vdev);
     pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
     vfio_disable_interrupts(vdev);
     if (vdev->intx.mmap_timer) {
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 834a90d646..981d81b516 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -168,6 +168,11 @@ typedef struct VFIOPCIDevice {
     bool no_vfio_ioeventfd;
     bool enable_ramfb;
     VFIODisplay *dpy;
+    EventNotifier dt_notifier;
+    bool enable_dt; /*is dynamically trap enabled*/
+    bool dt_state; /* trap or untrap. default state is untrap */
+    off_t dt_offset; /* Offset of dt region within device fd */
+
 } VFIOPCIDevice;
 
 uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index 7b2d07529e..b445887d85 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -162,3 +162,4 @@ vfio_load_state(char *name, uint64_t data) " (%s) data 0x%"PRIx64
 vfio_load_state_device_data(char *name, uint64_t data_offset, uint64_t data_size) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64
 vfio_get_dirty_page_list(char *name, uint64_t start, uint64_t pfn_count, uint64_t page_size) " (%s) start 0x%"PRIx64" pfn_count 0x%"PRIx64 " page size 0x%"PRIx64
 vfio_region_disablable_mmaps_set_enabled(const char *name, unsigned long offset, unsigned long end, bool enabled) "Region %s mmaps [0x%lx - 0x%lx] set to %d"
+vfio_init_dt_region(char *name, uint32_t vendor_id, uint32_t device_id, unsigned long offset, unsigned long size, uint32_t dt_fd) "(%s %x %x) init dt region [0x%lx - 0x%lx] dt_fd=%d"
diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
index f9f0ea8eda..1743ce68e5 100644
--- a/linux-headers/linux/vfio.h
+++ b/linux-headers/linux/vfio.h
@@ -378,6 +378,7 @@ struct vfio_region_gfx_edid {
 #define VFIO_REGION_TYPE_MIGRATION	        (3)
 #define VFIO_REGION_SUBTYPE_MIGRATION	        (1)
 
+
 /**
  * Structure vfio_device_migration_info is placed at 0th offset of
  * VFIO_REGION_SUBTYPE_MIGRATION region to get/set VFIO device related migration
@@ -522,6 +523,16 @@ struct vfio_device_migration_info {
 #define VFIO_DEVICE_DIRTY_PFNS_ALL      (~0ULL)
 } __attribute__((packed));
 
+
+/* Region type and sub-type to hold info to dynamically trap bars */
+#define VFIO_REGION_TYPE_DYNAMIC_TRAP_BAR_INFO          (4)
+#define VFIO_REGION_SUBTYPE_DYNAMIC_TRAP_BAR_INFO       (1)
+
+struct vfio_device_dt_bar_info_region {
+	__u32 dt_fd; /* fd of eventfd to notify qemu trap/untrap bars */
+	__u32 trap;   /* trap/untrap bar regions */
+};
+
 /*
  * The MSIX mappable capability informs that MSIX data of a BAR can be mmapped
  * which allows direct access to non-MSIX registers which happened to be within
-- 
2.17.1



^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2019-12-05  3:58 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-12-05  3:44 [RFC PATCH 0/2] QEMU: Dynamic trap/untrap of VFIO PCI BARs Yan Zhao
2019-12-05  3:45 ` [RFC PATCH 1/2] hw/vfio: add a 'disablable' flag to sparse mmaped region Yan Zhao
2019-12-05  3:45 ` [RFC PATCH 2/2] hw/vfio/pci: init dynamic-trap-bar-info region Yan Zhao

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).