All of lore.kernel.org
 help / color / mirror / Atom feed
From: Alex Williamson <alex.williamson@redhat.com>
To: qemu-devel@nongnu.org
Cc: eric.auger@redhat.com, peterx@redhat.com, kvm@vger.kernel.org
Subject: [PATCH v2 4/4] vfio/quirks: Enable ioeventfd quirks to be handled by vfio directly
Date: Tue, 01 May 2018 10:43:46 -0600	[thread overview]
Message-ID: <20180501164346.28940.93328.stgit@gimli.home> (raw)
In-Reply-To: <20180501162901.28940.1075.stgit@gimli.home>

With vfio ioeventfd support, we can program vfio-pci to perform a
specified BAR write when an eventfd is triggered.  This allows the
KVM ioeventfd to be wired directly to vfio-pci, entirely avoiding
userspace handling for these events.  On the same micro-benchmark
where the ioeventfd got us to almost 90% of the performance seen with
the GeForce quirks disabled, this gets us to about 95%.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 hw/vfio/pci-quirks.c |   50 +++++++++++++++++++++++++++++++++++++++++++-------
 hw/vfio/pci.c        |    2 ++
 hw/vfio/pci.h        |    2 ++
 hw/vfio/trace-events |    2 +-
 4 files changed, 48 insertions(+), 8 deletions(-)

diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
index 4cedc733bc0a..94be27dd0a3b 100644
--- a/hw/vfio/pci-quirks.c
+++ b/hw/vfio/pci-quirks.c
@@ -16,6 +16,7 @@
 #include "qemu/range.h"
 #include "qapi/error.h"
 #include "qapi/visitor.h"
+#include <sys/ioctl.h>
 #include "hw/nvram/fw_cfg.h"
 #include "pci.h"
 #include "trace.h"
@@ -287,13 +288,31 @@ static VFIOQuirk *vfio_quirk_alloc(int nr_mem)
     return quirk;
 }
 
-static void vfio_ioeventfd_exit(VFIOIOEventFD *ioeventfd)
+static void vfio_ioeventfd_exit(VFIOPCIDevice *vdev, VFIOIOEventFD *ioeventfd)
 {
     QLIST_REMOVE(ioeventfd, next);
+
     memory_region_del_eventfd(ioeventfd->mr, ioeventfd->addr, ioeventfd->size,
                               ioeventfd->match_data, ioeventfd->data,
                               &ioeventfd->e);
-    qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e), NULL, NULL, NULL);
+
+    if (ioeventfd->vfio) {
+        struct vfio_device_ioeventfd vfio_ioeventfd;
+
+        vfio_ioeventfd.argsz = sizeof(vfio_ioeventfd);
+        vfio_ioeventfd.flags = ioeventfd->size;
+        vfio_ioeventfd.data = ioeventfd->data;
+        vfio_ioeventfd.offset = ioeventfd->region->fd_offset +
+                                ioeventfd->region_addr;
+        vfio_ioeventfd.fd = -1;
+
+        ioctl(vdev->vbasedev.fd, VFIO_DEVICE_IOEVENTFD, &vfio_ioeventfd);
+
+    } else {
+        qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e),
+                            NULL, NULL, NULL);
+    }
+
     event_notifier_cleanup(&ioeventfd->e);
     trace_vfio_ioeventfd_exit(memory_region_name(ioeventfd->mr),
                               (uint64_t)ioeventfd->addr, ioeventfd->size,
@@ -307,7 +326,7 @@ static void vfio_drop_dynamic_eventfds(VFIOPCIDevice *vdev, VFIOQuirk *quirk)
 
     QLIST_FOREACH_SAFE(ioeventfd, &quirk->ioeventfds, next, tmp) {
         if (ioeventfd->dynamic) {
-            vfio_ioeventfd_exit(ioeventfd);
+            vfio_ioeventfd_exit(vdev, ioeventfd);
         }
     }
 }
@@ -361,13 +380,30 @@ static VFIOIOEventFD *vfio_ioeventfd_init(VFIOPCIDevice *vdev,
     ioeventfd->region = region;
     ioeventfd->region_addr = region_addr;
 
-    qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e),
-                        vfio_ioeventfd_handler, NULL, ioeventfd);
+    if (!vdev->no_vfio_ioeventfd) {
+        struct vfio_device_ioeventfd vfio_ioeventfd;
+
+        vfio_ioeventfd.argsz = sizeof(vfio_ioeventfd);
+        vfio_ioeventfd.flags = ioeventfd->size;
+        vfio_ioeventfd.data = ioeventfd->data;
+        vfio_ioeventfd.offset = ioeventfd->region->fd_offset +
+                                ioeventfd->region_addr;
+        vfio_ioeventfd.fd = event_notifier_get_fd(&ioeventfd->e);
+
+        ioeventfd->vfio = !ioctl(vdev->vbasedev.fd,
+                                 VFIO_DEVICE_IOEVENTFD, &vfio_ioeventfd);
+    }
+
+    if (!ioeventfd->vfio) {
+        qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e),
+                            vfio_ioeventfd_handler, NULL, ioeventfd);
+    }
+
     memory_region_add_eventfd(ioeventfd->mr, ioeventfd->addr,
                               ioeventfd->size, ioeventfd->match_data,
                               ioeventfd->data, &ioeventfd->e);
     trace_vfio_ioeventfd_init(memory_region_name(mr), (uint64_t)addr,
-                              size, data);
+                              size, data, ioeventfd->vfio);
 
     return ioeventfd;
 }
@@ -1835,7 +1871,7 @@ void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr)
 
     QLIST_FOREACH(quirk, &bar->quirks, next) {
         while (!QLIST_EMPTY(&quirk->ioeventfds)) {
-            vfio_ioeventfd_exit(QLIST_FIRST(&quirk->ioeventfds));
+            vfio_ioeventfd_exit(vdev, QLIST_FIRST(&quirk->ioeventfds));
         }
 
         for (i = 0; i < quirk->nr_mem; i++) {
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index ba1239551115..84e27c7bb2d1 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3177,6 +3177,8 @@ static Property vfio_pci_dev_properties[] = {
                      no_geforce_quirks, false),
     DEFINE_PROP_BOOL("x-no-kvm-ioeventfd", VFIOPCIDevice, no_kvm_ioeventfd,
                      false),
+    DEFINE_PROP_BOOL("x-no-vfio-ioeventfd", VFIOPCIDevice, no_vfio_ioeventfd,
+                     false),
     DEFINE_PROP_UINT32("x-pci-vendor-id", VFIOPCIDevice, vendor_id, PCI_ANY_ID),
     DEFINE_PROP_UINT32("x-pci-device-id", VFIOPCIDevice, device_id, PCI_ANY_ID),
     DEFINE_PROP_UINT32("x-pci-sub-vendor-id", VFIOPCIDevice,
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index dbb3aca9b3d2..dbb3932b50ef 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -35,6 +35,7 @@ typedef struct VFIOIOEventFD {
     hwaddr region_addr;
     bool match_data;
     bool dynamic;
+    bool vfio;
 } VFIOIOEventFD;
 
 typedef struct VFIOQuirk {
@@ -164,6 +165,7 @@ typedef struct VFIOPCIDevice {
     bool no_kvm_msix;
     bool no_geforce_quirks;
     bool no_kvm_ioeventfd;
+    bool no_vfio_ioeventfd;
     VFIODisplay *dpy;
 } VFIOPCIDevice;
 
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index f8f97d1ff90c..d2a74952e389 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -79,7 +79,7 @@ vfio_quirk_ati_bonaire_reset_done(const char *name) "%s"
 vfio_quirk_ati_bonaire_reset(const char *name) "%s"
 vfio_ioeventfd_exit(const char *name, uint64_t addr, unsigned size, uint64_t data) "%s+0x%"PRIx64"[%d]:0x%"PRIx64
 vfio_ioeventfd_handler(const char *name, uint64_t addr, unsigned size, uint64_t data) "%s+0x%"PRIx64"[%d] -> 0x%"PRIx64
-vfio_ioeventfd_init(const char *name, uint64_t addr, unsigned size, uint64_t data) "%s+0x%"PRIx64"[%d]:0x%"PRIx64
+vfio_ioeventfd_init(const char *name, uint64_t addr, unsigned size, uint64_t data, bool vfio) "%s+0x%"PRIx64"[%d]:0x%"PRIx64" vfio:%d"
 vfio_pci_igd_bar4_write(const char *name, uint32_t index, uint32_t data, uint32_t base) "%s [0x%03x] 0x%08x -> 0x%08x"
 vfio_pci_igd_bdsm_enabled(const char *name, int size) "%s %dMB"
 vfio_pci_igd_opregion_enabled(const char *name) "%s"

WARNING: multiple messages have this Message-ID (diff)
From: Alex Williamson <alex.williamson@redhat.com>
To: qemu-devel@nongnu.org
Cc: kvm@vger.kernel.org, eric.auger@redhat.com, peterx@redhat.com
Subject: [Qemu-devel] [PATCH v2 4/4] vfio/quirks: Enable ioeventfd quirks to be handled by vfio directly
Date: Tue, 01 May 2018 10:43:46 -0600	[thread overview]
Message-ID: <20180501164346.28940.93328.stgit@gimli.home> (raw)
In-Reply-To: <20180501162901.28940.1075.stgit@gimli.home>

With vfio ioeventfd support, we can program vfio-pci to perform a
specified BAR write when an eventfd is triggered.  This allows the
KVM ioeventfd to be wired directly to vfio-pci, entirely avoiding
userspace handling for these events.  On the same micro-benchmark
where the ioeventfd got us to almost 90% of the performance seen with
the GeForce quirks disabled, this gets us to about 95%.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 hw/vfio/pci-quirks.c |   50 +++++++++++++++++++++++++++++++++++++++++++-------
 hw/vfio/pci.c        |    2 ++
 hw/vfio/pci.h        |    2 ++
 hw/vfio/trace-events |    2 +-
 4 files changed, 48 insertions(+), 8 deletions(-)

diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
index 4cedc733bc0a..94be27dd0a3b 100644
--- a/hw/vfio/pci-quirks.c
+++ b/hw/vfio/pci-quirks.c
@@ -16,6 +16,7 @@
 #include "qemu/range.h"
 #include "qapi/error.h"
 #include "qapi/visitor.h"
+#include <sys/ioctl.h>
 #include "hw/nvram/fw_cfg.h"
 #include "pci.h"
 #include "trace.h"
@@ -287,13 +288,31 @@ static VFIOQuirk *vfio_quirk_alloc(int nr_mem)
     return quirk;
 }
 
-static void vfio_ioeventfd_exit(VFIOIOEventFD *ioeventfd)
+static void vfio_ioeventfd_exit(VFIOPCIDevice *vdev, VFIOIOEventFD *ioeventfd)
 {
     QLIST_REMOVE(ioeventfd, next);
+
     memory_region_del_eventfd(ioeventfd->mr, ioeventfd->addr, ioeventfd->size,
                               ioeventfd->match_data, ioeventfd->data,
                               &ioeventfd->e);
-    qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e), NULL, NULL, NULL);
+
+    if (ioeventfd->vfio) {
+        struct vfio_device_ioeventfd vfio_ioeventfd;
+
+        vfio_ioeventfd.argsz = sizeof(vfio_ioeventfd);
+        vfio_ioeventfd.flags = ioeventfd->size;
+        vfio_ioeventfd.data = ioeventfd->data;
+        vfio_ioeventfd.offset = ioeventfd->region->fd_offset +
+                                ioeventfd->region_addr;
+        vfio_ioeventfd.fd = -1;
+
+        ioctl(vdev->vbasedev.fd, VFIO_DEVICE_IOEVENTFD, &vfio_ioeventfd);
+
+    } else {
+        qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e),
+                            NULL, NULL, NULL);
+    }
+
     event_notifier_cleanup(&ioeventfd->e);
     trace_vfio_ioeventfd_exit(memory_region_name(ioeventfd->mr),
                               (uint64_t)ioeventfd->addr, ioeventfd->size,
@@ -307,7 +326,7 @@ static void vfio_drop_dynamic_eventfds(VFIOPCIDevice *vdev, VFIOQuirk *quirk)
 
     QLIST_FOREACH_SAFE(ioeventfd, &quirk->ioeventfds, next, tmp) {
         if (ioeventfd->dynamic) {
-            vfio_ioeventfd_exit(ioeventfd);
+            vfio_ioeventfd_exit(vdev, ioeventfd);
         }
     }
 }
@@ -361,13 +380,30 @@ static VFIOIOEventFD *vfio_ioeventfd_init(VFIOPCIDevice *vdev,
     ioeventfd->region = region;
     ioeventfd->region_addr = region_addr;
 
-    qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e),
-                        vfio_ioeventfd_handler, NULL, ioeventfd);
+    if (!vdev->no_vfio_ioeventfd) {
+        struct vfio_device_ioeventfd vfio_ioeventfd;
+
+        vfio_ioeventfd.argsz = sizeof(vfio_ioeventfd);
+        vfio_ioeventfd.flags = ioeventfd->size;
+        vfio_ioeventfd.data = ioeventfd->data;
+        vfio_ioeventfd.offset = ioeventfd->region->fd_offset +
+                                ioeventfd->region_addr;
+        vfio_ioeventfd.fd = event_notifier_get_fd(&ioeventfd->e);
+
+        ioeventfd->vfio = !ioctl(vdev->vbasedev.fd,
+                                 VFIO_DEVICE_IOEVENTFD, &vfio_ioeventfd);
+    }
+
+    if (!ioeventfd->vfio) {
+        qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e),
+                            vfio_ioeventfd_handler, NULL, ioeventfd);
+    }
+
     memory_region_add_eventfd(ioeventfd->mr, ioeventfd->addr,
                               ioeventfd->size, ioeventfd->match_data,
                               ioeventfd->data, &ioeventfd->e);
     trace_vfio_ioeventfd_init(memory_region_name(mr), (uint64_t)addr,
-                              size, data);
+                              size, data, ioeventfd->vfio);
 
     return ioeventfd;
 }
@@ -1835,7 +1871,7 @@ void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr)
 
     QLIST_FOREACH(quirk, &bar->quirks, next) {
         while (!QLIST_EMPTY(&quirk->ioeventfds)) {
-            vfio_ioeventfd_exit(QLIST_FIRST(&quirk->ioeventfds));
+            vfio_ioeventfd_exit(vdev, QLIST_FIRST(&quirk->ioeventfds));
         }
 
         for (i = 0; i < quirk->nr_mem; i++) {
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index ba1239551115..84e27c7bb2d1 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3177,6 +3177,8 @@ static Property vfio_pci_dev_properties[] = {
                      no_geforce_quirks, false),
     DEFINE_PROP_BOOL("x-no-kvm-ioeventfd", VFIOPCIDevice, no_kvm_ioeventfd,
                      false),
+    DEFINE_PROP_BOOL("x-no-vfio-ioeventfd", VFIOPCIDevice, no_vfio_ioeventfd,
+                     false),
     DEFINE_PROP_UINT32("x-pci-vendor-id", VFIOPCIDevice, vendor_id, PCI_ANY_ID),
     DEFINE_PROP_UINT32("x-pci-device-id", VFIOPCIDevice, device_id, PCI_ANY_ID),
     DEFINE_PROP_UINT32("x-pci-sub-vendor-id", VFIOPCIDevice,
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index dbb3aca9b3d2..dbb3932b50ef 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -35,6 +35,7 @@ typedef struct VFIOIOEventFD {
     hwaddr region_addr;
     bool match_data;
     bool dynamic;
+    bool vfio;
 } VFIOIOEventFD;
 
 typedef struct VFIOQuirk {
@@ -164,6 +165,7 @@ typedef struct VFIOPCIDevice {
     bool no_kvm_msix;
     bool no_geforce_quirks;
     bool no_kvm_ioeventfd;
+    bool no_vfio_ioeventfd;
     VFIODisplay *dpy;
 } VFIOPCIDevice;
 
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index f8f97d1ff90c..d2a74952e389 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -79,7 +79,7 @@ vfio_quirk_ati_bonaire_reset_done(const char *name) "%s"
 vfio_quirk_ati_bonaire_reset(const char *name) "%s"
 vfio_ioeventfd_exit(const char *name, uint64_t addr, unsigned size, uint64_t data) "%s+0x%"PRIx64"[%d]:0x%"PRIx64
 vfio_ioeventfd_handler(const char *name, uint64_t addr, unsigned size, uint64_t data) "%s+0x%"PRIx64"[%d] -> 0x%"PRIx64
-vfio_ioeventfd_init(const char *name, uint64_t addr, unsigned size, uint64_t data) "%s+0x%"PRIx64"[%d]:0x%"PRIx64
+vfio_ioeventfd_init(const char *name, uint64_t addr, unsigned size, uint64_t data, bool vfio) "%s+0x%"PRIx64"[%d]:0x%"PRIx64" vfio:%d"
 vfio_pci_igd_bar4_write(const char *name, uint32_t index, uint32_t data, uint32_t base) "%s [0x%03x] 0x%08x -> 0x%08x"
 vfio_pci_igd_bdsm_enabled(const char *name, int size) "%s %dMB"
 vfio_pci_igd_opregion_enabled(const char *name) "%s"

  parent reply	other threads:[~2018-05-01 16:43 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-05-01 16:42 [PATCH v2 0/4] vfio/quirks: ioeventfd support Alex Williamson
2018-05-01 16:42 ` [Qemu-devel] " Alex Williamson
2018-05-01 16:43 ` [PATCH v2 1/4] vfio/quirks: Add common quirk alloc helper Alex Williamson
2018-05-01 16:43   ` [Qemu-devel] " Alex Williamson
2018-05-01 16:43 ` [PATCH v2 2/4] vfio/quirks: Add quirk reset callback Alex Williamson
2018-05-01 16:43   ` [Qemu-devel] " Alex Williamson
2018-05-01 16:43 ` [PATCH v2 3/4] vfio/quirks: ioeventfd quirk acceleration Alex Williamson
2018-05-01 16:43   ` [Qemu-devel] " Alex Williamson
2018-05-03  3:36   ` Peter Xu
2018-05-03  3:36     ` [Qemu-devel] " Peter Xu
2018-05-03  4:20     ` Alex Williamson
2018-05-03  4:20       ` [Qemu-devel] " Alex Williamson
2018-05-03 14:33   ` Auger Eric
2018-05-03 14:33     ` [Qemu-devel] " Auger Eric
2018-05-03 14:48     ` Alex Williamson
2018-05-03 14:48       ` [Qemu-devel] " Alex Williamson
2018-05-01 16:43 ` Alex Williamson [this message]
2018-05-01 16:43   ` [Qemu-devel] [PATCH v2 4/4] vfio/quirks: Enable ioeventfd quirks to be handled by vfio directly Alex Williamson
2018-05-03  4:56   ` Peter Xu
2018-05-03  4:56     ` [Qemu-devel] " Peter Xu
2018-05-03 16:29     ` Alex Williamson
2018-05-03 16:29       ` [Qemu-devel] " Alex Williamson
2018-05-04  2:16       ` Peter Xu
2018-05-04  2:16         ` [Qemu-devel] " Peter Xu
2018-05-04  7:38       ` Auger Eric
2018-05-04  7:38         ` [Qemu-devel] " Auger Eric
2018-05-03 15:20   ` Auger Eric
2018-05-03 15:20     ` [Qemu-devel] " Auger Eric
2018-05-03 16:30     ` Alex Williamson
2018-05-03 16:30       ` [Qemu-devel] " Alex Williamson
2018-05-01 16:56 ` [PATCH v2 0/4] vfio/quirks: ioeventfd support no-reply
2018-05-01 16:56   ` [Qemu-devel] " no-reply
2018-05-01 17:05   ` Alex Williamson
2018-05-01 17:05     ` [Qemu-devel] " Alex Williamson
2018-05-01 16:56 ` no-reply
2018-05-01 16:56   ` [Qemu-devel] " no-reply

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180501164346.28940.93328.stgit@gimli.home \
    --to=alex.williamson@redhat.com \
    --cc=eric.auger@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=peterx@redhat.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.