All of lore.kernel.org
 help / color / mirror / Atom feed
From: Alex Williamson <alex.williamson@redhat.com>
To: qemu-devel@nongnu.org
Cc: alex.williamson@redhat.com, kvm@vger.kernel.org
Subject: [RFC PATCH 3/5] vfio/quirks: Automatic ioeventfd enabling for NVIDIA BAR0 quirks
Date: Tue, 06 Feb 2018 17:26:32 -0700	[thread overview]
Message-ID: <20180207002632.1156.53770.stgit@gimli.home> (raw)
In-Reply-To: <20180207001615.1156.10547.stgit@gimli.home>

Record data writes that come through the NVIDIA BAR0 quirk; if we get
enough in a row that we're only passing through, automatically enable
an ioeventfd for it.  The primary target for this is the MSI-ACK
that NVIDIA uses to allow the MSI interrupt to re-trigger, which is a
4-byte write, data value 0x0 to offset 0x704 into the quirk, 0x88704
into BAR0 MMIO space.  For an interrupt latency sensitive micro-
benchmark, this takes us from 83% of performance versus disabling the
quirk entirely (which GeForce cannot do), to almost 90%.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 hw/vfio/pci-quirks.c |   89 +++++++++++++++++++++++++++++++++++++++++++++++++-
 hw/vfio/pci.h        |    2 +
 2 files changed, 89 insertions(+), 2 deletions(-)

diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
index e4cf4ea2dd9c..e739efe601b1 100644
--- a/hw/vfio/pci-quirks.c
+++ b/hw/vfio/pci-quirks.c
@@ -203,6 +203,7 @@ typedef struct VFIOConfigMirrorQuirk {
     uint32_t offset;
     uint8_t bar;
     MemoryRegion *mem;
+    uint8_t data[];
 } VFIOConfigMirrorQuirk;
 
 static uint64_t vfio_generic_quirk_mirror_read(void *opaque,
@@ -297,6 +298,50 @@ static void vfio_ioeventfd_exit(VFIOIOEventFD *ioeventfd)
     g_free(ioeventfd);
 }
 
+static void vfio_ioeventfd_handler(void *opaque)
+{
+    VFIOIOEventFD *ioeventfd = opaque;
+
+    if (event_notifier_test_and_clear(&ioeventfd->e)) {
+        vfio_region_write(ioeventfd->region, ioeventfd->region_addr,
+                          ioeventfd->data, ioeventfd->size);
+    }
+}
+
+static VFIOIOEventFD *vfio_ioeventfd_init(VFIOPCIDevice *vdev,
+                                          MemoryRegion *mr, hwaddr addr,
+                                          unsigned size, uint64_t data,
+                                          VFIORegion *region,
+                                          hwaddr region_addr)
+{
+    VFIOIOEventFD *ioeventfd = g_malloc0(sizeof(*ioeventfd));
+
+    if (event_notifier_init(&ioeventfd->e, 0)) {
+        g_free(ioeventfd);
+        return NULL;
+    }
+
+    ioeventfd->mr = mr;
+    ioeventfd->addr = addr;
+    ioeventfd->size = size;
+    ioeventfd->match_data = true;
+    ioeventfd->data = data;
+    ioeventfd->region = region;
+    ioeventfd->region_addr = region_addr;
+
+    qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e),
+                        vfio_ioeventfd_handler, NULL, ioeventfd);
+    memory_region_add_eventfd(ioeventfd->mr, ioeventfd->addr,
+                              ioeventfd->size, ioeventfd->match_data,
+                              ioeventfd->data, &ioeventfd->e);
+
+    info_report("Enabled automatic ioeventfd acceleration for %s region %d, "
+                "offset 0x%"HWADDR_PRIx", data 0x%"PRIx64", size %u",
+                vdev->vbasedev.name, region->nr, region_addr, data, size);
+
+    return ioeventfd;
+}
+
 static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev)
 {
     VFIOQuirk *quirk;
@@ -732,6 +777,13 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr)
     trace_vfio_quirk_nvidia_bar5_probe(vdev->vbasedev.name);
 }
 
+typedef struct LastDataSet {
+    hwaddr addr;
+    uint64_t data;
+    unsigned size;
+    int count;
+} LastDataSet;
+
 /*
  * Finally, BAR0 itself.  We want to redirect any accesses to either
  * 0x1800 or 0x88000 through the PCI config space access functions.
@@ -742,6 +794,7 @@ static void vfio_nvidia_quirk_mirror_write(void *opaque, hwaddr addr,
     VFIOConfigMirrorQuirk *mirror = opaque;
     VFIOPCIDevice *vdev = mirror->vdev;
     PCIDevice *pdev = &vdev->pdev;
+    LastDataSet *last = (LastDataSet *)&mirror->data;
 
     vfio_generic_quirk_mirror_write(opaque, addr, data, size);
 
@@ -756,6 +809,38 @@ static void vfio_nvidia_quirk_mirror_write(void *opaque, hwaddr addr,
                           addr + mirror->offset, data, size);
         trace_vfio_quirk_nvidia_bar0_msi_ack(vdev->vbasedev.name);
     }
+
+    /*
+     * Automatically add an ioeventfd to handle any repeated write with the
+     * same data and size above the standard PCI config space header.  This is
+     * primarily expected to accelerate the MSI-ACK behavior, such as noted
+     * above.  Current hardware/drivers should trigger an ioeventfd at config
+     * offset 0x704 (region offset 0x88704), with data 0x0, size 4.
+     */
+    if (addr > PCI_STD_HEADER_SIZEOF) {
+        if (addr != last->addr || data != last->data || size != last->size) {
+            last->addr = addr;
+            last->data = data;
+            last->size = size;
+            last->count = 1;
+        } else if (++last->count > 10) {
+            VFIOIOEventFD *ioeventfd;
+
+            ioeventfd = vfio_ioeventfd_init(vdev, mirror->mem, addr, size, data,
+                                            &vdev->bars[mirror->bar].region,
+                                            mirror->offset + addr);
+            if (ioeventfd) {
+                VFIOQuirk *quirk;
+
+                QLIST_FOREACH(quirk, &vdev->bars[mirror->bar].quirks, next) {
+                    if (quirk->data == mirror) {
+                        QLIST_INSERT_HEAD(&quirk->ioeventfds, ioeventfd, next);
+                        break;
+                    }
+                }
+            }
+        }
+    }
 }
 
 static const MemoryRegionOps vfio_nvidia_mirror_quirk = {
@@ -776,7 +861,7 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr)
     }
 
     quirk = vfio_quirk_alloc(1);
-    mirror = quirk->data = g_malloc0(sizeof(*mirror));
+    mirror = quirk->data = g_malloc0(sizeof(*mirror) + sizeof(LastDataSet));
     mirror->mem = quirk->mem;
     mirror->vdev = vdev;
     mirror->offset = 0x88000;
@@ -794,7 +879,7 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr)
     /* The 0x1800 offset mirror only seems to get used by legacy VGA */
     if (vdev->vga) {
         quirk = vfio_quirk_alloc(1);
-        mirror = quirk->data = g_malloc0(sizeof(*mirror));
+        mirror = quirk->data = g_malloc0(sizeof(*mirror) + sizeof(LastDataSet));
         mirror->mem = quirk->mem;
         mirror->vdev = vdev;
         mirror->offset = 0x1800;
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 146065c2f715..ec53b9935725 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -32,6 +32,8 @@ typedef struct VFIOIOEventFD {
     bool match_data;
     uint64_t data;
     EventNotifier e;
+    VFIORegion *region;
+    hwaddr region_addr;
 } VFIOIOEventFD;
 
 typedef struct VFIOQuirk {

WARNING: multiple messages have this Message-ID (diff)
From: Alex Williamson <alex.williamson@redhat.com>
To: qemu-devel@nongnu.org
Cc: alex.williamson@redhat.com, kvm@vger.kernel.org
Subject: [Qemu-devel] [RFC PATCH 3/5] vfio/quirks: Automatic ioeventfd enabling for NVIDIA BAR0 quirks
Date: Tue, 06 Feb 2018 17:26:32 -0700	[thread overview]
Message-ID: <20180207002632.1156.53770.stgit@gimli.home> (raw)
In-Reply-To: <20180207001615.1156.10547.stgit@gimli.home>

Record data writes that come through the NVIDIA BAR0 quirk; if we get
enough in a row that we're only passing through, automatically enable
an ioeventfd for it.  The primary target for this is the MSI-ACK
that NVIDIA uses to allow the MSI interrupt to re-trigger, which is a
4-byte write, data value 0x0 to offset 0x704 into the quirk, 0x88704
into BAR0 MMIO space.  For an interrupt latency sensitive micro-
benchmark, this takes us from 83% of performance versus disabling the
quirk entirely (which GeForce cannot do), to almost 90%.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 hw/vfio/pci-quirks.c |   89 +++++++++++++++++++++++++++++++++++++++++++++++++-
 hw/vfio/pci.h        |    2 +
 2 files changed, 89 insertions(+), 2 deletions(-)

diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
index e4cf4ea2dd9c..e739efe601b1 100644
--- a/hw/vfio/pci-quirks.c
+++ b/hw/vfio/pci-quirks.c
@@ -203,6 +203,7 @@ typedef struct VFIOConfigMirrorQuirk {
     uint32_t offset;
     uint8_t bar;
     MemoryRegion *mem;
+    uint8_t data[];
 } VFIOConfigMirrorQuirk;
 
 static uint64_t vfio_generic_quirk_mirror_read(void *opaque,
@@ -297,6 +298,50 @@ static void vfio_ioeventfd_exit(VFIOIOEventFD *ioeventfd)
     g_free(ioeventfd);
 }
 
+static void vfio_ioeventfd_handler(void *opaque)
+{
+    VFIOIOEventFD *ioeventfd = opaque;
+
+    if (event_notifier_test_and_clear(&ioeventfd->e)) {
+        vfio_region_write(ioeventfd->region, ioeventfd->region_addr,
+                          ioeventfd->data, ioeventfd->size);
+    }
+}
+
+static VFIOIOEventFD *vfio_ioeventfd_init(VFIOPCIDevice *vdev,
+                                          MemoryRegion *mr, hwaddr addr,
+                                          unsigned size, uint64_t data,
+                                          VFIORegion *region,
+                                          hwaddr region_addr)
+{
+    VFIOIOEventFD *ioeventfd = g_malloc0(sizeof(*ioeventfd));
+
+    if (event_notifier_init(&ioeventfd->e, 0)) {
+        g_free(ioeventfd);
+        return NULL;
+    }
+
+    ioeventfd->mr = mr;
+    ioeventfd->addr = addr;
+    ioeventfd->size = size;
+    ioeventfd->match_data = true;
+    ioeventfd->data = data;
+    ioeventfd->region = region;
+    ioeventfd->region_addr = region_addr;
+
+    qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e),
+                        vfio_ioeventfd_handler, NULL, ioeventfd);
+    memory_region_add_eventfd(ioeventfd->mr, ioeventfd->addr,
+                              ioeventfd->size, ioeventfd->match_data,
+                              ioeventfd->data, &ioeventfd->e);
+
+    info_report("Enabled automatic ioeventfd acceleration for %s region %d, "
+                "offset 0x%"HWADDR_PRIx", data 0x%"PRIx64", size %u",
+                vdev->vbasedev.name, region->nr, region_addr, data, size);
+
+    return ioeventfd;
+}
+
 static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev)
 {
     VFIOQuirk *quirk;
@@ -732,6 +777,13 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr)
     trace_vfio_quirk_nvidia_bar5_probe(vdev->vbasedev.name);
 }
 
+typedef struct LastDataSet {
+    hwaddr addr;
+    uint64_t data;
+    unsigned size;
+    int count;
+} LastDataSet;
+
 /*
  * Finally, BAR0 itself.  We want to redirect any accesses to either
  * 0x1800 or 0x88000 through the PCI config space access functions.
@@ -742,6 +794,7 @@ static void vfio_nvidia_quirk_mirror_write(void *opaque, hwaddr addr,
     VFIOConfigMirrorQuirk *mirror = opaque;
     VFIOPCIDevice *vdev = mirror->vdev;
     PCIDevice *pdev = &vdev->pdev;
+    LastDataSet *last = (LastDataSet *)&mirror->data;
 
     vfio_generic_quirk_mirror_write(opaque, addr, data, size);
 
@@ -756,6 +809,38 @@ static void vfio_nvidia_quirk_mirror_write(void *opaque, hwaddr addr,
                           addr + mirror->offset, data, size);
         trace_vfio_quirk_nvidia_bar0_msi_ack(vdev->vbasedev.name);
     }
+
+    /*
+     * Automatically add an ioeventfd to handle any repeated write with the
+     * same data and size above the standard PCI config space header.  This is
+     * primarily expected to accelerate the MSI-ACK behavior, such as noted
+     * above.  Current hardware/drivers should trigger an ioeventfd at config
+     * offset 0x704 (region offset 0x88704), with data 0x0, size 4.
+     */
+    if (addr > PCI_STD_HEADER_SIZEOF) {
+        if (addr != last->addr || data != last->data || size != last->size) {
+            last->addr = addr;
+            last->data = data;
+            last->size = size;
+            last->count = 1;
+        } else if (++last->count > 10) {
+            VFIOIOEventFD *ioeventfd;
+
+            ioeventfd = vfio_ioeventfd_init(vdev, mirror->mem, addr, size, data,
+                                            &vdev->bars[mirror->bar].region,
+                                            mirror->offset + addr);
+            if (ioeventfd) {
+                VFIOQuirk *quirk;
+
+                QLIST_FOREACH(quirk, &vdev->bars[mirror->bar].quirks, next) {
+                    if (quirk->data == mirror) {
+                        QLIST_INSERT_HEAD(&quirk->ioeventfds, ioeventfd, next);
+                        break;
+                    }
+                }
+            }
+        }
+    }
 }
 
 static const MemoryRegionOps vfio_nvidia_mirror_quirk = {
@@ -776,7 +861,7 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr)
     }
 
     quirk = vfio_quirk_alloc(1);
-    mirror = quirk->data = g_malloc0(sizeof(*mirror));
+    mirror = quirk->data = g_malloc0(sizeof(*mirror) + sizeof(LastDataSet));
     mirror->mem = quirk->mem;
     mirror->vdev = vdev;
     mirror->offset = 0x88000;
@@ -794,7 +879,7 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr)
     /* The 0x1800 offset mirror only seems to get used by legacy VGA */
     if (vdev->vga) {
         quirk = vfio_quirk_alloc(1);
-        mirror = quirk->data = g_malloc0(sizeof(*mirror));
+        mirror = quirk->data = g_malloc0(sizeof(*mirror) + sizeof(LastDataSet));
         mirror->mem = quirk->mem;
         mirror->vdev = vdev;
         mirror->offset = 0x1800;
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 146065c2f715..ec53b9935725 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -32,6 +32,8 @@ typedef struct VFIOIOEventFD {
     bool match_data;
     uint64_t data;
     EventNotifier e;
+    VFIORegion *region;
+    hwaddr region_addr;
 } VFIOIOEventFD;
 
 typedef struct VFIOQuirk {

  parent reply	other threads:[~2018-02-07  0:26 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-02-07  0:26 [RFC PATCH 0/5] vfio: ioeventfd support Alex Williamson
2018-02-07  0:26 ` [Qemu-devel] " Alex Williamson
2018-02-07  0:26 ` [RFC PATCH 1/5] vfio/quirks: Add common quirk alloc helper Alex Williamson
2018-02-07  0:26   ` [Qemu-devel] " Alex Williamson
2018-02-08 11:10   ` Auger Eric
2018-02-08 11:10     ` [Qemu-devel] " Auger Eric
2018-02-08 18:28     ` Alex Williamson
2018-02-08 18:28       ` [Qemu-devel] " Alex Williamson
2018-02-07  0:26 ` [RFC PATCH 2/5] vfio/quirks: Add generic support for ioveventfds Alex Williamson
2018-02-07  0:26   ` [Qemu-devel] " Alex Williamson
2018-02-08 11:11   ` Auger Eric
2018-02-08 11:11     ` [Qemu-devel] " Auger Eric
2018-02-08 18:33     ` Alex Williamson
2018-02-08 18:33       ` [Qemu-devel] " Alex Williamson
2018-02-08 20:37       ` Auger Eric
2018-02-07  0:26 ` Alex Williamson [this message]
2018-02-07  0:26   ` [Qemu-devel] [RFC PATCH 3/5] vfio/quirks: Automatic ioeventfd enabling for NVIDIA BAR0 quirks Alex Williamson
2018-02-08 11:10   ` Auger Eric
2018-02-08 11:10     ` [Qemu-devel] " Auger Eric
2018-02-08 11:33     ` Auger Eric
2018-02-08 18:24     ` Alex Williamson
2018-02-08 18:24       ` [Qemu-devel] " Alex Williamson
2018-02-08 20:52       ` Auger Eric
2018-02-08 20:52         ` [Qemu-devel] " Auger Eric
2018-02-07  0:26 ` [RFC PATCH 4/5] vfio: Update linux header Alex Williamson
2018-02-07  0:26   ` [Qemu-devel] " Alex Williamson
2018-02-07  0:26 ` [RFC PATCH 5/5] vfio/quirks: Enable ioeventfd quirks to be handled by vfio directly Alex Williamson
2018-02-07  0:26   ` [Qemu-devel] " Alex Williamson
2018-02-08 11:42   ` Auger Eric
2018-02-08 11:42     ` [Qemu-devel] " Auger Eric
2018-02-08 18:41     ` Alex Williamson
2018-02-08 18:41       ` [Qemu-devel] " Alex Williamson
2018-02-09  7:11   ` Peter Xu
2018-02-09 22:09     ` Alex Williamson
2018-02-11  2:38       ` Peter Xu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180207002632.1156.53770.stgit@gimli.home \
    --to=alex.williamson@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.