From mboxrd@z Thu Jan 1 00:00:00 1970 From: Alex Williamson Subject: [RFC PATCH 3/5] vfio/quirks: Automatic ioeventfd enabling for NVIDIA BAR0 quirks Date: Tue, 06 Feb 2018 17:26:32 -0700 Message-ID: <20180207002632.1156.53770.stgit@gimli.home> References: <20180207001615.1156.10547.stgit@gimli.home> Mime-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 7bit Cc: alex.williamson@redhat.com, kvm@vger.kernel.org To: qemu-devel@nongnu.org Return-path: Received: from mx1.redhat.com ([209.132.183.28]:45500 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S932106AbeBGA0d (ORCPT ); Tue, 6 Feb 2018 19:26:33 -0500 In-Reply-To: <20180207001615.1156.10547.stgit@gimli.home> Sender: kvm-owner@vger.kernel.org List-ID: Record data writes that come through the NVIDIA BAR0 quirk, if we get enough in a row that we're only passing through, automatically enable an ioeventfd for it. The primary target for this is the MSI-ACK that NVIDIA uses to allow the MSI interrupt to re-trigger, which is a 4-byte write, data value 0x0 to offset 0x704 into the quirk, 0x88704 into BAR0 MMIO space. For an interrupt latency sensitive micro- benchmark, this takes us from 83% of performance versus disabling the quirk entirely (which GeForce cannot do), to to almost 90%. Signed-off-by: Alex Williamson --- hw/vfio/pci-quirks.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++++- hw/vfio/pci.h | 2 + 2 files changed, 89 insertions(+), 2 deletions(-) diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c index e4cf4ea2dd9c..e739efe601b1 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c @@ -203,6 +203,7 @@ typedef struct VFIOConfigMirrorQuirk { uint32_t offset; uint8_t bar; MemoryRegion *mem; + uint8_t data[]; } VFIOConfigMirrorQuirk; static uint64_t vfio_generic_quirk_mirror_read(void *opaque, @@ -297,6 +298,50 @@ static void vfio_ioeventfd_exit(VFIOIOEventFD *ioeventfd) g_free(ioeventfd); } +static void vfio_ioeventfd_handler(void *opaque) +{ + VFIOIOEventFD *ioeventfd = opaque; + + if (event_notifier_test_and_clear(&ioeventfd->e)) { + vfio_region_write(ioeventfd->region, ioeventfd->region_addr, + ioeventfd->data, ioeventfd->size); + } +} + +static VFIOIOEventFD *vfio_ioeventfd_init(VFIOPCIDevice *vdev, + MemoryRegion *mr, hwaddr addr, + unsigned size, uint64_t data, + VFIORegion *region, + hwaddr region_addr) +{ + VFIOIOEventFD *ioeventfd = g_malloc0(sizeof(*ioeventfd)); + + if (event_notifier_init(&ioeventfd->e, 0)) { + g_free(ioeventfd); + return NULL; + } + + ioeventfd->mr = mr; + ioeventfd->addr = addr; + ioeventfd->size = size; + ioeventfd->match_data = true; + ioeventfd->data = data; + ioeventfd->region = region; + ioeventfd->region_addr = region_addr; + + qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e), + vfio_ioeventfd_handler, NULL, ioeventfd); + memory_region_add_eventfd(ioeventfd->mr, ioeventfd->addr, + ioeventfd->size, ioeventfd->match_data, + ioeventfd->data, &ioeventfd->e); + + info_report("Enabled automatic ioeventfd acceleration for %s region %d, " + "offset 0x%"HWADDR_PRIx", data 0x%"PRIx64", size %u", + vdev->vbasedev.name, region->nr, region_addr, data, size); + + return ioeventfd; +} + static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev) { VFIOQuirk *quirk; @@ -732,6 +777,13 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr) trace_vfio_quirk_nvidia_bar5_probe(vdev->vbasedev.name); } +typedef struct LastDataSet { + hwaddr addr; + uint64_t data; + unsigned size; + int count; +} LastDataSet; + /* * Finally, BAR0 itself. We want to redirect any accesses to either * 0x1800 or 0x88000 through the PCI config space access functions. @@ -742,6 +794,7 @@ static void vfio_nvidia_quirk_mirror_write(void *opaque, hwaddr addr, VFIOConfigMirrorQuirk *mirror = opaque; VFIOPCIDevice *vdev = mirror->vdev; PCIDevice *pdev = &vdev->pdev; + LastDataSet *last = (LastDataSet *)&mirror->data; vfio_generic_quirk_mirror_write(opaque, addr, data, size); @@ -756,6 +809,38 @@ static void vfio_nvidia_quirk_mirror_write(void *opaque, hwaddr addr, addr + mirror->offset, data, size); trace_vfio_quirk_nvidia_bar0_msi_ack(vdev->vbasedev.name); } + + /* + * Automatically add an ioeventfd to handle any repeated write with the + * same data and size above the standard PCI config space header. This is + * primarily expected to accelerate the MSI-ACK behavior, such as noted + * above. Current hardware/drivers should trigger an ioeventfd at config + * offset 0x704 (region offset 0x88704), with data 0x0, size 4. + */ + if (addr > PCI_STD_HEADER_SIZEOF) { + if (addr != last->addr || data != last->data || size != last->size) { + last->addr = addr; + last->data = data; + last->size = size; + last->count = 1; + } else if (++last->count > 10) { + VFIOIOEventFD *ioeventfd; + + ioeventfd = vfio_ioeventfd_init(vdev, mirror->mem, addr, size, data, + &vdev->bars[mirror->bar].region, + mirror->offset + addr); + if (ioeventfd) { + VFIOQuirk *quirk; + + QLIST_FOREACH(quirk, &vdev->bars[mirror->bar].quirks, next) { + if (quirk->data == mirror) { + QLIST_INSERT_HEAD(&quirk->ioeventfds, ioeventfd, next); + break; + } + } + } + } + } } static const MemoryRegionOps vfio_nvidia_mirror_quirk = { @@ -776,7 +861,7 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr) } quirk = vfio_quirk_alloc(1); - mirror = quirk->data = g_malloc0(sizeof(*mirror)); + mirror = quirk->data = g_malloc0(sizeof(*mirror) + sizeof(LastDataSet)); mirror->mem = quirk->mem; mirror->vdev = vdev; mirror->offset = 0x88000; @@ -794,7 +879,7 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr) /* The 0x1800 offset mirror only seems to get used by legacy VGA */ if (vdev->vga) { quirk = vfio_quirk_alloc(1); - mirror = quirk->data = g_malloc0(sizeof(*mirror)); + mirror = quirk->data = g_malloc0(sizeof(*mirror) + sizeof(LastDataSet)); mirror->mem = quirk->mem; mirror->vdev = vdev; mirror->offset = 0x1800; diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index 146065c2f715..ec53b9935725 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -32,6 +32,8 @@ typedef struct VFIOIOEventFD { bool match_data; uint64_t data; EventNotifier e; + VFIORegion *region; + hwaddr region_addr; } VFIOIOEventFD; typedef struct VFIOQuirk { From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:56396) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ejDZN-0007ei-NS for qemu-devel@nongnu.org; Tue, 06 Feb 2018 19:26:39 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1ejDZK-0005LO-Bl for qemu-devel@nongnu.org; Tue, 06 Feb 2018 19:26:37 -0500 Received: from mx1.redhat.com ([209.132.183.28]:50716) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1ejDZK-0005Jw-3b for qemu-devel@nongnu.org; Tue, 06 Feb 2018 19:26:34 -0500 From: Alex Williamson Date: Tue, 06 Feb 2018 17:26:32 -0700 Message-ID: <20180207002632.1156.53770.stgit@gimli.home> In-Reply-To: <20180207001615.1156.10547.stgit@gimli.home> References: <20180207001615.1156.10547.stgit@gimli.home> MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 7bit Subject: [Qemu-devel] [RFC PATCH 3/5] vfio/quirks: Automatic ioeventfd enabling for NVIDIA BAR0 quirks List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel@nongnu.org Cc: alex.williamson@redhat.com, kvm@vger.kernel.org Record data writes that come through the NVIDIA BAR0 quirk, if we get enough in a row that we're only passing through, automatically enable an ioeventfd for it. The primary target for this is the MSI-ACK that NVIDIA uses to allow the MSI interrupt to re-trigger, which is a 4-byte write, data value 0x0 to offset 0x704 into the quirk, 0x88704 into BAR0 MMIO space. For an interrupt latency sensitive micro- benchmark, this takes us from 83% of performance versus disabling the quirk entirely (which GeForce cannot do), to to almost 90%. Signed-off-by: Alex Williamson --- hw/vfio/pci-quirks.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++++- hw/vfio/pci.h | 2 + 2 files changed, 89 insertions(+), 2 deletions(-) diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c index e4cf4ea2dd9c..e739efe601b1 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c @@ -203,6 +203,7 @@ typedef struct VFIOConfigMirrorQuirk { uint32_t offset; uint8_t bar; MemoryRegion *mem; + uint8_t data[]; } VFIOConfigMirrorQuirk; static uint64_t vfio_generic_quirk_mirror_read(void *opaque, @@ -297,6 +298,50 @@ static void vfio_ioeventfd_exit(VFIOIOEventFD *ioeventfd) g_free(ioeventfd); } +static void vfio_ioeventfd_handler(void *opaque) +{ + VFIOIOEventFD *ioeventfd = opaque; + + if (event_notifier_test_and_clear(&ioeventfd->e)) { + vfio_region_write(ioeventfd->region, ioeventfd->region_addr, + ioeventfd->data, ioeventfd->size); + } +} + +static VFIOIOEventFD *vfio_ioeventfd_init(VFIOPCIDevice *vdev, + MemoryRegion *mr, hwaddr addr, + unsigned size, uint64_t data, + VFIORegion *region, + hwaddr region_addr) +{ + VFIOIOEventFD *ioeventfd = g_malloc0(sizeof(*ioeventfd)); + + if (event_notifier_init(&ioeventfd->e, 0)) { + g_free(ioeventfd); + return NULL; + } + + ioeventfd->mr = mr; + ioeventfd->addr = addr; + ioeventfd->size = size; + ioeventfd->match_data = true; + ioeventfd->data = data; + ioeventfd->region = region; + ioeventfd->region_addr = region_addr; + + qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e), + vfio_ioeventfd_handler, NULL, ioeventfd); + memory_region_add_eventfd(ioeventfd->mr, ioeventfd->addr, + ioeventfd->size, ioeventfd->match_data, + ioeventfd->data, &ioeventfd->e); + + info_report("Enabled automatic ioeventfd acceleration for %s region %d, " + "offset 0x%"HWADDR_PRIx", data 0x%"PRIx64", size %u", + vdev->vbasedev.name, region->nr, region_addr, data, size); + + return ioeventfd; +} + static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev) { VFIOQuirk *quirk; @@ -732,6 +777,13 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr) trace_vfio_quirk_nvidia_bar5_probe(vdev->vbasedev.name); } +typedef struct LastDataSet { + hwaddr addr; + uint64_t data; + unsigned size; + int count; +} LastDataSet; + /* * Finally, BAR0 itself. We want to redirect any accesses to either * 0x1800 or 0x88000 through the PCI config space access functions. @@ -742,6 +794,7 @@ static void vfio_nvidia_quirk_mirror_write(void *opaque, hwaddr addr, VFIOConfigMirrorQuirk *mirror = opaque; VFIOPCIDevice *vdev = mirror->vdev; PCIDevice *pdev = &vdev->pdev; + LastDataSet *last = (LastDataSet *)&mirror->data; vfio_generic_quirk_mirror_write(opaque, addr, data, size); @@ -756,6 +809,38 @@ static void vfio_nvidia_quirk_mirror_write(void *opaque, hwaddr addr, addr + mirror->offset, data, size); trace_vfio_quirk_nvidia_bar0_msi_ack(vdev->vbasedev.name); } + + /* + * Automatically add an ioeventfd to handle any repeated write with the + * same data and size above the standard PCI config space header. This is + * primarily expected to accelerate the MSI-ACK behavior, such as noted + * above. Current hardware/drivers should trigger an ioeventfd at config + * offset 0x704 (region offset 0x88704), with data 0x0, size 4. + */ + if (addr > PCI_STD_HEADER_SIZEOF) { + if (addr != last->addr || data != last->data || size != last->size) { + last->addr = addr; + last->data = data; + last->size = size; + last->count = 1; + } else if (++last->count > 10) { + VFIOIOEventFD *ioeventfd; + + ioeventfd = vfio_ioeventfd_init(vdev, mirror->mem, addr, size, data, + &vdev->bars[mirror->bar].region, + mirror->offset + addr); + if (ioeventfd) { + VFIOQuirk *quirk; + + QLIST_FOREACH(quirk, &vdev->bars[mirror->bar].quirks, next) { + if (quirk->data == mirror) { + QLIST_INSERT_HEAD(&quirk->ioeventfds, ioeventfd, next); + break; + } + } + } + } + } } static const MemoryRegionOps vfio_nvidia_mirror_quirk = { @@ -776,7 +861,7 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr) } quirk = vfio_quirk_alloc(1); - mirror = quirk->data = g_malloc0(sizeof(*mirror)); + mirror = quirk->data = g_malloc0(sizeof(*mirror) + sizeof(LastDataSet)); mirror->mem = quirk->mem; mirror->vdev = vdev; mirror->offset = 0x88000; @@ -794,7 +879,7 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr) /* The 0x1800 offset mirror only seems to get used by legacy VGA */ if (vdev->vga) { quirk = vfio_quirk_alloc(1); - mirror = quirk->data = g_malloc0(sizeof(*mirror)); + mirror = quirk->data = g_malloc0(sizeof(*mirror) + sizeof(LastDataSet)); mirror->mem = quirk->mem; mirror->vdev = vdev; mirror->offset = 0x1800; diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index 146065c2f715..ec53b9935725 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -32,6 +32,8 @@ typedef struct VFIOIOEventFD { bool match_data; uint64_t data; EventNotifier e; + VFIORegion *region; + hwaddr region_addr; } VFIOIOEventFD; typedef struct VFIOQuirk {