All of lore.kernel.org
 help / color / mirror / Atom feed
From: Alex Williamson <alex.williamson@redhat.com>
To: alex.williamson@redhat.com
Cc: qemu-devel@nongnu.org, kvm@vger.kernel.org
Subject: [PATCH] vfio-pci: Quirk RTL8168 NIC
Date: Sat, 10 May 2014 17:03:11 -0600	[thread overview]
Message-ID: <20140510230225.17688.6353.stgit@bling.home> (raw)

This device is ridiculous.  It has two MMIO BARs, BAR4 and BAR2.  BAR4
hosts the MSI-X table, so oviously it would be too easy to access it
directly, instead it creates a window register in BAR2 that, among
other things, provides access to the MSI-X table.  This means MSI-X
doesn't work in the guest because the driver actually manages to
program the physical table.  When interrupt remapping is present, the
device MSI will be blocked.  The Linux driver doesn't make use of this
window, so apparently it's not required to make use of MSI-X.  This
quirk makes the device work with the Windows driver that does use this
window for MSI-X, but I certainly cannot recommend this device for
assignment (the Windows 7 driver also constantly pokes PCI config
space).

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 hw/misc/vfio.c |  145 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 145 insertions(+)

diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
index 9cf5b84..c3d2f7a 100644
--- a/hw/misc/vfio.c
+++ b/hw/misc/vfio.c
@@ -1668,6 +1668,150 @@ static void vfio_probe_ati_bar4_window_quirk(VFIODevice *vdev, int nr)
             vdev->host.function);
 }
 
+#define PCI_VENDOR_ID_REALTEK 0x10ec
+
+/*
+ * RTL8168 devices have a backdoor that can access the MSI-X table.  At BAR2
+ * offset 0x70 there is a dword data register, offset 0x74 is a dword address
+ * register.  According to the Linux r8169 driver, the MSI-X table is addressed
+ * when the "type" portion of the address register is set to 0x1.  This appears
+ * to be bits 16:30.  Bit 31 is both a write indicator and some sort of
+ * "address latched" indicator.  Bits 12:15 is a mask field, which we're
+ * going to ignore because we don't really know what it means and the MSI-X
+ * area always seems to be accessed with a full mask.  Bits 0:11 is offset
+ * within the type.
+ *
+ * Example trace:
+ *
+ * Read from MSI-X table offset 0
+ * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x1f000, 4) // store read addr
+ * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x8001f000 // latch
+ * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x70, 4) = 0xfee00398 // read data
+ *
+ * Write 0xfee00000 to MSI-X table offset 0
+ * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x70, 0xfee00000, 4) // write data
+ * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x8001f000, 4) // do write
+ * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x1f000 // complete
+ */
+
+static uint64_t vfio_rtl8168_window_quirk_read(void *opaque,
+                                               hwaddr addr, unsigned size)
+{
+    VFIOQuirk *quirk = opaque;
+    VFIODevice *vdev = quirk->vdev;
+
+    switch (addr) {
+    case 4: /* address */
+        if (quirk->data.flags) {
+            DPRINTF("%s fake read(%04x:%02x:%02x.%xd)\n",
+                    memory_region_name(&quirk->mem), vdev->host.domain,
+                    vdev->host.bus, vdev->host.slot, vdev->host.function);
+
+            return quirk->data.address_match ^ 0x10000000U;
+        }
+        break;
+    case 0: /* data */
+        if (quirk->data.flags) {
+            uint64_t val;
+
+            DPRINTF("%s MSI-X table read(%04x:%02x:%02x.%xd)\n",
+                    memory_region_name(&quirk->mem), vdev->host.domain,
+                    vdev->host.bus, vdev->host.slot, vdev->host.function);
+
+            if (!(vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX)) {
+                return 0;
+            }
+
+            io_mem_read(&vdev->pdev.msix_table_mmio,
+                        (hwaddr)(quirk->data.address_match & 0xfff),
+                        &val, size);
+            return val;
+        }
+    }
+
+    DPRINTF("%s direct read(%04x:%02x:%02x.%xd)\n",
+            memory_region_name(&quirk->mem), vdev->host.domain,
+            vdev->host.bus, vdev->host.slot, vdev->host.function);
+
+    return vfio_bar_read(&vdev->bars[quirk->data.bar], addr + 0x70, size);
+}
+
+static void vfio_rtl8168_window_quirk_write(void *opaque, hwaddr addr,
+                                            uint64_t data, unsigned size)
+{
+    VFIOQuirk *quirk = opaque;
+    VFIODevice *vdev = quirk->vdev;
+
+    switch (addr) {
+    case 4: /* address */
+        if ((data & 0x7fff0000) == 0x10000) {
+            if (data & 0x10000000U &&
+                vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX) {
+
+                DPRINTF("%s MSI-X table write(%04x:%02x:%02x.%xd)\n",
+                        memory_region_name(&quirk->mem), vdev->host.domain,
+                        vdev->host.bus, vdev->host.slot, vdev->host.function);
+
+                io_mem_write(&vdev->pdev.msix_table_mmio,
+                             (hwaddr)(quirk->data.address_match & 0xfff),
+                             data, size);
+            }
+
+            quirk->data.flags = 1;
+            quirk->data.address_match = data;
+
+            return;
+        }
+        quirk->data.flags = 0;
+        break;
+    case 0: /* data */
+        quirk->data.address_mask = data;
+        break;
+    }
+
+    DPRINTF("%s direct write(%04x:%02x:%02x.%xd)\n",
+            memory_region_name(&quirk->mem), vdev->host.domain,
+            vdev->host.bus, vdev->host.slot, vdev->host.function);
+
+    vfio_bar_write(&vdev->bars[quirk->data.bar], addr + 0x70, data, size);
+}
+
+static const MemoryRegionOps vfio_rtl8168_window_quirk = {
+    .read = vfio_rtl8168_window_quirk_read,
+    .write = vfio_rtl8168_window_quirk_write,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+        .unaligned = false,
+    },
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static void vfio_probe_rtl8168_bar2_window_quirk(VFIODevice *vdev, int nr)
+{
+    PCIDevice *pdev = &vdev->pdev;
+    VFIOQuirk *quirk;
+
+    if (pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_REALTEK ||
+        pci_get_word(pdev->config + PCI_DEVICE_ID) != 0x8168 || nr != 2) {
+        return;
+    }
+
+    quirk = g_malloc0(sizeof(*quirk));
+    quirk->vdev = vdev;
+    quirk->data.bar = nr;
+
+    memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_rtl8168_window_quirk,
+                          quirk, "vfio-rtl8168-window-quirk", 8);
+    memory_region_add_subregion_overlap(&vdev->bars[nr].mem,
+                                        0x70, &quirk->mem, 1);
+
+    QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
+
+    DPRINTF("Enabled RTL8168 BAR2 window quirk for device %04x:%02x:%02x.%x\n",
+            vdev->host.domain, vdev->host.bus, vdev->host.slot,
+            vdev->host.function);
+}
 /*
  * Trap the BAR2 MMIO window to config space as well.
  */
@@ -2071,6 +2215,7 @@ static void vfio_bar_quirk_setup(VFIODevice *vdev, int nr)
     vfio_probe_nvidia_bar5_window_quirk(vdev, nr);
     vfio_probe_nvidia_bar0_88000_quirk(vdev, nr);
     vfio_probe_nvidia_bar0_1800_quirk(vdev, nr);
+    vfio_probe_rtl8168_bar2_window_quirk(vdev, nr);
 }
 
 static void vfio_bar_quirk_teardown(VFIODevice *vdev, int nr)


WARNING: multiple messages have this Message-ID
From: Alex Williamson <alex.williamson@redhat.com>
To: alex.williamson@redhat.com
Cc: qemu-devel@nongnu.org, kvm@vger.kernel.org
Subject: [Qemu-devel] [PATCH] vfio-pci: Quirk RTL8168 NIC
Date: Sat, 10 May 2014 17:03:11 -0600	[thread overview]
Message-ID: <20140510230225.17688.6353.stgit@bling.home> (raw)

This device is ridiculous.  It has two MMIO BARs, BAR4 and BAR2.  BAR4
hosts the MSI-X table, so oviously it would be too easy to access it
directly, instead it creates a window register in BAR2 that, among
other things, provides access to the MSI-X table.  This means MSI-X
doesn't work in the guest because the driver actually manages to
program the physical table.  When interrupt remapping is present, the
device MSI will be blocked.  The Linux driver doesn't make use of this
window, so apparently it's not required to make use of MSI-X.  This
quirk makes the device work with the Windows driver that does use this
window for MSI-X, but I certainly cannot recommend this device for
assignment (the Windows 7 driver also constantly pokes PCI config
space).

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 hw/misc/vfio.c |  145 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 145 insertions(+)

diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
index 9cf5b84..c3d2f7a 100644
--- a/hw/misc/vfio.c
+++ b/hw/misc/vfio.c
@@ -1668,6 +1668,150 @@ static void vfio_probe_ati_bar4_window_quirk(VFIODevice *vdev, int nr)
             vdev->host.function);
 }
 
+#define PCI_VENDOR_ID_REALTEK 0x10ec
+
+/*
+ * RTL8168 devices have a backdoor that can access the MSI-X table.  At BAR2
+ * offset 0x70 there is a dword data register, offset 0x74 is a dword address
+ * register.  According to the Linux r8169 driver, the MSI-X table is addressed
+ * when the "type" portion of the address register is set to 0x1.  This appears
+ * to be bits 16:30.  Bit 31 is both a write indicator and some sort of
+ * "address latched" indicator.  Bits 12:15 is a mask field, which we're
+ * going to ignore because we don't really know what it means and the MSI-X
+ * area always seems to be accessed with a full mask.  Bits 0:11 is offset
+ * within the type.
+ *
+ * Example trace:
+ *
+ * Read from MSI-X table offset 0
+ * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x1f000, 4) // store read addr
+ * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x8001f000 // latch
+ * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x70, 4) = 0xfee00398 // read data
+ *
+ * Write 0xfee00000 to MSI-X table offset 0
+ * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x70, 0xfee00000, 4) // write data
+ * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x8001f000, 4) // do write
+ * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x1f000 // complete
+ */
+
+static uint64_t vfio_rtl8168_window_quirk_read(void *opaque,
+                                               hwaddr addr, unsigned size)
+{
+    VFIOQuirk *quirk = opaque;
+    VFIODevice *vdev = quirk->vdev;
+
+    switch (addr) {
+    case 4: /* address */
+        if (quirk->data.flags) {
+            DPRINTF("%s fake read(%04x:%02x:%02x.%xd)\n",
+                    memory_region_name(&quirk->mem), vdev->host.domain,
+                    vdev->host.bus, vdev->host.slot, vdev->host.function);
+
+            return quirk->data.address_match ^ 0x10000000U;
+        }
+        break;
+    case 0: /* data */
+        if (quirk->data.flags) {
+            uint64_t val;
+
+            DPRINTF("%s MSI-X table read(%04x:%02x:%02x.%xd)\n",
+                    memory_region_name(&quirk->mem), vdev->host.domain,
+                    vdev->host.bus, vdev->host.slot, vdev->host.function);
+
+            if (!(vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX)) {
+                return 0;
+            }
+
+            io_mem_read(&vdev->pdev.msix_table_mmio,
+                        (hwaddr)(quirk->data.address_match & 0xfff),
+                        &val, size);
+            return val;
+        }
+    }
+
+    DPRINTF("%s direct read(%04x:%02x:%02x.%xd)\n",
+            memory_region_name(&quirk->mem), vdev->host.domain,
+            vdev->host.bus, vdev->host.slot, vdev->host.function);
+
+    return vfio_bar_read(&vdev->bars[quirk->data.bar], addr + 0x70, size);
+}
+
+static void vfio_rtl8168_window_quirk_write(void *opaque, hwaddr addr,
+                                            uint64_t data, unsigned size)
+{
+    VFIOQuirk *quirk = opaque;
+    VFIODevice *vdev = quirk->vdev;
+
+    switch (addr) {
+    case 4: /* address */
+        if ((data & 0x7fff0000) == 0x10000) {
+            if (data & 0x10000000U &&
+                vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX) {
+
+                DPRINTF("%s MSI-X table write(%04x:%02x:%02x.%xd)\n",
+                        memory_region_name(&quirk->mem), vdev->host.domain,
+                        vdev->host.bus, vdev->host.slot, vdev->host.function);
+
+                io_mem_write(&vdev->pdev.msix_table_mmio,
+                             (hwaddr)(quirk->data.address_match & 0xfff),
+                             data, size);
+            }
+
+            quirk->data.flags = 1;
+            quirk->data.address_match = data;
+
+            return;
+        }
+        quirk->data.flags = 0;
+        break;
+    case 0: /* data */
+        quirk->data.address_mask = data;
+        break;
+    }
+
+    DPRINTF("%s direct write(%04x:%02x:%02x.%xd)\n",
+            memory_region_name(&quirk->mem), vdev->host.domain,
+            vdev->host.bus, vdev->host.slot, vdev->host.function);
+
+    vfio_bar_write(&vdev->bars[quirk->data.bar], addr + 0x70, data, size);
+}
+
+static const MemoryRegionOps vfio_rtl8168_window_quirk = {
+    .read = vfio_rtl8168_window_quirk_read,
+    .write = vfio_rtl8168_window_quirk_write,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+        .unaligned = false,
+    },
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static void vfio_probe_rtl8168_bar2_window_quirk(VFIODevice *vdev, int nr)
+{
+    PCIDevice *pdev = &vdev->pdev;
+    VFIOQuirk *quirk;
+
+    if (pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_REALTEK ||
+        pci_get_word(pdev->config + PCI_DEVICE_ID) != 0x8168 || nr != 2) {
+        return;
+    }
+
+    quirk = g_malloc0(sizeof(*quirk));
+    quirk->vdev = vdev;
+    quirk->data.bar = nr;
+
+    memory_region_init_io(&quirk->mem, OBJECT(vdev), &vfio_rtl8168_window_quirk,
+                          quirk, "vfio-rtl8168-window-quirk", 8);
+    memory_region_add_subregion_overlap(&vdev->bars[nr].mem,
+                                        0x70, &quirk->mem, 1);
+
+    QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
+
+    DPRINTF("Enabled RTL8168 BAR2 window quirk for device %04x:%02x:%02x.%x\n",
+            vdev->host.domain, vdev->host.bus, vdev->host.slot,
+            vdev->host.function);
+}
 /*
  * Trap the BAR2 MMIO window to config space as well.
  */
@@ -2071,6 +2215,7 @@ static void vfio_bar_quirk_setup(VFIODevice *vdev, int nr)
     vfio_probe_nvidia_bar5_window_quirk(vdev, nr);
     vfio_probe_nvidia_bar0_88000_quirk(vdev, nr);
     vfio_probe_nvidia_bar0_1800_quirk(vdev, nr);
+    vfio_probe_rtl8168_bar2_window_quirk(vdev, nr);
 }
 
 static void vfio_bar_quirk_teardown(VFIODevice *vdev, int nr)

             reply	other threads:[~2014-05-10 23:03 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-05-10 23:03 Alex Williamson [this message]
2014-05-10 23:03 ` [Qemu-devel] " Alex Williamson
2014-05-12 20:02 ` Francois Romieu
2014-05-12 20:02   ` [Qemu-devel] " Francois Romieu
2014-05-12 20:28   ` Alex Williamson
2014-05-12 20:28     ` [Qemu-devel] " Alex Williamson
2014-05-13  3:06     ` Alex Williamson
2014-05-13 22:22       ` Francois Romieu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20140510230225.17688.6353.stgit@bling.home \
    --to=alex.williamson@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=qemu-devel@nongnu.org \
    --subject='Re: [PATCH] vfio-pci: Quirk RTL8168 NIC' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.