All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH v4 00/10] vfio-pci: pass the aer error to guest, part2
@ 2016-03-21 10:08 Cao jin
  2016-03-21 10:08 ` [Qemu-devel] [PATCH v4 01/10] vfio: extract vfio_get_hot_reset_info as a single function Cao jin
                   ` (9 more replies)
  0 siblings, 10 replies; 21+ messages in thread
From: Cao jin @ 2016-03-21 10:08 UTC (permalink / raw)
  To: qemu-devel; +Cc: chen.fan.fnst, izumi.taku, alex.williamson, mst

From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

v3-v4:
   1. rebase patchset to fit latest master branch.
   2. modifying patches 5/10 ~ 8/10 as Alex sugguestion(Thanks).

v2-v3:
   1. fix patch 4/9, 5/9 as Alex sugguestion.
   2. patches 5/9 ~ 8/9 are made to force limiting that all vfio functions
      are combined in the same way as on the host.

v1-v2:
   1. limit all devices on same bus in guest are on same bus in host in patch 5/11.
   2. patch 05/11 ~ 09/11 has been changed.

Chen Fan (10):
  vfio: extract vfio_get_hot_reset_info as a single function
  vfio: squeeze out vfio_pci_do_hot_reset for support bus reset
  vfio: add pcie extended capability support
  vfio: add aer support for vfio device
  vfio: extending function vfio_pci_host_match to support mask func
    number
  vfio: add check host bus reset is support or not
  vfio: add check aer functionality for hotplug device
  vfio: vote the function 0 to do host bus reset when aer occurred
  vfio-pci: pass the aer error to guest
  vfio: add 'aer' property to expose aercap

 hw/vfio/pci.c | 675 ++++++++++++++++++++++++++++++++++++++++++++++++++++------
 hw/vfio/pci.h |   6 +
 2 files changed, 616 insertions(+), 65 deletions(-)

-- 
1.9.3

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [Qemu-devel] [PATCH v4 01/10] vfio: extract vfio_get_hot_reset_info as a single function
  2016-03-21 10:08 [Qemu-devel] [PATCH v4 00/10] vfio-pci: pass the aer error to guest, part2 Cao jin
@ 2016-03-21 10:08 ` Cao jin
  2016-03-21 10:08 ` [Qemu-devel] [PATCH v4 02/10] vfio: squeeze out vfio_pci_do_hot_reset for support bus reset Cao jin
                   ` (8 subsequent siblings)
  9 siblings, 0 replies; 21+ messages in thread
From: Cao jin @ 2016-03-21 10:08 UTC (permalink / raw)
  To: qemu-devel; +Cc: chen.fan.fnst, izumi.taku, alex.williamson, mst

From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

the function is used to get affected devices by bus reset.
so here extract it, and can used for aer soon.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 66 +++++++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 48 insertions(+), 18 deletions(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index d091d8c..cf40f9e 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1701,6 +1701,51 @@ static void vfio_check_af_flr(VFIOPCIDevice *vdev, uint8_t pos)
     }
 }
 
+/*
+ * return negative with errno, return 0 on success.
+ * if success, the point of ret_info fill with the affected device reset info.
+ *
+ */
+static int vfio_get_hot_reset_info(VFIOPCIDevice *vdev,
+                                   struct vfio_pci_hot_reset_info **ret_info)
+{
+    struct vfio_pci_hot_reset_info *info;
+    int ret, count;
+
+    *ret_info = NULL;
+
+    info = g_malloc0(sizeof(*info));
+    info->argsz = sizeof(*info);
+
+    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info);
+    if (ret && errno != ENOSPC) {
+        ret = -errno;
+        goto error;
+    }
+
+    count = info->count;
+
+    info = g_realloc(info, sizeof(*info) +
+                     (count * sizeof(struct vfio_pci_dependent_device)));
+    info->argsz = sizeof(*info) +
+                  (count * sizeof(struct vfio_pci_dependent_device));
+
+    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info);
+    if (ret) {
+        ret = -errno;
+        error_report("vfio: hot reset info failed: %m");
+        goto error;
+    }
+
+    *ret_info = info;
+    info = NULL;
+
+    return 0;
+error:
+    g_free(info);
+    return ret;
+}
+
 static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
 {
     PCIDevice *pdev = &vdev->pdev;
@@ -1842,7 +1887,7 @@ static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name)
 static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
 {
     VFIOGroup *group;
-    struct vfio_pci_hot_reset_info *info;
+    struct vfio_pci_hot_reset_info *info = NULL;
     struct vfio_pci_dependent_device *devices;
     struct vfio_pci_hot_reset *reset;
     int32_t *fds;
@@ -1854,12 +1899,8 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
     vfio_pci_pre_reset(vdev);
     vdev->vbasedev.needs_reset = false;
 
-    info = g_malloc0(sizeof(*info));
-    info->argsz = sizeof(*info);
-
-    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info);
-    if (ret && errno != ENOSPC) {
-        ret = -errno;
+    ret = vfio_get_hot_reset_info(vdev, &info);
+    if (ret) {
         if (!vdev->has_pm_reset) {
             error_report("vfio: Cannot reset device %s, "
                          "no available reset mechanism.", vdev->vbasedev.name);
@@ -1867,18 +1908,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
         goto out_single;
     }
 
-    count = info->count;
-    info = g_realloc(info, sizeof(*info) + (count * sizeof(*devices)));
-    info->argsz = sizeof(*info) + (count * sizeof(*devices));
     devices = &info->devices[0];
-
-    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info);
-    if (ret) {
-        ret = -errno;
-        error_report("vfio: hot reset info failed: %m");
-        goto out_single;
-    }
-
     trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name);
 
     /* Verify that we have all the groups required */
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Qemu-devel] [PATCH v4 02/10] vfio: squeeze out vfio_pci_do_hot_reset for support bus reset
  2016-03-21 10:08 [Qemu-devel] [PATCH v4 00/10] vfio-pci: pass the aer error to guest, part2 Cao jin
  2016-03-21 10:08 ` [Qemu-devel] [PATCH v4 01/10] vfio: extract vfio_get_hot_reset_info as a single function Cao jin
@ 2016-03-21 10:08 ` Cao jin
  2016-03-21 10:08 ` [Qemu-devel] [PATCH v4 03/10] vfio: add pcie extended capability support Cao jin
                   ` (7 subsequent siblings)
  9 siblings, 0 replies; 21+ messages in thread
From: Cao jin @ 2016-03-21 10:08 UTC (permalink / raw)
  To: qemu-devel; +Cc: chen.fan.fnst, izumi.taku, alex.williamson, mst

From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

squeeze out vfio_pci_do_hot_reset to do host bus reset when AER recovery.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 75 +++++++++++++++++++++++++++++++++++------------------------
 1 file changed, 44 insertions(+), 31 deletions(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index cf40f9e..1ad47ef 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1746,6 +1746,48 @@ error:
     return ret;
 }
 
+static int vfio_pci_do_hot_reset(VFIOPCIDevice *vdev,
+                                 struct vfio_pci_hot_reset_info *info)
+{
+    VFIOGroup *group;
+    struct vfio_pci_hot_reset *reset;
+    int32_t *fds;
+    int ret, i, count;
+    struct vfio_pci_dependent_device *devices;
+
+    /* Determine how many group fds need to be passed */
+    count = 0;
+    devices = &info->devices[0];
+    QLIST_FOREACH(group, &vfio_group_list, next) {
+        for (i = 0; i < info->count; i++) {
+            if (group->groupid == devices[i].group_id) {
+                count++;
+                break;
+            }
+        }
+    }
+
+    reset = g_malloc0(sizeof(*reset) + (count * sizeof(*fds)));
+    reset->argsz = sizeof(*reset) + (count * sizeof(*fds));
+    fds = &reset->group_fds[0];
+
+    /* Fill in group fds */
+    QLIST_FOREACH(group, &vfio_group_list, next) {
+        for (i = 0; i < info->count; i++) {
+            if (group->groupid == devices[i].group_id) {
+                fds[reset->count++] = group->fd;
+                break;
+            }
+        }
+    }
+
+    /* Bus reset! */
+    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset);
+    g_free(reset);
+
+    return ret;
+}
+
 static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
 {
     PCIDevice *pdev = &vdev->pdev;
@@ -1889,9 +1931,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
     VFIOGroup *group;
     struct vfio_pci_hot_reset_info *info = NULL;
     struct vfio_pci_dependent_device *devices;
-    struct vfio_pci_hot_reset *reset;
-    int32_t *fds;
-    int ret, i, count;
+    int ret, i;
     bool multi = false;
 
     trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi");
@@ -1969,34 +2009,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
         goto out_single;
     }
 
-    /* Determine how many group fds need to be passed */
-    count = 0;
-    QLIST_FOREACH(group, &vfio_group_list, next) {
-        for (i = 0; i < info->count; i++) {
-            if (group->groupid == devices[i].group_id) {
-                count++;
-                break;
-            }
-        }
-    }
-
-    reset = g_malloc0(sizeof(*reset) + (count * sizeof(*fds)));
-    reset->argsz = sizeof(*reset) + (count * sizeof(*fds));
-    fds = &reset->group_fds[0];
-
-    /* Fill in group fds */
-    QLIST_FOREACH(group, &vfio_group_list, next) {
-        for (i = 0; i < info->count; i++) {
-            if (group->groupid == devices[i].group_id) {
-                fds[reset->count++] = group->fd;
-                break;
-            }
-        }
-    }
-
-    /* Bus reset! */
-    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset);
-    g_free(reset);
+    ret = vfio_pci_do_hot_reset(vdev, info);
 
     trace_vfio_pci_hot_reset_result(vdev->vbasedev.name,
                                     ret ? "%m" : "Success");
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Qemu-devel] [PATCH v4 03/10] vfio: add pcie extended capability support
  2016-03-21 10:08 [Qemu-devel] [PATCH v4 00/10] vfio-pci: pass the aer error to guest, part2 Cao jin
  2016-03-21 10:08 ` [Qemu-devel] [PATCH v4 01/10] vfio: extract vfio_get_hot_reset_info as a single function Cao jin
  2016-03-21 10:08 ` [Qemu-devel] [PATCH v4 02/10] vfio: squeeze out vfio_pci_do_hot_reset for support bus reset Cao jin
@ 2016-03-21 10:08 ` Cao jin
  2016-03-21 10:08 ` [Qemu-devel] [PATCH v4 04/10] vfio: add aer support for vfio device Cao jin
                   ` (6 subsequent siblings)
  9 siblings, 0 replies; 21+ messages in thread
From: Cao jin @ 2016-03-21 10:08 UTC (permalink / raw)
  To: qemu-devel; +Cc: chen.fan.fnst, izumi.taku, alex.williamson, mst

From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

For vfio pcie device, we could expose the extended capability on
PCIE bus. due to add a new pcie capability at the tail of the chain,
in order to avoid config space overwritten, we introduce a copy config
for parsing extended caps. and rebuild the pcie extended config space.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 71 insertions(+), 1 deletion(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 1ad47ef..ff14af0 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1528,6 +1528,21 @@ static uint8_t vfio_std_cap_max_size(PCIDevice *pdev, uint8_t pos)
     return next - pos;
 }
 
+
+static uint16_t vfio_ext_cap_max_size(const uint8_t *config, uint16_t pos)
+{
+    uint16_t tmp, next = PCIE_CONFIG_SPACE_SIZE;
+
+    for (tmp = PCI_CONFIG_SPACE_SIZE; tmp;
+        tmp = PCI_EXT_CAP_NEXT(pci_get_long(config + tmp))) {
+        if (tmp > pos && tmp < next) {
+            next = tmp;
+        }
+    }
+
+    return next - pos;
+}
+
 static void vfio_set_word_bits(uint8_t *buf, uint16_t val, uint16_t mask)
 {
     pci_set_word(buf, (pci_get_word(buf) & ~mask) | val);
@@ -1862,16 +1877,71 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
     return 0;
 }
 
+static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
+{
+    PCIDevice *pdev = &vdev->pdev;
+    uint32_t header;
+    uint16_t cap_id, next, size;
+    uint8_t cap_ver;
+    uint8_t *config;
+
+    /*
+     * pcie_add_capability always inserts the new capability at the tail
+     * of the chain.  Therefore to end up with a chain that matches the
+     * physical device, we cache the config space to avoid overwriting
+     * the original config space when we parse the extended capabilities.
+     */
+    config = g_memdup(pdev->config, vdev->config_size);
+
+    for (next = PCI_CONFIG_SPACE_SIZE; next;
+         next = PCI_EXT_CAP_NEXT(pci_get_long(config + next))) {
+        header = pci_get_long(config + next);
+        cap_id = PCI_EXT_CAP_ID(header);
+        cap_ver = PCI_EXT_CAP_VER(header);
+
+        /*
+         * If it becomes important to configure extended capabilities to their
+         * actual size, use this as the default when it's something we don't
+         * recognize. Since QEMU doesn't actually handle many of the config
+         * accesses, exact size doesn't seem worthwhile.
+         */
+        size = vfio_ext_cap_max_size(config, next);
+
+        pcie_add_capability(pdev, cap_id, cap_ver, next, size);
+        pci_set_long(dev->config + next, PCI_EXT_CAP(cap_id, cap_ver, 0));
+
+        /* Use emulated next pointer to allow dropping extended caps */
+        pci_long_test_and_set_mask(vdev->emulated_config_bits + next,
+                                   PCI_EXT_CAP_NEXT_MASK);
+    }
+
+    g_free(config);
+    return 0;
+}
+
 static int vfio_add_capabilities(VFIOPCIDevice *vdev)
 {
     PCIDevice *pdev = &vdev->pdev;
+    int ret;
 
     if (!(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST) ||
         !pdev->config[PCI_CAPABILITY_LIST]) {
         return 0; /* Nothing to add */
     }
 
-    return vfio_add_std_cap(vdev, pdev->config[PCI_CAPABILITY_LIST]);
+    ret = vfio_add_std_cap(vdev, pdev->config[PCI_CAPABILITY_LIST]);
+    if (ret) {
+        return ret;
+    }
+
+    /* on PCI bus, it doesn't make sense to expose extended capabilities. */
+    if (!pci_is_express(pdev) ||
+        !pci_bus_is_express(pdev->bus) ||
+        !pci_get_long(pdev->config + PCI_CONFIG_SPACE_SIZE)) {
+        return 0;
+    }
+
+    return vfio_add_ext_cap(vdev);
 }
 
 static void vfio_pci_pre_reset(VFIOPCIDevice *vdev)
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Qemu-devel] [PATCH v4 04/10] vfio: add aer support for vfio device
  2016-03-21 10:08 [Qemu-devel] [PATCH v4 00/10] vfio-pci: pass the aer error to guest, part2 Cao jin
                   ` (2 preceding siblings ...)
  2016-03-21 10:08 ` [Qemu-devel] [PATCH v4 03/10] vfio: add pcie extended capability support Cao jin
@ 2016-03-21 10:08 ` Cao jin
  2016-03-21 10:08 ` [Qemu-devel] [PATCH v4 05/10] vfio: extending function vfio_pci_host_match to support mask func number Cao jin
                   ` (5 subsequent siblings)
  9 siblings, 0 replies; 21+ messages in thread
From: Cao jin @ 2016-03-21 10:08 UTC (permalink / raw)
  To: qemu-devel; +Cc: chen.fan.fnst, izumi.taku, alex.williamson, mst

From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

Calling pcie_aer_init to initilize aer related registers for
vfio device, then reload physical related registers to expose
device capability.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 hw/vfio/pci.h |  3 +++
 2 files changed, 85 insertions(+), 3 deletions(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index ff14af0..0516d94 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1877,6 +1877,66 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
     return 0;
 }
 
+static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
+                          int pos, uint16_t size)
+{
+    PCIDevice *pdev = &vdev->pdev;
+    PCIDevice *dev_iter;
+    uint8_t type;
+    uint32_t errcap;
+
+    if (!(vdev->features & VFIO_FEATURE_ENABLE_AER)) {
+        pcie_add_capability(pdev, PCI_EXT_CAP_ID_ERR,
+                            cap_ver, pos, size);
+        return 0;
+    }
+
+    dev_iter = pci_bridge_get_device(pdev->bus);
+    if (!dev_iter) {
+        goto error;
+    }
+
+    while (dev_iter) {
+        if (!pci_is_express(dev_iter)) {
+            goto error;
+        }
+
+        type = pcie_cap_get_type(dev_iter);
+        if ((type != PCI_EXP_TYPE_ROOT_PORT &&
+             type != PCI_EXP_TYPE_UPSTREAM &&
+             type != PCI_EXP_TYPE_DOWNSTREAM)) {
+            goto error;
+        }
+
+        if (!dev_iter->exp.aer_cap) {
+            goto error;
+        }
+
+        dev_iter = pci_bridge_get_device(dev_iter->bus);
+    }
+
+    errcap = vfio_pci_read_config(pdev, pos + PCI_ERR_CAP, 4);
+    /*
+     * The ability to record multiple headers is depending on
+     * the state of the Multiple Header Recording Capable bit and
+     * enabled by the Multiple Header Recording Enable bit.
+     */
+    if ((errcap & PCI_ERR_CAP_MHRC) &&
+        (errcap & PCI_ERR_CAP_MHRE)) {
+        pdev->exp.aer_log.log_max = PCIE_AER_LOG_MAX_DEFAULT;
+    } else {
+        pdev->exp.aer_log.log_max = 0;
+    }
+
+    pcie_cap_deverr_init(pdev);
+    return pcie_aer_init(pdev, pos, size);
+
+error:
+    error_report("vfio: Unable to enable AER for device %s, parent bus "
+                 "does not support AER signaling", vdev->vbasedev.name);
+    return -1;
+}
+
 static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
 {
     PCIDevice *pdev = &vdev->pdev;
@@ -1884,6 +1944,7 @@ static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
     uint16_t cap_id, next, size;
     uint8_t cap_ver;
     uint8_t *config;
+    int ret = 0;
 
     /*
      * pcie_add_capability always inserts the new capability at the tail
@@ -1907,16 +1968,29 @@ static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
          */
         size = vfio_ext_cap_max_size(config, next);
 
-        pcie_add_capability(pdev, cap_id, cap_ver, next, size);
-        pci_set_long(dev->config + next, PCI_EXT_CAP(cap_id, cap_ver, 0));
+        switch (cap_id) {
+        case PCI_EXT_CAP_ID_ERR:
+            ret = vfio_setup_aer(vdev, cap_ver, next, size);
+            break;
+        default:
+            pcie_add_capability(pdev, cap_id, cap_ver, next, size);
+            break;
+        }
+
+        if (ret) {
+            goto out;
+        }
+
+        pci_set_long(pdev->config + next, PCI_EXT_CAP(cap_id, cap_ver, 0));
 
         /* Use emulated next pointer to allow dropping extended caps */
         pci_long_test_and_set_mask(vdev->emulated_config_bits + next,
                                    PCI_EXT_CAP_NEXT_MASK);
     }
 
+out:
     g_free(config);
-    return 0;
+    return ret;
 }
 
 static int vfio_add_capabilities(VFIOPCIDevice *vdev)
@@ -2673,6 +2747,11 @@ static int vfio_initfn(PCIDevice *pdev)
         goto out_teardown;
     }
 
+    if ((vdev->features & VFIO_FEATURE_ENABLE_AER) &&
+        !pdev->exp.aer_cap) {
+        goto out_teardown;
+    }
+
     /* QEMU emulates all of MSI & MSIX */
     if (pdev->cap_present & QEMU_PCI_CAP_MSIX) {
         memset(vdev->emulated_config_bits + pdev->msix_cap, 0xff,
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 3976f68..7b3924e 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -15,6 +15,7 @@
 #include "qemu-common.h"
 #include "exec/memory.h"
 #include "hw/pci/pci.h"
+#include "hw/pci/pci_bridge.h"
 #include "hw/vfio/vfio-common.h"
 #include "qemu/event_notifier.h"
 #include "qemu/queue.h"
@@ -128,6 +129,8 @@ typedef struct VFIOPCIDevice {
 #define VFIO_FEATURE_ENABLE_VGA (1 << VFIO_FEATURE_ENABLE_VGA_BIT)
 #define VFIO_FEATURE_ENABLE_REQ_BIT 1
 #define VFIO_FEATURE_ENABLE_REQ (1 << VFIO_FEATURE_ENABLE_REQ_BIT)
+#define VFIO_FEATURE_ENABLE_AER_BIT 2
+#define VFIO_FEATURE_ENABLE_AER (1 << VFIO_FEATURE_ENABLE_AER_BIT)
     int32_t bootindex;
     uint8_t pm_cap;
     bool has_vga;
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Qemu-devel] [PATCH v4 05/10] vfio: extending function vfio_pci_host_match to support mask func number
  2016-03-21 10:08 [Qemu-devel] [PATCH v4 00/10] vfio-pci: pass the aer error to guest, part2 Cao jin
                   ` (3 preceding siblings ...)
  2016-03-21 10:08 ` [Qemu-devel] [PATCH v4 04/10] vfio: add aer support for vfio device Cao jin
@ 2016-03-21 10:08 ` Cao jin
  2016-03-21 21:39   ` Alex Williamson
  2016-03-21 10:08 ` [Qemu-devel] [PATCH v4 06/10] vfio: add check host bus reset is support or not Cao jin
                   ` (4 subsequent siblings)
  9 siblings, 1 reply; 21+ messages in thread
From: Cao jin @ 2016-03-21 10:08 UTC (permalink / raw)
  To: qemu-devel; +Cc: chen.fan.fnst, izumi.taku, alex.williamson, mst

From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 29 ++++++++++++++++++++---------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 0516d94..8842b7f 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2060,14 +2060,25 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
     vfio_intx_enable(vdev);
 }
 
-static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name)
+#define HOST_CMP_FUNC_MASK       (1 << 0)
+static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name,
+                                uint8_t mask)
 {
-    char tmp[13];
+    PCIHostDeviceAddress tmp;
 
-    sprintf(tmp, "%04x:%02x:%02x.%1x", addr->domain,
-            addr->bus, addr->slot, addr->function);
+    if (strlen(name) != 12) {
+        return false;
+    }
+
+    if (sscanf(name, "%04x:%02x:%02x.%1x", &tmp.domain,
+               &tmp.bus, &tmp.slot, &tmp.function) != 4) {
+        return false;
+    }
 
-    return (strcmp(tmp, name) == 0);
+    return (tmp.domain == addr->domain && tmp.bus == addr->bus &&
+            tmp.slot == addr->slot &&
+            ((mask & HOST_CMP_FUNC_MASK) ?
+                1 : (tmp.function == addr->function)));
 }
 
 static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
@@ -2109,7 +2120,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
         trace_vfio_pci_hot_reset_dep_devices(host.domain,
                 host.bus, host.slot, host.function, devices[i].group_id);
 
-        if (vfio_pci_host_match(&host, vdev->vbasedev.name)) {
+        if (vfio_pci_host_match(&host, vdev->vbasedev.name, 0)) {
             continue;
         }
 
@@ -2135,7 +2146,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
                 continue;
             }
             tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
-            if (vfio_pci_host_match(&host, tmp->vbasedev.name)) {
+            if (vfio_pci_host_match(&host, tmp->vbasedev.name, 0)) {
                 if (single) {
                     ret = -EINVAL;
                     goto out_single;
@@ -2170,7 +2181,7 @@ out:
         host.slot = PCI_SLOT(devices[i].devfn);
         host.function = PCI_FUNC(devices[i].devfn);
 
-        if (vfio_pci_host_match(&host, vdev->vbasedev.name)) {
+        if (vfio_pci_host_match(&host, vdev->vbasedev.name, 0)) {
             continue;
         }
 
@@ -2189,7 +2200,7 @@ out:
                 continue;
             }
             tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
-            if (vfio_pci_host_match(&host, tmp->vbasedev.name)) {
+            if (vfio_pci_host_match(&host, tmp->vbasedev.name, 0)) {
                 vfio_pci_post_reset(tmp);
                 break;
             }
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Qemu-devel] [PATCH v4 06/10] vfio: add check host bus reset is support or not
  2016-03-21 10:08 [Qemu-devel] [PATCH v4 00/10] vfio-pci: pass the aer error to guest, part2 Cao jin
                   ` (4 preceding siblings ...)
  2016-03-21 10:08 ` [Qemu-devel] [PATCH v4 05/10] vfio: extending function vfio_pci_host_match to support mask func number Cao jin
@ 2016-03-21 10:08 ` Cao jin
  2016-03-21 21:40   ` Alex Williamson
  2016-03-21 10:08 ` [Qemu-devel] [PATCH v4 07/10] vfio: add check aer functionality for hotplug device Cao jin
                   ` (3 subsequent siblings)
  9 siblings, 1 reply; 21+ messages in thread
From: Cao jin @ 2016-03-21 10:08 UTC (permalink / raw)
  To: qemu-devel; +Cc: chen.fan.fnst, izumi.taku, alex.williamson, mst

From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

when boot up a VM that assigning vfio devices with aer enabled, we
must check the vfio device whether support host bus reset. because
when one error occur. OS driver always recover the device by do a
bus reset, in order to recover the vfio device, qemu must able to do
a host bus reset to recover the device to default status. and for all
affected devices by the bus reset. we must check them whether all
are assigned to the VM and on the same virtual bus. meanwhile, for
simply done, the devices which don't affected by the host bus reset
are not allowed to assign to the same virtual bus.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 205 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 hw/vfio/pci.h |   1 +
 2 files changed, 205 insertions(+), 1 deletion(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 8842b7f..dce3b6d 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -38,6 +38,10 @@
 static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
 static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled);
 
+#define HOST_CMP_FUNC_MASK       (1 << 0)
+static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name,
+                                uint8_t mask);
+
 /*
  * Disabling BAR mmaping can be slow, but toggling it around INTx can
  * also be a huge overhead.  We try to get the best of both worlds by
@@ -1877,6 +1881,185 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
     return 0;
 }
 
+static void vfio_check_hot_bus_reset(VFIOPCIDevice *vdev, Error **errp)
+{
+    PCIBus *bus = vdev->pdev.bus;
+    struct vfio_pci_hot_reset_info *info = NULL;
+    struct vfio_pci_dependent_device *devices;
+    VFIOGroup *group;
+    int ret, i, devfn;
+
+    ret = vfio_get_hot_reset_info(vdev, &info);
+    if (ret) {
+        error_setg(errp, "vfio: Cannot enable AER for device %s,"
+                   " device does not support hot reset.",
+                   vdev->vbasedev.name);
+        return;
+    }
+
+    /* List all affected devices by bus reset */
+    devices = &info->devices[0];
+
+    /* Verify that we have all the groups required */
+    for (i = 0; i < info->count; i++) {
+        PCIHostDeviceAddress host;
+        VFIOPCIDevice *tmp;
+        VFIODevice *vbasedev_iter;
+        bool found = false;
+
+        host.domain = devices[i].segment;
+        host.bus = devices[i].bus;
+        host.slot = PCI_SLOT(devices[i].devfn);
+        host.function = PCI_FUNC(devices[i].devfn);
+
+        /* Skip the current device */
+        if (vfio_pci_host_match(&host, vdev->vbasedev.name, 0)) {
+            continue;
+        }
+
+        /* Ensure we own the group of the affected device */
+        QLIST_FOREACH(group, &vfio_group_list, next) {
+            if (group->groupid == devices[i].group_id) {
+                break;
+            }
+        }
+
+        if (!group) {
+            error_setg(errp, "vfio: Cannot enable AER for device %s, "
+                       "depends on group %d which is not owned.",
+                       vdev->vbasedev.name, devices[i].group_id);
+            goto out;
+        }
+
+        /* Ensure affected devices for reset on the same bus */
+        QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
+            if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
+                continue;
+            }
+            tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
+            if (vfio_pci_host_match(&host, tmp->vbasedev.name, 0)) {
+                /*
+                 * AER errors may be broadcast to all functions of a multi-
+                 * function endpoint.  If any of those sibling functions are
+                 * also assigned, they need to have AER enabled or else an
+                 * error may continue to cause a vm_stop condition.  IOW,
+                 * AER setup of this function would be pointless.
+                 */
+                if (vfio_pci_host_match(&host, vdev->vbasedev.name,
+                                        HOST_CMP_FUNC_MASK) &&
+                    !(tmp->features & VFIO_FEATURE_ENABLE_AER)) {
+                    error_setg(errp, "vfio: Cannot enable AER for device %s, on same slot"
+                               " the dependent device %s which does not enable AER.",
+                               vdev->vbasedev.name, tmp->vbasedev.name);
+                    goto out;
+                }
+
+                if (tmp->pdev.bus != bus) {
+                    error_setg(errp, "vfio: Cannot enable AER for device %s, "
+                               "the dependent device %s is not on the same bus",
+                               vdev->vbasedev.name, tmp->vbasedev.name);
+                    goto out;
+                }
+                found = true;
+                break;
+            }
+        }
+
+        /* Ensure all affected devices assigned to VM */
+        if (!found) {
+            error_setg(errp, "vfio: Cannot enable AER for device %s, "
+                       "the dependent device %04x:%02x:%02x.%x "
+                       "is not assigned to VM.",
+                       vdev->vbasedev.name, host.domain, host.bus,
+                       host.slot, host.function);
+            goto out;
+        }
+    }
+
+    /*
+     * To make things simply, functions must are combined
+     * in the same way as on the host. no other irrelative
+     * functions on the virtual same bus was required.
+     */
+    for (devfn = 0; devfn < 8; devfn++) {
+        VFIOPCIDevice *tmp;
+        PCIDevice *dev;
+        bool found = false;
+
+        dev = pci_find_device(bus, pci_bus_num(bus),
+                  PCI_DEVFN(PCI_SLOT(vdev->pdev.devfn), devfn));
+
+        if (!dev) {
+            continue;
+        }
+
+        if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
+            error_setg(errp, "vfio: Cannot enable AER for device %s, "
+                       "only support all dependent devices on the same bus, "
+                       "%s is not one of the dependent devices.",
+                       vdev->vbasedev.name, dev->name);
+            goto out;
+        }
+
+        tmp = DO_UPCAST(VFIOPCIDevice, pdev, dev);
+        for (i = 0; i < info->count; i++) {
+            PCIHostDeviceAddress host;
+
+            host.domain = devices[i].segment;
+            host.bus = devices[i].bus;
+            host.slot = PCI_SLOT(devices[i].devfn);
+            host.function = PCI_FUNC(devices[i].devfn);
+
+            if (vfio_pci_host_match(&host, tmp->vbasedev.name, 0)) {
+                found = true;
+                break;
+            }
+        }
+
+        if (!found) {
+            error_setg(errp, "vfio: Cannot enable AER for device %s, "
+                       "only support all dependent devices on the same bus, "
+                       "%s is not one of the dependent devices.",
+                       vdev->vbasedev.name, tmp->vbasedev.name);
+            goto out;
+        }
+    }
+
+out:
+    g_free(info);
+    return;
+}
+
+static void vfio_aer_check_host_bus_reset(Error **errp)
+{
+    VFIOGroup *group;
+    VFIODevice *vbasedev;
+    VFIOPCIDevice *vdev;
+    Error *local_err = NULL;
+
+    /* Check All vfio-pci devices if have bus reset capability */
+    QLIST_FOREACH(group, &vfio_group_list, next) {
+        QLIST_FOREACH(vbasedev, &group->device_list, next) {
+            if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) {
+                continue;
+            }
+            vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
+            if (vdev->features & VFIO_FEATURE_ENABLE_AER) {
+                if (!pci_get_function_0(&vdev->pdev)) {
+                    continue;
+                }
+                vfio_check_hot_bus_reset(vdev, &local_err);
+                if (local_err) {
+                    error_propagate(errp, local_err);
+                    return;
+                }
+            }
+        }
+    }
+
+    return;
+}
+
 static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
                           int pos, uint16_t size)
 {
@@ -2060,7 +2243,6 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
     vfio_intx_enable(vdev);
 }
 
-#define HOST_CMP_FUNC_MASK       (1 << 0)
 static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name,
                                 uint8_t mask)
 {
@@ -2587,6 +2769,22 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
     vdev->req_enabled = false;
 }
 
+static void vfio_pci_machine_done_notify(Notifier *notifier, void *unused)
+{
+    Error *local_err = NULL;
+
+    vfio_aer_check_host_bus_reset(&local_err);
+    if (local_err) {
+        fprintf(stderr, "%s\n", error_get_pretty(local_err));
+        error_free(local_err);
+        exit(1);
+    }
+}
+
+static Notifier machine_notifier = {
+    .notify = vfio_pci_machine_done_notify,
+};
+
 static int vfio_initfn(PCIDevice *pdev)
 {
     VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
@@ -2932,6 +3130,11 @@ static const TypeInfo vfio_pci_dev_info = {
 static void register_vfio_pci_dev_type(void)
 {
     type_register_static(&vfio_pci_dev_info);
+    /*
+     * Register notifier when machine init is done, since we need
+     * check the configration manner after all vfio device are inited.
+     */
+    qemu_add_machine_init_done_notifier(&machine_notifier);
 }
 
 type_init(register_vfio_pci_dev_type)
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 7b3924e..db7c6d5 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -15,6 +15,7 @@
 #include "qemu-common.h"
 #include "exec/memory.h"
 #include "hw/pci/pci.h"
+#include "hw/pci/pci_bus.h"
 #include "hw/pci/pci_bridge.h"
 #include "hw/vfio/vfio-common.h"
 #include "qemu/event_notifier.h"
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Qemu-devel] [PATCH v4 07/10] vfio: add check aer functionality for hotplug device
  2016-03-21 10:08 [Qemu-devel] [PATCH v4 00/10] vfio-pci: pass the aer error to guest, part2 Cao jin
                   ` (5 preceding siblings ...)
  2016-03-21 10:08 ` [Qemu-devel] [PATCH v4 06/10] vfio: add check host bus reset is support or not Cao jin
@ 2016-03-21 10:08 ` Cao jin
  2016-03-21 21:40   ` Alex Williamson
  2016-03-21 10:08 ` [Qemu-devel] [PATCH v4 08/10] vfio: vote the function 0 to do host bus reset when aer occurred Cao jin
                   ` (2 subsequent siblings)
  9 siblings, 1 reply; 21+ messages in thread
From: Cao jin @ 2016-03-21 10:08 UTC (permalink / raw)
  To: qemu-devel; +Cc: chen.fan.fnst, izumi.taku, alex.williamson, mst

From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

because we make the vfio functions are combined
in the same way as on the host for aer, so we can
do the aer check when the function 0 was hotplugged.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index dce3b6d..9902c87 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2030,6 +2030,35 @@ out:
     return;
 }
 
+static void vfio_bus_check_aer_functions(PCIDevice *pdev, Error **errp)
+{
+    VFIOPCIDevice *vdev;
+    PCIDevice *dev;
+    Error *local_err = NULL;
+    int devfn;
+
+    for (devfn = 0; devfn < 8; devfn++) {
+        dev = pci_find_device(pdev->bus, pci_bus_num(pdev->bus),
+                  PCI_DEVFN(PCI_SLOT(pdev->devfn), devfn));
+        if (!dev) {
+            continue;
+        }
+        if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
+            continue;
+        }
+        vdev = DO_UPCAST(VFIOPCIDevice, pdev, dev);
+        if (vdev->features & VFIO_FEATURE_ENABLE_AER) {
+            vfio_check_hot_bus_reset(vdev, &local_err);
+            if (local_err) {
+                error_propagate(errp, local_err);
+                return;
+            }
+        }
+    }
+
+    return;
+}
+
 static void vfio_aer_check_host_bus_reset(Error **errp)
 {
     VFIOGroup *group;
@@ -2982,6 +3011,22 @@ static int vfio_initfn(PCIDevice *pdev)
         }
     }
 
+    /*
+     *  If this function is func 0, indicate the closure of the slot.
+     *  we get the chance to check aer-enabled devices whether support
+     *  hot bus reset.
+     */
+    if (DEVICE(pdev)->hotplugged &&
+        pdev == pci_get_function_0(pdev)) {
+        Error *local_err = NULL;
+
+        vfio_bus_check_aer_functions(pdev, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            goto out_teardown;
+        }
+    }
+
     vfio_register_err_notifier(vdev);
     vfio_register_req_notifier(vdev);
     vfio_setup_resetfn_quirk(vdev);
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Qemu-devel] [PATCH v4 08/10] vfio: vote the function 0 to do host bus reset when aer occurred
  2016-03-21 10:08 [Qemu-devel] [PATCH v4 00/10] vfio-pci: pass the aer error to guest, part2 Cao jin
                   ` (6 preceding siblings ...)
  2016-03-21 10:08 ` [Qemu-devel] [PATCH v4 07/10] vfio: add check aer functionality for hotplug device Cao jin
@ 2016-03-21 10:08 ` Cao jin
  2016-03-21 21:40   ` Alex Williamson
  2016-03-21 10:08 ` [Qemu-devel] [PATCH v4 09/10] vfio-pci: pass the aer error to guest Cao jin
  2016-03-21 10:08 ` [Qemu-devel] [PATCH v4 10/10] vfio: add 'aer' property to expose aercap Cao jin
  9 siblings, 1 reply; 21+ messages in thread
From: Cao jin @ 2016-03-21 10:08 UTC (permalink / raw)
  To: qemu-devel; +Cc: chen.fan.fnst, izumi.taku, alex.williamson, mst

From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

Due to all devices assigned to VM on the same way as host if enable
aer, so we can easily do the hot reset by selecting the function #0
to do the hot reset.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/vfio/pci.h |  2 ++
 2 files changed, 52 insertions(+)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 9902c87..718cde7 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1900,6 +1900,8 @@ static void vfio_check_hot_bus_reset(VFIOPCIDevice *vdev, Error **errp)
     /* List all affected devices by bus reset */
     devices = &info->devices[0];
 
+    vdev->single_depend_dev = (info->count == 1);
+
     /* Verify that we have all the groups required */
     for (i = 0; i < info->count; i++) {
         PCIHostDeviceAddress host;
@@ -2608,11 +2610,36 @@ static void vfio_put_device(VFIOPCIDevice *vdev)
 static void vfio_err_notifier_handler(void *opaque)
 {
     VFIOPCIDevice *vdev = opaque;
+    PCIDevice *pdev = &vdev->pdev;
 
     if (!event_notifier_test_and_clear(&vdev->err_notifier)) {
         return;
     }
 
+    if (vdev->features & VFIO_FEATURE_ENABLE_AER) {
+        VFIOPCIDevice *tmp;
+        PCIDevice *dev;
+        int devfn;
+
+        /*
+         * If one device has aer capability on a bus, when aer occurred,
+         * we should notify all devices on the bus there was an aer arrived,
+         * then we are able to vote the device #0 to do host bus reset.
+         */
+        for (devfn = 0; devfn < 8; devfn++) {
+            dev = pci_find_device(pdev->bus, pci_bus_num(pdev->bus),
+                      PCI_DEVFN(PCI_SLOT(pdev->devfn), devfn));
+            if (!dev) {
+                continue;
+            }
+            if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
+                continue;
+            }
+            tmp = DO_UPCAST(VFIOPCIDevice, pdev, dev);
+            tmp->aer_occurred = true;
+        }
+    }
+
     /*
      * TBD. Retrieve the error details and decide what action
      * needs to be taken. One of the actions could be to pass
@@ -3075,6 +3102,29 @@ static void vfio_pci_reset(DeviceState *dev)
 
     trace_vfio_pci_reset(vdev->vbasedev.name);
 
+    if (vdev->aer_occurred) {
+        PCIDevice *br = pci_bridge_get_device(pdev->bus);
+
+        if (br &&
+            (pci_get_word(br->config + PCI_BRIDGE_CONTROL) &
+             PCI_BRIDGE_CTL_BUS_RESET)) {
+            /* simply voting the function 0 to do hot bus reset */
+            if (pci_get_function_0(pdev) == pdev) {
+                if (vdev->features & VFIO_FEATURE_ENABLE_AER) {
+                    vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
+                } else {
+                    /* if this device has not AER capability, code
+                     * coming here indicates there is another function
+                     * on the bus has AER capability.
+                     * */
+                    vfio_pci_hot_reset(vdev, false);
+                }
+            }
+            vdev->aer_occurred = false;
+            return;
+        }
+    }
+
     vfio_pci_pre_reset(vdev);
 
     if (vdev->resetfn && !vdev->resetfn(vdev)) {
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index db7c6d5..17c75b8 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -143,6 +143,8 @@ typedef struct VFIOPCIDevice {
     bool no_kvm_intx;
     bool no_kvm_msi;
     bool no_kvm_msix;
+    bool aer_occurred;
+    bool single_depend_dev;
 } VFIOPCIDevice;
 
 uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Qemu-devel] [PATCH v4 09/10] vfio-pci: pass the aer error to guest
  2016-03-21 10:08 [Qemu-devel] [PATCH v4 00/10] vfio-pci: pass the aer error to guest, part2 Cao jin
                   ` (7 preceding siblings ...)
  2016-03-21 10:08 ` [Qemu-devel] [PATCH v4 08/10] vfio: vote the function 0 to do host bus reset when aer occurred Cao jin
@ 2016-03-21 10:08 ` Cao jin
  2016-03-21 10:08 ` [Qemu-devel] [PATCH v4 10/10] vfio: add 'aer' property to expose aercap Cao jin
  9 siblings, 0 replies; 21+ messages in thread
From: Cao jin @ 2016-03-21 10:08 UTC (permalink / raw)
  To: qemu-devel; +Cc: chen.fan.fnst, izumi.taku, alex.williamson, mst

From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

when the vfio device encounters an uncorrectable error in host,
the vfio_pci driver will signal the eventfd registered by this
vfio device, resulting in the qemu eventfd handler getting
invoked.

this patch is to pass the error to guest and let the guest driver
recover from the error.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 48 insertions(+), 6 deletions(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 718cde7..d2f47ee 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2611,6 +2611,11 @@ static void vfio_err_notifier_handler(void *opaque)
 {
     VFIOPCIDevice *vdev = opaque;
     PCIDevice *pdev = &vdev->pdev;
+    Error *local_err = NULL;
+    PCIEAERMsg msg = {
+        .severity = 0,
+        .source_id = (pci_bus_num(pdev->bus) << 8) | pdev->devfn,
+    };
 
     if (!event_notifier_test_and_clear(&vdev->err_notifier)) {
         return;
@@ -2622,6 +2627,16 @@ static void vfio_err_notifier_handler(void *opaque)
         int devfn;
 
         /*
+         * in case the real hardware configuration has been changed,
+         * here we should recheck the bus reset capability.
+         */
+        vfio_check_hot_bus_reset(vdev, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            goto stop;
+        }
+
+        /*
          * If one device has aer capability on a bus, when aer occurred,
          * we should notify all devices on the bus there was an aer arrived,
          * then we are able to vote the device #0 to do host bus reset.
@@ -2638,15 +2653,42 @@ static void vfio_err_notifier_handler(void *opaque)
             tmp = DO_UPCAST(VFIOPCIDevice, pdev, dev);
             tmp->aer_occurred = true;
         }
+
+        /*
+         * we should read the error details from the real hardware
+         * configuration spaces, here we only need to do is signaling
+         * to guest an uncorrectable error has occurred.
+         */
+        if (pdev->exp.aer_cap) {
+            uint8_t *aer_cap = pdev->config + pdev->exp.aer_cap;
+            uint32_t uncor_status;
+            bool isfatal;
+
+            uncor_status = vfio_pci_read_config(pdev,
+                               pdev->exp.aer_cap + PCI_ERR_UNCOR_STATUS, 4);
+
+            /*
+             * if the error is not emitted by this device, we can
+             * just ignore it.
+             */
+            if (!(uncor_status & ~0UL)) {
+                return;
+            }
+
+            isfatal = uncor_status & pci_get_long(aer_cap + PCI_ERR_UNCOR_SEVER);
+
+            msg.severity = isfatal ? PCI_ERR_ROOT_CMD_FATAL_EN :
+                                     PCI_ERR_ROOT_CMD_NONFATAL_EN;
+
+            pcie_aer_msg(pdev, &msg);
+            return;
+        }
     }
 
+stop:
     /*
-     * TBD. Retrieve the error details and decide what action
-     * needs to be taken. One of the actions could be to pass
-     * the error to the guest and have the guest driver recover
-     * from the error. This requires that PCIe capabilities be
-     * exposed to the guest. For now, we just terminate the
-     * guest to contain the error.
+     * If the aer capability is not exposed to the guest. we just
+     * terminate the guest to contain the error.
      */
 
     error_report("%s(%s) Unrecoverable error detected. Please collect any data possible and then kill the guest", __func__, vdev->vbasedev.name);
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Qemu-devel] [PATCH v4 10/10] vfio: add 'aer' property to expose aercap
  2016-03-21 10:08 [Qemu-devel] [PATCH v4 00/10] vfio-pci: pass the aer error to guest, part2 Cao jin
                   ` (8 preceding siblings ...)
  2016-03-21 10:08 ` [Qemu-devel] [PATCH v4 09/10] vfio-pci: pass the aer error to guest Cao jin
@ 2016-03-21 10:08 ` Cao jin
  9 siblings, 0 replies; 21+ messages in thread
From: Cao jin @ 2016-03-21 10:08 UTC (permalink / raw)
  To: qemu-devel; +Cc: chen.fan.fnst, izumi.taku, alex.williamson, mst

From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

add 'aer' property to let user able to decide whether expose
the aer capability. by default we should disable aer feature,
because it needs configuration restrictions.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index d2f47ee..aa40870 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3225,6 +3225,8 @@ static Property vfio_pci_dev_properties[] = {
                        sub_vendor_id, PCI_ANY_ID),
     DEFINE_PROP_UINT32("x-pci-sub-device-id", VFIOPCIDevice,
                        sub_device_id, PCI_ANY_ID),
+    DEFINE_PROP_BIT("aer", VFIOPCIDevice, features,
+                    VFIO_FEATURE_ENABLE_AER_BIT, false),
     /*
      * TODO - support passed fds... is this necessary?
      * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name),
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH v4 05/10] vfio: extending function vfio_pci_host_match to support mask func number
  2016-03-21 10:08 ` [Qemu-devel] [PATCH v4 05/10] vfio: extending function vfio_pci_host_match to support mask func number Cao jin
@ 2016-03-21 21:39   ` Alex Williamson
  0 siblings, 0 replies; 21+ messages in thread
From: Alex Williamson @ 2016-03-21 21:39 UTC (permalink / raw)
  To: Cao jin; +Cc: chen.fan.fnst, izumi.taku, qemu-devel, mst

On Mon, 21 Mar 2016 18:08:41 +0800
Cao jin <caoj.fnst@cn.fujitsu.com> wrote:

> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> 
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> ---
>  hw/vfio/pci.c | 29 ++++++++++++++++++++---------
>  1 file changed, 20 insertions(+), 9 deletions(-)
> 
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 0516d94..8842b7f 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -2060,14 +2060,25 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
>      vfio_intx_enable(vdev);
>  }
>  
> -static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name)
> +#define HOST_CMP_FUNC_MASK       (1 << 0)
> +static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name,
> +                                uint8_t mask)
>  {
> -    char tmp[13];
> +    PCIHostDeviceAddress tmp;
>  
> -    sprintf(tmp, "%04x:%02x:%02x.%1x", addr->domain,
> -            addr->bus, addr->slot, addr->function);
> +    if (strlen(name) != 12) {
> +        return false;
> +    }
> +
> +    if (sscanf(name, "%04x:%02x:%02x.%1x", &tmp.domain,
> +               &tmp.bus, &tmp.slot, &tmp.function) != 4) {
> +        return false;
> +    }
>  
> -    return (strcmp(tmp, name) == 0);
> +    return (tmp.domain == addr->domain && tmp.bus == addr->bus &&
> +            tmp.slot == addr->slot &&
> +            ((mask & HOST_CMP_FUNC_MASK) ?
> +                1 : (tmp.function == addr->function)));
>  }

I'd probably go for something like:

static int vfio_pci_name_to_addr(const char *name, PCIHostDeviceAddress *addr)
{
    if (strlen(name) != 12 ||
        sscanf(name, "%04x:%02x:%02x.%1x", &addr->domain,
               &addr->bus, &addr->slot, &addr->function) != 4 ) {
        return -EINVAL;
    }

    return 0;
}

static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name)
{
    PCIHostDeviceAddress tmp;

    if (vfio_pci_name_to_addr(name, &tmp)) {
        return false;
    }

    return (tmp.domain == addr->domain && tmp.bus == addr->bus &&
            tmp.slot == addr->slot && tmp.function == addr->function);
}

Then a _slot version that avoids skips the function comparison.  The
mask argument doesn't make much sense for such a simple function when
the code duplication is trivial.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH v4 06/10] vfio: add check host bus reset is support or not
  2016-03-21 10:08 ` [Qemu-devel] [PATCH v4 06/10] vfio: add check host bus reset is support or not Cao jin
@ 2016-03-21 21:40   ` Alex Williamson
  2016-03-22 10:02     ` Chen Fan
  0 siblings, 1 reply; 21+ messages in thread
From: Alex Williamson @ 2016-03-21 21:40 UTC (permalink / raw)
  To: Cao jin; +Cc: chen.fan.fnst, izumi.taku, qemu-devel, mst

On Mon, 21 Mar 2016 18:08:42 +0800
Cao jin <caoj.fnst@cn.fujitsu.com> wrote:

> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> 
> when boot up a VM that assigning vfio devices with aer enabled, we
> must check the vfio device whether support host bus reset. because
> when one error occur. OS driver always recover the device by do a
> bus reset, in order to recover the vfio device, qemu must able to do
> a host bus reset to recover the device to default status. and for all
> affected devices by the bus reset. we must check them whether all
> are assigned to the VM and on the same virtual bus. meanwhile, for
> simply done, the devices which don't affected by the host bus reset
> are not allowed to assign to the same virtual bus.

Rewording/expansion suggestion:

  When assigning a vfio device with AER enabled, we must check whether
  the device supports a host bus reset (ie. hot reset) as this may be
  used by the guest OS in order to recover the device from an AER
  error.  QEMU must therefore have the ability to perform a physical
  host bus reset using the existing vfio APIs in response to a virtual
  bus reset in the VM.  A physical bus reset affects all of the devices
  on the host bus, therefore we place a few simplifying configuration
  restriction on the VM:

   - All physical devices affected by a bus reset must be assigned to
     the VM with AER enabled on each and be configured on the same
     virtual bus in the VM.

   - No devices unaffected by the bus reset, be they physical, emulated,
     or paravirtual may be configured on the same virtual bus as a
     device supporting AER signaling through vfio.

  In other words users wishing to enable AER on a multifunction device
  need to assign all functions of the device to the same virtual bus
  and enable AER support for each device.  The easiest way to
  accomplish this is to identity map the physical functions to virtual
  functions with multifunction enabled on the virtual device.

> 
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> ---
>  hw/vfio/pci.c | 205 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  hw/vfio/pci.h |   1 +
>  2 files changed, 205 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 8842b7f..dce3b6d 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -38,6 +38,10 @@
>  static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
>  static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled);
>  
> +#define HOST_CMP_FUNC_MASK       (1 << 0)
> +static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name,
> +                                uint8_t mask);
> +

This would of course go back to a _slot version of the function as
you've had in previous versions.

>  /*
>   * Disabling BAR mmaping can be slow, but toggling it around INTx can
>   * also be a huge overhead.  We try to get the best of both worlds by
> @@ -1877,6 +1881,185 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
>      return 0;
>  }
>  
> +static void vfio_check_hot_bus_reset(VFIOPCIDevice *vdev, Error **errp)
> +{
> +    PCIBus *bus = vdev->pdev.bus;
> +    struct vfio_pci_hot_reset_info *info = NULL;
> +    struct vfio_pci_dependent_device *devices;
> +    VFIOGroup *group;
> +    int ret, i, devfn;
> +
> +    ret = vfio_get_hot_reset_info(vdev, &info);
> +    if (ret) {
> +        error_setg(errp, "vfio: Cannot enable AER for device %s,"
> +                   " device does not support hot reset.",
> +                   vdev->vbasedev.name);
> +        return;
> +    }
> +
> +    /* List all affected devices by bus reset */
> +    devices = &info->devices[0];
> +
> +    /* Verify that we have all the groups required */
> +    for (i = 0; i < info->count; i++) {
> +        PCIHostDeviceAddress host;
> +        VFIOPCIDevice *tmp;
> +        VFIODevice *vbasedev_iter;
> +        bool found = false;
> +
> +        host.domain = devices[i].segment;
> +        host.bus = devices[i].bus;
> +        host.slot = PCI_SLOT(devices[i].devfn);
> +        host.function = PCI_FUNC(devices[i].devfn);
> +
> +        /* Skip the current device */
> +        if (vfio_pci_host_match(&host, vdev->vbasedev.name, 0)) {
> +            continue;
> +        }
> +
> +        /* Ensure we own the group of the affected device */
> +        QLIST_FOREACH(group, &vfio_group_list, next) {
> +            if (group->groupid == devices[i].group_id) {
> +                break;
> +            }
> +        }
> +
> +        if (!group) {
> +            error_setg(errp, "vfio: Cannot enable AER for device %s, "
> +                       "depends on group %d which is not owned.",
> +                       vdev->vbasedev.name, devices[i].group_id);
> +            goto out;
> +        }
> +
> +        /* Ensure affected devices for reset on the same bus */
> +        QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
> +            if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
> +                continue;
> +            }
> +            tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
> +            if (vfio_pci_host_match(&host, tmp->vbasedev.name, 0)) {
> +                /*
> +                 * AER errors may be broadcast to all functions of a multi-
> +                 * function endpoint.  If any of those sibling functions are
> +                 * also assigned, they need to have AER enabled or else an
> +                 * error may continue to cause a vm_stop condition.  IOW,
> +                 * AER setup of this function would be pointless.
> +                 */
> +                if (vfio_pci_host_match(&host, vdev->vbasedev.name,
> +                                        HOST_CMP_FUNC_MASK) &&
> +                    !(tmp->features & VFIO_FEATURE_ENABLE_AER)) {
> +                    error_setg(errp, "vfio: Cannot enable AER for device %s, on same slot"
> +                               " the dependent device %s which does not enable AER.",
> +                               vdev->vbasedev.name, tmp->vbasedev.name);
> +                    goto out;
> +                }
> +
> +                if (tmp->pdev.bus != bus) {
> +                    error_setg(errp, "vfio: Cannot enable AER for device %s, "
> +                               "the dependent device %s is not on the same bus",
> +                               vdev->vbasedev.name, tmp->vbasedev.name);
> +                    goto out;
> +                }
> +                found = true;
> +                break;
> +            }
> +        }
> +
> +        /* Ensure all affected devices assigned to VM */
> +        if (!found) {
> +            error_setg(errp, "vfio: Cannot enable AER for device %s, "
> +                       "the dependent device %04x:%02x:%02x.%x "
> +                       "is not assigned to VM.",
> +                       vdev->vbasedev.name, host.domain, host.bus,
> +                       host.slot, host.function);
> +            goto out;
> +        }
> +    }
> +
> +    /*
> +     * To make things simply, functions must are combined
> +     * in the same way as on the host. no other irrelative
> +     * functions on the virtual same bus was required.
> +     */

Suggestion:

  The above code verified that all devices affected by a bus reset
  exist on the same bus in the VM.  To further simplify, we also
  require that there are no additional devices beyond those existing on
  the VM bus.

> +    for (devfn = 0; devfn < 8; devfn++) {

What about ARI?  Don't we potentially need to walk up to 255 here if
the parent bridge device has ARI enabled?  Should we always walk up
through 255 to prevent users from using non-zero slot numbers for other
devices?

> +        VFIOPCIDevice *tmp;
> +        PCIDevice *dev;
> +        bool found = false;
> +
> +        dev = pci_find_device(bus, pci_bus_num(bus),
> +                  PCI_DEVFN(PCI_SLOT(vdev->pdev.devfn), devfn));
> +
> +        if (!dev) {
> +            continue;
> +        }
> +
> +        if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
> +            error_setg(errp, "vfio: Cannot enable AER for device %s, "
> +                       "only support all dependent devices on the same bus, "
> +                       "%s is not one of the dependent devices.",
> +                       vdev->vbasedev.name, dev->name);

"vfio: Cannot enable AER for device %s, device %s cannot be configured
on the same virtual bus"

Is dev->name sufficiently descriptive?  Perhaps print the conflicting
PCI address as well.

> +            goto out;
> +        }
> +
> +        tmp = DO_UPCAST(VFIOPCIDevice, pdev, dev);
> +        for (i = 0; i < info->count; i++) {
> +            PCIHostDeviceAddress host;
> +
> +            host.domain = devices[i].segment;
> +            host.bus = devices[i].bus;
> +            host.slot = PCI_SLOT(devices[i].devfn);
> +            host.function = PCI_FUNC(devices[i].devfn);
> +
> +            if (vfio_pci_host_match(&host, tmp->vbasedev.name, 0)) {
> +                found = true;
> +                break;
> +            }
> +        }
> +
> +        if (!found) {
> +            error_setg(errp, "vfio: Cannot enable AER for device %s, "
> +                       "only support all dependent devices on the same bus, "
> +                       "%s is not one of the dependent devices.",
> +                       vdev->vbasedev.name, tmp->vbasedev.name);

Update this comment similar to above too.

> +            goto out;
> +        }
> +    }
> +
> +out:
> +    g_free(info);
> +    return;
> +}
> +
> +static void vfio_aer_check_host_bus_reset(Error **errp)
> +{
> +    VFIOGroup *group;
> +    VFIODevice *vbasedev;
> +    VFIOPCIDevice *vdev;
> +    Error *local_err = NULL;
> +
> +    /* Check All vfio-pci devices if have bus reset capability */
> +    QLIST_FOREACH(group, &vfio_group_list, next) {
> +        QLIST_FOREACH(vbasedev, &group->device_list, next) {
> +            if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) {
> +                continue;
> +            }
> +            vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
> +            if (vdev->features & VFIO_FEATURE_ENABLE_AER) {
> +                if (!pci_get_function_0(&vdev->pdev)) {
> +                    continue;
> +                }

Is this function_0 test really necessary?  It seems like it's only
relevant to the hotplug case, in which case it should be included in
the next patch, but even there I don't see a path where we'll get here
when function 0 is not populated.  Is this perhaps leftover from
previous versions?  It actually seems like it's opening a gap where
function 0 could be a non-vfio-pci device (or non-existent) and bypass
our validation check here.

> +                vfio_check_hot_bus_reset(vdev, &local_err);
> +                if (local_err) {
> +                    error_propagate(errp, local_err);
> +                    return;
> +                }
> +            }
> +        }
> +    }
> +
> +    return;
> +}
> +
>  static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
>                            int pos, uint16_t size)
>  {
> @@ -2060,7 +2243,6 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
>      vfio_intx_enable(vdev);
>  }
>  
> -#define HOST_CMP_FUNC_MASK       (1 << 0)
>  static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name,
>                                  uint8_t mask)
>  {
> @@ -2587,6 +2769,22 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
>      vdev->req_enabled = false;
>  }
>  
> +static void vfio_pci_machine_done_notify(Notifier *notifier, void *unused)
> +{
> +    Error *local_err = NULL;
> +
> +    vfio_aer_check_host_bus_reset(&local_err);
> +    if (local_err) {
> +        fprintf(stderr, "%s\n", error_get_pretty(local_err));
> +        error_free(local_err);
> +        exit(1);
> +    }
> +}
> +
> +static Notifier machine_notifier = {
> +    .notify = vfio_pci_machine_done_notify,
> +};
> +
>  static int vfio_initfn(PCIDevice *pdev)
>  {
>      VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> @@ -2932,6 +3130,11 @@ static const TypeInfo vfio_pci_dev_info = {
>  static void register_vfio_pci_dev_type(void)
>  {
>      type_register_static(&vfio_pci_dev_info);
> +    /*
> +     * Register notifier when machine init is done, since we need
> +     * check the configration manner after all vfio device are inited.
> +     */

Suggestion:

  The AER configuration may depend on multiple devices, so we cannot
  validate consistency after each device is initialized.  We can only
  depend on function initialization order (function 0 last) for hotplug
  devices, therefore a machine-init-done notifier is used to validate
  the configuration after all cold-plug devices are processed.

> +    qemu_add_machine_init_done_notifier(&machine_notifier);
>  }
>  
>  type_init(register_vfio_pci_dev_type)
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index 7b3924e..db7c6d5 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -15,6 +15,7 @@
>  #include "qemu-common.h"
>  #include "exec/memory.h"
>  #include "hw/pci/pci.h"
> +#include "hw/pci/pci_bus.h"
>  #include "hw/pci/pci_bridge.h"
>  #include "hw/vfio/vfio-common.h"
>  #include "qemu/event_notifier.h"

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH v4 07/10] vfio: add check aer functionality for hotplug device
  2016-03-21 10:08 ` [Qemu-devel] [PATCH v4 07/10] vfio: add check aer functionality for hotplug device Cao jin
@ 2016-03-21 21:40   ` Alex Williamson
  0 siblings, 0 replies; 21+ messages in thread
From: Alex Williamson @ 2016-03-21 21:40 UTC (permalink / raw)
  To: Cao jin; +Cc: chen.fan.fnst, izumi.taku, qemu-devel, mst

On Mon, 21 Mar 2016 18:08:43 +0800
Cao jin <caoj.fnst@cn.fujitsu.com> wrote:

> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> 
> because we make the vfio functions are combined
> in the same way as on the host for aer, so we can
> do the aer check when the function 0 was hotplugged.

Suggestion:

  PCI hotplug requires that function 0 is added last to close the
  slot.  Since we require that the VM bus contains the same set of
  devices as the host bus to support AER, we can perform an AER
  validation test whenever a function 0 in the VM is hot-added.

> 
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> ---
>  hw/vfio/pci.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 45 insertions(+)
> 
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index dce3b6d..9902c87 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -2030,6 +2030,35 @@ out:
>      return;
>  }
>  
> +static void vfio_bus_check_aer_functions(PCIDevice *pdev, Error **errp)
> +{
> +    VFIOPCIDevice *vdev;
> +    PCIDevice *dev;
> +    Error *local_err = NULL;
> +    int devfn;
> +
> +    for (devfn = 0; devfn < 8; devfn++) {

ARI question again.  Perhaps always use 0-255?

> +        dev = pci_find_device(pdev->bus, pci_bus_num(pdev->bus),
> +                  PCI_DEVFN(PCI_SLOT(pdev->devfn), devfn));
> +        if (!dev) {
> +            continue;
> +        }
> +        if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
> +            continue;
> +        }
> +        vdev = DO_UPCAST(VFIOPCIDevice, pdev, dev);
> +        if (vdev->features & VFIO_FEATURE_ENABLE_AER) {
> +            vfio_check_hot_bus_reset(vdev, &local_err);
> +            if (local_err) {
> +                error_propagate(errp, local_err);
> +                return;
> +            }
> +        }
> +    }
> +
> +    return;
> +}
> +
>  static void vfio_aer_check_host_bus_reset(Error **errp)
>  {
>      VFIOGroup *group;
> @@ -2982,6 +3011,22 @@ static int vfio_initfn(PCIDevice *pdev)
>          }
>      }
>  
> +    /*
> +     *  If this function is func 0, indicate the closure of the slot.
> +     *  we get the chance to check aer-enabled devices whether support
> +     *  hot bus reset.
> +     */
> +    if (DEVICE(pdev)->hotplugged &&
> +        pdev == pci_get_function_0(pdev)) {
> +        Error *local_err = NULL;
> +
> +        vfio_bus_check_aer_functions(pdev, &local_err);
> +        if (local_err) {
> +            error_report_err(local_err);
> +            goto out_teardown;
> +        }
> +    }
> +
>      vfio_register_err_notifier(vdev);
>      vfio_register_req_notifier(vdev);
>      vfio_setup_resetfn_quirk(vdev);

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH v4 08/10] vfio: vote the function 0 to do host bus reset when aer occurred
  2016-03-21 10:08 ` [Qemu-devel] [PATCH v4 08/10] vfio: vote the function 0 to do host bus reset when aer occurred Cao jin
@ 2016-03-21 21:40   ` Alex Williamson
  2016-03-22 10:14     ` Chen Fan
                       ` (2 more replies)
  0 siblings, 3 replies; 21+ messages in thread
From: Alex Williamson @ 2016-03-21 21:40 UTC (permalink / raw)
  To: Cao jin; +Cc: chen.fan.fnst, izumi.taku, qemu-devel, mst

On Mon, 21 Mar 2016 18:08:44 +0800
Cao jin <caoj.fnst@cn.fujitsu.com> wrote:

> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> 
> Due to all devices assigned to VM on the same way as host if enable
> aer, so we can easily do the hot reset by selecting the function #0
> to do the hot reset.
> 
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> ---
>  hw/vfio/pci.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  hw/vfio/pci.h |  2 ++
>  2 files changed, 52 insertions(+)
> 
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 9902c87..718cde7 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -1900,6 +1900,8 @@ static void vfio_check_hot_bus_reset(VFIOPCIDevice *vdev, Error **errp)
>      /* List all affected devices by bus reset */
>      devices = &info->devices[0];
>  
> +    vdev->single_depend_dev = (info->count == 1);
> +
>      /* Verify that we have all the groups required */
>      for (i = 0; i < info->count; i++) {
>          PCIHostDeviceAddress host;
> @@ -2608,11 +2610,36 @@ static void vfio_put_device(VFIOPCIDevice *vdev)
>  static void vfio_err_notifier_handler(void *opaque)
>  {
>      VFIOPCIDevice *vdev = opaque;
> +    PCIDevice *pdev = &vdev->pdev;
>  
>      if (!event_notifier_test_and_clear(&vdev->err_notifier)) {
>          return;
>      }
>  
> +    if (vdev->features & VFIO_FEATURE_ENABLE_AER) {
> +        VFIOPCIDevice *tmp;
> +        PCIDevice *dev;
> +        int devfn;
> +
> +        /*
> +         * If one device has aer capability on a bus, when aer occurred,
> +         * we should notify all devices on the bus there was an aer arrived,
> +         * then we are able to vote the device #0 to do host bus reset.
> +         */
> +        for (devfn = 0; devfn < 8; devfn++) {

ARI?

> +            dev = pci_find_device(pdev->bus, pci_bus_num(pdev->bus),
> +                      PCI_DEVFN(PCI_SLOT(pdev->devfn), devfn));
> +            if (!dev) {
> +                continue;
> +            }
> +            if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
> +                continue;
> +            }
> +            tmp = DO_UPCAST(VFIOPCIDevice, pdev, dev);
> +            tmp->aer_occurred = true;
> +        }
> +    }
> +
>      /*
>       * TBD. Retrieve the error details and decide what action
>       * needs to be taken. One of the actions could be to pass
> @@ -3075,6 +3102,29 @@ static void vfio_pci_reset(DeviceState *dev)
>  
>      trace_vfio_pci_reset(vdev->vbasedev.name);
>  
> +    if (vdev->aer_occurred) {
> +        PCIDevice *br = pci_bridge_get_device(pdev->bus);
> +
> +        if (br &&
> +            (pci_get_word(br->config + PCI_BRIDGE_CONTROL) &
> +             PCI_BRIDGE_CTL_BUS_RESET)) {
> +            /* simply voting the function 0 to do hot bus reset */
> +            if (pci_get_function_0(pdev) == pdev) {
> +                if (vdev->features & VFIO_FEATURE_ENABLE_AER) {
> +                    vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
> +                } else {
> +                    /* if this device has not AER capability, code
> +                     * coming here indicates there is another function
> +                     * on the bus has AER capability.
> +                     * */

This shouldn't be possible, right?

> +                    vfio_pci_hot_reset(vdev, false);
> +                }
> +            }
> +            vdev->aer_occurred = false;
> +            return;
> +        }
> +    }

Why do we care than an AER occurred now?  Can't we simply test:

    if (vdev->features & VFIO_FEATURE_ENABLE_AER &&
        pci_get_function_0(pdev) == pdev) {
        PCIDevice *br = pci_bridge_get_device(pdev->bus);

        if (pci_get_word(br->config + PCI_BRIDGE_CONTROL) &
            PCI_BRIDGE_CTL_BUS_RESET)) {

            vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
            return;
        }
    }

> +
>      vfio_pci_pre_reset(vdev);
>  
>      if (vdev->resetfn && !vdev->resetfn(vdev)) {
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index db7c6d5..17c75b8 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -143,6 +143,8 @@ typedef struct VFIOPCIDevice {
>      bool no_kvm_intx;
>      bool no_kvm_msi;
>      bool no_kvm_msix;
> +    bool aer_occurred;
> +    bool single_depend_dev;
>  } VFIOPCIDevice;
>  
>  uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH v4 06/10] vfio: add check host bus reset is support or not
  2016-03-21 21:40   ` Alex Williamson
@ 2016-03-22 10:02     ` Chen Fan
  2016-03-22 16:04       ` Alex Williamson
  0 siblings, 1 reply; 21+ messages in thread
From: Chen Fan @ 2016-03-22 10:02 UTC (permalink / raw)
  To: Alex Williamson, Cao jin; +Cc: izumi.taku, qemu-devel, mst

[-- Attachment #1: Type: text/plain, Size: 14639 bytes --]


On 03/22/2016 05:40 AM, Alex Williamson wrote:
> On Mon, 21 Mar 2016 18:08:42 +0800
> Cao jin <caoj.fnst@cn.fujitsu.com> wrote:
>
>> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>>
>> when boot up a VM that assigning vfio devices with aer enabled, we
>> must check the vfio device whether support host bus reset. because
>> when one error occur. OS driver always recover the device by do a
>> bus reset, in order to recover the vfio device, qemu must able to do
>> a host bus reset to recover the device to default status. and for all
>> affected devices by the bus reset. we must check them whether all
>> are assigned to the VM and on the same virtual bus. meanwhile, for
>> simply done, the devices which don't affected by the host bus reset
>> are not allowed to assign to the same virtual bus.
> Rewording/expansion suggestion:
>
>    When assigning a vfio device with AER enabled, we must check whether
>    the device supports a host bus reset (ie. hot reset) as this may be
>    used by the guest OS in order to recover the device from an AER
>    error.  QEMU must therefore have the ability to perform a physical
>    host bus reset using the existing vfio APIs in response to a virtual
>    bus reset in the VM.  A physical bus reset affects all of the devices
>    on the host bus, therefore we place a few simplifying configuration
>    restriction on the VM:
>
>     - All physical devices affected by a bus reset must be assigned to
>       the VM with AER enabled on each and be configured on the same
>       virtual bus in the VM.
>
>     - No devices unaffected by the bus reset, be they physical, emulated,
>       or paravirtual may be configured on the same virtual bus as a
>       device supporting AER signaling through vfio.
>
>    In other words users wishing to enable AER on a multifunction device
>    need to assign all functions of the device to the same virtual bus
>    and enable AER support for each device.  The easiest way to
>    accomplish this is to identity map the physical functions to virtual
>    functions with multifunction enabled on the virtual device.

nice description. thanks :)
>
>> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>> ---
>>   hw/vfio/pci.c | 205 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>>   hw/vfio/pci.h |   1 +
>>   2 files changed, 205 insertions(+), 1 deletion(-)
>>
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index 8842b7f..dce3b6d 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -38,6 +38,10 @@
>>   static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
>>   static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled);
>>   
>> +#define HOST_CMP_FUNC_MASK       (1 << 0)
>> +static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name,
>> +                                uint8_t mask);
>> +
> This would of course go back to a _slot version of the function as
> you've had in previous versions.
>
>>   /*
>>    * Disabling BAR mmaping can be slow, but toggling it around INTx can
>>    * also be a huge overhead.  We try to get the best of both worlds by
>> @@ -1877,6 +1881,185 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
>>       return 0;
>>   }
>>   
>> +static void vfio_check_hot_bus_reset(VFIOPCIDevice *vdev, Error **errp)
>> +{
>> +    PCIBus *bus = vdev->pdev.bus;
>> +    struct vfio_pci_hot_reset_info *info = NULL;
>> +    struct vfio_pci_dependent_device *devices;
>> +    VFIOGroup *group;
>> +    int ret, i, devfn;
>> +
>> +    ret = vfio_get_hot_reset_info(vdev, &info);
>> +    if (ret) {
>> +        error_setg(errp, "vfio: Cannot enable AER for device %s,"
>> +                   " device does not support hot reset.",
>> +                   vdev->vbasedev.name);
>> +        return;
>> +    }
>> +
>> +    /* List all affected devices by bus reset */
>> +    devices = &info->devices[0];
>> +
>> +    /* Verify that we have all the groups required */
>> +    for (i = 0; i < info->count; i++) {
>> +        PCIHostDeviceAddress host;
>> +        VFIOPCIDevice *tmp;
>> +        VFIODevice *vbasedev_iter;
>> +        bool found = false;
>> +
>> +        host.domain = devices[i].segment;
>> +        host.bus = devices[i].bus;
>> +        host.slot = PCI_SLOT(devices[i].devfn);
>> +        host.function = PCI_FUNC(devices[i].devfn);
>> +
>> +        /* Skip the current device */
>> +        if (vfio_pci_host_match(&host, vdev->vbasedev.name, 0)) {
>> +            continue;
>> +        }
>> +
>> +        /* Ensure we own the group of the affected device */
>> +        QLIST_FOREACH(group, &vfio_group_list, next) {
>> +            if (group->groupid == devices[i].group_id) {
>> +                break;
>> +            }
>> +        }
>> +
>> +        if (!group) {
>> +            error_setg(errp, "vfio: Cannot enable AER for device %s, "
>> +                       "depends on group %d which is not owned.",
>> +                       vdev->vbasedev.name, devices[i].group_id);
>> +            goto out;
>> +        }
>> +
>> +        /* Ensure affected devices for reset on the same bus */
>> +        QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
>> +            if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
>> +                continue;
>> +            }
>> +            tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
>> +            if (vfio_pci_host_match(&host, tmp->vbasedev.name, 0)) {
>> +                /*
>> +                 * AER errors may be broadcast to all functions of a multi-
>> +                 * function endpoint.  If any of those sibling functions are
>> +                 * also assigned, they need to have AER enabled or else an
>> +                 * error may continue to cause a vm_stop condition.  IOW,
>> +                 * AER setup of this function would be pointless.
>> +                 */
>> +                if (vfio_pci_host_match(&host, vdev->vbasedev.name,
>> +                                        HOST_CMP_FUNC_MASK) &&
>> +                    !(tmp->features & VFIO_FEATURE_ENABLE_AER)) {
>> +                    error_setg(errp, "vfio: Cannot enable AER for device %s, on same slot"
>> +                               " the dependent device %s which does not enable AER.",
>> +                               vdev->vbasedev.name, tmp->vbasedev.name);
>> +                    goto out;
>> +                }
>> +
>> +                if (tmp->pdev.bus != bus) {
>> +                    error_setg(errp, "vfio: Cannot enable AER for device %s, "
>> +                               "the dependent device %s is not on the same bus",
>> +                               vdev->vbasedev.name, tmp->vbasedev.name);
>> +                    goto out;
>> +                }
>> +                found = true;
>> +                break;
>> +            }
>> +        }
>> +
>> +        /* Ensure all affected devices assigned to VM */
>> +        if (!found) {
>> +            error_setg(errp, "vfio: Cannot enable AER for device %s, "
>> +                       "the dependent device %04x:%02x:%02x.%x "
>> +                       "is not assigned to VM.",
>> +                       vdev->vbasedev.name, host.domain, host.bus,
>> +                       host.slot, host.function);
>> +            goto out;
>> +        }
>> +    }
>> +
>> +    /*
>> +     * To make things simply, functions must are combined
>> +     * in the same way as on the host. no other irrelative
>> +     * functions on the virtual same bus was required.
>> +     */
> Suggestion:
>
>    The above code verified that all devices affected by a bus reset
>    exist on the same bus in the VM.  To further simplify, we also
>    require that there are no additional devices beyond those existing on
>    the VM bus.
>
>> +    for (devfn = 0; devfn < 8; devfn++) {
> What about ARI?  Don't we potentially need to walk up to 255 here if
> the parent bridge device has ARI enabled?  Should we always walk up
> through 255 to prevent users from using non-zero slot numbers for other
> devices?
can we use the function pcie_cap_is_arifwd_enabled to check the bridge
whether enable ARI?, if don't enable, we can simple scan the slot 0, 
otherwise,
we should walk up to 255.

>> +        VFIOPCIDevice *tmp;
>> +        PCIDevice *dev;
>> +        bool found = false;
>> +
>> +        dev = pci_find_device(bus, pci_bus_num(bus),
>> +                  PCI_DEVFN(PCI_SLOT(vdev->pdev.devfn), devfn));
>> +
>> +        if (!dev) {
>> +            continue;
>> +        }
>> +
>> +        if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
>> +            error_setg(errp, "vfio: Cannot enable AER for device %s, "
>> +                       "only support all dependent devices on the same bus, "
>> +                       "%s is not one of the dependent devices.",
>> +                       vdev->vbasedev.name, dev->name);
> "vfio: Cannot enable AER for device %s, device %s cannot be configured
> on the same virtual bus"
>
> Is dev->name sufficiently descriptive?  Perhaps print the conflicting
> PCI address as well.
>
>> +            goto out;
>> +        }
>> +
>> +        tmp = DO_UPCAST(VFIOPCIDevice, pdev, dev);
>> +        for (i = 0; i < info->count; i++) {
>> +            PCIHostDeviceAddress host;
>> +
>> +            host.domain = devices[i].segment;
>> +            host.bus = devices[i].bus;
>> +            host.slot = PCI_SLOT(devices[i].devfn);
>> +            host.function = PCI_FUNC(devices[i].devfn);
>> +
>> +            if (vfio_pci_host_match(&host, tmp->vbasedev.name, 0)) {
>> +                found = true;
>> +                break;
>> +            }
>> +        }
>> +
>> +        if (!found) {
>> +            error_setg(errp, "vfio: Cannot enable AER for device %s, "
>> +                       "only support all dependent devices on the same bus, "
>> +                       "%s is not one of the dependent devices.",
>> +                       vdev->vbasedev.name, tmp->vbasedev.name);
> Update this comment similar to above too.
>
>> +            goto out;
>> +        }
>> +    }
>> +
>> +out:
>> +    g_free(info);
>> +    return;
>> +}
>> +
>> +static void vfio_aer_check_host_bus_reset(Error **errp)
>> +{
>> +    VFIOGroup *group;
>> +    VFIODevice *vbasedev;
>> +    VFIOPCIDevice *vdev;
>> +    Error *local_err = NULL;
>> +
>> +    /* Check All vfio-pci devices if have bus reset capability */
>> +    QLIST_FOREACH(group, &vfio_group_list, next) {
>> +        QLIST_FOREACH(vbasedev, &group->device_list, next) {
>> +            if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) {
>> +                continue;
>> +            }
>> +            vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
>> +            if (vdev->features & VFIO_FEATURE_ENABLE_AER) {
>> +                if (!pci_get_function_0(&vdev->pdev)) {
>> +                    continue;
>> +                }
> Is this function_0 test really necessary?  It seems like it's only
> relevant to the hotplug case, in which case it should be included in
> the next patch, but even there I don't see a path where we'll get here
> when function 0 is not populated.  Is this perhaps leftover from
> previous versions?  It actually seems like it's opening a gap where
> function 0 could be a non-vfio-pci device (or non-existent) and bypass
> our validation check here.
yes, you are right, as for adding the function_0 test, I just thought that
if we boot up VM without function_0 in command line , whether we should
test the other not_function_0 vfio device with aer on the bus support 
hot reset.
if we don't add the function 0 test, we probably can't boot up the VM.
because the affected function 0 is not added yet. I never thought that
this open a gap that the function 0 could be a non-vfio-pci device, thanks
for pointing it out. for hotplug case, we also potential hotplug a 
not-vfio-pci device
and bypass our check. so I think the check still need to place in 
device_realize like
previous version?

Thanks,
Chen

>
>> +                vfio_check_hot_bus_reset(vdev, &local_err);
>> +                if (local_err) {
>> +                    error_propagate(errp, local_err);
>> +                    return;
>> +                }
>> +            }
>> +        }
>> +    }
>> +
>> +    return;
>> +}
>> +
>>   static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
>>                             int pos, uint16_t size)
>>   {
>> @@ -2060,7 +2243,6 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
>>       vfio_intx_enable(vdev);
>>   }
>>   
>> -#define HOST_CMP_FUNC_MASK       (1 << 0)
>>   static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name,
>>                                   uint8_t mask)
>>   {
>> @@ -2587,6 +2769,22 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
>>       vdev->req_enabled = false;
>>   }
>>   
>> +static void vfio_pci_machine_done_notify(Notifier *notifier, void *unused)
>> +{
>> +    Error *local_err = NULL;
>> +
>> +    vfio_aer_check_host_bus_reset(&local_err);
>> +    if (local_err) {
>> +        fprintf(stderr, "%s\n", error_get_pretty(local_err));
>> +        error_free(local_err);
>> +        exit(1);
>> +    }
>> +}
>> +
>> +static Notifier machine_notifier = {
>> +    .notify = vfio_pci_machine_done_notify,
>> +};
>> +
>>   static int vfio_initfn(PCIDevice *pdev)
>>   {
>>       VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
>> @@ -2932,6 +3130,11 @@ static const TypeInfo vfio_pci_dev_info = {
>>   static void register_vfio_pci_dev_type(void)
>>   {
>>       type_register_static(&vfio_pci_dev_info);
>> +    /*
>> +     * Register notifier when machine init is done, since we need
>> +     * check the configration manner after all vfio device are inited.
>> +     */
> Suggestion:
>
>    The AER configuration may depend on multiple devices, so we cannot
>    validate consistency after each device is initialized.  We can only
>    depend on function initialization order (function 0 last) for hotplug
>    devices, therefore a machine-init-done notifier is used to validate
>    the configuration after all cold-plug devices are processed.
>
>> +    qemu_add_machine_init_done_notifier(&machine_notifier);
>>   }
>>   
>>   type_init(register_vfio_pci_dev_type)
>> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
>> index 7b3924e..db7c6d5 100644
>> --- a/hw/vfio/pci.h
>> +++ b/hw/vfio/pci.h
>> @@ -15,6 +15,7 @@
>>   #include "qemu-common.h"
>>   #include "exec/memory.h"
>>   #include "hw/pci/pci.h"
>> +#include "hw/pci/pci_bus.h"
>>   #include "hw/pci/pci_bridge.h"
>>   #include "hw/vfio/vfio-common.h"
>>   #include "qemu/event_notifier.h"
>
>
> .
>




[-- Attachment #2: Type: text/html, Size: 16074 bytes --]

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH v4 08/10] vfio: vote the function 0 to do host bus reset when aer occurred
  2016-03-21 21:40   ` Alex Williamson
@ 2016-03-22 10:14     ` Chen Fan
  2016-03-22 16:07       ` Alex Williamson
  2016-03-23  6:25     ` Chen Fan
  2016-03-23  6:25     ` Chen Fan
  2 siblings, 1 reply; 21+ messages in thread
From: Chen Fan @ 2016-03-22 10:14 UTC (permalink / raw)
  To: Alex Williamson, Cao jin; +Cc: izumi.taku, qemu-devel, mst


On 03/22/2016 05:40 AM, Alex Williamson wrote:
> On Mon, 21 Mar 2016 18:08:44 +0800
> Cao jin <caoj.fnst@cn.fujitsu.com> wrote:
>
>> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>>
>> Due to all devices assigned to VM on the same way as host if enable
>> aer, so we can easily do the hot reset by selecting the function #0
>> to do the hot reset.
>>
>> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>> ---
>>   hw/vfio/pci.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
>>   hw/vfio/pci.h |  2 ++
>>   2 files changed, 52 insertions(+)
>>
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index 9902c87..718cde7 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -1900,6 +1900,8 @@ static void vfio_check_hot_bus_reset(VFIOPCIDevice *vdev, Error **errp)
>>       /* List all affected devices by bus reset */
>>       devices = &info->devices[0];
>>   
>> +    vdev->single_depend_dev = (info->count == 1);
>> +
>>       /* Verify that we have all the groups required */
>>       for (i = 0; i < info->count; i++) {
>>           PCIHostDeviceAddress host;
>> @@ -2608,11 +2610,36 @@ static void vfio_put_device(VFIOPCIDevice *vdev)
>>   static void vfio_err_notifier_handler(void *opaque)
>>   {
>>       VFIOPCIDevice *vdev = opaque;
>> +    PCIDevice *pdev = &vdev->pdev;
>>   
>>       if (!event_notifier_test_and_clear(&vdev->err_notifier)) {
>>           return;
>>       }
>>   
>> +    if (vdev->features & VFIO_FEATURE_ENABLE_AER) {
>> +        VFIOPCIDevice *tmp;
>> +        PCIDevice *dev;
>> +        int devfn;
>> +
>> +        /*
>> +         * If one device has aer capability on a bus, when aer occurred,
>> +         * we should notify all devices on the bus there was an aer arrived,
>> +         * then we are able to vote the device #0 to do host bus reset.
>> +         */
>> +        for (devfn = 0; devfn < 8; devfn++) {
> ARI?
>
>> +            dev = pci_find_device(pdev->bus, pci_bus_num(pdev->bus),
>> +                      PCI_DEVFN(PCI_SLOT(pdev->devfn), devfn));
>> +            if (!dev) {
>> +                continue;
>> +            }
>> +            if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
>> +                continue;
>> +            }
>> +            tmp = DO_UPCAST(VFIOPCIDevice, pdev, dev);
>> +            tmp->aer_occurred = true;
>> +        }
>> +    }
>> +
>>       /*
>>        * TBD. Retrieve the error details and decide what action
>>        * needs to be taken. One of the actions could be to pass
>> @@ -3075,6 +3102,29 @@ static void vfio_pci_reset(DeviceState *dev)
>>   
>>       trace_vfio_pci_reset(vdev->vbasedev.name);
>>   
>> +    if (vdev->aer_occurred) {
>> +        PCIDevice *br = pci_bridge_get_device(pdev->bus);
>> +
>> +        if (br &&
>> +            (pci_get_word(br->config + PCI_BRIDGE_CONTROL) &
>> +             PCI_BRIDGE_CTL_BUS_RESET)) {
>> +            /* simply voting the function 0 to do hot bus reset */
>> +            if (pci_get_function_0(pdev) == pdev) {
>> +                if (vdev->features & VFIO_FEATURE_ENABLE_AER) {
>> +                    vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
>> +                } else {
>> +                    /* if this device has not AER capability, code
>> +                     * coming here indicates there is another function
>> +                     * on the bus has AER capability.
>> +                     * */
> This shouldn't be possible, right?
>
>> +                    vfio_pci_hot_reset(vdev, false);
>> +                }
>> +            }
>> +            vdev->aer_occurred = false;
>> +            return;
>> +        }
>> +    }
> Why do we care than an AER occurred now?  Can't we simply test:
>
>      if (vdev->features & VFIO_FEATURE_ENABLE_AER &&
>          pci_get_function_0(pdev) == pdev) {
>          PCIDevice *br = pci_bridge_get_device(pdev->bus);
>
>          if (pci_get_word(br->config + PCI_BRIDGE_CONTROL) &
>              PCI_BRIDGE_CTL_BUS_RESET)) {
>
>              vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
>              return;
>          }
>      }

do we have the case that only one/few of the devices affected
by a bus reset assigned to VM enabled AER, then when bus
reset, we let the function 0 do hot reset, which may not enable AER,
but we should tell other devices on the bus that they don't need
to do bus reset. so I just mark all devices on the bus when needing a 
hot reset.

Thanks,
Chen



>
>> +
>>       vfio_pci_pre_reset(vdev);
>>   
>>       if (vdev->resetfn && !vdev->resetfn(vdev)) {
>> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
>> index db7c6d5..17c75b8 100644
>> --- a/hw/vfio/pci.h
>> +++ b/hw/vfio/pci.h
>> @@ -143,6 +143,8 @@ typedef struct VFIOPCIDevice {
>>       bool no_kvm_intx;
>>       bool no_kvm_msi;
>>       bool no_kvm_msix;
>> +    bool aer_occurred;
>> +    bool single_depend_dev;
>>   } VFIOPCIDevice;
>>   
>>   uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
>
>
> .
>

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH v4 06/10] vfio: add check host bus reset is support or not
  2016-03-22 10:02     ` Chen Fan
@ 2016-03-22 16:04       ` Alex Williamson
  0 siblings, 0 replies; 21+ messages in thread
From: Alex Williamson @ 2016-03-22 16:04 UTC (permalink / raw)
  To: Chen Fan; +Cc: izumi.taku, Cao jin, qemu-devel, mst

On Tue, 22 Mar 2016 18:02:25 +0800
Chen Fan <chen.fan.fnst@cn.fujitsu.com> wrote:

> On 03/22/2016 05:40 AM, Alex Williamson wrote:
> > On Mon, 21 Mar 2016 18:08:42 +0800
> > Cao jin <caoj.fnst@cn.fujitsu.com> wrote:
> >  
> >> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> >>
> >> when boot up a VM that assigning vfio devices with aer enabled, we
> >> must check the vfio device whether support host bus reset. because
> >> when one error occur. OS driver always recover the device by do a
> >> bus reset, in order to recover the vfio device, qemu must able to do
> >> a host bus reset to recover the device to default status. and for all
> >> affected devices by the bus reset. we must check them whether all
> >> are assigned to the VM and on the same virtual bus. meanwhile, for
> >> simply done, the devices which don't affected by the host bus reset
> >> are not allowed to assign to the same virtual bus.  
> > Rewording/expansion suggestion:
> >
> >    When assigning a vfio device with AER enabled, we must check whether
> >    the device supports a host bus reset (ie. hot reset) as this may be
> >    used by the guest OS in order to recover the device from an AER
> >    error.  QEMU must therefore have the ability to perform a physical
> >    host bus reset using the existing vfio APIs in response to a virtual
> >    bus reset in the VM.  A physical bus reset affects all of the devices
> >    on the host bus, therefore we place a few simplifying configuration
> >    restriction on the VM:
> >
> >     - All physical devices affected by a bus reset must be assigned to
> >       the VM with AER enabled on each and be configured on the same
> >       virtual bus in the VM.
> >
> >     - No devices unaffected by the bus reset, be they physical, emulated,
> >       or paravirtual may be configured on the same virtual bus as a
> >       device supporting AER signaling through vfio.
> >
> >    In other words users wishing to enable AER on a multifunction device
> >    need to assign all functions of the device to the same virtual bus
> >    and enable AER support for each device.  The easiest way to
> >    accomplish this is to identity map the physical functions to virtual
> >    functions with multifunction enabled on the virtual device.  
> 
> nice description. thanks :)
> >  
> >> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> >> ---
> >>   hw/vfio/pci.c | 205 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
> >>   hw/vfio/pci.h |   1 +
> >>   2 files changed, 205 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> >> index 8842b7f..dce3b6d 100644
> >> --- a/hw/vfio/pci.c
> >> +++ b/hw/vfio/pci.c
> >> @@ -38,6 +38,10 @@
> >>   static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
> >>   static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled);
> >>   
> >> +#define HOST_CMP_FUNC_MASK       (1 << 0)
> >> +static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name,
> >> +                                uint8_t mask);
> >> +  
> > This would of course go back to a _slot version of the function as
> > you've had in previous versions.
> >  
> >>   /*
> >>    * Disabling BAR mmaping can be slow, but toggling it around INTx can
> >>    * also be a huge overhead.  We try to get the best of both worlds by
> >> @@ -1877,6 +1881,185 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
> >>       return 0;
> >>   }
> >>   
> >> +static void vfio_check_hot_bus_reset(VFIOPCIDevice *vdev, Error **errp)
> >> +{
> >> +    PCIBus *bus = vdev->pdev.bus;
> >> +    struct vfio_pci_hot_reset_info *info = NULL;
> >> +    struct vfio_pci_dependent_device *devices;
> >> +    VFIOGroup *group;
> >> +    int ret, i, devfn;
> >> +
> >> +    ret = vfio_get_hot_reset_info(vdev, &info);
> >> +    if (ret) {
> >> +        error_setg(errp, "vfio: Cannot enable AER for device %s,"
> >> +                   " device does not support hot reset.",
> >> +                   vdev->vbasedev.name);
> >> +        return;
> >> +    }
> >> +
> >> +    /* List all affected devices by bus reset */
> >> +    devices = &info->devices[0];
> >> +
> >> +    /* Verify that we have all the groups required */
> >> +    for (i = 0; i < info->count; i++) {
> >> +        PCIHostDeviceAddress host;
> >> +        VFIOPCIDevice *tmp;
> >> +        VFIODevice *vbasedev_iter;
> >> +        bool found = false;
> >> +
> >> +        host.domain = devices[i].segment;
> >> +        host.bus = devices[i].bus;
> >> +        host.slot = PCI_SLOT(devices[i].devfn);
> >> +        host.function = PCI_FUNC(devices[i].devfn);
> >> +
> >> +        /* Skip the current device */
> >> +        if (vfio_pci_host_match(&host, vdev->vbasedev.name, 0)) {
> >> +            continue;
> >> +        }
> >> +
> >> +        /* Ensure we own the group of the affected device */
> >> +        QLIST_FOREACH(group, &vfio_group_list, next) {
> >> +            if (group->groupid == devices[i].group_id) {
> >> +                break;
> >> +            }
> >> +        }
> >> +
> >> +        if (!group) {
> >> +            error_setg(errp, "vfio: Cannot enable AER for device %s, "
> >> +                       "depends on group %d which is not owned.",
> >> +                       vdev->vbasedev.name, devices[i].group_id);
> >> +            goto out;
> >> +        }
> >> +
> >> +        /* Ensure affected devices for reset on the same bus */
> >> +        QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
> >> +            if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
> >> +                continue;
> >> +            }
> >> +            tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
> >> +            if (vfio_pci_host_match(&host, tmp->vbasedev.name, 0)) {
> >> +                /*
> >> +                 * AER errors may be broadcast to all functions of a multi-
> >> +                 * function endpoint.  If any of those sibling functions are
> >> +                 * also assigned, they need to have AER enabled or else an
> >> +                 * error may continue to cause a vm_stop condition.  IOW,
> >> +                 * AER setup of this function would be pointless.
> >> +                 */
> >> +                if (vfio_pci_host_match(&host, vdev->vbasedev.name,
> >> +                                        HOST_CMP_FUNC_MASK) &&
> >> +                    !(tmp->features & VFIO_FEATURE_ENABLE_AER)) {
> >> +                    error_setg(errp, "vfio: Cannot enable AER for device %s, on same slot"
> >> +                               " the dependent device %s which does not enable AER.",
> >> +                               vdev->vbasedev.name, tmp->vbasedev.name);
> >> +                    goto out;
> >> +                }
> >> +
> >> +                if (tmp->pdev.bus != bus) {
> >> +                    error_setg(errp, "vfio: Cannot enable AER for device %s, "
> >> +                               "the dependent device %s is not on the same bus",
> >> +                               vdev->vbasedev.name, tmp->vbasedev.name);
> >> +                    goto out;
> >> +                }
> >> +                found = true;
> >> +                break;
> >> +            }
> >> +        }
> >> +
> >> +        /* Ensure all affected devices assigned to VM */
> >> +        if (!found) {
> >> +            error_setg(errp, "vfio: Cannot enable AER for device %s, "
> >> +                       "the dependent device %04x:%02x:%02x.%x "
> >> +                       "is not assigned to VM.",
> >> +                       vdev->vbasedev.name, host.domain, host.bus,
> >> +                       host.slot, host.function);
> >> +            goto out;
> >> +        }
> >> +    }
> >> +
> >> +    /*
> >> +     * To make things simply, functions must are combined
> >> +     * in the same way as on the host. no other irrelative
> >> +     * functions on the virtual same bus was required.
> >> +     */  
> > Suggestion:
> >
> >    The above code verified that all devices affected by a bus reset
> >    exist on the same bus in the VM.  To further simplify, we also
> >    require that there are no additional devices beyond those existing on
> >    the VM bus.
> >  
> >> +    for (devfn = 0; devfn < 8; devfn++) {  
> > What about ARI?  Don't we potentially need to walk up to 255 here if
> > the parent bridge device has ARI enabled?  Should we always walk up
> > through 255 to prevent users from using non-zero slot numbers for other
> > devices?  
> can we use the function pcie_cap_is_arifwd_enabled to check the bridge
> whether enable ARI?, if don't enable, we can simple scan the slot 0, 
> otherwise,
> we should walk up to 255.

That seems reasonable.

> >> +        VFIOPCIDevice *tmp;
> >> +        PCIDevice *dev;
> >> +        bool found = false;
> >> +
> >> +        dev = pci_find_device(bus, pci_bus_num(bus),
> >> +                  PCI_DEVFN(PCI_SLOT(vdev->pdev.devfn), devfn));
> >> +
> >> +        if (!dev) {
> >> +            continue;
> >> +        }
> >> +
> >> +        if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
> >> +            error_setg(errp, "vfio: Cannot enable AER for device %s, "
> >> +                       "only support all dependent devices on the same bus, "
> >> +                       "%s is not one of the dependent devices.",
> >> +                       vdev->vbasedev.name, dev->name);  
> > "vfio: Cannot enable AER for device %s, device %s cannot be configured
> > on the same virtual bus"
> >
> > Is dev->name sufficiently descriptive?  Perhaps print the conflicting
> > PCI address as well.
> >  
> >> +            goto out;
> >> +        }
> >> +
> >> +        tmp = DO_UPCAST(VFIOPCIDevice, pdev, dev);
> >> +        for (i = 0; i < info->count; i++) {
> >> +            PCIHostDeviceAddress host;
> >> +
> >> +            host.domain = devices[i].segment;
> >> +            host.bus = devices[i].bus;
> >> +            host.slot = PCI_SLOT(devices[i].devfn);
> >> +            host.function = PCI_FUNC(devices[i].devfn);
> >> +
> >> +            if (vfio_pci_host_match(&host, tmp->vbasedev.name, 0)) {
> >> +                found = true;
> >> +                break;
> >> +            }
> >> +        }
> >> +
> >> +        if (!found) {
> >> +            error_setg(errp, "vfio: Cannot enable AER for device %s, "
> >> +                       "only support all dependent devices on the same bus, "
> >> +                       "%s is not one of the dependent devices.",
> >> +                       vdev->vbasedev.name, tmp->vbasedev.name);  
> > Update this comment similar to above too.
> >  
> >> +            goto out;
> >> +        }
> >> +    }
> >> +
> >> +out:
> >> +    g_free(info);
> >> +    return;
> >> +}
> >> +
> >> +static void vfio_aer_check_host_bus_reset(Error **errp)
> >> +{
> >> +    VFIOGroup *group;
> >> +    VFIODevice *vbasedev;
> >> +    VFIOPCIDevice *vdev;
> >> +    Error *local_err = NULL;
> >> +
> >> +    /* Check All vfio-pci devices if have bus reset capability */
> >> +    QLIST_FOREACH(group, &vfio_group_list, next) {
> >> +        QLIST_FOREACH(vbasedev, &group->device_list, next) {
> >> +            if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) {
> >> +                continue;
> >> +            }
> >> +            vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
> >> +            if (vdev->features & VFIO_FEATURE_ENABLE_AER) {
> >> +                if (!pci_get_function_0(&vdev->pdev)) {
> >> +                    continue;
> >> +                }  
> > Is this function_0 test really necessary?  It seems like it's only
> > relevant to the hotplug case, in which case it should be included in
> > the next patch, but even there I don't see a path where we'll get here
> > when function 0 is not populated.  Is this perhaps leftover from
> > previous versions?  It actually seems like it's opening a gap where
> > function 0 could be a non-vfio-pci device (or non-existent) and bypass
> > our validation check here.  
> yes, you are right, as for adding the function_0 test, I just thought that
> if we boot up VM without function_0 in command line , whether we should
> test the other not_function_0 vfio device with aer on the bus support 
> hot reset.
> if we don't add the function 0 test, we probably can't boot up the VM.
> because the affected function 0 is not added yet. I never thought that
> this open a gap that the function 0 could be a non-vfio-pci device, thanks
> for pointing it out. for hotplug case, we also potential hotplug a 
> not-vfio-pci device
> and bypass our check. so I think the check still need to place in 
> device_realize like
> previous version?

Yes, I was excited that you had removed all the pci-core dependencies,
but it seems like that's one that we cannot get around.  We cannot
validate the config as each function is added due to dependencies
between functions and we have no guarantee that the user will make
function zero be a vfio-pci device.  It seems like we really do need
that slot closure notification from the core.  Thanks,

Alex

> >> +                vfio_check_hot_bus_reset(vdev, &local_err);
> >> +                if (local_err) {
> >> +                    error_propagate(errp, local_err);
> >> +                    return;
> >> +                }
> >> +            }
> >> +        }
> >> +    }
> >> +
> >> +    return;
> >> +}
> >> +
> >>   static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
> >>                             int pos, uint16_t size)
> >>   {
> >> @@ -2060,7 +2243,6 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
> >>       vfio_intx_enable(vdev);
> >>   }
> >>   
> >> -#define HOST_CMP_FUNC_MASK       (1 << 0)
> >>   static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name,
> >>                                   uint8_t mask)
> >>   {
> >> @@ -2587,6 +2769,22 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
> >>       vdev->req_enabled = false;
> >>   }
> >>   
> >> +static void vfio_pci_machine_done_notify(Notifier *notifier, void *unused)
> >> +{
> >> +    Error *local_err = NULL;
> >> +
> >> +    vfio_aer_check_host_bus_reset(&local_err);
> >> +    if (local_err) {
> >> +        fprintf(stderr, "%s\n", error_get_pretty(local_err));
> >> +        error_free(local_err);
> >> +        exit(1);
> >> +    }
> >> +}
> >> +
> >> +static Notifier machine_notifier = {
> >> +    .notify = vfio_pci_machine_done_notify,
> >> +};
> >> +
> >>   static int vfio_initfn(PCIDevice *pdev)
> >>   {
> >>       VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> >> @@ -2932,6 +3130,11 @@ static const TypeInfo vfio_pci_dev_info = {
> >>   static void register_vfio_pci_dev_type(void)
> >>   {
> >>       type_register_static(&vfio_pci_dev_info);
> >> +    /*
> >> +     * Register notifier when machine init is done, since we need
> >> +     * check the configration manner after all vfio device are inited.
> >> +     */  
> > Suggestion:
> >
> >    The AER configuration may depend on multiple devices, so we cannot
> >    validate consistency after each device is initialized.  We can only
> >    depend on function initialization order (function 0 last) for hotplug
> >    devices, therefore a machine-init-done notifier is used to validate
> >    the configuration after all cold-plug devices are processed.
> >  
> >> +    qemu_add_machine_init_done_notifier(&machine_notifier);
> >>   }
> >>   
> >>   type_init(register_vfio_pci_dev_type)
> >> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> >> index 7b3924e..db7c6d5 100644
> >> --- a/hw/vfio/pci.h
> >> +++ b/hw/vfio/pci.h
> >> @@ -15,6 +15,7 @@
> >>   #include "qemu-common.h"
> >>   #include "exec/memory.h"
> >>   #include "hw/pci/pci.h"
> >> +#include "hw/pci/pci_bus.h"
> >>   #include "hw/pci/pci_bridge.h"
> >>   #include "hw/vfio/vfio-common.h"
> >>   #include "qemu/event_notifier.h"  
> >
> >
> > .
> >  
> 
> 
> 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH v4 08/10] vfio: vote the function 0 to do host bus reset when aer occurred
  2016-03-22 10:14     ` Chen Fan
@ 2016-03-22 16:07       ` Alex Williamson
  0 siblings, 0 replies; 21+ messages in thread
From: Alex Williamson @ 2016-03-22 16:07 UTC (permalink / raw)
  To: Chen Fan; +Cc: izumi.taku, Cao jin, qemu-devel, mst

On Tue, 22 Mar 2016 18:14:45 +0800
Chen Fan <chen.fan.fnst@cn.fujitsu.com> wrote:

> On 03/22/2016 05:40 AM, Alex Williamson wrote:
> > On Mon, 21 Mar 2016 18:08:44 +0800
> > Cao jin <caoj.fnst@cn.fujitsu.com> wrote:
> >  
> >> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> >>
> >> Due to all devices assigned to VM on the same way as host if enable
> >> aer, so we can easily do the hot reset by selecting the function #0
> >> to do the hot reset.
> >>
> >> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> >> ---
> >>   hw/vfio/pci.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
> >>   hw/vfio/pci.h |  2 ++
> >>   2 files changed, 52 insertions(+)
> >>
> >> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> >> index 9902c87..718cde7 100644
> >> --- a/hw/vfio/pci.c
> >> +++ b/hw/vfio/pci.c
> >> @@ -1900,6 +1900,8 @@ static void vfio_check_hot_bus_reset(VFIOPCIDevice *vdev, Error **errp)
> >>       /* List all affected devices by bus reset */
> >>       devices = &info->devices[0];
> >>   
> >> +    vdev->single_depend_dev = (info->count == 1);
> >> +
> >>       /* Verify that we have all the groups required */
> >>       for (i = 0; i < info->count; i++) {
> >>           PCIHostDeviceAddress host;
> >> @@ -2608,11 +2610,36 @@ static void vfio_put_device(VFIOPCIDevice *vdev)
> >>   static void vfio_err_notifier_handler(void *opaque)
> >>   {
> >>       VFIOPCIDevice *vdev = opaque;
> >> +    PCIDevice *pdev = &vdev->pdev;
> >>   
> >>       if (!event_notifier_test_and_clear(&vdev->err_notifier)) {
> >>           return;
> >>       }
> >>   
> >> +    if (vdev->features & VFIO_FEATURE_ENABLE_AER) {
> >> +        VFIOPCIDevice *tmp;
> >> +        PCIDevice *dev;
> >> +        int devfn;
> >> +
> >> +        /*
> >> +         * If one device has aer capability on a bus, when aer occurred,
> >> +         * we should notify all devices on the bus there was an aer arrived,
> >> +         * then we are able to vote the device #0 to do host bus reset.
> >> +         */
> >> +        for (devfn = 0; devfn < 8; devfn++) {  
> > ARI?
> >  
> >> +            dev = pci_find_device(pdev->bus, pci_bus_num(pdev->bus),
> >> +                      PCI_DEVFN(PCI_SLOT(pdev->devfn), devfn));
> >> +            if (!dev) {
> >> +                continue;
> >> +            }
> >> +            if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
> >> +                continue;
> >> +            }
> >> +            tmp = DO_UPCAST(VFIOPCIDevice, pdev, dev);
> >> +            tmp->aer_occurred = true;
> >> +        }
> >> +    }
> >> +
> >>       /*
> >>        * TBD. Retrieve the error details and decide what action
> >>        * needs to be taken. One of the actions could be to pass
> >> @@ -3075,6 +3102,29 @@ static void vfio_pci_reset(DeviceState *dev)
> >>   
> >>       trace_vfio_pci_reset(vdev->vbasedev.name);
> >>   
> >> +    if (vdev->aer_occurred) {
> >> +        PCIDevice *br = pci_bridge_get_device(pdev->bus);
> >> +
> >> +        if (br &&
> >> +            (pci_get_word(br->config + PCI_BRIDGE_CONTROL) &
> >> +             PCI_BRIDGE_CTL_BUS_RESET)) {
> >> +            /* simply voting the function 0 to do hot bus reset */
> >> +            if (pci_get_function_0(pdev) == pdev) {
> >> +                if (vdev->features & VFIO_FEATURE_ENABLE_AER) {
> >> +                    vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
> >> +                } else {
> >> +                    /* if this device has not AER capability, code
> >> +                     * coming here indicates there is another function
> >> +                     * on the bus has AER capability.
> >> +                     * */  
> > This shouldn't be possible, right?
> >  
> >> +                    vfio_pci_hot_reset(vdev, false);
> >> +                }
> >> +            }
> >> +            vdev->aer_occurred = false;
> >> +            return;
> >> +        }
> >> +    }  
> > Why do we care than an AER occurred now?  Can't we simply test:
> >
> >      if (vdev->features & VFIO_FEATURE_ENABLE_AER &&
> >          pci_get_function_0(pdev) == pdev) {
> >          PCIDevice *br = pci_bridge_get_device(pdev->bus);
> >
> >          if (pci_get_word(br->config + PCI_BRIDGE_CONTROL) &
> >              PCI_BRIDGE_CTL_BUS_RESET)) {
> >
> >              vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
> >              return;
> >          }
> >      }  
> 
> do we have the case that only one/few of the devices affected
> by a bus reset assigned to VM enabled AER, then when bus
> reset, we let the function 0 do hot reset, which may not enable AER,
> but we should tell other devices on the bus that they don't need
> to do bus reset. so I just mark all devices on the bus when needing a 
> hot reset.

I thought we were requiring all the bus reset affected devices to
enable AER, so that example should not be possible.  I think it matches
our target use case to make this a requirement.  Thanks,

Alex

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH v4 08/10] vfio: vote the function 0 to do host bus reset when aer occurred
  2016-03-21 21:40   ` Alex Williamson
  2016-03-22 10:14     ` Chen Fan
@ 2016-03-23  6:25     ` Chen Fan
  2016-03-23  6:25     ` Chen Fan
  2 siblings, 0 replies; 21+ messages in thread
From: Chen Fan @ 2016-03-23  6:25 UTC (permalink / raw)
  To: Alex Williamson, Cao jin; +Cc: izumi.taku, qemu-devel, mst


On 03/22/2016 05:40 AM, Alex Williamson wrote:
> On Mon, 21 Mar 2016 18:08:44 +0800
> Cao jin <caoj.fnst@cn.fujitsu.com> wrote:
>
>> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>>
>> Due to all devices assigned to VM on the same way as host if enable
>> aer, so we can easily do the hot reset by selecting the function #0
>> to do the hot reset.
>>
>> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>> ---
>>   hw/vfio/pci.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
>>   hw/vfio/pci.h |  2 ++
>>   2 files changed, 52 insertions(+)
>>
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index 9902c87..718cde7 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -1900,6 +1900,8 @@ static void vfio_check_hot_bus_reset(VFIOPCIDevice *vdev, Error **errp)
>>       /* List all affected devices by bus reset */
>>       devices = &info->devices[0];
>>   
>> +    vdev->single_depend_dev = (info->count == 1);
>> +
>>       /* Verify that we have all the groups required */
>>       for (i = 0; i < info->count; i++) {
>>           PCIHostDeviceAddress host;
>> @@ -2608,11 +2610,36 @@ static void vfio_put_device(VFIOPCIDevice *vdev)
>>   static void vfio_err_notifier_handler(void *opaque)
>>   {
>>       VFIOPCIDevice *vdev = opaque;
>> +    PCIDevice *pdev = &vdev->pdev;
>>   
>>       if (!event_notifier_test_and_clear(&vdev->err_notifier)) {
>>           return;
>>       }
>>   
>> +    if (vdev->features & VFIO_FEATURE_ENABLE_AER) {
>> +        VFIOPCIDevice *tmp;
>> +        PCIDevice *dev;
>> +        int devfn;
>> +
>> +        /*
>> +         * If one device has aer capability on a bus, when aer occurred,
>> +         * we should notify all devices on the bus there was an aer arrived,
>> +         * then we are able to vote the device #0 to do host bus reset.
>> +         */
>> +        for (devfn = 0; devfn < 8; devfn++) {
> ARI?
>
>> +            dev = pci_find_device(pdev->bus, pci_bus_num(pdev->bus),
>> +                      PCI_DEVFN(PCI_SLOT(pdev->devfn), devfn));
>> +            if (!dev) {
>> +                continue;
>> +            }
>> +            if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
>> +                continue;
>> +            }
>> +            tmp = DO_UPCAST(VFIOPCIDevice, pdev, dev);
>> +            tmp->aer_occurred = true;
>> +        }
>> +    }
>> +
>>       /*
>>        * TBD. Retrieve the error details and decide what action
>>        * needs to be taken. One of the actions could be to pass
>> @@ -3075,6 +3102,29 @@ static void vfio_pci_reset(DeviceState *dev)
>>   
>>       trace_vfio_pci_reset(vdev->vbasedev.name);
>>   
>> +    if (vdev->aer_occurred) {
>> +        PCIDevice *br = pci_bridge_get_device(pdev->bus);
>> +
>> +        if (br &&
>> +            (pci_get_word(br->config + PCI_BRIDGE_CONTROL) &
>> +             PCI_BRIDGE_CTL_BUS_RESET)) {
>> +            /* simply voting the function 0 to do hot bus reset */
>> +            if (pci_get_function_0(pdev) == pdev) {
>> +                if (vdev->features & VFIO_FEATURE_ENABLE_AER) {
>> +                    vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
>> +                } else {
>> +                    /* if this device has not AER capability, code
>> +                     * coming here indicates there is another function
>> +                     * on the bus has AER capability.
>> +                     * */
> This shouldn't be possible, right?
>
>> +                    vfio_pci_hot_reset(vdev, false);
>> +                }
>> +            }
>> +            vdev->aer_occurred = false;
>> +            return;
>> +        }
>> +    }
> Why do we care than an AER occurred now?  Can't we simply test:
>
>      if (vdev->features & VFIO_FEATURE_ENABLE_AER &&
>          pci_get_function_0(pdev) == pdev) {
>          PCIDevice *br = pci_bridge_get_device(pdev->bus);
>
>          if (pci_get_word(br->config + PCI_BRIDGE_CONTROL) &
>              PCI_BRIDGE_CTL_BUS_RESET)) {
>
>              vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
>              return;
>          }
>      }
I think this is not practicable, not_function_0 device can't
pass the condition, it would do reset normally. but our intention
is doing nothing for them, and direct return.
I will change this in next version.

Thanks,
CHen

>
>> +
>>       vfio_pci_pre_reset(vdev);
>>   
>>       if (vdev->resetfn && !vdev->resetfn(vdev)) {
>> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
>> index db7c6d5..17c75b8 100644
>> --- a/hw/vfio/pci.h
>> +++ b/hw/vfio/pci.h
>> @@ -143,6 +143,8 @@ typedef struct VFIOPCIDevice {
>>       bool no_kvm_intx;
>>       bool no_kvm_msi;
>>       bool no_kvm_msix;
>> +    bool aer_occurred;
>> +    bool single_depend_dev;
>>   } VFIOPCIDevice;
>>   
>>   uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
>
>
> .
>

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Qemu-devel] [PATCH v4 08/10] vfio: vote the function 0 to do host bus reset when aer occurred
  2016-03-21 21:40   ` Alex Williamson
  2016-03-22 10:14     ` Chen Fan
  2016-03-23  6:25     ` Chen Fan
@ 2016-03-23  6:25     ` Chen Fan
  2 siblings, 0 replies; 21+ messages in thread
From: Chen Fan @ 2016-03-23  6:25 UTC (permalink / raw)
  To: Alex Williamson, Cao jin; +Cc: izumi.taku, qemu-devel, mst


On 03/22/2016 05:40 AM, Alex Williamson wrote:
> On Mon, 21 Mar 2016 18:08:44 +0800
> Cao jin <caoj.fnst@cn.fujitsu.com> wrote:
>
>> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>>
>> Due to all devices assigned to VM on the same way as host if enable
>> aer, so we can easily do the hot reset by selecting the function #0
>> to do the hot reset.
>>
>> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>> ---
>>   hw/vfio/pci.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
>>   hw/vfio/pci.h |  2 ++
>>   2 files changed, 52 insertions(+)
>>
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index 9902c87..718cde7 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -1900,6 +1900,8 @@ static void vfio_check_hot_bus_reset(VFIOPCIDevice *vdev, Error **errp)
>>       /* List all affected devices by bus reset */
>>       devices = &info->devices[0];
>>   
>> +    vdev->single_depend_dev = (info->count == 1);
>> +
>>       /* Verify that we have all the groups required */
>>       for (i = 0; i < info->count; i++) {
>>           PCIHostDeviceAddress host;
>> @@ -2608,11 +2610,36 @@ static void vfio_put_device(VFIOPCIDevice *vdev)
>>   static void vfio_err_notifier_handler(void *opaque)
>>   {
>>       VFIOPCIDevice *vdev = opaque;
>> +    PCIDevice *pdev = &vdev->pdev;
>>   
>>       if (!event_notifier_test_and_clear(&vdev->err_notifier)) {
>>           return;
>>       }
>>   
>> +    if (vdev->features & VFIO_FEATURE_ENABLE_AER) {
>> +        VFIOPCIDevice *tmp;
>> +        PCIDevice *dev;
>> +        int devfn;
>> +
>> +        /*
>> +         * If one device has aer capability on a bus, when aer occurred,
>> +         * we should notify all devices on the bus there was an aer arrived,
>> +         * then we are able to vote the device #0 to do host bus reset.
>> +         */
>> +        for (devfn = 0; devfn < 8; devfn++) {
> ARI?
>
>> +            dev = pci_find_device(pdev->bus, pci_bus_num(pdev->bus),
>> +                      PCI_DEVFN(PCI_SLOT(pdev->devfn), devfn));
>> +            if (!dev) {
>> +                continue;
>> +            }
>> +            if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
>> +                continue;
>> +            }
>> +            tmp = DO_UPCAST(VFIOPCIDevice, pdev, dev);
>> +            tmp->aer_occurred = true;
>> +        }
>> +    }
>> +
>>       /*
>>        * TBD. Retrieve the error details and decide what action
>>        * needs to be taken. One of the actions could be to pass
>> @@ -3075,6 +3102,29 @@ static void vfio_pci_reset(DeviceState *dev)
>>   
>>       trace_vfio_pci_reset(vdev->vbasedev.name);
>>   
>> +    if (vdev->aer_occurred) {
>> +        PCIDevice *br = pci_bridge_get_device(pdev->bus);
>> +
>> +        if (br &&
>> +            (pci_get_word(br->config + PCI_BRIDGE_CONTROL) &
>> +             PCI_BRIDGE_CTL_BUS_RESET)) {
>> +            /* simply voting the function 0 to do hot bus reset */
>> +            if (pci_get_function_0(pdev) == pdev) {
>> +                if (vdev->features & VFIO_FEATURE_ENABLE_AER) {
>> +                    vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
>> +                } else {
>> +                    /* if this device has not AER capability, code
>> +                     * coming here indicates there is another function
>> +                     * on the bus has AER capability.
>> +                     * */
> This shouldn't be possible, right?
>
>> +                    vfio_pci_hot_reset(vdev, false);
>> +                }
>> +            }
>> +            vdev->aer_occurred = false;
>> +            return;
>> +        }
>> +    }
> Why do we care than an AER occurred now?  Can't we simply test:
>
>      if (vdev->features & VFIO_FEATURE_ENABLE_AER &&
>          pci_get_function_0(pdev) == pdev) {
>          PCIDevice *br = pci_bridge_get_device(pdev->bus);
>
>          if (pci_get_word(br->config + PCI_BRIDGE_CONTROL) &
>              PCI_BRIDGE_CTL_BUS_RESET)) {
>
>              vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
>              return;
>          }
>      }
I think this is not practicable, not_function_0 device can't
pass the condition, it would do reset normally. but our intention
is doing nothing for them, and direct return.
I will change this in next version.

Thanks,
Chen

>
>> +
>>       vfio_pci_pre_reset(vdev);
>>   
>>       if (vdev->resetfn && !vdev->resetfn(vdev)) {
>> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
>> index db7c6d5..17c75b8 100644
>> --- a/hw/vfio/pci.h
>> +++ b/hw/vfio/pci.h
>> @@ -143,6 +143,8 @@ typedef struct VFIOPCIDevice {
>>       bool no_kvm_intx;
>>       bool no_kvm_msi;
>>       bool no_kvm_msix;
>> +    bool aer_occurred;
>> +    bool single_depend_dev;
>>   } VFIOPCIDevice;
>>   
>>   uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
>
>
> .
>

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2016-03-23  6:31 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-03-21 10:08 [Qemu-devel] [PATCH v4 00/10] vfio-pci: pass the aer error to guest, part2 Cao jin
2016-03-21 10:08 ` [Qemu-devel] [PATCH v4 01/10] vfio: extract vfio_get_hot_reset_info as a single function Cao jin
2016-03-21 10:08 ` [Qemu-devel] [PATCH v4 02/10] vfio: squeeze out vfio_pci_do_hot_reset for support bus reset Cao jin
2016-03-21 10:08 ` [Qemu-devel] [PATCH v4 03/10] vfio: add pcie extended capability support Cao jin
2016-03-21 10:08 ` [Qemu-devel] [PATCH v4 04/10] vfio: add aer support for vfio device Cao jin
2016-03-21 10:08 ` [Qemu-devel] [PATCH v4 05/10] vfio: extending function vfio_pci_host_match to support mask func number Cao jin
2016-03-21 21:39   ` Alex Williamson
2016-03-21 10:08 ` [Qemu-devel] [PATCH v4 06/10] vfio: add check host bus reset is support or not Cao jin
2016-03-21 21:40   ` Alex Williamson
2016-03-22 10:02     ` Chen Fan
2016-03-22 16:04       ` Alex Williamson
2016-03-21 10:08 ` [Qemu-devel] [PATCH v4 07/10] vfio: add check aer functionality for hotplug device Cao jin
2016-03-21 21:40   ` Alex Williamson
2016-03-21 10:08 ` [Qemu-devel] [PATCH v4 08/10] vfio: vote the function 0 to do host bus reset when aer occurred Cao jin
2016-03-21 21:40   ` Alex Williamson
2016-03-22 10:14     ` Chen Fan
2016-03-22 16:07       ` Alex Williamson
2016-03-23  6:25     ` Chen Fan
2016-03-23  6:25     ` Chen Fan
2016-03-21 10:08 ` [Qemu-devel] [PATCH v4 09/10] vfio-pci: pass the aer error to guest Cao jin
2016-03-21 10:08 ` [Qemu-devel] [PATCH v4 10/10] vfio: add 'aer' property to expose aercap Cao jin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.