All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [RFC v10 00/19] vfio-pci: pass the aer error to guest
@ 2015-06-16  8:10 Chen Fan
  2015-06-16  8:10 ` [Qemu-devel] [RFC v10 01/19] vfio: extract vfio_get_hot_reset_info as a single function Chen Fan
                   ` (18 more replies)
  0 siblings, 19 replies; 27+ messages in thread
From: Chen Fan @ 2015-06-16  8:10 UTC (permalink / raw)
  To: qemu-devel; +Cc: izumi.taku, alex.williamson

For now, for vfio pci passthough devices when qemu receives
an error from host aer report, there just terminate the guest,
but usually user want to know what error occurred but stop the
guest, so this patches add aer capability support for vfio device,
and pass the error to guest, and have guest driver to recover
from the error.
and turning on SERR# for error forwording in bridge control register
patch in seabios has been merged.

v9-v10:
   1. support vfio_get_group with as is NULL. keep group into container.

v8-v9:
   1. add ref to group for devices supported aer could own the group
      without devices in VM.
   2. add check for all aer devices if conflict the topology at each
      initfn time.

Chen Fan (19):
  vfio: extract vfio_get_hot_reset_info as a single function
  vfio: squeeze out vfio_pci_do_hot_reset for support bus reset
  pcie: modify the capability size assert
  vfio: make the 4 bytes aligned for capability size
  vfio: add pcie extanded capability support
  aer: impove pcie_aer_init to support vfio device
  vfio: add aer support for vfio device
  vfio: add ref for group to support own affected groups
  vfio: extract vfio_register_container_listener from
    vfio_connect_container
  vfio: improve vfio_get_group to support adding as is NULL.
  get all affected groups for each device support aer
  vfio: add check host bus reset is support or not
  pci: add bus reset_notifiers callbacks for host bus reset
  vfio: add sec_bus_reset notifier to notify physical bus reset is
    needed
  vfio: improve vfio_pci_hot_reset to support more case
  vfio: do hot bus reset when do virtual secondary bus reset
  pcie_aer: expose pcie_aer_msg() interface
  vfio-pci: pass the aer error to guest
  vfio: add 'aer' property to expose aercap

 hw/pci-bridge/ioh3420.c            |   2 +-
 hw/pci-bridge/xio3130_downstream.c |   2 +-
 hw/pci-bridge/xio3130_upstream.c   |   2 +-
 hw/pci/pci.c                       |  16 +
 hw/pci/pci_bridge.c                |   6 +
 hw/pci/pcie.c                      |   2 +-
 hw/pci/pcie_aer.c                  |   6 +-
 hw/vfio/common.c                   |  83 +++--
 hw/vfio/pci.c                      | 742 +++++++++++++++++++++++++++++++++----
 include/hw/pci/pci.h               |   4 +
 include/hw/pci/pci_bus.h           |   2 +
 include/hw/pci/pcie_aer.h          |   3 +-
 include/hw/vfio/vfio-common.h      |   1 +
 13 files changed, 776 insertions(+), 95 deletions(-)

-- 
1.9.3

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [Qemu-devel] [RFC v10 01/19] vfio: extract vfio_get_hot_reset_info as a single function
  2015-06-16  8:10 [Qemu-devel] [RFC v10 00/19] vfio-pci: pass the aer error to guest Chen Fan
@ 2015-06-16  8:10 ` Chen Fan
  2015-06-16  8:10 ` [Qemu-devel] [RFC v10 02/19] vfio: squeeze out vfio_pci_do_hot_reset for support bus reset Chen Fan
                   ` (17 subsequent siblings)
  18 siblings, 0 replies; 27+ messages in thread
From: Chen Fan @ 2015-06-16  8:10 UTC (permalink / raw)
  To: qemu-devel; +Cc: izumi.taku, alex.williamson

the function is used to get affected devices by bus reset.
so here extract it, and can used for aer soon.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 66 +++++++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 48 insertions(+), 18 deletions(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index e0e339a..4a97ccc 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2641,6 +2641,51 @@ static void vfio_check_af_flr(VFIOPCIDevice *vdev, uint8_t pos)
     }
 }
 
+/*
+ * return negative with errno, return 0 on success.
+ * if success, the point of ret_info fill with the affected device reset info.
+ *
+ */
+static int vfio_get_hot_reset_info(VFIOPCIDevice *vdev,
+                                   struct vfio_pci_hot_reset_info **ret_info)
+{
+    struct vfio_pci_hot_reset_info *info;
+    int ret, count;
+
+    *ret_info = NULL;
+
+    info = g_malloc0(sizeof(*info));
+    info->argsz = sizeof(*info);
+
+    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info);
+    if (ret && errno != ENOSPC) {
+        ret = -errno;
+        goto error;
+    }
+
+    count = info->count;
+
+    info = g_realloc(info, sizeof(*info) +
+                     (count * sizeof(struct vfio_pci_dependent_device)));
+    info->argsz = sizeof(*info) +
+                  (count * sizeof(struct vfio_pci_dependent_device));
+
+    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info);
+    if (ret) {
+        ret = -errno;
+        error_report("vfio: hot reset info failed: %m");
+        goto error;
+    }
+
+    *ret_info = info;
+    info = NULL;
+
+    return 0;
+error:
+    g_free(info);
+    return ret;
+}
+
 static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
 {
     PCIDevice *pdev = &vdev->pdev;
@@ -2780,7 +2825,7 @@ static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
 static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
 {
     VFIOGroup *group;
-    struct vfio_pci_hot_reset_info *info;
+    struct vfio_pci_hot_reset_info *info = NULL;
     struct vfio_pci_dependent_device *devices;
     struct vfio_pci_hot_reset *reset;
     int32_t *fds;
@@ -2792,12 +2837,8 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
     vfio_pci_pre_reset(vdev);
     vdev->vbasedev.needs_reset = false;
 
-    info = g_malloc0(sizeof(*info));
-    info->argsz = sizeof(*info);
-
-    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info);
-    if (ret && errno != ENOSPC) {
-        ret = -errno;
+    ret = vfio_get_hot_reset_info(vdev, &info);
+    if (ret) {
         if (!vdev->has_pm_reset) {
             error_report("vfio: Cannot reset device %04x:%02x:%02x.%x, "
                          "no available reset mechanism.", vdev->host.domain,
@@ -2806,18 +2847,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
         goto out_single;
     }
 
-    count = info->count;
-    info = g_realloc(info, sizeof(*info) + (count * sizeof(*devices)));
-    info->argsz = sizeof(*info) + (count * sizeof(*devices));
     devices = &info->devices[0];
-
-    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info);
-    if (ret) {
-        ret = -errno;
-        error_report("vfio: hot reset info failed: %m");
-        goto out_single;
-    }
-
     trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name);
 
     /* Verify that we have all the groups required */
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [Qemu-devel] [RFC v10 02/19] vfio: squeeze out vfio_pci_do_hot_reset for support bus reset
  2015-06-16  8:10 [Qemu-devel] [RFC v10 00/19] vfio-pci: pass the aer error to guest Chen Fan
  2015-06-16  8:10 ` [Qemu-devel] [RFC v10 01/19] vfio: extract vfio_get_hot_reset_info as a single function Chen Fan
@ 2015-06-16  8:10 ` Chen Fan
  2015-06-16  8:10 ` [Qemu-devel] [RFC v10 03/19] pcie: modify the capability size assert Chen Fan
                   ` (16 subsequent siblings)
  18 siblings, 0 replies; 27+ messages in thread
From: Chen Fan @ 2015-06-16  8:10 UTC (permalink / raw)
  To: qemu-devel; +Cc: izumi.taku, alex.williamson

squeeze out vfio_pci_do_hot_reset to do host bus reset when AER recovery.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 75 +++++++++++++++++++++++++++++++++++------------------------
 1 file changed, 44 insertions(+), 31 deletions(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 4a97ccc..e056c49 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2686,6 +2686,48 @@ error:
     return ret;
 }
 
+static int vfio_pci_do_hot_reset(VFIOPCIDevice *vdev,
+                                 struct vfio_pci_hot_reset_info *info)
+{
+    VFIOGroup *group;
+    struct vfio_pci_hot_reset *reset;
+    int32_t *fds;
+    int ret, i, count;
+    struct vfio_pci_dependent_device *devices;
+
+    /* Determine how many group fds need to be passed */
+    count = 0;
+    devices = &info->devices[0];
+    QLIST_FOREACH(group, &vfio_group_list, next) {
+        for (i = 0; i < info->count; i++) {
+            if (group->groupid == devices[i].group_id) {
+                count++;
+                break;
+            }
+        }
+    }
+
+    reset = g_malloc0(sizeof(*reset) + (count * sizeof(*fds)));
+    reset->argsz = sizeof(*reset) + (count * sizeof(*fds));
+    fds = &reset->group_fds[0];
+
+    /* Fill in group fds */
+    QLIST_FOREACH(group, &vfio_group_list, next) {
+        for (i = 0; i < info->count; i++) {
+            if (group->groupid == devices[i].group_id) {
+                fds[reset->count++] = group->fd;
+                break;
+            }
+        }
+    }
+
+    /* Bus reset! */
+    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset);
+    g_free(reset);
+
+    return ret;
+}
+
 static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
 {
     PCIDevice *pdev = &vdev->pdev;
@@ -2827,9 +2869,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
     VFIOGroup *group;
     struct vfio_pci_hot_reset_info *info = NULL;
     struct vfio_pci_dependent_device *devices;
-    struct vfio_pci_hot_reset *reset;
-    int32_t *fds;
-    int ret, i, count;
+    int ret, i;
     bool multi = false;
 
     trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi");
@@ -2908,34 +2948,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
         goto out_single;
     }
 
-    /* Determine how many group fds need to be passed */
-    count = 0;
-    QLIST_FOREACH(group, &vfio_group_list, next) {
-        for (i = 0; i < info->count; i++) {
-            if (group->groupid == devices[i].group_id) {
-                count++;
-                break;
-            }
-        }
-    }
-
-    reset = g_malloc0(sizeof(*reset) + (count * sizeof(*fds)));
-    reset->argsz = sizeof(*reset) + (count * sizeof(*fds));
-    fds = &reset->group_fds[0];
-
-    /* Fill in group fds */
-    QLIST_FOREACH(group, &vfio_group_list, next) {
-        for (i = 0; i < info->count; i++) {
-            if (group->groupid == devices[i].group_id) {
-                fds[reset->count++] = group->fd;
-                break;
-            }
-        }
-    }
-
-    /* Bus reset! */
-    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset);
-    g_free(reset);
+    ret = vfio_pci_do_hot_reset(vdev, info);
 
     trace_vfio_pci_hot_reset_result(vdev->vbasedev.name,
                                     ret ? "%m" : "Success");
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [Qemu-devel] [RFC v10 03/19] pcie: modify the capability size assert
  2015-06-16  8:10 [Qemu-devel] [RFC v10 00/19] vfio-pci: pass the aer error to guest Chen Fan
  2015-06-16  8:10 ` [Qemu-devel] [RFC v10 01/19] vfio: extract vfio_get_hot_reset_info as a single function Chen Fan
  2015-06-16  8:10 ` [Qemu-devel] [RFC v10 02/19] vfio: squeeze out vfio_pci_do_hot_reset for support bus reset Chen Fan
@ 2015-06-16  8:10 ` Chen Fan
  2015-06-17  9:43   ` Marcel Apfelbaum
  2015-06-16  8:10 ` [Qemu-devel] [RFC v10 04/19] vfio: make the 4 bytes aligned for capability size Chen Fan
                   ` (15 subsequent siblings)
  18 siblings, 1 reply; 27+ messages in thread
From: Chen Fan @ 2015-06-16  8:10 UTC (permalink / raw)
  To: qemu-devel; +Cc: izumi.taku, alex.williamson

because the capabilities need to be DWORD aligned, so the size
should DWORD aligned too, and then the last capability size can
to be the greatest 0x1000. e.g. if I have a capability starting
4 bytes from the end, 0xFFC.  The max size should be 4 bytes,
0x1000 - 0xFFC, not 3 bytes, 0xFFF - 0xFFC.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/pci/pcie.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 1463e65..6cdd4a1 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -595,7 +595,7 @@ void pcie_add_capability(PCIDevice *dev,
 
     assert(offset >= PCI_CONFIG_SPACE_SIZE);
     assert(offset < offset + size);
-    assert(offset + size < PCIE_CONFIG_SPACE_SIZE);
+    assert(offset + size <= PCIE_CONFIG_SPACE_SIZE);
     assert(size >= 8);
     assert(pci_is_express(dev));
 
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [Qemu-devel] [RFC v10 04/19] vfio: make the 4 bytes aligned for capability size
  2015-06-16  8:10 [Qemu-devel] [RFC v10 00/19] vfio-pci: pass the aer error to guest Chen Fan
                   ` (2 preceding siblings ...)
  2015-06-16  8:10 ` [Qemu-devel] [RFC v10 03/19] pcie: modify the capability size assert Chen Fan
@ 2015-06-16  8:10 ` Chen Fan
  2015-06-16  8:10 ` [Qemu-devel] [RFC v10 05/19] vfio: add pcie extanded capability support Chen Fan
                   ` (14 subsequent siblings)
  18 siblings, 0 replies; 27+ messages in thread
From: Chen Fan @ 2015-06-16  8:10 UTC (permalink / raw)
  To: qemu-devel; +Cc: izumi.taku, alex.williamson

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index e056c49..52e8ad4 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2481,7 +2481,8 @@ static void vfio_unmap_bars(VFIOPCIDevice *vdev)
  */
 static uint8_t vfio_std_cap_max_size(PCIDevice *pdev, uint8_t pos)
 {
-    uint8_t tmp, next = 0xff;
+    uint8_t tmp;
+    uint16_t next = PCI_CONFIG_SPACE_SIZE;
 
     for (tmp = pdev->config[PCI_CAPABILITY_LIST]; tmp;
          tmp = pdev->config[tmp + 1]) {
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [Qemu-devel] [RFC v10 05/19] vfio: add pcie extanded capability support
  2015-06-16  8:10 [Qemu-devel] [RFC v10 00/19] vfio-pci: pass the aer error to guest Chen Fan
                   ` (3 preceding siblings ...)
  2015-06-16  8:10 ` [Qemu-devel] [RFC v10 04/19] vfio: make the 4 bytes aligned for capability size Chen Fan
@ 2015-06-16  8:10 ` Chen Fan
  2015-06-16  8:10 ` [Qemu-devel] [RFC v10 06/19] aer: impove pcie_aer_init to support vfio device Chen Fan
                   ` (13 subsequent siblings)
  18 siblings, 0 replies; 27+ messages in thread
From: Chen Fan @ 2015-06-16  8:10 UTC (permalink / raw)
  To: qemu-devel; +Cc: izumi.taku, alex.williamson

For vfio pcie device, we could expose the extended capability on
PCIE bus. in order to avoid config space broken, we introduce
a copy config for parsing extended caps. and rebuild the pcie
extended config space.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 71 insertions(+), 1 deletion(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 52e8ad4..e2f6442 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2494,6 +2494,21 @@ static uint8_t vfio_std_cap_max_size(PCIDevice *pdev, uint8_t pos)
     return next - pos;
 }
 
+
+static uint16_t vfio_ext_cap_max_size(const uint8_t *config, uint16_t pos)
+{
+    uint16_t tmp, next = PCIE_CONFIG_SPACE_SIZE;
+
+    for (tmp = PCI_CONFIG_SPACE_SIZE; tmp;
+        tmp = PCI_EXT_CAP_NEXT(pci_get_long(config + tmp))) {
+        if (tmp > pos && tmp < next) {
+            next = tmp;
+        }
+    }
+
+    return next - pos;
+}
+
 static void vfio_set_word_bits(uint8_t *buf, uint16_t val, uint16_t mask)
 {
     pci_set_word(buf, (pci_get_word(buf) & ~mask) | val);
@@ -2804,16 +2819,71 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
     return 0;
 }
 
+static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
+{
+    PCIDevice *pdev = &vdev->pdev;
+    uint32_t header;
+    uint16_t cap_id, next, size;
+    uint8_t cap_ver;
+    uint8_t *config;
+
+    /*
+     * In order to avoid breaking config space, create a copy to
+     * use for parsing extended capabilities.
+     */
+    config = g_memdup(pdev->config, vdev->config_size);
+
+    for (next = PCI_CONFIG_SPACE_SIZE; next;
+         next = PCI_EXT_CAP_NEXT(pci_get_long(config + next))) {
+        header = pci_get_long(config + next);
+        cap_id = PCI_EXT_CAP_ID(header);
+        cap_ver = PCI_EXT_CAP_VER(header);
+
+        /*
+         * If it becomes important to configure extended capabilities to their
+         * actual size, use this as the default when it's something we don't
+         * recognize. Since QEMU doesn't actually handle many of the config
+         * accesses, exact size doesn't seem worthwhile.
+         */
+        size = vfio_ext_cap_max_size(config, next);
+
+        pcie_add_capability(pdev, cap_id, cap_ver, next, size);
+        if (next == PCI_CONFIG_SPACE_SIZE) {
+            /* Begin the rebuild, we should set the next offset zero. */
+            pci_set_long(pdev->config + next, PCI_EXT_CAP(cap_id, cap_ver, 0));
+        }
+
+        /* Use emulated header pointer to allow dropping extended caps */
+        pci_set_long(vdev->emulated_config_bits + next, 0xffffffff);
+    }
+
+    g_free(config);
+    return 0;
+}
+
 static int vfio_add_capabilities(VFIOPCIDevice *vdev)
 {
     PCIDevice *pdev = &vdev->pdev;
+    int ret;
 
     if (!(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST) ||
         !pdev->config[PCI_CAPABILITY_LIST]) {
         return 0; /* Nothing to add */
     }
 
-    return vfio_add_std_cap(vdev, pdev->config[PCI_CAPABILITY_LIST]);
+    ret = vfio_add_std_cap(vdev, pdev->config[PCI_CAPABILITY_LIST]);
+    if (ret) {
+        return ret;
+    }
+
+    /* on PCI bus, it doesn't make sense to expose extended capabilities. */
+    if (!pci_is_express(pdev) ||
+        !pci_bus_is_express(pdev->bus) ||
+        !pci_get_long(pdev->config + PCI_CONFIG_SPACE_SIZE)) {
+        return 0;
+    }
+
+    return vfio_add_ext_cap(vdev);
 }
 
 static void vfio_pci_pre_reset(VFIOPCIDevice *vdev)
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [Qemu-devel] [RFC v10 06/19] aer: impove pcie_aer_init to support vfio device
  2015-06-16  8:10 [Qemu-devel] [RFC v10 00/19] vfio-pci: pass the aer error to guest Chen Fan
                   ` (4 preceding siblings ...)
  2015-06-16  8:10 ` [Qemu-devel] [RFC v10 05/19] vfio: add pcie extanded capability support Chen Fan
@ 2015-06-16  8:10 ` Chen Fan
  2015-06-16  8:10 ` [Qemu-devel] [RFC v10 07/19] vfio: add aer support for " Chen Fan
                   ` (12 subsequent siblings)
  18 siblings, 0 replies; 27+ messages in thread
From: Chen Fan @ 2015-06-16  8:10 UTC (permalink / raw)
  To: qemu-devel; +Cc: izumi.taku, alex.williamson

pcie_aer_init was used to emulate an aer capability for pcie device,
but for vfio device, the aer config space size is mutable and is not
always equal to PCI_ERR_SIZEOF(0x48). it depends on where the TLP Prefix
register required, so here we add a size argument.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/pci-bridge/ioh3420.c            | 2 +-
 hw/pci-bridge/xio3130_downstream.c | 2 +-
 hw/pci-bridge/xio3130_upstream.c   | 2 +-
 hw/pci/pcie_aer.c                  | 4 ++--
 include/hw/pci/pcie_aer.h          | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/hw/pci-bridge/ioh3420.c b/hw/pci-bridge/ioh3420.c
index cce2fdd..4d9cd3f 100644
--- a/hw/pci-bridge/ioh3420.c
+++ b/hw/pci-bridge/ioh3420.c
@@ -129,7 +129,7 @@ static int ioh3420_initfn(PCIDevice *d)
         goto err_pcie_cap;
     }
     pcie_cap_root_init(d);
-    rc = pcie_aer_init(d, IOH_EP_AER_OFFSET);
+    rc = pcie_aer_init(d, IOH_EP_AER_OFFSET, PCI_ERR_SIZEOF);
     if (rc < 0) {
         goto err;
     }
diff --git a/hw/pci-bridge/xio3130_downstream.c b/hw/pci-bridge/xio3130_downstream.c
index b3a6479..9737041 100644
--- a/hw/pci-bridge/xio3130_downstream.c
+++ b/hw/pci-bridge/xio3130_downstream.c
@@ -92,7 +92,7 @@ static int xio3130_downstream_initfn(PCIDevice *d)
         goto err_pcie_cap;
     }
     pcie_cap_arifwd_init(d);
-    rc = pcie_aer_init(d, XIO3130_AER_OFFSET);
+    rc = pcie_aer_init(d, XIO3130_AER_OFFSET, PCI_ERR_SIZEOF);
     if (rc < 0) {
         goto err;
     }
diff --git a/hw/pci-bridge/xio3130_upstream.c b/hw/pci-bridge/xio3130_upstream.c
index eada582..4d7f894 100644
--- a/hw/pci-bridge/xio3130_upstream.c
+++ b/hw/pci-bridge/xio3130_upstream.c
@@ -81,7 +81,7 @@ static int xio3130_upstream_initfn(PCIDevice *d)
     }
     pcie_cap_flr_init(d);
     pcie_cap_deverr_init(d);
-    rc = pcie_aer_init(d, XIO3130_AER_OFFSET);
+    rc = pcie_aer_init(d, XIO3130_AER_OFFSET, PCI_ERR_SIZEOF);
     if (rc < 0) {
         goto err;
     }
diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c
index c8dea8e..6171a29 100644
--- a/hw/pci/pcie_aer.c
+++ b/hw/pci/pcie_aer.c
@@ -94,12 +94,12 @@ static void aer_log_clear_all_err(PCIEAERLog *aer_log)
     aer_log->log_num = 0;
 }
 
-int pcie_aer_init(PCIDevice *dev, uint16_t offset)
+int pcie_aer_init(PCIDevice *dev, uint16_t offset, uint16_t size)
 {
     PCIExpressDevice *exp;
 
     pcie_add_capability(dev, PCI_EXT_CAP_ID_ERR, PCI_ERR_VER,
-                        offset, PCI_ERR_SIZEOF);
+                        offset, size);
     exp = &dev->exp;
     exp->aer_cap = offset;
 
diff --git a/include/hw/pci/pcie_aer.h b/include/hw/pci/pcie_aer.h
index 2fb8388..156acb0 100644
--- a/include/hw/pci/pcie_aer.h
+++ b/include/hw/pci/pcie_aer.h
@@ -87,7 +87,7 @@ struct PCIEAERErr {
 
 extern const VMStateDescription vmstate_pcie_aer_log;
 
-int pcie_aer_init(PCIDevice *dev, uint16_t offset);
+int pcie_aer_init(PCIDevice *dev, uint16_t offset, uint16_t size);
 void pcie_aer_exit(PCIDevice *dev);
 void pcie_aer_write_config(PCIDevice *dev,
                            uint32_t addr, uint32_t val, int len);
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [Qemu-devel] [RFC v10 07/19] vfio: add aer support for vfio device
  2015-06-16  8:10 [Qemu-devel] [RFC v10 00/19] vfio-pci: pass the aer error to guest Chen Fan
                   ` (5 preceding siblings ...)
  2015-06-16  8:10 ` [Qemu-devel] [RFC v10 06/19] aer: impove pcie_aer_init to support vfio device Chen Fan
@ 2015-06-16  8:10 ` Chen Fan
  2015-06-16  8:10 ` [Qemu-devel] [RFC v10 08/19] vfio: add ref for group to support own affected groups Chen Fan
                   ` (11 subsequent siblings)
  18 siblings, 0 replies; 27+ messages in thread
From: Chen Fan @ 2015-06-16  8:10 UTC (permalink / raw)
  To: qemu-devel; +Cc: izumi.taku, alex.williamson

Calling pcie_aer_init to initilize aer related registers for
vfio device, then reload physical related registers to expose
device capability.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 87 insertions(+), 2 deletions(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index e2f6442..e1bbd03 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -32,6 +32,7 @@
 #include "hw/pci/msi.h"
 #include "hw/pci/msix.h"
 #include "hw/pci/pci.h"
+#include "hw/pci/pci_bridge.h"
 #include "qemu-common.h"
 #include "qemu/error-report.h"
 #include "qemu/event_notifier.h"
@@ -160,6 +161,8 @@ typedef struct VFIOPCIDevice {
 #define VFIO_FEATURE_ENABLE_VGA (1 << VFIO_FEATURE_ENABLE_VGA_BIT)
 #define VFIO_FEATURE_ENABLE_REQ_BIT 1
 #define VFIO_FEATURE_ENABLE_REQ (1 << VFIO_FEATURE_ENABLE_REQ_BIT)
+#define VFIO_FEATURE_ENABLE_AER_BIT 2
+#define VFIO_FEATURE_ENABLE_AER (1 << VFIO_FEATURE_ENABLE_AER_BIT)
     int32_t bootindex;
     uint8_t pm_cap;
     bool has_vga;
@@ -2819,6 +2822,74 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
     return 0;
 }
 
+static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
+                          int pos, uint16_t size)
+{
+    PCIDevice *pdev = &vdev->pdev;
+    uint8_t *exp_cap = pdev->config + pdev->exp.exp_cap;
+    PCIDevice *dev_iter;
+    uint8_t type;
+    uint32_t severity, errcap;
+    int ret;
+
+    if (!(vdev->features & VFIO_FEATURE_ENABLE_AER)) {
+        pcie_add_capability(pdev, PCI_EXT_CAP_ID_ERR,
+                            cap_ver, pos, size);
+        return 0;
+    }
+
+    dev_iter = pci_bridge_get_device(pdev->bus);
+    if (!dev_iter) {
+        goto error;
+    }
+
+    while (dev_iter) {
+        type = pcie_cap_get_type(dev_iter);
+        if ((type != PCI_EXP_TYPE_ROOT_PORT &&
+             type != PCI_EXP_TYPE_UPSTREAM &&
+             type != PCI_EXP_TYPE_DOWNSTREAM)) {
+            goto error;
+        }
+
+        if (!dev_iter->exp.aer_cap) {
+            goto error;
+        }
+
+        dev_iter = pci_bridge_get_device(dev_iter->bus);
+    }
+
+    errcap = vfio_pci_read_config(pdev, pdev->exp.aer_cap + PCI_ERR_CAP, 4);
+    /*
+     * The ability to record multiple headers is depending on
+     * the state of the Multiple Header Recording Capable bit and
+     * enabled by the Multiple Header Recording Enable bit.
+     */
+    if ((errcap & PCI_ERR_CAP_MHRC) &&
+        (errcap & PCI_ERR_CAP_MHRE)) {
+        pdev->exp.aer_log.log_max = PCIE_AER_LOG_MAX_DEFAULT;
+    } else {
+        pdev->exp.aer_log.log_max = 0;
+    }
+
+    pcie_cap_deverr_init(pdev);
+    ret = pcie_aer_init(pdev, pos, size);
+    if (ret) {
+        return ret;
+    }
+
+    /* load physical registers */
+    severity = vfio_pci_read_config(pdev,
+                   pdev->exp.aer_cap + PCI_ERR_UNCOR_SEVER, 4);
+    pci_long_test_and_clear_mask(exp_cap + PCI_ERR_UNCOR_SEVER, ~severity);
+
+    return 0;
+
+error:
+    error_report("vfio: Unable to enable AER for device %s, parent bus "
+                 "does not support AER signaling", vdev->vbasedev.name);
+    return -1;
+}
+
 static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
 {
     PCIDevice *pdev = &vdev->pdev;
@@ -2826,6 +2897,7 @@ static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
     uint16_t cap_id, next, size;
     uint8_t cap_ver;
     uint8_t *config;
+    int ret = 0;
 
     /*
      * In order to avoid breaking config space, create a copy to
@@ -2847,7 +2919,19 @@ static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
          */
         size = vfio_ext_cap_max_size(config, next);
 
-        pcie_add_capability(pdev, cap_id, cap_ver, next, size);
+        switch (cap_id) {
+        case PCI_EXT_CAP_ID_ERR:
+            ret = vfio_setup_aer(vdev, cap_ver, next, size);
+            break;
+        default:
+            pcie_add_capability(pdev, cap_id, cap_ver, next, size);
+            break;
+        }
+
+        if (ret) {
+            goto out;
+        }
+
         if (next == PCI_CONFIG_SPACE_SIZE) {
             /* Begin the rebuild, we should set the next offset zero. */
             pci_set_long(pdev->config + next, PCI_EXT_CAP(cap_id, cap_ver, 0));
@@ -2857,8 +2941,9 @@ static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
         pci_set_long(vdev->emulated_config_bits + next, 0xffffffff);
     }
 
+out:
     g_free(config);
-    return 0;
+    return ret;
 }
 
 static int vfio_add_capabilities(VFIOPCIDevice *vdev)
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [Qemu-devel] [RFC v10 08/19] vfio: add ref for group to support own affected groups
  2015-06-16  8:10 [Qemu-devel] [RFC v10 00/19] vfio-pci: pass the aer error to guest Chen Fan
                   ` (6 preceding siblings ...)
  2015-06-16  8:10 ` [Qemu-devel] [RFC v10 07/19] vfio: add aer support for " Chen Fan
@ 2015-06-16  8:10 ` Chen Fan
  2015-06-16  8:10 ` [Qemu-devel] [RFC v10 09/19] vfio: extract vfio_register_container_listener from vfio_connect_container Chen Fan
                   ` (10 subsequent siblings)
  18 siblings, 0 replies; 27+ messages in thread
From: Chen Fan @ 2015-06-16  8:10 UTC (permalink / raw)
  To: qemu-devel; +Cc: izumi.taku, alex.williamson

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/common.c              | 14 +++++++++++++-
 include/hw/vfio/vfio-common.h |  1 +
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index b1045da..67881f7 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -795,6 +795,7 @@ VFIOGroup *vfio_get_group(int groupid, AddressSpace *as)
         if (group->groupid == groupid) {
             /* Found it.  Now is it already in the right context? */
             if (group->container->space->as == as) {
+                group->ref++;
                 return group;
             } else {
                 error_report("vfio: group %d used in multiple address spaces",
@@ -825,6 +826,7 @@ VFIOGroup *vfio_get_group(int groupid, AddressSpace *as)
         goto close_fd_exit;
     }
 
+    group->ref = 1;
     group->groupid = groupid;
     QLIST_INIT(&group->device_list);
 
@@ -854,7 +856,17 @@ free_group_exit:
 
 void vfio_put_group(VFIOGroup *group)
 {
-    if (!group || !QLIST_EMPTY(&group->device_list)) {
+    if (!group) {
+        return;
+    }
+
+    group->ref--;
+    assert(group->ref >= 0);
+    if (!QLIST_EMPTY(&group->device_list)) {
+        return;
+    }
+
+    if (group->ref) {
         return;
     }
 
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 0d1fb80..3eb042e 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -117,6 +117,7 @@ struct VFIODeviceOps {
 typedef struct VFIOGroup {
     int fd;
     int groupid;
+    int ref;
     VFIOContainer *container;
     QLIST_HEAD(, VFIODevice) device_list;
     QLIST_ENTRY(VFIOGroup) next;
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [Qemu-devel] [RFC v10 09/19] vfio: extract vfio_register_container_listener from vfio_connect_container
  2015-06-16  8:10 [Qemu-devel] [RFC v10 00/19] vfio-pci: pass the aer error to guest Chen Fan
                   ` (7 preceding siblings ...)
  2015-06-16  8:10 ` [Qemu-devel] [RFC v10 08/19] vfio: add ref for group to support own affected groups Chen Fan
@ 2015-06-16  8:10 ` Chen Fan
  2015-06-16  8:10 ` [Qemu-devel] [RFC v10 10/19] vfio: improve vfio_get_group to support adding as is NULL Chen Fan
                   ` (9 subsequent siblings)
  18 siblings, 0 replies; 27+ messages in thread
From: Chen Fan @ 2015-06-16  8:10 UTC (permalink / raw)
  To: qemu-devel; +Cc: izumi.taku, alex.williamson

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/common.c | 60 ++++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 39 insertions(+), 21 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 67881f7..df3171d 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -631,6 +631,38 @@ static void vfio_put_address_space(VFIOAddressSpace *space)
     }
 }
 
+static int vfio_register_container_listener(VFIOContainer *container,
+                                            AddressSpace *as)
+{
+    int ret;
+
+    if (!as) {
+        return 0;
+    }
+
+    container->iommu_data.type1.listener = vfio_memory_listener;
+    container->iommu_data.release = vfio_listener_release;
+
+    memory_listener_register(&container->iommu_data.type1.listener,
+                             as);
+
+    if (container->iommu_data.type1.error) {
+        ret = container->iommu_data.type1.error;
+        error_report("vfio: memory listener initialization failed for container");
+        goto listener_release_exit;
+    }
+
+    container->iommu_data.type1.initialized = true;
+    container->space->as = as;
+
+    return 0;
+
+listener_release_exit:
+    vfio_listener_release(container);
+
+    return ret;
+}
+
 static int vfio_connect_container(VFIOGroup *group, AddressSpace *as)
 {
     VFIOContainer *container;
@@ -684,20 +716,10 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as)
             goto free_container_exit;
         }
 
-        container->iommu_data.type1.listener = vfio_memory_listener;
-        container->iommu_data.release = vfio_listener_release;
-
-        memory_listener_register(&container->iommu_data.type1.listener,
-                                 container->space->as);
-
-        if (container->iommu_data.type1.error) {
-            ret = container->iommu_data.type1.error;
-            error_report("vfio: memory listener initialization failed for container");
-            goto listener_release_exit;
+        ret = vfio_register_container_listener(container, as);
+        if (ret) {
+            goto free_container_exit;
         }
-
-        container->iommu_data.type1.initialized = true;
-
     } else if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_IOMMU)) {
         ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd);
         if (ret) {
@@ -724,12 +746,10 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as)
             goto free_container_exit;
         }
 
-        container->iommu_data.type1.listener = vfio_memory_listener;
-        container->iommu_data.release = vfio_listener_release;
-
-        memory_listener_register(&container->iommu_data.type1.listener,
-                                 container->space->as);
-
+        ret = vfio_register_container_listener(container, as);
+        if (ret) {
+            goto free_container_exit;
+        }
     } else {
         error_report("vfio: No available IOMMU models");
         ret = -EINVAL;
@@ -743,8 +763,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as)
     QLIST_INSERT_HEAD(&container->group_list, group, container_next);
 
     return 0;
-listener_release_exit:
-    vfio_listener_release(container);
 
 free_container_exit:
     g_free(container);
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [Qemu-devel] [RFC v10 10/19] vfio: improve vfio_get_group to support adding as is NULL.
  2015-06-16  8:10 [Qemu-devel] [RFC v10 00/19] vfio-pci: pass the aer error to guest Chen Fan
                   ` (8 preceding siblings ...)
  2015-06-16  8:10 ` [Qemu-devel] [RFC v10 09/19] vfio: extract vfio_register_container_listener from vfio_connect_container Chen Fan
@ 2015-06-16  8:10 ` Chen Fan
  2015-06-18 16:41   ` Alex Williamson
  2015-06-16  8:10 ` [Qemu-devel] [RFC v10 11/19] get all affected groups for each device support aer Chen Fan
                   ` (8 subsequent siblings)
  18 siblings, 1 reply; 27+ messages in thread
From: Chen Fan @ 2015-06-16  8:10 UTC (permalink / raw)
  To: qemu-devel; +Cc: izumi.taku, alex.williamson

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/common.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index df3171d..15f19a2 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -808,11 +808,18 @@ VFIOGroup *vfio_get_group(int groupid, AddressSpace *as)
     VFIOGroup *group;
     char path[32];
     struct vfio_group_status status = { .argsz = sizeof(status) };
+    int ret;
 
     QLIST_FOREACH(group, &vfio_group_list, next) {
         if (group->groupid == groupid) {
             /* Found it.  Now is it already in the right context? */
-            if (group->container->space->as == as) {
+            if (as && !group->container->space->as) {
+                ret = vfio_register_container_listener(group->container, as);
+                if (ret) {
+                    return NULL;
+                }
+            }
+            if (!as || group->container->space->as == as) {
                 group->ref++;
                 return group;
             } else {
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [Qemu-devel] [RFC v10 11/19] get all affected groups for each device support aer
  2015-06-16  8:10 [Qemu-devel] [RFC v10 00/19] vfio-pci: pass the aer error to guest Chen Fan
                   ` (9 preceding siblings ...)
  2015-06-16  8:10 ` [Qemu-devel] [RFC v10 10/19] vfio: improve vfio_get_group to support adding as is NULL Chen Fan
@ 2015-06-16  8:10 ` Chen Fan
  2015-06-16  8:10 ` [Qemu-devel] [RFC v10 12/19] vfio: add check host bus reset is support or not Chen Fan
                   ` (7 subsequent siblings)
  18 siblings, 0 replies; 27+ messages in thread
From: Chen Fan @ 2015-06-16  8:10 UTC (permalink / raw)
  To: qemu-devel; +Cc: izumi.taku, alex.williamson

Add the affected groups without any devices into VM,
it can keep the VM ownship the all groups. and use a
reference to make the group visible.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 123 ++++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 116 insertions(+), 7 deletions(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index e1bbd03..ade3196 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2822,6 +2822,113 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
     return 0;
 }
 
+static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
+                                PCIHostDeviceAddress *host2)
+{
+    return (host1->domain == host2->domain && host1->bus == host2->bus &&
+            host1->slot == host2->slot && host1->function == host2->function);
+}
+
+static int vfio_get_affected_groups(VFIOPCIDevice *vdev)
+{
+    struct vfio_pci_hot_reset_info *info = NULL;
+    struct vfio_pci_dependent_device *devices;
+    PCIHostDeviceAddress host;
+    VFIOGroup *group;
+    int ret, i;
+    struct vfio_group_head vfio_groups =
+           QLIST_HEAD_INITIALIZER(vfio_groups);
+
+    ret = vfio_get_hot_reset_info(vdev, &info);
+    if (ret) {
+        goto out;
+    }
+
+    /* List all affected devices by bus reset */
+    devices = &info->devices[0];
+
+    /* Verify that we have all the groups required */
+    for (i = 0; i < info->count; i++) {
+        host.domain = devices[i].segment;
+        host.bus = devices[i].bus;
+        host.slot = PCI_SLOT(devices[i].devfn);
+        host.function = PCI_FUNC(devices[i].devfn);
+
+        /* Skip the current device */
+        if (vfio_pci_host_match(&host, &vdev->host)) {
+            continue;
+        }
+
+        /* Get all affected groups into VM */
+        group = vfio_get_group(devices[i].group_id, NULL);
+        if (!group) {
+            error_report("vfio: failed to get affected group %d",
+                         devices[i].group_id);
+            ret = -1;
+            goto out;
+        }
+
+        QLIST_INSERT_HEAD(&vfio_groups, group, next);
+    }
+    ret = 0;
+out:
+    if (ret && !QLIST_EMPTY(&vfio_groups)) {
+        QLIST_FOREACH(group, &vfio_groups, next) {
+            vfio_put_group(group);
+        }
+        vdev->features &= ~VFIO_FEATURE_ENABLE_AER;
+    }
+    g_free(info);
+    return ret;
+}
+
+static int vfio_put_affected_groups(VFIOPCIDevice *vdev)
+{
+    struct vfio_pci_hot_reset_info *info = NULL;
+    struct vfio_pci_dependent_device *devices;
+    PCIHostDeviceAddress host;
+    VFIOGroup *group;
+    int ret, i;
+
+    ret = vfio_get_hot_reset_info(vdev, &info);
+    if (ret) {
+        goto out;
+    }
+
+    /* List all affected devices by bus reset */
+    devices = &info->devices[0];
+
+    /* Verify that we have all the groups required */
+    for (i = 0; i < info->count; i++) {
+        host.domain = devices[i].segment;
+        host.bus = devices[i].bus;
+        host.slot = PCI_SLOT(devices[i].devfn);
+        host.function = PCI_FUNC(devices[i].devfn);
+
+        /* Skip the current device */
+        if (vfio_pci_host_match(&host, &vdev->host)) {
+            continue;
+        }
+
+        /* Ensure we own the group of the affected device */
+        QLIST_FOREACH(group, &vfio_group_list, next) {
+            if (group->groupid == devices[i].group_id) {
+                break;
+            }
+        }
+
+        if (!group)
+            continue;
+
+        vfio_put_group(group);
+    }
+
+    ret = 0;
+out:
+    g_free(info);
+    return ret;
+}
+
 static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
                           int pos, uint16_t size)
 {
@@ -2858,6 +2965,12 @@ static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
         dev_iter = pci_bridge_get_device(dev_iter->bus);
     }
 
+    /* Ensure own all affected groups */
+    ret = vfio_get_affected_groups(vdev);
+    if (ret) {
+        return ret;
+    }
+
     errcap = vfio_pci_read_config(pdev, pdev->exp.aer_cap + PCI_ERR_CAP, 4);
     /*
      * The ability to record multiple headers is depending on
@@ -3013,13 +3126,6 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
     vfio_enable_intx(vdev);
 }
 
-static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
-                                PCIHostDeviceAddress *host2)
-{
-    return (host1->domain == host2->domain && host1->bus == host2->bus &&
-            host1->slot == host2->slot && host1->function == host2->function);
-}
-
 static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
 {
     VFIOGroup *group;
@@ -3851,6 +3957,9 @@ static void vfio_instance_finalize(Object *obj)
     g_free(vdev->rom);
     vfio_put_device(vdev);
     vfio_put_group(group);
+    if (vdev->features & VFIO_FEATURE_ENABLE_AER) {
+        vfio_put_affected_groups(vdev);
+    }
 }
 
 static void vfio_exitfn(PCIDevice *pdev)
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [Qemu-devel] [RFC v10 12/19] vfio: add check host bus reset is support or not
  2015-06-16  8:10 [Qemu-devel] [RFC v10 00/19] vfio-pci: pass the aer error to guest Chen Fan
                   ` (10 preceding siblings ...)
  2015-06-16  8:10 ` [Qemu-devel] [RFC v10 11/19] get all affected groups for each device support aer Chen Fan
@ 2015-06-16  8:10 ` Chen Fan
  2015-06-16  8:10 ` [Qemu-devel] [RFC v10 13/19] pci: add bus reset_notifiers callbacks for host bus reset Chen Fan
                   ` (6 subsequent siblings)
  18 siblings, 0 replies; 27+ messages in thread
From: Chen Fan @ 2015-06-16  8:10 UTC (permalink / raw)
  To: qemu-devel; +Cc: izumi.taku, alex.williamson

when init vfio device, we should test all the devices supported
aer whether conflict with others. For each one, get the hot reset
info for the affected device list.  For each affected device, if
it's attached to the VM, it needs to be on or below the bus of
the target device. also, we should test all of the non-AER supporting
vfio-pci devices on or below the target bus to verify they have a reset
mechanism.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 170 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 170 insertions(+)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index ade3196..53fb544 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -32,6 +32,7 @@
 #include "hw/pci/msi.h"
 #include "hw/pci/msix.h"
 #include "hw/pci/pci.h"
+#include "hw/pci/pci_bus.h"
 #include "hw/pci/pci_bridge.h"
 #include "qemu-common.h"
 #include "qemu/error-report.h"
@@ -2929,6 +2930,169 @@ out:
     return ret;
 }
 
+struct VFIODeviceFind {
+    PCIDevice *pdev;
+    bool found;
+};
+
+static void find_devices(PCIBus *bus, void *opaque)
+{
+    struct VFIODeviceFind *find = opaque;
+    int i;
+
+    if (find->found) {
+        return;
+    }
+
+    for (i = 0; i < ARRAY_SIZE(bus->devices); i++) {
+        if (!bus->devices[i]) {
+            continue;
+        }
+
+        if (bus->devices[i] == find->pdev) {
+            find->found = true;
+            break;
+        }
+    }
+}
+
+static void vfio_check_device_reset(PCIBus *bus, void *opaque)
+{
+    int i;
+    PCIDevice *dev;
+    VFIOPCIDevice *vdev;
+    struct VFIODeviceFind *find = opaque;
+
+    if (find->found) {
+        return;
+    }
+
+    for (i = 0; i < ARRAY_SIZE(bus->devices); i++) {
+        if (!bus->devices[i]) {
+            continue;
+        }
+        dev = bus->devices[i];
+        if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
+            continue;
+        }
+        vdev = DO_UPCAST(VFIOPCIDevice, pdev, dev);
+        if (!(vdev->features & VFIO_FEATURE_ENABLE_AER) &&
+            !vdev->vbasedev.reset_works &&
+            !vdev->has_flr &&
+            !vdev->has_pm_reset) {
+            find->pdev = dev;
+            find->found = true;
+            break;
+        }
+    }
+}
+
+static int vfio_check_host_bus_reset(VFIOPCIDevice *vdev)
+{
+    PCIBus *bus = vdev->pdev.bus;
+    struct vfio_pci_hot_reset_info *info = NULL;
+    struct vfio_pci_dependent_device *devices;
+    VFIOGroup *group;
+    int ret, i;
+    struct VFIODeviceFind find;
+
+    ret = vfio_get_hot_reset_info(vdev, &info);
+    if (ret) {
+        goto out;
+    }
+
+    /* List all affected devices by bus reset */
+    devices = &info->devices[0];
+
+    /* Verify that we have all the groups required */
+    for (i = 0; i < info->count; i++) {
+        PCIHostDeviceAddress host;
+        VFIOPCIDevice *tmp;
+        VFIODevice *vbasedev_iter;
+
+        host.domain = devices[i].segment;
+        host.bus = devices[i].bus;
+        host.slot = PCI_SLOT(devices[i].devfn);
+        host.function = PCI_FUNC(devices[i].devfn);
+
+        /* Skip the current device */
+        if (vfio_pci_host_match(&host, &vdev->host)) {
+            continue;
+        }
+
+        /* Ensure we own the group of the affected device */
+        QLIST_FOREACH(group, &vfio_group_list, next) {
+            if (group->groupid == devices[i].group_id) {
+                break;
+            }
+        }
+
+        if (!group) {
+            ret = -1;
+            goto out;
+        }
+
+        /* Ensure affected devices for reset under the same bus */
+        QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
+            if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
+                continue;
+            }
+            tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
+            if (vfio_pci_host_match(&host, &tmp->host)) {
+                find.pdev = &tmp->pdev;
+                find.found = false;
+
+                pci_for_each_bus(bus, find_devices, &find);
+                if (!find.found) {
+                    ret = -1;
+                    goto out;
+                }
+                break;
+            }
+        }
+    }
+
+    /*
+     * Check the all vfio pci devices on or below the target bus
+     * have a reset mechanism at least.
+     */
+    find.pdev = NULL;
+    find.found = false;
+    pci_for_each_bus(bus, vfio_check_device_reset, &find);
+    if (find.found) {
+        ret = -1;
+        goto out;
+    }
+
+    ret = 0;
+out:
+    g_free(info);
+    return ret;
+}
+
+static int vfio_check_devices_host_bus_reset(void)
+{
+    VFIOGroup *group;
+    VFIODevice *vbasedev;
+    VFIOPCIDevice *vdev;
+
+    /* Check All vfio-pci devices if have bus reset capability */
+    QLIST_FOREACH(group, &vfio_group_list, next) {
+        QLIST_FOREACH(vbasedev, &group->device_list, next) {
+            vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
+            if ((vdev->features & VFIO_FEATURE_ENABLE_AER) &&
+                vfio_check_host_bus_reset(vdev)) {
+                error_report("vfio: Cannot enable AER for device %s, "
+                             "which does not support host bus reset.",
+                              vdev->vbasedev.name);
+                 return -1;
+            }
+        }
+    }
+
+    return 0;
+}
+
 static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
                           int pos, uint16_t size)
 {
@@ -2971,6 +3135,12 @@ static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
         return ret;
     }
 
+    /* Make sure this devices does conflict the existing aer topology */
+    ret = vfio_check_devices_host_bus_reset();
+    if (ret) {
+        return ret;
+    }
+
     errcap = vfio_pci_read_config(pdev, pdev->exp.aer_cap + PCI_ERR_CAP, 4);
     /*
      * The ability to record multiple headers is depending on
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [Qemu-devel] [RFC v10 13/19] pci: add bus reset_notifiers callbacks for host bus reset
  2015-06-16  8:10 [Qemu-devel] [RFC v10 00/19] vfio-pci: pass the aer error to guest Chen Fan
                   ` (11 preceding siblings ...)
  2015-06-16  8:10 ` [Qemu-devel] [RFC v10 12/19] vfio: add check host bus reset is support or not Chen Fan
@ 2015-06-16  8:10 ` Chen Fan
  2015-06-16 10:20   ` Michael S. Tsirkin
  2015-06-16  8:10 ` [Qemu-devel] [RFC v10 14/19] vfio: add sec_bus_reset notifier to notify physical bus reset is needed Chen Fan
                   ` (5 subsequent siblings)
  18 siblings, 1 reply; 27+ messages in thread
From: Chen Fan @ 2015-06-16  8:10 UTC (permalink / raw)
  To: qemu-devel; +Cc: izumi.taku, alex.williamson

Particularly, For vfio devices, Once need to recovery devices
by bus reset such as AER, we always need to reset the host bus
to recovery the devices under the bus, so we need to add pci bus
callbacks to specify to do host bus reset.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/pci/pci.c             | 16 ++++++++++++++++
 hw/pci/pci_bridge.c      |  6 ++++++
 include/hw/pci/pci.h     |  4 ++++
 include/hw/pci/pci_bus.h |  2 ++
 4 files changed, 28 insertions(+)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 3423c3a..3bd954e 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -74,11 +74,27 @@ static const VMStateDescription vmstate_pcibus = {
     }
 };
 
+void pci_bus_add_reset_notifier(PCIBus *bus, Notifier *notify)
+{
+    notifier_list_add(&bus->reset_notifiers, notify);
+}
+
+void pci_bus_remove_reset_notifier(Notifier *notify)
+{
+    notifier_remove(notify);
+}
+
+void pci_bus_run_reset_notifier(PCIBus *bus, void *opaque)
+{
+    notifier_list_notify(&bus->reset_notifiers, opaque);
+}
+
 static void pci_bus_realize(BusState *qbus, Error **errp)
 {
     PCIBus *bus = PCI_BUS(qbus);
 
     vmstate_register(NULL, -1, &vmstate_pcibus, bus);
+    notifier_list_init(&bus->reset_notifiers);
 }
 
 static void pci_bus_unrealize(BusState *qbus, Error **errp)
diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
index 40c97b1..a8e3f57 100644
--- a/hw/pci/pci_bridge.c
+++ b/hw/pci/pci_bridge.c
@@ -267,6 +267,12 @@ void pci_bridge_write_config(PCIDevice *d,
 
     newctl = pci_get_word(d->config + PCI_BRIDGE_CONTROL);
     if (~oldctl & newctl & PCI_BRIDGE_CTL_BUS_RESET) {
+        /*
+         * Notify all vfio-pci devices under the bus
+         * to do physical bus reset.
+         */
+        pci_for_each_bus(&s->sec_bus,
+                pci_bus_run_reset_notifier, d);
         /* Trigger hot reset on 0->1 transition. */
         qbus_reset_all(&s->sec_bus.qbus);
     }
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 6c2af0d..d353c9d 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -7,6 +7,7 @@
 #include "exec/memory.h"
 #include "sysemu/dma.h"
 #include "qapi/error.h"
+#include "qemu/notify.h"
 
 /* PCI includes legacy ISA access.  */
 #include "hw/isa/isa.h"
@@ -377,6 +378,9 @@ void pci_bus_fire_intx_routing_notifier(PCIBus *bus);
 void pci_device_set_intx_routing_notifier(PCIDevice *dev,
                                           PCIINTxRoutingNotifier notifier);
 void pci_device_reset(PCIDevice *dev);
+void pci_bus_add_reset_notifier(PCIBus *bus, Notifier *notify);
+void pci_bus_remove_reset_notifier(Notifier *notify);
+void pci_bus_run_reset_notifier(PCIBus *bus, void *opaque);
 
 PCIDevice *pci_nic_init_nofail(NICInfo *nd, PCIBus *rootbus,
                                const char *default_model,
diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
index fabaeee..3b551d7 100644
--- a/include/hw/pci/pci_bus.h
+++ b/include/hw/pci/pci_bus.h
@@ -29,6 +29,8 @@ struct PCIBus {
        Keep a count of the number of devices with raised IRQs.  */
     int nirq;
     int *irq_count;
+
+    NotifierList reset_notifiers;
 };
 
 typedef struct PCIBridgeWindows PCIBridgeWindows;
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [Qemu-devel] [RFC v10 14/19] vfio: add sec_bus_reset notifier to notify physical bus reset is needed
  2015-06-16  8:10 [Qemu-devel] [RFC v10 00/19] vfio-pci: pass the aer error to guest Chen Fan
                   ` (12 preceding siblings ...)
  2015-06-16  8:10 ` [Qemu-devel] [RFC v10 13/19] pci: add bus reset_notifiers callbacks for host bus reset Chen Fan
@ 2015-06-16  8:10 ` Chen Fan
  2015-06-16  8:10 ` [Qemu-devel] [RFC v10 15/19] vfio: improve vfio_pci_hot_reset to support more case Chen Fan
                   ` (4 subsequent siblings)
  18 siblings, 0 replies; 27+ messages in thread
From: Chen Fan @ 2015-06-16  8:10 UTC (permalink / raw)
  To: qemu-devel; +Cc: izumi.taku, alex.williamson

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 53fb544..fad6a80 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -156,6 +156,7 @@ typedef struct VFIOPCIDevice {
     PCIHostDeviceAddress host;
     EventNotifier err_notifier;
     EventNotifier req_notifier;
+    Notifier sec_bus_reset_notifier;
     int (*resetfn)(struct VFIOPCIDevice *);
     uint32_t features;
 #define VFIO_FEATURE_ENABLE_VGA_BIT 0
@@ -171,6 +172,7 @@ typedef struct VFIOPCIDevice {
     bool req_enabled;
     bool has_flr;
     bool has_pm_reset;
+    bool needs_bus_reset;
     bool rom_read_failed;
 } VFIOPCIDevice;
 
@@ -3093,6 +3095,81 @@ static int vfio_check_devices_host_bus_reset(void)
     return 0;
 }
 
+/* Update all the reset state of the affected devices in VM */
+static void vfio_pci_host_needs_bus_reset(Notifier *n, void *opaque)
+{
+    VFIOPCIDevice *vdev = container_of(n, VFIOPCIDevice, sec_bus_reset_notifier);
+    VFIODevice *vbasedev = &vdev->vbasedev;
+    PCIDevice *parent = opaque;
+    VFIOGroup *group;
+    struct vfio_pci_hot_reset_info *info = NULL;
+    struct vfio_pci_dependent_device *devices;
+    int i, ret;
+
+    ret = vfio_get_hot_reset_info(vdev, &info);
+    if (ret) {
+        goto out;
+    }
+
+    vdev->needs_bus_reset = true;
+    /*
+     * qbus_reset_all always reset the devices from the depth level,
+     * here only need to do the reset for the device encounter the aer,
+     * in order to avoid reset the affected device first below the bus,
+     * we only set the top level devices as needs_reset.
+     */
+    if (parent == pci_bridge_get_device(vdev->pdev.bus)) {
+        vbasedev->needs_reset = true;
+    }
+
+    /* List all affected devices by bus reset */
+    devices = &info->devices[0];
+
+    /* Verify that we have all the groups required */
+    for (i = 0; i < info->count; i++) {
+        PCIHostDeviceAddress host;
+        VFIOPCIDevice *tmp;
+        VFIODevice *vbasedev_iter;
+
+        host.domain = devices[i].segment;
+        host.bus = devices[i].bus;
+        host.slot = PCI_SLOT(devices[i].devfn);
+        host.function = PCI_FUNC(devices[i].devfn);
+
+        /* Skip the current device */
+        if (vfio_pci_host_match(&host, &vdev->host)) {
+            continue;
+        }
+
+        /* Ensure we own the group of the affected device */
+        QLIST_FOREACH(group, &vfio_group_list, next) {
+            if (group->groupid == devices[i].group_id) {
+                break;
+            }
+        }
+
+        if (!group) {
+            goto out;
+        }
+
+        /* Ensure affected devices for reset under the same bus */
+        QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
+            if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
+                continue;
+            }
+            tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
+            if (vfio_pci_host_match(&host, &tmp->host)) {
+                tmp->needs_bus_reset = true;
+                tmp->vbasedev.needs_reset = false;
+                break;
+            }
+        }
+    }
+
+out:
+    g_free(info);
+}
+
 static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
                           int pos, uint16_t size)
 {
@@ -3165,6 +3242,9 @@ static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
                    pdev->exp.aer_cap + PCI_ERR_UNCOR_SEVER, 4);
     pci_long_test_and_clear_mask(exp_cap + PCI_ERR_UNCOR_SEVER, ~severity);
 
+    vdev->sec_bus_reset_notifier.notify = vfio_pci_host_needs_bus_reset;
+    pci_bus_add_reset_notifier(pdev->bus, &vdev->sec_bus_reset_notifier);
+
     return 0;
 
 error:
@@ -4138,6 +4218,7 @@ static void vfio_exitfn(PCIDevice *pdev)
 
     vfio_unregister_req_notifier(vdev);
     vfio_unregister_err_notifier(vdev);
+    pci_bus_remove_reset_notifier(&vdev->sec_bus_reset_notifier);
     pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
     vfio_disable_interrupts(vdev);
     if (vdev->intx.mmap_timer) {
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [Qemu-devel] [RFC v10 15/19] vfio: improve vfio_pci_hot_reset to support more case
  2015-06-16  8:10 [Qemu-devel] [RFC v10 00/19] vfio-pci: pass the aer error to guest Chen Fan
                   ` (13 preceding siblings ...)
  2015-06-16  8:10 ` [Qemu-devel] [RFC v10 14/19] vfio: add sec_bus_reset notifier to notify physical bus reset is needed Chen Fan
@ 2015-06-16  8:10 ` Chen Fan
  2015-06-16  8:11 ` [Qemu-devel] [RFC v10 16/19] vfio: do hot bus reset when do virtual secondary bus reset Chen Fan
                   ` (3 subsequent siblings)
  18 siblings, 0 replies; 27+ messages in thread
From: Chen Fan @ 2015-06-16  8:10 UTC (permalink / raw)
  To: qemu-devel; +Cc: izumi.taku, alex.williamson

the vfio_pci_hot_reset differentiate the single and multi in-used
devices for reset. but sometimes we own the group without any devices,
that also should support hot reset.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index fad6a80..a86edab 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3407,6 +3407,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
         PCIHostDeviceAddress host;
         VFIOPCIDevice *tmp;
         VFIODevice *vbasedev_iter;
+        bool found;
 
         host.domain = devices[i].segment;
         host.bus = devices[i].bus;
@@ -3436,6 +3437,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
             goto out;
         }
 
+        found = false;
         /* Prep dependent devices for reset and clear our marker. */
         QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
             if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
@@ -3447,12 +3449,21 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
                     ret = -EINVAL;
                     goto out_single;
                 }
+                found = true;
                 vfio_pci_pre_reset(tmp);
                 tmp->vbasedev.needs_reset = false;
                 multi = true;
                 break;
             }
         }
+
+        /*
+         * If we own the group but does not own the device, we also
+         * should call hot reset with multi.
+         */
+        if (!single && !found) {
+            multi = true;
+        }
     }
 
     if (!single && !multi) {
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [Qemu-devel] [RFC v10 16/19] vfio: do hot bus reset when do virtual secondary bus reset
  2015-06-16  8:10 [Qemu-devel] [RFC v10 00/19] vfio-pci: pass the aer error to guest Chen Fan
                   ` (14 preceding siblings ...)
  2015-06-16  8:10 ` [Qemu-devel] [RFC v10 15/19] vfio: improve vfio_pci_hot_reset to support more case Chen Fan
@ 2015-06-16  8:11 ` Chen Fan
  2015-06-16  8:11 ` [Qemu-devel] [RFC v10 17/19] pcie_aer: expose pcie_aer_msg() interface Chen Fan
                   ` (2 subsequent siblings)
  18 siblings, 0 replies; 27+ messages in thread
From: Chen Fan @ 2015-06-16  8:11 UTC (permalink / raw)
  To: qemu-devel; +Cc: izumi.taku, alex.williamson

when do virtual secondary bus reset, the vfio device under
this bus need to do host bus reset to reset the device.
so add this case.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index a86edab..5bdfa73 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -4246,6 +4246,15 @@ static void vfio_pci_reset(DeviceState *dev)
 
     trace_vfio_pci_reset(vdev->vbasedev.name);
 
+    if (vdev->needs_bus_reset) {
+        vdev->needs_bus_reset = false;
+        /* Avoid duplicate bus reset */
+        if (vdev->vbasedev.needs_reset) {
+            vfio_pci_hot_reset(vdev, false);
+        }
+        return;
+    }
+
     vfio_pci_pre_reset(vdev);
 
     if (vdev->resetfn && !vdev->resetfn(vdev)) {
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [Qemu-devel] [RFC v10 17/19] pcie_aer: expose pcie_aer_msg() interface
  2015-06-16  8:10 [Qemu-devel] [RFC v10 00/19] vfio-pci: pass the aer error to guest Chen Fan
                   ` (15 preceding siblings ...)
  2015-06-16  8:11 ` [Qemu-devel] [RFC v10 16/19] vfio: do hot bus reset when do virtual secondary bus reset Chen Fan
@ 2015-06-16  8:11 ` Chen Fan
  2015-06-17  9:46   ` Marcel Apfelbaum
  2015-06-16  8:11 ` [Qemu-devel] [RFC v10 18/19] vfio-pci: pass the aer error to guest Chen Fan
  2015-06-16  8:11 ` [Qemu-devel] [RFC v10 19/19] vfio: add 'aer' property to expose aercap Chen Fan
  18 siblings, 1 reply; 27+ messages in thread
From: Chen Fan @ 2015-06-16  8:11 UTC (permalink / raw)
  To: qemu-devel; +Cc: izumi.taku, alex.williamson

For vfio device, we need to propagate the aer error to
Guest OS. we use the pcie_aer_msg() to send aer error
to guest.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/pci/pcie_aer.c         | 2 +-
 include/hw/pci/pcie_aer.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c
index 6171a29..373d1f2 100644
--- a/hw/pci/pcie_aer.c
+++ b/hw/pci/pcie_aer.c
@@ -370,7 +370,7 @@ static void pcie_aer_msg_root_port(PCIDevice *dev, const PCIEAERMsg *msg)
  *
  * Walk up the bus tree from the device, propagate the error message.
  */
-static void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg)
+void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg)
 {
     uint8_t type;
 
diff --git a/include/hw/pci/pcie_aer.h b/include/hw/pci/pcie_aer.h
index 156acb0..c2ee4e2 100644
--- a/include/hw/pci/pcie_aer.h
+++ b/include/hw/pci/pcie_aer.h
@@ -102,5 +102,6 @@ void pcie_aer_root_write_config(PCIDevice *dev,
 
 /* error injection */
 int pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err);
+void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg);
 
 #endif /* QEMU_PCIE_AER_H */
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [Qemu-devel] [RFC v10 18/19] vfio-pci: pass the aer error to guest
  2015-06-16  8:10 [Qemu-devel] [RFC v10 00/19] vfio-pci: pass the aer error to guest Chen Fan
                   ` (16 preceding siblings ...)
  2015-06-16  8:11 ` [Qemu-devel] [RFC v10 17/19] pcie_aer: expose pcie_aer_msg() interface Chen Fan
@ 2015-06-16  8:11 ` Chen Fan
  2015-06-16  8:11 ` [Qemu-devel] [RFC v10 19/19] vfio: add 'aer' property to expose aercap Chen Fan
  18 siblings, 0 replies; 27+ messages in thread
From: Chen Fan @ 2015-06-16  8:11 UTC (permalink / raw)
  To: qemu-devel; +Cc: izumi.taku, alex.williamson

when the vfio device encounters an uncorrectable error in host,
the vfio_pci driver will signal the eventfd registered by this
vfio device, the results in the qemu eventfd handler getting
invoked.

this patch is to pass the error to guest and have the guest driver
recover from the error.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 45 +++++++++++++++++++++++++++++++++++++++------
 1 file changed, 39 insertions(+), 6 deletions(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 5bdfa73..3b76329 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3699,18 +3699,51 @@ static void vfio_put_device(VFIOPCIDevice *vdev)
 static void vfio_err_notifier_handler(void *opaque)
 {
     VFIOPCIDevice *vdev = opaque;
+    PCIDevice *dev = &vdev->pdev;
+    PCIEAERMsg msg = {
+        .severity = 0,
+        .source_id = (pci_bus_num(dev->bus) << 8) | dev->devfn,
+    };
 
     if (!event_notifier_test_and_clear(&vdev->err_notifier)) {
         return;
     }
 
     /*
-     * TBD. Retrieve the error details and decide what action
-     * needs to be taken. One of the actions could be to pass
-     * the error to the guest and have the guest driver recover
-     * from the error. This requires that PCIe capabilities be
-     * exposed to the guest. For now, we just terminate the
-     * guest to contain the error.
+     * in case the real hardware configration has been changed,
+     * here we should recheck the bus reset capability.
+     */
+    if ((vdev->features & VFIO_FEATURE_ENABLE_AER) &&
+        vfio_check_host_bus_reset(vdev)) {
+        goto stop;
+    }
+    /*
+     * we should read the error details from the real hardware
+     * configuration spaces, here we only need to do is signaling
+     * to guest an uncorrectable error has occurred.
+     */
+    if ((vdev->features & VFIO_FEATURE_ENABLE_AER) &&
+        dev->exp.aer_cap) {
+        uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
+        uint32_t uncor_status;
+        bool isfatal;
+
+        uncor_status = vfio_pci_read_config(dev,
+                           dev->exp.aer_cap + PCI_ERR_UNCOR_STATUS, 4);
+
+        isfatal = uncor_status & pci_get_long(aer_cap + PCI_ERR_UNCOR_SEVER);
+
+        msg.severity = isfatal ? PCI_ERR_ROOT_CMD_FATAL_EN :
+                                 PCI_ERR_ROOT_CMD_NONFATAL_EN;
+
+        pcie_aer_msg(dev, &msg);
+        return;
+    }
+
+stop:
+    /*
+     * If the aer capability is not exposed to the guest. we just
+     * terminate the guest to contain the error.
      */
 
     error_report("%s(%04x:%02x:%02x.%x) Unrecoverable error detected.  "
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [Qemu-devel] [RFC v10 19/19] vfio: add 'aer' property to expose aercap
  2015-06-16  8:10 [Qemu-devel] [RFC v10 00/19] vfio-pci: pass the aer error to guest Chen Fan
                   ` (17 preceding siblings ...)
  2015-06-16  8:11 ` [Qemu-devel] [RFC v10 18/19] vfio-pci: pass the aer error to guest Chen Fan
@ 2015-06-16  8:11 ` Chen Fan
  18 siblings, 0 replies; 27+ messages in thread
From: Chen Fan @ 2015-06-16  8:11 UTC (permalink / raw)
  To: qemu-devel; +Cc: izumi.taku, alex.williamson

add 'aer' property to let user able to decide whether expose
the aer capability. by default we should disable aer feature,
because it needs configuration restrictions.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 3b76329..46bec3a 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -4335,6 +4335,8 @@ static Property vfio_pci_dev_properties[] = {
                     VFIO_FEATURE_ENABLE_VGA_BIT, false),
     DEFINE_PROP_BIT("x-req", VFIOPCIDevice, features,
                     VFIO_FEATURE_ENABLE_REQ_BIT, true),
+    DEFINE_PROP_BIT("aer", VFIOPCIDevice, features,
+                    VFIO_FEATURE_ENABLE_AER_BIT, false),
     DEFINE_PROP_INT32("bootindex", VFIOPCIDevice, bootindex, -1),
     DEFINE_PROP_BOOL("x-mmap", VFIOPCIDevice, vbasedev.allow_mmap, true),
     /*
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [Qemu-devel] [RFC v10 13/19] pci: add bus reset_notifiers callbacks for host bus reset
  2015-06-16  8:10 ` [Qemu-devel] [RFC v10 13/19] pci: add bus reset_notifiers callbacks for host bus reset Chen Fan
@ 2015-06-16 10:20   ` Michael S. Tsirkin
  2015-06-17  1:41     ` Chen Fan
  0 siblings, 1 reply; 27+ messages in thread
From: Michael S. Tsirkin @ 2015-06-16 10:20 UTC (permalink / raw)
  To: Chen Fan; +Cc: izumi.taku, alex.williamson, qemu-devel

On Tue, Jun 16, 2015 at 04:10:57PM +0800, Chen Fan wrote:
> Particularly, For vfio devices, Once need to recovery devices
> by bus reset such as AER, we always need to reset the host bus
> to recovery the devices under the bus, so we need to add pci bus
> callbacks to specify to do host bus reset.
> 
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

Interesting. What prevents guest from triggering reset at an arbitrary
time, killing all VFs for all guests?

> ---
>  hw/pci/pci.c             | 16 ++++++++++++++++
>  hw/pci/pci_bridge.c      |  6 ++++++
>  include/hw/pci/pci.h     |  4 ++++
>  include/hw/pci/pci_bus.h |  2 ++
>  4 files changed, 28 insertions(+)
> 
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index 3423c3a..3bd954e 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -74,11 +74,27 @@ static const VMStateDescription vmstate_pcibus = {
>      }
>  };
>  
> +void pci_bus_add_reset_notifier(PCIBus *bus, Notifier *notify)
> +{
> +    notifier_list_add(&bus->reset_notifiers, notify);
> +}
> +
> +void pci_bus_remove_reset_notifier(Notifier *notify)
> +{
> +    notifier_remove(notify);
> +}
> +
> +void pci_bus_run_reset_notifier(PCIBus *bus, void *opaque)
> +{
> +    notifier_list_notify(&bus->reset_notifiers, opaque);
> +}
> +
>  static void pci_bus_realize(BusState *qbus, Error **errp)
>  {
>      PCIBus *bus = PCI_BUS(qbus);
>  
>      vmstate_register(NULL, -1, &vmstate_pcibus, bus);
> +    notifier_list_init(&bus->reset_notifiers);
>  }
>  
>  static void pci_bus_unrealize(BusState *qbus, Error **errp)
> diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
> index 40c97b1..a8e3f57 100644
> --- a/hw/pci/pci_bridge.c
> +++ b/hw/pci/pci_bridge.c
> @@ -267,6 +267,12 @@ void pci_bridge_write_config(PCIDevice *d,
>  
>      newctl = pci_get_word(d->config + PCI_BRIDGE_CONTROL);
>      if (~oldctl & newctl & PCI_BRIDGE_CTL_BUS_RESET) {
> +        /*
> +         * Notify all vfio-pci devices under the bus
> +         * to do physical bus reset.
> +         */
> +        pci_for_each_bus(&s->sec_bus,
> +                pci_bus_run_reset_notifier, d);
>          /* Trigger hot reset on 0->1 transition. */
>          qbus_reset_all(&s->sec_bus.qbus);
>      }
> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> index 6c2af0d..d353c9d 100644
> --- a/include/hw/pci/pci.h
> +++ b/include/hw/pci/pci.h
> @@ -7,6 +7,7 @@
>  #include "exec/memory.h"
>  #include "sysemu/dma.h"
>  #include "qapi/error.h"
> +#include "qemu/notify.h"
>  
>  /* PCI includes legacy ISA access.  */
>  #include "hw/isa/isa.h"
> @@ -377,6 +378,9 @@ void pci_bus_fire_intx_routing_notifier(PCIBus *bus);
>  void pci_device_set_intx_routing_notifier(PCIDevice *dev,
>                                            PCIINTxRoutingNotifier notifier);
>  void pci_device_reset(PCIDevice *dev);
> +void pci_bus_add_reset_notifier(PCIBus *bus, Notifier *notify);
> +void pci_bus_remove_reset_notifier(Notifier *notify);
> +void pci_bus_run_reset_notifier(PCIBus *bus, void *opaque);
>  
>  PCIDevice *pci_nic_init_nofail(NICInfo *nd, PCIBus *rootbus,
>                                 const char *default_model,
> diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
> index fabaeee..3b551d7 100644
> --- a/include/hw/pci/pci_bus.h
> +++ b/include/hw/pci/pci_bus.h
> @@ -29,6 +29,8 @@ struct PCIBus {
>         Keep a count of the number of devices with raised IRQs.  */
>      int nirq;
>      int *irq_count;
> +
> +    NotifierList reset_notifiers;
>  };
>  
>  typedef struct PCIBridgeWindows PCIBridgeWindows;
> -- 
> 1.9.3
> 

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Qemu-devel] [RFC v10 13/19] pci: add bus reset_notifiers callbacks for host bus reset
  2015-06-16 10:20   ` Michael S. Tsirkin
@ 2015-06-17  1:41     ` Chen Fan
  2015-06-17  6:47       ` Michael S. Tsirkin
  0 siblings, 1 reply; 27+ messages in thread
From: Chen Fan @ 2015-06-17  1:41 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: izumi.taku, alex.williamson, qemu-devel


On 06/16/2015 06:20 PM, Michael S. Tsirkin wrote:
> On Tue, Jun 16, 2015 at 04:10:57PM +0800, Chen Fan wrote:
>> Particularly, For vfio devices, Once need to recovery devices
>> by bus reset such as AER, we always need to reset the host bus
>> to recovery the devices under the bus, so we need to add pci bus
>> callbacks to specify to do host bus reset.
>>
>> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> Interesting. What prevents guest from triggering reset at an arbitrary
> time, killing all VFs for all guests?
If the VF device was assigned to VM, with enable aer, we check the
VF affected devices e.g. the other VFs and PF is belonged to VM or not.
so It can't to affect other VMs for this. if disable aer, there will no 
affection.

Thanks,
Chen

>
>> ---
>>   hw/pci/pci.c             | 16 ++++++++++++++++
>>   hw/pci/pci_bridge.c      |  6 ++++++
>>   include/hw/pci/pci.h     |  4 ++++
>>   include/hw/pci/pci_bus.h |  2 ++
>>   4 files changed, 28 insertions(+)
>>
>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>> index 3423c3a..3bd954e 100644
>> --- a/hw/pci/pci.c
>> +++ b/hw/pci/pci.c
>> @@ -74,11 +74,27 @@ static const VMStateDescription vmstate_pcibus = {
>>       }
>>   };
>>   
>> +void pci_bus_add_reset_notifier(PCIBus *bus, Notifier *notify)
>> +{
>> +    notifier_list_add(&bus->reset_notifiers, notify);
>> +}
>> +
>> +void pci_bus_remove_reset_notifier(Notifier *notify)
>> +{
>> +    notifier_remove(notify);
>> +}
>> +
>> +void pci_bus_run_reset_notifier(PCIBus *bus, void *opaque)
>> +{
>> +    notifier_list_notify(&bus->reset_notifiers, opaque);
>> +}
>> +
>>   static void pci_bus_realize(BusState *qbus, Error **errp)
>>   {
>>       PCIBus *bus = PCI_BUS(qbus);
>>   
>>       vmstate_register(NULL, -1, &vmstate_pcibus, bus);
>> +    notifier_list_init(&bus->reset_notifiers);
>>   }
>>   
>>   static void pci_bus_unrealize(BusState *qbus, Error **errp)
>> diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
>> index 40c97b1..a8e3f57 100644
>> --- a/hw/pci/pci_bridge.c
>> +++ b/hw/pci/pci_bridge.c
>> @@ -267,6 +267,12 @@ void pci_bridge_write_config(PCIDevice *d,
>>   
>>       newctl = pci_get_word(d->config + PCI_BRIDGE_CONTROL);
>>       if (~oldctl & newctl & PCI_BRIDGE_CTL_BUS_RESET) {
>> +        /*
>> +         * Notify all vfio-pci devices under the bus
>> +         * to do physical bus reset.
>> +         */
>> +        pci_for_each_bus(&s->sec_bus,
>> +                pci_bus_run_reset_notifier, d);
>>           /* Trigger hot reset on 0->1 transition. */
>>           qbus_reset_all(&s->sec_bus.qbus);
>>       }
>> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
>> index 6c2af0d..d353c9d 100644
>> --- a/include/hw/pci/pci.h
>> +++ b/include/hw/pci/pci.h
>> @@ -7,6 +7,7 @@
>>   #include "exec/memory.h"
>>   #include "sysemu/dma.h"
>>   #include "qapi/error.h"
>> +#include "qemu/notify.h"
>>   
>>   /* PCI includes legacy ISA access.  */
>>   #include "hw/isa/isa.h"
>> @@ -377,6 +378,9 @@ void pci_bus_fire_intx_routing_notifier(PCIBus *bus);
>>   void pci_device_set_intx_routing_notifier(PCIDevice *dev,
>>                                             PCIINTxRoutingNotifier notifier);
>>   void pci_device_reset(PCIDevice *dev);
>> +void pci_bus_add_reset_notifier(PCIBus *bus, Notifier *notify);
>> +void pci_bus_remove_reset_notifier(Notifier *notify);
>> +void pci_bus_run_reset_notifier(PCIBus *bus, void *opaque);
>>   
>>   PCIDevice *pci_nic_init_nofail(NICInfo *nd, PCIBus *rootbus,
>>                                  const char *default_model,
>> diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
>> index fabaeee..3b551d7 100644
>> --- a/include/hw/pci/pci_bus.h
>> +++ b/include/hw/pci/pci_bus.h
>> @@ -29,6 +29,8 @@ struct PCIBus {
>>          Keep a count of the number of devices with raised IRQs.  */
>>       int nirq;
>>       int *irq_count;
>> +
>> +    NotifierList reset_notifiers;
>>   };
>>   
>>   typedef struct PCIBridgeWindows PCIBridgeWindows;
>> -- 
>> 1.9.3
>>
> .
>

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Qemu-devel] [RFC v10 13/19] pci: add bus reset_notifiers callbacks for host bus reset
  2015-06-17  1:41     ` Chen Fan
@ 2015-06-17  6:47       ` Michael S. Tsirkin
  2015-06-17 15:19         ` Alex Williamson
  0 siblings, 1 reply; 27+ messages in thread
From: Michael S. Tsirkin @ 2015-06-17  6:47 UTC (permalink / raw)
  To: Chen Fan; +Cc: izumi.taku, alex.williamson, qemu-devel

On Wed, Jun 17, 2015 at 09:41:40AM +0800, Chen Fan wrote:
> 
> On 06/16/2015 06:20 PM, Michael S. Tsirkin wrote:
> >On Tue, Jun 16, 2015 at 04:10:57PM +0800, Chen Fan wrote:
> >>Particularly, For vfio devices, Once need to recovery devices
> >>by bus reset such as AER, we always need to reset the host bus
> >>to recovery the devices under the bus, so we need to add pci bus
> >>callbacks to specify to do host bus reset.
> >>
> >>Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> >Interesting. What prevents guest from triggering reset at an arbitrary
> >time, killing all VFs for all guests?
> If the VF device was assigned to VM, with enable aer, we check the
> VF affected devices e.g. the other VFs and PF is belonged to VM or not.
> so It can't to affect other VMs for this. if disable aer, there will no
> affection.
> 
> Thanks,
> Chen

So fundamentally this assumes PF is assigned to the VM?
Why even bother with this aer on VFs then?
Do this for PFs only.

> >
> >>---
> >>  hw/pci/pci.c             | 16 ++++++++++++++++
> >>  hw/pci/pci_bridge.c      |  6 ++++++
> >>  include/hw/pci/pci.h     |  4 ++++
> >>  include/hw/pci/pci_bus.h |  2 ++
> >>  4 files changed, 28 insertions(+)
> >>
> >>diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> >>index 3423c3a..3bd954e 100644
> >>--- a/hw/pci/pci.c
> >>+++ b/hw/pci/pci.c
> >>@@ -74,11 +74,27 @@ static const VMStateDescription vmstate_pcibus = {
> >>      }
> >>  };
> >>+void pci_bus_add_reset_notifier(PCIBus *bus, Notifier *notify)
> >>+{
> >>+    notifier_list_add(&bus->reset_notifiers, notify);
> >>+}
> >>+
> >>+void pci_bus_remove_reset_notifier(Notifier *notify)
> >>+{
> >>+    notifier_remove(notify);
> >>+}
> >>+
> >>+void pci_bus_run_reset_notifier(PCIBus *bus, void *opaque)
> >>+{
> >>+    notifier_list_notify(&bus->reset_notifiers, opaque);
> >>+}
> >>+
> >>  static void pci_bus_realize(BusState *qbus, Error **errp)
> >>  {
> >>      PCIBus *bus = PCI_BUS(qbus);
> >>      vmstate_register(NULL, -1, &vmstate_pcibus, bus);
> >>+    notifier_list_init(&bus->reset_notifiers);
> >>  }
> >>  static void pci_bus_unrealize(BusState *qbus, Error **errp)
> >>diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
> >>index 40c97b1..a8e3f57 100644
> >>--- a/hw/pci/pci_bridge.c
> >>+++ b/hw/pci/pci_bridge.c
> >>@@ -267,6 +267,12 @@ void pci_bridge_write_config(PCIDevice *d,
> >>      newctl = pci_get_word(d->config + PCI_BRIDGE_CONTROL);
> >>      if (~oldctl & newctl & PCI_BRIDGE_CTL_BUS_RESET) {
> >>+        /*
> >>+         * Notify all vfio-pci devices under the bus
> >>+         * to do physical bus reset.
> >>+         */
> >>+        pci_for_each_bus(&s->sec_bus,
> >>+                pci_bus_run_reset_notifier, d);
> >>          /* Trigger hot reset on 0->1 transition. */
> >>          qbus_reset_all(&s->sec_bus.qbus);
> >>      }
> >>diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> >>index 6c2af0d..d353c9d 100644
> >>--- a/include/hw/pci/pci.h
> >>+++ b/include/hw/pci/pci.h
> >>@@ -7,6 +7,7 @@
> >>  #include "exec/memory.h"
> >>  #include "sysemu/dma.h"
> >>  #include "qapi/error.h"
> >>+#include "qemu/notify.h"
> >>  /* PCI includes legacy ISA access.  */
> >>  #include "hw/isa/isa.h"
> >>@@ -377,6 +378,9 @@ void pci_bus_fire_intx_routing_notifier(PCIBus *bus);
> >>  void pci_device_set_intx_routing_notifier(PCIDevice *dev,
> >>                                            PCIINTxRoutingNotifier notifier);
> >>  void pci_device_reset(PCIDevice *dev);
> >>+void pci_bus_add_reset_notifier(PCIBus *bus, Notifier *notify);
> >>+void pci_bus_remove_reset_notifier(Notifier *notify);
> >>+void pci_bus_run_reset_notifier(PCIBus *bus, void *opaque);
> >>  PCIDevice *pci_nic_init_nofail(NICInfo *nd, PCIBus *rootbus,
> >>                                 const char *default_model,
> >>diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
> >>index fabaeee..3b551d7 100644
> >>--- a/include/hw/pci/pci_bus.h
> >>+++ b/include/hw/pci/pci_bus.h
> >>@@ -29,6 +29,8 @@ struct PCIBus {
> >>         Keep a count of the number of devices with raised IRQs.  */
> >>      int nirq;
> >>      int *irq_count;
> >>+
> >>+    NotifierList reset_notifiers;
> >>  };
> >>  typedef struct PCIBridgeWindows PCIBridgeWindows;
> >>-- 
> >>1.9.3
> >>
> >.
> >

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Qemu-devel] [RFC v10 03/19] pcie: modify the capability size assert
  2015-06-16  8:10 ` [Qemu-devel] [RFC v10 03/19] pcie: modify the capability size assert Chen Fan
@ 2015-06-17  9:43   ` Marcel Apfelbaum
  0 siblings, 0 replies; 27+ messages in thread
From: Marcel Apfelbaum @ 2015-06-17  9:43 UTC (permalink / raw)
  To: Chen Fan, qemu-devel; +Cc: izumi.taku, alex.williamson

On 06/16/2015 11:10 AM, Chen Fan wrote:
> because the capabilities need to be DWORD aligned, so the size
> should DWORD aligned too, and then the last capability size can
> to be the greatest 0x1000. e.g. if I have a capability starting
> 4 bytes from the end, 0xFFC.  The max size should be 4 bytes,
> 0x1000 - 0xFFC, not 3 bytes, 0xFFF - 0xFFC.
I would re-word the message to something simpler like:

    Offset and size can reach PCIE_CONFIG_SPACE_SIZE, fix
    the corresponding assert.

Other than that,

Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>

Thanks,
Marcel

>
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> ---
>   hw/pci/pcie.c | 2 +-
>   1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
> index 1463e65..6cdd4a1 100644
> --- a/hw/pci/pcie.c
> +++ b/hw/pci/pcie.c
> @@ -595,7 +595,7 @@ void pcie_add_capability(PCIDevice *dev,
>
>       assert(offset >= PCI_CONFIG_SPACE_SIZE);
>       assert(offset < offset + size);
> -    assert(offset + size < PCIE_CONFIG_SPACE_SIZE);
> +    assert(offset + size <= PCIE_CONFIG_SPACE_SIZE);
>       assert(size >= 8);
>       assert(pci_is_express(dev));
>
>

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Qemu-devel] [RFC v10 17/19] pcie_aer: expose pcie_aer_msg() interface
  2015-06-16  8:11 ` [Qemu-devel] [RFC v10 17/19] pcie_aer: expose pcie_aer_msg() interface Chen Fan
@ 2015-06-17  9:46   ` Marcel Apfelbaum
  0 siblings, 0 replies; 27+ messages in thread
From: Marcel Apfelbaum @ 2015-06-17  9:46 UTC (permalink / raw)
  To: Chen Fan, qemu-devel; +Cc: izumi.taku, alex.williamson

On 06/16/2015 11:11 AM, Chen Fan wrote:
> For vfio device, we need to propagate the aer error to
> Guest OS. we use the pcie_aer_msg() to send aer error
> to guest.
>
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> ---
>   hw/pci/pcie_aer.c         | 2 +-
>   include/hw/pci/pcie_aer.h | 1 +
>   2 files changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c
> index 6171a29..373d1f2 100644
> --- a/hw/pci/pcie_aer.c
> +++ b/hw/pci/pcie_aer.c
> @@ -370,7 +370,7 @@ static void pcie_aer_msg_root_port(PCIDevice *dev, const PCIEAERMsg *msg)
>    *
>    * Walk up the bus tree from the device, propagate the error message.
>    */
> -static void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg)
> +void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg)
>   {
>       uint8_t type;
>
> diff --git a/include/hw/pci/pcie_aer.h b/include/hw/pci/pcie_aer.h
> index 156acb0..c2ee4e2 100644
> --- a/include/hw/pci/pcie_aer.h
> +++ b/include/hw/pci/pcie_aer.h
> @@ -102,5 +102,6 @@ void pcie_aer_root_write_config(PCIDevice *dev,
>
>   /* error injection */
>   int pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err);
> +void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg);
>
>   #endif /* QEMU_PCIE_AER_H */
>
Assuming the pcie_aer_msg will be used (after all reviews):

     Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>

Thanks,
Marcel

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Qemu-devel] [RFC v10 13/19] pci: add bus reset_notifiers callbacks for host bus reset
  2015-06-17  6:47       ` Michael S. Tsirkin
@ 2015-06-17 15:19         ` Alex Williamson
  0 siblings, 0 replies; 27+ messages in thread
From: Alex Williamson @ 2015-06-17 15:19 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: Chen Fan, izumi.taku, qemu-devel

On Wed, 2015-06-17 at 08:47 +0200, Michael S. Tsirkin wrote:
> On Wed, Jun 17, 2015 at 09:41:40AM +0800, Chen Fan wrote:
> > 
> > On 06/16/2015 06:20 PM, Michael S. Tsirkin wrote:
> > >On Tue, Jun 16, 2015 at 04:10:57PM +0800, Chen Fan wrote:
> > >>Particularly, For vfio devices, Once need to recovery devices
> > >>by bus reset such as AER, we always need to reset the host bus
> > >>to recovery the devices under the bus, so we need to add pci bus
> > >>callbacks to specify to do host bus reset.
> > >>
> > >>Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > >Interesting. What prevents guest from triggering reset at an arbitrary
> > >time, killing all VFs for all guests?
> > If the VF device was assigned to VM, with enable aer, we check the
> > VF affected devices e.g. the other VFs and PF is belonged to VM or not.
> > so It can't to affect other VMs for this. if disable aer, there will no
> > affection.
> > 
> > Thanks,
> > Chen
> 
> So fundamentally this assumes PF is assigned to the VM?
> Why even bother with this aer on VFs then?
> Do this for PFs only.

At QEMU, we don't know or care whether we're dealing with a PF or VF and
I don't see this as an opportunity to add that distinction.  The issue
is simply that we can't do a bus reset on a VF, but that's also true of
PFs that reside on the host root complex.  The vfio-pci bus reset
interface handles this, it tells us when we can and can't do a bus reset
and lists the affected devices when we can.  A VF has no parent bridge
on which to perform a secondary bus reset, just like a PF on the root
complex.  So we can just let the devices fall out there without caring
that they're VFs.  Neither the QEMU PCI level nor the guest (other than
device IDs) really knows that the device is a VF either.  Thanks,

Alex

> > >>---
> > >>  hw/pci/pci.c             | 16 ++++++++++++++++
> > >>  hw/pci/pci_bridge.c      |  6 ++++++
> > >>  include/hw/pci/pci.h     |  4 ++++
> > >>  include/hw/pci/pci_bus.h |  2 ++
> > >>  4 files changed, 28 insertions(+)
> > >>
> > >>diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> > >>index 3423c3a..3bd954e 100644
> > >>--- a/hw/pci/pci.c
> > >>+++ b/hw/pci/pci.c
> > >>@@ -74,11 +74,27 @@ static const VMStateDescription vmstate_pcibus = {
> > >>      }
> > >>  };
> > >>+void pci_bus_add_reset_notifier(PCIBus *bus, Notifier *notify)
> > >>+{
> > >>+    notifier_list_add(&bus->reset_notifiers, notify);
> > >>+}
> > >>+
> > >>+void pci_bus_remove_reset_notifier(Notifier *notify)
> > >>+{
> > >>+    notifier_remove(notify);
> > >>+}
> > >>+
> > >>+void pci_bus_run_reset_notifier(PCIBus *bus, void *opaque)
> > >>+{
> > >>+    notifier_list_notify(&bus->reset_notifiers, opaque);
> > >>+}
> > >>+
> > >>  static void pci_bus_realize(BusState *qbus, Error **errp)
> > >>  {
> > >>      PCIBus *bus = PCI_BUS(qbus);
> > >>      vmstate_register(NULL, -1, &vmstate_pcibus, bus);
> > >>+    notifier_list_init(&bus->reset_notifiers);
> > >>  }
> > >>  static void pci_bus_unrealize(BusState *qbus, Error **errp)
> > >>diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
> > >>index 40c97b1..a8e3f57 100644
> > >>--- a/hw/pci/pci_bridge.c
> > >>+++ b/hw/pci/pci_bridge.c
> > >>@@ -267,6 +267,12 @@ void pci_bridge_write_config(PCIDevice *d,
> > >>      newctl = pci_get_word(d->config + PCI_BRIDGE_CONTROL);
> > >>      if (~oldctl & newctl & PCI_BRIDGE_CTL_BUS_RESET) {
> > >>+        /*
> > >>+         * Notify all vfio-pci devices under the bus
> > >>+         * to do physical bus reset.
> > >>+         */
> > >>+        pci_for_each_bus(&s->sec_bus,
> > >>+                pci_bus_run_reset_notifier, d);
> > >>          /* Trigger hot reset on 0->1 transition. */
> > >>          qbus_reset_all(&s->sec_bus.qbus);
> > >>      }
> > >>diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> > >>index 6c2af0d..d353c9d 100644
> > >>--- a/include/hw/pci/pci.h
> > >>+++ b/include/hw/pci/pci.h
> > >>@@ -7,6 +7,7 @@
> > >>  #include "exec/memory.h"
> > >>  #include "sysemu/dma.h"
> > >>  #include "qapi/error.h"
> > >>+#include "qemu/notify.h"
> > >>  /* PCI includes legacy ISA access.  */
> > >>  #include "hw/isa/isa.h"
> > >>@@ -377,6 +378,9 @@ void pci_bus_fire_intx_routing_notifier(PCIBus *bus);
> > >>  void pci_device_set_intx_routing_notifier(PCIDevice *dev,
> > >>                                            PCIINTxRoutingNotifier notifier);
> > >>  void pci_device_reset(PCIDevice *dev);
> > >>+void pci_bus_add_reset_notifier(PCIBus *bus, Notifier *notify);
> > >>+void pci_bus_remove_reset_notifier(Notifier *notify);
> > >>+void pci_bus_run_reset_notifier(PCIBus *bus, void *opaque);
> > >>  PCIDevice *pci_nic_init_nofail(NICInfo *nd, PCIBus *rootbus,
> > >>                                 const char *default_model,
> > >>diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
> > >>index fabaeee..3b551d7 100644
> > >>--- a/include/hw/pci/pci_bus.h
> > >>+++ b/include/hw/pci/pci_bus.h
> > >>@@ -29,6 +29,8 @@ struct PCIBus {
> > >>         Keep a count of the number of devices with raised IRQs.  */
> > >>      int nirq;
> > >>      int *irq_count;
> > >>+
> > >>+    NotifierList reset_notifiers;
> > >>  };
> > >>  typedef struct PCIBridgeWindows PCIBridgeWindows;
> > >>-- 
> > >>1.9.3
> > >>
> > >.
> > >

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [Qemu-devel] [RFC v10 10/19] vfio: improve vfio_get_group to support adding as is NULL.
  2015-06-16  8:10 ` [Qemu-devel] [RFC v10 10/19] vfio: improve vfio_get_group to support adding as is NULL Chen Fan
@ 2015-06-18 16:41   ` Alex Williamson
  0 siblings, 0 replies; 27+ messages in thread
From: Alex Williamson @ 2015-06-18 16:41 UTC (permalink / raw)
  To: Chen Fan; +Cc: izumi.taku, qemu-devel

On Tue, 2015-06-16 at 16:10 +0800, Chen Fan wrote:
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> ---
>  hw/vfio/common.c | 9 ++++++++-
>  1 file changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index df3171d..15f19a2 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -808,11 +808,18 @@ VFIOGroup *vfio_get_group(int groupid, AddressSpace *as)
>      VFIOGroup *group;
>      char path[32];
>      struct vfio_group_status status = { .argsz = sizeof(status) };
> +    int ret;
>  
>      QLIST_FOREACH(group, &vfio_group_list, next) {
>          if (group->groupid == groupid) {
>              /* Found it.  Now is it already in the right context? */
> -            if (group->container->space->as == as) {
> +            if (as && !group->container->space->as) {
> +                ret = vfio_register_container_listener(group->container, as);
> +                if (ret) {
> +                    return NULL;
> +                }
> +            }
> +            if (!as || group->container->space->as == as) {
>                  group->ref++;
>                  return group;
>              } else {


No, this is broken.  Look further down where we call
vfio_connect_container(group, as).  With as=NULL, that gives us a new
VFIOAddressSpace with space.as=NULL.  Then we walk the list of
containers attached to that as, which is of course empty, so we create a
new container for the device.  If we have more than one group attached
this way, they share this container.  What happens latter if one or more
of those devices are attached to the VM?  In the best case we setup a
memory listener for this container, but that breaks vfio locked memory
accounting because previously they would have shared a container and now
we have two separate containers for them.  But to make things worse, any
other groups that were attached to this space.as=NULL container are
forced to share this new as, so it's impossible to add them to a
separate as.

So, this won't work.  I also don't like the side-effect of taking
implicit ownership of groups.  IOMMU groups are challenging enough and
changing the rules to care about not only groups but shared bus
dependencies seems like a support nightmare.

I keep trying to think how we could reduce the scope to make AER support
more manageable.  Should we simply require that all device affected by a
bus reset are assigned to the VM?  To do that we'd need to revert to
your machine-init-done check for compliance.  But then what becomes of
hotplug?  As soon as we hot-remove one of those devices, AER is broken
and we can't hot-add device individually and maintain support.

If we limit support to single function endpoints, our problems go away,
but the number of devices we could enable AER for is nearly zero.  If we
include multi-function, then hotplug is a significant issue since each
function is individually hot-pluggable.  But we can solve that by
requiring AER devices to have a 1:1 function mapping to the VM and
requiring all the functions of the slot to be assigned to the VM.
Hot-remove then works automatically since that's done on a slot basis.
Hot-add is not possible since QEMU doesn't support hot-add of
multi-function devices, but that's a problem that needs to be solved
anyway.  Thanks,

Alex

^ permalink raw reply	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2015-06-18 16:41 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-06-16  8:10 [Qemu-devel] [RFC v10 00/19] vfio-pci: pass the aer error to guest Chen Fan
2015-06-16  8:10 ` [Qemu-devel] [RFC v10 01/19] vfio: extract vfio_get_hot_reset_info as a single function Chen Fan
2015-06-16  8:10 ` [Qemu-devel] [RFC v10 02/19] vfio: squeeze out vfio_pci_do_hot_reset for support bus reset Chen Fan
2015-06-16  8:10 ` [Qemu-devel] [RFC v10 03/19] pcie: modify the capability size assert Chen Fan
2015-06-17  9:43   ` Marcel Apfelbaum
2015-06-16  8:10 ` [Qemu-devel] [RFC v10 04/19] vfio: make the 4 bytes aligned for capability size Chen Fan
2015-06-16  8:10 ` [Qemu-devel] [RFC v10 05/19] vfio: add pcie extanded capability support Chen Fan
2015-06-16  8:10 ` [Qemu-devel] [RFC v10 06/19] aer: impove pcie_aer_init to support vfio device Chen Fan
2015-06-16  8:10 ` [Qemu-devel] [RFC v10 07/19] vfio: add aer support for " Chen Fan
2015-06-16  8:10 ` [Qemu-devel] [RFC v10 08/19] vfio: add ref for group to support own affected groups Chen Fan
2015-06-16  8:10 ` [Qemu-devel] [RFC v10 09/19] vfio: extract vfio_register_container_listener from vfio_connect_container Chen Fan
2015-06-16  8:10 ` [Qemu-devel] [RFC v10 10/19] vfio: improve vfio_get_group to support adding as is NULL Chen Fan
2015-06-18 16:41   ` Alex Williamson
2015-06-16  8:10 ` [Qemu-devel] [RFC v10 11/19] get all affected groups for each device support aer Chen Fan
2015-06-16  8:10 ` [Qemu-devel] [RFC v10 12/19] vfio: add check host bus reset is support or not Chen Fan
2015-06-16  8:10 ` [Qemu-devel] [RFC v10 13/19] pci: add bus reset_notifiers callbacks for host bus reset Chen Fan
2015-06-16 10:20   ` Michael S. Tsirkin
2015-06-17  1:41     ` Chen Fan
2015-06-17  6:47       ` Michael S. Tsirkin
2015-06-17 15:19         ` Alex Williamson
2015-06-16  8:10 ` [Qemu-devel] [RFC v10 14/19] vfio: add sec_bus_reset notifier to notify physical bus reset is needed Chen Fan
2015-06-16  8:10 ` [Qemu-devel] [RFC v10 15/19] vfio: improve vfio_pci_hot_reset to support more case Chen Fan
2015-06-16  8:11 ` [Qemu-devel] [RFC v10 16/19] vfio: do hot bus reset when do virtual secondary bus reset Chen Fan
2015-06-16  8:11 ` [Qemu-devel] [RFC v10 17/19] pcie_aer: expose pcie_aer_msg() interface Chen Fan
2015-06-17  9:46   ` Marcel Apfelbaum
2015-06-16  8:11 ` [Qemu-devel] [RFC v10 18/19] vfio-pci: pass the aer error to guest Chen Fan
2015-06-16  8:11 ` [Qemu-devel] [RFC v10 19/19] vfio: add 'aer' property to expose aercap Chen Fan

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.