All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH v13 00/13] vfio-pci: pass the aer error to guest
@ 2015-11-11 10:34 Cao jin
  2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 01/13] vfio: extract vfio_get_hot_reset_info as a single function Cao jin
                   ` (12 more replies)
  0 siblings, 13 replies; 30+ messages in thread
From: Cao jin @ 2015-11-11 10:34 UTC (permalink / raw)
  To: qemu-devel; +Cc: Chen Fan, alex.williamson, mst

From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

Currently, when QEMU receives an error from a host AER report for a
vfio-pci passthrough device, it simply terminates the guest. Usually,
however, the user wants to know what error occurred rather than have
the guest stopped. This series therefore adds AER capability support
for vfio devices, passes the error to the guest, and lets the guest
driver recover from the error.

v12-v13:
   1. since multifunction hotplug is now supported, add a callback to enable aer.
   2. add pci device pre/post reset callbacks for aer host bus reset.


Chen Fan (13):
  vfio: extract vfio_get_hot_reset_info as a single function
  vfio: squeeze out vfio_pci_do_hot_reset for support bus reset
  pcie: modify the capability size assert
  vfio: make the 4 bytes aligned for capability size
  vfio: add pcie extanded capability support
  aer: impove pcie_aer_init to support vfio device
  vfio: add aer support for vfio device
  vfio: add check host bus reset is support or not
  add check reset mechanism when hotplug vfio device
  pci: add pci device pre-post reset callbacks for host bus reset
  pcie_aer: expose pcie_aer_msg() interface
  vfio-pci: pass the aer error to guest
  vfio: add 'aer' property to expose aercap

 hw/pci-bridge/ioh3420.c            |   2 +-
 hw/pci-bridge/xio3130_downstream.c |   2 +-
 hw/pci-bridge/xio3130_upstream.c   |   2 +-
 hw/pci/pci.c                       |  47 +++
 hw/pci/pci_bridge.c                |   9 +
 hw/pci/pcie.c                      |   2 +-
 hw/pci/pcie_aer.c                  |   6 +-
 hw/vfio/pci.c                      | 592 +++++++++++++++++++++++++++++++++----
 hw/vfio/pci.h                      |   8 +
 include/hw/pci/pci.h               |   7 +
 include/hw/pci/pci_bus.h           |   5 +
 include/hw/pci/pcie_aer.h          |   3 +-
 12 files changed, 613 insertions(+), 72 deletions(-)

-- 
1.9.3

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [Qemu-devel] [PATCH v13 01/13] vfio: extract vfio_get_hot_reset_info as a single function
  2015-11-11 10:34 [Qemu-devel] [PATCH v13 00/13] vfio-pci: pass the aer error to guest Cao jin
@ 2015-11-11 10:34 ` Cao jin
  2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 02/13] vfio: squeeze out vfio_pci_do_hot_reset for support bus reset Cao jin
                   ` (11 subsequent siblings)
  12 siblings, 0 replies; 30+ messages in thread
From: Cao jin @ 2015-11-11 10:34 UTC (permalink / raw)
  To: qemu-devel; +Cc: Chen Fan, alex.williamson, mst

From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

This function is used to get the devices affected by a bus reset.
Extract it here so that it can also be used for AER later.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 66 +++++++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 48 insertions(+), 18 deletions(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 8fadbcf..464e6b7 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1628,6 +1628,51 @@ static void vfio_check_af_flr(VFIOPCIDevice *vdev, uint8_t pos)
     }
 }
 
+/*
+ * return negative with errno, return 0 on success.
+ * if success, the point of ret_info fill with the affected device reset info.
+ *
+ */
+static int vfio_get_hot_reset_info(VFIOPCIDevice *vdev,
+                                   struct vfio_pci_hot_reset_info **ret_info)
+{
+    struct vfio_pci_hot_reset_info *info;
+    int ret, count;
+
+    *ret_info = NULL;
+
+    info = g_malloc0(sizeof(*info));
+    info->argsz = sizeof(*info);
+
+    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info);
+    if (ret && errno != ENOSPC) {
+        ret = -errno;
+        goto error;
+    }
+
+    count = info->count;
+
+    info = g_realloc(info, sizeof(*info) +
+                     (count * sizeof(struct vfio_pci_dependent_device)));
+    info->argsz = sizeof(*info) +
+                  (count * sizeof(struct vfio_pci_dependent_device));
+
+    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info);
+    if (ret) {
+        ret = -errno;
+        error_report("vfio: hot reset info failed: %m");
+        goto error;
+    }
+
+    *ret_info = info;
+    info = NULL;
+
+    return 0;
+error:
+    g_free(info);
+    return ret;
+}
+
 static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
 {
     PCIDevice *pdev = &vdev->pdev;
@@ -1767,7 +1812,7 @@ static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
 static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
 {
     VFIOGroup *group;
-    struct vfio_pci_hot_reset_info *info;
+    struct vfio_pci_hot_reset_info *info = NULL;
     struct vfio_pci_dependent_device *devices;
     struct vfio_pci_hot_reset *reset;
     int32_t *fds;
@@ -1779,12 +1824,8 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
     vfio_pci_pre_reset(vdev);
     vdev->vbasedev.needs_reset = false;
 
-    info = g_malloc0(sizeof(*info));
-    info->argsz = sizeof(*info);
-
-    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info);
-    if (ret && errno != ENOSPC) {
-        ret = -errno;
+    ret = vfio_get_hot_reset_info(vdev, &info);
+    if (ret) {
         if (!vdev->has_pm_reset) {
             error_report("vfio: Cannot reset device %04x:%02x:%02x.%x, "
                          "no available reset mechanism.", vdev->host.domain,
@@ -1793,18 +1834,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
         goto out_single;
     }
 
-    count = info->count;
-    info = g_realloc(info, sizeof(*info) + (count * sizeof(*devices)));
-    info->argsz = sizeof(*info) + (count * sizeof(*devices));
     devices = &info->devices[0];
-
-    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info);
-    if (ret) {
-        ret = -errno;
-        error_report("vfio: hot reset info failed: %m");
-        goto out_single;
-    }
-
     trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name);
 
     /* Verify that we have all the groups required */
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [Qemu-devel] [PATCH v13 02/13] vfio: squeeze out vfio_pci_do_hot_reset for support bus reset
  2015-11-11 10:34 [Qemu-devel] [PATCH v13 00/13] vfio-pci: pass the aer error to guest Cao jin
  2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 01/13] vfio: extract vfio_get_hot_reset_info as a single function Cao jin
@ 2015-11-11 10:34 ` Cao jin
  2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 03/13] pcie: modify the capability size assert Cao jin
                   ` (10 subsequent siblings)
  12 siblings, 0 replies; 30+ messages in thread
From: Cao jin @ 2015-11-11 10:34 UTC (permalink / raw)
  To: qemu-devel; +Cc: Chen Fan, alex.williamson, mst

From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

Factor out vfio_pci_do_hot_reset so that it can be used to do a host bus
reset during AER recovery.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 75 +++++++++++++++++++++++++++++++++++------------------------
 1 file changed, 44 insertions(+), 31 deletions(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 464e6b7..f333dfc 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1673,6 +1673,48 @@ error:
     return ret;
 }
 
+static int vfio_pci_do_hot_reset(VFIOPCIDevice *vdev,
+                                 struct vfio_pci_hot_reset_info *info)
+{
+    VFIOGroup *group;
+    struct vfio_pci_hot_reset *reset;
+    int32_t *fds;
+    int ret, i, count;
+    struct vfio_pci_dependent_device *devices;
+
+    /* Determine how many group fds need to be passed */
+    count = 0;
+    devices = &info->devices[0];
+    QLIST_FOREACH(group, &vfio_group_list, next) {
+        for (i = 0; i < info->count; i++) {
+            if (group->groupid == devices[i].group_id) {
+                count++;
+                break;
+            }
+        }
+    }
+
+    reset = g_malloc0(sizeof(*reset) + (count * sizeof(*fds)));
+    reset->argsz = sizeof(*reset) + (count * sizeof(*fds));
+    fds = &reset->group_fds[0];
+
+    /* Fill in group fds */
+    QLIST_FOREACH(group, &vfio_group_list, next) {
+        for (i = 0; i < info->count; i++) {
+            if (group->groupid == devices[i].group_id) {
+                fds[reset->count++] = group->fd;
+                break;
+            }
+        }
+    }
+
+    /* Bus reset! */
+    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset);
+    g_free(reset);
+
+    return ret;
+}
+
 static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
 {
     PCIDevice *pdev = &vdev->pdev;
@@ -1814,9 +1856,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
     VFIOGroup *group;
     struct vfio_pci_hot_reset_info *info = NULL;
     struct vfio_pci_dependent_device *devices;
-    struct vfio_pci_hot_reset *reset;
-    int32_t *fds;
-    int ret, i, count;
+    int ret, i;
     bool multi = false;
 
     trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi");
@@ -1895,34 +1935,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
         goto out_single;
     }
 
-    /* Determine how many group fds need to be passed */
-    count = 0;
-    QLIST_FOREACH(group, &vfio_group_list, next) {
-        for (i = 0; i < info->count; i++) {
-            if (group->groupid == devices[i].group_id) {
-                count++;
-                break;
-            }
-        }
-    }
-
-    reset = g_malloc0(sizeof(*reset) + (count * sizeof(*fds)));
-    reset->argsz = sizeof(*reset) + (count * sizeof(*fds));
-    fds = &reset->group_fds[0];
-
-    /* Fill in group fds */
-    QLIST_FOREACH(group, &vfio_group_list, next) {
-        for (i = 0; i < info->count; i++) {
-            if (group->groupid == devices[i].group_id) {
-                fds[reset->count++] = group->fd;
-                break;
-            }
-        }
-    }
-
-    /* Bus reset! */
-    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset);
-    g_free(reset);
+    ret = vfio_pci_do_hot_reset(vdev, info);
 
     trace_vfio_pci_hot_reset_result(vdev->vbasedev.name,
                                     ret ? "%m" : "Success");
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [Qemu-devel] [PATCH v13 03/13] pcie: modify the capability size assert
  2015-11-11 10:34 [Qemu-devel] [PATCH v13 00/13] vfio-pci: pass the aer error to guest Cao jin
  2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 01/13] vfio: extract vfio_get_hot_reset_info as a single function Cao jin
  2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 02/13] vfio: squeeze out vfio_pci_do_hot_reset for support bus reset Cao jin
@ 2015-11-11 10:34 ` Cao jin
  2015-11-11 16:55   ` Michael S. Tsirkin
  2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 04/13] vfio: make the 4 bytes aligned for capability size Cao jin
                   ` (9 subsequent siblings)
  12 siblings, 1 reply; 30+ messages in thread
From: Cao jin @ 2015-11-11 10:34 UTC (permalink / raw)
  To: qemu-devel; +Cc: Chen Fan, alex.williamson, mst

From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

A device's offset plus size can reach PCIE_CONFIG_SPACE_SIZE, so
fix the corresponding assert.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
---
 hw/pci/pcie.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 0eab29d..8f4c0e5 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -607,7 +607,7 @@ void pcie_add_capability(PCIDevice *dev,
 
     assert(offset >= PCI_CONFIG_SPACE_SIZE);
     assert(offset < offset + size);
-    assert(offset + size < PCIE_CONFIG_SPACE_SIZE);
+    assert(offset + size <= PCIE_CONFIG_SPACE_SIZE);
     assert(size >= 8);
     assert(pci_is_express(dev));
 
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [Qemu-devel] [PATCH v13 04/13] vfio: make the 4 bytes aligned for capability size
  2015-11-11 10:34 [Qemu-devel] [PATCH v13 00/13] vfio-pci: pass the aer error to guest Cao jin
                   ` (2 preceding siblings ...)
  2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 03/13] pcie: modify the capability size assert Cao jin
@ 2015-11-11 10:34 ` Cao jin
  2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 05/13] vfio: add pcie extanded capability support Cao jin
                   ` (8 subsequent siblings)
  12 siblings, 0 replies; 30+ messages in thread
From: Cao jin @ 2015-11-11 10:34 UTC (permalink / raw)
  To: qemu-devel; +Cc: Chen Fan, alex.williamson, mst

From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

This function searches the capabilities from the end; the size of the
last one should be 0x100 - pos, not 0xff - pos.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index f333dfc..e305cda 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1468,7 +1468,8 @@ static void vfio_unmap_bars(VFIOPCIDevice *vdev)
  */
 static uint8_t vfio_std_cap_max_size(PCIDevice *pdev, uint8_t pos)
 {
-    uint8_t tmp, next = 0xff;
+    uint8_t tmp;
+    uint16_t next = PCI_CONFIG_SPACE_SIZE;
 
     for (tmp = pdev->config[PCI_CAPABILITY_LIST]; tmp;
          tmp = pdev->config[tmp + 1]) {
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [Qemu-devel] [PATCH v13 05/13] vfio: add pcie extanded capability support
  2015-11-11 10:34 [Qemu-devel] [PATCH v13 00/13] vfio-pci: pass the aer error to guest Cao jin
                   ` (3 preceding siblings ...)
  2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 04/13] vfio: make the 4 bytes aligned for capability size Cao jin
@ 2015-11-11 10:34 ` Cao jin
  2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 06/13] aer: impove pcie_aer_init to support vfio device Cao jin
                   ` (7 subsequent siblings)
  12 siblings, 0 replies; 30+ messages in thread
From: Cao jin @ 2015-11-11 10:34 UTC (permalink / raw)
  To: qemu-devel; +Cc: Chen Fan, alex.williamson, mst

From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

For a vfio PCIe device, we can expose the extended capabilities on a
PCIe bus. To avoid breaking config space, we introduce a copy of the
config for parsing the extended caps, and rebuild the PCIe extended
config space from it.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 69 insertions(+), 1 deletion(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index e305cda..4bc2b51 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1481,6 +1481,21 @@ static uint8_t vfio_std_cap_max_size(PCIDevice *pdev, uint8_t pos)
     return next - pos;
 }
 
+
+static uint16_t vfio_ext_cap_max_size(const uint8_t *config, uint16_t pos)
+{
+    uint16_t tmp, next = PCIE_CONFIG_SPACE_SIZE;
+
+    for (tmp = PCI_CONFIG_SPACE_SIZE; tmp;
+        tmp = PCI_EXT_CAP_NEXT(pci_get_long(config + tmp))) {
+        if (tmp > pos && tmp < next) {
+            next = tmp;
+        }
+    }
+
+    return next - pos;
+}
+
 static void vfio_set_word_bits(uint8_t *buf, uint16_t val, uint16_t mask)
 {
     pci_set_word(buf, (pci_get_word(buf) & ~mask) | val);
@@ -1791,16 +1806,69 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
     return 0;
 }
 
+static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
+{
+    PCIDevice *pdev = &vdev->pdev;
+    uint32_t header;
+    uint16_t cap_id, next, size;
+    uint8_t cap_ver;
+    uint8_t *config;
+
+    /*
+     * In order to avoid breaking config space, create a copy to
+     * use for parsing extended capabilities.
+     */
+    config = g_memdup(pdev->config, vdev->config_size);
+
+    for (next = PCI_CONFIG_SPACE_SIZE; next;
+         next = PCI_EXT_CAP_NEXT(pci_get_long(config + next))) {
+        header = pci_get_long(config + next);
+        cap_id = PCI_EXT_CAP_ID(header);
+        cap_ver = PCI_EXT_CAP_VER(header);
+
+        /*
+         * If it becomes important to configure extended capabilities to their
+         * actual size, use this as the default when it's something we don't
+         * recognize. Since QEMU doesn't actually handle many of the config
+         * accesses, exact size doesn't seem worthwhile.
+         */
+        size = vfio_ext_cap_max_size(config, next);
+
+        pcie_add_capability(pdev, cap_id, cap_ver, next, size);
+        pci_set_long(dev->config + next, PCI_EXT_CAP(cap_id, cap_ver, 0));
+
+        /* Use emulated next pointer to allow dropping extended caps */
+        pci_long_test_and_set_mask(vdev->emulated_config_bits + next,
+                                   PCI_EXT_CAP_NEXT_MASK);
+    }
+
+    g_free(config);
+    return 0;
+}
+
 static int vfio_add_capabilities(VFIOPCIDevice *vdev)
 {
     PCIDevice *pdev = &vdev->pdev;
+    int ret;
 
     if (!(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST) ||
         !pdev->config[PCI_CAPABILITY_LIST]) {
         return 0; /* Nothing to add */
     }
 
-    return vfio_add_std_cap(vdev, pdev->config[PCI_CAPABILITY_LIST]);
+    ret = vfio_add_std_cap(vdev, pdev->config[PCI_CAPABILITY_LIST]);
+    if (ret) {
+        return ret;
+    }
+
+    /* on PCI bus, it doesn't make sense to expose extended capabilities. */
+    if (!pci_is_express(pdev) ||
+        !pci_bus_is_express(pdev->bus) ||
+        !pci_get_long(pdev->config + PCI_CONFIG_SPACE_SIZE)) {
+        return 0;
+    }
+
+    return vfio_add_ext_cap(vdev);
 }
 
 static void vfio_pci_pre_reset(VFIOPCIDevice *vdev)
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [Qemu-devel] [PATCH v13 06/13] aer: impove pcie_aer_init to support vfio device
  2015-11-11 10:34 [Qemu-devel] [PATCH v13 00/13] vfio-pci: pass the aer error to guest Cao jin
                   ` (4 preceding siblings ...)
  2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 05/13] vfio: add pcie extanded capability support Cao jin
@ 2015-11-11 10:34 ` Cao jin
  2015-11-11 16:55   ` Michael S. Tsirkin
  2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 07/13] vfio: add aer support for " Cao jin
                   ` (6 subsequent siblings)
  12 siblings, 1 reply; 30+ messages in thread
From: Cao jin @ 2015-11-11 10:34 UTC (permalink / raw)
  To: qemu-devel; +Cc: Chen Fan, alex.williamson, mst

From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

pcie_aer_init is used to emulate an AER capability for a PCIe device,
but for a vfio device the AER config space size varies and is not
always equal to PCI_ERR_SIZEOF (0x48); it depends on whether the TLP
Prefix register is required. So here we add a size argument.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/pci-bridge/ioh3420.c            | 2 +-
 hw/pci-bridge/xio3130_downstream.c | 2 +-
 hw/pci-bridge/xio3130_upstream.c   | 2 +-
 hw/pci/pcie_aer.c                  | 4 ++--
 include/hw/pci/pcie_aer.h          | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/hw/pci-bridge/ioh3420.c b/hw/pci-bridge/ioh3420.c
index cce2fdd..4d9cd3f 100644
--- a/hw/pci-bridge/ioh3420.c
+++ b/hw/pci-bridge/ioh3420.c
@@ -129,7 +129,7 @@ static int ioh3420_initfn(PCIDevice *d)
         goto err_pcie_cap;
     }
     pcie_cap_root_init(d);
-    rc = pcie_aer_init(d, IOH_EP_AER_OFFSET);
+    rc = pcie_aer_init(d, IOH_EP_AER_OFFSET, PCI_ERR_SIZEOF);
     if (rc < 0) {
         goto err;
     }
diff --git a/hw/pci-bridge/xio3130_downstream.c b/hw/pci-bridge/xio3130_downstream.c
index b3a6479..9737041 100644
--- a/hw/pci-bridge/xio3130_downstream.c
+++ b/hw/pci-bridge/xio3130_downstream.c
@@ -92,7 +92,7 @@ static int xio3130_downstream_initfn(PCIDevice *d)
         goto err_pcie_cap;
     }
     pcie_cap_arifwd_init(d);
-    rc = pcie_aer_init(d, XIO3130_AER_OFFSET);
+    rc = pcie_aer_init(d, XIO3130_AER_OFFSET, PCI_ERR_SIZEOF);
     if (rc < 0) {
         goto err;
     }
diff --git a/hw/pci-bridge/xio3130_upstream.c b/hw/pci-bridge/xio3130_upstream.c
index eada582..4d7f894 100644
--- a/hw/pci-bridge/xio3130_upstream.c
+++ b/hw/pci-bridge/xio3130_upstream.c
@@ -81,7 +81,7 @@ static int xio3130_upstream_initfn(PCIDevice *d)
     }
     pcie_cap_flr_init(d);
     pcie_cap_deverr_init(d);
-    rc = pcie_aer_init(d, XIO3130_AER_OFFSET);
+    rc = pcie_aer_init(d, XIO3130_AER_OFFSET, PCI_ERR_SIZEOF);
     if (rc < 0) {
         goto err;
     }
diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c
index 98d2c18..45f351b 100644
--- a/hw/pci/pcie_aer.c
+++ b/hw/pci/pcie_aer.c
@@ -94,12 +94,12 @@ static void aer_log_clear_all_err(PCIEAERLog *aer_log)
     aer_log->log_num = 0;
 }
 
-int pcie_aer_init(PCIDevice *dev, uint16_t offset)
+int pcie_aer_init(PCIDevice *dev, uint16_t offset, uint16_t size)
 {
     PCIExpressDevice *exp;
 
     pcie_add_capability(dev, PCI_EXT_CAP_ID_ERR, PCI_ERR_VER,
-                        offset, PCI_ERR_SIZEOF);
+                        offset, size);
     exp = &dev->exp;
     exp->aer_cap = offset;
 
diff --git a/include/hw/pci/pcie_aer.h b/include/hw/pci/pcie_aer.h
index 2fb8388..156acb0 100644
--- a/include/hw/pci/pcie_aer.h
+++ b/include/hw/pci/pcie_aer.h
@@ -87,7 +87,7 @@ struct PCIEAERErr {
 
 extern const VMStateDescription vmstate_pcie_aer_log;
 
-int pcie_aer_init(PCIDevice *dev, uint16_t offset);
+int pcie_aer_init(PCIDevice *dev, uint16_t offset, uint16_t size);
 void pcie_aer_exit(PCIDevice *dev);
 void pcie_aer_write_config(PCIDevice *dev,
                            uint32_t addr, uint32_t val, int len);
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [Qemu-devel] [PATCH v13 07/13] vfio: add aer support for vfio device
  2015-11-11 10:34 [Qemu-devel] [PATCH v13 00/13] vfio-pci: pass the aer error to guest Cao jin
                   ` (5 preceding siblings ...)
  2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 06/13] aer: impove pcie_aer_init to support vfio device Cao jin
@ 2015-11-11 10:34 ` Cao jin
  2015-11-11 20:49   ` Alex Williamson
  2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 08/13] vfio: add check host bus reset is support or not Cao jin
                   ` (5 subsequent siblings)
  12 siblings, 1 reply; 30+ messages in thread
From: Cao jin @ 2015-11-11 10:34 UTC (permalink / raw)
  To: qemu-devel; +Cc: Chen Fan, alex.williamson, mst

From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

Call pcie_aer_init to initialize the AER-related registers for a
vfio device, then reload the physical registers to expose the
device's capability.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 hw/vfio/pci.h |  3 +++
 2 files changed, 82 insertions(+), 3 deletions(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 4bc2b51..2d34edf 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1806,6 +1806,68 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
     return 0;
 }
 
+static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
+                          int pos, uint16_t size)
+{
+    PCIDevice *pdev = &vdev->pdev;
+    PCIDevice *dev_iter;
+    uint8_t type;
+    uint32_t errcap;
+    int ret;
+
+    if (!(vdev->features & VFIO_FEATURE_ENABLE_AER)) {
+        pcie_add_capability(pdev, PCI_EXT_CAP_ID_ERR,
+                            cap_ver, pos, size);
+        return 0;
+    }
+
+    dev_iter = pci_bridge_get_device(pdev->bus);
+    if (!dev_iter) {
+        goto error;
+    }
+
+    while (dev_iter) {
+        type = pcie_cap_get_type(dev_iter);
+        if ((type != PCI_EXP_TYPE_ROOT_PORT &&
+             type != PCI_EXP_TYPE_UPSTREAM &&
+             type != PCI_EXP_TYPE_DOWNSTREAM)) {
+            goto error;
+        }
+
+        if (!dev_iter->exp.aer_cap) {
+            goto error;
+        }
+
+        dev_iter = pci_bridge_get_device(dev_iter->bus);
+    }
+
+    errcap = vfio_pci_read_config(pdev, pdev->exp.aer_cap + PCI_ERR_CAP, 4);
+    /*
+     * The ability to record multiple headers is depending on
+     * the state of the Multiple Header Recording Capable bit and
+     * enabled by the Multiple Header Recording Enable bit.
+     */
+    if ((errcap & PCI_ERR_CAP_MHRC) &&
+        (errcap & PCI_ERR_CAP_MHRE)) {
+        pdev->exp.aer_log.log_max = PCIE_AER_LOG_MAX_DEFAULT;
+    } else {
+        pdev->exp.aer_log.log_max = 0;
+    }
+
+    pcie_cap_deverr_init(pdev);
+    ret = pcie_aer_init(pdev, pos, size);
+    if (ret) {
+        return ret;
+    }
+
+    return 0;
+
+error:
+    error_report("vfio: Unable to enable AER for device %s, parent bus "
+                 "does not support AER signaling", vdev->vbasedev.name);
+    return -1;
+}
+
 static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
 {
     PCIDevice *pdev = &vdev->pdev;
@@ -1813,6 +1875,7 @@ static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
     uint16_t cap_id, next, size;
     uint8_t cap_ver;
     uint8_t *config;
+    int ret = 0;
 
     /*
      * In order to avoid breaking config space, create a copy to
@@ -1834,16 +1897,29 @@ static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
          */
         size = vfio_ext_cap_max_size(config, next);
 
-        pcie_add_capability(pdev, cap_id, cap_ver, next, size);
-        pci_set_long(dev->config + next, PCI_EXT_CAP(cap_id, cap_ver, 0));
+        switch (cap_id) {
+        case PCI_EXT_CAP_ID_ERR:
+            ret = vfio_setup_aer(vdev, cap_ver, next, size);
+            break;
+        default:
+            pcie_add_capability(pdev, cap_id, cap_ver, next, size);
+            break;
+        }
+
+        if (ret) {
+            goto out;
+        }
+
+        pci_set_long(pdev->config + next, PCI_EXT_CAP(cap_id, cap_ver, 0));
 
         /* Use emulated next pointer to allow dropping extended caps */
         pci_long_test_and_set_mask(vdev->emulated_config_bits + next,
                                    PCI_EXT_CAP_NEXT_MASK);
     }
 
+out:
     g_free(config);
-    return 0;
+    return ret;
 }
 
 static int vfio_add_capabilities(VFIOPCIDevice *vdev)
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index f004d52..48c1f69 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -15,6 +15,7 @@
 #include "qemu-common.h"
 #include "exec/memory.h"
 #include "hw/pci/pci.h"
+#include "hw/pci/pci_bridge.h"
 #include "hw/vfio/vfio-common.h"
 #include "qemu/event_notifier.h"
 #include "qemu/queue.h"
@@ -127,6 +128,8 @@ typedef struct VFIOPCIDevice {
 #define VFIO_FEATURE_ENABLE_VGA (1 << VFIO_FEATURE_ENABLE_VGA_BIT)
 #define VFIO_FEATURE_ENABLE_REQ_BIT 1
 #define VFIO_FEATURE_ENABLE_REQ (1 << VFIO_FEATURE_ENABLE_REQ_BIT)
+#define VFIO_FEATURE_ENABLE_AER_BIT 2
+#define VFIO_FEATURE_ENABLE_AER (1 << VFIO_FEATURE_ENABLE_AER_BIT)
     int32_t bootindex;
     uint8_t pm_cap;
     bool has_vga;
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [Qemu-devel] [PATCH v13 08/13] vfio: add check host bus reset is support or not
  2015-11-11 10:34 [Qemu-devel] [PATCH v13 00/13] vfio-pci: pass the aer error to guest Cao jin
                   ` (6 preceding siblings ...)
  2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 07/13] vfio: add aer support for " Cao jin
@ 2015-11-11 10:34 ` Cao jin
  2015-11-11 20:53   ` Alex Williamson
  2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 09/13] add check reset mechanism when hotplug vfio device Cao jin
                   ` (4 subsequent siblings)
  12 siblings, 1 reply; 30+ messages in thread
From: Cao jin @ 2015-11-11 10:34 UTC (permalink / raw)
  To: qemu-devel; +Cc: Chen Fan, alex.williamson, mst

From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

Once all vfio devices have been initialized, we should check whether
each AER-enabled device conflicts with the others. For each one, get the
hot reset info to obtain the affected device list. Every affected device
must be attached to the VM and be on or below the same bus. Also, we
should test all of the vfio-pci devices on or below the target bus that
do not support AER to verify that they have a reset mechanism.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 210 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 hw/vfio/pci.h |   1 +
 2 files changed, 204 insertions(+), 7 deletions(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 2d34edf..31ffd44 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1806,6 +1806,190 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
     return 0;
 }
 
+static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
+                                PCIHostDeviceAddress *host2)
+{
+    return (host1->domain == host2->domain && host1->bus == host2->bus &&
+            host1->slot == host2->slot && host1->function == host2->function);
+}
+
+struct VFIODeviceFind {
+    PCIDevice *pdev;
+    bool found;
+};
+
+static void vfio_check_device_noreset(PCIBus *bus, PCIDevice *pdev,
+                                      void *opaque)
+{
+    DeviceState *dev = DEVICE(pdev);
+    DeviceClass *dc = DEVICE_GET_CLASS(dev);
+    VFIOPCIDevice *vdev;
+    struct VFIODeviceFind *find = opaque;
+
+    if (find->found) {
+        return;
+    }
+
+    if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
+        if (!dc->reset) {
+            goto found;
+        }
+        return;
+    }
+    vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
+    if (!(vdev->features & VFIO_FEATURE_ENABLE_AER) &&
+        !vdev->vbasedev.reset_works) {
+        goto found;
+    }
+
+    return;
+found:
+    find->pdev = pdev;
+    find->found = true;
+}
+
+static void device_find(PCIBus *bus, PCIDevice *pdev, void *opaque)
+{
+    struct VFIODeviceFind *find = opaque;
+
+    if (find->found) {
+        return;
+    }
+
+    if (pdev == find->pdev) {
+        find->found = true;
+    }
+}
+
+static int vfio_check_host_bus_reset(VFIOPCIDevice *vdev)
+{
+    PCIBus *bus = vdev->pdev.bus;
+    struct vfio_pci_hot_reset_info *info = NULL;
+    struct vfio_pci_dependent_device *devices;
+    VFIOGroup *group;
+    struct VFIODeviceFind find;
+    int ret, i;
+
+    ret = vfio_get_hot_reset_info(vdev, &info);
+    if (ret) {
+        error_report("vfio: Cannot get hot reset info");
+        goto out;
+    }
+
+    /* List all affected devices by bus reset */
+    devices = &info->devices[0];
+
+    /* Verify that we have all the groups required */
+    for (i = 0; i < info->count; i++) {
+        PCIHostDeviceAddress host;
+        VFIOPCIDevice *tmp;
+        VFIODevice *vbasedev_iter;
+        bool found = false;
+
+        host.domain = devices[i].segment;
+        host.bus = devices[i].bus;
+        host.slot = PCI_SLOT(devices[i].devfn);
+        host.function = PCI_FUNC(devices[i].devfn);
+
+        /* Skip the current device */
+        if (vfio_pci_host_match(&host, &vdev->host)) {
+            continue;
+        }
+
+        /* Ensure we own the group of the affected device */
+        QLIST_FOREACH(group, &vfio_group_list, next) {
+            if (group->groupid == devices[i].group_id) {
+                break;
+            }
+        }
+
+        if (!group) {
+            error_report("vfio: Cannot enable AER for device %s, "
+                         "depends on group %d which is not owned.",
+                         vdev->vbasedev.name, devices[i].group_id);
+            ret = -1;
+            goto out;
+        }
+
+        /* Ensure affected devices for reset on/blow the bus */
+        QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
+            if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
+                continue;
+            }
+            tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
+            if (vfio_pci_host_match(&host, &tmp->host)) {
+                PCIDevice *pci = PCI_DEVICE(tmp);
+
+                find.pdev = pci;
+                find.found = false;
+                pci_for_each_device(bus, pci_bus_num(bus),
+                                    device_find, &find);
+                if (!find.found) {
+                    error_report("vfio: Cannot enable AER for device %s, "
+                                 "the dependent device %s is not under the same bus",
+                                 vdev->vbasedev.name, tmp->vbasedev.name);
+                    ret = -1;
+                    goto out;
+                }
+                found = true;
+                break;
+            }
+        }
+
+        /* Ensure all affected devices assigned to VM */
+        if (!found) {
+            error_report("vfio: Cannot enable AER for device %s, "
+                         "the dependent device %04x:%02x:%02x.%x "
+                         "is not assigned to VM.",
+                         vdev->vbasedev.name, host.domain, host.bus,
+                         host.slot, host.function);
+            ret = -1;
+            goto out;
+        }
+    }
+
+    /*
+     * Check the all pci devices on or below the target bus
+     * have a reset mechanism at least.
+     */
+    find.pdev = NULL;
+    find.found = false;
+    pci_for_each_device(bus, pci_bus_num(bus),
+                        vfio_check_device_noreset, &find);
+    if (find.found) {
+        error_report("vfio: Cannot enable AER for device %s, "
+                     "the affected device %s have not a reset mechanism.",
+                     vdev->vbasedev.name, find.pdev->name);
+        ret = -1;
+        goto out;
+    }
+
+    ret = 0;
+out:
+    g_free(info);
+    return ret;
+}
+
+static int vfio_check_devices_host_bus_reset(void)
+{
+    VFIOGroup *group;
+    VFIODevice *vbasedev;
+    VFIOPCIDevice *vdev;
+
+    /* Check All vfio-pci devices if have bus reset capability */
+    QLIST_FOREACH(group, &vfio_group_list, next) {
+        QLIST_FOREACH(vbasedev, &group->device_list, next) {
+            vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
+            if ((vdev->features & VFIO_FEATURE_ENABLE_AER) &&
+                vfio_check_host_bus_reset(vdev)) {
+                return -1;
+            }
+        }
+    }
+
+    return 0;
+}
+
 static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
                           int pos, uint16_t size)
 {
@@ -1989,13 +2173,6 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
     vfio_intx_enable(vdev);
 }
 
-static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
-                                PCIHostDeviceAddress *host2)
-{
-    return (host1->domain == host2->domain && host1->bus == host2->bus &&
-            host1->slot == host2->slot && host1->function == host2->function);
-}
-
 static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
 {
     VFIOGroup *group;
@@ -2501,6 +2678,20 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
     vdev->req_enabled = false;
 }
 
+static void vfio_pci_machine_done_notify(Notifier *notifier, void *unused)
+{
+    int ret;
+
+    ret = vfio_check_devices_host_bus_reset();
+    if (ret) {
+        exit(1);
+    }
+}
+
+static Notifier machine_notifier = {
+    .notify = vfio_pci_machine_done_notify,
+};
+
 static int vfio_initfn(PCIDevice *pdev)
 {
     VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
@@ -2842,6 +3033,11 @@ static const TypeInfo vfio_pci_dev_info = {
 static void register_vfio_pci_dev_type(void)
 {
     type_register_static(&vfio_pci_dev_info);
+    /*
+     * Register a notifier for when machine init is done, since we need
+     * to check the configuration after all vfio devices are initialized.
+     */
+    qemu_add_machine_init_done_notifier(&machine_notifier);
 }
 
 type_init(register_vfio_pci_dev_type)
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 48c1f69..59ae194 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -15,6 +15,7 @@
 #include "qemu-common.h"
 #include "exec/memory.h"
 #include "hw/pci/pci.h"
+#include "hw/pci/pci_bus.h"
 #include "hw/pci/pci_bridge.h"
 #include "hw/vfio/vfio-common.h"
 #include "qemu/event_notifier.h"
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [Qemu-devel] [PATCH v13 09/13] add check reset mechanism when hotplug vfio device
  2015-11-11 10:34 [Qemu-devel] [PATCH v13 00/13] vfio-pci: pass the aer error to guest Cao jin
                   ` (7 preceding siblings ...)
  2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 08/13] vfio: add check host bus reset is support or not Cao jin
@ 2015-11-11 10:34 ` Cao jin
  2015-11-12 11:51   ` Michael S. Tsirkin
  2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 10/13] pci: add pci device pre-post reset callbacks for host bus reset Cao jin
                   ` (3 subsequent siblings)
  12 siblings, 1 reply; 30+ messages in thread
From: Cao jin @ 2015-11-11 10:34 UTC (permalink / raw)
  To: qemu-devel; +Cc: Chen Fan, alex.williamson, mst

From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

Since we support multi-function hotplug, function 0 indicates
the closure of the slot, so we have the chance to do the check.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/pci/pci.c             | 29 +++++++++++++++++++++++++++++
 hw/vfio/pci.c            | 19 +++++++++++++++++++
 hw/vfio/pci.h            |  2 ++
 include/hw/pci/pci_bus.h |  5 +++++
 4 files changed, 55 insertions(+)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 168b9cc..f6ca6ef 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -81,6 +81,7 @@ static void pci_bus_realize(BusState *qbus, Error **errp)
     PCIBus *bus = PCI_BUS(qbus);
 
     vmstate_register(NULL, -1, &vmstate_pcibus, bus);
+    notifier_with_return_list_init(&bus->hotplug_notifiers);
 }
 
 static void pci_bus_unrealize(BusState *qbus, Error **errp)
@@ -1835,6 +1836,22 @@ PCIDevice *pci_find_device(PCIBus *bus, int bus_num, uint8_t devfn)
     return bus->devices[devfn];
 }
 
+void pci_bus_add_hotplug_notifier(PCIBus *bus, NotifierWithReturn *notify)
+{
+    notifier_with_return_list_add(&bus->hotplug_notifiers, notify);
+}
+
+void pci_bus_remove_hotplug_notifier(NotifierWithReturn *notifier)
+{
+    notifier_with_return_remove(notifier);
+}
+
+static int pci_bus_hotplug_notifier(PCIBus *bus, void *opaque)
+{
+    return notifier_with_return_list_notify(&bus->hotplug_notifiers,
+                                            opaque);
+}
+
 static void pci_qdev_realize(DeviceState *qdev, Error **errp)
 {
     PCIDevice *pci_dev = (PCIDevice *)qdev;
@@ -1877,6 +1894,18 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
         pci_qdev_unrealize(DEVICE(pci_dev), NULL);
         return;
     }
+
+    /*
+     *  Function 0 indicates the closure of the slot, so signal
+     *  the callback.
+     */
+    if (DEVICE(pci_dev)->hotplugged &&
+        pci_get_function_0(pci_dev) == pci_dev &&
+        pci_bus_hotplug_notifier(bus, pci_dev)) {
+        error_setg(errp, "failed to hotplug function 0");
+        pci_qdev_unrealize(DEVICE(pci_dev), NULL);
+        return;
+    }
 }
 
 static void pci_default_realize(PCIDevice *dev, Error **errp)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 31ffd44..e619998 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1990,6 +1990,19 @@ static int vfio_check_devices_host_bus_reset(void)
     return 0;
 }
 
+static int vfio_check_bus_reset(NotifierWithReturn *n, void *opaque)
+{
+    VFIOPCIDevice *vdev = container_of(n, VFIOPCIDevice, hotplug_notifier);
+    PCIDevice *pci_dev = PCI_DEVICE(vdev);
+    PCIDevice *pci_func0 = opaque;
+
+    if (pci_get_function_0(pci_dev) != pci_func0) {
+        return 0;
+    }
+
+    return vfio_check_host_bus_reset(vdev);
+}
+
 static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
                           int pos, uint16_t size)
 {
@@ -2044,6 +2057,9 @@ static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
         return ret;
     }
 
+    vdev->hotplug_notifier.notify = vfio_check_bus_reset;
+    pci_bus_add_hotplug_notifier(pdev->bus, &vdev->hotplug_notifier);
+
     return 0;
 
 error:
@@ -2919,6 +2935,9 @@ static void vfio_exitfn(PCIDevice *pdev)
     vfio_unregister_req_notifier(vdev);
     vfio_unregister_err_notifier(vdev);
     pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
+    if (vdev->features & VFIO_FEATURE_ENABLE_AER) {
+        pci_bus_remove_hotplug_notifier(&vdev->hotplug_notifier);
+    }
     vfio_disable_interrupts(vdev);
     if (vdev->intx.mmap_timer) {
         timer_free(vdev->intx.mmap_timer);
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 59ae194..b385f07 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -142,6 +142,8 @@ typedef struct VFIOPCIDevice {
     bool no_kvm_intx;
     bool no_kvm_msi;
     bool no_kvm_msix;
+
+    NotifierWithReturn hotplug_notifier;
 } VFIOPCIDevice;
 
 uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
index 403fec6..7812fa9 100644
--- a/include/hw/pci/pci_bus.h
+++ b/include/hw/pci/pci_bus.h
@@ -39,8 +39,13 @@ struct PCIBus {
        Keep a count of the number of devices with raised IRQs.  */
     int nirq;
     int *irq_count;
+
+    NotifierWithReturnList hotplug_notifiers;
 };
 
+void pci_bus_add_hotplug_notifier(PCIBus *bus, NotifierWithReturn *notify);
+void pci_bus_remove_hotplug_notifier(NotifierWithReturn *notify);
+
 typedef struct PCIBridgeWindows PCIBridgeWindows;
 
 /*
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [Qemu-devel] [PATCH v13 10/13] pci: add pci device pre-post reset callbacks for host bus reset
  2015-11-11 10:34 [Qemu-devel] [PATCH v13 00/13] vfio-pci: pass the aer error to guest Cao jin
                   ` (8 preceding siblings ...)
  2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 09/13] add check reset mechanism when hotplug vfio device Cao jin
@ 2015-11-11 10:34 ` Cao jin
  2015-11-11 16:56   ` Michael S. Tsirkin
  2015-11-11 20:58   ` Alex Williamson
  2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 11/13] pcie_aer: expose pcie_aer_msg() interface Cao jin
                   ` (2 subsequent siblings)
  12 siblings, 2 replies; 30+ messages in thread
From: Cao jin @ 2015-11-11 10:34 UTC (permalink / raw)
  To: qemu-devel; +Cc: Chen Fan, alex.williamson, mst

From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

In particular, for vfio devices, once devices need to be recovered
by a bus reset (such as for AER), we always need to reset the host bus
to recover the devices under the bus, so we need to add pci device
callbacks that request a host bus reset.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/pci/pci.c         | 18 ++++++++++++++++++
 hw/pci/pci_bridge.c  |  9 +++++++++
 hw/vfio/pci.c        | 26 ++++++++++++++++++++++++++
 hw/vfio/pci.h        |  2 ++
 include/hw/pci/pci.h |  7 +++++++
 5 files changed, 62 insertions(+)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index f6ca6ef..64fa2cc 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -247,6 +247,24 @@ static void pci_do_device_reset(PCIDevice *dev)
     msix_reset(dev);
 }
 
+void pci_device_pre_reset(PCIBus *bus, PCIDevice *dev, void *unused)
+{
+    PCIDeviceClass *dc = PCI_DEVICE_GET_CLASS(dev);
+
+    if (dc->pre_reset) {
+        dc->pre_reset(dev);
+    }
+}
+
+void pci_device_post_reset(PCIBus *bus, PCIDevice *dev, void *unused)
+{
+    PCIDeviceClass *dc = PCI_DEVICE_GET_CLASS(dev);
+
+    if (dc->post_reset) {
+        dc->post_reset(dev);
+    }
+}
+
 /*
  * This function is called on #RST and FLR.
  * FLR if PCI_EXP_DEVCTL_BCR_FLR is set
diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
index 40c97b1..ddb76ab 100644
--- a/hw/pci/pci_bridge.c
+++ b/hw/pci/pci_bridge.c
@@ -267,8 +267,17 @@ void pci_bridge_write_config(PCIDevice *d,
 
     newctl = pci_get_word(d->config + PCI_BRIDGE_CONTROL);
     if (~oldctl & newctl & PCI_BRIDGE_CTL_BUS_RESET) {
+        /*
+         * Notify all vfio-pci devices under the bus that they
+         * should do a physical bus reset.
+         */
+        PCIBus *sec_bus = pci_bridge_get_sec_bus(s);
+        pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
+                            pci_device_pre_reset, NULL);
         /* Trigger hot reset on 0->1 transition. */
         qbus_reset_all(&s->sec_bus.qbus);
+        pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
+                            pci_device_post_reset, NULL);
     }
 }
 
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index e619998..90df393 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -39,6 +39,7 @@
 
 static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
 static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled);
+static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single);
 
 /*
  * Disabling BAR mmaping can be slow, but toggling it around INTx can
@@ -1879,6 +1880,8 @@ static int vfio_check_host_bus_reset(VFIOPCIDevice *vdev)
     /* List all affected devices by bus reset */
     devices = &info->devices[0];
 
+    vdev->single_depend_dev = (info->count == 1);
+
     /* Verify that we have all the groups required */
     for (i = 0; i < info->count; i++) {
         PCIHostDeviceAddress host;
@@ -2003,10 +2006,26 @@ static int vfio_check_bus_reset(NotifierWithReturn *n, void *opaque)
     return vfio_check_host_bus_reset(vdev);
 }
 
+static void vfio_aer_pre_reset(PCIDevice *pdev)
+{
+    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
+
+    vdev->aer_reset = true;
+    vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
+}
+
+static void vfio_aer_post_reset(PCIDevice *pdev)
+{
+    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
+
+    vdev->aer_reset = false;
+}
+
 static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
                           int pos, uint16_t size)
 {
     PCIDevice *pdev = &vdev->pdev;
+    PCIDeviceClass *dc = PCI_DEVICE_GET_CLASS(pdev);
     PCIDevice *dev_iter;
     uint8_t type;
     uint32_t errcap;
@@ -2060,6 +2079,9 @@ static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
     vdev->hotplug_notifier.notify = vfio_check_bus_reset;
     pci_bus_add_hotplug_notifier(pdev->bus, &vdev->hotplug_notifier);
 
+    dc->pre_reset = vfio_aer_pre_reset;
+    dc->post_reset = vfio_aer_post_reset;
+
     return 0;
 
 error:
@@ -2953,6 +2975,10 @@ static void vfio_pci_reset(DeviceState *dev)
 
     trace_vfio_pci_reset(vdev->vbasedev.name);
 
+    if (vdev->aer_reset) {
+        return;
+    }
+
     vfio_pci_pre_reset(vdev);
 
     if (vdev->resetfn && !vdev->resetfn(vdev)) {
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index b385f07..1b89b83 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -144,6 +144,8 @@ typedef struct VFIOPCIDevice {
     bool no_kvm_msix;
 
     NotifierWithReturn hotplug_notifier;
+    bool aer_reset;
+    bool single_depend_dev;
 } VFIOPCIDevice;
 
 uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 379b6e1..6b1f2d4 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -105,6 +105,9 @@ typedef void PCIMapIORegionFunc(PCIDevice *pci_dev, int region_num,
                                 pcibus_t addr, pcibus_t size, int type);
 typedef void PCIUnregisterFunc(PCIDevice *pci_dev);
 
+typedef void PCIPreResetFunc(PCIDevice *pci_dev);
+typedef void PCIPostResetFunc(PCIDevice *pci_dev);
+
 typedef struct PCIIORegion {
     pcibus_t addr; /* current PCI mapping address. -1 means not mapped */
 #define PCI_BAR_UNMAPPED (~(pcibus_t)0)
@@ -193,6 +196,8 @@ typedef struct PCIDeviceClass {
     PCIUnregisterFunc *exit;
     PCIConfigReadFunc *config_read;
     PCIConfigWriteFunc *config_write;
+    PCIPreResetFunc *pre_reset;
+    PCIPostResetFunc *post_reset;
 
     uint16_t vendor_id;
     uint16_t device_id;
@@ -380,6 +385,8 @@ bool pci_intx_route_changed(PCIINTxRoute *old, PCIINTxRoute *new);
 void pci_bus_fire_intx_routing_notifier(PCIBus *bus);
 void pci_device_set_intx_routing_notifier(PCIDevice *dev,
                                           PCIINTxRoutingNotifier notifier);
+void pci_device_pre_reset(PCIBus *bus, PCIDevice *d, void *opaque);
+void pci_device_post_reset(PCIBus *bus, PCIDevice *d, void *opaque);
 void pci_device_reset(PCIDevice *dev);
 
 PCIDevice *pci_nic_init_nofail(NICInfo *nd, PCIBus *rootbus,
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [Qemu-devel] [PATCH v13 11/13] pcie_aer: expose pcie_aer_msg() interface
  2015-11-11 10:34 [Qemu-devel] [PATCH v13 00/13] vfio-pci: pass the aer error to guest Cao jin
                   ` (9 preceding siblings ...)
  2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 10/13] pci: add pci device pre-post reset callbacks for host bus reset Cao jin
@ 2015-11-11 10:34 ` Cao jin
  2015-11-11 16:56   ` Michael S. Tsirkin
  2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 12/13] vfio-pci: pass the aer error to guest Cao jin
  2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 13/13] vfio: add 'aer' property to expose aercap Cao jin
  12 siblings, 1 reply; 30+ messages in thread
From: Cao jin @ 2015-11-11 10:34 UTC (permalink / raw)
  To: qemu-devel; +Cc: Chen Fan, alex.williamson, mst

From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

For vfio devices, we need to propagate the aer error to
the guest OS. We use pcie_aer_msg() to send the aer error
to the guest.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/pci/pcie_aer.c         | 2 +-
 include/hw/pci/pcie_aer.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c
index 45f351b..fbbd7d2 100644
--- a/hw/pci/pcie_aer.c
+++ b/hw/pci/pcie_aer.c
@@ -370,7 +370,7 @@ static void pcie_aer_msg_root_port(PCIDevice *dev, const PCIEAERMsg *msg)
  *
  * Walk up the bus tree from the device, propagate the error message.
  */
-static void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg)
+void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg)
 {
     uint8_t type;
 
diff --git a/include/hw/pci/pcie_aer.h b/include/hw/pci/pcie_aer.h
index 156acb0..c2ee4e2 100644
--- a/include/hw/pci/pcie_aer.h
+++ b/include/hw/pci/pcie_aer.h
@@ -102,5 +102,6 @@ void pcie_aer_root_write_config(PCIDevice *dev,
 
 /* error injection */
 int pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err);
+void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg);
 
 #endif /* QEMU_PCIE_AER_H */
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [Qemu-devel] [PATCH v13 12/13] vfio-pci: pass the aer error to guest
  2015-11-11 10:34 [Qemu-devel] [PATCH v13 00/13] vfio-pci: pass the aer error to guest Cao jin
                   ` (10 preceding siblings ...)
  2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 11/13] pcie_aer: expose pcie_aer_msg() interface Cao jin
@ 2015-11-11 10:34 ` Cao jin
  2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 13/13] vfio: add 'aer' property to expose aercap Cao jin
  12 siblings, 0 replies; 30+ messages in thread
From: Cao jin @ 2015-11-11 10:34 UTC (permalink / raw)
  To: qemu-devel; +Cc: Chen Fan, alex.williamson, mst

From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

When the vfio device encounters an uncorrectable error in the host,
the vfio_pci driver will signal the eventfd registered by this
vfio device, which results in the qemu eventfd handler getting
invoked.

This patch passes the error to the guest and has the guest driver
recover from the error.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 45 +++++++++++++++++++++++++++++++++++++++------
 1 file changed, 39 insertions(+), 6 deletions(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 90df393..c593876 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2523,18 +2523,51 @@ static void vfio_put_device(VFIOPCIDevice *vdev)
 static void vfio_err_notifier_handler(void *opaque)
 {
     VFIOPCIDevice *vdev = opaque;
+    PCIDevice *dev = &vdev->pdev;
+    PCIEAERMsg msg = {
+        .severity = 0,
+        .source_id = (pci_bus_num(dev->bus) << 8) | dev->devfn,
+    };
 
     if (!event_notifier_test_and_clear(&vdev->err_notifier)) {
         return;
     }
 
     /*
-     * TBD. Retrieve the error details and decide what action
-     * needs to be taken. One of the actions could be to pass
-     * the error to the guest and have the guest driver recover
-     * from the error. This requires that PCIe capabilities be
-     * exposed to the guest. For now, we just terminate the
-     * guest to contain the error.
+     * In case the real hardware configuration has changed,
+     * recheck the bus reset capability here.
+     */
+    if ((vdev->features & VFIO_FEATURE_ENABLE_AER) &&
+        vfio_check_host_bus_reset(vdev)) {
+        goto stop;
+    }
+    /*
+     * The error details are read from the real hardware
+     * configuration space; all we need to do here is signal to
+     * the guest that an uncorrectable error has occurred.
+     */
+    if ((vdev->features & VFIO_FEATURE_ENABLE_AER) &&
+        dev->exp.aer_cap) {
+        uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
+        uint32_t uncor_status;
+        bool isfatal;
+
+        uncor_status = vfio_pci_read_config(dev,
+                           dev->exp.aer_cap + PCI_ERR_UNCOR_STATUS, 4);
+
+        isfatal = uncor_status & pci_get_long(aer_cap + PCI_ERR_UNCOR_SEVER);
+
+        msg.severity = isfatal ? PCI_ERR_ROOT_CMD_FATAL_EN :
+                                 PCI_ERR_ROOT_CMD_NONFATAL_EN;
+
+        pcie_aer_msg(dev, &msg);
+        return;
+    }
+
+stop:
+    /*
+     * If the aer capability is not exposed to the guest, we just
+     * terminate the guest to contain the error.
      */
 
     error_report("%s(%04x:%02x:%02x.%x) Unrecoverable error detected.  "
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [Qemu-devel] [PATCH v13 13/13] vfio: add 'aer' property to expose aercap
  2015-11-11 10:34 [Qemu-devel] [PATCH v13 00/13] vfio-pci: pass the aer error to guest Cao jin
                   ` (11 preceding siblings ...)
  2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 12/13] vfio-pci: pass the aer error to guest Cao jin
@ 2015-11-11 10:34 ` Cao jin
  12 siblings, 0 replies; 30+ messages in thread
From: Cao jin @ 2015-11-11 10:34 UTC (permalink / raw)
  To: qemu-devel; +Cc: Chen Fan, alex.williamson, mst

From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

Add an 'aer' property to let the user decide whether to expose
the aer capability. By default the aer feature is disabled,
because it imposes configuration restrictions.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index c593876..d2efbb0 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3069,6 +3069,8 @@ static Property vfio_pci_dev_properties[] = {
                        sub_vendor_id, PCI_ANY_ID),
     DEFINE_PROP_UINT32("x-pci-sub-device-id", VFIOPCIDevice,
                        sub_device_id, PCI_ANY_ID),
+    DEFINE_PROP_BIT("aer", VFIOPCIDevice, features,
+                    VFIO_FEATURE_ENABLE_AER_BIT, false),
     /*
      * TODO - support passed fds... is this necessary?
      * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name),
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* Re: [Qemu-devel] [PATCH v13 03/13] pcie: modify the capability size assert
  2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 03/13] pcie: modify the capability size assert Cao jin
@ 2015-11-11 16:55   ` Michael S. Tsirkin
  0 siblings, 0 replies; 30+ messages in thread
From: Michael S. Tsirkin @ 2015-11-11 16:55 UTC (permalink / raw)
  To: Cao jin; +Cc: Chen Fan, alex.williamson, qemu-devel

On Wed, Nov 11, 2015 at 06:34:21PM +0800, Cao jin wrote:
> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> 
>  Device's Offset and size can reach PCIE_CONFIG_SPACE_SIZE,
>  fix the corresponding assert.
> 
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>

Reviewed-by: Michael S. Tsirkin <mst@redhat.com>

> ---
>  hw/pci/pcie.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
> index 0eab29d..8f4c0e5 100644
> --- a/hw/pci/pcie.c
> +++ b/hw/pci/pcie.c
> @@ -607,7 +607,7 @@ void pcie_add_capability(PCIDevice *dev,
>  
>      assert(offset >= PCI_CONFIG_SPACE_SIZE);
>      assert(offset < offset + size);
> -    assert(offset + size < PCIE_CONFIG_SPACE_SIZE);
> +    assert(offset + size <= PCIE_CONFIG_SPACE_SIZE);
>      assert(size >= 8);
>      assert(pci_is_express(dev));
>  
> -- 
> 1.9.3

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [Qemu-devel] [PATCH v13 06/13] aer: impove pcie_aer_init to support vfio device
  2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 06/13] aer: impove pcie_aer_init to support vfio device Cao jin
@ 2015-11-11 16:55   ` Michael S. Tsirkin
  0 siblings, 0 replies; 30+ messages in thread
From: Michael S. Tsirkin @ 2015-11-11 16:55 UTC (permalink / raw)
  To: Cao jin; +Cc: Chen Fan, alex.williamson, qemu-devel

On Wed, Nov 11, 2015 at 06:34:24PM +0800, Cao jin wrote:
> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> 
> pcie_aer_init was used to emulate an aer capability for pcie device,
> but for vfio device, the aer config space size is mutable and is not
> always equal to PCI_ERR_SIZEOF(0x48). it depends on where the TLP Prefix
> register required, so here we add a size argument.
> 
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

Reviewed-by: Michael S. Tsirkin <mst@redhat.com>

> ---
>  hw/pci-bridge/ioh3420.c            | 2 +-
>  hw/pci-bridge/xio3130_downstream.c | 2 +-
>  hw/pci-bridge/xio3130_upstream.c   | 2 +-
>  hw/pci/pcie_aer.c                  | 4 ++--
>  include/hw/pci/pcie_aer.h          | 2 +-
>  5 files changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/hw/pci-bridge/ioh3420.c b/hw/pci-bridge/ioh3420.c
> index cce2fdd..4d9cd3f 100644
> --- a/hw/pci-bridge/ioh3420.c
> +++ b/hw/pci-bridge/ioh3420.c
> @@ -129,7 +129,7 @@ static int ioh3420_initfn(PCIDevice *d)
>          goto err_pcie_cap;
>      }
>      pcie_cap_root_init(d);
> -    rc = pcie_aer_init(d, IOH_EP_AER_OFFSET);
> +    rc = pcie_aer_init(d, IOH_EP_AER_OFFSET, PCI_ERR_SIZEOF);
>      if (rc < 0) {
>          goto err;
>      }
> diff --git a/hw/pci-bridge/xio3130_downstream.c b/hw/pci-bridge/xio3130_downstream.c
> index b3a6479..9737041 100644
> --- a/hw/pci-bridge/xio3130_downstream.c
> +++ b/hw/pci-bridge/xio3130_downstream.c
> @@ -92,7 +92,7 @@ static int xio3130_downstream_initfn(PCIDevice *d)
>          goto err_pcie_cap;
>      }
>      pcie_cap_arifwd_init(d);
> -    rc = pcie_aer_init(d, XIO3130_AER_OFFSET);
> +    rc = pcie_aer_init(d, XIO3130_AER_OFFSET, PCI_ERR_SIZEOF);
>      if (rc < 0) {
>          goto err;
>      }
> diff --git a/hw/pci-bridge/xio3130_upstream.c b/hw/pci-bridge/xio3130_upstream.c
> index eada582..4d7f894 100644
> --- a/hw/pci-bridge/xio3130_upstream.c
> +++ b/hw/pci-bridge/xio3130_upstream.c
> @@ -81,7 +81,7 @@ static int xio3130_upstream_initfn(PCIDevice *d)
>      }
>      pcie_cap_flr_init(d);
>      pcie_cap_deverr_init(d);
> -    rc = pcie_aer_init(d, XIO3130_AER_OFFSET);
> +    rc = pcie_aer_init(d, XIO3130_AER_OFFSET, PCI_ERR_SIZEOF);
>      if (rc < 0) {
>          goto err;
>      }
> diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c
> index 98d2c18..45f351b 100644
> --- a/hw/pci/pcie_aer.c
> +++ b/hw/pci/pcie_aer.c
> @@ -94,12 +94,12 @@ static void aer_log_clear_all_err(PCIEAERLog *aer_log)
>      aer_log->log_num = 0;
>  }
>  
> -int pcie_aer_init(PCIDevice *dev, uint16_t offset)
> +int pcie_aer_init(PCIDevice *dev, uint16_t offset, uint16_t size)
>  {
>      PCIExpressDevice *exp;
>  
>      pcie_add_capability(dev, PCI_EXT_CAP_ID_ERR, PCI_ERR_VER,
> -                        offset, PCI_ERR_SIZEOF);
> +                        offset, size);
>      exp = &dev->exp;
>      exp->aer_cap = offset;
>  
> diff --git a/include/hw/pci/pcie_aer.h b/include/hw/pci/pcie_aer.h
> index 2fb8388..156acb0 100644
> --- a/include/hw/pci/pcie_aer.h
> +++ b/include/hw/pci/pcie_aer.h
> @@ -87,7 +87,7 @@ struct PCIEAERErr {
>  
>  extern const VMStateDescription vmstate_pcie_aer_log;
>  
> -int pcie_aer_init(PCIDevice *dev, uint16_t offset);
> +int pcie_aer_init(PCIDevice *dev, uint16_t offset, uint16_t size);
>  void pcie_aer_exit(PCIDevice *dev);
>  void pcie_aer_write_config(PCIDevice *dev,
>                             uint32_t addr, uint32_t val, int len);
> -- 
> 1.9.3

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [Qemu-devel] [PATCH v13 10/13] pci: add pci device pre-post reset callbacks for host bus reset
  2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 10/13] pci: add pci device pre-post reset callbacks for host bus reset Cao jin
@ 2015-11-11 16:56   ` Michael S. Tsirkin
  2015-11-11 20:58   ` Alex Williamson
  1 sibling, 0 replies; 30+ messages in thread
From: Michael S. Tsirkin @ 2015-11-11 16:56 UTC (permalink / raw)
  To: Cao jin; +Cc: Chen Fan, alex.williamson, qemu-devel

On Wed, Nov 11, 2015 at 06:34:28PM +0800, Cao jin wrote:
> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> 
> Particularly, For vfio devices, Once need to recovery devices
> by bus reset such as AER, we always need to reset the host bus
> to recovery the devices under the bus, so we need to add pci device
> callbacks to specify to do host bus reset.
> 
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>


Reviewed-by: Michael S. Tsirkin <mst@redhat.com>

> ---
>  hw/pci/pci.c         | 18 ++++++++++++++++++
>  hw/pci/pci_bridge.c  |  9 +++++++++
>  hw/vfio/pci.c        | 26 ++++++++++++++++++++++++++
>  hw/vfio/pci.h        |  2 ++
>  include/hw/pci/pci.h |  7 +++++++
>  5 files changed, 62 insertions(+)
> 
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index f6ca6ef..64fa2cc 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -247,6 +247,24 @@ static void pci_do_device_reset(PCIDevice *dev)
>      msix_reset(dev);
>  }
>  
> +void pci_device_pre_reset(PCIBus *bus, PCIDevice *dev, void *unused)
> +{
> +    PCIDeviceClass *dc = PCI_DEVICE_GET_CLASS(dev);
> +
> +    if (dc->pre_reset) {
> +        dc->pre_reset(dev);
> +    }
> +}
> +
> +void pci_device_post_reset(PCIBus *bus, PCIDevice *dev, void *unused)
> +{
> +    PCIDeviceClass *dc = PCI_DEVICE_GET_CLASS(dev);
> +
> +    if (dc->post_reset) {
> +        dc->post_reset(dev);
> +    }
> +}
> +
>  /*
>   * This function is called on #RST and FLR.
>   * FLR if PCI_EXP_DEVCTL_BCR_FLR is set
> diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
> index 40c97b1..ddb76ab 100644
> --- a/hw/pci/pci_bridge.c
> +++ b/hw/pci/pci_bridge.c
> @@ -267,8 +267,17 @@ void pci_bridge_write_config(PCIDevice *d,
>  
>      newctl = pci_get_word(d->config + PCI_BRIDGE_CONTROL);
>      if (~oldctl & newctl & PCI_BRIDGE_CTL_BUS_RESET) {
> +        /*
> +         * Notify all vfio-pci devices under the bus
> +         * should do physical bus reset.
> +         */
> +        PCIBus *sec_bus = pci_bridge_get_sec_bus(s);
> +        pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
> +                            pci_device_pre_reset, NULL);
>          /* Trigger hot reset on 0->1 transition. */
>          qbus_reset_all(&s->sec_bus.qbus);
> +        pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
> +                            pci_device_post_reset, NULL);
>      }
>  }
>  
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index e619998..90df393 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -39,6 +39,7 @@
>  
>  static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
>  static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled);
> +static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single);
>  
>  /*
>   * Disabling BAR mmaping can be slow, but toggling it around INTx can
> @@ -1879,6 +1880,8 @@ static int vfio_check_host_bus_reset(VFIOPCIDevice *vdev)
>      /* List all affected devices by bus reset */
>      devices = &info->devices[0];
>  
> +    vdev->single_depend_dev = (info->count == 1);
> +
>      /* Verify that we have all the groups required */
>      for (i = 0; i < info->count; i++) {
>          PCIHostDeviceAddress host;
> @@ -2003,10 +2006,26 @@ static int vfio_check_bus_reset(NotifierWithReturn *n, void *opaque)
>      return vfio_check_host_bus_reset(vdev);
>  }
>  
> +static void vfio_aer_pre_reset(PCIDevice *pdev)
> +{
> +    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> +
> +    vdev->aer_reset = true;
> +    vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
> +}
> +
> +static void vfio_aer_post_reset(PCIDevice *pdev)
> +{
> +    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> +
> +    vdev->aer_reset = false;
> +}
> +
>  static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
>                            int pos, uint16_t size)
>  {
>      PCIDevice *pdev = &vdev->pdev;
> +    PCIDeviceClass *dc = PCI_DEVICE_GET_CLASS(pdev);
>      PCIDevice *dev_iter;
>      uint8_t type;
>      uint32_t errcap;
> @@ -2060,6 +2079,9 @@ static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
>      vdev->hotplug_notifier.notify = vfio_check_bus_reset;
>      pci_bus_add_hotplug_notifier(pdev->bus, &vdev->hotplug_notifier);
>  
> +    dc->pre_reset = vfio_aer_pre_reset;
> +    dc->post_reset = vfio_aer_post_reset;
> +
>      return 0;
>  
>  error:
> @@ -2953,6 +2975,10 @@ static void vfio_pci_reset(DeviceState *dev)
>  
>      trace_vfio_pci_reset(vdev->vbasedev.name);
>  
> +    if (vdev->aer_reset) {
> +        return;
> +    }
> +
>      vfio_pci_pre_reset(vdev);
>  
>      if (vdev->resetfn && !vdev->resetfn(vdev)) {
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index b385f07..1b89b83 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -144,6 +144,8 @@ typedef struct VFIOPCIDevice {
>      bool no_kvm_msix;
>  
>      NotifierWithReturn hotplug_notifier;
> +    bool aer_reset;
> +    bool single_depend_dev;
>  } VFIOPCIDevice;
>  
>  uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> index 379b6e1..6b1f2d4 100644
> --- a/include/hw/pci/pci.h
> +++ b/include/hw/pci/pci.h
> @@ -105,6 +105,9 @@ typedef void PCIMapIORegionFunc(PCIDevice *pci_dev, int region_num,
>                                  pcibus_t addr, pcibus_t size, int type);
>  typedef void PCIUnregisterFunc(PCIDevice *pci_dev);
>  
> +typedef void PCIPreResetFunc(PCIDevice *pci_dev);
> +typedef void PCIPostResetFunc(PCIDevice *pci_dev);
> +
>  typedef struct PCIIORegion {
>      pcibus_t addr; /* current PCI mapping address. -1 means not mapped */
>  #define PCI_BAR_UNMAPPED (~(pcibus_t)0)
> @@ -193,6 +196,8 @@ typedef struct PCIDeviceClass {
>      PCIUnregisterFunc *exit;
>      PCIConfigReadFunc *config_read;
>      PCIConfigWriteFunc *config_write;
> +    PCIPreResetFunc *pre_reset;
> +    PCIPostResetFunc *post_reset;
>  
>      uint16_t vendor_id;
>      uint16_t device_id;
> @@ -380,6 +385,8 @@ bool pci_intx_route_changed(PCIINTxRoute *old, PCIINTxRoute *new);
>  void pci_bus_fire_intx_routing_notifier(PCIBus *bus);
>  void pci_device_set_intx_routing_notifier(PCIDevice *dev,
>                                            PCIINTxRoutingNotifier notifier);
> +void pci_device_pre_reset(PCIBus *bus, PCIDevice *d, void *opaque);
> +void pci_device_post_reset(PCIBus *bus, PCIDevice *d, void *opaque);
>  void pci_device_reset(PCIDevice *dev);
>  
>  PCIDevice *pci_nic_init_nofail(NICInfo *nd, PCIBus *rootbus,
> -- 
> 1.9.3

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [Qemu-devel] [PATCH v13 11/13] pcie_aer: expose pcie_aer_msg() interface
  2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 11/13] pcie_aer: expose pcie_aer_msg() interface Cao jin
@ 2015-11-11 16:56   ` Michael S. Tsirkin
  0 siblings, 0 replies; 30+ messages in thread
From: Michael S. Tsirkin @ 2015-11-11 16:56 UTC (permalink / raw)
  To: Cao jin; +Cc: Chen Fan, alex.williamson, qemu-devel

On Wed, Nov 11, 2015 at 06:34:29PM +0800, Cao jin wrote:
> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> 
> For vfio device, we need to propagate the aer error to
> Guest OS. we use the pcie_aer_msg() to send aer error
> to guest.
> 
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

Reviewed-by: Michael S. Tsirkin <mst@redhat.com>

> ---
>  hw/pci/pcie_aer.c         | 2 +-
>  include/hw/pci/pcie_aer.h | 1 +
>  2 files changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c
> index 45f351b..fbbd7d2 100644
> --- a/hw/pci/pcie_aer.c
> +++ b/hw/pci/pcie_aer.c
> @@ -370,7 +370,7 @@ static void pcie_aer_msg_root_port(PCIDevice *dev, const PCIEAERMsg *msg)
>   *
>   * Walk up the bus tree from the device, propagate the error message.
>   */
> -static void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg)
> +void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg)
>  {
>      uint8_t type;
>  
> diff --git a/include/hw/pci/pcie_aer.h b/include/hw/pci/pcie_aer.h
> index 156acb0..c2ee4e2 100644
> --- a/include/hw/pci/pcie_aer.h
> +++ b/include/hw/pci/pcie_aer.h
> @@ -102,5 +102,6 @@ void pcie_aer_root_write_config(PCIDevice *dev,
>  
>  /* error injection */
>  int pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err);
> +void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg);
>  
>  #endif /* QEMU_PCIE_AER_H */
> -- 
> 1.9.3

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [Qemu-devel] [PATCH v13 07/13] vfio: add aer support for vfio device
  2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 07/13] vfio: add aer support for " Cao jin
@ 2015-11-11 20:49   ` Alex Williamson
  2015-11-12 11:54     ` Cao jin
  0 siblings, 1 reply; 30+ messages in thread
From: Alex Williamson @ 2015-11-11 20:49 UTC (permalink / raw)
  To: Cao jin; +Cc: Chen Fan, qemu-devel, mst

On Wed, 2015-11-11 at 18:34 +0800, Cao jin wrote:
> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> 
> Calling pcie_aer_init to initilize aer related registers for
> vfio device, then reload physical related registers to expose
> device capability.
> 
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> ---


What if VFIO_FEATURE_ENABLE_AER is enabled for a device that doesn't
possess an AER capability or isn't attached to a PCIe bus?  It appears
that we silently ignore it, which would lead to unregistering a hotplug
notifier that was never registered in 09/13 and needing to test both
VFIO_FEATURE_ENABLE_AER and exp.aer_cap in 12/13 as well as the
inconsistency that we often only test for VFIO_FEATURE_ENABLE_AER when
really we expect that to imply that AER is setup and enabled for the
device.  It seems like we need to error either within
vfio_add_capabilities() or after calling it if VFIO_FEATURE_ENABLE_AER
is specified but not configured.  If a user expects AER to be enabled
for a device by specifying aer=on, we need to fail if that's not
possible.

>  hw/vfio/pci.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
>  hw/vfio/pci.h |  3 +++
>  2 files changed, 82 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 4bc2b51..2d34edf 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -1806,6 +1806,68 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
...
> +
> +    pcie_cap_deverr_init(pdev);
> +    ret = pcie_aer_init(pdev, pos, size);
> +    if (ret) {
> +        return ret;
> +    }

This branch is unnecessary, we can simply:

return pcie_aer_init(pdev, pos, size);

if we get this far.  Thanks,

Alex

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [Qemu-devel] [PATCH v13 08/13] vfio: add check host bus reset is support or not
  2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 08/13] vfio: add check host bus reset is support or not Cao jin
@ 2015-11-11 20:53   ` Alex Williamson
  2015-11-12 11:56     ` Cao jin
  0 siblings, 1 reply; 30+ messages in thread
From: Alex Williamson @ 2015-11-11 20:53 UTC (permalink / raw)
  To: Cao jin; +Cc: Chen Fan, qemu-devel, mst

On Wed, 2015-11-11 at 18:34 +0800, Cao jin wrote:
> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> 
> when init vfio devices done, we should test all the devices supported
> aer whether conflict with others. For each one, get the hot reset
> info for the affected device list.  For each affected device, all
> should attach to the VM and on/below the same bus. also, we should test
> all of the non-AER supporting vfio-pci devices on or below the target
> bus to verify they have a reset mechanism.
> 
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> ---
>  hw/vfio/pci.c | 210 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
>  hw/vfio/pci.h |   1 +
>  2 files changed, 204 insertions(+), 7 deletions(-)
> 
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 2d34edf..31ffd44 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -1806,6 +1806,190 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
>      return 0;
>  }
>  
> +static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
> +                                PCIHostDeviceAddress *host2)
> +{
> +    return (host1->domain == host2->domain && host1->bus == host2->bus &&
> +            host1->slot == host2->slot && host1->function == host2->function);
> +}
> +
> +struct VFIODeviceFind {
> +    PCIDevice *pdev;
> +    bool found;
> +};
> +
> +static void vfio_check_device_noreset(PCIBus *bus, PCIDevice *pdev,
> +                                      void *opaque)
> +{
> +    DeviceState *dev = DEVICE(pdev);
> +    DeviceClass *dc = DEVICE_GET_CLASS(dev);
> +    VFIOPCIDevice *vdev;
> +    struct VFIODeviceFind *find = opaque;
> +
> +    if (find->found) {
> +        return;
> +    }
> +
> +    if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
> +        if (!dc->reset) {
> +            goto found;
> +        }
> +        return;
> +    }
> +    vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> +    if (!(vdev->features & VFIO_FEATURE_ENABLE_AER) &&
> +        !vdev->vbasedev.reset_works) {
> +        goto found;
> +    }
> +
> +    return;
> +found:
> +    find->pdev = pdev;
> +    find->found = true;
> +}
> +
> +static void device_find(PCIBus *bus, PCIDevice *pdev, void *opaque)
> +{
> +    struct VFIODeviceFind *find = opaque;
> +
> +    if (find->found) {
> +        return;
> +    }
> +
> +    if (pdev == find->pdev) {
> +        find->found = true;
> +    }
> +}
> +
> +static int vfio_check_host_bus_reset(VFIOPCIDevice *vdev)
> +{
> +    PCIBus *bus = vdev->pdev.bus;
> +    struct vfio_pci_hot_reset_info *info = NULL;
> +    struct vfio_pci_dependent_device *devices;
> +    VFIOGroup *group;
> +    struct VFIODeviceFind find;
> +    int ret, i;
> +
> +    ret = vfio_get_hot_reset_info(vdev, &info);
> +    if (ret) {
> +        error_report("vfio: Cannot get hot reset info");


This needs a more useful error message:

-        error_report("vfio: Cannot get hot reset info");
+        error_report("vfio: Cannot enable AER for device %s, "
+                     "device does not support hot reset.", vdev->vbasedev.name);

> +        goto out;
> +    }
> +
> +    /* List all affected devices by bus reset */
> +    devices = &info->devices[0];
> +
> +    /* Verify that we have all the groups required */
> +    for (i = 0; i < info->count; i++) {
> +        PCIHostDeviceAddress host;
> +        VFIOPCIDevice *tmp;
> +        VFIODevice *vbasedev_iter;
> +        bool found = false;
> +
> +        host.domain = devices[i].segment;
> +        host.bus = devices[i].bus;
> +        host.slot = PCI_SLOT(devices[i].devfn);
> +        host.function = PCI_FUNC(devices[i].devfn);
> +
> +        /* Skip the current device */
> +        if (vfio_pci_host_match(&host, &vdev->host)) {
> +            continue;
> +        }
> +
> +        /* Ensure we own the group of the affected device */
> +        QLIST_FOREACH(group, &vfio_group_list, next) {
> +            if (group->groupid == devices[i].group_id) {
> +                break;
> +            }
> +        }
> +
> +        if (!group) {
> +            error_report("vfio: Cannot enable AER for device %s, "
> +                         "depends on group %d which is not owned.",
> +                         vdev->vbasedev.name, devices[i].group_id);
> +            ret = -1;
> +            goto out;
> +        }
> +
> +        /* Ensure affected devices for reset on/blow the bus */
> +        QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
> +            if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
> +                continue;
> +            }
> +            tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
> +            if (vfio_pci_host_match(&host, &tmp->host)) {
> +                PCIDevice *pci = PCI_DEVICE(tmp);
> +
> +                find.pdev = pci;
> +                find.found = false;
> +                pci_for_each_device(bus, pci_bus_num(bus),
> +                                    device_find, &find);
> +                if (!find.found) {
> +                    error_report("vfio: Cannot enable AER for device %s, "
> +                                 "the dependent device %s is not under the same bus",
> +                                 vdev->vbasedev.name, tmp->vbasedev.name);
> +                    ret = -1;
> +                    goto out;
> +                }
> +                found = true;
> +                break;
> +            }
> +        }
> +
> +        /* Ensure all affected devices assigned to VM */
> +        if (!found) {
> +            error_report("vfio: Cannot enable AER for device %s, "
> +                         "the dependent device %04x:%02x:%02x.%x "
> +                         "is not assigned to VM.",
> +                         vdev->vbasedev.name, host.domain, host.bus,
> +                         host.slot, host.function);
> +            ret = -1;
> +            goto out;
> +        }
> +    }
> +
> +    /*
> +     * Check the all pci devices on or below the target bus
> +     * have a reset mechanism at least.
> +     */
> +    find.pdev = NULL;
> +    find.found = false;
> +    pci_for_each_device(bus, pci_bus_num(bus),
> +                        vfio_check_device_noreset, &find);
> +    if (find.found) {
> +        error_report("vfio: Cannot enable AER for device %s, "
> +                     "the affected device %s have not a reset mechanism.",

s/have not/does not have/

> +                     vdev->vbasedev.name, find.pdev->name);
> +        ret = -1;
> +        goto out;
> +    }
> +
> +    ret = 0;
> +out:
> +    g_free(info);
> +    return ret;
> +}
> +
> +static int vfio_check_devices_host_bus_reset(void)
> +{
> +    VFIOGroup *group;
> +    VFIODevice *vbasedev;
> +    VFIOPCIDevice *vdev;
> +
> +    /* Check All vfio-pci devices if have bus reset capability */
> +    QLIST_FOREACH(group, &vfio_group_list, next) {
> +        QLIST_FOREACH(vbasedev, &group->device_list, next) {

Missing a test for vfio-pci device:

+            if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) {
+                continue;
+            }


> +            vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
> +            if ((vdev->features & VFIO_FEATURE_ENABLE_AER) &&
> +                vfio_check_host_bus_reset(vdev)) {
> +                return -1;
> +            }
> +        }
> +    }
> +

Thanks,
Alex

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [Qemu-devel] [PATCH v13 10/13] pci: add pci device pre-post reset callbacks for host bus reset
  2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 10/13] pci: add pci device pre-post reset callbacks for host bus reset Cao jin
  2015-11-11 16:56   ` Michael S. Tsirkin
@ 2015-11-11 20:58   ` Alex Williamson
  2015-11-12 11:58     ` Cao jin
  1 sibling, 1 reply; 30+ messages in thread
From: Alex Williamson @ 2015-11-11 20:58 UTC (permalink / raw)
  To: Cao jin; +Cc: Chen Fan, qemu-devel, mst

On Wed, 2015-11-11 at 18:34 +0800, Cao jin wrote:
> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> 
> Particularly, For vfio devices, Once need to recovery devices
> by bus reset such as AER, we always need to reset the host bus
> to recovery the devices under the bus, so we need to add pci device
> callbacks to specify to do host bus reset.
> 
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> ---
>  hw/pci/pci.c         | 18 ++++++++++++++++++
>  hw/pci/pci_bridge.c  |  9 +++++++++
>  hw/vfio/pci.c        | 26 ++++++++++++++++++++++++++
>  hw/vfio/pci.h        |  2 ++
>  include/hw/pci/pci.h |  7 +++++++
>  5 files changed, 62 insertions(+)
...
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index b385f07..1b89b83 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -144,6 +144,8 @@ typedef struct VFIOPCIDevice {
>      bool no_kvm_msix;
>  
>      NotifierWithReturn hotplug_notifier;
> +    bool aer_reset;
> +    bool single_depend_dev;
>  } VFIOPCIDevice;

Add these with the rest of the bools above hotplug_notifier so the
structure isn't larger than it needs to be.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [Qemu-devel] [PATCH v13 09/13] add check reset mechanism when hotplug vfio device
  2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 09/13] add check reset mechanism when hotplug vfio device Cao jin
@ 2015-11-12 11:51   ` Michael S. Tsirkin
  2015-11-13  3:28     ` Cao jin
  0 siblings, 1 reply; 30+ messages in thread
From: Michael S. Tsirkin @ 2015-11-12 11:51 UTC (permalink / raw)
  To: Cao jin; +Cc: Chen Fan, alex.williamson, qemu-devel

On Wed, Nov 11, 2015 at 06:34:27PM +0800, Cao jin wrote:
> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> 
> Since we support multi-function hotplug. the function 0 indicate
> the closure of the slot, so we have the chance to do the check.
> 
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> ---
>  hw/pci/pci.c             | 29 +++++++++++++++++++++++++++++
>  hw/vfio/pci.c            | 19 +++++++++++++++++++
>  hw/vfio/pci.h            |  2 ++
>  include/hw/pci/pci_bus.h |  5 +++++
>  4 files changed, 55 insertions(+)
> 
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index 168b9cc..f6ca6ef 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -81,6 +81,7 @@ static void pci_bus_realize(BusState *qbus, Error **errp)
>      PCIBus *bus = PCI_BUS(qbus);
>  
>      vmstate_register(NULL, -1, &vmstate_pcibus, bus);
> +    notifier_with_return_list_init(&bus->hotplug_notifiers);
>  }
>  
>  static void pci_bus_unrealize(BusState *qbus, Error **errp)
> @@ -1835,6 +1836,22 @@ PCIDevice *pci_find_device(PCIBus *bus, int bus_num, uint8_t devfn)
>      return bus->devices[devfn];
>  }
>  
> +void pci_bus_add_hotplug_notifier(PCIBus *bus, NotifierWithReturn *notify)
> +{
> +    notifier_with_return_list_add(&bus->hotplug_notifiers, notify);
> +}
> +
> +void pci_bus_remove_hotplug_notifier(NotifierWithReturn *notifier)
> +{
> +    notifier_with_return_remove(notifier);
> +}
> +
> +static int pci_bus_hotplug_notifier(PCIBus *bus, void *opaque)
> +{
> +    return notifier_with_return_list_notify(&bus->hotplug_notifiers,
> +                                            opaque);
> +}
> +
>  static void pci_qdev_realize(DeviceState *qdev, Error **errp)
>  {
>      PCIDevice *pci_dev = (PCIDevice *)qdev;
> @@ -1877,6 +1894,18 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
>          pci_qdev_unrealize(DEVICE(pci_dev), NULL);
>          return;
>      }
> +
> +    /*
> +     *  If the function is func 0, indicate the closure of the slot.
> +     *  signal the callback.
> +     */
> +    if (DEVICE(pci_dev)->hotplugged &&
> +        pci_get_function_0(pci_dev) == pci_dev &&
> +        pci_bus_hotplug_notifier(bus, pci_dev)) {
> +        error_setg(errp, "failed to hotplug function 0");
> +        pci_qdev_unrealize(DEVICE(pci_dev), NULL);
> +        return;
> +    }

I don't understand why this is required in pci core.
PCI Device is already constructed anyway.
Just do the checks and call unrealize in vfio.

I also don't see why you are tying this to hotplug.
I would check each function as it's added.
But that's a vfio thing, if both you and Alex think
it's a good idea, fine by me.

>  }
>  
>  static void pci_default_realize(PCIDevice *dev, Error **errp)
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 31ffd44..e619998 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -1990,6 +1990,19 @@ static int vfio_check_devices_host_bus_reset(void)
>      return 0;
>  }
>  
> +static int vfio_check_bus_reset(NotifierWithReturn *n, void *opaque)
> +{
> +    VFIOPCIDevice *vdev = container_of(n, VFIOPCIDevice, hotplug_notifier);
> +    PCIDevice *pci_dev = PCI_DEVICE(vdev);
> +    PCIDevice *pci_func0 = opaque;
> +
> +    if (pci_get_function_0(pci_dev) != pci_func0) {
> +        return 0;
> +    }
> +
> +    return vfio_check_host_bus_reset(vdev);
> +}
> +
>  static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
>                            int pos, uint16_t size)
>  {
> @@ -2044,6 +2057,9 @@ static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
>          return ret;
>      }
>  
> +    vdev->hotplug_notifier.notify = vfio_check_bus_reset;
> +    pci_bus_add_hotplug_notifier(pdev->bus, &vdev->hotplug_notifier);
> +
>      return 0;
>  
>  error:
> @@ -2919,6 +2935,9 @@ static void vfio_exitfn(PCIDevice *pdev)
>      vfio_unregister_req_notifier(vdev);
>      vfio_unregister_err_notifier(vdev);
>      pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
> +    if (vdev->features & VFIO_FEATURE_ENABLE_AER) {
> +        pci_bus_remove_hotplug_notifier(&vdev->hotplug_notifier);
> +    }
>      vfio_disable_interrupts(vdev);
>      if (vdev->intx.mmap_timer) {
>          timer_free(vdev->intx.mmap_timer);
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index 59ae194..b385f07 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -142,6 +142,8 @@ typedef struct VFIOPCIDevice {
>      bool no_kvm_intx;
>      bool no_kvm_msi;
>      bool no_kvm_msix;
> +
> +    NotifierWithReturn hotplug_notifier;
>  } VFIOPCIDevice;
>  
>  uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
> diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
> index 403fec6..7812fa9 100644
> --- a/include/hw/pci/pci_bus.h
> +++ b/include/hw/pci/pci_bus.h
> @@ -39,8 +39,13 @@ struct PCIBus {
>         Keep a count of the number of devices with raised IRQs.  */
>      int nirq;
>      int *irq_count;
> +
> +    NotifierWithReturnList hotplug_notifiers;
>  };
>  
> +void pci_bus_add_hotplug_notifier(PCIBus *bus, NotifierWithReturn *notify);
> +void pci_bus_remove_hotplug_notifier(NotifierWithReturn *notify);
> +
>  typedef struct PCIBridgeWindows PCIBridgeWindows;
>  
>  /*
> -- 
> 1.9.3

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [Qemu-devel] [PATCH v13 07/13] vfio: add aer support for vfio device
  2015-11-11 20:49   ` Alex Williamson
@ 2015-11-12 11:54     ` Cao jin
  0 siblings, 0 replies; 30+ messages in thread
From: Cao jin @ 2015-11-12 11:54 UTC (permalink / raw)
  To: Alex Williamson; +Cc: Chen Fan, qemu-devel, mst



On 11/12/2015 04:49 AM, Alex Williamson wrote:
> On Wed, 2015-11-11 at 18:34 +0800, Cao jin wrote:
>> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>>
>> Calling pcie_aer_init to initilize aer related registers for
>> vfio device, then reload physical related registers to expose
>> device capability.
>>
>> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>> ---
>
>
> What if VFIO_FEATURE_ENABLE_AER is enabled for a device that doesn't
> posses an AER capability or isn't attached to a PCIe bus?  It appears
> that we silently ignore it, which would lead to unregistering a hotplug
> notifier that was never registered in 09/13 and needing to test both
> VFIO_FEATURE_ENABLE_AER and exp.aer_cap in 12/13 as well as the
> inconsistency that we often only test for VFIO_FEATURE_ENABLE_AER when
> really we expect that to imply that AER is setup and enabled for the
> device.  It seems like we need to error either within
> vfio_add_capabilities() or after calling it if VFIO_FEATURE_ENABLE_AER
> is specified but not configured.  If a user expects AER to be enabled
> for a device by specifying aer=on, we need to fail if that's not
> possible.

Makes sense. How about adding the aer cap dynamically with
object_property_add_bool()? Then we can use the set function to check
whether AER is valid or not.

>
>>   hw/vfio/pci.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
>>   hw/vfio/pci.h |  3 +++
>>   2 files changed, 82 insertions(+), 3 deletions(-)
>>
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index 4bc2b51..2d34edf 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -1806,6 +1806,68 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
> ...
>> +
>> +    pcie_cap_deverr_init(pdev);
>> +    ret = pcie_aer_init(pdev, pos, size);
>> +    if (ret) {
>> +        return ret;
>> +    }
>
> This branch is unnecessary, we can simply:
>
> return pcie_aer_init(pdev, pos, size);
>
> if we get this far.  Thanks,

OK

>
> Alex
>
> .
>

-- 
Yours Sincerely,

Cao Jin

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [Qemu-devel] [PATCH v13 08/13] vfio: add check host bus reset is support or not
  2015-11-11 20:53   ` Alex Williamson
@ 2015-11-12 11:56     ` Cao jin
  0 siblings, 0 replies; 30+ messages in thread
From: Cao jin @ 2015-11-12 11:56 UTC (permalink / raw)
  To: Alex Williamson; +Cc: Chen Fan, qemu-devel, mst

ok, will fix these

On 11/12/2015 04:53 AM, Alex Williamson wrote:
> On Wed, 2015-11-11 at 18:34 +0800, Cao jin wrote:
>> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>>

>> +        error_report("vfio: Cannot get hot reset info");
>
>
> This needs a more useful error message:
>
> -        error_report("vfio: Cannot get hot reset info");
> +        error_report("vfio: Cannot enable AER for device %s, "
> +                     "device does not support hot reset." vdev->vbasedev.name);
>
>> +        goto out;
>> +    }
>> +

>> +    if (find.found) {
>> +        error_report("vfio: Cannot enable AER for device %s, "
>> +                     "the affected device %s have not a reset mechanism.",
>
> s/have not/does not have/
>
>> +                     vdev->vbasedev.name, find.pdev->name);
>> +        ret = -1;
>> +        goto out;
>> +    }
>> +
>> +    ret = 0;
>> +out:
>> +    g_free(info);
>> +    return ret;
>> +}
>> +
>> +static int vfio_check_devices_host_bus_reset(void)
>> +{
>> +    VFIOGroup *group;
>> +    VFIODevice *vbasedev;
>> +    VFIOPCIDevice *vdev;
>> +
>> +    /* Check All vfio-pci devices if have bus reset capability */
>> +    QLIST_FOREACH(group, &vfio_group_list, next) {
>> +        QLIST_FOREACH(vbasedev, &group->device_list, next) {
>
> Missing a test for vfio-pci device:
>
> +            if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
> +                continue;
> +            }
>
>

> Thanks,
> Alex
>
> .
>

-- 
Yours Sincerely,

Cao Jin

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [Qemu-devel] [PATCH v13 10/13] pci: add pci device pre-post reset callbacks for host bus reset
  2015-11-11 20:58   ` Alex Williamson
@ 2015-11-12 11:58     ` Cao jin
  0 siblings, 0 replies; 30+ messages in thread
From: Cao jin @ 2015-11-12 11:58 UTC (permalink / raw)
  To: Alex Williamson; +Cc: Chen Fan, qemu-devel, mst


On 11/12/2015 04:58 AM, Alex Williamson wrote:
> On Wed, 2015-11-11 at 18:34 +0800, Cao jin wrote:
>> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>
> ...
>> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
>> index b385f07..1b89b83 100644
>> --- a/hw/vfio/pci.h
>> +++ b/hw/vfio/pci.h
>> @@ -144,6 +144,8 @@ typedef struct VFIOPCIDevice {
>>       bool no_kvm_msix;
>>
>>       NotifierWithReturn hotplug_notifier;
>> +    bool aer_reset;
>> +    bool single_depend_dev;
>>   } VFIOPCIDevice;
>
> Add these to with the rest of the bools above hotplug_notifier so the
> structure isn't larger than it needs to be.
>

OK

>

-- 
Yours Sincerely,

Cao Jin

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [Qemu-devel] [PATCH v13 09/13] add check reset mechanism when hotplug vfio device
  2015-11-12 11:51   ` Michael S. Tsirkin
@ 2015-11-13  3:28     ` Cao jin
  2015-11-13 21:04       ` Alex Williamson
  0 siblings, 1 reply; 30+ messages in thread
From: Cao jin @ 2015-11-13  3:28 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: Chen Fan, alex.williamson, qemu-devel



On 11/12/2015 07:51 PM, Michael S. Tsirkin wrote:
> On Wed, Nov 11, 2015 at 06:34:27PM +0800, Cao jin wrote:
>> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>>
>> Since we support multi-function hotplug. the function 0 indicate
>> the closure of the slot, so we have the chance to do the check.
>>
>> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>> ---
>>   hw/pci/pci.c             | 29 +++++++++++++++++++++++++++++
>>   hw/vfio/pci.c            | 19 +++++++++++++++++++
>>   hw/vfio/pci.h            |  2 ++
>>   include/hw/pci/pci_bus.h |  5 +++++
>>   4 files changed, 55 insertions(+)
>>
>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>> index 168b9cc..f6ca6ef 100644
>> --- a/hw/pci/pci.c
>> +++ b/hw/pci/pci.c
>> @@ -81,6 +81,7 @@ static void pci_bus_realize(BusState *qbus, Error **errp)
>>       PCIBus *bus = PCI_BUS(qbus);
>>
>>       vmstate_register(NULL, -1, &vmstate_pcibus, bus);
>> +    notifier_with_return_list_init(&bus->hotplug_notifiers);
>>   }
>>
>>   static void pci_bus_unrealize(BusState *qbus, Error **errp)
>> @@ -1835,6 +1836,22 @@ PCIDevice *pci_find_device(PCIBus *bus, int bus_num, uint8_t devfn)
>>       return bus->devices[devfn];
>>   }
>>
>> +void pci_bus_add_hotplug_notifier(PCIBus *bus, NotifierWithReturn *notify)
>> +{
>> +    notifier_with_return_list_add(&bus->hotplug_notifiers, notify);
>> +}
>> +
>> +void pci_bus_remove_hotplug_notifier(NotifierWithReturn *notifier)
>> +{
>> +    notifier_with_return_remove(notifier);
>> +}
>> +
>> +static int pci_bus_hotplug_notifier(PCIBus *bus, void *opaque)
>> +{
>> +    return notifier_with_return_list_notify(&bus->hotplug_notifiers,
>> +                                            opaque);
>> +}
>> +
>>   static void pci_qdev_realize(DeviceState *qdev, Error **errp)
>>   {
>>       PCIDevice *pci_dev = (PCIDevice *)qdev;
>> @@ -1877,6 +1894,18 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
>>           pci_qdev_unrealize(DEVICE(pci_dev), NULL);
>>           return;
>>       }
>> +
>> +    /*
>> +     *  If the function is func 0, indicate the closure of the slot.
>> +     *  signal the callback.
>> +     */
>> +    if (DEVICE(pci_dev)->hotplugged &&
>> +        pci_get_function_0(pci_dev) == pci_dev &&
>> +        pci_bus_hotplug_notifier(bus, pci_dev)) {
>> +        error_setg(errp, "failed to hotplug function 0");
>> +        pci_qdev_unrealize(DEVICE(pci_dev), NULL);
>> +        return;
>> +    }
>
> I don't understand why this is required in pci core.
> PCI Device is already constructed anyway.
> Just do the checks and call unrealize in vfio.

Because when doing multi-function hotplug, function 0 on the PCIe bus
may not be a vfio device, so we should trigger the check from the PCI
core.

> I also don't see why you are tying this to hotplug.
> I would check each function as it's added.
> But that's a vfio thing, if both you and Alex think
> it's a good idea, fine by me.

Devices are initialized one by one, whether they are cold plugged or
hot plugged. But a vfio device with AER enabled needs to get the
dependent devices affected by a bus reset, so we need to make sure those
reset-dependent devices are assigned to QEMU. In vfio, there is a
machine-done callback that checks bus reset at boot up, so the same check
should also be done on hotplug.

It looks a little complicated. Alex, any ideas?

>
>>   }
>>
>>   static void pci_default_realize(PCIDevice *dev, Error **errp)
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index 31ffd44..e619998 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -1990,6 +1990,19 @@ static int vfio_check_devices_host_bus_reset(void)
>>       return 0;
>>   }
>>
>> +static int vfio_check_bus_reset(NotifierWithReturn *n, void *opaque)
>> +{
>> +    VFIOPCIDevice *vdev = container_of(n, VFIOPCIDevice, hotplug_notifier);
>> +    PCIDevice *pci_dev = PCI_DEVICE(vdev);
>> +    PCIDevice *pci_func0 = opaque;
>> +
>> +    if (pci_get_function_0(pci_dev) != pci_func0) {
>> +        return 0;
>> +    }
>> +
>> +    return vfio_check_host_bus_reset(vdev);
>> +}
>> +
>>   static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
>>                             int pos, uint16_t size)
>>   {
>> @@ -2044,6 +2057,9 @@ static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
>>           return ret;
>>       }
>>
>> +    vdev->hotplug_notifier.notify = vfio_check_bus_reset;
>> +    pci_bus_add_hotplug_notifier(pdev->bus, &vdev->hotplug_notifier);
>> +
>>       return 0;
>>
>>   error:
>> @@ -2919,6 +2935,9 @@ static void vfio_exitfn(PCIDevice *pdev)
>>       vfio_unregister_req_notifier(vdev);
>>       vfio_unregister_err_notifier(vdev);
>>       pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
>> +    if (vdev->features & VFIO_FEATURE_ENABLE_AER) {
>> +        pci_bus_remove_hotplug_notifier(&vdev->hotplug_notifier);
>> +    }
>>       vfio_disable_interrupts(vdev);
>>       if (vdev->intx.mmap_timer) {
>>           timer_free(vdev->intx.mmap_timer);
>> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
>> index 59ae194..b385f07 100644
>> --- a/hw/vfio/pci.h
>> +++ b/hw/vfio/pci.h
>> @@ -142,6 +142,8 @@ typedef struct VFIOPCIDevice {
>>       bool no_kvm_intx;
>>       bool no_kvm_msi;
>>       bool no_kvm_msix;
>> +
>> +    NotifierWithReturn hotplug_notifier;
>>   } VFIOPCIDevice;
>>
>>   uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
>> diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
>> index 403fec6..7812fa9 100644
>> --- a/include/hw/pci/pci_bus.h
>> +++ b/include/hw/pci/pci_bus.h
>> @@ -39,8 +39,13 @@ struct PCIBus {
>>          Keep a count of the number of devices with raised IRQs.  */
>>       int nirq;
>>       int *irq_count;
>> +
>> +    NotifierWithReturnList hotplug_notifiers;
>>   };
>>
>> +void pci_bus_add_hotplug_notifier(PCIBus *bus, NotifierWithReturn *notify);
>> +void pci_bus_remove_hotplug_notifier(NotifierWithReturn *notify);
>> +
>>   typedef struct PCIBridgeWindows PCIBridgeWindows;
>>
>>   /*
>> --
>> 1.9.3
> .
>

-- 
Yours Sincerely,

Cao Jin

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [Qemu-devel] [PATCH v13 09/13] add check reset mechanism when hotplug vfio device
  2015-11-13  3:28     ` Cao jin
@ 2015-11-13 21:04       ` Alex Williamson
  2015-11-16 10:18         ` Chen Fan
  0 siblings, 1 reply; 30+ messages in thread
From: Alex Williamson @ 2015-11-13 21:04 UTC (permalink / raw)
  To: Cao jin; +Cc: Chen Fan, qemu-devel, Michael S. Tsirkin

On Fri, 2015-11-13 at 11:28 +0800, Cao jin wrote:
> 
> On 11/12/2015 07:51 PM, Michael S. Tsirkin wrote:
> > On Wed, Nov 11, 2015 at 06:34:27PM +0800, Cao jin wrote:
> >> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> >>
> >> Since we support multi-function hotplug. the function 0 indicate
> >> the closure of the slot, so we have the chance to do the check.
> >>
> >> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> >> ---
> >>   hw/pci/pci.c             | 29 +++++++++++++++++++++++++++++
> >>   hw/vfio/pci.c            | 19 +++++++++++++++++++
> >>   hw/vfio/pci.h            |  2 ++
> >>   include/hw/pci/pci_bus.h |  5 +++++
> >>   4 files changed, 55 insertions(+)
> >>
> >> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> >> index 168b9cc..f6ca6ef 100644
> >> --- a/hw/pci/pci.c
> >> +++ b/hw/pci/pci.c
> >> @@ -81,6 +81,7 @@ static void pci_bus_realize(BusState *qbus, Error **errp)
> >>       PCIBus *bus = PCI_BUS(qbus);
> >>
> >>       vmstate_register(NULL, -1, &vmstate_pcibus, bus);
> >> +    notifier_with_return_list_init(&bus->hotplug_notifiers);
> >>   }
> >>
> >>   static void pci_bus_unrealize(BusState *qbus, Error **errp)
> >> @@ -1835,6 +1836,22 @@ PCIDevice *pci_find_device(PCIBus *bus, int bus_num, uint8_t devfn)
> >>       return bus->devices[devfn];
> >>   }
> >>
> >> +void pci_bus_add_hotplug_notifier(PCIBus *bus, NotifierWithReturn *notify)
> >> +{
> >> +    notifier_with_return_list_add(&bus->hotplug_notifiers, notify);
> >> +}
> >> +
> >> +void pci_bus_remove_hotplug_notifier(NotifierWithReturn *notifier)
> >> +{
> >> +    notifier_with_return_remove(notifier);
> >> +}
> >> +
> >> +static int pci_bus_hotplug_notifier(PCIBus *bus, void *opaque)
> >> +{
> >> +    return notifier_with_return_list_notify(&bus->hotplug_notifiers,
> >> +                                            opaque);
> >> +}
> >> +
> >>   static void pci_qdev_realize(DeviceState *qdev, Error **errp)
> >>   {
> >>       PCIDevice *pci_dev = (PCIDevice *)qdev;
> >> @@ -1877,6 +1894,18 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
> >>           pci_qdev_unrealize(DEVICE(pci_dev), NULL);
> >>           return;
> >>       }
> >> +
> >> +    /*
> >> +     *  If the function is func 0, indicate the closure of the slot.
> >> +     *  signal the callback.
> >> +     */
> >> +    if (DEVICE(pci_dev)->hotplugged &&
> >> +        pci_get_function_0(pci_dev) == pci_dev &&
> >> +        pci_bus_hotplug_notifier(bus, pci_dev)) {
> >> +        error_setg(errp, "failed to hotplug function 0");
> >> +        pci_qdev_unrealize(DEVICE(pci_dev), NULL);
> >> +        return;
> >> +    }
> >
> > I don't understand why this is required in pci core.
> > PCI Device is already constructed anyway.
> > Just do the checks and call unrealize in vfio.
> 
> Because when do multi-function hotplug, the function 0 on the pcie bus 
> probably is not a vfio device. so we should trigger the check from pci 
> core.
> 
> > I also don't see why you are tying this to hotplug.
> > I would check each function as it's added.
> > But that's a vfio thing, if both you and Alex think
> > it's a good idea, fine by me.
> 
> The device is  initialized one by one no matter it is cold plugged or 
> hot plugged, but for the vfio with aer that need to get depended devices 
> required by bus reset, so need to make sure the reset depended devices 
> are assigned to qemu, in vfio, there is a machine done callback to check 
> the bus reset for boot up, so it also should be done in hotplug。
> 
> it looks little complicated, Alex, any idea?


So the problem is that to support AER we need to be able to do a bus
reset of the device, both in the virtual and physical spaces.  A
physical bus reset is likely to affect more than a single device since
we're often dealing with multifunction endpoints.  Those functions may
be considered isolated on the host due to ACS, but we cannot reset the
bus without affecting all of the functions.  Therefore, we need to test
whether we have a compatible setup, but it involves more than a single
device.  We cannot test each device as it is initialized because any
time more than one device is affected, and we haven't yet added the
other devices, we'll fail the test.

There are two separate cases where we need to solve this problem,
coldplug and hotplug.  Coldplug can be resolved by using the
machine-init done notifier to verify that our configuration is
compatible.  We have no requirements for the ordering of devices during
cold initialization.  For the hotplug case, we've defined that function
0 closes the slot, which provides an opportunity for us to do the same
verification.  However, function 0 is not necessarily a vfio-pci device.
We can create our own multifunction devices in the VM, where function 0
could be any type of pci device.  Thus vfio-pci cannot notify itself
when a slot is closed and due to the above mentioned problem, we cannot
verify as each device is added.

So, I don't really see a better way to solve the problem than what's
being proposed here.  Thanks,

Alex

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [Qemu-devel] [PATCH v13 09/13] add check reset mechanism when hotplug vfio device
  2015-11-13 21:04       ` Alex Williamson
@ 2015-11-16 10:18         ` Chen Fan
  2015-11-16 16:05           ` Alex Williamson
  0 siblings, 1 reply; 30+ messages in thread
From: Chen Fan @ 2015-11-16 10:18 UTC (permalink / raw)
  To: Alex Williamson, Cao jin; +Cc: qemu-devel, Michael S. Tsirkin

Hi Alex,

   Thanks for your detailed explanation.
   During my testing, I found what may be another problem in the vfio
driver.
I use a dual-port NIC whose two functions are at addresses 06:00.0 and 06:00.1.
Then I use aer-inject to inject an error into one function, as follows:
AER
ID 0000:06:00.0
UNCOR_STATUS DLP
HEADER_LOG 0 1 2 3

Here I boot qemu with AER enabled on one function and disabled on the other:
./x86_64-softmmu/qemu-system-x86_64 -M q35 -device 
ioh3420,bus=pcie.0,addr=1c.0,port=1,id=bridge1,chassis=1
  -device vfio-pci,host=06:00.1,bus=bridge1,addr=00.1
  -device 
vfio-pci,host=06:00.0,bus=bridge1,addr=00.0,aer=true,multifunction=on

So we expected the error to be sent only to the vfio device whose host
address is 06:00.0,
but I found that both devices (06:00.0 and 06:00.1) receive the signal in
qemu, which is sent by the vfio driver
in vfio_pci_aer_err_detected. qemu is then stopped because the device at
06:00.1 (the one without aer enabled) received the signal.
Is that the expected behavior?

Thanks,
Chen


On 11/14/2015 05:04 AM, Alex Williamson wrote:
> On Fri, 2015-11-13 at 11:28 +0800, Cao jin wrote:
>> On 11/12/2015 07:51 PM, Michael S. Tsirkin wrote:
>>> On Wed, Nov 11, 2015 at 06:34:27PM +0800, Cao jin wrote:
>>>> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>>>>
>>>> Since we support multi-function hotplug. the function 0 indicate
>>>> the closure of the slot, so we have the chance to do the check.
>>>>
>>>> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>>>> ---
>>>>    hw/pci/pci.c             | 29 +++++++++++++++++++++++++++++
>>>>    hw/vfio/pci.c            | 19 +++++++++++++++++++
>>>>    hw/vfio/pci.h            |  2 ++
>>>>    include/hw/pci/pci_bus.h |  5 +++++
>>>>    4 files changed, 55 insertions(+)
>>>>
>>>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>>>> index 168b9cc..f6ca6ef 100644
>>>> --- a/hw/pci/pci.c
>>>> +++ b/hw/pci/pci.c
>>>> @@ -81,6 +81,7 @@ static void pci_bus_realize(BusState *qbus, Error **errp)
>>>>        PCIBus *bus = PCI_BUS(qbus);
>>>>
>>>>        vmstate_register(NULL, -1, &vmstate_pcibus, bus);
>>>> +    notifier_with_return_list_init(&bus->hotplug_notifiers);
>>>>    }
>>>>
>>>>    static void pci_bus_unrealize(BusState *qbus, Error **errp)
>>>> @@ -1835,6 +1836,22 @@ PCIDevice *pci_find_device(PCIBus *bus, int bus_num, uint8_t devfn)
>>>>        return bus->devices[devfn];
>>>>    }
>>>>
>>>> +void pci_bus_add_hotplug_notifier(PCIBus *bus, NotifierWithReturn *notify)
>>>> +{
>>>> +    notifier_with_return_list_add(&bus->hotplug_notifiers, notify);
>>>> +}
>>>> +
>>>> +void pci_bus_remove_hotplug_notifier(NotifierWithReturn *notifier)
>>>> +{
>>>> +    notifier_with_return_remove(notifier);
>>>> +}
>>>> +
>>>> +static int pci_bus_hotplug_notifier(PCIBus *bus, void *opaque)
>>>> +{
>>>> +    return notifier_with_return_list_notify(&bus->hotplug_notifiers,
>>>> +                                            opaque);
>>>> +}
>>>> +
>>>>    static void pci_qdev_realize(DeviceState *qdev, Error **errp)
>>>>    {
>>>>        PCIDevice *pci_dev = (PCIDevice *)qdev;
>>>> @@ -1877,6 +1894,18 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
>>>>            pci_qdev_unrealize(DEVICE(pci_dev), NULL);
>>>>            return;
>>>>        }
>>>> +
>>>> +    /*
>>>> +     *  If the function is func 0, indicate the closure of the slot.
>>>> +     *  signal the callback.
>>>> +     */
>>>> +    if (DEVICE(pci_dev)->hotplugged &&
>>>> +        pci_get_function_0(pci_dev) == pci_dev &&
>>>> +        pci_bus_hotplug_notifier(bus, pci_dev)) {
>>>> +        error_setg(errp, "failed to hotplug function 0");
>>>> +        pci_qdev_unrealize(DEVICE(pci_dev), NULL);
>>>> +        return;
>>>> +    }
>>> I don't understand why this is required in pci core.
>>> PCI Device is already constructed anyway.
>>> Just do the checks and call unrealize in vfio.
>> Because when do multi-function hotplug, the function 0 on the pcie bus
>> probably is not a vfio device. so we should trigger the check from pci
>> core.
>>
>>> I also don't see why you are tying this to hotplug.
>>> I would check each function as it's added.
>>> But that's a vfio thing, if both you and Alex think
>>> it's a good idea, fine by me.
>> The device is  initialized one by one no matter it is cold plugged or
>> hot plugged, but for the vfio with aer that need to get depended devices
>> required by bus reset, so need to make sure the reset depended devices
>> are assigned to qemu, in vfio, there is a machine done callback to check
>> the bus reset for boot up, so it also should be done in hotplug。
>>
>> it looks little complicated, Alex, any idea?
>
> So the problem is that to support AER we need to be able to do a bus
> reset of the device, both in the virtual and physical spaces.  A
> physical bus reset is likely to affect more than a single device since
> we're often dealing with multifunction endpoints.  Those functions may
> be considered isolated on the host due to ACS, but we cannot reset the
> bus without affecting all of the functions.  Therefore, we need to test
> whether we have a compatible setup, but it involves more than a single
> device.  We cannot test each device as it is initialized because any
> time more than one device is affected, and we haven't yet added the
> other devices, we'll fail the test.
>
> There are two separate cases where we need to solve this problem,
> coldplug and hotplug.  Coldplug can be resolved by using the
> machine-init done notifier to verify that our configuration is
> compatible.  We have no requirements for the ordering of devices during
> cold initialization.  For the hotplug case, we've defined that function
> 0 closes the slot, which provides an opportunity for us to do the same
> verification.  However, function 0 is not necessarily a vfio-pci device.
> We can create our own multifunction devices in the VM, where function 0
> could be any type of pci device.  Thus vfio-pci cannot notify itself
> when a slot is closed and due to the above mentioned problem, we cannot
> verify as each device is added.
>
> So, I don't really see a better way to solve the problem than what's
> being proposed here.  Thanks,
>
> Alex
>
> .
>

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [Qemu-devel] [PATCH v13 09/13] add check reset mechanism when hotplug vfio device
  2015-11-16 10:18         ` Chen Fan
@ 2015-11-16 16:05           ` Alex Williamson
  2015-11-17  2:48             ` Chen Fan
  0 siblings, 1 reply; 30+ messages in thread
From: Alex Williamson @ 2015-11-16 16:05 UTC (permalink / raw)
  To: Chen Fan; +Cc: Cao jin, qemu-devel, Michael S. Tsirkin

On Mon, 2015-11-16 at 18:18 +0800, Chen Fan wrote:
> Hi Alex,
> 
>    Thanks for your detailed explanation.
>    during my test, I found that maybe there was another problem in vfio 
> driver,
> I use a dual-port NIC which address are: 06:00.0 and 06:00.1 two functions.
> then I use aer-inject to inject one error to one function like following:
> AER
> ID 0000:06:00.0
> UNCOR_STATUS DLP
> HEADER_LOG 0 1 2 3
> 
> here I boot qemu with one enable aer, one disable aer:
> ./x86_64-softmmu/qemu-system-x86_64 -M q35 -device 
> ioh3420,bus=pcie.0,addr=1c.0,port=1,id=bridge1,chassis=1
>   -device vfio-pci,host=06:00.1,bus=bridge1,addr=00.1
>   -device 
> vfio-pci,host=06:00.0,bus=bridge1,addr=00.0,aer=true,multifunction=on
> 
> so we expected that the error only sent to the vfio device with host 
> address is 06:00.0,
> but I found that all devices (06:00.0 , 06:00.1) receive the signal in 
> qemu, which sent by vfio driver
> in vfio_pci_aer_err_detected. then qemu stopped by the device with 
> 06:00.1 received the signal.
> is that right?

You would need to know whether the response for the injected AER affects
all devices on the link or is isolated to the function specified.  VFIO
is just a passthrough for pci_error_handlers, so if error_detected is
getting called for each host device, it's going to signal each device to
the user.  Thanks,

Alex

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [Qemu-devel] [PATCH v13 09/13] add check reset mechanism when hotplug vfio device
  2015-11-16 16:05           ` Alex Williamson
@ 2015-11-17  2:48             ` Chen Fan
  0 siblings, 0 replies; 30+ messages in thread
From: Chen Fan @ 2015-11-17  2:48 UTC (permalink / raw)
  To: Alex Williamson; +Cc: Cao jin, qemu-devel, Michael S. Tsirkin


On 11/17/2015 12:05 AM, Alex Williamson wrote:
> On Mon, 2015-11-16 at 18:18 +0800, Chen Fan wrote:
>> Hi Alex,
>>
>>     Thanks for your detailed explanation.
>>     during my test, I found that maybe there was another problem in vfio
>> driver,
>> I use a dual-port NIC which address are: 06:00.0 and 06:00.1 two functions.
>> then I use aer-inject to inject one error to one function like following:
>> AER
>> ID 0000:06:00.0
>> UNCOR_STATUS DLP
>> HEADER_LOG 0 1 2 3
>>
>> here I boot qemu with one enable aer, one disable aer:
>> ./x86_64-softmmu/qemu-system-x86_64 -M q35 -device
>> ioh3420,bus=pcie.0,addr=1c.0,port=1,id=bridge1,chassis=1
>>    -device vfio-pci,host=06:00.1,bus=bridge1,addr=00.1
>>    -device
>> vfio-pci,host=06:00.0,bus=bridge1,addr=00.0,aer=true,multifunction=on
>>
>> so we expected that the error only sent to the vfio device with host
>> address is 06:00.0,
>> but I found that all devices (06:00.0 , 06:00.1) receive the signal in
>> qemu, which sent by vfio driver
>> in vfio_pci_aer_err_detected. then qemu stopped by the device with
>> 06:00.1 received the signal.
>> is that right?
> You would need to know whether the response for the injected AER affects
> all devices on the link or is isolated to the function specified.  VFIO
> is just a passthrough for pci_error_handlers, so if error_detected is
> getting called for each host device, it's going to signal each device to
> the user.  Thanks,
I saw that in the error-message broadcast, if the error is reported by an
endpoint,
the aer driver broadcasts the error to all functions under the upstream
link of that endpoint.
So here in qemu, I think we should enable AER for all functions of an
endpoint.

Thanks,
Chen


> Alex
>
> .
>

^ permalink raw reply	[flat|nested] 30+ messages in thread

end of thread, other threads:[~2015-11-17  2:53 UTC | newest]

Thread overview: 30+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-11-11 10:34 [Qemu-devel] [PATCH v13 00/13] vfio-pci: pass the aer error to guest Cao jin
2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 01/13] vfio: extract vfio_get_hot_reset_info as a single function Cao jin
2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 02/13] vfio: squeeze out vfio_pci_do_hot_reset for support bus reset Cao jin
2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 03/13] pcie: modify the capability size assert Cao jin
2015-11-11 16:55   ` Michael S. Tsirkin
2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 04/13] vfio: make the 4 bytes aligned for capability size Cao jin
2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 05/13] vfio: add pcie extanded capability support Cao jin
2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 06/13] aer: impove pcie_aer_init to support vfio device Cao jin
2015-11-11 16:55   ` Michael S. Tsirkin
2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 07/13] vfio: add aer support for " Cao jin
2015-11-11 20:49   ` Alex Williamson
2015-11-12 11:54     ` Cao jin
2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 08/13] vfio: add check host bus reset is support or not Cao jin
2015-11-11 20:53   ` Alex Williamson
2015-11-12 11:56     ` Cao jin
2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 09/13] add check reset mechanism when hotplug vfio device Cao jin
2015-11-12 11:51   ` Michael S. Tsirkin
2015-11-13  3:28     ` Cao jin
2015-11-13 21:04       ` Alex Williamson
2015-11-16 10:18         ` Chen Fan
2015-11-16 16:05           ` Alex Williamson
2015-11-17  2:48             ` Chen Fan
2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 10/13] pci: add pci device pre-post reset callbacks for host bus reset Cao jin
2015-11-11 16:56   ` Michael S. Tsirkin
2015-11-11 20:58   ` Alex Williamson
2015-11-12 11:58     ` Cao jin
2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 11/13] pcie_aer: expose pcie_aer_msg() interface Cao jin
2015-11-11 16:56   ` Michael S. Tsirkin
2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 12/13] vfio-pci: pass the aer error to guest Cao jin
2015-11-11 10:34 ` [Qemu-devel] [PATCH v13 13/13] vfio: add 'aer' property to expose aercap Cao jin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.