All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH v14 00/13] vfio-pci: pass the aer error to guest
@ 2015-11-17  8:41 Cao jin
  2015-11-17  8:41 ` [Qemu-devel] [PATCH v14 01/13] vfio: extract vfio_get_hot_reset_info as a single function Cao jin
                   ` (13 more replies)
  0 siblings, 14 replies; 34+ messages in thread
From: Cao jin @ 2015-11-17  8:41 UTC (permalink / raw)
  To: qemu-devel; +Cc: chen.fan.fnst, alex.williamson, mst

From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

For now, for vfio pci passthough devices when qemu receives
an error from host aer report, currentlly just terminate the guest,
but usually user want to know what error occurred but stopping the
guest, so this patches add aer capability support for vfio device,
and pass the error to guest, and have guest driver to recover
from the error.

v13-v14:
   1. for multifunction device, requiring all functions enable AER.(9/13)
   2. due to all affected functions receive error signal, ignore no
      error occurred function. (12/13)

v12-v13:
   1. since support multifuncion hotplug, here add callback to enable aer.
   2. add pci device pre+post reset for aer host reset.

Chen Fan (13):
  vfio: extract vfio_get_hot_reset_info as a single function
  vfio: squeeze out vfio_pci_do_hot_reset for support bus reset
  pcie: modify the capability size assert
  vfio: make the 4 bytes aligned for capability size
  vfio: add pcie extanded capability support
  aer: impove pcie_aer_init to support vfio device
  vfio: add aer support for vfio device
  vfio: add check host bus reset is support or not
  add check reset mechanism when hotplug vfio device
  pci: add pci device pre-post reset callbacks for host bus reset
  pcie_aer: expose pcie_aer_msg() interface
  vfio-pci: pass the aer error to guest
  vfio: add 'aer' property to expose aercap

 hw/pci-bridge/ioh3420.c            |   2 +-
 hw/pci-bridge/xio3130_downstream.c |   2 +-
 hw/pci-bridge/xio3130_upstream.c   |   2 +-
 hw/pci/pci.c                       |  47 +++
 hw/pci/pci_bridge.c                |   9 +
 hw/pci/pcie.c                      |   2 +-
 hw/pci/pcie_aer.c                  |   6 +-
 hw/vfio/pci.c                      | 625 +++++++++++++++++++++++++++++++++----
 hw/vfio/pci.h                      |   8 +
 include/hw/pci/pci.h               |   7 +
 include/hw/pci/pci_bus.h           |   5 +
 include/hw/pci/pcie_aer.h          |   3 +-
 12 files changed, 646 insertions(+), 72 deletions(-)

-- 
1.9.3

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [Qemu-devel] [PATCH v14 01/13] vfio: extract vfio_get_hot_reset_info as a single function
  2015-11-17  8:41 [Qemu-devel] [PATCH v14 00/13] vfio-pci: pass the aer error to guest Cao jin
@ 2015-11-17  8:41 ` Cao jin
  2015-11-17  8:41 ` [Qemu-devel] [PATCH v14 02/13] vfio: squeeze out vfio_pci_do_hot_reset for support bus reset Cao jin
                   ` (12 subsequent siblings)
  13 siblings, 0 replies; 34+ messages in thread
From: Cao jin @ 2015-11-17  8:41 UTC (permalink / raw)
  To: qemu-devel; +Cc: chen.fan.fnst, alex.williamson, mst

From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

the function is used to get affected devices by bus reset.
so here extract it, and can used for aer soon.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 66 +++++++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 48 insertions(+), 18 deletions(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 8fadbcf..464e6b7 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1628,6 +1628,51 @@ static void vfio_check_af_flr(VFIOPCIDevice *vdev, uint8_t pos)
     }
 }
 
+/*
+ * return negative with errno, return 0 on success.
+ * if success, the point of ret_info fill with the affected device reset info.
+ *
+ */
+static int vfio_get_hot_reset_info(VFIOPCIDevice *vdev,
+                                   struct vfio_pci_hot_reset_info **ret_info)
+{
+    struct vfio_pci_hot_reset_info *info;
+    int ret, count;
+
+    *ret_info = NULL;
+
+    info = g_malloc0(sizeof(*info));
+    info->argsz = sizeof(*info);
+
+    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info);
+    if (ret && errno != ENOSPC) {
+        ret = -errno;
+        goto error;
+    }
+
+    count = info->count;
+
+    info = g_realloc(info, sizeof(*info) +
+                     (count * sizeof(struct vfio_pci_dependent_device)));
+    info->argsz = sizeof(*info) +
+                  (count * sizeof(struct vfio_pci_dependent_device));
+
+    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info);
+    if (ret) {
+        ret = -errno;
+        error_report("vfio: hot reset info failed: %m");
+        goto error;
+    }
+
+    *ret_info = info;
+    info = NULL;
+
+    return 0;
+error:
+    g_free(info);
+    return ret;
+}
+
 static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
 {
     PCIDevice *pdev = &vdev->pdev;
@@ -1767,7 +1812,7 @@ static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
 static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
 {
     VFIOGroup *group;
-    struct vfio_pci_hot_reset_info *info;
+    struct vfio_pci_hot_reset_info *info = NULL;
     struct vfio_pci_dependent_device *devices;
     struct vfio_pci_hot_reset *reset;
     int32_t *fds;
@@ -1779,12 +1824,8 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
     vfio_pci_pre_reset(vdev);
     vdev->vbasedev.needs_reset = false;
 
-    info = g_malloc0(sizeof(*info));
-    info->argsz = sizeof(*info);
-
-    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info);
-    if (ret && errno != ENOSPC) {
-        ret = -errno;
+    ret = vfio_get_hot_reset_info(vdev, &info);
+    if (ret) {
         if (!vdev->has_pm_reset) {
             error_report("vfio: Cannot reset device %04x:%02x:%02x.%x, "
                          "no available reset mechanism.", vdev->host.domain,
@@ -1793,18 +1834,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
         goto out_single;
     }
 
-    count = info->count;
-    info = g_realloc(info, sizeof(*info) + (count * sizeof(*devices)));
-    info->argsz = sizeof(*info) + (count * sizeof(*devices));
     devices = &info->devices[0];
-
-    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info);
-    if (ret) {
-        ret = -errno;
-        error_report("vfio: hot reset info failed: %m");
-        goto out_single;
-    }
-
     trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name);
 
     /* Verify that we have all the groups required */
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [Qemu-devel] [PATCH v14 02/13] vfio: squeeze out vfio_pci_do_hot_reset for support bus reset
  2015-11-17  8:41 [Qemu-devel] [PATCH v14 00/13] vfio-pci: pass the aer error to guest Cao jin
  2015-11-17  8:41 ` [Qemu-devel] [PATCH v14 01/13] vfio: extract vfio_get_hot_reset_info as a single function Cao jin
@ 2015-11-17  8:41 ` Cao jin
  2015-11-17  8:41 ` [Qemu-devel] [PATCH v14 03/13] pcie: modify the capability size assert Cao jin
                   ` (11 subsequent siblings)
  13 siblings, 0 replies; 34+ messages in thread
From: Cao jin @ 2015-11-17  8:41 UTC (permalink / raw)
  To: qemu-devel; +Cc: chen.fan.fnst, alex.williamson, mst

From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

squeeze out vfio_pci_do_hot_reset to do host bus reset when AER recovery.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 75 +++++++++++++++++++++++++++++++++++------------------------
 1 file changed, 44 insertions(+), 31 deletions(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 464e6b7..f333dfc 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1673,6 +1673,48 @@ error:
     return ret;
 }
 
+static int vfio_pci_do_hot_reset(VFIOPCIDevice *vdev,
+                                 struct vfio_pci_hot_reset_info *info)
+{
+    VFIOGroup *group;
+    struct vfio_pci_hot_reset *reset;
+    int32_t *fds;
+    int ret, i, count;
+    struct vfio_pci_dependent_device *devices;
+
+    /* Determine how many group fds need to be passed */
+    count = 0;
+    devices = &info->devices[0];
+    QLIST_FOREACH(group, &vfio_group_list, next) {
+        for (i = 0; i < info->count; i++) {
+            if (group->groupid == devices[i].group_id) {
+                count++;
+                break;
+            }
+        }
+    }
+
+    reset = g_malloc0(sizeof(*reset) + (count * sizeof(*fds)));
+    reset->argsz = sizeof(*reset) + (count * sizeof(*fds));
+    fds = &reset->group_fds[0];
+
+    /* Fill in group fds */
+    QLIST_FOREACH(group, &vfio_group_list, next) {
+        for (i = 0; i < info->count; i++) {
+            if (group->groupid == devices[i].group_id) {
+                fds[reset->count++] = group->fd;
+                break;
+            }
+        }
+    }
+
+    /* Bus reset! */
+    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset);
+    g_free(reset);
+
+    return ret;
+}
+
 static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
 {
     PCIDevice *pdev = &vdev->pdev;
@@ -1814,9 +1856,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
     VFIOGroup *group;
     struct vfio_pci_hot_reset_info *info = NULL;
     struct vfio_pci_dependent_device *devices;
-    struct vfio_pci_hot_reset *reset;
-    int32_t *fds;
-    int ret, i, count;
+    int ret, i;
     bool multi = false;
 
     trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi");
@@ -1895,34 +1935,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
         goto out_single;
     }
 
-    /* Determine how many group fds need to be passed */
-    count = 0;
-    QLIST_FOREACH(group, &vfio_group_list, next) {
-        for (i = 0; i < info->count; i++) {
-            if (group->groupid == devices[i].group_id) {
-                count++;
-                break;
-            }
-        }
-    }
-
-    reset = g_malloc0(sizeof(*reset) + (count * sizeof(*fds)));
-    reset->argsz = sizeof(*reset) + (count * sizeof(*fds));
-    fds = &reset->group_fds[0];
-
-    /* Fill in group fds */
-    QLIST_FOREACH(group, &vfio_group_list, next) {
-        for (i = 0; i < info->count; i++) {
-            if (group->groupid == devices[i].group_id) {
-                fds[reset->count++] = group->fd;
-                break;
-            }
-        }
-    }
-
-    /* Bus reset! */
-    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset);
-    g_free(reset);
+    ret = vfio_pci_do_hot_reset(vdev, info);
 
     trace_vfio_pci_hot_reset_result(vdev->vbasedev.name,
                                     ret ? "%m" : "Success");
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [Qemu-devel] [PATCH v14 03/13] pcie: modify the capability size assert
  2015-11-17  8:41 [Qemu-devel] [PATCH v14 00/13] vfio-pci: pass the aer error to guest Cao jin
  2015-11-17  8:41 ` [Qemu-devel] [PATCH v14 01/13] vfio: extract vfio_get_hot_reset_info as a single function Cao jin
  2015-11-17  8:41 ` [Qemu-devel] [PATCH v14 02/13] vfio: squeeze out vfio_pci_do_hot_reset for support bus reset Cao jin
@ 2015-11-17  8:41 ` Cao jin
  2015-11-17  8:41 ` [Qemu-devel] [PATCH v14 04/13] vfio: make the 4 bytes aligned for capability size Cao jin
                   ` (10 subsequent siblings)
  13 siblings, 0 replies; 34+ messages in thread
From: Cao jin @ 2015-11-17  8:41 UTC (permalink / raw)
  To: qemu-devel; +Cc: chen.fan.fnst, alex.williamson, mst

From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

 Device's Offset and size can reach PCIE_CONFIG_SPACE_SIZE,
 fix the corresponding assert.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci/pcie.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 0eab29d..8f4c0e5 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -607,7 +607,7 @@ void pcie_add_capability(PCIDevice *dev,
 
     assert(offset >= PCI_CONFIG_SPACE_SIZE);
     assert(offset < offset + size);
-    assert(offset + size < PCIE_CONFIG_SPACE_SIZE);
+    assert(offset + size <= PCIE_CONFIG_SPACE_SIZE);
     assert(size >= 8);
     assert(pci_is_express(dev));
 
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [Qemu-devel] [PATCH v14 04/13] vfio: make the 4 bytes aligned for capability size
  2015-11-17  8:41 [Qemu-devel] [PATCH v14 00/13] vfio-pci: pass the aer error to guest Cao jin
                   ` (2 preceding siblings ...)
  2015-11-17  8:41 ` [Qemu-devel] [PATCH v14 03/13] pcie: modify the capability size assert Cao jin
@ 2015-11-17  8:41 ` Cao jin
  2015-11-17  8:41 ` [Qemu-devel] [PATCH v14 05/13] vfio: add pcie extanded capability support Cao jin
                   ` (9 subsequent siblings)
  13 siblings, 0 replies; 34+ messages in thread
From: Cao jin @ 2015-11-17  8:41 UTC (permalink / raw)
  To: qemu-devel; +Cc: chen.fan.fnst, alex.williamson, mst

From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

this function search the capability from the end, the last
size should 0x100 - pos, not 0xff - pos.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index f333dfc..e305cda 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1468,7 +1468,8 @@ static void vfio_unmap_bars(VFIOPCIDevice *vdev)
  */
 static uint8_t vfio_std_cap_max_size(PCIDevice *pdev, uint8_t pos)
 {
-    uint8_t tmp, next = 0xff;
+    uint8_t tmp;
+    uint16_t next = PCI_CONFIG_SPACE_SIZE;
 
     for (tmp = pdev->config[PCI_CAPABILITY_LIST]; tmp;
          tmp = pdev->config[tmp + 1]) {
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [Qemu-devel] [PATCH v14 05/13] vfio: add pcie extanded capability support
  2015-11-17  8:41 [Qemu-devel] [PATCH v14 00/13] vfio-pci: pass the aer error to guest Cao jin
                   ` (3 preceding siblings ...)
  2015-11-17  8:41 ` [Qemu-devel] [PATCH v14 04/13] vfio: make the 4 bytes aligned for capability size Cao jin
@ 2015-11-17  8:41 ` Cao jin
  2015-11-17  8:41 ` [Qemu-devel] [PATCH v14 06/13] aer: impove pcie_aer_init to support vfio device Cao jin
                   ` (8 subsequent siblings)
  13 siblings, 0 replies; 34+ messages in thread
From: Cao jin @ 2015-11-17  8:41 UTC (permalink / raw)
  To: qemu-devel; +Cc: chen.fan.fnst, alex.williamson, mst

From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

For vfio pcie device, we could expose the extended capability on
PCIE bus. in order to avoid config space broken, we introduce
a copy config for parsing extended caps. and rebuild the pcie
extended config space.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 69 insertions(+), 1 deletion(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index e305cda..4bc2b51 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1481,6 +1481,21 @@ static uint8_t vfio_std_cap_max_size(PCIDevice *pdev, uint8_t pos)
     return next - pos;
 }
 
+
+static uint16_t vfio_ext_cap_max_size(const uint8_t *config, uint16_t pos)
+{
+    uint16_t tmp, next = PCIE_CONFIG_SPACE_SIZE;
+
+    for (tmp = PCI_CONFIG_SPACE_SIZE; tmp;
+        tmp = PCI_EXT_CAP_NEXT(pci_get_long(config + tmp))) {
+        if (tmp > pos && tmp < next) {
+            next = tmp;
+        }
+    }
+
+    return next - pos;
+}
+
 static void vfio_set_word_bits(uint8_t *buf, uint16_t val, uint16_t mask)
 {
     pci_set_word(buf, (pci_get_word(buf) & ~mask) | val);
@@ -1791,16 +1806,69 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
     return 0;
 }
 
+static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
+{
+    PCIDevice *pdev = &vdev->pdev;
+    uint32_t header;
+    uint16_t cap_id, next, size;
+    uint8_t cap_ver;
+    uint8_t *config;
+
+    /*
+     * In order to avoid breaking config space, create a copy to
+     * use for parsing extended capabilities.
+     */
+    config = g_memdup(pdev->config, vdev->config_size);
+
+    for (next = PCI_CONFIG_SPACE_SIZE; next;
+         next = PCI_EXT_CAP_NEXT(pci_get_long(config + next))) {
+        header = pci_get_long(config + next);
+        cap_id = PCI_EXT_CAP_ID(header);
+        cap_ver = PCI_EXT_CAP_VER(header);
+
+        /*
+         * If it becomes important to configure extended capabilities to their
+         * actual size, use this as the default when it's something we don't
+         * recognize. Since QEMU doesn't actually handle many of the config
+         * accesses, exact size doesn't seem worthwhile.
+         */
+        size = vfio_ext_cap_max_size(config, next);
+
+        pcie_add_capability(pdev, cap_id, cap_ver, next, size);
+        pci_set_long(dev->config + next, PCI_EXT_CAP(cap_id, cap_ver, 0));
+
+        /* Use emulated next pointer to allow dropping extended caps */
+        pci_long_test_and_set_mask(vdev->emulated_config_bits + next,
+                                   PCI_EXT_CAP_NEXT_MASK);
+    }
+
+    g_free(config);
+    return 0;
+}
+
 static int vfio_add_capabilities(VFIOPCIDevice *vdev)
 {
     PCIDevice *pdev = &vdev->pdev;
+    int ret;
 
     if (!(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST) ||
         !pdev->config[PCI_CAPABILITY_LIST]) {
         return 0; /* Nothing to add */
     }
 
-    return vfio_add_std_cap(vdev, pdev->config[PCI_CAPABILITY_LIST]);
+    ret = vfio_add_std_cap(vdev, pdev->config[PCI_CAPABILITY_LIST]);
+    if (ret) {
+        return ret;
+    }
+
+    /* on PCI bus, it doesn't make sense to expose extended capabilities. */
+    if (!pci_is_express(pdev) ||
+        !pci_bus_is_express(pdev->bus) ||
+        !pci_get_long(pdev->config + PCI_CONFIG_SPACE_SIZE)) {
+        return 0;
+    }
+
+    return vfio_add_ext_cap(vdev);
 }
 
 static void vfio_pci_pre_reset(VFIOPCIDevice *vdev)
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [Qemu-devel] [PATCH v14 06/13] aer: impove pcie_aer_init to support vfio device
  2015-11-17  8:41 [Qemu-devel] [PATCH v14 00/13] vfio-pci: pass the aer error to guest Cao jin
                   ` (4 preceding siblings ...)
  2015-11-17  8:41 ` [Qemu-devel] [PATCH v14 05/13] vfio: add pcie extanded capability support Cao jin
@ 2015-11-17  8:41 ` Cao jin
  2015-11-17  8:41 ` [Qemu-devel] [PATCH v14 07/13] vfio: add aer support for " Cao jin
                   ` (7 subsequent siblings)
  13 siblings, 0 replies; 34+ messages in thread
From: Cao jin @ 2015-11-17  8:41 UTC (permalink / raw)
  To: qemu-devel; +Cc: chen.fan.fnst, alex.williamson, mst

From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

pcie_aer_init was used to emulate an aer capability for pcie device,
but for vfio device, the aer config space size is mutable and is not
always equal to PCI_ERR_SIZEOF(0x48). it depends on where the TLP Prefix
register required, so here we add a size argument.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci-bridge/ioh3420.c            | 2 +-
 hw/pci-bridge/xio3130_downstream.c | 2 +-
 hw/pci-bridge/xio3130_upstream.c   | 2 +-
 hw/pci/pcie_aer.c                  | 4 ++--
 include/hw/pci/pcie_aer.h          | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/hw/pci-bridge/ioh3420.c b/hw/pci-bridge/ioh3420.c
index cce2fdd..4d9cd3f 100644
--- a/hw/pci-bridge/ioh3420.c
+++ b/hw/pci-bridge/ioh3420.c
@@ -129,7 +129,7 @@ static int ioh3420_initfn(PCIDevice *d)
         goto err_pcie_cap;
     }
     pcie_cap_root_init(d);
-    rc = pcie_aer_init(d, IOH_EP_AER_OFFSET);
+    rc = pcie_aer_init(d, IOH_EP_AER_OFFSET, PCI_ERR_SIZEOF);
     if (rc < 0) {
         goto err;
     }
diff --git a/hw/pci-bridge/xio3130_downstream.c b/hw/pci-bridge/xio3130_downstream.c
index b3a6479..9737041 100644
--- a/hw/pci-bridge/xio3130_downstream.c
+++ b/hw/pci-bridge/xio3130_downstream.c
@@ -92,7 +92,7 @@ static int xio3130_downstream_initfn(PCIDevice *d)
         goto err_pcie_cap;
     }
     pcie_cap_arifwd_init(d);
-    rc = pcie_aer_init(d, XIO3130_AER_OFFSET);
+    rc = pcie_aer_init(d, XIO3130_AER_OFFSET, PCI_ERR_SIZEOF);
     if (rc < 0) {
         goto err;
     }
diff --git a/hw/pci-bridge/xio3130_upstream.c b/hw/pci-bridge/xio3130_upstream.c
index eada582..4d7f894 100644
--- a/hw/pci-bridge/xio3130_upstream.c
+++ b/hw/pci-bridge/xio3130_upstream.c
@@ -81,7 +81,7 @@ static int xio3130_upstream_initfn(PCIDevice *d)
     }
     pcie_cap_flr_init(d);
     pcie_cap_deverr_init(d);
-    rc = pcie_aer_init(d, XIO3130_AER_OFFSET);
+    rc = pcie_aer_init(d, XIO3130_AER_OFFSET, PCI_ERR_SIZEOF);
     if (rc < 0) {
         goto err;
     }
diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c
index 98d2c18..45f351b 100644
--- a/hw/pci/pcie_aer.c
+++ b/hw/pci/pcie_aer.c
@@ -94,12 +94,12 @@ static void aer_log_clear_all_err(PCIEAERLog *aer_log)
     aer_log->log_num = 0;
 }
 
-int pcie_aer_init(PCIDevice *dev, uint16_t offset)
+int pcie_aer_init(PCIDevice *dev, uint16_t offset, uint16_t size)
 {
     PCIExpressDevice *exp;
 
     pcie_add_capability(dev, PCI_EXT_CAP_ID_ERR, PCI_ERR_VER,
-                        offset, PCI_ERR_SIZEOF);
+                        offset, size);
     exp = &dev->exp;
     exp->aer_cap = offset;
 
diff --git a/include/hw/pci/pcie_aer.h b/include/hw/pci/pcie_aer.h
index 2fb8388..156acb0 100644
--- a/include/hw/pci/pcie_aer.h
+++ b/include/hw/pci/pcie_aer.h
@@ -87,7 +87,7 @@ struct PCIEAERErr {
 
 extern const VMStateDescription vmstate_pcie_aer_log;
 
-int pcie_aer_init(PCIDevice *dev, uint16_t offset);
+int pcie_aer_init(PCIDevice *dev, uint16_t offset, uint16_t size);
 void pcie_aer_exit(PCIDevice *dev);
 void pcie_aer_write_config(PCIDevice *dev,
                            uint32_t addr, uint32_t val, int len);
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [Qemu-devel] [PATCH v14 07/13] vfio: add aer support for vfio device
  2015-11-17  8:41 [Qemu-devel] [PATCH v14 00/13] vfio-pci: pass the aer error to guest Cao jin
                   ` (5 preceding siblings ...)
  2015-11-17  8:41 ` [Qemu-devel] [PATCH v14 06/13] aer: impove pcie_aer_init to support vfio device Cao jin
@ 2015-11-17  8:41 ` Cao jin
  2015-11-17  8:41 ` [Qemu-devel] [PATCH v14 08/13] vfio: add check host bus reset is support or not Cao jin
                   ` (6 subsequent siblings)
  13 siblings, 0 replies; 34+ messages in thread
From: Cao jin @ 2015-11-17  8:41 UTC (permalink / raw)
  To: qemu-devel; +Cc: chen.fan.fnst, alex.williamson, mst

From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

Calling pcie_aer_init to initilize aer related registers for
vfio device, then reload physical related registers to expose
device capability.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 hw/vfio/pci.h |  3 +++
 2 files changed, 81 insertions(+), 3 deletions(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 4bc2b51..d00b0e4 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1806,6 +1806,62 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
     return 0;
 }
 
+static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
+                          int pos, uint16_t size)
+{
+    PCIDevice *pdev = &vdev->pdev;
+    PCIDevice *dev_iter;
+    uint8_t type;
+    uint32_t errcap;
+
+    if (!(vdev->features & VFIO_FEATURE_ENABLE_AER)) {
+        pcie_add_capability(pdev, PCI_EXT_CAP_ID_ERR,
+                            cap_ver, pos, size);
+        return 0;
+    }
+
+    dev_iter = pci_bridge_get_device(pdev->bus);
+    if (!dev_iter) {
+        goto error;
+    }
+
+    while (dev_iter) {
+        type = pcie_cap_get_type(dev_iter);
+        if ((type != PCI_EXP_TYPE_ROOT_PORT &&
+             type != PCI_EXP_TYPE_UPSTREAM &&
+             type != PCI_EXP_TYPE_DOWNSTREAM)) {
+            goto error;
+        }
+
+        if (!dev_iter->exp.aer_cap) {
+            goto error;
+        }
+
+        dev_iter = pci_bridge_get_device(dev_iter->bus);
+    }
+
+    errcap = vfio_pci_read_config(pdev, pos + PCI_ERR_CAP, 4);
+    /*
+     * The ability to record multiple headers is depending on
+     * the state of the Multiple Header Recording Capable bit and
+     * enabled by the Multiple Header Recording Enable bit.
+     */
+    if ((errcap & PCI_ERR_CAP_MHRC) &&
+        (errcap & PCI_ERR_CAP_MHRE)) {
+        pdev->exp.aer_log.log_max = PCIE_AER_LOG_MAX_DEFAULT;
+    } else {
+        pdev->exp.aer_log.log_max = 0;
+    }
+
+    pcie_cap_deverr_init(pdev);
+    return pcie_aer_init(pdev, pos, size);
+
+error:
+    error_report("vfio: Unable to enable AER for device %s, parent bus "
+                 "does not support AER signaling", vdev->vbasedev.name);
+    return -1;
+}
+
 static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
 {
     PCIDevice *pdev = &vdev->pdev;
@@ -1813,6 +1869,7 @@ static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
     uint16_t cap_id, next, size;
     uint8_t cap_ver;
     uint8_t *config;
+    int ret = 0;
 
     /*
      * In order to avoid breaking config space, create a copy to
@@ -1834,16 +1891,29 @@ static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
          */
         size = vfio_ext_cap_max_size(config, next);
 
-        pcie_add_capability(pdev, cap_id, cap_ver, next, size);
-        pci_set_long(dev->config + next, PCI_EXT_CAP(cap_id, cap_ver, 0));
+        switch (cap_id) {
+        case PCI_EXT_CAP_ID_ERR:
+            ret = vfio_setup_aer(vdev, cap_ver, next, size);
+            break;
+        default:
+            pcie_add_capability(pdev, cap_id, cap_ver, next, size);
+            break;
+        }
+
+        if (ret) {
+            goto out;
+        }
+
+        pci_set_long(pdev->config + next, PCI_EXT_CAP(cap_id, cap_ver, 0));
 
         /* Use emulated next pointer to allow dropping extended caps */
         pci_long_test_and_set_mask(vdev->emulated_config_bits + next,
                                    PCI_EXT_CAP_NEXT_MASK);
     }
 
+out:
     g_free(config);
-    return 0;
+    return ret;
 }
 
 static int vfio_add_capabilities(VFIOPCIDevice *vdev)
@@ -2598,6 +2668,11 @@ static int vfio_initfn(PCIDevice *pdev)
         goto out_teardown;
     }
 
+    if ((vdev->features & VFIO_FEATURE_ENABLE_AER) &&
+        !pdev->exp.aer_cap) {
+        goto out_teardown;
+    }
+
     /* QEMU emulates all of MSI & MSIX */
     if (pdev->cap_present & QEMU_PCI_CAP_MSIX) {
         memset(vdev->emulated_config_bits + pdev->msix_cap, 0xff,
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index f004d52..48c1f69 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -15,6 +15,7 @@
 #include "qemu-common.h"
 #include "exec/memory.h"
 #include "hw/pci/pci.h"
+#include "hw/pci/pci_bridge.h"
 #include "hw/vfio/vfio-common.h"
 #include "qemu/event_notifier.h"
 #include "qemu/queue.h"
@@ -127,6 +128,8 @@ typedef struct VFIOPCIDevice {
 #define VFIO_FEATURE_ENABLE_VGA (1 << VFIO_FEATURE_ENABLE_VGA_BIT)
 #define VFIO_FEATURE_ENABLE_REQ_BIT 1
 #define VFIO_FEATURE_ENABLE_REQ (1 << VFIO_FEATURE_ENABLE_REQ_BIT)
+#define VFIO_FEATURE_ENABLE_AER_BIT 2
+#define VFIO_FEATURE_ENABLE_AER (1 << VFIO_FEATURE_ENABLE_AER_BIT)
     int32_t bootindex;
     uint8_t pm_cap;
     bool has_vga;
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [Qemu-devel] [PATCH v14 08/13] vfio: add check host bus reset is support or not
  2015-11-17  8:41 [Qemu-devel] [PATCH v14 00/13] vfio-pci: pass the aer error to guest Cao jin
                   ` (6 preceding siblings ...)
  2015-11-17  8:41 ` [Qemu-devel] [PATCH v14 07/13] vfio: add aer support for " Cao jin
@ 2015-11-17  8:41 ` Cao jin
  2015-12-17 20:32   ` [Qemu-devel] [PATCH v14 Resend " Alex Williamson
  2015-12-24 14:32   ` Michael S. Tsirkin
  2015-11-17  8:42 ` [Qemu-devel] [PATCH v14 09/13] add check reset mechanism when hotplug vfio device Cao jin
                   ` (5 subsequent siblings)
  13 siblings, 2 replies; 34+ messages in thread
From: Cao jin @ 2015-11-17  8:41 UTC (permalink / raw)
  To: qemu-devel; +Cc: chen.fan.fnst, alex.williamson, mst

From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

when init vfio devices done, we should test all the devices supported
aer whether conflict with others. For each one, get the hot reset
info for the affected device list.  For each affected device, all
should attach to the VM and on/below the same bus. also, we should test
all of the non-AER supporting vfio-pci devices on or below the target
bus to verify they have a reset mechanism.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 236 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 hw/vfio/pci.h |   1 +
 2 files changed, 230 insertions(+), 7 deletions(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index d00b0e4..6926dcc 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1806,6 +1806,216 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
     return 0;
 }
 
+static bool vfio_pci_host_slot_match(PCIHostDeviceAddress *host1,
+                                     PCIHostDeviceAddress *host2)
+{
+    return (host1->domain == host2->domain && host1->bus == host2->bus &&
+            host1->slot == host2->slot);
+}
+
+static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
+                                PCIHostDeviceAddress *host2)
+{
+    return (vfio_pci_host_slot_match(host1, host2) &&
+            host1->function == host2->function);
+}
+
+struct VFIODeviceFind {
+    PCIDevice *pdev;
+    bool found;
+};
+
+static void vfio_check_device_noreset(PCIBus *bus, PCIDevice *pdev,
+                                      void *opaque)
+{
+    DeviceState *dev = DEVICE(pdev);
+    DeviceClass *dc = DEVICE_GET_CLASS(dev);
+    VFIOPCIDevice *vdev;
+    struct VFIODeviceFind *find = opaque;
+
+    if (find->found) {
+        return;
+    }
+
+    if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
+        if (!dc->reset) {
+            goto found;
+        }
+        return;
+    }
+    vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
+    if (!(vdev->features & VFIO_FEATURE_ENABLE_AER) &&
+        !vdev->vbasedev.reset_works) {
+        goto found;
+    }
+
+    return;
+found:
+    find->pdev = pdev;
+    find->found = true;
+}
+
+static void device_find(PCIBus *bus, PCIDevice *pdev, void *opaque)
+{
+    struct VFIODeviceFind *find = opaque;
+
+    if (find->found) {
+        return;
+    }
+
+    if (pdev == find->pdev) {
+        find->found = true;
+    }
+}
+
+static int vfio_check_host_bus_reset(VFIOPCIDevice *vdev)
+{
+    PCIBus *bus = vdev->pdev.bus;
+    struct vfio_pci_hot_reset_info *info = NULL;
+    struct vfio_pci_dependent_device *devices;
+    VFIOGroup *group;
+    struct VFIODeviceFind find;
+    int ret, i;
+
+    ret = vfio_get_hot_reset_info(vdev, &info);
+    if (ret) {
+        error_report("vfio: Cannot enable AER for device %s,"
+                     " device does not support hot reset.",
+                     vdev->vbasedev.name);
+        goto out;
+    }
+
+    /* List all affected devices by bus reset */
+    devices = &info->devices[0];
+
+    /* Verify that we have all the groups required */
+    for (i = 0; i < info->count; i++) {
+        PCIHostDeviceAddress host;
+        VFIOPCIDevice *tmp;
+        VFIODevice *vbasedev_iter;
+        bool found = false;
+
+        host.domain = devices[i].segment;
+        host.bus = devices[i].bus;
+        host.slot = PCI_SLOT(devices[i].devfn);
+        host.function = PCI_FUNC(devices[i].devfn);
+
+        /* Skip the current device */
+        if (vfio_pci_host_match(&host, &vdev->host)) {
+            continue;
+        }
+
+        /* Ensure we own the group of the affected device */
+        QLIST_FOREACH(group, &vfio_group_list, next) {
+            if (group->groupid == devices[i].group_id) {
+                break;
+            }
+        }
+
+        if (!group) {
+            error_report("vfio: Cannot enable AER for device %s, "
+                         "depends on group %d which is not owned.",
+                         vdev->vbasedev.name, devices[i].group_id);
+            ret = -1;
+            goto out;
+        }
+
+        /* Ensure affected devices for reset on/blow the bus */
+        QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
+            if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
+                continue;
+            }
+            tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
+            if (vfio_pci_host_match(&host, &tmp->host)) {
+                PCIDevice *pci = PCI_DEVICE(tmp);
+
+                /*
+                 * For multifunction device, due to vfio driver signal all
+                 * functions under the upstream link of the end point. here
+                 * we validate all functions whether enable AER.
+                 */
+                if (vfio_pci_host_slot_match(&vdev->host, &tmp->host) &&
+                    !(tmp->features & VFIO_FEATURE_ENABLE_AER)) {
+                    error_report("vfio: Cannot enable AER for device %s, on same slot"
+                                 " the dependent device %s which does not enable AER.",
+                                 vdev->vbasedev.name, tmp->vbasedev.name);
+                    ret = -1;
+                    goto out;
+                }
+
+                find.pdev = pci;
+                find.found = false;
+                pci_for_each_device(bus, pci_bus_num(bus),
+                                    device_find, &find);
+                if (!find.found) {
+                    error_report("vfio: Cannot enable AER for device %s, "
+                                 "the dependent device %s is not under the same bus",
+                                 vdev->vbasedev.name, tmp->vbasedev.name);
+                    ret = -1;
+                    goto out;
+                }
+                found = true;
+                break;
+            }
+        }
+
+        /* Ensure all affected devices assigned to VM */
+        if (!found) {
+            error_report("vfio: Cannot enable AER for device %s, "
+                         "the dependent device %04x:%02x:%02x.%x "
+                         "is not assigned to VM.",
+                         vdev->vbasedev.name, host.domain, host.bus,
+                         host.slot, host.function);
+            ret = -1;
+            goto out;
+        }
+    }
+
+    /*
+     * Check the all pci devices on or below the target bus
+     * have a reset mechanism at least.
+     */
+    find.pdev = NULL;
+    find.found = false;
+    pci_for_each_device(bus, pci_bus_num(bus),
+                        vfio_check_device_noreset, &find);
+    if (find.found) {
+        error_report("vfio: Cannot enable AER for device %s, "
+                     "the affected device %s does not have a reset mechanism.",
+                     vdev->vbasedev.name, find.pdev->name);
+        ret = -1;
+        goto out;
+    }
+
+    ret = 0;
+out:
+    g_free(info);
+    return ret;
+}
+
+static int vfio_check_devices_host_bus_reset(void)
+{
+    VFIOGroup *group;
+    VFIODevice *vbasedev;
+    VFIOPCIDevice *vdev;
+
+    /* Check All vfio-pci devices if have bus reset capability */
+    QLIST_FOREACH(group, &vfio_group_list, next) {
+        QLIST_FOREACH(vbasedev, &group->device_list, next) {
+            if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) {
+                continue;
+            }
+            vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
+            if ((vdev->features & VFIO_FEATURE_ENABLE_AER) &&
+                vfio_check_host_bus_reset(vdev)) {
+                return -1;
+            }
+        }
+    }
+
+    return 0;
+}
+
 static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
                           int pos, uint16_t size)
 {
@@ -1983,13 +2193,6 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
     vfio_intx_enable(vdev);
 }
 
-static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
-                                PCIHostDeviceAddress *host2)
-{
-    return (host1->domain == host2->domain && host1->bus == host2->bus &&
-            host1->slot == host2->slot && host1->function == host2->function);
-}
-
 static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
 {
     VFIOGroup *group;
@@ -2495,6 +2698,20 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
     vdev->req_enabled = false;
 }
 
+static void vfio_pci_machine_done_notify(Notifier *notifier, void *unused)
+{
+    int ret;
+
+    ret = vfio_check_devices_host_bus_reset();
+    if (ret) {
+        exit(1);
+    }
+}
+
+static Notifier machine_notifier = {
+    .notify = vfio_pci_machine_done_notify,
+};
+
 static int vfio_initfn(PCIDevice *pdev)
 {
     VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
@@ -2841,6 +3058,11 @@ static const TypeInfo vfio_pci_dev_info = {
 static void register_vfio_pci_dev_type(void)
 {
     type_register_static(&vfio_pci_dev_info);
+    /*
+     * Register notifier when machine init is done, since we need
+     * check the configration manner after all vfio device are inited.
+     */
+    qemu_add_machine_init_done_notifier(&machine_notifier);
 }
 
 type_init(register_vfio_pci_dev_type)
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 48c1f69..59ae194 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -15,6 +15,7 @@
 #include "qemu-common.h"
 #include "exec/memory.h"
 #include "hw/pci/pci.h"
+#include "hw/pci/pci_bus.h"
 #include "hw/pci/pci_bridge.h"
 #include "hw/vfio/vfio-common.h"
 #include "qemu/event_notifier.h"
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [Qemu-devel] [PATCH v14 09/13] add check reset mechanism when hotplug vfio device
  2015-11-17  8:41 [Qemu-devel] [PATCH v14 00/13] vfio-pci: pass the aer error to guest Cao jin
                   ` (7 preceding siblings ...)
  2015-11-17  8:41 ` [Qemu-devel] [PATCH v14 08/13] vfio: add check host bus reset is support or not Cao jin
@ 2015-11-17  8:42 ` Cao jin
  2015-12-17 20:32   ` [Qemu-devel] [PATCH v14 Resend " Alex Williamson
  2015-11-17  8:42 ` [Qemu-devel] [PATCH v14 10/13] pci: add pci device pre-post reset callbacks for host bus reset Cao jin
                   ` (4 subsequent siblings)
  13 siblings, 1 reply; 34+ messages in thread
From: Cao jin @ 2015-11-17  8:42 UTC (permalink / raw)
  To: qemu-devel; +Cc: chen.fan.fnst, alex.williamson, mst

From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

Since we support multi-function hotplug. the function 0 indicate
the closure of the slot, so we have the chance to do the check.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/pci/pci.c             | 29 +++++++++++++++++++++++++++++
 hw/vfio/pci.c            | 19 +++++++++++++++++++
 hw/vfio/pci.h            |  2 ++
 include/hw/pci/pci_bus.h |  5 +++++
 4 files changed, 55 insertions(+)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 168b9cc..f6ca6ef 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -81,6 +81,7 @@ static void pci_bus_realize(BusState *qbus, Error **errp)
     PCIBus *bus = PCI_BUS(qbus);
 
     vmstate_register(NULL, -1, &vmstate_pcibus, bus);
+    notifier_with_return_list_init(&bus->hotplug_notifiers);
 }
 
 static void pci_bus_unrealize(BusState *qbus, Error **errp)
@@ -1835,6 +1836,22 @@ PCIDevice *pci_find_device(PCIBus *bus, int bus_num, uint8_t devfn)
     return bus->devices[devfn];
 }
 
+void pci_bus_add_hotplug_notifier(PCIBus *bus, NotifierWithReturn *notify)
+{
+    notifier_with_return_list_add(&bus->hotplug_notifiers, notify);
+}
+
+void pci_bus_remove_hotplug_notifier(NotifierWithReturn *notifier)
+{
+    notifier_with_return_remove(notifier);
+}
+
+static int pci_bus_hotplug_notifier(PCIBus *bus, void *opaque)
+{
+    return notifier_with_return_list_notify(&bus->hotplug_notifiers,
+                                            opaque);
+}
+
 static void pci_qdev_realize(DeviceState *qdev, Error **errp)
 {
     PCIDevice *pci_dev = (PCIDevice *)qdev;
@@ -1877,6 +1894,18 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
         pci_qdev_unrealize(DEVICE(pci_dev), NULL);
         return;
     }
+
+    /*
+     *  If the function is func 0, indicate the closure of the slot.
+     *  signal the callback.
+     */
+    if (DEVICE(pci_dev)->hotplugged &&
+        pci_get_function_0(pci_dev) == pci_dev &&
+        pci_bus_hotplug_notifier(bus, pci_dev)) {
+        error_setg(errp, "failed to hotplug function 0");
+        pci_qdev_unrealize(DEVICE(pci_dev), NULL);
+        return;
+    }
 }
 
 static void pci_default_realize(PCIDevice *dev, Error **errp)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 6926dcc..e17dc89 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2016,6 +2016,19 @@ static int vfio_check_devices_host_bus_reset(void)
     return 0;
 }
 
+static int vfio_check_bus_reset(NotifierWithReturn *n, void *opaque)
+{
+    VFIOPCIDevice *vdev = container_of(n, VFIOPCIDevice, hotplug_notifier);
+    PCIDevice *pci_dev = PCI_DEVICE(vdev);
+    PCIDevice *pci_func0 = opaque;
+
+    if (pci_get_function_0(pci_dev) != pci_func0) {
+        return 0;
+    }
+
+    return vfio_check_host_bus_reset(vdev);
+}
+
 static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
                           int pos, uint16_t size)
 {
@@ -2063,6 +2076,9 @@ static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
         pdev->exp.aer_log.log_max = 0;
     }
 
+    vdev->hotplug_notifier.notify = vfio_check_bus_reset;
+    pci_bus_add_hotplug_notifier(pdev->bus, &vdev->hotplug_notifier);
+
     pcie_cap_deverr_init(pdev);
     return pcie_aer_init(pdev, pos, size);
 
@@ -2944,6 +2960,9 @@ static void vfio_exitfn(PCIDevice *pdev)
     vfio_unregister_req_notifier(vdev);
     vfio_unregister_err_notifier(vdev);
     pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
+    if (vdev->features & VFIO_FEATURE_ENABLE_AER) {
+        pci_bus_remove_hotplug_notifier(&vdev->hotplug_notifier);
+    }
     vfio_disable_interrupts(vdev);
     if (vdev->intx.mmap_timer) {
         timer_free(vdev->intx.mmap_timer);
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 59ae194..b385f07 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -142,6 +142,8 @@ typedef struct VFIOPCIDevice {
     bool no_kvm_intx;
     bool no_kvm_msi;
     bool no_kvm_msix;
+
+    NotifierWithReturn hotplug_notifier;
 } VFIOPCIDevice;
 
 uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
index 403fec6..7812fa9 100644
--- a/include/hw/pci/pci_bus.h
+++ b/include/hw/pci/pci_bus.h
@@ -39,8 +39,13 @@ struct PCIBus {
        Keep a count of the number of devices with raised IRQs.  */
     int nirq;
     int *irq_count;
+
+    NotifierWithReturnList hotplug_notifiers;
 };
 
+void pci_bus_add_hotplug_notifier(PCIBus *bus, NotifierWithReturn *notify);
+void pci_bus_remove_hotplug_notifier(NotifierWithReturn *notify);
+
 typedef struct PCIBridgeWindows PCIBridgeWindows;
 
 /*
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [Qemu-devel] [PATCH v14 10/13] pci: add pci device pre-post reset callbacks for host bus reset
  2015-11-17  8:41 [Qemu-devel] [PATCH v14 00/13] vfio-pci: pass the aer error to guest Cao jin
                   ` (8 preceding siblings ...)
  2015-11-17  8:42 ` [Qemu-devel] [PATCH v14 09/13] add check reset mechanism when hotplug vfio device Cao jin
@ 2015-11-17  8:42 ` Cao jin
  2015-12-17 20:31   ` [Qemu-devel] [PATCH v14 Resend " Alex Williamson
  2015-12-23 12:00   ` Michael S. Tsirkin
  2015-11-17  8:42 ` [Qemu-devel] [PATCH v14 11/13] pcie_aer: expose pcie_aer_msg() interface Cao jin
                   ` (3 subsequent siblings)
  13 siblings, 2 replies; 34+ messages in thread
From: Cao jin @ 2015-11-17  8:42 UTC (permalink / raw)
  To: qemu-devel; +Cc: chen.fan.fnst, alex.williamson, mst

From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

Particularly, For vfio devices, Once need to recovery devices
by bus reset such as AER, we always need to reset the host bus
to recovery the devices under the bus, so we need to add pci device
callbacks to specify to do host bus reset.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci/pci.c         | 18 ++++++++++++++++++
 hw/pci/pci_bridge.c  |  9 +++++++++
 hw/vfio/pci.c        | 26 ++++++++++++++++++++++++++
 hw/vfio/pci.h        |  2 ++
 include/hw/pci/pci.h |  7 +++++++
 5 files changed, 62 insertions(+)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index f6ca6ef..64fa2cc 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -247,6 +247,24 @@ static void pci_do_device_reset(PCIDevice *dev)
     msix_reset(dev);
 }
 
+void pci_device_pre_reset(PCIBus *bus, PCIDevice *dev, void *unused)
+{
+    PCIDeviceClass *dc = PCI_DEVICE_GET_CLASS(dev);
+
+    if (dc->pre_reset) {
+        dc->pre_reset(dev);
+    }
+}
+
+void pci_device_post_reset(PCIBus *bus, PCIDevice *dev, void *unused)
+{
+    PCIDeviceClass *dc = PCI_DEVICE_GET_CLASS(dev);
+
+    if (dc->post_reset) {
+        dc->post_reset(dev);
+    }
+}
+
 /*
  * This function is called on #RST and FLR.
  * FLR if PCI_EXP_DEVCTL_BCR_FLR is set
diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
index 40c97b1..ddb76ab 100644
--- a/hw/pci/pci_bridge.c
+++ b/hw/pci/pci_bridge.c
@@ -267,8 +267,17 @@ void pci_bridge_write_config(PCIDevice *d,
 
     newctl = pci_get_word(d->config + PCI_BRIDGE_CONTROL);
     if (~oldctl & newctl & PCI_BRIDGE_CTL_BUS_RESET) {
+        /*
+         * Notify all vfio-pci devices under the bus
+         * should do physical bus reset.
+         */
+        PCIBus *sec_bus = pci_bridge_get_sec_bus(s);
+        pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
+                            pci_device_pre_reset, NULL);
         /* Trigger hot reset on 0->1 transition. */
         qbus_reset_all(&s->sec_bus.qbus);
+        pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
+                            pci_device_post_reset, NULL);
     }
 }
 
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index e17dc89..df32618 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -39,6 +39,7 @@
 
 static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
 static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled);
+static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single);
 
 /*
  * Disabling BAR mmaping can be slow, but toggling it around INTx can
@@ -1888,6 +1889,8 @@ static int vfio_check_host_bus_reset(VFIOPCIDevice *vdev)
     /* List all affected devices by bus reset */
     devices = &info->devices[0];
 
+    vdev->single_depend_dev = (info->count == 1);
+
     /* Verify that we have all the groups required */
     for (i = 0; i < info->count; i++) {
         PCIHostDeviceAddress host;
@@ -2029,10 +2032,26 @@ static int vfio_check_bus_reset(NotifierWithReturn *n, void *opaque)
     return vfio_check_host_bus_reset(vdev);
 }
 
+static void vfio_aer_pre_reset(PCIDevice *pdev)
+{
+    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
+
+    vdev->aer_reset = true;
+    vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
+}
+
+static void vfio_aer_post_reset(PCIDevice *pdev)
+{
+    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
+
+    vdev->aer_reset = false;
+}
+
 static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
                           int pos, uint16_t size)
 {
     PCIDevice *pdev = &vdev->pdev;
+    PCIDeviceClass *dc = PCI_DEVICE_GET_CLASS(pdev);
     PCIDevice *dev_iter;
     uint8_t type;
     uint32_t errcap;
@@ -2079,6 +2098,9 @@ static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
     vdev->hotplug_notifier.notify = vfio_check_bus_reset;
     pci_bus_add_hotplug_notifier(pdev->bus, &vdev->hotplug_notifier);
 
+    dc->pre_reset = vfio_aer_pre_reset;
+    dc->post_reset = vfio_aer_post_reset;
+
     pcie_cap_deverr_init(pdev);
     return pcie_aer_init(pdev, pos, size);
 
@@ -2978,6 +3000,10 @@ static void vfio_pci_reset(DeviceState *dev)
 
     trace_vfio_pci_reset(vdev->vbasedev.name);
 
+    if (vdev->aer_reset) {
+        return;
+    }
+
     vfio_pci_pre_reset(vdev);
 
     if (vdev->resetfn && !vdev->resetfn(vdev)) {
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index b385f07..5470b97 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -142,6 +142,8 @@ typedef struct VFIOPCIDevice {
     bool no_kvm_intx;
     bool no_kvm_msi;
     bool no_kvm_msix;
+    bool aer_reset;
+    bool single_depend_dev;
 
     NotifierWithReturn hotplug_notifier;
 } VFIOPCIDevice;
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 379b6e1..6b1f2d4 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -105,6 +105,9 @@ typedef void PCIMapIORegionFunc(PCIDevice *pci_dev, int region_num,
                                 pcibus_t addr, pcibus_t size, int type);
 typedef void PCIUnregisterFunc(PCIDevice *pci_dev);
 
+typedef void PCIPreResetFunc(PCIDevice *pci_dev);
+typedef void PCIPostResetFunc(PCIDevice *pci_dev);
+
 typedef struct PCIIORegion {
     pcibus_t addr; /* current PCI mapping address. -1 means not mapped */
 #define PCI_BAR_UNMAPPED (~(pcibus_t)0)
@@ -193,6 +196,8 @@ typedef struct PCIDeviceClass {
     PCIUnregisterFunc *exit;
     PCIConfigReadFunc *config_read;
     PCIConfigWriteFunc *config_write;
+    PCIPreResetFunc *pre_reset;
+    PCIPostResetFunc *post_reset;
 
     uint16_t vendor_id;
     uint16_t device_id;
@@ -380,6 +385,8 @@ bool pci_intx_route_changed(PCIINTxRoute *old, PCIINTxRoute *new);
 void pci_bus_fire_intx_routing_notifier(PCIBus *bus);
 void pci_device_set_intx_routing_notifier(PCIDevice *dev,
                                           PCIINTxRoutingNotifier notifier);
+void pci_device_pre_reset(PCIBus *bus, PCIDevice *d, void *opaque);
+void pci_device_post_reset(PCIBus *bus, PCIDevice *d, void *opaque);
 void pci_device_reset(PCIDevice *dev);
 
 PCIDevice *pci_nic_init_nofail(NICInfo *nd, PCIBus *rootbus,
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [Qemu-devel] [PATCH v14 11/13] pcie_aer: expose pcie_aer_msg() interface
  2015-11-17  8:41 [Qemu-devel] [PATCH v14 00/13] vfio-pci: pass the aer error to guest Cao jin
                   ` (9 preceding siblings ...)
  2015-11-17  8:42 ` [Qemu-devel] [PATCH v14 10/13] pci: add pci device pre-post reset callbacks for host bus reset Cao jin
@ 2015-11-17  8:42 ` Cao jin
  2015-11-17  8:42 ` [Qemu-devel] [PATCH v14 12/13] vfio-pci: pass the aer error to guest Cao jin
                   ` (2 subsequent siblings)
  13 siblings, 0 replies; 34+ messages in thread
From: Cao jin @ 2015-11-17  8:42 UTC (permalink / raw)
  To: qemu-devel; +Cc: chen.fan.fnst, alex.williamson, mst

From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

For vfio device, we need to propagate the aer error to
Guest OS. we use the pcie_aer_msg() to send aer error
to guest.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/pci/pcie_aer.c         | 2 +-
 include/hw/pci/pcie_aer.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c
index 45f351b..fbbd7d2 100644
--- a/hw/pci/pcie_aer.c
+++ b/hw/pci/pcie_aer.c
@@ -370,7 +370,7 @@ static void pcie_aer_msg_root_port(PCIDevice *dev, const PCIEAERMsg *msg)
  *
  * Walk up the bus tree from the device, propagate the error message.
  */
-static void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg)
+void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg)
 {
     uint8_t type;
 
diff --git a/include/hw/pci/pcie_aer.h b/include/hw/pci/pcie_aer.h
index 156acb0..c2ee4e2 100644
--- a/include/hw/pci/pcie_aer.h
+++ b/include/hw/pci/pcie_aer.h
@@ -102,5 +102,6 @@ void pcie_aer_root_write_config(PCIDevice *dev,
 
 /* error injection */
 int pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err);
+void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg);
 
 #endif /* QEMU_PCIE_AER_H */
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [Qemu-devel] [PATCH v14 12/13] vfio-pci: pass the aer error to guest
  2015-11-17  8:41 [Qemu-devel] [PATCH v14 00/13] vfio-pci: pass the aer error to guest Cao jin
                   ` (10 preceding siblings ...)
  2015-11-17  8:42 ` [Qemu-devel] [PATCH v14 11/13] pcie_aer: expose pcie_aer_msg() interface Cao jin
@ 2015-11-17  8:42 ` Cao jin
  2015-11-17  8:42 ` [Qemu-devel] [PATCH v14 13/13] vfio: add 'aer' property to expose aercap Cao jin
  2015-11-18 17:06 ` [Qemu-devel] [PATCH v14 00/13] vfio-pci: pass the aer error to guest Michael S. Tsirkin
  13 siblings, 0 replies; 34+ messages in thread
From: Cao jin @ 2015-11-17  8:42 UTC (permalink / raw)
  To: qemu-devel; +Cc: chen.fan.fnst, alex.williamson, mst

From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

when the vfio device encounters an uncorrectable error in host,
the vfio_pci driver will signal the eventfd registered by this
vfio device, the results in the qemu eventfd handler getting
invoked.

this patch is to pass the error to guest and have the guest driver
recover from the error.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 47 insertions(+), 6 deletions(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index df32618..c1300e9 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2543,18 +2543,59 @@ static void vfio_put_device(VFIOPCIDevice *vdev)
 static void vfio_err_notifier_handler(void *opaque)
 {
     VFIOPCIDevice *vdev = opaque;
+    PCIDevice *dev = &vdev->pdev;
+    PCIEAERMsg msg = {
+        .severity = 0,
+        .source_id = (pci_bus_num(dev->bus) << 8) | dev->devfn,
+    };
 
     if (!event_notifier_test_and_clear(&vdev->err_notifier)) {
         return;
     }
 
     /*
-     * TBD. Retrieve the error details and decide what action
-     * needs to be taken. One of the actions could be to pass
-     * the error to the guest and have the guest driver recover
-     * from the error. This requires that PCIe capabilities be
-     * exposed to the guest. For now, we just terminate the
-     * guest to contain the error.
+     * in case the real hardware configration has been changed,
+     * here we should recheck the bus reset capability.
+     */
+    if ((vdev->features & VFIO_FEATURE_ENABLE_AER) &&
+        vfio_check_host_bus_reset(vdev)) {
+        goto stop;
+    }
+    /*
+     * we should read the error details from the real hardware
+     * configuration spaces, here we only need to do is signaling
+     * to guest an uncorrectable error has occurred.
+     */
+    if ((vdev->features & VFIO_FEATURE_ENABLE_AER) &&
+        dev->exp.aer_cap) {
+        uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
+        uint32_t uncor_status;
+        bool isfatal;
+
+        uncor_status = vfio_pci_read_config(dev,
+                           dev->exp.aer_cap + PCI_ERR_UNCOR_STATUS, 4);
+
+        /*
+         * if we receive the error signal but not this device, we can
+         * just ignore it.
+         */
+        if (!(uncor_status & ~0UL)) {
+            return;
+        }
+
+        isfatal = uncor_status & pci_get_long(aer_cap + PCI_ERR_UNCOR_SEVER);
+
+        msg.severity = isfatal ? PCI_ERR_ROOT_CMD_FATAL_EN :
+                                 PCI_ERR_ROOT_CMD_NONFATAL_EN;
+
+        pcie_aer_msg(dev, &msg);
+        return;
+    }
+
+stop:
+    /*
+     * If the aer capability is not exposed to the guest. we just
+     * terminate the guest to contain the error.
      */
 
     error_report("%s(%04x:%02x:%02x.%x) Unrecoverable error detected.  "
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [Qemu-devel] [PATCH v14 13/13] vfio: add 'aer' property to expose aercap
  2015-11-17  8:41 [Qemu-devel] [PATCH v14 00/13] vfio-pci: pass the aer error to guest Cao jin
                   ` (11 preceding siblings ...)
  2015-11-17  8:42 ` [Qemu-devel] [PATCH v14 12/13] vfio-pci: pass the aer error to guest Cao jin
@ 2015-11-17  8:42 ` Cao jin
  2015-11-18 17:06 ` [Qemu-devel] [PATCH v14 00/13] vfio-pci: pass the aer error to guest Michael S. Tsirkin
  13 siblings, 0 replies; 34+ messages in thread
From: Cao jin @ 2015-11-17  8:42 UTC (permalink / raw)
  To: qemu-devel; +Cc: chen.fan.fnst, alex.williamson, mst

From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

add 'aer' property to let user able to decide whether expose
the aer capability. by default we should disable aer feature,
because it needs configuration restrictions.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index c1300e9..6fc4be0 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3102,6 +3102,8 @@ static Property vfio_pci_dev_properties[] = {
                        sub_vendor_id, PCI_ANY_ID),
     DEFINE_PROP_UINT32("x-pci-sub-device-id", VFIOPCIDevice,
                        sub_device_id, PCI_ANY_ID),
+    DEFINE_PROP_BIT("aer", VFIOPCIDevice, features,
+                    VFIO_FEATURE_ENABLE_AER_BIT, false),
     /*
      * TODO - support passed fds... is this necessary?
      * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name),
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH v14 00/13] vfio-pci: pass the aer error to guest
  2015-11-17  8:41 [Qemu-devel] [PATCH v14 00/13] vfio-pci: pass the aer error to guest Cao jin
                   ` (12 preceding siblings ...)
  2015-11-17  8:42 ` [Qemu-devel] [PATCH v14 13/13] vfio: add 'aer' property to expose aercap Cao jin
@ 2015-11-18 17:06 ` Michael S. Tsirkin
  13 siblings, 0 replies; 34+ messages in thread
From: Michael S. Tsirkin @ 2015-11-18 17:06 UTC (permalink / raw)
  To: Cao jin; +Cc: chen.fan.fnst, alex.williamson, qemu-devel

Focusing on 2.5 now.
Pls post after 2.5.

On Tue, Nov 17, 2015 at 04:41:51PM +0800, Cao jin wrote:
> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> 
> For now, for vfio pci passthough devices when qemu receives
> an error from host aer report, currentlly just terminate the guest,
> but usually user want to know what error occurred but stopping the
> guest, so this patches add aer capability support for vfio device,
> and pass the error to guest, and have guest driver to recover
> from the error.
> 
> v13-v14:
>    1. for multifunction device, requiring all functions enable AER.(9/13)
>    2. due to all affected functions receive error signal, ignore no
>       error occurred function. (12/13)
> 
> v12-v13:
>    1. since support multifuncion hotplug, here add callback to enable aer.
>    2. add pci device pre+post reset for aer host reset.
> 
> Chen Fan (13):
>   vfio: extract vfio_get_hot_reset_info as a single function
>   vfio: squeeze out vfio_pci_do_hot_reset for support bus reset
>   pcie: modify the capability size assert
>   vfio: make the 4 bytes aligned for capability size
>   vfio: add pcie extanded capability support
>   aer: impove pcie_aer_init to support vfio device
>   vfio: add aer support for vfio device
>   vfio: add check host bus reset is support or not
>   add check reset mechanism when hotplug vfio device
>   pci: add pci device pre-post reset callbacks for host bus reset
>   pcie_aer: expose pcie_aer_msg() interface
>   vfio-pci: pass the aer error to guest
>   vfio: add 'aer' property to expose aercap
> 
>  hw/pci-bridge/ioh3420.c            |   2 +-
>  hw/pci-bridge/xio3130_downstream.c |   2 +-
>  hw/pci-bridge/xio3130_upstream.c   |   2 +-
>  hw/pci/pci.c                       |  47 +++
>  hw/pci/pci_bridge.c                |   9 +
>  hw/pci/pcie.c                      |   2 +-
>  hw/pci/pcie_aer.c                  |   6 +-
>  hw/vfio/pci.c                      | 625 +++++++++++++++++++++++++++++++++----
>  hw/vfio/pci.h                      |   8 +
>  include/hw/pci/pci.h               |   7 +
>  include/hw/pci/pci_bus.h           |   5 +
>  include/hw/pci/pcie_aer.h          |   3 +-
>  12 files changed, 646 insertions(+), 72 deletions(-)
> 
> -- 
> 1.9.3

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH v14 Resend 10/13] pci: add pci device pre-post reset callbacks for host bus reset
  2015-11-17  8:42 ` [Qemu-devel] [PATCH v14 10/13] pci: add pci device pre-post reset callbacks for host bus reset Cao jin
@ 2015-12-17 20:31   ` Alex Williamson
  2015-12-18  3:29     ` Chen Fan
  2015-12-23 12:00   ` Michael S. Tsirkin
  1 sibling, 1 reply; 34+ messages in thread
From: Alex Williamson @ 2015-12-17 20:31 UTC (permalink / raw)
  To: Cao jin, qemu-devel; +Cc: chen.fan.fnst, mst

On Thu, 2015-12-17 at 09:41 +0800, Cao jin wrote:
> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> 
> Particularly, For vfio devices, Once need to recovery devices
> by bus reset such as AER, we always need to reset the host bus
> to recovery the devices under the bus, so we need to add pci device
> callbacks to specify to do host bus reset.
> 
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> ---
>  hw/pci/pci.c         | 18 ++++++++++++++++++
>  hw/pci/pci_bridge.c  |  9 +++++++++
>  hw/vfio/pci.c        | 26 ++++++++++++++++++++++++++
>  hw/vfio/pci.h        |  2 ++
>  include/hw/pci/pci.h |  7 +++++++
>  5 files changed, 62 insertions(+)
> 
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index f6ca6ef..64fa2cc 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -247,6 +247,24 @@ static void pci_do_device_reset(PCIDevice *dev)
>      msix_reset(dev);
>  }
>  
> +void pci_device_pre_reset(PCIBus *bus, PCIDevice *dev, void *unused)
> +{
> +    PCIDeviceClass *dc = PCI_DEVICE_GET_CLASS(dev);
> +
> +    if (dc->pre_reset) {
> +        dc->pre_reset(dev);
> +    }
> +}
> +
> +void pci_device_post_reset(PCIBus *bus, PCIDevice *dev, void
> *unused)
> +{
> +    PCIDeviceClass *dc = PCI_DEVICE_GET_CLASS(dev);
> +
> +    if (dc->post_reset) {
> +        dc->post_reset(dev);
> +    }
> +}
> +
>  /*
>   * This function is called on #RST and FLR.
>   * FLR if PCI_EXP_DEVCTL_BCR_FLR is set
> diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
> index 40c97b1..ddb76ab 100644
> --- a/hw/pci/pci_bridge.c
> +++ b/hw/pci/pci_bridge.c
> @@ -267,8 +267,17 @@ void pci_bridge_write_config(PCIDevice *d,
>  
>      newctl = pci_get_word(d->config + PCI_BRIDGE_CONTROL);
>      if (~oldctl & newctl & PCI_BRIDGE_CTL_BUS_RESET) {
> +        /*
> +         * Notify all vfio-pci devices under the bus
> +         * should do physical bus reset.
> +         */
> +        PCIBus *sec_bus = pci_bridge_get_sec_bus(s);
> +        pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
> +                            pci_device_pre_reset, NULL);
>          /* Trigger hot reset on 0->1 transition. */
>          qbus_reset_all(&s->sec_bus.qbus);
> +        pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
> +                            pci_device_post_reset, NULL);
>      }
>  }
>  
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index e17dc89..df32618 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -39,6 +39,7 @@
>  
>  static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
>  static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool
> enabled);
> +static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single);
>  
>  /*
>   * Disabling BAR mmaping can be slow, but toggling it around INTx
> can
> @@ -1888,6 +1889,8 @@ static int
> vfio_check_host_bus_reset(VFIOPCIDevice *vdev)
>      /* List all affected devices by bus reset */
>      devices = &info->devices[0];
>  
> +    vdev->single_depend_dev = (info->count == 1);
> +
>      /* Verify that we have all the groups required */
>      for (i = 0; i < info->count; i++) {
>          PCIHostDeviceAddress host;
> @@ -2029,10 +2032,26 @@ static int
> vfio_check_bus_reset(NotifierWithReturn *n, void *opaque)
>      return vfio_check_host_bus_reset(vdev);
>  }
>  
> +static void vfio_aer_pre_reset(PCIDevice *pdev)
> +{
> +    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> +
> +    vdev->aer_reset = true;
> +    vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
> +}

Doesn't this lead to multiple host bus resets per guest bus reset in
many cases?  It looks like we'll do it once per vfio-pci device, even
if those devices are on the same host bus.  That's a 1 second operation
per device.  Can we avoid that?  Maybe some sort of sequence ID could
help a device figure out whether it's already been reset as part of a
dependent device for this particular guest bus reset.  Thanks,

Alex

> +
> +static void vfio_aer_post_reset(PCIDevice *pdev)
> +{
> +    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> +
> +    vdev->aer_reset = false;
> +}
> +
>  static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
>                            int pos, uint16_t size)
>  {
>      PCIDevice *pdev = &vdev->pdev;
> +    PCIDeviceClass *dc = PCI_DEVICE_GET_CLASS(pdev);
>      PCIDevice *dev_iter;
>      uint8_t type;
>      uint32_t errcap;
> @@ -2079,6 +2098,9 @@ static int vfio_setup_aer(VFIOPCIDevice *vdev,
> uint8_t cap_ver,
>      vdev->hotplug_notifier.notify = vfio_check_bus_reset;
>      pci_bus_add_hotplug_notifier(pdev->bus, &vdev-
> >hotplug_notifier);
>  
> +    dc->pre_reset = vfio_aer_pre_reset;
> +    dc->post_reset = vfio_aer_post_reset;
> +
>      pcie_cap_deverr_init(pdev);
>      return pcie_aer_init(pdev, pos, size);
>  
> @@ -2978,6 +3000,10 @@ static void vfio_pci_reset(DeviceState *dev)
>  
>      trace_vfio_pci_reset(vdev->vbasedev.name);
>  
> +    if (vdev->aer_reset) {
> +        return;
> +    }
> +
>      vfio_pci_pre_reset(vdev);
>  
>      if (vdev->resetfn && !vdev->resetfn(vdev)) {
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index b385f07..5470b97 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -142,6 +142,8 @@ typedef struct VFIOPCIDevice {
>      bool no_kvm_intx;
>      bool no_kvm_msi;
>      bool no_kvm_msix;
> +    bool aer_reset;
> +    bool single_depend_dev;
>  
>      NotifierWithReturn hotplug_notifier;
>  } VFIOPCIDevice;
> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> index 379b6e1..6b1f2d4 100644
> --- a/include/hw/pci/pci.h
> +++ b/include/hw/pci/pci.h
> @@ -105,6 +105,9 @@ typedef void PCIMapIORegionFunc(PCIDevice
> *pci_dev, int region_num,
>                                  pcibus_t addr, pcibus_t size, int
> type);
>  typedef void PCIUnregisterFunc(PCIDevice *pci_dev);
>  
> +typedef void PCIPreResetFunc(PCIDevice *pci_dev);
> +typedef void PCIPostResetFunc(PCIDevice *pci_dev);
> +
>  typedef struct PCIIORegion {
>      pcibus_t addr; /* current PCI mapping address. -1 means not
> mapped */
>  #define PCI_BAR_UNMAPPED (~(pcibus_t)0)
> @@ -193,6 +196,8 @@ typedef struct PCIDeviceClass {
>      PCIUnregisterFunc *exit;
>      PCIConfigReadFunc *config_read;
>      PCIConfigWriteFunc *config_write;
> +    PCIPreResetFunc *pre_reset;
> +    PCIPostResetFunc *post_reset;
>  
>      uint16_t vendor_id;
>      uint16_t device_id;
> @@ -380,6 +385,8 @@ bool pci_intx_route_changed(PCIINTxRoute *old,
> PCIINTxRoute *new);
>  void pci_bus_fire_intx_routing_notifier(PCIBus *bus);
>  void pci_device_set_intx_routing_notifier(PCIDevice *dev,
>                                            PCIINTxRoutingNotifier
> notifier);
> +void pci_device_pre_reset(PCIBus *bus, PCIDevice *d, void *opaque);
> +void pci_device_post_reset(PCIBus *bus, PCIDevice *d, void *opaque);
>  void pci_device_reset(PCIDevice *dev);
>  
>  PCIDevice *pci_nic_init_nofail(NICInfo *nd, PCIBus *rootbus,

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH v14 Resend 09/13] add check reset mechanism when hotplug vfio device
  2015-11-17  8:42 ` [Qemu-devel] [PATCH v14 09/13] add check reset mechanism when hotplug vfio device Cao jin
@ 2015-12-17 20:32   ` Alex Williamson
  0 siblings, 0 replies; 34+ messages in thread
From: Alex Williamson @ 2015-12-17 20:32 UTC (permalink / raw)
  To: Cao jin, qemu-devel; +Cc: chen.fan.fnst, mst

On Thu, 2015-12-17 at 09:41 +0800, Cao jin wrote:
> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> 
> Since we support multi-function hotplug. the function 0 indicate
> the closure of the slot, so we have the chance to do the check.
> 
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> ---
>  hw/pci/pci.c             | 29 +++++++++++++++++++++++++++++
>  hw/vfio/pci.c            | 19 +++++++++++++++++++
>  hw/vfio/pci.h            |  2 ++
>  include/hw/pci/pci_bus.h |  5 +++++
>  4 files changed, 55 insertions(+)


MST, can I get a review/ack for this one?  Thanks,

Alex


> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index 168b9cc..f6ca6ef 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -81,6 +81,7 @@ static void pci_bus_realize(BusState *qbus, Error
> **errp)
>      PCIBus *bus = PCI_BUS(qbus);
>  
>      vmstate_register(NULL, -1, &vmstate_pcibus, bus);
> +    notifier_with_return_list_init(&bus->hotplug_notifiers);
>  }
>  
>  static void pci_bus_unrealize(BusState *qbus, Error **errp)
> @@ -1835,6 +1836,22 @@ PCIDevice *pci_find_device(PCIBus *bus, int
> bus_num, uint8_t devfn)
>      return bus->devices[devfn];
>  }
>  
> +void pci_bus_add_hotplug_notifier(PCIBus *bus, NotifierWithReturn
> *notify)
> +{
> +    notifier_with_return_list_add(&bus->hotplug_notifiers, notify);
> +}
> +
> +void pci_bus_remove_hotplug_notifier(NotifierWithReturn *notifier)
> +{
> +    notifier_with_return_remove(notifier);
> +}
> +
> +static int pci_bus_hotplug_notifier(PCIBus *bus, void *opaque)
> +{
> +    return notifier_with_return_list_notify(&bus->hotplug_notifiers,
> +                                            opaque);
> +}
> +
>  static void pci_qdev_realize(DeviceState *qdev, Error **errp)
>  {
>      PCIDevice *pci_dev = (PCIDevice *)qdev;
> @@ -1877,6 +1894,18 @@ static void pci_qdev_realize(DeviceState
> *qdev, Error **errp)
>          pci_qdev_unrealize(DEVICE(pci_dev), NULL);
>          return;
>      }
> +
> +    /*
> +     *  If the function is func 0, indicate the closure of the slot.
> +     *  signal the callback.
> +     */
> +    if (DEVICE(pci_dev)->hotplugged &&
> +        pci_get_function_0(pci_dev) == pci_dev &&
> +        pci_bus_hotplug_notifier(bus, pci_dev)) {
> +        error_setg(errp, "failed to hotplug function 0");
> +        pci_qdev_unrealize(DEVICE(pci_dev), NULL);
> +        return;
> +    }
>  }
>  
>  static void pci_default_realize(PCIDevice *dev, Error **errp)
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 6926dcc..e17dc89 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -2016,6 +2016,19 @@ static int
> vfio_check_devices_host_bus_reset(void)
>      return 0;
>  }
>  
> +static int vfio_check_bus_reset(NotifierWithReturn *n, void *opaque)
> +{
> +    VFIOPCIDevice *vdev = container_of(n, VFIOPCIDevice,
> hotplug_notifier);
> +    PCIDevice *pci_dev = PCI_DEVICE(vdev);
> +    PCIDevice *pci_func0 = opaque;
> +
> +    if (pci_get_function_0(pci_dev) != pci_func0) {
> +        return 0;
> +    }
> +
> +    return vfio_check_host_bus_reset(vdev);
> +}
> +
>  static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
>                            int pos, uint16_t size)
>  {
> @@ -2063,6 +2076,9 @@ static int vfio_setup_aer(VFIOPCIDevice *vdev,
> uint8_t cap_ver,
>          pdev->exp.aer_log.log_max = 0;
>      }
>  
> +    vdev->hotplug_notifier.notify = vfio_check_bus_reset;
> +    pci_bus_add_hotplug_notifier(pdev->bus, &vdev-
> >hotplug_notifier);
> +
>      pcie_cap_deverr_init(pdev);
>      return pcie_aer_init(pdev, pos, size);
>  
> @@ -2944,6 +2960,9 @@ static void vfio_exitfn(PCIDevice *pdev)
>      vfio_unregister_req_notifier(vdev);
>      vfio_unregister_err_notifier(vdev);
>      pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
> +    if (vdev->features & VFIO_FEATURE_ENABLE_AER) {
> +        pci_bus_remove_hotplug_notifier(&vdev->hotplug_notifier);
> +    }
>      vfio_disable_interrupts(vdev);
>      if (vdev->intx.mmap_timer) {
>          timer_free(vdev->intx.mmap_timer);
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index 59ae194..b385f07 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -142,6 +142,8 @@ typedef struct VFIOPCIDevice {
>      bool no_kvm_intx;
>      bool no_kvm_msi;
>      bool no_kvm_msix;
> +
> +    NotifierWithReturn hotplug_notifier;
>  } VFIOPCIDevice;
>  
>  uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int
> len);
> diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
> index 403fec6..7812fa9 100644
> --- a/include/hw/pci/pci_bus.h
> +++ b/include/hw/pci/pci_bus.h
> @@ -39,8 +39,13 @@ struct PCIBus {
>         Keep a count of the number of devices with raised IRQs.  */
>      int nirq;
>      int *irq_count;
> +
> +    NotifierWithReturnList hotplug_notifiers;
>  };
>  
> +void pci_bus_add_hotplug_notifier(PCIBus *bus, NotifierWithReturn
> *notify);
> +void pci_bus_remove_hotplug_notifier(NotifierWithReturn *notify);
> +
>  typedef struct PCIBridgeWindows PCIBridgeWindows;
>  
>  /*

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH v14 Resend 08/13] vfio: add check host bus reset is support or not
  2015-11-17  8:41 ` [Qemu-devel] [PATCH v14 08/13] vfio: add check host bus reset is support or not Cao jin
@ 2015-12-17 20:32   ` Alex Williamson
  2015-12-18  1:14     ` Chen Fan
  2015-12-24 14:32   ` Michael S. Tsirkin
  1 sibling, 1 reply; 34+ messages in thread
From: Alex Williamson @ 2015-12-17 20:32 UTC (permalink / raw)
  To: Cao jin, qemu-devel; +Cc: chen.fan.fnst, mst

On Thu, 2015-12-17 at 09:41 +0800, Cao jin wrote:
> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> 
> when init vfio devices done, we should test all the devices supported
> aer whether conflict with others. For each one, get the hot reset
> info for the affected device list.  For each affected device, all
> should attach to the VM and on/below the same bus. also, we should test
> all of the non-AER supporting vfio-pci devices on or below the target
> bus to verify they have a reset mechanism.
> 
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> ---
>  hw/vfio/pci.c | 236 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
>  hw/vfio/pci.h |   1 +
>  2 files changed, 230 insertions(+), 7 deletions(-)
> 
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index d00b0e4..6926dcc 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -1806,6 +1806,216 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
>      return 0;
>  }
>  
> +static bool vfio_pci_host_slot_match(PCIHostDeviceAddress *host1,
> +                                     PCIHostDeviceAddress *host2)
> +{
> +    return (host1->domain == host2->domain && host1->bus == host2->bus &&
> +            host1->slot == host2->slot);
> +}
> +
> +static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
> +                                PCIHostDeviceAddress *host2)
> +{
> +    return (vfio_pci_host_slot_match(host1, host2) &&
> +            host1->function == host2->function);
> +}
> +
> +struct VFIODeviceFind {
> +    PCIDevice *pdev;
> +    bool found;
> +};
> +
> +static void vfio_check_device_noreset(PCIBus *bus, PCIDevice *pdev,
> +                                      void *opaque)
> +{
> +    DeviceState *dev = DEVICE(pdev);
> +    DeviceClass *dc = DEVICE_GET_CLASS(dev);
> +    VFIOPCIDevice *vdev;
> +    struct VFIODeviceFind *find = opaque;
> +
> +    if (find->found) {
> +        return;
> +    }
> +
> +    if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
> +        if (!dc->reset) {
> +            goto found;
> +        }
> +        return;
> +    }
> +    vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> +    if (!(vdev->features & VFIO_FEATURE_ENABLE_AER) &&
> +        !vdev->vbasedev.reset_works) {
> +        goto found;
> +    }
> +
> +    return;
> +found:
> +    find->pdev = pdev;
> +    find->found = true;
> +}
> +
> +static void device_find(PCIBus *bus, PCIDevice *pdev, void *opaque)
> +{
> +    struct VFIODeviceFind *find = opaque;
> +
> +    if (find->found) {
> +        return;
> +    }
> +
> +    if (pdev == find->pdev) {
> +        find->found = true;
> +    }
> +}
> +
> +static int vfio_check_host_bus_reset(VFIOPCIDevice *vdev)
> +{
> +    PCIBus *bus = vdev->pdev.bus;
> +    struct vfio_pci_hot_reset_info *info = NULL;
> +    struct vfio_pci_dependent_device *devices;
> +    VFIOGroup *group;
> +    struct VFIODeviceFind find;
> +    int ret, i;
> +
> +    ret = vfio_get_hot_reset_info(vdev, &info);
> +    if (ret) {
> +        error_report("vfio: Cannot enable AER for device %s,"
> +                     " device does not support hot reset.",
> +                     vdev->vbasedev.name);
> +        goto out;
> +    }
> +
> +    /* List all affected devices by bus reset */
> +    devices = &info->devices[0];
> +
> +    /* Verify that we have all the groups required */
> +    for (i = 0; i < info->count; i++) {
> +        PCIHostDeviceAddress host;
> +        VFIOPCIDevice *tmp;
> +        VFIODevice *vbasedev_iter;
> +        bool found = false;
> +
> +        host.domain = devices[i].segment;
> +        host.bus = devices[i].bus;
> +        host.slot = PCI_SLOT(devices[i].devfn);
> +        host.function = PCI_FUNC(devices[i].devfn);
> +
> +        /* Skip the current device */
> +        if (vfio_pci_host_match(&host, &vdev->host)) {
> +            continue;
> +        }
> +
> +        /* Ensure we own the group of the affected device */
> +        QLIST_FOREACH(group, &vfio_group_list, next) {
> +            if (group->groupid == devices[i].group_id) {
> +                break;
> +            }
> +        }
> +
> +        if (!group) {
> +            error_report("vfio: Cannot enable AER for device %s, "
> +                         "depends on group %d which is not owned.",
> +                         vdev->vbasedev.name, devices[i].group_id);
> +            ret = -1;
> +            goto out;
> +        }
> +
> +        /* Ensure affected devices for reset on/blow the bus */
> +        QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
> +            if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
> +                continue;
> +            }
> +            tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
> +            if (vfio_pci_host_match(&host, &tmp->host)) {
> +                PCIDevice *pci = PCI_DEVICE(tmp);
> +
> +                /*
> +                 * For multifunction device, due to vfio driver signal all
> +                 * functions under the upstream link of the end point. here
> +                 * we validate all functions whether enable AER.
> +                 */
> +                if (vfio_pci_host_slot_match(&vdev->host, &tmp->host) &&
> +                    !(tmp->features & VFIO_FEATURE_ENABLE_AER)) {
> +                    error_report("vfio: Cannot enable AER for device %s, on same slot"
> +                                 " the dependent device %s which does not enable AER.",
> +                                 vdev->vbasedev.name, tmp->vbasedev.name);
> +                    ret = -1;
> +                    goto out;
> +                }

It took me a while to understand this code block, so I've updated the
comment to read as follows:

                /*
                 * AER errors may be broadcast to all functions of a multi-
                 * function endpoint.  If any of those sibling functions are
                 * also assigned, they need to have AER enabled or else an
                 * error may continue to cause a vm_stop condition.  IOW,
                 * AER setup of this function would be pointless.
                 */

Does that match your intention?

> +
> +                find.pdev = pci;
> +                find.found = false;
> +                pci_for_each_device(bus, pci_bus_num(bus),
> +                                    device_find, &find);
> +                if (!find.found) {
> +                    error_report("vfio: Cannot enable AER for device %s, "
> +                                 "the dependent device %s is not under the same bus",
> +                                 vdev->vbasedev.name, tmp->vbasedev.name);
> +                    ret = -1;
> +                    goto out;
> +                }
> +                found = true;
> +                break;
> +            }
> +        }
> +
> +        /* Ensure all affected devices assigned to VM */
> +        if (!found) {
> +            error_report("vfio: Cannot enable AER for device %s, "
> +                         "the dependent device %04x:%02x:%02x.%x "
> +                         "is not assigned to VM.",
> +                         vdev->vbasedev.name, host.domain, host.bus,
> +                         host.slot, host.function);
> +            ret = -1;
> +            goto out;
> +        }
> +    }
> +
> +    /*
> +     * Check the all pci devices on or below the target bus
> +     * have a reset mechanism at least.
> +     */
> +    find.pdev = NULL;
> +    find.found = false;
> +    pci_for_each_device(bus, pci_bus_num(bus),
> +                        vfio_check_device_noreset, &find);
> +    if (find.found) {
> +        error_report("vfio: Cannot enable AER for device %s, "
> +                     "the affected device %s does not have a reset mechanism.",
> +                     vdev->vbasedev.name, find.pdev->name);
> +        ret = -1;
> +        goto out;
> +    }
> +
> +    ret = 0;
> +out:
> +    g_free(info);
> +    return ret;
> +}
> +
> +static int vfio_check_devices_host_bus_reset(void)
> +{
> +    VFIOGroup *group;
> +    VFIODevice *vbasedev;
> +    VFIOPCIDevice *vdev;
> +
> +    /* Check All vfio-pci devices if have bus reset capability */
> +    QLIST_FOREACH(group, &vfio_group_list, next) {
> +        QLIST_FOREACH(vbasedev, &group->device_list, next) {
> +            if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) {
> +                continue;
> +            }
> +            vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
> +            if ((vdev->features & VFIO_FEATURE_ENABLE_AER) &&
> +                vfio_check_host_bus_reset(vdev)) {
> +                return -1;
> +            }
> +        }
> +    }
> +
> +    return 0;
> +}
> +
>  static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
>                            int pos, uint16_t size)
>  {
> @@ -1983,13 +2193,6 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
>      vfio_intx_enable(vdev);
>  }
>  
> -static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
> -                                PCIHostDeviceAddress *host2)
> -{
> -    return (host1->domain == host2->domain && host1->bus == host2->bus &&
> -            host1->slot == host2->slot && host1->function == host2->function);
> -}
> -
>  static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
>  {
>      VFIOGroup *group;
> @@ -2495,6 +2698,20 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
>      vdev->req_enabled = false;
>  }
>  
> +static void vfio_pci_machine_done_notify(Notifier *notifier, void *unused)
> +{
> +    int ret;
> +
> +    ret = vfio_check_devices_host_bus_reset();
> +    if (ret) {
> +        exit(1);
> +    }
> +}
> +
> +static Notifier machine_notifier = {
> +    .notify = vfio_pci_machine_done_notify,
> +};
> +
>  static int vfio_initfn(PCIDevice *pdev)
>  {
>      VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> @@ -2841,6 +3058,11 @@ static const TypeInfo vfio_pci_dev_info = {
>  static void register_vfio_pci_dev_type(void)
>  {
>      type_register_static(&vfio_pci_dev_info);
> +    /*
> +     * Register notifier when machine init is done, since we need
> +     * check the configration manner after all vfio device are inited.
> +     */
> +    qemu_add_machine_init_done_notifier(&machine_notifier);
>  }
>  
>  type_init(register_vfio_pci_dev_type)
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index 48c1f69..59ae194 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -15,6 +15,7 @@
>  #include "qemu-common.h"
>  #include "exec/memory.h"
>  #include "hw/pci/pci.h"
> +#include "hw/pci/pci_bus.h"
>  #include "hw/pci/pci_bridge.h"
>  #include "hw/vfio/vfio-common.h"
>  #include "qemu/event_notifier.h"

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH v14 Resend 08/13] vfio: add check host bus reset is support or not
  2015-12-17 20:32   ` [Qemu-devel] [PATCH v14 Resend " Alex Williamson
@ 2015-12-18  1:14     ` Chen Fan
  0 siblings, 0 replies; 34+ messages in thread
From: Chen Fan @ 2015-12-18  1:14 UTC (permalink / raw)
  To: Alex Williamson, Cao jin, qemu-devel; +Cc: mst


On 12/18/2015 04:32 AM, Alex Williamson wrote:
> On Thu, 2015-12-17 at 09:41 +0800, Cao jin wrote:
>> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>>
>> when init vfio devices done, we should test all the devices supported
>> aer whether conflict with others. For each one, get the hot reset
>> info for the affected device list.  For each affected device, all
>> should attach to the VM and on/below the same bus. also, we should test
>> all of the non-AER supporting vfio-pci devices on or below the target
>> bus to verify they have a reset mechanism.
>>
>> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>> ---
>>   hw/vfio/pci.c | 236 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
>>   hw/vfio/pci.h |   1 +
>>   2 files changed, 230 insertions(+), 7 deletions(-)
>>
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index d00b0e4..6926dcc 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -1806,6 +1806,216 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
>>       return 0;
>>   }
>>   
>> +static bool vfio_pci_host_slot_match(PCIHostDeviceAddress *host1,
>> +                                     PCIHostDeviceAddress *host2)
>> +{
>> +    return (host1->domain == host2->domain && host1->bus == host2->bus &&
>> +            host1->slot == host2->slot);
>> +}
>> +
>> +static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
>> +                                PCIHostDeviceAddress *host2)
>> +{
>> +    return (vfio_pci_host_slot_match(host1, host2) &&
>> +            host1->function == host2->function);
>> +}
>> +
>> +struct VFIODeviceFind {
>> +    PCIDevice *pdev;
>> +    bool found;
>> +};
>> +
>> +static void vfio_check_device_noreset(PCIBus *bus, PCIDevice *pdev,
>> +                                      void *opaque)
>> +{
>> +    DeviceState *dev = DEVICE(pdev);
>> +    DeviceClass *dc = DEVICE_GET_CLASS(dev);
>> +    VFIOPCIDevice *vdev;
>> +    struct VFIODeviceFind *find = opaque;
>> +
>> +    if (find->found) {
>> +        return;
>> +    }
>> +
>> +    if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
>> +        if (!dc->reset) {
>> +            goto found;
>> +        }
>> +        return;
>> +    }
>> +    vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
>> +    if (!(vdev->features & VFIO_FEATURE_ENABLE_AER) &&
>> +        !vdev->vbasedev.reset_works) {
>> +        goto found;
>> +    }
>> +
>> +    return;
>> +found:
>> +    find->pdev = pdev;
>> +    find->found = true;
>> +}
>> +
>> +static void device_find(PCIBus *bus, PCIDevice *pdev, void *opaque)
>> +{
>> +    struct VFIODeviceFind *find = opaque;
>> +
>> +    if (find->found) {
>> +        return;
>> +    }
>> +
>> +    if (pdev == find->pdev) {
>> +        find->found = true;
>> +    }
>> +}
>> +
>> +static int vfio_check_host_bus_reset(VFIOPCIDevice *vdev)
>> +{
>> +    PCIBus *bus = vdev->pdev.bus;
>> +    struct vfio_pci_hot_reset_info *info = NULL;
>> +    struct vfio_pci_dependent_device *devices;
>> +    VFIOGroup *group;
>> +    struct VFIODeviceFind find;
>> +    int ret, i;
>> +
>> +    ret = vfio_get_hot_reset_info(vdev, &info);
>> +    if (ret) {
>> +        error_report("vfio: Cannot enable AER for device %s,"
>> +                     " device does not support hot reset.",
>> +                     vdev->vbasedev.name);
>> +        goto out;
>> +    }
>> +
>> +    /* List all affected devices by bus reset */
>> +    devices = &info->devices[0];
>> +
>> +    /* Verify that we have all the groups required */
>> +    for (i = 0; i < info->count; i++) {
>> +        PCIHostDeviceAddress host;
>> +        VFIOPCIDevice *tmp;
>> +        VFIODevice *vbasedev_iter;
>> +        bool found = false;
>> +
>> +        host.domain = devices[i].segment;
>> +        host.bus = devices[i].bus;
>> +        host.slot = PCI_SLOT(devices[i].devfn);
>> +        host.function = PCI_FUNC(devices[i].devfn);
>> +
>> +        /* Skip the current device */
>> +        if (vfio_pci_host_match(&host, &vdev->host)) {
>> +            continue;
>> +        }
>> +
>> +        /* Ensure we own the group of the affected device */
>> +        QLIST_FOREACH(group, &vfio_group_list, next) {
>> +            if (group->groupid == devices[i].group_id) {
>> +                break;
>> +            }
>> +        }
>> +
>> +        if (!group) {
>> +            error_report("vfio: Cannot enable AER for device %s, "
>> +                         "depends on group %d which is not owned.",
>> +                         vdev->vbasedev.name, devices[i].group_id);
>> +            ret = -1;
>> +            goto out;
>> +        }
>> +
>> +        /* Ensure affected devices for reset on/blow the bus */
>> +        QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
>> +            if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
>> +                continue;
>> +            }
>> +            tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
>> +            if (vfio_pci_host_match(&host, &tmp->host)) {
>> +                PCIDevice *pci = PCI_DEVICE(tmp);
>> +
>> +                /*
>> +                 * For multifunction device, due to vfio driver signal all
>> +                 * functions under the upstream link of the end point. here
>> +                 * we validate all functions whether enable AER.
>> +                 */
>> +                if (vfio_pci_host_slot_match(&vdev->host, &tmp->host) &&
>> +                    !(tmp->features & VFIO_FEATURE_ENABLE_AER)) {
>> +                    error_report("vfio: Cannot enable AER for device %s, on same slot"
>> +                                 " the dependent device %s which does not enable AER.",
>> +                                 vdev->vbasedev.name, tmp->vbasedev.name);
>> +                    ret = -1;
>> +                    goto out;
>> +                }
> It took me a while to understand this code block, so I've updated the
> comment to read as follows:
>
>                  /*
>                   * AER errors may be broadcast to all functions of a multi-
>                   * function endpoint.  If any of those sibling functions are
>                   * also assigned, they need to have AER enabled or else an
>                   * error may continue to cause a vm_stop condition.  IOW,
>                   * AER setup of this function would be pointless.
>                   */
>
> Does that match your intention?
Yes, this is the code's mean. your explanation is much better, I will 
update the common in next version.

Thanks,
Chen

>
>> +
>> +                find.pdev = pci;
>> +                find.found = false;
>> +                pci_for_each_device(bus, pci_bus_num(bus),
>> +                                    device_find, &find);
>> +                if (!find.found) {
>> +                    error_report("vfio: Cannot enable AER for device %s, "
>> +                                 "the dependent device %s is not under the same bus",
>> +                                 vdev->vbasedev.name, tmp->vbasedev.name);
>> +                    ret = -1;
>> +                    goto out;
>> +                }
>> +                found = true;
>> +                break;
>> +            }
>> +        }
>> +
>> +        /* Ensure all affected devices assigned to VM */
>> +        if (!found) {
>> +            error_report("vfio: Cannot enable AER for device %s, "
>> +                         "the dependent device %04x:%02x:%02x.%x "
>> +                         "is not assigned to VM.",
>> +                         vdev->vbasedev.name, host.domain, host.bus,
>> +                         host.slot, host.function);
>> +            ret = -1;
>> +            goto out;
>> +        }
>> +    }
>> +
>> +    /*
>> +     * Check the all pci devices on or below the target bus
>> +     * have a reset mechanism at least.
>> +     */
>> +    find.pdev = NULL;
>> +    find.found = false;
>> +    pci_for_each_device(bus, pci_bus_num(bus),
>> +                        vfio_check_device_noreset, &find);
>> +    if (find.found) {
>> +        error_report("vfio: Cannot enable AER for device %s, "
>> +                     "the affected device %s does not have a reset mechanism.",
>> +                     vdev->vbasedev.name, find.pdev->name);
>> +        ret = -1;
>> +        goto out;
>> +    }
>> +
>> +    ret = 0;
>> +out:
>> +    g_free(info);
>> +    return ret;
>> +}
>> +
>> +static int vfio_check_devices_host_bus_reset(void)
>> +{
>> +    VFIOGroup *group;
>> +    VFIODevice *vbasedev;
>> +    VFIOPCIDevice *vdev;
>> +
>> +    /* Check All vfio-pci devices if have bus reset capability */
>> +    QLIST_FOREACH(group, &vfio_group_list, next) {
>> +        QLIST_FOREACH(vbasedev, &group->device_list, next) {
>> +            if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) {
>> +                continue;
>> +            }
>> +            vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
>> +            if ((vdev->features & VFIO_FEATURE_ENABLE_AER) &&
>> +                vfio_check_host_bus_reset(vdev)) {
>> +                return -1;
>> +            }
>> +        }
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>>   static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
>>                             int pos, uint16_t size)
>>   {
>> @@ -1983,13 +2193,6 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
>>       vfio_intx_enable(vdev);
>>   }
>>   
>> -static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
>> -                                PCIHostDeviceAddress *host2)
>> -{
>> -    return (host1->domain == host2->domain && host1->bus == host2->bus &&
>> -            host1->slot == host2->slot && host1->function == host2->function);
>> -}
>> -
>>   static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
>>   {
>>       VFIOGroup *group;
>> @@ -2495,6 +2698,20 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
>>       vdev->req_enabled = false;
>>   }
>>   
>> +static void vfio_pci_machine_done_notify(Notifier *notifier, void *unused)
>> +{
>> +    int ret;
>> +
>> +    ret = vfio_check_devices_host_bus_reset();
>> +    if (ret) {
>> +        exit(1);
>> +    }
>> +}
>> +
>> +static Notifier machine_notifier = {
>> +    .notify = vfio_pci_machine_done_notify,
>> +};
>> +
>>   static int vfio_initfn(PCIDevice *pdev)
>>   {
>>       VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
>> @@ -2841,6 +3058,11 @@ static const TypeInfo vfio_pci_dev_info = {
>>   static void register_vfio_pci_dev_type(void)
>>   {
>>       type_register_static(&vfio_pci_dev_info);
>> +    /*
>> +     * Register notifier when machine init is done, since we need
>> +     * check the configration manner after all vfio device are inited.
>> +     */
>> +    qemu_add_machine_init_done_notifier(&machine_notifier);
>>   }
>>   
>>   type_init(register_vfio_pci_dev_type)
>> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
>> index 48c1f69..59ae194 100644
>> --- a/hw/vfio/pci.h
>> +++ b/hw/vfio/pci.h
>> @@ -15,6 +15,7 @@
>>   #include "qemu-common.h"
>>   #include "exec/memory.h"
>>   #include "hw/pci/pci.h"
>> +#include "hw/pci/pci_bus.h"
>>   #include "hw/pci/pci_bridge.h"
>>   #include "hw/vfio/vfio-common.h"
>>   #include "qemu/event_notifier.h"
>
>
> .
>

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH v14 Resend 10/13] pci: add pci device pre-post reset callbacks for host bus reset
  2015-12-17 20:31   ` [Qemu-devel] [PATCH v14 Resend " Alex Williamson
@ 2015-12-18  3:29     ` Chen Fan
  2015-12-21 21:07       ` Alex Williamson
  0 siblings, 1 reply; 34+ messages in thread
From: Chen Fan @ 2015-12-18  3:29 UTC (permalink / raw)
  To: Alex Williamson, Cao jin, qemu-devel; +Cc: mst


On 12/18/2015 04:31 AM, Alex Williamson wrote:
> On Thu, 2015-12-17 at 09:41 +0800, Cao jin wrote:
>> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>>
>> Particularly, For vfio devices, Once need to recovery devices
>> by bus reset such as AER, we always need to reset the host bus
>> to recovery the devices under the bus, so we need to add pci device
>> callbacks to specify to do host bus reset.
>>
>> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>> Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
>> ---
>>   hw/pci/pci.c         | 18 ++++++++++++++++++
>>   hw/pci/pci_bridge.c  |  9 +++++++++
>>   hw/vfio/pci.c        | 26 ++++++++++++++++++++++++++
>>   hw/vfio/pci.h        |  2 ++
>>   include/hw/pci/pci.h |  7 +++++++
>>   5 files changed, 62 insertions(+)
>>
>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>> index f6ca6ef..64fa2cc 100644
>> --- a/hw/pci/pci.c
>> +++ b/hw/pci/pci.c
>> @@ -247,6 +247,24 @@ static void pci_do_device_reset(PCIDevice *dev)
>>       msix_reset(dev);
>>   }
>>   
>> +void pci_device_pre_reset(PCIBus *bus, PCIDevice *dev, void *unused)
>> +{
>> +    PCIDeviceClass *dc = PCI_DEVICE_GET_CLASS(dev);
>> +
>> +    if (dc->pre_reset) {
>> +        dc->pre_reset(dev);
>> +    }
>> +}
>> +
>> +void pci_device_post_reset(PCIBus *bus, PCIDevice *dev, void
>> *unused)
>> +{
>> +    PCIDeviceClass *dc = PCI_DEVICE_GET_CLASS(dev);
>> +
>> +    if (dc->post_reset) {
>> +        dc->post_reset(dev);
>> +    }
>> +}
>> +
>>   /*
>>    * This function is called on #RST and FLR.
>>    * FLR if PCI_EXP_DEVCTL_BCR_FLR is set
>> diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
>> index 40c97b1..ddb76ab 100644
>> --- a/hw/pci/pci_bridge.c
>> +++ b/hw/pci/pci_bridge.c
>> @@ -267,8 +267,17 @@ void pci_bridge_write_config(PCIDevice *d,
>>   
>>       newctl = pci_get_word(d->config + PCI_BRIDGE_CONTROL);
>>       if (~oldctl & newctl & PCI_BRIDGE_CTL_BUS_RESET) {
>> +        /*
>> +         * Notify all vfio-pci devices under the bus
>> +         * should do physical bus reset.
>> +         */
>> +        PCIBus *sec_bus = pci_bridge_get_sec_bus(s);
>> +        pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
>> +                            pci_device_pre_reset, NULL);
>>           /* Trigger hot reset on 0->1 transition. */
>>           qbus_reset_all(&s->sec_bus.qbus);
>> +        pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
>> +                            pci_device_post_reset, NULL);
>>       }
>>   }
>>   
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index e17dc89..df32618 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -39,6 +39,7 @@
>>   
>>   static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
>>   static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool
>> enabled);
>> +static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single);
>>   
>>   /*
>>    * Disabling BAR mmaping can be slow, but toggling it around INTx
>> can
>> @@ -1888,6 +1889,8 @@ static int
>> vfio_check_host_bus_reset(VFIOPCIDevice *vdev)
>>       /* List all affected devices by bus reset */
>>       devices = &info->devices[0];
>>   
>> +    vdev->single_depend_dev = (info->count == 1);
>> +
>>       /* Verify that we have all the groups required */
>>       for (i = 0; i < info->count; i++) {
>>           PCIHostDeviceAddress host;
>> @@ -2029,10 +2032,26 @@ static int
>> vfio_check_bus_reset(NotifierWithReturn *n, void *opaque)
>>       return vfio_check_host_bus_reset(vdev);
>>   }
>>   
>> +static void vfio_aer_pre_reset(PCIDevice *pdev)
>> +{
>> +    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
>> +
>> +    vdev->aer_reset = true;
>> +    vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
>> +}
> Doesn't this lead to multiple host bus resets per guest bus reset in
> many cases?  It looks like we'll do it once per vfio-pci device, even
> if those devices are on the same host bus.  That's a 1 second operation
> per device.  Can we avoid that?  Maybe some sort of sequence ID could
> help a device figure out whether it's already been reset as part of a
> dependent device for this particular guest bus reset.  Thanks,
That's right, I missed this case, but I don't understand the scenario how to
use a sequence ID to mark the device if been reset. can you detail it ?
additional, there was a mechanism to compute device whether need to be reset
by hot reset. so I simply modify the code as the following:

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index a9bc67e..42774ca 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2063,13 +2063,19 @@ static void vfio_aer_pre_reset(PCIDevice *pdev)
      VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);

      vdev->aer_reset = true;
-    vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
+    vdev->vbasedev.needs_reset = true;
  }

  static void vfio_aer_post_reset(PCIDevice *pdev)
  {
      VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);

+    if (!vdev->single_depend_dev && vdev->vbasedev.needs_reset) {
+        vfio_pci_hot_reset(vdev, false);
+    } else {
+        vfio_pci_hot_reset(vdev, true);
+    }
+
      vdev->aer_reset = false;
  }

what do you think of this ?

Thanks,
Chen

>
> Alex
>
>> +
>> +static void vfio_aer_post_reset(PCIDevice *pdev)
>> +{
>> +    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
>> +
>> +    vdev->aer_reset = false;
>> +}
>> +
>>   static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
>>                             int pos, uint16_t size)
>>   {
>>       PCIDevice *pdev = &vdev->pdev;
>> +    PCIDeviceClass *dc = PCI_DEVICE_GET_CLASS(pdev);
>>       PCIDevice *dev_iter;
>>       uint8_t type;
>>       uint32_t errcap;
>> @@ -2079,6 +2098,9 @@ static int vfio_setup_aer(VFIOPCIDevice *vdev,
>> uint8_t cap_ver,
>>       vdev->hotplug_notifier.notify = vfio_check_bus_reset;
>>       pci_bus_add_hotplug_notifier(pdev->bus, &vdev-
>>> hotplug_notifier);
>>   
>> +    dc->pre_reset = vfio_aer_pre_reset;
>> +    dc->post_reset = vfio_aer_post_reset;
>> +
>>       pcie_cap_deverr_init(pdev);
>>       return pcie_aer_init(pdev, pos, size);
>>   
>> @@ -2978,6 +3000,10 @@ static void vfio_pci_reset(DeviceState *dev)
>>   
>>       trace_vfio_pci_reset(vdev->vbasedev.name);
>>   
>> +    if (vdev->aer_reset) {
>> +        return;
>> +    }
>> +
>>       vfio_pci_pre_reset(vdev);
>>   
>>       if (vdev->resetfn && !vdev->resetfn(vdev)) {
>> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
>> index b385f07..5470b97 100644
>> --- a/hw/vfio/pci.h
>> +++ b/hw/vfio/pci.h
>> @@ -142,6 +142,8 @@ typedef struct VFIOPCIDevice {
>>       bool no_kvm_intx;
>>       bool no_kvm_msi;
>>       bool no_kvm_msix;
>> +    bool aer_reset;
>> +    bool single_depend_dev;
>>   
>>       NotifierWithReturn hotplug_notifier;
>>   } VFIOPCIDevice;
>> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
>> index 379b6e1..6b1f2d4 100644
>> --- a/include/hw/pci/pci.h
>> +++ b/include/hw/pci/pci.h
>> @@ -105,6 +105,9 @@ typedef void PCIMapIORegionFunc(PCIDevice
>> *pci_dev, int region_num,
>>                                   pcibus_t addr, pcibus_t size, int
>> type);
>>   typedef void PCIUnregisterFunc(PCIDevice *pci_dev);
>>   
>> +typedef void PCIPreResetFunc(PCIDevice *pci_dev);
>> +typedef void PCIPostResetFunc(PCIDevice *pci_dev);
>> +
>>   typedef struct PCIIORegion {
>>       pcibus_t addr; /* current PCI mapping address. -1 means not
>> mapped */
>>   #define PCI_BAR_UNMAPPED (~(pcibus_t)0)
>> @@ -193,6 +196,8 @@ typedef struct PCIDeviceClass {
>>       PCIUnregisterFunc *exit;
>>       PCIConfigReadFunc *config_read;
>>       PCIConfigWriteFunc *config_write;
>> +    PCIPreResetFunc *pre_reset;
>> +    PCIPostResetFunc *post_reset;
>>   
>>       uint16_t vendor_id;
>>       uint16_t device_id;
>> @@ -380,6 +385,8 @@ bool pci_intx_route_changed(PCIINTxRoute *old,
>> PCIINTxRoute *new);
>>   void pci_bus_fire_intx_routing_notifier(PCIBus *bus);
>>   void pci_device_set_intx_routing_notifier(PCIDevice *dev,
>>                                             PCIINTxRoutingNotifier
>> notifier);
>> +void pci_device_pre_reset(PCIBus *bus, PCIDevice *d, void *opaque);
>> +void pci_device_post_reset(PCIBus *bus, PCIDevice *d, void *opaque);
>>   void pci_device_reset(PCIDevice *dev);
>>   
>>   PCIDevice *pci_nic_init_nofail(NICInfo *nd, PCIBus *rootbus,
>
>
> .
>

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH v14 Resend 10/13] pci: add pci device pre-post reset callbacks for host bus reset
  2015-12-18  3:29     ` Chen Fan
@ 2015-12-21 21:07       ` Alex Williamson
  2015-12-22  7:18         ` Chen Fan
  2015-12-24  5:10         ` Chen Fan
  0 siblings, 2 replies; 34+ messages in thread
From: Alex Williamson @ 2015-12-21 21:07 UTC (permalink / raw)
  To: Chen Fan, Cao jin, qemu-devel; +Cc: mst

On Fri, 2015-12-18 at 11:29 +0800, Chen Fan wrote:
> On 12/18/2015 04:31 AM, Alex Williamson wrote:
> > On Thu, 2015-12-17 at 09:41 +0800, Cao jin wrote:
> > > From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > > 
> > > Particularly, For vfio devices, Once need to recovery devices
> > > by bus reset such as AER, we always need to reset the host bus
> > > to recovery the devices under the bus, so we need to add pci
> > > device
> > > callbacks to specify to do host bus reset.
> > > 
> > > Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > > Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> > > ---
> > >   hw/pci/pci.c         | 18 ++++++++++++++++++
> > >   hw/pci/pci_bridge.c  |  9 +++++++++
> > >   hw/vfio/pci.c        | 26 ++++++++++++++++++++++++++
> > >   hw/vfio/pci.h        |  2 ++
> > >   include/hw/pci/pci.h |  7 +++++++
> > >   5 files changed, 62 insertions(+)
> > > 
> > > diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> > > index f6ca6ef..64fa2cc 100644
> > > --- a/hw/pci/pci.c
> > > +++ b/hw/pci/pci.c
> > > @@ -247,6 +247,24 @@ static void pci_do_device_reset(PCIDevice
> > > *dev)
> > >       msix_reset(dev);
> > >   }
> > >   
> > > +void pci_device_pre_reset(PCIBus *bus, PCIDevice *dev, void
> > > *unused)
> > > +{
> > > +    PCIDeviceClass *dc = PCI_DEVICE_GET_CLASS(dev);
> > > +
> > > +    if (dc->pre_reset) {
> > > +        dc->pre_reset(dev);
> > > +    }
> > > +}
> > > +
> > > +void pci_device_post_reset(PCIBus *bus, PCIDevice *dev, void
> > > *unused)
> > > +{
> > > +    PCIDeviceClass *dc = PCI_DEVICE_GET_CLASS(dev);
> > > +
> > > +    if (dc->post_reset) {
> > > +        dc->post_reset(dev);
> > > +    }
> > > +}
> > > +
> > >   /*
> > >    * This function is called on #RST and FLR.
> > >    * FLR if PCI_EXP_DEVCTL_BCR_FLR is set
> > > diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
> > > index 40c97b1..ddb76ab 100644
> > > --- a/hw/pci/pci_bridge.c
> > > +++ b/hw/pci/pci_bridge.c
> > > @@ -267,8 +267,17 @@ void pci_bridge_write_config(PCIDevice *d,
> > >   
> > >       newctl = pci_get_word(d->config + PCI_BRIDGE_CONTROL);
> > >       if (~oldctl & newctl & PCI_BRIDGE_CTL_BUS_RESET) {
> > > +        /*
> > > +         * Notify all vfio-pci devices under the bus
> > > +         * should do physical bus reset.
> > > +         */
> > > +        PCIBus *sec_bus = pci_bridge_get_sec_bus(s);
> > > +        pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
> > > +                            pci_device_pre_reset, NULL);
> > >           /* Trigger hot reset on 0->1 transition. */
> > >           qbus_reset_all(&s->sec_bus.qbus);
> > > +        pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
> > > +                            pci_device_post_reset, NULL);
> > >       }
> > >   }
> > >   
> > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> > > index e17dc89..df32618 100644
> > > --- a/hw/vfio/pci.c
> > > +++ b/hw/vfio/pci.c
> > > @@ -39,6 +39,7 @@
> > >   
> > >   static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
> > >   static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool
> > > enabled);
> > > +static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single);
> > >   
> > >   /*
> > >    * Disabling BAR mmaping can be slow, but toggling it around
> > > INTx
> > > can
> > > @@ -1888,6 +1889,8 @@ static int
> > > vfio_check_host_bus_reset(VFIOPCIDevice *vdev)
> > >       /* List all affected devices by bus reset */
> > >       devices = &info->devices[0];
> > >   
> > > +    vdev->single_depend_dev = (info->count == 1);
> > > +
> > >       /* Verify that we have all the groups required */
> > >       for (i = 0; i < info->count; i++) {
> > >           PCIHostDeviceAddress host;
> > > @@ -2029,10 +2032,26 @@ static int
> > > vfio_check_bus_reset(NotifierWithReturn *n, void *opaque)
> > >       return vfio_check_host_bus_reset(vdev);
> > >   }
> > >   
> > > +static void vfio_aer_pre_reset(PCIDevice *pdev)
> > > +{
> > > +    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> > > +
> > > +    vdev->aer_reset = true;
> > > +    vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
> > > +}
> > Doesn't this lead to multiple host bus resets per guest bus reset
> > in
> > many cases?  It looks like we'll do it once per vfio-pci device,
> > even
> > if those devices are on the same host bus.  That's a 1 second
> > operation
> > per device.  Can we avoid that?  Maybe some sort of sequence ID
> > could
> > help a device figure out whether it's already been reset as part of
> > a
> > dependent device for this particular guest bus reset.  Thanks,
> That's right, I missed this case, but I don't understand the scenario
> how to
> use a sequence ID to mark the device if been reset. can you detail it
> ?

I don't really have a concrete idea for a sequence ID, it was just a
thought that maybe if each bus reset had a sequence ID then devices
could know whether they've already been reset for that sequence ID.
 The basic problem we have is that reset callbacks are per device and
it's difficult to infer which individual resets are part of that bus
reset.  In fact, do we propagate resets correctly down secondary
bridges?  We're triggering off a VM write of the bridge control bus
reset bit triggering from 0->1 and we then call qbus_reset_all() on
that qbus, which I think is just going to call pci_bridge_reset() for
any other bridges, which doesn't do anything about resetting deeper
subordinate buses.  I think that means that if we had a root port with
a switch below it and endpoints below that, if the VM triggered a
secondary bus reset at the root port, the endpoints would never see it,
which is not how real hardware works.

> additional, there was a mechanism to compute device whether need to
> be reset
> by hot reset. so I simply modify the code as the following:
> 
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index a9bc67e..42774ca 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -2063,13 +2063,19 @@ static void vfio_aer_pre_reset(PCIDevice
> *pdev)
>       VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> 
>       vdev->aer_reset = true;
> -    vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
> +    vdev->vbasedev.needs_reset = true;
>   }
> 
>   static void vfio_aer_post_reset(PCIDevice *pdev)
>   {
>       VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> 
> +    if (!vdev->single_depend_dev && vdev->vbasedev.needs_reset) {
> +        vfio_pci_hot_reset(vdev, false);
> +    } else {
> +        vfio_pci_hot_reset(vdev, true);
> +    }
> +
>       vdev->aer_reset = false;
>   }
> 
> what do you think of this ?

I think it might be a bigger problem than that subtle change.  I wonder
if we really need a better model of the reset line through the
subordinate buses.  When reset is asserted, we'd set a bus_in_reset
flag on the bus and trigger downstream bridges to do the same.  Then
when the user de-asserts reset, we'd call qbus_reset_all() and
propagate it through to downstream buses.  That way the per device
reset callback could check to see if the bus is in reset and aer
devices can then know to do a bus reset.  Finally, the bus_in_reset
flag would be cleared on all the affected buses.  I'm sure there are
numerous details missing there, but it seems like it might be a
reasonable approach.  Thanks,

Alex

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH v14 Resend 10/13] pci: add pci device pre-post reset callbacks for host bus reset
  2015-12-21 21:07       ` Alex Williamson
@ 2015-12-22  7:18         ` Chen Fan
  2015-12-24  5:10         ` Chen Fan
  1 sibling, 0 replies; 34+ messages in thread
From: Chen Fan @ 2015-12-22  7:18 UTC (permalink / raw)
  To: Alex Williamson, Cao jin, qemu-devel; +Cc: mst


On 12/22/2015 05:07 AM, Alex Williamson wrote:
> On Fri, 2015-12-18 at 11:29 +0800, Chen Fan wrote:
>> On 12/18/2015 04:31 AM, Alex Williamson wrote:
>>> On Thu, 2015-12-17 at 09:41 +0800, Cao jin wrote:
>>>> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>>>>
>>>> Particularly, For vfio devices, Once need to recovery devices
>>>> by bus reset such as AER, we always need to reset the host bus
>>>> to recovery the devices under the bus, so we need to add pci
>>>> device
>>>> callbacks to specify to do host bus reset.
>>>>
>>>> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>>>> Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
>>>> ---
>>>>    hw/pci/pci.c         | 18 ++++++++++++++++++
>>>>    hw/pci/pci_bridge.c  |  9 +++++++++
>>>>    hw/vfio/pci.c        | 26 ++++++++++++++++++++++++++
>>>>    hw/vfio/pci.h        |  2 ++
>>>>    include/hw/pci/pci.h |  7 +++++++
>>>>    5 files changed, 62 insertions(+)
>>>>
>>>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>>>> index f6ca6ef..64fa2cc 100644
>>>> --- a/hw/pci/pci.c
>>>> +++ b/hw/pci/pci.c
>>>> @@ -247,6 +247,24 @@ static void pci_do_device_reset(PCIDevice
>>>> *dev)
>>>>        msix_reset(dev);
>>>>    }
>>>>    
>>>> +void pci_device_pre_reset(PCIBus *bus, PCIDevice *dev, void
>>>> *unused)
>>>> +{
>>>> +    PCIDeviceClass *dc = PCI_DEVICE_GET_CLASS(dev);
>>>> +
>>>> +    if (dc->pre_reset) {
>>>> +        dc->pre_reset(dev);
>>>> +    }
>>>> +}
>>>> +
>>>> +void pci_device_post_reset(PCIBus *bus, PCIDevice *dev, void
>>>> *unused)
>>>> +{
>>>> +    PCIDeviceClass *dc = PCI_DEVICE_GET_CLASS(dev);
>>>> +
>>>> +    if (dc->post_reset) {
>>>> +        dc->post_reset(dev);
>>>> +    }
>>>> +}
>>>> +
>>>>    /*
>>>>     * This function is called on #RST and FLR.
>>>>     * FLR if PCI_EXP_DEVCTL_BCR_FLR is set
>>>> diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
>>>> index 40c97b1..ddb76ab 100644
>>>> --- a/hw/pci/pci_bridge.c
>>>> +++ b/hw/pci/pci_bridge.c
>>>> @@ -267,8 +267,17 @@ void pci_bridge_write_config(PCIDevice *d,
>>>>    
>>>>        newctl = pci_get_word(d->config + PCI_BRIDGE_CONTROL);
>>>>        if (~oldctl & newctl & PCI_BRIDGE_CTL_BUS_RESET) {
>>>> +        /*
>>>> +         * Notify all vfio-pci devices under the bus
>>>> +         * should do physical bus reset.
>>>> +         */
>>>> +        PCIBus *sec_bus = pci_bridge_get_sec_bus(s);
>>>> +        pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
>>>> +                            pci_device_pre_reset, NULL);
>>>>            /* Trigger hot reset on 0->1 transition. */
>>>>            qbus_reset_all(&s->sec_bus.qbus);
>>>> +        pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
>>>> +                            pci_device_post_reset, NULL);
>>>>        }
>>>>    }
>>>>    
>>>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>>>> index e17dc89..df32618 100644
>>>> --- a/hw/vfio/pci.c
>>>> +++ b/hw/vfio/pci.c
>>>> @@ -39,6 +39,7 @@
>>>>    
>>>>    static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
>>>>    static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool
>>>> enabled);
>>>> +static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single);
>>>>    
>>>>    /*
>>>>     * Disabling BAR mmaping can be slow, but toggling it around
>>>> INTx
>>>> can
>>>> @@ -1888,6 +1889,8 @@ static int
>>>> vfio_check_host_bus_reset(VFIOPCIDevice *vdev)
>>>>        /* List all affected devices by bus reset */
>>>>        devices = &info->devices[0];
>>>>    
>>>> +    vdev->single_depend_dev = (info->count == 1);
>>>> +
>>>>        /* Verify that we have all the groups required */
>>>>        for (i = 0; i < info->count; i++) {
>>>>            PCIHostDeviceAddress host;
>>>> @@ -2029,10 +2032,26 @@ static int
>>>> vfio_check_bus_reset(NotifierWithReturn *n, void *opaque)
>>>>        return vfio_check_host_bus_reset(vdev);
>>>>    }
>>>>    
>>>> +static void vfio_aer_pre_reset(PCIDevice *pdev)
>>>> +{
>>>> +    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
>>>> +
>>>> +    vdev->aer_reset = true;
>>>> +    vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
>>>> +}
>>> Doesn't this lead to multiple host bus resets per guest bus reset
>>> in
>>> many cases?  It looks like we'll do it once per vfio-pci device,
>>> even
>>> if those devices are on the same host bus.  That's a 1 second
>>> operation
>>> per device.  Can we avoid that?  Maybe some sort of sequence ID
>>> could
>>> help a device figure out whether it's already been reset as part of
>>> a
>>> dependent device for this particular guest bus reset.  Thanks,
>> That's right, I missed this case, but I don't understand the scenario
>> how to
>> use a sequence ID to mark the device if been reset. can you detail it
>> ?
> I don't really have a concrete idea for a sequence ID, it was just a
> thought that maybe if each bus reset had a sequence ID then devices
> could know whether they've already been reset for that sequence ID.
>   The basic problem we have is that reset callbacks are per device and
> it's difficult to infer which individual resets are part of that bus
> reset.  In fact, do we propagate resets correctly down secondary
> bridges?  We're triggering off a VM write of the bridge control bus
> reset bit triggering from 0->1 and we then call qbus_reset_all() on
> that qbus, which I think is just going to call pci_bridge_reset() for
> any other bridges, which doesn't do anything about resetting deeper
> subordinate buses.  I think that means that if we had a root port with
> a switch below it and endpoints below that, if the VM triggered a
> secondary bus reset at the root port, the endpoints would never see it,
> which is not how real hardware works.
Indeed, you're right, for subordinate buses reset, we should have a common
mechanism for all bridges.

>
>> additional, there was a mechanism to compute device whether need to
>> be reset
>> by hot reset. so I simply modify the code as the following:
>>
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index a9bc67e..42774ca 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -2063,13 +2063,19 @@ static void vfio_aer_pre_reset(PCIDevice
>> *pdev)
>>        VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
>>
>>        vdev->aer_reset = true;
>> -    vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
>> +    vdev->vbasedev.needs_reset = true;
>>    }
>>
>>    static void vfio_aer_post_reset(PCIDevice *pdev)
>>    {
>>        VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
>>
>> +    if (!vdev->single_depend_dev && vdev->vbasedev.needs_reset) {
>> +        vfio_pci_hot_reset(vdev, false);
>> +    } else {
>> +        vfio_pci_hot_reset(vdev, true);
>> +    }
>> +
>>        vdev->aer_reset = false;
>>    }
>>
>> what do you think of this ?
> I think it might be a bigger problem than that subtle change.  I wonder
> if we really need a better model of the reset line through the
> subordinate buses.  When reset is asserted, we'd set a bus_in_reset
> flag on the bus and trigger downstream bridges to do the same.  Then
> when the user de-asserts reset, we'd call qbus_reset_all() and
> propagate it through to downstream buses.  That way the per device
> reset callback could check to see if the bus is in reset and aer
> devices can then know to do a bus reset.  Finally, the bus_in_reset
> flag would be cleared on all the affected buses.  I'm sure there are
> numerous details missing there, but it seems like it might be a
> reasonable approach.  Thanks,
it should be, let me try to think about it carefully.;)

Thanks,
Chen
>
> Alex
>
>
> .
>

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH v14 Resend 10/13] pci: add pci device pre-post reset callbacks for host bus reset
  2015-11-17  8:42 ` [Qemu-devel] [PATCH v14 10/13] pci: add pci device pre-post reset callbacks for host bus reset Cao jin
  2015-12-17 20:31   ` [Qemu-devel] [PATCH v14 Resend " Alex Williamson
@ 2015-12-23 12:00   ` Michael S. Tsirkin
  2015-12-24  5:14     ` Chen Fan
  1 sibling, 1 reply; 34+ messages in thread
From: Michael S. Tsirkin @ 2015-12-23 12:00 UTC (permalink / raw)
  To: Cao jin; +Cc: chen.fan.fnst, alex.williamson, qemu-devel

On Thu, Dec 17, 2015 at 09:41:51AM +0800, Cao jin wrote:
> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> 
> Particularly, For vfio devices, Once need to recovery devices
> by bus reset such as AER, we always need to reset the host bus
> to recovery the devices under the bus, so we need to add pci device
> callbacks to specify to do host bus reset.
> 
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> ---
>  hw/pci/pci.c         | 18 ++++++++++++++++++
>  hw/pci/pci_bridge.c  |  9 +++++++++
>  hw/vfio/pci.c        | 26 ++++++++++++++++++++++++++
>  hw/vfio/pci.h        |  2 ++
>  include/hw/pci/pci.h |  7 +++++++
>  5 files changed, 62 insertions(+)
> 
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index f6ca6ef..64fa2cc 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -247,6 +247,24 @@ static void pci_do_device_reset(PCIDevice *dev)
>      msix_reset(dev);
>  }
>  
> +void pci_device_pre_reset(PCIBus *bus, PCIDevice *dev, void *unused)
> +{
> +    PCIDeviceClass *dc = PCI_DEVICE_GET_CLASS(dev);
> +
> +    if (dc->pre_reset) {
> +        dc->pre_reset(dev);
> +    }
> +}
> +
> +void pci_device_post_reset(PCIBus *bus, PCIDevice *dev, void *unused)
> +{
> +    PCIDeviceClass *dc = PCI_DEVICE_GET_CLASS(dev);
> +
> +    if (dc->post_reset) {
> +        dc->post_reset(dev);
> +    }
> +}
> +
>  /*
>   * This function is called on #RST and FLR.
>   * FLR if PCI_EXP_DEVCTL_BCR_FLR is set
> diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
> index 40c97b1..ddb76ab 100644
> --- a/hw/pci/pci_bridge.c
> +++ b/hw/pci/pci_bridge.c
> @@ -267,8 +267,17 @@ void pci_bridge_write_config(PCIDevice *d,
>  
>      newctl = pci_get_word(d->config + PCI_BRIDGE_CONTROL);
>      if (~oldctl & newctl & PCI_BRIDGE_CTL_BUS_RESET) {
> +        /*
> +         * Notify all vfio-pci devices under the bus
> +         * should do physical bus reset.
> +         */
> +        PCIBus *sec_bus = pci_bridge_get_sec_bus(s);
> +        pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
> +                            pci_device_pre_reset, NULL);
>          /* Trigger hot reset on 0->1 transition. */
>          qbus_reset_all(&s->sec_bus.qbus);
> +        pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
> +                            pci_device_post_reset, NULL);
>      }
>  }
>  

So this boils down to need to detect hot reset,
you don't actually call this for all resets.
Callback name should reflect this IMO.

Also, why do we need two callbacks?
How about we have a single hot_reset callback,
and then if device has it, hot reset does not
invoke the regular reset?

Finally, you discussed multiple resets triggering with many vfio devices
on the same bus.  To solve this, will it help to have a bus-level
callback instead?


> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index e17dc89..df32618 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -39,6 +39,7 @@
>  
>  static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
>  static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled);
> +static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single);
>  
>  /*
>   * Disabling BAR mmaping can be slow, but toggling it around INTx can
> @@ -1888,6 +1889,8 @@ static int vfio_check_host_bus_reset(VFIOPCIDevice *vdev)
>      /* List all affected devices by bus reset */
>      devices = &info->devices[0];
>  
> +    vdev->single_depend_dev = (info->count == 1);
> +
>      /* Verify that we have all the groups required */
>      for (i = 0; i < info->count; i++) {
>          PCIHostDeviceAddress host;
> @@ -2029,10 +2032,26 @@ static int vfio_check_bus_reset(NotifierWithReturn *n, void *opaque)
>      return vfio_check_host_bus_reset(vdev);
>  }
>  
> +static void vfio_aer_pre_reset(PCIDevice *pdev)
> +{
> +    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> +
> +    vdev->aer_reset = true;
> +    vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
> +}
> +
> +static void vfio_aer_post_reset(PCIDevice *pdev)
> +{
> +    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> +
> +    vdev->aer_reset = false;
> +}
> +
>  static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
>                            int pos, uint16_t size)
>  {
>      PCIDevice *pdev = &vdev->pdev;
> +    PCIDeviceClass *dc = PCI_DEVICE_GET_CLASS(pdev);
>      PCIDevice *dev_iter;
>      uint8_t type;
>      uint32_t errcap;
> @@ -2079,6 +2098,9 @@ static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
>      vdev->hotplug_notifier.notify = vfio_check_bus_reset;
>      pci_bus_add_hotplug_notifier(pdev->bus, &vdev->hotplug_notifier);
>  
> +    dc->pre_reset = vfio_aer_pre_reset;
> +    dc->post_reset = vfio_aer_post_reset;
> +
>      pcie_cap_deverr_init(pdev);
>      return pcie_aer_init(pdev, pos, size);
>  
> @@ -2978,6 +3000,10 @@ static void vfio_pci_reset(DeviceState *dev)
>  
>      trace_vfio_pci_reset(vdev->vbasedev.name);
>  
> +    if (vdev->aer_reset) {
> +        return;
> +    }
> +
>      vfio_pci_pre_reset(vdev);
>  
>      if (vdev->resetfn && !vdev->resetfn(vdev)) {
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index b385f07..5470b97 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -142,6 +142,8 @@ typedef struct VFIOPCIDevice {
>      bool no_kvm_intx;
>      bool no_kvm_msi;
>      bool no_kvm_msix;
> +    bool aer_reset;
> +    bool single_depend_dev;
>  
>      NotifierWithReturn hotplug_notifier;
>  } VFIOPCIDevice;
> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> index 379b6e1..6b1f2d4 100644
> --- a/include/hw/pci/pci.h
> +++ b/include/hw/pci/pci.h
> @@ -105,6 +105,9 @@ typedef void PCIMapIORegionFunc(PCIDevice *pci_dev, int region_num,
>                                  pcibus_t addr, pcibus_t size, int type);
>  typedef void PCIUnregisterFunc(PCIDevice *pci_dev);
>  
> +typedef void PCIPreResetFunc(PCIDevice *pci_dev);
> +typedef void PCIPostResetFunc(PCIDevice *pci_dev);
> +
>  typedef struct PCIIORegion {
>      pcibus_t addr; /* current PCI mapping address. -1 means not mapped */
>  #define PCI_BAR_UNMAPPED (~(pcibus_t)0)
> @@ -193,6 +196,8 @@ typedef struct PCIDeviceClass {
>      PCIUnregisterFunc *exit;
>      PCIConfigReadFunc *config_read;
>      PCIConfigWriteFunc *config_write;
> +    PCIPreResetFunc *pre_reset;
> +    PCIPostResetFunc *post_reset;
>  
>      uint16_t vendor_id;
>      uint16_t device_id;
> @@ -380,6 +385,8 @@ bool pci_intx_route_changed(PCIINTxRoute *old, PCIINTxRoute *new);
>  void pci_bus_fire_intx_routing_notifier(PCIBus *bus);
>  void pci_device_set_intx_routing_notifier(PCIDevice *dev,
>                                            PCIINTxRoutingNotifier notifier);
> +void pci_device_pre_reset(PCIBus *bus, PCIDevice *d, void *opaque);
> +void pci_device_post_reset(PCIBus *bus, PCIDevice *d, void *opaque);
>  void pci_device_reset(PCIDevice *dev);
>  
>  PCIDevice *pci_nic_init_nofail(NICInfo *nd, PCIBus *rootbus,
> -- 
> 1.9.3
> 
> 

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH v14 Resend 10/13] pci: add pci device pre-post reset callbacks for host bus reset
  2015-12-21 21:07       ` Alex Williamson
  2015-12-22  7:18         ` Chen Fan
@ 2015-12-24  5:10         ` Chen Fan
  2015-12-24 14:34           ` Michael S. Tsirkin
  1 sibling, 1 reply; 34+ messages in thread
From: Chen Fan @ 2015-12-24  5:10 UTC (permalink / raw)
  To: Alex Williamson, Cao jin, qemu-devel; +Cc: mst


On 12/22/2015 05:07 AM, Alex Williamson wrote:
> On Fri, 2015-12-18 at 11:29 +0800, Chen Fan wrote:
>> On 12/18/2015 04:31 AM, Alex Williamson wrote:
>>> On Thu, 2015-12-17 at 09:41 +0800, Cao jin wrote:
>>>> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>>>>
>>>> Particularly, For vfio devices, Once need to recovery devices
>>>> by bus reset such as AER, we always need to reset the host bus
>>>> to recovery the devices under the bus, so we need to add pci
>>>> device
>>>> callbacks to specify to do host bus reset.
>>>>
>>>> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>>>> Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
>>>> ---
>>>>    hw/pci/pci.c         | 18 ++++++++++++++++++
>>>>    hw/pci/pci_bridge.c  |  9 +++++++++
>>>>    hw/vfio/pci.c        | 26 ++++++++++++++++++++++++++
>>>>    hw/vfio/pci.h        |  2 ++
>>>>    include/hw/pci/pci.h |  7 +++++++
>>>>    5 files changed, 62 insertions(+)
>>>>
>>>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>>>> index f6ca6ef..64fa2cc 100644
>>>> --- a/hw/pci/pci.c
>>>> +++ b/hw/pci/pci.c
>>>> @@ -247,6 +247,24 @@ static void pci_do_device_reset(PCIDevice
>>>> *dev)
>>>>        msix_reset(dev);
>>>>    }
>>>>    
>>>> +void pci_device_pre_reset(PCIBus *bus, PCIDevice *dev, void
>>>> *unused)
>>>> +{
>>>> +    PCIDeviceClass *dc = PCI_DEVICE_GET_CLASS(dev);
>>>> +
>>>> +    if (dc->pre_reset) {
>>>> +        dc->pre_reset(dev);
>>>> +    }
>>>> +}
>>>> +
>>>> +void pci_device_post_reset(PCIBus *bus, PCIDevice *dev, void
>>>> *unused)
>>>> +{
>>>> +    PCIDeviceClass *dc = PCI_DEVICE_GET_CLASS(dev);
>>>> +
>>>> +    if (dc->post_reset) {
>>>> +        dc->post_reset(dev);
>>>> +    }
>>>> +}
>>>> +
>>>>    /*
>>>>     * This function is called on #RST and FLR.
>>>>     * FLR if PCI_EXP_DEVCTL_BCR_FLR is set
>>>> diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
>>>> index 40c97b1..ddb76ab 100644
>>>> --- a/hw/pci/pci_bridge.c
>>>> +++ b/hw/pci/pci_bridge.c
>>>> @@ -267,8 +267,17 @@ void pci_bridge_write_config(PCIDevice *d,
>>>>    
>>>>        newctl = pci_get_word(d->config + PCI_BRIDGE_CONTROL);
>>>>        if (~oldctl & newctl & PCI_BRIDGE_CTL_BUS_RESET) {
>>>> +        /*
>>>> +         * Notify all vfio-pci devices under the bus
>>>> +         * should do physical bus reset.
>>>> +         */
>>>> +        PCIBus *sec_bus = pci_bridge_get_sec_bus(s);
>>>> +        pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
>>>> +                            pci_device_pre_reset, NULL);
>>>>            /* Trigger hot reset on 0->1 transition. */
>>>>            qbus_reset_all(&s->sec_bus.qbus);
>>>> +        pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
>>>> +                            pci_device_post_reset, NULL);
>>>>        }
>>>>    }
>>>>    
>>>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>>>> index e17dc89..df32618 100644
>>>> --- a/hw/vfio/pci.c
>>>> +++ b/hw/vfio/pci.c
>>>> @@ -39,6 +39,7 @@
>>>>    
>>>>    static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
>>>>    static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool
>>>> enabled);
>>>> +static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single);
>>>>    
>>>>    /*
>>>>     * Disabling BAR mmaping can be slow, but toggling it around
>>>> INTx
>>>> can
>>>> @@ -1888,6 +1889,8 @@ static int
>>>> vfio_check_host_bus_reset(VFIOPCIDevice *vdev)
>>>>        /* List all affected devices by bus reset */
>>>>        devices = &info->devices[0];
>>>>    
>>>> +    vdev->single_depend_dev = (info->count == 1);
>>>> +
>>>>        /* Verify that we have all the groups required */
>>>>        for (i = 0; i < info->count; i++) {
>>>>            PCIHostDeviceAddress host;
>>>> @@ -2029,10 +2032,26 @@ static int
>>>> vfio_check_bus_reset(NotifierWithReturn *n, void *opaque)
>>>>        return vfio_check_host_bus_reset(vdev);
>>>>    }
>>>>    
>>>> +static void vfio_aer_pre_reset(PCIDevice *pdev)
>>>> +{
>>>> +    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
>>>> +
>>>> +    vdev->aer_reset = true;
>>>> +    vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
>>>> +}
>>> Doesn't this lead to multiple host bus resets per guest bus reset
>>> in
>>> many cases?  It looks like we'll do it once per vfio-pci device,
>>> even
>>> if those devices are on the same host bus.  That's a 1 second
>>> operation
>>> per device.  Can we avoid that?  Maybe some sort of sequence ID
>>> could
>>> help a device figure out whether it's already been reset as part of
>>> a
>>> dependent device for this particular guest bus reset.  Thanks,
>> That's right, I missed this case, but I don't understand the scenario
>> how to
>> use a sequence ID to mark the device if been reset. can you detail it
>> ?
> I don't really have a concrete idea for a sequence ID, it was just a
> thought that maybe if each bus reset had a sequence ID then devices
> could know whether they've already been reset for that sequence ID.
>   The basic problem we have is that reset callbacks are per device and
> it's difficult to infer which individual resets are part of that bus
> reset.  In fact, do we propagate resets correctly down secondary
> bridges?  We're triggering off a VM write of the bridge control bus
> reset bit triggering from 0->1 and we then call qbus_reset_all() on
> that qbus, which I think is just going to call pci_bridge_reset() for
> any other bridges, which doesn't do anything about resetting deeper
> subordinate buses.  I think that means that if we had a root port with
> a switch below it and endpoints below that, if the VM triggered a
> secondary bus reset at the root port, the endpoints would never see it,
> which is not how real hardware works.
Hi Alex,

     After think over the subordinate case, I think the qbus_reset_all()
should also reset the deeper endpoints. which recursive the qbus and qdev
to reset all devices blow sec bus and bridge device independently.

MST's suggestion in another mail to add a hot reset callback for sec bus 
reset
should be good to avoid the couple of pre_reset/post_reset callbacks. 
thanks:)

but for the multiple resets triggering on the same bus, I think the 
virtual bus-level
callback do not guarantee the host bus reset only once for all dependent 
devices.
because the host reset dependent devices may not on the same virtual bus.
we allowed that the dependent devices address below the bus. so the devices
still don't know the host bus is in reset. here I think we should have a 
host bus
model bind to all the dependent devices. when devices do hot reset, we mark
the bus in reset. then other dependent devices know the bus is reset. 
how about
this?

Thanks,
Chen

>
>> additional, there was a mechanism to compute device whether need to
>> be reset
>> by hot reset. so I simply modify the code as the following:
>>
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index a9bc67e..42774ca 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -2063,13 +2063,19 @@ static void vfio_aer_pre_reset(PCIDevice
>> *pdev)
>>        VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
>>
>>        vdev->aer_reset = true;
>> -    vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
>> +    vdev->vbasedev.needs_reset = true;
>>    }
>>
>>    static void vfio_aer_post_reset(PCIDevice *pdev)
>>    {
>>        VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
>>
>> +    if (!vdev->single_depend_dev && vdev->vbasedev.needs_reset) {
>> +        vfio_pci_hot_reset(vdev, false);
>> +    } else {
>> +        vfio_pci_hot_reset(vdev, true);
>> +    }
>> +
>>        vdev->aer_reset = false;
>>    }
>>
>> what do you think of this ?
> I think it might be a bigger problem than that subtle change.  I wonder
> if we really need a better model of the reset line through the
> subordinate buses.  When reset is asserted, we'd set a bus_in_reset
> flag on the bus and trigger downstream bridges to do the same.  Then
> when the user de-asserts reset, we'd call qbus_reset_all() and
> propagate it through to downstream buses.  That way the per device
> reset callback could check to see if the bus is in reset and aer
> devices can then know to do a bus reset.  Finally, the bus_in_reset
> flag would be cleared on all the affected buses.  I'm sure there are
> numerous details missing there, but it seems like it might be a
> reasonable approach.  Thanks,
>
> Alex
>
>
> .
>

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH v14 Resend 10/13] pci: add pci device pre-post reset callbacks for host bus reset
  2015-12-23 12:00   ` Michael S. Tsirkin
@ 2015-12-24  5:14     ` Chen Fan
  0 siblings, 0 replies; 34+ messages in thread
From: Chen Fan @ 2015-12-24  5:14 UTC (permalink / raw)
  To: Michael S. Tsirkin, Cao jin; +Cc: alex.williamson, qemu-devel

Hi mst,

     thanks for your suggestion. I have replied it in another alex's email.
so we could discuss it in there.;)

Thanks,
Chen

On 12/23/2015 08:00 PM, Michael S. Tsirkin wrote:
> On Thu, Dec 17, 2015 at 09:41:51AM +0800, Cao jin wrote:
>> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>>
>> Particularly, For vfio devices, Once need to recovery devices
>> by bus reset such as AER, we always need to reset the host bus
>> to recovery the devices under the bus, so we need to add pci device
>> callbacks to specify to do host bus reset.
>>
>> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>> Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
>> ---
>>   hw/pci/pci.c         | 18 ++++++++++++++++++
>>   hw/pci/pci_bridge.c  |  9 +++++++++
>>   hw/vfio/pci.c        | 26 ++++++++++++++++++++++++++
>>   hw/vfio/pci.h        |  2 ++
>>   include/hw/pci/pci.h |  7 +++++++
>>   5 files changed, 62 insertions(+)
>>
>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>> index f6ca6ef..64fa2cc 100644
>> --- a/hw/pci/pci.c
>> +++ b/hw/pci/pci.c
>> @@ -247,6 +247,24 @@ static void pci_do_device_reset(PCIDevice *dev)
>>       msix_reset(dev);
>>   }
>>   
>> +void pci_device_pre_reset(PCIBus *bus, PCIDevice *dev, void *unused)
>> +{
>> +    PCIDeviceClass *dc = PCI_DEVICE_GET_CLASS(dev);
>> +
>> +    if (dc->pre_reset) {
>> +        dc->pre_reset(dev);
>> +    }
>> +}
>> +
>> +void pci_device_post_reset(PCIBus *bus, PCIDevice *dev, void *unused)
>> +{
>> +    PCIDeviceClass *dc = PCI_DEVICE_GET_CLASS(dev);
>> +
>> +    if (dc->post_reset) {
>> +        dc->post_reset(dev);
>> +    }
>> +}
>> +
>>   /*
>>    * This function is called on #RST and FLR.
>>    * FLR if PCI_EXP_DEVCTL_BCR_FLR is set
>> diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
>> index 40c97b1..ddb76ab 100644
>> --- a/hw/pci/pci_bridge.c
>> +++ b/hw/pci/pci_bridge.c
>> @@ -267,8 +267,17 @@ void pci_bridge_write_config(PCIDevice *d,
>>   
>>       newctl = pci_get_word(d->config + PCI_BRIDGE_CONTROL);
>>       if (~oldctl & newctl & PCI_BRIDGE_CTL_BUS_RESET) {
>> +        /*
>> +         * Notify all vfio-pci devices under the bus
>> +         * should do physical bus reset.
>> +         */
>> +        PCIBus *sec_bus = pci_bridge_get_sec_bus(s);
>> +        pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
>> +                            pci_device_pre_reset, NULL);
>>           /* Trigger hot reset on 0->1 transition. */
>>           qbus_reset_all(&s->sec_bus.qbus);
>> +        pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
>> +                            pci_device_post_reset, NULL);
>>       }
>>   }
>>   
> So this boils down to need to detect hot reset,
> you don't actually call this for all resets.
> Callback name should reflect this IMO.
>
> Also, why do we need two callbacks?
> How about we have a single hot_reset callback,
> and then if device has it, hot reset does not
> invoke the regular reset?
>
> Finally, you discussed multiple resets triggering with many vfio devices
> on the same bus.  To solve this, will it help to have a bus-level
> callback instead?
>
>
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index e17dc89..df32618 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -39,6 +39,7 @@
>>   
>>   static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
>>   static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled);
>> +static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single);
>>   
>>   /*
>>    * Disabling BAR mmaping can be slow, but toggling it around INTx can
>> @@ -1888,6 +1889,8 @@ static int vfio_check_host_bus_reset(VFIOPCIDevice *vdev)
>>       /* List all affected devices by bus reset */
>>       devices = &info->devices[0];
>>   
>> +    vdev->single_depend_dev = (info->count == 1);
>> +
>>       /* Verify that we have all the groups required */
>>       for (i = 0; i < info->count; i++) {
>>           PCIHostDeviceAddress host;
>> @@ -2029,10 +2032,26 @@ static int vfio_check_bus_reset(NotifierWithReturn *n, void *opaque)
>>       return vfio_check_host_bus_reset(vdev);
>>   }
>>   
>> +static void vfio_aer_pre_reset(PCIDevice *pdev)
>> +{
>> +    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
>> +
>> +    vdev->aer_reset = true;
>> +    vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
>> +}
>> +
>> +static void vfio_aer_post_reset(PCIDevice *pdev)
>> +{
>> +    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
>> +
>> +    vdev->aer_reset = false;
>> +}
>> +
>>   static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
>>                             int pos, uint16_t size)
>>   {
>>       PCIDevice *pdev = &vdev->pdev;
>> +    PCIDeviceClass *dc = PCI_DEVICE_GET_CLASS(pdev);
>>       PCIDevice *dev_iter;
>>       uint8_t type;
>>       uint32_t errcap;
>> @@ -2079,6 +2098,9 @@ static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
>>       vdev->hotplug_notifier.notify = vfio_check_bus_reset;
>>       pci_bus_add_hotplug_notifier(pdev->bus, &vdev->hotplug_notifier);
>>   
>> +    dc->pre_reset = vfio_aer_pre_reset;
>> +    dc->post_reset = vfio_aer_post_reset;
>> +
>>       pcie_cap_deverr_init(pdev);
>>       return pcie_aer_init(pdev, pos, size);
>>   
>> @@ -2978,6 +3000,10 @@ static void vfio_pci_reset(DeviceState *dev)
>>   
>>       trace_vfio_pci_reset(vdev->vbasedev.name);
>>   
>> +    if (vdev->aer_reset) {
>> +        return;
>> +    }
>> +
>>       vfio_pci_pre_reset(vdev);
>>   
>>       if (vdev->resetfn && !vdev->resetfn(vdev)) {
>> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
>> index b385f07..5470b97 100644
>> --- a/hw/vfio/pci.h
>> +++ b/hw/vfio/pci.h
>> @@ -142,6 +142,8 @@ typedef struct VFIOPCIDevice {
>>       bool no_kvm_intx;
>>       bool no_kvm_msi;
>>       bool no_kvm_msix;
>> +    bool aer_reset;
>> +    bool single_depend_dev;
>>   
>>       NotifierWithReturn hotplug_notifier;
>>   } VFIOPCIDevice;
>> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
>> index 379b6e1..6b1f2d4 100644
>> --- a/include/hw/pci/pci.h
>> +++ b/include/hw/pci/pci.h
>> @@ -105,6 +105,9 @@ typedef void PCIMapIORegionFunc(PCIDevice *pci_dev, int region_num,
>>                                   pcibus_t addr, pcibus_t size, int type);
>>   typedef void PCIUnregisterFunc(PCIDevice *pci_dev);
>>   
>> +typedef void PCIPreResetFunc(PCIDevice *pci_dev);
>> +typedef void PCIPostResetFunc(PCIDevice *pci_dev);
>> +
>>   typedef struct PCIIORegion {
>>       pcibus_t addr; /* current PCI mapping address. -1 means not mapped */
>>   #define PCI_BAR_UNMAPPED (~(pcibus_t)0)
>> @@ -193,6 +196,8 @@ typedef struct PCIDeviceClass {
>>       PCIUnregisterFunc *exit;
>>       PCIConfigReadFunc *config_read;
>>       PCIConfigWriteFunc *config_write;
>> +    PCIPreResetFunc *pre_reset;
>> +    PCIPostResetFunc *post_reset;
>>   
>>       uint16_t vendor_id;
>>       uint16_t device_id;
>> @@ -380,6 +385,8 @@ bool pci_intx_route_changed(PCIINTxRoute *old, PCIINTxRoute *new);
>>   void pci_bus_fire_intx_routing_notifier(PCIBus *bus);
>>   void pci_device_set_intx_routing_notifier(PCIDevice *dev,
>>                                             PCIINTxRoutingNotifier notifier);
>> +void pci_device_pre_reset(PCIBus *bus, PCIDevice *d, void *opaque);
>> +void pci_device_post_reset(PCIBus *bus, PCIDevice *d, void *opaque);
>>   void pci_device_reset(PCIDevice *dev);
>>   
>>   PCIDevice *pci_nic_init_nofail(NICInfo *nd, PCIBus *rootbus,
>> -- 
>> 1.9.3
>>
>>
>
>
> .
>

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH v14 Resend 08/13] vfio: add check host bus reset is support or not
  2015-11-17  8:41 ` [Qemu-devel] [PATCH v14 08/13] vfio: add check host bus reset is support or not Cao jin
  2015-12-17 20:32   ` [Qemu-devel] [PATCH v14 Resend " Alex Williamson
@ 2015-12-24 14:32   ` Michael S. Tsirkin
  2015-12-24 17:47     ` Alex Williamson
  1 sibling, 1 reply; 34+ messages in thread
From: Michael S. Tsirkin @ 2015-12-24 14:32 UTC (permalink / raw)
  To: Cao jin; +Cc: chen.fan.fnst, alex.williamson, qemu-devel

On Thu, Dec 17, 2015 at 09:41:49AM +0800, Cao jin wrote:
> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> 
> when init vfio devices done, we should test all the devices supported
> aer whether conflict with others. For each one, get the hot reset
> info for the affected device list.  For each affected device, all
> should attach to the VM and on/below the same bus. also, we should test
> all of the non-AER supporting vfio-pci devices on or below the target
> bus to verify they have a reset mechanism.
> 
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> ---
>  hw/vfio/pci.c | 236 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
>  hw/vfio/pci.h |   1 +
>  2 files changed, 230 insertions(+), 7 deletions(-)
> 
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index d00b0e4..6926dcc 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -1806,6 +1806,216 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
>      return 0;
>  }
>  
> +static bool vfio_pci_host_slot_match(PCIHostDeviceAddress *host1,
> +                                     PCIHostDeviceAddress *host2)
> +{
> +    return (host1->domain == host2->domain && host1->bus == host2->bus &&
> +            host1->slot == host2->slot);
> +}
> +
> +static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
> +                                PCIHostDeviceAddress *host2)
> +{
> +    return (vfio_pci_host_slot_match(host1, host2) &&
> +            host1->function == host2->function);
> +}
> +
> +struct VFIODeviceFind {
> +    PCIDevice *pdev;
> +    bool found;
> +};
> +
> +static void vfio_check_device_noreset(PCIBus *bus, PCIDevice *pdev,
> +                                      void *opaque)
> +{
> +    DeviceState *dev = DEVICE(pdev);
> +    DeviceClass *dc = DEVICE_GET_CLASS(dev);
> +    VFIOPCIDevice *vdev;
> +    struct VFIODeviceFind *find = opaque;
> +
> +    if (find->found) {
> +        return;
> +    }
> +
> +    if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
> +        if (!dc->reset) {
> +            goto found;
> +        }
> +        return;
> +    }
> +    vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> +    if (!(vdev->features & VFIO_FEATURE_ENABLE_AER) &&
> +        !vdev->vbasedev.reset_works) {
> +        goto found;
> +    }
> +
> +    return;
> +found:
> +    find->pdev = pdev;
> +    find->found = true;
> +}
> +
> +static void device_find(PCIBus *bus, PCIDevice *pdev, void *opaque)
> +{
> +    struct VFIODeviceFind *find = opaque;
> +
> +    if (find->found) {
> +        return;
> +    }
> +
> +    if (pdev == find->pdev) {
> +        find->found = true;
> +    }
> +}
> +
> +static int vfio_check_host_bus_reset(VFIOPCIDevice *vdev)
> +{
> +    PCIBus *bus = vdev->pdev.bus;
> +    struct vfio_pci_hot_reset_info *info = NULL;
> +    struct vfio_pci_dependent_device *devices;
> +    VFIOGroup *group;
> +    struct VFIODeviceFind find;
> +    int ret, i;
> +
> +    ret = vfio_get_hot_reset_info(vdev, &info);
> +    if (ret) {
> +        error_report("vfio: Cannot enable AER for device %s,"
> +                     " device does not support hot reset.",
> +                     vdev->vbasedev.name);
> +        goto out;
> +    }
> +
> +    /* List all affected devices by bus reset */
> +    devices = &info->devices[0];
> +
> +    /* Verify that we have all the groups required */
> +    for (i = 0; i < info->count; i++) {
> +        PCIHostDeviceAddress host;
> +        VFIOPCIDevice *tmp;
> +        VFIODevice *vbasedev_iter;
> +        bool found = false;
> +
> +        host.domain = devices[i].segment;
> +        host.bus = devices[i].bus;
> +        host.slot = PCI_SLOT(devices[i].devfn);
> +        host.function = PCI_FUNC(devices[i].devfn);
> +
> +        /* Skip the current device */
> +        if (vfio_pci_host_match(&host, &vdev->host)) {
> +            continue;
> +        }
> +
> +        /* Ensure we own the group of the affected device */
> +        QLIST_FOREACH(group, &vfio_group_list, next) {
> +            if (group->groupid == devices[i].group_id) {
> +                break;
> +            }
> +        }
> +
> +        if (!group) {
> +            error_report("vfio: Cannot enable AER for device %s, "
> +                         "depends on group %d which is not owned.",
> +                         vdev->vbasedev.name, devices[i].group_id);
> +            ret = -1;
> +            goto out;
> +        }
> +
> +        /* Ensure affected devices for reset on/blow the bus */
> +        QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
> +            if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
> +                continue;
> +            }
> +            tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
> +            if (vfio_pci_host_match(&host, &tmp->host)) {
> +                PCIDevice *pci = PCI_DEVICE(tmp);
> +
> +                /*
> +                 * For multifunction device, due to vfio driver signal all
> +                 * functions under the upstream link of the end point. here
> +                 * we validate all functions whether enable AER.
> +                 */
> +                if (vfio_pci_host_slot_match(&vdev->host, &tmp->host) &&
> +                    !(tmp->features & VFIO_FEATURE_ENABLE_AER)) {
> +                    error_report("vfio: Cannot enable AER for device %s, on same slot"
> +                                 " the dependent device %s which does not enable AER.",
> +                                 vdev->vbasedev.name, tmp->vbasedev.name);
> +                    ret = -1;
> +                    goto out;
> +                }
> +
> +                find.pdev = pci;
> +                find.found = false;
> +                pci_for_each_device(bus, pci_bus_num(bus),
> +                                    device_find, &find);
> +                if (!find.found) {
> +                    error_report("vfio: Cannot enable AER for device %s, "
> +                                 "the dependent device %s is not under the same bus",
> +                                 vdev->vbasedev.name, tmp->vbasedev.name);
> +                    ret = -1;
> +                    goto out;
> +                }
> +                found = true;
> +                break;
> +            }
> +        }
> +
> +        /* Ensure all affected devices assigned to VM */

I am puzzled.
Does not kernel enforce this already?
If not it's a security problem.
If yes why does userspace need to check this?

> +        if (!found) {
> +            error_report("vfio: Cannot enable AER for device %s, "
> +                         "the dependent device %04x:%02x:%02x.%x "
> +                         "is not assigned to VM.",
> +                         vdev->vbasedev.name, host.domain, host.bus,
> +                         host.slot, host.function);
> +            ret = -1;
> +            goto out;
> +        }
> +    }
> +
> +    /*
> +     * Check the all pci devices on or below the target bus
> +     * have a reset mechanism at least.
> +     */
> +    find.pdev = NULL;
> +    find.found = false;
> +    pci_for_each_device(bus, pci_bus_num(bus),
> +                        vfio_check_device_noreset, &find);
> +    if (find.found) {
> +        error_report("vfio: Cannot enable AER for device %s, "
> +                     "the affected device %s does not have a reset mechanism.",
> +                     vdev->vbasedev.name, find.pdev->name);
> +        ret = -1;
> +        goto out;
> +    }
> +
> +    ret = 0;
> +out:
> +    g_free(info);
> +    return ret;
> +}
> +
> +static int vfio_check_devices_host_bus_reset(void)
> +{
> +    VFIOGroup *group;
> +    VFIODevice *vbasedev;
> +    VFIOPCIDevice *vdev;
> +
> +    /* Check All vfio-pci devices if have bus reset capability */
> +    QLIST_FOREACH(group, &vfio_group_list, next) {
> +        QLIST_FOREACH(vbasedev, &group->device_list, next) {
> +            if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) {
> +                continue;
> +            }
> +            vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
> +            if ((vdev->features & VFIO_FEATURE_ENABLE_AER) &&
> +                vfio_check_host_bus_reset(vdev)) {
> +                return -1;
> +            }
> +        }
> +    }
> +
> +    return 0;
> +}
> +
>  static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
>                            int pos, uint16_t size)
>  {
> @@ -1983,13 +2193,6 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
>      vfio_intx_enable(vdev);
>  }
>  
> -static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
> -                                PCIHostDeviceAddress *host2)
> -{
> -    return (host1->domain == host2->domain && host1->bus == host2->bus &&
> -            host1->slot == host2->slot && host1->function == host2->function);
> -}
> -
>  static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
>  {
>      VFIOGroup *group;
> @@ -2495,6 +2698,20 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
>      vdev->req_enabled = false;
>  }
>  
> +static void vfio_pci_machine_done_notify(Notifier *notifier, void *unused)
> +{
> +    int ret;
> +
> +    ret = vfio_check_devices_host_bus_reset();
> +    if (ret) {
> +        exit(1);
> +    }
> +}
> +
> +static Notifier machine_notifier = {
> +    .notify = vfio_pci_machine_done_notify,
> +};
> +
>  static int vfio_initfn(PCIDevice *pdev)
>  {
>      VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> @@ -2841,6 +3058,11 @@ static const TypeInfo vfio_pci_dev_info = {
>  static void register_vfio_pci_dev_type(void)
>  {
>      type_register_static(&vfio_pci_dev_info);
> +    /*
> +     * Register notifier when machine init is done, since we need
> +     * check the configration manner after all vfio device are inited.
> +     */
> +    qemu_add_machine_init_done_notifier(&machine_notifier);
>  }
>  
>  type_init(register_vfio_pci_dev_type)
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index 48c1f69..59ae194 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -15,6 +15,7 @@
>  #include "qemu-common.h"
>  #include "exec/memory.h"
>  #include "hw/pci/pci.h"
> +#include "hw/pci/pci_bus.h"
>  #include "hw/pci/pci_bridge.h"
>  #include "hw/vfio/vfio-common.h"
>  #include "qemu/event_notifier.h"
> -- 
> 1.9.3
> 
> 

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH v14 Resend 10/13] pci: add pci device pre-post reset callbacks for host bus reset
  2015-12-24  5:10         ` Chen Fan
@ 2015-12-24 14:34           ` Michael S. Tsirkin
  2015-12-25  1:18             ` Chen Fan
  0 siblings, 1 reply; 34+ messages in thread
From: Michael S. Tsirkin @ 2015-12-24 14:34 UTC (permalink / raw)
  To: Chen Fan; +Cc: Alex Williamson, Cao jin, qemu-devel

On Thu, Dec 24, 2015 at 01:10:25PM +0800, Chen Fan wrote:
> 
> On 12/22/2015 05:07 AM, Alex Williamson wrote:
> >On Fri, 2015-12-18 at 11:29 +0800, Chen Fan wrote:
> >>On 12/18/2015 04:31 AM, Alex Williamson wrote:
> >>>On Thu, 2015-12-17 at 09:41 +0800, Cao jin wrote:
> >>>>From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> >>>>
> >>>>Particularly, For vfio devices, Once need to recovery devices
> >>>>by bus reset such as AER, we always need to reset the host bus
> >>>>to recovery the devices under the bus, so we need to add pci
> >>>>device
> >>>>callbacks to specify to do host bus reset.
> >>>>
> >>>>Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> >>>>Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
> >>>>---
> >>>>   hw/pci/pci.c         | 18 ++++++++++++++++++
> >>>>   hw/pci/pci_bridge.c  |  9 +++++++++
> >>>>   hw/vfio/pci.c        | 26 ++++++++++++++++++++++++++
> >>>>   hw/vfio/pci.h        |  2 ++
> >>>>   include/hw/pci/pci.h |  7 +++++++
> >>>>   5 files changed, 62 insertions(+)
> >>>>
> >>>>diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> >>>>index f6ca6ef..64fa2cc 100644
> >>>>--- a/hw/pci/pci.c
> >>>>+++ b/hw/pci/pci.c
> >>>>@@ -247,6 +247,24 @@ static void pci_do_device_reset(PCIDevice
> >>>>*dev)
> >>>>       msix_reset(dev);
> >>>>   }
> >>>>+void pci_device_pre_reset(PCIBus *bus, PCIDevice *dev, void
> >>>>*unused)
> >>>>+{
> >>>>+    PCIDeviceClass *dc = PCI_DEVICE_GET_CLASS(dev);
> >>>>+
> >>>>+    if (dc->pre_reset) {
> >>>>+        dc->pre_reset(dev);
> >>>>+    }
> >>>>+}
> >>>>+
> >>>>+void pci_device_post_reset(PCIBus *bus, PCIDevice *dev, void
> >>>>*unused)
> >>>>+{
> >>>>+    PCIDeviceClass *dc = PCI_DEVICE_GET_CLASS(dev);
> >>>>+
> >>>>+    if (dc->post_reset) {
> >>>>+        dc->post_reset(dev);
> >>>>+    }
> >>>>+}
> >>>>+
> >>>>   /*
> >>>>    * This function is called on #RST and FLR.
> >>>>    * FLR if PCI_EXP_DEVCTL_BCR_FLR is set
> >>>>diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
> >>>>index 40c97b1..ddb76ab 100644
> >>>>--- a/hw/pci/pci_bridge.c
> >>>>+++ b/hw/pci/pci_bridge.c
> >>>>@@ -267,8 +267,17 @@ void pci_bridge_write_config(PCIDevice *d,
> >>>>       newctl = pci_get_word(d->config + PCI_BRIDGE_CONTROL);
> >>>>       if (~oldctl & newctl & PCI_BRIDGE_CTL_BUS_RESET) {
> >>>>+        /*
> >>>>+         * Notify all vfio-pci devices under the bus
> >>>>+         * should do physical bus reset.
> >>>>+         */
> >>>>+        PCIBus *sec_bus = pci_bridge_get_sec_bus(s);
> >>>>+        pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
> >>>>+                            pci_device_pre_reset, NULL);
> >>>>           /* Trigger hot reset on 0->1 transition. */
> >>>>           qbus_reset_all(&s->sec_bus.qbus);
> >>>>+        pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
> >>>>+                            pci_device_post_reset, NULL);
> >>>>       }
> >>>>   }
> >>>>diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> >>>>index e17dc89..df32618 100644
> >>>>--- a/hw/vfio/pci.c
> >>>>+++ b/hw/vfio/pci.c
> >>>>@@ -39,6 +39,7 @@
> >>>>   static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
> >>>>   static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool
> >>>>enabled);
> >>>>+static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single);
> >>>>   /*
> >>>>    * Disabling BAR mmaping can be slow, but toggling it around
> >>>>INTx
> >>>>can
> >>>>@@ -1888,6 +1889,8 @@ static int
> >>>>vfio_check_host_bus_reset(VFIOPCIDevice *vdev)
> >>>>       /* List all affected devices by bus reset */
> >>>>       devices = &info->devices[0];
> >>>>+    vdev->single_depend_dev = (info->count == 1);
> >>>>+
> >>>>       /* Verify that we have all the groups required */
> >>>>       for (i = 0; i < info->count; i++) {
> >>>>           PCIHostDeviceAddress host;
> >>>>@@ -2029,10 +2032,26 @@ static int
> >>>>vfio_check_bus_reset(NotifierWithReturn *n, void *opaque)
> >>>>       return vfio_check_host_bus_reset(vdev);
> >>>>   }
> >>>>+static void vfio_aer_pre_reset(PCIDevice *pdev)
> >>>>+{
> >>>>+    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> >>>>+
> >>>>+    vdev->aer_reset = true;
> >>>>+    vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
> >>>>+}
> >>>Doesn't this lead to multiple host bus resets per guest bus reset
> >>>in
> >>>many cases?  It looks like we'll do it once per vfio-pci device,
> >>>even
> >>>if those devices are on the same host bus.  That's a 1 second
> >>>operation
> >>>per device.  Can we avoid that?  Maybe some sort of sequence ID
> >>>could
> >>>help a device figure out whether it's already been reset as part of
> >>>a
> >>>dependent device for this particular guest bus reset.  Thanks,
> >>That's right, I missed this case, but I don't understand the scenario
> >>how to
> >>use a sequence ID to mark the device if been reset. can you detail it
> >>?
> >I don't really have a concrete idea for a sequence ID, it was just a
> >thought that maybe if each bus reset had a sequence ID then devices
> >could know whether they've already been reset for that sequence ID.
> >  The basic problem we have is that reset callbacks are per device and
> >it's difficult to infer which individual resets are part of that bus
> >reset.  In fact, do we propagate resets correctly down secondary
> >bridges?  We're triggering off a VM write of the bridge control bus
> >reset bit triggering from 0->1 and we then call qbus_reset_all() on
> >that qbus, which I think is just going to call pci_bridge_reset() for
> >any other bridges, which doesn't do anything about resetting deeper
> >subordinate buses.  I think that means that if we had a root port with
> >a switch below it and endpoints below that, if the VM triggered a
> >secondary bus reset at the root port, the endpoints would never see it,
> >which is not how real hardware works.
> Hi Alex,
> 
>     After think over the subordinate case, I think the qbus_reset_all()
> should also reset the deeper endpoints. which recursive the qbus and qdev
> to reset all devices blow sec bus and bridge device independently.
> 
> MST's suggestion in another mail to add a hot reset callback for sec bus
> reset
> should be good to avoid the couple of pre_reset/post_reset callbacks.
> thanks:)
> 
> but for the multiple resets triggering on the same bus, I think the virtual
> bus-level
> callback do not guarantee the host bus reset only once for all dependent
> devices.
> because the host reset dependent devices may not on the same virtual bus.
> we allowed that the dependent devices address below the bus. so the devices
> still don't know the host bus is in reset. here I think we should have a
> host bus
> model bind to all the dependent devices. when devices do hot reset, we mark
> the bus in reset. then other dependent devices know the bus is reset. how
> about
> this?
> 
> Thanks,
> Chen


What does "below bus" mean here?
Below what?


> >
> >>additional, there was a mechanism to compute device whether need to
> >>be reset
> >>by hot reset. so I simply modify the code as the following:
> >>
> >>diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> >>index a9bc67e..42774ca 100644
> >>--- a/hw/vfio/pci.c
> >>+++ b/hw/vfio/pci.c
> >>@@ -2063,13 +2063,19 @@ static void vfio_aer_pre_reset(PCIDevice
> >>*pdev)
> >>       VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> >>
> >>       vdev->aer_reset = true;
> >>-    vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
> >>+    vdev->vbasedev.needs_reset = true;
> >>   }
> >>
> >>   static void vfio_aer_post_reset(PCIDevice *pdev)
> >>   {
> >>       VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> >>
> >>+    if (!vdev->single_depend_dev && vdev->vbasedev.needs_reset) {
> >>+        vfio_pci_hot_reset(vdev, false);
> >>+    } else {
> >>+        vfio_pci_hot_reset(vdev, true);
> >>+    }
> >>+
> >>       vdev->aer_reset = false;
> >>   }
> >>
> >>what do you think of this ?
> >I think it might be a bigger problem than that subtle change.  I wonder
> >if we really need a better model of the reset line through the
> >subordinate buses.  When reset is asserted, we'd set a bus_in_reset
> >flag on the bus and trigger downstream bridges to do the same.  Then
> >when the user de-asserts reset, we'd call qbus_reset_all() and
> >propagate it through to downstream buses.  That way the per device
> >reset callback could check to see if the bus is in reset and aer
> >devices can then know to do a bus reset.  Finally, the bus_in_reset
> >flag would be cleared on all the affected buses.  I'm sure there are
> >numerous details missing there, but it seems like it might be a
> >reasonable approach.  Thanks,
> >
> >Alex
> >
> >
> >.
> >
> 
> 

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH v14 Resend 08/13] vfio: add check host bus reset is support or not
  2015-12-24 14:32   ` Michael S. Tsirkin
@ 2015-12-24 17:47     ` Alex Williamson
  2015-12-24 18:06       ` Michael S. Tsirkin
  0 siblings, 1 reply; 34+ messages in thread
From: Alex Williamson @ 2015-12-24 17:47 UTC (permalink / raw)
  To: Michael S. Tsirkin, Cao jin; +Cc: chen.fan.fnst, qemu-devel

On Thu, 2015-12-24 at 16:32 +0200, Michael S. Tsirkin wrote:
> On Thu, Dec 17, 2015 at 09:41:49AM +0800, Cao jin wrote:
> > From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > 
> > when init vfio devices done, we should test all the devices
> > supported
> > aer whether conflict with others. For each one, get the hot reset
> > info for the affected device list.  For each affected device, all
> > should attach to the VM and on/below the same bus. also, we should
> > test
> > all of the non-AER supporting vfio-pci devices on or below the
> > target
> > bus to verify they have a reset mechanism.
> > 
> > Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > ---
> >  hw/vfio/pci.c | 236
> > ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
> >  hw/vfio/pci.h |   1 +
> >  2 files changed, 230 insertions(+), 7 deletions(-)
> > 
> > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> > index d00b0e4..6926dcc 100644
> > --- a/hw/vfio/pci.c
> > +++ b/hw/vfio/pci.c
> > @@ -1806,6 +1806,216 @@ static int vfio_add_std_cap(VFIOPCIDevice
> > *vdev, uint8_t pos)
> >      return 0;
> >  }
> >  
> > +static bool vfio_pci_host_slot_match(PCIHostDeviceAddress *host1,
> > +                                     PCIHostDeviceAddress *host2)
> > +{
> > +    return (host1->domain == host2->domain && host1->bus == host2-
> > >bus &&
> > +            host1->slot == host2->slot);
> > +}
> > +
> > +static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
> > +                                PCIHostDeviceAddress *host2)
> > +{
> > +    return (vfio_pci_host_slot_match(host1, host2) &&
> > +            host1->function == host2->function);
> > +}
> > +
> > +struct VFIODeviceFind {
> > +    PCIDevice *pdev;
> > +    bool found;
> > +};
> > +
> > +static void vfio_check_device_noreset(PCIBus *bus, PCIDevice
> > *pdev,
> > +                                      void *opaque)
> > +{
> > +    DeviceState *dev = DEVICE(pdev);
> > +    DeviceClass *dc = DEVICE_GET_CLASS(dev);
> > +    VFIOPCIDevice *vdev;
> > +    struct VFIODeviceFind *find = opaque;
> > +
> > +    if (find->found) {
> > +        return;
> > +    }
> > +
> > +    if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
> > +        if (!dc->reset) {
> > +            goto found;
> > +        }
> > +        return;
> > +    }
> > +    vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> > +    if (!(vdev->features & VFIO_FEATURE_ENABLE_AER) &&
> > +        !vdev->vbasedev.reset_works) {
> > +        goto found;
> > +    }
> > +
> > +    return;
> > +found:
> > +    find->pdev = pdev;
> > +    find->found = true;
> > +}
> > +
> > +static void device_find(PCIBus *bus, PCIDevice *pdev, void
> > *opaque)
> > +{
> > +    struct VFIODeviceFind *find = opaque;
> > +
> > +    if (find->found) {
> > +        return;
> > +    }
> > +
> > +    if (pdev == find->pdev) {
> > +        find->found = true;
> > +    }
> > +}
> > +
> > +static int vfio_check_host_bus_reset(VFIOPCIDevice *vdev)
> > +{
> > +    PCIBus *bus = vdev->pdev.bus;
> > +    struct vfio_pci_hot_reset_info *info = NULL;
> > +    struct vfio_pci_dependent_device *devices;
> > +    VFIOGroup *group;
> > +    struct VFIODeviceFind find;
> > +    int ret, i;
> > +
> > +    ret = vfio_get_hot_reset_info(vdev, &info);
> > +    if (ret) {
> > +        error_report("vfio: Cannot enable AER for device %s,"
> > +                     " device does not support hot reset.",
> > +                     vdev->vbasedev.name);
> > +        goto out;
> > +    }
> > +
> > +    /* List all affected devices by bus reset */
> > +    devices = &info->devices[0];
> > +
> > +    /* Verify that we have all the groups required */
> > +    for (i = 0; i < info->count; i++) {
> > +        PCIHostDeviceAddress host;
> > +        VFIOPCIDevice *tmp;
> > +        VFIODevice *vbasedev_iter;
> > +        bool found = false;
> > +
> > +        host.domain = devices[i].segment;
> > +        host.bus = devices[i].bus;
> > +        host.slot = PCI_SLOT(devices[i].devfn);
> > +        host.function = PCI_FUNC(devices[i].devfn);
> > +
> > +        /* Skip the current device */
> > +        if (vfio_pci_host_match(&host, &vdev->host)) {
> > +            continue;
> > +        }
> > +
> > +        /* Ensure we own the group of the affected device */
> > +        QLIST_FOREACH(group, &vfio_group_list, next) {
> > +            if (group->groupid == devices[i].group_id) {
> > +                break;
> > +            }
> > +        }
> > +
> > +        if (!group) {
> > +            error_report("vfio: Cannot enable AER for device %s, "
> > +                         "depends on group %d which is not
> > owned.",
> > +                         vdev->vbasedev.name,
> > devices[i].group_id);
> > +            ret = -1;
> > +            goto out;
> > +        }
> > +
> > +        /* Ensure affected devices for reset on/blow the bus */
> > +        QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
> > +            if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
> > +                continue;
> > +            }
> > +            tmp = container_of(vbasedev_iter, VFIOPCIDevice,
> > vbasedev);
> > +            if (vfio_pci_host_match(&host, &tmp->host)) {
> > +                PCIDevice *pci = PCI_DEVICE(tmp);
> > +
> > +                /*
> > +                 * For multifunction device, due to vfio driver
> > signal all
> > +                 * functions under the upstream link of the end
> > point. here
> > +                 * we validate all functions whether enable AER.
> > +                 */
> > +                if (vfio_pci_host_slot_match(&vdev->host, &tmp-
> > >host) &&
> > +                    !(tmp->features & VFIO_FEATURE_ENABLE_AER)) {
> > +                    error_report("vfio: Cannot enable AER for
> > device %s, on same slot"
> > +                                 " the dependent device %s which
> > does not enable AER.",
> > +                                 vdev->vbasedev.name, tmp-
> > >vbasedev.name);
> > +                    ret = -1;
> > +                    goto out;
> > +                }
> > +
> > +                find.pdev = pci;
> > +                find.found = false;
> > +                pci_for_each_device(bus, pci_bus_num(bus),
> > +                                    device_find, &find);
> > +                if (!find.found) {
> > +                    error_report("vfio: Cannot enable AER for
> > device %s, "
> > +                                 "the dependent device %s is not
> > under the same bus",
> > +                                 vdev->vbasedev.name, tmp-
> > >vbasedev.name);
> > +                    ret = -1;
> > +                    goto out;
> > +                }
> > +                found = true;
> > +                break;
> > +            }
> > +        }
> > +
> > +        /* Ensure all affected devices assigned to VM */
> 
> I am puzzled.
> Does not kernel enforce this already?
> If not it's a security problem.
> If yes why does userspace need to check this?

DMA isolation and bus level isolation are separate concepts.  Each
function of a multi-function device can have DMA isolation, but a user
needs to own all of the functions affected by a bus reset in order to
perform one.  An AER configuration can only be created if the user can
translate a guest bus reset into a host bus reset and therefore needs
to test whether it has the permissions to do so.  I believe over the
course of reviews we've also added some simplifying constraints around
this to reduce the problem set, things like all the groups being
assigned rather than just owned by the user.  However, I believe the
kernel is sound in how it provides security for bus resets.  Thanks,

Alex

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH v14 Resend 08/13] vfio: add check host bus reset is support or not
  2015-12-24 17:47     ` Alex Williamson
@ 2015-12-24 18:06       ` Michael S. Tsirkin
  2015-12-24 18:20         ` Alex Williamson
  0 siblings, 1 reply; 34+ messages in thread
From: Michael S. Tsirkin @ 2015-12-24 18:06 UTC (permalink / raw)
  To: Alex Williamson; +Cc: chen.fan.fnst, Cao jin, qemu-devel

On Thu, Dec 24, 2015 at 10:47:06AM -0700, Alex Williamson wrote:
> On Thu, 2015-12-24 at 16:32 +0200, Michael S. Tsirkin wrote:
> > On Thu, Dec 17, 2015 at 09:41:49AM +0800, Cao jin wrote:
> > > From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > > 
> > > when init vfio devices done, we should test all the devices
> > > supported
> > > aer whether conflict with others. For each one, get the hot reset
> > > info for the affected device list.  For each affected device, all
> > > should attach to the VM and on/below the same bus. also, we should
> > > test
> > > all of the non-AER supporting vfio-pci devices on or below the
> > > target
> > > bus to verify they have a reset mechanism.
> > > 
> > > Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > > ---
> > >  hw/vfio/pci.c | 236
> > > ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
> > >  hw/vfio/pci.h |   1 +
> > >  2 files changed, 230 insertions(+), 7 deletions(-)
> > > 
> > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> > > index d00b0e4..6926dcc 100644
> > > --- a/hw/vfio/pci.c
> > > +++ b/hw/vfio/pci.c
> > > @@ -1806,6 +1806,216 @@ static int vfio_add_std_cap(VFIOPCIDevice
> > > *vdev, uint8_t pos)
> > >      return 0;
> > >  }
> > >  
> > > +static bool vfio_pci_host_slot_match(PCIHostDeviceAddress *host1,
> > > +                                     PCIHostDeviceAddress *host2)
> > > +{
> > > +    return (host1->domain == host2->domain && host1->bus == host2-
> > > >bus &&
> > > +            host1->slot == host2->slot);
> > > +}
> > > +
> > > +static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
> > > +                                PCIHostDeviceAddress *host2)
> > > +{
> > > +    return (vfio_pci_host_slot_match(host1, host2) &&
> > > +            host1->function == host2->function);
> > > +}
> > > +
> > > +struct VFIODeviceFind {
> > > +    PCIDevice *pdev;
> > > +    bool found;
> > > +};
> > > +
> > > +static void vfio_check_device_noreset(PCIBus *bus, PCIDevice
> > > *pdev,
> > > +                                      void *opaque)
> > > +{
> > > +    DeviceState *dev = DEVICE(pdev);
> > > +    DeviceClass *dc = DEVICE_GET_CLASS(dev);
> > > +    VFIOPCIDevice *vdev;
> > > +    struct VFIODeviceFind *find = opaque;
> > > +
> > > +    if (find->found) {
> > > +        return;
> > > +    }
> > > +
> > > +    if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
> > > +        if (!dc->reset) {
> > > +            goto found;
> > > +        }
> > > +        return;
> > > +    }
> > > +    vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> > > +    if (!(vdev->features & VFIO_FEATURE_ENABLE_AER) &&
> > > +        !vdev->vbasedev.reset_works) {
> > > +        goto found;
> > > +    }
> > > +
> > > +    return;
> > > +found:
> > > +    find->pdev = pdev;
> > > +    find->found = true;
> > > +}
> > > +
> > > +static void device_find(PCIBus *bus, PCIDevice *pdev, void
> > > *opaque)
> > > +{
> > > +    struct VFIODeviceFind *find = opaque;
> > > +
> > > +    if (find->found) {
> > > +        return;
> > > +    }
> > > +
> > > +    if (pdev == find->pdev) {
> > > +        find->found = true;
> > > +    }
> > > +}
> > > +
> > > +static int vfio_check_host_bus_reset(VFIOPCIDevice *vdev)
> > > +{
> > > +    PCIBus *bus = vdev->pdev.bus;
> > > +    struct vfio_pci_hot_reset_info *info = NULL;
> > > +    struct vfio_pci_dependent_device *devices;
> > > +    VFIOGroup *group;
> > > +    struct VFIODeviceFind find;
> > > +    int ret, i;
> > > +
> > > +    ret = vfio_get_hot_reset_info(vdev, &info);
> > > +    if (ret) {
> > > +        error_report("vfio: Cannot enable AER for device %s,"
> > > +                     " device does not support hot reset.",
> > > +                     vdev->vbasedev.name);
> > > +        goto out;
> > > +    }
> > > +
> > > +    /* List all affected devices by bus reset */
> > > +    devices = &info->devices[0];
> > > +
> > > +    /* Verify that we have all the groups required */
> > > +    for (i = 0; i < info->count; i++) {
> > > +        PCIHostDeviceAddress host;
> > > +        VFIOPCIDevice *tmp;
> > > +        VFIODevice *vbasedev_iter;
> > > +        bool found = false;
> > > +
> > > +        host.domain = devices[i].segment;
> > > +        host.bus = devices[i].bus;
> > > +        host.slot = PCI_SLOT(devices[i].devfn);
> > > +        host.function = PCI_FUNC(devices[i].devfn);
> > > +
> > > +        /* Skip the current device */
> > > +        if (vfio_pci_host_match(&host, &vdev->host)) {
> > > +            continue;
> > > +        }
> > > +
> > > +        /* Ensure we own the group of the affected device */
> > > +        QLIST_FOREACH(group, &vfio_group_list, next) {
> > > +            if (group->groupid == devices[i].group_id) {
> > > +                break;
> > > +            }
> > > +        }
> > > +
> > > +        if (!group) {
> > > +            error_report("vfio: Cannot enable AER for device %s, "
> > > +                         "depends on group %d which is not
> > > owned.",
> > > +                         vdev->vbasedev.name,
> > > devices[i].group_id);
> > > +            ret = -1;
> > > +            goto out;
> > > +        }
> > > +
> > > +        /* Ensure affected devices for reset on/blow the bus */
> > > +        QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
> > > +            if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
> > > +                continue;
> > > +            }
> > > +            tmp = container_of(vbasedev_iter, VFIOPCIDevice,
> > > vbasedev);
> > > +            if (vfio_pci_host_match(&host, &tmp->host)) {
> > > +                PCIDevice *pci = PCI_DEVICE(tmp);
> > > +
> > > +                /*
> > > +                 * For multifunction device, due to vfio driver
> > > signal all
> > > +                 * functions under the upstream link of the end
> > > point. here
> > > +                 * we validate all functions whether enable AER.
> > > +                 */
> > > +                if (vfio_pci_host_slot_match(&vdev->host, &tmp-
> > > >host) &&
> > > +                    !(tmp->features & VFIO_FEATURE_ENABLE_AER)) {
> > > +                    error_report("vfio: Cannot enable AER for
> > > device %s, on same slot"
> > > +                                 " the dependent device %s which
> > > does not enable AER.",
> > > +                                 vdev->vbasedev.name, tmp-
> > > >vbasedev.name);
> > > +                    ret = -1;
> > > +                    goto out;
> > > +                }
> > > +
> > > +                find.pdev = pci;
> > > +                find.found = false;
> > > +                pci_for_each_device(bus, pci_bus_num(bus),
> > > +                                    device_find, &find);
> > > +                if (!find.found) {
> > > +                    error_report("vfio: Cannot enable AER for
> > > device %s, "
> > > +                                 "the dependent device %s is not
> > > under the same bus",
> > > +                                 vdev->vbasedev.name, tmp-
> > > >vbasedev.name);
> > > +                    ret = -1;
> > > +                    goto out;
> > > +                }
> > > +                found = true;
> > > +                break;
> > > +            }
> > > +        }
> > > +
> > > +        /* Ensure all affected devices assigned to VM */
> > 
> > I am puzzled.
> > Does not kernel enforce this already?
> > If not it's a security problem.
> > If yes why does userspace need to check this?
> 
> DMA isolation and bus level isolation are separate concepts.  Each
> function of a multi-function device can have DMA isolation, but a user
> needs to own all of the functions affected by a bus reset in order to
> perform one.  An AER configuration can only be created if the user can
> translate a guest bus reset into a host bus reset and therefore needs
> to test whether it has the permissions to do so.  I believe over the
> course of reviews we've also added some simplifying constraints around
> this to reduce the problem set, things like all the groups being
> assigned rather than just owned by the user.  However, I believe the
> kernel is sound in how it provides security for bus resets.  Thanks,
> 
> Alex

Yes, sounds good.

So how about just trying to do bus reset at setup time?
If kernel allows this, we know it is safe ...

-- 
MST

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH v14 Resend 08/13] vfio: add check host bus reset is support or not
  2015-12-24 18:06       ` Michael S. Tsirkin
@ 2015-12-24 18:20         ` Alex Williamson
  2015-12-24 18:23           ` Michael S. Tsirkin
  0 siblings, 1 reply; 34+ messages in thread
From: Alex Williamson @ 2015-12-24 18:20 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: chen.fan.fnst, Cao jin, qemu-devel

On Thu, 2015-12-24 at 20:06 +0200, Michael S. Tsirkin wrote:
> On Thu, Dec 24, 2015 at 10:47:06AM -0700, Alex Williamson wrote:
> > On Thu, 2015-12-24 at 16:32 +0200, Michael S. Tsirkin wrote:
> > > On Thu, Dec 17, 2015 at 09:41:49AM +0800, Cao jin wrote:
> > > > From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > > > 
> > > > when init vfio devices done, we should test all the devices
> > > > supported
> > > > aer whether conflict with others. For each one, get the hot
> > > > reset
> > > > info for the affected device list.  For each affected device,
> > > > all
> > > > should attach to the VM and on/below the same bus. also, we
> > > > should
> > > > test
> > > > all of the non-AER supporting vfio-pci devices on or below the
> > > > target
> > > > bus to verify they have a reset mechanism.
> > > > 
> > > > Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > > > ---
> > > >  hw/vfio/pci.c | 236
> > > > ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
> > > >  hw/vfio/pci.h |   1 +
> > > >  2 files changed, 230 insertions(+), 7 deletions(-)
> > > > 
> > > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> > > > index d00b0e4..6926dcc 100644
> > > > --- a/hw/vfio/pci.c
> > > > +++ b/hw/vfio/pci.c
> > > > @@ -1806,6 +1806,216 @@ static int
> > > > vfio_add_std_cap(VFIOPCIDevice
> > > > *vdev, uint8_t pos)
> > > >      return 0;
> > > >  }
> > > >  
> > > > +static bool vfio_pci_host_slot_match(PCIHostDeviceAddress
> > > > *host1,
> > > > +                                     PCIHostDeviceAddress
> > > > *host2)
> > > > +{
> > > > +    return (host1->domain == host2->domain && host1->bus ==
> > > > host2-
> > > > > bus &&
> > > > +            host1->slot == host2->slot);
> > > > +}
> > > > +
> > > > +static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
> > > > +                                PCIHostDeviceAddress *host2)
> > > > +{
> > > > +    return (vfio_pci_host_slot_match(host1, host2) &&
> > > > +            host1->function == host2->function);
> > > > +}
> > > > +
> > > > +struct VFIODeviceFind {
> > > > +    PCIDevice *pdev;
> > > > +    bool found;
> > > > +};
> > > > +
> > > > +static void vfio_check_device_noreset(PCIBus *bus, PCIDevice
> > > > *pdev,
> > > > +                                      void *opaque)
> > > > +{
> > > > +    DeviceState *dev = DEVICE(pdev);
> > > > +    DeviceClass *dc = DEVICE_GET_CLASS(dev);
> > > > +    VFIOPCIDevice *vdev;
> > > > +    struct VFIODeviceFind *find = opaque;
> > > > +
> > > > +    if (find->found) {
> > > > +        return;
> > > > +    }
> > > > +
> > > > +    if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
> > > > +        if (!dc->reset) {
> > > > +            goto found;
> > > > +        }
> > > > +        return;
> > > > +    }
> > > > +    vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> > > > +    if (!(vdev->features & VFIO_FEATURE_ENABLE_AER) &&
> > > > +        !vdev->vbasedev.reset_works) {
> > > > +        goto found;
> > > > +    }
> > > > +
> > > > +    return;
> > > > +found:
> > > > +    find->pdev = pdev;
> > > > +    find->found = true;
> > > > +}
> > > > +
> > > > +static void device_find(PCIBus *bus, PCIDevice *pdev, void
> > > > *opaque)
> > > > +{
> > > > +    struct VFIODeviceFind *find = opaque;
> > > > +
> > > > +    if (find->found) {
> > > > +        return;
> > > > +    }
> > > > +
> > > > +    if (pdev == find->pdev) {
> > > > +        find->found = true;
> > > > +    }
> > > > +}
> > > > +
> > > > +static int vfio_check_host_bus_reset(VFIOPCIDevice *vdev)
> > > > +{
> > > > +    PCIBus *bus = vdev->pdev.bus;
> > > > +    struct vfio_pci_hot_reset_info *info = NULL;
> > > > +    struct vfio_pci_dependent_device *devices;
> > > > +    VFIOGroup *group;
> > > > +    struct VFIODeviceFind find;
> > > > +    int ret, i;
> > > > +
> > > > +    ret = vfio_get_hot_reset_info(vdev, &info);
> > > > +    if (ret) {
> > > > +        error_report("vfio: Cannot enable AER for device %s,"
> > > > +                     " device does not support hot reset.",
> > > > +                     vdev->vbasedev.name);
> > > > +        goto out;
> > > > +    }
> > > > +
> > > > +    /* List all affected devices by bus reset */
> > > > +    devices = &info->devices[0];
> > > > +
> > > > +    /* Verify that we have all the groups required */
> > > > +    for (i = 0; i < info->count; i++) {
> > > > +        PCIHostDeviceAddress host;
> > > > +        VFIOPCIDevice *tmp;
> > > > +        VFIODevice *vbasedev_iter;
> > > > +        bool found = false;
> > > > +
> > > > +        host.domain = devices[i].segment;
> > > > +        host.bus = devices[i].bus;
> > > > +        host.slot = PCI_SLOT(devices[i].devfn);
> > > > +        host.function = PCI_FUNC(devices[i].devfn);
> > > > +
> > > > +        /* Skip the current device */
> > > > +        if (vfio_pci_host_match(&host, &vdev->host)) {
> > > > +            continue;
> > > > +        }
> > > > +
> > > > +        /* Ensure we own the group of the affected device */
> > > > +        QLIST_FOREACH(group, &vfio_group_list, next) {
> > > > +            if (group->groupid == devices[i].group_id) {
> > > > +                break;
> > > > +            }
> > > > +        }
> > > > +
> > > > +        if (!group) {
> > > > +            error_report("vfio: Cannot enable AER for device
> > > > %s, "
> > > > +                         "depends on group %d which is not
> > > > owned.",
> > > > +                         vdev->vbasedev.name,
> > > > devices[i].group_id);
> > > > +            ret = -1;
> > > > +            goto out;
> > > > +        }
> > > > +
> > > > +        /* Ensure affected devices for reset on/blow the bus
> > > > */
> > > > +        QLIST_FOREACH(vbasedev_iter, &group->device_list,
> > > > next) {
> > > > +            if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
> > > > +                continue;
> > > > +            }
> > > > +            tmp = container_of(vbasedev_iter, VFIOPCIDevice,
> > > > vbasedev);
> > > > +            if (vfio_pci_host_match(&host, &tmp->host)) {
> > > > +                PCIDevice *pci = PCI_DEVICE(tmp);
> > > > +
> > > > +                /*
> > > > +                 * For multifunction device, due to vfio
> > > > driver
> > > > signal all
> > > > +                 * functions under the upstream link of the
> > > > end
> > > > point. here
> > > > +                 * we validate all functions whether enable
> > > > AER.
> > > > +                 */
> > > > +                if (vfio_pci_host_slot_match(&vdev->host,
> > > > &tmp-
> > > > > host) &&
> > > > +                    !(tmp->features &
> > > > VFIO_FEATURE_ENABLE_AER)) {
> > > > +                    error_report("vfio: Cannot enable AER for
> > > > device %s, on same slot"
> > > > +                                 " the dependent device %s
> > > > which
> > > > does not enable AER.",
> > > > +                                 vdev->vbasedev.name, tmp-
> > > > > vbasedev.name);
> > > > +                    ret = -1;
> > > > +                    goto out;
> > > > +                }
> > > > +
> > > > +                find.pdev = pci;
> > > > +                find.found = false;
> > > > +                pci_for_each_device(bus, pci_bus_num(bus),
> > > > +                                    device_find, &find);
> > > > +                if (!find.found) {
> > > > +                    error_report("vfio: Cannot enable AER for
> > > > device %s, "
> > > > +                                 "the dependent device %s is
> > > > not
> > > > under the same bus",
> > > > +                                 vdev->vbasedev.name, tmp-
> > > > > vbasedev.name);
> > > > +                    ret = -1;
> > > > +                    goto out;
> > > > +                }
> > > > +                found = true;
> > > > +                break;
> > > > +            }
> > > > +        }
> > > > +
> > > > +        /* Ensure all affected devices assigned to VM */
> > > 
> > > I am puzzled.
> > > Does not kernel enforce this already?
> > > If not it's a security problem.
> > > If yes why does userspace need to check this?
> > 
> > DMA isolation and bus level isolation are separate concepts.  Each
> > function of a multi-function device can have DMA isolation, but a
> > user
> > needs to own all of the functions affected by a bus reset in order
> > to
> > perform one.  An AER configuration can only be created if the user
> > can
> > translate a guest bus reset into a host bus reset and therefore
> > needs
> > to test whether it has the permissions to do so.  I believe over
> > the
> > course of reviews we've also added some simplifying constraints
> > around
> > this to reduce the problem set, things like all the groups being
> > assigned rather than just owned by the user.  However, I believe
> > the
> > kernel is sound in how it provides security for bus resets.
> >  Thanks,
> > 
> > Alex
> 
> Yes, sounds good.
> 
> So how about just trying to do bus reset at setup time?
> If kernel allows this, we know it is safe ...

The host may support hotplug, what's possible at setup time may not be
possible when an error occurs.  It's unlikely, but worth considering I
think.  Thanks,

Alex

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH v14 Resend 08/13] vfio: add check host bus reset is support or not
  2015-12-24 18:20         ` Alex Williamson
@ 2015-12-24 18:23           ` Michael S. Tsirkin
  2015-12-24 18:41             ` Alex Williamson
  0 siblings, 1 reply; 34+ messages in thread
From: Michael S. Tsirkin @ 2015-12-24 18:23 UTC (permalink / raw)
  To: Alex Williamson; +Cc: chen.fan.fnst, Cao jin, qemu-devel

On Thu, Dec 24, 2015 at 11:20:26AM -0700, Alex Williamson wrote:
> On Thu, 2015-12-24 at 20:06 +0200, Michael S. Tsirkin wrote:
> > On Thu, Dec 24, 2015 at 10:47:06AM -0700, Alex Williamson wrote:
> > > On Thu, 2015-12-24 at 16:32 +0200, Michael S. Tsirkin wrote:
> > > > On Thu, Dec 17, 2015 at 09:41:49AM +0800, Cao jin wrote:
> > > > > From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > > > > 
> > > > > when init vfio devices done, we should test all the devices
> > > > > supported
> > > > > aer whether conflict with others. For each one, get the hot
> > > > > reset
> > > > > info for the affected device list.  For each affected device,
> > > > > all
> > > > > should attach to the VM and on/below the same bus. also, we
> > > > > should
> > > > > test
> > > > > all of the non-AER supporting vfio-pci devices on or below the
> > > > > target
> > > > > bus to verify they have a reset mechanism.
> > > > > 
> > > > > Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > > > > ---
> > > > >  hw/vfio/pci.c | 236
> > > > > ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
> > > > >  hw/vfio/pci.h |   1 +
> > > > >  2 files changed, 230 insertions(+), 7 deletions(-)
> > > > > 
> > > > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> > > > > index d00b0e4..6926dcc 100644
> > > > > --- a/hw/vfio/pci.c
> > > > > +++ b/hw/vfio/pci.c
> > > > > @@ -1806,6 +1806,216 @@ static int
> > > > > vfio_add_std_cap(VFIOPCIDevice
> > > > > *vdev, uint8_t pos)
> > > > >      return 0;
> > > > >  }
> > > > >  
> > > > > +static bool vfio_pci_host_slot_match(PCIHostDeviceAddress
> > > > > *host1,
> > > > > +                                     PCIHostDeviceAddress
> > > > > *host2)
> > > > > +{
> > > > > +    return (host1->domain == host2->domain && host1->bus ==
> > > > > host2-
> > > > > > bus &&
> > > > > +            host1->slot == host2->slot);
> > > > > +}
> > > > > +
> > > > > +static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
> > > > > +                                PCIHostDeviceAddress *host2)
> > > > > +{
> > > > > +    return (vfio_pci_host_slot_match(host1, host2) &&
> > > > > +            host1->function == host2->function);
> > > > > +}
> > > > > +
> > > > > +struct VFIODeviceFind {
> > > > > +    PCIDevice *pdev;
> > > > > +    bool found;
> > > > > +};
> > > > > +
> > > > > +static void vfio_check_device_noreset(PCIBus *bus, PCIDevice
> > > > > *pdev,
> > > > > +                                      void *opaque)
> > > > > +{
> > > > > +    DeviceState *dev = DEVICE(pdev);
> > > > > +    DeviceClass *dc = DEVICE_GET_CLASS(dev);
> > > > > +    VFIOPCIDevice *vdev;
> > > > > +    struct VFIODeviceFind *find = opaque;
> > > > > +
> > > > > +    if (find->found) {
> > > > > +        return;
> > > > > +    }
> > > > > +
> > > > > +    if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
> > > > > +        if (!dc->reset) {
> > > > > +            goto found;
> > > > > +        }
> > > > > +        return;
> > > > > +    }
> > > > > +    vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> > > > > +    if (!(vdev->features & VFIO_FEATURE_ENABLE_AER) &&
> > > > > +        !vdev->vbasedev.reset_works) {
> > > > > +        goto found;
> > > > > +    }
> > > > > +
> > > > > +    return;
> > > > > +found:
> > > > > +    find->pdev = pdev;
> > > > > +    find->found = true;
> > > > > +}
> > > > > +
> > > > > +static void device_find(PCIBus *bus, PCIDevice *pdev, void
> > > > > *opaque)
> > > > > +{
> > > > > +    struct VFIODeviceFind *find = opaque;
> > > > > +
> > > > > +    if (find->found) {
> > > > > +        return;
> > > > > +    }
> > > > > +
> > > > > +    if (pdev == find->pdev) {
> > > > > +        find->found = true;
> > > > > +    }
> > > > > +}
> > > > > +
> > > > > +static int vfio_check_host_bus_reset(VFIOPCIDevice *vdev)
> > > > > +{
> > > > > +    PCIBus *bus = vdev->pdev.bus;
> > > > > +    struct vfio_pci_hot_reset_info *info = NULL;
> > > > > +    struct vfio_pci_dependent_device *devices;
> > > > > +    VFIOGroup *group;
> > > > > +    struct VFIODeviceFind find;
> > > > > +    int ret, i;
> > > > > +
> > > > > +    ret = vfio_get_hot_reset_info(vdev, &info);
> > > > > +    if (ret) {
> > > > > +        error_report("vfio: Cannot enable AER for device %s,"
> > > > > +                     " device does not support hot reset.",
> > > > > +                     vdev->vbasedev.name);
> > > > > +        goto out;
> > > > > +    }
> > > > > +
> > > > > +    /* List all affected devices by bus reset */
> > > > > +    devices = &info->devices[0];
> > > > > +
> > > > > +    /* Verify that we have all the groups required */
> > > > > +    for (i = 0; i < info->count; i++) {
> > > > > +        PCIHostDeviceAddress host;
> > > > > +        VFIOPCIDevice *tmp;
> > > > > +        VFIODevice *vbasedev_iter;
> > > > > +        bool found = false;
> > > > > +
> > > > > +        host.domain = devices[i].segment;
> > > > > +        host.bus = devices[i].bus;
> > > > > +        host.slot = PCI_SLOT(devices[i].devfn);
> > > > > +        host.function = PCI_FUNC(devices[i].devfn);
> > > > > +
> > > > > +        /* Skip the current device */
> > > > > +        if (vfio_pci_host_match(&host, &vdev->host)) {
> > > > > +            continue;
> > > > > +        }
> > > > > +
> > > > > +        /* Ensure we own the group of the affected device */
> > > > > +        QLIST_FOREACH(group, &vfio_group_list, next) {
> > > > > +            if (group->groupid == devices[i].group_id) {
> > > > > +                break;
> > > > > +            }
> > > > > +        }
> > > > > +
> > > > > +        if (!group) {
> > > > > +            error_report("vfio: Cannot enable AER for device
> > > > > %s, "
> > > > > +                         "depends on group %d which is not
> > > > > owned.",
> > > > > +                         vdev->vbasedev.name,
> > > > > devices[i].group_id);
> > > > > +            ret = -1;
> > > > > +            goto out;
> > > > > +        }
> > > > > +
> > > > > +        /* Ensure affected devices for reset on/blow the bus
> > > > > */
> > > > > +        QLIST_FOREACH(vbasedev_iter, &group->device_list,
> > > > > next) {
> > > > > +            if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
> > > > > +                continue;
> > > > > +            }
> > > > > +            tmp = container_of(vbasedev_iter, VFIOPCIDevice,
> > > > > vbasedev);
> > > > > +            if (vfio_pci_host_match(&host, &tmp->host)) {
> > > > > +                PCIDevice *pci = PCI_DEVICE(tmp);
> > > > > +
> > > > > +                /*
> > > > > +                 * For multifunction device, due to vfio
> > > > > driver
> > > > > signal all
> > > > > +                 * functions under the upstream link of the
> > > > > end
> > > > > point. here
> > > > > +                 * we validate all functions whether enable
> > > > > AER.
> > > > > +                 */
> > > > > +                if (vfio_pci_host_slot_match(&vdev->host,
> > > > > &tmp-
> > > > > > host) &&
> > > > > +                    !(tmp->features &
> > > > > VFIO_FEATURE_ENABLE_AER)) {
> > > > > +                    error_report("vfio: Cannot enable AER for
> > > > > device %s, on same slot"
> > > > > +                                 " the dependent device %s
> > > > > which
> > > > > does not enable AER.",
> > > > > +                                 vdev->vbasedev.name, tmp-
> > > > > > vbasedev.name);
> > > > > +                    ret = -1;
> > > > > +                    goto out;
> > > > > +                }
> > > > > +
> > > > > +                find.pdev = pci;
> > > > > +                find.found = false;
> > > > > +                pci_for_each_device(bus, pci_bus_num(bus),
> > > > > +                                    device_find, &find);
> > > > > +                if (!find.found) {
> > > > > +                    error_report("vfio: Cannot enable AER for
> > > > > device %s, "
> > > > > +                                 "the dependent device %s is
> > > > > not
> > > > > under the same bus",
> > > > > +                                 vdev->vbasedev.name, tmp-
> > > > > > vbasedev.name);
> > > > > +                    ret = -1;
> > > > > +                    goto out;
> > > > > +                }
> > > > > +                found = true;
> > > > > +                break;
> > > > > +            }
> > > > > +        }
> > > > > +
> > > > > +        /* Ensure all affected devices assigned to VM */
> > > > 
> > > > I am puzzled.
> > > > Does not kernel enforce this already?
> > > > If not it's a security problem.
> > > > If yes why does userspace need to check this?
> > > 
> > > DMA isolation and bus level isolation are separate concepts.  Each
> > > function of a multi-function device can have DMA isolation, but a
> > > user
> > > needs to own all of the functions affected by a bus reset in order
> > > to
> > > perform one.  An AER configuration can only be created if the user
> > > can
> > > translate a guest bus reset into a host bus reset and therefore
> > > needs
> > > to test whether it has the permissions to do so.  I believe over
> > > the
> > > course of reviews we've also added some simplifying constraints
> > > around
> > > this to reduce the problem set, things like all the groups being
> > > assigned rather than just owned by the user.  However, I believe
> > > the
> > > kernel is sound in how it provides security for bus resets.
> > >  Thanks,
> > > 
> > > Alex
> > 
> > Yes, sounds good.
> > 
> > So how about just trying to do bus reset at setup time?
> > If kernel allows this, we know it is safe ...
> 
> The host may support hotplug, what's possible at setup time may not be
> possible when an error occurs.

How does this patch help solve this problem?

> It's unlikely, but worth considering I
> think.

I suspect vfio will have to solve this in kernel
(e.g. automatically add all new devices in the same group
wrt reset).

>  Thanks,
> 
> Alex

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH v14 Resend 08/13] vfio: add check host bus reset is support or not
  2015-12-24 18:23           ` Michael S. Tsirkin
@ 2015-12-24 18:41             ` Alex Williamson
  2015-12-24 19:42               ` Michael S. Tsirkin
  0 siblings, 1 reply; 34+ messages in thread
From: Alex Williamson @ 2015-12-24 18:41 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: chen.fan.fnst, Cao jin, qemu-devel

On Thu, 2015-12-24 at 20:23 +0200, Michael S. Tsirkin wrote:
> On Thu, Dec 24, 2015 at 11:20:26AM -0700, Alex Williamson wrote:
> > On Thu, 2015-12-24 at 20:06 +0200, Michael S. Tsirkin wrote:
> > > On Thu, Dec 24, 2015 at 10:47:06AM -0700, Alex Williamson wrote:
> > > > On Thu, 2015-12-24 at 16:32 +0200, Michael S. Tsirkin wrote:
> > > > > On Thu, Dec 17, 2015 at 09:41:49AM +0800, Cao jin wrote:
> > > > > > From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > > > > > 
> > > > > > when init vfio devices done, we should test all the devices
> > > > > > supported
> > > > > > aer whether conflict with others. For each one, get the hot
> > > > > > reset
> > > > > > info for the affected device list.  For each affected
> > > > > > device,
> > > > > > all
> > > > > > should attach to the VM and on/below the same bus. also, we
> > > > > > should
> > > > > > test
> > > > > > all of the non-AER supporting vfio-pci devices on or below
> > > > > > the
> > > > > > target
> > > > > > bus to verify they have a reset mechanism.
> > > > > > 
> > > > > > Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > > > > > ---
> > > > > >  hw/vfio/pci.c | 236
> > > > > > ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
> > > > > >  hw/vfio/pci.h |   1 +
> > > > > >  2 files changed, 230 insertions(+), 7 deletions(-)
> > > > > > 
> > > > > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> > > > > > index d00b0e4..6926dcc 100644
> > > > > > --- a/hw/vfio/pci.c
> > > > > > +++ b/hw/vfio/pci.c
> > > > > > @@ -1806,6 +1806,216 @@ static int
> > > > > > vfio_add_std_cap(VFIOPCIDevice
> > > > > > *vdev, uint8_t pos)
> > > > > >      return 0;
> > > > > >  }
> > > > > >  
> > > > > > +static bool vfio_pci_host_slot_match(PCIHostDeviceAddress
> > > > > > *host1,
> > > > > > +                                     PCIHostDeviceAddress
> > > > > > *host2)
> > > > > > +{
> > > > > > +    return (host1->domain == host2->domain && host1->bus
> > > > > > ==
> > > > > > host2-
> > > > > > > bus &&
> > > > > > +            host1->slot == host2->slot);
> > > > > > +}
> > > > > > +
> > > > > > +static bool vfio_pci_host_match(PCIHostDeviceAddress
> > > > > > *host1,
> > > > > > +                                PCIHostDeviceAddress
> > > > > > *host2)
> > > > > > +{
> > > > > > +    return (vfio_pci_host_slot_match(host1, host2) &&
> > > > > > +            host1->function == host2->function);
> > > > > > +}
> > > > > > +
> > > > > > +struct VFIODeviceFind {
> > > > > > +    PCIDevice *pdev;
> > > > > > +    bool found;
> > > > > > +};
> > > > > > +
> > > > > > +static void vfio_check_device_noreset(PCIBus *bus,
> > > > > > PCIDevice
> > > > > > *pdev,
> > > > > > +                                      void *opaque)
> > > > > > +{
> > > > > > +    DeviceState *dev = DEVICE(pdev);
> > > > > > +    DeviceClass *dc = DEVICE_GET_CLASS(dev);
> > > > > > +    VFIOPCIDevice *vdev;
> > > > > > +    struct VFIODeviceFind *find = opaque;
> > > > > > +
> > > > > > +    if (find->found) {
> > > > > > +        return;
> > > > > > +    }
> > > > > > +
> > > > > > +    if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
> > > > > > +        if (!dc->reset) {
> > > > > > +            goto found;
> > > > > > +        }
> > > > > > +        return;
> > > > > > +    }
> > > > > > +    vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> > > > > > +    if (!(vdev->features & VFIO_FEATURE_ENABLE_AER) &&
> > > > > > +        !vdev->vbasedev.reset_works) {
> > > > > > +        goto found;
> > > > > > +    }
> > > > > > +
> > > > > > +    return;
> > > > > > +found:
> > > > > > +    find->pdev = pdev;
> > > > > > +    find->found = true;
> > > > > > +}
> > > > > > +
> > > > > > +static void device_find(PCIBus *bus, PCIDevice *pdev, void
> > > > > > *opaque)
> > > > > > +{
> > > > > > +    struct VFIODeviceFind *find = opaque;
> > > > > > +
> > > > > > +    if (find->found) {
> > > > > > +        return;
> > > > > > +    }
> > > > > > +
> > > > > > +    if (pdev == find->pdev) {
> > > > > > +        find->found = true;
> > > > > > +    }
> > > > > > +}
> > > > > > +
> > > > > > +static int vfio_check_host_bus_reset(VFIOPCIDevice *vdev)
> > > > > > +{
> > > > > > +    PCIBus *bus = vdev->pdev.bus;
> > > > > > +    struct vfio_pci_hot_reset_info *info = NULL;
> > > > > > +    struct vfio_pci_dependent_device *devices;
> > > > > > +    VFIOGroup *group;
> > > > > > +    struct VFIODeviceFind find;
> > > > > > +    int ret, i;
> > > > > > +
> > > > > > +    ret = vfio_get_hot_reset_info(vdev, &info);
> > > > > > +    if (ret) {
> > > > > > +        error_report("vfio: Cannot enable AER for device
> > > > > > %s,"
> > > > > > +                     " device does not support hot
> > > > > > reset.",
> > > > > > +                     vdev->vbasedev.name);
> > > > > > +        goto out;
> > > > > > +    }
> > > > > > +
> > > > > > +    /* List all affected devices by bus reset */
> > > > > > +    devices = &info->devices[0];
> > > > > > +
> > > > > > +    /* Verify that we have all the groups required */
> > > > > > +    for (i = 0; i < info->count; i++) {
> > > > > > +        PCIHostDeviceAddress host;
> > > > > > +        VFIOPCIDevice *tmp;
> > > > > > +        VFIODevice *vbasedev_iter;
> > > > > > +        bool found = false;
> > > > > > +
> > > > > > +        host.domain = devices[i].segment;
> > > > > > +        host.bus = devices[i].bus;
> > > > > > +        host.slot = PCI_SLOT(devices[i].devfn);
> > > > > > +        host.function = PCI_FUNC(devices[i].devfn);
> > > > > > +
> > > > > > +        /* Skip the current device */
> > > > > > +        if (vfio_pci_host_match(&host, &vdev->host)) {
> > > > > > +            continue;
> > > > > > +        }
> > > > > > +
> > > > > > +        /* Ensure we own the group of the affected device
> > > > > > */
> > > > > > +        QLIST_FOREACH(group, &vfio_group_list, next) {
> > > > > > +            if (group->groupid == devices[i].group_id) {
> > > > > > +                break;
> > > > > > +            }
> > > > > > +        }
> > > > > > +
> > > > > > +        if (!group) {
> > > > > > +            error_report("vfio: Cannot enable AER for
> > > > > > device
> > > > > > %s, "
> > > > > > +                         "depends on group %d which is not
> > > > > > owned.",
> > > > > > +                         vdev->vbasedev.name,
> > > > > > devices[i].group_id);
> > > > > > +            ret = -1;
> > > > > > +            goto out;
> > > > > > +        }
> > > > > > +
> > > > > > +        /* Ensure affected devices for reset on/blow the
> > > > > > bus
> > > > > > */
> > > > > > +        QLIST_FOREACH(vbasedev_iter, &group->device_list,
> > > > > > next) {
> > > > > > +            if (vbasedev_iter->type !=
> > > > > > VFIO_DEVICE_TYPE_PCI) {
> > > > > > +                continue;
> > > > > > +            }
> > > > > > +            tmp = container_of(vbasedev_iter,
> > > > > > VFIOPCIDevice,
> > > > > > vbasedev);
> > > > > > +            if (vfio_pci_host_match(&host, &tmp->host)) {
> > > > > > +                PCIDevice *pci = PCI_DEVICE(tmp);
> > > > > > +
> > > > > > +                /*
> > > > > > +                 * For multifunction device, due to vfio
> > > > > > driver
> > > > > > signal all
> > > > > > +                 * functions under the upstream link of
> > > > > > the
> > > > > > end
> > > > > > point. here
> > > > > > +                 * we validate all functions whether
> > > > > > enable
> > > > > > AER.
> > > > > > +                 */
> > > > > > +                if (vfio_pci_host_slot_match(&vdev->host,
> > > > > > &tmp-
> > > > > > > host) &&
> > > > > > +                    !(tmp->features &
> > > > > > VFIO_FEATURE_ENABLE_AER)) {
> > > > > > +                    error_report("vfio: Cannot enable AER
> > > > > > for
> > > > > > device %s, on same slot"
> > > > > > +                                 " the dependent device %s
> > > > > > which
> > > > > > does not enable AER.",
> > > > > > +                                 vdev->vbasedev.name, tmp-
> > > > > > > vbasedev.name);
> > > > > > +                    ret = -1;
> > > > > > +                    goto out;
> > > > > > +                }
> > > > > > +
> > > > > > +                find.pdev = pci;
> > > > > > +                find.found = false;
> > > > > > +                pci_for_each_device(bus, pci_bus_num(bus),
> > > > > > +                                    device_find, &find);
> > > > > > +                if (!find.found) {
> > > > > > +                    error_report("vfio: Cannot enable AER
> > > > > > for
> > > > > > device %s, "
> > > > > > +                                 "the dependent device %s
> > > > > > is
> > > > > > not
> > > > > > under the same bus",
> > > > > > +                                 vdev->vbasedev.name, tmp-
> > > > > > > vbasedev.name);
> > > > > > +                    ret = -1;
> > > > > > +                    goto out;
> > > > > > +                }
> > > > > > +                found = true;
> > > > > > +                break;
> > > > > > +            }
> > > > > > +        }
> > > > > > +
> > > > > > +        /* Ensure all affected devices assigned to VM */
> > > > > 
> > > > > I am puzzled.
> > > > > Does not kernel enforce this already?
> > > > > If not it's a security problem.
> > > > > If yes why does userspace need to check this?
> > > > 
> > > > DMA isolation and bus level isolation are separate concepts.
> > > >  Each
> > > > function of a multi-function device can have DMA isolation, but
> > > > a
> > > > user
> > > > needs to own all of the functions affected by a bus reset in
> > > > order
> > > > to
> > > > perform one.  An AER configuration can only be created if the
> > > > user
> > > > can
> > > > translate a guest bus reset into a host bus reset and therefore
> > > > needs
> > > > to test whether it has the permissions to do so.  I believe
> > > > over
> > > > the
> > > > course of reviews we've also added some simplifying constraints
> > > > around
> > > > this to reduce the problem set, things like all the groups
> > > > being
> > > > assigned rather than just owned by the user.  However, I
> > > > believe
> > > > the
> > > > kernel is sound in how it provides security for bus resets.
> > > >  Thanks,
> > > > 
> > > > Alex
> > > 
> > > Yes, sounds good.
> > > 
> > > So how about just trying to do bus reset at setup time?
> > > If kernel allows this, we know it is safe ...
> > 
> > The host may support hotplug, what's possible at setup time may not
> > be
> > possible when an error occurs.
> 
> How does this patch help solve this problem?

I believe there's a patch in this series that re-tests on the
occurrence of an error, before injecting the AER into the guest.

> > It's unlikely, but worth considering I
> > think.
> 
> I suspect vfio will have to solve this in kernel
> (e.g. automatically add all new devices in the same group
> wrt reset).

Nope, the user simply loses their ability to reset the bus if they
don't own all the groups at the time they attempt to do a bus reset.
 Mixing bus isolation and DMA isolation would cause a mess of groups.

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH v14 Resend 08/13] vfio: add check host bus reset is support or not
  2015-12-24 18:41             ` Alex Williamson
@ 2015-12-24 19:42               ` Michael S. Tsirkin
  0 siblings, 0 replies; 34+ messages in thread
From: Michael S. Tsirkin @ 2015-12-24 19:42 UTC (permalink / raw)
  To: Alex Williamson; +Cc: chen.fan.fnst, Cao jin, qemu-devel

On Thu, Dec 24, 2015 at 11:41:15AM -0700, Alex Williamson wrote:
> On Thu, 2015-12-24 at 20:23 +0200, Michael S. Tsirkin wrote:
> > On Thu, Dec 24, 2015 at 11:20:26AM -0700, Alex Williamson wrote:
> > > On Thu, 2015-12-24 at 20:06 +0200, Michael S. Tsirkin wrote:
> > > > On Thu, Dec 24, 2015 at 10:47:06AM -0700, Alex Williamson wrote:
> > > > > On Thu, 2015-12-24 at 16:32 +0200, Michael S. Tsirkin wrote:
> > > > > > On Thu, Dec 17, 2015 at 09:41:49AM +0800, Cao jin wrote:
> > > > > > > From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > > > > > > 
> > > > > > > when init vfio devices done, we should test all the devices
> > > > > > > supported
> > > > > > > aer whether conflict with others. For each one, get the hot
> > > > > > > reset
> > > > > > > info for the affected device list.  For each affected
> > > > > > > device,
> > > > > > > all
> > > > > > > should attach to the VM and on/below the same bus. also, we
> > > > > > > should
> > > > > > > test
> > > > > > > all of the non-AER supporting vfio-pci devices on or below
> > > > > > > the
> > > > > > > target
> > > > > > > bus to verify they have a reset mechanism.
> > > > > > > 
> > > > > > > Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > > > > > > ---
> > > > > > >  hw/vfio/pci.c | 236
> > > > > > > ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
> > > > > > >  hw/vfio/pci.h |   1 +
> > > > > > >  2 files changed, 230 insertions(+), 7 deletions(-)
> > > > > > > 
> > > > > > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> > > > > > > index d00b0e4..6926dcc 100644
> > > > > > > --- a/hw/vfio/pci.c
> > > > > > > +++ b/hw/vfio/pci.c
> > > > > > > @@ -1806,6 +1806,216 @@ static int
> > > > > > > vfio_add_std_cap(VFIOPCIDevice
> > > > > > > *vdev, uint8_t pos)
> > > > > > >      return 0;
> > > > > > >  }
> > > > > > >  
> > > > > > > +static bool vfio_pci_host_slot_match(PCIHostDeviceAddress
> > > > > > > *host1,
> > > > > > > +                                     PCIHostDeviceAddress
> > > > > > > *host2)
> > > > > > > +{
> > > > > > > +    return (host1->domain == host2->domain && host1->bus
> > > > > > > ==
> > > > > > > host2-
> > > > > > > > bus &&
> > > > > > > +            host1->slot == host2->slot);
> > > > > > > +}
> > > > > > > +
> > > > > > > +static bool vfio_pci_host_match(PCIHostDeviceAddress
> > > > > > > *host1,
> > > > > > > +                                PCIHostDeviceAddress
> > > > > > > *host2)
> > > > > > > +{
> > > > > > > +    return (vfio_pci_host_slot_match(host1, host2) &&
> > > > > > > +            host1->function == host2->function);
> > > > > > > +}
> > > > > > > +
> > > > > > > +struct VFIODeviceFind {
> > > > > > > +    PCIDevice *pdev;
> > > > > > > +    bool found;
> > > > > > > +};
> > > > > > > +
> > > > > > > +static void vfio_check_device_noreset(PCIBus *bus,
> > > > > > > PCIDevice
> > > > > > > *pdev,
> > > > > > > +                                      void *opaque)
> > > > > > > +{
> > > > > > > +    DeviceState *dev = DEVICE(pdev);
> > > > > > > +    DeviceClass *dc = DEVICE_GET_CLASS(dev);
> > > > > > > +    VFIOPCIDevice *vdev;
> > > > > > > +    struct VFIODeviceFind *find = opaque;
> > > > > > > +
> > > > > > > +    if (find->found) {
> > > > > > > +        return;
> > > > > > > +    }
> > > > > > > +
> > > > > > > +    if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
> > > > > > > +        if (!dc->reset) {
> > > > > > > +            goto found;
> > > > > > > +        }
> > > > > > > +        return;
> > > > > > > +    }
> > > > > > > +    vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> > > > > > > +    if (!(vdev->features & VFIO_FEATURE_ENABLE_AER) &&
> > > > > > > +        !vdev->vbasedev.reset_works) {
> > > > > > > +        goto found;
> > > > > > > +    }
> > > > > > > +
> > > > > > > +    return;
> > > > > > > +found:
> > > > > > > +    find->pdev = pdev;
> > > > > > > +    find->found = true;
> > > > > > > +}
> > > > > > > +
> > > > > > > +static void device_find(PCIBus *bus, PCIDevice *pdev, void
> > > > > > > *opaque)
> > > > > > > +{
> > > > > > > +    struct VFIODeviceFind *find = opaque;
> > > > > > > +
> > > > > > > +    if (find->found) {
> > > > > > > +        return;
> > > > > > > +    }
> > > > > > > +
> > > > > > > +    if (pdev == find->pdev) {
> > > > > > > +        find->found = true;
> > > > > > > +    }
> > > > > > > +}
> > > > > > > +
> > > > > > > +static int vfio_check_host_bus_reset(VFIOPCIDevice *vdev)
> > > > > > > +{
> > > > > > > +    PCIBus *bus = vdev->pdev.bus;
> > > > > > > +    struct vfio_pci_hot_reset_info *info = NULL;
> > > > > > > +    struct vfio_pci_dependent_device *devices;
> > > > > > > +    VFIOGroup *group;
> > > > > > > +    struct VFIODeviceFind find;
> > > > > > > +    int ret, i;
> > > > > > > +
> > > > > > > +    ret = vfio_get_hot_reset_info(vdev, &info);
> > > > > > > +    if (ret) {
> > > > > > > +        error_report("vfio: Cannot enable AER for device
> > > > > > > %s,"
> > > > > > > +                     " device does not support hot
> > > > > > > reset.",
> > > > > > > +                     vdev->vbasedev.name);
> > > > > > > +        goto out;
> > > > > > > +    }
> > > > > > > +
> > > > > > > +    /* List all affected devices by bus reset */
> > > > > > > +    devices = &info->devices[0];
> > > > > > > +
> > > > > > > +    /* Verify that we have all the groups required */
> > > > > > > +    for (i = 0; i < info->count; i++) {
> > > > > > > +        PCIHostDeviceAddress host;
> > > > > > > +        VFIOPCIDevice *tmp;
> > > > > > > +        VFIODevice *vbasedev_iter;
> > > > > > > +        bool found = false;
> > > > > > > +
> > > > > > > +        host.domain = devices[i].segment;
> > > > > > > +        host.bus = devices[i].bus;
> > > > > > > +        host.slot = PCI_SLOT(devices[i].devfn);
> > > > > > > +        host.function = PCI_FUNC(devices[i].devfn);
> > > > > > > +
> > > > > > > +        /* Skip the current device */
> > > > > > > +        if (vfio_pci_host_match(&host, &vdev->host)) {
> > > > > > > +            continue;
> > > > > > > +        }
> > > > > > > +
> > > > > > > +        /* Ensure we own the group of the affected device
> > > > > > > */
> > > > > > > +        QLIST_FOREACH(group, &vfio_group_list, next) {
> > > > > > > +            if (group->groupid == devices[i].group_id) {
> > > > > > > +                break;
> > > > > > > +            }
> > > > > > > +        }
> > > > > > > +
> > > > > > > +        if (!group) {
> > > > > > > +            error_report("vfio: Cannot enable AER for
> > > > > > > device
> > > > > > > %s, "
> > > > > > > +                         "depends on group %d which is not
> > > > > > > owned.",
> > > > > > > +                         vdev->vbasedev.name,
> > > > > > > devices[i].group_id);
> > > > > > > +            ret = -1;
> > > > > > > +            goto out;
> > > > > > > +        }
> > > > > > > +
> > > > > > > +        /* Ensure affected devices for reset on/blow the
> > > > > > > bus
> > > > > > > */
> > > > > > > +        QLIST_FOREACH(vbasedev_iter, &group->device_list,
> > > > > > > next) {
> > > > > > > +            if (vbasedev_iter->type !=
> > > > > > > VFIO_DEVICE_TYPE_PCI) {
> > > > > > > +                continue;
> > > > > > > +            }
> > > > > > > +            tmp = container_of(vbasedev_iter,
> > > > > > > VFIOPCIDevice,
> > > > > > > vbasedev);
> > > > > > > +            if (vfio_pci_host_match(&host, &tmp->host)) {
> > > > > > > +                PCIDevice *pci = PCI_DEVICE(tmp);
> > > > > > > +
> > > > > > > +                /*
> > > > > > > +                 * For multifunction device, due to vfio
> > > > > > > driver
> > > > > > > signal all
> > > > > > > +                 * functions under the upstream link of
> > > > > > > the
> > > > > > > end
> > > > > > > point. here
> > > > > > > +                 * we validate all functions whether
> > > > > > > enable
> > > > > > > AER.
> > > > > > > +                 */
> > > > > > > +                if (vfio_pci_host_slot_match(&vdev->host,
> > > > > > > &tmp-
> > > > > > > > host) &&
> > > > > > > +                    !(tmp->features &
> > > > > > > VFIO_FEATURE_ENABLE_AER)) {
> > > > > > > +                    error_report("vfio: Cannot enable AER
> > > > > > > for
> > > > > > > device %s, on same slot"
> > > > > > > +                                 " the dependent device %s
> > > > > > > which
> > > > > > > does not enable AER.",
> > > > > > > +                                 vdev->vbasedev.name, tmp-
> > > > > > > > vbasedev.name);
> > > > > > > +                    ret = -1;
> > > > > > > +                    goto out;
> > > > > > > +                }
> > > > > > > +
> > > > > > > +                find.pdev = pci;
> > > > > > > +                find.found = false;
> > > > > > > +                pci_for_each_device(bus, pci_bus_num(bus),
> > > > > > > +                                    device_find, &find);
> > > > > > > +                if (!find.found) {
> > > > > > > +                    error_report("vfio: Cannot enable AER
> > > > > > > for
> > > > > > > device %s, "
> > > > > > > +                                 "the dependent device %s
> > > > > > > is
> > > > > > > not
> > > > > > > under the same bus",
> > > > > > > +                                 vdev->vbasedev.name, tmp-
> > > > > > > > vbasedev.name);
> > > > > > > +                    ret = -1;
> > > > > > > +                    goto out;
> > > > > > > +                }
> > > > > > > +                found = true;
> > > > > > > +                break;
> > > > > > > +            }
> > > > > > > +        }
> > > > > > > +
> > > > > > > +        /* Ensure all affected devices assigned to VM */
> > > > > > 
> > > > > > I am puzzled.
> > > > > > Does not kernel enforce this already?
> > > > > > If not it's a security problem.
> > > > > > If yes why does userspace need to check this?
> > > > > 
> > > > > DMA isolation and bus level isolation are separate concepts.
> > > > >  Each
> > > > > function of a multi-function device can have DMA isolation, but
> > > > > a
> > > > > user
> > > > > needs to own all of the functions affected by a bus reset in
> > > > > order
> > > > > to
> > > > > perform one.  An AER configuration can only be created if the
> > > > > user
> > > > > can
> > > > > translate a guest bus reset into a host bus reset and therefore
> > > > > needs
> > > > > to test whether it has the permissions to do so.  I believe
> > > > > over
> > > > > the
> > > > > course of reviews we've also added some simplifying constraints
> > > > > around
> > > > > this to reduce the problem set, things like all the groups
> > > > > being
> > > > > assigned rather than just owned by the user.  However, I
> > > > > believe
> > > > > the
> > > > > kernel is sound in how it provides security for bus resets.
> > > > >  Thanks,
> > > > > 
> > > > > Alex
> > > > 
> > > > Yes, sounds good.
> > > > 
> > > > So how about just trying to do bus reset at setup time?
> > > > If kernel allows this, we know it is safe ...
> > > 
> > > The host may support hotplug, what's possible at setup time may not
> > > be
> > > possible when an error occurs.
> > 
> > How does this patch help solve this problem?
> 
> I believe there's a patch in this series that re-tests on the
> occurrence of an error, before injecting the AER into the guest.

Doesn't seem robust.  What if hotplug happens right after error is
injected?

> > > It's unlikely, but worth considering I
> > > think.
> > 
> > I suspect vfio will have to solve this in kernel
> > (e.g. automatically add all new devices in the same group
> > wrt reset).
> 
> Nope, the user simply loses their ability to reset the bus if they
> don't own all the groups at the time they attempt to do a bus reset.

Hmm, this is sub-optimal.
Assume I hot-plug a device behind a bus.
I fully intend to pass it through to a VM
where all other devices are but before I
manage to do this, an error triggers.

>  Mixing bus isolation and DMA isolation would cause a mess of groups.

Not sure how what I said implies this.

I merely suggested that if vfio takes over bus reset
it should take over handling hotplug as well,
so devices added on this bus are automatically
pevented from being used by anyone except
the same VM, making it safe to reset them.

-- 
MST

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [Qemu-devel] [PATCH v14 Resend 10/13] pci: add pci device pre-post reset callbacks for host bus reset
  2015-12-24 14:34           ` Michael S. Tsirkin
@ 2015-12-25  1:18             ` Chen Fan
  0 siblings, 0 replies; 34+ messages in thread
From: Chen Fan @ 2015-12-25  1:18 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: Alex Williamson, Cao jin, qemu-devel


On 12/24/2015 10:34 PM, Michael S. Tsirkin wrote:
> On Thu, Dec 24, 2015 at 01:10:25PM +0800, Chen Fan wrote:
>> On 12/22/2015 05:07 AM, Alex Williamson wrote:
>>> On Fri, 2015-12-18 at 11:29 +0800, Chen Fan wrote:
>>>> On 12/18/2015 04:31 AM, Alex Williamson wrote:
>>>>> On Thu, 2015-12-17 at 09:41 +0800, Cao jin wrote:
>>>>>> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>>>>>>
>>>>>> Particularly, For vfio devices, Once need to recovery devices
>>>>>> by bus reset such as AER, we always need to reset the host bus
>>>>>> to recovery the devices under the bus, so we need to add pci
>>>>>> device
>>>>>> callbacks to specify to do host bus reset.
>>>>>>
>>>>>> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>>>>>> Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
>>>>>> ---
>>>>>>    hw/pci/pci.c         | 18 ++++++++++++++++++
>>>>>>    hw/pci/pci_bridge.c  |  9 +++++++++
>>>>>>    hw/vfio/pci.c        | 26 ++++++++++++++++++++++++++
>>>>>>    hw/vfio/pci.h        |  2 ++
>>>>>>    include/hw/pci/pci.h |  7 +++++++
>>>>>>    5 files changed, 62 insertions(+)
>>>>>>
>>>>>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>>>>>> index f6ca6ef..64fa2cc 100644
>>>>>> --- a/hw/pci/pci.c
>>>>>> +++ b/hw/pci/pci.c
>>>>>> @@ -247,6 +247,24 @@ static void pci_do_device_reset(PCIDevice
>>>>>> *dev)
>>>>>>        msix_reset(dev);
>>>>>>    }
>>>>>> +void pci_device_pre_reset(PCIBus *bus, PCIDevice *dev, void
>>>>>> *unused)
>>>>>> +{
>>>>>> +    PCIDeviceClass *dc = PCI_DEVICE_GET_CLASS(dev);
>>>>>> +
>>>>>> +    if (dc->pre_reset) {
>>>>>> +        dc->pre_reset(dev);
>>>>>> +    }
>>>>>> +}
>>>>>> +
>>>>>> +void pci_device_post_reset(PCIBus *bus, PCIDevice *dev, void
>>>>>> *unused)
>>>>>> +{
>>>>>> +    PCIDeviceClass *dc = PCI_DEVICE_GET_CLASS(dev);
>>>>>> +
>>>>>> +    if (dc->post_reset) {
>>>>>> +        dc->post_reset(dev);
>>>>>> +    }
>>>>>> +}
>>>>>> +
>>>>>>    /*
>>>>>>     * This function is called on #RST and FLR.
>>>>>>     * FLR if PCI_EXP_DEVCTL_BCR_FLR is set
>>>>>> diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
>>>>>> index 40c97b1..ddb76ab 100644
>>>>>> --- a/hw/pci/pci_bridge.c
>>>>>> +++ b/hw/pci/pci_bridge.c
>>>>>> @@ -267,8 +267,17 @@ void pci_bridge_write_config(PCIDevice *d,
>>>>>>        newctl = pci_get_word(d->config + PCI_BRIDGE_CONTROL);
>>>>>>        if (~oldctl & newctl & PCI_BRIDGE_CTL_BUS_RESET) {
>>>>>> +        /*
>>>>>> +         * Notify all vfio-pci devices under the bus
>>>>>> +         * should do physical bus reset.
>>>>>> +         */
>>>>>> +        PCIBus *sec_bus = pci_bridge_get_sec_bus(s);
>>>>>> +        pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
>>>>>> +                            pci_device_pre_reset, NULL);
>>>>>>            /* Trigger hot reset on 0->1 transition. */
>>>>>>            qbus_reset_all(&s->sec_bus.qbus);
>>>>>> +        pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
>>>>>> +                            pci_device_post_reset, NULL);
>>>>>>        }
>>>>>>    }
>>>>>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>>>>>> index e17dc89..df32618 100644
>>>>>> --- a/hw/vfio/pci.c
>>>>>> +++ b/hw/vfio/pci.c
>>>>>> @@ -39,6 +39,7 @@
>>>>>>    static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
>>>>>>    static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool
>>>>>> enabled);
>>>>>> +static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single);
>>>>>>    /*
>>>>>>     * Disabling BAR mmaping can be slow, but toggling it around
>>>>>> INTx
>>>>>> can
>>>>>> @@ -1888,6 +1889,8 @@ static int
>>>>>> vfio_check_host_bus_reset(VFIOPCIDevice *vdev)
>>>>>>        /* List all affected devices by bus reset */
>>>>>>        devices = &info->devices[0];
>>>>>> +    vdev->single_depend_dev = (info->count == 1);
>>>>>> +
>>>>>>        /* Verify that we have all the groups required */
>>>>>>        for (i = 0; i < info->count; i++) {
>>>>>>            PCIHostDeviceAddress host;
>>>>>> @@ -2029,10 +2032,26 @@ static int
>>>>>> vfio_check_bus_reset(NotifierWithReturn *n, void *opaque)
>>>>>>        return vfio_check_host_bus_reset(vdev);
>>>>>>    }
>>>>>> +static void vfio_aer_pre_reset(PCIDevice *pdev)
>>>>>> +{
>>>>>> +    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
>>>>>> +
>>>>>> +    vdev->aer_reset = true;
>>>>>> +    vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
>>>>>> +}
>>>>> Doesn't this lead to multiple host bus resets per guest bus reset
>>>>> in
>>>>> many cases?  It looks like we'll do it once per vfio-pci device,
>>>>> even
>>>>> if those devices are on the same host bus.  That's a 1 second
>>>>> operation
>>>>> per device.  Can we avoid that?  Maybe some sort of sequence ID
>>>>> could
>>>>> help a device figure out whether it's already been reset as part of
>>>>> a
>>>>> dependent device for this particular guest bus reset.  Thanks,
>>>> That's right, I missed this case, but I don't understand the scenario
>>>> how to
>>>> use a sequence ID to mark the device if been reset. can you detail it
>>>> ?
>>> I don't really have a concrete idea for a sequence ID, it was just a
>>> thought that maybe if each bus reset had a sequence ID then devices
>>> could know whether they've already been reset for that sequence ID.
>>>   The basic problem we have is that reset callbacks are per device and
>>> it's difficult to infer which individual resets are part of that bus
>>> reset.  In fact, do we propagate resets correctly down secondary
>>> bridges?  We're triggering off a VM write of the bridge control bus
>>> reset bit triggering from 0->1 and we then call qbus_reset_all() on
>>> that qbus, which I think is just going to call pci_bridge_reset() for
>>> any other bridges, which doesn't do anything about resetting deeper
>>> subordinate buses.  I think that means that if we had a root port with
>>> a switch below it and endpoints below that, if the VM triggered a
>>> secondary bus reset at the root port, the endpoints would never see it,
>>> which is not how real hardware works.
>> Hi Alex,
>>
>>      After think over the subordinate case, I think the qbus_reset_all()
>> should also reset the deeper endpoints. which recursive the qbus and qdev
>> to reset all devices blow sec bus and bridge device independently.
>>
>> MST's suggestion in another mail to add a hot reset callback for sec bus
>> reset
>> should be good to avoid the couple of pre_reset/post_reset callbacks.
>> thanks:)
>>
>> but for the multiple resets triggering on the same bus, I think the virtual
>> bus-level
>> callback do not guarantee the host bus reset only once for all dependent
>> devices.
>> because the host reset dependent devices may not on the same virtual bus.
>> we allowed that the dependent devices address below the bus. so the devices
>> still don't know the host bus is in reset. here I think we should have a
>> host bus
>> model bind to all the dependent devices. when devices do hot reset, we mark
>> the bus in reset. then other dependent devices know the bus is reset. how
>> about
>> this?
>>
>> Thanks,
>> Chen
>
> What does "below bus" mean here?
> Below what?
I just said that assume the interacted devices on the same host bus. we 
allowed
one device with aer was assigned on a virtual bus and the others were 
assigned
on the secondary bus.

Thanks,
Chen

>
>
>>>> additional, there was a mechanism to compute device whether need to
>>>> be reset
>>>> by hot reset. so I simply modify the code as the following:
>>>>
>>>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>>>> index a9bc67e..42774ca 100644
>>>> --- a/hw/vfio/pci.c
>>>> +++ b/hw/vfio/pci.c
>>>> @@ -2063,13 +2063,19 @@ static void vfio_aer_pre_reset(PCIDevice
>>>> *pdev)
>>>>        VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
>>>>
>>>>        vdev->aer_reset = true;
>>>> -    vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
>>>> +    vdev->vbasedev.needs_reset = true;
>>>>    }
>>>>
>>>>    static void vfio_aer_post_reset(PCIDevice *pdev)
>>>>    {
>>>>        VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
>>>>
>>>> +    if (!vdev->single_depend_dev && vdev->vbasedev.needs_reset) {
>>>> +        vfio_pci_hot_reset(vdev, false);
>>>> +    } else {
>>>> +        vfio_pci_hot_reset(vdev, true);
>>>> +    }
>>>> +
>>>>        vdev->aer_reset = false;
>>>>    }
>>>>
>>>> what do you think of this ?
>>> I think it might be a bigger problem than that subtle change.  I wonder
>>> if we really need a better model of the reset line through the
>>> subordinate buses.  When reset is asserted, we'd set a bus_in_reset
>>> flag on the bus and trigger downstream bridges to do the same.  Then
>>> when the user de-asserts reset, we'd call qbus_reset_all() and
>>> propagate it through to downstream buses.  That way the per device
>>> reset callback could check to see if the bus is in reset and aer
>>> devices can then know to do a bus reset.  Finally, the bus_in_reset
>>> flag would be cleared on all the affected buses.  I'm sure there are
>>> numerous details missing there, but it seems like it might be a
>>> reasonable approach.  Thanks,
>>>
>>> Alex
>>>
>>>
>>> .
>>>
>>
>
> .
>

^ permalink raw reply	[flat|nested] 34+ messages in thread

end of thread, other threads:[~2015-12-25  1:24 UTC | newest]

Thread overview: 34+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-11-17  8:41 [Qemu-devel] [PATCH v14 00/13] vfio-pci: pass the aer error to guest Cao jin
2015-11-17  8:41 ` [Qemu-devel] [PATCH v14 01/13] vfio: extract vfio_get_hot_reset_info as a single function Cao jin
2015-11-17  8:41 ` [Qemu-devel] [PATCH v14 02/13] vfio: squeeze out vfio_pci_do_hot_reset for support bus reset Cao jin
2015-11-17  8:41 ` [Qemu-devel] [PATCH v14 03/13] pcie: modify the capability size assert Cao jin
2015-11-17  8:41 ` [Qemu-devel] [PATCH v14 04/13] vfio: make the 4 bytes aligned for capability size Cao jin
2015-11-17  8:41 ` [Qemu-devel] [PATCH v14 05/13] vfio: add pcie extanded capability support Cao jin
2015-11-17  8:41 ` [Qemu-devel] [PATCH v14 06/13] aer: impove pcie_aer_init to support vfio device Cao jin
2015-11-17  8:41 ` [Qemu-devel] [PATCH v14 07/13] vfio: add aer support for " Cao jin
2015-11-17  8:41 ` [Qemu-devel] [PATCH v14 08/13] vfio: add check host bus reset is support or not Cao jin
2015-12-17 20:32   ` [Qemu-devel] [PATCH v14 Resend " Alex Williamson
2015-12-18  1:14     ` Chen Fan
2015-12-24 14:32   ` Michael S. Tsirkin
2015-12-24 17:47     ` Alex Williamson
2015-12-24 18:06       ` Michael S. Tsirkin
2015-12-24 18:20         ` Alex Williamson
2015-12-24 18:23           ` Michael S. Tsirkin
2015-12-24 18:41             ` Alex Williamson
2015-12-24 19:42               ` Michael S. Tsirkin
2015-11-17  8:42 ` [Qemu-devel] [PATCH v14 09/13] add check reset mechanism when hotplug vfio device Cao jin
2015-12-17 20:32   ` [Qemu-devel] [PATCH v14 Resend " Alex Williamson
2015-11-17  8:42 ` [Qemu-devel] [PATCH v14 10/13] pci: add pci device pre-post reset callbacks for host bus reset Cao jin
2015-12-17 20:31   ` [Qemu-devel] [PATCH v14 Resend " Alex Williamson
2015-12-18  3:29     ` Chen Fan
2015-12-21 21:07       ` Alex Williamson
2015-12-22  7:18         ` Chen Fan
2015-12-24  5:10         ` Chen Fan
2015-12-24 14:34           ` Michael S. Tsirkin
2015-12-25  1:18             ` Chen Fan
2015-12-23 12:00   ` Michael S. Tsirkin
2015-12-24  5:14     ` Chen Fan
2015-11-17  8:42 ` [Qemu-devel] [PATCH v14 11/13] pcie_aer: expose pcie_aer_msg() interface Cao jin
2015-11-17  8:42 ` [Qemu-devel] [PATCH v14 12/13] vfio-pci: pass the aer error to guest Cao jin
2015-11-17  8:42 ` [Qemu-devel] [PATCH v14 13/13] vfio: add 'aer' property to expose aercap Cao jin
2015-11-18 17:06 ` [Qemu-devel] [PATCH v14 00/13] vfio-pci: pass the aer error to guest Michael S. Tsirkin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.