All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH v5 0/7] pass aer error to guest for vfio device
@ 2015-03-12 10:23 Chen Fan
  2015-03-12 10:23 ` [Qemu-devel] [PATCH v5 1/7] vfio: add pcie extanded capability support Chen Fan
                   ` (7 more replies)
  0 siblings, 8 replies; 53+ messages in thread
From: Chen Fan @ 2015-03-12 10:23 UTC (permalink / raw)
  To: qemu-devel; +Cc: izumi.taku, alex.williamson

For now, for vfio pci passthough devices when qemu receives
an error from host aer report, there just terminate the guest,
but usually user want to know what error occurred but stop the
guest, so this patches add aer capability support for vfio device,
and pass the error to guest, and have guest driver to recover
from the error.
and turning on SERR# for error forwording in bridge control register
patch in seabios has been merged.

v3-v4:
   1. add 'x-aer' for user to off aer capability.
   2. refactor vfio device to parse extended capabilities.

v2-v3:
   1. refactor vfio device to parse extended capability.
   2. add global property for piix4 to disable vfio aer cap.

v1-v2:
   1. turn on SERR# for bridge control register in firmware.
   2. initilize aer capability for vfio device.
   3. fix some trivial bug.

Chen Fan (7):
  vfio: add pcie extanded capability support
  aer: impove pcie_aer_init to support vfio device
  vfio: add aer support for vfio device
  pcie_aer: expose pcie_aer_msg() interface
  vfio-pci: pass the aer error to guest
  vfio: add 'x-aer' property to expose aercap
  pc: add PC_I440FX_COMPAT to disable aercap for vifo device

 hw/i386/pc_piix.c                  |   9 +++
 hw/i386/pc_q35.c                   |   4 +
 hw/pci-bridge/ioh3420.c            |   2 +-
 hw/pci-bridge/xio3130_downstream.c |   2 +-
 hw/pci-bridge/xio3130_upstream.c   |   2 +-
 hw/pci/pcie_aer.c                  |   6 +-
 hw/vfio/pci.c                      | 158 +++++++++++++++++++++++++++++++++++--
 include/hw/compat.h                |  10 +++
 include/hw/pci/pcie_aer.h          |   3 +-
 9 files changed, 182 insertions(+), 14 deletions(-)

-- 
1.9.3

^ permalink raw reply	[flat|nested] 53+ messages in thread

* [Qemu-devel] [PATCH v5 1/7] vfio: add pcie extanded capability support
  2015-03-12 10:23 [Qemu-devel] [PATCH v5 0/7] pass aer error to guest for vfio device Chen Fan
@ 2015-03-12 10:23 ` Chen Fan
  2015-03-12 10:23 ` [Qemu-devel] [PATCH v5 2/7] aer: impove pcie_aer_init to support vfio device Chen Fan
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 53+ messages in thread
From: Chen Fan @ 2015-03-12 10:23 UTC (permalink / raw)
  To: qemu-devel; +Cc: izumi.taku, alex.williamson

For vfio pcie device, we could expose the extended capability on
PCIE bus. in order to avoid config space broken, we introduce
a copy config for parsing extended caps. and rebuild the pcie
extended config space.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 72 insertions(+), 1 deletion(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 6b80539..2190102 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2486,6 +2486,21 @@ static uint8_t vfio_std_cap_max_size(PCIDevice *pdev, uint8_t pos)
     return next - pos;
 }
 
+
+static uint16_t vfio_ext_cap_max_size(const uint8_t *config, uint16_t pos)
+{
+    uint16_t tmp, next = PCIE_CONFIG_SPACE_SIZE - 1;
+
+    for (tmp = PCI_CONFIG_SPACE_SIZE; tmp;
+        tmp = PCI_EXT_CAP_NEXT(pci_get_long(config + tmp))) {
+        if (tmp > pos && tmp < next) {
+            next = tmp;
+        }
+    }
+
+    return next - pos;
+}
+
 static void vfio_set_word_bits(uint8_t *buf, uint16_t val, uint16_t mask)
 {
     pci_set_word(buf, (pci_get_word(buf) & ~mask) | val);
@@ -2709,16 +2724,72 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
     return 0;
 }
 
+static int vfio_add_ext_cap(VFIOPCIDevice *vdev, const uint8_t *config)
+{
+    PCIDevice *pdev = &vdev->pdev;
+    uint32_t header;
+    uint16_t cap_id, next, size;
+    uint8_t cap_ver;
+
+    for (next = PCI_CONFIG_SPACE_SIZE; next;
+         next = PCI_EXT_CAP_NEXT(pci_get_long(config + next))) {
+        header = pci_get_long(config + next);
+        cap_id = PCI_EXT_CAP_ID(header);
+        cap_ver = PCI_EXT_CAP_VER(header);
+
+        /*
+         * If it becomes important to configure extended capabilities to their
+         * actual size, use this as the default when it's something we don't
+         * recognize. Since QEMU doesn't actually handle many of the config
+         * accesses, exact size doesn't seem worthwhile.
+         */
+        size = vfio_ext_cap_max_size(config, next);
+
+        pcie_add_capability(pdev, cap_id, cap_ver, next, size);
+        if (next == PCI_CONFIG_SPACE_SIZE) {
+            /* Begin the rebuild, we should set the next offset zero. */
+            pci_set_long(pdev->config + next, PCI_EXT_CAP(cap_id, cap_ver, 0));
+        }
+
+        /* Use emulated header pointer to allow dropping extended caps */
+        pci_set_long(vdev->emulated_config_bits + next, 0xffffffff);
+    }
+
+    return 0;
+}
+
 static int vfio_add_capabilities(VFIOPCIDevice *vdev)
 {
     PCIDevice *pdev = &vdev->pdev;
+    int ret;
+    uint8_t *config;
 
     if (!(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST) ||
         !pdev->config[PCI_CAPABILITY_LIST]) {
         return 0; /* Nothing to add */
     }
 
-    return vfio_add_std_cap(vdev, pdev->config[PCI_CAPABILITY_LIST]);
+    ret = vfio_add_std_cap(vdev, pdev->config[PCI_CAPABILITY_LIST]);
+    if (ret) {
+        return ret;
+    }
+
+    /* on PCI bus, it doesn't make sense to expose extended capabilities. */
+    if (!pci_is_express(pdev) ||
+        !pci_bus_is_express(pdev->bus) ||
+        !pci_get_long(pdev->config + PCI_CONFIG_SPACE_SIZE)) {
+        return 0;
+    }
+
+    /*
+     * In order to avoid config space broken, here using a copy config to
+     * parse extended capabilities.
+     */
+    config = g_memdup(pdev->config, vdev->config_size);
+    ret = vfio_add_ext_cap(vdev, config);
+
+    g_free(config);
+    return ret;
 }
 
 static void vfio_pci_pre_reset(VFIOPCIDevice *vdev)
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [Qemu-devel] [PATCH v5 2/7] aer: impove pcie_aer_init to support vfio device
  2015-03-12 10:23 [Qemu-devel] [PATCH v5 0/7] pass aer error to guest for vfio device Chen Fan
  2015-03-12 10:23 ` [Qemu-devel] [PATCH v5 1/7] vfio: add pcie extanded capability support Chen Fan
@ 2015-03-12 10:23 ` Chen Fan
  2015-03-13 22:25   ` Alex Williamson
  2015-03-12 10:23 ` [Qemu-devel] [PATCH v5 3/7] vfio: add aer support for " Chen Fan
                   ` (5 subsequent siblings)
  7 siblings, 1 reply; 53+ messages in thread
From: Chen Fan @ 2015-03-12 10:23 UTC (permalink / raw)
  To: qemu-devel; +Cc: izumi.taku, alex.williamson

pcie_aer_init was used to emulate an aer capability for pcie device,
but for vfio device, the aer config space size is mutable and is not
always equal to PCI_ERR_SIZEOF(0x48). it depends on where the TLP Prefix
register required, so here we add a size argument.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/pci-bridge/ioh3420.c            | 2 +-
 hw/pci-bridge/xio3130_downstream.c | 2 +-
 hw/pci-bridge/xio3130_upstream.c   | 2 +-
 hw/pci/pcie_aer.c                  | 4 ++--
 include/hw/pci/pcie_aer.h          | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/hw/pci-bridge/ioh3420.c b/hw/pci-bridge/ioh3420.c
index cce2fdd..4d9cd3f 100644
--- a/hw/pci-bridge/ioh3420.c
+++ b/hw/pci-bridge/ioh3420.c
@@ -129,7 +129,7 @@ static int ioh3420_initfn(PCIDevice *d)
         goto err_pcie_cap;
     }
     pcie_cap_root_init(d);
-    rc = pcie_aer_init(d, IOH_EP_AER_OFFSET);
+    rc = pcie_aer_init(d, IOH_EP_AER_OFFSET, PCI_ERR_SIZEOF);
     if (rc < 0) {
         goto err;
     }
diff --git a/hw/pci-bridge/xio3130_downstream.c b/hw/pci-bridge/xio3130_downstream.c
index b3a6479..9737041 100644
--- a/hw/pci-bridge/xio3130_downstream.c
+++ b/hw/pci-bridge/xio3130_downstream.c
@@ -92,7 +92,7 @@ static int xio3130_downstream_initfn(PCIDevice *d)
         goto err_pcie_cap;
     }
     pcie_cap_arifwd_init(d);
-    rc = pcie_aer_init(d, XIO3130_AER_OFFSET);
+    rc = pcie_aer_init(d, XIO3130_AER_OFFSET, PCI_ERR_SIZEOF);
     if (rc < 0) {
         goto err;
     }
diff --git a/hw/pci-bridge/xio3130_upstream.c b/hw/pci-bridge/xio3130_upstream.c
index eada582..4d7f894 100644
--- a/hw/pci-bridge/xio3130_upstream.c
+++ b/hw/pci-bridge/xio3130_upstream.c
@@ -81,7 +81,7 @@ static int xio3130_upstream_initfn(PCIDevice *d)
     }
     pcie_cap_flr_init(d);
     pcie_cap_deverr_init(d);
-    rc = pcie_aer_init(d, XIO3130_AER_OFFSET);
+    rc = pcie_aer_init(d, XIO3130_AER_OFFSET, PCI_ERR_SIZEOF);
     if (rc < 0) {
         goto err;
     }
diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c
index 5a25c32..71045eb 100644
--- a/hw/pci/pcie_aer.c
+++ b/hw/pci/pcie_aer.c
@@ -94,12 +94,12 @@ static void aer_log_clear_all_err(PCIEAERLog *aer_log)
     aer_log->log_num = 0;
 }
 
-int pcie_aer_init(PCIDevice *dev, uint16_t offset)
+int pcie_aer_init(PCIDevice *dev, uint16_t offset, uint16_t size)
 {
     PCIExpressDevice *exp;
 
     pcie_add_capability(dev, PCI_EXT_CAP_ID_ERR, PCI_ERR_VER,
-                        offset, PCI_ERR_SIZEOF);
+                        offset, size);
     exp = &dev->exp;
     exp->aer_cap = offset;
 
diff --git a/include/hw/pci/pcie_aer.h b/include/hw/pci/pcie_aer.h
index bcac80a..a4cc6f3 100644
--- a/include/hw/pci/pcie_aer.h
+++ b/include/hw/pci/pcie_aer.h
@@ -87,7 +87,7 @@ struct PCIEAERErr {
 
 extern const VMStateDescription vmstate_pcie_aer_log;
 
-int pcie_aer_init(PCIDevice *dev, uint16_t offset);
+int pcie_aer_init(PCIDevice *dev, uint16_t offset, uint16_t size);
 void pcie_aer_exit(PCIDevice *dev);
 void pcie_aer_write_config(PCIDevice *dev,
                            uint32_t addr, uint32_t val, int len);
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [Qemu-devel] [PATCH v5 3/7] vfio: add aer support for vfio device
  2015-03-12 10:23 [Qemu-devel] [PATCH v5 0/7] pass aer error to guest for vfio device Chen Fan
  2015-03-12 10:23 ` [Qemu-devel] [PATCH v5 1/7] vfio: add pcie extanded capability support Chen Fan
  2015-03-12 10:23 ` [Qemu-devel] [PATCH v5 2/7] aer: impove pcie_aer_init to support vfio device Chen Fan
@ 2015-03-12 10:23 ` Chen Fan
  2015-03-13 22:28   ` Alex Williamson
  2015-03-12 10:23 ` [Qemu-devel] [PATCH v5 4/7] pcie_aer: expose pcie_aer_msg() interface Chen Fan
                   ` (4 subsequent siblings)
  7 siblings, 1 reply; 53+ messages in thread
From: Chen Fan @ 2015-03-12 10:23 UTC (permalink / raw)
  To: qemu-devel; +Cc: izumi.taku, alex.williamson

Calling pcie_aer_init to initilize aer related registers for
vfio device, then reload physical related registers to expose
device capability.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 45 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 44 insertions(+), 1 deletion(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 2190102..0a515b6 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2724,12 +2724,43 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
     return 0;
 }
 
+static int vfio_setup_aer(VFIOPCIDevice *vdev, int pos, uint16_t size)
+{
+    PCIDevice *pdev = &vdev->pdev;
+    uint8_t *exp_cap = pdev->config + pdev->exp.exp_cap;
+    uint32_t severity, errcap;
+    int ret;
+
+    errcap = vfio_pci_read_config(pdev, pdev->exp.aer_cap + PCI_ERR_CAP, 4);
+    /* The ability to record multiple headers is depending on
+       the state of the Multiple Header Recording Capable bit and
+       enabled by the Multiple Header Recording Enable bit */
+    if ((errcap & PCI_ERR_CAP_MHRC) &&
+        (errcap & PCI_ERR_CAP_MHRE)) {
+        pdev->exp.aer_log.log_max = PCIE_AER_LOG_MAX_DEFAULT;
+    } else {
+        pdev->exp.aer_log.log_max = 0;
+    }
+    ret = pcie_aer_init(pdev, pos, size);
+    if (ret) {
+        return ret;
+    }
+
+    /* load physical registers */
+    severity = vfio_pci_read_config(pdev,
+                   pdev->exp.aer_cap + PCI_ERR_UNCOR_SEVER, 4);
+    pci_long_test_and_clear_mask(exp_cap + PCI_ERR_UNCOR_SEVER, ~severity);
+
+    return 0;
+}
+
 static int vfio_add_ext_cap(VFIOPCIDevice *vdev, const uint8_t *config)
 {
     PCIDevice *pdev = &vdev->pdev;
     uint32_t header;
     uint16_t cap_id, next, size;
     uint8_t cap_ver;
+    int ret = 0;
 
     for (next = PCI_CONFIG_SPACE_SIZE; next;
          next = PCI_EXT_CAP_NEXT(pci_get_long(config + next))) {
@@ -2745,7 +2776,19 @@ static int vfio_add_ext_cap(VFIOPCIDevice *vdev, const uint8_t *config)
          */
         size = vfio_ext_cap_max_size(config, next);
 
-        pcie_add_capability(pdev, cap_id, cap_ver, next, size);
+        switch (cap_id) {
+        case PCI_EXT_CAP_ID_ERR:
+            ret = vfio_setup_aer(vdev, next, size);
+            break;
+        default:
+            pcie_add_capability(pdev, cap_id, cap_ver, next, size);
+            break;
+        }
+
+        if (ret) {
+            return ret;
+        }
+
         if (next == PCI_CONFIG_SPACE_SIZE) {
             /* Begin the rebuild, we should set the next offset zero. */
             pci_set_long(pdev->config + next, PCI_EXT_CAP(cap_id, cap_ver, 0));
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [Qemu-devel] [PATCH v5 4/7] pcie_aer: expose pcie_aer_msg() interface
  2015-03-12 10:23 [Qemu-devel] [PATCH v5 0/7] pass aer error to guest for vfio device Chen Fan
                   ` (2 preceding siblings ...)
  2015-03-12 10:23 ` [Qemu-devel] [PATCH v5 3/7] vfio: add aer support for " Chen Fan
@ 2015-03-12 10:23 ` Chen Fan
  2015-03-13 22:30   ` Alex Williamson
  2015-03-18 13:29   ` Michael S. Tsirkin
  2015-03-12 10:23 ` [Qemu-devel] [PATCH v5 5/7] vfio-pci: pass the aer error to guest Chen Fan
                   ` (3 subsequent siblings)
  7 siblings, 2 replies; 53+ messages in thread
From: Chen Fan @ 2015-03-12 10:23 UTC (permalink / raw)
  To: qemu-devel; +Cc: izumi.taku, alex.williamson

For vfio device, we need to propagate the aer error to
Guest OS. we use the pcie_aer_msg() to send aer error
to guest.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/pci/pcie_aer.c         | 2 +-
 include/hw/pci/pcie_aer.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c
index 71045eb..f2d5d13 100644
--- a/hw/pci/pcie_aer.c
+++ b/hw/pci/pcie_aer.c
@@ -370,7 +370,7 @@ static void pcie_aer_msg_root_port(PCIDevice *dev, const PCIEAERMsg *msg)
  *
  * Walk up the bus tree from the device, propagate the error message.
  */
-static void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg)
+void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg)
 {
     uint8_t type;
 
diff --git a/include/hw/pci/pcie_aer.h b/include/hw/pci/pcie_aer.h
index a4cc6f3..80cba7b 100644
--- a/include/hw/pci/pcie_aer.h
+++ b/include/hw/pci/pcie_aer.h
@@ -102,5 +102,6 @@ void pcie_aer_root_write_config(PCIDevice *dev,
 
 /* error injection */
 int pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err);
+void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg);
 
 #endif /* QEMU_PCIE_AER_H */
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [Qemu-devel] [PATCH v5 5/7] vfio-pci: pass the aer error to guest
  2015-03-12 10:23 [Qemu-devel] [PATCH v5 0/7] pass aer error to guest for vfio device Chen Fan
                   ` (3 preceding siblings ...)
  2015-03-12 10:23 ` [Qemu-devel] [PATCH v5 4/7] pcie_aer: expose pcie_aer_msg() interface Chen Fan
@ 2015-03-12 10:23 ` Chen Fan
  2015-03-13 22:34   ` Alex Williamson
  2015-03-12 10:23 ` [Qemu-devel] [PATCH v5 6/7] vfio: add 'x-aer' property to expose aercap Chen Fan
                   ` (2 subsequent siblings)
  7 siblings, 1 reply; 53+ messages in thread
From: Chen Fan @ 2015-03-12 10:23 UTC (permalink / raw)
  To: qemu-devel; +Cc: izumi.taku, alex.williamson

when the vfio device encounters an uncorrectable error in host,
the vfio_pci driver will signal the eventfd registered by this
vfio device, the results in the qemu eventfd handler getting
invoked.

this patch is to pass the error to guest and have the guest driver
recover from the error.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 34 ++++++++++++++++++++++++++++------
 1 file changed, 28 insertions(+), 6 deletions(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 0a515b6..8966c49 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3240,18 +3240,40 @@ static void vfio_put_device(VFIOPCIDevice *vdev)
 static void vfio_err_notifier_handler(void *opaque)
 {
     VFIOPCIDevice *vdev = opaque;
+    PCIDevice *dev = &vdev->pdev;
+    PCIEAERMsg msg = {
+        .severity = 0,
+        .source_id = (pci_bus_num(dev->bus) << 8) | dev->devfn,
+    };
 
     if (!event_notifier_test_and_clear(&vdev->err_notifier)) {
         return;
     }
 
+    /* we should read the error details from the real hardware
+     * configuration spaces, here we only need to do is signaling
+     * to guest an uncorrectable error has occurred.
+     */
+     if(dev->exp.aer_cap) {
+        uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
+        uint32_t uncor_status;
+        bool isfatal;
+
+        uncor_status = vfio_pci_read_config(dev,
+                           dev->exp.aer_cap + PCI_ERR_UNCOR_STATUS, 4);
+
+        isfatal = uncor_status & pci_get_long(aer_cap + PCI_ERR_UNCOR_SEVER);
+
+        msg.severity = isfatal ? PCI_ERR_ROOT_CMD_FATAL_EN :
+                                 PCI_ERR_ROOT_CMD_NONFATAL_EN;
+
+        pcie_aer_msg(dev, &msg);
+        return;
+    }
+
     /*
-     * TBD. Retrieve the error details and decide what action
-     * needs to be taken. One of the actions could be to pass
-     * the error to the guest and have the guest driver recover
-     * from the error. This requires that PCIe capabilities be
-     * exposed to the guest. For now, we just terminate the
-     * guest to contain the error.
+     * If the aer capability is not exposed to the guest. we just
+     * terminate the guest to contain the error.
      */
 
     error_report("%s(%04x:%02x:%02x.%x) Unrecoverable error detected.  "
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [Qemu-devel] [PATCH v5 6/7] vfio: add 'x-aer' property to expose aercap
  2015-03-12 10:23 [Qemu-devel] [PATCH v5 0/7] pass aer error to guest for vfio device Chen Fan
                   ` (4 preceding siblings ...)
  2015-03-12 10:23 ` [Qemu-devel] [PATCH v5 5/7] vfio-pci: pass the aer error to guest Chen Fan
@ 2015-03-12 10:23 ` Chen Fan
  2015-03-18 13:23   ` Michael S. Tsirkin
  2015-03-12 10:23 ` [Qemu-devel] [PATCH v5 7/7] pc: add PC_I440FX_COMPAT to disable aercap for vifo device Chen Fan
  2015-03-16  2:52 ` [Qemu-devel] [PATCH v5 0/7] pass aer error to guest for vfio device Chen Fan
  7 siblings, 1 reply; 53+ messages in thread
From: Chen Fan @ 2015-03-12 10:23 UTC (permalink / raw)
  To: qemu-devel; +Cc: izumi.taku, alex.williamson

add 'x-aer' property to let user able to decide whether expose
the aer capability.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 8966c49..0517091 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -159,6 +159,8 @@ typedef struct VFIOPCIDevice {
 #define VFIO_FEATURE_ENABLE_VGA (1 << VFIO_FEATURE_ENABLE_VGA_BIT)
 #define VFIO_FEATURE_ENABLE_REQ_BIT 1
 #define VFIO_FEATURE_ENABLE_REQ (1 << VFIO_FEATURE_ENABLE_REQ_BIT)
+#define VFIO_FEATURE_ENABLE_AER_BIT 2
+#define VFIO_FEATURE_ENABLE_AER (1 << VFIO_FEATURE_ENABLE_AER_BIT)
     int32_t bootindex;
     uint8_t pm_cap;
     bool has_vga;
@@ -2731,6 +2733,10 @@ static int vfio_setup_aer(VFIOPCIDevice *vdev, int pos, uint16_t size)
     uint32_t severity, errcap;
     int ret;
 
+    if (!(vdev->features & VFIO_FEATURE_ENABLE_AER)) {
+        return 0;
+    }
+
     errcap = vfio_pci_read_config(pdev, pdev->exp.aer_cap + PCI_ERR_CAP, 4);
     /* The ability to record multiple headers is depending on
        the state of the Multiple Header Recording Capable bit and
@@ -3691,6 +3697,8 @@ static Property vfio_pci_dev_properties[] = {
                     VFIO_FEATURE_ENABLE_VGA_BIT, false),
     DEFINE_PROP_BIT("x-req", VFIOPCIDevice, features,
                     VFIO_FEATURE_ENABLE_REQ_BIT, true),
+    DEFINE_PROP_BIT("x-aer", VFIOPCIDevice, features,
+                    VFIO_FEATURE_ENABLE_AER_BIT, true),
     DEFINE_PROP_INT32("bootindex", VFIOPCIDevice, bootindex, -1),
     DEFINE_PROP_BOOL("x-mmap", VFIOPCIDevice, vbasedev.allow_mmap, true),
     /*
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* [Qemu-devel] [PATCH v5 7/7] pc: add PC_I440FX_COMPAT to disable aercap for vifo device
  2015-03-12 10:23 [Qemu-devel] [PATCH v5 0/7] pass aer error to guest for vfio device Chen Fan
                   ` (5 preceding siblings ...)
  2015-03-12 10:23 ` [Qemu-devel] [PATCH v5 6/7] vfio: add 'x-aer' property to expose aercap Chen Fan
@ 2015-03-12 10:23 ` Chen Fan
  2015-03-13 22:38   ` Alex Williamson
  2015-03-18 13:23   ` Michael S. Tsirkin
  2015-03-16  2:52 ` [Qemu-devel] [PATCH v5 0/7] pass aer error to guest for vfio device Chen Fan
  7 siblings, 2 replies; 53+ messages in thread
From: Chen Fan @ 2015-03-12 10:23 UTC (permalink / raw)
  To: qemu-devel; +Cc: izumi.taku, alex.williamson

for piix4 chipset, we don't need to expose aer, so introduce
PC_I440FX_COMPAT for all piix4 machines to disable aercap,
and add HW_COMPAT_2_2 to disable aercap for all lower
than 2.3.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/i386/pc_piix.c   |  9 +++++++++
 hw/i386/pc_q35.c    |  4 ++++
 include/hw/compat.h | 10 ++++++++++
 3 files changed, 23 insertions(+)

diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 8eab4ba..ff9d312 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -307,6 +307,11 @@ static void pc_init1(MachineState *machine,
 
 static void pc_init_pci(MachineState *machine)
 {
+    static GlobalProperty pc_compat_props[] = {
+        PC_I440FX_COMPAT,
+        { /* end of list */ }
+    };
+    qdev_prop_register_global_list(pc_compat_props);
     pc_init1(machine, 1, 1);
 }
 
@@ -534,6 +539,10 @@ static QEMUMachine pc_i440fx_machine_v2_2 = {
     PC_I440FX_2_2_MACHINE_OPTIONS,
     .name = "pc-i440fx-2.2",
     .init = pc_init_pci_2_2,
+    .compat_props = (GlobalProperty[]) {
+        HW_COMPAT_2_2,
+        { /* end of list */ }
+    },
 };
 
 #define PC_I440FX_2_1_MACHINE_OPTIONS                           \
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index c0f21fe..97afb7d 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -431,6 +431,10 @@ static QEMUMachine pc_q35_machine_v2_2 = {
     PC_Q35_2_2_MACHINE_OPTIONS,
     .name = "pc-q35-2.2",
     .init = pc_q35_init_2_2,
+    .compat_props = (GlobalProperty[]) {
+        HW_COMPAT_2_2,
+        { /* end of list */ }
+    },
 };
 
 #define PC_Q35_2_1_MACHINE_OPTIONS                      \
diff --git a/include/hw/compat.h b/include/hw/compat.h
index 313682a..40c974a 100644
--- a/include/hw/compat.h
+++ b/include/hw/compat.h
@@ -1,7 +1,17 @@
 #ifndef HW_COMPAT_H
 #define HW_COMPAT_H
 
+#define HW_COMPAT_2_2 PC_I440FX_COMPAT
+
+#define PC_I440FX_COMPAT \
+        {\
+            .driver   = "vfio-pci",\
+            .property = "x-aer",\
+            .value    = "off",\
+        }
+
 #define HW_COMPAT_2_1 \
+        HW_COMPAT_2_2, \
         {\
             .driver   = "intel-hda",\
             .property = "old_msi_addr",\
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 2/7] aer: impove pcie_aer_init to support vfio device
  2015-03-12 10:23 ` [Qemu-devel] [PATCH v5 2/7] aer: impove pcie_aer_init to support vfio device Chen Fan
@ 2015-03-13 22:25   ` Alex Williamson
  2015-03-16  2:30     ` Chen Fan
  0 siblings, 1 reply; 53+ messages in thread
From: Alex Williamson @ 2015-03-13 22:25 UTC (permalink / raw)
  To: Chen Fan; +Cc: izumi.taku, qemu-devel

On Thu, 2015-03-12 at 18:23 +0800, Chen Fan wrote:
> pcie_aer_init was used to emulate an aer capability for pcie device,
> but for vfio device, the aer config space size is mutable and is not
> always equal to PCI_ERR_SIZEOF(0x48). it depends on where the TLP Prefix
> register required, so here we add a size argument.

This would need to go through the QEMU PCI tree.  Question, do all of
the fields of the AER capability directly translate to the guest view of
the device?  For instance, are there source and destination values that
need to be emulated for the guest?  I'm not really sure what we're
exposing via the TLP Prefix area.  Thanks,

Alex

> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> ---
>  hw/pci-bridge/ioh3420.c            | 2 +-
>  hw/pci-bridge/xio3130_downstream.c | 2 +-
>  hw/pci-bridge/xio3130_upstream.c   | 2 +-
>  hw/pci/pcie_aer.c                  | 4 ++--
>  include/hw/pci/pcie_aer.h          | 2 +-
>  5 files changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/hw/pci-bridge/ioh3420.c b/hw/pci-bridge/ioh3420.c
> index cce2fdd..4d9cd3f 100644
> --- a/hw/pci-bridge/ioh3420.c
> +++ b/hw/pci-bridge/ioh3420.c
> @@ -129,7 +129,7 @@ static int ioh3420_initfn(PCIDevice *d)
>          goto err_pcie_cap;
>      }
>      pcie_cap_root_init(d);
> -    rc = pcie_aer_init(d, IOH_EP_AER_OFFSET);
> +    rc = pcie_aer_init(d, IOH_EP_AER_OFFSET, PCI_ERR_SIZEOF);
>      if (rc < 0) {
>          goto err;
>      }
> diff --git a/hw/pci-bridge/xio3130_downstream.c b/hw/pci-bridge/xio3130_downstream.c
> index b3a6479..9737041 100644
> --- a/hw/pci-bridge/xio3130_downstream.c
> +++ b/hw/pci-bridge/xio3130_downstream.c
> @@ -92,7 +92,7 @@ static int xio3130_downstream_initfn(PCIDevice *d)
>          goto err_pcie_cap;
>      }
>      pcie_cap_arifwd_init(d);
> -    rc = pcie_aer_init(d, XIO3130_AER_OFFSET);
> +    rc = pcie_aer_init(d, XIO3130_AER_OFFSET, PCI_ERR_SIZEOF);
>      if (rc < 0) {
>          goto err;
>      }
> diff --git a/hw/pci-bridge/xio3130_upstream.c b/hw/pci-bridge/xio3130_upstream.c
> index eada582..4d7f894 100644
> --- a/hw/pci-bridge/xio3130_upstream.c
> +++ b/hw/pci-bridge/xio3130_upstream.c
> @@ -81,7 +81,7 @@ static int xio3130_upstream_initfn(PCIDevice *d)
>      }
>      pcie_cap_flr_init(d);
>      pcie_cap_deverr_init(d);
> -    rc = pcie_aer_init(d, XIO3130_AER_OFFSET);
> +    rc = pcie_aer_init(d, XIO3130_AER_OFFSET, PCI_ERR_SIZEOF);
>      if (rc < 0) {
>          goto err;
>      }
> diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c
> index 5a25c32..71045eb 100644
> --- a/hw/pci/pcie_aer.c
> +++ b/hw/pci/pcie_aer.c
> @@ -94,12 +94,12 @@ static void aer_log_clear_all_err(PCIEAERLog *aer_log)
>      aer_log->log_num = 0;
>  }
>  
> -int pcie_aer_init(PCIDevice *dev, uint16_t offset)
> +int pcie_aer_init(PCIDevice *dev, uint16_t offset, uint16_t size)
>  {
>      PCIExpressDevice *exp;
>  
>      pcie_add_capability(dev, PCI_EXT_CAP_ID_ERR, PCI_ERR_VER,
> -                        offset, PCI_ERR_SIZEOF);
> +                        offset, size);
>      exp = &dev->exp;
>      exp->aer_cap = offset;
>  
> diff --git a/include/hw/pci/pcie_aer.h b/include/hw/pci/pcie_aer.h
> index bcac80a..a4cc6f3 100644
> --- a/include/hw/pci/pcie_aer.h
> +++ b/include/hw/pci/pcie_aer.h
> @@ -87,7 +87,7 @@ struct PCIEAERErr {
>  
>  extern const VMStateDescription vmstate_pcie_aer_log;
>  
> -int pcie_aer_init(PCIDevice *dev, uint16_t offset);
> +int pcie_aer_init(PCIDevice *dev, uint16_t offset, uint16_t size);
>  void pcie_aer_exit(PCIDevice *dev);
>  void pcie_aer_write_config(PCIDevice *dev,
>                             uint32_t addr, uint32_t val, int len);

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 3/7] vfio: add aer support for vfio device
  2015-03-12 10:23 ` [Qemu-devel] [PATCH v5 3/7] vfio: add aer support for " Chen Fan
@ 2015-03-13 22:28   ` Alex Williamson
  0 siblings, 0 replies; 53+ messages in thread
From: Alex Williamson @ 2015-03-13 22:28 UTC (permalink / raw)
  To: Chen Fan; +Cc: izumi.taku, qemu-devel

On Thu, 2015-03-12 at 18:23 +0800, Chen Fan wrote:
> Calling pcie_aer_init to initilize aer related registers for
> vfio device, then reload physical related registers to expose
> device capability.
> 
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> ---
>  hw/vfio/pci.c | 45 ++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 44 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 2190102..0a515b6 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -2724,12 +2724,43 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
>      return 0;
>  }
>  
> +static int vfio_setup_aer(VFIOPCIDevice *vdev, int pos, uint16_t size)
> +{
> +    PCIDevice *pdev = &vdev->pdev;
> +    uint8_t *exp_cap = pdev->config + pdev->exp.exp_cap;
> +    uint32_t severity, errcap;
> +    int ret;
> +
> +    errcap = vfio_pci_read_config(pdev, pdev->exp.aer_cap + PCI_ERR_CAP, 4);
> +    /* The ability to record multiple headers is depending on
> +       the state of the Multiple Header Recording Capable bit and
> +       enabled by the Multiple Header Recording Enable bit */

Please follow the existing comment style in this file

> +    if ((errcap & PCI_ERR_CAP_MHRC) &&
> +        (errcap & PCI_ERR_CAP_MHRE)) {
> +        pdev->exp.aer_log.log_max = PCIE_AER_LOG_MAX_DEFAULT;
> +    } else {
> +        pdev->exp.aer_log.log_max = 0;
> +    }
> +    ret = pcie_aer_init(pdev, pos, size);
> +    if (ret) {
> +        return ret;
> +    }
> +
> +    /* load physical registers */
> +    severity = vfio_pci_read_config(pdev,
> +                   pdev->exp.aer_cap + PCI_ERR_UNCOR_SEVER, 4);
> +    pci_long_test_and_clear_mask(exp_cap + PCI_ERR_UNCOR_SEVER, ~severity);
> +
> +    return 0;
> +}
> +
>  static int vfio_add_ext_cap(VFIOPCIDevice *vdev, const uint8_t *config)
>  {
>      PCIDevice *pdev = &vdev->pdev;
>      uint32_t header;
>      uint16_t cap_id, next, size;
>      uint8_t cap_ver;
> +    int ret = 0;
>  
>      for (next = PCI_CONFIG_SPACE_SIZE; next;
>           next = PCI_EXT_CAP_NEXT(pci_get_long(config + next))) {
> @@ -2745,7 +2776,19 @@ static int vfio_add_ext_cap(VFIOPCIDevice *vdev, const uint8_t *config)
>           */
>          size = vfio_ext_cap_max_size(config, next);
>  
> -        pcie_add_capability(pdev, cap_id, cap_ver, next, size);
> +        switch (cap_id) {
> +        case PCI_EXT_CAP_ID_ERR:
> +            ret = vfio_setup_aer(vdev, next, size);
> +            break;
> +        default:
> +            pcie_add_capability(pdev, cap_id, cap_ver, next, size);
> +            break;
> +        }
> +
> +        if (ret) {
> +            return ret;
> +        }
> +
>          if (next == PCI_CONFIG_SPACE_SIZE) {
>              /* Begin the rebuild, we should set the next offset zero. */
>              pci_set_long(pdev->config + next, PCI_EXT_CAP(cap_id, cap_ver, 0));

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 4/7] pcie_aer: expose pcie_aer_msg() interface
  2015-03-12 10:23 ` [Qemu-devel] [PATCH v5 4/7] pcie_aer: expose pcie_aer_msg() interface Chen Fan
@ 2015-03-13 22:30   ` Alex Williamson
  2015-03-18 13:29   ` Michael S. Tsirkin
  1 sibling, 0 replies; 53+ messages in thread
From: Alex Williamson @ 2015-03-13 22:30 UTC (permalink / raw)
  To: Chen Fan; +Cc: izumi.taku, qemu-devel

On Thu, 2015-03-12 at 18:23 +0800, Chen Fan wrote:
> For vfio device, we need to propagate the aer error to
> Guest OS. we use the pcie_aer_msg() to send aer error
> to guest.

This also would need to go in through the QEMU PCI tree, or at least get
an ack from Michael.  Thanks,

Alex


> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> ---
>  hw/pci/pcie_aer.c         | 2 +-
>  include/hw/pci/pcie_aer.h | 1 +
>  2 files changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c
> index 71045eb..f2d5d13 100644
> --- a/hw/pci/pcie_aer.c
> +++ b/hw/pci/pcie_aer.c
> @@ -370,7 +370,7 @@ static void pcie_aer_msg_root_port(PCIDevice *dev, const PCIEAERMsg *msg)
>   *
>   * Walk up the bus tree from the device, propagate the error message.
>   */
> -static void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg)
> +void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg)
>  {
>      uint8_t type;
>  
> diff --git a/include/hw/pci/pcie_aer.h b/include/hw/pci/pcie_aer.h
> index a4cc6f3..80cba7b 100644
> --- a/include/hw/pci/pcie_aer.h
> +++ b/include/hw/pci/pcie_aer.h
> @@ -102,5 +102,6 @@ void pcie_aer_root_write_config(PCIDevice *dev,
>  
>  /* error injection */
>  int pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err);
> +void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg);
>  
>  #endif /* QEMU_PCIE_AER_H */

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 5/7] vfio-pci: pass the aer error to guest
  2015-03-12 10:23 ` [Qemu-devel] [PATCH v5 5/7] vfio-pci: pass the aer error to guest Chen Fan
@ 2015-03-13 22:34   ` Alex Williamson
  2015-03-16  3:05     ` Chen Fan
  0 siblings, 1 reply; 53+ messages in thread
From: Alex Williamson @ 2015-03-13 22:34 UTC (permalink / raw)
  To: Chen Fan; +Cc: izumi.taku, qemu-devel

On Thu, 2015-03-12 at 18:23 +0800, Chen Fan wrote:
> when the vfio device encounters an uncorrectable error in host,
> the vfio_pci driver will signal the eventfd registered by this
> vfio device, the results in the qemu eventfd handler getting
> invoked.
> 
> this patch is to pass the error to guest and have the guest driver
> recover from the error.

What is going to be the typical recovery mechanism for the guest?  I'm
concerned that the topology of the device in the guest doesn't
necessarily match the topology of the device in the host, so if the
guest were to attempt a bus reset to recover a device, for instance,
what happens?

> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> ---
>  hw/vfio/pci.c | 34 ++++++++++++++++++++++++++++------
>  1 file changed, 28 insertions(+), 6 deletions(-)
> 
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 0a515b6..8966c49 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -3240,18 +3240,40 @@ static void vfio_put_device(VFIOPCIDevice *vdev)
>  static void vfio_err_notifier_handler(void *opaque)
>  {
>      VFIOPCIDevice *vdev = opaque;
> +    PCIDevice *dev = &vdev->pdev;
> +    PCIEAERMsg msg = {
> +        .severity = 0,
> +        .source_id = (pci_bus_num(dev->bus) << 8) | dev->devfn,
> +    };
>  
>      if (!event_notifier_test_and_clear(&vdev->err_notifier)) {
>          return;
>      }
>  
> +    /* we should read the error details from the real hardware
> +     * configuration spaces, here we only need to do is signaling
> +     * to guest an uncorrectable error has occurred.
> +     */

Inconsistent comment style

> +     if(dev->exp.aer_cap) {
         ^ space

> +        uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
> +        uint32_t uncor_status;
> +        bool isfatal;
> +
> +        uncor_status = vfio_pci_read_config(dev,
> +                           dev->exp.aer_cap + PCI_ERR_UNCOR_STATUS, 4);
> +
> +        isfatal = uncor_status & pci_get_long(aer_cap + PCI_ERR_UNCOR_SEVER);
> +
> +        msg.severity = isfatal ? PCI_ERR_ROOT_CMD_FATAL_EN :
> +                                 PCI_ERR_ROOT_CMD_NONFATAL_EN;
> +
> +        pcie_aer_msg(dev, &msg);
> +        return;
> +    }
> +
>      /*
> -     * TBD. Retrieve the error details and decide what action
> -     * needs to be taken. One of the actions could be to pass
> -     * the error to the guest and have the guest driver recover
> -     * from the error. This requires that PCIe capabilities be
> -     * exposed to the guest. For now, we just terminate the
> -     * guest to contain the error.
> +     * If the aer capability is not exposed to the guest. we just
> +     * terminate the guest to contain the error.
>       */
>  
>      error_report("%s(%04x:%02x:%02x.%x) Unrecoverable error detected.  "

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 7/7] pc: add PC_I440FX_COMPAT to disable aercap for vifo device
  2015-03-12 10:23 ` [Qemu-devel] [PATCH v5 7/7] pc: add PC_I440FX_COMPAT to disable aercap for vifo device Chen Fan
@ 2015-03-13 22:38   ` Alex Williamson
  2015-03-16  2:48     ` Chen Fan
  2015-03-16  2:49     ` Chen Fan
  2015-03-18 13:23   ` Michael S. Tsirkin
  1 sibling, 2 replies; 53+ messages in thread
From: Alex Williamson @ 2015-03-13 22:38 UTC (permalink / raw)
  To: Chen Fan; +Cc: izumi.taku, qemu-devel

On Thu, 2015-03-12 at 18:23 +0800, Chen Fan wrote:
> for piix4 chipset, we don't need to expose aer, so introduce
> PC_I440FX_COMPAT for all piix4 machines to disable aercap,
> and add HW_COMPAT_2_2 to disable aercap for all lower
> than 2.3.

440FX is not PCIe, so it doesn't seem like we need to do anything there.
Shouldn't this only cover q35 machine types through 2.3?  (QEMU 2.3 is
already in hard freeze afaik, this won't go in until after 2.4
development opens).  Thanks,

Alex

> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> ---
>  hw/i386/pc_piix.c   |  9 +++++++++
>  hw/i386/pc_q35.c    |  4 ++++
>  include/hw/compat.h | 10 ++++++++++
>  3 files changed, 23 insertions(+)
> 
> diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
> index 8eab4ba..ff9d312 100644
> --- a/hw/i386/pc_piix.c
> +++ b/hw/i386/pc_piix.c
> @@ -307,6 +307,11 @@ static void pc_init1(MachineState *machine,
>  
>  static void pc_init_pci(MachineState *machine)
>  {
> +    static GlobalProperty pc_compat_props[] = {
> +        PC_I440FX_COMPAT,
> +        { /* end of list */ }
> +    };
> +    qdev_prop_register_global_list(pc_compat_props);
>      pc_init1(machine, 1, 1);
>  }
>  
> @@ -534,6 +539,10 @@ static QEMUMachine pc_i440fx_machine_v2_2 = {
>      PC_I440FX_2_2_MACHINE_OPTIONS,
>      .name = "pc-i440fx-2.2",
>      .init = pc_init_pci_2_2,
> +    .compat_props = (GlobalProperty[]) {
> +        HW_COMPAT_2_2,
> +        { /* end of list */ }
> +    },
>  };
>  
>  #define PC_I440FX_2_1_MACHINE_OPTIONS                           \
> diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
> index c0f21fe..97afb7d 100644
> --- a/hw/i386/pc_q35.c
> +++ b/hw/i386/pc_q35.c
> @@ -431,6 +431,10 @@ static QEMUMachine pc_q35_machine_v2_2 = {
>      PC_Q35_2_2_MACHINE_OPTIONS,
>      .name = "pc-q35-2.2",
>      .init = pc_q35_init_2_2,
> +    .compat_props = (GlobalProperty[]) {
> +        HW_COMPAT_2_2,
> +        { /* end of list */ }
> +    },
>  };
>  
>  #define PC_Q35_2_1_MACHINE_OPTIONS                      \
> diff --git a/include/hw/compat.h b/include/hw/compat.h
> index 313682a..40c974a 100644
> --- a/include/hw/compat.h
> +++ b/include/hw/compat.h
> @@ -1,7 +1,17 @@
>  #ifndef HW_COMPAT_H
>  #define HW_COMPAT_H
>  
> +#define HW_COMPAT_2_2 PC_I440FX_COMPAT
> +
> +#define PC_I440FX_COMPAT \
> +        {\
> +            .driver   = "vfio-pci",\
> +            .property = "x-aer",\
> +            .value    = "off",\
> +        }
> +
>  #define HW_COMPAT_2_1 \
> +        HW_COMPAT_2_2, \
>          {\
>              .driver   = "intel-hda",\
>              .property = "old_msi_addr",\

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 2/7] aer: impove pcie_aer_init to support vfio device
  2015-03-13 22:25   ` Alex Williamson
@ 2015-03-16  2:30     ` Chen Fan
  0 siblings, 0 replies; 53+ messages in thread
From: Chen Fan @ 2015-03-16  2:30 UTC (permalink / raw)
  To: Alex Williamson; +Cc: izumi.taku, qemu-devel


On 03/14/2015 06:25 AM, Alex Williamson wrote:
> On Thu, 2015-03-12 at 18:23 +0800, Chen Fan wrote:
>> pcie_aer_init was used to emulate an aer capability for pcie device,
>> but for vfio device, the aer config space size is mutable and is not
>> always equal to PCI_ERR_SIZEOF(0x48). it depends on where the TLP Prefix
>> register required, so here we add a size argument.
> This would need to go through the QEMU PCI tree.  Question, do all of
> the fields of the AER capability directly translate to the guest view of
> the device?  For instance, are there source and destination values that
> need to be emulated for the guest?  I'm not really sure what we're
> exposing via the TLP Prefix area.  Thanks,
I think all of the fields of the AER do not need to be emulated for the 
guest.
the guest directly read the register of err status, Header log and TLP 
prefix log.

and we don't need to care what TLP prefix exposes, due to the TLP prefix
area is depended on the End-End TLP Prefix Supported bit. the spec said
If the End-End TLP Prefix Supported bit (Section 7.8.15) is Clear, the 
TLP Prefix
Log register is not required to be implemented. therefore if we directly use
the PCI_ERR_SIZEOF size to initialize aer, the maybe occur size overlap.

Thanks,
Chen



>
> Alex
>
>> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>> ---
>>   hw/pci-bridge/ioh3420.c            | 2 +-
>>   hw/pci-bridge/xio3130_downstream.c | 2 +-
>>   hw/pci-bridge/xio3130_upstream.c   | 2 +-
>>   hw/pci/pcie_aer.c                  | 4 ++--
>>   include/hw/pci/pcie_aer.h          | 2 +-
>>   5 files changed, 6 insertions(+), 6 deletions(-)
>>
>> diff --git a/hw/pci-bridge/ioh3420.c b/hw/pci-bridge/ioh3420.c
>> index cce2fdd..4d9cd3f 100644
>> --- a/hw/pci-bridge/ioh3420.c
>> +++ b/hw/pci-bridge/ioh3420.c
>> @@ -129,7 +129,7 @@ static int ioh3420_initfn(PCIDevice *d)
>>           goto err_pcie_cap;
>>       }
>>       pcie_cap_root_init(d);
>> -    rc = pcie_aer_init(d, IOH_EP_AER_OFFSET);
>> +    rc = pcie_aer_init(d, IOH_EP_AER_OFFSET, PCI_ERR_SIZEOF);
>>       if (rc < 0) {
>>           goto err;
>>       }
>> diff --git a/hw/pci-bridge/xio3130_downstream.c b/hw/pci-bridge/xio3130_downstream.c
>> index b3a6479..9737041 100644
>> --- a/hw/pci-bridge/xio3130_downstream.c
>> +++ b/hw/pci-bridge/xio3130_downstream.c
>> @@ -92,7 +92,7 @@ static int xio3130_downstream_initfn(PCIDevice *d)
>>           goto err_pcie_cap;
>>       }
>>       pcie_cap_arifwd_init(d);
>> -    rc = pcie_aer_init(d, XIO3130_AER_OFFSET);
>> +    rc = pcie_aer_init(d, XIO3130_AER_OFFSET, PCI_ERR_SIZEOF);
>>       if (rc < 0) {
>>           goto err;
>>       }
>> diff --git a/hw/pci-bridge/xio3130_upstream.c b/hw/pci-bridge/xio3130_upstream.c
>> index eada582..4d7f894 100644
>> --- a/hw/pci-bridge/xio3130_upstream.c
>> +++ b/hw/pci-bridge/xio3130_upstream.c
>> @@ -81,7 +81,7 @@ static int xio3130_upstream_initfn(PCIDevice *d)
>>       }
>>       pcie_cap_flr_init(d);
>>       pcie_cap_deverr_init(d);
>> -    rc = pcie_aer_init(d, XIO3130_AER_OFFSET);
>> +    rc = pcie_aer_init(d, XIO3130_AER_OFFSET, PCI_ERR_SIZEOF);
>>       if (rc < 0) {
>>           goto err;
>>       }
>> diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c
>> index 5a25c32..71045eb 100644
>> --- a/hw/pci/pcie_aer.c
>> +++ b/hw/pci/pcie_aer.c
>> @@ -94,12 +94,12 @@ static void aer_log_clear_all_err(PCIEAERLog *aer_log)
>>       aer_log->log_num = 0;
>>   }
>>   
>> -int pcie_aer_init(PCIDevice *dev, uint16_t offset)
>> +int pcie_aer_init(PCIDevice *dev, uint16_t offset, uint16_t size)
>>   {
>>       PCIExpressDevice *exp;
>>   
>>       pcie_add_capability(dev, PCI_EXT_CAP_ID_ERR, PCI_ERR_VER,
>> -                        offset, PCI_ERR_SIZEOF);
>> +                        offset, size);
>>       exp = &dev->exp;
>>       exp->aer_cap = offset;
>>   
>> diff --git a/include/hw/pci/pcie_aer.h b/include/hw/pci/pcie_aer.h
>> index bcac80a..a4cc6f3 100644
>> --- a/include/hw/pci/pcie_aer.h
>> +++ b/include/hw/pci/pcie_aer.h
>> @@ -87,7 +87,7 @@ struct PCIEAERErr {
>>   
>>   extern const VMStateDescription vmstate_pcie_aer_log;
>>   
>> -int pcie_aer_init(PCIDevice *dev, uint16_t offset);
>> +int pcie_aer_init(PCIDevice *dev, uint16_t offset, uint16_t size);
>>   void pcie_aer_exit(PCIDevice *dev);
>>   void pcie_aer_write_config(PCIDevice *dev,
>>                              uint32_t addr, uint32_t val, int len);
>
>
> .
>

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 7/7] pc: add PC_I440FX_COMPAT to disable aercap for vifo device
  2015-03-13 22:38   ` Alex Williamson
@ 2015-03-16  2:48     ` Chen Fan
  2015-03-16  2:49     ` Chen Fan
  1 sibling, 0 replies; 53+ messages in thread
From: Chen Fan @ 2015-03-16  2:48 UTC (permalink / raw)
  To: Alex Williamson; +Cc: izumi.taku, qemu-devel


On 03/14/2015 06:38 AM, Alex Williamson wrote:
> On Thu, 2015-03-12 at 18:23 +0800, Chen Fan wrote:
>> for piix4 chipset, we don't need to expose aer, so introduce
>> PC_I440FX_COMPAT for all piix4 machines to disable aercap,
>> and add HW_COMPAT_2_2 to disable aercap for all lower
>> than 2.3.
> 440FX is not PCIe, so it doesn't seem like we need to do anything there.
> Shouldn't this only cover q35 machine types through 2.3?  (QEMU 2.3 is
> already in hard freeze afaik, this won't go in until after 2.4
> development opens).  Thanks,
Yes, 440Fx is not PCIe, so extended cap won't be parsed.
I knew this patches won't be in QEMU 2.3. I want to wait for
the 2.4 opens and then rebase this patch.

Thanks,
Chen


>
> Alex
>
>> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>> ---
>>   hw/i386/pc_piix.c   |  9 +++++++++
>>   hw/i386/pc_q35.c    |  4 ++++
>>   include/hw/compat.h | 10 ++++++++++
>>   3 files changed, 23 insertions(+)
>>
>> diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
>> index 8eab4ba..ff9d312 100644
>> --- a/hw/i386/pc_piix.c
>> +++ b/hw/i386/pc_piix.c
>> @@ -307,6 +307,11 @@ static void pc_init1(MachineState *machine,
>>   
>>   static void pc_init_pci(MachineState *machine)
>>   {
>> +    static GlobalProperty pc_compat_props[] = {
>> +        PC_I440FX_COMPAT,
>> +        { /* end of list */ }
>> +    };
>> +    qdev_prop_register_global_list(pc_compat_props);
>>       pc_init1(machine, 1, 1);
>>   }
>>   
>> @@ -534,6 +539,10 @@ static QEMUMachine pc_i440fx_machine_v2_2 = {
>>       PC_I440FX_2_2_MACHINE_OPTIONS,
>>       .name = "pc-i440fx-2.2",
>>       .init = pc_init_pci_2_2,
>> +    .compat_props = (GlobalProperty[]) {
>> +        HW_COMPAT_2_2,
>> +        { /* end of list */ }
>> +    },
>>   };
>>   
>>   #define PC_I440FX_2_1_MACHINE_OPTIONS                           \
>> diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
>> index c0f21fe..97afb7d 100644
>> --- a/hw/i386/pc_q35.c
>> +++ b/hw/i386/pc_q35.c
>> @@ -431,6 +431,10 @@ static QEMUMachine pc_q35_machine_v2_2 = {
>>       PC_Q35_2_2_MACHINE_OPTIONS,
>>       .name = "pc-q35-2.2",
>>       .init = pc_q35_init_2_2,
>> +    .compat_props = (GlobalProperty[]) {
>> +        HW_COMPAT_2_2,
>> +        { /* end of list */ }
>> +    },
>>   };
>>   
>>   #define PC_Q35_2_1_MACHINE_OPTIONS                      \
>> diff --git a/include/hw/compat.h b/include/hw/compat.h
>> index 313682a..40c974a 100644
>> --- a/include/hw/compat.h
>> +++ b/include/hw/compat.h
>> @@ -1,7 +1,17 @@
>>   #ifndef HW_COMPAT_H
>>   #define HW_COMPAT_H
>>   
>> +#define HW_COMPAT_2_2 PC_I440FX_COMPAT
>> +
>> +#define PC_I440FX_COMPAT \
>> +        {\
>> +            .driver   = "vfio-pci",\
>> +            .property = "x-aer",\
>> +            .value    = "off",\
>> +        }
>> +
>>   #define HW_COMPAT_2_1 \
>> +        HW_COMPAT_2_2, \
>>           {\
>>               .driver   = "intel-hda",\
>>               .property = "old_msi_addr",\
>
>
> .
>

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 7/7] pc: add PC_I440FX_COMPAT to disable aercap for vifo device
  2015-03-13 22:38   ` Alex Williamson
  2015-03-16  2:48     ` Chen Fan
@ 2015-03-16  2:49     ` Chen Fan
  1 sibling, 0 replies; 53+ messages in thread
From: Chen Fan @ 2015-03-16  2:49 UTC (permalink / raw)
  To: Alex Williamson; +Cc: izumi.taku, qemu-devel


On 03/14/2015 06:38 AM, Alex Williamson wrote:
> On Thu, 2015-03-12 at 18:23 +0800, Chen Fan wrote:
>> for piix4 chipset, we don't need to expose aer, so introduce
>> PC_I440FX_COMPAT for all piix4 machines to disable aercap,
>> and add HW_COMPAT_2_2 to disable aercap for all lower
>> than 2.3.
> 440FX is not PCIe, so it doesn't seem like we need to do anything there.
> Shouldn't this only cover q35 machine types through 2.3?  (QEMU 2.3 is
> already in hard freeze afaik, this won't go in until after 2.4
> development opens).  Thanks,
Yes, 440Fx is not PCIe, so extended cap won't be parsed.
I knew this patches won't be in QEMU 2.3. I want to wait for
the 2.4 opens and then rebase this patch separately.

Thanks,
Chen


>
> Alex
>
>> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>> ---
>>   hw/i386/pc_piix.c   |  9 +++++++++
>>   hw/i386/pc_q35.c    |  4 ++++
>>   include/hw/compat.h | 10 ++++++++++
>>   3 files changed, 23 insertions(+)
>>
>> diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
>> index 8eab4ba..ff9d312 100644
>> --- a/hw/i386/pc_piix.c
>> +++ b/hw/i386/pc_piix.c
>> @@ -307,6 +307,11 @@ static void pc_init1(MachineState *machine,
>>   
>>   static void pc_init_pci(MachineState *machine)
>>   {
>> +    static GlobalProperty pc_compat_props[] = {
>> +        PC_I440FX_COMPAT,
>> +        { /* end of list */ }
>> +    };
>> +    qdev_prop_register_global_list(pc_compat_props);
>>       pc_init1(machine, 1, 1);
>>   }
>>   
>> @@ -534,6 +539,10 @@ static QEMUMachine pc_i440fx_machine_v2_2 = {
>>       PC_I440FX_2_2_MACHINE_OPTIONS,
>>       .name = "pc-i440fx-2.2",
>>       .init = pc_init_pci_2_2,
>> +    .compat_props = (GlobalProperty[]) {
>> +        HW_COMPAT_2_2,
>> +        { /* end of list */ }
>> +    },
>>   };
>>   
>>   #define PC_I440FX_2_1_MACHINE_OPTIONS                           \
>> diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
>> index c0f21fe..97afb7d 100644
>> --- a/hw/i386/pc_q35.c
>> +++ b/hw/i386/pc_q35.c
>> @@ -431,6 +431,10 @@ static QEMUMachine pc_q35_machine_v2_2 = {
>>       PC_Q35_2_2_MACHINE_OPTIONS,
>>       .name = "pc-q35-2.2",
>>       .init = pc_q35_init_2_2,
>> +    .compat_props = (GlobalProperty[]) {
>> +        HW_COMPAT_2_2,
>> +        { /* end of list */ }
>> +    },
>>   };
>>   
>>   #define PC_Q35_2_1_MACHINE_OPTIONS                      \
>> diff --git a/include/hw/compat.h b/include/hw/compat.h
>> index 313682a..40c974a 100644
>> --- a/include/hw/compat.h
>> +++ b/include/hw/compat.h
>> @@ -1,7 +1,17 @@
>>   #ifndef HW_COMPAT_H
>>   #define HW_COMPAT_H
>>   
>> +#define HW_COMPAT_2_2 PC_I440FX_COMPAT
>> +
>> +#define PC_I440FX_COMPAT \
>> +        {\
>> +            .driver   = "vfio-pci",\
>> +            .property = "x-aer",\
>> +            .value    = "off",\
>> +        }
>> +
>>   #define HW_COMPAT_2_1 \
>> +        HW_COMPAT_2_2, \
>>           {\
>>               .driver   = "intel-hda",\
>>               .property = "old_msi_addr",\
>
>
> .
>

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 0/7] pass aer error to guest for vfio device
  2015-03-12 10:23 [Qemu-devel] [PATCH v5 0/7] pass aer error to guest for vfio device Chen Fan
                   ` (6 preceding siblings ...)
  2015-03-12 10:23 ` [Qemu-devel] [PATCH v5 7/7] pc: add PC_I440FX_COMPAT to disable aercap for vifo device Chen Fan
@ 2015-03-16  2:52 ` Chen Fan
  2015-03-16  4:57   ` Michael S. Tsirkin
  7 siblings, 1 reply; 53+ messages in thread
From: Chen Fan @ 2015-03-16  2:52 UTC (permalink / raw)
  To: qemu-devel; +Cc: izumi.taku, alex.williamson, mst

Cc: Michael S. Tsirkin

On 03/12/2015 06:23 PM, Chen Fan wrote:
> For now, for vfio pci passthough devices when qemu receives
> an error from host aer report, there just terminate the guest,
> but usually user want to know what error occurred but stop the
> guest, so this patches add aer capability support for vfio device,
> and pass the error to guest, and have guest driver to recover
> from the error.
> and turning on SERR# for error forwording in bridge control register
> patch in seabios has been merged.
>
> v3-v4:
>     1. add 'x-aer' for user to off aer capability.
>     2. refactor vfio device to parse extended capabilities.
>
> v2-v3:
>     1. refactor vfio device to parse extended capability.
>     2. add global property for piix4 to disable vfio aer cap.
>
> v1-v2:
>     1. turn on SERR# for bridge control register in firmware.
>     2. initilize aer capability for vfio device.
>     3. fix some trivial bug.
>
> Chen Fan (7):
>    vfio: add pcie extanded capability support
>    aer: impove pcie_aer_init to support vfio device
>    vfio: add aer support for vfio device
>    pcie_aer: expose pcie_aer_msg() interface
>    vfio-pci: pass the aer error to guest
>    vfio: add 'x-aer' property to expose aercap
>    pc: add PC_I440FX_COMPAT to disable aercap for vifo device
>
>   hw/i386/pc_piix.c                  |   9 +++
>   hw/i386/pc_q35.c                   |   4 +
>   hw/pci-bridge/ioh3420.c            |   2 +-
>   hw/pci-bridge/xio3130_downstream.c |   2 +-
>   hw/pci-bridge/xio3130_upstream.c   |   2 +-
>   hw/pci/pcie_aer.c                  |   6 +-
>   hw/vfio/pci.c                      | 158 +++++++++++++++++++++++++++++++++++--
>   include/hw/compat.h                |  10 +++
>   include/hw/pci/pcie_aer.h          |   3 +-
>   9 files changed, 182 insertions(+), 14 deletions(-)
>

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 5/7] vfio-pci: pass the aer error to guest
  2015-03-13 22:34   ` Alex Williamson
@ 2015-03-16  3:05     ` Chen Fan
  2015-03-16  3:52       ` Alex Williamson
  0 siblings, 1 reply; 53+ messages in thread
From: Chen Fan @ 2015-03-16  3:05 UTC (permalink / raw)
  To: Alex Williamson; +Cc: izumi.taku, qemu-devel


On 03/14/2015 06:34 AM, Alex Williamson wrote:
> On Thu, 2015-03-12 at 18:23 +0800, Chen Fan wrote:
>> when the vfio device encounters an uncorrectable error in host,
>> the vfio_pci driver will signal the eventfd registered by this
>> vfio device, the results in the qemu eventfd handler getting
>> invoked.
>>
>> this patch is to pass the error to guest and have the guest driver
>> recover from the error.
> What is going to be the typical recovery mechanism for the guest?  I'm
> concerned that the topology of the device in the guest doesn't
> necessarily match the topology of the device in the host, so if the
> guest were to attempt a bus reset to recover a device, for instance,
> what happens?
the recovery mechanism is that when guest got an aer error from a device,
guest will clean the corresponding status bit in device register. and for
need reset device, the guest aer driver would reset all devices under bus.

Thanks,
Chen


>> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>> ---
>>   hw/vfio/pci.c | 34 ++++++++++++++++++++++++++++------
>>   1 file changed, 28 insertions(+), 6 deletions(-)
>>
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index 0a515b6..8966c49 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -3240,18 +3240,40 @@ static void vfio_put_device(VFIOPCIDevice *vdev)
>>   static void vfio_err_notifier_handler(void *opaque)
>>   {
>>       VFIOPCIDevice *vdev = opaque;
>> +    PCIDevice *dev = &vdev->pdev;
>> +    PCIEAERMsg msg = {
>> +        .severity = 0,
>> +        .source_id = (pci_bus_num(dev->bus) << 8) | dev->devfn,
>> +    };
>>   
>>       if (!event_notifier_test_and_clear(&vdev->err_notifier)) {
>>           return;
>>       }
>>   
>> +    /* we should read the error details from the real hardware
>> +     * configuration spaces, here we only need to do is signaling
>> +     * to guest an uncorrectable error has occurred.
>> +     */
> Inconsistent comment style
>
>> +     if(dev->exp.aer_cap) {
>           ^ space
>
>> +        uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
>> +        uint32_t uncor_status;
>> +        bool isfatal;
>> +
>> +        uncor_status = vfio_pci_read_config(dev,
>> +                           dev->exp.aer_cap + PCI_ERR_UNCOR_STATUS, 4);
>> +
>> +        isfatal = uncor_status & pci_get_long(aer_cap + PCI_ERR_UNCOR_SEVER);
>> +
>> +        msg.severity = isfatal ? PCI_ERR_ROOT_CMD_FATAL_EN :
>> +                                 PCI_ERR_ROOT_CMD_NONFATAL_EN;
>> +
>> +        pcie_aer_msg(dev, &msg);
>> +        return;
>> +    }
>> +
>>       /*
>> -     * TBD. Retrieve the error details and decide what action
>> -     * needs to be taken. One of the actions could be to pass
>> -     * the error to the guest and have the guest driver recover
>> -     * from the error. This requires that PCIe capabilities be
>> -     * exposed to the guest. For now, we just terminate the
>> -     * guest to contain the error.
>> +     * If the aer capability is not exposed to the guest. we just
>> +     * terminate the guest to contain the error.
>>        */
>>   
>>       error_report("%s(%04x:%02x:%02x.%x) Unrecoverable error detected.  "
>
>
> .
>

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 5/7] vfio-pci: pass the aer error to guest
  2015-03-16  3:05     ` Chen Fan
@ 2015-03-16  3:52       ` Alex Williamson
  2015-03-16  7:35         ` Chen Fan
  0 siblings, 1 reply; 53+ messages in thread
From: Alex Williamson @ 2015-03-16  3:52 UTC (permalink / raw)
  To: Chen Fan; +Cc: izumi.taku, qemu-devel

On Mon, 2015-03-16 at 11:05 +0800, Chen Fan wrote:
> On 03/14/2015 06:34 AM, Alex Williamson wrote:
> > On Thu, 2015-03-12 at 18:23 +0800, Chen Fan wrote:
> >> when the vfio device encounters an uncorrectable error in host,
> >> the vfio_pci driver will signal the eventfd registered by this
> >> vfio device, the results in the qemu eventfd handler getting
> >> invoked.
> >>
> >> this patch is to pass the error to guest and have the guest driver
> >> recover from the error.
> > What is going to be the typical recovery mechanism for the guest?  I'm
> > concerned that the topology of the device in the guest doesn't
> > necessarily match the topology of the device in the host, so if the
> > guest were to attempt a bus reset to recover a device, for instance,
> > what happens?
> the recovery mechanism is that when guest got an aer error from a device,
> guest will clean the corresponding status bit in device register. and for
> need reset device, the guest aer driver would reset all devices under bus.

Sorry, I'm still confused, how does the guest aer driver reset all
devices under a bus?  Are we talking about function-level, device
specific reset mechanisms or secondary bus resets?  If the guest is
performing secondary bus resets, what guarantee do they have that it
will translate to a physical secondary bus reset?  vfio may only do an
FLR when the bus is reset or it may not be able to do anything depending
on the available function-level resets and physical and virtual topology
of the device.  Thanks,

Alex

> >> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> >> ---
> >>   hw/vfio/pci.c | 34 ++++++++++++++++++++++++++++------
> >>   1 file changed, 28 insertions(+), 6 deletions(-)
> >>
> >> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> >> index 0a515b6..8966c49 100644
> >> --- a/hw/vfio/pci.c
> >> +++ b/hw/vfio/pci.c
> >> @@ -3240,18 +3240,40 @@ static void vfio_put_device(VFIOPCIDevice *vdev)
> >>   static void vfio_err_notifier_handler(void *opaque)
> >>   {
> >>       VFIOPCIDevice *vdev = opaque;
> >> +    PCIDevice *dev = &vdev->pdev;
> >> +    PCIEAERMsg msg = {
> >> +        .severity = 0,
> >> +        .source_id = (pci_bus_num(dev->bus) << 8) | dev->devfn,
> >> +    };
> >>   
> >>       if (!event_notifier_test_and_clear(&vdev->err_notifier)) {
> >>           return;
> >>       }
> >>   
> >> +    /* we should read the error details from the real hardware
> >> +     * configuration spaces, here we only need to do is signaling
> >> +     * to guest an uncorrectable error has occurred.
> >> +     */
> > Inconsistent comment style
> >
> >> +     if(dev->exp.aer_cap) {
> >           ^ space
> >
> >> +        uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
> >> +        uint32_t uncor_status;
> >> +        bool isfatal;
> >> +
> >> +        uncor_status = vfio_pci_read_config(dev,
> >> +                           dev->exp.aer_cap + PCI_ERR_UNCOR_STATUS, 4);
> >> +
> >> +        isfatal = uncor_status & pci_get_long(aer_cap + PCI_ERR_UNCOR_SEVER);
> >> +
> >> +        msg.severity = isfatal ? PCI_ERR_ROOT_CMD_FATAL_EN :
> >> +                                 PCI_ERR_ROOT_CMD_NONFATAL_EN;
> >> +
> >> +        pcie_aer_msg(dev, &msg);
> >> +        return;
> >> +    }
> >> +
> >>       /*
> >> -     * TBD. Retrieve the error details and decide what action
> >> -     * needs to be taken. One of the actions could be to pass
> >> -     * the error to the guest and have the guest driver recover
> >> -     * from the error. This requires that PCIe capabilities be
> >> -     * exposed to the guest. For now, we just terminate the
> >> -     * guest to contain the error.
> >> +     * If the aer capability is not exposed to the guest. we just
> >> +     * terminate the guest to contain the error.
> >>        */
> >>   
> >>       error_report("%s(%04x:%02x:%02x.%x) Unrecoverable error detected.  "
> >
> >
> > .
> >
> 

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 0/7] pass aer error to guest for vfio device
  2015-03-16  2:52 ` [Qemu-devel] [PATCH v5 0/7] pass aer error to guest for vfio device Chen Fan
@ 2015-03-16  4:57   ` Michael S. Tsirkin
  2015-03-19 21:44     ` Paolo Bonzini
  0 siblings, 1 reply; 53+ messages in thread
From: Michael S. Tsirkin @ 2015-03-16  4:57 UTC (permalink / raw)
  To: Chen Fan; +Cc: izumi.taku, alex.williamson, qemu-devel

On Mon, Mar 16, 2015 at 10:52:52AM +0800, Chen Fan wrote:
> Cc: Michael S. Tsirkin
> 
> On 03/12/2015 06:23 PM, Chen Fan wrote:
> >For now, for vfio pci passthough devices when qemu receives
> >an error from host aer report, there just terminate the guest,
> >but usually user want to know what error occurred but stop the
> >guest, so this patches add aer capability support for vfio device,
> >and pass the error to guest, and have guest driver to recover
> >from the error.
> >and turning on SERR# for error forwording in bridge control register
> >patch in seabios has been merged.
> >
> >v3-v4:
> >    1. add 'x-aer' for user to off aer capability.

User-exposed properties should not start with x- - by convention
that's for internal properties.

> >    2. refactor vfio device to parse extended capabilities.
> >
> >v2-v3:
> >    1. refactor vfio device to parse extended capability.
> >    2. add global property for piix4 to disable vfio aer cap.
> >
> >v1-v2:
> >    1. turn on SERR# for bridge control register in firmware.
> >    2. initilize aer capability for vfio device.
> >    3. fix some trivial bug.
> >
> >Chen Fan (7):
> >   vfio: add pcie extanded capability support
> >   aer: impove pcie_aer_init to support vfio device
> >   vfio: add aer support for vfio device
> >   pcie_aer: expose pcie_aer_msg() interface
> >   vfio-pci: pass the aer error to guest
> >   vfio: add 'x-aer' property to expose aercap
> >   pc: add PC_I440FX_COMPAT to disable aercap for vifo device
> >
> >  hw/i386/pc_piix.c                  |   9 +++
> >  hw/i386/pc_q35.c                   |   4 +
> >  hw/pci-bridge/ioh3420.c            |   2 +-
> >  hw/pci-bridge/xio3130_downstream.c |   2 +-
> >  hw/pci-bridge/xio3130_upstream.c   |   2 +-
> >  hw/pci/pcie_aer.c                  |   6 +-
> >  hw/vfio/pci.c                      | 158 +++++++++++++++++++++++++++++++++++--
> >  include/hw/compat.h                |  10 +++
> >  include/hw/pci/pcie_aer.h          |   3 +-
> >  9 files changed, 182 insertions(+), 14 deletions(-)
> >

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 5/7] vfio-pci: pass the aer error to guest
  2015-03-16  3:52       ` Alex Williamson
@ 2015-03-16  7:35         ` Chen Fan
  2015-03-16 14:09           ` Alex Williamson
  0 siblings, 1 reply; 53+ messages in thread
From: Chen Fan @ 2015-03-16  7:35 UTC (permalink / raw)
  To: Alex Williamson; +Cc: izumi.taku, qemu-devel


On 03/16/2015 11:52 AM, Alex Williamson wrote:
> On Mon, 2015-03-16 at 11:05 +0800, Chen Fan wrote:
>> On 03/14/2015 06:34 AM, Alex Williamson wrote:
>>> On Thu, 2015-03-12 at 18:23 +0800, Chen Fan wrote:
>>>> when the vfio device encounters an uncorrectable error in host,
>>>> the vfio_pci driver will signal the eventfd registered by this
>>>> vfio device, the results in the qemu eventfd handler getting
>>>> invoked.
>>>>
>>>> this patch is to pass the error to guest and have the guest driver
>>>> recover from the error.
>>> What is going to be the typical recovery mechanism for the guest?  I'm
>>> concerned that the topology of the device in the guest doesn't
>>> necessarily match the topology of the device in the host, so if the
>>> guest were to attempt a bus reset to recover a device, for instance,
>>> what happens?
>> the recovery mechanism is that when guest got an aer error from a device,
>> guest will clean the corresponding status bit in device register. and for
>> need reset device, the guest aer driver would reset all devices under bus.
> Sorry, I'm still confused, how does the guest aer driver reset all
> devices under a bus?  Are we talking about function-level, device
> specific reset mechanisms or secondary bus resets?  If the guest is
> performing secondary bus resets, what guarantee do they have that it
> will translate to a physical secondary bus reset?  vfio may only do an
> FLR when the bus is reset or it may not be able to do anything depending
> on the available function-level resets and physical and virtual topology
> of the device.  Thanks,
in general, functions depends on the corresponding device driver behaviors
to do the recovery. e.g: implemented the error_detect, slot_reset callbacks.
and for link reset, it usually do secondary bus reset.

and do we must require to the physical secondary bus reset for vfio device
as bus reset?

Thanks,
Chen

>
> Alex
>
>>>> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>>>> ---
>>>>    hw/vfio/pci.c | 34 ++++++++++++++++++++++++++++------
>>>>    1 file changed, 28 insertions(+), 6 deletions(-)
>>>>
>>>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>>>> index 0a515b6..8966c49 100644
>>>> --- a/hw/vfio/pci.c
>>>> +++ b/hw/vfio/pci.c
>>>> @@ -3240,18 +3240,40 @@ static void vfio_put_device(VFIOPCIDevice *vdev)
>>>>    static void vfio_err_notifier_handler(void *opaque)
>>>>    {
>>>>        VFIOPCIDevice *vdev = opaque;
>>>> +    PCIDevice *dev = &vdev->pdev;
>>>> +    PCIEAERMsg msg = {
>>>> +        .severity = 0,
>>>> +        .source_id = (pci_bus_num(dev->bus) << 8) | dev->devfn,
>>>> +    };
>>>>    
>>>>        if (!event_notifier_test_and_clear(&vdev->err_notifier)) {
>>>>            return;
>>>>        }
>>>>    
>>>> +    /* we should read the error details from the real hardware
>>>> +     * configuration spaces, here we only need to do is signaling
>>>> +     * to guest an uncorrectable error has occurred.
>>>> +     */
>>> Inconsistent comment style
>>>
>>>> +     if(dev->exp.aer_cap) {
>>>            ^ space
>>>
>>>> +        uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
>>>> +        uint32_t uncor_status;
>>>> +        bool isfatal;
>>>> +
>>>> +        uncor_status = vfio_pci_read_config(dev,
>>>> +                           dev->exp.aer_cap + PCI_ERR_UNCOR_STATUS, 4);
>>>> +
>>>> +        isfatal = uncor_status & pci_get_long(aer_cap + PCI_ERR_UNCOR_SEVER);
>>>> +
>>>> +        msg.severity = isfatal ? PCI_ERR_ROOT_CMD_FATAL_EN :
>>>> +                                 PCI_ERR_ROOT_CMD_NONFATAL_EN;
>>>> +
>>>> +        pcie_aer_msg(dev, &msg);
>>>> +        return;
>>>> +    }
>>>> +
>>>>        /*
>>>> -     * TBD. Retrieve the error details and decide what action
>>>> -     * needs to be taken. One of the actions could be to pass
>>>> -     * the error to the guest and have the guest driver recover
>>>> -     * from the error. This requires that PCIe capabilities be
>>>> -     * exposed to the guest. For now, we just terminate the
>>>> -     * guest to contain the error.
>>>> +     * If the aer capability is not exposed to the guest. we just
>>>> +     * terminate the guest to contain the error.
>>>>         */
>>>>    
>>>>        error_report("%s(%04x:%02x:%02x.%x) Unrecoverable error detected.  "
>>>
>>> .
>>>
>
>
> .
>

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 5/7] vfio-pci: pass the aer error to guest
  2015-03-16  7:35         ` Chen Fan
@ 2015-03-16 14:09           ` Alex Williamson
  2015-03-25  1:33             ` Chen Fan
  2015-03-25  1:53             ` Chen Fan
  0 siblings, 2 replies; 53+ messages in thread
From: Alex Williamson @ 2015-03-16 14:09 UTC (permalink / raw)
  To: Chen Fan; +Cc: izumi.taku, qemu-devel

On Mon, 2015-03-16 at 15:35 +0800, Chen Fan wrote:
> On 03/16/2015 11:52 AM, Alex Williamson wrote:
> > On Mon, 2015-03-16 at 11:05 +0800, Chen Fan wrote:
> >> On 03/14/2015 06:34 AM, Alex Williamson wrote:
> >>> On Thu, 2015-03-12 at 18:23 +0800, Chen Fan wrote:
> >>>> when the vfio device encounters an uncorrectable error in host,
> >>>> the vfio_pci driver will signal the eventfd registered by this
> >>>> vfio device, the results in the qemu eventfd handler getting
> >>>> invoked.
> >>>>
> >>>> this patch is to pass the error to guest and have the guest driver
> >>>> recover from the error.
> >>> What is going to be the typical recovery mechanism for the guest?  I'm
> >>> concerned that the topology of the device in the guest doesn't
> >>> necessarily match the topology of the device in the host, so if the
> >>> guest were to attempt a bus reset to recover a device, for instance,
> >>> what happens?
> >> the recovery mechanism is that when guest got an aer error from a device,
> >> guest will clean the corresponding status bit in device register. and for
> >> need reset device, the guest aer driver would reset all devices under bus.
> > Sorry, I'm still confused, how does the guest aer driver reset all
> > devices under a bus?  Are we talking about function-level, device
> > specific reset mechanisms or secondary bus resets?  If the guest is
> > performing secondary bus resets, what guarantee do they have that it
> > will translate to a physical secondary bus reset?  vfio may only do an
> > FLR when the bus is reset or it may not be able to do anything depending
> > on the available function-level resets and physical and virtual topology
> > of the device.  Thanks,
> in general, functions depends on the corresponding device driver behaviors
> to do the recovery. e.g: implemented the error_detect, slot_reset callbacks.
> and for link reset, it usually do secondary bus reset.
> 
> and do we must require to the physical secondary bus reset for vfio device
> as bus reset?

That depends on how the guest driver attempts recovery, doesn't it?
There are only a very limited number of cases where a secondary bus
reset initiated by the guest will translate to a secondary bus reset of
the physical device (iirc, single function device without FLR).  In most
cases, it will at best be translated to an FLR.  VFIO really only does
bus resets on VM reset because that's the only time we know that it's ok
to reset multiple devices.  If the guest driver is depending on a
secondary bus reset to put the device into a recoverable state and we're
not able to provide that, then we're actually reducing containment of
the error by exposing AER to the guest and allowing it to attempt
recovery.  So in practice, I'm afraid we're risking the integrity of the
VM by exposing AER to the guest and making it think that it can perform
recovery operations that are not effective.  Thanks,

Alex

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 7/7] pc: add PC_I440FX_COMPAT to disable aercap for vifo device
  2015-03-12 10:23 ` [Qemu-devel] [PATCH v5 7/7] pc: add PC_I440FX_COMPAT to disable aercap for vifo device Chen Fan
  2015-03-13 22:38   ` Alex Williamson
@ 2015-03-18 13:23   ` Michael S. Tsirkin
  2015-03-18 14:02     ` Alex Williamson
  1 sibling, 1 reply; 53+ messages in thread
From: Michael S. Tsirkin @ 2015-03-18 13:23 UTC (permalink / raw)
  To: Chen Fan; +Cc: izumi.taku, alex.williamson, qemu-devel

typo in subject: vfio, not vifo.

On Thu, Mar 12, 2015 at 06:23:59PM +0800, Chen Fan wrote:
> for piix4 chipset, we don't need to expose aer, so introduce
> PC_I440FX_COMPAT for all piix4 machines to disable aercap,
> and add HW_COMPAT_2_2 to disable aercap for all lower
> than 2.3.
> 
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

Well vfio is never migrated ATM.
So why is compat code needed at all?

> ---
>  hw/i386/pc_piix.c   |  9 +++++++++
>  hw/i386/pc_q35.c    |  4 ++++
>  include/hw/compat.h | 10 ++++++++++
>  3 files changed, 23 insertions(+)
> 
> diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
> index 8eab4ba..ff9d312 100644
> --- a/hw/i386/pc_piix.c
> +++ b/hw/i386/pc_piix.c
> @@ -307,6 +307,11 @@ static void pc_init1(MachineState *machine,
>  
>  static void pc_init_pci(MachineState *machine)
>  {
> +    static GlobalProperty pc_compat_props[] = {
> +        PC_I440FX_COMPAT,
> +        { /* end of list */ }
> +    };
> +    qdev_prop_register_global_list(pc_compat_props);
>      pc_init1(machine, 1, 1);
>  }
>  
> @@ -534,6 +539,10 @@ static QEMUMachine pc_i440fx_machine_v2_2 = {
>      PC_I440FX_2_2_MACHINE_OPTIONS,
>      .name = "pc-i440fx-2.2",
>      .init = pc_init_pci_2_2,
> +    .compat_props = (GlobalProperty[]) {
> +        HW_COMPAT_2_2,
> +        { /* end of list */ }
> +    },
>  };
>  
>  #define PC_I440FX_2_1_MACHINE_OPTIONS                           \
> diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
> index c0f21fe..97afb7d 100644
> --- a/hw/i386/pc_q35.c
> +++ b/hw/i386/pc_q35.c
> @@ -431,6 +431,10 @@ static QEMUMachine pc_q35_machine_v2_2 = {
>      PC_Q35_2_2_MACHINE_OPTIONS,
>      .name = "pc-q35-2.2",
>      .init = pc_q35_init_2_2,
> +    .compat_props = (GlobalProperty[]) {
> +        HW_COMPAT_2_2,
> +        { /* end of list */ }
> +    },
>  };
>  
>  #define PC_Q35_2_1_MACHINE_OPTIONS                      \
> diff --git a/include/hw/compat.h b/include/hw/compat.h
> index 313682a..40c974a 100644
> --- a/include/hw/compat.h
> +++ b/include/hw/compat.h
> @@ -1,7 +1,17 @@
>  #ifndef HW_COMPAT_H
>  #define HW_COMPAT_H
>  
> +#define HW_COMPAT_2_2 PC_I440FX_COMPAT
> +
> +#define PC_I440FX_COMPAT \
> +        {\
> +            .driver   = "vfio-pci",\
> +            .property = "x-aer",\
> +            .value    = "off",\
> +        }
> +
>  #define HW_COMPAT_2_1 \
> +        HW_COMPAT_2_2, \
>          {\
>              .driver   = "intel-hda",\
>              .property = "old_msi_addr",\
> -- 
> 1.9.3
> 

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 6/7] vfio: add 'x-aer' property to expose aercap
  2015-03-12 10:23 ` [Qemu-devel] [PATCH v5 6/7] vfio: add 'x-aer' property to expose aercap Chen Fan
@ 2015-03-18 13:23   ` Michael S. Tsirkin
  2015-03-18 14:09     ` Alex Williamson
  0 siblings, 1 reply; 53+ messages in thread
From: Michael S. Tsirkin @ 2015-03-18 13:23 UTC (permalink / raw)
  To: Chen Fan; +Cc: izumi.taku, alex.williamson, qemu-devel

On Thu, Mar 12, 2015 at 06:23:58PM +0800, Chen Fan wrote:
> add 'x-aer' property to let user able to decide whether expose
> the aer capability.
> 
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

If it's exposed to users, it should not have the x- prefix.
That prefix means "internal only".

> ---
>  hw/vfio/pci.c | 8 ++++++++
>  1 file changed, 8 insertions(+)
> 
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 8966c49..0517091 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -159,6 +159,8 @@ typedef struct VFIOPCIDevice {
>  #define VFIO_FEATURE_ENABLE_VGA (1 << VFIO_FEATURE_ENABLE_VGA_BIT)
>  #define VFIO_FEATURE_ENABLE_REQ_BIT 1
>  #define VFIO_FEATURE_ENABLE_REQ (1 << VFIO_FEATURE_ENABLE_REQ_BIT)
> +#define VFIO_FEATURE_ENABLE_AER_BIT 2
> +#define VFIO_FEATURE_ENABLE_AER (1 << VFIO_FEATURE_ENABLE_AER_BIT)
>      int32_t bootindex;
>      uint8_t pm_cap;
>      bool has_vga;
> @@ -2731,6 +2733,10 @@ static int vfio_setup_aer(VFIOPCIDevice *vdev, int pos, uint16_t size)
>      uint32_t severity, errcap;
>      int ret;
>  
> +    if (!(vdev->features & VFIO_FEATURE_ENABLE_AER)) {
> +        return 0;
> +    }
> +
>      errcap = vfio_pci_read_config(pdev, pdev->exp.aer_cap + PCI_ERR_CAP, 4);
>      /* The ability to record multiple headers is depending on
>         the state of the Multiple Header Recording Capable bit and
> @@ -3691,6 +3697,8 @@ static Property vfio_pci_dev_properties[] = {
>                      VFIO_FEATURE_ENABLE_VGA_BIT, false),
>      DEFINE_PROP_BIT("x-req", VFIOPCIDevice, features,
>                      VFIO_FEATURE_ENABLE_REQ_BIT, true),
> +    DEFINE_PROP_BIT("x-aer", VFIOPCIDevice, features,
> +                    VFIO_FEATURE_ENABLE_AER_BIT, true),
>      DEFINE_PROP_INT32("bootindex", VFIOPCIDevice, bootindex, -1),
>      DEFINE_PROP_BOOL("x-mmap", VFIOPCIDevice, vbasedev.allow_mmap, true),
>      /*
> -- 
> 1.9.3
> 

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 4/7] pcie_aer: expose pcie_aer_msg() interface
  2015-03-12 10:23 ` [Qemu-devel] [PATCH v5 4/7] pcie_aer: expose pcie_aer_msg() interface Chen Fan
  2015-03-13 22:30   ` Alex Williamson
@ 2015-03-18 13:29   ` Michael S. Tsirkin
  2015-03-19  1:33     ` Chen Fan
  1 sibling, 1 reply; 53+ messages in thread
From: Michael S. Tsirkin @ 2015-03-18 13:29 UTC (permalink / raw)
  To: Chen Fan; +Cc: izumi.taku, alex.williamson, qemu-devel

On Thu, Mar 12, 2015 at 06:23:56PM +0800, Chen Fan wrote:
> For vfio device, we need to propagate the aer error to
> Guest OS. we use the pcie_aer_msg() to send aer error
> to guest.
> 
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

Interesting.
pcie_aer_inject_error was intended as an interface to
send errors to guest.
For example, pcie_aer_msg does not handle the log at all.
Why do you use the lower-level pcie_aer_msg?

> ---
>  hw/pci/pcie_aer.c         | 2 +-
>  include/hw/pci/pcie_aer.h | 1 +
>  2 files changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c
> index 71045eb..f2d5d13 100644
> --- a/hw/pci/pcie_aer.c
> +++ b/hw/pci/pcie_aer.c
> @@ -370,7 +370,7 @@ static void pcie_aer_msg_root_port(PCIDevice *dev, const PCIEAERMsg *msg)
>   *
>   * Walk up the bus tree from the device, propagate the error message.
>   */
> -static void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg)
> +void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg)
>  {
>      uint8_t type;
>  
> diff --git a/include/hw/pci/pcie_aer.h b/include/hw/pci/pcie_aer.h
> index a4cc6f3..80cba7b 100644
> --- a/include/hw/pci/pcie_aer.h
> +++ b/include/hw/pci/pcie_aer.h
> @@ -102,5 +102,6 @@ void pcie_aer_root_write_config(PCIDevice *dev,
>  
>  /* error injection */
>  int pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err);
> +void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg);
>  
>  #endif /* QEMU_PCIE_AER_H */
> -- 
> 1.9.3
> 

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 7/7] pc: add PC_I440FX_COMPAT to disable aercap for vifo device
  2015-03-18 13:23   ` Michael S. Tsirkin
@ 2015-03-18 14:02     ` Alex Williamson
  2015-03-18 14:05       ` Michael S. Tsirkin
  0 siblings, 1 reply; 53+ messages in thread
From: Alex Williamson @ 2015-03-18 14:02 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: Chen Fan, izumi.taku, qemu-devel

On Wed, 2015-03-18 at 14:23 +0100, Michael S. Tsirkin wrote:
> typo in subject: vfio, not vifo.
> 
> On Thu, Mar 12, 2015 at 06:23:59PM +0800, Chen Fan wrote:
> > for piix4 chipset, we don't need to expose aer, so introduce
> > PC_I440FX_COMPAT for all piix4 machines to disable aercap,
> > and add HW_COMPAT_2_2 to disable aercap for all lower
> > than 2.3.
> > 
> > Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> 
> Well vfio is never migrated ATM.
> So why is compat code needed at all?

It's not for migration, it's to maintain current behavior on existing
platforms.  If someone gets an uncorrected AER error on q35 machine type
today, the VM stops.  With this change, AER would be exposed to the
guest and the guest could handle it.  The compat change therefore
maintains the stop VM behavior on existing q35 machine types.  As I
commented here, the 440fx part of this patch is unnecessary since AER
cannot be exposed to the guest on a conventional PCI chipset.  Thanks,

Alex

> > ---
> >  hw/i386/pc_piix.c   |  9 +++++++++
> >  hw/i386/pc_q35.c    |  4 ++++
> >  include/hw/compat.h | 10 ++++++++++
> >  3 files changed, 23 insertions(+)
> > 
> > diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
> > index 8eab4ba..ff9d312 100644
> > --- a/hw/i386/pc_piix.c
> > +++ b/hw/i386/pc_piix.c
> > @@ -307,6 +307,11 @@ static void pc_init1(MachineState *machine,
> >  
> >  static void pc_init_pci(MachineState *machine)
> >  {
> > +    static GlobalProperty pc_compat_props[] = {
> > +        PC_I440FX_COMPAT,
> > +        { /* end of list */ }
> > +    };
> > +    qdev_prop_register_global_list(pc_compat_props);
> >      pc_init1(machine, 1, 1);
> >  }
> >  
> > @@ -534,6 +539,10 @@ static QEMUMachine pc_i440fx_machine_v2_2 = {
> >      PC_I440FX_2_2_MACHINE_OPTIONS,
> >      .name = "pc-i440fx-2.2",
> >      .init = pc_init_pci_2_2,
> > +    .compat_props = (GlobalProperty[]) {
> > +        HW_COMPAT_2_2,
> > +        { /* end of list */ }
> > +    },
> >  };
> >  
> >  #define PC_I440FX_2_1_MACHINE_OPTIONS                           \
> > diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
> > index c0f21fe..97afb7d 100644
> > --- a/hw/i386/pc_q35.c
> > +++ b/hw/i386/pc_q35.c
> > @@ -431,6 +431,10 @@ static QEMUMachine pc_q35_machine_v2_2 = {
> >      PC_Q35_2_2_MACHINE_OPTIONS,
> >      .name = "pc-q35-2.2",
> >      .init = pc_q35_init_2_2,
> > +    .compat_props = (GlobalProperty[]) {
> > +        HW_COMPAT_2_2,
> > +        { /* end of list */ }
> > +    },
> >  };
> >  
> >  #define PC_Q35_2_1_MACHINE_OPTIONS                      \
> > diff --git a/include/hw/compat.h b/include/hw/compat.h
> > index 313682a..40c974a 100644
> > --- a/include/hw/compat.h
> > +++ b/include/hw/compat.h
> > @@ -1,7 +1,17 @@
> >  #ifndef HW_COMPAT_H
> >  #define HW_COMPAT_H
> >  
> > +#define HW_COMPAT_2_2 PC_I440FX_COMPAT
> > +
> > +#define PC_I440FX_COMPAT \
> > +        {\
> > +            .driver   = "vfio-pci",\
> > +            .property = "x-aer",\
> > +            .value    = "off",\
> > +        }
> > +
> >  #define HW_COMPAT_2_1 \
> > +        HW_COMPAT_2_2, \
> >          {\
> >              .driver   = "intel-hda",\
> >              .property = "old_msi_addr",\
> > -- 
> > 1.9.3
> > 

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 7/7] pc: add PC_I440FX_COMPAT to disable aercap for vifo device
  2015-03-18 14:02     ` Alex Williamson
@ 2015-03-18 14:05       ` Michael S. Tsirkin
  2015-03-18 14:15         ` Alex Williamson
  0 siblings, 1 reply; 53+ messages in thread
From: Michael S. Tsirkin @ 2015-03-18 14:05 UTC (permalink / raw)
  To: Alex Williamson; +Cc: Chen Fan, izumi.taku, qemu-devel

On Wed, Mar 18, 2015 at 08:02:26AM -0600, Alex Williamson wrote:
> On Wed, 2015-03-18 at 14:23 +0100, Michael S. Tsirkin wrote:
> > typo in subject: vfio, not vifo.
> > 
> > On Thu, Mar 12, 2015 at 06:23:59PM +0800, Chen Fan wrote:
> > > for piix4 chipset, we don't need to expose aer, so introduce
> > > PC_I440FX_COMPAT for all piix4 machines to disable aercap,
> > > and add HW_COMPAT_2_2 to disable aercap for all lower
> > > than 2.3.
> > > 
> > > Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > 
> > Well vfio is never migrated ATM.
> > So why is compat code needed at all?
> 
> It's not for migration, it's to maintain current behavior on existing
> platforms.  If someone gets an uncorrected AER error on q35 machine type
> today, the VM stops.  With this change, AER would be exposed to the
> guest and the guest could handle it.  The compat change therefore
> maintains the stop VM behavior on existing q35 machine types.

If stop VM behaviour is useful, expose it to users.
If not, then don't.
I don't see why does it have to be tied to machine types.

>  As I
> commented here, the 440fx part of this patch is unnecessary since AER
> cannot be exposed to the guest on a conventional PCI chipset.  Thanks,
> 
> Alex
> 
> > > ---
> > >  hw/i386/pc_piix.c   |  9 +++++++++
> > >  hw/i386/pc_q35.c    |  4 ++++
> > >  include/hw/compat.h | 10 ++++++++++
> > >  3 files changed, 23 insertions(+)
> > > 
> > > diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
> > > index 8eab4ba..ff9d312 100644
> > > --- a/hw/i386/pc_piix.c
> > > +++ b/hw/i386/pc_piix.c
> > > @@ -307,6 +307,11 @@ static void pc_init1(MachineState *machine,
> > >  
> > >  static void pc_init_pci(MachineState *machine)
> > >  {
> > > +    static GlobalProperty pc_compat_props[] = {
> > > +        PC_I440FX_COMPAT,
> > > +        { /* end of list */ }
> > > +    };
> > > +    qdev_prop_register_global_list(pc_compat_props);
> > >      pc_init1(machine, 1, 1);
> > >  }
> > >  
> > > @@ -534,6 +539,10 @@ static QEMUMachine pc_i440fx_machine_v2_2 = {
> > >      PC_I440FX_2_2_MACHINE_OPTIONS,
> > >      .name = "pc-i440fx-2.2",
> > >      .init = pc_init_pci_2_2,
> > > +    .compat_props = (GlobalProperty[]) {
> > > +        HW_COMPAT_2_2,
> > > +        { /* end of list */ }
> > > +    },
> > >  };
> > >  
> > >  #define PC_I440FX_2_1_MACHINE_OPTIONS                           \
> > > diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
> > > index c0f21fe..97afb7d 100644
> > > --- a/hw/i386/pc_q35.c
> > > +++ b/hw/i386/pc_q35.c
> > > @@ -431,6 +431,10 @@ static QEMUMachine pc_q35_machine_v2_2 = {
> > >      PC_Q35_2_2_MACHINE_OPTIONS,
> > >      .name = "pc-q35-2.2",
> > >      .init = pc_q35_init_2_2,
> > > +    .compat_props = (GlobalProperty[]) {
> > > +        HW_COMPAT_2_2,
> > > +        { /* end of list */ }
> > > +    },
> > >  };
> > >  
> > >  #define PC_Q35_2_1_MACHINE_OPTIONS                      \
> > > diff --git a/include/hw/compat.h b/include/hw/compat.h
> > > index 313682a..40c974a 100644
> > > --- a/include/hw/compat.h
> > > +++ b/include/hw/compat.h
> > > @@ -1,7 +1,17 @@
> > >  #ifndef HW_COMPAT_H
> > >  #define HW_COMPAT_H
> > >  
> > > +#define HW_COMPAT_2_2 PC_I440FX_COMPAT
> > > +
> > > +#define PC_I440FX_COMPAT \
> > > +        {\
> > > +            .driver   = "vfio-pci",\
> > > +            .property = "x-aer",\
> > > +            .value    = "off",\
> > > +        }
> > > +
> > >  #define HW_COMPAT_2_1 \
> > > +        HW_COMPAT_2_2, \
> > >          {\
> > >              .driver   = "intel-hda",\
> > >              .property = "old_msi_addr",\
> > > -- 
> > > 1.9.3
> > > 
> 
> 

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 6/7] vfio: add 'x-aer' property to expose aercap
  2015-03-18 13:23   ` Michael S. Tsirkin
@ 2015-03-18 14:09     ` Alex Williamson
  0 siblings, 0 replies; 53+ messages in thread
From: Alex Williamson @ 2015-03-18 14:09 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: Chen Fan, izumi.taku, qemu-devel

On Wed, 2015-03-18 at 14:23 +0100, Michael S. Tsirkin wrote:
> On Thu, Mar 12, 2015 at 06:23:58PM +0800, Chen Fan wrote:
> > add 'x-aer' property to let user able to decide whether expose
> > the aer capability.
> > 
> > Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> 
> If it's exposed to users, it should not have the x- prefix.
> That prefix means "internal only".

It's my understanding that the x- prefix is to be used for any
"unsupported" option that should not be picked up by libvirt or other
management tools as an official API.  It may be removed or changed at
any point.  That doesn't imply it's internal-only.  See for example
x-vga, which is toggled by the user, but has a number of complicated
issues to call officially supported.  Or x-req, which allows a user to
opt-out of the collaborative device request interface, but should not be
the default and should not be easily accessible.

I still don't know if x- is really the correct option in this case.  If
the guest is known not to support AER, then the user may desire the stop
VM behavior.  I also don't understand how a guest can necessarily
recover from an AER since things like bus reset aren't exposed to guests
except for very limited cases.  Thanks,

Alex

> > ---
> >  hw/vfio/pci.c | 8 ++++++++
> >  1 file changed, 8 insertions(+)
> > 
> > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> > index 8966c49..0517091 100644
> > --- a/hw/vfio/pci.c
> > +++ b/hw/vfio/pci.c
> > @@ -159,6 +159,8 @@ typedef struct VFIOPCIDevice {
> >  #define VFIO_FEATURE_ENABLE_VGA (1 << VFIO_FEATURE_ENABLE_VGA_BIT)
> >  #define VFIO_FEATURE_ENABLE_REQ_BIT 1
> >  #define VFIO_FEATURE_ENABLE_REQ (1 << VFIO_FEATURE_ENABLE_REQ_BIT)
> > +#define VFIO_FEATURE_ENABLE_AER_BIT 2
> > +#define VFIO_FEATURE_ENABLE_AER (1 << VFIO_FEATURE_ENABLE_AER_BIT)
> >      int32_t bootindex;
> >      uint8_t pm_cap;
> >      bool has_vga;
> > @@ -2731,6 +2733,10 @@ static int vfio_setup_aer(VFIOPCIDevice *vdev, int pos, uint16_t size)
> >      uint32_t severity, errcap;
> >      int ret;
> >  
> > +    if (!(vdev->features & VFIO_FEATURE_ENABLE_AER)) {
> > +        return 0;
> > +    }
> > +
> >      errcap = vfio_pci_read_config(pdev, pdev->exp.aer_cap + PCI_ERR_CAP, 4);
> >      /* The ability to record multiple headers is depending on
> >         the state of the Multiple Header Recording Capable bit and
> > @@ -3691,6 +3697,8 @@ static Property vfio_pci_dev_properties[] = {
> >                      VFIO_FEATURE_ENABLE_VGA_BIT, false),
> >      DEFINE_PROP_BIT("x-req", VFIOPCIDevice, features,
> >                      VFIO_FEATURE_ENABLE_REQ_BIT, true),
> > +    DEFINE_PROP_BIT("x-aer", VFIOPCIDevice, features,
> > +                    VFIO_FEATURE_ENABLE_AER_BIT, true),
> >      DEFINE_PROP_INT32("bootindex", VFIOPCIDevice, bootindex, -1),
> >      DEFINE_PROP_BOOL("x-mmap", VFIOPCIDevice, vbasedev.allow_mmap, true),
> >      /*
> > -- 
> > 1.9.3
> > 

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 7/7] pc: add PC_I440FX_COMPAT to disable aercap for vifo device
  2015-03-18 14:05       ` Michael S. Tsirkin
@ 2015-03-18 14:15         ` Alex Williamson
  2015-03-18 14:36           ` Michael S. Tsirkin
  0 siblings, 1 reply; 53+ messages in thread
From: Alex Williamson @ 2015-03-18 14:15 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: Chen Fan, izumi.taku, qemu-devel

On Wed, 2015-03-18 at 15:05 +0100, Michael S. Tsirkin wrote:
> On Wed, Mar 18, 2015 at 08:02:26AM -0600, Alex Williamson wrote:
> > On Wed, 2015-03-18 at 14:23 +0100, Michael S. Tsirkin wrote:
> > > typo in subject: vfio, not vifo.
> > > 
> > > On Thu, Mar 12, 2015 at 06:23:59PM +0800, Chen Fan wrote:
> > > > for piix4 chipset, we don't need to expose aer, so introduce
> > > > PC_I440FX_COMPAT for all piix4 machines to disable aercap,
> > > > and add HW_COMPAT_2_2 to disable aercap for all lower
> > > > than 2.3.
> > > > 
> > > > Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > > 
> > > Well vfio is never migrated ATM.
> > > So why is compat code needed at all?
> > 
> > It's not for migration, it's to maintain current behavior on existing
> > platforms.  If someone gets an uncorrected AER error on q35 machine type
> > today, the VM stops.  With this change, AER would be exposed to the
> > guest and the guest could handle it.  The compat change therefore
> > maintains the stop VM behavior on existing q35 machine types.
> 
> If stop VM behaviour is useful, expose it to users.
> If not, then don't.
> I don't see why does it have to be tied to machine types.

Because q35-2.2 machine type will currently do a stop VM on uncorrected
AER error.  If we don't tie that to a machine option then q35-2.2 would
suddenly start exposing the error to the guest.  That's a fairly
significant change in behavior for a static machine type.

> >  As I
> > commented here, the 440fx part of this patch is unnecessary since AER
> > cannot be exposed to the guest on a conventional PCI chipset.  Thanks,
> > 
> > Alex
> > 
> > > > ---
> > > >  hw/i386/pc_piix.c   |  9 +++++++++
> > > >  hw/i386/pc_q35.c    |  4 ++++
> > > >  include/hw/compat.h | 10 ++++++++++
> > > >  3 files changed, 23 insertions(+)
> > > > 
> > > > diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
> > > > index 8eab4ba..ff9d312 100644
> > > > --- a/hw/i386/pc_piix.c
> > > > +++ b/hw/i386/pc_piix.c
> > > > @@ -307,6 +307,11 @@ static void pc_init1(MachineState *machine,
> > > >  
> > > >  static void pc_init_pci(MachineState *machine)
> > > >  {
> > > > +    static GlobalProperty pc_compat_props[] = {
> > > > +        PC_I440FX_COMPAT,
> > > > +        { /* end of list */ }
> > > > +    };
> > > > +    qdev_prop_register_global_list(pc_compat_props);
> > > >      pc_init1(machine, 1, 1);
> > > >  }
> > > >  
> > > > @@ -534,6 +539,10 @@ static QEMUMachine pc_i440fx_machine_v2_2 = {
> > > >      PC_I440FX_2_2_MACHINE_OPTIONS,
> > > >      .name = "pc-i440fx-2.2",
> > > >      .init = pc_init_pci_2_2,
> > > > +    .compat_props = (GlobalProperty[]) {
> > > > +        HW_COMPAT_2_2,
> > > > +        { /* end of list */ }
> > > > +    },
> > > >  };
> > > >  
> > > >  #define PC_I440FX_2_1_MACHINE_OPTIONS                           \
> > > > diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
> > > > index c0f21fe..97afb7d 100644
> > > > --- a/hw/i386/pc_q35.c
> > > > +++ b/hw/i386/pc_q35.c
> > > > @@ -431,6 +431,10 @@ static QEMUMachine pc_q35_machine_v2_2 = {
> > > >      PC_Q35_2_2_MACHINE_OPTIONS,
> > > >      .name = "pc-q35-2.2",
> > > >      .init = pc_q35_init_2_2,
> > > > +    .compat_props = (GlobalProperty[]) {
> > > > +        HW_COMPAT_2_2,
> > > > +        { /* end of list */ }
> > > > +    },
> > > >  };
> > > >  
> > > >  #define PC_Q35_2_1_MACHINE_OPTIONS                      \
> > > > diff --git a/include/hw/compat.h b/include/hw/compat.h
> > > > index 313682a..40c974a 100644
> > > > --- a/include/hw/compat.h
> > > > +++ b/include/hw/compat.h
> > > > @@ -1,7 +1,17 @@
> > > >  #ifndef HW_COMPAT_H
> > > >  #define HW_COMPAT_H
> > > >  
> > > > +#define HW_COMPAT_2_2 PC_I440FX_COMPAT
> > > > +
> > > > +#define PC_I440FX_COMPAT \
> > > > +        {\
> > > > +            .driver   = "vfio-pci",\
> > > > +            .property = "x-aer",\
> > > > +            .value    = "off",\
> > > > +        }
> > > > +
> > > >  #define HW_COMPAT_2_1 \
> > > > +        HW_COMPAT_2_2, \
> > > >          {\
> > > >              .driver   = "intel-hda",\
> > > >              .property = "old_msi_addr",\
> > > > -- 
> > > > 1.9.3
> > > > 
> > 
> > 

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 7/7] pc: add PC_I440FX_COMPAT to disable aercap for vifo device
  2015-03-18 14:15         ` Alex Williamson
@ 2015-03-18 14:36           ` Michael S. Tsirkin
  2015-03-18 14:50             ` Alex Williamson
  0 siblings, 1 reply; 53+ messages in thread
From: Michael S. Tsirkin @ 2015-03-18 14:36 UTC (permalink / raw)
  To: Alex Williamson; +Cc: Chen Fan, izumi.taku, qemu-devel

On Wed, Mar 18, 2015 at 08:15:01AM -0600, Alex Williamson wrote:
> On Wed, 2015-03-18 at 15:05 +0100, Michael S. Tsirkin wrote:
> > On Wed, Mar 18, 2015 at 08:02:26AM -0600, Alex Williamson wrote:
> > > On Wed, 2015-03-18 at 14:23 +0100, Michael S. Tsirkin wrote:
> > > > typo in subject: vfio, not vifo.
> > > > 
> > > > On Thu, Mar 12, 2015 at 06:23:59PM +0800, Chen Fan wrote:
> > > > > for piix4 chipset, we don't need to expose aer, so introduce
> > > > > PC_I440FX_COMPAT for all piix4 machines to disable aercap,
> > > > > and add HW_COMPAT_2_2 to disable aercap for all lower
> > > > > than 2.3.
> > > > > 
> > > > > Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > > > 
> > > > Well vfio is never migrated ATM.
> > > > So why is compat code needed at all?
> > > 
> > > It's not for migration, it's to maintain current behavior on existing
> > > platforms.  If someone gets an uncorrected AER error on q35 machine type
> > > today, the VM stops.  With this change, AER would be exposed to the
> > > guest and the guest could handle it.  The compat change therefore
> > > maintains the stop VM behavior on existing q35 machine types.
> > 
> > If stop VM behaviour is useful, expose it to users.
> > If not, then don't.
> > I don't see why does it have to be tied to machine types.
> 
> Because q35-2.2 machine type will currently do a stop VM on uncorrected
> AER error.  If we don't tie that to a machine option then q35-2.2 would
> suddenly start exposing the error to the guest.  That's a fairly
> significant change in behavior for a static machine type.

I don't think you can classify it as a behaviour change. VM stop is not
guest visible behaviour.
Are you worrying about guests misbehaving when they see these errors?
Then you want this as user-controlled, supported option.

In other words: we only tie things to machine types when we
have to. This code gets almost no testing, and is a lot of
work to test. This one sounds like "just in case" is not a good
motivation.


-- 
MST

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 7/7] pc: add PC_I440FX_COMPAT to disable aercap for vifo device
  2015-03-18 14:36           ` Michael S. Tsirkin
@ 2015-03-18 14:50             ` Alex Williamson
  2015-03-18 15:02               ` Michael S. Tsirkin
  0 siblings, 1 reply; 53+ messages in thread
From: Alex Williamson @ 2015-03-18 14:50 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: Chen Fan, izumi.taku, qemu-devel

On Wed, 2015-03-18 at 15:36 +0100, Michael S. Tsirkin wrote:
> On Wed, Mar 18, 2015 at 08:15:01AM -0600, Alex Williamson wrote:
> > On Wed, 2015-03-18 at 15:05 +0100, Michael S. Tsirkin wrote:
> > > On Wed, Mar 18, 2015 at 08:02:26AM -0600, Alex Williamson wrote:
> > > > On Wed, 2015-03-18 at 14:23 +0100, Michael S. Tsirkin wrote:
> > > > > typo in subject: vfio, not vifo.
> > > > > 
> > > > > On Thu, Mar 12, 2015 at 06:23:59PM +0800, Chen Fan wrote:
> > > > > > for piix4 chipset, we don't need to expose aer, so introduce
> > > > > > PC_I440FX_COMPAT for all piix4 machines to disable aercap,
> > > > > > and add HW_COMPAT_2_2 to disable aercap for all lower
> > > > > > than 2.3.
> > > > > > 
> > > > > > Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > > > > 
> > > > > Well vfio is never migrated ATM.
> > > > > So why is compat code needed at all?
> > > > 
> > > > It's not for migration, it's to maintain current behavior on existing
> > > > platforms.  If someone gets an uncorrected AER error on q35 machine type
> > > > today, the VM stops.  With this change, AER would be exposed to the
> > > > guest and the guest could handle it.  The compat change therefore
> > > > maintains the stop VM behavior on existing q35 machine types.
> > > 
> > > If stop VM behaviour is useful, expose it to users.
> > > If not, then don't.
> > > I don't see why does it have to be tied to machine types.
> > 
> > Because q35-2.2 machine type will currently do a stop VM on uncorrected
> > AER error.  If we don't tie that to a machine option then q35-2.2 would
> > suddenly start exposing the error to the guest.  That's a fairly
> > significant change in behavior for a static machine type.
> 
> I don't think you can classify it as a behaviour change. VM stop is not
> guest visible behaviour.

In one case, an uncorrected AER occurs and the VM is stopped by QEMU.
In the other case, the guest is notified and may attempt corrective
action... or maybe the guest doesn't understand AER and the user is
depending on the previous behavior.  That is absolutely a behavior
change.

> Are you worrying about guests misbehaving when they see these errors?
> Then you want this as user-controlled, supported option.

Whether the option is user visible is tangential to whether the behavior
of existing machine types should be maintained.  Existing machine types
can impose a different default than current machine types.

> In other words: we only tie things to machine types when we
> have to. This code gets almost no testing, and is a lot of
> work to test. This one sounds like "just in case" is not a good
> motivation.

It seems like an obvious use case for using machine types to maintain
compatibility with previous behavior, which is exactly why we have
machine types.  If we're not going to use it, why do we have it?

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 7/7] pc: add PC_I440FX_COMPAT to disable aercap for vifo device
  2015-03-18 14:50             ` Alex Williamson
@ 2015-03-18 15:02               ` Michael S. Tsirkin
  2015-03-18 15:45                 ` Alex Williamson
  0 siblings, 1 reply; 53+ messages in thread
From: Michael S. Tsirkin @ 2015-03-18 15:02 UTC (permalink / raw)
  To: Alex Williamson; +Cc: Chen Fan, izumi.taku, qemu-devel

On Wed, Mar 18, 2015 at 08:50:54AM -0600, Alex Williamson wrote:
> On Wed, 2015-03-18 at 15:36 +0100, Michael S. Tsirkin wrote:
> > On Wed, Mar 18, 2015 at 08:15:01AM -0600, Alex Williamson wrote:
> > > On Wed, 2015-03-18 at 15:05 +0100, Michael S. Tsirkin wrote:
> > > > On Wed, Mar 18, 2015 at 08:02:26AM -0600, Alex Williamson wrote:
> > > > > On Wed, 2015-03-18 at 14:23 +0100, Michael S. Tsirkin wrote:
> > > > > > typo in subject: vfio, not vifo.
> > > > > > 
> > > > > > On Thu, Mar 12, 2015 at 06:23:59PM +0800, Chen Fan wrote:
> > > > > > > for piix4 chipset, we don't need to expose aer, so introduce
> > > > > > > PC_I440FX_COMPAT for all piix4 machines to disable aercap,
> > > > > > > and add HW_COMPAT_2_2 to disable aercap for all lower
> > > > > > > than 2.3.
> > > > > > > 
> > > > > > > Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > > > > > 
> > > > > > Well vfio is never migrated ATM.
> > > > > > So why is compat code needed at all?
> > > > > 
> > > > > It's not for migration, it's to maintain current behavior on existing
> > > > > platforms.  If someone gets an uncorrected AER error on q35 machine type
> > > > > today, the VM stops.  With this change, AER would be exposed to the
> > > > > guest and the guest could handle it.  The compat change therefore
> > > > > maintains the stop VM behavior on existing q35 machine types.
> > > > 
> > > > If stop VM behaviour is useful, expose it to users.
> > > > If not, then don't.
> > > > I don't see why does it have to be tied to machine types.
> > > 
> > > Because q35-2.2 machine type will currently do a stop VM on uncorrected
> > > AER error.  If we don't tie that to a machine option then q35-2.2 would
> > > suddenly start exposing the error to the guest.  That's a fairly
> > > significant change in behavior for a static machine type.
> > 
> > I don't think you can classify it as a behaviour change. VM stop is not
> > guest visible behaviour.
> 
> In one case, an uncorrected AER occurs and the VM is stopped by QEMU.
> In the other case, the guest is notified and may attempt corrective
> action... or maybe the guest doesn't understand AER and the user is
> depending on the previous behavior.  That is absolutely a behavior
> change.
> 
> > Are you worrying about guests misbehaving when they see these errors?
> > Then you want this as user-controlled, supported option.
> 
> Whether the option is user visible is tangential to whether the behavior
> of existing machine types should be maintained.  Existing machine types
> can impose a different default than current machine types.
>
> > In other words: we only tie things to machine types when we
> > have to. This code gets almost no testing, and is a lot of
> > work to test. This one sounds like "just in case" is not a good
> > motivation.
> 
> It seems like an obvious use case for using machine types to maintain
> compatibility with previous behavior, which is exactly why we have
> machine types.  If we're not going to use it, why do we have it?

We have machine types because of the following issues:
- some silent changes confuse guests. For example guest installed with
  one machine type might not boot if you try to use it after
  changing something, or - in case of windows - throw up warnings.
- some changes break migration

Looks like none of these cases.
If AER is unsafe, turn it off by default for everyone.


-- 
MST

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 7/7] pc: add PC_I440FX_COMPAT to disable aercap for vifo device
  2015-03-18 15:02               ` Michael S. Tsirkin
@ 2015-03-18 15:45                 ` Alex Williamson
  2015-03-18 16:44                   ` Michael S. Tsirkin
  0 siblings, 1 reply; 53+ messages in thread
From: Alex Williamson @ 2015-03-18 15:45 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: Chen Fan, izumi.taku, qemu-devel

On Wed, 2015-03-18 at 16:02 +0100, Michael S. Tsirkin wrote:
> On Wed, Mar 18, 2015 at 08:50:54AM -0600, Alex Williamson wrote:
> > On Wed, 2015-03-18 at 15:36 +0100, Michael S. Tsirkin wrote:
> > > On Wed, Mar 18, 2015 at 08:15:01AM -0600, Alex Williamson wrote:
> > > > On Wed, 2015-03-18 at 15:05 +0100, Michael S. Tsirkin wrote:
> > > > > On Wed, Mar 18, 2015 at 08:02:26AM -0600, Alex Williamson wrote:
> > > > > > On Wed, 2015-03-18 at 14:23 +0100, Michael S. Tsirkin wrote:
> > > > > > > typo in subject: vfio, not vifo.
> > > > > > > 
> > > > > > > On Thu, Mar 12, 2015 at 06:23:59PM +0800, Chen Fan wrote:
> > > > > > > > for piix4 chipset, we don't need to expose aer, so introduce
> > > > > > > > PC_I440FX_COMPAT for all piix4 machines to disable aercap,
> > > > > > > > and add HW_COMPAT_2_2 to disable aercap for all lower
> > > > > > > > than 2.3.
> > > > > > > > 
> > > > > > > > Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > > > > > > 
> > > > > > > Well vfio is never migrated ATM.
> > > > > > > So why is compat code needed at all?
> > > > > > 
> > > > > > It's not for migration, it's to maintain current behavior on existing
> > > > > > platforms.  If someone gets an uncorrected AER error on q35 machine type
> > > > > > today, the VM stops.  With this change, AER would be exposed to the
> > > > > > guest and the guest could handle it.  The compat change therefore
> > > > > > maintains the stop VM behavior on existing q35 machine types.
> > > > > 
> > > > > If stop VM behaviour is useful, expose it to users.
> > > > > If not, then don't.
> > > > > I don't see why does it have to be tied to machine types.
> > > > 
> > > > Because q35-2.2 machine type will currently do a stop VM on uncorrected
> > > > AER error.  If we don't tie that to a machine option then q35-2.2 would
> > > > suddenly start exposing the error to the guest.  That's a fairly
> > > > significant change in behavior for a static machine type.
> > > 
> > > I don't think you can classify it as a behaviour change. VM stop is not
> > > guest visible behaviour.
> > 
> > In one case, an uncorrected AER occurs and the VM is stopped by QEMU.
> > In the other case, the guest is notified and may attempt corrective
> > action... or maybe the guest doesn't understand AER and the user is
> > depending on the previous behavior.  That is absolutely a behavior
> > change.
> > 
> > > Are you worrying about guests misbehaving when they see these errors?
> > > Then you want this as user-controlled, supported option.
> > 
> > Whether the option is user visible is tangential to whether the behavior
> > of existing machine types should be maintained.  Existing machine types
> > can impose a different default than current machine types.
> >
> > > In other words: we only tie things to machine types when we
> > > have to. This code gets almost no testing, and is a lot of
> > > work to test. This one sounds like "just in case" is not a good
> > > motivation.
> > 
> > It seems like an obvious use case for using machine types to maintain
> > compatibility with previous behavior, which is exactly why we have
> > machine types.  If we're not going to use it, why do we have it?
> 
> We have machine types because of the following issues:
> - some silent changes confuse guests. For example guest installed with
>   one machine type might not boot if you try to use it after
>   changing something, or - in case of windows - throw up warnings.
> - some changes break migration
> 
> Looks like none of these cases.
> If AER is unsafe, turn it off by default for everyone.

This is silly, we have the tools, let's use them.  If a user is running
a VM that gets a VM stop on AER error one day and they upgrade QEMU and
restart it, they should get the same behavior, whether a migration is
involved or not.  Maybe the default should be disabled, this patch
series hasn't yet even convinced me that there's a worthwhile general
case where the guest can recover, but using the existing machine
compatibility infrastructure should be at our disposal if we do think
the default going forward should be different than the behavior today.

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 7/7] pc: add PC_I440FX_COMPAT to disable aercap for vifo device
  2015-03-18 15:45                 ` Alex Williamson
@ 2015-03-18 16:44                   ` Michael S. Tsirkin
  2015-03-18 17:11                     ` Alex Williamson
  0 siblings, 1 reply; 53+ messages in thread
From: Michael S. Tsirkin @ 2015-03-18 16:44 UTC (permalink / raw)
  To: Alex Williamson; +Cc: Chen Fan, izumi.taku, qemu-devel

On Wed, Mar 18, 2015 at 09:45:29AM -0600, Alex Williamson wrote:
> On Wed, 2015-03-18 at 16:02 +0100, Michael S. Tsirkin wrote:
> > On Wed, Mar 18, 2015 at 08:50:54AM -0600, Alex Williamson wrote:
> > > On Wed, 2015-03-18 at 15:36 +0100, Michael S. Tsirkin wrote:
> > > > On Wed, Mar 18, 2015 at 08:15:01AM -0600, Alex Williamson wrote:
> > > > > On Wed, 2015-03-18 at 15:05 +0100, Michael S. Tsirkin wrote:
> > > > > > On Wed, Mar 18, 2015 at 08:02:26AM -0600, Alex Williamson wrote:
> > > > > > > On Wed, 2015-03-18 at 14:23 +0100, Michael S. Tsirkin wrote:
> > > > > > > > typo in subject: vfio, not vifo.
> > > > > > > > 
> > > > > > > > On Thu, Mar 12, 2015 at 06:23:59PM +0800, Chen Fan wrote:
> > > > > > > > > for piix4 chipset, we don't need to expose aer, so introduce
> > > > > > > > > PC_I440FX_COMPAT for all piix4 machines to disable aercap,
> > > > > > > > > and add HW_COMPAT_2_2 to disable aercap for all lower
> > > > > > > > > than 2.3.
> > > > > > > > > 
> > > > > > > > > Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > > > > > > > 
> > > > > > > > Well vfio is never migrated ATM.
> > > > > > > > So why is compat code needed at all?
> > > > > > > 
> > > > > > > It's not for migration, it's to maintain current behavior on existing
> > > > > > > platforms.  If someone gets an uncorrected AER error on q35 machine type
> > > > > > > today, the VM stops.  With this change, AER would be exposed to the
> > > > > > > guest and the guest could handle it.  The compat change therefore
> > > > > > > maintains the stop VM behavior on existing q35 machine types.
> > > > > > 
> > > > > > If stop VM behaviour is useful, expose it to users.
> > > > > > If not, then don't.
> > > > > > I don't see why does it have to be tied to machine types.
> > > > > 
> > > > > Because q35-2.2 machine type will currently do a stop VM on uncorrected
> > > > > AER error.  If we don't tie that to a machine option then q35-2.2 would
> > > > > suddenly start exposing the error to the guest.  That's a fairly
> > > > > significant change in behavior for a static machine type.
> > > > 
> > > > I don't think you can classify it as a behaviour change. VM stop is not
> > > > guest visible behaviour.
> > > 
> > > In one case, an uncorrected AER occurs and the VM is stopped by QEMU.
> > > In the other case, the guest is notified and may attempt corrective
> > > action... or maybe the guest doesn't understand AER and the user is
> > > depending on the previous behavior.  That is absolutely a behavior
> > > change.
> > > 
> > > > Are you worrying about guests misbehaving when they see these errors?
> > > > Then you want this as user-controlled, supported option.
> > > 
> > > Whether the option is user visible is tangential to whether the behavior
> > > of existing machine types should be maintained.  Existing machine types
> > > can impose a different default than current machine types.
> > >
> > > > In other words: we only tie things to machine types when we
> > > > have to. This code gets almost no testing, and is a lot of
> > > > work to test. This one sounds like "just in case" is not a good
> > > > motivation.
> > > 
> > > It seems like an obvious use case for using machine types to maintain
> > > compatibility with previous behavior, which is exactly why we have
> > > machine types.  If we're not going to use it, why do we have it?
> > 
> > We have machine types because of the following issues:
> > - some silent changes confuse guests. For example guest installed with
> >   one machine type might not boot if you try to use it after
> >   changing something, or - in case of windows - throw up warnings.
> > - some changes break migration
> > 
> > Looks like none of these cases.
> > If AER is unsafe, turn it off by default for everyone.
> 
> This is silly, we have the tools, let's use them.

It's a very expensive tool, maintainance-wise. We often don't
have the choice but I'm not going to use this tool by choice
unless we know why we are doing this.

> If a user is running
> a VM that gets a VM stop on AER error one day and they upgrade QEMU and
> restart it, they should get the same behavior, whether a migration is
> involved or not.

You keep saying this, but why should it? Answer that question, the rest
will follow.

> Maybe the default should be disabled, this patch
> series hasn't yet even convinced me that there's a worthwhile general
> case where the guest can recover, but using the existing machine
> compatibility infrastructure should be at our disposal if we do think
> the default going forward should be different than the behavior today.

I'm sorry, I don't think it's the right tool for the job.

-- 
MST

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 7/7] pc: add PC_I440FX_COMPAT to disable aercap for vifo device
  2015-03-18 16:44                   ` Michael S. Tsirkin
@ 2015-03-18 17:11                     ` Alex Williamson
  2015-03-18 17:45                       ` Michael S. Tsirkin
  0 siblings, 1 reply; 53+ messages in thread
From: Alex Williamson @ 2015-03-18 17:11 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: Chen Fan, izumi.taku, qemu-devel

On Wed, 2015-03-18 at 17:44 +0100, Michael S. Tsirkin wrote:
> On Wed, Mar 18, 2015 at 09:45:29AM -0600, Alex Williamson wrote:
> > On Wed, 2015-03-18 at 16:02 +0100, Michael S. Tsirkin wrote:
> > > On Wed, Mar 18, 2015 at 08:50:54AM -0600, Alex Williamson wrote:
> > > > On Wed, 2015-03-18 at 15:36 +0100, Michael S. Tsirkin wrote:
> > > > > On Wed, Mar 18, 2015 at 08:15:01AM -0600, Alex Williamson wrote:
> > > > > > On Wed, 2015-03-18 at 15:05 +0100, Michael S. Tsirkin wrote:
> > > > > > > On Wed, Mar 18, 2015 at 08:02:26AM -0600, Alex Williamson wrote:
> > > > > > > > On Wed, 2015-03-18 at 14:23 +0100, Michael S. Tsirkin wrote:
> > > > > > > > > typo in subject: vfio, not vifo.
> > > > > > > > > 
> > > > > > > > > On Thu, Mar 12, 2015 at 06:23:59PM +0800, Chen Fan wrote:
> > > > > > > > > > for piix4 chipset, we don't need to expose aer, so introduce
> > > > > > > > > > PC_I440FX_COMPAT for all piix4 machines to disable aercap,
> > > > > > > > > > and add HW_COMPAT_2_2 to disable aercap for all lower
> > > > > > > > > > than 2.3.
> > > > > > > > > > 
> > > > > > > > > > Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > > > > > > > > 
> > > > > > > > > Well vfio is never migrated ATM.
> > > > > > > > > So why is compat code needed at all?
> > > > > > > > 
> > > > > > > > It's not for migration, it's to maintain current behavior on existing
> > > > > > > > platforms.  If someone gets an uncorrected AER error on q35 machine type
> > > > > > > > today, the VM stops.  With this change, AER would be exposed to the
> > > > > > > > guest and the guest could handle it.  The compat change therefore
> > > > > > > > maintains the stop VM behavior on existing q35 machine types.
> > > > > > > 
> > > > > > > If stop VM behaviour is useful, expose it to users.
> > > > > > > If not, then don't.
> > > > > > > I don't see why does it have to be tied to machine types.
> > > > > > 
> > > > > > Because q35-2.2 machine type will currently do a stop VM on uncorrected
> > > > > > AER error.  If we don't tie that to a machine option then q35-2.2 would
> > > > > > suddenly start exposing the error to the guest.  That's a fairly
> > > > > > significant change in behavior for a static machine type.
> > > > > 
> > > > > I don't think you can classify it as a behaviour change. VM stop is not
> > > > > guest visible behaviour.
> > > > 
> > > > In one case, an uncorrected AER occurs and the VM is stopped by QEMU.
> > > > In the other case, the guest is notified and may attempt corrective
> > > > action... or maybe the guest doesn't understand AER and the user is
> > > > depending on the previous behavior.  That is absolutely a behavior
> > > > change.
> > > > 
> > > > > Are you worrying about guests misbehaving when they see these errors?
> > > > > Then you want this as user-controlled, supported option.
> > > > 
> > > > Whether the option is user visible is tangential to whether the behavior
> > > > of existing machine types should be maintained.  Existing machine types
> > > > can impose a different default than current machine types.
> > > >
> > > > > In other words: we only tie things to machine types when we
> > > > > have to. This code gets almost no testing, and is a lot of
> > > > > work to test. This one sounds like "just in case" is not a good
> > > > > motivation.
> > > > 
> > > > It seems like an obvious use case for using machine types to maintain
> > > > compatibility with previous behavior, which is exactly why we have
> > > > machine types.  If we're not going to use it, why do we have it?
> > > 
> > > We have machine types because of the following issues:
> > > - some silent changes confuse guests. For example guest installed with
> > >   one machine type might not boot if you try to use it after
> > >   changing something, or - in case of windows - throw up warnings.
> > > - some changes break migration
> > > 
> > > Looks like none of these cases.
> > > If AER is unsafe, turn it off by default for everyone.
> > 
> > This is silly, we have the tools, let's use them.
> 
> It's a very expensive tool, maintainance-wise. We often don't
> have the choice but I'm not going to use this tool by choice
> unless we know why we are doing this.
> 
> > If a user is running
> > a VM that gets a VM stop on AER error one day and they upgrade QEMU and
> > restart it, they should get the same behavior, whether a migration is
> > involved or not.
> 
> You keep saying this, but why should it? Answer that question, the rest
> will follow.

My answer is that it's a user visible change in behavior that they may
rely on and would not expect to change within an existing machine type.
Silently modifying the default may expose them to error conditions that
were previously handled by QEMU and adds AER handling requirements to
the guest.

Ignoring the question about whether an AER can be reliably recovered in
the guest for a moment, a typical PC hardware platform will signal the
running OS with AER errors, so it seems that *if* we can achieve parity
in a virtual machine with that signaling, and more importantly the
recovery process, then the default going forward should be to mimic bare
metal behavior, thus changing the default from what we do today.

As it is now, I think we have too many outstanding questions about how
recover occurs to change the default.

So let me return the question, if we were to resolve those questions and
change the default handling from what we do today, creating a user
visible behavior change and imposing new requirements for AER handling
in the guest, why is that not worthy of machine type stability?  I'd
certainly expect it from a distro specific machine type.

> > Maybe the default should be disabled, this patch
> > series hasn't yet even convinced me that there's a worthwhile general
> > case where the guest can recover, but using the existing machine
> > compatibility infrastructure should be at our disposal if we do think
> > the default going forward should be different than the behavior today.
> 
> I'm sorry, I don't think it's the right tool for the job.
> 

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 7/7] pc: add PC_I440FX_COMPAT to disable aercap for vifo device
  2015-03-18 17:11                     ` Alex Williamson
@ 2015-03-18 17:45                       ` Michael S. Tsirkin
  2015-03-18 18:08                         ` Alex Williamson
  0 siblings, 1 reply; 53+ messages in thread
From: Michael S. Tsirkin @ 2015-03-18 17:45 UTC (permalink / raw)
  To: Alex Williamson; +Cc: Chen Fan, izumi.taku, qemu-devel

On Wed, Mar 18, 2015 at 11:11:28AM -0600, Alex Williamson wrote:
> On Wed, 2015-03-18 at 17:44 +0100, Michael S. Tsirkin wrote:
> > On Wed, Mar 18, 2015 at 09:45:29AM -0600, Alex Williamson wrote:
> > > On Wed, 2015-03-18 at 16:02 +0100, Michael S. Tsirkin wrote:
> > > > On Wed, Mar 18, 2015 at 08:50:54AM -0600, Alex Williamson wrote:
> > > > > On Wed, 2015-03-18 at 15:36 +0100, Michael S. Tsirkin wrote:
> > > > > > On Wed, Mar 18, 2015 at 08:15:01AM -0600, Alex Williamson wrote:
> > > > > > > On Wed, 2015-03-18 at 15:05 +0100, Michael S. Tsirkin wrote:
> > > > > > > > On Wed, Mar 18, 2015 at 08:02:26AM -0600, Alex Williamson wrote:
> > > > > > > > > On Wed, 2015-03-18 at 14:23 +0100, Michael S. Tsirkin wrote:
> > > > > > > > > > typo in subject: vfio, not vifo.
> > > > > > > > > > 
> > > > > > > > > > On Thu, Mar 12, 2015 at 06:23:59PM +0800, Chen Fan wrote:
> > > > > > > > > > > for piix4 chipset, we don't need to expose aer, so introduce
> > > > > > > > > > > PC_I440FX_COMPAT for all piix4 machines to disable aercap,
> > > > > > > > > > > and add HW_COMPAT_2_2 to disable aercap for all lower
> > > > > > > > > > > than 2.3.
> > > > > > > > > > > 
> > > > > > > > > > > Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > > > > > > > > > 
> > > > > > > > > > Well vfio is never migrated ATM.
> > > > > > > > > > So why is compat code needed at all?
> > > > > > > > > 
> > > > > > > > > It's not for migration, it's to maintain current behavior on existing
> > > > > > > > > platforms.  If someone gets an uncorrected AER error on q35 machine type
> > > > > > > > > today, the VM stops.  With this change, AER would be exposed to the
> > > > > > > > > guest and the guest could handle it.  The compat change therefore
> > > > > > > > > maintains the stop VM behavior on existing q35 machine types.
> > > > > > > > 
> > > > > > > > If stop VM behaviour is useful, expose it to users.
> > > > > > > > If not, then don't.
> > > > > > > > I don't see why does it have to be tied to machine types.
> > > > > > > 
> > > > > > > Because q35-2.2 machine type will currently do a stop VM on uncorrected
> > > > > > > AER error.  If we don't tie that to a machine option then q35-2.2 would
> > > > > > > suddenly start exposing the error to the guest.  That's a fairly
> > > > > > > significant change in behavior for a static machine type.
> > > > > > 
> > > > > > I don't think you can classify it as a behaviour change. VM stop is not
> > > > > > guest visible behaviour.
> > > > > 
> > > > > In one case, an uncorrected AER occurs and the VM is stopped by QEMU.
> > > > > In the other case, the guest is notified and may attempt corrective
> > > > > action... or maybe the guest doesn't understand AER and the user is
> > > > > depending on the previous behavior.  That is absolutely a behavior
> > > > > change.
> > > > > 
> > > > > > Are you worrying about guests misbehaving when they see these errors?
> > > > > > Then you want this as user-controlled, supported option.
> > > > > 
> > > > > Whether the option is user visible is tangential to whether the behavior
> > > > > of existing machine types should be maintained.  Existing machine types
> > > > > can impose a different default than current machine types.
> > > > >
> > > > > > In other words: we only tie things to machine types when we
> > > > > > have to. This code gets almost no testing, and is a lot of
> > > > > > work to test. This one sounds like "just in case" is not a good
> > > > > > motivation.
> > > > > 
> > > > > It seems like an obvious use case for using machine types to maintain
> > > > > compatibility with previous behavior, which is exactly why we have
> > > > > machine types.  If we're not going to use it, why do we have it?
> > > > 
> > > > We have machine types because of the following issues:
> > > > - some silent changes confuse guests. For example guest installed with
> > > >   one machine type might not boot if you try to use it after
> > > >   changing something, or - in case of windows - throw up warnings.
> > > > - some changes break migration
> > > > 
> > > > Looks like none of these cases.
> > > > If AER is unsafe, turn it off by default for everyone.
> > > 
> > > This is silly, we have the tools, let's use them.
> > 
> > It's a very expensive tool, maintainance-wise. We often don't
> > have the choice but I'm not going to use this tool by choice
> > unless we know why we are doing this.
> > 
> > > If a user is running
> > > a VM that gets a VM stop on AER error one day and they upgrade QEMU and
> > > restart it, they should get the same behavior, whether a migration is
> > > involved or not.
> > 
> > You keep saying this, but why should it? Answer that question, the rest
> > will follow.
> 
> My answer is that it's a user visible change in behavior that they may
> rely on and would not expect to change within an existing machine type.
> Silently modifying the default may expose them to error conditions that
> were previously handled by QEMU and adds AER handling requirements to
> the guest.
> 
> Ignoring the question about whether an AER can be reliably recovered in
> the guest for a moment, a typical PC hardware platform will signal the
> running OS with AER errors, so it seems that *if* we can achieve parity
> in a virtual machine with that signaling, and more importantly the
> recovery process, then the default going forward should be to mimic bare
> metal behavior, thus changing the default from what we do today.
> 
> As it is now, I think we have too many outstanding questions about how
> recover occurs to change the default.
> 
> So let me return the question, if we were to resolve those questions and
> change the default handling from what we do today, creating a user
> visible behavior change and imposing new requirements for AER handling
> in the guest, why is that not worthy of machine type stability?  I'd
> certainly expect it from a distro specific machine type.

This really depends on what guests do with this.
We try to avoid guest visible changes during migration
(even that's not 100%).
There aren't such requirements for devices where migration
is disabled, e.g. we did many guest visible changes in q35.

Latest machine types are better tested. Deviating from latest machine
just for a theoretical "this is guest visible" isn't going to give
you better stability, it will give you worse stability.

If you are worried about guest bugs, they are not going
away just because we release new QEMU.So if guests are
buggy with this feature, this needs a solution for all
machine types.

Anyway, if we keep the default, that's even easier.

> > > Maybe the default should be disabled, this patch
> > > series hasn't yet even convinced me that there's a worthwhile general
> > > case where the guest can recover, but using the existing machine
> > > compatibility infrastructure should be at our disposal if we do think
> > > the default going forward should be different than the behavior today.
> > 
> > I'm sorry, I don't think it's the right tool for the job.
> > 
> 
> 

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 7/7] pc: add PC_I440FX_COMPAT to disable aercap for vifo device
  2015-03-18 17:45                       ` Michael S. Tsirkin
@ 2015-03-18 18:08                         ` Alex Williamson
  2015-03-18 18:56                           ` Michael S. Tsirkin
  0 siblings, 1 reply; 53+ messages in thread
From: Alex Williamson @ 2015-03-18 18:08 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: Chen Fan, izumi.taku, qemu-devel

On Wed, 2015-03-18 at 18:45 +0100, Michael S. Tsirkin wrote:
> On Wed, Mar 18, 2015 at 11:11:28AM -0600, Alex Williamson wrote:
> > On Wed, 2015-03-18 at 17:44 +0100, Michael S. Tsirkin wrote:
> > > On Wed, Mar 18, 2015 at 09:45:29AM -0600, Alex Williamson wrote:
> > > > On Wed, 2015-03-18 at 16:02 +0100, Michael S. Tsirkin wrote:
> > > > > On Wed, Mar 18, 2015 at 08:50:54AM -0600, Alex Williamson wrote:
> > > > > > On Wed, 2015-03-18 at 15:36 +0100, Michael S. Tsirkin wrote:
> > > > > > > On Wed, Mar 18, 2015 at 08:15:01AM -0600, Alex Williamson wrote:
> > > > > > > > On Wed, 2015-03-18 at 15:05 +0100, Michael S. Tsirkin wrote:
> > > > > > > > > On Wed, Mar 18, 2015 at 08:02:26AM -0600, Alex Williamson wrote:
> > > > > > > > > > On Wed, 2015-03-18 at 14:23 +0100, Michael S. Tsirkin wrote:
> > > > > > > > > > > typo in subject: vfio, not vifo.
> > > > > > > > > > > 
> > > > > > > > > > > On Thu, Mar 12, 2015 at 06:23:59PM +0800, Chen Fan wrote:
> > > > > > > > > > > > for piix4 chipset, we don't need to expose aer, so introduce
> > > > > > > > > > > > PC_I440FX_COMPAT for all piix4 machines to disable aercap,
> > > > > > > > > > > > and add HW_COMPAT_2_2 to disable aercap for all lower
> > > > > > > > > > > > than 2.3.
> > > > > > > > > > > > 
> > > > > > > > > > > > Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > > > > > > > > > > 
> > > > > > > > > > > Well vfio is never migrated ATM.
> > > > > > > > > > > So why is compat code needed at all?
> > > > > > > > > > 
> > > > > > > > > > It's not for migration, it's to maintain current behavior on existing
> > > > > > > > > > platforms.  If someone gets an uncorrected AER error on q35 machine type
> > > > > > > > > > today, the VM stops.  With this change, AER would be exposed to the
> > > > > > > > > > guest and the guest could handle it.  The compat change therefore
> > > > > > > > > > maintains the stop VM behavior on existing q35 machine types.
> > > > > > > > > 
> > > > > > > > > If stop VM behaviour is useful, expose it to users.
> > > > > > > > > If not, then don't.
> > > > > > > > > I don't see why does it have to be tied to machine types.
> > > > > > > > 
> > > > > > > > Because q35-2.2 machine type will currently do a stop VM on uncorrected
> > > > > > > > AER error.  If we don't tie that to a machine option then q35-2.2 would
> > > > > > > > suddenly start exposing the error to the guest.  That's a fairly
> > > > > > > > significant change in behavior for a static machine type.
> > > > > > > 
> > > > > > > I don't think you can classify it as a behaviour change. VM stop is not
> > > > > > > guest visible behaviour.
> > > > > > 
> > > > > > In one case, an uncorrected AER occurs and the VM is stopped by QEMU.
> > > > > > In the other case, the guest is notified and may attempt corrective
> > > > > > action... or maybe the guest doesn't understand AER and the user is
> > > > > > depending on the previous behavior.  That is absolutely a behavior
> > > > > > change.
> > > > > > 
> > > > > > > Are you worrying about guests misbehaving when they see these errors?
> > > > > > > Then you want this as user-controlled, supported option.
> > > > > > 
> > > > > > Whether the option is user visible is tangential to whether the behavior
> > > > > > of existing machine types should be maintained.  Existing machine types
> > > > > > can impose a different default than current machine types.
> > > > > >
> > > > > > > In other words: we only tie things to machine types when we
> > > > > > > have to. This code gets almost no testing, and is a lot of
> > > > > > > work to test. This one sounds like "just in case" is not a good
> > > > > > > motivation.
> > > > > > 
> > > > > > It seems like an obvious use case for using machine types to maintain
> > > > > > compatibility with previous behavior, which is exactly why we have
> > > > > > machine types.  If we're not going to use it, why do we have it?
> > > > > 
> > > > > We have machine types because of the following issues:
> > > > > - some silent changes confuse guests. For example guest installed with
> > > > >   one machine type might not boot if you try to use it after
> > > > >   changing something, or - in case of windows - throw up warnings.
> > > > > - some changes break migration
> > > > > 
> > > > > Looks like none of these cases.
> > > > > If AER is unsafe, turn it off by default for everyone.
> > > > 
> > > > This is silly, we have the tools, let's use them.
> > > 
> > > It's a very expensive tool, maintainance-wise. We often don't
> > > have the choice but I'm not going to use this tool by choice
> > > unless we know why we are doing this.
> > > 
> > > > If a user is running
> > > > a VM that gets a VM stop on AER error one day and they upgrade QEMU and
> > > > restart it, they should get the same behavior, whether a migration is
> > > > involved or not.
> > > 
> > > You keep saying this, but why should it? Answer that question, the rest
> > > will follow.
> > 
> > My answer is that it's a user visible change in behavior that they may
> > rely on and would not expect to change within an existing machine type.
> > Silently modifying the default may expose them to error conditions that
> > were previously handled by QEMU and adds AER handling requirements to
> > the guest.
> > 
> > Ignoring the question about whether an AER can be reliably recovered in
> > the guest for a moment, a typical PC hardware platform will signal the
> > running OS with AER errors, so it seems that *if* we can achieve parity
> > in a virtual machine with that signaling, and more importantly the
> > recovery process, then the default going forward should be to mimic bare
> > metal behavior, thus changing the default from what we do today.
> > 
> > As it is now, I think we have too many outstanding questions about how
> > recover occurs to change the default.
> > 
> > So let me return the question, if we were to resolve those questions and
> > change the default handling from what we do today, creating a user
> > visible behavior change and imposing new requirements for AER handling
> > in the guest, why is that not worthy of machine type stability?  I'd
> > certainly expect it from a distro specific machine type.
> 
> This really depends on what guests do with this.
> We try to avoid guest visible changes during migration
> (even that's not 100%).
> There aren't such requirements for devices where migration
> is disabled, e.g. we did many guest visible changes in q35.
> 
> Latest machine types are better tested. Deviating from latest machine
> just for a theoretical "this is guest visible" isn't going to give
> you better stability, it will give you worse stability.

You keep calling this theoretical.  As it exists today, an uncorrected
AER error results in a VM stop with no guest intervention or support
required.  If we were to change the default, the guest would instead be
notified of the AER and given the opportunity to recover.  That's not
just guest visible, that's a complete change in error handling paradigm.

> If you are worried about guest bugs, they are not going
> away just because we release new QEMU.So if guests are
> buggy with this feature, this needs a solution for all
> machine types.

Guest bugs are not my issue.

> Anyway, if we keep the default, that's even easier.

Agreed, and that may be where we end if we can't come up with a
reasonable expectation that the guest can recover from an AER.  However,
I think machine compatibility should be fair game if we do want to flip
that switch.

> > > > Maybe the default should be disabled, this patch
> > > > series hasn't yet even convinced me that there's a worthwhile general
> > > > case where the guest can recover, but using the existing machine
> > > > compatibility infrastructure should be at our disposal if we do think
> > > > the default going forward should be different than the behavior today.
> > > 
> > > I'm sorry, I don't think it's the right tool for the job.
> > > 
> > 
> > 

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 7/7] pc: add PC_I440FX_COMPAT to disable aercap for vifo device
  2015-03-18 18:08                         ` Alex Williamson
@ 2015-03-18 18:56                           ` Michael S. Tsirkin
  2015-03-18 19:05                             ` Alex Williamson
  0 siblings, 1 reply; 53+ messages in thread
From: Michael S. Tsirkin @ 2015-03-18 18:56 UTC (permalink / raw)
  To: Alex Williamson; +Cc: Chen Fan, izumi.taku, qemu-devel

On Wed, Mar 18, 2015 at 12:08:15PM -0600, Alex Williamson wrote:
> On Wed, 2015-03-18 at 18:45 +0100, Michael S. Tsirkin wrote:
> > On Wed, Mar 18, 2015 at 11:11:28AM -0600, Alex Williamson wrote:
> > > On Wed, 2015-03-18 at 17:44 +0100, Michael S. Tsirkin wrote:
> > > > On Wed, Mar 18, 2015 at 09:45:29AM -0600, Alex Williamson wrote:
> > > > > On Wed, 2015-03-18 at 16:02 +0100, Michael S. Tsirkin wrote:
> > > > > > On Wed, Mar 18, 2015 at 08:50:54AM -0600, Alex Williamson wrote:
> > > > > > > On Wed, 2015-03-18 at 15:36 +0100, Michael S. Tsirkin wrote:
> > > > > > > > On Wed, Mar 18, 2015 at 08:15:01AM -0600, Alex Williamson wrote:
> > > > > > > > > On Wed, 2015-03-18 at 15:05 +0100, Michael S. Tsirkin wrote:
> > > > > > > > > > On Wed, Mar 18, 2015 at 08:02:26AM -0600, Alex Williamson wrote:
> > > > > > > > > > > On Wed, 2015-03-18 at 14:23 +0100, Michael S. Tsirkin wrote:
> > > > > > > > > > > > typo in subject: vfio, not vifo.
> > > > > > > > > > > > 
> > > > > > > > > > > > On Thu, Mar 12, 2015 at 06:23:59PM +0800, Chen Fan wrote:
> > > > > > > > > > > > > for piix4 chipset, we don't need to expose aer, so introduce
> > > > > > > > > > > > > PC_I440FX_COMPAT for all piix4 machines to disable aercap,
> > > > > > > > > > > > > and add HW_COMPAT_2_2 to disable aercap for all lower
> > > > > > > > > > > > > than 2.3.
> > > > > > > > > > > > > 
> > > > > > > > > > > > > Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > > > > > > > > > > > 
> > > > > > > > > > > > Well vfio is never migrated ATM.
> > > > > > > > > > > > So why is compat code needed at all?
> > > > > > > > > > > 
> > > > > > > > > > > It's not for migration, it's to maintain current behavior on existing
> > > > > > > > > > > platforms.  If someone gets an uncorrected AER error on q35 machine type
> > > > > > > > > > > today, the VM stops.  With this change, AER would be exposed to the
> > > > > > > > > > > guest and the guest could handle it.  The compat change therefore
> > > > > > > > > > > maintains the stop VM behavior on existing q35 machine types.
> > > > > > > > > > 
> > > > > > > > > > If stop VM behaviour is useful, expose it to users.
> > > > > > > > > > If not, then don't.
> > > > > > > > > > I don't see why does it have to be tied to machine types.
> > > > > > > > > 
> > > > > > > > > Because q35-2.2 machine type will currently do a stop VM on uncorrected
> > > > > > > > > AER error.  If we don't tie that to a machine option then q35-2.2 would
> > > > > > > > > suddenly start exposing the error to the guest.  That's a fairly
> > > > > > > > > significant change in behavior for a static machine type.
> > > > > > > > 
> > > > > > > > I don't think you can classify it as a behaviour change. VM stop is not
> > > > > > > > guest visible behaviour.
> > > > > > > 
> > > > > > > In one case, an uncorrected AER occurs and the VM is stopped by QEMU.
> > > > > > > In the other case, the guest is notified and may attempt corrective
> > > > > > > action... or maybe the guest doesn't understand AER and the user is
> > > > > > > depending on the previous behavior.  That is absolutely a behavior
> > > > > > > change.
> > > > > > > 
> > > > > > > > Are you worrying about guests misbehaving when they see these errors?
> > > > > > > > Then you want this as user-controlled, supported option.
> > > > > > > 
> > > > > > > Whether the option is user visible is tangential to whether the behavior
> > > > > > > of existing machine types should be maintained.  Existing machine types
> > > > > > > can impose a different default than current machine types.
> > > > > > >
> > > > > > > > In other words: we only tie things to machine types when we
> > > > > > > > have to. This code gets almost no testing, and is a lot of
> > > > > > > > work to test. This one sounds like "just in case" is not a good
> > > > > > > > motivation.
> > > > > > > 
> > > > > > > It seems like an obvious use case for using machine types to maintain
> > > > > > > compatibility with previous behavior, which is exactly why we have
> > > > > > > machine types.  If we're not going to use it, why do we have it?
> > > > > > 
> > > > > > We have machine types because of the following issues:
> > > > > > - some silent changes confuse guests. For example guest installed with
> > > > > >   one machine type might not boot if you try to use it after
> > > > > >   changing something, or - in case of windows - throw up warnings.
> > > > > > - some changes break migration
> > > > > > 
> > > > > > Looks like none of these cases.
> > > > > > If AER is unsafe, turn it off by default for everyone.
> > > > > 
> > > > > This is silly, we have the tools, let's use them.
> > > > 
> > > > It's a very expensive tool, maintainance-wise. We often don't
> > > > have the choice but I'm not going to use this tool by choice
> > > > unless we know why we are doing this.
> > > > 
> > > > > If a user is running
> > > > > a VM that gets a VM stop on AER error one day and they upgrade QEMU and
> > > > > restart it, they should get the same behavior, whether a migration is
> > > > > involved or not.
> > > > 
> > > > You keep saying this, but why should it? Answer that question, the rest
> > > > will follow.
> > > 
> > > My answer is that it's a user visible change in behavior that they may
> > > rely on and would not expect to change within an existing machine type.
> > > Silently modifying the default may expose them to error conditions that
> > > were previously handled by QEMU and adds AER handling requirements to
> > > the guest.
> > > 
> > > Ignoring the question about whether an AER can be reliably recovered in
> > > the guest for a moment, a typical PC hardware platform will signal the
> > > running OS with AER errors, so it seems that *if* we can achieve parity
> > > in a virtual machine with that signaling, and more importantly the
> > > recovery process, then the default going forward should be to mimic bare
> > > metal behavior, thus changing the default from what we do today.
> > > 
> > > As it is now, I think we have too many outstanding questions about how
> > > recover occurs to change the default.
> > > 
> > > So let me return the question, if we were to resolve those questions and
> > > change the default handling from what we do today, creating a user
> > > visible behavior change and imposing new requirements for AER handling
> > > in the guest, why is that not worthy of machine type stability?  I'd
> > > certainly expect it from a distro specific machine type.
> > 
> > This really depends on what guests do with this.
> > We try to avoid guest visible changes during migration
> > (even that's not 100%).
> > There aren't such requirements for devices where migration
> > is disabled, e.g. we did many guest visible changes in q35.
> > 
> > Latest machine types are better tested. Deviating from latest machine
> > just for a theoretical "this is guest visible" isn't going to give
> > you better stability, it will give you worse stability.
> 
> You keep calling this theoretical.  As it exists today, an uncorrected
> AER error results in a VM stop with no guest intervention or support
> required.  If we were to change the default, the guest would instead be
> notified of the AER and given the opportunity to recover.  That's not
> just guest visible, that's a complete change in error handling paradigm.

OK, so maybe it's a feature that users should have control over.
But tying it to machine types makes no sense.

> > If you are worried about guest bugs, they are not going
> > away just because we release new QEMU.So if guests are
> > buggy with this feature, this needs a solution for all
> > machine types.
> 
> Guest bugs are not my issue.
> 
> > Anyway, if we keep the default, that's even easier.
> 
> Agreed, and that may be where we end if we can't come up with a
> reasonable expectation that the guest can recover from an AER.  However,
> I think machine compatibility should be fair game if we do want to flip
> that switch.
> 
> > > > > Maybe the default should be disabled, this patch
> > > > > series hasn't yet even convinced me that there's a worthwhile general
> > > > > case where the guest can recover, but using the existing machine
> > > > > compatibility infrastructure should be at our disposal if we do think
> > > > > the default going forward should be different than the behavior today.
> > > > 
> > > > I'm sorry, I don't think it's the right tool for the job.
> > > > 
> > > 
> > > 
> 
> 

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 7/7] pc: add PC_I440FX_COMPAT to disable aercap for vifo device
  2015-03-18 18:56                           ` Michael S. Tsirkin
@ 2015-03-18 19:05                             ` Alex Williamson
  2015-03-19 21:26                               ` Paolo Bonzini
  0 siblings, 1 reply; 53+ messages in thread
From: Alex Williamson @ 2015-03-18 19:05 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: Chen Fan, izumi.taku, qemu-devel

On Wed, 2015-03-18 at 19:56 +0100, Michael S. Tsirkin wrote:
> On Wed, Mar 18, 2015 at 12:08:15PM -0600, Alex Williamson wrote:
> > On Wed, 2015-03-18 at 18:45 +0100, Michael S. Tsirkin wrote:
> > > On Wed, Mar 18, 2015 at 11:11:28AM -0600, Alex Williamson wrote:
> > > > On Wed, 2015-03-18 at 17:44 +0100, Michael S. Tsirkin wrote:
> > > > > On Wed, Mar 18, 2015 at 09:45:29AM -0600, Alex Williamson wrote:
> > > > > > On Wed, 2015-03-18 at 16:02 +0100, Michael S. Tsirkin wrote:
> > > > > > > On Wed, Mar 18, 2015 at 08:50:54AM -0600, Alex Williamson wrote:
> > > > > > > > On Wed, 2015-03-18 at 15:36 +0100, Michael S. Tsirkin wrote:
> > > > > > > > > On Wed, Mar 18, 2015 at 08:15:01AM -0600, Alex Williamson wrote:
> > > > > > > > > > On Wed, 2015-03-18 at 15:05 +0100, Michael S. Tsirkin wrote:
> > > > > > > > > > > On Wed, Mar 18, 2015 at 08:02:26AM -0600, Alex Williamson wrote:
> > > > > > > > > > > > On Wed, 2015-03-18 at 14:23 +0100, Michael S. Tsirkin wrote:
> > > > > > > > > > > > > typo in subject: vfio, not vifo.
> > > > > > > > > > > > > 
> > > > > > > > > > > > > On Thu, Mar 12, 2015 at 06:23:59PM +0800, Chen Fan wrote:
> > > > > > > > > > > > > > for piix4 chipset, we don't need to expose aer, so introduce
> > > > > > > > > > > > > > PC_I440FX_COMPAT for all piix4 machines to disable aercap,
> > > > > > > > > > > > > > and add HW_COMPAT_2_2 to disable aercap for all lower
> > > > > > > > > > > > > > than 2.3.
> > > > > > > > > > > > > > 
> > > > > > > > > > > > > > Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > > > > > > > > > > > > 
> > > > > > > > > > > > > Well vfio is never migrated ATM.
> > > > > > > > > > > > > So why is compat code needed at all?
> > > > > > > > > > > > 
> > > > > > > > > > > > It's not for migration, it's to maintain current behavior on existing
> > > > > > > > > > > > platforms.  If someone gets an uncorrected AER error on q35 machine type
> > > > > > > > > > > > today, the VM stops.  With this change, AER would be exposed to the
> > > > > > > > > > > > guest and the guest could handle it.  The compat change therefore
> > > > > > > > > > > > maintains the stop VM behavior on existing q35 machine types.
> > > > > > > > > > > 
> > > > > > > > > > > If stop VM behaviour is useful, expose it to users.
> > > > > > > > > > > If not, then don't.
> > > > > > > > > > > I don't see why does it have to be tied to machine types.
> > > > > > > > > > 
> > > > > > > > > > Because q35-2.2 machine type will currently do a stop VM on uncorrected
> > > > > > > > > > AER error.  If we don't tie that to a machine option then q35-2.2 would
> > > > > > > > > > suddenly start exposing the error to the guest.  That's a fairly
> > > > > > > > > > significant change in behavior for a static machine type.
> > > > > > > > > 
> > > > > > > > > I don't think you can classify it as a behaviour change. VM stop is not
> > > > > > > > > guest visible behaviour.
> > > > > > > > 
> > > > > > > > In one case, an uncorrected AER occurs and the VM is stopped by QEMU.
> > > > > > > > In the other case, the guest is notified and may attempt corrective
> > > > > > > > action... or maybe the guest doesn't understand AER and the user is
> > > > > > > > depending on the previous behavior.  That is absolutely a behavior
> > > > > > > > change.
> > > > > > > > 
> > > > > > > > > Are you worrying about guests misbehaving when they see these errors?
> > > > > > > > > Then you want this as user-controlled, supported option.
> > > > > > > > 
> > > > > > > > Whether the option is user visible is tangential to whether the behavior
> > > > > > > > of existing machine types should be maintained.  Existing machine types
> > > > > > > > can impose a different default than current machine types.
> > > > > > > >
> > > > > > > > > In other words: we only tie things to machine types when we
> > > > > > > > > have to. This code gets almost no testing, and is a lot of
> > > > > > > > > work to test. This one sounds like "just in case" is not a good
> > > > > > > > > motivation.
> > > > > > > > 
> > > > > > > > It seems like an obvious use case for using machine types to maintain
> > > > > > > > compatibility with previous behavior, which is exactly why we have
> > > > > > > > machine types.  If we're not going to use it, why do we have it?
> > > > > > > 
> > > > > > > We have machine types because of the following issues:
> > > > > > > - some silent changes confuse guests. For example guest installed with
> > > > > > >   one machine type might not boot if you try to use it after
> > > > > > >   changing something, or - in case of windows - throw up warnings.
> > > > > > > - some changes break migration
> > > > > > > 
> > > > > > > Looks like none of these cases.
> > > > > > > If AER is unsafe, turn it off by default for everyone.
> > > > > > 
> > > > > > This is silly, we have the tools, let's use them.
> > > > > 
> > > > > It's a very expensive tool, maintainance-wise. We often don't
> > > > > have the choice but I'm not going to use this tool by choice
> > > > > unless we know why we are doing this.
> > > > > 
> > > > > > If a user is running
> > > > > > a VM that gets a VM stop on AER error one day and they upgrade QEMU and
> > > > > > restart it, they should get the same behavior, whether a migration is
> > > > > > involved or not.
> > > > > 
> > > > > You keep saying this, but why should it? Answer that question, the rest
> > > > > will follow.
> > > > 
> > > > My answer is that it's a user visible change in behavior that they may
> > > > rely on and would not expect to change within an existing machine type.
> > > > Silently modifying the default may expose them to error conditions that
> > > > were previously handled by QEMU and adds AER handling requirements to
> > > > the guest.
> > > > 
> > > > Ignoring the question about whether an AER can be reliably recovered in
> > > > the guest for a moment, a typical PC hardware platform will signal the
> > > > running OS with AER errors, so it seems that *if* we can achieve parity
> > > > in a virtual machine with that signaling, and more importantly the
> > > > recovery process, then the default going forward should be to mimic bare
> > > > metal behavior, thus changing the default from what we do today.
> > > > 
> > > > As it is now, I think we have too many outstanding questions about how
> > > > recover occurs to change the default.
> > > > 
> > > > So let me return the question, if we were to resolve those questions and
> > > > change the default handling from what we do today, creating a user
> > > > visible behavior change and imposing new requirements for AER handling
> > > > in the guest, why is that not worthy of machine type stability?  I'd
> > > > certainly expect it from a distro specific machine type.
> > > 
> > > This really depends on what guests do with this.
> > > We try to avoid guest visible changes during migration
> > > (even that's not 100%).
> > > There aren't such requirements for devices where migration
> > > is disabled, e.g. we did many guest visible changes in q35.
> > > 
> > > Latest machine types are better tested. Deviating from latest machine
> > > just for a theoretical "this is guest visible" isn't going to give
> > > you better stability, it will give you worse stability.
> > 
> > You keep calling this theoretical.  As it exists today, an uncorrected
> > AER error results in a VM stop with no guest intervention or support
> > required.  If we were to change the default, the guest would instead be
> > notified of the AER and given the opportunity to recover.  That's not
> > just guest visible, that's a complete change in error handling paradigm.
> 
> OK, so maybe it's a feature that users should have control over.
> But tying it to machine types makes no sense.

If we were to change the default, where else would you tie it?  Machine
types are the only finger hold we have to maintain VM stability afaik.

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 4/7] pcie_aer: expose pcie_aer_msg() interface
  2015-03-18 13:29   ` Michael S. Tsirkin
@ 2015-03-19  1:33     ` Chen Fan
  0 siblings, 0 replies; 53+ messages in thread
From: Chen Fan @ 2015-03-19  1:33 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: izumi.taku, alex.williamson, qemu-devel


On 03/18/2015 09:29 PM, Michael S. Tsirkin wrote:
> On Thu, Mar 12, 2015 at 06:23:56PM +0800, Chen Fan wrote:
>> For vfio device, we need to propagate the aer error to
>> Guest OS. we use the pcie_aer_msg() to send aer error
>> to guest.
>>
>> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> Interesting.
> pcie_aer_inject_error was intended as an interface to
> send errors to guest.
> For example, pcie_aer_msg does not handle the log at all.
> Why do you use the lower-level pcie_aer_msg?
because if the AER error comes from physical device,
the error information is located in physical device space,
so we only need to notify guest the error occurs, then
guest should collect the error from the real device.

but pcie_aer_inject_error is to inject the error to device
virtual space(dev->config). we seem don't need to do that
for physical aer error occur.

Thanks,
Chen

>
>> ---
>>   hw/pci/pcie_aer.c         | 2 +-
>>   include/hw/pci/pcie_aer.h | 1 +
>>   2 files changed, 2 insertions(+), 1 deletion(-)
>>
>> diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c
>> index 71045eb..f2d5d13 100644
>> --- a/hw/pci/pcie_aer.c
>> +++ b/hw/pci/pcie_aer.c
>> @@ -370,7 +370,7 @@ static void pcie_aer_msg_root_port(PCIDevice *dev, const PCIEAERMsg *msg)
>>    *
>>    * Walk up the bus tree from the device, propagate the error message.
>>    */
>> -static void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg)
>> +void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg)
>>   {
>>       uint8_t type;
>>   
>> diff --git a/include/hw/pci/pcie_aer.h b/include/hw/pci/pcie_aer.h
>> index a4cc6f3..80cba7b 100644
>> --- a/include/hw/pci/pcie_aer.h
>> +++ b/include/hw/pci/pcie_aer.h
>> @@ -102,5 +102,6 @@ void pcie_aer_root_write_config(PCIDevice *dev,
>>   
>>   /* error injection */
>>   int pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err);
>> +void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg);
>>   
>>   #endif /* QEMU_PCIE_AER_H */
>> -- 
>> 1.9.3
>>
> .
>

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 7/7] pc: add PC_I440FX_COMPAT to disable aercap for vifo device
  2015-03-18 19:05                             ` Alex Williamson
@ 2015-03-19 21:26                               ` Paolo Bonzini
  0 siblings, 0 replies; 53+ messages in thread
From: Paolo Bonzini @ 2015-03-19 21:26 UTC (permalink / raw)
  To: Alex Williamson, Michael S. Tsirkin; +Cc: Chen Fan, izumi.taku, qemu-devel



On 18/03/2015 20:05, Alex Williamson wrote:
> > OK, so maybe it's a feature that users should have control over.
> > But tying it to machine types makes no sense.
>
> If we were to change the default, where else would you tie it?  Machine
> types are the only finger hold we have to maintain VM stability afaik.

I agree.

Machine type are there to avoid exposing "important" changes in the
behavior of the device model to the guest.  These include the number and
size of capabilities and BARs (and other fields in configuration space),
the number of virtqueues and how they are used (virtio features), etc.

Migration is definitely the main reason to introduce a new property and
add compatibility glue for older machine types, but it does not have to
be the only one.

AER seems to me like it is a significant-enough change in how the guest
sees an assigned device.  You certainly don't want it to appear in a
configuration that was previously stable.

So, migration could be a reason why we _have_ to introduce compatibility
glue, but AER to me is a perfect example of why we may sometimes _want_
to have compatibility glue.  It's different, but it makes a lot of sense.

The maintenance cost of the compatibility glue is small.  Hiding the
capability is half a dozen lines of code, and the behavior is no
different than when the guest doesn't want to see errors (which has to
be supported, if only because not all chipsets are PCIe).

Paolo

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 0/7] pass aer error to guest for vfio device
  2015-03-16  4:57   ` Michael S. Tsirkin
@ 2015-03-19 21:44     ` Paolo Bonzini
  0 siblings, 0 replies; 53+ messages in thread
From: Paolo Bonzini @ 2015-03-19 21:44 UTC (permalink / raw)
  To: Michael S. Tsirkin, Chen Fan; +Cc: izumi.taku, alex.williamson, qemu-devel



On 16/03/2015 05:57, Michael S. Tsirkin wrote:
> >    1. add 'x-aer' for user to off aer capability.
> 
> User-exposed properties should not start with x- - by convention
> that's for internal properties.

Wrong, x- is for properties that we don't guarantee to remain there in
the future.  It looks indeed like the property should be called simply
'aer', but not for the reason you mentioned.

Paolo

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 5/7] vfio-pci: pass the aer error to guest
  2015-03-16 14:09           ` Alex Williamson
@ 2015-03-25  1:33             ` Chen Fan
  2015-03-25  2:31               ` Alex Williamson
  2015-03-25  1:53             ` Chen Fan
  1 sibling, 1 reply; 53+ messages in thread
From: Chen Fan @ 2015-03-25  1:33 UTC (permalink / raw)
  To: Alex Williamson; +Cc: izumi.taku, qemu-devel


On 03/16/2015 10:09 PM, Alex Williamson wrote:
> On Mon, 2015-03-16 at 15:35 +0800, Chen Fan wrote:
>> On 03/16/2015 11:52 AM, Alex Williamson wrote:
>>> On Mon, 2015-03-16 at 11:05 +0800, Chen Fan wrote:
>>>> On 03/14/2015 06:34 AM, Alex Williamson wrote:
>>>>> On Thu, 2015-03-12 at 18:23 +0800, Chen Fan wrote:
>>>>>> when the vfio device encounters an uncorrectable error in host,
>>>>>> the vfio_pci driver will signal the eventfd registered by this
>>>>>> vfio device, the results in the qemu eventfd handler getting
>>>>>> invoked.
>>>>>>
>>>>>> this patch is to pass the error to guest and have the guest driver
>>>>>> recover from the error.
>>>>> What is going to be the typical recovery mechanism for the guest?  I'm
>>>>> concerned that the topology of the device in the guest doesn't
>>>>> necessarily match the topology of the device in the host, so if the
>>>>> guest were to attempt a bus reset to recover a device, for instance,
>>>>> what happens?
>>>> the recovery mechanism is that when guest got an aer error from a device,
>>>> guest will clean the corresponding status bit in device register. and for
>>>> need reset device, the guest aer driver would reset all devices under bus.
>>> Sorry, I'm still confused, how does the guest aer driver reset all
>>> devices under a bus?  Are we talking about function-level, device
>>> specific reset mechanisms or secondary bus resets?  If the guest is
>>> performing secondary bus resets, what guarantee do they have that it
>>> will translate to a physical secondary bus reset?  vfio may only do an
>>> FLR when the bus is reset or it may not be able to do anything depending
>>> on the available function-level resets and physical and virtual topology
>>> of the device.  Thanks,
>> in general, functions depends on the corresponding device driver behaviors
>> to do the recovery. e.g: implemented the error_detect, slot_reset callbacks.
>> and for link reset, it usually do secondary bus reset.
>>
>> and do we must require to the physical secondary bus reset for vfio device
>> as bus reset?
> That depends on how the guest driver attempts recovery, doesn't it?
> There are only a very limited number of cases where a secondary bus
> reset initiated by the guest will translate to a secondary bus reset of
> the physical device (iirc, single function device without FLR).  In most
> cases, it will at best be translated to an FLR.  VFIO really only does
> bus resets on VM reset because that's the only time we know that it's ok
> to reset multiple devices.  If the guest driver is depending on a
> secondary bus reset to put the device into a recoverable state and we're
> not able to provide that, then we're actually reducing containment of
> the error by exposing AER to the guest and allowing it to attempt
> recovery.  So in practice, I'm afraid we're risking the integrity of the
> VM by exposing AER to the guest and making it think that it can perform
> recovery operations that are not effective.  Thanks,
Hi Alex,

     if guest driver need reset a vfio device by secondary bus reset when
an aer occured. how about keeping the behavior by stopping VM and
output an fatal error information to user.

Thanks,
Chen

>
> Alex
>
> .
>

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 5/7] vfio-pci: pass the aer error to guest
  2015-03-16 14:09           ` Alex Williamson
  2015-03-25  1:33             ` Chen Fan
@ 2015-03-25  1:53             ` Chen Fan
  2015-03-25  2:41               ` Alex Williamson
  1 sibling, 1 reply; 53+ messages in thread
From: Chen Fan @ 2015-03-25  1:53 UTC (permalink / raw)
  To: Alex Williamson; +Cc: izumi.taku, qemu-devel


On 03/16/2015 10:09 PM, Alex Williamson wrote:
> On Mon, 2015-03-16 at 15:35 +0800, Chen Fan wrote:
>> On 03/16/2015 11:52 AM, Alex Williamson wrote:
>>> On Mon, 2015-03-16 at 11:05 +0800, Chen Fan wrote:
>>>> On 03/14/2015 06:34 AM, Alex Williamson wrote:
>>>>> On Thu, 2015-03-12 at 18:23 +0800, Chen Fan wrote:
>>>>>> when the vfio device encounters an uncorrectable error in host,
>>>>>> the vfio_pci driver will signal the eventfd registered by this
>>>>>> vfio device, the results in the qemu eventfd handler getting
>>>>>> invoked.
>>>>>>
>>>>>> this patch is to pass the error to guest and have the guest driver
>>>>>> recover from the error.
>>>>> What is going to be the typical recovery mechanism for the guest?  I'm
>>>>> concerned that the topology of the device in the guest doesn't
>>>>> necessarily match the topology of the device in the host, so if the
>>>>> guest were to attempt a bus reset to recover a device, for instance,
>>>>> what happens?
>>>> the recovery mechanism is that when guest got an aer error from a device,
>>>> guest will clean the corresponding status bit in device register. and for
>>>> need reset device, the guest aer driver would reset all devices under bus.
>>> Sorry, I'm still confused, how does the guest aer driver reset all
>>> devices under a bus?  Are we talking about function-level, device
>>> specific reset mechanisms or secondary bus resets?  If the guest is
>>> performing secondary bus resets, what guarantee do they have that it
>>> will translate to a physical secondary bus reset?  vfio may only do an
>>> FLR when the bus is reset or it may not be able to do anything depending
>>> on the available function-level resets and physical and virtual topology
>>> of the device.  Thanks,
>> in general, functions depends on the corresponding device driver behaviors
>> to do the recovery. e.g: implemented the error_detect, slot_reset callbacks.
>> and for link reset, it usually do secondary bus reset.
>>
>> and do we must require to the physical secondary bus reset for vfio device
>> as bus reset?
> That depends on how the guest driver attempts recovery, doesn't it?
> There are only a very limited number of cases where a secondary bus
> reset initiated by the guest will translate to a secondary bus reset of
> the physical device (iirc, single function device without FLR).  In most
> cases, it will at best be translated to an FLR.  VFIO really only does
> bus resets on VM reset because that's the only time we know that it's ok
> to reset multiple devices.  If the guest driver is depending on a
> secondary bus reset to put the device into a recoverable state and we're
> not able to provide that, then we're actually reducing containment of
> the error by exposing AER to the guest and allowing it to attempt
> recovery.  So in practice, I'm afraid we're risking the integrity of the
> VM by exposing AER to the guest and making it think that it can perform
> recovery operations that are not effective.  Thanks,
I also have seen that if device without FLR, it seems can do hot reset
by ioctl VFIO_DEVICE_PCI_HOT_RESET to reset the physical slot or bus
in vfio_pci_reset. does it satisfy the recovery issues that you said?

Thanks,
Chen



>
> Alex
>
> .
>

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 5/7] vfio-pci: pass the aer error to guest
  2015-03-25  1:33             ` Chen Fan
@ 2015-03-25  2:31               ` Alex Williamson
  0 siblings, 0 replies; 53+ messages in thread
From: Alex Williamson @ 2015-03-25  2:31 UTC (permalink / raw)
  To: Chen Fan; +Cc: izumi.taku, qemu-devel

On Wed, 2015-03-25 at 09:33 +0800, Chen Fan wrote:
> On 03/16/2015 10:09 PM, Alex Williamson wrote:
> > On Mon, 2015-03-16 at 15:35 +0800, Chen Fan wrote:
> >> On 03/16/2015 11:52 AM, Alex Williamson wrote:
> >>> On Mon, 2015-03-16 at 11:05 +0800, Chen Fan wrote:
> >>>> On 03/14/2015 06:34 AM, Alex Williamson wrote:
> >>>>> On Thu, 2015-03-12 at 18:23 +0800, Chen Fan wrote:
> >>>>>> when the vfio device encounters an uncorrectable error in host,
> >>>>>> the vfio_pci driver will signal the eventfd registered by this
> >>>>>> vfio device, the results in the qemu eventfd handler getting
> >>>>>> invoked.
> >>>>>>
> >>>>>> this patch is to pass the error to guest and have the guest driver
> >>>>>> recover from the error.
> >>>>> What is going to be the typical recovery mechanism for the guest?  I'm
> >>>>> concerned that the topology of the device in the guest doesn't
> >>>>> necessarily match the topology of the device in the host, so if the
> >>>>> guest were to attempt a bus reset to recover a device, for instance,
> >>>>> what happens?
> >>>> the recovery mechanism is that when guest got an aer error from a device,
> >>>> guest will clean the corresponding status bit in device register. and for
> >>>> need reset device, the guest aer driver would reset all devices under bus.
> >>> Sorry, I'm still confused, how does the guest aer driver reset all
> >>> devices under a bus?  Are we talking about function-level, device
> >>> specific reset mechanisms or secondary bus resets?  If the guest is
> >>> performing secondary bus resets, what guarantee do they have that it
> >>> will translate to a physical secondary bus reset?  vfio may only do an
> >>> FLR when the bus is reset or it may not be able to do anything depending
> >>> on the available function-level resets and physical and virtual topology
> >>> of the device.  Thanks,
> >> in general, functions depends on the corresponding device driver behaviors
> >> to do the recovery. e.g: implemented the error_detect, slot_reset callbacks.
> >> and for link reset, it usually do secondary bus reset.
> >>
> >> and do we must require to the physical secondary bus reset for vfio device
> >> as bus reset?
> > That depends on how the guest driver attempts recovery, doesn't it?
> > There are only a very limited number of cases where a secondary bus
> > reset initiated by the guest will translate to a secondary bus reset of
> > the physical device (iirc, single function device without FLR).  In most
> > cases, it will at best be translated to an FLR.  VFIO really only does
> > bus resets on VM reset because that's the only time we know that it's ok
> > to reset multiple devices.  If the guest driver is depending on a
> > secondary bus reset to put the device into a recoverable state and we're
> > not able to provide that, then we're actually reducing containment of
> > the error by exposing AER to the guest and allowing it to attempt
> > recovery.  So in practice, I'm afraid we're risking the integrity of the
> > VM by exposing AER to the guest and making it think that it can perform
> > recovery operations that are not effective.  Thanks,
> Hi Alex,
> 
>      if guest driver need reset a vfio device by secondary bus reset when
> an aer occured. how about keeping the behavior by stopping VM and
> output an fatal error information to user.

That sounds like a very fragile heuristic to try to associate the reason
for a secondary bus reset based on the timing of an AER notification.
How can we be sure there's an association?  Is it still worthwhile to
allow the guest to participate in recovery or will most of the cases
just stall the VM stop until a bus reset is attempted?  Thanks,

Alex

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 5/7] vfio-pci: pass the aer error to guest
  2015-03-25  1:53             ` Chen Fan
@ 2015-03-25  2:41               ` Alex Williamson
  2015-03-25  3:07                 ` Chen Fan
  2015-04-01  4:12                 ` Chen Fan
  0 siblings, 2 replies; 53+ messages in thread
From: Alex Williamson @ 2015-03-25  2:41 UTC (permalink / raw)
  To: Chen Fan; +Cc: izumi.taku, qemu-devel

On Wed, 2015-03-25 at 09:53 +0800, Chen Fan wrote:
> On 03/16/2015 10:09 PM, Alex Williamson wrote:
> > On Mon, 2015-03-16 at 15:35 +0800, Chen Fan wrote:
> >> On 03/16/2015 11:52 AM, Alex Williamson wrote:
> >>> On Mon, 2015-03-16 at 11:05 +0800, Chen Fan wrote:
> >>>> On 03/14/2015 06:34 AM, Alex Williamson wrote:
> >>>>> On Thu, 2015-03-12 at 18:23 +0800, Chen Fan wrote:
> >>>>>> when the vfio device encounters an uncorrectable error in host,
> >>>>>> the vfio_pci driver will signal the eventfd registered by this
> >>>>>> vfio device, the results in the qemu eventfd handler getting
> >>>>>> invoked.
> >>>>>>
> >>>>>> this patch is to pass the error to guest and have the guest driver
> >>>>>> recover from the error.
> >>>>> What is going to be the typical recovery mechanism for the guest?  I'm
> >>>>> concerned that the topology of the device in the guest doesn't
> >>>>> necessarily match the topology of the device in the host, so if the
> >>>>> guest were to attempt a bus reset to recover a device, for instance,
> >>>>> what happens?
> >>>> the recovery mechanism is that when guest got an aer error from a device,
> >>>> guest will clean the corresponding status bit in device register. and for
> >>>> need reset device, the guest aer driver would reset all devices under bus.
> >>> Sorry, I'm still confused, how does the guest aer driver reset all
> >>> devices under a bus?  Are we talking about function-level, device
> >>> specific reset mechanisms or secondary bus resets?  If the guest is
> >>> performing secondary bus resets, what guarantee do they have that it
> >>> will translate to a physical secondary bus reset?  vfio may only do an
> >>> FLR when the bus is reset or it may not be able to do anything depending
> >>> on the available function-level resets and physical and virtual topology
> >>> of the device.  Thanks,
> >> in general, functions depends on the corresponding device driver behaviors
> >> to do the recovery. e.g: implemented the error_detect, slot_reset callbacks.
> >> and for link reset, it usually do secondary bus reset.
> >>
> >> and do we must require to the physical secondary bus reset for vfio device
> >> as bus reset?
> > That depends on how the guest driver attempts recovery, doesn't it?
> > There are only a very limited number of cases where a secondary bus
> > reset initiated by the guest will translate to a secondary bus reset of
> > the physical device (iirc, single function device without FLR).  In most
> > cases, it will at best be translated to an FLR.  VFIO really only does
> > bus resets on VM reset because that's the only time we know that it's ok
> > to reset multiple devices.  If the guest driver is depending on a
> > secondary bus reset to put the device into a recoverable state and we're
> > not able to provide that, then we're actually reducing containment of
> > the error by exposing AER to the guest and allowing it to attempt
> > recovery.  So in practice, I'm afraid we're risking the integrity of the
> > VM by exposing AER to the guest and making it think that it can perform
> > recovery operations that are not effective.  Thanks,
> I also have seen that if device without FLR, it seems can do hot reset
> by ioctl VFIO_DEVICE_PCI_HOT_RESET to reset the physical slot or bus
> in vfio_pci_reset. does it satisfy the recovery issues that you said?

The hot reset interface can only be used when a) the user (QEMU) owns
all of the devices on the bus and b) we know we're resetting all of the
devices.  That mostly limits its use to VM reset.  I think that on a
secondary bus reset, we don't know the scope of the reset at the QEMU
vfio driver, so we only make use of reset methods with a function-level
scope.  That would only result in a secondary bus reset if that's the
reset mechanism used by the host kernel's PCI code (pci_reset_function),
which is limited to single function devices on a secondary bus, with no
other reset mechanisms.  The host reset is also only available in some
configurations, for instance if we have a dual-port NIC where each
function is a separate IOMMU group, then we clearly cannot do a hot
reset unless both functions are assigned to the same VM _and_ appear to
the guest on the same virtual bus.  So even if we could know the scope
of the reset in the QEMU vfio driver, we can only make use of it under
very strict guest configurations.  Thanks,

Alex

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 5/7] vfio-pci: pass the aer error to guest
  2015-03-25  2:41               ` Alex Williamson
@ 2015-03-25  3:07                 ` Chen Fan
  2015-04-01  4:12                 ` Chen Fan
  1 sibling, 0 replies; 53+ messages in thread
From: Chen Fan @ 2015-03-25  3:07 UTC (permalink / raw)
  To: Alex Williamson; +Cc: izumi.taku, qemu-devel


On 03/25/2015 10:41 AM, Alex Williamson wrote:
> On Wed, 2015-03-25 at 09:53 +0800, Chen Fan wrote:
>> On 03/16/2015 10:09 PM, Alex Williamson wrote:
>>> On Mon, 2015-03-16 at 15:35 +0800, Chen Fan wrote:
>>>> On 03/16/2015 11:52 AM, Alex Williamson wrote:
>>>>> On Mon, 2015-03-16 at 11:05 +0800, Chen Fan wrote:
>>>>>> On 03/14/2015 06:34 AM, Alex Williamson wrote:
>>>>>>> On Thu, 2015-03-12 at 18:23 +0800, Chen Fan wrote:
>>>>>>>> when the vfio device encounters an uncorrectable error in host,
>>>>>>>> the vfio_pci driver will signal the eventfd registered by this
>>>>>>>> vfio device, the results in the qemu eventfd handler getting
>>>>>>>> invoked.
>>>>>>>>
>>>>>>>> this patch is to pass the error to guest and have the guest driver
>>>>>>>> recover from the error.
>>>>>>> What is going to be the typical recovery mechanism for the guest?  I'm
>>>>>>> concerned that the topology of the device in the guest doesn't
>>>>>>> necessarily match the topology of the device in the host, so if the
>>>>>>> guest were to attempt a bus reset to recover a device, for instance,
>>>>>>> what happens?
>>>>>> the recovery mechanism is that when guest got an aer error from a device,
>>>>>> guest will clean the corresponding status bit in device register. and for
>>>>>> need reset device, the guest aer driver would reset all devices under bus.
>>>>> Sorry, I'm still confused, how does the guest aer driver reset all
>>>>> devices under a bus?  Are we talking about function-level, device
>>>>> specific reset mechanisms or secondary bus resets?  If the guest is
>>>>> performing secondary bus resets, what guarantee do they have that it
>>>>> will translate to a physical secondary bus reset?  vfio may only do an
>>>>> FLR when the bus is reset or it may not be able to do anything depending
>>>>> on the available function-level resets and physical and virtual topology
>>>>> of the device.  Thanks,
>>>> in general, functions depends on the corresponding device driver behaviors
>>>> to do the recovery. e.g: implemented the error_detect, slot_reset callbacks.
>>>> and for link reset, it usually do secondary bus reset.
>>>>
>>>> and do we must require to the physical secondary bus reset for vfio device
>>>> as bus reset?
>>> That depends on how the guest driver attempts recovery, doesn't it?
>>> There are only a very limited number of cases where a secondary bus
>>> reset initiated by the guest will translate to a secondary bus reset of
>>> the physical device (iirc, single function device without FLR).  In most
>>> cases, it will at best be translated to an FLR.  VFIO really only does
>>> bus resets on VM reset because that's the only time we know that it's ok
>>> to reset multiple devices.  If the guest driver is depending on a
>>> secondary bus reset to put the device into a recoverable state and we're
>>> not able to provide that, then we're actually reducing containment of
>>> the error by exposing AER to the guest and allowing it to attempt
>>> recovery.  So in practice, I'm afraid we're risking the integrity of the
>>> VM by exposing AER to the guest and making it think that it can perform
>>> recovery operations that are not effective.  Thanks,
>> I also have seen that if device without FLR, it seems can do hot reset
>> by ioctl VFIO_DEVICE_PCI_HOT_RESET to reset the physical slot or bus
>> in vfio_pci_reset. does it satisfy the recovery issues that you said?
> The hot reset interface can only be used when a) the user (QEMU) owns
> all of the devices on the bus and b) we know we're resetting all of the
> devices.  That mostly limits its use to VM reset.  I think that on a
> secondary bus reset, we don't know the scope of the reset at the QEMU
> vfio driver, so we only make use of reset methods with a function-level
> scope.  That would only result in a secondary bus reset if that's the
> reset mechanism used by the host kernel's PCI code (pci_reset_function),
> which is limited to single function devices on a secondary bus, with no
> other reset mechanisms.  The host reset is also only available in some
> configurations, for instance if we have a dual-port NIC where each
> function is a separate IOMMU group, then we clearly cannot do a hot
> reset unless both functions are assigned to the same VM _and_ appear to
> the guest on the same virtual bus.  So even if we could know the scope
> of the reset in the QEMU vfio driver, we can only make use of it under
> very strict guest configurations.  Thanks,

it seems difficult to allow guest to participate in recovery.
but I think that we might be able to capture the vfio_pci_reset
result. if vfio device reset fail. then we stop the VM.

Thanks,
Chen

>
> Alex
>
> .
>

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 5/7] vfio-pci: pass the aer error to guest
  2015-03-25  2:41               ` Alex Williamson
  2015-03-25  3:07                 ` Chen Fan
@ 2015-04-01  4:12                 ` Chen Fan
  2015-04-01 15:46                   ` Alex Williamson
  1 sibling, 1 reply; 53+ messages in thread
From: Chen Fan @ 2015-04-01  4:12 UTC (permalink / raw)
  To: Alex Williamson; +Cc: izumi.taku, qemu-devel


On 03/25/2015 10:41 AM, Alex Williamson wrote:
> On Wed, 2015-03-25 at 09:53 +0800, Chen Fan wrote:
>> On 03/16/2015 10:09 PM, Alex Williamson wrote:
>>> On Mon, 2015-03-16 at 15:35 +0800, Chen Fan wrote:
>>>> On 03/16/2015 11:52 AM, Alex Williamson wrote:
>>>>> On Mon, 2015-03-16 at 11:05 +0800, Chen Fan wrote:
>>>>>> On 03/14/2015 06:34 AM, Alex Williamson wrote:
>>>>>>> On Thu, 2015-03-12 at 18:23 +0800, Chen Fan wrote:
>>>>>>>> when the vfio device encounters an uncorrectable error in host,
>>>>>>>> the vfio_pci driver will signal the eventfd registered by this
>>>>>>>> vfio device, the results in the qemu eventfd handler getting
>>>>>>>> invoked.
>>>>>>>>
>>>>>>>> this patch is to pass the error to guest and have the guest driver
>>>>>>>> recover from the error.
>>>>>>> What is going to be the typical recovery mechanism for the guest?  I'm
>>>>>>> concerned that the topology of the device in the guest doesn't
>>>>>>> necessarily match the topology of the device in the host, so if the
>>>>>>> guest were to attempt a bus reset to recover a device, for instance,
>>>>>>> what happens?
>>>>>> the recovery mechanism is that when guest got an aer error from a device,
>>>>>> guest will clean the corresponding status bit in device register. and for
>>>>>> need reset device, the guest aer driver would reset all devices under bus.
>>>>> Sorry, I'm still confused, how does the guest aer driver reset all
>>>>> devices under a bus?  Are we talking about function-level, device
>>>>> specific reset mechanisms or secondary bus resets?  If the guest is
>>>>> performing secondary bus resets, what guarantee do they have that it
>>>>> will translate to a physical secondary bus reset?  vfio may only do an
>>>>> FLR when the bus is reset or it may not be able to do anything depending
>>>>> on the available function-level resets and physical and virtual topology
>>>>> of the device.  Thanks,
>>>> in general, functions depends on the corresponding device driver behaviors
>>>> to do the recovery. e.g: implemented the error_detect, slot_reset callbacks.
>>>> and for link reset, it usually do secondary bus reset.
>>>>
>>>> and do we must require to the physical secondary bus reset for vfio device
>>>> as bus reset?
>>> That depends on how the guest driver attempts recovery, doesn't it?
>>> There are only a very limited number of cases where a secondary bus
>>> reset initiated by the guest will translate to a secondary bus reset of
>>> the physical device (iirc, single function device without FLR).  In most
>>> cases, it will at best be translated to an FLR.  VFIO really only does
>>> bus resets on VM reset because that's the only time we know that it's ok
>>> to reset multiple devices.  If the guest driver is depending on a
>>> secondary bus reset to put the device into a recoverable state and we're
>>> not able to provide that, then we're actually reducing containment of
>>> the error by exposing AER to the guest and allowing it to attempt
>>> recovery.  So in practice, I'm afraid we're risking the integrity of the
>>> VM by exposing AER to the guest and making it think that it can perform
>>> recovery operations that are not effective.  Thanks,
>> I also have seen that if device without FLR, it seems can do hot reset
>> by ioctl VFIO_DEVICE_PCI_HOT_RESET to reset the physical slot or bus
>> in vfio_pci_reset. does it satisfy the recovery issues that you said?
> The hot reset interface can only be used when a) the user (QEMU) owns
> all of the devices on the bus and b) we know we're resetting all of the
> devices.  That mostly limits its use to VM reset.  I think that on a
> secondary bus reset, we don't know the scope of the reset at the QEMU
> vfio driver, so we only make use of reset methods with a function-level
> scope.  That would only result in a secondary bus reset if that's the
> reset mechanism used by the host kernel's PCI code (pci_reset_function),
> which is limited to single function devices on a secondary bus, with no
> other reset mechanisms.  The host reset is also only available in some
> configurations, for instance if we have a dual-port NIC where each
> function is a separate IOMMU group, then we clearly cannot do a hot
> reset unless both functions are assigned to the same VM _and_ appear to
> the guest on the same virtual bus.  So even if we could know the scope
> of the reset in the QEMU vfio driver, we can only make use of it under
> very strict guest configurations.  Thanks,
Hi Alex,

    have you some idea or scenario to fix/escape this issue?

Thanks,
Chen


>
> Alex
>
> .
>

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 5/7] vfio-pci: pass the aer error to guest
  2015-04-01  4:12                 ` Chen Fan
@ 2015-04-01 15:46                   ` Alex Williamson
  2015-04-08  8:59                     ` Chen Fan
  0 siblings, 1 reply; 53+ messages in thread
From: Alex Williamson @ 2015-04-01 15:46 UTC (permalink / raw)
  To: Chen Fan; +Cc: izumi.taku, qemu-devel

On Wed, 2015-04-01 at 12:12 +0800, Chen Fan wrote:
> On 03/25/2015 10:41 AM, Alex Williamson wrote:
> > On Wed, 2015-03-25 at 09:53 +0800, Chen Fan wrote:
> >> On 03/16/2015 10:09 PM, Alex Williamson wrote:
> >>> On Mon, 2015-03-16 at 15:35 +0800, Chen Fan wrote:
> >>>> On 03/16/2015 11:52 AM, Alex Williamson wrote:
> >>>>> On Mon, 2015-03-16 at 11:05 +0800, Chen Fan wrote:
> >>>>>> On 03/14/2015 06:34 AM, Alex Williamson wrote:
> >>>>>>> On Thu, 2015-03-12 at 18:23 +0800, Chen Fan wrote:
> >>>>>>>> when the vfio device encounters an uncorrectable error in host,
> >>>>>>>> the vfio_pci driver will signal the eventfd registered by this
> >>>>>>>> vfio device, the results in the qemu eventfd handler getting
> >>>>>>>> invoked.
> >>>>>>>>
> >>>>>>>> this patch is to pass the error to guest and have the guest driver
> >>>>>>>> recover from the error.
> >>>>>>> What is going to be the typical recovery mechanism for the guest?  I'm
> >>>>>>> concerned that the topology of the device in the guest doesn't
> >>>>>>> necessarily match the topology of the device in the host, so if the
> >>>>>>> guest were to attempt a bus reset to recover a device, for instance,
> >>>>>>> what happens?
> >>>>>> the recovery mechanism is that when guest got an aer error from a device,
> >>>>>> guest will clean the corresponding status bit in device register. and for
> >>>>>> need reset device, the guest aer driver would reset all devices under bus.
> >>>>> Sorry, I'm still confused, how does the guest aer driver reset all
> >>>>> devices under a bus?  Are we talking about function-level, device
> >>>>> specific reset mechanisms or secondary bus resets?  If the guest is
> >>>>> performing secondary bus resets, what guarantee do they have that it
> >>>>> will translate to a physical secondary bus reset?  vfio may only do an
> >>>>> FLR when the bus is reset or it may not be able to do anything depending
> >>>>> on the available function-level resets and physical and virtual topology
> >>>>> of the device.  Thanks,
> >>>> in general, functions depends on the corresponding device driver behaviors
> >>>> to do the recovery. e.g: implemented the error_detect, slot_reset callbacks.
> >>>> and for link reset, it usually do secondary bus reset.
> >>>>
> >>>> and do we must require to the physical secondary bus reset for vfio device
> >>>> as bus reset?
> >>> That depends on how the guest driver attempts recovery, doesn't it?
> >>> There are only a very limited number of cases where a secondary bus
> >>> reset initiated by the guest will translate to a secondary bus reset of
> >>> the physical device (iirc, single function device without FLR).  In most
> >>> cases, it will at best be translated to an FLR.  VFIO really only does
> >>> bus resets on VM reset because that's the only time we know that it's ok
> >>> to reset multiple devices.  If the guest driver is depending on a
> >>> secondary bus reset to put the device into a recoverable state and we're
> >>> not able to provide that, then we're actually reducing containment of
> >>> the error by exposing AER to the guest and allowing it to attempt
> >>> recovery.  So in practice, I'm afraid we're risking the integrity of the
> >>> VM by exposing AER to the guest and making it think that it can perform
> >>> recovery operations that are not effective.  Thanks,
> >> I also have seen that if device without FLR, it seems can do hot reset
> >> by ioctl VFIO_DEVICE_PCI_HOT_RESET to reset the physical slot or bus
> >> in vfio_pci_reset. does it satisfy the recovery issues that you said?
> > The hot reset interface can only be used when a) the user (QEMU) owns
> > all of the devices on the bus and b) we know we're resetting all of the
> > devices.  That mostly limits its use to VM reset.  I think that on a
> > secondary bus reset, we don't know the scope of the reset at the QEMU
> > vfio driver, so we only make use of reset methods with a function-level
> > scope.  That would only result in a secondary bus reset if that's the
> > reset mechanism used by the host kernel's PCI code (pci_reset_function),
> > which is limited to single function devices on a secondary bus, with no
> > other reset mechanisms.  The host reset is also only available in some
> > configurations, for instance if we have a dual-port NIC where each
> > function is a separate IOMMU group, then we clearly cannot do a hot
> > reset unless both functions are assigned to the same VM _and_ appear to
> > the guest on the same virtual bus.  So even if we could know the scope
> > of the reset in the QEMU vfio driver, we can only make use of it under
> > very strict guest configurations.  Thanks,
> Hi Alex,
> 
>     have you some idea or scenario to fix/escape this issue?

Hi Chen,

I expect there are two major components to this.  The first is that
QEMU/vfio-pci needs to enforce that a bus reset is possible for the host
and guest topology when guest AER handling is specified for a device.
That means that everything affected by the bus reset needs to be exposed
to the guest in a compatible way.  For instance, if a bus reset affects
devices from multiple groups, the guest needs to not only own all of
those groups, but they also need to be exposed to the guest such that
the virtual bus layout reflects the extent of the reset for the physical
bus.  This also implies that guest AER handling cannot be the default
since it will impose significant configuration restrictions on device
assignment.

This seems like a difficult configuration enforcement to make, but maybe
there are simplifying assumptions that can help.  For instance the
devices need to be exposed as PCIe therefore we won't have multiple
slots in use on a bus and I think we can therefore mostly ignore hotplug
since we can only hotplug at a slot granularity.  That may also imply
that we should simply enforce a 1:1 mapping of physical functions to
virtual functions.  At least one function from each group affected by a
reset must be exposed to the guest.

The second issue is that individual QEMU PCI devices have no callback
for a bus reset.  QEMU/vfio-pci currently has the DeviceClass.reset
callback, which we assume to be a function-level reset.  We also
register with qemu_register_reset() for a VM reset, which is the only
point currently that we know we can do a reset affecting multiple
devices.  Infrastructure will need to be added to QEMU/PCI to expose the
link down/RST signal to devices on a bus to trigger a multi-device reset
in vfio-pci.

Hopefully I'm not missing something, but I think both of those changes
are going to be required before we can have anything remotely
supportable for guest-based AER error handle.  This pretty complicated
for the user and also for libvirt to figure out.  At a minimum libvirt
would need to support a new guest-based AER handling flag for devices.
We probably need to determine whether this is unique to vfio-pci or a
generic PCIDevice option.  Thanks,

Alex

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 5/7] vfio-pci: pass the aer error to guest
  2015-04-01 15:46                   ` Alex Williamson
@ 2015-04-08  8:59                     ` Chen Fan
  2015-04-08 15:36                       ` Alex Williamson
  0 siblings, 1 reply; 53+ messages in thread
From: Chen Fan @ 2015-04-08  8:59 UTC (permalink / raw)
  To: Alex Williamson; +Cc: izumi.taku, qemu-devel


On 04/01/2015 11:46 PM, Alex Williamson wrote:
> On Wed, 2015-04-01 at 12:12 +0800, Chen Fan wrote:
>> On 03/25/2015 10:41 AM, Alex Williamson wrote:
>>> On Wed, 2015-03-25 at 09:53 +0800, Chen Fan wrote:
>>>> On 03/16/2015 10:09 PM, Alex Williamson wrote:
>>>>> On Mon, 2015-03-16 at 15:35 +0800, Chen Fan wrote:
>>>>>> On 03/16/2015 11:52 AM, Alex Williamson wrote:
>>>>>>> On Mon, 2015-03-16 at 11:05 +0800, Chen Fan wrote:
>>>>>>>> On 03/14/2015 06:34 AM, Alex Williamson wrote:
>>>>>>>>> On Thu, 2015-03-12 at 18:23 +0800, Chen Fan wrote:
>>>>>>>>>> when the vfio device encounters an uncorrectable error in host,
>>>>>>>>>> the vfio_pci driver will signal the eventfd registered by this
>>>>>>>>>> vfio device, the results in the qemu eventfd handler getting
>>>>>>>>>> invoked.
>>>>>>>>>>
>>>>>>>>>> this patch is to pass the error to guest and have the guest driver
>>>>>>>>>> recover from the error.
>>>>>>>>> What is going to be the typical recovery mechanism for the guest?  I'm
>>>>>>>>> concerned that the topology of the device in the guest doesn't
>>>>>>>>> necessarily match the topology of the device in the host, so if the
>>>>>>>>> guest were to attempt a bus reset to recover a device, for instance,
>>>>>>>>> what happens?
>>>>>>>> the recovery mechanism is that when guest got an aer error from a device,
>>>>>>>> guest will clean the corresponding status bit in device register. and for
>>>>>>>> need reset device, the guest aer driver would reset all devices under bus.
>>>>>>> Sorry, I'm still confused, how does the guest aer driver reset all
>>>>>>> devices under a bus?  Are we talking about function-level, device
>>>>>>> specific reset mechanisms or secondary bus resets?  If the guest is
>>>>>>> performing secondary bus resets, what guarantee do they have that it
>>>>>>> will translate to a physical secondary bus reset?  vfio may only do an
>>>>>>> FLR when the bus is reset or it may not be able to do anything depending
>>>>>>> on the available function-level resets and physical and virtual topology
>>>>>>> of the device.  Thanks,
>>>>>> in general, functions depends on the corresponding device driver behaviors
>>>>>> to do the recovery. e.g: implemented the error_detect, slot_reset callbacks.
>>>>>> and for link reset, it usually do secondary bus reset.
>>>>>>
>>>>>> and do we must require to the physical secondary bus reset for vfio device
>>>>>> as bus reset?
>>>>> That depends on how the guest driver attempts recovery, doesn't it?
>>>>> There are only a very limited number of cases where a secondary bus
>>>>> reset initiated by the guest will translate to a secondary bus reset of
>>>>> the physical device (iirc, single function device without FLR).  In most
>>>>> cases, it will at best be translated to an FLR.  VFIO really only does
>>>>> bus resets on VM reset because that's the only time we know that it's ok
>>>>> to reset multiple devices.  If the guest driver is depending on a
>>>>> secondary bus reset to put the device into a recoverable state and we're
>>>>> not able to provide that, then we're actually reducing containment of
>>>>> the error by exposing AER to the guest and allowing it to attempt
>>>>> recovery.  So in practice, I'm afraid we're risking the integrity of the
>>>>> VM by exposing AER to the guest and making it think that it can perform
>>>>> recovery operations that are not effective.  Thanks,
>>>> I also have seen that if device without FLR, it seems can do hot reset
>>>> by ioctl VFIO_DEVICE_PCI_HOT_RESET to reset the physical slot or bus
>>>> in vfio_pci_reset. does it satisfy the recovery issues that you said?
>>> The hot reset interface can only be used when a) the user (QEMU) owns
>>> all of the devices on the bus and b) we know we're resetting all of the
>>> devices.  That mostly limits its use to VM reset.  I think that on a
>>> secondary bus reset, we don't know the scope of the reset at the QEMU
>>> vfio driver, so we only make use of reset methods with a function-level
>>> scope.  That would only result in a secondary bus reset if that's the
>>> reset mechanism used by the host kernel's PCI code (pci_reset_function),
>>> which is limited to single function devices on a secondary bus, with no
>>> other reset mechanisms.  The host reset is also only available in some
>>> configurations, for instance if we have a dual-port NIC where each
>>> function is a separate IOMMU group, then we clearly cannot do a hot
>>> reset unless both functions are assigned to the same VM _and_ appear to
>>> the guest on the same virtual bus.  So even if we could know the scope
>>> of the reset in the QEMU vfio driver, we can only make use of it under
>>> very strict guest configurations.  Thanks,
>> Hi Alex,
>>
>>      have you some idea or scenario to fix/escape this issue?
> Hi Chen,
>
> I expect there are two major components to this.  The first is that
> QEMU/vfio-pci needs to enforce that a bus reset is possible for the host
> and guest topology when guest AER handling is specified for a device.
> That means that everything affected by the bus reset needs to be exposed
> to the guest in a compatible way.  For instance, if a bus reset affects
> devices from multiple groups, the guest needs to not only own all of
> those groups, but they also need to be exposed to the guest such that
> the virtual bus layout reflects the extent of the reset for the physical
> bus.  This also implies that guest AER handling cannot be the default
> since it will impose significant configuration restrictions on device
> assignment.
>
> This seems like a difficult configuration enforcement to make, but maybe
> there are simplifying assumptions that can help.  For instance the
> devices need to be exposed as PCIe therefore we won't have multiple
> slots in use on a bus and I think we can therefore mostly ignore hotplug
> since we can only hotplug at a slot granularity.  That may also imply
> that we should simply enforce a 1:1 mapping of physical functions to
> virtual functions.  At least one function from each group affected by a
> reset must be exposed to the guest.
>
> The second issue is that individual QEMU PCI devices have no callback
> for a bus reset.  QEMU/vfio-pci currently has the DeviceClass.reset
> callback, which we assume to be a function-level reset.  We also
> register with qemu_register_reset() for a VM reset, which is the only
> point currently that we know we can do a reset affecting multiple
> devices.  Infrastructure will need to be added to QEMU/PCI to expose the
> link down/RST signal to devices on a bus to trigger a multi-device reset
> in vfio-pci.
>
> Hopefully I'm not missing something, but I think both of those changes
> are going to be required before we can have anything remotely
> supportable for guest-based AER error handle.  This pretty complicated
> for the user and also for libvirt to figure out.  At a minimum libvirt
> would need to support a new guest-based AER handling flag for devices.
> We probably need to determine whether this is unique to vfio-pci or a
> generic PCIDevice option.  Thanks,

Hi Alex,
   Solving the two issues seem like a big workload. do we have a simple
   way to support qemu AER ?

Thanks,
Chen

>
> Alex
>
>
> .
>

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 5/7] vfio-pci: pass the aer error to guest
  2015-04-08  8:59                     ` Chen Fan
@ 2015-04-08 15:36                       ` Alex Williamson
  2015-04-15 10:30                         ` Chen Fan
  0 siblings, 1 reply; 53+ messages in thread
From: Alex Williamson @ 2015-04-08 15:36 UTC (permalink / raw)
  To: Chen Fan; +Cc: izumi.taku, qemu-devel

On Wed, 2015-04-08 at 16:59 +0800, Chen Fan wrote:
> On 04/01/2015 11:46 PM, Alex Williamson wrote:
> > On Wed, 2015-04-01 at 12:12 +0800, Chen Fan wrote:
> >> On 03/25/2015 10:41 AM, Alex Williamson wrote:
> >>> On Wed, 2015-03-25 at 09:53 +0800, Chen Fan wrote:
> >>>> On 03/16/2015 10:09 PM, Alex Williamson wrote:
> >>>>> On Mon, 2015-03-16 at 15:35 +0800, Chen Fan wrote:
> >>>>>> On 03/16/2015 11:52 AM, Alex Williamson wrote:
> >>>>>>> On Mon, 2015-03-16 at 11:05 +0800, Chen Fan wrote:
> >>>>>>>> On 03/14/2015 06:34 AM, Alex Williamson wrote:
> >>>>>>>>> On Thu, 2015-03-12 at 18:23 +0800, Chen Fan wrote:
> >>>>>>>>>> when the vfio device encounters an uncorrectable error in host,
> >>>>>>>>>> the vfio_pci driver will signal the eventfd registered by this
> >>>>>>>>>> vfio device, the results in the qemu eventfd handler getting
> >>>>>>>>>> invoked.
> >>>>>>>>>>
> >>>>>>>>>> this patch is to pass the error to guest and have the guest driver
> >>>>>>>>>> recover from the error.
> >>>>>>>>> What is going to be the typical recovery mechanism for the guest?  I'm
> >>>>>>>>> concerned that the topology of the device in the guest doesn't
> >>>>>>>>> necessarily match the topology of the device in the host, so if the
> >>>>>>>>> guest were to attempt a bus reset to recover a device, for instance,
> >>>>>>>>> what happens?
> >>>>>>>> the recovery mechanism is that when guest got an aer error from a device,
> >>>>>>>> guest will clean the corresponding status bit in device register. and for
> >>>>>>>> need reset device, the guest aer driver would reset all devices under bus.
> >>>>>>> Sorry, I'm still confused, how does the guest aer driver reset all
> >>>>>>> devices under a bus?  Are we talking about function-level, device
> >>>>>>> specific reset mechanisms or secondary bus resets?  If the guest is
> >>>>>>> performing secondary bus resets, what guarantee do they have that it
> >>>>>>> will translate to a physical secondary bus reset?  vfio may only do an
> >>>>>>> FLR when the bus is reset or it may not be able to do anything depending
> >>>>>>> on the available function-level resets and physical and virtual topology
> >>>>>>> of the device.  Thanks,
> >>>>>> in general, functions depends on the corresponding device driver behaviors
> >>>>>> to do the recovery. e.g: implemented the error_detect, slot_reset callbacks.
> >>>>>> and for link reset, it usually do secondary bus reset.
> >>>>>>
> >>>>>> and do we must require to the physical secondary bus reset for vfio device
> >>>>>> as bus reset?
> >>>>> That depends on how the guest driver attempts recovery, doesn't it?
> >>>>> There are only a very limited number of cases where a secondary bus
> >>>>> reset initiated by the guest will translate to a secondary bus reset of
> >>>>> the physical device (iirc, single function device without FLR).  In most
> >>>>> cases, it will at best be translated to an FLR.  VFIO really only does
> >>>>> bus resets on VM reset because that's the only time we know that it's ok
> >>>>> to reset multiple devices.  If the guest driver is depending on a
> >>>>> secondary bus reset to put the device into a recoverable state and we're
> >>>>> not able to provide that, then we're actually reducing containment of
> >>>>> the error by exposing AER to the guest and allowing it to attempt
> >>>>> recovery.  So in practice, I'm afraid we're risking the integrity of the
> >>>>> VM by exposing AER to the guest and making it think that it can perform
> >>>>> recovery operations that are not effective.  Thanks,
> >>>> I also have seen that if device without FLR, it seems can do hot reset
> >>>> by ioctl VFIO_DEVICE_PCI_HOT_RESET to reset the physical slot or bus
> >>>> in vfio_pci_reset. does it satisfy the recovery issues that you said?
> >>> The hot reset interface can only be used when a) the user (QEMU) owns
> >>> all of the devices on the bus and b) we know we're resetting all of the
> >>> devices.  That mostly limits its use to VM reset.  I think that on a
> >>> secondary bus reset, we don't know the scope of the reset at the QEMU
> >>> vfio driver, so we only make use of reset methods with a function-level
> >>> scope.  That would only result in a secondary bus reset if that's the
> >>> reset mechanism used by the host kernel's PCI code (pci_reset_function),
> >>> which is limited to single function devices on a secondary bus, with no
> >>> other reset mechanisms.  The host reset is also only available in some
> >>> configurations, for instance if we have a dual-port NIC where each
> >>> function is a separate IOMMU group, then we clearly cannot do a hot
> >>> reset unless both functions are assigned to the same VM _and_ appear to
> >>> the guest on the same virtual bus.  So even if we could know the scope
> >>> of the reset in the QEMU vfio driver, we can only make use of it under
> >>> very strict guest configurations.  Thanks,
> >> Hi Alex,
> >>
> >>      have you some idea or scenario to fix/escape this issue?
> > Hi Chen,
> >
> > I expect there are two major components to this.  The first is that
> > QEMU/vfio-pci needs to enforce that a bus reset is possible for the host
> > and guest topology when guest AER handling is specified for a device.
> > That means that everything affected by the bus reset needs to be exposed
> > to the guest in a compatible way.  For instance, if a bus reset affects
> > devices from multiple groups, the guest needs to not only own all of
> > those groups, but they also need to be exposed to the guest such that
> > the virtual bus layout reflects the extent of the reset for the physical
> > bus.  This also implies that guest AER handling cannot be the default
> > since it will impose significant configuration restrictions on device
> > assignment.
> >
> > This seems like a difficult configuration enforcement to make, but maybe
> > there are simplifying assumptions that can help.  For instance the
> > devices need to be exposed as PCIe therefore we won't have multiple
> > slots in use on a bus and I think we can therefore mostly ignore hotplug
> > since we can only hotplug at a slot granularity.  That may also imply
> > that we should simply enforce a 1:1 mapping of physical functions to
> > virtual functions.  At least one function from each group affected by a
> > reset must be exposed to the guest.
> >
> > The second issue is that individual QEMU PCI devices have no callback
> > for a bus reset.  QEMU/vfio-pci currently has the DeviceClass.reset
> > callback, which we assume to be a function-level reset.  We also
> > register with qemu_register_reset() for a VM reset, which is the only
> > point currently that we know we can do a reset affecting multiple
> > devices.  Infrastructure will need to be added to QEMU/PCI to expose the
> > link down/RST signal to devices on a bus to trigger a multi-device reset
> > in vfio-pci.
> >
> > Hopefully I'm not missing something, but I think both of those changes
> > are going to be required before we can have anything remotely
> > supportable for guest-based AER error handle.  This pretty complicated
> > for the user and also for libvirt to figure out.  At a minimum libvirt
> > would need to support a new guest-based AER handling flag for devices.
> > We probably need to determine whether this is unique to vfio-pci or a
> > generic PCIDevice option.  Thanks,
> 
> Hi Alex,
>    Solving the two issues seem like a big workload. do we have a simple
>    way to support qemu AER ?

Hi Chen,

The simpler way is the existing, containment-only solution where QEMU
stops the guest on an uncorrected error.  Do you have any other
suggestions?  I don't see how we can rely on guest involvement in
recovery unless the guest has the same abilities to reset the device as
it would on bare metal.  Thanks,

Alex

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 5/7] vfio-pci: pass the aer error to guest
  2015-04-08 15:36                       ` Alex Williamson
@ 2015-04-15 10:30                         ` Chen Fan
  2015-04-15 14:18                           ` Alex Williamson
  0 siblings, 1 reply; 53+ messages in thread
From: Chen Fan @ 2015-04-15 10:30 UTC (permalink / raw)
  To: Alex Williamson; +Cc: izumi.taku, qemu-devel


On 04/08/2015 11:36 PM, Alex Williamson wrote:
> On Wed, 2015-04-08 at 16:59 +0800, Chen Fan wrote:
>> On 04/01/2015 11:46 PM, Alex Williamson wrote:
>>> On Wed, 2015-04-01 at 12:12 +0800, Chen Fan wrote:
>>>> On 03/25/2015 10:41 AM, Alex Williamson wrote:
>>>>> On Wed, 2015-03-25 at 09:53 +0800, Chen Fan wrote:
>>>>>> On 03/16/2015 10:09 PM, Alex Williamson wrote:
>>>>>>> On Mon, 2015-03-16 at 15:35 +0800, Chen Fan wrote:
>>>>>>>> On 03/16/2015 11:52 AM, Alex Williamson wrote:
>>>>>>>>> On Mon, 2015-03-16 at 11:05 +0800, Chen Fan wrote:
>>>>>>>>>> On 03/14/2015 06:34 AM, Alex Williamson wrote:
>>>>>>>>>>> On Thu, 2015-03-12 at 18:23 +0800, Chen Fan wrote:
>>>>>>>>>>>> when the vfio device encounters an uncorrectable error in host,
>>>>>>>>>>>> the vfio_pci driver will signal the eventfd registered by this
>>>>>>>>>>>> vfio device, the results in the qemu eventfd handler getting
>>>>>>>>>>>> invoked.
>>>>>>>>>>>>
>>>>>>>>>>>> this patch is to pass the error to guest and have the guest driver
>>>>>>>>>>>> recover from the error.
>>>>>>>>>>> What is going to be the typical recovery mechanism for the guest?  I'm
>>>>>>>>>>> concerned that the topology of the device in the guest doesn't
>>>>>>>>>>> necessarily match the topology of the device in the host, so if the
>>>>>>>>>>> guest were to attempt a bus reset to recover a device, for instance,
>>>>>>>>>>> what happens?
>>>>>>>>>> the recovery mechanism is that when guest got an aer error from a device,
>>>>>>>>>> guest will clean the corresponding status bit in device register. and for
>>>>>>>>>> need reset device, the guest aer driver would reset all devices under bus.
>>>>>>>>> Sorry, I'm still confused, how does the guest aer driver reset all
>>>>>>>>> devices under a bus?  Are we talking about function-level, device
>>>>>>>>> specific reset mechanisms or secondary bus resets?  If the guest is
>>>>>>>>> performing secondary bus resets, what guarantee do they have that it
>>>>>>>>> will translate to a physical secondary bus reset?  vfio may only do an
>>>>>>>>> FLR when the bus is reset or it may not be able to do anything depending
>>>>>>>>> on the available function-level resets and physical and virtual topology
>>>>>>>>> of the device.  Thanks,
>>>>>>>> in general, functions depends on the corresponding device driver behaviors
>>>>>>>> to do the recovery. e.g: implemented the error_detect, slot_reset callbacks.
>>>>>>>> and for link reset, it usually do secondary bus reset.
>>>>>>>>
>>>>>>>> and do we must require to the physical secondary bus reset for vfio device
>>>>>>>> as bus reset?
>>>>>>> That depends on how the guest driver attempts recovery, doesn't it?
>>>>>>> There are only a very limited number of cases where a secondary bus
>>>>>>> reset initiated by the guest will translate to a secondary bus reset of
>>>>>>> the physical device (iirc, single function device without FLR).  In most
>>>>>>> cases, it will at best be translated to an FLR.  VFIO really only does
>>>>>>> bus resets on VM reset because that's the only time we know that it's ok
>>>>>>> to reset multiple devices.  If the guest driver is depending on a
>>>>>>> secondary bus reset to put the device into a recoverable state and we're
>>>>>>> not able to provide that, then we're actually reducing containment of
>>>>>>> the error by exposing AER to the guest and allowing it to attempt
>>>>>>> recovery.  So in practice, I'm afraid we're risking the integrity of the
>>>>>>> VM by exposing AER to the guest and making it think that it can perform
>>>>>>> recovery operations that are not effective.  Thanks,
>>>>>> I also have seen that if device without FLR, it seems can do hot reset
>>>>>> by ioctl VFIO_DEVICE_PCI_HOT_RESET to reset the physical slot or bus
>>>>>> in vfio_pci_reset. does it satisfy the recovery issues that you said?
>>>>> The hot reset interface can only be used when a) the user (QEMU) owns
>>>>> all of the devices on the bus and b) we know we're resetting all of the
>>>>> devices.  That mostly limits its use to VM reset.  I think that on a
>>>>> secondary bus reset, we don't know the scope of the reset at the QEMU
>>>>> vfio driver, so we only make use of reset methods with a function-level
>>>>> scope.  That would only result in a secondary bus reset if that's the
>>>>> reset mechanism used by the host kernel's PCI code (pci_reset_function),
>>>>> which is limited to single function devices on a secondary bus, with no
>>>>> other reset mechanisms.  The host reset is also only available in some
>>>>> configurations, for instance if we have a dual-port NIC where each
>>>>> function is a separate IOMMU group, then we clearly cannot do a hot
>>>>> reset unless both functions are assigned to the same VM _and_ appear to
>>>>> the guest on the same virtual bus.  So even if we could know the scope
>>>>> of the reset in the QEMU vfio driver, we can only make use of it under
>>>>> very strict guest configurations.  Thanks,
>>>> Hi Alex,
>>>>
>>>>       have you some idea or scenario to fix/escape this issue?
>>> Hi Chen,
>>>
>>> I expect there are two major components to this.  The first is that
>>> QEMU/vfio-pci needs to enforce that a bus reset is possible for the host
>>> and guest topology when guest AER handling is specified for a device.
>>> That means that everything affected by the bus reset needs to be exposed
>>> to the guest in a compatible way.  For instance, if a bus reset affects
>>> devices from multiple groups, the guest needs to not only own all of
>>> those groups, but they also need to be exposed to the guest such that
>>> the virtual bus layout reflects the extent of the reset for the physical
>>> bus.  This also implies that guest AER handling cannot be the default
>>> since it will impose significant configuration restrictions on device
>>> assignment.
>>>
>>> This seems like a difficult configuration enforcement to make, but maybe
>>> there are simplifying assumptions that can help.  For instance the
>>> devices need to be exposed as PCIe therefore we won't have multiple
>>> slots in use on a bus and I think we can therefore mostly ignore hotplug
>>> since we can only hotplug at a slot granularity.  That may also imply
>>> that we should simply enforce a 1:1 mapping of physical functions to
>>> virtual functions.  At least one function from each group affected by a
>>> reset must be exposed to the guest.
>>>
>>> The second issue is that individual QEMU PCI devices have no callback
>>> for a bus reset.  QEMU/vfio-pci currently has the DeviceClass.reset
>>> callback, which we assume to be a function-level reset.  We also
>>> register with qemu_register_reset() for a VM reset, which is the only
>>> point currently that we know we can do a reset affecting multiple
>>> devices.  Infrastructure will need to be added to QEMU/PCI to expose the
>>> link down/RST signal to devices on a bus to trigger a multi-device reset
>>> in vfio-pci.
>>>
>>> Hopefully I'm not missing something, but I think both of those changes
>>> are going to be required before we can have anything remotely
>>> supportable for guest-based AER error handle.  This pretty complicated
>>> for the user and also for libvirt to figure out.  At a minimum libvirt
>>> would need to support a new guest-based AER handling flag for devices.
>>> We probably need to determine whether this is unique to vfio-pci or a
>>> generic PCIDevice option.  Thanks,
>> Hi Alex,
>>     Solving the two issues seem like a big workload. do we have a simple
>>     way to support qemu AER ?
> Hi Chen,
>
> The simpler way is the existing, containment-only solution where QEMU
> stops the guest on an uncorrected error.  Do you have any other
> suggestions?  I don't see how we can rely on guest involvement in
> recovery unless the guest has the same abilities to reset the device as
> it would on bare metal.  Thanks,
Hi Alex,

for the first issue, I think the functions affected by a bus reset need 
to assign to
guest are too restricted.
I suppose if we enable support the aer feature, only need to do
is check the pass through device's host bus whether have other endpoint,
if no other pci device, we can support the host bus reset in qemu vfio-pci.

Thanks,
Chen


>
> Alex
>
> .
>

^ permalink raw reply	[flat|nested] 53+ messages in thread

* Re: [Qemu-devel] [PATCH v5 5/7] vfio-pci: pass the aer error to guest
  2015-04-15 10:30                         ` Chen Fan
@ 2015-04-15 14:18                           ` Alex Williamson
  0 siblings, 0 replies; 53+ messages in thread
From: Alex Williamson @ 2015-04-15 14:18 UTC (permalink / raw)
  To: Chen Fan; +Cc: izumi.taku, qemu-devel

On Wed, 2015-04-15 at 18:30 +0800, Chen Fan wrote:
> On 04/08/2015 11:36 PM, Alex Williamson wrote:
> > On Wed, 2015-04-08 at 16:59 +0800, Chen Fan wrote:
> >> On 04/01/2015 11:46 PM, Alex Williamson wrote:
> >>> On Wed, 2015-04-01 at 12:12 +0800, Chen Fan wrote:
> >>>> On 03/25/2015 10:41 AM, Alex Williamson wrote:
> >>>>> On Wed, 2015-03-25 at 09:53 +0800, Chen Fan wrote:
> >>>>>> On 03/16/2015 10:09 PM, Alex Williamson wrote:
> >>>>>>> On Mon, 2015-03-16 at 15:35 +0800, Chen Fan wrote:
> >>>>>>>> On 03/16/2015 11:52 AM, Alex Williamson wrote:
> >>>>>>>>> On Mon, 2015-03-16 at 11:05 +0800, Chen Fan wrote:
> >>>>>>>>>> On 03/14/2015 06:34 AM, Alex Williamson wrote:
> >>>>>>>>>>> On Thu, 2015-03-12 at 18:23 +0800, Chen Fan wrote:
> >>>>>>>>>>>> when the vfio device encounters an uncorrectable error in host,
> >>>>>>>>>>>> the vfio_pci driver will signal the eventfd registered by this
> >>>>>>>>>>>> vfio device, the results in the qemu eventfd handler getting
> >>>>>>>>>>>> invoked.
> >>>>>>>>>>>>
> >>>>>>>>>>>> this patch is to pass the error to guest and have the guest driver
> >>>>>>>>>>>> recover from the error.
> >>>>>>>>>>> What is going to be the typical recovery mechanism for the guest?  I'm
> >>>>>>>>>>> concerned that the topology of the device in the guest doesn't
> >>>>>>>>>>> necessarily match the topology of the device in the host, so if the
> >>>>>>>>>>> guest were to attempt a bus reset to recover a device, for instance,
> >>>>>>>>>>> what happens?
> >>>>>>>>>> the recovery mechanism is that when guest got an aer error from a device,
> >>>>>>>>>> guest will clean the corresponding status bit in device register. and for
> >>>>>>>>>> need reset device, the guest aer driver would reset all devices under bus.
> >>>>>>>>> Sorry, I'm still confused, how does the guest aer driver reset all
> >>>>>>>>> devices under a bus?  Are we talking about function-level, device
> >>>>>>>>> specific reset mechanisms or secondary bus resets?  If the guest is
> >>>>>>>>> performing secondary bus resets, what guarantee do they have that it
> >>>>>>>>> will translate to a physical secondary bus reset?  vfio may only do an
> >>>>>>>>> FLR when the bus is reset or it may not be able to do anything depending
> >>>>>>>>> on the available function-level resets and physical and virtual topology
> >>>>>>>>> of the device.  Thanks,
> >>>>>>>> in general, functions depends on the corresponding device driver behaviors
> >>>>>>>> to do the recovery. e.g: implemented the error_detect, slot_reset callbacks.
> >>>>>>>> and for link reset, it usually do secondary bus reset.
> >>>>>>>>
> >>>>>>>> and do we must require to the physical secondary bus reset for vfio device
> >>>>>>>> as bus reset?
> >>>>>>> That depends on how the guest driver attempts recovery, doesn't it?
> >>>>>>> There are only a very limited number of cases where a secondary bus
> >>>>>>> reset initiated by the guest will translate to a secondary bus reset of
> >>>>>>> the physical device (iirc, single function device without FLR).  In most
> >>>>>>> cases, it will at best be translated to an FLR.  VFIO really only does
> >>>>>>> bus resets on VM reset because that's the only time we know that it's ok
> >>>>>>> to reset multiple devices.  If the guest driver is depending on a
> >>>>>>> secondary bus reset to put the device into a recoverable state and we're
> >>>>>>> not able to provide that, then we're actually reducing containment of
> >>>>>>> the error by exposing AER to the guest and allowing it to attempt
> >>>>>>> recovery.  So in practice, I'm afraid we're risking the integrity of the
> >>>>>>> VM by exposing AER to the guest and making it think that it can perform
> >>>>>>> recovery operations that are not effective.  Thanks,
> >>>>>> I also have seen that if device without FLR, it seems can do hot reset
> >>>>>> by ioctl VFIO_DEVICE_PCI_HOT_RESET to reset the physical slot or bus
> >>>>>> in vfio_pci_reset. does it satisfy the recovery issues that you said?
> >>>>> The hot reset interface can only be used when a) the user (QEMU) owns
> >>>>> all of the devices on the bus and b) we know we're resetting all of the
> >>>>> devices.  That mostly limits its use to VM reset.  I think that on a
> >>>>> secondary bus reset, we don't know the scope of the reset at the QEMU
> >>>>> vfio driver, so we only make use of reset methods with a function-level
> >>>>> scope.  That would only result in a secondary bus reset if that's the
> >>>>> reset mechanism used by the host kernel's PCI code (pci_reset_function),
> >>>>> which is limited to single function devices on a secondary bus, with no
> >>>>> other reset mechanisms.  The host reset is also only available in some
> >>>>> configurations, for instance if we have a dual-port NIC where each
> >>>>> function is a separate IOMMU group, then we clearly cannot do a hot
> >>>>> reset unless both functions are assigned to the same VM _and_ appear to
> >>>>> the guest on the same virtual bus.  So even if we could know the scope
> >>>>> of the reset in the QEMU vfio driver, we can only make use of it under
> >>>>> very strict guest configurations.  Thanks,
> >>>> Hi Alex,
> >>>>
> >>>>       have you some idea or scenario to fix/escape this issue?
> >>> Hi Chen,
> >>>
> >>> I expect there are two major components to this.  The first is that
> >>> QEMU/vfio-pci needs to enforce that a bus reset is possible for the host
> >>> and guest topology when guest AER handling is specified for a device.
> >>> That means that everything affected by the bus reset needs to be exposed
> >>> to the guest in a compatible way.  For instance, if a bus reset affects
> >>> devices from multiple groups, the guest needs to not only own all of
> >>> those groups, but they also need to be exposed to the guest such that
> >>> the virtual bus layout reflects the extent of the reset for the physical
> >>> bus.  This also implies that guest AER handling cannot be the default
> >>> since it will impose significant configuration restrictions on device
> >>> assignment.
> >>>
> >>> This seems like a difficult configuration enforcement to make, but maybe
> >>> there are simplifying assumptions that can help.  For instance the
> >>> devices need to be exposed as PCIe therefore we won't have multiple
> >>> slots in use on a bus and I think we can therefore mostly ignore hotplug
> >>> since we can only hotplug at a slot granularity.  That may also imply
> >>> that we should simply enforce a 1:1 mapping of physical functions to
> >>> virtual functions.  At least one function from each group affected by a
> >>> reset must be exposed to the guest.
> >>>
> >>> The second issue is that individual QEMU PCI devices have no callback
> >>> for a bus reset.  QEMU/vfio-pci currently has the DeviceClass.reset
> >>> callback, which we assume to be a function-level reset.  We also
> >>> register with qemu_register_reset() for a VM reset, which is the only
> >>> point currently that we know we can do a reset affecting multiple
> >>> devices.  Infrastructure will need to be added to QEMU/PCI to expose the
> >>> link down/RST signal to devices on a bus to trigger a multi-device reset
> >>> in vfio-pci.
> >>>
> >>> Hopefully I'm not missing something, but I think both of those changes
> >>> are going to be required before we can have anything remotely
> >>> supportable for guest-based AER error handle.  This pretty complicated
> >>> for the user and also for libvirt to figure out.  At a minimum libvirt
> >>> would need to support a new guest-based AER handling flag for devices.
> >>> We probably need to determine whether this is unique to vfio-pci or a
> >>> generic PCIDevice option.  Thanks,
> >> Hi Alex,
> >>     Solving the two issues seem like a big workload. do we have a simple
> >>     way to support qemu AER ?
> > Hi Chen,
> >
> > The simpler way is the existing, containment-only solution where QEMU
> > stops the guest on an uncorrected error.  Do you have any other
> > suggestions?  I don't see how we can rely on guest involvement in
> > recovery unless the guest has the same abilities to reset the device as
> > it would on bare metal.  Thanks,
> Hi Alex,
> 
> for the first issue, I think the functions affected by a bus reset need 
> to assign to
> guest are too restricted.

Why?  If the guest thinks that it's doing a bus reset to recover the
device, I don't think we can ignore that or do a lesser function-level
reset.  If the guest thought it could recover using a function-level
reset, it probably would have used that instead.

> I suppose if we enable support the aer feature, only need to do
> is check the pass through device's host bus whether have other endpoint,
> if no other pci device, we can support the host bus reset in qemu vfio-pci.

I don't think that restricting the problem to single-function endpoints
changes the requirements at all.  vfio-pci in QEMU would still need to
restrict that AER forwarding to the guest can only be enabled in
supported configurations and the QEMU PCI-core code would need to
differentiate a PCI bus reset from a regular single device scope reset.
In fact, restricting the configuration to single function endpoints
appears to be the same amount of work, only reducing the usefulness.
Thanks,

Alex

^ permalink raw reply	[flat|nested] 53+ messages in thread

end of thread, other threads:[~2015-04-15 14:18 UTC | newest]

Thread overview: 53+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-03-12 10:23 [Qemu-devel] [PATCH v5 0/7] pass aer error to guest for vfio device Chen Fan
2015-03-12 10:23 ` [Qemu-devel] [PATCH v5 1/7] vfio: add pcie extanded capability support Chen Fan
2015-03-12 10:23 ` [Qemu-devel] [PATCH v5 2/7] aer: impove pcie_aer_init to support vfio device Chen Fan
2015-03-13 22:25   ` Alex Williamson
2015-03-16  2:30     ` Chen Fan
2015-03-12 10:23 ` [Qemu-devel] [PATCH v5 3/7] vfio: add aer support for " Chen Fan
2015-03-13 22:28   ` Alex Williamson
2015-03-12 10:23 ` [Qemu-devel] [PATCH v5 4/7] pcie_aer: expose pcie_aer_msg() interface Chen Fan
2015-03-13 22:30   ` Alex Williamson
2015-03-18 13:29   ` Michael S. Tsirkin
2015-03-19  1:33     ` Chen Fan
2015-03-12 10:23 ` [Qemu-devel] [PATCH v5 5/7] vfio-pci: pass the aer error to guest Chen Fan
2015-03-13 22:34   ` Alex Williamson
2015-03-16  3:05     ` Chen Fan
2015-03-16  3:52       ` Alex Williamson
2015-03-16  7:35         ` Chen Fan
2015-03-16 14:09           ` Alex Williamson
2015-03-25  1:33             ` Chen Fan
2015-03-25  2:31               ` Alex Williamson
2015-03-25  1:53             ` Chen Fan
2015-03-25  2:41               ` Alex Williamson
2015-03-25  3:07                 ` Chen Fan
2015-04-01  4:12                 ` Chen Fan
2015-04-01 15:46                   ` Alex Williamson
2015-04-08  8:59                     ` Chen Fan
2015-04-08 15:36                       ` Alex Williamson
2015-04-15 10:30                         ` Chen Fan
2015-04-15 14:18                           ` Alex Williamson
2015-03-12 10:23 ` [Qemu-devel] [PATCH v5 6/7] vfio: add 'x-aer' property to expose aercap Chen Fan
2015-03-18 13:23   ` Michael S. Tsirkin
2015-03-18 14:09     ` Alex Williamson
2015-03-12 10:23 ` [Qemu-devel] [PATCH v5 7/7] pc: add PC_I440FX_COMPAT to disable aercap for vifo device Chen Fan
2015-03-13 22:38   ` Alex Williamson
2015-03-16  2:48     ` Chen Fan
2015-03-16  2:49     ` Chen Fan
2015-03-18 13:23   ` Michael S. Tsirkin
2015-03-18 14:02     ` Alex Williamson
2015-03-18 14:05       ` Michael S. Tsirkin
2015-03-18 14:15         ` Alex Williamson
2015-03-18 14:36           ` Michael S. Tsirkin
2015-03-18 14:50             ` Alex Williamson
2015-03-18 15:02               ` Michael S. Tsirkin
2015-03-18 15:45                 ` Alex Williamson
2015-03-18 16:44                   ` Michael S. Tsirkin
2015-03-18 17:11                     ` Alex Williamson
2015-03-18 17:45                       ` Michael S. Tsirkin
2015-03-18 18:08                         ` Alex Williamson
2015-03-18 18:56                           ` Michael S. Tsirkin
2015-03-18 19:05                             ` Alex Williamson
2015-03-19 21:26                               ` Paolo Bonzini
2015-03-16  2:52 ` [Qemu-devel] [PATCH v5 0/7] pass aer error to guest for vfio device Chen Fan
2015-03-16  4:57   ` Michael S. Tsirkin
2015-03-19 21:44     ` Paolo Bonzini

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.