All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [RFC Patch] xen/pt: Emulate FLR capability
@ 2019-08-29  9:02 ` Chao Gao
  0 siblings, 0 replies; 14+ messages in thread
From: Chao Gao @ 2019-08-29  9:02 UTC (permalink / raw)
  To: xen-devel, qemu-devel
  Cc: Stefano Stabellini, Paul Durrant, Jan Beulich, Anthony Perard,
	Chao Gao, Roger Pau Monné

Currently, for a HVM on Xen, no reset method is virtualized. So in a VM's
perspective, assigned devices cannot be reset. But some devices rely on PCI
reset to recover from hardware hangs. When being assigned to a VM, those
devices cannot be reset and won't work any longer if a hardware hang occurs.
We have to reboot VM to trigger PCI reset on host to recover the device.

This patch exposes FLR capability to VMs if the assigned device can be reset on
host. When VM initiates an FLR to a device, qemu cleans up the device state,
(including disabling of intx and/or MSI and unmapping BARs from guest, deleting
emulated registers), then initiate PCI reset through 'reset' knob under the
device's sysfs, finally initialize the device again.

Signed-off-by: Chao Gao <chao.gao@intel.com>
---
Do we need to introduce an attribute, like "permissive" to explicitly
enable FLR capability emulation? During PCI reset, interrupts and BARs are
unmapped from the guest. It seems that guest cannot interact with the device
directly except access to device's configuration space which is emulated by
qemu. If proper method can be used to prevent qemu accessing the physical
device there is no new security hole caused by the FLR emulation.

VM's FLR may be backed by any reset function on host to the physical device,
for example: FLR, D3softreset, secondary bus reset. Not sure it is fine to mix
them. Given Linux kernel just uses an unified API to reset device and caller
cannot choose a specific one, it might be OK.
---
 hw/xen/xen-host-pci-device.c | 30 ++++++++++++++++++++++++++++++
 hw/xen/xen-host-pci-device.h |  3 +++
 hw/xen/xen_pt.c              |  9 +++++++++
 hw/xen/xen_pt.h              |  1 +
 hw/xen/xen_pt_config_init.c  | 30 +++++++++++++++++++++++++++---
 5 files changed, 70 insertions(+), 3 deletions(-)

diff --git a/hw/xen/xen-host-pci-device.c b/hw/xen/xen-host-pci-device.c
index 1b44dcafaf..d549656f42 100644
--- a/hw/xen/xen-host-pci-device.c
+++ b/hw/xen/xen-host-pci-device.c
@@ -198,6 +198,35 @@ static bool xen_host_pci_dev_is_virtfn(XenHostPCIDevice *d)
     return !stat(path, &buf);
 }
 
+static bool xen_host_pci_resetable(XenHostPCIDevice *d)
+{
+    char path[PATH_MAX];
+
+    xen_host_pci_sysfs_path(d, "reset", path, sizeof(path));
+
+    return !access(path, W_OK);
+}
+
+void xen_host_pci_reset(XenHostPCIDevice *d)
+{
+    char path[PATH_MAX];
+    int fd;
+
+    xen_host_pci_sysfs_path(d, "reset", path, sizeof(path));
+
+    fd = open(path, O_WRONLY);
+    if (fd == -1) {
+        XEN_HOST_PCI_LOG("Xen host pci reset: open error\n");
+        return;
+    }
+
+    if (write(fd, "1", 1) != 1) {
+        XEN_HOST_PCI_LOG("Xen host pci reset: write error\n");
+    }
+
+    return;
+}
+
 static void xen_host_pci_config_open(XenHostPCIDevice *d, Error **errp)
 {
     char path[PATH_MAX];
@@ -377,6 +406,7 @@ void xen_host_pci_device_get(XenHostPCIDevice *d, uint16_t domain,
     d->class_code = v;
 
     d->is_virtfn = xen_host_pci_dev_is_virtfn(d);
+    d->is_resetable = xen_host_pci_resetable(d);
 
     return;
 
diff --git a/hw/xen/xen-host-pci-device.h b/hw/xen/xen-host-pci-device.h
index 4d8d34ecb0..cacf9b3df8 100644
--- a/hw/xen/xen-host-pci-device.h
+++ b/hw/xen/xen-host-pci-device.h
@@ -32,6 +32,7 @@ typedef struct XenHostPCIDevice {
     XenHostPCIIORegion rom;
 
     bool is_virtfn;
+    bool is_resetable;
 
     int config_fd;
 } XenHostPCIDevice;
@@ -55,4 +56,6 @@ int xen_host_pci_set_block(XenHostPCIDevice *d, int pos, uint8_t *buf,
 
 int xen_host_pci_find_ext_cap_offset(XenHostPCIDevice *s, uint32_t cap);
 
+void xen_host_pci_reset(XenHostPCIDevice *d);
+
 #endif /* XEN_HOST_PCI_DEVICE_H */
diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c
index 8fbaf2eae9..d750367c0a 100644
--- a/hw/xen/xen_pt.c
+++ b/hw/xen/xen_pt.c
@@ -938,6 +938,15 @@ static void xen_pt_unregister_device(PCIDevice *d)
     xen_pt_destroy(d);
 }
 
+void xen_pt_reset(XenPCIPassthroughState *s)
+{
+    PCIDevice *d = PCI_DEVICE(s);
+
+    xen_pt_unregister_device(d);
+    xen_host_pci_reset(&s->real_device);
+    xen_pt_realize(d, NULL);
+}
+
 static Property xen_pci_passthrough_properties[] = {
     DEFINE_PROP_PCI_HOST_DEVADDR("hostaddr", XenPCIPassthroughState, hostaddr),
     DEFINE_PROP_BOOL("permissive", XenPCIPassthroughState, permissive, false),
diff --git a/hw/xen/xen_pt.h b/hw/xen/xen_pt.h
index 9167bbaf6d..ed05bc0d39 100644
--- a/hw/xen/xen_pt.h
+++ b/hw/xen/xen_pt.h
@@ -332,4 +332,5 @@ int xen_pt_register_vga_regions(XenHostPCIDevice *dev);
 int xen_pt_unregister_vga_regions(XenHostPCIDevice *dev);
 void xen_pt_setup_vga(XenPCIPassthroughState *s, XenHostPCIDevice *dev,
                      Error **errp);
+void xen_pt_reset(XenPCIPassthroughState *s);
 #endif /* XEN_PT_H */
diff --git a/hw/xen/xen_pt_config_init.c b/hw/xen/xen_pt_config_init.c
index 31ec5add1d..435abd7286 100644
--- a/hw/xen/xen_pt_config_init.c
+++ b/hw/xen/xen_pt_config_init.c
@@ -852,6 +852,30 @@ static inline uint8_t get_device_type(XenPCIPassthroughState *s,
     return (flag & PCI_EXP_FLAGS_TYPE) >> 4;
 }
 
+/* Initialize Device Capability register */
+static int xen_pt_devcap_reg_init(XenPCIPassthroughState *s,
+                                  XenPTRegInfo *reg, uint32_t real_offset,
+                                  uint32_t *data)
+{
+    *data = reg->init_val;
+
+    if (s->real_device.is_resetable) {
+        *data |= PCI_EXP_DEVCAP_FLR;
+    }
+
+    return 0;
+}
+
+static int xen_pt_devctl_reg_write(XenPCIPassthroughState *s,
+                                   XenPTReg *cfg_entry, uint16_t *val,
+                                   uint16_t dev_value, uint16_t valid_mask)
+{
+    if (s->real_device.is_resetable && (*val & PCI_EXP_DEVCTL_BCR_FLR)) {
+        xen_pt_reset(s);
+    }
+    return xen_pt_word_reg_write(s, cfg_entry, val, dev_value, valid_mask);
+}
+
 /* initialize Link Control register */
 static int xen_pt_linkctrl_reg_init(XenPCIPassthroughState *s,
                                     XenPTRegInfo *reg, uint32_t real_offset,
@@ -933,7 +957,7 @@ static XenPTRegInfo xen_pt_emu_reg_pcie[] = {
         .init_val   = 0x00000000,
         .ro_mask    = 0xFFFFFFFF,
         .emu_mask   = 0x10000000,
-        .init       = xen_pt_common_reg_init,
+        .init       = xen_pt_devcap_reg_init,
         .u.dw.read  = xen_pt_long_reg_read,
         .u.dw.write = xen_pt_long_reg_write,
     },
@@ -946,7 +970,7 @@ static XenPTRegInfo xen_pt_emu_reg_pcie[] = {
         .emu_mask   = 0xFFFF,
         .init       = xen_pt_common_reg_init,
         .u.w.read   = xen_pt_word_reg_read,
-        .u.w.write  = xen_pt_word_reg_write,
+        .u.w.write  = xen_pt_devctl_reg_write,
     },
     /* Device Status reg */
     {
@@ -1969,7 +1993,7 @@ static void xen_pt_config_reg_init(XenPCIPassthroughState *s,
             /* Mask out host (including past size). */
             new_val = val & host_mask;
             /* Merge emulated ones (excluding the non-emulated ones). */
-            new_val |= data & host_mask;
+            new_val |= data & ~host_mask;
             /* Leave intact host and emulated values past the size - even though
              * we do not care as we write per reg->size granularity, but for the
              * logging below lets have the proper value. */
-- 
2.17.1



^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [Xen-devel] [RFC Patch] xen/pt: Emulate FLR capability
@ 2019-08-29  9:02 ` Chao Gao
  0 siblings, 0 replies; 14+ messages in thread
From: Chao Gao @ 2019-08-29  9:02 UTC (permalink / raw)
  To: xen-devel, qemu-devel
  Cc: Stefano Stabellini, Paul Durrant, Jan Beulich, Anthony Perard,
	Chao Gao, Roger Pau Monné

Currently, for a HVM on Xen, no reset method is virtualized. So in a VM's
perspective, assigned devices cannot be reset. But some devices rely on PCI
reset to recover from hardware hangs. When being assigned to a VM, those
devices cannot be reset and won't work any longer if a hardware hang occurs.
We have to reboot VM to trigger PCI reset on host to recover the device.

This patch exposes FLR capability to VMs if the assigned device can be reset on
host. When VM initiates an FLR to a device, qemu cleans up the device state,
(including disabling of intx and/or MSI and unmapping BARs from guest, deleting
emulated registers), then initiate PCI reset through 'reset' knob under the
device's sysfs, finally initialize the device again.

Signed-off-by: Chao Gao <chao.gao@intel.com>
---
Do we need to introduce an attribute, like "permissive" to explicitly
enable FLR capability emulation? During PCI reset, interrupts and BARs are
unmapped from the guest. It seems that guest cannot interact with the device
directly except access to device's configuration space which is emulated by
qemu. If proper method can be used to prevent qemu accessing the physical
device there is no new security hole caused by the FLR emulation.

VM's FLR may be backed by any reset function on host to the physical device,
for example: FLR, D3softreset, secondary bus reset. Not sure it is fine to mix
them. Given Linux kernel just uses an unified API to reset device and caller
cannot choose a specific one, it might be OK.
---
 hw/xen/xen-host-pci-device.c | 30 ++++++++++++++++++++++++++++++
 hw/xen/xen-host-pci-device.h |  3 +++
 hw/xen/xen_pt.c              |  9 +++++++++
 hw/xen/xen_pt.h              |  1 +
 hw/xen/xen_pt_config_init.c  | 30 +++++++++++++++++++++++++++---
 5 files changed, 70 insertions(+), 3 deletions(-)

diff --git a/hw/xen/xen-host-pci-device.c b/hw/xen/xen-host-pci-device.c
index 1b44dcafaf..d549656f42 100644
--- a/hw/xen/xen-host-pci-device.c
+++ b/hw/xen/xen-host-pci-device.c
@@ -198,6 +198,35 @@ static bool xen_host_pci_dev_is_virtfn(XenHostPCIDevice *d)
     return !stat(path, &buf);
 }
 
+static bool xen_host_pci_resetable(XenHostPCIDevice *d)
+{
+    char path[PATH_MAX];
+
+    xen_host_pci_sysfs_path(d, "reset", path, sizeof(path));
+
+    return !access(path, W_OK);
+}
+
+void xen_host_pci_reset(XenHostPCIDevice *d)
+{
+    char path[PATH_MAX];
+    int fd;
+
+    xen_host_pci_sysfs_path(d, "reset", path, sizeof(path));
+
+    fd = open(path, O_WRONLY);
+    if (fd == -1) {
+        XEN_HOST_PCI_LOG("Xen host pci reset: open error\n");
+        return;
+    }
+
+    if (write(fd, "1", 1) != 1) {
+        XEN_HOST_PCI_LOG("Xen host pci reset: write error\n");
+    }
+
+    return;
+}
+
 static void xen_host_pci_config_open(XenHostPCIDevice *d, Error **errp)
 {
     char path[PATH_MAX];
@@ -377,6 +406,7 @@ void xen_host_pci_device_get(XenHostPCIDevice *d, uint16_t domain,
     d->class_code = v;
 
     d->is_virtfn = xen_host_pci_dev_is_virtfn(d);
+    d->is_resetable = xen_host_pci_resetable(d);
 
     return;
 
diff --git a/hw/xen/xen-host-pci-device.h b/hw/xen/xen-host-pci-device.h
index 4d8d34ecb0..cacf9b3df8 100644
--- a/hw/xen/xen-host-pci-device.h
+++ b/hw/xen/xen-host-pci-device.h
@@ -32,6 +32,7 @@ typedef struct XenHostPCIDevice {
     XenHostPCIIORegion rom;
 
     bool is_virtfn;
+    bool is_resetable;
 
     int config_fd;
 } XenHostPCIDevice;
@@ -55,4 +56,6 @@ int xen_host_pci_set_block(XenHostPCIDevice *d, int pos, uint8_t *buf,
 
 int xen_host_pci_find_ext_cap_offset(XenHostPCIDevice *s, uint32_t cap);
 
+void xen_host_pci_reset(XenHostPCIDevice *d);
+
 #endif /* XEN_HOST_PCI_DEVICE_H */
diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c
index 8fbaf2eae9..d750367c0a 100644
--- a/hw/xen/xen_pt.c
+++ b/hw/xen/xen_pt.c
@@ -938,6 +938,15 @@ static void xen_pt_unregister_device(PCIDevice *d)
     xen_pt_destroy(d);
 }
 
+void xen_pt_reset(XenPCIPassthroughState *s)
+{
+    PCIDevice *d = PCI_DEVICE(s);
+
+    xen_pt_unregister_device(d);
+    xen_host_pci_reset(&s->real_device);
+    xen_pt_realize(d, NULL);
+}
+
 static Property xen_pci_passthrough_properties[] = {
     DEFINE_PROP_PCI_HOST_DEVADDR("hostaddr", XenPCIPassthroughState, hostaddr),
     DEFINE_PROP_BOOL("permissive", XenPCIPassthroughState, permissive, false),
diff --git a/hw/xen/xen_pt.h b/hw/xen/xen_pt.h
index 9167bbaf6d..ed05bc0d39 100644
--- a/hw/xen/xen_pt.h
+++ b/hw/xen/xen_pt.h
@@ -332,4 +332,5 @@ int xen_pt_register_vga_regions(XenHostPCIDevice *dev);
 int xen_pt_unregister_vga_regions(XenHostPCIDevice *dev);
 void xen_pt_setup_vga(XenPCIPassthroughState *s, XenHostPCIDevice *dev,
                      Error **errp);
+void xen_pt_reset(XenPCIPassthroughState *s);
 #endif /* XEN_PT_H */
diff --git a/hw/xen/xen_pt_config_init.c b/hw/xen/xen_pt_config_init.c
index 31ec5add1d..435abd7286 100644
--- a/hw/xen/xen_pt_config_init.c
+++ b/hw/xen/xen_pt_config_init.c
@@ -852,6 +852,30 @@ static inline uint8_t get_device_type(XenPCIPassthroughState *s,
     return (flag & PCI_EXP_FLAGS_TYPE) >> 4;
 }
 
+/* Initialize Device Capability register */
+static int xen_pt_devcap_reg_init(XenPCIPassthroughState *s,
+                                  XenPTRegInfo *reg, uint32_t real_offset,
+                                  uint32_t *data)
+{
+    *data = reg->init_val;
+
+    if (s->real_device.is_resetable) {
+        *data |= PCI_EXP_DEVCAP_FLR;
+    }
+
+    return 0;
+}
+
+static int xen_pt_devctl_reg_write(XenPCIPassthroughState *s,
+                                   XenPTReg *cfg_entry, uint16_t *val,
+                                   uint16_t dev_value, uint16_t valid_mask)
+{
+    if (s->real_device.is_resetable && (*val & PCI_EXP_DEVCTL_BCR_FLR)) {
+        xen_pt_reset(s);
+    }
+    return xen_pt_word_reg_write(s, cfg_entry, val, dev_value, valid_mask);
+}
+
 /* initialize Link Control register */
 static int xen_pt_linkctrl_reg_init(XenPCIPassthroughState *s,
                                     XenPTRegInfo *reg, uint32_t real_offset,
@@ -933,7 +957,7 @@ static XenPTRegInfo xen_pt_emu_reg_pcie[] = {
         .init_val   = 0x00000000,
         .ro_mask    = 0xFFFFFFFF,
         .emu_mask   = 0x10000000,
-        .init       = xen_pt_common_reg_init,
+        .init       = xen_pt_devcap_reg_init,
         .u.dw.read  = xen_pt_long_reg_read,
         .u.dw.write = xen_pt_long_reg_write,
     },
@@ -946,7 +970,7 @@ static XenPTRegInfo xen_pt_emu_reg_pcie[] = {
         .emu_mask   = 0xFFFF,
         .init       = xen_pt_common_reg_init,
         .u.w.read   = xen_pt_word_reg_read,
-        .u.w.write  = xen_pt_word_reg_write,
+        .u.w.write  = xen_pt_devctl_reg_write,
     },
     /* Device Status reg */
     {
@@ -1969,7 +1993,7 @@ static void xen_pt_config_reg_init(XenPCIPassthroughState *s,
             /* Mask out host (including past size). */
             new_val = val & host_mask;
             /* Merge emulated ones (excluding the non-emulated ones). */
-            new_val |= data & host_mask;
+            new_val |= data & ~host_mask;
             /* Leave intact host and emulated values past the size - even though
              * we do not care as we write per reg->size granularity, but for the
              * logging below lets have the proper value. */
-- 
2.17.1


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [Qemu-devel] [RFC Patch] xen/pt: Emulate FLR capability
  2019-08-29  9:02 ` [Xen-devel] " Chao Gao
@ 2019-08-29 10:03   ` Jan Beulich
  -1 siblings, 0 replies; 14+ messages in thread
From: Jan Beulich @ 2019-08-29 10:03 UTC (permalink / raw)
  To: Chao Gao
  Cc: Stefano Stabellini, qemu-devel, Paul Durrant, Anthony Perard,
	xen-devel, Roger Pau Monné

On 29.08.2019 11:02, Chao Gao wrote:
> Currently, for a HVM on Xen, no reset method is virtualized. So in a VM's
> perspective, assigned devices cannot be reset. But some devices rely on PCI
> reset to recover from hardware hangs. When being assigned to a VM, those
> devices cannot be reset and won't work any longer if a hardware hang occurs.
> We have to reboot VM to trigger PCI reset on host to recover the device.

Did you consider a hot-unplug, reset (by host), hot-plug cycle instead?

> +static int xen_pt_devctl_reg_write(XenPCIPassthroughState *s,
> +                                   XenPTReg *cfg_entry, uint16_t *val,
> +                                   uint16_t dev_value, uint16_t valid_mask)
> +{
> +    if (s->real_device.is_resetable && (*val & PCI_EXP_DEVCTL_BCR_FLR)) {
> +        xen_pt_reset(s);
> +    }
> +    return xen_pt_word_reg_write(s, cfg_entry, val, dev_value, valid_mask);

I think you also need to clear the bit before handing on the request,
such that reads will always observe it clear.

Jan


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Xen-devel] [RFC Patch] xen/pt: Emulate FLR capability
@ 2019-08-29 10:03   ` Jan Beulich
  0 siblings, 0 replies; 14+ messages in thread
From: Jan Beulich @ 2019-08-29 10:03 UTC (permalink / raw)
  To: Chao Gao
  Cc: Stefano Stabellini, qemu-devel, Paul Durrant, Anthony Perard,
	xen-devel, Roger Pau Monné

On 29.08.2019 11:02, Chao Gao wrote:
> Currently, for a HVM on Xen, no reset method is virtualized. So in a VM's
> perspective, assigned devices cannot be reset. But some devices rely on PCI
> reset to recover from hardware hangs. When being assigned to a VM, those
> devices cannot be reset and won't work any longer if a hardware hang occurs.
> We have to reboot VM to trigger PCI reset on host to recover the device.

Did you consider a hot-unplug, reset (by host), hot-plug cycle instead?

> +static int xen_pt_devctl_reg_write(XenPCIPassthroughState *s,
> +                                   XenPTReg *cfg_entry, uint16_t *val,
> +                                   uint16_t dev_value, uint16_t valid_mask)
> +{
> +    if (s->real_device.is_resetable && (*val & PCI_EXP_DEVCTL_BCR_FLR)) {
> +        xen_pt_reset(s);
> +    }
> +    return xen_pt_word_reg_write(s, cfg_entry, val, dev_value, valid_mask);

I think you also need to clear the bit before handing on the request,
such that reads will always observe it clear.

Jan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Qemu-devel] [RFC Patch] xen/pt: Emulate FLR capability
  2019-08-29  9:02 ` [Xen-devel] " Chao Gao
@ 2019-08-29 10:21   ` Roger Pau Monné
  -1 siblings, 0 replies; 14+ messages in thread
From: Roger Pau Monné @ 2019-08-29 10:21 UTC (permalink / raw)
  To: Chao Gao
  Cc: Stefano Stabellini, qemu-devel, Paul Durrant, Jan Beulich,
	Anthony Perard, xen-devel

On Thu, Aug 29, 2019 at 05:02:27PM +0800, Chao Gao wrote:
> Currently, for a HVM on Xen, no reset method is virtualized. So in a VM's
> perspective, assigned devices cannot be reset. But some devices rely on PCI
> reset to recover from hardware hangs. When being assigned to a VM, those
> devices cannot be reset and won't work any longer if a hardware hang occurs.
> We have to reboot VM to trigger PCI reset on host to recover the device.
>
> This patch exposes FLR capability to VMs if the assigned device can be reset on
> host. When VM initiates an FLR to a device, qemu cleans up the device state,
> (including disabling of intx and/or MSI and unmapping BARs from guest, deleting
> emulated registers), then initiate PCI reset through 'reset' knob under the
> device's sysfs, finally initialize the device again.

I think you likely need to deassign the device from the VM, perform
the reset, and then assign the device again, so that there's no Xen
internal state carried over prior to the reset?

Thanks, Roger.


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Xen-devel] [RFC Patch] xen/pt: Emulate FLR capability
@ 2019-08-29 10:21   ` Roger Pau Monné
  0 siblings, 0 replies; 14+ messages in thread
From: Roger Pau Monné @ 2019-08-29 10:21 UTC (permalink / raw)
  To: Chao Gao
  Cc: Stefano Stabellini, qemu-devel, Paul Durrant, Jan Beulich,
	Anthony Perard, xen-devel

On Thu, Aug 29, 2019 at 05:02:27PM +0800, Chao Gao wrote:
> Currently, for a HVM on Xen, no reset method is virtualized. So in a VM's
> perspective, assigned devices cannot be reset. But some devices rely on PCI
> reset to recover from hardware hangs. When being assigned to a VM, those
> devices cannot be reset and won't work any longer if a hardware hang occurs.
> We have to reboot VM to trigger PCI reset on host to recover the device.
>
> This patch exposes FLR capability to VMs if the assigned device can be reset on
> host. When VM initiates an FLR to a device, qemu cleans up the device state,
> (including disabling of intx and/or MSI and unmapping BARs from guest, deleting
> emulated registers), then initiate PCI reset through 'reset' knob under the
> device's sysfs, finally initialize the device again.

I think you likely need to deassign the device from the VM, perform
the reset, and then assign the device again, so that there's no Xen
internal state carried over prior to the reset?

Thanks, Roger.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Qemu-devel] [RFC Patch] xen/pt: Emulate FLR capability
  2019-08-29 10:03   ` [Xen-devel] " Jan Beulich
@ 2019-08-29 15:22     ` Chao Gao
  -1 siblings, 0 replies; 14+ messages in thread
From: Chao Gao @ 2019-08-29 15:22 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Stefano Stabellini, qemu-devel, Paul Durrant, Anthony Perard,
	xen-devel, Roger Pau Monné

On Thu, Aug 29, 2019 at 12:03:44PM +0200, Jan Beulich wrote:
>On 29.08.2019 11:02, Chao Gao wrote:
>> Currently, for a HVM on Xen, no reset method is virtualized. So in a VM's
>> perspective, assigned devices cannot be reset. But some devices rely on PCI
>> reset to recover from hardware hangs. When being assigned to a VM, those
>> devices cannot be reset and won't work any longer if a hardware hang occurs.
>> We have to reboot VM to trigger PCI reset on host to recover the device.
>
>Did you consider a hot-unplug, reset (by host), hot-plug cycle instead?

Yes. I considered this means. But it needs host to initiate this action.
However, when a device needs reset is determined by the device driver
in VM. So in practice, VM still needs a way to notify host to do
unplug/reset/plug. As the standard FLR capability can meet the
requirement, I don't try to invent one.

>
>> +static int xen_pt_devctl_reg_write(XenPCIPassthroughState *s,
>> +                                   XenPTReg *cfg_entry, uint16_t *val,
>> +                                   uint16_t dev_value, uint16_t valid_mask)
>> +{
>> +    if (s->real_device.is_resetable && (*val & PCI_EXP_DEVCTL_BCR_FLR)) {
>> +        xen_pt_reset(s);
>> +    }
>> +    return xen_pt_word_reg_write(s, cfg_entry, val, dev_value, valid_mask);
>
>I think you also need to clear the bit before handing on the request,
>such that reads will always observe it clear.

Will do.

Thanks
Chao


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Xen-devel] [RFC Patch] xen/pt: Emulate FLR capability
@ 2019-08-29 15:22     ` Chao Gao
  0 siblings, 0 replies; 14+ messages in thread
From: Chao Gao @ 2019-08-29 15:22 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Stefano Stabellini, qemu-devel, Paul Durrant, Anthony Perard,
	xen-devel, Roger Pau Monné

On Thu, Aug 29, 2019 at 12:03:44PM +0200, Jan Beulich wrote:
>On 29.08.2019 11:02, Chao Gao wrote:
>> Currently, for a HVM on Xen, no reset method is virtualized. So in a VM's
>> perspective, assigned devices cannot be reset. But some devices rely on PCI
>> reset to recover from hardware hangs. When being assigned to a VM, those
>> devices cannot be reset and won't work any longer if a hardware hang occurs.
>> We have to reboot VM to trigger PCI reset on host to recover the device.
>
>Did you consider a hot-unplug, reset (by host), hot-plug cycle instead?

Yes. I considered this means. But it needs host to initiate this action.
However, when a device needs reset is determined by the device driver
in VM. So in practice, VM still needs a way to notify host to do
unplug/reset/plug. As the standard FLR capability can meet the
requirement, I don't try to invent one.

>
>> +static int xen_pt_devctl_reg_write(XenPCIPassthroughState *s,
>> +                                   XenPTReg *cfg_entry, uint16_t *val,
>> +                                   uint16_t dev_value, uint16_t valid_mask)
>> +{
>> +    if (s->real_device.is_resetable && (*val & PCI_EXP_DEVCTL_BCR_FLR)) {
>> +        xen_pt_reset(s);
>> +    }
>> +    return xen_pt_word_reg_write(s, cfg_entry, val, dev_value, valid_mask);
>
>I think you also need to clear the bit before handing on the request,
>such that reads will always observe it clear.

Will do.

Thanks
Chao

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Qemu-devel] [RFC Patch] xen/pt: Emulate FLR capability
  2019-08-29 10:21   ` [Xen-devel] " Roger Pau Monné
@ 2019-09-06  9:01     ` Chao Gao
  -1 siblings, 0 replies; 14+ messages in thread
From: Chao Gao @ 2019-09-06  9:01 UTC (permalink / raw)
  To: Roger Pau Monné
  Cc: Stefano Stabellini, qemu-devel, Paul Durrant, Jan Beulich,
	Anthony Perard, xen-devel

On Thu, Aug 29, 2019 at 12:21:11PM +0200, Roger Pau Monné wrote:
>On Thu, Aug 29, 2019 at 05:02:27PM +0800, Chao Gao wrote:
>> Currently, for a HVM on Xen, no reset method is virtualized. So in a VM's
>> perspective, assigned devices cannot be reset. But some devices rely on PCI
>> reset to recover from hardware hangs. When being assigned to a VM, those
>> devices cannot be reset and won't work any longer if a hardware hang occurs.
>> We have to reboot VM to trigger PCI reset on host to recover the device.
>>
>> This patch exposes FLR capability to VMs if the assigned device can be reset on
>> host. When VM initiates an FLR to a device, qemu cleans up the device state,
>> (including disabling of intx and/or MSI and unmapping BARs from guest, deleting
>> emulated registers), then initiate PCI reset through 'reset' knob under the
>> device's sysfs, finally initialize the device again.
>
>I think you likely need to deassign the device from the VM, perform
>the reset, and then assign the device again, so that there's no Xen
>internal state carried over prior to the reset?

Yes. It is the safest way. But here I want to present the feature as FLR
(such that the device driver in guest can issue PCI reset whenever
needed and no change is needed to device driver).  Current device
deassignment notifies guest that the device is going to be removed
It is not the standard PCI reset. Is it possible to make guest unaware
of the device deassignment to emulate a standard PCI reset? In my mind,
we can expose do_pci_remove/add to qemu or rewrite them in qemu (but
don't remove the device from guest's PCI hierarchy). Do you think it is
the right direction?

Thanks
Chao


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Xen-devel] [RFC Patch] xen/pt: Emulate FLR capability
@ 2019-09-06  9:01     ` Chao Gao
  0 siblings, 0 replies; 14+ messages in thread
From: Chao Gao @ 2019-09-06  9:01 UTC (permalink / raw)
  To: Roger Pau Monné
  Cc: Stefano Stabellini, qemu-devel, Paul Durrant, Jan Beulich,
	Anthony Perard, xen-devel

On Thu, Aug 29, 2019 at 12:21:11PM +0200, Roger Pau Monné wrote:
>On Thu, Aug 29, 2019 at 05:02:27PM +0800, Chao Gao wrote:
>> Currently, for a HVM on Xen, no reset method is virtualized. So in a VM's
>> perspective, assigned devices cannot be reset. But some devices rely on PCI
>> reset to recover from hardware hangs. When being assigned to a VM, those
>> devices cannot be reset and won't work any longer if a hardware hang occurs.
>> We have to reboot VM to trigger PCI reset on host to recover the device.
>>
>> This patch exposes FLR capability to VMs if the assigned device can be reset on
>> host. When VM initiates an FLR to a device, qemu cleans up the device state,
>> (including disabling of intx and/or MSI and unmapping BARs from guest, deleting
>> emulated registers), then initiate PCI reset through 'reset' knob under the
>> device's sysfs, finally initialize the device again.
>
>I think you likely need to deassign the device from the VM, perform
>the reset, and then assign the device again, so that there's no Xen
>internal state carried over prior to the reset?

Yes. It is the safest way. But here I want to present the feature as FLR
(such that the device driver in guest can issue PCI reset whenever
needed and no change is needed to device driver).  Current device
deassignment notifies guest that the device is going to be removed
It is not the standard PCI reset. Is it possible to make guest unaware
of the device deassignment to emulate a standard PCI reset? In my mind,
we can expose do_pci_remove/add to qemu or rewrite them in qemu (but
don't remove the device from guest's PCI hierarchy). Do you think it is
the right direction?

Thanks
Chao

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Qemu-devel] [RFC Patch] xen/pt: Emulate FLR capability
  2019-09-06  9:01     ` [Xen-devel] " Chao Gao
@ 2019-09-06  9:20       ` Paul Durrant
  -1 siblings, 0 replies; 14+ messages in thread
From: Paul Durrant @ 2019-09-06  9:20 UTC (permalink / raw)
  To: 'Chao Gao', Roger Pau Monne
  Cc: Anthony Perard, xen-devel, Stefano Stabellini, qemu-devel, Jan Beulich

> -----Original Message-----
> From: Chao Gao <chao.gao@intel.com>
> Sent: 06 September 2019 10:01
> To: Roger Pau Monne <roger.pau@citrix.com>
> Cc: xen-devel@lists.xenproject.org; qemu-devel@nongnu.org; Stefano Stabellini
> <sstabellini@kernel.org>; Anthony Perard <anthony.perard@citrix.com>; Paul Durrant
> <Paul.Durrant@citrix.com>; Jan Beulich <jbeulich@suse.com>
> Subject: Re: [RFC Patch] xen/pt: Emulate FLR capability
> 
> On Thu, Aug 29, 2019 at 12:21:11PM +0200, Roger Pau Monné wrote:
> >On Thu, Aug 29, 2019 at 05:02:27PM +0800, Chao Gao wrote:
> >> Currently, for a HVM on Xen, no reset method is virtualized. So in a VM's
> >> perspective, assigned devices cannot be reset. But some devices rely on PCI
> >> reset to recover from hardware hangs. When being assigned to a VM, those
> >> devices cannot be reset and won't work any longer if a hardware hang occurs.
> >> We have to reboot VM to trigger PCI reset on host to recover the device.
> >>
> >> This patch exposes FLR capability to VMs if the assigned device can be reset on
> >> host. When VM initiates an FLR to a device, qemu cleans up the device state,
> >> (including disabling of intx and/or MSI and unmapping BARs from guest, deleting
> >> emulated registers), then initiate PCI reset through 'reset' knob under the
> >> device's sysfs, finally initialize the device again.
> >
> >I think you likely need to deassign the device from the VM, perform
> >the reset, and then assign the device again, so that there's no Xen
> >internal state carried over prior to the reset?
> 
> Yes. It is the safest way. But here I want to present the feature as FLR
> (such that the device driver in guest can issue PCI reset whenever
> needed and no change is needed to device driver).  Current device
> deassignment notifies guest that the device is going to be removed
> It is not the standard PCI reset. Is it possible to make guest unaware
> of the device deassignment to emulate a standard PCI reset?

It should be, I would have thought. QEMU emulates all config space so any config access by the guest would be unaffected by de-assignment. The BARs and interrupts would be unmapped... but that's what you'd want anyway.

> In my mind,
> we can expose do_pci_remove/add to qemu or rewrite them in qemu (but
> don't remove the device from guest's PCI hierarchy). Do you think it is
> the right direction?

Long term I think we want to get pass-through emulation out of QEMU and into Xen.

  Paul

> 
> Thanks
> Chao


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Xen-devel] [RFC Patch] xen/pt: Emulate FLR capability
@ 2019-09-06  9:20       ` Paul Durrant
  0 siblings, 0 replies; 14+ messages in thread
From: Paul Durrant @ 2019-09-06  9:20 UTC (permalink / raw)
  To: 'Chao Gao', Roger Pau Monne
  Cc: Anthony Perard, xen-devel, Stefano Stabellini, qemu-devel, Jan Beulich

> -----Original Message-----
> From: Chao Gao <chao.gao@intel.com>
> Sent: 06 September 2019 10:01
> To: Roger Pau Monne <roger.pau@citrix.com>
> Cc: xen-devel@lists.xenproject.org; qemu-devel@nongnu.org; Stefano Stabellini
> <sstabellini@kernel.org>; Anthony Perard <anthony.perard@citrix.com>; Paul Durrant
> <Paul.Durrant@citrix.com>; Jan Beulich <jbeulich@suse.com>
> Subject: Re: [RFC Patch] xen/pt: Emulate FLR capability
> 
> On Thu, Aug 29, 2019 at 12:21:11PM +0200, Roger Pau Monné wrote:
> >On Thu, Aug 29, 2019 at 05:02:27PM +0800, Chao Gao wrote:
> >> Currently, for a HVM on Xen, no reset method is virtualized. So in a VM's
> >> perspective, assigned devices cannot be reset. But some devices rely on PCI
> >> reset to recover from hardware hangs. When being assigned to a VM, those
> >> devices cannot be reset and won't work any longer if a hardware hang occurs.
> >> We have to reboot VM to trigger PCI reset on host to recover the device.
> >>
> >> This patch exposes FLR capability to VMs if the assigned device can be reset on
> >> host. When VM initiates an FLR to a device, qemu cleans up the device state,
> >> (including disabling of intx and/or MSI and unmapping BARs from guest, deleting
> >> emulated registers), then initiate PCI reset through 'reset' knob under the
> >> device's sysfs, finally initialize the device again.
> >
> >I think you likely need to deassign the device from the VM, perform
> >the reset, and then assign the device again, so that there's no Xen
> >internal state carried over prior to the reset?
> 
> Yes. It is the safest way. But here I want to present the feature as FLR
> (such that the device driver in guest can issue PCI reset whenever
> needed and no change is needed to device driver).  Current device
> deassignment notifies guest that the device is going to be removed
> It is not the standard PCI reset. Is it possible to make guest unaware
> of the device deassignment to emulate a standard PCI reset?

It should be, I would have thought. QEMU emulates all config space so any config access by the guest would be unaffected by de-assignment. The BARs and interrupts would be unmapped... but that's what you'd want anyway.

> In my mind,
> we can expose do_pci_remove/add to qemu or rewrite them in qemu (but
> don't remove the device from guest's PCI hierarchy). Do you think it is
> the right direction?

Long term I think we want to get pass-through emulation out of QEMU and into Xen.

  Paul

> 
> Thanks
> Chao

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Qemu-devel] [RFC Patch] xen/pt: Emulate FLR capability
  2019-09-06  9:01     ` [Xen-devel] " Chao Gao
@ 2019-09-06 10:28       ` Roger Pau Monné
  -1 siblings, 0 replies; 14+ messages in thread
From: Roger Pau Monné @ 2019-09-06 10:28 UTC (permalink / raw)
  To: Chao Gao
  Cc: Stefano Stabellini, qemu-devel, Paul Durrant, Jan Beulich,
	Anthony Perard, xen-devel

On Fri, Sep 06, 2019 at 05:01:09PM +0800, Chao Gao wrote:
> On Thu, Aug 29, 2019 at 12:21:11PM +0200, Roger Pau Monné wrote:
> >On Thu, Aug 29, 2019 at 05:02:27PM +0800, Chao Gao wrote:
> >> Currently, for a HVM on Xen, no reset method is virtualized. So in a VM's
> >> perspective, assigned devices cannot be reset. But some devices rely on PCI
> >> reset to recover from hardware hangs. When being assigned to a VM, those
> >> devices cannot be reset and won't work any longer if a hardware hang occurs.
> >> We have to reboot VM to trigger PCI reset on host to recover the device.
> >>
> >> This patch exposes FLR capability to VMs if the assigned device can be reset on
> >> host. When VM initiates an FLR to a device, qemu cleans up the device state,
> >> (including disabling of intx and/or MSI and unmapping BARs from guest, deleting
> >> emulated registers), then initiate PCI reset through 'reset' knob under the
> >> device's sysfs, finally initialize the device again.
> >
> >I think you likely need to deassign the device from the VM, perform
> >the reset, and then assign the device again, so that there's no Xen
> >internal state carried over prior to the reset?
> 
> Yes. It is the safest way. But here I want to present the feature as FLR
> (such that the device driver in guest can issue PCI reset whenever
> needed and no change is needed to device driver).  Current device
> deassignment notifies guest that the device is going to be removed

In which way does a guest get notified?

AFAICT XEN_DOMCTL_deassign_device doesn't do any kind of guest
notification, it just tears down the device.

> It is not the standard PCI reset. Is it possible to make guest unaware
> of the device deassignment to emulate a standard PCI reset?

That would be my expectation. Such deassignment/assignment should be
completely transparent from a guest PoV. My suggestion for doing
the reassignment is to ensure there's no device state carried over.

> In my mind,
> we can expose do_pci_remove/add to qemu or rewrite them in qemu (but
> don't remove the device from guest's PCI hierarchy). Do you think it is
> the right direction?

Doing all this cleanup without reassigning the device seems more
complicated and likely to miss stuff to cleanup IMO, but as long as
you can guarantee there's no state carried over from before the reset
it should be fine.

I think you also need some dom0 cooperation for this, so that for
example the BARs are correctly re-positioned after the reset?

Thanks, Roger.


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Xen-devel] [RFC Patch] xen/pt: Emulate FLR capability
@ 2019-09-06 10:28       ` Roger Pau Monné
  0 siblings, 0 replies; 14+ messages in thread
From: Roger Pau Monné @ 2019-09-06 10:28 UTC (permalink / raw)
  To: Chao Gao
  Cc: Stefano Stabellini, qemu-devel, Paul Durrant, Jan Beulich,
	Anthony Perard, xen-devel

On Fri, Sep 06, 2019 at 05:01:09PM +0800, Chao Gao wrote:
> On Thu, Aug 29, 2019 at 12:21:11PM +0200, Roger Pau Monné wrote:
> >On Thu, Aug 29, 2019 at 05:02:27PM +0800, Chao Gao wrote:
> >> Currently, for a HVM on Xen, no reset method is virtualized. So in a VM's
> >> perspective, assigned devices cannot be reset. But some devices rely on PCI
> >> reset to recover from hardware hangs. When being assigned to a VM, those
> >> devices cannot be reset and won't work any longer if a hardware hang occurs.
> >> We have to reboot VM to trigger PCI reset on host to recover the device.
> >>
> >> This patch exposes FLR capability to VMs if the assigned device can be reset on
> >> host. When VM initiates an FLR to a device, qemu cleans up the device state,
> >> (including disabling of intx and/or MSI and unmapping BARs from guest, deleting
> >> emulated registers), then initiate PCI reset through 'reset' knob under the
> >> device's sysfs, finally initialize the device again.
> >
> >I think you likely need to deassign the device from the VM, perform
> >the reset, and then assign the device again, so that there's no Xen
> >internal state carried over prior to the reset?
> 
> Yes. It is the safest way. But here I want to present the feature as FLR
> (such that the device driver in guest can issue PCI reset whenever
> needed and no change is needed to device driver).  Current device
> deassignment notifies guest that the device is going to be removed

In which way does a guest get notified?

AFAICT XEN_DOMCTL_deassign_device doesn't do any kind of guest
notification, it just tears down the device.

> It is not the standard PCI reset. Is it possible to make guest unaware
> of the device deassignment to emulate a standard PCI reset?

That would be my expectation. Such deassignment/assignment should be
completely transparent from a guest PoV. My suggestion for doing
the reassignment is to ensure there's no device state carried over.

> In my mind,
> we can expose do_pci_remove/add to qemu or rewrite them in qemu (but
> don't remove the device from guest's PCI hierarchy). Do you think it is
> the right direction?

Doing all this cleanup without reassigning the device seems more
complicated and likely to miss stuff to cleanup IMO, but as long as
you can guarantee there's no state carried over from before the reset
it should be fine.

I think you also need some dom0 cooperation for this, so that for
example the BARs are correctly re-positioned after the reset?

Thanks, Roger.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2019-09-06 10:29 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-08-29  9:02 [Qemu-devel] [RFC Patch] xen/pt: Emulate FLR capability Chao Gao
2019-08-29  9:02 ` [Xen-devel] " Chao Gao
2019-08-29 10:03 ` [Qemu-devel] " Jan Beulich
2019-08-29 10:03   ` [Xen-devel] " Jan Beulich
2019-08-29 15:22   ` [Qemu-devel] " Chao Gao
2019-08-29 15:22     ` [Xen-devel] " Chao Gao
2019-08-29 10:21 ` [Qemu-devel] " Roger Pau Monné
2019-08-29 10:21   ` [Xen-devel] " Roger Pau Monné
2019-09-06  9:01   ` [Qemu-devel] " Chao Gao
2019-09-06  9:01     ` [Xen-devel] " Chao Gao
2019-09-06  9:20     ` [Qemu-devel] " Paul Durrant
2019-09-06  9:20       ` [Xen-devel] " Paul Durrant
2019-09-06 10:28     ` [Qemu-devel] " Roger Pau Monné
2019-09-06 10:28       ` [Xen-devel] " Roger Pau Monné

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.