* [Qemu-devel] [RFC PATCH V4 1/4] vfio: introduce a new VFIO subregion for mdev device migration support
@ 2018-04-10 6:02 Yulei Zhang
2018-04-16 14:44 ` Kirti Wankhede
0 siblings, 1 reply; 7+ messages in thread
From: Yulei Zhang @ 2018-04-10 6:02 UTC (permalink / raw)
To: qemu-devel
Cc: kevin.tian, joonas.lahtinen, zhenyuw, kwankhede, zhi.a.wang,
alex.williamson, dgilbert, quintela, Yulei Zhang
New VFIO sub region VFIO_REGION_SUBTYPE_DEVICE_STATE is added
to fetch and restore the status of mdev device vGPU during the
live migration.
Signed-off-by: Yulei Zhang <yulei.zhang@intel.com>
---
hw/vfio/pci.c | 25 ++++++++++++++++++++++++-
hw/vfio/pci.h | 2 ++
| 9 ++++++---
3 files changed, 32 insertions(+), 4 deletions(-)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index c977ee3..f98a9dd 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -32,6 +32,7 @@
#include "pci.h"
#include "trace.h"
#include "qapi/error.h"
+#include "migration/blocker.h"
#define MSIX_CAP_LENGTH 12
@@ -2821,6 +2822,25 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
vfio_vga_quirk_setup(vdev);
}
+ struct vfio_region_info *device_state;
+ /* device state region setup */
+ if (!vfio_get_dev_region_info(&vdev->vbasedev,
+ VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
+ VFIO_REGION_SUBTYPE_DEVICE_STATE, &device_state)) {
+ memcpy(&vdev->device_state, device_state,
+ sizeof(struct vfio_region_info));
+ g_free(device_state);
+ } else {
+ error_setg(&vdev->migration_blocker,
+ "Migration disabled: cannot support device state region");
+ migrate_add_blocker(vdev->migration_blocker, &err);
+ if (err) {
+ error_propagate(errp, err);
+ error_free(vdev->migration_blocker);
+ goto error;
+ }
+ }
+
for (i = 0; i < PCI_ROM_SLOT; i++) {
vfio_bar_quirk_setup(vdev, i);
}
@@ -2884,6 +2904,10 @@ out_teardown:
vfio_teardown_msi(vdev);
vfio_bars_exit(vdev);
error:
+ if (vdev->migration_blocker) {
+ migrate_del_blocker(vdev->migration_blocker);
+ error_free(vdev->migration_blocker);
+ }
error_prepend(errp, ERR_PREFIX, vdev->vbasedev.name);
}
@@ -3009,7 +3033,6 @@ static Property vfio_pci_dev_properties[] = {
static const VMStateDescription vfio_pci_vmstate = {
.name = "vfio-pci",
- .unmigratable = 1,
};
static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 502a575..0ee1724 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -116,6 +116,8 @@ typedef struct VFIOPCIDevice {
VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */
VFIOVGA *vga; /* 0xa0000, 0x3b0, 0x3c0 */
void *igd_opregion;
+ struct vfio_region_info device_state;
+ Error *migration_blocker;
PCIHostDeviceAddress host;
EventNotifier err_notifier;
EventNotifier req_notifier;
--git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
index 4312e96..e3380ad 100644
--- a/linux-headers/linux/vfio.h
+++ b/linux-headers/linux/vfio.h
@@ -297,9 +297,12 @@ struct vfio_region_info_cap_type {
#define VFIO_REGION_TYPE_PCI_VENDOR_MASK (0xffff)
/* 8086 Vendor sub-types */
-#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION (1)
-#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2)
-#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG (3)
+#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION (1)
+#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2)
+#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG (3)
+
+/* Mdev sub-type for device state save and restore */
+#define VFIO_REGION_SUBTYPE_DEVICE_STATE (4)
/**
* VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
--
2.7.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [Qemu-devel] [RFC PATCH V4 1/4] vfio: introduce a new VFIO subregion for mdev device migration support
2018-04-10 6:02 [Qemu-devel] [RFC PATCH V4 1/4] vfio: introduce a new VFIO subregion for mdev device migration support Yulei Zhang
@ 2018-04-16 14:44 ` Kirti Wankhede
2018-04-16 20:15 ` Alex Williamson
0 siblings, 1 reply; 7+ messages in thread
From: Kirti Wankhede @ 2018-04-16 14:44 UTC (permalink / raw)
To: Yulei Zhang, qemu-devel
Cc: kevin.tian, joonas.lahtinen, zhenyuw, zhi.a.wang,
alex.williamson, dgilbert, quintela
On 4/10/2018 11:32 AM, Yulei Zhang wrote:
> New VFIO sub region VFIO_REGION_SUBTYPE_DEVICE_STATE is added
> to fetch and restore the status of mdev device vGPU during the
> live migration.
>
> Signed-off-by: Yulei Zhang <yulei.zhang@intel.com>
> ---
> hw/vfio/pci.c | 25 ++++++++++++++++++++++++-
> hw/vfio/pci.h | 2 ++
> linux-headers/linux/vfio.h | 9 ++++++---
> 3 files changed, 32 insertions(+), 4 deletions(-)
>
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index c977ee3..f98a9dd 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -32,6 +32,7 @@
> #include "pci.h"
> #include "trace.h"
> #include "qapi/error.h"
> +#include "migration/blocker.h"
>
> #define MSIX_CAP_LENGTH 12
>
> @@ -2821,6 +2822,25 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
> vfio_vga_quirk_setup(vdev);
> }
>
> + struct vfio_region_info *device_state;
> + /* device state region setup */
> + if (!vfio_get_dev_region_info(&vdev->vbasedev,
> + VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
> + VFIO_REGION_SUBTYPE_DEVICE_STATE, &device_state)) {
> + memcpy(&vdev->device_state, device_state,
> + sizeof(struct vfio_region_info));
> + g_free(device_state);
> + } else {
> + error_setg(&vdev->migration_blocker,
> + "Migration disabled: cannot support device state region");
> + migrate_add_blocker(vdev->migration_blocker, &err);
> + if (err) {
> + error_propagate(errp, err);
> + error_free(vdev->migration_blocker);
> + goto error;
> + }
> + }
> +
I think there should be a _PROBE ioctl before trying to setup region for
migration. If vfio device driver or vendor driver for mdev device
supports migration capability, _PROBE ioctl should return success along
with region's information <index, size> which can be used to setup the
region.
> for (i = 0; i < PCI_ROM_SLOT; i++) {
> vfio_bar_quirk_setup(vdev, i);
> }
> @@ -2884,6 +2904,10 @@ out_teardown:
> vfio_teardown_msi(vdev);
> vfio_bars_exit(vdev);
> error:
> + if (vdev->migration_blocker) {
> + migrate_del_blocker(vdev->migration_blocker);
> + error_free(vdev->migration_blocker);
> + }
> error_prepend(errp, ERR_PREFIX, vdev->vbasedev.name);
> }
>
> @@ -3009,7 +3033,6 @@ static Property vfio_pci_dev_properties[] = {
>
> static const VMStateDescription vfio_pci_vmstate = {
> .name = "vfio-pci",
> - .unmigratable = 1,
Based on the result of above _PROBE ioctl, 'unmigratable' should be set
if vendor driver doesn't support migration capability.
Thanks,
Kirti
> };
>
> static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index 502a575..0ee1724 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -116,6 +116,8 @@ typedef struct VFIOPCIDevice {
> VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */
> VFIOVGA *vga; /* 0xa0000, 0x3b0, 0x3c0 */
> void *igd_opregion;
> + struct vfio_region_info device_state;
> + Error *migration_blocker;
> PCIHostDeviceAddress host;
> EventNotifier err_notifier;
> EventNotifier req_notifier;
> diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
> index 4312e96..e3380ad 100644
> --- a/linux-headers/linux/vfio.h
> +++ b/linux-headers/linux/vfio.h
> @@ -297,9 +297,12 @@ struct vfio_region_info_cap_type {
> #define VFIO_REGION_TYPE_PCI_VENDOR_MASK (0xffff)
>
> /* 8086 Vendor sub-types */
> -#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION (1)
> -#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2)
> -#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG (3)
> +#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION (1)
> +#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2)
> +#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG (3)
> +
> +/* Mdev sub-type for device state save and restore */
> +#define VFIO_REGION_SUBTYPE_DEVICE_STATE (4)
>
> /**
> * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
>
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [Qemu-devel] [RFC PATCH V4 1/4] vfio: introduce a new VFIO subregion for mdev device migration support
2018-04-16 14:44 ` Kirti Wankhede
@ 2018-04-16 20:15 ` Alex Williamson
2018-04-17 10:36 ` Zhang, Yulei
2018-04-17 19:30 ` Kirti Wankhede
0 siblings, 2 replies; 7+ messages in thread
From: Alex Williamson @ 2018-04-16 20:15 UTC (permalink / raw)
To: Kirti Wankhede
Cc: Yulei Zhang, qemu-devel, kevin.tian, joonas.lahtinen, zhenyuw,
zhi.a.wang, dgilbert, quintela
On Mon, 16 Apr 2018 20:14:03 +0530
Kirti Wankhede <kwankhede@nvidia.com> wrote:
> On 4/10/2018 11:32 AM, Yulei Zhang wrote:
> > New VFIO sub region VFIO_REGION_SUBTYPE_DEVICE_STATE is added
> > to fetch and restore the status of mdev device vGPU during the
> > live migration.
> >
> > Signed-off-by: Yulei Zhang <yulei.zhang@intel.com>
> > ---
> > hw/vfio/pci.c | 25 ++++++++++++++++++++++++-
> > hw/vfio/pci.h | 2 ++
> > linux-headers/linux/vfio.h | 9 ++++++---
> > 3 files changed, 32 insertions(+), 4 deletions(-)
> >
> > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> > index c977ee3..f98a9dd 100644
> > --- a/hw/vfio/pci.c
> > +++ b/hw/vfio/pci.c
> > @@ -32,6 +32,7 @@
> > #include "pci.h"
> > #include "trace.h"
> > #include "qapi/error.h"
> > +#include "migration/blocker.h"
> >
> > #define MSIX_CAP_LENGTH 12
> >
> > @@ -2821,6 +2822,25 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
> > vfio_vga_quirk_setup(vdev);
> > }
> >
> > + struct vfio_region_info *device_state;
> > + /* device state region setup */
> > + if (!vfio_get_dev_region_info(&vdev->vbasedev,
> > + VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
This is not how VENDOR_TYPE is meant to be used. We have a 32-bit type
and 32-bit sub-type. When bit 31 of type is set (ie. VENDOR_TYPE),
then the low 16-bits (VENDOR_MASK) defines a vendor specific set of
sub-types. Using it as above, you're stomping on vendor 0x0000's
vendor sub-types. I would expect non-vendor specific types to leave
bit 31 of the type field clear. We could then define a universal type
for device state with a sub-type specifying the API such that we could
revise the API by providing an updated sub-type. Or perhaps there are
devices that want separate save vs restore regions, we could do that
with the sub-type.
> > + VFIO_REGION_SUBTYPE_DEVICE_STATE, &device_state)) {
> > + memcpy(&vdev->device_state, device_state,
> > + sizeof(struct vfio_region_info));
> > + g_free(device_state);
> > + } else {
> > + error_setg(&vdev->migration_blocker,
> > + "Migration disabled: cannot support device state region");
> > + migrate_add_blocker(vdev->migration_blocker, &err);
This appears as if it's going to be rather verbose and generate errors
for anything not supporting migration, which is currently everything.
Maybe there should be an OnOffAuto vfio-pci device option for the user
to specify migration such that if migration=on is specified the device
will fail if it's not available. Otherwise the default would be auto.
> > + if (err) {
> > + error_propagate(errp, err);
> > + error_free(vdev->migration_blocker);
> > + goto error;
> > + }
> > + }
> > +
>
> I think there should be a _PROBE ioctl before trying to setup region for
> migration. If vfio device driver or vendor driver for mdev device
> supports migration capability, _PROBE ioctl should return success along
> with region's information <index, size> which can be used to setup the
> region.
How is _PROBE different from _REGION_INFO which already exists to tell
us information about the region?
> > for (i = 0; i < PCI_ROM_SLOT; i++) {
> > vfio_bar_quirk_setup(vdev, i);
> > }
> > @@ -2884,6 +2904,10 @@ out_teardown:
> > vfio_teardown_msi(vdev);
> > vfio_bars_exit(vdev);
> > error:
> > + if (vdev->migration_blocker) {
> > + migrate_del_blocker(vdev->migration_blocker);
> > + error_free(vdev->migration_blocker);
> > + }
> > error_prepend(errp, ERR_PREFIX, vdev->vbasedev.name);
> > }
> >
> > @@ -3009,7 +3033,6 @@ static Property vfio_pci_dev_properties[] = {
> >
> > static const VMStateDescription vfio_pci_vmstate = {
> > .name = "vfio-pci",
> > - .unmigratable = 1,
>
> Based on the result of above _PROBE ioctl, 'unmigratable' should be set
> if vendor driver doesn't support migration capability.
.unmigratable isn't really dynamically settable aiui, thus the
existence of the migration blocker.
> > };
> >
> > static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
> > diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> > index 502a575..0ee1724 100644
> > --- a/hw/vfio/pci.h
> > +++ b/hw/vfio/pci.h
> > @@ -116,6 +116,8 @@ typedef struct VFIOPCIDevice {
> > VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */
> > VFIOVGA *vga; /* 0xa0000, 0x3b0, 0x3c0 */
> > void *igd_opregion;
> > + struct vfio_region_info device_state;
> > + Error *migration_blocker;
> > PCIHostDeviceAddress host;
> > EventNotifier err_notifier;
> > EventNotifier req_notifier;
> > diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
> > index 4312e96..e3380ad 100644
> > --- a/linux-headers/linux/vfio.h
> > +++ b/linux-headers/linux/vfio.h
> > @@ -297,9 +297,12 @@ struct vfio_region_info_cap_type {
> > #define VFIO_REGION_TYPE_PCI_VENDOR_MASK (0xffff)
> >
> > /* 8086 Vendor sub-types */
> > -#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION (1)
> > -#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2)
> > -#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG (3)
> > +#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION (1)
> > +#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2)
> > +#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG (3)
> > +
> > +/* Mdev sub-type for device state save and restore */
> > +#define VFIO_REGION_SUBTYPE_DEVICE_STATE (4)
This continues on with the 0x8086 vendor sub-types, which is entirely
unnecessary as each vendor has an entire sub-type address space and we
shouldn't be using a vendor type anyway. Additionally, we need
significantly more documentation on the API for this region. Thanks,
Alex
> >
> > /**
> > * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
> >
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [Qemu-devel] [RFC PATCH V4 1/4] vfio: introduce a new VFIO subregion for mdev device migration support
2018-04-16 20:15 ` Alex Williamson
@ 2018-04-17 10:36 ` Zhang, Yulei
2018-04-17 14:42 ` Alex Williamson
2018-04-17 19:30 ` Kirti Wankhede
1 sibling, 1 reply; 7+ messages in thread
From: Zhang, Yulei @ 2018-04-17 10:36 UTC (permalink / raw)
To: Alex Williamson, Kirti Wankhede
Cc: qemu-devel, Tian, Kevin, joonas.lahtinen, zhenyuw, Wang, Zhi A,
dgilbert, quintela
> -----Original Message-----
> From: Alex Williamson [mailto:alex.williamson@redhat.com]
> Sent: Tuesday, April 17, 2018 4:15 AM
> To: Kirti Wankhede <kwankhede@nvidia.com>
> Cc: Zhang, Yulei <yulei.zhang@intel.com>; qemu-devel@nongnu.org; Tian,
> Kevin <kevin.tian@intel.com>; joonas.lahtinen@linux.intel.com;
> zhenyuw@linux.intel.com; Wang, Zhi A <zhi.a.wang@intel.com>;
> dgilbert@redhat.com; quintela@redhat.com
> Subject: Re: [RFC PATCH V4 1/4] vfio: introduce a new VFIO subregion for
> mdev device migration support
>
> On Mon, 16 Apr 2018 20:14:03 +0530
> Kirti Wankhede <kwankhede@nvidia.com> wrote:
>
> > On 4/10/2018 11:32 AM, Yulei Zhang wrote:
> > > New VFIO sub region VFIO_REGION_SUBTYPE_DEVICE_STATE is added
> > > to fetch and restore the status of mdev device vGPU during the
> > > live migration.
> > >
> > > Signed-off-by: Yulei Zhang <yulei.zhang@intel.com>
> > > ---
> > > hw/vfio/pci.c | 25 ++++++++++++++++++++++++-
> > > hw/vfio/pci.h | 2 ++
> > > linux-headers/linux/vfio.h | 9 ++++++---
> > > 3 files changed, 32 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> > > index c977ee3..f98a9dd 100644
> > > --- a/hw/vfio/pci.c
> > > +++ b/hw/vfio/pci.c
> > > @@ -32,6 +32,7 @@
> > > #include "pci.h"
> > > #include "trace.h"
> > > #include "qapi/error.h"
> > > +#include "migration/blocker.h"
> > >
> > > #define MSIX_CAP_LENGTH 12
> > >
> > > @@ -2821,6 +2822,25 @@ static void vfio_realize(PCIDevice *pdev, Error
> **errp)
> > > vfio_vga_quirk_setup(vdev);
> > > }
> > >
> > > + struct vfio_region_info *device_state;
> > > + /* device state region setup */
> > > + if (!vfio_get_dev_region_info(&vdev->vbasedev,
> > > + VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
>
> This is not how VENDOR_TYPE is meant to be used. We have a 32-bit type
> and 32-bit sub-type. When bit 31 of type is set (ie. VENDOR_TYPE),
> then the low 16-bits (VENDOR_MASK) defines a vendor specific set of
> sub-types. Using it as above, you're stomping on vendor 0x0000's
> vendor sub-types. I would expect non-vendor specific types to leave
> bit 31 of the type field clear. We could then define a universal type
> for device state with a sub-type specifying the API such that we could
> revise the API by providing an updated sub-type. Or perhaps there are
> devices that want separate save vs restore regions, we could do that
> with the sub-type.
Got it. For the subtype, can we use it to specify how to use the region,
to indicate it can be mmaped or not.
>
> > > + VFIO_REGION_SUBTYPE_DEVICE_STATE, &device_state)) {
> > > + memcpy(&vdev->device_state, device_state,
> > > + sizeof(struct vfio_region_info));
> > > + g_free(device_state);
> > > + } else {
> > > + error_setg(&vdev->migration_blocker,
> > > + "Migration disabled: cannot support device state region");
> > > + migrate_add_blocker(vdev->migration_blocker, &err);
>
> This appears as if it's going to be rather verbose and generate errors
> for anything not supporting migration, which is currently everything.
> Maybe there should be an OnOffAuto vfio-pci device option for the user
> to specify migration such that if migration=on is specified the device
> will fail if it's not available. Otherwise the default would be auto.
>
We only get this error message when we try to migrate the guest with
vfio-pci device which doesn't contain the device_state region, just like
currently we set unmigratable=1.
> > > + if (err) {
> > > + error_propagate(errp, err);
> > > + error_free(vdev->migration_blocker);
> > > + goto error;
> > > + }
> > > + }
> > > +
> >
> > I think there should be a _PROBE ioctl before trying to setup region for
> > migration. If vfio device driver or vendor driver for mdev device
> > supports migration capability, _PROBE ioctl should return success along
> > with region's information <index, size> which can be used to setup the
> > region.
>
> How is _PROBE different from _REGION_INFO which already exists to tell
> us information about the region?
>
> > > for (i = 0; i < PCI_ROM_SLOT; i++) {
> > > vfio_bar_quirk_setup(vdev, i);
> > > }
> > > @@ -2884,6 +2904,10 @@ out_teardown:
> > > vfio_teardown_msi(vdev);
> > > vfio_bars_exit(vdev);
> > > error:
> > > + if (vdev->migration_blocker) {
> > > + migrate_del_blocker(vdev->migration_blocker);
> > > + error_free(vdev->migration_blocker);
> > > + }
> > > error_prepend(errp, ERR_PREFIX, vdev->vbasedev.name);
> > > }
> > >
> > > @@ -3009,7 +3033,6 @@ static Property vfio_pci_dev_properties[] = {
> > >
> > > static const VMStateDescription vfio_pci_vmstate = {
> > > .name = "vfio-pci",
> > > - .unmigratable = 1,
> >
> > Based on the result of above _PROBE ioctl, 'unmigratable' should be set
> > if vendor driver doesn't support migration capability.
>
> .unmigratable isn't really dynamically settable aiui, thus the
> existence of the migration blocker.
>
> > > };
> > >
> > > static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
> > > diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> > > index 502a575..0ee1724 100644
> > > --- a/hw/vfio/pci.h
> > > +++ b/hw/vfio/pci.h
> > > @@ -116,6 +116,8 @@ typedef struct VFIOPCIDevice {
> > > VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */
> > > VFIOVGA *vga; /* 0xa0000, 0x3b0, 0x3c0 */
> > > void *igd_opregion;
> > > + struct vfio_region_info device_state;
> > > + Error *migration_blocker;
> > > PCIHostDeviceAddress host;
> > > EventNotifier err_notifier;
> > > EventNotifier req_notifier;
> > > diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
> > > index 4312e96..e3380ad 100644
> > > --- a/linux-headers/linux/vfio.h
> > > +++ b/linux-headers/linux/vfio.h
> > > @@ -297,9 +297,12 @@ struct vfio_region_info_cap_type {
> > > #define VFIO_REGION_TYPE_PCI_VENDOR_MASK (0xffff)
> > >
> > > /* 8086 Vendor sub-types */
> > > -#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION (1)
> > > -#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2)
> > > -#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG (3)
> > > +#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION (1)
> > > +#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2)
> > > +#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG (3)
> > > +
> > > +/* Mdev sub-type for device state save and restore */
> > > +#define VFIO_REGION_SUBTYPE_DEVICE_STATE (4)
>
> This continues on with the 0x8086 vendor sub-types, which is entirely
> unnecessary as each vendor has an entire sub-type address space and we
> shouldn't be using a vendor type anyway. Additionally, we need
> significantly more documentation on the API for this region. Thanks,
>
> Alex
>
> > >
> > > /**
> > > * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
> > >
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [Qemu-devel] [RFC PATCH V4 1/4] vfio: introduce a new VFIO subregion for mdev device migration support
2018-04-17 10:36 ` Zhang, Yulei
@ 2018-04-17 14:42 ` Alex Williamson
2018-04-17 19:04 ` Dr. David Alan Gilbert
0 siblings, 1 reply; 7+ messages in thread
From: Alex Williamson @ 2018-04-17 14:42 UTC (permalink / raw)
To: Zhang, Yulei
Cc: Kirti Wankhede, qemu-devel, Tian, Kevin, joonas.lahtinen,
zhenyuw, Wang, Zhi A, dgilbert, quintela
On Tue, 17 Apr 2018 10:36:53 +0000
"Zhang, Yulei" <yulei.zhang@intel.com> wrote:
> > -----Original Message-----
> > From: Alex Williamson [mailto:alex.williamson@redhat.com]
> > Sent: Tuesday, April 17, 2018 4:15 AM
> > To: Kirti Wankhede <kwankhede@nvidia.com>
> > Cc: Zhang, Yulei <yulei.zhang@intel.com>; qemu-devel@nongnu.org; Tian,
> > Kevin <kevin.tian@intel.com>; joonas.lahtinen@linux.intel.com;
> > zhenyuw@linux.intel.com; Wang, Zhi A <zhi.a.wang@intel.com>;
> > dgilbert@redhat.com; quintela@redhat.com
> > Subject: Re: [RFC PATCH V4 1/4] vfio: introduce a new VFIO subregion for
> > mdev device migration support
> >
> > On Mon, 16 Apr 2018 20:14:03 +0530
> > Kirti Wankhede <kwankhede@nvidia.com> wrote:
> >
> > > On 4/10/2018 11:32 AM, Yulei Zhang wrote:
> > > > New VFIO sub region VFIO_REGION_SUBTYPE_DEVICE_STATE is added
> > > > to fetch and restore the status of mdev device vGPU during the
> > > > live migration.
> > > >
> > > > Signed-off-by: Yulei Zhang <yulei.zhang@intel.com>
> > > > ---
> > > > hw/vfio/pci.c | 25 ++++++++++++++++++++++++-
> > > > hw/vfio/pci.h | 2 ++
> > > > linux-headers/linux/vfio.h | 9 ++++++---
> > > > 3 files changed, 32 insertions(+), 4 deletions(-)
> > > >
> > > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> > > > index c977ee3..f98a9dd 100644
> > > > --- a/hw/vfio/pci.c
> > > > +++ b/hw/vfio/pci.c
> > > > @@ -32,6 +32,7 @@
> > > > #include "pci.h"
> > > > #include "trace.h"
> > > > #include "qapi/error.h"
> > > > +#include "migration/blocker.h"
> > > >
> > > > #define MSIX_CAP_LENGTH 12
> > > >
> > > > @@ -2821,6 +2822,25 @@ static void vfio_realize(PCIDevice *pdev, Error
> > **errp)
> > > > vfio_vga_quirk_setup(vdev);
> > > > }
> > > >
> > > > + struct vfio_region_info *device_state;
> > > > + /* device state region setup */
> > > > + if (!vfio_get_dev_region_info(&vdev->vbasedev,
> > > > + VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
> >
> > This is not how VENDOR_TYPE is meant to be used. We have a 32-bit type
> > and 32-bit sub-type. When bit 31 of type is set (ie. VENDOR_TYPE),
> > then the low 16-bits (VENDOR_MASK) defines a vendor specific set of
> > sub-types. Using it as above, you're stomping on vendor 0x0000's
> > vendor sub-types. I would expect non-vendor specific types to leave
> > bit 31 of the type field clear. We could then define a universal type
> > for device state with a sub-type specifying the API such that we could
> > revise the API by providing an updated sub-type. Or perhaps there are
> > devices that want separate save vs restore regions, we could do that
> > with the sub-type.
>
> Got it. For the subtype, can we use it to specify how to use the region,
> to indicate it can be mmaped or not.
No, REGION_INFO tells us about mmap'ability and device state regions
can certainly support the sparse mmap capability, we already have full
ability to describe the mmap features of a region without dedicating a
sub-type to it. A sub-type must have a defined API, but use the
existing mechanisms for mmap.
> > > > + VFIO_REGION_SUBTYPE_DEVICE_STATE, &device_state)) {
> > > > + memcpy(&vdev->device_state, device_state,
> > > > + sizeof(struct vfio_region_info));
> > > > + g_free(device_state);
> > > > + } else {
> > > > + error_setg(&vdev->migration_blocker,
> > > > + "Migration disabled: cannot support device state region");
> > > > + migrate_add_blocker(vdev->migration_blocker, &err);
> >
> > This appears as if it's going to be rather verbose and generate errors
> > for anything not supporting migration, which is currently everything.
> > Maybe there should be an OnOffAuto vfio-pci device option for the user
> > to specify migration such that if migration=on is specified the device
> > will fail if it's not available. Otherwise the default would be auto.
> >
> We only get this error message when we try to migrate the guest with
> vfio-pci device which doesn't contain the device_state region, just like
> currently we set unmigratable=1.
Ok, I still wonder though if management tools might want the ability to
fail the device if it doesn't support migration via an OnOffAuto
option. I can imagine a customer that doesn't want to compromise the
migrate'ability of their VM and doesn't want to wait until a migration
is attempted to find out it's blocked. Thanks,
Alex
> > > > + if (err) {
> > > > + error_propagate(errp, err);
> > > > + error_free(vdev->migration_blocker);
> > > > + goto error;
> > > > + }
> > > > + }
> > > > +
> > >
> > > I think there should be a _PROBE ioctl before trying to setup region for
> > > migration. If vfio device driver or vendor driver for mdev device
> > > supports migration capability, _PROBE ioctl should return success along
> > > with region's information <index, size> which can be used to setup the
> > > region.
> >
> > How is _PROBE different from _REGION_INFO which already exists to tell
> > us information about the region?
> >
> > > > for (i = 0; i < PCI_ROM_SLOT; i++) {
> > > > vfio_bar_quirk_setup(vdev, i);
> > > > }
> > > > @@ -2884,6 +2904,10 @@ out_teardown:
> > > > vfio_teardown_msi(vdev);
> > > > vfio_bars_exit(vdev);
> > > > error:
> > > > + if (vdev->migration_blocker) {
> > > > + migrate_del_blocker(vdev->migration_blocker);
> > > > + error_free(vdev->migration_blocker);
> > > > + }
> > > > error_prepend(errp, ERR_PREFIX, vdev->vbasedev.name);
> > > > }
> > > >
> > > > @@ -3009,7 +3033,6 @@ static Property vfio_pci_dev_properties[] = {
> > > >
> > > > static const VMStateDescription vfio_pci_vmstate = {
> > > > .name = "vfio-pci",
> > > > - .unmigratable = 1,
> > >
> > > Based on the result of above _PROBE ioctl, 'unmigratable' should be set
> > > if vendor driver doesn't support migration capability.
> >
> > .unmigratable isn't really dynamically settable aiui, thus the
> > existence of the migration blocker.
> >
> > > > };
> > > >
> > > > static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
> > > > diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> > > > index 502a575..0ee1724 100644
> > > > --- a/hw/vfio/pci.h
> > > > +++ b/hw/vfio/pci.h
> > > > @@ -116,6 +116,8 @@ typedef struct VFIOPCIDevice {
> > > > VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */
> > > > VFIOVGA *vga; /* 0xa0000, 0x3b0, 0x3c0 */
> > > > void *igd_opregion;
> > > > + struct vfio_region_info device_state;
> > > > + Error *migration_blocker;
> > > > PCIHostDeviceAddress host;
> > > > EventNotifier err_notifier;
> > > > EventNotifier req_notifier;
> > > > diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
> > > > index 4312e96..e3380ad 100644
> > > > --- a/linux-headers/linux/vfio.h
> > > > +++ b/linux-headers/linux/vfio.h
> > > > @@ -297,9 +297,12 @@ struct vfio_region_info_cap_type {
> > > > #define VFIO_REGION_TYPE_PCI_VENDOR_MASK (0xffff)
> > > >
> > > > /* 8086 Vendor sub-types */
> > > > -#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION (1)
> > > > -#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2)
> > > > -#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG (3)
> > > > +#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION (1)
> > > > +#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2)
> > > > +#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG (3)
> > > > +
> > > > +/* Mdev sub-type for device state save and restore */
> > > > +#define VFIO_REGION_SUBTYPE_DEVICE_STATE (4)
> >
> > This continues on with the 0x8086 vendor sub-types, which is entirely
> > unnecessary as each vendor has an entire sub-type address space and we
> > shouldn't be using a vendor type anyway. Additionally, we need
> > significantly more documentation on the API for this region. Thanks,
> >
> > Alex
> >
> > > >
> > > > /**
> > > > * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
> > > >
>
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [Qemu-devel] [RFC PATCH V4 1/4] vfio: introduce a new VFIO subregion for mdev device migration support
2018-04-17 14:42 ` Alex Williamson
@ 2018-04-17 19:04 ` Dr. David Alan Gilbert
0 siblings, 0 replies; 7+ messages in thread
From: Dr. David Alan Gilbert @ 2018-04-17 19:04 UTC (permalink / raw)
To: Alex Williamson
Cc: Zhang, Yulei, Kirti Wankhede, qemu-devel, Tian, Kevin,
joonas.lahtinen, zhenyuw, Wang, Zhi A, quintela
* Alex Williamson (alex.williamson@redhat.com) wrote:
> On Tue, 17 Apr 2018 10:36:53 +0000
> "Zhang, Yulei" <yulei.zhang@intel.com> wrote:
>
> > > -----Original Message-----
> > > From: Alex Williamson [mailto:alex.williamson@redhat.com]
> > > Sent: Tuesday, April 17, 2018 4:15 AM
> > > To: Kirti Wankhede <kwankhede@nvidia.com>
> > > Cc: Zhang, Yulei <yulei.zhang@intel.com>; qemu-devel@nongnu.org; Tian,
> > > Kevin <kevin.tian@intel.com>; joonas.lahtinen@linux.intel.com;
> > > zhenyuw@linux.intel.com; Wang, Zhi A <zhi.a.wang@intel.com>;
> > > dgilbert@redhat.com; quintela@redhat.com
> > > Subject: Re: [RFC PATCH V4 1/4] vfio: introduce a new VFIO subregion for
> > > mdev device migration support
> > >
> > > On Mon, 16 Apr 2018 20:14:03 +0530
> > > Kirti Wankhede <kwankhede@nvidia.com> wrote:
> > >
> > > > On 4/10/2018 11:32 AM, Yulei Zhang wrote:
> > > > > New VFIO sub region VFIO_REGION_SUBTYPE_DEVICE_STATE is added
> > > > > to fetch and restore the status of mdev device vGPU during the
> > > > > live migration.
> > > > >
> > > > > Signed-off-by: Yulei Zhang <yulei.zhang@intel.com>
> > > > > ---
> > > > > hw/vfio/pci.c | 25 ++++++++++++++++++++++++-
> > > > > hw/vfio/pci.h | 2 ++
> > > > > linux-headers/linux/vfio.h | 9 ++++++---
> > > > > 3 files changed, 32 insertions(+), 4 deletions(-)
> > > > >
> > > > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> > > > > index c977ee3..f98a9dd 100644
> > > > > --- a/hw/vfio/pci.c
> > > > > +++ b/hw/vfio/pci.c
> > > > > @@ -32,6 +32,7 @@
> > > > > #include "pci.h"
> > > > > #include "trace.h"
> > > > > #include "qapi/error.h"
> > > > > +#include "migration/blocker.h"
> > > > >
> > > > > #define MSIX_CAP_LENGTH 12
> > > > >
> > > > > @@ -2821,6 +2822,25 @@ static void vfio_realize(PCIDevice *pdev, Error
> > > **errp)
> > > > > vfio_vga_quirk_setup(vdev);
> > > > > }
> > > > >
> > > > > + struct vfio_region_info *device_state;
> > > > > + /* device state region setup */
> > > > > + if (!vfio_get_dev_region_info(&vdev->vbasedev,
> > > > > + VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
> > >
> > > This is not how VENDOR_TYPE is meant to be used. We have a 32-bit type
> > > and 32-bit sub-type. When bit 31 of type is set (ie. VENDOR_TYPE),
> > > then the low 16-bits (VENDOR_MASK) defines a vendor specific set of
> > > sub-types. Using it as above, you're stomping on vendor 0x0000's
> > > vendor sub-types. I would expect non-vendor specific types to leave
> > > bit 31 of the type field clear. We could then define a universal type
> > > for device state with a sub-type specifying the API such that we could
> > > revise the API by providing an updated sub-type. Or perhaps there are
> > > devices that want separate save vs restore regions, we could do that
> > > with the sub-type.
> >
> > Got it. For the subtype, can we use it to specify how to use the region,
> > to indicate it can be mmaped or not.
>
> No, REGION_INFO tells us about mmap'ability and device state regions
> can certainly support the sparse mmap capability, we already have full
> ability to describe the mmap features of a region without dedicating a
> sub-type to it. A sub-type must have a defined API, but use the
> existing mechanisms for mmap.
>
> > > > > + VFIO_REGION_SUBTYPE_DEVICE_STATE, &device_state)) {
> > > > > + memcpy(&vdev->device_state, device_state,
> > > > > + sizeof(struct vfio_region_info));
> > > > > + g_free(device_state);
> > > > > + } else {
> > > > > + error_setg(&vdev->migration_blocker,
> > > > > + "Migration disabled: cannot support device state region");
> > > > > + migrate_add_blocker(vdev->migration_blocker, &err);
> > >
> > > This appears as if it's going to be rather verbose and generate errors
> > > for anything not supporting migration, which is currently everything.
> > > Maybe there should be an OnOffAuto vfio-pci device option for the user
> > > to specify migration such that if migration=on is specified the device
> > > will fail if it's not available. Otherwise the default would be auto.
> > >
> > We only get this error message when we try to migrate the guest with
> > vfio-pci device which doesn't contain the device_state region, just like
> > currently we set unmigratable=1.
>
> Ok, I still wonder though if management tools might want the ability to
> fail the device if it doesn't support migration via an OnOffAuto
> option. I can imagine a customer that doesn't want to compromise the
> migrate'ability of their VM and doesn't want to wait until a migration
> is attempted to find out it's blocked. Thanks,
The fix here is to check the return value from migrate_add_blocker
and bail if it's not happy.
If QEMU is started with --only-migratable it'll fail with the error
'disallowing migration blocker (--only migratable) for:' and then
your message.
Dave
> Alex
>
> > > > > + if (err) {
> > > > > + error_propagate(errp, err);
> > > > > + error_free(vdev->migration_blocker);
> > > > > + goto error;
> > > > > + }
> > > > > + }
> > > > > +
> > > >
> > > > I think there should be a _PROBE ioctl before trying to setup region for
> > > > migration. If vfio device driver or vendor driver for mdev device
> > > > supports migration capability, _PROBE ioctl should return success along
> > > > with region's information <index, size> which can be used to setup the
> > > > region.
> > >
> > > How is _PROBE different from _REGION_INFO which already exists to tell
> > > us information about the region?
> > >
> > > > > for (i = 0; i < PCI_ROM_SLOT; i++) {
> > > > > vfio_bar_quirk_setup(vdev, i);
> > > > > }
> > > > > @@ -2884,6 +2904,10 @@ out_teardown:
> > > > > vfio_teardown_msi(vdev);
> > > > > vfio_bars_exit(vdev);
> > > > > error:
> > > > > + if (vdev->migration_blocker) {
> > > > > + migrate_del_blocker(vdev->migration_blocker);
> > > > > + error_free(vdev->migration_blocker);
> > > > > + }
> > > > > error_prepend(errp, ERR_PREFIX, vdev->vbasedev.name);
> > > > > }
> > > > >
> > > > > @@ -3009,7 +3033,6 @@ static Property vfio_pci_dev_properties[] = {
> > > > >
> > > > > static const VMStateDescription vfio_pci_vmstate = {
> > > > > .name = "vfio-pci",
> > > > > - .unmigratable = 1,
> > > >
> > > > Based on the result of above _PROBE ioctl, 'unmigratable' should be set
> > > > if vendor driver doesn't support migration capability.
> > >
> > > .unmigratable isn't really dynamically settable aiui, thus the
> > > existence of the migration blocker.
> > >
> > > > > };
> > > > >
> > > > > static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
> > > > > diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> > > > > index 502a575..0ee1724 100644
> > > > > --- a/hw/vfio/pci.h
> > > > > +++ b/hw/vfio/pci.h
> > > > > @@ -116,6 +116,8 @@ typedef struct VFIOPCIDevice {
> > > > > VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */
> > > > > VFIOVGA *vga; /* 0xa0000, 0x3b0, 0x3c0 */
> > > > > void *igd_opregion;
> > > > > + struct vfio_region_info device_state;
> > > > > + Error *migration_blocker;
> > > > > PCIHostDeviceAddress host;
> > > > > EventNotifier err_notifier;
> > > > > EventNotifier req_notifier;
> > > > > diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
> > > > > index 4312e96..e3380ad 100644
> > > > > --- a/linux-headers/linux/vfio.h
> > > > > +++ b/linux-headers/linux/vfio.h
> > > > > @@ -297,9 +297,12 @@ struct vfio_region_info_cap_type {
> > > > > #define VFIO_REGION_TYPE_PCI_VENDOR_MASK (0xffff)
> > > > >
> > > > > /* 8086 Vendor sub-types */
> > > > > -#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION (1)
> > > > > -#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2)
> > > > > -#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG (3)
> > > > > +#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION (1)
> > > > > +#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2)
> > > > > +#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG (3)
> > > > > +
> > > > > +/* Mdev sub-type for device state save and restore */
> > > > > +#define VFIO_REGION_SUBTYPE_DEVICE_STATE (4)
> > >
> > > This continues on with the 0x8086 vendor sub-types, which is entirely
> > > unnecessary as each vendor has an entire sub-type address space and we
> > > shouldn't be using a vendor type anyway. Additionally, we need
> > > significantly more documentation on the API for this region. Thanks,
> > >
> > > Alex
> > >
> > > > >
> > > > > /**
> > > > > * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
> > > > >
> >
>
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [Qemu-devel] [RFC PATCH V4 1/4] vfio: introduce a new VFIO subregion for mdev device migration support
2018-04-16 20:15 ` Alex Williamson
2018-04-17 10:36 ` Zhang, Yulei
@ 2018-04-17 19:30 ` Kirti Wankhede
1 sibling, 0 replies; 7+ messages in thread
From: Kirti Wankhede @ 2018-04-17 19:30 UTC (permalink / raw)
To: Alex Williamson
Cc: Yulei Zhang, qemu-devel, kevin.tian, joonas.lahtinen, zhenyuw,
zhi.a.wang, dgilbert, quintela
On 4/17/2018 1:45 AM, Alex Williamson wrote:
> On Mon, 16 Apr 2018 20:14:03 +0530
> Kirti Wankhede <kwankhede@nvidia.com> wrote:
>
>> On 4/10/2018 11:32 AM, Yulei Zhang wrote:
>>> New VFIO sub region VFIO_REGION_SUBTYPE_DEVICE_STATE is added
>>> to fetch and restore the status of mdev device vGPU during the
>>> live migration.
>>>
>>> Signed-off-by: Yulei Zhang <yulei.zhang@intel.com>
>>> ---
>>> hw/vfio/pci.c | 25 ++++++++++++++++++++++++-
>>> hw/vfio/pci.h | 2 ++
>>> linux-headers/linux/vfio.h | 9 ++++++---
>>> 3 files changed, 32 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>>> index c977ee3..f98a9dd 100644
>>> --- a/hw/vfio/pci.c
>>> +++ b/hw/vfio/pci.c
>>> @@ -32,6 +32,7 @@
>>> #include "pci.h"
>>> #include "trace.h"
>>> #include "qapi/error.h"
>>> +#include "migration/blocker.h"
>>>
>>> #define MSIX_CAP_LENGTH 12
>>>
>>> @@ -2821,6 +2822,25 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>>> vfio_vga_quirk_setup(vdev);
>>> }
>>>
>>> + struct vfio_region_info *device_state;
>>> + /* device state region setup */
>>> + if (!vfio_get_dev_region_info(&vdev->vbasedev,
>>> + VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
>
> This is not how VENDOR_TYPE is meant to be used. We have a 32-bit type
> and 32-bit sub-type. When bit 31 of type is set (ie. VENDOR_TYPE),
> then the low 16-bits (VENDOR_MASK) defines a vendor specific set of
> sub-types. Using it as above, you're stomping on vendor 0x0000's
> vendor sub-types. I would expect non-vendor specific types to leave
> bit 31 of the type field clear. We could then define a universal type
> for device state with a sub-type specifying the API such that we could
> revise the API by providing an updated sub-type. Or perhaps there are
> devices that want separate save vs restore regions, we could do that
> with the sub-type.
>
>>> + VFIO_REGION_SUBTYPE_DEVICE_STATE, &device_state)) {
>>> + memcpy(&vdev->device_state, device_state,
>>> + sizeof(struct vfio_region_info));
>>> + g_free(device_state);
>>> + } else {
>>> + error_setg(&vdev->migration_blocker,
>>> + "Migration disabled: cannot support device state region");
>>> + migrate_add_blocker(vdev->migration_blocker, &err);
>
> This appears as if it's going to be rather verbose and generate errors
> for anything not supporting migration, which is currently everything.
> Maybe there should be an OnOffAuto vfio-pci device option for the user
> to specify migration such that if migration=on is specified the device
> will fail if it's not available. Otherwise the default would be auto.
>
>>> + if (err) {
>>> + error_propagate(errp, err);
>>> + error_free(vdev->migration_blocker);
>>> + goto error;
>>> + }
>>> + }
>>> +
>>
>> I think there should be a _PROBE ioctl before trying to setup region for
>> migration. If vfio device driver or vendor driver for mdev device
>> supports migration capability, _PROBE ioctl should return success along
>> with region's information <index, size> which can be used to setup the
>> region.
>
> How is _PROBE different from _REGION_INFO which already exists to tell
> us information about the region?
>
Fine with _REGION_INFO if SUBTYPE is non-vendor specific as you
mentioned above.
Thanks,
Kirti
>>> for (i = 0; i < PCI_ROM_SLOT; i++) {
>>> vfio_bar_quirk_setup(vdev, i);
>>> }
>>> @@ -2884,6 +2904,10 @@ out_teardown:
>>> vfio_teardown_msi(vdev);
>>> vfio_bars_exit(vdev);
>>> error:
>>> + if (vdev->migration_blocker) {
>>> + migrate_del_blocker(vdev->migration_blocker);
>>> + error_free(vdev->migration_blocker);
>>> + }
>>> error_prepend(errp, ERR_PREFIX, vdev->vbasedev.name);
>>> }
>>>
>>> @@ -3009,7 +3033,6 @@ static Property vfio_pci_dev_properties[] = {
>>>
>>> static const VMStateDescription vfio_pci_vmstate = {
>>> .name = "vfio-pci",
>>> - .unmigratable = 1,
>>
>> Based on the result of above _PROBE ioctl, 'unmigratable' should be set
>> if vendor driver doesn't support migration capability.
>
> .unmigratable isn't really dynamically settable aiui, thus the
> existence of the migration blocker.
>
>>> };
>>>
>>> static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
>>> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
>>> index 502a575..0ee1724 100644
>>> --- a/hw/vfio/pci.h
>>> +++ b/hw/vfio/pci.h
>>> @@ -116,6 +116,8 @@ typedef struct VFIOPCIDevice {
>>> VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */
>>> VFIOVGA *vga; /* 0xa0000, 0x3b0, 0x3c0 */
>>> void *igd_opregion;
>>> + struct vfio_region_info device_state;
>>> + Error *migration_blocker;
>>> PCIHostDeviceAddress host;
>>> EventNotifier err_notifier;
>>> EventNotifier req_notifier;
>>> diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
>>> index 4312e96..e3380ad 100644
>>> --- a/linux-headers/linux/vfio.h
>>> +++ b/linux-headers/linux/vfio.h
>>> @@ -297,9 +297,12 @@ struct vfio_region_info_cap_type {
>>> #define VFIO_REGION_TYPE_PCI_VENDOR_MASK (0xffff)
>>>
>>> /* 8086 Vendor sub-types */
>>> -#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION (1)
>>> -#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2)
>>> -#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG (3)
>>> +#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION (1)
>>> +#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2)
>>> +#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG (3)
>>> +
>>> +/* Mdev sub-type for device state save and restore */
>>> +#define VFIO_REGION_SUBTYPE_DEVICE_STATE (4)
>
> This continues on with the 0x8086 vendor sub-types, which is entirely
> unnecessary as each vendor has an entire sub-type address space and we
> shouldn't be using a vendor type anyway. Additionally, we need
> significantly more documentation on the API for this region. Thanks,
>
> Alex
>
>>>
>>> /**
>>> * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
>>>
>
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2018-04-17 19:30 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-04-10 6:02 [Qemu-devel] [RFC PATCH V4 1/4] vfio: introduce a new VFIO subregion for mdev device migration support Yulei Zhang
2018-04-16 14:44 ` Kirti Wankhede
2018-04-16 20:15 ` Alex Williamson
2018-04-17 10:36 ` Zhang, Yulei
2018-04-17 14:42 ` Alex Williamson
2018-04-17 19:04 ` Dr. David Alan Gilbert
2018-04-17 19:30 ` Kirti Wankhede
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.