All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [RFC PATCH V4 1/4] vfio: introduce a new VFIO subregion for mdev device migration support
@ 2018-04-10  6:02 Yulei Zhang
  2018-04-16 14:44 ` Kirti Wankhede
  0 siblings, 1 reply; 7+ messages in thread
From: Yulei Zhang @ 2018-04-10  6:02 UTC (permalink / raw)
  To: qemu-devel
  Cc: kevin.tian, joonas.lahtinen, zhenyuw, kwankhede, zhi.a.wang,
	alex.williamson, dgilbert, quintela, Yulei Zhang

New VFIO sub region VFIO_REGION_SUBTYPE_DEVICE_STATE is added
to fetch and restore the status of mdev device vGPU during the
live migration.

Signed-off-by: Yulei Zhang <yulei.zhang@intel.com>
---
 hw/vfio/pci.c              | 25 ++++++++++++++++++++++++-
 hw/vfio/pci.h              |  2 ++
 linux-headers/linux/vfio.h |  9 ++++++---
 3 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index c977ee3..f98a9dd 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -32,6 +32,7 @@
 #include "pci.h"
 #include "trace.h"
 #include "qapi/error.h"
+#include "migration/blocker.h"
 
 #define MSIX_CAP_LENGTH 12
 
@@ -2821,6 +2822,25 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
         vfio_vga_quirk_setup(vdev);
     }
 
+    struct vfio_region_info *device_state;
+    /* device state region setup */
+    if (!vfio_get_dev_region_info(&vdev->vbasedev,
+                VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
+                VFIO_REGION_SUBTYPE_DEVICE_STATE, &device_state)) {
+        memcpy(&vdev->device_state, device_state,
+               sizeof(struct vfio_region_info));
+        g_free(device_state);
+    } else {
+        error_setg(&vdev->migration_blocker,
+                "Migration disabled: cannot support device state region");
+        migrate_add_blocker(vdev->migration_blocker, &err);
+        if (err) {
+            error_propagate(errp, err);
+            error_free(vdev->migration_blocker);
+            goto error;
+        }
+    }
+
     for (i = 0; i < PCI_ROM_SLOT; i++) {
         vfio_bar_quirk_setup(vdev, i);
     }
@@ -2884,6 +2904,10 @@ out_teardown:
     vfio_teardown_msi(vdev);
     vfio_bars_exit(vdev);
 error:
+    if (vdev->migration_blocker) {
+        migrate_del_blocker(vdev->migration_blocker);
+        error_free(vdev->migration_blocker);
+    }
     error_prepend(errp, ERR_PREFIX, vdev->vbasedev.name);
 }
 
@@ -3009,7 +3033,6 @@ static Property vfio_pci_dev_properties[] = {
 
 static const VMStateDescription vfio_pci_vmstate = {
     .name = "vfio-pci",
-    .unmigratable = 1,
 };
 
 static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 502a575..0ee1724 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -116,6 +116,8 @@ typedef struct VFIOPCIDevice {
     VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */
     VFIOVGA *vga; /* 0xa0000, 0x3b0, 0x3c0 */
     void *igd_opregion;
+    struct vfio_region_info device_state;
+    Error *migration_blocker;
     PCIHostDeviceAddress host;
     EventNotifier err_notifier;
     EventNotifier req_notifier;
diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
index 4312e96..e3380ad 100644
--- a/linux-headers/linux/vfio.h
+++ b/linux-headers/linux/vfio.h
@@ -297,9 +297,12 @@ struct vfio_region_info_cap_type {
 #define VFIO_REGION_TYPE_PCI_VENDOR_MASK	(0xffff)
 
 /* 8086 Vendor sub-types */
-#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION	(1)
-#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG	(2)
-#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG	(3)
+#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION		(1)
+#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG		(2)
+#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG		(3)
+
+/* Mdev sub-type for device state save and restore */
+#define VFIO_REGION_SUBTYPE_DEVICE_STATE	(4)
 
 /**
  * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [Qemu-devel] [RFC PATCH V4 1/4] vfio: introduce a new VFIO subregion for mdev device migration support
  2018-04-10  6:02 [Qemu-devel] [RFC PATCH V4 1/4] vfio: introduce a new VFIO subregion for mdev device migration support Yulei Zhang
@ 2018-04-16 14:44 ` Kirti Wankhede
  2018-04-16 20:15   ` Alex Williamson
  0 siblings, 1 reply; 7+ messages in thread
From: Kirti Wankhede @ 2018-04-16 14:44 UTC (permalink / raw)
  To: Yulei Zhang, qemu-devel
  Cc: kevin.tian, joonas.lahtinen, zhenyuw, zhi.a.wang,
	alex.williamson, dgilbert, quintela



On 4/10/2018 11:32 AM, Yulei Zhang wrote:
> New VFIO sub region VFIO_REGION_SUBTYPE_DEVICE_STATE is added
> to fetch and restore the status of mdev device vGPU during the
> live migration.
> 
> Signed-off-by: Yulei Zhang <yulei.zhang@intel.com>
> ---
>  hw/vfio/pci.c              | 25 ++++++++++++++++++++++++-
>  hw/vfio/pci.h              |  2 ++
>  linux-headers/linux/vfio.h |  9 ++++++---
>  3 files changed, 32 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index c977ee3..f98a9dd 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -32,6 +32,7 @@
>  #include "pci.h"
>  #include "trace.h"
>  #include "qapi/error.h"
> +#include "migration/blocker.h"
>  
>  #define MSIX_CAP_LENGTH 12
>  
> @@ -2821,6 +2822,25 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>          vfio_vga_quirk_setup(vdev);
>      }
>  
> +    struct vfio_region_info *device_state;
> +    /* device state region setup */
> +    if (!vfio_get_dev_region_info(&vdev->vbasedev,
> +                VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
> +                VFIO_REGION_SUBTYPE_DEVICE_STATE, &device_state)) {
> +        memcpy(&vdev->device_state, device_state,
> +               sizeof(struct vfio_region_info));
> +        g_free(device_state);
> +    } else {
> +        error_setg(&vdev->migration_blocker,
> +                "Migration disabled: cannot support device state region");
> +        migrate_add_blocker(vdev->migration_blocker, &err);
> +        if (err) {
> +            error_propagate(errp, err);
> +            error_free(vdev->migration_blocker);
> +            goto error;
> +        }
> +    }
> +

I think there should be a _PROBE ioctl before trying to setup region for
migration. If vfio device driver or vendor driver for mdev device
supports migration capability, _PROBE ioctl should return success along
with region's information <index, size> which can be used to setup the
region.


>      for (i = 0; i < PCI_ROM_SLOT; i++) {
>          vfio_bar_quirk_setup(vdev, i);
>      }
> @@ -2884,6 +2904,10 @@ out_teardown:
>      vfio_teardown_msi(vdev);
>      vfio_bars_exit(vdev);
>  error:
> +    if (vdev->migration_blocker) {
> +        migrate_del_blocker(vdev->migration_blocker);
> +        error_free(vdev->migration_blocker);
> +    }
>      error_prepend(errp, ERR_PREFIX, vdev->vbasedev.name);
>  }
>  
> @@ -3009,7 +3033,6 @@ static Property vfio_pci_dev_properties[] = {
>  
>  static const VMStateDescription vfio_pci_vmstate = {
>      .name = "vfio-pci",
> -    .unmigratable = 1,

Based on the result of above _PROBE ioctl, 'unmigratable' should be set
if vendor driver doesn't support migration capability.

Thanks,
Kirti

>  };
>  
>  static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index 502a575..0ee1724 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -116,6 +116,8 @@ typedef struct VFIOPCIDevice {
>      VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */
>      VFIOVGA *vga; /* 0xa0000, 0x3b0, 0x3c0 */
>      void *igd_opregion;
> +    struct vfio_region_info device_state;
> +    Error *migration_blocker;
>      PCIHostDeviceAddress host;
>      EventNotifier err_notifier;
>      EventNotifier req_notifier;
> diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
> index 4312e96..e3380ad 100644
> --- a/linux-headers/linux/vfio.h
> +++ b/linux-headers/linux/vfio.h
> @@ -297,9 +297,12 @@ struct vfio_region_info_cap_type {
>  #define VFIO_REGION_TYPE_PCI_VENDOR_MASK	(0xffff)
>  
>  /* 8086 Vendor sub-types */
> -#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION	(1)
> -#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG	(2)
> -#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG	(3)
> +#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION		(1)
> +#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG		(2)
> +#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG		(3)
> +
> +/* Mdev sub-type for device state save and restore */
> +#define VFIO_REGION_SUBTYPE_DEVICE_STATE	(4)
>  
>  /**
>   * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
> 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Qemu-devel] [RFC PATCH V4 1/4] vfio: introduce a new VFIO subregion for mdev device migration support
  2018-04-16 14:44 ` Kirti Wankhede
@ 2018-04-16 20:15   ` Alex Williamson
  2018-04-17 10:36     ` Zhang, Yulei
  2018-04-17 19:30     ` Kirti Wankhede
  0 siblings, 2 replies; 7+ messages in thread
From: Alex Williamson @ 2018-04-16 20:15 UTC (permalink / raw)
  To: Kirti Wankhede
  Cc: Yulei Zhang, qemu-devel, kevin.tian, joonas.lahtinen, zhenyuw,
	zhi.a.wang, dgilbert, quintela

On Mon, 16 Apr 2018 20:14:03 +0530
Kirti Wankhede <kwankhede@nvidia.com> wrote:

> On 4/10/2018 11:32 AM, Yulei Zhang wrote:
> > New VFIO sub region VFIO_REGION_SUBTYPE_DEVICE_STATE is added
> > to fetch and restore the status of mdev device vGPU during the
> > live migration.
> > 
> > Signed-off-by: Yulei Zhang <yulei.zhang@intel.com>
> > ---
> >  hw/vfio/pci.c              | 25 ++++++++++++++++++++++++-
> >  hw/vfio/pci.h              |  2 ++
> >  linux-headers/linux/vfio.h |  9 ++++++---
> >  3 files changed, 32 insertions(+), 4 deletions(-)
> > 
> > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> > index c977ee3..f98a9dd 100644
> > --- a/hw/vfio/pci.c
> > +++ b/hw/vfio/pci.c
> > @@ -32,6 +32,7 @@
> >  #include "pci.h"
> >  #include "trace.h"
> >  #include "qapi/error.h"
> > +#include "migration/blocker.h"
> >  
> >  #define MSIX_CAP_LENGTH 12
> >  
> > @@ -2821,6 +2822,25 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
> >          vfio_vga_quirk_setup(vdev);
> >      }
> >  
> > +    struct vfio_region_info *device_state;
> > +    /* device state region setup */
> > +    if (!vfio_get_dev_region_info(&vdev->vbasedev,
> > +                VFIO_REGION_TYPE_PCI_VENDOR_TYPE,

This is not how VENDOR_TYPE is meant to be used.  We have a 32-bit type
and 32-bit sub-type.  When bit 31 of type is set (ie. VENDOR_TYPE),
then the low 16-bits (VENDOR_MASK) defines a vendor specific set of
sub-types.  Using it as above, you're stomping on vendor 0x0000's
vendor sub-types.  I would expect non-vendor specific types to leave
bit 31 of the type field clear.  We could then define a universal type
for device state with a sub-type specifying the API such that we could
revise the API by providing an updated sub-type.  Or perhaps there are
devices that want separate save vs restore regions, we could do that
with the sub-type.

> > +                VFIO_REGION_SUBTYPE_DEVICE_STATE, &device_state)) {
> > +        memcpy(&vdev->device_state, device_state,
> > +               sizeof(struct vfio_region_info));
> > +        g_free(device_state);
> > +    } else {
> > +        error_setg(&vdev->migration_blocker,
> > +                "Migration disabled: cannot support device state region");
> > +        migrate_add_blocker(vdev->migration_blocker, &err);

This appears as if it's going to be rather verbose and generate errors
for anything not supporting migration, which is currently everything.
Maybe there should be an OnOffAuto vfio-pci device option for the user
to specify migration such that if migration=on is specified the device
will fail if it's not available.  Otherwise the default would be auto.

> > +        if (err) {
> > +            error_propagate(errp, err);
> > +            error_free(vdev->migration_blocker);
> > +            goto error;
> > +        }
> > +    }
> > +  
> 
> I think there should be a _PROBE ioctl before trying to setup region for
> migration. If vfio device driver or vendor driver for mdev device
> supports migration capability, _PROBE ioctl should return success along
> with region's information <index, size> which can be used to setup the
> region.

How is _PROBE different from _REGION_INFO which already exists to tell
us information about the region?

> >      for (i = 0; i < PCI_ROM_SLOT; i++) {
> >          vfio_bar_quirk_setup(vdev, i);
> >      }
> > @@ -2884,6 +2904,10 @@ out_teardown:
> >      vfio_teardown_msi(vdev);
> >      vfio_bars_exit(vdev);
> >  error:
> > +    if (vdev->migration_blocker) {
> > +        migrate_del_blocker(vdev->migration_blocker);
> > +        error_free(vdev->migration_blocker);
> > +    }
> >      error_prepend(errp, ERR_PREFIX, vdev->vbasedev.name);
> >  }
> >  
> > @@ -3009,7 +3033,6 @@ static Property vfio_pci_dev_properties[] = {
> >  
> >  static const VMStateDescription vfio_pci_vmstate = {
> >      .name = "vfio-pci",
> > -    .unmigratable = 1,  
> 
> Based on the result of above _PROBE ioctl, 'unmigratable' should be set
> if vendor driver doesn't support migration capability.

.unmigratable isn't really dynamically settable aiui, thus the
existence of the migration blocker.

> >  };
> >  
> >  static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
> > diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> > index 502a575..0ee1724 100644
> > --- a/hw/vfio/pci.h
> > +++ b/hw/vfio/pci.h
> > @@ -116,6 +116,8 @@ typedef struct VFIOPCIDevice {
> >      VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */
> >      VFIOVGA *vga; /* 0xa0000, 0x3b0, 0x3c0 */
> >      void *igd_opregion;
> > +    struct vfio_region_info device_state;
> > +    Error *migration_blocker;
> >      PCIHostDeviceAddress host;
> >      EventNotifier err_notifier;
> >      EventNotifier req_notifier;
> > diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
> > index 4312e96..e3380ad 100644
> > --- a/linux-headers/linux/vfio.h
> > +++ b/linux-headers/linux/vfio.h
> > @@ -297,9 +297,12 @@ struct vfio_region_info_cap_type {
> >  #define VFIO_REGION_TYPE_PCI_VENDOR_MASK	(0xffff)
> >  
> >  /* 8086 Vendor sub-types */
> > -#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION	(1)
> > -#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG	(2)
> > -#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG	(3)
> > +#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION		(1)
> > +#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG		(2)
> > +#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG		(3)
> > +
> > +/* Mdev sub-type for device state save and restore */
> > +#define VFIO_REGION_SUBTYPE_DEVICE_STATE	(4)

This continues on with the 0x8086 vendor sub-types, which is entirely
unnecessary as each vendor has an entire sub-type address space and we
shouldn't be using a vendor type anyway.  Additionally, we need
significantly more documentation on the API for this region.  Thanks,

Alex

> >  
> >  /**
> >   * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
> >   

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Qemu-devel] [RFC PATCH V4 1/4] vfio: introduce a new VFIO subregion for mdev device migration support
  2018-04-16 20:15   ` Alex Williamson
@ 2018-04-17 10:36     ` Zhang, Yulei
  2018-04-17 14:42       ` Alex Williamson
  2018-04-17 19:30     ` Kirti Wankhede
  1 sibling, 1 reply; 7+ messages in thread
From: Zhang, Yulei @ 2018-04-17 10:36 UTC (permalink / raw)
  To: Alex Williamson, Kirti Wankhede
  Cc: qemu-devel, Tian, Kevin, joonas.lahtinen, zhenyuw, Wang, Zhi A,
	dgilbert, quintela



> -----Original Message-----
> From: Alex Williamson [mailto:alex.williamson@redhat.com]
> Sent: Tuesday, April 17, 2018 4:15 AM
> To: Kirti Wankhede <kwankhede@nvidia.com>
> Cc: Zhang, Yulei <yulei.zhang@intel.com>; qemu-devel@nongnu.org; Tian,
> Kevin <kevin.tian@intel.com>; joonas.lahtinen@linux.intel.com;
> zhenyuw@linux.intel.com; Wang, Zhi A <zhi.a.wang@intel.com>;
> dgilbert@redhat.com; quintela@redhat.com
> Subject: Re: [RFC PATCH V4 1/4] vfio: introduce a new VFIO subregion for
> mdev device migration support
> 
> On Mon, 16 Apr 2018 20:14:03 +0530
> Kirti Wankhede <kwankhede@nvidia.com> wrote:
> 
> > On 4/10/2018 11:32 AM, Yulei Zhang wrote:
> > > New VFIO sub region VFIO_REGION_SUBTYPE_DEVICE_STATE is added
> > > to fetch and restore the status of mdev device vGPU during the
> > > live migration.
> > >
> > > Signed-off-by: Yulei Zhang <yulei.zhang@intel.com>
> > > ---
> > >  hw/vfio/pci.c              | 25 ++++++++++++++++++++++++-
> > >  hw/vfio/pci.h              |  2 ++
> > >  linux-headers/linux/vfio.h |  9 ++++++---
> > >  3 files changed, 32 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> > > index c977ee3..f98a9dd 100644
> > > --- a/hw/vfio/pci.c
> > > +++ b/hw/vfio/pci.c
> > > @@ -32,6 +32,7 @@
> > >  #include "pci.h"
> > >  #include "trace.h"
> > >  #include "qapi/error.h"
> > > +#include "migration/blocker.h"
> > >
> > >  #define MSIX_CAP_LENGTH 12
> > >
> > > @@ -2821,6 +2822,25 @@ static void vfio_realize(PCIDevice *pdev, Error
> **errp)
> > >          vfio_vga_quirk_setup(vdev);
> > >      }
> > >
> > > +    struct vfio_region_info *device_state;
> > > +    /* device state region setup */
> > > +    if (!vfio_get_dev_region_info(&vdev->vbasedev,
> > > +                VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
> 
> This is not how VENDOR_TYPE is meant to be used.  We have a 32-bit type
> and 32-bit sub-type.  When bit 31 of type is set (ie. VENDOR_TYPE),
> then the low 16-bits (VENDOR_MASK) defines a vendor specific set of
> sub-types.  Using it as above, you're stomping on vendor 0x0000's
> vendor sub-types.  I would expect non-vendor specific types to leave
> bit 31 of the type field clear.  We could then define a universal type
> for device state with a sub-type specifying the API such that we could
> revise the API by providing an updated sub-type.  Or perhaps there are
> devices that want separate save vs restore regions, we could do that
> with the sub-type.

Got it. For the subtype, can we use it to specify how to use the region,
to indicate it can be mmaped or not. 

> 
> > > +                VFIO_REGION_SUBTYPE_DEVICE_STATE, &device_state)) {
> > > +        memcpy(&vdev->device_state, device_state,
> > > +               sizeof(struct vfio_region_info));
> > > +        g_free(device_state);
> > > +    } else {
> > > +        error_setg(&vdev->migration_blocker,
> > > +                "Migration disabled: cannot support device state region");
> > > +        migrate_add_blocker(vdev->migration_blocker, &err);
> 
> This appears as if it's going to be rather verbose and generate errors
> for anything not supporting migration, which is currently everything.
> Maybe there should be an OnOffAuto vfio-pci device option for the user
> to specify migration such that if migration=on is specified the device
> will fail if it's not available.  Otherwise the default would be auto.
> 
We only get this error message when we try to migrate the guest with
vfio-pci device which doesn't contain the device_state region, just like 
currently we set unmigratable=1.

> > > +        if (err) {
> > > +            error_propagate(errp, err);
> > > +            error_free(vdev->migration_blocker);
> > > +            goto error;
> > > +        }
> > > +    }
> > > +
> >
> > I think there should be a _PROBE ioctl before trying to setup region for
> > migration. If vfio device driver or vendor driver for mdev device
> > supports migration capability, _PROBE ioctl should return success along
> > with region's information <index, size> which can be used to setup the
> > region.
> 
> How is _PROBE different from _REGION_INFO which already exists to tell
> us information about the region?
> 
> > >      for (i = 0; i < PCI_ROM_SLOT; i++) {
> > >          vfio_bar_quirk_setup(vdev, i);
> > >      }
> > > @@ -2884,6 +2904,10 @@ out_teardown:
> > >      vfio_teardown_msi(vdev);
> > >      vfio_bars_exit(vdev);
> > >  error:
> > > +    if (vdev->migration_blocker) {
> > > +        migrate_del_blocker(vdev->migration_blocker);
> > > +        error_free(vdev->migration_blocker);
> > > +    }
> > >      error_prepend(errp, ERR_PREFIX, vdev->vbasedev.name);
> > >  }
> > >
> > > @@ -3009,7 +3033,6 @@ static Property vfio_pci_dev_properties[] = {
> > >
> > >  static const VMStateDescription vfio_pci_vmstate = {
> > >      .name = "vfio-pci",
> > > -    .unmigratable = 1,
> >
> > Based on the result of above _PROBE ioctl, 'unmigratable' should be set
> > if vendor driver doesn't support migration capability.
> 
> .unmigratable isn't really dynamically settable aiui, thus the
> existence of the migration blocker.
> 
> > >  };
> > >
> > >  static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
> > > diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> > > index 502a575..0ee1724 100644
> > > --- a/hw/vfio/pci.h
> > > +++ b/hw/vfio/pci.h
> > > @@ -116,6 +116,8 @@ typedef struct VFIOPCIDevice {
> > >      VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */
> > >      VFIOVGA *vga; /* 0xa0000, 0x3b0, 0x3c0 */
> > >      void *igd_opregion;
> > > +    struct vfio_region_info device_state;
> > > +    Error *migration_blocker;
> > >      PCIHostDeviceAddress host;
> > >      EventNotifier err_notifier;
> > >      EventNotifier req_notifier;
> > > diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
> > > index 4312e96..e3380ad 100644
> > > --- a/linux-headers/linux/vfio.h
> > > +++ b/linux-headers/linux/vfio.h
> > > @@ -297,9 +297,12 @@ struct vfio_region_info_cap_type {
> > >  #define VFIO_REGION_TYPE_PCI_VENDOR_MASK	(0xffff)
> > >
> > >  /* 8086 Vendor sub-types */
> > > -#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION	(1)
> > > -#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG	(2)
> > > -#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG	(3)
> > > +#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION		(1)
> > > +#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG		(2)
> > > +#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG		(3)
> > > +
> > > +/* Mdev sub-type for device state save and restore */
> > > +#define VFIO_REGION_SUBTYPE_DEVICE_STATE	(4)
> 
> This continues on with the 0x8086 vendor sub-types, which is entirely
> unnecessary as each vendor has an entire sub-type address space and we
> shouldn't be using a vendor type anyway.  Additionally, we need
> significantly more documentation on the API for this region.  Thanks,
> 
> Alex
> 
> > >
> > >  /**
> > >   * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
> > >

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Qemu-devel] [RFC PATCH V4 1/4] vfio: introduce a new VFIO subregion for mdev device migration support
  2018-04-17 10:36     ` Zhang, Yulei
@ 2018-04-17 14:42       ` Alex Williamson
  2018-04-17 19:04         ` Dr. David Alan Gilbert
  0 siblings, 1 reply; 7+ messages in thread
From: Alex Williamson @ 2018-04-17 14:42 UTC (permalink / raw)
  To: Zhang, Yulei
  Cc: Kirti Wankhede, qemu-devel, Tian, Kevin, joonas.lahtinen,
	zhenyuw, Wang, Zhi A, dgilbert, quintela

On Tue, 17 Apr 2018 10:36:53 +0000
"Zhang, Yulei" <yulei.zhang@intel.com> wrote:

> > -----Original Message-----
> > From: Alex Williamson [mailto:alex.williamson@redhat.com]
> > Sent: Tuesday, April 17, 2018 4:15 AM
> > To: Kirti Wankhede <kwankhede@nvidia.com>
> > Cc: Zhang, Yulei <yulei.zhang@intel.com>; qemu-devel@nongnu.org; Tian,
> > Kevin <kevin.tian@intel.com>; joonas.lahtinen@linux.intel.com;
> > zhenyuw@linux.intel.com; Wang, Zhi A <zhi.a.wang@intel.com>;
> > dgilbert@redhat.com; quintela@redhat.com
> > Subject: Re: [RFC PATCH V4 1/4] vfio: introduce a new VFIO subregion for
> > mdev device migration support
> > 
> > On Mon, 16 Apr 2018 20:14:03 +0530
> > Kirti Wankhede <kwankhede@nvidia.com> wrote:
> >   
> > > On 4/10/2018 11:32 AM, Yulei Zhang wrote:  
> > > > New VFIO sub region VFIO_REGION_SUBTYPE_DEVICE_STATE is added
> > > > to fetch and restore the status of mdev device vGPU during the
> > > > live migration.
> > > >
> > > > Signed-off-by: Yulei Zhang <yulei.zhang@intel.com>
> > > > ---
> > > >  hw/vfio/pci.c              | 25 ++++++++++++++++++++++++-
> > > >  hw/vfio/pci.h              |  2 ++
> > > >  linux-headers/linux/vfio.h |  9 ++++++---
> > > >  3 files changed, 32 insertions(+), 4 deletions(-)
> > > >
> > > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> > > > index c977ee3..f98a9dd 100644
> > > > --- a/hw/vfio/pci.c
> > > > +++ b/hw/vfio/pci.c
> > > > @@ -32,6 +32,7 @@
> > > >  #include "pci.h"
> > > >  #include "trace.h"
> > > >  #include "qapi/error.h"
> > > > +#include "migration/blocker.h"
> > > >
> > > >  #define MSIX_CAP_LENGTH 12
> > > >
> > > > @@ -2821,6 +2822,25 @@ static void vfio_realize(PCIDevice *pdev, Error  
> > **errp)  
> > > >          vfio_vga_quirk_setup(vdev);
> > > >      }
> > > >
> > > > +    struct vfio_region_info *device_state;
> > > > +    /* device state region setup */
> > > > +    if (!vfio_get_dev_region_info(&vdev->vbasedev,
> > > > +                VFIO_REGION_TYPE_PCI_VENDOR_TYPE,  
> > 
> > This is not how VENDOR_TYPE is meant to be used.  We have a 32-bit type
> > and 32-bit sub-type.  When bit 31 of type is set (ie. VENDOR_TYPE),
> > then the low 16-bits (VENDOR_MASK) defines a vendor specific set of
> > sub-types.  Using it as above, you're stomping on vendor 0x0000's
> > vendor sub-types.  I would expect non-vendor specific types to leave
> > bit 31 of the type field clear.  We could then define a universal type
> > for device state with a sub-type specifying the API such that we could
> > revise the API by providing an updated sub-type.  Or perhaps there are
> > devices that want separate save vs restore regions, we could do that
> > with the sub-type.  
> 
> Got it. For the subtype, can we use it to specify how to use the region,
> to indicate it can be mmaped or not. 

No, REGION_INFO tells us about mmap'ability and device state regions
can certainly support the sparse mmap capability, we already have full
ability to describe the mmap features of a region without dedicating a
sub-type to it.  A sub-type must have a defined API, but use the
existing mechanisms for mmap.

> > > > +                VFIO_REGION_SUBTYPE_DEVICE_STATE, &device_state)) {
> > > > +        memcpy(&vdev->device_state, device_state,
> > > > +               sizeof(struct vfio_region_info));
> > > > +        g_free(device_state);
> > > > +    } else {
> > > > +        error_setg(&vdev->migration_blocker,
> > > > +                "Migration disabled: cannot support device state region");
> > > > +        migrate_add_blocker(vdev->migration_blocker, &err);  
> > 
> > This appears as if it's going to be rather verbose and generate errors
> > for anything not supporting migration, which is currently everything.
> > Maybe there should be an OnOffAuto vfio-pci device option for the user
> > to specify migration such that if migration=on is specified the device
> > will fail if it's not available.  Otherwise the default would be auto.
> >   
> We only get this error message when we try to migrate the guest with
> vfio-pci device which doesn't contain the device_state region, just like 
> currently we set unmigratable=1.

Ok, I still wonder though if management tools might want the ability to
fail the device if it doesn't support migration via an OnOffAuto
option.  I can imagine a customer that doesn't want to compromise the
migrate'ability of their VM and doesn't want to wait until a migration
is attempted to find out it's blocked.  Thanks,

Alex
 
> > > > +        if (err) {
> > > > +            error_propagate(errp, err);
> > > > +            error_free(vdev->migration_blocker);
> > > > +            goto error;
> > > > +        }
> > > > +    }
> > > > +  
> > >
> > > I think there should be a _PROBE ioctl before trying to setup region for
> > > migration. If vfio device driver or vendor driver for mdev device
> > > supports migration capability, _PROBE ioctl should return success along
> > > with region's information <index, size> which can be used to setup the
> > > region.  
> > 
> > How is _PROBE different from _REGION_INFO which already exists to tell
> > us information about the region?
> >   
> > > >      for (i = 0; i < PCI_ROM_SLOT; i++) {
> > > >          vfio_bar_quirk_setup(vdev, i);
> > > >      }
> > > > @@ -2884,6 +2904,10 @@ out_teardown:
> > > >      vfio_teardown_msi(vdev);
> > > >      vfio_bars_exit(vdev);
> > > >  error:
> > > > +    if (vdev->migration_blocker) {
> > > > +        migrate_del_blocker(vdev->migration_blocker);
> > > > +        error_free(vdev->migration_blocker);
> > > > +    }
> > > >      error_prepend(errp, ERR_PREFIX, vdev->vbasedev.name);
> > > >  }
> > > >
> > > > @@ -3009,7 +3033,6 @@ static Property vfio_pci_dev_properties[] = {
> > > >
> > > >  static const VMStateDescription vfio_pci_vmstate = {
> > > >      .name = "vfio-pci",
> > > > -    .unmigratable = 1,  
> > >
> > > Based on the result of above _PROBE ioctl, 'unmigratable' should be set
> > > if vendor driver doesn't support migration capability.  
> > 
> > .unmigratable isn't really dynamically settable aiui, thus the
> > existence of the migration blocker.
> >   
> > > >  };
> > > >
> > > >  static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
> > > > diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> > > > index 502a575..0ee1724 100644
> > > > --- a/hw/vfio/pci.h
> > > > +++ b/hw/vfio/pci.h
> > > > @@ -116,6 +116,8 @@ typedef struct VFIOPCIDevice {
> > > >      VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */
> > > >      VFIOVGA *vga; /* 0xa0000, 0x3b0, 0x3c0 */
> > > >      void *igd_opregion;
> > > > +    struct vfio_region_info device_state;
> > > > +    Error *migration_blocker;
> > > >      PCIHostDeviceAddress host;
> > > >      EventNotifier err_notifier;
> > > >      EventNotifier req_notifier;
> > > > diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
> > > > index 4312e96..e3380ad 100644
> > > > --- a/linux-headers/linux/vfio.h
> > > > +++ b/linux-headers/linux/vfio.h
> > > > @@ -297,9 +297,12 @@ struct vfio_region_info_cap_type {
> > > >  #define VFIO_REGION_TYPE_PCI_VENDOR_MASK	(0xffff)
> > > >
> > > >  /* 8086 Vendor sub-types */
> > > > -#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION	(1)
> > > > -#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG	(2)
> > > > -#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG	(3)
> > > > +#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION		(1)
> > > > +#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG		(2)
> > > > +#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG		(3)
> > > > +
> > > > +/* Mdev sub-type for device state save and restore */
> > > > +#define VFIO_REGION_SUBTYPE_DEVICE_STATE	(4)  
> > 
> > This continues on with the 0x8086 vendor sub-types, which is entirely
> > unnecessary as each vendor has an entire sub-type address space and we
> > shouldn't be using a vendor type anyway.  Additionally, we need
> > significantly more documentation on the API for this region.  Thanks,
> > 
> > Alex
> >   
> > > >
> > > >  /**
> > > >   * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
> > > >  
> 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Qemu-devel] [RFC PATCH V4 1/4] vfio: introduce a new VFIO subregion for mdev device migration support
  2018-04-17 14:42       ` Alex Williamson
@ 2018-04-17 19:04         ` Dr. David Alan Gilbert
  0 siblings, 0 replies; 7+ messages in thread
From: Dr. David Alan Gilbert @ 2018-04-17 19:04 UTC (permalink / raw)
  To: Alex Williamson
  Cc: Zhang, Yulei, Kirti Wankhede, qemu-devel, Tian, Kevin,
	joonas.lahtinen, zhenyuw, Wang, Zhi A, quintela

* Alex Williamson (alex.williamson@redhat.com) wrote:
> On Tue, 17 Apr 2018 10:36:53 +0000
> "Zhang, Yulei" <yulei.zhang@intel.com> wrote:
> 
> > > -----Original Message-----
> > > From: Alex Williamson [mailto:alex.williamson@redhat.com]
> > > Sent: Tuesday, April 17, 2018 4:15 AM
> > > To: Kirti Wankhede <kwankhede@nvidia.com>
> > > Cc: Zhang, Yulei <yulei.zhang@intel.com>; qemu-devel@nongnu.org; Tian,
> > > Kevin <kevin.tian@intel.com>; joonas.lahtinen@linux.intel.com;
> > > zhenyuw@linux.intel.com; Wang, Zhi A <zhi.a.wang@intel.com>;
> > > dgilbert@redhat.com; quintela@redhat.com
> > > Subject: Re: [RFC PATCH V4 1/4] vfio: introduce a new VFIO subregion for
> > > mdev device migration support
> > > 
> > > On Mon, 16 Apr 2018 20:14:03 +0530
> > > Kirti Wankhede <kwankhede@nvidia.com> wrote:
> > >   
> > > > On 4/10/2018 11:32 AM, Yulei Zhang wrote:  
> > > > > New VFIO sub region VFIO_REGION_SUBTYPE_DEVICE_STATE is added
> > > > > to fetch and restore the status of mdev device vGPU during the
> > > > > live migration.
> > > > >
> > > > > Signed-off-by: Yulei Zhang <yulei.zhang@intel.com>
> > > > > ---
> > > > >  hw/vfio/pci.c              | 25 ++++++++++++++++++++++++-
> > > > >  hw/vfio/pci.h              |  2 ++
> > > > >  linux-headers/linux/vfio.h |  9 ++++++---
> > > > >  3 files changed, 32 insertions(+), 4 deletions(-)
> > > > >
> > > > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> > > > > index c977ee3..f98a9dd 100644
> > > > > --- a/hw/vfio/pci.c
> > > > > +++ b/hw/vfio/pci.c
> > > > > @@ -32,6 +32,7 @@
> > > > >  #include "pci.h"
> > > > >  #include "trace.h"
> > > > >  #include "qapi/error.h"
> > > > > +#include "migration/blocker.h"
> > > > >
> > > > >  #define MSIX_CAP_LENGTH 12
> > > > >
> > > > > @@ -2821,6 +2822,25 @@ static void vfio_realize(PCIDevice *pdev, Error  
> > > **errp)  
> > > > >          vfio_vga_quirk_setup(vdev);
> > > > >      }
> > > > >
> > > > > +    struct vfio_region_info *device_state;
> > > > > +    /* device state region setup */
> > > > > +    if (!vfio_get_dev_region_info(&vdev->vbasedev,
> > > > > +                VFIO_REGION_TYPE_PCI_VENDOR_TYPE,  
> > > 
> > > This is not how VENDOR_TYPE is meant to be used.  We have a 32-bit type
> > > and 32-bit sub-type.  When bit 31 of type is set (ie. VENDOR_TYPE),
> > > then the low 16-bits (VENDOR_MASK) defines a vendor specific set of
> > > sub-types.  Using it as above, you're stomping on vendor 0x0000's
> > > vendor sub-types.  I would expect non-vendor specific types to leave
> > > bit 31 of the type field clear.  We could then define a universal type
> > > for device state with a sub-type specifying the API such that we could
> > > revise the API by providing an updated sub-type.  Or perhaps there are
> > > devices that want separate save vs restore regions, we could do that
> > > with the sub-type.  
> > 
> > Got it. For the subtype, can we use it to specify how to use the region,
> > to indicate it can be mmaped or not. 
> 
> No, REGION_INFO tells us about mmap'ability and device state regions
> can certainly support the sparse mmap capability, we already have full
> ability to describe the mmap features of a region without dedicating a
> sub-type to it.  A sub-type must have a defined API, but use the
> existing mechanisms for mmap.
> 
> > > > > +                VFIO_REGION_SUBTYPE_DEVICE_STATE, &device_state)) {
> > > > > +        memcpy(&vdev->device_state, device_state,
> > > > > +               sizeof(struct vfio_region_info));
> > > > > +        g_free(device_state);
> > > > > +    } else {
> > > > > +        error_setg(&vdev->migration_blocker,
> > > > > +                "Migration disabled: cannot support device state region");
> > > > > +        migrate_add_blocker(vdev->migration_blocker, &err);  
> > > 
> > > This appears as if it's going to be rather verbose and generate errors
> > > for anything not supporting migration, which is currently everything.
> > > Maybe there should be an OnOffAuto vfio-pci device option for the user
> > > to specify migration such that if migration=on is specified the device
> > > will fail if it's not available.  Otherwise the default would be auto.
> > >   
> > We only get this error message when we try to migrate the guest with
> > vfio-pci device which doesn't contain the device_state region, just like 
> > currently we set unmigratable=1.
> 
> Ok, I still wonder though if management tools might want the ability to
> fail the device if it doesn't support migration via an OnOffAuto
> option.  I can imagine a customer that doesn't want to compromise the
> migrate'ability of their VM and doesn't want to wait until a migration
> is attempted to find out it's blocked.  Thanks,

The fix here is to check the return value from migrate_add_blocker
and bail if it's not happy.
If QEMU is started with --only-migratable it'll fail with the error
   'disallowing migration blocker (--only migratable) for:' and then
your message.

Dave

> Alex
>  
> > > > > +        if (err) {
> > > > > +            error_propagate(errp, err);
> > > > > +            error_free(vdev->migration_blocker);
> > > > > +            goto error;
> > > > > +        }
> > > > > +    }
> > > > > +  
> > > >
> > > > I think there should be a _PROBE ioctl before trying to setup region for
> > > > migration. If vfio device driver or vendor driver for mdev device
> > > > supports migration capability, _PROBE ioctl should return success along
> > > > with region's information <index, size> which can be used to setup the
> > > > region.  
> > > 
> > > How is _PROBE different from _REGION_INFO which already exists to tell
> > > us information about the region?
> > >   
> > > > >      for (i = 0; i < PCI_ROM_SLOT; i++) {
> > > > >          vfio_bar_quirk_setup(vdev, i);
> > > > >      }
> > > > > @@ -2884,6 +2904,10 @@ out_teardown:
> > > > >      vfio_teardown_msi(vdev);
> > > > >      vfio_bars_exit(vdev);
> > > > >  error:
> > > > > +    if (vdev->migration_blocker) {
> > > > > +        migrate_del_blocker(vdev->migration_blocker);
> > > > > +        error_free(vdev->migration_blocker);
> > > > > +    }
> > > > >      error_prepend(errp, ERR_PREFIX, vdev->vbasedev.name);
> > > > >  }
> > > > >
> > > > > @@ -3009,7 +3033,6 @@ static Property vfio_pci_dev_properties[] = {
> > > > >
> > > > >  static const VMStateDescription vfio_pci_vmstate = {
> > > > >      .name = "vfio-pci",
> > > > > -    .unmigratable = 1,  
> > > >
> > > > Based on the result of above _PROBE ioctl, 'unmigratable' should be set
> > > > if vendor driver doesn't support migration capability.  
> > > 
> > > .unmigratable isn't really dynamically settable aiui, thus the
> > > existence of the migration blocker.
> > >   
> > > > >  };
> > > > >
> > > > >  static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
> > > > > diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> > > > > index 502a575..0ee1724 100644
> > > > > --- a/hw/vfio/pci.h
> > > > > +++ b/hw/vfio/pci.h
> > > > > @@ -116,6 +116,8 @@ typedef struct VFIOPCIDevice {
> > > > >      VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */
> > > > >      VFIOVGA *vga; /* 0xa0000, 0x3b0, 0x3c0 */
> > > > >      void *igd_opregion;
> > > > > +    struct vfio_region_info device_state;
> > > > > +    Error *migration_blocker;
> > > > >      PCIHostDeviceAddress host;
> > > > >      EventNotifier err_notifier;
> > > > >      EventNotifier req_notifier;
> > > > > diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
> > > > > index 4312e96..e3380ad 100644
> > > > > --- a/linux-headers/linux/vfio.h
> > > > > +++ b/linux-headers/linux/vfio.h
> > > > > @@ -297,9 +297,12 @@ struct vfio_region_info_cap_type {
> > > > >  #define VFIO_REGION_TYPE_PCI_VENDOR_MASK	(0xffff)
> > > > >
> > > > >  /* 8086 Vendor sub-types */
> > > > > -#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION	(1)
> > > > > -#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG	(2)
> > > > > -#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG	(3)
> > > > > +#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION		(1)
> > > > > +#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG		(2)
> > > > > +#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG		(3)
> > > > > +
> > > > > +/* Mdev sub-type for device state save and restore */
> > > > > +#define VFIO_REGION_SUBTYPE_DEVICE_STATE	(4)  
> > > 
> > > This continues on with the 0x8086 vendor sub-types, which is entirely
> > > unnecessary as each vendor has an entire sub-type address space and we
> > > shouldn't be using a vendor type anyway.  Additionally, we need
> > > significantly more documentation on the API for this region.  Thanks,
> > > 
> > > Alex
> > >   
> > > > >
> > > > >  /**
> > > > >   * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
> > > > >  
> > 
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Qemu-devel] [RFC PATCH V4 1/4] vfio: introduce a new VFIO subregion for mdev device migration support
  2018-04-16 20:15   ` Alex Williamson
  2018-04-17 10:36     ` Zhang, Yulei
@ 2018-04-17 19:30     ` Kirti Wankhede
  1 sibling, 0 replies; 7+ messages in thread
From: Kirti Wankhede @ 2018-04-17 19:30 UTC (permalink / raw)
  To: Alex Williamson
  Cc: Yulei Zhang, qemu-devel, kevin.tian, joonas.lahtinen, zhenyuw,
	zhi.a.wang, dgilbert, quintela



On 4/17/2018 1:45 AM, Alex Williamson wrote:
> On Mon, 16 Apr 2018 20:14:03 +0530
> Kirti Wankhede <kwankhede@nvidia.com> wrote:
> 
>> On 4/10/2018 11:32 AM, Yulei Zhang wrote:
>>> New VFIO sub region VFIO_REGION_SUBTYPE_DEVICE_STATE is added
>>> to fetch and restore the status of mdev device vGPU during the
>>> live migration.
>>>
>>> Signed-off-by: Yulei Zhang <yulei.zhang@intel.com>
>>> ---
>>>  hw/vfio/pci.c              | 25 ++++++++++++++++++++++++-
>>>  hw/vfio/pci.h              |  2 ++
>>>  linux-headers/linux/vfio.h |  9 ++++++---
>>>  3 files changed, 32 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>>> index c977ee3..f98a9dd 100644
>>> --- a/hw/vfio/pci.c
>>> +++ b/hw/vfio/pci.c
>>> @@ -32,6 +32,7 @@
>>>  #include "pci.h"
>>>  #include "trace.h"
>>>  #include "qapi/error.h"
>>> +#include "migration/blocker.h"
>>>  
>>>  #define MSIX_CAP_LENGTH 12
>>>  
>>> @@ -2821,6 +2822,25 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>>>          vfio_vga_quirk_setup(vdev);
>>>      }
>>>  
>>> +    struct vfio_region_info *device_state;
>>> +    /* device state region setup */
>>> +    if (!vfio_get_dev_region_info(&vdev->vbasedev,
>>> +                VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
> 
> This is not how VENDOR_TYPE is meant to be used.  We have a 32-bit type
> and 32-bit sub-type.  When bit 31 of type is set (ie. VENDOR_TYPE),
> then the low 16-bits (VENDOR_MASK) defines a vendor specific set of
> sub-types.  Using it as above, you're stomping on vendor 0x0000's
> vendor sub-types.  I would expect non-vendor specific types to leave
> bit 31 of the type field clear.  We could then define a universal type
> for device state with a sub-type specifying the API such that we could
> revise the API by providing an updated sub-type.  Or perhaps there are
> devices that want separate save vs restore regions, we could do that
> with the sub-type.
> 
>>> +                VFIO_REGION_SUBTYPE_DEVICE_STATE, &device_state)) {
>>> +        memcpy(&vdev->device_state, device_state,
>>> +               sizeof(struct vfio_region_info));
>>> +        g_free(device_state);
>>> +    } else {
>>> +        error_setg(&vdev->migration_blocker,
>>> +                "Migration disabled: cannot support device state region");
>>> +        migrate_add_blocker(vdev->migration_blocker, &err);
> 
> This appears as if it's going to be rather verbose and generate errors
> for anything not supporting migration, which is currently everything.
> Maybe there should be an OnOffAuto vfio-pci device option for the user
> to specify migration such that if migration=on is specified the device
> will fail if it's not available.  Otherwise the default would be auto.
> 
>>> +        if (err) {
>>> +            error_propagate(errp, err);
>>> +            error_free(vdev->migration_blocker);
>>> +            goto error;
>>> +        }
>>> +    }
>>> +  
>>
>> I think there should be a _PROBE ioctl before trying to setup region for
>> migration. If vfio device driver or vendor driver for mdev device
>> supports migration capability, _PROBE ioctl should return success along
>> with region's information <index, size> which can be used to setup the
>> region.
> 
> How is _PROBE different from _REGION_INFO which already exists to tell
> us information about the region?
> 

Fine with _REGION_INFO if SUBTYPE is non-vendor specific as you
mentioned above.

Thanks,
Kirti

>>>      for (i = 0; i < PCI_ROM_SLOT; i++) {
>>>          vfio_bar_quirk_setup(vdev, i);
>>>      }
>>> @@ -2884,6 +2904,10 @@ out_teardown:
>>>      vfio_teardown_msi(vdev);
>>>      vfio_bars_exit(vdev);
>>>  error:
>>> +    if (vdev->migration_blocker) {
>>> +        migrate_del_blocker(vdev->migration_blocker);
>>> +        error_free(vdev->migration_blocker);
>>> +    }
>>>      error_prepend(errp, ERR_PREFIX, vdev->vbasedev.name);
>>>  }
>>>  
>>> @@ -3009,7 +3033,6 @@ static Property vfio_pci_dev_properties[] = {
>>>  
>>>  static const VMStateDescription vfio_pci_vmstate = {
>>>      .name = "vfio-pci",
>>> -    .unmigratable = 1,  
>>
>> Based on the result of above _PROBE ioctl, 'unmigratable' should be set
>> if vendor driver doesn't support migration capability.
> 
> .unmigratable isn't really dynamically settable aiui, thus the
> existence of the migration blocker.
> 
>>>  };
>>>  
>>>  static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
>>> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
>>> index 502a575..0ee1724 100644
>>> --- a/hw/vfio/pci.h
>>> +++ b/hw/vfio/pci.h
>>> @@ -116,6 +116,8 @@ typedef struct VFIOPCIDevice {
>>>      VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */
>>>      VFIOVGA *vga; /* 0xa0000, 0x3b0, 0x3c0 */
>>>      void *igd_opregion;
>>> +    struct vfio_region_info device_state;
>>> +    Error *migration_blocker;
>>>      PCIHostDeviceAddress host;
>>>      EventNotifier err_notifier;
>>>      EventNotifier req_notifier;
>>> diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
>>> index 4312e96..e3380ad 100644
>>> --- a/linux-headers/linux/vfio.h
>>> +++ b/linux-headers/linux/vfio.h
>>> @@ -297,9 +297,12 @@ struct vfio_region_info_cap_type {
>>>  #define VFIO_REGION_TYPE_PCI_VENDOR_MASK	(0xffff)
>>>  
>>>  /* 8086 Vendor sub-types */
>>> -#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION	(1)
>>> -#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG	(2)
>>> -#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG	(3)
>>> +#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION		(1)
>>> +#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG		(2)
>>> +#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG		(3)
>>> +
>>> +/* Mdev sub-type for device state save and restore */
>>> +#define VFIO_REGION_SUBTYPE_DEVICE_STATE	(4)
> 
> This continues on with the 0x8086 vendor sub-types, which is entirely
> unnecessary as each vendor has an entire sub-type address space and we
> shouldn't be using a vendor type anyway.  Additionally, we need
> significantly more documentation on the API for this region.  Thanks,
> 
> Alex
> 
>>>  
>>>  /**
>>>   * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
>>>   
> 

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2018-04-17 19:30 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-04-10  6:02 [Qemu-devel] [RFC PATCH V4 1/4] vfio: introduce a new VFIO subregion for mdev device migration support Yulei Zhang
2018-04-16 14:44 ` Kirti Wankhede
2018-04-16 20:15   ` Alex Williamson
2018-04-17 10:36     ` Zhang, Yulei
2018-04-17 14:42       ` Alex Williamson
2018-04-17 19:04         ` Dr. David Alan Gilbert
2018-04-17 19:30     ` Kirti Wankhede

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.