All of lore.kernel.org
 help / color / mirror / Atom feed
From: Thanos Makatos <thanos.makatos@nutanix.com>
To: John Johnson <john.g.johnson@oracle.com>,
	"qemu-devel@nongnu.org" <qemu-devel@nongnu.org>
Subject: RE: [RFC v4 20/21] vfio-user: migration support
Date: Fri, 11 Feb 2022 13:31:09 +0000	[thread overview]
Message-ID: <DM8PR02MB800573717D16BE0BDD1DBB8D8B309@DM8PR02MB8005.namprd02.prod.outlook.com> (raw)
In-Reply-To: <27b636c6c861e0a05278e2d1cbf07d3adec2d505.1641584317.git.john.g.johnson@oracle.com>



> -----Original Message-----
> From: Qemu-devel <qemu-devel-
> bounces+thanos.makatos=nutanix.com@nongnu.org> On Behalf Of John
> Johnson
> Sent: 12 January 2022 00:44
> To: qemu-devel@nongnu.org
> Subject: [RFC v4 20/21] vfio-user: migration support
> 
> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
> ---
>  hw/vfio/user-protocol.h | 18 +++++++++++++++++
>  hw/vfio/migration.c     | 30 +++++++++++++--------------
>  hw/vfio/pci.c           |  7 +++++++
>  hw/vfio/user.c          | 54
> +++++++++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 93 insertions(+), 16 deletions(-)
> 
> diff --git a/hw/vfio/user-protocol.h b/hw/vfio/user-protocol.h
> index 8932311..abe7002 100644
> --- a/hw/vfio/user-protocol.h
> +++ b/hw/vfio/user-protocol.h
> @@ -193,6 +193,10 @@ typedef struct {
>      char data[];
>  } VFIOUserDMARW;
> 
> +/*
> + * VFIO_USER_DIRTY_PAGES
> + */
> +
>  /*imported from struct vfio_bitmap */
>  typedef struct {
>      uint64_t pgsize;
> @@ -200,4 +204,18 @@ typedef struct {
>      char data[];
>  } VFIOUserBitmap;
> 
> +/* imported from struct vfio_iommu_type1_dirty_bitmap_get */
> +typedef struct {
> +    uint64_t iova;
> +    uint64_t size;
> +    VFIOUserBitmap bitmap;
> +} VFIOUserBitmapRange;
> +
> +/* imported from struct vfio_iommu_type1_dirty_bitmap */
> +typedef struct {
> +    VFIOUserHdr hdr;
> +    uint32_t argsz;
> +    uint32_t flags;
> +} VFIOUserDirtyPages;
> +
>  #endif /* VFIO_USER_PROTOCOL_H */
> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
> index ff6b45d..df63f5c 100644
> --- a/hw/vfio/migration.c
> +++ b/hw/vfio/migration.c
> @@ -27,6 +27,7 @@
>  #include "pci.h"
>  #include "trace.h"
>  #include "hw/hw.h"
> +#include "user.h"
> 
>  /*
>   * Flags to be used as unique delimiters for VFIO devices in the migration
> @@ -49,11 +50,13 @@ static int64_t bytes_transferred;
>  static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count,
>                                    off_t off, bool iswrite)
>  {
> +    VFIORegion *region = &vbasedev->migration->region;
>      int ret;
> 
> -    ret = iswrite ? pwrite(vbasedev->fd, val, count, off) :
> -                    pread(vbasedev->fd, val, count, off);
> -    if (ret < count) {
> +    ret = iswrite ?
> +        VDEV_REGION_WRITE(vbasedev, region->nr, off, count, val, false) :
> +        VDEV_REGION_READ(vbasedev, region->nr, off, count, val);
> +     if (ret < count) {
>          error_report("vfio_mig_%s %d byte %s: failed at offset 0x%"
>                       HWADDR_PRIx", err: %s", iswrite ? "write" : "read", count,
>                       vbasedev->name, off, strerror(errno));
> @@ -111,9 +114,7 @@ static int vfio_migration_set_state(VFIODevice
> *vbasedev, uint32_t mask,
>                                      uint32_t value)
>  {
>      VFIOMigration *migration = vbasedev->migration;
> -    VFIORegion *region = &migration->region;
> -    off_t dev_state_off = region->fd_offset +
> -                          VFIO_MIG_STRUCT_OFFSET(device_state);
> +    off_t dev_state_off = VFIO_MIG_STRUCT_OFFSET(device_state);
>      uint32_t device_state;
>      int ret;
> 
> @@ -201,13 +202,13 @@ static int vfio_save_buffer(QEMUFile *f, VFIODevice
> *vbasedev, uint64_t *size)
>      int ret;
> 
>      ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset),
> -                      region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset));
> +                        VFIO_MIG_STRUCT_OFFSET(data_offset));
>      if (ret < 0) {
>          return ret;
>      }
> 
>      ret = vfio_mig_read(vbasedev, &data_size, sizeof(data_size),
> -                        region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size));
> +                        VFIO_MIG_STRUCT_OFFSET(data_size));
>      if (ret < 0) {
>          return ret;
>      }
> @@ -233,8 +234,7 @@ static int vfio_save_buffer(QEMUFile *f, VFIODevice
> *vbasedev, uint64_t *size)
>              }
>              buf_allocated = true;
> 
> -            ret = vfio_mig_read(vbasedev, buf, sec_size,
> -                                region->fd_offset + data_offset);
> +            ret = vfio_mig_read(vbasedev, buf, sec_size, data_offset);
>              if (ret < 0) {
>                  g_free(buf);
>                  return ret;
> @@ -269,7 +269,7 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice
> *vbasedev,
> 
>      do {
>          ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset),
> -                      region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset));
> +                            VFIO_MIG_STRUCT_OFFSET(data_offset));
>          if (ret < 0) {
>              return ret;
>          }
> @@ -309,8 +309,7 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice
> *vbasedev,
>              qemu_get_buffer(f, buf, sec_size);
> 
>              if (buf_alloc) {
> -                ret = vfio_mig_write(vbasedev, buf, sec_size,
> -                        region->fd_offset + data_offset);
> +                ret = vfio_mig_write(vbasedev, buf, sec_size, data_offset);
>                  g_free(buf);
> 
>                  if (ret < 0) {
> @@ -322,7 +321,7 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice
> *vbasedev,
>          }
> 
>          ret = vfio_mig_write(vbasedev, &report_size, sizeof(report_size),
> -                        region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size));
> +                             VFIO_MIG_STRUCT_OFFSET(data_size));
>          if (ret < 0) {
>              return ret;
>          }
> @@ -334,12 +333,11 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice
> *vbasedev,
>  static int vfio_update_pending(VFIODevice *vbasedev)
>  {
>      VFIOMigration *migration = vbasedev->migration;
> -    VFIORegion *region = &migration->region;
>      uint64_t pending_bytes = 0;
>      int ret;
> 
>      ret = vfio_mig_read(vbasedev, &pending_bytes, sizeof(pending_bytes),
> -                    region->fd_offset + VFIO_MIG_STRUCT_OFFSET(pending_bytes));
> +                        VFIO_MIG_STRUCT_OFFSET(pending_bytes));
>      if (ret < 0) {
>          migration->pending_bytes = 0;
>          return ret;
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index d47b98e..598e9ed 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -3677,6 +3677,13 @@ static void vfio_user_pci_realize(PCIDevice *pdev,
> Error **errp)
>          goto out_teardown;
>      }
> 
> +    if (!pdev->failover_pair_id) {
> +        ret = vfio_migration_probe(&vdev->vbasedev, errp);
> +        if (ret) {
> +            error_report("%s: Migration disabled", vdev->vbasedev.name);
> +        }
> +    }
> +
>      vfio_register_err_notifier(vdev);
>      vfio_register_req_notifier(vdev);
> 
> diff --git a/hw/vfio/user.c b/hw/vfio/user.c
> index 33d8f06..2eac62a 100644
> --- a/hw/vfio/user.c
> +++ b/hw/vfio/user.c
> @@ -1410,6 +1410,52 @@ void vfio_user_reset(VFIOProxy *proxy)
>      }
>  }
> 
> +static int vfio_user_dirty_bitmap(VFIOProxy *proxy,
> +                                  struct vfio_iommu_type1_dirty_bitmap *cmd,
> +                                  struct vfio_iommu_type1_dirty_bitmap_get
> +                                  *dbitmap)
> +{
> +    g_autofree struct {
> +        VFIOUserDirtyPages msg;
> +        VFIOUserBitmapRange range;
> +    } *msgp = NULL;
> +    int msize, rsize;
> +
> +    /*
> +     * If just the command is sent, the returned bitmap isn't needed.
> +     * The bitmap structs are different from the ioctl() versions,
> +     * ioctl() returns the bitmap in a local VA
> +     */
> +    if (dbitmap != NULL) {
> +        msize = sizeof(*msgp);
> +        rsize = msize + dbitmap->bitmap.size;
> +        msgp = g_malloc0(rsize);
> +        msgp->range.iova = dbitmap->iova;
> +        msgp->range.size = dbitmap->size;
> +        msgp->range.bitmap.pgsize = dbitmap->bitmap.pgsize;
> +        msgp->range.bitmap.size = dbitmap->bitmap.size;
> +    } else {
> +        msize = rsize = sizeof(VFIOUserDirtyPages);
> +        msgp = g_malloc0(rsize);
> +    }
> +
> +    vfio_user_request_msg(&msgp->msg.hdr, VFIO_USER_DIRTY_PAGES, msize,
> 0);
> +    msgp->msg.argsz = rsize - sizeof(VFIOUserHdr);
> +    msgp->msg.flags = cmd->flags;
> +
> +    vfio_user_send_wait(proxy, &msgp->msg.hdr, NULL, rsize, false);
> +    if (msgp->msg.hdr.flags & VFIO_USER_ERROR) {
> +        return -msgp->msg.hdr.error_reply;
> +    }

We need to check argsz in the response: if it is larger than the argsz we sent, the client needs to retry with a larger argsz.

> +
> +    if (dbitmap != NULL) {
> +        memcpy(dbitmap->bitmap.data, &msgp->range.bitmap.data,
> +               dbitmap->bitmap.size);
> +    }
> +
> +    return 0;
> +}
> +
> 
>  /*
>   * Socket-based io_ops
> @@ -1530,6 +1576,13 @@ static int vfio_user_io_dma_unmap(VFIOContainer
> *container,
>                                 container->async_ops);
>  }
> 
> +static int vfio_user_io_dirty_bitmap(VFIOContainer *container,
> +                        struct vfio_iommu_type1_dirty_bitmap *bitmap,
> +                        struct vfio_iommu_type1_dirty_bitmap_get *range)
> +{
> +    return vfio_user_dirty_bitmap(container->proxy, bitmap, range);
> +}
> +
>  static void vfio_user_io_wait_commit(VFIOContainer *container)
>  {
>      vfio_user_wait_reqs(container->proxy);
> @@ -1538,5 +1591,6 @@ static void vfio_user_io_wait_commit(VFIOContainer
> *container)
>  VFIOContIO vfio_cont_io_sock = {
>      .dma_map = vfio_user_io_dma_map,
>      .dma_unmap = vfio_user_io_dma_unmap,
> +    .dirty_bitmap = vfio_user_io_dirty_bitmap,
>      .wait_commit = vfio_user_io_wait_commit,
>  };
> --
> 1.8.3.1
> 


  reply	other threads:[~2022-02-11 13:37 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-01-12  0:43 [RFC v4 00/21] vfio-user client John Johnson
2022-01-12  0:43 ` [RFC v4 01/21] vfio-user: introduce vfio-user protocol specification John Johnson
2022-02-14 13:10   ` Thanos Makatos
2022-03-09 22:34   ` Alex Williamson
2022-03-10 10:20     ` John Levon
2022-03-14  6:04     ` John Johnson
2022-03-15 21:43     ` Thanos Makatos
2022-03-15 22:28       ` Alex Williamson
2022-07-22  6:23     ` John Johnson
2022-01-12  0:43 ` [RFC v4 02/21] vfio-user: add VFIO base abstract class John Johnson
2022-01-12  0:43 ` [RFC v4 03/21] vfio-user: add container IO ops vector John Johnson
2022-01-12  0:43 ` [RFC v4 04/21] vfio-user: add region cache John Johnson
2022-03-09 23:40   ` Alex Williamson
2022-01-12  0:43 ` [RFC v4 05/21] vfio-user: add device IO ops vector John Johnson
2022-01-12  0:43 ` [RFC v4 06/21] vfio-user: Define type vfio_user_pci_dev_info John Johnson
2022-01-12  0:43 ` [RFC v4 07/21] vfio-user: connect vfio proxy to remote server John Johnson
2022-01-12  0:43 ` [RFC v4 08/21] vfio-user: define socket receive functions John Johnson
2022-02-03 21:53   ` Thanos Makatos
2022-02-04 12:42     ` Thanos Makatos
2022-02-07  7:07       ` John Johnson
2022-02-15 13:35   ` Thanos Makatos
2022-02-15 14:50     ` Thanos Makatos
2022-02-16  2:09       ` John Johnson
2022-02-16  9:31         ` Thanos Makatos
2022-01-12  0:43 ` [RFC v4 09/21] vfio-user: define socket send functions John Johnson
2022-01-26 10:17   ` Thanos Makatos
2022-02-07  7:09     ` John Johnson
2022-01-12  0:43 ` [RFC v4 10/21] vfio-user: get device info John Johnson
2022-01-12  0:43 ` [RFC v4 11/21] vfio-user: get region info John Johnson
2022-01-12  0:43 ` [RFC v4 12/21] vfio-user: region read/write John Johnson
2022-01-26 21:57   ` Thanos Makatos
2022-01-12  0:43 ` [RFC v4 13/21] vfio-user: pci_user_realize PCI setup John Johnson
2022-01-12  0:43 ` [RFC v4 14/21] vfio-user: get and set IRQs John Johnson
2022-01-12  0:43 ` [RFC v4 15/21] vfio-user: proxy container connect/disconnect John Johnson
2022-01-12  0:43 ` [RFC v4 16/21] vfio-user: dma map/unmap operations John Johnson
2022-01-12  0:43 ` [RFC v4 17/21] vfio-user: secure DMA support John Johnson
2022-01-12  0:43 ` [RFC v4 18/21] vfio-user: dma read/write operations John Johnson
2022-01-12  0:43 ` [RFC v4 19/21] vfio-user: pci reset John Johnson
2022-01-12  0:43 ` [RFC v4 20/21] vfio-user: migration support John Johnson
2022-02-11 13:31   ` Thanos Makatos [this message]
2022-02-14 18:50     ` John Johnson
2022-02-15 14:53       ` Thanos Makatos
2022-01-12  0:43 ` [RFC v4 21/21] Only set qemu file error if saving state so the file exists John Johnson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=DM8PR02MB800573717D16BE0BDD1DBB8D8B309@DM8PR02MB8005.namprd02.prod.outlook.com \
    --to=thanos.makatos@nutanix.com \
    --cc=john.g.johnson@oracle.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.