From: Elena Ufimtseva <elena.ufimtseva@oracle.com>
To: qemu-devel@nongnu.org
Cc: elena.ufimtseva@oracle.com, john.g.johnson@oracle.com,
	jag.raman@oracle.com, swapnil.ingle@nutanix.com,
	john.levon@nutanix.com, alex.williamson@redhat.com,
	stefanha@redhat.com
Subject: [PATCH RFC 18/19] vfio-user: migration support
Date: Sun, 18 Jul 2021 23:27:57 -0700
Message-ID: <df0d1a424490a3f9f6c176c92e2776ea065ededf.1626675354.git.elena.ufimtseva@oracle.com>
In-Reply-To: <cover.1626675354.git.elena.ufimtseva@oracle.com>

From: John G Johnson <john.g.johnson@oracle.com>

Send migration region operations to the remote server.
Send VFIO_USER_DIRTY_PAGES to retrieve the remote dirty page bitmap.
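
For reference, a rough sketch of the request layout the client builds for
a GET_BITMAP operation, assuming the struct definitions this patch adds to
hw/vfio/user.h (the wrapper struct name below is illustrative only; the
code in vfio_user_dirty_bitmap() builds an equivalent anonymous struct):

    /* Illustrative only -- not a definition added by this patch. */
    struct vfio_user_dirty_pages_req {
        struct vfio_user_dirty_pages msg;     /* hdr, argsz, flags */
        struct vfio_user_bitmap_range range;  /* iova, size, bitmap pgsize/size */
    };

For START/STOP tracking only the vfio_user_dirty_pages part is sent; for
GET_BITMAP the range is appended and the server returns range.bitmap.size
bytes of bitmap data in the reply, directly after the range.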

Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
---
 hw/vfio/user.h      | 17 +++++++++++++++
 hw/vfio/common.c    | 51 ++++++++++++++++++++++++++++++++++++---------
 hw/vfio/migration.c | 35 ++++++++++++++++++-------------
 hw/vfio/pci.c       |  7 +++++++
 hw/vfio/user.c      | 45 +++++++++++++++++++++++++++++++++++++++
 5 files changed, 130 insertions(+), 25 deletions(-)

diff --git a/hw/vfio/user.h b/hw/vfio/user.h
index 95c2fb1707..eeb328c0a9 100644
--- a/hw/vfio/user.h
+++ b/hw/vfio/user.h
@@ -230,6 +230,20 @@ struct vfio_user_irq_set {
     uint32_t count;
 };
 
+/* imported from struct vfio_iommu_type1_dirty_bitmap_get */
+struct vfio_user_bitmap_range {
+    uint64_t iova;
+    uint64_t size;
+    struct vfio_user_bitmap bitmap;
+};
+
+/* imported from struct vfio_iommu_type1_dirty_bitmap */
+struct vfio_user_dirty_pages {
+    vfio_user_hdr_t hdr;
+    uint32_t argsz;
+    uint32_t flags;
+};
+
 void vfio_user_recv(void *opaque);
 void vfio_user_send_reply(VFIOProxy *proxy, char *buf, int ret);
 VFIOProxy *vfio_user_connect_dev(char *sockname, Error **errp);
@@ -255,4 +269,7 @@ void vfio_user_set_reqhandler(VFIODevice *vbasdev,
                                              void *reqarg);
 int vfio_user_set_irqs(VFIODevice *vbasedev, struct vfio_irq_set *irq);
 void vfio_user_reset(VFIODevice *vbasedev);
+int vfio_user_dirty_bitmap(VFIOProxy *proxy,
+                           struct vfio_iommu_type1_dirty_bitmap *bitmap,
+                           struct vfio_iommu_type1_dirty_bitmap_get *range);
 #endif /* VFIO_USER_H */
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 953d9e7b55..bd31731c0f 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -460,7 +460,11 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container,
         goto unmap_exit;
     }
 
-    ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap);
+    if (container->proxy != NULL) {
+        ret = vfio_user_dma_unmap(container->proxy, unmap, bitmap);
+    } else {
+        ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap);
+    }
     if (!ret) {
         cpu_physical_memory_set_dirty_lebitmap((unsigned long *)bitmap->data,
                 iotlb->translated_addr, pages);
@@ -1278,10 +1282,19 @@ static void vfio_set_dirty_page_tracking(VFIOContainer *container, bool start)
         dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP;
     }
 
-    ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, &dirty);
-    if (ret) {
-        error_report("Failed to set dirty tracking flag 0x%x errno: %d",
-                     dirty.flags, errno);
+    if (container->proxy != NULL) {
+        ret = vfio_user_dirty_bitmap(container->proxy, &dirty, NULL);
+        if (ret) {
+            error_report("Failed to set dirty tracking flag 0x%x errno: %d",
+                         dirty.flags, -ret);
+        }
+    } else {
+        ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, &dirty);
+        if (ret) {
+            error_report("Failed to set dirty tracking flag 0x%x errno: %d",
+                         dirty.flags, errno);
+            ret = -errno;
+        }
     }
 }
 
@@ -1331,7 +1344,11 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
         goto err_out;
     }
 
-    ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap);
+    if (container->proxy != NULL) {
+        ret = vfio_user_dirty_bitmap(container->proxy, dbitmap, range);
+    } else {
+        ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap);
+    }
     if (ret) {
         error_report("Failed to get dirty bitmap for iova: 0x%"PRIx64
                 " size: 0x%"PRIx64" err: %d", (uint64_t)range->iova,
@@ -2282,6 +2299,12 @@ void vfio_connect_proxy(VFIOProxy *proxy, VFIOGroup *group, AddressSpace *as)
     VFIOAddressSpace *space;
     VFIOContainer *container;
 
+    if (QLIST_EMPTY(&vfio_group_list)) {
+        qemu_register_reset(vfio_reset_handler, NULL);
+    }
+
+    QLIST_INSERT_HEAD(&vfio_group_list, group, next);
+
     /*
      * try to mirror vfio_connect_container()
      * as much as possible
@@ -2292,18 +2315,26 @@ void vfio_connect_proxy(VFIOProxy *proxy, VFIOGroup *group, AddressSpace *as)
     container = g_malloc0(sizeof(*container));
     container->space = space;
     container->fd = -1;
+    QLIST_INIT(&container->giommu_list);
     QLIST_INIT(&container->hostwin_list);
     container->proxy = proxy;
 
+    /*
+     * The proxy uses a SW IOMMU in lieu of the HW one
+     * used in the ioctl() version.  Use TYPE1 with the
+     * target's page size for maximum compatibility.
+     */
     container->iommu_type = VFIO_TYPE1_IOMMU;
-    vfio_host_win_add(container, 0, (hwaddr)-1, 4096);
-    container->pgsizes = 4096;
+    vfio_host_win_add(container, 0, (hwaddr)-1, TARGET_PAGE_SIZE);
+    container->pgsizes = TARGET_PAGE_SIZE;
+
+    container->dirty_pages_supported = true;
+    container->max_dirty_bitmap_size = VFIO_USER_DEF_MAX_XFER;
+    container->dirty_pgsizes = TARGET_PAGE_SIZE;
 
     QLIST_INIT(&container->group_list);
     QLIST_INSERT_HEAD(&space->containers, container, next);
 
-    QLIST_INIT(&container->giommu_list);
-
     group->container = container;
     QLIST_INSERT_HEAD(&container->group_list, group, container_next);
 
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 82f654afb6..8005b1171a 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -27,6 +27,7 @@
 #include "pci.h"
 #include "trace.h"
 #include "hw/hw.h"
+#include "user.h"
 
 /*
  * Flags to be used as unique delimiters for VFIO devices in the migration
@@ -49,10 +50,18 @@ static int64_t bytes_transferred;
 static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count,
                                   off_t off, bool iswrite)
 {
+    VFIORegion *region = &vbasedev->migration->region;
     int ret;
 
-    ret = iswrite ? pwrite(vbasedev->fd, val, count, off) :
-                    pread(vbasedev->fd, val, count, off);
+    if (vbasedev->proxy != NULL) {
+        ret = iswrite ?
+            vfio_user_region_write(vbasedev, region->nr, off, count, val) :
+            vfio_user_region_read(vbasedev, region->nr, off, count, val);
+    } else {
+        off += region->fd_offset;
+        ret = iswrite ? pwrite(vbasedev->fd, val, count, off) :
+                        pread(vbasedev->fd, val, count, off);
+    }
     if (ret < count) {
         error_report("vfio_mig_%s %d byte %s: failed at offset 0x%"
                      HWADDR_PRIx", err: %s", iswrite ? "write" : "read", count,
@@ -111,9 +120,7 @@ static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask,
                                     uint32_t value)
 {
     VFIOMigration *migration = vbasedev->migration;
-    VFIORegion *region = &migration->region;
-    off_t dev_state_off = region->fd_offset +
-                          VFIO_MIG_STRUCT_OFFSET(device_state);
+    off_t dev_state_off = VFIO_MIG_STRUCT_OFFSET(device_state);
     uint32_t device_state;
     int ret;
 
@@ -201,13 +208,13 @@ static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t *size)
     int ret;
 
     ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset),
-                      region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset));
+                        VFIO_MIG_STRUCT_OFFSET(data_offset));
     if (ret < 0) {
         return ret;
     }
 
     ret = vfio_mig_read(vbasedev, &data_size, sizeof(data_size),
-                        region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size));
+                        VFIO_MIG_STRUCT_OFFSET(data_size));
     if (ret < 0) {
         return ret;
     }
@@ -233,8 +240,7 @@ static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t *size)
             }
             buf_allocated = true;
 
-            ret = vfio_mig_read(vbasedev, buf, sec_size,
-                                region->fd_offset + data_offset);
+            ret = vfio_mig_read(vbasedev, buf, sec_size, data_offset);
             if (ret < 0) {
                 g_free(buf);
                 return ret;
@@ -269,7 +275,7 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
 
     do {
         ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset),
-                      region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset));
+                            VFIO_MIG_STRUCT_OFFSET(data_offset));
         if (ret < 0) {
             return ret;
         }
@@ -309,8 +315,8 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
             qemu_get_buffer(f, buf, sec_size);
 
             if (buf_alloc) {
-                ret = vfio_mig_write(vbasedev, buf, sec_size,
-                        region->fd_offset + data_offset);
+
+                ret = vfio_mig_write(vbasedev, buf, sec_size, data_offset);
                 g_free(buf);
 
                 if (ret < 0) {
@@ -322,7 +328,7 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
         }
 
         ret = vfio_mig_write(vbasedev, &report_size, sizeof(report_size),
-                        region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size));
+                             VFIO_MIG_STRUCT_OFFSET(data_size));
         if (ret < 0) {
             return ret;
         }
@@ -334,12 +340,11 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
 static int vfio_update_pending(VFIODevice *vbasedev)
 {
     VFIOMigration *migration = vbasedev->migration;
-    VFIORegion *region = &migration->region;
     uint64_t pending_bytes = 0;
     int ret;
 
     ret = vfio_mig_read(vbasedev, &pending_bytes, sizeof(pending_bytes),
-                    region->fd_offset + VFIO_MIG_STRUCT_OFFSET(pending_bytes));
+                        VFIO_MIG_STRUCT_OFFSET(pending_bytes));
     if (ret < 0) {
         migration->pending_bytes = 0;
         return ret;
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index bed8eaa4c2..36f8524e7c 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3633,6 +3633,13 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp)
         }
     }
 
+    if (!pdev->failover_pair_id) {
+        ret = vfio_migration_probe(&vdev->vbasedev, errp);
+        if (ret) {
+            error_report("%s: Migration disabled", vdev->vbasedev.name);
+        }
+    }
+
     vfio_register_err_notifier(vdev);
     vfio_register_req_notifier(vdev);
 
diff --git a/hw/vfio/user.c b/hw/vfio/user.c
index 8917596a2f..eceaeeccea 100644
--- a/hw/vfio/user.c
+++ b/hw/vfio/user.c
@@ -917,3 +917,48 @@ void vfio_user_reset(VFIODevice *vbasedev)
         error_printf("reset reply error %d\n", msg.error_reply);
     }
 }
+
+int vfio_user_dirty_bitmap(VFIOProxy *proxy,
+                           struct vfio_iommu_type1_dirty_bitmap *cmd,
+                           struct vfio_iommu_type1_dirty_bitmap_get *dbitmap)
+{
+    g_autofree struct {
+        struct vfio_user_dirty_pages msg;
+        struct vfio_user_bitmap_range range;
+    } *msgp = NULL;
+    int msize, rsize;
+
+    /*
+     * If just the command is sent, the returned bitmap isn't needed.
+     * The bitmap structs are different from the ioctl() versions;
+     * ioctl() returns the bitmap in a local VA.
+     */
+    if (dbitmap != NULL) {
+        msize = sizeof(*msgp);
+        rsize = msize + dbitmap->bitmap.size;
+        msgp = g_malloc0(rsize);
+        msgp->range.iova = dbitmap->iova;
+        msgp->range.size = dbitmap->size;
+        msgp->range.bitmap.pgsize = dbitmap->bitmap.pgsize;
+        msgp->range.bitmap.size = dbitmap->bitmap.size;
+    } else {
+        msize = rsize = sizeof(struct vfio_user_dirty_pages);
+        msgp = g_malloc0(rsize);
+    }
+
+    vfio_user_request_msg(&msgp->msg.hdr, VFIO_USER_DIRTY_PAGES, msize, 0);
+    msgp->msg.argsz = msize - sizeof(msgp->msg.hdr);
+    msgp->msg.flags = cmd->flags;
+
+    vfio_user_send_recv(proxy, &msgp->msg.hdr, NULL, rsize);
+    if (msgp->msg.hdr.flags & VFIO_USER_ERROR) {
+        return -msgp->msg.hdr.error_reply;
+    }
+
+    if (dbitmap != NULL) {
+        memcpy(dbitmap->bitmap.data, &msgp->range.bitmap.data,
+               dbitmap->bitmap.size);
+    }
+
+    return 0;
+}
-- 
2.25.1


