qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Jagannathan Raman <jag.raman@oracle.com>
To: qemu-devel@nongnu.org
Cc: elena.ufimtseva@oracle.com, john.g.johnson@oracle.com,
	jag.raman@oracle.com, swapnil.ingle@nutanix.com,
	john.levon@nutanix.com, alex.williamson@redhat.com,
	stefanha@redhat.com, thanos.makatos@nutanix.com
Subject: [PATCH RFC server 10/11] vfio-user: register handlers to facilitate migration
Date: Mon, 19 Jul 2021 16:00:12 -0400	[thread overview]
Message-ID: <2b9b94e7b5e1bcf77c0b900bc0a5488db4910833.1626722742.git.jag.raman@oracle.com> (raw)
In-Reply-To: <cover.1626722742.git.jag.raman@oracle.com>

Store and load the device's state using handlers for live migration

Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
---
 migration/savevm.h        |   2 +
 hw/remote/vfio-user-obj.c | 287 ++++++++++++++++++++++++++++++++++++++++++++++
 migration/savevm.c        |  63 ++++++++++
 3 files changed, 352 insertions(+)

diff --git a/migration/savevm.h b/migration/savevm.h
index 6461342..71d1733 100644
--- a/migration/savevm.h
+++ b/migration/savevm.h
@@ -67,5 +67,7 @@ int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis);
 int qemu_load_device_state(QEMUFile *f);
 int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
         bool in_postcopy, bool inactivate_disks);
+int qemu_remote_savevm(QEMUFile *f);
+int qemu_remote_loadvm(QEMUFile *f);
 
 #endif
diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c
index d2a2e51..5948576 100644
--- a/hw/remote/vfio-user-obj.c
+++ b/hw/remote/vfio-user-obj.c
@@ -44,6 +44,10 @@
 #include "hw/boards.h"
 #include "hw/remote/iohub.h"
 #include "hw/remote/machine.h"
+#include "migration/qemu-file.h"
+#include "migration/savevm.h"
+#include "migration/global_state.h"
+#include "block/block.h"
 
 #include "libvfio-user/include/libvfio-user.h"
 
@@ -73,6 +77,31 @@ struct VfuObject {
     PCIDevice *pci_dev;
 
     QemuThread vfu_ctx_thread;
+
+    /*
+     * vfu_mig_buf holds the migration data. In the remote process, this
+     * buffer replaces the role of an IO channel which links the source
+     * and the destination.
+     *
+     * Whenever the client QEMU process initiates migration, the libvfio-user
+     * library notifies that to this server. The remote/server QEMU sets up a
+     * QEMUFile object using this buffer as backend. The remote passes this
+     * object to its migration subsystem, and it slirps the VMSDs of all its
+     * devices and stores them in this buffer.
+     *
+     * libvfio-user library subsequetly asks the remote for any data that needs
+     * to be moved over to the destination using its vfu_migration_callbacks_t
+     * APIs. The remote hands over this buffer as data at this time.
+     *
+     * A reverse of this process happens at the destination.
+     */
+    uint8_t *vfu_mig_buf;
+
+    uint64_t vfu_mig_buf_size;
+
+    uint64_t vfu_mig_buf_pending;
+
+    QEMUFile *vfu_mig_file;
 };
 
 static void vfu_object_set_socket(Object *obj, const char *str, Error **errp)
@@ -97,6 +126,226 @@ static void vfu_object_set_devid(Object *obj, const char *str, Error **errp)
     trace_vfu_prop("devid", str);
 }
 
+/**
+ * Migration helper functions
+ *
+ * vfu_mig_buf_read & vfu_mig_buf_write are used by QEMU's migration
+ * subsystem - qemu_remote_savevm & qemu_remote_loadvm. savevm/loadvm
+ * call these functions via QEMUFileOps to save/load the VMSD of all
+ * the devices into vfu_mig_buf
+ *
+ */
+static ssize_t vfu_mig_buf_read(void *opaque, uint8_t *buf, int64_t pos,
+                                size_t size, Error **errp)
+{
+    VfuObject *o = opaque;
+
+    if (pos > o->vfu_mig_buf_size) {
+        size = 0;
+    } else if ((pos + size) > o->vfu_mig_buf_size) {
+        size = o->vfu_mig_buf_size;
+    }
+
+    memcpy(buf, (o->vfu_mig_buf + pos), size);
+
+    o->vfu_mig_buf_size -= size;
+
+    return size;
+}
+
+static ssize_t vfu_mig_buf_write(void *opaque, struct iovec *iov, int iovcnt,
+                                 int64_t pos, Error **errp)
+{
+    VfuObject *o = opaque;
+    uint64_t end = pos + iov_size(iov, iovcnt);
+    int i;
+
+    if (end > o->vfu_mig_buf_size) {
+        o->vfu_mig_buf = g_realloc(o->vfu_mig_buf, end);
+    }
+
+    for (i = 0; i < iovcnt; i++) {
+        memcpy((o->vfu_mig_buf + o->vfu_mig_buf_size), iov[i].iov_base,
+               iov[i].iov_len);
+        o->vfu_mig_buf_size += iov[i].iov_len;
+        o->vfu_mig_buf_pending += iov[i].iov_len;
+    }
+
+    return iov_size(iov, iovcnt);
+}
+
+static int vfu_mig_buf_shutdown(void *opaque, bool rd, bool wr, Error **errp)
+{
+    VfuObject *o = opaque;
+
+    o->vfu_mig_buf_size = 0;
+
+    g_free(o->vfu_mig_buf);
+
+    return 0;
+}
+
+static const QEMUFileOps vfu_mig_fops_save = {
+    .writev_buffer  = vfu_mig_buf_write,
+    .shut_down      = vfu_mig_buf_shutdown,
+};
+
+static const QEMUFileOps vfu_mig_fops_load = {
+    .get_buffer     = vfu_mig_buf_read,
+    .shut_down      = vfu_mig_buf_shutdown,
+};
+
+/**
+ * handlers for vfu_migration_callbacks_t
+ *
+ * The libvfio-user library accesses these handlers to drive the migration
+ * at the remote end, and also to transport the data stored in vfu_mig_buf
+ *
+ */
+static void vfu_mig_state_precopy(vfu_ctx_t *vfu_ctx)
+{
+    VfuObject *o = vfu_get_private(vfu_ctx);
+    int ret;
+
+    if (!o->vfu_mig_file) {
+        o->vfu_mig_file = qemu_fopen_ops(o, &vfu_mig_fops_save);
+    }
+
+    global_state_store();
+
+    ret = qemu_remote_savevm(o->vfu_mig_file);
+    if (ret) {
+        qemu_file_shutdown(o->vfu_mig_file);
+        return;
+    }
+
+    qemu_fflush(o->vfu_mig_file);
+
+    bdrv_inactivate_all();
+}
+
+static void vfu_mig_state_running(vfu_ctx_t *vfu_ctx)
+{
+    VfuObject *o = vfu_get_private(vfu_ctx);
+    Error *local_err = NULL;
+    int ret;
+
+    ret = qemu_remote_loadvm(o->vfu_mig_file);
+    if (ret) {
+        error_setg(&error_abort, "vfu: failed to restore device state");
+        return;
+    }
+
+    bdrv_invalidate_cache_all(&local_err);
+    if (local_err) {
+        error_report_err(local_err);
+        return;
+    }
+
+    vm_start();
+}
+
+static int vfu_mig_transition(vfu_ctx_t *vfu_ctx, vfu_migr_state_t state)
+{
+    switch (state) {
+    case VFU_MIGR_STATE_RESUME:
+    case VFU_MIGR_STATE_STOP_AND_COPY:
+    case VFU_MIGR_STATE_STOP:
+        break;
+    case VFU_MIGR_STATE_PRE_COPY:
+        vfu_mig_state_precopy(vfu_ctx);
+        break;
+    case VFU_MIGR_STATE_RUNNING:
+        if (!runstate_is_running()) {
+            vfu_mig_state_running(vfu_ctx);
+        }
+        break;
+    default:
+        warn_report("vfu: Unknown migration state %d", state);
+    }
+
+    return 0;
+}
+
+static uint64_t vfu_mig_get_pending_bytes(vfu_ctx_t *vfu_ctx)
+{
+    VfuObject *o = vfu_get_private(vfu_ctx);
+
+    return o->vfu_mig_buf_pending;
+}
+
+static int vfu_mig_prepare_data(vfu_ctx_t *vfu_ctx, uint64_t *offset,
+                                uint64_t *size)
+{
+    VfuObject *o = vfu_get_private(vfu_ctx);
+
+    if (offset) {
+        *offset = 0;
+    }
+
+    if (size) {
+        *size = o->vfu_mig_buf_size;
+    }
+
+    return 0;
+}
+
+static ssize_t vfu_mig_read_data(vfu_ctx_t *vfu_ctx, void *buf,
+                                 uint64_t size, uint64_t offset)
+{
+    VfuObject *o = vfu_get_private(vfu_ctx);
+
+    if (offset > o->vfu_mig_buf_size) {
+        return -1;
+    }
+
+    if ((offset + size) > o->vfu_mig_buf_size) {
+        warn_report("vfu: buffer overflow - check pending_bytes");
+        size = o->vfu_mig_buf_size - offset;
+    }
+
+    memcpy(buf, (o->vfu_mig_buf + offset), size);
+
+    o->vfu_mig_buf_pending -= size;
+
+    return size;
+}
+
+static ssize_t vfu_mig_write_data(vfu_ctx_t *vfu_ctx, void *data,
+                                  uint64_t size, uint64_t offset)
+{
+    VfuObject *o = vfu_get_private(vfu_ctx);
+    uint64_t end = offset + size;
+
+    if (end > o->vfu_mig_buf_size) {
+        o->vfu_mig_buf = g_realloc(o->vfu_mig_buf, end);
+        o->vfu_mig_buf_size = end;
+    }
+
+    memcpy((o->vfu_mig_buf + offset), data, size);
+
+    if (!o->vfu_mig_file) {
+        o->vfu_mig_file = qemu_fopen_ops(o, &vfu_mig_fops_load);
+    }
+
+    return size;
+}
+
+static int vfu_mig_data_written(vfu_ctx_t *vfu_ctx, uint64_t count)
+{
+    return 0;
+}
+
+static const vfu_migration_callbacks_t vfu_mig_cbs = {
+    .version = VFU_MIGR_CALLBACKS_VERS,
+    .transition = &vfu_mig_transition,
+    .get_pending_bytes = &vfu_mig_get_pending_bytes,
+    .prepare_data = &vfu_mig_prepare_data,
+    .read_data = &vfu_mig_read_data,
+    .data_written = &vfu_mig_data_written,
+    .write_data = &vfu_mig_write_data,
+};
+
 static void *vfu_object_ctx_run(void *opaque)
 {
     VfuObject *o = opaque;
@@ -332,6 +581,7 @@ static void vfu_object_machine_done(Notifier *notifier, void *data)
 {
     VfuObject *o = container_of(notifier, VfuObject, machine_done);
     DeviceState *dev = NULL;
+    size_t migr_area_size;
     int ret;
 
     o->vfu_ctx = vfu_create_ctx(VFU_TRANS_SOCK, o->socket, 0,
@@ -391,6 +641,35 @@ static void vfu_object_machine_done(Notifier *notifier, void *data)
         return;
     }
 
+    /*
+     * TODO: The 0x20000 number used below is a temporary. We are working on
+     *     a cleaner fix for this.
+     *
+     *     The libvfio-user library assumes that the remote knows the size of
+     *     the data to be migrated at boot time, but that is not the case with
+     *     VMSDs, as it can contain a variable-size buffer. 0x20000 is used
+     *     as a sufficiently large buffer to demonstrate migration, but that
+     *     cannot be used as a solution.
+     *
+     */
+    ret = vfu_setup_region(o->vfu_ctx, VFU_PCI_DEV_MIGR_REGION_IDX,
+                           0x20000, NULL,
+                           VFU_REGION_FLAG_RW, NULL, 0, -1, 0);
+    if (ret < 0) {
+        error_setg(&error_abort, "vfu: Failed to register migration BAR %s- %s",
+                   o->devid, strerror(errno));
+        return;
+    }
+
+    migr_area_size = vfu_get_migr_register_area_size();
+    ret = vfu_setup_device_migration_callbacks(o->vfu_ctx, &vfu_mig_cbs,
+                                               migr_area_size);
+    if (ret < 0) {
+        error_setg(&error_abort, "vfu: Failed to setup migration %s- %s",
+                   o->devid, strerror(errno));
+        return;
+    }
+
     qemu_thread_create(&o->vfu_ctx_thread, "VFU ctx runner", vfu_object_ctx_run,
                        o, QEMU_THREAD_JOINABLE);
 }
@@ -412,6 +691,14 @@ static void vfu_object_init(Object *obj)
 
     o->machine_done.notify = vfu_object_machine_done;
     qemu_add_machine_init_done_notifier(&o->machine_done);
+
+    o->vfu_mig_file = NULL;
+
+    o->vfu_mig_buf = NULL;
+
+    o->vfu_mig_buf_size = 0;
+
+    o->vfu_mig_buf_pending = 0;
 }
 
 static void vfu_object_finalize(Object *obj)
diff --git a/migration/savevm.c b/migration/savevm.c
index 72848b9..c2279af 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1603,6 +1603,33 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
     return ret;
 }
 
+int qemu_remote_savevm(QEMUFile *f)
+{
+    SaveStateEntry *se;
+    int ret;
+
+    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
+        if (!se->vmsd || !vmstate_save_needed(se->vmsd, se->opaque)) {
+            continue;
+        }
+
+        save_section_header(f, se, QEMU_VM_SECTION_FULL);
+
+        ret = vmstate_save(f, se, NULL);
+        if (ret) {
+            qemu_file_set_error(f, ret);
+            return ret;
+        }
+
+        save_section_footer(f, se);
+    }
+
+    qemu_put_byte(f, QEMU_VM_EOF);
+    qemu_fflush(f);
+
+    return 0;
+}
+
 void qemu_savevm_live_state(QEMUFile *f)
 {
     /* save QEMU_VM_SECTION_END section */
@@ -2443,6 +2470,42 @@ qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis)
     return 0;
 }
 
+int qemu_remote_loadvm(QEMUFile *f)
+{
+    uint8_t section_type;
+    int ret = 0;
+
+    qemu_mutex_lock_iothread();
+
+    while (true) {
+        section_type = qemu_get_byte(f);
+
+        if (qemu_file_get_error(f)) {
+            ret = qemu_file_get_error(f);
+            break;
+        }
+
+        switch (section_type) {
+        case QEMU_VM_SECTION_FULL:
+            ret = qemu_loadvm_section_start_full(f, NULL);
+            if (ret < 0) {
+                break;
+            }
+            break;
+        case QEMU_VM_EOF:
+            goto out;
+        default:
+            ret = -EINVAL;
+            goto out;
+        }
+    }
+
+out:
+    qemu_mutex_unlock_iothread();
+
+    return ret;
+}
+
 static int
 qemu_loadvm_section_part_end(QEMUFile *f, MigrationIncomingState *mis)
 {
-- 
1.8.3.1



  parent reply	other threads:[~2021-07-19 20:03 UTC|newest]

Thread overview: 55+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-07-19  6:27 [PATCH RFC 00/19] vfio-user implementation Elena Ufimtseva
2021-07-19  6:27 ` [PATCH RFC 01/19] vfio-user: introduce vfio-user protocol specification Elena Ufimtseva
2021-07-19  6:27 ` [PATCH RFC 02/19] vfio-user: add VFIO base abstract class Elena Ufimtseva
2021-07-19  6:27 ` [PATCH RFC 03/19] vfio-user: define VFIO Proxy and communication functions Elena Ufimtseva
2021-07-27 16:34   ` Stefan Hajnoczi
2021-07-28 18:08     ` John Johnson
2021-07-29  8:06       ` Stefan Hajnoczi
2021-07-19  6:27 ` [PATCH RFC 04/19] vfio-user: Define type vfio_user_pci_dev_info Elena Ufimtseva
2021-07-28 10:16   ` Stefan Hajnoczi
2021-07-29  0:55     ` John Johnson
2021-07-29  8:22       ` Stefan Hajnoczi
2021-07-19  6:27 ` [PATCH RFC 05/19] vfio-user: connect vfio proxy to remote server Elena Ufimtseva
2021-07-19  6:27 ` [PATCH RFC 06/19] vfio-user: negotiate protocol with " Elena Ufimtseva
2021-07-19  6:27 ` [PATCH RFC 07/19] vfio-user: define vfio-user pci ops Elena Ufimtseva
2021-07-19  6:27 ` [PATCH RFC 08/19] vfio-user: VFIO container setup & teardown Elena Ufimtseva
2021-07-19  6:27 ` [PATCH RFC 09/19] vfio-user: get device info and get irq info Elena Ufimtseva
2021-07-19  6:27 ` [PATCH RFC 10/19] vfio-user: device region read/write Elena Ufimtseva
2021-07-19  6:27 ` [PATCH RFC 11/19] vfio-user: get region and DMA map/unmap operations Elena Ufimtseva
2021-07-19  6:27 ` [PATCH RFC 12/19] vfio-user: probe remote device's BARs Elena Ufimtseva
2021-07-19 22:59   ` Alex Williamson
2021-07-20  1:39     ` John Johnson
2021-07-20  3:01       ` Alex Williamson
2021-07-19  6:27 ` [PATCH RFC 13/19] vfio-user: respond to remote DMA read/write requests Elena Ufimtseva
2021-07-19  6:27 ` [PATCH RFC 14/19] vfio_user: setup MSI/X interrupts and PCI config operations Elena Ufimtseva
2021-07-19  6:27 ` [PATCH RFC 15/19] vfio-user: vfio user device realize Elena Ufimtseva
2021-07-19  6:27 ` [PATCH RFC 16/19] vfio-user: pci reset Elena Ufimtseva
2021-07-19  6:27 ` [PATCH RFC 17/19] vfio-user: probe remote device ROM BAR Elena Ufimtseva
2021-07-19  6:27 ` [PATCH RFC 18/19] vfio-user: migration support Elena Ufimtseva
2021-07-19  6:27 ` [PATCH RFC 19/19] vfio-user: add migration cli options and version negotiation Elena Ufimtseva
2021-07-19 20:00 ` [PATCH RFC server 00/11] vfio-user server in QEMU Jagannathan Raman
2021-07-19 20:00   ` [PATCH RFC server 01/11] vfio-user: build library Jagannathan Raman
2021-07-19 20:24     ` John Levon
2021-07-20 12:06       ` Jag Raman
2021-07-20 12:20         ` Marc-André Lureau
2021-07-20 13:09           ` John Levon
2021-07-19 20:00   ` [PATCH RFC server 02/11] vfio-user: define vfio-user object Jagannathan Raman
2021-07-19 20:00   ` [PATCH RFC server 03/11] vfio-user: instantiate vfio-user context Jagannathan Raman
2021-07-19 20:00   ` [PATCH RFC server 04/11] vfio-user: find and init PCI device Jagannathan Raman
2021-07-26 15:05     ` John Levon
2021-07-28 17:08       ` Jag Raman
2021-07-19 20:00   ` [PATCH RFC server 05/11] vfio-user: run vfio-user context Jagannathan Raman
2021-07-20 14:17     ` Thanos Makatos
2021-08-13 14:51       ` Jag Raman
2021-08-16 12:52         ` John Levon
2021-08-16 14:10           ` Jag Raman
2021-07-19 20:00   ` [PATCH RFC server 06/11] vfio-user: handle PCI config space accesses Jagannathan Raman
2021-07-26 15:10     ` John Levon
2021-07-19 20:00   ` [PATCH RFC server 07/11] vfio-user: handle DMA mappings Jagannathan Raman
2021-07-20 14:38     ` Thanos Makatos
2021-07-19 20:00   ` [PATCH RFC server 08/11] vfio-user: handle PCI BAR accesses Jagannathan Raman
2021-07-19 20:00   ` [PATCH RFC server 09/11] vfio-user: handle device interrupts Jagannathan Raman
2021-07-19 20:00   ` Jagannathan Raman [this message]
2021-07-20 14:05     ` [PATCH RFC server 10/11] vfio-user: register handlers to facilitate migration Thanos Makatos
2021-07-19 20:00   ` [PATCH RFC server 11/11] vfio-user: acceptance test Jagannathan Raman
2021-07-20 16:12     ` Thanos Makatos

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=2b9b94e7b5e1bcf77c0b900bc0a5488db4910833.1626722742.git.jag.raman@oracle.com \
    --to=jag.raman@oracle.com \
    --cc=alex.williamson@redhat.com \
    --cc=elena.ufimtseva@oracle.com \
    --cc=john.g.johnson@oracle.com \
    --cc=john.levon@nutanix.com \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@redhat.com \
    --cc=swapnil.ingle@nutanix.com \
    --cc=thanos.makatos@nutanix.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).