From: Jagannathan Raman <jag.raman@oracle.com>
To: qemu-devel@nongnu.org
Cc: elena.ufimtseva@oracle.com, john.g.johnson@oracle.com,
jag.raman@oracle.com, swapnil.ingle@nutanix.com,
john.levon@nutanix.com, alex.williamson@redhat.com,
stefanha@redhat.com, thanos.makatos@nutanix.com
Subject: [PATCH RFC server 10/11] vfio-user: register handlers to facilitate migration
Date: Mon, 19 Jul 2021 16:00:12 -0400 [thread overview]
Message-ID: <2b9b94e7b5e1bcf77c0b900bc0a5488db4910833.1626722742.git.jag.raman@oracle.com> (raw)
In-Reply-To: <cover.1626722742.git.jag.raman@oracle.com>
Store and load the device's state using handlers for live migration
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
---
migration/savevm.h | 2 +
hw/remote/vfio-user-obj.c | 287 ++++++++++++++++++++++++++++++++++++++++++++++
migration/savevm.c | 63 ++++++++++
3 files changed, 352 insertions(+)
diff --git a/migration/savevm.h b/migration/savevm.h
index 6461342..71d1733 100644
--- a/migration/savevm.h
+++ b/migration/savevm.h
@@ -67,5 +67,7 @@ int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis);
int qemu_load_device_state(QEMUFile *f);
int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
bool in_postcopy, bool inactivate_disks);
+int qemu_remote_savevm(QEMUFile *f);
+int qemu_remote_loadvm(QEMUFile *f);
#endif
diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c
index d2a2e51..5948576 100644
--- a/hw/remote/vfio-user-obj.c
+++ b/hw/remote/vfio-user-obj.c
@@ -44,6 +44,10 @@
#include "hw/boards.h"
#include "hw/remote/iohub.h"
#include "hw/remote/machine.h"
+#include "migration/qemu-file.h"
+#include "migration/savevm.h"
+#include "migration/global_state.h"
+#include "block/block.h"
#include "libvfio-user/include/libvfio-user.h"
@@ -73,6 +77,31 @@ struct VfuObject {
PCIDevice *pci_dev;
QemuThread vfu_ctx_thread;
+
+ /*
+ * vfu_mig_buf holds the migration data. In the remote process, this
+ * buffer replaces the role of an IO channel which links the source
+ * and the destination.
+ *
+ * Whenever the client QEMU process initiates migration, the libvfio-user
+ * library notifies that to this server. The remote/server QEMU sets up a
+ * QEMUFile object using this buffer as backend. The remote passes this
+ * object to its migration subsystem, and it slurps the VMSDs of all its
+ * devices and stores them in this buffer.
+ *
+ * libvfio-user library subsequently asks the remote for any data that needs
+ * to be moved over to the destination using its vfu_migration_callbacks_t
+ * APIs. The remote hands over this buffer as data at this time.
+ *
+ * A reverse of this process happens at the destination.
+ */
+ uint8_t *vfu_mig_buf;
+
+ uint64_t vfu_mig_buf_size;
+
+ uint64_t vfu_mig_buf_pending;
+
+ QEMUFile *vfu_mig_file;
};
static void vfu_object_set_socket(Object *obj, const char *str, Error **errp)
@@ -97,6 +126,226 @@ static void vfu_object_set_devid(Object *obj, const char *str, Error **errp)
trace_vfu_prop("devid", str);
}
+/**
+ * Migration helper functions
+ *
+ * vfu_mig_buf_read & vfu_mig_buf_write are used by QEMU's migration
+ * subsystem - qemu_remote_savevm & qemu_remote_loadvm. savevm/loadvm
+ * call these functions via QEMUFileOps to save/load the VMSD of all
+ * the devices into vfu_mig_buf
+ *
+ */
+static ssize_t vfu_mig_buf_read(void *opaque, uint8_t *buf, int64_t pos,
+ size_t size, Error **errp) /* QEMUFile get_buffer op: copy out of vfu_mig_buf */
+{
+ VfuObject *o = opaque;
+
+ if (pos > o->vfu_mig_buf_size) {
+ size = 0; /* read starts past the end of the buffered data */
+ } else if ((pos + size) > o->vfu_mig_buf_size) {
+ size = o->vfu_mig_buf_size - pos; /* clamp to bytes remaining after pos (was buf_size, over-reading) */
+ }
+
+ memcpy(buf, (o->vfu_mig_buf + pos), size);
+
+ o->vfu_mig_buf_size -= size; /* NOTE(review): shrinking size while pos advances double-counts; confirm */
+
+ return size;
+}
+
+static ssize_t vfu_mig_buf_write(void *opaque, struct iovec *iov, int iovcnt,
+ int64_t pos, Error **errp) /* QEMUFile writev_buffer op: append iov data to vfu_mig_buf */
+{
+ VfuObject *o = opaque;
+ uint64_t end = pos + iov_size(iov, iovcnt);
+ int i;
+
+ if (end > o->vfu_mig_buf_size) {
+ o->vfu_mig_buf = g_realloc(o->vfu_mig_buf, end); /* grow buffer to hold the new data */
+ }
+
+ for (i = 0; i < iovcnt; i++) {
+ memcpy((o->vfu_mig_buf + o->vfu_mig_buf_size), iov[i].iov_base, /* NOTE(review): writes at buf_size, ignoring pos — assumes strictly sequential writes; confirm */
+ iov[i].iov_len);
+ o->vfu_mig_buf_size += iov[i].iov_len;
+ o->vfu_mig_buf_pending += iov[i].iov_len; /* bytes not yet handed to the client via read_data */
+ }
+
+ return iov_size(iov, iovcnt);
+}
+
+static int vfu_mig_buf_shutdown(void *opaque, bool rd, bool wr, Error **errp)
+{
+ VfuObject *o = opaque; /* QEMUFile shut_down op: discard the migration buffer */
+
+ o->vfu_mig_buf_size = 0;
+
+ g_clear_pointer(&o->vfu_mig_buf, g_free); /* free AND reset to NULL so later g_realloc/memcpy don't use a dangling pointer */
+
+ return 0;
+}
+
+static const QEMUFileOps vfu_mig_fops_save = { /* source side: savevm writes device state into vfu_mig_buf */
+ .writev_buffer = vfu_mig_buf_write,
+ .shut_down = vfu_mig_buf_shutdown,
+};
+
+static const QEMUFileOps vfu_mig_fops_load = { /* destination side: loadvm reads device state from vfu_mig_buf */
+ .get_buffer = vfu_mig_buf_read,
+ .shut_down = vfu_mig_buf_shutdown,
+};
+
+/**
+ * handlers for vfu_migration_callbacks_t
+ *
+ * The libvfio-user library accesses these handlers to drive the migration
+ * at the remote end, and also to transport the data stored in vfu_mig_buf
+ *
+ */
+static void vfu_mig_state_precopy(vfu_ctx_t *vfu_ctx)
+{
+ VfuObject *o = vfu_get_private(vfu_ctx); /* source side: serialize all device VMSDs into vfu_mig_buf */
+ int ret;
+
+ if (!o->vfu_mig_file) { /* lazily create the save-side QEMUFile over vfu_mig_buf */
+ o->vfu_mig_file = qemu_fopen_ops(o, &vfu_mig_fops_save);
+ }
+
+ global_state_store(); /* capture the global run state for the destination */
+
+ ret = qemu_remote_savevm(o->vfu_mig_file);
+ if (ret) {
+ qemu_file_shutdown(o->vfu_mig_file); /* NOTE(review): failure is not surfaced to the client; confirm intended */
+ return;
+ }
+
+ qemu_fflush(o->vfu_mig_file);
+
+ bdrv_inactivate_all(); /* NOTE(review): return value ignored — a failed inactivation goes unnoticed */
+}
+
+static void vfu_mig_state_running(vfu_ctx_t *vfu_ctx)
+{
+ VfuObject *o = vfu_get_private(vfu_ctx); /* destination side: restore device state and resume the VM */
+ Error *local_err = NULL;
+ int ret;
+
+ ret = qemu_remote_loadvm(o->vfu_mig_file); /* replay device state the client wrote into vfu_mig_buf */
+ if (ret) {
+ error_setg(&error_abort, "vfu: failed to restore device state"); /* NOTE(review): &error_abort aborts the process, making the return below unreachable */
+ return;
+ }
+
+ bdrv_invalidate_cache_all(&local_err); /* reactivate block devices inactivated on the source */
+ if (local_err) {
+ error_report_err(local_err);
+ return;
+ }
+
+ vm_start();
+}
+
+static int vfu_mig_transition(vfu_ctx_t *vfu_ctx, vfu_migr_state_t state)
+{
+ switch (state) { /* libvfio-user state-machine hook driving migration on this server */
+ case VFU_MIGR_STATE_RESUME:
+ case VFU_MIGR_STATE_STOP_AND_COPY:
+ case VFU_MIGR_STATE_STOP:
+ break; /* no action needed for these transitions yet */
+ case VFU_MIGR_STATE_PRE_COPY:
+ vfu_mig_state_precopy(vfu_ctx); /* source: snapshot device state into vfu_mig_buf */
+ break;
+ case VFU_MIGR_STATE_RUNNING:
+ if (!runstate_is_running()) { /* only act on the destination; source is already running */
+ vfu_mig_state_running(vfu_ctx);
+ }
+ break;
+ default:
+ warn_report("vfu: Unknown migration state %d", state);
+ }
+
+ return 0;
+}
+
+static uint64_t vfu_mig_get_pending_bytes(vfu_ctx_t *vfu_ctx)
+{
+ VfuObject *o = vfu_get_private(vfu_ctx); /* bytes buffered but not yet fetched by the client */
+
+ return o->vfu_mig_buf_pending;
+}
+
+static int vfu_mig_prepare_data(vfu_ctx_t *vfu_ctx, uint64_t *offset,
+ uint64_t *size) /* expose the whole buffer as a single chunk at offset 0 */
+{
+ VfuObject *o = vfu_get_private(vfu_ctx);
+
+ if (offset) {
+ *offset = 0;
+ }
+
+ if (size) {
+ *size = o->vfu_mig_buf_size;
+ }
+
+ return 0;
+}
+
+static ssize_t vfu_mig_read_data(vfu_ctx_t *vfu_ctx, void *buf,
+ uint64_t size, uint64_t offset) /* client pulls migration data out of vfu_mig_buf */
+{
+ VfuObject *o = vfu_get_private(vfu_ctx);
+
+ if (offset > o->vfu_mig_buf_size) {
+ return -1; /* read starts beyond the buffered data */
+ }
+
+ if ((offset + size) > o->vfu_mig_buf_size) {
+ warn_report("vfu: buffer overflow - check pending_bytes"); /* NOTE(review): this clamps an over-READ, not an overflow; message text is misleading */
+ size = o->vfu_mig_buf_size - offset;
+ }
+
+ memcpy(buf, (o->vfu_mig_buf + offset), size);
+
+ o->vfu_mig_buf_pending -= size; /* account the bytes now delivered to the client */
+
+ return size;
+}
+
+static ssize_t vfu_mig_write_data(vfu_ctx_t *vfu_ctx, void *data,
+ uint64_t size, uint64_t offset) /* client pushes incoming migration data into vfu_mig_buf */
+{
+ VfuObject *o = vfu_get_private(vfu_ctx);
+ uint64_t end = offset + size;
+
+ if (end > o->vfu_mig_buf_size) {
+ o->vfu_mig_buf = g_realloc(o->vfu_mig_buf, end); /* grow to hold the incoming chunk */
+ o->vfu_mig_buf_size = end;
+ }
+
+ memcpy((o->vfu_mig_buf + offset), data, size);
+
+ if (!o->vfu_mig_file) { /* lazily create the load-side QEMUFile for the later loadvm */
+ o->vfu_mig_file = qemu_fopen_ops(o, &vfu_mig_fops_load);
+ }
+
+ return size;
+}
+
+static int vfu_mig_data_written(vfu_ctx_t *vfu_ctx, uint64_t count)
+{
+ return 0; /* no post-write bookkeeping needed; write_data already updated the buffer */
+}
+
+static const vfu_migration_callbacks_t vfu_mig_cbs = { /* callbacks libvfio-user invokes to drive migration */
+ .version = VFU_MIGR_CALLBACKS_VERS,
+ .transition = &vfu_mig_transition,
+ .get_pending_bytes = &vfu_mig_get_pending_bytes,
+ .prepare_data = &vfu_mig_prepare_data,
+ .read_data = &vfu_mig_read_data,
+ .data_written = &vfu_mig_data_written,
+ .write_data = &vfu_mig_write_data,
+};
+
static void *vfu_object_ctx_run(void *opaque)
{
VfuObject *o = opaque;
@@ -332,6 +581,7 @@ static void vfu_object_machine_done(Notifier *notifier, void *data)
{
VfuObject *o = container_of(notifier, VfuObject, machine_done);
DeviceState *dev = NULL;
+ size_t migr_area_size;
int ret;
o->vfu_ctx = vfu_create_ctx(VFU_TRANS_SOCK, o->socket, 0,
@@ -391,6 +641,35 @@ static void vfu_object_machine_done(Notifier *notifier, void *data)
return;
}
+ /*
+ * TODO: The 0x20000 number used below is temporary. We are working on
+ * a cleaner fix for this.
+ *
+ * The libvfio-user library assumes that the remote knows the size of
+ * the data to be migrated at boot time, but that is not the case with
+ * VMSDs, as it can contain a variable-size buffer. 0x20000 is used
+ * as a sufficiently large buffer to demonstrate migration, but that
+ * cannot be used as a solution.
+ *
+ */
+ ret = vfu_setup_region(o->vfu_ctx, VFU_PCI_DEV_MIGR_REGION_IDX,
+ 0x20000, NULL,
+ VFU_REGION_FLAG_RW, NULL, 0, -1, 0);
+ if (ret < 0) {
+ error_setg(&error_abort, "vfu: Failed to register migration BAR %s- %s",
+ o->devid, strerror(errno));
+ return;
+ }
+
+ migr_area_size = vfu_get_migr_register_area_size();
+ ret = vfu_setup_device_migration_callbacks(o->vfu_ctx, &vfu_mig_cbs,
+ migr_area_size);
+ if (ret < 0) {
+ error_setg(&error_abort, "vfu: Failed to setup migration %s- %s",
+ o->devid, strerror(errno));
+ return;
+ }
+
qemu_thread_create(&o->vfu_ctx_thread, "VFU ctx runner", vfu_object_ctx_run,
o, QEMU_THREAD_JOINABLE);
}
@@ -412,6 +691,14 @@ static void vfu_object_init(Object *obj)
o->machine_done.notify = vfu_object_machine_done;
qemu_add_machine_init_done_notifier(&o->machine_done);
+
+ o->vfu_mig_file = NULL;
+
+ o->vfu_mig_buf = NULL;
+
+ o->vfu_mig_buf_size = 0;
+
+ o->vfu_mig_buf_pending = 0;
}
static void vfu_object_finalize(Object *obj)
diff --git a/migration/savevm.c b/migration/savevm.c
index 72848b9..c2279af 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1603,6 +1603,33 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
return ret;
}
+int qemu_remote_savevm(QEMUFile *f)
+{
+ SaveStateEntry *se; /* write every device VMSD to f as a QEMU_VM_SECTION_FULL section */
+ int ret;
+
+ QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
+ if (!se->vmsd || !vmstate_save_needed(se->vmsd, se->opaque)) {
+ continue; /* skip handlers without a VMSD or with nothing to save */
+ }
+
+ save_section_header(f, se, QEMU_VM_SECTION_FULL);
+
+ ret = vmstate_save(f, se, NULL);
+ if (ret) {
+ qemu_file_set_error(f, ret); /* record the error on the stream before bailing out */
+ return ret;
+ }
+
+ save_section_footer(f, se);
+ }
+
+ qemu_put_byte(f, QEMU_VM_EOF); /* terminator that qemu_remote_loadvm stops on */
+ qemu_fflush(f);
+
+ return 0;
+}
+
void qemu_savevm_live_state(QEMUFile *f)
{
/* save QEMU_VM_SECTION_END section */
@@ -2443,6 +2470,42 @@ qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis)
return 0;
}
+int qemu_remote_loadvm(QEMUFile *f)
+{
+ uint8_t section_type; /* read FULL sections from f until QEMU_VM_EOF or error */
+ int ret = 0;
+
+ qemu_mutex_lock_iothread(); /* loading device state touches global machine state */
+
+ while (true) {
+ section_type = qemu_get_byte(f);
+
+ if (qemu_file_get_error(f)) {
+ ret = qemu_file_get_error(f);
+ break;
+ }
+
+ switch (section_type) {
+ case QEMU_VM_SECTION_FULL:
+ ret = qemu_loadvm_section_start_full(f, NULL);
+ if (ret < 0) {
+ goto out; /* was 'break', which only left the switch and kept looping after an error */
+ }
+ break;
+ case QEMU_VM_EOF:
+ goto out;
+ default:
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
+out:
+ qemu_mutex_unlock_iothread();
+
+ return ret;
+}
+
static int
qemu_loadvm_section_part_end(QEMUFile *f, MigrationIncomingState *mis)
{
--
1.8.3.1
next prev parent reply other threads:[~2021-07-19 20:03 UTC|newest]
Thread overview: 55+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-07-19 6:27 [PATCH RFC 00/19] vfio-user implementation Elena Ufimtseva
2021-07-19 6:27 ` [PATCH RFC 01/19] vfio-user: introduce vfio-user protocol specification Elena Ufimtseva
2021-07-19 6:27 ` [PATCH RFC 02/19] vfio-user: add VFIO base abstract class Elena Ufimtseva
2021-07-19 6:27 ` [PATCH RFC 03/19] vfio-user: define VFIO Proxy and communication functions Elena Ufimtseva
2021-07-27 16:34 ` Stefan Hajnoczi
2021-07-28 18:08 ` John Johnson
2021-07-29 8:06 ` Stefan Hajnoczi
2021-07-19 6:27 ` [PATCH RFC 04/19] vfio-user: Define type vfio_user_pci_dev_info Elena Ufimtseva
2021-07-28 10:16 ` Stefan Hajnoczi
2021-07-29 0:55 ` John Johnson
2021-07-29 8:22 ` Stefan Hajnoczi
2021-07-19 6:27 ` [PATCH RFC 05/19] vfio-user: connect vfio proxy to remote server Elena Ufimtseva
2021-07-19 6:27 ` [PATCH RFC 06/19] vfio-user: negotiate protocol with " Elena Ufimtseva
2021-07-19 6:27 ` [PATCH RFC 07/19] vfio-user: define vfio-user pci ops Elena Ufimtseva
2021-07-19 6:27 ` [PATCH RFC 08/19] vfio-user: VFIO container setup & teardown Elena Ufimtseva
2021-07-19 6:27 ` [PATCH RFC 09/19] vfio-user: get device info and get irq info Elena Ufimtseva
2021-07-19 6:27 ` [PATCH RFC 10/19] vfio-user: device region read/write Elena Ufimtseva
2021-07-19 6:27 ` [PATCH RFC 11/19] vfio-user: get region and DMA map/unmap operations Elena Ufimtseva
2021-07-19 6:27 ` [PATCH RFC 12/19] vfio-user: probe remote device's BARs Elena Ufimtseva
2021-07-19 22:59 ` Alex Williamson
2021-07-20 1:39 ` John Johnson
2021-07-20 3:01 ` Alex Williamson
2021-07-19 6:27 ` [PATCH RFC 13/19] vfio-user: respond to remote DMA read/write requests Elena Ufimtseva
2021-07-19 6:27 ` [PATCH RFC 14/19] vfio_user: setup MSI/X interrupts and PCI config operations Elena Ufimtseva
2021-07-19 6:27 ` [PATCH RFC 15/19] vfio-user: vfio user device realize Elena Ufimtseva
2021-07-19 6:27 ` [PATCH RFC 16/19] vfio-user: pci reset Elena Ufimtseva
2021-07-19 6:27 ` [PATCH RFC 17/19] vfio-user: probe remote device ROM BAR Elena Ufimtseva
2021-07-19 6:27 ` [PATCH RFC 18/19] vfio-user: migration support Elena Ufimtseva
2021-07-19 6:27 ` [PATCH RFC 19/19] vfio-user: add migration cli options and version negotiation Elena Ufimtseva
2021-07-19 20:00 ` [PATCH RFC server 00/11] vfio-user server in QEMU Jagannathan Raman
2021-07-19 20:00 ` [PATCH RFC server 01/11] vfio-user: build library Jagannathan Raman
2021-07-19 20:24 ` John Levon
2021-07-20 12:06 ` Jag Raman
2021-07-20 12:20 ` Marc-André Lureau
2021-07-20 13:09 ` John Levon
2021-07-19 20:00 ` [PATCH RFC server 02/11] vfio-user: define vfio-user object Jagannathan Raman
2021-07-19 20:00 ` [PATCH RFC server 03/11] vfio-user: instantiate vfio-user context Jagannathan Raman
2021-07-19 20:00 ` [PATCH RFC server 04/11] vfio-user: find and init PCI device Jagannathan Raman
2021-07-26 15:05 ` John Levon
2021-07-28 17:08 ` Jag Raman
2021-07-19 20:00 ` [PATCH RFC server 05/11] vfio-user: run vfio-user context Jagannathan Raman
2021-07-20 14:17 ` Thanos Makatos
2021-08-13 14:51 ` Jag Raman
2021-08-16 12:52 ` John Levon
2021-08-16 14:10 ` Jag Raman
2021-07-19 20:00 ` [PATCH RFC server 06/11] vfio-user: handle PCI config space accesses Jagannathan Raman
2021-07-26 15:10 ` John Levon
2021-07-19 20:00 ` [PATCH RFC server 07/11] vfio-user: handle DMA mappings Jagannathan Raman
2021-07-20 14:38 ` Thanos Makatos
2021-07-19 20:00 ` [PATCH RFC server 08/11] vfio-user: handle PCI BAR accesses Jagannathan Raman
2021-07-19 20:00 ` [PATCH RFC server 09/11] vfio-user: handle device interrupts Jagannathan Raman
2021-07-19 20:00 ` Jagannathan Raman [this message]
2021-07-20 14:05 ` [PATCH RFC server 10/11] vfio-user: register handlers to facilitate migration Thanos Makatos
2021-07-19 20:00 ` [PATCH RFC server 11/11] vfio-user: acceptance test Jagannathan Raman
2021-07-20 16:12 ` Thanos Makatos
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=2b9b94e7b5e1bcf77c0b900bc0a5488db4910833.1626722742.git.jag.raman@oracle.com \
--to=jag.raman@oracle.com \
--cc=alex.williamson@redhat.com \
--cc=elena.ufimtseva@oracle.com \
--cc=john.g.johnson@oracle.com \
--cc=john.levon@nutanix.com \
--cc=qemu-devel@nongnu.org \
--cc=stefanha@redhat.com \
--cc=swapnil.ingle@nutanix.com \
--cc=thanos.makatos@nutanix.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).