Hi

On Wed, Jul 7, 2021 at 9:45 PM Steve Sistare <steven.sistare@oracle.com> wrote:
Provide the cprsave and cprload functions for live update.  These save and
restore VM state, with minimal guest pause time, so that qemu may be updated
to a new version in between.

cprsave stops the VM and saves vmstate to an ordinary file.  It supports any
type of guest image and block device, but the caller must not modify guest
block devices between cprsave and cprload.

cprsave supports several modes, the first of which is reboot.  In this mode,
the caller invokes cprsave and then terminates qemu.  The caller may then
update the host kernel and system software and reboot.  The caller resumes
the guest by running qemu with the same arguments as the original process
and invoking cprload.  To use this mode, guest ram must be mapped to a
persistent shared memory file such as /dev/dax0.0 or /dev/shm PKRAM.

The reboot mode supports vfio devices if the caller first suspends the
guest, such as by issuing guest-suspend-ram to the qemu guest agent.  The
guest drivers' suspend methods flush outstanding requests and re-initialize
the devices, and thus there is no device state to save and restore.

cprload loads state from the file.  If the VM was running at cprsave time,
then VM execution resumes.  If the VM was suspended at cprsave time, then
the caller must issue a system_wakeup command to resume.

Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
---
 MAINTAINERS               |   7 +++
 include/migration/cpr.h   |  17 ++++++
 include/sysemu/runstate.h |   1 +
 migration/cpr.c           | 149 ++++++++++++++++++++++++++++++++++++++++++++++
 migration/meson.build     |   1 +
 migration/savevm.h        |   2 +
 softmmu/runstate.c        |  21 ++++++-
 7 files changed, 197 insertions(+), 1 deletion(-)
 create mode 100644 include/migration/cpr.h
 create mode 100644 migration/cpr.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 684142e..c3573aa 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2858,6 +2858,13 @@ F: net/colo*
 F: net/filter-rewriter.c
 F: net/filter-mirror.c

+CPR
+M: Steve Sistare <steven.sistare@oracle.com>
+M: Mark Kanda <mark.kanda@oracle.com>
+S: Maintained
+F: include/migration/cpr.h
+F: migration/cpr.c
+
 Record/replay
 M: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
 R: Paolo Bonzini <pbonzini@redhat.com>
diff --git a/include/migration/cpr.h b/include/migration/cpr.h
new file mode 100644
index 0000000..bffee19
--- /dev/null
+++ b/include/migration/cpr.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef MIGRATION_CPR_H
+#define MIGRATION_CPR_H
+
+#include "qapi/qapi-types-cpr.h"
+
+void cprsave(const char *file, CprMode mode, Error **errp);

I'd rather use "path" or "filename".

+void cprexec(strList *args, Error **errp);
+void cprload(const char *file, Error **errp);

same

It's recommended to return a bool value TRUE for success.
(see include/qapi/error.h)

+
+#endif
diff --git a/include/sysemu/runstate.h b/include/sysemu/runstate.h
index a535691..ed4b735 100644
--- a/include/sysemu/runstate.h
+++ b/include/sysemu/runstate.h
@@ -51,6 +51,7 @@ void qemu_system_reset_request(ShutdownCause reason);
 void qemu_system_suspend_request(void);
 void qemu_register_suspend_notifier(Notifier *notifier);
 bool qemu_wakeup_suspend_enabled(void);
+void qemu_system_start_on_wake_request(void);

I suggest introducing the function in a preliminary commit.

Also for consistency with the rest of symbols, use "wakeup".

 void qemu_system_wakeup_request(WakeupReason reason, Error **errp);
 void qemu_system_wakeup_enable(WakeupReason reason, bool enabled);
 void qemu_register_wakeup_notifier(Notifier *notifier);
diff --git a/migration/cpr.c b/migration/cpr.c
new file mode 100644
index 0000000..c5bad8a
--- /dev/null
+++ b/migration/cpr.c
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "monitor/monitor.h"
+#include "migration.h"
+#include "migration/snapshot.h"
+#include "chardev/char.h"
+#include "migration/misc.h"
+#include "migration/cpr.h"
+#include "migration/global_state.h"
+#include "qemu-file-channel.h"
+#include "qemu-file.h"
+#include "savevm.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
+#include "qemu/error-report.h"
+#include "io/channel-buffer.h"
+#include "io/channel-file.h"
+#include "sysemu/cpu-timers.h"
+#include "sysemu/runstate.h"
+#include "sysemu/runstate-action.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/replay.h"
+#include "sysemu/xen.h"
+#include "hw/vfio/vfio-common.h"
+#include "hw/virtio/vhost.h"
+
+QEMUFile *qf_file_open(const char *path, int flags, int mode,
+                              const char *name, Error **errp)

None of our functions have qf_ prefix. We are not very consistent with QEMUFile functions, but I suggest to spell it out qemu_file_open().

Also, it should probably be in migration/qemu-file.c.

+{

I'd ERRP_GUARD on every function with an errp argument.

+    QIOChannelFile *fioc;

Let's not miss an opportunity to use g_auto
    g_autoptr(QIOChannelFile) fioc = NULL;

+    QIOChannel *ioc;
+    QEMUFile *f;
+
+    if (flags & O_RDWR) {
+        error_setg(errp, "qf_file_open %s: O_RDWR not supported", path);
+        return 0;
+    }
+
+    fioc = qio_channel_file_new_path(path, flags, mode, errp);
+    if (!fioc) {
+        return 0;
+    }
+
+    ioc = QIO_CHANNEL(fioc);
+    qio_channel_set_name(ioc, name);
+    f = (flags & O_WRONLY) ? qemu_fopen_channel_output(ioc) :
+                             qemu_fopen_channel_input(ioc);
 +    object_unref(OBJECT(fioc));
 
With g_auto, can be removed, and value returned directly.

+    return f;
+}
+
+void cprsave(const char *file, CprMode mode, Error **errp)
+{
+    int ret;
+    QEMUFile *f;
+    int saved_vm_running = runstate_is_running();
+
+    if (mode == CPR_MODE_REBOOT && qemu_ram_volatile(errp)) {
+        return;
+    }
+
+    if (migrate_colo_enabled()) {
+        error_setg(errp, "error: cprsave does not support x-colo");

Remove error:

+        return;
+    }
+
+    if (replay_mode != REPLAY_MODE_NONE) {
+        error_setg(errp, "error: cprsave does not support replay");

same

+        return;
+    }
+
+    f = qf_file_open(file, O_CREAT | O_WRONLY | O_TRUNC, 0600, "cprsave", errp);
+    if (!f) {
+        return;
+    }
+
+    if (global_state_store()) {
+        error_setg(errp, "Error saving global state");
+        qemu_fclose(f);
+        return;
+    }

Could be called before opening cprsave file?

+    if (runstate_check(RUN_STATE_SUSPENDED)) {
+        /* Update timers_state before saving.  Suspend did not so do. */
+        cpu_disable_ticks();
+    }
+    vm_stop(RUN_STATE_SAVE_VM);
+
+    ret = qemu_save_device_state(f);
+    qemu_fclose(f);
+    if (ret < 0) {
+        error_setg(errp, "Error %d while saving VM state", ret);
+        goto err;

Needless goto / labels.
 
+    }
+
+    goto done;
+
+err:
+    if (saved_vm_running) {
+        vm_start();
+    }
+done:
+    return;
+}
+
+void cprload(const char *file, Error **errp)
+{
+    QEMUFile *f;
+    int ret;
+    RunState state;
+
+    if (runstate_is_running()) {
+        error_setg(errp, "cprload called for a running VM");
+        return;
+    }
+
+    f = qf_file_open(file, O_RDONLY, 0, "cprload", errp);
+    if (!f) {
+        return;
+    }
+
+    if (qemu_get_be32(f) != QEMU_VM_FILE_MAGIC ||
+        qemu_get_be32(f) != QEMU_VM_FILE_VERSION) {
+        error_setg(errp, "error: %s is not a vmstate file", file);

f is leaked

+        return;
+    }
+
+    ret = qemu_load_device_state(f);
+    qemu_fclose(f);
+    if (ret < 0) {
+        error_setg(errp, "Error %d while loading VM state", ret);
+        return;
+    }
+
+    state = global_state_get_runstate();
+    if (state == RUN_STATE_RUNNING) {
+        vm_start();
+    } else {
+        runstate_set(state);
+        if (runstate_check(RUN_STATE_SUSPENDED)) {
+            qemu_system_start_on_wake_request();
+        }
+    }
+}
diff --git a/migration/meson.build b/migration/meson.build
index f8714dc..fd59281 100644
--- a/migration/meson.build
+++ b/migration/meson.build
@@ -15,6 +15,7 @@ softmmu_ss.add(files(
   'channel.c',
   'colo-failover.c',
   'colo.c',
+  'cpr.c',
   'exec.c',
   'fd.c',
   'global_state.c',
diff --git a/migration/savevm.h b/migration/savevm.h
index 6461342..ce5d710 100644
--- a/migration/savevm.h
+++ b/migration/savevm.h
@@ -67,5 +67,7 @@ int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis);
 int qemu_load_device_state(QEMUFile *f);
 int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
         bool in_postcopy, bool inactivate_disks);
+QEMUFile *qf_file_open(const char *path, int flags, int mode,
+                       const char *name, Error **errp);

 #endif
diff --git a/softmmu/runstate.c b/softmmu/runstate.c
index 10d9b73..7fe4967 100644
--- a/softmmu/runstate.c
+++ b/softmmu/runstate.c
@@ -115,6 +115,8 @@ static const RunStateTransition runstate_transitions_def[] = {
     { RUN_STATE_PRELAUNCH, RUN_STATE_RUNNING },
     { RUN_STATE_PRELAUNCH, RUN_STATE_FINISH_MIGRATE },
     { RUN_STATE_PRELAUNCH, RUN_STATE_INMIGRATE },
+    { RUN_STATE_PRELAUNCH, RUN_STATE_SUSPENDED },
+    { RUN_STATE_PRELAUNCH, RUN_STATE_PAUSED },

     { RUN_STATE_FINISH_MIGRATE, RUN_STATE_RUNNING },
     { RUN_STATE_FINISH_MIGRATE, RUN_STATE_PAUSED },
@@ -335,6 +337,7 @@ void vm_state_notify(bool running, RunState state)
     }
 }

+static bool start_on_wake_requested;
 static ShutdownCause reset_requested;
 static ShutdownCause shutdown_requested;
 static int shutdown_signal;
@@ -562,6 +565,11 @@ void qemu_register_suspend_notifier(Notifier *notifier)
     notifier_list_add(&suspend_notifiers, notifier);
 }

+void qemu_system_start_on_wake_request(void)
+{
+    start_on_wake_requested = true;
+}
+
 void qemu_system_wakeup_request(WakeupReason reason, Error **errp)
 {
     trace_system_wakeup_request(reason);
@@ -574,7 +582,18 @@ void qemu_system_wakeup_request(WakeupReason reason, Error **errp)
     if (!(wakeup_reason_mask & (1 << reason))) {
         return;
     }
-    runstate_set(RUN_STATE_RUNNING);
+
+    /*
+     * Must call vm_start if it has never been called, to invoke the state
+     * change callbacks for the first time.
+     */
+    if (start_on_wake_requested) {
+        start_on_wake_requested = false;
+        vm_start();
+    } else {
+        runstate_set(RUN_STATE_RUNNING);
+    }
+
     wakeup_reason = reason;
     qemu_notify_event();
 }
--
1.8.3.1




--
Marc-André Lureau