Hi

On Wed, Jul 7, 2021 at 9:31 PM Steve Sistare <steven.sistare@oracle.com> wrote:
Provide the cprsave restart mode, which preserves the guest VM across a
restart of the qemu process.  After cprsave, the caller passes qemu
command-line arguments to cprexec, which directly exec's the new qemu
binary.  The arguments must include -S so new qemu starts in a paused state.
The caller resumes the guest by calling cprload.

To use the restart mode, qemu must be started with the memfd-alloc machine
option.  The memfd's are saved to the environment and kept open across exec,
after which they are found from the environment and re-mmap'd.  Hence guest
ram is preserved in place, albeit with new virtual addresses in the qemu
process.

The restart mode supports vfio devices in a subsequent patch.

Signed-off-by: Steve Sistare <steven.sistare@oracle.com>

What's the plan to make it work with -object memory-backend-memfd -machine memory-backend? (or memory-backend-file, I guess that should work?)

There should be some extra checks before accepting cprexec() on a misconfigured VM.

---
 migration/cpr.c   | 21 +++++++++++++++++++++
 softmmu/physmem.c |  6 +++++-
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/migration/cpr.c b/migration/cpr.c
index c5bad8a..fb57dec 100644
--- a/migration/cpr.c
+++ b/migration/cpr.c
@@ -29,6 +29,7 @@
 #include "sysemu/xen.h"
 #include "hw/vfio/vfio-common.h"
 #include "hw/virtio/vhost.h"
+#include "qemu/env.h"

 QEMUFile *qf_file_open(const char *path, int flags, int mode,
                               const char *name, Error **errp)
@@ -108,6 +109,26 @@ done:
     return;
 }

+static int preserve_fd(const char *name, const char *val, void *handle)
+{
+    qemu_clr_cloexec(atoi(val));
+    return 0;
+}
+
+void cprexec(strList *args, Error **errp)
+{
+    if (xen_enabled()) {
+        error_setg(errp, "xen does not support cprexec");
+        return;
+    }
+    if (!runstate_check(RUN_STATE_SAVE_VM)) {
+        error_setg(errp, "runstate is not save-vm");
+        return;
+    }
+    walkenv(FD_PREFIX, preserve_fd, 0);

I am  not convinced that relying on environment variables here is the best thing to do.

+    qemu_system_exec_request(args);
+}
+
 void cprload(const char *file, Error **errp)
 {
     QEMUFile *f;
diff --git a/softmmu/physmem.c b/softmmu/physmem.c
index b149250..8a65ef7 100644
--- a/softmmu/physmem.c
+++ b/softmmu/physmem.c
@@ -65,6 +65,7 @@
 #include "qemu/pmem.h"

 #include "qemu/memfd.h"
+#include "qemu/env.h"
 #include "migration/vmstate.h"

 #include "qemu/range.h"
@@ -1986,7 +1987,7 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
         } else {
             name = memory_region_name(new_block->mr);
             if (ms->memfd_alloc) {


-                int mfd = -1;          /* placeholder until next patch */
+                int mfd = getenv_fd(name);
                 mr->align = QEMU_VMALLOC_ALIGN;
                 if (mfd < 0) {
                     mfd = qemu_memfd_create(name, maxlen + mr->align,
@@ -1994,7 +1995,9 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
                     if (mfd < 0) {
                         return;
                     }
+                    setenv_fd(name, mfd);
                 }
+                qemu_clr_cloexec(mfd);

Why clear it now, and on exec again?

                 new_block->flags |= RAM_SHARED;
                 addr = file_ram_alloc(new_block, maxlen, mfd,
                                       false, false, 0, errp);
@@ -2246,6 +2249,7 @@ void qemu_ram_free(RAMBlock *block)
     }

     qemu_mutex_lock_ramlist();
+    unsetenv_fd(memory_region_name(block->mr));
     QLIST_REMOVE_RCU(block, next);
     ram_list.mru_block = NULL;
     /* Write list before version */
--
1.8.3.1




--
Marc-André Lureau