All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0 of 6] libxl: refactor suspend/resume code
@ 2012-01-31  1:05 rshriram
  2012-01-31  1:05 ` [PATCH 1 of 6] libxl: helper function to send commands to traditional qemu rshriram
                   ` (5 more replies)
  0 siblings, 6 replies; 30+ messages in thread
From: rshriram @ 2012-01-31  1:05 UTC (permalink / raw)
  To: xen-devel; +Cc: anthony.perard, brendan, ian.jackson, ian.campbell

This patch series refactors the suspend/resume code to minimize 
Remus specific code in libxl. There are a couple of trivial bug
fixes too.

Shriram

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [PATCH 1 of 6] libxl: helper function to send commands to traditional qemu
  2012-01-31  1:05 [PATCH 0 of 6] libxl: refactor suspend/resume code rshriram
@ 2012-01-31  1:05 ` rshriram
  2012-01-31  9:46   ` Ian Campbell
  2012-01-31  1:05 ` [PATCH 2 of 6] libxl: bugfix: create_domain() return to caller if !daemonize rshriram
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 30+ messages in thread
From: rshriram @ 2012-01-31  1:05 UTC (permalink / raw)
  To: xen-devel; +Cc: anthony.perard, brendan, ian.jackson, ian.campbell

# HG changeset patch
# User Shriram Rajagopalan <rshriram@cs.ubc.ca>
# Date 1327971493 28800
# Node ID 20bbc4a754a701ef14c9136a1adffc1c90bc1f4a
# Parent  e2722b24dc0962de37215320b05d1bb7c4c42864
libxl: helper function to send commands to traditional qemu

Introduce a helper function to send commands to traditional
qemu. qemu_pci_add_xenstore, qemu_pci_remove_xenstore,
libxl__domain_save_device_model and libxl_domain_unpause have
been refactored to use this function.

Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca>

diff -r e2722b24dc09 -r 20bbc4a754a7 tools/libxl/libxl.c
--- a/tools/libxl/libxl.c	Thu Jan 26 17:43:31 2012 +0000
+++ b/tools/libxl/libxl.c	Mon Jan 30 16:58:13 2012 -0800
@@ -517,7 +517,7 @@ int libxl_domain_unpause(libxl_ctx *ctx,
         path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/state", domid);
         state = libxl__xs_read(gc, XBT_NULL, path);
         if (state != NULL && !strcmp(state, "paused")) {
-            libxl__xs_write(gc, XBT_NULL, libxl__sprintf(gc, "/local/domain/0/device-model/%d/command", domid), "continue");
+            libxl__qemu_traditional_cmd(gc, domid, "continue");
             libxl__wait_for_device_model(gc, domid, "running",
                                          NULL, NULL, NULL);
         }
diff -r e2722b24dc09 -r 20bbc4a754a7 tools/libxl/libxl_dom.c
--- a/tools/libxl/libxl_dom.c	Thu Jan 26 17:43:31 2012 +0000
+++ b/tools/libxl/libxl_dom.c	Mon Jan 30 16:58:13 2012 -0800
@@ -349,6 +349,15 @@ out:
     return rc;
 }
 
+int libxl__qemu_traditional_cmd(libxl__gc *gc, uint32_t domid,
+                                const char *cmd)
+{
+    char *path = NULL;
+    path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/command",
+                          domid);
+    return libxl__xs_write(gc, XBT_NULL, path, "%s", cmd);
+}
+
 int libxl__domain_restore_common(libxl__gc *gc, uint32_t domid,
                                  libxl_domain_build_info *info,
                                  libxl__domain_build_state *state,
@@ -631,12 +640,9 @@ int libxl__domain_save_device_model(libx
 
     switch (libxl__device_model_version_running(gc, domid)) {
     case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL: {
-        char *path = NULL;
         LIBXL__LOG(ctx, LIBXL__LOG_DEBUG,
                    "Saving device model state to %s", filename);
-        path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/command",
-                              domid);
-        libxl__xs_write(gc, XBT_NULL, path, "save");
+        libxl__qemu_traditional_cmd(gc, domid, "save");
         libxl__wait_for_device_model(gc, domid, "paused", NULL, NULL, NULL);
         break;
     }
diff -r e2722b24dc09 -r 20bbc4a754a7 tools/libxl/libxl_internal.h
--- a/tools/libxl/libxl_internal.h	Thu Jan 26 17:43:31 2012 +0000
+++ b/tools/libxl/libxl_internal.h	Mon Jan 30 16:58:13 2012 -0800
@@ -263,6 +263,8 @@ _hidden int libxl__build_hvm(libxl__gc *
               libxl_device_model_info *dm_info,
               libxl__domain_build_state *state);
 
+_hidden int libxl__qemu_traditional_cmd(libxl__gc *gc, uint32_t domid,
+                                        const char *cmd);
 _hidden int libxl__domain_rename(libxl__gc *gc, uint32_t domid,
                                  const char *old_name, const char *new_name,
                                  xs_transaction_t trans);
diff -r e2722b24dc09 -r 20bbc4a754a7 tools/libxl/libxl_pci.c
--- a/tools/libxl/libxl_pci.c	Thu Jan 26 17:43:31 2012 +0000
+++ b/tools/libxl/libxl_pci.c	Mon Jan 30 16:58:13 2012 -0800
@@ -602,9 +602,8 @@ static int qemu_pci_add_xenstore(libxl__
         libxl__xs_write(gc, XBT_NULL, path, PCI_BDF, pcidev->domain,
                         pcidev->bus, pcidev->dev, pcidev->func);
     }
-    path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/command",
-                          domid);
-    xs_write(ctx->xsh, XBT_NULL, path, "pci-ins", strlen("pci-ins"));
+
+    libxl__qemu_traditional_cmd(gc, domid, "pci-ins");
     rc = libxl__wait_for_device_model(gc, domid, NULL, NULL,
                                       pci_ins_check, state);
     path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/parameter",
@@ -857,12 +856,11 @@ static int qemu_pci_remove_xenstore(libx
     path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/parameter", domid);
     libxl__xs_write(gc, XBT_NULL, path, PCI_BDF, pcidev->domain,
                     pcidev->bus, pcidev->dev, pcidev->func);
-    path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/command", domid);
 
     /* Remove all functions at once atomically by only signalling
      * device-model for function 0 */
     if ( !force && (pcidev->vdevfn & 0x7) == 0 ) {
-        xs_write(ctx->xsh, XBT_NULL, path, "pci-rem", strlen("pci-rem"));
+        libxl__qemu_traditional_cmd(gc, domid, "pci-rem");
         if (libxl__wait_for_device_model(gc, domid, "pci-removed",
                                          NULL, NULL, NULL) < 0) {
             LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "Device Model didn't respond in time");

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [PATCH 2 of 6] libxl: bugfix: create_domain() return to caller if !daemonize
  2012-01-31  1:05 [PATCH 0 of 6] libxl: refactor suspend/resume code rshriram
  2012-01-31  1:05 ` [PATCH 1 of 6] libxl: helper function to send commands to traditional qemu rshriram
@ 2012-01-31  1:05 ` rshriram
  2012-01-31  9:47   ` Ian Campbell
  2012-01-31  1:05 ` [PATCH 3 of 6] libxl: QMP stop/resume & refactor QEMU suspend/resume/save rshriram
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 30+ messages in thread
From: rshriram @ 2012-01-31  1:05 UTC (permalink / raw)
  To: xen-devel; +Cc: anthony.perard, brendan, ian.jackson, ian.campbell

# HG changeset patch
# User Shriram Rajagopalan <rshriram@cs.ubc.ca>
# Date 1327971504 28800
# Node ID 39f63438767a8225a3148a43139c10f55a62c669
# Parent  20bbc4a754a701ef14c9136a1adffc1c90bc1f4a
libxl: bugfix: create_domain() return to caller if !daemonize

Currently the create_domain function does not honor
the daemonize flag properly. It exits irrespective of
the value of the flag. This patch fixes the issue.

Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca>

diff -r 20bbc4a754a7 -r 39f63438767a tools/libxl/xl_cmdimpl.c
--- a/tools/libxl/xl_cmdimpl.c	Mon Jan 30 16:58:13 2012 -0800
+++ b/tools/libxl/xl_cmdimpl.c	Mon Jan 30 16:58:24 2012 -0800
@@ -1814,7 +1814,7 @@ waitpid_out:
      * If we have daemonized then do not return to the caller -- this has
      * already happened in the parent.
      */
-    if ( !need_daemon )
+    if ( daemonize && !need_daemon )
         exit(ret);
 
     return ret;

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [PATCH 3 of 6] libxl: QMP stop/resume & refactor QEMU suspend/resume/save
  2012-01-31  1:05 [PATCH 0 of 6] libxl: refactor suspend/resume code rshriram
  2012-01-31  1:05 ` [PATCH 1 of 6] libxl: helper function to send commands to traditional qemu rshriram
  2012-01-31  1:05 ` [PATCH 2 of 6] libxl: bugfix: create_domain() return to caller if !daemonize rshriram
@ 2012-01-31  1:05 ` rshriram
  2012-01-31  9:54   ` Ian Campbell
  2012-01-31  1:05 ` [PATCH 4 of 6] libxl: support suspend_cancel in domain_resume rshriram
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 30+ messages in thread
From: rshriram @ 2012-01-31  1:05 UTC (permalink / raw)
  To: xen-devel; +Cc: anthony.perard, brendan, ian.jackson, ian.campbell

# HG changeset patch
# User Shriram Rajagopalan <rshriram@cs.ubc.ca>
# Date 1327971511 28800
# Node ID cd3d43d88c0568142dd061e744c34479e1a440f4
# Parent  39f63438767a8225a3148a43139c10f55a62c669
libxl: QMP stop/resume & refactor QEMU suspend/resume/save

Implement QMP stop and resume functionality and split
device model save into 3 parts:
 suspend_dm(domid)
 save_dm(domid, fd)
 resume_dm(domid)

Integrate Device model suspend into suspend_common_callback

Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca>

diff -r 39f63438767a -r cd3d43d88c05 tools/libxl/libxl_dom.c
--- a/tools/libxl/libxl_dom.c	Mon Jan 30 16:58:24 2012 -0800
+++ b/tools/libxl/libxl_dom.c	Mon Jan 30 16:58:31 2012 -0800
@@ -425,6 +425,61 @@ static int libxl__domain_suspend_common_
     return rc ? 0 : 1;
 }
 
+int libxl__domain_suspend_device_model(libxl__gc *gc, uint32_t domid)
+{
+    libxl_ctx *ctx = libxl__gc_owner(gc);
+    int ret = 0, fd2 = -1;
+    const char *filename = libxl__device_model_savefile(gc, domid);
+
+    switch (libxl__device_model_version_running(gc, domid)) {
+    case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL: {
+        LIBXL__LOG(ctx, LIBXL__LOG_DEBUG,
+                   "Saving device model state to %s", filename);
+        libxl__qemu_traditional_cmd(gc, domid, "save");
+        libxl__wait_for_device_model(gc, domid, "paused", NULL, NULL, NULL);
+        break;
+    }
+    case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
+        if (libxl__qmp_stop(gc, domid))
+            return ERROR_FAIL;
+        fd2 = open(filename, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
+        if (fd2 < 0) {
+            LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR,
+                             "Unable to create a QEMU save file\n");
+            return ERROR_FAIL;
+        }
+        /* Save DM state into fd2 */
+        ret = libxl__qmp_migrate(gc, domid, fd2);
+        if (ret)
+            unlink(filename);
+        close(fd2);
+        fd2 = -1;
+        break;
+    default:
+        return ERROR_INVAL;
+    }
+
+    return ret;
+}
+
+int libxl__domain_resume_device_model(libxl__gc *gc, uint32_t domid)
+{
+
+    switch (libxl__device_model_version_running(gc, domid)) {
+    case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL: {
+        libxl__qemu_traditional_cmd(gc, domid, "continue");
+        break;
+    }
+    case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
+        if (libxl__qmp_resume(gc, domid))
+            return ERROR_FAIL;
+    default:
+        return ERROR_INVAL;
+    }
+
+    return 0;
+}
+
 static int libxl__domain_suspend_common_callback(void *data)
 {
     struct suspendinfo *si = data;
@@ -454,7 +509,7 @@ static int libxl__domain_suspend_common_
             return 0;
         }
         si->guest_responded = 1;
-        return 1;
+        goto suspend_dm;
     }
 
     if (si->hvm && (!hvm_pvdrv || hvm_s_state)) {
@@ -532,7 +587,7 @@ static int libxl__domain_suspend_common_
             shutdown_reason = (info.flags >> XEN_DOMINF_shutdownshift) & XEN_DOMINF_shutdownmask;
             if (shutdown_reason == SHUTDOWN_suspend) {
                 LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "guest has suspended");
-                return 1;
+                goto suspend_dm;
             }
         }
 
@@ -541,6 +596,17 @@ static int libxl__domain_suspend_common_
 
     LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "guest did not suspend");
     return 0;
+
+ suspend_dm:
+    if (si->hvm) {
+        ret = libxl__domain_suspend_device_model(si->gc, si->domid);
+        if (ret) {
+            LIBXL__LOG(ctx, LIBXL__LOG_ERROR,
+                       "libxl__domain_suspend_device_model failed ret=%d", ret);
+            return 0;
+        }
+    }
+    return 1;
 }
 
 int libxl__domain_suspend_common(libxl__gc *gc, uint32_t domid, int fd,
@@ -638,32 +704,6 @@ int libxl__domain_save_device_model(libx
     struct stat st;
     uint32_t qemu_state_len;
 
-    switch (libxl__device_model_version_running(gc, domid)) {
-    case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL: {
-        LIBXL__LOG(ctx, LIBXL__LOG_DEBUG,
-                   "Saving device model state to %s", filename);
-        libxl__qemu_traditional_cmd(gc, domid, "save");
-        libxl__wait_for_device_model(gc, domid, "paused", NULL, NULL, NULL);
-        break;
-    }
-    case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
-        fd2 = open(filename, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
-        if (fd2 < 0) {
-            LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR,
-                             "Unable to create a QEMU save file\n");
-            return ERROR_FAIL;
-        }
-        /* Save DM state into fd2 */
-        ret = libxl__qmp_migrate(gc, domid, fd2);
-        if (ret)
-            goto out;
-        close(fd2);
-        fd2 = -1;
-        break;
-    default:
-        return ERROR_INVAL;
-    }
-
     if (stat(filename, &st) < 0)
     {
         LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "Unable to stat qemu save file\n");
diff -r 39f63438767a -r cd3d43d88c05 tools/libxl/libxl_internal.h
--- a/tools/libxl/libxl_internal.h	Mon Jan 30 16:58:24 2012 -0800
+++ b/tools/libxl/libxl_internal.h	Mon Jan 30 16:58:31 2012 -0800
@@ -277,6 +277,8 @@ _hidden int libxl__domain_suspend_common
                                          libxl_domain_type type,
                                          int live, int debug);
 _hidden const char *libxl__device_model_savefile(libxl__gc *gc, uint32_t domid);
+_hidden int libxl__domain_suspend_device_model(libxl__gc *gc, uint32_t domid);
+_hidden int libxl__domain_resume_device_model(libxl__gc *gc, uint32_t domid);
 _hidden int libxl__domain_save_device_model(libxl__gc *gc, uint32_t domid, int fd);
 _hidden void libxl__userdata_destroyall(libxl__gc *gc, uint32_t domid);
 
@@ -620,6 +622,10 @@ _hidden int libxl__qmp_query_serial(libx
 _hidden int libxl__qmp_pci_add(libxl__gc *gc, int d, libxl_device_pci *pcidev);
 _hidden int libxl__qmp_pci_del(libxl__gc *gc, int domid,
                                libxl_device_pci *pcidev);
+/* Suspend QEMU. */
+_hidden int libxl__qmp_stop(libxl__gc *gc, int domid);
+/* Resume QEMU. */
+_hidden int libxl__qmp_resume(libxl__gc *gc, int domid);
 /* Save current QEMU state into fd. */
 _hidden int libxl__qmp_migrate(libxl__gc *gc, int domid, int fd);
 /* close and free the QMP handler */
diff -r 39f63438767a -r cd3d43d88c05 tools/libxl/libxl_qmp.c
--- a/tools/libxl/libxl_qmp.c	Mon Jan 30 16:58:24 2012 -0800
+++ b/tools/libxl/libxl_qmp.c	Mon Jan 30 16:58:31 2012 -0800
@@ -878,6 +878,38 @@ out:
     return rc;
 }
 
+int libxl__qmp_stop(libxl__gc *gc, int domid)
+{
+    libxl__qmp_handler *qmp = NULL;
+    int rc = 0;
+
+    qmp = libxl__qmp_initialize(libxl__gc_owner(gc), domid);
+    if (!qmp)
+        return ERROR_FAIL;
+
+    rc = qmp_synchronous_send(qmp, "stop", NULL,
+                              NULL, NULL, qmp->timeout);
+
+    libxl__qmp_close(qmp);
+    return rc;
+}
+
+int libxl__qmp_resume(libxl__gc *gc, int domid)
+{
+    libxl__qmp_handler *qmp = NULL;
+    int rc = 0;
+
+    qmp = libxl__qmp_initialize(libxl__gc_owner(gc), domid);
+    if (!qmp)
+        return ERROR_FAIL;
+
+    rc = qmp_synchronous_send(qmp, "cont", NULL,
+                              NULL, NULL, qmp->timeout);
+
+    libxl__qmp_close(qmp);
+    return rc;
+}
+
 int libxl__qmp_initializations(libxl_ctx *ctx, uint32_t domid)
 {
     libxl__qmp_handler *qmp = NULL;

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [PATCH 4 of 6] libxl: support suspend_cancel in domain_resume
  2012-01-31  1:05 [PATCH 0 of 6] libxl: refactor suspend/resume code rshriram
                   ` (2 preceding siblings ...)
  2012-01-31  1:05 ` [PATCH 3 of 6] libxl: QMP stop/resume & refactor QEMU suspend/resume/save rshriram
@ 2012-01-31  1:05 ` rshriram
  2012-01-31 10:00   ` Ian Campbell
  2012-01-31  1:05 ` [PATCH 5 of 6] libxl: refactor migrate_domain and generalize migrate_receive rshriram
  2012-01-31  1:05 ` [PATCH 6 of 6] libxl: resume instead of unpause on xl save -c rshriram
  5 siblings, 1 reply; 30+ messages in thread
From: rshriram @ 2012-01-31  1:05 UTC (permalink / raw)
  To: xen-devel; +Cc: anthony.perard, brendan, ian.jackson, ian.campbell

# HG changeset patch
# User Shriram Rajagopalan <rshriram@cs.ubc.ca>
# Date 1327971527 28800
# Node ID efe92d80c47487485056266a1404a8d2817b7f09
# Parent  cd3d43d88c0568142dd061e744c34479e1a440f4
libxl: support suspend_cancel in domain_resume

Add an extra parameter to libxl_domain_resume indicating
if the caller wishes to use the SUSPEND_CANCEL style
resume instead of the normal resume.

Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca>

diff -r cd3d43d88c05 -r efe92d80c474 tools/libxl/libxl.c
--- a/tools/libxl/libxl.c	Mon Jan 30 16:58:31 2012 -0800
+++ b/tools/libxl/libxl.c	Mon Jan 30 16:58:47 2012 -0800
@@ -229,24 +229,29 @@ int libxl_domain_rename(libxl_ctx *ctx, 
     return rc;
 }
 
-int libxl_domain_resume(libxl_ctx *ctx, uint32_t domid)
+int libxl_domain_resume(libxl_ctx *ctx, uint32_t domid, int fast)
 {
     GC_INIT(ctx);
     int rc = 0;
 
-    if (LIBXL__DOMAIN_IS_TYPE(gc,  domid, HVM)) {
-        LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "Called domain_resume on "
-                "non-cooperative hvm domain %u", domid);
-        rc = ERROR_NI;
-        goto out;
-    }
-    if (xc_domain_resume(ctx->xch, domid, 0)) {
+    if (xc_domain_resume(ctx->xch, domid, fast)) {
         LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR,
                         "xc_domain_resume failed for domain %u",
                         domid);
         rc = ERROR_FAIL;
         goto out;
     }
+
+    if (LIBXL__DOMAIN_IS_TYPE(gc,  domid, HVM)) {
+        rc = libxl__domain_resume_device_model(gc, domid);
+        if (rc) {
+            LIBXL__LOG(ctx, LIBXL__LOG_ERROR,
+                       "failed to resume device model for domain %u:%d",
+                       domid, rc);
+            goto out;
+        }
+    }
+
     if (!xs_resume_domain(ctx->xsh, domid)) {
         LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR,
                         "xs_resume_domain failed for domain %u",
diff -r cd3d43d88c05 -r efe92d80c474 tools/libxl/libxl.h
--- a/tools/libxl/libxl.h	Mon Jan 30 16:58:31 2012 -0800
+++ b/tools/libxl/libxl.h	Mon Jan 30 16:58:47 2012 -0800
@@ -268,7 +268,7 @@ int libxl_domain_create_restore(libxl_ct
 void libxl_domain_config_dispose(libxl_domain_config *d_config);
 int libxl_domain_suspend(libxl_ctx *ctx, libxl_domain_suspend_info *info,
                           uint32_t domid, int fd);
-int libxl_domain_resume(libxl_ctx *ctx, uint32_t domid);
+int libxl_domain_resume(libxl_ctx *ctx, uint32_t domid, int fast);
 int libxl_domain_shutdown(libxl_ctx *ctx, uint32_t domid);
 int libxl_domain_reboot(libxl_ctx *ctx, uint32_t domid);
 int libxl_domain_destroy(libxl_ctx *ctx, uint32_t domid);
diff -r cd3d43d88c05 -r efe92d80c474 tools/libxl/xl_cmdimpl.c
--- a/tools/libxl/xl_cmdimpl.c	Mon Jan 30 16:58:31 2012 -0800
+++ b/tools/libxl/xl_cmdimpl.c	Mon Jan 30 16:58:47 2012 -0800
@@ -2751,7 +2751,7 @@ static void migrate_domain(const char *d
         if (common_domname) {
             libxl_domain_rename(ctx, domid, away_domname, common_domname);
         }
-        rc = libxl_domain_resume(ctx, domid);
+        rc = libxl_domain_resume(ctx, domid, 1);
         if (!rc) fprintf(stderr, "migration sender: Resumed OK.\n");
 
         fprintf(stderr, "Migration failed due to problems at target.\n");
@@ -2773,7 +2773,7 @@ static void migrate_domain(const char *d
     close(send_fd);
     migration_child_report(child, recv_fd);
     fprintf(stderr, "Migration failed, resuming at sender.\n");
-    libxl_domain_resume(ctx, domid);
+    libxl_domain_resume(ctx, domid, 1);
     exit(-ERROR_FAIL);
 
  failed_badly:

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [PATCH 5 of 6] libxl: refactor migrate_domain and generalize migrate_receive
  2012-01-31  1:05 [PATCH 0 of 6] libxl: refactor suspend/resume code rshriram
                   ` (3 preceding siblings ...)
  2012-01-31  1:05 ` [PATCH 4 of 6] libxl: support suspend_cancel in domain_resume rshriram
@ 2012-01-31  1:05 ` rshriram
  2012-01-31 10:19   ` Ian Campbell
  2012-01-31  1:05 ` [PATCH 6 of 6] libxl: resume instead of unpause on xl save -c rshriram
  5 siblings, 1 reply; 30+ messages in thread
From: rshriram @ 2012-01-31  1:05 UTC (permalink / raw)
  To: xen-devel; +Cc: anthony.perard, brendan, ian.jackson, ian.campbell

# HG changeset patch
# User Shriram Rajagopalan <rshriram@cs.ubc.ca>
# Date 1327971533 28800
# Node ID d79c7a853c644d459cda93bf61657be48104cd63
# Parent  efe92d80c47487485056266a1404a8d2817b7f09
libxl: refactor migrate_domain and generalize migrate_receive

Refactor some tasks like establishing the migration channel,
initial migration protocol exchange into separate functions,
to facilitate re-use, when remus support is introduced. Also,
make migrate_receive generic (instead of resorting to stdin and
stdout as the file descriptors for communication).

Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca>

diff -r efe92d80c474 -r d79c7a853c64 tools/libxl/xl_cmdimpl.c
--- a/tools/libxl/xl_cmdimpl.c	Mon Jan 30 16:58:47 2012 -0800
+++ b/tools/libxl/xl_cmdimpl.c	Mon Jan 30 16:58:53 2012 -0800
@@ -2531,6 +2531,43 @@ static int save_domain(const char *p, co
     exit(0);
 }
 
+static pid_t create_migration_child(const char *rune, int *send_fd,
+                                        int *recv_fd)
+{
+    int sendpipe[2], recvpipe[2];
+    pid_t child = -1;
+
+    if (!rune || !send_fd || !recv_fd)
+        return -1;
+
+    MUST( libxl_pipe(ctx, sendpipe) );
+    MUST( libxl_pipe(ctx, recvpipe) );
+
+    child = libxl_fork(ctx);
+    if (child==-1) exit(1);
+
+    if (!child) {
+        dup2(sendpipe[0], 0);
+        dup2(recvpipe[1], 1);
+        close(sendpipe[0]); close(sendpipe[1]);
+        close(recvpipe[0]); close(recvpipe[1]);
+        execlp("sh","sh","-c",rune,(char*)0);
+        perror("failed to exec sh");
+        exit(-1);
+    }
+
+    close(sendpipe[0]);
+    close(recvpipe[1]);
+    *send_fd = sendpipe[1];
+    *recv_fd = recvpipe[0];
+
+    /* if receiver dies, we get an error and can clean up
+       rather than just dying */
+    signal(SIGPIPE, SIG_IGN);
+
+    return child;
+}
+
 static int migrate_read_fixedmessage(int fd, const void *msg, int msgsz,
                                      const char *what, const char *rune) {
     char buf[msgsz];
@@ -2616,18 +2653,23 @@ static void migration_child_report(pid_t
     migration_child = 0;
 }
 
-static void migrate_domain(const char *domain_spec, const char *rune,
-                           const char *override_config_file)
+/* It is okay to have a NULL rune (we could be communicating over tcp sockets
+ * but not both NULL rune and NULL(send_fd, recv_fd).
+ */
+static void migrate_do_preamble(const char *domain_spec, const char *rune,
+                                const char *override_config_file,
+                                int *send_fd, int *recv_fd, pid_t *mchild)
 {
-    pid_t child = -1;
-    int rc;
-    int sendpipe[2], recvpipe[2];
-    int send_fd, recv_fd;
-    libxl_domain_suspend_info suspinfo;
-    char *away_domname;
-    char rc_buf;
+    int rc = 0;
     uint8_t *config_data;
     int config_len;
+    pid_t child = -1;
+
+    if (!send_fd || !recv_fd) {
+        fprintf(stderr, "Cannot create migration channel:"
+                "Null file descriptors supplied!\n");
+        exit(1);
+    }
 
     save_domain_core_begin(domain_spec, override_config_file,
                            &config_data, &config_len);
@@ -2638,43 +2680,44 @@ static void migrate_domain(const char *d
         exit(1);
     }
 
-    MUST( libxl_pipe(ctx, sendpipe) );
-    MUST( libxl_pipe(ctx, recvpipe) );
-
-    child = libxl_fork(ctx);
-    if (child==-1) exit(1);
-
-    if (!child) {
-        dup2(sendpipe[0], 0);
-        dup2(recvpipe[1], 1);
-        close(sendpipe[0]); close(sendpipe[1]);
-        close(recvpipe[0]); close(recvpipe[1]);
-        execlp("sh","sh","-c",rune,(char*)0);
-        perror("failed to exec sh");
-        exit(-1);
-    }
-
-    close(sendpipe[0]);
-    close(recvpipe[1]);
-    send_fd = sendpipe[1];
-    recv_fd = recvpipe[0];
-
-    signal(SIGPIPE, SIG_IGN);
-    /* if receiver dies, we get an error and can clean up
-       rather than just dying */
-
-    rc = migrate_read_fixedmessage(recv_fd, migrate_receiver_banner,
+    if (*send_fd < 0 || *recv_fd < 0) {
+        if (rune)
+            child = create_migration_child(rune, send_fd, recv_fd);
+        if (child < 0) {
+            fprintf(stderr, "failed to create migration channel"
+                    " - empty command ?\n");
+            exit(1);
+        }
+    }
+
+    rc = migrate_read_fixedmessage(*recv_fd, migrate_receiver_banner,
                                    sizeof(migrate_receiver_banner)-1,
                                    "banner", rune);
     if (rc) {
-        close(send_fd);
-        migration_child_report(child, recv_fd);
+        close(*send_fd);
+        migration_child_report(child, *recv_fd);
         exit(-rc);
     }
 
-    save_domain_core_writeconfig(send_fd, "migration stream",
+    save_domain_core_writeconfig(*send_fd, "migration stream",
                                  config_data, config_len);
-
+    if (mchild)
+        *mchild = child;
+    return;
+}
+
+static void migrate_domain(const char *domain_spec, const char *rune,
+                           const char *override_config_file)
+{
+    int rc;
+    int send_fd = -1, recv_fd = -1;
+    libxl_domain_suspend_info suspinfo;
+    char *away_domname;
+    char rc_buf;
+    pid_t child = -1;
+
+    migrate_do_preamble(domain_spec, rune, override_config_file,
+                        &send_fd, &recv_fd, &child);
     xtl_stdiostream_adjust_flags(logger, XTL_STDIOSTREAM_HIDE_PROGRESS, 0);
 
     memset(&suspinfo, 0, sizeof(suspinfo));
@@ -2798,7 +2841,12 @@ static void core_dump_domain(const char 
     if (rc) { fprintf(stderr,"core dump failed (rc=%d)\n",rc);exit(-1); }
 }
 
-static void migrate_receive(int debug, int daemonize, int monitor)
+/* Explicitly specifying a send & recv fd allows us to switch to a tcp socket
+ * based migration/replication channel in future, instead of exec/forking from
+ * an ssh channel.
+ */
+static void migrate_receive(int debug, int daemonize, int monitor,
+                            int send_fd, int recv_fd)
 {
     int rc, rc2;
     char rc_buf;
@@ -2810,7 +2858,7 @@ static void migrate_receive(int debug, i
 
     fprintf(stderr, "migration target: Ready to receive domain.\n");
 
-    CHK_ERRNO( libxl_write_exactly(ctx, 1,
+    CHK_ERRNO( libxl_write_exactly(ctx, send_fd,
                                    migrate_receiver_banner,
                                    sizeof(migrate_receiver_banner)-1,
                                    "migration ack stream",
@@ -2822,7 +2870,7 @@ static void migrate_receive(int debug, i
     dom_info.monitor = monitor;
     dom_info.paused = 1;
     dom_info.restore_file = "incoming migration stream";
-    dom_info.migrate_fd = 0; /* stdin */
+    dom_info.migrate_fd = recv_fd;
     dom_info.migration_domname_r = &migration_domname;
     dom_info.no_incr_generationid = 1;
 
@@ -2836,13 +2884,13 @@ static void migrate_receive(int debug, i
     fprintf(stderr, "migration target: Transfer complete,"
             " requesting permission to start domain.\n");
 
-    rc = libxl_write_exactly(ctx, 1,
+    rc = libxl_write_exactly(ctx, send_fd,
                              migrate_receiver_ready,
                              sizeof(migrate_receiver_ready),
                              "migration ack stream", "ready message");
     if (rc) exit(-rc);
 
-    rc = migrate_read_fixedmessage(0, migrate_permission_to_go,
+    rc = migrate_read_fixedmessage(recv_fd, migrate_permission_to_go,
                                    sizeof(migrate_permission_to_go),
                                    "GO message", 0);
     if (rc) goto perhaps_destroy_notify_rc;
@@ -2861,7 +2909,7 @@ static void migrate_receive(int debug, i
     rc = 0;
 
  perhaps_destroy_notify_rc:
-    rc2 = libxl_write_exactly(ctx, 1,
+    rc2 = libxl_write_exactly(ctx, send_fd,
                               migrate_report, sizeof(migrate_report),
                               "migration ack stream",
                               "success/failure report");
@@ -2869,7 +2917,7 @@ static void migrate_receive(int debug, i
 
     rc_buf = -rc;
     assert(!!rc_buf == !!rc);
-    rc2 = libxl_write_exactly(ctx, 1, &rc_buf, 1,
+    rc2 = libxl_write_exactly(ctx, send_fd, &rc_buf, 1,
                               "migration ack stream",
                               "success/failure code");
     if (rc2) exit(-ERROR_BADFAIL);
@@ -2887,7 +2935,7 @@ static void migrate_receive(int debug, i
         fprintf(stderr, "migration target: Cleanup OK, granting sender"
                 " permission to resume.\n");
 
-        rc2 = libxl_write_exactly(ctx, 1,
+        rc2 = libxl_write_exactly(ctx, send_fd,
                                   migrate_permission_to_go,
                                   sizeof(migrate_permission_to_go),
                                   "migration ack stream",
@@ -2983,7 +3031,8 @@ int main_migrate_receive(int argc, char 
         help("migrate-receive");
         return 2;
     }
-    migrate_receive(debug, daemonize, monitor);
+    migrate_receive(debug, daemonize, monitor,
+                    /* write to stdout */1, /* read from stdin */0);
     return 0;
 }

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [PATCH 6 of 6] libxl: resume instead of unpause on xl save -c
  2012-01-31  1:05 [PATCH 0 of 6] libxl: refactor suspend/resume code rshriram
                   ` (4 preceding siblings ...)
  2012-01-31  1:05 ` [PATCH 5 of 6] libxl: refactor migrate_domain and generalize migrate_receive rshriram
@ 2012-01-31  1:05 ` rshriram
  2012-01-31 10:21   ` Ian Campbell
  5 siblings, 1 reply; 30+ messages in thread
From: rshriram @ 2012-01-31  1:05 UTC (permalink / raw)
  To: xen-devel; +Cc: anthony.perard, brendan, ian.jackson, ian.campbell

# HG changeset patch
# User Shriram Rajagopalan <rshriram@cs.ubc.ca>
# Date 1327971541 28800
# Node ID ffc99e708e90eb140b0a6f2e7087d567e71e9d0f
# Parent  d79c7a853c644d459cda93bf61657be48104cd63
libxl: resume instead of unpause on xl save -c

The guest is "suspended" via libxl_domain_suspend when taking a snapshot.
So call libxl_domain_resume instead of libxl_domain_unpause, when taking
a checkpoint of the domain (using xl save -c).

Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca>

diff -r d79c7a853c64 -r ffc99e708e90 tools/libxl/xl_cmdimpl.c
--- a/tools/libxl/xl_cmdimpl.c	Mon Jan 30 16:58:53 2012 -0800
+++ b/tools/libxl/xl_cmdimpl.c	Mon Jan 30 16:59:01 2012 -0800
@@ -2524,7 +2524,7 @@ static int save_domain(const char *p, co
     close(fd);
 
     if (checkpoint)
-        libxl_domain_unpause(ctx, domid);
+        libxl_domain_resume(ctx, domid, 1);
     else
         libxl_domain_destroy(ctx, domid);

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1 of 6] libxl: helper function to send commands to traditional qemu
  2012-01-31  1:05 ` [PATCH 1 of 6] libxl: helper function to send commands to traditional qemu rshriram
@ 2012-01-31  9:46   ` Ian Campbell
  2012-01-31 17:30     ` Shriram Rajagopalan
  0 siblings, 1 reply; 30+ messages in thread
From: Ian Campbell @ 2012-01-31  9:46 UTC (permalink / raw)
  To: rshriram; +Cc: Anthony Perard, brendan, xen-devel, Ian Jackson

On Tue, 2012-01-31 at 01:05 +0000, rshriram@cs.ubc.ca wrote:
> # HG changeset patch
> # User Shriram Rajagopalan <rshriram@cs.ubc.ca>
> # Date 1327971493 28800
> # Node ID 20bbc4a754a701ef14c9136a1adffc1c90bc1f4a
> # Parent  e2722b24dc0962de37215320b05d1bb7c4c42864
> libxl: helper function to send commands to traditional qemu
> 
> Introduce a helper function to send commands to traditional
> qemu. qemu_pci_add_xenstore, qemu_pci_remove_xenstore,
> libxl__domain_save_device_model and libxl_domain_unpause have
> been refactored to use this function.
> 
> Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca>

Acked-by: Ian Campbell <ian.campbell@citrix.com>

Every caller to libxl__qemu_traditional_cmd seems to be followed with a
call to libxl__wait_for_device_model -- might be worth pushing that down
into the function?

> 
> diff -r e2722b24dc09 -r 20bbc4a754a7 tools/libxl/libxl.c
> --- a/tools/libxl/libxl.c	Thu Jan 26 17:43:31 2012 +0000
> +++ b/tools/libxl/libxl.c	Mon Jan 30 16:58:13 2012 -0800
> @@ -517,7 +517,7 @@ int libxl_domain_unpause(libxl_ctx *ctx,
>          path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/state", domid);
>          state = libxl__xs_read(gc, XBT_NULL, path);
>          if (state != NULL && !strcmp(state, "paused")) {
> -            libxl__xs_write(gc, XBT_NULL, libxl__sprintf(gc, "/local/domain/0/device-model/%d/command", domid), "continue");
> +            libxl__qemu_traditional_cmd(gc, domid, "continue");
>              libxl__wait_for_device_model(gc, domid, "running",
>                                           NULL, NULL, NULL);
>          }
> diff -r e2722b24dc09 -r 20bbc4a754a7 tools/libxl/libxl_dom.c
> --- a/tools/libxl/libxl_dom.c	Thu Jan 26 17:43:31 2012 +0000
> +++ b/tools/libxl/libxl_dom.c	Mon Jan 30 16:58:13 2012 -0800
> @@ -349,6 +349,15 @@ out:
>      return rc;
>  }
>  
> +int libxl__qemu_traditional_cmd(libxl__gc *gc, uint32_t domid,
> +                                const char *cmd)
> +{
> +    char *path = NULL;
> +    path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/command",
> +                          domid);
> +    return libxl__xs_write(gc, XBT_NULL, path, "%s", cmd);
> +}
> +
>  int libxl__domain_restore_common(libxl__gc *gc, uint32_t domid,
>                                   libxl_domain_build_info *info,
>                                   libxl__domain_build_state *state,
> @@ -631,12 +640,9 @@ int libxl__domain_save_device_model(libx
>  
>      switch (libxl__device_model_version_running(gc, domid)) {
>      case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL: {
> -        char *path = NULL;
>          LIBXL__LOG(ctx, LIBXL__LOG_DEBUG,
>                     "Saving device model state to %s", filename);
> -        path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/command",
> -                              domid);
> -        libxl__xs_write(gc, XBT_NULL, path, "save");
> +        libxl__qemu_traditional_cmd(gc, domid, "save");
>          libxl__wait_for_device_model(gc, domid, "paused", NULL, NULL, NULL);
>          break;
>      }
> diff -r e2722b24dc09 -r 20bbc4a754a7 tools/libxl/libxl_internal.h
> --- a/tools/libxl/libxl_internal.h	Thu Jan 26 17:43:31 2012 +0000
> +++ b/tools/libxl/libxl_internal.h	Mon Jan 30 16:58:13 2012 -0800
> @@ -263,6 +263,8 @@ _hidden int libxl__build_hvm(libxl__gc *
>                libxl_device_model_info *dm_info,
>                libxl__domain_build_state *state);
>  
> +_hidden int libxl__qemu_traditional_cmd(libxl__gc *gc, uint32_t domid,
> +                                        const char *cmd);
>  _hidden int libxl__domain_rename(libxl__gc *gc, uint32_t domid,
>                                   const char *old_name, const char *new_name,
>                                   xs_transaction_t trans);
> diff -r e2722b24dc09 -r 20bbc4a754a7 tools/libxl/libxl_pci.c
> --- a/tools/libxl/libxl_pci.c	Thu Jan 26 17:43:31 2012 +0000
> +++ b/tools/libxl/libxl_pci.c	Mon Jan 30 16:58:13 2012 -0800
> @@ -602,9 +602,8 @@ static int qemu_pci_add_xenstore(libxl__
>          libxl__xs_write(gc, XBT_NULL, path, PCI_BDF, pcidev->domain,
>                          pcidev->bus, pcidev->dev, pcidev->func);
>      }
> -    path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/command",
> -                          domid);
> -    xs_write(ctx->xsh, XBT_NULL, path, "pci-ins", strlen("pci-ins"));
> +
> +    libxl__qemu_traditional_cmd(gc, domid, "pci-ins");
>      rc = libxl__wait_for_device_model(gc, domid, NULL, NULL,
>                                        pci_ins_check, state);
>      path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/parameter",
> @@ -857,12 +856,11 @@ static int qemu_pci_remove_xenstore(libx
>      path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/parameter", domid);
>      libxl__xs_write(gc, XBT_NULL, path, PCI_BDF, pcidev->domain,
>                      pcidev->bus, pcidev->dev, pcidev->func);
> -    path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/command", domid);
>  
>      /* Remove all functions at once atomically by only signalling
>       * device-model for function 0 */
>      if ( !force && (pcidev->vdevfn & 0x7) == 0 ) {
> -        xs_write(ctx->xsh, XBT_NULL, path, "pci-rem", strlen("pci-rem"));
> +        libxl__qemu_traditional_cmd(gc, domid, "pci-rem");
>          if (libxl__wait_for_device_model(gc, domid, "pci-removed",
>                                           NULL, NULL, NULL) < 0) {
>              LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "Device Model didn't respond in time");

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 2 of 6] libxl: bugfix: create_domain() return to caller if !daemonize
  2012-01-31  1:05 ` [PATCH 2 of 6] libxl: bugfix: create_domain() return to caller if !daemonize rshriram
@ 2012-01-31  9:47   ` Ian Campbell
  2012-02-09 18:06     ` Ian Jackson
  0 siblings, 1 reply; 30+ messages in thread
From: Ian Campbell @ 2012-01-31  9:47 UTC (permalink / raw)
  To: rshriram; +Cc: Anthony Perard, brendan, xen-devel, Ian Jackson

On Tue, 2012-01-31 at 01:05 +0000, rshriram@cs.ubc.ca wrote:
> # HG changeset patch
> # User Shriram Rajagopalan <rshriram@cs.ubc.ca>
> # Date 1327971504 28800
> # Node ID 39f63438767a8225a3148a43139c10f55a62c669
> # Parent  20bbc4a754a701ef14c9136a1adffc1c90bc1f4a
> libxl: bugfix: create_domain() return to caller if !daemonize
> 
> Currently the create_domain function does not honor
> the daemonize flag properly. It exits irrespective of
> the value of the flag. This patch fixes the issue.
> 
> Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca>

Acked-by: Ian Campbell <ian.campbell@citrix.com>

> 
> diff -r 20bbc4a754a7 -r 39f63438767a tools/libxl/xl_cmdimpl.c
> --- a/tools/libxl/xl_cmdimpl.c	Mon Jan 30 16:58:13 2012 -0800
> +++ b/tools/libxl/xl_cmdimpl.c	Mon Jan 30 16:58:24 2012 -0800
> @@ -1814,7 +1814,7 @@ waitpid_out:
>       * If we have daemonized then do not return to the caller -- this has
>       * already happened in the parent.
>       */
> -    if ( !need_daemon )
> +    if ( daemonize && !need_daemon )
>          exit(ret);
>  
>      return ret;

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 3 of 6] libxl: QMP stop/resume & refactor QEMU suspend/resume/save
  2012-01-31  1:05 ` [PATCH 3 of 6] libxl: QMP stop/resume & refactor QEMU suspend/resume/save rshriram
@ 2012-01-31  9:54   ` Ian Campbell
  2012-01-31 17:32     ` Shriram Rajagopalan
  0 siblings, 1 reply; 30+ messages in thread
From: Ian Campbell @ 2012-01-31  9:54 UTC (permalink / raw)
  To: rshriram; +Cc: Anthony Perard, brendan, xen-devel, Ian Jackson

On Tue, 2012-01-31 at 01:05 +0000, rshriram@cs.ubc.ca wrote:
> # HG changeset patch
> # User Shriram Rajagopalan <rshriram@cs.ubc.ca>
> # Date 1327971511 28800
> # Node ID cd3d43d88c0568142dd061e744c34479e1a440f4
> # Parent  39f63438767a8225a3148a43139c10f55a62c669
> libxl: QMP stop/resume & refactor QEMU suspend/resume/save
> 
> Implement QMP stop and resume functionality and split
> device model save into 3 parts:
>  suspend_dm(domid)
>  save_dm(domid, fd)
>  resume_dm(domid)
> 
> Integrate Device model suspend into suspend_common_callback
> 
> Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca>
> 
> diff -r 39f63438767a -r cd3d43d88c05 tools/libxl/libxl_dom.c
> --- a/tools/libxl/libxl_dom.c	Mon Jan 30 16:58:24 2012 -0800
> +++ b/tools/libxl/libxl_dom.c	Mon Jan 30 16:58:31 2012 -0800
> @@ -425,6 +425,61 @@ static int libxl__domain_suspend_common_
>      return rc ? 0 : 1;
>  }
>  
> +int libxl__domain_suspend_device_model(libxl__gc *gc, uint32_t domid)
> +{
> +    libxl_ctx *ctx = libxl__gc_owner(gc);
> +    int ret = 0, fd2 = -1;
> +    const char *filename = libxl__device_model_savefile(gc, domid);
> +
> +    switch (libxl__device_model_version_running(gc, domid)) {
> +    case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL: {
> +        LIBXL__LOG(ctx, LIBXL__LOG_DEBUG,
> +                   "Saving device model state to %s", filename);
> +        libxl__qemu_traditional_cmd(gc, domid, "save");
> +        libxl__wait_for_device_model(gc, domid, "paused", NULL, NULL, NULL);
> +        break;
> +    }
> +    case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
> +        if (libxl__qmp_stop(gc, domid))
> +            return ERROR_FAIL;
> +        fd2 = open(filename, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
> +        if (fd2 < 0) {
> +            LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR,
> +                             "Unable to create a QEMU save file\n");
> +            return ERROR_FAIL;
> +        }
> +        /* Save DM state into fd2 */
> +        ret = libxl__qmp_migrate(gc, domid, fd2);
> +        if (ret)
> +            unlink(filename);
> +        close(fd2);
> +        fd2 = -1;
> +        break;
> +    default:
> +        return ERROR_INVAL;
> +    }
> +
> +    return ret;
> +}
> +
> +int libxl__domain_resume_device_model(libxl__gc *gc, uint32_t domid)
> +{
> +
> +    switch (libxl__device_model_version_running(gc, domid)) {
> +    case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL: {
> +        libxl__qemu_traditional_cmd(gc, domid, "continue");

No libxl__wait_for_device_model -> "running"?

> +        break;
> +    }
> +    case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
> +        if (libxl__qmp_resume(gc, domid))
> +            return ERROR_FAIL;
> +    default:
> +        return ERROR_INVAL;
> +    }
> +
> +    return 0;
> +}
> +
>  static int libxl__domain_suspend_common_callback(void *data)
>  {
>      struct suspendinfo *si = data;
> @@ -454,7 +509,7 @@ static int libxl__domain_suspend_common_
>              return 0;
>          }
>          si->guest_responded = 1;
> -        return 1;
> +        goto suspend_dm;
>      }
>  
>      if (si->hvm && (!hvm_pvdrv || hvm_s_state)) {
> @@ -532,7 +587,7 @@ static int libxl__domain_suspend_common_
>              shutdown_reason = (info.flags >> XEN_DOMINF_shutdownshift) & XEN_DOMINF_shutdownmask;
>              if (shutdown_reason == SHUTDOWN_suspend) {
>                  LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "guest has suspended");
> -                return 1;
> +                goto suspend_dm;
>              }
>          }
>  
> @@ -541,6 +596,17 @@ static int libxl__domain_suspend_common_
>  
>      LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "guest did not suspend");
>      return 0;
> +
> + suspend_dm:

The goto's make the code flow a bit tricky to follow (this function is
already a bit tricksy with the early exit for the evtchn suspend case).

Why not pass si to libxl__domain_suspend_device_model and let it contain
the "if !hvm return" and logging stuff so you can just call in in place
of the two goto statements?

> +    if (si->hvm) {
> +        ret = libxl__domain_suspend_device_model(si->gc, si->domid);
> +        if (ret) {
> +            LIBXL__LOG(ctx, LIBXL__LOG_ERROR,
> +                       "libxl__domain_suspend_device_model failed ret=%d", ret);
> +            return 0;
> +        }
> +    }
> +    return 1;
>  }
>  

Ian.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 4 of 6] libxl: support suspend_cancel in domain_resume
  2012-01-31  1:05 ` [PATCH 4 of 6] libxl: support suspend_cancel in domain_resume rshriram
@ 2012-01-31 10:00   ` Ian Campbell
  2012-01-31 10:04     ` Ian Campbell
  2012-01-31 17:52     ` Shriram Rajagopalan
  0 siblings, 2 replies; 30+ messages in thread
From: Ian Campbell @ 2012-01-31 10:00 UTC (permalink / raw)
  To: rshriram; +Cc: Anthony Perard, brendan, xen-devel, Ian Jackson

On Tue, 2012-01-31 at 01:05 +0000, rshriram@cs.ubc.ca wrote:
> # HG changeset patch
> # User Shriram Rajagopalan <rshriram@cs.ubc.ca>
> # Date 1327971527 28800
> # Node ID efe92d80c47487485056266a1404a8d2817b7f09
> # Parent  cd3d43d88c0568142dd061e744c34479e1a440f4
> libxl: support suspend_cancel in domain_resume
> 
> Add an extra parameter to libxl_domain_resume indicating
> if the caller wishes to use the SUSPEND_CANCEL style
> resume instead of the normal resume.
> 
> Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca>
> 
> diff -r cd3d43d88c05 -r efe92d80c474 tools/libxl/libxl.c
> --- a/tools/libxl/libxl.c	Mon Jan 30 16:58:31 2012 -0800
> +++ b/tools/libxl/libxl.c	Mon Jan 30 16:58:47 2012 -0800
> @@ -229,24 +229,29 @@ int libxl_domain_rename(libxl_ctx *ctx, 
>      return rc;
>  }
>  
> -int libxl_domain_resume(libxl_ctx *ctx, uint32_t domid)
> +int libxl_domain_resume(libxl_ctx *ctx, uint32_t domid, int fast)
>  {
>      GC_INIT(ctx);
>      int rc = 0;
>  
> -    if (LIBXL__DOMAIN_IS_TYPE(gc,  domid, HVM)) {
> -        LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "Called domain_resume on "
> -                "non-cooperative hvm domain %u", domid);
> -        rc = ERROR_NI;
> -        goto out;
> -    }
> -    if (xc_domain_resume(ctx->xch, domid, 0)) {
> +    if (xc_domain_resume(ctx->xch, domid, fast)) {
>          LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR,
>                          "xc_domain_resume failed for domain %u",
>                          domid);
>          rc = ERROR_FAIL;
>          goto out;
>      }
> +
> +    if (LIBXL__DOMAIN_IS_TYPE(gc,  domid, HVM)) {
> +        rc = libxl__domain_resume_device_model(gc, domid);
> +        if (rc) {
> +            LIBXL__LOG(ctx, LIBXL__LOG_ERROR,
> +                       "failed to resume device model for domain %u:%d",
> +                       domid, rc);
> +            goto out;
> +        }
> +    }
> +
>      if (!xs_resume_domain(ctx->xsh, domid)) {
>          LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR,
>                          "xs_resume_domain failed for domain %u",
> diff -r cd3d43d88c05 -r efe92d80c474 tools/libxl/libxl.h
> --- a/tools/libxl/libxl.h	Mon Jan 30 16:58:31 2012 -0800
> +++ b/tools/libxl/libxl.h	Mon Jan 30 16:58:47 2012 -0800
> @@ -268,7 +268,7 @@ int libxl_domain_create_restore(libxl_ct
>  void libxl_domain_config_dispose(libxl_domain_config *d_config);
>  int libxl_domain_suspend(libxl_ctx *ctx, libxl_domain_suspend_info *info,
>                            uint32_t domid, int fd);
> -int libxl_domain_resume(libxl_ctx *ctx, uint32_t domid);
> +int libxl_domain_resume(libxl_ctx *ctx, uint32_t domid, int fast);

Please can you add a few words about what fast means and when its use
would be appropriate.

I'm not sure fast is right word -- it happens that this mechanism is
faster but isn't the real meaning "co-operative" or something along
those lines?

Would a better name be libxl_domain_suspend_cancel, sharing a common
helper internally with libxl_domain_resume?

Ian.

>  int libxl_domain_shutdown(libxl_ctx *ctx, uint32_t domid);
>  int libxl_domain_reboot(libxl_ctx *ctx, uint32_t domid);
>  int libxl_domain_destroy(libxl_ctx *ctx, uint32_t domid);
> diff -r cd3d43d88c05 -r efe92d80c474 tools/libxl/xl_cmdimpl.c
> --- a/tools/libxl/xl_cmdimpl.c	Mon Jan 30 16:58:31 2012 -0800
> +++ b/tools/libxl/xl_cmdimpl.c	Mon Jan 30 16:58:47 2012 -0800
> @@ -2751,7 +2751,7 @@ static void migrate_domain(const char *d
>          if (common_domname) {
>              libxl_domain_rename(ctx, domid, away_domname, common_domname);
>          }
> -        rc = libxl_domain_resume(ctx, domid);
> +        rc = libxl_domain_resume(ctx, domid, 1);
>          if (!rc) fprintf(stderr, "migration sender: Resumed OK.\n");
>  
>          fprintf(stderr, "Migration failed due to problems at target.\n");
> @@ -2773,7 +2773,7 @@ static void migrate_domain(const char *d
>      close(send_fd);
>      migration_child_report(child, recv_fd);
>      fprintf(stderr, "Migration failed, resuming at sender.\n");
> -    libxl_domain_resume(ctx, domid);
> +    libxl_domain_resume(ctx, domid, 1);
>      exit(-ERROR_FAIL);
>  
>   failed_badly:

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 4 of 6] libxl: support suspend_cancel in domain_resume
  2012-01-31 10:00   ` Ian Campbell
@ 2012-01-31 10:04     ` Ian Campbell
  2012-01-31 17:52     ` Shriram Rajagopalan
  1 sibling, 0 replies; 30+ messages in thread
From: Ian Campbell @ 2012-01-31 10:04 UTC (permalink / raw)
  To: rshriram; +Cc: Anthony Perard, brendan, xen-devel, Ian Jackson

On Tue, 2012-01-31 at 10:00 +0000, Ian Campbell wrote:
> On Tue, 2012-01-31 at 01:05 +0000, rshriram@cs.ubc.ca wrote:
> > diff -r cd3d43d88c05 -r efe92d80c474 tools/libxl/libxl.h
> > --- a/tools/libxl/libxl.h	Mon Jan 30 16:58:31 2012 -0800
> > +++ b/tools/libxl/libxl.h	Mon Jan 30 16:58:47 2012 -0800
> > @@ -268,7 +268,7 @@ int libxl_domain_create_restore(libxl_ct
> >  void libxl_domain_config_dispose(libxl_domain_config *d_config);
> >  int libxl_domain_suspend(libxl_ctx *ctx, libxl_domain_suspend_info *info,
> >                            uint32_t domid, int fd);
> > -int libxl_domain_resume(libxl_ctx *ctx, uint32_t domid);
> > +int libxl_domain_resume(libxl_ctx *ctx, uint32_t domid, int fast);
> 
> Please can you add a few words about what fast means and when its use
> would be appropriate.
> 
> I'm not sure fast is right word -- it happens that this mechanism is
> faster but isn't the real meaning "co-operative" or something along
> those lines?
> 
> Would a better name be libxl_domain_suspend_cancel, sharing a common
> helper internally with libxl_domain_resume?

BTW, is there any way to tell if the guest even claims to support this
mechanism?

Ian.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 5 of 6] libxl: refactor migrate_domain and generalize migrate_receive
  2012-01-31  1:05 ` [PATCH 5 of 6] libxl: refactor migrate_domain and generalize migrate_receive rshriram
@ 2012-01-31 10:19   ` Ian Campbell
  0 siblings, 0 replies; 30+ messages in thread
From: Ian Campbell @ 2012-01-31 10:19 UTC (permalink / raw)
  To: rshriram; +Cc: Anthony Perard, brendan, xen-devel, Ian Jackson

On Tue, 2012-01-31 at 01:05 +0000, rshriram@cs.ubc.ca wrote:
> # HG changeset patch
> # User Shriram Rajagopalan <rshriram@cs.ubc.ca>
> # Date 1327971533 28800
> # Node ID d79c7a853c644d459cda93bf61657be48104cd63
> # Parent  efe92d80c47487485056266a1404a8d2817b7f09
> libxl: refactor migrate_domain and generalize migrate_receive
> 
> Refactor some tasks like establishing the migration channel,
> initial migration protocol exchange into separate functions,
> to facilitate re-use, when remus support is introduced. Also,
> make migrate_receive generic (instead of resorting to stdin and
> stdout as the file descriptors for communication).
> 
> Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca>
> 
> diff -r efe92d80c474 -r d79c7a853c64 tools/libxl/xl_cmdimpl.c
> --- a/tools/libxl/xl_cmdimpl.c	Mon Jan 30 16:58:47 2012 -0800
> +++ b/tools/libxl/xl_cmdimpl.c	Mon Jan 30 16:58:53 2012 -0800
> @@ -2531,6 +2531,43 @@ static int save_domain(const char *p, co
>      exit(0);
>  }
>  
> +static pid_t create_migration_child(const char *rune, int *send_fd,
> +                                        int *recv_fd)
> +{
> +    int sendpipe[2], recvpipe[2];
> +    pid_t child = -1;
> +
> +    if (!rune || !send_fd || !recv_fd)
> +        return -1;
> +
> +    MUST( libxl_pipe(ctx, sendpipe) );
> +    MUST( libxl_pipe(ctx, recvpipe) );
> +
> +    child = libxl_fork(ctx);
> +    if (child==-1) exit(1);
> +
> +    if (!child) {
> +        dup2(sendpipe[0], 0);
> +        dup2(recvpipe[1], 1);
> +        close(sendpipe[0]); close(sendpipe[1]);
> +        close(recvpipe[0]); close(recvpipe[1]);
> +        execlp("sh","sh","-c",rune,(char*)0);
> +        perror("failed to exec sh");
> +        exit(-1);
> +    }
> +
> +    close(sendpipe[0]);
> +    close(recvpipe[1]);
> +    *send_fd = sendpipe[1];
> +    *recv_fd = recvpipe[0];
> +
> +    /* if receiver dies, we get an error and can clean up
> +       rather than just dying */
> +    signal(SIGPIPE, SIG_IGN);
> +
> +    return child;
> +}
> +
>  static int migrate_read_fixedmessage(int fd, const void *msg, int msgsz,
>                                       const char *what, const char *rune) {
>      char buf[msgsz];
> @@ -2616,18 +2653,23 @@ static void migration_child_report(pid_t
>      migration_child = 0;
>  }
>  
> -static void migrate_domain(const char *domain_spec, const char *rune,
> -                           const char *override_config_file)
> +/* It is okay to have a NULL rune (we could be communicating over tcp sockets
> + * but not both NULL rune and NULL(send_fd, recv_fd).
> + */

The first check in the function ("Cannot create migration channel...")
seems to insist that both send_fd and recv_fd must be non-NULL without
reference to rune?

> +static void migrate_do_preamble(const char *domain_spec, const char *rune,
> +                                const char *override_config_file,
> +                                int *send_fd, int *recv_fd, pid_t *mchild)
>  {
> -    pid_t child = -1;
> -    int rc;
> -    int sendpipe[2], recvpipe[2];
> -    int send_fd, recv_fd;
> -    libxl_domain_suspend_info suspinfo;
> -    char *away_domname;
> -    char rc_buf;
> +    int rc = 0;
>      uint8_t *config_data;
>      int config_len;
> +    pid_t child = -1;
> +
> +    if (!send_fd || !recv_fd) {
> +        fprintf(stderr, "Cannot create migration channel:"
> +                "Null file descriptors supplied!\n");
> +        exit(1);
> +    }
>  
>      save_domain_core_begin(domain_spec, override_config_file,
>                             &config_data, &config_len);
> @@ -2638,43 +2680,44 @@ static void migrate_domain(const char *d
[...]
> +    if (*send_fd < 0 || *recv_fd < 0) {
> +        if (rune)
> +            child = create_migration_child(rune, send_fd, recv_fd);
> +        if (child < 0) {
> +            fprintf(stderr, "failed to create migration channel"
> +                    " - empty command ?\n");
> +            exit(1);
> +        }
> +    }

I think the migrate_domain function should contain:
	save_domain_core_begin
	create_migration_child(&rx_fd, &tx_fd) (or int fd[2])
	migrate_do_preamble(rx_fd, tx_fd) 

And that migrate_do_preamble should start at this point:

> +
> +    rc = migrate_read_fixedmessage(*recv_fd, migrate_receiver_banner,
>                                     sizeof(migrate_receiver_banner)-1,
>                                     "banner", rune);

Rather than trying to push the create_migration_child down into
migrate_do_preamble. I think that gives us sufficient building blocks
and shared code to do what you want or to add tcp support later etc.

> @@ -2798,7 +2841,12 @@ static void core_dump_domain(const char 
>      if (rc) { fprintf(stderr,"core dump failed (rc=%d)\n",rc);exit(-1); }
>  }
>  
> -static void migrate_receive(int debug, int daemonize, int monitor)
> +/* Explicitly specifying a send & recv fd allows us to switch to a tcp socket
> + * based migration/replication channel in future, instead of exec/forking from
> + * an ssh channel.
> + */

I don't think you need to justify the use of send_fd/recv_fd in the
comment here, it seems natural to allow the caller to pass in the
communication fds for whatever reason and the commit message is a fine
place to mention your specific reasons for wanting it.

[...]
> @@ -2983,7 +3031,8 @@ int main_migrate_receive(int argc, char 
>          help("migrate-receive");
>          return 2;
>      }
> -    migrate_receive(debug, daemonize, monitor);
> +    migrate_receive(debug, daemonize, monitor,
> +                    /* write to stdout */1, /* read from stdin */0);

unistd.h defines STD{IN,OUT,ERR}_FILENO which would be useful here.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 6 of 6] libxl: resume instead of unpause on xl save -c
  2012-01-31  1:05 ` [PATCH 6 of 6] libxl: resume instead of unpause on xl save -c rshriram
@ 2012-01-31 10:21   ` Ian Campbell
  2012-01-31 17:55     ` Shriram Rajagopalan
  0 siblings, 1 reply; 30+ messages in thread
From: Ian Campbell @ 2012-01-31 10:21 UTC (permalink / raw)
  To: rshriram; +Cc: Anthony Perard, brendan, xen-devel, Ian Jackson

On Tue, 2012-01-31 at 01:05 +0000, rshriram@cs.ubc.ca wrote:
> # HG changeset patch
> # User Shriram Rajagopalan <rshriram@cs.ubc.ca>
> # Date 1327971541 28800
> # Node ID ffc99e708e90eb140b0a6f2e7087d567e71e9d0f
> # Parent  d79c7a853c644d459cda93bf61657be48104cd63
> libxl: resume instead of unpause on xl save -c
> 
> The guest is "suspended" via libxl_domain_suspend when taking a snapshot.
> So call libxl_domain_resume instead of libxl_domain_unpause, when taking
> a checkpoint of the domain (using xl save -c).
> 
> Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca>

Does checkpoint imply/require support for this resume mechanism?

> diff -r d79c7a853c64 -r ffc99e708e90 tools/libxl/xl_cmdimpl.c
> --- a/tools/libxl/xl_cmdimpl.c	Mon Jan 30 16:58:53 2012 -0800
> +++ b/tools/libxl/xl_cmdimpl.c	Mon Jan 30 16:59:01 2012 -0800
> @@ -2524,7 +2524,7 @@ static int save_domain(const char *p, co
>      close(fd);
>  
>      if (checkpoint)
> -        libxl_domain_unpause(ctx, domid);
> +        libxl_domain_resume(ctx, domid, 1);
>      else
>          libxl_domain_destroy(ctx, domid);
>  

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1 of 6] libxl: helper function to send commands to traditional qemu
  2012-01-31  9:46   ` Ian Campbell
@ 2012-01-31 17:30     ` Shriram Rajagopalan
  2012-02-09 18:08       ` Ian Jackson
  0 siblings, 1 reply; 30+ messages in thread
From: Shriram Rajagopalan @ 2012-01-31 17:30 UTC (permalink / raw)
  To: Ian Campbell; +Cc: Anthony Perard, brendan, xen-devel, Ian Jackson


[-- Attachment #1.1: Type: text/plain, Size: 1436 bytes --]

On Tue, Jan 31, 2012 at 1:46 AM, Ian Campbell <Ian.Campbell@citrix.com>wrote:

> On Tue, 2012-01-31 at 01:05 +0000, rshriram@cs.ubc.ca wrote:
> > # HG changeset patch
> > # User Shriram Rajagopalan <rshriram@cs.ubc.ca>
> > # Date 1327971493 28800
> > # Node ID 20bbc4a754a701ef14c9136a1adffc1c90bc1f4a
> > # Parent  e2722b24dc0962de37215320b05d1bb7c4c42864
> > libxl: helper function to send commands to traditional qemu
> >
> > Introduce a helper function to send commands to traditional
> > qemu. qemu_pci_add_xenstore, qemu_pci_remove_xenstore,
> > libxl__domain_save_device_model and libxl_domain_unpause have
> > been refactored to use this function.
> >
> > Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca>
>
> Acked-by: Ian Campbell <ian.campbell@citrix.com>
>
> Every caller to libxl__qemu_traditional_cmd seems to be followed with a
> call to libxl__wait_for_device_model -- might be worth pushing that down
> into the function?
>
>
Yes I noticed that. Though some seem to be using the callback while others
dont.
This would necessitate another helper function, like
libxl_qemu_traditional_change_state
that does both the command and the state check (if callback supplied). This
way people could still use the libxl_qemu_traditional_cmd for use-cases
that dont
require a special state check. But all this is for another , later patch
:P. At the moment,
I just want to get the remus framework in :D.

thanks for the ack.

[-- Attachment #1.2: Type: text/html, Size: 2055 bytes --]

[-- Attachment #2: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 3 of 6] libxl: QMP stop/resume & refactor QEMU suspend/resume/save
  2012-01-31  9:54   ` Ian Campbell
@ 2012-01-31 17:32     ` Shriram Rajagopalan
  2012-01-31 23:53       ` Shriram Rajagopalan
  0 siblings, 1 reply; 30+ messages in thread
From: Shriram Rajagopalan @ 2012-01-31 17:32 UTC (permalink / raw)
  To: Ian Campbell; +Cc: Anthony Perard, brendan, xen-devel, Ian Jackson


[-- Attachment #1.1: Type: text/plain, Size: 2179 bytes --]

On Tue, Jan 31, 2012 at 1:54 AM, Ian Campbell <Ian.Campbell@citrix.com>wrote:

> On Tue, 2012-01-31 at 01:05 +0000, rshriram@cs.ubc.ca wrote:
> > +int libxl__domain_resume_device_model(libxl__gc *gc, uint32_t domid)
> > +{
> > +
> > +    switch (libxl__device_model_version_running(gc, domid)) {
> > +    case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL: {
> > +        libxl__qemu_traditional_cmd(gc, domid, "continue");
>
> No libxl__wait_for_device_model -> "running"?
>
>
Nope. Thats how xend/remus also does it. There seems to be no reference to
such
a state anywhere.


>  > +        break;
> > +    }
> > +    case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
> > +        if (libxl__qmp_resume(gc, domid))
> > +            return ERROR_FAIL;
> > +    default:
> > +        return ERROR_INVAL;
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> >  static int libxl__domain_suspend_common_callback(void *data)
> >  {
> >      struct suspendinfo *si = data;
> > @@ -454,7 +509,7 @@ static int libxl__domain_suspend_common_
> >              return 0;
> >          }
> >          si->guest_responded = 1;
> > -        return 1;
> > +        goto suspend_dm;
> >      }
> >
> >      if (si->hvm && (!hvm_pvdrv || hvm_s_state)) {
> > @@ -532,7 +587,7 @@ static int libxl__domain_suspend_common_
> >              shutdown_reason = (info.flags >> XEN_DOMINF_shutdownshift)
> & XEN_DOMINF_shutdownmask;
> >              if (shutdown_reason == SHUTDOWN_suspend) {
> >                  LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "guest has
> suspended");
> > -                return 1;
> > +                goto suspend_dm;
> >              }
> >          }
> >
> > @@ -541,6 +596,17 @@ static int libxl__domain_suspend_common_
> >
> >      LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "guest did not suspend");
> >      return 0;
> > +
> > + suspend_dm:
>
> The goto's make the code flow a bit tricky to follow (this function is
> already a bit tricksy with the early exit for the evtchn suspend case).
>
> Why not pass si to libxl__domain_suspend_device_model and let it contain
> the "if !hvm return" and logging stuff so you can just call in in place
> of the two goto statements?
>
>
will do.

shriram

[-- Attachment #1.2: Type: text/html, Size: 3178 bytes --]

[-- Attachment #2: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 4 of 6] libxl: support suspend_cancel in domain_resume
  2012-01-31 10:00   ` Ian Campbell
  2012-01-31 10:04     ` Ian Campbell
@ 2012-01-31 17:52     ` Shriram Rajagopalan
  2012-02-01 10:47       ` Ian Campbell
  2012-02-01 10:53       ` Ian Campbell
  1 sibling, 2 replies; 30+ messages in thread
From: Shriram Rajagopalan @ 2012-01-31 17:52 UTC (permalink / raw)
  To: Ian Campbell; +Cc: Anthony Perard, brendan, xen-devel, Ian Jackson


[-- Attachment #1.1: Type: text/plain, Size: 2267 bytes --]

On Tue, Jan 31, 2012 at 2:00 AM, Ian Campbell <Ian.Campbell@citrix.com>wrote:

> On Tue, 2012-01-31 at 01:05 +0000, rshriram@cs.ubc.ca wrote:Please can
> you add a few words about what fast means and when its use
> would be appropriate.
>
> I'm not sure fast is right word -- it happens that this mechanism is
> faster but isn't the real meaning "co-operative" or something along
> those lines?
>
> Would a better name be libxl_domain_suspend_cancel, sharing a common
> helper internally with libxl_domain_resume?
>
>
The explanation is already there, on top of xc_domain_resume in libxc.
fast suspend - hvm guest checks for the return code of the shutdown call.
If it returns 1,
  then it just resumes, else shuts down. (similar case for PV guest)

if fast = 0, then the guest is led to believe that it is resuming in a new
host (ie wakeup
from hibernate sorts).

As for guests supporting this fast suspend, almost all guests (pv/hvm
linux/windows)
do support suspend_cancel. IIRC, I think some very old kernels didnt have
this ability.


 Ian.
>
> >  int libxl_domain_shutdown(libxl_ctx *ctx, uint32_t domid);
> >  int libxl_domain_reboot(libxl_ctx *ctx, uint32_t domid);
> >  int libxl_domain_destroy(libxl_ctx *ctx, uint32_t domid);
> > diff -r cd3d43d88c05 -r efe92d80c474 tools/libxl/xl_cmdimpl.c
> > --- a/tools/libxl/xl_cmdimpl.c        Mon Jan 30 16:58:31 2012 -0800
> > +++ b/tools/libxl/xl_cmdimpl.c        Mon Jan 30 16:58:47 2012 -0800
> > @@ -2751,7 +2751,7 @@ static void migrate_domain(const char *d
> >          if (common_domname) {
> >              libxl_domain_rename(ctx, domid, away_domname,
> common_domname);
> >          }
> > -        rc = libxl_domain_resume(ctx, domid);
> > +        rc = libxl_domain_resume(ctx, domid, 1);
> >          if (!rc) fprintf(stderr, "migration sender: Resumed OK.\n");
> >
> >          fprintf(stderr, "Migration failed due to problems at
> target.\n");
> > @@ -2773,7 +2773,7 @@ static void migrate_domain(const char *d
> >      close(send_fd);
> >      migration_child_report(child, recv_fd);
> >      fprintf(stderr, "Migration failed, resuming at sender.\n");
> > -    libxl_domain_resume(ctx, domid);
> > +    libxl_domain_resume(ctx, domid, 1);
> >      exit(-ERROR_FAIL);
> >
> >   failed_badly:
>
>
>

[-- Attachment #1.2: Type: text/html, Size: 3181 bytes --]

[-- Attachment #2: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 6 of 6] libxl: resume instead of unpause on xl save -c
  2012-01-31 10:21   ` Ian Campbell
@ 2012-01-31 17:55     ` Shriram Rajagopalan
  0 siblings, 0 replies; 30+ messages in thread
From: Shriram Rajagopalan @ 2012-01-31 17:55 UTC (permalink / raw)
  To: Ian Campbell; +Cc: Anthony Perard, brendan, xen-devel, Ian Jackson


[-- Attachment #1.1: Type: text/plain, Size: 1464 bytes --]

On Tue, Jan 31, 2012 at 2:21 AM, Ian Campbell <Ian.Campbell@citrix.com>wrote:

> On Tue, 2012-01-31 at 01:05 +0000, rshriram@cs.ubc.ca wrote:
> > # HG changeset patch
> > # User Shriram Rajagopalan <rshriram@cs.ubc.ca>
> > # Date 1327971541 28800
> > # Node ID ffc99e708e90eb140b0a6f2e7087d567e71e9d0f
> > # Parent  d79c7a853c644d459cda93bf61657be48104cd63
> > libxl: resume instead of unpause on xl save -c
> >
> > The guest is "suspended" via libxl_domain_suspend when taking a snapshot.
> > So call libxl_domain_resume instead of libxl_domain_unpause, when taking
> > a checkpoint of the domain (using xl save -c).
> >
> > Signed-off-by: Shriram Rajagopalan <rshriram@cs.ubc.ca>
>
> Does checkpoint imply/require support for this resume mechanism?
>
>
Yes. pause & unpause, suspend & resume - thats the order of calls.
A checkpoint involves
 suspend_guest
  copy out data
 resume_guest

The current libxl code does
 suspend_guest
   copy out data
 unpause_guest



>  > diff -r d79c7a853c64 -r ffc99e708e90 tools/libxl/xl_cmdimpl.c
> > --- a/tools/libxl/xl_cmdimpl.c        Mon Jan 30 16:58:53 2012 -0800
> > +++ b/tools/libxl/xl_cmdimpl.c        Mon Jan 30 16:59:01 2012 -0800
> > @@ -2524,7 +2524,7 @@ static int save_domain(const char *p, co
> >      close(fd);
> >
> >      if (checkpoint)
> > -        libxl_domain_unpause(ctx, domid);
> > +        libxl_domain_resume(ctx, domid, 1);
> >      else
> >          libxl_domain_destroy(ctx, domid);
> >
>
>
>

[-- Attachment #1.2: Type: text/html, Size: 2320 bytes --]

[-- Attachment #2: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 3 of 6] libxl: QMP stop/resume & refactor QEMU suspend/resume/save
  2012-01-31 17:32     ` Shriram Rajagopalan
@ 2012-01-31 23:53       ` Shriram Rajagopalan
  2012-02-01 10:46         ` Ian Campbell
  0 siblings, 1 reply; 30+ messages in thread
From: Shriram Rajagopalan @ 2012-01-31 23:53 UTC (permalink / raw)
  To: Ian Campbell; +Cc: Anthony Perard, brendan, xen-devel, Ian Jackson


[-- Attachment #1.1: Type: text/plain, Size: 2847 bytes --]

On Tue, Jan 31, 2012 at 9:32 AM, Shriram Rajagopalan <rshriram@cs.ubc.ca>wrote:

> On Tue, Jan 31, 2012 at 1:54 AM, Ian Campbell <Ian.Campbell@citrix.com>wrote:
>
>> On Tue, 2012-01-31 at 01:05 +0000, rshriram@cs.ubc.ca wrote:
>> > +int libxl__domain_resume_device_model(libxl__gc *gc, uint32_t domid)
>> > +{
>> > +
>> > +    switch (libxl__device_model_version_running(gc, domid)) {
>> > +    case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL: {
>> > +        libxl__qemu_traditional_cmd(gc, domid, "continue");
>>
>> No libxl__wait_for_device_model -> "running"?
>>
>>
> Nope. Thats how xend/remus also does it. There seems to be no reference to
> such
> a state anywhere.
>
>
>>  > +        break;
>> > +    }
>> > +    case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
>> > +        if (libxl__qmp_resume(gc, domid))
>> > +            return ERROR_FAIL;
>> > +    default:
>> > +        return ERROR_INVAL;
>> > +    }
>> > +
>> > +    return 0;
>> > +}
>> > +
>> >  static int libxl__domain_suspend_common_callback(void *data)
>> >  {
>> >      struct suspendinfo *si = data;
>> > @@ -454,7 +509,7 @@ static int libxl__domain_suspend_common_
>> >              return 0;
>> >          }
>> >          si->guest_responded = 1;
>> > -        return 1;
>> > +        goto suspend_dm;
>> >      }
>> >
>> >      if (si->hvm && (!hvm_pvdrv || hvm_s_state)) {
>> > @@ -532,7 +587,7 @@ static int libxl__domain_suspend_common_
>> >              shutdown_reason = (info.flags >> XEN_DOMINF_shutdownshift)
>> & XEN_DOMINF_shutdownmask;
>> >              if (shutdown_reason == SHUTDOWN_suspend) {
>> >                  LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "guest has
>> suspended");
>> > -                return 1;
>> > +                goto suspend_dm;
>> >              }
>> >          }
>> >
>> > @@ -541,6 +596,17 @@ static int libxl__domain_suspend_common_
>> >
>> >      LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "guest did not suspend");
>> >      return 0;
>> > +
>> > + suspend_dm:
>>
>> The goto's make the code flow a bit tricky to follow (this function is
>> already a bit tricksy with the early exit for the evtchn suspend case).
>>
>> Why not pass si to libxl__domain_suspend_device_model and let it contain
>> the "if !hvm return" and logging stuff so you can just call in in place
>> of the two goto statements?
>>
>>
> will do.
>
>
I gave it a shot. Passing suspendinfo struct to suspend_device_model is not
feasible, as the function is declared in libxl_internal.h, but suspendinfo
struct
declaration is local to libxl_dom.c.  I am not sure if its worthwhile to
declare a private struct like suspendinfo in libxl_internal.h, just to get
rid of this goto.

OTOH, passing a dummy hvm parameter to the function looks a bit silly

libxl__domain_suspend_device_model(libxl__gc *gc, uint32_t domid, int hvm)

What do you think? goto needs to go?

shriram

[-- Attachment #1.2: Type: text/html, Size: 4191 bytes --]

[-- Attachment #2: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 3 of 6] libxl: QMP stop/resume & refactor QEMU suspend/resume/save
  2012-01-31 23:53       ` Shriram Rajagopalan
@ 2012-02-01 10:46         ` Ian Campbell
  0 siblings, 0 replies; 30+ messages in thread
From: Ian Campbell @ 2012-02-01 10:46 UTC (permalink / raw)
  To: rshriram; +Cc: Anthony Perard, brendan, xen-devel, Ian Jackson

On Tue, 2012-01-31 at 23:53 +0000, Shriram Rajagopalan wrote:
> On Tue, Jan 31, 2012 at 9:32 AM, Shriram Rajagopalan
> <rshriram@cs.ubc.ca> wrote:
>         On Tue, Jan 31, 2012 at 1:54 AM, Ian Campbell
>         <Ian.Campbell@citrix.com> wrote:
>         
>                 On Tue, 2012-01-31 at 01:05 +0000, rshriram@cs.ubc.ca
>                 wrote:
>                 
>                 > +int libxl__domain_resume_device_model(libxl__gc
>                 *gc, uint32_t domid)
>                 > +{
>                 > +
>                 > +    switch (libxl__device_model_version_running(gc,
>                 domid)) {
>                 > +    case
>                 LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL: {
>                 > +        libxl__qemu_traditional_cmd(gc, domid,
>                 "continue");
>                 
>                 
>                 No libxl__wait_for_device_model -> "running"?
>                 
>                 
>         
>         Nope. Thats how xend/remus also does it.

That doesn't mean they are correct. How do you handle the error case
where qemu does not restart correctly?

>          There seems to be no reference to such a state anywhere.

I see several calls to libxl__wait_for_device_model with the parameter
running in the libxl code base:

$ rgrep -E "libxl__wait_for_device_model.*\"running\"" tools/libxl/
tools/libxl/libxl_pci.c:        if (libxl__wait_for_device_model(gc, domid, "running",
tools/libxl/libxl_pci.c:        if (libxl__wait_for_device_model(gc, domid, "running",
tools/libxl/libxl.c:            libxl__wait_for_device_model(gc, domid, "running",

>          [...]
>                 
>                 >
>                 > @@ -541,6 +596,17 @@ static int
>                 libxl__domain_suspend_common_
>                 >
>                 >      LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "guest did
>                 not suspend");
>                 >      return 0;
>                 > +
>                 > + suspend_dm:
>                 
>                 
>                 The goto's make the code flow a bit tricky to follow
>                 (this function is
>                 already a bit tricksy with the early exit for the
>                 evtchn suspend case).
>                 
>                 Why not pass si to libxl__domain_suspend_device_model
>                 and let it contain
>                 the "if !hvm return" and logging stuff so you can just
>                 call in in place
>                 of the two goto statements?
>                 
>                 
>         
>         will do. 
>         
>         
>         
> 
> I gave it a shot. Passing suspendinfo struct to suspend_device_model
> is not feasible, as the function is declared in libxl_internal.h, but
> suspendinfo struct declaration is local to libxl_dom.c.  I am not sure
> if its worthwhile to declare a private struct like suspendinfo in
> libxl_internal.h, just to get rid of this goto.

I see two choices, you could either have a forward declaration of
suspendinfo in libxl_internal.h or you could make
libxl__domain_suspend_device_model static/internal to libxl_dom.c. I
slightly prefer the latter unless there is some reason to expose it to
the rest of libxl.

The same would go for libxl__domain_resume_device_model apart from the
call from libxl_domain_resume in libxl.c. There's an argument that this
function should be in libxl_dom.c anyway.

> OTOH, passing a dummy hvm parameter to the function looks a bit silly
> 
> libxl__domain_suspend_device_model(libxl__gc *gc, uint32_t domid, int
> hvm)
> 
> 
> What do you think? goto needs to go? 

Taking another look at the control flow perhaps it does make sense, if
you think of the label as the tail of the success case rather than
something specific to suspending the device model then it removes the
two "return success" cases and combines them into a common tail, so long
as all success cases go through here (which I think is the case).

We still have multiple error exit cases, including the one where you
just fall through the loop (which combined with the unusual 0==error
return is a little confusing), but I think we can live with those.

So on second thoughts I think just renaming the label "guest_suspended"
would be fine.

Whether you also want to make
libxl__domain_{suspend,resume}_device_model internal to libxl_dom.c is
up to you.

Ian.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 4 of 6] libxl: support suspend_cancel in domain_resume
  2012-01-31 17:52     ` Shriram Rajagopalan
@ 2012-02-01 10:47       ` Ian Campbell
  2012-02-01 10:53       ` Ian Campbell
  1 sibling, 0 replies; 30+ messages in thread
From: Ian Campbell @ 2012-02-01 10:47 UTC (permalink / raw)
  To: rshriram; +Cc: Anthony Perard, brendan, xen-devel, Ian Jackson

On Tue, 2012-01-31 at 17:52 +0000, Shriram Rajagopalan wrote:
> On Tue, Jan 31, 2012 at 2:00 AM, Ian Campbell
> <Ian.Campbell@citrix.com> wrote:
>         On Tue, 2012-01-31 at 01:05 +0000, rshriram@cs.ubc.ca
>         wrote:Please can you add a few words about what fast means and
>         when its use
>         
>         would be appropriate.
>         
>         I'm not sure fast is right word -- it happens that this
>         mechanism is
>         faster but isn't the real meaning "co-operative" or something
>         along
>         those lines?
>         
>         Would a better name be libxl_domain_suspend_cancel, sharing a
>         common
>         helper internally with libxl_domain_resume?
>         
> 
> The explanation is already there, on top of xc_domain_resume in libxc.

It says that fast == cooperative resume but my concern was whether
"fast" is the right word for this parameter. Fast isn't really the
salient property here -- the fact that it requires guest cooperation
(which does happen to make it faster) is.

Anyway, regardless of what it is called it should be explained as part
of the libxl function otherwise users of libxl are never going to see
it.

[...]
> As for guests supporting this fast suspend, almost all guests (pv/hvm
> linux/windows) do support suspend_cancel. IIRC, I think some very old
> kernels didnt have this ability.

Old kernel or not, how do we detect it?

Ian.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 4 of 6] libxl: support suspend_cancel in domain_resume
  2012-01-31 17:52     ` Shriram Rajagopalan
  2012-02-01 10:47       ` Ian Campbell
@ 2012-02-01 10:53       ` Ian Campbell
  2012-02-01 15:48         ` Pasi Kärkkäinen
  1 sibling, 1 reply; 30+ messages in thread
From: Ian Campbell @ 2012-02-01 10:53 UTC (permalink / raw)
  To: rshriram; +Cc: Anthony Perard, brendan, xen-devel, Ian Jackson

On Tue, 2012-01-31 at 17:52 +0000, Shriram Rajagopalan wrote:
> As for guests supporting this fast suspend, almost all guests (pv/hvm
> linux/windows) do support suspend_cancel. IIRC, I think some very old
> kernels didnt have this ability.

Slight aside, does Remus work with mainline Linux domU kernels? Users
seem to be under under that impression.

I know you had some patches at one point but I a) don't remember if they
went in and b) don't remember if they were sufficient.

Ian.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 4 of 6] libxl: support suspend_cancel in domain_resume
  2012-02-01 10:53       ` Ian Campbell
@ 2012-02-01 15:48         ` Pasi Kärkkäinen
  2012-02-01 16:02           ` Ian Campbell
  0 siblings, 1 reply; 30+ messages in thread
From: Pasi Kärkkäinen @ 2012-02-01 15:48 UTC (permalink / raw)
  To: Ian Campbell; +Cc: rshriram, Anthony Perard, xen-devel, Ian Jackson, brendan

On Wed, Feb 01, 2012 at 10:53:54AM +0000, Ian Campbell wrote:
> On Tue, 2012-01-31 at 17:52 +0000, Shriram Rajagopalan wrote:
> > As for guests supporting this fast suspend, almost all guests (pv/hvm
> > linux/windows) do support suspend_cancel. IIRC, I think some very old
> > kernels didnt have this ability.
> 
> Slight aside, does Remus work with mainline Linux domU kernels? Users
> seem to be under under that impression.
> 
> I know you had some patches at one point but I a) don't remember if they
> went in and b) don't remember if they were sufficient.
> 

http://wiki.xen.org/wiki/Remus
"Linux 2.6.39.2 and later upstream kernel.org versions are now supported as PV domU kernels"

-- Pasi

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 4 of 6] libxl: support suspend_cancel in domain_resume
  2012-02-01 15:48         ` Pasi Kärkkäinen
@ 2012-02-01 16:02           ` Ian Campbell
  2012-02-01 19:30             ` Shriram Rajagopalan
  0 siblings, 1 reply; 30+ messages in thread
From: Ian Campbell @ 2012-02-01 16:02 UTC (permalink / raw)
  To: Pasi Kärkkäinen
  Cc: rshriram, Anthony Perard, xen-devel, Ian Jackson, brendan

On Wed, 2012-02-01 at 15:48 +0000, Pasi Kärkkäinen wrote:
> On Wed, Feb 01, 2012 at 10:53:54AM +0000, Ian Campbell wrote:
> > On Tue, 2012-01-31 at 17:52 +0000, Shriram Rajagopalan wrote:
> > > As for guests supporting this fast suspend, almost all guests (pv/hvm
> > > linux/windows) do support suspend_cancel. IIRC, I think some very old
> > > kernels didnt have this ability.
> > 
> > Slight aside, does Remus work with mainline Linux domU kernels? Users
> > seem to be under under that impression.
> > 
> > I know you had some patches at one point but I a) don't remember if they
> > went in and b) don't remember if they were sufficient.
> > 
> 
> http://wiki.xen.org/wiki/Remus
> "Linux 2.6.39.2 and later upstream kernel.org versions are now supported as PV domU kernels"

Great, someone was suggesting on list that this wasn't the case. Must
dig out that mail and respond.

Ian.



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 4 of 6] libxl: support suspend_cancel in domain_resume
  2012-02-01 16:02           ` Ian Campbell
@ 2012-02-01 19:30             ` Shriram Rajagopalan
  2012-02-02  7:04               ` Shriram Rajagopalan
  2012-02-02  7:58               ` Ian Campbell
  0 siblings, 2 replies; 30+ messages in thread
From: Shriram Rajagopalan @ 2012-02-01 19:30 UTC (permalink / raw)
  To: Ian Campbell
  Cc: xen-devel, Stefano Stabellini, Ian Jackson, brendan, Anthony Perard

On 2012-02-01, at 8:02 AM, Ian Campbell <Ian.Campbell@citrix.com> wrote:

> On Wed, 2012-02-01 at 15:48 +0000, Pasi Kärkkäinen wrote:
>> On Wed, Feb 01, 2012 at 10:53:54AM +0000, Ian Campbell wrote:
>>> On Tue, 2012-01-31 at 17:52 +0000, Shriram Rajagopalan wrote:
>>>> As for guests supporting this fast suspend, almost all guests (pv/hvm
>>>> linux/windows) do support suspend_cancel. IIRC, I think some very old
>>>> kernels didnt have this ability.
>>> 
>>> Slight aside, does Remus work with mainline Linux domU kernels? Users
>>> seem to be under under that impression.
>>> 
>>> I know you had some patches at one point but I a) don't remember if they
>>> went in and b) don't remember if they were sufficient.
>>> 
>> 
>> http://wiki.xen.org/wiki/Remus
>> "Linux 2.6.39.2 and later upstream kernel.org versions are now supported as PV domU kernels"
> 
> Great, someone was suggesting on list that this wasn't the case. Must
> dig out that mail and respond.
> 
> Ian.
> 
> 

I added the comments on top of the second version of the patch series.
Please let me know if it looks okay.

I will be spinning out a V3 anyway, since Stefano has removed  qmp_migrate.
But this patch would remain the same, if it's current form is okay.

Shriram
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 4 of 6] libxl: support suspend_cancel in domain_resume
  2012-02-01 19:30             ` Shriram Rajagopalan
@ 2012-02-02  7:04               ` Shriram Rajagopalan
  2012-02-02  7:59                 ` Ian Campbell
  2012-02-02  7:58               ` Ian Campbell
  1 sibling, 1 reply; 30+ messages in thread
From: Shriram Rajagopalan @ 2012-02-02  7:04 UTC (permalink / raw)
  To: Ian Campbell
  Cc: xen-devel, Stefano Stabellini, Ian Jackson, brendan, Anthony Perard


[-- Attachment #1.1: Type: text/plain, Size: 927 bytes --]

2012/2/1 Shriram Rajagopalan <rshriram@cs.ubc.ca>

> On 2012-02-01, at 8:02 AM, Ian Campbell <Ian.Campbell@citrix.com> wrote:
>
> > On Wed, 2012-02-01 at 15:48 +0000, Pasi Kärkkäinen wrote:
> >> On Wed, Feb 01, 2012 at 10:53:54AM +0000, Ian Campbell wrote:
> >>> On Tue, 2012-01-31 at 17:52 +0000, Shriram Rajagopalan wrote:
> >>>> As for guests supporting this fast suspend, almost all guests (pv/hvm
> >>>> linux/windows) do support suspend_cancel. IIRC, I think some very old
> >>>> kernels didnt have this ability.
> >>>
>

xenstore entry, local/domain/<domid>/image/suspend-cancel - this entry
indicates
if the guest supports co-operative suspend or not.
 But unfortunately, I coundnt find any reference in xend, that "creates"
this entry.
 There are references in xen/ and the tools/ folder, that read the
SUSPEND_CANCEL entry,
  like readnotes, XendDomainInfo.py, xen/common/libelf/libelf-dominfo.c

[-- Attachment #1.2: Type: text/html, Size: 1371 bytes --]

[-- Attachment #2: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 4 of 6] libxl: support suspend_cancel in domain_resume
  2012-02-01 19:30             ` Shriram Rajagopalan
  2012-02-02  7:04               ` Shriram Rajagopalan
@ 2012-02-02  7:58               ` Ian Campbell
  1 sibling, 0 replies; 30+ messages in thread
From: Ian Campbell @ 2012-02-02  7:58 UTC (permalink / raw)
  To: Shriram Rajagopalan
  Cc: xen-devel, Stefano Stabellini, Ian Jackson, brendan, Anthony Perard

On Wed, 2012-02-01 at 19:30 +0000, Shriram Rajagopalan wrote:
> On 2012-02-01, at 8:02 AM, Ian Campbell <Ian.Campbell@citrix.com> wrote:
> 
> > On Wed, 2012-02-01 at 15:48 +0000, Pasi Kärkkäinen wrote:
> >> On Wed, Feb 01, 2012 at 10:53:54AM +0000, Ian Campbell wrote:
> >>> On Tue, 2012-01-31 at 17:52 +0000, Shriram Rajagopalan wrote:
> >>>> As for guests supporting this fast suspend, almost all guests (pv/hvm
> >>>> linux/windows) do support suspend_cancel. IIRC, I think some very old
> >>>> kernels didnt have this ability.
> >>> 
> >>> Slight aside, does Remus work with mainline Linux domU kernels? Users
> >>> seem to be under under that impression.
> >>> 
> >>> I know you had some patches at one point but I a) don't remember if they
> >>> went in and b) don't remember if they were sufficient.
> >>> 
> >> 
> >> http://wiki.xen.org/wiki/Remus
> >> "Linux 2.6.39.2 and later upstream kernel.org versions are now supported as PV domU kernels"
> > 
> > Great, someone was suggesting on list that this wasn't the case. Must
> > dig out that mail and respond.
> > 
> > Ian.
> > 
> > 
> 
> I added the comments on top of the second version of the patch series.
> Please let me know if it looks okay.
> 
> I will be spinning out a V3 anyway, since Stefano has removed  qmp_migrate.
> But this patch would remain the same, if it's current form is okay.

If you are respinning anyway then putting a comment in the header rather
than the implementation would be better since that is where users of the
library will be looking for it.

The comment from xc_resume.c which you copied explains the two modes but
doesn't actually say how the fast parameter relates to them. A better
comment to lift would be the description of @fast from xenctrl.h:

	"fast [means] use cooperative resume (guest must support this)"

Ian.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 4 of 6] libxl: support suspend_cancel in domain_resume
  2012-02-02  7:04               ` Shriram Rajagopalan
@ 2012-02-02  7:59                 ` Ian Campbell
  0 siblings, 0 replies; 30+ messages in thread
From: Ian Campbell @ 2012-02-02  7:59 UTC (permalink / raw)
  To: rshriram
  Cc: xen-devel, Stefano Stabellini, Ian Jackson, brendan, Anthony Perard

On Thu, 2012-02-02 at 07:04 +0000, Shriram Rajagopalan wrote:
> 2012/2/1 Shriram Rajagopalan <rshriram@cs.ubc.ca>
>         On 2012-02-01, at 8:02 AM, Ian Campbell
>         <Ian.Campbell@citrix.com> wrote:
>         
>         > On Wed, 2012-02-01 at 15:48 +0000, Pasi Kärkkäinen wrote:
>         >> On Wed, Feb 01, 2012 at 10:53:54AM +0000, Ian Campbell
>         wrote:
>         >>> On Tue, 2012-01-31 at 17:52 +0000, Shriram Rajagopalan
>         wrote:
>         >>>> As for guests supporting this fast suspend, almost all
>         guests (pv/hvm
>         >>>> linux/windows) do support suspend_cancel. IIRC, I think
>         some very old
>         >>>> kernels didnt have this ability.
>         >>>
>         
> 
> 
> xenstore entry, local/domain/<domid>/image/suspend-cancel - this entry
> indicates if the guest supports co-operative suspend or not. 
>  But unfortunately, I coundnt find any reference in xend, that
> "creates" this entry.  There are references in xen/ and the tools/
> folder, that read the SUSPEND_CANCEL entry,   like readnotes,
> XendDomainInfo.py, xen/common/libelf/libelf-dominfo.c 

The value of this setting needs to come from the kernel itself. In
arch/x86/xen/xen-head.S I see:
        ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1)

Is something (presumably the toolstack, in collaboration with libelf and
xc_dom_mumble) supposed to reflect this into XenStore somehow? (and
preserve it on migration etc etc)

It looks like xend just assumes it to be true, judging from the various 
	# TODO: currently assumes SUSPEND_CANCEL is available
comments. I guess assumption that just gets reflected into XS somewhere?

Ian.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 2 of 6] libxl: bugfix: create_domain() return to caller if !daemonize
  2012-01-31  9:47   ` Ian Campbell
@ 2012-02-09 18:06     ` Ian Jackson
  0 siblings, 0 replies; 30+ messages in thread
From: Ian Jackson @ 2012-02-09 18:06 UTC (permalink / raw)
  To: Ian Campbell; +Cc: rshriram, Anthony Perard, xen-devel, brendan

Ian Campbell writes ("Re: [Xen-devel] [PATCH 2 of 6] libxl: bugfix: create_domain() return to caller if !daemonize"):
> On Tue, 2012-01-31 at 01:05 +0000, rshriram@cs.ubc.ca wrote:
> > libxl: bugfix: create_domain() return to caller if !daemonize

Committed-by: Ian Jackson <ian.jackson@eu.citrix.com>

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1 of 6] libxl: helper function to send commands to traditional qemu
  2012-01-31 17:30     ` Shriram Rajagopalan
@ 2012-02-09 18:08       ` Ian Jackson
  0 siblings, 0 replies; 30+ messages in thread
From: Ian Jackson @ 2012-02-09 18:08 UTC (permalink / raw)
  To: rshriram; +Cc: Anthony Perard, brendan, xen-devel, Ian Campbell

Shriram Rajagopalan writes ("Re: [Xen-devel] [PATCH 1 of 6] libxl: helper function to send commands to traditional qemu"):
> On Tue, Jan 31, 2012 at 1:46 AM, Ian Campbell <Ian.Campbell@citrix.com> wrote:
>     Every caller to libxl__qemu_traditional_cmd seems to be followed with a
>     call to libxl__wait_for_device_model -- might be worth pushing that down
>     into the function?
...
> Yes I noticed that. Though some seem to be using the callback while
> others dont.  This would necessitate another helper function, like
> libxl_qemu_traditional_change_state that does both the command and
> the state check (if callback supplied). This way people could still
> use the libxl_qemu_traditional_cmd for use-cases that dont require a
> special state check. But all this is for another , later patch
> :P. At the moment, I just want to get the remus framework in :D.

Right.

Well, your patch was a good cleanup in itself so I have applied it.

Thanks,
Ian.

^ permalink raw reply	[flat|nested] 30+ messages in thread

end of thread, other threads:[~2012-02-09 18:08 UTC | newest]

Thread overview: 30+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-01-31  1:05 [PATCH 0 of 6] libxl: refactor suspend/resume code rshriram
2012-01-31  1:05 ` [PATCH 1 of 6] libxl: helper function to send commands to traditional qemu rshriram
2012-01-31  9:46   ` Ian Campbell
2012-01-31 17:30     ` Shriram Rajagopalan
2012-02-09 18:08       ` Ian Jackson
2012-01-31  1:05 ` [PATCH 2 of 6] libxl: bugfix: create_domain() return to caller if !daemonize rshriram
2012-01-31  9:47   ` Ian Campbell
2012-02-09 18:06     ` Ian Jackson
2012-01-31  1:05 ` [PATCH 3 of 6] libxl: QMP stop/resume & refactor QEMU suspend/resume/save rshriram
2012-01-31  9:54   ` Ian Campbell
2012-01-31 17:32     ` Shriram Rajagopalan
2012-01-31 23:53       ` Shriram Rajagopalan
2012-02-01 10:46         ` Ian Campbell
2012-01-31  1:05 ` [PATCH 4 of 6] libxl: support suspend_cancel in domain_resume rshriram
2012-01-31 10:00   ` Ian Campbell
2012-01-31 10:04     ` Ian Campbell
2012-01-31 17:52     ` Shriram Rajagopalan
2012-02-01 10:47       ` Ian Campbell
2012-02-01 10:53       ` Ian Campbell
2012-02-01 15:48         ` Pasi Kärkkäinen
2012-02-01 16:02           ` Ian Campbell
2012-02-01 19:30             ` Shriram Rajagopalan
2012-02-02  7:04               ` Shriram Rajagopalan
2012-02-02  7:59                 ` Ian Campbell
2012-02-02  7:58               ` Ian Campbell
2012-01-31  1:05 ` [PATCH 5 of 6] libxl: refactor migrate_domain and generalize migrate_receive rshriram
2012-01-31 10:19   ` Ian Campbell
2012-01-31  1:05 ` [PATCH 6 of 6] libxl: resume instead of unpause on xl save -c rshriram
2012-01-31 10:21   ` Ian Campbell
2012-01-31 17:55     ` Shriram Rajagopalan

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.