All of lore.kernel.org
 help / color / mirror / Atom feed
From: Wen Congyang <wency@cn.fujitsu.com>
To: xen devel <xen-devel@lists.xen.org>
Cc: Ian Campbell <Ian.Campbell@citrix.com>,
	Wen Congyang <wency@cn.fujitsu.com>,
	Ian Jackson <Ian.Jackson@eu.citrix.com>,
	Jiang Yunhong <yunhong.jiang@intel.com>,
	Dong Eddie <eddie.dong@intel.com>,
	Yang Hongyang <yanghy@cn.fujitsu.com>,
	Lai Jiangshan <laijs@cn.fujitsu.com>
Subject: [RFC Patch v3 12/22] implement the cmdline for COLO
Date: Fri, 5 Sep 2014 17:25:47 +0800	[thread overview]
Message-ID: <1409909158-19243-13-git-send-email-wency@cn.fujitsu.com> (raw)
In-Reply-To: <1409909158-19243-1-git-send-email-wency@cn.fujitsu.com>

Add a new option -c to the command 'xl remus'. If you want
to use COLO HA instead of Remus HA, please use -c option.

Update man pages to reflect the addition of a new option to
'xl remus' command.

Also add a new option -c to the internal command 'xl migrate-receive'.

Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
---
 docs/man/xl.pod.1         | 11 +++++++++--
 tools/libxl/libxl.c       |  6 ++++++
 tools/libxl/xl_cmdimpl.c  | 48 ++++++++++++++++++++++++++++++++++++++---------
 tools/libxl/xl_cmdtable.c |  3 ++-
 4 files changed, 56 insertions(+), 12 deletions(-)

diff --git a/docs/man/xl.pod.1 b/docs/man/xl.pod.1
index bce4bfe..297cd04 100644
--- a/docs/man/xl.pod.1
+++ b/docs/man/xl.pod.1
@@ -427,12 +427,15 @@ Print huge (!) amount of debug during the migration process.
 
 =item B<remus> [I<OPTIONS>] I<domain-id> I<host>
 
-Enable Remus HA for domain. By default B<xl> relies on ssh as a transport
-mechanism between the two hosts.
+Enable Remus HA or COLO HA for domain. By default B<xl> relies on ssh as a
+transport mechanism between the two hosts.
 
 N.B: Remus support in xl is still in experimental (proof-of-concept) phase.
      Disk replication support is limited to DRBD disks.
 
+     COLO support in xl is still in experimental (proof-of-concept) phase.
+     There is no support for network or disk at the moment.
+
 B<OPTIONS>
 
 =over 4
@@ -478,6 +481,10 @@ Disable network output buffering. Requires enabling unsafe mode.
 
 Disable disk replication. Requires enabling unsafe mode.
 
+=item B<-c>
+
+Enable COLO HA. It is conflict with B<-i> and B<-b>.
+
 =back
 
 =item B<pause> I<domain-id>
diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index c86b988..39a1879 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -802,6 +802,12 @@ int libxl_domain_remus_start(libxl_ctx *ctx, libxl_domain_remus_info *info,
         goto out;
     }
 
+    /* The caller must set this defbool */
+    if (libxl_defbool_is_default(info->colo)) {
+        LOG(ERROR, "colo mode must be enabled/disabled");
+        goto out;
+    }
+
     libxl_defbool_setdefault(&info->unsafe, false);
     libxl_defbool_setdefault(&info->blackhole, false);
     libxl_defbool_setdefault(&info->compression, true);
diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
index 8702e08..3709dd2 100644
--- a/tools/libxl/xl_cmdimpl.c
+++ b/tools/libxl/xl_cmdimpl.c
@@ -3791,6 +3791,9 @@ static void migrate_receive(int debug, int daemonize, int monitor,
     dom_info.send_fd = send_fd;
     dom_info.migration_domname_r = &migration_domname;
     dom_info.checkpointed_stream = remus;
+    if (remus == LIBXL_CHECKPOINTED_STREAM_COLO)
+        /* COLO uses stdout to send control message to master */
+        dom_info.quiet = 1;
 
     rc = create_domain(&dom_info);
     if (rc < 0) {
@@ -3805,7 +3808,8 @@ static void migrate_receive(int debug, int daemonize, int monitor,
         /* If we are here, it means that the sender (primary) has crashed.
          * TODO: Split-Brain Check.
          */
-        fprintf(stderr, "migration target: Remus Failover for domain %u\n",
+        fprintf(stderr, "migration target: %s Failover for domain %u\n",
+                remus == LIBXL_CHECKPOINTED_STREAM_COLO ? "COLO" : "Remus",
                 domid);
 
         /*
@@ -3822,15 +3826,21 @@ static void migrate_receive(int debug, int daemonize, int monitor,
             rc = libxl_domain_rename(ctx, domid, migration_domname,
                                      common_domname);
             if (rc)
-                fprintf(stderr, "migration target (Remus): "
+                fprintf(stderr, "migration target (%s): "
                         "Failed to rename domain from %s to %s:%d\n",
+                        remus == LIBXL_CHECKPOINTED_STREAM_COLO ? "COLO" : "Remus",
                         migration_domname, common_domname, rc);
         }
 
+        if (remus == LIBXL_CHECKPOINTED_STREAM_COLO)
+            /* The guest is running after failover in COLO mode */
+            exit(rc ? -ERROR_FAIL: 0);
+
         rc = libxl_domain_unpause(ctx, domid);
         if (rc)
-            fprintf(stderr, "migration target (Remus): "
+            fprintf(stderr, "migration target (%s): "
                     "Failed to unpause domain %s (id: %u):%d\n",
+                    remus == LIBXL_CHECKPOINTED_STREAM_COLO ? "COLO" : "Remus",
                     common_domname, domid, rc);
 
         exit(rc ? -ERROR_FAIL: 0);
@@ -3976,7 +3986,7 @@ int main_migrate_receive(int argc, char **argv)
     int debug = 0, daemonize = 1, monitor = 1, remus = 0;
     int opt;
 
-    SWITCH_FOREACH_OPT(opt, "Fedr", NULL, "migrate-receive", 0) {
+    SWITCH_FOREACH_OPT(opt, "Fedrc", NULL, "migrate-receive", 0) {
     case 'F':
         daemonize = 0;
         break;
@@ -3988,8 +3998,10 @@ int main_migrate_receive(int argc, char **argv)
         debug = 1;
         break;
     case 'r':
-        remus = 1;
+        remus = LIBXL_CHECKPOINTED_STREAM_REMUS;
         break;
+    case 'c':
+        remus = LIBXL_CHECKPOINTED_STREAM_COLO;
     }
 
     if (argc-optind != 0) {
@@ -7290,15 +7302,18 @@ int main_remus(int argc, char **argv)
     pid_t child = -1;
     uint8_t *config_data;
     int config_len;
+    int interval = 0;
 
     memset(&r_info, 0, sizeof(libxl_domain_remus_info));
     /* Defaults */
     r_info.interval = 200;
     libxl_defbool_setdefault(&r_info.blackhole, false);
+    libxl_defbool_setdefault(&r_info.colo, false);
 
-    SWITCH_FOREACH_OPT(opt, "Fbundi:s:N:e", NULL, "remus", 2) {
+    SWITCH_FOREACH_OPT(opt, "Fbundi:s:N:ec", NULL, "remus", 2) {
     case 'i':
         r_info.interval = atoi(optarg);
+        interval = 1;
         break;
     case 'F':
         libxl_defbool_set(&r_info.unsafe, true);
@@ -7324,11 +7339,23 @@ int main_remus(int argc, char **argv)
     case 'e':
         daemonize = 0;
         break;
+    case 'c':
+        libxl_defbool_set(&r_info.colo, true);
     }
 
     domid = find_domain(argv[optind]);
     host = argv[optind + 1];
 
+    if (libxl_defbool_val(r_info.colo)) {
+        if (!interval)
+            r_info.interval = 0;
+
+        if (r_info.interval || libxl_defbool_val(r_info.blackhole)) {
+            perror("option -c is conflict with -i or -b");
+            exit(-1);
+        }
+    }
+
     if (!r_info.netbufscript)
         r_info.netbufscript = default_remus_netbufscript;
 
@@ -7343,8 +7370,9 @@ int main_remus(int argc, char **argv)
         if (!ssh_command[0]) {
             rune = host;
         } else {
-            if (asprintf(&rune, "exec %s %s xl migrate-receive -r %s",
+            if (asprintf(&rune, "exec %s %s xl migrate-receive %s %s",
                          ssh_command, host,
+                         libxl_defbool_val(r_info.colo) ? "-c" : "-r",
                          daemonize ? "" : " -e") < 0)
                 return 1;
         }
@@ -7373,7 +7401,8 @@ int main_remus(int argc, char **argv)
      * domain to force failover
      */
     if (libxl_domain_info(ctx, 0, domid)) {
-        fprintf(stderr, "Remus: Primary domain has been destroyed.\n");
+        fprintf(stderr, "%s: Primary domain has been destroyed.\n",
+                libxl_defbool_val(r_info.colo) ? "COLO" : "Remus");
         close(send_fd);
         return 0;
     }
@@ -7385,7 +7414,8 @@ int main_remus(int argc, char **argv)
     if (rc == ERROR_GUEST_TIMEDOUT)
         fprintf(stderr, "Failed to suspend domain at primary.\n");
     else {
-        fprintf(stderr, "Remus: Backup failed? resuming domain at primary.\n");
+        fprintf(stderr, "%s: Backup failed? resuming domain at primary.\n",
+                libxl_defbool_val(r_info.colo) ? "COLO" : "Remus");
         libxl_domain_resume(ctx, domid, 1, 0);
     }
 
diff --git a/tools/libxl/xl_cmdtable.c b/tools/libxl/xl_cmdtable.c
index 6d4596b..22b63db 100644
--- a/tools/libxl/xl_cmdtable.c
+++ b/tools/libxl/xl_cmdtable.c
@@ -498,7 +498,8 @@ struct cmd_spec cmd_table[] = {
       "-b                      Replicate memory checkpoints to /dev/null (blackhole).\n"
       "                        Works only in unsafe mode.\n"
       "-n                      Disable network output buffering. Works only in unsafe mode.\n"
-      "-d                      Disable disk replication. Works only in unsafe mode."
+      "-d                      Disable disk replication. Works only in unsafe mode.\n"
+      "-c                      Enable COLO HA. It is conflict with -i and -b"
     },
 #endif
     { "devd",
-- 
1.9.3

  parent reply	other threads:[~2014-09-05  9:25 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-09-05  9:25 [RFC Patch v3 00/22] COarse-grain LOck-stepping Virtual Machines for Non-stop Service Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 01/22] move remus related codes to libxl_remus.c Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 02/22] rename remus device to checkpoint device Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 03/22] adjust the indentation Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 04/22] don't touch remus in checkpoint_device Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 05/22] Update libxl_save_msgs_gen.pl to support return data from xl to xc Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 06/22] Allow slave sends data to master Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 07/22] secondary vm suspend/resume/checkpoint code Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 08/22] primary vm suspend/get_dirty_pfn/resume/checkpoint code Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 09/22] xc_domain_save: flush cache before calling callbacks->postcopy() in colo mode Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 10/22] COLO: xc related codes Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 11/22] send store mfn and console mfn to xl before resuming secondary vm Wen Congyang
2014-09-05  9:25 ` Wen Congyang [this message]
2014-09-05  9:25 ` [RFC Patch v3 13/22] blktap2: connect to backup asynchronously Wen Congyang
2014-09-24 19:11   ` Shriram Rajagopalan
2014-09-25  5:40     ` Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 14/22] switch to unprotected mode before closing Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 15/22] blktap2: move async connect related codes to block-replication.c Wen Congyang
2014-09-24 18:48   ` Shriram Rajagopalan
2014-09-05  9:25 ` [RFC Patch v3 16/22] blktap2: move ramdisk " Wen Congyang
2014-09-24 18:44   ` Shriram Rajagopalan
2014-09-26  5:18     ` Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 17/22] block-colo: implement colo disk replication Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 18/22] support blktap COLO in xl: Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 19/22] libxl/colo: setup and control disk replication for blktap2 backends Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 20/22] setup and control colo-agent for primary vm Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 21/22] setup and control colo-agent for secondary vm Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 22/22] colo: cmdline switches and config vars to control colo-agent Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 23/22] Introduce "xen-load-devices-state" Wen Congyang
2014-09-05 21:57   ` Stefano Stabellini
2014-09-05 21:57   ` [Qemu-devel] [Xen-devel] " Stefano Stabellini
2014-09-09  2:47     ` Wen Congyang
2014-09-09  2:47     ` [Qemu-devel] [Xen-devel] " Wen Congyang
2014-09-10 19:15       ` Stefano Stabellini
2014-09-10 19:15       ` [Qemu-devel] [Xen-devel] " Stefano Stabellini
2014-09-11  5:03         ` Wen Congyang
2014-09-11  5:03         ` Wen Congyang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1409909158-19243-13-git-send-email-wency@cn.fujitsu.com \
    --to=wency@cn.fujitsu.com \
    --cc=Ian.Campbell@citrix.com \
    --cc=Ian.Jackson@eu.citrix.com \
    --cc=eddie.dong@intel.com \
    --cc=laijs@cn.fujitsu.com \
    --cc=xen-devel@lists.xen.org \
    --cc=yanghy@cn.fujitsu.com \
    --cc=yunhong.jiang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.