xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: Changlong Xie <xiecl.fnst@cn.fujitsu.com>
To: xen devel <xen-devel@lists.xen.org>,
	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>,
	Andrew Cooper <andrew.cooper3@citrix.com>,
	Ian Campbell <ian.campbell@citrix.com>,
	Ian Jackson <ian.jackson@eu.citrix.com>,
	Wei Liu <wei.liu2@citrix.com>
Cc: Lars Kurth <lars.kurth@citrix.com>,
	Changlong Xie <xiecl.fnst@cn.fujitsu.com>,
	Wen Congyang <wency@cn.fujitsu.com>,
	Gui Jianfeng <guijianfeng@cn.fujitsu.com>,
	Jiang Yunhong <yunhong.jiang@intel.com>,
	Dong Eddie <eddie.dong@intel.com>,
	Anthony Perard <anthony.perard@citrix.com>,
	Shriram Rajagopalan <rshriram@cs.ubc.ca>,
	Yang Hongyang <hongyang.yang@easystack.cn>
Subject: [PATCH v11 18/27] implement the cmdline for COLO
Date: Fri, 4 Mar 2016 16:41:22 +0800	[thread overview]
Message-ID: <1457080891-26054-19-git-send-email-xiecl.fnst@cn.fujitsu.com> (raw)
In-Reply-To: <1457080891-26054-1-git-send-email-xiecl.fnst@cn.fujitsu.com>

From: Wen Congyang <wency@cn.fujitsu.com>

Add a new option -c to the command 'xl remus'. Use -c to enable
COLO HA instead of Remus HA.

Update man pages to reflect the addition of a new option to
'xl remus' command.

Also add a new option -c to the internal command 'xl migrate-receive'.

Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Yang Hongyang <hongyang.yang@easystack.cn>
Signed-off-by: Changlong Xie <xiecl.fnst@cn.fujitsu.com>
---
 docs/man/xl.pod.1         | 12 ++++++++--
 tools/libxl/libxl.c       | 22 ++++++++++++++++--
 tools/libxl/xl_cmdimpl.c  | 59 ++++++++++++++++++++++++++++++++++++-----------
 tools/libxl/xl_cmdtable.c |  4 +++-
 4 files changed, 78 insertions(+), 19 deletions(-)

diff --git a/docs/man/xl.pod.1 b/docs/man/xl.pod.1
index 4279c7c..1c6dd87 100644
--- a/docs/man/xl.pod.1
+++ b/docs/man/xl.pod.1
@@ -447,12 +447,15 @@ Print huge (!) amount of debug during the migration process.
 
 =item B<remus> [I<OPTIONS>] I<domain-id> I<host>
 
-Enable Remus HA for domain. By default B<xl> relies on ssh as a transport
-mechanism between the two hosts.
+Enable Remus HA or COLO HA for domain. By default B<xl> relies on ssh as a
+transport mechanism between the two hosts.
 
 N.B: Remus support in xl is still in experimental (proof-of-concept) phase.
      Disk replication support is limited to DRBD disks.
 
+     COLO support in xl is still in experimental (proof-of-concept) phase.
+     There is no support for network or disk at the moment.
+
 B<OPTIONS>
 
 =over 4
@@ -498,6 +501,11 @@ Disable network output buffering. Requires enabling unsafe mode.
 
 Disable disk replication. Requires enabling unsafe mode.
 
+=item B<-c>
+
+Enable COLO HA. This conflicts with B<-i> and B<-b>, and memory
+checkpoint compression must be disabled.
+
 =back
 
 =item B<pause> I<domain-id>
diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index 305231b..3689dfc 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -848,12 +848,27 @@ int libxl_domain_remus_start(libxl_ctx *ctx, libxl_domain_remus_info *info,
         goto out;
     }
 
+    /* The caller must set this defbool */
+    if (libxl_defbool_is_default(info->colo)) {
+        LOG(ERROR, "colo mode must be enabled/disabled");
+        rc = ERROR_FAIL;
+        goto out;
+    }
+
     libxl_defbool_setdefault(&info->allow_unsafe, false);
     libxl_defbool_setdefault(&info->blackhole, false);
-    libxl_defbool_setdefault(&info->compression, true);
+    libxl_defbool_setdefault(&info->compression,
+                             !libxl_defbool_val(info->colo));
     libxl_defbool_setdefault(&info->netbuf, true);
     libxl_defbool_setdefault(&info->diskbuf, true);
 
+    if (libxl_defbool_val(info->colo) &&
+        libxl_defbool_val(info->compression)) {
+            LOG(ERROR, "cannot use memory checkpoint compression in COLO mode");
+            rc = ERROR_FAIL;
+            goto out;
+    }
+
     if (!libxl_defbool_val(info->allow_unsafe) &&
         (libxl_defbool_val(info->blackhole) ||
          !libxl_defbool_val(info->netbuf) ||
@@ -875,7 +890,10 @@ int libxl_domain_remus_start(libxl_ctx *ctx, libxl_domain_remus_info *info,
     dss->live = 1;
     dss->debug = 0;
     dss->remus = info;
-    dss->checkpointed_stream = LIBXL_CHECKPOINTED_STREAM_REMUS;
+    if (libxl_defbool_val(info->colo))
+        dss->checkpointed_stream = LIBXL_CHECKPOINTED_STREAM_COLO;
+    else
+        dss->checkpointed_stream = LIBXL_CHECKPOINTED_STREAM_REMUS;
 
     assert(info);
 
diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
index 523e0e9..30d26bc 100644
--- a/tools/libxl/xl_cmdimpl.c
+++ b/tools/libxl/xl_cmdimpl.c
@@ -4445,6 +4445,8 @@ static void migrate_receive(int debug, int daemonize, int monitor,
     char rc_buf;
     char *migration_domname;
     struct domain_create dom_info;
+    const char *ha = checkpointed == LIBXL_CHECKPOINTED_STREAM_COLO ?
+                     "COLO" : "Remus";
 
     signal(SIGPIPE, SIG_IGN);
     /* if we get SIGPIPE we'd rather just have it as an error */
@@ -4477,11 +4479,12 @@ static void migrate_receive(int debug, int daemonize, int monitor,
 
     switch (checkpointed) {
     case LIBXL_CHECKPOINTED_STREAM_REMUS:
+    case LIBXL_CHECKPOINTED_STREAM_COLO:
         /* If we are here, it means that the sender (primary) has crashed.
          * TODO: Split-Brain Check.
          */
-        fprintf(stderr, "migration target: Remus Failover for domain %u\n",
-                domid);
+        fprintf(stderr, "migration target: %s Failover for domain %u\n",
+                ha, domid);
 
         /*
          * If domain renaming fails, lets just continue (as we need the domain
@@ -4497,16 +4500,20 @@ static void migrate_receive(int debug, int daemonize, int monitor,
             rc = libxl_domain_rename(ctx, domid, migration_domname,
                                      common_domname);
             if (rc)
-                fprintf(stderr, "migration target (Remus): "
+                fprintf(stderr, "migration target (%s): "
                         "Failed to rename domain from %s to %s:%d\n",
-                        migration_domname, common_domname, rc);
+                        ha, migration_domname, common_domname, rc);
         }
 
+        if (checkpointed == LIBXL_CHECKPOINTED_STREAM_COLO)
+            /* The guest is running after failover in COLO mode */
+            exit(rc ? -ERROR_FAIL: 0);
+
         rc = libxl_domain_unpause(ctx, domid);
         if (rc)
-            fprintf(stderr, "migration target (Remus): "
+            fprintf(stderr, "migration target (%s): "
                     "Failed to unpause domain %s (id: %u):%d\n",
-                    common_domname, domid, rc);
+                    ha, common_domname, domid, rc);
 
         exit(rc ? -ERROR_FAIL: 0);
     default:
@@ -4654,7 +4661,7 @@ int main_migrate_receive(int argc, char **argv)
     libxl_checkpointed_stream checkpointed = LIBXL_CHECKPOINTED_STREAM_NONE;
     int opt;
 
-    SWITCH_FOREACH_OPT(opt, "Fedr", NULL, "migrate-receive", 0) {
+    SWITCH_FOREACH_OPT(opt, "Fedrc", NULL, "migrate-receive", 0) {
     case 'F':
         daemonize = 0;
         break;
@@ -4668,6 +4675,9 @@ int main_migrate_receive(int argc, char **argv)
     case 'r':
         checkpointed = LIBXL_CHECKPOINTED_STREAM_REMUS;
         break;
+    case 'c':
+        checkpointed = LIBXL_CHECKPOINTED_STREAM_COLO;
+        break;
     }
 
     if (argc-optind != 0) {
@@ -8043,11 +8053,8 @@ int main_remus(int argc, char **argv)
     int config_len;
 
     memset(&r_info, 0, sizeof(libxl_domain_remus_info));
-    /* Defaults */
-    r_info.interval = 200;
-    libxl_defbool_setdefault(&r_info.blackhole, false);
 
-    SWITCH_FOREACH_OPT(opt, "Fbundi:s:N:e", NULL, "remus", 2) {
+    SWITCH_FOREACH_OPT(opt, "Fbundi:s:N:ec", NULL, "remus", 2) {
     case 'i':
         r_info.interval = atoi(optarg);
         break;
@@ -8075,11 +8082,32 @@ int main_remus(int argc, char **argv)
     case 'e':
         daemonize = 0;
         break;
+    case 'c':
+        libxl_defbool_set(&r_info.colo, true);
     }
 
     domid = find_domain(argv[optind]);
     host = argv[optind + 1];
 
+    /* Defaults */
+    libxl_defbool_setdefault(&r_info.blackhole, false);
+    libxl_defbool_setdefault(&r_info.colo, false);
+    if (!libxl_defbool_val(r_info.colo) && !r_info.interval)
+        r_info.interval = 200;
+
+    if (libxl_defbool_val(r_info.colo)) {
+        if (r_info.interval || libxl_defbool_val(r_info.blackhole)) {
+            perror("Option -c conflicts with -i or -b");
+            exit(-1);
+        }
+
+        if (libxl_defbool_is_default(r_info.compression)) {
+            perror("COLO can't be used with memory compression. "
+                   "Disable memory checkpoint compression now...");
+            libxl_defbool_set(&r_info.compression, false);
+        }
+    }
+
     if (!r_info.netbufscript)
         r_info.netbufscript = default_remus_netbufscript;
 
@@ -8094,8 +8122,9 @@ int main_remus(int argc, char **argv)
         if (!ssh_command[0]) {
             rune = host;
         } else {
-            xasprintf(&rune, "exec %s %s xl migrate-receive -r %s",
+            xasprintf(&rune, "exec %s %s xl migrate-receive %s %s",
                       ssh_command, host,
+                      libxl_defbool_val(r_info.colo) ? "-c" : "-r",
                       daemonize ? "" : " -e");
         }
 
@@ -8123,7 +8152,8 @@ int main_remus(int argc, char **argv)
      * domain to force failover
      */
     if (libxl_domain_info(ctx, 0, domid)) {
-        fprintf(stderr, "Remus: Primary domain has been destroyed.\n");
+        fprintf(stderr, "%s: Primary domain has been destroyed.\n",
+                libxl_defbool_val(r_info.colo) ? "COLO" : "Remus");
         close(send_fd);
         return 0;
     }
@@ -8135,7 +8165,8 @@ int main_remus(int argc, char **argv)
     if (rc == ERROR_GUEST_TIMEDOUT)
         fprintf(stderr, "Failed to suspend domain at primary.\n");
     else {
-        fprintf(stderr, "Remus: Backup failed? resuming domain at primary.\n");
+        fprintf(stderr, "%s: Backup failed? resuming domain at primary.\n",
+                libxl_defbool_val(r_info.colo) ? "COLO" : "Remus");
         libxl_domain_resume(ctx, domid, 1, 0);
     }
 
diff --git a/tools/libxl/xl_cmdtable.c b/tools/libxl/xl_cmdtable.c
index fdc1ac6..b6b630c 100644
--- a/tools/libxl/xl_cmdtable.c
+++ b/tools/libxl/xl_cmdtable.c
@@ -499,7 +499,9 @@ struct cmd_spec cmd_table[] = {
       "-b                      Replicate memory checkpoints to /dev/null (blackhole).\n"
       "                        Works only in unsafe mode.\n"
       "-n                      Disable network output buffering. Works only in unsafe mode.\n"
-      "-d                      Disable disk replication. Works only in unsafe mode."
+      "-d                      Disable disk replication. Works only in unsafe mode.\n"
+      "-c                      Enable COLO HA. It is conflict with -i and -b, and memory\n"
+      "                        checkpoint must be disabled"
     },
 #endif
     { "devd",
-- 
1.9.3




_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

  parent reply	other threads:[~2016-03-04  8:41 UTC|newest]

Thread overview: 100+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-03-04  8:41 [PATCH v11 00/27] COarse-grain LOck-stepping Virtual Machines for Non-stop Service Changlong Xie
2016-03-04  8:41 ` [PATCH v11 01/27] tools/libxl: introduction of libxl__qmp_restore to load qemu state Changlong Xie
2016-03-04 16:30   ` Ian Jackson
2016-03-14  9:03     ` Changlong Xie
2016-03-04  8:41 ` [PATCH v11 02/27] tools/libxl: introduce libxl__domain_common_switch_qemu_logdirty() Changlong Xie
2016-03-04  8:41 ` [PATCH v11 03/27] tools/libxl: Add back channel to allow migration target send data back Changlong Xie
2016-03-04 16:38   ` Ian Jackson
2016-03-08 16:38     ` Wei Liu
2016-03-17  8:07     ` Changlong Xie
2016-03-04  8:41 ` [PATCH v11 04/27] tools/libxl: Introduce new helper function dup_fd_helper() Changlong Xie
2016-03-04 16:42   ` Ian Jackson
2016-03-17  8:08     ` Changlong Xie
2016-03-04  8:41 ` [PATCH v11 05/27] tools/libx{l, c}: add back channel to libxc Changlong Xie
2016-03-04 16:45   ` Ian Jackson
2016-03-04  8:41 ` [PATCH v11 06/27] docs: add colo readme Changlong Xie
2016-03-04  8:41 ` [PATCH v11 07/27] docs/libxl: Introduce CHECKPOINT_CONTEXT to support migration v2 colo streams Changlong Xie
2016-03-04 16:51   ` Ian Jackson
2016-03-08 16:38     ` Wei Liu
2016-03-11  7:13     ` Wen Congyang
2016-03-04  8:41 ` [PATCH v11 08/27] libxc/migration: Specification update for DIRTY_PFN_LIST records Changlong Xie
2016-03-04 16:53   ` Ian Jackson
2016-03-17  8:10     ` Changlong Xie
2016-03-04  8:41 ` [PATCH v11 09/27] libxc/migration: export read_record for common use Changlong Xie
2016-03-04 16:55   ` Ian Jackson
2016-03-04  8:41 ` [PATCH v11 10/27] tools/libxl: add back channel support to write stream Changlong Xie
2016-03-04 17:00   ` Ian Jackson
2016-03-07  2:13     ` Wen Congyang
2016-03-11  9:05     ` Wen Congyang
2016-03-17  8:11     ` Changlong Xie
2016-03-04  8:41 ` [PATCH v11 11/27] tools/libxl: add back channel support to read stream Changlong Xie
2016-03-04 17:01   ` Ian Jackson
2016-03-04  8:41 ` [PATCH v11 12/27] tools/libx{l, c}: introduce wait_checkpoint callback Changlong Xie
2016-03-04 17:03   ` Ian Jackson
2016-03-04 20:23     ` Konrad Rzeszutek Wilk
2016-03-07  2:16       ` Wen Congyang
2016-03-17  8:16     ` Changlong Xie
2016-03-04  8:41 ` [PATCH v11 13/27] tools/libx{l, c}: add postcopy/suspend callback to restore side Changlong Xie
2016-03-04 17:05   ` Ian Jackson
2016-03-17  8:17     ` Changlong Xie
2016-03-04  8:41 ` [PATCH v11 14/27] secondary vm suspend/resume/checkpoint code Changlong Xie
2016-03-04 17:11   ` Ian Jackson
2016-03-07  2:57     ` Wen Congyang
2016-03-17  9:03     ` Changlong Xie
2016-03-17 12:19       ` Wei Liu
2016-03-04  8:41 ` [PATCH v11 15/27] primary " Changlong Xie
2016-03-04 17:14   ` Ian Jackson
2016-03-07  2:59     ` Wen Congyang
2016-03-04  8:41 ` [PATCH v11 16/27] libxc/restore: support COLO restore Changlong Xie
2016-03-04 17:16   ` Ian Jackson
2016-03-04  8:41 ` [PATCH v11 17/27] libxc/save: support COLO save Changlong Xie
2016-03-04 17:18   ` Ian Jackson
2016-03-07  3:00     ` Wen Congyang
2016-03-04  8:41 ` Changlong Xie [this message]
2016-03-04 17:22   ` [PATCH v11 18/27] implement the cmdline for COLO Ian Jackson
2016-03-07  3:04     ` Wen Congyang
2016-03-04  8:41 ` [PATCH v11 19/27] COLO: introduce new API to prepare/start/do/get_error/stop replication Changlong Xie
2016-03-04 17:26   ` Ian Jackson
2016-03-08 16:46     ` Wei Liu
2016-03-18  3:44     ` Changlong Xie
2016-03-18 11:35       ` Wei Liu
2016-03-18  3:45     ` Changlong Xie
2016-03-04 17:29   ` Ian Jackson
2016-03-18  3:49     ` Changlong Xie
2016-03-04  8:41 ` [PATCH v11 20/27] Support colo mode for qemu disk Changlong Xie
2016-03-04 17:44   ` Ian Jackson
2016-03-07  2:06     ` Wen Congyang
2016-03-17 17:18       ` Ian Jackson
2016-03-18  5:42         ` Wen Congyang
2016-03-04 17:52   ` Ian Jackson
2016-03-04 20:30     ` Konrad Rzeszutek Wilk
2016-03-07  2:10       ` Wen Congyang
2016-03-08 17:22         ` Wei Liu
2016-03-09  2:09           ` Konrad Rzeszutek Wilk
2016-03-09 16:55             ` Wei Liu
2016-03-17 17:09           ` Ian Jackson
2016-03-17 17:10       ` Ian Jackson
2016-03-04  8:41 ` [PATCH v11 21/27] COLO: use qemu block replication Changlong Xie
2016-03-04  8:41 ` [PATCH v11 22/27] COLO proxy: implement setup/teardown of COLO proxy module Changlong Xie
2016-03-04 17:59   ` Ian Jackson
2016-03-18  8:22     ` Changlong Xie
2016-03-22  5:44     ` Changlong Xie
2016-03-22  5:55       ` Changlong Xie
2016-03-04  8:41 ` [PATCH v11 23/27] COLO proxy: preresume, postresume and checkpoint Changlong Xie
2016-03-04 18:01   ` Ian Jackson
2016-03-18  8:20     ` Changlong Xie
2016-03-04  8:41 ` [PATCH v11 24/27] COLO nic: implement COLO nic subkind Changlong Xie
2016-03-04 18:02   ` Ian Jackson
2016-03-18  8:20     ` Changlong Xie
2016-03-04  8:41 ` [PATCH v11 25/27] setup and control colo proxy on primary side Changlong Xie
2016-03-04 18:05   ` Ian Jackson
2016-03-22  6:01     ` Changlong Xie
2016-03-04  8:41 ` [PATCH v11 26/27] setup and control colo proxy on secondary side Changlong Xie
2016-03-04 18:05   ` Ian Jackson
2016-03-04  8:41 ` [PATCH v11 27/27] cmdline switches and config vars to control colo-proxy Changlong Xie
2016-03-04 18:09   ` Ian Jackson
2016-03-22  4:13     ` Changlong Xie
2016-03-04 18:17 ` [PATCH v11 00/27] COarse-grain LOck-stepping Virtual Machines for Non-stop Service Ian Jackson
2016-03-04 20:35   ` Konrad Rzeszutek Wilk
2016-03-17 17:19     ` Ian Jackson
2016-03-17 17:41   ` Ian Jackson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1457080891-26054-19-git-send-email-xiecl.fnst@cn.fujitsu.com \
    --to=xiecl.fnst@cn.fujitsu.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=anthony.perard@citrix.com \
    --cc=eddie.dong@intel.com \
    --cc=guijianfeng@cn.fujitsu.com \
    --cc=hongyang.yang@easystack.cn \
    --cc=ian.campbell@citrix.com \
    --cc=ian.jackson@eu.citrix.com \
    --cc=konrad.wilk@oracle.com \
    --cc=lars.kurth@citrix.com \
    --cc=rshriram@cs.ubc.ca \
    --cc=wei.liu2@citrix.com \
    --cc=wency@cn.fujitsu.com \
    --cc=xen-devel@lists.xen.org \
    --cc=yunhong.jiang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).