From mboxrd@z Thu Jan 1 00:00:00 1970 From: Wen Congyang Subject: [RFC Patch v3 12/22] implement the cmdline for COLO Date: Fri, 5 Sep 2014 17:25:47 +0800 Message-ID: <1409909158-19243-13-git-send-email-wency@cn.fujitsu.com> References: <1409909158-19243-1-git-send-email-wency@cn.fujitsu.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <1409909158-19243-1-git-send-email-wency@cn.fujitsu.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: xen devel Cc: Ian Campbell , Wen Congyang , Ian Jackson , Jiang Yunhong , Dong Eddie , Yang Hongyang , Lai Jiangshan List-Id: xen-devel@lists.xenproject.org Add a new option -c to the command 'xl remus'. If you want to use COLO HA instead of Remus HA, please use the -c option. Update man pages to reflect the addition of a new option to the 'xl remus' command. Also add a new option -c to the internal command 'xl migrate-receive'. Signed-off-by: Wen Congyang --- docs/man/xl.pod.1 | 11 +++++++++-- tools/libxl/libxl.c | 6 ++++++ tools/libxl/xl_cmdimpl.c | 48 ++++++++++++++++++++++++++++++++++++++--------- tools/libxl/xl_cmdtable.c | 3 ++- 4 files changed, 56 insertions(+), 12 deletions(-) diff --git a/docs/man/xl.pod.1 b/docs/man/xl.pod.1 index bce4bfe..297cd04 100644 --- a/docs/man/xl.pod.1 +++ b/docs/man/xl.pod.1 @@ -427,12 +427,15 @@ Print huge (!) amount of debug during the migration process. =item B [I] I I -Enable Remus HA for domain. By default B relies on ssh as a transport -mechanism between the two hosts. +Enable Remus HA or COLO HA for domain. By default B relies on ssh as a +transport mechanism between the two hosts. N.B: Remus support in xl is still in experimental (proof-of-concept) phase. Disk replication support is limited to DRBD disks. + COLO support in xl is still in experimental (proof-of-concept) phase. 
+ There is no support for network or disk at the moment. + B =over 4 @@ -478,6 +481,10 @@ Disable network output buffering. Requires enabling unsafe mode. Disable disk replication. Requires enabling unsafe mode. +=item B<-c> + +Enable COLO HA. It conflicts with B<-i> and B<-b>. + =back =item B I diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c index c86b988..39a1879 100644 --- a/tools/libxl/libxl.c +++ b/tools/libxl/libxl.c @@ -802,6 +802,12 @@ int libxl_domain_remus_start(libxl_ctx *ctx, libxl_domain_remus_info *info, goto out; } + /* The caller must set this defbool */ + if (libxl_defbool_is_default(info->colo)) { + LOG(ERROR, "colo mode must be enabled/disabled"); + goto out; + } + libxl_defbool_setdefault(&info->unsafe, false); libxl_defbool_setdefault(&info->blackhole, false); libxl_defbool_setdefault(&info->compression, true); diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c index 8702e08..3709dd2 100644 --- a/tools/libxl/xl_cmdimpl.c +++ b/tools/libxl/xl_cmdimpl.c @@ -3791,6 +3791,9 @@ static void migrate_receive(int debug, int daemonize, int monitor, dom_info.send_fd = send_fd; dom_info.migration_domname_r = &migration_domname; dom_info.checkpointed_stream = remus; + if (remus == LIBXL_CHECKPOINTED_STREAM_COLO) + /* COLO uses stdout to send control message to master */ + dom_info.quiet = 1; rc = create_domain(&dom_info); if (rc < 0) { @@ -3805,7 +3808,8 @@ static void migrate_receive(int debug, int daemonize, int monitor, /* If we are here, it means that the sender (primary) has crashed. * TODO: Split-Brain Check. */ - fprintf(stderr, "migration target: Remus Failover for domain %u\n", + fprintf(stderr, "migration target: %s Failover for domain %u\n", + remus == LIBXL_CHECKPOINTED_STREAM_COLO ? 
"COLO" : "Remus", domid); /* @@ -3822,15 +3826,21 @@ static void migrate_receive(int debug, int daemonize, int monitor, rc = libxl_domain_rename(ctx, domid, migration_domname, common_domname); if (rc) - fprintf(stderr, "migration target (Remus): " + fprintf(stderr, "migration target (%s): " "Failed to rename domain from %s to %s:%d\n", + remus == LIBXL_CHECKPOINTED_STREAM_COLO ? "COLO" : "Remus", migration_domname, common_domname, rc); } + if (remus == LIBXL_CHECKPOINTED_STREAM_COLO) + /* The guest is running after failover in COLO mode */ + exit(rc ? -ERROR_FAIL: 0); + rc = libxl_domain_unpause(ctx, domid); if (rc) - fprintf(stderr, "migration target (Remus): " + fprintf(stderr, "migration target (%s): " "Failed to unpause domain %s (id: %u):%d\n", + remus == LIBXL_CHECKPOINTED_STREAM_COLO ? "COLO" : "Remus", common_domname, domid, rc); exit(rc ? -ERROR_FAIL: 0); @@ -3976,7 +3986,7 @@ int main_migrate_receive(int argc, char **argv) int debug = 0, daemonize = 1, monitor = 1, remus = 0; int opt; - SWITCH_FOREACH_OPT(opt, "Fedr", NULL, "migrate-receive", 0) { + SWITCH_FOREACH_OPT(opt, "Fedrc", NULL, "migrate-receive", 0) { case 'F': daemonize = 0; break; @@ -3988,8 +3998,10 @@ int main_migrate_receive(int argc, char **argv) debug = 1; break; case 'r': - remus = 1; + remus = LIBXL_CHECKPOINTED_STREAM_REMUS; break; + case 'c': + remus = LIBXL_CHECKPOINTED_STREAM_COLO; } if (argc-optind != 0) { @@ -7290,15 +7302,18 @@ int main_remus(int argc, char **argv) pid_t child = -1; uint8_t *config_data; int config_len; + int interval = 0; memset(&r_info, 0, sizeof(libxl_domain_remus_info)); /* Defaults */ r_info.interval = 200; libxl_defbool_setdefault(&r_info.blackhole, false); + libxl_defbool_setdefault(&r_info.colo, false); - SWITCH_FOREACH_OPT(opt, "Fbundi:s:N:e", NULL, "remus", 2) { + SWITCH_FOREACH_OPT(opt, "Fbundi:s:N:ec", NULL, "remus", 2) { case 'i': r_info.interval = atoi(optarg); + interval = 1; break; case 'F': libxl_defbool_set(&r_info.unsafe, true); @@ -7324,11 
+7339,23 @@ int main_remus(int argc, char **argv) case 'e': daemonize = 0; break; + case 'c': + libxl_defbool_set(&r_info.colo, true); } domid = find_domain(argv[optind]); host = argv[optind + 1]; + if (libxl_defbool_val(r_info.colo)) { + if (!interval) + r_info.interval = 0; + + if (r_info.interval || libxl_defbool_val(r_info.blackhole)) { + perror("option -c is conflict with -i or -b"); + exit(-1); + } + } + if (!r_info.netbufscript) r_info.netbufscript = default_remus_netbufscript; @@ -7343,8 +7370,9 @@ int main_remus(int argc, char **argv) if (!ssh_command[0]) { rune = host; } else { - if (asprintf(&rune, "exec %s %s xl migrate-receive -r %s", + if (asprintf(&rune, "exec %s %s xl migrate-receive %s %s", ssh_command, host, + libxl_defbool_val(r_info.colo) ? "-c" : "-r", daemonize ? "" : " -e") < 0) return 1; } @@ -7373,7 +7401,8 @@ int main_remus(int argc, char **argv) * domain to force failover */ if (libxl_domain_info(ctx, 0, domid)) { - fprintf(stderr, "Remus: Primary domain has been destroyed.\n"); + fprintf(stderr, "%s: Primary domain has been destroyed.\n", + libxl_defbool_val(r_info.colo) ? "COLO" : "Remus"); close(send_fd); return 0; } @@ -7385,7 +7414,8 @@ int main_remus(int argc, char **argv) if (rc == ERROR_GUEST_TIMEDOUT) fprintf(stderr, "Failed to suspend domain at primary.\n"); else { - fprintf(stderr, "Remus: Backup failed? resuming domain at primary.\n"); + fprintf(stderr, "%s: Backup failed? resuming domain at primary.\n", + libxl_defbool_val(r_info.colo) ? "COLO" : "Remus"); libxl_domain_resume(ctx, domid, 1, 0); } diff --git a/tools/libxl/xl_cmdtable.c b/tools/libxl/xl_cmdtable.c index 6d4596b..22b63db 100644 --- a/tools/libxl/xl_cmdtable.c +++ b/tools/libxl/xl_cmdtable.c @@ -498,7 +498,8 @@ struct cmd_spec cmd_table[] = { "-b Replicate memory checkpoints to /dev/null (blackhole).\n" " Works only in unsafe mode.\n" "-n Disable network output buffering. Works only in unsafe mode.\n" - "-d Disable disk replication. Works only in unsafe mode." 
+ "-d Disable disk replication. Works only in unsafe mode.\n" + "-c Enable COLO HA. It is conflict with -i and -b" }, #endif { "devd", -- 1.9.3