From mboxrd@z Thu Jan 1 00:00:00 1970 From: Ian Campbell Subject: Re: [PATCH v6 COLO 07/15] implement the cmdline for COLO Date: Tue, 16 Jun 2015 12:19:43 +0100 Message-ID: <1434453583.13744.117.camel@citrix.com> References: <1433735159-26739-1-git-send-email-yanghy@cn.fujitsu.com> <1433735159-26739-8-git-send-email-yanghy@cn.fujitsu.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <1433735159-26739-8-git-send-email-yanghy@cn.fujitsu.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: Yang Hongyang Cc: wei.liu2@citrix.com, wency@cn.fujitsu.com, andrew.cooper3@citrix.com, yunhong.jiang@intel.com, eddie.dong@intel.com, xen-devel@lists.xen.org, guijianfeng@cn.fujitsu.com, rshriram@cs.ubc.ca, ian.jackson@eu.citrix.com List-Id: xen-devel@lists.xenproject.org On Mon, 2015-06-08 at 11:45 +0800, Yang Hongyang wrote: > From: Wen Congyang > > Add a new option -c to the command 'xl remus'. If you want > to use COLO HA instead of Remus HA, please use -c option. > > Update man pages to reflect the addition of a new option to > 'xl remus' command. > > Also add a new option -c to the internal command 'xl migrate-receive'. I asked about whether COLO was an extension or a peer to Remus in an earlier patch. the answer may have an impact here too. > @@ -498,6 +501,11 @@ Disable network output buffering. Requires enabling unsafe mode. > > Disable disk replication. Requires enabling unsafe mode. > > +=item B<-c> > + > +Enable COLO HA. It is conflict with B<-i> and B<-b>, and memory "It conflicts with" or "This conflicts with". > +checkpoint compression must be disabled. > + > =back > > =item B I > diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c > index 1145ae4..7df2466 100644 > --- a/tools/libxl/libxl.c > +++ b/tools/libxl/libxl.c > @@ -811,6 +811,22 @@ int libxl_domain_remus_start(libxl_ctx *ctx, libxl_domain_remus_info *info, > goto out; > } > > + /* The caller must set this defbool */ > + if (libxl_defbool_is_default(info->colo)) { > + LOG(ERROR, "colo mode must be enabled/disabled"); As I wondered earlier -- this suggests it should not be a defbool, or that the interfaces should split. > + rc = ERROR_FAIL; > + goto out; > + } > + > + if (libxl_defbool_val(info->colo)) { > + libxl_defbool_setdefault(&info->compression, false); Assuming this isn't invalidated by the above comments, you should make the existing: libxl_defbool_setdefault(&info->compression, true); into libxl_defbool_setdefault(&info->compression, libxl_defbool_val(colo)); and then do an error check later. > + if (libxl_defbool_val(info->compression)) { > + LOG(ERROR, "cannot use memory checkpoint compression in COLO mode"); > + rc = ERROR_FAIL; > + goto out; > + } > + } > + > libxl_defbool_setdefault(&info->allow_unsafe, false); > libxl_defbool_setdefault(&info->blackhole, false); > libxl_defbool_setdefault(&info->compression, true); > diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c > index adfadd1..4bbadd3 100644 > --- a/tools/libxl/xl_cmdimpl.c > +++ b/tools/libxl/xl_cmdimpl.c > @@ -4273,6 +4273,9 @@ static void migrate_receive(int debug, int daemonize, int monitor, > dom_info.send_fd = send_fd; > dom_info.migration_domname_r = &migration_domname; > dom_info.checkpointed_stream = remus; > + if (remus == LIBXL_CHECKPOINTED_STREAM_COLO) > + /* COLO uses stdout to send control message to master */ > + dom_info.quiet = 1; Please set a const char * to either "COLO" or "Remus" here and use it everywhere you've currently got an open coded decision on that. > > rc = create_domain(&dom_info); > if (rc < 0) { > @@ -4287,7 +4290,8 @@ static void migrate_receive(int debug, int daemonize, int monitor, > /* If we are here, it means that the sender (primary) has crashed. > * TODO: Split-Brain Check. > */ > - fprintf(stderr, "migration target: Remus Failover for domain %u\n", > + fprintf(stderr, "migration target: %s Failover for domain %u\n", > + remus == LIBXL_CHECKPOINTED_STREAM_COLO ? "COLO" : "Remus", > domid); > > /* > @@ -4304,15 +4308,21 @@ static void migrate_receive(int debug, int daemonize, int monitor, > rc = libxl_domain_rename(ctx, domid, migration_domname, > common_domname); > if (rc) > - fprintf(stderr, "migration target (Remus): " > + fprintf(stderr, "migration target (%s): " > "Failed to rename domain from %s to %s:%d\n", > + remus == LIBXL_CHECKPOINTED_STREAM_COLO ? "COLO" : "Remus", > migration_domname, common_domname, rc); > } > > + if (remus == LIBXL_CHECKPOINTED_STREAM_COLO) > + /* The guest is running after failover in COLO mode */ > + exit(rc ? -ERROR_FAIL: 0); > + > rc = libxl_domain_unpause(ctx, domid); > if (rc) > - fprintf(stderr, "migration target (Remus): " > + fprintf(stderr, "migration target (%s): " > "Failed to unpause domain %s (id: %u):%d\n", > + remus == LIBXL_CHECKPOINTED_STREAM_COLO ? "COLO" : "Remus", > common_domname, domid, rc); > > exit(rc ? -ERROR_FAIL: 0); > @@ -4458,7 +4468,7 @@ int main_migrate_receive(int argc, char **argv) > int debug = 0, daemonize = 1, monitor = 1, remus = 0; > int opt; > > - SWITCH_FOREACH_OPT(opt, "Fedr", NULL, "migrate-receive", 0) { > + SWITCH_FOREACH_OPT(opt, "Fedrc", NULL, "migrate-receive", 0) { > case 'F': > daemonize = 0; > break; > @@ -4470,8 +4480,10 @@ int main_migrate_receive(int argc, char **argv) > debug = 1; > break; > case 'r': > - remus = 1; > + remus = LIBXL_CHECKPOINTED_STREAM_REMUS; > break; > + case 'c': > + remus = LIBXL_CHECKPOINTED_STREAM_COLO; > } > > if (argc-optind != 0) { > @@ -7958,15 +7970,18 @@ int main_remus(int argc, char **argv) > pid_t child = -1; > uint8_t *config_data; > int config_len; > + int interval = 0; > > memset(&r_info, 0, sizeof(libxl_domain_remus_info)); > /* Defaults */ > r_info.interval = 200; > libxl_defbool_setdefault(&r_info.blackhole, false); > + libxl_defbool_setdefault(&r_info.colo, false); > > - SWITCH_FOREACH_OPT(opt, "Fbundi:s:N:e", NULL, "remus", 2) { > + SWITCH_FOREACH_OPT(opt, "Fbundi:s:N:ec", NULL, "remus", 2) { > case 'i': > r_info.interval = atoi(optarg); > + interval = 1; This duplication of r_info.interval and interval seems odd. Perhaps interval is really "interval_was_set" but even so I'm not sure this would be better achieved with some more refactoring. > break; > case 'F': > libxl_defbool_set(&r_info.allow_unsafe, true); > @@ -7992,11 +8007,28 @@ int main_remus(int argc, char **argv) > case 'e': > daemonize = 0; > break; > + case 'c': > + libxl_defbool_set(&r_info.colo, true); > } > > domid = find_domain(argv[optind]); > host = argv[optind + 1]; > > + if (libxl_defbool_val(r_info.colo)) { > + if (!interval) > + r_info.interval = 0; > + > + if (r_info.interval || libxl_defbool_val(r_info.blackhole)) { > + perror("option -c is conflict with -i or -b"); "...-c conflicts with..." > + exit(-1); > + } > + > + if (libxl_defbool_is_default(r_info.compression)) { > + perror("option -u must be specified when using COLO"); Then enforce it using setdefault I think instead of making the user jump through a pointless hoop. Ian.