xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: Wen Congyang <wency@cn.fujitsu.com>
To: Wei Liu <wei.liu2@citrix.com>
Cc: Lars Kurth <lars.kurth@citrix.com>,
	Changlong Xie <xiecl.fnst@cn.fujitsu.com>,
	Ian Campbell <ian.campbell@citrix.com>,
	Andrew Cooper <andrew.cooper3@citrix.com>,
	Jiang Yunhong <yunhong.jiang@intel.com>,
	Ian Jackson <ian.jackson@eu.citrix.com>,
	xen devel <xen-devel@lists.xen.org>,
	Dong Eddie <eddie.dong@intel.com>,
	Gui Jianfeng <guijianfeng@cn.fujitsu.com>,
	Shriram Rajagopalan <rshriram@cs.ubc.ca>,
	Yang Hongyang <hongyang.yang@easystack.cn>
Subject: Re: [PATCH v10 22/31] implement the cmdline for COLO
Date: Thu, 3 Mar 2016 09:30:10 +0800	[thread overview]
Message-ID: <56D793A2.2080303@cn.fujitsu.com> (raw)
In-Reply-To: <20160302150348.GC1657@citrix.com>

On 03/02/2016 11:03 PM, Wei Liu wrote:
> On Mon, Feb 22, 2016 at 10:52:26AM +0800, Wen Congyang wrote:
> [...]
>> +    if (libxl_defbool_val(info->colo)) {
>> +        if (libxl_defbool_val(info->compression)) {
> 
> This can be simplified as
> 
>        if (libxl_defbool_val(xxx) && libxl_defbool_val(yyy))

OK, I will fix it in the next version.

> 
>> +            LOG(ERROR, "cannot use memory checkpoint compression in COLO mode");
>> +            rc = ERROR_FAIL;
>> +            goto out;
>> +        }
>> +    }
>> +
>>      if (!libxl_defbool_val(info->allow_unsafe) &&
>>          (libxl_defbool_val(info->blackhole) ||
>>           !libxl_defbool_val(info->netbuf) ||
>> @@ -876,7 +892,10 @@ int libxl_domain_remus_start(libxl_ctx *ctx, libxl_domain_remus_info *info,
>>      dss->live = 1;
>>      dss->debug = 0;
>>      dss->remus = info;
>> -    dss->checkpointed_stream = LIBXL_CHECKPOINTED_STREAM_REMUS;
>> +    if (libxl_defbool_val(info->colo))
>> +        dss->checkpointed_stream = LIBXL_CHECKPOINTED_STREAM_COLO;
>> +    else
>> +        dss->checkpointed_stream = LIBXL_CHECKPOINTED_STREAM_REMUS;
>>  
>>      assert(info);
>>  
>> diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
>> index df7268b..0dc7220 100644
>> --- a/tools/libxl/xl_cmdimpl.c
>> +++ b/tools/libxl/xl_cmdimpl.c
>> @@ -4440,6 +4440,8 @@ static void migrate_receive(int debug, int daemonize, int monitor,
>>      char rc_buf;
>>      char *migration_domname;
>>      struct domain_create dom_info;
>> +    const char *ha = checkpointed == LIBXL_CHECKPOINTED_STREAM_COLO ?
>> +                     "COLO" : "Remus";
>>  
>>      signal(SIGPIPE, SIG_IGN);
>>      /* if we get SIGPIPE we'd rather just have it as an error */
>> @@ -4460,6 +4462,9 @@ static void migrate_receive(int debug, int daemonize, int monitor,
>>      dom_info.send_back_fd = send_fd;
>>      dom_info.migration_domname_r = &migration_domname;
>>      dom_info.checkpointed_stream = checkpointed;
>> +    if (checkpointed == LIBXL_CHECKPOINTED_STREAM_COLO)
>> +        /* COLO uses stdout to send control message to master */
>> +        dom_info.quiet = 1;
>>  
> 
> It seems that dom_info->quiet affects stderr, not stdout. See the only
> place that checks this in xl_cmdimpl.c.
> 
>>      rc = create_domain(&dom_info);
>>      if (rc < 0) {
>> @@ -4472,11 +4477,12 @@ static void migrate_receive(int debug, int daemonize, int monitor,
>>  
>>      switch (checkpointed) {
>>      case LIBXL_CHECKPOINTED_STREAM_REMUS:
>> +    case LIBXL_CHECKPOINTED_STREAM_COLO:
>>          /* If we are here, it means that the sender (primary) has crashed.
>>           * TODO: Split-Brain Check.
>>           */
>> -        fprintf(stderr, "migration target: Remus Failover for domain %u\n",
>> -                domid);
>> +        fprintf(stderr, "migration target: %s Failover for domain %u\n",
>> +                ha, domid);
>>  
>>          /*
>>           * If domain renaming fails, lets just continue (as we need the domain
>> @@ -4492,16 +4498,20 @@ static void migrate_receive(int debug, int daemonize, int monitor,
>>              rc = libxl_domain_rename(ctx, domid, migration_domname,
>>                                       common_domname);
>>              if (rc)
>> -                fprintf(stderr, "migration target (Remus): "
>> +                fprintf(stderr, "migration target (%s): "
>>                          "Failed to rename domain from %s to %s:%d\n",
>> -                        migration_domname, common_domname, rc);
>> +                        ha, migration_domname, common_domname, rc);
>>          }
>>  
>> +        if (checkpointed == LIBXL_CHECKPOINTED_STREAM_COLO)
>> +            /* The guest is running after failover in COLO mode */
>> +            exit(rc ? -ERROR_FAIL: 0);
>> +
>>          rc = libxl_domain_unpause(ctx, domid);
>>          if (rc)
>> -            fprintf(stderr, "migration target (Remus): "
>> +            fprintf(stderr, "migration target (%s): "
>>                      "Failed to unpause domain %s (id: %u):%d\n",
>> -                    common_domname, domid, rc);
>> +                    ha, common_domname, domid, rc);
>>  
>>          exit(rc ? -ERROR_FAIL: 0);
>>      default:
>> @@ -4649,7 +4659,7 @@ int main_migrate_receive(int argc, char **argv)
>>      libxl_checkpointed_stream checkpointed = LIBXL_CHECKPOINTED_STREAM_NONE;
>>      int opt;
>>  
>> -    SWITCH_FOREACH_OPT(opt, "Fedr", NULL, "migrate-receive", 0) {
>> +    SWITCH_FOREACH_OPT(opt, "Fedrc", NULL, "migrate-receive", 0) {
>>      case 'F':
>>          daemonize = 0;
>>          break;
>> @@ -4663,6 +4673,9 @@ int main_migrate_receive(int argc, char **argv)
>>      case 'r':
>>          checkpointed = LIBXL_CHECKPOINTED_STREAM_REMUS;
>>          break;
>> +    case 'c':
>> +        checkpointed = LIBXL_CHECKPOINTED_STREAM_COLO;
>> +        break;
>>      }
>>  
>>      if (argc-optind != 0) {
>> @@ -8032,11 +8045,8 @@ int main_remus(int argc, char **argv)
>>      int config_len;
>>  
>>      memset(&r_info, 0, sizeof(libxl_domain_remus_info));
>> -    /* Defaults */
>> -    r_info.interval = 200;
>> -    libxl_defbool_setdefault(&r_info.blackhole, false);
>>  
>> -    SWITCH_FOREACH_OPT(opt, "Fbundi:s:N:e", NULL, "remus", 2) {
>> +    SWITCH_FOREACH_OPT(opt, "Fbundi:s:N:ec", NULL, "remus", 2) {
>>      case 'i':
>>          r_info.interval = atoi(optarg);
>>          break;
>> @@ -8064,11 +8074,32 @@ int main_remus(int argc, char **argv)
>>      case 'e':
>>          daemonize = 0;
>>          break;
>> +    case 'c':
>> +        libxl_defbool_set(&r_info.colo, true);
>>      }
>>  
>>      domid = find_domain(argv[optind]);
>>      host = argv[optind + 1];
>>  
>> +    /* Defaults */
>> +    libxl_defbool_setdefault(&r_info.blackhole, false);
>> +    libxl_defbool_setdefault(&r_info.colo, false);
>> +    if (!libxl_defbool_val(r_info.colo) && !r_info.interval)
>> +        r_info.interval = 200;
>> +
>> +    if (libxl_defbool_val(r_info.colo)) {
>> +        if (r_info.interval || libxl_defbool_val(r_info.blackhole)) {
>> +            perror("Option -c conflicts with -i or -b");
>> +            exit(-1);
>> +        }
>> +
>> +        if (libxl_defbool_is_default(r_info.compression)) {
>> +            perror("COLO can't be used with memory compression. "
>> +                   "Disable memory checkpoint compression now...");
>> +            libxl_defbool_set(&r_info.compression, false);
>> +        }
>> +    }
>> +
> 
> I don't think I'm entirely happy with how these things are arranged.
> Remus and COLO don't seem to have a set of consistent APIs that
> arbitrary users can call.
> 
> But for the sake of not growing this series any longer, let's leave it
> like this for the moment. I think COLO at best is going to be (as you
> stated in the manpage) experimental at this stage.

Yes, it is experimental now.

Thanks
Wen Congyang

> 
> 
>>      if (!r_info.netbufscript)
>>          r_info.netbufscript = default_remus_netbufscript;
>>  
>> @@ -8083,8 +8114,9 @@ int main_remus(int argc, char **argv)
>>          if (!ssh_command[0]) {
>>              rune = host;
>>          } else {
>> -            xasprintf(&rune, "exec %s %s xl migrate-receive -r %s",
>> +            xasprintf(&rune, "exec %s %s xl migrate-receive %s %s",
>>                        ssh_command, host,
>> +                      libxl_defbool_val(r_info.colo) ? "-c" : "-r",
>>                        daemonize ? "" : " -e");
>>          }
>>  
>> @@ -8112,7 +8144,8 @@ int main_remus(int argc, char **argv)
>>       * domain to force failover
>>       */
>>      if (libxl_domain_info(ctx, 0, domid)) {
>> -        fprintf(stderr, "Remus: Primary domain has been destroyed.\n");
>> +        fprintf(stderr, "%s: Primary domain has been destroyed.\n",
>> +                libxl_defbool_val(r_info.colo) ? "COLO" : "Remus");
>>          close(send_fd);
>>          return 0;
>>      }
>> @@ -8124,7 +8157,8 @@ int main_remus(int argc, char **argv)
>>      if (rc == ERROR_GUEST_TIMEDOUT)
>>          fprintf(stderr, "Failed to suspend domain at primary.\n");
>>      else {
>> -        fprintf(stderr, "Remus: Backup failed? resuming domain at primary.\n");
>> +        fprintf(stderr, "%s: Backup failed? resuming domain at primary.\n",
>> +                libxl_defbool_val(r_info.colo) ? "COLO" : "Remus");
>>          libxl_domain_resume(ctx, domid, 1, 0);
>>      }
>>  
>> diff --git a/tools/libxl/xl_cmdtable.c b/tools/libxl/xl_cmdtable.c
>> index fdc1ac6..b6b630c 100644
>> --- a/tools/libxl/xl_cmdtable.c
>> +++ b/tools/libxl/xl_cmdtable.c
>> @@ -499,7 +499,9 @@ struct cmd_spec cmd_table[] = {
>>        "-b                      Replicate memory checkpoints to /dev/null (blackhole).\n"
>>        "                        Works only in unsafe mode.\n"
>>        "-n                      Disable network output buffering. Works only in unsafe mode.\n"
>> -      "-d                      Disable disk replication. Works only in unsafe mode."
>> +      "-d                      Disable disk replication. Works only in unsafe mode.\n"
>> +      "-c                      Enable COLO HA. It is conflict with -i and -b, and memory\n"
>> +      "                        checkpoint must be disabled"
>>      },
>>  #endif
>>      { "devd",
>> -- 
>> 2.5.0
>>
>>
>>
>>
>> _______________________________________________
>> Xen-devel mailing list
>> Xen-devel@lists.xen.org
>> http://lists.xen.org/xen-devel
> 
> 
> .
> 




_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

  reply	other threads:[~2016-03-03  1:30 UTC|newest]

Thread overview: 70+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-02-22  2:52 [PATCH v10 00/31] COarse-grain LOck-stepping Virtual Machines for Non-stop Service Wen Congyang
2016-02-22  2:52 ` [PATCH v10 01/31] tools/libxl: introduce libxl__domain_restore_device_model to load qemu state Wen Congyang
2016-02-25 15:53   ` Wei Liu
2016-02-26  1:55     ` Wen Congyang
2016-02-22  2:52 ` [PATCH v10 02/31] tools/libxl: introduce libxl__domain_common_switch_qemu_logdirty() Wen Congyang
2016-02-22  2:52 ` [PATCH v10 03/31] tools/libxl: Add back channel to allow migration target send data back Wen Congyang
2016-02-22  2:52 ` [PATCH v10 04/31] tools/libxl: Introduce new helper function dup_fd_helper() Wen Congyang
2016-02-25 15:53   ` Wei Liu
2016-02-22  2:52 ` [PATCH v10 05/31] tools/libx{l, c}: add back channel to libxc Wen Congyang
2016-02-25 15:54   ` Wei Liu
2016-02-22  2:52 ` [PATCH v10 06/31] docs: add colo readme Wen Congyang
2016-02-25 15:54   ` Wei Liu
2016-02-22  2:52 ` [PATCH v10 07/31] docs/libxl: Introduce CHECKPOINT_CONTEXT to support migration v2 colo streams Wen Congyang
2016-02-25 15:54   ` Wei Liu
2016-02-26  1:59     ` Wen Congyang
2016-02-22  2:52 ` [PATCH v10 08/31] libxc/migration: Specification update for DIRTY_PFN_LIST records Wen Congyang
2016-02-22  2:52 ` [PATCH v10 09/31] libxc/migration: export read_record for common use Wen Congyang
2016-02-22  2:52 ` [PATCH v10 10/31] tools/libxl: add back channel support to write stream Wen Congyang
2016-02-25 15:54   ` Wei Liu
2016-02-26  2:11     ` Wen Congyang
2016-03-02 15:02       ` Wei Liu
2016-03-03  1:25         ` Wen Congyang
2016-02-22  2:52 ` [PATCH v10 11/31] tools/libxl: write checkpoint_state records into the stream Wen Congyang
2016-02-22  2:52 ` [PATCH v10 12/31] tools/libxl: add back channel support to read stream Wen Congyang
2016-02-25 15:54   ` Wei Liu
2016-02-26  2:16     ` Wen Congyang
2016-03-02 15:03       ` Wei Liu
2016-02-22  2:52 ` [PATCH v10 13/31] tools/libxl: handle checkpoint_state records in a libxl migration v2 " Wen Congyang
2016-02-22  2:52 ` [PATCH v10 14/31] tools/libx{l, c}: introduce wait_checkpoint callback Wen Congyang
2016-02-22  2:52 ` [PATCH v10 15/31] tools/libx{l, c}: add postcopy/suspend callback to restore side Wen Congyang
2016-02-22  2:52 ` [PATCH v10 16/31] secondary vm suspend/resume/checkpoint code Wen Congyang
2016-02-25 15:56   ` Wei Liu
2016-02-26  2:30     ` Wen Congyang
2016-03-01 10:06     ` Wen Congyang
2016-02-22  2:52 ` [PATCH v10 17/31] primary " Wen Congyang
2016-02-25 15:57   ` Wei Liu
2016-02-26  2:32     ` Wen Congyang
2016-02-22  2:52 ` [PATCH v10 18/31] libxc/restore: support COLO restore Wen Congyang
2016-02-25 15:57   ` Wei Liu
2016-02-26  2:33     ` Wen Congyang
2016-02-22  2:52 ` [PATCH v10 19/31] libxc/restore: send dirty pfn list to primary when checkpoint under colo Wen Congyang
2016-02-22  2:52 ` [PATCH v10 20/31] send store gfn and console gfn to xl before resuming secondary vm Wen Congyang
2016-02-22  2:52 ` [PATCH v10 21/31] libxc/save: support COLO save Wen Congyang
2016-02-25 15:58   ` Wei Liu
2016-02-26  2:35     ` Wen Congyang
2016-02-22  2:52 ` [PATCH v10 22/31] implement the cmdline for COLO Wen Congyang
2016-03-02 15:03   ` Wei Liu
2016-03-03  1:30     ` Wen Congyang [this message]
2016-02-22  2:52 ` [PATCH v10 23/31] COLO: introduce new API to prepare/start/do/get_error/stop replication Wen Congyang
2016-03-02 15:03   ` Wei Liu
2016-02-22  2:52 ` [PATCH v10 24/31] Support colo mode for qemu disk Wen Congyang
2016-03-02 15:04   ` Wei Liu
2016-03-03  1:40     ` Wen Congyang
2016-02-22  2:52 ` [PATCH v10 25/31] COLO: use qemu block replication Wen Congyang
2016-03-02 15:03   ` Wei Liu
2016-02-22  2:52 ` [PATCH v10 26/31] COLO proxy: implement setup/teardown of COLO proxy module Wen Congyang
2016-03-02 15:04   ` Wei Liu
2016-03-11 22:25   ` Konrad Rzeszutek Wilk
2016-03-14  9:13     ` Wen Congyang
2016-03-22  3:40       ` Changlong Xie
2016-02-22  2:52 ` [PATCH v10 27/31] COLO proxy: preresume, postresume and checkpoint Wen Congyang
2016-03-02 15:04   ` Wei Liu
2016-02-22  2:52 ` [PATCH v10 28/31] COLO nic: implement COLO nic subkind Wen Congyang
2016-03-02 15:04   ` Wei Liu
2016-02-22  2:52 ` [PATCH v10 29/31] setup and control colo proxy on primary side Wen Congyang
2016-02-22  2:52 ` [PATCH v10 30/31] setup and control colo proxy on secondary side Wen Congyang
2016-02-22  2:52 ` [PATCH v10 31/31] cmdline switches and config vars to control colo-proxy Wen Congyang
2016-03-02 15:05   ` Wei Liu
2016-03-03  1:41     ` Wen Congyang
2016-02-25 16:05 ` [PATCH v10 00/31] COarse-grain LOck-stepping Virtual Machines for Non-stop Service Wei Liu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=56D793A2.2080303@cn.fujitsu.com \
    --to=wency@cn.fujitsu.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=eddie.dong@intel.com \
    --cc=guijianfeng@cn.fujitsu.com \
    --cc=hongyang.yang@easystack.cn \
    --cc=ian.campbell@citrix.com \
    --cc=ian.jackson@eu.citrix.com \
    --cc=lars.kurth@citrix.com \
    --cc=rshriram@cs.ubc.ca \
    --cc=wei.liu2@citrix.com \
    --cc=xen-devel@lists.xen.org \
    --cc=xiecl.fnst@cn.fujitsu.com \
    --cc=yunhong.jiang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).