All of lore.kernel.org
 help / color / mirror / Atom feed
From: Wen Congyang <wency@cn.fujitsu.com>
To: Wei Liu <wei.liu2@citrix.com>
Cc: Lars Kurth <lars.kurth@citrix.com>,
	Changlong Xie <xiecl.fnst@cn.fujitsu.com>,
	Ian Campbell <ian.campbell@citrix.com>,
	Andrew Cooper <andrew.cooper3@citrix.com>,
	Jiang Yunhong <yunhong.jiang@intel.com>,
	Ian Jackson <ian.jackson@eu.citrix.com>,
	xen devel <xen-devel@lists.xen.org>,
	Dong Eddie <eddie.dong@intel.com>,
	Gui Jianfeng <guijianfeng@cn.fujitsu.com>,
	Shriram Rajagopalan <rshriram@cs.ubc.ca>,
	Yang Hongyang <hongyang.yang@easystack.cn>
Subject: Re: [PATCH v10 22/31] implement the cmdline for COLO
Date: Thu, 3 Mar 2016 09:30:10 +0800	[thread overview]
Message-ID: <56D793A2.2080303@cn.fujitsu.com> (raw)
In-Reply-To: <20160302150348.GC1657@citrix.com>

On 03/02/2016 11:03 PM, Wei Liu wrote:
> On Mon, Feb 22, 2016 at 10:52:26AM +0800, Wen Congyang wrote:
> [...]
>> +    if (libxl_defbool_val(info->colo)) {
>> +        if (libxl_defbool_val(info->compression)) {
> 
> This can be simplified as
> 
>        if (libxl_defbool_val(xxx) && libxl_defbool_val(yyy))

OK, I will fix it in the next version.

> 
>> +            LOG(ERROR, "cannot use memory checkpoint compression in COLO mode");
>> +            rc = ERROR_FAIL;
>> +            goto out;
>> +        }
>> +    }
>> +
>>      if (!libxl_defbool_val(info->allow_unsafe) &&
>>          (libxl_defbool_val(info->blackhole) ||
>>           !libxl_defbool_val(info->netbuf) ||
>> @@ -876,7 +892,10 @@ int libxl_domain_remus_start(libxl_ctx *ctx, libxl_domain_remus_info *info,
>>      dss->live = 1;
>>      dss->debug = 0;
>>      dss->remus = info;
>> -    dss->checkpointed_stream = LIBXL_CHECKPOINTED_STREAM_REMUS;
>> +    if (libxl_defbool_val(info->colo))
>> +        dss->checkpointed_stream = LIBXL_CHECKPOINTED_STREAM_COLO;
>> +    else
>> +        dss->checkpointed_stream = LIBXL_CHECKPOINTED_STREAM_REMUS;
>>  
>>      assert(info);
>>  
>> diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
>> index df7268b..0dc7220 100644
>> --- a/tools/libxl/xl_cmdimpl.c
>> +++ b/tools/libxl/xl_cmdimpl.c
>> @@ -4440,6 +4440,8 @@ static void migrate_receive(int debug, int daemonize, int monitor,
>>      char rc_buf;
>>      char *migration_domname;
>>      struct domain_create dom_info;
>> +    const char *ha = checkpointed == LIBXL_CHECKPOINTED_STREAM_COLO ?
>> +                     "COLO" : "Remus";
>>  
>>      signal(SIGPIPE, SIG_IGN);
>>      /* if we get SIGPIPE we'd rather just have it as an error */
>> @@ -4460,6 +4462,9 @@ static void migrate_receive(int debug, int daemonize, int monitor,
>>      dom_info.send_back_fd = send_fd;
>>      dom_info.migration_domname_r = &migration_domname;
>>      dom_info.checkpointed_stream = checkpointed;
>> +    if (checkpointed == LIBXL_CHECKPOINTED_STREAM_COLO)
>> +        /* COLO uses stdout to send control message to master */
>> +        dom_info.quiet = 1;
>>  
> 
> It seems that dom_info->quiet affects stderr, not stdout. See the only
> place that checks this in xl_cmdimpl.c.
> 
>>      rc = create_domain(&dom_info);
>>      if (rc < 0) {
>> @@ -4472,11 +4477,12 @@ static void migrate_receive(int debug, int daemonize, int monitor,
>>  
>>      switch (checkpointed) {
>>      case LIBXL_CHECKPOINTED_STREAM_REMUS:
>> +    case LIBXL_CHECKPOINTED_STREAM_COLO:
>>          /* If we are here, it means that the sender (primary) has crashed.
>>           * TODO: Split-Brain Check.
>>           */
>> -        fprintf(stderr, "migration target: Remus Failover for domain %u\n",
>> -                domid);
>> +        fprintf(stderr, "migration target: %s Failover for domain %u\n",
>> +                ha, domid);
>>  
>>          /*
>>           * If domain renaming fails, lets just continue (as we need the domain
>> @@ -4492,16 +4498,20 @@ static void migrate_receive(int debug, int daemonize, int monitor,
>>              rc = libxl_domain_rename(ctx, domid, migration_domname,
>>                                       common_domname);
>>              if (rc)
>> -                fprintf(stderr, "migration target (Remus): "
>> +                fprintf(stderr, "migration target (%s): "
>>                          "Failed to rename domain from %s to %s:%d\n",
>> -                        migration_domname, common_domname, rc);
>> +                        ha, migration_domname, common_domname, rc);
>>          }
>>  
>> +        if (checkpointed == LIBXL_CHECKPOINTED_STREAM_COLO)
>> +            /* The guest is running after failover in COLO mode */
>> +            exit(rc ? -ERROR_FAIL: 0);
>> +
>>          rc = libxl_domain_unpause(ctx, domid);
>>          if (rc)
>> -            fprintf(stderr, "migration target (Remus): "
>> +            fprintf(stderr, "migration target (%s): "
>>                      "Failed to unpause domain %s (id: %u):%d\n",
>> -                    common_domname, domid, rc);
>> +                    ha, common_domname, domid, rc);
>>  
>>          exit(rc ? -ERROR_FAIL: 0);
>>      default:
>> @@ -4649,7 +4659,7 @@ int main_migrate_receive(int argc, char **argv)
>>      libxl_checkpointed_stream checkpointed = LIBXL_CHECKPOINTED_STREAM_NONE;
>>      int opt;
>>  
>> -    SWITCH_FOREACH_OPT(opt, "Fedr", NULL, "migrate-receive", 0) {
>> +    SWITCH_FOREACH_OPT(opt, "Fedrc", NULL, "migrate-receive", 0) {
>>      case 'F':
>>          daemonize = 0;
>>          break;
>> @@ -4663,6 +4673,9 @@ int main_migrate_receive(int argc, char **argv)
>>      case 'r':
>>          checkpointed = LIBXL_CHECKPOINTED_STREAM_REMUS;
>>          break;
>> +    case 'c':
>> +        checkpointed = LIBXL_CHECKPOINTED_STREAM_COLO;
>> +        break;
>>      }
>>  
>>      if (argc-optind != 0) {
>> @@ -8032,11 +8045,8 @@ int main_remus(int argc, char **argv)
>>      int config_len;
>>  
>>      memset(&r_info, 0, sizeof(libxl_domain_remus_info));
>> -    /* Defaults */
>> -    r_info.interval = 200;
>> -    libxl_defbool_setdefault(&r_info.blackhole, false);
>>  
>> -    SWITCH_FOREACH_OPT(opt, "Fbundi:s:N:e", NULL, "remus", 2) {
>> +    SWITCH_FOREACH_OPT(opt, "Fbundi:s:N:ec", NULL, "remus", 2) {
>>      case 'i':
>>          r_info.interval = atoi(optarg);
>>          break;
>> @@ -8064,11 +8074,32 @@ int main_remus(int argc, char **argv)
>>      case 'e':
>>          daemonize = 0;
>>          break;
>> +    case 'c':
>> +        libxl_defbool_set(&r_info.colo, true);
>>      }
>>  
>>      domid = find_domain(argv[optind]);
>>      host = argv[optind + 1];
>>  
>> +    /* Defaults */
>> +    libxl_defbool_setdefault(&r_info.blackhole, false);
>> +    libxl_defbool_setdefault(&r_info.colo, false);
>> +    if (!libxl_defbool_val(r_info.colo) && !r_info.interval)
>> +        r_info.interval = 200;
>> +
>> +    if (libxl_defbool_val(r_info.colo)) {
>> +        if (r_info.interval || libxl_defbool_val(r_info.blackhole)) {
>> +            perror("Option -c conflicts with -i or -b");
>> +            exit(-1);
>> +        }
>> +
>> +        if (libxl_defbool_is_default(r_info.compression)) {
>> +            perror("COLO can't be used with memory compression. "
>> +                   "Disable memory checkpoint compression now...");
>> +            libxl_defbool_set(&r_info.compression, false);
>> +        }
>> +    }
>> +
> 
> I don't think I'm entirely happy with how these things are arranged.
> Remus and COLO don't seem to have a set of consistent APIs that
> arbitrary users can call.
> 
> But for the sake of not growing this series any longer, let's leave it
> like this for the moment. I think COLO is at best going to be (as you
> stated in the manpage) experimental at this stage.

Yes, it is experimental now.

Thanks
Wen Congyang

> 
> 
>>      if (!r_info.netbufscript)
>>          r_info.netbufscript = default_remus_netbufscript;
>>  
>> @@ -8083,8 +8114,9 @@ int main_remus(int argc, char **argv)
>>          if (!ssh_command[0]) {
>>              rune = host;
>>          } else {
>> -            xasprintf(&rune, "exec %s %s xl migrate-receive -r %s",
>> +            xasprintf(&rune, "exec %s %s xl migrate-receive %s %s",
>>                        ssh_command, host,
>> +                      libxl_defbool_val(r_info.colo) ? "-c" : "-r",
>>                        daemonize ? "" : " -e");
>>          }
>>  
>> @@ -8112,7 +8144,8 @@ int main_remus(int argc, char **argv)
>>       * domain to force failover
>>       */
>>      if (libxl_domain_info(ctx, 0, domid)) {
>> -        fprintf(stderr, "Remus: Primary domain has been destroyed.\n");
>> +        fprintf(stderr, "%s: Primary domain has been destroyed.\n",
>> +                libxl_defbool_val(r_info.colo) ? "COLO" : "Remus");
>>          close(send_fd);
>>          return 0;
>>      }
>> @@ -8124,7 +8157,8 @@ int main_remus(int argc, char **argv)
>>      if (rc == ERROR_GUEST_TIMEDOUT)
>>          fprintf(stderr, "Failed to suspend domain at primary.\n");
>>      else {
>> -        fprintf(stderr, "Remus: Backup failed? resuming domain at primary.\n");
>> +        fprintf(stderr, "%s: Backup failed? resuming domain at primary.\n",
>> +                libxl_defbool_val(r_info.colo) ? "COLO" : "Remus");
>>          libxl_domain_resume(ctx, domid, 1, 0);
>>      }
>>  
>> diff --git a/tools/libxl/xl_cmdtable.c b/tools/libxl/xl_cmdtable.c
>> index fdc1ac6..b6b630c 100644
>> --- a/tools/libxl/xl_cmdtable.c
>> +++ b/tools/libxl/xl_cmdtable.c
>> @@ -499,7 +499,9 @@ struct cmd_spec cmd_table[] = {
>>        "-b                      Replicate memory checkpoints to /dev/null (blackhole).\n"
>>        "                        Works only in unsafe mode.\n"
>>        "-n                      Disable network output buffering. Works only in unsafe mode.\n"
>> -      "-d                      Disable disk replication. Works only in unsafe mode."
>> +      "-d                      Disable disk replication. Works only in unsafe mode.\n"
>> +      "-c                      Enable COLO HA. It is conflict with -i and -b, and memory\n"
>> +      "                        checkpoint must be disabled"
>>      },
>>  #endif
>>      { "devd",
>> -- 
>> 2.5.0
>>
>>
>>
>>
>> _______________________________________________
>> Xen-devel mailing list
>> Xen-devel@lists.xen.org
>> http://lists.xen.org/xen-devel
> 
> 
> .
> 




_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

  reply	other threads:[~2016-03-03  1:30 UTC|newest]

Thread overview: 70+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-02-22  2:52 [PATCH v10 00/31] COarse-grain LOck-stepping Virtual Machines for Non-stop Service Wen Congyang
2016-02-22  2:52 ` [PATCH v10 01/31] tools/libxl: introduce libxl__domain_restore_device_model to load qemu state Wen Congyang
2016-02-25 15:53   ` Wei Liu
2016-02-26  1:55     ` Wen Congyang
2016-02-22  2:52 ` [PATCH v10 02/31] tools/libxl: introduce libxl__domain_common_switch_qemu_logdirty() Wen Congyang
2016-02-22  2:52 ` [PATCH v10 03/31] tools/libxl: Add back channel to allow migration target send data back Wen Congyang
2016-02-22  2:52 ` [PATCH v10 04/31] tools/libxl: Introduce new helper function dup_fd_helper() Wen Congyang
2016-02-25 15:53   ` Wei Liu
2016-02-22  2:52 ` [PATCH v10 05/31] tools/libx{l, c}: add back channel to libxc Wen Congyang
2016-02-25 15:54   ` Wei Liu
2016-02-22  2:52 ` [PATCH v10 06/31] docs: add colo readme Wen Congyang
2016-02-25 15:54   ` Wei Liu
2016-02-22  2:52 ` [PATCH v10 07/31] docs/libxl: Introduce CHECKPOINT_CONTEXT to support migration v2 colo streams Wen Congyang
2016-02-25 15:54   ` Wei Liu
2016-02-26  1:59     ` Wen Congyang
2016-02-22  2:52 ` [PATCH v10 08/31] libxc/migration: Specification update for DIRTY_PFN_LIST records Wen Congyang
2016-02-22  2:52 ` [PATCH v10 09/31] libxc/migration: export read_record for common use Wen Congyang
2016-02-22  2:52 ` [PATCH v10 10/31] tools/libxl: add back channel support to write stream Wen Congyang
2016-02-25 15:54   ` Wei Liu
2016-02-26  2:11     ` Wen Congyang
2016-03-02 15:02       ` Wei Liu
2016-03-03  1:25         ` Wen Congyang
2016-02-22  2:52 ` [PATCH v10 11/31] tools/libxl: write checkpoint_state records into the stream Wen Congyang
2016-02-22  2:52 ` [PATCH v10 12/31] tools/libxl: add back channel support to read stream Wen Congyang
2016-02-25 15:54   ` Wei Liu
2016-02-26  2:16     ` Wen Congyang
2016-03-02 15:03       ` Wei Liu
2016-02-22  2:52 ` [PATCH v10 13/31] tools/libxl: handle checkpoint_state records in a libxl migration v2 " Wen Congyang
2016-02-22  2:52 ` [PATCH v10 14/31] tools/libx{l, c}: introduce wait_checkpoint callback Wen Congyang
2016-02-22  2:52 ` [PATCH v10 15/31] tools/libx{l, c}: add postcopy/suspend callback to restore side Wen Congyang
2016-02-22  2:52 ` [PATCH v10 16/31] secondary vm suspend/resume/checkpoint code Wen Congyang
2016-02-25 15:56   ` Wei Liu
2016-02-26  2:30     ` Wen Congyang
2016-03-01 10:06     ` Wen Congyang
2016-02-22  2:52 ` [PATCH v10 17/31] primary " Wen Congyang
2016-02-25 15:57   ` Wei Liu
2016-02-26  2:32     ` Wen Congyang
2016-02-22  2:52 ` [PATCH v10 18/31] libxc/restore: support COLO restore Wen Congyang
2016-02-25 15:57   ` Wei Liu
2016-02-26  2:33     ` Wen Congyang
2016-02-22  2:52 ` [PATCH v10 19/31] libxc/restore: send dirty pfn list to primary when checkpoint under colo Wen Congyang
2016-02-22  2:52 ` [PATCH v10 20/31] send store gfn and console gfn to xl before resuming secondary vm Wen Congyang
2016-02-22  2:52 ` [PATCH v10 21/31] libxc/save: support COLO save Wen Congyang
2016-02-25 15:58   ` Wei Liu
2016-02-26  2:35     ` Wen Congyang
2016-02-22  2:52 ` [PATCH v10 22/31] implement the cmdline for COLO Wen Congyang
2016-03-02 15:03   ` Wei Liu
2016-03-03  1:30     ` Wen Congyang [this message]
2016-02-22  2:52 ` [PATCH v10 23/31] COLO: introduce new API to prepare/start/do/get_error/stop replication Wen Congyang
2016-03-02 15:03   ` Wei Liu
2016-02-22  2:52 ` [PATCH v10 24/31] Support colo mode for qemu disk Wen Congyang
2016-03-02 15:04   ` Wei Liu
2016-03-03  1:40     ` Wen Congyang
2016-02-22  2:52 ` [PATCH v10 25/31] COLO: use qemu block replication Wen Congyang
2016-03-02 15:03   ` Wei Liu
2016-02-22  2:52 ` [PATCH v10 26/31] COLO proxy: implement setup/teardown of COLO proxy module Wen Congyang
2016-03-02 15:04   ` Wei Liu
2016-03-11 22:25   ` Konrad Rzeszutek Wilk
2016-03-14  9:13     ` Wen Congyang
2016-03-22  3:40       ` Changlong Xie
2016-02-22  2:52 ` [PATCH v10 27/31] COLO proxy: preresume, postresume and checkpoint Wen Congyang
2016-03-02 15:04   ` Wei Liu
2016-02-22  2:52 ` [PATCH v10 28/31] COLO nic: implement COLO nic subkind Wen Congyang
2016-03-02 15:04   ` Wei Liu
2016-02-22  2:52 ` [PATCH v10 29/31] setup and control colo proxy on primary side Wen Congyang
2016-02-22  2:52 ` [PATCH v10 30/31] setup and control colo proxy on secondary side Wen Congyang
2016-02-22  2:52 ` [PATCH v10 31/31] cmdline switches and config vars to control colo-proxy Wen Congyang
2016-03-02 15:05   ` Wei Liu
2016-03-03  1:41     ` Wen Congyang
2016-02-25 16:05 ` [PATCH v10 00/31] COarse-grain LOck-stepping Virtual Machines for Non-stop Service Wei Liu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=56D793A2.2080303@cn.fujitsu.com \
    --to=wency@cn.fujitsu.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=eddie.dong@intel.com \
    --cc=guijianfeng@cn.fujitsu.com \
    --cc=hongyang.yang@easystack.cn \
    --cc=ian.campbell@citrix.com \
    --cc=ian.jackson@eu.citrix.com \
    --cc=lars.kurth@citrix.com \
    --cc=rshriram@cs.ubc.ca \
    --cc=wei.liu2@citrix.com \
    --cc=xen-devel@lists.xen.org \
    --cc=xiecl.fnst@cn.fujitsu.com \
    --cc=yunhong.jiang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.