From: Changlong Xie <xiecl.fnst@cn.fujitsu.com>
To: xen devel <xen-devel@lists.xen.org>,
Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>,
Andrew Cooper <andrew.cooper3@citrix.com>,
Ian Campbell <ian.campbell@citrix.com>,
Ian Jackson <ian.jackson@eu.citrix.com>,
Wei Liu <wei.liu2@citrix.com>
Cc: Lars Kurth <lars.kurth@citrix.com>,
Changlong Xie <xiecl.fnst@cn.fujitsu.com>,
Wen Congyang <wency@cn.fujitsu.com>,
Li Zhijian <lizhijian@cn.fujitsu.com>,
Gui Jianfeng <guijianfeng@cn.fujitsu.com>,
Jiang Yunhong <yunhong.jiang@intel.com>,
Dong Eddie <eddie.dong@intel.com>,
Anthony Perard <anthony.perard@citrix.com>,
Shriram Rajagopalan <rshriram@cs.ubc.ca>,
Yang Hongyang <hongyang.yang@easystack.cn>
Subject: [PATCH v12 14/26] primary vm suspend/resume/checkpoint code
Date: Wed, 23 Mar 2016 16:06:28 +0800 [thread overview]
Message-ID: <1458720400-4699-15-git-send-email-xiecl.fnst@cn.fujitsu.com> (raw)
In-Reply-To: <1458720400-4699-1-git-send-email-xiecl.fnst@cn.fujitsu.com>
From: Wen Congyang <wency@cn.fujitsu.com>
We will do the following things again and again:
1. Suspend primary vm
a. Suspend primary vm
b. do postsuspend
c. Read CHECKPOINT_SVM_SUSPENDED sent by secondary
2. Checkpoint
a. Write emulator xenstore data and emulator context
b. Write checkpoint end record
3. Resume primary vm
a. Read CHECKPOINT_SVM_READY sent by secondary
b. Do preresume
c. Resume primary vm
d. Read CHECKPOINT_SVM_RESUMED sent by secondary
4. Wait for a new checkpoint
a. Wait for a new checkpoint (not implemented)
b. Send CHECKPOINT_NEW to secondary
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Yang Hongyang <hongyang.yang@easystack.cn>
Signed-off-by: Changlong Xie <xiecl.fnst@cn.fujitsu.com>
---
tools/libxc/include/xenguest.h | 9 +
tools/libxl/Makefile | 2 +-
tools/libxl/libxl.c | 5 +-
tools/libxl/libxl_colo.h | 6 +
tools/libxl/libxl_colo_save.c | 566 +++++++++++++++++++++++++++++++++++++
tools/libxl/libxl_dom_save.c | 7 +-
tools/libxl/libxl_internal.h | 25 +-
tools/libxl/libxl_save_msgs_gen.pl | 2 +-
tools/libxl/libxl_types.idl | 1 +
9 files changed, 614 insertions(+), 9 deletions(-)
create mode 100644 tools/libxl/libxl_colo_save.c
diff --git a/tools/libxc/include/xenguest.h b/tools/libxc/include/xenguest.h
index 3193d0f..8ea5a3c 100644
--- a/tools/libxc/include/xenguest.h
+++ b/tools/libxc/include/xenguest.h
@@ -68,6 +68,15 @@ struct save_callbacks {
* 1: take another checkpoint */
int (*checkpoint)(void* data);
+ /*
+ * Called after the checkpoint callback.
+ *
+ * returns:
+ * 0: terminate checkpointing gracefully
+ * 1: take another checkpoint
+ */
+ int (*wait_checkpoint)(void* data);
+
/* Enable qemu-dm logging dirty pages to xen */
int (*switch_qemu_logdirty)(int domid, unsigned enable, void *data); /* HVM only */
diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index d8612eb..2016393 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -65,7 +65,7 @@ LIBXL_OBJS-y += libxl_no_convert_callout.o
endif
LIBXL_OBJS-y += libxl_remus.o libxl_checkpoint_device.o libxl_remus_disk_drbd.o
-LIBXL_OBJS-y += libxl_colo_restore.o
+LIBXL_OBJS-y += libxl_colo_restore.o libxl_colo_save.o
LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o libxl_psr.o
LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o libxl_libfdt_compat.o
diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index 83a419e..22734b4 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -880,7 +880,10 @@ int libxl_domain_remus_start(libxl_ctx *ctx, libxl_domain_remus_info *info,
assert(info);
/* Point of no return */
- libxl__remus_setup(egc, &dss->rs);
+ if (libxl_defbool_val(info->colo))
+ libxl__colo_save_setup(egc, &dss->css);
+ else
+ libxl__remus_setup(egc, &dss->rs);
return AO_INPROGRESS;
out:
diff --git a/tools/libxl/libxl_colo.h b/tools/libxl/libxl_colo.h
index f2b98cc..feec7f1 100644
--- a/tools/libxl/libxl_colo.h
+++ b/tools/libxl/libxl_colo.h
@@ -18,6 +18,7 @@
struct libxl__ao;
struct libxl__egc;
+struct libxl__colo_save_state;
enum {
LIBXL_COLO_SETUPED,
@@ -52,4 +53,9 @@ extern void libxl__colo_restore_setup(struct libxl__egc *egc,
libxl__colo_restore_state *crs);
extern void libxl__colo_restore_teardown(struct libxl__egc *egc, void *dcs_void,
int ret, int retval, int errnoval);
+extern void libxl__colo_save_setup(struct libxl__egc *egc,
+ struct libxl__colo_save_state *css);
+extern void libxl__colo_save_teardown(struct libxl__egc *egc,
+ struct libxl__colo_save_state *css,
+ int rc);
#endif
diff --git a/tools/libxl/libxl_colo_save.c b/tools/libxl/libxl_colo_save.c
new file mode 100644
index 0000000..f25e06b
--- /dev/null
+++ b/tools/libxl/libxl_colo_save.c
@@ -0,0 +1,566 @@
+/*
+ * Copyright (C) 2016 FUJITSU LIMITED
+ * Author: Wen Congyang <wency@cn.fujitsu.com>
+ * Yang Hongyang <hongyang.yang@easystack.cn>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ */
+
+#include "libxl_osdeps.h" /* must come before any other headers */
+
+#include "libxl_internal.h"
+
+static const libxl__checkpoint_device_instance_ops *colo_ops[] = {
+ NULL,
+};
+
+/* ================= helper functions ================= */
+
+/*
+ * Set up device subkind-specific state in the libxl ctx.
+ *
+ * Nothing is initialised at the moment, so this always returns 0.
+ */
+static int init_device_subkind(libxl__checkpoint_devices_state *cds)
+{
+    STATE_AO_GC(cds->ao);
+
+    return 0;
+}
+
+/*
+ * Counterpart of init_device_subkind(): release device subkind-specific
+ * state in the libxl ctx.  Currently there is nothing to release.
+ */
+static void cleanup_device_subkind(libxl__checkpoint_devices_state *cds)
+{
+ /* cleanup device subkind-specific state in the libxl ctx */
+ STATE_AO_GC(cds->ao);
+}
+
+/* ================= colo: setup save environment ================= */
+
+static void colo_save_setup_done(libxl__egc *egc,
+ libxl__checkpoint_devices_state *cds,
+ int rc);
+static void colo_save_setup_failed(libxl__egc *egc,
+ libxl__checkpoint_devices_state *cds,
+ int rc);
+/*
+ * checkpoint callbacks are called in the following order:
+ * 1. suspend
+ * 2. checkpoint
+ * 3. resume
+ * 4. wait checkpoint
+ */
+static void libxl__colo_save_domain_suspend_callback(void *data);
+static void libxl__colo_save_domain_checkpoint_callback(void *data);
+static void libxl__colo_save_domain_resume_callback(void *data);
+static void libxl__colo_save_domain_wait_checkpoint_callback(void *data);
+
+/*
+ * Prepare the primary side for COLO and kick off checkpoint device setup.
+ *
+ * Fills in the COLO save state and the checkpoint device state, starts the
+ * back-channel read stream on recv_fd, installs the COLO save helper
+ * callbacks (suspend, checkpoint, postcopy/resume, wait_checkpoint) and
+ * finally calls libxl__checkpoint_devices_setup(); the flow continues in
+ * colo_save_setup_done().
+ *
+ * On an early failure (non-HVM guest, subkind initialisation failure) the
+ * ao is completed with ERROR_FAIL.
+ */
+void libxl__colo_save_setup(libxl__egc *egc, libxl__colo_save_state *css)
+{
+    libxl__domain_save_state *dss = CONTAINER_OF(css, *dss, css);
+
+    /* Convenience aliases */
+    libxl__checkpoint_devices_state *const cds = &dss->cds;
+    libxl__srm_save_autogen_callbacks *const callbacks =
+        &dss->sws.shs.callbacks.save.a;
+
+    STATE_AO_GC(dss->ao);
+
+    if (dss->type != LIBXL_DOMAIN_TYPE_HVM) {
+        LOG(ERROR, "COLO only supports hvm now");
+        goto out;
+    }
+
+    css->send_fd = dss->fd;
+    css->recv_fd = dss->recv_fd;
+    css->svm_running = false;
+
+    /* TODO: disk/nic support */
+    cds->device_kind_flags = 0;
+    cds->ops = colo_ops;
+    cds->callback = colo_save_setup_done;
+    cds->ao = ao;
+    cds->domid = dss->domid;
+    cds->concrete_data = css;
+
+    /*
+     * Run every step that can fail before the back-channel read stream is
+     * started, so that the error path below never completes the ao while
+     * a started stream is left behind.
+     */
+    if (init_device_subkind(cds))
+        goto out;
+
+    css->srs.ao = ao;
+    css->srs.fd = css->recv_fd;
+    css->srs.back_channel = true;
+    libxl__stream_read_start(egc, &css->srs);
+
+    callbacks->suspend = libxl__colo_save_domain_suspend_callback;
+    callbacks->checkpoint = libxl__colo_save_domain_checkpoint_callback;
+    callbacks->postcopy = libxl__colo_save_domain_resume_callback;
+    callbacks->wait_checkpoint = libxl__colo_save_domain_wait_checkpoint_callback;
+
+    libxl__checkpoint_devices_setup(egc, &dss->cds);
+
+    return;
+
+out:
+    libxl__ao_complete(egc, ao, ERROR_FAIL);
+}
+
+/*
+ * Completion callback of libxl__checkpoint_devices_setup().
+ *
+ * rc == 0: devices are ready, start the actual domain save.
+ * rc != 0: log the failure and tear the devices down again; the flow then
+ *          continues in colo_save_setup_failed().
+ */
+static void colo_save_setup_done(libxl__egc *egc,
+                                 libxl__checkpoint_devices_state *cds,
+                                 int rc)
+{
+    libxl__colo_save_state *css = cds->concrete_data;
+    libxl__domain_save_state *dss = CONTAINER_OF(css, *dss, css);
+    EGC_GC;
+
+    if (rc) {
+        LOG(ERROR, "COLO: failed to setup device for guest with domid %u",
+            dss->domid);
+        cds->callback = colo_save_setup_failed;
+        libxl__checkpoint_devices_teardown(egc, cds);
+        return;
+    }
+
+    libxl__domain_save(egc, dss);
+}
+
+/*
+ * Completion callback of the device teardown that follows a failed setup
+ * (installed by colo_save_setup_done()).
+ *
+ * rc is the result of the teardown, not of the setup.  This path is only
+ * reached after setup has failed, so the ao must never be completed with
+ * success: if the teardown itself went fine, report ERROR_FAIL instead of
+ * passing the teardown's 0 through.
+ */
+static void colo_save_setup_failed(libxl__egc *egc,
+                                   libxl__checkpoint_devices_state *cds,
+                                   int rc)
+{
+    STATE_AO_GC(cds->ao);
+
+    if (rc)
+        LOG(ERROR, "COLO: failed to teardown device after setup failed"
+            " for guest with domid %u, rc %d", cds->domid, rc);
+    else
+        rc = ERROR_FAIL; /* teardown succeeded, but setup did not */
+
+    cleanup_device_subkind(cds);
+    libxl__ao_complete(egc, ao, rc);
+}
+
+/* ================= colo: teardown save environment ================= */
+
+static void colo_teardown_done(libxl__egc *egc,
+ libxl__checkpoint_devices_state *cds,
+ int rc);
+
+/*
+ * Tear down the COLO save environment: log the rc the domain suspend
+ * terminated with, abort the back-channel read stream and start the
+ * checkpoint device teardown.  Completion is handled by
+ * colo_teardown_done().
+ */
+void libxl__colo_save_teardown(libxl__egc *egc,
+                               libxl__colo_save_state *css,
+                               int rc)
+{
+    libxl__domain_save_state *dss = CONTAINER_OF(css, *dss, css);
+
+    /* Convenience aliases */
+    libxl__checkpoint_devices_state *const cds = &dss->cds;
+
+    EGC_GC;
+
+    LOG(WARN, "COLO: Domain suspend terminated with rc %d,"
+        " teardown COLO devices...", rc);
+
+    libxl__stream_read_abort(egc, &css->srs, 1);
+
+    cds->callback = colo_teardown_done;
+    libxl__checkpoint_devices_teardown(egc, cds);
+}
+
+/*
+ * Completion callback for the device teardown started by
+ * libxl__colo_save_teardown(): cleans up the subkind state and then
+ * hands control back through dss->callback.
+ *
+ * NOTE(review): rc here is whatever the device teardown reports, not the
+ * rc that was passed to libxl__colo_save_teardown() -- confirm that
+ * dropping the original rc is intended.
+ */
+static void colo_teardown_done(libxl__egc *egc,
+ libxl__checkpoint_devices_state *cds,
+ int rc)
+{
+ libxl__colo_save_state *css = cds->concrete_data;
+ libxl__domain_save_state *dss = CONTAINER_OF(css, *dss, css);
+
+ cleanup_device_subkind(cds);
+ dss->callback(egc, dss, rc);
+}
+
+static void colo_common_write_stream_done(libxl__egc *egc,
+ libxl__stream_write_state *stream,
+ int rc);
+static void colo_common_read_stream_done(libxl__egc *egc,
+ libxl__stream_read_state *stream,
+ int rc);
+
+/* ===================== colo: suspend primary vm ===================== */
+
+static void colo_read_svm_suspended_done(libxl__egc *egc,
+ libxl__colo_save_state *css,
+ int id);
+/*
+ * Do the following things when suspending primary vm:
+ * 1. suspend primary vm
+ * 2. do postsuspend
+ * 3. read CHECKPOINT_SVM_SUSPENDED
+ * 4. read secondary vm's dirty pages
+ */
+/* rc: 0 on success, non-zero if suspending the primary vm failed */
+static void colo_suspend_primary_vm_done(libxl__egc *egc,
+                                         libxl__domain_suspend_state *dsps,
+                                         int rc);
+static void colo_postsuspend_cb(libxl__egc *egc,
+ libxl__checkpoint_devices_state *cds,
+ int rc);
+
+/*
+ * Save helper "suspend" callback: start suspending the primary vm.
+ * data is the libxl__save_helper_state of the write stream; the result
+ * is delivered to colo_suspend_primary_vm_done().
+ */
+static void libxl__colo_save_domain_suspend_callback(void *data)
+{
+    libxl__save_helper_state *shs = data;
+    libxl__stream_write_state *sws = CONTAINER_OF(shs, *sws, shs);
+    libxl__domain_save_state *dss = sws->dss;
+
+    dss->dsps.callback_common_done = colo_suspend_primary_vm_done;
+    libxl__domain_suspend(shs->egc, &dss->dsps);
+}
+
+/*
+ * Called when the attempt to suspend the primary vm has finished.
+ *
+ * On failure (rc != 0) the rc is recorded in dss->rc and the save helper
+ * is told that the suspend callback failed.  On success the checkpoint
+ * devices' postsuspend step is started; see colo_postsuspend_cb().
+ */
+static void colo_suspend_primary_vm_done(libxl__egc *egc,
+                                         libxl__domain_suspend_state *dsps,
+                                         int rc)
+{
+    libxl__domain_save_state *dss = CONTAINER_OF(dsps, *dss, dsps);
+
+    /* Convenience aliases */
+    libxl__checkpoint_devices_state *const cds = &dss->cds;
+
+    EGC_GC;
+
+    if (rc) {
+        LOG(ERROR, "cannot suspend primary vm");
+        dss->rc = rc;
+        libxl__xc_domain_saverestore_async_callback_done(egc, &dss->sws.shs,
+                                                         !rc);
+        return;
+    }
+
+    cds->callback = colo_postsuspend_cb;
+    libxl__checkpoint_devices_postsuspend(egc, cds);
+}
+
+/*
+ * Completion callback of the devices' postsuspend step.
+ *
+ * - postsuspend failed: report failure (0) to the save helper.
+ * - secondary vm not running yet: nothing to wait for, report success (1).
+ * - otherwise: read the next record from the back channel and check it
+ *   in colo_read_svm_suspended_done().
+ */
+static void colo_postsuspend_cb(libxl__egc *egc,
+                                libxl__checkpoint_devices_state *cds,
+                                int rc)
+{
+    libxl__colo_save_state *css = cds->concrete_data;
+    libxl__domain_save_state *dss = CONTAINER_OF(css, *dss, css);
+
+    EGC_GC;
+
+    if (rc) {
+        LOG(ERROR, "postsuspend fails");
+        libxl__xc_domain_saverestore_async_callback_done(egc, &dss->sws.shs,
+                                                         0);
+        return;
+    }
+
+    if (!css->svm_running) {
+        libxl__xc_domain_saverestore_async_callback_done(egc, &dss->sws.shs,
+                                                         1);
+        return;
+    }
+
+    /* read CHECKPOINT_SVM_SUSPENDED */
+    css->callback = colo_read_svm_suspended_done;
+    css->srs.checkpoint_callback = colo_common_read_stream_done;
+    libxl__stream_read_checkpoint_state(egc, &css->srs);
+}
+
+/*
+ * Check the record id read from the back channel after postsuspend: it
+ * must be CHECKPOINT_SVM_SUSPENDED.  The save helper is told 1 when it
+ * matches and 0 (after logging) when it does not.
+ */
+static void colo_read_svm_suspended_done(libxl__egc *egc,
+                                         libxl__colo_save_state *css,
+                                         int id)
+{
+    libxl__domain_save_state *dss = CONTAINER_OF(css, *dss, css);
+    const int ok = (id == CHECKPOINT_SVM_SUSPENDED);
+
+    EGC_GC;
+
+    if (!ok)
+        LOG(ERROR, "invalid section: %d, expected: %d", id,
+            CHECKPOINT_SVM_SUSPENDED);
+
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->sws.shs, ok);
+}
+
+/* ===================== colo: send tailbuf ========================== */
+
+/*
+ * Save helper "checkpoint" callback: write the emulator xenstore data,
+ * the emulator context and the checkpoint end record.  No follow-up step
+ * is chained (css->callback is NULL), so colo_common_write_stream_done()
+ * reports the result straight to the save helper.
+ */
+static void libxl__colo_save_domain_checkpoint_callback(void *data)
+{
+    libxl__save_helper_state *shs = data;
+    libxl__stream_write_state *sws = CONTAINER_OF(shs, *sws, shs);
+    libxl__domain_save_state *dss = sws->dss;
+
+    dss->css.callback = NULL;
+    dss->sws.checkpoint_callback = colo_common_write_stream_done;
+    libxl__stream_write_start_checkpoint(shs->egc, &dss->sws);
+}
+
+/* ===================== colo: resume primary vm ===================== */
+
+/*
+ * Do the following things when resuming primary vm:
+ * 1. read CHECKPOINT_SVM_READY
+ * 2. do preresume
+ * 3. resume primary vm
+ * 4. read CHECKPOINT_SVM_RESUMED
+ */
+static void colo_read_svm_ready_done(libxl__egc *egc,
+ libxl__colo_save_state *css,
+ int id);
+static void colo_preresume_cb(libxl__egc *egc,
+ libxl__checkpoint_devices_state *cds,
+ int rc);
+static void colo_read_svm_resumed_done(libxl__egc *egc,
+ libxl__colo_save_state *css,
+ int id);
+
+/*
+ * Save helper "postcopy" callback, used by COLO to resume the primary vm.
+ * First step: read the next record from the back channel; it is checked
+ * against CHECKPOINT_SVM_READY in colo_read_svm_ready_done().
+ */
+static void libxl__colo_save_domain_resume_callback(void *data)
+{
+    libxl__save_helper_state *shs = data;
+    libxl__stream_write_state *sws = CONTAINER_OF(shs, *sws, shs);
+    libxl__domain_save_state *dss = sws->dss;
+    libxl__egc *egc = shs->egc;
+
+    EGC_GC;
+
+    /* read CHECKPOINT_SVM_READY */
+    dss->css.callback = colo_read_svm_ready_done;
+    dss->css.srs.checkpoint_callback = colo_common_read_stream_done;
+    libxl__stream_read_checkpoint_state(egc, &dss->css.srs);
+}
+
+/*
+ * Check the record id read at the start of resume.  If it is
+ * CHECKPOINT_SVM_READY, mark the secondary vm as running and start the
+ * devices' preresume step (continued in colo_preresume_cb()); any other
+ * id is logged and reported to the save helper as a failure.
+ */
+static void colo_read_svm_ready_done(libxl__egc *egc,
+                                     libxl__colo_save_state *css,
+                                     int id)
+{
+    libxl__domain_save_state *dss = CONTAINER_OF(css, *dss, css);
+
+    EGC_GC;
+
+    if (id == CHECKPOINT_SVM_READY) {
+        css->svm_running = true;
+        dss->cds.callback = colo_preresume_cb;
+        libxl__checkpoint_devices_preresume(egc, &dss->cds);
+        return;
+    }
+
+    LOG(ERROR, "invalid section: %d, expected: %d", id,
+        CHECKPOINT_SVM_READY);
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->sws.shs, 0);
+}
+
+/*
+ * Completion callback of the devices' preresume step.
+ *
+ * If preresume succeeded, resume the primary domain and then read the
+ * next back-channel record (checked in colo_read_svm_resumed_done()).
+ * A failure of either preresume or the domain resume is logged and
+ * reported to the save helper as 0.
+ */
+static void colo_preresume_cb(libxl__egc *egc,
+                              libxl__checkpoint_devices_state *cds,
+                              int rc)
+{
+    libxl__colo_save_state *css = cds->concrete_data;
+    libxl__domain_save_state *dss = CONTAINER_OF(css, *dss, css);
+
+    EGC_GC;
+
+    if (rc) {
+        LOG(ERROR, "preresume fails");
+    } else if (libxl__domain_resume(gc, dss->domid, /* Fast Suspend */1)) {
+        /* Resuming the domain and the device model did not work */
+        LOG(ERROR, "cannot resume primary vm");
+    } else {
+        /* read CHECKPOINT_SVM_RESUMED */
+        css->callback = colo_read_svm_resumed_done;
+        css->srs.checkpoint_callback = colo_common_read_stream_done;
+        libxl__stream_read_checkpoint_state(egc, &css->srs);
+        return;
+    }
+
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->sws.shs, 0);
+}
+
+/*
+ * Check the record id read after the primary vm was resumed: it must be
+ * CHECKPOINT_SVM_RESUMED.  The save helper is told 1 when it matches and
+ * 0 (after logging) when it does not.
+ */
+static void colo_read_svm_resumed_done(libxl__egc *egc,
+                                       libxl__colo_save_state *css,
+                                       int id)
+{
+    libxl__domain_save_state *dss = CONTAINER_OF(css, *dss, css);
+    const int ok = (id == CHECKPOINT_SVM_RESUMED);
+
+    EGC_GC;
+
+    if (!ok)
+        LOG(ERROR, "invalid section: %d, expected: %d", id,
+            CHECKPOINT_SVM_RESUMED);
+
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->sws.shs, ok);
+}
+
+/* ===================== colo: wait new checkpoint ===================== */
+
+/*
+ * Do the following things:
+ * 1. do commit
+ * 2. wait for a new checkpoint
+ * 3. write CHECKPOINT_NEW
+ */
+static void colo_device_commit_cb(libxl__egc *egc,
+ libxl__checkpoint_devices_state *cds,
+ int rc);
+static void colo_start_new_checkpoint(libxl__egc *egc,
+ libxl__checkpoint_devices_state *cds,
+ int rc);
+
+/*
+ * Save helper "wait_checkpoint" callback: start the checkpoint devices'
+ * commit step; the flow continues in colo_device_commit_cb().
+ */
+static void libxl__colo_save_domain_wait_checkpoint_callback(void *data)
+{
+    libxl__save_helper_state *shs = data;
+    libxl__stream_write_state *sws = CONTAINER_OF(shs, *sws, shs);
+    libxl__domain_save_state *dss = sws->dss;
+
+    dss->cds.callback = colo_device_commit_cb;
+    libxl__checkpoint_devices_commit(dss->sws.shs.egc, &dss->cds);
+}
+
+/*
+ * Completion callback of the devices' commit step.  A failed commit is
+ * logged and reported to the save helper as 0; otherwise a new
+ * checkpoint is started right away (waiting is not implemented yet).
+ */
+static void colo_device_commit_cb(libxl__egc *egc,
+                                  libxl__checkpoint_devices_state *cds,
+                                  int rc)
+{
+    libxl__colo_save_state *css = cds->concrete_data;
+    libxl__domain_save_state *dss = CONTAINER_OF(css, *dss, css);
+
+    EGC_GC;
+
+    if (rc) {
+        LOG(ERROR, "commit fails");
+        libxl__xc_domain_saverestore_async_callback_done(egc, &dss->sws.shs,
+                                                         0);
+        return;
+    }
+
+    /* TODO: wait a new checkpoint */
+    colo_start_new_checkpoint(egc, cds, 0);
+}
+
+/*
+ * Announce a new checkpoint by writing a CHECKPOINT_NEW checkpoint state
+ * record to the stream.  A non-zero rc skips the write and reports 0 to
+ * the save helper; otherwise the write result is handled by
+ * colo_common_write_stream_done() with no follow-up step chained.
+ */
+static void colo_start_new_checkpoint(libxl__egc *egc,
+                                      libxl__checkpoint_devices_state *cds,
+                                      int rc)
+{
+    libxl__colo_save_state *css = cds->concrete_data;
+    libxl__domain_save_state *dss = CONTAINER_OF(css, *dss, css);
+    libxl_sr_checkpoint_state srcs = { .id = CHECKPOINT_NEW };
+
+    if (rc) {
+        libxl__xc_domain_saverestore_async_callback_done(egc, &dss->sws.shs,
+                                                         0);
+        return;
+    }
+
+    /* write CHECKPOINT_NEW */
+    css->callback = NULL;
+    dss->sws.checkpoint_callback = colo_common_write_stream_done;
+    libxl__stream_write_checkpoint_state(egc, &dss->sws, &srcs);
+}
+
+/* ===================== colo: common callback ===================== */
+
+/*
+ * Common completion callback for writes to the save stream.
+ *
+ * rc < 0: the write failed; log it and report 0 to the save helper.
+ * Otherwise, if a follow-up step is registered in css->callback it is
+ * invoked with 0; if none is registered, 1 is reported to the save helper.
+ */
+static void colo_common_write_stream_done(libxl__egc *egc,
+                                          libxl__stream_write_state *stream,
+                                          int rc)
+{
+    libxl__domain_save_state *dss = CONTAINER_OF(stream, *dss, sws);
+
+    /* Convenience aliases */
+    libxl__colo_save_state *const css = &dss->css;
+
+    EGC_GC;
+
+    if (rc < 0) {
+        /* TODO: it may be an internal error, but we don't know */
+        LOG(ERROR, "sending data fails");
+        libxl__xc_domain_saverestore_async_callback_done(egc, &dss->sws.shs,
+                                                         0);
+        return;
+    }
+
+    if (css->callback) {
+        css->callback(egc, css, 0);
+        return;
+    }
+
+    /* Everything is OK */
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->sws.shs, 1);
+}
+
+/*
+ * Common completion callback for reads from the back channel.
+ *
+ * rc < 0: the read failed; log it and report 0 to the save helper.
+ * Otherwise rc carries the id that was read: it is passed to the
+ * follow-up step in css->callback, or, if none is registered, 1 is
+ * reported to the save helper.
+ */
+static void colo_common_read_stream_done(libxl__egc *egc,
+                                         libxl__stream_read_state *stream,
+                                         int rc)
+{
+    libxl__colo_save_state *css = CONTAINER_OF(stream, *css, srs);
+    libxl__domain_save_state *dss = CONTAINER_OF(css, *dss, css);
+
+    EGC_GC;
+
+    if (rc < 0) {
+        /* TODO: it may be an internal error, but we don't know */
+        LOG(ERROR, "reading data fails");
+        libxl__xc_domain_saverestore_async_callback_done(egc, &dss->sws.shs,
+                                                         0);
+        return;
+    }
+
+    if (css->callback) {
+        /* rc contains the id */
+        css->callback(egc, css, rc);
+        return;
+    }
+
+    /* Everything is OK */
+    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->sws.shs, 1);
+}
diff --git a/tools/libxl/libxl_dom_save.c b/tools/libxl/libxl_dom_save.c
index cd324bb..821f862 100644
--- a/tools/libxl/libxl_dom_save.c
+++ b/tools/libxl/libxl_dom_save.c
@@ -438,12 +438,15 @@ static void domain_save_done(libxl__egc *egc,
if (dss->remus) {
/*
- * With Remus, if we reach this point, it means either
+ * With Remus/COLO, if we reach this point, it means either
* backup died or some network error occurred preventing us
* from sending checkpoints. Teardown the network buffers and
* release netlink resources. This is an async op.
*/
- libxl__remus_teardown(egc, &dss->rs, rc);
+ if (libxl_defbool_val(dss->remus->colo))
+ libxl__colo_save_teardown(egc, &dss->css, rc);
+ else
+ libxl__remus_teardown(egc, &dss->rs, rc);
return;
}
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 16c627d..e4b237f 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -2802,7 +2802,7 @@ typedef struct libxl__save_helper_state {
/*
* The abstract checkpoint device layer exposes a common
* set of API to [external] libxl for manipulating devices attached to
- * a guest protected by Remus. The device layer also exposes a set of
+ * a guest protected by Remus/COLO. The device layer also exposes a set of
* [internal] interfaces that every device type must implement.
*
* The following API are exposed to libxl:
@@ -2820,7 +2820,7 @@ typedef struct libxl__save_helper_state {
* +libxl__checkpoint_devices_commit
*
* Each device type needs to implement the interfaces specified in
- * the libxl__checkpoint_device_instance_ops if it wishes to support Remus.
+ * the libxl__checkpoint_device_instance_ops if it wishes to support Remus/COLO.
*
* The high-level control flow through the checkpoint device layer is shown
* below:
@@ -2840,7 +2840,7 @@ typedef struct libxl__checkpoint_device_instance_ops libxl__checkpoint_device_in
/*
* Interfaces to be implemented by every device subkind that wishes to
- * support Remus. Functions must be implemented unless otherwise
+ * support Remus/COLO. Functions must be implemented unless otherwise
* stated. Many of these functions are asynchronous. They call
* dev->aodev.callback when done. The actual implementations may be
* synchronous and call dev->aodev.callback directly (as the last
@@ -3159,6 +3159,18 @@ libxl__stream_write_inuse(const libxl__stream_write_state *stream)
return stream->running;
}
+/*----- colo related state structure -----*/
+typedef struct libxl__colo_save_state libxl__colo_save_state;
+struct libxl__colo_save_state {
+ /* stream fds; libxl__colo_save_setup() copies them from dss->fd and
+  * dss->recv_fd respectively */
+ int send_fd;
+ int recv_fd;
+
+ /* private */
+ /* back-channel read stream on recv_fd, used to read the
+  * CHECKPOINT_SVM_* records */
+ libxl__stream_read_state srs;
+ /* follow-up step run by the common stream-done callbacks; the int is
+  * the id read from the back channel (0 after a write); NULL means
+  * there is no follow-up step */
+ void (*callback)(libxl__egc *, libxl__colo_save_state *, int);
+ /* false after setup; set to true once CHECKPOINT_SVM_READY was read */
+ bool svm_running;
+};
+
typedef struct libxl__logdirty_switch {
/* Set by caller of libxl__domain_common_switch_qemu_logdirty */
libxl__ao *ao;
@@ -3217,7 +3229,12 @@ struct libxl__domain_save_state {
int hvm;
int xcflags;
libxl__domain_suspend_state dsps;
- libxl__remus_state rs;
+ union {
+ /* for Remus */
+ libxl__remus_state rs;
+ /* for COLO */
+ libxl__colo_save_state css;
+ };
libxl__checkpoint_devices_state cds;
libxl__stream_write_state sws;
libxl__logdirty_switch logdirty;
diff --git a/tools/libxl/libxl_save_msgs_gen.pl b/tools/libxl/libxl_save_msgs_gen.pl
index cbb6ca1..6016706 100755
--- a/tools/libxl/libxl_save_msgs_gen.pl
+++ b/tools/libxl/libxl_save_msgs_gen.pl
@@ -26,7 +26,7 @@ our @msgs = (
[ 3, 'srcxA', "suspend", [] ],
[ 4, 'srcxA', "postcopy", [] ],
[ 5, 'srcxA', "checkpoint", [] ],
- [ 6, 'rcxA', "wait_checkpoint", [] ],
+ [ 6, 'srcxA', "wait_checkpoint", [] ],
[ 7, 'scxA', "switch_qemu_logdirty", [qw(int domid
unsigned enable)] ],
[ 8, 'r', "restore_results", ['unsigned long', 'store_mfn',
diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
index 33f4a90..9b0a537 100644
--- a/tools/libxl/libxl_types.idl
+++ b/tools/libxl/libxl_types.idl
@@ -763,6 +763,7 @@ libxl_domain_remus_info = Struct("domain_remus_info",[
("netbuf", libxl_defbool),
("netbufscript", string),
("diskbuf", libxl_defbool),
+ ("colo", libxl_defbool)
])
libxl_event_type = Enumeration("event_type", [
--
1.9.3
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
next prev parent reply other threads:[~2016-03-23 8:06 UTC|newest]
Thread overview: 60+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-03-23 8:06 [PATCH v12 00/26] COarse-grain LOck-stepping Virtual Machines for Non-stop Service Changlong Xie
2016-03-23 8:06 ` [PATCH v12 01/26] tools/libxl: introduction of libxl__qmp_restore to load qemu state Changlong Xie
2016-03-23 8:06 ` [PATCH v12 02/26] tools/libxl: introduce libxl__domain_common_switch_qemu_logdirty() Changlong Xie
2016-03-23 8:06 ` [PATCH v12 03/26] tools/libxl: Add back channel to allow migration target send data back Changlong Xie
2016-03-23 8:06 ` [PATCH v12 04/26] tools/libxl: Introduce new helper function dup_fd_helper() Changlong Xie
2016-03-23 8:06 ` [PATCH v12 05/26] tools/libx{l, c}: add back channel to libxc Changlong Xie
2016-03-23 8:06 ` [PATCH v12 06/26] docs: add colo readme Changlong Xie
2016-03-23 8:06 ` [PATCH v12 07/26] docs/libxl: Introduce CHECKPOINT_CONTEXT to support migration v2 colo streams Changlong Xie
2016-03-24 14:53 ` Ian Jackson
2016-03-23 8:06 ` [PATCH v12 08/26] libxc/migration: Specification update for DIRTY_PFN_LIST records Changlong Xie
2016-03-24 14:56 ` Ian Jackson
2016-03-23 8:06 ` [PATCH v12 09/26] libxc/migration: export read_record for common use Changlong Xie
2016-03-23 8:06 ` [PATCH v12 10/26] tools/libxl: add back channel support to write stream Changlong Xie
2016-03-24 16:49 ` Ian Jackson
2016-03-23 8:06 ` [PATCH v12 11/26] tools/libxl: add back channel support to read stream Changlong Xie
2016-03-24 14:57 ` Ian Jackson
2016-03-23 8:06 ` [PATCH v12 12/26] secondary vm suspend/resume/checkpoint code Changlong Xie
2016-03-24 15:15 ` Ian Jackson
2016-03-25 2:00 ` Changlong Xie
2016-03-23 8:06 ` [PATCH v12 13/26] libxl_internal: move stream read manipulations to right place Changlong Xie
2016-03-24 15:17 ` Ian Jackson
2016-03-23 8:06 ` Changlong Xie [this message]
2016-03-24 15:24 ` [PATCH v12 14/26] primary vm suspend/resume/checkpoint code Ian Jackson
2016-03-25 2:00 ` Changlong Xie
2016-03-25 6:33 ` Changlong Xie
2016-03-23 8:06 ` [PATCH v12 15/26] libxc/restore: support COLO restore Changlong Xie
2016-03-24 15:27 ` Ian Jackson
2016-03-23 8:06 ` [PATCH v12 16/26] libxc/save: support COLO save Changlong Xie
2016-03-24 15:28 ` Ian Jackson
2016-03-23 8:06 ` [PATCH v12 17/26] implement the cmdline for COLO Changlong Xie
2016-03-24 15:34 ` Ian Jackson
2016-03-23 8:06 ` [PATCH v12 18/26] COLO: introduce new API to prepare/start/do/get_error/stop replication Changlong Xie
2016-03-23 8:06 ` [PATCH v12 19/26] Introduce COLO mode and refactor relevant function Changlong Xie
2016-03-24 15:45 ` Ian Jackson
2016-03-25 2:02 ` Changlong Xie
2016-03-23 8:06 ` [PATCH v12 20/26] Support colo mode for qemu disk Changlong Xie
2016-03-23 8:06 ` [PATCH v12 21/26] COLO: use qemu block replication Changlong Xie
2016-03-24 15:54 ` Ian Jackson
2016-03-23 8:06 ` [PATCH v12 22/26] COLO proxy: implement setup/teardown/preresume/postresume/checkpoint Changlong Xie
2016-03-24 15:59 ` Ian Jackson
2016-03-23 8:06 ` [PATCH v12 23/26] COLO nic: implement COLO nic subkind Changlong Xie
2016-03-24 16:05 ` Ian Jackson
2016-03-25 2:29 ` Changlong Xie
2016-03-25 6:09 ` Changlong Xie
2016-03-25 12:23 ` Wei Liu
2016-03-28 3:20 ` Changlong Xie
2016-03-23 8:06 ` [PATCH v12 24/26] setup and control colo proxy on primary side Changlong Xie
2016-03-24 16:06 ` Ian Jackson
2016-03-23 8:06 ` [PATCH v12 25/26] setup and control colo proxy on secondary side Changlong Xie
2016-03-24 16:06 ` Ian Jackson
2016-03-23 8:06 ` [PATCH v12 26/26] cmdline switches and config vars to control colo-proxy Changlong Xie
2016-03-24 16:12 ` Ian Jackson
2016-03-25 2:57 ` Changlong Xie
2016-03-25 6:10 ` Changlong Xie
2016-03-25 12:29 ` Wei Liu
2016-03-28 3:21 ` Changlong Xie
2016-03-24 16:21 ` [PATCH v12 00/26] COarse-grain LOck-stepping Virtual Machines for Non-stop Service Ian Jackson
2016-03-24 16:43 ` Lars Kurth
2016-03-24 17:06 ` Wei Liu
2016-03-24 17:07 ` Ian Jackson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1458720400-4699-15-git-send-email-xiecl.fnst@cn.fujitsu.com \
--to=xiecl.fnst@cn.fujitsu.com \
--cc=andrew.cooper3@citrix.com \
--cc=anthony.perard@citrix.com \
--cc=eddie.dong@intel.com \
--cc=guijianfeng@cn.fujitsu.com \
--cc=hongyang.yang@easystack.cn \
--cc=ian.campbell@citrix.com \
--cc=ian.jackson@eu.citrix.com \
--cc=konrad.wilk@oracle.com \
--cc=lars.kurth@citrix.com \
--cc=lizhijian@cn.fujitsu.com \
--cc=rshriram@cs.ubc.ca \
--cc=wei.liu2@citrix.com \
--cc=wency@cn.fujitsu.com \
--cc=xen-devel@lists.xen.org \
--cc=yunhong.jiang@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).