From mboxrd@z Thu Jan 1 00:00:00 1970 From: Wen Congyang Subject: [RFC Patch v3 07/22] secondary vm suspend/resume/checkpoint code Date: Fri, 5 Sep 2014 17:25:42 +0800 Message-ID: <1409909158-19243-8-git-send-email-wency@cn.fujitsu.com> References: <1409909158-19243-1-git-send-email-wency@cn.fujitsu.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <1409909158-19243-1-git-send-email-wency@cn.fujitsu.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: xen devel Cc: Ian Campbell , Wen Congyang , Ian Jackson , Jiang Yunhong , Dong Eddie , Yang Hongyang , Lai Jiangshan List-Id: xen-devel@lists.xenproject.org Secondary vm is running in colo mode. So we will do the following things again and again: 1. Resume secondary vm a. Send LIBXL_COLO_SVM_READY to master b. If it is resumed for the first time, call libxl__xc_domain_restore_done() to build the secondary vm. We should also enable secondary vm's logdirty. Otherwise, call libxl__domain_resume() to resume secondary vm. c. Send LIBXL_COLO_SVM_RESUMED to master 2. Wait for a new checkpoint a. Read LIBXL_COLO_NEW_CHECKPOINT from master 3. Suspend secondary vm a. Suspend secondary vm b. Get secondary vm's dirty page information c. Send LIBXL_COLO_SVM_SUSPENDED to master d. 
Send secondary vm's dirty page information to master(count + pfn list) Signed-off-by: Wen Congyang --- tools/libxc/xenguest.h | 20 + tools/libxl/Makefile | 1 + tools/libxl/libxl_colo.h | 38 ++ tools/libxl/libxl_colo_restore.c | 883 +++++++++++++++++++++++++++++++++++++ tools/libxl/libxl_create.c | 116 ++++- tools/libxl/libxl_dom.c | 2 +- tools/libxl/libxl_internal.h | 22 + tools/libxl/libxl_save_callout.c | 6 +- tools/libxl/libxl_save_msgs_gen.pl | 6 +- 9 files changed, 1087 insertions(+), 7 deletions(-) create mode 100644 tools/libxl/libxl_colo.h create mode 100644 tools/libxl/libxl_colo_restore.c diff --git a/tools/libxc/xenguest.h b/tools/libxc/xenguest.h index 40bbac8..d3061c7 100644 --- a/tools/libxc/xenguest.h +++ b/tools/libxc/xenguest.h @@ -91,6 +91,26 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iter /* callbacks provided by xc_domain_restore */ struct restore_callbacks { + /* Called after a new checkpoint to suspend the guest. + */ + int (*suspend)(void* data); + + /* Called after the secondary vm is ready to resume. + * Callback function resumes the guest & the device model, + * returns to xc_domain_restore. 
+ */ + int (*postcopy)(void* data); + + /* callback to wait a new checkpoint + * + * returns: + * 0: terminate checkpointing gracefully + * 1: take another checkpoint */ + int (*checkpoint)(void* data); + + /* Enable qemu-dm logging dirty pages to xen */ + int (*switch_qemu_logdirty)(int domid, unsigned enable, void *data); /* HVM only */ + /* callback to restore toolstack specific data */ int (*toolstack_restore)(uint32_t domid, const uint8_t *buf, uint32_t size, void* data); diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile index 5427461..c026bdd 100644 --- a/tools/libxl/Makefile +++ b/tools/libxl/Makefile @@ -57,6 +57,7 @@ LIBXL_OBJS-y += libxl_nonetbuffer.o endif LIBXL_OBJS-y += libxl_remus.o libxl_checkpoint_device.o libxl_remus_disk_drbd.o +LIBXL_OBJS-y += libxl_colo_restore.o LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o diff --git a/tools/libxl/libxl_colo.h b/tools/libxl/libxl_colo.h new file mode 100644 index 0000000..91df275 --- /dev/null +++ b/tools/libxl/libxl_colo.h @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2014 FUJITSU LIMITED + * Author: Wen Congyang + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ */ + +#ifndef LIBXL_COLO_H +#define LIBXL_COLO_H + +/* + * values to control suspend/resume primary vm and secondary vm + * at the same time + */ +enum { + LIBXL_COLO_NEW_CHECKPOINT = 1, + LIBXL_COLO_SVM_SUSPENDED, + LIBXL_COLO_SVM_READY, + LIBXL_COLO_SVM_RESUMED, +}; + +extern void libxl__colo_restore_done(libxl__egc *egc, void *dcs_void, + int ret, int retval, int errnoval); +extern void libxl__colo_restore_setup(libxl__egc *egc, + libxl__colo_restore_state *crs); +extern void libxl__colo_restore_teardown(libxl__egc *egc, + libxl__colo_restore_state *crs, + int rc); + +#endif diff --git a/tools/libxl/libxl_colo_restore.c b/tools/libxl/libxl_colo_restore.c new file mode 100644 index 0000000..bb5ef9f --- /dev/null +++ b/tools/libxl/libxl_colo_restore.c @@ -0,0 +1,883 @@ +/* + * Copyright (C) 2014 FUJITSU LIMITED + * Author: Wen Congyang + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ */ + +#include "libxl_osdeps.h" /* must come before any other headers */ + +#include "libxl_internal.h" +#include "libxl_colo.h" +#include "xg_private.h" +#include "xc_bitops.h" + +enum { + LIBXL_COLO_SETUPED, + LIBXL_COLO_SUSPENDED, + LIBXL_COLO_RESUMED, +}; + +typedef struct libxl__colo_restore_checkpoint_state libxl__colo_restore_checkpoint_state; +struct libxl__colo_restore_checkpoint_state { + xc_hypercall_buffer_t _dirty_bitmap; + xc_hypercall_buffer_t *dirty_bitmap; + unsigned long p2m_size; + libxl__domain_suspend_state2 dss2; + /* for sending data to master */ + libxl__datacopier_state dc; + /* for reading data from master */ + libxl__datareader_state drs; + uint8_t section; + libxl__logdirty_switch lds; + libxl__colo_restore_state *crs; + int status; + + void (*callback)(libxl__egc *, + libxl__colo_restore_checkpoint_state *, + int); + + /* + * 0: secondary vm's dirty bitmap for domain @domid + * 1: secondary vm is ready(domain @domid) + * 2: secondary vm is resumed(domain @domid) + */ + const char *copywhat[3]; +}; + + +static void libxl__colo_restore_domain_resume_callback(void *data); +static void libxl__colo_restore_domain_checkpoint_callback(void *data); +static void libxl__colo_restore_domain_suspend_callback(void *data); + +/* ===================== colo: common functions ===================== */ +static void colo_enable_logdirty(libxl__colo_restore_state *crs, libxl__egc *egc) +{ + libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs); + libxl__colo_restore_checkpoint_state *crcs = crs->crcs; + + /* Convenience aliases */ + const uint32_t domid = crs->domid; + libxl__logdirty_switch *const lds = &crcs->lds; + + STATE_AO_GC(crs->ao); + + /* we need to know which pages are dirty to restore the guest */ + if (xc_shadow_control(CTX->xch, domid, + XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY, + NULL, 0, NULL, 0, NULL) < 0) { + LOG(ERROR, "cannot enable secondary vm's logdirty"); + lds->callback(egc, lds, ERROR_FAIL); + return; + } + + if (crs->hvm) { 
+ libxl__domain_common_switch_qemu_logdirty(domid, 1, lds, egc); + return; + } + + lds->callback(egc, lds, 0); +} + +static void colo_disable_logdirty(libxl__colo_restore_state *crs, + libxl__egc *egc) +{ + libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs); + libxl__colo_restore_checkpoint_state *crcs = crs->crcs; + + /* Convenience aliases */ + const uint32_t domid = crs->domid; + libxl__logdirty_switch *const lds = &crcs->lds; + + STATE_AO_GC(crs->ao); + + /* we need to know which pages are dirty to restore the guest */ + if (xc_shadow_control(CTX->xch, domid, XEN_DOMCTL_SHADOW_OP_OFF, + NULL, 0, NULL, 0, NULL) < 0) + LOG(WARN, "cannot disable secondary vm's logdirty"); + + if (crs->hvm) { + libxl__domain_common_switch_qemu_logdirty(domid, 0, lds, egc); + return; + } + + lds->callback(egc, lds, 0); +} + +static void colo_resume_vm(libxl__egc *egc, + libxl__colo_restore_checkpoint_state *crcs) +{ + libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs); + int rc; + + /* Convenience aliases */ + libxl__colo_restore_state *const crs = crcs->crs; + + STATE_AO_GC(crs->ao); + + if (!crs->saved_cb) { + /* TODO: sync mmu for hvm? */ + rc = libxl__domain_resume(gc, crs->domid, 0, 1); + if (rc) + LOG(ERROR, "cannot resume secondary vm"); + + crcs->callback(egc, crcs, rc); + return; + } + + /* + * TODO: get store mfn and console mfn + * We should call the callback restore_results in + * xc_domain_restore() before resuming the guest. 
+ */ + libxl__xc_domain_restore_done(egc, dcs, 0, 0, 0); + + return; +} + + +/* ================ colo: setup restore environment ================ */ +static void libxl__colo_domain_create_cb(libxl__egc *egc, + libxl__domain_create_state *dcs, + int rc, uint32_t domid); + +static int init_dss2(libxl__domain_suspend_state2 *dss2) +{ + int rc = ERROR_FAIL; + libxl_domain_type type; + + STATE_AO_GC(dss2->ao); + + type = libxl__domain_type(gc, dss2->domid); + if (type == LIBXL_DOMAIN_TYPE_INVALID) + goto out; + + libxl__xswait_init(&dss2->pvcontrol); + libxl__ev_evtchn_init(&dss2->guest_evtchn); + libxl__ev_xswatch_init(&dss2->guest_watch); + libxl__ev_time_init(&dss2->guest_timeout); + + if (type == LIBXL_DOMAIN_TYPE_HVM) + dss2->hvm = 1; + else + dss2->hvm = 0; + + dss2->guest_evtchn.port = -1; + dss2->guest_evtchn_lockfd = -1; + dss2->guest_responded = 0; + dss2->dm_savefile = libxl__device_model_savefile(gc, dss2->domid); + dss2->save_dm = 0; + + /* Secondary vm is not created, so we cannot get evtchn port */ + + rc = 0; + +out: + return rc; +} + +void libxl__colo_restore_setup(libxl__egc *egc, + libxl__colo_restore_state *crs) +{ + libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs); + libxl__colo_restore_checkpoint_state *crcs; + DECLARE_HYPERCALL_BUFFER(unsigned long, dirty_bitmap); + int rc = ERROR_FAIL; + int bsize; + + /* Convenience aliases */ + libxl__srm_restore_autogen_callbacks *const callbacks = + &dcs->shs.callbacks.restore.a; + const int domid = crs->domid; + + STATE_AO_GC(crs->ao); + + GCNEW(crcs); + crs->crcs = crcs; + crcs->crs = crs; + + crcs->p2m_size = xc_domain_maximum_gpfn(CTX->xch, domid) + 1; + + crcs->copywhat[0] = GCSPRINTF("secondary vm's dirty bitmap for domain %"PRIu32, + domid); + crcs->copywhat[1] = GCSPRINTF("secondary vm is ready(domain %"PRIu32")", + domid); + crcs->copywhat[2] = GCSPRINTF("secondary vm is resumed(domain %"PRIu32")", + domid); + + bsize = bitmap_size(crcs->p2m_size); + dirty_bitmap = 
xc_hypercall_buffer_alloc_pages(CTX->xch, dirty_bitmap, + NRPAGES(bsize)); + if (!dirty_bitmap) { + rc = ERROR_NOMEM; + goto err; + } + memset(dirty_bitmap, 0, bsize); + crcs->_dirty_bitmap = *HYPERCALL_BUFFER(dirty_bitmap); + crcs->dirty_bitmap = &crcs->_dirty_bitmap; + + /* setup dss2 */ + crcs->dss2.ao = ao; + crcs->dss2.domid = domid; + if (init_dss2(&crcs->dss2)) + goto err_init_dss2; + + callbacks->suspend = libxl__colo_restore_domain_suspend_callback; + callbacks->postcopy = libxl__colo_restore_domain_resume_callback; + callbacks->checkpoint = libxl__colo_restore_domain_checkpoint_callback; + + /* + * Secondary vm is running in colo mode, so we need to call + * libxl__xc_domain_restore_done() to create secondary vm. + * But we will exit in domain_create_cb(). So replace the + * callback here. + */ + crs->saved_cb = dcs->callback; + dcs->callback = libxl__colo_domain_create_cb; + crcs->status = LIBXL_COLO_SETUPED; + + logdirty_init(&crcs->lds); + crcs->lds.ao = ao; + + rc = 0; + +out: + crs->callback(egc, crs, rc); + return; + +err_init_dss2: + xc_hypercall_buffer_free_pages(CTX->xch, dirty_bitmap, NRPAGES(bsize)); + crcs->dirty_bitmap = NULL; +err: + goto out; +} + +static void libxl__colo_domain_create_cb(libxl__egc *egc, + libxl__domain_create_state *dcs, + int rc, uint32_t domid) +{ + libxl__colo_restore_checkpoint_state *crcs = dcs->crs.crcs; + + crcs->callback(egc, crcs, rc); +} + + +/* ================ colo: teardown restore environment ================ */ +static void do_failover_done(libxl__egc *egc, + libxl__colo_restore_checkpoint_state* crcs, + int rc); +static void colo_disable_logdirty_done(libxl__egc *egc, + libxl__logdirty_switch *lds, + int rc); + +static void do_failover(libxl__egc *egc, libxl__colo_restore_state *crs) +{ + libxl__colo_restore_checkpoint_state *crcs = crs->crcs; + + /* Convenience aliases */ + const int status = crcs->status; + libxl__logdirty_switch *const lds = &crcs->lds; + + STATE_AO_GC(crs->ao); + + switch(status) { + 
case LIBXL_COLO_SETUPED: + /* We don't enable logdirty now */ + colo_resume_vm(egc, crcs); + return; + case LIBXL_COLO_SUSPENDED: + case LIBXL_COLO_RESUMED: + /* disable logdirty first */ + lds->callback = colo_disable_logdirty_done; + colo_disable_logdirty(crs, egc); + return; + default: + LOG(ERROR, "invalid status: %d", status); + crcs->callback(egc, crcs, ERROR_FAIL); + } +} + +void libxl__colo_restore_teardown(libxl__egc *egc, + libxl__colo_restore_state *crs, + int rc) +{ + libxl__colo_restore_checkpoint_state *crcs = crs->crcs; + DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap, crcs->dirty_bitmap); + int bsize = bitmap_size(crcs->p2m_size); + libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs); + + EGC_GC; + + if (!dirty_bitmap) + goto do_failover; + + xc_hypercall_buffer_free_pages(CTX->xch, dirty_bitmap, NRPAGES(bsize)); + +do_failover: + if (!rc) { + crcs->callback = do_failover_done; + do_failover(egc, crs); + return; + } + + if (crs->saved_cb) { + dcs->callback = crs->saved_cb; + crs->saved_cb = NULL; + } + crs->callback(egc, crs, rc); +} + +static void do_failover_done(libxl__egc *egc, + libxl__colo_restore_checkpoint_state* crcs, + int rc) +{ + libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs); + + /* Convenience aliases */ + libxl__colo_restore_state *const crs = crcs->crs; + + STATE_AO_GC(crs->ao); + + if (rc) + LOG(ERROR, "cannot do failover"); + + if (crs->saved_cb) { + dcs->callback = crs->saved_cb; + crs->saved_cb = NULL; + } + + crs->callback(egc, crs, rc); +} + +static void colo_disable_logdirty_done(libxl__egc *egc, + libxl__logdirty_switch *lds, + int rc) +{ + libxl__colo_restore_checkpoint_state *crcs = CONTAINER_OF(lds, *crcs, lds); + + STATE_AO_GC(lds->ao); + + if (rc) + LOG(WARN, "cannot disable logdirty"); + + if (crcs->status == LIBXL_COLO_SUSPENDED) { + colo_resume_vm(egc, crcs); + return; + } + + /* If we cannot disable logdirty, we still can do failover */ + crcs->callback(egc, crcs, 0); +} 
+ +/* + * checkpoint callbacks are called in the following order: + * 1. resume + * 2. checkpoint + * 3. suspend + */ +static void colo_common_send_data_done(libxl__egc *egc, + libxl__datacopier_state *dc, + int onwrite, int errnoval); +/* ===================== colo: resume secondary vm ===================== */ +/* + * Do the following things when resuming secondary vm: + * 1. write LIBXL_COLO_SVM_READY + * 2. resume secondary vm + * 3. write LIBXL_COLO_SVM_RESUMED + */ +static void colo_send_svm_ready_done(libxl__egc *egc, + libxl__colo_restore_checkpoint_state *crcs, + int rc); +static void colo_resume_vm_done(libxl__egc *egc, + libxl__colo_restore_checkpoint_state *crcs, + int rc); +static void colo_write_svm_resumed(libxl__egc *egc, + libxl__colo_restore_checkpoint_state *crcs); +static void colo_enable_logdirty_done(libxl__egc *egc, + libxl__logdirty_switch *lds, + int retval); +static void colo_reenable_logdirty(libxl__egc *egc, + libxl__logdirty_switch *lds, + int rc); +static void colo_reenable_logdirty_done(libxl__egc *egc, + libxl__logdirty_switch *lds, + int rc); + +static void libxl__colo_restore_domain_resume_callback(void *data) +{ + libxl__save_helper_state *shs = data; + libxl__domain_create_state *dcs = CONTAINER_OF(shs, *dcs, shs); + libxl__colo_restore_checkpoint_state *crcs = dcs->crs.crcs; + uint8_t section = LIBXL_COLO_SVM_READY; + int rc; + + /* Convenience aliases */ + libxl__colo_restore_state *const crs = &dcs->crs; + const int send_fd = crs->send_fd; + libxl__datacopier_state *const dc = &crcs->dc; + + STATE_AO_GC(crs->ao); + + memset(dc, 0, sizeof(*dc)); + dc->ao = ao; + dc->readfd = -1; + dc->writefd = send_fd; + dc->maxsz = INT_MAX; + dc->copywhat = crcs->copywhat[1]; + dc->writewhat = "colo stream"; + dc->callback = colo_common_send_data_done; + crcs->callback = colo_send_svm_ready_done; + + rc = libxl__datacopier_start(dc); + if (rc) { + LOG(ERROR, "libxl__datacopier_start() fails"); + goto out; + } + + /* tell master that secondary 
vm is ready */ + libxl__datacopier_prefixdata(shs->egc, dc, &section, sizeof(section)); + + return; + +out: + libxl__xc_domain_saverestore_async_callback_done(shs->egc, shs, 0); +} + +static void colo_send_svm_ready_done(libxl__egc *egc, + libxl__colo_restore_checkpoint_state *crcs, + int rc) +{ + crcs->callback = colo_resume_vm_done; + colo_resume_vm(egc, crcs); + + return; +} + +static void colo_resume_vm_done(libxl__egc *egc, + libxl__colo_restore_checkpoint_state *crcs, + int rc) +{ + libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs); + + /* Convenience aliases */ + libxl__colo_restore_state *const crs = crcs->crs; + libxl__logdirty_switch *const lds = &crcs->lds; + libxl__save_helper_state *const shs = &dcs->shs; + + STATE_AO_GC(crs->ao); + + if (rc) { + LOG(ERROR, "cannot resume secondary vm"); + goto out; + } + + crcs->status = LIBXL_COLO_RESUMED; + + /* avoid calling libxl__xc_domain_restore_done() more than once */ + if (crs->saved_cb) { + dcs->callback = crs->saved_cb; + crs->saved_cb = NULL; + + lds->callback = colo_enable_logdirty_done; + colo_enable_logdirty(crs, egc); + return; + } + + colo_write_svm_resumed(egc, crcs); + return; + +out: + libxl__xc_domain_saverestore_async_callback_done(egc, shs, 0); +} + +static void colo_write_svm_resumed(libxl__egc *egc, + libxl__colo_restore_checkpoint_state *crcs) +{ + libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs); + uint8_t section = LIBXL_COLO_SVM_RESUMED; + int rc; + + /* Convenience aliases */ + libxl__colo_restore_state *const crs = crcs->crs; + const int send_fd = crs->send_fd; + libxl__datacopier_state *const dc = &crcs->dc; + libxl__save_helper_state *const shs = &dcs->shs; + + STATE_AO_GC(crs->ao); + + memset(dc, 0, sizeof(*dc)); + dc->ao = ao; + dc->readfd = -1; + dc->writefd = send_fd; + dc->maxsz = INT_MAX; + dc->copywhat = crcs->copywhat[2]; + dc->writewhat = "colo stream"; + dc->callback = colo_common_send_data_done; + /* TODO: configure network */ + 
crcs->callback = NULL; + + rc = libxl__datacopier_start(dc); + if (rc) { + LOG(ERROR, "libxl__datacopier_start() fails"); + goto out; + } + + /* tell master that secondary vm is resumed */ + libxl__datacopier_prefixdata(egc, dc, &section, sizeof(section)); + + return; + +out: + libxl__xc_domain_saverestore_async_callback_done(egc, shs, 0); +} + +static void colo_enable_logdirty_done(libxl__egc *egc, + libxl__logdirty_switch *lds, + int rc) +{ + libxl__colo_restore_checkpoint_state *crcs = CONTAINER_OF(lds, *crcs, lds); + libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs); + + /* Convenience aliases */ + libxl__colo_restore_state *const crs = crcs->crs; + libxl__save_helper_state *const shs = &dcs->shs; + const uint32_t domid = crs->domid; + + STATE_AO_GC(crs->ao); + + if (rc) { + /* + * log-dirty already enabled? There's no test op, + * so attempt to disable then reenable it + */ + lds->callback = colo_reenable_logdirty; + colo_disable_logdirty(crs, egc); + return; + } + + /* We have enabled secondary vm's logdirty, so we can unpause it now */ + rc = libxl__domain_unpause(gc, domid); + if (rc) { + LOG(ERROR, "cannot unpause secondary vm"); + goto out; + } + + colo_write_svm_resumed(egc, crcs); + + return; + +out: + libxl__xc_domain_saverestore_async_callback_done(egc, shs, 0); +} + +static void colo_reenable_logdirty(libxl__egc *egc, + libxl__logdirty_switch *lds, + int rc) +{ + libxl__colo_restore_checkpoint_state *crcs = CONTAINER_OF(lds, *crcs, lds); + libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs); + + /* Convenience aliases */ + libxl__colo_restore_state *const crs = crcs->crs; + libxl__save_helper_state *const shs = &dcs->shs; + + STATE_AO_GC(crs->ao); + + if (rc) { + LOG(ERROR, "cannot enable logdirty"); + goto out; + } + + lds->callback = colo_reenable_logdirty_done; + colo_enable_logdirty(crs, egc); + + return; + +out: + libxl__xc_domain_saverestore_async_callback_done(egc, shs, 0); +} + +static void 
colo_reenable_logdirty_done(libxl__egc *egc, + libxl__logdirty_switch *lds, + int rc) +{ + libxl__colo_restore_checkpoint_state *crcs = CONTAINER_OF(lds, *crcs, lds); + libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs); + + /* Convenience aliases */ + libxl__save_helper_state *const shs = &dcs->shs; + const uint32_t domid = crcs->crs->domid; + + STATE_AO_GC(crcs->crs->ao); + + if (rc) { + LOG(ERROR, "cannot enable logdirty"); + goto out; + } + + /* We have enabled secondary vm's logdirty, so we can unpause it now */ + rc = libxl__domain_unpause(gc, domid); + if (rc) { + LOG(ERROR, "cannot unpause secondary vm"); + goto out; + } + + colo_write_svm_resumed(egc, crcs); + + return; + +out: + libxl__xc_domain_saverestore_async_callback_done(egc, shs, 0); +} + + +/* ===================== colo: wait new checkpoint ===================== */ +static void colo_stream_read_done(libxl__egc *egc, + libxl__datareader_state *drs, + ssize_t real_size, int errnoval); + +static void libxl__colo_restore_domain_checkpoint_callback(void *data) +{ + libxl__save_helper_state *shs = data; + libxl__domain_create_state *dcs = CONTAINER_OF(shs, *dcs, shs); + libxl__colo_restore_checkpoint_state *crcs = dcs->crs.crcs; + + /* Convenience aliases */ + const int recv_fd = dcs->crs.recv_fd; + libxl__datareader_state *const drs = &crcs->drs; + + STATE_AO_GC(dcs->crs.ao); + + memset(drs, 0, sizeof(*drs)); + drs->ao = ao; + drs->readfd = recv_fd; + drs->readsize = sizeof(crcs->section); + drs->readwhat = "colo stream"; + drs->callback = colo_stream_read_done; + drs->buf = &crcs->section; + + if (libxl__datareader_start(drs)) { + LOG(ERROR, "libxl__datareader_start() fails"); + goto out; + } + + return; + +out: + libxl__xc_domain_saverestore_async_callback_done(shs->egc, shs, 0); +} + +static void colo_stream_read_done(libxl__egc *egc, + libxl__datareader_state *drs, + ssize_t real_size, int errnoval) +{ + libxl__colo_restore_checkpoint_state *crcs = CONTAINER_OF(drs, *crcs, drs); + 
libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs); + int ok = 0; + + /* Convenience aliases */ + libxl__save_helper_state *const shs = &dcs->shs; + + STATE_AO_GC(drs->ao); + + if (real_size < drs->readsize) { + LOG(ERROR, "reading data fails: %lld", (long long)real_size); + goto out; + } + + if (crcs->section != LIBXL_COLO_NEW_CHECKPOINT) { + LOG(ERROR, "invalid section: %d", crcs->section); + goto out; + } + + ok = 1; + +out: + libxl__xc_domain_saverestore_async_callback_done(egc, shs, ok); +} + + +/* ===================== colo: suspend secondary vm ===================== */ +/* + * Do the following things when suspending secondary vm: + * 1. suspend secondary vm + * 2. get secondary vm's dirty page information + * 3. send LIBXL_COLO_SVM_SUSPENDED + * 4. send secondary vm's dirty page information(count + pfn list) + */ +static void colo_suspend_vm_done(libxl__egc *egc, + libxl__domain_suspend_state2 *dss2, + int ok); +static void colo_append_pfn_type(libxl__egc *egc, + libxl__datacopier_state *dc, + unsigned long *dirty_bitmap, + unsigned long p2m_size); + +static void libxl__colo_restore_domain_suspend_callback(void *data) +{ + libxl__save_helper_state *shs = data; + libxl__domain_create_state *dcs = CONTAINER_OF(shs, *dcs, shs); + libxl__colo_restore_checkpoint_state *crcs = dcs->crs.crcs; + + STATE_AO_GC(dcs->ao); + + /* Convenience aliases */ + libxl__domain_suspend_state2 *const dss2 = &crcs->dss2; + + /* suspend secondary vm */ + dss2->callback_common_done = colo_suspend_vm_done; + + libxl__domain_suspend2(shs->egc, dss2); +} + +static void colo_suspend_vm_done(libxl__egc *egc, + libxl__domain_suspend_state2 *dss2, + int ok) +{ + libxl__colo_restore_checkpoint_state *crcs = CONTAINER_OF(dss2, *crcs, dss2); + libxl__colo_restore_state *crs = crcs->crs; + libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs); + DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap, crcs->dirty_bitmap); + uint8_t section = 
LIBXL_COLO_SVM_SUSPENDED; + int i, rc; + uint64_t count; + + /* Convenience aliases */ + const int send_fd = crs->send_fd; + const unsigned long p2m_size = crcs->p2m_size; + const uint32_t domid = crs->domid; + libxl__datacopier_state *const dc = &crcs->dc; + + STATE_AO_GC(crs->ao); + + if (!ok) { + LOG(ERROR, "cannot suspend secondary vm"); + goto out; + } + + crcs->status = LIBXL_COLO_SUSPENDED; + + /* + * Secondary vm is running, so there are some dirty pages + * that are non-dirty in master. Get dirty bitmap and + * send it to master. + */ + if (xc_shadow_control(CTX->xch, domid, XEN_DOMCTL_SHADOW_OP_CLEAN, + HYPERCALL_BUFFER(dirty_bitmap), p2m_size, + NULL, 0, NULL) != p2m_size) { + LOG(ERROR, "getting secondary vm's dirty bitmap fails"); + goto out; + } + + count = 0; + for (i = 0; i < p2m_size; i++) { + if (test_bit(i, dirty_bitmap)) + count++; + } + + memset(dc, 0, sizeof(*dc)); + dc->ao = ao; + dc->readfd = -1; + dc->writefd = send_fd; + dc->maxsz = INT_MAX; + dc->copywhat = crcs->copywhat[0]; + dc->writewhat = "colo stream"; + dc->callback = colo_common_send_data_done; + crcs->callback = NULL; + + rc = libxl__datacopier_start(dc); + if (rc) { + LOG(ERROR, "libxl__datacopier_start() fails"); + goto out; + } + + /* tell master that secondary vm is suspended */ + libxl__datacopier_prefixdata(egc, dc, &section, sizeof(section)); + + /* send dirty pages to master */ + libxl__datacopier_prefixdata(egc, dc, &count, sizeof(count)); + colo_append_pfn_type(egc, dc, dirty_bitmap, p2m_size); + return; + +out: + ok = 0; + libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->shs, ok); +} + +static void colo_append_pfn_type(libxl__egc *egc, + libxl__datacopier_state *dc, + unsigned long *dirty_bitmap, + unsigned long p2m_size) +{ + int i, count; + /* Hack, buf->buf is private member... 
*/ + libxl__datacopier_buf *buf = NULL; + int max_batch = sizeof(buf->buf) / sizeof(uint64_t); + int buf_size = max_batch * sizeof(uint64_t); + uint64_t *pfn; + + STATE_AO_GC(dc->ao); + + pfn = libxl__zalloc(NOGC, buf_size); + + count = 0; + for (i = 0; i < p2m_size; i++) { + if (!test_bit(i, dirty_bitmap)) + continue; + + pfn[count++] = i; + if (count == max_batch) { + libxl__datacopier_prefixdata(egc, dc, pfn, buf_size); + count = 0; + } + } + + if (count) + libxl__datacopier_prefixdata(egc, dc, pfn, count * sizeof(uint64_t)); + + free(pfn); +} + + +/* ===================== colo: common callback ===================== */ +static void colo_common_send_data_done(libxl__egc *egc, + libxl__datacopier_state *dc, + int onwrite, int errnoval) +{ + libxl__colo_restore_checkpoint_state *crcs = CONTAINER_OF(dc, *crcs, dc); + libxl__domain_create_state *dcs = CONTAINER_OF(crcs->crs, *dcs, crs); + int ok; + STATE_AO_GC(dc->ao); + + if (onwrite == -1) { + LOG(ERROR, "sending data fails"); + ok = 0; + goto out; + } + + if (errnoval) { + /* failure happens when reading/writing, do failover? 
*/ + ok = 2; + goto out; + } + + if (!crcs->callback) { + /* Everything is OK */ + ok = 1; + goto out; + } + + crcs->callback(egc, crcs, 0); + return; + +out: + libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->shs, ok); +} diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c index 545100b..6e7d2c9 100644 --- a/tools/libxl/libxl_create.c +++ b/tools/libxl/libxl_create.c @@ -19,6 +19,7 @@ #include "libxl_internal.h" #include "libxl_arch.h" +#include "libxl_colo.h" #include #include @@ -921,6 +922,96 @@ static void domcreate_console_available(libxl__egc *egc, dcs->aop_console_how.for_event)); } +static void libxl__colo_restore_teardown_done(libxl__egc *egc, + libxl__colo_restore_state *crs, + int rc) +{ + libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs); + STATE_AO_GC(crs->ao); + + /* convenience aliases */ + libxl__save_helper_state *const shs = &dcs->shs; + const int domid = crs->domid; + const libxl_ctx *const ctx = libxl__gc_owner(gc); + xc_interface *const xch = ctx->xch; + + if (!rc) + /* failover, no need to destroy the secondary vm */ + goto out; + + if (shs->retval) + /* + * shs->retval stores the return value of xc_domain_restore(). + * If it is not 0, we have destroyed the secondary vm in + * xc_domain_restore(); + */ + goto out; + + xc_domain_destroy(xch, domid); + +out: + dcs->callback(egc, dcs, rc, crs->domid); +} + +void libxl__colo_restore_done(libxl__egc *egc, void *dcs_void, + int ret, int retval, int errnoval) +{ + libxl__domain_create_state *dcs = dcs_void; + int rc = 1; + + /* convenience aliases */ + libxl__colo_restore_state *const crs = &dcs->crs; + STATE_AO_GC(crs->ao); + + /* teardown and failover */ + crs->callback = libxl__colo_restore_teardown_done; + + if (ret == 0 && retval == 0) + rc = 0; + + LOG(INFO, "%s", rc ? 
"colo fails" : "failover"); + libxl__colo_restore_teardown(egc, crs, rc); +} + +static void libxl__colo_restore_cp_done(libxl__egc *egc, + libxl__colo_restore_state *crs, + int rc) +{ + libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs); + int ok = 0; + + /* convenience aliases */ + libxl__save_helper_state *const shs = &dcs->shs; + + if (!rc) + ok = 1; + + libxl__xc_domain_saverestore_async_callback_done(shs->egc, shs, ok); +} + +static void libxl__colo_restore_setup_done(libxl__egc *egc, + libxl__colo_restore_state *crs, + int rc) +{ + libxl__domain_create_state *dcs = CONTAINER_OF(crs, *dcs, crs); + + /* convenience aliases */ + const int hvm = crs->hvm; + const int superpages = crs->superpages; + const int pae = crs->pae; + STATE_AO_GC(crs->ao); + + if (rc) { + LOG(ERROR, "colo restore setup fails: %d", rc); + libxl__xc_domain_restore_done(egc, dcs, rc, 0, 0); + return; + } + + crs->callback = libxl__colo_restore_cp_done; + libxl__xc_domain_restore(egc, dcs, + hvm, pae, superpages); +} + static void domcreate_bootloader_done(libxl__egc *egc, libxl__bootloader_state *bl, int rc) @@ -936,6 +1027,8 @@ static void domcreate_bootloader_done(libxl__egc *egc, libxl__domain_build_state *const state = &dcs->build_state; libxl__srm_restore_autogen_callbacks *const callbacks = &dcs->shs.callbacks.restore.a; + const int checkpointed_stream = dcs->checkpointed_stream; + libxl__colo_restore_state *const crs = &dcs->crs; if (rc) { domcreate_rebuild_done(egc, dcs, rc); @@ -964,6 +1057,13 @@ static void domcreate_bootloader_done(libxl__egc *egc, /* Restore */ + /* COLO only supports HVM now */ + if (info->type != LIBXL_DOMAIN_TYPE_HVM && + checkpointed_stream == LIBXL_CHECKPOINTED_STREAM_COLO) { + rc = ERROR_FAIL; + goto out; + } + rc = libxl__build_pre(gc, domid, d_config, state); if (rc) goto out; @@ -986,8 +1086,20 @@ static void domcreate_bootloader_done(libxl__egc *egc, rc = ERROR_INVAL; goto out; } - libxl__xc_domain_restore(egc, dcs, - hvm, pae, superpages); 
+ + if (checkpointed_stream == LIBXL_CHECKPOINTED_STREAM_COLO) { + crs->ao = ao; + crs->domid = domid; + crs->send_fd = dcs->send_fd; + crs->recv_fd = restore_fd; + crs->hvm = hvm; + crs->superpages = superpages; + crs->pae = pae; + crs->callback = libxl__colo_restore_setup_done; + libxl__colo_restore_setup(egc, crs); + } else + libxl__xc_domain_restore(egc, dcs, + hvm, pae, superpages); return; out: diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c index 6488ffd..e49f40f 100644 --- a/tools/libxl/libxl_dom.c +++ b/tools/libxl/libxl_dom.c @@ -862,7 +862,7 @@ static void switch_logdirty_xswatch(libxl__egc *egc, libxl__ev_xswatch*, static void switch_logdirty_done(libxl__egc *egc, libxl__logdirty_switch *lds, int ok); -static void logdirty_init(libxl__logdirty_switch *lds) +void logdirty_init(libxl__logdirty_switch *lds) { lds->cmd_path = 0; libxl__ev_xswatch_init(&lds->watch); diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h index 190c0d4..d5b3197 100644 --- a/tools/libxl/libxl_internal.h +++ b/tools/libxl/libxl_internal.h @@ -2738,6 +2738,7 @@ struct libxl__logdirty_switch { libxl__ev_xswatch watch; libxl__ev_time timeout; }; +_hidden void logdirty_init(libxl__logdirty_switch *lds); /* * libxl__domain_suspend_state is for saving guest, not @@ -3029,6 +3030,26 @@ typedef void libxl__domain_create_cb(libxl__egc *egc, libxl__domain_create_state*, int rc, uint32_t domid); +/* colo related structure */ +typedef struct libxl__colo_restore_state libxl__colo_restore_state; +typedef void libxl__colo_callback(libxl__egc *, + libxl__colo_restore_state *, int rc); +struct libxl__colo_restore_state { + /* must set by caller of libxl__colo_(setup|teardown) */ + libxl__ao *ao; + uint32_t domid; + int send_fd; + int recv_fd; + int hvm; + int pae; + int superpages; + libxl__colo_callback *callback; + + /* private, colo restore checkpoint state */ + libxl__domain_create_cb *saved_cb; + void *crcs; +}; + struct libxl__domain_create_state { /* 
filled in by user */ libxl__ao *ao; @@ -3041,6 +3062,7 @@ struct libxl__domain_create_state { int guest_domid; int checkpointed_stream; libxl__domain_build_state build_state; + libxl__colo_restore_state crs; libxl__bootloader_state bl; libxl__stub_dm_spawn_state dmss; /* If we're not doing stubdom, we use only dmss.dm, diff --git a/tools/libxl/libxl_save_callout.c b/tools/libxl/libxl_save_callout.c index 0c09d94..e251181 100644 --- a/tools/libxl/libxl_save_callout.c +++ b/tools/libxl/libxl_save_callout.c @@ -15,6 +15,7 @@ #include "libxl_osdeps.h" #include "libxl_internal.h" +#include "libxl_colo.h" /* stream_fd is as from the caller (eventually, the application). * It may be 0, 1 or 2, in which case we need to dup it elsewhere. @@ -65,7 +66,10 @@ void libxl__xc_domain_restore(libxl__egc *egc, libxl__domain_create_state *dcs, dcs->shs.ao = ao; dcs->shs.domid = domid; dcs->shs.recv_callback = libxl__srm_callout_received_restore; - dcs->shs.completion_callback = libxl__xc_domain_restore_done; + if (dcs->checkpointed_stream == LIBXL_CHECKPOINTED_STREAM_COLO) + dcs->shs.completion_callback = libxl__colo_restore_done; + else + dcs->shs.completion_callback = libxl__xc_domain_restore_done; dcs->shs.caller_state = dcs; dcs->shs.need_results = 1; dcs->shs.toolstack_data_file = 0; diff --git a/tools/libxl/libxl_save_msgs_gen.pl b/tools/libxl/libxl_save_msgs_gen.pl index 41ee000..0239cac 100755 --- a/tools/libxl/libxl_save_msgs_gen.pl +++ b/tools/libxl/libxl_save_msgs_gen.pl @@ -24,9 +24,9 @@ our @msgs = ( STRING doing_what), 'unsigned long', 'done', 'unsigned long', 'total'] ], - [ 3, 'scxA', "suspend", [] ], - [ 4, 'scxA', "postcopy", [] ], - [ 5, 'scxA', "checkpoint", [] ], + [ 3, 'srcxA', "suspend", [] ], + [ 4, 'srcxA', "postcopy", [] ], + [ 5, 'srcxA', "checkpoint", [] ], [ 6, 'scxA', "switch_qemu_logdirty", [qw(int domid unsigned enable)] ], # toolstack_save done entirely `by hand' -- 1.9.3