From mboxrd@z Thu Jan 1 00:00:00 1970 From: Wen Congyang Subject: [PATCH v10 25/31] COLO: use qemu block replication Date: Mon, 22 Feb 2016 10:52:29 +0800 Message-ID: <1456109555-28299-26-git-send-email-wency@cn.fujitsu.com> References: <1456109555-28299-1-git-send-email-wency@cn.fujitsu.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <1456109555-28299-1-git-send-email-wency@cn.fujitsu.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: xen devel , Konrad Rzeszutek Wilk , Andrew Cooper , Ian Campbell , Ian Jackson , Wei Liu Cc: Lars Kurth , Changlong Xie , Wen Congyang , Gui Jianfeng , Jiang Yunhong , Dong Eddie , Shriram Rajagopalan , Yang Hongyang List-Id: xen-devel@lists.xenproject.org Use qemu block replication as our block replication solution. Note that the guest must be paused before starting COLO; otherwise, the disk won't be consistent between the primary and the secondary. 
Signed-off-by: Wen Congyang Signed-off-by: Yang Hongyang --- tools/libxl/Makefile | 1 + tools/libxl/libxl_colo_qdisk.c | 226 +++++++++++++++++++++++++++++++++++++++ tools/libxl/libxl_colo_restore.c | 42 +++++++- tools/libxl/libxl_colo_save.c | 54 +++++++++- tools/libxl/libxl_internal.h | 13 +++ 5 files changed, 331 insertions(+), 5 deletions(-) create mode 100644 tools/libxl/libxl_colo_qdisk.c diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile index 2016393..28d54d0 100644 --- a/tools/libxl/Makefile +++ b/tools/libxl/Makefile @@ -66,6 +66,7 @@ endif LIBXL_OBJS-y += libxl_remus.o libxl_checkpoint_device.o libxl_remus_disk_drbd.o LIBXL_OBJS-y += libxl_colo_restore.o libxl_colo_save.o +LIBXL_OBJS-y += libxl_colo_qdisk.o LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o libxl_psr.o LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o libxl_libfdt_compat.o diff --git a/tools/libxl/libxl_colo_qdisk.c b/tools/libxl/libxl_colo_qdisk.c new file mode 100644 index 0000000..6179947 --- /dev/null +++ b/tools/libxl/libxl_colo_qdisk.c @@ -0,0 +1,226 @@ +/* + * Copyright (C) 2015 FUJITSU LIMITED + * Author: Wen Congyang + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ */ + +#include "libxl_osdeps.h" /* must come before any other headers */ + +#include "libxl_internal.h" + +typedef struct libxl__colo_qdisk { + bool setuped; +} libxl__colo_qdisk; + +/* ========== init() and cleanup() ========== */ +int init_subkind_qdisk(libxl__checkpoint_devices_state *cds) +{ + /* + * We don't know if we use qemu block replication, so + * we cannot start block replication here. + */ + return 0; +} + +void cleanup_subkind_qdisk(libxl__checkpoint_devices_state *cds) +{ +} + +/* ========== setup() and teardown() ========== */ +static void colo_qdisk_setup(libxl__egc *egc, libxl__checkpoint_device *dev, + bool primary) +{ + const libxl_device_disk *disk = dev->backend_dev; + int ret, rc = 0; + libxl__colo_qdisk *colo_qdisk = NULL; + + /* Convenience aliases */ + libxl__checkpoint_devices_state *const cds = dev->cds; + const char *host = disk->colo_host; + const char *port = disk->colo_port; + const char *export_name = disk->colo_export; + const int domid = cds->domid; + + STATE_AO_GC(dev->cds->ao); + + if (disk->backend != LIBXL_DISK_BACKEND_QDISK || + !libxl_defbool_val(disk->colo_enable)) { + rc = ERROR_CHECKPOINT_DEVOPS_DOES_NOT_MATCH; + goto out; + } + + dev->matched = true; + + GCNEW(colo_qdisk); + dev->concrete_data = colo_qdisk; + + if (primary) { + libxl__colo_save_state *css = cds->concrete_data; + + css->qdisk_used = true; + /* NBD server is not ready, so we cannot start block replication now */ + goto out; + } else { + libxl__colo_restore_state *crs = cds->concrete_data; + + if (!crs->qdisk_used) { + /* start nbd server */ + ret = libxl__qmp_nbd_server_start(gc, domid, host, port); + if (ret) { + rc = ERROR_FAIL; + goto out; + } + crs->host = host; + crs->port = port; + } else { + if (strcmp(crs->host, host) || strcmp(crs->port, port)) { + LOG(ERROR, "The host and port of all disks must be the same"); + rc = ERROR_FAIL; + goto out; + } + } + + crs->qdisk_used = true; + + ret = libxl__qmp_nbd_server_add(gc, domid, export_name); + if 
(ret) + rc = ERROR_FAIL; + + colo_qdisk->setuped = true; + } + +out: + dev->aodev.rc = rc; + dev->aodev.callback(egc, &dev->aodev); +} + +static void colo_qdisk_teardown(libxl__egc *egc, libxl__checkpoint_device *dev, + bool primary) +{ + int ret, rc = 0; + const libxl__colo_qdisk *colo_qdisk = dev->concrete_data; + const libxl_device_disk *disk = dev->backend_dev; + + /* Convenience aliases */ + libxl__checkpoint_devices_state *const cds = dev->cds; + const int domid = cds->domid; + const char *export_name = disk->colo_export; + + EGC_GC; + + if (primary) { + if (!colo_qdisk->setuped) + goto out; + + /* + * There is no way to get the child name, but we know it is children.1 + */ + ret = libxl__qmp_x_blockdev_change(gc, domid, export_name, + "children.1", NULL); + if (ret) + rc = ERROR_FAIL; + } else { + libxl__colo_restore_state *crs = cds->concrete_data; + + if (crs->qdisk_used) { + ret = libxl__qmp_nbd_server_stop(gc, domid); + if (ret) + rc = ERROR_FAIL; + } + } + +out: + dev->aodev.rc = rc; + dev->aodev.callback(egc, &dev->aodev); +} + +/* ========== checkpointing APIs ========== */ +static void colo_qdisk_save_preresume(libxl__egc *egc, + libxl__checkpoint_device *dev) +{ + libxl__colo_qdisk *colo_qdisk = dev->concrete_data; + const libxl_device_disk *disk = dev->backend_dev; + int ret, rc = 0; + char *node = NULL; + char *cmd = NULL; + + /* Convenience aliases */ + const int domid = dev->cds->domid; + const char *host = disk->colo_host; + const char *port = disk->colo_port; + const char *export_name = disk->colo_export; + + EGC_GC; + + if (colo_qdisk->setuped) + goto out; + + /* qmp command doesn't support the driver "nbd" */ + node = GCSPRINTF("colo_node%d", + libxl__device_disk_dev_number(disk->vdev, NULL, NULL)); + cmd = GCSPRINTF("drive_add buddy driver=replication,mode=primary," + "file.driver=nbd,file.host=%s,file.port=%s," + "file.export=%s,node-name=%s,if=none", + host, port, export_name, node); + ret = libxl__qmp_hmp(gc, domid, cmd); + if (ret) + rc 
= ERROR_FAIL; + + ret = libxl__qmp_x_blockdev_change(gc, domid, export_name, NULL, node); + if (ret) + rc = ERROR_FAIL; + + colo_qdisk->setuped = true; + +out: + dev->aodev.rc = rc; + dev->aodev.callback(egc, &dev->aodev); +} + +/* ======== primary ======== */ +static void colo_qdisk_save_setup(libxl__egc *egc, + libxl__checkpoint_device *dev) +{ + colo_qdisk_setup(egc, dev, true); +} + +static void colo_qdisk_save_teardown(libxl__egc *egc, + libxl__checkpoint_device *dev) +{ + colo_qdisk_teardown(egc, dev, true); +} + +const libxl__checkpoint_device_instance_ops colo_save_device_qdisk = { + .kind = LIBXL__DEVICE_KIND_VBD, + .setup = colo_qdisk_save_setup, + .teardown = colo_qdisk_save_teardown, + .preresume = colo_qdisk_save_preresume, +}; + +/* ======== secondary ======== */ +static void colo_qdisk_restore_setup(libxl__egc *egc, + libxl__checkpoint_device *dev) +{ + colo_qdisk_setup(egc, dev, false); +} + +static void colo_qdisk_restore_teardown(libxl__egc *egc, + libxl__checkpoint_device *dev) +{ + colo_qdisk_teardown(egc, dev, false); +} + +const libxl__checkpoint_device_instance_ops colo_restore_device_qdisk = { + .kind = LIBXL__DEVICE_KIND_VBD, + .setup = colo_qdisk_restore_setup, + .teardown = colo_qdisk_restore_teardown, +}; diff --git a/tools/libxl/libxl_colo_restore.c b/tools/libxl/libxl_colo_restore.c index 0ef5b5f..eff80ca 100644 --- a/tools/libxl/libxl_colo_restore.c +++ b/tools/libxl/libxl_colo_restore.c @@ -50,7 +50,10 @@ static void libxl__colo_restore_domain_checkpoint_callback(void *data); static void libxl__colo_restore_domain_wait_checkpoint_callback(void *data); static void libxl__colo_restore_domain_suspend_callback(void *data); +extern const libxl__checkpoint_device_instance_ops colo_restore_device_qdisk; + static const libxl__checkpoint_device_instance_ops *colo_restore_ops[] = { + &colo_restore_device_qdisk, NULL, }; @@ -150,7 +153,11 @@ static int init_device_subkind(libxl__checkpoint_devices_state *cds) int rc; STATE_AO_GC(cds->ao); + rc 
= init_subkind_qdisk(cds); + if (rc) goto out; + rc = 0; +out: return rc; } @@ -158,6 +165,8 @@ static void cleanup_device_subkind(libxl__checkpoint_devices_state *cds) { /* cleanup device subkind-specific state in the libxl ctx */ STATE_AO_GC(cds->ao); + + cleanup_subkind_qdisk(cds); } @@ -214,6 +223,8 @@ void libxl__colo_restore_setup(libxl__egc *egc, GCNEW(crcs); crs->crcs = crcs; crcs->crs = crs; + crs->qdisk_setuped = false; + crs->qdisk_used = false; /* setup dsps */ crcs->dsps.ao = ao; @@ -305,6 +316,11 @@ void libxl__colo_restore_teardown(libxl__egc *egc, void *dcs_void, } libxl__xc_domain_restore_done(egc, dcs, ret, retval, errnoval); + if (crs->qdisk_setuped) { + libxl__qmp_stop_replication(gc, crs->domid, false); + crs->qdisk_setuped = false; + } + crcs->saved_rc = rc; if (!crcs->teardown_devices) { colo_restore_teardown_devices_done(egc, &dcs->cds, 0); @@ -582,6 +598,13 @@ static void colo_restore_preresume_cb(libxl__egc *egc, goto out; } + if (crs->qdisk_setuped) { + if (libxl__qmp_do_checkpoint(gc, crs->domid)) { + LOG(ERROR, "doing checkpoint fails"); + goto out; + } + } + colo_restore_resume_vm(egc, crcs); return; @@ -739,8 +762,8 @@ static void colo_setup_checkpoint_devices(libxl__egc *egc, STATE_AO_GC(crs->ao); - /* TODO: disk/nic support */ - cds->device_kind_flags = 0; + /* TODO: nic support */ + cds->device_kind_flags = (1 << LIBXL__DEVICE_KIND_VBD); cds->callback = colo_restore_setup_cds_done; cds->ao = ao; cds->domid = crs->domid; @@ -777,6 +800,14 @@ static void colo_restore_setup_cds_done(libxl__egc *egc, goto out; } + if (crs->qdisk_used && !crs->qdisk_setuped) { + if (libxl__qmp_start_replication(gc, crs->domid, false)) { + LOG(ERROR, "starting replication fails"); + goto out; + } + crs->qdisk_setuped = true; + } + colo_send_svm_ready(egc, crcs); return; @@ -931,13 +962,18 @@ static void colo_suspend_vm_done(libxl__egc *egc, crcs->status = LIBXL_COLO_SUSPENDED; + if (libxl__qmp_get_replication_error(gc, crs->domid)) { + LOG(ERROR, 
"replication error occurs when secondary vm is running"); + goto out; + } + cds->callback = colo_restore_postsuspend_cb; libxl__checkpoint_devices_postsuspend(egc, cds); return; out: - libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->srs.shs, !rc); + libxl__xc_domain_saverestore_async_callback_done(egc, &dcs->srs.shs, 0); } static void colo_restore_postsuspend_cb(libxl__egc *egc, diff --git a/tools/libxl/libxl_colo_save.c b/tools/libxl/libxl_colo_save.c index 8b18a91..5f8456c 100644 --- a/tools/libxl/libxl_colo_save.c +++ b/tools/libxl/libxl_colo_save.c @@ -19,7 +19,10 @@ #include "libxl_internal.h" #include "libxl_colo.h" +extern const libxl__checkpoint_device_instance_ops colo_save_device_qdisk; + static const libxl__checkpoint_device_instance_ops *colo_ops[] = { + &colo_save_device_qdisk, NULL, }; @@ -30,7 +33,11 @@ static int init_device_subkind(libxl__checkpoint_devices_state *cds) int rc; STATE_AO_GC(cds->ao); + rc = init_subkind_qdisk(cds); + if (rc) goto out; + rc = 0; +out: return rc; } @@ -38,6 +45,8 @@ static void cleanup_device_subkind(libxl__checkpoint_devices_state *cds) { /* cleanup device subkind-specific state in the libxl ctx */ STATE_AO_GC(cds->ao); + + cleanup_subkind_qdisk(cds); } /* ================= colo: setup save environment ================= */ @@ -71,9 +80,12 @@ void libxl__colo_save_setup(libxl__egc *egc, libxl__colo_save_state *css) css->send_fd = dss->fd; css->recv_fd = dss->recv_fd; css->svm_running = false; + css->paused = true; + css->qdisk_setuped = false; + css->qdisk_used = false; - /* TODO: disk/nic support */ - cds->device_kind_flags = 0; + /* TODO: nic support */ + cds->device_kind_flags = (1 << LIBXL__DEVICE_KIND_VBD); cds->ops = colo_ops; cds->callback = colo_save_setup_done; cds->ao = ao; @@ -153,6 +165,11 @@ void libxl__colo_save_teardown(libxl__egc *egc, libxl__stream_read_abort(egc, &css->srs, 1); + if (css->qdisk_setuped) { + libxl__qmp_stop_replication(gc, dss->domid, true); + css->qdisk_setuped = false; + 
} + dss->cds.callback = colo_teardown_done; libxl__checkpoint_devices_teardown(egc, &dss->cds); return; @@ -287,6 +304,11 @@ static void colo_read_svm_suspended_done(libxl__egc *egc, goto out; } + if (!css->paused && libxl__qmp_get_replication_error(gc, dss->domid)) { + LOG(ERROR, "replication error occurs when primary vm is running"); + goto out; + } + ok = 1; out: @@ -384,12 +406,40 @@ static void colo_preresume_cb(libxl__egc *egc, goto out; } + if (css->qdisk_used && !css->qdisk_setuped) { + if (libxl__qmp_start_replication(gc, dss->domid, true)) { + LOG(ERROR, "starting replication fails"); + goto out; + } + css->qdisk_setuped = true; + } + + if (!css->paused) { + if (libxl__qmp_do_checkpoint(gc, dss->domid)) { + LOG(ERROR, "doing checkpoint fails"); + goto out; + } + } + /* Resumes the domain and the device model */ if (libxl__domain_resume(gc, dss->domid, /* Fast Suspend */1)) { LOG(ERROR, "cannot resume primary vm"); goto out; } + /* + * The guest should be paused before doing colo because there is + * no disk migration. 
+ */ + if (css->paused) { + rc = libxl_domain_unpause(CTX, dss->domid); + if (rc) { + LOG(ERROR, "cannot unpause primary vm"); + goto out; + } + css->paused = false; + } + /* read CHECKPOINT_SVM_RESUMED */ css->callback = colo_read_svm_resumed_done; css->srs.checkpoint_callback = colo_common_read_stream_done; diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h index 227b1d3..3af5fdd 100644 --- a/tools/libxl/libxl_internal.h +++ b/tools/libxl/libxl_internal.h @@ -2902,6 +2902,8 @@ int init_subkind_nic(libxl__checkpoint_devices_state *cds); void cleanup_subkind_nic(libxl__checkpoint_devices_state *cds); int init_subkind_drbd_disk(libxl__checkpoint_devices_state *cds); void cleanup_subkind_drbd_disk(libxl__checkpoint_devices_state *cds); +int init_subkind_qdisk(libxl__checkpoint_devices_state *cds); +void cleanup_subkind_qdisk(libxl__checkpoint_devices_state *cds); typedef void libxl__checkpoint_callback(libxl__egc *, libxl__checkpoint_devices_state *, @@ -3119,6 +3121,11 @@ struct libxl__colo_save_state { libxl__stream_read_state srs; void (*callback)(libxl__egc *, libxl__colo_save_state *, int); bool svm_running; + bool paused; + + /* private, used by qdisk block replication */ + bool qdisk_used; + bool qdisk_setuped; }; /*----- Domain suspend (save) state structure -----*/ @@ -3522,6 +3529,12 @@ struct libxl__colo_restore_state { /* private, colo restore checkpoint state */ libxl__domain_create_cb *saved_cb; void *crcs; + + /* private, used by qdisk block replication */ + bool qdisk_used; + bool qdisk_setuped; + const char *host; + const char *port; }; struct libxl__domain_create_state { -- 2.5.0