From mboxrd@z Thu Jan 1 00:00:00 1970 From: Wen Congyang Subject: [RFC Patch v3 19/22] libxl/colo: setup and control disk replication for blktap2 backends Date: Fri, 5 Sep 2014 17:25:54 +0800 Message-ID: <1409909158-19243-20-git-send-email-wency@cn.fujitsu.com> References: <1409909158-19243-1-git-send-email-wency@cn.fujitsu.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <1409909158-19243-1-git-send-email-wency@cn.fujitsu.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: xen devel Cc: Ian Campbell , Wen Congyang , Ian Jackson , Jiang Yunhong , Dong Eddie , Yang Hongyang , Lai Jiangshan List-Id: xen-devel@lists.xenproject.org This patch adds the machinery required for protecting a guest's disk state, when the guest disk uses a blktap2 disk backend. 1. COLO blktap2 disk device: Implements the interfaces required by the checkpoint abstract device layer. A note about the implementation: a) setup() is called for each disk attached to the guest. During setup(): i) perform the sanity check: backend type should be LIBXL_DISK_BACKEND_TAP and format should be LIBXL_DISK_FORMAT_COLO. ii) connect to the control socket: /var/run/tap/colo_xxx, where xxx is "host:port" (the characters ':' and '/' are replaced with '_'). 
b) The postsuspend callback() will write "flush" to this socket
c) The commit callback() will wait and read "done" from this socket

Signed-off-by: Wen Congyang
---
 docs/man/xl.pod.1                          |   3 +-
 tools/libxl/Makefile                       |   2 +-
 tools/libxl/libxl_colo_save.c              |  36 ++++-
 tools/libxl/libxl_colo_save_disk_blktap2.c | 247 +++++++++++++++++++++++++++++
 tools/libxl/libxl_create.c                 |   7 +
 tools/libxl/libxl_internal.h               |   2 +
 tools/libxl/libxl_noblktap2.c              |  29 ++++
 7 files changed, 322 insertions(+), 4 deletions(-)
 create mode 100644 tools/libxl/libxl_colo_save_disk_blktap2.c

diff --git a/docs/man/xl.pod.1 b/docs/man/xl.pod.1
index 297cd04..d528e7a 100644
--- a/docs/man/xl.pod.1
+++ b/docs/man/xl.pod.1
@@ -434,7 +434,8 @@
 N.B: Remus support in xl is still in experimental (proof-of-concept) phase.
      Disk replication support is limited to DRBD disks.
      COLO support in xl is still in experimental (proof-of-concept) phase.
-     There is no support for network or disk at the moment.
+     There is no support for network at the moment.
+     Disk replication support is limited to blktap2 disks.
 
 B
 
diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index 1c32ae2..b4755c8 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -45,7 +45,7 @@ LIBXLU_LIBS =
 
 LIBXL_OBJS-y = osdeps.o libxl_paths.o libxl_bootloader.o flexarray.o
 ifeq ($(LIBXL_BLKTAP),y)
-LIBXL_OBJS-y += libxl_blktap2.o
+LIBXL_OBJS-y += libxl_blktap2.o libxl_colo_save_disk_blktap2.o
 else
 LIBXL_OBJS-y += libxl_noblktap2.o
 endif
diff --git a/tools/libxl/libxl_colo_save.c b/tools/libxl/libxl_colo_save.c
index 7b76d3f..7d6f269 100644
--- a/tools/libxl/libxl_colo_save.c
+++ b/tools/libxl/libxl_colo_save.c
@@ -18,10 +18,36 @@
 #include "libxl_internal.h"
 #include "libxl_colo.h"
 
+extern const libxl__checkpoint_device_instance_ops colo_save_device_blktap2_disk;
+
 static const libxl__checkpoint_device_instance_ops *colo_ops[] = {
+    &colo_save_device_blktap2_disk,
     NULL,
 };
 
+/* ================= helper functions ================= */
+static int init_device_subkind(libxl__checkpoint_devices_state *cds)
+{
+    /* init device subkind-specific state in the libxl ctx */
+    int rc;
+    STATE_AO_GC(cds->ao);
+
+    rc = init_subkind_blktap2_disk(cds);
+    if (rc) goto out;
+
+    rc = 0;
+out:
+    return rc;
+}
+
+static void cleanup_device_subkind(libxl__checkpoint_devices_state *cds)
+{
+    /* cleanup device subkind-specific state in the libxl ctx */
+    STATE_AO_GC(cds->ao);
+
+    cleanup_subkind_blktap2_disk(cds);
+}
+
 /* ================= colo: setup save environment ================= */
 static void colo_save_setup_done(libxl__egc *egc,
                                  libxl__checkpoint_devices_state *cds,
@@ -48,13 +74,16 @@ void libxl__colo_save_setup(libxl__egc *egc, libxl__colo_save_state *css)
     css->recv_fd = dss->recv_fd;
     css->svm_running = false;
 
-    /* TODO: disk/nic support */
-    cds->device_kind_flags = 0;
+    /* TODO: nic support */
+    cds->device_kind_flags = (1 << LIBXL__DEVICE_KIND_CHECKPOINT_DISK);
     cds->ops = colo_ops;
     cds->callback = colo_save_setup_done;
     cds->ao = ao;
     cds->domid = dss->domid;
 
+    if (init_device_subkind(cds))
+        goto out;
+
     libxl__checkpoint_devices_setup(egc, &css->cds);
 
     return;
@@ -92,6 +121,7 @@ static void colo_save_setup_failed(libxl__egc *egc,
         LOG(ERROR, "COLO: failed to teardown device after setup failed"
             " for guest with domid %u, rc %d", cds->domid, rc);
 
+    cleanup_device_subkind(cds);
     libxl__ao_complete(egc, ao, rc);
 }
 
@@ -122,6 +152,8 @@ static void colo_teardown_done(libxl__egc *egc,
 {
     libxl__colo_save_state *css = CONTAINER_OF(cds, *css, cds);
     libxl__domain_suspend_state *dss = CONTAINER_OF(css, *dss, css);
+
+    cleanup_device_subkind(cds);
     dss->callback(egc, dss, rc);
 }
 
diff --git a/tools/libxl/libxl_colo_save_disk_blktap2.c b/tools/libxl/libxl_colo_save_disk_blktap2.c
new file mode 100644
index 0000000..19ba6d8
--- /dev/null
+++ b/tools/libxl/libxl_colo_save_disk_blktap2.c
@@ -0,0 +1,247 @@
+/*
+ * Copyright (C) 2014 FUJITSU LIMITED
+ * Author: Wen Congyang
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ */
+
+#include "libxl_osdeps.h" /* must come before any other headers */
+
+#include "libxl_internal.h"
+
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#define BLKTAP2_REQUEST     "flush"
+#define BLKTAP2_RESPONSE    "done"
+#define BLKTAP_CTRL_DIR     "/var/run/tap"
+
+/* Per-disk state for one COLO-protected blktap2 disk. */
+typedef struct libxl__colo_blktap2_disk {
+    char *name;             /* pdev_path up to (not including) the first '|' */
+    char *ctl_socket_path;  /* BLKTAP_CTRL_DIR "/colo_" + scrubbed name */
+    int fd;                 /* control socket, -1 while not connected */
+    libxl__ev_fd ev;        /* waits for the reply in commit() */
+    libxl__checkpoint_device *dev;
+} libxl__colo_blktap2_disk;
+
+/* ========== init() and cleanup() ========== */
+/* No subkind-wide state is needed for blktap2 disks. */
+int init_subkind_blktap2_disk(libxl__checkpoint_devices_state *cds)
+{
+    return 0;
+}
+
+void cleanup_subkind_blktap2_disk(libxl__checkpoint_devices_state *cds)
+{
+}
+
+/* ========== setup() and teardown() ========== */
+/*
+ * Connect to the control socket named by blktap2_disk->ctl_socket_path.
+ * On success store the descriptor in blktap2_disk->fd and return 0;
+ * otherwise return ERROR_FAIL and leave blktap2_disk->fd untouched.
+ */
+static int blktap2_control_connect(libxl__gc *gc,
+                                   libxl__colo_blktap2_disk *blktap2_disk)
+{
+    struct sockaddr_un saddr;
+    const char *path = blktap2_disk->ctl_socket_path;
+    int fd;
+
+    /* sun_path is a fixed-size array: refuse a name that does not fit */
+    if (strlen(path) >= sizeof(saddr.sun_path)) {
+        LOG(ERROR, "control socket path too long: %s", path);
+        return ERROR_FAIL;
+    }
+
+    memset(&saddr, 0, sizeof(saddr));
+    saddr.sun_family = AF_UNIX;
+    strcpy(saddr.sun_path, path);
+
+    fd = socket(AF_UNIX, SOCK_STREAM, 0);
+    if (fd < 0) {
+        LOG(ERROR, "cannot create socket fd");
+        return ERROR_FAIL;
+    }
+
+    if (connect(fd, (const struct sockaddr *)&saddr, sizeof(saddr))) {
+        LOG(ERROR, "cannot connect to %s", path);
+        close(fd);
+        return ERROR_FAIL;
+    }
+
+    blktap2_disk->fd = fd;
+    return 0;
+}
+
+/*
+ * Claim the disk if it is a tap backend in COLO format, derive the control
+ * socket path from its pdev_path and connect to it.  The result is
+ * reported through dev->aodev.callback.
+ */
+static void blktap2_colo_setup(libxl__checkpoint_device *dev)
+{
+    const libxl_device_disk *disk = dev->backend_dev;
+    libxl__colo_blktap2_disk *blktap2_disk;
+    int rc;
+    char *type;
+    int i, l;
+
+    STATE_AO_GC(dev->cds->ao);
+
+    if (disk->backend != LIBXL_DISK_BACKEND_TAP ||
+        disk->format != LIBXL_DISK_FORMAT_COLO) {
+        rc = ERROR_CHECKPOINT_DEVOPS_DOES_NOT_MATCH;
+        goto out;
+    }
+
+    dev->matched = 1;
+    GCNEW(blktap2_disk);
+    dev->concrete_data = blktap2_disk;
+    blktap2_disk->fd = -1;
+    blktap2_disk->dev = dev;
+
+    type = strchr(disk->pdev_path, '|');
+    if (!type) {
+        LOG(ERROR, "unexpected pdev_path: %s", disk->pdev_path);
+        rc = ERROR_FAIL;
+        goto out;
+    }
+    blktap2_disk->name = libxl__strndup(gc, disk->pdev_path,
+                                        type - disk->pdev_path);
+    blktap2_disk->ctl_socket_path = libxl__sprintf(gc, "%s/colo_%s",
+                                                   BLKTAP_CTRL_DIR,
+                                                   blktap2_disk->name);
+    /* scrub socket pathname: ':' and '/' in the name become '_' */
+    l = strlen(blktap2_disk->ctl_socket_path);
+    for (i = strlen(BLKTAP_CTRL_DIR) + 1; i < l; i++) {
+        if (strchr(":/", blktap2_disk->ctl_socket_path[i]))
+            blktap2_disk->ctl_socket_path[i] = '_';
+    }
+
+    libxl__ev_fd_init(&blktap2_disk->ev);
+
+    rc = blktap2_control_connect(gc, blktap2_disk);
+
+out:
+    dev->aodev.rc = rc;
+    dev->aodev.callback(dev->cds->egc, &dev->aodev);
+}
+
+/* Close the control socket (if any) and report completion. */
+static void blktap2_colo_teardown(libxl__checkpoint_device *dev)
+{
+    libxl__colo_blktap2_disk *blktap2_disk = dev->concrete_data;
+
+    /* 0 is a valid descriptor; only -1 means "not connected" */
+    if (blktap2_disk->fd >= 0) {
+        close(blktap2_disk->fd);
+        blktap2_disk->fd = -1;
+    }
+
+    dev->aodev.rc = 0;
+    dev->aodev.callback(dev->cds->egc, &dev->aodev);
+}
+
+/* ========== checkpointing APIs ========== */
+static void blktap2_control_readable(libxl__egc *egc, libxl__ev_fd *ev,
+                                     int fd, short events, short revents);
+
+/* Ask the backend to flush by writing BLKTAP2_REQUEST to the socket. */
+static void blktap2_colo_postsuspend(libxl__checkpoint_device *dev)
+{
+    ssize_t ret;
+    libxl__colo_blktap2_disk *blktap2_disk = dev->concrete_data;
+    int rc = 0;
+
+    /* unix socket fd, so this should not block */
+    ret = write(blktap2_disk->fd, BLKTAP2_REQUEST, strlen(BLKTAP2_REQUEST));
+    /* test ret < 0 first: comparing -1 with a size_t would hide the error */
+    if (ret < 0 || (size_t)ret < strlen(BLKTAP2_REQUEST))
+        rc = ERROR_FAIL;
+
+    dev->aodev.rc = rc;
+    dev->aodev.callback(dev->cds->egc, &dev->aodev);
+}
+
+/*
+ * Wait for the backend's reply: register for POLLIN on the control socket;
+ * blktap2_control_readable() completes the operation.  If registration
+ * fails the error is reported immediately.
+ */
+static void blktap2_colo_commit(libxl__checkpoint_device *dev)
+{
+    libxl__colo_blktap2_disk *blktap2_disk = dev->concrete_data;
+    int rc;
+
+    /* Convenience aliases */
+    const int fd = blktap2_disk->fd;
+    libxl__ev_fd *const ev = &blktap2_disk->ev;
+
+    STATE_AO_GC(dev->cds->ao);
+
+    rc = libxl__ev_fd_register(gc, ev, blktap2_control_readable, fd, POLLIN);
+    if (rc) {
+        dev->aodev.rc = rc;
+        dev->aodev.callback(dev->cds->egc, &dev->aodev);
+    }
+}
+
+/*
+ * fd event callback for commit(): read the reply and succeed only if it is
+ * exactly BLKTAP2_RESPONSE.
+ */
+static void blktap2_control_readable(libxl__egc *egc, libxl__ev_fd *ev,
+                                     int fd, short events, short revents)
+{
+    libxl__colo_blktap2_disk *blktap2_disk =
+                                CONTAINER_OF(ev, *blktap2_disk, ev);
+    int rc = 0;
+    ssize_t ret;
+    char response[sizeof(BLKTAP2_RESPONSE)];
+
+    /* Convenience aliases */
+    libxl__checkpoint_device *const dev = blktap2_disk->dev;
+
+    EGC_GC;
+
+    libxl__ev_fd_deregister(gc, ev);
+
+    if (revents & ~POLLIN) {
+        LOG(ERROR, "unexpected poll event 0x%x (should be POLLIN)", revents);
+        rc = ERROR_FAIL;
+        goto out;
+    }
+
+    ret = read(blktap2_disk->fd, response, sizeof(response) - 1);
+    /* test ret < 0 first: comparing -1 with a size_t would hide the error */
+    if (ret < 0 || (size_t)ret < sizeof(response) - 1) {
+        rc = ERROR_FAIL;
+        goto out;
+    }
+
+    response[sizeof(response) - 1] = '\0';
+    if (strcmp(response, BLKTAP2_RESPONSE))
+        rc = ERROR_FAIL;
+
+out:
+    dev->aodev.rc = rc;
+    dev->aodev.callback(dev->cds->egc, &dev->aodev);
+}
+
+const libxl__checkpoint_device_instance_ops colo_save_device_blktap2_disk = {
+    .kind = LIBXL__DEVICE_KIND_CHECKPOINT_DISK,
+    .setup = blktap2_colo_setup,
+    .teardown = blktap2_colo_teardown,
+    .postsuspend = blktap2_colo_postsuspend,
+    .commit = blktap2_colo_commit,
+};
diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
index ec5946d..b3a2f33 100644
--- a/tools/libxl/libxl_create.c
+++ b/tools/libxl/libxl_create.c
@@ -854,6 +854,13 @@ static void initiate_domain_create(libxl__egc *egc,
     for (i = 0; i < d_config->num_disks; i++) {
         ret = libxl__device_disk_setdefault(gc, &d_config->disks[i]);
         if (ret) goto error_out;
+
+        /* TODO: clean this up when destroying the domain */
+        if (d_config->disks[i].backend == LIBXL_DISK_BACKEND_TAP &&
+            (d_config->disks[i].format == LIBXL_DISK_FORMAT_REMUS ||
+             d_config->disks[i].format == LIBXL_DISK_FORMAT_COLO))
+            libxl__blktap_devpath(gc, d_config->disks[i].pdev_path,
+                                  d_config->disks[i].format);
     }
 
     dcs->bl.ao = ao;
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 1cc90fb..120b389 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -2606,6 +2606,8 @@ int init_subkind_nic(libxl__checkpoint_devices_state *cds);
 void cleanup_subkind_nic(libxl__checkpoint_devices_state *cds);
 int init_subkind_drbd_disk(libxl__checkpoint_devices_state *cds);
 void cleanup_subkind_drbd_disk(libxl__checkpoint_devices_state *cds);
+int init_subkind_blktap2_disk(libxl__checkpoint_devices_state *cds);
+void cleanup_subkind_blktap2_disk(libxl__checkpoint_devices_state *cds);
 
 typedef void libxl__checkpoint_callback(libxl__egc *,
                                         libxl__checkpoint_devices_state *,
diff --git a/tools/libxl/libxl_noblktap2.c b/tools/libxl/libxl_noblktap2.c
index 38696ec..b66ab95 100644
--- a/tools/libxl/libxl_noblktap2.c
+++ b/tools/libxl/libxl_noblktap2.c
@@ -39,6 +39,35 @@ libxl_disk_format libxl__blktap_get_real_format(const char *disk,
     return format;
 }
 
+/* Stubs used when libxl is built without blktap2 support. */
+int init_subkind_blktap2_disk(libxl__checkpoint_devices_state *cds)
+{
+    return 0;
+}
+
+void cleanup_subkind_blktap2_disk(libxl__checkpoint_devices_state *cds)
+{
+}
+
+/* COLO disk replication is unavailable without blktap2: always fail. */
+static void blktap2_colo_setup(libxl__checkpoint_device *dev)
+{
+    dev->aodev.rc = ERROR_FAIL;
+    dev->aodev.callback(dev->cds->egc, &dev->aodev);
+}
+
+static void blktap2_colo_teardown(libxl__checkpoint_device *dev)
+{
+    dev->aodev.rc = 0;
+    dev->aodev.callback(dev->cds->egc, &dev->aodev);
+}
+
+const libxl__checkpoint_device_instance_ops colo_save_device_blktap2_disk = {
+    .kind = LIBXL__DEVICE_KIND_CHECKPOINT_DISK,
+    .setup = blktap2_colo_setup,
+    .teardown = blktap2_colo_teardown,
+};
+
 /*
  * Local variables:
  * mode: C
-- 
1.9.3