From mboxrd@z Thu Jan 1 00:00:00 1970 From: Wen Congyang Subject: [PATCH v10 24/31] Support colo mode for qemu disk Date: Mon, 22 Feb 2016 10:52:28 +0800 Message-ID: <1456109555-28299-25-git-send-email-wency@cn.fujitsu.com> References: <1456109555-28299-1-git-send-email-wency@cn.fujitsu.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <1456109555-28299-1-git-send-email-wency@cn.fujitsu.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: xen devel , Konrad Rzeszutek Wilk , Andrew Cooper , Ian Campbell , Ian Jackson , Wei Liu Cc: Lars Kurth , Changlong Xie , Wen Congyang , Gui Jianfeng , Jiang Yunhong , Dong Eddie , Shriram Rajagopalan , Yang Hongyang List-Id: xen-devel@lists.xenproject.org Usage: disk = ['...,colo,colo-host=xxx,colo-port=xxx,colo-export=xxx,active-disk=xxx,hidden-disk=xxx...'] For QEMU block replication details: http://wiki.qemu.org/Features/BlockReplication Signed-off-by: Wen Congyang Signed-off-by: Yang Hongyang --- docs/man/xl.pod.1 | 2 +- docs/misc/xl-disk-configuration.txt | 50 ++++++++++ tools/libxl/libxl.c | 62 +++++++++++- tools/libxl/libxl_create.c | 25 ++++- tools/libxl/libxl_device.c | 54 +++++++++++ tools/libxl/libxl_dm.c | 184 ++++++++++++++++++++++++++++++++++-- tools/libxl/libxl_types.idl | 7 ++ tools/libxl/libxlu_disk_l.l | 7 ++ 8 files changed, 382 insertions(+), 9 deletions(-) diff --git a/docs/man/xl.pod.1 b/docs/man/xl.pod.1 index 1c6dd87..4f1901d 100644 --- a/docs/man/xl.pod.1 +++ b/docs/man/xl.pod.1 @@ -454,7 +454,7 @@ N.B: Remus support in xl is still in experimental (proof-of-concept) phase. Disk replication support is limited to DRBD disks. COLO support in xl is still in experimental (proof-of-concept) phase. - There is no support for network or disk at the moment. + There is no support for network at the moment. 
B diff --git a/docs/misc/xl-disk-configuration.txt b/docs/misc/xl-disk-configuration.txt index 29f6ddb..6f23c2d 100644 --- a/docs/misc/xl-disk-configuration.txt +++ b/docs/misc/xl-disk-configuration.txt @@ -234,6 +234,56 @@ were intentionally created non-sparse to avoid fragmentation of the file. +=============== +COLO PARAMETERS +=============== + + +colo +---- + +Enable COLO HA for disk. For a better understanding of block replication on +QEMU, please refer to: +http://wiki.qemu.org/Features/BlockReplication + + +colo-host +--------- +Description: Secondary host's address +Mandatory: Yes when COLO enabled + + +colo-port +--------- +Description: Secondary port + We will run an NBD server on secondary host, + and the NBD server will listen on this port. +Mandatory: Yes when COLO enabled + + +colo-export +----------- +Description: We will run an NBD server on secondary host, + exportname is the NBD server's disk export name. +Mandatory: Yes when COLO enabled + + +active-disk +----------- + +Description: This is used by secondary. Secondary guest's writes + will be buffered in this disk. +Mandatory: Yes when COLO enabled + + +hidden-disk +----------- + +Description: This is used by secondary. It buffers the original + content that is modified by the primary VM.
+Mandatory: Yes when COLO enabled + + ============================================ DEPRECATED PARAMETERS, PREFIXES AND SYNTAXES ============================================ diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c index 12df81a..f691628 100644 --- a/tools/libxl/libxl.c +++ b/tools/libxl/libxl.c @@ -2309,6 +2309,8 @@ int libxl__device_disk_setdefault(libxl__gc *gc, libxl_device_disk *disk) int rc; libxl_defbool_setdefault(&disk->discard_enable, !!disk->readwrite); + libxl_defbool_setdefault(&disk->colo_enable, false); + libxl_defbool_setdefault(&disk->colo_restore_enable, false); rc = libxl__resolve_domid(gc, disk->backend_domname, &disk->backend_domid); if (rc < 0) return rc; @@ -2507,6 +2509,18 @@ static void device_disk_add(libxl__egc *egc, uint32_t domid, flexarray_append(back, "params"); flexarray_append(back, GCSPRINTF("%s:%s", libxl__device_disk_string_of_format(disk->format), disk->pdev_path)); + if (libxl_defbool_val(disk->colo_enable)) { + flexarray_append(back, "colo-host"); + flexarray_append(back, libxl__sprintf(gc, "%s", disk->colo_host)); + flexarray_append(back, "colo-port"); + flexarray_append(back, libxl__sprintf(gc, "%s", disk->colo_port)); + flexarray_append(back, "colo-export"); + flexarray_append(back, libxl__sprintf(gc, "%s", disk->colo_export)); + flexarray_append(back, "active-disk"); + flexarray_append(back, libxl__sprintf(gc, "%s", disk->active_disk)); + flexarray_append(back, "hidden-disk"); + flexarray_append(back, libxl__sprintf(gc, "%s", disk->hidden_disk)); + } assert(device->backend_kind == LIBXL__DEVICE_KIND_QDISK); break; default: @@ -2622,7 +2636,10 @@ static int libxl__device_disk_from_xs_be(libxl__gc *gc, goto cleanup; } - /* "params" may not be present; but everything else must be. */ + /* + * "params" and "colo-host" may not be present; but everything + * else must be. 
+ */ tmp = xs_read(ctx->xsh, XBT_NULL, GCSPRINTF("%s/params", be_path), &len); if (tmp && strchr(tmp, ':')) { @@ -2632,6 +2649,49 @@ static int libxl__device_disk_from_xs_be(libxl__gc *gc, disk->pdev_path = tmp; } + tmp = xs_read(ctx->xsh, XBT_NULL, + GCSPRINTF("%s/colo-host", be_path), &len); + if (tmp) { + libxl_defbool_set(&disk->colo_enable, true); + disk->colo_host = tmp; + } else { + libxl_defbool_set(&disk->colo_enable, false); + } + + if (libxl_defbool_val(disk->colo_enable)) { + tmp = xs_read(ctx->xsh, XBT_NULL, + GCSPRINTF("%s/colo-port", be_path), &len); + if (!tmp) { + LOG(ERROR, "Missing xenstore node %s/colo-port", be_path); + goto cleanup; + } + disk->colo_port = tmp; + + tmp = xs_read(ctx->xsh, XBT_NULL, + GCSPRINTF("%s/colo-export", be_path), &len); + if (!tmp) { + LOG(ERROR, "Missing xenstore node %s/colo-export", be_path); + goto cleanup; + } + disk->colo_export = tmp; + + tmp = xs_read(ctx->xsh, XBT_NULL, + GCSPRINTF("%s/active-disk", be_path), &len); + if (!tmp) { + LOG(ERROR, "Missing xenstore node %s/active-disk", be_path); + goto cleanup; + } + disk->active_disk = tmp; + + tmp = xs_read(ctx->xsh, XBT_NULL, + GCSPRINTF("%s/hidden-disk", be_path), &len); + if (!tmp) { + LOG(ERROR, "Missing xenstore node %s/hidden-disk", be_path); + goto cleanup; + } + disk->hidden_disk = tmp; + } + tmp = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/type", be_path)); diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c index f32ca98..c68dd49 100644 --- a/tools/libxl/libxl_create.c +++ b/tools/libxl/libxl_create.c @@ -1801,12 +1801,29 @@ static void domain_create_cb(libxl__egc *egc, libxl__ao_complete(egc, ao, rc); } - + +static void set_disk_colo_restore(libxl_domain_config *d_config) +{ + int i; + + for (i = 0; i < d_config->num_disks; i++) + libxl_defbool_set(&d_config->disks[i].colo_restore_enable, true); +} + +static void unset_disk_colo_restore(libxl_domain_config *d_config) +{ + int i; + + for (i = 0; i < d_config->num_disks; i++) + 
libxl_defbool_set(&d_config->disks[i].colo_restore_enable, false); +} + int libxl_domain_create_new(libxl_ctx *ctx, libxl_domain_config *d_config, uint32_t *domid, const libxl_asyncop_how *ao_how, const libxl_asyncprogress_how *aop_console_how) { + unset_disk_colo_restore(d_config); return do_domain_create(ctx, d_config, domid, -1, -1, NULL, ao_how, aop_console_how); } @@ -1818,6 +1835,12 @@ int libxl_domain_create_restore(libxl_ctx *ctx, libxl_domain_config *d_config, const libxl_asyncop_how *ao_how, const libxl_asyncprogress_how *aop_console_how) { + if (params->checkpointed_stream == LIBXL_CHECKPOINTED_STREAM_COLO) { + set_disk_colo_restore(d_config); + } else { + unset_disk_colo_restore(d_config); + } + return do_domain_create(ctx, d_config, domid, restore_fd, send_back_fd, params, ao_how, aop_console_how); } diff --git a/tools/libxl/libxl_device.c b/tools/libxl/libxl_device.c index 8bb5e93..039afc6 100644 --- a/tools/libxl/libxl_device.c +++ b/tools/libxl/libxl_device.c @@ -196,6 +196,10 @@ static int disk_try_backend(disk_try_backend_args *a, goto bad_format; } + if (libxl_defbool_val(a->disk->colo_enable) || + a->disk->active_disk || a->disk->hidden_disk) + goto bad_colo; + if (a->disk->backend_domid != LIBXL_TOOLSTACK_DOMID) { LOG(DEBUG, "Disk vdev=%s, is using a storage driver domain, " "skipping physical device check", a->disk->vdev); @@ -218,6 +222,10 @@ static int disk_try_backend(disk_try_backend_args *a, case LIBXL_DISK_BACKEND_TAP: if (a->disk->script) goto bad_script; + if (libxl_defbool_val(a->disk->colo_enable) || + a->disk->active_disk || a->disk->hidden_disk) + goto bad_colo; + if (a->disk->is_cdrom) { LOG(DEBUG, "Disk vdev=%s, backend tap unsuitable for cdroms", a->disk->vdev); @@ -236,6 +244,22 @@ static int disk_try_backend(disk_try_backend_args *a, case LIBXL_DISK_BACKEND_QDISK: if (a->disk->script) goto bad_script; + if (libxl_defbool_val(a->disk->colo_enable)) { + if (!a->disk->colo_host) + goto bad_colo_host; + + if (!a->disk->colo_port) 
+ goto bad_colo_port; + + if (!a->disk->colo_export) + goto bad_colo_export; + + if (!a->disk->active_disk) + goto bad_active_disk; + + if (!a->disk->hidden_disk) + goto bad_hidden_disk; + } return backend; default: @@ -256,6 +280,36 @@ static int disk_try_backend(disk_try_backend_args *a, LOG(DEBUG, "Disk vdev=%s, backend %s not compatible with script=...", a->disk->vdev, libxl_disk_backend_to_string(backend)); return 0; + + bad_colo: + LOG(DEBUG, "Disk vdev=%s, backend %s not compatible with colo", + a->disk->vdev, libxl_disk_backend_to_string(backend)); + return 0; + + bad_colo_host: + LOG(DEBUG, "Disk vdev=%s, backend %s needs colo-host=... for colo", + a->disk->vdev, libxl_disk_backend_to_string(backend)); + return 0; + + bad_colo_port: + LOG(DEBUG, "Disk vdev=%s, backend %s needs colo-port=... for colo", + a->disk->vdev, libxl_disk_backend_to_string(backend)); + return 0; + + bad_colo_export: + LOG(DEBUG, "Disk vdev=%s, backend %s needs colo-export=... for colo", + a->disk->vdev, libxl_disk_backend_to_string(backend)); + return 0; + + bad_active_disk: + LOG(DEBUG, "Disk vdev=%s, backend %s needs active-disk=... for colo", + a->disk->vdev, libxl_disk_backend_to_string(backend)); + return 0; + + bad_hidden_disk: + LOG(DEBUG, "Disk vdev=%s, backend %s needs hidden-disk=... 
for colo", + a->disk->vdev, libxl_disk_backend_to_string(backend)); + return 0; } int libxl__device_disk_set_backend(libxl__gc *gc, libxl_device_disk *disk) { diff --git a/tools/libxl/libxl_dm.c b/tools/libxl/libxl_dm.c index 4aca38e..ba17251 100644 --- a/tools/libxl/libxl_dm.c +++ b/tools/libxl/libxl_dm.c @@ -751,6 +751,139 @@ static int libxl__dm_runas_helper(libxl__gc *gc, const char *username) } } +/* colo mode */ +enum { + LIBXL__COLO_NONE = 0, + LIBXL__COLO_PRIMARY, + LIBXL__COLO_SECONDARY, +}; + +static char *qemu_disk_scsi_drive_string(libxl__gc *gc, const char *pdev_path, + int unit, const char *format, + const libxl_device_disk *disk, + int colo_mode) +{ + char *drive = NULL; + const char *exportname = disk->colo_export; + const char *active_disk = disk->active_disk; + const char *hidden_disk = disk->hidden_disk; + + switch (colo_mode) { + case LIBXL__COLO_NONE: + drive = libxl__sprintf + (gc, "file=%s,if=scsi,bus=0,unit=%d,format=%s,cache=writeback", + pdev_path, unit, format); + break; + case LIBXL__COLO_PRIMARY: + /* + * primary: + * -dirve if=scsi,bus=0,unit=x,cache=writeback,driver=quorum,\ + * id=exportname,\ + * children.0.file.filename=pdev_path,\ + * children.0.driver=format,\ + * read-pattern=fifo,\ + * vote-threshold=1 + */ + drive = GCSPRINTF( + "if=scsi,bus=0,unit=%d,cache=writeback,driver=quorum," + "id=%s," + "children.0.file.filename=%s," + "children.0.driver=%s," + "read-pattern=fifo," + "vote-threshold=1", + unit, exportname, pdev_path, format); + break; + case LIBXL__COLO_SECONDARY: + /* + * secondary: + * -drive if=scsi,bus=0,unit=x,cache=writeback,driver=replication,\ + * mode=secondary,\ + * file.driver=qcow2,\ + * file.file.filename=active_disk,\ + * file.backing.driver=qcow2,\ + * file.backing.file.filename=hidden_disk,\ + * file.backing.backing=exportname, + */ + drive = GCSPRINTF( + "if=scsi,bus=0,unit=%d,cache=writeback,driver=replication," + "mode=secondary," + "file.driver=qcow2," + "file.file.filename=%s," + 
"file.backing.driver=qcow2," + "file.backing.file.filename=%s," + "file.backing.backing=%s", + unit, active_disk, hidden_disk, exportname); + break; + default: + abort(); + } + + return drive; +} + +static char *qemu_disk_ide_drive_string(libxl__gc *gc, const char *pdev_path, + int unit, const char *format, + const libxl_device_disk *disk, + int colo_mode) +{ + char *drive = NULL; + const char *exportname = disk->colo_export; + const char *active_disk = disk->active_disk; + const char *hidden_disk = disk->hidden_disk; + + switch (colo_mode) { + case LIBXL__COLO_NONE: + drive = GCSPRINTF + ("file=%s,if=ide,index=%d,media=disk,format=%s,cache=writeback", + pdev_path, unit, format); + break; + case LIBXL__COLO_PRIMARY: + /* + * primary: + * -dirve if=ide,index=x,media=disk,cache=writeback,driver=quorum,\ + * id=exportname,\ + * children.0.file.filename=pdev_path,\ + * children.0.driver=format,\ + * read-pattern=fifo,\ + * vote-threshold=1 + */ + drive = GCSPRINTF( + "if=ide,index=%d,media=disk,cache=writeback,driver=quorum," + "id=%s," + "children.0.file.filename=%s," + "children.0.driver=%s," + "read-pattern=fifo," + "vote-threshold=1", + unit, exportname, pdev_path, format); + break; + case LIBXL__COLO_SECONDARY: + /* + * secondary: + * -drive if=ide,index=x,media=disk,cache=writeback,driver=replication,\ + * mode=secondary,\ + * file.driver=qcow2,\ + * file.file.filename=active_disk,\ + * file.backing.driver=qcow2,\ + * file.backing.file.filename=hidden_disk,\ + * file.backing.backing=exportname, + */ + drive = GCSPRINTF( + "if=ide,index=%d,media=disk,cache=writeback,driver=replication," + "mode=secondary," + "file.driver=qcow2," + "file.file.filename=%s," + "file.backing.driver=qcow2," + "file.backing.file.filename=%s," + "file.backing.backing=%s", + unit, active_disk, hidden_disk, exportname); + break; + default: + abort(); + } + + return drive; +} + static int libxl__build_device_model_args_new(libxl__gc *gc, const char *dm, int guest_domid, const 
libxl_domain_config *guest_config, @@ -1164,6 +1297,7 @@ static int libxl__build_device_model_args_new(libxl__gc *gc, const char *format = qemu_disk_format_string(disks[i].format); char *drive; const char *pdev_path; + int colo_mode; if (dev_number == -1) { LOG(WARN, "unable to determine"" disk number for %s", @@ -1208,10 +1342,32 @@ static int libxl__build_device_model_args_new(libxl__gc *gc, * For other disks we translate devices 0..3 into * hd[a-d] and ignore the rest. */ + if (libxl_defbool_val(disks[i].colo_enable)) { + if (libxl_defbool_val(disks[i].colo_restore_enable)) + colo_mode = LIBXL__COLO_SECONDARY; + else + colo_mode = LIBXL__COLO_PRIMARY; + } else { + colo_mode = LIBXL__COLO_NONE; + } + if (strncmp(disks[i].vdev, "sd", 2) == 0) { - drive = libxl__sprintf - (gc, "file=%s,if=scsi,bus=0,unit=%d,format=%s,readonly=%s,cache=writeback", - pdev_path, disk, format, disks[i].readwrite ? "off" : "on"); + if (colo_mode == LIBXL__COLO_SECONDARY) { + /* + * -drive if=none,driver=format,file=pdev_path,\ + * id=exportname + */ + drive = libxl__sprintf + (gc, "if=none,driver=%s,file=%s,id=%s", + format, pdev_path, disks[i].colo_export); + + flexarray_append(dm_args, "-drive"); + flexarray_append(dm_args, drive); + } + drive = qemu_disk_scsi_drive_string(gc, pdev_path, disk, + format, + &disks[i], + colo_mode); } else if (strncmp(disks[i].vdev, "xvd", 3) == 0) { /* * Do not add any emulated disk when PV disk are @@ -1234,12 +1390,28 @@ static int libxl__build_device_model_args_new(libxl__gc *gc, LOG(ERROR, "qemu-xen doesn't support read-only IDE disk drivers"); return ERROR_INVAL; } - drive = libxl__sprintf - (gc, "file=%s,if=ide,index=%d,media=disk,format=%s,cache=writeback", - pdev_path, disk, format); + if (colo_mode == LIBXL__COLO_SECONDARY) { + /* + * -drive if=none,driver=format,file=pdev_path,\ + * id=exportname + */ + drive = libxl__sprintf + (gc, "if=none,driver=%s,file=%s,id=%s", + format, pdev_path, disks[i].colo_export); + + flexarray_append(dm_args, 
"-drive"); + flexarray_append(dm_args, drive); + } + drive = qemu_disk_ide_drive_string(gc, pdev_path, disk, + format, + &disks[i], + colo_mode); } else { continue; /* Do not emulate this disk */ } + + if (!drive) + continue; } flexarray_append(dm_args, "-drive"); diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl index 9b0a537..a2078d1 100644 --- a/tools/libxl/libxl_types.idl +++ b/tools/libxl/libxl_types.idl @@ -575,6 +575,13 @@ libxl_device_disk = Struct("device_disk", [ ("is_cdrom", integer), ("direct_io_safe", bool), ("discard_enable", libxl_defbool), + ("colo_enable", libxl_defbool), + ("colo_restore_enable", libxl_defbool), + ("colo_host", string), + ("colo_port", string), + ("colo_export", string), + ("active_disk", string), + ("hidden_disk", string) ]) libxl_device_nic = Struct("device_nic", [ diff --git a/tools/libxl/libxlu_disk_l.l b/tools/libxl/libxlu_disk_l.l index 1a5deb5..58da943 100644 --- a/tools/libxl/libxlu_disk_l.l +++ b/tools/libxl/libxlu_disk_l.l @@ -176,6 +176,13 @@ script=[^,]*,? { STRIP(','); SAVESTRING("script", script, FROMEQUALS); } direct-io-safe,? { DPC->disk->direct_io_safe = 1; } discard,? { libxl_defbool_set(&DPC->disk->discard_enable, true); } no-discard,? { libxl_defbool_set(&DPC->disk->discard_enable, false); } +colo,? { libxl_defbool_set(&DPC->disk->colo_enable, true); } +no-colo,? { libxl_defbool_set(&DPC->disk->colo_enable, false); } +colo-host=[^,]*,? { STRIP(','); SAVESTRING("colo-host", colo_host, FROMEQUALS); } +colo-port=[^,]*,? { STRIP(','); SAVESTRING("colo-port", colo_port, FROMEQUALS); } +colo-export=[^,]*,? { STRIP(','); SAVESTRING("colo-export", colo_export, FROMEQUALS); } +active-disk=[^,]*,? { STRIP(','); SAVESTRING("active-disk", active_disk, FROMEQUALS); } +hidden-disk=[^,]*,? { STRIP(','); SAVESTRING("hidden-disk", hidden_disk, FROMEQUALS); } /* the target magic parameter, eats the rest of the string */ -- 2.5.0