From: Changlong Xie <xiecl.fnst@cn.fujitsu.com>
To: xen devel <xen-devel@lists.xen.org>,
Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>,
Andrew Cooper <andrew.cooper3@citrix.com>,
Ian Campbell <ian.campbell@citrix.com>,
Ian Jackson <ian.jackson@eu.citrix.com>,
Wei Liu <wei.liu2@citrix.com>
Cc: Lars Kurth <lars.kurth@citrix.com>,
Changlong Xie <xiecl.fnst@cn.fujitsu.com>,
Wen Congyang <wency@cn.fujitsu.com>,
Li Zhijian <lizhijian@cn.fujitsu.com>,
Gui Jianfeng <guijianfeng@cn.fujitsu.com>,
Jiang Yunhong <yunhong.jiang@intel.com>,
Dong Eddie <eddie.dong@intel.com>,
Anthony Perard <anthony.perard@citrix.com>,
Shriram Rajagopalan <rshriram@cs.ubc.ca>,
Yang Hongyang <hongyang.yang@easystack.cn>
Subject: [PATCH v13 22/26] COLO proxy: implement setup/teardown/preresume/postresume/checkpoint
Date: Fri, 25 Mar 2016 14:44:29 +0800 [thread overview]
Message-ID: <1458888273-7469-23-git-send-email-xiecl.fnst@cn.fujitsu.com> (raw)
In-Reply-To: <1458888273-7469-1-git-send-email-xiecl.fnst@cn.fujitsu.com>
From: Wen Congyang <wency@cn.fujitsu.com>
Implement setup/teardown/preresume/postresume/checkpoint for the COLO proxy module.
We use netlink to communicate with the proxy module.
About colo-proxy module:
http://www.spinics.net/lists/netdev/msg333520.html
https://github.com/wencongyang/colo-proxy
How to use:
http://wiki.xen.org/wiki/COLO_-_Coarse_Grain_Lock_Stepping
Signed-off-by: Yang Hongyang <hongyang.yang@easystack.cn>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Changlong Xie <xiecl.fnst@cn.fujitsu.com>
Acked-by: Ian Jackson <ian.jackson@eu.citrix.com>
---
tools/libxl/Makefile | 1 +
tools/libxl/libxl_colo.h | 32 +++++
tools/libxl/libxl_colo_proxy.c | 277 +++++++++++++++++++++++++++++++++++++++++
tools/libxl/libxl_internal.h | 3 +
4 files changed, 313 insertions(+)
create mode 100644 tools/libxl/libxl_colo_proxy.c
diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index 701c069..72f3b1a 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -67,6 +67,7 @@ endif
LIBXL_OBJS-y += libxl_remus.o libxl_checkpoint_device.o libxl_remus_disk_drbd.o
LIBXL_OBJS-y += libxl_colo_restore.o libxl_colo_save.o
LIBXL_OBJS-y += libxl_colo_qdisk.o
+LIBXL_OBJS-y += libxl_colo_proxy.o
LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o libxl_psr.o
LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o libxl_libfdt_compat.o
diff --git a/tools/libxl/libxl_colo.h b/tools/libxl/libxl_colo.h
index 90345f4..a529ce8 100644
--- a/tools/libxl/libxl_colo.h
+++ b/tools/libxl/libxl_colo.h
@@ -16,21 +16,43 @@
#ifndef LIBXL_COLO_H
#define LIBXL_COLO_H
+#include <linux/netlink.h>
+
struct libxl__ao;
struct libxl__egc;
struct libxl__colo_save_state;
struct libxl__checkpoint_devices_state;
+/* Consistent with the new COLO netlink channel in kernel side */
+#define NETLINK_COLO 28
+
enum {
LIBXL_COLO_SETUPED,
LIBXL_COLO_SUSPENDED,
LIBXL_COLO_RESUMED,
};
+enum colo_netlink_op { /* values placed in nlmsg_type by colo_proxy_send() */
+ COLO_QUERY_CHECKPOINT = (NLMSG_MIN_TYPE + 1), /* not sent by libxl_colo_proxy.c in this patch */
+ COLO_CHECKPOINT, /* sent by colo_proxy_preresume() */
+ COLO_FAILOVER, /* not sent by libxl_colo_proxy.c in this patch */
+ COLO_PROXY_INIT, /* sent by colo_proxy_setup(); the only op sent with NLM_F_ACK */
+ COLO_PROXY_RESET, /* UNUSED, will be used for continuous FT */
+};
+
typedef struct libxl__colo_qdisk {
bool setuped;
} libxl__colo_qdisk;
+typedef struct libxl__colo_proxy_state libxl__colo_proxy_state;
+struct libxl__colo_proxy_state { /* state used by the colo_proxy_* functions */
+ /* set by caller of colo_proxy_setup */
+ struct libxl__ao *ao;
+
+ int sock_fd; /* netlink socket opened by colo_proxy_setup(); -1 after a failed setup or colo_proxy_teardown() */
+ int index; /* nl_pid the socket was bound to in colo_proxy_setup(); sent as nlmsg_pid in every request */
+};
+
typedef struct libxl__domain_create_state libxl__domain_create_state;
typedef void libxl__domain_create_cb(struct libxl__egc *egc,
libxl__domain_create_state *dcs,
@@ -58,6 +80,9 @@ struct libxl__colo_restore_state {
bool qdisk_setuped;
const char *host;
const char *port;
+
+ /* private, used by colo-proxy */
+ libxl__colo_proxy_state cps;
};
int init_subkind_qdisk(struct libxl__checkpoint_devices_state *cds);
@@ -73,4 +98,11 @@ extern void libxl__colo_save_setup(struct libxl__egc *egc,
extern void libxl__colo_save_teardown(struct libxl__egc *egc,
struct libxl__colo_save_state *css,
int rc);
+extern int colo_proxy_setup(libxl__colo_proxy_state *cps);
+extern void colo_proxy_teardown(libxl__colo_proxy_state *cps);
+extern void colo_proxy_preresume(libxl__colo_proxy_state *cps);
+extern void colo_proxy_postresume(libxl__colo_proxy_state *cps);
+extern int colo_proxy_checkpoint(libxl__colo_proxy_state *cps,
+ unsigned int timeout_us);
+
#endif
diff --git a/tools/libxl/libxl_colo_proxy.c b/tools/libxl/libxl_colo_proxy.c
new file mode 100644
index 0000000..991bd0d
--- /dev/null
+++ b/tools/libxl/libxl_colo_proxy.c
@@ -0,0 +1,277 @@
+/*
+ * Copyright (C) 2016 FUJITSU LIMITED
+ * Author: Yang Hongyang <hongyang.yang@easystack.cn>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ */
+
+#include "libxl_osdeps.h" /* must come before any other headers */
+
+#include "libxl_internal.h"
+
+/* ========= colo-proxy: helper functions ========== */
+
+/*
+ * Send one netlink request of the given type over cps->sock_fd.
+ *
+ * The message consists of a bare nlmsghdr with no payload; buff and size
+ * are currently ignored.  The destination address has nl_pid 0 and
+ * nl_groups 0 (the log message below calls this "the kernel").
+ * COLO_PROXY_INIT requests additionally carry NLM_F_ACK.
+ *
+ * Returns the value returned by sendmsg(): the number of bytes sent, or
+ * a value <= 0 on failure, in which case an error is logged as well.
+ */
+static int colo_proxy_send(libxl__colo_proxy_state *cps, uint8_t *buff,
+                           uint64_t size, int type)
+{
+    STATE_AO_GC(cps->ao);
+
+    struct sockaddr_nl dest = {
+        .nl_family = AF_NETLINK,
+        .nl_pid = 0,
+        .nl_groups = 0,
+    };
+    struct nlmsghdr hdr = {
+        .nlmsg_len = NLMSG_SPACE(0),
+        .nlmsg_type = type,
+        .nlmsg_flags = NLM_F_REQUEST,
+        .nlmsg_seq = 0,
+        .nlmsg_pid = cps->index,
+    };
+    struct iovec vec = {
+        .iov_base = &hdr,
+        .iov_len = hdr.nlmsg_len,
+    };
+    struct msghdr request = {
+        .msg_name = &dest,
+        .msg_namelen = sizeof(dest),
+        .msg_iov = &vec,
+        .msg_iovlen = 1,
+        .msg_control = NULL,
+        .msg_controllen = 0,
+        .msg_flags = 0,
+    };
+    int ret;
+
+    /* Only the init request asks for an acknowledgement. */
+    if (type == COLO_PROXY_INIT)
+        hdr.nlmsg_flags |= NLM_F_ACK;
+
+    ret = sendmsg(cps->sock_fd, &request, 0);
+    if (ret <= 0) {
+        LOG(ERROR, "can't send msg to kernel by netlink: %s",
+            strerror(errno));
+    }
+
+    return ret;
+}
+
+/*
+ * Receive one netlink datagram from cps->sock_fd.
+ *
+ * timeout_us: receive timeout in microseconds, installed with SO_RCVTIMEO
+ *             for the duration of the call and cleared again afterwards;
+ *             0 leaves the socket's timeout setting untouched.
+ * buff:       on success *buff points to a malloc()ed buffer holding the
+ *             message, which the caller must free(); on failure *buff is
+ *             set to NULL.
+ *
+ * Returns the number of bytes received (> 0) on success.  Otherwise
+ * returns recvmsg()'s result: -1 on error or timeout (errno is left as
+ * set by recvmsg()), or 0 if recvmsg() returned 0.
+ */
+static int64_t colo_proxy_recv(libxl__colo_proxy_state *cps, uint8_t **buff,
+                               unsigned int timeout_us)
+{
+    struct sockaddr_nl sa;
+    struct iovec iov;
+    struct msghdr mh = {
+        .msg_name = &sa,
+        .msg_namelen = sizeof(sa),
+        .msg_iov = &iov,
+        .msg_iovlen = 1,
+    };
+    struct timeval tv;
+    uint32_t size = 16384;
+    int saved_errno = 0;
+    int ret;
+
+    STATE_AO_GC(cps->ao);
+    uint8_t *tmp = libxl__malloc(NOGC, size);
+
+    if (timeout_us) {
+        tv.tv_sec = timeout_us / 1000000;
+        tv.tv_usec = timeout_us % 1000000;
+        setsockopt(cps->sock_fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
+    }
+
+    iov.iov_base = tmp;
+    iov.iov_len = size;
+
+    /*
+     * Peek first.  With MSG_PEEK | MSG_TRUNC recvmsg() reports the real
+     * length of the pending datagram without dequeuing it, so the buffer
+     * can be grown before the actual read.  A datagram that was truncated
+     * by a too-small buffer cannot be continued with another recvmsg():
+     * the excess bytes are discarded and the next call would return a
+     * different message.
+     */
+    ret = recvmsg(cps->sock_fd, &mh, MSG_PEEK | MSG_TRUNC);
+    if (ret > 0) {
+        if ((uint32_t)ret > size) {
+            size = ret;
+            tmp = libxl__realloc(NOGC, tmp, size);
+            iov.iov_base = tmp;
+            iov.iov_len = size;
+        }
+        /* recvmsg() overwrites msg_namelen; restore the capacity. */
+        mh.msg_namelen = sizeof(sa);
+        ret = recvmsg(cps->sock_fd, &mh, 0);
+    }
+
+    if (ret <= 0) {
+        /* errno is only meaningful when recvmsg() actually failed. */
+        saved_errno = errno;
+        if (ret < 0 && saved_errno != EAGAIN && saved_errno != EWOULDBLOCK)
+            LOGE(ERROR, "can't recv msg from kernel by netlink");
+        goto err;
+    }
+
+    *buff = tmp;
+    goto out;
+
+err:
+    free(tmp);
+    *buff = NULL;
+
+out:
+    if (timeout_us) {
+        tv.tv_sec = 0;
+        tv.tv_usec = 0;
+        setsockopt(cps->sock_fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
+    }
+    /* Callers report strerror(errno); don't let cleanup clobber it. */
+    if (ret < 0)
+        errno = saved_errno;
+    return ret;
+}
+
+/* ========= colo-proxy: setup and teardown ========== */
+
+/*
+ * Open a NETLINK_COLO socket, bind it to a free index and send
+ * COLO_PROXY_INIT, then wait up to 0.5 s for the acknowledgement.
+ *
+ * cps->ao must be set by the caller.  On success cps->sock_fd holds the
+ * socket and cps->index the nl_pid it was bound to (the ids 1..9 are
+ * tried in turn).
+ *
+ * Returns 0 on success, ERROR_FAIL on any failure.  On failure no socket
+ * is left open and cps->sock_fd is -1.
+ */
+int colo_proxy_setup(libxl__colo_proxy_state *cps)
+{
+    int skfd;
+    struct sockaddr_nl sa;
+    struct nlmsghdr *h;
+    unsigned int pid;
+    int ret = ERROR_FAIL;
+    uint8_t *buff = NULL;
+    int64_t size;
+
+    STATE_AO_GC(cps->ao);
+
+    /* Make sure the error path never closes a stale descriptor. */
+    cps->sock_fd = -1;
+
+    skfd = socket(PF_NETLINK, SOCK_RAW, NETLINK_COLO);
+    if (skfd < 0) {
+        LOG(ERROR, "can not create a netlink socket: %s", strerror(errno));
+        goto out;
+    }
+    cps->sock_fd = skfd;
+
+    memset(&sa, 0, sizeof(sa));
+    sa.nl_family = AF_NETLINK;
+    sa.nl_groups = 0;
+
+    /* Try each candidate id until one is not already in use. */
+    for (pid = 1; pid < 10; pid++) {
+        sa.nl_pid = pid;
+
+        if (!bind(skfd, (struct sockaddr *)&sa, sizeof(sa)))
+            break;
+
+        if (errno != EADDRINUSE) {
+            LOG(ERROR, "netlink bind error");
+            goto out;
+        }
+        LOG(ERROR, "colo index %d has already in used", sa.nl_pid);
+    }
+    if (pid >= 10) {
+        LOG(ERROR, "netlink bind error");
+        goto out;
+    }
+
+    cps->index = sa.nl_pid;
+
+    /*
+     * colo_proxy_send() returns a byte count; keep it out of ret so that
+     * later failures still report ERROR_FAIL.
+     */
+    if (colo_proxy_send(cps, NULL, 0, COLO_PROXY_INIT) <= 0)
+        goto out;
+
+    /* receive ack */
+    size = colo_proxy_recv(cps, &buff, 500000);
+    if (size < 0) {
+        LOG(ERROR, "Can't recv msg from kernel by netlink: %s",
+            strerror(errno));
+        goto out;
+    }
+
+    if (size) {
+        /* Don't look at the header unless all of it was received. */
+        if ((size_t)size < sizeof(*h)) {
+            LOG(ERROR, "NLMSG_LENGTH is too short");
+            goto out;
+        }
+
+        h = (struct nlmsghdr *)buff;
+        if (h->nlmsg_type == NLMSG_ERROR) {
+            /* ack's type is NLMSG_ERROR */
+            struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(h);
+
+            if ((size_t)size - sizeof(*h) < sizeof(*err)) {
+                LOG(ERROR, "NLMSG_LENGTH is too short");
+                goto out;
+            }
+
+            if (err->error) {
+                LOG(ERROR, "NLMSG_ERROR contains error %d", err->error);
+                goto out;
+            }
+        }
+    }
+
+    ret = 0;
+
+out:
+    free(buff);
+    if (ret && cps->sock_fd >= 0) {
+        close(cps->sock_fd);
+        cps->sock_fd = -1;
+    }
+    return ret;
+}
+
+/*
+ * Close the proxy's netlink socket and mark it closed (sock_fd = -1).
+ * Does nothing when no socket is open (sock_fd < 0).
+ */
+void colo_proxy_teardown(libxl__colo_proxy_state *cps)
+{
+    if (cps->sock_fd < 0)
+        return;
+
+    close(cps->sock_fd);
+    cps->sock_fd = -1;
+}
+
+/* ========= colo-proxy: preresume, postresume and checkpoint ========== */
+
+/* Send a COLO_CHECKPOINT request to the proxy module before resuming. */
+void colo_proxy_preresume(libxl__colo_proxy_state *cps)
+{
+    /* TODO: need to handle if the call fails... */
+    (void)colo_proxy_send(cps, NULL, 0, COLO_CHECKPOINT);
+}
+
+void colo_proxy_postresume(libxl__colo_proxy_state *cps)
+{
+ /* Nothing to do: no message is sent to the proxy module here and cps is unused. */
+}
+
+typedef struct colo_msg { /* payload read from a message in colo_proxy_checkpoint(); presumably mirrors the proxy module's layout - TODO confirm */
+ bool is_checkpoint; /* true: colo_proxy_checkpoint() reports "do checkpoint" */
+} colo_msg;
+
+/*
+ * Wait up to timeout_us microseconds for a message from the proxy module
+ * and report whether it requests a checkpoint.
+ *
+ * Return value:
+ * -1: error (NLMSG_ERROR received, or the message is malformed)
+ * 0: no checkpoint event is received before timeout; a failed receive
+ *    is reported the same way
+ * 1: do checkpoint
+ */
+int colo_proxy_checkpoint(libxl__colo_proxy_state *cps,
+                          unsigned int timeout_us)
+{
+    uint8_t *buff;
+    int64_t size;
+    struct nlmsghdr *h;
+    struct colo_msg *m;
+    int ret = -1;
+
+    STATE_AO_GC(cps->ao);
+
+    size = colo_proxy_recv(cps, &buff, timeout_us);
+
+    /* timeout, return no checkpoint message. */
+    if (size <= 0)
+        return 0;
+
+    h = (struct nlmsghdr *) buff;
+
+    /*
+     * Check that a complete header arrived and that the length it claims
+     * fits in what was actually received, before trusting any field.
+     */
+    if (!NLMSG_OK(h, size)) {
+        LOG(ERROR, "NLMSG_LENGTH is too short");
+        goto out;
+    }
+
+    if (h->nlmsg_type == NLMSG_ERROR) {
+        LOG(ERROR, "receive NLMSG_ERROR");
+        goto out;
+    }
+
+    if (h->nlmsg_len < NLMSG_LENGTH(sizeof(*m))) {
+        LOG(ERROR, "NLMSG_LENGTH is too short");
+        goto out;
+    }
+
+    m = NLMSG_DATA(h);
+
+    ret = m->is_checkpoint ? 1 : 0;
+
+out:
+    free(buff);
+    return ret;
+}
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index c3366d7..8f02222 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -3216,6 +3216,9 @@ struct libxl__colo_save_state {
/* private, used by qdisk block replication */
bool qdisk_used;
bool qdisk_setuped;
+
+ /* private, used by colo-proxy */
+ libxl__colo_proxy_state cps;
};
typedef struct libxl__logdirty_switch {
--
1.9.3
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
next prev parent reply other threads:[~2016-03-25 6:44 UTC|newest]
Thread overview: 55+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-03-25 6:44 [PATCH v13 00/26] COarse-grain LOck-stepping Virtual Machines for Non-stop Service Changlong Xie
2016-03-25 6:44 ` [PATCH v13 01/26] tools/libxl: introduction of libxl__qmp_restore to load qemu state Changlong Xie
2016-03-25 6:44 ` [PATCH v13 02/26] tools/libxl: introduce libxl__domain_common_switch_qemu_logdirty() Changlong Xie
2016-03-25 6:44 ` [PATCH v13 03/26] tools/libxl: Add back channel to allow migration target send data back Changlong Xie
2016-04-04 12:07 ` Olaf Hering
2016-04-04 13:02 ` Wei Liu
2016-04-04 15:29 ` Olaf Hering
2016-03-25 6:44 ` [PATCH v13 04/26] tools/libxl: Introduce new helper function dup_fd_helper() Changlong Xie
2016-03-25 6:44 ` [PATCH v13 05/26] tools/libx{l, c}: add back channel to libxc Changlong Xie
2016-03-25 6:44 ` [PATCH v13 06/26] docs: add colo readme Changlong Xie
2016-03-25 6:44 ` [PATCH v13 07/26] docs/libxl: Introduce CHECKPOINT_CONTEXT to support migration v2 colo streams Changlong Xie
2016-03-25 6:44 ` [PATCH v13 08/26] libxc/migration: Specification update for DIRTY_PFN_LIST records Changlong Xie
2016-03-25 6:44 ` [PATCH v13 09/26] libxc/migration: export read_record for common use Changlong Xie
2016-03-25 6:44 ` [PATCH v13 10/26] tools/libxl: add back channel support to write stream Changlong Xie
2016-03-25 6:44 ` [PATCH v13 11/26] tools/libxl: add back channel support to read stream Changlong Xie
2016-03-25 6:44 ` [PATCH v13 12/26] secondary vm suspend/resume/checkpoint code Changlong Xie
2016-03-30 14:07 ` Ian Jackson
2016-03-25 6:44 ` [PATCH v13 13/26] libxl_internal: move stream read manipulations to right place Changlong Xie
2016-03-25 6:44 ` [PATCH v13 14/26] primary vm suspend/resume/checkpoint code Changlong Xie
2016-03-30 14:10 ` Ian Jackson
2016-03-25 6:44 ` [PATCH v13 15/26] libxc/restore: support COLO restore Changlong Xie
2016-03-25 6:44 ` [PATCH v13 16/26] libxc/save: support COLO save Changlong Xie
2016-03-25 6:44 ` [PATCH v13 17/26] implement the cmdline for COLO Changlong Xie
2016-03-25 6:44 ` [PATCH v13 18/26] COLO: introduce new API to prepare/start/do/get_error/stop replication Changlong Xie
2016-03-25 6:44 ` [PATCH v13 19/26] Introduce COLO mode and refactor relevant function Changlong Xie
2016-03-25 6:44 ` [PATCH v13 20/26] Support colo mode for qemu disk Changlong Xie
2016-03-28 3:46 ` [PATCH v13.1 " Changlong Xie
2016-03-30 14:17 ` Ian Jackson
2016-03-30 14:36 ` Ian Jackson
2016-03-25 6:44 ` [PATCH v13 21/26] COLO: use qemu block replication Changlong Xie
2016-03-25 6:44 ` Changlong Xie [this message]
2016-03-25 6:44 ` [PATCH v13 23/26] COLO nic: implement COLO nic subkind Changlong Xie
2016-03-25 12:56 ` Wei Liu
2016-03-28 3:46 ` [PATCH v13.1 " Changlong Xie
2016-03-30 14:22 ` Ian Jackson
2016-03-30 14:38 ` Ian Jackson
2016-03-30 14:40 ` Ian Jackson
2016-03-25 6:44 ` [PATCH v13 24/26] setup and control colo proxy on primary side Changlong Xie
2016-03-25 6:44 ` [PATCH v13 25/26] setup and control colo proxy on secondary side Changlong Xie
2016-03-30 14:24 ` Ian Jackson
2016-03-31 2:19 ` Changlong Xie
2016-03-25 6:44 ` [PATCH v13 26/26] cmdline switches and config vars to control colo-proxy Changlong Xie
2016-03-28 3:47 ` [PATCH v13.1 " Changlong Xie
2016-03-30 14:28 ` Ian Jackson
2016-03-30 14:42 ` Ian Jackson
2016-03-25 15:51 ` [PATCH v13 00/26] COarse-grain LOck-stepping Virtual Machines for Non-stop Service Wei Liu
2016-03-28 3:52 ` Changlong Xie
2016-03-30 14:52 ` Ian Jackson
2016-03-30 14:50 ` Ian Jackson
2016-03-31 1:26 ` Wen Congyang
2016-03-31 2:28 ` Changlong Xie
2016-03-31 14:22 ` Wei Liu
2016-04-01 1:59 ` Changlong Xie
2016-04-01 13:47 ` Ian Jackson
2016-04-01 14:37 ` Changlong Xie
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1458888273-7469-23-git-send-email-xiecl.fnst@cn.fujitsu.com \
--to=xiecl.fnst@cn.fujitsu.com \
--cc=andrew.cooper3@citrix.com \
--cc=anthony.perard@citrix.com \
--cc=eddie.dong@intel.com \
--cc=guijianfeng@cn.fujitsu.com \
--cc=hongyang.yang@easystack.cn \
--cc=ian.campbell@citrix.com \
--cc=ian.jackson@eu.citrix.com \
--cc=konrad.wilk@oracle.com \
--cc=lars.kurth@citrix.com \
--cc=lizhijian@cn.fujitsu.com \
--cc=rshriram@cs.ubc.ca \
--cc=wei.liu2@citrix.com \
--cc=wency@cn.fujitsu.com \
--cc=xen-devel@lists.xen.org \
--cc=yunhong.jiang@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).