xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: Changlong Xie <xiecl.fnst@cn.fujitsu.com>
To: xen devel <xen-devel@lists.xen.org>,
	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>,
	Andrew Cooper <andrew.cooper3@citrix.com>,
	Ian Campbell <ian.campbell@citrix.com>,
	Ian Jackson <ian.jackson@eu.citrix.com>,
	Wei Liu <wei.liu2@citrix.com>
Cc: Lars Kurth <lars.kurth@citrix.com>,
	Changlong Xie <xiecl.fnst@cn.fujitsu.com>,
	Wen Congyang <wency@cn.fujitsu.com>,
	Gui Jianfeng <guijianfeng@cn.fujitsu.com>,
	Jiang Yunhong <yunhong.jiang@intel.com>,
	Dong Eddie <eddie.dong@intel.com>,
	Anthony Perard <anthony.perard@citrix.com>,
	Shriram Rajagopalan <rshriram@cs.ubc.ca>,
	Yang Hongyang <hongyang.yang@easystack.cn>
Subject: [PATCH v11 22/27] COLO proxy: implement setup/teardown of COLO proxy module
Date: Fri, 4 Mar 2016 16:41:26 +0800	[thread overview]
Message-ID: <1457080891-26054-23-git-send-email-xiecl.fnst@cn.fujitsu.com> (raw)
In-Reply-To: <1457080891-26054-1-git-send-email-xiecl.fnst@cn.fujitsu.com>

From: Wen Congyang <wency@cn.fujitsu.com>

Implement setup/teardown of the COLO proxy module.
We use netlink to communicate with the proxy module.
About colo-proxy module:
https://lkml.org/lkml/2015/6/18/32
How to use:
http://wiki.xen.org/wiki/COLO_-_Coarse_Grain_Lock_Stepping

Signed-off-by: Yang Hongyang <hongyang.yang@easystack.cn>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Changlong Xie <xiecl.fnst@cn.fujitsu.com>
---
 tools/libxl/Makefile           |   1 +
 tools/libxl/libxl_colo.h       |  27 +++++
 tools/libxl/libxl_colo_proxy.c | 218 +++++++++++++++++++++++++++++++++++++++++
 tools/libxl/libxl_internal.h   |   3 +
 4 files changed, 249 insertions(+)
 create mode 100644 tools/libxl/libxl_colo_proxy.c

diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index 28d54d0..6fea9e0 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -67,6 +67,7 @@ endif
 LIBXL_OBJS-y += libxl_remus.o libxl_checkpoint_device.o libxl_remus_disk_drbd.o
 LIBXL_OBJS-y += libxl_colo_restore.o libxl_colo_save.o
 LIBXL_OBJS-y += libxl_colo_qdisk.o
+LIBXL_OBJS-y += libxl_colo_proxy.o
 
 LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o libxl_psr.o
 LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o libxl_libfdt_compat.o
diff --git a/tools/libxl/libxl_colo.h b/tools/libxl/libxl_colo.h
index 90345f4..9e7f99c 100644
--- a/tools/libxl/libxl_colo.h
+++ b/tools/libxl/libxl_colo.h
@@ -16,21 +16,42 @@
 #ifndef LIBXL_COLO_H
 #define LIBXL_COLO_H
 
+#include <linux/netlink.h>
+
 struct libxl__ao;
 struct libxl__egc;
 struct libxl__colo_save_state;
 struct libxl__checkpoint_devices_state;
 
+#define NETLINK_COLO 28
+
 enum {
     LIBXL_COLO_SETUPED,
     LIBXL_COLO_SUSPENDED,
     LIBXL_COLO_RESUMED,
 };
 
+enum colo_netlink_op { /* netlink message types, used as nlmsg_type */
+    COLO_QUERY_CHECKPOINT = (NLMSG_MIN_TYPE + 1),
+    COLO_CHECKPOINT,
+    COLO_FAILOVER,
+    COLO_PROXY_INIT, /* sent by colo_proxy_setup(), which requests an ack */
+    COLO_PROXY_RESET, /* UNUSED, will be used for continuous FT */
+};
+
 typedef struct libxl__colo_qdisk {
     bool setuped;
 } libxl__colo_qdisk;
 
+typedef struct libxl__colo_proxy_state libxl__colo_proxy_state;
+struct libxl__colo_proxy_state {
+    /* set by caller of colo_proxy_setup */
+    struct libxl__ao *ao;
+
+    int sock_fd; /* netlink socket; colo_proxy_teardown() resets it to -1 */
+    int index; /* nl_pid the socket was bound to by colo_proxy_setup() */
+};
+
 typedef struct libxl__domain_create_state libxl__domain_create_state;
 typedef void libxl__domain_create_cb(struct libxl__egc *egc,
                                      libxl__domain_create_state *dcs,
@@ -58,6 +79,9 @@ struct libxl__colo_restore_state {
     bool qdisk_setuped;
     const char *host;
     const char *port;
+
+    /* private, used by colo-proxy */
+    libxl__colo_proxy_state cps;
 };
 
 int init_subkind_qdisk(struct libxl__checkpoint_devices_state *cds);
@@ -73,4 +97,7 @@ extern void libxl__colo_save_setup(struct libxl__egc *egc,
 extern void libxl__colo_save_teardown(struct libxl__egc *egc,
                                       struct libxl__colo_save_state *css,
                                       int rc);
+extern int colo_proxy_setup(libxl__colo_proxy_state *cps);
+extern void colo_proxy_teardown(libxl__colo_proxy_state *cps);
+
 #endif
diff --git a/tools/libxl/libxl_colo_proxy.c b/tools/libxl/libxl_colo_proxy.c
new file mode 100644
index 0000000..2b3baa3
--- /dev/null
+++ b/tools/libxl/libxl_colo_proxy.c
@@ -0,0 +1,218 @@
+/*
+ * Copyright (C) 2015 FUJITSU LIMITED
+ * Author: Yang Hongyang <hongyang.yang@easystack.cn>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ */
+
+#include "libxl_osdeps.h" /* must come before any other headers */
+
+#include "libxl_internal.h"
+
+/* ========= colo-proxy: helper functions ========== */
+
+static int colo_proxy_send(libxl__colo_proxy_state *cps, uint8_t *buff,
+                           uint64_t size, int type)
+{
+    /*
+     * Send a header-only netlink request of the given type to the kernel.
+     * buff and size are accepted but not used.  Returns the sendmsg()
+     * result unchanged; a result <= 0 is logged.
+     */
+    STATE_AO_GC(cps->ao);
+
+    struct sockaddr_nl dest = {
+        .nl_family = AF_NETLINK,
+        .nl_pid = 0,
+        .nl_groups = 0,
+    };
+    struct nlmsghdr hdr = {
+        .nlmsg_len = NLMSG_SPACE(0),
+        .nlmsg_type = type,
+        .nlmsg_flags = NLM_F_REQUEST,
+        .nlmsg_seq = 0,
+        /* self-reported sender id (noted as untrustworthy) */
+        .nlmsg_pid = cps->index,
+    };
+    struct iovec vec = { .iov_base = &hdr, .iov_len = hdr.nlmsg_len };
+    struct msghdr mh = {
+        .msg_name = &dest,
+        .msg_namelen = sizeof(dest),
+        .msg_iov = &vec,
+        .msg_iovlen = 1,
+        .msg_control = NULL,
+        .msg_controllen = 0,
+        .msg_flags = 0,
+    };
+    int sent;
+
+    /* only the init request asks the kernel for an ack */
+    if (type == COLO_PROXY_INIT)
+        hdr.nlmsg_flags |= NLM_F_ACK;
+    sent = sendmsg(cps->sock_fd, &mh, 0);
+    if (sent <= 0) {
+        LOG(ERROR, "can't send msg to kernel by netlink: %s",
+            strerror(errno));
+    }
+    return sent;
+}
+
+/* error: returns <= 0 with *buff NULL; else bytes received, *buff malloc'd */
+static int64_t colo_proxy_recv(libxl__colo_proxy_state *cps, uint8_t **buff,
+                               unsigned int timeout_us)
+{
+    struct sockaddr_nl sa;
+    struct iovec iov;
+    struct msghdr mh = {
+        .msg_name = &sa,
+        .msg_namelen = sizeof(sa),
+        .msg_iov = &iov,
+        .msg_iovlen = 1,
+    };
+    struct timeval tv;
+    uint32_t size = 16384; /* current capacity of tmp */
+    int64_t len = 0; /* bytes received into tmp so far */
+    int ret;
+
+    STATE_AO_GC(cps->ao);
+    uint8_t *tmp = libxl__malloc(NOGC, size); /* NOGC: released via free() */
+
+    if (timeout_us) { /* 0 leaves the socket's receive timeout as it is */
+        tv.tv_sec = timeout_us / 1000000;
+        tv.tv_usec = timeout_us % 1000000;
+        setsockopt(cps->sock_fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
+    }
+
+    iov.iov_base = tmp;
+    iov.iov_len = size;
+next:
+    ret = recvmsg(cps->sock_fd, &mh, 0);
+    if (ret <= 0) {
+        if (errno != EAGAIN && errno != EWOULDBLOCK) /* these two: no log */
+            LOGE(ERROR, "can't recv msg from kernel by netlink");
+        goto err;
+    }
+    /* NOTE(review): the MSG_TRUNC retry assumes the rest is still queued */
+    len += ret;
+    if (mh.msg_flags & MSG_TRUNC) { /* did not fit: grow by 16384, recv again */
+        size += 16384;
+        tmp = libxl__realloc(NOGC, tmp, size);
+        iov.iov_base = tmp + len; /* append after what was already received */
+        iov.iov_len = size - len;
+        goto next;
+    }
+
+    *buff = tmp; /* success: the buffer is handed to the caller */
+    ret = len;
+    goto out;
+
+err:
+    free(tmp);
+    *buff = NULL;
+
+out: /* reached on both the success and the error path */
+    if (timeout_us) { /* put the receive timeout back to 0 */
+        tv.tv_sec = 0;
+        tv.tv_usec = 0;
+        setsockopt(cps->sock_fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
+    }
+    return ret;
+}
+
+/* ========= colo-proxy: setup and teardown ========== */
+
+int colo_proxy_setup(libxl__colo_proxy_state *cps)
+{
+    /*
+     * Open a NETLINK_COLO socket, bind it to the first free index in 1..9,
+     * send COLO_PROXY_INIT and wait up to 500ms for the kernel's reply.
+     * cps->ao must be set by the caller.  Returns 0 on success; on any
+     * failure returns ERROR_FAIL with cps->sock_fd closed and set to -1.
+     */
+    struct sockaddr_nl sa;
+    struct nlmsghdr *h;
+    int i, ret = ERROR_FAIL;
+    uint8_t *buff = NULL;
+    int64_t size;
+
+    STATE_AO_GC(cps->ao);
+
+    /* socket() gives -1 on failure, so "out" never closes a stale fd */
+    cps->sock_fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_COLO);
+    if (cps->sock_fd < 0) {
+        LOG(ERROR, "can not create a netlink socket: %s", strerror(errno));
+        goto out;
+    }
+
+    memset(&sa, 0, sizeof(sa));
+    sa.nl_family = AF_NETLINK;
+    sa.nl_groups = 0;
+    for (i = 1; i < 10; i++) {
+        sa.nl_pid = i;
+        if (bind(cps->sock_fd, (struct sockaddr *)&sa, sizeof(sa)) == 0)
+            break;
+        if (errno != EADDRINUSE) {
+            LOG(ERROR, "netlink bind error");
+            goto out;
+        }
+        LOG(ERROR, "colo index %d has already in used", sa.nl_pid);
+    }
+    if (i == 10) {
+        LOG(ERROR, "netlink bind error");
+        goto out;
+    }
+    cps->index = sa.nl_pid;
+    if (colo_proxy_send(cps, NULL, 0, COLO_PROXY_INIT) < 0)
+        goto out;
+
+    /* receive ack */
+    size = colo_proxy_recv(cps, &buff, 500000);
+    if (size < 0) {
+        LOG(ERROR, "Can't recv msg from kernel by netlink: %s",
+            strerror(errno));
+        goto out;
+    }
+
+    /* a reply too short to hold a header is skipped like an empty one */
+    if (size >= (int64_t)sizeof(*h)) {
+        h = (struct nlmsghdr *)buff;
+        if (h->nlmsg_type == NLMSG_ERROR) {
+            /* ack's type is NLMSG_ERROR */
+            struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(h);
+            /* size >= sizeof(*h) here, so this cannot wrap around */
+            if (size - sizeof(*h) < sizeof(*err)) {
+                LOG(ERROR, "NLMSG_LENGTH is too short");
+                goto out;
+            }
+            if (err->error) {
+                LOG(ERROR, "NLMSG_ERROR contains error %d", err->error);
+                goto out;
+            }
+        }
+    }
+
+    ret = 0;
+out:
+    free(buff);
+    if (ret && cps->sock_fd >= 0) {
+        close(cps->sock_fd);
+        cps->sock_fd = -1;
+    }
+    return ret;
+}
+
+void colo_proxy_teardown(libxl__colo_proxy_state *cps)
+{
+    /* Close the proxy socket and mark it closed; no-op if sock_fd < 0. */
+    if (cps->sock_fd < 0) return;
+    close(cps->sock_fd);
+    cps->sock_fd = -1;
+}
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 0aeaf18..4940b59 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -3194,6 +3194,9 @@ struct libxl__colo_save_state {
     /* private, used by qdisk block replication */
     bool qdisk_used;
     bool qdisk_setuped;
+
+    /* private, used by colo-proxy */
+    libxl__colo_proxy_state cps;
 };
 
 typedef struct libxl__logdirty_switch {
-- 
1.9.3




_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

  parent reply	other threads:[~2016-03-04  8:41 UTC|newest]

Thread overview: 100+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-03-04  8:41 [PATCH v11 00/27] COarse-grain LOck-stepping Virtual Machines for Non-stop Service Changlong Xie
2016-03-04  8:41 ` [PATCH v11 01/27] tools/libxl: introduction of libxl__qmp_restore to load qemu state Changlong Xie
2016-03-04 16:30   ` Ian Jackson
2016-03-14  9:03     ` Changlong Xie
2016-03-04  8:41 ` [PATCH v11 02/27] tools/libxl: introduce libxl__domain_common_switch_qemu_logdirty() Changlong Xie
2016-03-04  8:41 ` [PATCH v11 03/27] tools/libxl: Add back channel to allow migration target send data back Changlong Xie
2016-03-04 16:38   ` Ian Jackson
2016-03-08 16:38     ` Wei Liu
2016-03-17  8:07     ` Changlong Xie
2016-03-04  8:41 ` [PATCH v11 04/27] tools/libxl: Introduce new helper function dup_fd_helper() Changlong Xie
2016-03-04 16:42   ` Ian Jackson
2016-03-17  8:08     ` Changlong Xie
2016-03-04  8:41 ` [PATCH v11 05/27] tools/libx{l, c}: add back channel to libxc Changlong Xie
2016-03-04 16:45   ` Ian Jackson
2016-03-04  8:41 ` [PATCH v11 06/27] docs: add colo readme Changlong Xie
2016-03-04  8:41 ` [PATCH v11 07/27] docs/libxl: Introduce CHECKPOINT_CONTEXT to support migration v2 colo streams Changlong Xie
2016-03-04 16:51   ` Ian Jackson
2016-03-08 16:38     ` Wei Liu
2016-03-11  7:13     ` Wen Congyang
2016-03-04  8:41 ` [PATCH v11 08/27] libxc/migration: Specification update for DIRTY_PFN_LIST records Changlong Xie
2016-03-04 16:53   ` Ian Jackson
2016-03-17  8:10     ` Changlong Xie
2016-03-04  8:41 ` [PATCH v11 09/27] libxc/migration: export read_record for common use Changlong Xie
2016-03-04 16:55   ` Ian Jackson
2016-03-04  8:41 ` [PATCH v11 10/27] tools/libxl: add back channel support to write stream Changlong Xie
2016-03-04 17:00   ` Ian Jackson
2016-03-07  2:13     ` Wen Congyang
2016-03-11  9:05     ` Wen Congyang
2016-03-17  8:11     ` Changlong Xie
2016-03-04  8:41 ` [PATCH v11 11/27] tools/libxl: add back channel support to read stream Changlong Xie
2016-03-04 17:01   ` Ian Jackson
2016-03-04  8:41 ` [PATCH v11 12/27] tools/libx{l, c}: introduce wait_checkpoint callback Changlong Xie
2016-03-04 17:03   ` Ian Jackson
2016-03-04 20:23     ` Konrad Rzeszutek Wilk
2016-03-07  2:16       ` Wen Congyang
2016-03-17  8:16     ` Changlong Xie
2016-03-04  8:41 ` [PATCH v11 13/27] tools/libx{l, c}: add postcopy/suspend callback to restore side Changlong Xie
2016-03-04 17:05   ` Ian Jackson
2016-03-17  8:17     ` Changlong Xie
2016-03-04  8:41 ` [PATCH v11 14/27] secondary vm suspend/resume/checkpoint code Changlong Xie
2016-03-04 17:11   ` Ian Jackson
2016-03-07  2:57     ` Wen Congyang
2016-03-17  9:03     ` Changlong Xie
2016-03-17 12:19       ` Wei Liu
2016-03-04  8:41 ` [PATCH v11 15/27] primary " Changlong Xie
2016-03-04 17:14   ` Ian Jackson
2016-03-07  2:59     ` Wen Congyang
2016-03-04  8:41 ` [PATCH v11 16/27] libxc/restore: support COLO restore Changlong Xie
2016-03-04 17:16   ` Ian Jackson
2016-03-04  8:41 ` [PATCH v11 17/27] libxc/save: support COLO save Changlong Xie
2016-03-04 17:18   ` Ian Jackson
2016-03-07  3:00     ` Wen Congyang
2016-03-04  8:41 ` [PATCH v11 18/27] implement the cmdline for COLO Changlong Xie
2016-03-04 17:22   ` Ian Jackson
2016-03-07  3:04     ` Wen Congyang
2016-03-04  8:41 ` [PATCH v11 19/27] COLO: introduce new API to prepare/start/do/get_error/stop replication Changlong Xie
2016-03-04 17:26   ` Ian Jackson
2016-03-08 16:46     ` Wei Liu
2016-03-18  3:44     ` Changlong Xie
2016-03-18 11:35       ` Wei Liu
2016-03-18  3:45     ` Changlong Xie
2016-03-04 17:29   ` Ian Jackson
2016-03-18  3:49     ` Changlong Xie
2016-03-04  8:41 ` [PATCH v11 20/27] Support colo mode for qemu disk Changlong Xie
2016-03-04 17:44   ` Ian Jackson
2016-03-07  2:06     ` Wen Congyang
2016-03-17 17:18       ` Ian Jackson
2016-03-18  5:42         ` Wen Congyang
2016-03-04 17:52   ` Ian Jackson
2016-03-04 20:30     ` Konrad Rzeszutek Wilk
2016-03-07  2:10       ` Wen Congyang
2016-03-08 17:22         ` Wei Liu
2016-03-09  2:09           ` Konrad Rzeszutek Wilk
2016-03-09 16:55             ` Wei Liu
2016-03-17 17:09           ` Ian Jackson
2016-03-17 17:10       ` Ian Jackson
2016-03-04  8:41 ` [PATCH v11 21/27] COLO: use qemu block replication Changlong Xie
2016-03-04  8:41 ` Changlong Xie [this message]
2016-03-04 17:59   ` [PATCH v11 22/27] COLO proxy: implement setup/teardown of COLO proxy module Ian Jackson
2016-03-18  8:22     ` Changlong Xie
2016-03-22  5:44     ` Changlong Xie
2016-03-22  5:55       ` Changlong Xie
2016-03-04  8:41 ` [PATCH v11 23/27] COLO proxy: preresume, postresume and checkpoint Changlong Xie
2016-03-04 18:01   ` Ian Jackson
2016-03-18  8:20     ` Changlong Xie
2016-03-04  8:41 ` [PATCH v11 24/27] COLO nic: implement COLO nic subkind Changlong Xie
2016-03-04 18:02   ` Ian Jackson
2016-03-18  8:20     ` Changlong Xie
2016-03-04  8:41 ` [PATCH v11 25/27] setup and control colo proxy on primary side Changlong Xie
2016-03-04 18:05   ` Ian Jackson
2016-03-22  6:01     ` Changlong Xie
2016-03-04  8:41 ` [PATCH v11 26/27] setup and control colo proxy on secondary side Changlong Xie
2016-03-04 18:05   ` Ian Jackson
2016-03-04  8:41 ` [PATCH v11 27/27] cmdline switches and config vars to control colo-proxy Changlong Xie
2016-03-04 18:09   ` Ian Jackson
2016-03-22  4:13     ` Changlong Xie
2016-03-04 18:17 ` [PATCH v11 00/27] COarse-grain LOck-stepping Virtual Machines for Non-stop Service Ian Jackson
2016-03-04 20:35   ` Konrad Rzeszutek Wilk
2016-03-17 17:19     ` Ian Jackson
2016-03-17 17:41   ` Ian Jackson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1457080891-26054-23-git-send-email-xiecl.fnst@cn.fujitsu.com \
    --to=xiecl.fnst@cn.fujitsu.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=anthony.perard@citrix.com \
    --cc=eddie.dong@intel.com \
    --cc=guijianfeng@cn.fujitsu.com \
    --cc=hongyang.yang@easystack.cn \
    --cc=ian.campbell@citrix.com \
    --cc=ian.jackson@eu.citrix.com \
    --cc=konrad.wilk@oracle.com \
    --cc=lars.kurth@citrix.com \
    --cc=rshriram@cs.ubc.ca \
    --cc=wei.liu2@citrix.com \
    --cc=wency@cn.fujitsu.com \
    --cc=xen-devel@lists.xen.org \
    --cc=yunhong.jiang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).