All of lore.kernel.org
 help / color / mirror / Atom feed
From: Wen Congyang <wency@cn.fujitsu.com>
To: xen devel <xen-devel@lists.xen.org>,
	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>,
	Andrew Cooper <andrew.cooper3@citrix.com>,
	Ian Campbell <ian.campbell@citrix.com>,
	Ian Jackson <ian.jackson@eu.citrix.com>,
	Wei Liu <wei.liu2@citrix.com>
Cc: Lars Kurth <lars.kurth@citrix.com>,
	Changlong Xie <xiecl.fnst@cn.fujitsu.com>,
	Wen Congyang <wency@cn.fujitsu.com>,
	Gui Jianfeng <guijianfeng@cn.fujitsu.com>,
	Jiang Yunhong <yunhong.jiang@intel.com>,
	Dong Eddie <eddie.dong@intel.com>,
	Shriram Rajagopalan <rshriram@cs.ubc.ca>,
	Yang Hongyang <hongyang.yang@easystack.cn>
Subject: [PATCH v10 26/31] COLO proxy: implement setup/teardown of COLO proxy module
Date: Mon, 22 Feb 2016 10:52:30 +0800	[thread overview]
Message-ID: <1456109555-28299-27-git-send-email-wency@cn.fujitsu.com> (raw)
In-Reply-To: <1456109555-28299-1-git-send-email-wency@cn.fujitsu.com>

setup/teardown of COLO proxy module.
we use netlink to communicate with proxy module.
About colo-proxy module:
https://lkml.org/lkml/2015/6/18/32
How to use:
http://wiki.xen.org/wiki/COLO_-_Coarse_Grain_Lock_Stepping

Signed-off-by: Yang Hongyang <hongyang.yang@easystack.cn>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
---
 tools/libxl/Makefile           |   1 +
 tools/libxl/libxl_colo.h       |   2 +
 tools/libxl/libxl_colo_proxy.c | 230 +++++++++++++++++++++++++++++++++++++++++
 tools/libxl/libxl_internal.h   |  15 +++
 4 files changed, 248 insertions(+)
 create mode 100644 tools/libxl/libxl_colo_proxy.c

diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index 28d54d0..6fea9e0 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -67,6 +67,7 @@ endif
 LIBXL_OBJS-y += libxl_remus.o libxl_checkpoint_device.o libxl_remus_disk_drbd.o
 LIBXL_OBJS-y += libxl_colo_restore.o libxl_colo_save.o
 LIBXL_OBJS-y += libxl_colo_qdisk.o
+LIBXL_OBJS-y += libxl_colo_proxy.o
 
 LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o libxl_psr.o
 LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o libxl_libfdt_compat.o
diff --git a/tools/libxl/libxl_colo.h b/tools/libxl/libxl_colo.h
index b08f672..6d8cd4f 100644
--- a/tools/libxl/libxl_colo.h
+++ b/tools/libxl/libxl_colo.h
@@ -27,4 +27,6 @@ extern void libxl__colo_save_teardown(libxl__egc *egc,
                                       libxl__colo_save_state *css,
                                       int rc);
 
+extern int colo_proxy_setup(libxl__colo_proxy_state *cps);
+extern void colo_proxy_teardown(libxl__colo_proxy_state *cps);
 #endif
diff --git a/tools/libxl/libxl_colo_proxy.c b/tools/libxl/libxl_colo_proxy.c
new file mode 100644
index 0000000..e07e640
--- /dev/null
+++ b/tools/libxl/libxl_colo_proxy.c
@@ -0,0 +1,230 @@
+/*
+ * Copyright (C) 2015 FUJITSU LIMITED
+ * Author: Yang Hongyang <hongyang.yang@easystack.cn>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ */
+
+#include "libxl_osdeps.h" /* must come before any other headers */
+
+#include "libxl_internal.h"
+#include "libxl_colo.h"
+#include <linux/netlink.h>
+
+#define NETLINK_COLO 28
+
+enum colo_netlink_op {
+    COLO_QUERY_CHECKPOINT = (NLMSG_MIN_TYPE + 1),
+    COLO_CHECKPOINT,
+    COLO_FAILOVER,
+    COLO_PROXY_INIT,
+    COLO_PROXY_RESET, /* UNUSED, will be used for continuous FT */
+};
+
+/* ========= colo-proxy: helper functions ========== */
+
+static int colo_proxy_send(libxl__colo_proxy_state *cps, uint8_t *buff,
+                           uint64_t size, int type)
+{
+    struct sockaddr_nl sa;
+    struct nlmsghdr msg;
+    struct iovec iov;
+    struct msghdr mh;
+    int ret;
+
+    STATE_AO_GC(cps->ao);
+
+    memset(&sa, 0, sizeof(sa));
+    sa.nl_family = AF_NETLINK;
+    sa.nl_pid = 0;
+    sa.nl_groups = 0;
+
+    msg.nlmsg_len = NLMSG_SPACE(0);
+    msg.nlmsg_flags = NLM_F_REQUEST;
+    if (type == COLO_PROXY_INIT) {
+        msg.nlmsg_flags |= NLM_F_ACK;
+    }
+    msg.nlmsg_seq = 0;
+    /* This is untrusty */
+    msg.nlmsg_pid = cps->index;
+    msg.nlmsg_type = type;
+
+    iov.iov_base = &msg;
+    iov.iov_len = msg.nlmsg_len;
+
+    mh.msg_name = &sa;
+    mh.msg_namelen = sizeof(sa);
+    mh.msg_iov = &iov;
+    mh.msg_iovlen = 1;
+    mh.msg_control = NULL;
+    mh.msg_controllen = 0;
+    mh.msg_flags = 0;
+
+    ret = sendmsg(cps->sock_fd, &mh, 0);
+    if (ret <= 0) {
+        LOG(ERROR, "can't send msg to kernel by netlink: %s",
+            strerror(errno));
+    }
+
+    return ret;
+}
+
+/* error: return -1, otherwise return 0 */
+static int64_t colo_proxy_recv(libxl__colo_proxy_state *cps, uint8_t **buff,
+                               unsigned int timeout_us)
+{
+    struct sockaddr_nl sa;
+    struct iovec iov;
+    struct msghdr mh = {
+        .msg_name = &sa,
+        .msg_namelen = sizeof(sa),
+        .msg_iov = &iov,
+        .msg_iovlen = 1,
+    };
+    struct timeval tv;
+    uint32_t size = 16384;
+    int64_t len = 0;
+    int ret;
+
+    STATE_AO_GC(cps->ao);
+    uint8_t *tmp = libxl__malloc(NOGC, size);
+
+    if (timeout_us) {
+        tv.tv_sec = timeout_us / 1000000;
+        tv.tv_usec = timeout_us % 1000000;
+        setsockopt(cps->sock_fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
+    }
+
+    iov.iov_base = tmp;
+    iov.iov_len = size;
+next:
+    ret = recvmsg(cps->sock_fd, &mh, 0);
+    if (ret <= 0) {
+        if (errno != EAGAIN && errno != EWOULDBLOCK)
+            LOGE(ERROR, "can't recv msg from kernel by netlink");
+        goto err;
+    }
+
+    len += ret;
+    if (mh.msg_flags & MSG_TRUNC) {
+        size += 16384;
+        tmp = libxl__realloc(NOGC, tmp, size);
+        iov.iov_base = tmp + len;
+        iov.iov_len = size - len;
+        goto next;
+    }
+
+    *buff = tmp;
+    ret = len;
+    goto out;
+
+err:
+    free(tmp);
+    *buff = NULL;
+
+out:
+    if (timeout_us) {
+        tv.tv_sec = 0;
+        tv.tv_usec = 0;
+        setsockopt(cps->sock_fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
+    }
+    return ret;
+}
+
+/* ========= colo-proxy: setup and teardown ========== */
+
+int colo_proxy_setup(libxl__colo_proxy_state *cps)
+{
+    int skfd = 0;
+    struct sockaddr_nl sa;
+    struct nlmsghdr *h;
+    int i = 1;
+    int ret = ERROR_FAIL;
+    uint8_t *buff = NULL;
+    int64_t size;
+
+    STATE_AO_GC(cps->ao);
+
+    skfd = socket(PF_NETLINK, SOCK_RAW, NETLINK_COLO);
+    if (skfd < 0) {
+        LOG(ERROR, "can not create a netlink socket: %s", strerror(errno));
+        goto out;
+    }
+    cps->sock_fd = skfd;
+    memset(&sa, 0, sizeof(sa));
+    sa.nl_family = AF_NETLINK;
+    sa.nl_groups = 0;
+retry:
+    sa.nl_pid = i++;
+
+    if (i > 10) {
+        LOG(ERROR, "netlink bind error");
+        goto out;
+    }
+
+    ret = bind(skfd, (struct sockaddr *)&sa, sizeof(sa));
+    if (ret < 0 && errno == EADDRINUSE) {
+        LOG(ERROR, "colo index %d has already in used", sa.nl_pid);
+        goto retry;
+    } else if (ret < 0) {
+        LOG(ERROR, "netlink bind error");
+        goto out;
+    }
+
+    cps->index = sa.nl_pid;
+    ret = colo_proxy_send(cps, NULL, 0, COLO_PROXY_INIT);
+    if (ret < 0) {
+        goto out;
+    }
+    /* receive ack */
+    size = colo_proxy_recv(cps, &buff, 500000);
+    if (size < 0) {
+        LOG(ERROR, "Can't recv msg from kernel by netlink: %s",
+            strerror(errno));
+        goto out;
+    }
+
+    if (size) {
+        h = (struct nlmsghdr *)buff;
+        if (h->nlmsg_type == NLMSG_ERROR) {
+            /* ack's type is NLMSG_ERROR */
+            struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(h);
+
+            if (size - sizeof(*h) < sizeof(*err)) {
+                LOG(ERROR, "NLMSG_LENGTH is too short");
+                goto out;
+            }
+
+            if (err->error) {
+                LOG(ERROR, "NLMSG_ERROR contains error %d", err->error);
+                goto out;
+            }
+        }
+    }
+
+    ret = 0;
+
+out:
+    free(buff);
+    if (ret) {
+        close(cps->sock_fd);
+        cps->sock_fd = -1;
+    }
+    return ret;
+}
+
+void colo_proxy_teardown(libxl__colo_proxy_state *cps)
+{
+    if (cps->sock_fd >= 0) {
+        close(cps->sock_fd);
+        cps->sock_fd = -1;
+    }
+}
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 3af5fdd..3b44b09 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -3112,6 +3112,15 @@ libxl__stream_read_inuse(const libxl__stream_read_state *stream)
 }
 
 /*----- colo related state structure -----*/
+typedef struct libxl__colo_proxy_state libxl__colo_proxy_state;
+struct libxl__colo_proxy_state {
+    /* set by caller of colo_proxy_setup */
+    libxl__ao *ao;
+
+    int sock_fd;
+    int index;
+};
+
 typedef struct libxl__colo_save_state libxl__colo_save_state;
 struct libxl__colo_save_state {
     int send_fd;
@@ -3126,6 +3135,9 @@ struct libxl__colo_save_state {
     /* private, used by qdisk block replication */
     bool qdisk_used;
     bool qdisk_setuped;
+
+    /* private, used by colo-proxy */
+    libxl__colo_proxy_state cps;
 };
 
 /*----- Domain suspend (save) state structure -----*/
@@ -3535,6 +3547,9 @@ struct libxl__colo_restore_state {
     bool qdisk_setuped;
     const char *host;
     const char *port;
+
+    /* private, used by colo-proxy */
+    libxl__colo_proxy_state cps;
 };
 
 struct libxl__domain_create_state {
-- 
2.5.0

  parent reply	other threads:[~2016-02-22  2:52 UTC|newest]

Thread overview: 70+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-02-22  2:52 [PATCH v10 00/31] COarse-grain LOck-stepping Virtual Machines for Non-stop Service Wen Congyang
2016-02-22  2:52 ` [PATCH v10 01/31] tools/libxl: introduce libxl__domain_restore_device_model to load qemu state Wen Congyang
2016-02-25 15:53   ` Wei Liu
2016-02-26  1:55     ` Wen Congyang
2016-02-22  2:52 ` [PATCH v10 02/31] tools/libxl: introduce libxl__domain_common_switch_qemu_logdirty() Wen Congyang
2016-02-22  2:52 ` [PATCH v10 03/31] tools/libxl: Add back channel to allow migration target send data back Wen Congyang
2016-02-22  2:52 ` [PATCH v10 04/31] tools/libxl: Introduce new helper function dup_fd_helper() Wen Congyang
2016-02-25 15:53   ` Wei Liu
2016-02-22  2:52 ` [PATCH v10 05/31] tools/libx{l, c}: add back channel to libxc Wen Congyang
2016-02-25 15:54   ` Wei Liu
2016-02-22  2:52 ` [PATCH v10 06/31] docs: add colo readme Wen Congyang
2016-02-25 15:54   ` Wei Liu
2016-02-22  2:52 ` [PATCH v10 07/31] docs/libxl: Introduce CHECKPOINT_CONTEXT to support migration v2 colo streams Wen Congyang
2016-02-25 15:54   ` Wei Liu
2016-02-26  1:59     ` Wen Congyang
2016-02-22  2:52 ` [PATCH v10 08/31] libxc/migration: Specification update for DIRTY_PFN_LIST records Wen Congyang
2016-02-22  2:52 ` [PATCH v10 09/31] libxc/migration: export read_record for common use Wen Congyang
2016-02-22  2:52 ` [PATCH v10 10/31] tools/libxl: add back channel support to write stream Wen Congyang
2016-02-25 15:54   ` Wei Liu
2016-02-26  2:11     ` Wen Congyang
2016-03-02 15:02       ` Wei Liu
2016-03-03  1:25         ` Wen Congyang
2016-02-22  2:52 ` [PATCH v10 11/31] tools/libxl: write checkpoint_state records into the stream Wen Congyang
2016-02-22  2:52 ` [PATCH v10 12/31] tools/libxl: add back channel support to read stream Wen Congyang
2016-02-25 15:54   ` Wei Liu
2016-02-26  2:16     ` Wen Congyang
2016-03-02 15:03       ` Wei Liu
2016-02-22  2:52 ` [PATCH v10 13/31] tools/libxl: handle checkpoint_state records in a libxl migration v2 " Wen Congyang
2016-02-22  2:52 ` [PATCH v10 14/31] tools/libx{l, c}: introduce wait_checkpoint callback Wen Congyang
2016-02-22  2:52 ` [PATCH v10 15/31] tools/libx{l, c}: add postcopy/suspend callback to restore side Wen Congyang
2016-02-22  2:52 ` [PATCH v10 16/31] secondary vm suspend/resume/checkpoint code Wen Congyang
2016-02-25 15:56   ` Wei Liu
2016-02-26  2:30     ` Wen Congyang
2016-03-01 10:06     ` Wen Congyang
2016-02-22  2:52 ` [PATCH v10 17/31] primary " Wen Congyang
2016-02-25 15:57   ` Wei Liu
2016-02-26  2:32     ` Wen Congyang
2016-02-22  2:52 ` [PATCH v10 18/31] libxc/restore: support COLO restore Wen Congyang
2016-02-25 15:57   ` Wei Liu
2016-02-26  2:33     ` Wen Congyang
2016-02-22  2:52 ` [PATCH v10 19/31] libxc/restore: send dirty pfn list to primary when checkpoint under colo Wen Congyang
2016-02-22  2:52 ` [PATCH v10 20/31] send store gfn and console gfn to xl before resuming secondary vm Wen Congyang
2016-02-22  2:52 ` [PATCH v10 21/31] libxc/save: support COLO save Wen Congyang
2016-02-25 15:58   ` Wei Liu
2016-02-26  2:35     ` Wen Congyang
2016-02-22  2:52 ` [PATCH v10 22/31] implement the cmdline for COLO Wen Congyang
2016-03-02 15:03   ` Wei Liu
2016-03-03  1:30     ` Wen Congyang
2016-02-22  2:52 ` [PATCH v10 23/31] COLO: introduce new API to prepare/start/do/get_error/stop replication Wen Congyang
2016-03-02 15:03   ` Wei Liu
2016-02-22  2:52 ` [PATCH v10 24/31] Support colo mode for qemu disk Wen Congyang
2016-03-02 15:04   ` Wei Liu
2016-03-03  1:40     ` Wen Congyang
2016-02-22  2:52 ` [PATCH v10 25/31] COLO: use qemu block replication Wen Congyang
2016-03-02 15:03   ` Wei Liu
2016-02-22  2:52 ` Wen Congyang [this message]
2016-03-02 15:04   ` [PATCH v10 26/31] COLO proxy: implement setup/teardown of COLO proxy module Wei Liu
2016-03-11 22:25   ` Konrad Rzeszutek Wilk
2016-03-14  9:13     ` Wen Congyang
2016-03-22  3:40       ` Changlong Xie
2016-02-22  2:52 ` [PATCH v10 27/31] COLO proxy: preresume, postresume and checkpoint Wen Congyang
2016-03-02 15:04   ` Wei Liu
2016-02-22  2:52 ` [PATCH v10 28/31] COLO nic: implement COLO nic subkind Wen Congyang
2016-03-02 15:04   ` Wei Liu
2016-02-22  2:52 ` [PATCH v10 29/31] setup and control colo proxy on primary side Wen Congyang
2016-02-22  2:52 ` [PATCH v10 30/31] setup and control colo proxy on secondary side Wen Congyang
2016-02-22  2:52 ` [PATCH v10 31/31] cmdline switches and config vars to control colo-proxy Wen Congyang
2016-03-02 15:05   ` Wei Liu
2016-03-03  1:41     ` Wen Congyang
2016-02-25 16:05 ` [PATCH v10 00/31] COarse-grain LOck-stepping Virtual Machines for Non-stop Service Wei Liu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1456109555-28299-27-git-send-email-wency@cn.fujitsu.com \
    --to=wency@cn.fujitsu.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=eddie.dong@intel.com \
    --cc=guijianfeng@cn.fujitsu.com \
    --cc=hongyang.yang@easystack.cn \
    --cc=ian.campbell@citrix.com \
    --cc=ian.jackson@eu.citrix.com \
    --cc=konrad.wilk@oracle.com \
    --cc=lars.kurth@citrix.com \
    --cc=rshriram@cs.ubc.ca \
    --cc=wei.liu2@citrix.com \
    --cc=xen-devel@lists.xen.org \
    --cc=xiecl.fnst@cn.fujitsu.com \
    --cc=yunhong.jiang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.