All of lore.kernel.org
 help / color / mirror / Atom feed
From: Wen Congyang <wency@cn.fujitsu.com>
To: xen devel <xen-devel@lists.xen.org>,
	Andrew Cooper <andrew.cooper3@citrix.com>,
	Ian Campbell <ian.campbell@citrix.com>,
	Ian Jackson <ian.jackson@eu.citrix.com>,
	Wei Liu <wei.liu2@citrix.com>
Cc: Lars Kurth <lars.kurth@citrix.com>,
	Changlong Xie <xiecl.fnst@cn.fujitsu.com>,
	Wen Congyang <wency@cn.fujitsu.com>,
	Gui Jianfeng <guijianfeng@cn.fujitsu.com>,
	Jiang Yunhong <yunhong.jiang@intel.com>,
	Dong Eddie <eddie.dong@intel.com>,
	Shriram Rajagopalan <rshriram@cs.ubc.ca>,
	Yang Hongyang <hongyang.yang@easystack.cn>
Subject: [PATCH v9 20/25] COLO proxy: implement setup/teardown of COLO proxy module
Date: Wed, 30 Dec 2015 10:37:50 +0800	[thread overview]
Message-ID: <1451443075-27428-21-git-send-email-wency@cn.fujitsu.com> (raw)
In-Reply-To: <1451443075-27428-1-git-send-email-wency@cn.fujitsu.com>

Implement setup/teardown of the COLO proxy module.
We use netlink to communicate with the proxy module.
About colo-proxy module:
https://lkml.org/lkml/2015/6/18/32
How to use:
http://wiki.xen.org/wiki/COLO_-_Coarse_Grain_Lock_Stepping

Signed-off-by: Yang Hongyang <hongyang.yang@easystack.cn>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
---
 tools/libxl/Makefile           |   1 +
 tools/libxl/libxl_colo.h       |   2 +
 tools/libxl/libxl_colo_proxy.c | 230 +++++++++++++++++++++++++++++++++++++++++
 tools/libxl/libxl_internal.h   |  15 +++
 4 files changed, 248 insertions(+)
 create mode 100644 tools/libxl/libxl_colo_proxy.c

diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index a4156c1..8c7e5c0 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -65,6 +65,7 @@ endif
 LIBXL_OBJS-y += libxl_remus.o libxl_checkpoint_device.o libxl_remus_disk_drbd.o
 LIBXL_OBJS-y += libxl_colo_restore.o libxl_colo_save.o
 LIBXL_OBJS-y += libxl_colo_qdisk.o
+LIBXL_OBJS-y += libxl_colo_proxy.o
 
 LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o libxl_psr.o
 LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o libxl_libfdt_compat.o
diff --git a/tools/libxl/libxl_colo.h b/tools/libxl/libxl_colo.h
index 39515c4..2604b0f 100644
--- a/tools/libxl/libxl_colo.h
+++ b/tools/libxl/libxl_colo.h
@@ -31,4 +31,6 @@ extern void libxl__colo_save_teardown(libxl__egc *egc,
                                       libxl__colo_save_state *css,
                                       int rc);
 
+/*
+ * Create the netlink socket used to talk to the colo-proxy module, bind it
+ * and send COLO_PROXY_INIT.  cps->ao must be set by the caller.
+ * Returns 0 on success and non-zero on failure; on failure the socket is
+ * closed and cps->sock_fd is set to -1.
+ */
+extern int colo_proxy_setup(libxl__colo_proxy_state *cps);
+/* Close cps->sock_fd if it is open (>= 0) and set it to -1. */
+extern void colo_proxy_teardown(libxl__colo_proxy_state *cps);
 #endif
diff --git a/tools/libxl/libxl_colo_proxy.c b/tools/libxl/libxl_colo_proxy.c
new file mode 100644
index 0000000..e07e640
--- /dev/null
+++ b/tools/libxl/libxl_colo_proxy.c
@@ -0,0 +1,230 @@
+/*
+ * Copyright (C) 2015 FUJITSU LIMITED
+ * Author: Yang Hongyang <hongyang.yang@easystack.cn>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ */
+
+#include "libxl_osdeps.h" /* must come before any other headers */
+
+#include "libxl_internal.h"
+#include "libxl_colo.h"
+#include <linux/netlink.h>
+
+#define NETLINK_COLO 28
+
+/*
+ * Netlink message types for the NETLINK_COLO protocol; colo_proxy_send()
+ * stores one of these in nlmsg_type.  Numbering starts just above
+ * NLMSG_MIN_TYPE.
+ */
+enum colo_netlink_op {
+    COLO_QUERY_CHECKPOINT = (NLMSG_MIN_TYPE + 1),
+    COLO_CHECKPOINT,
+    COLO_FAILOVER,
+    COLO_PROXY_INIT, /* sent by colo_proxy_setup(), with NLM_F_ACK set */
+    COLO_PROXY_RESET, /* UNUSED, will be used for continuous FT */
+};
+
+/* ========= colo-proxy: helper functions ========== */
+
+/*
+ * Send a header-only netlink request of the given type to the kernel
+ * over cps->sock_fd.
+ *
+ * buff, size: currently unused; no payload is attached to the message.
+ * type:       value stored in nlmsg_type.  For COLO_PROXY_INIT the
+ *             NLM_F_ACK flag is set in addition to NLM_F_REQUEST.
+ *
+ * Returns the result of sendmsg(); a result <= 0 is logged as an error.
+ */
+static int colo_proxy_send(libxl__colo_proxy_state *cps, uint8_t *buff,
+                           uint64_t size, int type)
+{
+    STATE_AO_GC(cps->ao);
+
+    /* Destination address: nl_pid 0, no multicast groups. */
+    struct sockaddr_nl dest = {
+        .nl_family = AF_NETLINK,
+        .nl_pid = 0,
+        .nl_groups = 0,
+    };
+    /*
+     * Original author's note on nlmsg_pid: "This is untrusty".
+     * NOTE(review): presumably the receiver cannot rely on this value -
+     * confirm.
+     */
+    struct nlmsghdr hdr = {
+        .nlmsg_len = NLMSG_SPACE(0),
+        .nlmsg_type = type,
+        .nlmsg_flags = NLM_F_REQUEST,
+        .nlmsg_seq = 0,
+        .nlmsg_pid = cps->index,
+    };
+    struct iovec vec = {
+        .iov_base = &hdr,
+        .iov_len = hdr.nlmsg_len,
+    };
+    struct msghdr request = {
+        .msg_name = &dest,
+        .msg_namelen = sizeof(dest),
+        .msg_iov = &vec,
+        .msg_iovlen = 1,
+        .msg_control = NULL,
+        .msg_controllen = 0,
+        .msg_flags = 0,
+    };
+    int sent;
+
+    if (type == COLO_PROXY_INIT)
+        hdr.nlmsg_flags |= NLM_F_ACK;
+
+    sent = sendmsg(cps->sock_fd, &request, 0);
+    if (sent <= 0)
+        LOG(ERROR, "can't send msg to kernel by netlink: %s",
+            strerror(errno));
+
+    return sent;
+}
+
+/*
+ * Receive one netlink datagram from the kernel on cps->sock_fd.
+ *
+ * timeout_us: when non-zero, a receive timeout in microseconds that is
+ *             installed with SO_RCVTIMEO for the duration of this call and
+ *             reset to 0 before returning.
+ *
+ * On success returns the datagram length (> 0) and stores a malloc()ed
+ * buffer holding it in *buff; the caller must free() it.
+ * Otherwise returns the non-positive recvmsg() result (-1 on error or
+ * timeout), sets *buff to NULL and leaves errno as recvmsg() set it, so
+ * the caller can still report it.  Errors other than EAGAIN/EWOULDBLOCK
+ * are logged here.
+ */
+static int64_t colo_proxy_recv(libxl__colo_proxy_state *cps, uint8_t **buff,
+                               unsigned int timeout_us)
+{
+    struct sockaddr_nl sa;
+    struct iovec iov;
+    struct msghdr mh;
+    struct timeval tv;
+    size_t size = 16384;
+    ssize_t n;
+    int saved_errno;
+
+    STATE_AO_GC(cps->ao);
+    uint8_t *tmp = libxl__malloc(NOGC, size);
+
+    if (timeout_us) {
+        tv.tv_sec = timeout_us / 1000000;
+        tv.tv_usec = timeout_us % 1000000;
+        setsockopt(cps->sock_fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
+    }
+
+    for (;;) {
+        memset(&mh, 0, sizeof(mh));
+        mh.msg_name = &sa;
+        mh.msg_namelen = sizeof(sa);
+        mh.msg_iov = &iov;
+        mh.msg_iovlen = 1;
+        iov.iov_base = tmp;
+        iov.iov_len = size;
+
+        /*
+         * Peek before consuming.  A datagram that does not fit the buffer
+         * loses its tail once it is read, so a second recvmsg() would
+         * return the next message rather than the remainder.  With
+         * MSG_PEEK | MSG_TRUNC the datagram stays queued and recvmsg()
+         * reports its full length, which lets us grow the buffer first.
+         */
+        n = recvmsg(cps->sock_fd, &mh, MSG_PEEK | MSG_TRUNC);
+        if (n <= 0)
+            break;
+
+        if ((size_t)n > size) {
+            size = n;
+            tmp = libxl__realloc(NOGC, tmp, size);
+            continue;
+        }
+
+        /* The buffer is large enough: now actually dequeue the datagram. */
+        mh.msg_namelen = sizeof(sa);
+        mh.msg_flags = 0;
+        n = recvmsg(cps->sock_fd, &mh, 0);
+        break;
+    }
+    saved_errno = errno;
+
+    if (n > 0) {
+        *buff = tmp;
+    } else {
+        /* errno is only meaningful when recvmsg() reported a failure. */
+        if (n < 0 && saved_errno != EAGAIN && saved_errno != EWOULDBLOCK)
+            LOGE(ERROR, "can't recv msg from kernel by netlink");
+        free(tmp);
+        *buff = NULL;
+    }
+
+    if (timeout_us) {
+        tv.tv_sec = 0;
+        tv.tv_usec = 0;
+        setsockopt(cps->sock_fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
+    }
+
+    /* Do not let the cleanup above hide the recvmsg() error from callers. */
+    errno = saved_errno;
+    return n;
+}
+
+/* ========= colo-proxy: setup and teardown ========== */
+
+/*
+ * Set up the netlink channel to the colo-proxy module.
+ *
+ * Creates a NETLINK_COLO socket, binds it to the first free nl_pid in the
+ * range 1..9 (stored in cps->index), sends COLO_PROXY_INIT and waits up to
+ * 500ms for the reply.  A reply of type NLMSG_ERROR is the ACK; it must
+ * carry error code 0.
+ *
+ * cps->ao must be set by the caller.
+ *
+ * Returns 0 on success with cps->sock_fd holding the open socket.
+ * Returns ERROR_FAIL on any failure, with the socket closed and
+ * cps->sock_fd set to -1.
+ */
+int colo_proxy_setup(libxl__colo_proxy_state *cps)
+{
+    struct sockaddr_nl sa;
+    struct nlmsghdr *h;
+    struct nlmsgerr *err;
+    uint8_t *buff = NULL;
+    int64_t size;
+    int idx;
+    int rc = ERROR_FAIL;
+
+    STATE_AO_GC(cps->ao);
+
+    /* socket() yields -1 on failure, so the cleanup below never closes a
+     * descriptor we did not open. */
+    cps->sock_fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_COLO);
+    if (cps->sock_fd < 0) {
+        LOG(ERROR, "can not create a netlink socket: %s", strerror(errno));
+        goto out;
+    }
+
+    memset(&sa, 0, sizeof(sa));
+    sa.nl_family = AF_NETLINK;
+    sa.nl_groups = 0;
+
+    /* Probe nl_pid 1..9 until one is not already in use. */
+    for (idx = 1; idx < 10; idx++) {
+        sa.nl_pid = idx;
+        if (!bind(cps->sock_fd, (struct sockaddr *)&sa, sizeof(sa)))
+            break;
+
+        if (errno != EADDRINUSE) {
+            LOG(ERROR, "netlink bind error");
+            goto out;
+        }
+        LOG(ERROR, "colo index %d has already in used", idx);
+    }
+    if (idx >= 10) {
+        LOG(ERROR, "netlink bind error");
+        goto out;
+    }
+    cps->index = idx;
+
+    /* colo_proxy_send() returns a byte count; it is not an error code and
+     * must not leak into rc. */
+    if (colo_proxy_send(cps, NULL, 0, COLO_PROXY_INIT) <= 0)
+        goto out;
+
+    /* receive ack */
+    size = colo_proxy_recv(cps, &buff, 500000);
+    if (size < 0) {
+        LOG(ERROR, "Can't recv msg from kernel by netlink: %s",
+            strerror(errno));
+        goto out;
+    }
+
+    if (size) {
+        /* Make sure a whole header arrived before looking inside it. */
+        if (size < (int64_t)sizeof(*h)) {
+            LOG(ERROR, "NLMSG_LENGTH is too short");
+            goto out;
+        }
+
+        h = (struct nlmsghdr *)buff;
+        if (h->nlmsg_type == NLMSG_ERROR) {
+            /* ack's type is NLMSG_ERROR */
+            if (size < (int64_t)(sizeof(*h) + sizeof(*err))) {
+                LOG(ERROR, "NLMSG_LENGTH is too short");
+                goto out;
+            }
+
+            err = (struct nlmsgerr *)NLMSG_DATA(h);
+            if (err->error) {
+                LOG(ERROR, "NLMSG_ERROR contains error %d", err->error);
+                goto out;
+            }
+        }
+    }
+
+    rc = 0;
+
+out:
+    free(buff);
+    if (rc) {
+        if (cps->sock_fd >= 0)
+            close(cps->sock_fd);
+        cps->sock_fd = -1;
+    }
+    return rc;
+}
+
+/*
+ * Release the netlink socket held in cps.  Does nothing when no socket is
+ * open (sock_fd < 0); otherwise closes it and marks it closed with -1.
+ */
+void colo_proxy_teardown(libxl__colo_proxy_state *cps)
+{
+    if (cps->sock_fd < 0)
+        return;
+
+    close(cps->sock_fd);
+    cps->sock_fd = -1;
+}
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index ddf4980..abaa98c 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -3109,6 +3109,15 @@ libxl__stream_read_inuse(const libxl__stream_read_state *stream)
 }
 
 /*----- colo related state structure -----*/
+/* State of the netlink connection to the colo-proxy module. */
+typedef struct libxl__colo_proxy_state libxl__colo_proxy_state;
+struct libxl__colo_proxy_state {
+    /* set by caller of colo_proxy_setup */
+    libxl__ao *ao;
+
+    /* netlink socket opened by colo_proxy_setup(); -1 when closed */
+    int sock_fd;
+    /* nl_pid the socket was bound to; also sent as nlmsg_pid */
+    int index;
+};
+
 typedef struct libxl__colo_save_state libxl__colo_save_state;
 struct libxl__colo_save_state {
     int send_fd;
@@ -3123,6 +3132,9 @@ struct libxl__colo_save_state {
     /* private, used by qdisk block replication */
     bool qdisk_used;
     bool qdisk_setuped;
+
+    /* private, used by colo-proxy */
+    libxl__colo_proxy_state cps;
 };
 
 /*----- Domain suspend (save) state structure -----*/
@@ -3534,6 +3546,9 @@ struct libxl__colo_restore_state {
     bool qdisk_setuped;
     const char *host;
     const char *port;
+
+    /* private, used by colo-proxy */
+    libxl__colo_proxy_state cps;
 };
 
 struct libxl__domain_create_state {
-- 
2.5.0

  parent reply	other threads:[~2015-12-30  2:37 UTC|newest]

Thread overview: 45+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-12-30  2:37 [PATCH v9 00/25] COarse-grain LOck-stepping Virtual Machines for Non-stop Service Wen Congyang
2015-12-30  2:37 ` [PATCH v9 01/25] docs: add colo readme Wen Congyang
2015-12-30  2:37 ` [PATCH v9 02/25] docs/libxl: Introduce COLO_CONTEXT to support migration v2 colo streams Wen Congyang
2016-01-26 20:40   ` Konrad Rzeszutek Wilk
2016-01-27  6:47     ` Wen Congyang
2016-01-27 11:00       ` Andrew Cooper
2016-01-27 15:11         ` Konrad Rzeszutek Wilk
2016-01-27 15:15           ` Andrew Cooper
2016-01-27 15:28             ` Konrad Rzeszutek Wilk
2016-01-27 15:30               ` Andrew Cooper
2016-01-27 16:01                 ` Ian Jackson
2015-12-30  2:37 ` [PATCH v9 03/25] libxc/migration: Specification update for DIRTY_PFN_LIST records Wen Congyang
2016-01-26 20:44   ` Konrad Rzeszutek Wilk
2016-01-27  6:47     ` Wen Congyang
2016-01-27  7:12     ` Wen Congyang
2016-01-27 10:00       ` Ian Campbell
2016-01-27 11:01         ` Andrew Cooper
2015-12-30  2:37 ` [PATCH v9 04/25] libxc/migration: export read_record for common use Wen Congyang
2016-01-26 20:45   ` Konrad Rzeszutek Wilk
2016-01-27  0:57     ` Wen Congyang
2015-12-30  2:37 ` [PATCH v9 05/25] tools/libxl: add back channel support to write stream Wen Congyang
2015-12-30  2:37 ` [PATCH v9 06/25] tools/libxl: write checkpoint_state records into the stream Wen Congyang
2015-12-30  2:37 ` [PATCH v9 07/25] tools/libxl: add back channel support to read stream Wen Congyang
2015-12-30  2:37 ` [PATCH v9 08/25] tools/libxl: handle checkpoint_state records in a libxl migration v2 " Wen Congyang
2015-12-30  2:37 ` [PATCH v9 09/25] tools/libx{l, c}: introduce should_checkpoint callback Wen Congyang
2016-01-26 20:50   ` Konrad Rzeszutek Wilk
2016-01-26 21:09     ` Konrad Rzeszutek Wilk
2016-01-27  1:03       ` Wen Congyang
2016-01-27  1:18     ` Wen Congyang
2015-12-30  2:37 ` [PATCH v9 10/25] tools/libx{l, c}: add postcopy/suspend callback to restore side Wen Congyang
2015-12-30  2:37 ` [PATCH v9 11/25] secondary vm suspend/resume/checkpoint code Wen Congyang
2015-12-30  2:37 ` [PATCH v9 12/25] primary " Wen Congyang
2015-12-30  2:37 ` [PATCH v9 13/25] libxc/restore: support COLO restore Wen Congyang
2015-12-30  2:37 ` [PATCH v9 14/25] libxc/restore: send dirty pfn list to primary when checkpoint under colo Wen Congyang
2015-12-30  2:37 ` [PATCH v9 15/25] send store gfn and console gfn to xl before resuming secondary vm Wen Congyang
2015-12-30  2:37 ` [PATCH v9 16/25] libxc/save: support COLO save Wen Congyang
2015-12-30  2:37 ` [PATCH v9 17/25] implement the cmdline for COLO Wen Congyang
2015-12-30  2:37 ` [PATCH v9 18/25] Support colo mode for qemu disk Wen Congyang
2015-12-30  2:37 ` [PATCH v9 19/25] COLO: use qemu block replication Wen Congyang
2015-12-30  2:37 ` Wen Congyang [this message]
2015-12-30  2:37 ` [PATCH v9 21/25] COLO proxy: preresume, postresume and checkpoint Wen Congyang
2015-12-30  2:37 ` [PATCH v9 22/25] COLO nic: implement COLO nic subkind Wen Congyang
2015-12-30  2:37 ` [PATCH v9 23/25] setup and control colo proxy on primary side Wen Congyang
2015-12-30  2:37 ` [PATCH v9 24/25] setup and control colo proxy on secondary side Wen Congyang
2015-12-30  2:37 ` [PATCH v9 25/25] cmdline switches and config vars to control colo-proxy Wen Congyang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1451443075-27428-21-git-send-email-wency@cn.fujitsu.com \
    --to=wency@cn.fujitsu.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=eddie.dong@intel.com \
    --cc=guijianfeng@cn.fujitsu.com \
    --cc=hongyang.yang@easystack.cn \
    --cc=ian.campbell@citrix.com \
    --cc=ian.jackson@eu.citrix.com \
    --cc=lars.kurth@citrix.com \
    --cc=rshriram@cs.ubc.ca \
    --cc=wei.liu2@citrix.com \
    --cc=xen-devel@lists.xen.org \
    --cc=xiecl.fnst@cn.fujitsu.com \
    --cc=yunhong.jiang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.