From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:47329) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Yb0Pa-0008WN-Pw for qemu-devel@nongnu.org; Thu, 26 Mar 2015 01:32:59 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1Yb0PW-0007Ja-H5 for qemu-devel@nongnu.org; Thu, 26 Mar 2015 01:32:58 -0400 Received: from szxga02-in.huawei.com ([119.145.14.65]:42018) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Yb0PU-0007In-Uz for qemu-devel@nongnu.org; Thu, 26 Mar 2015 01:32:54 -0400 From: zhanghailiang Date: Thu, 26 Mar 2015 13:29:28 +0800 Message-ID: <1427347774-8960-23-git-send-email-zhang.zhanghailiang@huawei.com> In-Reply-To: <1427347774-8960-1-git-send-email-zhang.zhanghailiang@huawei.com> References: <1427347774-8960-1-git-send-email-zhang.zhanghailiang@huawei.com> MIME-Version: 1.0 Content-Type: text/plain Subject: [Qemu-devel] [RFC PATCH v4 22/28] COLO: Do checkpoint according to the result of net packets comparing List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel@nongnu.org Cc: lizhijian@cn.fujitsu.com, quintela@redhat.com, yunhong.jiang@intel.com, eddie.dong@intel.com, peter.huangpeng@huawei.com, dgilbert@redhat.com, zhanghailiang , arei.gonglei@huawei.com, amit.shah@redhat.com, david@gibson.dropbear.id.au Only do a checkpoint when the VMs' output net packets are inconsistent. We also limit the minimum time between two consecutive checkpoint actions, to give the VM a chance to run. 
Signed-off-by: zhanghailiang Signed-off-by: Li Zhijian --- include/net/colo-nic.h | 2 ++ migration/colo.c | 34 ++++++++++++++++++++++++++++++++++ net/colo-nic.c | 41 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 77 insertions(+) diff --git a/include/net/colo-nic.h b/include/net/colo-nic.h index 40dbcfb..67c9807 100644 --- a/include/net/colo-nic.h +++ b/include/net/colo-nic.h @@ -19,4 +19,6 @@ void colo_proxy_destroy(int side); void colo_add_nic_devices(NetClientState *nc); void colo_remove_nic_devices(NetClientState *nc); +int colo_proxy_compare(void); + #endif diff --git a/migration/colo.c b/migration/colo.c index dffd6f9..9ef4554 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -25,6 +25,13 @@ } \ } while (0) +/* +* We should not do checkpoint one after another without any time interval, +* Because this will lead continuous 'stop' status for VM. +* CHECKPOINT_MIN_PERIOD is the min time limit between two checkpoint action. +*/ +#define CHECKPOINT_MIN_PERIOD 100 /* unit: ms */ + enum { COLO_READY = 0x46, @@ -290,6 +297,7 @@ static void *colo_thread(void *opaque) { MigrationState *s = opaque; QEMUFile *colo_control = NULL; + int64_t current_time, checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST); int ret; if (colo_proxy_init(COLO_PRIMARY_MODE) != 0) { @@ -326,10 +334,36 @@ static void *colo_thread(void *opaque) DPRINTF("vm resume to run\n"); while (s->state == MIGRATION_STATUS_COLO) { + int proxy_checkpoint_req; + + /* wait for a colo checkpoint */ + proxy_checkpoint_req = colo_proxy_compare(); + if (proxy_checkpoint_req < 0) { + goto out; + } else if (!proxy_checkpoint_req) { + /* + * No checkpoint is needed, wait for 1ms and then + * check if we need checkpoint again + */ + g_usleep(1000); + continue; + } else { + int64_t interval; + + current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST); + interval = current_time - checkpoint_time; + if (interval < CHECKPOINT_MIN_PERIOD) { + /* Limit the min time between two checkpoint */ + 
g_usleep((1000*(CHECKPOINT_MIN_PERIOD - interval))); + } + DPRINTF("Net packets is not consistent!!!\n"); + } + /* start a colo checkpoint */ if (colo_do_checkpoint_transaction(s, colo_control)) { goto out; } + checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST); } out: diff --git a/net/colo-nic.c b/net/colo-nic.c index 38d9bf5..563d661 100644 --- a/net/colo-nic.c +++ b/net/colo-nic.c @@ -37,6 +37,9 @@ typedef struct nic_device { bool is_up; } nic_device; +typedef struct colo_msg { + bool is_checkpoint; +} colo_msg; typedef struct colo_proxy { int sockfd; @@ -376,3 +379,41 @@ void colo_proxy_destroy(int side) cp_info.index = -1; colo_nic_side = -1; } +/* +do checkpoint: return 1 +error: return -1 +do not checkpoint: return 0 +*/ +int colo_proxy_compare(void) +{ + uint8_t *buff; + int64_t size; + struct nlmsghdr *h; + struct colo_msg *m; + int ret = -1; + + size = colo_proxy_recv(&buff, MSG_DONTWAIT); + + /* timeout, return no checkpoint message. */ + if (size <= 0) { + return 0; + } + + h = (struct nlmsghdr *) buff; + + if (h->nlmsg_type == NLMSG_ERROR) { + goto out; + } + + if (h->nlmsg_len < NLMSG_LENGTH(sizeof(*m))) { + goto out; + } + + m = NLMSG_DATA(h); + + ret = m->is_checkpoint ? 1 : 0; + +out: + g_free(buff); + return ret; +} -- 1.7.12.4