All of lore.kernel.org
 help / color / mirror / Atom feed
From: Wen Congyang <wency@cn.fujitsu.com>
To: xen devel <xen-devel@lists.xen.org>
Cc: Ian Campbell <Ian.Campbell@citrix.com>,
	Wen Congyang <wency@cn.fujitsu.com>,
	Ian Jackson <Ian.Jackson@eu.citrix.com>,
	Jiang Yunhong <yunhong.jiang@intel.com>,
	Dong Eddie <eddie.dong@intel.com>,
	Yang Hongyang <yanghy@cn.fujitsu.com>,
	Lai Jiangshan <laijs@cn.fujitsu.com>
Subject: [RFC Patch v3 10/22] COLO: xc related codes
Date: Fri, 5 Sep 2014 17:25:45 +0800	[thread overview]
Message-ID: <1409909158-19243-11-git-send-email-wency@cn.fujitsu.com> (raw)
In-Reply-To: <1409909158-19243-1-git-send-email-wency@cn.fujitsu.com>

Save:
1. send XC_SAVE_ID_LAST_CHECKPOINT, so that the secondary vm can be resumed
2. call callbacks->get_dirty_pfn() after suspending the primary vm if we
   are doing a checkpoint.

Restore:
1. call the callbacks postcopy (resume)/checkpoint/suspend if the secondary
   vm's status is the same as the primary vm's status.
2. zero out tdata, because we will use it to zero out pagebuf.tdata.

Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
---
 tools/libxc/xc_domain_restore.c | 44 ++++++++++++++++++++++++++++++++--
 tools/libxc/xc_domain_save.c    | 52 +++++++++++++++++++++++++++++++++++++++--
 2 files changed, 92 insertions(+), 4 deletions(-)

diff --git a/tools/libxc/xc_domain_restore.c b/tools/libxc/xc_domain_restore.c
index 21a6177..f7f683a 100644
--- a/tools/libxc/xc_domain_restore.c
+++ b/tools/libxc/xc_domain_restore.c
@@ -1454,7 +1454,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
     int nraces = 0;
 
     /* The new domain's shared-info frame number. */
-    unsigned long shared_info_frame;
+    unsigned long shared_info_frame = 0;
     unsigned char shared_info_page[PAGE_SIZE]; /* saved contents from file */
     shared_info_any_t *old_shared_info = 
         (shared_info_any_t *)shared_info_page;
@@ -1504,6 +1504,8 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
 
     DPRINTF("%s: starting restore of new domid %u", __func__, dom);
 
+    n = m = 0;
+
     pagebuf_init(&pagebuf);
     memset(&tailbuf, 0, sizeof(tailbuf));
     tailbuf.ishvm = hvm;
@@ -1629,7 +1631,6 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
      * We uncanonicalise page tables as we go.
      */
 
-    n = m = 0;
  loadpages:
     for ( ; ; )
     {
@@ -1793,6 +1794,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
         goto finish;
     }
 
+new_checkpoint:
     // DPRINTF("Buffered checkpoint\n");
 
     if ( pagebuf_get(xch, ctx, &pagebuf, io_fd, dom) ) {
@@ -2301,6 +2303,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
             free(tdata.data);
             goto out;
         }
+        memset(&tdata, 0, sizeof(tdata));
     }
 
     /* Dump the QEMU state to a state file for QEMU to load */
@@ -2367,6 +2370,43 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
     rc = 0;
 
  out:
+    if ( !rc && callbacks->checkpoint )
+    {
+#define HANDLE_CALLBACK_RETURN_VALUE(frc)                   \
+    do {                                                    \
+        if ( frc == 0 )                                     \
+        {                                                   \
+            /* Some internal error happens */               \
+            rc = 1;                                         \
+            goto out;                                       \
+        }                                                   \
+        else if ( frc == 2 )                                \
+        {                                                   \
+            /* Reading/writing error, do failover */        \
+            rc = 0;                                         \
+            goto failover;                                  \
+        }                                                   \
+    } while (0)
+        /* COLO */
+
+        /* TODO: call restore_results */
+
+        /* Resume secondary vm */
+        frc = callbacks->postcopy(callbacks->data);
+        HANDLE_CALLBACK_RETURN_VALUE(frc);
+
+        /* wait for new checkpoint */
+        frc = callbacks->checkpoint(callbacks->data);
+        HANDLE_CALLBACK_RETURN_VALUE(frc);
+
+        /* suspend secondary vm */
+        frc = callbacks->suspend(callbacks->data);
+        HANDLE_CALLBACK_RETURN_VALUE(frc);
+
+        goto new_checkpoint;
+    }
+
+failover:
     if ( (rc != 0) && (dom != 0) )
         xc_domain_destroy(xch, dom);
     xc_hypercall_buffer_free(xch, ctxt);
diff --git a/tools/libxc/xc_domain_save.c b/tools/libxc/xc_domain_save.c
index 61caa47..79cc2c8 100644
--- a/tools/libxc/xc_domain_save.c
+++ b/tools/libxc/xc_domain_save.c
@@ -377,6 +377,31 @@ static int suspend_and_state(int (*suspend)(void*), void* data,
     return 0;
 }
 
+/* Mark each pfn reported by get_dirty_pfn() in to_send, then free the list.
+ * Returns 0, or -1 with errno = EINVAL if a pfn is not below p2m_size. */
+static int update_dirty_bitmap(uint8_t *(*get_dirty_pfn)(void *), void *data,
+                               unsigned long p2m_size, unsigned long *to_send)
+{
+    /* As consumed below: pfn_list[0] is the count, the pfns follow it. */
+    uint64_t *pfn_list = (uint64_t *)get_dirty_pfn(data);
+    uint64_t count, i;
+    int rc = 0;
+
+    assert(pfn_list);
+    count = pfn_list[0];
+    for (i = 0; i < count && !rc; i++) {
+        /* NOTE(review): assumes to_send holds p2m_size bits - confirm. */
+        if (pfn_list[i + 1] >= p2m_size)
+            rc = -1;
+        else
+            set_bit(pfn_list[i + 1], to_send);
+    }
+    free(pfn_list); /* also released when a bad pfn is found */
+    if (rc)
+        errno = EINVAL; /* set last: free() may modify errno */
+    return rc;
+}
+
 /*
 ** Map the top-level page of MFNs from the guest. The guest might not have
 ** finished resuming from a previous restore operation, so we wait a while for
@@ -1769,11 +1794,14 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iter
         free(buf);
     }
 
-    if ( !callbacks->checkpoint )
+    if ( !callbacks->checkpoint || callbacks->get_dirty_pfn )
     {
         /*
          * If this is not a checkpointed save then this must be the first and
          * last checkpoint.
+         *
+         * If we are in colo mode, send last checkpoint to resume secondary
+         * vm.
          */
         i = XC_SAVE_ID_LAST_CHECKPOINT;
         if ( wrexact(io_fd, &i, sizeof(int)) )
@@ -2119,7 +2147,14 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iter
      * primary vm and secondary vm now.
      */
     if ( !rc && callbacks->postcopy && callbacks->get_dirty_pfn )
-        callbacks->postcopy(callbacks->data);
+    {
+        if ( !callbacks->postcopy(callbacks->data) )
+        {
+            ERROR("postcopy fails");
+            /* postcopy may be implemented in libxl, no way to get errno */
+            rc = -1;
+        }
+    }
 
     /* Enable compression now, finally */
     compressing = (flags & XCFLAGS_CHECKPOINT_COMPRESS);
@@ -2136,8 +2171,11 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iter
                                io_fd, dom, &info) )
         {
             ERROR("Domain appears not to have suspended");
+            /* postcopy may be implemented in libxl, no way to get errno */
+            errno = -1;
             goto out;
         }
+
         DPRINTF("SUSPEND shinfo %08lx\n", info.shared_info_frame);
         print_stats(xch, dom, 0, &time_stats, &shadow_stats, 1);
 
@@ -2148,6 +2186,16 @@ int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t max_iter
             PERROR("Error flushing shadow PT");
         }
 
+        if ( callbacks->get_dirty_pfn )
+        {
+            if ( update_dirty_bitmap(callbacks->get_dirty_pfn, callbacks->data,
+                                     dinfo->p2m_size, to_send) )
+            {
+                ERROR("getting secondary vm's dirty pages failed");
+                goto out;
+            }
+        }
+
         goto copypages;
     }
 
-- 
1.9.3

  parent reply	other threads:[~2014-09-05  9:25 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-09-05  9:25 [RFC Patch v3 00/22] COarse-grain LOck-stepping Virtual Machines for Non-stop Service Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 01/22] move remus related codes to libxl_remus.c Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 02/22] rename remus device to checkpoint device Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 03/22] adjust the indentation Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 04/22] don't touch remus in checkpoint_device Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 05/22] Update libxl_save_msgs_gen.pl to support return data from xl to xc Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 06/22] Allow slave sends data to master Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 07/22] secondary vm suspend/resume/checkpoint code Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 08/22] primary vm suspend/get_dirty_pfn/resume/checkpoint code Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 09/22] xc_domain_save: flush cache before calling callbacks->postcopy() in colo mode Wen Congyang
2014-09-05  9:25 ` Wen Congyang [this message]
2014-09-05  9:25 ` [RFC Patch v3 11/22] send store mfn and console mfn to xl before resuming secondary vm Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 12/22] implement the cmdline for COLO Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 13/22] blktap2: connect to backup asynchronously Wen Congyang
2014-09-24 19:11   ` Shriram Rajagopalan
2014-09-25  5:40     ` Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 14/22] switch to unprotected mode before closing Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 15/22] blktap2: move async connect related codes to block-replication.c Wen Congyang
2014-09-24 18:48   ` Shriram Rajagopalan
2014-09-05  9:25 ` [RFC Patch v3 16/22] blktap2: move ramdisk " Wen Congyang
2014-09-24 18:44   ` Shriram Rajagopalan
2014-09-26  5:18     ` Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 17/22] block-colo: implement colo disk replication Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 18/22] support blktap COLO in xl: Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 19/22] libxl/colo: setup and control disk replication for blktap2 backends Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 20/22] setup and control colo-agent for primary vm Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 21/22] setup and control colo-agent for secondary vm Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 22/22] colo: cmdline switches and config vars to control colo-agent Wen Congyang
2014-09-05  9:25 ` [RFC Patch v3 23/22] Introduce "xen-load-devices-state" Wen Congyang
2014-09-05 21:57   ` Stefano Stabellini
2014-09-05 21:57   ` [Qemu-devel] [Xen-devel] " Stefano Stabellini
2014-09-09  2:47     ` Wen Congyang
2014-09-09  2:47     ` [Qemu-devel] [Xen-devel] " Wen Congyang
2014-09-10 19:15       ` Stefano Stabellini
2014-09-10 19:15       ` [Qemu-devel] [Xen-devel] " Stefano Stabellini
2014-09-11  5:03         ` Wen Congyang
2014-09-11  5:03         ` Wen Congyang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1409909158-19243-11-git-send-email-wency@cn.fujitsu.com \
    --to=wency@cn.fujitsu.com \
    --cc=Ian.Campbell@citrix.com \
    --cc=Ian.Jackson@eu.citrix.com \
    --cc=eddie.dong@intel.com \
    --cc=laijs@cn.fujitsu.com \
    --cc=xen-devel@lists.xen.org \
    --cc=yanghy@cn.fujitsu.com \
    --cc=yunhong.jiang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.