From mboxrd@z Thu Jan  1 00:00:00 1970
Received: from eggs.gnu.org ([2001:4830:134:3::10]:38208)
	by lists.gnu.org with esmtp (Exim 4.71)
	(envelope-from <zhang.zhanghailiang@huawei.com>) id 1aRzBm-0002pL-LP
	for qemu-devel@nongnu.org; Sat, 06 Feb 2016 04:29:59 -0500
Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71)
	(envelope-from <zhang.zhanghailiang@huawei.com>) id 1aRzBj-0001S9-Ru
	for qemu-devel@nongnu.org; Sat, 06 Feb 2016 04:29:58 -0500
Received: from szxga03-in.huawei.com ([119.145.14.66]:22491)
	by eggs.gnu.org with esmtp (Exim 4.71)
	(envelope-from <zhang.zhanghailiang@huawei.com>) id 1aRzBj-0001RW-34
	for qemu-devel@nongnu.org; Sat, 06 Feb 2016 04:29:55 -0500
From: zhanghailiang <zhang.zhanghailiang@huawei.com>
Date: Sat, 6 Feb 2016 17:28:41 +0800
Message-ID: <1454750932-7556-30-git-send-email-zhang.zhanghailiang@huawei.com>
In-Reply-To: <1454750932-7556-1-git-send-email-zhang.zhanghailiang@huawei.com>
References: <1454750932-7556-1-git-send-email-zhang.zhanghailiang@huawei.com>
MIME-Version: 1.0
Content-Type: text/plain
Subject: [Qemu-devel] [PATCH COLO-Frame v14 29/40] COLO: Separate the
	process of saving/loading ram and device state
List-Id: <qemu-devel.nongnu.org>
List-Unsubscribe: <https://lists.nongnu.org/mailman/options/qemu-devel>,
	<mailto:qemu-devel-request@nongnu.org?subject=unsubscribe>
List-Archive: <http://lists.nongnu.org/archive/html/qemu-devel>
List-Post: <mailto:qemu-devel@nongnu.org>
List-Help: <mailto:qemu-devel-request@nongnu.org?subject=help>
List-Subscribe: <https://lists.nongnu.org/mailman/listinfo/qemu-devel>,
	<mailto:qemu-devel-request@nongnu.org?subject=subscribe>
To: qemu-devel@nongnu.org
Cc: xiecl.fnst@cn.fujitsu.com, lizhijian@cn.fujitsu.com, quintela@redhat.com, armbru@redhat.com, yunhong.jiang@intel.com, eddie.dong@intel.com, peter.huangpeng@huawei.com, dgilbert@redhat.com, zhanghailiang <zhang.zhanghailiang@huawei.com>, arei.gonglei@huawei.com, stefanha@redhat.com, amit.shah@redhat.com, zhangchen.fnst@cn.fujitsu.com, hongyang.yang@easystack.cn

We separate the process of saving/loading ram and device state when do
checkpoint, we add new helpers for save/load ram/device. With this change,
we can directly transfer ram from master to slave without using
QEMUSizeBufferas assistant, which also reduce the size of extra memory been
used during checkpoint.

Besides, we move the colo_flush_ram_cache to the proper position after the
above change.

Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
---
v14:
- split two new patches from this patch
- Some minor fixes from Dave
v13:
- Re-use some existed helper functions to realize saving/loading ram and device.
v11:
- Remove load configuration section in qemu_loadvm_state_begin()
---
 migration/colo.c   | 48 ++++++++++++++++++++++++++++++++++++++----------
 migration/ram.c    |  5 -----
 migration/savevm.c |  5 +++++
 3 files changed, 43 insertions(+), 15 deletions(-)

diff --git a/migration/colo.c b/migration/colo.c
index a30167c..9ac46ec 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -288,21 +288,37 @@ static int colo_do_checkpoint_transaction(MigrationState *s,
         goto out;
     }
 
+    colo_put_cmd(s->to_dst_file, COLO_MESSAGE_VMSTATE_SEND, &local_err);
+    if (local_err) {
+        goto out;
+    }
+
     /* Disable block migration */
     s->params.blk = 0;
     s->params.shared = 0;
-    qemu_savevm_state_header(trans);
-    qemu_savevm_state_begin(trans, &s->params);
+    qemu_savevm_state_begin(s->to_dst_file, &s->params);
+    ret = qemu_file_get_error(s->to_dst_file);
+    if (ret < 0) {
+        error_report("Save vm state begin error");
+        goto out;
+    }
+
     qemu_mutex_lock_iothread();
-    qemu_savevm_state_complete_precopy(trans, false);
+    /*
+    * Only save VM's live state, which not including device state.
+    * TODO: We may need a timeout mechanism to prevent COLO process
+    * to be blocked here.
+    */
+    qemu_savevm_live_state(s->to_dst_file);
+    /* Note: device state is saved into buffer */
+    ret = qemu_save_device_state(trans);
     qemu_mutex_unlock_iothread();
-
-    qemu_fflush(trans);
-
-    colo_put_cmd(s->to_dst_file, COLO_MESSAGE_VMSTATE_SEND, &local_err);
-    if (local_err) {
+    if (ret < 0) {
+        error_report("Save device state error");
         goto out;
     }
+    qemu_fflush(trans);
+
     /* we send the total size of the vmstate first */
     size = qsb_get_length(buffer);
     colo_put_cmd_value(s->to_dst_file, COLO_MESSAGE_VMSTATE_SIZE,
@@ -571,6 +587,16 @@ void *colo_process_incoming_thread(void *opaque)
             goto out;
         }
 
+        ret = qemu_loadvm_state_begin(mis->from_src_file);
+        if (ret < 0) {
+            error_report("Load vm state begin error, ret=%d", ret);
+            goto out;
+        }
+        ret = qemu_loadvm_state_main(mis->from_src_file, mis);
+        if (ret < 0) {
+            error_report("Load VM's live state (ram) error");
+            goto out;
+        }
         /* read the VM state total size first */
         value = colo_get_cmd_value(mis->from_src_file,
                                  COLO_MESSAGE_VMSTATE_SIZE, &local_err);
@@ -603,8 +629,10 @@ void *colo_process_incoming_thread(void *opaque)
         qemu_mutex_lock_iothread();
         qemu_system_reset(VMRESET_SILENT);
         vmstate_loading = true;
-        if (qemu_loadvm_state(fb) < 0) {
-            error_report("COLO: loadvm failed");
+        colo_flush_ram_cache();
+        ret = qemu_load_device_state(fb);
+        if (ret < 0) {
+            error_report("COLO: load device state failed");
             qemu_mutex_unlock_iothread();
             goto out;
         }
diff --git a/migration/ram.c b/migration/ram.c
index 519ebdc..35ea795 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -2465,7 +2465,6 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
      * be atomic
      */
     bool postcopy_running = postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING;
-    bool need_flush = false;
 
     seq_iter++;
 
@@ -2500,7 +2499,6 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
             /* After going into COLO, we should load the Page into colo_cache */
             if (ram_cache_enable) {
                 host = colo_cache_from_block_offset(block, addr);
-                need_flush = true;
             } else {
                 host = host_from_ram_block_offset(block, addr);
             }
@@ -2594,9 +2592,6 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
 
     rcu_read_unlock();
 
-    if (!ret  && ram_cache_enable && need_flush) {
-        colo_flush_ram_cache();
-    }
     DPRINTF("Completed load of VM with exit code %d seq iteration "
             "%" PRIu64 "\n", ret, seq_iter);
     return ret;
diff --git a/migration/savevm.c b/migration/savevm.c
index 0c3b960..ace4ad9 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -50,6 +50,7 @@
 #include "qemu/iov.h"
 #include "block/snapshot.h"
 #include "block/qapi.h"
+#include "migration/colo.h"
 
 
 #ifndef ETH_P_RARP
@@ -923,6 +924,10 @@ void qemu_savevm_state_begin(QEMUFile *f,
             break;
         }
     }
+    if (migration_in_colo_state()) {
+        qemu_put_byte(f, QEMU_VM_EOF);
+        qemu_fflush(f);
+    }
 }
 
 /*
-- 
1.8.3.1