From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:48762) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Yb0Sx-000697-5m for qemu-devel@nongnu.org; Thu, 26 Mar 2015 01:36:28 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1Yb0St-0000WS-4r for qemu-devel@nongnu.org; Thu, 26 Mar 2015 01:36:27 -0400 Received: from szxga03-in.huawei.com ([119.145.14.66]:48457) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Yb0Sf-0000QS-OH for qemu-devel@nongnu.org; Thu, 26 Mar 2015 01:36:23 -0400 From: zhanghailiang Date: Thu, 26 Mar 2015 13:29:16 +0800 Message-ID: <1427347774-8960-11-git-send-email-zhang.zhanghailiang@huawei.com> In-Reply-To: <1427347774-8960-1-git-send-email-zhang.zhanghailiang@huawei.com> References: <1427347774-8960-1-git-send-email-zhang.zhanghailiang@huawei.com> MIME-Version: 1.0 Content-Type: text/plain Subject: [Qemu-devel] [RFC PATCH v4 10/28] COLO RAM: Load PVM's dirty page into SVM's RAM cache temporarily List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel@nongnu.org Cc: lizhijian@cn.fujitsu.com, quintela@redhat.com, yunhong.jiang@intel.com, eddie.dong@intel.com, peter.huangpeng@huawei.com, dgilbert@redhat.com, zhanghailiang , arei.gonglei@huawei.com, amit.shah@redhat.com, Lai Jiangshan , Yang Hongyang , david@gibson.dropbear.id.au The ram cache is initially the same as SVM/PVM's memory. At checkpoint, we cache the dirty RAM of the PVM into the RAM cache in the slave (so that the RAM cache is always the same as the PVM's memory at every checkpoint). We will flush the cached RAM to the SVM after we receive all of the PVM's vmstate (RAM/device). 
Signed-off-by: zhanghailiang Signed-off-by: Gonglei Signed-off-by: Yang Hongyang Signed-off-by: Lai Jiangshan Signed-off-by: Li Zhijian --- arch_init.c | 70 ++++++++++++++++++++++++++++++++++++-- include/exec/cpu-all.h | 1 + include/migration/migration-colo.h | 3 ++ migration/colo.c | 27 ++++++++++++--- 4 files changed, 95 insertions(+), 6 deletions(-) diff --git a/arch_init.c b/arch_init.c index e928e11..e32d258 100644 --- a/arch_init.c +++ b/arch_init.c @@ -314,6 +314,7 @@ static RAMBlock *last_sent_block; static ram_addr_t last_offset; static unsigned long *migration_bitmap; static uint64_t migration_dirty_pages; +static bool ram_cache_enable; static uint32_t last_version; static bool ram_bulk_stage; @@ -1085,6 +1086,8 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host) return 0; } +static void *memory_region_get_ram_cache_ptr(MemoryRegion *mr, RAMBlock *block); + /* Must be called from within a rcu critical section. * Returns a pointer from within the RCU-protected ram_list. */ @@ -1102,7 +1105,17 @@ static inline void *host_from_stream_offset(QEMUFile *f, return NULL; } - return memory_region_get_ram_ptr(block->mr) + offset; + if (ram_cache_enable) { + /* + * During colo checkpoint, we need bitmap of these migrated pages. + * It help us to decide which pages in ram cache should be flushed + * into VM's RAM later. 
+ */ + migration_bitmap_set_dirty(block->mr->ram_addr + offset); + return memory_region_get_ram_cache_ptr(block->mr, block) + offset; + } else { + return memory_region_get_ram_ptr(block->mr) + offset; + } } len = qemu_get_byte(f); @@ -1112,7 +1125,13 @@ static inline void *host_from_stream_offset(QEMUFile *f, QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { if (!strncmp(id, block->idstr, sizeof(id)) && block->max_length > offset) { - return memory_region_get_ram_ptr(block->mr) + offset; + if (ram_cache_enable) { + migration_bitmap_set_dirty(block->mr->ram_addr + offset); + return memory_region_get_ram_cache_ptr(block->mr, block) + + offset; + } else { + return memory_region_get_ram_ptr(block->mr) + offset; + } } } @@ -1251,6 +1270,53 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) return ret; } +/* + * colo cache: this is for secondary VM, we cache the whole + * memory of the secondary VM, it will be called after first migration. + */ +void create_and_init_ram_cache(void) +{ + RAMBlock *block; + + rcu_read_lock(); + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { + block->host_cache = g_malloc(block->used_length); + memcpy(block->host_cache, block->host, block->used_length); + } + rcu_read_unlock(); + + ram_cache_enable = true; +} + +void release_ram_cache(void) +{ + RAMBlock *block; + + ram_cache_enable = false; + + rcu_read_lock(); + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { + g_free(block->host_cache); + } + rcu_read_unlock(); +} + +static void *memory_region_get_ram_cache_ptr(MemoryRegion *mr, RAMBlock *block) +{ + if (mr->alias) { + return memory_region_get_ram_cache_ptr(mr->alias, block) + + mr->alias_offset; + } + + assert(mr->terminates); + + ram_addr_t addr = mr->ram_addr & TARGET_PAGE_MASK; + + assert(addr - block->offset < block->used_length); + + return block->host_cache + (addr - block->offset); +} + static SaveVMHandlers savevm_ram_handlers = { .save_live_setup = ram_save_setup, .save_live_iterate = ram_save_iterate, 
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h index ac06c67..bcfa3bc 100644 --- a/include/exec/cpu-all.h +++ b/include/exec/cpu-all.h @@ -272,6 +272,7 @@ struct RAMBlock { struct rcu_head rcu; struct MemoryRegion *mr; uint8_t *host; + uint8_t *host_cache; /* For colo, VM's ram cache */ ram_addr_t offset; ram_addr_t used_length; ram_addr_t max_length; diff --git a/include/migration/migration-colo.h b/include/migration/migration-colo.h index b326c35..d47ad72 100644 --- a/include/migration/migration-colo.h +++ b/include/migration/migration-colo.h @@ -35,4 +35,7 @@ bool loadvm_enable_colo(void); void loadvm_exit_colo(void); void *colo_process_incoming_checkpoints(void *opaque); bool loadvm_in_colo_state(void); +/* ram cache */ +void create_and_init_ram_cache(void); +void release_ram_cache(void); #endif diff --git a/migration/colo.c b/migration/colo.c index 64e3f3a..105434e 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -326,11 +326,18 @@ void *colo_process_incoming_checkpoints(void *opaque) error_report("Can't open incoming channel!"); goto out; } + + create_and_init_ram_cache(); + ret = colo_ctl_put(ctl, COLO_READY); if (ret < 0) { goto out; } - /* TODO: in COLO mode, slave is runing, so start the vm */ + qemu_mutex_lock_iothread(); + /* in COLO mode, slave is runing, so start the vm */ + vm_start(); + qemu_mutex_unlock_iothread(); + DPRINTF("vm is start\n"); while (true) { int request = 0; int ret = colo_wait_handle_cmd(f, &request); @@ -343,7 +350,12 @@ void *colo_process_incoming_checkpoints(void *opaque) } } - /* TODO: suspend guest */ + /* suspend guest */ + qemu_mutex_lock_iothread(); + vm_stop_force_state(RUN_STATE_COLO); + qemu_mutex_unlock_iothread(); + DPRINTF("suspend vm for checkpoint\n"); + ret = colo_ctl_put(ctl, COLO_CHECKPOINT_SUSPENDED); if (ret < 0) { goto out; @@ -355,7 +367,7 @@ void *colo_process_incoming_checkpoints(void *opaque) } DPRINTF("Got COLO_CHECKPOINT_SEND\n"); - /* TODO: read migration data into colo buffer */ + 
/*TODO Load VM state */ ret = colo_ctl_put(ctl, COLO_CHECKPOINT_RECEIVED); if (ret < 0) { @@ -363,16 +375,23 @@ void *colo_process_incoming_checkpoints(void *opaque) } DPRINTF("Recived vm state\n"); - /* TODO: load vm state */ + /* TODO: flush vm state */ ret = colo_ctl_put(ctl, COLO_CHECKPOINT_LOADED); if (ret < 0) { goto out; } + + /* resume guest */ + qemu_mutex_lock_iothread(); + vm_start(); + qemu_mutex_unlock_iothread(); + DPRINTF("OK, vm runs again\n"); } out: colo = NULL; + release_ram_cache(); if (ctl) { qemu_fclose(ctl); } -- 1.7.12.4