Date: Fri, 13 Nov 2015 15:39:37 +0000
From: "Dr. David Alan Gilbert"
To: zhanghailiang
Cc: lizhijian@cn.fujitsu.com, quintela@redhat.com, yunhong.jiang@intel.com,
	eddie.dong@intel.com, peter.huangpeng@huawei.com, qemu-devel@nongnu.org,
	arei.gonglei@huawei.com, stefanha@redhat.com, amit.shah@redhat.com
Subject: Re: [Qemu-devel] [PATCH COLO-Frame v10 13/38] COLO: Load PVM's dirty pages into SVM's RAM cache temporarily
Message-ID: <20151113153936.GJ2456@work-vm>
References: <1446551816-15768-1-git-send-email-zhang.zhanghailiang@huawei.com>
 <1446551816-15768-14-git-send-email-zhang.zhanghailiang@huawei.com>
In-Reply-To: <1446551816-15768-14-git-send-email-zhang.zhanghailiang@huawei.com>

* zhanghailiang (zhang.zhanghailiang@huawei.com) wrote:
> We should not load the PVM's state directly into the SVM, because errors
> may happen while the SVM is receiving data, which would break the SVM.
> 
> We need to ensure that all data has been received before loading the state
> into the SVM, so we use extra memory to cache the data (the PVM's ram).
> The ram cache on the secondary side is initially the same as the SVM/PVM's
> memory. During each checkpoint we first write the PVM's dirty pages into
> this ram cache, so the ram cache always matches the PVM's memory at every
> checkpoint; then we flush this cached ram to the SVM after we have received
> all of the PVM's state.
> 
> Signed-off-by: zhanghailiang
> Signed-off-by: Li Zhijian
> Signed-off-by: Gonglei
> ---
> v10: Split the process of dirty pages recording into a new patch
> ---
>  include/exec/ram_addr.h  |  1 +
>  include/migration/colo.h |  3 +++
>  migration/colo.c         | 14 +++++++++--
>  migration/ram.c          | 61 ++++++++++++++++++++++++++++++++++++++++++++++--
>  4 files changed, 75 insertions(+), 4 deletions(-)
> 
> diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
> index 3360ac5..e7c4310 100644
> --- a/include/exec/ram_addr.h
> +++ b/include/exec/ram_addr.h
> @@ -28,6 +28,7 @@ struct RAMBlock {
>      struct rcu_head rcu;
>      struct MemoryRegion *mr;
>      uint8_t *host;
> +    uint8_t *host_cache; /* For colo, VM's ram cache */

I suggest you make the name have 'colo' in it; e.g. colo_cache;
'host_cache' is a bit generic.

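For example, with that rename the two copies of the host/host_cache
test down in host_from_stream_offset could be pulled into one small
helper; this is only an untested sketch that assumes this patch's
ram_cache_enable flag, and colo_cache_from_block is just a name I've
made up to illustrate:

    /* Untested sketch: decide where an incoming page for this block goes.
     * While the COLO cache is enabled we write into the per-block cache
     * (the field I'm suggesting you rename to colo_cache); otherwise we
     * write straight into guest ram as before.
     */
    static inline uint8_t *colo_cache_from_block(RAMBlock *block,
                                                 ram_addr_t offset)
    {
        if (ram_cache_enable) {
            return block->colo_cache + offset;
        }
        return block->host + offset;
    }

Both places in host_from_stream_offset could then just return that,
which keeps the cache/no-cache decision in one spot.
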
>      ram_addr_t offset;
>      ram_addr_t used_length;
>      ram_addr_t max_length;
> diff --git a/include/migration/colo.h b/include/migration/colo.h
> index 2676c4a..8edd5f1 100644
> --- a/include/migration/colo.h
> +++ b/include/migration/colo.h
> @@ -29,4 +29,7 @@ bool migration_incoming_enable_colo(void);
>  void migration_incoming_exit_colo(void);
>  void *colo_process_incoming_thread(void *opaque);
>  bool migration_incoming_in_colo_state(void);
> +/* ram cache */
> +int colo_init_ram_cache(void);
> +void colo_release_ram_cache(void);
>  #endif
> diff --git a/migration/colo.c b/migration/colo.c
> index b865513..25f85b2 100644
> --- a/migration/colo.c
> +++ b/migration/colo.c
> @@ -304,6 +304,12 @@ void *colo_process_incoming_thread(void *opaque)
>          goto out;
>      }
>  
> +    ret = colo_init_ram_cache();
> +    if (ret < 0) {
> +        error_report("Failed to initialize ram cache");
> +        goto out;
> +    }
> +
>      ret = colo_ctl_put(mis->to_src_file, COLO_COMMAND_CHECKPOINT_READY, 0);
>      if (ret < 0) {
>          goto out;
> @@ -331,14 +337,14 @@ void *colo_process_incoming_thread(void *opaque)
>              goto out;
>          }
>  
> -        /* TODO: read migration data into colo buffer */
> +        /* TODO Load VM state */
>  
>          ret = colo_ctl_put(mis->to_src_file, COLO_COMMAND_VMSTATE_RECEIVED, 0);
>          if (ret < 0) {
>              goto out;
>          }
>  
> -        /* TODO: load vm state */
> +        /* TODO: flush vm state */

Do you really need to update/change the TODOs here?

>          ret = colo_ctl_put(mis->to_src_file, COLO_COMMAND_VMSTATE_LOADED, 0);
>          if (ret < 0) {
> @@ -352,6 +358,10 @@ out:
>                       strerror(-ret));
>      }
>  
> +    qemu_mutex_lock_iothread();
> +    colo_release_ram_cache();
> +    qemu_mutex_unlock_iothread();
> +
>      if (mis->to_src_file) {
>          qemu_fclose(mis->to_src_file);
>      }
> diff --git a/migration/ram.c b/migration/ram.c
> index 5784c15..b094dc3 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -222,6 +222,7 @@ static RAMBlock *last_sent_block;
>  static ram_addr_t last_offset;
>  static QemuMutex migration_bitmap_mutex;
>  static uint64_t migration_dirty_pages;
> +static bool ram_cache_enable;
>  static uint32_t last_version;
>  static bool ram_bulk_stage;
>  
> @@ -1446,7 +1447,11 @@ static inline void *host_from_stream_offset(QEMUFile *f,
>              return NULL;
>          }
>  
> -        return block->host + offset;
> +        if (ram_cache_enable) {
> +            return block->host_cache + offset;
> +        } else {
> +            return block->host + offset;
> +        }
>      }
>  
>      len = qemu_get_byte(f);
> @@ -1456,7 +1461,11 @@ static inline void *host_from_stream_offset(QEMUFile *f,
>      QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
>          if (!strncmp(id, block->idstr, sizeof(id)) &&
>              block->max_length > offset) {
> -            return block->host + offset;
> +            if (ram_cache_enable) {
> +                return block->host_cache + offset;
> +            } else {
> +                return block->host + offset;
> +            }
>          }
>      }
>  
> @@ -1707,6 +1716,54 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
>      return ret;
>  }
>  
> +/*
> + * colo cache: this is for secondary VM, we cache the whole
> + * memory of the secondary VM, it will be called after first migration.
> + */
> +int colo_init_ram_cache(void)
> +{
> +    RAMBlock *block;
> +
> +    rcu_read_lock();
> +    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> +        block->host_cache = qemu_anon_ram_alloc(block->used_length, NULL);
> +        if (!block->host_cache) {
> +            goto out_locked;
> +        }

Please print an error message stating the function, block name and size
that failed.

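Something along these lines is what I'm after (untested, and I'm
assuming RAM_ADDR_FMT is still the right format macro for a ram_addr_t
like used_length):

    block->host_cache = qemu_anon_ram_alloc(block->used_length, NULL);
    if (!block->host_cache) {
        /* Untested sketch: report which block and what size we failed
         * to allocate, so a failure on a big RAMBlock is easy to spot.
         */
        error_report("%s: Can't allocate COLO cache for block '%s', "
                     "size 0x" RAM_ADDR_FMT, __func__, block->idstr,
                     block->used_length);
        goto out_locked;
    }

That way, when the secondary can't allocate a cache as big as guest
ram, the log says exactly which block and how much memory it was
trying to get.
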
> +        memcpy(block->host_cache, block->host, block->used_length);
> +    }
> +    rcu_read_unlock();
> +    ram_cache_enable = true;
> +    return 0;
> +
> +out_locked:
> +    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> +        if (block->host_cache) {
> +            qemu_anon_ram_free(block->host_cache, block->used_length);
> +            block->host_cache = NULL;
> +        }
> +    }
> +
> +    rcu_read_unlock();
> +    return -errno;
> +}
> +
> +void colo_release_ram_cache(void)
> +{
> +    RAMBlock *block;
> +
> +    ram_cache_enable = false;
> +
> +    rcu_read_lock();
> +    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
> +        if (block->host_cache) {
> +            qemu_anon_ram_free(block->host_cache, block->used_length);
> +            block->host_cache = NULL;
> +        }
> +    }
> +    rcu_read_unlock();
> +}
> +
>  static SaveVMHandlers savevm_ram_handlers = {
>      .save_live_setup = ram_save_setup,
>      .save_live_iterate = ram_save_iterate,
> -- 
> 1.8.3.1
> 
> 

--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK