From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:34339) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ZumEW-0001Ng-8j for qemu-devel@nongnu.org; Fri, 06 Nov 2015 13:59:34 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1ZumES-0003Mz-Sx for qemu-devel@nongnu.org; Fri, 06 Nov 2015 13:59:32 -0500 Received: from mx1.redhat.com ([209.132.183.28]:57568) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ZumES-0003Ma-J8 for qemu-devel@nongnu.org; Fri, 06 Nov 2015 13:59:28 -0500 Date: Fri, 6 Nov 2015 18:59:21 +0000 From: "Dr. David Alan Gilbert" Message-ID: <20151106185921.GK4267@work-vm> References: <1446551816-15768-1-git-send-email-zhang.zhanghailiang@huawei.com> <1446551816-15768-13-git-send-email-zhang.zhanghailiang@huawei.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <1446551816-15768-13-git-send-email-zhang.zhanghailiang@huawei.com> Subject: Re: [Qemu-devel] [PATCH COLO-Frame v10 12/38] COLO: Save PVM state to secondary side when do checkpoint List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: zhanghailiang Cc: lizhijian@cn.fujitsu.com, quintela@redhat.com, yunhong.jiang@intel.com, eddie.dong@intel.com, peter.huangpeng@huawei.com, qemu-devel@nongnu.org, arei.gonglei@huawei.com, stefanha@redhat.com, amit.shah@redhat.com * zhanghailiang (zhang.zhanghailiang@huawei.com) wrote: > The main process of checkpoint is to synchronize SVM with PVM. > VM's state includes ram and device state. So we will migrate PVM's > state to SVM when do checkpoint, just like migration does. > > We will cache PVM's state in slave, we use QEMUSizedBuffer > to store the data, we need to know the size of VM state, so in master, > we use qsb to store VM state temporarily, get the data size by call qsb_get_length() > and then migrate the data to the qsb in the secondary side. > > Signed-off-by: zhanghailiang > Signed-off-by: Gonglei > Signed-off-by: Li Zhijian > --- > migration/colo.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++---- > migration/ram.c | 47 +++++++++++++++++++++++++++++-------- > migration/savevm.c | 2 +- > 3 files changed, 101 insertions(+), 16 deletions(-) > > diff --git a/migration/colo.c b/migration/colo.c > index 2510762..b865513 100644 > --- a/migration/colo.c > +++ b/migration/colo.c > @@ -17,6 +17,9 @@ > #include "qemu/error-report.h" > #include "qemu/sockets.h" > > +/* colo buffer */ > +#define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024) > + > bool colo_supported(void) > { > return true; > @@ -94,9 +97,12 @@ static int colo_ctl_get(QEMUFile *f, uint32_t require) > return value; > } > > -static int colo_do_checkpoint_transaction(MigrationState *s) > +static int colo_do_checkpoint_transaction(MigrationState *s, > + QEMUSizedBuffer *buffer) > { > int ret; > + size_t size; > + QEMUFile *trans = NULL; > > ret = colo_ctl_put(s->to_dst_file, COLO_COMMAND_CHECKPOINT_REQUEST, 0); > if (ret < 0) { > @@ -107,15 +113,47 @@ static int colo_do_checkpoint_transaction(MigrationState *s) > if (ret < 0) { > goto out; > } > + /* Reset colo buffer and open it for write */ > + qsb_set_length(buffer, 0); > + trans = qemu_bufopen("w", buffer); > + if (!trans) { > + error_report("Open colo buffer for write failed"); > + goto out; > + } > > - /* TODO: suspend and save vm state to colo buffer */ > + qemu_mutex_lock_iothread(); > + vm_stop_force_state(RUN_STATE_COLO); > + qemu_mutex_unlock_iothread(); > + trace_colo_vm_state_change("run", "stop"); > + > + /* Disable block migration */ > + s->params.blk = 0; > + s->params.shared = 0; > + qemu_savevm_state_header(trans); > + qemu_savevm_state_begin(trans, &s->params); > + qemu_mutex_lock_iothread(); > + qemu_savevm_state_complete(trans); > + qemu_mutex_unlock_iothread(); > + > + qemu_fflush(trans); > > ret = colo_ctl_put(s->to_dst_file, COLO_COMMAND_VMSTATE_SEND, 0); > if (ret < 0) { > goto out; > } > + /* we send the total size of the vmstate first */ > + size = qsb_get_length(buffer); > + ret = colo_ctl_put(s->to_dst_file, COLO_COMMAND_VMSTATE_SIZE, size); > + if (ret < 0) { > + goto out; > + } > > - /* TODO: send vmstate to Secondary */ > + qsb_put_buffer(s->to_dst_file, buffer, size); > + qemu_fflush(s->to_dst_file); > + ret = qemu_file_get_error(s->to_dst_file); > + if (ret < 0) { > + goto out; > + } > > ret = colo_ctl_get(s->from_dst_file, COLO_COMMAND_VMSTATE_RECEIVED); > if (ret < 0) { > @@ -127,14 +165,24 @@ static int colo_do_checkpoint_transaction(MigrationState *s) > goto out; > } > > - /* TODO: resume Primary */ > + ret = 0; > + /* resume master */ > + qemu_mutex_lock_iothread(); > + vm_start(); > + qemu_mutex_unlock_iothread(); > + trace_colo_vm_state_change("stop", "run"); > > out: > + if (trans) { > + qemu_fclose(trans); > + } > + > return ret; > } > > static void colo_process_checkpoint(MigrationState *s) > { > + QEMUSizedBuffer *buffer = NULL; > int fd, ret = 0; > > /* Dup the fd of to_dst_file */ > @@ -159,6 +207,13 @@ static void colo_process_checkpoint(MigrationState *s) > goto out; > } > > + buffer = qsb_create(NULL, COLO_BUFFER_BASE_SIZE); > + if (buffer == NULL) { > + ret = -ENOMEM; > + error_report("Failed to allocate buffer!"); Please say 'Failed to allocate colo buffer'; QEMU has lots and lots of buffers. > + goto out; > + } > + > qemu_mutex_lock_iothread(); > vm_start(); > qemu_mutex_unlock_iothread(); > @@ -166,7 +221,7 @@ static void colo_process_checkpoint(MigrationState *s) > > while (s->state == MIGRATION_STATUS_COLO) { > /* start a colo checkpoint */ > - ret = colo_do_checkpoint_transaction(s); > + ret = colo_do_checkpoint_transaction(s, buffer); > if (ret < 0) { > goto out; > } > @@ -179,6 +234,9 @@ out: > migrate_set_state(&s->state, MIGRATION_STATUS_COLO, > MIGRATION_STATUS_COMPLETED); > > + qsb_free(buffer); > + buffer = NULL; > + > if (s->from_dst_file) { > qemu_fclose(s->from_dst_file); > } > diff --git a/migration/ram.c b/migration/ram.c > index a25bcc7..5784c15 100644 > --- a/migration/ram.c > +++ b/migration/ram.c > @@ -38,6 +38,7 @@ > #include "trace.h" > #include "exec/ram_addr.h" > #include "qemu/rcu_queue.h" > +#include "migration/colo.h" > > #ifdef DEBUG_MIGRATION_RAM > #define DPRINTF(fmt, ...) \ > @@ -1165,15 +1166,8 @@ void migration_bitmap_extend(ram_addr_t old, ram_addr_t new) > } > } > > -/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has > - * long-running RCU critical section. When rcu-reclaims in the code > - * start to become numerous it will be necessary to reduce the > - * granularity of these critical sections. > - */ > - > -static int ram_save_setup(QEMUFile *f, void *opaque) > +static int ram_save_init_globals(void) > { > - RAMBlock *block; > int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */ > > dirty_rate_high_cnt = 0; > @@ -1233,6 +1227,31 @@ static int ram_save_setup(QEMUFile *f, void *opaque) > migration_bitmap_sync(); > qemu_mutex_unlock_ramlist(); > qemu_mutex_unlock_iothread(); > + rcu_read_unlock(); > + > + return 0; > +} It surprises me you want migration_bitmap_sync in ram_save_init_globals(), but I guess you want the first sync at the start. > +/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has > + * long-running RCU critical section. When rcu-reclaims in the code > + * start to become numerous it will be necessary to reduce the > + * granularity of these critical sections. > + */ > + > +static int ram_save_setup(QEMUFile *f, void *opaque) > +{ > + RAMBlock *block; > + > + /* > + * migration has already setup the bitmap, reuse it. > + */ > + if (!migration_in_colo_state()) { > + if (ram_save_init_globals() < 0) { > + return -1; > + } > + } > + > + rcu_read_lock(); > > qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE); > > @@ -1332,7 +1351,8 @@ static int ram_save_complete(QEMUFile *f, void *opaque) > while (true) { > int pages; > > - pages = ram_find_and_save_block(f, true, &bytes_transferred); > + pages = ram_find_and_save_block(f, !migration_in_colo_state(), > + &bytes_transferred); > /* no more blocks to sent */ > if (pages == 0) { > break; > @@ -1343,8 +1363,15 @@ static int ram_save_complete(QEMUFile *f, void *opaque) > ram_control_after_iterate(f, RAM_CONTROL_FINISH); > > rcu_read_unlock(); > + /* > + * Since we need to reuse dirty bitmap in colo, > + * don't cleanup the bitmap. > + */ > + if (!migrate_colo_enabled() || > + migration_has_failed(migrate_get_current())) { > + migration_end(); > + } > > - migration_end(); > qemu_put_be64(f, RAM_SAVE_FLAG_EOS); > > return 0; > diff --git a/migration/savevm.c b/migration/savevm.c > index dbcc39a..0faf12b 100644 > --- a/migration/savevm.c > +++ b/migration/savevm.c > @@ -48,7 +48,7 @@ > #include "qemu/iov.h" > #include "block/snapshot.h" > #include "block/qapi.h" > - > +#include "migration/colo.h" > > #ifndef ETH_P_RARP > #define ETH_P_RARP 0x8035 Wrong patch? So other than those minor things: Reviewed-by: Dr. David Alan Gilbert but watch out for the recent changes to migrate_end that went in a few days ago. Dave > -- > 1.8.3.1 > > -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK