All of lore.kernel.org
 help / color / mirror / Atom feed
From: zhanghailiang <zhang.zhanghailiang@huawei.com>
To: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: lizhijian@cn.fujitsu.com, quintela@redhat.com,
	yunhong.jiang@intel.com, eddie.dong@intel.com,
	peter.huangpeng@huawei.com, qemu-devel@nongnu.org,
	arei.gonglei@huawei.com, stefanha@redhat.com,
	amit.shah@redhat.com
Subject: Re: [Qemu-devel] [PATCH COLO-Frame v10 13/38] COLO: Load PVM's dirty pages into SVM's RAM cache temporarily
Date: Mon, 16 Nov 2015 15:57:55 +0800	[thread overview]
Message-ID: <56498C83.4060707@huawei.com> (raw)
In-Reply-To: <20151113153936.GJ2456@work-vm>

On 2015/11/13 23:39, Dr. David Alan Gilbert wrote:
> * zhanghailiang (zhang.zhanghailiang@huawei.com) wrote:
>> We should not load PVM's state directly into SVM, because errors may
>> happen while SVM is receiving data, which would break SVM.
>>
>> We need to ensure that all data has been received before loading the state into SVM. We use
>> extra memory to cache this data (PVM's ram). The ram cache on the secondary side
>> is initially the same as SVM/PVM's memory. During the checkpoint process,
>> we first cache the dirty pages of PVM into this ram cache, so this ram cache
>> is always the same as PVM's memory at every checkpoint; then we flush this cached ram
>> to SVM after we have received all of PVM's state.
>>
>> Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
>> Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
>> Signed-off-by: Gonglei <arei.gonglei@huawei.com>
>> ---
>> v10: Split the process of dirty pages recording into a new patch
>> ---
>>   include/exec/ram_addr.h  |  1 +
>>   include/migration/colo.h |  3 +++
>>   migration/colo.c         | 14 +++++++++--
>>   migration/ram.c          | 61 ++++++++++++++++++++++++++++++++++++++++++++++--
>>   4 files changed, 75 insertions(+), 4 deletions(-)
>>
>> diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
>> index 3360ac5..e7c4310 100644
>> --- a/include/exec/ram_addr.h
>> +++ b/include/exec/ram_addr.h
>> @@ -28,6 +28,7 @@ struct RAMBlock {
>>       struct rcu_head rcu;
>>       struct MemoryRegion *mr;
>>       uint8_t *host;
>> +    uint8_t *host_cache; /* For colo, VM's ram cache */
>
> I suggest you make the name have 'colo' in it; e.g. colo_cache;
> 'host_cache' is a bit generic.
>

Hmm, this change makes sense; I will update it in the next version.

>>       ram_addr_t offset;
>>       ram_addr_t used_length;
>>       ram_addr_t max_length;
>> diff --git a/include/migration/colo.h b/include/migration/colo.h
>> index 2676c4a..8edd5f1 100644
>> --- a/include/migration/colo.h
>> +++ b/include/migration/colo.h
>> @@ -29,4 +29,7 @@ bool migration_incoming_enable_colo(void);
>>   void migration_incoming_exit_colo(void);
>>   void *colo_process_incoming_thread(void *opaque);
>>   bool migration_incoming_in_colo_state(void);
>> +/* ram cache */
>> +int colo_init_ram_cache(void);
>> +void colo_release_ram_cache(void);
>>   #endif
>> diff --git a/migration/colo.c b/migration/colo.c
>> index b865513..25f85b2 100644
>> --- a/migration/colo.c
>> +++ b/migration/colo.c
>> @@ -304,6 +304,12 @@ void *colo_process_incoming_thread(void *opaque)
>>           goto out;
>>       }
>>
>> +    ret = colo_init_ram_cache();
>> +    if (ret < 0) {
>> +        error_report("Failed to initialize ram cache");
>> +        goto out;
>> +    }
>> +
>>       ret = colo_ctl_put(mis->to_src_file, COLO_COMMAND_CHECKPOINT_READY, 0);
>>       if (ret < 0) {
>>           goto out;
>> @@ -331,14 +337,14 @@ void *colo_process_incoming_thread(void *opaque)
>>               goto out;
>>           }
>>
>> -        /* TODO: read migration data into colo buffer */
>> +        /* TODO Load VM state */
>>
>>           ret = colo_ctl_put(mis->to_src_file, COLO_COMMAND_VMSTATE_RECEIVED, 0);
>>           if (ret < 0) {
>>               goto out;
>>           }
>>
>> -        /* TODO: load vm state */
>> +        /* TODO: flush vm state */
>
> Do you really need to update/change the TODOs here?
>

No, I will drop this ;)

>>           ret = colo_ctl_put(mis->to_src_file, COLO_COMMAND_VMSTATE_LOADED, 0);
>>           if (ret < 0) {
>> @@ -352,6 +358,10 @@ out:
>>                        strerror(-ret));
>>       }
>>
>> +    qemu_mutex_lock_iothread();
>> +    colo_release_ram_cache();
>> +    qemu_mutex_unlock_iothread();
>> +
>>       if (mis->to_src_file) {
>>           qemu_fclose(mis->to_src_file);
>>       }
>> diff --git a/migration/ram.c b/migration/ram.c
>> index 5784c15..b094dc3 100644
>> --- a/migration/ram.c
>> +++ b/migration/ram.c
>> @@ -222,6 +222,7 @@ static RAMBlock *last_sent_block;
>>   static ram_addr_t last_offset;
>>   static QemuMutex migration_bitmap_mutex;
>>   static uint64_t migration_dirty_pages;
>> +static bool ram_cache_enable;
>>   static uint32_t last_version;
>>   static bool ram_bulk_stage;
>>
>> @@ -1446,7 +1447,11 @@ static inline void *host_from_stream_offset(QEMUFile *f,
>>               return NULL;
>>           }
>>
>> -        return block->host + offset;
>> +        if (ram_cache_enable) {
>> +            return block->host_cache + offset;
>> +        } else {
>> +            return block->host + offset;
>> +        }
>>       }
>>
>>       len = qemu_get_byte(f);
>> @@ -1456,7 +1461,11 @@ static inline void *host_from_stream_offset(QEMUFile *f,
>>       QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
>>           if (!strncmp(id, block->idstr, sizeof(id)) &&
>>               block->max_length > offset) {
>> -            return block->host + offset;
>> +            if (ram_cache_enable) {
>> +                return block->host_cache + offset;
>> +            } else {
>> +                return block->host + offset;
>> +            }
>>           }
>>       }
>>
>> @@ -1707,6 +1716,54 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
>>       return ret;
>>   }
>>
>> +/*
>> + * colo cache: this is for secondary VM, we cache the whole
>> + * memory of the secondary VM, it will be called after first migration.
>> + */
>> +int colo_init_ram_cache(void)
>> +{
>> +    RAMBlock *block;
>> +
>> +    rcu_read_lock();
>> +    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
>> +        block->host_cache = qemu_anon_ram_alloc(block->used_length, NULL);
>> +        if (!block->host_cache) {
>> +            goto out_locked;
>> +        }
>
> Please print an error message; stating the function, block name and size that
> failed.
>

Good idea, I will fix it in the next version, thanks.

>> +        memcpy(block->host_cache, block->host, block->used_length);
>> +    }
>> +    rcu_read_unlock();
>> +    ram_cache_enable = true;
>> +    return 0;
>> +
>> +out_locked:
>> +    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
>> +        if (block->host_cache) {
>> +            qemu_anon_ram_free(block->host_cache, block->used_length);
>> +            block->host_cache = NULL;
>> +        }
>> +    }
>> +
>> +    rcu_read_unlock();
>> +    return -errno;
>> +}
>> +
>> +void colo_release_ram_cache(void)
>> +{
>> +    RAMBlock *block;
>> +
>> +    ram_cache_enable = false;
>> +
>> +    rcu_read_lock();
>> +    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
>> +        if (block->host_cache) {
>> +            qemu_anon_ram_free(block->host_cache, block->used_length);
>> +            block->host_cache = NULL;
>> +        }
>> +    }
>> +    rcu_read_unlock();
>> +}
>> +
>>   static SaveVMHandlers savevm_ram_handlers = {
>>       .save_live_setup = ram_save_setup,
>>       .save_live_iterate = ram_save_iterate,
>> --
>> 1.8.3.1
>>
>>
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
>
> .
>

  reply	other threads:[~2015-11-16  7:58 UTC|newest]

Thread overview: 100+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-11-03 11:56 [Qemu-devel] [PATCH COLO-Frame v10 00/38] COarse-grain LOck-stepping(COLO) Virtual Machines for Non-stop Service (FT) zhanghailiang
2015-11-03 11:56 ` [Qemu-devel] [PATCH COLO-Frame v10 01/38] configure: Add parameter for configure to enable/disable COLO support zhanghailiang
2015-11-05 14:52   ` Eric Blake
2015-11-06  7:36     ` zhanghailiang
2015-11-03 11:56 ` [Qemu-devel] [PATCH COLO-Frame v10 02/38] migration: Introduce capability 'x-colo' to migration zhanghailiang
2015-11-13 16:01   ` Eric Blake
2015-11-16  8:35     ` zhanghailiang
2015-11-03 11:56 ` [Qemu-devel] [PATCH COLO-Frame v10 03/38] COLO: migrate colo related info to secondary node zhanghailiang
2015-11-06 16:36   ` Dr. David Alan Gilbert
2015-11-03 11:56 ` [Qemu-devel] [PATCH COLO-Frame v10 04/38] migration: Add state records for migration incoming zhanghailiang
2015-11-03 11:56 ` [Qemu-devel] [PATCH COLO-Frame v10 05/38] migration: Integrate COLO checkpoint process into migration zhanghailiang
2015-11-06 16:48   ` Dr. David Alan Gilbert
2015-11-13 16:42   ` Eric Blake
2015-11-16 13:00     ` zhanghailiang
2015-11-03 11:56 ` [Qemu-devel] [PATCH COLO-Frame v10 06/38] migration: Integrate COLO checkpoint process into loadvm zhanghailiang
2015-11-06 17:29   ` Dr. David Alan Gilbert
2015-11-09  6:09     ` zhanghailiang
2015-11-03 11:56 ` [Qemu-devel] [PATCH COLO-Frame v10 07/38] migration: Rename the'file' member of MigrationState and MigrationIncomingState zhanghailiang
2015-11-03 11:56 ` [Qemu-devel] [PATCH COLO-Frame v10 08/38] COLO/migration: establish a new communication path from destination to source zhanghailiang
2015-11-03 11:56 ` [Qemu-devel] [PATCH COLO-Frame v10 09/38] COLO: Implement colo checkpoint protocol zhanghailiang
2015-11-06 18:26   ` Dr. David Alan Gilbert
2015-11-09  6:51     ` zhanghailiang
2015-11-09  7:33       ` zhanghailiang
2015-11-13 16:46   ` Eric Blake
2015-11-17  7:04     ` zhanghailiang
2015-11-03 11:56 ` [Qemu-devel] [PATCH COLO-Frame v10 10/38] COLO: Add a new RunState RUN_STATE_COLO zhanghailiang
2015-11-06 18:28   ` Dr. David Alan Gilbert
2015-11-13 16:47   ` Eric Blake
2015-11-17  7:15     ` zhanghailiang
2015-11-03 11:56 ` [Qemu-devel] [PATCH COLO-Frame v10 11/38] QEMUSizedBuffer: Introduce two help functions for qsb zhanghailiang
2015-11-06 18:30   ` Dr. David Alan Gilbert
2015-11-09  8:14     ` zhanghailiang
2015-11-03 11:56 ` [Qemu-devel] [PATCH COLO-Frame v10 12/38] COLO: Save PVM state to secondary side when do checkpoint zhanghailiang
2015-11-06 18:59   ` Dr. David Alan Gilbert
2015-11-09  9:17     ` zhanghailiang
2015-11-13 18:53       ` Dr. David Alan Gilbert
2015-11-17 10:20         ` zhanghailiang
2015-11-03 11:56 ` [Qemu-devel] [PATCH COLO-Frame v10 13/38] COLO: Load PVM's dirty pages into SVM's RAM cache temporarily zhanghailiang
2015-11-13 15:39   ` Dr. David Alan Gilbert
2015-11-16  7:57     ` zhanghailiang [this message]
2015-11-03 11:56 ` [Qemu-devel] [PATCH COLO-Frame v10 14/38] COLO: Load VMState into qsb before restore it zhanghailiang
2015-11-13 16:02   ` Dr. David Alan Gilbert
2015-11-16  8:46     ` zhanghailiang
2015-11-03 11:56 ` [Qemu-devel] [PATCH COLO-Frame v10 15/38] ram/COLO: Record pages received from PVM by re-using migration dirty bitmap zhanghailiang
2015-11-13 16:19   ` Dr. David Alan Gilbert
2015-11-16  9:07     ` zhanghailiang
2015-11-03 11:56 ` [Qemu-devel] [PATCH COLO-Frame v10 16/38] COLO: Flush PVM's cached RAM into SVM's memory zhanghailiang
2015-11-13 16:38   ` Dr. David Alan Gilbert
2015-11-16 12:46     ` zhanghailiang
2015-11-03 11:56 ` [Qemu-devel] [PATCH COLO-Frame v10 17/38] COLO: synchronize PVM's state to SVM periodically zhanghailiang
2015-11-13 18:34   ` Dr. David Alan Gilbert
2015-11-17  9:11     ` zhanghailiang
2015-11-17 10:08       ` Dr. David Alan Gilbert
2015-11-17 10:29         ` zhanghailiang
2015-11-03 11:56 ` [Qemu-devel] [PATCH COLO-Frame v10 18/38] COLO failover: Introduce a new command to trigger a failover zhanghailiang
2015-11-13 16:59   ` Eric Blake
2015-11-17  8:03     ` zhanghailiang
2015-11-03 11:56 ` [Qemu-devel] [PATCH COLO-Frame v10 19/38] COLO failover: Introduce state to record failover process zhanghailiang
2015-11-20 15:51   ` Dr. David Alan Gilbert
2015-11-23  5:56     ` zhanghailiang
2015-11-03 11:56 ` [Qemu-devel] [PATCH COLO-Frame v10 20/38] COLO: Implement failover work for Primary VM zhanghailiang
2015-11-03 11:56 ` [Qemu-devel] [PATCH COLO-Frame v10 21/38] COLO: Implement failover work for Secondary VM zhanghailiang
2015-11-03 11:56 ` [Qemu-devel] [PATCH COLO-Frame v10 22/38] COLO: implement default failover treatment zhanghailiang
2015-11-03 11:56 ` [Qemu-devel] [PATCH COLO-Frame v10 23/38] qmp event: Add event notification for COLO error zhanghailiang
2015-11-20 21:50   ` Eric Blake
2015-11-23  6:01     ` zhanghailiang
2015-11-03 11:56 ` [Qemu-devel] [PATCH COLO-Frame v10 24/38] COLO failover: Shutdown related socket fd when do failover zhanghailiang
2015-11-03 11:56 ` [Qemu-devel] [PATCH COLO-Frame v10 25/38] COLO failover: Don't do failover during loading VM's state zhanghailiang
2015-11-03 11:56 ` [Qemu-devel] [PATCH COLO-Frame v10 26/38] COLO: Control the checkpoint delay time by migrate-set-parameters command zhanghailiang
2015-11-03 11:56 ` [Qemu-devel] [PATCH COLO-Frame v10 27/38] COLO: Process shutdown command for VM in COLO state zhanghailiang
2015-11-03 11:56 ` [Qemu-devel] [PATCH COLO-Frame v10 28/38] COLO: Update the global runstate after going into colo state zhanghailiang
2015-11-03 11:56 ` [Qemu-devel] [PATCH COLO-Frame v10 29/38] savevm: Split load vm state function qemu_loadvm_state zhanghailiang
2015-11-03 11:56 ` [Qemu-devel] [PATCH COLO-Frame v10 30/38] COLO: Separate the process of saving/loading ram and device state zhanghailiang
2015-11-03 11:56 ` [Qemu-devel] [PATCH COLO-Frame v10 31/38] COLO: Split qemu_savevm_state_begin out of checkpoint process zhanghailiang
2015-11-03 11:56 ` [Qemu-devel] [PATCH COLO-Frame v10 32/38] netfilter: Add a public API to release all the buffered packets zhanghailiang
2015-11-03 12:39   ` Yang Hongyang
2015-11-03 13:19     ` zhanghailiang
2015-11-03 11:56 ` [Qemu-devel] [PATCH COLO-Frame v10 33/38] netfilter: Introduce an API to delete the timer of all buffer-filters zhanghailiang
2015-11-03 12:41   ` Yang Hongyang
2015-11-03 13:07     ` zhanghailiang
2015-11-04  2:51       ` Jason Wang
2015-11-04  3:08         ` zhanghailiang
2015-11-03 11:56 ` [Qemu-devel] [PATCH COLO-Frame v10 34/38] filter-buffer: Accept zero interval zhanghailiang
2015-11-03 12:43   ` Yang Hongyang
2015-11-04  2:52     ` Jason Wang
2015-11-03 11:56 ` [Qemu-devel] [PATCH COLO-Frame v10 35/38] netfilter: Introduce a API to automatically add filter-buffer for each netdev zhanghailiang
2015-11-03 12:57   ` Yang Hongyang
2015-11-03 13:16     ` zhanghailiang
2015-11-04  2:56   ` Jason Wang
2015-11-04  3:07     ` zhanghailiang
2015-11-05  7:43     ` zhanghailiang
2015-11-05  8:52       ` Wen Congyang
2015-11-05  9:21         ` Jason Wang
2015-11-05  9:33           ` Wen Congyang
2015-11-05  9:19       ` Jason Wang
2015-11-05 10:58         ` zhanghailiang
2015-11-03 11:56 ` [Qemu-devel] [PATCH COLO-Frame v10 36/38] netfilter: Introduce an API to delete all the automatically added netfilters zhanghailiang
2015-11-03 12:58   ` Yang Hongyang
2015-11-03 11:56 ` [Qemu-devel] [PATCH COLO-Frame v10 37/38] colo: Use the netfilter to buffer and release packets zhanghailiang
2015-11-03 11:56 ` [Qemu-devel] [PATCH COLO-Frame v10 38/38] COLO: Add block replication into colo process zhanghailiang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=56498C83.4060707@huawei.com \
    --to=zhang.zhanghailiang@huawei.com \
    --cc=amit.shah@redhat.com \
    --cc=arei.gonglei@huawei.com \
    --cc=dgilbert@redhat.com \
    --cc=eddie.dong@intel.com \
    --cc=lizhijian@cn.fujitsu.com \
    --cc=peter.huangpeng@huawei.com \
    --cc=qemu-devel@nongnu.org \
    --cc=quintela@redhat.com \
    --cc=stefanha@redhat.com \
    --cc=yunhong.jiang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.