From: zhanghailiang
To: "Dr. David Alan Gilbert"
Cc: lizhijian@cn.fujitsu.com, quintela@redhat.com, yunhong.jiang@intel.com, eddie.dong@intel.com, peter.huangpeng@huawei.com, qemu-devel@nongnu.org, arei.gonglei@huawei.com, stefanha@redhat.com, amit.shah@redhat.com
Subject: Re: [Qemu-devel] [PATCH COLO-Frame v10 13/38] COLO: Load PVM's dirty pages into SVM's RAM cache temporarily
Date: Mon, 16 Nov 2015 15:57:55 +0800
Message-ID: <56498C83.4060707@huawei.com>
In-Reply-To: <20151113153936.GJ2456@work-vm>
References: <1446551816-15768-1-git-send-email-zhang.zhanghailiang@huawei.com> <1446551816-15768-14-git-send-email-zhang.zhanghailiang@huawei.com> <20151113153936.GJ2456@work-vm>

On 2015/11/13 23:39, Dr. David Alan Gilbert wrote:
> * zhanghailiang (zhang.zhanghailiang@huawei.com) wrote:
>> We should not load the PVM's state directly into the SVM, because errors may
>> happen while the SVM is receiving data, which would break the SVM.
>>
>> We need to ensure that all data has been received before we load the state
>> into the SVM. We use extra memory to cache this data (the PVM's RAM). The
>> RAM cache on the secondary side is initialized to be the same as the
>> SVM/PVM's memory. During each checkpoint we first write the PVM's dirty
>> pages into this RAM cache, so the cache always matches the PVM's memory at
>> every checkpoint; we then flush the cached RAM into the SVM after we have
>> received all of the PVM's state.
>>
>> Signed-off-by: zhanghailiang
>> Signed-off-by: Li Zhijian
>> Signed-off-by: Gonglei
>> ---
>> v10: Split the process of dirty pages recording into a new patch
>> ---
>>  include/exec/ram_addr.h  |  1 +
>>  include/migration/colo.h |  3 +++
>>  migration/colo.c         | 14 +++++++++--
>>  migration/ram.c          | 61 ++++++++++++++++++++++++++++++++++++++++++++++--
>>  4 files changed, 75 insertions(+), 4 deletions(-)
>>
>> diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
>> index 3360ac5..e7c4310 100644
>> --- a/include/exec/ram_addr.h
>> +++ b/include/exec/ram_addr.h
>> @@ -28,6 +28,7 @@ struct RAMBlock {
>>      struct rcu_head rcu;
>>      struct MemoryRegion *mr;
>>      uint8_t *host;
>> +    uint8_t *host_cache; /* For colo, VM's ram cache */
>
> I suggest you make the name have 'colo' in it; e.g. colo_cache;
> 'host_cache' is a bit generic.
>

Hmm, this change makes sense, I will update it in the next version.
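
Just to make sure we mean the same thing, the renamed field would look roughly
like this (only a sketch of what I plan for the next version, the comment
wording is not final):

    struct RAMBlock {
        struct rcu_head rcu;
        struct MemoryRegion *mr;
        uint8_t *host;
        uint8_t *colo_cache; /* For colo, secondary VM's ram cache */
        ram_addr_t offset;
        ram_addr_t used_length;
        ram_addr_t max_length;
        /* other members unchanged */
    };

and the two places in host_from_stream_offset() would then return
block->colo_cache + offset instead of block->host_cache + offset.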
>>      ram_addr_t offset;
>>      ram_addr_t used_length;
>>      ram_addr_t max_length;
>> diff --git a/include/migration/colo.h b/include/migration/colo.h
>> index 2676c4a..8edd5f1 100644
>> --- a/include/migration/colo.h
>> +++ b/include/migration/colo.h
>> @@ -29,4 +29,7 @@ bool migration_incoming_enable_colo(void);
>>  void migration_incoming_exit_colo(void);
>>  void *colo_process_incoming_thread(void *opaque);
>>  bool migration_incoming_in_colo_state(void);
>> +/* ram cache */
>> +int colo_init_ram_cache(void);
>> +void colo_release_ram_cache(void);
>>  #endif
>> diff --git a/migration/colo.c b/migration/colo.c
>> index b865513..25f85b2 100644
>> --- a/migration/colo.c
>> +++ b/migration/colo.c
>> @@ -304,6 +304,12 @@ void *colo_process_incoming_thread(void *opaque)
>>          goto out;
>>      }
>>
>> +    ret = colo_init_ram_cache();
>> +    if (ret < 0) {
>> +        error_report("Failed to initialize ram cache");
>> +        goto out;
>> +    }
>> +
>>      ret = colo_ctl_put(mis->to_src_file, COLO_COMMAND_CHECKPOINT_READY, 0);
>>      if (ret < 0) {
>>          goto out;
>>      }
>> @@ -331,14 +337,14 @@ void *colo_process_incoming_thread(void *opaque)
>>              goto out;
>>          }
>>
>> -        /* TODO: read migration data into colo buffer */
>> +        /* TODO Load VM state */
>>
>>          ret = colo_ctl_put(mis->to_src_file, COLO_COMMAND_VMSTATE_RECEIVED, 0);
>>          if (ret < 0) {
>>              goto out;
>>          }
>>
>> -        /* TODO: load vm state */
>> +        /* TODO: flush vm state */
>
> Do you really need to update/change the TODOs here?
>

No, I will drop this ;)

>>          ret = colo_ctl_put(mis->to_src_file, COLO_COMMAND_VMSTATE_LOADED, 0);
>>          if (ret < 0) {
>> @@ -352,6 +358,10 @@ out:
>>                       strerror(-ret));
>>      }
>>
>> +    qemu_mutex_lock_iothread();
>> +    colo_release_ram_cache();
>> +    qemu_mutex_unlock_iothread();
>> +
>>      if (mis->to_src_file) {
>>          qemu_fclose(mis->to_src_file);
>>      }
>> diff --git a/migration/ram.c b/migration/ram.c
>> index 5784c15..b094dc3 100644
>> --- a/migration/ram.c
>> +++ b/migration/ram.c
>> @@ -222,6 +222,7 @@ static RAMBlock *last_sent_block;
>>  static ram_addr_t last_offset;
>>  static QemuMutex migration_bitmap_mutex;
>>  static uint64_t migration_dirty_pages;
>> +static bool ram_cache_enable;
>>  static uint32_t last_version;
>>  static bool ram_bulk_stage;
>>
>> @@ -1446,7 +1447,11 @@ static inline void *host_from_stream_offset(QEMUFile *f,
>>              return NULL;
>>          }
>>
>> -        return block->host + offset;
>> +        if (ram_cache_enable) {
>> +            return block->host_cache + offset;
>> +        } else {
>> +            return block->host + offset;
>> +        }
>>      }
>>
>>      len = qemu_get_byte(f);
>> @@ -1456,7 +1461,11 @@ static inline void *host_from_stream_offset(QEMUFile *f,
>>      QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
>>          if (!strncmp(id, block->idstr, sizeof(id)) &&
>>              block->max_length > offset) {
>> -            return block->host + offset;
>> +            if (ram_cache_enable) {
>> +                return block->host_cache + offset;
>> +            } else {
>> +                return block->host + offset;
>> +            }
>>          }
>>      }
>>
>> @@ -1707,6 +1716,54 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
>>      return ret;
>>  }
>>
>> +/*
>> + * colo cache: this is for secondary VM, we cache the whole
>> + * memory of the secondary VM, it will be called after first migration.
>> + */
>> +int colo_init_ram_cache(void)
>> +{
>> +    RAMBlock *block;
>> +
>> +    rcu_read_lock();
>> +    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
>> +        block->host_cache = qemu_anon_ram_alloc(block->used_length, NULL);
>> +        if (!block->host_cache) {
>> +            goto out_locked;
>> +        }
>
> Please print an error message stating the function, block name and size that
> failed.
>

Good idea, will fix in next version, thanks.

>> +        memcpy(block->host_cache, block->host, block->used_length);
>> +    }
>> +    rcu_read_unlock();
>> +    ram_cache_enable = true;
>> +    return 0;
>> +
>> +out_locked:
>> +    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
>> +        if (block->host_cache) {
>> +            qemu_anon_ram_free(block->host_cache, block->used_length);
>> +            block->host_cache = NULL;
>> +        }
>> +    }
>> +
>> +    rcu_read_unlock();
>> +    return -errno;
>> +}
>> +
>> +void colo_release_ram_cache(void)
>> +{
>> +    RAMBlock *block;
>> +
>> +    ram_cache_enable = false;
>> +
>> +    rcu_read_lock();
>> +    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
>> +        if (block->host_cache) {
>> +            qemu_anon_ram_free(block->host_cache, block->used_length);
>> +            block->host_cache = NULL;
>> +        }
>> +    }
>> +    rcu_read_unlock();
>> +}
>> +
>>  static SaveVMHandlers savevm_ram_handlers = {
>>      .save_live_setup = ram_save_setup,
>>      .save_live_iterate = ram_save_iterate,
>> --
>> 1.8.3.1
>>
>>
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
>
> .
>
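
For the error message on the allocation failure above, I am thinking of
something along these lines (just a sketch, assuming the field is renamed to
colo_cache as you suggested; the exact wording and format macro may still
change):

    block->colo_cache = qemu_anon_ram_alloc(block->used_length, NULL);
    if (!block->colo_cache) {
        /* Report the function, block name and size that failed */
        error_report("%s: Can't alloc memory for COLO cache of block %s, size 0x"
                     RAM_ADDR_FMT, __func__, block->idstr, block->used_length);
        goto out_locked;
    }

Is that the kind of detail you had in mind?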