From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:44196) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1aH9Ye-0007vw-E8 for qemu-devel@nongnu.org; Thu, 07 Jan 2016 07:20:49 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1aH9Yd-0004NO-5P for qemu-devel@nongnu.org; Thu, 07 Jan 2016 07:20:48 -0500 Received: from szxga02-in.huawei.com ([119.145.14.65]:22815) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1aH9Yc-0004MD-Cd for qemu-devel@nongnu.org; Thu, 07 Jan 2016 07:20:47 -0500 From: zhanghailiang Date: Thu, 7 Jan 2016 20:20:08 +0800 Message-ID: <1452169208-840-14-git-send-email-zhang.zhanghailiang@huawei.com> In-Reply-To: <1452169208-840-1-git-send-email-zhang.zhanghailiang@huawei.com> References: <1452169208-840-1-git-send-email-zhang.zhanghailiang@huawei.com> MIME-Version: 1.0 Content-Type: text/plain Subject: [Qemu-devel] [RFC 13/13] snapshot: Remove page's write-protect and copy the content during setup stage List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel@nongnu.org Cc: aarcange@redhat.com, zhanghailiang , hanweidong@huawei.com, quintela@redhat.com, peter.huangpeng@huawei.com, dgilbert@redhat.com, amit.shah@redhat.com If we modify VM's RAM (pages) during the setup stage after enabling write-protect notification in the snapshot thread, the modification action will get stuck, because we only remove the page's write-protect in the savevm process: it is blocked by itself. To fix this bug, we will remove the page's write-protect in the fault thread during the setup stage. Besides, we should not try to get the global lock after the setup stage, or there may be a deadlock error. 
Signed-off-by: zhanghailiang --- include/migration/migration.h | 4 ++-- migration/migration.c | 2 +- migration/postcopy-ram.c | 17 ++++++++++++++++- migration/ram.c | 37 +++++++++++++++++++++++++++++++------ 4 files changed, 50 insertions(+), 10 deletions(-) diff --git a/include/migration/migration.h b/include/migration/migration.h index ef4c071..435de31 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -127,7 +127,7 @@ struct MigrationSrcPageRequest { RAMBlock *rb; hwaddr offset; hwaddr len; - + uint8_t *pages_copy_addr; QSIMPLEQ_ENTRY(MigrationSrcPageRequest) next_req; }; @@ -333,7 +333,7 @@ void global_state_store_running(void); void flush_page_queue(MigrationState *ms); int ram_save_queue_pages(MigrationState *ms, const char *rbname, - ram_addr_t start, ram_addr_t len); + ram_addr_t start, ram_addr_t len, bool copy_pages); PostcopyState postcopy_state_get(void); /* Set the state and return the old state */ diff --git a/migration/migration.c b/migration/migration.c index 3765c3b..bf4c7a1 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1248,7 +1248,7 @@ static void migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname, return; } - if (ram_save_queue_pages(ms, rbname, start, len)) { + if (ram_save_queue_pages(ms, rbname, start, len, false)) { mark_source_rp_bad(ms); } } diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 61392d3..2cf477d 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -543,13 +543,28 @@ static void *postcopy_ram_fault_thread(void *opaque) MigrationState *ms = container_of(us, MigrationState, userfault_state); ret = ram_save_queue_pages(ms, qemu_ram_get_idstr(rb), rb_offset, - hostpagesize); + hostpagesize, true); if (ret < 0) { error_report("%s: Save: %"PRIx64 " failed!", __func__, (uint64_t)msg.arg.pagefault.address); break; } + + /* Note: In the setup process, snapshot_thread may modify VM's + * write-protected pages, we should not block it 
there, or there + * will be a deadlock error. + */ + if (migration_in_setup(ms)) { + uint64_t host = msg.arg.pagefault.address; + + host &= ~(hostpagesize - 1); + ret = ram_set_pages_wp(host, getpagesize(), true, + us->userfault_fd); + if (ret < 0) { + error_report("Remove page's write-protect failed"); + } + } } } trace_postcopy_ram_fault_thread_exit(); diff --git a/migration/ram.c b/migration/ram.c index 8656719..747f9aa 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -233,6 +233,7 @@ struct PageSearchStatus { ram_addr_t offset; /* Set once we wrap around */ bool complete_round; + uint8_t *pages_copy; }; typedef struct PageSearchStatus PageSearchStatus; @@ -742,7 +743,12 @@ static int ram_save_page(QEMUFile *f, PageSearchStatus *pss, RAMBlock *block = pss->block; ram_addr_t offset = pss->offset; - p = block->host + offset; + /* If we have a copy of this page, use the backup page first */ + if (pss->pages_copy) { + p = pss->pages_copy; + } else { + p = block->host + offset; + } /* In doubt sent page as normal */ bytes_xmit = 0; @@ -926,7 +932,12 @@ static int ram_save_compressed_page(QEMUFile *f, PageSearchStatus *pss, RAMBlock *block = pss->block; ram_addr_t offset = pss->offset; - p = block->host + offset; + /* If we have a copy of this page, use the backup first */ + if (pss->pages_copy) { + p = pss->pages_copy; + } else { + p = block->host + offset; + } bytes_xmit = 0; ret = ram_control_save_page(f, block->offset, @@ -1043,7 +1054,7 @@ static bool find_dirty_block(QEMUFile *f, PageSearchStatus *pss, * Returns: block (or NULL if none available) */ static RAMBlock *unqueue_page(MigrationState *ms, ram_addr_t *offset, - ram_addr_t *ram_addr_abs) + ram_addr_t *ram_addr_abs, uint8 **pages_copy_addr) { RAMBlock *block = NULL; @@ -1055,7 +1066,7 @@ static RAMBlock *unqueue_page(MigrationState *ms, ram_addr_t *offset, *offset = entry->offset; *ram_addr_abs = (entry->offset + entry->rb->offset) & TARGET_PAGE_MASK; - + *pages_copy_addr = entry->pages_copy_addr; 
if (entry->len > TARGET_PAGE_SIZE) { entry->len -= TARGET_PAGE_SIZE; entry->offset += TARGET_PAGE_SIZE; @@ -1086,9 +1097,10 @@ static bool get_queued_page(MigrationState *ms, PageSearchStatus *pss, RAMBlock *block; ram_addr_t offset; bool dirty; + uint8 *pages_backup_addr = NULL; do { - block = unqueue_page(ms, &offset, ram_addr_abs); + block = unqueue_page(ms, &offset, ram_addr_abs, &pages_backup_addr); /* * We're sending this page, and since it's postcopy nothing else * will dirty it, and we must make sure it doesn't get sent again @@ -1130,6 +1142,7 @@ static bool get_queued_page(MigrationState *ms, PageSearchStatus *pss, */ pss->block = block; pss->offset = offset; + pss->pages_copy = pages_backup_addr; } return !!block; @@ -1166,7 +1179,7 @@ void flush_page_queue(MigrationState *ms) * Return: 0 on success */ int ram_save_queue_pages(MigrationState *ms, const char *rbname, - ram_addr_t start, ram_addr_t len) + ram_addr_t start, ram_addr_t len, bool copy_pages) { RAMBlock *ramblock; @@ -1206,6 +1219,17 @@ int ram_save_queue_pages(MigrationState *ms, const char *rbname, new_entry->rb = ramblock; new_entry->offset = start; new_entry->len = len; + if (copy_pages) { + /* Fix me: Better to realize a memory pool */ + new_entry->pages_copy_addr = g_try_malloc0(len); + + if (!new_entry->pages_copy_addr) { + error_report("%s: Failed to alloc memory", __func__); + return -1; + } + + memcpy(new_entry->pages_copy_addr, ramblock_ptr(ramblock, start), len); + } memory_region_ref(ramblock->mr); qemu_mutex_lock(&ms->src_page_req_mutex); @@ -1342,6 +1366,7 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage, pss.block = last_seen_block; pss.offset = last_offset; pss.complete_round = false; + pss.pages_copy = NULL; if (!pss.block) { pss.block = QLIST_FIRST_RCU(&ram_list.blocks); -- 1.8.3.1