From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:34858) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1YQfDC-00011p-Pi for qemu-devel@nongnu.org; Wed, 25 Feb 2015 11:53:28 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1YQfDA-0001LX-N1 for qemu-devel@nongnu.org; Wed, 25 Feb 2015 11:53:26 -0500 Received: from mx1.redhat.com ([209.132.183.28]:55069) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1YQfDA-0001LI-Fd for qemu-devel@nongnu.org; Wed, 25 Feb 2015 11:53:24 -0500 From: "Dr. David Alan Gilbert (git)" Date: Wed, 25 Feb 2015 16:51:51 +0000 Message-Id: <1424883128-9841-29-git-send-email-dgilbert@redhat.com> In-Reply-To: <1424883128-9841-1-git-send-email-dgilbert@redhat.com> References: <1424883128-9841-1-git-send-email-dgilbert@redhat.com> Subject: [Qemu-devel] [PATCH v5 28/45] postcopy: Incoming initialisation List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel@nongnu.org Cc: aarcange@redhat.com, yamahata@private.email.ne.jp, quintela@redhat.com, amit.shah@redhat.com, pbonzini@redhat.com, yanghy@cn.fujitsu.com, david@gibson.dropbear.id.au From: "Dr. David Alan Gilbert" Signed-off-by: Dr. 
David Alan Gilbert --- arch_init.c | 11 ++++ include/migration/migration.h | 3 + include/migration/postcopy-ram.h | 12 ++++ migration/migration.c | 1 + migration/postcopy-ram.c | 119 ++++++++++++++++++++++++++++++++++++++- savevm.c | 4 ++ trace-events | 2 + 7 files changed, 151 insertions(+), 1 deletion(-) diff --git a/arch_init.c b/arch_init.c index 21e7ebe..d2c4457 100644 --- a/arch_init.c +++ b/arch_init.c @@ -1363,6 +1363,17 @@ void ram_handle_compressed(void *host, uint8_t ch, uint64_t size) } } +/* + * Allocate data structures etc needed by incoming migration with postcopy-ram + * postcopy-ram's similarly named postcopy_ram_incoming_init does the work + */ +int ram_postcopy_incoming_init(MigrationIncomingState *mis) +{ + size_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS; + + return postcopy_ram_incoming_init(mis, ram_pages); +} + static int ram_load(QEMUFile *f, void *opaque, int version_id) { int flags = 0, ret = 0; diff --git a/include/migration/migration.h b/include/migration/migration.h index e749f4c..d09561e 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -84,6 +84,8 @@ struct MigrationIncomingState { PostcopyState postcopy_state; + /* For the kernel to send us notifications */ + int userfault_fd; QEMUFile *return_path; QemuMutex rp_mutex; /* We send replies from multiple threads */ PostcopyPMI postcopy_pmi; @@ -206,6 +208,7 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms); /* For incoming postcopy discard */ int ram_discard_range(MigrationIncomingState *mis, const char *block_name, uint64_t start, uint64_t end); +int ram_postcopy_incoming_init(MigrationIncomingState *mis); /** * @migrate_add_blocker - prevent migration from proceeding diff --git a/include/migration/postcopy-ram.h b/include/migration/postcopy-ram.h index 1fec1c1..305c26b 100644 --- a/include/migration/postcopy-ram.h +++ b/include/migration/postcopy-ram.h @@ -19,6 +19,18 @@ bool postcopy_ram_supported_by_host(void); /* + * Initialise 
postcopy-ram, setting the RAM to a state where we can go into + * postcopy later; must be called prior to any precopy. + * called from arch_init's similarly named ram_postcopy_incoming_init + */ +int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages); + +/* + * At the end of a migration where postcopy_ram_incoming_init was called. + */ +int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis); + +/* * In 'advise' mode record that a page has been received. */ void postcopy_hook_early_receive(MigrationIncomingState *mis, diff --git a/migration/migration.c b/migration/migration.c index 850fe1a..b1ad7b1 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -88,6 +88,7 @@ MigrationIncomingState *migration_incoming_state_new(QEMUFile* f) void migration_incoming_state_destroy(void) { + postcopy_pmi_destroy(mis_current); loadvm_free_handlers(mis_current); g_free(mis_current); mis_current = NULL; diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 391e9c6..dbe1892 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -74,7 +74,6 @@ struct PostcopyDiscardState { /* the postcopy phase, there is no such guarantee during precopy. We */ /* could boil this down to only holding one bit per-host page, but we lose*/ /* sanity checking that we really do get whole host-pages from the source.*/ -__attribute__ (( unused )) /* Until later in patch series */ static void postcopy_pmi_init(MigrationIncomingState *mis, size_t ram_pages) { unsigned int tpb = qemu_target_page_bits(); @@ -392,6 +391,113 @@ int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start, return 0; } +/* + * Setup an area of RAM so that it *can* be used for postcopy later; this + * must be done right at the start prior to pre-copy. + * opaque should be the MIS. 
+ */ +static int init_area(const char *block_name, void *host_addr, + ram_addr_t offset, ram_addr_t length, void *opaque) +{ + MigrationIncomingState *mis = opaque; + + trace_postcopy_init_area(block_name, host_addr, offset, length); + + /* + * We need the whole of RAM to be truly empty for postcopy, so things + * like ROMs and any data tables built during init must be zero'd + * - we're going to get the copy from the source anyway. + * (Precopy will just overwrite this data, so doesn't need the discard) + */ + if (postcopy_ram_discard_range(mis, host_addr, (host_addr + length - 1))) { + return -1; + } + + /* + * We also need the area to be normal 4k pages, not huge pages + * (otherwise we can't be sure we can atomically place the + * 4k page in later). THP might come along and map a 2MB page + * and when it's partially accessed in precopy it might not break + * it down, but leave a 2MB zero'd page. + */ +#ifdef MADV_NOHUGEPAGE + if (madvise(host_addr, length, MADV_NOHUGEPAGE)) { + perror("init_area: NOHUGEPAGE"); + return -1; + } +#endif + + return 0; +} + +/* + * At the end of migration, undo the effects of init_area + * opaque should be the MIS. + */ +static int cleanup_area(const char *block_name, void *host_addr, + ram_addr_t offset, ram_addr_t length, void *opaque) +{ + MigrationIncomingState *mis = opaque; + struct uffdio_range range_struct; + trace_postcopy_cleanup_area(block_name, host_addr, offset, length); + + /* + * We turned off hugepage for the precopy stage with postcopy enabled + * we can turn it back on now. + */ +#ifdef MADV_HUGEPAGE + if (madvise(host_addr, length, MADV_HUGEPAGE)) { + perror("cleanup_area: HUGEPAGE"); + return -1; + } +#endif + + /* + * We can also turn off userfault now since we should have all the + * pages. It can be useful to leave it on to debug postcopy + * if you're not sure it's always getting every page. 
+ */ + range_struct.start = (uint64_t)(uintptr_t)host_addr; + range_struct.len = (uint64_t)length; + + if (ioctl(mis->userfault_fd, UFFDIO_UNREGISTER, &range_struct)) { + perror("cleanup_area: userfault unregister"); + + return -1; + } + + return 0; +} + +/* + * Initialise postcopy-ram, setting the RAM to a state where we can go into + * postcopy later; must be called prior to any precopy. + * called from arch_init's similarly named ram_postcopy_incoming_init + */ +int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages) +{ + postcopy_pmi_init(mis, ram_pages); + + if (qemu_ram_foreach_block(init_area, mis)) { + return -1; + } + + return 0; +} + +/* + * At the end of a migration where postcopy_ram_incoming_init was called. + */ +int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis) +{ + /* TODO: Join the fault thread once we're sure it will exit */ + if (qemu_ram_foreach_block(cleanup_area, mis)) { + return -1; + } + + return 0; +} + #else /* No target OS support, stubs just fail */ @@ -408,6 +514,17 @@ void postcopy_hook_early_receive(MigrationIncomingState *mis, /* We don't support postcopy so don't care */ } +int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages) +{ + error_report("postcopy_ram_incoming_init: No OS support"); + return -1; +} + +int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis) +{ + assert(0); +} + void postcopy_pmi_destroy(MigrationIncomingState *mis) { /* Called in normal cleanup path - so it's OK */ diff --git a/savevm.c b/savevm.c index 2589b8c..6857660 100644 --- a/savevm.c +++ b/savevm.c @@ -1186,6 +1186,10 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis, return -1; } + if (ram_postcopy_incoming_init(mis)) { + return -1; + } + postcopy_state_set(mis, POSTCOPY_INCOMING_ADVISE); return 0; diff --git a/trace-events b/trace-events index f985117..59dea4c 100644 --- a/trace-events +++ b/trace-events @@ -1481,6 +1481,8 @@ 
rdma_start_outgoing_migration_after_rdma_source_init(void) "" # migration/postcopy-ram.c postcopy_discard_send_finish(const char *ramblock, int nwords, int ncmds) "%s mask words sent=%d in %d commands" +postcopy_cleanup_area(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx" +postcopy_init_area(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx" # kvm-all.c kvm_ioctl(int type, void *arg) "type 0x%x, arg %p" -- 2.1.0