From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:34837) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1YQfDB-0000yF-HC for qemu-devel@nongnu.org; Wed, 25 Feb 2015 11:53:27 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1YQfD6-0001KA-GG for qemu-devel@nongnu.org; Wed, 25 Feb 2015 11:53:25 -0500 Received: from mx1.redhat.com ([209.132.183.28]:33288) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1YQfD6-0001Js-9L for qemu-devel@nongnu.org; Wed, 25 Feb 2015 11:53:20 -0500 From: "Dr. David Alan Gilbert (git)" Date: Wed, 25 Feb 2015 16:51:49 +0000 Message-Id: <1424883128-9841-27-git-send-email-dgilbert@redhat.com> In-Reply-To: <1424883128-9841-1-git-send-email-dgilbert@redhat.com> References: <1424883128-9841-1-git-send-email-dgilbert@redhat.com> Subject: [Qemu-devel] [PATCH v5 26/45] Postcopy page-map-incoming (PMI) structure List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel@nongnu.org Cc: aarcange@redhat.com, yamahata@private.email.ne.jp, quintela@redhat.com, amit.shah@redhat.com, pbonzini@redhat.com, yanghy@cn.fujitsu.com, david@gibson.dropbear.id.au From: "Dr. David Alan Gilbert" The PMI holds the state of each page on the incoming side, so that we can tell if the page is missing, already received or there is a request outstanding for it. Signed-off-by: Dr. 
David Alan Gilbert --- include/migration/migration.h | 18 ++++ include/migration/postcopy-ram.h | 12 +++ include/qemu/typedefs.h | 1 + migration/postcopy-ram.c | 223 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 254 insertions(+) diff --git a/include/migration/migration.h b/include/migration/migration.h index b44b9b2..86200b9 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -48,6 +48,23 @@ enum mig_rpcomm_cmd { MIG_RP_CMD_PONG, /* Response to a PING; data (seq: be32 ) */ }; +/* Postcopy page-map-incoming - data about each page on the inbound side */ +typedef enum { + POSTCOPY_PMI_MISSING = 0, /* page hasn't yet been received */ + POSTCOPY_PMI_REQUESTED = 1, /* Kernel asked for a page, not yet got it */ + POSTCOPY_PMI_RECEIVED = 2, /* We've got the page */ +} PostcopyPMIState; + +struct PostcopyPMI { + QemuMutex mutex; + unsigned long *state0; /* Together with state1 form a */ + unsigned long *state1; /* PostcopyPMIState */ + unsigned long host_mask; /* A mask with enough bits set to cover one + host page in the PMI */ + unsigned long host_bits; /* The number of bits in the map representing + one host page */ +}; + typedef QLIST_HEAD(, LoadStateEntry) LoadStateEntry_Head; typedef enum { @@ -69,6 +86,7 @@ struct MigrationIncomingState { QEMUFile *return_path; QemuMutex rp_mutex; /* We send replies from multiple threads */ + PostcopyPMI postcopy_pmi; }; MigrationIncomingState *migration_incoming_get_current(void); diff --git a/include/migration/postcopy-ram.h b/include/migration/postcopy-ram.h index d81934f..e93ee8a 100644 --- a/include/migration/postcopy-ram.h +++ b/include/migration/postcopy-ram.h @@ -13,7 +13,19 @@ #ifndef QEMU_POSTCOPY_RAM_H #define QEMU_POSTCOPY_RAM_H +#include "migration/migration.h" + /* Return true if the host supports everything we need to do postcopy-ram */ bool postcopy_ram_supported_by_host(void); +/* + * In 'advise' mode record that a page has been received. 
+ */ +void postcopy_hook_early_receive(MigrationIncomingState *mis, + size_t bitmap_index); + +void postcopy_pmi_destroy(MigrationIncomingState *mis); +void postcopy_pmi_discard_range(MigrationIncomingState *mis, + size_t start, size_t npages); +void postcopy_pmi_dump(MigrationIncomingState *mis); #endif diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h index 611db46..924eeb6 100644 --- a/include/qemu/typedefs.h +++ b/include/qemu/typedefs.h @@ -61,6 +61,7 @@ typedef struct PCIExpressHost PCIExpressHost; typedef struct PCIHostState PCIHostState; typedef struct PCMCIACardState PCMCIACardState; typedef struct PixelFormat PixelFormat; +typedef struct PostcopyPMI PostcopyPMI; typedef struct PropertyInfo PropertyInfo; typedef struct Property Property; typedef struct QEMUBH QEMUBH; diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index a0e20b2..4f29055 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -24,6 +24,7 @@ #include "migration/migration.h" #include "migration/postcopy-ram.h" #include "sysemu/sysemu.h" +#include "qemu/bitmap.h" #include "qemu/error-report.h" #include "trace.h" @@ -49,6 +50,220 @@ #if defined(__linux__) && defined(__NR_userfaultfd) +/* ---------------------------------------------------------------------- */ +/* Postcopy pagemap-inbound (pmi) - data structures that record the */ +/* state of each page used by the inbound postcopy */ +/* It's a pair of bitmaps (of the same structure as the migration bitmaps)*/ +/* holding one bit per target-page, although most operations work on host */ +/* pages, the exception being a hook that receives incoming pages off the */ +/* migration stream which come in a TP at a time, although the source */ +/* _should_ guarantee it sends a sequence of TPs representing HPs during */ +/* the postcopy phase, there is no such guarantee during precopy. 
We */ +/* could boil this down to only holding one bit per-host page, but we lose*/ +/* sanity checking that we really do get whole host-pages from the source.*/ +__attribute__ (( unused )) /* Until later in patch series */ +static void postcopy_pmi_init(MigrationIncomingState *mis, size_t ram_pages) +{ + unsigned int tpb = qemu_target_page_bits(); + unsigned long host_bits; + + qemu_mutex_init(&mis->postcopy_pmi.mutex); + mis->postcopy_pmi.state0 = bitmap_new(ram_pages); + mis->postcopy_pmi.state1 = bitmap_new(ram_pages); + bitmap_clear(mis->postcopy_pmi.state0, 0, ram_pages); + bitmap_clear(mis->postcopy_pmi.state1, 0, ram_pages); + /* + * Each bit in the map represents one 'target page' which is no bigger + * than a host page but can be smaller. It's useful to have some + * convenience masks for later + */ + + /* + * The number of bits one host page takes up in the bitmap + * e.g. on a 64k host page, 4k Target page, host_bits=64/4=16 + */ + host_bits = getpagesize() / (1ul << tpb); + assert(is_power_of_2(host_bits)); + + mis->postcopy_pmi.host_bits = host_bits; + + if (host_bits < BITS_PER_LONG) { + /* A mask starting at bit 0 containing host_bits contiguous set bits */ + mis->postcopy_pmi.host_mask = (1ul << host_bits) - 1; + } else { + /* + * This is a host where the ratio between host and target pages is + * bigger than the size of our longs, so we can't make a mask + * but we are only losing sanity checking if we just check one long's + * worth of bits. + */ + mis->postcopy_pmi.host_mask = ~0l; + } + + + assert((ram_pages % host_bits) == 0); +} + +void postcopy_pmi_destroy(MigrationIncomingState *mis) +{ + g_free(mis->postcopy_pmi.state0); + mis->postcopy_pmi.state0 = NULL; + g_free(mis->postcopy_pmi.state1); + mis->postcopy_pmi.state1 = NULL; + qemu_mutex_destroy(&mis->postcopy_pmi.mutex); +} + +/* + * Mark a set of pages in the PMI as being clear; this is used by the discard + * at the start of postcopy, and before the postcopy stream starts. 
+ */ +void postcopy_pmi_discard_range(MigrationIncomingState *mis, + size_t start, size_t npages) +{ + /* Clear to state 0 = missing */ + bitmap_clear(mis->postcopy_pmi.state0, start, npages); + bitmap_clear(mis->postcopy_pmi.state1, start, npages); +} + +/* + * Test a host-page worth of bits in the map starting at bitmap_index + * The bits should all be consistent + */ +static bool test_hpbits(MigrationIncomingState *mis, + size_t bitmap_index, unsigned long *map) +{ + long masked; + + assert((bitmap_index & (mis->postcopy_pmi.host_bits-1)) == 0); + + masked = (map[BIT_WORD(bitmap_index)] >> + (bitmap_index % BITS_PER_LONG)) & + mis->postcopy_pmi.host_mask; + + assert((masked == 0) || (masked == mis->postcopy_pmi.host_mask)); + return !!masked; +} + +/* + * Set host-page worth of bits in the map starting at bitmap_index + * to the given state + */ +static void set_hp(MigrationIncomingState *mis, + size_t bitmap_index, PostcopyPMIState state) +{ + long shifted_mask = mis->postcopy_pmi.host_mask << + (bitmap_index % BITS_PER_LONG); + + assert((bitmap_index & (mis->postcopy_pmi.host_bits-1)) == 0); + + if (state & 1) { + mis->postcopy_pmi.state0[BIT_WORD(bitmap_index)] |= shifted_mask; + } else { + mis->postcopy_pmi.state0[BIT_WORD(bitmap_index)] &= ~shifted_mask; + } + if (state & 2) { + mis->postcopy_pmi.state1[BIT_WORD(bitmap_index)] |= shifted_mask; + } else { + mis->postcopy_pmi.state1[BIT_WORD(bitmap_index)] &= ~shifted_mask; + } +} + +/* + * Retrieve the state of the given page + * Note: This version for use by callers already holding the lock + */ +static PostcopyPMIState postcopy_pmi_get_state_nolock( + MigrationIncomingState *mis, + size_t bitmap_index) +{ + bool b0, b1; + + b0 = test_hpbits(mis, bitmap_index, mis->postcopy_pmi.state0); + b1 = test_hpbits(mis, bitmap_index, mis->postcopy_pmi.state1); + + return (b0 ? 1 : 0) + (b1 ? 
2 : 0); +} + +/* Retrieve the state of the given page */ +__attribute__ (( unused )) /* Until later in patch series */ +static PostcopyPMIState postcopy_pmi_get_state(MigrationIncomingState *mis, + size_t bitmap_index) +{ + PostcopyPMIState ret; + qemu_mutex_lock(&mis->postcopy_pmi.mutex); + ret = postcopy_pmi_get_state_nolock(mis, bitmap_index); + qemu_mutex_unlock(&mis->postcopy_pmi.mutex); + + return ret; +} + +/* + * Set the page state to the given state if the previous state was as expected + * Return the actual previous state. + */ +__attribute__ (( unused )) /* Until later in patch series */ +static PostcopyPMIState postcopy_pmi_change_state(MigrationIncomingState *mis, + size_t bitmap_index, + PostcopyPMIState expected_state, + PostcopyPMIState new_state) +{ + PostcopyPMIState old_state; + + qemu_mutex_lock(&mis->postcopy_pmi.mutex); + old_state = postcopy_pmi_get_state_nolock(mis, bitmap_index); + + if (old_state == expected_state) { + switch (new_state) { + case POSTCOPY_PMI_MISSING: + assert(0); /* This shouldn't happen - use discard_range */ + break; + + case POSTCOPY_PMI_REQUESTED: + assert(old_state == POSTCOPY_PMI_MISSING); + /* missing -> requested */ + set_hp(mis, bitmap_index, POSTCOPY_PMI_REQUESTED); + break; + + case POSTCOPY_PMI_RECEIVED: + assert(old_state == POSTCOPY_PMI_MISSING || + old_state == POSTCOPY_PMI_REQUESTED); + /* -> received */ + set_hp(mis, bitmap_index, POSTCOPY_PMI_RECEIVED); + break; + } + } + + qemu_mutex_unlock(&mis->postcopy_pmi.mutex); + return old_state; +} + +/* + * Useful when debugging postcopy, although if it failed early the + * received map can be quite sparse and thus big when dumped. 
+ */ +void postcopy_pmi_dump(MigrationIncomingState *mis) +{ + fprintf(stderr, "postcopy_pmi_dump: bit 0\n"); + ram_debug_dump_bitmap(mis->postcopy_pmi.state0, false); + fprintf(stderr, "postcopy_pmi_dump: bit 1\n"); + ram_debug_dump_bitmap(mis->postcopy_pmi.state1, true); + fprintf(stderr, "postcopy_pmi_dump: end\n"); +} + +/* Called by ram_load prior to mapping the page */ +void postcopy_hook_early_receive(MigrationIncomingState *mis, + size_t bitmap_index) +{ + if (mis->postcopy_state == POSTCOPY_INCOMING_ADVISE) { + /* + * If we're in precopy-advise mode we need to track received pages even + * though we don't need to place pages atomically yet. + * In advise mode there's only a single thread, so we don't need locks + */ + set_bit(bitmap_index, mis->postcopy_pmi.state1); /* 2=received */ + } +} + static bool ufd_version_check(int ufd) { struct uffdio_api api_struct; @@ -71,6 +286,7 @@ static bool ufd_version_check(int ufd) return true; } + bool postcopy_ram_supported_by_host(void) { long pagesize = getpagesize(); @@ -157,5 +373,12 @@ bool postcopy_ram_supported_by_host(void) return false; } +/* Called by ram_load prior to mapping the page */ +void postcopy_hook_early_receive(MigrationIncomingState *mis, + size_t bitmap_index) +{ + /* We don't support postcopy so don't care */ +} + #endif -- 2.1.0