On Wed, Feb 25, 2015 at 04:51:49PM +0000, Dr. David Alan Gilbert (git) wrote: > From: "Dr. David Alan Gilbert" > > The PMI holds the state of each page on the incoming side, > so that we can tell if the page is missing, already received > or there is a request outstanding for it. > > Signed-off-by: Dr. David Alan Gilbert > --- > include/migration/migration.h | 18 ++++ > include/migration/postcopy-ram.h | 12 +++ > include/qemu/typedefs.h | 1 + > migration/postcopy-ram.c | 223 +++++++++++++++++++++++++++++++++++++++ > 4 files changed, 254 insertions(+) > > diff --git a/include/migration/migration.h b/include/migration/migration.h > index b44b9b2..86200b9 100644 > --- a/include/migration/migration.h > +++ b/include/migration/migration.h > @@ -48,6 +48,23 @@ enum mig_rpcomm_cmd { > MIG_RP_CMD_PONG, /* Response to a PING; data (seq: be32 ) */ > }; > > +/* Postcopy page-map-incoming - data about each page on the inbound side */ > +typedef enum { > + POSTCOPY_PMI_MISSING = 0, /* page hasn't yet been received */ This appears to be a 3 space indent instead of the usual 4. > + POSTCOPY_PMI_REQUESTED = 1, /* Kernel asked for a page, not yet got it */ > + POSTCOPY_PMI_RECEIVED = 2, /* We've got the page */ > +} PostcopyPMIState; TBH, I'm not sure this enum actually helps anything. I wonder if things might actually be cleared if you simply treat the received and requested bitmaps separately. > +struct PostcopyPMI { > + QemuMutex mutex; > + unsigned long *state0; /* Together with state1 form a */ > + unsigned long *state1; /* PostcopyPMIState */ The comments on the lines above don't appear to shed any light on anything. > + unsigned long host_mask; /* A mask with enough bits set to cover one > + host page in the PMI */ > + unsigned long host_bits; /* The number of bits in the map representing > + one host page */ I find the host_bits name fairly confusing. Maybe "tp_per_hp"? > +}; > + > typedef QLIST_HEAD(, LoadStateEntry) LoadStateEntry_Head; > > typedef enum { > @@ -69,6 +86,7 @@ struct MigrationIncomingState { > > QEMUFile *return_path; > QemuMutex rp_mutex; /* We send replies from multiple threads */ > + PostcopyPMI postcopy_pmi; > }; > > MigrationIncomingState *migration_incoming_get_current(void); > diff --git a/include/migration/postcopy-ram.h b/include/migration/postcopy-ram.h > index d81934f..e93ee8a 100644 > --- a/include/migration/postcopy-ram.h > +++ b/include/migration/postcopy-ram.h > @@ -13,7 +13,19 @@ > #ifndef QEMU_POSTCOPY_RAM_H > #define QEMU_POSTCOPY_RAM_H > > +#include "migration/migration.h" > + > /* Return true if the host supports everything we need to do postcopy-ram */ > bool postcopy_ram_supported_by_host(void); > > +/* > + * In 'advise' mode record that a page has been received. > + */ > +void postcopy_hook_early_receive(MigrationIncomingState *mis, > + size_t bitmap_index); > + > +void postcopy_pmi_destroy(MigrationIncomingState *mis); > +void postcopy_pmi_discard_range(MigrationIncomingState *mis, > + size_t start, size_t npages); > +void postcopy_pmi_dump(MigrationIncomingState *mis); > #endif > diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h > index 611db46..924eeb6 100644 > --- a/include/qemu/typedefs.h > +++ b/include/qemu/typedefs.h > @@ -61,6 +61,7 @@ typedef struct PCIExpressHost PCIExpressHost; > typedef struct PCIHostState PCIHostState; > typedef struct PCMCIACardState PCMCIACardState; > typedef struct PixelFormat PixelFormat; > +typedef struct PostcopyPMI PostcopyPMI; > typedef struct PropertyInfo PropertyInfo; > typedef struct Property Property; > typedef struct QEMUBH QEMUBH; > diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c > index a0e20b2..4f29055 100644 > --- a/migration/postcopy-ram.c > +++ b/migration/postcopy-ram.c > @@ -24,6 +24,7 @@ > #include "migration/migration.h" > #include "migration/postcopy-ram.h" > #include "sysemu/sysemu.h" > +#include "qemu/bitmap.h" > #include "qemu/error-report.h" > #include "trace.h" > > @@ -49,6 +50,220 @@ > > #if defined(__linux__) && defined(__NR_userfaultfd) > > +/* ---------------------------------------------------------------------- */ > +/* Postcopy pagemap-inbound (pmi) - data structures that record the */ > +/* state of each page used by the inbound postcopy */ > +/* It's a pair of bitmaps (of the same structure as the migration bitmaps)*/ > +/* holding one bit per target-page, although most operations work on host */ > +/* pages, the exception being a hook that receives incoming pages off the */ > +/* migration stream which come in a TP at a time, although the source */ > +/* _should_ guarantee it sends a sequence of TPs representing HPs during */ > +/* the postcopy phase, there is no such guarantee during precopy. We */ > +/* could boil this down to only holding one bit per-host page, but we lose*/ > +/* sanity checking that we really do get whole host-pages from the source.*/ > +__attribute__ (( unused )) /* Until later in patch series */ > +static void postcopy_pmi_init(MigrationIncomingState *mis, size_t ram_pages) > +{ > + unsigned int tpb = qemu_target_page_bits(); > + unsigned long host_bits; > + > + qemu_mutex_init(&mis->postcopy_pmi.mutex); > + mis->postcopy_pmi.state0 = bitmap_new(ram_pages); > + mis->postcopy_pmi.state1 = bitmap_new(ram_pages); > + bitmap_clear(mis->postcopy_pmi.state0, 0, ram_pages); > + bitmap_clear(mis->postcopy_pmi.state1, 0, ram_pages); > + /* > + * Each bit in the map represents one 'target page' which is no bigger > + * than a host page but can be smaller. It's useful to have some > + * convenience masks for later > + */ > + > + /* > + * The number of bits one host page takes up in the bitmap > + * e.g. on a 64k host page, 4k Target page, host_bits=64/4=16 > + */ > + host_bits = getpagesize() / (1ul << tpb); That's equivalent to getpagesize() >> tpb, isn't it? > + assert(is_power_of_2(host_bits)); > + > + mis->postcopy_pmi.host_bits = host_bits; > + > + if (host_bits < BITS_PER_LONG) { > + /* A mask starting at bit 0 containing host_bits continuous set bits */ > + mis->postcopy_pmi.host_mask = (1ul << host_bits) - 1; > + } else { > + /* > + * This is a host where the ratio between host and target pages is > + * bigger than the size of our longs, so we can't make a mask > + * but we are only losing sanity checking if we just check one long's > + * worth of bits. > + */ > + mis->postcopy_pmi.host_mask = ~0l; > + } > + > + > + assert((ram_pages % host_bits) == 0); > +} > + > +void postcopy_pmi_destroy(MigrationIncomingState *mis) > +{ > + g_free(mis->postcopy_pmi.state0); > + mis->postcopy_pmi.state0 = NULL; > + g_free(mis->postcopy_pmi.state1); > + mis->postcopy_pmi.state1 = NULL; > + qemu_mutex_destroy(&mis->postcopy_pmi.mutex); > +} > + > +/* > + * Mark a set of pages in the PMI as being clear; this is used by the discard > + * at the start of postcopy, and before the postcopy stream starts. > + */ > +void postcopy_pmi_discard_range(MigrationIncomingState *mis, > + size_t start, size_t npages) > +{ > + /* Clear to state 0 = missing */ > + bitmap_clear(mis->postcopy_pmi.state0, start, npages); > + bitmap_clear(mis->postcopy_pmi.state1, start, npages); > +} > + > +/* > + * Test a host-page worth of bits in the map starting at bitmap_index > + * The bits should all be consistent > + */ > +static bool test_hpbits(MigrationIncomingState *mis, > + size_t bitmap_index, unsigned long *map) > +{ > + long masked; > + > + assert((bitmap_index & (mis->postcopy_pmi.host_bits-1)) == 0); > + > + masked = (map[BIT_WORD(bitmap_index)] >> > + (bitmap_index % BITS_PER_LONG)) & > + mis->postcopy_pmi.host_mask; > + > + assert((masked == 0) || (masked == mis->postcopy_pmi.host_mask)); > + return !!masked; > +} > + > +/* > + * Set host-page worth of bits in the map starting at bitmap_index > + * to the given state > + */ > +static void set_hp(MigrationIncomingState *mis, > + size_t bitmap_index, PostcopyPMIState state) > +{ > + long shifted_mask = mis->postcopy_pmi.host_mask << > + (bitmap_index % BITS_PER_LONG); > + > + assert((bitmap_index & (mis->postcopy_pmi.host_bits-1)) == 0); assert(state != 0)? > + > + if (state & 1) { Using the symbolic constants for PostcopyPMIState values here might be better. > + mis->postcopy_pmi.state0[BIT_WORD(bitmap_index)] |= shifted_mask; > + } else { > + mis->postcopy_pmi.state0[BIT_WORD(bitmap_index)] &= ~shifted_mask; > + } > + if (state & 2) { > + mis->postcopy_pmi.state1[BIT_WORD(bitmap_index)] |= shifted_mask; > + } else { > + mis->postcopy_pmi.state1[BIT_WORD(bitmap_index)] &= ~shifted_mask; > + } > +} > + > +/* > + * Retrieve the state of the given page > + * Note: This version for use by callers already holding the lock > + */ > +static PostcopyPMIState postcopy_pmi_get_state_nolock( > + MigrationIncomingState *mis, > + size_t bitmap_index) > +{ > + bool b0, b1; > + > + b0 = test_hpbits(mis, bitmap_index, mis->postcopy_pmi.state0); > + b1 = test_hpbits(mis, bitmap_index, mis->postcopy_pmi.state1); > + > + return (b0 ? 1 : 0) + (b1 ? 2 : 0); Ugh.. this is a hidden dependency on the PostcopyPMIState enum elements never changing value. Safer to code it as: if (!b0 && !b1) { return POSTCOPY_PMI_MISSING; } else if (...) ... and let gcc sort it out. > +} > + > +/* Retrieve the state of the given page */ > +__attribute__ (( unused )) /* Until later in patch series */ > +static PostcopyPMIState postcopy_pmi_get_state(MigrationIncomingState *mis, > + size_t bitmap_index) > +{ > + PostcopyPMIState ret; > + qemu_mutex_lock(&mis->postcopy_pmi.mutex); > + ret = postcopy_pmi_get_state_nolock(mis, bitmap_index); > + qemu_mutex_unlock(&mis->postcopy_pmi.mutex); > + > + return ret; > +} > + > +/* > + * Set the page state to the given state if the previous state was as expected > + * Return the actual previous state. > + */ > +__attribute__ (( unused )) /* Until later in patch series */ > +static PostcopyPMIState postcopy_pmi_change_state(MigrationIncomingState *mis, > + size_t bitmap_index, > + PostcopyPMIState expected_state, > + PostcopyPMIState new_state) > +{ > + PostcopyPMIState old_state; > + > + qemu_mutex_lock(&mis->postcopy_pmi.mutex); > + old_state = postcopy_pmi_get_state_nolock(mis, bitmap_index); > + > + if (old_state == expected_state) { > + switch (new_state) { > + case POSTCOPY_PMI_MISSING: > + assert(0); /* This shouldn't happen - use discard_range */ > + break; > + > + case POSTCOPY_PMI_REQUESTED: > + assert(old_state == POSTCOPY_PMI_MISSING); > + /* missing -> requested */ > + set_hp(mis, bitmap_index, POSTCOPY_PMI_REQUESTED); > + break; > + > + case POSTCOPY_PMI_RECEIVED: > + assert(old_state == POSTCOPY_PMI_MISSING || > + old_state == POSTCOPY_PMI_REQUESTED); > + /* -> received */ > + set_hp(mis, bitmap_index, POSTCOPY_PMI_RECEIVED); > + break; > + } > + } > + > + qemu_mutex_unlock(&mis->postcopy_pmi.mutex); > + return old_state; > +} > + > +/* > + * Useful when debugging postcopy, although if it failed early the > + * received map can be quite sparse and thus big when dumped. > + */ > +void postcopy_pmi_dump(MigrationIncomingState *mis) > +{ > + fprintf(stderr, "postcopy_pmi_dump: bit 0\n"); > + ram_debug_dump_bitmap(mis->postcopy_pmi.state0, false); > + fprintf(stderr, "postcopy_pmi_dump: bit 1\n"); > + ram_debug_dump_bitmap(mis->postcopy_pmi.state1, true); > + fprintf(stderr, "postcopy_pmi_dump: end\n"); > +} > + > +/* Called by ram_load prior to mapping the page */ > +void postcopy_hook_early_receive(MigrationIncomingState *mis, > + size_t bitmap_index) > +{ > + if (mis->postcopy_state == POSTCOPY_INCOMING_ADVISE) { > + /* > + * If we're in precopy-advise mode we need to track received pages even > + * though we don't need to place pages atomically yet. > + * In advise mode there's only a single thread, so don't need locks > + */ > + set_bit(bitmap_index, mis->postcopy_pmi.state1); /* 2=received */ Yeah.. so this bypasses postcopy_pmi_{get,change}_state, which again makes me wonder whether the enum serves any purpose. > + } > +} > + > static bool ufd_version_check(int ufd) > { > struct uffdio_api api_struct; > @@ -71,6 +286,7 @@ static bool ufd_version_check(int ufd) > return true; > } > > + Extraneous whitespace change. > bool postcopy_ram_supported_by_host(void) > { > long pagesize = getpagesize(); > @@ -157,5 +373,12 @@ bool postcopy_ram_supported_by_host(void) > return false; > } > > +/* Called by ram_load prior to mapping the page */ > +void postcopy_hook_early_receive(MigrationIncomingState *mis, > + size_t bitmap_index) > +{ > + /* We don't support postcopy so don't care */ > +} > + > #endif > -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson