On Wed, Feb 25, 2015 at 04:51:58PM +0000, Dr. David Alan Gilbert (git) wrote: > From: "Dr. David Alan Gilbert" > > postcopy_place_page (etc) provide a way for postcopy to place a page > into guests memory atomically (using the copy ioctl on the ufd). > > Signed-off-by: Dr. David Alan Gilbert > --- > include/migration/migration.h | 2 + > include/migration/postcopy-ram.h | 16 ++++++ > migration/postcopy-ram.c | 113 ++++++++++++++++++++++++++++++++++++++- > trace-events | 1 + > 4 files changed, 130 insertions(+), 2 deletions(-) > > diff --git a/include/migration/migration.h b/include/migration/migration.h > index b1c7cad..139bb1b 100644 > --- a/include/migration/migration.h > +++ b/include/migration/migration.h > @@ -94,6 +94,8 @@ struct MigrationIncomingState { > QEMUFile *return_path; > QemuMutex rp_mutex; /* We send replies from multiple threads */ > PostcopyPMI postcopy_pmi; > + void *postcopy_tmp_page; > + long postcopy_place_skipped; /* Check for incorrect place ops */ > }; > > MigrationIncomingState *migration_incoming_get_current(void); > diff --git a/include/migration/postcopy-ram.h b/include/migration/postcopy-ram.h > index fbb2a93..3d30280 100644 > --- a/include/migration/postcopy-ram.h > +++ b/include/migration/postcopy-ram.h > @@ -80,4 +80,20 @@ void postcopy_discard_send_chunk(MigrationState *ms, PostcopyDiscardState *pds, > void postcopy_discard_send_finish(MigrationState *ms, > PostcopyDiscardState *pds); > > +/* > + * Place a page (from) at (host) efficiently > + * There are restrictions on how 'from' must be mapped, in general best > + * to use other postcopy_ routines to allocate. > + * returns 0 on success > + */ > +int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, > + long bitmap_offset, bool all_zero); > + > +/* > + * Allocate a page of memory that can be mapped at a later point in time > + * using postcopy_place_page > + * Returns: Pointer to allocated page > + */ > +void *postcopy_get_tmp_page(MigrationIncomingState *mis); > + > #endif > diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c > index 33dd332..86fa5a0 100644 > --- a/migration/postcopy-ram.c > +++ b/migration/postcopy-ram.c > @@ -197,7 +197,6 @@ static PostcopyPMIState postcopy_pmi_get_state_nolock( > } > > /* Retrieve the state of the given page */ > -__attribute__ (( unused )) /* Until later in patch series */ > static PostcopyPMIState postcopy_pmi_get_state(MigrationIncomingState *mis, > size_t bitmap_index) > { > @@ -213,7 +212,6 @@ static PostcopyPMIState postcopy_pmi_get_state(MigrationIncomingState *mis, > * Set the page state to the given state if the previous state was as expected > * Return the actual previous state. > */ > -__attribute__ (( unused )) /* Until later in patch series */ > static PostcopyPMIState postcopy_pmi_change_state(MigrationIncomingState *mis, > size_t bitmap_index, > PostcopyPMIState expected_state, > @@ -477,6 +475,7 @@ static int cleanup_area(const char *block_name, void *host_addr, > int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages) > { > postcopy_pmi_init(mis, ram_pages); > + mis->postcopy_place_skipped = -1; > > if (qemu_ram_foreach_block(init_area, mis)) { > return -1; > @@ -495,6 +494,10 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis) > return -1; > } > > + if (mis->postcopy_tmp_page) { > + munmap(mis->postcopy_tmp_page, getpagesize()); > + mis->postcopy_tmp_page = NULL; > + } > return 0; > } > > @@ -561,6 +564,100 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) > return 0; > } > > +/* > + * Place a host page (from) at (host) tomically s/tomically/atomically/ > + * There are restrictions on how 'from' must be mapped, in general best > + * to use other postcopy_ routines to allocate. > + * all_zero: Hint that the page being placed is 0 throughout > + * returns 0 on success > + * bitmap_offset: Index into the migration bitmaps > + * > + * State changes: > + * none -> received > + * requested -> received (ack) > + * > + * Note the UF thread is also updating the state, and maybe none->requested > + * at the same time. Hrm.. these facts do tend me towards thinking that separate and explicit requested and received bits will be clearer than treating it as an enum state variable > + */ > +int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, > + long bitmap_offset, bool all_zero) > +{ > + PostcopyPMIState old_state, tmp_state, new_state; > + > + if (!all_zero) { > + struct uffdio_copy copy_struct; > + > + copy_struct.dst = (uint64_t)(uintptr_t)host; > + copy_struct.src = (uint64_t)(uintptr_t)from; > + copy_struct.len = getpagesize(); > + copy_struct.mode = 0; > + > + /* copy also acks to the kernel waking the stalled thread up > + * TODO: We can inhibit that ack and only do it if it was requested > + * which would be slightly cheaper, but we'd have to be careful > + * of the order of updating our page state. > + */ > + if (ioctl(mis->userfault_fd, UFFDIO_COPY, ©_struct)) { > + int e = errno; > + error_report("%s: %s copy host: %p from: %p pmi=%d", > + __func__, strerror(e), host, from, > + postcopy_pmi_get_state(mis, bitmap_offset)); > + > + return -e; > + } > + } else { > + struct uffdio_zeropage zero_struct; > + > + zero_struct.range.start = (uint64_t)(uintptr_t)host; > + zero_struct.range.len = getpagesize(); > + zero_struct.mode = 0; > + > + if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) { > + int e = errno; > + error_report("%s: %s zero host: %p from: %p pmi=%d", > + __func__, strerror(e), host, from, > + postcopy_pmi_get_state(mis, bitmap_offset)); > + > + return -e; > + } > + } > + > + bitmap_offset &= ~(mis->postcopy_pmi.host_bits-1); > + new_state = POSTCOPY_PMI_RECEIVED; > + tmp_state = postcopy_pmi_get_state(mis, bitmap_offset); > + do { > + old_state = tmp_state; > + tmp_state = postcopy_pmi_change_state(mis, bitmap_offset, old_state, > + new_state); > + } while (old_state != tmp_state); Yeah.. see treating the state as two separate booleans, here you'd just need to update received, without caring about whether requested has changed. > + trace_postcopy_place_page(bitmap_offset, host, all_zero, old_state); > + > + return 0; > +} > + > +/* > + * Returns a target page of memory that can be mapped at a later point in time > + * using postcopy_place_page > + * The same address is used repeatedly, postcopy_place_page just takes the > + * backing page away. > + * Returns: Pointer to allocated page > + * > + */ > +void *postcopy_get_tmp_page(MigrationIncomingState *mis) > +{ > + if (!mis->postcopy_tmp_page) { > + mis->postcopy_tmp_page = mmap(NULL, getpagesize(), > + PROT_READ | PROT_WRITE, MAP_PRIVATE | > + MAP_ANONYMOUS, -1, 0); > + if (!mis->postcopy_tmp_page) { > + perror("mapping postcopy tmp page"); > + return NULL; > + } > + } > + > + return mis->postcopy_tmp_page; > +} > + > #else > /* No target OS support, stubs just fail */ > bool postcopy_ram_supported_by_host(void) > @@ -608,6 +705,18 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) > { > assert(0); > } > + > +int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, > + long bitmap_offset, bool all_zero) > +{ > + assert(0); > +} > + > +void *postcopy_get_tmp_page(MigrationIncomingState *mis) > +{ > + assert(0); > +} > + > #endif > > /* ------------------------------------------------------------------------- */ > diff --git a/trace-events b/trace-events > index 781cf5c..16a91d9 100644 > --- a/trace-events > +++ b/trace-events > @@ -1497,6 +1497,7 @@ rdma_start_outgoing_migration_after_rdma_source_init(void) "" > postcopy_discard_send_finish(const char *ramblock, int nwords, int ncmds) "%s mask words sent=%d in %d commands" > postcopy_cleanup_area(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx" > postcopy_init_area(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx" > +postcopy_place_page(unsigned long offset, void *host_addr, bool all_zero, int old_state) "offset=%lx host=%p all_zero=%d old_state=%d" > > # kvm-all.c > kvm_ioctl(int type, void *arg) "type 0x%x, arg %p" -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson