From mboxrd@z Thu Jan 1 00:00:00 1970
From: Andrew Cooper
Subject: [PATCH v5 RFC 14/14] tools/libxc: noarch restore code
Date: Wed, 11 Jun 2014 19:14:42 +0100
Message-ID: <1402510482-21099-15-git-send-email-andrew.cooper3@citrix.com>
References: <1402510482-21099-1-git-send-email-andrew.cooper3@citrix.com>
Mime-Version: 1.0
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit
Return-path:
In-Reply-To: <1402510482-21099-1-git-send-email-andrew.cooper3@citrix.com>
List-Unsubscribe: ,
List-Post:
List-Help:
List-Subscribe: ,
Sender: xen-devel-bounces@lists.xen.org
Errors-To: xen-devel-bounces@lists.xen.org
To: Xen-devel
Cc: Andrew Cooper , Frediano Ziglio , David Vrabel
List-Id: xen-devel@lists.xenproject.org

Signed-off-by: Andrew Cooper
Signed-off-by: Frediano Ziglio
Signed-off-by: David Vrabel
---
 tools/libxc/saverestore/common.h | 6 +
 tools/libxc/saverestore/restore.c | 556 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 561 insertions(+), 1 deletion(-)

diff --git a/tools/libxc/saverestore/common.h b/tools/libxc/saverestore/common.h
index e16e0de..2d44961 100644
--- a/tools/libxc/saverestore/common.h
+++ b/tools/libxc/saverestore/common.h
@@ -292,6 +292,12 @@ static inline int write_record(struct context *ctx, struct record *rec)
     return write_split_record(ctx, rec, NULL, 0);
 }
 
+/* TODO - find a better way of hiding this. It should be private to
+ * restore.c, but is needed by x86_pv_localise_page()
+ */
+int populate_pfns(struct context *ctx, unsigned count,
+                  const xen_pfn_t *original_pfns, const uint32_t *types);
+
 #endif
 /*
  * Local variables:
diff --git a/tools/libxc/saverestore/restore.c b/tools/libxc/saverestore/restore.c
index 6624baa..c00742d 100644
--- a/tools/libxc/saverestore/restore.c
+++ b/tools/libxc/saverestore/restore.c
@@ -1,5 +1,499 @@
+#include <arpa/inet.h>
+
 #include "common.h"
 
+/*
+ * Read and validate the Image and Domain headers.
+ *
+ * Returns 0 on success, after recording the stream format version, guest
+ * type and guest page size in ctx->restore.  Returns -1 if either header
+ * cannot be read or the Image Header fails validation.
+ */
+static int read_headers(struct context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+    struct ihdr ihdr;
+    struct dhdr dhdr;
+
+    if ( read_exact(ctx->fd, &ihdr, sizeof(ihdr)) )
+    {
+        PERROR("Failed to read Image Header from stream");
+        return -1;
+    }
+
+    /* These three fields are converted from network byte order before use. */
+    ihdr.id = ntohl(ihdr.id);
+    ihdr.version = ntohl(ihdr.version);
+    ihdr.options = ntohs(ihdr.options);
+
+    if ( ihdr.marker != IHDR_MARKER )
+    {
+        ERROR("Invalid marker: Got 0x%016"PRIx64, ihdr.marker);
+        return -1;
+    }
+    else if ( ihdr.id != IHDR_ID )
+    {
+        ERROR("Invalid ID: Expected 0x%08"PRIx32", Got 0x%08"PRIx32,
+              IHDR_ID, ihdr.id);
+        return -1;
+    }
+    else if ( ihdr.version != IHDR_VERSION )
+    {
+        /* Expected value first, then the value found in the stream. */
+        ERROR("Invalid Version: Expected %d, Got %d",
+              IHDR_VERSION, ihdr.version);
+        return -1;
+    }
+    else if ( ihdr.options & IHDR_OPT_BIG_ENDIAN )
+    {
+        ERROR("Unable to handle big endian streams");
+        return -1;
+    }
+
+    ctx->restore.format_version = ihdr.version;
+
+    if ( read_exact(ctx->fd, &dhdr, sizeof(dhdr)) )
+    {
+        PERROR("Failed to read Domain Header from stream");
+        return -1;
+    }
+
+    /*
+     * NOTE(review): dhdr.type and dhdr.page_shift are taken from the stream
+     * without validation here - confirm they are checked before being
+     * relied upon (an oversized page_shift would make the shift undefined).
+     */
+    ctx->restore.guest_type = dhdr.type;
+    ctx->restore.guest_page_size = (1U << dhdr.page_shift);
+
+    IPRINTF("Found %s domain from Xen %d.%d",
+            dhdr_type_to_str(dhdr.type), dhdr.xen_major, dhdr.xen_minor);
+    return 0;
+}
+
+/**
+ * Reads a record from the stream, and fills in the record structure.
+ *
+ * Returns 0 on success and non-0 on failure.
+ *
+ * On success, the record's type and size shall be valid.
+ * - If size is 0, data shall be NULL.
+ * - If size is non-0, data shall be a buffer allocated by malloc() which must
+ *   be passed to free() by the caller.
+ *
+ * On failure, the contents of the record structure are undefined.
+ */
+static int read_record(struct context *ctx, struct record *rec)
+{
+    xc_interface *xch = ctx->xch;
+    struct rhdr rhdr;
+    size_t datasz;
+
+    if ( read_exact(ctx->fd, &rhdr, sizeof(rhdr)) )
+    {
+        PERROR("Failed to read Record Header from stream");
+        return -1;
+    }
+    else if ( rhdr.length > REC_LENGTH_MAX )
+    {
+        ERROR("Record (0x%08"PRIx32", %s) length 0x%"PRIx32
+              " exceeds max (0x%"PRIx32")",
+              rhdr.type, rec_type_to_str(rhdr.type),
+              rhdr.length, REC_LENGTH_MAX);
+        return -1;
+    }
+
+    /*
+     * The amount read is the length rounded up by REC_ALIGN_ORDER, while
+     * rec->length keeps the unrounded value from the header.
+     */
+    datasz = ROUNDUP(rhdr.length, REC_ALIGN_ORDER);
+
+    if ( datasz )
+    {
+        rec->data = malloc(datasz);
+
+        if ( !rec->data )
+        {
+            ERROR("Unable to allocate %zu bytes for record data (0x%08"PRIx32", %s)",
+                  datasz, rhdr.type, rec_type_to_str(rhdr.type));
+            return -1;
+        }
+
+        if ( read_exact(ctx->fd, rec->data, datasz) )
+        {
+            /* Report first: free() is allowed to disturb errno. */
+            PERROR("Failed to read %zu bytes of data for record (0x%08"PRIx32", %s)",
+                   datasz, rhdr.type, rec_type_to_str(rhdr.type));
+            free(rec->data);
+            rec->data = NULL;
+            return -1;
+        }
+    }
+    else
+        rec->data = NULL;
+
+    rec->type = rhdr.type;
+    rec->length = rhdr.length;
+
+    return 0;
+}
+
+/*
+ * Is a pfn populated?
+ *
+ * Anything beyond the tracked range, or queried before the bitmap has been
+ * allocated, is reported as not populated.
+ */
+static bool pfn_is_populated(const struct context *ctx, xen_pfn_t pfn)
+{
+    if ( !ctx->restore.populated_pfns || pfn > ctx->restore.max_populated_pfn )
+        return false;
+    return test_bit(pfn, ctx->restore.populated_pfns);
+}
+
+/*
+ * Set a pfn as populated, expanding the tracking structures if needed.
+ *
+ * Returns 0 on success, or -1 if the bitmap could not be grown (in which
+ * case the existing bitmap is left untouched).
+ */
+static int pfn_set_populated(struct context *ctx, xen_pfn_t pfn)
+{
+    xc_interface *xch = ctx->xch;
+
+    if ( !ctx->restore.populated_pfns || pfn > ctx->restore.max_populated_pfn )
+    {
+        /* Grow to the next multiple of 1024 pfns above pfn, less 1. */
+        unsigned long new_max_pfn = ((pfn + 1024) & ~1023) - 1;
+        size_t old_sz, new_sz;
+        unsigned long *p;
+
+        /*
+         * With no bitmap allocated yet there are no old contents to keep,
+         * so the whole of the new allocation must be zeroed.
+         */
+        old_sz = ctx->restore.populated_pfns
+            ? bitmap_size(ctx->restore.max_populated_pfn + 1) : 0;
+        new_sz = bitmap_size(new_max_pfn + 1);
+
+        p = realloc(ctx->restore.populated_pfns, new_sz);
+        if ( !p )
+        {
+            PERROR("Failed to realloc populated bitmap");
+            return -1;
+        }
+
+        /* Clear only the newly added tail; earlier bits are preserved. */
+        memset((uint8_t *)p + old_sz, 0x00, new_sz - old_sz);
+
+        ctx->restore.populated_pfns = p;
+        ctx->restore.max_populated_pfn = new_max_pfn;
+    }
+
+    set_bit(pfn, ctx->restore.populated_pfns);
+
+    return 0;
+}
+
+/*
+ * Given a set of pfns, obtain memory from Xen to fill the physmap for the
+ * unpopulated subset.
+ *
+ * Entries typed XTAB or BROKEN, and pfns already marked as populated, are
+ * skipped.  Returns 0 on success and non-0 on failure.
+ */
+int populate_pfns(struct context *ctx, unsigned count,
+                  const xen_pfn_t *original_pfns, const uint32_t *types)
+{
+    xc_interface *xch = ctx->xch;
+    xen_pfn_t *mfns = malloc(count * sizeof(*mfns)),
+        *pfns = malloc(count * sizeof(*pfns));
+    unsigned i, nr_pfns = 0;
+    int rc = -1;
+
+    if ( !mfns || !pfns )
+    {
+        ERROR("Failed to allocate %zu bytes for populating the physmap",
+              2 * count * sizeof(*mfns));
+        goto err;
+    }
+
+    /* Collect the pfns which still need backing memory. */
+    for ( i = 0; i < count; ++i )
+    {
+        if ( types[i] != XEN_DOMCTL_PFINFO_XTAB &&
+             types[i] != XEN_DOMCTL_PFINFO_BROKEN &&
+             !pfn_is_populated(ctx, original_pfns[i]) )
+        {
+            pfns[nr_pfns] = mfns[nr_pfns] = original_pfns[i];
+            ++nr_pfns;
+        }
+    }
+
+    if ( nr_pfns )
+    {
+        rc = xc_domain_populate_physmap_exact(xch, ctx->domid, nr_pfns, 0, 0, mfns);
+        if ( rc )
+        {
+            PERROR("Failed to populate physmap");
+            goto err;
+        }
+
+        /* Record each new page, passing the pfn/mfn pair to set_gfn(). */
+        for ( i = 0; i < nr_pfns; ++i )
+        {
+            rc = pfn_set_populated(ctx, pfns[i]);
+            if ( rc )
+                goto err;
+            ctx->ops.set_gfn(ctx, pfns[i], mfns[i]);
+        }
+    }
+
+    rc = 0;
+
+ err:
+    free(pfns);
+    free(mfns);
+
+    return rc;
+}
+
+/*
+ * Given a list of pfns, their types, and a block of page data from the
+ * stream, populate and record their types, map the relevant subset and copy
+ * the data into the guest.
+ *
+ * Returns 0 on success and non-0 on failure.
+ */
+static int process_page_data(struct context *ctx, unsigned count,
+                             xen_pfn_t *pfns, uint32_t *types, void *page_data)
+{
+    xc_interface *xch = ctx->xch;
+    xen_pfn_t *mfns = malloc(count * sizeof(*mfns));
+    int *map_errs = malloc(count * sizeof(*map_errs));
+    int rc = -1;
+    void *mapping = NULL, *guest_page = NULL;
+    unsigned i, /* i indexes the pfns from the record. */
+        j,      /* j indexes the subset of pfns we decide to map. */
+        nr_pages = 0;
+
+    if ( !mfns || !map_errs )
+    {
+        ERROR("Failed to allocate %zu bytes to process page data",
+              count * (sizeof(*mfns) + sizeof(*map_errs)));
+        goto err;
+    }
+
+    rc = populate_pfns(ctx, count, pfns, types);
+    if ( rc )
+    {
+        ERROR("Failed to populate pfns for batch of %u pages", count);
+        goto err;
+    }
+    rc = -1;
+
+    /* Record every page's type, and gather the frames which carry data. */
+    for ( i = 0, nr_pages = 0; i < count; ++i )
+    {
+        ctx->ops.set_page_type(ctx, pfns[i], types[i]);
+
+        switch ( types[i] )
+        {
+        case XEN_DOMCTL_PFINFO_NOTAB:
+
+        case XEN_DOMCTL_PFINFO_L1TAB:
+        case XEN_DOMCTL_PFINFO_L1TAB | XEN_DOMCTL_PFINFO_LPINTAB:
+
+        case XEN_DOMCTL_PFINFO_L2TAB:
+        case XEN_DOMCTL_PFINFO_L2TAB | XEN_DOMCTL_PFINFO_LPINTAB:
+
+        case XEN_DOMCTL_PFINFO_L3TAB:
+        case XEN_DOMCTL_PFINFO_L3TAB | XEN_DOMCTL_PFINFO_LPINTAB:
+
+        case XEN_DOMCTL_PFINFO_L4TAB:
+        case XEN_DOMCTL_PFINFO_L4TAB | XEN_DOMCTL_PFINFO_LPINTAB:
+
+            mfns[nr_pages++] = ctx->ops.pfn_to_gfn(ctx, pfns[i]);
+            break;
+        }
+
+    }
+
+    if ( nr_pages > 0 )
+    {
+        mapping = guest_page = xc_map_foreign_bulk(
+            xch, ctx->domid, PROT_READ | PROT_WRITE,
+            mfns, map_errs, nr_pages);
+        if ( !mapping )
+        {
+            PERROR("Unable to map %u mfns for %u pages of data",
+                   nr_pages, count);
+            goto err;
+        }
+    }
+
+    for ( i = 0, j = 0; i < count; ++i )
+    {
+        switch ( types[i] )
+        {
+        case XEN_DOMCTL_PFINFO_XTAB:
+        case XEN_DOMCTL_PFINFO_BROKEN:
+        case XEN_DOMCTL_PFINFO_XALLOC:
+            /* No page data to deal with. */
+            continue;
+        }
+
+        if ( map_errs[j] )
+        {
+            /*
+             * rc holds 0 from the previous page's localise_page() call, so
+             * it must be reset or this failure would be reported as success.
+             */
+            rc = -1;
+            ERROR("Mapping pfn %lx (mfn %lx, type %#"PRIx32")failed with %d",
+                  pfns[i], mfns[j], types[i], map_errs[j]);
+            goto err;
+        }
+
+        memcpy(guest_page, page_data, PAGE_SIZE);
+
+        /* Undo page normalisation done by the saver. */
+        rc = ctx->restore.ops.localise_page(ctx, types[i], guest_page);
+        if ( rc )
+        {
+            DPRINTF("Failed to localise");
+            goto err;
+        }
+
+        ++j;
+        guest_page += PAGE_SIZE;
+        page_data += PAGE_SIZE;
+    }
+
+    rc = 0;
+
+ err:
+    if ( mapping )
+        munmap(mapping, nr_pages * PAGE_SIZE);
+
+    free(map_errs);
+    free(mfns);
+
+    return rc;
+}
+
+/*
+ * Validate a PAGE_DATA record from the stream, and pass the results to
+ * process_page_data() to actually perform the legwork.
+ *
+ * Each 64-bit entry is split into a pfn (PAGE_DATA_PFN_MASK) and a type
+ * (PAGE_DATA_TYPE_MASK, shifted down by 32).  The record length must equal
+ * the header, plus one entry per pfn, plus one page for each entry whose
+ * type is below XEN_DOMCTL_PFINFO_BROKEN.
+ *
+ * Returns 0 on success and non-0 on failure.
+ */
+static int handle_page_data(struct context *ctx, struct record *rec)
+{
+    xc_interface *xch = ctx->xch;
+    struct rec_page_data_header *pages = rec->data;
+    unsigned i, pages_of_data = 0;
+    int rc = -1;
+
+    xen_pfn_t *pfns = NULL, pfn;
+    uint32_t *types = NULL, type;
+
+    /* The length check comes first: pages is only dereferenced after it. */
+    if ( rec->length < sizeof(*pages) )
+    {
+        ERROR("PAGE_DATA record truncated: length %"PRIu32", min %zu",
+              rec->length, sizeof(*pages));
+        goto err;
+    }
+    else if ( pages->count < 1 )
+    {
+        ERROR("Expected at least 1 pfn in PAGE_DATA record");
+        goto err;
+    }
+    else if ( rec->length < sizeof(*pages) + (pages->count * sizeof(uint64_t)) )
+    {
+        ERROR("PAGE_DATA record (length %"PRIu32") too short to contain %"
+              PRIu32" pfns worth of information", rec->length, pages->count);
+        goto err;
+    }
+
+    pfns = malloc(pages->count * sizeof(*pfns));
+    types = malloc(pages->count * sizeof(*types));
+    if ( !pfns || !types )
+    {
+        ERROR("Unable to allocate enough memory for %"PRIu32" pfns",
+              pages->count);
+        goto err;
+    }
+
+    for ( i = 0; i < pages->count; ++i )
+    {
+        pfn = pages->pfn[i] & PAGE_DATA_PFN_MASK;
+        if ( !ctx->ops.pfn_is_valid(ctx, pfn) )
+        {
+            ERROR("pfn %#lx (index %u) outside domain maximum", pfn, i);
+            goto err;
+        }
+
+        type = (pages->pfn[i] & PAGE_DATA_TYPE_MASK) >> 32;
+        if ( ((type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT) >= 5) &&
+             ((type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT) <= 8) )
+        {
+            ERROR("Invalid type %#"PRIx32" for pfn %#lx (index %u)", type, pfn, i);
+            goto err;
+        }
+        else if ( type < XEN_DOMCTL_PFINFO_BROKEN )
+            /* NOTAB and all L1 thru L4 tables (including pinned) should have
+             * a page worth of data in the record. */
+            pages_of_data++;
+
+        pfns[i] = pfn;
+        types[i] = type;
+    }
+
+    if ( rec->length != (sizeof(*pages) +
+                         (sizeof(uint64_t) * pages->count) +
+                         (PAGE_SIZE * pages_of_data)) )
+    {
+        ERROR("PAGE_DATA record wrong size: length %"PRIu32", expected "
+              "%zu + %zu + %zu", rec->length, sizeof(*pages),
+              (sizeof(uint64_t) * pages->count), (PAGE_SIZE * pages_of_data));
+        goto err;
+    }
+
+    /* The page data follows directly after the array of pfn entries. */
+    rc = process_page_data(ctx, pages->count, pfns, types,
+                           &pages->pfn[pages->count]);
+ err:
+    free(types);
+    free(pfns);
+
+    return rc;
+}
+
+/*
+ * Restore a domain.
+ *
+ * Runs the setup op, then reads and dispatches records until an END record
+ * is seen, then runs the stream_complete op.  The cleanup op always runs
+ * once the setup op has been called.  If the restore itself failed, its
+ * return code and errno take precedence over any failure from cleanup.
+ */
+static int restore(struct context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+    struct record rec;
+    int rc, saved_rc = 0, saved_errno = 0;
+
+    IPRINTF("Restoring domain");
+
+    rc = ctx->restore.ops.setup(ctx);
+    if ( rc )
+        goto err;
+
+    do
+    {
+        rc = read_record(ctx, &rec);
+        if ( rc )
+            goto err;
+
+        switch ( rec.type )
+        {
+        case REC_TYPE_END:
+            DPRINTF("End record");
+            break;
+
+        case REC_TYPE_PAGE_DATA:
+            rc = handle_page_data(ctx, &rec);
+            break;
+
+        default:
+            rc = ctx->restore.ops.process_record(ctx, &rec);
+            break;
+        }
+
+        free(rec.data);
+        if ( rc )
+            goto err;
+
+    } while ( rec.type != REC_TYPE_END );
+
+    rc = ctx->restore.ops.stream_complete(ctx);
+    if ( rc )
+        goto err;
+
+    IPRINTF("Restore successful");
+    goto done;
+
+ err:
+    saved_errno = errno;
+    saved_rc = rc;
+    PERROR("Restore failed");
+
+ done:
+    free(ctx->restore.populated_pfns);
+    /* Don't leave a dangling bitmap pointer visible to the cleanup op. */
+    ctx->restore.populated_pfns = NULL;
+    rc = ctx->restore.ops.cleanup(ctx);
+    if ( rc )
+        PERROR("Failed to clean up");
+
+    if ( saved_rc )
+    {
+        rc = saved_rc;
+        errno = saved_errno;
+    }
+
+    return rc;
+}
+
 int xc_domain_restore2(xc_interface *xch, int io_fd, uint32_t dom,
                        unsigned int store_evtchn, unsigned long *store_mfn,
                        domid_t store_domid, unsigned int console_evtchn,
@@ -8,8 +502,68 @@ int xc_domain_restore2(xc_interface *xch, int io_fd, uint32_t dom,
                        int checkpointed_stream,
                        struct restore_callbacks *callbacks)
 {
+    struct context ctx =
+    {
+        .xch = xch,
+        .fd = io_fd,
+    };
+
+    /* GCC 4.4 (of CentOS 6.x vintage) can't initialise anonymous unions :( */
+    ctx.restore.console_evtchn = console_evtchn;
+    ctx.restore.console_domid = console_domid;
+    ctx.restore.xenstore_evtchn = store_evtchn;
+    ctx.restore.xenstore_domid = store_domid;
+    ctx.restore.callbacks = callbacks;
+
     IPRINTF("In experimental %s", __func__);
-    return -1;
+
+    if ( xc_domain_getinfo(xch, dom, 1, &ctx.dominfo) != 1 )
+    {
+        PERROR("Failed to get domain info");
+        return -1;
+    }
+
+    if ( ctx.dominfo.domid != dom )
+    {
+        ERROR("Domain %d does not exist", dom);
+        return -1;
+    }
+
+    ctx.domid = dom;
+    IPRINTF("Restoring domain %d", dom);
+
+    if ( read_headers(&ctx) )
+        return -1;
+
+    /* The op tables are chosen from the existing domain's type. */
+    if ( ctx.dominfo.hvm )
+    {
+        ctx.ops = common_ops_x86_hvm;
+        ctx.restore.ops = restore_ops_x86_hvm;
+        if ( restore(&ctx) )
+            return -1;
+    }
+    else
+    {
+        ctx.ops = common_ops_x86_pv;
+        ctx.restore.ops = restore_ops_x86_pv;
+        if ( restore(&ctx) )
+            return -1;
+    }
+
+    DPRINTF("XenStore: mfn %#lx, dom %d, evt %u",
+            ctx.restore.xenstore_mfn,
+            ctx.restore.xenstore_domid,
+            ctx.restore.xenstore_evtchn);
+
+    DPRINTF("Console: mfn %#lx, dom %d, evt %u",
+            ctx.restore.console_mfn,
+            ctx.restore.console_domid,
+            ctx.restore.console_evtchn);
+
+    *console_mfn = ctx.restore.console_mfn;
+    *store_mfn = ctx.restore.xenstore_mfn;
+
+    return 0;
 }
 
 /*
-- 
1.7.10.4