From mboxrd@z Thu Jan 1 00:00:00 1970 From: Konrad Rzeszutek Wilk Subject: [PATCH RFC] xen/libxl/libxl: RIP PV superpage. Date: Fri, 29 Jan 2016 10:30:46 -0500 Message-ID: <1454081446-9753-2-git-send-email-konrad.wilk@oracle.com> References: <1454081446-9753-1-git-send-email-konrad.wilk@oracle.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <1454081446-9753-1-git-send-email-konrad.wilk@oracle.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: Ian Jackson , Stefano Stabellini , Ian Campbell , Wei Liu , Keir Fraser , Jan Beulich , Andrew Cooper , xen-devel@lists.xen.org Cc: Konrad Rzeszutek Wilk List-Id: xen-devel@lists.xenproject.org The last bastion of users is moving away from needing this. Please note that there was an partial removal by c/s 9b3c6b13e0c085ceb53210f8e682a073096b94fa "libxc: remove superpages option for pv domains" so an user could not even run PV guests with superpage. HVM guests are not affected - they have been able to use superpages (called hugepages) if the CPU supports 2MB or 1GB since Xen 3.3. This is reported in the ring buffer: (XEN) HVM: HAP page sizes: 4kB, 2MB, 1GB AMD CPUs that support 2MB require NPT aka Rapid Virtualization Indexing. This was introduced in third generation Opteron - Barcelona. Intel CPUs that support 2MB require EPT aka Extended Page Tables. This was introduced in Westmere microarchitecture. Signed-off-by: Konrad Rzeszutek Wilk --- docs/misc/xen-command-line.markdown | 7 -- tools/libxc/include/xenguest.h | 3 +- tools/libxc/xc_nomigrate.c | 2 +- tools/libxc/xc_sr_restore.c | 6 +- tools/libxl/libxl_internal.h | 2 +- tools/libxl/libxl_save_callout.c | 4 +- tools/libxl/libxl_save_helper.c | 3 +- tools/libxl/libxl_stream_read.c | 2 +- xen/arch/x86/domain.c | 2 - xen/arch/x86/domain_build.c | 2 - xen/arch/x86/mm.c | 241 ++---------------------------------- xen/include/asm-x86/config.h | 9 +- xen/include/asm-x86/guest_pt.h | 4 +- xen/include/asm-x86/mm.h | 19 --- xen/include/public/xen.h | 4 +- 15 files changed, 23 insertions(+), 287 deletions(-) diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown index d267a04..6ea89b0 100644 --- a/docs/misc/xen-command-line.markdown +++ b/docs/misc/xen-command-line.markdown @@ -133,13 +133,6 @@ resume. `s3_mode` instructs Xen to set up the boot time (option `vga=`) video mode during S3 resume. -### allowsuperpage -> `= ` - -> Default: `true` - -Permit Xen to use superpages when performing memory management. - ### altp2m (Intel) > `= ` diff --git a/tools/libxc/include/xenguest.h b/tools/libxc/include/xenguest.h index d48b3ff..11ea79b 100644 --- a/tools/libxc/include/xenguest.h +++ b/tools/libxc/include/xenguest.h @@ -113,7 +113,6 @@ struct restore_callbacks { * @parm store_mfn returned with the mfn of the store page * @parm hvm non-zero if this is a HVM restore * @parm pae non-zero if this HVM domain has PAE support enabled - * @parm superpages non-zero to allocate guest memory with superpages * @parm checkpointed_stream non-zero if the far end of the stream is using checkpointing * @parm callbacks non-NULL to receive a callback to restore toolstack * specific data @@ -123,7 +122,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, unsigned int store_evtchn, unsigned long *store_mfn, domid_t store_domid, unsigned int console_evtchn, unsigned long *console_mfn, domid_t console_domid, - unsigned int hvm, unsigned int pae, int superpages, + unsigned int hvm, unsigned int pae, int checkpointed_stream, struct restore_callbacks *callbacks); diff --git a/tools/libxc/xc_nomigrate.c b/tools/libxc/xc_nomigrate.c index 902429e..1bb4477 100644 --- a/tools/libxc/xc_nomigrate.c +++ b/tools/libxc/xc_nomigrate.c @@ -32,7 +32,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, unsigned int store_evtchn, unsigned long *store_mfn, domid_t store_domid, unsigned int console_evtchn, unsigned long *console_mfn, domid_t console_domid, - unsigned int hvm, unsigned int pae, int superpages, + unsigned int hvm, unsigned int pae, int checkpointed_stream, struct restore_callbacks *callbacks) { diff --git a/tools/libxc/xc_sr_restore.c b/tools/libxc/xc_sr_restore.c index d4d33fd..c8b7d1f 100644 --- a/tools/libxc/xc_sr_restore.c +++ b/tools/libxc/xc_sr_restore.c @@ -724,7 +724,7 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, unsigned int store_evtchn, unsigned long *store_mfn, domid_t store_domid, unsigned int console_evtchn, unsigned long *console_gfn, domid_t console_domid, - unsigned int hvm, unsigned int pae, int superpages, + unsigned int hvm, unsigned int pae, int checkpointed_stream, struct restore_callbacks *callbacks) { @@ -746,9 +746,9 @@ int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom, if ( checkpointed_stream ) assert(callbacks->checkpoint); - DPRINTF("fd %d, dom %u, hvm %u, pae %u, superpages %d" + DPRINTF("fd %d, dom %u, hvm %u, pae %u" ", checkpointed_stream %d", io_fd, dom, hvm, pae, - superpages, checkpointed_stream); + checkpointed_stream); if ( xc_domain_getinfo(xch, dom, 1, &ctx.dominfo) != 1 ) { diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h index b194e65..a6d167d 100644 --- a/tools/libxl/libxl_internal.h +++ b/tools/libxl/libxl_internal.h @@ -3478,7 +3478,7 @@ _hidden int libxl__restore_emulator_xenstore_data _hidden void libxl__xc_domain_restore(libxl__egc *egc, libxl__domain_create_state *dcs, libxl__save_helper_state *shs, - int hvm, int pae, int superpages); + int hvm, int pae); /* If rc==0 then retval is the return value from xc_domain_save * and errnoval is the errno value it provided. * If rc!=0, retval and errnoval are undefined. */ diff --git a/tools/libxl/libxl_save_callout.c b/tools/libxl/libxl_save_callout.c index 45b9727..0b60422 100644 --- a/tools/libxl/libxl_save_callout.c +++ b/tools/libxl/libxl_save_callout.c @@ -43,7 +43,7 @@ static void helper_done(libxl__egc *egc, libxl__save_helper_state *shs); void libxl__xc_domain_restore(libxl__egc *egc, libxl__domain_create_state *dcs, libxl__save_helper_state *shs, - int hvm, int pae, int superpages) + int hvm, int pae) { STATE_AO_GC(dcs->ao); @@ -60,7 +60,7 @@ void libxl__xc_domain_restore(libxl__egc *egc, libxl__domain_create_state *dcs, state->store_port, state->store_domid, state->console_port, state->console_domid, - hvm, pae, superpages, + hvm, pae, cbflags, dcs->restore_params.checkpointed_stream, }; diff --git a/tools/libxl/libxl_save_helper.c b/tools/libxl/libxl_save_helper.c index 39038f9..d1bb5b9 100644 --- a/tools/libxl/libxl_save_helper.c +++ b/tools/libxl/libxl_save_helper.c @@ -274,7 +274,6 @@ int main(int argc, char **argv) domid_t console_domid = strtoul(NEXTARG,0,10); unsigned int hvm = strtoul(NEXTARG,0,10); unsigned int pae = strtoul(NEXTARG,0,10); - int superpages = strtoul(NEXTARG,0,10); unsigned cbflags = strtoul(NEXTARG,0,10); int checkpointed = strtoul(NEXTARG,0,10); assert(!*++argv); @@ -289,7 +288,7 @@ int main(int argc, char **argv) r = xc_domain_restore(xch, io_fd, dom, store_evtchn, &store_mfn, store_domid, console_evtchn, &console_mfn, - console_domid, hvm, pae, superpages, + console_domid, hvm, pae, checkpointed, &helper_restore_callbacks); helper_stub_restore_results(store_mfn,console_mfn,0); diff --git a/tools/libxl/libxl_stream_read.c b/tools/libxl/libxl_stream_read.c index dac134e..3be8b91 100644 --- a/tools/libxl/libxl_stream_read.c +++ b/tools/libxl/libxl_stream_read.c @@ -549,7 +549,7 @@ static bool process_record(libxl__egc *egc, break; case REC_TYPE_LIBXC_CONTEXT: - libxl__xc_domain_restore(egc, dcs, &stream->shs, 0, 0, 0); + libxl__xc_domain_restore(egc, dcs, &stream->shs, 0, 0); break; case REC_TYPE_EMULATOR_XENSTORE_DATA: diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c index 352c194..5f800f6 100644 --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -2327,8 +2327,6 @@ static int relinquish_memory( BUG(); } - clear_superpage_mark(page); - if ( test_and_clear_bit(_PGC_allocated, &page->count_info) ) put_page(page); diff --git a/xen/arch/x86/domain_build.c b/xen/arch/x86/domain_build.c index cb287f0..7ee1227 100644 --- a/xen/arch/x86/domain_build.c +++ b/xen/arch/x86/domain_build.c @@ -828,8 +828,6 @@ static __init void setup_pv_physmap(struct domain *d, unsigned long pgtbl_pfn, 0)) != NULL ) { *pl2e = l2e_from_page(page, L1_PROT|_PAGE_DIRTY|_PAGE_PSE); - if ( opt_allow_superpage ) - get_superpage(page_to_mfn(page), d); vphysmap_start += 1UL << L2_PAGETABLE_SHIFT; continue; } diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c index 1ee431e..5a529f2 100644 --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -152,18 +152,13 @@ struct rangeset *__read_mostly mmio_ro_ranges; #define PAGE_CACHE_ATTRS (_PAGE_PAT|_PAGE_PCD|_PAGE_PWT) -bool_t __read_mostly opt_allow_superpage; -boolean_param("allowsuperpage", opt_allow_superpage); - static void put_superpage(unsigned long mfn); static uint32_t base_disallow_mask; /* Global bit is allowed to be set on L1 PTEs. Intended for user mappings. */ #define L1_DISALLOW_MASK ((base_disallow_mask | _PAGE_GNTTAB) & ~_PAGE_GLOBAL) -#define L2_DISALLOW_MASK (unlikely(opt_allow_superpage) \ - ? base_disallow_mask & ~_PAGE_PSE \ - : base_disallow_mask) +#define L2_DISALLOW_MASK (base_disallow_mask) #define l3_disallow_mask(d) (!is_pv_32bit_domain(d) ? \ base_disallow_mask : 0xFFFFF198U) @@ -209,15 +204,6 @@ static void __init init_frametable_chunk(void *start, void *end) memset(end, -1, s - e); } -static void __init init_spagetable(void) -{ - BUILD_BUG_ON(XEN_VIRT_END > SPAGETABLE_VIRT_START); - - init_frametable_chunk(spage_table, - mem_hotplug ? spage_table + SPAGETABLE_NR - : pdx_to_spage(max_pdx - 1) + 1); -} - void __init init_frametable(void) { unsigned int sidx, eidx, nidx; @@ -242,9 +228,6 @@ void __init init_frametable(void) : end_pg; init_frametable_chunk(pdx_to_page(sidx * PDX_GROUP_COUNT), top_pg); memset(end_pg, -1, (unsigned long)top_pg - (unsigned long)end_pg); - - if (opt_allow_superpage) - init_spagetable(); } #ifndef NDEBUG @@ -985,20 +968,7 @@ get_page_from_l2e( return rc; } - if ( !opt_allow_superpage ) - { - MEM_LOG("Attempt to map superpage without allowsuperpage " - "flag in hypervisor"); - return -EINVAL; - } - - if ( mfn & (L1_PAGETABLE_ENTRIES-1) ) - { - MEM_LOG("Unaligned superpage map attempt mfn %lx", mfn); - return -EINVAL; - } - - return get_superpage(mfn, d); + return -EINVAL; } @@ -1853,10 +1823,7 @@ static int mod_l2_entry(l2_pgentry_t *pl2e, } /* Fast path for identical mapping and presence. */ - if ( !l2e_has_changed(ol2e, nl2e, - unlikely(opt_allow_superpage) - ? _PAGE_PSE | _PAGE_RW | _PAGE_PRESENT - : _PAGE_PRESENT) ) + if ( !l2e_has_changed(ol2e, nl2e, _PAGE_PRESENT) ) { adjust_guest_l2e(nl2e, d); if ( UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, vcpu, preserve_ad) ) @@ -2511,22 +2478,6 @@ int get_page_type_preemptible(struct page_info *page, unsigned long type) return __get_page_type(page, type, 1); } -static int get_spage_pages(struct page_info *page, struct domain *d) -{ - int i; - - for (i = 0; i < (1<= 0) - put_page_and_type(--page); - return 0; - } - } - return 1; -} - static void put_spage_pages(struct page_info *page) { int i; @@ -2538,167 +2489,10 @@ static void put_spage_pages(struct page_info *page) return; } -static int mark_superpage(struct spage_info *spage, struct domain *d) -{ - unsigned long x, nx, y = spage->type_info; - int pages_done = 0; - - ASSERT(opt_allow_superpage); - - do { - x = y; - nx = x + 1; - if ( (x & SGT_type_mask) == SGT_mark ) - { - MEM_LOG("Duplicate superpage mark attempt mfn %lx", - spage_to_mfn(spage)); - if ( pages_done ) - put_spage_pages(spage_to_page(spage)); - return -EINVAL; - } - if ( (x & SGT_type_mask) == SGT_dynamic ) - { - if ( pages_done ) - { - put_spage_pages(spage_to_page(spage)); - pages_done = 0; - } - } - else if ( !pages_done ) - { - if ( !get_spage_pages(spage_to_page(spage), d) ) - { - MEM_LOG("Superpage type conflict in mark attempt mfn %lx", - spage_to_mfn(spage)); - return -EINVAL; - } - pages_done = 1; - } - nx = (nx & ~SGT_type_mask) | SGT_mark; - - } while ( (y = cmpxchg(&spage->type_info, x, nx)) != x ); - - return 0; -} - -static int unmark_superpage(struct spage_info *spage) -{ - unsigned long x, nx, y = spage->type_info; - unsigned long do_pages = 0; - - ASSERT(opt_allow_superpage); - - do { - x = y; - nx = x - 1; - if ( (x & SGT_type_mask) != SGT_mark ) - { - MEM_LOG("Attempt to unmark unmarked superpage mfn %lx", - spage_to_mfn(spage)); - return -EINVAL; - } - if ( (nx & SGT_count_mask) == 0 ) - { - nx = (nx & ~SGT_type_mask) | SGT_none; - do_pages = 1; - } - else - { - nx = (nx & ~SGT_type_mask) | SGT_dynamic; - } - } while ( (y = cmpxchg(&spage->type_info, x, nx)) != x ); - - if ( do_pages ) - put_spage_pages(spage_to_page(spage)); - - return 0; -} - -void clear_superpage_mark(struct page_info *page) -{ - struct spage_info *spage; - - if ( !opt_allow_superpage ) - return; - - spage = page_to_spage(page); - if ((spage->type_info & SGT_type_mask) == SGT_mark) - unmark_superpage(spage); - -} - -int get_superpage(unsigned long mfn, struct domain *d) -{ - struct spage_info *spage; - unsigned long x, nx, y; - int pages_done = 0; - - ASSERT(opt_allow_superpage); - - if ( !mfn_valid(mfn | (L1_PAGETABLE_ENTRIES - 1)) ) - return -EINVAL; - - spage = mfn_to_spage(mfn); - y = spage->type_info; - do { - x = y; - nx = x + 1; - if ( (x & SGT_type_mask) != SGT_none ) - { - if ( pages_done ) - { - put_spage_pages(spage_to_page(spage)); - pages_done = 0; - } - } - else - { - if ( !get_spage_pages(spage_to_page(spage), d) ) - { - MEM_LOG("Type conflict on superpage mapping mfn %lx", - spage_to_mfn(spage)); - return -EINVAL; - } - pages_done = 1; - nx = (nx & ~SGT_type_mask) | SGT_dynamic; - } - } while ( (y = cmpxchg(&spage->type_info, x, nx)) != x ); - - return 0; -} - static void put_superpage(unsigned long mfn) { - struct spage_info *spage; - unsigned long x, nx, y; - unsigned long do_pages = 0; - - if ( !opt_allow_superpage ) - { - put_spage_pages(mfn_to_page(mfn)); - return; - } - - spage = mfn_to_spage(mfn); - y = spage->type_info; - do { - x = y; - nx = x - 1; - if ((x & SGT_type_mask) == SGT_dynamic) - { - if ((nx & SGT_count_mask) == 0) - { - nx = (nx & ~SGT_type_mask) | SGT_none; - do_pages = 1; - } - } - - } while ((y = cmpxchg(&spage->type_info, x, nx)) != x); - - if (do_pages) - put_spage_pages(spage_to_page(spage)); - - return; + ASSERT(1); /* This should really not be called. */ + put_spage_pages(mfn_to_page(mfn)); } int put_old_guest_table(struct vcpu *v) @@ -3437,29 +3231,8 @@ long do_mmuext_op( case MMUEXT_MARK_SUPER: case MMUEXT_UNMARK_SUPER: - { - unsigned long mfn = op.arg1.mfn; - - if ( !opt_allow_superpage ) - { - MEM_LOG("Superpages disallowed"); - rc = -ENOSYS; - } - else if ( unlikely(d != pg_owner) ) - rc = -EPERM; - else if ( mfn & (L1_PAGETABLE_ENTRIES - 1) ) - { - MEM_LOG("Unaligned superpage reference mfn %lx", mfn); - rc = -EINVAL; - } - else if ( !mfn_valid(mfn | (L1_PAGETABLE_ENTRIES - 1)) ) - rc = -EINVAL; - else if ( op.cmd == MMUEXT_MARK_SUPER ) - rc = mark_superpage(mfn_to_spage(mfn), d); - else - rc = unmark_superpage(mfn_to_spage(mfn)); - break; - } + MEM_LOG("Superpages are deprecated."); + rc = -ENOSYS; default: MEM_LOG("Invalid extended pt command %#x", op.cmd); diff --git a/xen/include/asm-x86/config.h b/xen/include/asm-x86/config.h index d97877d..eb08aa2 100644 --- a/xen/include/asm-x86/config.h +++ b/xen/include/asm-x86/config.h @@ -239,13 +239,8 @@ extern unsigned char boot_edid_info[128]; #define XEN_VIRT_START (HIRO_COMPAT_MPT_VIRT_END) #define XEN_VIRT_END (XEN_VIRT_START + GB(1)) -/* Slot 261: superpage information array (64MB or 512MB). */ -#define SPAGETABLE_VIRT_END FRAMETABLE_VIRT_START -#define SPAGETABLE_NR (((FRAMETABLE_NR - 1) >> (SUPERPAGE_SHIFT - \ - PAGE_SHIFT)) + 1) -#define SPAGETABLE_SIZE (SPAGETABLE_NR * sizeof(struct spage_info)) -#define SPAGETABLE_VIRT_START ((SPAGETABLE_VIRT_END - SPAGETABLE_SIZE) & \ - (_AC(-1,UL) << SUPERPAGE_SHIFT)) +/* Slot 261: superpage information array (64MB or 512MB). + * Now free. */ #ifndef CONFIG_BIGMEM /* Slot 261: page-frame information array (128GB). */ diff --git a/xen/include/asm-x86/guest_pt.h b/xen/include/asm-x86/guest_pt.h index 3447973..c163af6 100644 --- a/xen/include/asm-x86/guest_pt.h +++ b/xen/include/asm-x86/guest_pt.h @@ -186,9 +186,7 @@ guest_supports_superpages(struct vcpu *v) /* The _PAGE_PSE bit must be honoured in HVM guests, whenever * CR4.PSE is set or the guest is in PAE or long mode. * It's also used in the dummy PT for vcpus with CR4.PG cleared. */ - return (is_pv_vcpu(v) - ? opt_allow_superpage - : (GUEST_PAGING_LEVELS != 2 + return (is_pv_vcpu(v) ? 0 /* Deprecated. */ : (GUEST_PAGING_LEVELS != 2 || !hvm_paging_enabled(v) || (v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PSE))); } diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h index 7598414..7a0d258 100644 --- a/xen/include/asm-x86/mm.h +++ b/xen/include/asm-x86/mm.h @@ -233,21 +233,6 @@ struct page_info #define PGC_count_width PG_shift(9) #define PGC_count_mask ((1UL<count_info & PGC_xen_heap) #define is_xen_heap_mfn(mfn) \ (__mfn_valid(mfn) && is_xen_heap_page(__mfn_to_page(mfn))) @@ -281,7 +266,6 @@ extern void free_shared_domheap_page(struct page_info *page); #define frame_table ((struct page_info *)FRAMETABLE_VIRT_START) #define spage_table ((struct spage_info *)SPAGETABLE_VIRT_START) -int get_superpage(unsigned long mfn, struct domain *d); extern unsigned long max_page; extern unsigned long total_pages; void init_frametable(void); @@ -327,8 +311,6 @@ void zap_ro_mpt(unsigned long mfn); int is_iomem_page(unsigned long mfn); -void clear_superpage_mark(struct page_info *page); - const unsigned long *get_platform_badpages(unsigned int *array_size); /* Per page locks: * page_lock() is used for two purposes: pte serialization, and memory sharing. @@ -402,7 +384,6 @@ static inline int get_page_and_type(struct page_info *page, int check_descriptor(const struct domain *, struct desc_struct *d); -extern bool_t opt_allow_superpage; extern paddr_t mem_hotplug; /****************************************************************************** diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h index 7b629b1..c3b290d 100644 --- a/xen/include/public/xen.h +++ b/xen/include/public/xen.h @@ -272,7 +272,8 @@ DEFINE_XEN_GUEST_HANDLE(xen_ulong_t); * The one bits that does not fit with the default layout is the PAGE_PSE * also called PAGE_PAT). The MMUEXT_[UN]MARK_SUPER arguments to the * HYPERVISOR_mmuext_op serve as mechanism to set a pagetable to be 4MB - * (or 2MB) instead of using the PAGE_PSE bit. + * (or 2MB) instead of using the PAGE_PSE bit. That functionality is now + * deprecated and will return -ENOSYS. * * The reason that the PAGE_PSE (bit 7) is not being utilized is due to Xen * using it as the Page Attribute Table (PAT) bit - for details on it please @@ -382,6 +383,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_ulong_t); * * cmd: MMUEXT_[UN]MARK_SUPER * mfn: Machine frame number of head of superpage to be [un]marked. + * These sub-ops are deprecated and will return -ENOSYS. */ /* ` enum mmuext_cmd { */ #define MMUEXT_PIN_L1_TABLE 0 -- 2.1.0