From: "Jan Beulich"
Subject: [PATCH 03/11] x86: re-introduce map_domain_page() et al
Date: Tue, 22 Jan 2013 10:50:55 +0000
Message-ID: <50FE7D1F02000078000B831B@nat28.tlf.novell.com>
References: <50FE7BF502000078000B82F8@nat28.tlf.novell.com>
In-Reply-To: <50FE7BF502000078000B82F8@nat28.tlf.novell.com>
To: xen-devel
List-Id: xen-devel@lists.xenproject.org

This is being done mostly in the form previously used on x86-32,
utilizing the second L3 page table slot within the per-domain mapping
area for those mappings. It remains to be determined whether that
concept is really suitable, or whether instead re-implementing at least
the non-global variant from scratch would be better.

Also add the helpers {clear,copy}_domain_page() as well as initial uses
of them.

One question is whether, to exercise the non-trivial code paths, we
shouldn't make the trivial shortcuts conditional upon NDEBUG being
defined. See the debugging patch at the end of the series.

Signed-off-by: Jan Beulich

--- a/xen/arch/x86/Makefile
+++ b/xen/arch/x86/Makefile
@@ -19,6 +19,7 @@ obj-bin-y += dmi_scan.init.o
 obj-y += domctl.o
 obj-y += domain.o
 obj-bin-y += domain_build.init.o
+obj-y += domain_page.o
 obj-y += e820.o
 obj-y += extable.o
 obj-y += flushtlb.o
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -397,10 +397,14 @@ int vcpu_initialise(struct vcpu *v)
             return -ENOMEM;
         clear_page(page_to_virt(pg));
         perdomain_pt_page(d, idx) = pg;
-        d->arch.mm_perdomain_l2[l2_table_offset(PERDOMAIN_VIRT_START)+idx]
+        d->arch.mm_perdomain_l2[0][l2_table_offset(PERDOMAIN_VIRT_START)+idx]
             = l2e_from_page(pg, __PAGE_HYPERVISOR);
     }
 
+    rc = mapcache_vcpu_init(v);
+    if ( rc )
+        return rc;
+
     paging_vcpu_init(v);
 
     v->arch.perdomain_ptes = perdomain_ptes(d, v);
@@ -526,8 +530,8 @@ int arch_domain_create(struct domain *d,
     pg = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d)));
     if ( pg == NULL )
         goto fail;
-    d->arch.mm_perdomain_l2 = page_to_virt(pg);
-    clear_page(d->arch.mm_perdomain_l2);
+    d->arch.mm_perdomain_l2[0] = page_to_virt(pg);
+    clear_page(d->arch.mm_perdomain_l2[0]);
 
     pg = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d)));
     if ( pg == NULL )
@@ -535,8 +539,10 @@ int arch_domain_create(struct domain *d,
     d->arch.mm_perdomain_l3 = page_to_virt(pg);
     clear_page(d->arch.mm_perdomain_l3);
     d->arch.mm_perdomain_l3[l3_table_offset(PERDOMAIN_VIRT_START)] =
-        l3e_from_page(virt_to_page(d->arch.mm_perdomain_l2),
-                      __PAGE_HYPERVISOR);
+        l3e_from_pfn(virt_to_mfn(d->arch.mm_perdomain_l2[0]),
+                     __PAGE_HYPERVISOR);
+
+    mapcache_domain_init(d);
 
     HYPERVISOR_COMPAT_VIRT_START(d) =
         is_hvm_domain(d) ? ~0u : __HYPERVISOR_COMPAT_VIRT_START;
@@ -609,8 +615,9 @@ int arch_domain_create(struct domain *d,
     free_xenheap_page(d->shared_info);
     if ( paging_initialised )
         paging_final_teardown(d);
-    if ( d->arch.mm_perdomain_l2 )
-        free_domheap_page(virt_to_page(d->arch.mm_perdomain_l2));
+    mapcache_domain_exit(d);
+    if ( d->arch.mm_perdomain_l2[0] )
+        free_domheap_page(virt_to_page(d->arch.mm_perdomain_l2[0]));
     if ( d->arch.mm_perdomain_l3 )
         free_domheap_page(virt_to_page(d->arch.mm_perdomain_l3));
     if ( d->arch.mm_perdomain_pt_pages )
@@ -633,13 +640,15 @@ void arch_domain_destroy(struct domain *
 
     paging_final_teardown(d);
 
+    mapcache_domain_exit(d);
+
     for ( i = 0; i < PDPT_L2_ENTRIES; ++i )
     {
         if ( perdomain_pt_page(d, i) )
             free_domheap_page(perdomain_pt_page(d, i));
     }
     free_domheap_page(virt_to_page(d->arch.mm_perdomain_pt_pages));
-    free_domheap_page(virt_to_page(d->arch.mm_perdomain_l2));
+    free_domheap_page(virt_to_page(d->arch.mm_perdomain_l2[0]));
     free_domheap_page(virt_to_page(d->arch.mm_perdomain_l3));
 
     free_xenheap_page(d->shared_info);
--- /dev/null
+++ b/xen/arch/x86/domain_page.c
@@ -0,0 +1,471 @@
+/******************************************************************************
+ * domain_page.h
+ *
+ * Allow temporary mapping of domain pages.
+ *
+ * Copyright (c) 2003-2006, Keir Fraser
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+static inline struct vcpu *mapcache_current_vcpu(void)
+{
+    /* In the common case we use the mapcache of the running VCPU. */
+    struct vcpu *v = current;
+
+    /*
+     * When current isn't properly set up yet, this is equivalent to
+     * running in an idle vCPU (callers must check for NULL).
+     */
+    if ( v == (struct vcpu *)0xfffff000 )
+        return NULL;
+
+    /*
+     * If guest_table is NULL, and we are running a paravirtualised guest,
+     * then it means we are running on the idle domain's page table and must
+     * therefore use its mapcache.
+     */
+    if ( unlikely(pagetable_is_null(v->arch.guest_table)) && !is_hvm_vcpu(v) )
+    {
+        /* If we really are idling, perform lazy context switch now. */
+        if ( (v = idle_vcpu[smp_processor_id()]) == current )
+            sync_local_execstate();
+        /* We must now be running on the idle page table. */
+        ASSERT(read_cr3() == __pa(idle_pg_table));
+    }
+
+    return v;
+}
+
+#define mapcache_l2_entry(e) ((e) >> PAGETABLE_ORDER)
+#define MAPCACHE_L2_ENTRIES (mapcache_l2_entry(MAPCACHE_ENTRIES - 1) + 1)
+#define DCACHE_L1ENT(dc, idx) \
+    ((dc)->l1tab[(idx) >> PAGETABLE_ORDER] \
+                [(idx) & ((1 << PAGETABLE_ORDER) - 1)])
+
+void *map_domain_page(unsigned long mfn)
+{
+    unsigned long flags;
+    unsigned int idx, i;
+    struct vcpu *v;
+    struct mapcache_domain *dcache;
+    struct mapcache_vcpu *vcache;
+    struct vcpu_maphash_entry *hashent;
+
+    if ( mfn <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
+        return mfn_to_virt(mfn);
+
+    v = mapcache_current_vcpu();
+    if ( !v || is_hvm_vcpu(v) )
+        return mfn_to_virt(mfn);
+
+    dcache = &v->domain->arch.pv_domain.mapcache;
+    vcache = &v->arch.pv_vcpu.mapcache;
+    if ( !dcache->l1tab )
+        return mfn_to_virt(mfn);
+
+    perfc_incr(map_domain_page_count);
+
+    local_irq_save(flags);
+
+    hashent = &vcache->hash[MAPHASH_HASHFN(mfn)];
+    if ( hashent->mfn == mfn )
+    {
+        idx = hashent->idx;
+        ASSERT(idx < dcache->entries);
+        hashent->refcnt++;
+        ASSERT(hashent->refcnt);
+        ASSERT(l1e_get_pfn(DCACHE_L1ENT(dcache, idx)) == mfn);
+        goto out;
+    }
+
+    spin_lock(&dcache->lock);
+
+    /* Has some other CPU caused a wrap? We must flush if so. */
+    if ( unlikely(dcache->epoch != vcache->shadow_epoch) )
+    {
+        vcache->shadow_epoch = dcache->epoch;
+        if ( NEED_FLUSH(this_cpu(tlbflush_time), dcache->tlbflush_timestamp) )
+        {
+            perfc_incr(domain_page_tlb_flush);
+            flush_tlb_local();
+        }
+    }
+
+    idx = find_next_zero_bit(dcache->inuse, dcache->entries, dcache->cursor);
+    if ( unlikely(idx >= dcache->entries) )
+    {
+        unsigned long accum = 0;
+
+        /* /First/, clean the garbage map and update the inuse list. */
+        for ( i = 0; i < BITS_TO_LONGS(dcache->entries); i++ )
+        {
+            dcache->inuse[i] &= ~xchg(&dcache->garbage[i], 0);
+            accum |= ~dcache->inuse[i];
+        }
+
+        if ( accum )
+            idx = find_first_zero_bit(dcache->inuse, dcache->entries);
+        else
+        {
+            /* Replace a hash entry instead. */
+            i = MAPHASH_HASHFN(mfn);
+            do {
+                hashent = &vcache->hash[i];
+                if ( hashent->idx != MAPHASHENT_NOTINUSE && !hashent->refcnt )
+                {
+                    idx = hashent->idx;
+                    ASSERT(l1e_get_pfn(DCACHE_L1ENT(dcache, idx)) ==
+                           hashent->mfn);
+                    l1e_write(&DCACHE_L1ENT(dcache, idx), l1e_empty());
+                    hashent->idx = MAPHASHENT_NOTINUSE;
+                    hashent->mfn = ~0UL;
+                    break;
+                }
+                if ( ++i == MAPHASH_ENTRIES )
+                    i = 0;
+            } while ( i != MAPHASH_HASHFN(mfn) );
+        }
+        BUG_ON(idx >= dcache->entries);
+
+        /* /Second/, flush TLBs. */
+        perfc_incr(domain_page_tlb_flush);
+        flush_tlb_local();
+        vcache->shadow_epoch = ++dcache->epoch;
+        dcache->tlbflush_timestamp = tlbflush_current_time();
+    }
+
+    set_bit(idx, dcache->inuse);
+    dcache->cursor = idx + 1;
+
+    spin_unlock(&dcache->lock);
+
+    l1e_write(&DCACHE_L1ENT(dcache, idx),
+              l1e_from_pfn(mfn, __PAGE_HYPERVISOR));
+
+ out:
+    local_irq_restore(flags);
+    return (void *)MAPCACHE_VIRT_START + pfn_to_paddr(idx);
+}
+
+void unmap_domain_page(const void *ptr)
+{
+    unsigned int idx;
+    struct vcpu *v;
+    struct mapcache_domain *dcache;
+    unsigned long va = (unsigned long)ptr, mfn, flags;
+    struct vcpu_maphash_entry *hashent;
+
+    if ( va >= DIRECTMAP_VIRT_START )
+        return;
+
+    ASSERT(va >= MAPCACHE_VIRT_START && va < MAPCACHE_VIRT_END);
+
+    v = mapcache_current_vcpu();
+    ASSERT(v && !is_hvm_vcpu(v));
+
+    dcache = &v->domain->arch.pv_domain.mapcache;
+    ASSERT(dcache->l1tab);
+
+    idx = PFN_DOWN(va - MAPCACHE_VIRT_START);
+    mfn = l1e_get_pfn(DCACHE_L1ENT(dcache, idx));
+    hashent = &v->arch.pv_vcpu.mapcache.hash[MAPHASH_HASHFN(mfn)];
+
+    local_irq_save(flags);
+
+    if ( hashent->idx == idx )
+    {
+        ASSERT(hashent->mfn == mfn);
+        ASSERT(hashent->refcnt);
+        hashent->refcnt--;
+    }
+    else if ( !hashent->refcnt )
+    {
+        if ( hashent->idx != MAPHASHENT_NOTINUSE )
+        {
+            /* /First/, zap the PTE. */
+            ASSERT(l1e_get_pfn(DCACHE_L1ENT(dcache, hashent->idx)) ==
+                   hashent->mfn);
+            l1e_write(&DCACHE_L1ENT(dcache, hashent->idx), l1e_empty());
+            /* /Second/, mark as garbage. */
+            set_bit(hashent->idx, dcache->garbage);
+        }
+
+        /* Add newly-freed mapping to the maphash. */
+        hashent->mfn = mfn;
+        hashent->idx = idx;
+    }
+    else
+    {
+        /* /First/, zap the PTE. */
+        l1e_write(&DCACHE_L1ENT(dcache, idx), l1e_empty());
+        /* /Second/, mark as garbage. */
+        set_bit(idx, dcache->garbage);
+    }
+
+    local_irq_restore(flags);
+}
+
+void clear_domain_page(unsigned long mfn)
+{
+    void *ptr = map_domain_page(mfn);
+
+    clear_page(ptr);
+    unmap_domain_page(ptr);
+}
+
+void copy_domain_page(unsigned long dmfn, unsigned long smfn)
+{
+    const void *src = map_domain_page(smfn);
+    void *dst = map_domain_page(dmfn);
+
+    copy_page(dst, src);
+    unmap_domain_page(dst);
+    unmap_domain_page(src);
+}
+
+int mapcache_domain_init(struct domain *d)
+{
+    struct mapcache_domain *dcache = &d->arch.pv_domain.mapcache;
+    unsigned int i, bitmap_pages, memf = MEMF_node(domain_to_node(d));
+    unsigned long *end;
+
+    if ( is_hvm_domain(d) || is_idle_domain(d) )
+        return 0;
+
+    if ( !mem_hotplug && max_page <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
+        return 0;
+
+    dcache->l1tab = xzalloc_array(l1_pgentry_t *, MAPCACHE_L2_ENTRIES + 1);
+    d->arch.mm_perdomain_l2[MAPCACHE_SLOT] = alloc_xenheap_pages(0, memf);
+    if ( !dcache->l1tab || !d->arch.mm_perdomain_l2[MAPCACHE_SLOT] )
+        return -ENOMEM;
+
+    clear_page(d->arch.mm_perdomain_l2[MAPCACHE_SLOT]);
+    d->arch.mm_perdomain_l3[l3_table_offset(MAPCACHE_VIRT_START)] =
+        l3e_from_paddr(__pa(d->arch.mm_perdomain_l2[MAPCACHE_SLOT]),
+                       __PAGE_HYPERVISOR);
+
+    BUILD_BUG_ON(MAPCACHE_VIRT_END + 3 +
+                 2 * PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long)) >
+                 MAPCACHE_VIRT_START + (PERDOMAIN_SLOT_MBYTES << 20));
+    bitmap_pages = PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long));
+    dcache->inuse = (void *)MAPCACHE_VIRT_END + PAGE_SIZE;
+    dcache->garbage = dcache->inuse +
+                      (bitmap_pages + 1) * PAGE_SIZE / sizeof(long);
+    end = dcache->garbage + bitmap_pages * PAGE_SIZE / sizeof(long);
+
+    for ( i = l2_table_offset((unsigned long)dcache->inuse);
+          i <= l2_table_offset((unsigned long)(end - 1)); ++i )
+    {
+        ASSERT(i <= MAPCACHE_L2_ENTRIES);
+        dcache->l1tab[i] = alloc_xenheap_pages(0, memf);
+        if ( !dcache->l1tab[i] )
+            return -ENOMEM;
+        clear_page(dcache->l1tab[i]);
+        d->arch.mm_perdomain_l2[MAPCACHE_SLOT][i] =
+            l2e_from_paddr(__pa(dcache->l1tab[i]), __PAGE_HYPERVISOR);
+    }
+
+    spin_lock_init(&dcache->lock);
+
+    return 0;
+}
+
+void mapcache_domain_exit(struct domain *d)
+{
+    struct mapcache_domain *dcache = &d->arch.pv_domain.mapcache;
+
+    if ( is_hvm_domain(d) )
+        return;
+
+    if ( dcache->l1tab )
+    {
+        unsigned long i;
+
+        for ( i = (unsigned long)dcache->inuse; ; i += PAGE_SIZE )
+        {
+            l1_pgentry_t *pl1e;
+
+            if ( l2_table_offset(i) > MAPCACHE_L2_ENTRIES ||
+                 !dcache->l1tab[l2_table_offset(i)] )
+                break;
+
+            pl1e = &dcache->l1tab[l2_table_offset(i)][l1_table_offset(i)];
+            if ( l1e_get_flags(*pl1e) )
+                free_domheap_page(l1e_get_page(*pl1e));
+        }
+
+        for ( i = 0; i < MAPCACHE_L2_ENTRIES + 1; ++i )
+            free_xenheap_page(dcache->l1tab[i]);
+
+        xfree(dcache->l1tab);
+    }
+    free_xenheap_page(d->arch.mm_perdomain_l2[MAPCACHE_SLOT]);
+}
+
+int mapcache_vcpu_init(struct vcpu *v)
+{
+    struct domain *d = v->domain;
+    struct mapcache_domain *dcache = &d->arch.pv_domain.mapcache;
+    unsigned long i;
+    unsigned int memf = MEMF_node(vcpu_to_node(v));
+
+    if ( is_hvm_vcpu(v) || !dcache->l1tab )
+        return 0;
+
+    while ( dcache->entries < d->max_vcpus * MAPCACHE_VCPU_ENTRIES )
+    {
+        unsigned int ents = dcache->entries + MAPCACHE_VCPU_ENTRIES;
+        l1_pgentry_t *pl1e;
+
+        /* Populate page tables. */
+        if ( !dcache->l1tab[i = mapcache_l2_entry(ents - 1)] )
+        {
+            dcache->l1tab[i] = alloc_xenheap_pages(0, memf);
+            if ( !dcache->l1tab[i] )
+                return -ENOMEM;
+            clear_page(dcache->l1tab[i]);
+            d->arch.mm_perdomain_l2[MAPCACHE_SLOT][i] =
+                l2e_from_paddr(__pa(dcache->l1tab[i]), __PAGE_HYPERVISOR);
+        }
+
+        /* Populate bit maps. */
+        i = (unsigned long)(dcache->inuse + BITS_TO_LONGS(ents));
+        pl1e = &dcache->l1tab[l2_table_offset(i)][l1_table_offset(i)];
+        if ( !l1e_get_flags(*pl1e) )
+        {
+            struct page_info *pg = alloc_domheap_page(NULL, memf);
+
+            if ( !pg )
+                return -ENOMEM;
+            clear_domain_page(page_to_mfn(pg));
+            *pl1e = l1e_from_page(pg, __PAGE_HYPERVISOR);
+
+            i = (unsigned long)(dcache->garbage + BITS_TO_LONGS(ents));
+            pl1e = &dcache->l1tab[l2_table_offset(i)][l1_table_offset(i)];
+            ASSERT(!l1e_get_flags(*pl1e));
+
+            pg = alloc_domheap_page(NULL, memf);
+            if ( !pg )
+                return -ENOMEM;
+            clear_domain_page(page_to_mfn(pg));
+            *pl1e = l1e_from_page(pg, __PAGE_HYPERVISOR);
+        }
+
+        dcache->entries = ents;
+    }
+
+    /* Mark all maphash entries as not in use. */
+    BUILD_BUG_ON(MAPHASHENT_NOTINUSE < MAPCACHE_ENTRIES);
+    for ( i = 0; i < MAPHASH_ENTRIES; i++ )
+    {
+        struct vcpu_maphash_entry *hashent = &v->arch.pv_vcpu.mapcache.hash[i];
+
+        hashent->mfn = ~0UL; /* never valid to map */
+        hashent->idx = MAPHASHENT_NOTINUSE;
+    }
+
+    return 0;
+}
+
+#define GLOBALMAP_BITS (GLOBALMAP_GBYTES << (30 - PAGE_SHIFT))
+static unsigned long inuse[BITS_TO_LONGS(GLOBALMAP_BITS)];
+static unsigned long garbage[BITS_TO_LONGS(GLOBALMAP_BITS)];
+static unsigned int inuse_cursor;
+static DEFINE_SPINLOCK(globalmap_lock);
+
+void *map_domain_page_global(unsigned long mfn)
+{
+    l1_pgentry_t *pl1e;
+    unsigned int idx, i;
+    unsigned long va;
+
+    ASSERT(!in_irq() && local_irq_is_enabled());
+
+    if ( mfn <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
+        return mfn_to_virt(mfn);
+
+    spin_lock(&globalmap_lock);
+
+    idx = find_next_zero_bit(inuse, GLOBALMAP_BITS, inuse_cursor);
+    va = GLOBALMAP_VIRT_START + pfn_to_paddr(idx);
+    if ( unlikely(va >= GLOBALMAP_VIRT_END) )
+    {
+        /* /First/, clean the garbage map and update the inuse list. */
+        for ( i = 0; i < ARRAY_SIZE(garbage); i++ )
+            inuse[i] &= ~xchg(&garbage[i], 0);
+
+        /* /Second/, flush all TLBs to get rid of stale garbage mappings. */
+        flush_tlb_all();
+
+        idx = find_first_zero_bit(inuse, GLOBALMAP_BITS);
+        va = GLOBALMAP_VIRT_START + pfn_to_paddr(idx);
+        if ( unlikely(va >= GLOBALMAP_VIRT_END) )
+        {
+            spin_unlock(&globalmap_lock);
+            return NULL;
+        }
+    }
+
+    set_bit(idx, inuse);
+    inuse_cursor = idx + 1;
+
+    spin_unlock(&globalmap_lock);
+
+    pl1e = virt_to_xen_l1e(va);
+    if ( !pl1e )
+        return NULL;
+    l1e_write(pl1e, l1e_from_pfn(mfn, __PAGE_HYPERVISOR));
+
+    return (void *)va;
+}
+
+void unmap_domain_page_global(const void *ptr)
+{
+    unsigned long va = (unsigned long)ptr;
+    l1_pgentry_t *pl1e;
+
+    if ( va >= DIRECTMAP_VIRT_START )
+        return;
+
+    ASSERT(va >= GLOBALMAP_VIRT_START && va < GLOBALMAP_VIRT_END);
+
+    /* /First/, we zap the PTE. */
+    pl1e = virt_to_xen_l1e(va);
+    BUG_ON(!pl1e);
+    l1e_write(pl1e, l1e_empty());
+
+    /* /Second/, we add to the garbage map. */
+    set_bit(PFN_DOWN(va - GLOBALMAP_VIRT_START), garbage);
+}
+
+/* Translate a map-domain-page'd address to the underlying MFN */
+unsigned long domain_page_map_to_mfn(const void *ptr)
+{
+    unsigned long va = (unsigned long)ptr;
+    const l1_pgentry_t *pl1e;
+
+    if ( va >= DIRECTMAP_VIRT_START )
+        return virt_to_mfn(ptr);
+
+    if ( va >= GLOBALMAP_VIRT_START && va < GLOBALMAP_VIRT_END )
+    {
+        pl1e = virt_to_xen_l1e(va);
+        BUG_ON(!pl1e);
+    }
+    else
+    {
+        ASSERT(va >= MAPCACHE_VIRT_START && va < MAPCACHE_VIRT_END);
+        pl1e = &__linear_l1_table[l1_linear_offset(va)];
+    }
+
+    return l1e_get_pfn(*pl1e);
+}
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -2661,9 +2661,6 @@ static inline int vcpumask_to_pcpumask(
         }
     }
 
-#define fixmap_domain_page(mfn) mfn_to_virt(mfn)
-#define fixunmap_domain_page(ptr) ((void)(ptr))
-
 long do_mmuext_op(
     XEN_GUEST_HANDLE_PARAM(mmuext_op_t) uops,
     unsigned int count,
@@ -2983,7 +2980,6 @@ long do_mmuext_op(
 
         case MMUEXT_CLEAR_PAGE: {
             struct page_info *page;
-            unsigned char *ptr;
 
             page = get_page_from_gfn(d, op.arg1.mfn, NULL, P2M_ALLOC);
             if ( !page || !get_page_type(page, PGT_writable_page) )
@@ -2998,9 +2994,7 @@ long do_mmuext_op(
             /* A page is dirtied when it's being cleared. */
            paging_mark_dirty(d, page_to_mfn(page));
 
-            ptr = fixmap_domain_page(page_to_mfn(page));
-            clear_page(ptr);
-            fixunmap_domain_page(ptr);
+            clear_domain_page(page_to_mfn(page));
 
             put_page_and_type(page);
             break;
@@ -3008,8 +3002,6 @@ long do_mmuext_op(
 
         case MMUEXT_COPY_PAGE:
         {
-            const unsigned char *src;
-            unsigned char *dst;
             struct page_info *src_page, *dst_page;
 
             src_page = get_page_from_gfn(d, op.arg2.src_mfn, NULL, P2M_ALLOC);
@@ -3034,11 +3026,7 @@ long do_mmuext_op(
             /* A page is dirtied when it's being copied to. */
             paging_mark_dirty(d, page_to_mfn(dst_page));
 
-            src = __map_domain_page(src_page);
-            dst = fixmap_domain_page(page_to_mfn(dst_page));
-            copy_page(dst, src);
-            fixunmap_domain_page(dst);
-            unmap_domain_page(src);
+            copy_domain_page(page_to_mfn(dst_page), page_to_mfn(src_page));
 
             put_page_and_type(dst_page);
             put_page(src_page);
--- a/xen/include/asm-x86/config.h
+++ b/xen/include/asm-x86/config.h
@@ -27,6 +27,7 @@
 #define CONFIG_DISCONTIGMEM 1
 #define CONFIG_NUMA_EMU 1
 #define CONFIG_PAGEALLOC_MAX_ORDER (2 * PAGETABLE_ORDER)
+#define CONFIG_DOMAIN_PAGE 1
 
 /* Intel P4 currently has largest cache line (L2 line size is 128 bytes). */
 #define CONFIG_X86_L1_CACHE_SHIFT 7
@@ -147,12 +148,14 @@ extern unsigned char boot_edid_info[128]
  * 0xffff82c000000000 - 0xffff82c3ffffffff [16GB, 2^34 bytes, PML4:261]
  *    vmap()/ioremap()/fixmap area.
  * 0xffff82c400000000 - 0xffff82c43fffffff [1GB, 2^30 bytes, PML4:261]
- *    Compatibility machine-to-phys translation table.
+ *    Global domain page map area.
  * 0xffff82c440000000 - 0xffff82c47fffffff [1GB, 2^30 bytes, PML4:261]
- *    High read-only compatibility machine-to-phys translation table.
+ *    Compatibility machine-to-phys translation table.
  * 0xffff82c480000000 - 0xffff82c4bfffffff [1GB, 2^30 bytes, PML4:261]
+ *    High read-only compatibility machine-to-phys translation table.
+ * 0xffff82c4c0000000 - 0xffff82c4ffffffff [1GB, 2^30 bytes, PML4:261]
  *    Xen text, static data, bss.
- * 0xffff82c4c0000000 - 0xffff82dffbffffff [109GB - 64MB, PML4:261]
+ * 0xffff82c500000000 - 0xffff82dffbffffff [108GB - 64MB, PML4:261]
  *    Reserved for future use.
  * 0xffff82dffc000000 - 0xffff82dfffffffff [64MB, 2^26 bytes, PML4:261]
  *    Super-page information array.
@@ -201,18 +204,24 @@ extern unsigned char boot_edid_info[128]
 /* Slot 259: linear page table (shadow table). */
 #define SH_LINEAR_PT_VIRT_START (PML4_ADDR(259))
 #define SH_LINEAR_PT_VIRT_END   (SH_LINEAR_PT_VIRT_START + PML4_ENTRY_BYTES)
-/* Slot 260: per-domain mappings. */
+/* Slot 260: per-domain mappings (including map cache). */
 #define PERDOMAIN_VIRT_START    (PML4_ADDR(260))
-#define PERDOMAIN_VIRT_END      (PERDOMAIN_VIRT_START + (PERDOMAIN_MBYTES<<20))
-#define PERDOMAIN_MBYTES        (PML4_ENTRY_BYTES >> (20 + PAGETABLE_ORDER))
+#define PERDOMAIN_SLOT_MBYTES   (PML4_ENTRY_BYTES >> (20 + PAGETABLE_ORDER))
+#define PERDOMAIN_SLOTS         2
+#define PERDOMAIN_VIRT_SLOT(s)  (PERDOMAIN_VIRT_START + (s) * \
+                                 (PERDOMAIN_SLOT_MBYTES << 20))
 /* Slot 261: machine-to-phys conversion table (256GB). */
 #define RDWR_MPT_VIRT_START     (PML4_ADDR(261))
 #define RDWR_MPT_VIRT_END       (RDWR_MPT_VIRT_START + MPT_VIRT_SIZE)
 /* Slot 261: vmap()/ioremap()/fixmap area (16GB). */
 #define VMAP_VIRT_START         RDWR_MPT_VIRT_END
 #define VMAP_VIRT_END           (VMAP_VIRT_START + GB(16))
+/* Slot 261: global domain page map area (1GB). */
+#define GLOBALMAP_GBYTES        1
+#define GLOBALMAP_VIRT_START    VMAP_VIRT_END
+#define GLOBALMAP_VIRT_END      (GLOBALMAP_VIRT_START + (GLOBALMAP_GBYTES<<30))
 /* Slot 261: compatibility machine-to-phys conversion table (1GB). */
-#define RDWR_COMPAT_MPT_VIRT_START VMAP_VIRT_END
+#define RDWR_COMPAT_MPT_VIRT_START GLOBALMAP_VIRT_END
 #define RDWR_COMPAT_MPT_VIRT_END   (RDWR_COMPAT_MPT_VIRT_START + GB(1))
 /* Slot 261: high read-only compat machine-to-phys conversion table (1GB). */
 #define HIRO_COMPAT_MPT_VIRT_START RDWR_COMPAT_MPT_VIRT_END
@@ -279,9 +288,9 @@ extern unsigned long xen_phys_start;
 /* GDT/LDT shadow mapping area. The first per-domain-mapping sub-area. */
 #define GDT_LDT_VCPU_SHIFT       5
 #define GDT_LDT_VCPU_VA_SHIFT    (GDT_LDT_VCPU_SHIFT + PAGE_SHIFT)
-#define GDT_LDT_MBYTES           PERDOMAIN_MBYTES
+#define GDT_LDT_MBYTES           PERDOMAIN_SLOT_MBYTES
 #define MAX_VIRT_CPUS            (GDT_LDT_MBYTES << (20-GDT_LDT_VCPU_VA_SHIFT))
-#define GDT_LDT_VIRT_START       PERDOMAIN_VIRT_START
+#define GDT_LDT_VIRT_START       PERDOMAIN_VIRT_SLOT(0)
 #define GDT_LDT_VIRT_END         (GDT_LDT_VIRT_START + (GDT_LDT_MBYTES << 20))
 
 /* The address of a particular VCPU's GDT or LDT. */
@@ -290,8 +299,16 @@ extern unsigned long xen_phys_start;
 #define LDT_VIRT_START(v)    \
     (GDT_VIRT_START(v) + (64*1024))
 
+/* map_domain_page() map cache. The last per-domain-mapping sub-area. */
+#define MAPCACHE_VCPU_ENTRIES    (CONFIG_PAGING_LEVELS * CONFIG_PAGING_LEVELS)
+#define MAPCACHE_ENTRIES         (MAX_VIRT_CPUS * MAPCACHE_VCPU_ENTRIES)
+#define MAPCACHE_SLOT            (PERDOMAIN_SLOTS - 1)
+#define MAPCACHE_VIRT_START      PERDOMAIN_VIRT_SLOT(MAPCACHE_SLOT)
+#define MAPCACHE_VIRT_END        (MAPCACHE_VIRT_START + \
+                                  MAPCACHE_ENTRIES * PAGE_SIZE)
+
 #define PDPT_L1_ENTRIES       \
-    ((PERDOMAIN_VIRT_END - PERDOMAIN_VIRT_START) >> PAGE_SHIFT)
+    ((PERDOMAIN_VIRT_SLOT(PERDOMAIN_SLOTS - 1) - PERDOMAIN_VIRT_START) >> PAGE_SHIFT)
 #define PDPT_L2_ENTRIES       \
     ((PDPT_L1_ENTRIES + (1 << PAGETABLE_ORDER) - 1) >> PAGETABLE_ORDER)
 
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -39,7 +39,7 @@ struct trap_bounce {
 
 #define MAPHASH_ENTRIES 8
 #define MAPHASH_HASHFN(pfn) ((pfn) & (MAPHASH_ENTRIES-1))
-#define MAPHASHENT_NOTINUSE ((u16)~0U)
+#define MAPHASHENT_NOTINUSE ((u32)~0U)
 struct mapcache_vcpu {
     /* Shadow of mapcache_domain.epoch. */
     unsigned int shadow_epoch;
@@ -47,16 +47,15 @@ struct mapcache_vcpu {
     /* Lock-free per-VCPU hash of recently-used mappings. */
     struct vcpu_maphash_entry {
         unsigned long mfn;
-        uint16_t      idx;
-        uint16_t      refcnt;
+        uint32_t      idx;
+        uint32_t      refcnt;
     } hash[MAPHASH_ENTRIES];
 };
 
-#define MAPCACHE_ORDER   10
-#define MAPCACHE_ENTRIES (1 << MAPCACHE_ORDER)
 struct mapcache_domain {
     /* The PTEs that provide the mappings, and a cursor into the array. */
-    l1_pgentry_t *l1tab;
+    l1_pgentry_t **l1tab;
+    unsigned int entries;
     unsigned int cursor;
 
     /* Protects map_domain_page(). */
@@ -67,12 +66,13 @@ struct mapcache_domain {
     u32 tlbflush_timestamp;
 
     /* Which mappings are in use, and which are garbage to reap next epoch? */
-    unsigned long inuse[BITS_TO_LONGS(MAPCACHE_ENTRIES)];
-    unsigned long garbage[BITS_TO_LONGS(MAPCACHE_ENTRIES)];
+    unsigned long *inuse;
+    unsigned long *garbage;
 };
 
-void mapcache_domain_init(struct domain *);
-void mapcache_vcpu_init(struct vcpu *);
+int mapcache_domain_init(struct domain *);
+void mapcache_domain_exit(struct domain *);
+int mapcache_vcpu_init(struct vcpu *);
 
 /* x86/64: toggle guest between kernel and user modes. */
 void toggle_guest_mode(struct vcpu *);
@@ -229,6 +229,9 @@ struct pv_domain
      * unmask the event channel */
     bool_t auto_unmask;
 
+    /* map_domain_page() mapping cache. */
+    struct mapcache_domain mapcache;
+
    /* Pseudophysical e820 map (XENMEM_memory_map). */
     spinlock_t e820_lock;
     struct e820entry *e820;
@@ -238,7 +241,7 @@ struct pv_domain
 struct arch_domain
 {
     struct page_info **mm_perdomain_pt_pages;
-    l2_pgentry_t *mm_perdomain_l2;
+    l2_pgentry_t *mm_perdomain_l2[PERDOMAIN_SLOTS];
     l3_pgentry_t *mm_perdomain_l3;
 
     unsigned int hv_compat_vstart;
@@ -324,6 +327,9 @@ struct arch_domain
 
 struct pv_vcpu
 {
+    /* map_domain_page() mapping cache. */
+    struct mapcache_vcpu mapcache;
+
     struct trap_info *trap_ctxt;
 
     unsigned long gdt_frames[FIRST_RESERVED_GDT_PAGE];
--- a/xen/include/xen/domain_page.h
+++ b/xen/include/xen/domain_page.h
@@ -25,11 +25,16 @@ void *map_domain_page(unsigned long mfn)
  */
 void unmap_domain_page(const void *va);
 
+/*
+ * Clear a given page frame, or copy between two of them.
+ */
+void clear_domain_page(unsigned long mfn);
+void copy_domain_page(unsigned long dmfn, unsigned long smfn);
 
 /* 
  * Given a VA from map_domain_page(), return its underlying MFN.
  */
-unsigned long domain_page_map_to_mfn(void *va);
+unsigned long domain_page_map_to_mfn(const void *va);
 
 /*
  * Similar to the above calls, except the mapping is accessible in all
@@ -107,6 +112,9 @@ domain_mmap_cache_destroy(struct domain_
 #define map_domain_page(mfn)                mfn_to_virt(mfn)
 #define __map_domain_page(pg)               page_to_virt(pg)
 #define unmap_domain_page(va)               ((void)(va))
+#define clear_domain_page(mfn)              clear_page(mfn_to_virt(mfn))
+#define copy_domain_page(dmfn, smfn)        copy_page(mfn_to_virt(dmfn), \
+                                                      mfn_to_virt(smfn))
 #define domain_page_map_to_mfn(va)          virt_to_mfn((unsigned long)(va))
 
 #define map_domain_page_global(mfn)         mfn_to_virt(mfn)
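
For illustration only (none of this is part of the patch itself): the hypothetical helpers below sketch the intended usage pattern of the interfaces re-introduced above -- a short-lived mapping obtained via __map_domain_page()/map_domain_page() and released with unmap_domain_page(), plus the new clear_domain_page()/copy_domain_page() convenience wrappers. The two #include-s merely stand for whatever headers the call site already pulls in.

/* Hypothetical example callers -- not part of this series. */
#include <xen/domain_page.h>
#include <xen/mm.h>

/* Explicit map/operate/unmap, as done e.g. by the MMUEXT_* handlers. */
static void stamp_frame(struct page_info *pg, uint32_t marker)
{
    uint32_t *p = __map_domain_page(pg);   /* transient, per-vCPU mapping */

    p[0] = marker;
    unmap_domain_page(p);                  /* must be dropped when done */
}

/* The new helpers wrap the same pattern for whole-page operations. */
static void copy_or_scrub(unsigned long dmfn, unsigned long smfn,
                          bool_t have_source)
{
    if ( have_source )
        copy_domain_page(dmfn, smfn);      /* map both, copy_page(), unmap */
    else
        clear_domain_page(dmfn);           /* map, clear_page(), unmap */
}

These are the short-lived mappings served from the per-domain map cache; map_domain_page_global() / unmap_domain_page_global() remain the interface for mappings that need to stay accessible in all address spaces.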