From mboxrd@z Thu Jan 1 00:00:00 1970 From: George Dunlap Subject: Re: [PATCHv4 2/2] x86/ept: defer the invalidation until the p2m lock is released Date: Tue, 15 Dec 2015 16:00:01 +0000 Message-ID: <56703901.9090709@citrix.com> References: <1450103946-14232-1-git-send-email-david.vrabel@citrix.com> <1450103946-14232-3-git-send-email-david.vrabel@citrix.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Return-path: Received: from mail6.bemta5.messagelabs.com ([195.245.231.135]) by lists.xen.org with esmtp (Exim 4.72) (envelope-from ) id 1a8s1W-000782-9X for xen-devel@lists.xenproject.org; Tue, 15 Dec 2015 16:00:22 +0000 In-Reply-To: <1450103946-14232-3-git-send-email-david.vrabel@citrix.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: David Vrabel , xen-devel@lists.xenproject.org Cc: Kevin Tian , Jun Nakajima , George Dunlap , Andrew Cooper , Tim Deegan , Jan Beulich List-Id: xen-devel@lists.xenproject.org On 14/12/15 14:39, David Vrabel wrote: > Holding the p2m lock while calling ept_sync_domain() is very expensive > since it does a on_selected_cpus() call. IPIs on many socket machines > can be very slows and on_selected_cpus() is serialized. > > Defer the invalidate until the p2m lock is released. Since the processor > may cache partial translations, we also need to make sure any page table > pages to be freed are not freed until the invalidate is complete. Such > pages are temporarily stored in a list. > > Signed-off-by: David Vrabel Reviewed-by: George Dunlap > --- > v2: > - use per-p2m list for deferred pages. > - update synced_mask while holding write lock. > --- > xen/arch/x86/mm/mm-locks.h | 23 ++++++++++++++-------- > xen/arch/x86/mm/p2m-ept.c | 48 +++++++++++++++++++++++++++++++++++++--------- > xen/arch/x86/mm/p2m.c | 18 +++++++++++++++++ > xen/include/asm-x86/p2m.h | 7 +++++++ > 4 files changed, 79 insertions(+), 17 deletions(-) > > diff --git a/xen/arch/x86/mm/mm-locks.h b/xen/arch/x86/mm/mm-locks.h > index 76c7217..b5eb560 100644 > --- a/xen/arch/x86/mm/mm-locks.h > +++ b/xen/arch/x86/mm/mm-locks.h > @@ -263,14 +263,21 @@ declare_mm_lock(altp2mlist) > */ > > declare_mm_rwlock(altp2m); > -#define p2m_lock(p) \ > -{ \ > - if ( p2m_is_altp2m(p) ) \ > - mm_write_lock(altp2m, &(p)->lock); \ > - else \ > - mm_write_lock(p2m, &(p)->lock); \ > -} > -#define p2m_unlock(p) mm_write_unlock(&(p)->lock); > +#define p2m_lock(p) \ > + do { \ > + if ( p2m_is_altp2m(p) ) \ > + mm_write_lock(altp2m, &(p)->lock); \ > + else \ > + mm_write_lock(p2m, &(p)->lock); \ > + (p)->defer_flush++; \ > + } while (0) > +#define p2m_unlock(p) \ > + do { \ > + if ( --(p)->defer_flush == 0 && (p)->need_flush ) \ > + (p)->flush_and_unlock(p); \ > + else \ > + mm_write_unlock(&(p)->lock); \ > + } while (0) > #define gfn_lock(p,g,o) p2m_lock(p) > #define gfn_unlock(p,g,o) p2m_unlock(p) > #define p2m_read_lock(p) mm_read_lock(p2m, &(p)->lock) > diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c > index 6e0cf89..25575a5 100644 > --- a/xen/arch/x86/mm/p2m-ept.c > +++ b/xen/arch/x86/mm/p2m-ept.c > @@ -263,7 +263,7 @@ static void ept_free_entry(struct p2m_domain *p2m, ept_entry_t *ept_entry, int l > unmap_domain_page(epte); > } > > - p2m_free_ptp(p2m, mfn_to_page(ept_entry->mfn)); > + p2m_free_ptp_defer(p2m, mfn_to_page(ept_entry->mfn)); > } > > static bool_t ept_split_super_page(struct p2m_domain *p2m, > @@ -1095,24 +1095,53 @@ static void __ept_sync_domain(void *info) > */ > } > > -void ept_sync_domain(struct p2m_domain *p2m) > +static void ept_sync_domain_prepare(struct p2m_domain *p2m) > { > struct domain *d = p2m->domain; > struct ept_data *ept = &p2m->ept; > - /* Only if using EPT and this domain has some VCPUs to dirty. */ > - if ( !paging_mode_hap(d) || !d->vcpu || !d->vcpu[0] ) > - return; > - > - ASSERT(local_irq_is_enabled()); > > if ( nestedhvm_enabled(d) && !p2m_is_nestedp2m(p2m) ) > p2m_flush_nestedp2m(d); > > /* May need to invalidate on all PCPUs. */ > cpumask_setall(ept->invalidate); > +} > + > +static void ept_sync_domain_mask(struct p2m_domain *p2m, const cpumask_t *mask) > +{ > + on_selected_cpus(mask, __ept_sync_domain, p2m, 1); > +} > + > +void ept_sync_domain(struct p2m_domain *p2m) > +{ > + struct domain *d = p2m->domain; > + > + /* Only if using EPT and this domain has some VCPUs to dirty. */ > + if ( !paging_mode_hap(d) || !d->vcpu || !d->vcpu[0] ) > + return; > + > + ept_sync_domain_prepare(p2m); > + > + if ( p2m->defer_flush ) > + { > + p2m->need_flush = 1; > + return; > + } > + p2m->need_flush = 0; > + > + ept_sync_domain_mask(p2m, d->domain_dirty_cpumask); > +} > + > +static void ept_flush_and_unlock(struct p2m_domain *p2m) > +{ > + PAGE_LIST_HEAD(deferred_pages); > + > + page_list_move(&deferred_pages, &p2m->deferred_pages); > + > + mm_write_unlock(&p2m->lock); > > - on_selected_cpus(d->domain_dirty_cpumask, > - __ept_sync_domain, p2m, 1); > + ept_sync_domain_mask(p2m, p2m->domain->domain_dirty_cpumask); > + p2m_free_ptp_list(p2m, &deferred_pages); > } > > static void ept_enable_pml(struct p2m_domain *p2m) > @@ -1163,6 +1192,7 @@ int ept_p2m_init(struct p2m_domain *p2m) > p2m->change_entry_type_range = ept_change_entry_type_range; > p2m->memory_type_changed = ept_memory_type_changed; > p2m->audit_p2m = NULL; > + p2m->flush_and_unlock = ept_flush_and_unlock; > > /* Set the memory type used when accessing EPT paging structures. */ > ept->ept_mt = EPT_DEFAULT_MT; > diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c > index ed0bbd7..502bdad 100644 > --- a/xen/arch/x86/mm/p2m.c > +++ b/xen/arch/x86/mm/p2m.c > @@ -65,6 +65,7 @@ static int p2m_initialise(struct domain *d, struct p2m_domain *p2m) > mm_lock_init(&p2m->pod.lock); > INIT_LIST_HEAD(&p2m->np2m_list); > INIT_PAGE_LIST_HEAD(&p2m->pages); > + INIT_PAGE_LIST_HEAD(&p2m->deferred_pages); > INIT_PAGE_LIST_HEAD(&p2m->pod.super); > INIT_PAGE_LIST_HEAD(&p2m->pod.single); > > @@ -504,6 +505,23 @@ void p2m_free_ptp(struct p2m_domain *p2m, struct page_info *pg) > return; > } > > +void p2m_free_ptp_defer(struct p2m_domain *p2m, struct page_info *pg) > +{ > + page_list_del(pg, &p2m->pages); > + page_list_add(pg, &p2m->deferred_pages); > +} > + > +void p2m_free_ptp_list(struct p2m_domain *p2m, struct page_list_head *list) > +{ > + struct page_info *pg, *tmp; > + > + page_list_for_each_safe(pg, tmp, list) > + { > + page_list_del(pg, list); > + p2m->domain->arch.paging.free_page(p2m->domain, pg); > + } > +} > + > /* > * Allocate a new p2m table for a domain. > * > diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h > index fa46dd9..ee03997 100644 > --- a/xen/include/asm-x86/p2m.h > +++ b/xen/include/asm-x86/p2m.h > @@ -261,6 +261,11 @@ struct p2m_domain { > unsigned long gfn, l1_pgentry_t *p, > l1_pgentry_t new, unsigned int level); > long (*audit_p2m)(struct p2m_domain *p2m); > + void (*flush_and_unlock)(struct p2m_domain *p2m); > + > + struct page_list_head deferred_pages; > + unsigned int defer_flush; > + bool_t need_flush; > > /* Default P2M access type for each page in the the domain: new pages, > * swapped in pages, cleared pages, and pages that are ambiguously > @@ -688,6 +693,8 @@ static inline bool_t p2m_mem_access_sanity_check(struct domain *d) > > struct page_info *p2m_alloc_ptp(struct p2m_domain *p2m, unsigned long type); > void p2m_free_ptp(struct p2m_domain *p2m, struct page_info *pg); > +void p2m_free_ptp_defer(struct p2m_domain *p2m, struct page_info *pg); > +void p2m_free_ptp_list(struct p2m_domain *p2m, struct page_list_head *list); > > /* Directly set a p2m entry: only for use by p2m code. Does not need > * a call to put_gfn afterwards/ */ >