From: David Vrabel <david.vrabel@citrix.com>
To: xen-devel@lists.xenproject.org
Cc: Andrew Cooper <andrew.cooper3@citrix.com>,
	Kevin Tian <kevin.tian@intel.com>,
	Jan Beulich <jbeulich@suse.com>,
	David Vrabel <david.vrabel@citrix.com>,
	Jun Nakajima <jun.nakajima@intel.com>
Subject: [PATCHv7] x86/ept: defer the invalidation until the p2m lock is released
Date: Mon, 1 Feb 2016 16:26:38 +0000
Message-ID: <1454343998-26294-2-git-send-email-david.vrabel@citrix.com>
In-Reply-To: <1454343998-26294-1-git-send-email-david.vrabel@citrix.com>

Holding the p2m lock while calling ept_sync_domain() is very expensive
since it does an on_selected_cpus() call.  IPIs on many-socket machines
can be very slow and on_selected_cpus() is serialized.

It is safe to defer the invalidation until the p2m lock is released,
except in two cases:

1. When freeing a page table page (since partial translations may be
   cached).
2. When reclaiming a zero page as part of PoD.

For these cases, add p2m_tlb_flush_sync() calls which immediately
perform the invalidation before the page is freed or reclaimed.
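
As an illustration only (not part of the patch), here is a minimal
standalone model of the deferral protocol in plain C: the p2m lock is
reduced to a bare nesting counter, the INVEPT IPI to a printf, and the
fact that the real code drops the rwlock before issuing the IPI is
glossed over.  Only the defer_flush/need_flush handshake corresponds to
the mm-locks.h and p2m-ept.c changes below; all names here are local to
the model.

  #include <stdio.h>
  #include <stdbool.h>

  struct p2m {
      unsigned int defer_flush;  /* nesting depth of lock holders */
      bool need_flush;           /* a flush was requested while locked */
  };

  static void flush_now(struct p2m *p2m)
  {
      p2m->need_flush = false;
      printf("invalidation issued\n"); /* stands in for on_selected_cpus() */
  }

  static void model_lock(struct p2m *p2m)
  {
      p2m->defer_flush++;
  }

  static void model_unlock(struct p2m *p2m)
  {
      /* The outermost unlock performs any flush that was deferred. */
      if ( --p2m->defer_flush == 0 && p2m->need_flush )
          flush_now(p2m);
  }

  /* What ept_sync_domain() does instead of always flushing directly. */
  static void model_sync_domain(struct p2m *p2m)
  {
      if ( p2m->defer_flush )
          p2m->need_flush = true;  /* defer until the lock is released */
      else
          flush_now(p2m);
  }

  /* Cases 1 and 2 above: force the flush before freeing/reclaiming. */
  static void model_flush_sync(struct p2m *p2m)
  {
      if ( p2m->need_flush )
          flush_now(p2m);
  }

  int main(void)
  {
      struct p2m p2m = { 0, false };

      model_lock(&p2m);
      model_sync_domain(&p2m);   /* deferred: nothing flushed yet */
      model_flush_sync(&p2m);    /* e.g. about to free a page table page */
      model_sync_domain(&p2m);   /* deferred again */
      model_unlock(&p2m);        /* flushed when the last "lock" is dropped */
      return 0;
  }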

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
---
v7:
- Add some more p2m_tlb_flush_sync() calls to PoD.
- More comments.

v6:
- Move p2m_tlb_flush_sync() to immediately before p2m_free_ptp().  It was
  called all the time otherwise.

v5:
- Add p2m_tlb_flush_sync() and call it before freeing page table pages
  and reclaiming zeroed PoD pages.

v2:
- use per-p2m list for deferred pages.
- update synced_mask while holding write lock.
---
 xen/arch/x86/mm/mm-locks.h | 23 +++++++++++++++--------
 xen/arch/x86/mm/p2m-ept.c  | 42 ++++++++++++++++++++++++++++++++++--------
 xen/arch/x86/mm/p2m-pod.c  |  4 ++++
 xen/arch/x86/mm/p2m.c      | 19 +++++++++++++++++++
 xen/include/asm-x86/p2m.h  | 24 ++++++++++++++++++++++++
 5 files changed, 96 insertions(+), 16 deletions(-)

diff --git a/xen/arch/x86/mm/mm-locks.h b/xen/arch/x86/mm/mm-locks.h
index 8a40986..2e8747e 100644
--- a/xen/arch/x86/mm/mm-locks.h
+++ b/xen/arch/x86/mm/mm-locks.h
@@ -265,14 +265,21 @@ declare_mm_lock(altp2mlist)
  */
 
 declare_mm_rwlock(altp2m);
-#define p2m_lock(p)                         \
-{                                           \
-    if ( p2m_is_altp2m(p) )                 \
-        mm_write_lock(altp2m, &(p)->lock);  \
-    else                                    \
-        mm_write_lock(p2m, &(p)->lock);     \
-}
-#define p2m_unlock(p)         mm_write_unlock(&(p)->lock);
+#define p2m_lock(p)                             \
+    do {                                        \
+        if ( p2m_is_altp2m(p) )                 \
+            mm_write_lock(altp2m, &(p)->lock);  \
+        else                                    \
+            mm_write_lock(p2m, &(p)->lock);     \
+        (p)->defer_flush++;                     \
+    } while (0)
+#define p2m_unlock(p)                           \
+    do {                                        \
+        if ( --(p)->defer_flush == 0 )          \
+            p2m_tlb_flush_and_unlock(p);        \
+        else                                    \
+            mm_write_unlock(&(p)->lock);        \
+    } while (0)
 #define gfn_lock(p,g,o)       p2m_lock(p)
 #define gfn_unlock(p,g,o)     p2m_unlock(p)
 #define p2m_read_lock(p)      mm_read_lock(p2m, &(p)->lock)
diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c
index c094320..43c7f1b 100644
--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -263,6 +263,7 @@ static void ept_free_entry(struct p2m_domain *p2m, ept_entry_t *ept_entry, int l
         unmap_domain_page(epte);
     }
     
+    p2m_tlb_flush_sync(p2m);
     p2m_free_ptp(p2m, mfn_to_page(ept_entry->mfn));
 }
 
@@ -1095,15 +1096,10 @@ static void __ept_sync_domain(void *info)
      */
 }
 
-void ept_sync_domain(struct p2m_domain *p2m)
+static void ept_sync_domain_prepare(struct p2m_domain *p2m)
 {
     struct domain *d = p2m->domain;
     struct ept_data *ept = &p2m->ept;
-    /* Only if using EPT and this domain has some VCPUs to dirty. */
-    if ( !paging_mode_hap(d) || !d->vcpu || !d->vcpu[0] )
-        return;
-
-    ASSERT(local_irq_is_enabled());
 
     if ( nestedhvm_enabled(d) && !p2m_is_nestedp2m(p2m) )
         p2m_flush_nestedp2m(d);
@@ -1116,9 +1112,38 @@ void ept_sync_domain(struct p2m_domain *p2m)
      *    of an EP4TA reuse is still needed.
      */
     cpumask_setall(ept->invalidate);
+}
+
+static void ept_sync_domain_mask(struct p2m_domain *p2m, const cpumask_t *mask)
+{
+    on_selected_cpus(mask, __ept_sync_domain, p2m, 1);
+}
+
+void ept_sync_domain(struct p2m_domain *p2m)
+{
+    struct domain *d = p2m->domain;
 
-    on_selected_cpus(d->domain_dirty_cpumask,
-                     __ept_sync_domain, p2m, 1);
+    /* Only if using EPT and this domain has some VCPUs to dirty. */
+    if ( !paging_mode_hap(d) || !d->vcpu || !d->vcpu[0] )
+        return;
+
+    ept_sync_domain_prepare(p2m);
+
+    if ( p2m->defer_flush )
+    {
+        p2m->need_flush = 1;
+        return;
+    }
+
+    ept_sync_domain_mask(p2m, d->domain_dirty_cpumask);
+}
+
+static void ept_flush_and_unlock(struct p2m_domain *p2m, bool_t unlock)
+{
+    p2m->need_flush = 0;
+    if ( unlock )
+        mm_write_unlock(&p2m->lock);
+    ept_sync_domain_mask(p2m, p2m->domain->domain_dirty_cpumask);
 }
 
 static void ept_enable_pml(struct p2m_domain *p2m)
@@ -1169,6 +1194,7 @@ int ept_p2m_init(struct p2m_domain *p2m)
     p2m->change_entry_type_range = ept_change_entry_type_range;
     p2m->memory_type_changed = ept_memory_type_changed;
     p2m->audit_p2m = NULL;
+    p2m->flush_and_unlock = ept_flush_and_unlock;
 
     /* Set the memory type used when accessing EPT paging structures. */
     ept->ept_mt = EPT_DEFAULT_MT;
diff --git a/xen/arch/x86/mm/p2m-pod.c b/xen/arch/x86/mm/p2m-pod.c
index ea16d3e..35835d1 100644
--- a/xen/arch/x86/mm/p2m-pod.c
+++ b/xen/arch/x86/mm/p2m-pod.c
@@ -626,6 +626,7 @@ p2m_pod_decrease_reservation(struct domain *d,
 
             p2m_set_entry(p2m, gpfn + i, _mfn(INVALID_MFN), cur_order,
                           p2m_invalid, p2m->default_access);
+            p2m_tlb_flush_sync(p2m);
             for ( j = 0; j < n; ++j )
                 set_gpfn_from_mfn(mfn_x(mfn), INVALID_M2P_ENTRY);
             p2m_pod_cache_add(p2m, page, cur_order);
@@ -755,6 +756,7 @@ p2m_pod_zero_check_superpage(struct p2m_domain *p2m, unsigned long gfn)
     /* Try to remove the page, restoring old mapping if it fails. */
     p2m_set_entry(p2m, gfn, _mfn(INVALID_MFN), PAGE_ORDER_2M,
                   p2m_populate_on_demand, p2m->default_access);
+    p2m_tlb_flush_sync(p2m);
 
     /* Make none of the MFNs are used elsewhere... for example, mapped
      * via the grant table interface, or by qemu.  Allow one refcount for
@@ -886,6 +888,8 @@ p2m_pod_zero_check(struct p2m_domain *p2m, unsigned long *gfns, int count)
         }
     }
 
+    p2m_tlb_flush_sync(p2m);
+
     /* Now check each page for real */
     for ( i=0; i < count; i++ )
     {
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index a45ee35..36a8fb7 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -325,6 +325,25 @@ void p2m_flush_hardware_cached_dirty(struct domain *d)
     }
 }
 
+/*
+ * Force a synchronous P2M TLB flush if a deferred flush is pending.
+ *
+ * Must be called with the p2m lock held.
+ */
+void p2m_tlb_flush_sync(struct p2m_domain *p2m)
+{
+    if ( p2m->need_flush )
+        p2m->flush_and_unlock(p2m, 0);
+}
+
+void p2m_tlb_flush_and_unlock(struct p2m_domain *p2m)
+{
+    if ( p2m->need_flush )
+        p2m->flush_and_unlock(p2m, 1);
+    else
+        mm_write_unlock(&p2m->lock);
+}
+
 mfn_t __get_gfn_type_access(struct p2m_domain *p2m, unsigned long gfn,
                     p2m_type_t *t, p2m_access_t *a, p2m_query_t q,
                     unsigned int *page_order, bool_t locked)
diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
index fa46dd9..d1c4a41 100644
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -262,6 +262,24 @@ struct p2m_domain {
                                           l1_pgentry_t new, unsigned int level);
     long               (*audit_p2m)(struct p2m_domain *p2m);
 
+    /*
+     * P2M updates may require TLBs to be flushed (invalidated).
+     *
+     * If 'defer_flush' is set, flushes may be deferred by setting
+     * 'need_flush' and then flushing in 'flush_and_unlock()'.
+     *
+     * 'flush_and_unlock()' is only called if 'need_flush' is set.  It
+     * must clear 'need_flush', call 'mm_write_unlock(&p2m->lock)' if
+     * 'unlock' is true, and perform the flush.
+     *
+     * If a flush may be being deferred but an immediate flush is
+     * required (e.g., if a page is being freed to a pool other than the
+     * domheap), call p2m_tlb_flush_sync().
+     */
+    void (*flush_and_unlock)(struct p2m_domain *p2m, bool_t unlock);
+    unsigned int defer_flush;
+    bool_t need_flush;
+
     /* Default P2M access type for each page in the the domain: new pages,
      * swapped in pages, cleared pages, and pages that are ambiguously
      * retyped get this access type.  See definition of p2m_access_t. */
@@ -353,6 +371,12 @@ static inline bool_t p2m_is_altp2m(const struct p2m_domain *p2m)
 
 #define p2m_get_pagetable(p2m)  ((p2m)->phys_table)
 
+/*
+ * Ensure any deferred p2m TLB flush has been completed on all VCPUs.
+ */
+void p2m_tlb_flush_sync(struct p2m_domain *p2m);
+void p2m_tlb_flush_and_unlock(struct p2m_domain *p2m);
+
 /**** p2m query accessors. They lock p2m_lock, and thus serialize
  * lookups wrt modifications. They _do not_ release the lock on exit.
  * After calling any of the variants below, caller needs to use
-- 
2.1.4
