[PATCH 6/9] x86/np2m: Send flush IPIs only when a vcpu is actively using a shadow p2m

From: George Dunlap <george.dunlap@citrix.com>
To: xen-devel@lists.xenproject.org
Cc: Sergey Dyasli <sergey.dyasli@citrix.com>,
	Kevin Tian <kevin.tian@intel.com>,
	Jan Beulich <jbeulich@suse.com>,
	Andrew Cooper <andrew.cooper3@citrix.com>,
	George Dunlap <george.dunlap@citrix.com>,
	Jun Nakajima <jun.nakajima@intel.com>
Subject: [PATCH 6/9] x86/np2m: Send flush IPIs only when a vcpu is actively using a shadow p2m
Date: Fri, 29 Sep 2017 16:01:41 +0100	[thread overview]
Message-ID: <20170929150144.7602-6-george.dunlap@citrix.com> (raw)
In-Reply-To: <20170929150144.7602-1-george.dunlap@citrix.com>

Flush IPIs are sent to all cpus in a shadow p2m's dirty_cpumask when
updated.  This mask however is far to broad.  A pcpu's bit is set in
the cpumask when a vcpu runs on that pcpu, but is only cleared when a
flush happens.  This means that the IPI includes the current pcpu of
vcpus that are not currently running, and also includes any pcpu that
has ever had a vcpu use this p2m since the last flush (which in turn
will cause spurious invalidations if a different vcpu is using a
shadow p2m).

Avoid these IPIs by keeping closer track of where a p2m is being used,
and when a vcpu needs to be flushed:

- On schedule-out, clear v->processor in p2m->dirty_cpumask
- Add a 'generation' counter to the p2m and nestedvcpu structs to
  detect changes that would require re-loads on re-entry
- On schedule-in or p2m change:
  - Set v->processor in p2m->dirty_cpumask
  - flush the vcpu's nested p2m pointer (and update nv->generation) if
    the generation changed

Signed-off-by: Sergey Dyasli <sergey.dyasli@citrix.com>
Signed-off-by: George Dunlap <george.dunlap@citrix.com>
---
Changes since v1:
- Combine patches 5 and 8, and the scheduling bits of patch 11
  ("x86/np2m: add np2m_generation", "x86/np2m: add np2m_schedule()",
   and "x86/np2m: implement sharing of np2m between vCPUs")
- Reword commit message

CC: Andrew Cooper <andrew.cooper3@citrix.com>
CC: Jan Beulich <jbeulich@suse.com>
CC: Jun Nakajima <jun.nakajima@intel.com>
CC: Kevin Tian <kevin.tian@intel.com>
---
 xen/arch/x86/domain.c          |  2 ++
 xen/arch/x86/hvm/nestedhvm.c   |  1 +
 xen/arch/x86/hvm/vmx/vvmx.c    |  3 +++
 xen/arch/x86/mm/p2m.c          | 55 +++++++++++++++++++++++++++++++++++++++++-
 xen/include/asm-x86/hvm/vcpu.h |  1 +
 xen/include/asm-x86/p2m.h      |  6 +++++
 6 files changed, 67 insertions(+), 1 deletion(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 466a1a2fac..35ea0d2418 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1668,6 +1668,7 @@ void context_switch(struct vcpu *prev, struct vcpu *next)
     {
         _update_runstate_area(prev);
         vpmu_switch_from(prev);
+        np2m_schedule(NP2M_SCHEDLE_OUT);
     }
 
     if ( is_hvm_domain(prevd) && !list_empty(&prev->arch.hvm_vcpu.tm_list) )
@@ -1716,6 +1717,7 @@ void context_switch(struct vcpu *prev, struct vcpu *next)
 
         /* Must be done with interrupts enabled */
         vpmu_switch_to(next);
+        np2m_schedule(NP2M_SCHEDLE_IN);
     }
 
     /* Ensure that the vcpu has an up-to-date time base. */
diff --git a/xen/arch/x86/hvm/nestedhvm.c b/xen/arch/x86/hvm/nestedhvm.c
index 74a464d162..ab50b2ab98 100644
--- a/xen/arch/x86/hvm/nestedhvm.c
+++ b/xen/arch/x86/hvm/nestedhvm.c
@@ -57,6 +57,7 @@ nestedhvm_vcpu_reset(struct vcpu *v)
     nv->nv_flushp2m = 0;
     nv->nv_p2m = NULL;
     nv->stale_np2m = false;
+    nv->np2m_generation = 0;
 
     hvm_asid_flush_vcpu_asid(&nv->nv_n2asid);
 
diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c
index 48e37158af..a6a558b460 100644
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -1367,6 +1367,9 @@ static void virtual_vmexit(struct cpu_user_regs *regs)
          !(v->arch.hvm_vcpu.guest_efer & EFER_LMA) )
         shadow_to_vvmcs_bulk(v, ARRAY_SIZE(gpdpte_fields), gpdpte_fields);
 
+    /* This will clear current pCPU bit in p2m->dirty_cpumask */
+    np2m_schedule(NP2M_SCHEDLE_OUT);
+
     vmx_vmcs_switch(v->arch.hvm_vmx.vmcs_pa, nvcpu->nv_n1vmcx_pa);
 
     nestedhvm_vcpu_exit_guestmode(v);
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index fd48a3b9db..3c6c486c00 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -73,6 +73,7 @@ static int p2m_initialise(struct domain *d, struct p2m_domain *p2m)
     p2m->p2m_class = p2m_host;
 
     p2m->np2m_base = P2M_BASE_EADDR;
+    p2m->np2m_generation = 0;
 
     for ( i = 0; i < ARRAY_SIZE(p2m->pod.mrp.list); ++i )
         p2m->pod.mrp.list[i] = gfn_x(INVALID_GFN);
@@ -1735,6 +1736,7 @@ p2m_flush_table_locked(struct p2m_domain *p2m)
 
     /* This is no longer a valid nested p2m for any address space */
     p2m->np2m_base = P2M_BASE_EADDR;
+    p2m->np2m_generation++;
 
     /* Make sure nobody else is using this p2m table */
     nestedhvm_vmcx_flushtlb(p2m);
@@ -1809,6 +1811,7 @@ static void assign_np2m(struct vcpu *v, struct p2m_domain *p2m)
 
     nv->nv_flushp2m = 0;
     nv->nv_p2m = p2m;
+    nv->np2m_generation = p2m->np2m_generation;
     cpumask_set_cpu(v->processor, p2m->dirty_cpumask);
 }
 
@@ -1840,7 +1843,9 @@ p2m_get_nestedp2m_locked(struct vcpu *v)
         p2m_lock(p2m);
         if ( p2m->np2m_base == np2m_base || p2m->np2m_base == P2M_BASE_EADDR )
         {
-            if ( p2m->np2m_base == P2M_BASE_EADDR )
+            /* Check if np2m was flushed just before the lock */
+            if ( p2m->np2m_base == P2M_BASE_EADDR ||
+                 nv->np2m_generation != p2m->np2m_generation )
                 nvcpu_flush(v);
             p2m->np2m_base = np2m_base;
             assign_np2m(v, p2m);
@@ -1848,6 +1853,11 @@ p2m_get_nestedp2m_locked(struct vcpu *v)
 
             return p2m;
         }
+        else
+        {
+            /* vCPU is switching from some other valid np2m */
+            cpumask_clear_cpu(v->processor, p2m->dirty_cpumask);
+        }
         p2m_unlock(p2m);
     }
 
@@ -1881,6 +1891,49 @@ p2m_get_p2m(struct vcpu *v)
     return p2m_get_nestedp2m(v);
 }
 
+void np2m_schedule(int dir)
+{
+    struct nestedvcpu *nv = &vcpu_nestedhvm(current);
+    struct p2m_domain *p2m;
+
+    ASSERT(dir == NP2M_SCHEDLE_IN || dir == NP2M_SCHEDLE_OUT);
+
+    if ( !nestedhvm_enabled(current->domain) ||
+         !nestedhvm_vcpu_in_guestmode(current) ||
+         !nestedhvm_paging_mode_hap(current) )
+        return;
+
+    p2m = nv->nv_p2m;
+    if ( p2m )
+    {
+        bool np2m_valid;
+
+        p2m_lock(p2m);
+        np2m_valid = p2m->np2m_base == nhvm_vcpu_p2m_base(current) &&
+                     nv->np2m_generation == p2m->np2m_generation;
+        if ( dir == NP2M_SCHEDLE_OUT && np2m_valid )
+        {
+            /*
+             * The np2m is up to date but this vCPU will no longer use it,
+             * which means there are no reasons to send a flush IPI.
+             */
+            cpumask_clear_cpu(current->processor, p2m->dirty_cpumask);
+        }
+        else if ( dir == NP2M_SCHEDLE_IN )
+        {
+            if ( !np2m_valid )
+            {
+                /* This vCPU's np2m was flushed while it was not runnable */
+                hvm_asid_flush_core();
+                vcpu_nestedhvm(current).nv_p2m = NULL;
+            }
+            else
+                cpumask_set_cpu(current->processor, p2m->dirty_cpumask);
+        }
+        p2m_unlock(p2m);
+    }
+}
+
 unsigned long paging_gva_to_gfn(struct vcpu *v,
                                 unsigned long va,
                                 uint32_t *pfec)
diff --git a/xen/include/asm-x86/hvm/vcpu.h b/xen/include/asm-x86/hvm/vcpu.h
index 5cfa4b4aa4..afe5ffc6b3 100644
--- a/xen/include/asm-x86/hvm/vcpu.h
+++ b/xen/include/asm-x86/hvm/vcpu.h
@@ -116,6 +116,7 @@ struct nestedvcpu {
     bool_t nv_flushp2m; /* True, when p2m table must be flushed */
     struct p2m_domain *nv_p2m; /* used p2m table for this vcpu */
     bool stale_np2m; /* True when p2m_base in VMCX02 is no longer valid */
+    uint64_t np2m_generation;
 
     struct hvm_vcpu_asid nv_n2asid;
 
diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
index 4a1c10c130..8d4aa8c6bf 100644
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -209,6 +209,7 @@ struct p2m_domain {
      * to set it to any other value. */
 #define P2M_BASE_EADDR     (~0ULL)
     uint64_t           np2m_base;
+    uint64_t           np2m_generation;
 
     /* Nested p2ms: linked list of n2pms allocated to this domain. 
      * The host p2m hasolds the head of the list and the np2ms are 
@@ -371,6 +372,11 @@ struct p2m_domain *p2m_get_nestedp2m_locked(struct vcpu *v);
  */
 struct p2m_domain *p2m_get_p2m(struct vcpu *v);
 
+#define NP2M_SCHEDLE_IN  0
+#define NP2M_SCHEDLE_OUT 1
+
+void np2m_schedule(int dir);
+
 static inline bool_t p2m_is_hostp2m(const struct p2m_domain *p2m)
 {
     return p2m->p2m_class == p2m_host;
-- 
2.14.1


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel