All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0 of 4] RFC: Nested-p2m cleanups and locking changes
@ 2011-06-22 16:10 Tim Deegan
  2011-06-22 16:10 ` [PATCH 1 of 4] Nested p2m: implement "flush" as a first-class action Tim Deegan
                   ` (3 more replies)
  0 siblings, 4 replies; 18+ messages in thread
From: Tim Deegan @ 2011-06-22 16:10 UTC (permalink / raw)
  To: xen-devel; +Cc: Christoph Egger


This patch series tidies up a few bits of the nested p2m code. 
The main thing it does is reorganize the locking so that most of the 
changes to nested p2m tables happen only under the p2m lock, and the 
nestedp2m lock is only needed to reassign p2m tables to new cr3 values.

Unfortunately I'm having terrible trouble trying to run Xen under Xen 
to test these, so this version of the series is only compile-tested.
I'm just posting them for feedback from Christoph on whether this approach
seems reasonable. 

Cheers,

Tim.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 1 of 4] Nested p2m: implement "flush" as a first-class action
  2011-06-22 16:10 [PATCH 0 of 4] RFC: Nested-p2m cleanups and locking changes Tim Deegan
@ 2011-06-22 16:10 ` Tim Deegan
  2011-06-23 12:50   ` Christoph Egger
  2011-06-22 16:10 ` [PATCH 2 of 4] Nested p2m: remove bogus check of CR3 value Tim Deegan
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 18+ messages in thread
From: Tim Deegan @ 2011-06-22 16:10 UTC (permalink / raw)
  To: xen-devel; +Cc: Christoph Egger

# HG changeset patch
# User Tim Deegan <Tim.Deegan@citrix.com>
# Date 1308758648 -3600
# Node ID c323e69a0a08ce9f1e54d2e2fa2edd9845bc8efe
# Parent  b7e5a25663329254cba539e21f4fbd5b32c67556
Nested p2m: implement "flush" as a first-class action
rather than using the teardown and init functions.
This makes the locking clearer and avoids an expensive scan of all
pfns that's only needed for non-nested p2ms.  It also moves the
tlb flush into the proper place in the flush logic, avoiding a
possible race against other CPUs.

Signed-off-by: Tim Deegan <Tim.Deegan@citrix.com>

diff -r b7e5a2566332 -r c323e69a0a08 xen/arch/x86/hvm/nestedhvm.c
--- a/xen/arch/x86/hvm/nestedhvm.c	Tue Jun 21 18:28:53 2011 +0100
+++ b/xen/arch/x86/hvm/nestedhvm.c	Wed Jun 22 17:04:08 2011 +0100
@@ -119,12 +119,6 @@ nestedhvm_vmcx_flushtlb(struct p2m_domai
     cpus_clear(p2m->p2m_dirty_cpumask);
 }
 
-void
-nestedhvm_vmcx_flushtlbdomain(struct domain *d)
-{
-    on_selected_cpus(d->domain_dirty_cpumask, nestedhvm_flushtlb_ipi, d, 1);
-}
-
 bool_t
 nestedhvm_is_n2(struct vcpu *v)
 {
diff -r b7e5a2566332 -r c323e69a0a08 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c	Tue Jun 21 18:28:53 2011 +0100
+++ b/xen/arch/x86/mm/p2m.c	Wed Jun 22 17:04:08 2011 +0100
@@ -1050,20 +1050,41 @@ p2m_getlru_nestedp2m(struct domain *d, s
     return lrup2m;
 }
 
-static int 
+/* Reset this p2m table to be empty */
+static void
 p2m_flush_locked(struct p2m_domain *p2m)
 {
-    ASSERT(p2m);
-    if (p2m->cr3 == CR3_EADDR)
-        /* Microoptimisation: p2m is already empty.
-         * => about 0.3% speedup of overall system performance.
-         */
-        return 0;
+    struct page_info *top, *pg;
+    struct domain *d = p2m->domain;
+    void *p;
 
-    p2m_teardown(p2m);
-    p2m_initialise(p2m->domain, p2m);
-    p2m->write_p2m_entry = nestedp2m_write_p2m_entry;
-    return p2m_alloc_table(p2m);
+    p2m_lock(p2m);
+
+    /* "Host" p2m tables can have shared entries &c that need a bit more 
+     * care when discarding them */
+    ASSERT(p2m_is_nestedp2m(p2m));
+    ASSERT(page_list_empty(&p2m->pod.super));
+    ASSERT(page_list_empty(&p2m->pod.single));
+
+    /* This is no longer a valid nested p2m for any address space */
+    p2m->cr3 = CR3_EADDR;
+    
+    /* Zap the top level of the trie */
+    top = mfn_to_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));
+    p = map_domain_page(top);
+    clear_page(p);
+    unmap_domain_page(p);
+
+    /* Make sure nobody else is using this p2m table */
+    nestedhvm_vmcx_flushtlb(p2m);
+
+    /* Free the rest of the trie pages back to the paging pool */
+    while ( (pg = page_list_remove_head(&p2m->pages)) )
+        if ( pg != top ) 
+            d->arch.paging.free_page(d, pg);
+    page_list_add(top, &p2m->pages);
+
+    p2m_unlock(p2m);
 }
 
 void
@@ -1074,9 +1095,8 @@ p2m_flush(struct vcpu *v, struct p2m_dom
     ASSERT(v->domain == d);
     vcpu_nestedhvm(v).nv_p2m = NULL;
     nestedp2m_lock(d);
-    BUG_ON(p2m_flush_locked(p2m) != 0);
+    p2m_flush_locked(p2m);
     hvm_asid_flush_vcpu(v);
-    nestedhvm_vmcx_flushtlb(p2m);
     nestedp2m_unlock(d);
 }
 
@@ -1086,12 +1106,8 @@ p2m_flush_nestedp2m(struct domain *d)
     int i;
 
     nestedp2m_lock(d);
-    for (i = 0; i < MAX_NESTEDP2M; i++) {
-        struct p2m_domain *p2m = d->arch.nested_p2m[i];
-        BUG_ON(p2m_flush_locked(p2m) != 0);
-        cpus_clear(p2m->p2m_dirty_cpumask);
-    }
-    nestedhvm_vmcx_flushtlbdomain(d);
+    for ( i = 0; i < MAX_NESTEDP2M; i++ )
+        p2m_flush_locked(d->arch.nested_p2m[i]);
     nestedp2m_unlock(d);
 }
 
@@ -1104,7 +1120,7 @@ p2m_get_nestedp2m(struct vcpu *v, uint64
     volatile struct nestedvcpu *nv = &vcpu_nestedhvm(v);
     struct domain *d;
     struct p2m_domain *p2m;
-    int i, rv;
+    int i;
 
     if (cr3 == 0 || cr3 == CR3_EADDR)
         cr3 = v->arch.hvm_vcpu.guest_cr[3];
@@ -1136,9 +1152,7 @@ p2m_get_nestedp2m(struct vcpu *v, uint64
      */
     for (i = 0; i < MAX_NESTEDP2M; i++) {
         p2m = p2m_getlru_nestedp2m(d, NULL);
-        rv = p2m_flush_locked(p2m);
-        if (rv == 0)
-            break;
+        p2m_flush_locked(p2m);
     }
     nv->nv_p2m = p2m;
     p2m->cr3 = cr3;
diff -r b7e5a2566332 -r c323e69a0a08 xen/include/asm-x86/hvm/nestedhvm.h
--- a/xen/include/asm-x86/hvm/nestedhvm.h	Tue Jun 21 18:28:53 2011 +0100
+++ b/xen/include/asm-x86/hvm/nestedhvm.h	Wed Jun 22 17:04:08 2011 +0100
@@ -61,7 +61,6 @@ unsigned long *nestedhvm_vcpu_iomap_get(
     (!!vcpu_nestedhvm((v)).nv_vmswitch_in_progress)
 
 void nestedhvm_vmcx_flushtlb(struct p2m_domain *p2m);
-void nestedhvm_vmcx_flushtlbdomain(struct domain *d);
 
 bool_t nestedhvm_is_n2(struct vcpu *v);

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 2 of 4] Nested p2m: remove bogus check of CR3 value
  2011-06-22 16:10 [PATCH 0 of 4] RFC: Nested-p2m cleanups and locking changes Tim Deegan
  2011-06-22 16:10 ` [PATCH 1 of 4] Nested p2m: implement "flush" as a first-class action Tim Deegan
@ 2011-06-22 16:10 ` Tim Deegan
  2011-06-22 16:10 ` [PATCH 3 of 4] Nested p2m: clarify logic in p2m_get_nestedp2m() Tim Deegan
  2011-06-22 16:10 ` [PATCH 4 of 4] Nested p2m: rework locking around nested-p2m flushes and updates Tim Deegan
  3 siblings, 0 replies; 18+ messages in thread
From: Tim Deegan @ 2011-06-22 16:10 UTC (permalink / raw)
  To: xen-devel; +Cc: Christoph Egger

# HG changeset patch
# User Tim Deegan <Tim.Deegan@citrix.com>
# Date 1308758648 -3600
# Node ID dcb8ae5e3eaf6516c889087dfb15efa41a1ac3e9
# Parent  c323e69a0a08ce9f1e54d2e2fa2edd9845bc8efe
Nested p2m: remove bogus check of CR3 value.

0 is a valid CR3 value; CR3_EADDR isn't but there's nothing stopping a
guest from putting it in its VMCB.  The special case was broken anyway
since AFAICT "p2m->cr3" is a nested-cr3 (i.e. p2m-table) value and
guest_cr[3] is an actual-cr3 (pagetable) value.

Signed-off-by: Tim Deegan <Tim.Deegan@citrix.com>

diff -r c323e69a0a08 -r dcb8ae5e3eaf xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c	Wed Jun 22 17:04:08 2011 +0100
+++ b/xen/arch/x86/mm/p2m.c	Wed Jun 22 17:04:08 2011 +0100
@@ -1122,8 +1122,8 @@ p2m_get_nestedp2m(struct vcpu *v, uint64
     struct p2m_domain *p2m;
     int i;
 
-    if (cr3 == 0 || cr3 == CR3_EADDR)
-        cr3 = v->arch.hvm_vcpu.guest_cr[3];
+    /* Mask out low bits; this avoids collisions with CR3_EADDR */
+    cr3 &= ~(0xfffull);
 
     if (nv->nv_flushp2m && nv->nv_p2m) {
         nv->nv_p2m = NULL;

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 3 of 4] Nested p2m: clarify logic in p2m_get_nestedp2m()
  2011-06-22 16:10 [PATCH 0 of 4] RFC: Nested-p2m cleanups and locking changes Tim Deegan
  2011-06-22 16:10 ` [PATCH 1 of 4] Nested p2m: implement "flush" as a first-class action Tim Deegan
  2011-06-22 16:10 ` [PATCH 2 of 4] Nested p2m: remove bogus check of CR3 value Tim Deegan
@ 2011-06-22 16:10 ` Tim Deegan
  2011-06-24 14:25   ` Christoph Egger
  2011-06-22 16:10 ` [PATCH 4 of 4] Nested p2m: rework locking around nested-p2m flushes and updates Tim Deegan
  3 siblings, 1 reply; 18+ messages in thread
From: Tim Deegan @ 2011-06-22 16:10 UTC (permalink / raw)
  To: xen-devel; +Cc: Christoph Egger

# HG changeset patch
# User Tim Deegan <Tim.Deegan@citrix.com>
# Date 1308758648 -3600
# Node ID b265371addbbc8a58c95a269fe3cd0fdc866aaa3
# Parent  dcb8ae5e3eaf6516c889087dfb15efa41a1ac3e9
Nested p2m: clarify logic in p2m_get_nestedp2m()

This just makes the behaviour of this function a bit more explicit.  It
may be that it also needs to be changed. :)

Signed-off-by: Tim Deegan <Tim.Deegan@citrix.com>

diff -r dcb8ae5e3eaf -r b265371addbb xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c	Wed Jun 22 17:04:08 2011 +0100
+++ b/xen/arch/x86/mm/p2m.c	Wed Jun 22 17:04:08 2011 +0100
@@ -1131,11 +1131,9 @@ p2m_get_nestedp2m(struct vcpu *v, uint64
 
     d = v->domain;
     nestedp2m_lock(d);
-    for (i = 0; i < MAX_NESTEDP2M; i++) {
-        p2m = d->arch.nested_p2m[i];
-        if ((p2m->cr3 != cr3 && p2m->cr3 != CR3_EADDR) || (p2m != nv->nv_p2m))
-            continue;
-
+    p2m = nv->nv_p2m;
+    if ( p2m && (p2m->cr3 == cr3 || p2m->cr3 == CR3_EADDR) )
+    {
         nv->nv_flushp2m = 0;
         p2m_getlru_nestedp2m(d, p2m);
         nv->nv_p2m = p2m;

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 4 of 4] Nested p2m: rework locking around nested-p2m flushes and updates
  2011-06-22 16:10 [PATCH 0 of 4] RFC: Nested-p2m cleanups and locking changes Tim Deegan
                   ` (2 preceding siblings ...)
  2011-06-22 16:10 ` [PATCH 3 of 4] Nested p2m: clarify logic in p2m_get_nestedp2m() Tim Deegan
@ 2011-06-22 16:10 ` Tim Deegan
  2011-06-23 15:08   ` Christoph Egger
  3 siblings, 1 reply; 18+ messages in thread
From: Tim Deegan @ 2011-06-22 16:10 UTC (permalink / raw)
  To: xen-devel; +Cc: Christoph Egger

# HG changeset patch
# User Tim Deegan <Tim.Deegan@citrix.com>
# Date 1308758840 -3600
# Node ID a168af9b6d2f604c841465e5ed911b7a12b5ba15
# Parent  b265371addbbc8a58c95a269fe3cd0fdc866aaa3
Nested p2m: rework locking around nested-p2m flushes and updates.

The nestedp2m_lock now only covers the mapping from nested-cr3 to
nested-p2m; the tables themselves may be updated or flushed using only
the relevant p2m lock.

This means that the nested-p2m lock is only taken on one path, and
always before any p2m locks.

Signed-off-by: Tim Deegan <Tim.Deegan@citrix.com>

diff -r b265371addbb -r a168af9b6d2f xen/arch/x86/mm/hap/nested_hap.c
--- a/xen/arch/x86/mm/hap/nested_hap.c	Wed Jun 22 17:04:08 2011 +0100
+++ b/xen/arch/x86/mm/hap/nested_hap.c	Wed Jun 22 17:07:20 2011 +0100
@@ -96,17 +96,23 @@ nestedp2m_write_p2m_entry(struct p2m_dom
 /*          NESTED VIRT FUNCTIONS           */
 /********************************************/
 static void
-nestedhap_fix_p2m(struct p2m_domain *p2m, paddr_t L2_gpa, paddr_t L0_gpa,
-    p2m_type_t p2mt, p2m_access_t p2ma)
+nestedhap_fix_p2m(struct vcpu *v, struct p2m_domain *p2m, 
+                  paddr_t L2_gpa, paddr_t L0_gpa,
+                  p2m_type_t p2mt, p2m_access_t p2ma)
 {
-    int rv;
+    int rv = 0;
     ASSERT(p2m);
     ASSERT(p2m->set_entry);
 
     p2m_lock(p2m);
-    rv = set_p2m_entry(p2m, L2_gpa >> PAGE_SHIFT,
-                         page_to_mfn(maddr_to_page(L0_gpa)),
-                         0 /*4K*/, p2mt, p2ma);
+
+    /* If this p2m table has been flushed or recycled under our feet, 
+     * leave it alone.  We'll pick up the right one as we try to 
+     * vmenter the guest. */
+    if ( p2m->cr3 == nhvm_vcpu_hostcr3(v) )
+         rv = set_p2m_entry(p2m, L2_gpa >> PAGE_SHIFT,
+                            page_to_mfn(maddr_to_page(L0_gpa)),
+                            0 /*4K*/, p2mt, p2ma);
     p2m_unlock(p2m);
 
     if (rv == 0) {
@@ -211,12 +217,10 @@ nestedhvm_hap_nested_page_fault(struct v
         break;
     }
 
-    nestedp2m_lock(d);
     /* fix p2m_get_pagetable(nested_p2m) */
-    nestedhap_fix_p2m(nested_p2m, L2_gpa, L0_gpa,
+    nestedhap_fix_p2m(v, nested_p2m, L2_gpa, L0_gpa,
         p2m_ram_rw,
         p2m_access_rwx /* FIXME: Should use same permission as l1 guest */);
-    nestedp2m_unlock(d);
 
     return NESTEDHVM_PAGEFAULT_DONE;
 }
diff -r b265371addbb -r a168af9b6d2f xen/arch/x86/mm/mm-locks.h
--- a/xen/arch/x86/mm/mm-locks.h	Wed Jun 22 17:04:08 2011 +0100
+++ b/xen/arch/x86/mm/mm-locks.h	Wed Jun 22 17:07:20 2011 +0100
@@ -96,8 +96,11 @@ declare_mm_lock(shr)
 
 /* Nested P2M lock (per-domain)
  *
- * A per-domain lock that protects some of the nested p2m datastructures.
- * TODO: find out exactly what needs to be covered by this lock */
+ * A per-domain lock that protects the mapping from nested-CR3 to 
+ * nested-p2m.  In particular it covers:
+ * - the array of nested-p2m tables, and all LRU activity therein; and
+ * - setting the "cr3" field of any p2m table to a non-CR3_EADDR value. 
+ *   (i.e. assigning a p2m table to be the shadow of that cr3 */
 
 declare_mm_lock(nestedp2m)
 #define nestedp2m_lock(d)   mm_lock(nestedp2m, &(d)->arch.nested_p2m_lock)
diff -r b265371addbb -r a168af9b6d2f xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c	Wed Jun 22 17:04:08 2011 +0100
+++ b/xen/arch/x86/mm/p2m.c	Wed Jun 22 17:07:20 2011 +0100
@@ -1052,7 +1052,7 @@ p2m_getlru_nestedp2m(struct domain *d, s
 
 /* Reset this p2m table to be empty */
 static void
-p2m_flush_locked(struct p2m_domain *p2m)
+p2m_flush_table(struct p2m_domain *p2m)
 {
     struct page_info *top, *pg;
     struct domain *d = p2m->domain;
@@ -1094,21 +1094,16 @@ p2m_flush(struct vcpu *v, struct p2m_dom
 
     ASSERT(v->domain == d);
     vcpu_nestedhvm(v).nv_p2m = NULL;
-    nestedp2m_lock(d);
-    p2m_flush_locked(p2m);
+    p2m_flush_table(p2m);
     hvm_asid_flush_vcpu(v);
-    nestedp2m_unlock(d);
 }
 
 void
 p2m_flush_nestedp2m(struct domain *d)
 {
     int i;
-
-    nestedp2m_lock(d);
     for ( i = 0; i < MAX_NESTEDP2M; i++ )
-        p2m_flush_locked(d->arch.nested_p2m[i]);
-    nestedp2m_unlock(d);
+        p2m_flush_table(d->arch.nested_p2m[i]);
 }
 
 struct p2m_domain *
@@ -1132,17 +1127,23 @@ p2m_get_nestedp2m(struct vcpu *v, uint64
     d = v->domain;
     nestedp2m_lock(d);
     p2m = nv->nv_p2m;
-    if ( p2m && (p2m->cr3 == cr3 || p2m->cr3 == CR3_EADDR) )
+    if ( p2m ) 
     {
-        nv->nv_flushp2m = 0;
-        p2m_getlru_nestedp2m(d, p2m);
-        nv->nv_p2m = p2m;
-        if (p2m->cr3 == CR3_EADDR)
-            hvm_asid_flush_vcpu(v);
-        p2m->cr3 = cr3;
-        cpu_set(v->processor, p2m->p2m_dirty_cpumask);
-        nestedp2m_unlock(d);
-        return p2m;
+        p2m_lock(p2m);
+        if ( p2m->cr3 == cr3 || p2m->cr3 == CR3_EADDR )
+        {
+            nv->nv_flushp2m = 0;
+            p2m_getlru_nestedp2m(d, p2m);
+            nv->nv_p2m = p2m;
+            if (p2m->cr3 == CR3_EADDR)
+                hvm_asid_flush_vcpu(v);
+            p2m->cr3 = cr3;
+            cpu_set(v->processor, p2m->p2m_dirty_cpumask);
+            p2m_unlock(p2m);
+            nestedp2m_unlock(d);
+            return p2m;
+        }
+        p2m_unlock(p2m)
     }
 
     /* All p2m's are or were in use. Take the least recent used one,
@@ -1150,14 +1151,16 @@ p2m_get_nestedp2m(struct vcpu *v, uint64
      */
     for (i = 0; i < MAX_NESTEDP2M; i++) {
         p2m = p2m_getlru_nestedp2m(d, NULL);
-        p2m_flush_locked(p2m);
+        p2m_flush_table(p2m);
     }
+    p2m_lock(p2m);
     nv->nv_p2m = p2m;
     p2m->cr3 = cr3;
     nv->nv_flushp2m = 0;
     hvm_asid_flush_vcpu(v);
     nestedhvm_vmcx_flushtlb(nv->nv_p2m);
     cpu_set(v->processor, p2m->p2m_dirty_cpumask);
+    p2m_unlock(p2m);
     nestedp2m_unlock(d);
 
     return p2m;
diff -r b265371addbb -r a168af9b6d2f xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h	Wed Jun 22 17:04:08 2011 +0100
+++ b/xen/include/asm-x86/p2m.h	Wed Jun 22 17:07:20 2011 +0100
@@ -201,8 +201,13 @@ struct p2m_domain {
     cpumask_t          p2m_dirty_cpumask;
 
     struct domain     *domain;   /* back pointer to domain */
+
+    /* Nested p2ms only: nested-CR3 value that this p2m shadows. 
+     * This can be cleared to CR3_EADDR under the per-p2m lock but
+     * needs both the per-p2m lock and the per-domain nestedp2m lock
+     * to set it to any other value. */
 #define CR3_EADDR     (~0ULL)
-    uint64_t           cr3;      /* to identify this p2m for re-use */
+    uint64_t           cr3;
 
     /* Pages used to construct the p2m */
     struct page_list_head pages;

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 1 of 4] Nested p2m: implement "flush" as a first-class action
  2011-06-22 16:10 ` [PATCH 1 of 4] Nested p2m: implement "flush" as a first-class action Tim Deegan
@ 2011-06-23 12:50   ` Christoph Egger
  2011-06-23 12:56     ` Christoph Egger
  0 siblings, 1 reply; 18+ messages in thread
From: Christoph Egger @ 2011-06-23 12:50 UTC (permalink / raw)
  To: Tim Deegan, xen-devel


This patch crashes the host (and it doesn't disappear with the other 
patches applied):

(XEN) Assertion 'pfn_to_pdx(ma >> PAGE_SHIFT) < (DIRECTMAP_SIZE >> 
PAGE_SHIFT)'
failed at xen/include/asm/x86_64/page.h:99
(XEN) ----[ Xen-4.2-unstable  x86_64  debug=y  Tainted:    C ]----
(XEN) CPU:    1
(XEN) RIP:    e008:[<ffff82c4801d2007>] p2m_flush_locked+0x1a8/0x2be
(XEN) RFLAGS: 0000000000010212   CONTEXT: hypervisor
(XEN) rax: 0000000000000000   rbx: ffff830421db1d90   rcx: 0000000000000000
(XEN) rdx: f82f608076a60000   rsi: 000f82f608076a60   rdi: 0000000000000000
(XEN) rbp: ffff830425217a08   rsp: ffff8304252179c8   r8:  0000000000000000
(XEN) r9:  0000000000000004   r10: 00000000fffffffb   r11: 0000000000000003
(XEN) r12: ffff830421db1d90   r13: ffff82f608076a60   r14: ffff830403bbebe8
(XEN) r15: ffff830403bbe000   cr0: 000000008005003b   cr4: 00000000000406f0
(XEN) cr3: 0000000420f2b000   cr2: 000000000045f000
(XEN) ds: 0017   es: 0017   fs: 0000   gs: 0000   ss: e010   cs: e008
(XEN) Xen stack trace from rsp=ffff8304252179c8:
(XEN)    ffff830403bbebe8 ffff830421db1d90 ffff830421db1d90 0000000000000000
(XEN)    ffff830403bbe000 ffff830403bbebe8 ffff830403bbebe8 ffff830403bbeab0
(XEN)    ffff830425217a38 ffff82c4801d2599 0000000000000001 0000000000000067
(XEN)    ffff830403bbeab0 ffff830403af4000 ffff830425217a88 ffff82c480208a8c
(XEN)    0000000125941810 ffff830403bbe000 0000000000000000 ffff830425217b18
(XEN)    ffff830421db1cc0 00000000000ff000 ffffffffffffffff 0000000000000000
(XEN)    ffff830425217a98 ffff82c4801ce585 ffff830425217b68 ffff82c4801d4b4e
(XEN)    0000000000000200 1000000000000000 ffff830425217b28 ffff830425217b20
(XEN)    0000000000000001 ffff830425217ae8 ffff830425217ef8 ffff830403af4000
(XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
(XEN)    0000000000000000 0000000000000000 0000000000000000 ffff830403af4000
(XEN)    0000000000403af4 ffff82c4801d5ec1 ffff830425217bfc ffffffffffffffff
(XEN)    0000000000000000 0000000000000001 00000000000ff000 ffff830421db1cc0
(XEN)    ffff830425217bb8 ffff82c4801d02d1 0000000100000007 ffff830403bbe000
(XEN)    0000000125217bc8 ffff8280019fafc0 ffff82c400cfd7e0 000000000033f5f8
(XEN)    ffff830421db1cc0 0000000000000001 ffff830425217c28 ffff82c4801d053d
(XEN)    ffff830425217bfc ffff830425217bf8 0000000025217bf0 00000000000ff000
(XEN)    0000000000000001 ffff830425217c10 0000000000000007 ffff830421db1cc0
(XEN)    ffff830421db1cc0 ffff830425217f18 ffff82c4802e1260 0000000000000000
(XEN)    ffff830425217c78 ffff82c4801d11fa 000000000033f5f7 00000000000ff000
(XEN) Xen call trace:
(XEN)    [<ffff82c4801d2007>] p2m_flush_locked+0x1a8/0x2be
(XEN)    [<ffff82c4801d2599>] p2m_flush_nestedp2m+0xea/0x140
(XEN)    [<ffff82c480208a8c>] hap_write_p2m_entry+0x223/0x246
(XEN)    [<ffff82c4801ce585>] paging_write_p2m_entry+0x6e/0x75
(XEN)    [<ffff82c4801d4b4e>] p2m_set_entry+0x42b/0x6a8
(XEN)    [<ffff82c4801d02d1>] set_p2m_entry+0xf2/0x140
(XEN)    [<ffff82c4801d053d>] p2m_remove_page+0x1dd/0x1ec
(XEN)    [<ffff82c4801d11fa>] guest_physmap_remove_page+0x109/0x148
(XEN)    [<ffff82c480168000>] arch_memory_op+0x6af/0x1039
(XEN)    [<ffff82c4801134fb>] do_memory_op+0x1bd7/0x1c2c
(XEN)    [<ffff82c480217708>] syscall_enter+0xc8/0x122
(XEN)
(XEN)
(XEN) ****************************************

Christoph



On 06/22/11 18:10, Tim Deegan wrote:
> # HG changeset patch
> # User Tim Deegan<Tim.Deegan@citrix.com>
> # Date 1308758648 -3600
> # Node ID c323e69a0a08ce9f1e54d2e2fa2edd9845bc8efe
> # Parent  b7e5a25663329254cba539e21f4fbd5b32c67556
> Nested p2m: implement "flush" as a first-class action
> rather than using the teardown and init functions.
> This makes the locking clearer and avoids an expensive scan of all
> pfns that's only needed for non-nested p2ms.  It also moves the
> tlb flush into the proper place in the flush logic, avoiding a
> possible race against other CPUs.
>
> Signed-off-by: Tim Deegan<Tim.Deegan@citrix.com>
>
> diff -r b7e5a2566332 -r c323e69a0a08 xen/arch/x86/hvm/nestedhvm.c
> --- a/xen/arch/x86/hvm/nestedhvm.c	Tue Jun 21 18:28:53 2011 +0100
> +++ b/xen/arch/x86/hvm/nestedhvm.c	Wed Jun 22 17:04:08 2011 +0100
> @@ -119,12 +119,6 @@ nestedhvm_vmcx_flushtlb(struct p2m_domai
>       cpus_clear(p2m->p2m_dirty_cpumask);
>   }
>
> -void
> -nestedhvm_vmcx_flushtlbdomain(struct domain *d)
> -{
> -    on_selected_cpus(d->domain_dirty_cpumask, nestedhvm_flushtlb_ipi, d, 1);
> -}
> -
>   bool_t
>   nestedhvm_is_n2(struct vcpu *v)
>   {
> diff -r b7e5a2566332 -r c323e69a0a08 xen/arch/x86/mm/p2m.c
> --- a/xen/arch/x86/mm/p2m.c	Tue Jun 21 18:28:53 2011 +0100
> +++ b/xen/arch/x86/mm/p2m.c	Wed Jun 22 17:04:08 2011 +0100
> @@ -1050,20 +1050,41 @@ p2m_getlru_nestedp2m(struct domain *d, s
>       return lrup2m;
>   }
>
> -static int
> +/* Reset this p2m table to be empty */
> +static void
>   p2m_flush_locked(struct p2m_domain *p2m)
>   {
> -    ASSERT(p2m);
> -    if (p2m->cr3 == CR3_EADDR)
> -        /* Microoptimisation: p2m is already empty.
> -         * =>  about 0.3% speedup of overall system performance.
> -         */
> -        return 0;
> +    struct page_info *top, *pg;
> +    struct domain *d = p2m->domain;
> +    void *p;
>
> -    p2m_teardown(p2m);
> -    p2m_initialise(p2m->domain, p2m);
> -    p2m->write_p2m_entry = nestedp2m_write_p2m_entry;
> -    return p2m_alloc_table(p2m);
> +    p2m_lock(p2m);
> +
> +    /* "Host" p2m tables can have shared entries&c that need a bit more
> +     * care when discarding them */
> +    ASSERT(p2m_is_nestedp2m(p2m));
> +    ASSERT(page_list_empty(&p2m->pod.super));
> +    ASSERT(page_list_empty(&p2m->pod.single));
> +
> +    /* This is no longer a valid nested p2m for any address space */
> +    p2m->cr3 = CR3_EADDR;
> +
> +    /* Zap the top level of the trie */
> +    top = mfn_to_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));
> +    p = map_domain_page(top);
> +    clear_page(p);
> +    unmap_domain_page(p);
> +
> +    /* Make sure nobody else is using this p2m table */
> +    nestedhvm_vmcx_flushtlb(p2m);
> +
> +    /* Free the rest of the trie pages back to the paging pool */
> +    while ( (pg = page_list_remove_head(&p2m->pages)) )
> +        if ( pg != top )
> +            d->arch.paging.free_page(d, pg);
> +    page_list_add(top,&p2m->pages);
> +
> +    p2m_unlock(p2m);
>   }
>
>   void
> @@ -1074,9 +1095,8 @@ p2m_flush(struct vcpu *v, struct p2m_dom
>       ASSERT(v->domain == d);
>       vcpu_nestedhvm(v).nv_p2m = NULL;
>       nestedp2m_lock(d);
> -    BUG_ON(p2m_flush_locked(p2m) != 0);
> +    p2m_flush_locked(p2m);
>       hvm_asid_flush_vcpu(v);
> -    nestedhvm_vmcx_flushtlb(p2m);
>       nestedp2m_unlock(d);
>   }
>
> @@ -1086,12 +1106,8 @@ p2m_flush_nestedp2m(struct domain *d)
>       int i;
>
>       nestedp2m_lock(d);
> -    for (i = 0; i<  MAX_NESTEDP2M; i++) {
> -        struct p2m_domain *p2m = d->arch.nested_p2m[i];
> -        BUG_ON(p2m_flush_locked(p2m) != 0);
> -        cpus_clear(p2m->p2m_dirty_cpumask);
> -    }
> -    nestedhvm_vmcx_flushtlbdomain(d);
> +    for ( i = 0; i<  MAX_NESTEDP2M; i++ )
> +        p2m_flush_locked(d->arch.nested_p2m[i]);
>       nestedp2m_unlock(d);
>   }
>
> @@ -1104,7 +1120,7 @@ p2m_get_nestedp2m(struct vcpu *v, uint64
>       volatile struct nestedvcpu *nv =&vcpu_nestedhvm(v);
>       struct domain *d;
>       struct p2m_domain *p2m;
> -    int i, rv;
> +    int i;
>
>       if (cr3 == 0 || cr3 == CR3_EADDR)
>           cr3 = v->arch.hvm_vcpu.guest_cr[3];
> @@ -1136,9 +1152,7 @@ p2m_get_nestedp2m(struct vcpu *v, uint64
>        */
>       for (i = 0; i<  MAX_NESTEDP2M; i++) {
>           p2m = p2m_getlru_nestedp2m(d, NULL);
> -        rv = p2m_flush_locked(p2m);
> -        if (rv == 0)
> -            break;
> +        p2m_flush_locked(p2m);
>       }
>       nv->nv_p2m = p2m;
>       p2m->cr3 = cr3;
> diff -r b7e5a2566332 -r c323e69a0a08 xen/include/asm-x86/hvm/nestedhvm.h
> --- a/xen/include/asm-x86/hvm/nestedhvm.h	Tue Jun 21 18:28:53 2011 +0100
> +++ b/xen/include/asm-x86/hvm/nestedhvm.h	Wed Jun 22 17:04:08 2011 +0100
> @@ -61,7 +61,6 @@ unsigned long *nestedhvm_vcpu_iomap_get(
>       (!!vcpu_nestedhvm((v)).nv_vmswitch_in_progress)
>
>   void nestedhvm_vmcx_flushtlb(struct p2m_domain *p2m);
> -void nestedhvm_vmcx_flushtlbdomain(struct domain *d);
>
>   bool_t nestedhvm_is_n2(struct vcpu *v);
>
>


-- 
---to satisfy European Law for business letters:
Advanced Micro Devices GmbH
Einsteinring 24, 85689 Dornach b. Muenchen
Geschaeftsfuehrer: Alberto Bozzo, Andrew Bowd
Sitz: Dornach, Gemeinde Aschheim, Landkreis Muenchen
Registergericht Muenchen, HRB Nr. 43632

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Re: [PATCH 1 of 4] Nested p2m: implement "flush" as a first-class action
  2011-06-23 12:50   ` Christoph Egger
@ 2011-06-23 12:56     ` Christoph Egger
  2011-06-23 15:04       ` Christoph Egger
  0 siblings, 1 reply; 18+ messages in thread
From: Christoph Egger @ 2011-06-23 12:56 UTC (permalink / raw)
  To: Tim Deegan, xen-devel

On 06/23/11 14:50, Christoph Egger wrote:
>
> This patch crashes the host (and it doesn't disappear with the other
> patches applied):

err.. it crashes the host when the l1 guest boots the hvmloader invokes
the VGA BIOS.

> (XEN) Assertion 'pfn_to_pdx(ma>>  PAGE_SHIFT)<  (DIRECTMAP_SIZE>>
> PAGE_SHIFT)'
> failed at xen/include/asm/x86_64/page.h:99
> (XEN) ----[ Xen-4.2-unstable  x86_64  debug=y  Tainted:    C ]----
> (XEN) CPU:    1
> (XEN) RIP:    e008:[<ffff82c4801d2007>] p2m_flush_locked+0x1a8/0x2be
> (XEN) RFLAGS: 0000000000010212   CONTEXT: hypervisor
> (XEN) rax: 0000000000000000   rbx: ffff830421db1d90   rcx: 0000000000000000
> (XEN) rdx: f82f608076a60000   rsi: 000f82f608076a60   rdi: 0000000000000000
> (XEN) rbp: ffff830425217a08   rsp: ffff8304252179c8   r8:  0000000000000000
> (XEN) r9:  0000000000000004   r10: 00000000fffffffb   r11: 0000000000000003
> (XEN) r12: ffff830421db1d90   r13: ffff82f608076a60   r14: ffff830403bbebe8
> (XEN) r15: ffff830403bbe000   cr0: 000000008005003b   cr4: 00000000000406f0
> (XEN) cr3: 0000000420f2b000   cr2: 000000000045f000
> (XEN) ds: 0017   es: 0017   fs: 0000   gs: 0000   ss: e010   cs: e008
> (XEN) Xen stack trace from rsp=ffff8304252179c8:
> (XEN)    ffff830403bbebe8 ffff830421db1d90 ffff830421db1d90 0000000000000000
> (XEN)    ffff830403bbe000 ffff830403bbebe8 ffff830403bbebe8 ffff830403bbeab0
> (XEN)    ffff830425217a38 ffff82c4801d2599 0000000000000001 0000000000000067
> (XEN)    ffff830403bbeab0 ffff830403af4000 ffff830425217a88 ffff82c480208a8c
> (XEN)    0000000125941810 ffff830403bbe000 0000000000000000 ffff830425217b18
> (XEN)    ffff830421db1cc0 00000000000ff000 ffffffffffffffff 0000000000000000
> (XEN)    ffff830425217a98 ffff82c4801ce585 ffff830425217b68 ffff82c4801d4b4e
> (XEN)    0000000000000200 1000000000000000 ffff830425217b28 ffff830425217b20
> (XEN)    0000000000000001 ffff830425217ae8 ffff830425217ef8 ffff830403af4000
> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
> (XEN)    0000000000000000 0000000000000000 0000000000000000 ffff830403af4000
> (XEN)    0000000000403af4 ffff82c4801d5ec1 ffff830425217bfc ffffffffffffffff
> (XEN)    0000000000000000 0000000000000001 00000000000ff000 ffff830421db1cc0
> (XEN)    ffff830425217bb8 ffff82c4801d02d1 0000000100000007 ffff830403bbe000
> (XEN)    0000000125217bc8 ffff8280019fafc0 ffff82c400cfd7e0 000000000033f5f8
> (XEN)    ffff830421db1cc0 0000000000000001 ffff830425217c28 ffff82c4801d053d
> (XEN)    ffff830425217bfc ffff830425217bf8 0000000025217bf0 00000000000ff000
> (XEN)    0000000000000001 ffff830425217c10 0000000000000007 ffff830421db1cc0
> (XEN)    ffff830421db1cc0 ffff830425217f18 ffff82c4802e1260 0000000000000000
> (XEN)    ffff830425217c78 ffff82c4801d11fa 000000000033f5f7 00000000000ff000
> (XEN) Xen call trace:
> (XEN)    [<ffff82c4801d2007>] p2m_flush_locked+0x1a8/0x2be
> (XEN)    [<ffff82c4801d2599>] p2m_flush_nestedp2m+0xea/0x140
> (XEN)    [<ffff82c480208a8c>] hap_write_p2m_entry+0x223/0x246
> (XEN)    [<ffff82c4801ce585>] paging_write_p2m_entry+0x6e/0x75
> (XEN)    [<ffff82c4801d4b4e>] p2m_set_entry+0x42b/0x6a8
> (XEN)    [<ffff82c4801d02d1>] set_p2m_entry+0xf2/0x140
> (XEN)    [<ffff82c4801d053d>] p2m_remove_page+0x1dd/0x1ec
> (XEN)    [<ffff82c4801d11fa>] guest_physmap_remove_page+0x109/0x148
> (XEN)    [<ffff82c480168000>] arch_memory_op+0x6af/0x1039
> (XEN)    [<ffff82c4801134fb>] do_memory_op+0x1bd7/0x1c2c
> (XEN)    [<ffff82c480217708>] syscall_enter+0xc8/0x122
> (XEN)
> (XEN)
> (XEN) ****************************************
>
> Christoph
>
>
>
> On 06/22/11 18:10, Tim Deegan wrote:
>> # HG changeset patch
>> # User Tim Deegan<Tim.Deegan@citrix.com>
>> # Date 1308758648 -3600
>> # Node ID c323e69a0a08ce9f1e54d2e2fa2edd9845bc8efe
>> # Parent  b7e5a25663329254cba539e21f4fbd5b32c67556
>> Nested p2m: implement "flush" as a first-class action
>> rather than using the teardown and init functions.
>> This makes the locking clearer and avoids an expensive scan of all
>> pfns that's only needed for non-nested p2ms.  It also moves the
>> tlb flush into the proper place in the flush logic, avoiding a
>> possible race against other CPUs.
>>
>> Signed-off-by: Tim Deegan<Tim.Deegan@citrix.com>
>>
>> diff -r b7e5a2566332 -r c323e69a0a08 xen/arch/x86/hvm/nestedhvm.c
>> --- a/xen/arch/x86/hvm/nestedhvm.c    Tue Jun 21 18:28:53 2011 +0100
>> +++ b/xen/arch/x86/hvm/nestedhvm.c    Wed Jun 22 17:04:08 2011 +0100
>> @@ -119,12 +119,6 @@ nestedhvm_vmcx_flushtlb(struct p2m_domai
>>        cpus_clear(p2m->p2m_dirty_cpumask);
>>    }
>>
>> -void
>> -nestedhvm_vmcx_flushtlbdomain(struct domain *d)
>> -{
>> -    on_selected_cpus(d->domain_dirty_cpumask, nestedhvm_flushtlb_ipi, d, 1);
>> -}
>> -
>>    bool_t
>>    nestedhvm_is_n2(struct vcpu *v)
>>    {
>> diff -r b7e5a2566332 -r c323e69a0a08 xen/arch/x86/mm/p2m.c
>> --- a/xen/arch/x86/mm/p2m.c   Tue Jun 21 18:28:53 2011 +0100
>> +++ b/xen/arch/x86/mm/p2m.c   Wed Jun 22 17:04:08 2011 +0100
>> @@ -1050,20 +1050,41 @@ p2m_getlru_nestedp2m(struct domain *d, s
>>        return lrup2m;
>>    }
>>
>> -static int
>> +/* Reset this p2m table to be empty */
>> +static void
>>    p2m_flush_locked(struct p2m_domain *p2m)
>>    {
>> -    ASSERT(p2m);
>> -    if (p2m->cr3 == CR3_EADDR)
>> -        /* Microoptimisation: p2m is already empty.
>> -         * =>   about 0.3% speedup of overall system performance.
>> -         */
>> -        return 0;
>> +    struct page_info *top, *pg;
>> +    struct domain *d = p2m->domain;
>> +    void *p;
>>
>> -    p2m_teardown(p2m);
>> -    p2m_initialise(p2m->domain, p2m);
>> -    p2m->write_p2m_entry = nestedp2m_write_p2m_entry;
>> -    return p2m_alloc_table(p2m);
>> +    p2m_lock(p2m);
>> +
>> +    /* "Host" p2m tables can have shared entries&c that need a bit more
>> +     * care when discarding them */
>> +    ASSERT(p2m_is_nestedp2m(p2m));
>> +    ASSERT(page_list_empty(&p2m->pod.super));
>> +    ASSERT(page_list_empty(&p2m->pod.single));
>> +
>> +    /* This is no longer a valid nested p2m for any address space */
>> +    p2m->cr3 = CR3_EADDR;
>> +
>> +    /* Zap the top level of the trie */
>> +    top = mfn_to_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));
>> +    p = map_domain_page(top);
>> +    clear_page(p);
>> +    unmap_domain_page(p);
>> +
>> +    /* Make sure nobody else is using this p2m table */
>> +    nestedhvm_vmcx_flushtlb(p2m);
>> +
>> +    /* Free the rest of the trie pages back to the paging pool */
>> +    while ( (pg = page_list_remove_head(&p2m->pages)) )
>> +        if ( pg != top )
>> +            d->arch.paging.free_page(d, pg);
>> +    page_list_add(top,&p2m->pages);
>> +
>> +    p2m_unlock(p2m);
>>    }
>>
>>    void
>> @@ -1074,9 +1095,8 @@ p2m_flush(struct vcpu *v, struct p2m_dom
>>        ASSERT(v->domain == d);
>>        vcpu_nestedhvm(v).nv_p2m = NULL;
>>        nestedp2m_lock(d);
>> -    BUG_ON(p2m_flush_locked(p2m) != 0);
>> +    p2m_flush_locked(p2m);
>>        hvm_asid_flush_vcpu(v);
>> -    nestedhvm_vmcx_flushtlb(p2m);
>>        nestedp2m_unlock(d);
>>    }
>>
>> @@ -1086,12 +1106,8 @@ p2m_flush_nestedp2m(struct domain *d)
>>        int i;
>>
>>        nestedp2m_lock(d);
>> -    for (i = 0; i<   MAX_NESTEDP2M; i++) {
>> -        struct p2m_domain *p2m = d->arch.nested_p2m[i];
>> -        BUG_ON(p2m_flush_locked(p2m) != 0);
>> -        cpus_clear(p2m->p2m_dirty_cpumask);
>> -    }
>> -    nestedhvm_vmcx_flushtlbdomain(d);
>> +    for ( i = 0; i<   MAX_NESTEDP2M; i++ )
>> +        p2m_flush_locked(d->arch.nested_p2m[i]);
>>        nestedp2m_unlock(d);
>>    }
>>
>> @@ -1104,7 +1120,7 @@ p2m_get_nestedp2m(struct vcpu *v, uint64
>>        volatile struct nestedvcpu *nv =&vcpu_nestedhvm(v);
>>        struct domain *d;
>>        struct p2m_domain *p2m;
>> -    int i, rv;
>> +    int i;
>>
>>        if (cr3 == 0 || cr3 == CR3_EADDR)
>>            cr3 = v->arch.hvm_vcpu.guest_cr[3];
>> @@ -1136,9 +1152,7 @@ p2m_get_nestedp2m(struct vcpu *v, uint64
>>         */
>>        for (i = 0; i<   MAX_NESTEDP2M; i++) {
>>            p2m = p2m_getlru_nestedp2m(d, NULL);
>> -        rv = p2m_flush_locked(p2m);
>> -        if (rv == 0)
>> -            break;
>> +        p2m_flush_locked(p2m);
>>        }
>>        nv->nv_p2m = p2m;
>>        p2m->cr3 = cr3;
>> diff -r b7e5a2566332 -r c323e69a0a08 xen/include/asm-x86/hvm/nestedhvm.h
>> --- a/xen/include/asm-x86/hvm/nestedhvm.h     Tue Jun 21 18:28:53 2011 +0100
>> +++ b/xen/include/asm-x86/hvm/nestedhvm.h     Wed Jun 22 17:04:08 2011 +0100
>> @@ -61,7 +61,6 @@ unsigned long *nestedhvm_vcpu_iomap_get(
>>        (!!vcpu_nestedhvm((v)).nv_vmswitch_in_progress)
>>
>>    void nestedhvm_vmcx_flushtlb(struct p2m_domain *p2m);
>> -void nestedhvm_vmcx_flushtlbdomain(struct domain *d);
>>
>>    bool_t nestedhvm_is_n2(struct vcpu *v);
>>
>>

-- 
---to satisfy European Law for business letters:
Advanced Micro Devices GmbH
Einsteinring 24, 85689 Dornach b. Muenchen
Geschaeftsfuehrer: Alberto Bozzo, Andrew Bowd
Sitz: Dornach, Gemeinde Aschheim, Landkreis Muenchen
Registergericht Muenchen, HRB Nr. 43632

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Re: [PATCH 1 of 4] Nested p2m: implement "flush" as a first-class action
  2011-06-23 12:56     ` Christoph Egger
@ 2011-06-23 15:04       ` Christoph Egger
  2011-06-23 15:21         ` Christoph Egger
  0 siblings, 1 reply; 18+ messages in thread
From: Christoph Egger @ 2011-06-23 15:04 UTC (permalink / raw)
  To: Tim Deegan, xen-devel


I have a fix for this crash. See inline.

Christoph


On 06/23/11 14:56, Christoph Egger wrote:
> On 06/23/11 14:50, Christoph Egger wrote:
>>
>> This patch crashes the host (and it doesn't disappear with the other
>> patches applied):
>
> err.. it crashes the host when the l1 guest boots the hvmloader invokes
> the VGA BIOS.
>
>> (XEN) Assertion 'pfn_to_pdx(ma >> PAGE_SHIFT) < (DIRECTMAP_SIZE >>
>> PAGE_SHIFT)'
>> failed at xen/include/asm/x86_64/page.h:99
>> (XEN) ----[ Xen-4.2-unstable  x86_64  debug=y  Tainted:    C ]----
>> (XEN) CPU:    1
>> (XEN) RIP:    e008:[<ffff82c4801d2007>] p2m_flush_locked+0x1a8/0x2be
>> (XEN) RFLAGS: 0000000000010212   CONTEXT: hypervisor
>> (XEN) rax: 0000000000000000   rbx: ffff830421db1d90   rcx: 0000000000000000
>> (XEN) rdx: f82f608076a60000   rsi: 000f82f608076a60   rdi: 0000000000000000
>> (XEN) rbp: ffff830425217a08   rsp: ffff8304252179c8   r8:  0000000000000000
>> (XEN) r9:  0000000000000004   r10: 00000000fffffffb   r11: 0000000000000003
>> (XEN) r12: ffff830421db1d90   r13: ffff82f608076a60   r14: ffff830403bbebe8
>> (XEN) r15: ffff830403bbe000   cr0: 000000008005003b   cr4: 00000000000406f0
>> (XEN) cr3: 0000000420f2b000   cr2: 000000000045f000
>> (XEN) ds: 0017   es: 0017   fs: 0000   gs: 0000   ss: e010   cs: e008
>> (XEN) Xen stack trace from rsp=ffff8304252179c8:
>> (XEN)    ffff830403bbebe8 ffff830421db1d90 ffff830421db1d90 0000000000000000
>> (XEN)    ffff830403bbe000 ffff830403bbebe8 ffff830403bbebe8 ffff830403bbeab0
>> (XEN)    ffff830425217a38 ffff82c4801d2599 0000000000000001 0000000000000067
>> (XEN)    ffff830403bbeab0 ffff830403af4000 ffff830425217a88 ffff82c480208a8c
>> (XEN)    0000000125941810 ffff830403bbe000 0000000000000000 ffff830425217b18
>> (XEN)    ffff830421db1cc0 00000000000ff000 ffffffffffffffff 0000000000000000
>> (XEN)    ffff830425217a98 ffff82c4801ce585 ffff830425217b68 ffff82c4801d4b4e
>> (XEN)    0000000000000200 1000000000000000 ffff830425217b28 ffff830425217b20
>> (XEN)    0000000000000001 ffff830425217ae8 ffff830425217ef8 ffff830403af4000
>> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
>> (XEN)    0000000000000000 0000000000000000 0000000000000000 ffff830403af4000
>> (XEN)    0000000000403af4 ffff82c4801d5ec1 ffff830425217bfc ffffffffffffffff
>> (XEN)    0000000000000000 0000000000000001 00000000000ff000 ffff830421db1cc0
>> (XEN)    ffff830425217bb8 ffff82c4801d02d1 0000000100000007 ffff830403bbe000
>> (XEN)    0000000125217bc8 ffff8280019fafc0 ffff82c400cfd7e0 000000000033f5f8
>> (XEN)    ffff830421db1cc0 0000000000000001 ffff830425217c28 ffff82c4801d053d
>> (XEN)    ffff830425217bfc ffff830425217bf8 0000000025217bf0 00000000000ff000
>> (XEN)    0000000000000001 ffff830425217c10 0000000000000007 ffff830421db1cc0
>> (XEN)    ffff830421db1cc0 ffff830425217f18 ffff82c4802e1260 0000000000000000
>> (XEN)    ffff830425217c78 ffff82c4801d11fa 000000000033f5f7 00000000000ff000
>> (XEN) Xen call trace:
>> (XEN)    [<ffff82c4801d2007>] p2m_flush_locked+0x1a8/0x2be
>> (XEN)    [<ffff82c4801d2599>] p2m_flush_nestedp2m+0xea/0x140
>> (XEN)    [<ffff82c480208a8c>] hap_write_p2m_entry+0x223/0x246
>> (XEN)    [<ffff82c4801ce585>] paging_write_p2m_entry+0x6e/0x75
>> (XEN)    [<ffff82c4801d4b4e>] p2m_set_entry+0x42b/0x6a8
>> (XEN)    [<ffff82c4801d02d1>] set_p2m_entry+0xf2/0x140
>> (XEN)    [<ffff82c4801d053d>] p2m_remove_page+0x1dd/0x1ec
>> (XEN)    [<ffff82c4801d11fa>] guest_physmap_remove_page+0x109/0x148
>> (XEN)    [<ffff82c480168000>] arch_memory_op+0x6af/0x1039
>> (XEN)    [<ffff82c4801134fb>] do_memory_op+0x1bd7/0x1c2c
>> (XEN)    [<ffff82c480217708>] syscall_enter+0xc8/0x122
>> (XEN)
>> (XEN)
>> (XEN) ****************************************
>>
>> Christoph
>>
>>
>>
>> On 06/22/11 18:10, Tim Deegan wrote:
>>> # HG changeset patch
>>> # User Tim Deegan<Tim.Deegan@citrix.com>
>>> # Date 1308758648 -3600
>>> # Node ID c323e69a0a08ce9f1e54d2e2fa2edd9845bc8efe
>>> # Parent  b7e5a25663329254cba539e21f4fbd5b32c67556
>>> Nested p2m: implement "flush" as a first-class action
>>> rather than using the teardown and init functions.
>>> This makes the locking clearer and avoids an expensive scan of all
>>> pfns that's only needed for non-nested p2ms.  It also moves the
>>> tlb flush into the proper place in the flush logic, avoiding a
>>> possible race against other CPUs.
>>>
>>> Signed-off-by: Tim Deegan<Tim.Deegan@citrix.com>
>>>
>>> diff -r b7e5a2566332 -r c323e69a0a08 xen/arch/x86/hvm/nestedhvm.c
>>> --- a/xen/arch/x86/hvm/nestedhvm.c    Tue Jun 21 18:28:53 2011 +0100
>>> +++ b/xen/arch/x86/hvm/nestedhvm.c    Wed Jun 22 17:04:08 2011 +0100
>>> @@ -119,12 +119,6 @@ nestedhvm_vmcx_flushtlb(struct p2m_domai
>>>         cpus_clear(p2m->p2m_dirty_cpumask);
>>>     }
>>>
>>> -void
>>> -nestedhvm_vmcx_flushtlbdomain(struct domain *d)
>>> -{
>>> -    on_selected_cpus(d->domain_dirty_cpumask, nestedhvm_flushtlb_ipi, d, 1);
>>> -}
>>> -
>>>     bool_t
>>>     nestedhvm_is_n2(struct vcpu *v)
>>>     {
>>> diff -r b7e5a2566332 -r c323e69a0a08 xen/arch/x86/mm/p2m.c
>>> --- a/xen/arch/x86/mm/p2m.c   Tue Jun 21 18:28:53 2011 +0100
>>> +++ b/xen/arch/x86/mm/p2m.c   Wed Jun 22 17:04:08 2011 +0100
>>> @@ -1050,20 +1050,41 @@ p2m_getlru_nestedp2m(struct domain *d, s
>>>         return lrup2m;
>>>     }
>>>
>>> -static int
>>> +/* Reset this p2m table to be empty */
>>> +static void
>>>     p2m_flush_locked(struct p2m_domain *p2m)
>>>     {
>>> -    ASSERT(p2m);
>>> -    if (p2m->cr3 == CR3_EADDR)
>>> -        /* Microoptimisation: p2m is already empty.
>>> -         * =>    about 0.3% speedup of overall system performance.
>>> -         */
>>> -        return 0;
>>> +    struct page_info *top, *pg;
>>> +    struct domain *d = p2m->domain;
>>> +    void *p;
+ mfn_t top_mfn;

>>>
>>> -    p2m_teardown(p2m);
>>> -    p2m_initialise(p2m->domain, p2m);
>>> -    p2m->write_p2m_entry = nestedp2m_write_p2m_entry;
>>> -    return p2m_alloc_table(p2m);
>>> +    p2m_lock(p2m);
>>> +
>>> +    /* "Host" p2m tables can have shared entries&c that need a bit more
>>> +     * care when discarding them */
>>> +    ASSERT(p2m_is_nestedp2m(p2m));
>>> +    ASSERT(page_list_empty(&p2m->pod.super));
>>> +    ASSERT(page_list_empty(&p2m->pod.single));
>>> +
>>> +    /* This is no longer a valid nested p2m for any address space */
>>> +    p2m->cr3 = CR3_EADDR;
>>> +
>>> +    /* Zap the top level of the trie */

+    top_mfn = pagetable_get_mfn(p2m_get_pagetable(p2m));
+    top = mfn_to_page(top_mfn);
+    p = map_domain_page(mfn_x(top_mfn));

>>> +    clear_page(p);
>>> +    unmap_domain_page(p);
>>> +
>>> +    /* Make sure nobody else is using this p2m table */
>>> +    nestedhvm_vmcx_flushtlb(p2m);
>>> +
>>> +    /* Free the rest of the trie pages back to the paging pool */
>>> +    while ( (pg = page_list_remove_head(&p2m->pages)) )
>>> +        if ( pg != top )
>>> +            d->arch.paging.free_page(d, pg);
>>> +    page_list_add(top,&p2m->pages);
>>> +
>>> +    p2m_unlock(p2m);
>>>     }
>>>
>>>     void
>>> @@ -1074,9 +1095,8 @@ p2m_flush(struct vcpu *v, struct p2m_dom
>>>         ASSERT(v->domain == d);
>>>         vcpu_nestedhvm(v).nv_p2m = NULL;
>>>         nestedp2m_lock(d);
>>> -    BUG_ON(p2m_flush_locked(p2m) != 0);
>>> +    p2m_flush_locked(p2m);
>>>         hvm_asid_flush_vcpu(v);
>>> -    nestedhvm_vmcx_flushtlb(p2m);
>>>         nestedp2m_unlock(d);
>>>     }
>>>
>>> @@ -1086,12 +1106,8 @@ p2m_flush_nestedp2m(struct domain *d)
>>>         int i;
>>>
>>>         nestedp2m_lock(d);
>>> -    for (i = 0; i<    MAX_NESTEDP2M; i++) {
>>> -        struct p2m_domain *p2m = d->arch.nested_p2m[i];
>>> -        BUG_ON(p2m_flush_locked(p2m) != 0);
>>> -        cpus_clear(p2m->p2m_dirty_cpumask);
>>> -    }
>>> -    nestedhvm_vmcx_flushtlbdomain(d);
>>> +    for ( i = 0; i<    MAX_NESTEDP2M; i++ )
>>> +        p2m_flush_locked(d->arch.nested_p2m[i]);
>>>         nestedp2m_unlock(d);
>>>     }
>>>
>>> @@ -1104,7 +1120,7 @@ p2m_get_nestedp2m(struct vcpu *v, uint64
>>>         volatile struct nestedvcpu *nv =&vcpu_nestedhvm(v);
>>>         struct domain *d;
>>>         struct p2m_domain *p2m;
>>> -    int i, rv;
>>> +    int i;
>>>
>>>         if (cr3 == 0 || cr3 == CR3_EADDR)
>>>             cr3 = v->arch.hvm_vcpu.guest_cr[3];
>>> @@ -1136,9 +1152,7 @@ p2m_get_nestedp2m(struct vcpu *v, uint64
>>>          */
>>>         for (i = 0; i<    MAX_NESTEDP2M; i++) {
>>>             p2m = p2m_getlru_nestedp2m(d, NULL);
>>> -        rv = p2m_flush_locked(p2m);
>>> -        if (rv == 0)
>>> -            break;
>>> +        p2m_flush_locked(p2m);
>>>         }
>>>         nv->nv_p2m = p2m;
>>>         p2m->cr3 = cr3;
>>> diff -r b7e5a2566332 -r c323e69a0a08 xen/include/asm-x86/hvm/nestedhvm.h
>>> --- a/xen/include/asm-x86/hvm/nestedhvm.h     Tue Jun 21 18:28:53 2011 +0100
>>> +++ b/xen/include/asm-x86/hvm/nestedhvm.h     Wed Jun 22 17:04:08 2011 +0100
>>> @@ -61,7 +61,6 @@ unsigned long *nestedhvm_vcpu_iomap_get(
>>>         (!!vcpu_nestedhvm((v)).nv_vmswitch_in_progress)
>>>
>>>     void nestedhvm_vmcx_flushtlb(struct p2m_domain *p2m);
>>> -void nestedhvm_vmcx_flushtlbdomain(struct domain *d);
>>>
>>>     bool_t nestedhvm_is_n2(struct vcpu *v);
>>>
>>>

-- 
---to satisfy European Law for business letters:
Advanced Micro Devices GmbH
Einsteinring 24, 85689 Dornach b. Muenchen
Geschaeftsfuehrer: Alberto Bozzo, Andrew Bowd
Sitz: Dornach, Gemeinde Aschheim, Landkreis Muenchen
Registergericht Muenchen, HRB Nr. 43632

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 4 of 4] Nested p2m: rework locking around nested-p2m flushes and updates
  2011-06-22 16:10 ` [PATCH 4 of 4] Nested p2m: rework locking around nested-p2m flushes and updates Tim Deegan
@ 2011-06-23 15:08   ` Christoph Egger
  2011-06-24 10:45     ` Tim Deegan
  0 siblings, 1 reply; 18+ messages in thread
From: Christoph Egger @ 2011-06-23 15:08 UTC (permalink / raw)
  To: Tim Deegan; +Cc: xen-devel

On 06/22/11 18:10, Tim Deegan wrote:
> # HG changeset patch
> # User Tim Deegan<Tim.Deegan@citrix.com>
> # Date 1308758840 -3600
> # Node ID a168af9b6d2f604c841465e5ed911b7a12b5ba15
> # Parent  b265371addbbc8a58c95a269fe3cd0fdc866aaa3
> Nested p2m: rework locking around nested-p2m flushes and updates.
>
> The nestedp2m_lock now only covers the mapping from nested-cr3 to
> nested-p2m; the tables themselves may be updated or flushed using only
> the relevant p2m lock.
>
> This means that the nested-p2m lock is only taken on one path, and
> always before any p2m locks.
>
> Signed-off-by: Tim Deegan<Tim.Deegan@citrix.com>
>
> diff -r b265371addbb -r a168af9b6d2f xen/arch/x86/mm/hap/nested_hap.c
> --- a/xen/arch/x86/mm/hap/nested_hap.c	Wed Jun 22 17:04:08 2011 +0100
> +++ b/xen/arch/x86/mm/hap/nested_hap.c	Wed Jun 22 17:07:20 2011 +0100
> @@ -96,17 +96,23 @@ nestedp2m_write_p2m_entry(struct p2m_dom
>   /*          NESTED VIRT FUNCTIONS           */
>   /********************************************/
>   static void
> -nestedhap_fix_p2m(struct p2m_domain *p2m, paddr_t L2_gpa, paddr_t L0_gpa,
> -    p2m_type_t p2mt, p2m_access_t p2ma)
> +nestedhap_fix_p2m(struct vcpu *v, struct p2m_domain *p2m,
> +                  paddr_t L2_gpa, paddr_t L0_gpa,
> +                  p2m_type_t p2mt, p2m_access_t p2ma)
>   {
> -    int rv;
> +    int rv = 0;
>       ASSERT(p2m);
>       ASSERT(p2m->set_entry);
>
>       p2m_lock(p2m);
> -    rv = set_p2m_entry(p2m, L2_gpa >> PAGE_SHIFT,
> -                         page_to_mfn(maddr_to_page(L0_gpa)),
> -                         0 /*4K*/, p2mt, p2ma);
> +
> +    /* If this p2m table has been flushed or recycled under our feet,
> +     * leave it alone.  We'll pick up the right one as we try to
> +     * vmenter the guest. */
> +    if ( p2m->cr3 == nhvm_vcpu_hostcr3(v) )
> +         rv = set_p2m_entry(p2m, L2_gpa >> PAGE_SHIFT,
> +                            page_to_mfn(maddr_to_page(L0_gpa)),
> +                            0 /*4K*/, p2mt, p2ma);

The introduction of this check leads to this crash when the L2 guest
initializes its third vcpu:


(XEN) nested_hap.c:121:d1 failed to set entry for 0xebcff0 -> 0x2654bcff0
(XEN) Xen BUG at nested_hap.c:122
(XEN) ----[ Xen-4.2-unstable  x86_64  debug=y  Tainted:    C ]----
(XEN) CPU:    1
(XEN) RIP:    e008:[<ffff82c48020aa70>] 
nestedhvm_hap_nested_page_fault+0x27f/0x29d
(XEN) RFLAGS: 0000000000010292   CONTEXT: hypervisor
(XEN) rax: 0000000000000000   rbx: 0000000000000000   rcx: 0000000000000001
(XEN) rdx: ffff82c48025b2e8   rsi: 000000000000000a   rdi: ffff82c48025b2e8
(XEN) rbp: ffff830425217cf8   rsp: ffff830425217ca8   r8:  0000000000000001
(XEN) r9:  0000000000000004   r10: 0000000000000004   r11: 0000000000000004
(XEN) r12: ffff8300cf2a1000   r13: ffff830421db1d90   r14: ffff830421db1d90
(XEN) r15: ffff830421db1d90   cr0: 000000008005003b   cr4: 00000000000406f0
(XEN) cr3: 0000000403b02000   cr2: 0000000000000000
(XEN) ds: 0000   es: 0000   fs: 0010   gs: 0010   ss: 0000   cs: e008
(XEN) Xen stack trace from rsp=ffff830425217ca8:
(XEN)    ffff8300cf2a1000 0000000000ebcff0 00000002654bcff0 00000002654bcff0
(XEN)    00000000d98bcff0 0000000000000000 ffff8300cf2a1000 0000000000ebcff0
(XEN)    ffff8300cf2a1000 0000000000ebcff0 ffff830425217d58 ffff82c4801aa78d
(XEN)    00000000cf2a1000 ffffffffffffffff 00ff830425217d28 ffff82c4801a66ad
(XEN)    ffff830425217d58 0000000000000000 0000000000000ebc 0000000000ebcff0
(XEN)    ffff8300cf2a1000 ffff83023ec07000 ffff830425217f08 ffff82c4801c0faa
(XEN)    ffff830400000000 ffff82c48017638e ffff830425217d90 ffff830421db1d90
(XEN)    ffff830425217f18<G><0>nested_hap.c:121:d1 failed to set entry 
for 0x7ff8 ->
0x2644d1ff8
(XEN)  0100000000000293Xen BUG at nested_hap.c:122

Christoph



>       p2m_unlock(p2m);
>
>       if (rv == 0) {
> @@ -211,12 +217,10 @@ nestedhvm_hap_nested_page_fault(struct v
>           break;
>       }
>
> -    nestedp2m_lock(d);
>       /* fix p2m_get_pagetable(nested_p2m) */
> -    nestedhap_fix_p2m(nested_p2m, L2_gpa, L0_gpa,
> +    nestedhap_fix_p2m(v, nested_p2m, L2_gpa, L0_gpa,
>           p2m_ram_rw,
>           p2m_access_rwx /* FIXME: Should use same permission as l1 guest */);
> -    nestedp2m_unlock(d);
>
>       return NESTEDHVM_PAGEFAULT_DONE;
>   }
> diff -r b265371addbb -r a168af9b6d2f xen/arch/x86/mm/mm-locks.h
> --- a/xen/arch/x86/mm/mm-locks.h	Wed Jun 22 17:04:08 2011 +0100
> +++ b/xen/arch/x86/mm/mm-locks.h	Wed Jun 22 17:07:20 2011 +0100
> @@ -96,8 +96,11 @@ declare_mm_lock(shr)
>
>   /* Nested P2M lock (per-domain)
>    *
> - * A per-domain lock that protects some of the nested p2m datastructures.
> - * TODO: find out exactly what needs to be covered by this lock */
> + * A per-domain lock that protects the mapping from nested-CR3 to
> + * nested-p2m.  In particular it covers:
> + * - the array of nested-p2m tables, and all LRU activity therein; and
> + * - setting the "cr3" field of any p2m table to a non-CR3_EADDR value.
> + *   (i.e. assigning a p2m table to be the shadow of that cr3 */
>
>   declare_mm_lock(nestedp2m)
>   #define nestedp2m_lock(d)   mm_lock(nestedp2m,&(d)->arch.nested_p2m_lock)
> diff -r b265371addbb -r a168af9b6d2f xen/arch/x86/mm/p2m.c
> --- a/xen/arch/x86/mm/p2m.c	Wed Jun 22 17:04:08 2011 +0100
> +++ b/xen/arch/x86/mm/p2m.c	Wed Jun 22 17:07:20 2011 +0100
> @@ -1052,7 +1052,7 @@ p2m_getlru_nestedp2m(struct domain *d, s
>
>   /* Reset this p2m table to be empty */
>   static void
> -p2m_flush_locked(struct p2m_domain *p2m)
> +p2m_flush_table(struct p2m_domain *p2m)
>   {
>       struct page_info *top, *pg;
>       struct domain *d = p2m->domain;
> @@ -1094,21 +1094,16 @@ p2m_flush(struct vcpu *v, struct p2m_dom
>
>       ASSERT(v->domain == d);
>       vcpu_nestedhvm(v).nv_p2m = NULL;
> -    nestedp2m_lock(d);
> -    p2m_flush_locked(p2m);
> +    p2m_flush_table(p2m);
>       hvm_asid_flush_vcpu(v);
> -    nestedp2m_unlock(d);
>   }
>
>   void
>   p2m_flush_nestedp2m(struct domain *d)
>   {
>       int i;
> -
> -    nestedp2m_lock(d);
> +    for ( i = 0; i < MAX_NESTEDP2M; i++ )
> -        p2m_flush_locked(d->arch.nested_p2m[i]);
> -    nestedp2m_unlock(d);
> +        p2m_flush_table(d->arch.nested_p2m[i]);
>   }
>
>   struct p2m_domain *
> @@ -1132,17 +1127,23 @@ p2m_get_nestedp2m(struct vcpu *v, uint64
>       d = v->domain;
>       nestedp2m_lock(d);
>       p2m = nv->nv_p2m;
> +    if ( p2m && (p2m->cr3 == cr3 || p2m->cr3 == CR3_EADDR) )
> +    if ( p2m )
>       {
> -        nv->nv_flushp2m = 0;
> -        p2m_getlru_nestedp2m(d, p2m);
> -        nv->nv_p2m = p2m;
> -        if (p2m->cr3 == CR3_EADDR)
> -            hvm_asid_flush_vcpu(v);
> -        p2m->cr3 = cr3;
> -        cpu_set(v->processor, p2m->p2m_dirty_cpumask);
> -        nestedp2m_unlock(d);
> -        return p2m;
> +        p2m_lock(p2m);
> +        if ( p2m->cr3 == cr3 || p2m->cr3 == CR3_EADDR )
> +        {
> +            nv->nv_flushp2m = 0;
> +            p2m_getlru_nestedp2m(d, p2m);
> +            nv->nv_p2m = p2m;
> +            if (p2m->cr3 == CR3_EADDR)
> +                hvm_asid_flush_vcpu(v);
> +            p2m->cr3 = cr3;
> +            cpu_set(v->processor, p2m->p2m_dirty_cpumask);
> +            p2m_unlock(p2m);
> +            nestedp2m_unlock(d);
> +            return p2m;
> +        }
> +        p2m_unlock(p2m);
>       }
>
>       /* All p2m's are or were in use. Take the least recent used one,
> @@ -1150,14 +1151,16 @@ p2m_get_nestedp2m(struct vcpu *v, uint64
>        */
>       for (i = 0; i<  MAX_NESTEDP2M; i++) {
>           p2m = p2m_getlru_nestedp2m(d, NULL);
> -        p2m_flush_locked(p2m);
> +        p2m_flush_table(p2m);
>       }
> +    p2m_lock(p2m);
>       nv->nv_p2m = p2m;
>       p2m->cr3 = cr3;
>       nv->nv_flushp2m = 0;
>       hvm_asid_flush_vcpu(v);
>       nestedhvm_vmcx_flushtlb(nv->nv_p2m);
>       cpu_set(v->processor, p2m->p2m_dirty_cpumask);
> +    p2m_unlock(p2m);
>       nestedp2m_unlock(d);
>
>       return p2m;
> diff -r b265371addbb -r a168af9b6d2f xen/include/asm-x86/p2m.h
> --- a/xen/include/asm-x86/p2m.h	Wed Jun 22 17:04:08 2011 +0100
> +++ b/xen/include/asm-x86/p2m.h	Wed Jun 22 17:07:20 2011 +0100
> @@ -201,8 +201,13 @@ struct p2m_domain {
>       cpumask_t          p2m_dirty_cpumask;
>
>       struct domain     *domain;   /* back pointer to domain */
> +
> +    /* Nested p2ms only: nested-CR3 value that this p2m shadows.
> +     * This can be cleared to CR3_EADDR under the per-p2m lock but
> +     * needs both the per-p2m lock and the per-domain nestedp2m lock
> +     * to set it to any other value. */
>   #define CR3_EADDR     (~0ULL)
> -    uint64_t           cr3;      /* to identify this p2m for re-use */
> +    uint64_t           cr3;
>
>       /* Pages used to construct the p2m */
>       struct page_list_head pages;
>


-- 
---to satisfy European Law for business letters:
Advanced Micro Devices GmbH
Einsteinring 24, 85689 Dornach b. Muenchen
Geschaeftsfuehrer: Alberto Bozzo, Andrew Bowd
Sitz: Dornach, Gemeinde Aschheim, Landkreis Muenchen
Registergericht Muenchen, HRB Nr. 43632

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Re: [PATCH 1 of 4] Nested p2m: implement "flush" as a first-class action
  2011-06-23 15:04       ` Christoph Egger
@ 2011-06-23 15:21         ` Christoph Egger
  2011-06-24  9:07           ` Tim Deegan
  0 siblings, 1 reply; 18+ messages in thread
From: Christoph Egger @ 2011-06-23 15:21 UTC (permalink / raw)
  To: Tim Deegan, xen-devel


Now I see a significant slowdown of the L2 guest with multiple vcpus.
The reason is I see 10 times more IPIs for each vcpu with
p2m_flush_nestedp2m().

Please add back nestedhvm_vmcx_flushtlbdomain().

Christoph


On 06/23/11 17:04, Christoph Egger wrote:
>
> I have a fix for this crash. See inline.
>
> Christoph
>
>
> On 06/23/11 14:56, Christoph Egger wrote:
>> On 06/23/11 14:50, Christoph Egger wrote:
>>>
>>> This patch crashes the host (and it doesn't disappear with the other
>>> patches applied):
>>
>> err.. it crashes the host when the l1 guest boots the hvmloader invokes
>> the VGA BIOS.
>>
>>> (XEN) Assertion 'pfn_to_pdx(ma>>    PAGE_SHIFT)<    (DIRECTMAP_SIZE>>
>>> PAGE_SHIFT)'
>>> failed at xen/include/asm/x86_64/page.h:99
>>> (XEN) ----[ Xen-4.2-unstable  x86_64  debug=y  Tainted:    C ]----
>>> (XEN) CPU:    1
>>> (XEN) RIP:    e008:[<ffff82c4801d2007>] p2m_flush_locked+0x1a8/0x2be
>>> (XEN) RFLAGS: 0000000000010212   CONTEXT: hypervisor
>>> (XEN) rax: 0000000000000000   rbx: ffff830421db1d90   rcx: 0000000000000000
>>> (XEN) rdx: f82f608076a60000   rsi: 000f82f608076a60   rdi: 0000000000000000
>>> (XEN) rbp: ffff830425217a08   rsp: ffff8304252179c8   r8:  0000000000000000
>>> (XEN) r9:  0000000000000004   r10: 00000000fffffffb   r11: 0000000000000003
>>> (XEN) r12: ffff830421db1d90   r13: ffff82f608076a60   r14: ffff830403bbebe8
>>> (XEN) r15: ffff830403bbe000   cr0: 000000008005003b   cr4: 00000000000406f0
>>> (XEN) cr3: 0000000420f2b000   cr2: 000000000045f000
>>> (XEN) ds: 0017   es: 0017   fs: 0000   gs: 0000   ss: e010   cs: e008
>>> (XEN) Xen stack trace from rsp=ffff8304252179c8:
>>> (XEN)    ffff830403bbebe8 ffff830421db1d90 ffff830421db1d90 0000000000000000
>>> (XEN)    ffff830403bbe000 ffff830403bbebe8 ffff830403bbebe8 ffff830403bbeab0
>>> (XEN)    ffff830425217a38 ffff82c4801d2599 0000000000000001 0000000000000067
>>> (XEN)    ffff830403bbeab0 ffff830403af4000 ffff830425217a88 ffff82c480208a8c
>>> (XEN)    0000000125941810 ffff830403bbe000 0000000000000000 ffff830425217b18
>>> (XEN)    ffff830421db1cc0 00000000000ff000 ffffffffffffffff 0000000000000000
>>> (XEN)    ffff830425217a98 ffff82c4801ce585 ffff830425217b68 ffff82c4801d4b4e
>>> (XEN)    0000000000000200 1000000000000000 ffff830425217b28 ffff830425217b20
>>> (XEN)    0000000000000001 ffff830425217ae8 ffff830425217ef8 ffff830403af4000
>>> (XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
>>> (XEN)    0000000000000000 0000000000000000 0000000000000000 ffff830403af4000
>>> (XEN)    0000000000403af4 ffff82c4801d5ec1 ffff830425217bfc ffffffffffffffff
>>> (XEN)    0000000000000000 0000000000000001 00000000000ff000 ffff830421db1cc0
>>> (XEN)    ffff830425217bb8 ffff82c4801d02d1 0000000100000007 ffff830403bbe000
>>> (XEN)    0000000125217bc8 ffff8280019fafc0 ffff82c400cfd7e0 000000000033f5f8
>>> (XEN)    ffff830421db1cc0 0000000000000001 ffff830425217c28 ffff82c4801d053d
>>> (XEN)    ffff830425217bfc ffff830425217bf8 0000000025217bf0 00000000000ff000
>>> (XEN)    0000000000000001 ffff830425217c10 0000000000000007 ffff830421db1cc0
>>> (XEN)    ffff830421db1cc0 ffff830425217f18 ffff82c4802e1260 0000000000000000
>>> (XEN)    ffff830425217c78 ffff82c4801d11fa 000000000033f5f7 00000000000ff000
>>> (XEN) Xen call trace:
>>> (XEN)    [<ffff82c4801d2007>] p2m_flush_locked+0x1a8/0x2be
>>> (XEN)    [<ffff82c4801d2599>] p2m_flush_nestedp2m+0xea/0x140
>>> (XEN)    [<ffff82c480208a8c>] hap_write_p2m_entry+0x223/0x246
>>> (XEN)    [<ffff82c4801ce585>] paging_write_p2m_entry+0x6e/0x75
>>> (XEN)    [<ffff82c4801d4b4e>] p2m_set_entry+0x42b/0x6a8
>>> (XEN)    [<ffff82c4801d02d1>] set_p2m_entry+0xf2/0x140
>>> (XEN)    [<ffff82c4801d053d>] p2m_remove_page+0x1dd/0x1ec
>>> (XEN)    [<ffff82c4801d11fa>] guest_physmap_remove_page+0x109/0x148
>>> (XEN)    [<ffff82c480168000>] arch_memory_op+0x6af/0x1039
>>> (XEN)    [<ffff82c4801134fb>] do_memory_op+0x1bd7/0x1c2c
>>> (XEN)    [<ffff82c480217708>] syscall_enter+0xc8/0x122
>>> (XEN)
>>> (XEN)
>>> (XEN) ****************************************
>>>
>>> Christoph
>>>
>>>
>>>
>>> On 06/22/11 18:10, Tim Deegan wrote:
>>>> # HG changeset patch
>>>> # User Tim Deegan<Tim.Deegan@citrix.com>
>>>> # Date 1308758648 -3600
>>>> # Node ID c323e69a0a08ce9f1e54d2e2fa2edd9845bc8efe
>>>> # Parent  b7e5a25663329254cba539e21f4fbd5b32c67556
>>>> Nested p2m: implement "flush" as a first-class action
>>>> rather than using the teardown and init functions.
>>>> This makes the locking clearer and avoids an expensive scan of all
>>>> pfns that's only needed for non-nested p2ms.  It also moves the
>>>> tlb flush into the proper place in the flush logic, avoiding a
>>>> possible race against other CPUs.
>>>>
>>>> Signed-off-by: Tim Deegan<Tim.Deegan@citrix.com>
>>>>
>>>> diff -r b7e5a2566332 -r c323e69a0a08 xen/arch/x86/hvm/nestedhvm.c
>>>> --- a/xen/arch/x86/hvm/nestedhvm.c    Tue Jun 21 18:28:53 2011 +0100
>>>> +++ b/xen/arch/x86/hvm/nestedhvm.c    Wed Jun 22 17:04:08 2011 +0100
>>>> @@ -119,12 +119,6 @@ nestedhvm_vmcx_flushtlb(struct p2m_domai
>>>>          cpus_clear(p2m->p2m_dirty_cpumask);
>>>>      }
>>>>
>>>> -void
>>>> -nestedhvm_vmcx_flushtlbdomain(struct domain *d)
>>>> -{
>>>> -    on_selected_cpus(d->domain_dirty_cpumask, nestedhvm_flushtlb_ipi, d, 1);
>>>> -}
>>>> -
>>>>      bool_t
>>>>      nestedhvm_is_n2(struct vcpu *v)
>>>>      {
>>>> diff -r b7e5a2566332 -r c323e69a0a08 xen/arch/x86/mm/p2m.c
>>>> --- a/xen/arch/x86/mm/p2m.c   Tue Jun 21 18:28:53 2011 +0100
>>>> +++ b/xen/arch/x86/mm/p2m.c   Wed Jun 22 17:04:08 2011 +0100
>>>> @@ -1050,20 +1050,41 @@ p2m_getlru_nestedp2m(struct domain *d, s
>>>>          return lrup2m;
>>>>      }
>>>>
>>>> -static int
>>>> +/* Reset this p2m table to be empty */
>>>> +static void
>>>>      p2m_flush_locked(struct p2m_domain *p2m)
>>>>      {
>>>> -    ASSERT(p2m);
>>>> -    if (p2m->cr3 == CR3_EADDR)
>>>> -        /* Microoptimisation: p2m is already empty.
>>>> -         * =>     about 0.3% speedup of overall system performance.
>>>> -         */
>>>> -        return 0;
>>>> +    struct page_info *top, *pg;
>>>> +    struct domain *d = p2m->domain;
>>>> +    void *p;
> + mfn_t top_mfn;
>
>>>>
>>>> -    p2m_teardown(p2m);
>>>> -    p2m_initialise(p2m->domain, p2m);
>>>> -    p2m->write_p2m_entry = nestedp2m_write_p2m_entry;
>>>> -    return p2m_alloc_table(p2m);
>>>> +    p2m_lock(p2m);
>>>> +
>>>> +    /* "Host" p2m tables can have shared entries&c that need a bit more
>>>> +     * care when discarding them */
>>>> +    ASSERT(p2m_is_nestedp2m(p2m));
>>>> +    ASSERT(page_list_empty(&p2m->pod.super));
>>>> +    ASSERT(page_list_empty(&p2m->pod.single));
>>>> +
>>>> +    /* This is no longer a valid nested p2m for any address space */
>>>> +    p2m->cr3 = CR3_EADDR;
>>>> +
>>>> +    /* Zap the top level of the trie */
>
> +    top_mfn = pagetable_get_mfn(p2m_get_pagetable(p2m));
> +    top = mfn_to_page(top_mfn);
> +    p = map_domain_page(mfn_x(top_mfn));
>
>>>> +    clear_page(p);
>>>> +    unmap_domain_page(p);
>>>> +
>>>> +    /* Make sure nobody else is using this p2m table */
>>>> +    nestedhvm_vmcx_flushtlb(p2m);
>>>> +
>>>> +    /* Free the rest of the trie pages back to the paging pool */
>>>> +    while ( (pg = page_list_remove_head(&p2m->pages)) )
>>>> +        if ( pg != top )
>>>> +            d->arch.paging.free_page(d, pg);
>>>> +    page_list_add(top,&p2m->pages);
>>>> +
>>>> +    p2m_unlock(p2m);
>>>>      }
>>>>
>>>>      void
>>>> @@ -1074,9 +1095,8 @@ p2m_flush(struct vcpu *v, struct p2m_dom
>>>>          ASSERT(v->domain == d);
>>>>          vcpu_nestedhvm(v).nv_p2m = NULL;
>>>>          nestedp2m_lock(d);
>>>> -    BUG_ON(p2m_flush_locked(p2m) != 0);
>>>> +    p2m_flush_locked(p2m);
>>>>          hvm_asid_flush_vcpu(v);
>>>> -    nestedhvm_vmcx_flushtlb(p2m);
>>>>          nestedp2m_unlock(d);
>>>>      }
>>>>
>>>> @@ -1086,12 +1106,8 @@ p2m_flush_nestedp2m(struct domain *d)
>>>>          int i;
>>>>
>>>>          nestedp2m_lock(d);
>>>> -    for (i = 0; i<     MAX_NESTEDP2M; i++) {
>>>> -        struct p2m_domain *p2m = d->arch.nested_p2m[i];
>>>> -        BUG_ON(p2m_flush_locked(p2m) != 0);
>>>> -        cpus_clear(p2m->p2m_dirty_cpumask);
>>>> -    }
>>>> -    nestedhvm_vmcx_flushtlbdomain(d);
>>>> +    for ( i = 0; i<     MAX_NESTEDP2M; i++ )
>>>> +        p2m_flush_locked(d->arch.nested_p2m[i]);
>>>>          nestedp2m_unlock(d);
>>>>      }
>>>>
>>>> @@ -1104,7 +1120,7 @@ p2m_get_nestedp2m(struct vcpu *v, uint64
>>>>          volatile struct nestedvcpu *nv =&vcpu_nestedhvm(v);
>>>>          struct domain *d;
>>>>          struct p2m_domain *p2m;
>>>> -    int i, rv;
>>>> +    int i;
>>>>
>>>>          if (cr3 == 0 || cr3 == CR3_EADDR)
>>>>              cr3 = v->arch.hvm_vcpu.guest_cr[3];
>>>> @@ -1136,9 +1152,7 @@ p2m_get_nestedp2m(struct vcpu *v, uint64
>>>>           */
>>>>          for (i = 0; i<     MAX_NESTEDP2M; i++) {
>>>>              p2m = p2m_getlru_nestedp2m(d, NULL);
>>>> -        rv = p2m_flush_locked(p2m);
>>>> -        if (rv == 0)
>>>> -            break;
>>>> +        p2m_flush_locked(p2m);
>>>>          }
>>>>          nv->nv_p2m = p2m;
>>>>          p2m->cr3 = cr3;
>>>> diff -r b7e5a2566332 -r c323e69a0a08 xen/include/asm-x86/hvm/nestedhvm.h
>>>> --- a/xen/include/asm-x86/hvm/nestedhvm.h     Tue Jun 21 18:28:53 2011 +0100
>>>> +++ b/xen/include/asm-x86/hvm/nestedhvm.h     Wed Jun 22 17:04:08 2011 +0100
>>>> @@ -61,7 +61,6 @@ unsigned long *nestedhvm_vcpu_iomap_get(
>>>>          (!!vcpu_nestedhvm((v)).nv_vmswitch_in_progress)
>>>>
>>>>      void nestedhvm_vmcx_flushtlb(struct p2m_domain *p2m);
>>>> -void nestedhvm_vmcx_flushtlbdomain(struct domain *d);
>>>>
>>>>      bool_t nestedhvm_is_n2(struct vcpu *v);
>>>>
>>>>


-- 
---to satisfy European Law for business letters:
Advanced Micro Devices GmbH
Einsteinring 24, 85689 Dornach b. Muenchen
Geschaeftsfuehrer: Alberto Bozzo, Andrew Bowd
Sitz: Dornach, Gemeinde Aschheim, Landkreis Muenchen
Registergericht Muenchen, HRB Nr. 43632

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Re: [PATCH 1 of 4] Nested p2m: implement "flush" as a first-class action
  2011-06-23 15:21         ` Christoph Egger
@ 2011-06-24  9:07           ` Tim Deegan
  0 siblings, 0 replies; 18+ messages in thread
From: Tim Deegan @ 2011-06-24  9:07 UTC (permalink / raw)
  To: Christoph Egger; +Cc: xen-devel

At 17:21 +0200 on 23 Jun (1308849681), Christoph Egger wrote:
> 
> Now I see a significant slowdown of the L2 guest with multiple vcpus.
> The reason is I see 10 times more IPIs for each vcpu with
> p2m_flush_nestedp2m().
> 
> Please add back nestedhvm_vmcx_flushtlbdomain().

It wasn't safe.  You need to kick other CPUs off the p2m table before
you free its memory.  But I'll look into avoiding those IPIs.

Tim.

> On 06/23/11 17:04, Christoph Egger wrote:
> >
> >I have a fix for this crash. See inline.
> >
> >Christoph
> >
> >
> >On 06/23/11 14:56, Christoph Egger wrote:
> >>On 06/23/11 14:50, Christoph Egger wrote:
> >>>
> >>>This patch crashes the host (and it doesn't disappear with the other
> >>>patches applied):
> >>
> >>err.. it crashes the host when the l1 guest boots the hvmloader invokes
> >>the VGA BIOS.
> >>
> >>>(XEN) Assertion 'pfn_to_pdx(ma>>    PAGE_SHIFT)<    (DIRECTMAP_SIZE>>
> >>>PAGE_SHIFT)'
> >>>failed at xen/include/asm/x86_64/page.h:99
> >>>(XEN) ----[ Xen-4.2-unstable  x86_64  debug=y  Tainted:    C ]----
> >>>(XEN) CPU:    1
> >>>(XEN) RIP:    e008:[<ffff82c4801d2007>] p2m_flush_locked+0x1a8/0x2be
> >>>(XEN) RFLAGS: 0000000000010212   CONTEXT: hypervisor
> >>>(XEN) rax: 0000000000000000   rbx: ffff830421db1d90   rcx: 0000000000000000
> >>>(XEN) rdx: f82f608076a60000   rsi: 000f82f608076a60   rdi: 0000000000000000
> >>>(XEN) rbp: ffff830425217a08   rsp: ffff8304252179c8   r8:  0000000000000000
> >>>(XEN) r9:  0000000000000004   r10: 00000000fffffffb   r11: 0000000000000003
> >>>(XEN) r12: ffff830421db1d90   r13: ffff82f608076a60   r14: ffff830403bbebe8
> >>>(XEN) r15: ffff830403bbe000   cr0: 000000008005003b   cr4: 00000000000406f0
> >>>(XEN) cr3: 0000000420f2b000   cr2: 000000000045f000
> >>>(XEN) ds: 0017   es: 0017   fs: 0000   gs: 0000   ss: e010   cs: e008
> >>>(XEN) Xen stack trace from rsp=ffff8304252179c8:
> >>>(XEN)    ffff830403bbebe8 ffff830421db1d90 ffff830421db1d90 0000000000000000
> >>>(XEN)    ffff830403bbe000 ffff830403bbebe8 ffff830403bbebe8 ffff830403bbeab0
> >>>(XEN)    ffff830425217a38 ffff82c4801d2599 0000000000000001 0000000000000067
> >>>(XEN)    ffff830403bbeab0 ffff830403af4000 ffff830425217a88 ffff82c480208a8c
> >>>(XEN)    0000000125941810 ffff830403bbe000 0000000000000000 ffff830425217b18
> >>>(XEN)    ffff830421db1cc0 00000000000ff000 ffffffffffffffff 0000000000000000
> >>>(XEN)    ffff830425217a98 ffff82c4801ce585 ffff830425217b68 ffff82c4801d4b4e
> >>>(XEN)    0000000000000200 1000000000000000 ffff830425217b28 ffff830425217b20
> >>>(XEN)    0000000000000001 ffff830425217ae8 ffff830425217ef8 ffff830403af4000
> >>>(XEN)    0000000000000000 0000000000000000 0000000000000000 0000000000000000
> >>>(XEN)    0000000000000000 0000000000000000 0000000000000000 ffff830403af4000
> >>>(XEN)    0000000000403af4 ffff82c4801d5ec1 ffff830425217bfc ffffffffffffffff
> >>>(XEN)    0000000000000000 0000000000000001 00000000000ff000 ffff830421db1cc0
> >>>(XEN)    ffff830425217bb8 ffff82c4801d02d1 0000000100000007 ffff830403bbe000
> >>>(XEN)    0000000125217bc8 ffff8280019fafc0 ffff82c400cfd7e0 000000000033f5f8
> >>>(XEN)    ffff830421db1cc0 0000000000000001 ffff830425217c28 ffff82c4801d053d
> >>>(XEN)    ffff830425217bfc ffff830425217bf8 0000000025217bf0 00000000000ff000
> >>>(XEN)    0000000000000001 ffff830425217c10 0000000000000007 ffff830421db1cc0
> >>>(XEN)    ffff830421db1cc0 ffff830425217f18 ffff82c4802e1260 0000000000000000
> >>>(XEN)    ffff830425217c78 ffff82c4801d11fa 000000000033f5f7 00000000000ff000
> >>>(XEN) Xen call trace:
> >>>(XEN)    [<ffff82c4801d2007>] p2m_flush_locked+0x1a8/0x2be
> >>>(XEN)    [<ffff82c4801d2599>] p2m_flush_nestedp2m+0xea/0x140
> >>>(XEN)    [<ffff82c480208a8c>] hap_write_p2m_entry+0x223/0x246
> >>>(XEN)    [<ffff82c4801ce585>] paging_write_p2m_entry+0x6e/0x75
> >>>(XEN)    [<ffff82c4801d4b4e>] p2m_set_entry+0x42b/0x6a8
> >>>(XEN)    [<ffff82c4801d02d1>] set_p2m_entry+0xf2/0x140
> >>>(XEN)    [<ffff82c4801d053d>] p2m_remove_page+0x1dd/0x1ec
> >>>(XEN)    [<ffff82c4801d11fa>] guest_physmap_remove_page+0x109/0x148
> >>>(XEN)    [<ffff82c480168000>] arch_memory_op+0x6af/0x1039
> >>>(XEN)    [<ffff82c4801134fb>] do_memory_op+0x1bd7/0x1c2c
> >>>(XEN)    [<ffff82c480217708>] syscall_enter+0xc8/0x122
> >>>(XEN)
> >>>(XEN)
> >>>(XEN) ****************************************
> >>>
> >>>Christoph
> >>>
> >>>
> >>>
> >>>On 06/22/11 18:10, Tim Deegan wrote:
> >>>># HG changeset patch
> >>>># User Tim Deegan<Tim.Deegan@citrix.com>
> >>>># Date 1308758648 -3600
> >>>># Node ID c323e69a0a08ce9f1e54d2e2fa2edd9845bc8efe
> >>>># Parent  b7e5a25663329254cba539e21f4fbd5b32c67556
> >>>>Nested p2m: implement "flush" as a first-class action
> >>>>rather than using the teardown and init functions.
> >>>>This makes the locking clearer and avoids an expensive scan of all
> >>>>pfns that's only needed for non-nested p2ms.  It also moves the
> >>>>tlb flush into the proper place in the flush logic, avoiding a
> >>>>possible race against other CPUs.
> >>>>
> >>>>Signed-off-by: Tim Deegan<Tim.Deegan@citrix.com>
> >>>>
> >>>>diff -r b7e5a2566332 -r c323e69a0a08 xen/arch/x86/hvm/nestedhvm.c
> >>>>--- a/xen/arch/x86/hvm/nestedhvm.c    Tue Jun 21 18:28:53 2011 +0100
> >>>>+++ b/xen/arch/x86/hvm/nestedhvm.c    Wed Jun 22 17:04:08 2011 +0100
> >>>>@@ -119,12 +119,6 @@ nestedhvm_vmcx_flushtlb(struct p2m_domai
> >>>>         cpus_clear(p2m->p2m_dirty_cpumask);
> >>>>     }
> >>>>
> >>>>-void
> >>>>-nestedhvm_vmcx_flushtlbdomain(struct domain *d)
> >>>>-{
> >>>>-    on_selected_cpus(d->domain_dirty_cpumask, nestedhvm_flushtlb_ipi, d, 1);
> >>>>-}
> >>>>-
> >>>>     bool_t
> >>>>     nestedhvm_is_n2(struct vcpu *v)
> >>>>     {
> >>>>diff -r b7e5a2566332 -r c323e69a0a08 xen/arch/x86/mm/p2m.c
> >>>>--- a/xen/arch/x86/mm/p2m.c   Tue Jun 21 18:28:53 2011 +0100
> >>>>+++ b/xen/arch/x86/mm/p2m.c   Wed Jun 22 17:04:08 2011 +0100
> >>>>@@ -1050,20 +1050,41 @@ p2m_getlru_nestedp2m(struct domain *d, s
> >>>>         return lrup2m;
> >>>>     }
> >>>>
> >>>>-static int
> >>>>+/* Reset this p2m table to be empty */
> >>>>+static void
> >>>>     p2m_flush_locked(struct p2m_domain *p2m)
> >>>>     {
> >>>>-    ASSERT(p2m);
> >>>>-    if (p2m->cr3 == CR3_EADDR)
> >>>>-        /* Microoptimisation: p2m is already empty.
> >>>>-         * =>     about 0.3% speedup of overall system performance.
> >>>>-         */
> >>>>-        return 0;
> >>>>+    struct page_info *top, *pg;
> >>>>+    struct domain *d = p2m->domain;
> >>>>+    void *p;
> >+ mfn_t top_mfn;
> >
> >>>>
> >>>>-    p2m_teardown(p2m);
> >>>>-    p2m_initialise(p2m->domain, p2m);
> >>>>-    p2m->write_p2m_entry = nestedp2m_write_p2m_entry;
> >>>>-    return p2m_alloc_table(p2m);
> >>>>+    p2m_lock(p2m);
> >>>>+
> >>>>+    /* "Host" p2m tables can have shared entries&c that need a bit more
> >>>>+     * care when discarding them */
> >>>>+    ASSERT(p2m_is_nestedp2m(p2m));
> >>>>+    ASSERT(page_list_empty(&p2m->pod.super));
> >>>>+    ASSERT(page_list_empty(&p2m->pod.single));
> >>>>+
> >>>>+    /* This is no longer a valid nested p2m for any address space */
> >>>>+    p2m->cr3 = CR3_EADDR;
> >>>>+
> >>>>+    /* Zap the top level of the trie */
> >
> >+    top_mfn = pagetable_get_mfn(p2m_get_pagetable(p2m));
> >+    top = mfn_to_page(top_mfn);
> >+    p = map_domain_page(mfn_x(top_mfn));
> >
> >>>>+    clear_page(p);
> >>>>+    unmap_domain_page(p);
> >>>>+
> >>>>+    /* Make sure nobody else is using this p2m table */
> >>>>+    nestedhvm_vmcx_flushtlb(p2m);
> >>>>+
> >>>>+    /* Free the rest of the trie pages back to the paging pool */
> >>>>+    while ( (pg = page_list_remove_head(&p2m->pages)) )
> >>>>+        if ( pg != top )
> >>>>+            d->arch.paging.free_page(d, pg);
> >>>>+    page_list_add(top,&p2m->pages);
> >>>>+
> >>>>+    p2m_unlock(p2m);
> >>>>     }
> >>>>
> >>>>     void
> >>>>@@ -1074,9 +1095,8 @@ p2m_flush(struct vcpu *v, struct p2m_dom
> >>>>         ASSERT(v->domain == d);
> >>>>         vcpu_nestedhvm(v).nv_p2m = NULL;
> >>>>         nestedp2m_lock(d);
> >>>>-    BUG_ON(p2m_flush_locked(p2m) != 0);
> >>>>+    p2m_flush_locked(p2m);
> >>>>         hvm_asid_flush_vcpu(v);
> >>>>-    nestedhvm_vmcx_flushtlb(p2m);
> >>>>         nestedp2m_unlock(d);
> >>>>     }
> >>>>
> >>>>@@ -1086,12 +1106,8 @@ p2m_flush_nestedp2m(struct domain *d)
> >>>>         int i;
> >>>>
> >>>>         nestedp2m_lock(d);
> >>>>-    for (i = 0; i<     MAX_NESTEDP2M; i++) {
> >>>>-        struct p2m_domain *p2m = d->arch.nested_p2m[i];
> >>>>-        BUG_ON(p2m_flush_locked(p2m) != 0);
> >>>>-        cpus_clear(p2m->p2m_dirty_cpumask);
> >>>>-    }
> >>>>-    nestedhvm_vmcx_flushtlbdomain(d);
> >>>>+    for ( i = 0; i<     MAX_NESTEDP2M; i++ )
> >>>>+        p2m_flush_locked(d->arch.nested_p2m[i]);
> >>>>         nestedp2m_unlock(d);
> >>>>     }
> >>>>
> >>>>@@ -1104,7 +1120,7 @@ p2m_get_nestedp2m(struct vcpu *v, uint64
> >>>>         volatile struct nestedvcpu *nv =&vcpu_nestedhvm(v);
> >>>>         struct domain *d;
> >>>>         struct p2m_domain *p2m;
> >>>>-    int i, rv;
> >>>>+    int i;
> >>>>
> >>>>         if (cr3 == 0 || cr3 == CR3_EADDR)
> >>>>             cr3 = v->arch.hvm_vcpu.guest_cr[3];
> >>>>@@ -1136,9 +1152,7 @@ p2m_get_nestedp2m(struct vcpu *v, uint64
> >>>>          */
> >>>>         for (i = 0; i<     MAX_NESTEDP2M; i++) {
> >>>>             p2m = p2m_getlru_nestedp2m(d, NULL);
> >>>>-        rv = p2m_flush_locked(p2m);
> >>>>-        if (rv == 0)
> >>>>-            break;
> >>>>+        p2m_flush_locked(p2m);
> >>>>         }
> >>>>         nv->nv_p2m = p2m;
> >>>>         p2m->cr3 = cr3;
> >>>>diff -r b7e5a2566332 -r c323e69a0a08 xen/include/asm-x86/hvm/nestedhvm.h
> >>>>--- a/xen/include/asm-x86/hvm/nestedhvm.h     Tue Jun 21 18:28:53 2011 +0100
> >>>>+++ b/xen/include/asm-x86/hvm/nestedhvm.h     Wed Jun 22 17:04:08 2011 +0100
> >>>>@@ -61,7 +61,6 @@ unsigned long *nestedhvm_vcpu_iomap_get(
> >>>>         (!!vcpu_nestedhvm((v)).nv_vmswitch_in_progress)
> >>>>
> >>>>     void nestedhvm_vmcx_flushtlb(struct p2m_domain *p2m);
> >>>>-void nestedhvm_vmcx_flushtlbdomain(struct domain *d);
> >>>>
> >>>>     bool_t nestedhvm_is_n2(struct vcpu *v);
> >>>>
> >>>>
> 
> 
> -- 
> ---to satisfy European Law for business letters:
> Advanced Micro Devices GmbH
> Einsteinring 24, 85689 Dornach b. Muenchen
> Geschaeftsfuehrer: Alberto Bozzo, Andrew Bowd
> Sitz: Dornach, Gemeinde Aschheim, Landkreis Muenchen
> Registergericht Muenchen, HRB Nr. 43632
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel

-- 
Tim Deegan <Tim.Deegan@citrix.com>
Principal Software Engineer, Xen Platform Team
Citrix Systems UK Ltd.  (Company #02937203, SL9 0BG)

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Re: [PATCH 4 of 4] Nested p2m: rework locking around nested-p2m flushes and updates
  2011-06-23 15:08   ` Christoph Egger
@ 2011-06-24 10:45     ` Tim Deegan
  2011-06-24 11:08       ` Christoph Egger
  0 siblings, 1 reply; 18+ messages in thread
From: Tim Deegan @ 2011-06-24 10:45 UTC (permalink / raw)
  To: Christoph Egger; +Cc: xen-devel

At 17:08 +0200 on 23 Jun (1308848939), Christoph Egger wrote:
> >+    /* If this p2m table has been flushed or recycled under our feet,
> >+     * leave it alone.  We'll pick up the right one as we try to
> >+     * vmenter the guest. */
> >+    if ( p2m->cr3 == nhvm_vcpu_hostcr3(v) )
> >+         rv = set_p2m_entry(p2m, L2_gpa>>  PAGE_SHIFT,
> >+                            page_to_mfn(maddr_to_page(L0_gpa)),
> >+                            0 /*4K*/, p2mt, p2ma);
> 
> The introduction of this check leads to this crash when the L2 guest
> initializes its third vcpu:

D'oh!

diff -r b40d4bcca0d7 xen/arch/x86/mm/hap/nested_hap.c
--- a/xen/arch/x86/mm/hap/nested_hap.c	Fri Jun 24 11:16:53 2011 +0100
+++ b/xen/arch/x86/mm/hap/nested_hap.c	Fri Jun 24 11:43:30 2011 +0100
@@ -100,7 +100,7 @@ nestedhap_fix_p2m(struct vcpu *v, struct
                   paddr_t L2_gpa, paddr_t L0_gpa,
                   p2m_type_t p2mt, p2m_access_t p2ma)
 {
-    int rv = 0;
+    int rv = 1;
     ASSERT(p2m);
     ASSERT(p2m->set_entry);
 

This is what comes of not being able to test things properly.  Can you
describe your test setup for me?  I'm having no luck getting
xen-on-debian-on-xen-on-debian to work. 

Cheers,

Tim

-- 
Tim Deegan <Tim.Deegan@citrix.com>
Principal Software Engineer, Xen Platform Team
Citrix Systems UK Ltd.  (Company #02937203, SL9 0BG)

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Re: [PATCH 4 of 4] Nested p2m: rework locking around nested-p2m flushes and updates
  2011-06-24 10:45     ` Tim Deegan
@ 2011-06-24 11:08       ` Christoph Egger
  0 siblings, 0 replies; 18+ messages in thread
From: Christoph Egger @ 2011-06-24 11:08 UTC (permalink / raw)
  To: Tim Deegan; +Cc: xen-devel

On 06/24/11 12:45, Tim Deegan wrote:
> At 17:08 +0200 on 23 Jun (1308848939), Christoph Egger wrote:
>>> +    /* If this p2m table has been flushed or recycled under our feet,
>>> +     * leave it alone.  We'll pick up the right one as we try to
>>> +     * vmenter the guest. */
>>> +    if ( p2m->cr3 == nhvm_vcpu_hostcr3(v) )
>>> +         rv = set_p2m_entry(p2m, L2_gpa>>   PAGE_SHIFT,
>>> +                            page_to_mfn(maddr_to_page(L0_gpa)),
>>> +                            0 /*4K*/, p2mt, p2ma);
>>
>> The introduction of this check leads to this crash when the L2 guest
>> initializes its third vcpu:
>
> D'oh!
>
> diff -r b40d4bcca0d7 xen/arch/x86/mm/hap/nested_hap.c
> --- a/xen/arch/x86/mm/hap/nested_hap.c	Fri Jun 24 11:16:53 2011 +0100
> +++ b/xen/arch/x86/mm/hap/nested_hap.c	Fri Jun 24 11:43:30 2011 +0100
> @@ -100,7 +100,7 @@ nestedhap_fix_p2m(struct vcpu *v, struct
>                     paddr_t L2_gpa, paddr_t L0_gpa,
>                     p2m_type_t p2mt, p2m_access_t p2ma)
>   {
> -    int rv = 0;
> +    int rv = 1;
>       ASSERT(p2m);
>       ASSERT(p2m->set_entry);
>

Yes, that fixes the crash.

> This is what comes of not being able to test things properly.  Can you
> describe your test setup for me?  I'm having no luck getting
> xen-on-debian-on-xen-on-debian to work.

That should do it. If your problem is that the L1 Dom0 hangs at boot
then disable the TSC Deadline Timer in libxc/xc_cpuid_x86.c.
This feature has been broken since its introduction.

Christoph


-- 
---to satisfy European Law for business letters:
Advanced Micro Devices GmbH
Einsteinring 24, 85689 Dornach b. Muenchen
Geschaeftsfuehrer: Alberto Bozzo, Andrew Bowd
Sitz: Dornach, Gemeinde Aschheim, Landkreis Muenchen
Registergericht Muenchen, HRB Nr. 43632

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3 of 4] Nested p2m: clarify logic in p2m_get_nestedp2m()
  2011-06-22 16:10 ` [PATCH 3 of 4] Nested p2m: clarify logic in p2m_get_nestedp2m() Tim Deegan
@ 2011-06-24 14:25   ` Christoph Egger
  2011-06-24 14:37     ` Tim Deegan
  0 siblings, 1 reply; 18+ messages in thread
From: Christoph Egger @ 2011-06-24 14:25 UTC (permalink / raw)
  To: Tim Deegan; +Cc: xen-devel

On 06/22/11 18:10, Tim Deegan wrote:
> # HG changeset patch
> # User Tim Deegan<Tim.Deegan@citrix.com>
> # Date 1308758648 -3600
> # Node ID b265371addbbc8a58c95a269fe3cd0fdc866aaa3
> # Parent  dcb8ae5e3eaf6516c889087dfb15efa41a1ac3e9
> Nested p2m: clarify logic in p2m_get_nestedp2m()
>
> This just makes the behaviour of this function a bit more explicit.  It
> may be that it also needs to be changed. :)
>
> Signed-off-by: Tim Deegan<Tim.Deegan@citrix.com>
>
> diff -r dcb8ae5e3eaf -r b265371addbb xen/arch/x86/mm/p2m.c
> --- a/xen/arch/x86/mm/p2m.c	Wed Jun 22 17:04:08 2011 +0100
> +++ b/xen/arch/x86/mm/p2m.c	Wed Jun 22 17:04:08 2011 +0100
> @@ -1131,11 +1131,9 @@ p2m_get_nestedp2m(struct vcpu *v, uint64
>
>       d = v->domain;
>       nestedp2m_lock(d);
> -    for (i = 0; i<  MAX_NESTEDP2M; i++) {
> -        p2m = d->arch.nested_p2m[i];
> -        if ((p2m->cr3 != cr3&&  p2m->cr3 != CR3_EADDR) || (p2m != nv->nv_p2m))
> -            continue;
> -
> +    p2m = nv->nv_p2m;
> +    if ( p2m&&  (p2m->cr3 == cr3 || p2m->cr3 == CR3_EADDR) )
> +    {
>           nv->nv_flushp2m = 0;
>           p2m_getlru_nestedp2m(d, p2m);
>           nv->nv_p2m = p2m;
>


Ok, thanks.

In p2m_get_nestedp2m() replace this code hunk

     for (i = 0; i < MAX_NESTEDP2M; i++) {
         p2m = p2m_getlru_nestedp2m(d, NULL);
         p2m_flush_locked(p2m);
     }

with

     p2m = p2m_getlru_nestedp2m(d, NULL);
     p2m_flush_locked(p2m);

The 'i'' variable is unused then. This fixes an endless loop of
nested page faults I observe with SMP l2 guests.

The nested page fault loop happens in conjunction with the change
in patch 4 in nestedhap_fix_p2m().

Christoph


-- 
---to satisfy European Law for business letters:
Advanced Micro Devices GmbH
Einsteinring 24, 85689 Dornach b. Muenchen
Geschaeftsfuehrer: Alberto Bozzo, Andrew Bowd
Sitz: Dornach, Gemeinde Aschheim, Landkreis Muenchen
Registergericht Muenchen, HRB Nr. 43632

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3 of 4] Nested p2m: clarify logic in p2m_get_nestedp2m()
  2011-06-24 14:25   ` Christoph Egger
@ 2011-06-24 14:37     ` Tim Deegan
  2011-06-24 14:53       ` Christoph Egger
  0 siblings, 1 reply; 18+ messages in thread
From: Tim Deegan @ 2011-06-24 14:37 UTC (permalink / raw)
  To: Christoph Egger; +Cc: xen-devel

At 15:25 +0100 on 24 Jun (1308929140), Christoph Egger wrote:
> > diff -r dcb8ae5e3eaf -r b265371addbb xen/arch/x86/mm/p2m.c
> > --- a/xen/arch/x86/mm/p2m.c	Wed Jun 22 17:04:08 2011 +0100
> > +++ b/xen/arch/x86/mm/p2m.c	Wed Jun 22 17:04:08 2011 +0100
> > @@ -1131,11 +1131,9 @@ p2m_get_nestedp2m(struct vcpu *v, uint64
> >
> >       d = v->domain;
> >       nestedp2m_lock(d);
> > -    for (i = 0; i<  MAX_NESTEDP2M; i++) {
> > -        p2m = d->arch.nested_p2m[i];
> > -        if ((p2m->cr3 != cr3&&  p2m->cr3 != CR3_EADDR) || (p2m != nv->nv_p2m))
> > -            continue;
> > -
> > +    p2m = nv->nv_p2m;
> > +    if ( p2m&&  (p2m->cr3 == cr3 || p2m->cr3 == CR3_EADDR) )
> > +    {
> >           nv->nv_flushp2m = 0;
> >           p2m_getlru_nestedp2m(d, p2m);
> >           nv->nv_p2m = p2m;
> >
> 
> 
> Ok, thanks.
> 
> In p2m_get_nestedp2m() replace this code hunk
> 
>      for (i = 0; i < MAX_NESTEDP2M; i++) {
>          p2m = p2m_getlru_nestedp2m(d, NULL);
>          p2m_flush_locked(p2m);
>      }
> 
> with
> 
>      p2m = p2m_getlru_nestedp2m(d, NULL);
>      p2m_flush_locked(p2m);

That seems like an improvement.  I'll put it into my queue.  

More generally, I think that you need to figure out exactly what
behaviour you want from this function.  For example in the current code
there's no way that two vcpus with the same ncr3 value can share a
nested-p2m.  Is that deliberate?

Cheers,

Tim.

-- 
Tim Deegan <Tim.Deegan@citrix.com>
Principal Software Engineer, Xen Platform Team
Citrix Systems UK Ltd.  (Company #02937203, SL9 0BG)

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3 of 4] Nested p2m: clarify logic in p2m_get_nestedp2m()
  2011-06-24 14:37     ` Tim Deegan
@ 2011-06-24 14:53       ` Christoph Egger
  2011-06-24 15:05         ` Tim Deegan
  0 siblings, 1 reply; 18+ messages in thread
From: Christoph Egger @ 2011-06-24 14:53 UTC (permalink / raw)
  To: Tim Deegan; +Cc: xen-devel

On 06/24/11 16:37, Tim Deegan wrote:
> At 15:25 +0100 on 24 Jun (1308929140), Christoph Egger wrote:
>>> diff -r dcb8ae5e3eaf -r b265371addbb xen/arch/x86/mm/p2m.c
>>> --- a/xen/arch/x86/mm/p2m.c	Wed Jun 22 17:04:08 2011 +0100
>>> +++ b/xen/arch/x86/mm/p2m.c	Wed Jun 22 17:04:08 2011 +0100
>>> @@ -1131,11 +1131,9 @@ p2m_get_nestedp2m(struct vcpu *v, uint64
>>>
>>>        d = v->domain;
>>>        nestedp2m_lock(d);
>>> -    for (i = 0; i<   MAX_NESTEDP2M; i++) {
>>> -        p2m = d->arch.nested_p2m[i];
>>> -        if ((p2m->cr3 != cr3&&   p2m->cr3 != CR3_EADDR) || (p2m != nv->nv_p2m))
>>> -            continue;
>>> -
>>> +    p2m = nv->nv_p2m;
>>> +    if ( p2m&&   (p2m->cr3 == cr3 || p2m->cr3 == CR3_EADDR) )
>>> +    {
>>>            nv->nv_flushp2m = 0;
>>>            p2m_getlru_nestedp2m(d, p2m);
>>>            nv->nv_p2m = p2m;
>>>
>>
>>
>> Ok, thanks.
>>
>> In p2m_get_nestedp2m() replace this code hunk
>>
>>       for (i = 0; i<  MAX_NESTEDP2M; i++) {
>>           p2m = p2m_getlru_nestedp2m(d, NULL);
>>           p2m_flush_locked(p2m);
>>       }
>>
>> with
>>
>>       p2m = p2m_getlru_nestedp2m(d, NULL);
>>       p2m_flush_locked(p2m);
>
> That seems like an improvement.  I'll put it into my queue.
>
> More generally, I think that you need to figure out exactly what
> behaviour you want from this function.  For example in the current code
> there's no way that two vcpus with the same ncr3 value can share a
> nested-p2m.  Is that deliberate?


By 'current code' do you mean with or w/o this patch ?

It is deliberate that two vcpus with the same ncr3 share a nested-p2m.
But fixing the p2m locking problem in upstream tree has a higher
priority right now and we can work on that after the p2m locking
issue is fixed upstream.

Christoph


-- 
---to satisfy European Law for business letters:
Advanced Micro Devices GmbH
Einsteinring 24, 85689 Dornach b. Muenchen
Geschaeftsfuehrer: Alberto Bozzo, Andrew Bowd
Sitz: Dornach, Gemeinde Aschheim, Landkreis Muenchen
Registergericht Muenchen, HRB Nr. 43632

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3 of 4] Nested p2m: clarify logic in p2m_get_nestedp2m()
  2011-06-24 14:53       ` Christoph Egger
@ 2011-06-24 15:05         ` Tim Deegan
  2011-06-27  9:46           ` Christoph Egger
  0 siblings, 1 reply; 18+ messages in thread
From: Tim Deegan @ 2011-06-24 15:05 UTC (permalink / raw)
  To: Christoph Egger; +Cc: xen-devel

Hi, 

At 15:53 +0100 on 24 Jun (1308930816), Christoph Egger wrote:
> > More generally, I think that you need to figure out exactly what
> > behaviour you want from this function.  For example in the current code
> > there's no way that two vcpus with the same ncr3 value can share a
> > nested-p2m.  Is that deliberate?
> 
> By 'current code' do you mean with or w/o this patch ?

Both, and all versions of the code from before my current series to the
full series applied. 

> It is deliberate that two vcpus with the same ncr3 share a nested-p2m.

But they don't.  The code in current unstable tip does this:

    for (i = 0; i < MAX_NESTEDP2M; i++) {
        p2m = d->arch.nested_p2m[i];
        if ((p2m->cr3 != cr3 && p2m->cr3 != CR3_EADDR) || (p2m != nv->nv_p2m))
            continue;

	// ... return this p2m
    }

    /* All p2m's are or were in use. Take the least recent used one,
     * flush it and reuse.
     */
    for (i = 0; i < MAX_NESTEDP2M; i++) {
        p2m = p2m_getlru_nestedp2m(d, NULL);
        rv = p2m_flush_locked(p2m);
        if (rv == 0)
            break;
    }

    // ... return this p2m

The first loop never returns a p2m that's != nv->nv_p2m.  The second
loop always returns a fresh, flushed p2m.  So there's no way that two
different vcpus, starting with nv->nv_p2m == NULL, can ever get the same
p2m as each other. 

The pseudocode is basically: 
 - If I have an existing nv_p2m and it hasn't been flushed, reuse it. 
 - Else flush all np2ms in LRU order and return the last one flushed.

My patch 3/4 doesn't change the logic at all (I think); your latest fix
just avoids the over-aggressive flushing of all np2ms. 

> But fixing the p2m locking problem in upstream tree has a higher
> priority right now and we can work on that after the p2m locking
> issue is fixed upstream.

AFAICS the locking is fixed by the current set of patches (though I'm
still not able to run Xen-in-Xen well enough to test them).  I can send
the full series again for clarity if you like.  The outstanding bug is
that there are many more IPIs than previously; I suspect that your
latest fix will reduce them quite a lot by avoiding a storm of
mutually-destructive flush operations.  If the performance is still too
bad we can add more IPI-avoidance strategies.

Cheers,

Tim.

-- 
Tim Deegan <Tim.Deegan@citrix.com>
Principal Software Engineer, Xen Platform Team
Citrix Systems UK Ltd.  (Company #02937203, SL9 0BG)

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3 of 4] Nested p2m: clarify logic in p2m_get_nestedp2m()
  2011-06-24 15:05         ` Tim Deegan
@ 2011-06-27  9:46           ` Christoph Egger
  0 siblings, 0 replies; 18+ messages in thread
From: Christoph Egger @ 2011-06-27  9:46 UTC (permalink / raw)
  To: Tim Deegan; +Cc: xen-devel

On 06/24/11 17:05, Tim Deegan wrote:
> Hi,
>
> At 15:53 +0100 on 24 Jun (1308930816), Christoph Egger wrote:
>>> More generally, I think that you need to figure out exactly what
>>> behaviour you want from this function.  For example in the current code
>>> there's no way that two vcpus with the same ncr3 value can share a
>>> nested-p2m.  Is that deliberate?
>>
>> By 'current code' do you mean with or w/o this patch ?
>
> Both, and all versions of the code from before my current series to the
> full series applied.
>
>> It is deliberate that two vcpus with the same ncr3 share a nested-p2m.
>
> But they don't.  The code in current unstable tip does this:
>
>      for (i = 0; i<  MAX_NESTEDP2M; i++) {
>          p2m = d->arch.nested_p2m[i];
>          if ((p2m->cr3 != cr3&&  p2m->cr3 != CR3_EADDR) || (p2m != nv->nv_p2m))
>              continue;
>
> 	// ... return this p2m
>      }
>
>      /* All p2m's are or were in use. Take the least recent used one,
>       * flush it and reuse.
>       */
>      for (i = 0; i<  MAX_NESTEDP2M; i++) {
>          p2m = p2m_getlru_nestedp2m(d, NULL);
>          rv = p2m_flush_locked(p2m);
>          if (rv == 0)
>              break;
>      }
>
>      // ... return this p2m
>
> The first loop never returns a p2m that's != nv->nv_p2m.  The second
> loop always returns a fresh, flushed p2m.  So there's no way that two
> different vcpus, starting with nv->nv_p2m == NULL, can ever get the same
> p2m as each other.
>
> The pseudocode is basically:
>   - If I have an existing nv_p2m and it hasn't been flushed, reuse it.
>   - Else flush all np2ms in LRU order and return the last one flushed.

I see. Thanks.

> My patch 3/4 doesn't change the logic at all (I think); your latest fix
> just avoids the over-aggressive flushing of all np2ms.

Yes and results in a noticable performance boost.

>> But fixing the p2m locking problem in upstream tree has a higher
>> priority right now and we can work on that after the p2m locking
>> issue is fixed upstream.
>
> AFAICS the locking is fixed by the current set of patches

Yes, I can confirm that. patch 4 fixes it.

> (though I'm still not able to run Xen-in-Xen well enough to test them).

Can you describe what problem you are facing? Is it a hanging L1 Dom0 ?
Does L1 Dom0 not see the SVM cpuid feature bit?

> I can send the full series again for clarity if you like.

Yes, please!

> The outstanding bug is that there are many more IPIs than previously;
> I suspect that your latest fix will reduce them quite a lot by
> avoiding a storm of mutually-destructive flush operations.

Yes because p2m_flush_nestedp2m() runs less often.

The number of sent IPIs from p2m_flush_nestedp2m() is
still 10 times higher.

> If the performance is still too bad we can add more IPI-avoidance strategies.

Yes, this still brings a significant performance improvement for L3
guests because both the host and the L1 guest send fewer IPIs then.

We can do this performance improvement in a separate patch series.

Christoph


-- 
---to satisfy European Law for business letters:
Advanced Micro Devices GmbH
Einsteinring 24, 85689 Dornach b. Muenchen
Geschaeftsfuehrer: Alberto Bozzo, Andrew Bowd
Sitz: Dornach, Gemeinde Aschheim, Landkreis Muenchen
Registergericht Muenchen, HRB Nr. 43632

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2011-06-27  9:46 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-06-22 16:10 [PATCH 0 of 4] RFC: Nested-p2m cleanups and locking changes Tim Deegan
2011-06-22 16:10 ` [PATCH 1 of 4] Nested p2m: implement "flush" as a first-class action Tim Deegan
2011-06-23 12:50   ` Christoph Egger
2011-06-23 12:56     ` Christoph Egger
2011-06-23 15:04       ` Christoph Egger
2011-06-23 15:21         ` Christoph Egger
2011-06-24  9:07           ` Tim Deegan
2011-06-22 16:10 ` [PATCH 2 of 4] Nested p2m: remove bogus check of CR3 value Tim Deegan
2011-06-22 16:10 ` [PATCH 3 of 4] Nested p2m: clarify logic in p2m_get_nestedp2m() Tim Deegan
2011-06-24 14:25   ` Christoph Egger
2011-06-24 14:37     ` Tim Deegan
2011-06-24 14:53       ` Christoph Egger
2011-06-24 15:05         ` Tim Deegan
2011-06-27  9:46           ` Christoph Egger
2011-06-22 16:10 ` [PATCH 4 of 4] Nested p2m: rework locking around nested-p2m flushes and updates Tim Deegan
2011-06-23 15:08   ` Christoph Egger
2011-06-24 10:45     ` Tim Deegan
2011-06-24 11:08       ` Christoph Egger

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.