* [PATCH 00 of 17] mm/p2m cleanups
@ 2011-06-02 12:20 Tim Deegan
  2011-06-02 12:20 ` [PATCH 01 of 17] x86/mm/p2m: Mark internal functions static Tim Deegan
                   ` (16 more replies)
  0 siblings, 17 replies; 18+ messages in thread
From: Tim Deegan @ 2011-06-02 12:20 UTC (permalink / raw)
  To: xen-devel

This patch series fixes up the p2m and other x86/mm interfaces, and 
in particular sorts out a lot of the locking discipline.
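
To make the locking point concrete, the discipline the series converges on
is "take the p2m lock, update, audit, release".  The sketch below is purely
illustrative; it just restates the calling sequence that appears in the
hunks later in this series (p2m_lock / set_p2m_entry / audit_p2m /
p2m_unlock), and demo_evict itself is a made-up name, not a function added
by any patch.

    /* Illustrative only: every p2m update sits between p2m_lock and
     * p2m_unlock, with the audit run while the lock is still held. */
    static void demo_evict(struct p2m_domain *p2m, unsigned long gfn)
    {
        p2m_lock(p2m);
        set_p2m_entry(p2m, gfn, _mfn(INVALID_MFN), 0,
                      p2m_ram_paged, p2m->default_access);
        audit_p2m(p2m, 1);
        p2m_unlock(p2m);
    }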

Cheers,

Tim.

-- 
Tim Deegan <Tim.Deegan@citrix.com>
Principal Software Engineer, Xen Platform Team
Citrix Systems UK Ltd.  (Company #02937203, SL9 0BG)


* [PATCH 01 of 17] x86/mm/p2m: Mark internal functions static
  2011-06-02 12:20 [PATCH 00 of 17] mm/p2m cleanups Tim Deegan
@ 2011-06-02 12:20 ` Tim Deegan
  2011-06-02 12:20 ` [PATCH 02 of 17] x86/mm/p2m: little fixes and tidying up Tim Deegan
                   ` (15 subsequent siblings)
  16 siblings, 0 replies; 18+ messages in thread
From: Tim Deegan @ 2011-06-02 12:20 UTC (permalink / raw)
  To: xen-devel

# HG changeset patch
# User Tim Deegan <Tim.Deegan@citrix.com>
# Date 1307017011 -3600
# Node ID c7d0b66e41cde0cca1d9162b92b41fc6bd30cf20
# Parent  221f431092c04df90ed017f7471ce9641356d40d
x86/mm/p2m: Mark internal functions static

Signed-off-by: Tim Deegan <Tim.Deegan@citrix.com>
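
As a reminder of what the mechanical change looks like (an illustrative
sketch with made-up names, not code taken from this patch): a function that
is only called within its own file loses its prototype in the header and
gains internal linkage in the .c file.

    /* some_header.h, before:
     *     unsigned long helper_to_flags(int t);    <- prototype dropped
     */

    /* some_file.c, after: visible only inside this translation unit */
    static unsigned long helper_to_flags(int t)
    {
        return (t & 0x7UL) << 9;
    }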

diff -r 221f431092c0 -r c7d0b66e41cd xen/arch/x86/mm/p2m-ept.c
--- a/xen/arch/x86/mm/p2m-ept.c	Wed Jun 01 16:50:16 2011 +0100
+++ b/xen/arch/x86/mm/p2m-ept.c	Thu Jun 02 13:16:51 2011 +0100
@@ -164,7 +164,7 @@ static int ept_set_middle_entry(struct p
 }
 
 /* free ept sub tree behind an entry */
-void ept_free_entry(struct p2m_domain *p2m, ept_entry_t *ept_entry, int level)
+static void ept_free_entry(struct p2m_domain *p2m, ept_entry_t *ept_entry, int level)
 {
     /* End if the entry is a leaf entry. */
     if ( level == 0 || !is_epte_present(ept_entry) ||
diff -r 221f431092c0 -r c7d0b66e41cd xen/arch/x86/mm/p2m-pt.c
--- a/xen/arch/x86/mm/p2m-pt.c	Wed Jun 01 16:50:16 2011 +0100
+++ b/xen/arch/x86/mm/p2m-pt.c	Thu Jun 02 13:16:51 2011 +0100
@@ -54,7 +54,7 @@
 #define SUPERPAGE_PAGES (1UL << 9)
 #define superpage_aligned(_x)  (((_x)&(SUPERPAGE_PAGES-1))==0)
 
-unsigned long p2m_type_to_flags(p2m_type_t t, mfn_t mfn)
+static unsigned long p2m_type_to_flags(p2m_type_t t, mfn_t mfn)
 {
     unsigned long flags;
 #ifdef __x86_64__
@@ -100,7 +100,7 @@ unsigned long p2m_type_to_flags(p2m_type
 // Find the next level's P2M entry, checking for out-of-range gfn's...
 // Returns NULL on error.
 //
-l1_pgentry_t *
+static l1_pgentry_t *
 p2m_find_entry(void *table, unsigned long *gfn_remainder,
                    unsigned long gfn, uint32_t shift, uint32_t max)
 {
@@ -118,40 +118,8 @@ p2m_find_entry(void *table, unsigned lon
     return (l1_pgentry_t *)table + index;
 }
 
-struct page_info *
-p2m_alloc_ptp(struct p2m_domain *p2m, unsigned long type)
-{
-    struct page_info *pg;
-
-    ASSERT(p2m);
-    ASSERT(p2m->domain);
-    ASSERT(p2m->domain->arch.paging.alloc_page);
-    pg = p2m->domain->arch.paging.alloc_page(p2m->domain);
-    if (pg == NULL)
-        return NULL;
-
-    page_list_add_tail(pg, &p2m->pages);
-    pg->u.inuse.type_info = type | 1 | PGT_validated;
-
-    return pg;
-}
-
-void
-p2m_free_ptp(struct p2m_domain *p2m, struct page_info *pg)
-{
-    ASSERT(pg);
-    ASSERT(p2m);
-    ASSERT(p2m->domain);
-    ASSERT(p2m->domain->arch.paging.free_page);
-
-    page_list_del(pg, &p2m->pages);
-    p2m->domain->arch.paging.free_page(p2m->domain, pg);
-
-    return;
-}
-
 /* Free intermediate tables from a p2m sub-tree */
-void
+static void
 p2m_free_entry(struct p2m_domain *p2m, l1_pgentry_t *p2m_entry, int page_order)
 {
     /* End if the entry is a leaf entry. */
@@ -864,7 +832,8 @@ out:
 /* Walk the whole p2m table, changing any entries of the old type
  * to the new type.  This is used in hardware-assisted paging to 
  * quickly enable or diable log-dirty tracking */
-void p2m_change_type_global(struct p2m_domain *p2m, p2m_type_t ot, p2m_type_t nt)
+static void p2m_change_type_global(struct p2m_domain *p2m,
+                                   p2m_type_t ot, p2m_type_t nt)
 {
     unsigned long mfn, gfn, flags;
     l1_pgentry_t l1e_content;
diff -r 221f431092c0 -r c7d0b66e41cd xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c	Wed Jun 01 16:50:16 2011 +0100
+++ b/xen/arch/x86/mm/p2m.c	Thu Jun 02 13:16:51 2011 +0100
@@ -161,6 +161,36 @@ int set_p2m_entry(struct p2m_domain *p2m
     return rc;
 }
 
+struct page_info *p2m_alloc_ptp(struct p2m_domain *p2m, unsigned long type)
+{
+    struct page_info *pg;
+
+    ASSERT(p2m);
+    ASSERT(p2m->domain);
+    ASSERT(p2m->domain->arch.paging.alloc_page);
+    pg = p2m->domain->arch.paging.alloc_page(p2m->domain);
+    if (pg == NULL)
+        return NULL;
+
+    page_list_add_tail(pg, &p2m->pages);
+    pg->u.inuse.type_info = type | 1 | PGT_validated;
+
+    return pg;
+}
+
+void p2m_free_ptp(struct p2m_domain *p2m, struct page_info *pg)
+{
+    ASSERT(pg);
+    ASSERT(p2m);
+    ASSERT(p2m->domain);
+    ASSERT(p2m->domain->arch.paging.free_page);
+
+    page_list_del(pg, &p2m->pages);
+    p2m->domain->arch.paging.free_page(p2m->domain, pg);
+
+    return;
+}
+
 // Allocate a new p2m table for a domain.
 //
 // The structure of the p2m table is that of a pagetable for xen (i.e. it is
diff -r 221f431092c0 -r c7d0b66e41cd xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h	Wed Jun 01 16:50:16 2011 +0100
+++ b/xen/include/asm-x86/p2m.h	Thu Jun 02 13:16:51 2011 +0100
@@ -499,21 +499,11 @@ static inline unsigned long mfn_to_gfn(s
 /* Init the datastructures for later use by the p2m code */
 int p2m_init(struct domain *d);
 
-/* PTE flags for various types of p2m entry */
-unsigned long p2m_type_to_flags(p2m_type_t t, mfn_t mfn);
-
 /* Allocate a new p2m table for a domain. 
  *
  * Returns 0 for success or -errno. */
 int p2m_alloc_table(struct p2m_domain *p2m);
 
-/* Find the next level's P2M entry, checking for out-of-range gfn's...
- * Returns NULL on error.
- */
-l1_pgentry_t *
-p2m_find_entry(void *table, unsigned long *gfn_remainder,
-               unsigned long gfn, uint32_t shift, uint32_t max);
-
 /* Return all the p2m resources to Xen. */
 void p2m_teardown(struct p2m_domain *p2m);
 void p2m_final_teardown(struct domain *d);
@@ -584,7 +574,6 @@ static inline void guest_physmap_remove_
 }
 
 /* Change types across all p2m entries in a domain */
-void p2m_change_type_global(struct p2m_domain *p2m, p2m_type_t ot, p2m_type_t nt);
 void p2m_change_entry_type_global(struct p2m_domain *p2m, p2m_type_t ot, p2m_type_t nt);
 
 /* Compare-exchange the type of a single p2m entry */


* [PATCH 02 of 17] x86/mm/p2m: little fixes and tidying up
  2011-06-02 12:20 [PATCH 00 of 17] mm/p2m cleanups Tim Deegan
  2011-06-02 12:20 ` [PATCH 01 of 17] x86/mm/p2m: Mark internal functions static Tim Deegan
@ 2011-06-02 12:20 ` Tim Deegan
  2011-06-02 12:20 ` [PATCH 03 of 17] x86/mm/p2m: hide the current-domain fast-path inside the p2m-pt code Tim Deegan
                   ` (14 subsequent siblings)
  16 siblings, 0 replies; 18+ messages in thread
From: Tim Deegan @ 2011-06-02 12:20 UTC (permalink / raw)
  To: xen-devel

# HG changeset patch
# User Tim Deegan <Tim.Deegan@citrix.com>
# Date 1307017012 -3600
# Node ID 9344034d624b2e3cd6b0025ab2051cb89bd7e04a
# Parent  c7d0b66e41cde0cca1d9162b92b41fc6bd30cf20
x86/mm/p2m: little fixes and tidying up

Signed-off-by: Tim Deegan <Tim.Deegan@citrix.com>

diff -r c7d0b66e41cd -r 9344034d624b xen/arch/x86/mm/p2m-pod.c
--- a/xen/arch/x86/mm/p2m-pod.c	Thu Jun 02 13:16:51 2011 +0100
+++ b/xen/arch/x86/mm/p2m-pod.c	Thu Jun 02 13:16:52 2011 +0100
@@ -42,7 +42,6 @@
 #undef page_to_mfn
 #define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
 
-#define SUPERPAGE_PAGES (1UL << 9)
 #define superpage_aligned(_x)  (((_x)&(SUPERPAGE_PAGES-1))==0)
 
 /*
@@ -688,8 +687,7 @@ p2m_pod_zero_check_superpage(struct p2m_
     }
 
     /* Try to remove the page, restoring old mapping if it fails. */
-    set_p2m_entry(p2m, gfn,
-                  _mfn(POPULATE_ON_DEMAND_MFN), 9,
+    set_p2m_entry(p2m, gfn, _mfn(0), 9,
                   p2m_populate_on_demand, p2m->default_access);
 
     /* Make none of the MFNs are used elsewhere... for example, mapped
@@ -801,8 +799,7 @@ p2m_pod_zero_check(struct p2m_domain *p2
         }
 
         /* Try to remove the page, restoring old mapping if it fails. */
-        set_p2m_entry(p2m, gfns[i],
-                      _mfn(POPULATE_ON_DEMAND_MFN), 0,
+        set_p2m_entry(p2m, gfns[i], _mfn(0), 0,
                       p2m_populate_on_demand, p2m->default_access);
 
         /* See if the page was successfully unmapped.  (Allow one refcount
@@ -966,7 +963,7 @@ p2m_pod_demand_populate(struct p2m_domai
          * set_p2m_entry() should automatically shatter the 1GB page into 
          * 512 2MB pages. The rest of 511 calls are unnecessary.
          */
-        set_p2m_entry(p2m, gfn_aligned, _mfn(POPULATE_ON_DEMAND_MFN), 9,
+        set_p2m_entry(p2m, gfn_aligned, _mfn(0), 9,
                       p2m_populate_on_demand, p2m->default_access);
         audit_p2m(p2m, 1);
         p2m_unlock(p2m);
@@ -1054,7 +1051,7 @@ remap_and_retry:
     /* Remap this 2-meg region in singleton chunks */
     gfn_aligned = (gfn>>order)<<order;
     for(i=0; i<(1<<order); i++)
-        set_p2m_entry(p2m, gfn_aligned+i, _mfn(POPULATE_ON_DEMAND_MFN), 0,
+        set_p2m_entry(p2m, gfn_aligned+i, _mfn(0), 0,
                       p2m_populate_on_demand, p2m->default_access);
     if ( tb_init_done )
     {
@@ -1114,7 +1111,7 @@ guest_physmap_mark_populate_on_demand(st
     }
 
     /* Now, actually do the two-way mapping */
-    if ( !set_p2m_entry(p2m, gfn, _mfn(POPULATE_ON_DEMAND_MFN), order,
+    if ( !set_p2m_entry(p2m, gfn, _mfn(0), order,
                         p2m_populate_on_demand, p2m->default_access) )
         rc = -EINVAL;
     else
diff -r c7d0b66e41cd -r 9344034d624b xen/arch/x86/mm/p2m-pt.c
--- a/xen/arch/x86/mm/p2m-pt.c	Thu Jun 02 13:16:51 2011 +0100
+++ b/xen/arch/x86/mm/p2m-pt.c	Thu Jun 02 13:16:52 2011 +0100
@@ -51,9 +51,6 @@
 #define P2M_BASE_FLAGS \
         (_PAGE_PRESENT | _PAGE_USER | _PAGE_DIRTY | _PAGE_ACCESSED)
 
-#define SUPERPAGE_PAGES (1UL << 9)
-#define superpage_aligned(_x)  (((_x)&(SUPERPAGE_PAGES-1))==0)
-
 static unsigned long p2m_type_to_flags(p2m_type_t t, mfn_t mfn)
 {
     unsigned long flags;
@@ -67,32 +64,31 @@ static unsigned long p2m_type_to_flags(p
 #else
     flags = (t & 0x7UL) << 9;
 #endif
-#ifndef HAVE_GRANT_MAP_P2M
-    BUG_ON(p2m_is_grant(t));
+
+#ifndef __x86_64__
+    /* 32-bit builds don't support a lot of the p2m types */
+    BUG_ON(t > p2m_populate_on_demand);
 #endif
+
     switch(t)
     {
     case p2m_invalid:
+    case p2m_mmio_dm:
+    case p2m_populate_on_demand:
     default:
         return flags;
+    case p2m_ram_ro:
+    case p2m_grant_map_ro:
+    case p2m_ram_logdirty:
+    case p2m_ram_shared:
+        return flags | P2M_BASE_FLAGS;
     case p2m_ram_rw:
     case p2m_grant_map_rw:
         return flags | P2M_BASE_FLAGS | _PAGE_RW;
-    case p2m_ram_logdirty:
-        return flags | P2M_BASE_FLAGS;
-    case p2m_ram_ro:
-    case p2m_grant_map_ro:
-        return flags | P2M_BASE_FLAGS;
-    case p2m_ram_shared:
-        return flags | P2M_BASE_FLAGS;
-    case p2m_mmio_dm:
-        return flags;
     case p2m_mmio_direct:
         if ( !rangeset_contains_singleton(mmio_ro_ranges, mfn_x(mfn)) )
             flags |= _PAGE_RW;
         return flags | P2M_BASE_FLAGS | _PAGE_PCD;
-    case p2m_populate_on_demand:
-        return flags;
     }
 }
 
diff -r c7d0b66e41cd -r 9344034d624b xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c	Thu Jun 02 13:16:51 2011 +0100
+++ b/xen/arch/x86/mm/p2m.c	Thu Jun 02 13:16:52 2011 +0100
@@ -243,31 +243,30 @@ int p2m_alloc_table(struct p2m_domain *p
                         p2m_invalid, p2m->default_access) )
         goto error;
 
-    if (p2m_is_nestedp2m(p2m))
-        goto nesteddone;
+    if ( !p2m_is_nestedp2m(p2m) )
+    {
+        /* Copy all existing mappings from the page list and m2p */
+        spin_lock(&p2m->domain->page_alloc_lock);
+        page_list_for_each(page, &p2m->domain->page_list)
+        {
+            mfn = page_to_mfn(page);
+            gfn = get_gpfn_from_mfn(mfn_x(mfn));
+            /* Pages should not be shared that early */
+            ASSERT(gfn != SHARED_M2P_ENTRY);
+            page_count++;
+            if (
+#ifdef __x86_64__
+                (gfn != 0x5555555555555555L)
+#else
+                (gfn != 0x55555555L)
+#endif
+                && gfn != INVALID_M2P_ENTRY
+                && !set_p2m_entry(p2m, gfn, mfn, 0, p2m_ram_rw, p2m->default_access) )
+                goto error_unlock;
+        }
+        spin_unlock(&p2m->domain->page_alloc_lock);
+    }
 
-    /* Copy all existing mappings from the page list and m2p */
-    spin_lock(&p2m->domain->page_alloc_lock);
-    page_list_for_each(page, &p2m->domain->page_list)
-    {
-        mfn = page_to_mfn(page);
-        gfn = get_gpfn_from_mfn(mfn_x(mfn));
-        /* Pages should not be shared that early */
-        ASSERT(gfn != SHARED_M2P_ENTRY);
-        page_count++;
-        if (
-#ifdef __x86_64__
-            (gfn != 0x5555555555555555L)
-#else
-            (gfn != 0x55555555L)
-#endif
-             && gfn != INVALID_M2P_ENTRY
-            && !set_p2m_entry(p2m, gfn, mfn, 0, p2m_ram_rw, p2m->default_access) )
-            goto error_unlock;
-    }
-    spin_unlock(&p2m->domain->page_alloc_lock);
-
- nesteddone:
     P2M_PRINTK("p2m table initialised (%u pages)\n", page_count);
     p2m_unlock(p2m);
     return 0;
@@ -693,7 +692,8 @@ int p2m_mem_paging_evict(struct p2m_doma
 
     /* Remove mapping from p2m table */
     p2m_lock(p2m);
-    set_p2m_entry(p2m, gfn, _mfn(PAGING_MFN), 0, p2m_ram_paged, p2m->default_access);
+    set_p2m_entry(p2m, gfn, _mfn(INVALID_MFN), 0, 
+                  p2m_ram_paged, p2m->default_access);
     audit_p2m(p2m, 1);
     p2m_unlock(p2m);
 
@@ -743,7 +743,8 @@ void p2m_mem_paging_populate(struct p2m_
     if ( p2mt == p2m_ram_paged )
     {
         p2m_lock(p2m);
-        set_p2m_entry(p2m, gfn, _mfn(PAGING_MFN), 0, p2m_ram_paging_in_start, p2m->default_access);
+        set_p2m_entry(p2m, gfn, _mfn(INVALID_MFN), 0, 
+                      p2m_ram_paging_in_start, p2m->default_access);
         audit_p2m(p2m, 1);
         p2m_unlock(p2m);
     }
diff -r c7d0b66e41cd -r 9344034d624b xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h	Thu Jun 02 13:16:51 2011 +0100
+++ b/xen/include/asm-x86/p2m.h	Thu Jun 02 13:16:52 2011 +0100
@@ -47,10 +47,6 @@
  */
 #define phys_to_machine_mapping ((l1_pgentry_t *)RO_MPT_VIRT_START)
 
-#ifdef __x86_64__
-#define HAVE_GRANT_MAP_P2M
-#endif
-
 /*
  * The upper levels of the p2m pagetable always contain full rights; all 
  * variation in the access control bits is made in the level-1 PTEs.
@@ -78,20 +74,16 @@ typedef enum {
     p2m_mmio_direct = 5,        /* Read/write mapping of genuine MMIO area */
     p2m_populate_on_demand = 6, /* Place-holder for empty memory */
 
-    /* Note that these can only be used if HAVE_GRANT_MAP_P2M is
-       defined.  They get defined anyway so as to avoid lots of
-       #ifdef's everywhere else. */
-    p2m_grant_map_rw = 7,       /* Read/write grant mapping */
-    p2m_grant_map_ro = 8,       /* Read-only grant mapping */
-
-    /* Likewise, although these are defined in all builds, they can only
+    /* Although these are defined in all builds, they can only
      * be used in 64-bit builds */
+    p2m_grant_map_rw = 7,         /* Read/write grant mapping */
+    p2m_grant_map_ro = 8,         /* Read-only grant mapping */
     p2m_ram_paging_out = 9,       /* Memory that is being paged out */
     p2m_ram_paged = 10,           /* Memory that has been paged out */
     p2m_ram_paging_in = 11,       /* Memory that is being paged in */
     p2m_ram_paging_in_start = 12, /* Memory that is being paged in */
     p2m_ram_shared = 13,          /* Shared or sharable memory */
-    p2m_ram_broken  =14,          /* Broken page, access cause domain crash */
+    p2m_ram_broken = 14,          /* Broken page, access cause domain crash */
 } p2m_type_t;
 
 /*
@@ -170,6 +162,9 @@ typedef enum {
  * reinit the type correctly after fault */
 #define P2M_SHARABLE_TYPES (p2m_to_mask(p2m_ram_rw))
 #define P2M_SHARED_TYPES   (p2m_to_mask(p2m_ram_shared))
+
+/* Broken type: the frame backing this pfn has failed in hardware
+ * and must not be touched. */
 #define P2M_BROKEN_TYPES (p2m_to_mask(p2m_ram_broken))
 
 /* Useful predicates */
@@ -190,12 +185,7 @@ typedef enum {
 #define p2m_is_shared(_t)   (p2m_to_mask(_t) & P2M_SHARED_TYPES)
 #define p2m_is_broken(_t)   (p2m_to_mask(_t) & P2M_BROKEN_TYPES)
 
-/* Populate-on-demand */
-#define POPULATE_ON_DEMAND_MFN  (1<<9)
-#define POD_PAGE_ORDER 9
-
-#define PAGING_MFN  INVALID_MFN
-
+/* Per-p2m-table state */
 struct p2m_domain {
     /* Lock that protects updates to the p2m */
     spinlock_t         lock;
@@ -298,10 +288,6 @@ struct p2m_domain *p2m_get_p2m(struct vc
 
 #define p2m_get_pagetable(p2m)  ((p2m)->phys_table)
 
-/* Flushes specified p2m table */
-void p2m_flush(struct vcpu *v, struct p2m_domain *p2m);
-/* Flushes all nested p2m tables */
-void p2m_flush_nestedp2m(struct domain *d);
 
 /*
  * The P2M lock.  This protects all updates to the p2m table.
@@ -376,23 +362,6 @@ void p2m_flush_nestedp2m(struct domain *
         spin_unlock(&(_domain)->arch.nested_p2m_lock);                 \
     } while (0)
 
-/* Extract the type from the PTE flags that store it */
-static inline p2m_type_t p2m_flags_to_type(unsigned long flags)
-{
-    /* Type is stored in the "available" bits */
-#ifdef __x86_64__
-    /* For AMD IOMMUs we need to use type 0 for plain RAM, but we need
-     * to make sure that an entirely empty PTE doesn't have RAM type */
-    if ( flags == 0 ) 
-        return p2m_invalid;
-    /* AMD IOMMUs use bits 9-11 to encode next io page level and bits
-     * 59-62 for iommu flags so we can't use them to store p2m type info. */
-    return (flags >> 12) & 0x7f;
-#else
-    return (flags >> 9) & 0x7;
-#endif
-}
-
 /* Read the current domain's p2m table.  Do not populate PoD pages. */
 static inline mfn_t gfn_to_mfn_type_current(struct p2m_domain *p2m,
                                             unsigned long gfn, p2m_type_t *t,
@@ -508,6 +477,52 @@ int p2m_alloc_table(struct p2m_domain *p
 void p2m_teardown(struct p2m_domain *p2m);
 void p2m_final_teardown(struct domain *d);
 
+/* Add a page to a domain's p2m table */
+int guest_physmap_add_entry(struct p2m_domain *p2m, unsigned long gfn,
+                            unsigned long mfn, unsigned int page_order, 
+                            p2m_type_t t);
+
+/* Remove a page from a domain's p2m table */
+void guest_physmap_remove_entry(struct p2m_domain *p2m, unsigned long gfn,
+                            unsigned long mfn, unsigned int page_order);
+
+/* Set a p2m range as populate-on-demand */
+int guest_physmap_mark_populate_on_demand(struct domain *d, unsigned long gfn,
+                                          unsigned int order);
+
+/* Untyped version for RAM only, for compatibility */
+static inline int guest_physmap_add_page(struct domain *d,
+                                         unsigned long gfn,
+                                         unsigned long mfn,
+                                         unsigned int page_order)
+{
+    return guest_physmap_add_entry(d->arch.p2m, gfn, mfn, page_order, p2m_ram_rw);
+}
+
+/* Remove a page from a domain's p2m table */
+static inline void guest_physmap_remove_page(struct domain *d,
+                               unsigned long gfn,
+                               unsigned long mfn, unsigned int page_order)
+{
+    guest_physmap_remove_entry(d->arch.p2m, gfn, mfn, page_order);
+}
+
+/* Change types across all p2m entries in a domain */
+void p2m_change_entry_type_global(struct p2m_domain *p2m, p2m_type_t ot, p2m_type_t nt);
+
+/* Compare-exchange the type of a single p2m entry */
+p2m_type_t p2m_change_type(struct p2m_domain *p2m, unsigned long gfn,
+                           p2m_type_t ot, p2m_type_t nt);
+
+/* Set mmio addresses in the p2m table (for pass-through) */
+int set_mmio_p2m_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn);
+int clear_mmio_p2m_entry(struct p2m_domain *p2m, unsigned long gfn);
+
+
+/* 
+ * Populate-on-demand
+ */
+
 /* Dump PoD information about the domain */
 void p2m_pod_dump_data(struct p2m_domain *p2m);
 
@@ -540,52 +555,9 @@ p2m_pod_offline_or_broken_hit(struct pag
 void
 p2m_pod_offline_or_broken_replace(struct page_info *p);
 
-/* Add a page to a domain's p2m table */
-int guest_physmap_add_entry(struct p2m_domain *p2m, unsigned long gfn,
-                            unsigned long mfn, unsigned int page_order, 
-                            p2m_type_t t);
-
-/* Remove a page from a domain's p2m table */
-void guest_physmap_remove_entry(struct p2m_domain *p2m, unsigned long gfn,
-                            unsigned long mfn, unsigned int page_order);
-
-/* Set a p2m range as populate-on-demand */
-int guest_physmap_mark_populate_on_demand(struct domain *d, unsigned long gfn,
-                                          unsigned int order);
-
-/* Untyped version for RAM only, for compatibility 
- *
- * Return 0 for success
+/*
+ * Paging to disk and page-sharing
  */
-static inline int guest_physmap_add_page(struct domain *d,
-                                         unsigned long gfn,
-                                         unsigned long mfn,
-                                         unsigned int page_order)
-{
-    return guest_physmap_add_entry(d->arch.p2m, gfn, mfn, page_order, p2m_ram_rw);
-}
-
-/* Remove a page from a domain's p2m table */
-static inline void guest_physmap_remove_page(struct domain *d,
-                               unsigned long gfn,
-                               unsigned long mfn, unsigned int page_order)
-{
-    guest_physmap_remove_entry(d->arch.p2m, gfn, mfn, page_order);
-}
-
-/* Change types across all p2m entries in a domain */
-void p2m_change_entry_type_global(struct p2m_domain *p2m, p2m_type_t ot, p2m_type_t nt);
-
-/* Compare-exchange the type of a single p2m entry */
-p2m_type_t p2m_change_type(struct p2m_domain *p2m, unsigned long gfn,
-                           p2m_type_t ot, p2m_type_t nt);
-
-/* Set mmio addresses in the p2m table (for pass-through) */
-int set_mmio_p2m_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn);
-int clear_mmio_p2m_entry(struct p2m_domain *p2m, unsigned long gfn);
-
-void nestedp2m_write_p2m_entry(struct p2m_domain *p2m, unsigned long gfn,
-    l1_pgentry_t *p, mfn_t table_mfn, l1_pgentry_t new, unsigned int level);
 
 #ifdef __x86_64__
 /* Modify p2m table for shared gfn */
@@ -680,6 +652,40 @@ extern void audit_p2m(struct p2m_domain 
 #define P2M_DEBUG(_f, _a...) do { (void)(_f); } while(0)
 #endif
 
+
+/*
+ * Functions specific to the p2m-pt implementation
+ */
+
+/* Extract the type from the PTE flags that store it */
+static inline p2m_type_t p2m_flags_to_type(unsigned long flags)
+{
+    /* Type is stored in the "available" bits */
+#ifdef __x86_64__
+    /* For AMD IOMMUs we need to use type 0 for plain RAM, but we need
+     * to make sure that an entirely empty PTE doesn't have RAM type */
+    if ( flags == 0 ) 
+        return p2m_invalid;
+    /* AMD IOMMUs use bits 9-11 to encode next io page level and bits
+     * 59-62 for iommu flags so we can't use them to store p2m type info. */
+    return (flags >> 12) & 0x7f;
+#else
+    return (flags >> 9) & 0x7;
+#endif
+}
+
+/*
+ * Nested p2m: shadow p2m tables used for nested HVM virtualization 
+ */
+
+/* Flushes specified p2m table */
+void p2m_flush(struct vcpu *v, struct p2m_domain *p2m);
+/* Flushes all nested p2m tables */
+void p2m_flush_nestedp2m(struct domain *d);
+
+void nestedp2m_write_p2m_entry(struct p2m_domain *p2m, unsigned long gfn,
+    l1_pgentry_t *p, mfn_t table_mfn, l1_pgentry_t new, unsigned int level);
+
 #endif /* _XEN_P2M_H */
 
 /*
diff -r c7d0b66e41cd -r 9344034d624b xen/include/asm-x86/x86_32/page.h
--- a/xen/include/asm-x86/x86_32/page.h	Thu Jun 02 13:16:51 2011 +0100
+++ b/xen/include/asm-x86/x86_32/page.h	Thu Jun 02 13:16:52 2011 +0100
@@ -15,6 +15,7 @@
 #define L3_PAGETABLE_ENTRIES    4
 #define ROOT_PAGETABLE_ENTRIES  L3_PAGETABLE_ENTRIES
 #define SUPERPAGE_ORDER         PAGETABLE_ORDER
+#define SUPERPAGE_PAGES         (1<<SUPERPAGE_ORDER)
 
 /*
  * Architecturally, physical addresses may be up to 52 bits. However, the
diff -r c7d0b66e41cd -r 9344034d624b xen/include/asm-x86/x86_64/page.h
--- a/xen/include/asm-x86/x86_64/page.h	Thu Jun 02 13:16:51 2011 +0100
+++ b/xen/include/asm-x86/x86_64/page.h	Thu Jun 02 13:16:52 2011 +0100
@@ -17,6 +17,7 @@
 #define L4_PAGETABLE_ENTRIES    (1<<PAGETABLE_ORDER)
 #define ROOT_PAGETABLE_ENTRIES  L4_PAGETABLE_ENTRIES
 #define SUPERPAGE_ORDER         PAGETABLE_ORDER
+#define SUPERPAGE_PAGES         (1<<SUPERPAGE_ORDER)
 
 #define __PAGE_OFFSET           DIRECTMAP_VIRT_START
 #define __XEN_VIRT_START        XEN_VIRT_START


* [PATCH 03 of 17] x86/mm/p2m: hide the current-domain fast-path inside the p2m-pt code
  2011-06-02 12:20 [PATCH 00 of 17] mm/p2m cleanups Tim Deegan
  2011-06-02 12:20 ` [PATCH 01 of 17] x86/mm/p2m: Mark internal functions static Tim Deegan
  2011-06-02 12:20 ` [PATCH 02 of 17] x86/mm/p2m: little fixes and tidying up Tim Deegan
@ 2011-06-02 12:20 ` Tim Deegan
  2011-06-02 12:20 ` [PATCH 04 of 17] x86/mm/p2m: merge gfn_to_mfn_unshare with other gfn_to_mfn paths Tim Deegan
                   ` (13 subsequent siblings)
  16 siblings, 0 replies; 18+ messages in thread
From: Tim Deegan @ 2011-06-02 12:20 UTC (permalink / raw)
  To: xen-devel

# HG changeset patch
# User Tim Deegan <Tim.Deegan@citrix.com>
# Date 1307017012 -3600
# Node ID a7d8612c9ba14ae6efbf420e213c983902433942
# Parent  9344034d624b2e3cd6b0025ab2051cb89bd7e04a
x86/mm/p2m: hide the current-domain fast-path inside the p2m-pt code.

The other implementations of the p2m interface don't have this, and
it will go away entirely when 32-bit builds go away, so take it out
of the interface.

Signed-off-by: Tim Deegan <Tim.Deegan@citrix.com>
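
The resulting shape, heavily simplified from the p2m-pt.c hunk below (the
slow-path walker is collapsed into a made-up helper name here):

    static mfn_t p2m_gfn_to_mfn(struct p2m_domain *p2m, unsigned long gfn,
                                p2m_type_t *t, p2m_access_t *a, p2m_query_t q)
    {
        /* Fast path: the current domain's host p2m is reachable through
         * the linear mapping, so read it directly.  Callers no longer
         * need a separate get_entry_current hook. */
        if ( p2m == p2m_get_hostp2m(current->domain) )
            return p2m_gfn_to_mfn_current(p2m, gfn, t, a, q);

        /* Slow path, unchanged: map each level with map_domain_page()
         * and walk down to the leaf (invented name for this sketch). */
        return p2m_gfn_to_mfn_slow(p2m, gfn, t, a, q);
    }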

diff -r 9344034d624b -r a7d8612c9ba1 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/hvm/hvm.c	Thu Jun 02 13:16:52 2011 +0100
@@ -1212,7 +1212,7 @@ int hvm_hap_nested_page_fault(unsigned l
     }
 
     p2m = p2m_get_hostp2m(v->domain);
-    mfn = gfn_to_mfn_type_current(p2m, gfn, &p2mt, &p2ma, p2m_guest);
+    mfn = gfn_to_mfn_type_p2m(p2m, gfn, &p2mt, &p2ma, p2m_guest);
 
     /* Check access permissions first, then handle faults */
     if ( access_valid && (mfn_x(mfn) != INVALID_MFN) )
diff -r 9344034d624b -r a7d8612c9ba1 xen/arch/x86/mm/p2m-ept.c
--- a/xen/arch/x86/mm/p2m-ept.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/p2m-ept.c	Thu Jun 02 13:16:52 2011 +0100
@@ -692,13 +692,6 @@ out:
     return;
 }
 
-static mfn_t ept_get_entry_current(struct p2m_domain *p2m,
-                                   unsigned long gfn, p2m_type_t *t, p2m_access_t *a,
-                                   p2m_query_t q)
-{
-    return ept_get_entry(p2m, gfn, t, a, q);
-}
-
 /*
  * To test if the new emt type is the same with old,
  * return 1 to not to reset ept entry.
@@ -824,7 +817,6 @@ void ept_p2m_init(struct p2m_domain *p2m
 {
     p2m->set_entry = ept_set_entry;
     p2m->get_entry = ept_get_entry;
-    p2m->get_entry_current = ept_get_entry_current;
     p2m->change_entry_type_global = ept_change_entry_type_global;
 }
 
diff -r 9344034d624b -r a7d8612c9ba1 xen/arch/x86/mm/p2m-pt.c
--- a/xen/arch/x86/mm/p2m-pt.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/p2m-pt.c	Thu Jun 02 13:16:52 2011 +0100
@@ -503,6 +503,180 @@ static int p2m_pod_check_and_populate(st
     return r;
 }
 
+/* Read the current domain's p2m table (through the linear mapping). */
+static mfn_t p2m_gfn_to_mfn_current(struct p2m_domain *p2m, 
+                                    unsigned long gfn, p2m_type_t *t, 
+                                    p2m_access_t *a, p2m_query_t q)
+{
+    mfn_t mfn = _mfn(INVALID_MFN);
+    p2m_type_t p2mt = p2m_mmio_dm;
+    paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
+    /* XXX This is for compatibility with the old model, where anything not 
+     * XXX marked as RAM was considered to be emulated MMIO space.
+     * XXX Once we start explicitly registering MMIO regions in the p2m 
+     * XXX we will return p2m_invalid for unmapped gfns */
+
+    l1_pgentry_t l1e = l1e_empty(), *p2m_entry;
+    l2_pgentry_t l2e = l2e_empty();
+    int ret;
+#if CONFIG_PAGING_LEVELS >= 4
+    l3_pgentry_t l3e = l3e_empty();
+#endif
+
+    ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START) 
+           / sizeof(l1_pgentry_t));
+
+#if CONFIG_PAGING_LEVELS >= 4
+    /*
+     * Read & process L3
+     */
+    p2m_entry = (l1_pgentry_t *)
+        &__linear_l2_table[l2_linear_offset(RO_MPT_VIRT_START)
+                           + l3_linear_offset(addr)];
+pod_retry_l3:
+    ret = __copy_from_user(&l3e, p2m_entry, sizeof(l3e));
+
+    if ( ret != 0 || !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
+    {
+        if ( (l3e_get_flags(l3e) & _PAGE_PSE) &&
+             (p2m_flags_to_type(l3e_get_flags(l3e)) == p2m_populate_on_demand) )
+        {
+            /* The read has succeeded, so we know that mapping exists */
+            if ( q != p2m_query )
+            {
+                if ( !p2m_pod_demand_populate(p2m, gfn, 18, q) )
+                    goto pod_retry_l3;
+                p2mt = p2m_invalid;
+                printk("%s: Allocate 1GB failed!\n", __func__);
+                goto out;
+            }
+            else
+            {
+                p2mt = p2m_populate_on_demand;
+                goto out;
+            }
+        }
+        goto pod_retry_l2;
+    }
+
+    if ( l3e_get_flags(l3e) & _PAGE_PSE )
+    {
+        p2mt = p2m_flags_to_type(l3e_get_flags(l3e));
+        ASSERT(l3e_get_pfn(l3e) != INVALID_MFN || !p2m_is_ram(p2mt));
+        if (p2m_is_valid(p2mt) )
+            mfn = _mfn(l3e_get_pfn(l3e) + 
+                       l2_table_offset(addr) * L1_PAGETABLE_ENTRIES + 
+                       l1_table_offset(addr));
+        else
+            p2mt = p2m_mmio_dm;
+            
+        goto out;
+    }
+#endif
+    /*
+     * Read & process L2
+     */
+    p2m_entry = &__linear_l1_table[l1_linear_offset(RO_MPT_VIRT_START)
+                                   + l2_linear_offset(addr)];
+
+pod_retry_l2:
+    ret = __copy_from_user(&l2e,
+                           p2m_entry,
+                           sizeof(l2e));
+    if ( ret != 0
+         || !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
+    {
+        if( (l2e_get_flags(l2e) & _PAGE_PSE)
+            && ( p2m_flags_to_type(l2e_get_flags(l2e))
+                 == p2m_populate_on_demand ) )
+        {
+            /* The read has succeeded, so we know that the mapping
+             * exits at this point.  */
+            if ( q != p2m_query )
+            {
+                if ( !p2m_pod_check_and_populate(p2m, gfn,
+                                                 p2m_entry, 9, q) )
+                    goto pod_retry_l2;
+
+                /* Allocate failed. */
+                p2mt = p2m_invalid;
+                printk("%s: Allocate failed!\n", __func__);
+                goto out;
+            }
+            else
+            {
+                p2mt = p2m_populate_on_demand;
+                goto out;
+            }
+        }
+
+        goto pod_retry_l1;
+    }
+        
+    if (l2e_get_flags(l2e) & _PAGE_PSE)
+    {
+        p2mt = p2m_flags_to_type(l2e_get_flags(l2e));
+        ASSERT(l2e_get_pfn(l2e) != INVALID_MFN || !p2m_is_ram(p2mt));
+
+        if ( p2m_is_valid(p2mt) )
+            mfn = _mfn(l2e_get_pfn(l2e) + l1_table_offset(addr));
+        else
+            p2mt = p2m_mmio_dm;
+
+        goto out;
+    }
+
+    /*
+     * Read and process L1
+     */
+
+    /* Need to __copy_from_user because the p2m is sparse and this
+     * part might not exist */
+pod_retry_l1:
+    p2m_entry = &phys_to_machine_mapping[gfn];
+
+    ret = __copy_from_user(&l1e,
+                           p2m_entry,
+                           sizeof(l1e));
+            
+    if ( ret == 0 ) {
+        p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
+        ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));
+
+        if ( p2m_flags_to_type(l1e_get_flags(l1e))
+             == p2m_populate_on_demand )
+        {
+            /* The read has succeeded, so we know that the mapping
+             * exits at this point.  */
+            if ( q != p2m_query )
+            {
+                if ( !p2m_pod_check_and_populate(p2m, gfn,
+                                                 (l1_pgentry_t *)p2m_entry, 0, q) )
+                    goto pod_retry_l1;
+
+                /* Allocate failed. */
+                p2mt = p2m_invalid;
+                goto out;
+            }
+            else
+            {
+                p2mt = p2m_populate_on_demand;
+                goto out;
+            }
+        }
+
+        if ( p2m_is_valid(p2mt) || p2m_is_grant(p2mt) )
+            mfn = _mfn(l1e_get_pfn(l1e));
+        else 
+            /* XXX see above */
+            p2mt = p2m_mmio_dm;
+    }
+    
+out:
+    *t = p2mt;
+    return mfn;
+}
+
 
 static mfn_t
 p2m_gfn_to_mfn(struct p2m_domain *p2m, unsigned long gfn, p2m_type_t *t, p2m_access_t *a,
@@ -529,6 +703,10 @@ p2m_gfn_to_mfn(struct p2m_domain *p2m, u
         /* This pfn is higher than the highest the p2m map currently holds */
         return _mfn(INVALID_MFN);
 
+    /* Use the fast path with the linear mapping if we can */
+    if ( p2m == p2m_get_hostp2m(current->domain) )
+        return p2m_gfn_to_mfn_current(p2m, gfn, t, a, q);
+
 #if CONFIG_PAGING_LEVELS >= 4
     {
         l4_pgentry_t *l4e = map_domain_page(mfn_x(mfn));
@@ -646,185 +824,6 @@ pod_retry_l1:
     return (p2m_is_valid(*t) || p2m_is_grant(*t)) ? mfn : _mfn(INVALID_MFN);
 }
 
-/* Read the current domain's p2m table (through the linear mapping). */
-static mfn_t p2m_gfn_to_mfn_current(struct p2m_domain *p2m,
-                                    unsigned long gfn, p2m_type_t *t, p2m_access_t *a,
-                                    p2m_query_t q)
-{
-    mfn_t mfn = _mfn(INVALID_MFN);
-    p2m_type_t p2mt = p2m_mmio_dm;
-    paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
-    /* XXX This is for compatibility with the old model, where anything not 
-     * XXX marked as RAM was considered to be emulated MMIO space.
-     * XXX Once we start explicitly registering MMIO regions in the p2m 
-     * XXX we will return p2m_invalid for unmapped gfns */
-
-    /* Not currently implemented except for EPT */
-    *a = p2m_access_rwx;
-
-    if ( gfn <= p2m->max_mapped_pfn )
-    {
-        l1_pgentry_t l1e = l1e_empty(), *p2m_entry;
-        l2_pgentry_t l2e = l2e_empty();
-        int ret;
-#if CONFIG_PAGING_LEVELS >= 4
-        l3_pgentry_t l3e = l3e_empty();
-#endif
-
-        ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START) 
-               / sizeof(l1_pgentry_t));
-
-#if CONFIG_PAGING_LEVELS >= 4
-        /*
-         * Read & process L3
-         */
-        p2m_entry = (l1_pgentry_t *)
-            &__linear_l2_table[l2_linear_offset(RO_MPT_VIRT_START)
-                               + l3_linear_offset(addr)];
-    pod_retry_l3:
-        ret = __copy_from_user(&l3e, p2m_entry, sizeof(l3e));
-
-        if ( ret != 0 || !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
-        {
-            if ( (l3e_get_flags(l3e) & _PAGE_PSE) &&
-                 (p2m_flags_to_type(l3e_get_flags(l3e)) == p2m_populate_on_demand) )
-            {
-                /* The read has succeeded, so we know that mapping exists */
-                if ( q != p2m_query )
-                {
-                    if ( !p2m_pod_demand_populate(p2m, gfn, 18, q) )
-                        goto pod_retry_l3;
-                    p2mt = p2m_invalid;
-                    printk("%s: Allocate 1GB failed!\n", __func__);
-                    goto out;
-                }
-                else
-                {
-                    p2mt = p2m_populate_on_demand;
-                    goto out;
-                }
-            }
-            goto pod_retry_l2;
-        }
-
-        if ( l3e_get_flags(l3e) & _PAGE_PSE )
-        {
-            p2mt = p2m_flags_to_type(l3e_get_flags(l3e));
-            ASSERT(l3e_get_pfn(l3e) != INVALID_MFN || !p2m_is_ram(p2mt));
-            if (p2m_is_valid(p2mt) )
-                mfn = _mfn(l3e_get_pfn(l3e) + 
-                           l2_table_offset(addr) * L1_PAGETABLE_ENTRIES + 
-                           l1_table_offset(addr));
-            else
-                p2mt = p2m_mmio_dm;
-            
-            goto out;
-        }
-#endif
-        /*
-         * Read & process L2
-         */
-        p2m_entry = &__linear_l1_table[l1_linear_offset(RO_MPT_VIRT_START)
-                                       + l2_linear_offset(addr)];
-
-    pod_retry_l2:
-        ret = __copy_from_user(&l2e,
-                               p2m_entry,
-                               sizeof(l2e));
-        if ( ret != 0
-             || !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
-        {
-            if( (l2e_get_flags(l2e) & _PAGE_PSE)
-                && ( p2m_flags_to_type(l2e_get_flags(l2e))
-                     == p2m_populate_on_demand ) )
-            {
-                /* The read has succeeded, so we know that the mapping
-                 * exits at this point.  */
-                if ( q != p2m_query )
-                {
-                    if ( !p2m_pod_check_and_populate(p2m, gfn,
-                                                     p2m_entry, 9, q) )
-                        goto pod_retry_l2;
-
-                    /* Allocate failed. */
-                    p2mt = p2m_invalid;
-                    printk("%s: Allocate failed!\n", __func__);
-                    goto out;
-                }
-                else
-                {
-                    p2mt = p2m_populate_on_demand;
-                    goto out;
-                }
-            }
-
-            goto pod_retry_l1;
-        }
-        
-        if (l2e_get_flags(l2e) & _PAGE_PSE)
-        {
-            p2mt = p2m_flags_to_type(l2e_get_flags(l2e));
-            ASSERT(l2e_get_pfn(l2e) != INVALID_MFN || !p2m_is_ram(p2mt));
-
-            if ( p2m_is_valid(p2mt) )
-                mfn = _mfn(l2e_get_pfn(l2e) + l1_table_offset(addr));
-            else
-                p2mt = p2m_mmio_dm;
-
-            goto out;
-        }
-
-        /*
-         * Read and process L1
-         */
-
-        /* Need to __copy_from_user because the p2m is sparse and this
-         * part might not exist */
-    pod_retry_l1:
-        p2m_entry = &phys_to_machine_mapping[gfn];
-
-        ret = __copy_from_user(&l1e,
-                               p2m_entry,
-                               sizeof(l1e));
-            
-        if ( ret == 0 ) {
-            p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
-            ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));
-
-            if ( p2m_flags_to_type(l1e_get_flags(l1e))
-                 == p2m_populate_on_demand )
-            {
-                /* The read has succeeded, so we know that the mapping
-                 * exits at this point.  */
-                if ( q != p2m_query )
-                {
-                    if ( !p2m_pod_check_and_populate(p2m, gfn,
-                                                     (l1_pgentry_t *)p2m_entry, 0, q) )
-                        goto pod_retry_l1;
-
-                    /* Allocate failed. */
-                    p2mt = p2m_invalid;
-                    goto out;
-                }
-                else
-                {
-                    p2mt = p2m_populate_on_demand;
-                    goto out;
-                }
-            }
-
-            if ( p2m_is_valid(p2mt) || p2m_is_grant(p2mt) )
-                mfn = _mfn(l1e_get_pfn(l1e));
-            else 
-                /* XXX see above */
-                p2mt = p2m_mmio_dm;
-        }
-    }
-out:
-    *t = p2mt;
-    return mfn;
-}
-
 /* Walk the whole p2m table, changing any entries of the old type
  * to the new type.  This is used in hardware-assisted paging to 
  * quickly enable or diable log-dirty tracking */
@@ -968,7 +967,6 @@ void p2m_pt_init(struct p2m_domain *p2m)
 {
     p2m->set_entry = p2m_set_entry;
     p2m->get_entry = p2m_gfn_to_mfn;
-    p2m->get_entry_current = p2m_gfn_to_mfn_current;
     p2m->change_entry_type_global = p2m_change_type_global;
     p2m->write_p2m_entry = paging_write_p2m_entry;
 }
diff -r 9344034d624b -r a7d8612c9ba1 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/p2m.c	Thu Jun 02 13:16:52 2011 +0100
@@ -101,7 +101,6 @@ p2m_init_nestedp2m(struct domain *d)
         if (p2m == NULL)
             return -ENOMEM;
         p2m_initialise(d, p2m);
-        p2m->get_entry_current = p2m->get_entry;
         p2m->write_p2m_entry = nestedp2m_write_p2m_entry;
     }
 
@@ -956,7 +955,6 @@ p2m_flush_locked(struct p2m_domain *p2m)
 
     p2m_teardown(p2m);
     p2m_initialise(p2m->domain, p2m);
-    p2m->get_entry_current = p2m->get_entry;
     p2m->write_p2m_entry = nestedp2m_write_p2m_entry;
     return p2m_alloc_table(p2m);
 }
diff -r 9344034d624b -r a7d8612c9ba1 xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/include/asm-x86/p2m.h	Thu Jun 02 13:16:52 2011 +0100
@@ -218,11 +218,6 @@ struct p2m_domain {
                                        p2m_type_t *p2mt,
                                        p2m_access_t *p2ma,
                                        p2m_query_t q);
-    mfn_t              (*get_entry_current)(struct p2m_domain *p2m,
-                                            unsigned long gfn,
-                                            p2m_type_t *p2mt,
-                                            p2m_access_t *p2ma,
-                                            p2m_query_t q);
     void               (*change_entry_type_global)(struct p2m_domain *p2m,
                                                    p2m_type_t ot,
                                                    p2m_type_t nt);
@@ -362,45 +357,14 @@ struct p2m_domain *p2m_get_p2m(struct vc
         spin_unlock(&(_domain)->arch.nested_p2m_lock);                 \
     } while (0)
 
-/* Read the current domain's p2m table.  Do not populate PoD pages. */
-static inline mfn_t gfn_to_mfn_type_current(struct p2m_domain *p2m,
-                                            unsigned long gfn, p2m_type_t *t,
-                                            p2m_access_t *a,
-                                            p2m_query_t q)
-{
-    return p2m->get_entry_current(p2m, gfn, t, a, q);
-}
 
 /* Read P2M table, mapping pages as we go.
  * Do not populate PoD pages. */
 static inline mfn_t
 gfn_to_mfn_type_p2m(struct p2m_domain *p2m, unsigned long gfn,
-                              p2m_type_t *t, p2m_query_t q)
+                    p2m_type_t *t, p2m_access_t *a, p2m_query_t q)
 {
-    p2m_access_t a = 0;
-    return p2m->get_entry(p2m, gfn, t, &a, q);
-}
-
-
-/* General conversion function from gfn to mfn */
-static inline mfn_t _gfn_to_mfn_type(struct p2m_domain *p2m,
-                                     unsigned long gfn, p2m_type_t *t,
-                                     p2m_query_t q)
-{
-    mfn_t mfn;
-    p2m_access_t a;
-
-    if ( !p2m || !paging_mode_translate(p2m->domain) )
-    {
-        /* Not necessarily true, but for non-translated guests, we claim
-         * it's the most generic kind of memory */
-        *t = p2m_ram_rw;
-        mfn = _mfn(gfn);
-    }
-    else if ( likely(current->domain == p2m->domain) )
-        mfn = gfn_to_mfn_type_current(p2m, gfn, t, &a, q);
-    else
-        mfn = gfn_to_mfn_type_p2m(p2m, gfn, t, q);
+    mfn_t mfn = p2m->get_entry(p2m, gfn, t, a, q);
 
 #ifdef __x86_64__
     if (unlikely((p2m_is_broken(*t))))
@@ -415,6 +379,25 @@ static inline mfn_t _gfn_to_mfn_type(str
     return mfn;
 }
 
+
+/* General conversion function from gfn to mfn */
+static inline mfn_t _gfn_to_mfn_type(struct p2m_domain *p2m,
+                                     unsigned long gfn, p2m_type_t *t,
+                                     p2m_query_t q)
+{
+    p2m_access_t a;
+
+    if ( !p2m || !paging_mode_translate(p2m->domain) )
+    {
+        /* Not necessarily true, but for non-translated guests, we claim
+         * it's the most generic kind of memory */
+        *t = p2m_ram_rw;
+        return _mfn(gfn);
+    }
+    
+    return gfn_to_mfn_type_p2m(p2m, gfn, t, &a, q);
+}
+
 #define gfn_to_mfn(p2m, g, t) _gfn_to_mfn_type((p2m), (g), (t), p2m_alloc)
 #define gfn_to_mfn_query(p2m, g, t) _gfn_to_mfn_type((p2m), (g), (t), p2m_query)
 #define gfn_to_mfn_guest(p2m, g, t) _gfn_to_mfn_type((p2m), (g), (t), p2m_guest)


* [PATCH 04 of 17] x86/mm/p2m: merge gfn_to_mfn_unshare with other gfn_to_mfn paths
  2011-06-02 12:20 [PATCH 00 of 17] mm/p2m cleanups Tim Deegan
                   ` (2 preceding siblings ...)
  2011-06-02 12:20 ` [PATCH 03 of 17] x86/mm/p2m: hide the current-domain fast-path inside the p2m-pt code Tim Deegan
@ 2011-06-02 12:20 ` Tim Deegan
  2011-06-02 12:20 ` [PATCH 05 of 17] x86/mm/p2m: Make p2m interfaces take struct domain arguments Tim Deegan
                   ` (12 subsequent siblings)
  16 siblings, 0 replies; 18+ messages in thread
From: Tim Deegan @ 2011-06-02 12:20 UTC (permalink / raw)
  To: xen-devel

# HG changeset patch
# User Tim Deegan <Tim.Deegan@citrix.com>
# Date 1307017012 -3600
# Node ID 0d3e0a571fdddc873d1ff3750d15db6fc58fff8e
# Parent  a7d8612c9ba14ae6efbf420e213c983902433942
x86/mm/p2m: merge gfn_to_mfn_unshare with other gfn_to_mfn paths.

gfn_to_mfn_unshare() was a separate function even though all other lookup
types are handled in one place.  Merge it into _gfn_to_mfn_type() so that it
gets the benefit of broken-page protection, for example, and tidy its
interface up to fit.

The unsharing code still has a lot of bugs, e.g.:
 - failure to alloc for unshare on a foreign lookup still BUG()s;
 - there is at least one race condition in unshare-and-retry;
 - the p2m_* lookup types should probably be flags, not an enum;
but it's cleaner and will make later p2m cleanups easier.

Signed-off-by: Tim Deegan <Tim.Deegan@citrix.com>
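
For reference, the caller-visible change boils down to this (trimmed from
the p2m.h hunks below):

    typedef enum {
        p2m_query,      /* Do not populate PoD entries        */
        p2m_alloc,      /* Automatically populate PoD entries */
        p2m_unshare,    /* Break c-o-w sharing; implies alloc */
        p2m_guest,      /* Guest demand-fault; implies alloc  */
    } p2m_query_t;

    #define gfn_to_mfn_unshare(p, g, t) \
        _gfn_to_mfn_type((p), (g), (t), p2m_unshare)

so callers simply drop the old must_succeed argument:

    /* before */  mfn = gfn_to_mfn_unshare(p2m, gfn, &p2mt, 0);
    /* after  */  mfn = gfn_to_mfn_unshare(p2m, gfn, &p2mt);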

diff -r a7d8612c9ba1 -r 0d3e0a571fdd xen/arch/x86/hvm/emulate.c
--- a/xen/arch/x86/hvm/emulate.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/hvm/emulate.c	Thu Jun 02 13:16:52 2011 +0100
@@ -63,7 +63,7 @@ static int hvmemul_do_io(
     int rc;
 
     /* Check for paged out page */
-    ram_mfn = gfn_to_mfn_unshare(p2m, ram_gfn, &p2mt, 0);
+    ram_mfn = gfn_to_mfn_unshare(p2m, ram_gfn, &p2mt);
     if ( p2m_is_paging(p2mt) )
     {
         p2m_mem_paging_populate(p2m, ram_gfn);
diff -r a7d8612c9ba1 -r 0d3e0a571fdd xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/hvm/hvm.c	Thu Jun 02 13:16:52 2011 +0100
@@ -352,7 +352,7 @@ static int hvm_set_ioreq_page(
     unsigned long mfn;
     void *va;
 
-    mfn = mfn_x(gfn_to_mfn_unshare(p2m, gmfn, &p2mt, 0));
+    mfn = mfn_x(gfn_to_mfn_unshare(p2m, gmfn, &p2mt));
     if ( !p2m_is_ram(p2mt) )
         return -EINVAL;
     if ( p2m_is_paging(p2mt) )
@@ -1767,7 +1767,7 @@ static void *__hvm_map_guest_frame(unsig
     struct p2m_domain *p2m = p2m_get_hostp2m(current->domain);
 
     mfn = mfn_x(writable
-                ? gfn_to_mfn_unshare(p2m, gfn, &p2mt, 0)
+                ? gfn_to_mfn_unshare(p2m, gfn, &p2mt)
                 : gfn_to_mfn(p2m, gfn, &p2mt));
     if ( (p2m_is_shared(p2mt) && writable) || !p2m_is_ram(p2mt) )
         return NULL;
@@ -2229,7 +2229,7 @@ static enum hvm_copy_result __hvm_copy(
             gfn = addr >> PAGE_SHIFT;
         }
 
-        mfn = mfn_x(gfn_to_mfn_unshare(p2m, gfn, &p2mt, 0));
+        mfn = mfn_x(gfn_to_mfn_unshare(p2m, gfn, &p2mt));
 
         if ( p2m_is_paging(p2mt) )
         {
@@ -3724,7 +3724,7 @@ long do_hvm_op(unsigned long op, XEN_GUE
         rc = -EINVAL;
         if ( is_hvm_domain(d) )
         {
-            gfn_to_mfn_unshare(p2m_get_hostp2m(d), a.pfn, &t, 0);
+            gfn_to_mfn_unshare(p2m_get_hostp2m(d), a.pfn, &t);
             if ( p2m_is_mmio(t) )
                 a.mem_type =  HVMMEM_mmio_dm;
             else if ( p2m_is_readonly(t) )
@@ -3779,7 +3779,7 @@ long do_hvm_op(unsigned long op, XEN_GUE
             p2m_type_t t;
             p2m_type_t nt;
             mfn_t mfn;
-            mfn = gfn_to_mfn_unshare(p2m, pfn, &t, 0);
+            mfn = gfn_to_mfn_unshare(p2m, pfn, &t);
             if ( p2m_is_paging(t) )
             {
                 p2m_mem_paging_populate(p2m, pfn);
@@ -3877,7 +3877,7 @@ long do_hvm_op(unsigned long op, XEN_GUE
             mfn_t mfn;
             int success;
 
-            mfn = gfn_to_mfn_unshare(p2m, pfn, &t, 0);
+            mfn = gfn_to_mfn_unshare(p2m, pfn, &t);
 
             p2m_lock(p2m);
             success = p2m->set_entry(p2m, pfn, mfn, 0, t, memaccess[a.hvmmem_access]);
diff -r a7d8612c9ba1 -r 0d3e0a571fdd xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm.c	Thu Jun 02 13:16:52 2011 +0100
@@ -4653,7 +4653,7 @@ long arch_memory_op(int op, XEN_GUEST_HA
             p2m_type_t p2mt;
 
             xatp.idx = mfn_x(gfn_to_mfn_unshare(p2m_get_hostp2m(d),
-                                                xatp.idx, &p2mt, 0));
+                                                xatp.idx, &p2mt));
             /* If the page is still shared, exit early */
             if ( p2m_is_shared(p2mt) )
             {
diff -r a7d8612c9ba1 -r 0d3e0a571fdd xen/arch/x86/mm/guest_walk.c
--- a/xen/arch/x86/mm/guest_walk.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/guest_walk.c	Thu Jun 02 13:16:52 2011 +0100
@@ -93,7 +93,7 @@ static inline void *map_domain_gfn(struc
                                    uint32_t *rc) 
 {
     /* Translate the gfn, unsharing if shared */
-    *mfn = gfn_to_mfn_unshare(p2m, gfn_x(gfn), p2mt, 0);
+    *mfn = gfn_to_mfn_unshare(p2m, gfn_x(gfn), p2mt);
     if ( p2m_is_paging(*p2mt) )
     {
         p2m_mem_paging_populate(p2m, gfn_x(gfn));
diff -r a7d8612c9ba1 -r 0d3e0a571fdd xen/arch/x86/mm/hap/guest_walk.c
--- a/xen/arch/x86/mm/hap/guest_walk.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/hap/guest_walk.c	Thu Jun 02 13:16:52 2011 +0100
@@ -57,7 +57,7 @@ unsigned long hap_p2m_ga_to_gfn(GUEST_PA
     walk_t gw;
 
     /* Get the top-level table's MFN */
-    top_mfn = gfn_to_mfn_unshare(p2m, cr3 >> PAGE_SHIFT, &p2mt, 0);
+    top_mfn = gfn_to_mfn_unshare(p2m, cr3 >> PAGE_SHIFT, &p2mt);
     if ( p2m_is_paging(p2mt) )
     {
         p2m_mem_paging_populate(p2m, cr3 >> PAGE_SHIFT);
@@ -89,7 +89,7 @@ unsigned long hap_p2m_ga_to_gfn(GUEST_PA
     if ( missing == 0 )
     {
         gfn_t gfn = guest_l1e_get_gfn(gw.l1e);
-        gfn_to_mfn_unshare(p2m, gfn_x(gfn), &p2mt, 0);
+        gfn_to_mfn_unshare(p2m, gfn_x(gfn), &p2mt);
         if ( p2m_is_paging(p2mt) )
         {
             p2m_mem_paging_populate(p2m, gfn_x(gfn));
diff -r a7d8612c9ba1 -r 0d3e0a571fdd xen/arch/x86/mm/mem_sharing.c
--- a/xen/arch/x86/mm/mem_sharing.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/mem_sharing.c	Thu Jun 02 13:16:52 2011 +0100
@@ -294,8 +294,7 @@ static void mem_sharing_audit(void)
 
 
 static struct page_info* mem_sharing_alloc_page(struct domain *d, 
-                                                unsigned long gfn,
-                                                int must_succeed)
+                                                unsigned long gfn)
 {
     struct page_info* page;
     struct vcpu *v = current;
@@ -307,21 +306,20 @@ static struct page_info* mem_sharing_all
     memset(&req, 0, sizeof(req));
     req.type = MEM_EVENT_TYPE_SHARED;
 
-    if(must_succeed) 
+    if ( v->domain != d )
     {
-        /* We do not support 'must_succeed' any more. External operations such
-         * as grant table mappings may fail with OOM condition! 
-         */
-        BUG();
+        /* XXX This path needs some attention.  For now, just fail foreign 
+         * XXX requests to unshare if there's no memory.  This replaces 
+         * XXX old code that BUG()ed here; the callers now BUG()
+         * XXX elsewhere. */
+        gdprintk(XENLOG_ERR, 
+                 "Failed alloc on unshare path for foreign (%d) lookup\n",
+                 d->domain_id);
+        return page;
     }
-    else
-    {
-        /* All foreign attempts to unshare pages should be handled through
-         * 'must_succeed' case. */
-        ASSERT(v->domain->domain_id == d->domain_id);
-        vcpu_pause_nosync(v);
-        req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
-    }
+
+    vcpu_pause_nosync(v);
+    req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
 
     /* XXX: Need to reserve a request, not just check the ring! */
     if(mem_event_check_ring(d)) return page;
@@ -692,8 +690,7 @@ gfn_found:
     if(ret == 0) goto private_page_found;
         
     old_page = page;
-    page = mem_sharing_alloc_page(d, gfn, flags & MEM_SHARING_MUST_SUCCEED);
-    BUG_ON(!page && (flags & MEM_SHARING_MUST_SUCCEED));
+    page = mem_sharing_alloc_page(d, gfn);
     if(!page) 
     {
         /* We've failed to obtain memory for private page. Need to re-add the
diff -r a7d8612c9ba1 -r 0d3e0a571fdd xen/common/grant_table.c
--- a/xen/common/grant_table.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/common/grant_table.c	Thu Jun 02 13:16:52 2011 +0100
@@ -110,7 +110,8 @@ static unsigned inline int max_nr_maptra
 #define gfn_to_mfn_private(_d, _gfn) ({                     \
     p2m_type_t __p2mt;                                      \
     unsigned long __x;                                      \
-    __x = mfn_x(gfn_to_mfn_unshare(p2m_get_hostp2m(_d), _gfn, &__p2mt, 1));  \
+    __x = mfn_x(gfn_to_mfn_unshare(p2m_get_hostp2m(_d), _gfn, &__p2mt));  \
+    BUG_ON(p2m_is_shared(__p2mt)); /* XXX fixme */          \
     if ( !p2m_is_valid(__p2mt) )                            \
         __x = INVALID_MFN;                                  \
     __x; })
@@ -153,7 +154,12 @@ static int __get_paged_frame(unsigned lo
     if ( readonly )
         mfn = gfn_to_mfn(p2m, gfn, &p2mt);
     else
-        mfn = gfn_to_mfn_unshare(p2m, gfn, &p2mt, 1);
+    {
+        mfn = gfn_to_mfn_unshare(p2m, gfn, &p2mt);
+        BUG_ON(p2m_is_shared(p2mt));
+        /* XXX Here, and above in gfn_to_mfn_private, need to handle
+         * XXX failure to unshare. */
+    }
 
     if ( p2m_is_valid(p2mt) ) {
         *frame = mfn_x(mfn);
diff -r a7d8612c9ba1 -r 0d3e0a571fdd xen/common/memory.c
--- a/xen/common/memory.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/common/memory.c	Thu Jun 02 13:16:52 2011 +0100
@@ -363,7 +363,7 @@ static long memory_exchange(XEN_GUEST_HA
                 p2m_type_t p2mt;
 
                 /* Shared pages cannot be exchanged */
-                mfn = mfn_x(gfn_to_mfn_unshare(p2m_get_hostp2m(d), gmfn + k, &p2mt, 0));
+                mfn = mfn_x(gfn_to_mfn_unshare(p2m_get_hostp2m(d), gmfn + k, &p2mt));
                 if ( p2m_is_shared(p2mt) )
                 {
                     rc = -ENOMEM;
diff -r a7d8612c9ba1 -r 0d3e0a571fdd xen/include/asm-x86/mem_sharing.h
--- a/xen/include/asm-x86/mem_sharing.h	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/include/asm-x86/mem_sharing.h	Thu Jun 02 13:16:52 2011 +0100
@@ -34,7 +34,6 @@ int mem_sharing_nominate_page(struct p2m
                               unsigned long gfn,
                               int expected_refcnt,
                               shr_handle_t *phandle);
-#define MEM_SHARING_MUST_SUCCEED      (1<<0)
 #define MEM_SHARING_DESTROY_GFN       (1<<1)
 int mem_sharing_unshare_page(struct p2m_domain *p2m, 
                              unsigned long gfn, 
diff -r a7d8612c9ba1 -r 0d3e0a571fdd xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/include/asm-x86/p2m.h	Thu Jun 02 13:16:52 2011 +0100
@@ -112,9 +112,10 @@ typedef enum {
 } p2m_access_t;
 
 typedef enum {
-    p2m_query = 0,              /* Do not populate a PoD entries      */
-    p2m_alloc = 1,              /* Automatically populate PoD entries */
-    p2m_guest = 2,              /* Guest demand-fault; implies alloc  */
+    p2m_query,              /* Do not populate a PoD entries      */
+    p2m_alloc,              /* Automatically populate PoD entries */
+    p2m_unshare,            /* Break c-o-w sharing; implies alloc */
+    p2m_guest,              /* Guest demand-fault; implies alloc  */
 } p2m_query_t;
 
 /* We use bitmaps and maks to handle groups of types */
@@ -367,6 +368,14 @@ gfn_to_mfn_type_p2m(struct p2m_domain *p
     mfn_t mfn = p2m->get_entry(p2m, gfn, t, a, q);
 
 #ifdef __x86_64__
+    if ( q == p2m_unshare && p2m_is_shared(*t) )
+    {
+        mem_sharing_unshare_page(p2m, gfn, 0);
+        mfn = p2m->get_entry(p2m, gfn, t, a, q);
+    }
+#endif
+
+#ifdef __x86_64__
     if (unlikely((p2m_is_broken(*t))))
     {
         /* Return invalid_mfn to avoid caller's access */
@@ -401,32 +410,7 @@ static inline mfn_t _gfn_to_mfn_type(str
 #define gfn_to_mfn(p2m, g, t) _gfn_to_mfn_type((p2m), (g), (t), p2m_alloc)
 #define gfn_to_mfn_query(p2m, g, t) _gfn_to_mfn_type((p2m), (g), (t), p2m_query)
 #define gfn_to_mfn_guest(p2m, g, t) _gfn_to_mfn_type((p2m), (g), (t), p2m_guest)
-
-static inline mfn_t gfn_to_mfn_unshare(struct p2m_domain *p2m,
-                                       unsigned long gfn,
-                                       p2m_type_t *p2mt,
-                                       int must_succeed)
-{
-    mfn_t mfn;
-
-    mfn = gfn_to_mfn(p2m, gfn, p2mt);
-#ifdef __x86_64__
-    if ( p2m_is_shared(*p2mt) )
-    {
-        if ( mem_sharing_unshare_page(p2m, gfn,
-                                      must_succeed 
-                                      ? MEM_SHARING_MUST_SUCCEED : 0) )
-        {
-            BUG_ON(must_succeed);
-            return mfn;
-        }
-        mfn = gfn_to_mfn(p2m, gfn, p2mt);
-    }
-#endif
-
-    return mfn;
-}
-
+#define gfn_to_mfn_unshare(p, g, t) _gfn_to_mfn_type((p), (g), (t), p2m_unshare)
 
 /* Compatibility function exporting the old untyped interface */
 static inline unsigned long gmfn_to_mfn(struct domain *d, unsigned long gpfn)

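For illustration (this sketch is not part of the patch): with the old
must_succeed flag gone, a caller that relied on MEM_SHARING_MUST_SUCCEED
now has to check the returned type itself.  The helper below is invented
for this example and assumes the usual Xen headers (xen/include/asm-x86/p2m.h):

    /* Hypothetical caller, post-patch: unsharing is folded into the lookup */
    static unsigned long example_lookup(struct p2m_domain *p2m,
                                        unsigned long gfn)
    {
        p2m_type_t p2mt;
        mfn_t mfn;

        /* p2m_unshare breaks c-o-w sharing as part of the lookup itself */
        mfn = gfn_to_mfn_unshare(p2m, gfn, &p2mt);

        /* If the type is still shared, the unshare failed (e.g. no memory);
         * the caller must now handle that case explicitly. */
        if ( !p2m_is_valid(p2mt) || p2m_is_shared(p2mt) )
            return INVALID_MFN;

        return mfn_x(mfn);
    }
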
^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 05 of 17] x86/mm/p2m: Make p2m interfaces take struct domain arguments
  2011-06-02 12:20 [PATCH 00 of 17] mm/p2m cleanups Tim Deegan
                   ` (3 preceding siblings ...)
  2011-06-02 12:20 ` [PATCH 04 of 17] x86/mm/p2m: merge gfn_to_mfn_unshare with other gfn_to_mfn paths Tim Deegan
@ 2011-06-02 12:20 ` Tim Deegan
  2011-06-02 12:20 ` [PATCH 06 of 17] x86/mm/p2m: paging_p2m_ga_to_gfn() doesn't need so many arguments Tim Deegan
                   ` (11 subsequent siblings)
  16 siblings, 0 replies; 18+ messages in thread
From: Tim Deegan @ 2011-06-02 12:20 UTC (permalink / raw)
  To: xen-devel

# HG changeset patch
# User Tim Deegan <Tim.Deegan@citrix.com>
# Date 1307017012 -3600
# Node ID de0a051b36ceb01738348426fdd3d52a5776ba1a
# Parent  0d3e0a571fdddc873d1ff3750d15db6fc58fff8e
x86/mm/p2m: Make p2m interfaces take struct domain arguments.

As part of the nested HVM patch series, many p2m functions were changed
to take pointers to p2m tables rather than to domains.  This patch
reverses that for almost all of them, which:
 - gets rid of a lot of "p2m_get_hostp2m(d)" in code which really
   shouldn't have to know anything about how gfns become mfns.
 - ties sharing and paging interfaces to a domain, which is
   what they actually act on, rather than a particular p2m table.

In developing this patch it became clear that memory-sharing and nested
HVM are unlikely to work well together.  I haven't tried to fix that
here beyond adding some assertions around suspect paths (as this patch
is big enough with just the interface changes).
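
As a rough illustration of the new calling convention (not part of the
changeset; mfn, gfn, p2mt and p2ma stand for local variables of the
obvious types):

    /* Before: callers fetched the host p2m themselves */
    mfn = gfn_to_mfn(p2m_get_hostp2m(d), gfn, &p2mt);

    /* After: the wrappers take the domain and hide the p2m lookup */
    mfn = gfn_to_mfn(d, gfn, &p2mt);

    /* Code that really does act on a particular p2m table (e.g. a nested
     * p2m) uses the explicit _p2m variant instead */
    mfn = gfn_to_mfn_type_p2m(p2m, gfn, &p2mt, &p2ma, p2m_query);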

Signed-off-by: Tim Deegan <Tim.Deegan@citrix.com>

diff -r 0d3e0a571fdd -r de0a051b36ce xen/arch/x86/cpu/mcheck/vmce.c
--- a/xen/arch/x86/cpu/mcheck/vmce.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/cpu/mcheck/vmce.c	Thu Jun 02 13:16:52 2011 +0100
@@ -577,7 +577,6 @@ int is_vmce_ready(struct mcinfo_bank *ba
 int unmmap_broken_page(struct domain *d, mfn_t mfn, unsigned long gfn)
 {
     mfn_t r_mfn;
-    struct p2m_domain *p2m;
     p2m_type_t pt;
 
     /* Always trust dom0's MCE handler will prevent future access */
@@ -590,18 +589,11 @@ int unmmap_broken_page(struct domain *d,
     if ( !is_hvm_domain(d) || !paging_mode_hap(d) )
         return -ENOSYS;
 
-    p2m = p2m_get_hostp2m(d);
-    ASSERT(p2m);
-
-    /* This only happen for PoD memory, which should be handled seperetely */
-    if (gfn > p2m->max_mapped_pfn)
-        return -EINVAL;
-
-    r_mfn = gfn_to_mfn_query(p2m, gfn, &pt);
+    r_mfn = gfn_to_mfn_query(d, gfn, &pt);
     if ( p2m_to_mask(pt) & P2M_UNMAP_TYPES)
     {
         ASSERT(mfn_x(r_mfn) == mfn_x(mfn));
-        p2m_change_type(p2m, gfn, pt, p2m_ram_broken);
+        p2m_change_type(d, gfn, pt, p2m_ram_broken);
         return 0;
     }
 
diff -r 0d3e0a571fdd -r de0a051b36ce xen/arch/x86/debug.c
--- a/xen/arch/x86/debug.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/debug.c	Thu Jun 02 13:16:52 2011 +0100
@@ -58,7 +58,7 @@ dbg_hvm_va2mfn(dbgva_t vaddr, struct dom
         return INVALID_MFN;
     }
 
-    mfn = mfn_x(gfn_to_mfn(p2m_get_hostp2m(dp), gfn, &gfntype)); 
+    mfn = mfn_x(gfn_to_mfn(dp, gfn, &gfntype)); 
     if ( p2m_is_readonly(gfntype) && toaddr )
     {
         DBGP2("kdb:p2m_is_readonly: gfntype:%x\n", gfntype);
diff -r 0d3e0a571fdd -r de0a051b36ce xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/domain.c	Thu Jun 02 13:16:52 2011 +0100
@@ -164,9 +164,7 @@ void dump_pageframe_info(struct domain *
     }
 
     if ( is_hvm_domain(d) )
-    {
-        p2m_pod_dump_data(p2m_get_hostp2m(d));
-    }
+        p2m_pod_dump_data(d);
 
     spin_lock(&d->page_alloc_lock);
     page_list_for_each ( page, &d->xenpage_list )
diff -r 0d3e0a571fdd -r de0a051b36ce xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/domctl.c	Thu Jun 02 13:16:52 2011 +0100
@@ -977,7 +977,7 @@ long arch_do_domctl(
 
             ret = iomem_permit_access(d, mfn, mfn + nr_mfns - 1);
             for ( i = 0; i < nr_mfns; i++ )
-                set_mmio_p2m_entry(p2m_get_hostp2m(d), gfn+i, _mfn(mfn+i));
+                set_mmio_p2m_entry(d, gfn+i, _mfn(mfn+i));
         }
         else
         {
@@ -986,7 +986,7 @@ long arch_do_domctl(
                  gfn, mfn, nr_mfns);
 
             for ( i = 0; i < nr_mfns; i++ )
-                clear_mmio_p2m_entry(p2m_get_hostp2m(d), gfn+i);
+                clear_mmio_p2m_entry(d, gfn+i);
             ret = iomem_deny_access(d, mfn, mfn + nr_mfns - 1);
         }
 
diff -r 0d3e0a571fdd -r de0a051b36ce xen/arch/x86/hvm/emulate.c
--- a/xen/arch/x86/hvm/emulate.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/hvm/emulate.c	Thu Jun 02 13:16:52 2011 +0100
@@ -55,7 +55,6 @@ static int hvmemul_do_io(
     paddr_t value = ram_gpa;
     int value_is_ptr = (p_data == NULL);
     struct vcpu *curr = current;
-    struct p2m_domain *p2m = p2m_get_hostp2m(curr->domain);
     ioreq_t *p = get_ioreq(curr);
     unsigned long ram_gfn = paddr_to_pfn(ram_gpa);
     p2m_type_t p2mt;
@@ -63,10 +62,10 @@ static int hvmemul_do_io(
     int rc;
 
     /* Check for paged out page */
-    ram_mfn = gfn_to_mfn_unshare(p2m, ram_gfn, &p2mt);
+    ram_mfn = gfn_to_mfn_unshare(curr->domain, ram_gfn, &p2mt);
     if ( p2m_is_paging(p2mt) )
     {
-        p2m_mem_paging_populate(p2m, ram_gfn);
+        p2m_mem_paging_populate(curr->domain, ram_gfn);
         return X86EMUL_RETRY;
     }
     if ( p2m_is_shared(p2mt) )
@@ -640,7 +639,6 @@ static int hvmemul_rep_movs(
     unsigned long saddr, daddr, bytes;
     paddr_t sgpa, dgpa;
     uint32_t pfec = PFEC_page_present;
-    struct p2m_domain *p2m = p2m_get_hostp2m(current->domain);
     p2m_type_t p2mt;
     int rc, df = !!(ctxt->regs->eflags & X86_EFLAGS_DF);
     char *buf;
@@ -671,12 +669,12 @@ static int hvmemul_rep_movs(
     if ( rc != X86EMUL_OKAY )
         return rc;
 
-    (void)gfn_to_mfn(p2m, sgpa >> PAGE_SHIFT, &p2mt);
+    (void)gfn_to_mfn(current->domain, sgpa >> PAGE_SHIFT, &p2mt);
     if ( !p2m_is_ram(p2mt) && !p2m_is_grant(p2mt) )
         return hvmemul_do_mmio(
             sgpa, reps, bytes_per_rep, dgpa, IOREQ_READ, df, NULL);
 
-    (void)gfn_to_mfn(p2m, dgpa >> PAGE_SHIFT, &p2mt);
+    (void)gfn_to_mfn(current->domain, dgpa >> PAGE_SHIFT, &p2mt);
     if ( !p2m_is_ram(p2mt) && !p2m_is_grant(p2mt) )
         return hvmemul_do_mmio(
             dgpa, reps, bytes_per_rep, sgpa, IOREQ_WRITE, df, NULL);
diff -r 0d3e0a571fdd -r de0a051b36ce xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/hvm/hvm.c	Thu Jun 02 13:16:52 2011 +0100
@@ -347,17 +347,16 @@ static int hvm_set_ioreq_page(
     struct domain *d, struct hvm_ioreq_page *iorp, unsigned long gmfn)
 {
     struct page_info *page;
-    struct p2m_domain *p2m = p2m_get_hostp2m(d);
     p2m_type_t p2mt;
     unsigned long mfn;
     void *va;
 
-    mfn = mfn_x(gfn_to_mfn_unshare(p2m, gmfn, &p2mt));
+    mfn = mfn_x(gfn_to_mfn_unshare(d, gmfn, &p2mt));
     if ( !p2m_is_ram(p2mt) )
         return -EINVAL;
     if ( p2m_is_paging(p2mt) )
     {
-        p2m_mem_paging_populate(p2m, gmfn);
+        p2m_mem_paging_populate(d, gmfn);
         return -ENOENT;
     }
     if ( p2m_is_shared(p2mt) )
@@ -1181,7 +1180,7 @@ int hvm_hap_nested_page_fault(unsigned l
     p2m_access_t p2ma;
     mfn_t mfn;
     struct vcpu *v = current;
-    struct p2m_domain *p2m = NULL;
+    struct p2m_domain *p2m;
 
     /* On Nested Virtualization, walk the guest page table.
      * If this succeeds, all is fine.
@@ -1270,12 +1269,13 @@ int hvm_hap_nested_page_fault(unsigned l
 #ifdef __x86_64__
     /* Check if the page has been paged out */
     if ( p2m_is_paged(p2mt) || (p2mt == p2m_ram_paging_out) )
-        p2m_mem_paging_populate(p2m, gfn);
+        p2m_mem_paging_populate(v->domain, gfn);
 
     /* Mem sharing: unshare the page and try again */
     if ( p2mt == p2m_ram_shared )
     {
-        mem_sharing_unshare_page(p2m, gfn, 0);
+        ASSERT(!p2m_is_nestedp2m(p2m));
+        mem_sharing_unshare_page(p2m->domain, gfn, 0);
         return 1;
     }
 #endif
@@ -1289,7 +1289,7 @@ int hvm_hap_nested_page_fault(unsigned l
          * page.
          */
         paging_mark_dirty(v->domain, mfn_x(mfn));
-        p2m_change_type(p2m, gfn, p2m_ram_logdirty, p2m_ram_rw);
+        p2m_change_type(v->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
         return 1;
     }
 
@@ -1486,7 +1486,6 @@ int hvm_set_cr0(unsigned long value)
 {
     struct vcpu *v = current;
     p2m_type_t p2mt;
-    struct p2m_domain *p2m = p2m_get_hostp2m(v->domain);
     unsigned long gfn, mfn, old_value = v->arch.hvm_vcpu.guest_cr[0];
 
     HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx", value);
@@ -1526,7 +1525,7 @@ int hvm_set_cr0(unsigned long value)
         {
             /* The guest CR3 must be pointing to the guest physical. */
             gfn = v->arch.hvm_vcpu.guest_cr[3]>>PAGE_SHIFT;
-            mfn = mfn_x(gfn_to_mfn(p2m, gfn, &p2mt));
+            mfn = mfn_x(gfn_to_mfn(v->domain, gfn, &p2mt));
             if ( !p2m_is_ram(p2mt) || !mfn_valid(mfn) ||
                  !get_page(mfn_to_page(mfn), v->domain))
             {
@@ -1617,8 +1616,7 @@ int hvm_set_cr3(unsigned long value)
     {
         /* Shadow-mode CR3 change. Check PDBR and update refcounts. */
         HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
-        mfn = mfn_x(gfn_to_mfn(p2m_get_hostp2m(v->domain),
-            value >> PAGE_SHIFT, &p2mt));
+        mfn = mfn_x(gfn_to_mfn(v->domain, value >> PAGE_SHIFT, &p2mt));
         if ( !p2m_is_ram(p2mt) || !mfn_valid(mfn) ||
              !get_page(mfn_to_page(mfn), v->domain) )
               goto bad_cr3;
@@ -1764,23 +1762,23 @@ static void *__hvm_map_guest_frame(unsig
 {
     unsigned long mfn;
     p2m_type_t p2mt;
-    struct p2m_domain *p2m = p2m_get_hostp2m(current->domain);
+    struct domain *d = current->domain;
 
     mfn = mfn_x(writable
-                ? gfn_to_mfn_unshare(p2m, gfn, &p2mt)
-                : gfn_to_mfn(p2m, gfn, &p2mt));
+                ? gfn_to_mfn_unshare(d, gfn, &p2mt)
+                : gfn_to_mfn(d, gfn, &p2mt));
     if ( (p2m_is_shared(p2mt) && writable) || !p2m_is_ram(p2mt) )
         return NULL;
     if ( p2m_is_paging(p2mt) )
     {
-        p2m_mem_paging_populate(p2m, gfn);
+        p2m_mem_paging_populate(d, gfn);
         return NULL;
     }
 
     ASSERT(mfn_valid(mfn));
 
     if ( writable )
-        paging_mark_dirty(current->domain, mfn);
+        paging_mark_dirty(d, mfn);
 
     return map_domain_page(mfn);
 }
@@ -2182,7 +2180,6 @@ static enum hvm_copy_result __hvm_copy(
     void *buf, paddr_t addr, int size, unsigned int flags, uint32_t pfec)
 {
     struct vcpu *curr = current;
-    struct p2m_domain *p2m;
     unsigned long gfn, mfn;
     p2m_type_t p2mt;
     char *p;
@@ -2204,8 +2201,6 @@ static enum hvm_copy_result __hvm_copy(
         return HVMCOPY_unhandleable;
 #endif
 
-    p2m = p2m_get_hostp2m(curr->domain);
-
     while ( todo > 0 )
     {
         count = min_t(int, PAGE_SIZE - (addr & ~PAGE_MASK), todo);
@@ -2229,11 +2224,11 @@ static enum hvm_copy_result __hvm_copy(
             gfn = addr >> PAGE_SHIFT;
         }
 
-        mfn = mfn_x(gfn_to_mfn_unshare(p2m, gfn, &p2mt));
+        mfn = mfn_x(gfn_to_mfn_unshare(curr->domain, gfn, &p2mt));
 
         if ( p2m_is_paging(p2mt) )
         {
-            p2m_mem_paging_populate(p2m, gfn);
+            p2m_mem_paging_populate(curr->domain, gfn);
             return HVMCOPY_gfn_paged_out;
         }
         if ( p2m_is_shared(p2mt) )
@@ -3650,7 +3645,6 @@ long do_hvm_op(unsigned long op, XEN_GUE
     {
         struct xen_hvm_modified_memory a;
         struct domain *d;
-        struct p2m_domain *p2m;
         unsigned long pfn;
 
         if ( copy_from_guest(&a, arg, 1) )
@@ -3678,14 +3672,13 @@ long do_hvm_op(unsigned long op, XEN_GUE
         if ( !paging_mode_log_dirty(d) )
             goto param_fail3;
 
-        p2m = p2m_get_hostp2m(d);
         for ( pfn = a.first_pfn; pfn < a.first_pfn + a.nr; pfn++ )
         {
             p2m_type_t t;
-            mfn_t mfn = gfn_to_mfn(p2m, pfn, &t);
+            mfn_t mfn = gfn_to_mfn(d, pfn, &t);
             if ( p2m_is_paging(t) )
             {
-                p2m_mem_paging_populate(p2m, pfn);
+                p2m_mem_paging_populate(d, pfn);
 
                 rc = -EINVAL;
                 goto param_fail3;
@@ -3724,7 +3717,7 @@ long do_hvm_op(unsigned long op, XEN_GUE
         rc = -EINVAL;
         if ( is_hvm_domain(d) )
         {
-            gfn_to_mfn_unshare(p2m_get_hostp2m(d), a.pfn, &t);
+            gfn_to_mfn_unshare(d, a.pfn, &t);
             if ( p2m_is_mmio(t) )
                 a.mem_type =  HVMMEM_mmio_dm;
             else if ( p2m_is_readonly(t) )
@@ -3743,7 +3736,6 @@ long do_hvm_op(unsigned long op, XEN_GUE
     {
         struct xen_hvm_set_mem_type a;
         struct domain *d;
-        struct p2m_domain *p2m;
         unsigned long pfn;
         
         /* Interface types to internal p2m types */
@@ -3773,17 +3765,15 @@ long do_hvm_op(unsigned long op, XEN_GUE
         if ( a.hvmmem_type >= ARRAY_SIZE(memtype) )
             goto param_fail4;
 
-        p2m = p2m_get_hostp2m(d);
         for ( pfn = a.first_pfn; pfn < a.first_pfn + a.nr; pfn++ )
         {
             p2m_type_t t;
             p2m_type_t nt;
             mfn_t mfn;
-            mfn = gfn_to_mfn_unshare(p2m, pfn, &t);
+            mfn = gfn_to_mfn_unshare(d, pfn, &t);
             if ( p2m_is_paging(t) )
             {
-                p2m_mem_paging_populate(p2m, pfn);
-
+                p2m_mem_paging_populate(d, pfn);
                 rc = -EINVAL;
                 goto param_fail4;
             }
@@ -3801,7 +3791,7 @@ long do_hvm_op(unsigned long op, XEN_GUE
             }
             else
             {
-                nt = p2m_change_type(p2m, pfn, t, memtype[a.hvmmem_type]);
+                nt = p2m_change_type(d, pfn, t, memtype[a.hvmmem_type]);
                 if ( nt != t )
                 {
                     gdprintk(XENLOG_WARNING,
@@ -3877,7 +3867,7 @@ long do_hvm_op(unsigned long op, XEN_GUE
             mfn_t mfn;
             int success;
 
-            mfn = gfn_to_mfn_unshare(p2m, pfn, &t);
+            mfn = gfn_to_mfn_unshare(d, pfn, &t);
 
             p2m_lock(p2m);
             success = p2m->set_entry(p2m, pfn, mfn, 0, t, memaccess[a.hvmmem_access]);
diff -r 0d3e0a571fdd -r de0a051b36ce xen/arch/x86/hvm/mtrr.c
--- a/xen/arch/x86/hvm/mtrr.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/hvm/mtrr.c	Thu Jun 02 13:16:52 2011 +0100
@@ -390,7 +390,7 @@ uint32_t get_pat_flags(struct vcpu *v,
     {
         struct domain *d = v->domain;
         p2m_type_t p2mt;
-        gfn_to_mfn(p2m_get_hostp2m(d), paddr_to_pfn(gpaddr), &p2mt);
+        gfn_to_mfn(d, paddr_to_pfn(gpaddr), &p2mt);
         if (p2m_is_ram(p2mt))
             gdprintk(XENLOG_WARNING,
                     "Conflict occurs for a given guest l1e flags:%x "
diff -r 0d3e0a571fdd -r de0a051b36ce xen/arch/x86/hvm/stdvga.c
--- a/xen/arch/x86/hvm/stdvga.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/hvm/stdvga.c	Thu Jun 02 13:16:52 2011 +0100
@@ -469,7 +469,7 @@ static int mmio_move(struct hvm_hw_stdvg
     int i;
     int sign = p->df ? -1 : 1;
     p2m_type_t p2mt;
-    struct p2m_domain *p2m = p2m_get_hostp2m(current->domain);
+    struct domain *d = current->domain;
 
     if ( p->data_is_ptr )
     {
@@ -482,7 +482,7 @@ static int mmio_move(struct hvm_hw_stdvg
                 if ( hvm_copy_to_guest_phys(data, &tmp, p->size) !=
                      HVMCOPY_okay )
                 {
-                    (void)gfn_to_mfn(p2m, data >> PAGE_SHIFT, &p2mt);
+                    (void)gfn_to_mfn(d, data >> PAGE_SHIFT, &p2mt);
                     /*
                      * The only case we handle is vga_mem <-> vga_mem.
                      * Anything else disables caching and leaves it to qemu-dm.
@@ -504,7 +504,7 @@ static int mmio_move(struct hvm_hw_stdvg
                 if ( hvm_copy_from_guest_phys(&tmp, data, p->size) !=
                      HVMCOPY_okay )
                 {
-                    (void)gfn_to_mfn(p2m, data >> PAGE_SHIFT, &p2mt);
+                    (void)gfn_to_mfn(d, data >> PAGE_SHIFT, &p2mt);
                     if ( (p2mt != p2m_mmio_dm) || (data < VGA_MEM_BASE) ||
                          ((data + p->size) > (VGA_MEM_BASE + VGA_MEM_SIZE)) )
                         return 0;
diff -r 0d3e0a571fdd -r de0a051b36ce xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/hvm/svm/svm.c	Thu Jun 02 13:16:52 2011 +0100
@@ -244,7 +244,7 @@ static int svm_vmcb_restore(struct vcpu 
     {
         if ( c->cr0 & X86_CR0_PG )
         {
-            mfn = mfn_x(gfn_to_mfn(p2m, c->cr3 >> PAGE_SHIFT, &p2mt));
+            mfn = mfn_x(gfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT, &p2mt));
             if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
             {
                 gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"\n",
@@ -1127,6 +1127,7 @@ static void svm_do_nested_pgfault(struct
     unsigned long gfn = gpa >> PAGE_SHIFT;
     mfn_t mfn;
     p2m_type_t p2mt;
+    p2m_access_t p2ma;
     struct p2m_domain *p2m = NULL;
 
     ret = hvm_hap_nested_page_fault(gpa, 0, ~0ul, 0, 0, 0, 0);
@@ -1143,7 +1144,7 @@ static void svm_do_nested_pgfault(struct
         p2m = p2m_get_p2m(v);
         _d.gpa = gpa;
         _d.qualification = 0;
-        _d.mfn = mfn_x(gfn_to_mfn_query(p2m, gfn, &_d.p2mt));
+        _d.mfn = mfn_x(gfn_to_mfn_type_p2m(p2m, gfn, &_d.p2mt, &p2ma, p2m_query));
         
         __trace_var(TRC_HVM_NPF, 0, sizeof(_d), &_d);
     }
@@ -1163,7 +1164,7 @@ static void svm_do_nested_pgfault(struct
     if ( p2m == NULL )
         p2m = p2m_get_p2m(v);
     /* Everything else is an error. */
-    mfn = gfn_to_mfn_guest(p2m, gfn, &p2mt);
+    mfn = gfn_to_mfn_type_p2m(p2m, gfn, &p2mt, &p2ma, p2m_guest);
     gdprintk(XENLOG_ERR,
          "SVM violation gpa %#"PRIpaddr", mfn %#lx, type %i\n",
          gpa, mfn_x(mfn), p2mt);
diff -r 0d3e0a571fdd -r de0a051b36ce xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/hvm/vmx/vmx.c	Thu Jun 02 13:16:52 2011 +0100
@@ -476,8 +476,7 @@ static int vmx_restore_cr0_cr3(
     {
         if ( cr0 & X86_CR0_PG )
         {
-            mfn = mfn_x(gfn_to_mfn(p2m_get_hostp2m(v->domain),
-                cr3 >> PAGE_SHIFT, &p2mt));
+            mfn = mfn_x(gfn_to_mfn(v->domain, cr3 >> PAGE_SHIFT, &p2mt));
             if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
             {
                 gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%lx\n", cr3);
@@ -993,8 +992,7 @@ static void vmx_load_pdptrs(struct vcpu 
     if ( cr3 & 0x1fUL )
         goto crash;
 
-    mfn = mfn_x(gfn_to_mfn(p2m_get_hostp2m(v->domain),
-        cr3 >> PAGE_SHIFT, &p2mt));
+    mfn = mfn_x(gfn_to_mfn(v->domain, cr3 >> PAGE_SHIFT, &p2mt));
     if ( !p2m_is_ram(p2mt) )
         goto crash;
 
@@ -1752,8 +1750,7 @@ static int vmx_alloc_vlapic_mapping(stru
     if ( apic_va == NULL )
         return -ENOMEM;
     share_xen_page_with_guest(virt_to_page(apic_va), d, XENSHARE_writable);
-    set_mmio_p2m_entry(
-        p2m_get_hostp2m(d), paddr_to_pfn(APIC_DEFAULT_PHYS_BASE),
+    set_mmio_p2m_entry(d, paddr_to_pfn(APIC_DEFAULT_PHYS_BASE),
         _mfn(virt_to_mfn(apic_va)));
     d->arch.hvm_domain.vmx.apic_access_mfn = virt_to_mfn(apic_va);
 
@@ -1959,7 +1956,7 @@ static void ept_handle_violation(unsigne
     unsigned long gla, gfn = gpa >> PAGE_SHIFT;
     mfn_t mfn;
     p2m_type_t p2mt;
-    struct p2m_domain *p2m = p2m_get_hostp2m(current->domain);
+    struct domain *d = current->domain;
 
     if ( tb_init_done )
     {
@@ -1972,7 +1969,7 @@ static void ept_handle_violation(unsigne
 
         _d.gpa = gpa;
         _d.qualification = qualification;
-        _d.mfn = mfn_x(gfn_to_mfn_query(p2m, gfn, &_d.p2mt));
+        _d.mfn = mfn_x(gfn_to_mfn_query(d, gfn, &_d.p2mt));
         
         __trace_var(TRC_HVM_NPF, 0, sizeof(_d), &_d);
     }
@@ -1988,7 +1985,7 @@ static void ept_handle_violation(unsigne
         return;
 
     /* Everything else is an error. */
-    mfn = gfn_to_mfn_guest(p2m, gfn, &p2mt);
+    mfn = gfn_to_mfn_guest(d, gfn, &p2mt);
     gdprintk(XENLOG_ERR, "EPT violation %#lx (%c%c%c/%c%c%c), "
              "gpa %#"PRIpaddr", mfn %#lx, type %i.\n", 
              qualification, 
@@ -2000,7 +1997,7 @@ static void ept_handle_violation(unsigne
              (qualification & EPT_EFFECTIVE_EXEC) ? 'x' : '-',
              gpa, mfn_x(mfn), p2mt);
 
-    ept_walk_table(current->domain, gfn);
+    ept_walk_table(d, gfn);
 
     if ( qualification & EPT_GLA_VALID )
     {
@@ -2008,7 +2005,7 @@ static void ept_handle_violation(unsigne
         gdprintk(XENLOG_ERR, " --- GLA %#lx\n", gla);
     }
 
-    domain_crash(current->domain);
+    domain_crash(d);
 }
 
 static void vmx_failed_vmentry(unsigned int exit_reason,
diff -r 0d3e0a571fdd -r de0a051b36ce xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm.c	Thu Jun 02 13:16:52 2011 +0100
@@ -1808,8 +1808,7 @@ static int mod_l1_entry(l1_pgentry_t *pl
     if ( l1e_get_flags(nl1e) & _PAGE_PRESENT )
     {
         /* Translate foreign guest addresses. */
-        mfn = mfn_x(gfn_to_mfn(p2m_get_hostp2m(pg_dom),
-            l1e_get_pfn(nl1e), &p2mt));
+        mfn = mfn_x(gfn_to_mfn(pg_dom, l1e_get_pfn(nl1e), &p2mt));
         if ( !p2m_is_ram(p2mt) || unlikely(mfn == INVALID_MFN) )
             return -EINVAL;
         ASSERT((mfn & ~(PADDR_MASK >> PAGE_SHIFT)) == 0);
@@ -3482,13 +3481,13 @@ int do_mmu_update(
 
             req.ptr -= cmd;
             gmfn = req.ptr >> PAGE_SHIFT;
-            mfn = mfn_x(gfn_to_mfn(p2m_get_hostp2m(pt_owner), gmfn, &p2mt));
+            mfn = mfn_x(gfn_to_mfn(pt_owner, gmfn, &p2mt));
             if ( !p2m_is_valid(p2mt) )
               mfn = INVALID_MFN;
 
             if ( p2m_is_paged(p2mt) )
             {
-                p2m_mem_paging_populate(p2m_get_hostp2m(pg_owner), gmfn);
+                p2m_mem_paging_populate(pg_owner, gmfn);
 
                 rc = -ENOENT;
                 break;
@@ -3520,13 +3519,11 @@ int do_mmu_update(
                 {
                     l1_pgentry_t l1e = l1e_from_intpte(req.val);
                     p2m_type_t l1e_p2mt;
-                    gfn_to_mfn(p2m_get_hostp2m(pg_owner),
-                        l1e_get_pfn(l1e), &l1e_p2mt);
+                    gfn_to_mfn(pg_owner, l1e_get_pfn(l1e), &l1e_p2mt);
 
                     if ( p2m_is_paged(l1e_p2mt) )
                     {
-                        p2m_mem_paging_populate(p2m_get_hostp2m(pg_owner),
-                            l1e_get_pfn(l1e));
+                        p2m_mem_paging_populate(pg_owner, l1e_get_pfn(l1e));
                         rc = -ENOENT;
                         break;
                     }
@@ -3544,7 +3541,7 @@ int do_mmu_update(
                         /* Unshare the page for RW foreign mappings */
                         if ( l1e_get_flags(l1e) & _PAGE_RW )
                         {
-                            rc = mem_sharing_unshare_page(p2m_get_hostp2m(pg_owner), 
+                            rc = mem_sharing_unshare_page(pg_owner, 
                                                           l1e_get_pfn(l1e), 
                                                           0);
                             if ( rc )
@@ -3562,12 +3559,11 @@ int do_mmu_update(
                 {
                     l2_pgentry_t l2e = l2e_from_intpte(req.val);
                     p2m_type_t l2e_p2mt;
-                    gfn_to_mfn(p2m_get_hostp2m(pg_owner), l2e_get_pfn(l2e), &l2e_p2mt);
+                    gfn_to_mfn(pg_owner, l2e_get_pfn(l2e), &l2e_p2mt);
 
                     if ( p2m_is_paged(l2e_p2mt) )
                     {
-                        p2m_mem_paging_populate(p2m_get_hostp2m(pg_owner),
-                            l2e_get_pfn(l2e));
+                        p2m_mem_paging_populate(pg_owner, l2e_get_pfn(l2e));
                         rc = -ENOENT;
                         break;
                     }
@@ -3591,12 +3587,11 @@ int do_mmu_update(
                 {
                     l3_pgentry_t l3e = l3e_from_intpte(req.val);
                     p2m_type_t l3e_p2mt;
-                    gfn_to_mfn(p2m_get_hostp2m(pg_owner), l3e_get_pfn(l3e), &l3e_p2mt);
+                    gfn_to_mfn(pg_owner, l3e_get_pfn(l3e), &l3e_p2mt);
 
                     if ( p2m_is_paged(l3e_p2mt) )
                     {
-                        p2m_mem_paging_populate(p2m_get_hostp2m(pg_owner),
-                            l3e_get_pfn(l3e));
+                        p2m_mem_paging_populate(pg_owner, l3e_get_pfn(l3e));
                         rc = -ENOENT;
                         break;
                     }
@@ -3620,13 +3615,11 @@ int do_mmu_update(
                 {
                     l4_pgentry_t l4e = l4e_from_intpte(req.val);
                     p2m_type_t l4e_p2mt;
-                    gfn_to_mfn(p2m_get_hostp2m(pg_owner),
-                        l4e_get_pfn(l4e), &l4e_p2mt);
+                    gfn_to_mfn(pg_owner, l4e_get_pfn(l4e), &l4e_p2mt);
 
                     if ( p2m_is_paged(l4e_p2mt) )
                     {
-                        p2m_mem_paging_populate(p2m_get_hostp2m(pg_owner),
-                            l4e_get_pfn(l4e));
+                        p2m_mem_paging_populate(pg_owner, l4e_get_pfn(l4e));
                         rc = -ENOENT;
                         break;
                     }
@@ -4003,7 +3996,7 @@ static int create_grant_p2m_mapping(uint
         p2mt = p2m_grant_map_ro;
     else
         p2mt = p2m_grant_map_rw;
-    rc = guest_physmap_add_entry(p2m_get_hostp2m(current->domain),
+    rc = guest_physmap_add_entry(current->domain,
                                  addr >> PAGE_SHIFT, frame, 0, p2mt);
     if ( rc )
         return GNTST_general_error;
@@ -4053,7 +4046,7 @@ static int replace_grant_p2m_mapping(
     if ( new_addr != 0 || (flags & GNTMAP_contains_pte) )
         return GNTST_general_error;
 
-    old_mfn = gfn_to_mfn(p2m_get_hostp2m(d), gfn, &type);
+    old_mfn = gfn_to_mfn(d, gfn, &type);
     if ( !p2m_is_grant(type) || mfn_x(old_mfn) != frame )
     {
         gdprintk(XENLOG_WARNING,
@@ -4652,8 +4645,7 @@ long arch_memory_op(int op, XEN_GUEST_HA
         {
             p2m_type_t p2mt;
 
-            xatp.idx = mfn_x(gfn_to_mfn_unshare(p2m_get_hostp2m(d),
-                                                xatp.idx, &p2mt));
+            xatp.idx = mfn_x(gfn_to_mfn_unshare(d, xatp.idx, &p2mt));
             /* If the page is still shared, exit early */
             if ( p2m_is_shared(p2mt) )
             {
diff -r 0d3e0a571fdd -r de0a051b36ce xen/arch/x86/mm/guest_walk.c
--- a/xen/arch/x86/mm/guest_walk.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/guest_walk.c	Thu Jun 02 13:16:52 2011 +0100
@@ -92,12 +92,14 @@ static inline void *map_domain_gfn(struc
                                    p2m_type_t *p2mt,
                                    uint32_t *rc) 
 {
+    p2m_access_t a;
+
     /* Translate the gfn, unsharing if shared */
-    *mfn = gfn_to_mfn_unshare(p2m, gfn_x(gfn), p2mt);
+    *mfn = gfn_to_mfn_type_p2m(p2m, gfn_x(gfn), p2mt, &a, p2m_unshare);
     if ( p2m_is_paging(*p2mt) )
     {
-        p2m_mem_paging_populate(p2m, gfn_x(gfn));
-
+        ASSERT(!p2m_is_nestedp2m(p2m));
+        p2m_mem_paging_populate(p2m->domain, gfn_x(gfn));
         *rc = _PAGE_PAGED;
         return NULL;
     }
diff -r 0d3e0a571fdd -r de0a051b36ce xen/arch/x86/mm/hap/guest_walk.c
--- a/xen/arch/x86/mm/hap/guest_walk.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/hap/guest_walk.c	Thu Jun 02 13:16:52 2011 +0100
@@ -54,13 +54,16 @@ unsigned long hap_p2m_ga_to_gfn(GUEST_PA
     mfn_t top_mfn;
     void *top_map;
     p2m_type_t p2mt;
+    p2m_access_t p2ma;
     walk_t gw;
 
     /* Get the top-level table's MFN */
-    top_mfn = gfn_to_mfn_unshare(p2m, cr3 >> PAGE_SHIFT, &p2mt);
+    top_mfn = gfn_to_mfn_type_p2m(p2m, cr3 >> PAGE_SHIFT, 
+                                  &p2mt, &p2ma, p2m_unshare);
     if ( p2m_is_paging(p2mt) )
     {
-        p2m_mem_paging_populate(p2m, cr3 >> PAGE_SHIFT);
+        ASSERT(!p2m_is_nestedp2m(p2m));
+        p2m_mem_paging_populate(p2m->domain, cr3 >> PAGE_SHIFT);
 
         pfec[0] = PFEC_page_paged;
         return INVALID_GFN;
@@ -89,10 +92,11 @@ unsigned long hap_p2m_ga_to_gfn(GUEST_PA
     if ( missing == 0 )
     {
         gfn_t gfn = guest_l1e_get_gfn(gw.l1e);
-        gfn_to_mfn_unshare(p2m, gfn_x(gfn), &p2mt);
+        gfn_to_mfn_type_p2m(p2m, gfn_x(gfn), &p2mt, &p2ma, p2m_unshare);
         if ( p2m_is_paging(p2mt) )
         {
-            p2m_mem_paging_populate(p2m, gfn_x(gfn));
+            ASSERT(!p2m_is_nestedp2m(p2m));
+            p2m_mem_paging_populate(p2m->domain, gfn_x(gfn));
 
             pfec[0] = PFEC_page_paged;
             return INVALID_GFN;
diff -r 0d3e0a571fdd -r de0a051b36ce xen/arch/x86/mm/hap/hap.c
--- a/xen/arch/x86/mm/hap/hap.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/hap/hap.c	Thu Jun 02 13:16:52 2011 +0100
@@ -71,7 +71,7 @@ static int hap_enable_vram_tracking(stru
 
     /* set l1e entries of P2M table to be read-only. */
     for (i = dirty_vram->begin_pfn; i < dirty_vram->end_pfn; i++)
-        p2m_change_type(p2m_get_hostp2m(d), i, p2m_ram_rw, p2m_ram_logdirty);
+        p2m_change_type(d, i, p2m_ram_rw, p2m_ram_logdirty);
 
     flush_tlb_mask(d->domain_dirty_cpumask);
     return 0;
@@ -91,7 +91,7 @@ static int hap_disable_vram_tracking(str
 
     /* set l1e entries of P2M table with normal mode */
     for (i = dirty_vram->begin_pfn; i < dirty_vram->end_pfn; i++)
-        p2m_change_type(p2m_get_hostp2m(d), i, p2m_ram_logdirty, p2m_ram_rw);
+        p2m_change_type(d, i, p2m_ram_logdirty, p2m_ram_rw);
 
     flush_tlb_mask(d->domain_dirty_cpumask);
     return 0;
@@ -107,7 +107,7 @@ static void hap_clean_vram_tracking(stru
 
     /* set l1e entries of P2M table to be read-only. */
     for (i = dirty_vram->begin_pfn; i < dirty_vram->end_pfn; i++)
-        p2m_change_type(p2m_get_hostp2m(d), i, p2m_ram_rw, p2m_ram_logdirty);
+        p2m_change_type(d, i, p2m_ram_rw, p2m_ram_logdirty);
 
     flush_tlb_mask(d->domain_dirty_cpumask);
 }
@@ -201,8 +201,7 @@ static int hap_enable_log_dirty(struct d
     hap_unlock(d);
 
     /* set l1e entries of P2M table to be read-only. */
-    p2m_change_entry_type_global(p2m_get_hostp2m(d),
-        p2m_ram_rw, p2m_ram_logdirty);
+    p2m_change_entry_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
     flush_tlb_mask(d->domain_dirty_cpumask);
     return 0;
 }
@@ -214,16 +213,14 @@ static int hap_disable_log_dirty(struct 
     hap_unlock(d);
 
     /* set l1e entries of P2M table with normal mode */
-    p2m_change_entry_type_global(p2m_get_hostp2m(d),
-        p2m_ram_logdirty, p2m_ram_rw);
+    p2m_change_entry_type_global(d, p2m_ram_logdirty, p2m_ram_rw);
     return 0;
 }
 
 static void hap_clean_dirty_bitmap(struct domain *d)
 {
     /* set l1e entries of P2M table to be read-only. */
-    p2m_change_entry_type_global(p2m_get_hostp2m(d),
-        p2m_ram_rw, p2m_ram_logdirty);
+    p2m_change_entry_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
     flush_tlb_mask(d->domain_dirty_cpumask);
 }
 
diff -r 0d3e0a571fdd -r de0a051b36ce xen/arch/x86/mm/hap/nested_hap.c
--- a/xen/arch/x86/mm/hap/nested_hap.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/hap/nested_hap.c	Thu Jun 02 13:16:52 2011 +0100
@@ -123,9 +123,10 @@ nestedhap_walk_L0_p2m(struct p2m_domain 
 {
     mfn_t mfn;
     p2m_type_t p2mt;
+    p2m_access_t p2ma;
 
-    /* we use gfn_to_mfn_query() function to walk L0 P2M table */
-    mfn = gfn_to_mfn_query(p2m, L1_gpa >> PAGE_SHIFT, &p2mt);
+    /* walk L0 P2M table */
+    mfn = gfn_to_mfn_type_p2m(p2m, L1_gpa >> PAGE_SHIFT, &p2mt, &p2ma, p2m_query);
 
     if ( p2m_is_paging(p2mt) || p2m_is_shared(p2mt) || !p2m_is_ram(p2mt) )
         return NESTEDHVM_PAGEFAULT_ERROR;
diff -r 0d3e0a571fdd -r de0a051b36ce xen/arch/x86/mm/mem_event.c
--- a/xen/arch/x86/mm/mem_event.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/mem_event.c	Thu Jun 02 13:16:52 2011 +0100
@@ -252,7 +252,7 @@ int mem_event_domctl(struct domain *d, x
             /* Get MFN of ring page */
             guest_get_eff_l1e(v, ring_addr, &l1e);
             gfn = l1e_get_pfn(l1e);
-            ring_mfn = gfn_to_mfn(p2m_get_hostp2m(dom_mem_event), gfn, &p2mt);
+            ring_mfn = gfn_to_mfn(dom_mem_event, gfn, &p2mt);
 
             rc = -EINVAL;
             if ( unlikely(!mfn_valid(mfn_x(ring_mfn))) )
@@ -261,7 +261,7 @@ int mem_event_domctl(struct domain *d, x
             /* Get MFN of shared page */
             guest_get_eff_l1e(v, shared_addr, &l1e);
             gfn = l1e_get_pfn(l1e);
-            shared_mfn = gfn_to_mfn(p2m_get_hostp2m(dom_mem_event), gfn, &p2mt);
+            shared_mfn = gfn_to_mfn(dom_mem_event, gfn, &p2mt);
 
             rc = -EINVAL;
             if ( unlikely(!mfn_valid(mfn_x(shared_mfn))) )
diff -r 0d3e0a571fdd -r de0a051b36ce xen/arch/x86/mm/mem_paging.c
--- a/xen/arch/x86/mm/mem_paging.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/mem_paging.c	Thu Jun 02 13:16:52 2011 +0100
@@ -28,9 +28,6 @@
 int mem_paging_domctl(struct domain *d, xen_domctl_mem_event_op_t *mec,
                       XEN_GUEST_HANDLE(void) u_domctl)
 {
-    int rc;
-    struct p2m_domain *p2m = p2m_get_hostp2m(d);
-
     /* Only HAP is supported */
     if ( !hap_enabled(d) )
          return -ENODEV;
@@ -40,37 +37,35 @@ int mem_paging_domctl(struct domain *d, 
     case XEN_DOMCTL_MEM_EVENT_OP_PAGING_NOMINATE:
     {
         unsigned long gfn = mec->gfn;
-        rc = p2m_mem_paging_nominate(p2m, gfn);
+        return p2m_mem_paging_nominate(d, gfn);
     }
     break;
 
     case XEN_DOMCTL_MEM_EVENT_OP_PAGING_EVICT:
     {
         unsigned long gfn = mec->gfn;
-        rc = p2m_mem_paging_evict(p2m, gfn);
+        return p2m_mem_paging_evict(d, gfn);
     }
     break;
 
     case XEN_DOMCTL_MEM_EVENT_OP_PAGING_PREP:
     {
         unsigned long gfn = mec->gfn;
-        rc = p2m_mem_paging_prep(p2m, gfn);
+        return p2m_mem_paging_prep(d, gfn);
     }
     break;
 
     case XEN_DOMCTL_MEM_EVENT_OP_PAGING_RESUME:
     {
-        p2m_mem_paging_resume(p2m);
-        rc = 0;
+        p2m_mem_paging_resume(d);
+        return 0;
     }
     break;
 
     default:
-        rc = -ENOSYS;
+        return -ENOSYS;
         break;
     }
-
-    return rc;
 }
 
 
diff -r 0d3e0a571fdd -r de0a051b36ce xen/arch/x86/mm/mem_sharing.c
--- a/xen/arch/x86/mm/mem_sharing.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/mem_sharing.c	Thu Jun 02 13:16:52 2011 +0100
@@ -252,7 +252,6 @@ static void mem_sharing_audit(void)
             list_for_each(le, &e->gfns)
             {
                 struct domain *d;
-                struct p2m_domain *p2m;
                 p2m_type_t t;
                 mfn_t mfn;
 
@@ -264,8 +263,7 @@ static void mem_sharing_audit(void)
                             g->domain, g->gfn, mfn_x(e->mfn));
                     continue;
                 }
-                p2m = p2m_get_hostp2m(d);
-                mfn = gfn_to_mfn(p2m, g->gfn, &t); 
+                mfn = gfn_to_mfn(d, g->gfn, &t); 
                 if(mfn_x(mfn) != mfn_x(e->mfn))
                     MEM_SHARING_DEBUG("Incorrect P2M for d=%d, PFN=%lx."
                                       "Expecting MFN=%ld, got %ld\n",
@@ -376,7 +374,7 @@ int mem_sharing_debug_gfn(struct domain 
     p2m_type_t p2mt;
     mfn_t mfn;
 
-    mfn = gfn_to_mfn(p2m_get_hostp2m(d), gfn, &p2mt);
+    mfn = gfn_to_mfn(d, gfn, &p2mt);
 
     printk("Debug for domain=%d, gfn=%lx, ", 
             d->domain_id, 
@@ -485,7 +483,7 @@ int mem_sharing_debug_gref(struct domain
     return mem_sharing_debug_gfn(d, gfn); 
 }
 
-int mem_sharing_nominate_page(struct p2m_domain *p2m, 
+int mem_sharing_nominate_page(struct domain *d,
                               unsigned long gfn,
                               int expected_refcnt,
                               shr_handle_t *phandle)
@@ -497,12 +495,11 @@ int mem_sharing_nominate_page(struct p2m
     shr_handle_t handle;
     shr_hash_entry_t *hash_entry;
     struct gfn_info *gfn_info;
-    struct domain *d = p2m->domain;
 
     *phandle = 0UL;
 
     shr_lock(); 
-    mfn = gfn_to_mfn(p2m, gfn, &p2mt);
+    mfn = gfn_to_mfn(d, gfn, &p2mt);
 
     /* Check if mfn is valid */
     ret = -EINVAL;
@@ -540,7 +537,7 @@ int mem_sharing_nominate_page(struct p2m
     }
 
     /* Change the p2m type */
-    if(p2m_change_type(p2m, gfn, p2mt, p2m_ram_shared) != p2mt) 
+    if(p2m_change_type(d, gfn, p2mt, p2m_ram_shared) != p2mt) 
     {
         /* This is unlikely, as the type must have changed since we've checked
          * it a few lines above.
@@ -602,7 +599,7 @@ int mem_sharing_share_pages(shr_handle_t
         list_del(&gfn->list);
         d = get_domain_by_id(gfn->domain);
         BUG_ON(!d);
-        BUG_ON(set_shared_p2m_entry(p2m_get_hostp2m(d), gfn->gfn, se->mfn) == 0);
+        BUG_ON(set_shared_p2m_entry(d, gfn->gfn, se->mfn) == 0);
         put_domain(d);
         list_add(&gfn->list, &se->gfns);
         put_page_and_type(cpage);
@@ -621,7 +618,7 @@ err_out:
     return ret;
 }
 
-int mem_sharing_unshare_page(struct p2m_domain *p2m,
+int mem_sharing_unshare_page(struct domain *d,
                              unsigned long gfn, 
                              uint16_t flags)
 {
@@ -634,13 +631,12 @@ int mem_sharing_unshare_page(struct p2m_
     struct gfn_info *gfn_info = NULL;
     shr_handle_t handle;
     struct list_head *le;
-    struct domain *d = p2m->domain;
 
     mem_sharing_audit();
     /* Remove the gfn_info from the list */
     shr_lock();
     
-    mfn = gfn_to_mfn(p2m, gfn, &p2mt);
+    mfn = gfn_to_mfn(d, gfn, &p2mt);
     
     /* Has someone already unshared it? */
     if (!p2m_is_shared(p2mt)) {
@@ -706,7 +702,7 @@ gfn_found:
     unmap_domain_page(s);
     unmap_domain_page(t);
 
-    BUG_ON(set_shared_p2m_entry(p2m, gfn, page_to_mfn(page)) == 0);
+    BUG_ON(set_shared_p2m_entry(d, gfn, page_to_mfn(page)) == 0);
     put_page_and_type(old_page);
 
 private_page_found:    
@@ -717,7 +713,7 @@ private_page_found:
     else
         atomic_dec(&nr_saved_mfns);
 
-    if(p2m_change_type(p2m, gfn, p2m_ram_shared, p2m_ram_rw) != 
+    if(p2m_change_type(d, gfn, p2m_ram_shared, p2m_ram_rw) != 
                                                 p2m_ram_shared) 
     {
         printk("Could not change p2m type.\n");
@@ -754,7 +750,7 @@ int mem_sharing_domctl(struct domain *d,
             shr_handle_t handle;
             if(!mem_sharing_enabled(d))
                 return -EINVAL;
-            rc = mem_sharing_nominate_page(p2m_get_hostp2m(d), gfn, 0, &handle);
+            rc = mem_sharing_nominate_page(d, gfn, 0, &handle);
             mec->u.nominate.handle = handle;
             mem_sharing_audit();
         }
@@ -770,8 +766,7 @@ int mem_sharing_domctl(struct domain *d,
                 return -EINVAL;
             if(mem_sharing_gref_to_gfn(d, gref, &gfn) < 0)
                 return -EINVAL;
-            rc = mem_sharing_nominate_page(p2m_get_hostp2m(d),
-                gfn, 3, &handle);
+            rc = mem_sharing_nominate_page(d, gfn, 3, &handle);
             mec->u.nominate.handle = handle;
             mem_sharing_audit();
         }
diff -r 0d3e0a571fdd -r de0a051b36ce xen/arch/x86/mm/p2m-pod.c
--- a/xen/arch/x86/mm/p2m-pod.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/p2m-pod.c	Thu Jun 02 13:16:52 2011 +0100
@@ -518,7 +518,7 @@ p2m_pod_decrease_reservation(struct doma
     {
         p2m_type_t t;
 
-        gfn_to_mfn_query(p2m, gpfn + i, &t);
+        gfn_to_mfn_query(d, gpfn + i, &t);
 
         if ( t == p2m_populate_on_demand )
             pod++;
@@ -558,7 +558,7 @@ p2m_pod_decrease_reservation(struct doma
         mfn_t mfn;
         p2m_type_t t;
 
-        mfn = gfn_to_mfn_query(p2m, gpfn + i, &t);
+        mfn = gfn_to_mfn_query(d, gpfn + i, &t);
         if ( t == p2m_populate_on_demand )
         {
             set_p2m_entry(p2m, gpfn + i, _mfn(INVALID_MFN), 0, p2m_invalid, p2m->default_access);
@@ -606,9 +606,9 @@ out:
     return ret;
 }
 
-void
-p2m_pod_dump_data(struct p2m_domain *p2m)
+void p2m_pod_dump_data(struct domain *d)
 {
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
     printk("    PoD entries=%d cachesize=%d\n",
            p2m->pod.entry_count, p2m->pod.count);
 }
@@ -639,7 +639,7 @@ p2m_pod_zero_check_superpage(struct p2m_
     for ( i=0; i<SUPERPAGE_PAGES; i++ )
     {
         
-        mfn = gfn_to_mfn_query(p2m, gfn + i, &type);
+        mfn = gfn_to_mfn_query(d, gfn + i, &type);
 
         if ( i == 0 )
         {
@@ -767,7 +767,7 @@ p2m_pod_zero_check(struct p2m_domain *p2
     /* First, get the gfn list, translate to mfns, and map the pages. */
     for ( i=0; i<count; i++ )
     {
-        mfns[i] = gfn_to_mfn_query(p2m, gfns[i], types + i);
+        mfns[i] = gfn_to_mfn_query(d, gfns[i], types + i);
         /* If this is ram, and not a pagetable or from the xen heap, and probably not mapped
            elsewhere, map it; otherwise, skip. */
         if ( p2m_is_ram(types[i])
@@ -906,7 +906,7 @@ p2m_pod_emergency_sweep(struct p2m_domai
     /* FIXME: Figure out how to avoid superpages */
     for ( i=p2m->pod.reclaim_single; i > 0 ; i-- )
     {
-        gfn_to_mfn_query(p2m, i, &t );
+        gfn_to_mfn_query(p2m->domain, i, &t );
         if ( p2m_is_ram(t) )
         {
             gfns[j] = i;
@@ -1095,7 +1095,7 @@ guest_physmap_mark_populate_on_demand(st
     /* Make sure all gpfns are unused */
     for ( i = 0; i < (1UL << order); i++ )
     {
-        omfn = gfn_to_mfn_query(p2m, gfn + i, &ot);
+        omfn = gfn_to_mfn_query(d, gfn + i, &ot);
         if ( p2m_is_ram(ot) )
         {
             printk("%s: gfn_to_mfn returned type %d!\n",
diff -r 0d3e0a571fdd -r de0a051b36ce xen/arch/x86/mm/p2m-pt.c
--- a/xen/arch/x86/mm/p2m-pt.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/p2m-pt.c	Thu Jun 02 13:16:52 2011 +0100
@@ -677,10 +677,9 @@ out:
     return mfn;
 }
 
-
 static mfn_t
-p2m_gfn_to_mfn(struct p2m_domain *p2m, unsigned long gfn, p2m_type_t *t, p2m_access_t *a,
-               p2m_query_t q)
+p2m_gfn_to_mfn(struct p2m_domain *p2m, unsigned long gfn, 
+               p2m_type_t *t, p2m_access_t *a, p2m_query_t q)
 {
     mfn_t mfn;
     paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
@@ -697,8 +696,6 @@ p2m_gfn_to_mfn(struct p2m_domain *p2m, u
     /* Not implemented except with EPT */
     *a = p2m_access_rwx; 
 
-    mfn = pagetable_get_mfn(p2m_get_pagetable(p2m));
-
     if ( gfn > p2m->max_mapped_pfn )
         /* This pfn is higher than the highest the p2m map currently holds */
         return _mfn(INVALID_MFN);
@@ -707,6 +704,8 @@ p2m_gfn_to_mfn(struct p2m_domain *p2m, u
     if ( p2m == p2m_get_hostp2m(current->domain) )
         return p2m_gfn_to_mfn_current(p2m, gfn, t, a, q);
 
+    mfn = pagetable_get_mfn(p2m_get_pagetable(p2m));
+
 #if CONFIG_PAGING_LEVELS >= 4
     {
         l4_pgentry_t *l4e = map_domain_page(mfn_x(mfn));
@@ -1059,7 +1058,7 @@ void audit_p2m(struct p2m_domain *p2m, i
 
         if ( test_linear && (gfn <= p2m->max_mapped_pfn) )
         {
-            lp2mfn = mfn_x(gfn_to_mfn_query(p2m, gfn, &type));
+            lp2mfn = mfn_x(gfn_to_mfn_type_p2m(p2m, gfn, &type, p2m_query));
             if ( lp2mfn != mfn_x(p2mfn) )
             {
                 P2M_PRINTK("linear mismatch gfn %#lx -> mfn %#lx "
diff -r 0d3e0a571fdd -r de0a051b36ce xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/p2m.c	Thu Jun 02 13:16:52 2011 +0100
@@ -123,9 +123,10 @@ int p2m_init(struct domain *d)
     return p2m_init_nestedp2m(d);
 }
 
-void p2m_change_entry_type_global(struct p2m_domain *p2m,
+void p2m_change_entry_type_global(struct domain *d,
                                   p2m_type_t ot, p2m_type_t nt)
 {
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
     p2m_lock(p2m);
     p2m->change_entry_type_global(p2m, ot, nt);
     p2m_unlock(p2m);
@@ -302,7 +303,11 @@ void p2m_teardown(struct p2m_domain *p2m
     {
         mfn = p2m->get_entry(p2m, gfn, &t, &a, p2m_query);
         if ( mfn_valid(mfn) && (t == p2m_ram_shared) )
-            BUG_ON(mem_sharing_unshare_page(p2m, gfn, MEM_SHARING_DESTROY_GFN));
+        {
+            ASSERT(!p2m_is_nestedp2m(p2m));
+            BUG_ON(mem_sharing_unshare_page(d, gfn, MEM_SHARING_DESTROY_GFN));
+        }
+
     }
 #endif
 
@@ -369,9 +374,10 @@ p2m_remove_page(struct p2m_domain *p2m, 
 }
 
 void
-guest_physmap_remove_entry(struct p2m_domain *p2m, unsigned long gfn,
+guest_physmap_remove_page(struct domain *d, unsigned long gfn,
                           unsigned long mfn, unsigned int page_order)
 {
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
     p2m_lock(p2m);
     audit_p2m(p2m, 1);
     p2m_remove_page(p2m, gfn, mfn, page_order);
@@ -380,11 +386,11 @@ guest_physmap_remove_entry(struct p2m_do
 }
 
 int
-guest_physmap_add_entry(struct p2m_domain *p2m, unsigned long gfn,
+guest_physmap_add_entry(struct domain *d, unsigned long gfn,
                         unsigned long mfn, unsigned int page_order, 
                         p2m_type_t t)
 {
-    struct domain *d = p2m->domain;
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
     unsigned long i, ogfn;
     p2m_type_t ot;
     mfn_t omfn;
@@ -422,7 +428,7 @@ guest_physmap_add_entry(struct p2m_domai
     /* First, remove m->p mappings for existing p->m mappings */
     for ( i = 0; i < (1UL << page_order); i++ )
     {
-        omfn = gfn_to_mfn_query(p2m, gfn + i, &ot);
+        omfn = gfn_to_mfn_query(d, gfn + i, &ot);
         if ( p2m_is_grant(ot) )
         {
             /* Really shouldn't be unmapping grant maps this way */
@@ -461,7 +467,7 @@ guest_physmap_add_entry(struct p2m_domai
              * address */
             P2M_DEBUG("aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n",
                       mfn + i, ogfn, gfn + i);
-            omfn = gfn_to_mfn_query(p2m, ogfn, &ot);
+            omfn = gfn_to_mfn_query(d, ogfn, &ot);
             if ( p2m_is_ram(ot) )
             {
                 ASSERT(mfn_valid(omfn));
@@ -507,17 +513,18 @@ guest_physmap_add_entry(struct p2m_domai
 
 /* Modify the p2m type of a single gfn from ot to nt, returning the 
  * entry's previous type.  Resets the access permissions. */
-p2m_type_t p2m_change_type(struct p2m_domain *p2m, unsigned long gfn, 
+p2m_type_t p2m_change_type(struct domain *d, unsigned long gfn, 
                            p2m_type_t ot, p2m_type_t nt)
 {
     p2m_type_t pt;
     mfn_t mfn;
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
 
     BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));
 
     p2m_lock(p2m);
 
-    mfn = gfn_to_mfn_query(p2m, gfn, &pt);
+    mfn = gfn_to_mfn_query(d, gfn, &pt);
     if ( pt == ot )
         set_p2m_entry(p2m, gfn, mfn, 0, nt, p2m->default_access);
 
@@ -527,19 +534,20 @@ p2m_type_t p2m_change_type(struct p2m_do
 }
 
 int
-set_mmio_p2m_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn)
+set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn)
 {
     int rc = 0;
     p2m_type_t ot;
     mfn_t omfn;
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
 
-    if ( !paging_mode_translate(p2m->domain) )
+    if ( !paging_mode_translate(d) )
         return 0;
 
-    omfn = gfn_to_mfn_query(p2m, gfn, &ot);
+    omfn = gfn_to_mfn_query(d, gfn, &ot);
     if ( p2m_is_grant(ot) )
     {
-        domain_crash(p2m->domain);
+        domain_crash(d);
         return 0;
     }
     else if ( p2m_is_ram(ot) )
@@ -556,21 +564,22 @@ set_mmio_p2m_entry(struct p2m_domain *p2
     if ( 0 == rc )
         gdprintk(XENLOG_ERR,
             "set_mmio_p2m_entry: set_p2m_entry failed! mfn=%08lx\n",
-            mfn_x(gfn_to_mfn(p2m, gfn, &ot)));
+            mfn_x(gfn_to_mfn(d, gfn, &ot)));
     return rc;
 }
 
 int
-clear_mmio_p2m_entry(struct p2m_domain *p2m, unsigned long gfn)
+clear_mmio_p2m_entry(struct domain *d, unsigned long gfn)
 {
     int rc = 0;
     mfn_t mfn;
     p2m_type_t t;
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
 
-    if ( !paging_mode_translate(p2m->domain) )
+    if ( !paging_mode_translate(d) )
         return 0;
 
-    mfn = gfn_to_mfn(p2m, gfn, &t);
+    mfn = gfn_to_mfn(d, gfn, &t);
 
     /* Do not use mfn_valid() here as it will usually fail for MMIO pages. */
     if ( (INVALID_MFN == mfn_x(mfn)) || (t != p2m_mmio_direct) )
@@ -588,8 +597,9 @@ clear_mmio_p2m_entry(struct p2m_domain *
 }
 
 int
-set_shared_p2m_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn)
+set_shared_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn)
 {
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
     int rc = 0;
     int need_lock = !p2m_locked_by_me(p2m);
     p2m_type_t ot;
@@ -598,7 +608,7 @@ set_shared_p2m_entry(struct p2m_domain *
     if ( !paging_mode_translate(p2m->domain) )
         return 0;
 
-    omfn = gfn_to_mfn_query(p2m, gfn, &ot);
+    omfn = gfn_to_mfn_query(p2m->domain, gfn, &ot);
     /* At the moment we only allow p2m change if gfn has already been made
      * sharable first */
     ASSERT(p2m_is_shared(ot));
@@ -620,14 +630,15 @@ set_shared_p2m_entry(struct p2m_domain *
 }
 
 #ifdef __x86_64__
-int p2m_mem_paging_nominate(struct p2m_domain *p2m, unsigned long gfn)
+int p2m_mem_paging_nominate(struct domain *d, unsigned long gfn)
 {
     struct page_info *page;
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
     p2m_type_t p2mt;
     mfn_t mfn;
     int ret;
 
-    mfn = gfn_to_mfn(p2m, gfn, &p2mt);
+    mfn = gfn_to_mfn(p2m->domain, gfn, &p2mt);
 
     /* Check if mfn is valid */
     ret = -EINVAL;
@@ -664,15 +675,15 @@ int p2m_mem_paging_nominate(struct p2m_d
     return ret;
 }
 
-int p2m_mem_paging_evict(struct p2m_domain *p2m, unsigned long gfn)
+int p2m_mem_paging_evict(struct domain *d, unsigned long gfn)
 {
     struct page_info *page;
     p2m_type_t p2mt;
     mfn_t mfn;
-    struct domain *d = p2m->domain;
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
 
     /* Get mfn */
-    mfn = gfn_to_mfn(p2m, gfn, &p2mt);
+    mfn = gfn_to_mfn(d, gfn, &p2mt);
     if ( unlikely(!mfn_valid(mfn)) )
         return -EINVAL;
 
@@ -702,11 +713,10 @@ int p2m_mem_paging_evict(struct p2m_doma
     return 0;
 }
 
-void p2m_mem_paging_drop_page(struct p2m_domain *p2m, unsigned long gfn)
+void p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn)
 {
     struct vcpu *v = current;
     mem_event_request_t req;
-    struct domain *d = p2m->domain;
 
     /* Check that there's space on the ring for this request */
     if ( mem_event_check_ring(d) == 0)
@@ -721,12 +731,12 @@ void p2m_mem_paging_drop_page(struct p2m
     }
 }
 
-void p2m_mem_paging_populate(struct p2m_domain *p2m, unsigned long gfn)
+void p2m_mem_paging_populate(struct domain *d, unsigned long gfn)
 {
     struct vcpu *v = current;
     mem_event_request_t req;
     p2m_type_t p2mt;
-    struct domain *d = p2m->domain;
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
 
     /* Check that there's space on the ring for this request */
     if ( mem_event_check_ring(d) )
@@ -738,7 +748,7 @@ void p2m_mem_paging_populate(struct p2m_
     /* Fix p2m mapping */
     /* XXX: It seems inefficient to have this here, as it's only needed
      *      in one case (ept guest accessing paging out page) */
-    gfn_to_mfn(p2m, gfn, &p2mt);
+    gfn_to_mfn(d, gfn, &p2mt);
     if ( p2mt == p2m_ram_paged )
     {
         p2m_lock(p2m);
@@ -768,9 +778,10 @@ void p2m_mem_paging_populate(struct p2m_
     mem_event_put_request(d, &req);
 }
 
-int p2m_mem_paging_prep(struct p2m_domain *p2m, unsigned long gfn)
+int p2m_mem_paging_prep(struct domain *d, unsigned long gfn)
 {
     struct page_info *page;
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
 
     /* Get a free page */
     page = alloc_domheap_page(p2m->domain, 0);
@@ -786,9 +797,9 @@ int p2m_mem_paging_prep(struct p2m_domai
     return 0;
 }
 
-void p2m_mem_paging_resume(struct p2m_domain *p2m)
+void p2m_mem_paging_resume(struct domain *d)
 {
-    struct domain *d = p2m->domain;
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
     mem_event_response_t rsp;
     p2m_type_t p2mt;
     mfn_t mfn;
@@ -799,7 +810,7 @@ void p2m_mem_paging_resume(struct p2m_do
     /* Fix p2m entry if the page was not dropped */
     if ( !(rsp.flags & MEM_EVENT_FLAG_DROP_PAGE) )
     {
-        mfn = gfn_to_mfn(p2m, rsp.gfn, &p2mt);
+        mfn = gfn_to_mfn(d, rsp.gfn, &p2mt);
         p2m_lock(p2m);
         set_p2m_entry(p2m, rsp.gfn, mfn, 0, p2m_ram_rw, p2m->default_access);
         set_gpfn_from_mfn(mfn_x(mfn), rsp.gfn);
diff -r 0d3e0a571fdd -r de0a051b36ce xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/shadow/common.c	Thu Jun 02 13:16:52 2011 +0100
@@ -3712,7 +3712,7 @@ int shadow_track_dirty_vram(struct domai
 
         /* Iterate over VRAM to track dirty bits. */
         for ( i = 0; i < nr; i++ ) {
-            mfn_t mfn = gfn_to_mfn(p2m, begin_pfn + i, &t);
+            mfn_t mfn = gfn_to_mfn(d, begin_pfn + i, &t);
             struct page_info *page;
             int dirty = 0;
             paddr_t sl1ma = dirty_vram->sl1ma[i];
@@ -3797,7 +3797,7 @@ int shadow_track_dirty_vram(struct domai
                 /* was clean for more than two seconds, try to disable guest
                  * write access */
                 for ( i = begin_pfn; i < end_pfn; i++ ) {
-                    mfn_t mfn = gfn_to_mfn(p2m, i, &t);
+                    mfn_t mfn = gfn_to_mfn(d, i, &t);
                     if (mfn_x(mfn) != INVALID_MFN)
                         flush_tlb |= sh_remove_write_access(d->vcpu[0], mfn, 1, 0);
                 }
diff -r 0d3e0a571fdd -r de0a051b36ce xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/shadow/multi.c	Thu Jun 02 13:16:52 2011 +0100
@@ -2257,7 +2257,6 @@ static int validate_gl4e(struct vcpu *v,
     shadow_l4e_t *sl4p = se;
     mfn_t sl3mfn = _mfn(INVALID_MFN);
     struct domain *d = v->domain;
-    struct p2m_domain *p2m = p2m_get_hostp2m(d);
     p2m_type_t p2mt;
     int result = 0;
 
@@ -2266,7 +2265,7 @@ static int validate_gl4e(struct vcpu *v,
     if ( guest_l4e_get_flags(new_gl4e) & _PAGE_PRESENT )
     {
         gfn_t gl3gfn = guest_l4e_get_gfn(new_gl4e);
-        mfn_t gl3mfn = gfn_to_mfn_query(p2m, gl3gfn, &p2mt);
+        mfn_t gl3mfn = gfn_to_mfn_query(d, gl3gfn, &p2mt);
         if ( p2m_is_ram(p2mt) )
             sl3mfn = get_shadow_status(v, gl3mfn, SH_type_l3_shadow);
         else if ( p2mt != p2m_populate_on_demand )
@@ -2317,14 +2316,13 @@ static int validate_gl3e(struct vcpu *v,
     mfn_t sl2mfn = _mfn(INVALID_MFN);
     p2m_type_t p2mt;
     int result = 0;
-    struct p2m_domain *p2m = p2m_get_hostp2m(v->domain);
 
     perfc_incr(shadow_validate_gl3e_calls);
 
     if ( guest_l3e_get_flags(new_gl3e) & _PAGE_PRESENT )
     {
         gfn_t gl2gfn = guest_l3e_get_gfn(new_gl3e);
-        mfn_t gl2mfn = gfn_to_mfn_query(p2m, gl2gfn, &p2mt);
+        mfn_t gl2mfn = gfn_to_mfn_query(v->domain, gl2gfn, &p2mt);
         if ( p2m_is_ram(p2mt) )
             sl2mfn = get_shadow_status(v, gl2mfn, SH_type_l2_shadow);
         else if ( p2mt != p2m_populate_on_demand )
@@ -2348,7 +2346,6 @@ static int validate_gl2e(struct vcpu *v,
     guest_l2e_t new_gl2e = *(guest_l2e_t *)new_ge;
     shadow_l2e_t *sl2p = se;
     mfn_t sl1mfn = _mfn(INVALID_MFN);
-    struct p2m_domain *p2m = p2m_get_hostp2m(v->domain);
     p2m_type_t p2mt;
     int result = 0;
 
@@ -2374,7 +2371,7 @@ static int validate_gl2e(struct vcpu *v,
         }
         else
         {
-            mfn_t gl1mfn = gfn_to_mfn_query(p2m, gl1gfn, &p2mt);
+            mfn_t gl1mfn = gfn_to_mfn_query(v->domain, gl1gfn, &p2mt);
             if ( p2m_is_ram(p2mt) )
                 sl1mfn = get_shadow_status(v, gl1mfn, SH_type_l1_shadow); 
             else if ( p2mt != p2m_populate_on_demand )
@@ -2435,7 +2432,6 @@ static int validate_gl1e(struct vcpu *v,
     shadow_l1e_t *sl1p = se;
     gfn_t gfn;
     mfn_t gmfn;
-    struct p2m_domain *p2m = p2m_get_hostp2m(v->domain);
     p2m_type_t p2mt;
     int result = 0;
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
@@ -2445,7 +2441,7 @@ static int validate_gl1e(struct vcpu *v,
     perfc_incr(shadow_validate_gl1e_calls);
 
     gfn = guest_l1e_get_gfn(new_gl1e);
-    gmfn = gfn_to_mfn_query(p2m, gfn, &p2mt);
+    gmfn = gfn_to_mfn_query(v->domain, gfn, &p2mt);
 
     l1e_propagate_from_guest(v, new_gl1e, gmfn, &new_sl1e, ft_prefetch, p2mt);
     result |= shadow_set_l1e(v, sl1p, new_sl1e, p2mt, sl1mfn);
@@ -2505,7 +2501,7 @@ void sh_resync_l1(struct vcpu *v, mfn_t 
             shadow_l1e_t nsl1e;
 
             gfn = guest_l1e_get_gfn(gl1e);
-            gmfn = gfn_to_mfn_query(p2m_get_hostp2m(v->domain), gfn, &p2mt);
+            gmfn = gfn_to_mfn_query(v->domain, gfn, &p2mt);
             l1e_propagate_from_guest(v, gl1e, gmfn, &nsl1e, ft_prefetch, p2mt);
             rc |= shadow_set_l1e(v, sl1p, nsl1e, p2mt, sl1mfn);
 
@@ -2828,7 +2824,7 @@ static void sh_prefetch(struct vcpu *v, 
 
         /* Look at the gfn that the l1e is pointing at */
         gfn = guest_l1e_get_gfn(gl1e);
-        gmfn = gfn_to_mfn_query(p2m_get_hostp2m(v->domain), gfn, &p2mt);
+        gmfn = gfn_to_mfn_query(v->domain, gfn, &p2mt);
 
         /* Propagate the entry.  */
         l1e_propagate_from_guest(v, gl1e, gmfn, &sl1e, ft_prefetch, p2mt);
@@ -3186,7 +3182,7 @@ static int sh_page_fault(struct vcpu *v,
 
     /* What mfn is the guest trying to access? */
     gfn = guest_l1e_get_gfn(gw.l1e);
-    gmfn = gfn_to_mfn_guest(p2m_get_hostp2m(d), gfn, &p2mt);
+    gmfn = gfn_to_mfn_guest(d, gfn, &p2mt);
 
     if ( shadow_mode_refcounts(d) && 
          ((!p2m_is_valid(p2mt) && !p2m_is_grant(p2mt)) ||
@@ -4296,7 +4292,7 @@ sh_update_cr3(struct vcpu *v, int do_loc
             if ( guest_l3e_get_flags(gl3e[i]) & _PAGE_PRESENT )
             {
                 gl2gfn = guest_l3e_get_gfn(gl3e[i]);
-                gl2mfn = gfn_to_mfn_query(p2m_get_hostp2m(d), gl2gfn, &p2mt);
+                gl2mfn = gfn_to_mfn_query(d, gl2gfn, &p2mt);
                 if ( p2m_is_ram(p2mt) )
                     flush |= sh_remove_write_access(v, gl2mfn, 2, 0);
             }
@@ -4309,7 +4305,7 @@ sh_update_cr3(struct vcpu *v, int do_loc
             if ( guest_l3e_get_flags(gl3e[i]) & _PAGE_PRESENT )
             {
                 gl2gfn = guest_l3e_get_gfn(gl3e[i]);
-                gl2mfn = gfn_to_mfn_query(p2m_get_hostp2m(d), gl2gfn, &p2mt);
+                gl2mfn = gfn_to_mfn_query(d, gl2gfn, &p2mt);
                 if ( p2m_is_ram(p2mt) )
                     sh_set_toplevel_shadow(v, i, gl2mfn, (i == 3) 
                                            ? SH_type_l2h_shadow 
@@ -4707,7 +4703,7 @@ static void sh_pagetable_dying(struct vc
     if ( gcr3 == gpa )
         fast_path = 1;
 
-    gmfn = gfn_to_mfn_query(p2m_get_hostp2m(v->domain), _gfn(gpa >> PAGE_SHIFT), &p2mt);
+    gmfn = gfn_to_mfn_query(v->domain, _gfn(gpa >> PAGE_SHIFT), &p2mt);
     if ( !mfn_valid(gmfn) || !p2m_is_ram(p2mt) )
     {
         printk(XENLOG_DEBUG "sh_pagetable_dying: gpa not valid %"PRIpaddr"\n",
@@ -4727,7 +4723,7 @@ static void sh_pagetable_dying(struct vc
         {
             /* retrieving the l2s */
             gl2a = guest_l3e_get_paddr(gl3e[i]);
-            gmfn = gfn_to_mfn_query(p2m_get_hostp2m(v->domain), _gfn(gl2a >> PAGE_SHIFT), &p2mt);
+            gmfn = gfn_to_mfn_query(v->domain, _gfn(gl2a >> PAGE_SHIFT), &p2mt);
             smfn = shadow_hash_lookup(v, mfn_x(gmfn), SH_type_l2_pae_shadow);
         }
 
@@ -4762,7 +4758,7 @@ static void sh_pagetable_dying(struct vc
 
     shadow_lock(v->domain);
 
-    gmfn = gfn_to_mfn_query(p2m_get_hostp2m(v->domain), _gfn(gpa >> PAGE_SHIFT), &p2mt);
+    gmfn = gfn_to_mfn_query(v->domain, _gfn(gpa >> PAGE_SHIFT), &p2mt);
 #if GUEST_PAGING_LEVELS == 2
     smfn = shadow_hash_lookup(v, mfn_x(gmfn), SH_type_l2_32_shadow);
 #else
@@ -4802,10 +4798,9 @@ static mfn_t emulate_gva_to_mfn(struct v
     mfn_t mfn;
     p2m_type_t p2mt;
     uint32_t pfec = PFEC_page_present | PFEC_write_access;
-    struct p2m_domain *p2m = p2m_get_hostp2m(v->domain);
 
     /* Translate the VA to a GFN */
-    gfn = sh_gva_to_gfn(v, p2m, vaddr, &pfec);
+    gfn = sh_gva_to_gfn(v, NULL, vaddr, &pfec);
     if ( gfn == INVALID_GFN ) 
     {
         if ( is_hvm_vcpu(v) )
@@ -4818,9 +4813,9 @@ static mfn_t emulate_gva_to_mfn(struct v
     /* Translate the GFN to an MFN */
     /* PoD: query only if shadow lock is held (to avoid deadlock) */
     if ( shadow_locked_by_me(v->domain) )
-        mfn = gfn_to_mfn_query(p2m, _gfn(gfn), &p2mt);
+        mfn = gfn_to_mfn_query(v->domain, _gfn(gfn), &p2mt);
     else
-        mfn = gfn_to_mfn(p2m, _gfn(gfn), &p2mt);
+        mfn = gfn_to_mfn(v->domain, _gfn(gfn), &p2mt);
         
     if ( p2m_is_readonly(p2mt) )
         return _mfn(READONLY_GFN);
@@ -5226,7 +5221,7 @@ int sh_audit_l1_table(struct vcpu *v, mf
             {
                 gfn = guest_l1e_get_gfn(*gl1e);
                 mfn = shadow_l1e_get_mfn(*sl1e);
-                gmfn = gfn_to_mfn_query(p2m_get_hostp2m(v->domain), gfn, &p2mt);
+                gmfn = gfn_to_mfn_query(v->domain, gfn, &p2mt);
                 if ( !p2m_is_grant(p2mt) && mfn_x(gmfn) != mfn_x(mfn) )
                     AUDIT_FAIL(1, "bad translation: gfn %" SH_PRI_gfn
                                " --> %" PRI_mfn " != mfn %" PRI_mfn,
@@ -5270,7 +5265,6 @@ int sh_audit_l2_table(struct vcpu *v, mf
     shadow_l2e_t *sl2e;
     mfn_t mfn, gmfn, gl2mfn;
     gfn_t gfn;
-    struct p2m_domain *p2m = p2m_get_hostp2m(v->domain);
     p2m_type_t p2mt;
     char *s;
     int done = 0;
@@ -5298,7 +5292,7 @@ int sh_audit_l2_table(struct vcpu *v, mf
             mfn = shadow_l2e_get_mfn(*sl2e);
             gmfn = (guest_l2e_get_flags(*gl2e) & _PAGE_PSE)  
                 ? get_fl1_shadow_status(v, gfn)
-                : get_shadow_status(v, gfn_to_mfn_query(p2m, gfn, &p2mt), 
+                : get_shadow_status(v, gfn_to_mfn_query(v->domain, gfn, &p2mt),
                                     SH_type_l1_shadow);
             if ( mfn_x(gmfn) != mfn_x(mfn) )
                 AUDIT_FAIL(2, "bad translation: gfn %" SH_PRI_gfn
@@ -5306,7 +5300,7 @@ int sh_audit_l2_table(struct vcpu *v, mf
                            " --> %" PRI_mfn " != mfn %" PRI_mfn,
                            gfn_x(gfn), 
                            (guest_l2e_get_flags(*gl2e) & _PAGE_PSE) ? 0
-                           : mfn_x(gfn_to_mfn_query(p2m,
+                           : mfn_x(gfn_to_mfn_query(v->domain,
                                    gfn, &p2mt)), mfn_x(gmfn), mfn_x(mfn));
         }
     });
@@ -5346,7 +5340,7 @@ int sh_audit_l3_table(struct vcpu *v, mf
         {
             gfn = guest_l3e_get_gfn(*gl3e);
             mfn = shadow_l3e_get_mfn(*sl3e);
-            gmfn = get_shadow_status(v, gfn_to_mfn_query(p2m_get_hostp2m(v->domain), gfn, &p2mt), 
+            gmfn = get_shadow_status(v, gfn_to_mfn_query(v->domain, gfn, &p2mt),
                                      ((GUEST_PAGING_LEVELS == 3 ||
                                        is_pv_32on64_vcpu(v))
                                       && !shadow_mode_external(v->domain)
@@ -5394,7 +5388,7 @@ int sh_audit_l4_table(struct vcpu *v, mf
         {
             gfn = guest_l4e_get_gfn(*gl4e);
             mfn = shadow_l4e_get_mfn(*sl4e);
-            gmfn = get_shadow_status(v, gfn_to_mfn_query(p2m_get_hostp2m(v->domain),
+            gmfn = get_shadow_status(v, gfn_to_mfn_query(v->domain,
                                      gfn, &p2mt), 
                                      SH_type_l3_shadow);
             if ( mfn_x(gmfn) != mfn_x(mfn) )
diff -r 0d3e0a571fdd -r de0a051b36ce xen/arch/x86/mm/shadow/types.h
--- a/xen/arch/x86/mm/shadow/types.h	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/shadow/types.h	Thu Jun 02 13:16:52 2011 +0100
@@ -193,9 +193,9 @@ static inline shadow_l4e_t shadow_l4e_fr
 
  /* Override gfn_to_mfn to work with gfn_t */
 #undef gfn_to_mfn_query
-#define gfn_to_mfn_query(d, g, t) _gfn_to_mfn_type((d), gfn_x(g), (t), p2m_query)
+#define gfn_to_mfn_query(d, g, t) gfn_to_mfn_type((d), gfn_x(g), (t), p2m_query)
 #undef gfn_to_mfn_guest
-#define gfn_to_mfn_guest(d, g, t) _gfn_to_mfn_type((d), gfn_x(g), (t), p2m_guest)
+#define gfn_to_mfn_guest(d, g, t) gfn_to_mfn_type((d), gfn_x(g), (t), p2m_guest)
 
 /* The shadow types needed for the various levels. */
 
diff -r 0d3e0a571fdd -r de0a051b36ce xen/arch/x86/msi.c
--- a/xen/arch/x86/msi.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/msi.c	Thu Jun 02 13:16:52 2011 +0100
@@ -663,7 +663,7 @@ static int msix_capability_init(struct p
             WARN();
 
         if ( dev->domain )
-            p2m_change_entry_type_global(p2m_get_hostp2m(dev->domain),
+            p2m_change_entry_type_global(dev->domain,
                                          p2m_mmio_direct, p2m_mmio_direct);
         if ( !dev->domain || !paging_mode_translate(dev->domain) )
         {
diff -r 0d3e0a571fdd -r de0a051b36ce xen/common/grant_table.c
--- a/xen/common/grant_table.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/common/grant_table.c	Thu Jun 02 13:16:52 2011 +0100
@@ -110,7 +110,7 @@ static unsigned inline int max_nr_maptra
 #define gfn_to_mfn_private(_d, _gfn) ({                     \
     p2m_type_t __p2mt;                                      \
     unsigned long __x;                                      \
-    __x = mfn_x(gfn_to_mfn_unshare(p2m_get_hostp2m(_d), _gfn, &__p2mt));  \
+    __x = mfn_x(gfn_to_mfn_unshare((_d), (_gfn), &__p2mt)); \
     BUG_ON(p2m_is_shared(__p2mt)); /* XXX fixme */          \
     if ( !p2m_is_valid(__p2mt) )                            \
         __x = INVALID_MFN;                                  \
@@ -146,16 +146,14 @@ static int __get_paged_frame(unsigned lo
 {
     int rc = GNTST_okay;
 #if defined(P2M_PAGED_TYPES) || defined(P2M_SHARED_TYPES)
-    struct p2m_domain *p2m;
     p2m_type_t p2mt;
     mfn_t mfn;
 
-    p2m = p2m_get_hostp2m(rd);
     if ( readonly )
-        mfn = gfn_to_mfn(p2m, gfn, &p2mt);
+        mfn = gfn_to_mfn(rd, gfn, &p2mt);
     else
     {
-        mfn = gfn_to_mfn_unshare(p2m, gfn, &p2mt);
+        mfn = gfn_to_mfn_unshare(rd, gfn, &p2mt);
         BUG_ON(p2m_is_shared(p2mt));
         /* XXX Here, and above in gfn_to_mfn_private, need to handle
          * XXX failure to unshare. */
@@ -165,7 +163,7 @@ static int __get_paged_frame(unsigned lo
         *frame = mfn_x(mfn);
         if ( p2m_is_paging(p2mt) )
         {
-            p2m_mem_paging_populate(p2m, gfn);
+            p2m_mem_paging_populate(rd, gfn);
             rc = GNTST_eagain;
         }
     } else {
diff -r 0d3e0a571fdd -r de0a051b36ce xen/common/memory.c
--- a/xen/common/memory.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/common/memory.c	Thu Jun 02 13:16:52 2011 +0100
@@ -162,11 +162,11 @@ int guest_remove_page(struct domain *d, 
     unsigned long mfn;
 
 #ifdef CONFIG_X86
-    mfn = mfn_x(gfn_to_mfn(p2m_get_hostp2m(d), gmfn, &p2mt)); 
+    mfn = mfn_x(gfn_to_mfn(d, gmfn, &p2mt)); 
     if ( unlikely(p2m_is_paging(p2mt)) )
     {
         guest_physmap_remove_page(d, gmfn, mfn, 0);
-        p2m_mem_paging_drop_page(p2m_get_hostp2m(d), gmfn);
+        p2m_mem_paging_drop_page(d, gmfn);
         return 1;
     }
 #else
@@ -363,7 +363,7 @@ static long memory_exchange(XEN_GUEST_HA
                 p2m_type_t p2mt;
 
                 /* Shared pages cannot be exchanged */
-                mfn = mfn_x(gfn_to_mfn_unshare(p2m_get_hostp2m(d), gmfn + k, &p2mt));
+                mfn = mfn_x(gfn_to_mfn_unshare(d, gmfn + k, &p2mt));
                 if ( p2m_is_shared(p2mt) )
                 {
                     rc = -ENOMEM;
diff -r 0d3e0a571fdd -r de0a051b36ce xen/common/tmem_xen.c
--- a/xen/common/tmem_xen.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/common/tmem_xen.c	Thu Jun 02 13:16:52 2011 +0100
@@ -109,7 +109,7 @@ static inline void *cli_get_page(tmem_cl
     struct page_info *page;
     int ret;
 
-    cli_mfn = mfn_x(gfn_to_mfn(p2m_get_hostp2m(current->domain), cmfn, &t));
+    cli_mfn = mfn_x(gfn_to_mfn(current->domain, cmfn, &t));
     if ( t != p2m_ram_rw || !mfn_valid(cli_mfn) )
             return NULL;
     page = mfn_to_page(cli_mfn);
diff -r 0d3e0a571fdd -r de0a051b36ce xen/include/asm-x86/guest_pt.h
--- a/xen/include/asm-x86/guest_pt.h	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/include/asm-x86/guest_pt.h	Thu Jun 02 13:16:52 2011 +0100
@@ -53,7 +53,7 @@ gfn_to_paddr(gfn_t gfn)
 
 /* Override gfn_to_mfn to work with gfn_t */
 #undef gfn_to_mfn
-#define gfn_to_mfn(d, g, t) _gfn_to_mfn_type((d), gfn_x(g), (t), p2m_alloc)
+#define gfn_to_mfn(d, g, t) gfn_to_mfn_type((d), gfn_x(g), (t), p2m_alloc)
 
 
 /* Types of the guest's page tables and access functions for them */
diff -r 0d3e0a571fdd -r de0a051b36ce xen/include/asm-x86/mem_sharing.h
--- a/xen/include/asm-x86/mem_sharing.h	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/include/asm-x86/mem_sharing.h	Thu Jun 02 13:16:52 2011 +0100
@@ -30,16 +30,15 @@
 typedef uint64_t shr_handle_t; 
 
 unsigned int mem_sharing_get_nr_saved_mfns(void);
-int mem_sharing_nominate_page(struct p2m_domain *p2m, 
+int mem_sharing_nominate_page(struct domain *d, 
                               unsigned long gfn,
                               int expected_refcnt,
                               shr_handle_t *phandle);
 #define MEM_SHARING_DESTROY_GFN       (1<<1)
-int mem_sharing_unshare_page(struct p2m_domain *p2m, 
+int mem_sharing_unshare_page(struct domain *d, 
                              unsigned long gfn, 
                              uint16_t flags);
 int mem_sharing_sharing_resume(struct domain *d);
-int mem_sharing_cache_resize(struct p2m_domain *p2m, int new_size);
 int mem_sharing_domctl(struct domain *d, 
                        xen_domctl_mem_sharing_op_t *mec);
 void mem_sharing_init(void);
diff -r 0d3e0a571fdd -r de0a051b36ce xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/include/asm-x86/p2m.h	Thu Jun 02 13:16:52 2011 +0100
@@ -359,8 +359,10 @@ struct p2m_domain *p2m_get_p2m(struct vc
     } while (0)
 
 
-/* Read P2M table, mapping pages as we go.
- * Do not populate PoD pages. */
+/* Read a particular P2M table, mapping pages as we go.  Most callers
+ * should _not_ call this directly; use the other gfn_to_mfn_* functions
+ * below unless you know you want to walk a p2m that isn't a domain's
+ * main one. */
 static inline mfn_t
 gfn_to_mfn_type_p2m(struct p2m_domain *p2m, unsigned long gfn,
                     p2m_type_t *t, p2m_access_t *a, p2m_query_t q)
@@ -370,7 +372,8 @@ gfn_to_mfn_type_p2m(struct p2m_domain *p
 #ifdef __x86_64__
     if ( q == p2m_unshare && p2m_is_shared(*t) )
     {
-        mem_sharing_unshare_page(p2m, gfn, 0);
+        ASSERT(!p2m_is_nestedp2m(p2m));
+        mem_sharing_unshare_page(p2m->domain, gfn, 0);
         mfn = p2m->get_entry(p2m, gfn, t, a, q);
     }
 #endif
@@ -390,10 +393,11 @@ gfn_to_mfn_type_p2m(struct p2m_domain *p
 
 
 /* General conversion function from gfn to mfn */
-static inline mfn_t _gfn_to_mfn_type(struct p2m_domain *p2m,
-                                     unsigned long gfn, p2m_type_t *t,
-                                     p2m_query_t q)
+static inline mfn_t gfn_to_mfn_type(struct domain *d,
+                                    unsigned long gfn, p2m_type_t *t,
+                                    p2m_query_t q)
 {
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
     p2m_access_t a;
 
     if ( !p2m || !paging_mode_translate(p2m->domain) )
@@ -407,17 +411,17 @@ static inline mfn_t _gfn_to_mfn_type(str
     return gfn_to_mfn_type_p2m(p2m, gfn, t, &a, q);
 }
 
-#define gfn_to_mfn(p2m, g, t) _gfn_to_mfn_type((p2m), (g), (t), p2m_alloc)
-#define gfn_to_mfn_query(p2m, g, t) _gfn_to_mfn_type((p2m), (g), (t), p2m_query)
-#define gfn_to_mfn_guest(p2m, g, t) _gfn_to_mfn_type((p2m), (g), (t), p2m_guest)
-#define gfn_to_mfn_unshare(p, g, t) _gfn_to_mfn_type((p), (g), (t), p2m_unshare)
+#define gfn_to_mfn(d, g, t)         gfn_to_mfn_type((d), (g), (t), p2m_alloc)
+#define gfn_to_mfn_query(d, g, t)   gfn_to_mfn_type((d), (g), (t), p2m_query)
+#define gfn_to_mfn_guest(d, g, t)   gfn_to_mfn_type((d), (g), (t), p2m_guest)
+#define gfn_to_mfn_unshare(d, g, t) gfn_to_mfn_type((d), (g), (t), p2m_unshare)
 
 /* Compatibility function exporting the old untyped interface */
 static inline unsigned long gmfn_to_mfn(struct domain *d, unsigned long gpfn)
 {
     mfn_t mfn;
     p2m_type_t t;
-    mfn = gfn_to_mfn(d->arch.p2m, gpfn, &t);
+    mfn = gfn_to_mfn(d, gpfn, &t);
     if ( p2m_is_valid(t) )
         return mfn_x(mfn);
     return INVALID_MFN;
@@ -445,45 +449,39 @@ void p2m_teardown(struct p2m_domain *p2m
 void p2m_final_teardown(struct domain *d);
 
 /* Add a page to a domain's p2m table */
-int guest_physmap_add_entry(struct p2m_domain *p2m, unsigned long gfn,
+int guest_physmap_add_entry(struct domain *d, unsigned long gfn,
                             unsigned long mfn, unsigned int page_order, 
                             p2m_type_t t);
 
-/* Remove a page from a domain's p2m table */
-void guest_physmap_remove_entry(struct p2m_domain *p2m, unsigned long gfn,
-                            unsigned long mfn, unsigned int page_order);
-
-/* Set a p2m range as populate-on-demand */
-int guest_physmap_mark_populate_on_demand(struct domain *d, unsigned long gfn,
-                                          unsigned int order);
-
 /* Untyped version for RAM only, for compatibility */
 static inline int guest_physmap_add_page(struct domain *d,
                                          unsigned long gfn,
                                          unsigned long mfn,
                                          unsigned int page_order)
 {
-    return guest_physmap_add_entry(d->arch.p2m, gfn, mfn, page_order, p2m_ram_rw);
+    return guest_physmap_add_entry(d, gfn, mfn, page_order, p2m_ram_rw);
 }
 
 /* Remove a page from a domain's p2m table */
-static inline void guest_physmap_remove_page(struct domain *d,
+void guest_physmap_remove_page(struct domain *d,
                                unsigned long gfn,
-                               unsigned long mfn, unsigned int page_order)
-{
-    guest_physmap_remove_entry(d->arch.p2m, gfn, mfn, page_order);
-}
+                               unsigned long mfn, unsigned int page_order);
+
+/* Set a p2m range as populate-on-demand */
+int guest_physmap_mark_populate_on_demand(struct domain *d, unsigned long gfn,
+                                          unsigned int order);
 
 /* Change types across all p2m entries in a domain */
-void p2m_change_entry_type_global(struct p2m_domain *p2m, p2m_type_t ot, p2m_type_t nt);
+void p2m_change_entry_type_global(struct domain *d, 
+                                  p2m_type_t ot, p2m_type_t nt);
 
 /* Compare-exchange the type of a single p2m entry */
-p2m_type_t p2m_change_type(struct p2m_domain *p2m, unsigned long gfn,
+p2m_type_t p2m_change_type(struct domain *d, unsigned long gfn,
                            p2m_type_t ot, p2m_type_t nt);
 
 /* Set mmio addresses in the p2m table (for pass-through) */
-int set_mmio_p2m_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn);
-int clear_mmio_p2m_entry(struct p2m_domain *p2m, unsigned long gfn);
+int set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn);
+int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn);
 
 
 /* 
@@ -491,7 +489,7 @@ int clear_mmio_p2m_entry(struct p2m_doma
  */
 
 /* Dump PoD information about the domain */
-void p2m_pod_dump_data(struct p2m_domain *p2m);
+void p2m_pod_dump_data(struct domain *d);
 
 /* Move all pages from the populate-on-demand cache to the domain page_list
  * (usually in preparation for domain destruction) */
@@ -508,12 +506,6 @@ p2m_pod_decrease_reservation(struct doma
                              xen_pfn_t gpfn,
                              unsigned int order);
 
-/* Called by p2m code when demand-populating a PoD page */
-int
-p2m_pod_demand_populate(struct p2m_domain *p2m, unsigned long gfn,
-                        unsigned int order,
-                        p2m_query_t q);
-
 /* Scan pod cache when offline/broken page triggered */
 int
 p2m_pod_offline_or_broken_hit(struct page_info *p);
@@ -522,30 +514,31 @@ p2m_pod_offline_or_broken_hit(struct pag
 void
 p2m_pod_offline_or_broken_replace(struct page_info *p);
 
+
 /*
  * Paging to disk and page-sharing
  */
 
 #ifdef __x86_64__
 /* Modify p2m table for shared gfn */
-int set_shared_p2m_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn);
+int set_shared_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn);
 
 /* Check if a nominated gfn is valid to be paged out */
-int p2m_mem_paging_nominate(struct p2m_domain *p2m, unsigned long gfn);
+int p2m_mem_paging_nominate(struct domain *d, unsigned long gfn);
 /* Evict a frame */
-int p2m_mem_paging_evict(struct p2m_domain *p2m, unsigned long gfn);
+int p2m_mem_paging_evict(struct domain *d, unsigned long gfn);
 /* Tell xenpaging to drop a paged out frame */
-void p2m_mem_paging_drop_page(struct p2m_domain *p2m, unsigned long gfn);
+void p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn);
 /* Start populating a paged out frame */
-void p2m_mem_paging_populate(struct p2m_domain *p2m, unsigned long gfn);
+void p2m_mem_paging_populate(struct domain *d, unsigned long gfn);
 /* Prepare the p2m for paging a frame in */
-int p2m_mem_paging_prep(struct p2m_domain *p2m, unsigned long gfn);
+int p2m_mem_paging_prep(struct domain *d, unsigned long gfn);
 /* Resume normal operation (in case a domain was paused) */
-void p2m_mem_paging_resume(struct p2m_domain *p2m);
+void p2m_mem_paging_resume(struct domain *d);
 #else
-static inline void p2m_mem_paging_drop_page(struct p2m_domain *p2m, unsigned long gfn)
+static inline void p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn)
 { }
-static inline void p2m_mem_paging_populate(struct p2m_domain *p2m, unsigned long gfn)
+static inline void p2m_mem_paging_populate(struct domain *d, unsigned long gfn)
 { }
 #endif
 
@@ -563,6 +556,10 @@ static inline void p2m_mem_access_check(
 { }
 #endif
 
+/* 
+ * Internal functions, only called by other p2m code
+ */
+
 struct page_info *p2m_alloc_ptp(struct p2m_domain *p2m, unsigned long type);
 void p2m_free_ptp(struct p2m_domain *p2m, struct page_info *pg);
 
@@ -619,6 +616,11 @@ extern void audit_p2m(struct p2m_domain 
 #define P2M_DEBUG(_f, _a...) do { (void)(_f); } while(0)
 #endif
 
+/* Called by p2m code when demand-populating a PoD page */
+int
+p2m_pod_demand_populate(struct p2m_domain *p2m, unsigned long gfn,
+                        unsigned int order,
+                        p2m_query_t q);
 
 /*
  * Functions specific to the p2m-pt implementation
@@ -642,7 +644,7 @@ static inline p2m_type_t p2m_flags_to_ty
 }
 
 /*
- * Nested p2m: shadow p2m tables used for nexted HVM virtualization 
+ * Nested p2m: shadow p2m tables used for nested HVM virtualization 
  */
 
 /* Flushes specified p2m table */
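
Taken together, the hunks above move the public lookup and paging
interfaces from taking a struct p2m_domain * to taking a struct domain *.
A minimal sketch of a caller after this change (gfn and d are hypothetical
surrounding variables; the usual Xen-internal headers are assumed):

    p2m_type_t p2mt;
    mfn_t mfn;

    /* The host p2m is looked up inside gfn_to_mfn_type() now, so the
     * caller no longer writes p2m_get_hostp2m(d) itself. */
    mfn = gfn_to_mfn(d, gfn, &p2mt);   /* p2m_alloc semantics */
    if ( p2m_is_ram(p2mt) && mfn_valid(mfn) )
    {
        /* ... use mfn ... */
    }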

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 06 of 17] x86/mm/p2m: paging_p2m_ga_to_gfn() doesn't need so many arguments
  2011-06-02 12:20 [PATCH 00 of 17] mm/p2m cleanups Tim Deegan
                   ` (4 preceding siblings ...)
  2011-06-02 12:20 ` [PATCH 05 of 17] x86/mm/p2m: Make p2m interfaces take struct domain arguments Tim Deegan
@ 2011-06-02 12:20 ` Tim Deegan
  2011-06-02 12:20 ` [PATCH 07 of 17] x86/mm: Fix memory-sharing code's locking discipline Tim Deegan
                   ` (10 subsequent siblings)
  16 siblings, 0 replies; 18+ messages in thread
From: Tim Deegan @ 2011-06-02 12:20 UTC (permalink / raw)
  To: xen-devel

# HG changeset patch
# User Tim Deegan <Tim.Deegan@citrix.com>
# Date 1307017012 -3600
# Node ID 39603c2f0dab39bf399910e180aa60deca7db680
# Parent  de0a051b36ceb01738348426fdd3d52a5776ba1a
x86/mm/p2m: paging_p2m_ga_to_gfn() doesn't need so many arguments

It has only one caller and is always called with p2m == hostp2m and mode
== hostmode.  Also, since it's only called from nested HAP code, remove
the check of paging_mode_hap().  Then rename it to reflect its new
interface.
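
A minimal sketch of the resulting call (v, nested_cr3 and L2_gpa come
from the surrounding nested-HAP handler; names as in the hunk below):

    uint32_t pfec;
    unsigned long gfn, nested_cr3 = nhvm_vcpu_hostcr3(v);

    /* Walk the guest-supplied table rooted at nested_cr3 as if it were
     * an ordinary pagetable; the explicit p2m and paging-mode arguments
     * are gone because the callee now uses the host p2m and host mode. */
    gfn = paging_ga_to_gfn_cr3(v, nested_cr3, L2_gpa, &pfec);
    if ( gfn == INVALID_GFN )
        return NESTEDHVM_PAGEFAULT_INJECT;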

Signed-off-by: Tim Deegan <Tim.Deegan@citrix.com>

diff -r de0a051b36ce -r 39603c2f0dab xen/arch/x86/mm/hap/nested_hap.c
--- a/xen/arch/x86/mm/hap/nested_hap.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/hap/nested_hap.c	Thu Jun 02 13:16:52 2011 +0100
@@ -143,17 +143,15 @@ nestedhap_walk_L0_p2m(struct p2m_domain 
  * L1_gpa. The result value tells what to do next.
  */
 static int
-nestedhap_walk_L1_p2m(struct vcpu *v, struct p2m_domain *p2m,
-    paddr_t L2_gpa, paddr_t *L1_gpa)
+nestedhap_walk_L1_p2m(struct vcpu *v, paddr_t L2_gpa, paddr_t *L1_gpa)
 {
     uint32_t pfec;
     unsigned long nested_cr3, gfn;
-    const struct paging_mode *mode = paging_get_hostmode(v);
     
     nested_cr3 = nhvm_vcpu_hostcr3(v);
 
-    /* walk the guest table */
-    gfn = paging_p2m_ga_to_gfn(v, p2m, mode, nested_cr3, L2_gpa, &pfec);
+    /* Walk the guest-supplied NPT table, just as if it were a pagetable */
+    gfn = paging_ga_to_gfn_cr3(v, nested_cr3, L2_gpa, &pfec);
 
     if ( gfn == INVALID_GFN ) 
         return NESTEDHVM_PAGEFAULT_INJECT;
@@ -178,10 +176,8 @@ nestedhvm_hap_nested_page_fault(struct v
     p2m = p2m_get_hostp2m(d); /* L0 p2m */
     nested_p2m = p2m_get_nestedp2m(v, nhvm_vcpu_hostcr3(v));
 
-    /* walk the L1 P2M table, note we have to pass p2m
-     * and not nested_p2m here or we fail the walk forever,
-     * otherwise. */
-    rv = nestedhap_walk_L1_p2m(v, p2m, L2_gpa, &L1_gpa);
+    /* walk the L1 P2M table */
+    rv = nestedhap_walk_L1_p2m(v, L2_gpa, &L1_gpa);
 
     /* let caller to handle these two cases */
     switch (rv) {
diff -r de0a051b36ce -r 39603c2f0dab xen/include/asm-x86/paging.h
--- a/xen/include/asm-x86/paging.h	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/include/asm-x86/paging.h	Thu Jun 02 13:16:52 2011 +0100
@@ -265,26 +265,19 @@ unsigned long paging_gva_to_gfn(struct v
                                 unsigned long va,
                                 uint32_t *pfec);
 
-/* Translates a guest virtual address to guest physical address
- * where the specified cr3 is translated to host physical address
- * using the specified p2m table.
- * This allows to do page walks in the guest or even in the nested guest.
- * It returns the guest's gfn or the nested guest's gfn.
+/* Translate a guest address using a particular CR3 value.  This is used
+ * by nested HAP code to walk the guest-supplied NPT tables as if
+ * they were pagetables.
  * Use 'paddr_t' for the guest address so it won't overflow when
  * guest or nested guest is in 32bit PAE mode.
  */
-static inline unsigned long paging_p2m_ga_to_gfn(struct vcpu *v,
-                                                 struct p2m_domain *p2m,
-                                                 const struct paging_mode *mode,
+static inline unsigned long paging_ga_to_gfn_cr3(struct vcpu *v,
                                                  unsigned long cr3,
                                                  paddr_t ga,
                                                  uint32_t *pfec)
 {
-    if ( is_hvm_domain(v->domain) && paging_mode_hap(v->domain) )
-        return mode->p2m_ga_to_gfn(v, p2m, cr3, ga, pfec);
-
-    /* shadow paging */
-    return paging_gva_to_gfn(v, ga, pfec);
+    struct p2m_domain *p2m = v->domain->arch.p2m;
+    return paging_get_hostmode(v)->p2m_ga_to_gfn(v, p2m, cr3, ga, pfec);
 }
 
 /* Update all the things that are derived from the guest's CR3.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 07 of 17] x86/mm: Fix memory-sharing code's locking discipline
  2011-06-02 12:20 [PATCH 00 of 17] mm/p2m cleanups Tim Deegan
                   ` (5 preceding siblings ...)
  2011-06-02 12:20 ` [PATCH 06 of 17] x86/mm/p2m: paging_p2m_ga_to_gfn() doesn't need so many arguments Tim Deegan
@ 2011-06-02 12:20 ` Tim Deegan
  2011-06-02 12:20 ` [PATCH 08 of 17] x86/mm/p2m: Remove recursive-locking code from set_shared_p2m_entry() Tim Deegan
                   ` (9 subsequent siblings)
  16 siblings, 0 replies; 18+ messages in thread
From: Tim Deegan @ 2011-06-02 12:20 UTC (permalink / raw)
  To: xen-devel

# HG changeset patch
# User Tim Deegan <Tim.Deegan@citrix.com>
# Date 1307017012 -3600
# Node ID 51518c502d1b23a1cc4b325f81d17027a9d31a74
# Parent  39603c2f0dab39bf399910e180aa60deca7db680
x86/mm: Fix memory-sharing code's locking discipline.

mem_sharing_audit() is sometimes called with the shr_lock held.  Make that
true for every call, so the audit always runs under the lock.

Move the unsharing loop in p2m_teardown out of the p2m_lock to avoid
deadlocks.
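
The discipline after this patch is the simple pattern below (a sketch
only; shr_lock()/shr_unlock() and mem_sharing_audit() are the helpers
in mem_sharing.c):

    /* Callers take the lock themselves; the audit merely asserts that
     * it is already held. */
    shr_lock();
    mem_sharing_audit();
    /* ... look up or modify the sharing state ... */
    shr_unlock();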

Signed-off-by: Tim Deegan <Tim.Deegan@citrix.com>

diff -r 39603c2f0dab -r 51518c502d1b xen/arch/x86/mm/mem_sharing.c
--- a/xen/arch/x86/mm/mem_sharing.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/mem_sharing.c	Thu Jun 02 13:16:52 2011 +0100
@@ -225,7 +225,7 @@ static void mem_sharing_audit(void)
     int bucket;
     struct page_info *pg;
 
-    shr_lock();
+    ASSERT(shr_locked_by_me());
 
     for(bucket=0; bucket < SHR_HASH_LENGTH; bucket++)
     {
@@ -285,8 +285,6 @@ static void mem_sharing_audit(void)
             e = e->next;
         }
     }
-
-    shr_unlock();
 }
 #endif
 
@@ -632,10 +630,10 @@ int mem_sharing_unshare_page(struct doma
     shr_handle_t handle;
     struct list_head *le;
 
+    shr_lock();
     mem_sharing_audit();
+    
     /* Remove the gfn_info from the list */
-    shr_lock();
-    
     mfn = gfn_to_mfn(d, gfn, &p2mt);
     
     /* Has someone already unshared it? */
@@ -739,7 +737,6 @@ int mem_sharing_domctl(struct domain *d,
         case XEN_DOMCTL_MEM_SHARING_OP_CONTROL:
         {
             d->arch.hvm_domain.mem_sharing_enabled = mec->u.enable;
-            mem_sharing_audit();
             rc = 0;
         }
         break;
@@ -752,7 +749,6 @@ int mem_sharing_domctl(struct domain *d,
                 return -EINVAL;
             rc = mem_sharing_nominate_page(d, gfn, 0, &handle);
             mec->u.nominate.handle = handle;
-            mem_sharing_audit();
         }
         break;
 
@@ -768,7 +764,6 @@ int mem_sharing_domctl(struct domain *d,
                 return -EINVAL;
             rc = mem_sharing_nominate_page(d, gfn, 3, &handle);
             mec->u.nominate.handle = handle;
-            mem_sharing_audit();
         }
         break;
 
@@ -777,7 +772,6 @@ int mem_sharing_domctl(struct domain *d,
             shr_handle_t sh = mec->u.share.source_handle;
             shr_handle_t ch = mec->u.share.client_handle;
             rc = mem_sharing_share_pages(sh, ch); 
-            mem_sharing_audit();
         }
         break;
 
@@ -785,7 +779,6 @@ int mem_sharing_domctl(struct domain *d,
         {
             if(!mem_sharing_enabled(d))
                 return -EINVAL;
-            mem_sharing_audit();
             rc = mem_sharing_sharing_resume(d);
         }
         break;
@@ -794,7 +787,6 @@ int mem_sharing_domctl(struct domain *d,
         {
             unsigned long gfn = mec->u.debug.u.gfn;
             rc = mem_sharing_debug_gfn(d, gfn);
-            mem_sharing_audit();
         }
         break;
 
@@ -802,7 +794,6 @@ int mem_sharing_domctl(struct domain *d,
         {
             unsigned long mfn = mec->u.debug.u.mfn;
             rc = mem_sharing_debug_mfn(mfn);
-            mem_sharing_audit();
         }
         break;
 
@@ -810,7 +801,6 @@ int mem_sharing_domctl(struct domain *d,
         {
             grant_ref_t gref = mec->u.debug.u.gref;
             rc = mem_sharing_debug_gref(d, gref);
-            mem_sharing_audit();
         }
         break;
 
@@ -819,6 +809,10 @@ int mem_sharing_domctl(struct domain *d,
             break;
     }
 
+    shr_lock();
+    mem_sharing_audit();
+    shr_unlock();
+
     return rc;
 }
 
diff -r 39603c2f0dab -r 51518c502d1b xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/p2m.c	Thu Jun 02 13:16:52 2011 +0100
@@ -296,12 +296,10 @@ void p2m_teardown(struct p2m_domain *p2m
     if (p2m == NULL)
         return;
 
-    p2m_lock(p2m);
-
 #ifdef __x86_64__
     for ( gfn=0; gfn < p2m->max_mapped_pfn; gfn++ )
     {
-        mfn = p2m->get_entry(p2m, gfn, &t, &a, p2m_query);
+        mfn = gfn_to_mfn_type_p2m(p2m, gfn, &t, &a, p2m_query);
         if ( mfn_valid(mfn) && (t == p2m_ram_shared) )
         {
             ASSERT(!p2m_is_nestedp2m(p2m));
@@ -311,6 +309,8 @@ void p2m_teardown(struct p2m_domain *p2m
     }
 #endif
 
+    p2m_lock(p2m);
+
     p2m->phys_table = pagetable_null();
 
     while ( (pg = page_list_remove_head(&p2m->pages)) )

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 08 of 17] x86/mm/p2m: Remove recursive-locking code from set_shared_p2m_entry()
  2011-06-02 12:20 [PATCH 00 of 17] mm/p2m cleanups Tim Deegan
                   ` (6 preceding siblings ...)
  2011-06-02 12:20 ` [PATCH 07 of 17] x86/mm: Fix memory-sharing code's locking discipline Tim Deegan
@ 2011-06-02 12:20 ` Tim Deegan
  2011-06-02 12:20 ` [PATCH 09 of 17] x86/mm/p2m: Fix locking discipline around p2m updates Tim Deegan
                   ` (8 subsequent siblings)
  16 siblings, 0 replies; 18+ messages in thread
From: Tim Deegan @ 2011-06-02 12:20 UTC (permalink / raw)
  To: xen-devel

# HG changeset patch
# User Tim Deegan <Tim.Deegan@citrix.com>
# Date 1307017012 -3600
# Node ID dd89c48f2ac5c55e8a46159390c2c2f83a7b7742
# Parent  51518c502d1b23a1cc4b325f81d17027a9d31a74
x86/mm/p2m: Remove recursive-locking code from set_shared_p2m_entry().

It should no longer be needed now that the shr_lock discipline is fixed.
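
The body of set_shared_p2m_entry() therefore reduces to the plain
pattern below (sketch; p2m, gfn, mfn and rc are as in the hunk that
follows):

    /* With the shr_lock discipline fixed, this path is never entered
     * with the p2m lock already held, so take it unconditionally
     * instead of testing p2m_locked_by_me(). */
    p2m_lock(p2m);
    rc = set_p2m_entry(p2m, gfn, mfn, 0, p2m_ram_shared, p2m->default_access);
    p2m_unlock(p2m);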

Signed-off-by: Tim Deegan <Tim.Deegan@citrix.com>

diff -r 51518c502d1b -r dd89c48f2ac5 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/p2m.c	Thu Jun 02 13:16:52 2011 +0100
@@ -601,7 +601,6 @@ set_shared_p2m_entry(struct domain *d, u
 {
     struct p2m_domain *p2m = p2m_get_hostp2m(d);
     int rc = 0;
-    int need_lock = !p2m_locked_by_me(p2m);
     p2m_type_t ot;
     mfn_t omfn;
 
@@ -617,11 +616,9 @@ set_shared_p2m_entry(struct domain *d, u
     set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
 
     P2M_DEBUG("set shared %lx %lx\n", gfn, mfn_x(mfn));
-    if ( need_lock ) 
-        p2m_lock(p2m);
+    p2m_lock(p2m);
     rc = set_p2m_entry(p2m, gfn, mfn, 0, p2m_ram_shared, p2m->default_access);
-    if ( need_lock ) 
-        p2m_unlock(p2m);
+    p2m_unlock(p2m);
     if ( 0 == rc )
         gdprintk(XENLOG_ERR,
             "set_mmio_p2m_entry: set_p2m_entry failed! mfn=%08lx\n",

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 09 of 17] x86/mm/p2m: Fix locking discipline around p2m updates
  2011-06-02 12:20 [PATCH 00 of 17] mm/p2m cleanups Tim Deegan
                   ` (7 preceding siblings ...)
  2011-06-02 12:20 ` [PATCH 08 of 17] x86/mm/p2m: Remove recursive-locking code from set_shared_p2m_entry() Tim Deegan
@ 2011-06-02 12:20 ` Tim Deegan
  2011-06-02 12:20 ` [PATCH 10 of 17] x86/mm/p2m: Fix locking discipline around p2m lookups Tim Deegan
                   ` (7 subsequent siblings)
  16 siblings, 0 replies; 18+ messages in thread
From: Tim Deegan @ 2011-06-02 12:20 UTC (permalink / raw)
  To: xen-devel

# HG changeset patch
# User Tim Deegan <Tim.Deegan@citrix.com>
# Date 1307017012 -3600
# Node ID 32f80f08d7246cac9c75d4820698e754550a425a
# Parent  dd89c48f2ac5c55e8a46159390c2c2f83a7b7742
x86/mm/p2m: Fix locking discipline around p2m updates.

Direct callers of the p2m setting functions must hold the p2m lock.
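
A sketch of the rule being enforced (names as in the nested-HAP hunk
below; set_p2m_entry() now asserts that the lock is held):

    p2m_lock(p2m);
    rv = set_p2m_entry(p2m, gfn, mfn, 0 /* 4K */, p2mt, p2ma);
    p2m_unlock(p2m);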

Signed-off-by: Tim Deegan <Tim.Deegan@citrix.com>

diff -r dd89c48f2ac5 -r 32f80f08d724 xen/arch/x86/mm/hap/nested_hap.c
--- a/xen/arch/x86/mm/hap/nested_hap.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/hap/nested_hap.c	Thu Jun 02 13:16:52 2011 +0100
@@ -103,9 +103,12 @@ nestedhap_fix_p2m(struct p2m_domain *p2m
     ASSERT(p2m);
     ASSERT(p2m->set_entry);
 
-    rv = p2m->set_entry(p2m, L2_gpa >> PAGE_SHIFT,
+    p2m_lock(p2m);
+    rv = set_p2m_entry(p2m, L2_gpa >> PAGE_SHIFT,
                          page_to_mfn(maddr_to_page(L0_gpa)),
                          0 /*4K*/, p2mt, p2ma);
+    p2m_unlock(p2m);
+
     if (rv == 0) {
         gdprintk(XENLOG_ERR,
 		"failed to set entry for 0x%"PRIx64" -> 0x%"PRIx64"\n",
diff -r dd89c48f2ac5 -r 32f80f08d724 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/p2m.c	Thu Jun 02 13:16:52 2011 +0100
@@ -140,6 +140,8 @@ int set_p2m_entry(struct p2m_domain *p2m
     unsigned int order;
     int rc = 1;
 
+    ASSERT(p2m_locked_by_me(p2m));
+
     while ( todo )
     {
         if ( hap_enabled(d) )

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 10 of 17] x86/mm/p2m: Fix locking discipline around p2m lookups
  2011-06-02 12:20 [PATCH 00 of 17] mm/p2m cleanups Tim Deegan
                   ` (8 preceding siblings ...)
  2011-06-02 12:20 ` [PATCH 09 of 17] x86/mm/p2m: Fix locking discipline around p2m updates Tim Deegan
@ 2011-06-02 12:20 ` Tim Deegan
  2011-06-02 12:20 ` [PATCH 11 of 17] x86/mm/p2m: Move p2m code in HVMOP_[gs]et_mem_access into p2m.c Tim Deegan
                   ` (6 subsequent siblings)
  16 siblings, 0 replies; 18+ messages in thread
From: Tim Deegan @ 2011-06-02 12:20 UTC (permalink / raw)
  To: xen-devel

# HG changeset patch
# User Tim Deegan <Tim.Deegan@citrix.com>
# Date 1307017012 -3600
# Node ID 462280f8fae2fcb137d98f6f1dab105afc6745c3
# Parent  32f80f08d7246cac9c75d4820698e754550a425a
x86/mm/p2m: Fix locking discipline around p2m lookups.

All gfn_to_mfn* functions except _query() might take the p2m lock,
so can't be called with a p2m, shadow, hap or log_dirty lock held.
The remaining offender is the memory sharing code, which calls
_unshare() from inside the pagetable walker!  Fixing that is too big
for a cleanup patch like this one.
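
The resulting rule, sketched with the accessors from this series
(d, gfn and p2mt are hypothetical surrounding variables):

    /* Fine outside any mm lock: this form may itself take the p2m
     * lock, e.g. to populate an entry. */
    mfn = gfn_to_mfn(d, gfn, &p2mt);

    /* The only form that is safe to call while holding the p2m,
     * shadow, hap or log-dirty lock. */
    mfn = gfn_to_mfn_query(d, gfn, &p2mt);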

Signed-off-by: Tim Deegan <Tim.Deegan@citrix.com>

diff -r 32f80f08d724 -r 462280f8fae2 xen/arch/x86/hvm/mtrr.c
--- a/xen/arch/x86/hvm/mtrr.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/hvm/mtrr.c	Thu Jun 02 13:16:52 2011 +0100
@@ -390,7 +390,7 @@ uint32_t get_pat_flags(struct vcpu *v,
     {
         struct domain *d = v->domain;
         p2m_type_t p2mt;
-        gfn_to_mfn(d, paddr_to_pfn(gpaddr), &p2mt);
+        gfn_to_mfn_query(d, paddr_to_pfn(gpaddr), &p2mt);
         if (p2m_is_ram(p2mt))
             gdprintk(XENLOG_WARNING,
                     "Conflict occurs for a given guest l1e flags:%x "
diff -r 32f80f08d724 -r 462280f8fae2 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/p2m.c	Thu Jun 02 13:16:52 2011 +0100
@@ -566,7 +566,7 @@ set_mmio_p2m_entry(struct domain *d, uns
     if ( 0 == rc )
         gdprintk(XENLOG_ERR,
             "set_mmio_p2m_entry: set_p2m_entry failed! mfn=%08lx\n",
-            mfn_x(gfn_to_mfn(d, gfn, &ot)));
+            mfn_x(gfn_to_mfn_query(d, gfn, &ot)));
     return rc;
 }
 
@@ -623,8 +623,8 @@ set_shared_p2m_entry(struct domain *d, u
     p2m_unlock(p2m);
     if ( 0 == rc )
         gdprintk(XENLOG_ERR,
-            "set_mmio_p2m_entry: set_p2m_entry failed! mfn=%08lx\n",
-            gmfn_to_mfn(p2m->domain, gfn));
+            "set_shared_p2m_entry: set_p2m_entry failed! mfn=%08lx\n",
+            mfn_x(gfn_to_mfn_query(d, gfn, &ot)));
     return rc;
 }
 
diff -r 32f80f08d724 -r 462280f8fae2 xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/shadow/common.c	Thu Jun 02 13:16:52 2011 +0100
@@ -3712,7 +3712,7 @@ int shadow_track_dirty_vram(struct domai
 
         /* Iterate over VRAM to track dirty bits. */
         for ( i = 0; i < nr; i++ ) {
-            mfn_t mfn = gfn_to_mfn(d, begin_pfn + i, &t);
+            mfn_t mfn = gfn_to_mfn_query(d, begin_pfn + i, &t);
             struct page_info *page;
             int dirty = 0;
             paddr_t sl1ma = dirty_vram->sl1ma[i];
@@ -3797,7 +3797,7 @@ int shadow_track_dirty_vram(struct domai
                 /* was clean for more than two seconds, try to disable guest
                  * write access */
                 for ( i = begin_pfn; i < end_pfn; i++ ) {
-                    mfn_t mfn = gfn_to_mfn(d, i, &t);
+                    mfn_t mfn = gfn_to_mfn_query(d, i, &t);
                     if (mfn_x(mfn) != INVALID_MFN)
                         flush_tlb |= sh_remove_write_access(d->vcpu[0], mfn, 1, 0);
                 }
diff -r 32f80f08d724 -r 462280f8fae2 xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/include/asm-x86/p2m.h	Thu Jun 02 13:16:52 2011 +0100
@@ -411,6 +411,10 @@ static inline mfn_t gfn_to_mfn_type(stru
     return gfn_to_mfn_type_p2m(p2m, gfn, t, &a, q);
 }
 
+/* Syntactic sugar: most callers will use one of these. 
+ * N.B. gfn_to_mfn_query() is the _only_ one guaranteed not to take the
+ * p2m lock; none of the others can be called with the p2m, hap or
+ * shadow lock held. */
 #define gfn_to_mfn(d, g, t)         gfn_to_mfn_type((d), (g), (t), p2m_alloc)
 #define gfn_to_mfn_query(d, g, t)   gfn_to_mfn_type((d), (g), (t), p2m_query)
 #define gfn_to_mfn_guest(d, g, t)   gfn_to_mfn_type((d), (g), (t), p2m_guest)

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 11 of 17] x86/mm/p2m: Move p2m code in HVMOP_[gs]et_mem_access into p2m.c
  2011-06-02 12:20 [PATCH 00 of 17] mm/p2m cleanups Tim Deegan
                   ` (9 preceding siblings ...)
  2011-06-02 12:20 ` [PATCH 10 of 17] x86/mm/p2m: Fix locking discipline around p2m lookups Tim Deegan
@ 2011-06-02 12:20 ` Tim Deegan
  2011-06-02 12:20 ` [PATCH 12 of 17] x86/mm/p2m: Fix locking discipline around log-dirty teardown Tim Deegan
                   ` (5 subsequent siblings)
  16 siblings, 0 replies; 18+ messages in thread
From: Tim Deegan @ 2011-06-02 12:20 UTC (permalink / raw)
  To: xen-devel

# HG changeset patch
# User Tim Deegan <Tim.Deegan@citrix.com>
# Date 1307017012 -3600
# Node ID c9ea54b4f49ad9363ef8f08fa984f2900dc147e0
# Parent  462280f8fae2fcb137d98f6f1dab105afc6745c3
x86/mm/p2m: Move p2m code in HVMOP_[gs]et_mem_access into p2m.c

It uses p2m internals like the p2m lock and function pointers, so it
belongs behind the p2m interface.
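
After the move the hypercall handlers reduce to a range check plus one
call into the p2m layer (sketch; a, d and access are as in the hunks
below):

    /* HVMOP_set_mem_access: all the p2m-internal work now happens in
     * p2m_set_mem_access(), behind the p2m interface. */
    rc = p2m_set_mem_access(d, a.first_pfn, a.nr, a.hvmmem_access);

    /* HVMOP_get_mem_access: likewise. */
    rc = p2m_get_mem_access(d, a.pfn, &access);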

Signed-off-by: Tim Deegan <Tim.Deegan@citrix.com>

diff -r 462280f8fae2 -r c9ea54b4f49a xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/hvm/hvm.c	Thu Jun 02 13:16:52 2011 +0100
@@ -3814,21 +3814,6 @@ long do_hvm_op(unsigned long op, XEN_GUE
     {
         struct xen_hvm_set_mem_access a;
         struct domain *d;
-        struct p2m_domain *p2m;
-        unsigned long pfn;
-        
-        p2m_access_t memaccess[] = {
-            p2m_access_n,
-            p2m_access_r,
-            p2m_access_w,
-            p2m_access_rw,
-            p2m_access_x,
-            p2m_access_rx,
-            p2m_access_wx,
-            p2m_access_rwx,
-            p2m_access_rx2rw,
-            0,  /* HVMMEM_access_default -- will get set below */
-        };
 
         if ( copy_from_guest(&a, arg, 1) )
             return -EFAULT;
@@ -3841,42 +3826,13 @@ long do_hvm_op(unsigned long op, XEN_GUE
         if ( !is_hvm_domain(d) )
             goto param_fail5;
 
-        p2m = p2m_get_hostp2m(d);
-        memaccess[HVMMEM_access_default] = p2m->default_access;
-
-        /* If request to set default access */
-        if ( a.first_pfn == ~0ull ) 
-        {
-            rc = 0;
-            p2m->default_access = memaccess[a.hvmmem_access];
-            goto param_fail5;
-        }
-
         rc = -EINVAL;
         if ( (a.first_pfn > domain_get_maximum_gpfn(d)) ||
              ((a.first_pfn + a.nr - 1) < a.first_pfn) ||
              ((a.first_pfn + a.nr - 1) > domain_get_maximum_gpfn(d)) )
             goto param_fail5;
             
-        if ( a.hvmmem_access >= ARRAY_SIZE(memaccess) )
-            goto param_fail5;
-
-        for ( pfn = a.first_pfn; pfn < a.first_pfn + a.nr; pfn++ )
-        {
-            p2m_type_t t;
-            mfn_t mfn;
-            int success;
-
-            mfn = gfn_to_mfn_unshare(d, pfn, &t);
-
-            p2m_lock(p2m);
-            success = p2m->set_entry(p2m, pfn, mfn, 0, t, memaccess[a.hvmmem_access]);
-            p2m_unlock(p2m);
-            if ( !success )
-                goto param_fail5;
-        }
-
-        rc = 0;
+        rc = p2m_set_mem_access(d, a.first_pfn, a.nr, a.hvmmem_access);
 
     param_fail5:
         rcu_unlock_domain(d);
@@ -3887,23 +3843,7 @@ long do_hvm_op(unsigned long op, XEN_GUE
     {
         struct xen_hvm_get_mem_access a;
         struct domain *d;
-        struct p2m_domain *p2m;
-        p2m_type_t t;
-        p2m_access_t ac;
-        mfn_t mfn;
-
-        /* Interface access to internal p2m accesses */
-        hvmmem_access_t memaccess[] = {
-            HVMMEM_access_n,
-            HVMMEM_access_r,
-            HVMMEM_access_w,
-            HVMMEM_access_rw,
-            HVMMEM_access_x,
-            HVMMEM_access_rx,
-            HVMMEM_access_wx,
-            HVMMEM_access_rwx,
-            HVMMEM_access_rx2rw
-        };
+        hvmmem_access_t access;
 
         if ( copy_from_guest(&a, arg, 1) )
             return -EFAULT;
@@ -3916,30 +3856,15 @@ long do_hvm_op(unsigned long op, XEN_GUE
         if ( !is_hvm_domain(d) )
             goto param_fail6;
 
-        p2m = p2m_get_hostp2m(d);
-        
-        if ( a.pfn == ~0ull ) 
-        {
-            a.hvmmem_access = memaccess[p2m->default_access];
-        }
-        else {
-            rc = -EINVAL;
-            if ( (a.pfn > domain_get_maximum_gpfn(d)) )
-                goto param_fail6;
-
-            rc = -ESRCH;
-            mfn = p2m->get_entry(p2m, a.pfn, &t, &ac, p2m_query);
-
-            if ( mfn_x(mfn) == INVALID_MFN )
-                goto param_fail6;
-            
-            rc = -ERANGE;
-            if ( ac >= ARRAY_SIZE(memaccess) )
-                goto param_fail6;
-        
-            a.hvmmem_access = memaccess[ac];
-        }
-
+        rc = -EINVAL;
+        if ( (a.pfn > domain_get_maximum_gpfn(d)) && a.pfn != ~0ull )
+            goto param_fail6;
+
+        rc = p2m_get_mem_access(d, a.pfn, &access);
+        if ( rc != 0 )
+            goto param_fail6;
+
+        a.hvmmem_access = access;
         rc = copy_to_guest(arg, &a, 1) ? -EFAULT : 0;
 
     param_fail6:
diff -r 462280f8fae2 -r c9ea54b4f49a xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/p2m.c	Thu Jun 02 13:16:52 2011 +0100
@@ -915,6 +915,101 @@ void p2m_mem_access_resume(struct p2m_do
      * was available */
     mem_event_unpause_vcpus(d);
 }
+
+
+/* Set access type for a region of pfns.
+ * If start_pfn == -1ul, sets the default access type */
+int p2m_set_mem_access(struct domain *d, unsigned long start_pfn, 
+                       uint32_t nr, hvmmem_access_t access) 
+{
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
+    unsigned long pfn;
+    p2m_access_t a;
+    p2m_type_t t;
+    mfn_t mfn;
+    int rc = 0;
+
+    /* N.B. _not_ static: initializer depends on p2m->default_access */
+    p2m_access_t memaccess[] = {
+        p2m_access_n,
+        p2m_access_r,
+        p2m_access_w,
+        p2m_access_rw,
+        p2m_access_x,
+        p2m_access_rx,
+        p2m_access_wx,
+        p2m_access_rwx,
+        p2m_access_rx2rw,
+        p2m->default_access,
+    };
+
+    if ( access >= HVMMEM_access_default || access < 0 )
+        return -EINVAL;
+
+    a = memaccess[access];
+
+    /* If request to set default access */
+    if ( start_pfn == ~0ull ) 
+    {
+        p2m->default_access = a;
+        return 0;
+    }
+
+    p2m_lock(p2m);
+    for ( pfn = start_pfn; pfn < start_pfn + nr; pfn++ )
+    {
+        mfn = gfn_to_mfn_query(d, pfn, &t);
+        if ( p2m->set_entry(p2m, pfn, mfn, 0, t, a) == 0 )
+        {
+            rc = -ENOMEM;
+            break;
+        }
+    }
+    p2m_unlock(p2m);
+    return rc;
+}
+
+/* Get access type for a pfn
+ * If pfn == -1ul, gets the default access type */
+int p2m_get_mem_access(struct domain *d, unsigned long pfn, 
+                       hvmmem_access_t *access)
+{
+    struct p2m_domain *p2m = p2m_get_hostp2m(d);
+    p2m_type_t t;
+    p2m_access_t a;
+    mfn_t mfn;
+
+    static const hvmmem_access_t memaccess[] = {
+        HVMMEM_access_n,
+        HVMMEM_access_r,
+        HVMMEM_access_w,
+        HVMMEM_access_rw,
+        HVMMEM_access_x,
+        HVMMEM_access_rx,
+        HVMMEM_access_wx,
+        HVMMEM_access_rwx,
+        HVMMEM_access_rx2rw
+    };
+
+    /* If request to get default access */
+    if ( pfn == ~0ull ) 
+    {
+        *access = memaccess[p2m->default_access];
+        return 0;
+    }
+
+    mfn = p2m->get_entry(p2m, pfn, &t, &a, p2m_query);
+    if ( mfn_x(mfn) == INVALID_MFN )
+        return -ESRCH;
+    
+    if ( a >= ARRAY_SIZE(memaccess) || a < 0 )
+        return -ERANGE;
+
+    *access =  memaccess[a];
+    return 0;
+}
+
+
 #endif /* __x86_64__ */
 
 static struct p2m_domain *
diff -r 462280f8fae2 -r c9ea54b4f49a xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/include/asm-x86/p2m.h	Thu Jun 02 13:16:52 2011 +0100
@@ -553,6 +553,17 @@ void p2m_mem_access_check(unsigned long 
                           bool_t access_r, bool_t access_w, bool_t access_x);
 /* Resumes the running of the VCPU, restarting the last instruction */
 void p2m_mem_access_resume(struct p2m_domain *p2m);
+
+/* Set access type for a region of pfns.
+ * If start_pfn == -1ul, sets the default access type */
+int p2m_set_mem_access(struct domain *d, unsigned long start_pfn, 
+                       uint32_t nr, hvmmem_access_t access);
+
+/* Get access type for a pfn
+ * If pfn == -1ul, gets the default access type */
+int p2m_get_mem_access(struct domain *d, unsigned long pfn, 
+                       hvmmem_access_t *access);
+
 #else
 static inline void p2m_mem_access_check(unsigned long gpa, bool_t gla_valid, 
                                         unsigned long gla, bool_t access_r, 

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 12 of 17] x86/mm/p2m: Fix locking discipline around log-dirty teardown
  2011-06-02 12:20 [PATCH 00 of 17] mm/p2m cleanups Tim Deegan
                   ` (10 preceding siblings ...)
  2011-06-02 12:20 ` [PATCH 11 of 17] x86/mm/p2m: Move p2m code in HVMOP_[gs]et_mem_access into p2m.c Tim Deegan
@ 2011-06-02 12:20 ` Tim Deegan
  2011-06-02 12:20 ` [PATCH 13 of 17] x86/mm: dedup the various copies of the shadow lock functions Tim Deegan
                   ` (4 subsequent siblings)
  16 siblings, 0 replies; 18+ messages in thread
From: Tim Deegan @ 2011-06-02 12:20 UTC (permalink / raw)
  To: xen-devel

# HG changeset patch
# User Tim Deegan <Tim.Deegan@citrix.com>
# Date 1307017012 -3600
# Node ID 51f28a7cbb3d1e42e3592e5dc45041742a100fea
# Parent  c9ea54b4f49ad9363ef8f08fa984f2900dc147e0
x86/mm/p2m: Fix locking discipline around log-dirty teardown.

It's not safe to call paging_free_log_dirty_page with the
log-dirty lock held.
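
The fix is the usual defer-the-frees pattern: queue the pages on a
local list while the lock is held and free them afterwards.  A sketch
of the shape of the new code (the full walk over the l4/l3/l2 levels
is in the hunk below):

    struct page_list_head to_free;
    struct page_info *pg, *tmp;

    INIT_PAGE_LIST_HEAD(&to_free);

    log_dirty_lock(d);
    /* ... queue every bitmap page instead of freeing it here ... */
    page_list_add_tail(mfn_to_page(d->arch.paging.log_dirty.top), &to_free);
    log_dirty_unlock(d);

    /* Safe to free now that the log-dirty lock has been dropped. */
    page_list_for_each_safe(pg, tmp, &to_free)
        paging_free_log_dirty_page(d, page_to_mfn(pg));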

Signed-off-by: Tim Deegan <Tim.Deegan@citrix.com>

diff -r c9ea54b4f49a -r 51f28a7cbb3d xen/arch/x86/mm/paging.c
--- a/xen/arch/x86/mm/paging.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/paging.c	Thu Jun 02 13:16:52 2011 +0100
@@ -156,10 +156,16 @@ void paging_free_log_dirty_bitmap(struct
 {
     mfn_t *l4, *l3, *l2;
     int i4, i3, i2;
+    struct page_list_head to_free;    
+    struct page_info *pg, *tmp;
 
     if ( !mfn_valid(d->arch.paging.log_dirty.top) )
         return;
 
+    INIT_PAGE_LIST_HEAD(&to_free);
+
+    log_dirty_lock(d);
+
     l4 = map_domain_page(mfn_x(d->arch.paging.log_dirty.top));
 
     for ( i4 = 0; i4 < LOGDIRTY_NODE_ENTRIES; i4++ )
@@ -178,22 +184,28 @@ void paging_free_log_dirty_bitmap(struct
 
             for ( i2 = 0; i2 < LOGDIRTY_NODE_ENTRIES; i2++ )
                 if ( mfn_valid(l2[i2]) )
-                    paging_free_log_dirty_page(d, l2[i2]);
+                    page_list_add_tail(mfn_to_page(l2[i2]), &to_free);
 
             unmap_domain_page(l2);
-            paging_free_log_dirty_page(d, l3[i3]);
+            page_list_add_tail(mfn_to_page(l3[i3]), &to_free);
         }
 
         unmap_domain_page(l3);
-        paging_free_log_dirty_page(d, l4[i4]);
+        page_list_add_tail(mfn_to_page(l4[i4]), &to_free);
     }
 
     unmap_domain_page(l4);
-    paging_free_log_dirty_page(d, d->arch.paging.log_dirty.top);
+    page_list_add_tail(mfn_to_page(d->arch.paging.log_dirty.top), &to_free);
 
     d->arch.paging.log_dirty.top = _mfn(INVALID_MFN);
     ASSERT(d->arch.paging.log_dirty.allocs == 0);
     d->arch.paging.log_dirty.failed_allocs = 0;
+
+    log_dirty_unlock(d);
+    
+    /* Return the memory now that we're not holding the log-dirty lock */
+    page_list_for_each_safe(pg, tmp, &to_free)
+        paging_free_log_dirty_page(d, page_to_mfn(pg));
 }
 
 int paging_log_dirty_enable(struct domain *d)
@@ -217,10 +229,8 @@ int paging_log_dirty_disable(struct doma
     domain_pause(d);
     /* Safe because the domain is paused. */
     ret = d->arch.paging.log_dirty.disable_log_dirty(d);
-    log_dirty_lock(d);
     if ( !paging_mode_log_dirty(d) )
         paging_free_log_dirty_bitmap(d);
-    log_dirty_unlock(d);
     domain_unpause(d);
 
     return ret;
@@ -672,10 +682,9 @@ void paging_log_dirty_init(struct domain
 /* This function fress log dirty bitmap resources. */
 static void paging_log_dirty_teardown(struct domain*d)
 {
-    log_dirty_lock(d);
     paging_free_log_dirty_bitmap(d);
-    log_dirty_unlock(d);
 }
+
 /************************************************/
 /*           CODE FOR PAGING SUPPORT            */
 /************************************************/

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 13 of 17] x86/mm: dedup the various copies of the shadow lock functions
  2011-06-02 12:20 [PATCH 00 of 17] mm/p2m cleanups Tim Deegan
                   ` (11 preceding siblings ...)
  2011-06-02 12:20 ` [PATCH 12 of 17] x86/mm/p2m: Fix locking discipline around log-dirty teardown Tim Deegan
@ 2011-06-02 12:20 ` Tim Deegan
  2011-06-02 12:20 ` [PATCH 14 of 17] x86/mm: Make MM locks recursive Tim Deegan
                   ` (3 subsequent siblings)
  16 siblings, 0 replies; 18+ messages in thread
From: Tim Deegan @ 2011-06-02 12:20 UTC (permalink / raw)
  To: xen-devel

# HG changeset patch
# User Tim Deegan <Tim.Deegan@citrix.com>
# Date 1307017012 -3600
# Node ID d6518e8670ab15d5a9ec49b500ecf6e67442d3a8
# Parent  51f28a7cbb3d1e42e3592e5dc45041742a100fea
x86/mm: dedup the various copies of the shadow lock functions

Define the lock and unlock functions once, and list all the locks in one
place so (a) it's obvious what the locking discipline is and (b) none of
the locks are visible to non-mm code.  Automatically enforce that these
locks never get taken in the wrong order.
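
With the common type in place, each subsystem's locking boilerplate
shrinks to a declaration and an init call, as sketched below (the
contents of mm-locks.h are not part of this excerpt, so its internals
are assumed):

    /* One shared lock type instead of a hand-rolled struct per user: */
    static mm_lock_t shr_lock;

    /* ... and per-domain locks are set up with a single call, e.g.: */
    mm_lock_init(&d->arch.paging.hap.lock);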

Signed-off-by: Tim Deegan <Tim.Deegan@citrix.com>

diff -r 51f28a7cbb3d -r d6518e8670ab xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/hvm/vmx/vmx.c	Thu Jun 02 13:16:52 2011 +0100
@@ -1215,7 +1215,6 @@ void ept_sync_domain(struct domain *d)
         return;
 
     ASSERT(local_irq_is_enabled());
-    ASSERT(p2m_locked_by_me(p2m_get_hostp2m(d)));
 
     /*
      * Flush active cpus synchronously. Flush others the next time this domain
diff -r 51f28a7cbb3d -r d6518e8670ab xen/arch/x86/mm/hap/hap.c
--- a/xen/arch/x86/mm/hap/hap.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/hap/hap.c	Thu Jun 02 13:16:52 2011 +0100
@@ -572,7 +572,7 @@ static void hap_destroy_monitor_table(st
 /************************************************/
 void hap_domain_init(struct domain *d)
 {
-    hap_lock_init(d);
+    mm_lock_init(&d->arch.paging.hap.lock);
     INIT_PAGE_LIST_HEAD(&d->arch.paging.hap.freelist);
 }
 
diff -r 51f28a7cbb3d -r d6518e8670ab xen/arch/x86/mm/hap/private.h
--- a/xen/arch/x86/mm/hap/private.h	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/hap/private.h	Thu Jun 02 13:16:52 2011 +0100
@@ -20,6 +20,8 @@
 #ifndef __HAP_PRIVATE_H__
 #define __HAP_PRIVATE_H__
 
+#include "../mm-locks.h"
+
 /********************************************/
 /*          GUEST TRANSLATION FUNCS         */
 /********************************************/
diff -r 51f28a7cbb3d -r d6518e8670ab xen/arch/x86/mm/mem_sharing.c
--- a/xen/arch/x86/mm/mem_sharing.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/mem_sharing.c	Thu Jun 02 13:16:52 2011 +0100
@@ -32,6 +32,8 @@
 #include <asm/mem_event.h>
 #include <asm/atomic.h>
 
+#include "mm-locks.h"
+
 /* Auditing of memory sharing code? */
 #define MEM_SHARING_AUDIT  0
 
@@ -74,13 +76,7 @@ typedef struct gfn_info
     struct list_head list;
 } gfn_info_t;
 
-typedef struct shr_lock
-{
-    spinlock_t  lock;            /* mem sharing lock */
-    int         locker;          /* processor which holds the lock */
-    const char *locker_function; /* func that took it */
-} shr_lock_t;
-static shr_lock_t shr_lock;
+static mm_lock_t shr_lock;
 
 /* Returns true if list has only one entry. O(1) complexity. */
 static inline int list_has_one_entry(struct list_head *head)
@@ -93,43 +89,11 @@ static inline struct gfn_info* gfn_get_i
     return list_entry(list->next, struct gfn_info, list);
 }
 
-#define shr_lock_init(_i)                      \
-    do {                                       \
-        spin_lock_init(&shr_lock.lock);        \
-        shr_lock.locker = -1;                  \
-        shr_lock.locker_function = "nobody";   \
-    } while (0)
-
-#define shr_locked_by_me(_i)                   \
-    (current->processor == shr_lock.locker)
-
-#define shr_lock(_i)                                           \
-    do {                                                       \
-        if ( unlikely(shr_lock.locker == current->processor) ) \
-        {                                                      \
-            printk("Error: shr lock held by %s\n",             \
-                   shr_lock.locker_function);                  \
-            BUG();                                             \
-        }                                                      \
-        spin_lock(&shr_lock.lock);                             \
-        ASSERT(shr_lock.locker == -1);                         \
-        shr_lock.locker = current->processor;                  \
-        shr_lock.locker_function = __func__;                   \
-    } while (0)
-
-#define shr_unlock(_i)                                    \
-    do {                                                  \
-        ASSERT(shr_lock.locker == current->processor);    \
-        shr_lock.locker = -1;                             \
-        shr_lock.locker_function = "nobody";              \
-        spin_unlock(&shr_lock.lock);                      \
-    } while (0)
-
 static void __init mem_sharing_hash_init(void)
 {
     int i;
 
-    shr_lock_init();
+    mm_lock_init(&shr_lock);
     for(i=0; i<SHR_HASH_LENGTH; i++)
         shr_hash[i] = NULL;
 }
diff -r 51f28a7cbb3d -r d6518e8670ab xen/arch/x86/mm/mm-locks.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/mm/mm-locks.h	Thu Jun 02 13:16:52 2011 +0100
@@ -0,0 +1,161 @@
+/******************************************************************************
+ * arch/x86/mm/mm-locks.h
+ *
+ * Spinlocks used by the code in arch/x86/mm.
+ *
+ * Copyright (c) 2011 Citrix Systems, inc. 
+ * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
+ * Copyright (c) 2006-2007 XenSource Inc.
+ * Copyright (c) 2006 Michael A Fetterman
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef _MM_LOCKS_H
+#define _MM_LOCKS_H
+
+/* Per-CPU variable for enforcing the lock ordering */
+DECLARE_PER_CPU(int, mm_lock_level);
+
+static inline void mm_lock_init(mm_lock_t *l)
+{
+    spin_lock_init(&l->lock);
+    l->locker = -1;
+    l->locker_function = "nobody";
+    l->unlock_level = 0;
+}
+
+static inline void _mm_lock(mm_lock_t *l, const char *func, int level)
+{
+    if ( unlikely(l->locker == current->processor) )
+        panic("mm lock held by %s\n", l->locker_function);
+    /* If you see this crash, the numbers printed are lines in this file 
+     * where the offending locks are declared. */
+    if ( unlikely(this_cpu(mm_lock_level) >= level) )
+        panic("mm locking order violation: %i >= %i\n", 
+              this_cpu(mm_lock_level), level);
+    spin_lock(&l->lock);
+    ASSERT(l->locker == -1);
+    l->locker = current->processor;
+    l->locker_function = func;
+    l->unlock_level = this_cpu(mm_lock_level);
+    this_cpu(mm_lock_level) = level;
+}
+/* This wrapper uses the line number to express the locking order below */
+#define declare_mm_lock(name)                                             \
+  static inline void mm_lock_##name(mm_lock_t *l, const char *func)       \
+  { _mm_lock(l, func, __LINE__); }
+/* This one captures the name of the calling function */
+#define mm_lock(name, l) mm_lock_##name(l, __func__)
+
+static inline void mm_unlock(mm_lock_t *l)
+{
+    ASSERT(l->locker == current->processor);
+    l->locker = -1;
+    l->locker_function = "nobody";
+    this_cpu(mm_lock_level) = l->unlock_level;
+    l->unlock_level = -1;
+    spin_unlock(&l->lock);
+}
+
+static inline int mm_locked_by_me(mm_lock_t *l) 
+{
+    return (current->processor == l->locker);
+}
+
+/************************************************************************
+ *                                                                      *
+ * To avoid deadlocks, these locks _MUST_ be taken in the order they're *
+ * declared in this file.  The locking functions will enforce this.     *
+ *                                                                      *
+ ************************************************************************/
+
+/* Page-sharing lock (global) 
+ *
+ * A single global lock that protects the memory-sharing code's
+ * hash tables. */
+
+declare_mm_lock(shr)
+#define shr_lock()         mm_lock(shr, &shr_lock)
+#define shr_unlock()       mm_unlock(&shr_lock)
+#define shr_locked_by_me() mm_locked_by_me(&shr_lock)
+
+/* Nested P2M lock (per-domain)
+ *
+ * A per-domain lock that protects some of the nested p2m data structures.
+ * TODO: find out exactly what needs to be covered by this lock */
+
+declare_mm_lock(nestedp2m)
+#define nestedp2m_lock(d)   mm_lock(nestedp2m, &(d)->arch.nested_p2m_lock)
+#define nestedp2m_unlock(d) mm_unlock(&(d)->arch.nested_p2m_lock)
+
+/* P2M lock (per-p2m-table)
+ * 
+ * This protects all updates to the p2m table.  Updates are expected to
+ * be safe against concurrent reads, which do *not* require the lock. */
+
+declare_mm_lock(p2m)
+#define p2m_lock(p)         mm_lock(p2m, &(p)->lock)
+#define p2m_unlock(p)       mm_unlock(&(p)->lock)
+#define p2m_locked_by_me(p) mm_locked_by_me(&(p)->lock)
+
+/* Shadow lock (per-domain)
+ *
+ * This lock is intended to allow us to make atomic updates to the
+ * software TLB that the shadow pagetables provide.
+ *
+ * Specifically, it protects:
+ *   - all changes to shadow page table pages
+ *   - the shadow hash table
+ *   - the shadow page allocator 
+ *   - all changes to guest page table pages
+ *   - all changes to the page_info->tlbflush_timestamp
+ *   - the page_info->count fields on shadow pages */
+
+declare_mm_lock(shadow)
+#define shadow_lock(d)         mm_lock(shadow, &(d)->arch.paging.shadow.lock)
+#define shadow_unlock(d)       mm_unlock(&(d)->arch.paging.shadow.lock)
+#define shadow_locked_by_me(d) mm_locked_by_me(&(d)->arch.paging.shadow.lock)
+
+/* HAP lock (per-domain)
+ * 
+ * Equivalent of the shadow lock for HAP.  Protects updates to the
+ * NPT and EPT tables, and the HAP page allocator. */
+
+declare_mm_lock(hap)
+#define hap_lock(d)         mm_lock(hap, &(d)->arch.paging.hap.lock)
+#define hap_unlock(d)       mm_unlock(&(d)->arch.paging.hap.lock)
+#define hap_locked_by_me(d) mm_locked_by_me(&(d)->arch.paging.hap.lock)
+
+/* Log-dirty lock (per-domain) 
+ * 
+ * Protects the log-dirty bitmap from concurrent accesses (and teardowns, etc).
+ *
+ * Because mark_dirty is called from a lot of places, the log-dirty lock
+ * may be acquired with the shadow or HAP locks already held.  When the
+ * log-dirty code makes callbacks into HAP or shadow code to reset
+ * various traps that will trigger the mark_dirty calls, it must *not*
+ * have the log-dirty lock held, or it risks deadlock.  Because the only
+ * purpose of those calls is to make sure that *guest* actions will
+ * cause mark_dirty to be called (hypervisor actions explicitly call it
+ * anyway), it is safe to release the log-dirty lock before the callback
+ * as long as the domain is paused for the entire operation. */
+
+declare_mm_lock(log_dirty)
+#define log_dirty_lock(d) mm_lock(log_dirty, &(d)->arch.paging.log_dirty.lock)
+#define log_dirty_unlock(d) mm_unlock(&(d)->arch.paging.log_dirty.lock)
+
+
+#endif /* _MM_LOCKS_H */
diff -r 51f28a7cbb3d -r d6518e8670ab xen/arch/x86/mm/p2m-ept.c
--- a/xen/arch/x86/mm/p2m-ept.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/p2m-ept.c	Thu Jun 02 13:16:52 2011 +0100
@@ -32,6 +32,8 @@
 #include <xen/keyhandler.h>
 #include <xen/softirq.h>
 
+#include "mm-locks.h"
+
 #define atomic_read_ept_entry(__pepte)                              \
     ( (ept_entry_t) { .epte = atomic_read64(&(__pepte)->epte) } )
 #define atomic_write_ept_entry(__pepte, __epte)                     \
diff -r 51f28a7cbb3d -r d6518e8670ab xen/arch/x86/mm/p2m-pod.c
--- a/xen/arch/x86/mm/p2m-pod.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/p2m-pod.c	Thu Jun 02 13:16:52 2011 +0100
@@ -32,7 +32,8 @@
 #include <xen/event.h>
 #include <asm/hvm/nestedhvm.h>
 #include <asm/hvm/svm/amd-iommu-proto.h>
- 
+
+#include "mm-locks.h"
 
 /* Override macros from asm/page.h to make them work with mfn_t */
 #undef mfn_to_page
@@ -375,7 +376,7 @@ p2m_pod_empty_cache(struct domain *d)
 
     /* After this barrier no new PoD activities can happen. */
     BUG_ON(!d->is_dying);
-    spin_barrier(&p2m->lock);
+    spin_barrier(&p2m->lock.lock);
 
     spin_lock(&d->page_alloc_lock);
 
diff -r 51f28a7cbb3d -r d6518e8670ab xen/arch/x86/mm/p2m-pt.c
--- a/xen/arch/x86/mm/p2m-pt.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/p2m-pt.c	Thu Jun 02 13:16:52 2011 +0100
@@ -38,6 +38,8 @@
 #include <asm/hvm/nestedhvm.h>
 #include <asm/hvm/svm/amd-iommu-proto.h>
 
+#include "mm-locks.h"
+
 /* Override macros from asm/page.h to make them work with mfn_t */
 #undef mfn_to_page
 #define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
diff -r 51f28a7cbb3d -r d6518e8670ab xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/p2m.c	Thu Jun 02 13:16:52 2011 +0100
@@ -37,6 +37,8 @@
 #include <asm/hvm/nestedhvm.h>
 #include <asm/hvm/svm/amd-iommu-proto.h>
 
+#include "mm-locks.h"
+
 /* turn on/off 1GB host page table support for hap, default on */
 static bool_t __read_mostly opt_hap_1gb = 1;
 boolean_param("hap_1gb", opt_hap_1gb);
@@ -70,7 +72,7 @@ boolean_param("hap_2mb", opt_hap_2mb);
 static void p2m_initialise(struct domain *d, struct p2m_domain *p2m)
 {
     memset(p2m, 0, sizeof(*p2m));
-    p2m_lock_init(p2m);
+    mm_lock_init(&p2m->lock);
     INIT_PAGE_LIST_HEAD(&p2m->pages);
     INIT_PAGE_LIST_HEAD(&p2m->pod.super);
     INIT_PAGE_LIST_HEAD(&p2m->pod.single);
@@ -95,7 +97,7 @@ p2m_init_nestedp2m(struct domain *d)
     uint8_t i;
     struct p2m_domain *p2m;
 
-    nestedp2m_lock_init(d);
+    mm_lock_init(&d->arch.nested_p2m_lock);
     for (i = 0; i < MAX_NESTEDP2M; i++) {
         d->arch.nested_p2m[i] = p2m = xmalloc(struct p2m_domain);
         if (p2m == NULL)
diff -r 51f28a7cbb3d -r d6518e8670ab xen/arch/x86/mm/paging.c
--- a/xen/arch/x86/mm/paging.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/paging.c	Thu Jun 02 13:16:52 2011 +0100
@@ -30,6 +30,8 @@
 #include <xen/numa.h>
 #include <xsm/xsm.h>
 
+#include "mm-locks.h"
+
 /* Printouts */
 #define PAGING_PRINTK(_f, _a...)                                     \
     debugtrace_printk("pg: %s(): " _f, __func__, ##_a)
@@ -41,9 +43,9 @@
             debugtrace_printk("pgdebug: %s(): " _f, __func__, ##_a); \
     } while (0)
 
-/************************************************/
-/*              LOG DIRTY SUPPORT               */
-/************************************************/
+/* Per-CPU variable for enforcing the lock ordering */
+DEFINE_PER_CPU(int, mm_lock_level);
+
 /* Override macros from asm/page.h to make them work with mfn_t */
 #undef mfn_to_page
 #define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
@@ -52,49 +54,9 @@
 #undef page_to_mfn
 #define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
 
-/* The log-dirty lock.  This protects the log-dirty bitmap from
- * concurrent accesses (and teardowns, etc).
- *
- * Locking discipline: always acquire shadow or HAP lock before this one.
- *
- * Because mark_dirty is called from a lot of places, the log-dirty lock
- * may be acquired with the shadow or HAP locks already held.  When the
- * log-dirty code makes callbacks into HAP or shadow code to reset
- * various traps that will trigger the mark_dirty calls, it must *not*
- * have the log-dirty lock held, or it risks deadlock.  Because the only
- * purpose of those calls is to make sure that *guest* actions will
- * cause mark_dirty to be called (hypervisor actions explictly call it
- * anyway), it is safe to release the log-dirty lock before the callback
- * as long as the domain is paused for the entire operation. */
-
-#define log_dirty_lock_init(_d)                                   \
-    do {                                                          \
-        spin_lock_init(&(_d)->arch.paging.log_dirty.lock);        \
-        (_d)->arch.paging.log_dirty.locker = -1;                  \
-        (_d)->arch.paging.log_dirty.locker_function = "nobody";   \
-    } while (0)
-
-#define log_dirty_lock(_d)                                                   \
-    do {                                                                     \
-        if (unlikely((_d)->arch.paging.log_dirty.locker==current->processor))\
-        {                                                                    \
-            printk("Error: paging log dirty lock held by %s\n",              \
-                   (_d)->arch.paging.log_dirty.locker_function);             \
-            BUG();                                                           \
-        }                                                                    \
-        spin_lock(&(_d)->arch.paging.log_dirty.lock);                        \
-        ASSERT((_d)->arch.paging.log_dirty.locker == -1);                    \
-        (_d)->arch.paging.log_dirty.locker = current->processor;             \
-        (_d)->arch.paging.log_dirty.locker_function = __func__;              \
-    } while (0)
-
-#define log_dirty_unlock(_d)                                              \
-    do {                                                                  \
-        ASSERT((_d)->arch.paging.log_dirty.locker == current->processor); \
-        (_d)->arch.paging.log_dirty.locker = -1;                          \
-        (_d)->arch.paging.log_dirty.locker_function = "nobody";           \
-        spin_unlock(&(_d)->arch.paging.log_dirty.lock);                   \
-    } while (0)
+/************************************************/
+/*              LOG DIRTY SUPPORT               */
+/************************************************/
 
 static mfn_t paging_new_log_dirty_page(struct domain *d)
 {
@@ -671,7 +633,7 @@ void paging_log_dirty_init(struct domain
                            void   (*clean_dirty_bitmap)(struct domain *d))
 {
     /* We initialize log dirty lock first */
-    log_dirty_lock_init(d);
+    mm_lock_init(&d->arch.paging.log_dirty.lock);
 
     d->arch.paging.log_dirty.enable_log_dirty = enable_log_dirty;
     d->arch.paging.log_dirty.disable_log_dirty = disable_log_dirty;
diff -r 51f28a7cbb3d -r d6518e8670ab xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/shadow/common.c	Thu Jun 02 13:16:52 2011 +0100
@@ -45,7 +45,7 @@ DEFINE_PER_CPU(uint32_t,trace_shadow_pat
  * Called for every domain from arch_domain_create() */
 void shadow_domain_init(struct domain *d, unsigned int domcr_flags)
 {
-    shadow_lock_init(d);
+    mm_lock_init(&d->arch.paging.shadow.lock);
     INIT_PAGE_LIST_HEAD(&d->arch.paging.shadow.freelist);
     INIT_PAGE_LIST_HEAD(&d->arch.paging.shadow.pinned_shadows);
 
diff -r 51f28a7cbb3d -r d6518e8670ab xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/shadow/multi.c	Thu Jun 02 13:16:52 2011 +0100
@@ -3136,7 +3136,7 @@ static int sh_page_fault(struct vcpu *v,
     if ( unlikely(shadow_locked_by_me(d)) )
     {
         SHADOW_ERROR("Recursive shadow fault: lock was taken by %s\n",
-                     d->arch.paging.shadow.locker_function);
+                     d->arch.paging.shadow.lock.locker_function);
         return 0;
     }
 
diff -r 51f28a7cbb3d -r d6518e8670ab xen/arch/x86/mm/shadow/private.h
--- a/xen/arch/x86/mm/shadow/private.h	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/shadow/private.h	Thu Jun 02 13:16:52 2011 +0100
@@ -31,6 +31,7 @@
 #include <asm/x86_emulate.h>
 #include <asm/hvm/support.h>
 
+#include "../mm-locks.h"
 
 /******************************************************************************
  * Levels of self-test and paranoia
@@ -128,57 +129,6 @@ enum {
     TRCE_SFLAG_OOS_FIXUP_EVICT,
 };
 
-/******************************************************************************
- * The shadow lock.
- *
- * This lock is per-domain.  It is intended to allow us to make atomic
- * updates to the software TLB that the shadow tables provide.
- * 
- * Specifically, it protects:
- *   - all changes to shadow page table pages
- *   - the shadow hash table
- *   - the shadow page allocator 
- *   - all changes to guest page table pages
- *   - all changes to the page_info->tlbflush_timestamp
- *   - the page_info->count fields on shadow pages
- *   - the shadow dirty bit array and count
- */
-#ifndef CONFIG_SMP
-#error shadow.h currently requires CONFIG_SMP
-#endif
-
-#define shadow_lock_init(_d)                                   \
-    do {                                                       \
-        spin_lock_init(&(_d)->arch.paging.shadow.lock);        \
-        (_d)->arch.paging.shadow.locker = -1;                  \
-        (_d)->arch.paging.shadow.locker_function = "nobody";   \
-    } while (0)
-
-#define shadow_locked_by_me(_d)                     \
-    (current->processor == (_d)->arch.paging.shadow.locker)
-
-#define shadow_lock(_d)                                                       \
-    do {                                                                      \
-        if ( unlikely((_d)->arch.paging.shadow.locker == current->processor) )\
-        {                                                                     \
-            printk("Error: shadow lock held by %s\n",                         \
-                   (_d)->arch.paging.shadow.locker_function);                 \
-            BUG();                                                            \
-        }                                                                     \
-        spin_lock(&(_d)->arch.paging.shadow.lock);                            \
-        ASSERT((_d)->arch.paging.shadow.locker == -1);                        \
-        (_d)->arch.paging.shadow.locker = current->processor;                 \
-        (_d)->arch.paging.shadow.locker_function = __func__;                  \
-    } while (0)
-
-#define shadow_unlock(_d)                                              \
-    do {                                                               \
-        ASSERT((_d)->arch.paging.shadow.locker == current->processor); \
-        (_d)->arch.paging.shadow.locker = -1;                          \
-        (_d)->arch.paging.shadow.locker_function = "nobody";           \
-        spin_unlock(&(_d)->arch.paging.shadow.lock);                   \
-    } while (0)
-
 
 /* Size (in bytes) of a guest PTE */
 #if GUEST_PAGING_LEVELS >= 3
diff -r 51f28a7cbb3d -r d6518e8670ab xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/include/asm-x86/domain.h	Thu Jun 02 13:16:52 2011 +0100
@@ -91,9 +91,8 @@ void hypercall_page_initialise(struct do
 /*          shadow paging extension             */
 /************************************************/
 struct shadow_domain {
-    spinlock_t        lock;  /* shadow domain lock */
-    int               locker; /* processor which holds the lock */
-    const char       *locker_function; /* Func that took it */
+    mm_lock_t         lock;  /* shadow domain lock */
+
     unsigned int      opt_flags;    /* runtime tunable optimizations on/off */
     struct page_list_head pinned_shadows;
 
@@ -159,9 +158,7 @@ struct shadow_vcpu {
 /*            hardware assisted paging          */
 /************************************************/
 struct hap_domain {
-    spinlock_t        lock;
-    int               locker;
-    const char       *locker_function;
+    mm_lock_t         lock;
 
     struct page_list_head freelist;
     unsigned int      total_pages;  /* number of pages allocated */
@@ -174,9 +171,7 @@ struct hap_domain {
 /************************************************/
 struct log_dirty_domain {
     /* log-dirty lock */
-    spinlock_t     lock;
-    int            locker; /* processor that holds the lock */
-    const char    *locker_function; /* func that took it */
+    mm_lock_t     lock;
 
     /* log-dirty radix tree to record dirty pages */
     mfn_t          top;
@@ -280,9 +275,7 @@ struct arch_domain
 
     /* nestedhvm: translate l2 guest physical to host physical */
     struct p2m_domain *nested_p2m[MAX_NESTEDP2M];
-    spinlock_t nested_p2m_lock;
-    int nested_p2m_locker;
-    const char *nested_p2m_function;
+    mm_lock_t nested_p2m_lock;
 
     /* NB. protected by d->event_lock and by irq_desc[irq].lock */
     struct radix_tree_root irq_pirq;
diff -r 51f28a7cbb3d -r d6518e8670ab xen/include/asm-x86/hap.h
--- a/xen/include/asm-x86/hap.h	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/include/asm-x86/hap.h	Thu Jun 02 13:16:52 2011 +0100
@@ -47,41 +47,6 @@ hap_unmap_domain_page(void *p)
 }
 
 /************************************************/
-/*           locking for hap code               */
-/************************************************/
-#define hap_lock_init(_d)                                   \
-    do {                                                    \
-        spin_lock_init(&(_d)->arch.paging.hap.lock);        \
-        (_d)->arch.paging.hap.locker = -1;                  \
-        (_d)->arch.paging.hap.locker_function = "nobody";   \
-    } while (0)
-
-#define hap_locked_by_me(_d)                     \
-    (current->processor == (_d)->arch.paging.hap.locker)
-
-#define hap_lock(_d)                                                       \
-    do {                                                                   \
-        if ( unlikely((_d)->arch.paging.hap.locker == current->processor) )\
-        {                                                                  \
-            printk("Error: hap lock held by %s\n",                         \
-                   (_d)->arch.paging.hap.locker_function);                 \
-            BUG();                                                         \
-        }                                                                  \
-        spin_lock(&(_d)->arch.paging.hap.lock);                            \
-        ASSERT((_d)->arch.paging.hap.locker == -1);                        \
-        (_d)->arch.paging.hap.locker = current->processor;                 \
-        (_d)->arch.paging.hap.locker_function = __func__;                  \
-    } while (0)
-
-#define hap_unlock(_d)                                              \
-    do {                                                            \
-        ASSERT((_d)->arch.paging.hap.locker == current->processor); \
-        (_d)->arch.paging.hap.locker = -1;                          \
-        (_d)->arch.paging.hap.locker_function = "nobody";           \
-        spin_unlock(&(_d)->arch.paging.hap.lock);                   \
-    } while (0)
-
-/************************************************/
 /*        hap domain level functions            */
 /************************************************/
 void  hap_domain_init(struct domain *d);
diff -r 51f28a7cbb3d -r d6518e8670ab xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/include/asm-x86/mm.h	Thu Jun 02 13:16:52 2011 +0100
@@ -4,6 +4,7 @@
 
 #include <xen/config.h>
 #include <xen/list.h>
+#include <xen/spinlock.h>
 #include <asm/io.h>
 #include <asm/uaccess.h>
 
@@ -597,4 +598,12 @@ unsigned long domain_get_maximum_gpfn(st
 
 extern struct domain *dom_xen, *dom_io, *dom_cow;	/* for vmcoreinfo */
 
+/* Definition of an mm lock: spinlock with extra fields for debugging */
+typedef struct mm_lock {
+    spinlock_t         lock; 
+    int                unlock_level;
+    int                locker;          /* processor which holds the lock */
+    const char        *locker_function; /* func that took it */
+} mm_lock_t;
+
 #endif /* __ASM_X86_MM_H__ */
diff -r 51f28a7cbb3d -r d6518e8670ab xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/include/asm-x86/p2m.h	Thu Jun 02 13:16:52 2011 +0100
@@ -189,9 +189,7 @@ typedef enum {
 /* Per-p2m-table state */
 struct p2m_domain {
     /* Lock that protects updates to the p2m */
-    spinlock_t         lock;
-    int                locker;   /* processor which holds the lock */
-    const char        *locker_function; /* Func that took it */
+    mm_lock_t          lock;
 
     /* Shadow translated domain: p2m mapping */
     pagetable_t        phys_table;
@@ -285,80 +283,6 @@ struct p2m_domain *p2m_get_p2m(struct vc
 #define p2m_get_pagetable(p2m)  ((p2m)->phys_table)
 
 
-/*
- * The P2M lock.  This protects all updates to the p2m table.
- * Updates are expected to be safe against concurrent reads,
- * which do *not* require the lock.
- *
- * Locking discipline: always acquire this lock before the shadow or HAP one
- */
-
-#define p2m_lock_init(_p2m)                     \
-    do {                                        \
-        spin_lock_init(&(_p2m)->lock);          \
-        (_p2m)->locker = -1;                    \
-        (_p2m)->locker_function = "nobody";     \
-    } while (0)
-
-#define p2m_lock(_p2m)                                          \
-    do {                                                        \
-        if ( unlikely((_p2m)->locker == current->processor) )   \
-        {                                                       \
-            printk("Error: p2m lock held by %s\n",              \
-                   (_p2m)->locker_function);                    \
-            BUG();                                              \
-        }                                                       \
-        spin_lock(&(_p2m)->lock);                               \
-        ASSERT((_p2m)->locker == -1);                           \
-        (_p2m)->locker = current->processor;                    \
-        (_p2m)->locker_function = __func__;                     \
-    } while (0)
-
-#define p2m_unlock(_p2m)                                \
-    do {                                                \
-        ASSERT((_p2m)->locker == current->processor);   \
-        (_p2m)->locker = -1;                            \
-        (_p2m)->locker_function = "nobody";             \
-        spin_unlock(&(_p2m)->lock);                     \
-    } while (0)
-
-#define p2m_locked_by_me(_p2m)                            \
-    (current->processor == (_p2m)->locker)
-
-
-#define nestedp2m_lock_init(_domain)                                  \
-    do {                                                              \
-        spin_lock_init(&(_domain)->arch.nested_p2m_lock);             \
-        (_domain)->arch.nested_p2m_locker = -1;                       \
-        (_domain)->arch.nested_p2m_function = "nobody";               \
-    } while (0)
-
-#define nestedp2m_locked_by_me(_domain)                \
-    (current->processor == (_domain)->arch.nested_p2m_locker)
-
-#define nestedp2m_lock(_domain)                                       \
-    do {                                                              \
-        if ( nestedp2m_locked_by_me(_domain) )                        \
-        {                                                             \
-            printk("Error: p2m lock held by %s\n",                    \
-                   (_domain)->arch.nested_p2m_function);              \
-            BUG();                                                    \
-        }                                                             \
-        spin_lock(&(_domain)->arch.nested_p2m_lock);                  \
-        ASSERT((_domain)->arch.nested_p2m_locker == -1);              \
-        (_domain)->arch.nested_p2m_locker = current->processor;       \
-        (_domain)->arch.nested_p2m_function = __func__;               \
-    } while (0)
-
-#define nestedp2m_unlock(_domain)                                      \
-    do {                                                               \
-        ASSERT(nestedp2m_locked_by_me(_domain));                       \
-        (_domain)->arch.nested_p2m_locker = -1;                        \
-        (_domain)->arch.nested_p2m_function = "nobody";                \
-        spin_unlock(&(_domain)->arch.nested_p2m_lock);                 \
-    } while (0)
-
-
 /* Read a particular P2M table, mapping pages as we go.  Most callers
  * should _not_ call this directly; use the other gfn_to_mfn_* functions
  * below unless you know you want to walk a p2m that isn't a domain's

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 14 of 17] x86/mm: Make MM locks recursive
  2011-06-02 12:20 [PATCH 00 of 17] mm/p2m cleanups Tim Deegan
                   ` (12 preceding siblings ...)
  2011-06-02 12:20 ` [PATCH 13 of 17] x86/mm: dedup the various copies of the shadow lock functions Tim Deegan
@ 2011-06-02 12:20 ` Tim Deegan
  2011-06-02 12:20 ` [PATCH 15 of 17] x86/mm: merge the shadow, hap and log-dirty locks into a single paging lock Tim Deegan
                   ` (2 subsequent siblings)
  16 siblings, 0 replies; 18+ messages in thread
From: Tim Deegan @ 2011-06-02 12:20 UTC (permalink / raw)
  To: xen-devel

# HG changeset patch
# User Tim Deegan <Tim.Deegan@citrix.com>
# Date 1307017012 -3600
# Node ID 64398d14dcd6e720ac6908ee5ae284b03832e8bc
# Parent  d6518e8670ab15d5a9ec49b500ecf6e67442d3a8
x86/mm: Make MM locks recursive.

This replaces a lot of open-coded 'if (!locked) {lock()}' instances
by making the mm locks recursive, while only allowing them to be taken
recursively in the places where that already happened.

Signed-off-by: Tim Deegan <Tim.Deegan@citrix.com>
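
As a minimal stand-alone sketch of what the conversion buys: instead of
each caller open-coding "take the lock only if I don't already hold it",
the lock itself tolerates nested acquisition by the same holder.  The
pthread recursive mutex and depth counter below stand in for
spin_lock_recursive() and recurse_cnt; the function names and the printf
are invented.

#include <pthread.h>
#include <stdio.h>

typedef struct {
    pthread_mutex_t lock;
    int depth;                          /* stands in for lock.recurse_cnt */
} rec_lock_t;

static rec_lock_t g_lock;

static void rec_lock_init(rec_lock_t *l)
{
    pthread_mutexattr_t attr;
    pthread_mutexattr_init(&attr);
    pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
    pthread_mutex_init(&l->lock, &attr);
    l->depth = 0;
}

static void rec_lock(rec_lock_t *l)
{
    pthread_mutex_lock(&l->lock);
    l->depth++;
}

static void rec_unlock(rec_lock_t *l)
{
    l->depth--;
    pthread_mutex_unlock(&l->lock);
}

/* Called both with and without the lock held, like the p2m-page
 * allocators in this patch: it just takes the lock recursively. */
static void alloc_page(void)
{
    rec_lock(&g_lock);
    printf("allocating at depth %d\n", g_lock.depth);
    rec_unlock(&g_lock);
}

int main(void)
{
    rec_lock_init(&g_lock);
    alloc_page();               /* caller does not hold the lock */
    rec_lock(&g_lock);
    alloc_page();               /* caller holds it: no deadlock  */
    rec_unlock(&g_lock);
    return 0;
}

The depth counter here is only illustrative; in the patch the recursion
count and owning CPU are tracked by the recursive spinlock itself.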

diff -r d6518e8670ab -r 64398d14dcd6 xen/arch/x86/mm/hap/hap.c
--- a/xen/arch/x86/mm/hap/hap.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/hap/hap.c	Thu Jun 02 13:16:52 2011 +0100
@@ -277,13 +277,10 @@ static void hap_free(struct domain *d, m
 static struct page_info *hap_alloc_p2m_page(struct domain *d)
 {
     struct page_info *pg;
-    int do_locking;
 
     /* This is called both from the p2m code (which never holds the 
      * hap lock) and the log-dirty code (which sometimes does). */
-    do_locking = !hap_locked_by_me(d);
-    if ( do_locking )
-        hap_lock(d);
+    hap_lock_recursive(d);
     pg = hap_alloc(d);
 
 #if CONFIG_PAGING_LEVELS == 3
@@ -314,20 +311,15 @@ static struct page_info *hap_alloc_p2m_p
         pg->count_info |= 1;
     }
 
-    if ( do_locking )
-        hap_unlock(d);
+    hap_unlock(d);
     return pg;
 }
 
 static void hap_free_p2m_page(struct domain *d, struct page_info *pg)
 {
-    int do_locking;
-
     /* This is called both from the p2m code (which never holds the 
      * hap lock) and the log-dirty code (which sometimes does). */
-    do_locking = !hap_locked_by_me(d);
-    if ( do_locking )
-        hap_lock(d);
+    hap_lock_recursive(d);
 
     ASSERT(page_get_owner(pg) == d);
     /* Should have just the one ref we gave it in alloc_p2m_page() */
@@ -345,8 +337,7 @@ static void hap_free_p2m_page(struct dom
     hap_free(d, page_to_mfn(pg));
     ASSERT(d->arch.paging.hap.p2m_pages >= 0);
 
-    if ( do_locking )
-        hap_unlock(d);
+    hap_unlock(d);
 }
 
 /* Return the size of the pool, rounded up to the nearest MB */
diff -r d6518e8670ab -r 64398d14dcd6 xen/arch/x86/mm/mm-locks.h
--- a/xen/arch/x86/mm/mm-locks.h	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/mm-locks.h	Thu Jun 02 13:16:52 2011 +0100
@@ -37,42 +37,44 @@ static inline void mm_lock_init(mm_lock_
     l->unlock_level = 0;
 }
 
-static inline void _mm_lock(mm_lock_t *l, const char *func, int level)
+static inline int mm_locked_by_me(mm_lock_t *l) 
 {
-    if ( unlikely(l->locker == current->processor) )
-        panic("mm lock held by %s\n", l->locker_function);
+    return (l->lock.recurse_cpu == current->processor);
+}
+
+static inline void _mm_lock(mm_lock_t *l, const char *func, int level, int rec)
+{
     /* If you see this crash, the numbers printed are lines in this file 
      * where the offending locks are declared. */
-    if ( unlikely(this_cpu(mm_lock_level) >= level) )
-        panic("mm locking order violation: %i >= %i\n", 
+    if ( unlikely(this_cpu(mm_lock_level) > level) )
+        panic("mm locking order violation: %i > %i\n", 
               this_cpu(mm_lock_level), level);
-    spin_lock(&l->lock);
-    ASSERT(l->locker == -1);
-    l->locker = current->processor;
-    l->locker_function = func;
-    l->unlock_level = this_cpu(mm_lock_level);
+    spin_lock_recursive(&l->lock);
+    if ( l->lock.recurse_cnt == 1 )
+    {
+        l->locker_function = func;
+        l->unlock_level = this_cpu(mm_lock_level);
+    }
+    else if ( (unlikely(!rec)) )
+        panic("mm lock already held by %s\n", l->locker_function);
     this_cpu(mm_lock_level) = level;
 }
 /* This wrapper uses the line number to express the locking order below */
-#define declare_mm_lock(name)                                             \
-  static inline void mm_lock_##name(mm_lock_t *l, const char *func)       \
-  { _mm_lock(l, func, __LINE__); }
-/* This one captures the name of the calling function */
-#define mm_lock(name, l) mm_lock_##name(l, __func__)
+#define declare_mm_lock(name)                                                 \
+    static inline void mm_lock_##name(mm_lock_t *l, const char *func, int rec)\
+    { _mm_lock(l, func, __LINE__, rec); }
+/* These capture the name of the calling function */
+#define mm_lock(name, l) mm_lock_##name(l, __func__, 0)
+#define mm_lock_recursive(name, l) mm_lock_##name(l, __func__, 1)
 
 static inline void mm_unlock(mm_lock_t *l)
 {
-    ASSERT(l->locker == current->processor);
-    l->locker = -1;
-    l->locker_function = "nobody";
-    this_cpu(mm_lock_level) = l->unlock_level;
-    l->unlock_level = -1;
-    spin_unlock(&l->lock);
-}
-
-static inline int mm_locked_by_me(mm_lock_t *l) 
-{
-    return (current->processor == l->locker);
+    if ( l->lock.recurse_cnt == 1 )
+    {
+        l->locker_function = "nobody";
+        this_cpu(mm_lock_level) = l->unlock_level;
+    }
+    spin_unlock_recursive(&l->lock);
 }
 
 /************************************************************************
@@ -107,9 +109,10 @@ declare_mm_lock(nestedp2m)
  * be safe against concurrent reads, which do *not* require the lock. */
 
 declare_mm_lock(p2m)
-#define p2m_lock(p)         mm_lock(p2m, &(p)->lock)
-#define p2m_unlock(p)       mm_unlock(&(p)->lock)
-#define p2m_locked_by_me(p) mm_locked_by_me(&(p)->lock)
+#define p2m_lock(p)           mm_lock(p2m, &(p)->lock)
+#define p2m_lock_recursive(p) mm_lock_recursive(p2m, &(p)->lock)
+#define p2m_unlock(p)         mm_unlock(&(p)->lock)
+#define p2m_locked_by_me(p)   mm_locked_by_me(&(p)->lock)
 
 /* Shadow lock (per-domain)
  *
@@ -126,6 +129,8 @@ declare_mm_lock(p2m)
 
 declare_mm_lock(shadow)
 #define shadow_lock(d)         mm_lock(shadow, &(d)->arch.paging.shadow.lock)
+#define shadow_lock_recursive(d) \
+                     mm_lock_recursive(shadow, &(d)->arch.paging.shadow.lock)
 #define shadow_unlock(d)       mm_unlock(&(d)->arch.paging.shadow.lock)
 #define shadow_locked_by_me(d) mm_locked_by_me(&(d)->arch.paging.shadow.lock)
 
@@ -136,6 +141,8 @@ declare_mm_lock(shadow)
 
 declare_mm_lock(hap)
 #define hap_lock(d)         mm_lock(hap, &(d)->arch.paging.hap.lock)
+#define hap_lock_recursive(d) \
+                  mm_lock_recursive(hap, &(d)->arch.paging.hap.lock)
 #define hap_unlock(d)       mm_unlock(&(d)->arch.paging.hap.lock)
 #define hap_locked_by_me(d) mm_locked_by_me(&(d)->arch.paging.hap.lock)
 
diff -r d6518e8670ab -r 64398d14dcd6 xen/arch/x86/mm/p2m-ept.c
--- a/xen/arch/x86/mm/p2m-ept.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/p2m-ept.c	Thu Jun 02 13:16:52 2011 +0100
@@ -47,26 +47,22 @@ static int ept_pod_check_and_populate(st
                                       ept_entry_t *entry, int order,
                                       p2m_query_t q)
 {
-    /* Only take the lock if we don't already have it.  Otherwise it
-     * wouldn't be safe to do p2m lookups with the p2m lock held */
-    int do_locking = !p2m_locked_by_me(p2m);
     int r;
 
-    if ( do_locking )
-        p2m_lock(p2m);
+    /* This is called from the p2m lookups, which can happen with or 
+     * without the lock held. */
+    p2m_lock_recursive(p2m);
 
     /* Check to make sure this is still PoD */
     if ( entry->sa_p2mt != p2m_populate_on_demand )
     {
-        if ( do_locking )
-            p2m_unlock(p2m);
+        p2m_unlock(p2m);
         return 0;
     }
 
     r = p2m_pod_demand_populate(p2m, gfn, order, q);
 
-    if ( do_locking )
-        p2m_unlock(p2m);
+    p2m_unlock(p2m);
 
     return r;
 }
diff -r d6518e8670ab -r 64398d14dcd6 xen/arch/x86/mm/p2m-pt.c
--- a/xen/arch/x86/mm/p2m-pt.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/p2m-pt.c	Thu Jun 02 13:16:52 2011 +0100
@@ -478,29 +478,24 @@ static int p2m_pod_check_and_populate(st
                                       l1_pgentry_t *p2m_entry, int order,
                                       p2m_query_t q)
 {
-    /* Only take the lock if we don't already have it.  Otherwise it
-     * wouldn't be safe to do p2m lookups with the p2m lock held */
-    int do_locking = !p2m_locked_by_me(p2m);
     int r;
 
-    if ( do_locking )
-        p2m_lock(p2m);
-
+    /* This is called from the p2m lookups, which can happen with or 
+     * without the lock held. */
+    p2m_lock_recursive(p2m);
     audit_p2m(p2m, 1);
 
     /* Check to make sure this is still PoD */
     if ( p2m_flags_to_type(l1e_get_flags(*p2m_entry)) != p2m_populate_on_demand )
     {
-        if ( do_locking )
-            p2m_unlock(p2m);
+        p2m_unlock(p2m);
         return 0;
     }
 
     r = p2m_pod_demand_populate(p2m, gfn, order, q);
 
     audit_p2m(p2m, 1);
-    if ( do_locking )
-        p2m_unlock(p2m);
+    p2m_unlock(p2m);
 
     return r;
 }
diff -r d6518e8670ab -r 64398d14dcd6 xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/shadow/common.c	Thu Jun 02 13:16:52 2011 +0100
@@ -906,7 +906,7 @@ static int sh_skip_sync(struct vcpu *v, 
  * will be made safe (TLB flush semantics); pages unsynced by this vcpu
  * are brought back into sync and write-protected.  If skip != 0, we try
  * to avoid resyncing at all if we think we can get away with it. */
-void sh_resync_all(struct vcpu *v, int skip, int this, int others, int do_locking)
+void sh_resync_all(struct vcpu *v, int skip, int this, int others)
 {
     int idx;
     struct vcpu *other;
@@ -916,14 +916,11 @@ void sh_resync_all(struct vcpu *v, int s
 
     SHADOW_PRINTK("d=%d, v=%d\n", v->domain->domain_id, v->vcpu_id);
 
-    ASSERT(do_locking || shadow_locked_by_me(v->domain));
+    ASSERT(shadow_locked_by_me(v->domain));
 
     if ( !this )
         goto resync_others;
 
-    if ( do_locking )
-        shadow_lock(v->domain);
-
     /* First: resync all of this vcpu's oos pages */
     for ( idx = 0; idx < SHADOW_OOS_PAGES; idx++ ) 
         if ( mfn_valid(oos[idx]) )
@@ -933,9 +930,6 @@ void sh_resync_all(struct vcpu *v, int s
             oos[idx] = _mfn(INVALID_MFN);
         }
 
-    if ( do_locking )
-        shadow_unlock(v->domain);
-
  resync_others:
     if ( !others )
         return;
@@ -946,9 +940,6 @@ void sh_resync_all(struct vcpu *v, int s
         if ( v == other ) 
             continue;
 
-        if ( do_locking )
-            shadow_lock(v->domain);
-
         oos = other->arch.paging.shadow.oos;
         oos_fixup = other->arch.paging.shadow.oos_fixup;
         oos_snapshot = other->arch.paging.shadow.oos_snapshot;
@@ -972,10 +963,7 @@ void sh_resync_all(struct vcpu *v, int s
                 _sh_resync(other, oos[idx], &oos_fixup[idx], oos_snapshot[idx]);
                 oos[idx] = _mfn(INVALID_MFN);
             }
-        }
-        
-        if ( do_locking )
-            shadow_unlock(v->domain);
+        }        
     }
 }
 
@@ -1623,19 +1611,15 @@ static struct page_info *
 shadow_alloc_p2m_page(struct domain *d)
 {
     struct page_info *pg;
-    int do_locking;
 
     /* This is called both from the p2m code (which never holds the 
      * shadow lock) and the log-dirty code (which sometimes does). */
-    do_locking = !shadow_locked_by_me(d);
-    if ( do_locking )
-        shadow_lock(d);
+    shadow_lock_recursive(d);
 
     if ( d->arch.paging.shadow.total_pages 
          < shadow_min_acceptable_pages(d) + 1 )
     {
-        if ( do_locking )
-            shadow_unlock(d);
+        shadow_unlock(d);
         return NULL;
     }
  
@@ -1644,8 +1628,7 @@ shadow_alloc_p2m_page(struct domain *d)
     d->arch.paging.shadow.p2m_pages++;
     d->arch.paging.shadow.total_pages--;
 
-    if ( do_locking )
-        shadow_unlock(d);
+    shadow_unlock(d);
 
     /* Unlike shadow pages, mark p2m pages as owned by the domain.
      * Marking the domain as the owner would normally allow the guest to
@@ -1660,8 +1643,6 @@ shadow_alloc_p2m_page(struct domain *d)
 static void
 shadow_free_p2m_page(struct domain *d, struct page_info *pg)
 {
-    int do_locking;
-
     ASSERT(page_get_owner(pg) == d);
     /* Should have just the one ref we gave it in alloc_p2m_page() */
     if ( (pg->count_info & PGC_count_mask) != 1 )
@@ -1675,16 +1656,13 @@ shadow_free_p2m_page(struct domain *d, s
 
     /* This is called both from the p2m code (which never holds the 
      * shadow lock) and the log-dirty code (which sometimes does). */
-    do_locking = !shadow_locked_by_me(d);
-    if ( do_locking )
-        shadow_lock(d);
+    shadow_lock_recursive(d);
 
     shadow_free(d, page_to_mfn(pg));
     d->arch.paging.shadow.p2m_pages--;
     d->arch.paging.shadow.total_pages++;
 
-    if ( do_locking )
-        shadow_unlock(d);
+    shadow_unlock(d);
 }
 
 #if CONFIG_PAGING_LEVELS == 3
@@ -2489,7 +2467,7 @@ int sh_remove_write_access_from_sl1p(str
 int sh_remove_all_mappings(struct vcpu *v, mfn_t gmfn)
 {
     struct page_info *page = mfn_to_page(gmfn);
-    int expected_count, do_locking;
+    int expected_count;
 
     /* Dispatch table for getting per-type functions */
     static const hash_callback_t callbacks[SH_type_unused] = {
@@ -2531,10 +2509,8 @@ int sh_remove_all_mappings(struct vcpu *
 
     /* Although this is an externally visible function, we do not know
      * whether the shadow lock will be held when it is called (since it
-     * can be called via put_page_type when we clear a shadow l1e).
-     * If the lock isn't held, take it for the duration of the call. */
-    do_locking = !shadow_locked_by_me(v->domain);
-    if ( do_locking ) shadow_lock(v->domain);
+     * can be called via put_page_type when we clear a shadow l1e).*/
+    shadow_lock_recursive(v->domain);
 
     /* XXX TODO: 
      * Heuristics for finding the (probably) single mapping of this gmfn */
@@ -2560,7 +2536,7 @@ int sh_remove_all_mappings(struct vcpu *
         }
     }
 
-    if ( do_locking ) shadow_unlock(v->domain);
+    shadow_unlock(v->domain);
 
     /* We killed at least one mapping, so must flush TLBs. */
     return 1;
@@ -2638,7 +2614,6 @@ void sh_remove_shadows(struct vcpu *v, m
 {
     struct page_info *pg = mfn_to_page(gmfn);
     mfn_t smfn;
-    int do_locking;
     unsigned char t;
     
     /* Dispatch table for getting per-type functions: each level must
@@ -2696,10 +2671,8 @@ void sh_remove_shadows(struct vcpu *v, m
 
     /* Although this is an externally visible function, we do not know
      * whether the shadow lock will be held when it is called (since it
-     * can be called via put_page_type when we clear a shadow l1e).
-     * If the lock isn't held, take it for the duration of the call. */
-    do_locking = !shadow_locked_by_me(v->domain);
-    if ( do_locking ) shadow_lock(v->domain);
+     * can be called via put_page_type when we clear a shadow l1e).*/
+    shadow_lock_recursive(v->domain);
 
     SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx\n",
                    v->domain->domain_id, v->vcpu_id, mfn_x(gmfn));
@@ -2707,7 +2680,7 @@ void sh_remove_shadows(struct vcpu *v, m
     /* Bail out now if the page is not shadowed */
     if ( (pg->count_info & PGC_page_table) == 0 )
     {
-        if ( do_locking ) shadow_unlock(v->domain);
+        shadow_unlock(v->domain);
         return;
     }
 
@@ -2769,7 +2742,7 @@ void sh_remove_shadows(struct vcpu *v, m
      * take a fault. */
     flush_tlb_mask(v->domain->domain_dirty_cpumask);
 
-    if ( do_locking ) shadow_unlock(v->domain);
+    shadow_unlock(v->domain);
 }
 
 static void
@@ -2907,7 +2880,7 @@ static void sh_update_paging_modes(struc
         /* Need to resync all our pages now, because if a page goes out
          * of sync with paging enabled and is resynced with paging
          * disabled, the resync will go wrong. */
-        shadow_resync_all(v, 0);
+        shadow_resync_all(v);
 #endif /* OOS */
 
         if ( !hvm_paging_enabled(v) )
@@ -3181,8 +3154,7 @@ void shadow_teardown(struct domain *d)
     ASSERT(d->is_dying);
     ASSERT(d != current->domain);
 
-    if ( !shadow_locked_by_me(d) )
-        shadow_lock(d); /* Keep various asserts happy */
+    shadow_lock(d);
 
     if ( shadow_mode_enabled(d) )
     {
diff -r d6518e8670ab -r 64398d14dcd6 xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/shadow/multi.c	Thu Jun 02 13:16:52 2011 +0100
@@ -1979,7 +1979,7 @@ static shadow_l1e_t * shadow_get_and_cre
     /* All pages walked are now pagetables. Safe to resync pages
        in case level 4 or 3 shadows were set. */
     if ( resync )
-        shadow_resync_all(v, 0);
+        shadow_resync_all(v);
 #endif
 
     /* Now follow it down a level.  Guaranteed to succeed. */
@@ -2273,7 +2273,7 @@ static int validate_gl4e(struct vcpu *v,
 
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC )
         if ( mfn_valid(sl3mfn) )
-            shadow_resync_all(v, 0);
+            shadow_resync_all(v);
 #endif
     }
     l4e_propagate_from_guest(v, new_gl4e, sl3mfn, &new_sl4e, ft_prefetch);
@@ -2330,7 +2330,7 @@ static int validate_gl3e(struct vcpu *v,
 
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC )
         if ( mfn_valid(sl2mfn) )
-            shadow_resync_all(v, 0);
+            shadow_resync_all(v);
 #endif
     }
     l3e_propagate_from_guest(v, new_gl3e, sl2mfn, &new_sl3e, ft_prefetch);
@@ -4172,15 +4172,15 @@ sh_update_cr3(struct vcpu *v, int do_loc
         return;
     }
 
+    if ( do_locking ) shadow_lock(v->domain);
+
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
     /* Need to resync all the shadow entries on a TLB flush.  Resync
      * current vcpus OOS pages before switching to the new shadow
      * tables so that the VA hint is still valid.  */
-    shadow_resync_current_vcpu(v, do_locking);
+    shadow_resync_current_vcpu(v);
 #endif
 
-    if ( do_locking ) shadow_lock(v->domain);
-
     ASSERT(shadow_locked_by_me(v->domain));
     ASSERT(v->arch.paging.mode);
 
@@ -4406,17 +4406,16 @@ sh_update_cr3(struct vcpu *v, int do_loc
     v->arch.paging.last_write_emul_ok = 0;
 #endif
 
-    /* Release the lock, if we took it (otherwise it's the caller's problem) */
-    if ( do_locking ) shadow_unlock(v->domain);
-
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
     /* Need to resync all the shadow entries on a TLB flush. We only
      * update the shadows, leaving the pages out of sync. Also, we try
      * to skip synchronization of shadows not mapped in the new
      * tables. */
-    shadow_sync_other_vcpus(v, do_locking);
+    shadow_sync_other_vcpus(v);
 #endif
 
+    /* Release the lock, if we took it (otherwise it's the caller's problem) */
+    if ( do_locking ) shadow_unlock(v->domain);
 }
 
 
diff -r d6518e8670ab -r 64398d14dcd6 xen/arch/x86/mm/shadow/private.h
--- a/xen/arch/x86/mm/shadow/private.h	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/shadow/private.h	Thu Jun 02 13:16:52 2011 +0100
@@ -388,36 +388,24 @@ int sh_remove_write_access_from_sl1p(str
 /* Pull all out-of-sync shadows back into sync.  If skip != 0, we try
  * to avoid resyncing where we think we can get away with it. */
 
-void sh_resync_all(struct vcpu *v, int skip, int this, int others, int do_locking);
+void sh_resync_all(struct vcpu *v, int skip, int this, int others);
 
 static inline void
-shadow_resync_all(struct vcpu *v, int do_locking)
+shadow_resync_all(struct vcpu *v)
 {
-    sh_resync_all(v,
-                  0 /* skip */,
-                  1 /* this */,
-                  1 /* others */,
-                  do_locking);
+    sh_resync_all(v, 0 /* skip */, 1 /* this */, 1 /* others */);
 }
 
 static inline void
-shadow_resync_current_vcpu(struct vcpu *v, int do_locking)
+shadow_resync_current_vcpu(struct vcpu *v)
 {
-    sh_resync_all(v,
-                  0 /* skip */,
-                  1 /* this */, 
-                  0 /* others */,
-                  do_locking);
+    sh_resync_all(v, 0 /* skip */, 1 /* this */, 0 /* others */);
 }
 
 static inline void
-shadow_sync_other_vcpus(struct vcpu *v, int do_locking)
+shadow_sync_other_vcpus(struct vcpu *v)
 {
-    sh_resync_all(v,
-                  1 /* skip */, 
-                  0 /* this */,
-                  1 /* others */,
-                  do_locking);
+    sh_resync_all(v, 1 /* skip */, 0 /* this */, 1 /* others */);
 }
 
 void oos_audit_hash_is_present(struct domain *d, mfn_t gmfn);

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 15 of 17] x86/mm: merge the shadow, hap and log-dirty locks into a single paging lock
  2011-06-02 12:20 [PATCH 00 of 17] mm/p2m cleanups Tim Deegan
                   ` (13 preceding siblings ...)
  2011-06-02 12:20 ` [PATCH 14 of 17] x86/mm: Make MM locks recursive Tim Deegan
@ 2011-06-02 12:20 ` Tim Deegan
  2011-06-02 12:20 ` [PATCH 16 of 17] x86/mm: simplify log-dirty page allocation Tim Deegan
  2011-06-02 12:20 ` [PATCH 17 of 17] x86/mm/shadow: emulated writes are always guest-originated actions Tim Deegan
  16 siblings, 0 replies; 18+ messages in thread
From: Tim Deegan @ 2011-06-02 12:20 UTC (permalink / raw)
  To: xen-devel

# HG changeset patch
# User Tim Deegan <Tim.Deegan@citrix.com>
# Date 1307017012 -3600
# Node ID 2bbed46eb10ce80e920506714f7e328193a23b52
# Parent  64398d14dcd6e720ac6908ee5ae284b03832e8bc
x86/mm: merge the shadow, hap and log-dirty locks into a single paging lock.

This will allow us to simplify the locking around calls between
hap/shadow and log-dirty code.  Many log-dirty paths already need the
shadow or HAP lock, so it shouldn't increase contention much.

Signed-off-by: Tim Deegan <Tim.Deegan@citrix.com>
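
As a minimal stand-alone sketch of the consolidation: the three per-domain
locks become one field, and every shadow, HAP and log-dirty path serialises
on it, so a log-dirty update made on behalf of shadow or HAP code is covered
by the same lock its caller uses.  The struct layout, mm_lock_t and
mark_dirty() below are simplified stand-ins, not the Xen definitions.

#include <pthread.h>

typedef struct { pthread_mutex_t lock; } mm_lock_t;

struct paging_domain {
    mm_lock_t lock;     /* was: shadow.lock, hap.lock and log_dirty.lock */
    /* ... allocation pools, log-dirty state, ... */
};

struct domain_arch { struct paging_domain paging; };
struct domain      { struct domain_arch arch; };

#define paging_lock(d)   pthread_mutex_lock(&(d)->arch.paging.lock.lock)
#define paging_unlock(d) pthread_mutex_unlock(&(d)->arch.paging.lock.lock)

/* With a single lock, a log-dirty update is serialised against shadow
 * and HAP operations by construction; the real patch additionally takes
 * the lock recursively where callers may already hold it. */
static void mark_dirty(struct domain *d)
{
    paging_lock(d);
    /* ... set the bit in the log-dirty bitmap ... */
    paging_unlock(d);
}

int main(void)
{
    static struct domain d;

    pthread_mutex_init(&d.arch.paging.lock.lock, NULL);
    mark_dirty(&d);
    return 0;
}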

diff -r 64398d14dcd6 -r 2bbed46eb10c xen/arch/x86/mm/hap/hap.c
--- a/xen/arch/x86/mm/hap/hap.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/hap/hap.c	Thu Jun 02 13:16:52 2011 +0100
@@ -65,9 +65,9 @@ static int hap_enable_vram_tracking(stru
         return -EINVAL;
 
     /* turn on PG_log_dirty bit in paging mode */
-    hap_lock(d);
+    paging_lock(d);
     d->arch.paging.mode |= PG_log_dirty;
-    hap_unlock(d);
+    paging_unlock(d);
 
     /* set l1e entries of P2M table to be read-only. */
     for (i = dirty_vram->begin_pfn; i < dirty_vram->end_pfn; i++)
@@ -85,9 +85,9 @@ static int hap_disable_vram_tracking(str
     if ( !dirty_vram )
         return -EINVAL;
 
-    hap_lock(d);
+    paging_lock(d);
     d->arch.paging.mode &= ~PG_log_dirty;
-    hap_unlock(d);
+    paging_unlock(d);
 
     /* set l1e entries of P2M table with normal mode */
     for (i = dirty_vram->begin_pfn; i < dirty_vram->end_pfn; i++)
@@ -196,9 +196,9 @@ param_fail:
 static int hap_enable_log_dirty(struct domain *d)
 {
     /* turn on PG_log_dirty bit in paging mode */
-    hap_lock(d);
+    paging_lock(d);
     d->arch.paging.mode |= PG_log_dirty;
-    hap_unlock(d);
+    paging_unlock(d);
 
     /* set l1e entries of P2M table to be read-only. */
     p2m_change_entry_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
@@ -208,9 +208,9 @@ static int hap_enable_log_dirty(struct d
 
 static int hap_disable_log_dirty(struct domain *d)
 {
-    hap_lock(d);
+    paging_lock(d);
     d->arch.paging.mode &= ~PG_log_dirty;
-    hap_unlock(d);
+    paging_unlock(d);
 
     /* set l1e entries of P2M table with normal mode */
     p2m_change_entry_type_global(d, p2m_ram_logdirty, p2m_ram_rw);
@@ -248,7 +248,7 @@ static struct page_info *hap_alloc(struc
     struct page_info *pg = NULL;
     void *p;
 
-    ASSERT(hap_locked_by_me(d));
+    ASSERT(paging_locked_by_me(d));
 
     pg = page_list_remove_head(&d->arch.paging.hap.freelist);
     if ( unlikely(!pg) )
@@ -268,7 +268,7 @@ static void hap_free(struct domain *d, m
 {
     struct page_info *pg = mfn_to_page(mfn);
 
-    ASSERT(hap_locked_by_me(d));
+    ASSERT(paging_locked_by_me(d));
 
     d->arch.paging.hap.free_pages++;
     page_list_add_tail(pg, &d->arch.paging.hap.freelist);
@@ -279,8 +279,8 @@ static struct page_info *hap_alloc_p2m_p
     struct page_info *pg;
 
     /* This is called both from the p2m code (which never holds the 
-     * hap lock) and the log-dirty code (which sometimes does). */
-    hap_lock_recursive(d);
+     * paging lock) and the log-dirty code (which sometimes does). */
+    paging_lock_recursive(d);
     pg = hap_alloc(d);
 
 #if CONFIG_PAGING_LEVELS == 3
@@ -311,15 +311,15 @@ static struct page_info *hap_alloc_p2m_p
         pg->count_info |= 1;
     }
 
-    hap_unlock(d);
+    paging_unlock(d);
     return pg;
 }
 
 static void hap_free_p2m_page(struct domain *d, struct page_info *pg)
 {
     /* This is called both from the p2m code (which never holds the 
-     * hap lock) and the log-dirty code (which sometimes does). */
-    hap_lock_recursive(d);
+     * paging lock) and the log-dirty code (which sometimes does). */
+    paging_lock_recursive(d);
 
     ASSERT(page_get_owner(pg) == d);
     /* Should have just the one ref we gave it in alloc_p2m_page() */
@@ -337,7 +337,7 @@ static void hap_free_p2m_page(struct dom
     hap_free(d, page_to_mfn(pg));
     ASSERT(d->arch.paging.hap.p2m_pages >= 0);
 
-    hap_unlock(d);
+    paging_unlock(d);
 }
 
 /* Return the size of the pool, rounded up to the nearest MB */
@@ -358,7 +358,7 @@ hap_set_allocation(struct domain *d, uns
 {
     struct page_info *pg;
 
-    ASSERT(hap_locked_by_me(d));
+    ASSERT(paging_locked_by_me(d));
 
     if ( pages < d->arch.paging.hap.p2m_pages )
         pages = 0;
@@ -563,7 +563,6 @@ static void hap_destroy_monitor_table(st
 /************************************************/
 void hap_domain_init(struct domain *d)
 {
-    mm_lock_init(&d->arch.paging.hap.lock);
     INIT_PAGE_LIST_HEAD(&d->arch.paging.hap.freelist);
 }
 
@@ -587,9 +586,9 @@ int hap_enable(struct domain *d, u32 mod
     if ( old_pages == 0 )
     {
         unsigned int r;
-        hap_lock(d);
+        paging_lock(d);
         r = hap_set_allocation(d, 256, NULL);
-        hap_unlock(d);
+        paging_unlock(d);
         if ( r != 0 )
         {
             hap_set_allocation(d, 0, NULL);
@@ -638,10 +637,10 @@ void hap_final_teardown(struct domain *d
 
     p2m_teardown(p2m_get_hostp2m(d));
     /* Free any memory that the p2m teardown released */
-    hap_lock(d);
+    paging_lock(d);
     hap_set_allocation(d, 0, NULL);
     ASSERT(d->arch.paging.hap.p2m_pages == 0);
-    hap_unlock(d);
+    paging_unlock(d);
 }
 
 void hap_teardown(struct domain *d)
@@ -652,8 +651,8 @@ void hap_teardown(struct domain *d)
     ASSERT(d->is_dying);
     ASSERT(d != current->domain);
 
-    if ( !hap_locked_by_me(d) )
-        hap_lock(d); /* Keep various asserts happy */
+    if ( !paging_locked_by_me(d) )
+        paging_lock(d); /* Keep various asserts happy */
 
     if ( paging_mode_enabled(d) )
     {
@@ -689,7 +688,7 @@ void hap_teardown(struct domain *d)
 
     d->arch.paging.mode &= ~PG_log_dirty;
 
-    hap_unlock(d);
+    paging_unlock(d);
 }
 
 int hap_domctl(struct domain *d, xen_domctl_shadow_op_t *sc,
@@ -700,9 +699,9 @@ int hap_domctl(struct domain *d, xen_dom
     switch ( sc->op )
     {
     case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION:
-        hap_lock(d);
+        paging_lock(d);
         rc = hap_set_allocation(d, sc->mb << (20 - PAGE_SHIFT), &preempted);
-        hap_unlock(d);
+        paging_unlock(d);
         if ( preempted )
             /* Not finished.  Set up to re-run the call. */
             rc = hypercall_create_continuation(__HYPERVISOR_domctl, "h",
@@ -789,7 +788,7 @@ static void hap_update_paging_modes(stru
 {
     struct domain *d = v->domain;
 
-    hap_lock(d);
+    paging_lock(d);
 
     v->arch.paging.mode = hap_paging_get_mode(v);
 
@@ -804,7 +803,7 @@ static void hap_update_paging_modes(stru
     /* CR3 is effectively updated by a mode change. Flush ASIDs, etc. */
     hap_update_cr3(v, 0);
 
-    hap_unlock(d);
+    paging_unlock(d);
 }
 
 #if CONFIG_PAGING_LEVELS == 3
@@ -861,7 +860,7 @@ hap_write_p2m_entry(struct vcpu *v, unsi
      * a hypercall which passes a domain and chooses mostly the first
      * vcpu. */
 
-    hap_lock(d);
+    paging_lock(d);
     old_flags = l1e_get_flags(*p);
 
     if ( nestedhvm_enabled(d) && (old_flags & _PAGE_PRESENT) ) {
@@ -886,7 +885,7 @@ hap_write_p2m_entry(struct vcpu *v, unsi
         p2m_install_entry_in_monitors(d, (l3_pgentry_t *)p);
 #endif
 
-    hap_unlock(d);
+    paging_unlock(d);
 
     if ( flush_nestedp2m )
         p2m_flush_nestedp2m(d);
diff -r 64398d14dcd6 -r 2bbed46eb10c xen/arch/x86/mm/hap/nested_hap.c
--- a/xen/arch/x86/mm/hap/nested_hap.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/hap/nested_hap.c	Thu Jun 02 13:16:52 2011 +0100
@@ -82,14 +82,14 @@ nestedp2m_write_p2m_entry(struct p2m_dom
     struct domain *d = p2m->domain;
     uint32_t old_flags;
 
-    hap_lock(d);
+    paging_lock(d);
 
     old_flags = l1e_get_flags(*p);
     safe_write_pte(p, new);
     if (old_flags & _PAGE_PRESENT)
         nestedhvm_vmcx_flushtlb(p2m);
     
-    hap_unlock(d);
+    paging_unlock(d);
 }
 
 /********************************************/
diff -r 64398d14dcd6 -r 2bbed46eb10c xen/arch/x86/mm/mm-locks.h
--- a/xen/arch/x86/mm/mm-locks.h	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/mm-locks.h	Thu Jun 02 13:16:52 2011 +0100
@@ -114,55 +114,26 @@ declare_mm_lock(p2m)
 #define p2m_unlock(p)         mm_unlock(&(p)->lock)
 #define p2m_locked_by_me(p)   mm_locked_by_me(&(p)->lock)
 
-/* Shadow lock (per-domain)
+/* Paging lock (per-domain)
  *
- * This lock is intended to allow us to make atomic updates to the
- * software TLB that the shadow pagetables provide.
- *
- * Specifically, it protects:
+ * For shadow pagetables, this lock protects
  *   - all changes to shadow page table pages
  *   - the shadow hash table
  *   - the shadow page allocator 
  *   - all changes to guest page table pages
  *   - all changes to the page_info->tlbflush_timestamp
- *   - the page_info->count fields on shadow pages */
+ *   - the page_info->count fields on shadow pages 
+ * 
+ * For HAP, it protects the NPT/EPT tables and mode changes. 
+ * 
+ * It also protects the log-dirty bitmap from concurrent accesses (and
+ * teardowns, etc). */
 
-declare_mm_lock(shadow)
-#define shadow_lock(d)         mm_lock(shadow, &(d)->arch.paging.shadow.lock)
-#define shadow_lock_recursive(d) \
-                     mm_lock_recursive(shadow, &(d)->arch.paging.shadow.lock)
-#define shadow_unlock(d)       mm_unlock(&(d)->arch.paging.shadow.lock)
-#define shadow_locked_by_me(d) mm_locked_by_me(&(d)->arch.paging.shadow.lock)
-
-/* HAP lock (per-domain)
- * 
- * Equivalent of the shadow lock for HAP.  Protects updates to the
- * NPT and EPT tables, and the HAP page allocator. */
-
-declare_mm_lock(hap)
-#define hap_lock(d)         mm_lock(hap, &(d)->arch.paging.hap.lock)
-#define hap_lock_recursive(d) \
-                  mm_lock_recursive(hap, &(d)->arch.paging.hap.lock)
-#define hap_unlock(d)       mm_unlock(&(d)->arch.paging.hap.lock)
-#define hap_locked_by_me(d) mm_locked_by_me(&(d)->arch.paging.hap.lock)
-
-/* Log-dirty lock (per-domain) 
- * 
- * Protects the log-dirty bitmap from concurrent accesses (and teardowns, etc).
- *
- * Because mark_dirty is called from a lot of places, the log-dirty lock
- * may be acquired with the shadow or HAP locks already held.  When the
- * log-dirty code makes callbacks into HAP or shadow code to reset
- * various traps that will trigger the mark_dirty calls, it must *not*
- * have the log-dirty lock held, or it risks deadlock.  Because the only
- * purpose of those calls is to make sure that *guest* actions will
- * cause mark_dirty to be called (hypervisor actions explictly call it
- * anyway), it is safe to release the log-dirty lock before the callback
- * as long as the domain is paused for the entire operation. */
-
-declare_mm_lock(log_dirty)
-#define log_dirty_lock(d) mm_lock(log_dirty, &(d)->arch.paging.log_dirty.lock)
-#define log_dirty_unlock(d) mm_unlock(&(d)->arch.paging.log_dirty.lock)
-
+declare_mm_lock(paging)
+#define paging_lock(d)         mm_lock(paging, &(d)->arch.paging.lock)
+#define paging_lock_recursive(d) \
+                    mm_lock_recursive(paging, &(d)->arch.paging.lock)
+#define paging_unlock(d)       mm_unlock(&(d)->arch.paging.lock)
+#define paging_locked_by_me(d) mm_locked_by_me(&(d)->arch.paging.lock)
 
 #endif /* _MM_LOCKS_H */
diff -r 64398d14dcd6 -r 2bbed46eb10c xen/arch/x86/mm/paging.c
--- a/xen/arch/x86/mm/paging.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/paging.c	Thu Jun 02 13:16:52 2011 +0100
@@ -126,7 +126,7 @@ void paging_free_log_dirty_bitmap(struct
 
     INIT_PAGE_LIST_HEAD(&to_free);
 
-    log_dirty_lock(d);
+    paging_lock(d);
 
     l4 = map_domain_page(mfn_x(d->arch.paging.log_dirty.top));
 
@@ -163,7 +163,7 @@ void paging_free_log_dirty_bitmap(struct
     ASSERT(d->arch.paging.log_dirty.allocs == 0);
     d->arch.paging.log_dirty.failed_allocs = 0;
 
-    log_dirty_unlock(d);
+    paging_unlock(d);
     
     /* Return the memory now that we're not holding the log-dirty lock */
     page_list_for_each_safe(pg, tmp, &to_free)
@@ -239,7 +239,8 @@ void paging_mark_dirty(struct domain *d,
     new_mfn = _mfn(INVALID_MFN);
 
 again:
-    log_dirty_lock(d);
+    /* Recursive: this is called from inside the shadow code */
+    paging_lock_recursive(d);
 
     l4 = paging_map_log_dirty_bitmap(d);
     if ( unlikely(!l4) )
@@ -300,13 +301,13 @@ again:
         d->arch.paging.log_dirty.dirty_count++;
     }
 
-    log_dirty_unlock(d);
+    paging_unlock(d);
     if ( mfn_valid(new_mfn) )
         paging_free_log_dirty_page(d, new_mfn);
     return;
 
 oom:
-    log_dirty_unlock(d);
+    paging_unlock(d);
     new_mfn = paging_new_log_dirty_page(d);
     if ( !mfn_valid(new_mfn) )
         /* we've already recorded the failed allocation */
@@ -323,7 +324,8 @@ int paging_mfn_is_dirty(struct domain *d
     unsigned long *l1;
     int rv = 0;
 
-    log_dirty_lock(d);
+    /* Recursive: this is called from inside the shadow code */
+    paging_lock_recursive(d);
     ASSERT(paging_mode_log_dirty(d));
 
     /* We /really/ mean PFN here, even for non-translated guests. */
@@ -359,7 +361,7 @@ int paging_mfn_is_dirty(struct domain *d
     unmap_domain_page(l1);
 
 out:
-    log_dirty_unlock(d);
+    paging_unlock(d);
     return rv;
 }
 
@@ -375,7 +377,7 @@ int paging_log_dirty_op(struct domain *d
     int i4, i3, i2;
 
     domain_pause(d);
-    log_dirty_lock(d);
+    paging_lock(d);
 
     clean = (sc->op == XEN_DOMCTL_SHADOW_OP_CLEAN);
 
@@ -456,7 +458,7 @@ int paging_log_dirty_op(struct domain *d
     if ( pages < sc->pages )
         sc->pages = pages;
 
-    log_dirty_unlock(d);
+    paging_unlock(d);
 
     if ( clean )
     {
@@ -468,7 +470,7 @@ int paging_log_dirty_op(struct domain *d
     return rv;
 
  out:
-    log_dirty_unlock(d);
+    paging_unlock(d);
     domain_unpause(d);
     return rv;
 }
@@ -486,7 +488,7 @@ int paging_log_dirty_range(struct domain
     int i2, i3, i4;
 
     d->arch.paging.log_dirty.clean_dirty_bitmap(d);
-    log_dirty_lock(d);
+    paging_lock(d);
 
     PAGING_DEBUG(LOGDIRTY, "log-dirty-range: dom %u faults=%u dirty=%u\n",
                  d->domain_id,
@@ -611,12 +613,12 @@ int paging_log_dirty_range(struct domain
     if ( l4 )
         unmap_domain_page(l4);
 
-    log_dirty_unlock(d);
+    paging_unlock(d);
 
     return rv;
 
  out:
-    log_dirty_unlock(d);
+    paging_unlock(d);
     return rv;
 }
 
@@ -632,9 +634,6 @@ void paging_log_dirty_init(struct domain
                            int    (*disable_log_dirty)(struct domain *d),
                            void   (*clean_dirty_bitmap)(struct domain *d))
 {
-    /* We initialize log dirty lock first */
-    mm_lock_init(&d->arch.paging.log_dirty.lock);
-
     d->arch.paging.log_dirty.enable_log_dirty = enable_log_dirty;
     d->arch.paging.log_dirty.disable_log_dirty = disable_log_dirty;
     d->arch.paging.log_dirty.clean_dirty_bitmap = clean_dirty_bitmap;
@@ -658,6 +657,8 @@ int paging_domain_init(struct domain *d,
     if ( (rc = p2m_init(d)) != 0 )
         return rc;
 
+    mm_lock_init(&d->arch.paging.lock);
+
     /* The order of the *_init calls below is important, as the later
      * ones may rewrite some common fields.  Shadow pagetables are the
      * default... */
diff -r 64398d14dcd6 -r 2bbed46eb10c xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/shadow/common.c	Thu Jun 02 13:16:52 2011 +0100
@@ -45,7 +45,6 @@ DEFINE_PER_CPU(uint32_t,trace_shadow_pat
  * Called for every domain from arch_domain_create() */
 void shadow_domain_init(struct domain *d, unsigned int domcr_flags)
 {
-    mm_lock_init(&d->arch.paging.shadow.lock);
     INIT_PAGE_LIST_HEAD(&d->arch.paging.shadow.freelist);
     INIT_PAGE_LIST_HEAD(&d->arch.paging.shadow.pinned_shadows);
 
@@ -469,7 +468,7 @@ void shadow_continue_emulation(struct sh
  * 
  * 2. All shadow operations on a guest page require the page to be brought
  *    back into sync before proceeding.  This must be done under the
- *    shadow lock so that the page is guaranteed to remain synced until
+ *    paging lock so that the page is guaranteed to remain synced until
  *    the operation completes.
  *
  *    Exceptions to this rule: the pagefault and invlpg handlers may 
@@ -478,7 +477,7 @@ void shadow_continue_emulation(struct sh
  * 3. Operations on shadows that do not start from a guest page need to
  *    be aware that they may be handling an out-of-sync shadow.
  *
- * 4. Operations that do not normally take the shadow lock (fast-path 
+ * 4. Operations that do not normally take the paging lock (fast-path 
  *    #PF handler, INVLPG) must fall back to a locking, syncing version 
  *    if they see an out-of-sync table. 
  *
@@ -725,7 +724,7 @@ static void _sh_resync(struct vcpu *v, m
 {
     struct page_info *pg = mfn_to_page(gmfn);
 
-    ASSERT(shadow_locked_by_me(v->domain));
+    ASSERT(paging_locked_by_me(v->domain));
     ASSERT(mfn_is_out_of_sync(gmfn));
     /* Guest page must be shadowed *only* as L1 when out of sync. */
     ASSERT(!(mfn_to_page(gmfn)->shadow_flags & SHF_page_type_mask 
@@ -916,7 +915,7 @@ void sh_resync_all(struct vcpu *v, int s
 
     SHADOW_PRINTK("d=%d, v=%d\n", v->domain->domain_id, v->vcpu_id);
 
-    ASSERT(shadow_locked_by_me(v->domain));
+    ASSERT(paging_locked_by_me(v->domain));
 
     if ( !this )
         goto resync_others;
@@ -973,7 +972,7 @@ int sh_unsync(struct vcpu *v, mfn_t gmfn
 {
     struct page_info *pg;
     
-    ASSERT(shadow_locked_by_me(v->domain));
+    ASSERT(paging_locked_by_me(v->domain));
 
     SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx\n",
                   v->domain->domain_id, v->vcpu_id, mfn_x(gmfn));
@@ -1137,7 +1136,7 @@ sh_validate_guest_pt_write(struct vcpu *
     struct domain *d = v->domain;
     int rc;
 
-    ASSERT(shadow_locked_by_me(v->domain));
+    ASSERT(paging_locked_by_me(v->domain));
     rc = sh_validate_guest_entry(v, gmfn, entry, size);
     if ( rc & SHADOW_SET_FLUSH )
         /* Need to flush TLBs to pick up shadow PT changes */
@@ -1159,11 +1158,11 @@ int shadow_write_guest_entry(struct vcpu
  * appropriately.  Returns 0 if we page-faulted, 1 for success. */
 {
     int failed;
-    shadow_lock(v->domain);
+    paging_lock(v->domain);
     failed = __copy_to_user(p, &new, sizeof(new));
     if ( failed != sizeof(new) )
         sh_validate_guest_entry(v, gmfn, p, sizeof(new));
-    shadow_unlock(v->domain);
+    paging_unlock(v->domain);
     return (failed == 0);
 }
 
@@ -1176,12 +1175,12 @@ int shadow_cmpxchg_guest_entry(struct vc
 {
     int failed;
     intpte_t t = *old;
-    shadow_lock(v->domain);
+    paging_lock(v->domain);
     failed = cmpxchg_user(p, t, new);
     if ( t == *old )
         sh_validate_guest_entry(v, gmfn, p, sizeof(new));
     *old = t;
-    shadow_unlock(v->domain);
+    paging_unlock(v->domain);
     return (failed == 0);
 }
 
@@ -1416,9 +1415,9 @@ static void shadow_blow_tables(struct do
 void shadow_blow_tables_per_domain(struct domain *d)
 {
     if ( shadow_mode_enabled(d) && d->vcpu != NULL && d->vcpu[0] != NULL ) {
-        shadow_lock(d);
+        paging_lock(d);
         shadow_blow_tables(d);
-        shadow_unlock(d);
+        paging_unlock(d);
     }
 }
 
@@ -1435,9 +1434,9 @@ static void shadow_blow_all_tables(unsig
     {
         if ( shadow_mode_enabled(d) && d->vcpu != NULL && d->vcpu[0] != NULL )
         {
-            shadow_lock(d);
+            paging_lock(d);
             shadow_blow_tables(d);
-            shadow_unlock(d);
+            paging_unlock(d);
         }
     }
     rcu_read_unlock(&domlist_read_lock);
@@ -1484,7 +1483,7 @@ mfn_t shadow_alloc(struct domain *d,
     void *p;
     int i;
 
-    ASSERT(shadow_locked_by_me(d));
+    ASSERT(paging_locked_by_me(d));
     ASSERT(shadow_type != SH_type_none);
     perfc_incr(shadow_alloc);
 
@@ -1560,7 +1559,7 @@ void shadow_free(struct domain *d, mfn_t
     u32 shadow_type;
     int i;
 
-    ASSERT(shadow_locked_by_me(d));
+    ASSERT(paging_locked_by_me(d));
     perfc_incr(shadow_free);
 
     shadow_type = sp->u.sh.type;
@@ -1613,13 +1612,13 @@ shadow_alloc_p2m_page(struct domain *d)
     struct page_info *pg;
 
     /* This is called both from the p2m code (which never holds the 
-     * shadow lock) and the log-dirty code (which sometimes does). */
-    shadow_lock_recursive(d);
+     * paging lock) and the log-dirty code (which sometimes does). */
+    paging_lock_recursive(d);
 
     if ( d->arch.paging.shadow.total_pages 
          < shadow_min_acceptable_pages(d) + 1 )
     {
-        shadow_unlock(d);
+        paging_unlock(d);
         return NULL;
     }
  
@@ -1628,7 +1627,7 @@ shadow_alloc_p2m_page(struct domain *d)
     d->arch.paging.shadow.p2m_pages++;
     d->arch.paging.shadow.total_pages--;
 
-    shadow_unlock(d);
+    paging_unlock(d);
 
     /* Unlike shadow pages, mark p2m pages as owned by the domain.
      * Marking the domain as the owner would normally allow the guest to
@@ -1655,14 +1654,14 @@ shadow_free_p2m_page(struct domain *d, s
     page_set_owner(pg, NULL); 
 
     /* This is called both from the p2m code (which never holds the 
-     * shadow lock) and the log-dirty code (which sometimes does). */
-    shadow_lock_recursive(d);
+     * paging lock) and the log-dirty code (which sometimes does). */
+    paging_lock_recursive(d);
 
     shadow_free(d, page_to_mfn(pg));
     d->arch.paging.shadow.p2m_pages--;
     d->arch.paging.shadow.total_pages++;
 
-    shadow_unlock(d);
+    paging_unlock(d);
 }
 
 #if CONFIG_PAGING_LEVELS == 3
@@ -1721,7 +1720,7 @@ static unsigned int sh_set_allocation(st
     struct page_info *sp;
     unsigned int lower_bound;
 
-    ASSERT(shadow_locked_by_me(d));
+    ASSERT(paging_locked_by_me(d));
 
     if ( pages > 0 )
     {
@@ -1920,7 +1919,7 @@ static int shadow_hash_alloc(struct doma
 {
     struct page_info **table;
 
-    ASSERT(shadow_locked_by_me(d));
+    ASSERT(paging_locked_by_me(d));
     ASSERT(!d->arch.paging.shadow.hash_table);
 
     table = xmalloc_array(struct page_info *, SHADOW_HASH_BUCKETS);
@@ -1935,7 +1934,7 @@ static int shadow_hash_alloc(struct doma
  * This function does not care whether the table is populated. */
 static void shadow_hash_teardown(struct domain *d)
 {
-    ASSERT(shadow_locked_by_me(d));
+    ASSERT(paging_locked_by_me(d));
     ASSERT(d->arch.paging.shadow.hash_table);
 
     xfree(d->arch.paging.shadow.hash_table);
@@ -1951,7 +1950,7 @@ mfn_t shadow_hash_lookup(struct vcpu *v,
     struct page_info *sp, *prev;
     key_t key;
 
-    ASSERT(shadow_locked_by_me(d));
+    ASSERT(paging_locked_by_me(d));
     ASSERT(d->arch.paging.shadow.hash_table);
     ASSERT(t);
 
@@ -2005,7 +2004,7 @@ void shadow_hash_insert(struct vcpu *v, 
     struct page_info *sp;
     key_t key;
     
-    ASSERT(shadow_locked_by_me(d));
+    ASSERT(paging_locked_by_me(d));
     ASSERT(d->arch.paging.shadow.hash_table);
     ASSERT(t);
 
@@ -2031,7 +2030,7 @@ void shadow_hash_delete(struct vcpu *v, 
     struct page_info *sp, *x;
     key_t key;
 
-    ASSERT(shadow_locked_by_me(d));
+    ASSERT(paging_locked_by_me(d));
     ASSERT(d->arch.paging.shadow.hash_table);
     ASSERT(t);
 
@@ -2085,7 +2084,7 @@ static void hash_foreach(struct vcpu *v,
     struct domain *d = v->domain;
     struct page_info *x;
 
-    ASSERT(shadow_locked_by_me(d));
+    ASSERT(paging_locked_by_me(d));
 
     /* Can be called via p2m code &c after shadow teardown. */
     if ( unlikely(!d->arch.paging.shadow.hash_table) )
@@ -2242,7 +2241,7 @@ int sh_remove_write_access(struct vcpu *
         ;
     struct page_info *pg = mfn_to_page(gmfn);
 
-    ASSERT(shadow_locked_by_me(v->domain));
+    ASSERT(paging_locked_by_me(v->domain));
 
     /* Only remove writable mappings if we are doing shadow refcounts.
      * In guest refcounting, we trust Xen to already be restricting
@@ -2508,9 +2507,9 @@ int sh_remove_all_mappings(struct vcpu *
         return 0;
 
     /* Although this is an externally visible function, we do not know
-     * whether the shadow lock will be held when it is called (since it
+     * whether the paging lock will be held when it is called (since it
      * can be called via put_page_type when we clear a shadow l1e).*/
-    shadow_lock_recursive(v->domain);
+    paging_lock_recursive(v->domain);
 
     /* XXX TODO: 
      * Heuristics for finding the (probably) single mapping of this gmfn */
@@ -2536,7 +2535,7 @@ int sh_remove_all_mappings(struct vcpu *
         }
     }
 
-    shadow_unlock(v->domain);
+    paging_unlock(v->domain);
 
     /* We killed at least one mapping, so must flush TLBs. */
     return 1;
@@ -2670,9 +2669,9 @@ void sh_remove_shadows(struct vcpu *v, m
     ASSERT(mfn_valid(gmfn));
 
     /* Although this is an externally visible function, we do not know
-     * whether the shadow lock will be held when it is called (since it
+     * whether the paging lock will be held when it is called (since it
      * can be called via put_page_type when we clear a shadow l1e).*/
-    shadow_lock_recursive(v->domain);
+    paging_lock_recursive(v->domain);
 
     SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx\n",
                    v->domain->domain_id, v->vcpu_id, mfn_x(gmfn));
@@ -2680,7 +2679,7 @@ void sh_remove_shadows(struct vcpu *v, m
     /* Bail out now if the page is not shadowed */
     if ( (pg->count_info & PGC_page_table) == 0 )
     {
-        shadow_unlock(v->domain);
+        paging_unlock(v->domain);
         return;
     }
 
@@ -2742,7 +2741,7 @@ void sh_remove_shadows(struct vcpu *v, m
      * take a fault. */
     flush_tlb_mask(v->domain->domain_dirty_cpumask);
 
-    shadow_unlock(v->domain);
+    paging_unlock(v->domain);
 }
 
 static void
@@ -2811,7 +2810,7 @@ static void sh_update_paging_modes(struc
     struct domain *d = v->domain;
     const struct paging_mode *old_mode = v->arch.paging.mode;
 
-    ASSERT(shadow_locked_by_me(d));
+    ASSERT(paging_locked_by_me(d));
 
 #if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) 
     /* Make sure this vcpu has a virtual TLB array allocated */
@@ -3004,9 +3003,9 @@ static void sh_update_paging_modes(struc
 
 void shadow_update_paging_modes(struct vcpu *v)
 {
-    shadow_lock(v->domain);
+    paging_lock(v->domain);
     sh_update_paging_modes(v);
-    shadow_unlock(v->domain);
+    paging_unlock(v->domain);
 }
 
 /**************************************************************************/
@@ -3017,7 +3016,7 @@ static void sh_new_mode(struct domain *d
 {
     struct vcpu *v;
 
-    ASSERT(shadow_locked_by_me(d));
+    ASSERT(paging_locked_by_me(d));
     ASSERT(d != current->domain);
 
     d->arch.paging.mode = new_mode;
@@ -3056,7 +3055,7 @@ int shadow_enable(struct domain *d, u32 
     if ( old_pages == 0 )
     {
         unsigned int r;
-        shadow_lock(d);                
+        paging_lock(d);                
         r = sh_set_allocation(d, 1024, NULL); /* Use at least 4MB */
         if ( r != 0 )
         {
@@ -3064,14 +3063,14 @@ int shadow_enable(struct domain *d, u32 
             rv = -ENOMEM;
             goto out_locked;
         }        
-        shadow_unlock(d);
+        paging_unlock(d);
     }
 
     /* Allow p2m and log-dirty code to borrow shadow memory */
     d->arch.paging.alloc_page = shadow_alloc_p2m_page;
     d->arch.paging.free_page = shadow_free_p2m_page;
 
-    /* Init the P2M table.  Must be done before we take the shadow lock 
+    /* Init the P2M table.  Must be done before we take the paging lock 
      * to avoid possible deadlock. */
     if ( mode & PG_translate )
     {
@@ -3103,7 +3102,7 @@ int shadow_enable(struct domain *d, u32 
         pg->u.inuse.type_info = PGT_l2_page_table | 1 | PGT_validated;
     }
 
-    shadow_lock(d);
+    paging_lock(d);
 
     /* Sanity check again with the lock held */
     if ( shadow_mode_enabled(d) )
@@ -3133,7 +3132,7 @@ int shadow_enable(struct domain *d, u32 
     sh_new_mode(d, mode);
 
  out_locked:
-    shadow_unlock(d);
+    paging_unlock(d);
  out_unlocked:
     if ( rv != 0 && !pagetable_is_null(p2m_get_pagetable(p2m)) )
         p2m_teardown(p2m);
@@ -3154,7 +3153,7 @@ void shadow_teardown(struct domain *d)
     ASSERT(d->is_dying);
     ASSERT(d != current->domain);
 
-    shadow_lock(d);
+    paging_lock(d);
 
     if ( shadow_mode_enabled(d) )
     {
@@ -3251,7 +3250,7 @@ void shadow_teardown(struct domain *d)
         d->arch.hvm_domain.dirty_vram = NULL;
     }
 
-    shadow_unlock(d);
+    paging_unlock(d);
 
     /* Must be called outside the lock */
     if ( unpaged_pagetable ) 
@@ -3277,7 +3276,7 @@ void shadow_final_teardown(struct domain
     /* It is now safe to pull down the p2m map. */
     p2m_teardown(p2m_get_hostp2m(d));
     /* Free any shadow memory that the p2m teardown released */
-    shadow_lock(d);
+    paging_lock(d);
     sh_set_allocation(d, 0, NULL);
     SHADOW_PRINTK("dom %u final teardown done."
                    "  Shadow pages total = %u, free = %u, p2m=%u\n",
@@ -3285,13 +3284,13 @@ void shadow_final_teardown(struct domain
                    d->arch.paging.shadow.total_pages, 
                    d->arch.paging.shadow.free_pages, 
                    d->arch.paging.shadow.p2m_pages);
-    shadow_unlock(d);
+    paging_unlock(d);
 }
 
 static int shadow_one_bit_enable(struct domain *d, u32 mode)
 /* Turn on a single shadow mode feature */
 {
-    ASSERT(shadow_locked_by_me(d));
+    ASSERT(paging_locked_by_me(d));
 
     /* Sanity check the call */
     if ( d == current->domain || (d->arch.paging.mode & mode) == mode )
@@ -3332,7 +3331,7 @@ static int shadow_one_bit_disable(struct
 /* Turn off a single shadow mode feature */
 {
     struct vcpu *v;
-    ASSERT(shadow_locked_by_me(d));
+    ASSERT(paging_locked_by_me(d));
 
     /* Sanity check the call */
     if ( d == current->domain || !((d->arch.paging.mode & mode) == mode) )
@@ -3397,9 +3396,9 @@ static int shadow_test_enable(struct dom
     int ret;
 
     domain_pause(d);
-    shadow_lock(d);
+    paging_lock(d);
     ret = shadow_one_bit_enable(d, PG_SH_enable);
-    shadow_unlock(d);
+    paging_unlock(d);
     domain_unpause(d);
 
     return ret;
@@ -3410,9 +3409,9 @@ static int shadow_test_disable(struct do
     int ret;
 
     domain_pause(d);
-    shadow_lock(d);
+    paging_lock(d);
     ret = shadow_one_bit_disable(d, PG_SH_enable);
-    shadow_unlock(d);
+    paging_unlock(d);
     domain_unpause(d);
 
     return ret;
@@ -3501,7 +3500,7 @@ shadow_write_p2m_entry(struct vcpu *v, u
 {
     struct domain *d = v->domain;
     
-    shadow_lock(d);
+    paging_lock(d);
 
     /* If there are any shadows, update them.  But if shadow_teardown()
      * has already been called then it's not safe to try. */ 
@@ -3533,7 +3532,7 @@ shadow_write_p2m_entry(struct vcpu *v, u
     }
 #endif
 
-    shadow_unlock(d);
+    paging_unlock(d);
 }
 
 /**************************************************************************/
@@ -3546,8 +3545,7 @@ int shadow_enable_log_dirty(struct domai
 {
     int ret;
 
-    /* shadow lock is required here */
-    shadow_lock(d);
+    paging_lock(d);
     if ( shadow_mode_enabled(d) )
     {
         /* This domain already has some shadows: need to clear them out 
@@ -3565,7 +3563,7 @@ int shadow_enable_log_dirty(struct domai
 #endif
     
     ret = shadow_one_bit_enable(d, PG_log_dirty);
-    shadow_unlock(d);
+    paging_unlock(d);
 
     return ret;
 }
@@ -3575,10 +3573,9 @@ int shadow_disable_log_dirty(struct doma
 {
     int ret;
 
-    /* shadow lock is required here */    
-    shadow_lock(d);
+    paging_lock(d);
     ret = shadow_one_bit_disable(d, PG_log_dirty);
-    shadow_unlock(d);
+    paging_unlock(d);
     
     return ret;
 }
@@ -3588,12 +3585,12 @@ int shadow_disable_log_dirty(struct doma
  */
 void shadow_clean_dirty_bitmap(struct domain *d)
 {
-    shadow_lock(d);
+    paging_lock(d);
     /* Need to revoke write access to the domain's pages again.
      * In future, we'll have a less heavy-handed approach to this,
      * but for now, we just unshadow everything except Xen. */
     shadow_blow_tables(d);
-    shadow_unlock(d);
+    paging_unlock(d);
 }
 
 
@@ -3618,7 +3615,7 @@ int shadow_track_dirty_vram(struct domai
             || end_pfn >= p2m->max_mapped_pfn)
         return -EINVAL;
 
-    shadow_lock(d);
+    paging_lock(d);
 
     if ( dirty_vram && (!nr ||
              ( begin_pfn != dirty_vram->begin_pfn
@@ -3789,7 +3786,7 @@ out_dirty_vram:
     dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
 
 out:
-    shadow_unlock(d);
+    paging_unlock(d);
     return rc;
 }
 
@@ -3824,18 +3821,18 @@ int shadow_domctl(struct domain *d,
         return 0;
 
     case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION:
-        shadow_lock(d);
+        paging_lock(d);
         if ( sc->mb == 0 && shadow_mode_enabled(d) )
         {            
             /* Can't set the allocation to zero unless the domain stops using
              * shadow pagetables first */
             SHADOW_ERROR("Can't set shadow allocation to zero, domain %u"
                          " is still using shadows.\n", d->domain_id);
-            shadow_unlock(d);
+            paging_unlock(d);
             return -EINVAL;
         }
         rc = sh_set_allocation(d, sc->mb << (20 - PAGE_SHIFT), &preempted);
-        shadow_unlock(d);
+        paging_unlock(d);
         if ( preempted )
             /* Not finished.  Set up to re-run the call. */
             rc = hypercall_create_continuation(
diff -r 64398d14dcd6 -r 2bbed46eb10c xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/shadow/multi.c	Thu Jun 02 13:16:52 2011 +0100
@@ -203,7 +203,7 @@ shadow_check_gwalk(struct vcpu *v, unsig
 #endif
     int mismatch = 0;
 
-    ASSERT(shadow_locked_by_me(d));
+    ASSERT(paging_locked_by_me(d));
 
     if ( version == atomic_read(&d->arch.paging.shadow.gtable_dirty_version) )
          return 1;
@@ -781,7 +781,7 @@ l1e_propagate_from_guest(struct vcpu *v,
 static inline void safe_write_entry(void *dst, void *src) 
 /* Copy one PTE safely when processors might be running on the
  * destination pagetable.   This does *not* give safety against
- * concurrent writes (that's what the shadow lock is for), just 
+ * concurrent writes (that's what the paging lock is for), just 
  * stops the hardware picking up partially written entries. */
 {
     volatile unsigned long *d = dst;
@@ -3133,17 +3133,17 @@ static int sh_page_fault(struct vcpu *v,
      * do is let Xen's normal fault handlers try to fix it.  In any case, 
      * a diagnostic trace of the fault will be more useful than 
      * a BUG() when we try to take the lock again. */
-    if ( unlikely(shadow_locked_by_me(d)) )
+    if ( unlikely(paging_locked_by_me(d)) )
     {
         SHADOW_ERROR("Recursive shadow fault: lock was taken by %s\n",
-                     d->arch.paging.shadow.lock.locker_function);
+                     d->arch.paging.lock.locker_function);
         return 0;
     }
 
  rewalk:
 
     /* The walk is done in a lock-free style, with some sanity check
-     * postponed after grabbing shadow lock later. Those delayed checks
+     * postponed after grabbing paging lock later. Those delayed checks
      * will make sure no inconsistent mapping being translated into
      * shadow page table. */ 
     version = atomic_read(&d->arch.paging.shadow.gtable_dirty_version);
@@ -3201,7 +3201,7 @@ static int sh_page_fault(struct vcpu *v,
                 regs->error_code | PFEC_page_present);
 #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */
 
-    shadow_lock(d);
+    paging_lock(d);
 
     TRACE_CLEAR_PATH_FLAGS;
 
@@ -3235,7 +3235,7 @@ static int sh_page_fault(struct vcpu *v,
     /* Second bit set: Resynced a page. Re-walk needed. */
     if ( rc & GW_RMWR_REWALK )
     {
-        shadow_unlock(d);
+        paging_unlock(d);
         goto rewalk;
     }
 #endif /* OOS */
@@ -3243,7 +3243,7 @@ static int sh_page_fault(struct vcpu *v,
     if ( !shadow_check_gwalk(v, va, &gw, version) )
     {
         perfc_incr(shadow_inconsistent_gwalk);
-        shadow_unlock(d);
+        paging_unlock(d);
         goto rewalk;
     }
 
@@ -3269,7 +3269,7 @@ static int sh_page_fault(struct vcpu *v,
 #else
         ASSERT(d->is_shutting_down);
 #endif
-        shadow_unlock(d);
+        paging_unlock(d);
         trace_shadow_gen(TRC_SHADOW_DOMF_DYING, va);
         return 0;
     }
@@ -3286,7 +3286,7 @@ static int sh_page_fault(struct vcpu *v,
          * sh_remove_shadows() in a previous sh_resync() call has
          * failed. We cannot safely continue since some page is still
          * OOS but not in the hash table anymore. */
-        shadow_unlock(d);
+        paging_unlock(d);
         return 0;
     }
 
@@ -3295,7 +3295,7 @@ static int sh_page_fault(struct vcpu *v,
     if ( shadow_check_gl1e(v, &gw)  )
     {
         perfc_incr(shadow_inconsistent_gwalk);
-        shadow_unlock(d);
+        paging_unlock(d);
         goto rewalk;
     }
 #endif /* OOS */
@@ -3388,7 +3388,7 @@ static int sh_page_fault(struct vcpu *v,
     sh_audit_gw(v, &gw);
     SHADOW_PRINTK("fixed\n");
     shadow_audit_tables(v);
-    shadow_unlock(d);
+    paging_unlock(d);
     return EXCRET_fault_fixed;
 
  emulate:
@@ -3456,7 +3456,7 @@ static int sh_page_fault(struct vcpu *v,
      */
     sh_audit_gw(v, &gw);
     shadow_audit_tables(v);
-    shadow_unlock(d);
+    paging_unlock(d);
 
     this_cpu(trace_emulate_write_val) = 0;
 
@@ -3594,7 +3594,7 @@ static int sh_page_fault(struct vcpu *v,
     SHADOW_PRINTK("mmio %#"PRIpaddr"\n", gpa);
     shadow_audit_tables(v);
     reset_early_unshadow(v);
-    shadow_unlock(d);
+    paging_unlock(d);
     trace_shadow_gen(TRC_SHADOW_MMIO, va);
     return (handle_mmio_with_translation(va, gpa >> PAGE_SHIFT)
             ? EXCRET_fault_fixed : 0);
@@ -3604,7 +3604,7 @@ static int sh_page_fault(struct vcpu *v,
     SHADOW_PRINTK("not a shadow fault\n");
     shadow_audit_tables(v);
     reset_early_unshadow(v);
-    shadow_unlock(d);
+    paging_unlock(d);
 
 propagate:
     trace_not_shadow_fault(gw.l1e, va);
@@ -3644,7 +3644,7 @@ sh_invlpg(struct vcpu *v, unsigned long 
                & _PAGE_PRESENT) )
             return 0;
         /* This must still be a copy-from-user because we don't have the
-         * shadow lock, and the higher-level shadows might disappear
+         * paging lock, and the higher-level shadows might disappear
          * under our feet. */
         if ( __copy_from_user(&sl3e, (sh_linear_l3_table(v) 
                                       + shadow_l3_linear_offset(va)),
@@ -3700,11 +3700,11 @@ sh_invlpg(struct vcpu *v, unsigned long 
              && page_is_out_of_sync(pg) )
         {
             /* The test above may give false positives, since we don't
-             * hold the shadow lock yet.  Check again with the lock held. */
-            shadow_lock(v->domain);
+             * hold the paging lock yet.  Check again with the lock held. */
+            paging_lock(v->domain);
 
             /* This must still be a copy-from-user because we didn't
-             * have the shadow lock last time we checked, and the
+             * have the paging lock last time we checked, and the
              * higher-level shadows might have disappeared under our
              * feet. */
             if ( __copy_from_user(&sl2e, 
@@ -3713,13 +3713,13 @@ sh_invlpg(struct vcpu *v, unsigned long 
                                   sizeof (sl2e)) != 0 )
             {
                 perfc_incr(shadow_invlpg_fault);
-                shadow_unlock(v->domain);
+                paging_unlock(v->domain);
                 return 0;
             }
 
             if ( !(shadow_l2e_get_flags(sl2e) & _PAGE_PRESENT) )
             {
-                shadow_unlock(v->domain);
+                paging_unlock(v->domain);
                 return 0;
             }
 
@@ -3736,7 +3736,7 @@ sh_invlpg(struct vcpu *v, unsigned long 
                 (void) shadow_set_l1e(v, sl1, shadow_l1e_empty(),
                                       p2m_invalid, sl1mfn);
             }
-            shadow_unlock(v->domain);
+            paging_unlock(v->domain);
             /* Need the invlpg, to pick up the disappeareance of the sl1e */
             return 1;
         }
@@ -4153,7 +4153,7 @@ sh_update_cr3(struct vcpu *v, int do_loc
  * this function will call hvm_update_guest_cr(v, 3) to tell them where the 
  * shadow tables are.
  * If do_locking != 0, assume we are being called from outside the 
- * shadow code, and must take and release the shadow lock; otherwise 
+ * shadow code, and must take and release the paging lock; otherwise 
  * that is the caller's responsibility.
  */
 {
@@ -4172,7 +4172,7 @@ sh_update_cr3(struct vcpu *v, int do_loc
         return;
     }
 
-    if ( do_locking ) shadow_lock(v->domain);
+    if ( do_locking ) paging_lock(v->domain);
 
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
     /* Need to resync all the shadow entries on a TLB flush.  Resync
@@ -4181,7 +4181,7 @@ sh_update_cr3(struct vcpu *v, int do_loc
     shadow_resync_current_vcpu(v);
 #endif
 
-    ASSERT(shadow_locked_by_me(v->domain));
+    ASSERT(paging_locked_by_me(v->domain));
     ASSERT(v->arch.paging.mode);
 
     ////
@@ -4415,7 +4415,7 @@ sh_update_cr3(struct vcpu *v, int do_loc
 #endif
 
     /* Release the lock, if we took it (otherwise it's the caller's problem) */
-    if ( do_locking ) shadow_unlock(v->domain);
+    if ( do_locking ) paging_unlock(v->domain);
 }
 
 
@@ -4695,7 +4695,7 @@ static void sh_pagetable_dying(struct vc
     guest_l3e_t *gl3e = NULL;
     paddr_t gl2a = 0;
 
-    shadow_lock(v->domain);
+    paging_lock(v->domain);
 
     gcr3 = (v->arch.hvm_vcpu.guest_cr[3]);
     /* fast path: the pagetable belongs to the current context */
@@ -4747,7 +4747,7 @@ static void sh_pagetable_dying(struct vc
 out:
     if ( !fast_path )
         unmap_domain_page(gl3pa);
-    shadow_unlock(v->domain);
+    paging_unlock(v->domain);
 }
 #else
 static void sh_pagetable_dying(struct vcpu *v, paddr_t gpa)
@@ -4755,7 +4755,7 @@ static void sh_pagetable_dying(struct vc
     mfn_t smfn, gmfn;
     p2m_type_t p2mt;
 
-    shadow_lock(v->domain);
+    paging_lock(v->domain);
 
     gmfn = gfn_to_mfn_query(v->domain, _gfn(gpa >> PAGE_SHIFT), &p2mt);
 #if GUEST_PAGING_LEVELS == 2
@@ -4778,7 +4778,7 @@ static void sh_pagetable_dying(struct vc
 
     v->arch.paging.shadow.pagetable_dying = 1;
 
-    shadow_unlock(v->domain);
+    paging_unlock(v->domain);
 }
 #endif
 
@@ -4810,8 +4810,8 @@ static mfn_t emulate_gva_to_mfn(struct v
     }
 
     /* Translate the GFN to an MFN */
-    /* PoD: query only if shadow lock is held (to avoid deadlock) */
-    if ( shadow_locked_by_me(v->domain) )
+    /* PoD: query only if paging lock is held (to avoid deadlock) */
+    if ( paging_locked_by_me(v->domain) )
         mfn = gfn_to_mfn_query(v->domain, _gfn(gfn), &p2mt);
     else
         mfn = gfn_to_mfn(v->domain, _gfn(gfn), &p2mt);
@@ -5000,7 +5000,7 @@ sh_x86_emulate_write(struct vcpu *v, uns
     if ( emulate_map_dest_failed(addr) )
         return (long)addr;
 
-    shadow_lock(v->domain);
+    paging_lock(v->domain);
     memcpy(addr, src, bytes);
 
     if ( tb_init_done )
@@ -5021,7 +5021,7 @@ sh_x86_emulate_write(struct vcpu *v, uns
 
     emulate_unmap_dest(v, addr, bytes, sh_ctxt);
     shadow_audit_tables(v);
-    shadow_unlock(v->domain);
+    paging_unlock(v->domain);
     return X86EMUL_OKAY;
 }
 
@@ -5042,7 +5042,7 @@ sh_x86_emulate_cmpxchg(struct vcpu *v, u
     if ( emulate_map_dest_failed(addr) )
         return (long)addr;
 
-    shadow_lock(v->domain);
+    paging_lock(v->domain);
     switch ( bytes )
     {
     case 1: prev = cmpxchg(((u8 *)addr), old, new);  break;
@@ -5063,7 +5063,7 @@ sh_x86_emulate_cmpxchg(struct vcpu *v, u
 
     emulate_unmap_dest(v, addr, bytes, sh_ctxt);
     shadow_audit_tables(v);
-    shadow_unlock(v->domain);
+    paging_unlock(v->domain);
     return rv;
 }
 
@@ -5089,7 +5089,7 @@ sh_x86_emulate_cmpxchg8b(struct vcpu *v,
     old = (((u64) old_hi) << 32) | (u64) old_lo;
     new = (((u64) new_hi) << 32) | (u64) new_lo;
 
-    shadow_lock(v->domain);
+    paging_lock(v->domain);
     prev = cmpxchg(((u64 *)addr), old, new);
 
     if ( prev != old )
@@ -5097,7 +5097,7 @@ sh_x86_emulate_cmpxchg8b(struct vcpu *v,
 
     emulate_unmap_dest(v, addr, 8, sh_ctxt);
     shadow_audit_tables(v);
-    shadow_unlock(v->domain);
+    paging_unlock(v->domain);
     return rv;
 }
 #endif
diff -r 64398d14dcd6 -r 2bbed46eb10c xen/arch/x86/mm/shadow/private.h
--- a/xen/arch/x86/mm/shadow/private.h	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/shadow/private.h	Thu Jun 02 13:16:52 2011 +0100
@@ -533,7 +533,7 @@ static inline int sh_get_ref(struct vcpu
         return 0;
     }
     
-    /* Guarded by the shadow lock, so no need for atomic update */
+    /* Guarded by the paging lock, so no need for atomic update */
     sp->u.sh.count = nx;
 
     /* We remember the first shadow entry that points to each shadow. */
@@ -573,7 +573,7 @@ static inline void sh_put_ref(struct vcp
         BUG();
     }
 
-    /* Guarded by the shadow lock, so no need for atomic update */
+    /* Guarded by the paging lock, so no need for atomic update */
     sp->u.sh.count = nx;
 
     if ( unlikely(nx == 0) ) 
diff -r 64398d14dcd6 -r 2bbed46eb10c xen/arch/x86/mm/shadow/types.h
--- a/xen/arch/x86/mm/shadow/types.h	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/shadow/types.h	Thu Jun 02 13:16:52 2011 +0100
@@ -292,7 +292,7 @@ static inline shadow_l4e_t shadow_l4e_fr
  * MMIO emulation, and faults where the guest PTE is not present.  We
  * record these as shadow l1 entries that have reserved bits set in
  * them, so we can spot them immediately in the fault handler and handle
- * them without needing to hold the shadow lock or walk the guest
+ * them without needing to hold the paging lock or walk the guest
  * pagetables.
  *
  * This is only feasible for PAE and 64bit Xen: 32-bit non-PAE PTEs don't
diff -r 64398d14dcd6 -r 2bbed46eb10c xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/include/asm-x86/domain.h	Thu Jun 02 13:16:52 2011 +0100
@@ -91,8 +91,6 @@ void hypercall_page_initialise(struct do
 /*          shadow paging extension             */
 /************************************************/
 struct shadow_domain {
-    mm_lock_t         lock;  /* shadow domain lock */
-
     unsigned int      opt_flags;    /* runtime tunable optimizations on/off */
     struct page_list_head pinned_shadows;
 
@@ -158,8 +156,6 @@ struct shadow_vcpu {
 /*            hardware assisted paging          */
 /************************************************/
 struct hap_domain {
-    mm_lock_t         lock;
-
     struct page_list_head freelist;
     unsigned int      total_pages;  /* number of pages allocated */
     unsigned int      free_pages;   /* number of pages on freelists */
@@ -170,9 +166,6 @@ struct hap_domain {
 /*       common paging data structure           */
 /************************************************/
 struct log_dirty_domain {
-    /* log-dirty lock */
-    mm_lock_t     lock;
-
     /* log-dirty radix tree to record dirty pages */
     mfn_t          top;
     unsigned int   allocs;
@@ -189,6 +182,9 @@ struct log_dirty_domain {
 };
 
 struct paging_domain {
+    /* paging lock */
+    mm_lock_t lock;
+
     /* flags to control paging operation */
     u32                     mode;
     /* extension for shadow paging support */
diff -r 64398d14dcd6 -r 2bbed46eb10c xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/include/asm-x86/p2m.h	Thu Jun 02 13:16:52 2011 +0100
@@ -337,8 +337,8 @@ static inline mfn_t gfn_to_mfn_type(stru
 
 /* Syntactic sugar: most callers will use one of these. 
  * N.B. gfn_to_mfn_query() is the _only_ one guaranteed not to take the
- * p2m lock; none of the others can be called with the p2m, hap or
- * shadow lock held. */
+ * p2m lock; none of the others can be called with the p2m or paging
+ * lock held. */
 #define gfn_to_mfn(d, g, t)         gfn_to_mfn_type((d), (g), (t), p2m_alloc)
 #define gfn_to_mfn_query(d, g, t)   gfn_to_mfn_type((d), (g), (t), p2m_query)
 #define gfn_to_mfn_guest(d, g, t)   gfn_to_mfn_type((d), (g), (t), p2m_guest)

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 16 of 17] x86/mm: simplify log-dirty page allocation
  2011-06-02 12:20 [PATCH 00 of 17] mm/p2m cleanups Tim Deegan
                   ` (14 preceding siblings ...)
  2011-06-02 12:20 ` [PATCH 15 of 17] x86/mm: merge the shadow, hap and log-dirty locks into a single paging lock Tim Deegan
@ 2011-06-02 12:20 ` Tim Deegan
  2011-06-02 12:20 ` [PATCH 17 of 17] x86/mm/shadow: emulated writes are always guest-originated actions Tim Deegan
  16 siblings, 0 replies; 18+ messages in thread
From: Tim Deegan @ 2011-06-02 12:20 UTC (permalink / raw)
  To: xen-devel

# HG changeset patch
# User Tim Deegan <Tim.Deegan@citrix.com>
# Date 1307017012 -3600
# Node ID 9974012f48be05a981c9db05c544ffd965bd33d9
# Parent  2bbed46eb10ce80e920506714f7e328193a23b52
x86/mm: simplify log-dirty page allocation.

Now that the log-dirty code is covered by the same lock as shadow and
hap activity, we no longer need to avoid doing allocs and frees with
the lock held.  Simplify the code accordingly.
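
As a rough sketch of the resulting pattern (get_or_alloc_node() is a
made-up helper; the real code is the rewritten paging_mark_dirty()
below): a missing trie node can now be allocated in place, under the
paging lock, because the shadow/HAP p2m-page allocators take that lock
recursively.

    static mfn_t get_or_alloc_node(struct domain *d, mfn_t *slot)
    {
        mfn_t mfn = *slot;

        ASSERT(paging_locked_by_me(d));

        /* Allocate under the lock; no more unlock/alloc/retry dance. */
        if ( !mfn_valid(mfn) )
            *slot = mfn = paging_new_log_dirty_node(d);

        /* May still be INVALID_MFN; callers just bail out, since the
         * failed allocation has already been recorded. */
        return mfn;
    }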

Signed-off-by: Tim Deegan <Tim.Deegan@citrix.com>

diff -r 2bbed46eb10c -r 9974012f48be xen/arch/x86/mm/hap/hap.c
--- a/xen/arch/x86/mm/hap/hap.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/hap/hap.c	Thu Jun 02 13:16:52 2011 +0100
@@ -279,7 +279,7 @@ static struct page_info *hap_alloc_p2m_p
     struct page_info *pg;
 
     /* This is called both from the p2m code (which never holds the 
-     * paging lock) and the log-dirty code (which sometimes does). */
+     * paging lock) and the log-dirty code (which always does). */
     paging_lock_recursive(d);
     pg = hap_alloc(d);
 
@@ -318,7 +318,7 @@ static struct page_info *hap_alloc_p2m_p
 static void hap_free_p2m_page(struct domain *d, struct page_info *pg)
 {
     /* This is called both from the p2m code (which never holds the 
-     * paging lock) and the log-dirty code (which sometimes does). */
+     * paging lock) and the log-dirty code (which always does). */
     paging_lock_recursive(d);
 
     ASSERT(page_get_owner(pg) == d);
diff -r 2bbed46eb10c -r 9974012f48be xen/arch/x86/mm/paging.c
--- a/xen/arch/x86/mm/paging.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/paging.c	Thu Jun 02 13:16:52 2011 +0100
@@ -74,30 +74,32 @@ static mfn_t paging_new_log_dirty_page(s
     return page_to_mfn(page);
 }
 
-/* Init a new leaf node; returns a mapping or NULL */
-static unsigned long *paging_new_log_dirty_leaf(mfn_t mfn)
+/* Alloc and init a new leaf node */
+static mfn_t paging_new_log_dirty_leaf(struct domain *d)
 {
-    unsigned long *leaf = NULL;
+    mfn_t mfn = paging_new_log_dirty_page(d);
     if ( mfn_valid(mfn) )
     {
-        leaf = map_domain_page(mfn_x(mfn));
+        void *leaf = map_domain_page(mfn_x(mfn));
         clear_page(leaf);
+        unmap_domain_page(leaf);
     }
-    return leaf;
+    return mfn;
 }
 
-/* Init a new non-leaf node; returns a mapping or NULL */
-static mfn_t *paging_new_log_dirty_node(mfn_t mfn)
+/* Alloc and init a new non-leaf node */
+static mfn_t paging_new_log_dirty_node(struct domain *d)
 {
-    int i;
-    mfn_t *node = NULL;
+    mfn_t mfn = paging_new_log_dirty_page(d);
     if ( mfn_valid(mfn) )
     {
-        node = map_domain_page(mfn_x(mfn));
+        int i;
+        mfn_t *node = map_domain_page(mfn_x(mfn));
         for ( i = 0; i < LOGDIRTY_NODE_ENTRIES; i++ )
             node[i] = _mfn(INVALID_MFN);
+        unmap_domain_page(node);
     }
-    return node;
+    return mfn;
 }
 
 /* get the top of the log-dirty bitmap trie */
@@ -118,14 +120,10 @@ void paging_free_log_dirty_bitmap(struct
 {
     mfn_t *l4, *l3, *l2;
     int i4, i3, i2;
-    struct page_list_head to_free;    
-    struct page_info *pg, *tmp;
 
     if ( !mfn_valid(d->arch.paging.log_dirty.top) )
         return;
 
-    INIT_PAGE_LIST_HEAD(&to_free);
-
     paging_lock(d);
 
     l4 = map_domain_page(mfn_x(d->arch.paging.log_dirty.top));
@@ -146,28 +144,24 @@ void paging_free_log_dirty_bitmap(struct
 
             for ( i2 = 0; i2 < LOGDIRTY_NODE_ENTRIES; i2++ )
                 if ( mfn_valid(l2[i2]) )
-                    page_list_add_tail(mfn_to_page(l2[i2]), &to_free);
+                    paging_free_log_dirty_page(d, l2[i2]);
 
             unmap_domain_page(l2);
-            page_list_add_tail(mfn_to_page(l3[i3]), &to_free);
+            paging_free_log_dirty_page(d, l3[i3]);
         }
 
         unmap_domain_page(l3);
-        page_list_add_tail(mfn_to_page(l4[i4]), &to_free);
+        paging_free_log_dirty_page(d, l4[i4]);
     }
 
     unmap_domain_page(l4);
-    page_list_add_tail(mfn_to_page(d->arch.paging.log_dirty.top), &to_free);
+    paging_free_log_dirty_page(d, d->arch.paging.log_dirty.top);
+    d->arch.paging.log_dirty.top = _mfn(INVALID_MFN);
 
-    d->arch.paging.log_dirty.top = _mfn(INVALID_MFN);
     ASSERT(d->arch.paging.log_dirty.allocs == 0);
     d->arch.paging.log_dirty.failed_allocs = 0;
 
     paging_unlock(d);
-    
-    /* Return the memory now that we're not holding the log-dirty lock */
-    page_list_for_each_safe(pg, tmp, &to_free)
-        paging_free_log_dirty_page(d, page_to_mfn(pg));
 }
 
 int paging_log_dirty_enable(struct domain *d)
@@ -202,7 +196,7 @@ int paging_log_dirty_disable(struct doma
 void paging_mark_dirty(struct domain *d, unsigned long guest_mfn)
 {
     unsigned long pfn;
-    mfn_t gmfn, new_mfn;
+    mfn_t gmfn;
     int changed;
     mfn_t mfn, *l4, *l3, *l2;
     unsigned long *l1;
@@ -232,65 +226,41 @@ void paging_mark_dirty(struct domain *d,
     i3 = L3_LOGDIRTY_IDX(pfn);
     i4 = L4_LOGDIRTY_IDX(pfn);
 
-    /* We can't call paging.alloc_page() with the log-dirty lock held
-     * and we almost never need to call it anyway, so assume that we
-     * won't.  If we do hit a missing page, we'll unlock, allocate one
-     * and start again. */
-    new_mfn = _mfn(INVALID_MFN);
-
-again:
     /* Recursive: this is called from inside the shadow code */
     paging_lock_recursive(d);
 
+    if ( unlikely(!mfn_valid(d->arch.paging.log_dirty.top)) ) 
+    {
+         d->arch.paging.log_dirty.top = paging_new_log_dirty_node(d);
+         if ( unlikely(!mfn_valid(d->arch.paging.log_dirty.top)) )
+             goto out;
+    }
+
     l4 = paging_map_log_dirty_bitmap(d);
-    if ( unlikely(!l4) )
-    {
-        l4 = paging_new_log_dirty_node(new_mfn);
-        d->arch.paging.log_dirty.top = new_mfn;
-        new_mfn = _mfn(INVALID_MFN);
-    }
-    if ( unlikely(!l4) )
-        goto oom;
-
     mfn = l4[i4];
     if ( !mfn_valid(mfn) )
-    {
-        l3 = paging_new_log_dirty_node(new_mfn);
-        mfn = l4[i4] = new_mfn;
-        new_mfn = _mfn(INVALID_MFN);
-    }
-    else
-        l3 = map_domain_page(mfn_x(mfn));
+        l4[i4] = mfn = paging_new_log_dirty_node(d);
     unmap_domain_page(l4);
-    if ( unlikely(!l3) )
-        goto oom;
+    if ( !mfn_valid(mfn) )
+        goto out;
 
+    l3 = map_domain_page(mfn_x(mfn));
     mfn = l3[i3];
     if ( !mfn_valid(mfn) )
-    {
-        l2 = paging_new_log_dirty_node(new_mfn);
-        mfn = l3[i3] = new_mfn;
-        new_mfn = _mfn(INVALID_MFN);
-    }
-    else
-        l2 = map_domain_page(mfn_x(mfn));
+        l3[i3] = mfn = paging_new_log_dirty_node(d);
     unmap_domain_page(l3);
-    if ( unlikely(!l2) )
-        goto oom;
+    if ( !mfn_valid(mfn) )
+        goto out;
 
+    l2 = map_domain_page(mfn_x(mfn));
     mfn = l2[i2];
     if ( !mfn_valid(mfn) )
-    {
-        l1 = paging_new_log_dirty_leaf(new_mfn);
-        mfn = l2[i2] = new_mfn;
-        new_mfn = _mfn(INVALID_MFN);
-    }
-    else
-        l1 = map_domain_page(mfn_x(mfn));
+        l2[i2] = mfn = paging_new_log_dirty_leaf(d);
     unmap_domain_page(l2);
-    if ( unlikely(!l1) )
-        goto oom;
+    if ( !mfn_valid(mfn) )
+        goto out;
 
+    l1 = map_domain_page(mfn_x(mfn));
     changed = !__test_and_set_bit(i1, l1);
     unmap_domain_page(l1);
     if ( changed )
@@ -301,18 +271,10 @@ again:
         d->arch.paging.log_dirty.dirty_count++;
     }
 
+out:
+    /* We've already recorded any failed allocations */
     paging_unlock(d);
-    if ( mfn_valid(new_mfn) )
-        paging_free_log_dirty_page(d, new_mfn);
     return;
-
-oom:
-    paging_unlock(d);
-    new_mfn = paging_new_log_dirty_page(d);
-    if ( !mfn_valid(new_mfn) )
-        /* we've already recorded the failed allocation */
-        return;
-    goto again;
 }
 
 
@@ -322,46 +284,42 @@ int paging_mfn_is_dirty(struct domain *d
     unsigned long pfn;
     mfn_t mfn, *l4, *l3, *l2;
     unsigned long *l1;
-    int rv = 0;
+    int rv;
 
-    /* Recursive: this is called from inside the shadow code */
-    paging_lock_recursive(d);
+    ASSERT(paging_locked_by_me(d));
     ASSERT(paging_mode_log_dirty(d));
 
     /* We /really/ mean PFN here, even for non-translated guests. */
     pfn = get_gpfn_from_mfn(mfn_x(gmfn));
     /* Shared pages are always read-only; invalid pages can't be dirty. */
     if ( unlikely(SHARED_M2P(pfn) || !VALID_M2P(pfn)) )
-        goto out;
+        return 0;
 
     mfn = d->arch.paging.log_dirty.top;
     if ( !mfn_valid(mfn) )
-        goto out;
+        return 0;
 
     l4 = map_domain_page(mfn_x(mfn));
     mfn = l4[L4_LOGDIRTY_IDX(pfn)];
     unmap_domain_page(l4);
     if ( !mfn_valid(mfn) )
-        goto out;
+        return 0;
 
     l3 = map_domain_page(mfn_x(mfn));
     mfn = l3[L3_LOGDIRTY_IDX(pfn)];
     unmap_domain_page(l3);
     if ( !mfn_valid(mfn) )
-        goto out;
+        return 0;
 
     l2 = map_domain_page(mfn_x(mfn));
     mfn = l2[L2_LOGDIRTY_IDX(pfn)];
     unmap_domain_page(l2);
     if ( !mfn_valid(mfn) )
-        goto out;
+        return 0;
 
     l1 = map_domain_page(mfn_x(mfn));
     rv = test_bit(L1_LOGDIRTY_IDX(pfn), l1);
     unmap_domain_page(l1);
-
-out:
-    paging_unlock(d);
     return rv;
 }
 
diff -r 2bbed46eb10c -r 9974012f48be xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/shadow/common.c	Thu Jun 02 13:16:52 2011 +0100
@@ -1612,7 +1612,7 @@ shadow_alloc_p2m_page(struct domain *d)
     struct page_info *pg;
 
     /* This is called both from the p2m code (which never holds the 
-     * paging lock) and the log-dirty code (which sometimes does). */
+     * paging lock) and the log-dirty code (which always does). */
     paging_lock_recursive(d);
 
     if ( d->arch.paging.shadow.total_pages 
@@ -1654,7 +1654,7 @@ shadow_free_p2m_page(struct domain *d, s
     page_set_owner(pg, NULL); 
 
     /* This is called both from the p2m code (which never holds the 
-     * paging lock) and the log-dirty code (which sometimes does). */
+     * paging lock) and the log-dirty code (which always does). */
     paging_lock_recursive(d);
 
     shadow_free(d, page_to_mfn(pg));
diff -r 2bbed46eb10c -r 9974012f48be xen/include/asm-x86/paging.h
--- a/xen/include/asm-x86/paging.h	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/include/asm-x86/paging.h	Thu Jun 02 13:16:52 2011 +0100
@@ -164,7 +164,8 @@ void paging_log_dirty_init(struct domain
 /* mark a page as dirty */
 void paging_mark_dirty(struct domain *d, unsigned long guest_mfn);
 
-/* is this guest page dirty? */
+/* is this guest page dirty? 
+ * This is called from inside paging code, with the paging lock held. */
 int paging_mfn_is_dirty(struct domain *d, mfn_t gmfn);
 
 /*

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 17 of 17] x86/mm/shadow: emulated writes are always guest-originated actions
  2011-06-02 12:20 [PATCH 00 of 17] mm/p2m cleanups Tim Deegan
                   ` (15 preceding siblings ...)
  2011-06-02 12:20 ` [PATCH 16 of 17] x86/mm: simplify log-dirty page allocation Tim Deegan
@ 2011-06-02 12:20 ` Tim Deegan
  16 siblings, 0 replies; 18+ messages in thread
From: Tim Deegan @ 2011-06-02 12:20 UTC (permalink / raw)
  To: xen-devel

# HG changeset patch
# User Tim Deegan <Tim.Deegan@citrix.com>
# Date 1307017012 -3600
# Node ID 840e161428246f0173cb41c2409a0b9481b4a457
# Parent  9974012f48be05a981c9db05c544ffd965bd33d9
x86/mm/shadow: emulated writes are always guest-originated actions
and never happen with the paging lock held.
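
As a simplified sketch (lines condensed from emulate_gva_to_mfn() and
sh_x86_emulate_write(); not a standalone function): the translation is
done with no paging lock held, and the write path then takes the lock
itself around the shadow update.

    /* Translate the guest address without the paging lock held. */
    ASSERT(!paging_locked_by_me(v->domain));
    mfn = gfn_to_mfn_guest(v->domain, _gfn(gfn), &p2mt);

    /* ... later, the emulated write itself runs under the lock ... */
    paging_lock(v->domain);
    memcpy(addr, src, bytes);
    emulate_unmap_dest(v, addr, bytes, sh_ctxt);
    paging_unlock(v->domain);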

Signed-off-by: Tim Deegan <Tim.Deegan@citrix.com>

diff -r 9974012f48be -r 840e16142824 xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c	Thu Jun 02 13:16:52 2011 +0100
+++ b/xen/arch/x86/mm/shadow/multi.c	Thu Jun 02 13:16:52 2011 +0100
@@ -4810,11 +4810,8 @@ static mfn_t emulate_gva_to_mfn(struct v
     }
 
     /* Translate the GFN to an MFN */
-    /* PoD: query only if paging lock is held (to avoid deadlock) */
-    if ( paging_locked_by_me(v->domain) )
-        mfn = gfn_to_mfn_query(v->domain, _gfn(gfn), &p2mt);
-    else
-        mfn = gfn_to_mfn(v->domain, _gfn(gfn), &p2mt);
+    ASSERT(!paging_locked_by_me(v->domain));
+    mfn = gfn_to_mfn_guest(v->domain, _gfn(gfn), &p2mt);
         
     if ( p2m_is_readonly(p2mt) )
         return _mfn(READONLY_GFN);

^ permalink raw reply	[flat|nested] 18+ messages in thread

Thread overview: 18+ messages
2011-06-02 12:20 [PATCH 00 of 17] mm/p2m cleanups Tim Deegan
2011-06-02 12:20 ` [PATCH 01 of 17] x86/mm/p2m: Mark internal functions static Tim Deegan
2011-06-02 12:20 ` [PATCH 02 of 17] x86/mm/p2m: little fixes and tidying up Tim Deegan
2011-06-02 12:20 ` [PATCH 03 of 17] x86/mm/p2m: hide the current-domain fast-path inside the p2m-pt code Tim Deegan
2011-06-02 12:20 ` [PATCH 04 of 17] x86/mm/p2m: merge gfn_to_mfn_unshare with other gfn_to_mfn paths Tim Deegan
2011-06-02 12:20 ` [PATCH 05 of 17] x86/mm/p2m: Make p2m interfaces take struct domain arguments Tim Deegan
2011-06-02 12:20 ` [PATCH 06 of 17] x86/mm/p2m: paging_p2m_ga_to_gfn() doesn't need so many arguments Tim Deegan
2011-06-02 12:20 ` [PATCH 07 of 17] x86/mm: Fix memory-sharing code's locking discipline Tim Deegan
2011-06-02 12:20 ` [PATCH 08 of 17] x86/mm/p2m: Remove recursive-locking code from set_shared_p2m_entry() Tim Deegan
2011-06-02 12:20 ` [PATCH 09 of 17] x86/mm/p2m: Fix locking discipline around p2m updates Tim Deegan
2011-06-02 12:20 ` [PATCH 10 of 17] x86/mm/p2m: Fix locking discipline around p2m lookups Tim Deegan
2011-06-02 12:20 ` [PATCH 11 of 17] x86/mm/p2m: Move p2m code in HVMOP_[gs]et_mem_access into p2m.c Tim Deegan
2011-06-02 12:20 ` [PATCH 12 of 17] x86/mm/p2m: Fix locking discipline around log-dirty teardown Tim Deegan
2011-06-02 12:20 ` [PATCH 13 of 17] x86/mm: dedup the various copies of the shadow lock functions Tim Deegan
2011-06-02 12:20 ` [PATCH 14 of 17] x86/mm: Make MM locks recursive Tim Deegan
2011-06-02 12:20 ` [PATCH 15 of 17] x86/mm: merge the shadow, hap and log-dirty locks into a single paging lock Tim Deegan
2011-06-02 12:20 ` [PATCH 16 of 17] x86/mm: simplify log-dirty page allocation Tim Deegan
2011-06-02 12:20 ` [PATCH 17 of 17] x86/mm/shadow: emulated writes are always guest-originated actions Tim Deegan
