* [PATCH v4 1/4] x86/mem_sharing: reorder when pages are unlocked and released
@ 2019-05-02 22:13 ` Tamas K Lengyel
  0 siblings, 0 replies; 30+ messages in thread
From: Tamas K Lengyel @ 2019-05-02 22:13 UTC (permalink / raw)
  To: xen-devel
  Cc: Tamas K Lengyel, Wei Liu, George Dunlap, Andrew Cooper,
	Jan Beulich, Roger Pau Monne

Calling _put_page_type() while also holding the page_lock
for that page can cause a deadlock.

The comment being dropped is out-of-date, and hence no longer correct.
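
A minimal sketch of the resulting ordering in page_make_private()
(illustration only; the authoritative change is in the diff below):

    /* Release the per-page lock before dropping what may be the final
     * type reference; both live in page->u.inuse.type_info. */
    mem_sharing_page_unlock(page);
    put_page_and_type(page);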

Signed-off-by: Tamas K Lengyel <tamas@tklengyel.com>
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: George Dunlap <george.dunlap@eu.citrix.com>
Cc: Wei Liu <wei.liu2@citrix.com>
Cc: Roger Pau Monne <roger.pau@citrix.com>
---
This series is based on Andrew Cooper's x86-next branch.

v4: drop grabbing extra references
---
 xen/arch/x86/mm/mem_sharing.c | 41 ++++++++++-------------------------
 1 file changed, 11 insertions(+), 30 deletions(-)

diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
index dfc279d371..4b3a094481 100644
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -648,10 +648,6 @@ static int page_make_private(struct domain *d, struct page_info *page)
         return -EBUSY;
     }
 
-    /* We can only change the type if count is one */
-    /* Because we are locking pages individually, we need to drop
-     * the lock here, while the page is typed. We cannot risk the 
-     * race of page_unlock and then put_page_type. */
     expected_type = (PGT_shared_page | PGT_validated | PGT_locked | 2);
     if ( page->u.inuse.type_info != expected_type )
     {
@@ -660,12 +656,11 @@ static int page_make_private(struct domain *d, struct page_info *page)
         return -EEXIST;
     }
 
+    mem_sharing_page_unlock(page);
+
     /* Drop the final typecount */
     put_page_and_type(page);
 
-    /* Now that we've dropped the type, we can unlock */
-    mem_sharing_page_unlock(page);
-
     /* Change the owner */
     ASSERT(page_get_owner(page) == dom_cow);
     page_set_owner(page, d);
@@ -900,6 +895,7 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
     p2m_type_t smfn_type, cmfn_type;
     struct two_gfns tg;
     struct rmap_iterator ri;
+    unsigned long put_count = 0;
 
     get_two_gfns(sd, sgfn, &smfn_type, NULL, &smfn,
                  cd, cgfn, &cmfn_type, NULL, &cmfn, 0, &tg);
@@ -964,15 +960,6 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
         goto err_out;
     }
 
-    /* Acquire an extra reference, for the freeing below to be safe. */
-    if ( !get_page(cpage, dom_cow) )
-    {
-        ret = -EOVERFLOW;
-        mem_sharing_page_unlock(secondpg);
-        mem_sharing_page_unlock(firstpg);
-        goto err_out;
-    }
-
     /* Merge the lists together */
     rmap_seed_iterator(cpage, &ri);
     while ( (gfn = rmap_iterate(cpage, &ri)) != NULL)
@@ -984,7 +971,7 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
          * Don't change the type of rmap for the client page. */
         rmap_del(gfn, cpage, 0);
         rmap_add(gfn, spage);
-        put_page_and_type(cpage);
+        put_count++;
         d = get_domain_by_id(gfn->domain);
         BUG_ON(!d);
         BUG_ON(set_shared_p2m_entry(d, gfn->gfn, smfn));
@@ -1002,7 +989,10 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
     /* Free the client page */
     if(test_and_clear_bit(_PGC_allocated, &cpage->count_info))
         put_page(cpage);
-    put_page(cpage);
+
+    BUG_ON(!put_count);
+    while ( put_count-- )
+        put_page_and_type(cpage);
 
     /* We managed to free a domain page. */
     atomic_dec(&nr_shared_mfns);
@@ -1167,20 +1157,11 @@ int __mem_sharing_unshare_page(struct domain *d,
     {
         if ( !last_gfn )
             mem_sharing_gfn_destroy(page, d, gfn_info);
-        put_page_and_type(page);
         mem_sharing_page_unlock(page);
-        if ( last_gfn )
-        {
-            if ( !get_page(page, dom_cow) )
-            {
-                put_gfn(d, gfn);
-                domain_crash(d);
-                return -EOVERFLOW;
-            }
-            if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
-                put_page(page);
+        if ( last_gfn &&
+            test_and_clear_bit(_PGC_allocated, &page->count_info) )
             put_page(page);
-        }
+        put_page_and_type(page);
         put_gfn(d, gfn);
 
         return 0;
-- 
2.20.1


* [PATCH v4 2/4] x86/mem_sharing: copy a page_lock version to be internal to memshr
@ 2019-05-02 22:13   ` Tamas K Lengyel
  0 siblings, 0 replies; 30+ messages in thread
From: Tamas K Lengyel @ 2019-05-02 22:13 UTC (permalink / raw)
  To: xen-devel
  Cc: Tamas K Lengyel, Wei Liu, George Dunlap, Andrew Cooper,
	Jan Beulich, Roger Pau Monne

Patch cf4b30dca0a "Add debug code to detect illegal page_lock and put_page_type
ordering" added extra sanity checking to page_lock/page_unlock for debug builds,
under the assumption that no hypervisor path ever locks two pages at once.

This assumption doesn't hold during memory sharing, so we copy a version of
page_lock/unlock to be used exclusively in the memory sharing subsystem,
without the sanity checks.

Signed-off-by: Tamas K Lengyel <tamas@tklengyel.com>
Cc: George Dunlap <george.dunlap@eu.citrix.com>
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: Wei Liu <wei.liu2@citrix.com>
Cc: Roger Pau Monne <roger.pau@citrix.com>
---
 xen/arch/x86/mm/mem_sharing.c | 43 +++++++++++++++++++++++++++++++----
 xen/include/asm-x86/mm.h      | 14 +-----------
 2 files changed, 40 insertions(+), 17 deletions(-)

diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
index 4b3a094481..baae7ceeda 100644
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -112,13 +112,48 @@ static inline void page_sharing_dispose(struct page_info *page)
 
 #endif /* MEM_SHARING_AUDIT */
 
-static inline int mem_sharing_page_lock(struct page_info *pg)
+/*
+ * Private implementations of page_lock/unlock to bypass PV-only
+ * sanity checks not applicable to mem-sharing.
+ */
+static inline bool _page_lock(struct page_info *page)
 {
-    int rc;
+    unsigned long x, nx;
+
+    do {
+        while ( (x = page->u.inuse.type_info) & PGT_locked )
+            cpu_relax();
+        nx = x + (1 | PGT_locked);
+        if ( !(x & PGT_validated) ||
+             !(x & PGT_count_mask) ||
+             !(nx & PGT_count_mask) )
+            return false;
+    } while ( cmpxchg(&page->u.inuse.type_info, x, nx) != x );
+
+    return true;
+}
+
+static inline void _page_unlock(struct page_info *page)
+{
+    unsigned long x, nx, y = page->u.inuse.type_info;
+
+    do {
+        x = y;
+        ASSERT((x & PGT_count_mask) && (x & PGT_locked));
+
+        nx = x - (1 | PGT_locked);
+        /* We must not drop the last reference here. */
+        ASSERT(nx & PGT_count_mask);
+    } while ( (y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x );
+}
+
+static inline bool mem_sharing_page_lock(struct page_info *pg)
+{
+    bool rc;
     pg_lock_data_t *pld = &(this_cpu(__pld));
 
     page_sharing_mm_pre_lock();
-    rc = page_lock(pg);
+    rc = _page_lock(pg);
     if ( rc )
     {
         preempt_disable();
@@ -135,7 +170,7 @@ static inline void mem_sharing_page_unlock(struct page_info *pg)
     page_sharing_mm_unlock(pld->mm_unlock_level, 
                            &pld->recurse_count);
     preempt_enable();
-    page_unlock(pg);
+    _page_unlock(pg);
 }
 
 static inline shr_handle_t get_next_handle(void)
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index 6faa563167..7dc7e33f73 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -356,24 +356,12 @@ struct platform_bad_page {
 const struct platform_bad_page *get_platform_badpages(unsigned int *array_size);
 
 /* Per page locks:
- * page_lock() is used for two purposes: pte serialization, and memory sharing.
+ * page_lock() is used for pte serialization.
  *
  * All users of page lock for pte serialization live in mm.c, use it
  * to lock a page table page during pte updates, do not take other locks within
  * the critical section delimited by page_lock/unlock, and perform no
  * nesting.
- *
- * All users of page lock for memory sharing live in mm/mem_sharing.c. Page_lock
- * is used in memory sharing to protect addition (share) and removal (unshare)
- * of (gfn,domain) tupples to a list of gfn's that the shared page is currently
- * backing. Nesting may happen when sharing (and locking) two pages -- deadlock
- * is avoided by locking pages in increasing order.
- * All memory sharing code paths take the p2m lock of the affected gfn before
- * taking the lock for the underlying page. We enforce ordering between page_lock
- * and p2m_lock using an mm-locks.h construct.
- *
- * These two users (pte serialization and memory sharing) do not collide, since
- * sharing is only supported for hvm guests, which do not perform pv pte updates.
  */
 int page_lock(struct page_info *page);
 void page_unlock(struct page_info *page);
-- 
2.20.1


* [PATCH v4 3/4] x86/mem_sharing: enable mem_share audit mode only in debug builds
@ 2019-05-02 22:13   ` Tamas K Lengyel
  0 siblings, 0 replies; 30+ messages in thread
From: Tamas K Lengyel @ 2019-05-02 22:13 UTC (permalink / raw)
  To: xen-devel
  Cc: Andrew Cooper, Tamas K Lengyel, Wei Liu, Jan Beulich, Roger Pau Monne

Improves performance for release builds.

Signed-off-by: Tamas K Lengyel <tamas@tklengyel.com>
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: Wei Liu <wei.liu2@citrix.com>
Cc: Roger Pau Monne <roger.pau@citrix.com>
---
 xen/include/asm-x86/mem_sharing.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/xen/include/asm-x86/mem_sharing.h b/xen/include/asm-x86/mem_sharing.h
index 0e77b7d935..bb19b7534f 100644
--- a/xen/include/asm-x86/mem_sharing.h
+++ b/xen/include/asm-x86/mem_sharing.h
@@ -25,7 +25,11 @@
 #include <public/memory.h>
 
 /* Auditing of memory sharing code? */
+#ifndef NDEBUG
 #define MEM_SHARING_AUDIT 1
+#else
+#define MEM_SHARING_AUDIT 0
+#endif
 
 typedef uint64_t shr_handle_t; 
 
-- 
2.20.1


* [PATCH v4 4/4] x86/mem_sharing: compile mem_sharing subsystem only when kconfig is enabled
@ 2019-05-02 22:13   ` Tamas K Lengyel
  0 siblings, 0 replies; 30+ messages in thread
From: Tamas K Lengyel @ 2019-05-02 22:13 UTC (permalink / raw)
  To: xen-devel
  Cc: Tamas K Lengyel, Wei Liu, Konrad Rzeszutek Wilk, George Dunlap,
	Andrew Cooper, Ian Jackson, Tim Deegan, Julien Grall,
	Stefano Stabellini, Jan Beulich, Roger Pau Monne

Disable it by default, as it is only an experimental subsystem.
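
With the Kconfig option introduced below, builds now have to opt in
explicitly. A hypothetical .config fragment (how EXPERT gets set is
build-environment specific; the option also depends on HVM, per the
Kconfig hunk below):

    CONFIG_HVM=y
    CONFIG_MEM_SHARING=y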

Signed-off-by: Tamas K Lengyel <tamas@tklengyel.com>
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: Wei Liu <wei.liu2@citrix.com>
Cc: Roger Pau Monne <roger.pau@citrix.com>
Cc: George Dunlap <George.Dunlap@eu.citrix.com>
Cc: Ian Jackson <ian.jackson@eu.citrix.com>
Cc: Julien Grall <julien.grall@arm.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Stefano Stabellini <sstabellini@kernel.org>
Cc: Tim Deegan <tim@xen.org>
---
v4: add ASSERT_UNREACHABLE to inlined functions where applicable & other fixups
---
 xen/arch/x86/Kconfig              |  6 +++++-
 xen/arch/x86/domain.c             |  2 ++
 xen/arch/x86/domctl.c             |  2 ++
 xen/arch/x86/mm.c                 |  2 ++
 xen/arch/x86/mm/Makefile          |  2 +-
 xen/arch/x86/x86_64/compat/mm.c   |  2 ++
 xen/arch/x86/x86_64/mm.c          |  2 ++
 xen/common/Kconfig                |  3 ---
 xen/common/domain.c               |  2 +-
 xen/common/grant_table.c          |  2 +-
 xen/common/memory.c               |  2 +-
 xen/common/vm_event.c             |  6 +++---
 xen/include/asm-x86/mem_sharing.h | 28 ++++++++++++++++++++++++++++
 xen/include/asm-x86/mm.h          |  3 +++
 xen/include/xen/sched.h           |  2 +-
 xen/include/xsm/dummy.h           |  2 +-
 xen/include/xsm/xsm.h             |  4 ++--
 xen/xsm/dummy.c                   |  2 +-
 xen/xsm/flask/hooks.c             |  4 ++--
 19 files changed, 60 insertions(+), 18 deletions(-)

diff --git a/xen/arch/x86/Kconfig b/xen/arch/x86/Kconfig
index 4b8b07b549..600ca5c12e 100644
--- a/xen/arch/x86/Kconfig
+++ b/xen/arch/x86/Kconfig
@@ -17,7 +17,6 @@ config X86
 	select HAS_KEXEC
 	select MEM_ACCESS_ALWAYS_ON
 	select HAS_MEM_PAGING
-	select HAS_MEM_SHARING
 	select HAS_NS16550
 	select HAS_PASSTHROUGH
 	select HAS_PCI
@@ -198,6 +197,11 @@ config PV_SHIM_EXCLUSIVE
 	  firmware, and will not function correctly in other scenarios.
 
 	  If unsure, say N.
+
+config MEM_SHARING
+	bool "Xen memory sharing support" if EXPERT = "y"
+	depends on HVM
+
 endmenu
 
 source "common/Kconfig"
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index d2d9f2fc3c..474df8433b 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -2046,6 +2046,7 @@ int domain_relinquish_resources(struct domain *d)
             d->arch.auto_unmask = 0;
         }
 
+#ifdef CONFIG_MEM_SHARING
     PROGRESS(shared):
 
         if ( is_hvm_domain(d) )
@@ -2056,6 +2057,7 @@ int domain_relinquish_resources(struct domain *d)
             if ( ret )
                 return ret;
         }
+#endif
 
         spin_lock(&d->page_alloc_lock);
         page_list_splice(&d->arch.relmem_list, &d->page_list);
diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
index 9bf2d0820f..bc9e024ccc 100644
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -1231,9 +1231,11 @@ long arch_do_domctl(
         break;
     }
 
+#ifdef CONFIG_MEM_SHARING
     case XEN_DOMCTL_mem_sharing_op:
         ret = mem_sharing_domctl(d, &domctl->u.mem_sharing_op);
         break;
+#endif
 
 #if P2M_AUDIT && defined(CONFIG_HVM)
     case XEN_DOMCTL_audit_p2m:
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 45fadbab61..f9f607fb4b 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -368,7 +368,9 @@ void __init arch_init_memory(void)
 
     efi_init_memory();
 
+#ifdef CONFIG_MEM_SHARING
     mem_sharing_init();
+#endif
 
 #ifndef NDEBUG
     if ( highmem_start )
diff --git a/xen/arch/x86/mm/Makefile b/xen/arch/x86/mm/Makefile
index 5a17646f98..5010a29d6c 100644
--- a/xen/arch/x86/mm/Makefile
+++ b/xen/arch/x86/mm/Makefile
@@ -6,7 +6,7 @@ obj-$(CONFIG_HVM) += guest_walk_2.o guest_walk_3.o guest_walk_4.o
 obj-$(CONFIG_SHADOW_PAGING) += guest_walk_2.o guest_walk_3.o guest_walk_4.o
 obj-$(CONFIG_MEM_ACCESS) += mem_access.o
 obj-y += mem_paging.o
-obj-y += mem_sharing.o
+obj-$(CONFIG_MEM_SHARING) += mem_sharing.o
 obj-y += p2m.o p2m-pt.o
 obj-$(CONFIG_HVM) += p2m-ept.o p2m-pod.o
 obj-y += paging.o
diff --git a/xen/arch/x86/x86_64/compat/mm.c b/xen/arch/x86/x86_64/compat/mm.c
index 32410ed273..d4c6be3032 100644
--- a/xen/arch/x86/x86_64/compat/mm.c
+++ b/xen/arch/x86/x86_64/compat/mm.c
@@ -152,8 +152,10 @@ int compat_arch_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
     case XENMEM_paging_op:
         return mem_paging_memop(guest_handle_cast(arg, xen_mem_paging_op_t));
 
+#ifdef CONFIG_MEM_SHARING
     case XENMEM_sharing_op:
         return mem_sharing_memop(guest_handle_cast(arg, xen_mem_sharing_op_t));
+#endif
 
     default:
         rc = -ENOSYS;
diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c
index d8f558bc3a..51d1d511f2 100644
--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -993,8 +993,10 @@ long subarch_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
     case XENMEM_paging_op:
         return mem_paging_memop(guest_handle_cast(arg, xen_mem_paging_op_t));
 
+#ifdef CONFIG_MEM_SHARING
     case XENMEM_sharing_op:
         return mem_sharing_memop(guest_handle_cast(arg, xen_mem_sharing_op_t));
+#endif
 
     default:
         rc = -ENOSYS;
diff --git a/xen/common/Kconfig b/xen/common/Kconfig
index c838506241..80575cac10 100644
--- a/xen/common/Kconfig
+++ b/xen/common/Kconfig
@@ -45,9 +45,6 @@ config MEM_ACCESS
 config HAS_MEM_PAGING
 	bool
 
-config HAS_MEM_SHARING
-	bool
-
 config HAS_PDX
 	bool
 
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 88bbe984bc..bb072cf93f 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -926,7 +926,7 @@ static void complete_domain_destroy(struct rcu_head *head)
     xfree(d->vm_event_paging);
 #endif
     xfree(d->vm_event_monitor);
-#ifdef CONFIG_HAS_MEM_SHARING
+#ifdef CONFIG_MEM_SHARING
     xfree(d->vm_event_share);
 #endif
 
diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
index 80728ea57d..6c40dccae9 100644
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -3760,7 +3760,7 @@ void grant_table_init_vcpu(struct vcpu *v)
     v->maptrack_tail = MAPTRACK_TAIL;
 }
 
-#ifdef CONFIG_HAS_MEM_SHARING
+#ifdef CONFIG_MEM_SHARING
 int mem_sharing_gref_to_gfn(struct grant_table *gt, grant_ref_t ref,
                             gfn_t *gfn, uint16_t *status)
 {
diff --git a/xen/common/memory.c b/xen/common/memory.c
index 86567e6117..915f2cee1a 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -1676,7 +1676,7 @@ int check_get_page_from_gfn(struct domain *d, gfn_t gfn, bool readonly,
         return -EAGAIN;
     }
 #endif
-#ifdef CONFIG_HAS_MEM_SHARING
+#ifdef CONFIG_MEM_SHARING
     if ( (q & P2M_UNSHARE) && p2m_is_shared(p2mt) )
     {
         if ( page )
diff --git a/xen/common/vm_event.c b/xen/common/vm_event.c
index 6e68be47bc..163a671cea 100644
--- a/xen/common/vm_event.c
+++ b/xen/common/vm_event.c
@@ -544,7 +544,7 @@ static void monitor_notification(struct vcpu *v, unsigned int port)
     vm_event_resume(v->domain, v->domain->vm_event_monitor);
 }
 
-#ifdef CONFIG_HAS_MEM_SHARING
+#ifdef CONFIG_MEM_SHARING
 /* Registered with Xen-bound event channel for incoming notifications. */
 static void mem_sharing_notification(struct vcpu *v, unsigned int port)
 {
@@ -574,7 +574,7 @@ void vm_event_cleanup(struct domain *d)
         destroy_waitqueue_head(&d->vm_event_monitor->wq);
         (void)vm_event_disable(d, &d->vm_event_monitor);
     }
-#ifdef CONFIG_HAS_MEM_SHARING
+#ifdef CONFIG_MEM_SHARING
     if ( vm_event_check_ring(d->vm_event_share) )
     {
         destroy_waitqueue_head(&d->vm_event_share->wq);
@@ -720,7 +720,7 @@ int vm_event_domctl(struct domain *d, struct xen_domctl_vm_event_op *vec,
     }
     break;
 
-#ifdef CONFIG_HAS_MEM_SHARING
+#ifdef CONFIG_MEM_SHARING
     case XEN_DOMCTL_VM_EVENT_OP_SHARING:
     {
         rc = -EINVAL;
diff --git a/xen/include/asm-x86/mem_sharing.h b/xen/include/asm-x86/mem_sharing.h
index bb19b7534f..8edb8e4cc0 100644
--- a/xen/include/asm-x86/mem_sharing.h
+++ b/xen/include/asm-x86/mem_sharing.h
@@ -24,6 +24,8 @@
 #include <public/domctl.h>
 #include <public/memory.h>
 
+#ifdef CONFIG_MEM_SHARING
+
 /* Auditing of memory sharing code? */
 #ifndef NDEBUG
 #define MEM_SHARING_AUDIT 1
@@ -100,4 +102,30 @@ void mem_sharing_init(void);
  */
 int relinquish_shared_pages(struct domain *d);
 
+#else
+
+static inline unsigned int mem_sharing_get_nr_saved_mfns(void)
+{
+    return 0;
+}
+static inline unsigned int mem_sharing_get_nr_shared_mfns(void)
+{
+    return 0;
+}
+static inline int mem_sharing_unshare_page(struct domain *d,
+                                           unsigned long gfn,
+                                           uint16_t flags)
+{
+    ASSERT_UNREACHABLE();
+    return -EOPNOTSUPP;
+}
+static inline int mem_sharing_notify_enomem(struct domain *d, unsigned long gfn,
+                              bool allow_sleep)
+{
+    ASSERT_UNREACHABLE();
+    return -EOPNOTSUPP;
+}
+
+#endif
+
 #endif /* __MEM_SHARING_H__ */
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index 7dc7e33f73..9c077af8ea 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -127,6 +127,8 @@ struct page_info
         /* For non-pinnable single-page shadows, a higher entry that points
          * at us. */
         paddr_t up;
+
+#ifdef CONFIG_MEM_SHARING
         /* For shared/sharable pages, we use a doubly-linked list
          * of all the {pfn,domain} pairs that map this page. We also include
          * an opaque handle, which is effectively a version, so that clients
@@ -134,6 +136,7 @@ struct page_info
          * This list is allocated and freed when a page is shared/unshared.
          */
         struct page_sharing_info *sharing;
+#endif
     };
 
     /* Reference count and various PGC_xxx flags and fields. */
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 748bb0f2f9..17cf8785fb 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -462,7 +462,7 @@ struct domain
     /* Various vm_events */
 
     /* Memory sharing support */
-#ifdef CONFIG_HAS_MEM_SHARING
+#ifdef CONFIG_MEM_SHARING
     struct vm_event_domain *vm_event_share;
 #endif
     /* Memory paging support */
diff --git a/xen/include/xsm/dummy.h b/xen/include/xsm/dummy.h
index e628b1c6af..8afdec9fe8 100644
--- a/xen/include/xsm/dummy.h
+++ b/xen/include/xsm/dummy.h
@@ -603,7 +603,7 @@ static XSM_INLINE int xsm_mem_paging(XSM_DEFAULT_ARG struct domain *d)
 }
 #endif
 
-#ifdef CONFIG_HAS_MEM_SHARING
+#ifdef CONFIG_MEM_SHARING
 static XSM_INLINE int xsm_mem_sharing(XSM_DEFAULT_ARG struct domain *d)
 {
     XSM_ASSERT_ACTION(XSM_DM_PRIV);
diff --git a/xen/include/xsm/xsm.h b/xen/include/xsm/xsm.h
index 8a78d8abd3..8ec6b1a6e8 100644
--- a/xen/include/xsm/xsm.h
+++ b/xen/include/xsm/xsm.h
@@ -151,7 +151,7 @@ struct xsm_operations {
     int (*mem_paging) (struct domain *d);
 #endif
 
-#ifdef CONFIG_HAS_MEM_SHARING
+#ifdef CONFIG_MEM_SHARING
     int (*mem_sharing) (struct domain *d);
 #endif
 
@@ -603,7 +603,7 @@ static inline int xsm_mem_paging (xsm_default_t def, struct domain *d)
 }
 #endif
 
-#ifdef CONFIG_HAS_MEM_SHARING
+#ifdef CONFIG_MEM_SHARING
 static inline int xsm_mem_sharing (xsm_default_t def, struct domain *d)
 {
     return xsm_ops->mem_sharing(d);
diff --git a/xen/xsm/dummy.c b/xen/xsm/dummy.c
index 1fe0e746fa..6158dce814 100644
--- a/xen/xsm/dummy.c
+++ b/xen/xsm/dummy.c
@@ -129,7 +129,7 @@ void __init xsm_fixup_ops (struct xsm_operations *ops)
     set_to_dummy_if_null(ops, mem_paging);
 #endif
 
-#ifdef CONFIG_HAS_MEM_SHARING
+#ifdef CONFIG_MEM_SHARING
     set_to_dummy_if_null(ops, mem_sharing);
 #endif
 
diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c
index 3d00c747f6..f5f3b42e6e 100644
--- a/xen/xsm/flask/hooks.c
+++ b/xen/xsm/flask/hooks.c
@@ -1270,7 +1270,7 @@ static int flask_mem_paging(struct domain *d)
 }
 #endif
 
-#ifdef CONFIG_HAS_MEM_SHARING
+#ifdef CONFIG_MEM_SHARING
 static int flask_mem_sharing(struct domain *d)
 {
     return current_has_perm(d, SECCLASS_DOMAIN2, DOMAIN2__MEM_SHARING);
@@ -1838,7 +1838,7 @@ static struct xsm_operations flask_ops = {
     .mem_paging = flask_mem_paging,
 #endif
 
-#ifdef CONFIG_HAS_MEM_SHARING
+#ifdef CONFIG_MEM_SHARING
     .mem_sharing = flask_mem_sharing,
 #endif
 
-- 
2.20.1


* Re: [PATCH v4 1/4] x86/mem_sharing: reorder when pages are unlocked and released
@ 2019-05-03  8:12   ` Jan Beulich
  0 siblings, 0 replies; 30+ messages in thread
From: Jan Beulich @ 2019-05-03  8:12 UTC (permalink / raw)
  To: Tamas K Lengyel
  Cc: George Dunlap, Andrew Cooper, Wei Liu, xen-devel, Roger Pau Monne

>>> On 03.05.19 at 00:13, <tamas@tklengyel.com> wrote:
> @@ -1002,7 +989,10 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
>      /* Free the client page */
>      if(test_and_clear_bit(_PGC_allocated, &cpage->count_info))
>          put_page(cpage);
> -    put_page(cpage);
> +
> +    BUG_ON(!put_count);
> +    while ( put_count-- )
> +        put_page_and_type(cpage);

Strictly speaking, I think the BUG_ON() should be moved ahead of the
if() in context, so that a problematic put_page() would not get
executed in the first place (even if the system is to die soon after).
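
A sketch of the suggested ordering (untested, derived from the hunk
quoted above):

    BUG_ON(!put_count);

    /* Free the client page */
    if ( test_and_clear_bit(_PGC_allocated, &cpage->count_info) )
        put_page(cpage);

    while ( put_count-- )
        put_page_and_type(cpage);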

Jan



* Re: [PATCH v4 4/4] x86/mem_sharing: compile mem_sharing subsystem only when kconfig is enabled
@ 2019-05-03  8:18     ` Jan Beulich
  0 siblings, 0 replies; 30+ messages in thread
From: Jan Beulich @ 2019-05-03  8:18 UTC (permalink / raw)
  To: Tamas K Lengyel
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Julien Grall, xen-devel, Roger Pau Monne

>>> On 03.05.19 at 00:13, <tamas@tklengyel.com> wrote:
> --- a/xen/arch/x86/mm.c
> +++ b/xen/arch/x86/mm.c
> @@ -368,7 +368,9 @@ void __init arch_init_memory(void)
>  
>      efi_init_memory();
>  
> +#ifdef CONFIG_MEM_SHARING
>      mem_sharing_init();
> +#endif

While for domctl code and the like using #ifdef may indeed be the
better choice, I think here an inline stub to avoid the #ifdef
would be preferable. Then again - recall you've already ack-ed
my patch to drop the function altogether? Perhaps you should
base your patch on mine (or Andrew could pull that other patch
into x86-next)? In that case (with the hunk above simply
dropped)
Acked-by: Jan Beulich <jbeulich@suse.com>
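
For reference, the kind of stub being suggested might look like this
(hypothetical sketch; moot if the function is dropped altogether):

    #ifndef CONFIG_MEM_SHARING
    static inline void mem_sharing_init(void) {}
    #endif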

Jan



* Re: [PATCH v4 2/4] x86/mem_sharing: copy a page_lock version to be internal to memshr
@ 2019-05-03  8:27     ` Jan Beulich
  0 siblings, 0 replies; 30+ messages in thread
From: Jan Beulich @ 2019-05-03  8:27 UTC (permalink / raw)
  To: Tamas K Lengyel
  Cc: George Dunlap, Andrew Cooper, Wei Liu, xen-devel, Roger Pau Monne

>>> On 03.05.19 at 00:13, <tamas@tklengyel.com> wrote:
> --- a/xen/arch/x86/mm/mem_sharing.c
> +++ b/xen/arch/x86/mm/mem_sharing.c
> @@ -112,13 +112,48 @@ static inline void page_sharing_dispose(struct page_info *page)
>  
>  #endif /* MEM_SHARING_AUDIT */
>  
> -static inline int mem_sharing_page_lock(struct page_info *pg)
> +/*
> + * Private implementations of page_lock/unlock to bypass PV-only
> + * sanity checks not applicable to mem-sharing.
> + */
> +static inline bool _page_lock(struct page_info *page)
>  {
> -    int rc;
> +    unsigned long x, nx;
> +
> +    do {
> +        while ( (x = page->u.inuse.type_info) & PGT_locked )
> +            cpu_relax();
> +        nx = x + (1 | PGT_locked);
> +        if ( !(x & PGT_validated) ||
> +             !(x & PGT_count_mask) ||
> +             !(nx & PGT_count_mask) )
> +            return false;

Just for my own understanding: Did you verify that the PGT_validated
check is indeed needed here, or did you copy it "just in case"? In the
latter case a comment may be worthwhile.

Furthermore, are there any mem-sharing specific checks reasonable
to do here in place of the PV ones you want to avoid? Like pages
making it here only ever being of PGT_shared_page type?
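
A sharing-specific check of the kind being asked about might look
like this (hypothetical sketch; PGT_shared_page is the type already
expected elsewhere in mem_sharing.c):

    if ( (x & PGT_type_mask) != PGT_shared_page )
        return false;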

> --- a/xen/include/asm-x86/mm.h
> +++ b/xen/include/asm-x86/mm.h
> @@ -356,24 +356,12 @@ struct platform_bad_page {
>  const struct platform_bad_page *get_platform_badpages(unsigned int *array_size);
>  
>  /* Per page locks:
> - * page_lock() is used for two purposes: pte serialization, and memory sharing.
> + * page_lock() is used for pte serialization.
>   *
>   * All users of page lock for pte serialization live in mm.c, use it
>   * to lock a page table page during pte updates, do not take other locks within
>   * the critical section delimited by page_lock/unlock, and perform no
>   * nesting.
> - *
> - * All users of page lock for memory sharing live in mm/mem_sharing.c. Page_lock
> - * is used in memory sharing to protect addition (share) and removal (unshare)
> - * of (gfn,domain) tupples to a list of gfn's that the shared page is currently
> - * backing. Nesting may happen when sharing (and locking) two pages -- deadlock
> - * is avoided by locking pages in increasing order.
> - * All memory sharing code paths take the p2m lock of the affected gfn before
> - * taking the lock for the underlying page. We enforce ordering between page_lock
> - * and p2m_lock using an mm-locks.h construct.
> - *
> - * These two users (pte serialization and memory sharing) do not collide, since
> - * sharing is only supported for hvm guests, which do not perform pv pte updates.
>   */
>  int page_lock(struct page_info *page);
>  void page_unlock(struct page_info *page);

I think it would be helpful to retain (in a slightly adjusted form) the last
sentence of the comment above, to clarify that the PGT_locked uses
are now what does not end up colliding. At this occasion "which do not
perform pv pte updates" would also better be re-worded to e.g.
"which do not have PV PTEs updated" (as PVH Dom0 is very much
expected to issue PV page table operations for PV DomU-s).

Jan



* Re: [PATCH v4 2/4] x86/mem_sharing: copy a page_lock version to be internal to memshr
@ 2019-05-03 13:43       ` Tamas K Lengyel
  0 siblings, 0 replies; 30+ messages in thread
From: Tamas K Lengyel @ 2019-05-03 13:43 UTC (permalink / raw)
  To: Jan Beulich
  Cc: George Dunlap, Andrew Cooper, Wei Liu, xen-devel, Roger Pau Monne

On Fri, May 3, 2019 at 2:27 AM Jan Beulich <JBeulich@suse.com> wrote:
>
> >>> On 03.05.19 at 00:13, <tamas@tklengyel.com> wrote:
> > --- a/xen/arch/x86/mm/mem_sharing.c
> > +++ b/xen/arch/x86/mm/mem_sharing.c
> > @@ -112,13 +112,48 @@ static inline void page_sharing_dispose(struct page_info *page)
> >
> >  #endif /* MEM_SHARING_AUDIT */
> >
> > -static inline int mem_sharing_page_lock(struct page_info *pg)
> > +/*
> > + * Private implementations of page_lock/unlock to bypass PV-only
> > + * sanity checks not applicable to mem-sharing.
> > + */
> > +static inline bool _page_lock(struct page_info *page)
> >  {
> > -    int rc;
> > +    unsigned long x, nx;
> > +
> > +    do {
> > +        while ( (x = page->u.inuse.type_info) & PGT_locked )
> > +            cpu_relax();
> > +        nx = x + (1 | PGT_locked);
> > +        if ( !(x & PGT_validated) ||
> > +             !(x & PGT_count_mask) ||
> > +             !(nx & PGT_count_mask) )
> > +            return false;
>
> Just for my own understanding: Did you verify that the PGT_validated
> check is indeed needed here, or did you copy it "just in case"? In the
> latter case a comment may be worthwhile.

This is an exact copy of page_lock, sans the asserts that trip when
it's called from mem_sharing. I didn't investigate which of these
flags are necessary for mem_sharing. Frankly, I don't fully understand
their meaning and I haven't come across documentation about it yet. I
can certainly add a comment saying TODO: figure out which of these
flags are actually needed.

> Furthermore, are there any mem-sharing specific checks reasonable
> to do here in place of the PV ones you want to avoid? Like pages
> making it here only ever being of PGT_shared_page type?

There are checks already in place after the lock is taken where
necessary. Those checks can't be moved in here because they don't
apply to all cases uniformly - for example, we also take the lock for
when the page type is being converted between shared and not shared.

> > --- a/xen/include/asm-x86/mm.h
> > +++ b/xen/include/asm-x86/mm.h
> > @@ -356,24 +356,12 @@ struct platform_bad_page {
> >  const struct platform_bad_page *get_platform_badpages(unsigned int
> > *array_size);
> >
> >  /* Per page locks:
> > - * page_lock() is used for two purposes: pte serialization, and memory sharing.
> > + * page_lock() is used for pte serialization.
> >   *
> >   * All users of page lock for pte serialization live in mm.c, use it
> >   * to lock a page table page during pte updates, do not take other locks within
> >   * the critical section delimited by page_lock/unlock, and perform no
> >   * nesting.
> > - *
> > - * All users of page lock for memory sharing live in mm/mem_sharing.c. Page_lock
> > - * is used in memory sharing to protect addition (share) and removal (unshare)
> > - * of (gfn,domain) tuples to a list of gfns that the shared page is currently
> > - * backing. Nesting may happen when sharing (and locking) two pages -- deadlock
> > - * is avoided by locking pages in increasing order.
> > - * All memory sharing code paths take the p2m lock of the affected gfn before
> > - * taking the lock for the underlying page. We enforce ordering between page_lock
> > - * and p2m_lock using an mm-locks.h construct.
> > - *
> > - * These two users (pte serialization and memory sharing) do not collide, since
> > - * sharing is only supported for hvm guests, which do not perform pv pte updates.
> >   */
> >  int page_lock(struct page_info *page);
> >  void page_unlock(struct page_info *page);
>
> I think it would be helpful to retain (in a slightly adjusted form) the last
> sentence of the comment above, to clarify that the PGT_locked uses
> are now what does not end up colliding. At this occasion "which do not
> perform pv pte updates" would also better be re-worded to e.g.
> "which do not have PV PTEs updated" (as PVH Dom0 is very much
> expected to issue PV page table operations for PV DomU-s).

Sure.

Thanks,
Tamas

* Re: [PATCH v4 4/4] x86/mem_sharing: compile mem_sharing subsystem only when kconfig is enabled
@ 2019-05-03 13:46       ` Tamas K Lengyel
  0 siblings, 0 replies; 30+ messages in thread
From: Tamas K Lengyel @ 2019-05-03 13:46 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Stefano Stabellini, Wei Liu, Konrad Rzeszutek Wilk,
	George Dunlap, Andrew Cooper, Ian Jackson, Tim Deegan,
	Julien Grall, xen-devel, Roger Pau Monne

On Fri, May 3, 2019 at 2:18 AM Jan Beulich <JBeulich@suse.com> wrote:
>
> >>> On 03.05.19 at 00:13, <tamas@tklengyel.com> wrote:
> > --- a/xen/arch/x86/mm.c
> > +++ b/xen/arch/x86/mm.c
> > @@ -368,7 +368,9 @@ void __init arch_init_memory(void)
> >
> >      efi_init_memory();
> >
> > +#ifdef CONFIG_MEM_SHARING
> >      mem_sharing_init();
> > +#endif
>
> While for domctl code and the like using #ifdef may indeed be the
> better choice, I think here an inline stub to avoid the #ifdef
> would be preferable. Then again - recall you've already ack-ed
> my patch to drop the function altogether? Perhaps you should
> base your patch on mine (or Andrew could pull that other patch
> into x86-next)? In that case (with the hunk above simply
> dropped)
> Acked-by: Jan Beulich <jbeulich@suse.com>

Yes, that hunk can be simply dropped if your patch makes it in before
this does. That would be the optimal route.
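
For reference, the inline-stub variant would look roughly like this
(an untested sketch, placed wherever the declaration lives):

#ifdef CONFIG_MEM_SHARING
void mem_sharing_init(void);
#else
static inline void mem_sharing_init(void) {}
#endif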

Thanks,
Tamas

* Re: [PATCH v4 1/4] x86/mem_sharing: reorder when pages are unlocked and released
@ 2019-05-03 13:48     ` Tamas K Lengyel
  0 siblings, 0 replies; 30+ messages in thread
From: Tamas K Lengyel @ 2019-05-03 13:48 UTC (permalink / raw)
  To: Jan Beulich
  Cc: George Dunlap, Andrew Cooper, Wei Liu, xen-devel, Roger Pau Monne

On Fri, May 3, 2019 at 2:12 AM Jan Beulich <JBeulich@suse.com> wrote:
>
> >>> On 03.05.19 at 00:13, <tamas@tklengyel.com> wrote:
> > @@ -1002,7 +989,10 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
> >      /* Free the client page */
> >      if(test_and_clear_bit(_PGC_allocated, &cpage->count_info))
> >          put_page(cpage);
> > -    put_page(cpage);
> > +
> > +    BUG_ON(!put_count);
> > +    while ( put_count-- )
> > +        put_page_and_type(cpage);
>
> Strictly speaking I think the BUG_ON() should be moved ahead of the
> if() in context, so that a problematic put_page() would not get
> executed in the first place (even if the system is to die soon after).

I don't follow - where is the problematic put_page()? And why is it problematic?

Tamas

* Re: [PATCH v4 1/4] x86/mem_sharing: reorder when pages are unlocked and released
@ 2019-05-03 13:55       ` Jan Beulich
  0 siblings, 0 replies; 30+ messages in thread
From: Jan Beulich @ 2019-05-03 13:55 UTC (permalink / raw)
  To: Tamas K Lengyel
  Cc: George Dunlap, Andrew Cooper, Wei Liu, xen-devel, Roger Pau Monne

>>> On 03.05.19 at 15:48, <tamas@tklengyel.com> wrote:
> On Fri, May 3, 2019 at 2:12 AM Jan Beulich <JBeulich@suse.com> wrote:
>>
>> >>> On 03.05.19 at 00:13, <tamas@tklengyel.com> wrote:
>> > @@ -1002,7 +989,10 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
>> >      /* Free the client page */
>> >      if(test_and_clear_bit(_PGC_allocated, &cpage->count_info))
>> >          put_page(cpage);

This should be after ...

>> > -    put_page(cpage);
>> > +
>> > +    BUG_ON(!put_count);

... this, because ...

>> > +    while ( put_count-- )
>> > +        put_page_and_type(cpage);
>>
>> Strictly speaking I think the BUG_ON() should be moved ahead of the
>> if() in context, so that a problematic put_page() would not get
>> executed in the first place (even if the system is to die soon after).
> 
> I don't follow - where is the problematic put_page()? And why is it 
> problematic?

... if indeed the BUG_ON() triggers, then it should do so before
potentially putting the last ref of a page which shouldn't be put
that way.
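
I.e. the ordering I'm after would be (sketch):

    BUG_ON(!put_count);

    /* Free the client page */
    if ( test_and_clear_bit(_PGC_allocated, &cpage->count_info) )
        put_page(cpage);

    while ( put_count-- )
        put_page_and_type(cpage);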

Jan



* Re: [PATCH v4 2/4] x86/mem_sharing: copy a page_lock version to be internal to memshr
@ 2019-05-03 13:57         ` Jan Beulich
  0 siblings, 0 replies; 30+ messages in thread
From: Jan Beulich @ 2019-05-03 13:57 UTC (permalink / raw)
  To: Tamas K Lengyel
  Cc: George Dunlap, Andrew Cooper, Wei Liu, xen-devel, Roger Pau Monne

>>> On 03.05.19 at 15:43, <tamas@tklengyel.com> wrote:
> On Fri, May 3, 2019 at 2:27 AM Jan Beulich <JBeulich@suse.com> wrote:
>>
>> >>> On 03.05.19 at 00:13, <tamas@tklengyel.com> wrote:
>> > --- a/xen/arch/x86/mm/mem_sharing.c
>> > +++ b/xen/arch/x86/mm/mem_sharing.c
>> > @@ -112,13 +112,48 @@ static inline void page_sharing_dispose(struct page_info *page)
>> >
>> >  #endif /* MEM_SHARING_AUDIT */
>> >
>> > -static inline int mem_sharing_page_lock(struct page_info *pg)
>> > +/*
>> > + * Private implementations of page_lock/unlock to bypass PV-only
>> > + * sanity checks not applicable to mem-sharing.
>> > + */
>> > +static inline bool _page_lock(struct page_info *page)
>> >  {
>> > -    int rc;
>> > +    unsigned long x, nx;
>> > +
>> > +    do {
>> > +        while ( (x = page->u.inuse.type_info) & PGT_locked )
>> > +            cpu_relax();
>> > +        nx = x + (1 | PGT_locked);
>> > +        if ( !(x & PGT_validated) ||
>> > +             !(x & PGT_count_mask) ||
>> > +             !(nx & PGT_count_mask) )
>> > +            return false;
>>
>> Just for my own understanding: Did you verify that the PGT_validated
>> check is indeed needed here, or did you copy it "just in case"? In the
>> latter case a comment may be worthwhile.
> 
> This is an exact copy of page_lock, sans the asserts that break it
> from mem_sharing. I didn't investigate which of these flags are
> necessary for mem_sharing. Frankly, I don't fully understand their
> meaning and I haven't came across documentation about it yet. I can
> certainly add a comment saying TODO: figure out which of these flags
> are actually needed.

Yes something along these lines is what I'm after. But "these flags"
really is just PGT_validated. There's no question the PGT_locked
and PGT_count_mask operations need to remain as they are.
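
Something along these lines would do (wording only a sketch):

    /*
     * TODO: the PGT_validated check was inherited from the PV
     * page_lock(); it is unclear whether it is actually needed
     * for the pages mem-sharing locks here.
     */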

Jan



* Re: [PATCH v4 1/4] x86/mem_sharing: reorder when pages are unlocked and released
@ 2019-05-03 14:14         ` Tamas K Lengyel
  0 siblings, 0 replies; 30+ messages in thread
From: Tamas K Lengyel @ 2019-05-03 14:14 UTC (permalink / raw)
  To: Jan Beulich
  Cc: George Dunlap, Andrew Cooper, Wei Liu, xen-devel, Roger Pau Monne

On Fri, May 3, 2019 at 7:56 AM Jan Beulich <JBeulich@suse.com> wrote:
>
> >>> On 03.05.19 at 15:48, <tamas@tklengyel.com> wrote:
> > On Fri, May 3, 2019 at 2:12 AM Jan Beulich <JBeulich@suse.com> wrote:
> >>
> >> >>> On 03.05.19 at 00:13, <tamas@tklengyel.com> wrote:
> >> > @@ -1002,7 +989,10 @@ static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
> >> >      /* Free the client page */
> >> >      if(test_and_clear_bit(_PGC_allocated, &cpage->count_info))
> >> >          put_page(cpage);
>
> This should be after ...
>
> >> > -    put_page(cpage);
> >> > +
> >> > +    BUG_ON(!put_count);
>
> ... this, because ...
>
> >> > +    while ( put_count-- )
> >> > +        put_page_and_type(cpage);
> >>
> >> Strictly speaking I think the BUG_ON() should be moved ahead of the
> >> if() in context, so that a problematic put_page() would not get
> >> executed in the first place (even if the system is to die soon after).
> >
> > I don't follow - where is the problematic put_page()? And why is it
> > problematic?
>
> ... if indeed the BUG_ON() triggers, then it should do so before
> potentially putting the last ref of a page which shouldn't be put
> that way.

I see, thanks.

Tamas

* Re: [PATCH v4 2/4] x86/mem_sharing: copy a page_lock version to be internal to memshr
@ 2019-05-03 14:35           ` George Dunlap
  0 siblings, 0 replies; 30+ messages in thread
From: George Dunlap @ 2019-05-03 14:35 UTC (permalink / raw)
  To: Jan Beulich, Tamas K Lengyel
  Cc: George Dunlap, Andrew Cooper, Wei Liu, xen-devel, Roger Pau Monne

On 5/3/19 2:57 PM, Jan Beulich wrote:
>>>> On 03.05.19 at 15:43, <tamas@tklengyel.com> wrote:
>> On Fri, May 3, 2019 at 2:27 AM Jan Beulich <JBeulich@suse.com> wrote:
>>>
>>>>>> On 03.05.19 at 00:13, <tamas@tklengyel.com> wrote:
>>>> --- a/xen/arch/x86/mm/mem_sharing.c
>>>> +++ b/xen/arch/x86/mm/mem_sharing.c
>>>> @@ -112,13 +112,48 @@ static inline void page_sharing_dispose(struct page_info *page)
>>>>
>>>>  #endif /* MEM_SHARING_AUDIT */
>>>>
>>>> -static inline int mem_sharing_page_lock(struct page_info *pg)
>>>> +/*
>>>> + * Private implementations of page_lock/unlock to bypass PV-only
>>>> + * sanity checks not applicable to mem-sharing.
>>>> + */
>>>> +static inline bool _page_lock(struct page_info *page)
>>>>  {
>>>> -    int rc;
>>>> +    unsigned long x, nx;
>>>> +
>>>> +    do {
>>>> +        while ( (x = page->u.inuse.type_info) & PGT_locked )
>>>> +            cpu_relax();
>>>> +        nx = x + (1 | PGT_locked);
>>>> +        if ( !(x & PGT_validated) ||
>>>> +             !(x & PGT_count_mask) ||
>>>> +             !(nx & PGT_count_mask) )
>>>> +            return false;
>>>
>>> Just for my own understanding: Did you verify that the PGT_validated
>>> check is indeed needed here, or did you copy it "just in case"? In the
>>> latter case a comment may be worthwhile.
>>
>> This is an exact copy of page_lock, sans the asserts that break it
>> from mem_sharing. I didn't investigate which of these flags are
>> necessary for mem_sharing. Frankly, I don't fully understand their
>> meaning and I haven't came across documentation about it yet.

I hope to add some documentation sometime in the next 6 months or so.
I've submitted a talk on the refcounting system to the XenSummit as well.

Short answer: PGT_validated means that the page in question has been
validated as safe to use as the designated type.  For PGT_writable, this
is instantaneous (i.e., the type is set to PGT_writable with the
PGT_validated bit set in the same cmpxchg operation).

Other types (such as PGT_l*_table) actually take time to verify, and so
you need to first change the type to the new type (so nobody tries to
use it as yet a different type), then verify that it's OK to use it as a
page table (by recursively checking all the entries), then set the
PGT_validated bit.
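
In (simplified) code terms, a sketch of the idea rather than the
actual _get_page_type() logic:

    /* PGT_writable: type and validation committed in one step. */
    nx = PGT_writable_page | PGT_validated | 1;
    if ( cmpxchg(&page->u.inuse.type_info, x, nx) == x )
        /* immediately usable as a writable page */;

    /* PGT_l*_table: the type is set first; PGT_validated is only
     * set once the recursive verification has succeeded. */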

> Yes something along these lines is what I'm after. But "these flags"
> really is just PGT_validated.

Here's my take:

The sorts of "needs validation" types are PV-only I believe; if
mem_sharing is only for HVM guests, then the types should only be
PGT_writable, for which PGT_validated should always be set.

*If* we somehow do get to a situation where the type count > 0 but
PGT_validated is clear, something has probably gone terribly wrong; and
it would probably be much safer to just fail the page lock.

But perhaps that implies there should also be an ASSERT(!(x &
PGT_validated))?

 -George

* Re: [PATCH v4 2/4] x86/mem_sharing: copy a page_lock version to be internal to memshr
@ 2019-05-03 15:23             ` Tamas K Lengyel
  0 siblings, 0 replies; 30+ messages in thread
From: Tamas K Lengyel @ 2019-05-03 15:23 UTC (permalink / raw)
  To: George Dunlap
  Cc: Wei Liu, George Dunlap, Andrew Cooper, Jan Beulich, xen-devel,
	Roger Pau Monne

On Fri, May 3, 2019 at 8:35 AM George Dunlap <george.dunlap@citrix.com> wrote:
>
> On 5/3/19 2:57 PM, Jan Beulich wrote:
> >>>> On 03.05.19 at 15:43, <tamas@tklengyel.com> wrote:
> >> On Fri, May 3, 2019 at 2:27 AM Jan Beulich <JBeulich@suse.com> wrote:
> >>>
> >>>>>> On 03.05.19 at 00:13, <tamas@tklengyel.com> wrote:
> >>>> --- a/xen/arch/x86/mm/mem_sharing.c
> >>>> +++ b/xen/arch/x86/mm/mem_sharing.c
> >>>> @@ -112,13 +112,48 @@ static inline void page_sharing_dispose(struct page_info *page)
> >>>>
> >>>>  #endif /* MEM_SHARING_AUDIT */
> >>>>
> >>>> -static inline int mem_sharing_page_lock(struct page_info *pg)
> >>>> +/*
> >>>> + * Private implementations of page_lock/unlock to bypass PV-only
> >>>> + * sanity checks not applicable to mem-sharing.
> >>>> + */
> >>>> +static inline bool _page_lock(struct page_info *page)
> >>>>  {
> >>>> -    int rc;
> >>>> +    unsigned long x, nx;
> >>>> +
> >>>> +    do {
> >>>> +        while ( (x = page->u.inuse.type_info) & PGT_locked )
> >>>> +            cpu_relax();
> >>>> +        nx = x + (1 | PGT_locked);
> >>>> +        if ( !(x & PGT_validated) ||
> >>>> +             !(x & PGT_count_mask) ||
> >>>> +             !(nx & PGT_count_mask) )
> >>>> +            return false;
> >>>
> >>> Just for my own understanding: Did you verify that the PGT_validated
> >>> check is indeed needed here, or did you copy it "just in case"? In the
> >>> latter case a comment may be worthwhile.
> >>
> >> This is an exact copy of page_lock, sans the asserts that break it
> >> from mem_sharing. I didn't investigate which of these flags are
> >> necessary for mem_sharing. Frankly, I don't fully understand their
> >> meaning and I haven't came across documentation about it yet.
>
> I hope to add some documentation sometime in the next 6 months or so.
> I've submitted a talk on the refcounting system to the XenSummit as well.

Great, looking forward to it!

>
> Short answer: PGT_validated means that the page in question has been
> validated as safe to use as the designated type.  For PGT_writable, this
> is instantaneous (i.e., the type is set to PGT_writable with the
> PGT_validated bit set in the same cmpxchg operation).
>
> Other types (such as PGT_l*_table) actually take time to verify, and so
> you need to first change the type to the new type (so nobody tries to
> use it as yet a different type), then verify that it's OK to use it as a
> page table (by recursively checking all the entries), then set the
> PGT_validated bit.
>
> > Yes something along these lines is what I'm after. But "these flags"
> > really is just PGT_validated.
>
> Here's my take:
>
> The sorts of "needs validation" types are PV-only I believe; if
> mem_sharing is only for HVM guests, then the types should only be
> PGT_writable, for which PGT_validated should always be set.

In which case it does sound like it's a superfluous check, but it's
also harmless.

>
> *If* we somehow do get to a situation where the type count > 0 but
> PGT_validated is clear, something has probably gone terribly wrong; and
> it would probably be much safer to just fail the page lock.

Isn't that exactly what happens?

    if ( !(x & PGT_validated) ||
         !(x & PGT_count_mask) ||
         !(nx & PGT_count_mask) )
        return false;

>
> But perhaps that implies there should also be an ASSERT(!(x &
> PGT_validated))?

Well, if the lock fails we already BUG out when it's for runtime
deduplication, and we return an error when it's an operation from the
toolstack. So I don't think we need that extra ASSERT.

Tamas

Thread overview:
2019-05-02 22:13 [PATCH v4 1/4] x86/mem_sharing: reorder when pages are unlocked and released Tamas K Lengyel
2019-05-02 22:13 ` [PATCH v4 2/4] x86/mem_sharing: copy a page_lock version to be internal to memshr Tamas K Lengyel
2019-05-03  8:27   ` Jan Beulich
2019-05-03 13:43     ` Tamas K Lengyel
2019-05-03 13:57       ` Jan Beulich
2019-05-03 14:35         ` George Dunlap
2019-05-03 15:23           ` Tamas K Lengyel
2019-05-02 22:13 ` [PATCH v4 3/4] x86/mem_sharing: enable mem_share audit mode only in debug builds Tamas K Lengyel
2019-05-02 22:13 ` [PATCH v4 4/4] x86/mem_sharing: compile mem_sharing subsystem only when kconfig is enabled Tamas K Lengyel
2019-05-03  8:18   ` Jan Beulich
2019-05-03 13:46     ` Tamas K Lengyel
2019-05-03  8:12 ` [PATCH v4 1/4] x86/mem_sharing: reorder when pages are unlocked and released Jan Beulich
2019-05-03 13:48   ` Tamas K Lengyel
2019-05-03 13:55     ` Jan Beulich
2019-05-03 14:14       ` Tamas K Lengyel