From: Tamas K Lengyel <tamas.lengyel@intel.com>
To: xen-devel@lists.xenproject.org
Cc: "Tamas K Lengyel" <tamas.lengyel@intel.com>,
	"Tamas K Lengyel" <tamas@tklengyel.com>,
	"Jan Beulich" <jbeulich@suse.com>,
	"Andrew Cooper" <andrew.cooper3@citrix.com>,
	"George Dunlap" <george.dunlap@citrix.com>,
	"Roger Pau Monné" <roger.pau@citrix.com>, "Wei Liu" <wl@xen.org>
Subject: [PATCH 2/2] x86/hap: Resolve mm-lock order violations when forking VMs with nested p2m
Date: Mon,  4 Jan 2021 09:41:27 -0800	[thread overview]
Message-ID: <19aab6220bf191a31902488ed38c51d239572269.1609781242.git.tamas.lengyel@intel.com> (raw)
In-Reply-To: <6d5ca8a57a2745e933f00706bff306844611f64d.1609781242.git.tamas.lengyel@intel.com>

Several lock-order violations have been encountered while attempting to fork
VMs with nestedhvm=1 set. This patch resolves the issues.

The order violations stem from a call to p2m_flush_nestedp2m being performed
whenever the hostp2m changes. This function always takes the p2m lock of each
nested_p2m. However, when mem_sharing is in use the p2m locks must always be
taken before the sharing lock. To resolve this we avoid taking the sharing
lock where possible (it was unnecessary to begin with), make
p2m_flush_nestedp2m aware that the p2m lock may already be held, and
preemptively take all nested_p2m locks before unsharing a page on the path
where taking the sharing lock is still necessary.

Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
---
 xen/arch/x86/mm/mem_sharing.c | 43 +++++++++++++++++++++++------------
 xen/arch/x86/mm/p2m.c         | 11 ++++++++-
 2 files changed, 39 insertions(+), 15 deletions(-)
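
For reference, here is a minimal standalone model (plain C with pthreads, not
Xen code) of the ordering discipline described above and enforced in
__mem_sharing_unshare_page(): every nested p2m lock is acquired before the
sharing lock and released after it. The lock names and the MAX_NESTEDP2M
value are illustrative stand-ins for the Xen internals.

/*
 * Toy model of the lock order this patch enforces: all nested p2m
 * locks first, then the sharing lock. Compile with: gcc -pthread
 */
#include <pthread.h>
#include <stdio.h>

#define MAX_NESTEDP2M 10  /* illustrative; mirrors Xen's per-domain array */

static pthread_mutex_t nested_p2m_lock[MAX_NESTEDP2M];
static pthread_mutex_t sharing_lock = PTHREAD_MUTEX_INITIALIZER;

/* Models __mem_sharing_unshare_page(): pre-acquire every nested p2m
 * lock so the later sharing-lock acquisition can never invert the
 * required order. */
static void unshare_page_model(void)
{
    int i;

    for ( i = 0; i < MAX_NESTEDP2M; i++ )
        pthread_mutex_lock(&nested_p2m_lock[i]);

    pthread_mutex_lock(&sharing_lock);
    /* ... copy the shared page and rewrite the p2m entry here ... */
    pthread_mutex_unlock(&sharing_lock);

    for ( i = 0; i < MAX_NESTEDP2M; i++ )
        pthread_mutex_unlock(&nested_p2m_lock[i]);
}

int main(void)
{
    int i;

    for ( i = 0; i < MAX_NESTEDP2M; i++ )
        pthread_mutex_init(&nested_p2m_lock[i], NULL);

    unshare_page_model();
    puts("nested p2m locks taken before the sharing lock");
    return 0;
}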

diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
index 4a02c7d6f2..e2f3f50eef 100644
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -39,6 +39,7 @@
 #include <asm/event.h>
 #include <asm/hap.h>
 #include <asm/hvm/hvm.h>
+#include <asm/hvm/nestedhvm.h>
 #include <xsm/xsm.h>
 
 #include <public/hvm/params.h>
@@ -893,13 +894,11 @@ static int nominate_page(struct domain *d, gfn_t gfn,
         goto out;
 
     /*
-     * Now that the page is validated, we can lock it. There is no
-     * race because we're holding the p2m entry, so no one else
-     * could be nominating this gfn.
+     * Now that the page is validated, we can make it shared. There is no race
+     * because we're holding the p2m entry, so no one else could be nominating
+     * this gfn, and it is evidently not yet shared with any other VM, thus we
+     * don't need to take the mem_sharing_page_lock here.
      */
-    ret = -ENOENT;
-    if ( !mem_sharing_page_lock(page) )
-        goto out;
 
     /* Initialize the shared state */
     ret = -ENOMEM;
@@ -935,7 +934,6 @@ static int nominate_page(struct domain *d, gfn_t gfn,
 
     *phandle = page->sharing->handle;
     audit_add_list(page);
-    mem_sharing_page_unlock(page);
     ret = 0;
 
 out:
@@ -1214,7 +1212,7 @@ int __mem_sharing_unshare_page(struct domain *d,
     p2m_type_t p2mt;
     mfn_t mfn;
     struct page_info *page, *old_page;
-    int last_gfn;
+    int last_gfn, rc = 0;
     gfn_info_t *gfn_info = NULL;
 
     mfn = get_gfn(d, gfn, &p2mt);
@@ -1226,6 +1224,15 @@ int __mem_sharing_unshare_page(struct domain *d,
         return 0;
     }
 
+    /* lock nested p2ms to avoid lock-order violation */
+    if ( unlikely(nestedhvm_enabled(d)) )
+    {
+        int i;
+
+        for ( i = 0; i < MAX_NESTEDP2M; i++ )
+            p2m_lock(d->arch.nested_p2m[i]);
+    }
+
     page = __grab_shared_page(mfn);
     if ( page == NULL )
     {
@@ -1276,9 +1283,7 @@ int __mem_sharing_unshare_page(struct domain *d,
             put_page_alloc_ref(page);
 
         put_page_and_type(page);
-        put_gfn(d, gfn);
-
-        return 0;
+        goto out;
     }
 
     if ( last_gfn )
@@ -1295,12 +1300,12 @@ int __mem_sharing_unshare_page(struct domain *d,
         /* Undo dec of nr_saved_mfns, as the retry will decrease again. */
         atomic_inc(&nr_saved_mfns);
         mem_sharing_page_unlock(old_page);
-        put_gfn(d, gfn);
         /*
          * Caller is responsible for placing an event
          * in the ring.
          */
-        return -ENOMEM;
+        rc = -ENOMEM;
+        goto out;
     }
 
     copy_domain_page(page_to_mfn(page), page_to_mfn(old_page));
@@ -1327,8 +1332,18 @@ int __mem_sharing_unshare_page(struct domain *d,
      */
     paging_mark_dirty(d, page_to_mfn(page));
     /* We do not need to unlock a private page */
+
+ out:
+    if ( unlikely(nestedhvm_enabled(d)) )
+    {
+        int i;
+
+        for ( i = 0; i < MAX_NESTEDP2M; i++ )
+            p2m_unlock(d->arch.nested_p2m[i]);
+    }
+
     put_gfn(d, gfn);
-    return 0;
+    return rc;
 }
 
 int relinquish_shared_pages(struct domain *d)
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index ad4bb94514..79a2b6762b 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -1598,8 +1598,17 @@ void
 p2m_flush_nestedp2m(struct domain *d)
 {
     int i;
+    struct p2m_domain *p2m;
+
     for ( i = 0; i < MAX_NESTEDP2M; i++ )
-        p2m_flush_table(d->arch.nested_p2m[i]);
+    {
+        p2m = d->arch.nested_p2m[i];
+
+        if ( p2m_locked_by_me(p2m) )
+            p2m_flush_table_locked(p2m);
+        else
+            p2m_flush_table(p2m);
+    }
 }
 
 void np2m_flush_base(struct vcpu *v, unsigned long np2m_base)
-- 
2.25.1
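
The second half of the fix, in p2m_flush_nestedp2m(), can be modeled the same
way: a flush loop that notices when the current thread already holds a given
nested p2m lock (taken by the unshare path above) and flushes without
re-acquiring it. This is a hedged sketch; the struct fields and *_model
helpers are made up for illustration, and Xen's real p2m_locked_by_me() comes
from the mm-lock infrastructure, not from anything modeled here.

/*
 * Toy model of the already-locked awareness added to
 * p2m_flush_nestedp2m(). Compile with: gcc -pthread
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define MAX_NESTEDP2M 10

struct p2m_model {
    pthread_mutex_t lock;
    pthread_t owner;   /* valid only while locked */
    bool locked;
};

static void p2m_lock_model(struct p2m_model *p)
{
    pthread_mutex_lock(&p->lock);
    p->owner = pthread_self();
    p->locked = true;
}

static void p2m_unlock_model(struct p2m_model *p)
{
    p->locked = false;
    pthread_mutex_unlock(&p->lock);
}

static bool p2m_locked_by_me_model(const struct p2m_model *p)
{
    return p->locked && pthread_equal(p->owner, pthread_self());
}

static void flush_table_locked(struct p2m_model *p)
{
    (void)p;  /* ... tear down the table; lock already held ... */
}

static void flush_table(struct p2m_model *p)
{
    p2m_lock_model(p);
    flush_table_locked(p);
    p2m_unlock_model(p);
}

/* Mirrors the patched loop: re-locking a lock we already hold would
 * self-deadlock, so flush without re-acquiring in that case. */
static void flush_nestedp2m_model(struct p2m_model *nested, int n)
{
    int i;

    for ( i = 0; i < n; i++ )
    {
        if ( p2m_locked_by_me_model(&nested[i]) )
            flush_table_locked(&nested[i]);
        else
            flush_table(&nested[i]);
    }
}

int main(void)
{
    static struct p2m_model nested[MAX_NESTEDP2M];
    int i;

    for ( i = 0; i < MAX_NESTEDP2M; i++ )
        pthread_mutex_init(&nested[i].lock, NULL);

    p2m_lock_model(&nested[0]);  /* as the unshare path would */
    flush_nestedp2m_model(nested, MAX_NESTEDP2M);
    p2m_unlock_model(&nested[0]);

    puts("flushed all nested p2ms without self-deadlock");
    return 0;
}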



Thread overview: 13+ messages
2021-01-04 17:41 [PATCH 1/2] x86/mem_sharing: copy cpuid during vm forking Tamas K Lengyel
2021-01-04 17:41 ` Tamas K Lengyel [this message]
2021-01-06 12:03   ` [PATCH 2/2] x86/hap: Resolve mm-lock order violations when forking VMs with nested p2m Jan Beulich
2021-01-06 15:29     ` Tamas K Lengyel
2021-01-06 16:11       ` Jan Beulich
2021-01-06 16:26         ` Tamas K Lengyel
2021-01-07 12:25           ` Jan Beulich
2021-01-07 12:43             ` Tamas K Lengyel
2021-01-07 12:56               ` Jan Beulich
2021-01-07 13:27                 ` Tamas K Lengyel
2021-01-05  8:40 ` [PATCH 1/2] x86/mem_sharing: copy cpuid during vm forking Jan Beulich
2021-01-05 11:04 ` Andrew Cooper
2021-01-05 15:50   ` Lengyel, Tamas
