From: Andrew Cooper <andrew.cooper3@citrix.com>
To: Xen-devel <xen-devel@lists.xen.org>
Cc: "Kevin Tian" <kevin.tian@intel.com>,
	"Tamas K Lengyel" <tamas@tklengyel.com>,
	"Wei Liu" <wei.liu2@citrix.com>,
	"Jun Nakajima" <jun.nakajima@intel.com>,
	"Razvan Cojocaru" <rcojocaru@bitdefender.com>,
	"Andrew Cooper" <andrew.cooper3@citrix.com>,
	"Jan Beulich" <JBeulich@suse.com>,
	"Roger Pau Monné" <roger.pau@citrix.com>
Subject: [PATCH v2 3/4] x86/vmx: Fix security issue when a guest balloons out the #VE info page
Date: Thu, 21 Feb 2019 20:18:57 +0000	[thread overview]
Message-ID: <1550780337-638-1-git-send-email-andrew.cooper3@citrix.com> (raw)
In-Reply-To: <1550614715-21161-4-git-send-email-andrew.cooper3@citrix.com>

The logic in altp2m_vcpu_{en,dis}able_ve() and vmx_vcpu_update_vmfunc_ve() is
dangerous.  After #VE has been set up, the guest can balloon out and free the
nominated GFN, after which the processor may write to it.  Also, the unlocked
GFN query means the MFN is stale by the time it is used.  Alternatively, a
guest can race two disable calls to cause one VMCS to still reference the
nominated GFN after the tracking information was dropped.

Rework the logic from scratch to make it safe.

Hold an extra page reference on the underlying frame, to account for the
VMCS's reference.  This means that if the GFN gets ballooned out, it isn't
freed back to Xen until #VE is disabled, and the VMCS no longer refers to the
page.
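The lifetime guarantee described above can be illustrated with a toy refcount
model (hypothetical names; the real code uses get_page()/put_page() on a
struct page_info, and the "free" is a return to the Xen heap):

```c
#include <assert.h>
#include <stdbool.h>

/*
 * Toy model: a frame is only freed back to the allocator when the last
 * reference is dropped.  The guest's own mapping holds one reference;
 * the VMCS holds an extra one while #VE is enabled.
 */
struct frame { int refs; bool freed; };

static void get_ref(struct frame *f)
{
    f->refs++;
}

static void put_ref(struct frame *f)
{
    if ( --f->refs == 0 )
        f->freed = true;            /* returned to the allocator */
}
```

With this model, ballooning the page out (dropping the guest's reference)
cannot free the frame while the VMCS reference is still held.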

A consequence of this is that altp2m_vcpu_disable_ve() needs to be called
during the domain_kill() path, to drop the reference for domains which shut
down with #VE still enabled.

For domains using altp2m, we expect a single enable call and no disable for
the remaining lifetime of the domain.  However, to avoid problems with
concurrent calls, use cmpxchg() to locklessly maintain safety.
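The lockless enable/disable pattern can be sketched outside Xen; a minimal
model (hypothetical names, with C11 atomics standing in for Xen's cmpxchg()
and xchg()):

```c
#include <assert.h>
#include <stdatomic.h>
#include <stddef.h>

/* Hypothetical stand-ins for Xen's struct page_info and per-vcpu state. */
struct page_info { int refs; };
struct altp2mvcpu { _Atomic(struct page_info *) veinfo_pg; };

/*
 * Enable: only the caller which flips veinfo_pg from NULL to pg keeps its
 * reference and goes on to program the VMCS; losers drop their reference
 * and back off (-EEXIST in the real code).
 */
static int enable_ve(struct altp2mvcpu *a, struct page_info *pg)
{
    struct page_info *expected = NULL;

    pg->refs++;                          /* ref taken for the VMCS */
    if ( !atomic_compare_exchange_strong(&a->veinfo_pg, &expected, pg) )
    {
        pg->refs--;                      /* lost the race: undo */
        return -1;
    }

    return 0;
}

/*
 * Disable: the atomic exchange guarantees exactly one caller observes the
 * non-NULL pointer, so exactly one caller drops the VMCS reference.
 */
static void disable_ve(struct altp2mvcpu *a)
{
    struct page_info *pg = atomic_exchange(&a->veinfo_pg, NULL);

    if ( pg )
        pg->refs--;
}
```

Two concurrent disables therefore cannot double-drop the reference, and two
concurrent enables cannot leak one.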

This doesn't have an XSA because altp2m is not yet a security-supported
feature.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Release-acked-by: Juergen Gross <jgross@suse.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Wei Liu <wei.liu2@citrix.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Razvan Cojocaru <rcojocaru@bitdefender.com>
CC: Tamas K Lengyel <tamas@tklengyel.com>
CC: Jun Nakajima <jun.nakajima@intel.com>
CC: Kevin Tian <kevin.tian@intel.com>

v2:
 * Reuse domain_relinquish_resources() rather than introducing
   domain_unmap_resources().
 * Use check_get_page_from_gfn() rather than open-coding half of it.
 * Disallow registering the #VE info page over anything which isn't plain RAM
   in the guest.
---
 xen/arch/x86/domain.c          |  7 +++++
 xen/arch/x86/hvm/vmx/vmx.c     | 33 ++++++++++++-----------
 xen/arch/x86/mm/altp2m.c       | 59 ++++++++++++++++++++++++++++++++++++------
 xen/include/asm-x86/hvm/vcpu.h |  7 ++++-
 4 files changed, 80 insertions(+), 26 deletions(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 7a29435..b5febd6 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -38,6 +38,7 @@
 #include <xen/livepatch.h>
 #include <public/sysctl.h>
 #include <public/hvm/hvm_vcpu.h>
+#include <asm/altp2m.h>
 #include <asm/regs.h>
 #include <asm/mc146818rtc.h>
 #include <asm/system.h>
@@ -2071,6 +2072,12 @@ int domain_relinquish_resources(struct domain *d)
                 return ret;
         }
 
+        if ( altp2m_active(d) )
+        {
+            for_each_vcpu ( d, v )
+                altp2m_vcpu_disable_ve(v);
+        }
+
         if ( is_pv_domain(d) )
         {
             for_each_vcpu ( d, v )
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 24def93..395bccd 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2196,14 +2196,11 @@ static void vmx_vcpu_update_vmfunc_ve(struct vcpu *v)
 
         if ( cpu_has_vmx_virt_exceptions )
         {
-            p2m_type_t t;
-            mfn_t mfn;
+            const struct page_info *pg = vcpu_altp2m(v).veinfo_pg;
 
-            mfn = get_gfn_query_unlocked(d, gfn_x(vcpu_altp2m(v).veinfo_gfn), &t);
-
-            if ( !mfn_eq(mfn, INVALID_MFN) )
+            if ( pg )
             {
-                __vmwrite(VIRT_EXCEPTION_INFO, mfn_x(mfn) << PAGE_SHIFT);
+                __vmwrite(VIRT_EXCEPTION_INFO, page_to_maddr(pg));
                 /*
                  * Make sure we have an up-to-date EPTP_INDEX when
                  * setting SECONDARY_EXEC_ENABLE_VIRT_EXCEPTIONS.
@@ -2237,21 +2234,19 @@ static int vmx_vcpu_emulate_vmfunc(const struct cpu_user_regs *regs)
 
 static bool_t vmx_vcpu_emulate_ve(struct vcpu *v)
 {
-    bool_t rc = 0, writable;
-    gfn_t gfn = vcpu_altp2m(v).veinfo_gfn;
+    const struct page_info *pg = vcpu_altp2m(v).veinfo_pg;
     ve_info_t *veinfo;
+    bool rc = false;
 
-    if ( gfn_eq(gfn, INVALID_GFN) )
-        return 0;
+    if ( !pg )
+        return rc;
 
-    veinfo = hvm_map_guest_frame_rw(gfn_x(gfn), 0, &writable);
-    if ( !veinfo )
-        return 0;
-    if ( !writable || veinfo->semaphore != 0 )
-        goto out;
+    veinfo = __map_domain_page(pg);
 
-    rc = 1;
+    if ( veinfo->semaphore != 0 )
+        goto out;
 
+    rc = true;
     veinfo->exit_reason = EXIT_REASON_EPT_VIOLATION;
     veinfo->semaphore = ~0;
     veinfo->eptp_index = vcpu_altp2m(v).p2midx;
@@ -2266,7 +2261,11 @@ static bool_t vmx_vcpu_emulate_ve(struct vcpu *v)
                             X86_EVENT_NO_EC);
 
  out:
-    hvm_unmap_guest_frame(veinfo, 0);
+    unmap_domain_page(veinfo);
+
+    if ( rc )
+        paging_mark_dirty(v->domain, page_to_mfn(pg));
+
     return rc;
 }
 
diff --git a/xen/arch/x86/mm/altp2m.c b/xen/arch/x86/mm/altp2m.c
index 8bdefb0..50768f2 100644
--- a/xen/arch/x86/mm/altp2m.c
+++ b/xen/arch/x86/mm/altp2m.c
@@ -27,7 +27,6 @@ altp2m_vcpu_initialise(struct vcpu *v)
         vcpu_pause(v);
 
     vcpu_altp2m(v).p2midx = 0;
-    vcpu_altp2m(v).veinfo_gfn = INVALID_GFN;
     atomic_inc(&p2m_get_altp2m(v)->active_vcpus);
 
     altp2m_vcpu_update_p2m(v);
@@ -58,25 +57,69 @@ altp2m_vcpu_destroy(struct vcpu *v)
 
 int altp2m_vcpu_enable_ve(struct vcpu *v, gfn_t gfn)
 {
+    struct domain *d = v->domain;
+    struct altp2mvcpu *a = &vcpu_altp2m(v);
     p2m_type_t p2mt;
+    struct page_info *pg;
+    int rc;
+
+    /* Early exit path if #VE is already configured. */
+    if ( a->veinfo_pg )
+        return -EEXIST;
+
+    rc = check_get_page_from_gfn(d, gfn, false, &p2mt, &pg);
+    if ( rc )
+        return rc;
+
+    /*
+     * Looking for a plain piece of guest writeable RAM which isn't a magic
+     * frame such as a grant/ioreq/shared_info/etc mapping.  We (ab)use the
+     * pageable() predicate for this, due to it having the same properties
+     * that we want.
+     */
+    if ( !p2m_is_pageable(p2mt) || is_xen_heap_page(pg) )
+    {
+        rc = -EINVAL;
+        goto err;
+    }
 
-    if ( !gfn_eq(vcpu_altp2m(v).veinfo_gfn, INVALID_GFN) ||
-         mfn_eq(get_gfn_query_unlocked(v->domain, gfn_x(gfn), &p2mt),
-                INVALID_MFN) )
-        return -EINVAL;
+    /*
+     * Update veinfo_pg, making sure to be safe with concurrent hypercalls.
+     * The first caller to make veinfo_pg become non-NULL will program its MFN
+     * into the VMCS, so must not be clobbered.  Callers which lose the race
+     * back off with -EEXIST.
+     */
+    if ( cmpxchg(&a->veinfo_pg, NULL, pg) != NULL )
+    {
+        rc = -EEXIST;
+        goto err;
+    }
 
-    vcpu_altp2m(v).veinfo_gfn = gfn;
     altp2m_vcpu_update_vmfunc_ve(v);
 
     return 0;
+
+ err:
+    put_page(pg);
+
+    return rc;
 }
 
 void altp2m_vcpu_disable_ve(struct vcpu *v)
 {
-    if ( !gfn_eq(vcpu_altp2m(v).veinfo_gfn, INVALID_GFN) )
+    struct altp2mvcpu *a = &vcpu_altp2m(v);
+    struct page_info *pg;
+
+    /*
+     * Update veinfo_pg, making sure to be safe with concurrent hypercalls.
+     * The winner of this race is responsible for updating the VMCS to no
+     * longer point at the page, and then for dropping the associated ref.
+     */
+    if ( (pg = xchg(&a->veinfo_pg, NULL)) )
     {
-        vcpu_altp2m(v).veinfo_gfn = INVALID_GFN;
         altp2m_vcpu_update_vmfunc_ve(v);
+
+        put_page(pg);
     }
 }
 
diff --git a/xen/include/asm-x86/hvm/vcpu.h b/xen/include/asm-x86/hvm/vcpu.h
index c8a40f6..6c84d5a 100644
--- a/xen/include/asm-x86/hvm/vcpu.h
+++ b/xen/include/asm-x86/hvm/vcpu.h
@@ -137,8 +137,13 @@ struct nestedvcpu {
 #define vcpu_nestedhvm(v) ((v)->arch.hvm.nvcpu)
 
 struct altp2mvcpu {
+    /*
+     * #VE information page.  This pointer being non-NULL indicates that a
+     * VMCS's VIRT_EXCEPTION_INFO field is pointing to the page, and an extra
+     * page reference is held.
+     */
+    struct page_info *veinfo_pg;
     uint16_t    p2midx;         /* alternate p2m index */
-    gfn_t       veinfo_gfn;     /* #VE information page gfn */
 };
 
 #define vcpu_altp2m(v) ((v)->arch.hvm.avcpu)
-- 
2.1.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

2019-02-19 22:18 [PATCH tentitively for-4.12 0/4] x86/altp2m: Fix multiple security issues Andrew Cooper
2019-02-19 22:18 ` [PATCH 1/4] xen/common: Break domain_unmap_resources() out of domain_kill() Andrew Cooper
2019-02-19 22:39   ` Razvan Cojocaru
2019-02-19 22:46     ` Andrew Cooper
2019-02-20 14:11       ` Jan Beulich
2019-02-19 22:18 ` [PATCH 2/4] x86/altp2m: Rework #VE enable/disable paths Andrew Cooper
2019-02-19 23:10   ` Razvan Cojocaru
2019-02-20 14:14     ` Jan Beulich
2019-02-19 22:18 ` [PATCH 3/4] x86/vmx: Fix security issue when a guest balloons out the #VE info page Andrew Cooper
2019-02-20  9:45   ` Razvan Cojocaru
2019-02-20 14:37   ` Jan Beulich
2019-02-21 17:03     ` Andrew Cooper
2019-02-22  8:49       ` Jan Beulich
2019-02-21 20:18   ` Andrew Cooper [this message]
2019-02-21 21:28     ` [PATCH v2 " Razvan Cojocaru
2019-02-22 12:24     ` Jan Beulich
2019-02-22 14:03       ` Andrew Cooper
2019-02-28  5:48     ` Tian, Kevin
2019-02-19 22:18 ` [PATCH 4/4] x86/vmx: Properly flush the TLB when an altp2m is modified Andrew Cooper
2019-02-19 23:20   ` Razvan Cojocaru
2019-02-20 14:47   ` Jan Beulich
2019-02-28  5:50   ` Tian, Kevin
2019-02-21 13:13 ` [PATCH tentitively for-4.12 0/4] x86/altp2m: Fix multiple security issues Juergen Gross
