* [PATCH RFC] x86/emulate: implement hvmemul_cmpxchg() with an actual CMPXCHG
@ 2017-03-24 13:24 Razvan Cojocaru
From: Razvan Cojocaru @ 2017-03-24 13:24 UTC (permalink / raw)
  To: xen-devel; +Cc: andrew.cooper3, paul.durrant, Razvan Cojocaru, jbeulich

hvmemul_cmpxchg() is not presently SMP-safe, as it ends up doing
a memcpy(). Use an actual CMPXCHG instruction in the implementation.

Signed-off-by: Razvan Cojocaru <rcojocaru@bitdefender.com>

---
Questions:
 - I've used __cmpxchg(), as it's already there, but it always locks.
   In my tests and in x86_emulate()'s context, the lock is already taken
   anyway, so it doesn't appear to make a difference at present. Have I
   missed a case where it does, or should we add a lock prefix anyway
   for the future? If so, should I use another version of cmpxchg()
   that we already have in the source code but I've missed, modify this
   one, or bring in another implementation? (A sketch of one option
   follows this list.)
 - Is there anything I've missed in the implementation that could crash
   the host / guest / cause trouble?
 - In the introspection context, before this change a RETRY return in
   hvm_emulate_one_vm_event() would do nothing - when emulating because
   of a mem_access vm_event, the VCPU would then simply try to execute
   the instruction again, which would trigger another mem_access event
   (which would hopefully this time lead to a successful insn
   emulation). However, with these changes, in an SMP scenario where a
   VCPU failed its CMPXCHG, the instruction would be re-executed in the
   guest, which I've found leads to bad things happening (mostly
   BSODs). I've mitigated this in this patch by trying to emulate the
   failed instruction in a loop until it succeeds, but I'm not
   convinced that is a good idea. What are the alternatives? (A bounded
   variant is sketched after this list.)
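
On the first question, purely for illustration: a minimal sketch of a
lock-optional variant, modeled on the existing __cmpxchg() pattern.
The helper name and the bool parameter are made up, and only the
4-byte case is shown:

    static inline unsigned long cmpxchg_maybe_lock(
        volatile void *ptr, unsigned long old, unsigned long new,
        bool lock)
    {
        unsigned long prev;

        /* CMPXCHG compares %eax with *ptr; on match it stores new. */
        if ( lock )
            asm volatile ( "lock; cmpxchgl %k1,%2"
                           : "=a" (prev)
                           : "q" (new), "m" (*(volatile uint32_t *)ptr),
                             "0" (old)
                           : "memory" );
        else
            asm volatile ( "cmpxchgl %k1,%2"
                           : "=a" (prev)
                           : "q" (new), "m" (*(volatile uint32_t *)ptr),
                             "0" (old)
                           : "memory" );

        return prev;
    }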
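
On the last question, one alternative would be to bound the retries and
let the instruction fault again on exhaustion. A sketch, replacing the
do/while loop in the patch below; VM_EVENT_EMUL_MAX_RETRIES is a
made-up constant, not something that exists in the tree:

    /* Hypothetical bound on emulation retries; value picked arbitrarily. */
    #define VM_EVENT_EMUL_MAX_RETRIES 8

    unsigned int tries;

    for ( tries = 0; tries < VM_EVENT_EMUL_MAX_RETRIES; tries++ )
    {
        rc = hvm_emulate_one(&ctx);
        if ( rc != X86EMUL_RETRY )
            break;
    }
    /* On exhaustion, simply return and let the insn re-trigger. */
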
---
 xen/arch/x86/hvm/emulate.c | 230 +++++++++++++++++++++++++++++++++++----------
 1 file changed, 181 insertions(+), 49 deletions(-)

diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
index 2d92957..b5754a1 100644
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -20,6 +20,7 @@
 #include <asm/hvm/emulate.h>
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/ioreq.h>
+#include <asm/hvm/nestedhvm.h>
 #include <asm/hvm/trace.h>
 #include <asm/hvm/support.h>
 #include <asm/hvm/svm/svm.h>
@@ -1029,6 +1030,77 @@ static int hvmemul_wbinvd_discard(
     return X86EMUL_OKAY;
 }
 
+static int hvmemul_vaddr_to_mfn(
+    unsigned long addr,
+    mfn_t *mfn,
+    uint32_t pfec,
+    struct x86_emulate_ctxt *ctxt)
+{
+    paddr_t gpa = addr & ~PAGE_MASK;
+    struct page_info *page;
+    p2m_type_t p2mt;
+    unsigned long gfn;
+    struct vcpu *curr = current;
+    struct hvm_emulate_ctxt *hvmemul_ctxt =
+        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
+
+    gfn = paging_gva_to_gfn(curr, addr, &pfec);
+
+    if ( gfn == gfn_x(INVALID_GFN) )
+    {
+        pagefault_info_t pfinfo = {};
+
+        if ( (pfec & PFEC_page_paged) || (pfec & PFEC_page_shared) )
+            return X86EMUL_RETRY;
+
+        pfinfo.linear = addr;
+        pfinfo.ec = pfec;
+
+        x86_emul_pagefault(pfinfo.ec, pfinfo.linear, &hvmemul_ctxt->ctxt);
+        return X86EMUL_EXCEPTION;
+    }
+
+    gpa |= (paddr_t)gfn << PAGE_SHIFT;
+
+    /*
+     * No need to do the P2M lookup for internally handled MMIO, benefiting
+     * - 32-bit WinXP (& older Windows) on AMD CPUs for LAPIC accesses,
+     * - newer Windows (like Server 2012) for HPET accesses.
+     */
+    if ( !nestedhvm_vcpu_in_guestmode(curr) && hvm_mmio_internal(gpa) )
+        return X86EMUL_UNHANDLEABLE;
+
+    page = get_page_from_gfn(curr->domain, gfn, &p2mt, P2M_UNSHARE);
+
+    if ( !page )
+        return X86EMUL_UNHANDLEABLE;
+
+    if ( p2m_is_paging(p2mt) )
+    {
+        put_page(page);
+        p2m_mem_paging_populate(curr->domain, gfn);
+        return X86EMUL_RETRY;
+    }
+
+    if ( p2m_is_shared(p2mt) )
+    {
+        put_page(page);
+        return X86EMUL_RETRY;
+    }
+
+    if ( p2m_is_grant(p2mt) )
+    {
+        put_page(page);
+        return X86EMUL_UNHANDLEABLE;
+    }
+
+    *mfn = _mfn(page_to_mfn(page));
+
+    put_page(page);
+
+    return X86EMUL_OKAY;
+}
+
 static int hvmemul_cmpxchg(
     enum x86_segment seg,
     unsigned long offset,
@@ -1037,8 +1109,70 @@ static int hvmemul_cmpxchg(
     unsigned int bytes,
     struct x86_emulate_ctxt *ctxt)
 {
-    /* Fix this in case the guest is really relying on r-m-w atomicity. */
-    return hvmemul_write(seg, offset, p_new, bytes, ctxt);
+    unsigned long addr, reps = 1;
+    int rc = X86EMUL_OKAY;
+    unsigned long old = 0, new = 0;
+    uint32_t pfec = PFEC_page_present | PFEC_write_access;
+    struct hvm_emulate_ctxt *hvmemul_ctxt =
+        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
+    mfn_t mfn[2];
+    void *map = NULL;
+    struct domain *currd = current->domain;
+
+    if ( is_x86_system_segment(seg) )
+        pfec |= PFEC_implicit;
+    else if ( hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.dpl == 3 )
+        pfec |= PFEC_user_mode;
+
+    rc = hvmemul_virtual_to_linear(
+        seg, offset, bytes, &reps, hvm_access_write, hvmemul_ctxt, &addr);
+
+    if ( rc != X86EMUL_OKAY || !bytes )
+        return rc;
+
+    rc = hvmemul_vaddr_to_mfn(addr, &mfn[0], pfec, ctxt);
+
+    if ( rc != X86EMUL_OKAY )
+        return rc;
+
+    if ( likely(((addr + bytes - 1) & PAGE_MASK) == (addr & PAGE_MASK)) )
+    {
+        /* Whole write fits on a single page. */
+        mfn[1] = INVALID_MFN;
+        map = map_domain_page(mfn[0]);
+    }
+    else
+    {
+        rc = hvmemul_vaddr_to_mfn((addr + bytes - 1) & PAGE_MASK, &mfn[1],
+                                  pfec, ctxt);
+        if ( rc != X86EMUL_OKAY )
+            return rc;
+
+        map = vmap(mfn, 2);
+    }
+
+    if ( !map )
+        return X86EMUL_UNHANDLEABLE;
+
+    map += (addr & ~PAGE_MASK);
+
+    memcpy(&old, p_old, bytes);
+    memcpy(&new, p_new, bytes);
+
+    if ( __cmpxchg(map, old, new, bytes) != old )
+        rc = X86EMUL_RETRY;
+
+    paging_mark_dirty(currd, mfn[0]);
+
+    if ( unlikely(mfn_valid(mfn[1])) )
+    {
+        paging_mark_dirty(currd, mfn[1]);
+        vunmap((void *)((unsigned long)map & PAGE_MASK));
+    }
+    else
+        unmap_domain_page(map);
+
+    return rc;
 }
 
 static int hvmemul_validate(
@@ -1961,59 +2095,57 @@ int hvm_emulate_one_mmio(unsigned long mfn, unsigned long gla)
 void hvm_emulate_one_vm_event(enum emul_kind kind, unsigned int trapnr,
     unsigned int errcode)
 {
-    struct hvm_emulate_ctxt ctx = {{ 0 }};
-    int rc;
+    int rc = X86EMUL_OKAY;
 
-    hvm_emulate_init_once(&ctx, NULL, guest_cpu_user_regs());
+    do {
+        struct hvm_emulate_ctxt ctx = {{ 0 }};
 
-    switch ( kind )
-    {
-    case EMUL_KIND_NOWRITE:
-        rc = _hvm_emulate_one(&ctx, &hvm_emulate_ops_no_write);
-        break;
-    case EMUL_KIND_SET_CONTEXT_INSN: {
-        struct vcpu *curr = current;
-        struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
+        hvm_emulate_init_once(&ctx, NULL, guest_cpu_user_regs());
 
-        BUILD_BUG_ON(sizeof(vio->mmio_insn) !=
-                     sizeof(curr->arch.vm_event->emul.insn.data));
-        ASSERT(!vio->mmio_insn_bytes);
+        switch ( kind )
+        {
+        case EMUL_KIND_NOWRITE:
+            rc = _hvm_emulate_one(&ctx, &hvm_emulate_ops_no_write);
+            break;
+        case EMUL_KIND_SET_CONTEXT_INSN: {
+            struct vcpu *curr = current;
+            struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
+
+            BUILD_BUG_ON(sizeof(vio->mmio_insn) !=
+                         sizeof(curr->arch.vm_event->emul.insn.data));
+            ASSERT(!vio->mmio_insn_bytes);
+
+            /*
+             * Stash insn buffer into mmio buffer here instead of ctx
+             * to avoid having to add more logic to hvm_emulate_one.
+             */
+            vio->mmio_insn_bytes = sizeof(vio->mmio_insn);
+            memcpy(vio->mmio_insn, curr->arch.vm_event->emul.insn.data,
+                   vio->mmio_insn_bytes);
+        }
+        /* Fall-through */
+        default:
+            ctx.set_context = (kind == EMUL_KIND_SET_CONTEXT_DATA);
+            rc = hvm_emulate_one(&ctx);
+        }
 
-        /*
-         * Stash insn buffer into mmio buffer here instead of ctx
-         * to avoid having to add more logic to hvm_emulate_one.
-         */
-        vio->mmio_insn_bytes = sizeof(vio->mmio_insn);
-        memcpy(vio->mmio_insn, curr->arch.vm_event->emul.insn.data,
-               vio->mmio_insn_bytes);
-    }
-    /* Fall-through */
-    default:
-        ctx.set_context = (kind == EMUL_KIND_SET_CONTEXT_DATA);
-        rc = hvm_emulate_one(&ctx);
-    }
+        switch ( rc )
+        {
+        case X86EMUL_RETRY:
+            break;
+        case X86EMUL_UNHANDLEABLE:
+            hvm_dump_emulation_state(XENLOG_G_DEBUG "Mem event", &ctx);
+            hvm_inject_hw_exception(trapnr, errcode);
+            break;
+        case X86EMUL_EXCEPTION:
+            if ( ctx.ctxt.event_pending )
+                hvm_inject_event(&ctx.ctxt.event);
+            break;
+        }
 
-    switch ( rc )
-    {
-    case X86EMUL_RETRY:
-        /*
-         * This function is called when handling an EPT-related vm_event
-         * reply. As such, nothing else needs to be done here, since simply
-         * returning makes the current instruction cause a page fault again,
-         * consistent with X86EMUL_RETRY.
-         */
-        return;
-    case X86EMUL_UNHANDLEABLE:
-        hvm_dump_emulation_state(XENLOG_G_DEBUG "Mem event", &ctx);
-        hvm_inject_hw_exception(trapnr, errcode);
-        break;
-    case X86EMUL_EXCEPTION:
-        if ( ctx.ctxt.event_pending )
-            hvm_inject_event(&ctx.ctxt.event);
-        break;
-    }
+        hvm_emulate_writeback(&ctx);
 
-    hvm_emulate_writeback(&ctx);
+    } while ( rc == X86EMUL_RETRY );
 }
 
 void hvm_emulate_init_once(
-- 
1.9.1


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel
