All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH RFC] x86/emulate: implement hvmemul_cmpxchg() with an actual CMPXCHG
@ 2017-03-24 13:24 Razvan Cojocaru
  2017-03-28  9:14 ` Razvan Cojocaru
  0 siblings, 1 reply; 38+ messages in thread
From: Razvan Cojocaru @ 2017-03-24 13:24 UTC (permalink / raw)
  To: xen-devel; +Cc: andrew.cooper3, paul.durrant, Razvan Cojocaru, jbeulich

hvmemul_cmpxchg() is not presently SMP-safe, as it ends up doing
a memcpy(). Use an actual CMPXCHG instruction in the implementation.

Signed-off-by: Razvan Cojocaru <rcojocaru@bitdefender.com>

---
Questions:
 - I've used __cmpxchg(), as it's already there, but it always locks.
   In my tests and in x86_emulate()'s context, the lock is already taken
   anyway, so it doesn't appear to make a difference at present. Have I
   missed a case where it does, or should we add a lock prefix anyway
   for the future? If so, should I use another version of cmpxchg()
   that we already have in the source code but I've missed, modify this
   one, bring another implementation in?
 - Is there anything I've missed in the implementation that could crash
   the host / guest / cause trouble?
 - In the introspection context, before this change a RETRY return in
   hvm_emulate_one_vm_event() would do nothing - when emulating because
   of a mem_access vm_event, this would mean that the VCPU would then
   simply try to execute the instruction again, which would trigger
   another mem_access event (which would hopefully this time trigger
   a successful insn emulation). However, with these changes, this
   would mean that, in an SMP scenario where a VCPU failed its
   CMPXCHG, the instruction would be re-executed in the guest, which
   I've found leads to bad things happening (mostly BSODs). I've
   mitigated this in this patch by trying to emulate the failed
   instruction in a loop until it succeeds, but I'm not convinced it
   is a good idea. What are the alternatives?
---
 xen/arch/x86/hvm/emulate.c | 230 +++++++++++++++++++++++++++++++++++----------
 1 file changed, 181 insertions(+), 49 deletions(-)

diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
index 2d92957..b5754a1 100644
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -20,6 +20,7 @@
 #include <asm/hvm/emulate.h>
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/ioreq.h>
+#include <asm/hvm/nestedhvm.h>
 #include <asm/hvm/trace.h>
 #include <asm/hvm/support.h>
 #include <asm/hvm/svm/svm.h>
@@ -1029,6 +1030,77 @@ static int hvmemul_wbinvd_discard(
     return X86EMUL_OKAY;
 }
 
+static int hvmemul_vaddr_to_mfn(
+    unsigned long addr,
+    mfn_t *mfn,
+    uint32_t pfec,
+    struct x86_emulate_ctxt *ctxt)
+{
+    paddr_t gpa = addr & ~PAGE_MASK;
+    struct page_info *page;
+    p2m_type_t p2mt;
+    unsigned long gfn;
+    struct vcpu *curr = current;
+    struct hvm_emulate_ctxt *hvmemul_ctxt =
+        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
+
+    gfn = paging_gva_to_gfn(curr, addr, &pfec);
+
+    if ( gfn == gfn_x(INVALID_GFN) )
+    {
+        pagefault_info_t pfinfo = {};
+
+        if ( ( pfec & PFEC_page_paged ) || ( pfec & PFEC_page_shared ) )
+            return X86EMUL_RETRY;
+
+        pfinfo.linear = addr;
+        pfinfo.ec = pfec;
+
+        x86_emul_pagefault(pfinfo.ec, pfinfo.linear, &hvmemul_ctxt->ctxt);
+        return X86EMUL_EXCEPTION;
+    }
+
+    gpa |= (paddr_t)gfn << PAGE_SHIFT;
+
+    /*
+     * No need to do the P2M lookup for internally handled MMIO, benefiting
+     * - 32-bit WinXP (& older Windows) on AMD CPUs for LAPIC accesses,
+     * - newer Windows (like Server 2012) for HPET accesses.
+     */
+    if ( !nestedhvm_vcpu_in_guestmode(curr) && hvm_mmio_internal(gpa) )
+        return X86EMUL_UNHANDLEABLE;
+
+    page = get_page_from_gfn(curr->domain, gfn, &p2mt, P2M_UNSHARE);
+
+    if ( !page )
+        return X86EMUL_UNHANDLEABLE;
+
+    if ( p2m_is_paging(p2mt) )
+    {
+        put_page(page);
+        p2m_mem_paging_populate(curr->domain, gfn);
+        return X86EMUL_RETRY;
+    }
+
+    if ( p2m_is_shared(p2mt) )
+    {
+        put_page(page);
+        return X86EMUL_RETRY;
+    }
+
+    if ( p2m_is_grant(p2mt) )
+    {
+        put_page(page);
+        return X86EMUL_UNHANDLEABLE;
+    }
+
+    *mfn = _mfn(page_to_mfn(page));
+
+    put_page(page);
+
+    return X86EMUL_OKAY;
+}
+
 static int hvmemul_cmpxchg(
     enum x86_segment seg,
     unsigned long offset,
@@ -1037,8 +1109,70 @@ static int hvmemul_cmpxchg(
     unsigned int bytes,
     struct x86_emulate_ctxt *ctxt)
 {
-    /* Fix this in case the guest is really relying on r-m-w atomicity. */
-    return hvmemul_write(seg, offset, p_new, bytes, ctxt);
+    unsigned long addr, reps = 1;
+    int rc = X86EMUL_OKAY;
+    unsigned long old = 0, new = 0;
+    uint32_t pfec = PFEC_page_present | PFEC_write_access;
+    struct hvm_emulate_ctxt *hvmemul_ctxt =
+        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
+    mfn_t mfn[2];
+    void *map = NULL;
+    struct domain *currd = current->domain;
+
+    if ( is_x86_system_segment(seg) )
+        pfec |= PFEC_implicit;
+    else if ( hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.dpl == 3 )
+        pfec |= PFEC_user_mode;
+
+    rc = hvmemul_virtual_to_linear(
+        seg, offset, bytes, &reps, hvm_access_write, hvmemul_ctxt, &addr);
+
+    if ( rc != X86EMUL_OKAY || !bytes )
+        return rc;
+
+    rc = hvmemul_vaddr_to_mfn(addr, &mfn[0], pfec, ctxt);
+
+    if ( rc != X86EMUL_OKAY )
+        return rc;
+
+    if ( likely(((addr + bytes - 1) & PAGE_MASK) == (addr & PAGE_MASK)) )
+    {
+        /* Whole write fits on a single page. */
+        mfn[1] = INVALID_MFN;
+        map = map_domain_page(mfn[0]);
+    }
+    else
+    {
+        rc = hvmemul_vaddr_to_mfn((addr + bytes - 1) & PAGE_MASK, &mfn[1],
+                                  pfec, ctxt);
+        if ( rc != X86EMUL_OKAY )
+            return rc;
+
+        map = vmap(mfn, 2);
+    }
+
+    if ( !map )
+        return X86EMUL_UNHANDLEABLE;
+
+    map += (addr & ~PAGE_MASK);
+
+    memcpy(&old, p_old, bytes);
+    memcpy(&new, p_new, bytes);
+
+    if ( __cmpxchg(map, old, new, bytes) != old )
+        rc = X86EMUL_RETRY;
+
+    paging_mark_dirty(currd, mfn[0]);
+
+    if ( unlikely(mfn_valid(mfn[1])) )
+    {
+        paging_mark_dirty(currd, mfn[1]);
+        vunmap((void *)((unsigned long)map & PAGE_MASK));
+    }
+    else
+        unmap_domain_page(map);
+
+    return rc;
 }
 
 static int hvmemul_validate(
@@ -1961,59 +2095,57 @@ int hvm_emulate_one_mmio(unsigned long mfn, unsigned long gla)
 void hvm_emulate_one_vm_event(enum emul_kind kind, unsigned int trapnr,
     unsigned int errcode)
 {
-    struct hvm_emulate_ctxt ctx = {{ 0 }};
-    int rc;
+    int rc = X86EMUL_OKAY;
 
-    hvm_emulate_init_once(&ctx, NULL, guest_cpu_user_regs());
+    do {
+        struct hvm_emulate_ctxt ctx = {{ 0 }};
 
-    switch ( kind )
-    {
-    case EMUL_KIND_NOWRITE:
-        rc = _hvm_emulate_one(&ctx, &hvm_emulate_ops_no_write);
-        break;
-    case EMUL_KIND_SET_CONTEXT_INSN: {
-        struct vcpu *curr = current;
-        struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
+        hvm_emulate_init_once(&ctx, NULL, guest_cpu_user_regs());
 
-        BUILD_BUG_ON(sizeof(vio->mmio_insn) !=
-                     sizeof(curr->arch.vm_event->emul.insn.data));
-        ASSERT(!vio->mmio_insn_bytes);
+        switch ( kind )
+        {
+        case EMUL_KIND_NOWRITE:
+            rc = _hvm_emulate_one(&ctx, &hvm_emulate_ops_no_write);
+            break;
+        case EMUL_KIND_SET_CONTEXT_INSN: {
+            struct vcpu *curr = current;
+            struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
+
+            BUILD_BUG_ON(sizeof(vio->mmio_insn) !=
+                         sizeof(curr->arch.vm_event->emul.insn.data));
+            ASSERT(!vio->mmio_insn_bytes);
+
+            /*
+             * Stash insn buffer into mmio buffer here instead of ctx
+             * to avoid having to add more logic to hvm_emulate_one.
+             */
+            vio->mmio_insn_bytes = sizeof(vio->mmio_insn);
+            memcpy(vio->mmio_insn, curr->arch.vm_event->emul.insn.data,
+                   vio->mmio_insn_bytes);
+        }
+        /* Fall-through */
+        default:
+            ctx.set_context = (kind == EMUL_KIND_SET_CONTEXT_DATA);
+            rc = hvm_emulate_one(&ctx);
+        }
 
-        /*
-         * Stash insn buffer into mmio buffer here instead of ctx
-         * to avoid having to add more logic to hvm_emulate_one.
-         */
-        vio->mmio_insn_bytes = sizeof(vio->mmio_insn);
-        memcpy(vio->mmio_insn, curr->arch.vm_event->emul.insn.data,
-               vio->mmio_insn_bytes);
-    }
-    /* Fall-through */
-    default:
-        ctx.set_context = (kind == EMUL_KIND_SET_CONTEXT_DATA);
-        rc = hvm_emulate_one(&ctx);
-    }
+        switch ( rc )
+        {
+        case X86EMUL_RETRY:
+            break;
+        case X86EMUL_UNHANDLEABLE:
+            hvm_dump_emulation_state(XENLOG_G_DEBUG "Mem event", &ctx);
+            hvm_inject_hw_exception(trapnr, errcode);
+            break;
+        case X86EMUL_EXCEPTION:
+            if ( ctx.ctxt.event_pending )
+                hvm_inject_event(&ctx.ctxt.event);
+            break;
+        }
 
-    switch ( rc )
-    {
-    case X86EMUL_RETRY:
-        /*
-         * This function is called when handling an EPT-related vm_event
-         * reply. As such, nothing else needs to be done here, since simply
-         * returning makes the current instruction cause a page fault again,
-         * consistent with X86EMUL_RETRY.
-         */
-        return;
-    case X86EMUL_UNHANDLEABLE:
-        hvm_dump_emulation_state(XENLOG_G_DEBUG "Mem event", &ctx);
-        hvm_inject_hw_exception(trapnr, errcode);
-        break;
-    case X86EMUL_EXCEPTION:
-        if ( ctx.ctxt.event_pending )
-            hvm_inject_event(&ctx.ctxt.event);
-        break;
-    }
+        hvm_emulate_writeback(&ctx);
 
-    hvm_emulate_writeback(&ctx);
+    } while( rc == X86EMUL_RETRY );
 }
 
 void hvm_emulate_init_once(
-- 
1.9.1


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply related	[flat|nested] 38+ messages in thread

* Re: [PATCH RFC] x86/emulate: implement hvmemul_cmpxchg() with an actual CMPXCHG
  2017-03-24 13:24 [PATCH RFC] x86/emulate: implement hvmemul_cmpxchg() with an actual CMPXCHG Razvan Cojocaru
@ 2017-03-28  9:14 ` Razvan Cojocaru
  2017-03-28 10:03   ` Jan Beulich
  0 siblings, 1 reply; 38+ messages in thread
From: Razvan Cojocaru @ 2017-03-28  9:14 UTC (permalink / raw)
  To: xen-devel; +Cc: andrew.cooper3, paul.durrant, jbeulich

The real CMPXCHG hvmemul_cmpxchg() implementation works as expected as
far as the race conditions go, and returning RETRY for failed writebacks
seems to work without issue for regular Xen emulation.

However, when the patch is used with introspection, I've had a BCCode:
101 BSOD and, on rare (but several) occasions, the guest has become
unresponsive (can't close Firefox or have the Windows start menu show up
when clicking the "Start" button, but the guest is otherwise running).

This I believe is due to the do { } while ( rc == X86EMUL_RETRY ); loop
in hvm_emulate_one_vm_event(): I am basically now looping behind the
guest's back until the CMPXCHG succeeds, which can theoretically be a
very long time to execute a CMPXCHG instruction, and most likely not
what the guest OS is expecting.

The alternative (and the current default) is to do nothing on
X86EMUL_RETRY and just allow the guest to re-enter in the same place,
which should trigger the same page fault vm_event, which will hopefully
now be able to emulate the current instruction. However, in the best
case scenario, this just complicates the above loop since the current
instruction will still be unable to complete until emulation succeeds
but this time with VMEXITs. And in the worst case scenario (which is
what happens in my tests) this adds an additional factor of
unpredictability, since the guest quickly BSODs (or rarely just plain
hangs).

I'm not sure that the RETRY model is what the guest OS expects. AFAIK, a
failed CMPXCHG should happen just once, with the proper registers and ZF
set. The guest surely expects neither that the instruction resume until
it succeeds, nor that some hidden loop goes on for an indeterminate
amount of time until a CMPXCHG succeeds.

The picture is further complicated by the two-part handling of
instructions in x86_emulate(). For example, for CMPXCHG we have:

case X86EMUL_OPC(0x0f, 0xb0): case X86EMUL_OPC(0x0f, 0xb1): /* cmpxchg */
    /* Save real source value, then compare EAX against destination. */
    src.orig_val = src.val;
    src.val = _regs.r(ax);
    /* cmp: %%eax - dst ==> dst and src swapped for macro invocation */
    emulate_2op_SrcV("cmp", dst, src, _regs.eflags);
    if ( _regs.eflags & X86_EFLAGS_ZF )
    {
        /* Success: write back to memory. */
        dst.val = src.orig_val;
    }
    else
    {
        /* Failure: write the value we saw to EAX. */
        dst.type = OP_REG;
        dst.reg  = (unsigned long *)&_regs.r(ax);
    }
    break;

This is the only part that sets the proper registers and ZF, and it's
only the comparison. If this succeeds, x86_emulate() currently assumes
that the writeback part cannot fail. But then the writeback part is:

case OP_MEM:
    if ( !(d & Mov) && (dst.orig_val == dst.val) &&
         !ctxt->force_writeback )
         /* nothing to do */;
    else if ( lock_prefix )
    {
        fail_if(!ops->cmpxchg);
        rc = ops->cmpxchg(
            dst.mem.seg, dst.mem.off, &dst.orig_val,
             &dst.val, dst.bytes, ctxt);
    }
    else
    {
        fail_if(!ops->write);
        rc = ops->write(dst.mem.seg, dst.mem.off,
                        !state->simd_size ? &dst.val : (void *)mmvalp,
                        dst.bytes, ctxt);
        if ( sfence )
            asm volatile ( "sfence" ::: "memory" );
    }
    if ( rc != 0 )
        goto done;
default:
    break;

I now see that this is why using a spinlock only around the writeback
part did not solve the issue: both the compare part and the writeback
part need to be part of the same atomic operation - lock needs to be
acquired before the cmp / ZF part.

Opinions and suggestions are appreciated. If I'm not mistaken, it looks
like the smp_lock design is the better solution to the problem.


Thanks,
Razvan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH RFC] x86/emulate: implement hvmemul_cmpxchg() with an actual CMPXCHG
  2017-03-28  9:14 ` Razvan Cojocaru
@ 2017-03-28 10:03   ` Jan Beulich
  2017-03-28 10:25     ` Andrew Cooper
  2017-03-28 10:27     ` Razvan Cojocaru
  0 siblings, 2 replies; 38+ messages in thread
From: Jan Beulich @ 2017-03-28 10:03 UTC (permalink / raw)
  To: Razvan Cojocaru; +Cc: andrew.cooper3, paul.durrant, xen-devel

>>> On 28.03.17 at 11:14, <rcojocaru@bitdefender.com> wrote:
> I'm not sure that the RETRY model is what the guest OS expects. AFAIK, a
> failed CMPXCHG should happen just once, with the proper registers and ZF
> set. The guest surely expects neither that the instruction resume until
> it succeeds, nor that some hidden loop goes on for an undeterminate
> ammount of time until a CMPXCHG succeeds.

The guest doesn't observe the CMPXCHG failing - RETRY leads to
the instruction being restarted instead of completed.

There is a severe downside to MMIO accesses with this model
though: Other than for RAM, we shouldn't be reading (and even
less so writing) the memory location more than once.

> The picture is further complicated by the two-part handling of
> instructions in x86_emulate(). For example, for CMPXCHG we have:
>[...]
> I now see that this is why using a spinlock only around the writeback
> part did not solve the issue: both the compare part and the writeback
> part need to be part of the same atomic operation - lock needs to be
> aquired before the cmp / ZF part.

Not exactly: RMW insns require the lock to be acquired ahead of
the memory read, and dropped after the memory write.

> Opinions and suggestions are appreciated. If I'm not mistaken, it looks
> like the smp_lock design is the better solution to the problem.

Andrew has told me that the re-work of how we do memory accesses
in the emulator is pretty high on his priority list now. Whether we'd
want to introduce an interim solution therefore depends on the time
scale to possibly reach the only ultimately correct solution (no longer
acting on intermediate copies of the data coming from guest memory,
which is only correct for plain reads and plain writes).

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH RFC] x86/emulate: implement hvmemul_cmpxchg() with an actual CMPXCHG
  2017-03-28 10:03   ` Jan Beulich
@ 2017-03-28 10:25     ` Andrew Cooper
  2017-03-28 10:44       ` Jan Beulich
  2017-03-29  5:59       ` Jan Beulich
  2017-03-28 10:27     ` Razvan Cojocaru
  1 sibling, 2 replies; 38+ messages in thread
From: Andrew Cooper @ 2017-03-28 10:25 UTC (permalink / raw)
  To: Jan Beulich, Razvan Cojocaru; +Cc: paul.durrant, xen-devel

On 28/03/17 11:03, Jan Beulich wrote:
>>>> On 28.03.17 at 11:14, <rcojocaru@bitdefender.com> wrote:
>> I'm not sure that the RETRY model is what the guest OS expects. AFAIK, a
>> failed CMPXCHG should happen just once, with the proper registers and ZF
>> set. The guest surely expects neither that the instruction resume until
>> it succeeds, nor that some hidden loop goes on for an undeterminate
>> ammount of time until a CMPXCHG succeeds.
> The guest doesn't observe the CMPXCHG failing - RETRY leads to
> the instruction being restarted instead of completed.

And this probably is the root of the problem.  CMPXCHG on a contended
location should be observed to fail from the guest's point of view.

>
> There is a severe downside to MMIO accesses with this model
> though: Other than for RAM, we shouldn't be reading (and even
> less so writing) the memory location more than once.

With MMIO, the hardware analogy is sending a single PCIe transaction
with the atomic flag set.

However, atomic operations on non-RAM locations tend not to actually be
atomic on real hardware anyway, so so-long as we don't make multiple
reads/writes, the lack of atomicity won't be a problem in practice.

>
>> The picture is further complicated by the two-part handling of
>> instructions in x86_emulate(). For example, for CMPXCHG we have:
>> [...]
>> I now see that this is why using a spinlock only around the writeback
>> part did not solve the issue: both the compare part and the writeback
>> part need to be part of the same atomic operation - lock needs to be
>> aquired before the cmp / ZF part.
> No exactly: RMW insns require the lock to be acquired ahead of
> the memory read, and dropped after the memory write.
>
>> Opinions and suggestions are appreciated. If I'm not mistaken, it looks
>> like the smp_lock design is the better solution to the problem.
> Andrew has told me that the re-work of how we do memory accesses
> in the emulator is pretty high on his priority list now. Whether we'd
> want to introduce an interim solution therefore depends on the time
> scale to possibly reach the only ultimately correct solution (no longer
> acting on intermediate copies of the data coming from guest memory,
> which is only correct for plain reads and plain writes).

This reinforces my opinion that mapping the destination and pointing a
stub at the mapping is the only viable option.

~Andrew

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH RFC] x86/emulate: implement hvmemul_cmpxchg() with an actual CMPXCHG
  2017-03-28 10:03   ` Jan Beulich
  2017-03-28 10:25     ` Andrew Cooper
@ 2017-03-28 10:27     ` Razvan Cojocaru
  2017-03-28 10:47       ` Jan Beulich
  1 sibling, 1 reply; 38+ messages in thread
From: Razvan Cojocaru @ 2017-03-28 10:27 UTC (permalink / raw)
  To: Jan Beulich; +Cc: andrew.cooper3, paul.durrant, xen-devel

On 03/28/2017 01:03 PM, Jan Beulich wrote:
>>>> On 28.03.17 at 11:14, <rcojocaru@bitdefender.com> wrote:
>> I'm not sure that the RETRY model is what the guest OS expects. AFAIK, a
>> failed CMPXCHG should happen just once, with the proper registers and ZF
>> set. The guest surely expects neither that the instruction resume until
>> it succeeds, nor that some hidden loop goes on for an undeterminate
>> ammount of time until a CMPXCHG succeeds.
> 
> The guest doesn't observe the CMPXCHG failing - RETRY leads to
> the instruction being restarted instead of completed.

Indeed, but it works differently with hvm_emulate_one_vm_event() where
RETRY currently would have the instruction be re-executed (properly
re-executed, not just re-emulated) by the guest.

> There is a severe downside to MMIO accesses with this model
> though: Other than for RAM, we shouldn't be reading (and even
> less so writing) the memory location more than once.
> 
>> The picture is further complicated by the two-part handling of
>> instructions in x86_emulate(). For example, for CMPXCHG we have:
>> [...]
>> I now see that this is why using a spinlock only around the writeback
>> part did not solve the issue: both the compare part and the writeback
>> part need to be part of the same atomic operation - lock needs to be
>> aquired before the cmp / ZF part.
> 
> No exactly: RMW insns require the lock to be acquired ahead of
> the memory read, and dropped after the memory write.

Indeed, that's what I meant. I should have been more precise.

>> Opinions and suggestions are appreciated. If I'm not mistaken, it looks
>> like the smp_lock design is the better solution to the problem.
> 
> Andrew has told me that the re-work of how we do memory accesses
> in the emulator is pretty high on his priority list now. Whether we'd
> want to introduce an interim solution therefore depends on the time
> scale to possibly reach the only ultimately correct solution (no longer
> acting on intermediate copies of the data coming from guest memory,
> which is only correct for plain reads and plain writes).

Great! I understand, I'll switch to the smp_lock() patch as soon as the
schedule allows, and we'll see how that fits into the timeframe.


Thanks,
Razvan




_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH RFC] x86/emulate: implement hvmemul_cmpxchg() with an actual CMPXCHG
  2017-03-28 10:25     ` Andrew Cooper
@ 2017-03-28 10:44       ` Jan Beulich
  2017-03-29  5:59       ` Jan Beulich
  1 sibling, 0 replies; 38+ messages in thread
From: Jan Beulich @ 2017-03-28 10:44 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: paul.durrant, Razvan Cojocaru, xen-devel

>>> On 28.03.17 at 12:25, <andrew.cooper3@citrix.com> wrote:
> On 28/03/17 11:03, Jan Beulich wrote:
>>>>> On 28.03.17 at 11:14, <rcojocaru@bitdefender.com> wrote:
>>> I'm not sure that the RETRY model is what the guest OS expects. AFAIK, a
>>> failed CMPXCHG should happen just once, with the proper registers and ZF
>>> set. The guest surely expects neither that the instruction resume until
>>> it succeeds, nor that some hidden loop goes on for an undeterminate
>>> ammount of time until a CMPXCHG succeeds.
>> The guest doesn't observe the CMPXCHG failing - RETRY leads to
>> the instruction being restarted instead of completed.
> 
> And this probably is the root of the problem.  CMPXCHG on a contended
> location should be observed to fail from the guests point of view.

Oops - my reply was imprecise: A _guest_ CMPXCHG would of
course be observed to have failed by the guest. Us using
CMPXCHG to carry out some other atomic op wouldn't.

>>> The picture is further complicated by the two-part handling of
>>> instructions in x86_emulate(). For example, for CMPXCHG we have:
>>> [...]
>>> I now see that this is why using a spinlock only around the writeback
>>> part did not solve the issue: both the compare part and the writeback
>>> part need to be part of the same atomic operation - lock needs to be
>>> aquired before the cmp / ZF part.
>> No exactly: RMW insns require the lock to be acquired ahead of
>> the memory read, and dropped after the memory write.
>>
>>> Opinions and suggestions are appreciated. If I'm not mistaken, it looks
>>> like the smp_lock design is the better solution to the problem.
>> Andrew has told me that the re-work of how we do memory accesses
>> in the emulator is pretty high on his priority list now. Whether we'd
>> want to introduce an interim solution therefore depends on the time
>> scale to possibly reach the only ultimately correct solution (no longer
>> acting on intermediate copies of the data coming from guest memory,
>> which is only correct for plain reads and plain writes).
> 
> This re-enforces my opinion that mapping the destination and pointing a
> stub at the mapping is the only viable option.

Well, you re-state what I was saying.

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH RFC] x86/emulate: implement hvmemul_cmpxchg() with an actual CMPXCHG
  2017-03-28 10:27     ` Razvan Cojocaru
@ 2017-03-28 10:47       ` Jan Beulich
  2017-03-28 10:50         ` Razvan Cojocaru
  0 siblings, 1 reply; 38+ messages in thread
From: Jan Beulich @ 2017-03-28 10:47 UTC (permalink / raw)
  To: Razvan Cojocaru; +Cc: andrew.cooper3, paul.durrant, xen-devel

>>> On 28.03.17 at 12:27, <rcojocaru@bitdefender.com> wrote:
> On 03/28/2017 01:03 PM, Jan Beulich wrote:
>>>>> On 28.03.17 at 11:14, <rcojocaru@bitdefender.com> wrote:
>>> I'm not sure that the RETRY model is what the guest OS expects. AFAIK, a
>>> failed CMPXCHG should happen just once, with the proper registers and ZF
>>> set. The guest surely expects neither that the instruction resume until
>>> it succeeds, nor that some hidden loop goes on for an undeterminate
>>> ammount of time until a CMPXCHG succeeds.
>> 
>> The guest doesn't observe the CMPXCHG failing - RETRY leads to
>> the instruction being restarted instead of completed.
> 
> Indeed, but it works differently with hvm_emulate_one_vm_event() where
> RETRY currently would have the instruction be re-executed (properly
> re-executed, not just re-emulated) by the guest.

Right - see my other reply to Andrew: The function likely would
need to tell apart guest CMPXCHG uses from us using the insn to
carry out the write by some other one. That may involve
adjustments to the memory write logic in x86_emulate() itself, as
the late failure of the comparison then would also need to be
communicated back (via ZF clear) to the guest.

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH RFC] x86/emulate: implement hvmemul_cmpxchg() with an actual CMPXCHG
  2017-03-28 10:47       ` Jan Beulich
@ 2017-03-28 10:50         ` Razvan Cojocaru
  2017-03-28 11:32           ` Jan Beulich
  2017-03-29 13:55           ` Jan Beulich
  0 siblings, 2 replies; 38+ messages in thread
From: Razvan Cojocaru @ 2017-03-28 10:50 UTC (permalink / raw)
  To: Jan Beulich; +Cc: andrew.cooper3, paul.durrant, xen-devel

On 03/28/2017 01:47 PM, Jan Beulich wrote:
>>>> On 28.03.17 at 12:27, <rcojocaru@bitdefender.com> wrote:
>> On 03/28/2017 01:03 PM, Jan Beulich wrote:
>>>>>> On 28.03.17 at 11:14, <rcojocaru@bitdefender.com> wrote:
>>>> I'm not sure that the RETRY model is what the guest OS expects. AFAIK, a
>>>> failed CMPXCHG should happen just once, with the proper registers and ZF
>>>> set. The guest surely expects neither that the instruction resume until
>>>> it succeeds, nor that some hidden loop goes on for an undeterminate
>>>> ammount of time until a CMPXCHG succeeds.
>>>
>>> The guest doesn't observe the CMPXCHG failing - RETRY leads to
>>> the instruction being restarted instead of completed.
>>
>> Indeed, but it works differently with hvm_emulate_one_vm_event() where
>> RETRY currently would have the instruction be re-executed (properly
>> re-executed, not just re-emulated) by the guest.
> 
> Right - see my other reply to Andrew: The function likely would
> need to tell apart guest CMPXCHG uses from us using the insn to
> carry out the write by some other one. That may involve
> adjustments to the memory write logic in x86_emulate() itself, as
> the late failure of the comparison then would also need to be
> communicated back (via ZF clear) to the guest.

Exactly, it would require quite some reworking of x86_emulate().


Thanks,
Razvan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH RFC] x86/emulate: implement hvmemul_cmpxchg() with an actual CMPXCHG
  2017-03-28 10:50         ` Razvan Cojocaru
@ 2017-03-28 11:32           ` Jan Beulich
  2017-03-29 13:55           ` Jan Beulich
  1 sibling, 0 replies; 38+ messages in thread
From: Jan Beulich @ 2017-03-28 11:32 UTC (permalink / raw)
  To: Razvan Cojocaru; +Cc: andrew.cooper3, paul.durrant, xen-devel

>>> On 28.03.17 at 12:50, <rcojocaru@bitdefender.com> wrote:
> On 03/28/2017 01:47 PM, Jan Beulich wrote:
>>>>> On 28.03.17 at 12:27, <rcojocaru@bitdefender.com> wrote:
>>> On 03/28/2017 01:03 PM, Jan Beulich wrote:
>>>>>>> On 28.03.17 at 11:14, <rcojocaru@bitdefender.com> wrote:
>>>>> I'm not sure that the RETRY model is what the guest OS expects. AFAIK, a
>>>>> failed CMPXCHG should happen just once, with the proper registers and ZF
>>>>> set. The guest surely expects neither that the instruction resume until
>>>>> it succeeds, nor that some hidden loop goes on for an undeterminate
>>>>> ammount of time until a CMPXCHG succeeds.
>>>>
>>>> The guest doesn't observe the CMPXCHG failing - RETRY leads to
>>>> the instruction being restarted instead of completed.
>>>
>>> Indeed, but it works differently with hvm_emulate_one_vm_event() where
>>> RETRY currently would have the instruction be re-executed (properly
>>> re-executed, not just re-emulated) by the guest.
>> 
>> Right - see my other reply to Andrew: The function likely would
>> need to tell apart guest CMPXCHG uses from us using the insn to
>> carry out the write by some other one. That may involve
>> adjustments to the memory write logic in x86_emulate() itself, as
>> the late failure of the comparison then would also need to be
>> communicated back (via ZF clear) to the guest.
> 
> Exactly, it would require quite some reworking of x86_emulate().

I don't think so, no. It would merely require a special case step
following ->cmpxchg() to deal with it being CMPXCHG we're
emulating. Plus matching code in CMPXCHG{8,16}B emulation.

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH RFC] x86/emulate: implement hvmemul_cmpxchg() with an actual CMPXCHG
  2017-03-28 10:25     ` Andrew Cooper
  2017-03-28 10:44       ` Jan Beulich
@ 2017-03-29  5:59       ` Jan Beulich
  2017-03-29  8:14         ` Razvan Cojocaru
  1 sibling, 1 reply; 38+ messages in thread
From: Jan Beulich @ 2017-03-29  5:59 UTC (permalink / raw)
  To: Razvan Cojocaru, Andrew Cooper; +Cc: paul.durrant, xen-devel

>>> On 28.03.17 at 12:25, <andrew.cooper3@citrix.com> wrote:
> On 28/03/17 11:03, Jan Beulich wrote:
>> There is a severe downside to MMIO accesses with this model
>> though: Other than for RAM, we shouldn't be reading (and even
>> less so writing) the memory location more than once.
> 
> With MMIO, the hardware analogy is sending a single PCIe transaction
> with the atomic flag set.
> 
> However, atomic operations on non-RAM locations tend not to actually be
> atomic on real hardware anyway, so as long as we don't make multiple
> reads/writes, the lack of atomicity won't be a problem in practice.

Are you sure about this? Before caches were introduced, all LOCKed
insns used a bus lock; I would very much assume that this model is
being followed for backwards compatibility for all LOCKed memory
accesses not going through the cache.

Furthermore I've been thinking a little more about the proposed
new memory write approach: That won't work for the original
purpose the emulator has - emulating insns in order to send/get
data to/from qemu. In that case there simply is no page to map,
and this then also explains the current ->write() mechanism.
Multiple reads in this case are being avoided by going through
holding the data in struct hvm_mmio_cache. Multiple writes
_currently_ are being avoided by hvmemul_cmpxchg() forwarding
to hvmemul_write(). This property would need to be retained
for any adjustments we make.

As to the CMPXCHG insn (mis-)handling, I think it'll be best if I
come forward with the outlined x86_emulate() adjustment.
Whether that'll be acceptable for 4.9 we'll have to see.

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH RFC] x86/emulate: implement hvmemul_cmpxchg() with an actual CMPXCHG
  2017-03-29  5:59       ` Jan Beulich
@ 2017-03-29  8:14         ` Razvan Cojocaru
  0 siblings, 0 replies; 38+ messages in thread
From: Razvan Cojocaru @ 2017-03-29  8:14 UTC (permalink / raw)
  To: Jan Beulich, Andrew Cooper; +Cc: paul.durrant, xen-devel

On 03/29/2017 08:59 AM, Jan Beulich wrote:
> As to the CMPXCHG insn (mis-)handling, I think it'll be best if I
> come forward with the outlined x86_emulate() adjustment.
> Whether that'll be acceptable for 4.9 we'll have to see.

Thanks! I'll test it with the CMPXCHG changes in my patch when it
becomes available.


Razvan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH RFC] x86/emulate: implement hvmemul_cmpxchg() with an actual CMPXCHG
  2017-03-28 10:50         ` Razvan Cojocaru
  2017-03-28 11:32           ` Jan Beulich
@ 2017-03-29 13:55           ` Jan Beulich
  2017-03-29 14:00             ` Razvan Cojocaru
  2017-03-29 14:12             ` Razvan Cojocaru
  1 sibling, 2 replies; 38+ messages in thread
From: Jan Beulich @ 2017-03-29 13:55 UTC (permalink / raw)
  To: Razvan Cojocaru; +Cc: andrew.cooper3, paul.durrant, Tim Deegan, xen-devel

[-- Attachment #1: Type: text/plain, Size: 1955 bytes --]

>>> On 28.03.17 at 12:50, <rcojocaru@bitdefender.com> wrote:
> On 03/28/2017 01:47 PM, Jan Beulich wrote:
>>>>> On 28.03.17 at 12:27, <rcojocaru@bitdefender.com> wrote:
>>> On 03/28/2017 01:03 PM, Jan Beulich wrote:
>>>>>>> On 28.03.17 at 11:14, <rcojocaru@bitdefender.com> wrote:
>>>>> I'm not sure that the RETRY model is what the guest OS expects. AFAIK, a
>>>>> failed CMPXCHG should happen just once, with the proper registers and ZF
>>>>> set. The guest surely expects neither that the instruction resume until
>>>>> it succeeds, nor that some hidden loop goes on for an indeterminate
>>>>> amount of time until a CMPXCHG succeeds.
>>>>
>>>> The guest doesn't observe the CMPXCHG failing - RETRY leads to
>>>> the instruction being restarted instead of completed.
>>>
>>> Indeed, but it works differently with hvm_emulate_one_vm_event() where
>>> RETRY currently would have the instruction be re-executed (properly
>>> re-executed, not just re-emulated) by the guest.
>> 
>> Right - see my other reply to Andrew: The function likely would
>> need to tell apart guest CMPXCHG uses from us using the insn to
>> carry out the write by some other one. That may involve
>> adjustments to the memory write logic in x86_emulate() itself, as
>> the late failure of the comparison then would also need to be
>> communicated back (via ZF clear) to the guest.
> 
> Exactly, it would require quite some reworking of x86_emulate().

I had imagined it to be less intrusive (outside of x86_emulate()),
but I've now learned why Andrew was able to get rid of
X86EMUL_CMPXCHG_FAILED - the apparently intended behavior
was never implemented. Attached a first take at it, which has
seen smoke testing, but nothing more. The way it ends up being
I don't think this can reasonably be considered for 4.9 at this
point in time. (Also Cc-ing Tim for the shadow code changes,
even if this isn't really a proper patch submission.)

Jan


[-- Attachment #2: x86emul-cmpxchg-fail.patch --]
[-- Type: text/plain, Size: 13572 bytes --]

x86emul: correctly handle CMPXCHG* comparison failures

If the ->cmpxchg() hook finds a mismatch, we should deal with this the
same as when the "manual" comparison reports a mismatch.

This involves reverting bfce0e62c3 ("x86/emul: Drop
X86EMUL_CMPXCHG_FAILED"), albeit with X86EMUL_CMPXCHG_FAILED now
becoming a value distinct from X86EMUL_RETRY.

In order to not leave mixed code also fully switch affected functions
from paddr_t to intpte_t.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
The code could be further simplified if we could rely on all
->cmpxchg() hooks always using CMPXCHG, but for now we need to cope
with them using plain write (and hence accept the double reads if
CMPXCHG is actually being used).
Note that the patch doesn't address the incorrectness of there not
being a memory write even in the comparison-failed case.

--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -5236,16 +5236,17 @@ static int ptwr_emulated_read(
 
 static int ptwr_emulated_update(
     unsigned long addr,
-    paddr_t old,
-    paddr_t val,
+    intpte_t *p_old,
+    intpte_t val,
     unsigned int bytes,
-    unsigned int do_cmpxchg,
     struct ptwr_emulate_ctxt *ptwr_ctxt)
 {
     unsigned long mfn;
     unsigned long unaligned_addr = addr;
     struct page_info *page;
     l1_pgentry_t pte, ol1e, nl1e, *pl1e;
+    intpte_t old = p_old ? *p_old : 0;
+    unsigned int offset = 0;
     struct vcpu *v = current;
     struct domain *d = v->domain;
     int ret;
@@ -5259,28 +5260,30 @@ static int ptwr_emulated_update(
     }
 
     /* Turn a sub-word access into a full-word access. */
-    if ( bytes != sizeof(paddr_t) )
+    if ( bytes != sizeof(val) )
     {
-        paddr_t      full;
-        unsigned int rc, offset = addr & (sizeof(paddr_t)-1);
+        intpte_t full;
+        unsigned int rc;
+
+        offset = addr & (sizeof(full) - 1);
 
         /* Align address; read full word. */
-        addr &= ~(sizeof(paddr_t)-1);
-        if ( (rc = copy_from_user(&full, (void *)addr, sizeof(paddr_t))) != 0 )
+        addr &= ~(sizeof(full) - 1);
+        if ( (rc = copy_from_user(&full, (void *)addr, sizeof(full))) != 0 )
         {
             x86_emul_pagefault(0, /* Read fault. */
-                               addr + sizeof(paddr_t) - rc,
+                               addr + sizeof(full) - rc,
                                &ptwr_ctxt->ctxt);
             return X86EMUL_EXCEPTION;
         }
         /* Mask out bits provided by caller. */
-        full &= ~((((paddr_t)1 << (bytes*8)) - 1) << (offset*8));
+        full &= ~((((intpte_t)1 << (bytes * 8)) - 1) << (offset * 8));
         /* Shift the caller value and OR in the missing bits. */
-        val  &= (((paddr_t)1 << (bytes*8)) - 1);
+        val  &= (((intpte_t)1 << (bytes * 8)) - 1);
         val <<= (offset)*8;
         val  |= full;
         /* Also fill in missing parts of the cmpxchg old value. */
-        old  &= (((paddr_t)1 << (bytes*8)) - 1);
+        old  &= (((intpte_t)1 << (bytes * 8)) - 1);
         old <<= (offset)*8;
         old  |= full;
     }
@@ -5302,7 +5305,7 @@ static int ptwr_emulated_update(
     {
     default:
         if ( is_pv_32bit_domain(d) && (bytes == 4) && (unaligned_addr & 4) &&
-             !do_cmpxchg && (l1e_get_flags(nl1e) & _PAGE_PRESENT) )
+             !p_old && (l1e_get_flags(nl1e) & _PAGE_PRESENT) )
         {
             /*
              * If this is an upper-half write to a PAE PTE then we assume that
@@ -5333,21 +5336,26 @@ static int ptwr_emulated_update(
     /* Checked successfully: do the update (write or cmpxchg). */
     pl1e = map_domain_page(_mfn(mfn));
     pl1e = (l1_pgentry_t *)((unsigned long)pl1e + (addr & ~PAGE_MASK));
-    if ( do_cmpxchg )
+    if ( p_old )
     {
-        int okay;
-        intpte_t t = old;
         ol1e = l1e_from_intpte(old);
 
-        okay = paging_cmpxchg_guest_entry(v, &l1e_get_intpte(*pl1e),
-                                          &t, l1e_get_intpte(nl1e), _mfn(mfn));
-        okay = (okay && t == old);
+        if ( !paging_cmpxchg_guest_entry(v, &l1e_get_intpte(*pl1e),
+                                         &old, l1e_get_intpte(nl1e), _mfn(mfn)) )
+            ret = X86EMUL_UNHANDLEABLE;
+        else if ( l1e_get_intpte(ol1e) == old )
+            ret = X86EMUL_OKAY;
+        else
+        {
+            *p_old = old >> (offset * 8);
+            ret = X86EMUL_CMPXCHG_FAILED;
+        }
 
-        if ( !okay )
+        if ( ret != X86EMUL_OKAY )
         {
             unmap_domain_page(pl1e);
             put_page_from_l1e(nl1e, d);
-            return X86EMUL_RETRY;
+            return ret;
         }
     }
     else
@@ -5374,9 +5382,9 @@ static int ptwr_emulated_write(
     unsigned int bytes,
     struct x86_emulate_ctxt *ctxt)
 {
-    paddr_t val = 0;
+    intpte_t val = 0;
 
-    if ( (bytes > sizeof(paddr_t)) || (bytes & (bytes - 1)) || !bytes )
+    if ( (bytes > sizeof(val)) || (bytes & (bytes - 1)) || !bytes )
     {
         MEM_LOG("ptwr_emulate: bad write size (addr=%lx, bytes=%u)",
                 offset, bytes);
@@ -5385,9 +5393,9 @@ static int ptwr_emulated_write(
 
     memcpy(&val, p_data, bytes);
 
-    return ptwr_emulated_update(
-        offset, 0, val, bytes, 0,
-        container_of(ctxt, struct ptwr_emulate_ctxt, ctxt));
+    return ptwr_emulated_update(offset, NULL, val, bytes,
+                                container_of(ctxt, struct ptwr_emulate_ctxt,
+                                             ctxt));
 }
 
 static int ptwr_emulated_cmpxchg(
@@ -5398,21 +5406,20 @@ static int ptwr_emulated_cmpxchg(
     unsigned int bytes,
     struct x86_emulate_ctxt *ctxt)
 {
-    paddr_t old = 0, new = 0;
+    intpte_t new = 0;
 
-    if ( (bytes > sizeof(paddr_t)) || (bytes & (bytes -1)) )
+    if ( (bytes > sizeof(new)) || (bytes & (bytes -1)) )
     {
         MEM_LOG("ptwr_emulate: bad cmpxchg size (addr=%lx, bytes=%u)",
                 offset, bytes);
         return X86EMUL_UNHANDLEABLE;
     }
 
-    memcpy(&old, p_old, bytes);
     memcpy(&new, p_new, bytes);
 
-    return ptwr_emulated_update(
-        offset, old, new, bytes, 1,
-        container_of(ctxt, struct ptwr_emulate_ctxt, ctxt));
+    return ptwr_emulated_update(offset, p_old, new, bytes,
+                                container_of(ctxt, struct ptwr_emulate_ctxt,
+                                             ctxt));
 }
 
 static int pv_emul_is_mem_write(const struct x86_emulate_state *state,
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -285,7 +285,7 @@ hvm_emulate_cmpxchg(enum x86_segment seg
     struct sh_emulate_ctxt *sh_ctxt =
         container_of(ctxt, struct sh_emulate_ctxt, ctxt);
     struct vcpu *v = current;
-    unsigned long addr, old, new;
+    unsigned long addr, new = 0;
     int rc;
 
     if ( bytes > sizeof(long) )
@@ -296,12 +296,10 @@ hvm_emulate_cmpxchg(enum x86_segment seg
     if ( rc )
         return rc;
 
-    old = new = 0;
-    memcpy(&old, p_old, bytes);
     memcpy(&new, p_new, bytes);
 
     return v->arch.paging.mode->shadow.x86_emulate_cmpxchg(
-               v, addr, old, new, bytes, sh_ctxt);
+               v, addr, p_old, new, bytes, sh_ctxt);
 }
 
 static const struct x86_emulate_ops hvm_shadow_emulator_ops = {
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -4755,11 +4755,11 @@ sh_x86_emulate_write(struct vcpu *v, uns
 
 static int
 sh_x86_emulate_cmpxchg(struct vcpu *v, unsigned long vaddr,
-                        unsigned long old, unsigned long new,
-                        unsigned int bytes, struct sh_emulate_ctxt *sh_ctxt)
+                       unsigned long *p_old, unsigned long new,
+                       unsigned int bytes, struct sh_emulate_ctxt *sh_ctxt)
 {
     void *addr;
-    unsigned long prev;
+    unsigned long prev, old = *p_old;
     int rv = X86EMUL_OKAY;
 
     /* Unaligned writes are only acceptable on HVM */
@@ -4783,7 +4783,10 @@ sh_x86_emulate_cmpxchg(struct vcpu *v, u
     }
 
     if ( prev != old )
-        rv = X86EMUL_RETRY;
+    {
+        *p_old = prev;
+        rv = X86EMUL_CMPXCHG_FAILED;
+    }
 
     SHADOW_DEBUG(EMULATE, "va %#lx was %#lx expected %#lx"
                   " wanted %#lx now %#lx bytes %u\n",
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1880,6 +1880,9 @@ protmode_load_seg(
 
         default:
             return rc;
+
+        case X86EMUL_CMPXCHG_FAILED:
+            return X86EMUL_RETRY;
         }
 
         /* Force the Accessed flag in our local copy. */
@@ -6702,6 +6705,7 @@ x86_emulate(
         break;
 
     case X86EMUL_OPC(0x0f, 0xb0): case X86EMUL_OPC(0x0f, 0xb1): /* cmpxchg */
+        fail_if(!ops->cmpxchg);
         /* Save real source value, then compare EAX against destination. */
         src.orig_val = src.val;
         src.val = _regs.r(ax);
@@ -6710,8 +6714,17 @@ x86_emulate(
         if ( _regs.eflags & X86_EFLAGS_ZF )
         {
             /* Success: write back to memory. */
-            dst.val = src.orig_val;
+            dst.val = src.val;
+            rc = ops->cmpxchg(dst.mem.seg, dst.mem.off, &dst.val,
+                              &src.orig_val, dst.bytes, ctxt);
+            if ( rc == X86EMUL_CMPXCHG_FAILED )
+            {
+               _regs.eflags &= ~X86_EFLAGS_ZF;
+               rc = X86EMUL_OKAY;
+            }
         }
+        if ( _regs.eflags & X86_EFLAGS_ZF )
+            dst.type = OP_NONE;
         else
         {
             /* Failure: write the value we saw to EAX. */
@@ -7016,6 +7029,7 @@ x86_emulate(
 
         if ( memcmp(old, aux, op_bytes) )
         {
+        cmpxchgNb_failed:
             /* Expected != actual: store actual to rDX:rAX and clear ZF. */
             _regs.r(ax) = !(rex_prefix & REX_W) ? old->u32[0] : old->u64[0];
             _regs.r(dx) = !(rex_prefix & REX_W) ? old->u32[1] : old->u64[1];
@@ -7025,7 +7039,7 @@ x86_emulate(
         {
             /*
              * Expected == actual: Get proposed value, attempt atomic cmpxchg
-             * and set ZF.
+             * and set ZF if successful.
              */
             if ( !(rex_prefix & REX_W) )
             {
@@ -7038,10 +7052,20 @@ x86_emulate(
                 aux->u64[1] = _regs.r(cx);
             }
 
-            if ( (rc = ops->cmpxchg(ea.mem.seg, ea.mem.off, old, aux,
-                                    op_bytes, ctxt)) != X86EMUL_OKAY )
+            switch ( rc = ops->cmpxchg(ea.mem.seg, ea.mem.off, old, aux,
+                                       op_bytes, ctxt) )
+            {
+            case X86EMUL_OKAY:
+                _regs.eflags |= X86_EFLAGS_ZF;
+                break;
+
+            case X86EMUL_CMPXCHG_FAILED:
+                rc = X86EMUL_OKAY;
+                goto cmpxchgNb_failed;
+
+            default:
                 goto done;
-            _regs.eflags |= X86_EFLAGS_ZF;
+            }
         }
         break;
     }
@@ -8049,6 +8073,8 @@ x86_emulate(
             rc = ops->cmpxchg(
                 dst.mem.seg, dst.mem.off, &dst.orig_val,
                 &dst.val, dst.bytes, ctxt);
+            if ( rc == X86EMUL_CMPXCHG_FAILED )
+                rc = X86EMUL_RETRY;
         }
         else
         {
--- a/xen/arch/x86/x86_emulate/x86_emulate.h
+++ b/xen/arch/x86/x86_emulate/x86_emulate.h
@@ -153,6 +153,8 @@ struct x86_emul_fpu_aux {
 #define X86EMUL_EXCEPTION      2
  /* Retry the emulation for some reason. No state modified. */
 #define X86EMUL_RETRY          3
+ /* (cmpxchg accessor): CMPXCHG failed. */
+#define X86EMUL_CMPXCHG_FAILED 4
  /*
   * Operation fully done by one of the hooks:
   * - validate(): operation completed (except common insn retire logic)
@@ -160,7 +162,7 @@ struct x86_emul_fpu_aux {
   * - read_io() / write_io(): bypass GPR update (non-string insns only)
   * Undefined behavior when used anywhere else.
   */
-#define X86EMUL_DONE           4
+#define X86EMUL_DONE           5
 
 /* FPU sub-types which may be requested via ->get_fpu(). */
 enum x86_emulate_fpu_type {
@@ -250,6 +252,8 @@ struct x86_emulate_ops
     /*
      * cmpxchg: Emulate an atomic (LOCKed) CMPXCHG operation.
      *  @p_old: [IN ] Pointer to value expected to be current at @addr.
+     *          [OUT] Pointer to value found at @addr (may always be
+     *                updated, meaningful for X86EMUL_CMPXCHG_FAILED only).
      *  @p_new: [IN ] Pointer to value to write to @addr.
      *  @bytes: [IN ] Operation size (up to 8 (x86/32) or 16 (x86/64) bytes).
      */
--- a/xen/include/asm-x86/paging.h
+++ b/xen/include/asm-x86/paging.h
@@ -89,7 +89,7 @@ struct shadow_paging_mode {
                                             void *src, u32 bytes,
                                             struct sh_emulate_ctxt *sh_ctxt);
     int           (*x86_emulate_cmpxchg   )(struct vcpu *v, unsigned long va,
-                                            unsigned long old, 
+                                            unsigned long *old,
                                             unsigned long new,
                                             unsigned int bytes,
                                             struct sh_emulate_ctxt *sh_ctxt);

[-- Attachment #3: Type: text/plain, Size: 127 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH RFC] x86/emulate: implement hvmemul_cmpxchg() with an actual CMPXCHG
  2017-03-29 13:55           ` Jan Beulich
@ 2017-03-29 14:00             ` Razvan Cojocaru
  2017-03-29 15:04               ` Razvan Cojocaru
  2017-03-29 14:12             ` Razvan Cojocaru
  1 sibling, 1 reply; 38+ messages in thread
From: Razvan Cojocaru @ 2017-03-29 14:00 UTC (permalink / raw)
  To: Jan Beulich; +Cc: andrew.cooper3, paul.durrant, Tim Deegan, xen-devel

On 03/29/2017 04:55 PM, Jan Beulich wrote:
>>>> On 28.03.17 at 12:50, <rcojocaru@bitdefender.com> wrote:
>> On 03/28/2017 01:47 PM, Jan Beulich wrote:
>>>>>> On 28.03.17 at 12:27, <rcojocaru@bitdefender.com> wrote:
>>>> On 03/28/2017 01:03 PM, Jan Beulich wrote:
>>>>>>>> On 28.03.17 at 11:14, <rcojocaru@bitdefender.com> wrote:
>>>>>> I'm not sure that the RETRY model is what the guest OS expects. AFAIK, a
>>>>>> failed CMPXCHG should happen just once, with the proper registers and ZF
>>>>>> set. The guest surely expects neither that the instruction resume until
>>>>>> it succeeds, nor that some hidden loop goes on for an indeterminate
>>>>>> amount of time until a CMPXCHG succeeds.
>>>>>
>>>>> The guest doesn't observe the CMPXCHG failing - RETRY leads to
>>>>> the instruction being restarted instead of completed.
>>>>
>>>> Indeed, but it works differently with hvm_emulate_one_vm_event() where
>>>> RETRY currently would have the instruction be re-executed (properly
>>>> re-executed, not just re-emulated) by the guest.
>>>
>>> Right - see my other reply to Andrew: The function likely would
>>> need to tell apart guest CMPXCHG uses from us using the insn to
>>> carry out the write by some other one. That may involve
>>> adjustments to the memory write logic in x86_emulate() itself, as
>>> the late failure of the comparison then would also need to be
>>> communicated back (via ZF clear) to the guest.
>>
>> Exactly, it would require quite some reworking of x86_emulate().
> 
> I had imagined it to be less intrusive (outside of x86_emulate()),
> but I've now learned why Andrew was able to get rid of
> X86EMUL_CMPXCHG_FAILED - the apparently intended behavior
> was never implemented. Attached a first take at it, which has
> seen smoke testing, but nothing more. The way it ends up being
> I don't think this can reasonably be considered for 4.9 at this
> point in time. (Also Cc-ing Tim for the shadow code changes,
> even if this isn't really a proper patch submission.)

Thanks! I'll give it a spin with a modified version of my CMPXCHG patch as
soon as possible.


Razvan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH RFC] x86/emulate: implement hvmemul_cmpxchg() with an actual CMPXCHG
  2017-03-29 13:55           ` Jan Beulich
  2017-03-29 14:00             ` Razvan Cojocaru
@ 2017-03-29 14:12             ` Razvan Cojocaru
  1 sibling, 0 replies; 38+ messages in thread
From: Razvan Cojocaru @ 2017-03-29 14:12 UTC (permalink / raw)
  To: Jan Beulich; +Cc: andrew.cooper3, paul.durrant, Tim Deegan, xen-devel

On 03/29/2017 04:55 PM, Jan Beulich wrote:
>>>> On 28.03.17 at 12:50, <rcojocaru@bitdefender.com> wrote:
>> On 03/28/2017 01:47 PM, Jan Beulich wrote:
>>>>>> On 28.03.17 at 12:27, <rcojocaru@bitdefender.com> wrote:
>>>> On 03/28/2017 01:03 PM, Jan Beulich wrote:
>>>>>>>> On 28.03.17 at 11:14, <rcojocaru@bitdefender.com> wrote:
>>>>>> I'm not sure that the RETRY model is what the guest OS expects. AFAIK, a
>>>>>> failed CMPXCHG should happen just once, with the proper registers and ZF
>>>>>> set. The guest surely expects neither that the instruction resume until
>>>>>> it succeeds, nor that some hidden loop goes on for an indeterminate
>>>>>> amount of time until a CMPXCHG succeeds.
>>>>>
>>>>> The guest doesn't observe the CMPXCHG failing - RETRY leads to
>>>>> the instruction being restarted instead of completed.
>>>>
>>>> Indeed, but it works differently with hvm_emulate_one_vm_event() where
>>>> RETRY currently would have the instruction be re-executed (properly
>>>> re-executed, not just re-emulated) by the guest.
>>>
>>> Right - see my other reply to Andrew: The function likely would
>>> need to tell apart guest CMPXCHG uses from us using the insn to
>>> carry out the write by some other one. That may involve
>>> adjustments to the memory write logic in x86_emulate() itself, as
>>> the late failure of the comparison then would also need to be
>>> communicated back (via ZF clear) to the guest.
>>
>> Exactly, it would require quite some reworking of x86_emulate().
> 
> I had imagined it to be less intrusive (outside of x86_emulate()),
> but I've now learned why Andrew was able to get rid of
> X86EMUL_CMPXCHG_FAILED - the apparently intended behavior
> was never implemented. Attached a first take at it, which has
> seen smoke testing, but nothing more. The way it ends up being
> I don't think this can reasonably be considered for 4.9 at this
> point in time. (Also Cc-ing Tim for the shadow code changes,
> even if this isn't really a proper patch submission.)

I have this xenstored-related error when trying to build the latest
staging, not sure who this should be forwarded to (hopefully I'm not
spamming):

make -C xenstored install
make[6]: Entering directory `/home/red/work/xen.git/tools/ocaml/xenstored'
rm -f paths.ml.tmp;  printf "let %s = \"%s\";;\n" sbindir /usr/sbin
>>paths.ml.tmp;  printf "let %s = \"%s\";;\n" bindir /usr/bin
>>paths.ml.tmp;  printf "let %s = \"%s\";;\n" libexec /usr/lib/xen
>>paths.ml.tmp;  printf "let %s = \"%s\";;\n" libexec_bin
/usr/lib/xen/bin >>paths.ml.tmp;  printf "let %s = \"%s\";;\n" libdir
/usr/lib >>paths.ml.tmp;  printf "let %s = \"%s\";;\n" sharedir
/usr/share >>paths.ml.tmp;  printf "let %s = \"%s\";;\n" xenfirmwaredir
/usr/lib/xen/boot >>paths.ml.tmp;  printf "let %s = \"%s\";;\n"
xen_config_dir /etc/xen >>paths.ml.tmp;  printf "let %s = \"%s\";;\n"
xen_script_dir /etc/xen/scripts >>paths.ml.tmp;  printf "let %s =
\"%s\";;\n" xen_lock_dir /var/lock >>paths.ml.tmp;  printf "let %s =
\"%s\";;\n" xen_run_dir /var/run/xen >>paths.ml.tmp;  printf "let %s =
\"%s\";;\n" xen_paging_dir /var/lib/xen/xenpaging >>paths.ml.tmp;
printf "let %s = \"%s\";;\n" xen_dump_dir /var/lib/xen/dump
>>paths.ml.tmp;  printf "let %s = \"%s\";;\n" xen_log_dir /var/log/xen
>>paths.ml.tmp;  printf "let %s = \"%s\";;\n" xen_lib_dir /var/lib/xen
>>paths.ml.tmp;  printf "let %s = \"%s\";;\n" xen_run_stored
/var/run/xenstored >>paths.ml.tmp;  if ! cmp -s paths.ml.tmp paths.ml;
then mv -f paths.ml.tmp paths.ml; else rm -f paths.ml.tmp; fi
rm -f _paths.h.tmp;  echo "#define sbindir \"/usr/sbin\""
>>_paths.h.tmp;  echo "#define bindir \"/usr/bin\"" >>_paths.h.tmp;
echo "#define LIBEXEC \"/usr/lib/xen\"" >>_paths.h.tmp;  echo "#define
LIBEXEC_BIN \"/usr/lib/xen/bin\"" >>_paths.h.tmp;  echo "#define libdir
\"/usr/lib\"" >>_paths.h.tmp;  echo "#define SHAREDIR \"/usr/share\""
>>_paths.h.tmp;  echo "#define XENFIRMWAREDIR \"/usr/lib/xen/boot\""
>>_paths.h.tmp;  echo "#define XEN_CONFIG_DIR \"/etc/xen\""
>>_paths.h.tmp;  echo "#define XEN_SCRIPT_DIR \"/etc/xen/scripts\""
>>_paths.h.tmp;  echo "#define XEN_LOCK_DIR \"/var/lock\""
>>_paths.h.tmp;  echo "#define XEN_RUN_DIR \"/var/run/xen\""
>>_paths.h.tmp;  echo "#define XEN_PAGING_DIR
\"/var/lib/xen/xenpaging\"" >>_paths.h.tmp;  echo "#define XEN_DUMP_DIR
\"/var/lib/xen/dump\"" >>_paths.h.tmp;  echo "#define XEN_LOG_DIR
\"/var/log/xen\"" >>_paths.h.tmp;  echo "#define XEN_LIB_DIR
\"/var/lib/xen\"" >>_paths.h.tmp;  echo "#define XEN_RUN_STORED
\"/var/run/xenstored\"" >>_paths.h.tmp;  if ! cmp -s _paths.h.tmp
_paths.h; then mv -f _paths.h.tmp _paths.h; else rm -f _paths.h.tmp; fi
 MLOPT    store.cmx
File "store.ml", line 1:
Error: The files perms.cmi and define.cmi make inconsistent assumptions
       over interface Define
make[6]: *** [store.cmx] Error 2

This happens on "make dist".


Thanks,
Razvan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH RFC] x86/emulate: implement hvmemul_cmpxchg() with an actual CMPXCHG
  2017-03-29 14:00             ` Razvan Cojocaru
@ 2017-03-29 15:04               ` Razvan Cojocaru
  2017-03-29 15:49                 ` Razvan Cojocaru
  0 siblings, 1 reply; 38+ messages in thread
From: Razvan Cojocaru @ 2017-03-29 15:04 UTC (permalink / raw)
  To: Jan Beulich; +Cc: andrew.cooper3, paul.durrant, Tim Deegan, xen-devel

[-- Attachment #1: Type: text/plain, Size: 3731 bytes --]

On 03/29/2017 05:00 PM, Razvan Cojocaru wrote:
> On 03/29/2017 04:55 PM, Jan Beulich wrote:
>>>>> On 28.03.17 at 12:50, <rcojocaru@bitdefender.com> wrote:
>>> On 03/28/2017 01:47 PM, Jan Beulich wrote:
>>>>>>> On 28.03.17 at 12:27, <rcojocaru@bitdefender.com> wrote:
>>>>> On 03/28/2017 01:03 PM, Jan Beulich wrote:
>>>>>>>>> On 28.03.17 at 11:14, <rcojocaru@bitdefender.com> wrote:
>>>>>>> I'm not sure that the RETRY model is what the guest OS expects. AFAIK, a
>>>>>>> failed CMPXCHG should happen just once, with the proper registers and ZF
>>>>>>> set. The guest surely expects neither that the instruction resume until
>>>>>>> it succeeds, nor that some hidden loop goes on for an indeterminate
>>>>>>> amount of time until a CMPXCHG succeeds.
>>>>>>
>>>>>> The guest doesn't observe the CMPXCHG failing - RETRY leads to
>>>>>> the instruction being restarted instead of completed.
>>>>>
>>>>> Indeed, but it works differently with hvm_emulate_one_vm_event() where
>>>>> RETRY currently would have the instruction be re-executed (properly
>>>>> re-executed, not just re-emulated) by the guest.
>>>>
>>>> Right - see my other reply to Andrew: The function likely would
>>>> need to tell apart guest CMPXCHG uses from us using the insn to
>>>> carry out the write by some other one. That may involve
>>>> adjustments to the memory write logic in x86_emulate() itself, as
>>>> the late failure of the comparison then would also need to be
>>>> communicated back (via ZF clear) to the guest.
>>>
>>> Exactly, it would require quite some reworking of x86_emulate().
>>
>> I had imagined it to be less intrusive (outside of x86_emulate()),
>> but I've now learned why Andrew was able to get rid of
>> X86EMUL_CMPXCHG_FAILED - the apparently intended behavior
>> was never implemented. Attached a first take at it, which has
>> seen smoke testing, but nothing more. The way it ends up being
>> I don't think this can reasonably be considered for 4.9 at this
>> point in time. (Also Cc-ing Tim for the shadow code changes,
>> even if this isn't really a proper patch submission.)
> 
> Thanks! I'll give it a spin with a modified version of my CMPXCHG patch as
> soon as possible.

With the attached patch, with hvmemul_cmpxchg() now returning
X86EMUL_CMPXCHG_FAILED if __cmpxchg() fails, my (32-bit) Windows 7 guest
gets stuck at the "Starting Windows" screen. Its state appears to be:

# ./xenctx -a 3
cs:eip: 0008:8bcd85d6
flags: 00200246 cid i z p
ss:esp: 0010:82736b9c
eax: 00000000   ebx: 84f3a678   ecx: 84ee2610   edx: 001eb615
esi: 40008000   edi: 82739d20   ebp: 82736c20
 ds:     0023    es:     0023    fs:     0030    gs:     0000

cr0: 8001003b
cr2: 8fd94000
cr3: 00185000
cr4: 000406f9

dr0: 00000000
dr1: 00000000
dr2: 00000000
dr3: 00000000
dr6: fffe0ff0
dr7: 00000400
Code (instr addr 8bcd85d6)
47 fc 83 c7 14 4e 75 ef 5f 5e c3 cc cc cc cc cc cc 8b ff fb f4 <c3> cc
cc cc cc cc 8b ff 55 8b ec

# ./xenctx -a 3
cs:eip: 0008:8bcd85d6
flags: 00200246 cid i z p
ss:esp: 0010:82736b9c
eax: 00000000   ebx: 84f3a678   ecx: 84ee2610   edx: 002ca60d
esi: 40008000   edi: 82739d20   ebp: 82736c20
 ds:     0023    es:     0023    fs:     0030    gs:     0000

cr0: 8001003b
cr2: 8fd94000
cr3: 00185000
cr4: 000406f9

dr0: 00000000
dr1: 00000000
dr2: 00000000
dr3: 00000000
dr6: fffe0ff0
dr7: 00000400
Code (instr addr 8bcd85d6)
47 fc 83 c7 14 4e 75 ef 5f 5e c3 cc cc cc cc cc cc 8b ff fb f4 <c3> cc
cc cc cc cc 8b ff 55 8b ec

This only happens in SMP scenarios (my guest had 10 VCPUs for easy
reproduction). With a single VCPU, the guest booted fine. So something
somehow is still not right when a CMPXCHG fails in a race-type situation
(unless something's obviously wrong with my patch, but I don't see it).


Thanks,
Razvan


[-- Attachment #2: real_cmpxchg.patch --]
[-- Type: text/x-diff, Size: 4423 bytes --]

diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
index 2d92957..b946ef7 100644
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -20,6 +20,7 @@
 #include <asm/hvm/emulate.h>
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/ioreq.h>
+#include <asm/hvm/nestedhvm.h>
 #include <asm/hvm/trace.h>
 #include <asm/hvm/support.h>
 #include <asm/hvm/svm/svm.h>
@@ -1029,6 +1030,77 @@ static int hvmemul_wbinvd_discard(
     return X86EMUL_OKAY;
 }
 
+static int hvmemul_vaddr_to_mfn(
+    unsigned long addr,
+    mfn_t *mfn,
+    uint32_t pfec,
+    struct x86_emulate_ctxt *ctxt)
+{
+    paddr_t gpa = addr & ~PAGE_MASK;
+    struct page_info *page;
+    p2m_type_t p2mt;
+    unsigned long gfn;
+    struct vcpu *curr = current;
+    struct hvm_emulate_ctxt *hvmemul_ctxt =
+        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
+
+    gfn = paging_gva_to_gfn(curr, addr, &pfec);
+
+    if ( gfn == gfn_x(INVALID_GFN) )
+    {
+        pagefault_info_t pfinfo = {};
+
+        if ( ( pfec & PFEC_page_paged ) || ( pfec & PFEC_page_shared ) )
+            return X86EMUL_RETRY;
+
+        pfinfo.linear = addr;
+        pfinfo.ec = pfec;
+
+        x86_emul_pagefault(pfinfo.ec, pfinfo.linear, &hvmemul_ctxt->ctxt);
+        return X86EMUL_EXCEPTION;
+    }
+
+    gpa |= (paddr_t)gfn << PAGE_SHIFT;
+
+    /*
+     * No need to do the P2M lookup for internally handled MMIO, benefiting
+     * - 32-bit WinXP (& older Windows) on AMD CPUs for LAPIC accesses,
+     * - newer Windows (like Server 2012) for HPET accesses.
+     */
+    if ( !nestedhvm_vcpu_in_guestmode(curr) && hvm_mmio_internal(gpa) )
+        return X86EMUL_UNHANDLEABLE;
+
+    page = get_page_from_gfn(curr->domain, gfn, &p2mt, P2M_UNSHARE);
+
+    if ( !page )
+        return X86EMUL_UNHANDLEABLE;
+
+    if ( p2m_is_paging(p2mt) )
+    {
+        put_page(page);
+        p2m_mem_paging_populate(curr->domain, gfn);
+        return X86EMUL_RETRY;
+    }
+
+    if ( p2m_is_shared(p2mt) )
+    {
+        put_page(page);
+        return X86EMUL_RETRY;
+    }
+
+    if ( p2m_is_grant(p2mt) )
+    {
+        put_page(page);
+        return X86EMUL_UNHANDLEABLE;
+    }
+
+    *mfn = _mfn(page_to_mfn(page));
+
+    put_page(page);
+
+    return X86EMUL_OKAY;
+}
+
 static int hvmemul_cmpxchg(
     enum x86_segment seg,
     unsigned long offset,
@@ -1037,8 +1109,70 @@ static int hvmemul_cmpxchg(
     unsigned int bytes,
     struct x86_emulate_ctxt *ctxt)
 {
-    /* Fix this in case the guest is really relying on r-m-w atomicity. */
-    return hvmemul_write(seg, offset, p_new, bytes, ctxt);
+    unsigned long addr, reps = 1;
+    int rc = X86EMUL_OKAY;
+    unsigned long old = 0, new = 0;
+    uint32_t pfec = PFEC_page_present | PFEC_write_access;
+    struct hvm_emulate_ctxt *hvmemul_ctxt =
+        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
+    mfn_t mfn[2];
+    void *map = NULL;
+    struct domain *currd = current->domain;
+
+    if ( is_x86_system_segment(seg) )
+        pfec |= PFEC_implicit;
+    else if ( hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.dpl == 3 )
+        pfec |= PFEC_user_mode;
+
+    rc = hvmemul_virtual_to_linear(
+        seg, offset, bytes, &reps, hvm_access_write, hvmemul_ctxt, &addr);
+
+    if ( rc != X86EMUL_OKAY || !bytes )
+        return rc;
+
+    rc = hvmemul_vaddr_to_mfn(addr, &mfn[0], pfec, ctxt);
+
+    if ( rc != X86EMUL_OKAY )
+        return rc;
+
+    if ( likely(((addr + bytes - 1) & PAGE_MASK) == (addr & PAGE_MASK)) )
+    {
+        /* Whole write fits on a single page. */
+        mfn[1] = INVALID_MFN;
+        map = map_domain_page(mfn[0]);
+    }
+    else
+    {
+        rc = hvmemul_vaddr_to_mfn((addr + bytes - 1) & PAGE_MASK, &mfn[1],
+                                  pfec, ctxt);
+        if ( rc != X86EMUL_OKAY )
+            return rc;
+
+        map = vmap(mfn, 2);
+    }
+
+    if ( !map )
+        return X86EMUL_UNHANDLEABLE;
+
+    map += (addr & ~PAGE_MASK);
+
+    memcpy(&old, p_old, bytes);
+    memcpy(&new, p_new, bytes);
+
+    if ( __cmpxchg(map, old, new, bytes) != old )
+        rc = X86EMUL_CMPXCHG_FAILED;
+
+    paging_mark_dirty(currd, mfn[0]);
+
+    if ( unlikely(mfn_valid(mfn[1])) )
+    {
+        paging_mark_dirty(currd, mfn[1]);
+        vunmap((void *)((unsigned long)map & PAGE_MASK));
+    }
+    else
+        unmap_domain_page(map);
+
+    return rc;
 }
 
 static int hvmemul_validate(

[-- Attachment #3: Type: text/plain, Size: 127 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply related	[flat|nested] 38+ messages in thread

* Re: [PATCH RFC] x86/emulate: implement hvmemul_cmpxchg() with an actual CMPXCHG
  2017-03-29 15:04               ` Razvan Cojocaru
@ 2017-03-29 15:49                 ` Razvan Cojocaru
  2017-03-30 12:05                   ` Jan Beulich
  0 siblings, 1 reply; 38+ messages in thread
From: Razvan Cojocaru @ 2017-03-29 15:49 UTC (permalink / raw)
  To: Jan Beulich; +Cc: andrew.cooper3, paul.durrant, Tim Deegan, xen-devel

On 03/29/2017 06:04 PM, Razvan Cojocaru wrote:
> On 03/29/2017 05:00 PM, Razvan Cojocaru wrote:
>> On 03/29/2017 04:55 PM, Jan Beulich wrote:
>>>>>> On 28.03.17 at 12:50, <rcojocaru@bitdefender.com> wrote:
>>>> On 03/28/2017 01:47 PM, Jan Beulich wrote:
>>>>>>>> On 28.03.17 at 12:27, <rcojocaru@bitdefender.com> wrote:
>>>>>> On 03/28/2017 01:03 PM, Jan Beulich wrote:
>>>>>>>>>> On 28.03.17 at 11:14, <rcojocaru@bitdefender.com> wrote:
>>>>>>>> I'm not sure that the RETRY model is what the guest OS expects. AFAIK, a
>>>>>>>> failed CMPXCHG should happen just once, with the proper registers and ZF
>>>>>>>> set. The guest surely expects neither that the instruction resume until
>>>>>>>> it succeeds, nor that some hidden loop goes on for an undeterminate
>>>>>>>> ammount of time until a CMPXCHG succeeds.
>>>>>>>
>>>>>>> The guest doesn't observe the CMPXCHG failing - RETRY leads to
>>>>>>> the instruction being restarted instead of completed.
>>>>>>
>>>>>> Indeed, but it works differently with hvm_emulate_one_vm_event() where
>>>>>> RETRY currently would have the instruction be re-executed (properly
>>>>>> re-executed, not just re-emulated) by the guest.
>>>>>
>>>>> Right - see my other reply to Andrew: The function likely would
>>>>> need to tell apart guest CMPXCHG uses from us using the insn to
>>>>> carry out the write by some other one. That may involve
>>>>> adjustments to the memory write logic in x86_emulate() itself, as
>>>>> the late failure of the comparison then would also need to be
>>>>> communicated back (via ZF clear) to the guest.
>>>>
>>>> Exactly, it would require quite some reworking of x86_emulate().
>>>
>>> I had imagined it to be less intrusive (outside of x86_emulate()),
>>> but I've now learned why Andrew was able to get rid of
>>> X86EMUL_CMPXCHG_FAILED - the apparently intended behavior
>>> was never implemented. Attached a first take at it, which has
>>> seen smoke testing, but nothing more. The way it ends up being
>>> I don't think this can reasonably be considered for 4.9 at this
>>> point in time. (Also Cc-ing Tim for the shadow code changes,
>>> even if this isn't really a proper patch submission.)
>>
>> Thanks! I'll give a spin with a modified version of my CMPXCHG patch as
>> soon as possible.
> 
> With the attached patch with hvmemul_cmpxchg() now returning
> X86EMUL_CMPXCHG_FAILED if __cmpxchg() fails my (32-bit) Windows 7 guest
> gets stuck at the "Starting Windows" screen.

And again this change:

1162     if ( __cmpxchg(map, old, new, bytes) != old )
1163     {
1164         memcpy(p_old, map, bytes);
1165         rc = X86EMUL_CMPXCHG_FAILED;
1166     }

i.e. doing the accumulator <- destination part of a failed CMPXCHG which
might be missing from your patch leads me again to BSODs. I'm not sure
if __cmpxchg() should work differently and do this atomically, or if
this should be done in x86_emulate() and it's not, or if it is done
there somewhere I've missed in the first patch.


Thanks,
Razvan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH RFC] x86/emulate: implement hvmemul_cmpxchg() with an actual CMPXCHG
  2017-03-29 15:49                 ` Razvan Cojocaru
@ 2017-03-30 12:05                   ` Jan Beulich
  2017-03-30 12:25                     ` Razvan Cojocaru
  2017-03-30 12:56                     ` Razvan Cojocaru
  0 siblings, 2 replies; 38+ messages in thread
From: Jan Beulich @ 2017-03-30 12:05 UTC (permalink / raw)
  To: Razvan Cojocaru; +Cc: andrew.cooper3, paul.durrant, Tim Deegan, xen-devel

>>> On 29.03.17 at 17:49, <rcojocaru@bitdefender.com> wrote:
> On 03/29/2017 06:04 PM, Razvan Cojocaru wrote:
>> On 03/29/2017 05:00 PM, Razvan Cojocaru wrote:
>>> On 03/29/2017 04:55 PM, Jan Beulich wrote:
>>>>>>> On 28.03.17 at 12:50, <rcojocaru@bitdefender.com> wrote:
>>>>> On 03/28/2017 01:47 PM, Jan Beulich wrote:
>>>>>>>>> On 28.03.17 at 12:27, <rcojocaru@bitdefender.com> wrote:
>>>>>>> On 03/28/2017 01:03 PM, Jan Beulich wrote:
>>>>>>>>>>> On 28.03.17 at 11:14, <rcojocaru@bitdefender.com> wrote:
>>>>>>>>> I'm not sure that the RETRY model is what the guest OS expects. AFAIK, a
>>>>>>>>> failed CMPXCHG should happen just once, with the proper registers and ZF
>>>>>>>>> set. The guest surely expects neither that the instruction resume until
>>>>>>>>> it succeeds, nor that some hidden loop goes on for an undeterminate
>>>>>>>>> ammount of time until a CMPXCHG succeeds.
>>>>>>>>
>>>>>>>> The guest doesn't observe the CMPXCHG failing - RETRY leads to
>>>>>>>> the instruction being restarted instead of completed.
>>>>>>>
>>>>>>> Indeed, but it works differently with hvm_emulate_one_vm_event() where
>>>>>>> RETRY currently would have the instruction be re-executed (properly
>>>>>>> re-executed, not just re-emulated) by the guest.
>>>>>>
>>>>>> Right - see my other reply to Andrew: The function likely would
>>>>>> need to tell apart guest CMPXCHG uses from us using the insn to
>>>>>> carry out the write by some other one. That may involve
>>>>>> adjustments to the memory write logic in x86_emulate() itself, as
>>>>>> the late failure of the comparison then would also need to be
>>>>>> communicated back (via ZF clear) to the guest.
>>>>>
>>>>> Exactly, it would require quite some reworking of x86_emulate().
>>>>
>>>> I had imagined it to be less intrusive (outside of x86_emulate()),
>>>> but I've now learned why Andrew was able to get rid of
>>>> X86EMUL_CMPXCHG_FAILED - the apparently intended behavior
>>>> was never implemented. Attached a first take at it, which has
>>>> seen smoke testing, but nothing more. The way it ends up being
>>>> I don't think this can reasonably be considered for 4.9 at this
>>>> point in time. (Also Cc-ing Tim for the shadow code changes,
>>>> even if this isn't really a proper patch submission.)
>>>
>>> Thanks! I'll give a spin with a modified version of my CMPXCHG patch as
>>> soon as possible.
>> 
>> With the attached patch with hvmemul_cmpxchg() now returning
>> X86EMUL_CMPXCHG_FAILED if __cmpxchg() fails my (32-bit) Windows 7 guest
>> gets stuck at the "Starting Windows" screen.

That's with or without monitoring in use? I specifically did try a
32-bit Win7 guest, and I didn't have an issue. But then again a
single run may not mean much.

> And again this change:
> 
> 1162     if ( __cmpxchg(map, old, new, bytes) != old )
> 1163     {
> 1164         memcpy(p_old, map, bytes);
> 1165         rc = X86EMUL_CMPXCHG_FAILED;
> 1166     }
> 
> i.e. doing the accumulator <- destination part of a failed CMPXCHG which
> might be missing from your patch leads me again to BSODs.

Missing from my patch? Why and/or where? It's not clear to me which
function the above code fragment is supposed to go into. I might
guess hvmemul_cmpxchg(), but then my patch doesn't alter its
behavior (from forwarding to hvmemul_write()), and hence I don't
see why my patch would need to do such an adjustment.

What I do note though is that you don't copy back the value
__cmpxchg() returns, yet that's what is needed. *map may
have changed again already.

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH RFC] x86/emulate: implement hvmemul_cmpxchg() with an actual CMPXCHG
  2017-03-30 12:05                   ` Jan Beulich
@ 2017-03-30 12:25                     ` Razvan Cojocaru
  2017-03-30 12:56                     ` Razvan Cojocaru
  1 sibling, 0 replies; 38+ messages in thread
From: Razvan Cojocaru @ 2017-03-30 12:25 UTC (permalink / raw)
  To: Jan Beulich; +Cc: andrew.cooper3, paul.durrant, Tim Deegan, xen-devel

On 03/30/2017 03:05 PM, Jan Beulich wrote:
>>>> On 29.03.17 at 17:49, <rcojocaru@bitdefender.com> wrote:
>> On 03/29/2017 06:04 PM, Razvan Cojocaru wrote:
>>> On 03/29/2017 05:00 PM, Razvan Cojocaru wrote:
>>>> On 03/29/2017 04:55 PM, Jan Beulich wrote:
>>>>>>>> On 28.03.17 at 12:50, <rcojocaru@bitdefender.com> wrote:
>>>>>> On 03/28/2017 01:47 PM, Jan Beulich wrote:
>>>>>>>>>> On 28.03.17 at 12:27, <rcojocaru@bitdefender.com> wrote:
>>>>>>>> On 03/28/2017 01:03 PM, Jan Beulich wrote:
>>>>>>>>>>>> On 28.03.17 at 11:14, <rcojocaru@bitdefender.com> wrote:
>>>>>>>>>> I'm not sure that the RETRY model is what the guest OS expects. AFAIK, a
>>>>>>>>>> failed CMPXCHG should happen just once, with the proper registers and ZF
>>>>>>>>>> set. The guest surely expects neither that the instruction resume until
>>>>>>>>>> it succeeds, nor that some hidden loop goes on for an undeterminate
>>>>>>>>>> ammount of time until a CMPXCHG succeeds.
>>>>>>>>>
>>>>>>>>> The guest doesn't observe the CMPXCHG failing - RETRY leads to
>>>>>>>>> the instruction being restarted instead of completed.
>>>>>>>>
>>>>>>>> Indeed, but it works differently with hvm_emulate_one_vm_event() where
>>>>>>>> RETRY currently would have the instruction be re-executed (properly
>>>>>>>> re-executed, not just re-emulated) by the guest.
>>>>>>>
>>>>>>> Right - see my other reply to Andrew: The function likely would
>>>>>>> need to tell apart guest CMPXCHG uses from us using the insn to
>>>>>>> carry out the write by some other one. That may involve
>>>>>>> adjustments to the memory write logic in x86_emulate() itself, as
>>>>>>> the late failure of the comparison then would also need to be
>>>>>>> communicated back (via ZF clear) to the guest.
>>>>>>
>>>>>> Exactly, it would require quite some reworking of x86_emulate().
>>>>>
>>>>> I had imagined it to be less intrusive (outside of x86_emulate()),
>>>>> but I've now learned why Andrew was able to get rid of
>>>>> X86EMUL_CMPXCHG_FAILED - the apparently intended behavior
>>>>> was never implemented. Attached a first take at it, which has
>>>>> seen smoke testing, but nothing more. The way it ends up being
>>>>> I don't think this can reasonably be considered for 4.9 at this
>>>>> point in time. (Also Cc-ing Tim for the shadow code changes,
>>>>> even if this isn't really a proper patch submission.)
>>>>
>>>> Thanks! I'll give a spin with a modified version of my CMPXCHG patch as
>>>> soon as possible.
>>>
>>> With the attached patch with hvmemul_cmpxchg() now returning
>>> X86EMUL_CMPXCHG_FAILED if __cmpxchg() fails my (32-bit) Windows 7 guest
>>> gets stuck at the "Starting Windows" screen.
> 
> That's with or without monitoring in use? I specifically did try a
> 32-bit Win7 guest, and I didn't have an issue. But then again a
> single run may not mean much.

With monitoring in use - specifically using hvm_emulate_one_vm_event().
Sorry for the omission.

>> And again this change:
>>
>> 1162     if ( __cmpxchg(map, old, new, bytes) != old )
>> 1163     {
>> 1164         memcpy(p_old, map, bytes);
>> 1165         rc = X86EMUL_CMPXCHG_FAILED;
>> 1166     }
>>
>> i.e. doing the accumulator <- destination part of a failed CMPXCHG which
>> might be missing from your patch leads me again to BSODs.
> 
> Missing from my patch? Why and/or where? It's not clear to me which
> function the above code fragment is supposed to go into. I might
> guess hvmemul_cmpxchg(), but then my patch doesn't alter its
> behavior (from forwarding to hvmemul_write()), and hence I don't
> see why my patch would need to do such an adjustment.

Right, I was thinking about this bit:

6704         if ( _regs.eflags & X86_EFLAGS_ZF )
6705             dst.type = OP_NONE;
6706         else
6707         {
6708             /* Failure: write the value we saw to EAX. */
6709             dst.type = OP_REG;
6710             dst.reg  = (unsigned long *)&_regs.r(ax);
6711         }

For some reason I had missed it, but I now see it does the writeback. My
mistake.

> What I do note though is that you don't copy back the value
> __cmpxchg() returns, yet that's what is needed. *map may
> have changed again already.

True, I'll update my tests.


Thanks,
Razvan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH RFC] x86/emulate: implement hvmemul_cmpxchg() with an actual CMPXCHG
  2017-03-30 12:05                   ` Jan Beulich
  2017-03-30 12:25                     ` Razvan Cojocaru
@ 2017-03-30 12:56                     ` Razvan Cojocaru
  2017-03-30 14:08                       ` Razvan Cojocaru
  1 sibling, 1 reply; 38+ messages in thread
From: Razvan Cojocaru @ 2017-03-30 12:56 UTC (permalink / raw)
  To: Jan Beulich; +Cc: andrew.cooper3, paul.durrant, Tim Deegan, xen-devel

On 03/30/2017 03:05 PM, Jan Beulich wrote:
> What I do note though is that you don't copy back the value
> __cmpxchg() returns, yet that's what is needed. *map may
> have changed again already.

Changing the code to:

1162     ret = __cmpxchg(map, old, new, bytes);
1163
1164     if ( ret != old )
1165     {
1166         memcpy(p_old, &ret, bytes);
1167         rc = X86EMUL_CMPXCHG_FAILED;
1168     }

where ret is an unsigned long still triggers BSODs when I add my patch
to yours. I'll need to dig deeper.


Thanks,
Razvan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH RFC] x86/emulate: implement hvmemul_cmpxchg() with an actual CMPXCHG
  2017-03-30 12:56                     ` Razvan Cojocaru
@ 2017-03-30 14:08                       ` Razvan Cojocaru
  2017-03-30 14:21                         ` Jan Beulich
  0 siblings, 1 reply; 38+ messages in thread
From: Razvan Cojocaru @ 2017-03-30 14:08 UTC (permalink / raw)
  To: Jan Beulich; +Cc: andrew.cooper3, paul.durrant, Tim Deegan, xen-devel

On 03/30/2017 03:56 PM, Razvan Cojocaru wrote:
> On 03/30/2017 03:05 PM, Jan Beulich wrote:
>> What I do note though is that you don't copy back the value
>> __cmpxchg() returns, yet that's what is needed. *map may
>> have changed again already.
> 
> Changing the code to:
> 
> 1162     ret = __cmpxchg(map, old, new, bytes);
> 1163
> 1164     if ( ret != old )
> 1165     {
> 1166         memcpy(p_old, &ret, bytes);
> 1167         rc = X86EMUL_CMPXCHG_FAILED;
> 1168     }
> 
> where ret is an unsigned long still triggers BSODs when I add my patch
> to yours. I'll need to dig deeper.

Nevermind, I've found the culprit: hvm_emulate_one_vm_event()'s code
needs to be wrapped in a loop that checks for X86EMUL_RETRY again, since
hvmemul_cmpxchg() may return RETRY even for some mapping problems, in
which case we again end up with the guest trying to re-execute an
emulable CMPXCHG.

However, this gets me back to my original problem when I "solved" it in
the same manner (looping until emulation succeeds) back when
hvmemul_cmpxchg() failures were reported with RETRY: eventually the
guest BSODs with code 101. RETRY failures are still possible coming from
the hvmemul_vaddr_to_mfn() code in my patch.

I wonder if I should just return X86EMUL_CMPXCHG_FAILED for all those as
well and just never end up returning RETRY from hvmemul_cmpxchg().


Thanks,
Razvan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH RFC] x86/emulate: implement hvmemul_cmpxchg() with an actual CMPXCHG
  2017-03-30 14:08                       ` Razvan Cojocaru
@ 2017-03-30 14:21                         ` Jan Beulich
  2017-03-30 15:05                           ` Razvan Cojocaru
  0 siblings, 1 reply; 38+ messages in thread
From: Jan Beulich @ 2017-03-30 14:21 UTC (permalink / raw)
  To: Razvan Cojocaru; +Cc: andrew.cooper3, paul.durrant, Tim Deegan, xen-devel

>>> On 30.03.17 at 16:08, <rcojocaru@bitdefender.com> wrote:
> On 03/30/2017 03:56 PM, Razvan Cojocaru wrote:
>> On 03/30/2017 03:05 PM, Jan Beulich wrote:
>>> What I do note though is that you don't copy back the value
>>> __cmpxchg() returns, yet that's what is needed. *map may
>>> have changed again already.
>> 
>> Changing the code to:
>> 
>> 1162     ret = __cmpxchg(map, old, new, bytes);
>> 1163
>> 1164     if ( ret != old )
>> 1165     {
>> 1166         memcpy(p_old, &ret, bytes);
>> 1167         rc = X86EMUL_CMPXCHG_FAILED;
>> 1168     }
>> 
>> where ret is an unsigned long still triggers BSODs when I add my patch
>> to yours. I'll need to dig deeper.
> 
> Nevermind, I've found the culprit: hvm_emulate_one_vm_event()'s code
> needs to be wrapped in a loop that checks for X86EMUL_RETRY again, since
> hvmemul_cmpxchg() may return RETRY even for some mapping problems, in
> which case we again end up with the guest trying to re-execute an
> emulable CMPXCHG.

This seems wrong to me - note how my patch changes behavior
regarding the return value from paging_cmpxchg_guest_entry()
in ptwr_emulated_update().

> However, this gets me back to my original problem when I "solved" it in
> the same manner (looping until emulation succeeds) back when
> hvmemul_cmpxchg() failures were reported with RETRY: eventually the
> guest BSODs with code 101. RETRY failures are still possible coming from
> the hvmemul_vaddr_to_mfn() code in my patch.
> 
> I wonder if I should just return X86EMUL_CMPXCHG_FAILED for all those as
> well and just never end up returning RETRY from hvmemul_cmpxchg().

That would seem similarly wrong to me - it ought to be
UNHANDLEABLE, I would think. In any event, never returning
RETRY would also be wrong in case you hit emulated MMIO, but
that's really the only case where RETRY should be passed back.

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH RFC] x86/emulate: implement hvmemul_cmpxchg() with an actual CMPXCHG
  2017-03-30 14:21                         ` Jan Beulich
@ 2017-03-30 15:05                           ` Razvan Cojocaru
  2017-03-30 15:47                             ` Jan Beulich
  0 siblings, 1 reply; 38+ messages in thread
From: Razvan Cojocaru @ 2017-03-30 15:05 UTC (permalink / raw)
  To: Jan Beulich; +Cc: andrew.cooper3, paul.durrant, Tim Deegan, xen-devel

On 03/30/2017 05:21 PM, Jan Beulich wrote:
>>>> On 30.03.17 at 16:08, <rcojocaru@bitdefender.com> wrote:
>> On 03/30/2017 03:56 PM, Razvan Cojocaru wrote:
>>> On 03/30/2017 03:05 PM, Jan Beulich wrote:
>>>> What I do note though is that you don't copy back the value
>>>> __cmpxchg() returns, yet that's what is needed. *map may
>>>> have changed again already.
>>>
>>> Changing the code to:
>>>
>>> 1162     ret = __cmpxchg(map, old, new, bytes);
>>> 1163
>>> 1164     if ( ret != old )
>>> 1165     {
>>> 1166         memcpy(p_old, &ret, bytes);
>>> 1167         rc = X86EMUL_CMPXCHG_FAILED;
>>> 1168     }
>>>
>>> where ret is an unsigned long still triggers BSODs when I add my patch
>>> to yours. I'll need to dig deeper.
>>
>> Nevermind, I've found the culprit: hvm_emulate_one_vm_event()'s code
>> needs to be wrapped in a loop that checks for X86EMUL_RETRY again, since
>> hvmemul_cmpxchg() may return RETRY even for some mapping problems, in
>> which case we again end up with the guest trying to re-execute an
>> emulable CMPXCHG.
> 
> This seems wrong to me - note how my patch changes behavior
> regarding the return value from paging_cmpxchg_guest_entry()
> in ptwr_emulated_update().
> 
>> However, this gets me back to my original problem when I "solved" it in
>> the same manner (looping until emulation succeeds) back when
>> hvmemul_cmpxchg() failures were reported with RETRY: eventually the
>> guest BSODs with code 101. RETRY failures are still possible coming from
>> the hvmemul_vaddr_to_mfn() code in my patch.
>>
>> I wonder if I should just return X86EMUL_CMPXCHG_FAILED for all those as
>> well and just never end up returning RETRY from hvmemul_cmpxchg().
> 
> That would seem similarly wrong to me - it ought to be
> UNHANDLEABLE, I would think. In any event, never returning
> RETRY would also be wrong in case you hit emulated MMIO, but
> that's really the only case where RETRY should be passed back.

Sorry, I don't follow: hvm_emulate_one_vm_event() calls
hvm_emulate_one() and then does this with the return value:

switch ( rc )
{
case X86EMUL_RETRY:
    /*
     * This function is called when handling an EPT-related vm_event
     * reply. As such, nothing else needs to be done here, since simply
     * returning makes the current instruction cause a page fault again,
     * consistent with X86EMUL_RETRY.
     */
    return;
case X86EMUL_UNHANDLEABLE:
    hvm_dump_emulation_state(XENLOG_G_DEBUG "Mem event", &ctx);
    hvm_inject_hw_exception(trapnr, errcode);
    break;
case X86EMUL_EXCEPTION:
    if ( ctx.ctxt.event_pending )
        hvm_inject_event(&ctx.ctxt.event);
    break;
}

hvm_emulate_writeback(&ctx);

If I'd return UNHANDLEABLE from hvmemul_cmpxchg() where I'm now
returning RETRY from hvmemul_vaddr_to_mfn(), that would inject
TRAP_invalid_op in the guest, so it seems rather harsh. And returning
RETRY here does nothing, so the guest will resume at the same IP
(re-trying CMPXCHG, causing a page fault (or crashing the guest), and
re-trying the emulation (if it hasn't)). Please see hvm_do_resume() in
arch/x86/hvm/hvm.c.

Speaking of emulated MMIO, I've got this when the guest was crashing
immediately (pre RETRY loop):

 MMIO emulation failed: d3v8 32bit @ 0008:82679f3c -> f0 0f ba 30 00 72
07 8b cb e8 da 4b ff ff 8b 45

Again, this is all with guest monitoring enabled.


Thanks,
Razvan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH RFC] x86/emulate: implement hvmemul_cmpxchg() with an actual CMPXCHG
  2017-03-30 15:05                           ` Razvan Cojocaru
@ 2017-03-30 15:47                             ` Jan Beulich
  2017-03-31  6:17                               ` Razvan Cojocaru
  0 siblings, 1 reply; 38+ messages in thread
From: Jan Beulich @ 2017-03-30 15:47 UTC (permalink / raw)
  To: Razvan Cojocaru; +Cc: andrew.cooper3, paul.durrant, Tim Deegan, xen-devel

>>> On 30.03.17 at 17:05, <rcojocaru@bitdefender.com> wrote:
> On 03/30/2017 05:21 PM, Jan Beulich wrote:
>>>>> On 30.03.17 at 16:08, <rcojocaru@bitdefender.com> wrote:
>>> On 03/30/2017 03:56 PM, Razvan Cojocaru wrote:
>>>> On 03/30/2017 03:05 PM, Jan Beulich wrote:
>>>>> What I do note though is that you don't copy back the value
>>>>> __cmpxchg() returns, yet that's what is needed. *map may
>>>>> have changed again already.
>>>>
>>>> Changing the code to:
>>>>
>>>> 1162     ret = __cmpxchg(map, old, new, bytes);
>>>> 1163
>>>> 1164     if ( ret != old )
>>>> 1165     {
>>>> 1166         memcpy(p_old, &ret, bytes);
>>>> 1167         rc = X86EMUL_CMPXCHG_FAILED;
>>>> 1168     }
>>>>
>>>> where ret is an unsigned long still triggers BSODs when I add my patch
>>>> to yours. I'll need to dig deeper.
>>>
>>> Nevermind, I've found the culprit: hvm_emulate_one_vm_event()'s code
>>> needs to be wrapped in a loop that checks for X86EMUL_RETRY again, since
>>> hvmemul_cmpxchg() may return RETRY even for some mapping problems, in
>>> which case we again end up with the guest trying to re-execute an
>>> emulable CMPXCHG.
>> 
>> This seems wrong to me - note how my patch changes behavior
>> regarding the return value from paging_cmpxchg_guest_entry()
>> in ptwr_emulated_update().
>> 
>>> However, this gets me back to my original problem when I "solved" it in
>>> the same manner (looping until emulation succeeds) back when
>>> hvmemul_cmpxchg() failures were reported with RETRY: eventually the
>>> guest BSODs with code 101. RETRY failures are still possible coming from
>>> the hvmemul_vaddr_to_mfn() code in my patch.
>>>
>>> I wonder if I should just return X86EMUL_CMPXCHG_FAILED for all those as
>>> well and just never end up returning RETRY from hvmemul_cmpxchg().
>> 
>> That would seem similarly wrong to me - it ought to be
>> UNHANDLEABLE, I would think. In any event, never returning
>> RETRY would also be wrong in case you hit emulated MMIO, but
>> that's really the only case where RETRY should be passed back.
> 
> Sorry, I don't follow: hvm_emulate_one_vm_event() calls
> hvm_emulate_one() and then does this with the return value:
> 
> switch ( rc )
> {
> case X86EMUL_RETRY:
>     /*
>      * This function is called when handling an EPT-related vm_event
>      * reply. As such, nothing else needs to be done here, since simply
>      * returning makes the current instruction cause a page fault again,
>      * consistent with X86EMUL_RETRY.
>      */
>     return;
> case X86EMUL_UNHANDLEABLE:
>     hvm_dump_emulation_state(XENLOG_G_DEBUG "Mem event", &ctx);
>     hvm_inject_hw_exception(trapnr, errcode);
>     break;
> case X86EMUL_EXCEPTION:
>     if ( ctx.ctxt.event_pending )
>         hvm_inject_event(&ctx.ctxt.event);
>     break;
> }
> 
> hvm_emulate_writeback(&ctx);
> 
> If I'd return UNHANDLEABLE from hvmemul_cmpxchg() where I'm now
> returning RETRY from hvmemul_vaddr_to_mfn(), that would inject
> TRAP_invalid_op in the guest, so it seems rather harsh.

Well, my comment was for normal execution of the guest. In
order to make it into the emulator, the virtual->physical
translation in the guest must have worked, so if there is a
mapping failure, this indicates something fishy going on inside
the guest.

> Speaking of emulated MMIO, I've got this when the guest was crashing
> immediately (pre RETRY loop):
> 
>  MMIO emulation failed: d3v8 32bit @ 0008:82679f3c -> f0 0f ba 30 00 72
> 07 8b cb e8 da 4b ff ff 8b 45

That's a BTR, which we should be emulating fine. More information
would need to be collected to have a chance to understand what
might be going on (first of all the virtual and physical memory
address this was trying to act on).

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH RFC] x86/emulate: implement hvmemul_cmpxchg() with an actual CMPXCHG
  2017-03-30 15:47                             ` Jan Beulich
@ 2017-03-31  6:17                               ` Razvan Cojocaru
  2017-03-31  7:34                                 ` Jan Beulich
  0 siblings, 1 reply; 38+ messages in thread
From: Razvan Cojocaru @ 2017-03-31  6:17 UTC (permalink / raw)
  To: Jan Beulich; +Cc: andrew.cooper3, paul.durrant, Tim Deegan, xen-devel

On 03/30/2017 06:47 PM, Jan Beulich wrote:
>> Speaking of emulated MMIO, I've got this when the guest was crashing
>> immediately (pre RETRY loop):
>>
>>  MMIO emulation failed: d3v8 32bit @ 0008:82679f3c -> f0 0f ba 30 00 72
>> 07 8b cb e8 da 4b ff ff 8b 45
> 
> That's a BTR, which we should be emulating fine. More information
> would need to be collected to have a chance to understand what
> might be going one (first of all the virtual and physical memory
> address this was trying to act on).

Right, the BTR part should be fine, but I think the LOCK part is what's
causing the issue. I've done a few more test runs to see what returns
RETRY (dumping the instruction with an "(r)" prefix to distinguish it from
the UNHANDLEABLE dump), and a couple of instructions return RETRY (BTR
and XADD, both LOCK-prefixed, which means they now involve the CMPXCHG
handler, which presumably now fails - possibly simply because it's
always LOCKed in my patch):

# grep "Mem event" /var/log/xen/console/hypervisor.log | sort | uniq
(XEN) (r) Mem event emulation failed: d3v1 32bit @ 0008:8267f1aa -> f0
0f ba 28 07 72 d5 8d 45 f4 50 33 ff 56 47 53
(XEN) (r) Mem event emulation failed: d3v5 32bit @ 0008:8267f1aa -> f0
0f ba 28 07 72 d5 8d 45 f4 50 33 ff 56 47 53
(XEN) (r) Mem event emulation failed: d3v5 32bit @ 0008:826ebc7c -> f0
0f c1 08 85 c9 74 1f f6 c1 02 75 1a 41 8d 41
(XEN) (r) Mem event emulation failed: d3v6 32bit @ 0008:8267f1aa -> f0
0f ba 28 07 72 d5 8d 45 f4 50 33 ff 56 47 53
(XEN) (r) Mem event emulation failed: d3v6 32bit @ 0008:826eb861 -> f0
0f ba 30 00 72 07 8b cb e8 da 4b ff ff 8b 45
(XEN) (r) Mem event emulation failed: d3v6 32bit @ 0008:826ebc7c -> f0
0f c1 08 85 c9 74 1f f6 c1 02 75 1a 41 8d 41
(XEN) (r) Mem event emulation failed: d3v6 32bit @ 0008:826ebce6 -> f0
0f c1 01 8b 7d fc c1 ef 09 81 e7 f8 ff 7f 00
(XEN) (r) Mem event emulation failed: d3v7 32bit @ 0008:8267f1aa -> f0
0f ba 28 07 72 d5 8d 45 f4 50 33 ff 56 47 53
(XEN) (r) Mem event emulation failed: d3v7 32bit @ 0008:826eb861 -> f0
0f ba 30 00 72 07 8b cb e8 da 4b ff ff 8b 45
(XEN) (r) Mem event emulation failed: d3v7 32bit @ 0008:826ebc7c -> f0
0f c1 08 85 c9 74 1f f6 c1 02 75 1a 41 8d 41
(XEN) (r) Mem event emulation failed: d3v7 32bit @ 0008:826ebce6 -> f0
0f c1 01 8b 7d fc c1 ef 09 81 e7 f8 ff 7f 00
(XEN) (r) Mem event emulation failed: d3v7 32bit @ 0008:826ec59a -> f0
0f ba 31 00 72 09 e8 a3 3e ff ff 8b 44 24 18
(XEN) (r) Mem event emulation failed: d3v7 32bit @ 0008:826f6276 -> f0
0f ba 28 07 72 cc 39 53 04 75 5e 8d 43 08 8b
(XEN) (r) Mem event emulation failed: d3v8 32bit @ 0008:8267f1aa -> f0
0f ba 28 07 72 d5 8d 45 f4 50 33 ff 56 47 53
(XEN) (r) Mem event emulation failed: d3v8 32bit @ 0008:826eb861 -> f0
0f ba 30 00 72 07 8b cb e8 da 4b ff ff 8b 45
(XEN) (r) Mem event emulation failed: d3v9 32bit @ 0008:8267f1aa -> f0
0f ba 28 07 72 d5 8d 45 f4 50 33 ff 56 47 53
(XEN) (r) Mem event emulation failed: d3v9 32bit @ 0008:826eb861 -> f0
0f ba 30 00 72 07 8b cb e8 da 4b ff ff 8b 45
(XEN) (r) Mem event emulation failed: d3v9 32bit @ 0008:826ebce6 -> f0
0f c1 01 8b 7d fc c1 ef 09 81 e7 f8 ff 7f 00
(XEN) (r) Mem event emulation failed: d3v9 32bit @ 0008:826ec583 -> f0
0f c1 01 64 a1 24 01 00 00 66 ff 88 86 00 00


Thanks,
Razvan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH RFC] x86/emulate: implement hvmemul_cmpxchg() with an actual CMPXCHG
  2017-03-31  6:17                               ` Razvan Cojocaru
@ 2017-03-31  7:34                                 ` Jan Beulich
  2017-03-31  9:56                                   ` Razvan Cojocaru
  0 siblings, 1 reply; 38+ messages in thread
From: Jan Beulich @ 2017-03-31  7:34 UTC (permalink / raw)
  To: Razvan Cojocaru; +Cc: andrew.cooper3, paul.durrant, Tim Deegan, xen-devel

>>> On 31.03.17 at 08:17, <rcojocaru@bitdefender.com> wrote:
> On 03/30/2017 06:47 PM, Jan Beulich wrote:
>>> Speaking of emulated MMIO, I've got this when the guest was crashing
>>> immediately (pre RETRY loop):
>>>
>>>  MMIO emulation failed: d3v8 32bit @ 0008:82679f3c -> f0 0f ba 30 00 72
>>> 07 8b cb e8 da 4b ff ff 8b 45
>> 
>> That's a BTR, which we should be emulating fine. More information
>> would need to be collected to have a chance to understand what
>> might be going one (first of all the virtual and physical memory
>> address this was trying to act on).
> 
> Right, the BTR part should be fine, but I think the LOCK part is what's
> causing the issue. I've done a few more test runs to see what return
> RETRY (dumping the instruction with an "(r)" prefix to distinguish from
> the UNHANDLEABLE dump), and a couple of instructions return RETRY (BTR
> and XADD, both LOCK-prefixed, which means they now involve CMPXCHG
> handler, which presumably now fails - possibly simply because it's
> always LOCKed in my patch):

Well, all of that looks to be expected behavior. I'm afraid I don't see
how this information helps understanding the MMIO emulation failure
above.

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH RFC] x86/emulate: implement hvmemul_cmpxchg() with an actual CMPXCHG
  2017-03-31  7:34                                 ` Jan Beulich
@ 2017-03-31  9:56                                   ` Razvan Cojocaru
  2017-03-31 14:46                                     ` Jan Beulich
  0 siblings, 1 reply; 38+ messages in thread
From: Razvan Cojocaru @ 2017-03-31  9:56 UTC (permalink / raw)
  To: Jan Beulich; +Cc: andrew.cooper3, paul.durrant, Tim Deegan, xen-devel

On 03/31/2017 10:34 AM, Jan Beulich wrote:
>>>> On 31.03.17 at 08:17, <rcojocaru@bitdefender.com> wrote:
>> On 03/30/2017 06:47 PM, Jan Beulich wrote:
>>>> Speaking of emulated MMIO, I've got this when the guest was crashing
>>>> immediately (pre RETRY loop):
>>>>
>>>>  MMIO emulation failed: d3v8 32bit @ 0008:82679f3c -> f0 0f ba 30 00 72
>>>> 07 8b cb e8 da 4b ff ff 8b 45
>>>
>>> That's a BTR, which we should be emulating fine. More information
>>> would need to be collected to have a chance to understand what
>>> might be going one (first of all the virtual and physical memory
>>> address this was trying to act on).
>>
>> Right, the BTR part should be fine, but I think the LOCK part is what's
>> causing the issue. I've done a few more test runs to see what return
>> RETRY (dumping the instruction with an "(r)" prefix to distinguish from
>> the UNHANDLEABLE dump), and a couple of instructions return RETRY (BTR
>> and XADD, both LOCK-prefixed, which means they now involve CMPXCHG
>> handler, which presumably now fails - possibly simply because it's
>> always LOCKed in my patch):
> 
> Well, all of that looks to be expected behavior. I'm afraid I don't see
> how this information helps understanding the MMIO emulation failure
> above.

I've managed to obtain this log of emulation errors:
https://pastebin.com/Esy1SkHx

The "virtual address" lines that are not followed by any "Mem event"
line correspond to CMPXCHG_FAILED return codes.

The very last line is an "MMIO emulation failed" message.

It's probably important that this happens with the model where
hvm_emulate_one_vm_event() does _not_ re-try the emulation until it
succeeds. The other model allows me to go further with the guest, but
eventually I get timeout-related BSODs or the guest becomes unresponsive.


Thanks,
Razvan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH RFC] x86/emulate: implement hvmemul_cmpxchg() with an actual CMPXCHG
  2017-03-31  9:56                                   ` Razvan Cojocaru
@ 2017-03-31 14:46                                     ` Jan Beulich
  2017-03-31 15:01                                       ` Razvan Cojocaru
  0 siblings, 1 reply; 38+ messages in thread
From: Jan Beulich @ 2017-03-31 14:46 UTC (permalink / raw)
  To: Razvan Cojocaru; +Cc: andrew.cooper3, paul.durrant, Tim Deegan, xen-devel

>>> On 31.03.17 at 11:56, <rcojocaru@bitdefender.com> wrote:
> On 03/31/2017 10:34 AM, Jan Beulich wrote:
>>>>> On 31.03.17 at 08:17, <rcojocaru@bitdefender.com> wrote:
>>> On 03/30/2017 06:47 PM, Jan Beulich wrote:
>>>>> Speaking of emulated MMIO, I've got this when the guest was crashing
>>>>> immediately (pre RETRY loop):
>>>>>
>>>>>  MMIO emulation failed: d3v8 32bit @ 0008:82679f3c -> f0 0f ba 30 00 72
>>>>> 07 8b cb e8 da 4b ff ff 8b 45
>>>>
>>>> That's a BTR, which we should be emulating fine. More information
>>>> would need to be collected to have a chance to understand what
>>>> might be going one (first of all the virtual and physical memory
>>>> address this was trying to act on).
>>>
>>> Right, the BTR part should be fine, but I think the LOCK part is what's
>>> causing the issue. I've done a few more test runs to see what return
>>> RETRY (dumping the instruction with an "(r)" prefix to distinguish from
>>> the UNHANDLEABLE dump), and a couple of instructions return RETRY (BTR
>>> and XADD, both LOCK-prefixed, which means they now involve CMPXCHG
>>> handler, which presumably now fails - possibly simply because it's
>>> always LOCKed in my patch):
>> 
>> Well, all of that looks to be expected behavior. I'm afraid I don't see
>> how this information helps understanding the MMIO emulation failure
>> above.
> 
> I've managed to obtain this log of emulation errors:
> https://pastebin.com/Esy1SkHx 
> 
> The "virtual address" lines that are not followed by any "Mem event"
> line correspond to CMXCHG_FAILED return codes.
> 
> The very last line is a MMIO emulation failed.
> 
> It's probably important that this happens with the model where
> hvm_emulate_one_vm_event() does _not_ re-try the emulation until it
> succeeds. The other model allows me to go further with the guest, but
> eventually I get timeout-related BSODs or the guest becomes unresponsive.

Interesting. You didn't clarify what the printed "offset" values are,
and it doesn't look like these have any correlation with the underlying
(guest) physical address, which we would also want to see. And then
it strikes me as odd that in these last lines

(XEN) Mem event (RETRY) emulation failed: d5v8 32bit @ 0008:826bb861 -> f0 0f ba 30 00 72 07 8b cb e8 da 4b ff ff 8b 45
(XEN) virtual address: 0xffd080f0, offset: 4291854576
(XEN) MMIO emulation failed: d5v8 32bit @ 0008:82655f3c -> f0 0f ba 30 00 72 07 8b cb e8 da 4b ff ff 8b 45

the instruction pointers and virtual addresses are different, but the
code bytes are exactly the same. This doesn't seem very likely, so I
wonder whether there's an issue with us wrongly re-using previously
fetched insn bytes. (Of course I'd be happy to be proven wrong with
this guessing, by you checking the involved binary/ies.)

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH RFC] x86/emulate: implement hvmemul_cmpxchg() with an actual CMPXCHG
  2017-03-31 14:46                                     ` Jan Beulich
@ 2017-03-31 15:01                                       ` Razvan Cojocaru
  2017-03-31 15:04                                         ` Jan Beulich
  0 siblings, 1 reply; 38+ messages in thread
From: Razvan Cojocaru @ 2017-03-31 15:01 UTC (permalink / raw)
  To: Jan Beulich; +Cc: andrew.cooper3, paul.durrant, Tim Deegan, xen-devel

On 03/31/2017 05:46 PM, Jan Beulich wrote:
>>>> On 31.03.17 at 11:56, <rcojocaru@bitdefender.com> wrote:
>> On 03/31/2017 10:34 AM, Jan Beulich wrote:
>>>>>> On 31.03.17 at 08:17, <rcojocaru@bitdefender.com> wrote:
>>>> On 03/30/2017 06:47 PM, Jan Beulich wrote:
>>>>>> Speaking of emulated MMIO, I've got this when the guest was crashing
>>>>>> immediately (pre RETRY loop):
>>>>>>
>>>>>>  MMIO emulation failed: d3v8 32bit @ 0008:82679f3c -> f0 0f ba 30 00 72
>>>>>> 07 8b cb e8 da 4b ff ff 8b 45
>>>>>
>>>>> That's a BTR, which we should be emulating fine. More information
>>>>> would need to be collected to have a chance to understand what
>>>>> might be going one (first of all the virtual and physical memory
>>>>> address this was trying to act on).
>>>>
>>>> Right, the BTR part should be fine, but I think the LOCK part is what's
>>>> causing the issue. I've done a few more test runs to see what return
>>>> RETRY (dumping the instruction with an "(r)" prefix to distinguish from
>>>> the UNHANDLEABLE dump), and a couple of instructions return RETRY (BTR
>>>> and XADD, both LOCK-prefixed, which means they now involve CMPXCHG
>>>> handler, which presumably now fails - possibly simply because it's
>>>> always LOCKed in my patch):
>>>
>>> Well, all of that looks to be expected behavior. I'm afraid I don't see
>>> how this information helps understanding the MMIO emulation failure
>>> above.
>>
>> I've managed to obtain this log of emulation errors:
>> https://pastebin.com/Esy1SkHx 
>>
>> The "virtual address" lines that are not followed by any "Mem event"
>> line correspond to CMXCHG_FAILED return codes.
>>
>> The very last line is a MMIO emulation failed.
>>
>> It's probably important that this happens with the model where
>> hvm_emulate_one_vm_event() does _not_ re-try the emulation until it
>> succeeds. The other model allows me to go further with the guest, but
>> eventually I get timeout-related BSODs or the guest becomes unresponsive.
> 
> Interesting. You didn't clarify what the printed "offset" values are,
> and it doesn't look like these have any correlation with the underlying
> (guest) physical address, which we would also want to see. And then
> it strikes me as odd that in these last lines
> 
> (XEN) Mem event (RETRY) emulation failed: d5v8 32bit @ 0008:826bb861 -> f0 0f ba 30 00 72 07 8b cb e8 da 4b ff ff 8b 45
> (XEN) virtual address: 0xffd080f0, offset: 4291854576
> (XEN) MMIO emulation failed: d5v8 32bit @ 0008:82655f3c -> f0 0f ba 30 00 72 07 8b cb e8 da 4b ff ff 8b 45
> 
> the instruction pointers and virtual addresses are different, but the
> code bytes are exactly the same. This doesn't seem very likely, so I
> wonder whether there's an issue with us wrongly re-using previously
> fetched insn bytes. (Of course I'd be happy to be proven wrong with
> this guessing, by you checking the involved binary/ies.)

Offset is the actual value of the "offset" parameter of
hvmemul_cmpxchg(). I'll output more info and recheck.


Thanks,
Razvan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH RFC] x86/emulate: implement hvmemul_cmpxchg() with an actual CMPXCHG
  2017-03-31 15:01                                       ` Razvan Cojocaru
@ 2017-03-31 15:04                                         ` Jan Beulich
  2017-04-01 16:56                                           ` Razvan Cojocaru
  0 siblings, 1 reply; 38+ messages in thread
From: Jan Beulich @ 2017-03-31 15:04 UTC (permalink / raw)
  To: Razvan Cojocaru; +Cc: andrew.cooper3, paul.durrant, Tim Deegan, xen-devel

>>> On 31.03.17 at 17:01, <rcojocaru@bitdefender.com> wrote:
> On 03/31/2017 05:46 PM, Jan Beulich wrote:
>>>>> On 31.03.17 at 11:56, <rcojocaru@bitdefender.com> wrote:
>>> On 03/31/2017 10:34 AM, Jan Beulich wrote:
>>>>>>> On 31.03.17 at 08:17, <rcojocaru@bitdefender.com> wrote:
>>>>> On 03/30/2017 06:47 PM, Jan Beulich wrote:
>>>>>>> Speaking of emulated MMIO, I've got this when the guest was crashing
>>>>>>> immediately (pre RETRY loop):
>>>>>>>
>>>>>>>  MMIO emulation failed: d3v8 32bit @ 0008:82679f3c -> f0 0f ba 30 00 72
>>>>>>> 07 8b cb e8 da 4b ff ff 8b 45
>>>>>>
>>>>>> That's a BTR, which we should be emulating fine. More information
>>>>>> would need to be collected to have a chance to understand what
>>>>>> might be going one (first of all the virtual and physical memory
>>>>>> address this was trying to act on).
>>>>>
>>>>> Right, the BTR part should be fine, but I think the LOCK part is what's
>>>>> causing the issue. I've done a few more test runs to see what return
>>>>> RETRY (dumping the instruction with an "(r)" prefix to distinguish from
>>>>> the UNHANDLEABLE dump), and a couple of instructions return RETRY (BTR
>>>>> and XADD, both LOCK-prefixed, which means they now involve CMPXCHG
>>>>> handler, which presumably now fails - possibly simply because it's
>>>>> always LOCKed in my patch):
>>>>
>>>> Well, all of that looks to be expected behavior. I'm afraid I don't see
>>>> how this information helps understanding the MMIO emulation failure
>>>> above.
>>>
>>> I've managed to obtain this log of emulation errors:
>>> https://pastebin.com/Esy1SkHx 
>>>
>>> The "virtual address" lines that are not followed by any "Mem event"
>>> line correspond to CMXCHG_FAILED return codes.
>>>
>>> The very last line is a MMIO emulation failed.
>>>
>>> It's probably important that this happens with the model where
>>> hvm_emulate_one_vm_event() does _not_ re-try the emulation until it
>>> succeeds. The other model allows me to go further with the guest, but
>>> eventually I get timeout-related BSODs or the guest becomes unresponsive.
>> 
>> Interesting. You didn't clarify what the printed "offset" values are,
>> and it doesn't look like these have any correlation with the underlying
>> (guest) physical address, which we would also want to see. And then
>> it strikes me as odd that in these last lines
>> 
>> (XEN) Mem event (RETRY) emulation failed: d5v8 32bit @ 0008:826bb861 -> f0 0f 
> ba 30 00 72 07 8b cb e8 da 4b ff ff 8b 45
>> (XEN) virtual address: 0xffd080f0, offset: 4291854576
>> (XEN) MMIO emulation failed: d5v8 32bit @ 0008:82655f3c -> f0 0f ba 30 00 72 
> 07 8b cb e8 da 4b ff ff 8b 45
>> 
>> the instruction pointers and virtual addresses are different, but the
>> code bytes are exactly the same. This doesn't seem very likely, so I
>> wonder whether there's an issue with us wrongly re-using previously
>> fetched insn bytes. (Of course I'd be happy to be proven wrong with
>> this guessing, by you checking the involved binary/ies.)
> 
> Offset is the actual value of the "offset" parameter of
> hvmemul_cmpxchg().

That's not very useful then, as for flat segments "offset" ==
"virtual address" (i.e. you merely re-print in decimal what you've
already printed in hex).

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH RFC] x86/emulate: implement hvmemul_cmpxchg() with an actual CMPXCHG
  2017-03-31 15:04                                         ` Jan Beulich
@ 2017-04-01 16:56                                           ` Razvan Cojocaru
  2017-04-03 10:23                                             ` Jan Beulich
  2017-04-03 18:20                                             ` Razvan Cojocaru
  0 siblings, 2 replies; 38+ messages in thread
From: Razvan Cojocaru @ 2017-04-01 16:56 UTC (permalink / raw)
  To: Jan Beulich; +Cc: andrew.cooper3, paul.durrant, Tim Deegan, xen-devel

[-- Attachment #1: Type: text/plain, Size: 3810 bytes --]

On 03/31/2017 06:04 PM, Jan Beulich wrote:
>>>> On 31.03.17 at 17:01, <rcojocaru@bitdefender.com> wrote:
>> On 03/31/2017 05:46 PM, Jan Beulich wrote:
>>>>>> On 31.03.17 at 11:56, <rcojocaru@bitdefender.com> wrote:
>>>> On 03/31/2017 10:34 AM, Jan Beulich wrote:
>>>>>>>> On 31.03.17 at 08:17, <rcojocaru@bitdefender.com> wrote:
>>>>>> On 03/30/2017 06:47 PM, Jan Beulich wrote:
>>>>>>>> Speaking of emulated MMIO, I've got this when the guest was crashing
>>>>>>>> immediately (pre RETRY loop):
>>>>>>>>
>>>>>>>>  MMIO emulation failed: d3v8 32bit @ 0008:82679f3c -> f0 0f ba 30 00 72
>>>>>>>> 07 8b cb e8 da 4b ff ff 8b 45
>>>>>>>
>>>>>>> That's a BTR, which we should be emulating fine. More information
>>>>>>> would need to be collected to have a chance to understand what
>>>>>>> might be going one (first of all the virtual and physical memory
>>>>>>> address this was trying to act on).
>>>>>>
>>>>>> Right, the BTR part should be fine, but I think the LOCK part is what's
>>>>>> causing the issue. I've done a few more test runs to see what return
>>>>>> RETRY (dumping the instruction with an "(r)" prefix to distinguish from
>>>>>> the UNHANDLEABLE dump), and a couple of instructions return RETRY (BTR
>>>>>> and XADD, both LOCK-prefixed, which means they now involve CMPXCHG
>>>>>> handler, which presumably now fails - possibly simply because it's
>>>>>> always LOCKed in my patch):
>>>>>
>>>>> Well, all of that looks to be expected behavior. I'm afraid I don't see
>>>>> how this information helps understanding the MMIO emulation failure
>>>>> above.
>>>>
>>>> I've managed to obtain this log of emulation errors:
>>>> https://pastebin.com/Esy1SkHx 
>>>>
>>>> The "virtual address" lines that are not followed by any "Mem event"
>>>> line correspond to CMXCHG_FAILED return codes.
>>>>
>>>> The very last line is a MMIO emulation failed.
>>>>
>>>> It's probably important that this happens with the model where
>>>> hvm_emulate_one_vm_event() does _not_ re-try the emulation until it
>>>> succeeds. The other model allows me to go further with the guest, but
>>>> eventually I get timeout-related BSODs or the guest becomes unresponsive.
>>>
>>> Interesting. You didn't clarify what the printed "offset" values are,
>>> and it doesn't look like these have any correlation with the underlying
>>> (guest) physical address, which we would also want to see. And then
>>> it strikes me as odd that in these last lines
>>>
>>> (XEN) Mem event (RETRY) emulation failed: d5v8 32bit @ 0008:826bb861 -> f0 0f 
>> ba 30 00 72 07 8b cb e8 da 4b ff ff 8b 45
>>> (XEN) virtual address: 0xffd080f0, offset: 4291854576
>>> (XEN) MMIO emulation failed: d5v8 32bit @ 0008:82655f3c -> f0 0f ba 30 00 72 
>> 07 8b cb e8 da 4b ff ff 8b 45
>>>
>>> the instruction pointers and virtual addresses are different, but the
>>> code bytes are exactly the same. This doesn't seem very likely, so I
>>> wonder whether there's an issue with us wrongly re-using previously
>>> fetched insn bytes. (Of course I'd be happy to be proven wrong with
>>> this guessing, by you checking the involved binary/ies.)
>>
>> Offset is the actual value of the "offset" parameter of
>> hvmemul_cmpxchg().
> 
> That's not very useful then, as for flat segments "offset" ==
> "virtual address" (i.e. you merely re-print in decimal what you've
> already printed in hex).

The attached patch (a combination of your patch and mine) produces the
following output when booting a Windows 7 32-bit guest with monitoring:
https://pastebin.com/ayiFmj1N

The failed MMIO emulation is caused by a mapping failure due to the
"!nestedhvm_vcpu_in_guestmode(curr) && hvm_mmio_internal(gpa)" condition
being true in hvmemul_vaddr_to_mfn(). I've ripped that off from
__hvm_copy() but it looks like that might not be the right way to use it.


Thanks,
Razvan

[-- Attachment #2: combined_patches.patch --]
[-- Type: text/x-patch, Size: 22161 bytes --]

diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
index 2d92957..f6244af 100644
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -20,6 +20,7 @@
 #include <asm/hvm/emulate.h>
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/ioreq.h>
+#include <asm/hvm/nestedhvm.h>
 #include <asm/hvm/trace.h>
 #include <asm/hvm/support.h>
 #include <asm/hvm/svm/svm.h>
@@ -1029,6 +1030,86 @@ static int hvmemul_wbinvd_discard(
     return X86EMUL_OKAY;
 }
 
+static int hvmemul_vaddr_to_mfn(
+    unsigned long addr,
+    mfn_t *mfn,
+    uint32_t pfec,
+    struct x86_emulate_ctxt *ctxt)
+{
+    paddr_t gpa = addr & ~PAGE_MASK;
+    struct page_info *page;
+    p2m_type_t p2mt;
+    unsigned long gfn;
+    struct vcpu *curr = current;
+    struct hvm_emulate_ctxt *hvmemul_ctxt =
+        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
+
+    gfn = paging_gva_to_gfn(curr, addr, &pfec);
+
+    printk("gfn: 0x%lx\n", gfn);
+
+    if ( gfn == gfn_x(INVALID_GFN) )
+    {
+        pagefault_info_t pfinfo = {};
+
+        if ( ( pfec & PFEC_page_paged ) || ( pfec & PFEC_page_shared ) )
+            return X86EMUL_RETRY;
+
+        pfinfo.linear = addr;
+        pfinfo.ec = pfec;
+
+        x86_emul_pagefault(pfinfo.ec, pfinfo.linear, &hvmemul_ctxt->ctxt);
+        return X86EMUL_EXCEPTION;
+    }
+
+    gpa |= (paddr_t)gfn << PAGE_SHIFT;
+
+    /*
+     * No need to do the P2M lookup for internally handled MMIO, benefiting
+     * - 32-bit WinXP (& older Windows) on AMD CPUs for LAPIC accesses,
+     * - newer Windows (like Server 2012) for HPET accesses.
+     */
+    if ( !nestedhvm_vcpu_in_guestmode(curr) && hvm_mmio_internal(gpa) )
+    {
+        printk("!nestedhvm_vcpu_in_guestmode(curr) && hvm_mmio_internal(gpa)\n");
+        return X86EMUL_UNHANDLEABLE;
+    }
+
+    page = get_page_from_gfn(curr->domain, gfn, &p2mt, P2M_UNSHARE);
+
+    if ( !page )
+    {
+        printk("!page\n");
+        return X86EMUL_UNHANDLEABLE;
+    }
+
+    if ( p2m_is_paging(p2mt) )
+    {
+        put_page(page);
+        p2m_mem_paging_populate(curr->domain, gfn);
+        return X86EMUL_RETRY;
+    }
+
+    if ( p2m_is_shared(p2mt) )
+    {
+        put_page(page);
+        return X86EMUL_RETRY;
+    }
+
+    if ( p2m_is_grant(p2mt) )
+    {
+        put_page(page);
+        printk("p2m_is_grant(p2mt)\n");
+        return X86EMUL_UNHANDLEABLE;
+    }
+
+    *mfn = _mfn(page_to_mfn(page));
+
+    put_page(page);
+
+    return X86EMUL_OKAY;
+}
+
 static int hvmemul_cmpxchg(
     enum x86_segment seg,
     unsigned long offset,
@@ -1037,8 +1118,98 @@ static int hvmemul_cmpxchg(
     unsigned int bytes,
     struct x86_emulate_ctxt *ctxt)
 {
-    /* Fix this in case the guest is really relying on r-m-w atomicity. */
-    return hvmemul_write(seg, offset, p_new, bytes, ctxt);
+    unsigned long addr = 0, reps = 1;
+    int rc = X86EMUL_OKAY;
+    unsigned long old = 0, new = 0;
+    uint32_t pfec = PFEC_page_present | PFEC_write_access;
+    struct hvm_emulate_ctxt *hvmemul_ctxt =
+        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
+    mfn_t mfn[2];
+    void *map = NULL;
+    struct domain *currd = current->domain;
+    unsigned long ret;
+
+    if ( is_x86_system_segment(seg) )
+        pfec |= PFEC_implicit;
+    else if ( hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.dpl == 3 )
+        pfec |= PFEC_user_mode;
+
+    rc = hvmemul_virtual_to_linear(
+        seg, offset, bytes, &reps, hvm_access_write, hvmemul_ctxt, &addr);
+
+    if ( rc != X86EMUL_OKAY || !bytes )
+    {
+        printk("rc != X86EMUL_OKAY || !bytes\n");
+        goto out;
+    }
+
+    rc = hvmemul_vaddr_to_mfn(addr, &mfn[0], pfec, ctxt);
+
+    if ( rc != X86EMUL_OKAY )
+    {
+        printk("hvmemul_vaddr_to_mfn() fail\n");
+        goto out;
+    }
+
+    if ( likely(((addr + bytes - 1) & PAGE_MASK) == (addr & PAGE_MASK)) )
+    {
+        /* Whole write fits on a single page. */
+        mfn[1] = INVALID_MFN;
+        map = map_domain_page(mfn[0]);
+    }
+    else
+    {
+        rc = hvmemul_vaddr_to_mfn((addr + bytes - 1) & PAGE_MASK, &mfn[1],
+                                  pfec, ctxt);
+        if ( rc != X86EMUL_OKAY )
+        {
+            printk("hvmemul_vaddr_to_mfn(mfn[1]) fail\n");
+            goto out;
+        }
+
+        map = vmap(mfn, 2);
+    }
+
+    if ( !map )
+    {
+        printk("!map\n");
+        return X86EMUL_UNHANDLEABLE;
+    }
+
+    map += (addr & ~PAGE_MASK);
+
+    memcpy(&old, p_old, bytes);
+    memcpy(&new, p_new, bytes);
+
+    ret = __cmpxchg(map, old, new, bytes);
+
+    if ( ret != old )
+    {
+        memcpy(p_old, &ret, bytes);
+        rc = X86EMUL_CMPXCHG_FAILED;
+    }
+
+    paging_mark_dirty(currd, mfn[0]);
+
+    if ( unlikely(mfn_valid(mfn[1])) )
+    {
+        paging_mark_dirty(currd, mfn[1]);
+        vunmap((void *)((unsigned long)map & PAGE_MASK));
+    }
+    else
+        unmap_domain_page(map);
+
+out:
+    if ( rc != X86EMUL_OKAY )
+    {
+        if ( rc != X86EMUL_CMPXCHG_FAILED )
+            rc = X86EMUL_UNHANDLEABLE;
+    }
+
+    printk("[%d] virtual address: 0x%lx, rc: %d\n",
+           current->vcpu_id, addr, rc);
+
+    return rc;
 }
 
 static int hvmemul_validate(
@@ -1961,59 +2132,64 @@ int hvm_emulate_one_mmio(unsigned long mfn, unsigned long gla)
 void hvm_emulate_one_vm_event(enum emul_kind kind, unsigned int trapnr,
     unsigned int errcode)
 {
-    struct hvm_emulate_ctxt ctx = {{ 0 }};
-    int rc;
+    int rc = X86EMUL_OKAY;
 
-    hvm_emulate_init_once(&ctx, NULL, guest_cpu_user_regs());
+    /* do { */
+        struct hvm_emulate_ctxt ctx = {{ 0 }};
 
-    switch ( kind )
-    {
-    case EMUL_KIND_NOWRITE:
-        rc = _hvm_emulate_one(&ctx, &hvm_emulate_ops_no_write);
-        break;
-    case EMUL_KIND_SET_CONTEXT_INSN: {
-        struct vcpu *curr = current;
-        struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
+        hvm_emulate_init_once(&ctx, NULL, guest_cpu_user_regs());
 
-        BUILD_BUG_ON(sizeof(vio->mmio_insn) !=
-                     sizeof(curr->arch.vm_event->emul.insn.data));
-        ASSERT(!vio->mmio_insn_bytes);
+        switch ( kind )
+        {
+        case EMUL_KIND_NOWRITE:
+            rc = _hvm_emulate_one(&ctx, &hvm_emulate_ops_no_write);
+            break;
+        case EMUL_KIND_SET_CONTEXT_INSN: {
+            struct vcpu *curr = current;
+            struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
+
+            BUILD_BUG_ON(sizeof(vio->mmio_insn) !=
+                         sizeof(curr->arch.vm_event->emul.insn.data));
+            ASSERT(!vio->mmio_insn_bytes);
+
+            /*
+             * Stash insn buffer into mmio buffer here instead of ctx
+             * to avoid having to add more logic to hvm_emulate_one.
+             */
+            vio->mmio_insn_bytes = sizeof(vio->mmio_insn);
+            memcpy(vio->mmio_insn, curr->arch.vm_event->emul.insn.data,
+                   vio->mmio_insn_bytes);
+        }
+        /* Fall-through */
+        default:
+            ctx.set_context = (kind == EMUL_KIND_SET_CONTEXT_DATA);
+            rc = hvm_emulate_one(&ctx);
+        }
 
-        /*
-         * Stash insn buffer into mmio buffer here instead of ctx
-         * to avoid having to add more logic to hvm_emulate_one.
-         */
-        vio->mmio_insn_bytes = sizeof(vio->mmio_insn);
-        memcpy(vio->mmio_insn, curr->arch.vm_event->emul.insn.data,
-               vio->mmio_insn_bytes);
-    }
-    /* Fall-through */
-    default:
-        ctx.set_context = (kind == EMUL_KIND_SET_CONTEXT_DATA);
-        rc = hvm_emulate_one(&ctx);
-    }
+        if ( rc != X86EMUL_OKAY )
+        {
+            printk("Dump follows for VCPU %d\n", current->vcpu_id);
+        }
 
-    switch ( rc )
-    {
-    case X86EMUL_RETRY:
-        /*
-         * This function is called when handling an EPT-related vm_event
-         * reply. As such, nothing else needs to be done here, since simply
-         * returning makes the current instruction cause a page fault again,
-         * consistent with X86EMUL_RETRY.
-         */
-        return;
-    case X86EMUL_UNHANDLEABLE:
-        hvm_dump_emulation_state(XENLOG_G_DEBUG "Mem event", &ctx);
-        hvm_inject_hw_exception(trapnr, errcode);
-        break;
-    case X86EMUL_EXCEPTION:
-        if ( ctx.ctxt.event_pending )
-            hvm_inject_event(&ctx.ctxt.event);
-        break;
-    }
+        switch ( rc )
+        {
+        case X86EMUL_RETRY:
+            /* break; */
+            hvm_dump_emulation_state(XENLOG_G_DEBUG "Mem event (RETRY)", &ctx);
+            return;
+        case X86EMUL_UNHANDLEABLE:
+            hvm_dump_emulation_state(XENLOG_G_DEBUG "Mem event", &ctx);
+            hvm_inject_hw_exception(trapnr, errcode);
+            break;
+        case X86EMUL_EXCEPTION:
+            if ( ctx.ctxt.event_pending )
+                hvm_inject_event(&ctx.ctxt.event);
+            break;
+        }
+
+        hvm_emulate_writeback(&ctx);
 
-    hvm_emulate_writeback(&ctx);
+    /* } while( rc == X86EMUL_RETRY ); */
 }
 
 void hvm_emulate_init_once(
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 4dbd24f..a67cd55 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -5176,16 +5176,17 @@ static int ptwr_emulated_read(
 
 static int ptwr_emulated_update(
     unsigned long addr,
-    paddr_t old,
-    paddr_t val,
+    intpte_t *p_old,
+    intpte_t val,
     unsigned int bytes,
-    unsigned int do_cmpxchg,
     struct ptwr_emulate_ctxt *ptwr_ctxt)
 {
     unsigned long mfn;
     unsigned long unaligned_addr = addr;
     struct page_info *page;
     l1_pgentry_t pte, ol1e, nl1e, *pl1e;
+    intpte_t old = p_old ? *p_old : 0;
+    unsigned int offset = 0;
     struct vcpu *v = current;
     struct domain *d = v->domain;
     int ret;
@@ -5199,28 +5200,30 @@ static int ptwr_emulated_update(
     }
 
     /* Turn a sub-word access into a full-word access. */
-    if ( bytes != sizeof(paddr_t) )
+    if ( bytes != sizeof(val) )
     {
-        paddr_t      full;
-        unsigned int rc, offset = addr & (sizeof(paddr_t)-1);
+        intpte_t full;
+        unsigned int rc;
+
+        offset = addr & (sizeof(full) - 1);
 
         /* Align address; read full word. */
-        addr &= ~(sizeof(paddr_t)-1);
-        if ( (rc = copy_from_user(&full, (void *)addr, sizeof(paddr_t))) != 0 )
+        addr &= ~(sizeof(full) - 1);
+        if ( (rc = copy_from_user(&full, (void *)addr, sizeof(full))) != 0 )
         {
             x86_emul_pagefault(0, /* Read fault. */
-                               addr + sizeof(paddr_t) - rc,
+                               addr + sizeof(full) - rc,
                                &ptwr_ctxt->ctxt);
             return X86EMUL_EXCEPTION;
         }
         /* Mask out bits provided by caller. */
-        full &= ~((((paddr_t)1 << (bytes*8)) - 1) << (offset*8));
+        full &= ~((((intpte_t)1 << (bytes * 8)) - 1) << (offset * 8));
         /* Shift the caller value and OR in the missing bits. */
-        val  &= (((paddr_t)1 << (bytes*8)) - 1);
+        val  &= (((intpte_t)1 << (bytes * 8)) - 1);
         val <<= (offset)*8;
         val  |= full;
         /* Also fill in missing parts of the cmpxchg old value. */
-        old  &= (((paddr_t)1 << (bytes*8)) - 1);
+        old  &= (((intpte_t)1 << (bytes * 8)) - 1);
         old <<= (offset)*8;
         old  |= full;
     }
@@ -5242,7 +5245,7 @@ static int ptwr_emulated_update(
     {
     default:
         if ( is_pv_32bit_domain(d) && (bytes == 4) && (unaligned_addr & 4) &&
-             !do_cmpxchg && (l1e_get_flags(nl1e) & _PAGE_PRESENT) )
+             !p_old && (l1e_get_flags(nl1e) & _PAGE_PRESENT) )
         {
             /*
              * If this is an upper-half write to a PAE PTE then we assume that
@@ -5273,21 +5276,26 @@ static int ptwr_emulated_update(
     /* Checked successfully: do the update (write or cmpxchg). */
     pl1e = map_domain_page(_mfn(mfn));
     pl1e = (l1_pgentry_t *)((unsigned long)pl1e + (addr & ~PAGE_MASK));
-    if ( do_cmpxchg )
+    if ( p_old )
     {
-        int okay;
-        intpte_t t = old;
         ol1e = l1e_from_intpte(old);
 
-        okay = paging_cmpxchg_guest_entry(v, &l1e_get_intpte(*pl1e),
-                                          &t, l1e_get_intpte(nl1e), _mfn(mfn));
-        okay = (okay && t == old);
+        if ( !paging_cmpxchg_guest_entry(v, &l1e_get_intpte(*pl1e),
+                                         &old, l1e_get_intpte(nl1e), _mfn(mfn)) )
+            ret = X86EMUL_UNHANDLEABLE;
+        else if ( l1e_get_intpte(ol1e) == old )
+            ret = X86EMUL_OKAY;
+        else
+        {
+            *p_old = old >> (offset * 8);
+            ret = X86EMUL_CMPXCHG_FAILED;
+        }
 
-        if ( !okay )
+        if ( ret != X86EMUL_OKAY )
         {
             unmap_domain_page(pl1e);
             put_page_from_l1e(nl1e, d);
-            return X86EMUL_RETRY;
+            return ret;
         }
     }
     else
@@ -5314,9 +5322,9 @@ static int ptwr_emulated_write(
     unsigned int bytes,
     struct x86_emulate_ctxt *ctxt)
 {
-    paddr_t val = 0;
+    intpte_t val = 0;
 
-    if ( (bytes > sizeof(paddr_t)) || (bytes & (bytes - 1)) || !bytes )
+    if ( (bytes > sizeof(val)) || (bytes & (bytes - 1)) || !bytes )
     {
         MEM_LOG("ptwr_emulate: bad write size (addr=%lx, bytes=%u)",
                 offset, bytes);
@@ -5325,9 +5333,9 @@ static int ptwr_emulated_write(
 
     memcpy(&val, p_data, bytes);
 
-    return ptwr_emulated_update(
-        offset, 0, val, bytes, 0,
-        container_of(ctxt, struct ptwr_emulate_ctxt, ctxt));
+    return ptwr_emulated_update(offset, NULL, val, bytes,
+                                container_of(ctxt, struct ptwr_emulate_ctxt,
+                                             ctxt));
 }
 
 static int ptwr_emulated_cmpxchg(
@@ -5338,21 +5346,20 @@ static int ptwr_emulated_cmpxchg(
     unsigned int bytes,
     struct x86_emulate_ctxt *ctxt)
 {
-    paddr_t old = 0, new = 0;
+    intpte_t new = 0;
 
-    if ( (bytes > sizeof(paddr_t)) || (bytes & (bytes -1)) )
+    if ( (bytes > sizeof(new)) || (bytes & (bytes -1)) )
     {
         MEM_LOG("ptwr_emulate: bad cmpxchg size (addr=%lx, bytes=%u)",
                 offset, bytes);
         return X86EMUL_UNHANDLEABLE;
     }
 
-    memcpy(&old, p_old, bytes);
     memcpy(&new, p_new, bytes);
 
-    return ptwr_emulated_update(
-        offset, old, new, bytes, 1,
-        container_of(ctxt, struct ptwr_emulate_ctxt, ctxt));
+    return ptwr_emulated_update(offset, p_old, new, bytes,
+                                container_of(ctxt, struct ptwr_emulate_ctxt,
+                                             ctxt));
 }
 
 static int pv_emul_is_mem_write(const struct x86_emulate_state *state,
diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
index d93f2ab..06dc9f6 100644
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -285,7 +285,7 @@ hvm_emulate_cmpxchg(enum x86_segment seg,
     struct sh_emulate_ctxt *sh_ctxt =
         container_of(ctxt, struct sh_emulate_ctxt, ctxt);
     struct vcpu *v = current;
-    unsigned long addr, old, new;
+    unsigned long addr, new = 0;
     int rc;
 
     if ( bytes > sizeof(long) )
@@ -296,12 +296,10 @@ hvm_emulate_cmpxchg(enum x86_segment seg,
     if ( rc )
         return rc;
 
-    old = new = 0;
-    memcpy(&old, p_old, bytes);
     memcpy(&new, p_new, bytes);
 
     return v->arch.paging.mode->shadow.x86_emulate_cmpxchg(
-               v, addr, old, new, bytes, sh_ctxt);
+               v, addr, p_old, new, bytes, sh_ctxt);
 }
 
 static const struct x86_emulate_ops hvm_shadow_emulator_ops = {
diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
index 4798c93..d8270db 100644
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -4783,11 +4783,11 @@ sh_x86_emulate_write(struct vcpu *v, unsigned long vaddr, void *src,
 
 static int
 sh_x86_emulate_cmpxchg(struct vcpu *v, unsigned long vaddr,
-                        unsigned long old, unsigned long new,
-                        unsigned int bytes, struct sh_emulate_ctxt *sh_ctxt)
+                       unsigned long *p_old, unsigned long new,
+                       unsigned int bytes, struct sh_emulate_ctxt *sh_ctxt)
 {
     void *addr;
-    unsigned long prev;
+    unsigned long prev, old = *p_old;
     int rv = X86EMUL_OKAY;
 
     /* Unaligned writes are only acceptable on HVM */
@@ -4811,7 +4811,10 @@ sh_x86_emulate_cmpxchg(struct vcpu *v, unsigned long vaddr,
     }
 
     if ( prev != old )
-        rv = X86EMUL_RETRY;
+    {
+        *p_old = prev;
+        rv = X86EMUL_CMPXCHG_FAILED;
+    }
 
     SHADOW_DEBUG(EMULATE, "va %#lx was %#lx expected %#lx"
                   " wanted %#lx now %#lx bytes %u\n",
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
index bb67be6..444c84c 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1862,6 +1862,9 @@ protmode_load_seg(
 
         default:
             return rc;
+
+        case X86EMUL_CMPXCHG_FAILED:
+            return X86EMUL_RETRY;
         }
 
         /* Force the Accessed flag in our local copy. */
@@ -6680,6 +6683,7 @@ x86_emulate(
         break;
 
     case X86EMUL_OPC(0x0f, 0xb0): case X86EMUL_OPC(0x0f, 0xb1): /* cmpxchg */
+        fail_if(!ops->cmpxchg);
         /* Save real source value, then compare EAX against destination. */
         src.orig_val = src.val;
         src.val = _regs.r(ax);
@@ -6688,8 +6692,17 @@ x86_emulate(
         if ( _regs.eflags & X86_EFLAGS_ZF )
         {
             /* Success: write back to memory. */
-            dst.val = src.orig_val;
+            dst.val = src.val;
+            rc = ops->cmpxchg(dst.mem.seg, dst.mem.off, &dst.val,
+                              &src.orig_val, dst.bytes, ctxt);
+            if ( rc == X86EMUL_CMPXCHG_FAILED )
+            {
+               _regs.eflags &= ~X86_EFLAGS_ZF;
+               rc = X86EMUL_OKAY;
+            }
         }
+        if ( _regs.eflags & X86_EFLAGS_ZF )
+            dst.type = OP_NONE;
         else
         {
             /* Failure: write the value we saw to EAX. */
@@ -6994,6 +7007,7 @@ x86_emulate(
 
         if ( memcmp(old, aux, op_bytes) )
         {
+        cmpxchgNb_failed:
             /* Expected != actual: store actual to rDX:rAX and clear ZF. */
             _regs.r(ax) = !(rex_prefix & REX_W) ? old->u32[0] : old->u64[0];
             _regs.r(dx) = !(rex_prefix & REX_W) ? old->u32[1] : old->u64[1];
@@ -7003,7 +7017,7 @@ x86_emulate(
         {
             /*
              * Expected == actual: Get proposed value, attempt atomic cmpxchg
-             * and set ZF.
+             * and set ZF if successful.
              */
             if ( !(rex_prefix & REX_W) )
             {
@@ -7016,10 +7030,20 @@ x86_emulate(
                 aux->u64[1] = _regs.r(cx);
             }
 
-            if ( (rc = ops->cmpxchg(ea.mem.seg, ea.mem.off, old, aux,
-                                    op_bytes, ctxt)) != X86EMUL_OKAY )
+            switch ( rc = ops->cmpxchg(ea.mem.seg, ea.mem.off, old, aux,
+                                       op_bytes, ctxt) )
+            {
+            case X86EMUL_OKAY:
+                _regs.eflags |= X86_EFLAGS_ZF;
+                break;
+
+            case X86EMUL_CMPXCHG_FAILED:
+                rc = X86EMUL_OKAY;
+                goto cmpxchgNb_failed;
+
+            default:
                 goto done;
-            _regs.eflags |= X86_EFLAGS_ZF;
+            }
         }
         break;
     }
@@ -7942,6 +7966,8 @@ x86_emulate(
             rc = ops->cmpxchg(
                 dst.mem.seg, dst.mem.off, &dst.orig_val,
                 &dst.val, dst.bytes, ctxt);
+            if ( rc == X86EMUL_CMPXCHG_FAILED )
+                rc = X86EMUL_RETRY;
         }
         else
         {
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.h b/xen/arch/x86/x86_emulate/x86_emulate.h
index 9c5fcde..6a8d6a0 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.h
+++ b/xen/arch/x86/x86_emulate/x86_emulate.h
@@ -153,6 +153,8 @@ struct x86_emul_fpu_aux {
 #define X86EMUL_EXCEPTION      2
  /* Retry the emulation for some reason. No state modified. */
 #define X86EMUL_RETRY          3
+ /* (cmpxchg accessor): CMPXCHG failed. */
+#define X86EMUL_CMPXCHG_FAILED 4
  /*
   * Operation fully done by one of the hooks:
   * - validate(): operation completed (except common insn retire logic)
@@ -160,7 +162,7 @@ struct x86_emul_fpu_aux {
   * - read_io() / write_io(): bypass GPR update (non-string insns only)
   * Undefined behavior when used anywhere else.
   */
-#define X86EMUL_DONE           4
+#define X86EMUL_DONE           5
 
 /* FPU sub-types which may be requested via ->get_fpu(). */
 enum x86_emulate_fpu_type {
@@ -250,6 +252,8 @@ struct x86_emulate_ops
     /*
      * cmpxchg: Emulate an atomic (LOCKed) CMPXCHG operation.
      *  @p_old: [IN ] Pointer to value expected to be current at @addr.
+     *          [OUT] Pointer to value found at @addr (may always be
+     *                updated, meaningful for X86EMUL_CMPXCHG_FAILED only).
      *  @p_new: [IN ] Pointer to value to write to @addr.
      *  @bytes: [IN ] Operation size (up to 8 (x86/32) or 16 (x86/64) bytes).
      */
diff --git a/xen/include/asm-x86/paging.h b/xen/include/asm-x86/paging.h
index f262c9e..3efcf6d 100644
--- a/xen/include/asm-x86/paging.h
+++ b/xen/include/asm-x86/paging.h
@@ -89,7 +89,7 @@ struct shadow_paging_mode {
                                             void *src, u32 bytes,
                                             struct sh_emulate_ctxt *sh_ctxt);
     int           (*x86_emulate_cmpxchg   )(struct vcpu *v, unsigned long va,
-                                            unsigned long old, 
+                                            unsigned long *old,
                                             unsigned long new,
                                             unsigned int bytes,
                                             struct sh_emulate_ctxt *sh_ctxt);

[-- Attachment #3: Type: text/plain, Size: 127 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply related	[flat|nested] 38+ messages in thread

* Re: [PATCH RFC] x86/emulate: implement hvmemul_cmpxchg() with an actual CMPXCHG
  2017-04-01 16:56                                           ` Razvan Cojocaru
@ 2017-04-03 10:23                                             ` Jan Beulich
  2017-04-03 18:20                                             ` Razvan Cojocaru
  1 sibling, 0 replies; 38+ messages in thread
From: Jan Beulich @ 2017-04-03 10:23 UTC (permalink / raw)
  To: Razvan Cojocaru; +Cc: andrew.cooper3, paul.durrant, Tim Deegan, xen-devel

>>> On 01.04.17 at 18:56, <rcojocaru@bitdefender.com> wrote:
> On 03/31/2017 06:04 PM, Jan Beulich wrote:
>>>>> On 31.03.17 at 17:01, <rcojocaru@bitdefender.com> wrote:
>>> On 03/31/2017 05:46 PM, Jan Beulich wrote:
>>>>>>> On 31.03.17 at 11:56, <rcojocaru@bitdefender.com> wrote:
>>>>> On 03/31/2017 10:34 AM, Jan Beulich wrote:
>>>>>>>>> On 31.03.17 at 08:17, <rcojocaru@bitdefender.com> wrote:
>>>>>>> On 03/30/2017 06:47 PM, Jan Beulich wrote:
>>>>>>>>> Speaking of emulated MMIO, I've got this when the guest was crashing
>>>>>>>>> immediately (pre RETRY loop):
>>>>>>>>>
>>>>>>>>>  MMIO emulation failed: d3v8 32bit @ 0008:82679f3c -> f0 0f ba 30 00 72
>>>>>>>>> 07 8b cb e8 da 4b ff ff 8b 45
>>>>>>>>
>>>>>>>> That's a BTR, which we should be emulating fine. More information
>>>>>>>> would need to be collected to have a chance to understand what
>>>>>>>> might be going one (first of all the virtual and physical memory
>>>>>>>> address this was trying to act on).
>>>>>>>
>>>>>>> Right, the BTR part should be fine, but I think the LOCK part is what's
>>>>>>> causing the issue. I've done a few more test runs to see what return
>>>>>>> RETRY (dumping the instruction with an "(r)" prefix to distinguish from
>>>>>>> the UNHANDLEABLE dump), and a couple of instructions return RETRY (BTR
>>>>>>> and XADD, both LOCK-prefixed, which means they now involve CMPXCHG
>>>>>>> handler, which presumably now fails - possibly simply because it's
>>>>>>> always LOCKed in my patch):
>>>>>>
>>>>>> Well, all of that looks to be expected behavior. I'm afraid I don't see
>>>>>> how this information helps understanding the MMIO emulation failure
>>>>>> above.
>>>>>
>>>>> I've managed to obtain this log of emulation errors:
>>>>> https://pastebin.com/Esy1SkHx 
>>>>>
>>>>> The "virtual address" lines that are not followed by any "Mem event"
>>>>> line correspond to CMXCHG_FAILED return codes.
>>>>>
>>>>> The very last line is a MMIO emulation failed.
>>>>>
>>>>> It's probably important that this happens with the model where
>>>>> hvm_emulate_one_vm_event() does _not_ re-try the emulation until it
>>>>> succeeds. The other model allows me to go further with the guest, but
>>>>> eventually I get timeout-related BSODs or the guest becomes unresponsive.
>>>>
>>>> Interesting. You didn't clarify what the printed "offset" values are,
>>>> and it doesn't look like these have any correlation with the underlying
>>>> (guest) physical address, which we would also want to see. And then
>>>> it strikes me as odd that in these last lines
>>>>
>>>> (XEN) Mem event (RETRY) emulation failed: d5v8 32bit @ 0008:826bb861 -> f0 0f 
> 
>>> ba 30 00 72 07 8b cb e8 da 4b ff ff 8b 45
>>>> (XEN) virtual address: 0xffd080f0, offset: 4291854576
>>>> (XEN) MMIO emulation failed: d5v8 32bit @ 0008:82655f3c -> f0 0f ba 30 00 72 
>>> 07 8b cb e8 da 4b ff ff 8b 45
>>>>
>>>> the instruction pointers and virtual addresses are different, but the
>>>> code bytes are exactly the same. This doesn't seem very likely, so I
>>>> wonder whether there's an issue with us wrongly re-using previously
>>>> fetched insn bytes. (Of course I'd be happy to be proven wrong with
>>>> this guessing, by you checking the involved binary/ies.)
>>>
>>> Offset is the actual value of the "offset" parameter of
>>> hvmemul_cmpxchg().
>> 
>> That's not very useful then, as for flat segments "offset" ==
>> "virtual address" (i.e. you merely re-print in decimal what you've
>> already printed in hex).
> 
> The attached patch (a combination of your patch and mine) produces the
> following output when booting a Windows 7 32-bit guest with monitoring:
> https://pastebin.com/ayiFmj1N 
> 
> The failed MMIO emulation is caused by a mapping failure due to the
> "!nestedhvm_vcpu_in_guestmode(curr) && hvm_mmio_internal(gpa)" condition
> being true in hvmemul_vaddr_to_mfn(). I've ripped that off from
> __hvm_copy() but it looks like that might not be the right way to use it.

(XEN) [7] virtual address: 0x8276d09c, rc: 4
(XEN) gfn: 0x276d
(XEN) Dump follows for VCPU 7
(XEN) Mem event (RETRY) emulation failed: d4v7 32bit @ 0008:826a1c7c -> f0 0f c1 08 85 c9 74 1f f6 c1 02 75 1a 41 8d 41
(XEN) [9] virtual address: 0x8276d0a8, rc: 0
(XEN) gfn: 0xfed00
(XEN) !nestedhvm_vcpu_in_guestmode(curr) && hvm_mmio_internal(gpa)
(XEN) hvmemul_vaddr_to_mfn() fail
(XEN) [7] virtual address: 0xffd080f0, rc: 1

It would help to know which of the earlier messages were also for
vCPU 7. In any event we again have the same suspicious pattern:
Virtual (and now also physical, assuming the gfn logged was that on
vCPU 7, not 9) address completely different, but ...

(XEN) MMIO emulation failed: d4v7 32bit @ 0008:8263bf3c -> f0 0f c1 08 85 c9 74 1f f6 c1 02 75 1a 41 8d 41

... instruction bytes identical between the last mem event emulation
and the failed MMIO one. Relatively unlikely to be pure coincidence.

Jan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH RFC] x86/emulate: implement hvmemul_cmpxchg() with an actual CMPXCHG
  2017-04-01 16:56                                           ` Razvan Cojocaru
  2017-04-03 10:23                                             ` Jan Beulich
@ 2017-04-03 18:20                                             ` Razvan Cojocaru
  2017-04-03 18:36                                               ` Razvan Cojocaru
  1 sibling, 1 reply; 38+ messages in thread
From: Razvan Cojocaru @ 2017-04-03 18:20 UTC (permalink / raw)
  To: Jan Beulich; +Cc: andrew.cooper3, paul.durrant, Tim Deegan, xen-devel

On 04/01/2017 07:56 PM, Razvan Cojocaru wrote:
> On 03/31/2017 06:04 PM, Jan Beulich wrote:
>>>>> On 31.03.17 at 17:01, <rcojocaru@bitdefender.com> wrote:
>>> On 03/31/2017 05:46 PM, Jan Beulich wrote:
>>>>>>> On 31.03.17 at 11:56, <rcojocaru@bitdefender.com> wrote:
>>>>> On 03/31/2017 10:34 AM, Jan Beulich wrote:
>>>>>>>>> On 31.03.17 at 08:17, <rcojocaru@bitdefender.com> wrote:
>>>>>>> On 03/30/2017 06:47 PM, Jan Beulich wrote:
>>>>>>>>> Speaking of emulated MMIO, I've got this when the guest was crashing
>>>>>>>>> immediately (pre RETRY loop):
>>>>>>>>>
>>>>>>>>>  MMIO emulation failed: d3v8 32bit @ 0008:82679f3c -> f0 0f ba 30 00 72
>>>>>>>>> 07 8b cb e8 da 4b ff ff 8b 45
>>>>>>>>
>>>>>>>> That's a BTR, which we should be emulating fine. More information
>>>>>>>> would need to be collected to have a chance to understand what
>>>>>>>> might be going one (first of all the virtual and physical memory
>>>>>>>> address this was trying to act on).
>>>>>>>
>>>>>>> Right, the BTR part should be fine, but I think the LOCK part is what's
>>>>>>> causing the issue. I've done a few more test runs to see what return
>>>>>>> RETRY (dumping the instruction with an "(r)" prefix to distinguish from
>>>>>>> the UNHANDLEABLE dump), and a couple of instructions return RETRY (BTR
>>>>>>> and XADD, both LOCK-prefixed, which means they now involve CMPXCHG
>>>>>>> handler, which presumably now fails - possibly simply because it's
>>>>>>> always LOCKed in my patch):
>>>>>>
>>>>>> Well, all of that looks to be expected behavior. I'm afraid I don't see
>>>>>> how this information helps understanding the MMIO emulation failure
>>>>>> above.
>>>>>
>>>>> I've managed to obtain this log of emulation errors:
>>>>> https://pastebin.com/Esy1SkHx 
>>>>>
>>>>> The "virtual address" lines that are not followed by any "Mem event"
>>>>> line correspond to CMXCHG_FAILED return codes.
>>>>>
>>>>> The very last line is a MMIO emulation failed.
>>>>>
>>>>> It's probably important that this happens with the model where
>>>>> hvm_emulate_one_vm_event() does _not_ re-try the emulation until it
>>>>> succeeds. The other model allows me to go further with the guest, but
>>>>> eventually I get timeout-related BSODs or the guest becomes unresponsive.
>>>>
>>>> Interesting. You didn't clarify what the printed "offset" values are,
>>>> and it doesn't look like these have any correlation with the underlying
>>>> (guest) physical address, which we would also want to see. And then
>>>> it strikes me as odd that in these last lines
>>>>
>>>> (XEN) Mem event (RETRY) emulation failed: d5v8 32bit @ 0008:826bb861 -> f0 0f 
>>> ba 30 00 72 07 8b cb e8 da 4b ff ff 8b 45
>>>> (XEN) virtual address: 0xffd080f0, offset: 4291854576
>>>> (XEN) MMIO emulation failed: d5v8 32bit @ 0008:82655f3c -> f0 0f ba 30 00 72 
>>> 07 8b cb e8 da 4b ff ff 8b 45
>>>>
>>>> the instruction pointers and virtual addresses are different, but the
>>>> code bytes are exactly the same. This doesn't seem very likely, so I
>>>> wonder whether there's an issue with us wrongly re-using previously
>>>> fetched insn bytes. (Of course I'd be happy to be proven wrong with
>>>> this guessing, by you checking the involved binary/ies.)
>>>
>>> Offset is the actual value of the "offset" parameter of
>>> hvmemul_cmpxchg().
>>
>> That's not very useful then, as for flat segments "offset" ==
>> "virtual address" (i.e. you merely re-print in decimal what you've
>> already printed in hex).
> 
> The attached patch (a combination of your patch and mine) produces the
> following output when booting a Windows 7 32-bit guest with monitoring:
> https://pastebin.com/ayiFmj1N
> 
> The failed MMIO emulation is caused by a mapping failure due to the
> "!nestedhvm_vcpu_in_guestmode(curr) && hvm_mmio_internal(gpa)" condition
> being true in hvmemul_vaddr_to_mfn(). I've ripped that off from
> __hvm_copy() but it looks like that might not be the right way to use it.

Sorry to reply to this email instead of your original reply, but I've
"left it" on my computer at work. Here's the last part of the log, with
the VCPU number logged for the GFN as well:

(XEN) [8] gfn: 0x2781
(XEN) [8] virtual address: 0x827810a8, rc: 0
(XEN) [8] gfn: 0x2781
(XEN) [8] virtual address: 0x827810a8, rc: 0
(XEN) [8] gfn: 0x2781
(XEN) [8] virtual address: 0x827810cc, rc: 0
(XEN) [8] gfn: 0x2781
(XEN) [8] virtual address: 0x8278109c, rc: 0
(XEN) [8] gfn: 0x2781
(XEN) [8] virtual address: 0x827810d0, rc: 0
(XEN) [11] gfn: 0x2781
(XEN) [8] gfn: 0x2781
(XEN) [11] virtual address: 0x8278109c, rc: 0
(XEN) [8] virtual address: 0x8278109c, rc: 4
(XEN) Dump follows for VCPU 8
(XEN) Mem event (RETRY) emulation failed: d3v8 32bit @ 0008:826b5c7c ->
f0 0f c1 08 85 c9 74 1f f6 c1 02 75 1a 41 8d 41
(XEN) [11] gfn: 0x2781
(XEN) [11] virtual address: 0x827810a8, rc: 0
(XEN) [11] gfn: 0x2781
(XEN) [11] virtual address: 0x827810a8, rc: 0
(XEN) [8] gfn: 0xfed00
(XEN) !page
(XEN) hvmemul_vaddr_to_mfn() fail
(XEN) [8] virtual address: 0xffd080f0, rc: 1
(XEN) MMIO emulation failed: d3v8 32bit @ 0008:8264ff3c -> f0 0f c1 08
85 c9 74 1f f6 c1 02 75 1a 41 8d 41

The code does look the same for the last two failures on VCPU 8, for
clearly different GFNs and virtual addresses. The first time it hits a
protected page, we try to emulate the instruction and it fails with
X86EMUL_CMPXCHG_FAILED (rc: 4). Then it somehow pops up again and this
time it's MMIO-emulated, and that fails as well (with UNHANDLEABLE,
since we don't seem to be able to map the memory).


Thanks,
Razvan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH RFC] x86/emulate: implement hvmemul_cmpxchg() with an actual CMPXCHG
  2017-04-03 18:20                                             ` Razvan Cojocaru
@ 2017-04-03 18:36                                               ` Razvan Cojocaru
  2017-04-04  9:07                                                 ` Jan Beulich
  2017-04-08 22:15                                                 ` Razvan Cojocaru
  0 siblings, 2 replies; 38+ messages in thread
From: Razvan Cojocaru @ 2017-04-03 18:36 UTC (permalink / raw)
  To: Jan Beulich; +Cc: andrew.cooper3, paul.durrant, Tim Deegan, xen-devel

On 04/03/2017 09:20 PM, Razvan Cojocaru wrote:
> On 04/01/2017 07:56 PM, Razvan Cojocaru wrote:
>> On 03/31/2017 06:04 PM, Jan Beulich wrote:
>>>>>> On 31.03.17 at 17:01, <rcojocaru@bitdefender.com> wrote:
>>>> On 03/31/2017 05:46 PM, Jan Beulich wrote:
>>>>>>>> On 31.03.17 at 11:56, <rcojocaru@bitdefender.com> wrote:
>>>>>> On 03/31/2017 10:34 AM, Jan Beulich wrote:
>>>>>>>>>> On 31.03.17 at 08:17, <rcojocaru@bitdefender.com> wrote:
>>>>>>>> On 03/30/2017 06:47 PM, Jan Beulich wrote:
>>>>>>>>>> Speaking of emulated MMIO, I've got this when the guest was crashing
>>>>>>>>>> immediately (pre RETRY loop):
>>>>>>>>>>
>>>>>>>>>>  MMIO emulation failed: d3v8 32bit @ 0008:82679f3c -> f0 0f ba 30 00 72
>>>>>>>>>> 07 8b cb e8 da 4b ff ff 8b 45
>>>>>>>>>
>>>>>>>>> That's a BTR, which we should be emulating fine. More information
>>>>>>>>> would need to be collected to have a chance to understand what
>>>>>>>>> might be going one (first of all the virtual and physical memory
>>>>>>>>> address this was trying to act on).
>>>>>>>>
>>>>>>>> Right, the BTR part should be fine, but I think the LOCK part is what's
>>>>>>>> causing the issue. I've done a few more test runs to see what return
>>>>>>>> RETRY (dumping the instruction with an "(r)" prefix to distinguish from
>>>>>>>> the UNHANDLEABLE dump), and a couple of instructions return RETRY (BTR
>>>>>>>> and XADD, both LOCK-prefixed, which means they now involve CMPXCHG
>>>>>>>> handler, which presumably now fails - possibly simply because it's
>>>>>>>> always LOCKed in my patch):
>>>>>>>
>>>>>>> Well, all of that looks to be expected behavior. I'm afraid I don't see
>>>>>>> how this information helps understanding the MMIO emulation failure
>>>>>>> above.
>>>>>>
>>>>>> I've managed to obtain this log of emulation errors:
>>>>>> https://pastebin.com/Esy1SkHx 
>>>>>>
>>>>>> The "virtual address" lines that are not followed by any "Mem event"
>>>>>> line correspond to CMXCHG_FAILED return codes.
>>>>>>
>>>>>> The very last line is a MMIO emulation failed.
>>>>>>
>>>>>> It's probably important that this happens with the model where
>>>>>> hvm_emulate_one_vm_event() does _not_ re-try the emulation until it
>>>>>> succeeds. The other model allows me to go further with the guest, but
>>>>>> eventually I get timeout-related BSODs or the guest becomes unresponsive.
>>>>>
>>>>> Interesting. You didn't clarify what the printed "offset" values are,
>>>>> and it doesn't look like these have any correlation with the underlying
>>>>> (guest) physical address, which we would also want to see. And then
>>>>> it strikes me as odd that in these last lines
>>>>>
>>>>> (XEN) Mem event (RETRY) emulation failed: d5v8 32bit @ 0008:826bb861 -> f0 0f 
>>>> ba 30 00 72 07 8b cb e8 da 4b ff ff 8b 45
>>>>> (XEN) virtual address: 0xffd080f0, offset: 4291854576
>>>>> (XEN) MMIO emulation failed: d5v8 32bit @ 0008:82655f3c -> f0 0f ba 30 00 72 
>>>> 07 8b cb e8 da 4b ff ff 8b 45
>>>>>
>>>>> the instruction pointers and virtual addresses are different, but the
>>>>> code bytes are exactly the same. This doesn't seem very likely, so I
>>>>> wonder whether there's an issue with us wrongly re-using previously
>>>>> fetched insn bytes. (Of course I'd be happy to be proven wrong with
>>>>> this guessing, by you checking the involved binary/ies.)
>>>>
>>>> Offset is the actual value of the "offset" parameter of
>>>> hvmemul_cmpxchg().
>>>
>>> That's not very useful then, as for flat segments "offset" ==
>>> "virtual address" (i.e. you merely re-print in decimal what you've
>>> already printed in hex).
>>
>> The attached patch (a combination of your patch and mine) produces the
>> following output when booting a Windows 7 32-bit guest with monitoring:
>> https://pastebin.com/ayiFmj1N
>>
>> The failed MMIO emulation is caused by a mapping failure due to the
>> "!nestedhvm_vcpu_in_guestmode(curr) && hvm_mmio_internal(gpa)" condition
>> being true in hvmemul_vaddr_to_mfn(). I've ripped that off from
>> __hvm_copy() but it looks like that might not be the right way to use it.
> 
> Sorry to reply to this email instead of your original reply but I've
> "left it" in my computer at work. Here's the last part of the log, with
> the VCPU number logged for the GFN as well:
> 
> (XEN) [8] gfn: 0x2781
> (XEN) [8] virtual address: 0x827810a8, rc: 0
> (XEN) [8] gfn: 0x2781
> (XEN) [8] virtual address: 0x827810a8, rc: 0
> (XEN) [8] gfn: 0x2781
> (XEN) [8] virtual address: 0x827810cc, rc: 0
> (XEN) [8] gfn: 0x2781
> (XEN) [8] virtual address: 0x8278109c, rc: 0
> (XEN) [8] gfn: 0x2781
> (XEN) [8] virtual address: 0x827810d0, rc: 0
> (XEN) [11] gfn: 0x2781
> (XEN) [8] gfn: 0x2781
> (XEN) [11] virtual address: 0x8278109c, rc: 0
> (XEN) [8] virtual address: 0x8278109c, rc: 4
> (XEN) Dump follows for VCPU 8
> (XEN) Mem event (RETRY) emulation failed: d3v8 32bit @ 0008:826b5c7c ->
> f0 0f c1 08 85 c9 74 1f f6 c1 02 75 1a 41 8d 41
> (XEN) [11] gfn: 0x2781
> (XEN) [11] virtual address: 0x827810a8, rc: 0
> (XEN) [11] gfn: 0x2781
> (XEN) [11] virtual address: 0x827810a8, rc: 0
> (XEN) [8] gfn: 0xfed00
> (XEN) !page
> (XEN) hvmemul_vaddr_to_mfn() fail
> (XEN) [8] virtual address: 0xffd080f0, rc: 1
> (XEN) MMIO emulation failed: d3v8 32bit @ 0008:8264ff3c -> f0 0f c1 08
> 85 c9 74 1f f6 c1 02 75 1a 41 8d 41
> 
> The code does look the same for the last two failures on VCPU 8, for
> clearly different GFNs and virtual addresses. The first time it hits a
> protected page, we try to emulate the instruction and it fails with
> X86EMUL_CMPXCHG_FAILED (rc: 4). Then it somehow pops up again and this
> time it's MMIO-emulated, and that fails as well (with UNHANDLEABLE,
> since we can't seem to be able to map the memory).

Another log, making sure that the MMIO emulation failure really happens on
the same processor as the preceding (identical bytes) failure:

(XEN) [8] gfn: 0x276f
(XEN) [11] virtual address: 0x8276f09c, rc: 0
(XEN) [8] virtual address: 0x8276f09c, rc: 4
(XEN) Dump follows for VCPU 8
(XEN) Mem event (RETRY) emulation failed: d3v8 32bit @ 0008:826a3861 ->
f0 0f ba 30 00 72 07 8b cb e8 da 4b ff ff 8b 45
(XEN) [11] gfn: 0x276f
(XEN) [11] virtual address: 0x8276f0d0, rc: 0
(XEN) [11] gfn: 0x276f
(XEN) [11] virtual address: 0x8276f09c, rc: 0
(XEN) [11] gfn: 0x276f
(XEN) [11] virtual address: 0x8276f0cc, rc: 0
(XEN) [11] gfn: 0x276f
(XEN) [11] virtual address: 0x8276f09c, rc: 0
(XEN) [8] gfn: 0xfed00
(XEN) !page
(XEN) hvmemul_vaddr_to_mfn() fail
(XEN) [8] virtual address: 0xffd080f0, rc: 1
(XEN) Dump follows for VCPU 8
(XEN) MMIO emulation failed: d3v8 32bit @ 0008:8263df3c -> f0 0f ba 30
00 72 07 8b cb e8 da 4b ff ff 8b 45
(XEN) [11] gfn: 0x276f
(XEN) [11] virtual address: 0x8276f0d0, rc: 0


Thanks,
Razvan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH RFC] x86/emulate: implement hvmemul_cmpxchg() with an actual CMPXCHG
  2017-04-03 18:36                                               ` Razvan Cojocaru
@ 2017-04-04  9:07                                                 ` Jan Beulich
  2017-04-04 12:01                                                   ` Razvan Cojocaru
  2017-04-08 22:15                                                 ` Razvan Cojocaru
  1 sibling, 1 reply; 38+ messages in thread
From: Jan Beulich @ 2017-04-04  9:07 UTC (permalink / raw)
  To: Razvan Cojocaru; +Cc: andrew.cooper3, paul.durrant, Tim Deegan, xen-devel

>>> On 03.04.17 at 20:36, <rcojocaru@bitdefender.com> wrote:
> Another log, making sure that the MMIO emulation fail really happens on
> the same processor as the preceding (identical bytes) fail:
> 
> (XEN) [8] gfn: 0x276f
> (XEN) [11] virtual address: 0x8276f09c, rc: 0
> (XEN) [8] virtual address: 0x8276f09c, rc: 4
> (XEN) Dump follows for VCPU 8
> (XEN) Mem event (RETRY) emulation failed: d3v8 32bit @ 0008:826a3861 ->
> f0 0f ba 30 00 72 07 8b cb e8 da 4b ff ff 8b 45

So what does your code do following this? Namely is there any exit
back to guest context prior to the retried operation succeeding (e.g.
are you allowing the instruction to be re-executed)? If there is, I'd
suspect you don't clean up some internal state and ...

> (XEN) [11] gfn: 0x276f
> (XEN) [11] virtual address: 0x8276f0d0, rc: 0
> (XEN) [11] gfn: 0x276f
> (XEN) [11] virtual address: 0x8276f09c, rc: 0
> (XEN) [11] gfn: 0x276f
> (XEN) [11] virtual address: 0x8276f0cc, rc: 0
> (XEN) [11] gfn: 0x276f
> (XEN) [11] virtual address: 0x8276f09c, rc: 0
> (XEN) [8] gfn: 0xfed00
> (XEN) !page
> (XEN) hvmemul_vaddr_to_mfn() fail
> (XEN) [8] virtual address: 0xffd080f0, rc: 1
> (XEN) Dump follows for VCPU 8
> (XEN) MMIO emulation failed: d3v8 32bit @ 0008:8263df3c -> f0 0f ba 30
> 00 72 07 8b cb e8 da 4b ff ff 8b 45

... this then happens in the context of an interrupt taken on
the instruction to be re-executed (i.e. before it actually gets
re-executed). "Normal" retry processing is not supposed to
reach guest context again, i.e. the retry is being performed
from (iirc) hvm_do_resume(). Hence it can re-use previously
collected information on the instruction and its operand(s).
Any exit to guest context, otoh, would require invalidation of
such internally cached state.

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH RFC] x86/emulate: implement hvmemul_cmpxchg() with an actual CMPXCHG
  2017-04-04  9:07                                                 ` Jan Beulich
@ 2017-04-04 12:01                                                   ` Razvan Cojocaru
  0 siblings, 0 replies; 38+ messages in thread
From: Razvan Cojocaru @ 2017-04-04 12:01 UTC (permalink / raw)
  To: Jan Beulich; +Cc: andrew.cooper3, paul.durrant, Tim Deegan, xen-devel

On 04/04/2017 12:07 PM, Jan Beulich wrote:
>>>> On 03.04.17 at 20:36, <rcojocaru@bitdefender.com> wrote:
>> Another log, making sure that the MMIO emulation fail really happens on
>> the same processor as the preceding (identical bytes) fail:
>>
>> (XEN) [8] gfn: 0x276f
>> (XEN) [11] virtual address: 0x8276f09c, rc: 0
>> (XEN) [8] virtual address: 0x8276f09c, rc: 4
>> (XEN) Dump follows for VCPU 8
>> (XEN) Mem event (RETRY) emulation failed: d3v8 32bit @ 0008:826a3861 ->
>> f0 0f ba 30 00 72 07 8b cb e8 da 4b ff ff 8b 45
> 
> So what does your code do following this? Namely is there any exit
> back to guest context prior to the retried operation succeeding (e.g.
> are you allowing the instruction to be re-executed)? If there is, I'd
> suspect you don't clean up some internal state and ...

That code is called from hvm_do_resume() ...

>> (XEN) [11] gfn: 0x276f
>> (XEN) [11] virtual address: 0x8276f0d0, rc: 0
>> (XEN) [11] gfn: 0x276f
>> (XEN) [11] virtual address: 0x8276f09c, rc: 0
>> (XEN) [11] gfn: 0x276f
>> (XEN) [11] virtual address: 0x8276f0cc, rc: 0
>> (XEN) [11] gfn: 0x276f
>> (XEN) [11] virtual address: 0x8276f09c, rc: 0
>> (XEN) [8] gfn: 0xfed00
>> (XEN) !page
>> (XEN) hvmemul_vaddr_to_mfn() fail
>> (XEN) [8] virtual address: 0xffd080f0, rc: 1
>> (XEN) Dump follows for VCPU 8
>> (XEN) MMIO emulation failed: d3v8 32bit @ 0008:8263df3c -> f0 0f ba 30
>> 00 72 07 8b cb e8 da 4b ff ff 8b 45
> 
> ... this then happens in the context of an interrupt taken on
> the instruction to be re-executed (i.e. before it actually gets
> re-executed). "Normal" retry processing is not supposed to
> reach guest context again, i.e. the retry is being performed
> from (iirc) hvm_do_resume(). Hence it can re-use previously
> collected information on the instruction and its operand(s).
> Any exit to guest context, otoh, would require invalidation of
> such internally cached state.

... which does nothing if hvm_emulate_one_vm_event() fails. It doesn't
even check the return code. That's why I have added the do { } while (
rc == X86EMUL_RETRY ); loop around the code in
hvm_emulate_one_vm_event() and noticed that the BSODs appear much later
(and they're timeout BSODs, presumably because of a taking-too-long
RETRY loop), or the guest becomes unresponsive.

I'm not sure what cached state that would be, I'll take a closer look at
the code.

I'll retest as soon as possible (it's been a very busy time, sorry for
the late replies).


Thanks,
Razvan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH RFC] x86/emulate: implement hvmemul_cmpxchg() with an actual CMPXCHG
  2017-04-03 18:36                                               ` Razvan Cojocaru
  2017-04-04  9:07                                                 ` Jan Beulich
@ 2017-04-08 22:15                                                 ` Razvan Cojocaru
  2017-04-09 11:03                                                   ` Razvan Cojocaru
  2017-04-10 10:18                                                   ` Jan Beulich
  1 sibling, 2 replies; 38+ messages in thread
From: Razvan Cojocaru @ 2017-04-08 22:15 UTC (permalink / raw)
  To: Jan Beulich; +Cc: andrew.cooper3, paul.durrant, Tim Deegan, xen-devel

On 04/03/2017 09:36 PM, Razvan Cojocaru wrote:
> On 04/03/2017 09:20 PM, Razvan Cojocaru wrote:
>> On 04/01/2017 07:56 PM, Razvan Cojocaru wrote:
>>> On 03/31/2017 06:04 PM, Jan Beulich wrote:
>>>>>>> On 31.03.17 at 17:01, <rcojocaru@bitdefender.com> wrote:
>>>>> On 03/31/2017 05:46 PM, Jan Beulich wrote:
>>>>>>>>> On 31.03.17 at 11:56, <rcojocaru@bitdefender.com> wrote:
>>>>>>> On 03/31/2017 10:34 AM, Jan Beulich wrote:
>>>>>>>>>>> On 31.03.17 at 08:17, <rcojocaru@bitdefender.com> wrote:
>>>>>>>>> On 03/30/2017 06:47 PM, Jan Beulich wrote:
>>>>>>>>>>> Speaking of emulated MMIO, I've got this when the guest was crashing
>>>>>>>>>>> immediately (pre RETRY loop):
>>>>>>>>>>>
>>>>>>>>>>>  MMIO emulation failed: d3v8 32bit @ 0008:82679f3c -> f0 0f ba 30 00 72
>>>>>>>>>>> 07 8b cb e8 da 4b ff ff 8b 45
>>>>>>>>>>
>>>>>>>>>> That's a BTR, which we should be emulating fine. More information
>>>>>>>>>> would need to be collected to have a chance to understand what
>>>>>>>>>> might be going on (first of all the virtual and physical memory
>>>>>>>>>> address this was trying to act on).
>>>>>>>>>
>>>>>>>>> Right, the BTR part should be fine, but I think the LOCK part is what's
>>>>>>>>> causing the issue. I've done a few more test runs to see what returns
>>>>>>>>> RETRY (dumping the instruction with an "(r)" prefix to distinguish from
>>>>>>>>> the UNHANDLEABLE dump), and a couple of instructions return RETRY (BTR
>>>>>>>>> and XADD, both LOCK-prefixed, which means they now involve the CMPXCHG
>>>>>>>>> handler, which presumably now fails - possibly simply because it's
>>>>>>>>> always LOCKed in my patch):
>>>>>>>>
>>>>>>>> Well, all of that looks to be expected behavior. I'm afraid I don't see
>>>>>>>> how this information helps understanding the MMIO emulation failure
>>>>>>>> above.
>>>>>>>
>>>>>>> I've managed to obtain this log of emulation errors:
>>>>>>> https://pastebin.com/Esy1SkHx 
>>>>>>>
>>>>>>> The "virtual address" lines that are not followed by any "Mem event"
>>>>>>> line correspond to CMPXCHG_FAILED return codes.
>>>>>>>
>>>>>>> The very last line is a MMIO emulation failed.
>>>>>>>
>>>>>>> It's probably important that this happens with the model where
>>>>>>> hvm_emulate_one_vm_event() does _not_ re-try the emulation until it
>>>>>>> succeeds. The other model allows me to go further with the guest, but
>>>>>>> eventually I get timeout-related BSODs or the guest becomes unresponsive.
>>>>>>
>>>>>> Interesting. You didn't clarify what the printed "offset" values are,
>>>>>> and it doesn't look like these have any correlation with the underlying
>>>>>> (guest) physical address, which we would also want to see. And then
>>>>>> it strikes me as odd that in these last lines
>>>>>>
>>>>>> (XEN) Mem event (RETRY) emulation failed: d5v8 32bit @ 0008:826bb861 -> f0 0f 
>>>>> ba 30 00 72 07 8b cb e8 da 4b ff ff 8b 45
>>>>>> (XEN) virtual address: 0xffd080f0, offset: 4291854576
>>>>>> (XEN) MMIO emulation failed: d5v8 32bit @ 0008:82655f3c -> f0 0f ba 30 00 72 
>>>>> 07 8b cb e8 da 4b ff ff 8b 45
>>>>>>
>>>>>> the instruction pointers and virtual addresses are different, but the
>>>>>> code bytes are exactly the same. This doesn't seem very likely, so I
>>>>>> wonder whether there's an issue with us wrongly re-using previously
>>>>>> fetched insn bytes. (Of course I'd be happy to be proven wrong with
>>>>>> this guessing, by you checking the involved binary/ies.)
>>>>>
>>>>> Offset is the actual value of the "offset" parameter of
>>>>> hvmemul_cmpxchg().
>>>>
>>>> That's not very useful then, as for flat segments "offset" ==
>>>> "virtual address" (i.e. you merely re-print in decimal what you've
>>>> already printed in hex).
>>>
>>> The attached patch (a combination of your patch and mine) produces the
>>> following output when booting a Windows 7 32-bit guest with monitoring:
>>> https://pastebin.com/ayiFmj1N
>>>
>>> The failed MMIO emulation is caused by a mapping failure due to the
>>> "!nestedhvm_vcpu_in_guestmode(curr) && hvm_mmio_internal(gpa)" condition
>>> being true in hvmemul_vaddr_to_mfn(). I've ripped that off from
>>> __hvm_copy() but it looks like that might not be the right way to use it.
>>
>> Sorry to reply to this email instead of your original reply but I've
>> "left it" in my computer at work. Here's the last part of the log, with
>> the VCPU number logged for the GFN as well:
>>
>> (XEN) [8] gfn: 0x2781
>> (XEN) [8] virtual address: 0x827810a8, rc: 0
>> (XEN) [8] gfn: 0x2781
>> (XEN) [8] virtual address: 0x827810a8, rc: 0
>> (XEN) [8] gfn: 0x2781
>> (XEN) [8] virtual address: 0x827810cc, rc: 0
>> (XEN) [8] gfn: 0x2781
>> (XEN) [8] virtual address: 0x8278109c, rc: 0
>> (XEN) [8] gfn: 0x2781
>> (XEN) [8] virtual address: 0x827810d0, rc: 0
>> (XEN) [11] gfn: 0x2781
>> (XEN) [8] gfn: 0x2781
>> (XEN) [11] virtual address: 0x8278109c, rc: 0
>> (XEN) [8] virtual address: 0x8278109c, rc: 4
>> (XEN) Dump follows for VCPU 8
>> (XEN) Mem event (RETRY) emulation failed: d3v8 32bit @ 0008:826b5c7c ->
>> f0 0f c1 08 85 c9 74 1f f6 c1 02 75 1a 41 8d 41
>> (XEN) [11] gfn: 0x2781
>> (XEN) [11] virtual address: 0x827810a8, rc: 0
>> (XEN) [11] gfn: 0x2781
>> (XEN) [11] virtual address: 0x827810a8, rc: 0
>> (XEN) [8] gfn: 0xfed00
>> (XEN) !page
>> (XEN) hvmemul_vaddr_to_mfn() fail
>> (XEN) [8] virtual address: 0xffd080f0, rc: 1
>> (XEN) MMIO emulation failed: d3v8 32bit @ 0008:8264ff3c -> f0 0f c1 08
>> 85 c9 74 1f f6 c1 02 75 1a 41 8d 41
>>
>> The code does look the same for the last two failures on VCPU 8, for
>> clearly different GFNs and virtual addresses. The first time it hits a
>> protected page, we try to emulate the instruction and it fails with
>> X86EMUL_CMPXCHG_FAILED (rc: 4). Then it somehow pops up again and this
>> time it's MMIO-emulated, and that fails as well (with UNHANDLEABLE,
>> since we can't seem to be able to map the memory).
> 
> Another log, making sure that the MMIO emulation fail really happens on
> the same processor as the preceding (identical bytes) fail:
> 
> (XEN) [8] gfn: 0x276f
> (XEN) [11] virtual address: 0x8276f09c, rc: 0
> (XEN) [8] virtual address: 0x8276f09c, rc: 4
> (XEN) Dump follows for VCPU 8
> (XEN) Mem event (RETRY) emulation failed: d3v8 32bit @ 0008:826a3861 ->
> f0 0f ba 30 00 72 07 8b cb e8 da 4b ff ff 8b 45
> (XEN) [11] gfn: 0x276f
> (XEN) [11] virtual address: 0x8276f0d0, rc: 0
> (XEN) [11] gfn: 0x276f
> (XEN) [11] virtual address: 0x8276f09c, rc: 0
> (XEN) [11] gfn: 0x276f
> (XEN) [11] virtual address: 0x8276f0cc, rc: 0
> (XEN) [11] gfn: 0x276f
> (XEN) [11] virtual address: 0x8276f09c, rc: 0
> (XEN) [8] gfn: 0xfed00
> (XEN) !page
> (XEN) hvmemul_vaddr_to_mfn() fail
> (XEN) [8] virtual address: 0xffd080f0, rc: 1
> (XEN) Dump follows for VCPU 8
> (XEN) MMIO emulation failed: d3v8 32bit @ 0008:8263df3c -> f0 0f ba 30
> 00 72 07 8b cb e8 da 4b ff ff 8b 45
> (XEN) [11] gfn: 0x276f
> (XEN) [11] virtual address: 0x8276f0d0, rc: 0

As you've suggested, this does indeed seem to happen because of the
current model of emulating because of vm_event replies: an instruction
needs to be emulated in hvm_do_resume(), but inside
hvm_emulate_one_vm_event(), hvm_emulate_one() returns RETRY, presumably
because of the new cmpxchg handler. This causes
hvm_emulate_one_vm_event() to simply return, doing nothing else. Then,
the guest resumes execution at the same place (since RIP has not been
modified), then hvm_do_resume() gets called again, except this time this
code:

 481     if ( !handle_hvm_io_completion(v) )
 482         return;

ends up trying to MMIO emulate the current instruction by calling
handle_mmio().

I've had hvm_emulate_one_vm_event() return what hvm_emulate_one()
returns, and loop while ( rc == RETRY ) in hvm_do_resume() - this is
functionally equivalent to what I've been doing before by wrapping the
hvm_emulate_one_vm_event() code in a do {} while ( rc == RETRY ); loop
as previously discussed. However, as stated before, at a much later
point I still get rare BSODs, or the guest becomes unresponsive. Digging
a bit more, I've found that what seems to be happening is that emulating
some instructions returns EXCEPTION at some point (for instructions
using the new cmpxchg handler, this can happen if paging_gva_to_gfn()
returns INVALID_GFN and neither PFEC_page_paged nor PFEC_page_shared is
set). Since this causes hvm_emulate_one_vm_event() to call
hvm_inject_event(), I'm not sure that it does not mess with the
interrupt part of hvm_do_resume(). This will need more testing to figure
out exactly what's going wrong.


Thanks,
Razvan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH RFC] x86/emulate: implement hvmemul_cmpxchg() with an actual CMPXCHG
  2017-04-08 22:15                                                 ` Razvan Cojocaru
@ 2017-04-09 11:03                                                   ` Razvan Cojocaru
  2017-04-10 10:18                                                   ` Jan Beulich
  1 sibling, 0 replies; 38+ messages in thread
From: Razvan Cojocaru @ 2017-04-09 11:03 UTC (permalink / raw)
  To: Jan Beulich; +Cc: andrew.cooper3, paul.durrant, Tim Deegan, xen-devel

On 04/09/2017 01:15 AM, Razvan Cojocaru wrote:
> I'm not sure that it does not mess with the
> interrupt part of hvm_do_resume().

Actually I've now printk()ed
v->arch.hvm_vcpu.inject_event.vector on the EXCEPTION case of
hvm_emulate_one_vm_event(), and it's
always -1 in my tests, so the interrupt theory doesn't check out. I'll
have to think about another scenario to test.


Thanks,
Razvan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH RFC] x86/emulate: implement hvmemul_cmpxchg() with an actual CMPXCHG
  2017-04-08 22:15                                                 ` Razvan Cojocaru
  2017-04-09 11:03                                                   ` Razvan Cojocaru
@ 2017-04-10 10:18                                                   ` Jan Beulich
  1 sibling, 0 replies; 38+ messages in thread
From: Jan Beulich @ 2017-04-10 10:18 UTC (permalink / raw)
  To: Razvan Cojocaru; +Cc: andrew.cooper3, paul.durrant, Tim Deegan, xen-devel

>>> On 09.04.17 at 00:15, <rcojocaru@bitdefender.com> wrote:
> On 04/03/2017 09:36 PM, Razvan Cojocaru wrote:
>> On 04/03/2017 09:20 PM, Razvan Cojocaru wrote:
>>> On 04/01/2017 07:56 PM, Razvan Cojocaru wrote:
>>>> On 03/31/2017 06:04 PM, Jan Beulich wrote:
>>>>>>>> On 31.03.17 at 17:01, <rcojocaru@bitdefender.com> wrote:
>>>>>> On 03/31/2017 05:46 PM, Jan Beulich wrote:
>>>>>>>>>> On 31.03.17 at 11:56, <rcojocaru@bitdefender.com> wrote:
>>>>>>>> On 03/31/2017 10:34 AM, Jan Beulich wrote:
>>>>>>>>>>>> On 31.03.17 at 08:17, <rcojocaru@bitdefender.com> wrote:
>>>>>>>>>> On 03/30/2017 06:47 PM, Jan Beulich wrote:
>>>>>>>>>>>> Speaking of emulated MMIO, I've got this when the guest was crashing
>>>>>>>>>>>> immediately (pre RETRY loop):
>>>>>>>>>>>>
>>>>>>>>>>>>  MMIO emulation failed: d3v8 32bit @ 0008:82679f3c -> f0 0f ba 30 00 72
>>>>>>>>>>>> 07 8b cb e8 da 4b ff ff 8b 45
>>>>>>>>>>>
>>>>>>>>>>> That's a BTR, which we should be emulating fine. More information
>>>>>>>>>>> would need to be collected to have a chance to understand what
>>>>>>>>>>> might be going on (first of all the virtual and physical memory
>>>>>>>>>>> address this was trying to act on).
>>>>>>>>>>
>>>>>>>>>> Right, the BTR part should be fine, but I think the LOCK part is what's
>>>>>>>>>> causing the issue. I've done a few more test runs to see what returns
>>>>>>>>>> RETRY (dumping the instruction with an "(r)" prefix to distinguish from
>>>>>>>>>> the UNHANDLEABLE dump), and a couple of instructions return RETRY (BTR
>>>>>>>>>> and XADD, both LOCK-prefixed, which means they now involve the CMPXCHG
>>>>>>>>>> handler, which presumably now fails - possibly simply because it's
>>>>>>>>>> always LOCKed in my patch):
>>>>>>>>>
>>>>>>>>> Well, all of that looks to be expected behavior. I'm afraid I don't see
>>>>>>>>> how this information helps understanding the MMIO emulation failure
>>>>>>>>> above.
>>>>>>>>
>>>>>>>> I've managed to obtain this log of emulation errors:
>>>>>>>> https://pastebin.com/Esy1SkHx 
>>>>>>>>
>>>>>>>> The "virtual address" lines that are not followed by any "Mem event"
>>>>>>>> line correspond to CMPXCHG_FAILED return codes.
>>>>>>>>
>>>>>>>> The very last line is a MMIO emulation failed.
>>>>>>>>
>>>>>>>> It's probably important that this happens with the model where
>>>>>>>> hvm_emulate_one_vm_event() does _not_ re-try the emulation until it
>>>>>>>> succeeds. The other model allows me to go further with the guest, but
>>>>>>>> eventually I get timeout-related BSODs or the guest becomes unresponsive.
>>>>>>>
>>>>>>> Interesting. You didn't clarify what the printed "offset" values are,
>>>>>>> and it doesn't look like these have any correlation with the underlying
>>>>>>> (guest) physical address, which we would also want to see. And then
>>>>>>> it strikes me as odd that in these last lines
>>>>>>>
>>>>>>> (XEN) Mem event (RETRY) emulation failed: d5v8 32bit @ 0008:826bb861 -> f0 0f 
> 
>>>>>> ba 30 00 72 07 8b cb e8 da 4b ff ff 8b 45
>>>>>>> (XEN) virtual address: 0xffd080f0, offset: 4291854576
>>>>>>> (XEN) MMIO emulation failed: d5v8 32bit @ 0008:82655f3c -> f0 0f ba 30 00 72 
>>>>>> 07 8b cb e8 da 4b ff ff 8b 45
>>>>>>>
>>>>>>> the instruction pointers and virtual addresses are different, but the
>>>>>>> code bytes are exactly the same. This doesn't seem very likely, so I
>>>>>>> wonder whether there's an issue with us wrongly re-using previously
>>>>>>> fetched insn bytes. (Of course I'd be happy to be proven wrong with
>>>>>>> this guessing, by you checking the involved binary/ies.)
>>>>>>
>>>>>> Offset is the actual value of the "offset" parameter of
>>>>>> hvmemul_cmpxchg().
>>>>>
>>>>> That's not very useful then, as for flat segments "offset" ==
>>>>> "virtual address" (i.e. you merely re-print in decimal what you've
>>>>> already printed in hex).
>>>>
>>>> The attached patch (a combination of your patch and mine) produces the
>>>> following output when booting a Windows 7 32-bit guest with monitoring:
>>>> https://pastebin.com/ayiFmj1N 
>>>>
>>>> The failed MMIO emulation is caused by a mapping failure due to the
>>>> "!nestedhvm_vcpu_in_guestmode(curr) && hvm_mmio_internal(gpa)" condition
>>>> being true in hvmemul_vaddr_to_mfn(). I've ripped that off from
>>>> __hvm_copy() but it looks like that might not be the right way to use it.
>>>
>>> Sorry to reply to this email instead of your original reply but I've
>>> "left it" in my computer at work. Here's the last part of the log, with
>>> the VCPU number logged for the GFN as well:
>>>
>>> (XEN) [8] gfn: 0x2781
>>> (XEN) [8] virtual address: 0x827810a8, rc: 0
>>> (XEN) [8] gfn: 0x2781
>>> (XEN) [8] virtual address: 0x827810a8, rc: 0
>>> (XEN) [8] gfn: 0x2781
>>> (XEN) [8] virtual address: 0x827810cc, rc: 0
>>> (XEN) [8] gfn: 0x2781
>>> (XEN) [8] virtual address: 0x8278109c, rc: 0
>>> (XEN) [8] gfn: 0x2781
>>> (XEN) [8] virtual address: 0x827810d0, rc: 0
>>> (XEN) [11] gfn: 0x2781
>>> (XEN) [8] gfn: 0x2781
>>> (XEN) [11] virtual address: 0x8278109c, rc: 0
>>> (XEN) [8] virtual address: 0x8278109c, rc: 4
>>> (XEN) Dump follows for VCPU 8
>>> (XEN) Mem event (RETRY) emulation failed: d3v8 32bit @ 0008:826b5c7c ->
>>> f0 0f c1 08 85 c9 74 1f f6 c1 02 75 1a 41 8d 41
>>> (XEN) [11] gfn: 0x2781
>>> (XEN) [11] virtual address: 0x827810a8, rc: 0
>>> (XEN) [11] gfn: 0x2781
>>> (XEN) [11] virtual address: 0x827810a8, rc: 0
>>> (XEN) [8] gfn: 0xfed00
>>> (XEN) !page
>>> (XEN) hvmemul_vaddr_to_mfn() fail
>>> (XEN) [8] virtual address: 0xffd080f0, rc: 1
>>> (XEN) MMIO emulation failed: d3v8 32bit @ 0008:8264ff3c -> f0 0f c1 08
>>> 85 c9 74 1f f6 c1 02 75 1a 41 8d 41
>>>
>>> The code does look the same for the last two failures on VCPU 8, for
>>> clearly different GFNs and virtual addresses. The first time it hits a
>>> protected page, we try to emulate the instruction and it fails with
>>> X86EMUL_CMPXCHG_FAILED (rc: 4). Then it somehow pops up again and this
>>> time it's MMIO-emulated, and that fails as well (with UNHANDLEABLE,
>>> since we can't seem to be able to map the memory).
>> 
>> Another log, making sure that the MMIO emulation fail really happens on
>> the same processor as the preceding (identical bytes) fail:
>> 
>> (XEN) [8] gfn: 0x276f
>> (XEN) [11] virtual address: 0x8276f09c, rc: 0
>> (XEN) [8] virtual address: 0x8276f09c, rc: 4
>> (XEN) Dump follows for VCPU 8
>> (XEN) Mem event (RETRY) emulation failed: d3v8 32bit @ 0008:826a3861 ->
>> f0 0f ba 30 00 72 07 8b cb e8 da 4b ff ff 8b 45
>> (XEN) [11] gfn: 0x276f
>> (XEN) [11] virtual address: 0x8276f0d0, rc: 0
>> (XEN) [11] gfn: 0x276f
>> (XEN) [11] virtual address: 0x8276f09c, rc: 0
>> (XEN) [11] gfn: 0x276f
>> (XEN) [11] virtual address: 0x8276f0cc, rc: 0
>> (XEN) [11] gfn: 0x276f
>> (XEN) [11] virtual address: 0x8276f09c, rc: 0
>> (XEN) [8] gfn: 0xfed00
>> (XEN) !page
>> (XEN) hvmemul_vaddr_to_mfn() fail
>> (XEN) [8] virtual address: 0xffd080f0, rc: 1
>> (XEN) Dump follows for VCPU 8
>> (XEN) MMIO emulation failed: d3v8 32bit @ 0008:8263df3c -> f0 0f ba 30
>> 00 72 07 8b cb e8 da 4b ff ff 8b 45
>> (XEN) [11] gfn: 0x276f
>> (XEN) [11] virtual address: 0x8276f0d0, rc: 0
> 
> As you've suggested, this does indeed seem to happen because of the
> current model of emulating because of vm_event replies: an instruction
> needs to be emulated in hvm_do_resume(), but inside
> hvm_emulate_one_vm_event(), hvm_emulate_one() returns RETRY, presumably
> because of the new cmpxchg handler. This causes
> hvm_emulate_one_vm_event() to simply return, doing nothing else. Then,
> the guest resumes execution at the same place (since RIP has not been
> modified), then hvm_do_resume() gets called again, except this time this
> code:
> 
>  481     if ( !handle_hvm_io_completion(v) )
>  482         return;
> 
> ends up trying to MMIO emulate the current instruction by calling
> handle_mmio().
> 
> I've had hvm_emulate_one_vm_event() return what hvm_emulate_one()
> returns, and loop while ( rc == RETRY ) in hvm_do_resume() - this is
> functionally equivalent to what I've been doing before by wrapping the
> hvm_emulate_one_vm_event() code in a do {} while ( rc == RETRY ); loop
> as previously discussed. However, as stated before, at a much later
> point I still get rare BSODs, or the guest becomes unresponsive. Digging
> a bit more, I've found that what seems to be happening is that emulating
> some instructions returns EXCEPTION at some point (for instructions
> using the new cmpxchg handler, this can happen if paging_gva_to_gfn()
> returns INVALID_GFN and neither PFEC_page_paged nor PFEC_page_shared are
> set).

Apparently in line with your later reply - if you got INVALID_GFN
from paging_gva_to_gfn(), the earlier read of the same memory
should have failed already (unless something plays with the P2M
behind your back).

Jan

> Since this causes hvm_emulate_one_vm_event() to call
> hvm_inject_event(), I'm not sure that it does not mess with the
> interrupt part of hvm_do_resume(). This will need more testing to figure
> out exactly what's going wrong.
> 
> 
> Thanks,
> Razvan



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 38+ messages in thread

end of thread, other threads:[~2017-04-10 10:18 UTC | newest]

Thread overview: 38+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-03-24 13:24 [PATCH RFC] x86/emulate: implement hvmemul_cmpxchg() with an actual CMPXCHG Razvan Cojocaru
2017-03-28  9:14 ` Razvan Cojocaru
2017-03-28 10:03   ` Jan Beulich
2017-03-28 10:25     ` Andrew Cooper
2017-03-28 10:44       ` Jan Beulich
2017-03-29  5:59       ` Jan Beulich
2017-03-29  8:14         ` Razvan Cojocaru
2017-03-28 10:27     ` Razvan Cojocaru
2017-03-28 10:47       ` Jan Beulich
2017-03-28 10:50         ` Razvan Cojocaru
2017-03-28 11:32           ` Jan Beulich
2017-03-29 13:55           ` Jan Beulich
2017-03-29 14:00             ` Razvan Cojocaru
2017-03-29 15:04               ` Razvan Cojocaru
2017-03-29 15:49                 ` Razvan Cojocaru
2017-03-30 12:05                   ` Jan Beulich
2017-03-30 12:25                     ` Razvan Cojocaru
2017-03-30 12:56                     ` Razvan Cojocaru
2017-03-30 14:08                       ` Razvan Cojocaru
2017-03-30 14:21                         ` Jan Beulich
2017-03-30 15:05                           ` Razvan Cojocaru
2017-03-30 15:47                             ` Jan Beulich
2017-03-31  6:17                               ` Razvan Cojocaru
2017-03-31  7:34                                 ` Jan Beulich
2017-03-31  9:56                                   ` Razvan Cojocaru
2017-03-31 14:46                                     ` Jan Beulich
2017-03-31 15:01                                       ` Razvan Cojocaru
2017-03-31 15:04                                         ` Jan Beulich
2017-04-01 16:56                                           ` Razvan Cojocaru
2017-04-03 10:23                                             ` Jan Beulich
2017-04-03 18:20                                             ` Razvan Cojocaru
2017-04-03 18:36                                               ` Razvan Cojocaru
2017-04-04  9:07                                                 ` Jan Beulich
2017-04-04 12:01                                                   ` Razvan Cojocaru
2017-04-08 22:15                                                 ` Razvan Cojocaru
2017-04-09 11:03                                                   ` Razvan Cojocaru
2017-04-10 10:18                                                   ` Jan Beulich
2017-03-29 14:12             ` Razvan Cojocaru

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.