[for-4.7] x86/emulate: synchronize LOCKed instruction emulation

* [for-4.7] x86/emulate: synchronize LOCKed instruction emulation
@ 2016-04-13 12:26 Razvan Cojocaru
  2016-04-14  4:35 ` Jan Beulich
                   ` (2 more replies)
  0 siblings, 3 replies; 47+ messages in thread
From: Razvan Cojocaru @ 2016-04-13 12:26 UTC (permalink / raw)
  To: xen-devel
  Cc: keir, Razvan Cojocaru, george.dunlap, andrew.cooper3,
	ian.jackson, tim, paul.durrant, jbeulich, wei.liu2

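LOCK-prefixed instructions are currently allowed to run in parallel
in x86_emulate(), which can lead the guest into an undefined state.
This patch fixes the issue by adding smp_lock()/smp_unlock() hooks to
struct x86_emulate_ops, backed by a per-domain percpu rwlock: emulation
of a LOCKed instruction takes the lock for writing, while all other
emulation takes it for reading.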

Signed-off-by: Razvan Cojocaru <rcojocaru@bitdefender.com>
---
 tools/tests/x86_emulator/test_x86_emulator.c | 12 ++++++++++++
 xen/arch/x86/hvm/emulate.c                   | 26 ++++++++++++++++++++++++++
 xen/arch/x86/mm.c                            |  3 +++
 xen/arch/x86/mm/shadow/common.c              |  4 ++++
 xen/arch/x86/x86_emulate/x86_emulate.c       | 23 ++++++++++++++++++++++-
 xen/arch/x86/x86_emulate/x86_emulate.h       |  8 ++++++++
 xen/common/domain.c                          |  2 ++
 xen/include/asm-x86/domain.h                 |  4 ++++
 xen/include/asm-x86/hvm/emulate.h            |  3 +++
 9 files changed, 84 insertions(+), 1 deletion(-)

diff --git a/tools/tests/x86_emulator/test_x86_emulator.c b/tools/tests/x86_emulator/test_x86_emulator.c
index 86e298f..22e963b 100644
--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -9,6 +9,8 @@
 
 #define __packed __attribute__((packed))
 
+typedef bool bool_t;
+
 #include "x86_emulate/x86_emulate.h"
 #include "blowfish.h"
 
@@ -160,6 +162,14 @@ int get_fpu(
     return X86EMUL_OKAY;
 }
 
+static void smp_lock(bool_t locked)
+{ /* Empty stub: takes no lock; only fills the emulops .smp_lock hook. */
+}
+
+static void smp_unlock(bool_t locked)
+{ /* Empty stub: releases nothing; only fills the emulops .smp_unlock hook. */
+}
+
 static struct x86_emulate_ops emulops = {
     .read       = read,
     .insn_fetch = fetch,
@@ -167,6 +177,8 @@ static struct x86_emulate_ops emulops = {
     .cmpxchg    = cmpxchg,
     .cpuid      = cpuid,
     .get_fpu    = get_fpu,
+    .smp_lock   = smp_lock,
+    .smp_unlock = smp_unlock,
 };
 
 int main(int argc, char **argv)
diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
index cc0b841..02096d5 100644
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -25,6 +25,8 @@
 #include <asm/hvm/svm/svm.h>
 #include <asm/vm_event.h>
 
+DEFINE_PERCPU_RWLOCK_GLOBAL(emulate_locked_rwlock);
+
 static void hvmtrace_io_assist(const ioreq_t *p)
 {
     unsigned int size, event;
@@ -1616,6 +1618,26 @@ static int hvmemul_vmfunc(
     return rc;
 }
 
+void emulate_smp_lock(bool_t locked)
+{ /* Take current domain's emulate_lock: for write if locked, else read. */
+    struct domain *d = current->domain;
+
+    if ( locked )
+        percpu_write_lock(emulate_locked_rwlock, &d->arch.emulate_lock);
+    else
+        percpu_read_lock(emulate_locked_rwlock, &d->arch.emulate_lock);
+}
+
+void emulate_smp_unlock(bool_t locked)
+{ /* Drop current domain's emulate_lock; locked must match the lock call. */
+    struct domain *d = current->domain;
+
+    if ( locked )
+        percpu_write_unlock(emulate_locked_rwlock, &d->arch.emulate_lock);
+    else
+        percpu_read_unlock(emulate_locked_rwlock, &d->arch.emulate_lock);
+}
+
 static const struct x86_emulate_ops hvm_emulate_ops = {
     .read          = hvmemul_read,
     .insn_fetch    = hvmemul_insn_fetch,
@@ -1641,6 +1663,8 @@ static const struct x86_emulate_ops hvm_emulate_ops = {
     .put_fpu       = hvmemul_put_fpu,
     .invlpg        = hvmemul_invlpg,
     .vmfunc        = hvmemul_vmfunc,
+    .smp_lock      = emulate_smp_lock,
+    .smp_unlock    = emulate_smp_unlock,
 };
 
 static const struct x86_emulate_ops hvm_emulate_ops_no_write = {
@@ -1668,6 +1692,8 @@ static const struct x86_emulate_ops hvm_emulate_ops_no_write = {
     .put_fpu       = hvmemul_put_fpu,
     .invlpg        = hvmemul_invlpg,
     .vmfunc        = hvmemul_vmfunc,
+    .smp_lock      = emulate_smp_lock,
+    .smp_unlock    = emulate_smp_unlock,
 };
 
 static int _hvm_emulate_one(struct hvm_emulate_ctxt *hvmemul_ctxt,
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index bca7532..52a3c5d 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -112,6 +112,7 @@
 #include <asm/ldt.h>
 #include <asm/x86_emulate.h>
 #include <asm/e820.h>
+#include <asm/hvm/emulate.h>
 #include <asm/hypercall.h>
 #include <asm/shared.h>
 #include <asm/mem_sharing.h>
@@ -5319,6 +5320,8 @@ static const struct x86_emulate_ops ptwr_emulate_ops = {
     .insn_fetch = ptwr_emulated_read,
     .write      = ptwr_emulated_write,
     .cmpxchg    = ptwr_emulated_cmpxchg,
+    .smp_lock   = emulate_smp_lock,
+    .smp_unlock = emulate_smp_unlock,
 };
 
 /* Write page fault handler: check if guest is trying to modify a PTE. */
diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
index ec87fb4..6d18430 100644
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -283,6 +283,8 @@ static const struct x86_emulate_ops hvm_shadow_emulator_ops = {
     .insn_fetch = hvm_emulate_insn_fetch,
     .write      = hvm_emulate_write,
     .cmpxchg    = hvm_emulate_cmpxchg,
+    .smp_lock   = emulate_smp_lock,
+    .smp_unlock = emulate_smp_unlock,
 };
 
 static int
@@ -351,6 +353,8 @@ static const struct x86_emulate_ops pv_shadow_emulator_ops = {
     .insn_fetch = pv_emulate_read,
     .write      = pv_emulate_write,
     .cmpxchg    = pv_emulate_cmpxchg,
+    .smp_lock   = emulate_smp_lock,
+    .smp_unlock = emulate_smp_unlock,
 };
 
 const struct x86_emulate_ops *shadow_init_emulation(
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
index 10a2959..aab934f 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1520,6 +1520,8 @@ x86_emulate(
     struct operand ea = { .type = OP_MEM, .reg = REG_POISON };
     ea.mem.seg = x86_seg_ds; /* gcc may reject anon union initializer */
 
+    ASSERT(ops->smp_lock && ops->smp_unlock);
+
     ctxt->retire.byte = 0;
 
     op_bytes = def_op_bytes = ad_bytes = def_ad_bytes = ctxt->addr_size/8;
@@ -1589,6 +1591,8 @@ x86_emulate(
     }
  done_prefixes:
 
+    ops->smp_lock(lock_prefix);
+
     if ( rex_prefix & REX_W )
         op_bytes = 8;
 
@@ -2052,7 +2056,10 @@ x86_emulate(
         generate_exception_if(mode_64bit() && !twobyte, EXC_UD, -1);
         fail_if(ops->read_segment == NULL);
         if ( (rc = ops->read_segment(src.val, &reg, ctxt)) != 0 )
+        {
+            ops->smp_unlock(lock_prefix);
             return rc;
+        }
         /* 64-bit mode: PUSH defaults to a 64-bit operand. */
         if ( mode_64bit() && (op_bytes == 4) )
             op_bytes = 8;
@@ -2074,7 +2081,10 @@ x86_emulate(
                               &dst.val, op_bytes, ctxt, ops)) != 0 )
             goto done;
         if ( (rc = load_seg(src.val, dst.val, 0, NULL, ctxt, ops)) != 0 )
+        {
+            ops->smp_unlock(lock_prefix);
             return rc;
+        }
         break;
 
     case 0x0e: /* push %%cs */
@@ -2380,7 +2390,12 @@ x86_emulate(
         }
         /* Write back the memory destination with implicit LOCK prefix. */
         dst.val = src.val;
-        lock_prefix = 1;
+        if ( !lock_prefix )
+        {
+            ops->smp_unlock(lock_prefix);
+            lock_prefix = 1;
+            ops->smp_lock(lock_prefix);
+        }
         break;
 
     case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */
@@ -3859,6 +3874,9 @@ x86_emulate(
  done:
     _put_fpu();
     put_stub(stub);
+
+    ops->smp_unlock(lock_prefix);
+
     return rc;
 
  twobyte_insn:
@@ -4767,5 +4785,8 @@ x86_emulate(
  cannot_emulate:
     _put_fpu();
     put_stub(stub);
+
+    ops->smp_unlock(lock_prefix);
+
     return X86EMUL_UNHANDLEABLE;
 }
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.h b/xen/arch/x86/x86_emulate/x86_emulate.h
index 3a1bb46..e515840 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.h
+++ b/xen/arch/x86/x86_emulate/x86_emulate.h
@@ -400,6 +400,14 @@ struct x86_emulate_ops
     /* vmfunc: Emulate VMFUNC via given set of EAX ECX inputs */
     int (*vmfunc)(
         struct x86_emulate_ctxt *ctxt);
+
+    /* smp_lock: Take a write lock if locked, read lock otherwise. */
+    void (*smp_lock)(
+        bool_t locked);
+
+    /* smp_unlock: Write unlock if locked, read unlock otherwise. */
+    void (*smp_unlock)(
+        bool_t locked);
 };
 
 struct cpu_user_regs;
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 45273d4..0f98256 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -272,6 +272,8 @@ struct domain *domain_create(domid_t domid, unsigned int domcr_flags,
 
     TRACE_1D(TRC_DOM0_DOM_ADD, d->domain_id);
 
+    percpu_rwlock_resource_init(&d->arch.emulate_lock, emulate_locked_rwlock);
+
     lock_profile_register_struct(LOCKPROF_TYPE_PERDOM, d, domid, "Domain");
 
     if ( (err = xsm_alloc_security_domain(d)) != 0 )
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index d393ed2..04312ae 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -271,6 +271,8 @@ struct monitor_write_data {
     uint64_t cr4;
 };
 
+DECLARE_PERCPU_RWLOCK_GLOBAL(emulate_locked_rwlock);
+
 struct arch_domain
 {
     struct page_info *perdomain_l3_pg;
@@ -409,6 +411,8 @@ struct arch_domain
 
     /* Emulated devices enabled bitmap. */
     uint32_t emulation_flags;
+
+    percpu_rwlock_t emulate_lock;
 } __cacheline_aligned;
 
 #define has_vlapic(d)      (!!((d)->arch.emulation_flags & XEN_X86_EMU_LAPIC))
diff --git a/xen/include/asm-x86/hvm/emulate.h b/xen/include/asm-x86/hvm/emulate.h
index 142d1b6..863f01d 100644
--- a/xen/include/asm-x86/hvm/emulate.h
+++ b/xen/include/asm-x86/hvm/emulate.h
@@ -67,6 +67,9 @@ int hvmemul_do_pio_buffer(uint16_t port,
 void hvm_dump_emulation_state(const char *prefix,
                               struct hvm_emulate_ctxt *hvmemul_ctxt);
 
+void emulate_smp_lock(bool_t locked);
+void emulate_smp_unlock(bool_t locked);
+
 #endif /* __ASM_X86_HVM_EMULATE_H__ */
 
 /*
-- 
1.9.1


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply related	[flat|nested] 47+ messages in thread