All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH V2] x86/emulate: synchronize LOCKed instruction emulation
@ 2017-03-15 16:04 Razvan Cojocaru
  2017-03-15 16:30 ` Jan Beulich
  0 siblings, 1 reply; 20+ messages in thread
From: Razvan Cojocaru @ 2017-03-15 16:04 UTC (permalink / raw)
  To: xen-devel
  Cc: wei.liu2, Razvan Cojocaru, george.dunlap, andrew.cooper3,
	ian.jackson, tim, paul.durrant, jbeulich

LOCK-prefixed instructions are currenly allowed to run in parallel
in x86_emulate(), which can lead the guest into an undefined state.
This patch fixes the issue.

Signed-off-by: Razvan Cojocaru <rcojocaru@bitdefender.com>
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>

---
Changes since V1:
 - Added Andrew Cooper's credit, as he's kept the patch current
   througout non-trivial code changes since the initial patch.
 - Significantly more patch testing (with XenServer).
 - Restricted lock scope.
 - Logic fixes.
---
 tools/tests/x86_emulator/test_x86_emulator.c | 10 ++++++++++
 xen/arch/x86/domain.c                        |  2 ++
 xen/arch/x86/hvm/emulate.c                   | 26 ++++++++++++++++++++++++++
 xen/arch/x86/mm.c                            |  6 ++++++
 xen/arch/x86/mm/shadow/common.c              |  2 ++
 xen/arch/x86/traps.c                         |  2 ++
 xen/arch/x86/x86_emulate/x86_emulate.c       | 14 ++++++++++++--
 xen/arch/x86/x86_emulate/x86_emulate.h       |  8 ++++++++
 xen/include/asm-x86/domain.h                 |  4 ++++
 xen/include/asm-x86/hvm/emulate.h            |  3 +++
 10 files changed, 75 insertions(+), 2 deletions(-)

diff --git a/tools/tests/x86_emulator/test_x86_emulator.c b/tools/tests/x86_emulator/test_x86_emulator.c
index 04332bb..86b79a1 100644
--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -283,6 +283,14 @@ static int read_msr(
     return X86EMUL_UNHANDLEABLE;
 }
 
+static void smp_lock(bool locked)
+{
+}
+
+static void smp_unlock(bool locked)
+{
+}
+
 static struct x86_emulate_ops emulops = {
     .read       = read,
     .insn_fetch = fetch,
@@ -293,6 +301,8 @@ static struct x86_emulate_ops emulops = {
     .read_cr    = emul_test_read_cr,
     .read_msr   = read_msr,
     .get_fpu    = emul_test_get_fpu,
+    .smp_lock   = smp_lock,
+    .smp_unlock = smp_unlock,
 };
 
 int main(int argc, char **argv)
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 479aee6..55010f4 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -529,6 +529,8 @@ int arch_domain_create(struct domain *d, unsigned int domcr_flags,
     if ( config == NULL && !is_idle_domain(d) )
         return -EINVAL;
 
+    percpu_rwlock_resource_init(&d->arch.emulate_lock, emulate_locked_rwlock);
+
     d->arch.s3_integrity = !!(domcr_flags & DOMCRF_s3_integrity);
 
     INIT_LIST_HEAD(&d->arch.pdev_list);
diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
index f36d7c9..d5bfbf1 100644
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -24,6 +24,8 @@
 #include <asm/hvm/svm/svm.h>
 #include <asm/vm_event.h>
 
+DEFINE_PERCPU_RWLOCK_GLOBAL(emulate_locked_rwlock);
+
 static void hvmtrace_io_assist(const ioreq_t *p)
 {
     unsigned int size, event;
@@ -1682,6 +1684,26 @@ static int hvmemul_vmfunc(
     return rc;
 }
 
+void emulate_smp_lock(bool locked)
+{
+    struct domain *d = current->domain;
+
+    if ( locked )
+        percpu_write_lock(emulate_locked_rwlock, &d->arch.emulate_lock);
+    else
+        percpu_read_lock(emulate_locked_rwlock, &d->arch.emulate_lock);
+}
+
+void emulate_smp_unlock(bool locked)
+{
+    struct domain *d = current->domain;
+
+    if ( locked )
+        percpu_write_unlock(emulate_locked_rwlock, &d->arch.emulate_lock);
+    else
+        percpu_read_unlock(emulate_locked_rwlock, &d->arch.emulate_lock);
+}
+
 static const struct x86_emulate_ops hvm_emulate_ops = {
     .read          = hvmemul_read,
     .insn_fetch    = hvmemul_insn_fetch,
@@ -1706,6 +1728,8 @@ static const struct x86_emulate_ops hvm_emulate_ops = {
     .put_fpu       = hvmemul_put_fpu,
     .invlpg        = hvmemul_invlpg,
     .vmfunc        = hvmemul_vmfunc,
+    .smp_lock      = emulate_smp_lock,
+    .smp_unlock    = emulate_smp_unlock,
 };
 
 static const struct x86_emulate_ops hvm_emulate_ops_no_write = {
@@ -1731,6 +1755,8 @@ static const struct x86_emulate_ops hvm_emulate_ops_no_write = {
     .put_fpu       = hvmemul_put_fpu,
     .invlpg        = hvmemul_invlpg,
     .vmfunc        = hvmemul_vmfunc,
+    .smp_lock      = emulate_smp_lock,
+    .smp_unlock    = emulate_smp_unlock,
 };
 
 static int _hvm_emulate_one(struct hvm_emulate_ctxt *hvmemul_ctxt,
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 7bc951d..2fb3325 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -5369,6 +5369,8 @@ static const struct x86_emulate_ops ptwr_emulate_ops = {
     .cmpxchg    = ptwr_emulated_cmpxchg,
     .validate   = pv_emul_is_mem_write,
     .cpuid      = pv_emul_cpuid,
+    .smp_lock   = emulate_smp_lock,
+    .smp_unlock = emulate_smp_unlock,
 };
 
 /* Write page fault handler: check if guest is trying to modify a PTE. */
@@ -5485,6 +5487,8 @@ static const struct x86_emulate_ops mmio_ro_emulate_ops = {
     .write      = mmio_ro_emulated_write,
     .validate   = pv_emul_is_mem_write,
     .cpuid      = pv_emul_cpuid,
+    .smp_lock   = emulate_smp_lock,
+    .smp_unlock = emulate_smp_unlock,
 };
 
 int mmcfg_intercept_write(
@@ -5524,6 +5528,8 @@ static const struct x86_emulate_ops mmcfg_intercept_ops = {
     .write      = mmcfg_intercept_write,
     .validate   = pv_emul_is_mem_write,
     .cpuid      = pv_emul_cpuid,
+    .smp_lock   = emulate_smp_lock,
+    .smp_unlock = emulate_smp_unlock,
 };
 
 /* Check if guest is trying to modify a r/o MMIO page. */
diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
index d078d78..3a2f02e 100644
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -310,6 +310,8 @@ static const struct x86_emulate_ops hvm_shadow_emulator_ops = {
     .write      = hvm_emulate_write,
     .cmpxchg    = hvm_emulate_cmpxchg,
     .cpuid      = hvmemul_cpuid,
+    .smp_lock   = emulate_smp_lock,
+    .smp_unlock = emulate_smp_unlock,
 };
 
 const struct x86_emulate_ops *shadow_init_emulation(
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 08b0070..8bdc8c8 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -2957,6 +2957,8 @@ static const struct x86_emulate_ops priv_op_ops = {
     .write_msr           = priv_op_write_msr,
     .cpuid               = pv_emul_cpuid,
     .wbinvd              = priv_op_wbinvd,
+    .smp_lock            = emulate_smp_lock,
+    .smp_unlock          = emulate_smp_unlock,
 };
 
 static int emulate_privileged_op(struct cpu_user_regs *regs)
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
index 4872f19..bec4af7 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -3037,7 +3037,7 @@ x86_emulate(
     struct x86_emulate_stub stub = {};
     DECLARE_ALIGNED(mmval_t, mmval);
 
-    ASSERT(ops->read);
+    ASSERT(ops->read && ops->smp_lock && ops->smp_unlock);
 
     rc = x86_decode(&state, ctxt, ops);
     if ( rc != X86EMUL_OKAY )
@@ -3065,6 +3065,8 @@ x86_emulate(
     d = state.desc;
 #define state (&state)
 
+    ops->smp_lock(lock_prefix);
+
     generate_exception_if(state->not_64bit && mode_64bit(), EXC_UD);
 
     if ( ea.type == OP_REG )
@@ -3593,6 +3595,12 @@ x86_emulate(
         break;
 
     case 0x86 ... 0x87: xchg: /* xchg */
+        if ( !lock_prefix )
+        {
+            ops->smp_unlock(lock_prefix);
+            lock_prefix = 1;
+            ops->smp_lock(lock_prefix);
+        }
         /* Write back the register source. */
         switch ( dst.bytes )
         {
@@ -3603,7 +3611,6 @@ x86_emulate(
         }
         /* Write back the memory destination with implicit LOCK prefix. */
         dst.val = src.val;
-        lock_prefix = 1;
         break;
 
     case 0xc6: /* Grp11: mov / xabort */
@@ -7925,8 +7932,11 @@ x86_emulate(
     ctxt->regs->eflags &= ~X86_EFLAGS_RF;
 
  done:
+    ops->smp_unlock(lock_prefix);
+
     _put_fpu();
     put_stub(stub);
+
     return rc;
 #undef state
 }
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.h b/xen/arch/x86/x86_emulate/x86_emulate.h
index 6e98453..3f8bb38 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.h
+++ b/xen/arch/x86/x86_emulate/x86_emulate.h
@@ -448,6 +448,14 @@ struct x86_emulate_ops
     /* vmfunc: Emulate VMFUNC via given set of EAX ECX inputs */
     int (*vmfunc)(
         struct x86_emulate_ctxt *ctxt);
+
+    /* smp_lock: Take a write lock if locked, read lock otherwise. */
+    void (*smp_lock)(
+        bool locked);
+
+    /* smp_unlock: Write unlock if locked, read unlock otherwise. */
+    void (*smp_unlock)(
+        bool locked);
 };
 
 struct cpu_user_regs;
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index ff5267f..2afccab 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -271,6 +271,8 @@ struct monitor_write_data {
     uint64_t cr4;
 };
 
+DECLARE_PERCPU_RWLOCK_GLOBAL(emulate_locked_rwlock);
+
 struct arch_domain
 {
     struct page_info *perdomain_l3_pg;
@@ -413,6 +415,8 @@ struct arch_domain
 
     /* Emulated devices enabled bitmap. */
     uint32_t emulation_flags;
+
+    percpu_rwlock_t emulate_lock;
 } __cacheline_aligned;
 
 #define has_vlapic(d)      (!!((d)->arch.emulation_flags & XEN_X86_EMU_LAPIC))
diff --git a/xen/include/asm-x86/hvm/emulate.h b/xen/include/asm-x86/hvm/emulate.h
index 88d6b70..b29a47e 100644
--- a/xen/include/asm-x86/hvm/emulate.h
+++ b/xen/include/asm-x86/hvm/emulate.h
@@ -93,6 +93,9 @@ int hvmemul_do_pio_buffer(uint16_t port,
 void hvm_dump_emulation_state(const char *prefix,
                               struct hvm_emulate_ctxt *hvmemul_ctxt);
 
+void emulate_smp_lock(bool locked);
+void emulate_smp_unlock(bool locked);
+
 #endif /* __ASM_X86_HVM_EMULATE_H__ */
 
 /*
-- 
1.9.1


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH V2] x86/emulate: synchronize LOCKed instruction emulation
  2017-03-15 16:04 [PATCH V2] x86/emulate: synchronize LOCKed instruction emulation Razvan Cojocaru
@ 2017-03-15 16:30 ` Jan Beulich
  2017-03-15 16:46   ` Razvan Cojocaru
  0 siblings, 1 reply; 20+ messages in thread
From: Jan Beulich @ 2017-03-15 16:30 UTC (permalink / raw)
  To: Razvan Cojocaru
  Cc: tim, wei.liu2, george.dunlap, andrew.cooper3, ian.jackson,
	xen-devel, paul.durrant

>>> On 15.03.17 at 17:04, <rcojocaru@bitdefender.com> wrote:
> ---
> Changes since V1:
>  - Added Andrew Cooper's credit, as he's kept the patch current
>    througout non-trivial code changes since the initial patch.
>  - Significantly more patch testing (with XenServer).
>  - Restricted lock scope.

Not by much, as it seems. In particular you continue to take the
lock even for instructions not accessing memory at all.

Also, by "reworked" I did assume you mean converted to at least the
cmpxchg based model.

> --- a/tools/tests/x86_emulator/test_x86_emulator.c
> +++ b/tools/tests/x86_emulator/test_x86_emulator.c
> @@ -283,6 +283,14 @@ static int read_msr(
>      return X86EMUL_UNHANDLEABLE;
>  }
>  
> +static void smp_lock(bool locked)
> +{
> +}
> +
> +static void smp_unlock(bool locked)
> +{
> +}

I don't think the hooks should be a requirement, and hence these
shouldn't be needed.

> --- a/xen/arch/x86/domain.c
> +++ b/xen/arch/x86/domain.c
> @@ -529,6 +529,8 @@ int arch_domain_create(struct domain *d, unsigned int domcr_flags,
>      if ( config == NULL && !is_idle_domain(d) )
>          return -EINVAL;
>  
> +    percpu_rwlock_resource_init(&d->arch.emulate_lock, emulate_locked_rwlock);

This should move into the same file as where the lock and the hook
functions live, so that the variable can be static. I'm not sure ...

> --- a/xen/arch/x86/hvm/emulate.c
> +++ b/xen/arch/x86/hvm/emulate.c
> @@ -24,6 +24,8 @@
>  #include <asm/hvm/svm/svm.h>
>  #include <asm/vm_event.h>
>  
> +DEFINE_PERCPU_RWLOCK_GLOBAL(emulate_locked_rwlock);

... this is the right file, though, considering the wide (including PV)
use of it.

> @@ -1731,6 +1755,8 @@ static const struct x86_emulate_ops hvm_emulate_ops_no_write = {
>      .put_fpu       = hvmemul_put_fpu,
>      .invlpg        = hvmemul_invlpg,
>      .vmfunc        = hvmemul_vmfunc,
> +    .smp_lock      = emulate_smp_lock,
> +    .smp_unlock    = emulate_smp_unlock,
>  };

No need for the hooks here.

> @@ -5485,6 +5487,8 @@ static const struct x86_emulate_ops mmio_ro_emulate_ops = {
>      .write      = mmio_ro_emulated_write,
>      .validate   = pv_emul_is_mem_write,
>      .cpuid      = pv_emul_cpuid,
> +    .smp_lock   = emulate_smp_lock,
> +    .smp_unlock = emulate_smp_unlock,
>  };

Nor here.

> @@ -5524,6 +5528,8 @@ static const struct x86_emulate_ops mmcfg_intercept_ops = {
>      .write      = mmcfg_intercept_write,
>      .validate   = pv_emul_is_mem_write,
>      .cpuid      = pv_emul_cpuid,
> +    .smp_lock   = emulate_smp_lock,
> +    .smp_unlock = emulate_smp_unlock,
>  };

Not sure about this one, but generally I'd expect no LOCKed accesses
to MMCFG space.

> --- a/xen/arch/x86/traps.c
> +++ b/xen/arch/x86/traps.c
> @@ -2957,6 +2957,8 @@ static const struct x86_emulate_ops priv_op_ops = {
>      .write_msr           = priv_op_write_msr,
>      .cpuid               = pv_emul_cpuid,
>      .wbinvd              = priv_op_wbinvd,
> +    .smp_lock            = emulate_smp_lock,
> +    .smp_unlock          = emulate_smp_unlock,
>  };

No need for the hooks again.

> @@ -3065,6 +3065,8 @@ x86_emulate(
>      d = state.desc;
>  #define state (&state)
>  
> +    ops->smp_lock(lock_prefix);

There's a "goto complete_insn" upwards from here, which therefore
bypasses the acquire, but goes through the release path. Also this is
still too early to take the lock.

> @@ -7925,8 +7932,11 @@ x86_emulate(
>      ctxt->regs->eflags &= ~X86_EFLAGS_RF;
>  
>   done:
> +    ops->smp_unlock(lock_prefix);

And this, imo, is too late (except for covering error exits coming
here). I don't think you can avoid having a local tracking variable.

> --- a/xen/arch/x86/x86_emulate/x86_emulate.h
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.h
> @@ -448,6 +448,14 @@ struct x86_emulate_ops
>      /* vmfunc: Emulate VMFUNC via given set of EAX ECX inputs */
>      int (*vmfunc)(
>          struct x86_emulate_ctxt *ctxt);
> +
> +    /* smp_lock: Take a write lock if locked, read lock otherwise. */
> +    void (*smp_lock)(
> +        bool locked);
> +
> +    /* smp_unlock: Write unlock if locked, read unlock otherwise. */
> +    void (*smp_unlock)(
> +        bool locked);
>  };

All hooks should take a ctxt pointer.

Jan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH V2] x86/emulate: synchronize LOCKed instruction emulation
  2017-03-15 16:30 ` Jan Beulich
@ 2017-03-15 16:46   ` Razvan Cojocaru
  2017-03-15 16:57     ` Jan Beulich
  0 siblings, 1 reply; 20+ messages in thread
From: Razvan Cojocaru @ 2017-03-15 16:46 UTC (permalink / raw)
  To: Jan Beulich
  Cc: tim, wei.liu2, george.dunlap, andrew.cooper3, ian.jackson,
	xen-devel, paul.durrant

On 03/15/2017 06:30 PM, Jan Beulich wrote:
>>>> On 15.03.17 at 17:04, <rcojocaru@bitdefender.com> wrote:
>> ---
>> Changes since V1:
>>  - Added Andrew Cooper's credit, as he's kept the patch current
>>    througout non-trivial code changes since the initial patch.
>>  - Significantly more patch testing (with XenServer).
>>  - Restricted lock scope.
> 
> Not by much, as it seems. In particular you continue to take the
> lock even for instructions not accessing memory at all.

I'll take a closer look.

> Also, by "reworked" I did assume you mean converted to at least the
> cmpxchg based model.

I haven't been able to follow the latest emulator changes closely, could
you please clarify what you mean by "the cmpxchg model"? Thanks.

>> --- a/tools/tests/x86_emulator/test_x86_emulator.c
>> +++ b/tools/tests/x86_emulator/test_x86_emulator.c
>> @@ -283,6 +283,14 @@ static int read_msr(
>>      return X86EMUL_UNHANDLEABLE;
>>  }
>>  
>> +static void smp_lock(bool locked)
>> +{
>> +}
>> +
>> +static void smp_unlock(bool locked)
>> +{
>> +}
> 
> I don't think the hooks should be a requirement, and hence these
> shouldn't be needed.

I'll make them optional.

>> --- a/xen/arch/x86/domain.c
>> +++ b/xen/arch/x86/domain.c
>> @@ -529,6 +529,8 @@ int arch_domain_create(struct domain *d, unsigned int domcr_flags,
>>      if ( config == NULL && !is_idle_domain(d) )
>>          return -EINVAL;
>>  
>> +    percpu_rwlock_resource_init(&d->arch.emulate_lock, emulate_locked_rwlock);
> 
> This should move into the same file as where the lock and the hook
> functions live, so that the variable can be static. I'm not sure ...
> 
>> --- a/xen/arch/x86/hvm/emulate.c
>> +++ b/xen/arch/x86/hvm/emulate.c
>> @@ -24,6 +24,8 @@
>>  #include <asm/hvm/svm/svm.h>
>>  #include <asm/vm_event.h>
>>  
>> +DEFINE_PERCPU_RWLOCK_GLOBAL(emulate_locked_rwlock);
> 
> ... this is the right file, though, considering the wide (including PV)
> use of it.

I'll hunt for a better place for it.

>> @@ -3065,6 +3065,8 @@ x86_emulate(
>>      d = state.desc;
>>  #define state (&state)
>>  
>> +    ops->smp_lock(lock_prefix);
> 
> There's a "goto complete_insn" upwards from here, which therefore
> bypasses the acquire, but goes through the release path. Also this is
> still too early to take the lock.

True, it appears that there have been changes in staging since the last
test. I'll need to follow the locking patch carefully.

>> @@ -7925,8 +7932,11 @@ x86_emulate(
>>      ctxt->regs->eflags &= ~X86_EFLAGS_RF;
>>  
>>   done:
>> +    ops->smp_unlock(lock_prefix);
> 
> And this, imo, is too late (except for covering error exits coming
> here). I don't think you can avoid having a local tracking variable.

Fair enough. Will use one.

>> --- a/xen/arch/x86/x86_emulate/x86_emulate.h
>> +++ b/xen/arch/x86/x86_emulate/x86_emulate.h
>> @@ -448,6 +448,14 @@ struct x86_emulate_ops
>>      /* vmfunc: Emulate VMFUNC via given set of EAX ECX inputs */
>>      int (*vmfunc)(
>>          struct x86_emulate_ctxt *ctxt);
>> +
>> +    /* smp_lock: Take a write lock if locked, read lock otherwise. */
>> +    void (*smp_lock)(
>> +        bool locked);
>> +
>> +    /* smp_unlock: Write unlock if locked, read unlock otherwise. */
>> +    void (*smp_unlock)(
>> +        bool locked);
>>  };
> 
> All hooks should take a ctxt pointer.

I'll try to adapt them.


Thanks,
Razvan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH V2] x86/emulate: synchronize LOCKed instruction emulation
  2017-03-15 16:46   ` Razvan Cojocaru
@ 2017-03-15 16:57     ` Jan Beulich
  2017-03-21 15:38       ` Razvan Cojocaru
  0 siblings, 1 reply; 20+ messages in thread
From: Jan Beulich @ 2017-03-15 16:57 UTC (permalink / raw)
  To: Razvan Cojocaru
  Cc: tim, wei.liu2, george.dunlap, andrew.cooper3, ian.jackson,
	xen-devel, paul.durrant

>>> On 15.03.17 at 17:46, <rcojocaru@bitdefender.com> wrote:
> On 03/15/2017 06:30 PM, Jan Beulich wrote:
>>>>> On 15.03.17 at 17:04, <rcojocaru@bitdefender.com> wrote:
>>> ---
>>> Changes since V1:
>>>  - Added Andrew Cooper's credit, as he's kept the patch current
>>>    througout non-trivial code changes since the initial patch.
>>>  - Significantly more patch testing (with XenServer).
>>>  - Restricted lock scope.
>> 
>> Not by much, as it seems. In particular you continue to take the
>> lock even for instructions not accessing memory at all.
> 
> I'll take a closer look.
> 
>> Also, by "reworked" I did assume you mean converted to at least the
>> cmpxchg based model.
> 
> I haven't been able to follow the latest emulator changes closely, could
> you please clarify what you mean by "the cmpxchg model"? Thanks.

This is unrelated to any recent changes. The idea is to make the
->cmpxchg() hook actually behave like what its name says. It's
being used for LOCKed insn writeback already, and it could
therefore simply force a retry of the full instruction if the compare
part of it fails. It may need to be given another parameter, to
allow the hook function to tell LOCKed from "normal" uses.

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH V2] x86/emulate: synchronize LOCKed instruction emulation
  2017-03-15 16:57     ` Jan Beulich
@ 2017-03-21 15:38       ` Razvan Cojocaru
  2017-03-21 15:39         ` Razvan Cojocaru
  2017-03-21 16:29         ` Jan Beulich
  0 siblings, 2 replies; 20+ messages in thread
From: Razvan Cojocaru @ 2017-03-21 15:38 UTC (permalink / raw)
  To: Jan Beulich
  Cc: tim, wei.liu2, george.dunlap, andrew.cooper3, ian.jackson,
	xen-devel, paul.durrant

On 03/15/2017 06:57 PM, Jan Beulich wrote:
>>>> On 15.03.17 at 17:46, <rcojocaru@bitdefender.com> wrote:
>> On 03/15/2017 06:30 PM, Jan Beulich wrote:
>>>>>> On 15.03.17 at 17:04, <rcojocaru@bitdefender.com> wrote:
>>>> ---
>>>> Changes since V1:
>>>>  - Added Andrew Cooper's credit, as he's kept the patch current
>>>>    througout non-trivial code changes since the initial patch.
>>>>  - Significantly more patch testing (with XenServer).
>>>>  - Restricted lock scope.
>>>
>>> Not by much, as it seems. In particular you continue to take the
>>> lock even for instructions not accessing memory at all.
>>
>> I'll take a closer look.
>>
>>> Also, by "reworked" I did assume you mean converted to at least the
>>> cmpxchg based model.
>>
>> I haven't been able to follow the latest emulator changes closely, could
>> you please clarify what you mean by "the cmpxchg model"? Thanks.
> 
> This is unrelated to any recent changes. The idea is to make the
> ->cmpxchg() hook actually behave like what its name says. It's
> being used for LOCKed insn writeback already, and it could
> therefore simply force a retry of the full instruction if the compare
> part of it fails. It may need to be given another parameter, to
> allow the hook function to tell LOCKed from "normal" uses.

I assume this is what you have in mind?


static int hvmemul_cmpxchg(
    enum x86_segment seg,
    unsigned long offset,
    void *p_old,
    void *p_new,
    unsigned int bytes,
    struct x86_emulate_ctxt *ctxt)
{
    /* Fix this in case the guest is really relying on r-m-w atomicity. */
    uint64_t read;
    int rc;

    rc = hvmemul_read(seg, offset, &read, bytes, ctxt);

    if ( rc != X86EMUL_OKAY )
        return rc;

    switch( bytes )
    {
    case 1:
        if ( *(uint8_t *)read != *(uint8_t *)p_old )
        {
            *(uint8_t *)p_old = *(uint8_t *)&read;
            return X86EMUL_RETRY;
        }
        break;
    case 2:
        if ( *(uint16_t *)read != *(uint16_t *)p_old )
        {
            *(uint16_t *)p_old = *(uint16_t *)&read;
            return X86EMUL_RETRY;
        }
        break;
    case 4:
        if ( *(uint32_t *)&read != *(uint32_t *)p_old )
        {
            *(uint32_t *)p_old = *(uint32_t *)&read;
            return X86EMUL_RETRY;
        }
        break;
    case 8:
        if ( read != *(uint64_t *)p_old )
        {
            *(uint64_t *)p_old = read;
            return X86EMUL_RETRY;
        }
        break;
    }

    return hvmemul_write(seg, offset, p_new, bytes, ctxt);
}

I can rip this off and send this one small patch independently of the
larger LOCK one if you think it's a good idea.


Thanks,
Razvan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH V2] x86/emulate: synchronize LOCKed instruction emulation
  2017-03-21 15:38       ` Razvan Cojocaru
@ 2017-03-21 15:39         ` Razvan Cojocaru
  2017-03-21 16:29         ` Jan Beulich
  1 sibling, 0 replies; 20+ messages in thread
From: Razvan Cojocaru @ 2017-03-21 15:39 UTC (permalink / raw)
  To: Jan Beulich
  Cc: tim, wei.liu2, george.dunlap, andrew.cooper3, ian.jackson,
	xen-devel, paul.durrant

On 03/21/2017 05:38 PM, Razvan Cojocaru wrote:
> On 03/15/2017 06:57 PM, Jan Beulich wrote:
>>>>> On 15.03.17 at 17:46, <rcojocaru@bitdefender.com> wrote:
>>> On 03/15/2017 06:30 PM, Jan Beulich wrote:
>>>>>>> On 15.03.17 at 17:04, <rcojocaru@bitdefender.com> wrote:
>>>>> ---
>>>>> Changes since V1:
>>>>>  - Added Andrew Cooper's credit, as he's kept the patch current
>>>>>    througout non-trivial code changes since the initial patch.
>>>>>  - Significantly more patch testing (with XenServer).
>>>>>  - Restricted lock scope.
>>>>
>>>> Not by much, as it seems. In particular you continue to take the
>>>> lock even for instructions not accessing memory at all.
>>>
>>> I'll take a closer look.
>>>
>>>> Also, by "reworked" I did assume you mean converted to at least the
>>>> cmpxchg based model.
>>>
>>> I haven't been able to follow the latest emulator changes closely, could
>>> you please clarify what you mean by "the cmpxchg model"? Thanks.
>>
>> This is unrelated to any recent changes. The idea is to make the
>> ->cmpxchg() hook actually behave like what its name says. It's
>> being used for LOCKed insn writeback already, and it could
>> therefore simply force a retry of the full instruction if the compare
>> part of it fails. It may need to be given another parameter, to
>> allow the hook function to tell LOCKed from "normal" uses.
> 
> I assume this is what you have in mind?
> 
> 
> static int hvmemul_cmpxchg(
>     enum x86_segment seg,
>     unsigned long offset,
>     void *p_old,
>     void *p_new,
>     unsigned int bytes,
>     struct x86_emulate_ctxt *ctxt)
> {
>     /* Fix this in case the guest is really relying on r-m-w atomicity. */
>     uint64_t read;
>     int rc;
> 
>     rc = hvmemul_read(seg, offset, &read, bytes, ctxt);
> 
>     if ( rc != X86EMUL_OKAY )
>         return rc;
> 
>     switch( bytes )
>     {
>     case 1:
>         if ( *(uint8_t *)read != *(uint8_t *)p_old )
>         {
>             *(uint8_t *)p_old = *(uint8_t *)&read;
>             return X86EMUL_RETRY;
>         }
>         break;
>     case 2:
>         if ( *(uint16_t *)read != *(uint16_t *)p_old )
>         {
>             *(uint16_t *)p_old = *(uint16_t *)&read;
>             return X86EMUL_RETRY;
>         }
>         break;

Sorry, forgot to add & to "read" for the two cases above.


Thanks,
Razvan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH V2] x86/emulate: synchronize LOCKed instruction emulation
  2017-03-21 15:38       ` Razvan Cojocaru
  2017-03-21 15:39         ` Razvan Cojocaru
@ 2017-03-21 16:29         ` Jan Beulich
  2017-03-21 16:44           ` Razvan Cojocaru
  1 sibling, 1 reply; 20+ messages in thread
From: Jan Beulich @ 2017-03-21 16:29 UTC (permalink / raw)
  To: Razvan Cojocaru
  Cc: tim, wei.liu2, george.dunlap, andrew.cooper3, ian.jackson,
	xen-devel, paul.durrant

>>> On 21.03.17 at 16:38, <rcojocaru@bitdefender.com> wrote:
> On 03/15/2017 06:57 PM, Jan Beulich wrote:
>>>>> On 15.03.17 at 17:46, <rcojocaru@bitdefender.com> wrote:
>>> On 03/15/2017 06:30 PM, Jan Beulich wrote:
>>>>>>> On 15.03.17 at 17:04, <rcojocaru@bitdefender.com> wrote:
>>>>> ---
>>>>> Changes since V1:
>>>>>  - Added Andrew Cooper's credit, as he's kept the patch current
>>>>>    througout non-trivial code changes since the initial patch.
>>>>>  - Significantly more patch testing (with XenServer).
>>>>>  - Restricted lock scope.
>>>>
>>>> Not by much, as it seems. In particular you continue to take the
>>>> lock even for instructions not accessing memory at all.
>>>
>>> I'll take a closer look.
>>>
>>>> Also, by "reworked" I did assume you mean converted to at least the
>>>> cmpxchg based model.
>>>
>>> I haven't been able to follow the latest emulator changes closely, could
>>> you please clarify what you mean by "the cmpxchg model"? Thanks.
>> 
>> This is unrelated to any recent changes. The idea is to make the
>> ->cmpxchg() hook actually behave like what its name says. It's
>> being used for LOCKed insn writeback already, and it could
>> therefore simply force a retry of the full instruction if the compare
>> part of it fails. It may need to be given another parameter, to
>> allow the hook function to tell LOCKed from "normal" uses.
> 
> I assume this is what you have in mind?

Hmm, not exactly. I'd expect an actual CMPXCHG instruction to
be used, with a LOCK prefix when the original access had one.
There should certainly not be any tail call to hvmemul_write()
anymore.

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH V2] x86/emulate: synchronize LOCKed instruction emulation
  2017-03-21 16:29         ` Jan Beulich
@ 2017-03-21 16:44           ` Razvan Cojocaru
  2017-03-21 17:02             ` Jan Beulich
  0 siblings, 1 reply; 20+ messages in thread
From: Razvan Cojocaru @ 2017-03-21 16:44 UTC (permalink / raw)
  To: Jan Beulich
  Cc: tim, wei.liu2, george.dunlap, andrew.cooper3, ian.jackson,
	xen-devel, paul.durrant

On 03/21/2017 06:29 PM, Jan Beulich wrote:
>>>> On 21.03.17 at 16:38, <rcojocaru@bitdefender.com> wrote:
>> On 03/15/2017 06:57 PM, Jan Beulich wrote:
>>>>>> On 15.03.17 at 17:46, <rcojocaru@bitdefender.com> wrote:
>>>> On 03/15/2017 06:30 PM, Jan Beulich wrote:
>>>>>>>> On 15.03.17 at 17:04, <rcojocaru@bitdefender.com> wrote:
>>>>>> ---
>>>>>> Changes since V1:
>>>>>>  - Added Andrew Cooper's credit, as he's kept the patch current
>>>>>>    througout non-trivial code changes since the initial patch.
>>>>>>  - Significantly more patch testing (with XenServer).
>>>>>>  - Restricted lock scope.
>>>>>
>>>>> Not by much, as it seems. In particular you continue to take the
>>>>> lock even for instructions not accessing memory at all.
>>>>
>>>> I'll take a closer look.
>>>>
>>>>> Also, by "reworked" I did assume you mean converted to at least the
>>>>> cmpxchg based model.
>>>>
>>>> I haven't been able to follow the latest emulator changes closely, could
>>>> you please clarify what you mean by "the cmpxchg model"? Thanks.
>>>
>>> This is unrelated to any recent changes. The idea is to make the
>>> ->cmpxchg() hook actually behave like what its name says. It's
>>> being used for LOCKed insn writeback already, and it could
>>> therefore simply force a retry of the full instruction if the compare
>>> part of it fails. It may need to be given another parameter, to
>>> allow the hook function to tell LOCKed from "normal" uses.
>>
>> I assume this is what you have in mind?
> 
> Hmm, not exactly. I'd expect an actual CMPXCHG instruction to
> be used, with a LOCK prefix when the original access had one.
> There should certainly not be any tail call to hvmemul_write()
> anymore.

There seems to be a misunderstanding here: if I understand this reply
correctly, you'd like hvmemul_cmpxchg() to become a stub that really
runs CMPXCHG - but if that comes with the smp_lock part as well, it
doesn't matter (except for the now-ignored p_old part, i.e. the comparison).

I've initially asked if I should bring the old XenServer-queued LOCK
patch back, and I've understood that it could serve a purpose, at least
as a temporary workaround until your, and Andrew's emulator changes,
make it unrequired. However, it appears that you've understood this to
mean the addition of the CMPXCHG stub (speaking of which, could you
please point out examples of implementing such stubs in the Xen code? I
have never done one of those before.)


Thanks,
Razvan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH V2] x86/emulate: synchronize LOCKed instruction emulation
  2017-03-21 16:44           ` Razvan Cojocaru
@ 2017-03-21 17:02             ` Jan Beulich
  2017-03-22 17:22               ` Razvan Cojocaru
  0 siblings, 1 reply; 20+ messages in thread
From: Jan Beulich @ 2017-03-21 17:02 UTC (permalink / raw)
  To: Razvan Cojocaru
  Cc: tim, wei.liu2, george.dunlap, andrew.cooper3, ian.jackson,
	xen-devel, paul.durrant

>>> On 21.03.17 at 17:44, <rcojocaru@bitdefender.com> wrote:
> On 03/21/2017 06:29 PM, Jan Beulich wrote:
>>>>> On 21.03.17 at 16:38, <rcojocaru@bitdefender.com> wrote:
>>> On 03/15/2017 06:57 PM, Jan Beulich wrote:
>>>>>>> On 15.03.17 at 17:46, <rcojocaru@bitdefender.com> wrote:
>>>>> On 03/15/2017 06:30 PM, Jan Beulich wrote:
>>>>>>>>> On 15.03.17 at 17:04, <rcojocaru@bitdefender.com> wrote:
>>>>>>> ---
>>>>>>> Changes since V1:
>>>>>>>  - Added Andrew Cooper's credit, as he's kept the patch current
>>>>>>>    througout non-trivial code changes since the initial patch.
>>>>>>>  - Significantly more patch testing (with XenServer).
>>>>>>>  - Restricted lock scope.
>>>>>>
>>>>>> Not by much, as it seems. In particular you continue to take the
>>>>>> lock even for instructions not accessing memory at all.
>>>>>
>>>>> I'll take a closer look.
>>>>>
>>>>>> Also, by "reworked" I did assume you mean converted to at least the
>>>>>> cmpxchg based model.
>>>>>
>>>>> I haven't been able to follow the latest emulator changes closely, could
>>>>> you please clarify what you mean by "the cmpxchg model"? Thanks.
>>>>
>>>> This is unrelated to any recent changes. The idea is to make the
>>>> ->cmpxchg() hook actually behave like what its name says. It's
>>>> being used for LOCKed insn writeback already, and it could
>>>> therefore simply force a retry of the full instruction if the compare
>>>> part of it fails. It may need to be given another parameter, to
>>>> allow the hook function to tell LOCKed from "normal" uses.
>>>
>>> I assume this is what you have in mind?
>> 
>> Hmm, not exactly. I'd expect an actual CMPXCHG instruction to
>> be used, with a LOCK prefix when the original access had one.
>> There should certainly not be any tail call to hvmemul_write()
>> anymore.
> 
> There seems to be a misunderstanding here: if I understand this reply
> correctly, you'd like hvmemul_cmpxchg() to become a stub that really
> runs CMPXCHG - but if that comes with the smp_lock part as well, it
> doesn't matter (except for the now-ignored p_old part, i.e. the comparison).
> 
> I've initially asked if I should bring the old XenServer-queued LOCK
> patch back, and I've understood that it could serve a purpose, at least
> as a temporary workaround until your, and Andrew's emulator changes,
> make it unrequired.

Well, yes, as said earlier (still visible in context above) I had
originally assumed "reworked" would mean making the cmpxchg
hook actually match its name.

> However, it appears that you've understood this to
> mean the addition of the CMPXCHG stub (speaking of which, could you
> please point out examples of implementing such stubs in the Xen code? I
> have never done one of those before.)

There's no talk about stubs here. All I had hoped would have been
done _instead_ of the smp-lock approach was to use the CMPXCHG
instruction inside the hook function, at least for RAM case (I think
we could live with MMIO still being forwarded to the write handler).

So all this isn't to say that the smp-lock approach might not be
worthwhile to take on its own, provided the locked region gets
shrunk as much as possible without losing correctness. The
CMPXCHG model would just not have this locking granularity
problem in the first place.

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH V2] x86/emulate: synchronize LOCKed instruction emulation
  2017-03-21 17:02             ` Jan Beulich
@ 2017-03-22 17:22               ` Razvan Cojocaru
  2017-03-23  8:19                 ` Jan Beulich
  0 siblings, 1 reply; 20+ messages in thread
From: Razvan Cojocaru @ 2017-03-22 17:22 UTC (permalink / raw)
  To: Jan Beulich
  Cc: wei.liu2, george.dunlap, ian.jackson, tim, xen-devel,
	paul.durrant, andrew.cooper3

On 03/21/2017 07:02 PM, Jan Beulich wrote:
>>>> On 21.03.17 at 17:44, <rcojocaru@bitdefender.com> wrote:
>> On 03/21/2017 06:29 PM, Jan Beulich wrote:
>>>>>> On 21.03.17 at 16:38, <rcojocaru@bitdefender.com> wrote:
>>>> On 03/15/2017 06:57 PM, Jan Beulich wrote:
>>>>>>>> On 15.03.17 at 17:46, <rcojocaru@bitdefender.com> wrote:
>>>>>> On 03/15/2017 06:30 PM, Jan Beulich wrote:
>>>>>>>>>> On 15.03.17 at 17:04, <rcojocaru@bitdefender.com> wrote:
>>>>>>>> ---
>>>>>>>> Changes since V1:
>>>>>>>>  - Added Andrew Cooper's credit, as he's kept the patch current
>>>>>>>>    througout non-trivial code changes since the initial patch.
>>>>>>>>  - Significantly more patch testing (with XenServer).
>>>>>>>>  - Restricted lock scope.
>>>>>>>
>>>>>>> Not by much, as it seems. In particular you continue to take the
>>>>>>> lock even for instructions not accessing memory at all.
>>>>>>
>>>>>> I'll take a closer look.
>>>>>>
>>>>>>> Also, by "reworked" I did assume you mean converted to at least the
>>>>>>> cmpxchg based model.
>>>>>>
>>>>>> I haven't been able to follow the latest emulator changes closely, could
>>>>>> you please clarify what you mean by "the cmpxchg model"? Thanks.
>>>>>
>>>>> This is unrelated to any recent changes. The idea is to make the
>>>>> ->cmpxchg() hook actually behave like what its name says. It's
>>>>> being used for LOCKed insn writeback already, and it could
>>>>> therefore simply force a retry of the full instruction if the compare
>>>>> part of it fails. It may need to be given another parameter, to
>>>>> allow the hook function to tell LOCKed from "normal" uses.
>>>>
>>>> I assume this is what you have in mind?
>>>
>>> Hmm, not exactly. I'd expect an actual CMPXCHG instruction to
>>> be used, with a LOCK prefix when the original access had one.
>>> There should certainly not be any tail call to hvmemul_write()
>>> anymore.
>>
>> There seems to be a misunderstanding here: if I understand this reply
>> correctly, you'd like hvmemul_cmpxchg() to become a stub that really
>> runs CMPXCHG - but if that comes with the smp_lock part as well, it
>> doesn't matter (except for the now-ignored p_old part, i.e. the comparison).
>>
>> I've initially asked if I should bring the old XenServer-queued LOCK
>> patch back, and I've understood that it could serve a purpose, at least
>> as a temporary workaround until your, and Andrew's emulator changes,
>> make it unrequired.
> 
> Well, yes, as said earlier (still visible in context above) I had
> originally assumed "reworked" would mean making the cmpxchg
> hook actually match its name.
> 
>> However, it appears that you've understood this to
>> mean the addition of the CMPXCHG stub (speaking of which, could you
>> please point out examples of implementing such stubs in the Xen code? I
>> have never done one of those before.)
> 
> There's no talk about stubs here. All I had hoped would have been
> done _instead_ of the smp-lock approach was to use the CMPXCHG
> instruction inside the hook function, at least for RAM case (I think
> we could live with MMIO still being forwarded to the write handler).
> 
> So all this isn't to say that the smp-lock approach might not be
> worthwhile to take on its own, provided the locked region gets
> shrunk as much as possible without losing correctness. The
> CMPXCHG model would just not have this locking granularity
> problem in the first place.

Sadly, I've now written this (rough) patch:

http://pastebin.com/3DJ5WYt0

only to find that it does not solve our issue. With multiple processors
per guest and heavy emulation at boot time, the VM got stuck at roughly
the same point in its life as before the patch. Looks like more than
CMPXCHG needs synchronizing.

So it would appear that the smp_lock patch is the only way to bring this
under control. Either that, or my patch misses something.
Single-processor guests work just fine.


Thanks,
Razvan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH V2] x86/emulate: synchronize LOCKed instruction emulation
  2017-03-22 17:22               ` Razvan Cojocaru
@ 2017-03-23  8:19                 ` Jan Beulich
  2017-03-23  8:27                   ` Razvan Cojocaru
  0 siblings, 1 reply; 20+ messages in thread
From: Jan Beulich @ 2017-03-23  8:19 UTC (permalink / raw)
  To: Razvan Cojocaru
  Cc: tim, wei.liu2, george.dunlap, andrew.cooper3, ian.jackson,
	xen-devel, paul.durrant

>>> On 22.03.17 at 18:22, <rcojocaru@bitdefender.com> wrote:
> On 03/21/2017 07:02 PM, Jan Beulich wrote:
>>>>> On 21.03.17 at 17:44, <rcojocaru@bitdefender.com> wrote:
>>> On 03/21/2017 06:29 PM, Jan Beulich wrote:
>>>>>>> On 21.03.17 at 16:38, <rcojocaru@bitdefender.com> wrote:
>>>>> On 03/15/2017 06:57 PM, Jan Beulich wrote:
>>>>>>>>> On 15.03.17 at 17:46, <rcojocaru@bitdefender.com> wrote:
>>>>>>> On 03/15/2017 06:30 PM, Jan Beulich wrote:
>>>>>>>>>>> On 15.03.17 at 17:04, <rcojocaru@bitdefender.com> wrote:
>>>>>>>>> ---
>>>>>>>>> Changes since V1:
>>>>>>>>>  - Added Andrew Cooper's credit, as he's kept the patch current
>>>>>>>>>    througout non-trivial code changes since the initial patch.
>>>>>>>>>  - Significantly more patch testing (with XenServer).
>>>>>>>>>  - Restricted lock scope.
>>>>>>>>
>>>>>>>> Not by much, as it seems. In particular you continue to take the
>>>>>>>> lock even for instructions not accessing memory at all.
>>>>>>>
>>>>>>> I'll take a closer look.
>>>>>>>
>>>>>>>> Also, by "reworked" I did assume you mean converted to at least the
>>>>>>>> cmpxchg based model.
>>>>>>>
>>>>>>> I haven't been able to follow the latest emulator changes closely, could
>>>>>>> you please clarify what you mean by "the cmpxchg model"? Thanks.
>>>>>>
>>>>>> This is unrelated to any recent changes. The idea is to make the
>>>>>> ->cmpxchg() hook actually behave like what its name says. It's
>>>>>> being used for LOCKed insn writeback already, and it could
>>>>>> therefore simply force a retry of the full instruction if the compare
>>>>>> part of it fails. It may need to be given another parameter, to
>>>>>> allow the hook function to tell LOCKed from "normal" uses.
>>>>>
>>>>> I assume this is what you have in mind?
>>>>
>>>> Hmm, not exactly. I'd expect an actual CMPXCHG instruction to
>>>> be used, with a LOCK prefix when the original access had one.
>>>> There should certainly not be any tail call to hvmemul_write()
>>>> anymore.
>>>
>>> There seems to be a misunderstanding here: if I understand this reply
>>> correctly, you'd like hvmemul_cmpxchg() to become a stub that really
>>> runs CMPXCHG - but if that comes with the smp_lock part as well, it
>>> doesn't matter (except for the now-ignored p_old part, i.e. the comparison).
>>>
>>> I've initially asked if I should bring the old XenServer-queued LOCK
>>> patch back, and I've understood that it could serve a purpose, at least
>>> as a temporary workaround until your, and Andrew's emulator changes,
>>> make it unrequired.
>> 
>> Well, yes, as said earlier (still visible in context above) I had
>> originally assumed "reworked" would mean making the cmpxchg
>> hook actually match its name.
>> 
>>> However, it appears that you've understood this to
>>> mean the addition of the CMPXCHG stub (speaking of which, could you
>>> please point out examples of implementing such stubs in the Xen code? I
>>> have never done one of those before.)
>> 
>> There's no talk about stubs here. All I had hoped would have been
>> done _instead_ of the smp-lock approach was to use the CMPXCHG
>> instruction inside the hook function, at least for RAM case (I think
>> we could live with MMIO still being forwarded to the write handler).
>> 
>> So all this isn't to say that the smp-lock approach might not be
>> worthwhile to take on its own, provided the locked region gets
>> shrunk as much as possible without losing correctness. The
>> CMPXCHG model would just not have this locking granularity
>> problem in the first place.
> 
> Sadly, I've now written this (rough) patch:
> 
> http://pastebin.com/3DJ5WYt0 
> 
> only to find that it does not solve our issue. With multiple processors
> per guest and heavy emulation at boot time, the VM got stuck at roughly
> the same point in its life as before the patch. Looks like more than
> CMPXCHG needs synchronizing.
> 
> So it would appear that the smp_lock patch is the only way to bring this
> under control. Either that, or my patch misses something.
> Single-processor guests work just fine.

Well, first of all the code to return RETRY is commented out. I
may guess that you've tried with it not commented out, but I
can't be sure.

And then "stuck" of course can mean many things, so a better
description of the state the VM is in and/or the operations that
it's stuck on would be needed.

Jan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH V2] x86/emulate: synchronize LOCKed instruction emulation
  2017-03-23  8:19                 ` Jan Beulich
@ 2017-03-23  8:27                   ` Razvan Cojocaru
  2017-03-23  8:45                     ` Jan Beulich
  0 siblings, 1 reply; 20+ messages in thread
From: Razvan Cojocaru @ 2017-03-23  8:27 UTC (permalink / raw)
  To: Jan Beulich
  Cc: tim, wei.liu2, george.dunlap, andrew.cooper3, ian.jackson,
	xen-devel, paul.durrant

On 03/23/2017 10:19 AM, Jan Beulich wrote:
>>>> On 22.03.17 at 18:22, <rcojocaru@bitdefender.com> wrote:
>> On 03/21/2017 07:02 PM, Jan Beulich wrote:
>>>>>> On 21.03.17 at 17:44, <rcojocaru@bitdefender.com> wrote:
>>>> On 03/21/2017 06:29 PM, Jan Beulich wrote:
>>>>>>>> On 21.03.17 at 16:38, <rcojocaru@bitdefender.com> wrote:
>>>>>> On 03/15/2017 06:57 PM, Jan Beulich wrote:
>>>>>>>>>> On 15.03.17 at 17:46, <rcojocaru@bitdefender.com> wrote:
>>>>>>>> On 03/15/2017 06:30 PM, Jan Beulich wrote:
>>>>>>>>>>>> On 15.03.17 at 17:04, <rcojocaru@bitdefender.com> wrote:
>>>>>>>>>> ---
>>>>>>>>>> Changes since V1:
>>>>>>>>>>  - Added Andrew Cooper's credit, as he's kept the patch current
>>>>>>>>>>    througout non-trivial code changes since the initial patch.
>>>>>>>>>>  - Significantly more patch testing (with XenServer).
>>>>>>>>>>  - Restricted lock scope.
>>>>>>>>>
>>>>>>>>> Not by much, as it seems. In particular you continue to take the
>>>>>>>>> lock even for instructions not accessing memory at all.
>>>>>>>>
>>>>>>>> I'll take a closer look.
>>>>>>>>
>>>>>>>>> Also, by "reworked" I did assume you mean converted to at least the
>>>>>>>>> cmpxchg based model.
>>>>>>>>
>>>>>>>> I haven't been able to follow the latest emulator changes closely, could
>>>>>>>> you please clarify what you mean by "the cmpxchg model"? Thanks.
>>>>>>>
>>>>>>> This is unrelated to any recent changes. The idea is to make the
>>>>>>> ->cmpxchg() hook actually behave like what its name says. It's
>>>>>>> being used for LOCKed insn writeback already, and it could
>>>>>>> therefore simply force a retry of the full instruction if the compare
>>>>>>> part of it fails. It may need to be given another parameter, to
>>>>>>> allow the hook function to tell LOCKed from "normal" uses.
>>>>>>
>>>>>> I assume this is what you have in mind?
>>>>>
>>>>> Hmm, not exactly. I'd expect an actual CMPXCHG instruction to
>>>>> be used, with a LOCK prefix when the original access had one.
>>>>> There should certainly not be any tail call to hvmemul_write()
>>>>> anymore.
>>>>
>>>> There seems to be a misunderstanding here: if I understand this reply
>>>> correctly, you'd like hvmemul_cmpxchg() to become a stub that really
>>>> runs CMPXCHG - but if that comes with the smp_lock part as well, it
>>>> doesn't matter (except for the now-ignored p_old part, i.e. the comparison).
>>>>
>>>> I've initially asked if I should bring the old XenServer-queued LOCK
>>>> patch back, and I've understood that it could serve a purpose, at least
>>>> as a temporary workaround until your, and Andrew's emulator changes,
>>>> make it unrequired.
>>>
>>> Well, yes, as said earlier (still visible in context above) I had
>>> originally assumed "reworked" would mean making the cmpxchg
>>> hook actually match its name.
>>>
>>>> However, it appears that you've understood this to
>>>> mean the addition of the CMPXCHG stub (speaking of which, could you
>>>> please point out examples of implementing such stubs in the Xen code? I
>>>> have never done one of those before.)
>>>
>>> There's no talk about stubs here. All I had hoped would have been
>>> done _instead_ of the smp-lock approach was to use the CMPXCHG
>>> instruction inside the hook function, at least for RAM case (I think
>>> we could live with MMIO still being forwarded to the write handler).
>>>
>>> So all this isn't to say that the smp-lock approach might not be
>>> worthwhile to take on its own, provided the locked region gets
>>> shrunk as much as possible without losing correctness. The
>>> CMPXCHG model would just not have this locking granularity
>>> problem in the first place.
>>
>> Sadly, I've now written this (rough) patch:
>>
>> http://pastebin.com/3DJ5WYt0 
>>
>> only to find that it does not solve our issue. With multiple processors
>> per guest and heavy emulation at boot time, the VM got stuck at roughly
>> the same point in its life as before the patch. Looks like more than
>> CMPXCHG needs synchronizing.
>>
>> So it would appear that the smp_lock patch is the only way to bring this
>> under control. Either that, or my patch misses something.
>> Single-processor guests work just fine.
> 
> Well, first of all the code to return RETRY is commented out. I
> may guess that you've tried with it not commented out, but I
> can't be sure.

Indeed I have tried with it on, with the condition for success being at
first "val != old", and then, as it is in the pasted code, "val == new".
Both of these caused BSODs and random crashes. The guest only boots
without any apparent issues (and with 1 VCPU only) after I've stopped
returning RETRY.

> And then "stuck" of course can mean many things, so a better
> description of the state the VM is in and/or the operations that
> it's stuck on would be needed.

I agree, but on a first look it would appear that the hang is just like
the one without the patch, when multiple processors are involved. This
also is in line with my previous observation that when only cmpxchg() is
locked, the problem still occurs (you may remember that initially I've
tried to only synchronize the cmpxchg() call in x86_emulate()).

I'll try to give the patch a few more spins and compare the guest stuck
state with the stuck state without the patch.


Thanks,
Razvan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH V2] x86/emulate: synchronize LOCKed instruction emulation
  2017-03-23  8:27                   ` Razvan Cojocaru
@ 2017-03-23  8:45                     ` Jan Beulich
  2017-03-23  9:51                       ` Razvan Cojocaru
  0 siblings, 1 reply; 20+ messages in thread
From: Jan Beulich @ 2017-03-23  8:45 UTC (permalink / raw)
  To: Razvan Cojocaru
  Cc: tim, wei.liu2, george.dunlap, andrew.cooper3, ian.jackson,
	xen-devel, paul.durrant

>>> On 23.03.17 at 09:27, <rcojocaru@bitdefender.com> wrote:
> On 03/23/2017 10:19 AM, Jan Beulich wrote:
>>>>> On 22.03.17 at 18:22, <rcojocaru@bitdefender.com> wrote:
>>> On 03/21/2017 07:02 PM, Jan Beulich wrote:
>>>>>>> On 21.03.17 at 17:44, <rcojocaru@bitdefender.com> wrote:
>>>>> On 03/21/2017 06:29 PM, Jan Beulich wrote:
>>>>>>>>> On 21.03.17 at 16:38, <rcojocaru@bitdefender.com> wrote:
>>>>>>> On 03/15/2017 06:57 PM, Jan Beulich wrote:
>>>>>>>>>>> On 15.03.17 at 17:46, <rcojocaru@bitdefender.com> wrote:
>>>>>>>>> On 03/15/2017 06:30 PM, Jan Beulich wrote:
>>>>>>>>>>>>> On 15.03.17 at 17:04, <rcojocaru@bitdefender.com> wrote:
>>>>>>>>>>> ---
>>>>>>>>>>> Changes since V1:
>>>>>>>>>>>  - Added Andrew Cooper's credit, as he's kept the patch current
>>>>>>>>>>>    througout non-trivial code changes since the initial patch.
>>>>>>>>>>>  - Significantly more patch testing (with XenServer).
>>>>>>>>>>>  - Restricted lock scope.
>>>>>>>>>>
>>>>>>>>>> Not by much, as it seems. In particular you continue to take the
>>>>>>>>>> lock even for instructions not accessing memory at all.
>>>>>>>>>
>>>>>>>>> I'll take a closer look.
>>>>>>>>>
>>>>>>>>>> Also, by "reworked" I did assume you mean converted to at least the
>>>>>>>>>> cmpxchg based model.
>>>>>>>>>
>>>>>>>>> I haven't been able to follow the latest emulator changes closely, could
>>>>>>>>> you please clarify what you mean by "the cmpxchg model"? Thanks.
>>>>>>>>
>>>>>>>> This is unrelated to any recent changes. The idea is to make the
>>>>>>>> ->cmpxchg() hook actually behave like what its name says. It's
>>>>>>>> being used for LOCKed insn writeback already, and it could
>>>>>>>> therefore simply force a retry of the full instruction if the compare
>>>>>>>> part of it fails. It may need to be given another parameter, to
>>>>>>>> allow the hook function to tell LOCKed from "normal" uses.
>>>>>>>
>>>>>>> I assume this is what you have in mind?
>>>>>>
>>>>>> Hmm, not exactly. I'd expect an actual CMPXCHG instruction to
>>>>>> be used, with a LOCK prefix when the original access had one.
>>>>>> There should certainly not be any tail call to hvmemul_write()
>>>>>> anymore.
>>>>>
>>>>> There seems to be a misunderstanding here: if I understand this reply
>>>>> correctly, you'd like hvmemul_cmpxchg() to become a stub that really
>>>>> runs CMPXCHG - but if that comes with the smp_lock part as well, it
>>>>> doesn't matter (except for the now-ignored p_old part, i.e. the comparison).
>>>>>
>>>>> I've initially asked if I should bring the old XenServer-queued LOCK
>>>>> patch back, and I've understood that it could serve a purpose, at least
>>>>> as a temporary workaround until your, and Andrew's emulator changes,
>>>>> make it unrequired.
>>>>
>>>> Well, yes, as said earlier (still visible in context above) I had
>>>> originally assumed "reworked" would mean making the cmpxchg
>>>> hook actually match its name.
>>>>
>>>>> However, it appears that you've understood this to
>>>>> mean the addition of the CMPXCHG stub (speaking of which, could you
>>>>> please point out examples of implementing such stubs in the Xen code? I
>>>>> have never done one of those before.)
>>>>
>>>> There's no talk about stubs here. All I had hoped would have been
>>>> done _instead_ of the smp-lock approach was to use the CMPXCHG
>>>> instruction inside the hook function, at least for RAM case (I think
>>>> we could live with MMIO still being forwarded to the write handler).
>>>>
>>>> So all this isn't to say that the smp-lock approach might not be
>>>> worthwhile to take on its own, provided the locked region gets
>>>> shrunk as much as possible without losing correctness. The
>>>> CMPXCHG model would just not have this locking granularity
>>>> problem in the first place.
>>>
>>> Sadly, I've now written this (rough) patch:
>>>
>>> http://pastebin.com/3DJ5WYt0 
>>>
>>> only to find that it does not solve our issue. With multiple processors
>>> per guest and heavy emulation at boot time, the VM got stuck at roughly
>>> the same point in its life as before the patch. Looks like more than
>>> CMPXCHG needs synchronizing.
>>>
>>> So it would appear that the smp_lock patch is the only way to bring this
>>> under control. Either that, or my patch misses something.
>>> Single-processor guests work just fine.
>> 
>> Well, first of all the code to return RETRY is commented out. I
>> may guess that you've tried with it not commented out, but I
>> can't be sure.
> 
> Indeed I have tried with it on, with the condition for success being at
> first "val != old", and then, as it is in the pasted code, "val == new".
> Both of these caused BSODs and random crashes. The guest only boots
> without any apparent issues (and with 1 VCPU only) after I've stopped
> returning RETRY.

"val == new" is clearly wrong: Whether to retry depends on whether
the cmpxchg was unsuccessful.

>> And then "stuck" of course can mean many things, so a better
>> description of the state the VM is in and/or the operations that
>> it's stuck on would be needed.
> 
> I agree, but on a first look it would appear that the hang is just like
> the one without the patch, when multiple processors are involved. This
> also is in line with my previous observation that when only cmpxchg() is
> locked, the problem still occurs (you may remember that initially I've
> tried to only synchronize the cmpxchg() call in x86_emulate()).
> 
> I'll try to give the patch a few more spins and compare the guest stuck
> state with the stuck state without the patch.

Are you sure you don't run into a case of a RMW insn's memory
access crossing a page boundary? Your code does not handle
this case at all afaics. Other than that I can't see any obvious
omission.

Jan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH V2] x86/emulate: synchronize LOCKed instruction emulation
  2017-03-23  8:45                     ` Jan Beulich
@ 2017-03-23  9:51                       ` Razvan Cojocaru
  2017-03-23 10:21                         ` Razvan Cojocaru
  0 siblings, 1 reply; 20+ messages in thread
From: Razvan Cojocaru @ 2017-03-23  9:51 UTC (permalink / raw)
  To: Jan Beulich
  Cc: tim, wei.liu2, george.dunlap, andrew.cooper3, ian.jackson,
	xen-devel, paul.durrant

On 03/23/2017 10:45 AM, Jan Beulich wrote:
>>>> On 23.03.17 at 09:27, <rcojocaru@bitdefender.com> wrote:
>> On 03/23/2017 10:19 AM, Jan Beulich wrote:
>>>>>> On 22.03.17 at 18:22, <rcojocaru@bitdefender.com> wrote:
>>>> On 03/21/2017 07:02 PM, Jan Beulich wrote:
>>>>>>>> On 21.03.17 at 17:44, <rcojocaru@bitdefender.com> wrote:
>>>>>> On 03/21/2017 06:29 PM, Jan Beulich wrote:
>>>>>>>>>> On 21.03.17 at 16:38, <rcojocaru@bitdefender.com> wrote:
>>>>>>>> On 03/15/2017 06:57 PM, Jan Beulich wrote:
>>>>>>>>>>>> On 15.03.17 at 17:46, <rcojocaru@bitdefender.com> wrote:
>>>>>>>>>> On 03/15/2017 06:30 PM, Jan Beulich wrote:
>>>>>>>>>>>>>> On 15.03.17 at 17:04, <rcojocaru@bitdefender.com> wrote:
>>>>>>>>>>>> ---
>>>>>>>>>>>> Changes since V1:
>>>>>>>>>>>>  - Added Andrew Cooper's credit, as he's kept the patch current
>>>>>>>>>>>>    througout non-trivial code changes since the initial patch.
>>>>>>>>>>>>  - Significantly more patch testing (with XenServer).
>>>>>>>>>>>>  - Restricted lock scope.
>>>>>>>>>>>
>>>>>>>>>>> Not by much, as it seems. In particular you continue to take the
>>>>>>>>>>> lock even for instructions not accessing memory at all.
>>>>>>>>>>
>>>>>>>>>> I'll take a closer look.
>>>>>>>>>>
>>>>>>>>>>> Also, by "reworked" I did assume you mean converted to at least the
>>>>>>>>>>> cmpxchg based model.
>>>>>>>>>>
>>>>>>>>>> I haven't been able to follow the latest emulator changes closely, could
>>>>>>>>>> you please clarify what you mean by "the cmpxchg model"? Thanks.
>>>>>>>>>
>>>>>>>>> This is unrelated to any recent changes. The idea is to make the
>>>>>>>>> ->cmpxchg() hook actually behave like what its name says. It's
>>>>>>>>> being used for LOCKed insn writeback already, and it could
>>>>>>>>> therefore simply force a retry of the full instruction if the compare
>>>>>>>>> part of it fails. It may need to be given another parameter, to
>>>>>>>>> allow the hook function to tell LOCKed from "normal" uses.
>>>>>>>>
>>>>>>>> I assume this is what you have in mind?
>>>>>>>
>>>>>>> Hmm, not exactly. I'd expect an actual CMPXCHG instruction to
>>>>>>> be used, with a LOCK prefix when the original access had one.
>>>>>>> There should certainly not be any tail call to hvmemul_write()
>>>>>>> anymore.
>>>>>>
>>>>>> There seems to be a misunderstanding here: if I understand this reply
>>>>>> correctly, you'd like hvmemul_cmpxchg() to become a stub that really
>>>>>> runs CMPXCHG - but if that comes with the smp_lock part as well, it
>>>>>> doesn't matter (except for the now-ignored p_old part, i.e. the comparison).
>>>>>>
>>>>>> I've initially asked if I should bring the old XenServer-queued LOCK
>>>>>> patch back, and I've understood that it could serve a purpose, at least
>>>>>> as a temporary workaround until your, and Andrew's emulator changes,
>>>>>> make it unrequired.
>>>>>
>>>>> Well, yes, as said earlier (still visible in context above) I had
>>>>> originally assumed "reworked" would mean making the cmpxchg
>>>>> hook actually match its name.
>>>>>
>>>>>> However, it appears that you've understood this to
>>>>>> mean the addition of the CMPXCHG stub (speaking of which, could you
>>>>>> please point out examples of implementing such stubs in the Xen code? I
>>>>>> have never done one of those before.)
>>>>>
>>>>> There's no talk about stubs here. All I had hoped would have been
>>>>> done _instead_ of the smp-lock approach was to use the CMPXCHG
>>>>> instruction inside the hook function, at least for RAM case (I think
>>>>> we could live with MMIO still being forwarded to the write handler).
>>>>>
>>>>> So all this isn't to say that the smp-lock approach might not be
>>>>> worthwhile to take on its own, provided the locked region gets
>>>>> shrunk as much as possible without losing correctness. The
>>>>> CMPXCHG model would just not have this locking granularity
>>>>> problem in the first place.
>>>>
>>>> Sadly, I've now written this (rough) patch:
>>>>
>>>> http://pastebin.com/3DJ5WYt0 
>>>>
>>>> only to find that it does not solve our issue. With multiple processors
>>>> per guest and heavy emulation at boot time, the VM got stuck at roughly
>>>> the same point in its life as before the patch. Looks like more than
>>>> CMPXCHG needs synchronizing.
>>>>
>>>> So it would appear that the smp_lock patch is the only way to bring this
>>>> under control. Either that, or my patch misses something.
>>>> Single-processor guests work just fine.
>>>
>>> Well, first of all the code to return RETRY is commented out. I
>>> may guess that you've tried with it not commented out, but I
>>> can't be sure.
>>
>> Indeed I have tried with it on, with the condition for success being at
>> first "val != old", and then, as it is in the pasted code, "val == new".
>> Both of these caused BSODs and random crashes. The guest only boots
>> without any apparent issues (and with 1 VCPU only) after I've stopped
>> returning RETRY.
> 
> "val == new" is clearly wrong: Whether to retry depends on whether
> the cmpxchg was unsuccessful.

You're right, it looks like using "val == old" as a test for success
(which is of course the only correct test) and return RETRY on fail
works (though I'm still seeing a BSOD very seldom, I'll need to test it
carefully and see what's going on).

Thanks for the help!


Razvan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH V2] x86/emulate: synchronize LOCKed instruction emulation
  2017-03-23  9:51                       ` Razvan Cojocaru
@ 2017-03-23 10:21                         ` Razvan Cojocaru
  2017-03-23 13:23                           ` Jan Beulich
  0 siblings, 1 reply; 20+ messages in thread
From: Razvan Cojocaru @ 2017-03-23 10:21 UTC (permalink / raw)
  To: Jan Beulich
  Cc: wei.liu2, george.dunlap, ian.jackson, tim, xen-devel,
	paul.durrant, andrew.cooper3

>>>>> Sadly, I've now written this (rough) patch:
>>>>>
>>>>> http://pastebin.com/3DJ5WYt0 
>>>>>
>>>>> only to find that it does not solve our issue. With multiple processors
>>>>> per guest and heavy emulation at boot time, the VM got stuck at roughly
>>>>> the same point in its life as before the patch. Looks like more than
>>>>> CMPXCHG needs synchronizing.
>>>>>
>>>>> So it would appear that the smp_lock patch is the only way to bring this
>>>>> under control. Either that, or my patch misses something.
>>>>> Single-processor guests work just fine.
>>>>
>>>> Well, first of all the code to return RETRY is commented out. I
>>>> may guess that you've tried with it not commented out, but I
>>>> can't be sure.
>>>
>>> Indeed I have tried with it on, with the condition for success being at
>>> first "val != old", and then, as it is in the pasted code, "val == new".
>>> Both of these caused BSODs and random crashes. The guest only boots
>>> without any apparent issues (and with 1 VCPU only) after I've stopped
>>> returning RETRY.
>>
>> "val == new" is clearly wrong: Whether to retry depends on whether
>> the cmpxchg was unsuccessful.
> 
> You're right, it looks like using "val == old" as a test for success
> (which is of course the only correct test) and return RETRY on fail
> works (though I'm still seeing a BSOD very seldom, I'll need to test it
> carefully and see what's going on).

I've made a race more probable by resuming the VCPU only after
processing all the events in the ring buffer and now all my Windows 7
32-bit guests BSOD with IRQL_NOT_LESS_OR_EQUAL every time at boot with
the updated patch.

Changing the guests' configuration to only use 1 VCPU again solves the
issue, and also resuming the VCPU after treating each vm_event makes the
BSOD much less likely to appear (hence the illusion that the problem is
fixed).

As for an instruction's crossing a page boundary, I'm not sure how that
would affect things here - we're simply emulating whatever instructions
cause page faults in interesting pages at boot, we're not yet preventing
writes at this point. And if it were such an issue, would it have been
fixed (and it is) by the smp_lock version of the patch?


Thanks,
Razvan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH V2] x86/emulate: synchronize LOCKed instruction emulation
  2017-03-23 10:21                         ` Razvan Cojocaru
@ 2017-03-23 13:23                           ` Jan Beulich
  2017-03-23 13:28                             ` Razvan Cojocaru
  2017-03-23 15:54                             ` Razvan Cojocaru
  0 siblings, 2 replies; 20+ messages in thread
From: Jan Beulich @ 2017-03-23 13:23 UTC (permalink / raw)
  To: Razvan Cojocaru
  Cc: tim, wei.liu2, george.dunlap, andrew.cooper3, ian.jackson,
	xen-devel, paul.durrant

>>> On 23.03.17 at 11:21, <rcojocaru@bitdefender.com> wrote:
>>>>>> Sadly, I've now written this (rough) patch:
>>>>>>
>>>>>> http://pastebin.com/3DJ5WYt0 
>>>>>>
>>>>>> only to find that it does not solve our issue. With multiple processors
>>>>>> per guest and heavy emulation at boot time, the VM got stuck at roughly
>>>>>> the same point in its life as before the patch. Looks like more than
>>>>>> CMPXCHG needs synchronizing.
>>>>>>
>>>>>> So it would appear that the smp_lock patch is the only way to bring this
>>>>>> under control. Either that, or my patch misses something.
>>>>>> Single-processor guests work just fine.
>>>>>
>>>>> Well, first of all the code to return RETRY is commented out. I
>>>>> may guess that you've tried with it not commented out, but I
>>>>> can't be sure.
>>>>
>>>> Indeed I have tried with it on, with the condition for success being at
>>>> first "val != old", and then, as it is in the pasted code, "val == new".
>>>> Both of these caused BSODs and random crashes. The guest only boots
>>>> without any apparent issues (and with 1 VCPU only) after I've stopped
>>>> returning RETRY.
>>>
>>> "val == new" is clearly wrong: Whether to retry depends on whether
>>> the cmpxchg was unsuccessful.
>> 
>> You're right, it looks like using "val == old" as a test for success
>> (which is of course the only correct test) and return RETRY on fail
>> works (though I'm still seeing a BSOD very seldom, I'll need to test it
>> carefully and see what's going on).
> 
> I've made a race more probable by resuming the VCPU only after
> processing all the events in the ring buffer and now all my Windows 7
> 32-bit guests BSOD with IRQL_NOT_LESS_OR_EQUAL every time at boot with
> the updated patch.
> 
> Changing the guests' configuration to only use 1 VCPU again solves the
> issue, and also resuming the VCPU after treating each vm_event makes the
> BSOD much less likely to appear (hence the illusion that the problem is
> fixed).

Well, I can only repeat: We need to understand what it is that
goes wrong.

> As for an instruction's crossing a page boundary, I'm not sure how that
> would affect things here - we're simply emulating whatever instructions
> cause page faults in interesting pages at boot, we're not yet preventing
> writes at this point. And if it were such an issue, would it have been
> fixed (and it is) by the smp_lock version of the patch?

Yes, it likely would have been: The problem with your code is that
you map just a single page even when the original access crosses
a page boundary, yet then access the returned pointer as if you
had mapped two adjacent pages.

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH V2] x86/emulate: synchronize LOCKed instruction emulation
  2017-03-23 13:23                           ` Jan Beulich
@ 2017-03-23 13:28                             ` Razvan Cojocaru
  2017-03-23 15:54                             ` Razvan Cojocaru
  1 sibling, 0 replies; 20+ messages in thread
From: Razvan Cojocaru @ 2017-03-23 13:28 UTC (permalink / raw)
  To: Jan Beulich
  Cc: tim, wei.liu2, george.dunlap, andrew.cooper3, ian.jackson,
	xen-devel, paul.durrant

On 03/23/2017 03:23 PM, Jan Beulich wrote:
>>>> On 23.03.17 at 11:21, <rcojocaru@bitdefender.com> wrote:
>>>>>>> Sadly, I've now written this (rough) patch:
>>>>>>>
>>>>>>> http://pastebin.com/3DJ5WYt0 
>>>>>>>
>>>>>>> only to find that it does not solve our issue. With multiple processors
>>>>>>> per guest and heavy emulation at boot time, the VM got stuck at roughly
>>>>>>> the same point in its life as before the patch. Looks like more than
>>>>>>> CMPXCHG needs synchronizing.
>>>>>>>
>>>>>>> So it would appear that the smp_lock patch is the only way to bring this
>>>>>>> under control. Either that, or my patch misses something.
>>>>>>> Single-processor guests work just fine.
>>>>>>
>>>>>> Well, first of all the code to return RETRY is commented out. I
>>>>>> may guess that you've tried with it not commented out, but I
>>>>>> can't be sure.
>>>>>
>>>>> Indeed I have tried with it on, with the condition for success being at
>>>>> first "val != old", and then, as it is in the pasted code, "val == new".
>>>>> Both of these caused BSODs and random crashes. The guest only boots
>>>>> without any apparent issues (and with 1 VCPU only) after I've stopped
>>>>> returning RETRY.
>>>>
>>>> "val == new" is clearly wrong: Whether to retry depends on whether
>>>> the cmpxchg was unsuccessful.
>>>
>>> You're right, it looks like using "val == old" as a test for success
>>> (which is of course the only correct test) and return RETRY on fail
>>> works (though I'm still seeing a BSOD very seldom, I'll need to test it
>>> carefully and see what's going on).
>>
>> I've made a race more probable by resuming the VCPU only after
>> processing all the events in the ring buffer and now all my Windows 7
>> 32-bit guests BSOD with IRQL_NOT_LESS_OR_EQUAL every time at boot with
>> the updated patch.
>>
>> Changing the guests' configuration to only use 1 VCPU again solves the
>> issue, and also resuming the VCPU after treating each vm_event makes the
>> BSOD much less likely to appear (hence the illusion that the problem is
>> fixed).
> 
> Well, I can only repeat: We need to understand what it is that
> goes wrong.
> 
>> As for an instruction's crossing a page boundary, I'm not sure how that
>> would affect things here - we're simply emulating whatever instructions
>> cause page faults in interesting pages at boot, we're not yet preventing
>> writes at this point. And if it were such an issue, would it have been
>> fixed (and it is) by the smp_lock version of the patch?
> 
> Yes, it likely would have been: The problem with your code is that
> you map just a single page even when the original access crosses
> a page boundary, yet then access the returned pointer as if you
> had mapped two adjacent pages.

Oh, right, the patch code, of course. For some reason I was thinking
about a rather complicated scenario related to emulating an instruction
touching several protected pages. Yes, you're right, the patch does not
correctly handle the case where we'd write beyond the mapped page.

I'll check if that's what happens.


Thanks,
Razvan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH V2] x86/emulate: synchronize LOCKed instruction emulation
  2017-03-23 13:23                           ` Jan Beulich
  2017-03-23 13:28                             ` Razvan Cojocaru
@ 2017-03-23 15:54                             ` Razvan Cojocaru
  2017-03-23 16:20                               ` Jan Beulich
  1 sibling, 1 reply; 20+ messages in thread
From: Razvan Cojocaru @ 2017-03-23 15:54 UTC (permalink / raw)
  To: Jan Beulich
  Cc: tim, wei.liu2, george.dunlap, andrew.cooper3, ian.jackson,
	xen-devel, paul.durrant

On 03/23/2017 03:23 PM, Jan Beulich wrote:
>>>> On 23.03.17 at 11:21, <rcojocaru@bitdefender.com> wrote:
>>>>>>> Sadly, I've now written this (rough) patch:
>>>>>>>
>>>>>>> http://pastebin.com/3DJ5WYt0 
>>>>>>>
>>>>>>> only to find that it does not solve our issue. With multiple processors
>>>>>>> per guest and heavy emulation at boot time, the VM got stuck at roughly
>>>>>>> the same point in its life as before the patch. Looks like more than
>>>>>>> CMPXCHG needs synchronizing.
>>>>>>>
>>>>>>> So it would appear that the smp_lock patch is the only way to bring this
>>>>>>> under control. Either that, or my patch misses something.
>>>>>>> Single-processor guests work just fine.
>>>>>>
>>>>>> Well, first of all the code to return RETRY is commented out. I
>>>>>> may guess that you've tried with it not commented out, but I
>>>>>> can't be sure.
>>>>>
>>>>> Indeed I have tried with it on, with the condition for success being at
>>>>> first "val != old", and then, as it is in the pasted code, "val == new".
>>>>> Both of these caused BSODs and random crashes. The guest only boots
>>>>> without any apparent issues (and with 1 VCPU only) after I've stopped
>>>>> returning RETRY.
>>>>
>>>> "val == new" is clearly wrong: Whether to retry depends on whether
>>>> the cmpxchg was unsuccessful.
>>>
>>> You're right, it looks like using "val == old" as a test for success
>>> (which is of course the only correct test) and return RETRY on fail
>>> works (though I'm still seeing a BSOD very seldom, I'll need to test it
>>> carefully and see what's going on).
>>
>> I've made a race more probable by resuming the VCPU only after
>> processing all the events in the ring buffer and now all my Windows 7
>> 32-bit guests BSOD with IRQL_NOT_LESS_OR_EQUAL every time at boot with
>> the updated patch.
>>
>> Changing the guests' configuration to only use 1 VCPU again solves the
>> issue, and also resuming the VCPU after treating each vm_event makes the
>> BSOD much less likely to appear (hence the illusion that the problem is
>> fixed).
> 
> Well, I can only repeat: We need to understand what it is that
> goes wrong.
> 
>> As for an instruction's crossing a page boundary, I'm not sure how that
>> would affect things here - we're simply emulating whatever instructions
>> cause page faults in interesting pages at boot, we're not yet preventing
>> writes at this point. And if it were such an issue, would it have been
>> fixed (and it is) by the smp_lock version of the patch?
> 
> Yes, it likely would have been: The problem with your code is that
> you map just a single page even when the original access crosses
> a page boundary, yet then access the returned pointer as if you
> had mapped two adjacent pages.

I've printed out all the offsets in the new cmpxchg hook, and the
largest one I've seen across many guest runs was 4064, with a size of 8,
which would not cause the write to go beyond the page. So this is not
what's causing the issue.

Since it's working fine with 1 VCPU per guest, this I think also
validates the new implementation of cmpxchg, since if it would have been
wrong the guest would have run into trouble soon. This is born out by my
before-and-after printk()s:

(XEN) offset: 156, size: 4
(XEN) offset: 1708, size: 4
(XEN) - [0] mem: 0, old: 0, new: 1, lock: 1
(XEN) - [1] mem: 3, old: 3, new: 4, lock: 1
(XEN) + [0] mem: 1, old: 0, new: 1, val: 0
(XEN) + [1] mem: 4, old: 3, new: 4, val: 3
(XEN) returning X86EMUL_OKAY
(XEN) returning X86EMUL_OKAY
(XEN) offset: 1708, size: 4
(XEN) - [1] mem: 4, old: 4, new: 3, lock: 1
(XEN) offset: 204, size: 4
(XEN) + [1] mem: 3, old: 4, new: 3, val: 4
(XEN) - [0] mem: 3539, old: 3539, new: 353a, lock: 1
(XEN) returning X86EMUL_OKAY
(XEN) + [0] mem: 353a, old: 3539, new: 353a, val: 3539
(XEN) returning X86EMUL_OKAY
(XEN) offset: 1708, size: 4
(XEN) - [1] mem: 3, old: 3, new: 4, lock: 1
(XEN) + [1] mem: 4, old: 3, new: 4, val: 3
(XEN) returning X86EMUL_OKAY

With 2 VCPUs, the guest can't seem to make up its mind to either BSOD
with IRQL_NOT_LESS_OR_EQUAL (when I return RETRY on old != val ) or
BAD_POOL_HEADER (when I always return OKAY), or simply get stuck. These
are all situations that happen on boot, fairly quickly (my Windows 7
32-bit guest doesn't go beyond the "Starting Windows" screen).

The last time it got stuck I've collected this info with succesive runs
of xenctx:

# ./xenctx -a 6
cs:eip: 0008:82a4209b
flags: 00200046 cid z p
ss:esp: 0010:8273f9bc
eax: 00000000   ebx: 00000000   ecx: 82782480   edx: 000003fd
esi: 80ba0020   edi: 00000000   ebp: 8273fa08
 ds:     0023    es:     0023    fs:     0030    gs:     0000

cr0: 8001003b
cr2: 8d486408
cr3: 00185000
cr4: 000406f9

dr0: 00000000
dr1: 00000000
dr2: 00000000
dr3: 00000000
dr6: fffe0ff0
dr7: 00000400
Code (instr addr 82a4209b)
14 a3 a2 82 cc cc cc cc cc cc cc cc cc cc 33 c0 8b 54 24 04 ec <c2> 04
00 8b ff 33 c0 8b 54 24 04

# ./xenctx -a 6
cs:eip: 0008:8c5065d6
flags: 00200246 cid i z p
ss:esp: 0010:8273fb9c
eax: 00000000   ebx: 8d662338   ecx: 850865f0   edx: 00000000
esi: 40008000   edi: 82742d20   ebp: 8273fc20
 ds:     0023    es:     0023    fs:     0030    gs:     0000

cr0: 8001003b
cr2: 8d486408
cr3: 00185000
cr4: 000406f9

dr0: 00000000
dr1: 00000000
dr2: 00000000
dr3: 00000000
dr6: fffe0ff0
dr7: 00000400
Code (instr addr 8c5065d6)
47 fc 83 c7 14 4e 75 ef 5f 5e c3 cc cc cc cc cc cc 8b ff fb f4 <c3> cc
cc cc cc cc 8b ff 55 8b ec

Hovewer this is likely irrelevant since by now we're past the trigger
event. I'm not sure where to go from here.


Thanks,
Razvan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH V2] x86/emulate: synchronize LOCKed instruction emulation
  2017-03-23 15:54                             ` Razvan Cojocaru
@ 2017-03-23 16:20                               ` Jan Beulich
  2017-03-23 16:31                                 ` Razvan Cojocaru
  0 siblings, 1 reply; 20+ messages in thread
From: Jan Beulich @ 2017-03-23 16:20 UTC (permalink / raw)
  To: Razvan Cojocaru
  Cc: tim, wei.liu2, george.dunlap, andrew.cooper3, ian.jackson,
	xen-devel, paul.durrant

>>> On 23.03.17 at 16:54, <rcojocaru@bitdefender.com> wrote:
> I'm not sure where to go from here.

Well, without finding where things start to go wrong, I don't think
we can make any progress here. Finding that may admittedly be
a rather tedious process.

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH V2] x86/emulate: synchronize LOCKed instruction emulation
  2017-03-23 16:20                               ` Jan Beulich
@ 2017-03-23 16:31                                 ` Razvan Cojocaru
  0 siblings, 0 replies; 20+ messages in thread
From: Razvan Cojocaru @ 2017-03-23 16:31 UTC (permalink / raw)
  To: Jan Beulich
  Cc: tim, wei.liu2, george.dunlap, andrew.cooper3, ian.jackson,
	xen-devel, paul.durrant

On 03/23/2017 06:20 PM, Jan Beulich wrote:
>>>> On 23.03.17 at 16:54, <rcojocaru@bitdefender.com> wrote:
>> I'm not sure where to go from here.
> 
> Well, without finding where things start to go wrong, I don't think
> we can make any progress here. Finding that may admittedly be
> a rather tedious process.

I may have stumbled across a clue: hvm_emulate_one_vm_event() currently
does nothing when hvm_emulate_one() returns RETRY. This was fine before,
but if the cmpxchg hook can propagate RETRY upwards, a failed emulation
attempt causes the guest to retry the instruction, generate another
page-fault, and so on.

I've now wrapped the body of hvm_emulate_one_vm_event() in a do { /* ...
*/ } while ( rc == X86EMUL_RETRY ); and my guest seems to be booting. At
this point I'm not sure if this is the best solution (should I just
retry indefinitely until it succeeds?) or if this is just luck, but it
at least feels like a step in the right direction.


Thanks,
Razvan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2017-03-23 16:31 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-03-15 16:04 [PATCH V2] x86/emulate: synchronize LOCKed instruction emulation Razvan Cojocaru
2017-03-15 16:30 ` Jan Beulich
2017-03-15 16:46   ` Razvan Cojocaru
2017-03-15 16:57     ` Jan Beulich
2017-03-21 15:38       ` Razvan Cojocaru
2017-03-21 15:39         ` Razvan Cojocaru
2017-03-21 16:29         ` Jan Beulich
2017-03-21 16:44           ` Razvan Cojocaru
2017-03-21 17:02             ` Jan Beulich
2017-03-22 17:22               ` Razvan Cojocaru
2017-03-23  8:19                 ` Jan Beulich
2017-03-23  8:27                   ` Razvan Cojocaru
2017-03-23  8:45                     ` Jan Beulich
2017-03-23  9:51                       ` Razvan Cojocaru
2017-03-23 10:21                         ` Razvan Cojocaru
2017-03-23 13:23                           ` Jan Beulich
2017-03-23 13:28                             ` Razvan Cojocaru
2017-03-23 15:54                             ` Razvan Cojocaru
2017-03-23 16:20                               ` Jan Beulich
2017-03-23 16:31                                 ` Razvan Cojocaru

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.