* [PATCH 0/4] x86emul: FPU handling corrections
@ 2017-03-13 10:56 Jan Beulich
  2017-03-13 11:03 ` [PATCH 1/4] x86emul: fold exit paths Jan Beulich
                   ` (4 more replies)
  0 siblings, 5 replies; 16+ messages in thread
From: Jan Beulich @ 2017-03-13 10:56 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper

1: fold exit paths
2: centralize put_fpu() invocations
3: correct handling of FPU insns faulting on memory write
4: correct FPU code/data pointers and opcode handling
XTF: add FPU/SIMD register state test

Signed-off-by: Jan Beulich <jbeulich@suse.com>



* [PATCH 1/4] x86emul: fold exit paths
  2017-03-13 10:56 [PATCH 0/4] x86emul: FPU handling corrections Jan Beulich
@ 2017-03-13 11:03 ` Jan Beulich
  2017-03-13 11:26   ` Andrew Cooper
  2017-03-13 11:03 ` [PATCH 2/4] x86emul: centralize put_fpu() invocations Jan Beulich
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 16+ messages in thread
From: Jan Beulich @ 2017-03-13 11:03 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper


Move "cannot_emulate" and make it go through the common (error) exit
path.
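
A condensed sketch of the resulting shape (simplified from the hunks
below, not the literal code):

    switch ( ... )                    /* main opcode dispatch */
    {
        /* ... */
    default:
    cannot_emulate:                   /* label now lives in the default case */
        rc = X86EMUL_UNHANDLEABLE;
        goto done;                    /* take the common (error) exit path */
    }
    /* ... */
 done:
    _put_fpu();                       /* cleanup now happens in one place */
    put_stub(stub);
    return rc;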

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -7762,7 +7762,9 @@ x86_emulate(
     }
 
     default:
-        goto cannot_emulate;
+    cannot_emulate:
+        rc = X86EMUL_UNHANDLEABLE;
+        goto done;
     }
 
     if ( state->simd_size )
@@ -7906,11 +7908,6 @@ x86_emulate(
     _put_fpu();
     put_stub(stub);
     return rc;
-
- cannot_emulate:
-    _put_fpu();
-    put_stub(stub);
-    return X86EMUL_UNHANDLEABLE;
 #undef state
 }
 





* [PATCH 2/4] x86emul: centralize put_fpu() invocations
  2017-03-13 10:56 [PATCH 0/4] x86emul: FPU handling corrections Jan Beulich
  2017-03-13 11:03 ` [PATCH 1/4] x86emul: fold exit paths Jan Beulich
@ 2017-03-13 11:03 ` Jan Beulich
  2017-03-13 11:55   ` Andrew Cooper
  2017-03-13 11:05 ` [PATCH 3/4] x86emul: correct handling of FPU insns faulting on memory write Jan Beulich
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 16+ messages in thread
From: Jan Beulich @ 2017-03-13 11:03 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper


..., splitting parts of it into check_*() macros. This is in
preparation for making ->put_fpu() do further adjustments to register
state. (Some of the check_xmm() invocations could be avoided, as in
some of the cases no insns handled there can actually raise #XM, but I
think we're better off keeping them, so that later additions of further
insn patterns don't turn the lack of the check into a bug.)
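
A condensed sketch of the resulting split, following the shape
introduced by the hunks below (not the literal code): per-insn code now
only propagates a pending exception, while the FPU is released once on
the common exit paths.

    /* Per-insn: raise any exception recorded by the stub, keep the FPU. */
    get_fpu(X86EMUL_FPU_fpu, &fic);
    /* ... execute the insn via the stub ... */
    check_fpu(&fic);              /* check_xmm(&fic) on the SSE/AVX paths */

    /* Release happens centrally, on both exit paths: */
 complete_insn:
    put_fpu(&fic, ctxt, ops);     /* success */
    /* ... commit shadow register state ... */
 done:
    if ( rc != X86EMUL_OKAY )
        put_fpu(&fic, ctxt, ops); /* failure */
    put_stub(stub);
    return rc;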

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -937,6 +937,7 @@ do {
 
 struct fpu_insn_ctxt {
     uint8_t insn_bytes;
+    uint8_t type;
     int8_t exn_raised;
 };
 
@@ -956,8 +957,6 @@ static int _get_fpu(
 {
     int rc;
 
-    fic->exn_raised = -1;
-
     fail_if(!ops->get_fpu);
     rc = ops->get_fpu(fpu_handle_exception, fic, type, ctxt);
 
@@ -965,6 +964,8 @@ static int _get_fpu(
     {
         unsigned long cr0;
 
+        fic->type = type;
+
         fail_if(!ops->read_cr);
         if ( type >= X86EMUL_FPU_xmm )
         {
@@ -1006,22 +1007,31 @@ do {
     rc = _get_fpu(_type, _fic, ctxt, ops);                      \
     if ( rc ) goto done;                                        \
 } while (0)
-#define _put_fpu()                                              \
+
+#define check_fpu(_fic)                                         \
 do {                                                            \
-    if ( ops->put_fpu != NULL )                                 \
-        (ops->put_fpu)(ctxt);                                   \
+    generate_exception_if((_fic)->exn_raised >= 0,              \
+                          (_fic)->exn_raised);                  \
 } while (0)
-#define put_fpu(_fic)                                           \
+
+#define check_xmm(_fic)                                         \
 do {                                                            \
-    _put_fpu();                                                 \
     if ( (_fic)->exn_raised == EXC_XM && ops->read_cr &&        \
          ops->read_cr(4, &cr4, ctxt) == X86EMUL_OKAY &&         \
          !(cr4 & X86_CR4_OSXMMEXCPT) )				\
         (_fic)->exn_raised = EXC_UD;                            \
-    generate_exception_if((_fic)->exn_raised >= 0,              \
-                          (_fic)->exn_raised);                  \
+    check_fpu(_fic);                                            \
 } while (0)
 
+static void put_fpu(
+    struct fpu_insn_ctxt *fic,
+    struct x86_emulate_ctxt *ctxt,
+    const struct x86_emulate_ops *ops)
+{
+    if ( fic->type != X86EMUL_FPU_none && ops->put_fpu )
+        ops->put_fpu(ctxt);
+}
+
 static inline bool fpu_check_write(void)
 {
     uint16_t fsw;
@@ -3015,7 +3025,7 @@ x86_emulate(
     struct operand dst = { .reg = PTR_POISON };
     unsigned long cr4;
     enum x86_swint_type swint_type;
-    struct fpu_insn_ctxt fic;
+    struct fpu_insn_ctxt fic = { .type = X86EMUL_FPU_none, .exn_raised = -1 };
     struct x86_emulate_stub stub = {};
     DECLARE_ALIGNED(mmval_t, mmval);
 
@@ -3708,7 +3718,7 @@ x86_emulate(
         host_and_vcpu_must_have(fpu);
         get_fpu(X86EMUL_FPU_wait, &fic);
         asm volatile ( "fwait" ::: "memory" );
-        put_fpu(&fic);
+        check_fpu(&fic);
         break;
 
     case 0x9c: /* pushf */
@@ -4153,7 +4163,7 @@ x86_emulate(
                 break;
             }
         }
-        put_fpu(&fic);
+        check_fpu(&fic);
         break;
 
     case 0xd9: /* FPU 0xd9 */
@@ -4242,7 +4252,7 @@ x86_emulate(
             if ( dst.type == OP_MEM && dst.bytes == 4 && !fpu_check_write() )
                 dst.type = OP_NONE;
         }
-        put_fpu(&fic);
+        check_fpu(&fic);
         break;
 
     case 0xda: /* FPU 0xda */
@@ -4293,7 +4303,7 @@ x86_emulate(
                 break;
             }
         }
-        put_fpu(&fic);
+        check_fpu(&fic);
         break;
 
     case 0xdb: /* FPU 0xdb */
@@ -4365,7 +4375,7 @@ x86_emulate(
             if ( dst.type == OP_MEM && !fpu_check_write() )
                 dst.type = OP_NONE;
         }
-        put_fpu(&fic);
+        check_fpu(&fic);
         break;
 
     case 0xdc: /* FPU 0xdc */
@@ -4416,7 +4426,7 @@ x86_emulate(
                 break;
             }
         }
-        put_fpu(&fic);
+        check_fpu(&fic);
         break;
 
     case 0xdd: /* FPU 0xdd */
@@ -4475,7 +4485,7 @@ x86_emulate(
             if ( dst.type == OP_MEM && dst.bytes == 8 && !fpu_check_write() )
                 dst.type = OP_NONE;
         }
-        put_fpu(&fic);
+        check_fpu(&fic);
         break;
 
     case 0xde: /* FPU 0xde */
@@ -4523,7 +4533,7 @@ x86_emulate(
                 break;
             }
         }
-        put_fpu(&fic);
+        check_fpu(&fic);
         break;
 
     case 0xdf: /* FPU 0xdf */
@@ -4605,7 +4615,7 @@ x86_emulate(
             if ( dst.type == OP_MEM && !fpu_check_write() )
                 dst.type = OP_NONE;
         }
-        put_fpu(&fic);
+        check_fpu(&fic);
         break;
 
     case 0xe0 ... 0xe2: /* loop{,z,nz} */ {
@@ -5667,7 +5677,7 @@ x86_emulate(
                             : "c" (mmvalp), "m" (*mmvalp));
 
         put_stub(stub);
-        put_fpu(&fic);
+        check_xmm(&fic);
 
         state->simd_size = simd_none;
         break;
@@ -5719,7 +5729,7 @@ x86_emulate(
                       [mask] "i" (EFLAGS_MASK));
 
         put_stub(stub);
-        put_fpu(&fic);
+        check_xmm(&fic);
 
         ASSERT(!state->simd_size);
         break;
@@ -5903,7 +5913,7 @@ x86_emulate(
         invoke_stub("", "", "=a" (dst.val) : [dummy] "i" (0));
 
         put_stub(stub);
-        put_fpu(&fic);
+        check_xmm(&fic);
 
         ASSERT(!state->simd_size);
         dst.bytes = 4;
@@ -6111,7 +6121,7 @@ x86_emulate(
         dst.val = src.val;
 
         put_stub(stub);
-        put_fpu(&fic);
+        check_xmm(&fic);
 
         ASSERT(!state->simd_size);
         break;
@@ -6253,7 +6263,7 @@ x86_emulate(
         invoke_stub("", "", [dummy_out] "=g" (dummy) : [dummy_in] "i" (0) );
 
         put_stub(stub);
-        put_fpu(&fic);
+        check_xmm(&fic);
 
         ASSERT(!state->simd_size);
         break;
@@ -6323,8 +6333,6 @@ x86_emulate(
                     asm volatile ( ".byte 0xc5,0xc1,0xeb,0xff" );
                 }
 
-                put_fpu(&fic);
-
                 ASSERT(!state->simd_size);
                 break;
             }
@@ -7070,10 +7078,7 @@ x86_emulate(
 
         put_stub(stub);
         if ( !ea.val )
-        {
-            put_fpu(&fic);
             goto complete_insn;
-        }
 
         opc = init_prefixes(stub);
         opc[0] = b;
@@ -7221,7 +7226,7 @@ x86_emulate(
         emulate_stub("+m" (*mmvalp), "a" (mmvalp));
 
         put_stub(stub);
-        put_fpu(&fic);
+        check_xmm(&fic);
 
         state->simd_size = simd_none;
         dst.type = OP_NONE;
@@ -7525,7 +7530,7 @@ x86_emulate(
         invoke_stub("", "", "=m" (dst.val) : "a" (&dst.val));
 
         put_stub(stub);
-        put_fpu(&fic);
+        check_xmm(&fic);
 
         ASSERT(!state->simd_size);
         dst.bytes = dst.type == OP_REG || b == 0x17 ? 4 : 1 << (b & 3);
@@ -7841,7 +7846,7 @@ x86_emulate(
             invoke_stub("", "", "+m" (*mmvalp) : "D" (mmvalp));
 
         put_stub(stub);
-        put_fpu(&fic);
+        check_xmm(&fic);
     }
 
     switch ( dst.type )
@@ -7883,6 +7888,8 @@ x86_emulate(
     }
 
  complete_insn: /* Commit shadow register state. */
+    put_fpu(&fic, ctxt, ops);
+
     /* Zero the upper 32 bits of %rip if not in 64-bit mode. */
     if ( !mode_64bit() )
         _regs.r(ip) = _regs.eip;
@@ -7905,7 +7912,8 @@ x86_emulate(
     ctxt->regs->eflags &= ~X86_EFLAGS_RF;
 
  done:
-    _put_fpu();
+    if ( rc != X86EMUL_OKAY )
+        put_fpu(&fic, ctxt, ops);
     put_stub(stub);
     return rc;
 #undef state
--- a/xen/arch/x86/x86_emulate/x86_emulate.h
+++ b/xen/arch/x86/x86_emulate/x86_emulate.h
@@ -161,7 +161,9 @@ enum x86_emulate_fpu_type {
     X86EMUL_FPU_wait, /* WAIT/FWAIT instruction */
     X86EMUL_FPU_mmx, /* MMX instruction set (%mm0-%mm7) */
     X86EMUL_FPU_xmm, /* SSE instruction set (%xmm0-%xmm7/15) */
-    X86EMUL_FPU_ymm  /* AVX/XOP instruction set (%ymm0-%ymm7/15) */
+    X86EMUL_FPU_ymm, /* AVX/XOP instruction set (%ymm0-%ymm7/15) */
+    /* This sentinel will never be passed to ->get_fpu(). */
+    X86EMUL_FPU_none
 };
 
 struct cpuid_leaf




* [PATCH 3/4] x86emul: correct handling of FPU insns faulting on memory write
  2017-03-13 10:56 [PATCH 0/4] x86emul: FPU handling corrections Jan Beulich
  2017-03-13 11:03 ` [PATCH 1/4] x86emul: fold exit paths Jan Beulich
  2017-03-13 11:03 ` [PATCH 2/4] x86emul: centralize put_fpu() invocations Jan Beulich
@ 2017-03-13 11:05 ` Jan Beulich
  2017-03-13 14:03   ` Andrew Cooper
  2017-03-14  9:15   ` Tian, Kevin
  2017-03-13 11:05 ` [PATCH 4/4] x86emul: correct FPU code/data pointers and opcode handling Jan Beulich
  2017-03-13 11:07 ` [PATCH][XTF] add FPU/SIMD register state test Jan Beulich
  4 siblings, 2 replies; 16+ messages in thread
From: Jan Beulich @ 2017-03-13 11:05 UTC (permalink / raw)
  To: xen-devel
  Cc: Kevin Tian, Suravee Suthikulpanit, Andrew Cooper, Paul Durrant,
	Jun Nakajima, Boris Ostrovsky


When an FPU instruction with a memory destination fails during the
memory write, it should not affect FPU register state. Due to the way
we emulate FPU (and SIMD) instructions, we can only guarantee this by
- backing out changes to the FPU register state in such a case, or
- doing a descriptor read and/or page walk up front, perhaps with the
  stubs then accessing the actual memory location.
The latter would require a significant change in how the emulator does
its guest memory accessing, so for now the former variant is being
chosen.
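
A condensed sketch of the chosen (back-out) variant as wired up in the
hvmemul_* hooks below (simplified, not the literal code):

    /* hvmemul_get_fpu(): latch current register state before the stub runs. */
    save_fpu_enable();            /* saves into curr->arch.fpu_ctxt */
    curr->fpu_dirtied = true;     /* undo the clearing done by the helper */

    /* hvmemul_put_fpu(ctxt, X86EMUL_FPU_fpu): the memory write failed after
     * the stub already updated register state - arrange for the latched
     * state to be reloaded on the guest's next FPU use. */
    curr->fpu_dirtied = false;
    stts();
    hvm_funcs.fpu_leave(curr);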

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
Note that the state save overhead (unless state hadn't been loaded at
all before, which should only be possible if a guest is fiddling with
the instruction stream under emulation) is incurred for every FPU insn
hitting the emulator. We could reduce this to just the ones writing to
memory, but that would involve quite a few further changes and would
result in code where even more paths need to match up with one another.
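
For clarity: it is only the state latching in hvmemul_get_fpu() that is
paid unconditionally; the back-out itself is already limited to
memory-destination failures (condensed from the x86_emulate.c hunk near
the "done" label below):

 done:
    if ( rc != X86EMUL_OKAY )
        /* "Failed late": an FPU insn was actually emulated (insn_bytes set)
         * and the destination was a memory operand, i.e. the write failed. */
        put_fpu(&fic, fic.insn_bytes > 0 && dst.type == OP_MEM, ctxt, ops);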

--- a/tools/fuzz/x86_instruction_emulator/x86-insn-emulator-fuzzer.c
+++ b/tools/fuzz/x86_instruction_emulator/x86-insn-emulator-fuzzer.c
@@ -433,6 +433,7 @@ static struct x86_emulate_ops fuzz_emulo
     SET(wbinvd),
     SET(invlpg),
     .get_fpu    = emul_test_get_fpu,
+    .put_fpu    = emul_test_put_fpu,
     .cpuid      = emul_test_cpuid,
 };
 #undef SET
--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -293,6 +293,7 @@ static struct x86_emulate_ops emulops =
     .read_cr    = emul_test_read_cr,
     .read_msr   = read_msr,
     .get_fpu    = emul_test_get_fpu,
+    .put_fpu    = emul_test_put_fpu,
 };
 
 int main(int argc, char **argv)
--- a/tools/tests/x86_emulator/x86_emulate.c
+++ b/tools/tests/x86_emulator/x86_emulate.c
@@ -138,4 +138,11 @@ int emul_test_get_fpu(
     return X86EMUL_OKAY;
 }
 
+void emul_test_put_fpu(
+    struct x86_emulate_ctxt *ctxt,
+    enum x86_emulate_fpu_type backout)
+{
+    /* TBD */
+}
+
 #include "x86_emulate/x86_emulate.c"
--- a/tools/tests/x86_emulator/x86_emulate.h
+++ b/tools/tests/x86_emulator/x86_emulate.h
@@ -178,3 +178,7 @@ int emul_test_get_fpu(
     void *exception_callback_arg,
     enum x86_emulate_fpu_type type,
     struct x86_emulate_ctxt *ctxt);
+
+void emul_test_put_fpu(
+    struct x86_emulate_ctxt *ctxt,
+    enum x86_emulate_fpu_type backout);
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -15,6 +15,7 @@
 #include <xen/paging.h>
 #include <xen/trace.h>
 #include <asm/event.h>
+#include <asm/i387.h>
 #include <asm/xstate.h>
 #include <asm/hvm/emulate.h>
 #include <asm/hvm/hvm.h>
@@ -1619,6 +1620,35 @@ static int hvmemul_get_fpu(
 
     if ( !curr->fpu_dirtied )
         hvm_funcs.fpu_dirty_intercept();
+    else if ( type == X86EMUL_FPU_fpu )
+    {
+        const typeof(curr->arch.xsave_area->fpu_sse) *fpu_ctxt =
+            curr->arch.fpu_ctxt;
+
+        /*
+         * Latch current register state so that we can back out changes
+         * if needed (namely when a memory write fails after register state
+         * has already been updated).
+         * NB: We don't really need the "enable" part of the called function
+         * (->fpu_dirtied set implies CR0.TS clear), but the additional
+         * overhead should be low enough to not warrant introduction of yet
+         * another slightly different function. However, we need to undo the
+         * ->fpu_dirtied clearing the function does as well as the possible
+         * masking of all exceptions by FNSTENV.)
+         */
+        save_fpu_enable();
+        curr->fpu_dirtied = true;
+        if ( (fpu_ctxt->fcw & 0x3f) != 0x3f )
+        {
+            uint16_t fcw;
+
+            asm ( "fnstcw %0" : "=m" (fcw) );
+            if ( (fcw & 0x3f) == 0x3f )
+                asm ( "fldcw %0" :: "m" (fpu_ctxt->fcw) );
+            else
+                ASSERT(fcw == fpu_ctxt->fcw);
+        }
+    }
 
     curr->arch.hvm_vcpu.fpu_exception_callback = exception_callback;
     curr->arch.hvm_vcpu.fpu_exception_callback_arg = exception_callback_arg;
@@ -1627,10 +1657,24 @@ static int hvmemul_get_fpu(
 }
 
 static void hvmemul_put_fpu(
-    struct x86_emulate_ctxt *ctxt)
+    struct x86_emulate_ctxt *ctxt,
+    enum x86_emulate_fpu_type backout)
 {
     struct vcpu *curr = current;
+
     curr->arch.hvm_vcpu.fpu_exception_callback = NULL;
+
+    if ( backout == X86EMUL_FPU_fpu )
+    {
+        /*
+         * To back out changes to the register file simply adjust state such
+         * that upon next FPU insn use by the guest we'll reload the state
+         * saved (or freshly loaded) by hvmemul_get_fpu().
+         */
+        curr->fpu_dirtied = false;
+        stts();
+        hvm_funcs.fpu_leave(curr);
+    }
 }
 
 static int hvmemul_invlpg(
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -2268,6 +2268,7 @@ static struct hvm_function_table __initd
     .update_guest_cr      = svm_update_guest_cr,
     .update_guest_efer    = svm_update_guest_efer,
     .update_guest_vendor  = svm_update_guest_vendor,
+    .fpu_leave            = svm_fpu_leave,
     .set_guest_pat        = svm_set_guest_pat,
     .get_guest_pat        = svm_get_guest_pat,
     .set_tsc_offset       = svm_set_tsc_offset,
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2128,6 +2128,7 @@ static struct hvm_function_table __initd
     .update_guest_cr      = vmx_update_guest_cr,
     .update_guest_efer    = vmx_update_guest_efer,
     .update_guest_vendor  = vmx_update_guest_vendor,
+    .fpu_leave            = vmx_fpu_leave,
     .set_guest_pat        = vmx_set_guest_pat,
     .get_guest_pat        = vmx_get_guest_pat,
     .set_tsc_offset       = vmx_set_tsc_offset,
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -964,6 +964,7 @@ static int _get_fpu(
     {
         unsigned long cr0;
 
+        fail_if(type == X86EMUL_FPU_fpu && !ops->put_fpu);
         fic->type = type;
 
         fail_if(!ops->read_cr);
@@ -1025,11 +1026,14 @@ do {
 
 static void put_fpu(
     struct fpu_insn_ctxt *fic,
+    bool failed_late,
     struct x86_emulate_ctxt *ctxt,
     const struct x86_emulate_ops *ops)
 {
-    if ( fic->type != X86EMUL_FPU_none && ops->put_fpu )
-        ops->put_fpu(ctxt);
+    if ( unlikely(failed_late) && fic->type == X86EMUL_FPU_fpu )
+        ops->put_fpu(ctxt, X86EMUL_FPU_fpu);
+    else if ( fic->type != X86EMUL_FPU_none && ops->put_fpu )
+        ops->put_fpu(ctxt, X86EMUL_FPU_none);
 }
 
 static inline bool fpu_check_write(void)
@@ -3714,9 +3718,9 @@ x86_emulate(
         break;
 
     case 0x9b:  /* wait/fwait */
-        fic.insn_bytes = 1;
         host_and_vcpu_must_have(fpu);
         get_fpu(X86EMUL_FPU_wait, &fic);
+        fic.insn_bytes = 1;
         asm volatile ( "fwait" ::: "memory" );
         check_fpu(&fic);
         break;
@@ -7888,7 +7892,7 @@ x86_emulate(
     }
 
  complete_insn: /* Commit shadow register state. */
-    put_fpu(&fic, ctxt, ops);
+    put_fpu(&fic, false, ctxt, ops);
 
     /* Zero the upper 32 bits of %rip if not in 64-bit mode. */
     if ( !mode_64bit() )
@@ -7913,7 +7917,7 @@ x86_emulate(
 
  done:
     if ( rc != X86EMUL_OKAY )
-        put_fpu(&fic, ctxt, ops);
+        put_fpu(&fic, fic.insn_bytes > 0 && dst.type == OP_MEM, ctxt, ops);
     put_stub(stub);
     return rc;
 #undef state
--- a/xen/arch/x86/x86_emulate/x86_emulate.h
+++ b/xen/arch/x86/x86_emulate/x86_emulate.h
@@ -437,9 +437,12 @@ struct x86_emulate_ops
      * put_fpu: Relinquish the FPU. Unhook from FPU/SIMD exception handlers.
      *  The handler, if installed, must be prepared to get called without
      *  the get_fpu one having got called before!
+     * @backout: Undo updates to the specified register file (can, besides
+     *           X86EMUL_FPU_none, only be X86EMUL_FPU_fpu at present);
      */
     void (*put_fpu)(
-        struct x86_emulate_ctxt *ctxt);
+        struct x86_emulate_ctxt *ctxt,
+        enum x86_emulate_fpu_type backout);
 
     /* invlpg: Invalidate paging structures which map addressed byte. */
     int (*invlpg)(
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -140,6 +140,8 @@ struct hvm_function_table {
 
     void (*update_guest_vendor)(struct vcpu *v);
 
+    void (*fpu_leave)(struct vcpu *v);
+
     int  (*get_guest_pat)(struct vcpu *v, u64 *);
     int  (*set_guest_pat)(struct vcpu *v, u64);
 




* [PATCH 4/4] x86emul: correct FPU code/data pointers and opcode handling
  2017-03-13 10:56 [PATCH 0/4] x86emul: FPU handling corrections Jan Beulich
                   ` (2 preceding siblings ...)
  2017-03-13 11:05 ` [PATCH 3/4] x86emul: correct handling of FPU insns faulting on memory write Jan Beulich
@ 2017-03-13 11:05 ` Jan Beulich
  2017-03-14  9:21   ` Paul Durrant
  2017-03-14 10:56   ` Andrew Cooper
  2017-03-13 11:07 ` [PATCH][XTF] add FPU/SIMD register state test Jan Beulich
  4 siblings, 2 replies; 16+ messages in thread
From: Jan Beulich @ 2017-03-13 11:05 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Paul Durrant


Prevent leaking the hypervisor's own FPU code/data pointers (stored by
hardware during stub execution), while at the same time making sure the
guest sees correct values there.
This piggybacks on the backout logic used to deal with write faults of
FPU insns.

Deliberately ignore the NO_FPU_SEL feature here: Honoring it would
merely mean extra code with no benefit (once we XRSTOR state, the
selector values will simply be lost anyway).
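
A condensed sketch of how the replacement values are applied in
hvmemul_put_fpu() below (field names as in the diff; which layout the
guest sees depends on its current execution mode as returned by
hvm_guest_x86_mode()):

    switch ( hvm_guest_x86_mode(curr) )
    {
    case 8:                            /* 64-bit: full linear addresses */
        fpu_ctxt->fip.addr = aux->ip;
        break;
    case 4: case 2:                    /* 32-/16-bit protected mode: */
        fpu_ctxt->fip.offs = aux->ip;  /* selector:offset pairs */
        fpu_ctxt->fip.sel  = aux->cs;
        break;
    case 0: case 1:                    /* real / VM86 mode: 20-bit linear */
        fpu_ctxt->fip.addr = aux->ip | (aux->cs << 4);
        break;
    }
    /* FDP/DS are filled analogously when a memory operand was involved. */
    fpu_ctxt->fop = aux->op;           /* 11 bits: ((opcode & 7) << 8) | modrm */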

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/tools/tests/x86_emulator/x86_emulate.c
+++ b/tools/tests/x86_emulator/x86_emulate.c
@@ -140,7 +140,8 @@ int emul_test_get_fpu(
 
 void emul_test_put_fpu(
     struct x86_emulate_ctxt *ctxt,
-    enum x86_emulate_fpu_type backout)
+    enum x86_emulate_fpu_type backout,
+    const struct x86_emul_fpu_aux *aux)
 {
     /* TBD */
 }
--- a/tools/tests/x86_emulator/x86_emulate.h
+++ b/tools/tests/x86_emulator/x86_emulate.h
@@ -181,4 +181,5 @@ int emul_test_get_fpu(
 
 void emul_test_put_fpu(
     struct x86_emulate_ctxt *ctxt,
-    enum x86_emulate_fpu_type backout);
+    enum x86_emulate_fpu_type backout,
+    const struct x86_emul_fpu_aux *aux);
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -1658,12 +1658,73 @@ static int hvmemul_get_fpu(
 
 static void hvmemul_put_fpu(
     struct x86_emulate_ctxt *ctxt,
-    enum x86_emulate_fpu_type backout)
+    enum x86_emulate_fpu_type backout,
+    const struct x86_emul_fpu_aux *aux)
 {
     struct vcpu *curr = current;
 
     curr->arch.hvm_vcpu.fpu_exception_callback = NULL;
 
+    if ( aux )
+    {
+        typeof(curr->arch.xsave_area->fpu_sse) *fpu_ctxt = curr->arch.fpu_ctxt;
+        bool dval = aux->dval;
+        int mode = hvm_guest_x86_mode(curr);
+
+        ASSERT(backout == X86EMUL_FPU_none);
+        /*
+         * Latch current register state so that we can replace FIP/FDP/FOP
+         * (which have values resulting from our own invocation of the FPU
+         * instruction during emulation).
+         * NB: See also the comment in hvmemul_get_fpu(); we don't need to
+         * set ->fpu_dirtied here as it is going to be cleared below, and
+         * we also don't need to reload FCW as we're forcing full state to
+         * be reloaded anyway.
+         */
+        save_fpu_enable();
+
+        if ( boot_cpu_has(X86_FEATURE_FDP_EXCP_ONLY) &&
+             !(fpu_ctxt->fsw & ~fpu_ctxt->fcw & 0x003f) )
+            dval = false;
+
+        switch ( mode )
+        {
+        case 8:
+            fpu_ctxt->fip.addr = aux->ip;
+            if ( dval )
+                fpu_ctxt->fdp.addr = aux->dp;
+            fpu_ctxt->x[FPU_WORD_SIZE_OFFSET] = 8;
+            break;
+
+        case 4: case 2:
+            fpu_ctxt->fip.offs = aux->ip;
+            fpu_ctxt->fip.sel  = aux->cs;
+            if ( dval )
+            {
+                fpu_ctxt->fdp.offs = aux->dp;
+                fpu_ctxt->fdp.sel  = aux->ds;
+            }
+            fpu_ctxt->x[FPU_WORD_SIZE_OFFSET] = mode;
+            break;
+
+        case 0: case 1:
+            fpu_ctxt->fip.addr = aux->ip | (aux->cs << 4);
+            if ( dval )
+                fpu_ctxt->fdp.addr = aux->dp | (aux->ds << 4);
+            fpu_ctxt->x[FPU_WORD_SIZE_OFFSET] = 2;
+            break;
+
+        default:
+            ASSERT_UNREACHABLE();
+            return;
+        }
+
+        fpu_ctxt->fop = aux->op;
+
+        /* Re-use backout code below. */
+        backout = X86EMUL_FPU_fpu;
+    }
+
     if ( backout == X86EMUL_FPU_fpu )
     {
         /*
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -536,6 +536,55 @@ struct operand {
         unsigned long    off;
     } mem;
 };
+
+struct x86_emulate_state {
+    unsigned int op_bytes, ad_bytes;
+
+    enum {
+        ext_none = vex_none,
+        ext_0f   = vex_0f,
+        ext_0f38 = vex_0f38,
+        ext_0f3a = vex_0f3a,
+        /*
+         * For XOP use values such that the respective instruction field
+         * can be used without adjustment.
+         */
+        ext_8f08 = 8,
+        ext_8f09,
+        ext_8f0a,
+    } ext;
+    uint8_t modrm, modrm_mod, modrm_reg, modrm_rm;
+    uint8_t rex_prefix;
+    bool lock_prefix;
+    bool not_64bit; /* Instruction not available in 64bit. */
+    bool fpu_ctrl;  /* Instruction is an FPU control one. */
+    opcode_desc_t desc;
+    union vex vex;
+    union evex evex;
+    enum simd_opsize simd_size;
+
+    /*
+     * Data operand effective address (usually computed from ModRM).
+     * Default is a memory operand relative to segment DS.
+     */
+    struct operand ea;
+
+    /* Immediate operand values, if any. Use otherwise unused fields. */
+#define imm1 ea.val
+#define imm2 ea.orig_val
+
+    unsigned long ip;
+    struct cpu_user_regs *regs;
+
+#ifndef NDEBUG
+    /*
+     * Track caller of x86_decode_insn() to spot missing as well as
+     * premature calls to x86_emulate_free_state().
+     */
+    void *caller;
+#endif
+};
+
 #ifdef __x86_64__
 #define PTR_POISON ((void *)0x8086000000008086UL) /* non-canonical */
 #else
@@ -1027,13 +1076,48 @@ do {
 static void put_fpu(
     struct fpu_insn_ctxt *fic,
     bool failed_late,
+    const struct x86_emulate_state *state,
     struct x86_emulate_ctxt *ctxt,
     const struct x86_emulate_ops *ops)
 {
     if ( unlikely(failed_late) && fic->type == X86EMUL_FPU_fpu )
-        ops->put_fpu(ctxt, X86EMUL_FPU_fpu);
+        ops->put_fpu(ctxt, X86EMUL_FPU_fpu, NULL);
+    else if ( unlikely(fic->type == X86EMUL_FPU_fpu) && !state->fpu_ctrl )
+    {
+        struct x86_emul_fpu_aux aux = {
+            .ip = ctxt->regs->r(ip),
+            .cs = ctxt->regs->cs,
+            .op = ((ctxt->opcode & 7) << 8) | state->modrm,
+        };
+        struct segment_register sreg;
+
+        if ( ops->read_segment &&
+             ops->read_segment(x86_seg_cs, &sreg, ctxt) == X86EMUL_OKAY )
+            aux.cs = sreg.sel;
+        if ( state->ea.type == OP_MEM )
+        {
+            aux.dp = state->ea.mem.off;
+            if ( ops->read_segment &&
+                 ops->read_segment(state->ea.mem.seg, &sreg,
+                                   ctxt) == X86EMUL_OKAY )
+                aux.ds = sreg.sel;
+            else
+                switch ( state->ea.mem.seg )
+                {
+                case x86_seg_cs: aux.ds = ctxt->regs->cs; break;
+                case x86_seg_ds: aux.ds = ctxt->regs->ds; break;
+                case x86_seg_es: aux.ds = ctxt->regs->es; break;
+                case x86_seg_fs: aux.ds = ctxt->regs->fs; break;
+                case x86_seg_gs: aux.ds = ctxt->regs->gs; break;
+                case x86_seg_ss: aux.ds = ctxt->regs->ss; break;
+                default:         ASSERT_UNREACHABLE();    break;
+                }
+            aux.dval = true;
+        }
+        ops->put_fpu(ctxt, X86EMUL_FPU_none, &aux);
+    }
     else if ( fic->type != X86EMUL_FPU_none && ops->put_fpu )
-        ops->put_fpu(ctxt, X86EMUL_FPU_none);
+        ops->put_fpu(ctxt, X86EMUL_FPU_none, NULL);
 }
 
 static inline bool fpu_check_write(void)
@@ -2086,53 +2170,6 @@ int x86emul_unhandleable_rw(
     return X86EMUL_UNHANDLEABLE;
 }
 
-struct x86_emulate_state {
-    unsigned int op_bytes, ad_bytes;
-
-    enum {
-        ext_none = vex_none,
-        ext_0f   = vex_0f,
-        ext_0f38 = vex_0f38,
-        ext_0f3a = vex_0f3a,
-        /*
-         * For XOP use values such that the respective instruction field
-         * can be used without adjustment.
-         */
-        ext_8f08 = 8,
-        ext_8f09,
-        ext_8f0a,
-    } ext;
-    uint8_t modrm, modrm_mod, modrm_reg, modrm_rm;
-    uint8_t rex_prefix;
-    bool lock_prefix;
-    bool not_64bit; /* Instruction not available in 64bit. */
-    opcode_desc_t desc;
-    union vex vex;
-    union evex evex;
-    enum simd_opsize simd_size;
-
-    /*
-     * Data operand effective address (usually computed from ModRM).
-     * Default is a memory operand relative to segment DS.
-     */
-    struct operand ea;
-
-    /* Immediate operand values, if any. Use otherwise unused fields. */
-#define imm1 ea.val
-#define imm2 ea.orig_val
-
-    unsigned long ip;
-    struct cpu_user_regs *regs;
-
-#ifndef NDEBUG
-    /*
-     * Track caller of x86_decode_insn() to spot missing as well as
-     * premature calls to x86_emulate_free_state().
-     */
-    void *caller;
-#endif
-};
-
 /* Helper definitions. */
 #define op_bytes (state->op_bytes)
 #define ad_bytes (state->ad_bytes)
@@ -4231,8 +4268,10 @@ x86_emulate(
                 dst.bytes = 4;
                 break;
             case 4: /* fldenv - TODO */
+                state->fpu_ctrl = true;
                 goto cannot_emulate;
             case 5: /* fldcw m2byte */
+                state->fpu_ctrl = true;
                 if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &src.val,
                                      2, ctxt)) != X86EMUL_OKAY )
                     goto done;
@@ -4240,8 +4279,10 @@ x86_emulate(
                 dst.type = OP_NONE;
                 break;
             case 6: /* fnstenv - TODO */
+                state->fpu_ctrl = true;
                 goto cannot_emulate;
             case 7: /* fnstcw m2byte */
+                state->fpu_ctrl = true;
                 emulate_fpu_insn_memdst("fnstcw", dst.val);
                 dst.bytes = 2;
                 break;
@@ -4330,6 +4371,7 @@ x86_emulate(
         case 0xe3: /* fninit */
         case 0xe4: /* fnsetpm - 287 only, ignored by 387 */
         /* case 0xe5: frstpm - 287 only, #UD on 387 */
+            state->fpu_ctrl = true;
             emulate_fpu_insn_stub(0xdb, modrm);
             break;
         default:
@@ -4473,8 +4515,10 @@ x86_emulate(
                 break;
             case 4: /* frstor - TODO */
             case 6: /* fnsave - TODO */
+                state->fpu_ctrl = true;
                 goto cannot_emulate;
             case 7: /* fnstsw m2byte */
+                state->fpu_ctrl = true;
                 emulate_fpu_insn_memdst("fnstsw", dst.val);
                 dst.bytes = 2;
                 break;
@@ -4547,6 +4591,7 @@ x86_emulate(
         {
         case 0xe0:
             /* fnstsw %ax */
+            state->fpu_ctrl = true;
             dst.bytes = 2;
             dst.type = OP_REG;
             dst.reg = (void *)&_regs.ax;
@@ -7892,7 +7937,7 @@ x86_emulate(
     }
 
  complete_insn: /* Commit shadow register state. */
-    put_fpu(&fic, false, ctxt, ops);
+    put_fpu(&fic, false, state, ctxt, ops);
 
     /* Zero the upper 32 bits of %rip if not in 64-bit mode. */
     if ( !mode_64bit() )
@@ -7917,7 +7962,8 @@ x86_emulate(
 
  done:
     if ( rc != X86EMUL_OKAY )
-        put_fpu(&fic, fic.insn_bytes > 0 && dst.type == OP_MEM, ctxt, ops);
+        put_fpu(&fic, fic.insn_bytes > 0 && dst.type == OP_MEM,
+                state, ctxt, ops);
     put_stub(stub);
     return rc;
 #undef state
--- a/xen/arch/x86/x86_emulate/x86_emulate.h
+++ b/xen/arch/x86/x86_emulate/x86_emulate.h
@@ -135,6 +135,13 @@ struct __attribute__((__packed__)) segme
     uint64_t   base;
 };
 
+struct x86_emul_fpu_aux {
+    unsigned long ip, dp;
+    uint16_t cs, ds;
+    unsigned int op:11;
+    unsigned int dval:1;
+};
+
 /*
  * Return codes from state-accessor functions and from x86_emulate().
  */
@@ -439,10 +446,12 @@ struct x86_emulate_ops
      *  the get_fpu one having got called before!
      * @backout: Undo updates to the specified register file (can, besides
      *           X86EMUL_FPU_none, only be X86EMUL_FPU_fpu at present);
+     * @aux: Packaged up FIP/FDP/FOP values to load into FPU.
      */
     void (*put_fpu)(
         struct x86_emulate_ctxt *ctxt,
-        enum x86_emulate_fpu_type backout);
+        enum x86_emulate_fpu_type backout,
+        const struct x86_emul_fpu_aux *aux);
 
     /* invlpg: Invalidate paging structures which map addressed byte. */
     int (*invlpg)(



                 goto cannot_emulate;
             case 5: /* fldcw m2byte */
+                state->fpu_ctrl = true;
                 if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &src.val,
                                      2, ctxt)) != X86EMUL_OKAY )
                     goto done;
@@ -4240,8 +4279,10 @@ x86_emulate(
                 dst.type = OP_NONE;
                 break;
             case 6: /* fnstenv - TODO */
+                state->fpu_ctrl = true;
                 goto cannot_emulate;
             case 7: /* fnstcw m2byte */
+                state->fpu_ctrl = true;
                 emulate_fpu_insn_memdst("fnstcw", dst.val);
                 dst.bytes = 2;
                 break;
@@ -4330,6 +4371,7 @@ x86_emulate(
         case 0xe3: /* fninit */
         case 0xe4: /* fnsetpm - 287 only, ignored by 387 */
         /* case 0xe5: frstpm - 287 only, #UD on 387 */
+            state->fpu_ctrl = true;
             emulate_fpu_insn_stub(0xdb, modrm);
             break;
         default:
@@ -4473,8 +4515,10 @@ x86_emulate(
                 break;
             case 4: /* frstor - TODO */
             case 6: /* fnsave - TODO */
+                state->fpu_ctrl = true;
                 goto cannot_emulate;
             case 7: /* fnstsw m2byte */
+                state->fpu_ctrl = true;
                 emulate_fpu_insn_memdst("fnstsw", dst.val);
                 dst.bytes = 2;
                 break;
@@ -4547,6 +4591,7 @@ x86_emulate(
         {
         case 0xe0:
             /* fnstsw %ax */
+            state->fpu_ctrl = true;
             dst.bytes = 2;
             dst.type = OP_REG;
             dst.reg = (void *)&_regs.ax;
@@ -7892,7 +7937,7 @@ x86_emulate(
     }
 
  complete_insn: /* Commit shadow register state. */
-    put_fpu(&fic, false, ctxt, ops);
+    put_fpu(&fic, false, state, ctxt, ops);
 
     /* Zero the upper 32 bits of %rip if not in 64-bit mode. */
     if ( !mode_64bit() )
@@ -7917,7 +7962,8 @@ x86_emulate(
 
  done:
     if ( rc != X86EMUL_OKAY )
-        put_fpu(&fic, fic.insn_bytes > 0 && dst.type == OP_MEM, ctxt, ops);
+        put_fpu(&fic, fic.insn_bytes > 0 && dst.type == OP_MEM,
+                state, ctxt, ops);
     put_stub(stub);
     return rc;
 #undef state
--- a/xen/arch/x86/x86_emulate/x86_emulate.h
+++ b/xen/arch/x86/x86_emulate/x86_emulate.h
@@ -135,6 +135,13 @@ struct __attribute__((__packed__)) segme
     uint64_t   base;
 };
 
+struct x86_emul_fpu_aux {
+    unsigned long ip, dp;
+    uint16_t cs, ds;
+    unsigned int op:11;
+    unsigned int dval:1;
+};
+
 /*
  * Return codes from state-accessor functions and from x86_emulate().
  */
@@ -439,10 +446,12 @@ struct x86_emulate_ops
      *  the get_fpu one having got called before!
      * @backout: Undo updates to the specified register file (can, besides
      *           X86EMUL_FPU_none, only be X86EMUL_FPU_fpu at present);
+     * @aux: Packaged up FIP/FDP/FOP values to load into FPU.
      */
     void (*put_fpu)(
         struct x86_emulate_ctxt *ctxt,
-        enum x86_emulate_fpu_type backout);
+        enum x86_emulate_fpu_type backout,
+        const struct x86_emul_fpu_aux *aux);
 
     /* invlpg: Invalidate paging structures which map addressed byte. */
     int (*invlpg)(


* [PATCH][XTF] add FPU/SIMD register state test
  2017-03-13 10:56 [PATCH 0/4] x86emul: FPU handling corrections Jan Beulich
                   ` (3 preceding siblings ...)
  2017-03-13 11:05 ` [PATCH 4/4] x86emul: correct FPU code/data pointers and opcode handling Jan Beulich
@ 2017-03-13 11:07 ` Jan Beulich
  2017-03-14 11:36   ` Andrew Cooper
  4 siblings, 1 reply; 16+ messages in thread
From: Jan Beulich @ 2017-03-13 11:07 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel, Paul C Lai, Suravee Suthikulpanit

[-- Attachment #1: Type: text/plain, Size: 7512 bytes --]

Add tests to verify that
- FPU insns leave correct (guest) values in FIP/FDP/FOP/FCS/FDS,
- FPU insns writing memory don't update FPU register state when the
  write faults (at the example of FISTP),
- VCVTPS2PH (once implemented in the emulator) doesn't update MXCSR
  if its write faults (VCVTPS2PH currently is the only SIMD insn
  writing to memory and updating register state).

Note that the AMD-specific code in fpu_fxrstor() and xrstor() causes
the FISTP part of this test to always fail. I don't really have any
idea what to do about this (other than perhaps skipping the test on AMD
altogether).

Note further that the FCS part of the 64-bit variant of the FSTP test
also always fails on AMD, since, unlike on Intel, {F,}XRSTOR with REX.W
set do not appear to clear FCS/FDS (I can't find any statement either
way in the PM).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
TBD: It is not clear to me how to deal with the native execution
     failure of VCVTPS2PH on at least some Intel models.
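
(Editorial aside, not part of the patch: the register file fields under
test here can be observed from plain user space with FNSTENV. A minimal
stand-alone sketch of the 32-bit environment layout and of reading back
FIP/FDP/FOP follows; note that on CPUs with the FCS/FDS/FOP deprecation
features referred to elsewhere in the series those fields may simply read
back as zero.)

#include <stdint.h>
#include <stdio.h>

/* 32-bit protected mode x87 environment, as stored by FNSTENV. */
struct x87_env32 {
    uint16_t cw, :16;
    uint16_t sw, :16;
    uint16_t tw, :16;
    uint32_t ip;
    uint16_t cs;
    uint16_t op:11, :5;
    uint32_t dp;
    uint16_t ds, :16;
};

int main(void)
{
    struct x87_env32 env;
    float f;

    /* Execute an FPU store, then capture the environment it left behind. */
    asm volatile ( "fninit; fldpi; fstps %0" : "=m" (f) );
    asm volatile ( "fnstenv %0" : "=m" (env) );

    printf("FIP %08x FCS %04x FDP %08x FDS %04x FOP %03x\n",
           env.ip, env.cs, env.dp, env.ds, env.op);

    return 0;
}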

--- a/include/arch/x86/cpuid.h
+++ b/include/arch/x86/cpuid.h
@@ -77,6 +77,7 @@ static inline bool cpu_has(unsigned int
 #define cpu_has_pcid            cpu_has(X86_FEATURE_PCID)
 #define cpu_has_xsave           cpu_has(X86_FEATURE_XSAVE)
 #define cpu_has_avx             cpu_has(X86_FEATURE_AVX)
+#define cpu_has_f16c            cpu_has(X86_FEATURE_F16C)
 
 #define cpu_has_syscall         cpu_has(X86_FEATURE_SYSCALL)
 #define cpu_has_nx              cpu_has(X86_FEATURE_NX)
--- /dev/null
+++ b/tests/fpu-state/Makefile
@@ -0,0 +1,9 @@
+include $(ROOT)/build/common.mk
+
+NAME      := fpu-state
+CATEGORY  := functional
+TEST-ENVS := hvm64 hvm32pse
+
+obj-perenv += main.o
+
+include $(ROOT)/build/gen.mk
--- /dev/null
+++ b/tests/fpu-state/main.c
@@ -0,0 +1,184 @@
+/**
+ * @file tests/fpu-state/main.c
+ * @ref test-fpu-state - Emulation of FPU state
+ *
+ * @page test-fpu-state FPU State Emulation
+ *
+ * FPU code/data pointers and opcode must not be the ones resulting
+ * from the stub execution in the hypervisor.
+ *
+ * FPU and SIMD instructions faulting during memory write must not
+ * update the respective register files.
+ *
+ * @see tests/fpu-state/main.c
+ */
+#include <xtf.h>
+
+#include <arch/x86/exinfo.h>
+
+const char test_title[] = "FPU State";
+
+struct x87_env {
+    uint16_t cw, :16;
+    uint16_t sw, :16;
+    uint16_t tw, :16;
+    uint32_t ip;
+    uint16_t cs;
+    uint16_t op:11, :5;
+    uint32_t dp;
+    uint16_t ds, :16;
+};
+
+void probe_fstp(bool force)
+{
+    const uint8_t *fstp_offs;
+    uint32_t flt;
+    struct x87_env fenv;
+
+    fenv.cw = 0x35f; /* unmask PE */
+    asm volatile ( "fninit;"
+                   "fldcw %[cw];"
+                   "fldpi;"
+                   "mov $1f, %[offs];"
+                   "test %[fep], %[fep];"
+                   "jz 1f;"
+                   _ASM_XEN_FEP
+                   "1: fstps %[data]; 2:"
+                   : [offs] "=&g" (fstp_offs), [data] "=m" (flt)
+                   : [cw] "m" (fenv.cw), [fep] "q" (force) );
+
+    asm ( "fnstenv %0" : "=m" (fenv) );
+    if ( fenv.ip != (unsigned long)fstp_offs )
+        xtf_failure("Fail: FIP wrong (%08x)\n", fenv.ip);
+    if ( fenv.cs && fenv.cs != __KERN_CS )
+        xtf_failure("Fail: FCS wrong (%04x)\n", fenv.cs);
+    if ( fenv.dp != (unsigned long)&flt )
+        xtf_failure("Fail: FDP wrong (%08x)\n", fenv.dp);
+    if ( fenv.ds && fenv.ds != __KERN_DS )
+        xtf_failure("Fail: FDS wrong (%04x)\n", fenv.ds);
+    /* Skip possible opcode prefixes before checking the opcode. */
+    while ( (fstp_offs[0] & ~7) != 0xd8 )
+        ++fstp_offs;
+    if ( fenv.op && fenv.op != (((fstp_offs[0] & 7) << 8) | fstp_offs[1]) )
+        xtf_failure("Fail: FOP wrong (%03x)\n", fenv.op);
+}
+
+void probe_fistp(bool force)
+{
+    unsigned long fldpi_offs;
+    exinfo_t fault = 0;
+    uint16_t fsw;
+    struct x87_env fenv;
+
+    asm volatile ( "fninit;"
+                   "0: fldpi;"
+                   "mov $0b, %[offs];"
+                   "test %[fep], %[fep];"
+                   "jz 1f;"
+                   _ASM_XEN_FEP
+                   "1: fistps (%[ptr]); 2:"
+                   _ASM_EXTABLE_HANDLER(1b, 2b, ex_record_fault_eax)
+                   : [offs] "=&g" (fldpi_offs), "+a" (fault)
+                   : [ptr] "r" (0), [fep] "q" (force) );
+
+    if ( !fault )
+        xtf_error("Error: FISTP to NULL did not fault\n");
+
+    asm ( "fnstsw %0" : "=am" (fsw) );
+    if ( fsw != 0x3800 )
+        xtf_failure("Fail: FSW changed unexpectedly (%04x)\n", fsw);
+
+    asm ( "fnstenv %0" : "=m" (fenv) );
+    if ( fenv.ip != fldpi_offs )
+        xtf_failure("Fail: FIP changed unexpectedly (%08x)\n", fenv.ip);
+    if ( fenv.cs && fenv.cs != __KERN_CS )
+        xtf_failure("Fail: FCS changed unexpectedly (%04x)\n", fenv.cs);
+    if ( fenv.dp )
+        xtf_failure("Fail: FDP changed unexpectedly (%08x)\n", fenv.dp);
+    if ( fenv.ds )
+        xtf_failure("Fail: FDS changed unexpectedly (%04x)\n", fenv.ds);
+    if ( fenv.op && fenv.op != 0x1eb )
+        xtf_failure("Fail: FOP changed unexpectedly (%03x)\n", fenv.op);
+}
+
+void probe_vcvtps2ph(bool force)
+{
+    exinfo_t fault = 0;
+    uint32_t mxcsr = 0x1f80;
+
+    asm volatile ( "vldmxcsr %[mxcsr];"
+                   "vpcmpeqb %%xmm0,%%xmm0,%%xmm0;"
+                   "vpcmpgtb %%xmm0,%%xmm0,%%xmm1;"
+                   "vpunpcklbw %%xmm1,%%xmm0,%%xmm2;"
+                   "test %[fep], %[fep];"
+                   "jz 1f;"
+                   _ASM_XEN_FEP
+                   "1: vcvtps2ph $0,%%xmm2,(%[ptr]); 2:"
+                   _ASM_EXTABLE_HANDLER(1b, 2b, ex_record_fault_eax)
+                   : "+a" (fault)
+                   : [ptr] "r" (0), [mxcsr] "m" (mxcsr), [fep] "q" (force) );
+
+    if ( !fault )
+        xtf_error("Error: VCVTPS2PH to NULL did not fault\n");
+    else if ( exinfo_vec(fault) == X86_EXC_UD )
+    {
+        if ( force )
+            xtf_skip("Emulator does not support VCVTPS2PH\n");
+        else
+            xtf_failure("Fail: VCVTPS2PH did #UD\n");
+    }
+
+    asm ( "vstmxcsr %0" : "=m" (mxcsr) );
+    if ( mxcsr != 0x1f80 )
+        xtf_failure("Fail: MXCSR changed unexpectedly (%08x)\n", mxcsr);
+}
+
+void run_tests(bool force)
+{
+    if ( cpu_has_fpu )
+    {
+        printk("Testing%s FSTP\n", force ? " emulated" : "");
+        probe_fstp(force);
+
+        printk("Testing%s FISTP (to NULL)\n", force ? " emulated" : "");
+        probe_fistp(force);
+    }
+
+    if ( cpu_has_f16c )
+    {
+        unsigned long cr4 = read_cr4();
+        unsigned long xcr0;
+
+        write_cr4(cr4 | X86_CR4_OSXSAVE);
+        xcr0 = read_xcr0();
+        write_xcr0(xcr0 | XSTATE_SSE | XSTATE_YMM);
+
+        printk("Testing%s VCVTPS2PH (to NULL)\n", force ? " emulated" : "");
+        probe_vcvtps2ph(force);
+
+        write_xcr0(xcr0);
+        write_cr4(cr4);
+    }
+}
+
+void test_main(void)
+{
+    run_tests(false);
+
+    if ( !xtf_has_fep )
+        xtf_skip("FEP support not detected - some tests will be skipped\n");
+    else
+        run_tests(true);
+
+    xtf_success(NULL);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */




* Re: [PATCH 1/4] x86emul: fold exit paths
  2017-03-13 11:03 ` [PATCH 1/4] x86emul: fold exit paths Jan Beulich
@ 2017-03-13 11:26   ` Andrew Cooper
  0 siblings, 0 replies; 16+ messages in thread
From: Andrew Cooper @ 2017-03-13 11:26 UTC (permalink / raw)
  To: Jan Beulich, xen-devel

On 13/03/17 11:03, Jan Beulich wrote:
> Move "cannot_emulate" and make it go through the common (error) exit
> path.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>


* Re: [PATCH 2/4] x86emul: centralize put_fpu() invocations
  2017-03-13 11:03 ` [PATCH 2/4] x86emul: centralize put_fpu() invocations Jan Beulich
@ 2017-03-13 11:55   ` Andrew Cooper
  2017-03-13 12:31     ` Jan Beulich
  0 siblings, 1 reply; 16+ messages in thread
From: Andrew Cooper @ 2017-03-13 11:55 UTC (permalink / raw)
  To: Jan Beulich, xen-devel

On 13/03/17 11:03, Jan Beulich wrote:
> ..., splitting parts of it into check_*() macros. This is in
> preparation of making ->put_fpu() do further adjustments to register
> state. (Some of the check_xmm() invocations could be avoided, as in
> some of the cases no insns handled there can actually raise #XM, but I
> think we're better off keeping them to avoid later additions of further
> insn patterns rendering the lack of the check a bug.)
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -937,6 +937,7 @@ do {
>  
>  struct fpu_insn_ctxt {
>      uint8_t insn_bytes;
> +    uint8_t type;
>      int8_t exn_raised;
>  };
>  
> @@ -956,8 +957,6 @@ static int _get_fpu(
>  {
>      int rc;
>  
> -    fic->exn_raised = -1;
> -
>      fail_if(!ops->get_fpu);
>      rc = ops->get_fpu(fpu_handle_exception, fic, type, ctxt);
>  
> @@ -965,6 +964,8 @@ static int _get_fpu(
>      {
>          unsigned long cr0;
>  
> +        fic->type = type;
> +
>          fail_if(!ops->read_cr);
>          if ( type >= X86EMUL_FPU_xmm )
>          {
> @@ -1006,22 +1007,31 @@ do {
>      rc = _get_fpu(_type, _fic, ctxt, ops);                      \
>      if ( rc ) goto done;                                        \
>  } while (0)
> -#define _put_fpu()                                              \
> +
> +#define check_fpu(_fic)                                         \
>  do {                                                            \
> -    if ( ops->put_fpu != NULL )                                 \
> -        (ops->put_fpu)(ctxt);                                   \
> +    generate_exception_if((_fic)->exn_raised >= 0,              \
> +                          (_fic)->exn_raised);                  \
>  } while (0)
> -#define put_fpu(_fic)                                           \
> +
> +#define check_xmm(_fic)                                         \
>  do {                                                            \
> -    _put_fpu();                                                 \
>      if ( (_fic)->exn_raised == EXC_XM && ops->read_cr &&        \
>           ops->read_cr(4, &cr4, ctxt) == X86EMUL_OKAY &&         \
>           !(cr4 & X86_CR4_OSXMMEXCPT) )				\
>          (_fic)->exn_raised = EXC_UD;                            \
> -    generate_exception_if((_fic)->exn_raised >= 0,              \
> -                          (_fic)->exn_raised);                  \
> +    check_fpu(_fic);                                            \
>  } while (0)

These two check macros seem like they should have an _exn() suffix to
make it clearer what they are actually doing.

>  
> +static void put_fpu(
> +    struct fpu_insn_ctxt *fic,
> +    struct x86_emulate_ctxt *ctxt,
> +    const struct x86_emulate_ops *ops)
> +{
> +    if ( fic->type != X86EMUL_FPU_none && ops->put_fpu )
> +        ops->put_fpu(ctxt);
> +}
> +
>  static inline bool fpu_check_write(void)
>  {
>      uint16_t fsw;
> @@ -7905,7 +7912,8 @@ x86_emulate(
>      ctxt->regs->eflags &= ~X86_EFLAGS_RF;
>  
>   done:
> -    _put_fpu();
> +    if ( rc != X86EMUL_OKAY )
> +        put_fpu(&fic, ctxt, ops);

Unless you assert at complete_insn that rc == X86EMUL_OKAY, you still
might call put_fpu() twice.

You can avoid the conditional here by setting fic->type to
X86EMUL_FPU_none at the end of put_fpu(), which looks like it is
compatible with your later modifications.
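
(Editorial aside: a toy model of this suggestion, using simplified
stand-ins rather than the real emulator types, just to show why resetting
the type on release makes a second call harmless:)

#include <stdio.h>

enum fpu_type { FPU_none, FPU_fpu };

struct fic { enum fpu_type type; };

/* Model of the suggested pattern: release once, then become a no-op. */
static void put_fpu(struct fic *fic)
{
    if ( fic->type != FPU_none )
    {
        printf("releasing FPU state (type %d)\n", fic->type);
        fic->type = FPU_none;   /* any further call is now harmless */
    }
}

int main(void)
{
    struct fic fic = { .type = FPU_fpu };

    put_fpu(&fic);  /* e.g. at complete_insn */
    put_fpu(&fic);  /* e.g. again at done - no double release */

    return 0;
}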

>      put_stub(stub);
>      return rc;
>  #undef state
> --- a/xen/arch/x86/x86_emulate/x86_emulate.h
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.h
> @@ -161,7 +161,9 @@ enum x86_emulate_fpu_type {
>      X86EMUL_FPU_wait, /* WAIT/FWAIT instruction */
>      X86EMUL_FPU_mmx, /* MMX instruction set (%mm0-%mm7) */
>      X86EMUL_FPU_xmm, /* SSE instruction set (%xmm0-%xmm7/15) */
> -    X86EMUL_FPU_ymm  /* AVX/XOP instruction set (%ymm0-%ymm7/15) */
> +    X86EMUL_FPU_ymm, /* AVX/XOP instruction set (%ymm0-%ymm7/15) */
> +    /* This sentinel will never be passed to ->get_fpu(). */

Don't you mean ->put_fpu() here?  Either way, can we assert this fact?

~Andrew

> +    X86EMUL_FPU_none
>  };
>  
>  struct cpuid_leaf
>
>



* Re: [PATCH 2/4] x86emul: centralize put_fpu() invocations
  2017-03-13 11:55   ` Andrew Cooper
@ 2017-03-13 12:31     ` Jan Beulich
  0 siblings, 0 replies; 16+ messages in thread
From: Jan Beulich @ 2017-03-13 12:31 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel

>>> On 13.03.17 at 12:55, <andrew.cooper3@citrix.com> wrote:
> On 13/03/17 11:03, Jan Beulich wrote:
>> @@ -1006,22 +1007,31 @@ do {
>>      rc = _get_fpu(_type, _fic, ctxt, ops);                      \
>>      if ( rc ) goto done;                                        \
>>  } while (0)
>> -#define _put_fpu()                                              \
>> +
>> +#define check_fpu(_fic)                                         \
>>  do {                                                            \
>> -    if ( ops->put_fpu != NULL )                                 \
>> -        (ops->put_fpu)(ctxt);                                   \
>> +    generate_exception_if((_fic)->exn_raised >= 0,              \
>> +                          (_fic)->exn_raised);                  \
>>  } while (0)
>> -#define put_fpu(_fic)                                           \
>> +
>> +#define check_xmm(_fic)                                         \
>>  do {                                                            \
>> -    _put_fpu();                                                 \
>>      if ( (_fic)->exn_raised == EXC_XM && ops->read_cr &&        \
>>           ops->read_cr(4, &cr4, ctxt) == X86EMUL_OKAY &&         \
>>           !(cr4 & X86_CR4_OSXMMEXCPT) )				\
>>          (_fic)->exn_raised = EXC_UD;                            \
>> -    generate_exception_if((_fic)->exn_raised >= 0,              \
>> -                          (_fic)->exn_raised);                  \
>> +    check_fpu(_fic);                                            \
>>  } while (0)
> 
> These two check macros seem like they should have an _exn() suffix to
> make it clearer what they are actually doing.

Will do.

>> @@ -7905,7 +7912,8 @@ x86_emulate(
>>      ctxt->regs->eflags &= ~X86_EFLAGS_RF;
>>  
>>   done:
>> -    _put_fpu();
>> +    if ( rc != X86EMUL_OKAY )
>> +        put_fpu(&fic, ctxt, ops);
> 
> Unless you assert at complete_insn that rc == X86EMUL_OKAY, you still
> might call put_fpu() twice.

Well, reaching complete_insn with rc != X86EMUL_OKAY is
possible, and hence can't be asserted. See the X86EMUL_DONE
handling there. One could assert right before the done label, but
then again reaching complete_insn with any other X86EMUL_*
result would have been a bug even before this series. But
anyway, I guess I'll rather do ...

> You can avoid the conditional here by setting fic->type to
> X86EMUL_FPU_none at the end of put_fpu(), which looks like it is
> compatible with your later modifications.

... this.

>> --- a/xen/arch/x86/x86_emulate/x86_emulate.h
>> +++ b/xen/arch/x86/x86_emulate/x86_emulate.h
>> @@ -161,7 +161,9 @@ enum x86_emulate_fpu_type {
>>      X86EMUL_FPU_wait, /* WAIT/FWAIT instruction */
>>      X86EMUL_FPU_mmx, /* MMX instruction set (%mm0-%mm7) */
>>      X86EMUL_FPU_xmm, /* SSE instruction set (%xmm0-%xmm7/15) */
>> -    X86EMUL_FPU_ymm  /* AVX/XOP instruction set (%ymm0-%ymm7/15) */
>> +    X86EMUL_FPU_ymm, /* AVX/XOP instruction set (%ymm0-%ymm7/15) */
>> +    /* This sentinel will never be passed to ->get_fpu(). */
> 
> Don't you mean ->put_fpu() here?

Definitely not, as patch 3 would then immediately violate this.

>  Either way, can we assert this fact?

I did consider this, but decided against it because of the scalability
aspect (every get_fpu() implementation would then have to have
this). But then again, perhaps you'd be fine with the assertion
being put in _get_fpu(), right next to the call of the hook?
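
(Editorial aside: a generic model of where such an assertion would sit -
at the single funnel point in front of the hook call, so no individual
hook implementation has to repeat the check. Stand-in names only, not the
real emulator code:)

#include <assert.h>
#include <stdio.h>

enum fpu_type { FPU_none, FPU_fpu, FPU_wait };

/* Stand-in for an ops->get_fpu() hook implementation. */
static int hook_get_fpu(enum fpu_type type)
{
    printf("hook asked to activate type %d\n", type);
    return 0;
}

/* Every request funnels through here, so assert the sentinel once. */
static int get_fpu(enum fpu_type type)
{
    assert(type != FPU_none);
    return hook_get_fpu(type);
}

int main(void)
{
    return get_fpu(FPU_fpu);
}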

Jan



* Re: [PATCH 3/4] x86emul: correct handling of FPU insns faulting on memory write
  2017-03-13 11:05 ` [PATCH 3/4] x86emul: correct handling of FPU insns faulting on memory write Jan Beulich
@ 2017-03-13 14:03   ` Andrew Cooper
  2017-03-14  9:15   ` Tian, Kevin
  1 sibling, 0 replies; 16+ messages in thread
From: Andrew Cooper @ 2017-03-13 14:03 UTC (permalink / raw)
  To: Jan Beulich, xen-devel
  Cc: Boris Ostrovsky, Kevin Tian, Paul Durrant, Jun Nakajima,
	Suravee Suthikulpanit

On 13/03/17 11:05, Jan Beulich wrote:
> When an FPU instruction with a memory destination fails during the
> memory write, it should not affect FPU register state. Due to the way
> we emulate FPU (and SIMD) instructions, we can only guarantee this by
> - backing out changes to the FPU register state in such a case or
> - doing a descriptor read and/or page walk up front, perhaps with the
>   stubs accessing the actual memory location then.
> The latter would require a significant change in how the emulator does
> its guest memory accessing, so for now the former variant is being
> chosen.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

> ---
> Note that the state save overhead (unless state hadn't been loaded at
> all before, which should only be possible if a guest is fiddling with
> the instruction stream under emulation) is taken for every FPU insn
> hitting the emulator. We could reduce this to just the ones writing to
> memory, but that would involve quite a few further changes and
> resulting code where even more code paths need to match up with one
> another.

An FPU save isn't the slow part of hitting emulation.  I don't expect
this will be a meaningful overhead.


* Re: [PATCH 3/4] x86emul: correct handling of FPU insns faulting on memory write
  2017-03-13 11:05 ` [PATCH 3/4] x86emul: correct handling of FPU insns faulting on memory write Jan Beulich
  2017-03-13 14:03   ` Andrew Cooper
@ 2017-03-14  9:15   ` Tian, Kevin
  1 sibling, 0 replies; 16+ messages in thread
From: Tian, Kevin @ 2017-03-14  9:15 UTC (permalink / raw)
  To: Jan Beulich, xen-devel
  Cc: Andrew Cooper, Boris Ostrovsky, Paul Durrant, Nakajima, Jun,
	Suravee Suthikulpanit

> From: Jan Beulich [mailto:JBeulich@suse.com]
> Sent: Monday, March 13, 2017 7:05 PM
> 
> When an FPU instruction with a memory destination fails during the memory
> write, it should not affect FPU register state. Due to the way we emulate FPU
> (and SIMD) instructions, we can only guarantee this by
> - backing out changes to the FPU register state in such a case or
> - doing a descriptor read and/or page walk up front, perhaps with the
>   stubs accessing the actual memory location then.
> The latter would require a significant change in how the emulator does its
> guest memory accessing, so for now the former variant is being chosen.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Kevin Tian <kevin.tian@intel.com>


* Re: [PATCH 4/4] x86emul: correct FPU code/data pointers and opcode handling
  2017-03-13 11:05 ` [PATCH 4/4] x86emul: correct FPU code/data pointers and opcode handling Jan Beulich
@ 2017-03-14  9:21   ` Paul Durrant
  2017-03-14 10:56   ` Andrew Cooper
  1 sibling, 0 replies; 16+ messages in thread
From: Paul Durrant @ 2017-03-14  9:21 UTC (permalink / raw)
  To: 'Jan Beulich', xen-devel; +Cc: Andrew Cooper

> -----Original Message-----
> From: Jan Beulich [mailto:JBeulich@suse.com]
> Sent: 13 March 2017 11:06
> To: xen-devel <xen-devel@lists.xenproject.org>
> Cc: Andrew Cooper <Andrew.Cooper3@citrix.com>; Paul Durrant
> <Paul.Durrant@citrix.com>
> Subject: [PATCH 4/4] x86emul: correct FPU code/data pointers and opcode
> handling
> 
> Prevent leaking the hypervisor ones (stored by hardware during stub
> execution), at once making sure the guest sees correct values there.
> This piggybacks on the backout logic used to deal with write faults of
> FPU insns.
> 
> Deliberately ignore the NO_FPU_SEL feature here: Honoring it would
> merely mean extra code with no benefit (once we XRSTOR state, the
> selector values will simply be lost anyway).
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> 

emulate.c parts...

Reviewed-by: Paul Durrant <paul.durrant@citrix.com>

> --- a/tools/tests/x86_emulator/x86_emulate.c
> +++ b/tools/tests/x86_emulator/x86_emulate.c
> @@ -140,7 +140,8 @@ int emul_test_get_fpu(
> 
>  void emul_test_put_fpu(
>      struct x86_emulate_ctxt *ctxt,
> -    enum x86_emulate_fpu_type backout)
> +    enum x86_emulate_fpu_type backout,
> +    const struct x86_emul_fpu_aux *aux)
>  {
>      /* TBD */
>  }
> --- a/tools/tests/x86_emulator/x86_emulate.h
> +++ b/tools/tests/x86_emulator/x86_emulate.h
> @@ -181,4 +181,5 @@ int emul_test_get_fpu(
> 
>  void emul_test_put_fpu(
>      struct x86_emulate_ctxt *ctxt,
> -    enum x86_emulate_fpu_type backout);
> +    enum x86_emulate_fpu_type backout,
> +    const struct x86_emul_fpu_aux *aux);
> --- a/xen/arch/x86/hvm/emulate.c
> +++ b/xen/arch/x86/hvm/emulate.c
> @@ -1658,12 +1658,73 @@ static int hvmemul_get_fpu(
> 
>  static void hvmemul_put_fpu(
>      struct x86_emulate_ctxt *ctxt,
> -    enum x86_emulate_fpu_type backout)
> +    enum x86_emulate_fpu_type backout,
> +    const struct x86_emul_fpu_aux *aux)
>  {
>      struct vcpu *curr = current;
> 
>      curr->arch.hvm_vcpu.fpu_exception_callback = NULL;
> 
> +    if ( aux )
> +    {
> +        typeof(curr->arch.xsave_area->fpu_sse) *fpu_ctxt = curr-
> >arch.fpu_ctxt;
> +        bool dval = aux->dval;
> +        int mode = hvm_guest_x86_mode(curr);
> +
> +        ASSERT(backout == X86EMUL_FPU_none);
> +        /*
> +         * Latch current register state so that we can replace FIP/FDP/FOP
> +         * (which have values resulting from our own invocation of the FPU
> +         * instruction during emulation).
> +         * NB: See also the comment in hvmemul_get_fpu(); we don't need to
> +         * set ->fpu_dirtied here as it is going to be cleared below, and
> +         * we also don't need to reload FCW as we're forcing full state to
> +         * be reloaded anyway.
> +         */
> +        save_fpu_enable();
> +
> +        if ( boot_cpu_has(X86_FEATURE_FDP_EXCP_ONLY) &&
> +             !(fpu_ctxt->fsw & ~fpu_ctxt->fcw & 0x003f) )
> +            dval = false;
> +
> +        switch ( mode )
> +        {
> +        case 8:
> +            fpu_ctxt->fip.addr = aux->ip;
> +            if ( dval )
> +                fpu_ctxt->fdp.addr = aux->dp;
> +            fpu_ctxt->x[FPU_WORD_SIZE_OFFSET] = 8;
> +            break;
> +
> +        case 4: case 2:
> +            fpu_ctxt->fip.offs = aux->ip;
> +            fpu_ctxt->fip.sel  = aux->cs;
> +            if ( dval )
> +            {
> +                fpu_ctxt->fdp.offs = aux->dp;
> +                fpu_ctxt->fdp.sel  = aux->ds;
> +            }
> +            fpu_ctxt->x[FPU_WORD_SIZE_OFFSET] = mode;
> +            break;
> +
> +        case 0: case 1:
> +            fpu_ctxt->fip.addr = aux->ip | (aux->cs << 4);
> +            if ( dval )
> +                fpu_ctxt->fdp.addr = aux->dp | (aux->ds << 4);
> +            fpu_ctxt->x[FPU_WORD_SIZE_OFFSET] = 2;
> +            break;
> +
> +        default:
> +            ASSERT_UNREACHABLE();
> +            return;
> +        }
> +
> +        fpu_ctxt->fop = aux->op;
> +
> +        /* Re-use backout code below. */
> +        backout = X86EMUL_FPU_fpu;
> +    }
> +
>      if ( backout == X86EMUL_FPU_fpu )
>      {
>          /*
> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -536,6 +536,55 @@ struct operand {
>          unsigned long    off;
>      } mem;
>  };
> +
> +struct x86_emulate_state {
> +    unsigned int op_bytes, ad_bytes;
> +
> +    enum {
> +        ext_none = vex_none,
> +        ext_0f   = vex_0f,
> +        ext_0f38 = vex_0f38,
> +        ext_0f3a = vex_0f3a,
> +        /*
> +         * For XOP use values such that the respective instruction field
> +         * can be used without adjustment.
> +         */
> +        ext_8f08 = 8,
> +        ext_8f09,
> +        ext_8f0a,
> +    } ext;
> +    uint8_t modrm, modrm_mod, modrm_reg, modrm_rm;
> +    uint8_t rex_prefix;
> +    bool lock_prefix;
> +    bool not_64bit; /* Instruction not available in 64bit. */
> +    bool fpu_ctrl;  /* Instruction is an FPU control one. */
> +    opcode_desc_t desc;
> +    union vex vex;
> +    union evex evex;
> +    enum simd_opsize simd_size;
> +
> +    /*
> +     * Data operand effective address (usually computed from ModRM).
> +     * Default is a memory operand relative to segment DS.
> +     */
> +    struct operand ea;
> +
> +    /* Immediate operand values, if any. Use otherwise unused fields. */
> +#define imm1 ea.val
> +#define imm2 ea.orig_val
> +
> +    unsigned long ip;
> +    struct cpu_user_regs *regs;
> +
> +#ifndef NDEBUG
> +    /*
> +     * Track caller of x86_decode_insn() to spot missing as well as
> +     * premature calls to x86_emulate_free_state().
> +     */
> +    void *caller;
> +#endif
> +};
> +
>  #ifdef __x86_64__
>  #define PTR_POISON ((void *)0x8086000000008086UL) /* non-canonical */
>  #else
> @@ -1027,13 +1076,48 @@ do {
>  static void put_fpu(
>      struct fpu_insn_ctxt *fic,
>      bool failed_late,
> +    const struct x86_emulate_state *state,
>      struct x86_emulate_ctxt *ctxt,
>      const struct x86_emulate_ops *ops)
>  {
>      if ( unlikely(failed_late) && fic->type == X86EMUL_FPU_fpu )
> -        ops->put_fpu(ctxt, X86EMUL_FPU_fpu);
> +        ops->put_fpu(ctxt, X86EMUL_FPU_fpu, NULL);
> +    else if ( unlikely(fic->type == X86EMUL_FPU_fpu) && !state->fpu_ctrl )
> +    {
> +        struct x86_emul_fpu_aux aux = {
> +            .ip = ctxt->regs->r(ip),
> +            .cs = ctxt->regs->cs,
> +            .op = ((ctxt->opcode & 7) << 8) | state->modrm,
> +        };
> +        struct segment_register sreg;
> +
> +        if ( ops->read_segment &&
> +             ops->read_segment(x86_seg_cs, &sreg, ctxt) == X86EMUL_OKAY )
> +            aux.cs = sreg.sel;
> +        if ( state->ea.type == OP_MEM )
> +        {
> +            aux.dp = state->ea.mem.off;
> +            if ( ops->read_segment &&
> +                 ops->read_segment(state->ea.mem.seg, &sreg,
> +                                   ctxt) == X86EMUL_OKAY )
> +                aux.ds = sreg.sel;
> +            else
> +                switch ( state->ea.mem.seg )
> +                {
> +                case x86_seg_cs: aux.ds = ctxt->regs->cs; break;
> +                case x86_seg_ds: aux.ds = ctxt->regs->ds; break;
> +                case x86_seg_es: aux.ds = ctxt->regs->es; break;
> +                case x86_seg_fs: aux.ds = ctxt->regs->fs; break;
> +                case x86_seg_gs: aux.ds = ctxt->regs->gs; break;
> +                case x86_seg_ss: aux.ds = ctxt->regs->ss; break;
> +                default:         ASSERT_UNREACHABLE();    break;
> +                }
> +            aux.dval = true;
> +        }
> +        ops->put_fpu(ctxt, X86EMUL_FPU_none, &aux);
> +    }
>      else if ( fic->type != X86EMUL_FPU_none && ops->put_fpu )
> -        ops->put_fpu(ctxt, X86EMUL_FPU_none);
> +        ops->put_fpu(ctxt, X86EMUL_FPU_none, NULL);
>  }
> 
>  static inline bool fpu_check_write(void)
> @@ -2086,53 +2170,6 @@ int x86emul_unhandleable_rw(
>      return X86EMUL_UNHANDLEABLE;
>  }
> 
> -struct x86_emulate_state {
> -    unsigned int op_bytes, ad_bytes;
> -
> -    enum {
> -        ext_none = vex_none,
> -        ext_0f   = vex_0f,
> -        ext_0f38 = vex_0f38,
> -        ext_0f3a = vex_0f3a,
> -        /*
> -         * For XOP use values such that the respective instruction field
> -         * can be used without adjustment.
> -         */
> -        ext_8f08 = 8,
> -        ext_8f09,
> -        ext_8f0a,
> -    } ext;
> -    uint8_t modrm, modrm_mod, modrm_reg, modrm_rm;
> -    uint8_t rex_prefix;
> -    bool lock_prefix;
> -    bool not_64bit; /* Instruction not available in 64bit. */
> -    opcode_desc_t desc;
> -    union vex vex;
> -    union evex evex;
> -    enum simd_opsize simd_size;
> -
> -    /*
> -     * Data operand effective address (usually computed from ModRM).
> -     * Default is a memory operand relative to segment DS.
> -     */
> -    struct operand ea;
> -
> -    /* Immediate operand values, if any. Use otherwise unused fields. */
> -#define imm1 ea.val
> -#define imm2 ea.orig_val
> -
> -    unsigned long ip;
> -    struct cpu_user_regs *regs;
> -
> -#ifndef NDEBUG
> -    /*
> -     * Track caller of x86_decode_insn() to spot missing as well as
> -     * premature calls to x86_emulate_free_state().
> -     */
> -    void *caller;
> -#endif
> -};
> -
>  /* Helper definitions. */
>  #define op_bytes (state->op_bytes)
>  #define ad_bytes (state->ad_bytes)
> @@ -4231,8 +4268,10 @@ x86_emulate(
>                  dst.bytes = 4;
>                  break;
>              case 4: /* fldenv - TODO */
> +                state->fpu_ctrl = true;
>                  goto cannot_emulate;
>              case 5: /* fldcw m2byte */
> +                state->fpu_ctrl = true;
>                  if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &src.val,
>                                       2, ctxt)) != X86EMUL_OKAY )
>                      goto done;
> @@ -4240,8 +4279,10 @@ x86_emulate(
>                  dst.type = OP_NONE;
>                  break;
>              case 6: /* fnstenv - TODO */
> +                state->fpu_ctrl = true;
>                  goto cannot_emulate;
>              case 7: /* fnstcw m2byte */
> +                state->fpu_ctrl = true;
>                  emulate_fpu_insn_memdst("fnstcw", dst.val);
>                  dst.bytes = 2;
>                  break;
> @@ -4330,6 +4371,7 @@ x86_emulate(
>          case 0xe3: /* fninit */
>          case 0xe4: /* fnsetpm - 287 only, ignored by 387 */
>          /* case 0xe5: frstpm - 287 only, #UD on 387 */
> +            state->fpu_ctrl = true;
>              emulate_fpu_insn_stub(0xdb, modrm);
>              break;
>          default:
> @@ -4473,8 +4515,10 @@ x86_emulate(
>                  break;
>              case 4: /* frstor - TODO */
>              case 6: /* fnsave - TODO */
> +                state->fpu_ctrl = true;
>                  goto cannot_emulate;
>              case 7: /* fnstsw m2byte */
> +                state->fpu_ctrl = true;
>                  emulate_fpu_insn_memdst("fnstsw", dst.val);
>                  dst.bytes = 2;
>                  break;
> @@ -4547,6 +4591,7 @@ x86_emulate(
>          {
>          case 0xe0:
>              /* fnstsw %ax */
> +            state->fpu_ctrl = true;
>              dst.bytes = 2;
>              dst.type = OP_REG;
>              dst.reg = (void *)&_regs.ax;
> @@ -7892,7 +7937,7 @@ x86_emulate(
>      }
> 
>   complete_insn: /* Commit shadow register state. */
> -    put_fpu(&fic, false, ctxt, ops);
> +    put_fpu(&fic, false, state, ctxt, ops);
> 
>      /* Zero the upper 32 bits of %rip if not in 64-bit mode. */
>      if ( !mode_64bit() )
> @@ -7917,7 +7962,8 @@ x86_emulate(
> 
>   done:
>      if ( rc != X86EMUL_OKAY )
> -        put_fpu(&fic, fic.insn_bytes > 0 && dst.type == OP_MEM, ctxt, ops);
> +        put_fpu(&fic, fic.insn_bytes > 0 && dst.type == OP_MEM,
> +                state, ctxt, ops);
>      put_stub(stub);
>      return rc;
>  #undef state
> --- a/xen/arch/x86/x86_emulate/x86_emulate.h
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.h
> @@ -135,6 +135,13 @@ struct __attribute__((__packed__)) segme
>      uint64_t   base;
>  };
> 
> +struct x86_emul_fpu_aux {
> +    unsigned long ip, dp;
> +    uint16_t cs, ds;
> +    unsigned int op:11;
> +    unsigned int dval:1;
> +};
> +
>  /*
>   * Return codes from state-accessor functions and from x86_emulate().
>   */
> @@ -439,10 +446,12 @@ struct x86_emulate_ops
>       *  the get_fpu one having got called before!
>       * @backout: Undo updates to the specified register file (can, besides
>       *           X86EMUL_FPU_none, only be X86EMUL_FPU_fpu at present);
> +     * @aux: Packaged up FIP/FDP/FOP values to load into FPU.
>       */
>      void (*put_fpu)(
>          struct x86_emulate_ctxt *ctxt,
> -        enum x86_emulate_fpu_type backout);
> +        enum x86_emulate_fpu_type backout,
> +        const struct x86_emul_fpu_aux *aux);
> 
>      /* invlpg: Invalidate paging structures which map addressed byte. */
>      int (*invlpg)(
> 



* Re: [PATCH 4/4] x86emul: correct FPU code/data pointers and opcode handling
  2017-03-13 11:05 ` [PATCH 4/4] x86emul: correct FPU code/data pointers and opcode handling Jan Beulich
  2017-03-14  9:21   ` Paul Durrant
@ 2017-03-14 10:56   ` Andrew Cooper
  2017-03-14 11:04     ` Jan Beulich
  1 sibling, 1 reply; 16+ messages in thread
From: Andrew Cooper @ 2017-03-14 10:56 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Paul Durrant

On 13/03/17 11:05, Jan Beulich wrote:
> @@ -1027,13 +1076,48 @@ do {
>  static void put_fpu(
>      struct fpu_insn_ctxt *fic,
>      bool failed_late,
> +    const struct x86_emulate_state *state,
>      struct x86_emulate_ctxt *ctxt,
>      const struct x86_emulate_ops *ops)
>  {
>      if ( unlikely(failed_late) && fic->type == X86EMUL_FPU_fpu )
> -        ops->put_fpu(ctxt, X86EMUL_FPU_fpu);
> +        ops->put_fpu(ctxt, X86EMUL_FPU_fpu, NULL);
> +    else if ( unlikely(fic->type == X86EMUL_FPU_fpu) && !state->fpu_ctrl )
> +    {
> +        struct x86_emul_fpu_aux aux = {
> +            .ip = ctxt->regs->r(ip),
> +            .cs = ctxt->regs->cs,
> +            .op = ((ctxt->opcode & 7) << 8) | state->modrm,
> +        };
> +        struct segment_register sreg;
> +
> +        if ( ops->read_segment &&
> +             ops->read_segment(x86_seg_cs, &sreg, ctxt) == X86EMUL_OKAY )

Why are the read_segment hooks optional here?

In the case that we are hitting FPU instructions for emulation, we can
reasonably require read/write_segment hooks.

In particular, regs->%sreg are only valid for PV guests.

> +            aux.cs = sreg.sel;
> +        if ( state->ea.type == OP_MEM )
> +        {
> +            aux.dp = state->ea.mem.off;
> +            if ( ops->read_segment &&
> +                 ops->read_segment(state->ea.mem.seg, &sreg,
> +                                   ctxt) == X86EMUL_OKAY )
> +                aux.ds = sreg.sel;
> +            else
> +                switch ( state->ea.mem.seg )
> +                {
> +                case x86_seg_cs: aux.ds = ctxt->regs->cs; break;
> +                case x86_seg_ds: aux.ds = ctxt->regs->ds; break;
> +                case x86_seg_es: aux.ds = ctxt->regs->es; break;
> +                case x86_seg_fs: aux.ds = ctxt->regs->fs; break;
> +                case x86_seg_gs: aux.ds = ctxt->regs->gs; break;
> +                case x86_seg_ss: aux.ds = ctxt->regs->ss; break;
> +                default:         ASSERT_UNREACHABLE();    break;
> +                }
> +            aux.dval = true;
> +        }
> +        ops->put_fpu(ctxt, X86EMUL_FPU_none, &aux);
> +    }
>      else if ( fic->type != X86EMUL_FPU_none && ops->put_fpu )
> -        ops->put_fpu(ctxt, X86EMUL_FPU_none);
> +        ops->put_fpu(ctxt, X86EMUL_FPU_none, NULL);
>  }
>  
>  static inline bool fpu_check_write(void)
> @@ -4231,8 +4268,10 @@ x86_emulate(
>                  dst.bytes = 4;
>                  break;
>              case 4: /* fldenv - TODO */
> +                state->fpu_ctrl = true;

Arguably, state->fpu_ctrl is a decode property rather than an emulation
property.  It is the kind of information which we might plausibly want
an x86_insn_*() accessor for.
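
(Editorial aside: no such accessor exists in the tree today;
hypothetically, in the style of the existing x86_insn_*() helpers it
might look like the below, with the state structure trimmed to the one
field involved:)

#include <stdbool.h>

/* Trimmed stand-in for struct x86_emulate_state. */
struct x86_emulate_state {
    bool fpu_ctrl;  /* Instruction is an FPU control one. */
};

/* Hypothetical accessor exposing the decode-time property. */
static inline bool x86_insn_is_fpu_ctrl(const struct x86_emulate_state *state)
{
    return state->fpu_ctrl;
}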

~Andrew

>                  goto cannot_emulate;
>              case 5: /* fldcw m2byte */
> +                state->fpu_ctrl = true;
>                  if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &src.val,
>                                       2, ctxt)) != X86EMUL_OKAY )
>                      goto done;
>



* Re: [PATCH 4/4] x86emul: correct FPU code/data pointers and opcode handling
  2017-03-14 10:56   ` Andrew Cooper
@ 2017-03-14 11:04     ` Jan Beulich
  0 siblings, 0 replies; 16+ messages in thread
From: Jan Beulich @ 2017-03-14 11:04 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel, Paul Durrant

>>> On 14.03.17 at 11:56, <andrew.cooper3@citrix.com> wrote:
> On 13/03/17 11:05, Jan Beulich wrote:
>> @@ -1027,13 +1076,48 @@ do {
>>  static void put_fpu(
>>      struct fpu_insn_ctxt *fic,
>>      bool failed_late,
>> +    const struct x86_emulate_state *state,
>>      struct x86_emulate_ctxt *ctxt,
>>      const struct x86_emulate_ops *ops)
>>  {
>>      if ( unlikely(failed_late) && fic->type == X86EMUL_FPU_fpu )
>> -        ops->put_fpu(ctxt, X86EMUL_FPU_fpu);
>> +        ops->put_fpu(ctxt, X86EMUL_FPU_fpu, NULL);
>> +    else if ( unlikely(fic->type == X86EMUL_FPU_fpu) && !state->fpu_ctrl )
>> +    {
>> +        struct x86_emul_fpu_aux aux = {
>> +            .ip = ctxt->regs->r(ip),
>> +            .cs = ctxt->regs->cs,
>> +            .op = ((ctxt->opcode & 7) << 8) | state->modrm,
>> +        };
>> +        struct segment_register sreg;
>> +
>> +        if ( ops->read_segment &&
>> +             ops->read_segment(x86_seg_cs, &sreg, ctxt) == X86EMUL_OKAY )
> 
> Why are the read_segment hooks optional here?
> 
> In the case that we are hitting FPU instructions for emulation, we can
> reasonably require read/write_segment hooks.
> 
> In particular, regs->%sreg are only valid for PV guests.

That's the precise reason I made their availability optional here:
a caller caring only about PV guests has no need to set these, as
long as only the selector values are of interest for whatever might
get emulated. And it is clear that we can't distinguish guest types
here.

>> @@ -4231,8 +4268,10 @@ x86_emulate(
>>                  dst.bytes = 4;
>>                  break;
>>              case 4: /* fldenv - TODO */
>> +                state->fpu_ctrl = true;
> 
> Arguably, state->fpu_ctrl is a decode property rather than an emulation
> property.  It is the kind of information which we might plausibly want
> an x86_insn_*() accessor for.

If we had a consumer for this, I'd agree, but adding an accessor
without a user and moving the changes here to the decode phase
(increasing code churn) seems counterproductive to me.

Jan



* Re: [PATCH][XTF] add FPU/SIMD register state test
  2017-03-13 11:07 ` [PATCH][XTF] add FPU/SIMD register state test Jan Beulich
@ 2017-03-14 11:36   ` Andrew Cooper
  2017-03-14 11:54     ` Jan Beulich
  0 siblings, 1 reply; 16+ messages in thread
From: Andrew Cooper @ 2017-03-14 11:36 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Paul C Lai, Suravee Suthikulpanit

On 13/03/17 11:07, Jan Beulich wrote:
> Add tests to verify that
> - FPU insns leave correct (guest) values in FIP/FDP/FOP/FCS/FDS,
> - FPU insns writing memory don't update FPU register state when the
>   write faults (at the example of FISTP),
> - VCVTPS2PH (once implemented in the emulator) doesn't update MXCSR
>   if its write faults (VCVTPS2PH currently is the only SIMD insn
>   writing to memory and updating register state).
>
> Note that the AMD-specific code in fpu_fxrstor() and xrstor() causes
> the FISTP part of this test to always fail. I don't really have any
> idea what to do about this (other than perhaps skipping the test on AMD
> altogether).
>
> Note further that the FCS part of 64-bit variant of the FSTP test also
> always fails on AMD, since, other than on Intel, {F,}XRSTOR with REX.W
> set do not appear to clear FCS/FDS (I can't find any statement either
> way in the PM).
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> TBD: It is not clear to me how to deal with the native execution
>      failure of VCVTPS2PH on at least some Intel models.

As a general comment, tests dealing with quirks like this should
tolerate real hardware behaviour, without causing the test to fail. 
There is nothing useful from having tests failing in cases we cant/wont
do anything to fix.

As for the Intel behaviour, is this documented, or does it have an
erratum reference?

For the AMD behaviour, some more thought is going to be required.

>
> --- a/include/arch/x86/cpuid.h
> +++ b/include/arch/x86/cpuid.h
> @@ -77,6 +77,7 @@ static inline bool cpu_has(unsigned int
>  #define cpu_has_pcid            cpu_has(X86_FEATURE_PCID)
>  #define cpu_has_xsave           cpu_has(X86_FEATURE_XSAVE)
>  #define cpu_has_avx             cpu_has(X86_FEATURE_AVX)
> +#define cpu_has_f16c            cpu_has(X86_FEATURE_F16C)
>  
>  #define cpu_has_syscall         cpu_has(X86_FEATURE_SYSCALL)
>  #define cpu_has_nx              cpu_has(X86_FEATURE_NX)
> --- /dev/null
> +++ b/tests/fpu-state/Makefile
> @@ -0,0 +1,9 @@
> +include $(ROOT)/build/common.mk
> +
> +NAME      := fpu-state
> +CATEGORY  := functional
> +TEST-ENVS := hvm64 hvm32pse
> +
> +obj-perenv += main.o
> +
> +include $(ROOT)/build/gen.mk
> --- /dev/null
> +++ b/tests/fpu-state/main.c
> @@ -0,0 +1,184 @@
> +/**
> + * @file tests/fpu-state/main.c
> + * @ref test-fpu-state - Emulation of FPU state
> + *
> + * @page test-fpu-state FPU State Emulation
> + *
> + * FPU code/data pointers and opcode must not be the ones resulting
> + * from the stub execution in the hypervisor.
> + *
> + * FPU and SIMD instructions faulting during memory write must not
> + * update the respective register files.
> + *
> + * @see tests/fpu-state/main.c
> + */
> +#include <xtf.h>
> +
> +#include <arch/x86/exinfo.h>
> +
> +const char test_title[] = "FPU State";
> +
> +struct x87_env {

Can this include 32bit in the name?  The 16bit layout is different.

In fact, if you could put it in arch/x86/x87.h, that would be even more
helpful.

> +    uint16_t cw, :16;
> +    uint16_t sw, :16;
> +    uint16_t tw, :16;
> +    uint32_t ip;
> +    uint16_t cs;
> +    uint16_t op:11, :5;
> +    uint32_t dp;
> +    uint16_t ds, :16;
> +};
> +
> +void probe_fstp(bool force)
> +{
> +    const uint8_t *fstp_offs;
> +    uint32_t flt;
> +    struct x87_env fenv;
> +
> +    fenv.cw = 0x35f; /* unmask PE */
> +    asm volatile ( "fninit;"
> +                   "fldcw %[cw];"
> +                   "fldpi;"
> +                   "mov $1f, %[offs];"

You can have the compiler do all of this by using a named label, and using

extern char fstp_offs[] asm("$label");

~Andrew

> +                   "test %[fep], %[fep];"
> +                   "jz 1f;"
> +                   _ASM_XEN_FEP
> +                   "1: fstps %[data]; 2:"
> +                   : [offs] "=&g" (fstp_offs), [data] "=m" (flt)
> +                   : [cw] "m" (fenv.cw), [fep] "q" (force) );
> +
> +    asm ( "fnstenv %0" : "=m" (fenv) );
> +    if ( fenv.ip != (unsigned long)fstp_offs )
> +        xtf_failure("Fail: FIP wrong (%08x)\n", fenv.ip);
> +    if ( fenv.cs && fenv.cs != __KERN_CS )
> +        xtf_failure("Fail: FCS wrong (%04x)\n", fenv.cs);
> +    if ( fenv.dp != (unsigned long)&flt )
> +        xtf_failure("Fail: FDP wrong (%08x)\n", fenv.dp);
> +    if ( fenv.ds && fenv.ds != __KERN_DS )
> +        xtf_failure("Fail: FDS wrong (%04x)\n", fenv.ds);
> +    /* Skip possible opcode prefixes before checking the opcode. */
> +    while ( (fstp_offs[0] & ~7) != 0xd8 )
> +        ++fstp_offs;
> +    if ( fenv.op && fenv.op != (((fstp_offs[0] & 7) << 8) | fstp_offs[1]) )
> +        xtf_failure("Fail: FOP wrong (%03x)\n", fenv.op);
> +}
>


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH][XTF] add FPU/SIMD register state test
  2017-03-14 11:36   ` Andrew Cooper
@ 2017-03-14 11:54     ` Jan Beulich
  0 siblings, 0 replies; 16+ messages in thread
From: Jan Beulich @ 2017-03-14 11:54 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel, Paul C Lai, Suravee Suthikulpanit

>>> On 14.03.17 at 12:36, <andrew.cooper3@citrix.com> wrote:
> On 13/03/17 11:07, Jan Beulich wrote:
>> Add tests to verify that
>> - FPU insns leave correct (guest) values in FIP/FDP/FOP/FCS/FDS,
>> - FPU insns writing memory don't update FPU register state when the
>>   write faults (at the example of FISTP),
>> - VCVTPS2PH (once implemented in the emulator) doesn't update MXCSR
>>   if its write faults (VCVTPS2PH currently is the only SIMD insn
>>   writing to memory and updating register state).
>>
>> Note that the AMD-specific code in fpu_fxrstor() and xrstor() causes
>> the FISTP part of this test to always fail. I don't really have any
>> idea what to do about this (other than perhaps skipping the test on AMD
>> altogether).
>>
>> Note further that the FCS part of the 64-bit variant of the FSTP test
>> also always fails on AMD, since, unlike on Intel, {F,}XRSTOR with REX.W
>> set do not appear to clear FCS/FDS (I can't find any statement either
>> way in the PM).
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> ---
>> TBD: It is not clear to me how to deal with the native execution
>>      failure of VCVTPS2PH on at least some Intel models.
> 
> As a general comment, tests dealing with quirks like this should
> tolerate real hardware behaviour, without causing the test to fail.
> There is nothing useful in having tests fail in cases we can't/won't
> do anything to fix.
> 
> As for the Intel behaviour, is this documented, or does it have an
> erratum reference?

Neither - I am waiting for them to confirm whether this is an erratum,
or whether (for some strange reason) this is intended behavior.

> For the AMD behaviour, some more thought is going to be required.

One option I've been considering is to dynamically select between
xtf_failure() and xtf_warning() for the problematic checks.
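
Roughly along these lines (a sketch only - the vendor check is an
assumption, XTF may not spell it exactly this way):

    /*
     * Sketch: keep reporting a hard failure normally, but downgrade the
     * checks known to behave differently on AMD to a warning there.
     * vendor_is_amd() is an assumed helper, not necessarily XTF's
     * actual name for it.
     */
    void (*report)(const char *fmt, ...) =
        vendor_is_amd() ? xtf_warning : xtf_failure;

    if ( fenv.cs && fenv.cs != __KERN_CS )
        report("Fail: FCS wrong (%04x)\n", fenv.cs);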

>> +void probe_fstp(bool force)
>> +{
>> +    const uint8_t *fstp_offs;
>> +    uint32_t flt;
>> +    struct x87_env fenv;
>> +
>> +    fenv.cw = 0x35f; /* unmask PE */
>> +    asm volatile ( "fninit;"
>> +                   "fldcw %[cw];"
>> +                   "fldpi;"
>> +                   "mov $1f, %[offs];"
> 
> You can have the compiler do all of this by using a named label, and using
> 
> extern char fstp_offs[] asm("$label");

Is there any benefit to this? To me this looks uglier than what I
have now.
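
For reference, the suggestion would look roughly like this (label and
symbol names are illustrative only):

    /*
     * Sketch of the named-label variant: the asm()-renamed extern lets
     * the compiler resolve the address directly, at the cost of a global
     * label which must not end up emitted more than once.
     */
    extern const char fstp_insn[] asm("probe_fstp_insn");

    asm volatile ( "fninit;"
                   "fldcw %[cw];"
                   "fldpi;"
                   "test %[fep], %[fep];"
                   "jz probe_fstp_insn;"
                   _ASM_XEN_FEP
                   "probe_fstp_insn: fstps %[data];"
                   : [data] "=m" (flt)
                   : [cw] "m" (fenv.cw), [fep] "q" (force) );

    asm ( "fnstenv %0" : "=m" (fenv) );
    if ( fenv.ip != (unsigned long)fstp_insn )
        xtf_failure("Fail: FIP wrong (%08x)\n", fenv.ip);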

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2017-03-14 11:54 UTC | newest]

Thread overview: 16+ messages
2017-03-13 10:56 [PATCH 0/4] x86emul: FPU handling corrections Jan Beulich
2017-03-13 11:03 ` [PATCH 1/4] x86emul: fold exit paths Jan Beulich
2017-03-13 11:26   ` Andrew Cooper
2017-03-13 11:03 ` [PATCH 2/4] x86emul: centralize put_fpu() invocations Jan Beulich
2017-03-13 11:55   ` Andrew Cooper
2017-03-13 12:31     ` Jan Beulich
2017-03-13 11:05 ` [PATCH 3/4] x86emul: correct handling of FPU insns faulting on memory write Jan Beulich
2017-03-13 14:03   ` Andrew Cooper
2017-03-14  9:15   ` Tian, Kevin
2017-03-13 11:05 ` [PATCH 4/4] x86emul: correct FPU code/data pointers and opcode handling Jan Beulich
2017-03-14  9:21   ` Paul Durrant
2017-03-14 10:56   ` Andrew Cooper
2017-03-14 11:04     ` Jan Beulich
2017-03-13 11:07 ` [PATCH][XTF] add FPU/SIMD register state test Jan Beulich
2017-03-14 11:36   ` Andrew Cooper
2017-03-14 11:54     ` Jan Beulich
