* [PATCH 0/5] x86: further insn emulator improvements
@ 2016-09-08 13:36 Jan Beulich
  2016-09-08 13:42 ` [PATCH 1/5] x86emul: support UMIP Jan Beulich
                   ` (4 more replies)
  0 siblings, 5 replies; 20+ messages in thread
From: Jan Beulich @ 2016-09-08 13:36 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper

These are really independent of

and I prefer to keep them as a separate series, but they won't apply
without that one in place. The final two I decided to pick up from
Mihai, as it seemed natural for me to do the rebasing on top of the
major earlier changes, and as I'd like to get the original issue
(certain Windows drivers using these insns) dealt with in 4.8.

1: support UMIP
2: consolidate segment register handling
3: support RTM instructions
4: add support for {,v}movq xmm,xmm/m64
5: add support for {,v}movd {,x}mm,r/m32 and {,v}movq {,x}mm,r/m64

Signed-off-by: Jan Beulich <jbeulich@suse.com>


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


* [PATCH 1/5] x86emul: support UMIP
  2016-09-08 13:36 [PATCH 0/5] x86: further insn emulator improvements Jan Beulich
@ 2016-09-08 13:42 ` Jan Beulich
  2016-09-30 10:32   ` Andrew Cooper
  2016-09-08 13:43 ` [PATCH 2/5] x86emul: consolidate segment register handling Jan Beulich
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 20+ messages in thread
From: Jan Beulich @ 2016-09-08 13:42 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper


To make this complete, also add support for SLDT and STR. Note that by
just looking at the guest's CR4 bit, this emulation is independent of
whether the UMIP feature is actually made available to guests.
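For context: architecturally, UMIP makes SGDT, SIDT, SLDT, SMSW and STR
raise #GP(0) at CPL > 0 when CR4.UMIP is set, which is exactly what
is_umip() below checks for. A 64-bit user-mode probe along these lines
(my illustration, not part of the patch) would fault on a UMIP-enforcing
system and print the IDTR otherwise:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        struct __attribute__((packed)) {
            uint16_t limit;
            uint64_t base;
        } idtr;

        /* SIDT is one of the five UMIP-restricted instructions; with
         * CR4.UMIP set it raises #GP(0) at CPL 3 instead of storing. */
        asm volatile ( "sidt %0" : "=m" (idtr) );
        printf("IDT base %#jx limit %#x\n", (uintmax_t)idtr.base, idtr.limit);
        return 0;
    }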

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -182,7 +182,7 @@ static const opcode_desc_t opcode_table[
 
 static const opcode_desc_t twobyte_table[256] = {
     /* 0x00 - 0x07 */
-    SrcMem16|ModRM, ImplicitOps|ModRM, ModRM, ModRM,
+    ModRM, ImplicitOps|ModRM, ModRM, ModRM,
     0, ImplicitOps, ImplicitOps, ImplicitOps,
     /* 0x08 - 0x0F */
     ImplicitOps, ImplicitOps, 0, ImplicitOps,
@@ -421,6 +421,7 @@ typedef union {
 /* Control register flags. */
 #define CR0_PE    (1<<0)
 #define CR4_TSD   (1<<2)
+#define CR4_UMIP  (1<<11)
 
 /* EFLAGS bit definitions. */
 #define EFLG_VIP  (1<<20)
@@ -1484,6 +1485,17 @@ static bool is_aligned(enum x86_segment
     return !((reg.base + offs) & (size - 1));
 }
 
+static bool is_umip(struct x86_emulate_ctxt *ctxt,
+                    const struct x86_emulate_ops *ops)
+{
+    unsigned long cr4;
+
+    /* Intentionally not using mode_ring0() here to avoid its fail_if(). */
+    return get_cpl(ctxt, ops) > 0 &&
+           ops->read_cr && ops->read_cr(4, &cr4, ctxt) == X86EMUL_OKAY &&
+           (cr4 & CR4_UMIP);
+}
+
 /* Inject a software interrupt/exception, emulating if needed. */
 static int inject_swint(enum x86_swint_type type,
                         uint8_t vector, uint8_t insn_len,
@@ -2051,10 +2063,20 @@ x86_decode(
             break;
 
         case ext_0f:
-        case ext_0f3a:
-        case ext_8f08:
-        case ext_8f09:
-        case ext_8f0a:
+            switch ( b )
+            {
+            case 0x00: /* Grp6 */
+                switch ( modrm_reg & 6 )
+                {
+                case 0:
+                    d |= DstMem | SrcImplicit | Mov;
+                    break;
+                case 2: case 4:
+                    d |= SrcMem16;
+                    break;
+                }
+                break;
+            }
             break;
 
         case ext_0f38:
@@ -2070,6 +2092,12 @@ x86_decode(
             }
             break;
 
+        case ext_0f3a:
+        case ext_8f08:
+        case ext_8f09:
+        case ext_8f0a:
+            break;
+
         default:
             ASSERT_UNREACHABLE();
         }
@@ -4177,14 +4205,31 @@ x86_emulate(
         }
         break;
 
-    case X86EMUL_OPC(0x0f, 0x00): /* Grp6 */
-        fail_if((modrm_reg & 6) != 2);
+    case X86EMUL_OPC(0x0f, 0x00): /* Grp6 */ {
+        enum x86_segment seg = (modrm_reg & 1) ? x86_seg_tr : x86_seg_ldtr;
+
+        fail_if(modrm_reg & 4);
         generate_exception_if(!in_protmode(ctxt, ops), EXC_UD, -1);
-        generate_exception_if(!mode_ring0(), EXC_GP, 0);
-        if ( (rc = load_seg((modrm_reg & 1) ? x86_seg_tr : x86_seg_ldtr,
-                            src.val, 0, NULL, ctxt, ops)) != 0 )
-            goto done;
+        if ( modrm_reg & 2 )
+        {
+            generate_exception_if(!mode_ring0(), EXC_GP, 0);
+            if ( (rc = load_seg(seg, src.val, 0, NULL, ctxt, ops)) != 0 )
+                goto done;
+        }
+        else
+        {
+            struct segment_register reg;
+
+            generate_exception_if(is_umip(ctxt, ops), EXC_GP, 0);
+            fail_if(!ops->read_segment);
+            if ( (rc = ops->read_segment(seg, &reg, ctxt)) != 0 )
+                goto done;
+            dst.val = reg.sel;
+            if ( dst.type == OP_MEM )
+                dst.bytes = 2;
+        }
         break;
+    }
 
     case X86EMUL_OPC(0x0f, 0x01): /* Grp7 */ {
         struct segment_register reg;
@@ -4282,6 +4327,7 @@ x86_emulate(
         case 0: /* sgdt */
         case 1: /* sidt */
             generate_exception_if(ea.type != OP_MEM, EXC_UD, -1);
+            generate_exception_if(is_umip(ctxt, ops), EXC_GP, 0);
             fail_if(ops->read_segment == NULL);
             if ( (rc = ops->read_segment((modrm_reg & 1) ?
                                          x86_seg_idtr : x86_seg_gdtr,
@@ -4316,6 +4362,7 @@ x86_emulate(
                 goto done;
             break;
         case 4: /* smsw */
+            generate_exception_if(is_umip(ctxt, ops), EXC_GP, 0);
             ea.bytes = (ea.type == OP_MEM) ? 2 : op_bytes;
             dst = ea;
             fail_if(ops->read_cr == NULL);




* [PATCH 2/5] x86emul: consolidate segment register handling
  2016-09-08 13:36 [PATCH 0/5] x86: further insn emulator improvements Jan Beulich
  2016-09-08 13:42 ` [PATCH 1/5] x86emul: support UMIP Jan Beulich
@ 2016-09-08 13:43 ` Jan Beulich
  2016-09-30 10:39   ` Andrew Cooper
  2016-09-08 13:44 ` [PATCH 3/5] x86emul: support RTM instructions Jan Beulich
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 20+ messages in thread
From: Jan Beulich @ 2016-09-08 13:43 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper


Use a single set of variables throughout the huge switch() statement,
allowing SLDT/STR to be funneled into the mov-from-sreg code path.
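The underlying C pattern: declarations at the top of a switch body are
in scope for (but not initialized in) every case, so one case can reuse
another's tail via goto. A minimal sketch of the idiom with made-up
names (the patch's equivalent label is store_seg):

    switch ( op )
    {
        int shared;          /* visible to all cases below */

    case OP_A:
        shared = compute_a();
    common:
        consume(shared);
        break;

    case OP_B:
        shared = compute_b();
        goto common;         /* funnel OP_B into OP_A's tail */
    }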

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -2494,7 +2494,8 @@ x86_emulate(
 
     switch ( ctxt->opcode )
     {
-        struct segment_register cs;
+        enum x86_segment seg;
+        struct segment_register cs, sreg;
 
     case 0x00 ... 0x05: add: /* add */
         emulate_2op_SrcV("add", src, dst, _regs.eflags);
@@ -2530,22 +2531,20 @@ x86_emulate(
         dst.type = OP_NONE;
         break;
 
-    case 0x06: /* push %%es */ {
-        struct segment_register reg;
+    case 0x06: /* push %%es */
         src.val = x86_seg_es;
     push_seg:
         generate_exception_if(mode_64bit() && !ext, EXC_UD, -1);
         fail_if(ops->read_segment == NULL);
-        if ( (rc = ops->read_segment(src.val, &reg, ctxt)) != 0 )
+        if ( (rc = ops->read_segment(src.val, &sreg, ctxt)) != 0 )
             goto done;
         /* 64-bit mode: PUSH defaults to a 64-bit operand. */
         if ( mode_64bit() && (op_bytes == 4) )
             op_bytes = 8;
         if ( (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes),
-                              &reg.sel, op_bytes, ctxt)) != 0 )
+                              &sreg.sel, op_bytes, ctxt)) != 0 )
             goto done;
         break;
-    }
 
     case 0x07: /* pop %%es */
         src.val = x86_seg_es;
@@ -2861,21 +2860,20 @@ x86_emulate(
         dst.val = src.val;
         break;
 
-    case 0x8c: /* mov Sreg,r/m */ {
-        struct segment_register reg;
-        enum x86_segment seg = decode_segment(modrm_reg);
+    case 0x8c: /* mov Sreg,r/m */
+        seg = decode_segment(modrm_reg);
         generate_exception_if(seg == decode_segment_failed, EXC_UD, -1);
+    store_seg:
         fail_if(ops->read_segment == NULL);
-        if ( (rc = ops->read_segment(seg, &reg, ctxt)) != 0 )
+        if ( (rc = ops->read_segment(seg, &sreg, ctxt)) != 0 )
             goto done;
-        dst.val = reg.sel;
+        dst.val = sreg.sel;
         if ( dst.type == OP_MEM )
             dst.bytes = 2;
         break;
-    }
 
-    case 0x8e: /* mov r/m,Sreg */ {
-        enum x86_segment seg = decode_segment(modrm_reg);
+    case 0x8e: /* mov r/m,Sreg */
+        seg = decode_segment(modrm_reg);
         generate_exception_if(seg == decode_segment_failed, EXC_UD, -1);
         generate_exception_if(seg == x86_seg_cs, EXC_UD, -1);
         if ( (rc = load_seg(seg, src.val, 0, NULL, ctxt, ops)) != 0 )
@@ -2884,7 +2882,6 @@ x86_emulate(
             ctxt->retire.flags.mov_ss = 1;
         dst.type = OP_NONE;
         break;
-    }
 
     case 0x8d: /* lea */
         generate_exception_if(ea.type != OP_MEM, EXC_UD, -1);
@@ -2941,17 +2938,15 @@ x86_emulate(
         }
         break;
 
-    case 0x9a: /* call (far, absolute) */ {
-        struct segment_register reg;
-
+    case 0x9a: /* call (far, absolute) */
         ASSERT(!mode_64bit());
         fail_if(ops->read_segment == NULL);
 
-        if ( (rc = ops->read_segment(x86_seg_cs, &reg, ctxt)) ||
+        if ( (rc = ops->read_segment(x86_seg_cs, &sreg, ctxt)) ||
              (rc = load_seg(x86_seg_cs, imm2, 0, &cs, ctxt, ops)) ||
              (validate_far_branch(&cs, imm1),
               rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes),
-                              &reg.sel, op_bytes, ctxt)) ||
+                              &sreg.sel, op_bytes, ctxt)) ||
              (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes),
                               &_regs.eip, op_bytes, ctxt)) ||
              (rc = ops->write_segment(x86_seg_cs, &cs, ctxt)) )
@@ -2959,7 +2954,6 @@ x86_emulate(
 
         _regs.eip = imm1;
         break;
-    }
 
     case 0x9b:  /* wait/fwait */
         host_and_vcpu_must_have(fpu);
@@ -4178,13 +4172,12 @@ x86_emulate(
 
             if ( (modrm_reg & 7) == 3 ) /* call */
             {
-                struct segment_register reg;
                 fail_if(ops->read_segment == NULL);
-                if ( (rc = ops->read_segment(x86_seg_cs, &reg, ctxt)) ||
+                if ( (rc = ops->read_segment(x86_seg_cs, &sreg, ctxt)) ||
                      (rc = load_seg(x86_seg_cs, sel, 0, &cs, ctxt, ops)) ||
                      (validate_far_branch(&cs, src.val),
                       rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes),
-                                      &reg.sel, op_bytes, ctxt)) ||
+                                      &sreg.sel, op_bytes, ctxt)) ||
                      (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes),
                                       &_regs.eip, op_bytes, ctxt)) ||
                      (rc = ops->write_segment(x86_seg_cs, &cs, ctxt)) )
@@ -4205,34 +4198,24 @@ x86_emulate(
         }
         break;
 
-    case X86EMUL_OPC(0x0f, 0x00): /* Grp6 */ {
-        enum x86_segment seg = (modrm_reg & 1) ? x86_seg_tr : x86_seg_ldtr;
-
+    case X86EMUL_OPC(0x0f, 0x00): /* Grp6 */
+        seg = (modrm_reg & 1) ? x86_seg_tr : x86_seg_ldtr;
         fail_if(modrm_reg & 4);
         generate_exception_if(!in_protmode(ctxt, ops), EXC_UD, -1);
-        if ( modrm_reg & 2 )
+        if ( modrm_reg & 2 ) /* lldt / ltr */
         {
             generate_exception_if(!mode_ring0(), EXC_GP, 0);
             if ( (rc = load_seg(seg, src.val, 0, NULL, ctxt, ops)) != 0 )
                 goto done;
         }
-        else
+        else /* sldt / str */
         {
-            struct segment_register reg;
-
             generate_exception_if(is_umip(ctxt, ops), EXC_GP, 0);
-            fail_if(!ops->read_segment);
-            if ( (rc = ops->read_segment(seg, &reg, ctxt)) != 0 )
-                goto done;
-            dst.val = reg.sel;
-            if ( dst.type == OP_MEM )
-                dst.bytes = 2;
+            goto store_seg;
         }
         break;
-    }
 
     case X86EMUL_OPC(0x0f, 0x01): /* Grp7 */ {
-        struct segment_register reg;
         unsigned long base, limit, cr0, cr0w;
 
         switch( modrm )
@@ -4322,6 +4305,8 @@ x86_emulate(
         }
         }
 
+        seg = (modrm_reg & 1) ? x86_seg_idtr : x86_seg_gdtr;
+
         switch ( modrm_reg & 7 )
         {
         case 0: /* sgdt */
@@ -4329,16 +4314,14 @@ x86_emulate(
             generate_exception_if(ea.type != OP_MEM, EXC_UD, -1);
             generate_exception_if(is_umip(ctxt, ops), EXC_GP, 0);
             fail_if(ops->read_segment == NULL);
-            if ( (rc = ops->read_segment((modrm_reg & 1) ?
-                                         x86_seg_idtr : x86_seg_gdtr,
-                                         &reg, ctxt)) )
+            if ( (rc = ops->read_segment(seg, &sreg, ctxt)) )
                 goto done;
             if ( op_bytes == 2 )
-                reg.base &= 0xffffff;
+                sreg.base &= 0xffffff;
             if ( (rc = ops->write(ea.mem.seg, ea.mem.off+0,
-                                  &reg.limit, 2, ctxt)) ||
+                                  &sreg.limit, 2, ctxt)) ||
                  (rc = ops->write(ea.mem.seg, ea.mem.off+2,
-                                  &reg.base, mode_64bit() ? 8 : 4, ctxt)) )
+                                  &sreg.base, mode_64bit() ? 8 : 4, ctxt)) )
                 goto done;
             break;
         case 2: /* lgdt */
@@ -4346,19 +4329,17 @@ x86_emulate(
             generate_exception_if(!mode_ring0(), EXC_GP, 0);
             generate_exception_if(ea.type != OP_MEM, EXC_UD, -1);
             fail_if(ops->write_segment == NULL);
-            memset(&reg, 0, sizeof(reg));
+            memset(&sreg, 0, sizeof(sreg));
             if ( (rc = read_ulong(ea.mem.seg, ea.mem.off+0,
                                   &limit, 2, ctxt, ops)) ||
                  (rc = read_ulong(ea.mem.seg, ea.mem.off+2,
                                   &base, mode_64bit() ? 8 : 4, ctxt, ops)) )
                 goto done;
-            reg.base = base;
-            reg.limit = limit;
+            sreg.base = base;
+            sreg.limit = limit;
             if ( op_bytes == 2 )
-                reg.base &= 0xffffff;
-            if ( (rc = ops->write_segment((modrm_reg & 1) ?
-                                          x86_seg_idtr : x86_seg_gdtr,
-                                          &reg, ctxt)) )
+                sreg.base &= 0xffffff;
+            if ( (rc = ops->write_segment(seg, &sreg, ctxt)) )
                 goto done;
             break;
         case 4: /* smsw */
@@ -4401,7 +4382,6 @@ x86_emulate(
 
     case X86EMUL_OPC(0x0f, 0x05): /* syscall */ {
         uint64_t msr_content;
-        struct segment_register cs, ss;
 
         generate_exception_if(!in_protmode(ctxt, ops), EXC_UD, -1);
 
@@ -4415,11 +4395,11 @@ x86_emulate(
             goto done;
 
         cs.sel = (msr_content >> 32) & ~3; /* SELECTOR_RPL_MASK */
-        ss.sel = cs.sel + 8;
+        sreg.sel = cs.sel + 8;
 
-        cs.base = ss.base = 0; /* flat segment */
-        cs.limit = ss.limit = ~0u;  /* 4GB limit */
-        ss.attr.bytes = 0xc93; /* G+DB+P+S+Data */
+        cs.base = sreg.base = 0; /* flat segment */
+        cs.limit = sreg.limit = ~0u;  /* 4GB limit */
+        sreg.attr.bytes = 0xc93; /* G+DB+P+S+Data */
 
 #ifdef __x86_64__
         rc = in_longmode(ctxt, ops);
@@ -4453,7 +4433,7 @@ x86_emulate(
 
         fail_if(ops->write_segment == NULL);
         if ( (rc = ops->write_segment(x86_seg_cs, &cs, ctxt)) ||
-             (rc = ops->write_segment(x86_seg_ss, &ss, ctxt)) )
+             (rc = ops->write_segment(x86_seg_ss, &sreg, ctxt)) )
             goto done;
 
         break;
@@ -4672,7 +4652,6 @@ x86_emulate(
 
     case X86EMUL_OPC(0x0f, 0x34): /* sysenter */ {
         uint64_t msr_content;
-        struct segment_register cs, ss;
         int lm;
 
         generate_exception_if(mode_ring0(), EXC_GP, 0);
@@ -4697,14 +4676,14 @@ x86_emulate(
         cs.attr.bytes = lm ? 0xa9b  /* L+DB+P+S+Code */
                            : 0xc9b; /* G+DB+P+S+Code */
 
-        ss.sel = cs.sel + 8;
-        ss.base = 0;   /* flat segment */
-        ss.limit = ~0u;  /* 4GB limit */
-        ss.attr.bytes = 0xc93; /* G+DB+P+S+Data */
+        sreg.sel = cs.sel + 8;
+        sreg.base = 0;   /* flat segment */
+        sreg.limit = ~0u;  /* 4GB limit */
+        sreg.attr.bytes = 0xc93; /* G+DB+P+S+Data */
 
         fail_if(ops->write_segment == NULL);
         if ( (rc = ops->write_segment(x86_seg_cs, &cs, ctxt)) != 0 ||
-             (rc = ops->write_segment(x86_seg_ss, &ss, ctxt)) != 0 )
+             (rc = ops->write_segment(x86_seg_ss, &sreg, ctxt)) != 0 )
             goto done;
 
         if ( (rc = ops->read_msr(MSR_SYSENTER_EIP, &msr_content, ctxt)) != 0 )
@@ -4720,7 +4699,6 @@ x86_emulate(
 
     case X86EMUL_OPC(0x0f, 0x35): /* sysexit */ {
         uint64_t msr_content;
-        struct segment_register cs, ss;
         bool_t user64 = !!(rex_prefix & REX_W);
 
         generate_exception_if(!mode_ring0(), EXC_GP, 0);
@@ -4742,14 +4720,14 @@ x86_emulate(
         cs.attr.bytes = user64 ? 0xafb  /* L+DB+P+DPL3+S+Code */
                                : 0xcfb; /* G+DB+P+DPL3+S+Code */
 
-        ss.sel = cs.sel + 8;
-        ss.base = 0;   /* flat segment */
-        ss.limit = ~0u;  /* 4GB limit */
-        ss.attr.bytes = 0xcf3; /* G+DB+P+DPL3+S+Data */
+        sreg.sel = cs.sel + 8;
+        sreg.base = 0;   /* flat segment */
+        sreg.limit = ~0u;  /* 4GB limit */
+        sreg.attr.bytes = 0xcf3; /* G+DB+P+DPL3+S+Data */
 
         fail_if(ops->write_segment == NULL);
         if ( (rc = ops->write_segment(x86_seg_cs, &cs, ctxt)) != 0 ||
-             (rc = ops->write_segment(x86_seg_ss, &ss, ctxt)) != 0 )
+             (rc = ops->write_segment(x86_seg_ss, &sreg, ctxt)) != 0 )
             goto done;
 
         _regs.eip = user64 ? _regs.edx : (uint32_t)_regs.edx;




* [PATCH 3/5] x86emul: support RTM instructions
  2016-09-08 13:36 [PATCH 0/5] x86: further insn emulator improvements Jan Beulich
  2016-09-08 13:42 ` [PATCH 1/5] x86emul: support UMIP Jan Beulich
  2016-09-08 13:43 ` [PATCH 2/5] x86emul: consolidate segment register handling Jan Beulich
@ 2016-09-08 13:44 ` Jan Beulich
  2016-09-30 12:37   ` Andrew Cooper
  2016-09-08 13:45 ` [PATCH 4/5] x86/emulate: add support for {,v}movq xmm,xmm/m64 Jan Beulich
  2016-09-08 13:46 ` [PATCH 5/5] x86/emulate: add support for {,v}movd {,x}mm,r/m32 and {,v}movq {,x}mm,r/m64 Jan Beulich
  4 siblings, 1 reply; 20+ messages in thread
From: Jan Beulich @ 2016-09-08 13:44 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper


Minimal emulation: XBEGIN aborts right away, hence
- XABORT is just a no-op,
- XEND always raises #GP,
- XTEST always signals that neither RTM nor HLE is active (it sets ZF);
  see the sketch of the resulting guest-visible behavior below.
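A sketch of what a guest then observes (my illustration, not part of
the patch; assumes GCC/Clang RTM intrinsics and building with -mrtm):

    #include <immintrin.h>
    #include <stdio.h>

    int main(void)
    {
        /* Under this emulation XBEGIN always takes the abort path with
         * %eax cleared, so status is 0, never _XBEGIN_STARTED. */
        unsigned int status = _xbegin();

        if ( status == _XBEGIN_STARTED )
            _xend();    /* never reached; XEND would raise #GP anyway */
        else
            /* _xtest() yields 0 here: XTEST sets ZF, meaning no
             * transaction is active. */
            printf("aborted, status %#x, xtest %d\n", status, _xtest());

        return 0;
    }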

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1172,6 +1172,8 @@ static bool_t vcpu_has(
 #define vcpu_has_clflush() vcpu_has(       1, EDX, 19, ctxt, ops)
 #define vcpu_has_lzcnt() vcpu_has(0x80000001, ECX,  5, ctxt, ops)
 #define vcpu_has_bmi1()  vcpu_has(0x00000007, EBX,  3, ctxt, ops)
+#define vcpu_has_hle()   vcpu_has(0x00000007, EBX,  4, ctxt, ops)
+#define vcpu_has_rtm()   vcpu_has(0x00000007, EBX, 11, ctxt, ops)
 
 #define vcpu_must_have(leaf, reg, bit) \
     generate_exception_if(!vcpu_has(leaf, reg, bit, ctxt, ops), EXC_UD, -1)
@@ -2852,7 +2854,18 @@ x86_emulate(
         lock_prefix = 1;
         break;
 
-    case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */
+    case 0xc6: /* Grp11: mov / xabort */
+    case 0xc7: /* Grp11: mov / xbegin */
+        if ( modrm == 0xf8 && vcpu_has_rtm() )
+        {
+            if ( b & 1 )
+            {
+                jmp_rel((int32_t)src.val);
+                _regs.eax = 0;
+            }
+            dst.type = OP_NONE;
+            break;
+        }
         generate_exception_if((modrm_reg & 7) != 0, EXC_UD, -1);
     case 0x88 ... 0x8b: /* mov */
     case 0xa0 ... 0xa1: /* mov mem.offs,{%al,%ax,%eax,%rax} */
@@ -4246,6 +4259,17 @@ x86_emulate(
                 goto done;
             goto no_writeback;
 
+        case 0xd5: /* xend */
+            generate_exception_if(vcpu_has_rtm() && !vex.pfx, EXC_GP, 0);
+            break;
+
+        case 0xd6: /* xtest */
+            if ( (!vcpu_has_rtm() && !vcpu_has_hle()) || vex.pfx )
+                break;
+            /* Neither HLE nor RTM can be active when we get here. */
+            _regs.eflags |= EFLG_ZF;
+            goto no_writeback;
+
         case 0xdf: /* invlpga */
             generate_exception_if(!in_protmode(ctxt, ops), EXC_UD, -1);
             generate_exception_if(!mode_ring0(), EXC_GP, 0);





* [PATCH 4/5] x86/emulate: add support for {,v}movq xmm,xmm/m64
  2016-09-08 13:36 [PATCH 0/5] x86: further insn emulator improvements Jan Beulich
                   ` (2 preceding siblings ...)
  2016-09-08 13:44 ` [PATCH 3/5] x86emul: support RTM instructions Jan Beulich
@ 2016-09-08 13:45 ` Jan Beulich
  2016-09-08 13:56   ` Mihai Donțu
  2016-09-30 10:43   ` Andrew Cooper
  2016-09-08 13:46 ` [PATCH 5/5] x86/emulate: add support for {,v}movd {,x}mm,r/m32 and {,v}movq {,x}mm,r/m64 Jan Beulich
  4 siblings, 2 replies; 20+ messages in thread
From: Jan Beulich @ 2016-09-08 13:45 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Mihai Dontu


From: Mihai Donțu <mdontu@bitdefender.com>

Signed-off-by: Mihai Donțu <mdontu@bitdefender.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v4: Re-base on decoding changes. Address my own review comments (where
    still applicable). #UD when vex.l is set. Various adjustments to
    the test tool change.
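
For illustration, byte sequences of the forms added here (worked out by
hand, not taken from the patch):

    66 0f d6 41 20     movq  %xmm0, 0x20(%rcx)  # 66 0F D6 /r, stores low 64 bits
    c5 f9 d6 49 20     vmovq %xmm1, 0x20(%rcx)  # VEX.128.66.0F D6 /r; VEX.L=1 now #UDs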

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -713,6 +713,54 @@ int main(int argc, char **argv)
     else
         printf("skipped\n");
 
+    printf("%-40s", "Testing movq %%xmm0,32(%%ecx)...");
+    if ( stack_exec && cpu_has_sse2 )
+    {
+        decl_insn(movq_to_mem2);
+
+        asm volatile ( "pcmpgtb %%xmm0, %%xmm0\n"
+                       put_insn(movq_to_mem2, "movq %%xmm0, 32(%0)")
+                       :: "c" (NULL) );
+
+        memset(res, 0xbd, 64);
+        set_insn(movq_to_mem2);
+        regs.ecx = (unsigned long)res;
+        regs.edx = 0;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(movq_to_mem2) ||
+             *((uint64_t *)res + 4) ||
+             memcmp(res, res + 10, 24) ||
+             memcmp(res, res + 6, 8) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing vmovq %%xmm1,32(%%edx)...");
+    if ( stack_exec && cpu_has_avx )
+    {
+        decl_insn(vmovq_to_mem);
+
+        asm volatile ( "pcmpgtb %%xmm1, %%xmm1\n"
+                       put_insn(vmovq_to_mem, "vmovq %%xmm1, 32(%0)")
+                       :: "d" (NULL) );
+
+        memset(res, 0xdb, 64);
+        set_insn(vmovq_to_mem);
+        regs.ecx = 0;
+        regs.edx = (unsigned long)res;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(vmovq_to_mem) ||
+             *((uint64_t *)res + 4) ||
+             memcmp(res, res + 10, 24) ||
+             memcmp(res, res + 6, 8) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
     printf("%-40s", "Testing movdqu %xmm2,(%ecx)...");
     if ( stack_exec && cpu_has_sse2 )
     {
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -269,7 +269,7 @@ static const opcode_desc_t twobyte_table
     ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
     ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
     /* 0xD0 - 0xDF */
-    ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ModRM,
+    ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ImplicitOps|ModRM, ModRM,
     ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ModRM,
     /* 0xE0 - 0xEF */
     ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ImplicitOps|ModRM,
@@ -4779,6 +4779,8 @@ x86_emulate(
     case X86EMUL_OPC_F3(0x0f, 0x7f):     /* movdqu xmm,xmm/m128 */
     case X86EMUL_OPC_VEX_F3(0x0f, 0x7f): /* vmovdqu xmm,xmm/m128 */
                                          /* vmovdqu ymm,ymm/m256 */
+    case X86EMUL_OPC_66(0x0f, 0xd6):     /* movq xmm,xmm/m64 */
+    case X86EMUL_OPC_VEX_66(0x0f, 0xd6): /* vmovq xmm,xmm/m64 */
     {
         uint8_t *buf = get_stub(stub);
         struct fpu_insn_ctxt fic = { .insn_bytes = 5 };
@@ -4796,7 +4798,8 @@ x86_emulate(
             case vex_66:
             case vex_f3:
                 host_and_vcpu_must_have(sse2);
-                buf[0] = 0x66; /* movdqa */
+                /* Converting movdqu to movdqa here: Our buffer is aligned. */
+                buf[0] = 0x66;
                 get_fpu(X86EMUL_FPU_xmm, &fic);
                 ea.bytes = 16;
                 break;
@@ -4819,6 +4822,11 @@ x86_emulate(
             get_fpu(X86EMUL_FPU_ymm, &fic);
             ea.bytes = 16 << vex.l;
         }
+        if ( b == 0xd6 )
+        {
+            generate_exception_if(vex.l, EXC_UD, -1);
+            ea.bytes = 8;
+        }
         if ( ea.type == OP_MEM )
         {
             generate_exception_if((vex.pfx == vex_66) &&




* [PATCH 5/5] x86/emulate: add support for {,v}movd {,x}mm,r/m32 and {,v}movq {,x}mm,r/m64
  2016-09-08 13:36 [PATCH 0/5] x86: further insn emulator improvements Jan Beulich
                   ` (3 preceding siblings ...)
  2016-09-08 13:45 ` [PATCH 4/5] x86/emulate: add support for {,v}movq xmm,xmm/m64 Jan Beulich
@ 2016-09-08 13:46 ` Jan Beulich
  2016-09-30 11:59   ` Andrew Cooper
  4 siblings, 1 reply; 20+ messages in thread
From: Jan Beulich @ 2016-09-08 13:46 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Mihai Dontu, zhi.a.wang


From: Zhi Wang <zhi.a.wang@intel.com>

Found that a Windows driver was using the SSE2 instruction MOVD.

Signed-off-by: Zhi Wang <zhi.a.wang@intel.com>
Signed-off-by: Mihai Donțu <mdontu@bitdefender.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v4: Re-base on decoding changes. Address Andrew's and my own review
    comments (where still applicable). #UD when vex.l is set. Various
    adjustments to the test tool change.
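
For illustration, a few of the covered encodings (worked out by hand,
not taken from the patch):

    0f 7e db           movd  %mm3, %ebx
    66 0f 7e d3        movd  %xmm2, %ebx
    66 48 0f 7e d3     movq  %xmm2, %rbx   # REX.W form, i.e. "rex64 movd"
    c4 e1 f9 7e cb     vmovq %xmm1, %rbx   # VEX.W1 form; cf. the hard-coded
                                           # bytes in the test below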

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -973,6 +973,296 @@ int main(int argc, char **argv)
     else
         printf("skipped\n");
 
+    printf("%-40s", "Testing movd %%mm3,32(%%ecx)...");
+    if ( stack_exec && cpu_has_mmx )
+    {
+        decl_insn(movd_to_mem);
+
+        asm volatile ( "pcmpeqb %%mm3, %%mm3\n"
+                       put_insn(movd_to_mem, "movd %%mm3, 32(%0)")
+                       :: "c" (NULL) );
+
+        memset(res, 0xbd, 64);
+        set_insn(movd_to_mem);
+        regs.ecx = (unsigned long)res;
+        regs.edx = 0;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(movd_to_mem) ||
+             res[8] + 1 ||
+             memcmp(res, res + 9, 28) ||
+             memcmp(res, res + 6, 8) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing movd %%xmm2,32(%%edx)...");
+    if ( stack_exec && cpu_has_sse2 )
+    {
+        decl_insn(movd_to_mem2);
+
+        asm volatile ( "pcmpeqb %%xmm2, %%xmm2\n"
+                       put_insn(movd_to_mem2, "movd %%xmm2, 32(%0)")
+                       :: "d" (NULL) );
+
+        memset(res, 0xdb, 64);
+        set_insn(movd_to_mem2);
+        regs.ecx = 0;
+        regs.edx = (unsigned long)res;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(movd_to_mem2) ||
+             res[8] + 1 ||
+             memcmp(res, res + 9, 28) ||
+             memcmp(res, res + 6, 8) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing vmovd %%xmm1,32(%%ecx)...");
+    if ( stack_exec && cpu_has_avx )
+    {
+        decl_insn(vmovd_to_mem);
+
+        asm volatile ( "pcmpeqb %%xmm1, %%xmm1\n"
+                       put_insn(vmovd_to_mem, "vmovd %%xmm1, 32(%0)")
+                       :: "c" (NULL) );
+
+        memset(res, 0xbd, 64);
+        set_insn(vmovd_to_mem);
+        regs.ecx = (unsigned long)res;
+        regs.edx = 0;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(vmovd_to_mem) ||
+             res[8] + 1 ||
+             memcmp(res, res + 9, 28) ||
+             memcmp(res, res + 6, 8) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing movd %%mm3,%%ebx...");
+    if ( stack_exec && cpu_has_mmx )
+    {
+        decl_insn(movd_to_reg);
+
+        /*
+         * Intentionally not specifying "b" as an input (or even output) here
+         * to not keep the compiler from using the variable, which in turn
+         * allows noticing whether the emulator touches the actual register
+         * instead of the regs field.
+         */
+        asm volatile ( "pcmpeqb %%mm3, %%mm3\n"
+                       put_insn(movd_to_reg, "movd %%mm3, %%ebx")
+                       :: );
+
+        set_insn(movd_to_reg);
+#ifdef __x86_64__
+        regs.rbx = 0xbdbdbdbdbdbdbdbdUL;
+#else
+        regs.ebx = 0xbdbdbdbdUL;
+#endif
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || !check_eip(movd_to_reg) ||
+             regs.ebx != 0xffffffff )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing movd %%xmm2,%%ebx...");
+    if ( stack_exec && cpu_has_sse2 )
+    {
+        decl_insn(movd_to_reg2);
+
+        /* See comment next to movd above. */
+        asm volatile ( "pcmpeqb %%xmm2, %%xmm2\n"
+                       put_insn(movd_to_reg2, "movd %%xmm2, %%ebx")
+                       :: );
+
+        set_insn(movd_to_reg2);
+#ifdef __x86_64__
+        regs.rbx = 0xbdbdbdbdbdbdbdbdUL;
+#else
+        regs.ebx = 0xbdbdbdbdUL;
+#endif
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || !check_eip(movd_to_reg2) ||
+             regs.ebx != 0xffffffff )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing vmovd %%xmm1,%%ebx...");
+    if ( stack_exec && cpu_has_avx )
+    {
+        decl_insn(vmovd_to_reg);
+
+        /* See comment next to movd above. */
+        asm volatile ( "pcmpeqb %%xmm1, %%xmm1\n"
+                       put_insn(vmovd_to_reg, "vmovd %%xmm1, %%ebx")
+                       :: );
+
+        set_insn(vmovd_to_reg);
+#ifdef __x86_64__
+        regs.rbx = 0xbdbdbdbdbdbdbdbdUL;
+#else
+        regs.ebx = 0xbdbdbdbdUL;
+#endif
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || !check_eip(vmovd_to_reg) ||
+             regs.ebx != 0xffffffff )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+#ifdef __x86_64__
+    printf("%-40s", "Testing movq %%mm3,32(%%ecx)...");
+    if ( stack_exec && cpu_has_mmx )
+    {
+        decl_insn(movq_to_mem3);
+
+        asm volatile ( "pcmpeqb %%mm3, %%mm3\n"
+                       put_insn(movq_to_mem3, "rex64 movd %%mm3, 32(%0)")
+                       :: "c" (NULL) );
+
+        memset(res, 0xbd, 64);
+        set_insn(movq_to_mem3);
+        regs.ecx = (unsigned long)res;
+        regs.edx = 0;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(movq_to_mem3) ||
+             *((long *)res + 4) + 1 ||
+             memcmp(res, res + 10, 24) ||
+             memcmp(res, res + 6, 8) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing movq %%xmm2,32(%%edx)...");
+    if ( stack_exec )
+    {
+        decl_insn(movq_to_mem4);
+
+        asm volatile ( "pcmpeqb %%xmm2, %%xmm2\n"
+                       put_insn(movq_to_mem4, "rex64 movd %%xmm2, 32(%0)")
+                       :: "d" (NULL) );
+
+        memset(res, 0xdb, 64);
+        set_insn(movq_to_mem4);
+        regs.ecx = 0;
+        regs.edx = (unsigned long)res;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(movq_to_mem4) ||
+             *((long *)res + 4) + 1 ||
+             memcmp(res, res + 10, 24) ||
+             memcmp(res, res + 6, 8) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing vmovq %%xmm1,32(%%ecx)...");
+    if ( stack_exec && cpu_has_avx )
+    {
+        decl_insn(vmovq_to_mem2);
+
+        asm volatile ( "pcmpeqb %%xmm1, %%xmm1\n"
+#if 0 /* This doesn't work, as the assembler will pick opcode D6. */
+                       put_insn(vmovq_to_mem2, "vmovq %%xmm1, 32(%0)")
+#else
+                       put_insn(vmovq_to_mem2, ".byte 0xc4, 0xe1, 0xf9, 0x7e, 0x49, 0x20")
+#endif
+                       :: "c" (NULL) );
+
+        memset(res, 0xbd, 64);
+        set_insn(vmovq_to_mem2);
+        regs.ecx = (unsigned long)res;
+        regs.edx = 0;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(vmovq_to_mem2) ||
+             *((long *)res + 4) + 1 ||
+             memcmp(res, res + 10, 24) ||
+             memcmp(res, res + 6, 8) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing movq %%mm3,%%rbx...");
+    if ( stack_exec && cpu_has_mmx )
+    {
+        decl_insn(movq_to_reg);
+
+        /* See comment next to movd above. */
+        asm volatile ( "pcmpeqb %%mm3, %%mm3\n"
+                       put_insn(movq_to_reg, "movq %%mm3, %%rbx")
+                       :: );
+
+        set_insn(movq_to_reg);
+        regs.rbx = 0xbdbdbdbdbdbdbdbdUL;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || regs.rbx + 1 || !check_eip(movq_to_reg) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing movq %%xmm2,%%rbx...");
+    if ( stack_exec )
+    {
+        decl_insn(movq_to_reg2);
+
+        /* See comment next to movd above. */
+        asm volatile ( "pcmpeqb %%xmm2, %%xmm2\n"
+                       put_insn(movq_to_reg2, "movq %%xmm2, %%rbx")
+                       :: );
+
+        set_insn(movq_to_reg2);
+        regs.rbx = 0xbdbdbdbdbdbdbdbdUL;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || regs.rbx + 1 || !check_eip(movq_to_reg2) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing vmovq %%xmm1,%%rbx...");
+    if ( stack_exec && cpu_has_avx )
+    {
+        decl_insn(vmovq_to_reg);
+
+        /* See comment next to movd above. */
+        asm volatile ( "pcmpeqb %%xmm1, %%xmm1\n"
+                       put_insn(vmovq_to_reg, "vmovq %%xmm1, %%rbx")
+                       :: );
+
+        set_insn(vmovq_to_reg);
+        regs.rbx = 0xbdbdbdbdbdbdbdbdUL;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || regs.rbx + 1 || !check_eip(vmovq_to_reg) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+#endif
+
 #undef decl_insn
 #undef put_insn
 #undef set_insn
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -223,7 +223,7 @@ static const opcode_desc_t twobyte_table
     /* 0x70 - 0x7F */
     SrcImmByte|ModRM, SrcImmByte|ModRM, SrcImmByte|ModRM, SrcImmByte|ModRM,
     ModRM, ModRM, ModRM, ImplicitOps,
-    ModRM, ModRM, 0, 0, ModRM, ModRM, ModRM, ImplicitOps|ModRM,
+    ModRM, ModRM, 0, 0, ModRM, ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM,
     /* 0x80 - 0x87 */
     DstImplicit|SrcImm, DstImplicit|SrcImm,
     DstImplicit|SrcImm, DstImplicit|SrcImm,
@@ -2291,6 +2291,10 @@ x86_decode(
         return X86EMUL_UNHANDLEABLE;
     }
 
+    if ( op_bytes == 2 &&
+         (ctxt->opcode & X86EMUL_OPC_PFX_MASK) == X86EMUL_OPC_66(0, 0) )
+        op_bytes = 4;
+
  done:
     return rc;
 }
@@ -4772,6 +4776,12 @@ x86_emulate(
                                          /* vmovdqa ymm/m256,ymm */
     case X86EMUL_OPC_VEX_F3(0x0f, 0x6f): /* vmovdqu xmm/m128,xmm */
                                          /* vmovdqu ymm/m256,ymm */
+    case X86EMUL_OPC(0x0f, 0x7e):        /* movd mm,r/m32 */
+                                         /* movq mm,r/m64 */
+    case X86EMUL_OPC_66(0x0f, 0x7e):     /* movd xmm,r/m32 */
+                                         /* movq xmm,r/m64 */
+    case X86EMUL_OPC_VEX_66(0x0f, 0x7e): /* vmovd xmm,r/m32 */
+                                         /* vmovq xmm,r/m64 */
     case X86EMUL_OPC(0x0f, 0x7f):        /* movq mm,mm/m64 */
     case X86EMUL_OPC_66(0x0f, 0x7f):     /* movdqa xmm,xmm/m128 */
     case X86EMUL_OPC_VEX_66(0x0f, 0x7f): /* vmovdqa xmm,xmm/m128 */
@@ -4822,10 +4832,16 @@ x86_emulate(
             get_fpu(X86EMUL_FPU_ymm, &fic);
             ea.bytes = 16 << vex.l;
         }
-        if ( b == 0xd6 )
+        switch ( b )
         {
+        case 0x7e:
+            generate_exception_if(vex.l, EXC_UD, -1);
+            ea.bytes = op_bytes;
+            break;
+        case 0xd6:
             generate_exception_if(vex.l, EXC_UD, -1);
             ea.bytes = 8;
+            break;
         }
         if ( ea.type == OP_MEM )
         {
@@ -4836,15 +4852,22 @@ x86_emulate(
             if ( b == 0x6f )
                 rc = ops->read(ea.mem.seg, ea.mem.off+0, mmvalp,
                                ea.bytes, ctxt);
-            /* convert memory operand to (%rAX) */
+        }
+        if ( ea.type == OP_MEM || b == 0x7e )
+        {
+            /* Convert memory operand or GPR destination to (%rAX) */
             rex_prefix &= ~REX_B;
             vex.b = 1;
             buf[4] &= 0x38;
+            if ( ea.type == OP_MEM )
+                ea.reg = (void *)mmvalp;
+            else /* Ensure zero-extension of a 32-bit result. */
+                *ea.reg = 0;
         }
         if ( !rc )
         {
            copy_REX_VEX(buf, rex_prefix, vex);
-           asm volatile ( "call *%0" : : "r" (stub.func), "a" (mmvalp)
+           asm volatile ( "call *%0" : : "r" (stub.func), "a" (ea.reg)
                                      : "memory" );
         }
         put_fpu(&fic);



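As an aside on the check idiom used throughout these tests: pcmpeqb with
identical operands sets the source register to all ones, so a stored 32-bit
chunk must read back as 0xffffffff. A minimal sketch of the idiom
(hypothetical helper, not part of the patch):

    #include <stdint.h>

    /* "x + 1" is a compact "x != -1" test: it is non-zero exactly when
     * x differs from the all-ones pattern the instruction should store. */
    static int not_all_ones(uint32_t x)
    {
        return x + 1 != 0;
    }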

* Re: [PATCH 4/5] x86/emulate: add support for {, v}movq xmm, xmm/m64
  2016-09-08 13:45 ` [PATCH 4/5] x86/emulate: add support for {, v}movq xmm, xmm/m64 Jan Beulich
@ 2016-09-08 13:56   ` Mihai Donțu
  2016-09-30 10:43   ` Andrew Cooper
  1 sibling, 0 replies; 20+ messages in thread
From: Mihai Donțu @ 2016-09-08 13:56 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Andrew Cooper



On Thursday 08 September 2016 07:45:19 Jan Beulich wrote:
> From: Mihai Donțu <mdontu@bitdefender.com>
> 
> Signed-off-by: Mihai Donțu <mdontu@bitdefender.com>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> v4: Re-base on decoding changes. Address my own review comments (where
>     still applicable). #UD when vex.l is set. Various adjustments to
>     the test tool change.

Thank you! They were in my queue for too long and I was struggling to
find a window of time to get them in shape.

> --- a/tools/tests/x86_emulator/test_x86_emulator.c
> +++ b/tools/tests/x86_emulator/test_x86_emulator.c
> @@ -713,6 +713,54 @@ int main(int argc, char **argv)
>      else
>          printf("skipped\n");
>  
> +    printf("%-40s", "Testing movq %%xmm0,32(%%ecx)...");
> +    if ( stack_exec && cpu_has_sse2 )
> +    {
> +        decl_insn(movq_to_mem2);
> +
> +        asm volatile ( "pcmpgtb %%xmm0, %%xmm0\n"
> +                       put_insn(movq_to_mem2, "movq %%xmm0, 32(%0)")
> +                       :: "c" (NULL) );
> +
> +        memset(res, 0xbd, 64);
> +        set_insn(movq_to_mem2);
> +        regs.ecx = (unsigned long)res;
> +        regs.edx = 0;
> +        rc = x86_emulate(&ctxt, &emulops);
> +        if ( rc != X86EMUL_OKAY || !check_eip(movq_to_mem2) ||
> +             *((uint64_t *)res + 4) ||
> +             memcmp(res, res + 10, 24) ||
> +             memcmp(res, res + 6, 8) )
> +            goto fail;
> +        printf("okay\n");
> +    }
> +    else
> +        printf("skipped\n");
> +
> +    printf("%-40s", "Testing vmovq %%xmm1,32(%%edx)...");
> +    if ( stack_exec && cpu_has_avx )
> +    {
> +        decl_insn(vmovq_to_mem);
> +
> +        asm volatile ( "pcmpgtb %%xmm1, %%xmm1\n"
> +                       put_insn(vmovq_to_mem, "vmovq %%xmm1, 32(%0)")
> +                       :: "d" (NULL) );
> +
> +        memset(res, 0xdb, 64);
> +        set_insn(vmovq_to_mem);
> +        regs.ecx = 0;
> +        regs.edx = (unsigned long)res;
> +        rc = x86_emulate(&ctxt, &emulops);
> +        if ( rc != X86EMUL_OKAY || !check_eip(vmovq_to_mem) ||
> +             *((uint64_t *)res + 4) ||
> +             memcmp(res, res + 10, 24) ||
> +             memcmp(res, res + 6, 8) )
> +            goto fail;
> +        printf("okay\n");
> +    }
> +    else
> +        printf("skipped\n");
> +
>      printf("%-40s", "Testing movdqu %xmm2,(%ecx)...");
>      if ( stack_exec && cpu_has_sse2 )
>      {
> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -269,7 +269,7 @@ static const opcode_desc_t twobyte_table
>      ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
>      ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
>      /* 0xD0 - 0xDF */
> -    ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ModRM,
> +    ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ImplicitOps|ModRM, ModRM,
>      ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ModRM,
>      /* 0xE0 - 0xEF */
>      ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ImplicitOps|ModRM,
> @@ -4779,6 +4779,8 @@ x86_emulate(
>      case X86EMUL_OPC_F3(0x0f, 0x7f):     /* movdqu xmm,xmm/m128 */
>      case X86EMUL_OPC_VEX_F3(0x0f, 0x7f): /* vmovdqu xmm,xmm/m128 */
>                                           /* vmovdqu ymm,ymm/m256 */
> +    case X86EMUL_OPC_66(0x0f, 0xd6):     /* movq xmm,xmm/m64 */
> +    case X86EMUL_OPC_VEX_66(0x0f, 0xd6): /* vmovq xmm,xmm/m64 */
>      {
>          uint8_t *buf = get_stub(stub);
>          struct fpu_insn_ctxt fic = { .insn_bytes = 5 };
> @@ -4796,7 +4798,8 @@ x86_emulate(
>              case vex_66:
>              case vex_f3:
>                  host_and_vcpu_must_have(sse2);
> -                buf[0] = 0x66; /* movdqa */
> +                /* Converting movdqu to movdqa here: Our buffer is aligned. */
> +                buf[0] = 0x66;
>                  get_fpu(X86EMUL_FPU_xmm, &fic);
>                  ea.bytes = 16;
>                  break;
> @@ -4819,6 +4822,11 @@ x86_emulate(
>              get_fpu(X86EMUL_FPU_ymm, &fic);
>              ea.bytes = 16 << vex.l;
>          }
> +        if ( b == 0xd6 )
> +        {
> +            generate_exception_if(vex.l, EXC_UD, -1);
> +            ea.bytes = 8;
> +        }
>          if ( ea.type == OP_MEM )
>          {
>              generate_exception_if((vex.pfx == vex_66) &&
> 

-- 
Mihai DONȚU


* Re: [PATCH 1/5] x86emul: support UMIP
  2016-09-08 13:42 ` [PATCH 1/5] x86emul: support UMIP Jan Beulich
@ 2016-09-30 10:32   ` Andrew Cooper
  2016-09-30 12:23     ` Jan Beulich
  0 siblings, 1 reply; 20+ messages in thread
From: Andrew Cooper @ 2016-09-30 10:32 UTC (permalink / raw)
  To: Jan Beulich, xen-devel

On 08/09/16 14:42, Jan Beulich wrote:
> To make this complete, also add support for SLDT and STR. Note that by
> just looking at the guest CR4 bit, this is independent of actually
> making available the UMIP feature to guests.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -182,7 +182,7 @@ static const opcode_desc_t opcode_table[
>  
>  static const opcode_desc_t twobyte_table[256] = {
>      /* 0x00 - 0x07 */
> -    SrcMem16|ModRM, ImplicitOps|ModRM, ModRM, ModRM,
> +    ModRM, ImplicitOps|ModRM, ModRM, ModRM,
>      0, ImplicitOps, ImplicitOps, ImplicitOps,
>      /* 0x08 - 0x0F */
>      ImplicitOps, ImplicitOps, 0, ImplicitOps,
> @@ -421,6 +421,7 @@ typedef union {
>  /* Control register flags. */
>  #define CR0_PE    (1<<0)
>  #define CR4_TSD   (1<<2)
> +#define CR4_UMIP  (1<<11)
>  
>  /* EFLAGS bit definitions. */
>  #define EFLG_VIP  (1<<20)
> @@ -1484,6 +1485,17 @@ static bool is_aligned(enum x86_segment
>      return !((reg.base + offs) & (size - 1));
>  }
>  
> +static bool is_umip(struct x86_emulate_ctxt *ctxt,
> +                    const struct x86_emulate_ops *ops)

is_umip is an odd way of phrasing this.  umip_enabled() or
is_umip_enabled() would be better.

> +{
> +    unsigned long cr4;
> +
> +    /* Intentionally not using mode_ring0() here to avoid its fail_if(). */
> +    return get_cpl(ctxt, ops) > 0 &&
> +           ops->read_cr && ops->read_cr(4, &cr4, ctxt) == X86EMUL_OKAY &&
> +           (cr4 & CR4_UMIP);
> +}
> +
>  /* Inject a software interrupt/exception, emulating if needed. */
>  static int inject_swint(enum x86_swint_type type,
>                          uint8_t vector, uint8_t insn_len,
> @@ -2051,10 +2063,20 @@ x86_decode(
>              break;
>  
>          case ext_0f:
> -        case ext_0f3a:
> -        case ext_8f08:
> -        case ext_8f09:
> -        case ext_8f0a:
> +            switch ( b )
> +            {
> +            case 0x00: /* Grp6 */
> +                switch ( modrm_reg & 6 )
> +                {
> +                case 0:
> +                    d |= DstMem | SrcImplicit | Mov;
> +                    break;
> +                case 2: case 4:
> +                    d |= SrcMem16;
> +                    break;
> +                }
> +                break;
> +            }
>              break;
>  
>          case ext_0f38:
> @@ -2070,6 +2092,12 @@ x86_decode(
>              }
>              break;
>  
> +        case ext_0f3a:
> +        case ext_8f08:
> +        case ext_8f09:
> +        case ext_8f0a:
> +            break;
> +
>          default:
>              ASSERT_UNREACHABLE();
>          }
> @@ -4177,14 +4205,31 @@ x86_emulate(
>          }
>          break;
>  
> -    case X86EMUL_OPC(0x0f, 0x00): /* Grp6 */
> -        fail_if((modrm_reg & 6) != 2);
> +    case X86EMUL_OPC(0x0f, 0x00): /* Grp6 */ {

Newline for {

> +        enum x86_segment seg = (modrm_reg & 1) ? x86_seg_tr : x86_seg_ldtr;
> +
> +        fail_if(modrm_reg & 4);
>          generate_exception_if(!in_protmode(ctxt, ops), EXC_UD, -1);
> -        generate_exception_if(!mode_ring0(), EXC_GP, 0);
> -        if ( (rc = load_seg((modrm_reg & 1) ? x86_seg_tr : x86_seg_ldtr,
> -                            src.val, 0, NULL, ctxt, ops)) != 0 )
> -            goto done;
> +        if ( modrm_reg & 2 )

This needs to be (modrm_reg & 6) == 2.  Otherwise, the /6 and /7
encodings will also raise #GP when they should raise #UD.

Actually thinking about it, could we just have a full switch here like
other Grp $N decodes?

~Andrew

> +        {
> +            generate_exception_if(!mode_ring0(), EXC_GP, 0);
> +            if ( (rc = load_seg(seg, src.val, 0, NULL, ctxt, ops)) != 0 )
> +                goto done;
> +        }
> +        else
> +        {
> +            struct segment_register reg;
> +
> +            generate_exception_if(is_umip(ctxt, ops), EXC_GP, 0);
> +            fail_if(!ops->read_segment);
> +            if ( (rc = ops->read_segment(seg, &reg, ctxt)) != 0 )
> +                goto done;
> +            dst.val = reg.sel;
> +            if ( dst.type == OP_MEM )
> +                dst.bytes = 2;
> +        }
>          break;
> +    }
>  
>      case X86EMUL_OPC(0x0f, 0x01): /* Grp7 */ {
>          struct segment_register reg;
> @@ -4282,6 +4327,7 @@ x86_emulate(
>          case 0: /* sgdt */
>          case 1: /* sidt */
>              generate_exception_if(ea.type != OP_MEM, EXC_UD, -1);
> +            generate_exception_if(is_umip(ctxt, ops), EXC_GP, 0);
>              fail_if(ops->read_segment == NULL);
>              if ( (rc = ops->read_segment((modrm_reg & 1) ?
>                                           x86_seg_idtr : x86_seg_gdtr,
> @@ -4316,6 +4362,7 @@ x86_emulate(
>                  goto done;
>              break;
>          case 4: /* smsw */
> +            generate_exception_if(is_umip(ctxt, ops), EXC_GP, 0);
>              ea.bytes = (ea.type == OP_MEM) ? 2 : op_bytes;
>              dst = ea;
>              fail_if(ops->read_cr == NULL);
>
>


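For illustration, the full-switch shape suggested here could look like the
sketch below, built from the helpers already visible in this patch; the
verr/verw and /6,/7 handling shown is an assumption, not code from the
series:

    case X86EMUL_OPC(0x0f, 0x00): /* Grp6 */
    {
        enum x86_segment seg = (modrm_reg & 1) ? x86_seg_tr : x86_seg_ldtr;

        generate_exception_if(!in_protmode(ctxt, ops), EXC_UD, -1);
        switch ( modrm_reg & 7 )
        {
        case 0: /* sldt */
        case 1: /* str */
        {
            struct segment_register reg;

            generate_exception_if(is_umip(ctxt, ops), EXC_GP, 0);
            fail_if(!ops->read_segment);
            if ( (rc = ops->read_segment(seg, &reg, ctxt)) != 0 )
                goto done;
            dst.val = reg.sel;
            if ( dst.type == OP_MEM )
                dst.bytes = 2;
            break;
        }
        case 2: /* lldt */
        case 3: /* ltr */
            generate_exception_if(!mode_ring0(), EXC_GP, 0);
            if ( (rc = load_seg(seg, src.val, 0, NULL, ctxt, ops)) != 0 )
                goto done;
            break;
        case 4: /* verr */
        case 5: /* verw */
            fail_if(1); /* still unimplemented, as in the original code */
            break;
        default:
            generate_exception_if(1, EXC_UD, -1); /* /6 and /7 are undefined */
        }
        break;
    }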

* Re: [PATCH 2/5] x86emul: consolidate segment register handling
  2016-09-08 13:43 ` [PATCH 2/5] x86emul: consolidate segment register handling Jan Beulich
@ 2016-09-30 10:39   ` Andrew Cooper
  2016-09-30 12:15     ` Jan Beulich
  0 siblings, 1 reply; 20+ messages in thread
From: Andrew Cooper @ 2016-09-30 10:39 UTC (permalink / raw)
  To: Jan Beulich, xen-devel

On 08/09/16 14:43, Jan Beulich wrote:
> @@ -2861,21 +2860,20 @@ x86_emulate(
>          dst.val = src.val;
>          break;
>  
> -    case 0x8c: /* mov Sreg,r/m */ {
> -        struct segment_register reg;
> -        enum x86_segment seg = decode_segment(modrm_reg);
> +    case 0x8c: /* mov Sreg,r/m */
> +        seg = decode_segment(modrm_reg);
>          generate_exception_if(seg == decode_segment_failed, EXC_UD, -1);
> +    store_seg:

store_seg is easy to confuse with load_seg.  How about read_selector
instead?

It also occurs to me that you might have an easier time with both this
patch and patch 1 if you reversed their order.  Either way, Reviewed-by:
Andrew Cooper <andrew.cooper3@citrix.com>


* Re: [PATCH 4/5] x86/emulate: add support for {, v}movq xmm, xmm/m64
  2016-09-08 13:45 ` [PATCH 4/5] x86/emulate: add support for {, v}movq xmm, xmm/m64 Jan Beulich
  2016-09-08 13:56   ` Mihai Donțu
@ 2016-09-30 10:43   ` Andrew Cooper
  1 sibling, 0 replies; 20+ messages in thread
From: Andrew Cooper @ 2016-09-30 10:43 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Mihai Dontu

On 08/09/16 14:45, Jan Beulich wrote:
> From: Mihai Donțu <mdontu@bitdefender.com>
>
> Signed-off-by: Mihai Donțu <mdontu@bitdefender.com>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>


* Re: [PATCH 5/5] x86/emulate: add support for {, v}movd {, x}mm, r/m32 and {, v}movq {, x}mm, r/m64
  2016-09-08 13:46 ` [PATCH 5/5] x86/emulate: add support for {, v}movd {, x}mm, r/m32 and {, v}movq {, x}mm, r/m64 Jan Beulich
@ 2016-09-30 11:59   ` Andrew Cooper
  2016-09-30 12:11     ` Jan Beulich
  0 siblings, 1 reply; 20+ messages in thread
From: Andrew Cooper @ 2016-09-30 11:59 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Mihai Dontu, zhi.a.wang

On 08/09/16 14:46, Jan Beulich wrote:
> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -223,7 +223,7 @@ static const opcode_desc_t twobyte_table
>      /* 0x70 - 0x7F */
>      SrcImmByte|ModRM, SrcImmByte|ModRM, SrcImmByte|ModRM, SrcImmByte|ModRM,
>      ModRM, ModRM, ModRM, ImplicitOps,
> -    ModRM, ModRM, 0, 0, ModRM, ModRM, ModRM, ImplicitOps|ModRM,
> +    ModRM, ModRM, 0, 0, ModRM, ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM,
>      /* 0x80 - 0x87 */
>      DstImplicit|SrcImm, DstImplicit|SrcImm,
>      DstImplicit|SrcImm, DstImplicit|SrcImm,
> @@ -2291,6 +2291,10 @@ x86_decode(
>          return X86EMUL_UNHANDLEABLE;
>      }
>  
> +    if ( op_bytes == 2 &&
> +         (ctxt->opcode & X86EMUL_OPC_PFX_MASK) == X86EMUL_OPC_66(0, 0) )
> +        op_bytes = 4;

What is this change for?  I presume it is to undo the effect of the
operand size override prefix when we have decided that the prefix
actually had an alternate meaning?

If so, can we have a comment to this effect?

Everything else looks ok.

~Andrew

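To make the prefix's double duty concrete, here is a small illustration
(encodings per the SDM; the clobber annotations are the only assumption):

    /* 66 as an operand-size override: 89 /r, i.e. mov %ebx,%eax, shrinks
     * to mov %bx,%ax. */
    asm volatile ( ".byte 0x66, 0x89, 0xd8" ::: "eax" );
    /* 66 as a mandatory SSE2 prefix: it selects movd %xmm2,%ebx, and the
     * destination stays 32 bits wide despite the "override". */
    asm volatile ( ".byte 0x66, 0x0f, 0x7e, 0xd3" ::: "ebx" );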

* Re: [PATCH 5/5] x86/emulate: add support for {, v}movd {, x}mm, r/m32 and {, v}movq {, x}mm, r/m64
  2016-09-30 11:59   ` Andrew Cooper
@ 2016-09-30 12:11     ` Jan Beulich
  2016-09-30 12:12       ` Andrew Cooper
  0 siblings, 1 reply; 20+ messages in thread
From: Jan Beulich @ 2016-09-30 12:11 UTC (permalink / raw)
  To: Andrew Cooper, xen-devel; +Cc: Mihai Dontu, zhi.a.wang

>>> On 30.09.16 at 13:59, <andrew.cooper3@citrix.com> wrote:
> On 08/09/16 14:46, Jan Beulich wrote:
>> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
>> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
>> @@ -223,7 +223,7 @@ static const opcode_desc_t twobyte_table
>>      /* 0x70 - 0x7F */
>>      SrcImmByte|ModRM, SrcImmByte|ModRM, SrcImmByte|ModRM, SrcImmByte|ModRM,
>>      ModRM, ModRM, ModRM, ImplicitOps,
>> -    ModRM, ModRM, 0, 0, ModRM, ModRM, ModRM, ImplicitOps|ModRM,
>> +    ModRM, ModRM, 0, 0, ModRM, ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM,
>>      /* 0x80 - 0x87 */
>>      DstImplicit|SrcImm, DstImplicit|SrcImm,
>>      DstImplicit|SrcImm, DstImplicit|SrcImm,
>> @@ -2291,6 +2291,10 @@ x86_decode(
>>          return X86EMUL_UNHANDLEABLE;
>>      }
>>  
>> +    if ( op_bytes == 2 &&
>> +         (ctxt->opcode & X86EMUL_OPC_PFX_MASK) == X86EMUL_OPC_66(0, 0) )
>> +        op_bytes = 4;
> 
> What is this change for?  I presume it is to undo the effect of the
> operand size override prefix when we have decided that the prefix
> actually had an alternate meaning?

Yes.

> If so, can we have a comment to this effect?

+    /*
+     * Undo the operand-size override effect of prefix 66 when it was
+     * determined to have another meaning.
+     */

> Everything else looks ok.

Can I take this as R-b then with the comment added?

Jan



* Re: [PATCH 5/5] x86/emulate: add support for {, v}movd {, x}mm, r/m32 and {, v}movq {, x}mm, r/m64
  2016-09-30 12:11     ` Jan Beulich
@ 2016-09-30 12:12       ` Andrew Cooper
  0 siblings, 0 replies; 20+ messages in thread
From: Andrew Cooper @ 2016-09-30 12:12 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Mihai Dontu, zhi.a.wang

On 30/09/16 13:11, Jan Beulich wrote:
>>>> On 30.09.16 at 13:59, <andrew.cooper3@citrix.com> wrote:
>> On 08/09/16 14:46, Jan Beulich wrote:
>>> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
>>> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
>>> @@ -223,7 +223,7 @@ static const opcode_desc_t twobyte_table
>>>      /* 0x70 - 0x7F */
>>>      SrcImmByte|ModRM, SrcImmByte|ModRM, SrcImmByte|ModRM, SrcImmByte|ModRM,
>>>      ModRM, ModRM, ModRM, ImplicitOps,
>>> -    ModRM, ModRM, 0, 0, ModRM, ModRM, ModRM, ImplicitOps|ModRM,
>>> +    ModRM, ModRM, 0, 0, ModRM, ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM,
>>>      /* 0x80 - 0x87 */
>>>      DstImplicit|SrcImm, DstImplicit|SrcImm,
>>>      DstImplicit|SrcImm, DstImplicit|SrcImm,
>>> @@ -2291,6 +2291,10 @@ x86_decode(
>>>          return X86EMUL_UNHANDLEABLE;
>>>      }
>>>  
>>> +    if ( op_bytes == 2 &&
>>> +         (ctxt->opcode & X86EMUL_OPC_PFX_MASK) == X86EMUL_OPC_66(0, 0) )
>>> +        op_bytes = 4;
>> What is this change for?  I presume it is to undo the effect of the
>> operand size override prefix when we have decided that the prefix
>> actually had an alternate meaning?
> Yes.
>
>> If so, can we have a comment to this effect?
> +    /*
> +     * Undo the operand-size override effect of prefix 66 when it was
> +     * determined to have another meaning.
> +     */
>
>> Everything else looks ok.
> Can I take this as R-b then with the comment added?

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>


* Re: [PATCH 2/5] x86emul: consolidate segment register handling
  2016-09-30 10:39   ` Andrew Cooper
@ 2016-09-30 12:15     ` Jan Beulich
  2016-09-30 12:16       ` Andrew Cooper
  0 siblings, 1 reply; 20+ messages in thread
From: Jan Beulich @ 2016-09-30 12:15 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel

>>> On 30.09.16 at 12:39, <andrew.cooper3@citrix.com> wrote:
> On 08/09/16 14:43, Jan Beulich wrote:
>> @@ -2861,21 +2860,20 @@ x86_emulate(
>>          dst.val = src.val;
>>          break;
>>  
>> -    case 0x8c: /* mov Sreg,r/m */ {
>> -        struct segment_register reg;
>> -        enum x86_segment seg = decode_segment(modrm_reg);
>> +    case 0x8c: /* mov Sreg,r/m */
>> +        seg = decode_segment(modrm_reg);
>>          generate_exception_if(seg == decode_segment_failed, EXC_UD, -1);
>> +    store_seg:
> 
> store_seg is easy to confuse with load_seg.  How about read_selector
> instead?

Well - it was specifically meant to match up with load_seg. I dislike
read_selector (specifically because the read part of the operation
was done by the time we get here), but if you like store_selector
better than store_seg, I could use that one.

> It also occurs to me that you might have an easier time with both this
> patch and patch 1 if you reversed their order.

Yes, but that's the order they came to life in, and I then didn't
want to bother switching them around.

>  Either way, Reviewed-by:
> Andrew Cooper <andrew.cooper3@citrix.com>

Thanks, Jan



* Re: [PATCH 2/5] x86emul: consolidate segment register handling
  2016-09-30 12:15     ` Jan Beulich
@ 2016-09-30 12:16       ` Andrew Cooper
  0 siblings, 0 replies; 20+ messages in thread
From: Andrew Cooper @ 2016-09-30 12:16 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel

On 30/09/16 13:15, Jan Beulich wrote:
>>>> On 30.09.16 at 12:39, <andrew.cooper3@citrix.com> wrote:
>> On 08/09/16 14:43, Jan Beulich wrote:
>>> @@ -2861,21 +2860,20 @@ x86_emulate(
>>>          dst.val = src.val;
>>>          break;
>>>  
>>> -    case 0x8c: /* mov Sreg,r/m */ {
>>> -        struct segment_register reg;
>>> -        enum x86_segment seg = decode_segment(modrm_reg);
>>> +    case 0x8c: /* mov Sreg,r/m */
>>> +        seg = decode_segment(modrm_reg);
>>>          generate_exception_if(seg == decode_segment_failed, EXC_UD, -1);
>>> +    store_seg:
>> store_seg is easy to confuse with load_seg.  How about read_selector
>> instead?
> Well - it was specifically meant to match up with load_seg. I dislike
> read_selector (specifically because the read part of the operation
> was done by the time we get here), but if you like store_selector
> better than store_seg, I could use that one.

Fine by me.

~Andrew


* Re: [PATCH 1/5] x86emul: support UMIP
  2016-09-30 10:32   ` Andrew Cooper
@ 2016-09-30 12:23     ` Jan Beulich
  2016-09-30 12:27       ` Andrew Cooper
  0 siblings, 1 reply; 20+ messages in thread
From: Jan Beulich @ 2016-09-30 12:23 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel

>>> On 30.09.16 at 12:32, <andrew.cooper3@citrix.com> wrote:
> On 08/09/16 14:42, Jan Beulich wrote:
>> @@ -1484,6 +1485,17 @@ static bool is_aligned(enum x86_segment
>>      return !((reg.base + offs) & (size - 1));
>>  }
>>  
>> +static bool is_umip(struct x86_emulate_ctxt *ctxt,
>> +                    const struct x86_emulate_ops *ops)
> 
> is_umip is an odd way of phrasing this.  umip_enabled() or
> is_umip_enabled() would be better.

That would have been my choice if there wasn't the CPL check in here.
I prefer to read the 'p' here as "prevented" rather than "prevention".

>> @@ -4177,14 +4205,31 @@ x86_emulate(
>>          }
>>          break;
>>  
>> -    case X86EMUL_OPC(0x0f, 0x00): /* Grp6 */
>> -        fail_if((modrm_reg & 6) != 2);
>> +    case X86EMUL_OPC(0x0f, 0x00): /* Grp6 */ {
> 
> Newline for {

Yeah, we're kind of inconsistent with that when they are for case
statements.

>> +        enum x86_segment seg = (modrm_reg & 1) ? x86_seg_tr : x86_seg_ldtr;
>> +
>> +        fail_if(modrm_reg & 4);
>>          generate_exception_if(!in_protmode(ctxt, ops), EXC_UD, -1);
>> -        generate_exception_if(!mode_ring0(), EXC_GP, 0);
>> -        if ( (rc = load_seg((modrm_reg & 1) ? x86_seg_tr : x86_seg_ldtr,
>> -                            src.val, 0, NULL, ctxt, ops)) != 0 )
>> -            goto done;
>> +        if ( modrm_reg & 2 )
> 
> This needs to be (modrm_reg & 6) == 2.  Otherwise, the /6 and /7
> encodings will also raise #GP when they should raise #UD

Note the earlier "fail_if(modrm_reg & 4)".

> Actually thinking about it, could we just have a full switch here like
> other Grp $N decodes?

I can certainly pull this ahead from a later (not yet submitted)
patch.

Jan



* Re: [PATCH 1/5] x86emul: support UMIP
  2016-09-30 12:23     ` Jan Beulich
@ 2016-09-30 12:27       ` Andrew Cooper
  2016-09-30 12:44         ` Jan Beulich
  0 siblings, 1 reply; 20+ messages in thread
From: Andrew Cooper @ 2016-09-30 12:27 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel

On 30/09/16 13:23, Jan Beulich wrote:
>>>> On 30.09.16 at 12:32, <andrew.cooper3@citrix.com> wrote:
>> On 08/09/16 14:42, Jan Beulich wrote:
>>> @@ -1484,6 +1485,17 @@ static bool is_aligned(enum x86_segment
>>>      return !((reg.base + offs) & (size - 1));
>>>  }
>>>  
>>> +static bool is_umip(struct x86_emulate_ctxt *ctxt,
>>> +                    const struct x86_emulate_ops *ops)
>> is_umip is an odd way of phrasing this.  umip_enabled() or
>> is_umip_enabled() would be better.
> That would have been my choice if there wasn't the CPL check in here.
>> I prefer to read the 'p' here as "prevented" rather than "prevention".

In which case, umip_active()?

>>> +        enum x86_segment seg = (modrm_reg & 1) ? x86_seg_tr : x86_seg_ldtr;
>>> +
>>> +        fail_if(modrm_reg & 4);
>>>          generate_exception_if(!in_protmode(ctxt, ops), EXC_UD, -1);
>>> -        generate_exception_if(!mode_ring0(), EXC_GP, 0);
>>> -        if ( (rc = load_seg((modrm_reg & 1) ? x86_seg_tr : x86_seg_ldtr,
>>> -                            src.val, 0, NULL, ctxt, ops)) != 0 )
>>> -            goto done;
>>> +        if ( modrm_reg & 2 )
>> This needs to be (modrm_reg & 6) == 2.  Otherwise, the /6 and /7
>> encodings will also raise #GP when they should raise #UD
> Note the earlier "fail_if(modrm_reg & 4)".

Ah - I hadn't spotted that, which does catch that case, as well as ver{r,w}.

>
>> Actually thinking about it, could we just have a full switch here like
>> other Grp $N decodes?
> I can certainly pull this ahead from a later (not yet submitted)
> patch.

I think this would be best, especially if you already have the code to hand.

~Andrew


* Re: [PATCH 3/5] x86emul: support RTM instructions
  2016-09-08 13:44 ` [PATCH 3/5] x86emul: support RTM instructions Jan Beulich
@ 2016-09-30 12:37   ` Andrew Cooper
  2016-09-30 12:51     ` Jan Beulich
  0 siblings, 1 reply; 20+ messages in thread
From: Andrew Cooper @ 2016-09-30 12:37 UTC (permalink / raw)
  To: Jan Beulich, xen-devel

On 08/09/16 14:44, Jan Beulich wrote:
> Minimal emulation: XBEGIN aborts right away, hence
> - XABORT is just a no-op,
> - XEND always raises #GP,
> - XTEST always signals neither RTM nor HLE are active.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -1172,6 +1172,8 @@ static bool_t vcpu_has(
>  #define vcpu_has_clflush() vcpu_has(       1, EDX, 19, ctxt, ops)
>  #define vcpu_has_lzcnt() vcpu_has(0x80000001, ECX,  5, ctxt, ops)
>  #define vcpu_has_bmi1()  vcpu_has(0x00000007, EBX,  3, ctxt, ops)
> +#define vcpu_has_hle()   vcpu_has(0x00000007, EBX,  4, ctxt, ops)
> +#define vcpu_has_rtm()   vcpu_has(0x00000007, EBX, 11, ctxt, ops)
>  
>  #define vcpu_must_have(leaf, reg, bit) \
>      generate_exception_if(!vcpu_has(leaf, reg, bit, ctxt, ops), EXC_UD, -1)
> @@ -2852,7 +2854,18 @@ x86_emulate(
>          lock_prefix = 1;
>          break;
>  
> -    case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */
> +    case 0xc6: /* Grp11: mov / xabort */
> +    case 0xc7: /* Grp11: mov / xbegin */
> +        if ( modrm == 0xf8 && vcpu_has_rtm() )
> +        {
> +            if ( b & 1 )
> +            {
> +                jmp_rel((int32_t)src.val);

This should be based on op_bytes.  There are two forms, one with a rel16
jump and one with rel32, and I don't see this being accounted for
anywhere else.

> +                _regs.eax = 0;
> +            }
> +            dst.type = OP_NONE;

The XABORT instruction should explicitly set bits in %eax.

Incidentally, what is supposed to happen if we branch into the middle of
an RTM region?

> +            break;
> +        }
>          generate_exception_if((modrm_reg & 7) != 0, EXC_UD, -1);
>      case 0x88 ... 0x8b: /* mov */
>      case 0xa0 ... 0xa1: /* mov mem.offs,{%al,%ax,%eax,%rax} */
> @@ -4246,6 +4259,17 @@ x86_emulate(
>                  goto done;
>              goto no_writeback;
>  
> +        case 0xd5: /* xend */
> +            generate_exception_if(vcpu_has_rtm() && !vex.pfx, EXC_GP, 0);
> +            break;
> +
> +        case 0xd6: /* xtest */
> +            if ( (!vcpu_has_rtm() && !vcpu_has_hle()) || vex.pfx )
> +                break;

Shouldn't this raise #UD explicitly?  I can't spot anything which does
if we break out.

~Andrew

> +            /* Neither HLE nor RTM can be active when we get here. */
> +            _regs.eflags |= EFLG_ZF;
> +            goto no_writeback;
> +
>          case 0xdf: /* invlpga */
>              generate_exception_if(!in_protmode(ctxt, ops), EXC_UD, -1);
>              generate_exception_if(!mode_ring0(), EXC_GP, 0);
>
>
>


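For reference, the two forms differ only in displacement width and in
truncation of the result. A sketch (helper name and shape assumed, not
emulator code):

    #include <stdint.h>

    /* Branch target of xbegin rel16 (op_bytes == 2) vs. rel32: the
     * displacement is sign-extended and the result truncated to the
     * operand size. */
    static unsigned long xbegin_target(unsigned long next_ip, int32_t rel,
                                       unsigned int op_bytes)
    {
        unsigned long ip = next_ip + rel;

        return op_bytes == 2 ? (uint16_t)ip : ip;
    }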

* Re: [PATCH 1/5] x86emul: support UMIP
  2016-09-30 12:27       ` Andrew Cooper
@ 2016-09-30 12:44         ` Jan Beulich
  0 siblings, 0 replies; 20+ messages in thread
From: Jan Beulich @ 2016-09-30 12:44 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel

>>> On 30.09.16 at 14:27, <andrew.cooper3@citrix.com> wrote:
> On 30/09/16 13:23, Jan Beulich wrote:
>>>>> On 30.09.16 at 12:32, <andrew.cooper3@citrix.com> wrote:
>>> On 08/09/16 14:42, Jan Beulich wrote:
>>>> @@ -1484,6 +1485,17 @@ static bool is_aligned(enum x86_segment
>>>>      return !((reg.base + offs) & (size - 1));
>>>>  }
>>>>  
>>>> +static bool is_umip(struct x86_emulate_ctxt *ctxt,
>>>> +                    const struct x86_emulate_ops *ops)
>>> is_umip is an odd way of phrasing this.  umip_enabled() or
>>> is_umip_enabled() would be better.
>> That would have been my choice if there wasn't the CPL check in here.
>> I prefer to read the 'p' here as "prevented" rather then "prevention".
> 
> In which case, umip_active()?

Ah - that's fine.

Jan

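To make the intended guest-visible behaviour concrete, a hypothetical
ring-3 probe (not part of the series):

    #include <stdint.h>

    /* With CR4.UMIP set and CPL > 0, i.e. when umip_active() holds,
     * sgdt, sidt, sldt, smsw and str are all expected to raise #GP(0)
     * instead of leaking descriptor table addresses to user space. */
    static void probe_sgdt(void)
    {
        struct __attribute__((packed)) { uint16_t limit; uintptr_t base; } gdtr;

        asm volatile ( "sgdt %0" : "=m" (gdtr) ); /* #GP(0) when UMIP is active */
    }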


* Re: [PATCH 3/5] x86emul: support RTM instructions
  2016-09-30 12:37   ` Andrew Cooper
@ 2016-09-30 12:51     ` Jan Beulich
  0 siblings, 0 replies; 20+ messages in thread
From: Jan Beulich @ 2016-09-30 12:51 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel

>>> On 30.09.16 at 14:37, <andrew.cooper3@citrix.com> wrote:
> On 08/09/16 14:44, Jan Beulich wrote:
>> @@ -2852,7 +2854,18 @@ x86_emulate(
>>          lock_prefix = 1;
>>          break;
>>  
>> -    case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */
>> +    case 0xc6: /* Grp11: mov / xabort */
>> +    case 0xc7: /* Grp11: mov / xbegin */
>> +        if ( modrm == 0xf8 && vcpu_has_rtm() )
>> +        {
>> +            if ( b & 1 )
>> +            {
>> +                jmp_rel((int32_t)src.val);
> 
> This should be based on op_bytes.  There are two forms, one with a rel16
> jump and one with rel32, and I don't see this being accounted for
> anywhere else.

Just like for other branches (as well as any instructions with immediate
operands) this gets taken care of when the immediate gets fetched.

>> +                _regs.eax = 0;
>> +            }
>> +            dst.type = OP_NONE;
> 
> The XABORT instruction should explicitly set bit.

???

Since we abort upon XBEGIN, XABORT is supposed to be a NOP.

> Incidentally, what is supposed to happen if we branch into the middle of
> an RTM region?

Sooner or later the code would reach an XEND, which is defined
to #GP with no prior XBEGIN.

>> @@ -4246,6 +4259,17 @@ x86_emulate(
>>                  goto done;
>>              goto no_writeback;
>>  
>> +        case 0xd5: /* xend */
>> +            generate_exception_if(vcpu_has_rtm() && !vex.pfx, EXC_GP, 0);
>> +            break;
>> +
>> +        case 0xd6: /* xtest */
>> +            if ( (!vcpu_has_rtm() && !vcpu_has_hle()) || vex.pfx )
>> +                break;
> 
> Shouldn't this raise #UD explicitly?  I can't spot anything which does
> if we break out.

As mentioned on IRC I already made this explicit for v2, but even
without it, it's being taken care of by

            generate_exception_if(ea.type != OP_MEM, EXC_UD, -1);

in the second switch() statement.

Here's how v2 is going to look like:

+        case 0xd5: /* xend */
+            generate_exception_if(vex.pfx, EXC_UD, -1);
+            generate_exception_if(!vcpu_has_rtm(), EXC_UD, -1);
+            generate_exception_if(vcpu_has_rtm(), EXC_GP, 0);
+            break;
+
+        case 0xd6: /* xtest */
+            generate_exception_if(vex.pfx, EXC_UD, -1);
+            generate_exception_if(!vcpu_has_rtm() && !vcpu_has_hle(),
+                                  EXC_UD, -1);
+            /* Neither HLE nor RTM can be active when we get here. */
+            _regs.eflags |= EFLG_ZF;
+            goto no_writeback;


Jan

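Put differently, the guest-visible semantics of the minimal emulation can
be summarised with the RTM intrinsics (build with -mrtm; a sketch only, and
on hardware executing these instructions natively the transactional path
may of course run):

    #include <immintrin.h>
    #include <stdio.h>

    int main(void)
    {
        if ( _xbegin() == _XBEGIN_STARTED )
        {
            _xend();                    /* never reached under this emulation */
            puts("transactional path");
        }
        else
            /* Always taken: XBEGIN aborts right away, and XTEST then
             * reports that no transaction is active. */
            printf("aborted; in transaction: %d\n", (int)_xtest());

        return 0;
    }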
