All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jan Beulich <JBeulich@suse.com>
To: "xen-devel@lists.xenproject.org" <xen-devel@lists.xenproject.org>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>, Wei Liu <wl@xen.org>,
	Roger Pau Monne <roger.pau@citrix.com>
Subject: [Xen-devel] [PATCH v10 04/13] x86emul: support AVX512_4FMAPS insns
Date: Wed, 17 Jul 2019 06:34:43 +0000	[thread overview]
Message-ID: <3ae1722e-fa97-6cea-dbb4-9a08f6ab3f14@suse.com> (raw)
In-Reply-To: <0ccca19e-7bbb-ab1e-c0bb-a568b02874e0@suse.com>

A decoder adjustment is needed here because of the current sharing of
table entries between different (implied) opcode prefixes: The same
major opcodes are also used for vfmsub{132,213}{p,s}{s,d}, which have a
different memory operand size and different Disp8 scaling than the
v4f{,n}madd{p,s}s insns being added.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
v9: Re-base. Explain need for decoder special case.
v8: Correct vcpu_has_*() insertion point.
v7: Re-base.
v6: New.

--- a/tools/tests/x86_emulator/evex-disp8.c
+++ b/tools/tests/x86_emulator/evex-disp8.c
@@ -538,6 +538,13 @@ static const struct test avx512pf_512[]
      INSNX(scatterpf1q, 66, 0f38, c7, 6, vl, sd, el),
  };
  
+static const struct test avx512_4fmaps_512[] = {
+    INSN(4fmaddps,  f2, 0f38, 9a, el_4, d, vl),
+    INSN(4fmaddss,  f2, 0f38, 9b, el_4, d, vl),
+    INSN(4fnmaddps, f2, 0f38, aa, el_4, d, vl),
+    INSN(4fnmaddss, f2, 0f38, ab, el_4, d, vl),
+};
+
  static const struct test avx512_bitalg_all[] = {
      INSN(popcnt,      66, 0f38, 54, vl, bw, vl),
      INSN(pshufbitqmb, 66, 0f38, 8f, vl,  b, vl),
@@ -941,6 +948,7 @@ void evex_disp8_test(void *instr, struct
      RUN(avx512er, 512);
  #define cpu_has_avx512pf cpu_has_avx512f
      RUN(avx512pf, 512);
+    RUN(avx512_4fmaps, 512);
      RUN(avx512_bitalg, all);
      RUN(avx512_ifma, all);
      RUN(avx512_vbmi, all);
--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -4274,6 +4274,81 @@ int main(int argc, char **argv)
      }
  #endif
  
+    printf("%-40s", "Testing v4fmaddps 32(%ecx),%zmm4,%zmm4{%k5}...");
+    if ( stack_exec && cpu_has_avx512_4fmaps )
+    {
+        decl_insn(v4fmaddps);
+        static const struct {
+            float f[16];
+        } in = {{
+            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+        }}, out = {{
+            1 + 1 * 9 + 2 * 10 + 3 * 11 + 4 * 12,
+            2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+            16 + 16 * 9 + 17 * 10 + 18 * 11 + 19 * 12
+        }};
+
+        asm volatile ( "vmovups %1, %%zmm4\n\t"
+                       "vbroadcastss %%xmm4, %%zmm7\n\t"
+                       "vaddps %%zmm4, %%zmm7, %%zmm5\n\t"
+                       "vaddps %%zmm5, %%zmm7, %%zmm6\n\t"
+                       "vaddps %%zmm6, %%zmm7, %%zmm7\n\t"
+                       "kmovw %2, %%k5\n"
+                       put_insn(v4fmaddps,
+                                "v4fmaddps 32(%0), %%zmm4, %%zmm4%{%%k5%}")
+                       :: "c" (NULL), "m" (in), "rmk" (0x8001) );
+
+        set_insn(v4fmaddps);
+        regs.ecx = (unsigned long)&in;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(v4fmaddps) )
+            goto fail;
+
+        asm ( "vcmpeqps %1, %%zmm4, %%k0\n\t"
+              "kmovw %%k0, %0" : "=g" (rc) : "m" (out) );
+        if ( rc != 0xffff )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing v4fnmaddss 16(%edx),%zmm4,%zmm4{%k3}...");
+    if ( stack_exec && cpu_has_avx512_4fmaps )
+    {
+        decl_insn(v4fnmaddss);
+        static const struct {
+            float f[16];
+        } in = {{
+            1, 2, 3, 4, 5, 6, 7, 8
+        }}, out = {{
+            1 - 1 * 5 - 2 * 6 - 3 * 7 - 4 * 8, 2, 3, 4
+        }};
+
+        asm volatile ( "vmovups %1, %%xmm4\n\t"
+                       "vaddss %%xmm4, %%xmm4, %%xmm5\n\t"
+                       "vaddss %%xmm5, %%xmm4, %%xmm6\n\t"
+                       "vaddss %%xmm6, %%xmm4, %%xmm7\n\t"
+                       "kmovw %2, %%k3\n"
+                       put_insn(v4fnmaddss,
+                                "v4fnmaddss 16(%0), %%xmm4, %%xmm4%{%%k3%}")
+                       :: "d" (NULL), "m" (in), "rmk" (1) );
+
+        set_insn(v4fnmaddss);
+        regs.edx = (unsigned long)&in;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY || !check_eip(v4fnmaddss) )
+            goto fail;
+
+        asm ( "vcmpeqps %1, %%zmm4, %%k0\n\t"
+              "kmovw %%k0, %0" : "=g" (rc) : "m" (out) );
+        if ( rc != 0xffff )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
  #undef decl_insn
  #undef put_insn
  #undef set_insn
--- a/tools/tests/x86_emulator/x86-emulate.h
+++ b/tools/tests/x86_emulator/x86-emulate.h
@@ -146,6 +146,7 @@ static inline bool xcr0_mask(uint64_t ma
  #define cpu_has_avx512_vbmi2 (cp.feat.avx512_vbmi2 && xcr0_mask(0xe6))
  #define cpu_has_avx512_bitalg (cp.feat.avx512_bitalg && xcr0_mask(0xe6))
  #define cpu_has_avx512_vpopcntdq (cp.feat.avx512_vpopcntdq && xcr0_mask(0xe6))
+#define cpu_has_avx512_4fmaps (cp.feat.avx512_4fmaps && xcr0_mask(0xe6))
  
  #define cpu_has_xgetbv1   (cpu_has_xsave && cp.xstate.xgetbv1)
  
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1892,6 +1892,7 @@ in_protmode(
  #define vcpu_has_avx512_bitalg() (ctxt->cpuid->feat.avx512_bitalg)
  #define vcpu_has_avx512_vpopcntdq() (ctxt->cpuid->feat.avx512_vpopcntdq)
  #define vcpu_has_rdpid()       (ctxt->cpuid->feat.rdpid)
+#define vcpu_has_avx512_4fmaps() (ctxt->cpuid->feat.avx512_4fmaps)
  
  #define vcpu_must_have(feat) \
      generate_exception_if(!vcpu_has_##feat(), EXC_UD)
@@ -3173,6 +3174,18 @@ x86_decode(
                                                     state);
                      state->simd_size = simd_other;
                  }
+
+                switch ( b )
+                {
+                /* v4f{,n}madd{p,s}s need special casing */
+                case 0x9a: case 0x9b: case 0xaa: case 0xab:
+                    if ( evex.pfx == vex_f2 )
+                    {
+                        disp8scale = 4;
+                        state->simd_size = simd_128;
+                    }
+                    break;
+                }
              }
              break;
  
@@ -9370,6 +9383,24 @@ x86_emulate(
              avx512_vlen_check(true);
          goto simd_zmm;
  
+    case X86EMUL_OPC_EVEX_F2(0x0f38, 0x9a): /* v4fmaddps m128,zmm+3,zmm{k} */
+    case X86EMUL_OPC_EVEX_F2(0x0f38, 0xaa): /* v4fnmaddps m128,zmm+3,zmm{k} */
+        host_and_vcpu_must_have(avx512_4fmaps);
+        generate_exception_if((ea.type != OP_MEM || evex.w || evex.brs ||
+                               evex.lr != 2),
+                              EXC_UD);
+        op_mask = op_mask & 0xffff ? 0xf : 0;
+        goto simd_zmm;
+
+    case X86EMUL_OPC_EVEX_F2(0x0f38, 0x9b): /* v4fmaddss m128,xmm+3,xmm{k} */
+    case X86EMUL_OPC_EVEX_F2(0x0f38, 0xab): /* v4fnmaddss m128,xmm+3,xmm{k} */
+        host_and_vcpu_must_have(avx512_4fmaps);
+        generate_exception_if((ea.type != OP_MEM || evex.w || evex.brs ||
+                               evex.lr == 3),
+                              EXC_UD);
+        op_mask = op_mask & 1 ? 0xf : 0;
+        goto simd_zmm;
+
      case X86EMUL_OPC_EVEX_66(0x0f38, 0xa0): /* vpscatterd{d,q} [xyz]mm,mem{k} */
      case X86EMUL_OPC_EVEX_66(0x0f38, 0xa1): /* vpscatterq{d,q} [xyz]mm,mem{k} */
      case X86EMUL_OPC_EVEX_66(0x0f38, 0xa2): /* vscatterdp{s,d} [xyz]mm,mem{k} */
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -119,6 +119,7 @@
  #define cpu_has_itsc            boot_cpu_has(X86_FEATURE_ITSC)
  
  /* CPUID level 0x00000007:0.edx */
+#define cpu_has_avx512_4fmaps   boot_cpu_has(X86_FEATURE_AVX512_4FMAPS)
  #define cpu_has_tsx_force_abort boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)
  
  /* Synthesized. */

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

  parent reply	other threads:[~2019-07-17  6:35 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-07-17  6:27 [Xen-devel] [PATCH v10 00/13] x86emul: remaining AVX512 support Jan Beulich
2019-07-17  6:33 ` [Xen-devel] [PATCH v10 01/13] x86emul: support of AVX512* population count insns Jan Beulich
2019-07-17 11:32   ` Andrew Cooper
2019-07-17  6:34 ` [Xen-devel] [PATCH v10 02/13] x86emul: support of AVX512_IFMA insns Jan Beulich
2019-07-17  6:34 ` [Xen-devel] [PATCH v10 03/13] x86emul: support remaining AVX512_VBMI2 insns Jan Beulich
2019-07-17  6:34 ` Jan Beulich [this message]
2019-07-17  6:35 ` [Xen-devel] [PATCH v10 05/13] x86emul: support AVX512_4VNNIW insns Jan Beulich
2019-07-17  6:35 ` [Xen-devel] [PATCH v10 06/13] x86emul: support AVX512_VNNI insns Jan Beulich
2019-07-17  6:35 ` [Xen-devel] [PATCH v10 07/13] x86emul: support VPCLMULQDQ insns Jan Beulich
2019-07-17  6:36 ` [Xen-devel] [PATCH v10 08/13] x86emul: support VAES insns Jan Beulich
2019-07-17  6:36 ` [Xen-devel] [PATCH v10 09/13] x86emul: support GFNI insns Jan Beulich
2019-07-17  6:36 ` [Xen-devel] [PATCH v10 10/13] x86emul: restore ordering within main switch statement Jan Beulich
2019-07-17  6:37 ` [Xen-devel] [PATCH v10 11/13] x86emul: add an AES/VAES test case to the harness Jan Beulich
2019-07-17  6:38 ` [Xen-devel] [PATCH v10 12/13] x86emul: add a SHA " Jan Beulich
2019-07-17  6:38 ` [Xen-devel] [PATCH v10 13/13] x86emul: add a PCLMUL/VPCLMUL " Jan Beulich

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3ae1722e-fa97-6cea-dbb4-9a08f6ab3f14@suse.com \
    --to=jbeulich@suse.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=roger.pau@citrix.com \
    --cc=wl@xen.org \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.