All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Jan Beulich" <JBeulich@suse.com>
To: xen-devel <xen-devel@lists.xenproject.org>
Cc: George Dunlap <George.Dunlap@eu.citrix.com>,
	Andrew Cooper <andrew.cooper3@citrix.com>
Subject: [PATCH v3 04/25] x86emul: support FMA4 insns
Date: Thu, 07 Dec 2017 07:01:18 -0700	[thread overview]
Message-ID: <5A2957BE020000780019589A@prv-mh.provo.novell.com> (raw)
In-Reply-To: <5A29550C020000780019585B@prv-mh.provo.novell.com>

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v3: Re-base.

--- a/.gitignore
+++ b/.gitignore
@@ -226,6 +226,7 @@
 tools/tests/x86_emulator/asm
 tools/tests/x86_emulator/avx*.[ch]
 tools/tests/x86_emulator/blowfish.h
+tools/tests/x86_emulator/fma*.[ch]
 tools/tests/x86_emulator/sse*.[ch]
 tools/tests/x86_emulator/test_x86_emulator
 tools/tests/x86_emulator/x86_emulate
--- a/tools/tests/x86_emulator/Makefile
+++ b/tools/tests/x86_emulator/Makefile
@@ -12,7 +12,8 @@ run: $(TARGET)
 	./$(TARGET)
 
 SIMD := sse sse2 sse4 avx
-TESTCASES := blowfish $(SIMD) sse2-avx sse4-avx
+FMA := fma4
+TESTCASES := blowfish $(SIMD) sse2-avx sse4-avx $(FMA)
 
 blowfish-cflags := ""
 blowfish-cflags-x86_32 := "-mno-accumulate-outgoing-args -Dstatic="
@@ -29,6 +30,9 @@ sse4-flts := $(sse2-flts)
 avx-vecs := 16 32
 avx-ints :=
 avx-flts := 4 8
+fma4-vecs := $(avx-vecs)
+fma4-ints :=
+fma4-flts := $(avx-flts)
 
 # When converting SSE to AVX, have the compiler avoid XMM0 to widen
 # coverage of the VEX.vvvv checks in the emulator. We must not do this,
@@ -58,7 +62,7 @@ $(1)-avx-cflags := \
 	    "-D_$(vec)u$(int) -m$(1) $(sse2avx-$(1)) -Os -DVEC_SIZE=$(vec) -DUINT_SIZE=$(int)"))
 endef
 
-$(foreach flavor,$(SIMD),$(eval $(call simd-defs,$(flavor))))
+$(foreach flavor,$(SIMD) $(FMA),$(eval $(call simd-defs,$(flavor))))
 
 $(addsuffix .h,$(TESTCASES)): %.h: %.c testcase.mk Makefile
 	rm -f $@.new $*.bin
@@ -77,6 +81,11 @@ $(addsuffix .h,$(TESTCASES)): %.h: %.c t
 $(addsuffix .c,$(SIMD)) $(addsuffix -avx.c,$(filter sse%,$(SIMD))):
 	ln -sf simd.c $@
 
+$(addsuffix .c,$(FMA)):
+	ln -sf simd-fma.c $@
+
+$(addsuffix .o,$(SIMD) $(FMA)) $(addsuffix -avx.o,$(filter sse%,$(SIMD))): simd.h
+
 $(TARGET): x86-emulate.o test_x86_emulator.o
 	$(HOSTCC) $(HOSTCFLAGS) -o $@ $^
 
--- a/tools/tests/x86_emulator/simd.c
+++ b/tools/tests/x86_emulator/simd.c
@@ -1,71 +1,6 @@
-#include <stdbool.h>
+#include "simd.h"
 
-asm (
-    "\t.text\n"
-    "\t.globl _start\n"
-    "_start:\n"
-#if defined(__i386__) && VEC_SIZE == 16
-    "\tpush %ebp\n"
-    "\tmov %esp,%ebp\n"
-    "\tand $~0xf,%esp\n"
-    "\tcall simd_test\n"
-    "\tleave\n"
-    "\tret"
-#else
-    "\tjmp simd_test"
-#endif
-    );
-
-typedef
-#if defined(INT_SIZE)
-# define ELEM_SIZE INT_SIZE
-signed int
-# if INT_SIZE == 1
-#  define MODE QI
-# elif INT_SIZE == 2
-#  define MODE HI
-# elif INT_SIZE == 4
-#  define MODE SI
-# elif INT_SIZE == 8
-#  define MODE DI
-# endif
-#elif defined(UINT_SIZE)
-# define ELEM_SIZE UINT_SIZE
-unsigned int
-# if UINT_SIZE == 1
-#  define MODE QI
-# elif UINT_SIZE == 2
-#  define MODE HI
-# elif UINT_SIZE == 4
-#  define MODE SI
-# elif UINT_SIZE == 8
-#  define MODE DI
-# endif
-#elif defined(FLOAT_SIZE)
-float
-# define ELEM_SIZE FLOAT_SIZE
-# if FLOAT_SIZE == 4
-#  define MODE SF
-# elif FLOAT_SIZE == 8
-#  define MODE DF
-# endif
-#endif
-#ifndef VEC_SIZE
-# define VEC_SIZE ELEM_SIZE
-#endif
-__attribute__((mode(MODE), vector_size(VEC_SIZE))) vec_t;
-
-#define ELEM_COUNT (VEC_SIZE / ELEM_SIZE)
-
-typedef unsigned int __attribute__((mode(QI), vector_size(VEC_SIZE))) byte_vec_t;
-
-/* Various builtins want plain char / int / long long vector types ... */
-typedef char __attribute__((vector_size(VEC_SIZE))) vqi_t;
-typedef short __attribute__((vector_size(VEC_SIZE))) vhi_t;
-typedef int __attribute__((vector_size(VEC_SIZE))) vsi_t;
-#if VEC_SIZE >= 8
-typedef long long __attribute__((vector_size(VEC_SIZE))) vdi_t;
-#endif
+ENTRY(simd_test);
 
 #if VEC_SIZE == 8 && defined(__SSE__)
 # define to_bool(cmp) (__builtin_ia32_pmovmskb(cmp) == 0xff)
@@ -418,13 +353,6 @@ static inline bool _to_bool(byte_vec_t b
 # endif
 #endif
 
-/*
- * Suppress value propagation by the compiler, preventing unwanted
- * optimization. This at once makes the compiler use memory operands
- * more often, which for our purposes is the more interesting case.
- */
-#define touch(var) asm volatile ( "" : "+m" (var) )
-
 int simd_test(void)
 {
     unsigned int i, j;
--- /dev/null
+++ b/tools/tests/x86_emulator/simd.h
@@ -0,0 +1,78 @@
+#include <stdbool.h>
+
+#if defined(__i386__) && VEC_SIZE == 16
+# define ENTRY(name) \
+asm ( "\t.text\n" \
+      "\t.globl _start\n" \
+      "_start:\n" \
+      "\tpush %ebp\n" \
+      "\tmov %esp,%ebp\n" \
+      "\tand $~0xf,%esp\n" \
+      "\tcall " #name "\n" \
+      "\tleave\n" \
+      "\tret" )
+#else
+# define ENTRY(name) \
+asm ( "\t.text\n" \
+      "\t.globl _start\n" \
+      "_start:\n" \
+      "\tjmp " #name )
+#endif
+
+typedef
+#if defined(INT_SIZE)
+# define ELEM_SIZE INT_SIZE
+signed int
+# if INT_SIZE == 1
+#  define MODE QI
+# elif INT_SIZE == 2
+#  define MODE HI
+# elif INT_SIZE == 4
+#  define MODE SI
+# elif INT_SIZE == 8
+#  define MODE DI
+# endif
+#elif defined(UINT_SIZE)
+# define ELEM_SIZE UINT_SIZE
+unsigned int
+# if UINT_SIZE == 1
+#  define MODE QI
+# elif UINT_SIZE == 2
+#  define MODE HI
+# elif UINT_SIZE == 4
+#  define MODE SI
+# elif UINT_SIZE == 8
+#  define MODE DI
+# endif
+#elif defined(FLOAT_SIZE)
+float
+# define ELEM_SIZE FLOAT_SIZE
+# if FLOAT_SIZE == 4
+#  define MODE SF
+# elif FLOAT_SIZE == 8
+#  define MODE DF
+# endif
+#endif
+#ifndef VEC_SIZE
+# define VEC_SIZE ELEM_SIZE
+#endif
+__attribute__((mode(MODE), vector_size(VEC_SIZE))) vec_t;
+
+#define ELEM_COUNT (VEC_SIZE / ELEM_SIZE)
+
+typedef unsigned int __attribute__((mode(QI), vector_size(VEC_SIZE))) byte_vec_t;
+
+/* Various builtins want plain char / int / long long vector types ... */
+typedef char __attribute__((vector_size(VEC_SIZE))) vqi_t;
+typedef short __attribute__((vector_size(VEC_SIZE))) vhi_t;
+typedef int __attribute__((vector_size(VEC_SIZE))) vsi_t;
+#if VEC_SIZE >= 8
+typedef long long __attribute__((vector_size(VEC_SIZE))) vdi_t;
+#endif
+
+/*
+ * Suppress value propagation by the compiler, preventing unwanted
+ * optimization. This at once makes the compiler use memory operands
+ * more often, which for our purposes is the more interesting case.
+ */
+#define touch(var) asm volatile ( "" : "+m" (var) )
--- /dev/null
+++ b/tools/tests/x86_emulator/simd-fma.c
@@ -0,0 +1,121 @@
+#include "simd.h"
+
+ENTRY(fma_test);
+
+#if VEC_SIZE < 16
+# define to_bool(cmp) (!~(cmp)[0])
+#elif VEC_SIZE == 16
+# if FLOAT_SIZE == 4
+#  define to_bool(cmp) __builtin_ia32_vtestcps(cmp, (vec_t){} == 0)
+# elif FLOAT_SIZE == 8
+#  define to_bool(cmp) __builtin_ia32_vtestcpd(cmp, (vec_t){} == 0)
+# endif
+#elif VEC_SIZE == 32
+# if FLOAT_SIZE == 4
+#  define to_bool(cmp) __builtin_ia32_vtestcps256(cmp, (vec_t){} == 0)
+# elif FLOAT_SIZE == 8
+#  define to_bool(cmp) __builtin_ia32_vtestcpd256(cmp, (vec_t){} == 0)
+# endif
+#endif
+
+#if VEC_SIZE == 16
+# if FLOAT_SIZE == 4
+#  define addsub(x, y) __builtin_ia32_addsubps(x, y)
+#  if defined(__FMA4__)
+#   define fmaddsub(x, y, z) __builtin_ia32_vfmaddsubps(x, y, z)
+#  endif
+# elif FLOAT_SIZE == 8
+#  define addsub(x, y) __builtin_ia32_addsubpd(x, y)
+#  if defined(__FMA4__)
+#   define fmaddsub(x, y, z) __builtin_ia32_vfmaddsubpd(x, y, z)
+#  endif
+# endif
+#elif VEC_SIZE == 32
+# if FLOAT_SIZE == 4
+#  define addsub(x, y) __builtin_ia32_addsubps256(x, y)
+#  if defined(__FMA4__)
+#   define fmaddsub(x, y, z) __builtin_ia32_vfmaddsubps256(x, y, z)
+#  endif
+# elif FLOAT_SIZE == 8
+#  define addsub(x, y) __builtin_ia32_addsubpd256(x, y)
+#  if defined(__FMA4__)
+#   define fmaddsub(x, y, z) __builtin_ia32_vfmaddsubpd256(x, y, z)
+#  endif
+# endif
+#endif
+
+int fma_test(void)
+{
+    unsigned int i;
+    vec_t x, y, z, src, inv, one;
+
+    for ( i = 0; i < ELEM_COUNT; ++i )
+    {
+        src[i] = i + 1;
+        inv[i] = ELEM_COUNT - i;
+        one[i] = 1;
+    }
+
+    x = (src + one) * inv;
+    y = (src - one) * inv;
+    touch(src);
+    z = inv * src + inv;
+    if ( !to_bool(x == z) ) return __LINE__;
+
+    touch(src);
+    z = -inv * src - inv;
+    if ( !to_bool(-x == z) ) return __LINE__;
+
+    touch(src);
+    z = inv * src - inv;
+    if ( !to_bool(y == z) ) return __LINE__;
+
+    touch(src);
+    z = -inv * src + inv;
+    if ( !to_bool(-y == z) ) return __LINE__;
+    touch(src);
+
+    x = src + inv;
+    y = src - inv;
+    touch(inv);
+    z = src * one + inv;
+    if ( !to_bool(x == z) ) return __LINE__;
+
+    touch(inv);
+    z = -src * one - inv;
+    if ( !to_bool(-x == z) ) return __LINE__;
+
+    touch(inv);
+    z = src * one - inv;
+    if ( !to_bool(y == z) ) return __LINE__;
+
+    touch(inv);
+    z = -src * one + inv;
+    if ( !to_bool(-y == z) ) return __LINE__;
+    touch(inv);
+
+#if defined(addsub) && defined(fmaddsub)
+    x = addsub(src * inv, one);
+    y = addsub(src * inv, -one);
+    touch(one);
+    z = fmaddsub(src, inv, one);
+    if ( !to_bool(x == z) ) return __LINE__;
+
+    touch(one);
+    z = fmaddsub(src, inv, -one);
+    if ( !to_bool(y == z) ) return __LINE__;
+    touch(one);
+
+    x = addsub(src * inv, one);
+    touch(inv);
+    z = fmaddsub(src, inv, one);
+    if ( !to_bool(x == z) ) return __LINE__;
+
+    touch(inv);
+    z = fmaddsub(src, inv, -one);
+    if ( !to_bool(y == z) ) return __LINE__;
+    touch(inv);
+#endif
+
+    return 0;
+}
--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -11,6 +11,7 @@
 #include "sse2-avx.h"
 #include "sse4-avx.h"
 #include "avx.h"
+#include "fma4.h"
 
 #define verbose false /* Switch to true for far more logging. */
 
@@ -47,6 +48,11 @@ static bool simd_check_avx(void)
 #define simd_check_sse2_avx  simd_check_avx
 #define simd_check_sse4_avx  simd_check_avx
 
+static bool simd_check_fma4(void)
+{
+    return cpu_has_fma4;
+}
+
 static void simd_set_regs(struct cpu_user_regs *regs)
 {
     if ( cpu_has_mmx )
@@ -143,6 +149,12 @@ static const struct {
     SIMD(AVX scalar double,      avx,         f8),
     SIMD(AVX 128bit double,      avx,       16f8),
     SIMD(AVX 256bit double,      avx,       32f8),
+    SIMD(FMA4 scalar single,     fma4,        f4),
+    SIMD(FMA4 128bit single,     fma4,      16f4),
+    SIMD(FMA4 256bit single,     fma4,      32f4),
+    SIMD(FMA4 scalar double,     fma4,        f8),
+    SIMD(FMA4 128bit double,     fma4,      16f8),
+    SIMD(FMA4 256bit double,     fma4,      32f8),
 #undef SIMD_
 #undef SIMD
 };
--- a/tools/tests/x86_emulator/x86-emulate.h
+++ b/tools/tests/x86_emulator/x86-emulate.h
@@ -169,6 +169,16 @@ static inline uint64_t xgetbv(uint32_t x
     (res.c & (1U << 6)) != 0; \
 })
 
+#define cpu_has_fma4 ({ \
+    struct cpuid_leaf res; \
+    emul_test_cpuid(1, 0, &res, NULL); \
+    if ( !(res.c & (1U << 27)) || ((xgetbv(0) & 6) != 6) ) \
+        res.c = 0; \
+    else \
+        emul_test_cpuid(0x80000001, 0, &res, NULL); \
+    (res.c & (1U << 16)) != 0; \
+})
+
 #define cpu_has_tbm ({ \
     struct cpuid_leaf res; \
     emul_test_cpuid(0x80000001, 0, &res, NULL); \
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -421,7 +421,16 @@ static const struct {
     [0x44] = { .simd_size = simd_packed_int },
     [0x4a ... 0x4b] = { .simd_size = simd_packed_fp, .four_op = 1 },
     [0x4c] = { .simd_size = simd_packed_int, .four_op = 1 },
+    [0x5c ... 0x5f] = { .simd_size = simd_packed_fp, .four_op = 1 },
     [0x60 ... 0x63] = { .simd_size = simd_packed_int, .two_op = 1 },
+    [0x68 ... 0x69] = { .simd_size = simd_packed_fp, .four_op = 1 },
+    [0x6a ... 0x6b] = { .simd_size = simd_scalar_fp, .four_op = 1 },
+    [0x6c ... 0x6d] = { .simd_size = simd_packed_fp, .four_op = 1 },
+    [0x6e ... 0x6f] = { .simd_size = simd_scalar_fp, .four_op = 1 },
+    [0x78 ... 0x79] = { .simd_size = simd_packed_fp, .four_op = 1 },
+    [0x7a ... 0x7b] = { .simd_size = simd_scalar_fp, .four_op = 1 },
+    [0x7c ... 0x7d] = { .simd_size = simd_packed_fp, .four_op = 1 },
+    [0x7e ... 0x7f] = { .simd_size = simd_scalar_fp, .four_op = 1 },
     [0xcc] = { .simd_size = simd_other },
     [0xdf] = { .simd_size = simd_packed_int, .two_op = 1 },
     [0xf0] = {},
@@ -1613,6 +1622,7 @@ static bool vcpu_has(
 #define vcpu_has_lzcnt()       vcpu_has(0x80000001, ECX,  5, ctxt, ops)
 #define vcpu_has_sse4a()       vcpu_has(0x80000001, ECX,  6, ctxt, ops)
 #define vcpu_has_misalignsse() vcpu_has(0x80000001, ECX,  7, ctxt, ops)
+#define vcpu_has_fma4()        vcpu_has(0x80000001, ECX, 16, ctxt, ops)
 #define vcpu_has_tbm()         vcpu_has(0x80000001, ECX, 21, ctxt, ops)
 #define vcpu_has_bmi1()        vcpu_has(         7, EBX,  3, ctxt, ops)
 #define vcpu_has_hle()         vcpu_has(         7, EBX,  4, ctxt, ops)
@@ -6168,6 +6178,7 @@ x86_emulate(
     simd_0f_imm8_avx:
                 host_and_vcpu_must_have(avx);
             }
+    simd_0f_imm8_ymm:
             get_fpu(X86EMUL_FPU_ymm, &fic);
         }
         else if ( vex.pfx )
@@ -7726,6 +7737,49 @@ x86_emulate(
         generate_exception_if(vex.w, EXC_UD);
         goto simd_0f_int_imm8;
 
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x5c): /* vfmaddsubps {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+                                           /* vfmaddsubps {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x5d): /* vfmaddsubpd {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+                                           /* vfmaddsubpd {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x5e): /* vfmsubaddps {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+                                           /* vfmsubaddps {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x5f): /* vfmsubaddpd {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+                                           /* vfmsubaddpd {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x68): /* vfmaddps {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+                                           /* vfmaddps {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x69): /* vfmaddpd {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+                                           /* vfmaddpd {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x6a): /* vfmaddss xmm,xmm/m32,xmm,xmm */
+                                           /* vfmaddss xmm/m32,xmm,xmm,xmm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x6b): /* vfmaddsd xmm,xmm/m64,xmm,xmm */
+                                           /* vfmaddsd xmm/m64,xmm,xmm,xmm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x6c): /* vfmsubps {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+                                           /* vfmsubps {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x6d): /* vfmsubpd {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+                                           /* vfmsubpd {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x6e): /* vfmsubss xmm,xmm/m32,xmm,xmm */
+                                           /* vfmsubss xmm/m32,xmm,xmm,xmm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x6f): /* vfmsubsd xmm,xmm/m64,xmm,xmm */
+                                           /* vfmsubsd xmm/m64,xmm,xmm,xmm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x78): /* vfnmaddps {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+                                           /* vfnmaddps {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x79): /* vfnmaddpd {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+                                           /* vfnmaddpd {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x7a): /* vfnmaddss xmm,xmm/m32,xmm,xmm */
+                                           /* vfnmaddss xmm/m32,xmm,xmm,xmm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x7b): /* vfnmaddsd xmm,xmm/m64,xmm,xmm */
+                                           /* vfnmaddsd xmm/m64,xmm,xmm,xmm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x7c): /* vfnmsubps {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+                                           /* vfnmsubps {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x7d): /* vfnmsubpd {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+                                           /* vfnmsubpd {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x7e): /* vfnmsubss xmm,xmm/m32,xmm,xmm */
+                                           /* vfnmsubss xmm/m32,xmm,xmm,xmm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x7f): /* vfnmsubsd xmm,xmm/m64,xmm,xmm */
+                                           /* vfnmsubsd xmm/m64,xmm,xmm,xmm */
+        host_and_vcpu_must_have(fma4);
+        goto simd_0f_imm8_ymm;
+
     case X86EMUL_OPC_66(0x0f3a, 0x60):     /* pcmpestrm $imm8,xmm/m128,xmm */
     case X86EMUL_OPC_VEX_66(0x0f3a, 0x60): /* vpcmpestrm $imm8,xmm/m128,xmm */
     case X86EMUL_OPC_66(0x0f3a, 0x61):     /* pcmpestri $imm8,xmm/m128,xmm */
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -76,6 +76,7 @@
 #define cpu_has_svm             boot_cpu_has(X86_FEATURE_SVM)
 #define cpu_has_sse4a           boot_cpu_has(X86_FEATURE_SSE4A)
 #define cpu_has_lwp             boot_cpu_has(X86_FEATURE_LWP)
+#define cpu_has_fma4            boot_cpu_has(X86_FEATURE_FMA4)
 #define cpu_has_tbm             boot_cpu_has(X86_FEATURE_TBM)
 
 /* CPUID level 0x0000000D:1.eax */



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

  parent reply	other threads:[~2017-12-07 14:01 UTC|newest]

Thread overview: 85+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-12-07 13:49 [PATCH v3 00/25] x86: emulator enhancements Jan Beulich
2017-12-07 13:58 ` [PATCH v3 01/25] x86emul: make decode_register() return unsigned long * Jan Beulich
2017-12-07 18:32   ` Andrew Cooper
2017-12-08  7:44     ` Jan Beulich
2017-12-07 13:59 ` [PATCH v3 02/25] x86emul: build SIMD tests with -Os Jan Beulich
2017-12-07 18:32   ` Andrew Cooper
2017-12-07 14:00 ` [PATCH v3 03/25] x86emul: support F16C insns Jan Beulich
2018-01-31 18:58   ` Andrew Cooper
2017-12-07 14:01 ` Jan Beulich [this message]
2018-01-31 19:51   ` [PATCH v3 04/25] x86emul: support FMA4 insns Andrew Cooper
2017-12-07 14:02 ` [PATCH v3 05/25] x86emul: support FMA insns Jan Beulich
2018-02-01 16:15   ` Andrew Cooper
2017-12-07 14:03 ` [PATCH v3 06/25] x86emul: support most remaining AVX2 insns Jan Beulich
2018-02-01 19:45   ` Andrew Cooper
2018-02-02  9:29     ` Jan Beulich
2017-12-07 14:03 ` [PATCH v3 07/25] x86emul: support AVX2 gather insns Jan Beulich
2018-02-01 20:53   ` Andrew Cooper
2018-02-02  9:44     ` Jan Beulich
2017-12-07 14:04 ` [PATCH v3 08/25] x86emul: add tables for XOP 08 and 09 extension spaces Jan Beulich
2018-02-02 11:43   ` Andrew Cooper
2018-02-02 15:15     ` Jan Beulich
2018-02-02 16:02       ` Andrew Cooper
2017-12-07 14:04 ` [PATCH v3 09/25] x86emul: support XOP insns Jan Beulich
2018-02-02 12:03   ` Andrew Cooper
2018-02-02 15:17     ` Jan Beulich
2018-02-05 13:01       ` Andrew Cooper
2017-12-07 14:05 ` [PATCH v3 10/25] x86emul: support 3DNow! insns Jan Beulich
2018-02-02 13:02   ` Andrew Cooper
2018-02-02 15:22     ` Jan Beulich
2018-02-02 16:04       ` Andrew Cooper
2017-12-07 14:06 ` [PATCH v3 11/25] x86emul: place test blobs in executable section Jan Beulich
2018-02-02 13:03   ` Andrew Cooper
2018-02-02 15:27     ` Jan Beulich
2018-02-05 13:11       ` Andrew Cooper
2018-02-05 13:55         ` Jan Beulich
2017-12-07 14:07 ` [PATCH v3 12/25] x86emul: abstract out XCRn accesses Jan Beulich
2018-02-02 13:29   ` Andrew Cooper
2018-02-02 17:05     ` Jan Beulich
2017-12-07 14:08 ` [PATCH v3 13/25] x86emul: adjust_bnd() should check XCR0 Jan Beulich
2018-02-02 13:30   ` Andrew Cooper
2018-02-02 16:19     ` Jan Beulich
2018-02-02 16:28       ` Andrew Cooper
2017-12-07 14:09 ` [PATCH v3 14/25] x86emul: make all FPU emulation use the stub Jan Beulich
2018-02-02 13:37   ` Andrew Cooper
2017-12-07 14:10 ` [PATCH v3 15/25] x86/HVM: eliminate custom #MF/#XM handling Jan Beulich
2018-02-02 13:38   ` Andrew Cooper
2017-12-07 14:11 ` [PATCH v3 16/25] x86emul: support SWAPGS Jan Beulich
2018-02-02 13:41   ` Andrew Cooper
2018-02-02 16:24     ` Jan Beulich
2017-12-07 14:11 ` [PATCH v3 17/25] x86emul: emulate {MONITOR, MWAIT}{, X} as no-op Jan Beulich
2018-02-02 14:05   ` Andrew Cooper
2017-12-07 14:12 ` [PATCH v3 18/25] x86emul: add missing suffixes in test harness Jan Beulich
2018-02-02 14:13   ` Andrew Cooper
2017-12-07 14:14 ` [PATCH v3 19/25] x86emul: tell cmpxchg hook whether LOCK is in effect Jan Beulich
2017-12-08 10:58   ` Paul Durrant
2018-02-02 14:13   ` Andrew Cooper
2017-12-07 14:15 ` [PATCH v3 20/25] x86emul: correctly handle CMPXCHG* comparison failures Jan Beulich
2018-02-02 14:49   ` Andrew Cooper
2018-02-05  8:07     ` Jan Beulich
2018-02-05 13:38       ` Andrew Cooper
2017-12-07 14:16 ` [PATCH v3 21/25] x86emul: add read-modify-write hook Jan Beulich
2018-02-02 16:13   ` Andrew Cooper
2018-02-05  8:22     ` Jan Beulich
2018-02-05 14:21       ` Andrew Cooper
2018-02-05 14:56         ` Jan Beulich
2017-12-07 14:16 ` [PATCH v3 22/25] x86/HVM: do actual CMPXCHG in hvmemul_cmpxchg() Jan Beulich
2017-12-07 14:38   ` Razvan Cojocaru
2017-12-08 10:38   ` Paul Durrant
2018-02-02 16:36   ` Andrew Cooper
2018-02-05  8:32     ` Jan Beulich
2018-02-05 16:09       ` Andrew Cooper
2018-02-05 16:49         ` Jan Beulich
2018-02-05 16:57           ` Andrew Cooper
2018-02-05 17:05             ` Jan Beulich
2017-12-07 14:17 ` [PATCH v3 23/25] x86/HVM: make use of new read-modify-write emulator hook Jan Beulich
2017-12-08 10:41   ` Paul Durrant
2018-02-02 16:37   ` Andrew Cooper
2018-02-05  8:34     ` Jan Beulich
2018-02-05 16:15       ` Andrew Cooper
2017-12-07 14:18 ` [PATCH v3 24/25] x86/shadow: fully move unmap-dest into common code Jan Beulich
2018-02-02 16:46   ` Andrew Cooper
2017-12-07 14:19 ` [PATCH v3 25/25] x86/shadow: fold sh_x86_emulate_{write, cmpxchg}() into their only callers Jan Beulich
2018-02-02 16:52   ` Andrew Cooper
2018-02-05  8:42     ` Jan Beulich
2018-02-05 12:16       ` Tim Deegan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5A2957BE020000780019589A@prv-mh.provo.novell.com \
    --to=jbeulich@suse.com \
    --cc=George.Dunlap@eu.citrix.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.