All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Jan Beulich" <JBeulich@suse.com>
To: xen-devel <xen-devel@lists.xenproject.org>
Cc: George Dunlap <George.Dunlap@eu.citrix.com>,
	Andrew Cooper <andrew.cooper3@citrix.com>
Subject: [PATCH v2 05/17] x86emul: support FMA4 insns
Date: Thu, 14 Sep 2017 09:14:10 -0600	[thread overview]
Message-ID: <59BAB8E2020000780017B393@prv-mh.provo.novell.com> (raw)
In-Reply-To: <59BAB38A020000780017B34E@prv-mh.provo.novell.com>

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/.gitignore
+++ b/.gitignore
@@ -226,6 +226,7 @@
 tools/tests/x86_emulator/asm
 tools/tests/x86_emulator/avx*.[ch]
 tools/tests/x86_emulator/blowfish.h
+tools/tests/x86_emulator/fma*.[ch]
 tools/tests/x86_emulator/sse*.[ch]
 tools/tests/x86_emulator/test_x86_emulator
 tools/tests/x86_emulator/x86_emulate
--- a/tools/tests/x86_emulator/Makefile
+++ b/tools/tests/x86_emulator/Makefile
@@ -12,7 +12,8 @@ run: $(TARGET)
 	./$(TARGET)
 
 SIMD := sse sse2 sse4 avx
-TESTCASES := blowfish $(SIMD) sse2-avx sse4-avx
+FMA := fma4
+TESTCASES := blowfish $(SIMD) sse2-avx sse4-avx $(FMA)
 
 blowfish-cflags := ""
 blowfish-cflags-x86_32 := "-mno-accumulate-outgoing-args -Dstatic="
@@ -29,6 +30,9 @@ sse4-flts := $(sse2-flts)
 avx-vecs := 16 32
 avx-ints :=
 avx-flts := 4 8
+fma4-vecs := $(avx-vecs)
+fma4-ints :=
+fma4-flts := $(avx-flts)
 
 # When converting SSE to AVX, have the compiler avoid XMM0 to widen
 # coverage of the VEX.vvvv checks in the emulator. We must not do this,
@@ -58,7 +62,7 @@ $(1)-avx-cflags := \
 	    "-D_$(vec)u$(int) -m$(1) $(sse2avx-$(1)) -Os -DVEC_SIZE=$(vec) -DUINT_SIZE=$(int)"))
 endef
 
-$(foreach flavor,$(SIMD),$(eval $(call simd-defs,$(flavor))))
+$(foreach flavor,$(SIMD) $(FMA),$(eval $(call simd-defs,$(flavor))))
 
 $(addsuffix .h,$(TESTCASES)): %.h: %.c testcase.mk Makefile
 	rm -f $@.new $*.bin
@@ -77,6 +81,11 @@ $(addsuffix .h,$(TESTCASES)): %.h: %.c t
 $(addsuffix .c,$(SIMD)) $(addsuffix -avx.c,$(filter sse%,$(SIMD))):
 	ln -sf simd.c $@
 
+$(addsuffix .c,$(FMA)):
+	ln -sf simd-fma.c $@
+
+$(addsuffix .o,$(SIMD) $(FMA)) $(addsuffix -avx.o,$(filter sse%,$(SIMD))): simd.h
+
 $(TARGET): x86_emulate.o test_x86_emulator.o
 	$(HOSTCC) -o $@ $^
 
--- a/tools/tests/x86_emulator/simd.c
+++ b/tools/tests/x86_emulator/simd.c
@@ -1,71 +1,6 @@
-#include <stdbool.h>
+#include "simd.h"
 
-asm (
-    "\t.text\n"
-    "\t.globl _start\n"
-    "_start:\n"
-#if defined(__i386__) && VEC_SIZE == 16
-    "\tpush %ebp\n"
-    "\tmov %esp,%ebp\n"
-    "\tand $~0xf,%esp\n"
-    "\tcall simd_test\n"
-    "\tleave\n"
-    "\tret"
-#else
-    "\tjmp simd_test"
-#endif
-    );
-
-typedef
-#if defined(INT_SIZE)
-# define ELEM_SIZE INT_SIZE
-signed int
-# if INT_SIZE == 1
-#  define MODE QI
-# elif INT_SIZE == 2
-#  define MODE HI
-# elif INT_SIZE == 4
-#  define MODE SI
-# elif INT_SIZE == 8
-#  define MODE DI
-# endif
-#elif defined(UINT_SIZE)
-# define ELEM_SIZE UINT_SIZE
-unsigned int
-# if UINT_SIZE == 1
-#  define MODE QI
-# elif UINT_SIZE == 2
-#  define MODE HI
-# elif UINT_SIZE == 4
-#  define MODE SI
-# elif UINT_SIZE == 8
-#  define MODE DI
-# endif
-#elif defined(FLOAT_SIZE)
-float
-# define ELEM_SIZE FLOAT_SIZE
-# if FLOAT_SIZE == 4
-#  define MODE SF
-# elif FLOAT_SIZE == 8
-#  define MODE DF
-# endif
-#endif
-#ifndef VEC_SIZE
-# define VEC_SIZE ELEM_SIZE
-#endif
-__attribute__((mode(MODE), vector_size(VEC_SIZE))) vec_t;
-
-#define ELEM_COUNT (VEC_SIZE / ELEM_SIZE)
-
-typedef unsigned int __attribute__((mode(QI), vector_size(VEC_SIZE))) byte_vec_t;
-
-/* Various builtins want plain char / int / long long vector types ... */
-typedef char __attribute__((vector_size(VEC_SIZE))) vqi_t;
-typedef short __attribute__((vector_size(VEC_SIZE))) vhi_t;
-typedef int __attribute__((vector_size(VEC_SIZE))) vsi_t;
-#if VEC_SIZE >= 8
-typedef long long __attribute__((vector_size(VEC_SIZE))) vdi_t;
-#endif
+ENTRY(simd_test);
 
 #if VEC_SIZE == 8 && defined(__SSE__)
 # define to_bool(cmp) (__builtin_ia32_pmovmskb(cmp) == 0xff)
@@ -418,13 +353,6 @@ static inline bool _to_bool(byte_vec_t b
 # endif
 #endif
 
-/*
- * Suppress value propagation by the compiler, preventing unwanted
- * optimization. This at once makes the compiler use memory operands
- * more often, which for our purposes is the more interesting case.
- */
-#define touch(var) asm volatile ( "" : "+m" (var) )
-
 int simd_test(void)
 {
     unsigned int i, j;
--- /dev/null
+++ b/tools/tests/x86_emulator/simd.h
@@ -0,0 +1,78 @@
+#include <stdbool.h>
+
+#if defined(__i386__) && VEC_SIZE == 16
+# define ENTRY(name) \
+asm ( "\t.text\n" \
+      "\t.globl _start\n" \
+      "_start:\n" \
+      "\tpush %ebp\n" \
+      "\tmov %esp,%ebp\n" \
+      "\tand $~0xf,%esp\n" \
+      "\tcall " #name "\n" \
+      "\tleave\n" \
+      "\tret" )
+#else
+# define ENTRY(name) \
+asm ( "\t.text\n" \
+      "\t.globl _start\n" \
+      "_start:\n" \
+      "\tjmp " #name )
+#endif
+
+typedef
+#if defined(INT_SIZE)
+# define ELEM_SIZE INT_SIZE
+signed int
+# if INT_SIZE == 1
+#  define MODE QI
+# elif INT_SIZE == 2
+#  define MODE HI
+# elif INT_SIZE == 4
+#  define MODE SI
+# elif INT_SIZE == 8
+#  define MODE DI
+# endif
+#elif defined(UINT_SIZE)
+# define ELEM_SIZE UINT_SIZE
+unsigned int
+# if UINT_SIZE == 1
+#  define MODE QI
+# elif UINT_SIZE == 2
+#  define MODE HI
+# elif UINT_SIZE == 4
+#  define MODE SI
+# elif UINT_SIZE == 8
+#  define MODE DI
+# endif
+#elif defined(FLOAT_SIZE)
+float
+# define ELEM_SIZE FLOAT_SIZE
+# if FLOAT_SIZE == 4
+#  define MODE SF
+# elif FLOAT_SIZE == 8
+#  define MODE DF
+# endif
+#endif
+#ifndef VEC_SIZE
+# define VEC_SIZE ELEM_SIZE
+#endif
+__attribute__((mode(MODE), vector_size(VEC_SIZE))) vec_t;
+
+#define ELEM_COUNT (VEC_SIZE / ELEM_SIZE)
+
+typedef unsigned int __attribute__((mode(QI), vector_size(VEC_SIZE))) byte_vec_t;
+
+/* Various builtins want plain char / int / long long vector types ... */
+typedef char __attribute__((vector_size(VEC_SIZE))) vqi_t;
+typedef short __attribute__((vector_size(VEC_SIZE))) vhi_t;
+typedef int __attribute__((vector_size(VEC_SIZE))) vsi_t;
+#if VEC_SIZE >= 8
+typedef long long __attribute__((vector_size(VEC_SIZE))) vdi_t;
+#endif
+
+/*
+ * Suppress value propagation by the compiler, preventing unwanted
+ * optimization. This at once makes the compiler use memory operands
+ * more often, which for our purposes is the more interesting case.
+ */
+#define touch(var) asm volatile ( "" : "+m" (var) )
--- /dev/null
+++ b/tools/tests/x86_emulator/simd-fma.c
@@ -0,0 +1,121 @@
+#include "simd.h"
+
+ENTRY(fma_test);
+
+#if VEC_SIZE < 16
+# define to_bool(cmp) (!~(cmp)[0])
+#elif VEC_SIZE == 16
+# if FLOAT_SIZE == 4
+#  define to_bool(cmp) __builtin_ia32_vtestcps(cmp, (vec_t){} == 0)
+# elif FLOAT_SIZE == 8
+#  define to_bool(cmp) __builtin_ia32_vtestcpd(cmp, (vec_t){} == 0)
+# endif
+#elif VEC_SIZE == 32
+# if FLOAT_SIZE == 4
+#  define to_bool(cmp) __builtin_ia32_vtestcps256(cmp, (vec_t){} == 0)
+# elif FLOAT_SIZE == 8
+#  define to_bool(cmp) __builtin_ia32_vtestcpd256(cmp, (vec_t){} == 0)
+# endif
+#endif
+
+#if VEC_SIZE == 16
+# if FLOAT_SIZE == 4
+#  define addsub(x, y) __builtin_ia32_addsubps(x, y)
+#  if defined(__FMA4__)
+#   define fmaddsub(x, y, z) __builtin_ia32_vfmaddsubps(x, y, z)
+#  endif
+# elif FLOAT_SIZE == 8
+#  define addsub(x, y) __builtin_ia32_addsubpd(x, y)
+#  if defined(__FMA4__)
+#   define fmaddsub(x, y, z) __builtin_ia32_vfmaddsubpd(x, y, z)
+#  endif
+# endif
+#elif VEC_SIZE == 32
+# if FLOAT_SIZE == 4
+#  define addsub(x, y) __builtin_ia32_addsubps256(x, y)
+#  if defined(__FMA4__)
+#   define fmaddsub(x, y, z) __builtin_ia32_vfmaddsubps256(x, y, z)
+#  endif
+# elif FLOAT_SIZE == 8
+#  define addsub(x, y) __builtin_ia32_addsubpd256(x, y)
+#  if defined(__FMA4__)
+#   define fmaddsub(x, y, z) __builtin_ia32_vfmaddsubpd256(x, y, z)
+#  endif
+# endif
+#endif
+
+int fma_test(void)
+{
+    unsigned int i;
+    vec_t x, y, z, src, inv, one;
+
+    for ( i = 0; i < ELEM_COUNT; ++i )
+    {
+        src[i] = i + 1;
+        inv[i] = ELEM_COUNT - i;
+        one[i] = 1;
+    }
+
+    x = (src + one) * inv;
+    y = (src - one) * inv;
+    touch(src);
+    z = inv * src + inv;
+    if ( !to_bool(x == z) ) return __LINE__;
+
+    touch(src);
+    z = -inv * src - inv;
+    if ( !to_bool(-x == z) ) return __LINE__;
+
+    touch(src);
+    z = inv * src - inv;
+    if ( !to_bool(y == z) ) return __LINE__;
+
+    touch(src);
+    z = -inv * src + inv;
+    if ( !to_bool(-y == z) ) return __LINE__;
+    touch(src);
+
+    x = src + inv;
+    y = src - inv;
+    touch(inv);
+    z = src * one + inv;
+    if ( !to_bool(x == z) ) return __LINE__;
+
+    touch(inv);
+    z = -src * one - inv;
+    if ( !to_bool(-x == z) ) return __LINE__;
+
+    touch(inv);
+    z = src * one - inv;
+    if ( !to_bool(y == z) ) return __LINE__;
+
+    touch(inv);
+    z = -src * one + inv;
+    if ( !to_bool(-y == z) ) return __LINE__;
+    touch(inv);
+
+#if defined(addsub) && defined(fmaddsub)
+    x = addsub(src * inv, one);
+    y = addsub(src * inv, -one);
+    touch(one);
+    z = fmaddsub(src, inv, one);
+    if ( !to_bool(x == z) ) return __LINE__;
+
+    touch(one);
+    z = fmaddsub(src, inv, -one);
+    if ( !to_bool(y == z) ) return __LINE__;
+    touch(one);
+
+    x = addsub(src * inv, one);
+    touch(inv);
+    z = fmaddsub(src, inv, one);
+    if ( !to_bool(x == z) ) return __LINE__;
+
+    touch(inv);
+    z = fmaddsub(src, inv, -one);
+    if ( !to_bool(y == z) ) return __LINE__;
+    touch(inv);
+#endif
+
+    return 0;
+}
--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -11,6 +11,7 @@
 #include "sse2-avx.h"
 #include "sse4-avx.h"
 #include "avx.h"
+#include "fma4.h"
 
 #define verbose false /* Switch to true for far more logging. */
 
@@ -47,6 +48,11 @@ static bool simd_check_avx(void)
 #define simd_check_sse2_avx  simd_check_avx
 #define simd_check_sse4_avx  simd_check_avx
 
+static bool simd_check_fma4(void)
+{
+    return cpu_has_fma4;
+}
+
 static void simd_set_regs(struct cpu_user_regs *regs)
 {
     if ( cpu_has_mmx )
@@ -143,6 +149,12 @@ static const struct {
     SIMD(AVX scalar double,      avx,         f8),
     SIMD(AVX 128bit double,      avx,       16f8),
     SIMD(AVX 256bit double,      avx,       32f8),
+    SIMD(FMA4 scalar single,     fma4,        f4),
+    SIMD(FMA4 128bit single,     fma4,      16f4),
+    SIMD(FMA4 256bit single,     fma4,      32f4),
+    SIMD(FMA4 scalar double,     fma4,        f8),
+    SIMD(FMA4 128bit double,     fma4,      16f8),
+    SIMD(FMA4 256bit double,     fma4,      32f8),
 #undef SIMD_
 #undef SIMD
 };
--- a/tools/tests/x86_emulator/x86_emulate.h
+++ b/tools/tests/x86_emulator/x86_emulate.h
@@ -164,6 +164,16 @@ static inline uint64_t xgetbv(uint32_t x
     (res.c & (1U << 6)) != 0; \
 })
 
+#define cpu_has_fma4 ({ \
+    struct cpuid_leaf res; \
+    emul_test_cpuid(1, 0, &res, NULL); \
+    if ( !(res.c & (1U << 27)) || ((xgetbv(0) & 6) != 6) ) \
+        res.c = 0; \
+    else \
+        emul_test_cpuid(0x80000001, 0, &res, NULL); \
+    (res.c & (1U << 16)) != 0; \
+})
+
 #define cpu_has_tbm ({ \
     struct cpuid_leaf res; \
     emul_test_cpuid(0x80000001, 0, &res, NULL); \
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -421,7 +421,16 @@ static const struct {
     [0x44] = { .simd_size = simd_packed_int },
     [0x4a ... 0x4b] = { .simd_size = simd_packed_fp, .four_op = 1 },
     [0x4c] = { .simd_size = simd_packed_int, .four_op = 1 },
+    [0x5c ... 0x5f] = { .simd_size = simd_packed_fp, .four_op = 1 },
     [0x60 ... 0x63] = { .simd_size = simd_packed_int, .two_op = 1 },
+    [0x68 ... 0x69] = { .simd_size = simd_packed_fp, .four_op = 1 },
+    [0x6a ... 0x6b] = { .simd_size = simd_scalar_fp, .four_op = 1 },
+    [0x6c ... 0x6d] = { .simd_size = simd_packed_fp, .four_op = 1 },
+    [0x6e ... 0x6f] = { .simd_size = simd_scalar_fp, .four_op = 1 },
+    [0x78 ... 0x79] = { .simd_size = simd_packed_fp, .four_op = 1 },
+    [0x7a ... 0x7b] = { .simd_size = simd_scalar_fp, .four_op = 1 },
+    [0x7c ... 0x7d] = { .simd_size = simd_packed_fp, .four_op = 1 },
+    [0x7e ... 0x7f] = { .simd_size = simd_scalar_fp, .four_op = 1 },
     [0xcc] = { .simd_size = simd_other },
     [0xdf] = { .simd_size = simd_packed_int, .two_op = 1 },
     [0xf0] = {},
@@ -1612,6 +1621,7 @@ static bool vcpu_has(
 #define vcpu_has_lzcnt()       vcpu_has(0x80000001, ECX,  5, ctxt, ops)
 #define vcpu_has_sse4a()       vcpu_has(0x80000001, ECX,  6, ctxt, ops)
 #define vcpu_has_misalignsse() vcpu_has(0x80000001, ECX,  7, ctxt, ops)
+#define vcpu_has_fma4()        vcpu_has(0x80000001, ECX, 16, ctxt, ops)
 #define vcpu_has_tbm()         vcpu_has(0x80000001, ECX, 21, ctxt, ops)
 #define vcpu_has_bmi1()        vcpu_has(         7, EBX,  3, ctxt, ops)
 #define vcpu_has_hle()         vcpu_has(         7, EBX,  4, ctxt, ops)
@@ -6176,6 +6186,7 @@ x86_emulate(
     simd_0f_imm8_avx:
                 host_and_vcpu_must_have(avx);
             }
+    simd_0f_imm8_ymm:
             get_fpu(X86EMUL_FPU_ymm, &fic);
         }
         else if ( vex.pfx )
@@ -7732,6 +7743,49 @@ x86_emulate(
         generate_exception_if(vex.w, EXC_UD);
         goto simd_0f_int_imm8;
 
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x5c): /* vfmaddsubps {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+                                           /* vfmaddsubps {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x5d): /* vfmaddsubpd {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+                                           /* vfmaddsubpd {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x5e): /* vfmsubaddps {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+                                           /* vfmsubaddps {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x5f): /* vfmsubaddpd {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+                                           /* vfmsubaddpd {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x68): /* vfmaddps {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+                                           /* vfmaddps {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x69): /* vfmaddpd {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+                                           /* vfmaddpd {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x6a): /* vfmaddss xmm,xmm/m32,xmm,xmm */
+                                           /* vfmaddss xmm/m32,xmm,xmm,xmm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x6b): /* vfmaddsd xmm,xmm/m64,xmm,xmm */
+                                           /* vfmaddsd xmm/m64,xmm,xmm,xmm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x6c): /* vfmsubps {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+                                           /* vfmsubps {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x6d): /* vfmsubpd {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+                                           /* vfmsubpd {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x6e): /* vfmsubss xmm,xmm/m32,xmm,xmm */
+                                           /* vfmsubss xmm/m32,xmm,xmm,xmm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x6f): /* vfmsubsd xmm,xmm/m64,xmm,xmm */
+                                           /* vfmsubsd xmm/m64,xmm,xmm,xmm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x78): /* vfnmaddps {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+                                           /* vfnmaddps {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x79): /* vfnmaddpd {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+                                           /* vfnmaddpd {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x7a): /* vfnmaddss xmm,xmm/m32,xmm,xmm */
+                                           /* vfnmaddss xmm/m32,xmm,xmm,xmm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x7b): /* vfnmaddsd xmm,xmm/m64,xmm,xmm */
+                                           /* vfnmaddsd xmm/m64,xmm,xmm,xmm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x7c): /* vfnmsubps {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+                                           /* vfnmsubps {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x7d): /* vfnmsubpd {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+                                           /* vfnmsubpd {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x7e): /* vfnmsubss xmm,xmm/m32,xmm,xmm */
+                                           /* vfnmsubss xmm/m32,xmm,xmm,xmm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x7f): /* vfnmsubsd xmm,xmm/m64,xmm,xmm */
+                                           /* vfnmsubsd xmm/m64,xmm,xmm,xmm */
+        host_and_vcpu_must_have(fma4);
+        goto simd_0f_imm8_ymm;
+
     case X86EMUL_OPC_66(0x0f3a, 0x60):     /* pcmpestrm $imm8,xmm/m128,xmm */
     case X86EMUL_OPC_VEX_66(0x0f3a, 0x60): /* vpcmpestrm $imm8,xmm/m128,xmm */
     case X86EMUL_OPC_66(0x0f3a, 0x61):     /* pcmpestri $imm8,xmm/m128,xmm */
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -76,6 +76,7 @@
 #define cpu_has_svm             boot_cpu_has(X86_FEATURE_SVM)
 #define cpu_has_sse4a           boot_cpu_has(X86_FEATURE_SSE4A)
 #define cpu_has_lwp             boot_cpu_has(X86_FEATURE_LWP)
+#define cpu_has_fma4            boot_cpu_has(X86_FEATURE_FMA4)
 #define cpu_has_tbm             boot_cpu_has(X86_FEATURE_TBM)
 
 /* CPUID level 0x0000000D:1.eax */



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

  parent reply	other threads:[~2017-09-14 15:14 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-09-14 14:51 [PATCH v2 00/17] x86: emulator enhancements Jan Beulich
2017-09-14 15:12 ` [PATCH v2 01/17] x86emul: support remaining AVX insns Jan Beulich
2017-10-09 15:36   ` George Dunlap
2017-10-09 16:12     ` Jan Beulich
2017-10-10 14:17       ` George Dunlap
2017-09-14 15:12 ` [PATCH v2 02/17] x86emul: re-order cases of main switch statement Jan Beulich
2017-09-15 17:23   ` Andrew Cooper
2017-09-14 15:13 ` [PATCH v2 03/17] x86emul: build SIMD tests with -Os Jan Beulich
2017-09-14 15:13 ` [PATCH v2 04/17] x86emul: support F16C insns Jan Beulich
2017-09-14 15:14 ` Jan Beulich [this message]
2017-09-14 15:14 ` [PATCH v2 06/17] x86emul: support FMA insns Jan Beulich
2017-09-14 15:15 ` [PATCH v2 07/17] x86emul: support most remaining AVX2 insns Jan Beulich
2017-09-14 15:16 ` [PATCH v2 08/17] x86emul: fold/eliminate some local variables Jan Beulich
2017-09-15 17:22   ` Andrew Cooper
2017-10-06 14:21   ` George Dunlap
2017-10-06 14:47     ` Jan Beulich
2017-10-06 14:43   ` George Dunlap
2017-09-14 15:17 ` [PATCH v2 09/17] x86emul: support AVX2 gather insns Jan Beulich
2017-09-14 15:17 ` [PATCH v2 10/17] x86emul: add tables for XOP 08 and 09 extension spaces Jan Beulich
2017-09-14 15:18 ` [PATCH v2 11/17] x86emul: support XOP insns Jan Beulich
2017-09-14 15:18 ` [PATCH v2 12/17] x86emul: support 3DNow! insns Jan Beulich
2017-09-14 15:19 ` [PATCH v2 13/17] x86emul: re-order checks in test harness Jan Beulich
2017-09-15 17:26   ` Andrew Cooper
2017-09-14 15:20 ` [PATCH v2 14/17] x86emul: abstract out XCRn accesses Jan Beulich
2017-09-14 15:25   ` Jan Beulich
2017-09-18  9:12   ` Paul Durrant
2017-09-14 15:20 ` [PATCH v2 15/17] x86emul: adjust_bnd() should check XCR0 Jan Beulich
2017-09-14 15:21 ` [PATCH v2 16/17] x86emul: make all FPU emulation use the stub Jan Beulich
2017-09-14 15:22 ` [PATCH v2 17/17] x86/HVM: eliminate custom #MF/#XM handling Jan Beulich
2017-09-18  9:27   ` Paul Durrant
2017-09-18  9:35     ` Jan Beulich

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=59BAB8E2020000780017B393@prv-mh.provo.novell.com \
    --to=jbeulich@suse.com \
    --cc=George.Dunlap@eu.citrix.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.