[PATCH v2 05/17] x86emul: support FMA4 insns

From: "Jan Beulich" <JBeulich@suse.com>
To: xen-devel <xen-devel@lists.xenproject.org>
Cc: George Dunlap <George.Dunlap@eu.citrix.com>,
	Andrew Cooper <andrew.cooper3@citrix.com>
Subject: [PATCH v2 05/17] x86emul: support FMA4 insns
Date: Thu, 14 Sep 2017 09:14:10 -0600	[thread overview]
Message-ID: <59BAB8E2020000780017B393@prv-mh.provo.novell.com> (raw)
In-Reply-To: <59BAB38A020000780017B34E@prv-mh.provo.novell.com>

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/.gitignore
+++ b/.gitignore
@@ -226,6 +226,7 @@
 tools/tests/x86_emulator/asm
 tools/tests/x86_emulator/avx*.[ch]
 tools/tests/x86_emulator/blowfish.h
+tools/tests/x86_emulator/fma*.[ch]
 tools/tests/x86_emulator/sse*.[ch]
 tools/tests/x86_emulator/test_x86_emulator
 tools/tests/x86_emulator/x86_emulate
--- a/tools/tests/x86_emulator/Makefile
+++ b/tools/tests/x86_emulator/Makefile
@@ -12,7 +12,8 @@ run: $(TARGET)
 	./$(TARGET)
 
 SIMD := sse sse2 sse4 avx
-TESTCASES := blowfish $(SIMD) sse2-avx sse4-avx
+FMA := fma4
+TESTCASES := blowfish $(SIMD) sse2-avx sse4-avx $(FMA)
 
 blowfish-cflags := ""
 blowfish-cflags-x86_32 := "-mno-accumulate-outgoing-args -Dstatic="
@@ -29,6 +30,9 @@ sse4-flts := $(sse2-flts)
 avx-vecs := 16 32
 avx-ints :=
 avx-flts := 4 8
+fma4-vecs := $(avx-vecs)
+fma4-ints :=
+fma4-flts := $(avx-flts)
 
 # When converting SSE to AVX, have the compiler avoid XMM0 to widen
 # coverage of the VEX.vvvv checks in the emulator. We must not do this,
@@ -58,7 +62,7 @@ $(1)-avx-cflags := \
 	    "-D_$(vec)u$(int) -m$(1) $(sse2avx-$(1)) -Os -DVEC_SIZE=$(vec) -DUINT_SIZE=$(int)"))
 endef
 
-$(foreach flavor,$(SIMD),$(eval $(call simd-defs,$(flavor))))
+$(foreach flavor,$(SIMD) $(FMA),$(eval $(call simd-defs,$(flavor))))
 
 $(addsuffix .h,$(TESTCASES)): %.h: %.c testcase.mk Makefile
 	rm -f $@.new $*.bin
@@ -77,6 +81,18 @@ $(addsuffix .h,$(TESTCASES)): %.h: %.c t
 $(addsuffix .c,$(SIMD)) $(addsuffix -avx.c,$(filter sse%,$(SIMD))):
 	ln -sf simd.c $@
 
+# Every FMA flavor is built from the one shared source file.
+$(addsuffix .c,$(FMA)):
+	ln -sf simd-fma.c $@
+
+# simd.h is included by simd.c and simd-fma.c, so the generated test blob
+# headers must be rebuilt when it changes.  The prerequisite is attached to
+# the .h files because those are the targets of the rule above; the flavors'
+# .o files are not targets of any rule visible in this Makefile (presumably
+# mere by-products of the testcase.mk sub-build - confirm), so depending on
+# them would not trigger a rebuild.
+$(addsuffix .h,$(SIMD) $(FMA)) $(addsuffix -avx.h,$(filter sse%,$(SIMD))): simd.h
+
 $(TARGET): x86_emulate.o test_x86_emulator.o
 	$(HOSTCC) -o $@ $^
 
--- a/tools/tests/x86_emulator/simd.c
+++ b/tools/tests/x86_emulator/simd.c
@@ -1,71 +1,6 @@
-#include <stdbool.h>
+#include "simd.h"
 
-asm (
-    "\t.text\n"
-    "\t.globl _start\n"
-    "_start:\n"
-#if defined(__i386__) && VEC_SIZE == 16
-    "\tpush %ebp\n"
-    "\tmov %esp,%ebp\n"
-    "\tand $~0xf,%esp\n"
-    "\tcall simd_test\n"
-    "\tleave\n"
-    "\tret"
-#else
-    "\tjmp simd_test"
-#endif
-    );
-
-typedef
-#if defined(INT_SIZE)
-# define ELEM_SIZE INT_SIZE
-signed int
-# if INT_SIZE == 1
-#  define MODE QI
-# elif INT_SIZE == 2
-#  define MODE HI
-# elif INT_SIZE == 4
-#  define MODE SI
-# elif INT_SIZE == 8
-#  define MODE DI
-# endif
-#elif defined(UINT_SIZE)
-# define ELEM_SIZE UINT_SIZE
-unsigned int
-# if UINT_SIZE == 1
-#  define MODE QI
-# elif UINT_SIZE == 2
-#  define MODE HI
-# elif UINT_SIZE == 4
-#  define MODE SI
-# elif UINT_SIZE == 8
-#  define MODE DI
-# endif
-#elif defined(FLOAT_SIZE)
-float
-# define ELEM_SIZE FLOAT_SIZE
-# if FLOAT_SIZE == 4
-#  define MODE SF
-# elif FLOAT_SIZE == 8
-#  define MODE DF
-# endif
-#endif
-#ifndef VEC_SIZE
-# define VEC_SIZE ELEM_SIZE
-#endif
-__attribute__((mode(MODE), vector_size(VEC_SIZE))) vec_t;
-
-#define ELEM_COUNT (VEC_SIZE / ELEM_SIZE)
-
-typedef unsigned int __attribute__((mode(QI), vector_size(VEC_SIZE))) byte_vec_t;
-
-/* Various builtins want plain char / int / long long vector types ... */
-typedef char __attribute__((vector_size(VEC_SIZE))) vqi_t;
-typedef short __attribute__((vector_size(VEC_SIZE))) vhi_t;
-typedef int __attribute__((vector_size(VEC_SIZE))) vsi_t;
-#if VEC_SIZE >= 8
-typedef long long __attribute__((vector_size(VEC_SIZE))) vdi_t;
-#endif
+ENTRY(simd_test);
 
 #if VEC_SIZE == 8 && defined(__SSE__)
 # define to_bool(cmp) (__builtin_ia32_pmovmskb(cmp) == 0xff)
@@ -418,13 +353,6 @@ static inline bool _to_bool(byte_vec_t b
 # endif
 #endif
 
-/*
- * Suppress value propagation by the compiler, preventing unwanted
- * optimization. This at once makes the compiler use memory operands
- * more often, which for our purposes is the more interesting case.
- */
-#define touch(var) asm volatile ( "" : "+m" (var) )
-
 int simd_test(void)
 {
     unsigned int i, j;
--- /dev/null
+++ b/tools/tests/x86_emulator/simd.h
@@ -0,0 +1,91 @@
+#include <stdbool.h>
+
+/*
+ * ENTRY(name) emits the _start entry point of a test blob, which transfers
+ * control to function "name".  For 32-bit builds with 16-byte vectors the
+ * stack pointer is aligned down to a 16-byte boundary before calling the
+ * function (and restored via "leave" afterwards); all other builds simply
+ * jump to the function.
+ */
+#if defined(__i386__) && VEC_SIZE == 16
+# define ENTRY(name) \
+asm ( "\t.text\n" \
+      "\t.globl _start\n" \
+      "_start:\n" \
+      "\tpush %ebp\n" \
+      "\tmov %esp,%ebp\n" \
+      "\tand $~0xf,%esp\n" \
+      "\tcall " #name "\n" \
+      "\tleave\n" \
+      "\tret" )
+#else
+# define ENTRY(name) \
+asm ( "\t.text\n" \
+      "\t.globl _start\n" \
+      "_start:\n" \
+      "\tjmp " #name )
+#endif
+
+/*
+ * vec_t: vector type of VEC_SIZE bytes.  Its element type is selected by
+ * INT_SIZE, UINT_SIZE, or FLOAT_SIZE (checked in this order), each giving
+ * the element size in bytes.  The conditionals below supply the base type
+ * as well as ELEM_SIZE and MODE; VEC_SIZE defaults to ELEM_SIZE, i.e. to a
+ * single element.
+ */
+typedef
+#if defined(INT_SIZE)
+# define ELEM_SIZE INT_SIZE
+signed int
+# if INT_SIZE == 1
+#  define MODE QI
+# elif INT_SIZE == 2
+#  define MODE HI
+# elif INT_SIZE == 4
+#  define MODE SI
+# elif INT_SIZE == 8
+#  define MODE DI
+# endif
+#elif defined(UINT_SIZE)
+# define ELEM_SIZE UINT_SIZE
+unsigned int
+# if UINT_SIZE == 1
+#  define MODE QI
+# elif UINT_SIZE == 2
+#  define MODE HI
+# elif UINT_SIZE == 4
+#  define MODE SI
+# elif UINT_SIZE == 8
+#  define MODE DI
+# endif
+#elif defined(FLOAT_SIZE)
+float
+# define ELEM_SIZE FLOAT_SIZE
+# if FLOAT_SIZE == 4
+#  define MODE SF
+# elif FLOAT_SIZE == 8
+#  define MODE DF
+# endif
+#endif
+#ifndef VEC_SIZE
+# define VEC_SIZE ELEM_SIZE
+#endif
+__attribute__((mode(MODE), vector_size(VEC_SIZE))) vec_t;
+
+#define ELEM_COUNT (VEC_SIZE / ELEM_SIZE)
+
+/* Vector of VEC_SIZE unsigned, byte sized (QI mode) elements. */
+typedef unsigned int __attribute__((mode(QI), vector_size(VEC_SIZE))) byte_vec_t;
+
+/* Various builtins want plain char / int / long long vector types ... */
+typedef char __attribute__((vector_size(VEC_SIZE))) vqi_t;
+typedef short __attribute__((vector_size(VEC_SIZE))) vhi_t;
+typedef int __attribute__((vector_size(VEC_SIZE))) vsi_t;
+#if VEC_SIZE >= 8
+typedef long long __attribute__((vector_size(VEC_SIZE))) vdi_t;
+#endif
+
+/*
+ * Suppress value propagation by the compiler, preventing unwanted
+ * optimization. This at once makes the compiler use memory operands
+ * more often, which for our purposes is the more interesting case.
+ */
+#define touch(var) asm volatile ( "" : "+m" (var) )
--- /dev/null
+++ b/tools/tests/x86_emulator/simd-fma.c
@@ -0,0 +1,148 @@
+#include "simd.h"
+
+ENTRY(fma_test);
+
+/*
+ * to_bool(cmp) collapses the result of a vector comparison into a scalar
+ * condition.  It is used below as "if ( !to_bool(a == b) ) return __LINE__;",
+ * i.e. it has to be true only when the comparison held for all elements.
+ */
+#if VEC_SIZE < 16
+# define to_bool(cmp) (!~(cmp)[0])
+#elif VEC_SIZE == 16
+# if FLOAT_SIZE == 4
+#  define to_bool(cmp) __builtin_ia32_vtestcps(cmp, (vec_t){} == 0)
+# elif FLOAT_SIZE == 8
+#  define to_bool(cmp) __builtin_ia32_vtestcpd(cmp, (vec_t){} == 0)
+# endif
+#elif VEC_SIZE == 32
+# if FLOAT_SIZE == 4
+#  define to_bool(cmp) __builtin_ia32_vtestcps256(cmp, (vec_t){} == 0)
+# elif FLOAT_SIZE == 8
+#  define to_bool(cmp) __builtin_ia32_vtestcpd256(cmp, (vec_t){} == 0)
+# endif
+#endif
+
+/*
+ * Pick the addsub() / fmaddsub() builtins matching the vector and element
+ * size.  fmaddsub() is only provided when the compiler targets FMA4; the
+ * tests relying on these macros are compiled out when either is missing.
+ */
+#if VEC_SIZE == 16
+# if FLOAT_SIZE == 4
+#  define addsub(x, y) __builtin_ia32_addsubps(x, y)
+#  if defined(__FMA4__)
+#   define fmaddsub(x, y, z) __builtin_ia32_vfmaddsubps(x, y, z)
+#  endif
+# elif FLOAT_SIZE == 8
+#  define addsub(x, y) __builtin_ia32_addsubpd(x, y)
+#  if defined(__FMA4__)
+#   define fmaddsub(x, y, z) __builtin_ia32_vfmaddsubpd(x, y, z)
+#  endif
+# endif
+#elif VEC_SIZE == 32
+# if FLOAT_SIZE == 4
+#  define addsub(x, y) __builtin_ia32_addsubps256(x, y)
+#  if defined(__FMA4__)
+#   define fmaddsub(x, y, z) __builtin_ia32_vfmaddsubps256(x, y, z)
+#  endif
+# elif FLOAT_SIZE == 8
+#  define addsub(x, y) __builtin_ia32_addsubpd256(x, y)
+#  if defined(__FMA4__)
+#   define fmaddsub(x, y, z) __builtin_ia32_vfmaddsubpd256(x, y, z)
+#  endif
+# endif
+#endif
+
+/*
+ * Compare expressions of the form a * b +/- c (and their negations) against
+ * reference values computed in a different way.  Returns 0 if all checks
+ * pass, otherwise the source line number of the first failing check.
+ */
+int fma_test(void)
+{
+    unsigned int i;
+    vec_t x, y, z, src, inv, one;
+
+    /* src = 1, 2, ...; inv = ELEM_COUNT, ELEM_COUNT - 1, ...; one = all 1. */
+    for ( i = 0; i < ELEM_COUNT; ++i )
+    {
+        src[i] = i + 1;
+        inv[i] = ELEM_COUNT - i;
+        one[i] = 1;
+    }
+
+    /*
+     * x and y are the reference values for inv * src + inv and
+     * inv * src - inv respectively.  touch() (see simd.h) is applied to src
+     * ahead of each checked expression to suppress value propagation.
+     */
+    x = (src + one) * inv;
+    y = (src - one) * inv;
+    touch(src);
+    z = inv * src + inv;
+    if ( !to_bool(x == z) ) return __LINE__;
+
+    touch(src);
+    z = -inv * src - inv;
+    if ( !to_bool(-x == z) ) return __LINE__;
+
+    touch(src);
+    z = inv * src - inv;
+    if ( !to_bool(y == z) ) return __LINE__;
+
+    touch(src);
+    z = -inv * src + inv;
+    if ( !to_bool(-y == z) ) return __LINE__;
+    touch(src);
+
+    /*
+     * Same four sign combinations, now with src * one as the product and
+     * inv (the addend) being the touch()ed operand.
+     */
+    x = src + inv;
+    y = src - inv;
+    touch(inv);
+    z = src * one + inv;
+    if ( !to_bool(x == z) ) return __LINE__;
+
+    touch(inv);
+    z = -src * one - inv;
+    if ( !to_bool(-x == z) ) return __LINE__;
+
+    touch(inv);
+    z = src * one - inv;
+    if ( !to_bool(y == z) ) return __LINE__;
+
+    touch(inv);
+    z = -src * one + inv;
+    if ( !to_bool(-y == z) ) return __LINE__;
+    touch(inv);
+
+#if defined(addsub) && defined(fmaddsub)
+    /* fmaddsub() is checked against addsub() of the separate product. */
+    x = addsub(src * inv, one);
+    y = addsub(src * inv, -one);
+    touch(one);
+    z = fmaddsub(src, inv, one);
+    if ( !to_bool(x == z) ) return __LINE__;
+
+    touch(one);
+    z = fmaddsub(src, inv, -one);
+    if ( !to_bool(y == z) ) return __LINE__;
+    touch(one);
+
+    /* Repeat with inv being touch()ed; y still holds the value from above. */
+    x = addsub(src * inv, one);
+    touch(inv);
+    z = fmaddsub(src, inv, one);
+    if ( !to_bool(x == z) ) return __LINE__;
+
+    touch(inv);
+    z = fmaddsub(src, inv, -one);
+    if ( !to_bool(y == z) ) return __LINE__;
+    touch(inv);
+#endif
+
+    return 0;
+}
--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -11,6 +11,7 @@
 #include "sse2-avx.h"
 #include "sse4-avx.h"
 #include "avx.h"
+#include "fma4.h"
 
 #define verbose false /* Switch to true for far more logging. */
 
@@ -47,6 +48,11 @@ static bool simd_check_avx(void)
 #define simd_check_sse2_avx  simd_check_avx
 #define simd_check_sse4_avx  simd_check_avx
 
+static bool simd_check_fma4(void)
+{
+    return cpu_has_fma4;
+}
+
 static void simd_set_regs(struct cpu_user_regs *regs)
 {
     if ( cpu_has_mmx )
@@ -143,6 +149,12 @@ static const struct {
     SIMD(AVX scalar double,      avx,         f8),
     SIMD(AVX 128bit double,      avx,       16f8),
     SIMD(AVX 256bit double,      avx,       32f8),
+    SIMD(FMA4 scalar single,     fma4,        f4),
+    SIMD(FMA4 128bit single,     fma4,      16f4),
+    SIMD(FMA4 256bit single,     fma4,      32f4),
+    SIMD(FMA4 scalar double,     fma4,        f8),
+    SIMD(FMA4 128bit double,     fma4,      16f8),
+    SIMD(FMA4 256bit double,     fma4,      32f8),
 #undef SIMD_
 #undef SIMD
 };
--- a/tools/tests/x86_emulator/x86_emulate.h
+++ b/tools/tests/x86_emulator/x86_emulate.h
@@ -164,6 +164,23 @@ static inline uint64_t xgetbv(uint32_t x
     (res.c & (1U << 6)) != 0; \
 })
 
+/*
+ * True if FMA4 can be used: CPUID leaf 1 ECX bit 27 and bits 1 and 2 of
+ * xgetbv(0) must all be set (OSXSAVE, and SSE + AVX state enabled in XCR0,
+ * going by the architecture manuals), and CPUID leaf 0x80000001 ECX bit 16
+ * (FMA4) must be set.  res.c is zeroed when the first check fails, so that
+ * the final bit test yields false without consulting the extended leaf.
+ */
+#define cpu_has_fma4 ({ \
+    struct cpuid_leaf res; \
+    emul_test_cpuid(1, 0, &res, NULL); \
+    if ( !(res.c & (1U << 27)) || ((xgetbv(0) & 6) != 6) ) \
+        res.c = 0; \
+    else \
+        emul_test_cpuid(0x80000001, 0, &res, NULL); \
+    (res.c & (1U << 16)) != 0; \
+})
+
 #define cpu_has_tbm ({ \
     struct cpuid_leaf res; \
     emul_test_cpuid(0x80000001, 0, &res, NULL); \
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -421,7 +421,16 @@ static const struct {
     [0x44] = { .simd_size = simd_packed_int },
     [0x4a ... 0x4b] = { .simd_size = simd_packed_fp, .four_op = 1 },
     [0x4c] = { .simd_size = simd_packed_int, .four_op = 1 },
+    [0x5c ... 0x5f] = { .simd_size = simd_packed_fp, .four_op = 1 },
     [0x60 ... 0x63] = { .simd_size = simd_packed_int, .two_op = 1 },
+    [0x68 ... 0x69] = { .simd_size = simd_packed_fp, .four_op = 1 },
+    [0x6a ... 0x6b] = { .simd_size = simd_scalar_fp, .four_op = 1 },
+    [0x6c ... 0x6d] = { .simd_size = simd_packed_fp, .four_op = 1 },
+    [0x6e ... 0x6f] = { .simd_size = simd_scalar_fp, .four_op = 1 },
+    [0x78 ... 0x79] = { .simd_size = simd_packed_fp, .four_op = 1 },
+    [0x7a ... 0x7b] = { .simd_size = simd_scalar_fp, .four_op = 1 },
+    [0x7c ... 0x7d] = { .simd_size = simd_packed_fp, .four_op = 1 },
+    [0x7e ... 0x7f] = { .simd_size = simd_scalar_fp, .four_op = 1 },
     [0xcc] = { .simd_size = simd_other },
     [0xdf] = { .simd_size = simd_packed_int, .two_op = 1 },
     [0xf0] = {},
@@ -1612,6 +1621,7 @@ static bool vcpu_has(
 #define vcpu_has_lzcnt()       vcpu_has(0x80000001, ECX,  5, ctxt, ops)
 #define vcpu_has_sse4a()       vcpu_has(0x80000001, ECX,  6, ctxt, ops)
 #define vcpu_has_misalignsse() vcpu_has(0x80000001, ECX,  7, ctxt, ops)
+#define vcpu_has_fma4()        vcpu_has(0x80000001, ECX, 16, ctxt, ops)
 #define vcpu_has_tbm()         vcpu_has(0x80000001, ECX, 21, ctxt, ops)
 #define vcpu_has_bmi1()        vcpu_has(         7, EBX,  3, ctxt, ops)
 #define vcpu_has_hle()         vcpu_has(         7, EBX,  4, ctxt, ops)
@@ -6176,6 +6186,7 @@ x86_emulate(
     simd_0f_imm8_avx:
                 host_and_vcpu_must_have(avx);
             }
+    simd_0f_imm8_ymm:
             get_fpu(X86EMUL_FPU_ymm, &fic);
         }
         else if ( vex.pfx )
@@ -7732,6 +7743,49 @@ x86_emulate(
         generate_exception_if(vex.w, EXC_UD);
         goto simd_0f_int_imm8;
 
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x5c): /* vfmaddsubps {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+                                           /* vfmaddsubps {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x5d): /* vfmaddsubpd {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+                                           /* vfmaddsubpd {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x5e): /* vfmsubaddps {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+                                           /* vfmsubaddps {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x5f): /* vfmsubaddpd {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+                                           /* vfmsubaddpd {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x68): /* vfmaddps {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+                                           /* vfmaddps {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x69): /* vfmaddpd {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+                                           /* vfmaddpd {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x6a): /* vfmaddss xmm,xmm/m32,xmm,xmm */
+                                           /* vfmaddss xmm/m32,xmm,xmm,xmm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x6b): /* vfmaddsd xmm,xmm/m64,xmm,xmm */
+                                           /* vfmaddsd xmm/m64,xmm,xmm,xmm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x6c): /* vfmsubps {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+                                           /* vfmsubps {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x6d): /* vfmsubpd {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+                                           /* vfmsubpd {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x6e): /* vfmsubss xmm,xmm/m32,xmm,xmm */
+                                           /* vfmsubss xmm/m32,xmm,xmm,xmm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x6f): /* vfmsubsd xmm,xmm/m64,xmm,xmm */
+                                           /* vfmsubsd xmm/m64,xmm,xmm,xmm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x78): /* vfnmaddps {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+                                           /* vfnmaddps {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x79): /* vfnmaddpd {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+                                           /* vfnmaddpd {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x7a): /* vfnmaddss xmm,xmm/m32,xmm,xmm */
+                                           /* vfnmaddss xmm/m32,xmm,xmm,xmm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x7b): /* vfnmaddsd xmm,xmm/m64,xmm,xmm */
+                                           /* vfnmaddsd xmm/m64,xmm,xmm,xmm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x7c): /* vfnmsubps {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+                                           /* vfnmsubps {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x7d): /* vfnmsubpd {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
+                                           /* vfnmsubpd {x,y}mm/mem,{x,y}mm,{x,y}mm,{x,y}mm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x7e): /* vfnmsubss xmm,xmm/m32,xmm,xmm */
+                                           /* vfnmsubss xmm/m32,xmm,xmm,xmm */
+    case X86EMUL_OPC_VEX_66(0x0f3a, 0x7f): /* vfnmsubsd xmm,xmm/m64,xmm,xmm */
+                                           /* vfnmsubsd xmm/m64,xmm,xmm,xmm */
+        host_and_vcpu_must_have(fma4);
+        goto simd_0f_imm8_ymm;
+
     case X86EMUL_OPC_66(0x0f3a, 0x60):     /* pcmpestrm $imm8,xmm/m128,xmm */
     case X86EMUL_OPC_VEX_66(0x0f3a, 0x60): /* vpcmpestrm $imm8,xmm/m128,xmm */
     case X86EMUL_OPC_66(0x0f3a, 0x61):     /* pcmpestri $imm8,xmm/m128,xmm */
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -76,6 +76,7 @@
 #define cpu_has_svm             boot_cpu_has(X86_FEATURE_SVM)
 #define cpu_has_sse4a           boot_cpu_has(X86_FEATURE_SSE4A)
 #define cpu_has_lwp             boot_cpu_has(X86_FEATURE_LWP)
+#define cpu_has_fma4            boot_cpu_has(X86_FEATURE_FMA4)
 #define cpu_has_tbm             boot_cpu_has(X86_FEATURE_TBM)
 
 /* CPUID level 0x0000000D:1.eax */



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel