All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Jan Beulich" <JBeulich@suse.com>
To: xen-devel <xen-devel@lists.xenproject.org>
Cc: George Dunlap <George.Dunlap@eu.citrix.com>,
	Andrew Cooper <andrew.cooper3@citrix.com>
Subject: [PATCH v3 10/25] x86emul: support 3DNow! insns
Date: Thu, 07 Dec 2017 07:05:40 -0700	[thread overview]
Message-ID: <5A2958C402000078001958F7@prv-mh.provo.novell.com> (raw)
In-Reply-To: <5A29550C020000780019585B@prv-mh.provo.novell.com>

Yes, recent AMD CPUs don't support them anymore, but I think we should
nevertheless cope.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v3: Re-base.

--- a/.gitignore
+++ b/.gitignore
@@ -223,6 +223,7 @@
 tools/security/xensec_tool
 tools/tests/x86_emulator/*.bin
 tools/tests/x86_emulator/*.tmp
+tools/tests/x86_emulator/3dnow*.[ch]
 tools/tests/x86_emulator/asm
 tools/tests/x86_emulator/avx*.[ch]
 tools/tests/x86_emulator/blowfish.h
--- a/tools/tests/x86_emulator/Makefile
+++ b/tools/tests/x86_emulator/Makefile
@@ -11,7 +11,7 @@ all: $(TARGET)
 run: $(TARGET)
 	./$(TARGET)
 
-SIMD := sse sse2 sse4 avx avx2 xop
+SIMD := 3dnow sse sse2 sse4 avx avx2 xop
 FMA := fma4 fma
 SG := avx2-sg
 TESTCASES := blowfish $(SIMD) $(FMA) $(SG)
@@ -19,6 +19,9 @@ TESTCASES := blowfish $(SIMD) $(FMA) $(S
 blowfish-cflags := ""
 blowfish-cflags-x86_32 := "-mno-accumulate-outgoing-args -Dstatic="
 
+3dnow-vecs := 8
+3dnow-ints :=
+3dnow-flts := 4
 sse-vecs := 16
 sse-ints :=
 sse-flts := 4
@@ -49,8 +52,13 @@ xop-ints := 1 2 4 8
 xop-flts := $(avx-flts)
 
 # For AVX and later, have the compiler avoid XMM0 to widen coverage of
-# the VEX.vvvv checks in the emulator.
-non-sse = $(if $(filter sse%,$(1)),,-ffixed-xmm0)
+# the VEX.vvvv checks in the emulator.  For 3DNow!, however, force SSE
+# use for floating point operations, to avoid mixing MMX and FPU register
+# uses.  Also enable 3DNow! extensions, but note that we can't use 3dnowa
+# as the test flavor right away since -m3dnowa is being understood only
+# by gcc 7.x and newer (older ones want a specific machine model instead).
+3dnowa := $(call cc-option,$(CC),-m3dnowa,-march=k8)
+non-sse = $(if $(filter sse%,$(1)),,$(if $(filter 3dnow%,$(1)),-msse -mfpmath=sse $(3dnowa),-ffixed-xmm0))
 
 define simd-defs
 $(1)-cflags := \
@@ -81,8 +89,9 @@ $(addsuffix .h,$(TESTCASES)): %.h: %.c t
 	$(foreach arch,$(filter-out $(XEN_COMPILE_ARCH),x86_32) $(XEN_COMPILE_ARCH), \
 	    for cflags in $($*-cflags) $($*-cflags-$(arch)); do \
 		$(MAKE) -f testcase.mk TESTCASE=$* XEN_TARGET_ARCH=$(arch) $*-cflags="$$cflags" all; \
+		prefix=$(shell echo $(subst -,_,$*) | sed -e 's,^\([0-9]\),_\1,'); \
 		flavor=$$(echo $${cflags} | sed -e 's, .*,,' -e 'y,-=,__,') ; \
-		(echo "static const unsigned int $(subst -,_,$*)_$(arch)$${flavor}[] = {"; \
+		(echo "static const unsigned int $${prefix}_$(arch)$${flavor}[] = {"; \
 		 od -v -t x $*.bin | sed -e 's/^[0-9]* /0x/' -e 's/ /, 0x/g' -e 's/$$/,/'; \
 		 echo "};") >>$@.new; \
 		rm -f $*.bin; \
--- a/tools/tests/x86_emulator/simd.c
+++ b/tools/tests/x86_emulator/simd.c
@@ -48,6 +48,8 @@ static inline bool _to_bool(byte_vec_t b
 
 #if VEC_SIZE == FLOAT_SIZE
 # define to_int(x) ((vec_t){ (int)(x)[0] })
+#elif VEC_SIZE == 8 && FLOAT_SIZE == 4 && defined(__3dNOW__)
+# define to_int(x) __builtin_ia32_pi2fd(__builtin_ia32_pf2id(x))
 #elif VEC_SIZE == 16 && defined(__SSE2__)
 # if FLOAT_SIZE == 4
 #  define to_int(x) __builtin_ia32_cvtdq2ps(__builtin_ia32_cvtps2dq(x))
@@ -70,7 +72,24 @@ static inline bool _to_bool(byte_vec_t b
 })
 #endif
 
-#if FLOAT_SIZE == 4 && defined(__SSE__)
+#if VEC_SIZE == 8 && FLOAT_SIZE == 4 && defined(__3dNOW_A__)
+# define max __builtin_ia32_pfmax
+# define min __builtin_ia32_pfmin
+# define recip(x) ({ \
+    vec_t t_ = __builtin_ia32_pfrcp(x); \
+    touch(x); \
+    t_[1] = __builtin_ia32_pfrcp(__builtin_ia32_pswapdsf(x))[0]; \
+    touch(x); \
+    __builtin_ia32_pfrcpit2(__builtin_ia32_pfrcpit1(t_, x), t_); \
+})
+# define rsqrt(x) ({ \
+    vec_t t_ = __builtin_ia32_pfrsqrt(x); \
+    touch(x); \
+    t_[1] = __builtin_ia32_pfrsqrt(__builtin_ia32_pswapdsf(x))[0]; \
+    touch(x); \
+    __builtin_ia32_pfrcpit2(__builtin_ia32_pfrsqit1(__builtin_ia32_pfmul(t_, t_), x), t_); \
+})
+#elif FLOAT_SIZE == 4 && defined(__SSE__)
 # if VEC_SIZE == 32 && defined(__AVX__)
 #  if defined(__AVX2__)
 #   define broadcast(x) \
--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -5,6 +5,7 @@
 
 #include "x86-emulate.h"
 #include "blowfish.h"
+#include "3dnow.h"
 #include "sse.h"
 #include "sse2.h"
 #include "sse4.h"
@@ -28,6 +29,11 @@ static bool blowfish_check_regs(const st
     return regs->eax == 2 && regs->edx == 1;
 }
 
+static bool simd_check__3dnow(void)
+{
+    return cpu_has_3dnow_ext && cpu_has_sse;
+}
+
 static bool simd_check_sse(void)
 {
     return cpu_has_sse;
@@ -117,6 +123,7 @@ static const struct {
 #else
 # define SIMD(desc, feat, form) SIMD_(32, desc, feat, form)
 #endif
+    SIMD(3DNow! single,          _3dnow,     8f4),
     SIMD(SSE scalar single,      sse,         f4),
     SIMD(SSE packed single,      sse,       16f4),
     SIMD(SSE2 scalar single,     sse2,        f4),
--- a/tools/tests/x86_emulator/x86-emulate.h
+++ b/tools/tests/x86_emulator/x86-emulate.h
@@ -171,6 +171,12 @@ static inline uint64_t xgetbv(uint32_t x
     (res.b & (1U << 8)) != 0; \
 })
 
+#define cpu_has_3dnow_ext ({ \
+    struct cpuid_leaf res; \
+    emul_test_cpuid(0x80000001, 0, &res, NULL); \
+    (res.d & (1U << 30)) != 0; \
+})
+
 #define cpu_has_sse4a ({ \
     struct cpuid_leaf res; \
     emul_test_cpuid(0x80000001, 0, &res, NULL); \
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -355,6 +355,36 @@ static const struct {
     [0xff] = { ModRM }
 };
 
+static const uint16_t _3dnow_table[16] = {
+    [0x0] = (1 << 0xd) /* pi2fd */,
+    [0x1] = (1 << 0xd) /* pf2id */,
+    [0x9] = (1 << 0x0) /* pfcmpge */ |
+            (1 << 0x4) /* pfmin */ |
+            (1 << 0x6) /* pfrcp */ |
+            (1 << 0x7) /* pfrsqrt */ |
+            (1 << 0xa) /* pfsub */ |
+            (1 << 0xe) /* pfadd */,
+    [0xa] = (1 << 0x0) /* pfcmpge */ |
+            (1 << 0x4) /* pfmax */ |
+            (1 << 0x6) /* pfrcpit1 */ |
+            (1 << 0x7) /* pfrsqit1 */ |
+            (1 << 0xa) /* pfsubr */ |
+            (1 << 0xe) /* pfacc */,
+    [0xb] = (1 << 0x0) /* pfcmpeq */ |
+            (1 << 0x4) /* pfmul */ |
+            (1 << 0x6) /* pfrcpit2 */ |
+            (1 << 0x7) /* pmulhrw */ |
+            (1 << 0xf) /* pavgusb */,
+};
+
+static const uint16_t _3dnow_ext_table[16] = {
+    [0x1] = (1 << 0xd) /* pi2fw */,
+    [0x1] = (1 << 0xc) /* pf2iw */,
+    [0x8] = (1 << 0xa) /* pfnacc */ |
+            (1 << 0xa) /* pfpnacc */,
+    [0xb] = (1 << 0xb) /* pfswapd */,
+};
+
 /*
  * "two_op" and "four_op" below refer to the number of register operands
  * (one of which possibly also allowing to be a memory one). The named
@@ -1671,6 +1701,8 @@ static bool vcpu_has(
 #define vcpu_has_rdrand()      vcpu_has(         1, ECX, 30, ctxt, ops)
 #define vcpu_has_mmxext()     (vcpu_has(0x80000001, EDX, 22, ctxt, ops) || \
                                vcpu_has_sse())
+#define vcpu_has_3dnow_ext()   vcpu_has(0x80000001, EDX, 30, ctxt, ops)
+#define vcpu_has_3dnow()       vcpu_has(0x80000001, EDX, 31, ctxt, ops)
 #define vcpu_has_lahf_lm()     vcpu_has(0x80000001, ECX,  0, ctxt, ops)
 #define vcpu_has_cr8_legacy()  vcpu_has(0x80000001, ECX,  4, ctxt, ops)
 #define vcpu_has_lzcnt()       vcpu_has(0x80000001, ECX,  5, ctxt, ops)
@@ -5505,6 +5537,26 @@ x86_emulate(
     case X86EMUL_OPC(0x0f, 0x19) ... X86EMUL_OPC(0x0f, 0x1f): /* nop */
         break;
 
+    case X86EMUL_OPC(0x0f, 0x0e): /* femms */
+        host_and_vcpu_must_have(3dnow);
+        asm volatile ( "femms" );
+        break;
+
+    case X86EMUL_OPC(0x0f, 0x0f): /* 3DNow! */
+        if ( _3dnow_ext_table[(imm1 >> 4) & 0xf] & (1 << (imm1 & 0xf)) )
+            host_and_vcpu_must_have(3dnow_ext);
+        else if ( _3dnow_table[(imm1 >> 4) & 0xf] & (1 << (imm1 & 0xf)) )
+            host_and_vcpu_must_have(3dnow);
+        else
+            generate_exception(EXC_UD);
+
+        get_fpu(X86EMUL_FPU_mmx, &fic);
+
+        d = DstReg | SrcMem;
+        op_bytes = 8;
+        state->simd_size = simd_other;
+        goto simd_0f_imm8;
+
 #define CASE_SIMD_PACKED_INT(pfx, opc)       \
     case X86EMUL_OPC(pfx, opc):              \
     case X86EMUL_OPC_66(pfx, opc)
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -71,6 +71,8 @@
                                  && boot_cpu_has(X86_FEATURE_FFXSR))
 #define cpu_has_page1gb         boot_cpu_has(X86_FEATURE_PAGE1GB)
 #define cpu_has_rdtscp          boot_cpu_has(X86_FEATURE_RDTSCP)
+#define cpu_has_3dnow_ext       boot_cpu_has(X86_FEATURE_3DNOWEXT)
+#define cpu_has_3dnow           boot_cpu_has(X86_FEATURE_3DNOW)
 
 /* CPUID level 0x80000001.ecx */
 #define cpu_has_cmp_legacy      boot_cpu_has(X86_FEATURE_CMP_LEGACY)



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

  parent reply	other threads:[~2017-12-07 14:05 UTC|newest]

Thread overview: 85+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-12-07 13:49 [PATCH v3 00/25] x86: emulator enhancements Jan Beulich
2017-12-07 13:58 ` [PATCH v3 01/25] x86emul: make decode_register() return unsigned long * Jan Beulich
2017-12-07 18:32   ` Andrew Cooper
2017-12-08  7:44     ` Jan Beulich
2017-12-07 13:59 ` [PATCH v3 02/25] x86emul: build SIMD tests with -Os Jan Beulich
2017-12-07 18:32   ` Andrew Cooper
2017-12-07 14:00 ` [PATCH v3 03/25] x86emul: support F16C insns Jan Beulich
2018-01-31 18:58   ` Andrew Cooper
2017-12-07 14:01 ` [PATCH v3 04/25] x86emul: support FMA4 insns Jan Beulich
2018-01-31 19:51   ` Andrew Cooper
2017-12-07 14:02 ` [PATCH v3 05/25] x86emul: support FMA insns Jan Beulich
2018-02-01 16:15   ` Andrew Cooper
2017-12-07 14:03 ` [PATCH v3 06/25] x86emul: support most remaining AVX2 insns Jan Beulich
2018-02-01 19:45   ` Andrew Cooper
2018-02-02  9:29     ` Jan Beulich
2017-12-07 14:03 ` [PATCH v3 07/25] x86emul: support AVX2 gather insns Jan Beulich
2018-02-01 20:53   ` Andrew Cooper
2018-02-02  9:44     ` Jan Beulich
2017-12-07 14:04 ` [PATCH v3 08/25] x86emul: add tables for XOP 08 and 09 extension spaces Jan Beulich
2018-02-02 11:43   ` Andrew Cooper
2018-02-02 15:15     ` Jan Beulich
2018-02-02 16:02       ` Andrew Cooper
2017-12-07 14:04 ` [PATCH v3 09/25] x86emul: support XOP insns Jan Beulich
2018-02-02 12:03   ` Andrew Cooper
2018-02-02 15:17     ` Jan Beulich
2018-02-05 13:01       ` Andrew Cooper
2017-12-07 14:05 ` Jan Beulich [this message]
2018-02-02 13:02   ` [PATCH v3 10/25] x86emul: support 3DNow! insns Andrew Cooper
2018-02-02 15:22     ` Jan Beulich
2018-02-02 16:04       ` Andrew Cooper
2017-12-07 14:06 ` [PATCH v3 11/25] x86emul: place test blobs in executable section Jan Beulich
2018-02-02 13:03   ` Andrew Cooper
2018-02-02 15:27     ` Jan Beulich
2018-02-05 13:11       ` Andrew Cooper
2018-02-05 13:55         ` Jan Beulich
2017-12-07 14:07 ` [PATCH v3 12/25] x86emul: abstract out XCRn accesses Jan Beulich
2018-02-02 13:29   ` Andrew Cooper
2018-02-02 17:05     ` Jan Beulich
2017-12-07 14:08 ` [PATCH v3 13/25] x86emul: adjust_bnd() should check XCR0 Jan Beulich
2018-02-02 13:30   ` Andrew Cooper
2018-02-02 16:19     ` Jan Beulich
2018-02-02 16:28       ` Andrew Cooper
2017-12-07 14:09 ` [PATCH v3 14/25] x86emul: make all FPU emulation use the stub Jan Beulich
2018-02-02 13:37   ` Andrew Cooper
2017-12-07 14:10 ` [PATCH v3 15/25] x86/HVM: eliminate custom #MF/#XM handling Jan Beulich
2018-02-02 13:38   ` Andrew Cooper
2017-12-07 14:11 ` [PATCH v3 16/25] x86emul: support SWAPGS Jan Beulich
2018-02-02 13:41   ` Andrew Cooper
2018-02-02 16:24     ` Jan Beulich
2017-12-07 14:11 ` [PATCH v3 17/25] x86emul: emulate {MONITOR, MWAIT}{, X} as no-op Jan Beulich
2018-02-02 14:05   ` Andrew Cooper
2017-12-07 14:12 ` [PATCH v3 18/25] x86emul: add missing suffixes in test harness Jan Beulich
2018-02-02 14:13   ` Andrew Cooper
2017-12-07 14:14 ` [PATCH v3 19/25] x86emul: tell cmpxchg hook whether LOCK is in effect Jan Beulich
2017-12-08 10:58   ` Paul Durrant
2018-02-02 14:13   ` Andrew Cooper
2017-12-07 14:15 ` [PATCH v3 20/25] x86emul: correctly handle CMPXCHG* comparison failures Jan Beulich
2018-02-02 14:49   ` Andrew Cooper
2018-02-05  8:07     ` Jan Beulich
2018-02-05 13:38       ` Andrew Cooper
2017-12-07 14:16 ` [PATCH v3 21/25] x86emul: add read-modify-write hook Jan Beulich
2018-02-02 16:13   ` Andrew Cooper
2018-02-05  8:22     ` Jan Beulich
2018-02-05 14:21       ` Andrew Cooper
2018-02-05 14:56         ` Jan Beulich
2017-12-07 14:16 ` [PATCH v3 22/25] x86/HVM: do actual CMPXCHG in hvmemul_cmpxchg() Jan Beulich
2017-12-07 14:38   ` Razvan Cojocaru
2017-12-08 10:38   ` Paul Durrant
2018-02-02 16:36   ` Andrew Cooper
2018-02-05  8:32     ` Jan Beulich
2018-02-05 16:09       ` Andrew Cooper
2018-02-05 16:49         ` Jan Beulich
2018-02-05 16:57           ` Andrew Cooper
2018-02-05 17:05             ` Jan Beulich
2017-12-07 14:17 ` [PATCH v3 23/25] x86/HVM: make use of new read-modify-write emulator hook Jan Beulich
2017-12-08 10:41   ` Paul Durrant
2018-02-02 16:37   ` Andrew Cooper
2018-02-05  8:34     ` Jan Beulich
2018-02-05 16:15       ` Andrew Cooper
2017-12-07 14:18 ` [PATCH v3 24/25] x86/shadow: fully move unmap-dest into common code Jan Beulich
2018-02-02 16:46   ` Andrew Cooper
2017-12-07 14:19 ` [PATCH v3 25/25] x86/shadow: fold sh_x86_emulate_{write, cmpxchg}() into their only callers Jan Beulich
2018-02-02 16:52   ` Andrew Cooper
2018-02-05  8:42     ` Jan Beulich
2018-02-05 12:16       ` Tim Deegan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5A2958C402000078001958F7@prv-mh.provo.novell.com \
    --to=jbeulich@suse.com \
    --cc=George.Dunlap@eu.citrix.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.