All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] x86emul/test: encourage compiler to use more embedded broadcast
@ 2022-06-09 15:35 Jan Beulich
  2022-06-09 15:54 ` Andrew Cooper
  0 siblings, 1 reply; 2+ messages in thread
From: Jan Beulich @ 2022-06-09 15:35 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Wei Liu, Roger Pau Monné

For one, it was an oversight to leave dup_{hi,lo}() undefined for the
512-bit vector size. Then, in FMA testing, we can also arrange for the
compiler to (hopefully) recognize broadcasting potential. Finally, we
can replace the broadcast(1) use in the addsub() surrogate with inline
assembly explicitly using embedded broadcast (even gcc12 still doesn't
support broadcast for any of the addsub/subadd builtins).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: Also alter addsub() surrogate.

--- a/tools/tests/x86_emulator/simd.c
+++ b/tools/tests/x86_emulator/simd.c
@@ -912,6 +912,13 @@ static inline vec_t movlhps(vec_t x, vec
 })
 #  endif
 # endif
+#elif VEC_SIZE == 64
+# if FLOAT_SIZE == 4
+#  define dup_hi(x) B(movshdup, _mask, x, undef(), ~0)
+#  define dup_lo(x) B(movsldup, _mask, x, undef(), ~0)
+# elif FLOAT_SIZE == 8
+#  define dup_lo(x) B(movddup, _mask, x, undef(), ~0)
+# endif
 #endif
 #if VEC_SIZE == 16 && defined(__SSSE3__) && !defined(__AVX512VL__)
 # if INT_SIZE == 1
--- a/tools/tests/x86_emulator/simd.h
+++ b/tools/tests/x86_emulator/simd.h
@@ -49,8 +49,10 @@ float
 # define ELEM_SIZE FLOAT_SIZE
 # if FLOAT_SIZE == 4
 #  define MODE SF
+#  define ELEM_SFX "s"
 # elif FLOAT_SIZE == 8
 #  define MODE DF
+#  define ELEM_SFX "d"
 # endif
 #endif
 #ifndef VEC_SIZE
--- a/tools/tests/x86_emulator/simd-fma.c
+++ b/tools/tests/x86_emulator/simd-fma.c
@@ -56,13 +56,27 @@ ENTRY(fma_test);
 #endif
 
 #if defined(fmaddsub) && !defined(addsub)
-# define addsub(x, y) fmaddsub(x, broadcast(1), y)
+# ifdef __AVX512F__
+#  define addsub(x, y) ({ \
+    vec_t t_; \
+    typeof(t_[0]) one_ = 1; \
+    asm ( "vfmaddsub231p" ELEM_SFX " %2%{1to%c4%}, %1, %0" \
+          : "=v" (t_) \
+          : "v" (x), "m" (one_), "0" (y), "i" (ELEM_COUNT) ); \
+    t_; \
+})
+# else
+#  define addsub(x, y) fmaddsub(x, broadcast(1), y)
+# endif
 #endif
 
 int fma_test(void)
 {
     unsigned int i;
     vec_t x, y, z, src, inv, one;
+#ifdef __AVX512F__
+    typeof(one[0]) one_ = 1;
+#endif
 
     for ( i = 0; i < ELEM_COUNT; ++i )
     {
@@ -71,6 +85,10 @@ int fma_test(void)
         one[i] = 1;
     }
 
+#ifdef __AVX512F__
+# define one one_
+#endif
+
     x = (src + one) * inv;
     y = (src - one) * inv;
     touch(src);
@@ -93,22 +111,28 @@ int fma_test(void)
     x = src + inv;
     y = src - inv;
     touch(inv);
+    touch(one);
     z = src * one + inv;
     if ( !eq(x, z) ) return __LINE__;
 
     touch(inv);
+    touch(one);
     z = -src * one - inv;
     if ( !eq(-x, z) ) return __LINE__;
 
     touch(inv);
+    touch(one);
     z = src * one - inv;
     if ( !eq(y, z) ) return __LINE__;
 
     touch(inv);
+    touch(one);
     z = -src * one + inv;
     if ( !eq(-y, z) ) return __LINE__;
     touch(inv);
 
+#undef one
+
 #if defined(addsub) && defined(fmaddsub)
     x = addsub(src * inv, one);
     y = addsub(src * inv, -one);


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH v2] x86emul/test: encourage compiler to use more embedded broadcast
  2022-06-09 15:35 [PATCH v2] x86emul/test: encourage compiler to use more embedded broadcast Jan Beulich
@ 2022-06-09 15:54 ` Andrew Cooper
  0 siblings, 0 replies; 2+ messages in thread
From: Andrew Cooper @ 2022-06-09 15:54 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Wei Liu, Roger Pau Monne

On 09/06/2022 16:35, Jan Beulich wrote:
> For one it was an oversight to leave dup_{hi,lo}() undefined for 512-bit
> vector size. And then in FMA testing we can also arrange for the
> compiler to (hopefully) recognize broadcasting potential. Plus we can
> replace the broadcast(1) use in the addsub() surrogate with inline
> assembly explicitly using embedded broadcast (even gcc12 still doesn't
> support broadcast for any of the addsub/subadd builtins).
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2022-06-09 15:55 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-06-09 15:35 [PATCH v2] x86emul/test: encourage compiler to use more embedded broadcast Jan Beulich
2022-06-09 15:54 ` Andrew Cooper

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.