qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/2] Instruction set detection for clang.
@ 2020-07-23  0:27 Shu-Chun Weng
  2020-07-23  0:27 ` [PATCH 1/2] configure: avx2 and avx512f " Shu-Chun Weng
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Shu-Chun Weng @ 2020-07-23  0:27 UTC (permalink / raw)
  To: rth; +Cc: Shu-Chun Weng, qemu-devel

Currently when configuring QEMU with clang, AVX2, AVX512F, ATOMIC64, and
ATOMIC128 are all disabled because the detection code is GCC-only. With these
two patches, I am able to configure, build, and run tests with clang with all of
the above enabled.

Shu-Chun Weng (2):
  configure: avx2 and avx512f detection for clang
  configure: atomic64/128 detection for clang

 configure           | 34 +++++++++++++++++++++++-----------
 util/bufferiszero.c | 33 +++++++++++++++++++++++----------
 2 files changed, 46 insertions(+), 21 deletions(-)

-- 
2.28.0.rc0.105.gf9edc3c819-goog



^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 1/2] configure: avx2 and avx512f detection for clang
  2020-07-23  0:27 [PATCH 0/2] Instruction set detection for clang Shu-Chun Weng
@ 2020-07-23  0:27 ` Shu-Chun Weng
  2020-07-23  4:54   ` Thomas Huth
  2020-07-23  0:27 ` [PATCH 2/2] configure: atomic64/128 " Shu-Chun Weng
  2020-08-05 23:23 ` [PATCH 0/2] Instruction set " Shu-Chun Weng
  2 siblings, 1 reply; 8+ messages in thread
From: Shu-Chun Weng @ 2020-07-23  0:27 UTC (permalink / raw)
  To: rth; +Cc: Shu-Chun Weng, qemu-devel

Since clang does not support "#pragma GCC", the instruction sets are
always disabled. In this change, we

 1. wrap "#pragma GCC" inside "#ifndef __clang__",
 2. only retain them around "#include <{e,i,s}mmintrin.h>" to work
    around a gcc bug,
 3. and annotate each function with `__attribute__((target(*)))` which
    is recognized by both gcc and clang.

Signed-off-by: Shu-Chun Weng <scw@google.com>
---
 configure           | 16 ++++++++++++++--
 util/bufferiszero.c | 33 +++++++++++++++++++++++----------
 2 files changed, 37 insertions(+), 12 deletions(-)

diff --git a/configure b/configure
index 4bd80ed507..d9ce3aa5db 100755
--- a/configure
+++ b/configure
@@ -5808,10 +5808,16 @@ fi
 
 if test "$cpuid_h" = "yes" && test "$avx2_opt" != "no"; then
   cat > $TMPC << EOF
+#include <cpuid.h>
+#ifndef __clang__
 #pragma GCC push_options
 #pragma GCC target("avx2")
-#include <cpuid.h>
+#endif
 #include <immintrin.h>
+#ifndef __clang__
+#pragma GCC pop_options
+#endif
+__attribute__((target("avx2")))
 static int bar(void *a) {
     __m256i x = *(__m256i *)a;
     return _mm256_testz_si256(x, x);
@@ -5835,10 +5841,16 @@ fi
 
 if test "$cpuid_h" = "yes" && test "$avx512f_opt" = "yes"; then
   cat > $TMPC << EOF
+#include <cpuid.h>
+#ifndef __clang__
 #pragma GCC push_options
 #pragma GCC target("avx512f")
-#include <cpuid.h>
+#endif
 #include <immintrin.h>
+#ifndef __clang__
+#pragma GCC pop_options
+#endif
+__attribute__((target("avx512f")))
 static int bar(void *a) {
     __m512i x = *(__m512i *)a;
     return _mm512_test_epi64_mask(x, x);
diff --git a/util/bufferiszero.c b/util/bufferiszero.c
index 695bb4ce28..ca836b6e8c 100644
--- a/util/bufferiszero.c
+++ b/util/bufferiszero.c
@@ -64,17 +64,18 @@ buffer_zero_int(const void *buf, size_t len)
 }
 
 #if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT) || defined(__SSE2__)
-/* Do not use push_options pragmas unnecessarily, because clang
- * does not support them.
- */
-#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT)
+#ifndef __clang__
 #pragma GCC push_options
 #pragma GCC target("sse2")
 #endif
 #include <emmintrin.h>
+#ifndef __clang__
+#pragma GCC pop_options
+#endif
 
 /* Note that each of these vectorized functions require len >= 64.  */
 
+__attribute__((target("sse2")))
 static bool
 buffer_zero_sse2(const void *buf, size_t len)
 {
@@ -104,19 +105,22 @@ buffer_zero_sse2(const void *buf, size_t len)
 
     return _mm_movemask_epi8(_mm_cmpeq_epi8(t, zero)) == 0xFFFF;
 }
-#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT)
-#pragma GCC pop_options
-#endif
 
 #ifdef CONFIG_AVX2_OPT
 /* Note that due to restrictions/bugs wrt __builtin functions in gcc <= 4.8,
  * the includes have to be within the corresponding push_options region, and
  * therefore the regions themselves have to be ordered with increasing ISA.
  */
+#ifndef __clang__
 #pragma GCC push_options
 #pragma GCC target("sse4")
+#endif
 #include <smmintrin.h>
+#ifndef __clang__
+#pragma GCC pop_options
+#endif
 
+__attribute__((target("sse4")))
 static bool
 buffer_zero_sse4(const void *buf, size_t len)
 {
@@ -145,11 +149,16 @@ buffer_zero_sse4(const void *buf, size_t len)
     return _mm_testz_si128(t, t);
 }
 
-#pragma GCC pop_options
+#ifndef __clang__
 #pragma GCC push_options
 #pragma GCC target("avx2")
+#endif
 #include <immintrin.h>
+#ifndef __clang__
+#pragma GCC pop_options
+#endif
 
+__attribute__((target("avx2")))
 static bool
 buffer_zero_avx2(const void *buf, size_t len)
 {
@@ -176,14 +185,19 @@ buffer_zero_avx2(const void *buf, size_t len)
 
     return _mm256_testz_si256(t, t);
 }
-#pragma GCC pop_options
 #endif /* CONFIG_AVX2_OPT */
 
 #ifdef CONFIG_AVX512F_OPT
+#ifndef __clang__
 #pragma GCC push_options
 #pragma GCC target("avx512f")
+#endif
 #include <immintrin.h>
+#ifndef __clang__
+#pragma GCC pop_options
+#endif
 
+__attribute__((target("avx512f")))
 static bool
 buffer_zero_avx512(const void *buf, size_t len)
 {
@@ -210,7 +224,6 @@ buffer_zero_avx512(const void *buf, size_t len)
     return !_mm512_test_epi64_mask(t, t);
 
 }
-#pragma GCC pop_options
 #endif
 
 
-- 
2.28.0.rc0.105.gf9edc3c819-goog



^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/2] configure: atomic64/128 detection for clang
  2020-07-23  0:27 [PATCH 0/2] Instruction set detection for clang Shu-Chun Weng
  2020-07-23  0:27 ` [PATCH 1/2] configure: avx2 and avx512f " Shu-Chun Weng
@ 2020-07-23  0:27 ` Shu-Chun Weng
  2020-08-05 23:23 ` [PATCH 0/2] Instruction set " Shu-Chun Weng
  2 siblings, 0 replies; 8+ messages in thread
From: Shu-Chun Weng @ 2020-07-23  0:27 UTC (permalink / raw)
  To: rth; +Cc: Shu-Chun Weng, qemu-devel

The public interfaces for __atomic_* and __sync_* do not contain the
explicit *_{number} versions:
  https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html
  https://gcc.gnu.org/onlinedocs/gcc/_005f_005fsync-Builtins.html

They appear to be GCC's internal symbols which happen to work. However,
clang does not recognize them. Replace the existing usages with the `_n`
versions (or no suffix) which are the documented API.

Signed-off-by: Shu-Chun Weng <scw@google.com>
---
 configure | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/configure b/configure
index d9ce3aa5db..0613a049e9 100755
--- a/configure
+++ b/configure
@@ -5894,9 +5894,9 @@ if test "$int128" = "yes"; then
 int main(void)
 {
   unsigned __int128 x = 0, y = 0;
-  y = __atomic_load_16(&x, 0);
-  __atomic_store_16(&x, y, 0);
-  __atomic_compare_exchange_16(&x, &y, x, 0, 0, 0);
+  y = __atomic_load_n(&x, 0);
+  __atomic_store_n(&x, y, 0);
+  __atomic_compare_exchange_n(&x, &y, x, 0, 0, 0);
   return 0;
 }
 EOF
@@ -5911,7 +5911,7 @@ if test "$int128" = yes && test "$atomic128" = no; then
 int main(void)
 {
   unsigned __int128 x = 0, y = 0;
-  __sync_val_compare_and_swap_16(&x, y, x);
+  __sync_val_compare_and_swap(&x, y, x);
   return 0;
 }
 EOF
@@ -5931,11 +5931,11 @@ int main(void)
 {
   uint64_t x = 0, y = 0;
 #ifdef __ATOMIC_RELAXED
-  y = __atomic_load_8(&x, 0);
-  __atomic_store_8(&x, y, 0);
-  __atomic_compare_exchange_8(&x, &y, x, 0, 0, 0);
-  __atomic_exchange_8(&x, y, 0);
-  __atomic_fetch_add_8(&x, y, 0);
+  y = __atomic_load_n(&x, 0);
+  __atomic_store_n(&x, y, 0);
+  __atomic_compare_exchange_n(&x, &y, x, 0, 0, 0);
+  __atomic_exchange_n(&x, y, 0);
+  __atomic_fetch_add(&x, y, 0);
 #else
   typedef char is_host64[sizeof(void *) >= sizeof(uint64_t) ? 1 : -1];
   __sync_lock_test_and_set(&x, y);
-- 
2.28.0.rc0.105.gf9edc3c819-goog



^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] configure: avx2 and avx512f detection for clang
  2020-07-23  0:27 ` [PATCH 1/2] configure: avx2 and avx512f " Shu-Chun Weng
@ 2020-07-23  4:54   ` Thomas Huth
  2020-07-23  6:04     ` Shu-Chun Weng
  0 siblings, 1 reply; 8+ messages in thread
From: Thomas Huth @ 2020-07-23  4:54 UTC (permalink / raw)
  To: Shu-Chun Weng, rth; +Cc: qemu-devel

On 23/07/2020 02.27, Shu-Chun Weng wrote:
> Since clang does not support "#pragma GCC", the instruction sets are
> always disabled. In this change, we
> 
>  1. wrap "#pragma GCC" inside "#ifndef __clang__",
>  2. only retain them around "#include <{e,i,s}mmintrin.h>" to work
>     around gcc bug,
>  3. and annotate each function with `__attribute__((target(*)))` which
>     is recognized by both gcc and clang.
> 
> Signed-off-by: Shu-Chun Weng <scw@google.com>
> ---
>  configure           | 16 ++++++++++++++--
>  util/bufferiszero.c | 33 +++++++++++++++++++++++----------
>  2 files changed, 37 insertions(+), 12 deletions(-)
> 
> diff --git a/configure b/configure
> index 4bd80ed507..d9ce3aa5db 100755
> --- a/configure
> +++ b/configure
> @@ -5808,10 +5808,16 @@ fi
>  
>  if test "$cpuid_h" = "yes" && test "$avx2_opt" != "no"; then
>    cat > $TMPC << EOF
> +#include <cpuid.h>
> +#ifndef __clang__
>  #pragma GCC push_options
>  #pragma GCC target("avx2")
> -#include <cpuid.h>
> +#endif
>  #include <immintrin.h>
> +#ifndef __clang__
> +#pragma GCC pop_options
> +#endif
> +__attribute__((target("avx2")))
>  static int bar(void *a) {
>      __m256i x = *(__m256i *)a;
>      return _mm256_testz_si256(x, x);

I wonder whether it would make more sense to pass "-mavx2" to the
compile_object call afterwards and simply remove the #pragmas here?
Did you try that already?

 Thomas



^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] configure: avx2 and avx512f detection for clang
  2020-07-23  4:54   ` Thomas Huth
@ 2020-07-23  6:04     ` Shu-Chun Weng
  2020-07-23  6:24       ` Thomas Huth
  0 siblings, 1 reply; 8+ messages in thread
From: Shu-Chun Weng @ 2020-07-23  6:04 UTC (permalink / raw)
  To: Thomas Huth; +Cc: rth, qemu-devel


[-- Attachment #1.1: Type: text/plain, Size: 1801 bytes --]

Do we have the flexibility to do that for util/bufferiszero.c as well?
Otherwise, we are using different mechanisms to detect (compile test.c with
-mavx2) and actually use (GCC pragma & __attribute__((target(*)))) the
feature in production.

Shu-Chun

On Wed, Jul 22, 2020 at 9:55 PM Thomas Huth <thuth@redhat.com> wrote:

> On 23/07/2020 02.27, Shu-Chun Weng wrote:
> > Since clang does not support "#pragma GCC", the instruction sets are
> > always disabled. In this change, we
> >
> >  1. wrap "#pragma GCC" inside "#ifndef __clang__",
> >  2. only retain them around "#include <{e,i,s}mmintrin.h>" to work
> >     around gcc bug,
> >  3. and annotate each function with `__attribute__((target(*)))` which
> >     is recognized by both gcc and clang.
> >
> > Signed-off-by: Shu-Chun Weng <scw@google.com>
> > ---
> >  configure           | 16 ++++++++++++++--
> >  util/bufferiszero.c | 33 +++++++++++++++++++++++----------
> >  2 files changed, 37 insertions(+), 12 deletions(-)
> >
> > diff --git a/configure b/configure
> > index 4bd80ed507..d9ce3aa5db 100755
> > --- a/configure
> > +++ b/configure
> > @@ -5808,10 +5808,16 @@ fi
> >
> >  if test "$cpuid_h" = "yes" && test "$avx2_opt" != "no"; then
> >    cat > $TMPC << EOF
> > +#include <cpuid.h>
> > +#ifndef __clang__
> >  #pragma GCC push_options
> >  #pragma GCC target("avx2")
> > -#include <cpuid.h>
> > +#endif
> >  #include <immintrin.h>
> > +#ifndef __clang__
> > +#pragma GCC pop_options
> > +#endif
> > +__attribute__((target("avx2")))
> >  static int bar(void *a) {
> >      __m256i x = *(__m256i *)a;
> >      return _mm256_testz_si256(x, x);
>
> I wonder whether it would make more sense to pass "-mavx2" to the
> compile_object call afterwards and simply remove the #pragmas here?
> Did you try that already?
>
>  Thomas
>
>

[-- Attachment #1.2: Type: text/html, Size: 2645 bytes --]

[-- Attachment #2: S/MIME Cryptographic Signature --]
[-- Type: application/pkcs7-signature, Size: 3844 bytes --]

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] configure: avx2 and avx512f detection for clang
  2020-07-23  6:04     ` Shu-Chun Weng
@ 2020-07-23  6:24       ` Thomas Huth
  0 siblings, 0 replies; 8+ messages in thread
From: Thomas Huth @ 2020-07-23  6:24 UTC (permalink / raw)
  To: Shu-Chun Weng; +Cc: qemu-devel, rth

On 23/07/2020 08.04, Shu-Chun Weng wrote:
> Do we have the flexibility to do that for util/bufferiszero.c as well?
> Otherwise, we are using different mechanisms to detect (compile test.c
> with -mavx2) and actually use (GCC pragma & __attribute__((target(*))))
> the feature in production.

That's true ... so it's likely better to keep the pragmas in the
configure script, indeed!

 Thanks,
  Thomas


> Shu-Chun
> 
> On Wed, Jul 22, 2020 at 9:55 PM Thomas Huth <thuth@redhat.com
> <mailto:thuth@redhat.com>> wrote:
> 
>     On 23/07/2020 02.27, Shu-Chun Weng wrote:
>     > Since clang does not support "#pragma GCC", the instruction sets are
>     > always disabled. In this change, we
>     >
>     >  1. wrap "#pragma GCC" inside "#ifndef __clang__",
>     >  2. only retain them around "#include <{e,i,s}mmintrin.h>" to work
>     >     around gcc bug,
>     >  3. and annotate each function with `__attribute__((target(*)))` which
>     >     is recognized by both gcc and clang.
>     >
>     > Signed-off-by: Shu-Chun Weng <scw@google.com <mailto:scw@google.com>>
>     > ---
>     >  configure           | 16 ++++++++++++++--
>     >  util/bufferiszero.c | 33 +++++++++++++++++++++++----------
>     >  2 files changed, 37 insertions(+), 12 deletions(-)
>     >
>     > diff --git a/configure b/configure
>     > index 4bd80ed507..d9ce3aa5db 100755
>     > --- a/configure
>     > +++ b/configure
>     > @@ -5808,10 +5808,16 @@ fi
>     > 
>     >  if test "$cpuid_h" = "yes" && test "$avx2_opt" != "no"; then
>     >    cat > $TMPC << EOF
>     > +#include <cpuid.h>
>     > +#ifndef __clang__
>     >  #pragma GCC push_options
>     >  #pragma GCC target("avx2")
>     > -#include <cpuid.h>
>     > +#endif
>     >  #include <immintrin.h>
>     > +#ifndef __clang__
>     > +#pragma GCC pop_options
>     > +#endif
>     > +__attribute__((target("avx2")))
>     >  static int bar(void *a) {
>     >      __m256i x = *(__m256i *)a;
>     >      return _mm256_testz_si256(x, x);
> 
>     I wonder whether it would make more sense to pass "-mavx2" to the
>     compile_object call afterwards and simply remove the #pragmas here?
>     Did you try that already?
> 
>      Thomas
> 



^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 0/2] Instruction set detection for clang.
  2020-07-23  0:27 [PATCH 0/2] Instruction set detection for clang Shu-Chun Weng
  2020-07-23  0:27 ` [PATCH 1/2] configure: avx2 and avx512f " Shu-Chun Weng
  2020-07-23  0:27 ` [PATCH 2/2] configure: atomic64/128 " Shu-Chun Weng
@ 2020-08-05 23:23 ` Shu-Chun Weng
  2020-09-17  7:25   ` Shu-Chun Weng
  2 siblings, 1 reply; 8+ messages in thread
From: Shu-Chun Weng @ 2020-08-05 23:23 UTC (permalink / raw)
  To: rth; +Cc: qemu-devel


[-- Attachment #1.1: Type: text/plain, Size: 752 bytes --]

Ping: https://patchew.org/QEMU/cover.1595463707.git.scw@google.com/

On Wed, Jul 22, 2020 at 5:27 PM Shu-Chun Weng <scw@google.com> wrote:

> Currently when configuring QEMU with clang, AVX2, AVX512F, ATOMIC64, and
> ATOMIC128 are all disabled because the detection code is GCC-only. With
> these
> two patches, I am able to configure, build, and run tests with clang with
> all of
> the above enabled.
>
> Shu-Chun Weng (2):
>   configure: avx2 and avx512f detection for clang
>   configure: atomic64/128 detection for clang
>
>  configure           | 34 +++++++++++++++++++++++-----------
>  util/bufferiszero.c | 33 +++++++++++++++++++++++----------
>  2 files changed, 46 insertions(+), 21 deletions(-)
>
> --
> 2.28.0.rc0.105.gf9edc3c819-goog
>
>

[-- Attachment #1.2: Type: text/html, Size: 1176 bytes --]

[-- Attachment #2: S/MIME Cryptographic Signature --]
[-- Type: application/pkcs7-signature, Size: 3844 bytes --]

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 0/2] Instruction set detection for clang.
  2020-08-05 23:23 ` [PATCH 0/2] Instruction set " Shu-Chun Weng
@ 2020-09-17  7:25   ` Shu-Chun Weng
  0 siblings, 0 replies; 8+ messages in thread
From: Shu-Chun Weng @ 2020-09-17  7:25 UTC (permalink / raw)
  To: rth; +Cc: qemu-devel


[-- Attachment #1.1: Type: text/plain, Size: 854 bytes --]

Ping.

On Wed, Aug 5, 2020 at 4:23 PM Shu-Chun Weng <scw@google.com> wrote:

> Ping: https://patchew.org/QEMU/cover.1595463707.git.scw@google.com/
>
> On Wed, Jul 22, 2020 at 5:27 PM Shu-Chun Weng <scw@google.com> wrote:
>
>> Currently when configuring QEMU with clang, AVX2, AVX512F, ATOMIC64, and
>> ATOMIC128 are all disabled because the detection code is GCC-only. With
>> these
>> two patches, I am able to configure, build, and run tests with clang with
>> all of
>> the above enabled.
>>
>> Shu-Chun Weng (2):
>>   configure: avx2 and avx512f detection for clang
>>   configure: atomic64/128 detection for clang
>>
>>  configure           | 34 +++++++++++++++++++++++-----------
>>  util/bufferiszero.c | 33 +++++++++++++++++++++++----------
>>  2 files changed, 46 insertions(+), 21 deletions(-)
>>
>> --
>> 2.28.0.rc0.105.gf9edc3c819-goog
>>
>>

[-- Attachment #1.2: Type: text/html, Size: 1544 bytes --]

[-- Attachment #2: S/MIME Cryptographic Signature --]
[-- Type: application/pkcs7-signature, Size: 3990 bytes --]

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2020-09-17  7:28 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-23  0:27 [PATCH 0/2] Instruction set detection for clang Shu-Chun Weng
2020-07-23  0:27 ` [PATCH 1/2] configure: avx2 and avx512f " Shu-Chun Weng
2020-07-23  4:54   ` Thomas Huth
2020-07-23  6:04     ` Shu-Chun Weng
2020-07-23  6:24       ` Thomas Huth
2020-07-23  0:27 ` [PATCH 2/2] configure: atomic64/128 " Shu-Chun Weng
2020-08-05 23:23 ` [PATCH 0/2] Instruction set " Shu-Chun Weng
2020-09-17  7:25   ` Shu-Chun Weng

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).