All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] stop using mmx intrinsics
@ 2024-03-20 21:12 Tyler Retzlaff
  2024-03-20 21:12 ` [PATCH] net: " Tyler Retzlaff
  2024-03-28 16:14 ` [PATCH v2 0/2] " Tyler Retzlaff
  0 siblings, 2 replies; 12+ messages in thread
From: Tyler Retzlaff @ 2024-03-20 21:12 UTC (permalink / raw)
  To: dev; +Cc: Bruce Richardson, Jasvinder Singh, Konstantin Ananyev, Tyler Retzlaff

MSVC does not support older MMX intrinsics use SSE/AVX instead.

Tyler Retzlaff (1):
  net: stop using mmx intrinsics

 lib/net/net_crc_avx512.c | 28 ++++++++++------------------
 lib/net/net_crc_sse.c    | 28 ++++++++++------------------
 2 files changed, 20 insertions(+), 36 deletions(-)

-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH] net: stop using mmx intrinsics
  2024-03-20 21:12 [PATCH] stop using mmx intrinsics Tyler Retzlaff
@ 2024-03-20 21:12 ` Tyler Retzlaff
  2024-03-21 17:09   ` Thomas Monjalon
  2024-03-28 16:14 ` [PATCH v2 0/2] " Tyler Retzlaff
  1 sibling, 1 reply; 12+ messages in thread
From: Tyler Retzlaff @ 2024-03-20 21:12 UTC (permalink / raw)
  To: dev; +Cc: Bruce Richardson, Jasvinder Singh, Konstantin Ananyev, Tyler Retzlaff

Update code to use only avx/sse intrinsics as mmx is not supported on
MSVC.

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/net/net_crc_avx512.c | 28 ++++++++++------------------
 lib/net/net_crc_sse.c    | 28 ++++++++++------------------
 2 files changed, 20 insertions(+), 36 deletions(-)

diff --git a/lib/net/net_crc_avx512.c b/lib/net/net_crc_avx512.c
index 0f0dee4..6d0c644 100644
--- a/lib/net/net_crc_avx512.c
+++ b/lib/net/net_crc_avx512.c
@@ -8,7 +8,11 @@
 
 #include "net_crc.h"
 
+#ifdef RTE_TOOLCHAIN_MSVC
+#include <intrin.h>
+#else
 #include <x86intrin.h>
+#endif
 
 /* VPCLMULQDQ CRC computation context structure */
 struct crc_vpclmulqdq_ctx {
@@ -331,13 +335,10 @@ static const alignas(16) uint32_t mask2[4] = {
 			c9, c10, c11);
 	crc32_eth.fold_3x128b = _mm512_setr_epi64(c12, c13, c14, c15,
 			c16, c17, 0, 0);
-	crc32_eth.fold_1x128b = _mm_setr_epi64(_mm_cvtsi64_m64(c16),
-			_mm_cvtsi64_m64(c17));
+	crc32_eth.fold_1x128b = _mm_set_epi64x(c17, c16);
 
-	crc32_eth.rk5_rk6 = _mm_setr_epi64(_mm_cvtsi64_m64(c18),
-			_mm_cvtsi64_m64(c19));
-	crc32_eth.rk7_rk8 = _mm_setr_epi64(_mm_cvtsi64_m64(c20),
-			_mm_cvtsi64_m64(c21));
+	crc32_eth.rk5_rk6 = _mm_set_epi64x(c19, c18);
+	crc32_eth.rk7_rk8 = _mm_set_epi64x(c21, c20);
 }
 
 static void
@@ -378,13 +379,10 @@ static const alignas(16) uint32_t mask2[4] = {
 			c9, c10, c11);
 	crc16_ccitt.fold_3x128b = _mm512_setr_epi64(c12, c13, c14, c15,
 			c16, c17, 0, 0);
-	crc16_ccitt.fold_1x128b = _mm_setr_epi64(_mm_cvtsi64_m64(c16),
-			_mm_cvtsi64_m64(c17));
+	crc16_ccitt.fold_1x128b = _mm_set_epi64x(c17, c16);
 
-	crc16_ccitt.rk5_rk6 = _mm_setr_epi64(_mm_cvtsi64_m64(c18),
-			_mm_cvtsi64_m64(c19));
-	crc16_ccitt.rk7_rk8 = _mm_setr_epi64(_mm_cvtsi64_m64(c20),
-			_mm_cvtsi64_m64(c21));
+	crc16_ccitt.rk5_rk6 = _mm_set_epi64x(c19, c18);
+	crc16_ccitt.rk7_rk8 = _mm_set_epi64x(c21, c20);
 }
 
 void
@@ -392,12 +390,6 @@ static const alignas(16) uint32_t mask2[4] = {
 {
 	crc32_load_init_constants();
 	crc16_load_init_constants();
-
-	/*
-	 * Reset the register as following calculation may
-	 * use other data types such as float, double, etc.
-	 */
-	_mm_empty();
 }
 
 uint32_t
diff --git a/lib/net/net_crc_sse.c b/lib/net/net_crc_sse.c
index d673ae3..9ab80a0 100644
--- a/lib/net/net_crc_sse.c
+++ b/lib/net/net_crc_sse.c
@@ -10,7 +10,11 @@
 
 #include "net_crc.h"
 
+#ifdef RTE_TOOLCHAIN_MSVC
+#include <intrin.h>
+#else
 #include <x86intrin.h>
+#endif
 
 /** PCLMULQDQ CRC computation context structure */
 struct crc_pclmulqdq_ctx {
@@ -272,12 +276,9 @@ static const alignas(16) uint8_t crc_xmm_shift_tab[48] = {
 	p =  0x10811LLU;
 
 	/** Save the params in context structure */
-	crc16_ccitt_pclmulqdq.rk1_rk2 =
-		_mm_setr_epi64(_mm_cvtsi64_m64(k1), _mm_cvtsi64_m64(k2));
-	crc16_ccitt_pclmulqdq.rk5_rk6 =
-		_mm_setr_epi64(_mm_cvtsi64_m64(k5), _mm_cvtsi64_m64(k6));
-	crc16_ccitt_pclmulqdq.rk7_rk8 =
-		_mm_setr_epi64(_mm_cvtsi64_m64(q), _mm_cvtsi64_m64(p));
+	crc16_ccitt_pclmulqdq.rk1_rk2 = _mm_set_epi64x(k2, k1);
+	crc16_ccitt_pclmulqdq.rk5_rk6 = _mm_set_epi64x(k6, k5);
+	crc16_ccitt_pclmulqdq.rk7_rk8 = _mm_set_epi64x(p, q);
 
 	/** Initialize CRC32 data */
 	k1 = 0xccaa009eLLU;
@@ -288,18 +289,9 @@ static const alignas(16) uint8_t crc_xmm_shift_tab[48] = {
 	p =  0x1db710641LLU;
 
 	/** Save the params in context structure */
-	crc32_eth_pclmulqdq.rk1_rk2 =
-		_mm_setr_epi64(_mm_cvtsi64_m64(k1), _mm_cvtsi64_m64(k2));
-	crc32_eth_pclmulqdq.rk5_rk6 =
-		_mm_setr_epi64(_mm_cvtsi64_m64(k5), _mm_cvtsi64_m64(k6));
-	crc32_eth_pclmulqdq.rk7_rk8 =
-		_mm_setr_epi64(_mm_cvtsi64_m64(q), _mm_cvtsi64_m64(p));
-
-	/**
-	 * Reset the register as following calculation may
-	 * use other data types such as float, double, etc.
-	 */
-	_mm_empty();
+	crc32_eth_pclmulqdq.rk1_rk2 = _mm_set_epi64x(k2, k1);
+	crc32_eth_pclmulqdq.rk5_rk6 = _mm_set_epi64x(k6, k5);
+	crc32_eth_pclmulqdq.rk7_rk8 = _mm_set_epi64x(p, q);
 }
 
 uint32_t
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH] net: stop using mmx intrinsics
  2024-03-20 21:12 ` [PATCH] net: " Tyler Retzlaff
@ 2024-03-21 17:09   ` Thomas Monjalon
  2024-03-21 17:27     ` Tyler Retzlaff
  0 siblings, 1 reply; 12+ messages in thread
From: Thomas Monjalon @ 2024-03-21 17:09 UTC (permalink / raw)
  To: Tyler Retzlaff; +Cc: dev, Bruce Richardson, Jasvinder Singh, Konstantin Ananyev

20/03/2024 22:12, Tyler Retzlaff:
> +#ifdef RTE_TOOLCHAIN_MSVC
> +#include <intrin.h>
> +#else
>  #include <x86intrin.h>
> +#endif

It is not the same include in MSVC?
Is it something we want to wrap in a DPDK header file?



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] net: stop using mmx intrinsics
  2024-03-21 17:09   ` Thomas Monjalon
@ 2024-03-21 17:27     ` Tyler Retzlaff
  2024-03-21 18:01       ` Thomas Monjalon
  0 siblings, 1 reply; 12+ messages in thread
From: Tyler Retzlaff @ 2024-03-21 17:27 UTC (permalink / raw)
  To: Thomas Monjalon
  Cc: dev, Bruce Richardson, Jasvinder Singh, Konstantin Ananyev

On Thu, Mar 21, 2024 at 06:09:01PM +0100, Thomas Monjalon wrote:
> 20/03/2024 22:12, Tyler Retzlaff:
> > +#ifdef RTE_TOOLCHAIN_MSVC
> > +#include <intrin.h>
> > +#else
> >  #include <x86intrin.h>
> > +#endif
> 
> It is not the same include in MSVC?

unfortunately intrin.h is vestigial in the monolithic approach. to use
any intrinsic you're supposed to include only the one and only true
header instead of vendor/arch feature specific headers.

> Is it something we want to wrap in a DPDK header file?

do you mean create a monolithic rte_intrinsic.h header that is
essentially

#ifdef MSVC
#include <intrin.h>
#else
#include <x86intrin.h>
#include <immintrin.h>
#include <nmmintrin.h>
...
#endif

i assumed that doing something like this might be unpopular due to the
unnecessary namespace pollution.

another alternative could be to find a way to limit that pollution only
to msvc by stashing intrin.h in e.g. rte_common.h (or rte_os.h) under
conditional compile but the problem i think we had with that approach is
that some llvm headers don't define prototypes that match those from
msvc see lib/eal/windows/include/rte_windows.h another issue arises
where if the application includes intrin.h before dpdk headers we again
have to deal with llvm vs msvc differences.

fwiw the instance highlighted llvm should have volatile qualified in
their prototype but didn't.

i will commit to looking into this more after applications are working.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] net: stop using mmx intrinsics
  2024-03-21 17:27     ` Tyler Retzlaff
@ 2024-03-21 18:01       ` Thomas Monjalon
  2024-03-21 18:18         ` Tyler Retzlaff
  2024-03-28 16:16         ` Tyler Retzlaff
  0 siblings, 2 replies; 12+ messages in thread
From: Thomas Monjalon @ 2024-03-21 18:01 UTC (permalink / raw)
  To: Tyler Retzlaff
  Cc: dev, Bruce Richardson, Jasvinder Singh, Konstantin Ananyev,
	david.marchand

21/03/2024 18:27, Tyler Retzlaff:
> On Thu, Mar 21, 2024 at 06:09:01PM +0100, Thomas Monjalon wrote:
> > 20/03/2024 22:12, Tyler Retzlaff:
> > > +#ifdef RTE_TOOLCHAIN_MSVC
> > > +#include <intrin.h>
> > > +#else
> > >  #include <x86intrin.h>
> > > +#endif
> > 
> > It is not the same include in MSVC?
> 
> unfortunately intrin.h is vestigial in the monolithic approach. to use
> any intrinsic you're supposed to include only the one and only true
> header instead of vendor/arch feature specific headers.
> 
> > Is it something we want to wrap in a DPDK header file?
> 
> do you mean create a monolithic rte_intrinsic.h header that is
> essentially
> 
> #ifdef MSVC
> #include <intrin.h>
> #else
> #include <x86intrin.h>
> #include <immintrin.h>
> #include <nmmintrin.h>
> ...
> #endif
> 
> i assumed that doing something like this might be unpopular due to the
> unnecessary namespace pollution.

We already have such a file.
It is rte_vect.h.
I suppose we should just make sure it is included consistently
instead of x86intrin.h or immintrin.h

This command will show where changes are required:
	git grep intrin.h




^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] net: stop using mmx intrinsics
  2024-03-21 18:01       ` Thomas Monjalon
@ 2024-03-21 18:18         ` Tyler Retzlaff
  2024-03-28 16:16         ` Tyler Retzlaff
  1 sibling, 0 replies; 12+ messages in thread
From: Tyler Retzlaff @ 2024-03-21 18:18 UTC (permalink / raw)
  To: Thomas Monjalon
  Cc: dev, Bruce Richardson, Jasvinder Singh, Konstantin Ananyev,
	david.marchand

On Thu, Mar 21, 2024 at 07:01:17PM +0100, Thomas Monjalon wrote:
> 21/03/2024 18:27, Tyler Retzlaff:
> > On Thu, Mar 21, 2024 at 06:09:01PM +0100, Thomas Monjalon wrote:
> > > 20/03/2024 22:12, Tyler Retzlaff:
> > > > +#ifdef RTE_TOOLCHAIN_MSVC
> > > > +#include <intrin.h>
> > > > +#else
> > > >  #include <x86intrin.h>
> > > > +#endif
> > > 
> > > It is not the same include in MSVC?
> > 
> > unfortunately intrin.h is vestigial in the monolithic approach. to use
> > any intrinsic you're supposed to include only the one and only true
> > header instead of vendor/arch feature specific headers.
> > 
> > > Is it something we want to wrap in a DPDK header file?
> > 
> > do you mean create a monolithic rte_intrinsic.h header that is
> > essentially
> > 
> > #ifdef MSVC
> > #include <intrin.h>
> > #else
> > #include <x86intrin.h>
> > #include <immintrin.h>
> > #include <nmmintrin.h>
> > ...
> > #endif
> > 
> > i assumed that doing something like this might be unpopular due to the
> > unnecessary namespace pollution.
> 
> We already have such a file.
> It is rte_vect.h.
> I suppose we should just make sure it is included consistently
> instead of x86intrin.h or immintrin.h
> 
> This command will show where changes are required:
> 	git grep intrin.h

there were some corner cases i can't recall, but since you identified
rte_vect.h is the preferred header let me do some experiments to see
what i can learn.  i'll either submit a series addressing it
specifically or come back with details.

thanks!

> 
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v2 0/2] stop using mmx intrinsics
  2024-03-20 21:12 [PATCH] stop using mmx intrinsics Tyler Retzlaff
  2024-03-20 21:12 ` [PATCH] net: " Tyler Retzlaff
@ 2024-03-28 16:14 ` Tyler Retzlaff
  2024-03-28 16:14   ` [PATCH v2 1/2] eal: include header for MSVC SIMD intrinsics Tyler Retzlaff
  2024-03-28 16:14   ` [PATCH v2 2/2] net: stop using mmx intrinsics Tyler Retzlaff
  1 sibling, 2 replies; 12+ messages in thread
From: Tyler Retzlaff @ 2024-03-28 16:14 UTC (permalink / raw)
  To: dev; +Cc: Bruce Richardson, Jasvinder Singh, Konstantin Ananyev, Tyler Retzlaff

MSVC does not support older MMX intrinsics use SSE/AVX instead.

v2:
  * move conditional #include <intrin.h> into rte_vect.h and include
    rte_vect.h into net_crc_avx512.c net_crc_sse.c instead of duplicating
    conditional compile of include in each file.

Tyler Retzlaff (2):
  eal: include header for MSVC SIMD intrinsics
  net: stop using mmx intrinsics

 lib/eal/include/generic/rte_vect.h |  6 +++++-
 lib/net/net_crc_avx512.c           | 27 +++++++--------------------
 lib/net/net_crc_sse.c              | 27 +++++++--------------------
 3 files changed, 19 insertions(+), 41 deletions(-)

-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v2 1/2] eal: include header for MSVC SIMD intrinsics
  2024-03-28 16:14 ` [PATCH v2 0/2] " Tyler Retzlaff
@ 2024-03-28 16:14   ` Tyler Retzlaff
  2024-03-28 17:19     ` Bruce Richardson
  2024-03-28 16:14   ` [PATCH v2 2/2] net: stop using mmx intrinsics Tyler Retzlaff
  1 sibling, 1 reply; 12+ messages in thread
From: Tyler Retzlaff @ 2024-03-28 16:14 UTC (permalink / raw)
  To: dev; +Cc: Bruce Richardson, Jasvinder Singh, Konstantin Ananyev, Tyler Retzlaff

MSVC documents that you use the monolithic intrin.h for all intrinsics
(including SIMD intrinsics) include intrin.h into rte_vec.h when
building with MSVC so we don't have to duplicate conditionally compile
include it across the DPDK source.

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/eal/include/generic/rte_vect.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/lib/eal/include/generic/rte_vect.h b/lib/eal/include/generic/rte_vect.h
index 6540419..1f84292 100644
--- a/lib/eal/include/generic/rte_vect.h
+++ b/lib/eal/include/generic/rte_vect.h
@@ -15,7 +15,11 @@
 
 #include <stdint.h>
 
-#ifndef RTE_TOOLCHAIN_MSVC
+#ifdef RTE_TOOLCHAIN_MSVC
+
+#include <intrin.h>
+
+#else
 
 /* Unsigned vector types */
 
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v2 2/2] net: stop using mmx intrinsics
  2024-03-28 16:14 ` [PATCH v2 0/2] " Tyler Retzlaff
  2024-03-28 16:14   ` [PATCH v2 1/2] eal: include header for MSVC SIMD intrinsics Tyler Retzlaff
@ 2024-03-28 16:14   ` Tyler Retzlaff
  2024-03-28 17:21     ` Bruce Richardson
  1 sibling, 1 reply; 12+ messages in thread
From: Tyler Retzlaff @ 2024-03-28 16:14 UTC (permalink / raw)
  To: dev; +Cc: Bruce Richardson, Jasvinder Singh, Konstantin Ananyev, Tyler Retzlaff

Update code to use only avx/sse intrinsics as mmx is not supported on
MSVC.

Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
---
 lib/net/net_crc_avx512.c | 27 +++++++--------------------
 lib/net/net_crc_sse.c    | 27 +++++++--------------------
 2 files changed, 14 insertions(+), 40 deletions(-)

diff --git a/lib/net/net_crc_avx512.c b/lib/net/net_crc_avx512.c
index 0f0dee4..d18eb96 100644
--- a/lib/net/net_crc_avx512.c
+++ b/lib/net/net_crc_avx512.c
@@ -5,11 +5,10 @@
 #include <stdalign.h>
 
 #include <rte_common.h>
+#include <rte_vect.h>
 
 #include "net_crc.h"
 
-#include <x86intrin.h>
-
 /* VPCLMULQDQ CRC computation context structure */
 struct crc_vpclmulqdq_ctx {
 	__m512i rk1_rk2;
@@ -331,13 +330,10 @@ static const alignas(16) uint32_t mask2[4] = {
 			c9, c10, c11);
 	crc32_eth.fold_3x128b = _mm512_setr_epi64(c12, c13, c14, c15,
 			c16, c17, 0, 0);
-	crc32_eth.fold_1x128b = _mm_setr_epi64(_mm_cvtsi64_m64(c16),
-			_mm_cvtsi64_m64(c17));
+	crc32_eth.fold_1x128b = _mm_set_epi64x(c17, c16);
 
-	crc32_eth.rk5_rk6 = _mm_setr_epi64(_mm_cvtsi64_m64(c18),
-			_mm_cvtsi64_m64(c19));
-	crc32_eth.rk7_rk8 = _mm_setr_epi64(_mm_cvtsi64_m64(c20),
-			_mm_cvtsi64_m64(c21));
+	crc32_eth.rk5_rk6 = _mm_set_epi64x(c19, c18);
+	crc32_eth.rk7_rk8 = _mm_set_epi64x(c21, c20);
 }
 
 static void
@@ -378,13 +374,10 @@ static const alignas(16) uint32_t mask2[4] = {
 			c9, c10, c11);
 	crc16_ccitt.fold_3x128b = _mm512_setr_epi64(c12, c13, c14, c15,
 			c16, c17, 0, 0);
-	crc16_ccitt.fold_1x128b = _mm_setr_epi64(_mm_cvtsi64_m64(c16),
-			_mm_cvtsi64_m64(c17));
+	crc16_ccitt.fold_1x128b = _mm_set_epi64x(c17, c16);
 
-	crc16_ccitt.rk5_rk6 = _mm_setr_epi64(_mm_cvtsi64_m64(c18),
-			_mm_cvtsi64_m64(c19));
-	crc16_ccitt.rk7_rk8 = _mm_setr_epi64(_mm_cvtsi64_m64(c20),
-			_mm_cvtsi64_m64(c21));
+	crc16_ccitt.rk5_rk6 = _mm_set_epi64x(c19, c18);
+	crc16_ccitt.rk7_rk8 = _mm_set_epi64x(c21, c20);
 }
 
 void
@@ -392,12 +385,6 @@ static const alignas(16) uint32_t mask2[4] = {
 {
 	crc32_load_init_constants();
 	crc16_load_init_constants();
-
-	/*
-	 * Reset the register as following calculation may
-	 * use other data types such as float, double, etc.
-	 */
-	_mm_empty();
 }
 
 uint32_t
diff --git a/lib/net/net_crc_sse.c b/lib/net/net_crc_sse.c
index d673ae3..112dc94 100644
--- a/lib/net/net_crc_sse.c
+++ b/lib/net/net_crc_sse.c
@@ -6,12 +6,11 @@
 #include <string.h>
 
 #include <rte_common.h>
+#include <rte_vect.h>
 #include <rte_branch_prediction.h>
 
 #include "net_crc.h"
 
-#include <x86intrin.h>
-
 /** PCLMULQDQ CRC computation context structure */
 struct crc_pclmulqdq_ctx {
 	__m128i rk1_rk2;
@@ -272,12 +271,9 @@ static const alignas(16) uint8_t crc_xmm_shift_tab[48] = {
 	p =  0x10811LLU;
 
 	/** Save the params in context structure */
-	crc16_ccitt_pclmulqdq.rk1_rk2 =
-		_mm_setr_epi64(_mm_cvtsi64_m64(k1), _mm_cvtsi64_m64(k2));
-	crc16_ccitt_pclmulqdq.rk5_rk6 =
-		_mm_setr_epi64(_mm_cvtsi64_m64(k5), _mm_cvtsi64_m64(k6));
-	crc16_ccitt_pclmulqdq.rk7_rk8 =
-		_mm_setr_epi64(_mm_cvtsi64_m64(q), _mm_cvtsi64_m64(p));
+	crc16_ccitt_pclmulqdq.rk1_rk2 = _mm_set_epi64x(k2, k1);
+	crc16_ccitt_pclmulqdq.rk5_rk6 = _mm_set_epi64x(k6, k5);
+	crc16_ccitt_pclmulqdq.rk7_rk8 = _mm_set_epi64x(p, q);
 
 	/** Initialize CRC32 data */
 	k1 = 0xccaa009eLLU;
@@ -288,18 +284,9 @@ static const alignas(16) uint8_t crc_xmm_shift_tab[48] = {
 	p =  0x1db710641LLU;
 
 	/** Save the params in context structure */
-	crc32_eth_pclmulqdq.rk1_rk2 =
-		_mm_setr_epi64(_mm_cvtsi64_m64(k1), _mm_cvtsi64_m64(k2));
-	crc32_eth_pclmulqdq.rk5_rk6 =
-		_mm_setr_epi64(_mm_cvtsi64_m64(k5), _mm_cvtsi64_m64(k6));
-	crc32_eth_pclmulqdq.rk7_rk8 =
-		_mm_setr_epi64(_mm_cvtsi64_m64(q), _mm_cvtsi64_m64(p));
-
-	/**
-	 * Reset the register as following calculation may
-	 * use other data types such as float, double, etc.
-	 */
-	_mm_empty();
+	crc32_eth_pclmulqdq.rk1_rk2 = _mm_set_epi64x(k2, k1);
+	crc32_eth_pclmulqdq.rk5_rk6 = _mm_set_epi64x(k6, k5);
+	crc32_eth_pclmulqdq.rk7_rk8 = _mm_set_epi64x(p, q);
 }
 
 uint32_t
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH] net: stop using mmx intrinsics
  2024-03-21 18:01       ` Thomas Monjalon
  2024-03-21 18:18         ` Tyler Retzlaff
@ 2024-03-28 16:16         ` Tyler Retzlaff
  1 sibling, 0 replies; 12+ messages in thread
From: Tyler Retzlaff @ 2024-03-28 16:16 UTC (permalink / raw)
  To: Thomas Monjalon
  Cc: dev, Bruce Richardson, Jasvinder Singh, Konstantin Ananyev,
	david.marchand

On Thu, Mar 21, 2024 at 07:01:17PM +0100, Thomas Monjalon wrote:
> 21/03/2024 18:27, Tyler Retzlaff:
> > On Thu, Mar 21, 2024 at 06:09:01PM +0100, Thomas Monjalon wrote:
> > > 20/03/2024 22:12, Tyler Retzlaff:
> > > > +#ifdef RTE_TOOLCHAIN_MSVC
> > > > +#include <intrin.h>
> > > > +#else
> > > >  #include <x86intrin.h>
> > > > +#endif
> > > 
> > > It is not the same include in MSVC?
> > 
> > unfortunately intrin.h is vestigial in the monolithic approach. to use
> > any intrinsic you're supposed to include only the one and only true
> > header instead of vendor/arch feature specific headers.
> > 
> > > Is it something we want to wrap in a DPDK header file?
> > 
> > do you mean create a monolithic rte_intrinsic.h header that is
> > essentially
> > 
> > #ifdef MSVC
> > #include <intrin.h>
> > #else
> > #include <x86intrin.h>
> > #include <immintrin.h>
> > #include <nmmintrin.h>
> > ...
> > #endif
> > 
> > i assumed that doing something like this might be unpopular due to the
> > unnecessary namespace pollution.
> 
> We already have such a file.
> It is rte_vect.h.
> I suppose we should just make sure it is included consistently
> instead of x86intrin.h or immintrin.h
> 
> This command will show where changes are required:
> 	git grep intrin.h

thanks! i saw none of the problems i had before so this worked great.

there is only one other include of intrin.h in eal now and it is not for
vector intrinsics so it should be cleaner to just include rte_vect.h
whenever SIMD / vector intrinsics are required for windows and !windows.

> 
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2 1/2] eal: include header for MSVC SIMD intrinsics
  2024-03-28 16:14   ` [PATCH v2 1/2] eal: include header for MSVC SIMD intrinsics Tyler Retzlaff
@ 2024-03-28 17:19     ` Bruce Richardson
  0 siblings, 0 replies; 12+ messages in thread
From: Bruce Richardson @ 2024-03-28 17:19 UTC (permalink / raw)
  To: Tyler Retzlaff; +Cc: dev, Jasvinder Singh, Konstantin Ananyev

On Thu, Mar 28, 2024 at 09:14:05AM -0700, Tyler Retzlaff wrote:
> MSVC documents that you use the monolithic intrin.h for all intrinsics
> (including SIMD intrinsics) include intrin.h into rte_vec.h when
> building with MSVC so we don't have to duplicate conditionally compile
> include it across the DPDK source.
> 
> Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> ---
Acked-by: Bruce Richardson <bruce.richardson@intel.com>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2 2/2] net: stop using mmx intrinsics
  2024-03-28 16:14   ` [PATCH v2 2/2] net: stop using mmx intrinsics Tyler Retzlaff
@ 2024-03-28 17:21     ` Bruce Richardson
  0 siblings, 0 replies; 12+ messages in thread
From: Bruce Richardson @ 2024-03-28 17:21 UTC (permalink / raw)
  To: Tyler Retzlaff; +Cc: dev, Jasvinder Singh, Konstantin Ananyev

On Thu, Mar 28, 2024 at 09:14:06AM -0700, Tyler Retzlaff wrote:
> Update code to use only avx/sse intrinsics as mmx is not supported on
> MSVC.
> 
> Signed-off-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
> ---

One comment inline below. With or without that suggestion:

Acked-by: Bruce Richardson <bruce.richardson@intel.com>

>  lib/net/net_crc_avx512.c | 27 +++++++--------------------
>  lib/net/net_crc_sse.c    | 27 +++++++--------------------
>  2 files changed, 14 insertions(+), 40 deletions(-)
> 
> diff --git a/lib/net/net_crc_avx512.c b/lib/net/net_crc_avx512.c
> index 0f0dee4..d18eb96 100644
> --- a/lib/net/net_crc_avx512.c
> +++ b/lib/net/net_crc_avx512.c
> @@ -5,11 +5,10 @@
>  #include <stdalign.h>
>  
>  #include <rte_common.h>
> +#include <rte_vect.h>
>  
>  #include "net_crc.h"
>  
> -#include <x86intrin.h>
> -
>  /* VPCLMULQDQ CRC computation context structure */
>  struct crc_vpclmulqdq_ctx {
>  	__m512i rk1_rk2;
> @@ -331,13 +330,10 @@ static const alignas(16) uint32_t mask2[4] = {
>  			c9, c10, c11);
>  	crc32_eth.fold_3x128b = _mm512_setr_epi64(c12, c13, c14, c15,
>  			c16, c17, 0, 0);

Since the setr's below are being replaced, it would be nice to change these
ones above too. Long term I think it's going to be confusing having some
assignments set up as L->R, while others are R->L.


> -	crc32_eth.fold_1x128b = _mm_setr_epi64(_mm_cvtsi64_m64(c16),
> -			_mm_cvtsi64_m64(c17));
> +	crc32_eth.fold_1x128b = _mm_set_epi64x(c17, c16);
>  
> -	crc32_eth.rk5_rk6 = _mm_setr_epi64(_mm_cvtsi64_m64(c18),
> -			_mm_cvtsi64_m64(c19));
> -	crc32_eth.rk7_rk8 = _mm_setr_epi64(_mm_cvtsi64_m64(c20),
> -			_mm_cvtsi64_m64(c21));
> +	crc32_eth.rk5_rk6 = _mm_set_epi64x(c19, c18);
> +	crc32_eth.rk7_rk8 = _mm_set_epi64x(c21, c20);
>  }

<snip>

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2024-03-28 17:21 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-03-20 21:12 [PATCH] stop using mmx intrinsics Tyler Retzlaff
2024-03-20 21:12 ` [PATCH] net: " Tyler Retzlaff
2024-03-21 17:09   ` Thomas Monjalon
2024-03-21 17:27     ` Tyler Retzlaff
2024-03-21 18:01       ` Thomas Monjalon
2024-03-21 18:18         ` Tyler Retzlaff
2024-03-28 16:16         ` Tyler Retzlaff
2024-03-28 16:14 ` [PATCH v2 0/2] " Tyler Retzlaff
2024-03-28 16:14   ` [PATCH v2 1/2] eal: include header for MSVC SIMD intrinsics Tyler Retzlaff
2024-03-28 17:19     ` Bruce Richardson
2024-03-28 16:14   ` [PATCH v2 2/2] net: stop using mmx intrinsics Tyler Retzlaff
2024-03-28 17:21     ` Bruce Richardson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.