All of lore.kernel.org
 help / color / mirror / Atom feed
From: Neil Horman <nhorman-2XuSBdqkA4R54TAoqtyWWQ@public.gmane.org>
To: dev-VfR2kkLFssw@public.gmane.org
Subject: [PATCH] acl: If build does not support sse4.2, emulate missing instructions with C code
Date: Mon,  4 Aug 2014 11:35:58 -0400	[thread overview]
Message-ID: <1407166558-9532-1-git-send-email-nhorman@tuxdriver.com> (raw)

The ACL library makes extensive use of some SSE4.2 instructions, which means the
default build can't compile this library.  Work around the problem by testing
the __SSE4_1__ definition in the acl_vect.h file and defining the macros there
as intrinsics or C-level equivalents.  Note this is a minimal patch, adjusting
only the definitions that are currently used in the ACL library.

Only compile tested so far, but I wanted to post it for early review so that
others could aid in unit testing.

Signed-off-by: Neil Horman <nhorman-2XuSBdqkA4R54TAoqtyWWQ@public.gmane.org>
CC: Thomas Monjalon <thomas.monjalon-pdR9zngts4EAvxtiuMwx3w@public.gmane.org>
CC: "Konstantin Ananyev" <konstantin.ananyev-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
CC: Bruce Richardson <bruce.richardson-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
 lib/librte_acl/acl_bld.c  |   3 +-
 lib/librte_acl/acl_vect.h | 102 ++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 100 insertions(+), 5 deletions(-)

diff --git a/lib/librte_acl/acl_bld.c b/lib/librte_acl/acl_bld.c
index 873447b..de974a4 100644
--- a/lib/librte_acl/acl_bld.c
+++ b/lib/librte_acl/acl_bld.c
@@ -31,7 +31,6 @@
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include <nmmintrin.h>
 #include <rte_acl.h>
 #include "tb_mem.h"
 #include "acl.h"
@@ -1481,7 +1480,7 @@ acl_calc_wildness(struct rte_acl_build_rule *head,
 			switch (rule->config->defs[n].type) {
 			case RTE_ACL_FIELD_TYPE_BITMASK:
 				wild = (size -
-					_mm_popcnt_u32(fld->mask_range.u8)) /
+					__builtin_popcountl(fld->mask_range.u8)) /
 					size;
 				break;
 
diff --git a/lib/librte_acl/acl_vect.h b/lib/librte_acl/acl_vect.h
index d813600..e5f391b 100644
--- a/lib/librte_acl/acl_vect.h
+++ b/lib/librte_acl/acl_vect.h
@@ -34,6 +34,10 @@
 #ifndef _RTE_ACL_VECT_H_
 #define _RTE_ACL_VECT_H_
 
+#ifdef __SSE4_1__
+#include <smmintrin.h>
+#endif
+
 /**
  * @file
  *
@@ -44,12 +48,12 @@
 extern "C" {
 #endif
 
+
 #define	MM_ADD16(a, b)		_mm_add_epi16(a, b)
 #define	MM_ADD32(a, b)		_mm_add_epi32(a, b)
 #define	MM_ALIGNR8(a, b, c)	_mm_alignr_epi8(a, b, c)
 #define	MM_AND(a, b)		_mm_and_si128(a, b)
 #define MM_ANDNOT(a, b)		_mm_andnot_si128(a, b)
-#define MM_BLENDV8(a, b, c)	_mm_blendv_epi8(a, b, c)
 #define MM_CMPEQ16(a, b)	_mm_cmpeq_epi16(a, b)
 #define MM_CMPEQ32(a, b)	_mm_cmpeq_epi32(a, b)
 #define	MM_CMPEQ8(a, b)		_mm_cmpeq_epi8(a, b)
@@ -59,7 +63,6 @@ extern "C" {
 #define	MM_CVT32(a)		_mm_cvtsi128_si32(a)
 #define MM_CVTU32(a)		_mm_cvtsi32_si128(a)
 #define	MM_INSERT16(a, c, b)	_mm_insert_epi16(a, c, b)
-#define	MM_INSERT32(a, c, b)	_mm_insert_epi32(a, c, b)
 #define	MM_LOAD(a)		_mm_load_si128(a)
 #define	MM_LOADH_PI(a, b)	_mm_loadh_pi(a, b)
 #define	MM_LOADU(a)		_mm_loadu_si128(a)
@@ -82,7 +85,6 @@ extern "C" {
 #define	MM_SRL32(a, b)		_mm_srli_epi32(a, b)
 #define	MM_STORE(a, b)		_mm_store_si128(a, b)
 #define	MM_STOREU(a, b)		_mm_storeu_si128(a, b)
-#define	MM_TESTZ(a, b)		_mm_testz_si128(a, b)
 #define	MM_XOR(a, b)		_mm_xor_si128(a, b)
 
 #define	MM_SET16(a, b, c, d, e, f, g, h)	\
@@ -93,6 +95,100 @@ extern "C" {
 	_mm_set_epi8(c0, c1, c2, c3, c4, c5, c6, c7,	\
 		c8, c9, cA, cB, cC, cD, cE, cF)
 
+
+#ifndef __SSE4_1__
+/* C fallback for _mm_blendv_epi8: take src byte where mask byte has bit 7 set. */
+static inline xmm_t pblendvb(xmm_t dst, xmm_t src, xmm_t mask)
+{
+	unsigned char tmpd[16], tmps[16], tmpm[16];
+	int i;
+
+	MM_STOREU((xmm_t *)&tmpd, dst);
+	MM_STOREU((xmm_t *)&tmps, src);
+	MM_STOREU((xmm_t *)&tmpm, mask);
+
+	/* Work on the byte copies; only the top bit of a mask byte selects. */
+	for (i = 0; i < 16; i++)
+		if (tmpm[i] & 0x80)
+			tmpd[i] = tmps[i];
+
+	return MM_LOADU((xmm_t *)&tmpd);
+}
+
+#define MM_BLENDV8(a, b, c)	pblendvb(a, b, c)
+
+
+/* C fallback for _mm_testz_si128: 1 if (a & b) is all zero, 0 otherwise. */
+static inline int ptestz(xmm_t a, xmm_t b)
+{
+	unsigned long long tmpa[2], tmpb[2];
+
+	MM_STOREU((xmm_t *)&tmpa, a);
+	MM_STOREU((xmm_t *)&tmpb, b);
+
+	/* Any bit set in both operands, in either 64-bit half, means "not zero". */
+	if ((tmpa[0] & tmpb[0]) || (tmpa[1] & tmpb[1]))
+		return 0;
+
+	return 1;
+}
+
+#define	MM_TESTZ(a, b)		ptestz(a, b)
+
+/*
+ * C fallback for _mm_insert_epi32 (PINSRD) on builds without SSE4.1.
+ *
+ * off is the index of the 32-bit lane to replace (only its two low bits
+ * are used, as with the instruction's immediate), not a bit offset.
+ *
+ * Returns a copy of dst with that lane set to val.
+ */
+static inline xmm_t pinsrd(xmm_t dst, int32_t val, char off)
+{
+	unsigned long long tmpa[2];
+	unsigned long long mask;
+	unsigned long long bits;
+	unsigned int lane;
+	unsigned int half;
+	unsigned int shift;
+
+	MM_STOREU((xmm_t *)&tmpa, dst);
+
+	/*
+	 * Lanes 0-1 live in the lower 64-bit word, lanes 2-3 in the upper
+	 * one.  A lane is 32-bit aligned, so it never crosses the boundary
+	 * between the two words.
+	 */
+	lane = off & 3;
+	half = lane >> 1;
+	shift = (lane & 1) * 32;
+
+	/*
+	 * Build the mask and the value in 64 bits; going through uint32_t
+	 * first keeps a negative val from being sign-extended over the
+	 * neighbouring lane.
+	 */
+	mask = 0xffffffffULL << shift;
+	bits = (unsigned long long)(uint32_t)val << shift;
+
+	/*
+	 * Clear the target lane, then insert the new value
+	 */
+	tmpa[half] &= ~mask;
+	tmpa[half] |= bits;
+
+	dst = MM_LOADU((xmm_t *)&tmpa);
+	return dst;
+}
+
+#define	MM_INSERT32(a, c, b)	pinsrd(a, c, b)
+
+#else
+#define	MM_BLENDV8(a, b, c)	_mm_blendv_epi8(a, b, c)
+#define	MM_TESTZ(a, b)		_mm_testz_si128(a, b)
+#define	MM_INSERT32(a, c, b)	_mm_insert_epi32(a, c, b)
+#endif
+
 #ifdef RTE_ARCH_X86_64
 
 #define	MM_CVT64(a)		_mm_cvtsi128_si64(a)
-- 
1.8.3.1

             reply	other threads:[~2014-08-04 15:35 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-08-04 15:35 Neil Horman [this message]
     [not found] ` <1407166558-9532-1-git-send-email-nhorman-2XuSBdqkA4R54TAoqtyWWQ@public.gmane.org>
2014-08-05 15:26   ` [PATCH] acl: If build does not support sse4.2, emulate missing instructions with C code Ananyev, Konstantin
     [not found]     ` <2601191342CEEE43887BDE71AB9772582134F98D-kPTMFJFq+rEu0RiL9chJVbfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2014-08-05 18:20       ` Neil Horman
     [not found]         ` <20140805182035.GB20550-B26myB8xz7F8NnZeBjwnZQMhkBWG/bsMQH7oEaQurus@public.gmane.org>
2014-08-06 10:52           ` Ananyev, Konstantin
     [not found]             ` <2601191342CEEE43887BDE71AB9772582134FC52-kPTMFJFq+rEu0RiL9chJVbfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2014-08-06 12:12               ` Neil Horman
     [not found]                 ` <20140806121221.GA26562-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
2014-08-06 12:23                   ` Ananyev, Konstantin
     [not found]                     ` <2601191342CEEE43887BDE71AB9772582134FCB4-kPTMFJFq+rEu0RiL9chJVbfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2014-08-06 13:35                       ` Neil Horman
2014-08-06 11:39           ` Ananyev, Konstantin
     [not found]             ` <2601191342CEEE43887BDE71AB9772582134FC92-kPTMFJFq+rEu0RiL9chJVbfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2014-08-06 12:18               ` Neil Horman
     [not found]                 ` <20140806121859.GB26562-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
2014-08-06 12:26                   ` Ananyev, Konstantin
2014-08-06 16:59                   ` Richardson, Bruce
     [not found]                     ` <59AF69C657FD0841A61C55336867B5B0343D666D-kPTMFJFq+rELt2AQoY/u9bfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2014-08-06 17:27                       ` Neil Horman
     [not found]                         ` <20140806172709.GA23133-B26myB8xz7F8NnZeBjwnZQMhkBWG/bsMQH7oEaQurus@public.gmane.org>
2014-08-12 23:19                           ` Thomas Monjalon
2014-08-13 12:33                             ` Neil Horman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1407166558-9532-1-git-send-email-nhorman@tuxdriver.com \
    --to=nhorman-2xusbdqka4r54taoqtywwq@public.gmane.org \
    --cc=dev-VfR2kkLFssw@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.