All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jasvinder Singh <jasvinder.singh@intel.com>
To: dev@dpdk.org
Cc: olivier.matz@6wind.com, declan.doherty@intel.com,
	pablo.de.lara.guarch@intel.com
Subject: [PATCH v7 1/2] librte_net: add crc compute APIs
Date: Wed, 29 Mar 2017 18:15:03 +0100	[thread overview]
Message-ID: <1490807704-211859-2-git-send-email-jasvinder.singh@intel.com> (raw)
In-Reply-To: <1490807704-211859-1-git-send-email-jasvinder.singh@intel.com>

APIs for selecting the architecure specific implementation and computing
the crc (16-bit and 32-bit CRCs) are added. For CRCs calculation, scalar
as well as x86 intrinsic(sse4.2) versions are implemented.

The scalar version is based on generic Look-Up Table(LUT) algorithm,
while x86 intrinsic version uses carry-less multiplication for
fast CRC computation.

Signed-off-by: Jasvinder Singh <jasvinder.singh@intel.com>
Acked-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 lib/Makefile                       |   2 +-
 lib/librte_net/Makefile            |   3 +
 lib/librte_net/rte_net_crc.c       | 199 ++++++++++++++++++++
 lib/librte_net/rte_net_crc.h       | 104 +++++++++++
 lib/librte_net/rte_net_crc_sse.h   | 361 +++++++++++++++++++++++++++++++++++++
 lib/librte_net/rte_net_version.map |   8 +
 6 files changed, 676 insertions(+), 1 deletion(-)
 create mode 100644 lib/librte_net/rte_net_crc.c
 create mode 100644 lib/librte_net/rte_net_crc.h
 create mode 100644 lib/librte_net/rte_net_crc_sse.h

diff --git a/lib/Makefile b/lib/Makefile
index 5ad3c7c..456eb38 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -62,7 +62,7 @@ DEPDIRS-librte_lpm := librte_eal
 DIRS-$(CONFIG_RTE_LIBRTE_ACL) += librte_acl
 DEPDIRS-librte_acl := librte_eal
 DIRS-$(CONFIG_RTE_LIBRTE_NET) += librte_net
-DEPDIRS-librte_net := librte_mbuf
+DEPDIRS-librte_net := librte_mbuf librte_eal
 DIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += librte_ip_frag
 DEPDIRS-librte_ip_frag := librte_eal librte_mempool librte_mbuf librte_ether
 DEPDIRS-librte_ip_frag += librte_hash
diff --git a/lib/librte_net/Makefile b/lib/librte_net/Makefile
index abd5c46..757f3bc 100644
--- a/lib/librte_net/Makefile
+++ b/lib/librte_net/Makefile
@@ -39,10 +39,13 @@ EXPORT_MAP := rte_net_version.map
 LIBABIVER := 1
 
 SRCS-$(CONFIG_RTE_LIBRTE_NET) := rte_net.c
+SRCS-$(CONFIG_RTE_LIBRTE_NET) += rte_net_crc.c
 
 # install includes
 SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include := rte_ip.h rte_tcp.h rte_udp.h
 SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include += rte_sctp.h rte_icmp.h rte_arp.h
 SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include += rte_ether.h rte_gre.h rte_net.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include += rte_net_crc.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include += rte_net_crc_sse.h
 
 include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_net/rte_net_crc.c b/lib/librte_net/rte_net_crc.c
new file mode 100644
index 0000000..0f58f07
--- /dev/null
+++ b/lib/librte_net/rte_net_crc.c
@@ -0,0 +1,199 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stddef.h>
+
+#include <rte_net_crc.h>
+
+/** crc tables */
+static uint32_t crc32_eth_lut[CRC_LUT_SIZE];
+static uint32_t crc16_ccitt_lut[CRC_LUT_SIZE];
+
+static uint32_t
+rte_crc16_ccitt_handler(const uint8_t *data, uint32_t data_len);
+
+static uint32_t
+rte_crc32_eth_handler(const uint8_t *data, uint32_t data_len);
+
+typedef uint32_t
+(*rte_net_crc_handler)(const uint8_t *data, uint32_t data_len);
+
+static rte_net_crc_handler *handlers;
+
+static rte_net_crc_handler handlers_scalar[] = {
+	[RTE_NET_CRC16_CCITT] = rte_crc16_ccitt_handler,
+	[RTE_NET_CRC32_ETH] = rte_crc32_eth_handler,
+};
+
+#if defined(RTE_ARCH_X86_64) && defined(RTE_MACHINE_CPUFLAG_SSE4_2)
+static rte_net_crc_handler handlers_sse42[] = {
+	[RTE_NET_CRC16_CCITT] = rte_crc16_ccitt_sse42_handler,
+	[RTE_NET_CRC32_ETH] = rte_crc32_eth_sse42_handler,
+};
+#endif
+
+/**
+ * Reflect the bits about the middle
+ *
+ * @param val
+ *   value to be reflected
+ *
+ * @return
+ *   reflected value
+ */
+static uint32_t
+reflect_32bits(uint32_t val)
+{
+	uint32_t i, res = 0;
+
+	for (i = 0; i < 32; i++)
+		if ((val & (1 << i)) != 0)
+			res |= (uint32_t)(1 << (31 - i));
+
+	return res;
+}
+
+static void
+crc32_eth_init_lut(uint32_t poly,
+	uint32_t *lut)
+{
+	uint32_t i, j;
+
+	for (i = 0; i < CRC_LUT_SIZE; i++) {
+		uint32_t crc = reflect_32bits(i);
+
+		for (j = 0; j < 8; j++) {
+			if (crc & 0x80000000L)
+				crc = (crc << 1) ^ poly;
+			else
+				crc <<= 1;
+		}
+		lut[i] = reflect_32bits(crc);
+	}
+}
+
+static inline __attribute__((always_inline)) uint32_t
+crc32_eth_calc_lut(const uint8_t *data,
+	uint32_t data_len,
+	uint32_t crc,
+	const uint32_t *lut)
+{
+	while (data_len--)
+		crc = lut[(crc ^ *data++) & 0xffL] ^ (crc >> 8);
+
+	return crc;
+}
+
+static void
+rte_net_crc_scalar_init(void)
+{
+	/** 32-bit crc init */
+	crc32_eth_init_lut(CRC32_ETH_POLYNOMIAL, crc32_eth_lut);
+
+	/** 16-bit CRC init */
+	crc32_eth_init_lut(CRC16_CCITT_POLYNOMIAL << 16, crc16_ccitt_lut);
+}
+
+static inline uint32_t
+rte_crc16_ccitt_handler(const uint8_t *data, uint32_t data_len)
+{
+	/** return 16-bit CRC value */
+	return (uint16_t)~crc32_eth_calc_lut(data,
+		data_len,
+		0xffff,
+		crc16_ccitt_lut);
+}
+
+static inline uint32_t
+rte_crc32_eth_handler(const uint8_t *data, uint32_t data_len)
+{
+	/** return 32-bit CRC value */
+	return ~crc32_eth_calc_lut(data,
+		data_len,
+		0xffffffffUL,
+		crc32_eth_lut);
+}
+
+void
+rte_net_crc_set_alg(enum rte_net_crc_alg alg)
+{
+	switch (alg) {
+	case RTE_NET_CRC_SSE42:
+#ifdef RTE_ARCH_X86_64
+		if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_2))
+			alg = RTE_NET_CRC_SCALAR;
+		else {
+			handlers = handlers_sse42;
+			break;
+		}
+#endif
+	case RTE_NET_CRC_SCALAR:
+	default:
+		handlers = handlers_scalar;
+		break;
+	}
+}
+
+uint32_t
+rte_net_crc_calc(const void *data,
+	uint32_t data_len,
+	enum rte_net_crc_type type)
+{
+	uint32_t ret;
+	rte_net_crc_handler f_handle;
+
+	f_handle = handlers[type];
+	ret = f_handle((const uint8_t *) data, data_len);
+
+	return ret;
+}
+
+/*
+ * Select highest available crc algorithm as default one.
+ */
+static inline void __attribute__((constructor))
+rte_net_crc_init(void)
+{
+	enum rte_net_crc_alg alg = RTE_NET_CRC_SCALAR;
+
+	rte_net_crc_scalar_init();
+
+#ifdef RTE_ARCH_X86_64
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_2)) {
+		alg = RTE_NET_CRC_SSE42;
+		rte_net_crc_sse42_init();
+	}
+#endif
+
+	rte_net_crc_set_alg(alg);
+}
diff --git a/lib/librte_net/rte_net_crc.h b/lib/librte_net/rte_net_crc.h
new file mode 100644
index 0000000..dd6c110
--- /dev/null
+++ b/lib/librte_net/rte_net_crc.h
@@ -0,0 +1,104 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_NET_CRC_H_
+#define _RTE_NET_CRC_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+#include <rte_mbuf.h>
+
+/** CRC polynomials */
+#define CRC32_ETH_POLYNOMIAL 0x04c11db7UL
+#define CRC16_CCITT_POLYNOMIAL 0x1021U
+
+#define CRC_LUT_SIZE 256
+
+/** CRC types */
+enum rte_net_crc_type {
+	RTE_NET_CRC16_CCITT = 0,
+	RTE_NET_CRC32_ETH,
+	RTE_NET_CRC_REQS
+};
+
+/** CRC compute algorithm */
+enum rte_net_crc_alg {
+	RTE_NET_CRC_SCALAR = 0,
+	RTE_NET_CRC_SSE42,
+};
+
+/**
+ * This API set the CRC computation algorithm (i.e. scalar version,
+ * x86 64-bit sse4.2 intrinsic version, etc.) and internal data
+ * structure.
+ *
+ * @param alg
+ *   This parameter is used to select the CRC implementation version.
+ *   - RTE_NET_CRC_SCALAR
+ *   - RTE_NET_CRC_SSE42 (Use 64-bit SSE4.2 intrinsic)
+ */
+void
+rte_net_crc_set_alg(enum rte_net_crc_alg alg);
+
+/**
+ * CRC compute API
+ *
+ * @param data
+ *   Pointer to the packet data for CRC computation
+ * @param data_len
+ *   Data length for CRC computation
+ * @param type
+ *   CRC type (enum rte_net_crc_type)
+ *
+ * @return
+ *   CRC value
+ */
+uint32_t
+rte_net_crc_calc(const void *data,
+	uint32_t data_len,
+	enum rte_net_crc_type type);
+
+#if defined(RTE_ARCH_X86_64) && defined(RTE_MACHINE_CPUFLAG_SSE4_2)
+#include <rte_net_crc_sse.h>
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* _RTE_NET_CRC_H_ */
diff --git a/lib/librte_net/rte_net_crc_sse.h b/lib/librte_net/rte_net_crc_sse.h
new file mode 100644
index 0000000..514b400
--- /dev/null
+++ b/lib/librte_net/rte_net_crc_sse.h
@@ -0,0 +1,361 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_NET_CRC_SSE_H_
+#define _RTE_NET_CRC_SSE_H_
+
+#include <cpuid.h>
+#include <rte_net_crc.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** PCLMULQDQ CRC computation context structure */
+struct crc_pclmulqdq_ctx {
+	__m128i rk1_rk2;
+	__m128i rk5_rk6;
+	__m128i rk7_rk8;
+};
+
+struct crc_pclmulqdq_ctx crc32_eth_pclmulqdq __rte_aligned(16);
+struct crc_pclmulqdq_ctx crc16_ccitt_pclmulqdq __rte_aligned(16);
+/**
+ * @brief Performs one folding round
+ *
+ * Logically function operates as follows:
+ *     DATA = READ_NEXT_16BYTES();
+ *     F1 = LSB8(FOLD)
+ *     F2 = MSB8(FOLD)
+ *     T1 = CLMUL(F1, RK1)
+ *     T2 = CLMUL(F2, RK2)
+ *     FOLD = XOR(T1, T2, DATA)
+ *
+ * @param data_block
+ *   16 byte data block
+ * @param precomp
+ *   Precomputed rk1 constanst
+ * @param fold
+ *   Current16 byte folded data
+ *
+ * @return
+ *   New 16 byte folded data
+ */
+static inline __attribute__((always_inline)) __m128i
+crcr32_folding_round(__m128i data_block,
+		__m128i precomp,
+		__m128i fold)
+{
+	__m128i tmp0 = _mm_clmulepi64_si128(fold, precomp, 0x01);
+	__m128i tmp1 = _mm_clmulepi64_si128(fold, precomp, 0x10);
+
+	return _mm_xor_si128(tmp1, _mm_xor_si128(data_block, tmp0));
+}
+
+/**
+ * Performs reduction from 128 bits to 64 bits
+ *
+ * @param data128
+ *   128 bits data to be reduced
+ * @param precomp
+ *   precomputed constants rk5, rk6
+ *
+ * @return
+ *  64 bits reduced data
+ */
+
+static inline __attribute__((always_inline)) __m128i
+crcr32_reduce_128_to_64(__m128i data128, __m128i precomp)
+{
+	__m128i tmp0, tmp1, tmp2;
+
+	/* 64b fold */
+	tmp0 = _mm_clmulepi64_si128(data128, precomp, 0x00);
+	tmp1 = _mm_srli_si128(data128, 8);
+	tmp0 = _mm_xor_si128(tmp0, tmp1);
+
+	/* 32b fold */
+	tmp2 = _mm_slli_si128(tmp0, 4);
+	tmp1 = _mm_clmulepi64_si128(tmp2, precomp, 0x10);
+
+	return _mm_xor_si128(tmp1, tmp0);
+}
+
+/**
+ * Performs Barret's reduction from 64 bits to 32 bits
+ *
+ * @param data64
+ *   64 bits data to be reduced
+ * @param precomp
+ *   rk7 precomputed constant
+ *
+ * @return
+ *   reduced 32 bits data
+ */
+
+static inline __attribute__((always_inline)) uint32_t
+crcr32_reduce_64_to_32(__m128i data64, __m128i precomp)
+{
+	static const uint32_t mask1[4] __rte_aligned(16) = {
+		0xffffffff, 0xffffffff, 0x00000000, 0x00000000
+	};
+
+	static const uint32_t mask2[4] __rte_aligned(16) = {
+		0x00000000, 0xffffffff, 0xffffffff, 0xffffffff
+	};
+	__m128i tmp0, tmp1, tmp2;
+
+	tmp0 = _mm_and_si128(data64, _mm_load_si128((const __m128i *)mask2));
+
+	tmp1 = _mm_clmulepi64_si128(tmp0, precomp, 0x00);
+	tmp1 = _mm_xor_si128(tmp1, tmp0);
+	tmp1 = _mm_and_si128(tmp1, _mm_load_si128((const __m128i *)mask1));
+
+	tmp2 = _mm_clmulepi64_si128(tmp1, precomp, 0x10);
+	tmp2 = _mm_xor_si128(tmp2, tmp1);
+	tmp2 = _mm_xor_si128(tmp2, tmp0);
+
+	return _mm_extract_epi32(tmp2, 2);
+}
+
+static const uint8_t crc_xmm_shift_tab[48] __rte_aligned(16) = {
+	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+};
+
+/**
+ * Shifts left 128 bit register by specified number of bytes
+ *
+ * @param reg
+ *   128 bit value
+ * @param num
+ *   number of bytes to shift left reg by (0-16)
+ *
+ * @return
+ *   reg << (num * 8)
+ */
+
+static inline __attribute__((always_inline)) __m128i
+xmm_shift_left(__m128i reg, const unsigned int num)
+{
+	const __m128i *p = (const __m128i *)(crc_xmm_shift_tab + 16 - num);
+
+	return _mm_shuffle_epi8(reg, _mm_loadu_si128(p));
+}
+
+static inline __attribute__((always_inline)) uint32_t
+crc32_eth_calc_pclmulqdq(
+	const uint8_t *data,
+	uint32_t data_len,
+	uint32_t crc,
+	const struct crc_pclmulqdq_ctx *params)
+{
+	__m128i temp, fold, k;
+	uint32_t n;
+
+	/* Get CRC init value */
+	temp = _mm_insert_epi32(_mm_setzero_si128(), crc, 0);
+
+	/**
+	 * Folding all data into single 16 byte data block
+	 * Assumes: fold holds first 16 bytes of data
+	 */
+
+	if (unlikely(data_len < 32)) {
+		if (unlikely(data_len == 16)) {
+			/* 16 bytes */
+			fold = _mm_loadu_si128((const __m128i *)data);
+			fold = _mm_xor_si128(fold, temp);
+			goto reduction_128_64;
+		}
+
+		if (unlikely(data_len < 16)) {
+			/* 0 to 15 bytes */
+			uint8_t buffer[16] __rte_aligned(16);
+
+			memset(buffer, 0, sizeof(buffer));
+			memcpy(buffer, data, data_len);
+
+			fold = _mm_load_si128((const __m128i *)buffer);
+			fold = _mm_xor_si128(fold, temp);
+			if (unlikely(data_len < 4)) {
+				fold = xmm_shift_left(fold, 8 - data_len);
+				goto barret_reduction;
+			}
+			fold = xmm_shift_left(fold, 16 - data_len);
+			goto reduction_128_64;
+		}
+		/* 17 to 31 bytes */
+		fold = _mm_loadu_si128((const __m128i *)data);
+		fold = _mm_xor_si128(fold, temp);
+		n = 16;
+		k = params->rk1_rk2;
+		goto partial_bytes;
+	}
+
+	/** At least 32 bytes in the buffer */
+	/** Apply CRC initial value */
+	fold = _mm_loadu_si128((const __m128i *)data);
+	fold = _mm_xor_si128(fold, temp);
+
+	/** Main folding loop - the last 16 bytes is processed separately */
+	k = params->rk1_rk2;
+	for (n = 16; (n + 16) <= data_len; n += 16) {
+		temp = _mm_loadu_si128((const __m128i *)&data[n]);
+		fold = crcr32_folding_round(temp, k, fold);
+	}
+
+partial_bytes:
+	if (likely(n < data_len)) {
+
+		const uint32_t mask3[4] __rte_aligned(16) = {
+			0x80808080, 0x80808080, 0x80808080, 0x80808080
+		};
+
+		const uint8_t shf_table[32] __rte_aligned(16) = {
+			0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+			0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+			0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+			0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
+		};
+
+		__m128i last16, a, b;
+
+		last16 = _mm_loadu_si128((const __m128i *)&data[data_len - 16]);
+
+		temp = _mm_loadu_si128((const __m128i *)
+			&shf_table[data_len & 15]);
+		a = _mm_shuffle_epi8(fold, temp);
+
+		temp = _mm_xor_si128(temp,
+			_mm_load_si128((const __m128i *)mask3));
+		b = _mm_shuffle_epi8(fold, temp);
+		b = _mm_blendv_epi8(b, last16, temp);
+
+		/* k = rk1 & rk2 */
+		temp = _mm_clmulepi64_si128(a, k, 0x01);
+		fold = _mm_clmulepi64_si128(a, k, 0x10);
+
+		fold = _mm_xor_si128(fold, temp);
+		fold = _mm_xor_si128(fold, b);
+	}
+
+	/** Reduction 128 -> 32 Assumes: fold holds 128bit folded data */
+reduction_128_64:
+	k = params->rk5_rk6;
+	fold = crcr32_reduce_128_to_64(fold, k);
+
+barret_reduction:
+	k = params->rk7_rk8;
+	n = crcr32_reduce_64_to_32(fold, k);
+
+	return n;
+}
+
+
+static inline void
+rte_net_crc_sse42_init(void)
+{
+	uint64_t k1, k2, k5, k6;
+	uint64_t p = 0, q = 0;
+
+	/** Initialize CRC16 data */
+	k1 = 0x189aeLLU;
+	k2 = 0x8e10LLU;
+	k5 = 0x189aeLLU;
+	k6 = 0x114aaLLU;
+	q =  0x11c581910LLU;
+	p =  0x10811LLU;
+
+	/** Save the params in context structure */
+	crc16_ccitt_pclmulqdq.rk1_rk2 =
+		_mm_setr_epi64(_mm_cvtsi64_m64(k1), _mm_cvtsi64_m64(k2));
+	crc16_ccitt_pclmulqdq.rk5_rk6 =
+		_mm_setr_epi64(_mm_cvtsi64_m64(k5), _mm_cvtsi64_m64(k6));
+	crc16_ccitt_pclmulqdq.rk7_rk8 =
+		_mm_setr_epi64(_mm_cvtsi64_m64(q), _mm_cvtsi64_m64(p));
+
+	/** Initialize CRC32 data */
+	k1 = 0xccaa009eLLU;
+	k2 = 0x1751997d0LLU;
+	k5 = 0xccaa009eLLU;
+	k6 = 0x163cd6124LLU;
+	q =  0x1f7011640LLU;
+	p =  0x1db710641LLU;
+
+	/** Save the params in context structure */
+	crc32_eth_pclmulqdq.rk1_rk2 =
+		_mm_setr_epi64(_mm_cvtsi64_m64(k1), _mm_cvtsi64_m64(k2));
+	crc32_eth_pclmulqdq.rk5_rk6 =
+		_mm_setr_epi64(_mm_cvtsi64_m64(k5), _mm_cvtsi64_m64(k6));
+	crc32_eth_pclmulqdq.rk7_rk8 =
+		_mm_setr_epi64(_mm_cvtsi64_m64(q), _mm_cvtsi64_m64(p));
+
+	/**
+	 * Reset the register as following calculation may
+	 * use other data types such as float, double, etc.
+	 */
+	_mm_empty();
+
+}
+
+static inline uint32_t
+rte_crc16_ccitt_sse42_handler(const uint8_t *data,
+	uint32_t data_len)
+{
+	/** return 16-bit CRC value */
+	return (uint16_t)~crc32_eth_calc_pclmulqdq(data,
+		data_len,
+		0xffff,
+		&crc16_ccitt_pclmulqdq);
+}
+
+static inline uint32_t
+rte_crc32_eth_sse42_handler(const uint8_t *data,
+	uint32_t data_len)
+{
+	return ~crc32_eth_calc_pclmulqdq(data,
+		data_len,
+		0xffffffffUL,
+		&crc32_eth_pclmulqdq);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_NET_CRC_SSE_H_ */
diff --git a/lib/librte_net/rte_net_version.map b/lib/librte_net/rte_net_version.map
index 3b15e65..687c40e 100644
--- a/lib/librte_net/rte_net_version.map
+++ b/lib/librte_net/rte_net_version.map
@@ -4,3 +4,11 @@ DPDK_16.11 {
 
 	local: *;
 };
+
+DPDK_17.05 {
+	global:
+
+	rte_net_crc_calc;
+	rte_net_crc_set_alg;
+
+} DPDK_16.11;
-- 
2.5.5

  reply	other threads:[~2017-03-29 17:05 UTC|newest]

Thread overview: 69+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-02-24 20:54 [PATCH 0/2] librte_net: add crc computation support Jasvinder Singh
2017-02-24 20:54 ` [PATCH 1/2] librte_net: add crc init and compute APIs Jasvinder Singh
2017-02-28 12:08   ` [PATCH v2 0/2] librte_net: add crc computation support Jasvinder Singh
2017-02-28 12:08     ` [PATCH v2 1/2] librte_net: add crc init and compute APIs Jasvinder Singh
2017-02-28 12:15       ` Jerin Jacob
2017-03-01 18:46       ` Thomas Monjalon
2017-03-02 13:03         ` Singh, Jasvinder
2017-03-06 15:27           ` Thomas Monjalon
2017-03-08 11:08             ` De Lara Guarch, Pablo
2017-03-15 17:35               ` Thomas Monjalon
2017-03-15 19:03                 ` Dumitrescu, Cristian
2017-03-15 20:15                   ` Thomas Monjalon
2017-03-15 21:11                     ` Dumitrescu, Cristian
2017-03-15 19:09                 ` Dumitrescu, Cristian
2017-03-12 21:33       ` [PATCH v3 0/2] librte_net: add crc computation support Jasvinder Singh
2017-03-12 21:33         ` [PATCH v3 1/2] librte_net: add crc compute APIs Jasvinder Singh
2017-03-13  3:06           ` Ananyev, Konstantin
2017-03-13  9:05             ` Singh, Jasvinder
2017-03-20 19:29           ` [PATCH v4 0/2] librte_net: add crc computation support Jasvinder Singh
2017-03-20 19:29             ` [PATCH v4 1/2] librte_net: add crc compute APIs Jasvinder Singh
2017-03-21 14:45               ` [PATCH v5 0/2] librte_net: add crc computation support Jasvinder Singh
2017-03-21 14:45                 ` [PATCH v5 1/2] librte_net: add crc compute APIs Jasvinder Singh
2017-03-28 18:04                   ` De Lara Guarch, Pablo
2017-03-28 18:07                     ` De Lara Guarch, Pablo
2017-03-28 19:21                     ` Singh, Jasvinder
2017-03-29 12:42                   ` [PATCH v6 0/2] librte_net: add crc computation support Jasvinder Singh
2017-03-29 12:42                     ` [PATCH v6 1/2] librte_net: add crc compute APIs Jasvinder Singh
2017-03-29 16:14                       ` De Lara Guarch, Pablo
2017-03-29 17:15                       ` [PATCH v7 0/2] librte_net: add crc computation support Jasvinder Singh
2017-03-29 17:15                         ` Jasvinder Singh [this message]
2017-03-30 11:30                           ` [PATCH v8 " Jasvinder Singh
2017-03-30 11:30                             ` [PATCH v8 1/2] librte_net: add crc compute APIs Jasvinder Singh
2017-03-30 11:31                               ` Ananyev, Konstantin
2017-03-30 12:06                                 ` Singh, Jasvinder
2017-03-30 14:40                                 ` Olivier Matz
2017-03-30 15:14                                   ` Singh, Jasvinder
2017-03-30 16:15                               ` [PATCH v9 0/3] librte_net: add crc computation support Jasvinder Singh
2017-03-30 16:15                                 ` [PATCH v9 1/3] librte_net: add crc compute APIs Jasvinder Singh
2017-04-04 20:00                                   ` Thomas Monjalon
2017-04-05 14:58                                   ` [PATCH v10 0/2] librte_net: add crc computation support Jasvinder Singh
2017-04-05 14:58                                     ` [PATCH v10 1/2] librte_net: add crc compute APIs Jasvinder Singh
2017-04-05 17:49                                       ` Thomas Monjalon
2017-04-05 19:22                                         ` Singh, Jasvinder
2017-04-05 20:49                                       ` [PATCH v11 0/2] librte_net: add crc computation support Jasvinder Singh
2017-04-05 20:49                                         ` [PATCH v11 1/2] librte_net: add crc compute APIs Jasvinder Singh
2017-04-05 20:49                                         ` [PATCH v11 2/2] test/test: add unit test for CRC computation Jasvinder Singh
2017-04-05 20:59                                           ` Thomas Monjalon
2017-04-05 21:00                                         ` [PATCH v11 0/2] librte_net: add crc computation support Thomas Monjalon
2017-04-05 14:58                                     ` [PATCH v10 2/2] test/test: add unit test for CRC computation Jasvinder Singh
2017-03-30 16:15                                 ` [PATCH v9 2/3] " Jasvinder Singh
2017-03-30 16:15                                 ` [PATCH v9 3/3] maintainers: add packet crc section and claim maintainership Jasvinder Singh
2017-04-04 19:55                                   ` Thomas Monjalon
2017-04-04 20:02                                 ` [PATCH v9 0/3] librte_net: add crc computation support Thomas Monjalon
2017-04-05  8:34                                   ` Singh, Jasvinder
2017-04-05  9:01                                     ` Thomas Monjalon
2017-04-05  9:37                                       ` Richardson, Bruce
2017-04-05 12:52                                         ` Singh, Jasvinder
2017-03-30 11:30                             ` [PATCH v8 2/2] test/test: add unit test for CRC computation Jasvinder Singh
2017-03-29 17:15                         ` [PATCH v7 " Jasvinder Singh
2017-03-29 12:42                     ` [PATCH v6 " Jasvinder Singh
2017-03-29 16:12                       ` De Lara Guarch, Pablo
2017-03-21 14:45                 ` [PATCH v5 " Jasvinder Singh
2017-03-28 19:23                   ` De Lara Guarch, Pablo
2017-03-28 19:27                     ` Singh, Jasvinder
2017-03-20 19:29             ` [PATCH v4 2/2] app/test: " Jasvinder Singh
2017-03-21  7:14               ` Peng, Yuan
2017-03-12 21:33         ` [PATCH v3 " Jasvinder Singh
2017-02-28 12:08     ` [PATCH v2 " Jasvinder Singh
2017-02-24 20:54 ` [PATCH " Jasvinder Singh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1490807704-211859-2-git-send-email-jasvinder.singh@intel.com \
    --to=jasvinder.singh@intel.com \
    --cc=declan.doherty@intel.com \
    --cc=dev@dpdk.org \
    --cc=olivier.matz@6wind.com \
    --cc=pablo.de.lara.guarch@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.