All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jasvinder Singh <jasvinder.singh@intel.com>
To: dev@dpdk.org
Cc: declan.doherty@intel.com, pablo.de.lara.guarch@intel.com
Subject: [PATCH v3 1/2] librte_net: add crc compute APIs
Date: Sun, 12 Mar 2017 21:33:32 +0000	[thread overview]
Message-ID: <1489354413-137376-2-git-send-email-jasvinder.singh@intel.com> (raw)
In-Reply-To: <1489354413-137376-1-git-send-email-jasvinder.singh@intel.com>

APIs for selecting the architecture-specific implementation and computing
the CRC (16-bit and 32-bit CRCs) are added. For CRC calculation, scalar
as well as x86 intrinsic (SSE4.2) versions are implemented.

The scalar version is based on a generic Look-Up Table (LUT) algorithm,
while the x86 intrinsic version uses carry-less multiplication for
fast CRC computation.

Signed-off-by: Jasvinder Singh <jasvinder.singh@intel.com>
---
 lib/librte_net/Makefile            |   3 +
 lib/librte_net/rte_net_crc.c       | 205 ++++++++++++++++++++++
 lib/librte_net/rte_net_crc.h       | 104 +++++++++++
 lib/librte_net/rte_net_crc_sse.h   | 351 +++++++++++++++++++++++++++++++++++++
 lib/librte_net/rte_net_version.map |   8 +
 5 files changed, 671 insertions(+)
 create mode 100644 lib/librte_net/rte_net_crc.c
 create mode 100644 lib/librte_net/rte_net_crc.h
 create mode 100644 lib/librte_net/rte_net_crc_sse.h

diff --git a/lib/librte_net/Makefile b/lib/librte_net/Makefile
index 20cf664..39ff1cc 100644
--- a/lib/librte_net/Makefile
+++ b/lib/librte_net/Makefile
@@ -39,11 +39,14 @@ EXPORT_MAP := rte_net_version.map
 LIBABIVER := 1
 
 SRCS-$(CONFIG_RTE_LIBRTE_NET) := rte_net.c
+SRCS-$(CONFIG_RTE_LIBRTE_NET) += rte_net_crc.c
 
 # install includes
 SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include := rte_ip.h rte_tcp.h rte_udp.h
 SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include += rte_sctp.h rte_icmp.h rte_arp.h
 SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include += rte_ether.h rte_gre.h rte_net.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include += rte_net_crc.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include += rte_net_crc_sse.h
 
 DEPDIRS-$(CONFIG_RTE_LIBRTE_NET) += lib/librte_eal lib/librte_mbuf
 
diff --git a/lib/librte_net/rte_net_crc.c b/lib/librte_net/rte_net_crc.c
new file mode 100644
index 0000000..c460ab0
--- /dev/null
+++ b/lib/librte_net/rte_net_crc.c
@@ -0,0 +1,205 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_net_crc.h>
+#include <stddef.h>
+
+/** Scalar CRC look-up tables, one 32-bit entry per possible byte value */
+static uint32_t crc32_eth_lut[256];
+static uint32_t crc16_ccitt_lut[256];
+
+static uint32_t
+rte_crc16_ccitt_handler(const uint8_t *data, uint32_t data_len);
+
+static uint32_t
+rte_crc32_eth_handler(const uint8_t *data, uint32_t data_len);
+/** Signature shared by every CRC handler: data buffer and its length */
+typedef uint32_t
+(*rte_net_crc_handler)(const uint8_t *data, uint32_t data_len);
+/** Handler table in use; entries are indexed by enum rte_net_crc_type */
+static rte_net_crc_handler *handlers;
+/** Scalar (look-up table based) handlers */
+static rte_net_crc_handler handlers_scalar[] = {
+	[RTE_NET_CRC16_CCITT] = rte_crc16_ccitt_handler,
+	[RTE_NET_CRC32_ETH] = rte_crc32_eth_handler,
+};
+
+#ifdef RTE_ARCH_X86_64 /* same guard as the code that uses this table */
+static rte_net_crc_handler handlers_sse42[] = {
+	[RTE_NET_CRC16_CCITT] = rte_crc16_ccitt_sse42_handler,
+	[RTE_NET_CRC32_ETH] = rte_crc32_eth_sse42_handler,
+};
+#endif /* RTE_ARCH_X86_64 */
+
+/**
+ * Reverse the bit order of a 32-bit value (bit 0 swaps with bit 31, ...)
+ *
+ * @param val value to be reflected
+ *
+ * @return reflected value
+ */
+static uint32_t
+reflect_32bits(const uint32_t val)
+{
+	uint32_t i, res = 0;
+	/* 1U keeps the shifts unsigned: 1 << 31 overflows a signed int */
+	for (i = 0; i < 32; i++)
+		if ((val & (1U << i)) != 0)
+			res |= 1U << (31 - i);
+
+	return res;
+}
+
+/* Fill lut[0..255] with the table entry of every byte value for poly. */
+static void
+crc32_eth_init_lut(const uint32_t poly, uint32_t *lut)
+{
+	uint_fast32_t byte;
+
+	for (byte = 0; byte < 256; byte++) {
+		/* Run the division MSB-first on the bit-reversed byte value */
+		uint_fast32_t rem = reflect_32bits(byte);
+		unsigned int bit;
+
+		for (bit = 0; bit < 8; bit++)
+			rem = (rem & 0x80000000L) ?
+				((rem << 1) ^ poly) : (rem << 1);
+		/* Store the remainder bit-reversed again */
+		lut[byte] = reflect_32bits(rem);
+	}
+}
+
+/* Table driven CRC update over data, one byte per step, seeded with crc. */
+static inline __attribute__((always_inline)) uint32_t
+crc32_eth_calc_lut(const uint8_t *data, uint32_t data_len,
+	uint32_t crc, const uint32_t *lut)
+{
+	uint32_t pos;
+
+	/* Missing buffer or table: hand the seed back untouched */
+	if (unlikely(data == NULL || lut == NULL))
+		return crc;
+	for (pos = 0; pos < data_len; pos++)
+		crc = lut[(crc ^ data[pos]) & 0xffL] ^ (crc >> 8);
+	return crc;
+}
+
+/* Build the two look-up tables read by the scalar CRC handlers. */
+static void
+rte_net_crc_scalar_init(void)
+{
+	/* 16-bit CRC: polynomial moved into the upper half of the word */
+	crc32_eth_init_lut(CRC16_CCITT_POLYNOMIAL << 16, crc16_ccitt_lut);
+
+	/* 32-bit CRC */
+	crc32_eth_init_lut(CRC32_ETH_POLYNOMIAL, crc32_eth_lut);
+}
+
+/* Scalar CRC16-CCITT: seed 0xffff, result complemented and cut to 16 bits. */
+static inline uint32_t
+rte_crc16_ccitt_handler(const uint8_t *data, uint32_t data_len)
+{
+	uint32_t raw = crc32_eth_calc_lut(data, data_len, 0xffff,
+		crc16_ccitt_lut);
+	return (uint16_t)~raw;
+}
+
+/* Scalar Ethernet CRC32: seed of all ones, result bitwise complemented. */
+static inline uint32_t
+rte_crc32_eth_handler(const uint8_t *data, uint32_t data_len)
+{
+	uint32_t raw = crc32_eth_calc_lut(data, data_len, 0xffffffffUL,
+		crc32_eth_lut);
+	return ~raw;
+}
+
+/* Pick the handler table; vector needs SSE4.2 + PCLMULQDQ, else scalar. */
+void
+rte_net_crc_set_alg(enum rte_net_crc_alg alg)
+{
+	switch (alg) {
+	case RTE_NET_CRC_SSE42:
+#ifdef RTE_ARCH_X86_64
+		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_2) &&
+		    rte_cpu_get_flag_enabled(RTE_CPUFLAG_PCLMULQDQ)) {
+			handlers = handlers_sse42;
+			break;
+		}
+#endif
+		/* fall through - vector path not built in or not supported */
+	case RTE_NET_CRC_SCALAR:
+	default:
+		handlers = handlers_scalar;
+		break;
+	}
+}
+
+/*
+ * Compute the CRC of data_len bytes located data_offset bytes into the
+ * mbuf data area, using the handler currently selected for the CRC type.
+ * NOTE(review): type is used as an index without a range check and the
+ * bytes are read through one contiguous pointer - confirm callers pass
+ * a valid type and a range that lies within a single segment.
+ */
+uint32_t
+rte_net_crc_calc(struct rte_mbuf *mbuf,
+	uint32_t data_offset,
+	uint32_t data_len,
+	enum rte_net_crc_type type)
+{
+	const uint8_t *start = rte_pktmbuf_mtod_offset(mbuf,
+		const uint8_t *, data_offset);
+
+	/* Dispatch through the table installed by rte_net_crc_set_alg() */
+	return handlers[type](start, data_len);
+}
+
+/*
+ * Constructor: build tables and default to the fastest usable algorithm.
+ */
+static void __attribute__((constructor))
+rte_net_crc_init(void)
+{
+	enum rte_net_crc_alg alg = RTE_NET_CRC_SCALAR;
+
+	rte_net_crc_scalar_init();
+#ifdef RTE_ARCH_X86_64
+	/* The vector handlers execute PCLMULQDQ as well as SSE4.2 code */
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_2) &&
+	    rte_cpu_get_flag_enabled(RTE_CPUFLAG_PCLMULQDQ)) {
+		alg = RTE_NET_CRC_SSE42;
+		rte_net_crc_sse42_init();
+	}
+#endif
+	rte_net_crc_set_alg(alg);
+}
diff --git a/lib/librte_net/rte_net_crc.h b/lib/librte_net/rte_net_crc.h
new file mode 100644
index 0000000..26b6406
--- /dev/null
+++ b/lib/librte_net/rte_net_crc.h
@@ -0,0 +1,104 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_NET_CRC_H_
+#define _RTE_NET_CRC_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+#include <rte_mbuf.h>
+
+/** CRC generator polynomials, one per supported CRC type */
+#define CRC32_ETH_POLYNOMIAL 0x04c11db7UL
+#define CRC16_CCITT_POLYNOMIAL 0x1021U
+
+/** CRC types; the values index the per-algorithm handler tables */
+enum rte_net_crc_type {
+	RTE_NET_CRC16_CCITT = 0, /**< 16-bit CRC (CCITT polynomial) */
+	RTE_NET_CRC32_ETH, /**< 32-bit CRC (Ethernet polynomial) */
+	RTE_NET_CRC_REQS /**< presumably the number of CRC types - confirm */
+};
+
+/** CRC compute algorithm, selected with rte_net_crc_set_alg() */
+enum rte_net_crc_alg {
+	RTE_NET_CRC_SCALAR = 0, /**< look-up table based implementation */
+	RTE_NET_CRC_SSE42, /**< x86 intrinsics based implementation */
+};
+
+/**
+ * CRC algorithm selection API
+ *
+ *  This API sets the CRC computation algorithm and the internal CRC
+ *  data structure used by subsequent rte_net_crc_calc() calls.
+ *
+ * @param alg
+ *   CRC compute algorithm
+ */
+void
+rte_net_crc_set_alg(enum rte_net_crc_alg alg);
+
+/**
+ * CRC compute API
+ *
+ * @param mbuf
+ *  Pointer to the packet mbuf holding the data for CRC computation
+ * @param data_offset
+ *  Offset, in bytes, of the first byte to cover within the mbuf data
+ * @param data_len
+ *  Length, in bytes, of the data to compute the CRC on
+ * @param type
+ *  CRC type (RTE_NET_CRC16_CCITT or RTE_NET_CRC32_ETH)
+ *
+ * @return
+ *  Computed CRC value
+ */
+uint32_t
+rte_net_crc_calc(struct rte_mbuf *mbuf,
+	uint32_t data_offset,
+	uint32_t data_len,
+	enum rte_net_crc_type type);
+
+#ifdef RTE_ARCH_X86_64
+#include <rte_net_crc_sse.h>
+#endif /* RTE_ARCH_X86_64 */
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* _RTE_NET_CRC_H_ */
diff --git a/lib/librte_net/rte_net_crc_sse.h b/lib/librte_net/rte_net_crc_sse.h
new file mode 100644
index 0000000..332b95b
--- /dev/null
+++ b/lib/librte_net/rte_net_crc_sse.h
@@ -0,0 +1,351 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_NET_CRC_SSE_H_
+#define _RTE_NET_CRC_SSE_H_
+
+#include <cpuid.h>
+#include <rte_net_crc.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** PCLMULQDQ CRC computation context structure */
+struct crc_pclmulqdq_ctx {
+	__m128i rk1_rk2; /**< constants for the 16 byte folding rounds */
+	__m128i rk5_rk6; /**< constants for the 128 to 64 bit reduction */
+	__m128i rk7_rk8; /**< constants for the 64 to 32 bit reduction */
+};
+/* static: this header may be included by more than one translation unit */
+static struct crc_pclmulqdq_ctx crc32_eth_pclmulqdq __rte_aligned(16);
+static struct crc_pclmulqdq_ctx crc16_ccitt_pclmulqdq __rte_aligned(16);
+/**
+ * @brief Performs one folding round
+ *
+ * Logically the function operates as follows:
+ *     DATA = READ_NEXT_16BYTES();
+ *     F1 = FOLD[127:64]
+ *     F2 = FOLD[63:0]
+ *     T1 = CLMUL(F1, PRECOMP[63:0])
+ *     T2 = CLMUL(F2, PRECOMP[127:64])
+ *     FOLD = XOR(T1, T2, DATA)
+ *
+ * @param data_block 16 byte data block
+ * @param precomp precomputed rk1 and rk2 constants
+ * @param fold running 16 byte folded data
+ *
+ * @return New 16 byte folded data
+ */
+static inline __attribute__((always_inline)) __m128i
+crcr32_folding_round(const __m128i data_block,
+		const __m128i precomp,
+		const __m128i fold)
+{
+	__m128i tmp0 = _mm_clmulepi64_si128(fold, precomp, 0x01);
+	__m128i tmp1 = _mm_clmulepi64_si128(fold, precomp, 0x10);
+
+	return _mm_xor_si128(tmp1, _mm_xor_si128(data_block, tmp0));
+}
+
+/**
+ * Performs reduction from 128 bits to 64 bits
+ *
+ * @param data128 128 bits data to be reduced
+ * @param precomp rk5 and rk6 precomputed constants
+ *
+ * @return 128 bit register holding the data reduced to 64 bits
+ */
+
+static inline __attribute__((always_inline)) __m128i
+crcr32_reduce_128_to_64(__m128i data128,
+	const __m128i precomp)
+{
+	__m128i tmp0, tmp1, tmp2;
+
+	/* 64b fold: low qword times low qword of precomp, xor high qword */
+	tmp0 = _mm_clmulepi64_si128(data128, precomp, 0x00);
+	tmp1 = _mm_srli_si128(data128, 8);
+	tmp0 = _mm_xor_si128(tmp0, tmp1);
+
+	/* 32b fold: shift up 4 bytes, low qword times high qword of precomp */
+	tmp2 = _mm_slli_si128(tmp0, 4);
+	tmp1 = _mm_clmulepi64_si128(tmp2, precomp, 0x10);
+
+	return _mm_xor_si128(tmp1, tmp0);
+}
+
+/**
+ * Performs Barrett's reduction from 64 bits to 32 bits
+ *
+ * @param data64 64 bits data to be reduced
+ * @param precomp rk7 and rk8 precomputed constants
+ *
+ * @return data reduced to 32 bits
+ */
+
+static inline __attribute__((always_inline)) uint32_t
+crcr32_reduce_64_to_32(__m128i data64,
+	const __m128i precomp)
+{
+	static const uint32_t mask1[4] __rte_aligned(16) = {
+		0xffffffff, 0xffffffff, 0x00000000, 0x00000000
+	};
+	/* mask1 keeps the low 64 bits, mask2 clears the low 32 bits */
+	static const uint32_t mask2[4] __rte_aligned(16) = {
+		0x00000000, 0xffffffff, 0xffffffff, 0xffffffff
+	};
+	__m128i tmp0, tmp1, tmp2;
+
+	tmp0 = _mm_and_si128(data64, _mm_load_si128((const __m128i *)mask2));
+	/* first multiply: low qword times low qword of precomp */
+	tmp1 = _mm_clmulepi64_si128(tmp0, precomp, 0x00);
+	tmp1 = _mm_xor_si128(tmp1, tmp0);
+	tmp1 = _mm_and_si128(tmp1, _mm_load_si128((const __m128i *)mask1));
+	/* second multiply: low qword times high qword of precomp */
+	tmp2 = _mm_clmulepi64_si128(tmp1, precomp, 0x10);
+	tmp2 = _mm_xor_si128(tmp2, tmp1);
+	tmp2 = _mm_xor_si128(tmp2, tmp0);
+	/* the result is taken from the third 32-bit lane */
+	return _mm_extract_epi32(tmp2, 2);
+}
+
+static const uint8_t crc_xmm_shift_tab[48] __rte_aligned(16) = {
+	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+};
+
+/**
+ * Shifts left 128 bit register by specified number of bytes
+ *
+ * @param reg 128 bit value
+ * @param num number of bytes to shift left reg by (0-16)
+ *
+ * @return reg << (num * 8)
+ */
+/* A 16 byte window into crc_xmm_shift_tab serves as the shuffle mask */
+static inline __attribute__((always_inline)) __m128i
+xmm_shift_left(__m128i reg, const unsigned int num)
+{
+	const __m128i *p = (const __m128i *)(crc_xmm_shift_tab + 16 - num);
+	/* 0xff mask bytes give zero, the others select byte (index - num) */
+	return _mm_shuffle_epi8(reg, _mm_loadu_si128(p));
+}
+/* Carry-less-multiply CRC of data_len bytes at data, starting from crc. */
+static inline __attribute__((always_inline)) uint32_t
+crc32_eth_calc_pclmulqdq(
+	const uint8_t *data,
+	uint32_t data_len,
+	uint32_t crc,
+	const struct crc_pclmulqdq_ctx *params)
+{
+	__m128i temp, fold, k;
+	uint32_t n;
+	/* A missing buffer, length or context returns crc unchanged */
+	if (unlikely(data == NULL))
+		return crc;
+
+	if (unlikely(data_len == 0))
+		return crc;
+
+	if (unlikely(params == NULL))
+		return crc;
+
+	/* Place the CRC init value in the lowest 32-bit lane */
+	temp = _mm_insert_epi32(_mm_setzero_si128(), crc, 0);
+
+	/**
+	 * Folding all data into single 16 byte data block
+	 * Assumes: fold holds first 16 bytes of data
+	 */
+
+	if (unlikely(data_len < 32)) {
+		if (unlikely(data_len == 16)) {
+			/* 16 bytes */
+			fold = _mm_loadu_si128((const __m128i *)data);
+			fold = _mm_xor_si128(fold, temp);
+			goto reduction_128_64;
+		}
+
+		if (unlikely(data_len < 16)) {
+			/* 1 to 15 bytes (0 was rejected above): padded copy */
+			uint8_t buffer[16] __rte_aligned(16);
+			/* NOTE(review): no <string.h> include visible - confirm */
+			memset(buffer, 0, sizeof(buffer));
+			memcpy(buffer, data, data_len);
+
+			fold = _mm_load_si128((const __m128i *)buffer);
+			fold = _mm_xor_si128(fold, temp);
+			if (unlikely(data_len < 4)) {
+				fold = xmm_shift_left(fold, 8 - data_len);
+				goto barret_reduction;
+			}
+			fold = xmm_shift_left(fold, 16 - data_len);
+			goto reduction_128_64;
+		}
+		/* 17 to 31 bytes */
+		fold = _mm_loadu_si128((const __m128i *)data);
+		fold = _mm_xor_si128(fold, temp);
+		n = 16;
+		k = params->rk1_rk2;
+		goto partial_bytes;
+	}
+
+	/** At least 32 bytes in the buffer */
+	/** Apply CRC initial value */
+	fold = _mm_loadu_si128((const __m128i *)data);
+	fold = _mm_xor_si128(fold, temp);
+
+	/** Main folding loop - the last 16 bytes are processed separately */
+	k = params->rk1_rk2;
+	for (n = 16; (n + 16) <= data_len; n += 16) {
+		temp = _mm_loadu_si128((const __m128i *)&data[n]);
+		fold = crcr32_folding_round(temp, k, fold);
+	}
+
+partial_bytes:
+	if (likely(n < data_len)) {
+
+		const uint32_t mask3[4] __rte_aligned(16) = {
+			0x80808080, 0x80808080, 0x80808080, 0x80808080
+		};
+
+		const uint8_t shf_table[32] __rte_aligned(16) = {
+			0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+			0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+			0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+			0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
+		};
+
+		__m128i last16, a, b;
+		/* Reload the final 16 bytes; they overlap data already folded */
+		last16 = _mm_loadu_si128((const __m128i *)&data[data_len - 16]);
+
+		temp = _mm_loadu_si128((const __m128i *)
+			&shf_table[data_len & 15]);
+		a = _mm_shuffle_epi8(fold, temp);
+
+		temp = _mm_xor_si128(temp,
+			_mm_load_si128((const __m128i *)mask3));
+		b = _mm_shuffle_epi8(fold, temp);
+		b = _mm_blendv_epi8(b, last16, temp);
+
+		/* k = rk1 & rk2 */
+		temp = _mm_clmulepi64_si128(a, k, 0x01);
+		fold = _mm_clmulepi64_si128(a, k, 0x10);
+
+		fold = _mm_xor_si128(fold, temp);
+		fold = _mm_xor_si128(fold, b);
+	}
+
+	/** Reduction 128 -> 32. Assumes: fold holds 128 bit folded data */
+reduction_128_64:
+	k = params->rk5_rk6;
+	fold = crcr32_reduce_128_to_64(fold, k);
+
+barret_reduction:
+	k = params->rk7_rk8;
+	n = crcr32_reduce_64_to_32(fold, k);
+
+	return n;
+}
+
+
+static inline void
+rte_net_crc_sse42_init(void)
+{
+	uint64_t k1, k2, k5, k6;
+	uint64_t p, q;
+
+	/* Precomputed constants for the 16-bit CRC */
+	k1 = 0x189aeLLU;
+	k2 = 0x8e10LLU;
+	k5 = 0x189aeLLU;
+	k6 = 0x114aaLLU;
+	p = 0x10811LLU;
+	q = 0x11c581910LLU;
+
+	/* Each register takes its first constant in the low 64 bits */
+	crc16_ccitt_pclmulqdq.rk1_rk2 =
+		_mm_setr_epi64(_mm_cvtsi64_m64(k1), _mm_cvtsi64_m64(k2));
+	crc16_ccitt_pclmulqdq.rk5_rk6 =
+		_mm_setr_epi64(_mm_cvtsi64_m64(k5), _mm_cvtsi64_m64(k6));
+	crc16_ccitt_pclmulqdq.rk7_rk8 =
+		_mm_setr_epi64(_mm_cvtsi64_m64(q), _mm_cvtsi64_m64(p));
+
+	/* Precomputed constants for the 32-bit CRC */
+	k1 = 0xccaa009eLLU;
+	k2 = 0x1751997d0LLU;
+	k5 = 0xccaa009eLLU;
+	k6 = 0x163cd6124LLU;
+	p = 0x1db710641LLU;
+	q = 0x1f7011640LLU;
+
+	/* Same register layout as for the 16-bit context */
+	crc32_eth_pclmulqdq.rk1_rk2 =
+		_mm_setr_epi64(_mm_cvtsi64_m64(k1), _mm_cvtsi64_m64(k2));
+	crc32_eth_pclmulqdq.rk5_rk6 =
+		_mm_setr_epi64(_mm_cvtsi64_m64(k5), _mm_cvtsi64_m64(k6));
+	crc32_eth_pclmulqdq.rk7_rk8 =
+		_mm_setr_epi64(_mm_cvtsi64_m64(q), _mm_cvtsi64_m64(p));
+
+	/* The __m64 conversions above use MMX state; release it */
+	_mm_empty();
+}
+
+/* Vector CRC16-CCITT: seed 0xffff, result complemented and cut to 16 bits. */
+static inline uint32_t
+rte_crc16_ccitt_sse42_handler(const uint8_t *data, uint32_t data_len)
+{
+	uint32_t raw = crc32_eth_calc_pclmulqdq(data, data_len, 0xffff,
+		&crc16_ccitt_pclmulqdq);
+	return (uint16_t)~raw;
+}
+
+/* Vector Ethernet CRC32: seed of all ones, result bitwise complemented. */
+static inline uint32_t
+rte_crc32_eth_sse42_handler(const uint8_t *data, uint32_t data_len)
+{
+	uint32_t raw = crc32_eth_calc_pclmulqdq(data, data_len, 0xffffffffUL,
+		&crc32_eth_pclmulqdq);
+	return ~raw;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_NET_CRC_SSE_H_ */
diff --git a/lib/librte_net/rte_net_version.map b/lib/librte_net/rte_net_version.map
index 3b15e65..c6716ec 100644
--- a/lib/librte_net/rte_net_version.map
+++ b/lib/librte_net/rte_net_version.map
@@ -4,3 +4,11 @@ DPDK_16.11 {
 
 	local: *;
 };
+
+DPDK_17.05 {
+	global:
+
+	rte_net_crc_set_alg;
+	rte_net_crc_calc;
+
+} DPDK_16.11;
-- 
2.5.5

  reply	other threads:[~2017-03-12 21:23 UTC|newest]

Thread overview: 69+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-02-24 20:54 [PATCH 0/2] librte_net: add crc computation support Jasvinder Singh
2017-02-24 20:54 ` [PATCH 1/2] librte_net: add crc init and compute APIs Jasvinder Singh
2017-02-28 12:08   ` [PATCH v2 0/2] librte_net: add crc computation support Jasvinder Singh
2017-02-28 12:08     ` [PATCH v2 1/2] librte_net: add crc init and compute APIs Jasvinder Singh
2017-02-28 12:15       ` Jerin Jacob
2017-03-01 18:46       ` Thomas Monjalon
2017-03-02 13:03         ` Singh, Jasvinder
2017-03-06 15:27           ` Thomas Monjalon
2017-03-08 11:08             ` De Lara Guarch, Pablo
2017-03-15 17:35               ` Thomas Monjalon
2017-03-15 19:03                 ` Dumitrescu, Cristian
2017-03-15 20:15                   ` Thomas Monjalon
2017-03-15 21:11                     ` Dumitrescu, Cristian
2017-03-15 19:09                 ` Dumitrescu, Cristian
2017-03-12 21:33       ` [PATCH v3 0/2] librte_net: add crc computation support Jasvinder Singh
2017-03-12 21:33         ` Jasvinder Singh [this message]
2017-03-13  3:06           ` [PATCH v3 1/2] librte_net: add crc compute APIs Ananyev, Konstantin
2017-03-13  9:05             ` Singh, Jasvinder
2017-03-20 19:29           ` [PATCH v4 0/2] librte_net: add crc computation support Jasvinder Singh
2017-03-20 19:29             ` [PATCH v4 1/2] librte_net: add crc compute APIs Jasvinder Singh
2017-03-21 14:45               ` [PATCH v5 0/2] librte_net: add crc computation support Jasvinder Singh
2017-03-21 14:45                 ` [PATCH v5 1/2] librte_net: add crc compute APIs Jasvinder Singh
2017-03-28 18:04                   ` De Lara Guarch, Pablo
2017-03-28 18:07                     ` De Lara Guarch, Pablo
2017-03-28 19:21                     ` Singh, Jasvinder
2017-03-29 12:42                   ` [PATCH v6 0/2] librte_net: add crc computation support Jasvinder Singh
2017-03-29 12:42                     ` [PATCH v6 1/2] librte_net: add crc compute APIs Jasvinder Singh
2017-03-29 16:14                       ` De Lara Guarch, Pablo
2017-03-29 17:15                       ` [PATCH v7 0/2] librte_net: add crc computation support Jasvinder Singh
2017-03-29 17:15                         ` [PATCH v7 1/2] librte_net: add crc compute APIs Jasvinder Singh
2017-03-30 11:30                           ` [PATCH v8 0/2] librte_net: add crc computation support Jasvinder Singh
2017-03-30 11:30                             ` [PATCH v8 1/2] librte_net: add crc compute APIs Jasvinder Singh
2017-03-30 11:31                               ` Ananyev, Konstantin
2017-03-30 12:06                                 ` Singh, Jasvinder
2017-03-30 14:40                                 ` Olivier Matz
2017-03-30 15:14                                   ` Singh, Jasvinder
2017-03-30 16:15                               ` [PATCH v9 0/3] librte_net: add crc computation support Jasvinder Singh
2017-03-30 16:15                                 ` [PATCH v9 1/3] librte_net: add crc compute APIs Jasvinder Singh
2017-04-04 20:00                                   ` Thomas Monjalon
2017-04-05 14:58                                   ` [PATCH v10 0/2] librte_net: add crc computation support Jasvinder Singh
2017-04-05 14:58                                     ` [PATCH v10 1/2] librte_net: add crc compute APIs Jasvinder Singh
2017-04-05 17:49                                       ` Thomas Monjalon
2017-04-05 19:22                                         ` Singh, Jasvinder
2017-04-05 20:49                                       ` [PATCH v11 0/2] librte_net: add crc computation support Jasvinder Singh
2017-04-05 20:49                                         ` [PATCH v11 1/2] librte_net: add crc compute APIs Jasvinder Singh
2017-04-05 20:49                                         ` [PATCH v11 2/2] test/test: add unit test for CRC computation Jasvinder Singh
2017-04-05 20:59                                           ` Thomas Monjalon
2017-04-05 21:00                                         ` [PATCH v11 0/2] librte_net: add crc computation support Thomas Monjalon
2017-04-05 14:58                                     ` [PATCH v10 2/2] test/test: add unit test for CRC computation Jasvinder Singh
2017-03-30 16:15                                 ` [PATCH v9 2/3] " Jasvinder Singh
2017-03-30 16:15                                 ` [PATCH v9 3/3] maintainers: add packet crc section and claim maintainership Jasvinder Singh
2017-04-04 19:55                                   ` Thomas Monjalon
2017-04-04 20:02                                 ` [PATCH v9 0/3] librte_net: add crc computation support Thomas Monjalon
2017-04-05  8:34                                   ` Singh, Jasvinder
2017-04-05  9:01                                     ` Thomas Monjalon
2017-04-05  9:37                                       ` Richardson, Bruce
2017-04-05 12:52                                         ` Singh, Jasvinder
2017-03-30 11:30                             ` [PATCH v8 2/2] test/test: add unit test for CRC computation Jasvinder Singh
2017-03-29 17:15                         ` [PATCH v7 " Jasvinder Singh
2017-03-29 12:42                     ` [PATCH v6 " Jasvinder Singh
2017-03-29 16:12                       ` De Lara Guarch, Pablo
2017-03-21 14:45                 ` [PATCH v5 " Jasvinder Singh
2017-03-28 19:23                   ` De Lara Guarch, Pablo
2017-03-28 19:27                     ` Singh, Jasvinder
2017-03-20 19:29             ` [PATCH v4 2/2] app/test: " Jasvinder Singh
2017-03-21  7:14               ` Peng, Yuan
2017-03-12 21:33         ` [PATCH v3 " Jasvinder Singh
2017-02-28 12:08     ` [PATCH v2 " Jasvinder Singh
2017-02-24 20:54 ` [PATCH " Jasvinder Singh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1489354413-137376-2-git-send-email-jasvinder.singh@intel.com \
    --to=jasvinder.singh@intel.com \
    --cc=declan.doherty@intel.com \
    --cc=dev@dpdk.org \
    --cc=pablo.de.lara.guarch@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.