From mboxrd@z Thu Jan  1 00:00:00 1970
From: "De Lara Guarch, Pablo" <pablo.de.lara.guarch@intel.com>
Subject: Re: [PATCH v5 1/2] librte_net: add crc compute APIs
Date: Tue, 28 Mar 2017 18:04:16 +0000
Message-ID: <E115CCD9D858EF4F90C690B0DCB4D8974770CFBE@IRSMSX108.ger.corp.intel.com>
References: <1490038180-207288-2-git-send-email-jasvinder.singh@intel.com>
 <1490107540-66614-1-git-send-email-jasvinder.singh@intel.com>
 <1490107540-66614-2-git-send-email-jasvinder.singh@intel.com>
Mime-Version: 1.0
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: quoted-printable
Cc: "olivier.matz@6wind.com" <olivier.matz@6wind.com>, "Doherty, Declan"
 <declan.doherty@intel.com>
To: "Singh, Jasvinder" <jasvinder.singh@intel.com>, "dev@dpdk.org"
 <dev@dpdk.org>
Return-path: <dev-bounces@dpdk.org>
Received: from mga03.intel.com (mga03.intel.com [134.134.136.65])
 by dpdk.org (Postfix) with ESMTP id B5FB2201
 for <dev@dpdk.org>; Tue, 28 Mar 2017 20:04:20 +0200 (CEST)
In-Reply-To: <1490107540-66614-2-git-send-email-jasvinder.singh@intel.com>
Content-Language: en-US
List-Id: DPDK patches and discussions <dev.dpdk.org>
List-Unsubscribe: <http://dpdk.org/ml/options/dev>,
 <mailto:dev-request@dpdk.org?subject=unsubscribe>
List-Archive: <http://dpdk.org/ml/archives/dev/>
List-Post: <mailto:dev@dpdk.org>
List-Help: <mailto:dev-request@dpdk.org?subject=help>
List-Subscribe: <http://dpdk.org/ml/listinfo/dev>,
 <mailto:dev-request@dpdk.org?subject=subscribe>
Errors-To: dev-bounces@dpdk.org
Sender: "dev" <dev-bounces@dpdk.org>

Hi Jasvinder,

> -----Original Message-----
> From: Singh, Jasvinder
> Sent: Tuesday, March 21, 2017 2:46 PM
> To: dev@dpdk.org
> Cc: olivier.matz@6wind.com; Doherty, Declan; De Lara Guarch, Pablo
> Subject: [PATCH v5 1/2] librte_net: add crc compute APIs
>=20
> APIs for selecting the architecure specific implementation and computing
> the crc (16-bit and 32-bit CRCs) are added. For CRCs calculation, scalar
> as well as x86 intrinsic(sse4.2) versions are implemented.
>=20
> The scalar version is based on generic Look-Up Table(LUT) algorithm,
> while x86 intrinsic version uses carry-less multiplication for
> fast CRC computation.
>=20
> Signed-off-by: Jasvinder Singh <jasvinder.singh@intel.com>
> ---

> diff --git a/lib/librte_net/rte_net_crc.c b/lib/librte_net/rte_net_crc.c
> new file mode 100644
> index 0000000..89edd80
> --- /dev/null
> +++ b/lib/librte_net/rte_net_crc.c

...

> +
> +#include <rte_net_crc.h>
> +#include <stddef.h>
> +
> +/** crc tables */
> +static uint32_t crc32_eth_lut[256];
> +static uint32_t crc16_ccitt_lut[256];

Use a macro for 256, that you can use in crc32_eth_init_lut.=20
> +
> +static uint32_t rte_crc16_ccitt_handler(const uint8_t *data,
> +	uint32_t data_len);

Separate "static uint32_t" in another line.


> +/**
> + * Reflect the bits about the middle
> + *
> + * @param x value to be reflected

Should be "val".

> + *
> + * @return reflected value
> + */
> +static uint32_t
> +reflect_32bits(const uint32_t val)

No need for "const" here, as it is not a pointer.

> +{
> +	uint32_t i, res =3D 0;
> +
> +	for (i =3D 0; i < 32; i++)
> +		if ((val & (1 << i)) !=3D 0)
> +			res |=3D (uint32_t)(1 << (31 - i));
> +
> +	return res;
> +}
> +
> +static void
> +crc32_eth_init_lut(const uint32_t poly,

No need for "const" here.

> +	uint32_t *lut)
> +{
> +	uint_fast32_t i, j;
> +
> +	for (i =3D 0; i < 256; i++) {
> +		uint_fast32_t crc =3D reflect_32bits(i);
> +
> +		for (j =3D 0; j < 8; j++) {
> +			if (crc & 0x80000000L)
> +				crc =3D (crc << 1) ^ poly;
> +			else
> +				crc <<=3D 1;
> +		}
> +	lut[i] =3D reflect_32bits(crc);

Wrong indentation.

> +	}
> +}
> +
> +static inline __attribute__((always_inline)) uint32_t
> +crc32_eth_calc_lut(const uint8_t *data,
> +	uint32_t data_len,
> +	uint32_t crc,
> +	const uint32_t *lut)
> +{
> +	while (data_len--)
> +		crc =3D lut[(crc ^ *data++) & 0xffL] ^ (crc >> 8);
> +
> +	return crc;
> +}
> +
> +static void
> +rte_net_crc_scalar_init(void)
> +{
> +	/** 32-bit crc init */
> +	crc32_eth_init_lut(CRC32_ETH_POLYNOMIAL, crc32_eth_lut);
> +
> +	/** 16-bit CRC init */
> +	crc32_eth_init_lut(CRC16_CCITT_POLYNOMIAL << 16,
> crc16_ccitt_lut);
> +

Remove this blank line.

> +}
> +
> +static inline uint32_t
> +rte_crc16_ccitt_handler(const uint8_t *data, uint32_t data_len)
> +{
> +	return (uint16_t)~crc32_eth_calc_lut(data,
> +		data_len,
> +		0xffff,
> +		crc16_ccitt_lut);

Since you are casting to uint16_t, when you are supposed to cast to uint32_=
t
(given the return type), I would add a comment explaining why.

> +}
> +


> diff --git a/lib/librte_net/rte_net_crc.h b/lib/librte_net/rte_net_crc.h
> new file mode 100644
> index 0000000..f8c9075
> --- /dev/null
> +++ b/lib/librte_net/rte_net_crc.h
> @@ -0,0 +1,101 @@

...

> +
> +/**
> + *  This API set the crc computation algorithm (i.e. scalar version,
> + *  x86 64-bit sse4.2 intrinsic version, etc.) and internal data
> + *  structure.
> + *
> + * @param alg

Add extra information (CRC algorithm?).

> + *   - RTE_NET_CRC_SCALAR
> + *   - RTE_NET_CRC_SSE42 (Use 64-bit SSE4.2 intrinsic)
> + */
> +void
> +rte_net_crc_set_alg(enum rte_net_crc_alg alg);
> +
> +/**
> + * CRC compute API
> + *
> + * @param data
> + *  Pointer to the packet data for crc computation
> + * @param data_len
> + *  Data length for crc computation
> + * @param type
> + *  Crc type (enum rte_net_crc_type)

CRC

> + *
> + * @return
> + *  crc value

Add two spaces after "@param" and "@return".

> + */
> +uint32_t
> +rte_net_crc_calc(const void *data,
> +	uint32_t data_len,
> +	enum rte_net_crc_type type);
> +
> +#if defined(RTE_ARCH_X86_64) || defined(RTE_CPU_FALGS_SSE_4_2)

Typo in RTE_CPU_FALGS_SSE_4_2 (I missed the same one in rte_net_crc.c ).
Also, should it be "&&"?


> +#include <rte_net_crc_sse.h>
> +#endif
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +
> +#endif /* _RTE_NET_CRC_H_ */
> diff --git a/lib/librte_net/rte_net_crc_sse.h
> b/lib/librte_net/rte_net_crc_sse.h
> new file mode 100644
> index 0000000..e9af22d
> --- /dev/null
> +++ b/lib/librte_net/rte_net_crc_sse.h
> @@ -0,0 +1,345 @@

...

> + * @brief Performs one folding round
> + *
> + * Logically function operates as follows:
> + *     DATA =3D READ_NEXT_16BYTES();
> + *     F1 =3D LSB8(FOLD)
> + *     F2 =3D MSB8(FOLD)
> + *     T1 =3D CLMUL(F1, RK1)
> + *     T2 =3D CLMUL(F2, RK2)
> + *     FOLD =3D XOR(T1, T2, DATA)
> + *
> + * @param data_block 16 byte data block
> + * @param precomp precomputed rk1 constanst
> + * @param fold running 16 byte folded data
> + *
> + * @return New 16 byte folded data

Move parameter/rturn description in a separate line (same for other functio=
ns).

> + */
> +static inline __attribute__((always_inline)) __m128i
> +crcr32_folding_round(const __m128i data_block,
> +		const __m128i precomp,
> +		const __m128i fold)

No need to use "const" here.

> +{
> +	__m128i tmp0 =3D _mm_clmulepi64_si128(fold, precomp, 0x01);
> +	__m128i tmp1 =3D _mm_clmulepi64_si128(fold, precomp, 0x10);
> +
> +	return _mm_xor_si128(tmp1, _mm_xor_si128(data_block, tmp0));
> +}
> +
> +/**
> + * Performs reduction from 128 bits to 64 bits
> + *
> + * @param data128 128 bits data to be reduced
> + * @param precomp rk5 and rk6 precomputed constants
> + *
> + * @return data reduced to 64 bits
> + */
> +
> +static inline __attribute__((always_inline)) __m128i
> +crcr32_reduce_128_to_64(__m128i data128,
> +	const __m128i precomp)

No need to use "const" here.

...

> +
> +
> +static inline void
> +rte_net_crc_sse42_init(void)
> +{
> +	uint64_t k1, k2, k5, k6;
> +	uint64_t p =3D 0, q =3D 0;
> +
> +	/** Initialize CRC16 data */
> +	k1 =3D 0x189aeLLU;
> +	k2 =3D 0x8e10LLU;
> +	k5 =3D 0x189aeLLU;
> +	k6 =3D 0x114aaLLU;
> +	q =3D  0x11c581910LLU;
> +	p =3D  0x10811LLU;
> +
> +	/** Save the params in context structure */
> +	crc16_ccitt_pclmulqdq.rk1_rk2 =3D
> +		_mm_setr_epi64(_mm_cvtsi64_m64(k1),
> 	_mm_cvtsi64_m64(k2));
> +	crc16_ccitt_pclmulqdq.rk5_rk6 =3D
> +		_mm_setr_epi64(_mm_cvtsi64_m64(k5),
> 	_mm_cvtsi64_m64(k6));
> +	crc16_ccitt_pclmulqdq.rk7_rk8 =3D
> +		_mm_setr_epi64(_mm_cvtsi64_m64(q),
> _mm_cvtsi64_m64(p));
> +
> +	/** Initialize CRC32 data */
> +	k1 =3D 0xccaa009eLLU;
> +	k2 =3D 0x1751997d0LLU;
> +	k5 =3D 0xccaa009eLLU;
> +	k6 =3D 0x163cd6124LLU;
> +	q =3D  0x1f7011640LLU;
> +	p =3D  0x1db710641LLU;
> +
> +	/** Save the params in context structure */
> +	crc32_eth_pclmulqdq.rk1_rk2 =3D
> +		_mm_setr_epi64(_mm_cvtsi64_m64(k1),
> 	_mm_cvtsi64_m64(k2));

Add extra tab for better readability.

> +	crc32_eth_pclmulqdq.rk5_rk6 =3D
> +		_mm_setr_epi64(_mm_cvtsi64_m64(k5),
> _mm_cvtsi64_m64(k6));
> +	crc32_eth_pclmulqdq.rk7_rk8 =3D
> +		_mm_setr_epi64(_mm_cvtsi64_m64(q),
> _mm_cvtsi64_m64(p));
> +
> +	_mm_empty();

Maybe we need a comment here.

> +
> +}
> +
> +static inline uint32_t
> +rte_crc16_ccitt_sse42_handler(const uint8_t *data,
> +	uint32_t data_len)
> +{
> +	return (uint16_t)~crc32_eth_calc_pclmulqdq(data,
> +		data_len,
> +		0xffff,
> +		&crc16_ccitt_pclmulqdq);

Same comment about the casting here.

> +}
> +
> +static inline uint32_t
> +rte_crc32_eth_sse42_handler(const uint8_t *data,
> +	uint32_t data_len)
> +{
> +	return ~crc32_eth_calc_pclmulqdq(data,
> +		data_len,
> +		0xffffffffUL,
> +		&crc32_eth_pclmulqdq);
> +}
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* _RTE_NET_CRC_SSE_H_ */
> diff --git a/lib/librte_net/rte_net_version.map
> b/lib/librte_net/rte_net_version.map
> index 3b15e65..c6716ec 100644
> --- a/lib/librte_net/rte_net_version.map
> +++ b/lib/librte_net/rte_net_version.map
> @@ -4,3 +4,11 @@ DPDK_16.11 {
>=20
>  	local: *;
>  };
> +
> +DPDK_17.05 {
> +	global:
> +
> +	rte_net_crc_set_alg;
> +	rte_net_crc_calc;

This has to be alphabetically sorted.

> +
> +} DPDK_16.11;
> --
> 2.5.5