From mboxrd@z Thu Jan 1 00:00:00 1970 From: "De Lara Guarch, Pablo" Subject: Re: [PATCH v5 1/2] librte_net: add crc compute APIs Date: Tue, 28 Mar 2017 18:04:16 +0000 Message-ID: References: <1490038180-207288-2-git-send-email-jasvinder.singh@intel.com> <1490107540-66614-1-git-send-email-jasvinder.singh@intel.com> <1490107540-66614-2-git-send-email-jasvinder.singh@intel.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: quoted-printable Cc: "olivier.matz@6wind.com" , "Doherty, Declan" To: "Singh, Jasvinder" , "dev@dpdk.org" Return-path: Received: from mga03.intel.com (mga03.intel.com [134.134.136.65]) by dpdk.org (Postfix) with ESMTP id B5FB2201 for ; Tue, 28 Mar 2017 20:04:20 +0200 (CEST) In-Reply-To: <1490107540-66614-2-git-send-email-jasvinder.singh@intel.com> Content-Language: en-US List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Hi Jasvinder, > -----Original Message----- > From: Singh, Jasvinder > Sent: Tuesday, March 21, 2017 2:46 PM > To: dev@dpdk.org > Cc: olivier.matz@6wind.com; Doherty, Declan; De Lara Guarch, Pablo > Subject: [PATCH v5 1/2] librte_net: add crc compute APIs >=20 > APIs for selecting the architecure specific implementation and computing > the crc (16-bit and 32-bit CRCs) are added. For CRCs calculation, scalar > as well as x86 intrinsic(sse4.2) versions are implemented. >=20 > The scalar version is based on generic Look-Up Table(LUT) algorithm, > while x86 intrinsic version uses carry-less multiplication for > fast CRC computation. >=20 > Signed-off-by: Jasvinder Singh > --- > diff --git a/lib/librte_net/rte_net_crc.c b/lib/librte_net/rte_net_crc.c > new file mode 100644 > index 0000000..89edd80 > --- /dev/null > +++ b/lib/librte_net/rte_net_crc.c ... > + > +#include > +#include > + > +/** crc tables */ > +static uint32_t crc32_eth_lut[256]; > +static uint32_t crc16_ccitt_lut[256]; Use a macro for 256, that you can use in crc32_eth_init_lut.=20 > + > +static uint32_t rte_crc16_ccitt_handler(const uint8_t *data, > + uint32_t data_len); Separate "static uint32_t" in another line. > +/** > + * Reflect the bits about the middle > + * > + * @param x value to be reflected Should be "val". > + * > + * @return reflected value > + */ > +static uint32_t > +reflect_32bits(const uint32_t val) No need for "const" here, as it is not a pointer. > +{ > + uint32_t i, res =3D 0; > + > + for (i =3D 0; i < 32; i++) > + if ((val & (1 << i)) !=3D 0) > + res |=3D (uint32_t)(1 << (31 - i)); > + > + return res; > +} > + > +static void > +crc32_eth_init_lut(const uint32_t poly, No need for "const" here. > + uint32_t *lut) > +{ > + uint_fast32_t i, j; > + > + for (i =3D 0; i < 256; i++) { > + uint_fast32_t crc =3D reflect_32bits(i); > + > + for (j =3D 0; j < 8; j++) { > + if (crc & 0x80000000L) > + crc =3D (crc << 1) ^ poly; > + else > + crc <<=3D 1; > + } > + lut[i] =3D reflect_32bits(crc); Wrong indentation. > + } > +} > + > +static inline __attribute__((always_inline)) uint32_t > +crc32_eth_calc_lut(const uint8_t *data, > + uint32_t data_len, > + uint32_t crc, > + const uint32_t *lut) > +{ > + while (data_len--) > + crc =3D lut[(crc ^ *data++) & 0xffL] ^ (crc >> 8); > + > + return crc; > +} > + > +static void > +rte_net_crc_scalar_init(void) > +{ > + /** 32-bit crc init */ > + crc32_eth_init_lut(CRC32_ETH_POLYNOMIAL, crc32_eth_lut); > + > + /** 16-bit CRC init */ > + crc32_eth_init_lut(CRC16_CCITT_POLYNOMIAL << 16, > crc16_ccitt_lut); > + Remove this blank line. > +} > + > +static inline uint32_t > +rte_crc16_ccitt_handler(const uint8_t *data, uint32_t data_len) > +{ > + return (uint16_t)~crc32_eth_calc_lut(data, > + data_len, > + 0xffff, > + crc16_ccitt_lut); Since you are casting to uint16_t, when you are supposed to cast to uint32_= t (given the return type), I would add a comment explaining why. > +} > + > diff --git a/lib/librte_net/rte_net_crc.h b/lib/librte_net/rte_net_crc.h > new file mode 100644 > index 0000000..f8c9075 > --- /dev/null > +++ b/lib/librte_net/rte_net_crc.h > @@ -0,0 +1,101 @@ ... > + > +/** > + * This API set the crc computation algorithm (i.e. scalar version, > + * x86 64-bit sse4.2 intrinsic version, etc.) and internal data > + * structure. > + * > + * @param alg Add extra information (CRC algorithm?). > + * - RTE_NET_CRC_SCALAR > + * - RTE_NET_CRC_SSE42 (Use 64-bit SSE4.2 intrinsic) > + */ > +void > +rte_net_crc_set_alg(enum rte_net_crc_alg alg); > + > +/** > + * CRC compute API > + * > + * @param data > + * Pointer to the packet data for crc computation > + * @param data_len > + * Data length for crc computation > + * @param type > + * Crc type (enum rte_net_crc_type) CRC > + * > + * @return > + * crc value Add two spaces after "@param" and "@return". > + */ > +uint32_t > +rte_net_crc_calc(const void *data, > + uint32_t data_len, > + enum rte_net_crc_type type); > + > +#if defined(RTE_ARCH_X86_64) || defined(RTE_CPU_FALGS_SSE_4_2) Typo in RTE_CPU_FALGS_SSE_4_2 (I missed the same one in rte_net_crc.c ). Also, should it be "&&"? > +#include > +#endif > + > +#ifdef __cplusplus > +} > +#endif > + > + > +#endif /* _RTE_NET_CRC_H_ */ > diff --git a/lib/librte_net/rte_net_crc_sse.h > b/lib/librte_net/rte_net_crc_sse.h > new file mode 100644 > index 0000000..e9af22d > --- /dev/null > +++ b/lib/librte_net/rte_net_crc_sse.h > @@ -0,0 +1,345 @@ ... > + * @brief Performs one folding round > + * > + * Logically function operates as follows: > + * DATA =3D READ_NEXT_16BYTES(); > + * F1 =3D LSB8(FOLD) > + * F2 =3D MSB8(FOLD) > + * T1 =3D CLMUL(F1, RK1) > + * T2 =3D CLMUL(F2, RK2) > + * FOLD =3D XOR(T1, T2, DATA) > + * > + * @param data_block 16 byte data block > + * @param precomp precomputed rk1 constanst > + * @param fold running 16 byte folded data > + * > + * @return New 16 byte folded data Move parameter/rturn description in a separate line (same for other functio= ns). > + */ > +static inline __attribute__((always_inline)) __m128i > +crcr32_folding_round(const __m128i data_block, > + const __m128i precomp, > + const __m128i fold) No need to use "const" here. > +{ > + __m128i tmp0 =3D _mm_clmulepi64_si128(fold, precomp, 0x01); > + __m128i tmp1 =3D _mm_clmulepi64_si128(fold, precomp, 0x10); > + > + return _mm_xor_si128(tmp1, _mm_xor_si128(data_block, tmp0)); > +} > + > +/** > + * Performs reduction from 128 bits to 64 bits > + * > + * @param data128 128 bits data to be reduced > + * @param precomp rk5 and rk6 precomputed constants > + * > + * @return data reduced to 64 bits > + */ > + > +static inline __attribute__((always_inline)) __m128i > +crcr32_reduce_128_to_64(__m128i data128, > + const __m128i precomp) No need to use "const" here. ... > + > + > +static inline void > +rte_net_crc_sse42_init(void) > +{ > + uint64_t k1, k2, k5, k6; > + uint64_t p =3D 0, q =3D 0; > + > + /** Initialize CRC16 data */ > + k1 =3D 0x189aeLLU; > + k2 =3D 0x8e10LLU; > + k5 =3D 0x189aeLLU; > + k6 =3D 0x114aaLLU; > + q =3D 0x11c581910LLU; > + p =3D 0x10811LLU; > + > + /** Save the params in context structure */ > + crc16_ccitt_pclmulqdq.rk1_rk2 =3D > + _mm_setr_epi64(_mm_cvtsi64_m64(k1), > _mm_cvtsi64_m64(k2)); > + crc16_ccitt_pclmulqdq.rk5_rk6 =3D > + _mm_setr_epi64(_mm_cvtsi64_m64(k5), > _mm_cvtsi64_m64(k6)); > + crc16_ccitt_pclmulqdq.rk7_rk8 =3D > + _mm_setr_epi64(_mm_cvtsi64_m64(q), > _mm_cvtsi64_m64(p)); > + > + /** Initialize CRC32 data */ > + k1 =3D 0xccaa009eLLU; > + k2 =3D 0x1751997d0LLU; > + k5 =3D 0xccaa009eLLU; > + k6 =3D 0x163cd6124LLU; > + q =3D 0x1f7011640LLU; > + p =3D 0x1db710641LLU; > + > + /** Save the params in context structure */ > + crc32_eth_pclmulqdq.rk1_rk2 =3D > + _mm_setr_epi64(_mm_cvtsi64_m64(k1), > _mm_cvtsi64_m64(k2)); Add extra tab for better readability. > + crc32_eth_pclmulqdq.rk5_rk6 =3D > + _mm_setr_epi64(_mm_cvtsi64_m64(k5), > _mm_cvtsi64_m64(k6)); > + crc32_eth_pclmulqdq.rk7_rk8 =3D > + _mm_setr_epi64(_mm_cvtsi64_m64(q), > _mm_cvtsi64_m64(p)); > + > + _mm_empty(); Maybe we need a comment here. > + > +} > + > +static inline uint32_t > +rte_crc16_ccitt_sse42_handler(const uint8_t *data, > + uint32_t data_len) > +{ > + return (uint16_t)~crc32_eth_calc_pclmulqdq(data, > + data_len, > + 0xffff, > + &crc16_ccitt_pclmulqdq); Same comment about the casting here. > +} > + > +static inline uint32_t > +rte_crc32_eth_sse42_handler(const uint8_t *data, > + uint32_t data_len) > +{ > + return ~crc32_eth_calc_pclmulqdq(data, > + data_len, > + 0xffffffffUL, > + &crc32_eth_pclmulqdq); > +} > + > +#ifdef __cplusplus > +} > +#endif > + > +#endif /* _RTE_NET_CRC_SSE_H_ */ > diff --git a/lib/librte_net/rte_net_version.map > b/lib/librte_net/rte_net_version.map > index 3b15e65..c6716ec 100644 > --- a/lib/librte_net/rte_net_version.map > +++ b/lib/librte_net/rte_net_version.map > @@ -4,3 +4,11 @@ DPDK_16.11 { >=20 > local: *; > }; > + > +DPDK_17.05 { > + global: > + > + rte_net_crc_set_alg; > + rte_net_crc_calc; This has to be alphabetically sorted. > + > +} DPDK_16.11; > -- > 2.5.5