From mboxrd@z Thu Jan 1 00:00:00 1970 From: Jianbo Liu Subject: [PATCH 3/4] eal/arm: Enable lpm/table/pipeline libs Date: Tue, 1 Dec 2015 13:41:15 -0500 Message-ID: <1448995276-9599-4-git-send-email-jianbo.liu@linaro.org> References: <1448995276-9599-1-git-send-email-jianbo.liu@linaro.org> To: dev@dpdk.org Return-path: Received: from mail-wm0-f41.google.com (mail-wm0-f41.google.com [74.125.82.41]) by dpdk.org (Postfix) with ESMTP id 887EA8E6C for ; Tue, 1 Dec 2015 11:41:46 +0100 (CET) Received: by wmec201 with SMTP id c201so7239325wme.1 for ; Tue, 01 Dec 2015 02:41:46 -0800 (PST) In-Reply-To: <1448995276-9599-1-git-send-email-jianbo.liu@linaro.org> List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Adds ARM NEON support for lpm. And enables table/pipeline libraries which depend on lpm. Signed-off-by: Jianbo Liu --- config/defconfig_arm-armv7a-linuxapp-gcc | 3 - config/defconfig_arm64-armv8a-linuxapp-gcc | 3 - lib/librte_eal/common/include/arch/arm/rte_vect.h | 28 ++++++++++ lib/librte_lpm/rte_lpm.h | 68 ++++++++++++++++------- 4 files changed, 77 insertions(+), 25 deletions(-) diff --git a/config/defconfig_arm-armv7a-linuxapp-gcc b/config/defconfig_arm-armv7a-linuxapp-gcc index cbebd64..efffa1f 100644 --- a/config/defconfig_arm-armv7a-linuxapp-gcc +++ b/config/defconfig_arm-armv7a-linuxapp-gcc @@ -53,9 +53,6 @@ CONFIG_RTE_LIBRTE_KNI=n CONFIG_RTE_EAL_IGB_UIO=n # fails to compile on ARM -CONFIG_RTE_LIBRTE_LPM=n -CONFIG_RTE_LIBRTE_TABLE=n -CONFIG_RTE_LIBRTE_PIPELINE=n CONFIG_RTE_SCHED_VECTOR=n # cannot use those on ARM diff --git a/config/defconfig_arm64-armv8a-linuxapp-gcc b/config/defconfig_arm64-armv8a-linuxapp-gcc index 504f3ed..57f7941 100644 --- a/config/defconfig_arm64-armv8a-linuxapp-gcc +++ b/config/defconfig_arm64-armv8a-linuxapp-gcc @@ -51,7 +51,4 @@ CONFIG_RTE_LIBRTE_IVSHMEM=n CONFIG_RTE_LIBRTE_FM10K_PMD=n CONFIG_RTE_LIBRTE_I40E_PMD=n -CONFIG_RTE_LIBRTE_LPM=n -CONFIG_RTE_LIBRTE_TABLE=n -CONFIG_RTE_LIBRTE_PIPELINE=n CONFIG_RTE_SCHED_VECTOR=n diff --git a/lib/librte_eal/common/include/arch/arm/rte_vect.h b/lib/librte_eal/common/include/arch/arm/rte_vect.h index a33c054..7437711 100644 --- a/lib/librte_eal/common/include/arch/arm/rte_vect.h +++ b/lib/librte_eal/common/include/arch/arm/rte_vect.h @@ -41,6 +41,8 @@ extern "C" { typedef int32x4_t xmm_t; +typedef int32x4_t __m128i; + #define XMM_SIZE (sizeof(xmm_t)) #define XMM_MASK (XMM_SIZE - 1) @@ -53,6 +55,32 @@ typedef union rte_xmm { double pd[XMM_SIZE / sizeof(double)]; } __attribute__((aligned(16))) rte_xmm_t; +static __inline __m128i +_mm_set_epi32(int i3, int i2, int i1, int i0) +{ + int32_t r[4] = {i0, i1, i2, i3}; + + return vld1q_s32(r); +} + +static __inline __m128i +_mm_loadu_si128(__m128i *p) +{ + return vld1q_s32((int32_t *)p); +} + +static __inline __m128i +_mm_set1_epi32(int i) +{ + return vdupq_n_s32(i); +} + +static __inline __m128i +_mm_and_si128(__m128i a, __m128i b) +{ + return vandq_s32(a, b); +} + #ifdef RTE_ARCH_ARM /* NEON intrinsic vqtbl1q_u8() is not supported in ARMv7-A(AArch32) */ static __inline uint8x16_t diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h index c299ce2..c76c07d 100644 --- a/lib/librte_lpm/rte_lpm.h +++ b/lib/librte_lpm/rte_lpm.h @@ -361,6 +361,47 @@ rte_lpm_lookup_bulk_func(const struct rte_lpm *lpm, const uint32_t * ips, /* Mask four results. */ #define RTE_LPM_MASKX4_RES UINT64_C(0x00ff00ff00ff00ff) +#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64) +static inline void +rte_lpm_tbl24_val4(const struct rte_lpm *lpm, int32x4_t ip, uint16_t tbl[4]) +{ + uint32x4_t i24; + uint32_t idx[4]; + + /* get 4 indexes for tbl24[]. */ + i24 = vshrq_n_u32(vreinterpretq_u32_s32(ip), CHAR_BIT); + vst1q_u32(idx, i24); + + /* extract values from tbl24[] */ + tbl[0] = *(const uint16_t *)&lpm->tbl24[idx[0]]; + tbl[1] = *(const uint16_t *)&lpm->tbl24[idx[1]]; + tbl[2] = *(const uint16_t *)&lpm->tbl24[idx[2]]; + tbl[3] = *(const uint16_t *)&lpm->tbl24[idx[3]]; +} +#else +static inline void +rte_lpm_tbl24_val4(const struct rte_lpm *lpm, __m128i ip, uint16_t tbl[4]) +{ + __m128i i24; + uint64_t idx; + + /* get 4 indexes for tbl24[]. */ + i24 = _mm_srli_epi32(ip, CHAR_BIT); + + /* extract values from tbl24[] */ + idx = _mm_cvtsi128_si64(i24); + i24 = _mm_srli_si128(i24, sizeof(uint64_t)); + + tbl[0] = *(const uint16_t *)&lpm->tbl24[(uint32_t)idx]; + tbl[1] = *(const uint16_t *)&lpm->tbl24[idx >> 32]; + + idx = _mm_cvtsi128_si64(i24); + + tbl[2] = *(const uint16_t *)&lpm->tbl24[(uint32_t)idx]; + tbl[3] = *(const uint16_t *)&lpm->tbl24[idx >> 32]; +} +#endif + /** * Lookup four IP addresses in an LPM table. * @@ -381,17 +422,19 @@ rte_lpm_lookup_bulk_func(const struct rte_lpm *lpm, const uint32_t * ips, * if lookup would fail. */ static inline void +#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64) +rte_lpm_lookupx4(const struct rte_lpm *lpm, int32x4_t ip, uint16_t hop[4], + uint16_t defv) +#else rte_lpm_lookupx4(const struct rte_lpm *lpm, __m128i ip, uint16_t hop[4], uint16_t defv) +#endif { - __m128i i24; rte_xmm_t i8; uint16_t tbl[4]; - uint64_t idx, pt; - - const __m128i mask8 = - _mm_set_epi32(UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX); + uint64_t pt; + const __m128i mask8 = _mm_set1_epi32(UINT8_MAX); /* * RTE_LPM_VALID_EXT_ENTRY_BITMASK for 4 LPM entries * as one 64-bit value (0x0300030003000300). @@ -412,20 +455,7 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, __m128i ip, uint16_t hop[4], (uint64_t)RTE_LPM_LOOKUP_SUCCESS << 32 | (uint64_t)RTE_LPM_LOOKUP_SUCCESS << 48); - /* get 4 indexes for tbl24[]. */ - i24 = _mm_srli_epi32(ip, CHAR_BIT); - - /* extract values from tbl24[] */ - idx = _mm_cvtsi128_si64(i24); - i24 = _mm_srli_si128(i24, sizeof(uint64_t)); - - tbl[0] = *(const uint16_t *)&lpm->tbl24[(uint32_t)idx]; - tbl[1] = *(const uint16_t *)&lpm->tbl24[idx >> 32]; - - idx = _mm_cvtsi128_si64(i24); - - tbl[2] = *(const uint16_t *)&lpm->tbl24[(uint32_t)idx]; - tbl[3] = *(const uint16_t *)&lpm->tbl24[idx >> 32]; + rte_lpm_tbl24_val4(lpm, ip, tbl); /* get 4 indexes for tbl8[]. */ i8.x = _mm_and_si128(ip, mask8); -- 1.8.3.1