All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jerin Jacob <jerin.jacob@caviumnetworks.com>
To: Jianbo Liu <jianbo.liu@linaro.org>
Cc: dev@dpdk.org
Subject: Re: [PATCH 3/4] eal/arm: Enable lpm/table/pipeline libs
Date: Tue, 1 Dec 2015 22:11:42 +0530	[thread overview]
Message-ID: <20151201164139.GA12144@localhost.localdomain> (raw)
In-Reply-To: <1448995276-9599-4-git-send-email-jianbo.liu@linaro.org>

On Tue, Dec 01, 2015 at 01:41:15PM -0500, Jianbo Liu wrote:
> Adds ARM NEON support for lpm.
> And enables table/pipeline libraries which depend on lpm.

I already sent the patch on the same yesterday.
We can converge the patches after the discussion.
Please check "[dpdk-dev] [PATCH 0/3] add lpm support for NEON" on ml


> 
> Signed-off-by: Jianbo Liu <jianbo.liu@linaro.org>
> ---
>  config/defconfig_arm-armv7a-linuxapp-gcc          |  3 -
>  config/defconfig_arm64-armv8a-linuxapp-gcc        |  3 -
>  lib/librte_eal/common/include/arch/arm/rte_vect.h | 28 ++++++++++
>  lib/librte_lpm/rte_lpm.h                          | 68 ++++++++++++++++-------
>  4 files changed, 77 insertions(+), 25 deletions(-)
> 
> diff --git a/config/defconfig_arm-armv7a-linuxapp-gcc b/config/defconfig_arm-armv7a-linuxapp-gcc
> index cbebd64..efffa1f 100644
> --- a/config/defconfig_arm-armv7a-linuxapp-gcc
> +++ b/config/defconfig_arm-armv7a-linuxapp-gcc
> @@ -53,9 +53,6 @@ CONFIG_RTE_LIBRTE_KNI=n
>  CONFIG_RTE_EAL_IGB_UIO=n
>  
>  # fails to compile on ARM
> -CONFIG_RTE_LIBRTE_LPM=n
> -CONFIG_RTE_LIBRTE_TABLE=n
> -CONFIG_RTE_LIBRTE_PIPELINE=n
>  CONFIG_RTE_SCHED_VECTOR=n
>  
>  # cannot use those on ARM
> diff --git a/config/defconfig_arm64-armv8a-linuxapp-gcc b/config/defconfig_arm64-armv8a-linuxapp-gcc
> index 504f3ed..57f7941 100644
> --- a/config/defconfig_arm64-armv8a-linuxapp-gcc
> +++ b/config/defconfig_arm64-armv8a-linuxapp-gcc
> @@ -51,7 +51,4 @@ CONFIG_RTE_LIBRTE_IVSHMEM=n
>  CONFIG_RTE_LIBRTE_FM10K_PMD=n
>  CONFIG_RTE_LIBRTE_I40E_PMD=n
>  
> -CONFIG_RTE_LIBRTE_LPM=n
> -CONFIG_RTE_LIBRTE_TABLE=n
> -CONFIG_RTE_LIBRTE_PIPELINE=n
>  CONFIG_RTE_SCHED_VECTOR=n
> diff --git a/lib/librte_eal/common/include/arch/arm/rte_vect.h b/lib/librte_eal/common/include/arch/arm/rte_vect.h
> index a33c054..7437711 100644
> --- a/lib/librte_eal/common/include/arch/arm/rte_vect.h
> +++ b/lib/librte_eal/common/include/arch/arm/rte_vect.h
> @@ -41,6 +41,8 @@ extern "C" {
>  
>  typedef int32x4_t xmm_t;
>  
> +typedef int32x4_t __m128i;
> +
>  #define	XMM_SIZE	(sizeof(xmm_t))
>  #define	XMM_MASK	(XMM_SIZE - 1)
>  
> @@ -53,6 +55,32 @@ typedef union rte_xmm {
>  	double   pd[XMM_SIZE / sizeof(double)];
>  } __attribute__((aligned(16))) rte_xmm_t;
>  
> +static __inline __m128i
> +_mm_set_epi32(int i3, int i2, int i1, int i0)
> +{
> +	int32_t r[4] = {i0, i1, i2, i3};
> +
> +	return vld1q_s32(r);
> +}
> +
> +static __inline __m128i
> +_mm_loadu_si128(__m128i *p)
> +{
> +	return vld1q_s32((int32_t *)p);
> +}
> +
> +static __inline __m128i
> +_mm_set1_epi32(int i)
> +{
> +	return vdupq_n_s32(i);
> +}
> +
> +static __inline __m128i
> +_mm_and_si128(__m128i a, __m128i b)
> +{
> +	return vandq_s32(a, b);
> +}
> +

IMO, it makes sense to not emulate the SSE intrinsics with NEON
Let's create the rte_vect_* as required. look at the existing patch.


>  #ifdef RTE_ARCH_ARM
>  /* NEON intrinsic vqtbl1q_u8() is not supported in ARMv7-A(AArch32) */
>  static __inline uint8x16_t
> diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h
> index c299ce2..c76c07d 100644
> --- a/lib/librte_lpm/rte_lpm.h
> +++ b/lib/librte_lpm/rte_lpm.h
> @@ -361,6 +361,47 @@ rte_lpm_lookup_bulk_func(const struct rte_lpm *lpm, const uint32_t * ips,
>  /* Mask four results. */
>  #define	 RTE_LPM_MASKX4_RES	UINT64_C(0x00ff00ff00ff00ff)
>  
> +#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)

Separate out arm implementation to the different header file.
Too many ifdef looks odd in the header file and difficult to manage.


> +static inline void
> +rte_lpm_tbl24_val4(const struct rte_lpm *lpm, int32x4_t ip, uint16_t tbl[4])
> +{
> +	uint32x4_t i24;
> +	uint32_t idx[4];
> +
> +	/* get 4 indexes for tbl24[]. */
> +	i24 = vshrq_n_u32(vreinterpretq_u32_s32(ip), CHAR_BIT);
> +	vst1q_u32(idx, i24);
> +
> +	/* extract values from tbl24[] */
> +	tbl[0] = *(const uint16_t *)&lpm->tbl24[idx[0]];
> +	tbl[1] = *(const uint16_t *)&lpm->tbl24[idx[1]];
> +	tbl[2] = *(const uint16_t *)&lpm->tbl24[idx[2]];
> +	tbl[3] = *(const uint16_t *)&lpm->tbl24[idx[3]];
> +}

Nice. There is an improvement in this portion code wrt my patch. This is
a candidate for convergence.


> +#else
> +static inline void
> +rte_lpm_tbl24_val4(const struct rte_lpm *lpm, __m128i ip, uint16_t tbl[4])
> +{
> +	__m128i i24;
> +	uint64_t idx;
> +
> +	/* get 4 indexes for tbl24[]. */
> +	i24 = _mm_srli_epi32(ip, CHAR_BIT);
> +
> +	/* extract values from tbl24[] */
> +	idx = _mm_cvtsi128_si64(i24);
> +	i24 = _mm_srli_si128(i24, sizeof(uint64_t));
> +
> +	tbl[0] = *(const uint16_t *)&lpm->tbl24[(uint32_t)idx];
> +	tbl[1] = *(const uint16_t *)&lpm->tbl24[idx >> 32];
> +
> +	idx = _mm_cvtsi128_si64(i24);
> +
> +	tbl[2] = *(const uint16_t *)&lpm->tbl24[(uint32_t)idx];
> +	tbl[3] = *(const uint16_t *)&lpm->tbl24[idx >> 32];
> +}
> +#endif
> +
>  /**
>   * Lookup four IP addresses in an LPM table.
>   *
> @@ -381,17 +422,19 @@ rte_lpm_lookup_bulk_func(const struct rte_lpm *lpm, const uint32_t * ips,
>   *   if lookup would fail.
>   */
>  static inline void
> +#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
> +rte_lpm_lookupx4(const struct rte_lpm *lpm, int32x4_t ip, uint16_t hop[4],
> +	uint16_t defv)

This would call for change in the change the ABI,
IMO, __m128i can be used to represent 128bit vector to avoid ABI chang


> +#else
separate out arm implementation to the different header file. Too many
ifdef looks odd in the header file.

Could you  rebase your patch based on existing patch and send the
improvement portion as separate patch or I can send update patch with
your improvements and with your signoff.


>  rte_lpm_lookupx4(const struct rte_lpm *lpm, __m128i ip, uint16_t hop[4],
>  	uint16_t defv)
> +#endif
>  {
> -	__m128i i24;
>  	rte_xmm_t i8;
>  	uint16_t tbl[4];
> -	uint64_t idx, pt;
> -
> -	const __m128i mask8 =
> -		_mm_set_epi32(UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX);
> +	uint64_t pt;
>  
> +	const __m128i mask8 = _mm_set1_epi32(UINT8_MAX);
>  	/*
>  	 * RTE_LPM_VALID_EXT_ENTRY_BITMASK for 4 LPM entries
>  	 * as one 64-bit value (0x0300030003000300).
> @@ -412,20 +455,7 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, __m128i ip, uint16_t hop[4],
>  		(uint64_t)RTE_LPM_LOOKUP_SUCCESS << 32 |
>  		(uint64_t)RTE_LPM_LOOKUP_SUCCESS << 48);
>  
> -	/* get 4 indexes for tbl24[]. */
> -	i24 = _mm_srli_epi32(ip, CHAR_BIT);
> -
> -	/* extract values from tbl24[] */
> -	idx = _mm_cvtsi128_si64(i24);
> -	i24 = _mm_srli_si128(i24, sizeof(uint64_t));
> -
> -	tbl[0] = *(const uint16_t *)&lpm->tbl24[(uint32_t)idx];
> -	tbl[1] = *(const uint16_t *)&lpm->tbl24[idx >> 32];
> -
> -	idx = _mm_cvtsi128_si64(i24);
> -
> -	tbl[2] = *(const uint16_t *)&lpm->tbl24[(uint32_t)idx];
> -	tbl[3] = *(const uint16_t *)&lpm->tbl24[idx >> 32];
> +	rte_lpm_tbl24_val4(lpm, ip, tbl);
>  
>  	/* get 4 indexes for tbl8[]. */
>  	i8.x = _mm_and_si128(ip, mask8);
> -- 
> 1.8.3.1
> 

  reply	other threads:[~2015-12-01 16:42 UTC|newest]

Thread overview: 50+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-12-01 18:41 [PATCH 0/4] support acl/lpm/table/pipeline libs for armv7 and armv8 Jianbo Liu
2015-12-01 12:47 ` Jan Viktorin
2015-12-01 20:56   ` Jianbo Liu
2015-12-01 18:41 ` [PATCH 1/4] eal/arm: use RTE_ARM_EAL_RDTSC_USE_PMU in rte_cycle_32.h Jianbo Liu
2015-12-01 12:41   ` Jan Viktorin
2015-12-01 12:43   ` Jan Viktorin
2015-12-01 18:41 ` [PATCH 2/4] eal/acl: enable acl for armv7-a Jianbo Liu
2015-12-01 14:43   ` Jerin Jacob
2015-12-01 14:46     ` Jan Viktorin
2015-12-02  6:14       ` Jianbo Liu
2015-12-01 18:41 ` [PATCH 3/4] eal/arm: Enable lpm/table/pipeline libs Jianbo Liu
2015-12-01 16:41   ` Jerin Jacob [this message]
2015-12-01 17:02     ` Jan Viktorin
2015-12-02  7:02     ` Jianbo Liu
     [not found]     ` <CAP4Qi3-5ofDU-2-4KsxFzMC1OpTsc5WjmxcFT2Eu_URA0UBzDw@mail.gmail.com>
2015-12-02  8:03       ` Jerin Jacob
2015-12-02  9:49         ` Jianbo Liu
2015-12-02 10:33           ` Ananyev, Konstantin
2015-12-02 10:48             ` Jerin Jacob
2015-12-02 13:06               ` Jan Viktorin
2015-12-02 10:39           ` Jerin Jacob
2015-12-02 13:05             ` Jan Viktorin
2015-12-02 13:13             ` Jianbo Liu
2015-12-02 14:34               ` Jerin Jacob
2015-12-02 16:40                 ` Thomas Monjalon
2015-12-02 16:53                   ` Jerin Jacob
2015-12-02 16:57                     ` Thomas Monjalon
2015-12-02 17:38                       ` Jerin Jacob
2015-12-03  9:33                       ` Jerin Jacob
2015-12-03 11:02                         ` Ananyev, Konstantin
2015-12-03 12:17                           ` Jerin Jacob
2015-12-03 12:42                             ` Ananyev, Konstantin
2015-12-03 13:20                               ` Jerin Jacob
2015-12-01 18:41 ` [PATCH 4/4] maintainers: claim resposibility for ARMv7 and ARMv8 Jianbo Liu
2015-12-01 16:44   ` Jerin Jacob
2015-12-03 15:02 ` [PATCH v2 0/3] support acl lib for armv7-a and a small fix Jianbo Liu
2015-12-03 15:02   ` [PATCH v2 1/3] eal/arm: use RTE_ARM_EAL_RDTSC_USE_PMU in rte_cycle_32.h Jianbo Liu
2015-12-08  1:13     ` Thomas Monjalon
2015-12-03 15:02   ` [PATCH v2 2/3] eal/acl: enable acl for armv7-a Jianbo Liu
2015-12-03 15:13     ` Jerin Jacob
2015-12-08  1:18     ` Thomas Monjalon
2015-12-08  1:50       ` Jianbo Liu
2015-12-08  2:23         ` Thomas Monjalon
2015-12-08  7:56           ` Jianbo Liu
2015-12-08 10:03             ` Thomas Monjalon
2015-12-08 10:21               ` Jianbo Liu
2015-12-08 10:38                 ` Thomas Monjalon
2015-12-08 11:27                   ` Jan Viktorin
2015-12-08 10:25               ` Jan Viktorin
2015-12-03 15:02   ` [PATCH v2 3/3] maintainers: claim resposibility for ARMv7 and ARMv8 Jianbo Liu
2015-12-08  1:24   ` [PATCH v2 0/3] support acl lib for armv7-a and a small fix Thomas Monjalon

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20151201164139.GA12144@localhost.localdomain \
    --to=jerin.jacob@caviumnetworks.com \
    --cc=dev@dpdk.org \
    --cc=jianbo.liu@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.