* [PATCH] mempool: Reduce rte_mempool structure size
@ 2016-02-02 23:02 Keith Wiles
  2016-02-03 17:11 ` Ananyev, Konstantin
                   ` (2 more replies)
  0 siblings, 3 replies; 32+ messages in thread
From: Keith Wiles @ 2016-02-02 23:02 UTC (permalink / raw)
  To: dev

The rte_mempool structure is changed, which will cause an ABI change
for this structure.

Allow mempool cache support to be dynamic depending on whether the
mempool being created needs cache support. Saves about 1.5MB of
memory used by the rte_mempool structure. Performance does not seem
to be affected when running l3fwd, and the test_mempool run passed.

Allocating small mempools which do not require a cache can consume
large amounts of memory if you have a number of these mempools.

Signed-off-by: Keith Wiles <keith.wiles@intel.com>
---
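A rough sanity check of the "about 1.5MB" figure (a standalone sketch,
not part of the patch; it assumes the common defaults RTE_MAX_LCORE=128
and RTE_MEMPOOL_CACHE_MAX_SIZE=512 on a 64-bit target, so the exact
saving varies with the build config):

#include <stdio.h>

#define RTE_MAX_LCORE 128
#define RTE_MEMPOOL_CACHE_MAX_SIZE 512

/* stand-in for struct rte_mempool_cache from rte_mempool.h */
struct cache {
	unsigned len;
	void *objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 3];
} __attribute__((aligned(64)));	/* i.e. __rte_cache_aligned */

int main(void)
{
	/* one per-lcore cache: 1536 pointers plus len, cache aligned */
	printf("per-lcore cache: %zu bytes\n", sizeof(struct cache));
	/* the embedded array this patch removes from struct rte_mempool */
	printf("all lcores:      %zu bytes\n",
	       sizeof(struct cache) * RTE_MAX_LCORE);
	return 0;
}
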
 app/test/test_mempool.c          |  4 +--
 lib/librte_mempool/rte_mempool.c | 56 ++++++++++++++++++----------------------
 lib/librte_mempool/rte_mempool.h | 29 ++++++++++-----------
 3 files changed, 40 insertions(+), 49 deletions(-)

diff --git a/app/test/test_mempool.c b/app/test/test_mempool.c
index 72f8fb6..7b479f8 100644
--- a/app/test/test_mempool.c
+++ b/app/test/test_mempool.c
@@ -122,8 +122,8 @@ test_mempool_basic(void)
 		return -1;
 
 	printf("get private data\n");
-	if (rte_mempool_get_priv(mp) !=
-			(char*) mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num))
+	if (rte_mempool_get_priv(mp) != (char *)mp +
+			MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size))
 		return -1;
 
 	printf("get physical address of an object\n");
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index aff5f6d..bdf8e2e 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -450,15 +450,11 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 	int page_size = getpagesize();
 
 	/* compilation-time checks */
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool) &
 			  RTE_CACHE_LINE_MASK) != 0);
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_cache) &
 			  RTE_CACHE_LINE_MASK) != 0);
-	RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, local_cache) &
-			  RTE_CACHE_LINE_MASK) != 0);
-#endif
-#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_debug_stats) &
 			  RTE_CACHE_LINE_MASK) != 0);
 	RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, stats) &
@@ -527,9 +523,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 		 */
 		int head = sizeof(struct rte_mempool);
 		int new_size = (private_data_size + head) % page_size;
-		if (new_size) {
+		if (new_size)
 			private_data_size += page_size - new_size;
-		}
 	}
 
 	/* try to allocate tailq entry */
@@ -544,7 +539,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 	 * store mempool objects. Otherwise reserve a memzone that is large
 	 * enough to hold mempool header and metadata plus mempool objects.
 	 */
-	mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num) + private_data_size;
+	mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size);
+	mempool_size += private_data_size;
 	mempool_size = RTE_ALIGN_CEIL(mempool_size, RTE_MEMPOOL_ALIGN);
 	if (vaddr == NULL)
 		mempool_size += (size_t)objsz.total_size * n;
@@ -598,8 +594,15 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 	mp->cache_flushthresh = CALC_CACHE_FLUSHTHRESH(cache_size);
 	mp->private_data_size = private_data_size;
 
+	/*
+	 * local_cache pointer is set even if cache_size is zero.
+	 * The local_cache points to just past the elt_pa[] array.
+	 */
+	mp->local_cache = (struct rte_mempool_cache *)
+			((char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num, 0));
+
 	/* calculate address of the first element for continuous mempool. */
-	obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num) +
+	obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size) +
 		private_data_size;
 	obj = RTE_PTR_ALIGN_CEIL(obj, RTE_MEMPOOL_ALIGN);
 
@@ -613,9 +616,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 		mp->elt_va_start = (uintptr_t)obj;
 		mp->elt_pa[0] = mp->phys_addr +
 			(mp->elt_va_start - (uintptr_t)mp);
-
-	/* mempool elements in a separate chunk of memory. */
 	} else {
+		/* mempool elements in a separate chunk of memory. */
 		mp->elt_va_start = (uintptr_t)vaddr;
 		memcpy(mp->elt_pa, paddr, sizeof (mp->elt_pa[0]) * pg_num);
 	}
@@ -645,19 +647,15 @@ unsigned
 rte_mempool_count(const struct rte_mempool *mp)
 {
 	unsigned count;
+	unsigned lcore_id;
 
 	count = rte_ring_count(mp->ring);
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
-	{
-		unsigned lcore_id;
-		if (mp->cache_size == 0)
-			return count;
+	if (mp->cache_size == 0)
+		return count;
 
-		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
-			count += mp->local_cache[lcore_id].len;
-	}
-#endif
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
+		count += mp->local_cache[lcore_id].len;
 
 	/*
 	 * due to race condition (access to len is not locked), the
@@ -672,13 +670,15 @@ rte_mempool_count(const struct rte_mempool *mp)
 static unsigned
 rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
 {
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	unsigned lcore_id;
 	unsigned count = 0;
 	unsigned cache_count;
 
 	fprintf(f, "  cache infos:\n");
 	fprintf(f, "    cache_size=%"PRIu32"\n", mp->cache_size);
+	if (mp->cache_size == 0)
+		return count;
+
 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
 		cache_count = mp->local_cache[lcore_id].len;
 		fprintf(f, "    cache_count[%u]=%u\n", lcore_id, cache_count);
@@ -686,11 +686,6 @@ rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
 	}
 	fprintf(f, "    total_cache_count=%u\n", count);
 	return count;
-#else
-	RTE_SET_USED(mp);
-	fprintf(f, "  cache disabled\n");
-	return 0;
-#endif
 }
 
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
@@ -755,13 +750,16 @@ mempool_audit_cookies(const struct rte_mempool *mp)
 #define mempool_audit_cookies(mp) do {} while(0)
 #endif
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 /* check cookies before and after objects */
 static void
 mempool_audit_cache(const struct rte_mempool *mp)
 {
 	/* check cache size consistency */
 	unsigned lcore_id;
+
+	if (mp->cache_size == 0)
+		return;
+
 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
 		if (mp->local_cache[lcore_id].len > mp->cache_flushthresh) {
 			RTE_LOG(CRIT, MEMPOOL, "badness on cache[%u]\n",
@@ -770,10 +768,6 @@ mempool_audit_cache(const struct rte_mempool *mp)
 		}
 	}
 }
-#else
-#define mempool_audit_cache(mp) do {} while(0)
-#endif
-
 
 /* check the consistency of mempool (size, cookies, ...) */
 void
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 6e2390a..434ef98 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -95,7 +95,6 @@ struct rte_mempool_debug_stats {
 } __rte_cache_aligned;
 #endif
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 /**
  * A structure that stores a per-core object cache.
  */
@@ -107,7 +106,6 @@ struct rte_mempool_cache {
 	 */
 	void *objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 3]; /**< Cache objects */
 } __rte_cache_aligned;
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
 
 /**
  * A structure that stores the size of mempool elements.
@@ -194,10 +192,7 @@ struct rte_mempool {
 
 	unsigned private_data_size;      /**< Size of private data. */
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
-	/** Per-lcore local cache. */
-	struct rte_mempool_cache local_cache[RTE_MAX_LCORE];
-#endif
+	struct rte_mempool_cache *local_cache; /**< Per-lcore local cache */
 
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
 	/** Per-lcore statistics. */
@@ -247,6 +242,13 @@ struct rte_mempool {
 #endif
 
 /**
+ * Size of the elt_pa array, based on the number of pages. (Internal use)
+ */
+#define __PA_SIZE(mp, pgn) \
+	RTE_ALIGN_CEIL((((pgn) - RTE_DIM((mp)->elt_pa)) * \
+	sizeof((mp)->elt_pa[0])), RTE_CACHE_LINE_SIZE)
+
+/**
  * Calculate the size of the mempool header.
  *
  * @param mp
@@ -254,9 +256,9 @@ struct rte_mempool {
  * @param pgn
  *   Number of pages used to store mempool objects.
  */
-#define	MEMPOOL_HEADER_SIZE(mp, pgn)	(sizeof(*(mp)) + \
-	RTE_ALIGN_CEIL(((pgn) - RTE_DIM((mp)->elt_pa)) * \
-	sizeof ((mp)->elt_pa[0]), RTE_CACHE_LINE_SIZE))
+#define MEMPOOL_HEADER_SIZE(mp, pgn, cs) \
+	(sizeof(*(mp)) + __PA_SIZE(mp, pgn) + (((cs) == 0) ? 0 : \
+	(sizeof(struct rte_mempool_cache) * RTE_MAX_LCORE)))
 
 /**
  * Return true if the whole mempool is in contiguous memory.
@@ -755,19 +757,16 @@ static inline void __attribute__((always_inline))
 __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
 		    unsigned n, int is_mp)
 {
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	struct rte_mempool_cache *cache;
 	uint32_t index;
 	void **cache_objs;
 	unsigned lcore_id = rte_lcore_id();
 	uint32_t cache_size = mp->cache_size;
 	uint32_t flushthresh = mp->cache_flushthresh;
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
 
 	/* increment stat now, adding in mempool always success */
 	__MEMPOOL_STAT_ADD(mp, put, n);
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	/* cache is not enabled or single producer or non-EAL thread */
 	if (unlikely(cache_size == 0 || is_mp == 0 ||
 		     lcore_id >= RTE_MAX_LCORE))
@@ -802,7 +801,6 @@ __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
 	return;
 
 ring_enqueue:
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
 
 	/* push remaining objects in ring */
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
@@ -946,7 +944,6 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
 		   unsigned n, int is_mc)
 {
 	int ret;
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	struct rte_mempool_cache *cache;
 	uint32_t index, len;
 	void **cache_objs;
@@ -992,7 +989,6 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
 	return 0;
 
 ring_dequeue:
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
 
 	/* get remaining objects from ring */
 	if (is_mc)
@@ -1293,7 +1289,8 @@ void rte_mempool_audit(const struct rte_mempool *mp);
  */
 static inline void *rte_mempool_get_priv(struct rte_mempool *mp)
 {
-	return (char *)mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num);
+	return (char *)mp +
+		MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size);
 }
 
 /**
-- 
2.7.0
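
For readers following the pointer arithmetic, a sketch of the memzone
layout that results from this patch (annotation only; cache_base() is
a made-up helper mirroring what rte_mempool_xmem_create() does):

/*
 *   (char *)mp
 *   +--------------------------------+
 *   | struct rte_mempool             |
 *   | (elt_pa[] grows past the end)  |
 *   +--------------------------------+ <- MEMPOOL_HEADER_SIZE(mp, pg_num, 0)
 *   | local_cache[RTE_MAX_LCORE]     |    (zero bytes when cache_size == 0)
 *   +--------------------------------+ <- MEMPOOL_HEADER_SIZE(mp, pg_num, cs)
 *   | private data                   |    == rte_mempool_get_priv(mp)
 *   +--------------------------------+
 *   | objects (when vaddr == NULL)   |
 *   +--------------------------------+
 */
#include <rte_mempool.h>	/* with this patch applied */

static inline struct rte_mempool_cache *
cache_base(struct rte_mempool *mp)
{
	/* local_cache is set to this address even when cache_size == 0;
	 * it then points at the private data area but is never
	 * dereferenced, since every cache path checks cache_size first. */
	return (struct rte_mempool_cache *)
		((char *)mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num, 0));
}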


* Re: [PATCH] mempool: Reduce rte_mempool structure size
  2016-02-02 23:02 [PATCH] mempool: Reduce rte_mempool structure size Keith Wiles
@ 2016-02-03 17:11 ` Ananyev, Konstantin
  2016-02-08 11:02 ` Olivier MATZ
  2016-02-09 17:30 ` [PATCH v2] mempool: reduce " Keith Wiles
  2 siblings, 0 replies; 32+ messages in thread
From: Ananyev, Konstantin @ 2016-02-03 17:11 UTC (permalink / raw)
  To: Wiles, Keith, dev



> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Keith Wiles
> Sent: Tuesday, February 02, 2016 11:03 PM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH] mempool: Reduce rte_mempool structure size
> 
> The rte_mempool structure is changed, which will cause an ABI change
> for this structure.
> 
> Allow mempool cache support to be dynamic depending on whether the
> mempool being created needs cache support. Saves about 1.5MB of
> memory used by the rte_mempool structure. Performance does not seem
> to be affected when running l3fwd, and the test_mempool run passed.
> 
> Allocating small mempools which do not require a cache can consume
> large amounts of memory if you have a number of these mempools.
> 
> Signed-off-by: Keith Wiles <keith.wiles@intel.com>
> ---

Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>


* Re: [PATCH] mempool: Reduce rte_mempool structure size
  2016-02-02 23:02 [PATCH] mempool: Reduce rte_mempool structure size Keith Wiles
  2016-02-03 17:11 ` Ananyev, Konstantin
@ 2016-02-08 11:02 ` Olivier MATZ
  2016-02-08 15:57   ` Wiles, Keith
  2016-02-09 17:30 ` [PATCH v2] mempool: reduce " Keith Wiles
  2 siblings, 1 reply; 32+ messages in thread
From: Olivier MATZ @ 2016-02-08 11:02 UTC (permalink / raw)
  To: Keith Wiles, dev

Hi Keith,

Looks good, thanks. Please find some comments below.

> [PATCH] mempool: Reduce rte_mempool structure size

nit: we usually avoid uppercase letters in the title

On 02/03/2016 12:02 AM, Keith Wiles wrote:
> diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
> index aff5f6d..bdf8e2e 100644
> --- a/lib/librte_mempool/rte_mempool.c
> +++ b/lib/librte_mempool/rte_mempool.c
> @@ -450,15 +450,11 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
>  	int page_size = getpagesize();
>  
>  	/* compilation-time checks */
> +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
>  	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool) &
>  			  RTE_CACHE_LINE_MASK) != 0);
> -#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>  	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_cache) &
>  			  RTE_CACHE_LINE_MASK) != 0);
> -	RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, local_cache) &
> -			  RTE_CACHE_LINE_MASK) != 0);
> -#endif
> -#ifdef RTE_LIBRTE_MEMPOOL_DEBUG

I don't think the #ifdef RTE_LIBRTE_MEMPOOL_DEBUG should be moved.
It should only protect the checks on stats, which are enabled
in debug mode.

> @@ -194,10 +192,7 @@ struct rte_mempool {
>  
>  	unsigned private_data_size;      /**< Size of private data. */
>  
> -#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> -	/** Per-lcore local cache. */
> -	struct rte_mempool_cache local_cache[RTE_MAX_LCORE];
> -#endif
> +	struct rte_mempool_cache *local_cache; /**< Per-lcore local cache */
>  
>  #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
>  	/** Per-lcore statistics. */

As you noticed in your initial mail, this changes the ABI. I
think your patch justifies the ABI change, so I think it should
follow the ABI change process described in
dpdk/doc/guides/contributing/versioning.rst.

From what I understand of versioning.rst, this kind of change
requires a deprecation notice first, and will be integrated in the
next version. I don't think it's easy to keep backward compatibility
in this case, especially because the rte_mempool structure is
used by several inlined functions.
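
To make the inline problem concrete (example_cache_size() is a made-up
helper, not a DPDK API): the field offset is resolved when the
*application* is compiled, so no library-side symbol versioning can
rebind it at run time.

static inline unsigned
example_cache_size(const struct rte_mempool *mp)
{
	/* the offset of cache_size is baked into the caller's binary */
	return mp->cache_size;
}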

Regards,
Olivier


* Re: [PATCH] mempool: Reduce rte_mempool structure size
  2016-02-08 11:02 ` Olivier MATZ
@ 2016-02-08 15:57   ` Wiles, Keith
  0 siblings, 0 replies; 32+ messages in thread
From: Wiles, Keith @ 2016-02-08 15:57 UTC (permalink / raw)
  To: Olivier MATZ, dev


>Hi Keith,
>
>Looks good, thanks. Please find some comments below.
>
>> [PATCH] mempool: Reduce rte_mempool structure size
>
>nit: we usually avoid uppercase letters in the title

Will make that change for v2. Why no uppercase letters in the title? It seems a bit odd to me in this case.
>
>On 02/03/2016 12:02 AM, Keith Wiles wrote:
>> diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
>> index aff5f6d..bdf8e2e 100644
>> --- a/lib/librte_mempool/rte_mempool.c
>> +++ b/lib/librte_mempool/rte_mempool.c
>> @@ -450,15 +450,11 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
>>  	int page_size = getpagesize();
>>  
>>  	/* compilation-time checks */
>> +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
>>  	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool) &
>>  			  RTE_CACHE_LINE_MASK) != 0);
>> -#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>>  	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_cache) &
>>  			  RTE_CACHE_LINE_MASK) != 0);
>> -	RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, local_cache) &
>> -			  RTE_CACHE_LINE_MASK) != 0);
>> -#endif
>> -#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
>
>I don't think the #ifdef RTE_LIBRTE_MEMPOOL_DEBUG should be moved.
>It should only protect the checks on stats, which are enabled
>in debug mode.

Will make that change for v2.
>
>> @@ -194,10 +192,7 @@ struct rte_mempool {
>>  
>>  	unsigned private_data_size;      /**< Size of private data. */
>>  
>> -#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>> -	/** Per-lcore local cache. */
>> -	struct rte_mempool_cache local_cache[RTE_MAX_LCORE];
>> -#endif
>> +	struct rte_mempool_cache *local_cache; /**< Per-lcore local cache */
>>  
>>  #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
>>  	/** Per-lcore statistics. */
>
>As you noticed in your initial mail, this changes the ABI. I
>think your patch justifies the ABI change, so I think it should
>follow the ABI change process described in
>dpdk/doc/guides/contributing/versioning.rst.
>
>From what I understand of versioning.rst, this kind of change
>requires a deprecation notice first, and will be integrated in the
>next version. I don't think it's easy to keep backward compatibility
>in this case, especially because the rte_mempool structure is
>used by several inlined functions.

I am reading the ABI doc and need to understand this process a bit more, but from what I can tell I need to add an ifdef RTE_NEXT_ABI around the new structure and the old one. Not sure where else I need to do that, as compat is a bit hard, as you stated. As for the ABI version file, is there something that needs to be done in that file too?

You can reply to me directly if you like, to save some bandwidth.
>
>Regards,
>Olivier
>


Regards,
Keith






* [PATCH v2] mempool: reduce rte_mempool structure size
  2016-02-02 23:02 [PATCH] mempool: Reduce rte_mempool structure size Keith Wiles
  2016-02-03 17:11 ` Ananyev, Konstantin
  2016-02-08 11:02 ` Olivier MATZ
@ 2016-02-09 17:30 ` Keith Wiles
  2016-02-10 16:59   ` Olivier MATZ
                     ` (2 more replies)
  2 siblings, 3 replies; 32+ messages in thread
From: Keith Wiles @ 2016-02-09 17:30 UTC (permalink / raw)
  To: dev

Patch v2 to add some comments and setup for RTE_NEXT_ABI changes.

The rte_mempool structure is changed, which will cause an ABI change
for this structure. Providing backward compatibility is not
reasonable here, as this structure is used in multiple defines/inlines.

Allow mempool cache support to be dynamic depending on whether the
mempool being created needs cache support. Saves about 1.5MB of
memory used by the rte_mempool structure.

Allocating small mempools which do not require a cache can consume
large amounts of memory if you have a number of these mempools.

Signed-off-by: Keith Wiles <keith.wiles@intel.com>
---
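Note: the RTE_NEXT_ABI mechanics used throughout, in miniature (a
sketch with made-up names, not DPDK code). Both layouts stay in the
tree, CONFIG_RTE_NEXT_ABI selects which one compiles, and the #else
branch is deleted once the deprecation period ends:

struct example_cache { unsigned len; };

#ifdef RTE_NEXT_ABI
struct example {
	struct example_cache *local_cache;	/* new ABI: pointer */
};
#else
struct example {
	struct example_cache local_cache[4];	/* old ABI: embedded array */
};
#endif
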
 app/test/test_mempool.c                     |  5 ++
 config/defconfig_x86_64-native-linuxapp-gcc |  5 ++
 lib/librte_mempool/rte_mempool.c            | 83 ++++++++++++++++++++++++++---
 lib/librte_mempool/rte_mempool.h            | 57 +++++++++++++++++++-
 4 files changed, 143 insertions(+), 7 deletions(-)

diff --git a/app/test/test_mempool.c b/app/test/test_mempool.c
index 72f8fb6..2829d40 100644
--- a/app/test/test_mempool.c
+++ b/app/test/test_mempool.c
@@ -122,8 +122,13 @@ test_mempool_basic(void)
 		return -1;
 
 	printf("get private data\n");
+#ifdef RTE_NEXT_ABI
+	if (rte_mempool_get_priv(mp) != (char *)mp +
+			MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size))
+#else
 	if (rte_mempool_get_priv(mp) !=
 			(char*) mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num))
+#endif
 		return -1;
 
 	printf("get physical address of an object\n");
diff --git a/config/defconfig_x86_64-native-linuxapp-gcc b/config/defconfig_x86_64-native-linuxapp-gcc
index 60baf5b..02e9ace 100644
--- a/config/defconfig_x86_64-native-linuxapp-gcc
+++ b/config/defconfig_x86_64-native-linuxapp-gcc
@@ -40,3 +40,8 @@ CONFIG_RTE_ARCH_64=y
 
 CONFIG_RTE_TOOLCHAIN="gcc"
 CONFIG_RTE_TOOLCHAIN_GCC=y
+CONFIG_RTE_BUILD_SHARED_LIB=y
+CONFIG_RTE_NEXT_ABI=n
+CONFIG_RTE_EAL_IGB_UIO=n
+CONFIG_RTE_LIBRTE_KNI=n
+CONFIG_RTE_KNI_KMOD=n
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index aff5f6d..c61dc44 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -452,12 +452,17 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 	/* compilation-time checks */
 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool) &
 			  RTE_CACHE_LINE_MASK) != 0);
+#ifdef RTE_NEXT_ABI
+	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_cache) &
+			  RTE_CACHE_LINE_MASK) != 0);
+#else
 #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_cache) &
 			  RTE_CACHE_LINE_MASK) != 0);
 	RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, local_cache) &
 			  RTE_CACHE_LINE_MASK) != 0);
 #endif
+#endif /* RTE_NEXT_ABI */
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_debug_stats) &
 			  RTE_CACHE_LINE_MASK) != 0);
@@ -527,9 +532,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 		 */
 		int head = sizeof(struct rte_mempool);
 		int new_size = (private_data_size + head) % page_size;
-		if (new_size) {
+		if (new_size)
 			private_data_size += page_size - new_size;
-		}
 	}
 
 	/* try to allocate tailq entry */
@@ -544,7 +548,12 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 	 * store mempool objects. Otherwise reserve a memzone that is large
 	 * enough to hold mempool header and metadata plus mempool objects.
 	 */
+#ifdef RTE_NEXT_ABI
+	mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size);
+	mempool_size += private_data_size;
+#else
 	mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num) + private_data_size;
+#endif /* RTE_NEXT_ABI */
 	mempool_size = RTE_ALIGN_CEIL(mempool_size, RTE_MEMPOOL_ALIGN);
 	if (vaddr == NULL)
 		mempool_size += (size_t)objsz.total_size * n;
@@ -598,9 +607,22 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 	mp->cache_flushthresh = CALC_CACHE_FLUSHTHRESH(cache_size);
 	mp->private_data_size = private_data_size;
 
+#ifdef RTE_NEXT_ABI
+	/*
+	 * local_cache pointer is set even if cache_size is zero.
+	 * The local_cache points to just past the elt_pa[] array.
+	 */
+	mp->local_cache = (struct rte_mempool_cache *)
+			((char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num, 0));
+
+	/* calculate address of the first element for continuous mempool. */
+	obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size) +
+		private_data_size;
+#else
 	/* calculate address of the first element for continuous mempool. */
 	obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num) +
 		private_data_size;
+#endif /* RTE_NEXT_ABI */
 	obj = RTE_PTR_ALIGN_CEIL(obj, RTE_MEMPOOL_ALIGN);
 
 	/* populate address translation fields. */
@@ -613,9 +635,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 		mp->elt_va_start = (uintptr_t)obj;
 		mp->elt_pa[0] = mp->phys_addr +
 			(mp->elt_va_start - (uintptr_t)mp);
-
-	/* mempool elements in a separate chunk of memory. */
 	} else {
+		/* mempool elements in a separate chunk of memory. */
 		mp->elt_va_start = (uintptr_t)vaddr;
 		memcpy(mp->elt_pa, paddr, sizeof (mp->elt_pa[0]) * pg_num);
 	}
@@ -645,10 +666,21 @@ unsigned
 rte_mempool_count(const struct rte_mempool *mp)
 {
 	unsigned count;
+#ifdef RTE_NEXT_ABI
+	unsigned lcore_id;
 
 	count = rte_ring_count(mp->ring);
 
+	if (mp->cache_size == 0)
+		return count;
+
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
+		count += mp->local_cache[lcore_id].len;
+#else
 #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
+
+	count = rte_ring_count(mp->ring);
+
 	{
 		unsigned lcore_id;
 		if (mp->cache_size == 0)
@@ -658,7 +690,7 @@ rte_mempool_count(const struct rte_mempool *mp)
 			count += mp->local_cache[lcore_id].len;
 	}
 #endif
-
+#endif /* RTE_NEXT_ABI */
 	/*
 	 * due to race condition (access to len is not locked), the
 	 * total can be greater than size... so fix the result
@@ -672,6 +704,24 @@ rte_mempool_count(const struct rte_mempool *mp)
 static unsigned
 rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
 {
+#ifdef RTE_NEXT_ABI
+	unsigned lcore_id;
+	unsigned count = 0;
+	unsigned cache_count;
+
+	fprintf(f, "  cache infos:\n");
+	fprintf(f, "    cache_size=%"PRIu32"\n", mp->cache_size);
+	if (mp->cache_size == 0)
+		return count;
+
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+		cache_count = mp->local_cache[lcore_id].len;
+		fprintf(f, "    cache_count[%u]=%u\n", lcore_id, cache_count);
+		count += cache_count;
+	}
+	fprintf(f, "    total_cache_count=%u\n", count);
+	return count;
+#else
 #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	unsigned lcore_id;
 	unsigned count = 0;
@@ -691,6 +741,7 @@ rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
 	fprintf(f, "  cache disabled\n");
 	return 0;
 #endif
+#endif /* RTE_NEXT_ABI */
 }
 
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
@@ -755,6 +806,26 @@ mempool_audit_cookies(const struct rte_mempool *mp)
 #define mempool_audit_cookies(mp) do {} while(0)
 #endif
 
+#ifdef RTE_NEXT_ABI
+/* check cookies before and after objects */
+static void
+mempool_audit_cache(const struct rte_mempool *mp)
+{
+	/* check cache size consistency */
+	unsigned lcore_id;
+
+	if (mp->cache_size == 0)
+		return;
+
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+		if (mp->local_cache[lcore_id].len > mp->cache_flushthresh) {
+			RTE_LOG(CRIT, MEMPOOL, "badness on cache[%u]\n",
+				lcore_id);
+			rte_panic("MEMPOOL: invalid cache len\n");
+		}
+	}
+}
+#else
 #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 /* check cookies before and after objects */
 static void
@@ -773,7 +844,7 @@ mempool_audit_cache(const struct rte_mempool *mp)
 #else
 #define mempool_audit_cache(mp) do {} while(0)
 #endif
-
+#endif /* RTE_NEXT_ABI */
 
 /* check the consistency of mempool (size, cookies, ...) */
 void
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 6e2390a..fc9b595 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -95,6 +95,19 @@ struct rte_mempool_debug_stats {
 } __rte_cache_aligned;
 #endif
 
+#ifdef RTE_NEXT_ABI
+/**
+ * A structure that stores a per-core object cache.
+ */
+struct rte_mempool_cache {
+	unsigned len; /**< Cache len */
+	/*
+	 * Cache is allocated to this size to allow it to overflow in certain
+	 * cases to avoid needless emptying of cache.
+	 */
+	void *objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 3]; /**< Cache objects */
+} __rte_cache_aligned;
+#else
 #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 /**
  * A structure that stores a per-core object cache.
@@ -108,6 +121,7 @@ struct rte_mempool_cache {
 	void *objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 3]; /**< Cache objects */
 } __rte_cache_aligned;
 #endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
+#endif /* RTE_NEXT_ABI */
 
 /**
  * A structure that stores the size of mempool elements.
@@ -194,10 +208,14 @@ struct rte_mempool {
 
 	unsigned private_data_size;      /**< Size of private data. */
 
+#ifdef RTE_NEXT_ABI
+	struct rte_mempool_cache *local_cache; /**< Per-lcore local cache */
+#else
 #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	/** Per-lcore local cache. */
 	struct rte_mempool_cache local_cache[RTE_MAX_LCORE];
 #endif
+#endif  /* RTE_NEXT_ABI */
 
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
 	/** Per-lcore statistics. */
@@ -246,6 +264,26 @@ struct rte_mempool {
 #define __MEMPOOL_STAT_ADD(mp, name, n) do {} while(0)
 #endif
 
+#ifdef RTE_NEXT_ABI
+/**
+ * Size of the elt_pa array, based on the number of pages. (Internal use)
+ */
+#define __PA_SIZE(mp, pgn) \
+	RTE_ALIGN_CEIL((((pgn) - RTE_DIM((mp)->elt_pa)) * \
+	sizeof((mp)->elt_pa[0])), RTE_CACHE_LINE_SIZE)
+
+/**
+ * Calculate the size of the mempool header.
+ *
+ * @param mp
+ *   Pointer to the memory pool.
+ * @param pgn
+ *   Number of pages used to store mempool objects.
+ */
+#define MEMPOOL_HEADER_SIZE(mp, pgn, cs) \
+	(sizeof(*(mp)) + __PA_SIZE(mp, pgn) + (((cs) == 0) ? 0 : \
+	(sizeof(struct rte_mempool_cache) * RTE_MAX_LCORE)))
+#else
 /**
  * Calculate the size of the mempool header.
  *
@@ -257,7 +295,7 @@ struct rte_mempool {
 #define	MEMPOOL_HEADER_SIZE(mp, pgn)	(sizeof(*(mp)) + \
 	RTE_ALIGN_CEIL(((pgn) - RTE_DIM((mp)->elt_pa)) * \
 	sizeof ((mp)->elt_pa[0]), RTE_CACHE_LINE_SIZE))
-
+#endif /* RTE_NEXT_ABI */
 /**
  * Return true if the whole mempool is in contiguous memory.
  */
@@ -755,19 +793,25 @@ static inline void __attribute__((always_inline))
 __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
 		    unsigned n, int is_mp)
 {
+#ifndef RTE_NEXT_ABI	/* Note: ifndef */
 #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
+#endif /* RTE_NEXT_ABI */
 	struct rte_mempool_cache *cache;
 	uint32_t index;
 	void **cache_objs;
 	unsigned lcore_id = rte_lcore_id();
 	uint32_t cache_size = mp->cache_size;
 	uint32_t flushthresh = mp->cache_flushthresh;
+#ifndef RTE_NEXT_ABI	/* Note: ifndef */
 #endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
+#endif /* RTE_NEXT_ABI */
 
 	/* increment stat now, adding in mempool always success */
 	__MEMPOOL_STAT_ADD(mp, put, n);
 
+#ifndef RTE_NEXT_ABI	/* Note: ifndef */
 #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
+#endif /* RTE_NEXT_ABI */
 	/* cache is not enabled or single producer or non-EAL thread */
 	if (unlikely(cache_size == 0 || is_mp == 0 ||
 		     lcore_id >= RTE_MAX_LCORE))
@@ -802,7 +846,9 @@ __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
 	return;
 
 ring_enqueue:
+#ifndef RTE_NEXT_ABI	/* Note: ifndef */
 #endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
+#endif /* RTE_NEXT_ABI */
 
 	/* push remaining objects in ring */
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
@@ -946,7 +992,9 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
 		   unsigned n, int is_mc)
 {
 	int ret;
+#ifndef RTE_NEXT_ABI	/* Note: ifndef */
 #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
+#endif /* RTE_NEXT_ABI */
 	struct rte_mempool_cache *cache;
 	uint32_t index, len;
 	void **cache_objs;
@@ -992,7 +1040,9 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
 	return 0;
 
 ring_dequeue:
+#ifndef RTE_NEXT_ABI	/* Note: ifndef */
 #endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
+#endif /* RTE_NEXT_ABI */
 
 	/* get remaining objects from ring */
 	if (is_mc)
@@ -1293,7 +1343,12 @@ void rte_mempool_audit(const struct rte_mempool *mp);
  */
 static inline void *rte_mempool_get_priv(struct rte_mempool *mp)
 {
+#ifdef RTE_NEXT_ABI
+	return (char *)mp +
+		MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size);
+#else
 	return (char *)mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num);
+#endif /* RTE_NEXT_ABI */
 }
 
 /**
-- 
2.7.0


* Re: [PATCH v2] mempool: reduce rte_mempool structure size
  2016-02-09 17:30 ` [PATCH v2] mempool: reduce " Keith Wiles
@ 2016-02-10 16:59   ` Olivier MATZ
  2016-02-10 17:22     ` Wiles, Keith
  2016-02-10 18:35     ` Wiles, Keith
  2016-02-10 21:18   ` [PATCH v3] " Keith Wiles
  2016-02-12 18:36   ` [PATCH v4] " Keith Wiles
  2 siblings, 2 replies; 32+ messages in thread
From: Olivier MATZ @ 2016-02-10 16:59 UTC (permalink / raw)
  To: Keith Wiles, dev

Hi Keith,

Thank you for adding the RTE_NEXT_ABI. I think this is the way
described in the process. Your changes will be available in the next
version (16.04) for people compiling with RTE_NEXT_ABI=y, and in
16.07 without the option (I'm just surprised that RTE_NEXT_ABI=y in
the default configs...).

I think a deprecation notice should also be added in this commit
in doc/guides/rel_notes/deprecation.rst.

Please also find comments below.

On 02/09/2016 06:30 PM, Keith Wiles wrote:

> diff --git a/config/defconfig_x86_64-native-linuxapp-gcc b/config/defconfig_x86_64-native-linuxapp-gcc
> index 60baf5b..02e9ace 100644
> --- a/config/defconfig_x86_64-native-linuxapp-gcc
> +++ b/config/defconfig_x86_64-native-linuxapp-gcc
> @@ -40,3 +40,8 @@ CONFIG_RTE_ARCH_64=y
>  
>  CONFIG_RTE_TOOLCHAIN="gcc"
>  CONFIG_RTE_TOOLCHAIN_GCC=y
> +CONFIG_RTE_BUILD_SHARED_LIB=y
> +CONFIG_RTE_NEXT_ABI=n
> +CONFIG_RTE_EAL_IGB_UIO=n
> +CONFIG_RTE_LIBRTE_KNI=n
> +CONFIG_RTE_KNI_KMOD=n

I think this should not be part of the patch.

> @@ -672,6 +704,24 @@ rte_mempool_count(const struct rte_mempool *mp)
>  static unsigned
>  rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
>  {
> +#ifdef RTE_NEXT_ABI
> +	unsigned lcore_id;
> +	unsigned count = 0;
> +	unsigned cache_count;
> +
> +	fprintf(f, "  cache infos:\n");
> +	fprintf(f, "    cache_size=%"PRIu32"\n", mp->cache_size);
> +	if (mp->cache_size == 0)
> +		return count;
> +
> +	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
> +		cache_count = mp->local_cache[lcore_id].len;
> +		fprintf(f, "    cache_count[%u]=%u\n", lcore_id, cache_count);
> +		count += cache_count;
> +	}
> +	fprintf(f, "    total_cache_count=%u\n", count);
> +	return count;
> +#else
>  #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>  	unsigned lcore_id;
>  	unsigned count = 0;

I think in this case we could avoid duplicating the code without
being unclear, by using the proper #ifdefs:

#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 || defined(RTE_NEXT_ABI)
	/* common code */
#ifdef RTE_NEXT_ABI
	if (mp->cache_size == 0)
		return count;
#endif
	/* common code */
#else
...
#endif


> @@ -755,6 +806,26 @@ mempool_audit_cookies(const struct rte_mempool *mp)
>  #define mempool_audit_cookies(mp) do {} while(0)
>  #endif
>  
> +#ifdef RTE_NEXT_ABI
> +/* check cookies before and after objects */
> +static void
> +mempool_audit_cache(const struct rte_mempool *mp)
> +{
> +	/* check cache size consistency */
> +	unsigned lcore_id;
> +
> +	if (mp->cache_size == 0)
> +		return;
> +
> +	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
> +		if (mp->local_cache[lcore_id].len > mp->cache_flushthresh) {
> +			RTE_LOG(CRIT, MEMPOOL, "badness on cache[%u]\n",
> +				lcore_id);
> +			rte_panic("MEMPOOL: invalid cache len\n");
> +		}
> +	}
> +}
> +#else

same here

> diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
> index 6e2390a..fc9b595 100644
> --- a/lib/librte_mempool/rte_mempool.h
> +++ b/lib/librte_mempool/rte_mempool.h
> @@ -95,6 +95,19 @@ struct rte_mempool_debug_stats {
>  } __rte_cache_aligned;
>  #endif
>  
> +#ifdef RTE_NEXT_ABI
> +/**
> + * A structure that stores a per-core object cache.
> + */
> +struct rte_mempool_cache {
> +	unsigned len; /**< Cache len */
> +	/*
> +	 * Cache is allocated to this size to allow it to overflow in certain
> +	 * cases to avoid needless emptying of cache.
> +	 */
> +	void *objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 3]; /**< Cache objects */
> +} __rte_cache_aligned;
> +#else

same here



> @@ -755,19 +793,25 @@ static inline void __attribute__((always_inline))
>  __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
>  		    unsigned n, int is_mp)
>  {
> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>  #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> +#endif /* RTE_NEXT_ABI */
>  	struct rte_mempool_cache *cache;
>  	uint32_t index;
>  	void **cache_objs;
>  	unsigned lcore_id = rte_lcore_id();
>  	uint32_t cache_size = mp->cache_size;
>  	uint32_t flushthresh = mp->cache_flushthresh;
> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>  #endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
> +#endif /* RTE_NEXT_ABI */

this looks strange... I think it does not work properly.
Why not
#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 || defined(RTE_NEXT_ABI)

>  	/* increment stat now, adding in mempool always success */
>  	__MEMPOOL_STAT_ADD(mp, put, n);
>  
> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>  #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> +#endif /* RTE_NEXT_ABI */
>  	/* cache is not enabled or single producer or non-EAL thread */
>  	if (unlikely(cache_size == 0 || is_mp == 0 ||
>  		     lcore_id >= RTE_MAX_LCORE))
> @@ -802,7 +846,9 @@ __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
>  	return;
>  
>  ring_enqueue:
> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>  #endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
> +#endif /* RTE_NEXT_ABI */
>  
>  	/* push remaining objects in ring */
>  #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
> @@ -946,7 +992,9 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
>  		   unsigned n, int is_mc)
>  {
>  	int ret;
> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>  #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> +#endif /* RTE_NEXT_ABI */
>  	struct rte_mempool_cache *cache;
>  	uint32_t index, len;
>  	void **cache_objs;
> @@ -992,7 +1040,9 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
>  	return 0;
>  
>  ring_dequeue:
> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>  #endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
> +#endif /* RTE_NEXT_ABI */
>  
>  	/* get remaining objects from ring */
>  	if (is_mc)

Same in those cases.



Regards,
Olivier


* Re: [PATCH v2] mempool: reduce rte_mempool structure size
  2016-02-10 16:59   ` Olivier MATZ
@ 2016-02-10 17:22     ` Wiles, Keith
  2016-02-10 18:35     ` Wiles, Keith
  1 sibling, 0 replies; 32+ messages in thread
From: Wiles, Keith @ 2016-02-10 17:22 UTC (permalink / raw)
  To: Olivier MATZ, dev

>Hi Keith,
>
>Thank you for adding the RTE_NEXT_ABI. I think this is the way
>described in the process. Your changes will be available in the next
>version (16.04) for people compiling with RTE_NEXT_ABI=y, and in
>16.07 without the option (I'm just surprised that RTE_NEXT_ABI=y in
>the default configs...).
>
>I think a deprecation notice should also be added in this commit
>in doc/guides/rel_notes/deprecation.rst.

Will add the text.
>
>Please also find comments below.
>
>On 02/09/2016 06:30 PM, Keith Wiles wrote:
>
>> diff --git a/config/defconfig_x86_64-native-linuxapp-gcc b/config/defconfig_x86_64-native-linuxapp-gcc
>> index 60baf5b..02e9ace 100644
>> --- a/config/defconfig_x86_64-native-linuxapp-gcc
>> +++ b/config/defconfig_x86_64-native-linuxapp-gcc
>> @@ -40,3 +40,8 @@ CONFIG_RTE_ARCH_64=y
>>  
>>  CONFIG_RTE_TOOLCHAIN="gcc"
>>  CONFIG_RTE_TOOLCHAIN_GCC=y
>> +CONFIG_RTE_BUILD_SHARED_LIB=y
>> +CONFIG_RTE_NEXT_ABI=n
>> +CONFIG_RTE_EAL_IGB_UIO=n
>> +CONFIG_RTE_LIBRTE_KNI=n
>> +CONFIG_RTE_KNI_KMOD=n

>
>I think this should not be part of the patch.

Hmm, not sure where this came from, but will remove it.
>
>> @@ -672,6 +704,24 @@ rte_mempool_count(const struct rte_mempool *mp)
>>  static unsigned
>>  rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
>>  {
>> +#ifdef RTE_NEXT_ABI
>> +	unsigned lcore_id;
>> +	unsigned count = 0;
>> +	unsigned cache_count;
>> +
>> +	fprintf(f, "  cache infos:\n");
>> +	fprintf(f, "    cache_size=%"PRIu32"\n", mp->cache_size);
>> +	if (mp->cache_size == 0)
>> +		return count;
>> +
>> +	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
>> +		cache_count = mp->local_cache[lcore_id].len;
>> +		fprintf(f, "    cache_count[%u]=%u\n", lcore_id, cache_count);
>> +		count += cache_count;
>> +	}
>> +	fprintf(f, "    total_cache_count=%u\n", count);
>> +	return count;
>> +#else
>>  #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>>  	unsigned lcore_id;
>>  	unsigned count = 0;
>
>I think in this case we could avoid duplicating the code without
>being unclear, by using the proper #ifdefs:

I was struggling with how it should be done. I like to see clear ifdefs and be able to see the complete code for a given case. In these cases I wanted to make it simple to remove the code quickly by just deleting lines instead of editing lines. I will follow your suggestion.
>
>#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 || defined(RTE_NEXT_ABI)
>	/* common code */
>#ifdef RTE_NEXT_ABI
>	if (mp->cache_size == 0)
>		return count;
>#endif
>	/* common code */
>#else
>...
>#endif
>
>
>> @@ -755,6 +806,26 @@ mempool_audit_cookies(const struct rte_mempool *mp)
>>  #define mempool_audit_cookies(mp) do {} while(0)
>>  #endif
>>  
>> +#ifdef RTE_NEXT_ABI
>> +/* check cookies before and after objects */
>> +static void
>> +mempool_audit_cache(const struct rte_mempool *mp)
>> +{
>> +	/* check cache size consistency */
>> +	unsigned lcore_id;
>> +
>> +	if (mp->cache_size == 0)
>> +		return;
>> +
>> +	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
>> +		if (mp->local_cache[lcore_id].len > mp->cache_flushthresh) {
>> +			RTE_LOG(CRIT, MEMPOOL, "badness on cache[%u]\n",
>> +				lcore_id);
>> +			rte_panic("MEMPOOL: invalid cache len\n");
>> +		}
>> +	}
>> +}
>> +#else
>
>same here
>
>> diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
>> index 6e2390a..fc9b595 100644
>> --- a/lib/librte_mempool/rte_mempool.h
>> +++ b/lib/librte_mempool/rte_mempool.h
>> @@ -95,6 +95,19 @@ struct rte_mempool_debug_stats {
>>  } __rte_cache_aligned;
>>  #endif
>>  
>> +#ifdef RTE_NEXT_ABI
>> +/**
>> + * A structure that stores a per-core object cache.
>> + */
>> +struct rte_mempool_cache {
>> +	unsigned len; /**< Cache len */
>> +	/*
>> +	 * Cache is allocated to this size to allow it to overflow in certain
>> +	 * cases to avoid needless emptying of cache.
>> +	 */
>> +	void *objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 3]; /**< Cache objects */
>> +} __rte_cache_aligned;
>> +#else
>
>same here
>
>
>
>> @@ -755,19 +793,25 @@ static inline void __attribute__((always_inline))
>>  __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
>>  		    unsigned n, int is_mp)
>>  {
>> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>>  #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>> +#endif /* RTE_NEXT_ABI */
>>  	struct rte_mempool_cache *cache;
>>  	uint32_t index;
>>  	void **cache_objs;
>>  	unsigned lcore_id = rte_lcore_id();
>>  	uint32_t cache_size = mp->cache_size;
>>  	uint32_t flushthresh = mp->cache_flushthresh;
>> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>>  #endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
>> +#endif /* RTE_NEXT_ABI */
>
>this looks strange... I think it does not work properly.
>Why not
>#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 || defined(RTE_NEXT_ABI)

Yes, it is strange :-(
>
>>  	/* increment stat now, adding in mempool always success */
>>  	__MEMPOOL_STAT_ADD(mp, put, n);
>>  
>> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>>  #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>> +#endif /* RTE_NEXT_ABI */
>>  	/* cache is not enabled or single producer or non-EAL thread */
>>  	if (unlikely(cache_size == 0 || is_mp == 0 ||
>>  		     lcore_id >= RTE_MAX_LCORE))
>> @@ -802,7 +846,9 @@ __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
>>  	return;
>>  
>>  ring_enqueue:
>> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>>  #endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
>> +#endif /* RTE_NEXT_ABI */
>>  
>>  	/* push remaining objects in ring */
>>  #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
>> @@ -946,7 +992,9 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
>>  		   unsigned n, int is_mc)
>>  {
>>  	int ret;
>> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>>  #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>> +#endif /* RTE_NEXT_ABI */
>>  	struct rte_mempool_cache *cache;
>>  	uint32_t index, len;
>>  	void **cache_objs;
>> @@ -992,7 +1040,9 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
>>  	return 0;
>>  
>>  ring_dequeue:
>> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>>  #endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
>> +#endif /* RTE_NEXT_ABI */
>>  
>>  	/* get remaining objects from ring */
>>  	if (is_mc)
>
>Same in those cases.
>
>
>
>Regards,
>Olivier
>


Regards,
Keith






* Re: [PATCH v2] mempool: reduce rte_mempool structure size
  2016-02-10 16:59   ` Olivier MATZ
  2016-02-10 17:22     ` Wiles, Keith
@ 2016-02-10 18:35     ` Wiles, Keith
  2016-02-10 20:06       ` Olivier MATZ
  1 sibling, 1 reply; 32+ messages in thread
From: Wiles, Keith @ 2016-02-10 18:35 UTC (permalink / raw)
  To: Olivier MATZ, dev

>Hi Keith,
>
>Thank you for adding the RTE_NEXT_ABI. I think this is the way
>described in the process. Your changes will be available in the next
>version (16.04) for people compiling with RTE_NEXT_ABI=y, and in
>16.07 without the option (I'm just surprised that RTE_NEXT_ABI=y in
>the default configs...).
>
>I think a deprecation notice should also be added in this commit
>in doc/guides/rel_notes/deprecation.rst.
>
>Please also find comments below.
>
>On 02/09/2016 06:30 PM, Keith Wiles wrote:
>
>> diff --git a/config/defconfig_x86_64-native-linuxapp-gcc b/config/defconfig_x86_64-native-linuxapp-gcc
>> index 60baf5b..02e9ace 100644
>> --- a/config/defconfig_x86_64-native-linuxapp-gcc
>> +++ b/config/defconfig_x86_64-native-linuxapp-gcc
>> @@ -40,3 +40,8 @@ CONFIG_RTE_ARCH_64=y
>>  
>>  CONFIG_RTE_TOOLCHAIN="gcc"
>>  CONFIG_RTE_TOOLCHAIN_GCC=y
>> +CONFIG_RTE_BUILD_SHARED_LIB=y
>> +CONFIG_RTE_NEXT_ABI=n
>> +CONFIG_RTE_EAL_IGB_UIO=n
>> +CONFIG_RTE_LIBRTE_KNI=n
>> +CONFIG_RTE_KNI_KMOD=n
>
>I think this should not be part of the patch.
>
>> @@ -672,6 +704,24 @@ rte_mempool_count(const struct rte_mempool *mp)
>>  static unsigned
>>  rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
>>  {
>> +#ifdef RTE_NEXT_ABI
>> +	unsigned lcore_id;
>> +	unsigned count = 0;
>> +	unsigned cache_count;
>> +
>> +	fprintf(f, "  cache infos:\n");
>> +	fprintf(f, "    cache_size=%"PRIu32"\n", mp->cache_size);
>> +	if (mp->cache_size == 0)
>> +		return count;
>> +
>> +	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
>> +		cache_count = mp->local_cache[lcore_id].len;
>> +		fprintf(f, "    cache_count[%u]=%u\n", lcore_id, cache_count);
>> +		count += cache_count;
>> +	}
>> +	fprintf(f, "    total_cache_count=%u\n", count);
>> +	return count;
>> +#else
>>  #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>>  	unsigned lcore_id;
>>  	unsigned count = 0;
>
>I think in this case we could avoid duplicating the code without
>being unclear, by using the proper #ifdefs:
>
>#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 || defined(RTE_NEXT_ABI)
>	/* common code */
>#ifdef RTE_NEXT_ABI
>	if (mp->cache_size == 0)
>		return count;
>#endif
>	/* common code */
>#else
>...
>#endif

Started looking at this change, and the problem is I want to remove the ifdef RTE_MEMPOOL.. as well as the #else/#endif code. If I do as you suggest it does not appear to be clearer: when someone goes back to remove the code, they may think the #ifdef RTE_MEMPOOL/#else/#endif lines are still required.

>
>
>> @@ -755,6 +806,26 @@ mempool_audit_cookies(const struct rte_mempool *mp)
>>  #define mempool_audit_cookies(mp) do {} while(0)
>>  #endif
>>  
>> +#ifdef RTE_NEXT_ABI
>> +/* check cookies before and after objects */
>> +static void
>> +mempool_audit_cache(const struct rte_mempool *mp)
>> +{
>> +	/* check cache size consistency */
>> +	unsigned lcore_id;
>> +
>> +	if (mp->cache_size == 0)
>> +		return;
>> +
>> +	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
>> +		if (mp->local_cache[lcore_id].len > mp->cache_flushthresh) {
>> +			RTE_LOG(CRIT, MEMPOOL, "badness on cache[%u]\n",
>> +				lcore_id);
>> +			rte_panic("MEMPOOL: invalid cache len\n");
>> +		}
>> +	}
>> +}
>> +#else
>
>same here

The same comment here.
>
>> diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
>> index 6e2390a..fc9b595 100644
>> --- a/lib/librte_mempool/rte_mempool.h
>> +++ b/lib/librte_mempool/rte_mempool.h
>> @@ -95,6 +95,19 @@ struct rte_mempool_debug_stats {
>>  } __rte_cache_aligned;
>>  #endif
>>  
>> +#ifdef RTE_NEXT_ABI
>> +/**
>> + * A structure that stores a per-core object cache.
>> + */
>> +struct rte_mempool_cache {
>> +	unsigned len; /**< Cache len */
>> +	/*
>> +	 * Cache is allocated to this size to allow it to overflow in certain
>> +	 * cases to avoid needless emptying of cache.
>> +	 */
>> +	void *objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 3]; /**< Cache objects */
>> +} __rte_cache_aligned;
>> +#else
>
>same here

Same for this one.
>
>
>
>> @@ -755,19 +793,25 @@ static inline void __attribute__((always_inline))
>>  __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
>>  		    unsigned n, int is_mp)
>>  {
>> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>>  #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>> +#endif /* RTE_NEXT_ABI */
>>  	struct rte_mempool_cache *cache;
>>  	uint32_t index;
>>  	void **cache_objs;
>>  	unsigned lcore_id = rte_lcore_id();
>>  	uint32_t cache_size = mp->cache_size;
>>  	uint32_t flushthresh = mp->cache_flushthresh;
>> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>>  #endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
>> +#endif /* RTE_NEXT_ABI */
>
>this looks strange... I think it does not work properly.
>Why not
>#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 || defined(RTE_NEXT_ABI)

Yes, I agree the ifndef looks strange, but it should work, as we want to remove the #ifdef RTE_MEMPOOL/#endif lines. This was the reason for the comment that it was an ifndef.
>
>>  	/* increment stat now, adding in mempool always success */
>>  	__MEMPOOL_STAT_ADD(mp, put, n);
>>  
>> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>>  #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>> +#endif /* RTE_NEXT_ABI */
>>  	/* cache is not enabled or single producer or non-EAL thread */
>>  	if (unlikely(cache_size == 0 || is_mp == 0 ||
>>  		     lcore_id >= RTE_MAX_LCORE))
>> @@ -802,7 +846,9 @@ __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
>>  	return;
>>  
>>  ring_enqueue:
>> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>>  #endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
>> +#endif /* RTE_NEXT_ABI */
>>  
>>  	/* push remaining objects in ring */
>>  #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
>> @@ -946,7 +992,9 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
>>  		   unsigned n, int is_mc)
>>  {
>>  	int ret;
>> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>>  #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>> +#endif /* RTE_NEXT_ABI */
>>  	struct rte_mempool_cache *cache;
>>  	uint32_t index, len;
>>  	void **cache_objs;
>> @@ -992,7 +1040,9 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
>>  	return 0;
>>  
>>  ring_dequeue:
>> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>>  #endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
>> +#endif /* RTE_NEXT_ABI */
>>  
>>  	/* get remaining objects from ring */
>>  	if (is_mc)
>
>Same in those cases.

The #ifdef RTE_MEMPOOL/#endif lines need to be removed when deprecated.
>
>
>Regards,
>Olivier
>


Regards,
Keith






* Re: [PATCH v2] mempool: reduce rte_mempool structure size
  2016-02-10 18:35     ` Wiles, Keith
@ 2016-02-10 20:06       ` Olivier MATZ
  0 siblings, 0 replies; 32+ messages in thread
From: Olivier MATZ @ 2016-02-10 20:06 UTC (permalink / raw)
  To: Wiles, Keith, dev

Hi Keith,

On 02/10/2016 07:35 PM, Wiles, Keith wrote:
>>> @@ -672,6 +704,24 @@ rte_mempool_count(const struct rte_mempool *mp)
>>>  static unsigned
>>>  rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
>>>  {
>>> +#ifdef RTE_NEXT_ABI
>>> +	unsigned lcore_id;
>>> +	unsigned count = 0;
>>> +	unsigned cache_count;
>>> +
>>> +	fprintf(f, "  cache infos:\n");
>>> +	fprintf(f, "    cache_size=%"PRIu32"\n", mp->cache_size);
>>> +	if (mp->cache_size == 0)
>>> +		return count;
>>> +
>>> +	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
>>> +		cache_count = mp->local_cache[lcore_id].len;
>>> +		fprintf(f, "    cache_count[%u]=%u\n", lcore_id, cache_count);
>>> +		count += cache_count;
>>> +	}
>>> +	fprintf(f, "    total_cache_count=%u\n", count);
>>> +	return count;
>>> +#else
>>>  #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>>>  	unsigned lcore_id;
>>>  	unsigned count = 0;
>>
>> I think in this case we could avoid duplicating the code without
>> being unclear, by using the proper #ifdefs:
>>
>> #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 || defined(RTE_NEXT_ABI)
>> 	/* common code */
>> #ifdef RTE_NEXT_ABI
>> 	if (mp->cache_size == 0)
>> 		return count;
>> #endif
>> 	/* common code */
>> #else
>> ...
>> #endif
> 
> Started looking at this change, and the problem is I want to remove the ifdef RTE_MEMPOOL.. as well as the #else/#endif code. If I do as you suggest it does not appear to be clearer: when someone goes back to remove the code, they may think the #ifdef RTE_MEMPOOL/#else/#endif lines are still required.

OK, makes sense.

>>> @@ -755,19 +793,25 @@ static inline void __attribute__((always_inline))
>>>  __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
>>>  		    unsigned n, int is_mp)
>>>  {
>>> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>>>  #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>>> +#endif /* RTE_NEXT_ABI */
>>>  	struct rte_mempool_cache *cache;
>>>  	uint32_t index;
>>>  	void **cache_objs;
>>>  	unsigned lcore_id = rte_lcore_id();
>>>  	uint32_t cache_size = mp->cache_size;
>>>  	uint32_t flushthresh = mp->cache_flushthresh;
>>> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>>>  #endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
>>> +#endif /* RTE_NEXT_ABI */
>>
>> this looks strange... I think it does not work properly.
>> Why not
>> #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 || defined(RTE_NEXT_ABI)
> 
> Yes, I agree the ifndef looks strange, but it should work, as we want to remove the #ifdef RTE_MEMPOOL/#endif lines. This was the reason for the comment that it was an ifndef.

It's not only strange, it's also probably not what you want to do:

	#ifndef RTE_NEXT_ABI	/* Note: ifndef */
	#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
	#endif /* RTE_NEXT_ABI */
	...

Here, the #endif corresponds to the second #if, not the first #ifdef.
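
A minimal illustration (never_compiled is a placeholder): the
preprocessor tracks conditional nesting even inside a skipped group,
so with RTE_NEXT_ABI defined the skip that starts at the #ifndef runs
straight past the first #endif:

#ifndef RTE_NEXT_ABI			/* false: start skipping    */
#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0	/* depth 2, inside the skip */
#endif					/* closes the inner #if     */
int never_compiled;			/* still inside the skip!   */
#endif					/* closes the #ifndef       */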

Regards,
Olivier


* [PATCH v3] mempool: reduce rte_mempool structure size
  2016-02-09 17:30 ` [PATCH v2] mempool: reduce " Keith Wiles
  2016-02-10 16:59   ` Olivier MATZ
@ 2016-02-10 21:18   ` Keith Wiles
  2016-02-12 11:23     ` Panu Matilainen
  2016-02-12 18:36   ` [PATCH v4] " Keith Wiles
  2 siblings, 1 reply; 32+ messages in thread
From: Keith Wiles @ 2016-02-10 21:18 UTC (permalink / raw)
  To: dev

The rte_mempool structure is changed, which will cause an ABI change
for this structure. Providing backward compatibility is not
reasonable here, as this structure is used in multiple defines/inlines.

Allow mempool cache support to be dynamic depending on whether the
mempool being created needs cache support. Saves about 1.5MB of
memory used by the rte_mempool structure.

Allocating small mempools which do not require a cache can consume
large amounts of memory if you have a number of these mempools.

Signed-off-by: Keith Wiles <keith.wiles@intel.com>
---
* Patch v3 fixes up the ifdefs to correct some problems in removing ifdef
  lines. Added the ABI deprecation notice to deprecation.rst.
* Patch v2 to add some comments and setup for RTE_NEXT_ABI changes.

 app/test/test_mempool.c              |  5 +++
 doc/guides/rel_notes/deprecation.rst |  7 +++
 lib/librte_mempool/rte_mempool.c     | 82 +++++++++++++++++++++++++++++++++---
 lib/librte_mempool/rte_mempool.h     | 46 ++++++++++++++++----
 4 files changed, 127 insertions(+), 13 deletions(-)

diff --git a/app/test/test_mempool.c b/app/test/test_mempool.c
index f0f823b..f3fba50 100644
--- a/app/test/test_mempool.c
+++ b/app/test/test_mempool.c
@@ -122,8 +122,13 @@ test_mempool_basic(void)
 		return -1;
 
 	printf("get private data\n");
+#ifdef RTE_NEXT_ABI
+	if (rte_mempool_get_priv(mp) != (char *)mp +
+			MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size))
+#else
 	if (rte_mempool_get_priv(mp) !=
 			(char*) mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num))
+#endif
 		return -1;
 
 	printf("get physical address of an object\n");
diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst
index e94d4a2..1b9d25e 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -49,3 +49,10 @@ Deprecation Notices
   commands (such as RETA update in testpmd).  This should impact
   CMDLINE_PARSE_RESULT_BUFSIZE, STR_TOKEN_SIZE and RDLINE_BUF_SIZE.
   It should be integrated in release 2.3.
+
+* An ABI change is planned for the rte_mempool structure to allow mempool
+  cache support to be dynamic depending on whether the mempool being
+  created needs cache support. This saves about 1.5MB of memory per
+  rte_mempool structure by removing the per-lcore cache memory. The
+  change will occur after the DPDK 16.04 release.
+
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index aff5f6d..5f21eaa 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -452,12 +452,17 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 	/* compilation-time checks */
 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool) &
 			  RTE_CACHE_LINE_MASK) != 0);
+#ifdef RTE_NEXT_ABI
+	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_cache) &
+			  RTE_CACHE_LINE_MASK) != 0);
+#else
 #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_cache) &
 			  RTE_CACHE_LINE_MASK) != 0);
 	RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, local_cache) &
 			  RTE_CACHE_LINE_MASK) != 0);
 #endif
+#endif /* RTE_NEXT_ABI */
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_debug_stats) &
 			  RTE_CACHE_LINE_MASK) != 0);
@@ -527,9 +532,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 		 */
 		int head = sizeof(struct rte_mempool);
 		int new_size = (private_data_size + head) % page_size;
-		if (new_size) {
+		if (new_size)
 			private_data_size += page_size - new_size;
-		}
 	}
 
 	/* try to allocate tailq entry */
@@ -544,7 +548,12 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 	 * store mempool objects. Otherwise reserve a memzone that is large
 	 * enough to hold mempool header and metadata plus mempool objects.
 	 */
+#ifdef RTE_NEXT_ABI
+	mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size);
+	mempool_size += private_data_size;
+#else
 	mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num) + private_data_size;
+#endif /* RTE_NEXT_ABI */
 	mempool_size = RTE_ALIGN_CEIL(mempool_size, RTE_MEMPOOL_ALIGN);
 	if (vaddr == NULL)
 		mempool_size += (size_t)objsz.total_size * n;
@@ -598,9 +607,22 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 	mp->cache_flushthresh = CALC_CACHE_FLUSHTHRESH(cache_size);
 	mp->private_data_size = private_data_size;
 
+#ifdef RTE_NEXT_ABI
+	/*
+	 * local_cache pointer is set even if cache_size is zero.
+	 * The local_cache points to just past the elt_pa[] array.
+	 */
+	mp->local_cache = (struct rte_mempool_cache *)
+			((char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num, 0));
+
+	/* calculate address of the first element for continuous mempool. */
+	obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size) +
+		private_data_size;
+#else
 	/* calculate address of the first element for continuous mempool. */
 	obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num) +
 		private_data_size;
+#endif /* RTE_NEXT_ABI */
 	obj = RTE_PTR_ALIGN_CEIL(obj, RTE_MEMPOOL_ALIGN);
 
 	/* populate address translation fields. */
@@ -613,9 +635,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 		mp->elt_va_start = (uintptr_t)obj;
 		mp->elt_pa[0] = mp->phys_addr +
 			(mp->elt_va_start - (uintptr_t)mp);
-
-	/* mempool elements in a separate chunk of memory. */
 	} else {
+		/* mempool elements in a separate chunk of memory. */
 		mp->elt_va_start = (uintptr_t)vaddr;
 		memcpy(mp->elt_pa, paddr, sizeof (mp->elt_pa[0]) * pg_num);
 	}
@@ -645,10 +666,21 @@ unsigned
 rte_mempool_count(const struct rte_mempool *mp)
 {
 	unsigned count;
+#ifdef RTE_NEXT_ABI
+	unsigned lcore_id;
 
 	count = rte_ring_count(mp->ring);
 
+	if (mp->cache_size == 0)
+		return count;
+
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
+		count += mp->local_cache[lcore_id].len;
+#else
 #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
+
+	count = rte_ring_count(mp->ring);
+
 	{
 		unsigned lcore_id;
 		if (mp->cache_size == 0)
@@ -658,6 +690,7 @@ rte_mempool_count(const struct rte_mempool *mp)
 			count += mp->local_cache[lcore_id].len;
 	}
 #endif
+#endif /* RTE_NEXT_ABI */
 
 	/*
 	 * due to race condition (access to len is not locked), the
@@ -672,6 +705,24 @@ rte_mempool_count(const struct rte_mempool *mp)
 static unsigned
 rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
 {
+#ifdef RTE_NEXT_ABI
+	unsigned lcore_id;
+	unsigned count = 0;
+	unsigned cache_count;
+
+	fprintf(f, "  cache infos:\n");
+	fprintf(f, "    cache_size=%"PRIu32"\n", mp->cache_size);
+	if (mp->cache_size == 0)
+		return count;
+
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+		cache_count = mp->local_cache[lcore_id].len;
+		fprintf(f, "    cache_count[%u]=%u\n", lcore_id, cache_count);
+		count += cache_count;
+	}
+	fprintf(f, "    total_cache_count=%u\n", count);
+	return count;
+#else
 #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	unsigned lcore_id;
 	unsigned count = 0;
@@ -691,6 +742,7 @@ rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
 	fprintf(f, "  cache disabled\n");
 	return 0;
 #endif
+#endif /* RTE_NEXT_ABI */
 }
 
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
@@ -755,6 +807,26 @@ mempool_audit_cookies(const struct rte_mempool *mp)
 #define mempool_audit_cookies(mp) do {} while(0)
 #endif
 
+#ifdef RTE_NEXT_ABI
+/* check cookies before and after objects */
+static void
+mempool_audit_cache(const struct rte_mempool *mp)
+{
+	/* check cache size consistency */
+	unsigned lcore_id;
+
+	if (mp->cache_size == 0)
+		return;
+
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+		if (mp->local_cache[lcore_id].len > mp->cache_flushthresh) {
+			RTE_LOG(CRIT, MEMPOOL, "badness on cache[%u]\n",
+				lcore_id);
+			rte_panic("MEMPOOL: invalid cache len\n");
+		}
+	}
+}
+#else
 #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 /* check cookies before and after objects */
 static void
@@ -773,7 +845,7 @@ mempool_audit_cache(const struct rte_mempool *mp)
 #else
 #define mempool_audit_cache(mp) do {} while(0)
 #endif
-
+#endif /* RTE_NEXT_ABI */
 
 /* check the consistency of mempool (size, cookies, ...) */
 void
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 9745bf0..b12d6a9 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -95,7 +95,7 @@ struct rte_mempool_debug_stats {
 } __rte_cache_aligned;
 #endif
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
+#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 || defined(RTE_NEXT_ABI) /* Remove line */
 /**
  * A structure that stores a per-core object cache.
  */
@@ -107,7 +107,7 @@ struct rte_mempool_cache {
 	 */
 	void *objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 3]; /**< Cache objects */
 } __rte_cache_aligned;
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
+#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */ /* Remove line RTE_NEXT_ABI */
 
 /**
  * A structure that stores the size of mempool elements.
@@ -194,10 +194,14 @@ struct rte_mempool {
 
 	unsigned private_data_size;      /**< Size of private data. */
 
+#ifdef RTE_NEXT_ABI
+	struct rte_mempool_cache *local_cache; /**< Per-lcore local cache */
+#else
 #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	/** Per-lcore local cache. */
 	struct rte_mempool_cache local_cache[RTE_MAX_LCORE];
 #endif
+#endif  /* RTE_NEXT_ABI */
 
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
 	/** Per-lcore statistics. */
@@ -246,6 +250,26 @@ struct rte_mempool {
 #define __MEMPOOL_STAT_ADD(mp, name, n) do {} while(0)
 #endif
 
+#ifdef RTE_NEXT_ABI
+/**
+ * Size of elt_pa array size based on number of pages. (Internal use)
+ */
+#define __PA_SIZE(mp, pgn) \
+	RTE_ALIGN_CEIL((((pgn) - RTE_DIM((mp)->elt_pa)) * \
+	sizeof((mp)->elt_pa[0])), RTE_CACHE_LINE_SIZE)
+
+/**
+ * Calculate the size of the mempool header.
+ *
+ * @param mp
+ *   Pointer to the memory pool.
+ * @param pgn
+ *   Number of pages used to store mempool objects.
+ */
+#define MEMPOOL_HEADER_SIZE(mp, pgn, cs) \
+	(sizeof(*(mp)) + __PA_SIZE(mp, pgn) + (((cs) == 0) ? 0 : \
+	(sizeof(struct rte_mempool_cache) * RTE_MAX_LCORE)))
+#else
 /**
  * Calculate the size of the mempool header.
  *
@@ -257,6 +281,7 @@ struct rte_mempool {
 #define	MEMPOOL_HEADER_SIZE(mp, pgn)	(sizeof(*(mp)) + \
 	RTE_ALIGN_CEIL(((pgn) - RTE_DIM((mp)->elt_pa)) * \
 	sizeof ((mp)->elt_pa[0]), RTE_CACHE_LINE_SIZE))
+#endif /* RTE_NEXT_ABI */
 
 /**
  * Return true if the whole mempool is in contiguous memory.
@@ -755,19 +780,19 @@ static inline void __attribute__((always_inline))
 __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
 		    unsigned n, int is_mp)
 {
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
+#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 || defined(RTE_NEXT_ABI) /* Remove line */
 	struct rte_mempool_cache *cache;
 	uint32_t index;
 	void **cache_objs;
 	unsigned lcore_id = rte_lcore_id();
 	uint32_t cache_size = mp->cache_size;
 	uint32_t flushthresh = mp->cache_flushthresh;
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
+#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */   /* Remove line RTE_NEXT_ABI */
 
 	/* increment stat now, adding in mempool always success */
 	__MEMPOOL_STAT_ADD(mp, put, n);
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
+#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 || defined(RTE_NEXT_ABI) /* Remove line */
 	/* cache is not enabled or single producer or non-EAL thread */
 	if (unlikely(cache_size == 0 || is_mp == 0 ||
 		     lcore_id >= RTE_MAX_LCORE))
@@ -802,7 +827,7 @@ __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
 	return;
 
 ring_enqueue:
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
+#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */ /* Remove line RTE_NEXT_ABI */
 
 	/* push remaining objects in ring */
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
@@ -946,7 +971,7 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
 		   unsigned n, int is_mc)
 {
 	int ret;
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
+#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 || defined(RTE_NEXT_ABI) /* Remove line */
 	struct rte_mempool_cache *cache;
 	uint32_t index, len;
 	void **cache_objs;
@@ -992,7 +1017,7 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
 	return 0;
 
 ring_dequeue:
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
+#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */ /* Remove line RTE_NEXT_ABI */
 
 	/* get remaining objects from ring */
 	if (is_mc)
@@ -1293,7 +1318,12 @@ void rte_mempool_audit(const struct rte_mempool *mp);
  */
 static inline void *rte_mempool_get_priv(struct rte_mempool *mp)
 {
+#ifdef RTE_NEXT_ABI
+	return (char *)mp +
+		MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size);
+#else
 	return (char *)mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num);
+#endif /* RTE_NEXT_ABI */
 }
 
 /**
-- 
2.5.4 (Apple Git-61)

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: [PATCH v3] mempool: reduce rte_mempool structure size
  2016-02-10 21:18   ` [PATCH v3] " Keith Wiles
@ 2016-02-12 11:23     ` Panu Matilainen
  2016-02-12 13:57       ` Thomas Monjalon
  0 siblings, 1 reply; 32+ messages in thread
From: Panu Matilainen @ 2016-02-12 11:23 UTC (permalink / raw)
  To: Keith Wiles, dev

On 02/10/2016 11:18 PM, Keith Wiles wrote:
> The rte_mempool structure is changed, which will cause an ABI change
> for this structure. Providing backward compat is not reasonable
> here as this structure is used in multiple defines/inlines.
>
> Allow mempool cache support to be dynamic depending on if the
> mempool being created needs cache support. Saves about 1.5M of
> memory used by the rte_mempool structure.
[...]
>   static inline void *rte_mempool_get_priv(struct rte_mempool *mp)
>   {
> +#ifdef RTE_NEXT_ABI
> +	return (char *)mp +
> +		MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size);
> +#else
>   	return (char *)mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num);
> +#endif /* RTE_NEXT_ABI */
>   }
[...]

This is not RTE_NEXT_ABI material IMO, the added ifdef clutter is just 
too much.

I'd suggest adding a deprecation notice for the change now and after 
16.04 is released, just resend the patch without messing with RTE_NEXT_ABI.

	- Panu -

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v3] mempool: reduce rte_mempool structure size
  2016-02-12 11:23     ` Panu Matilainen
@ 2016-02-12 13:57       ` Thomas Monjalon
  2016-02-12 14:19         ` Panu Matilainen
  0 siblings, 1 reply; 32+ messages in thread
From: Thomas Monjalon @ 2016-02-12 13:57 UTC (permalink / raw)
  To: Panu Matilainen, Keith Wiles; +Cc: dev

2016-02-12 13:23, Panu Matilainen:
> On 02/10/2016 11:18 PM, Keith Wiles wrote:
> >   static inline void *rte_mempool_get_priv(struct rte_mempool *mp)
> >   {
> > +#ifdef RTE_NEXT_ABI
> > +	return (char *)mp +
> > +		MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size);
> > +#else
> >   	return (char *)mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num);
> > +#endif /* RTE_NEXT_ABI */
> >   }
> 
> This is not RTE_NEXT_ABI material IMO, the added ifdef clutter is just 
> too much.

The changes are restricted to the mempool files.
I think it is not so much. However I wonder whether the feature is important
enough to justify the use of NEXT_ABI.

> I'd suggest adding a deprecation notice for the change now and after 
> 16.04 is released, just resend the patch without messing with RTE_NEXT_ABI.

When adding a deprecation notice, it is really better to provide a reference
to the code change.
So if you give up on NEXT_ABI, please add a link to this code change in
the new commit message. Thanks

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v3] mempool: reduce rte_mempool structure size
  2016-02-12 13:57       ` Thomas Monjalon
@ 2016-02-12 14:19         ` Panu Matilainen
  2016-02-12 15:07           ` Wiles, Keith
  0 siblings, 1 reply; 32+ messages in thread
From: Panu Matilainen @ 2016-02-12 14:19 UTC (permalink / raw)
  To: Thomas Monjalon, Keith Wiles; +Cc: dev

On 02/12/2016 03:57 PM, Thomas Monjalon wrote:
> 2016-02-12 13:23, Panu Matilainen:
>> On 02/10/2016 11:18 PM, Keith Wiles wrote:
>>>    static inline void *rte_mempool_get_priv(struct rte_mempool *mp)
>>>    {
>>> +#ifdef RTE_NEXT_ABI
>>> +	return (char *)mp +
>>> +		MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size);
>>> +#else
>>>    	return (char *)mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num);
>>> +#endif /* RTE_NEXT_ABI */
>>>    }
>>
>> This is not RTE_NEXT_ABI material IMO, the added ifdef clutter is just
>> too much.
>
> The changes are restricted to the mempool files.
> I think it is not so much. However I wonder whether the feature is important
> enough to justify the use of NEXT_ABI.

Well yes, to be precise: for the benefit of this patch, the ifdef 
clutter seems too much.

It's not as if every change is expected to go through a NEXT_ABI phase, 
based on http://dpdk.org/ml/archives/dev/2016-February/032866.html there 
might be some confusion regarding that.

>
>> I'd suggest adding a deprecation notice for the change now and after
>> 16.04 is released, just resend the patch without messing with RTE_NEXT_ABI.
>
> When adding a deprecation notice, it is really better to provide a reference
> to the code change.
> So if you give up on NEXT_ABI, please add a link to this code change in
> the new commit message. Thanks
>

Nod.

	- Panu -

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v3] mempool: reduce rte_mempool structure size
  2016-02-12 14:19         ` Panu Matilainen
@ 2016-02-12 15:07           ` Wiles, Keith
  2016-02-12 15:38             ` Thomas Monjalon
  0 siblings, 1 reply; 32+ messages in thread
From: Wiles, Keith @ 2016-02-12 15:07 UTC (permalink / raw)
  To: Panu Matilainen, Thomas Monjalon; +Cc: dev

>On 02/12/2016 03:57 PM, Thomas Monjalon wrote:
>> 2016-02-12 13:23, Panu Matilainen:
>>> On 02/10/2016 11:18 PM, Keith Wiles wrote:
>>>>    static inline void *rte_mempool_get_priv(struct rte_mempool *mp)
>>>>    {
>>>> +#ifdef RTE_NEXT_ABI
>>>> +	return (char *)mp +
>>>> +		MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size);
>>>> +#else
>>>>    	return (char *)mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num);
>>>> +#endif /* RTE_NEXT_ABI */
>>>>    }
>>>
>>> This is not RTE_NEXT_ABI material IMO, the added ifdef clutter is just
>>> too much.
>>
>> The changes are restricted to the mempool files.
>> I think it is not so much. However I wonder whether the feature is important
>> enough to justify the use of NEXT_ABI.
>
>Well yes, to be precise: for the benefit of this patch, the ifdef 
>clutter seems too much.
>
>It's not as if every change is expected to go through a NEXT_ABI phase, 
>based on http://dpdk.org/ml/archives/dev/2016-February/032866.html there 
>might be some confusion regarding that.

I think the NEXT_ABI is reasonable in this case as it does change a structure everyone uses and the ifdef clutter is caused by having to remove old ifdefs, which is a good thing for DPDK. The NEXT_ABI ifdefs only exist for one release and then they will disappear, which I think is more than reasonable.
>
>>
>>> I'd suggest adding a deprecation notice for the change now and after
>>> 16.04 is released, just resend the patch without messing with RTE_NEXT_ABI.
>>
>> When adding a deprecation notice, it is really better to provide a reference
>> to the code change.
>> So if you give up on NEXT_ABI, please add a link to this code change in
>> the new commit message. Thanks
>>
>
>Nod.
>
>	- Panu -
>


Regards,
Keith





^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v3] mempool: reduce rte_mempool structure size
  2016-02-12 15:07           ` Wiles, Keith
@ 2016-02-12 15:38             ` Thomas Monjalon
  2016-02-12 15:50               ` Olivier MATZ
  2016-02-12 15:54               ` Wiles, Keith
  0 siblings, 2 replies; 32+ messages in thread
From: Thomas Monjalon @ 2016-02-12 15:38 UTC (permalink / raw)
  To: Wiles, Keith; +Cc: dev

2016-02-12 15:07, Wiles, Keith:
> >On 02/12/2016 03:57 PM, Thomas Monjalon wrote:
> >> 2016-02-12 13:23, Panu Matilainen:
> >>> On 02/10/2016 11:18 PM, Keith Wiles wrote:
> >>>>    static inline void *rte_mempool_get_priv(struct rte_mempool *mp)
> >>>>    {
> >>>> +#ifdef RTE_NEXT_ABI
> >>>> +	return (char *)mp +
> >>>> +		MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size);
> >>>> +#else
> >>>>    	return (char *)mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num);
> >>>> +#endif /* RTE_NEXT_ABI */
> >>>>    }
> >>>
> >>> This is not RTE_NEXT_ABI material IMO, the added ifdef clutter is just
> >>> too much.
> >>
> >> The changes are restricted to the mempool files.
> >> I think it is not so much. However I wonder whether the feature is important
> >> enough to justify the use of NEXT_ABI.
> >
> >Well yes, to be precise: for the benefit of this patch, the ifdef 
> >clutter seems too much.
> >
> >It's not as if every change is expected to go through a NEXT_ABI phase, 
> >based on http://dpdk.org/ml/archives/dev/2016-February/032866.html there 
> >might be some confusion regarding that.
> 
> I think the NEXT_ABI is reasonable in this case as it does change a structure everyone uses and the ifdef clutter is caused by having to remove old ifdefs, which is a good thing for DPDK. The NEXT_ABI ifdefs only exist for one release and then they will disappear, which I think is more than reasonable.

OK, I'm going to sum it up with new words and let the conclusion come
from Keith, Panu and Olivier.

We agreed to allow ABI breaking if a notification was done in the
previous release.
Keith has sent a notification for 16.04 so the "official" ABI will be
changed in 16.07.
It is also encouraged to show how the ABI will be broken when sending
a notification. It allows reviewers to give an informed opinion before ack'ing.
The code snippet will also be useful to app developers when preparing
a future upgrade.
Keith has sent the whole code change.
This code change may be submitted in the current release without waiting
for the deprecation period if gated in the NEXT_ABI ifdefs.
It allows the feature to be provided to app developers who don't care about
versioning. But the price is more complicated code to read and manage.

To make it short, the rules to use NEXT_ABI are not strict and may change.
So now you have to decide if this change can be integrated in 16.04
as NEXT_ABI.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v3] mempool: reduce rte_mempool structure size
  2016-02-12 15:38             ` Thomas Monjalon
@ 2016-02-12 15:50               ` Olivier MATZ
  2016-02-12 15:58                 ` Wiles, Keith
  2016-02-15  9:58                 ` Hunt, David
  2016-02-12 15:54               ` Wiles, Keith
  1 sibling, 2 replies; 32+ messages in thread
From: Olivier MATZ @ 2016-02-12 15:50 UTC (permalink / raw)
  To: Thomas Monjalon, Wiles, Keith; +Cc: dev

Hi,

On 02/12/2016 04:38 PM, Thomas Monjalon wrote:
> OK, I'm going to sum it up with new words and let the conclusion come
> from Keith, Panu and Olivier.
> 
> We agreed to allow ABI breaking if a notification was done in the
> previous release.
> Keith has sent a notification for 16.04 so the "official" ABI will be
> changed in 16.07.
> It is also encouraged to show how the ABI will be broken when sending
> a notification. It allows reviewers to give an informed opinion before ack'ing.
> The code snippet will also be useful to app developers when preparing
> a future upgrade.
> Keith has sent the whole code change.
> This code change may be submitted in the current release without waiting
> for the deprecation period if gated in the NEXT_ABI ifdefs.
> It allows the feature to be provided to app developers who don't care about
> versioning. But the price is more complicated code to read and manage.
> 
> To make it short, the rules to use NEXT_ABI are not strict and may change.
> So now you have to decide if this change can be integrated in 16.04
> as NEXT_ABI.

Thank you Thomas for this summary. Then my vote would be in favor of
only keeping the deprecation notice for 16.04 and pushing the code without
the NEXT_ABI ifdefs for 16.07 because:

- although it's a valuable patch, there is no urgency in having it for
  the next release
- NEXT_ABI does make the code harder to read in this case, and I'm
  thinking about the patchset from David Hunt (external mempool handler)
  that will be in the same situation, and maybe also another patchset
  I'm working on.

Regards,
Olivier

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v3] mempool: reduce rte_mempool structure size
  2016-02-12 15:38             ` Thomas Monjalon
  2016-02-12 15:50               ` Olivier MATZ
@ 2016-02-12 15:54               ` Wiles, Keith
  1 sibling, 0 replies; 32+ messages in thread
From: Wiles, Keith @ 2016-02-12 15:54 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: dev

>2016-02-12 15:07, Wiles, Keith:
>> [...]
>> 
>> I think the NEXT_ABI is reasonable in this case as it does change a structure everyone uses and the ifdef clutter is caused by having to remove old ifdefs, which is a good thing for DPDK. The NEXT_ABI ifdefs only exist for one release and then they will disappear, which I think is more than reasonable.
>
>OK, I'm going to sum it up with new words and let the conclusion come
>from Keith, Panu and Olivier.
>
>We agreed to allow ABI breaking if a notification was done in the
>previous release.
>Keith has sent a notification for 16.04 so the "official" ABI will be
>changed in 16.07.
>It is also encouraged to show how the ABI will be broken when sending
>a notification. It allows reviewers to give an informed opinion before ack'ing.
>The code snippet will also be useful to app developers when preparing
>a future upgrade.
>Keith has sent the whole code change.
>This code change may be submitted in the current release without waiting
>for the deprecation period if gated in the NEXT_ABI ifdefs.
>It allows the feature to be provided to app developers who don't care about
>versioning. But the price is more complicated code to read and manage.
>
>To make it short, the rules to use NEXT_ABI are not strict and may change.
>So now you have to decide if this change can be integrated in 16.04
>as NEXT_ABI.

I would personally go ahead with the NEXT_ABI in 16.04 as it seems more reasonable for developers. I do not know what would break in a developer's project if we made this patch in 16.04 without NEXT_ABI, which to me means we need to err on the side of caution by using NEXT_ABI.

I am willing to submit a v4 patch without the NEXT_ABI ifdefs, but that is something everyone needs to agree on.
>
>


Regards,
Keith





^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v3] mempool: reduce rte_mempool structure size
  2016-02-12 15:50               ` Olivier MATZ
@ 2016-02-12 15:58                 ` Wiles, Keith
  2016-02-15  9:58                 ` Hunt, David
  1 sibling, 0 replies; 32+ messages in thread
From: Wiles, Keith @ 2016-02-12 15:58 UTC (permalink / raw)
  To: Olivier MATZ, Thomas Monjalon; +Cc: dev

>Hi,
>
>On 02/12/2016 04:38 PM, Thomas Monjalon wrote:
>> [...]
>
>Thank you Thomas for this summary. Then my vote would be in favor of
>only keeping the deprecation notice for 16.04 and pushing the code without
>the NEXT_ABI ifdefs for 16.07 because:
>
>- although it's a valuable patch, there is no urgency in having it for
>  the next release
>- NEXT_ABI does make the code harder to read in this case, and I'm
>  thinking about the patchset from David Hunt (external mempool handler)
>  that will be in the same situation, and maybe also another patchset
>  I'm working on.

As I stated in my previous email, I can submit a v4 patch. Do you need two patches, one for the notice in 16.04 and one for 16.07?
>
>Regards,
>Olivier
>


Regards,
Keith





^ permalink raw reply	[flat|nested] 32+ messages in thread

* [PATCH v4] mempool: reduce rte_mempool structure size
  2016-02-09 17:30 ` [PATCH v2] mempool: reduce " Keith Wiles
  2016-02-10 16:59   ` Olivier MATZ
  2016-02-10 21:18   ` [PATCH v3] " Keith Wiles
@ 2016-02-12 18:36   ` Keith Wiles
  2016-02-15  9:20     ` Olivier MATZ
  2016-04-14  9:42     ` [PATCH v5] " Olivier Matz
  2 siblings, 2 replies; 32+ messages in thread
From: Keith Wiles @ 2016-02-12 18:36 UTC (permalink / raw)
  To: dev

The rte_mempool structure is changed, which will cause an ABI change
for this structure. Providing backward compat is not reasonable
here as this structure is used in multiple defines/inlines.

Allow mempool cache support to be dynamic depending on if the
mempool being created needs cache support. Saves about 1.5M of
memory used by the rte_mempool structure.

Allocating small mempools which do not require cache can consume
large amounts of memory if you have a number of these mempools.
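
(As an illustration of the case this targets, a minimal sketch with
made-up names and sizes, not code from this patch:

	#include <rte_mempool.h>

	/* A small control pool with cache_size == 0: after this change,
	 * no rte_mempool_cache array is reserved behind the header. */
	struct rte_mempool *mp = rte_mempool_create("small_pool",
			1024,	/* n: number of elements */
			64,	/* elt_size */
			0,	/* cache_size: no per-lcore cache */
			0,	/* private_data_size */
			NULL, NULL,	/* mp_init, mp_init_arg */
			NULL, NULL,	/* obj_init, obj_init_arg */
			SOCKET_ID_ANY, 0);

Before this change every such pool embedded the full per-lcore cache
array whether it was used or not.)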

Change to be effective in release 16.07.
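
(For reference, a sketch of the memzone layout this produces, derived
from the new MEMPOOL_HEADER_SIZE()/__PA_SIZE() macros; offsets are
illustrative, each region cache-line aligned per the macros:

	(char *)mp
	 |- struct rte_mempool            sizeof(*mp), ends with elt_pa[1]
	 |- remainder of elt_pa[]         __PA_SIZE(mp, pg_num)
	 |- local_cache[RTE_MAX_LCORE]    reserved only when cache_size != 0;
	 |                                mp->local_cache always points here
	 |- private data                  rte_mempool_get_priv(mp)
	 |- objects                       for a contiguous pool (vaddr == NULL)

so MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size) covers everything before
the private data, and the cs == 0 variant used to set mp->local_cache
ends right after elt_pa[].)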

Signed-off-by: Keith Wiles <keith.wiles@intel.com>
---
* Patch v4 removes RTE_NEXT_ABI ifdefs for 16.07 integration, plus splits
  out the deprecation notice into a separate patch email for the 16.04 release.
* Patch v3 fixes up the ifdefs to correct some problems in removing ifdef
  lines. Added the ABI deprecation notice to the document file.
* Patch v2 adds some comments and setup for RTE_NEXT_ABI changes.

 app/test/test_mempool.c          |  4 +--
 lib/librte_mempool/rte_mempool.c | 55 ++++++++++++++++++----------------------
 lib/librte_mempool/rte_mempool.h | 29 ++++++++++-----------
 3 files changed, 40 insertions(+), 48 deletions(-)

diff --git a/app/test/test_mempool.c b/app/test/test_mempool.c
index f0f823b..10e1fa4 100644
--- a/app/test/test_mempool.c
+++ b/app/test/test_mempool.c
@@ -122,8 +122,8 @@ test_mempool_basic(void)
 		return -1;
 
 	printf("get private data\n");
-	if (rte_mempool_get_priv(mp) !=
-			(char*) mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num))
+	if (rte_mempool_get_priv(mp) != (char *)mp +
+			MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size))
 		return -1;
 
 	printf("get physical address of an object\n");
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index aff5f6d..6f067f3 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -452,12 +452,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 	/* compilation-time checks */
 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool) &
 			  RTE_CACHE_LINE_MASK) != 0);
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_cache) &
 			  RTE_CACHE_LINE_MASK) != 0);
-	RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, local_cache) &
-			  RTE_CACHE_LINE_MASK) != 0);
-#endif
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_debug_stats) &
 			  RTE_CACHE_LINE_MASK) != 0);
@@ -527,9 +523,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 		 */
 		int head = sizeof(struct rte_mempool);
 		int new_size = (private_data_size + head) % page_size;
-		if (new_size) {
+		if (new_size)
 			private_data_size += page_size - new_size;
-		}
 	}
 
 	/* try to allocate tailq entry */
@@ -544,7 +539,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 	 * store mempool objects. Otherwise reserve a memzone that is large
 	 * enough to hold mempool header and metadata plus mempool objects.
 	 */
-	mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num) + private_data_size;
+	mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size);
+	mempool_size += private_data_size;
 	mempool_size = RTE_ALIGN_CEIL(mempool_size, RTE_MEMPOOL_ALIGN);
 	if (vaddr == NULL)
 		mempool_size += (size_t)objsz.total_size * n;
@@ -598,8 +594,15 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 	mp->cache_flushthresh = CALC_CACHE_FLUSHTHRESH(cache_size);
 	mp->private_data_size = private_data_size;
 
+	/*
+	 * local_cache pointer is set even if cache_size is zero.
+	 * The local_cache points to just past the elt_pa[] array.
+	 */
+	mp->local_cache = (struct rte_mempool_cache *)
+			((char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num, 0));
+
 	/* calculate address of the first element for continuous mempool. */
-	obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num) +
+	obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size) +
 		private_data_size;
 	obj = RTE_PTR_ALIGN_CEIL(obj, RTE_MEMPOOL_ALIGN);
 
@@ -613,9 +616,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 		mp->elt_va_start = (uintptr_t)obj;
 		mp->elt_pa[0] = mp->phys_addr +
 			(mp->elt_va_start - (uintptr_t)mp);
-
-	/* mempool elements in a separate chunk of memory. */
 	} else {
+		/* mempool elements in a separate chunk of memory. */
 		mp->elt_va_start = (uintptr_t)vaddr;
 		memcpy(mp->elt_pa, paddr, sizeof (mp->elt_pa[0]) * pg_num);
 	}
@@ -645,19 +647,15 @@ unsigned
 rte_mempool_count(const struct rte_mempool *mp)
 {
 	unsigned count;
+	unsigned lcore_id;
 
 	count = rte_ring_count(mp->ring);
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
-	{
-		unsigned lcore_id;
-		if (mp->cache_size == 0)
-			return count;
+	if (mp->cache_size == 0)
+		return count;
 
-		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
-			count += mp->local_cache[lcore_id].len;
-	}
-#endif
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
+		count += mp->local_cache[lcore_id].len;
 
 	/*
 	 * due to race condition (access to len is not locked), the
@@ -672,13 +670,16 @@ rte_mempool_count(const struct rte_mempool *mp)
 static unsigned
 rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
 {
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	unsigned lcore_id;
 	unsigned count = 0;
 	unsigned cache_count;
 
 	fprintf(f, "  cache infos:\n");
 	fprintf(f, "    cache_size=%"PRIu32"\n", mp->cache_size);
+
+	if (mp->cache_size == 0)
+		return count;
+
 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
 		cache_count = mp->local_cache[lcore_id].len;
 		fprintf(f, "    cache_count[%u]=%u\n", lcore_id, cache_count);
@@ -686,11 +687,6 @@ rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
 	}
 	fprintf(f, "    total_cache_count=%u\n", count);
 	return count;
-#else
-	RTE_SET_USED(mp);
-	fprintf(f, "  cache disabled\n");
-	return 0;
-#endif
 }
 
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
@@ -755,13 +751,16 @@ mempool_audit_cookies(const struct rte_mempool *mp)
 #define mempool_audit_cookies(mp) do {} while(0)
 #endif
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 /* check cookies before and after objects */
 static void
 mempool_audit_cache(const struct rte_mempool *mp)
 {
 	/* check cache size consistency */
 	unsigned lcore_id;
+
+	if (mp->cache_size == 0)
+		return;
+
 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
 		if (mp->local_cache[lcore_id].len > mp->cache_flushthresh) {
 			RTE_LOG(CRIT, MEMPOOL, "badness on cache[%u]\n",
@@ -770,10 +769,6 @@ mempool_audit_cache(const struct rte_mempool *mp)
 		}
 	}
 }
-#else
-#define mempool_audit_cache(mp) do {} while(0)
-#endif
-
 
 /* check the consistency of mempool (size, cookies, ...) */
 void
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 9745bf0..8595e77 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -95,7 +95,6 @@ struct rte_mempool_debug_stats {
 } __rte_cache_aligned;
 #endif
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 /**
  * A structure that stores a per-core object cache.
  */
@@ -107,7 +106,6 @@ struct rte_mempool_cache {
 	 */
 	void *objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 3]; /**< Cache objects */
 } __rte_cache_aligned;
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
 
 /**
  * A structure that stores the size of mempool elements.
@@ -194,10 +192,7 @@ struct rte_mempool {
 
 	unsigned private_data_size;      /**< Size of private data. */
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
-	/** Per-lcore local cache. */
-	struct rte_mempool_cache local_cache[RTE_MAX_LCORE];
-#endif
+	struct rte_mempool_cache *local_cache; /**< Per-lcore local cache */
 
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
 	/** Per-lcore statistics. */
@@ -247,6 +242,13 @@ struct rte_mempool {
 #endif
 
 /**
+ * Size of elt_pa array size based on number of pages. (Internal use)
+ */
+#define __PA_SIZE(mp, pgn) \
+	RTE_ALIGN_CEIL((((pgn) - RTE_DIM((mp)->elt_pa)) * \
+	sizeof((mp)->elt_pa[0])), RTE_CACHE_LINE_SIZE)
+
+/**
  * Calculate the size of the mempool header.
  *
  * @param mp
@@ -254,9 +256,9 @@ struct rte_mempool {
  * @param pgn
  *   Number of pages used to store mempool objects.
  */
-#define	MEMPOOL_HEADER_SIZE(mp, pgn)	(sizeof(*(mp)) + \
-	RTE_ALIGN_CEIL(((pgn) - RTE_DIM((mp)->elt_pa)) * \
-	sizeof ((mp)->elt_pa[0]), RTE_CACHE_LINE_SIZE))
+#define MEMPOOL_HEADER_SIZE(mp, pgn, cs) \
+	(sizeof(*(mp)) + __PA_SIZE(mp, pgn) + (((cs) == 0) ? 0 : \
+	(sizeof(struct rte_mempool_cache) * RTE_MAX_LCORE)))
 
 /**
  * Return true if the whole mempool is in contiguous memory.
@@ -755,19 +757,16 @@ static inline void __attribute__((always_inline))
 __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
 		    unsigned n, int is_mp)
 {
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	struct rte_mempool_cache *cache;
 	uint32_t index;
 	void **cache_objs;
 	unsigned lcore_id = rte_lcore_id();
 	uint32_t cache_size = mp->cache_size;
 	uint32_t flushthresh = mp->cache_flushthresh;
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
 
 	/* increment stat now, adding in mempool always success */
 	__MEMPOOL_STAT_ADD(mp, put, n);
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	/* cache is not enabled or single producer or non-EAL thread */
 	if (unlikely(cache_size == 0 || is_mp == 0 ||
 		     lcore_id >= RTE_MAX_LCORE))
@@ -802,7 +801,6 @@ __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
 	return;
 
 ring_enqueue:
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
 
 	/* push remaining objects in ring */
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
@@ -946,7 +944,6 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
 		   unsigned n, int is_mc)
 {
 	int ret;
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	struct rte_mempool_cache *cache;
 	uint32_t index, len;
 	void **cache_objs;
@@ -992,7 +989,6 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
 	return 0;
 
 ring_dequeue:
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
 
 	/* get remaining objects from ring */
 	if (is_mc)
@@ -1293,7 +1289,8 @@ void rte_mempool_audit(const struct rte_mempool *mp);
  */
 static inline void *rte_mempool_get_priv(struct rte_mempool *mp)
 {
-	return (char *)mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num);
+	return (char *)mp +
+		MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size);
 }
 
 /**
-- 
2.5.4 (Apple Git-61)

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: [PATCH v4] mempool: reduce rte_mempool structure size
  2016-02-12 18:36   ` [PATCH v4] " Keith Wiles
@ 2016-02-15  9:20     ` Olivier MATZ
  2016-04-14  9:42     ` [PATCH v5] " Olivier Matz
  1 sibling, 0 replies; 32+ messages in thread
From: Olivier MATZ @ 2016-02-15  9:20 UTC (permalink / raw)
  To: Keith Wiles, dev



On 02/12/2016 07:36 PM, Keith Wiles wrote:
> The rte_mempool structure is changed, which will cause an ABI change
> for this structure. Providing backward compat is not reasonable
> here as this structure is used in multiple defines/inlines.
> 
> Allow mempool cache support to be dynamic depending on if the
> mempool being created needs cache support. Saves about 1.5M of
> memory used by the rte_mempool structure.
> 
> Allocating small mempools which do not require cache can consume
> larges amounts of memory if you have a number of these mempools.
> 
> Change to be effective in release 16.07.
> 
> Signed-off-by: Keith Wiles <keith.wiles@intel.com>

Acked-by: Olivier Matz <olivier.matz@6wind.com>
(for 16.07)

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v3] mempool: reduce rte_mempool structure size
  2016-02-12 15:50               ` Olivier MATZ
  2016-02-12 15:58                 ` Wiles, Keith
@ 2016-02-15  9:58                 ` Hunt, David
  2016-02-15 10:15                   ` Olivier MATZ
  1 sibling, 1 reply; 32+ messages in thread
From: Hunt, David @ 2016-02-15  9:58 UTC (permalink / raw)
  To: Olivier MATZ, Thomas Monjalon, Wiles, Keith; +Cc: dev

On 12/02/2016 15:50, Olivier MATZ wrote:
> - NEXT_ABI does make the code harder to read in this case, and I'm
>    thinking about the patchset from David Hunt (external mempool handler)
>    that will be in the same situation, and maybe also another patchset
>    I'm working on.

Olivier,
     I'm working on that at the moment with the external mempool handler 
code. However, it crossed my mind that we have a choice to use symbol 
versioning OR use NEXT_ABI. Would one method be preferred over the other?
Regards,
David.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v3] mempool: reduce rte_mempool structure size
  2016-02-15  9:58                 ` Hunt, David
@ 2016-02-15 10:15                   ` Olivier MATZ
  2016-02-15 10:21                     ` Hunt, David
  0 siblings, 1 reply; 32+ messages in thread
From: Olivier MATZ @ 2016-02-15 10:15 UTC (permalink / raw)
  To: Hunt, David, Thomas Monjalon, Wiles, Keith; +Cc: dev

Hi David,

On 02/15/2016 10:58 AM, Hunt, David wrote:
> On 12/02/2016 15:50, Olivier MATZ wrote:
>> - NEXT_ABI does make the code harder to read in this case, and I'm
>>    thinking about the patchset from David Hunt (external mempool handler)
>>    that will be in the same situation, and maybe also another patchset
>>    I'm working on.
> 
> Olivier,
>     I'm working on that at the moment with the external mempool handler
> code. However, it crossed my mind that we have a choice to use symbol
> versioning OR use NEXT_ABI. Would one method be preferred over the other?

I think symbol versioning should always be preferred when possible.

In your case, as far as I remember, you are updating the rte_mempool
structure, which is accessed by static inline functions. I don't think
it is easily manageable with symbol versioning. Moreover, the ABI will
already be broken by Keith's patch, so I think it's less problematic
to have other patches breaking the ABI at the same time.
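
(To make the inline problem concrete, a hypothetical app-side snippet,
not code from the patch:

	/* Inlined into the application binary at build time: */
	static inline uint32_t
	app_cache_size(const struct rte_mempool *mp)
	{
		/* The offset of cache_size is frozen here. */
		return mp->cache_size;
	}

Symbol versioning can export two versions of a function from the
library, but it cannot fix a field offset that was already inlined
into an application built against the old struct layout.)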

Regards,
Olivier

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v3] mempool: reduce rte_mempool structure size
  2016-02-15 10:15                   ` Olivier MATZ
@ 2016-02-15 10:21                     ` Hunt, David
  2016-02-15 12:31                       ` Olivier MATZ
  0 siblings, 1 reply; 32+ messages in thread
From: Hunt, David @ 2016-02-15 10:21 UTC (permalink / raw)
  To: Olivier MATZ, Thomas Monjalon, Wiles, Keith; +Cc: dev

On 15/02/2016 10:15, Olivier MATZ wrote:
> Hi David,
>
> On 02/15/2016 10:58 AM, Hunt, David wrote:
>> On 12/02/2016 15:50, Olivier MATZ wrote:
>>> - NEXT_ABI does make the code harder to read in this case, and I'm
>>>     thinking about the patchset from David Hunt (external mempool handler)
>>>     that will be in the same situation, and maybe also another patchset
>>>     I'm working on.
>>
>> Olivier,
>>      I'm working on that at the moment with the external mempool handler
>> code. However, it crossed my mind that we have a choice to use symbol
>> versioning OR use NEXT_ABI. Would one method be preferred over the other?
>
> I think symbol versioning should always be preferred when possible.
>
> In your case, as far as I remember, you are updating the rte_mempool
> structure, which is accessed by static inline functions. I don't think
> it is easily manageable with symbol versioning. Moreover, the ABI will
> already be broken by Keith's patch, so I think it's less problematic
> to have other patches breaking the ABI at the same time.

OK, Thanks for that. I'll use NEXT_ABI in this case so. :)

Regards,
David.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v3] mempool: reduce rte_mempool structure size
  2016-02-15 10:21                     ` Hunt, David
@ 2016-02-15 12:31                       ` Olivier MATZ
  0 siblings, 0 replies; 32+ messages in thread
From: Olivier MATZ @ 2016-02-15 12:31 UTC (permalink / raw)
  To: Hunt, David, Thomas Monjalon, Wiles, Keith; +Cc: dev

Hi David,

On 02/15/2016 11:21 AM, Hunt, David wrote:
> On 15/02/2016 10:15, Olivier MATZ wrote:
>> On 02/15/2016 10:58 AM, Hunt, David wrote:
>>>      I'm working on that at the moment with the external mempool handler
>>> code. However, it crossed my mind that we have a choice to use symbol
>>> versioning OR use NEXT_ABI. Would one method be preferred over the
>>> other?
>>
>> I think symbol versioning should always be preferred when possible.
>>
>> In your case, as far as I remember, you are updating the rte_mempool
>> structure, which is accessed by static inline functions. I don't think
>> it is easily manageable with symbol versioning. Moreover, the ABI will
>> already be broken by Keith's patch, so I think it's less problematic
>> to have other patches breaking the ABI at the same time.
> 
> OK, Thanks for that. I'll use NEXT_ABI in this case so. :)

Just to let you know in case you missed it: Keith's patch (v3 [1] and
v4 [2]) ended up without the NEXT_ABI ifdefs, because they were
too heavy.

So for your patches it will also depend on the complexity of the
changes. You can have a try with NEXT_ABI and see if the code is
still maintainable or not. If not, the process is to push a deprecation
notice for 16.04 and the code for 16.07.

Regards,
Olivier

[1] v3: http://dpdk.org/ml/archives/dev/2016-February/033004.html
[2] v4: http://dpdk.org/ml/archives/dev/2016-February/033102.html

^ permalink raw reply	[flat|nested] 32+ messages in thread

* [PATCH v5] mempool: reduce rte_mempool structure size
  2016-02-12 18:36   ` [PATCH v4] " Keith Wiles
  2016-02-15  9:20     ` Olivier MATZ
@ 2016-04-14  9:42     ` Olivier Matz
  2016-04-14 13:28       ` Wiles, Keith
                         ` (2 more replies)
  1 sibling, 3 replies; 32+ messages in thread
From: Olivier Matz @ 2016-04-14  9:42 UTC (permalink / raw)
  To: dev, keith.wiles; +Cc: thomas.monjalon, pmatilai

From: Keith Wiles <keith.wiles@intel.com>

The rte_mempool structure is changed, which will cause an ABI change
for this structure. Providing backward compat is not reasonable
here as this structure is used in multiple defines/inlines.

Allow mempool cache support to be dynamic, depending on whether the
mempool being created needs cache support. This saves about 1.5 MB of
memory per rte_mempool structure.

Allocating many small mempools which do not require a cache can
otherwise consume large amounts of memory.

Change to be effective in release 16.07.

Signed-off-by: Keith Wiles <keith.wiles@intel.com>
Acked-by: Olivier Matz <olivier.matz@6wind.com>
---

Changes in v5:

- use RTE_PTR_ADD() instead of a cast to (char *) to fix compilation on Tilera.
  Error log was:

  rte_mempool.c: In function ‘rte_mempool_xmem_create’:
  rte_mempool.c:595: error: cast increases required alignment of target type


 app/test/test_mempool.c          |  4 +--
 lib/librte_mempool/rte_mempool.c | 55 ++++++++++++++++++----------------------
 lib/librte_mempool/rte_mempool.h | 29 ++++++++++-----------
 3 files changed, 40 insertions(+), 48 deletions(-)

diff --git a/app/test/test_mempool.c b/app/test/test_mempool.c
index f0f823b..10e1fa4 100644
--- a/app/test/test_mempool.c
+++ b/app/test/test_mempool.c
@@ -122,8 +122,8 @@ test_mempool_basic(void)
 		return -1;
 
 	printf("get private data\n");
-	if (rte_mempool_get_priv(mp) !=
-			(char*) mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num))
+	if (rte_mempool_get_priv(mp) != (char *)mp +
+			MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size))
 		return -1;
 
 	printf("get physical address of an object\n");
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index f8781e1..7a0e07e 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -452,12 +452,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 	/* compilation-time checks */
 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool) &
 			  RTE_CACHE_LINE_MASK) != 0);
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_cache) &
 			  RTE_CACHE_LINE_MASK) != 0);
-	RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, local_cache) &
-			  RTE_CACHE_LINE_MASK) != 0);
-#endif
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_debug_stats) &
 			  RTE_CACHE_LINE_MASK) != 0);
@@ -527,9 +523,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 		 */
 		int head = sizeof(struct rte_mempool);
 		int new_size = (private_data_size + head) % page_size;
-		if (new_size) {
+		if (new_size)
 			private_data_size += page_size - new_size;
-		}
 	}
 
 	/* try to allocate tailq entry */
@@ -544,7 +539,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 	 * store mempool objects. Otherwise reserve a memzone that is large
 	 * enough to hold mempool header and metadata plus mempool objects.
 	 */
-	mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num) + private_data_size;
+	mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size);
+	mempool_size += private_data_size;
 	mempool_size = RTE_ALIGN_CEIL(mempool_size, RTE_MEMPOOL_ALIGN);
 	if (vaddr == NULL)
 		mempool_size += (size_t)objsz.total_size * n;
@@ -591,8 +587,15 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 	mp->cache_flushthresh = CALC_CACHE_FLUSHTHRESH(cache_size);
 	mp->private_data_size = private_data_size;
 
+	/*
+	 * local_cache pointer is set even if cache_size is zero.
+	 * The local_cache points to just past the elt_pa[] array.
+	 */
+	mp->local_cache = (struct rte_mempool_cache *)
+		RTE_PTR_ADD(mp, MEMPOOL_HEADER_SIZE(mp, pg_num, 0));
+
 	/* calculate address of the first element for continuous mempool. */
-	obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num) +
+	obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size) +
 		private_data_size;
 	obj = RTE_PTR_ALIGN_CEIL(obj, RTE_MEMPOOL_ALIGN);
 
@@ -606,9 +609,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 		mp->elt_va_start = (uintptr_t)obj;
 		mp->elt_pa[0] = mp->phys_addr +
 			(mp->elt_va_start - (uintptr_t)mp);
-
-	/* mempool elements in a separate chunk of memory. */
 	} else {
+		/* mempool elements in a separate chunk of memory. */
 		mp->elt_va_start = (uintptr_t)vaddr;
 		memcpy(mp->elt_pa, paddr, sizeof (mp->elt_pa[0]) * pg_num);
 	}
@@ -643,19 +645,15 @@ unsigned
 rte_mempool_count(const struct rte_mempool *mp)
 {
 	unsigned count;
+	unsigned lcore_id;
 
 	count = rte_ring_count(mp->ring);
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
-	{
-		unsigned lcore_id;
-		if (mp->cache_size == 0)
-			return count;
+	if (mp->cache_size == 0)
+		return count;
 
-		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
-			count += mp->local_cache[lcore_id].len;
-	}
-#endif
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
+		count += mp->local_cache[lcore_id].len;
 
 	/*
 	 * due to race condition (access to len is not locked), the
@@ -670,13 +668,16 @@ rte_mempool_count(const struct rte_mempool *mp)
 static unsigned
 rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
 {
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	unsigned lcore_id;
 	unsigned count = 0;
 	unsigned cache_count;
 
 	fprintf(f, "  cache infos:\n");
 	fprintf(f, "    cache_size=%"PRIu32"\n", mp->cache_size);
+
+	if (mp->cache_size == 0)
+		return count;
+
 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
 		cache_count = mp->local_cache[lcore_id].len;
 		fprintf(f, "    cache_count[%u]=%u\n", lcore_id, cache_count);
@@ -684,11 +685,6 @@ rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
 	}
 	fprintf(f, "    total_cache_count=%u\n", count);
 	return count;
-#else
-	RTE_SET_USED(mp);
-	fprintf(f, "  cache disabled\n");
-	return 0;
-#endif
 }
 
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
@@ -753,13 +749,16 @@ mempool_audit_cookies(const struct rte_mempool *mp)
 #define mempool_audit_cookies(mp) do {} while(0)
 #endif
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 /* check cookies before and after objects */
 static void
 mempool_audit_cache(const struct rte_mempool *mp)
 {
 	/* check cache size consistency */
 	unsigned lcore_id;
+
+	if (mp->cache_size == 0)
+		return;
+
 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
 		if (mp->local_cache[lcore_id].len > mp->cache_flushthresh) {
 			RTE_LOG(CRIT, MEMPOOL, "badness on cache[%u]\n",
@@ -768,10 +767,6 @@ mempool_audit_cache(const struct rte_mempool *mp)
 		}
 	}
 }
-#else
-#define mempool_audit_cache(mp) do {} while(0)
-#endif
-
 
 /* check the consistency of mempool (size, cookies, ...) */
 void
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 9745bf0..8595e77 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -95,7 +95,6 @@ struct rte_mempool_debug_stats {
 } __rte_cache_aligned;
 #endif
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 /**
  * A structure that stores a per-core object cache.
  */
@@ -107,7 +106,6 @@ struct rte_mempool_cache {
 	 */
 	void *objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 3]; /**< Cache objects */
 } __rte_cache_aligned;
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
 
 /**
  * A structure that stores the size of mempool elements.
@@ -194,10 +192,7 @@ struct rte_mempool {
 
 	unsigned private_data_size;      /**< Size of private data. */
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
-	/** Per-lcore local cache. */
-	struct rte_mempool_cache local_cache[RTE_MAX_LCORE];
-#endif
+	struct rte_mempool_cache *local_cache; /**< Per-lcore local cache */
 
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
 	/** Per-lcore statistics. */
@@ -247,6 +242,13 @@ struct rte_mempool {
 #endif
 
 /**
+ * Size of the elt_pa array, based on the number of pages. (Internal use)
+ */
+#define __PA_SIZE(mp, pgn) \
+	RTE_ALIGN_CEIL((((pgn) - RTE_DIM((mp)->elt_pa)) * \
+	sizeof((mp)->elt_pa[0])), RTE_CACHE_LINE_SIZE)
+
+/**
  * Calculate the size of the mempool header.
  *
  * @param mp
@@ -254,9 +256,9 @@ struct rte_mempool {
  * @param pgn
  *   Number of pages used to store mempool objects.
  */
-#define	MEMPOOL_HEADER_SIZE(mp, pgn)	(sizeof(*(mp)) + \
-	RTE_ALIGN_CEIL(((pgn) - RTE_DIM((mp)->elt_pa)) * \
-	sizeof ((mp)->elt_pa[0]), RTE_CACHE_LINE_SIZE))
+#define MEMPOOL_HEADER_SIZE(mp, pgn, cs) \
+	(sizeof(*(mp)) + __PA_SIZE(mp, pgn) + (((cs) == 0) ? 0 : \
+	(sizeof(struct rte_mempool_cache) * RTE_MAX_LCORE)))
 
 /**
  * Return true if the whole mempool is in contiguous memory.
@@ -755,19 +757,16 @@ static inline void __attribute__((always_inline))
 __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
 		    unsigned n, int is_mp)
 {
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	struct rte_mempool_cache *cache;
 	uint32_t index;
 	void **cache_objs;
 	unsigned lcore_id = rte_lcore_id();
 	uint32_t cache_size = mp->cache_size;
 	uint32_t flushthresh = mp->cache_flushthresh;
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
 
 	/* increment stat now, adding in mempool always success */
 	__MEMPOOL_STAT_ADD(mp, put, n);
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	/* cache is not enabled or single producer or non-EAL thread */
 	if (unlikely(cache_size == 0 || is_mp == 0 ||
 		     lcore_id >= RTE_MAX_LCORE))
@@ -802,7 +801,6 @@ __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
 	return;
 
 ring_enqueue:
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
 
 	/* push remaining objects in ring */
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
@@ -946,7 +944,6 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
 		   unsigned n, int is_mc)
 {
 	int ret;
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	struct rte_mempool_cache *cache;
 	uint32_t index, len;
 	void **cache_objs;
@@ -992,7 +989,6 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
 	return 0;
 
 ring_dequeue:
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
 
 	/* get remaining objects from ring */
 	if (is_mc)
@@ -1293,7 +1289,8 @@ void rte_mempool_audit(const struct rte_mempool *mp);
  */
 static inline void *rte_mempool_get_priv(struct rte_mempool *mp)
 {
-	return (char *)mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num);
+	return (char *)mp +
+		MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size);
 }
 
 /**
-- 
2.1.4
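
For scale, assuming the default build-time values of this era
(RTE_MEMPOOL_CACHE_MAX_SIZE = 512, RTE_MAX_LCORE = 128, 64-bit
pointers), the previously embedded cache array cost roughly:

  sizeof(struct rte_mempool_cache) ~= 512 * 3 * 8 B + len, cache-aligned
                                   ~= 12 kB
  12 kB * 128 lcores               ~= 1.5 MB per mempool

which is where the "about 1.5 MB" figure above comes from; with this
patch the cache block is only reserved when cache_size is non-zero.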

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: [PATCH v5] mempool: reduce rte_mempool structure size
  2016-04-14  9:42     ` [PATCH v5] " Olivier Matz
@ 2016-04-14 13:28       ` Wiles, Keith
  2016-04-14 13:43         ` Olivier MATZ
  2016-04-14 13:53       ` Wiles, Keith
  2016-05-17  5:31       ` Thomas Monjalon
  2 siblings, 1 reply; 32+ messages in thread
From: Wiles, Keith @ 2016-04-14 13:28 UTC (permalink / raw)
  To: Olivier Matz, dev; +Cc: thomas.monjalon, pmatilai

>From: Keith Wiles <keith.wiles@intel.com>
>
>The rte_mempool structure is changed, which will cause an ABI change
>for this structure. Providing backward compat is not reasonable
>here as this structure is used in multiple defines/inlines.
>
>Allow mempool cache support to be dynamic, depending on whether the
>mempool being created needs cache support. This saves about 1.5 MB of
>memory per rte_mempool structure.
>
>Allocating many small mempools which do not require a cache can
>otherwise consume large amounts of memory.
>
>Change to be effective in release 16.07.
>
>Signed-off-by: Keith Wiles <keith.wiles@intel.com>
>Acked-by: Olivier Matz <olivier.matz@6wind.com>
>---
>
>Changes in v5:
>
>- use RTE_PTR_ADD() instead of a cast to (char *) to fix compilation on Tilera.
>  Error log was:
>
>  rte_mempool.c: In function ‘rte_mempool_xmem_create’:
>  rte_mempool.c:595: error: cast increases required alignment of target type
>
>
> app/test/test_mempool.c          |  4 +--
> lib/librte_mempool/rte_mempool.c | 55 ++++++++++++++++++----------------------
> lib/librte_mempool/rte_mempool.h | 29 ++++++++++-----------
> 3 files changed, 40 insertions(+), 48 deletions(-)
>
>diff --git a/app/test/test_mempool.c b/app/test/test_mempool.c
>index f0f823b..10e1fa4 100644
>--- a/app/test/test_mempool.c
>+++ b/app/test/test_mempool.c
>@@ -122,8 +122,8 @@ test_mempool_basic(void)
> 		return -1;
> 
> 	printf("get private data\n");
>-	if (rte_mempool_get_priv(mp) !=
>-			(char*) mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num))
>+	if (rte_mempool_get_priv(mp) != (char *)mp +
>+			MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size))

Should we not add the RTE_PTR_ADD() here as well?

> 		return -1;
> 
> 	printf("get physical address of an object\n");
>diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
>index f8781e1..7a0e07e 100644
>--- a/lib/librte_mempool/rte_mempool.c
>+++ b/lib/librte_mempool/rte_mempool.c
>@@ -452,12 +452,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
> 	/* compilation-time checks */
> 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool) &
> 			  RTE_CACHE_LINE_MASK) != 0);
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_cache) &
> 			  RTE_CACHE_LINE_MASK) != 0);
>-	RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, local_cache) &
>-			  RTE_CACHE_LINE_MASK) != 0);
>-#endif
> #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
> 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_debug_stats) &
> 			  RTE_CACHE_LINE_MASK) != 0);
>@@ -527,9 +523,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
> 		 */
> 		int head = sizeof(struct rte_mempool);
> 		int new_size = (private_data_size + head) % page_size;
>-		if (new_size) {
>+		if (new_size)
> 			private_data_size += page_size - new_size;
>-		}
> 	}
> 
> 	/* try to allocate tailq entry */
>@@ -544,7 +539,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
> 	 * store mempool objects. Otherwise reserve a memzone that is large
> 	 * enough to hold mempool header and metadata plus mempool objects.
> 	 */
>-	mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num) + private_data_size;
>+	mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size);
>+	mempool_size += private_data_size;
> 	mempool_size = RTE_ALIGN_CEIL(mempool_size, RTE_MEMPOOL_ALIGN);
> 	if (vaddr == NULL)
> 		mempool_size += (size_t)objsz.total_size * n;
>@@ -591,8 +587,15 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
> 	mp->cache_flushthresh = CALC_CACHE_FLUSHTHRESH(cache_size);
> 	mp->private_data_size = private_data_size;
> 
>+	/*
>+	 * local_cache pointer is set even if cache_size is zero.
>+	 * The local_cache points to just past the elt_pa[] array.
>+	 */
>+	mp->local_cache = (struct rte_mempool_cache *)
>+		RTE_PTR_ADD(mp, MEMPOOL_HEADER_SIZE(mp, pg_num, 0));
>+
> 	/* calculate address of the first element for continuous mempool. */
>-	obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num) +
>+	obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size) +
> 		private_data_size;
> 	obj = RTE_PTR_ALIGN_CEIL(obj, RTE_MEMPOOL_ALIGN);
> 
>@@ -606,9 +609,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
> 		mp->elt_va_start = (uintptr_t)obj;
> 		mp->elt_pa[0] = mp->phys_addr +
> 			(mp->elt_va_start - (uintptr_t)mp);
>-
>-	/* mempool elements in a separate chunk of memory. */
> 	} else {
>+		/* mempool elements in a separate chunk of memory. */
> 		mp->elt_va_start = (uintptr_t)vaddr;
> 		memcpy(mp->elt_pa, paddr, sizeof (mp->elt_pa[0]) * pg_num);
> 	}
>@@ -643,19 +645,15 @@ unsigned
> rte_mempool_count(const struct rte_mempool *mp)
> {
> 	unsigned count;
>+	unsigned lcore_id;
> 
> 	count = rte_ring_count(mp->ring);
> 
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>-	{
>-		unsigned lcore_id;
>-		if (mp->cache_size == 0)
>-			return count;
>+	if (mp->cache_size == 0)
>+		return count;
> 
>-		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
>-			count += mp->local_cache[lcore_id].len;
>-	}
>-#endif
>+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
>+		count += mp->local_cache[lcore_id].len;
> 
> 	/*
> 	 * due to race condition (access to len is not locked), the
>@@ -670,13 +668,16 @@ rte_mempool_count(const struct rte_mempool *mp)
> static unsigned
> rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
> {
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> 	unsigned lcore_id;
> 	unsigned count = 0;
> 	unsigned cache_count;
> 
> 	fprintf(f, "  cache infos:\n");
> 	fprintf(f, "    cache_size=%"PRIu32"\n", mp->cache_size);
>+
>+	if (mp->cache_size == 0)
>+		return count;
>+
> 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
> 		cache_count = mp->local_cache[lcore_id].len;
> 		fprintf(f, "    cache_count[%u]=%u\n", lcore_id, cache_count);
>@@ -684,11 +685,6 @@ rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
> 	}
> 	fprintf(f, "    total_cache_count=%u\n", count);
> 	return count;
>-#else
>-	RTE_SET_USED(mp);
>-	fprintf(f, "  cache disabled\n");
>-	return 0;
>-#endif
> }
> 
> #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
>@@ -753,13 +749,16 @@ mempool_audit_cookies(const struct rte_mempool *mp)
> #define mempool_audit_cookies(mp) do {} while(0)
> #endif
> 
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> /* check cookies before and after objects */
> static void
> mempool_audit_cache(const struct rte_mempool *mp)
> {
> 	/* check cache size consistency */
> 	unsigned lcore_id;
>+
>+	if (mp->cache_size == 0)
>+		return;
>+
> 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
> 		if (mp->local_cache[lcore_id].len > mp->cache_flushthresh) {
> 			RTE_LOG(CRIT, MEMPOOL, "badness on cache[%u]\n",
>@@ -768,10 +767,6 @@ mempool_audit_cache(const struct rte_mempool *mp)
> 		}
> 	}
> }
>-#else
>-#define mempool_audit_cache(mp) do {} while(0)
>-#endif
>-
> 
> /* check the consistency of mempool (size, cookies, ...) */
> void
>diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
>index 9745bf0..8595e77 100644
>--- a/lib/librte_mempool/rte_mempool.h
>+++ b/lib/librte_mempool/rte_mempool.h
>@@ -95,7 +95,6 @@ struct rte_mempool_debug_stats {
> } __rte_cache_aligned;
> #endif
> 
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> /**
>  * A structure that stores a per-core object cache.
>  */
>@@ -107,7 +106,6 @@ struct rte_mempool_cache {
> 	 */
> 	void *objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 3]; /**< Cache objects */
> } __rte_cache_aligned;
>-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
> 
> /**
>  * A structure that stores the size of mempool elements.
>@@ -194,10 +192,7 @@ struct rte_mempool {
> 
> 	unsigned private_data_size;      /**< Size of private data. */
> 
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>-	/** Per-lcore local cache. */
>-	struct rte_mempool_cache local_cache[RTE_MAX_LCORE];
>-#endif
>+	struct rte_mempool_cache *local_cache; /**< Per-lcore local cache */
> 
> #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
> 	/** Per-lcore statistics. */
>@@ -247,6 +242,13 @@ struct rte_mempool {
> #endif
> 
> /**
>+ * Size of the elt_pa array, based on the number of pages. (Internal use)
>+ */
>+#define __PA_SIZE(mp, pgn) \
>+	RTE_ALIGN_CEIL((((pgn) - RTE_DIM((mp)->elt_pa)) * \
>+	sizeof((mp)->elt_pa[0])), RTE_CACHE_LINE_SIZE)
>+
>+/**
>  * Calculate the size of the mempool header.
>  *
>  * @param mp
>@@ -254,9 +256,9 @@ struct rte_mempool {
>  * @param pgn
>  *   Number of pages used to store mempool objects.
>  */
>-#define	MEMPOOL_HEADER_SIZE(mp, pgn)	(sizeof(*(mp)) + \
>-	RTE_ALIGN_CEIL(((pgn) - RTE_DIM((mp)->elt_pa)) * \
>-	sizeof ((mp)->elt_pa[0]), RTE_CACHE_LINE_SIZE))
>+#define MEMPOOL_HEADER_SIZE(mp, pgn, cs) \
>+	(sizeof(*(mp)) + __PA_SIZE(mp, pgn) + (((cs) == 0) ? 0 : \
>+	(sizeof(struct rte_mempool_cache) * RTE_MAX_LCORE)))
> 
> /**
>  * Return true if the whole mempool is in contiguous memory.
>@@ -755,19 +757,16 @@ static inline void __attribute__((always_inline))
> __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
> 		    unsigned n, int is_mp)
> {
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> 	struct rte_mempool_cache *cache;
> 	uint32_t index;
> 	void **cache_objs;
> 	unsigned lcore_id = rte_lcore_id();
> 	uint32_t cache_size = mp->cache_size;
> 	uint32_t flushthresh = mp->cache_flushthresh;
>-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
> 
> 	/* increment stat now, adding in mempool always success */
> 	__MEMPOOL_STAT_ADD(mp, put, n);
> 
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> 	/* cache is not enabled or single producer or non-EAL thread */
> 	if (unlikely(cache_size == 0 || is_mp == 0 ||
> 		     lcore_id >= RTE_MAX_LCORE))
>@@ -802,7 +801,6 @@ __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
> 	return;
> 
> ring_enqueue:
>-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
> 
> 	/* push remaining objects in ring */
> #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
>@@ -946,7 +944,6 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
> 		   unsigned n, int is_mc)
> {
> 	int ret;
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> 	struct rte_mempool_cache *cache;
> 	uint32_t index, len;
> 	void **cache_objs;
>@@ -992,7 +989,6 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
> 	return 0;
> 
> ring_dequeue:
>-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
> 
> 	/* get remaining objects from ring */
> 	if (is_mc)
>@@ -1293,7 +1289,8 @@ void rte_mempool_audit(const struct rte_mempool *mp);
>  */
> static inline void *rte_mempool_get_priv(struct rte_mempool *mp)
> {
>-	return (char *)mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num);
>+	return (char *)mp +
>+		MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size);

And here?

> }
> 
> /**
>-- 
>2.1.4
>
>


Regards,
Keith

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v5] mempool: reduce rte_mempool structure size
  2016-04-14 13:28       ` Wiles, Keith
@ 2016-04-14 13:43         ` Olivier MATZ
  0 siblings, 0 replies; 32+ messages in thread
From: Olivier MATZ @ 2016-04-14 13:43 UTC (permalink / raw)
  To: Wiles, Keith, dev; +Cc: thomas.monjalon, pmatilai

Hi,

On 04/14/2016 03:28 PM, Wiles, Keith wrote:
>> From: Keith Wiles <keith.wiles@intel.com>
>> --- a/app/test/test_mempool.c
>> +++ b/app/test/test_mempool.c
>> @@ -122,8 +122,8 @@ test_mempool_basic(void)
>> 		return -1;
>>
>> 	printf("get private data\n");
>> -	if (rte_mempool_get_priv(mp) !=
>> -			(char*) mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num))
>> +	if (rte_mempool_get_priv(mp) != (char *)mp +
>> +			MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size))
>
> Should we not add the RTE_PTR_ADD() here as well?

The displayed error message was "cast increases required alignment
of target type". Here the cast is to char *, whose alignment
constraint (1) is the lowest possible, so the cast cannot increase
the required alignment. I think there is no issue here... at least
I can say it compiles without error.
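
A small sketch of the distinction (illustrative only; the helper name
and the offset parameter are hypothetical):

	#include <rte_common.h>
	#include <rte_mempool.h>

	static void *
	cache_ptr(struct rte_mempool *mp, size_t off)
	{
		/* Casting down to char * never raises the required
		 * alignment, since the alignment of char is 1: */
		char *base = (char *)mp + off;
		(void)base;

		/* Casting that char * back up to a more strictly aligned
		 * type, e.g. (struct rte_mempool_cache *)base, is what the
		 * Tilera gcc flagged. Going through void * with
		 * RTE_PTR_ADD() avoids the warning, which is what v5 does: */
		return RTE_PTR_ADD(mp, off);
	}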

Regards,
Olivier

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v5] mempool: reduce rte_mempool structure size
  2016-04-14  9:42     ` [PATCH v5] " Olivier Matz
  2016-04-14 13:28       ` Wiles, Keith
@ 2016-04-14 13:53       ` Wiles, Keith
  2016-05-17  5:31       ` Thomas Monjalon
  2 siblings, 0 replies; 32+ messages in thread
From: Wiles, Keith @ 2016-04-14 13:53 UTC (permalink / raw)
  To: Olivier Matz, dev; +Cc: thomas.monjalon, pmatilai

>From: Keith Wiles <keith.wiles@intel.com>
>
>The rte_mempool structure is changed, which will cause an ABI change
>for this structure. Providing backward compat is not reasonable
>here as this structure is used in multiple defines/inlines.
>
>Allow mempool cache support to be dynamic, depending on whether the
>mempool being created needs cache support. This saves about 1.5 MB of
>memory per rte_mempool structure.
>
>Allocating many small mempools which do not require a cache can
>otherwise consume large amounts of memory.
>
>Change to be effective in release 16.07.
>
>Signed-off-by: Keith Wiles <keith.wiles@intel.com>
>Acked-by: Olivier Matz <olivier.matz@6wind.com>

For the change to this patch:
Acked-by: Keith Wiles <keith.wiles@intel.com>

>---
>
>Changes in v5:
>
>- use RTE_PTR_ADD() instead of a cast to (char *) to fix compilation on Tilera.
>  Error log was:
>
>  rte_mempool.c: In function ‘rte_mempool_xmem_create’:
>  rte_mempool.c:595: error: cast increases required alignment of target type
>
>
> app/test/test_mempool.c          |  4 +--
> lib/librte_mempool/rte_mempool.c | 55 ++++++++++++++++++----------------------
> lib/librte_mempool/rte_mempool.h | 29 ++++++++++-----------
> 3 files changed, 40 insertions(+), 48 deletions(-)
>
>diff --git a/app/test/test_mempool.c b/app/test/test_mempool.c
>index f0f823b..10e1fa4 100644
>--- a/app/test/test_mempool.c
>+++ b/app/test/test_mempool.c
>@@ -122,8 +122,8 @@ test_mempool_basic(void)
> 		return -1;
> 
> 	printf("get private data\n");
>-	if (rte_mempool_get_priv(mp) !=
>-			(char*) mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num))
>+	if (rte_mempool_get_priv(mp) != (char *)mp +
>+			MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size))
> 		return -1;
> 
> 	printf("get physical address of an object\n");
>diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
>index f8781e1..7a0e07e 100644
>--- a/lib/librte_mempool/rte_mempool.c
>+++ b/lib/librte_mempool/rte_mempool.c
>@@ -452,12 +452,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
> 	/* compilation-time checks */
> 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool) &
> 			  RTE_CACHE_LINE_MASK) != 0);
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_cache) &
> 			  RTE_CACHE_LINE_MASK) != 0);
>-	RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, local_cache) &
>-			  RTE_CACHE_LINE_MASK) != 0);
>-#endif
> #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
> 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_debug_stats) &
> 			  RTE_CACHE_LINE_MASK) != 0);
>@@ -527,9 +523,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
> 		 */
> 		int head = sizeof(struct rte_mempool);
> 		int new_size = (private_data_size + head) % page_size;
>-		if (new_size) {
>+		if (new_size)
> 			private_data_size += page_size - new_size;
>-		}
> 	}
> 
> 	/* try to allocate tailq entry */
>@@ -544,7 +539,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
> 	 * store mempool objects. Otherwise reserve a memzone that is large
> 	 * enough to hold mempool header and metadata plus mempool objects.
> 	 */
>-	mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num) + private_data_size;
>+	mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size);
>+	mempool_size += private_data_size;
> 	mempool_size = RTE_ALIGN_CEIL(mempool_size, RTE_MEMPOOL_ALIGN);
> 	if (vaddr == NULL)
> 		mempool_size += (size_t)objsz.total_size * n;
>@@ -591,8 +587,15 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
> 	mp->cache_flushthresh = CALC_CACHE_FLUSHTHRESH(cache_size);
> 	mp->private_data_size = private_data_size;
> 
>+	/*
>+	 * local_cache pointer is set even if cache_size is zero.
>+	 * The local_cache points to just past the elt_pa[] array.
>+	 */
>+	mp->local_cache = (struct rte_mempool_cache *)
>+		RTE_PTR_ADD(mp, MEMPOOL_HEADER_SIZE(mp, pg_num, 0));
>+
> 	/* calculate address of the first element for continuous mempool. */
>-	obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num) +
>+	obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size) +
> 		private_data_size;
> 	obj = RTE_PTR_ALIGN_CEIL(obj, RTE_MEMPOOL_ALIGN);
> 
>@@ -606,9 +609,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
> 		mp->elt_va_start = (uintptr_t)obj;
> 		mp->elt_pa[0] = mp->phys_addr +
> 			(mp->elt_va_start - (uintptr_t)mp);
>-
>-	/* mempool elements in a separate chunk of memory. */
> 	} else {
>+		/* mempool elements in a separate chunk of memory. */
> 		mp->elt_va_start = (uintptr_t)vaddr;
> 		memcpy(mp->elt_pa, paddr, sizeof (mp->elt_pa[0]) * pg_num);
> 	}
>@@ -643,19 +645,15 @@ unsigned
> rte_mempool_count(const struct rte_mempool *mp)
> {
> 	unsigned count;
>+	unsigned lcore_id;
> 
> 	count = rte_ring_count(mp->ring);
> 
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>-	{
>-		unsigned lcore_id;
>-		if (mp->cache_size == 0)
>-			return count;
>+	if (mp->cache_size == 0)
>+		return count;
> 
>-		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
>-			count += mp->local_cache[lcore_id].len;
>-	}
>-#endif
>+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
>+		count += mp->local_cache[lcore_id].len;
> 
> 	/*
> 	 * due to race condition (access to len is not locked), the
>@@ -670,13 +668,16 @@ rte_mempool_count(const struct rte_mempool *mp)
> static unsigned
> rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
> {
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> 	unsigned lcore_id;
> 	unsigned count = 0;
> 	unsigned cache_count;
> 
> 	fprintf(f, "  cache infos:\n");
> 	fprintf(f, "    cache_size=%"PRIu32"\n", mp->cache_size);
>+
>+	if (mp->cache_size == 0)
>+		return count;
>+
> 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
> 		cache_count = mp->local_cache[lcore_id].len;
> 		fprintf(f, "    cache_count[%u]=%u\n", lcore_id, cache_count);
>@@ -684,11 +685,6 @@ rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
> 	}
> 	fprintf(f, "    total_cache_count=%u\n", count);
> 	return count;
>-#else
>-	RTE_SET_USED(mp);
>-	fprintf(f, "  cache disabled\n");
>-	return 0;
>-#endif
> }
> 
> #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
>@@ -753,13 +749,16 @@ mempool_audit_cookies(const struct rte_mempool *mp)
> #define mempool_audit_cookies(mp) do {} while(0)
> #endif
> 
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> /* check cookies before and after objects */
> static void
> mempool_audit_cache(const struct rte_mempool *mp)
> {
> 	/* check cache size consistency */
> 	unsigned lcore_id;
>+
>+	if (mp->cache_size == 0)
>+		return;
>+
> 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
> 		if (mp->local_cache[lcore_id].len > mp->cache_flushthresh) {
> 			RTE_LOG(CRIT, MEMPOOL, "badness on cache[%u]\n",
>@@ -768,10 +767,6 @@ mempool_audit_cache(const struct rte_mempool *mp)
> 		}
> 	}
> }
>-#else
>-#define mempool_audit_cache(mp) do {} while(0)
>-#endif
>-
> 
> /* check the consistency of mempool (size, cookies, ...) */
> void
>diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
>index 9745bf0..8595e77 100644
>--- a/lib/librte_mempool/rte_mempool.h
>+++ b/lib/librte_mempool/rte_mempool.h
>@@ -95,7 +95,6 @@ struct rte_mempool_debug_stats {
> } __rte_cache_aligned;
> #endif
> 
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> /**
>  * A structure that stores a per-core object cache.
>  */
>@@ -107,7 +106,6 @@ struct rte_mempool_cache {
> 	 */
> 	void *objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 3]; /**< Cache objects */
> } __rte_cache_aligned;
>-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
> 
> /**
>  * A structure that stores the size of mempool elements.
>@@ -194,10 +192,7 @@ struct rte_mempool {
> 
> 	unsigned private_data_size;      /**< Size of private data. */
> 
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>-	/** Per-lcore local cache. */
>-	struct rte_mempool_cache local_cache[RTE_MAX_LCORE];
>-#endif
>+	struct rte_mempool_cache *local_cache; /**< Per-lcore local cache */
> 
> #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
> 	/** Per-lcore statistics. */
>@@ -247,6 +242,13 @@ struct rte_mempool {
> #endif
> 
> /**
>+ * Size of the elt_pa array, based on the number of pages. (Internal use)
>+ */
>+#define __PA_SIZE(mp, pgn) \
>+	RTE_ALIGN_CEIL((((pgn) - RTE_DIM((mp)->elt_pa)) * \
>+	sizeof((mp)->elt_pa[0])), RTE_CACHE_LINE_SIZE)
>+
>+/**
>  * Calculate the size of the mempool header.
>  *
>  * @param mp
>@@ -254,9 +256,9 @@ struct rte_mempool {
>  * @param pgn
>  *   Number of pages used to store mempool objects.
>  */
>-#define	MEMPOOL_HEADER_SIZE(mp, pgn)	(sizeof(*(mp)) + \
>-	RTE_ALIGN_CEIL(((pgn) - RTE_DIM((mp)->elt_pa)) * \
>-	sizeof ((mp)->elt_pa[0]), RTE_CACHE_LINE_SIZE))
>+#define MEMPOOL_HEADER_SIZE(mp, pgn, cs) \
>+	(sizeof(*(mp)) + __PA_SIZE(mp, pgn) + (((cs) == 0) ? 0 : \
>+	(sizeof(struct rte_mempool_cache) * RTE_MAX_LCORE)))
> 
> /**
>  * Return true if the whole mempool is in contiguous memory.
>@@ -755,19 +757,16 @@ static inline void __attribute__((always_inline))
> __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
> 		    unsigned n, int is_mp)
> {
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> 	struct rte_mempool_cache *cache;
> 	uint32_t index;
> 	void **cache_objs;
> 	unsigned lcore_id = rte_lcore_id();
> 	uint32_t cache_size = mp->cache_size;
> 	uint32_t flushthresh = mp->cache_flushthresh;
>-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
> 
> 	/* increment stat now, adding in mempool always success */
> 	__MEMPOOL_STAT_ADD(mp, put, n);
> 
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> 	/* cache is not enabled or single producer or non-EAL thread */
> 	if (unlikely(cache_size == 0 || is_mp == 0 ||
> 		     lcore_id >= RTE_MAX_LCORE))
>@@ -802,7 +801,6 @@ __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
> 	return;
> 
> ring_enqueue:
>-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
> 
> 	/* push remaining objects in ring */
> #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
>@@ -946,7 +944,6 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
> 		   unsigned n, int is_mc)
> {
> 	int ret;
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> 	struct rte_mempool_cache *cache;
> 	uint32_t index, len;
> 	void **cache_objs;
>@@ -992,7 +989,6 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
> 	return 0;
> 
> ring_dequeue:
>-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
> 
> 	/* get remaining objects from ring */
> 	if (is_mc)
>@@ -1293,7 +1289,8 @@ void rte_mempool_audit(const struct rte_mempool *mp);
>  */
> static inline void *rte_mempool_get_priv(struct rte_mempool *mp)
> {
>-	return (char *)mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num);
>+	return (char *)mp +
>+		MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size);
> }
> 
> /**
>-- 
>2.1.4
>
>


Regards,
Keith

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v5] mempool: reduce rte_mempool structure size
  2016-04-14  9:42     ` [PATCH v5] " Olivier Matz
  2016-04-14 13:28       ` Wiles, Keith
  2016-04-14 13:53       ` Wiles, Keith
@ 2016-05-17  5:31       ` Thomas Monjalon
  2 siblings, 0 replies; 32+ messages in thread
From: Thomas Monjalon @ 2016-05-17  5:31 UTC (permalink / raw)
  To: Olivier Matz, keith.wiles; +Cc: dev, pmatilai

2016-04-14 11:42, Olivier Matz:
> From: Keith Wiles <keith.wiles@intel.com>
> 
> The rte_mempool structure is changed, which will cause an ABI change
> for this structure. Providing backward compat is not reasonable
> here as this structure is used in multiple defines/inlines.

The deprecation notice must be removed by this patch.

[...]
> +/**
>   * Calculate the size of the mempool header.
>   *
>   * @param mp
> @@ -254,9 +256,9 @@ struct rte_mempool {
>   * @param pgn
>   *   Number of pages used to store mempool objects.

A new parameter has been forgotten:
 * @param cs
 *   Size of the per-lcore cache.

>   */
> -#define	MEMPOOL_HEADER_SIZE(mp, pgn)	(sizeof(*(mp)) + \
> -	RTE_ALIGN_CEIL(((pgn) - RTE_DIM((mp)->elt_pa)) * \
> -	sizeof ((mp)->elt_pa[0]), RTE_CACHE_LINE_SIZE))
> +#define MEMPOOL_HEADER_SIZE(mp, pgn, cs) \
> +	(sizeof(*(mp)) + __PA_SIZE(mp, pgn) + (((cs) == 0) ? 0 : \
> +	(sizeof(struct rte_mempool_cache) * RTE_MAX_LCORE)))

Applied with above changes
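
With both fixups folded in, the applied documentation reads roughly
(the @param mp wording is paraphrased):

	/**
	 * Calculate the size of the mempool header.
	 *
	 * @param mp
	 *   Pointer to the memory pool.
	 * @param pgn
	 *   Number of pages used to store mempool objects.
	 * @param cs
	 *   Size of the per-lcore cache.
	 */
	#define MEMPOOL_HEADER_SIZE(mp, pgn, cs) \
		(sizeof(*(mp)) + __PA_SIZE(mp, pgn) + (((cs) == 0) ? 0 : \
		(sizeof(struct rte_mempool_cache) * RTE_MAX_LCORE)))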

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v2] mempool: reduce rte_mempool structure size
  2016-02-10 18:02 ` Thomas Monjalon
@ 2016-02-12 11:52   ` Panu Matilainen
  0 siblings, 0 replies; 32+ messages in thread
From: Panu Matilainen @ 2016-02-12 11:52 UTC (permalink / raw)
  To: Thomas Monjalon, Wiles, Keith; +Cc: dev, Neil Horman

On 02/10/2016 08:02 PM, Thomas Monjalon wrote:
> 2016-02-10 18:01, Wiles, Keith:
>>>>> --- a/config/defconfig_x86_64-native-linuxapp-gcc
>>>>> +++ b/config/defconfig_x86_64-native-linuxapp-gcc
>>>>> @@ -40,3 +40,8 @@ CONFIG_RTE_ARCH_64=y
>>>>>
>>>>>   CONFIG_RTE_TOOLCHAIN="gcc"
>>>>>   CONFIG_RTE_TOOLCHAIN_GCC=y
>>>>> +CONFIG_RTE_BUILD_SHARED_LIB=y
>>>>> +CONFIG_RTE_NEXT_ABI=n
>>>>> +CONFIG_RTE_EAL_IGB_UIO=n
>>>>> +CONFIG_RTE_LIBRTE_KNI=n
>>>>> +CONFIG_RTE_KNI_KMOD=n
>>>
>>> Hmm, not sure where this came from, but will remove it.
>>
>> I think this came from the ABI checker I ran; the tool should leave the repo in its original state.
>
> Yes, you're right. The ABI checker modifies the defconfig instead of
> modifying the generated .config file.

Its "by design" according to Neil (this was discussed when the abi 
checker was introduced, see 
http://dpdk.org/ml/archives/dev/2015-March/014636.html)

I actually agree with Neil that changing .config after make config seems 
counter-intuitive compared to how this works in other projects, but that 
doesn't make modifying the default templates the right thing to do in DPDK.

> Anyone for a patch?

I can add it to my TODO among a couple of other things - it should not 
leave clutter like this behind no matter what. As to how exactly it 
should be fixed, it could of course just change .config, or 
alternatively create (and later clean up) a temporary defconfig file 
with the necessary contents, which would be closer to the current 
approach I guess.

	- Panu -

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v2] mempool: reduce rte_mempool structure size
  2016-02-10 18:01 [PATCH v2] " Wiles, Keith
@ 2016-02-10 18:02 ` Thomas Monjalon
  2016-02-12 11:52   ` Panu Matilainen
  0 siblings, 1 reply; 32+ messages in thread
From: Thomas Monjalon @ 2016-02-10 18:02 UTC (permalink / raw)
  To: Wiles, Keith; +Cc: dev, Neil Horman

2016-02-10 18:01, Wiles, Keith:
> >>> --- a/config/defconfig_x86_64-native-linuxapp-gcc
> >>> +++ b/config/defconfig_x86_64-native-linuxapp-gcc
> >>> @@ -40,3 +40,8 @@ CONFIG_RTE_ARCH_64=y
> >>>  
> >>>  CONFIG_RTE_TOOLCHAIN="gcc"
> >>>  CONFIG_RTE_TOOLCHAIN_GCC=y
> >>> +CONFIG_RTE_BUILD_SHARED_LIB=y
> >>> +CONFIG_RTE_NEXT_ABI=n
> >>> +CONFIG_RTE_EAL_IGB_UIO=n
> >>> +CONFIG_RTE_LIBRTE_KNI=n
> >>> +CONFIG_RTE_KNI_KMOD=n
> >
> >Hmm, not sure where this came from, but will remove it.
> 
> I think this came from the ABI checker I ran; the tool should leave the repo in its original state.

Yes, you're right. The ABI checker modifies the defconfig instead of
modifying the generated .config file.
Anyone for a patch?

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v2] mempool: reduce rte_mempool structure size
@ 2016-02-10 18:01 Wiles, Keith
  2016-02-10 18:02 ` Thomas Monjalon
  0 siblings, 1 reply; 32+ messages in thread
From: Wiles, Keith @ 2016-02-10 18:01 UTC (permalink / raw)
  To: Olivier MATZ, dev

>>Hi Keith,
>>
>>Thank you for adding the RTE_NEXT_ABI. I think this is the way
>>described in the process. Your changes will be available in the next
>>version (16.04) for people compiling with RTE_NEXT_ABI=y, and in
>>16.07 without the option (I'm just surprised that RTE_NEXT_ABI=y is
>>set in the default configs...).
>>
>>I think a deprecation notice should also be added in this commit
>>in doc/guides/rel_notes/deprecation.rst.
>
>Will add the text.
>>
>>Please also find comments below.
>>
>>On 02/09/2016 06:30 PM, Keith Wiles wrote:
>>
>>> diff --git a/config/defconfig_x86_64-native-linuxapp-gcc b/config/defconfig_x86_64-native-linuxapp-gcc
>>> index 60baf5b..02e9ace 100644
>>> --- a/config/defconfig_x86_64-native-linuxapp-gcc
>>> +++ b/config/defconfig_x86_64-native-linuxapp-gcc
>>> @@ -40,3 +40,8 @@ CONFIG_RTE_ARCH_64=y
>>>  
>>>  CONFIG_RTE_TOOLCHAIN="gcc"
>>>  CONFIG_RTE_TOOLCHAIN_GCC=y
>>> +CONFIG_RTE_BUILD_SHARED_LIB=y
>>> +CONFIG_RTE_NEXT_ABI=n
>>> +CONFIG_RTE_EAL_IGB_UIO=n
>>> +CONFIG_RTE_LIBRTE_KNI=n
>>> +CONFIG_RTE_KNI_KMOD=n
>
>Hmm, not sure where this came from, but will remove it.

I think this came from the ABI checker I ran; the tool should leave the repo in its original state.

>>
>>I think this should not be part of the patch.
>>
>>> @@ -672,6 +704,24 @@ rte_mempool_count(const struct rte_mempool *mp)
>>>  static unsigned
>>>  rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
>>>  {
>>> +#ifdef RTE_NEXT_ABI
>>> +	unsigned lcore_id;
>>> +	unsigned count = 0;
>>> +	unsigned cache_count;
>>> +
>>> +	fprintf(f, "  cache infos:\n");
>>> +	fprintf(f, "    cache_size=%"PRIu32"\n", mp->cache_size);
>>> +	if (mp->cache_size == 0)
>>> +		return count;
>>> +
>>> +	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
>>> +		cache_count = mp->local_cache[lcore_id].len;
>>> +		fprintf(f, "    cache_count[%u]=%u\n", lcore_id, cache_count);
>>> +		count += cache_count;
>>> +	}
>>> +	fprintf(f, "    total_cache_count=%u\n", count);
>>> +	return count;
>>> +#else
>>>  #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>>>  	unsigned lcore_id;
>>>  	unsigned count = 0;
>>
>>I think in this case we could avoid duplicating the code without
>>being unclear by using the proper #ifdefs:
>
>I was struggling with how it should be done. I like to see clear ifdefs and be able to see the complete code for a given case. In these cases I wanted to make it simple to remove the code quickly by just deleting lines instead of editing lines. I will follow your suggestion.
>>
>>#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 || defined(RTE_NEXT_ABI)
>>	/* common code */
>>#ifdef RTE_NEXT_ABI
>>	if (mp->cache_size == 0)
>>		return count;
>>#endif
>>	/* common code */
>>#else
>>...
>>#endif
>>
>>
>>> @@ -755,6 +806,26 @@ mempool_audit_cookies(const struct rte_mempool *mp)
>>>  #define mempool_audit_cookies(mp) do {} while(0)
>>>  #endif
>>>  
>>> +#ifdef RTE_NEXT_ABI
>>> +/* check cookies before and after objects */
>>> +static void
>>> +mempool_audit_cache(const struct rte_mempool *mp)
>>> +{
>>> +	/* check cache size consistency */
>>> +	unsigned lcore_id;
>>> +
>>> +	if (mp->cache_size == 0)
>>> +		return;
>>> +
>>> +	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
>>> +		if (mp->local_cache[lcore_id].len > mp->cache_flushthresh) {
>>> +			RTE_LOG(CRIT, MEMPOOL, "badness on cache[%u]\n",
>>> +				lcore_id);
>>> +			rte_panic("MEMPOOL: invalid cache len\n");
>>> +		}
>>> +	}
>>> +}
>>> +#else
>>
>>same here
>>
>>> diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
>>> index 6e2390a..fc9b595 100644
>>> --- a/lib/librte_mempool/rte_mempool.h
>>> +++ b/lib/librte_mempool/rte_mempool.h
>>> @@ -95,6 +95,19 @@ struct rte_mempool_debug_stats {
>>>  } __rte_cache_aligned;
>>>  #endif
>>>  
>>> +#ifdef RTE_NEXT_ABI
>>> +/**
>>> + * A structure that stores a per-core object cache.
>>> + */
>>> +struct rte_mempool_cache {
>>> +	unsigned len; /**< Cache len */
>>> +	/*
>>> +	 * Cache is allocated to this size to allow it to overflow in certain
>>> +	 * cases to avoid needless emptying of cache.
>>> +	 */
>>> +	void *objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 3]; /**< Cache objects */
>>> +} __rte_cache_aligned;
>>> +#else
>>
>>same here
>>
>>
>>
>>> @@ -755,19 +793,25 @@ static inline void __attribute__((always_inline))
>>>  __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
>>>  		    unsigned n, int is_mp)
>>>  {
>>> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>>>  #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>>> +#endif /* RTE_NEXT_ABI */
>>>  	struct rte_mempool_cache *cache;
>>>  	uint32_t index;
>>>  	void **cache_objs;
>>>  	unsigned lcore_id = rte_lcore_id();
>>>  	uint32_t cache_size = mp->cache_size;
>>>  	uint32_t flushthresh = mp->cache_flushthresh;
>>> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>>>  #endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
>>> +#endif /* RTE_NEXT_ABI */
>>
>>this looks strange... I think it does not work properly.
>>Why not
>>#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 || defined(RTE_NEXT_ABI)
>
>Yes, it is strange :-(
>>
>>>  	/* increment stat now, adding in mempool always success */
>>>  	__MEMPOOL_STAT_ADD(mp, put, n);
>>>  
>>> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>>>  #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>>> +#endif /* RTE_NEXT_ABI */
>>>  	/* cache is not enabled or single producer or non-EAL thread */
>>>  	if (unlikely(cache_size == 0 || is_mp == 0 ||
>>>  		     lcore_id >= RTE_MAX_LCORE))
>>> @@ -802,7 +846,9 @@ __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
>>>  	return;
>>>  
>>>  ring_enqueue:
>>> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>>>  #endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
>>> +#endif /* RTE_NEXT_ABI */
>>>  
>>>  	/* push remaining objects in ring */
>>>  #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
>>> @@ -946,7 +992,9 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
>>>  		   unsigned n, int is_mc)
>>>  {
>>>  	int ret;
>>> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>>>  #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>>> +#endif /* RTE_NEXT_ABI */
>>>  	struct rte_mempool_cache *cache;
>>>  	uint32_t index, len;
>>>  	void **cache_objs;
>>> @@ -992,7 +1040,9 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
>>>  	return 0;
>>>  
>>>  ring_dequeue:
>>> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>>>  #endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
>>> +#endif /* RTE_NEXT_ABI */
>>>  
>>>  	/* get remaining objects from ring */
>>>  	if (is_mc)
>>
>>Same in those cases.
>>
>>
>>
>>Regards,
>>Olivier
>>
>
>
>Regards,
>Keith
>
>
>
>
>


Regards,
Keith

^ permalink raw reply	[flat|nested] 32+ messages in thread

end of thread, other threads:[~2016-05-17  5:31 UTC | newest]

Thread overview: 32+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-02-02 23:02 [PATCH] mempool: Reduce rte_mempool structure size Keith Wiles
2016-02-03 17:11 ` Ananyev, Konstantin
2016-02-08 11:02 ` Olivier MATZ
2016-02-08 15:57   ` Wiles, Keith
2016-02-09 17:30 ` [PATCH v2] mempool: reduce " Keith Wiles
2016-02-10 16:59   ` Olivier MATZ
2016-02-10 17:22     ` Wiles, Keith
2016-02-10 18:35     ` Wiles, Keith
2016-02-10 20:06       ` Olivier MATZ
2016-02-10 21:18   ` [PATCH v3] " Keith Wiles
2016-02-12 11:23     ` Panu Matilainen
2016-02-12 13:57       ` Thomas Monjalon
2016-02-12 14:19         ` Panu Matilainen
2016-02-12 15:07           ` Wiles, Keith
2016-02-12 15:38             ` Thomas Monjalon
2016-02-12 15:50               ` Olivier MATZ
2016-02-12 15:58                 ` Wiles, Keith
2016-02-15  9:58                 ` Hunt, David
2016-02-15 10:15                   ` Olivier MATZ
2016-02-15 10:21                     ` Hunt, David
2016-02-15 12:31                       ` Olivier MATZ
2016-02-12 15:54               ` Wiles, Keith
2016-02-12 18:36   ` [PATCH v4] " Keith Wiles
2016-02-15  9:20     ` Olivier MATZ
2016-04-14  9:42     ` [PATCH v5] " Olivier Matz
2016-04-14 13:28       ` Wiles, Keith
2016-04-14 13:43         ` Olivier MATZ
2016-04-14 13:53       ` Wiles, Keith
2016-05-17  5:31       ` Thomas Monjalon
2016-02-10 18:01 [PATCH v2] " Wiles, Keith
2016-02-10 18:02 ` Thomas Monjalon
2016-02-12 11:52   ` Panu Matilainen
