* [PATCH] mempool: Reduce rte_mempool structure size
@ 2016-02-02 23:02 Keith Wiles
  2016-02-03 17:11 ` Ananyev, Konstantin
                   ` (2 more replies)
  0 siblings, 3 replies; 32+ messages in thread
From: Keith Wiles @ 2016-02-02 23:02 UTC (permalink / raw)
  To: dev

The rte_mempool structure is changed, which will cause an ABI change
for this structure.

Allow mempool cache support to be dynamic depending on whether the
mempool being created needs cache support. Saves about 1.5MB of
memory used by the rte_mempool structure. Performance does not seem
to be affected when running l3fwd, and the test_mempool run passed.

Allocating small mempools which do not require a cache can consume
large amounts of memory if you have a number of these mempools.

Signed-off-by: Keith Wiles <keith.wiles@intel.com>
---
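A rough sanity check of the "about 1.5MB" figure (a standalone sketch,
not part of the patch; it assumes the common defaults RTE_MAX_LCORE=128
and RTE_MEMPOOL_CACHE_MAX_SIZE=512 on a 64-bit target, so the exact
saving varies with the build config):

#include <stdio.h>

#define RTE_MAX_LCORE 128
#define RTE_MEMPOOL_CACHE_MAX_SIZE 512

/* stand-in for struct rte_mempool_cache from rte_mempool.h */
struct cache {
	unsigned len;
	void *objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 3];
} __attribute__((aligned(64)));	/* i.e. __rte_cache_aligned */

int main(void)
{
	/* one per-lcore cache: 1536 pointers plus len, cache aligned */
	printf("per-lcore cache: %zu bytes\n", sizeof(struct cache));
	/* the embedded array this patch removes from struct rte_mempool */
	printf("all lcores:      %zu bytes\n",
	       sizeof(struct cache) * RTE_MAX_LCORE);
	return 0;
}
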
 app/test/test_mempool.c          |  4 +--
 lib/librte_mempool/rte_mempool.c | 56 ++++++++++++++++++----------------------
 lib/librte_mempool/rte_mempool.h | 29 ++++++++++-----------
 3 files changed, 40 insertions(+), 49 deletions(-)

diff --git a/app/test/test_mempool.c b/app/test/test_mempool.c
index 72f8fb6..7b479f8 100644
--- a/app/test/test_mempool.c
+++ b/app/test/test_mempool.c
@@ -122,8 +122,8 @@ test_mempool_basic(void)
 		return -1;
 
 	printf("get private data\n");
-	if (rte_mempool_get_priv(mp) !=
-			(char*) mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num))
+	if (rte_mempool_get_priv(mp) != (char *)mp +
+			MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size))
 		return -1;
 
 	printf("get physical address of an object\n");
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index aff5f6d..bdf8e2e 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -450,15 +450,11 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 	int page_size = getpagesize();
 
 	/* compilation-time checks */
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool) &
 			  RTE_CACHE_LINE_MASK) != 0);
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_cache) &
 			  RTE_CACHE_LINE_MASK) != 0);
-	RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, local_cache) &
-			  RTE_CACHE_LINE_MASK) != 0);
-#endif
-#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_debug_stats) &
 			  RTE_CACHE_LINE_MASK) != 0);
 	RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, stats) &
@@ -527,9 +523,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 		 */
 		int head = sizeof(struct rte_mempool);
 		int new_size = (private_data_size + head) % page_size;
-		if (new_size) {
+		if (new_size)
 			private_data_size += page_size - new_size;
-		}
 	}
 
 	/* try to allocate tailq entry */
@@ -544,7 +539,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 	 * store mempool objects. Otherwise reserve a memzone that is large
 	 * enough to hold mempool header and metadata plus mempool objects.
 	 */
-	mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num) + private_data_size;
+	mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size);
+	mempool_size += private_data_size;
 	mempool_size = RTE_ALIGN_CEIL(mempool_size, RTE_MEMPOOL_ALIGN);
 	if (vaddr == NULL)
 		mempool_size += (size_t)objsz.total_size * n;
@@ -598,8 +594,15 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 	mp->cache_flushthresh = CALC_CACHE_FLUSHTHRESH(cache_size);
 	mp->private_data_size = private_data_size;
 
+	/*
+	 * local_cache pointer is set even if cache_size is zero.
+	 * The local_cache points to just past the elt_pa[] array.
+	 */
+	mp->local_cache = (struct rte_mempool_cache *)
+			((char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num, 0));
+
 	/* calculate address of the first element for continuous mempool. */
-	obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num) +
+	obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size) +
 		private_data_size;
 	obj = RTE_PTR_ALIGN_CEIL(obj, RTE_MEMPOOL_ALIGN);
 
@@ -613,9 +616,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 		mp->elt_va_start = (uintptr_t)obj;
 		mp->elt_pa[0] = mp->phys_addr +
 			(mp->elt_va_start - (uintptr_t)mp);
-
-	/* mempool elements in a separate chunk of memory. */
 	} else {
+		/* mempool elements in a separate chunk of memory. */
 		mp->elt_va_start = (uintptr_t)vaddr;
 		memcpy(mp->elt_pa, paddr, sizeof (mp->elt_pa[0]) * pg_num);
 	}
@@ -645,19 +647,15 @@ unsigned
 rte_mempool_count(const struct rte_mempool *mp)
 {
 	unsigned count;
+	unsigned lcore_id;
 
 	count = rte_ring_count(mp->ring);
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
-	{
-		unsigned lcore_id;
-		if (mp->cache_size == 0)
-			return count;
+	if (mp->cache_size == 0)
+		return count;
 
-		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
-			count += mp->local_cache[lcore_id].len;
-	}
-#endif
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
+		count += mp->local_cache[lcore_id].len;
 
 	/*
 	 * due to race condition (access to len is not locked), the
@@ -672,13 +670,15 @@ rte_mempool_count(const struct rte_mempool *mp)
 static unsigned
 rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
 {
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	unsigned lcore_id;
 	unsigned count = 0;
 	unsigned cache_count;
 
 	fprintf(f, "  cache infos:\n");
 	fprintf(f, "    cache_size=%"PRIu32"\n", mp->cache_size);
+	if (mp->cache_size == 0)
+		return count;
+
 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
 		cache_count = mp->local_cache[lcore_id].len;
 		fprintf(f, "    cache_count[%u]=%u\n", lcore_id, cache_count);
@@ -686,11 +686,6 @@ rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
 	}
 	fprintf(f, "    total_cache_count=%u\n", count);
 	return count;
-#else
-	RTE_SET_USED(mp);
-	fprintf(f, "  cache disabled\n");
-	return 0;
-#endif
 }
 
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
@@ -755,13 +750,16 @@ mempool_audit_cookies(const struct rte_mempool *mp)
 #define mempool_audit_cookies(mp) do {} while(0)
 #endif
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 /* check cookies before and after objects */
 static void
 mempool_audit_cache(const struct rte_mempool *mp)
 {
 	/* check cache size consistency */
 	unsigned lcore_id;
+
+	if (mp->cache_size == 0)
+		return;
+
 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
 		if (mp->local_cache[lcore_id].len > mp->cache_flushthresh) {
 			RTE_LOG(CRIT, MEMPOOL, "badness on cache[%u]\n",
@@ -770,10 +768,6 @@ mempool_audit_cache(const struct rte_mempool *mp)
 		}
 	}
 }
-#else
-#define mempool_audit_cache(mp) do {} while(0)
-#endif
-
 
 /* check the consistency of mempool (size, cookies, ...) */
 void
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 6e2390a..434ef98 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -95,7 +95,6 @@ struct rte_mempool_debug_stats {
 } __rte_cache_aligned;
 #endif
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 /**
  * A structure that stores a per-core object cache.
  */
@@ -107,7 +106,6 @@ struct rte_mempool_cache {
 	 */
 	void *objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 3]; /**< Cache objects */
 } __rte_cache_aligned;
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
 
 /**
  * A structure that stores the size of mempool elements.
@@ -194,10 +192,7 @@ struct rte_mempool {
 
 	unsigned private_data_size;      /**< Size of private data. */
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
-	/** Per-lcore local cache. */
-	struct rte_mempool_cache local_cache[RTE_MAX_LCORE];
-#endif
+	struct rte_mempool_cache *local_cache; /**< Per-lcore local cache */
 
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
 	/** Per-lcore statistics. */
@@ -247,6 +242,13 @@ struct rte_mempool {
 #endif
 
 /**
+ * Size of the elt_pa array, based on the number of pages. (Internal use)
+ */
+#define __PA_SIZE(mp, pgn) \
+	RTE_ALIGN_CEIL((((pgn) - RTE_DIM((mp)->elt_pa)) * \
+	sizeof((mp)->elt_pa[0])), RTE_CACHE_LINE_SIZE)
+
+/**
  * Calculate the size of the mempool header.
  *
  * @param mp
@@ -254,9 +256,9 @@ struct rte_mempool {
  * @param pgn
  *   Number of pages used to store mempool objects.
  */
-#define	MEMPOOL_HEADER_SIZE(mp, pgn)	(sizeof(*(mp)) + \
-	RTE_ALIGN_CEIL(((pgn) - RTE_DIM((mp)->elt_pa)) * \
-	sizeof ((mp)->elt_pa[0]), RTE_CACHE_LINE_SIZE))
+#define MEMPOOL_HEADER_SIZE(mp, pgn, cs) \
+	(sizeof(*(mp)) + __PA_SIZE(mp, pgn) + (((cs) == 0) ? 0 : \
+	(sizeof(struct rte_mempool_cache) * RTE_MAX_LCORE)))
 
 /**
  * Return true if the whole mempool is in contiguous memory.
@@ -755,19 +757,16 @@ static inline void __attribute__((always_inline))
 __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
 		    unsigned n, int is_mp)
 {
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	struct rte_mempool_cache *cache;
 	uint32_t index;
 	void **cache_objs;
 	unsigned lcore_id = rte_lcore_id();
 	uint32_t cache_size = mp->cache_size;
 	uint32_t flushthresh = mp->cache_flushthresh;
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
 
 	/* increment stat now, adding in mempool always success */
 	__MEMPOOL_STAT_ADD(mp, put, n);
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	/* cache is not enabled or single producer or non-EAL thread */
 	if (unlikely(cache_size == 0 || is_mp == 0 ||
 		     lcore_id >= RTE_MAX_LCORE))
@@ -802,7 +801,6 @@ __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
 	return;
 
 ring_enqueue:
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
 
 	/* push remaining objects in ring */
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
@@ -946,7 +944,6 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
 		   unsigned n, int is_mc)
 {
 	int ret;
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	struct rte_mempool_cache *cache;
 	uint32_t index, len;
 	void **cache_objs;
@@ -992,7 +989,6 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
 	return 0;
 
 ring_dequeue:
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
 
 	/* get remaining objects from ring */
 	if (is_mc)
@@ -1293,7 +1289,8 @@ void rte_mempool_audit(const struct rte_mempool *mp);
  */
 static inline void *rte_mempool_get_priv(struct rte_mempool *mp)
 {
-	return (char *)mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num);
+	return (char *)mp +
+		MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size);
 }
 
 /**
-- 
2.7.0
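
For readers following the pointer arithmetic, a sketch of the memzone
layout that results from this patch (annotation only; cache_base() is
a made-up helper mirroring what rte_mempool_xmem_create() does):

/*
 *   (char *)mp
 *   +--------------------------------+
 *   | struct rte_mempool             |
 *   | (elt_pa[] grows past the end)  |
 *   +--------------------------------+ <- MEMPOOL_HEADER_SIZE(mp, pg_num, 0)
 *   | local_cache[RTE_MAX_LCORE]     |    (zero bytes when cache_size == 0)
 *   +--------------------------------+ <- MEMPOOL_HEADER_SIZE(mp, pg_num, cs)
 *   | private data                   |    == rte_mempool_get_priv(mp)
 *   +--------------------------------+
 *   | objects (when vaddr == NULL)   |
 *   +--------------------------------+
 */
#include <rte_mempool.h>	/* with this patch applied */

static inline struct rte_mempool_cache *
cache_base(struct rte_mempool *mp)
{
	/* local_cache is set to this address even when cache_size == 0;
	 * it then points at the private data area but is never
	 * dereferenced, since every cache path checks cache_size first. */
	return (struct rte_mempool_cache *)
		((char *)mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num, 0));
}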


* Re: [PATCH] mempool: Reduce rte_mempool structure size
  2016-02-02 23:02 [PATCH] mempool: Reduce rte_mempool structure size Keith Wiles
@ 2016-02-03 17:11 ` Ananyev, Konstantin
  2016-02-08 11:02 ` Olivier MATZ
  2016-02-09 17:30 ` [PATCH v2] mempool: reduce " Keith Wiles
  2 siblings, 0 replies; 32+ messages in thread
From: Ananyev, Konstantin @ 2016-02-03 17:11 UTC (permalink / raw)
  To: Wiles, Keith, dev



> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Keith Wiles
> Sent: Tuesday, February 02, 2016 11:03 PM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH] mempool: Reduce rte_mempool structure size
> 
> The rte_mempool structure is changed, which will cause an ABI change
> for this structure.
> 
> Allow mempool cache support to be dynamic depending on whether the
> mempool being created needs cache support. Saves about 1.5MB of
> memory used by the rte_mempool structure. Performance does not seem
> to be affected when running l3fwd, and the test_mempool run passed.
> 
> Allocating small mempools which do not require a cache can consume
> large amounts of memory if you have a number of these mempools.
> 
> Signed-off-by: Keith Wiles <keith.wiles@intel.com>
> ---

Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>


* Re: [PATCH] mempool: Reduce rte_mempool structure size
  2016-02-02 23:02 [PATCH] mempool: Reduce rte_mempool structure size Keith Wiles
  2016-02-03 17:11 ` Ananyev, Konstantin
@ 2016-02-08 11:02 ` Olivier MATZ
  2016-02-08 15:57   ` Wiles, Keith
  2016-02-09 17:30 ` [PATCH v2] mempool: reduce " Keith Wiles
  2 siblings, 1 reply; 32+ messages in thread
From: Olivier MATZ @ 2016-02-08 11:02 UTC (permalink / raw)
  To: Keith Wiles, dev

Hi Keith,

Looks good, thanks. Please find some comments below.

> [PATCH] mempool: Reduce rte_mempool structure size

nit: we usually avoid uppercase letters in the title

On 02/03/2016 12:02 AM, Keith Wiles wrote:
> diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
> index aff5f6d..bdf8e2e 100644
> --- a/lib/librte_mempool/rte_mempool.c
> +++ b/lib/librte_mempool/rte_mempool.c
> @@ -450,15 +450,11 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
>  	int page_size = getpagesize();
>  
>  	/* compilation-time checks */
> +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
>  	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool) &
>  			  RTE_CACHE_LINE_MASK) != 0);
> -#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>  	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_cache) &
>  			  RTE_CACHE_LINE_MASK) != 0);
> -	RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, local_cache) &
> -			  RTE_CACHE_LINE_MASK) != 0);
> -#endif
> -#ifdef RTE_LIBRTE_MEMPOOL_DEBUG

I don't think the #ifdef RTE_LIBRTE_MEMPOOL_DEBUG should be moved.
It should only protect the checks on stats, which are enabled
in debug mode.

> @@ -194,10 +192,7 @@ struct rte_mempool {
>  
>  	unsigned private_data_size;      /**< Size of private data. */
>  
> -#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> -	/** Per-lcore local cache. */
> -	struct rte_mempool_cache local_cache[RTE_MAX_LCORE];
> -#endif
> +	struct rte_mempool_cache *local_cache; /**< Per-lcore local cache */
>  
>  #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
>  	/** Per-lcore statistics. */

As you noticed in your initial mail, this changes the ABI. I
think your patch justifies the ABI change, so I think it should
follow the ABI change process described in
dpdk/doc/guides/contributing/versioning.rst.

From what I understand of versioning.rst, this kind of change
requires a deprecation notice first, and will be integrated in the
next version. I don't think it's easy to keep backward compatibility
in this case, especially because the rte_mempool structure is
used by several inlined functions.
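
To make the inline problem concrete (example_cache_size() is a made-up
helper, not a DPDK API): the field offset is resolved when the
*application* is compiled, so no library-side symbol versioning can
rebind it at run time.

static inline unsigned
example_cache_size(const struct rte_mempool *mp)
{
	/* the offset of cache_size is baked into the caller's binary */
	return mp->cache_size;
}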

Regards,
Olivier


* Re: [PATCH] mempool: Reduce rte_mempool structure size
  2016-02-08 11:02 ` Olivier MATZ
@ 2016-02-08 15:57   ` Wiles, Keith
  0 siblings, 0 replies; 32+ messages in thread
From: Wiles, Keith @ 2016-02-08 15:57 UTC (permalink / raw)
  To: Olivier MATZ, dev


>Hi Keith,
>
>Looks good, thanks. Please find some comments below.
>
>> [PATCH] mempool: Reduce rte_mempool structure size
>
>nit: we usually avoid uppercase letters in the title

Will make that change for v2. Why no uppercase letters in the title? It seems a bit odd to me in this case.
>
>On 02/03/2016 12:02 AM, Keith Wiles wrote:
>> diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
>> index aff5f6d..bdf8e2e 100644
>> --- a/lib/librte_mempool/rte_mempool.c
>> +++ b/lib/librte_mempool/rte_mempool.c
>> @@ -450,15 +450,11 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
>>  	int page_size = getpagesize();
>>  
>>  	/* compilation-time checks */
>> +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
>>  	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool) &
>>  			  RTE_CACHE_LINE_MASK) != 0);
>> -#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>>  	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_cache) &
>>  			  RTE_CACHE_LINE_MASK) != 0);
>> -	RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, local_cache) &
>> -			  RTE_CACHE_LINE_MASK) != 0);
>> -#endif
>> -#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
>
>I don't think the #ifdef RTE_LIBRTE_MEMPOOL_DEBUG should be moved.
>It should only protect the checks on stats, which are enabled
>in debug mode.

Will make that change for v2.
>
>> @@ -194,10 +192,7 @@ struct rte_mempool {
>>  
>>  	unsigned private_data_size;      /**< Size of private data. */
>>  
>> -#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>> -	/** Per-lcore local cache. */
>> -	struct rte_mempool_cache local_cache[RTE_MAX_LCORE];
>> -#endif
>> +	struct rte_mempool_cache *local_cache; /**< Per-lcore local cache */
>>  
>>  #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
>>  	/** Per-lcore statistics. */
>
>As you noticed in your initial mail, this changes the ABI. I
>think your patch justifies the ABI change, so I think it should
>follow the ABI change process described in
>dpdk/doc/guides/contributing/versioning.rst.
>
>From what I understand of versioning.rst, this kind of change
>requires a deprecation notice first, and will be integrated in the
>next version. I don't think it's easy to keep backward compatibility
>in this case, especially because the rte_mempool structure is
>used by several inlined functions.

I am reading the ABI doc and need to understand this process a bit more, but from what I can tell I need to add an ifdef RTE_NEXT_ABI around the new structure and the old one. Not sure where else I need to do that, as compat is a bit hard, as you stated. As for the ABI version file, is there something that needs to be done in that file too?

You can reply to me directly if you like, to save some bandwidth.
>
>Regards,
>Olivier
>


Regards,
Keith






* [PATCH v2] mempool: reduce rte_mempool structure size
  2016-02-02 23:02 [PATCH] mempool: Reduce rte_mempool structure size Keith Wiles
  2016-02-03 17:11 ` Ananyev, Konstantin
  2016-02-08 11:02 ` Olivier MATZ
@ 2016-02-09 17:30 ` Keith Wiles
  2016-02-10 16:59   ` Olivier MATZ
                     ` (2 more replies)
  2 siblings, 3 replies; 32+ messages in thread
From: Keith Wiles @ 2016-02-09 17:30 UTC (permalink / raw)
  To: dev

Patch v2 to add some comments and setup for RTE_NEXT_ABI changes.

The rte_mempool structure is changed, which will cause an ABI change
for this structure. Providing backward compatibility is not
reasonable here, as this structure is used in multiple defines/inlines.

Allow mempool cache support to be dynamic depending on whether the
mempool being created needs cache support. Saves about 1.5MB of
memory used by the rte_mempool structure.

Allocating small mempools which do not require a cache can consume
large amounts of memory if you have a number of these mempools.

Signed-off-by: Keith Wiles <keith.wiles@intel.com>
---
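Note: the RTE_NEXT_ABI mechanics used throughout, in miniature (a
sketch with made-up names, not DPDK code). Both layouts stay in the
tree, CONFIG_RTE_NEXT_ABI selects which one compiles, and the #else
branch is deleted once the deprecation period ends:

struct example_cache { unsigned len; };

#ifdef RTE_NEXT_ABI
struct example {
	struct example_cache *local_cache;	/* new ABI: pointer */
};
#else
struct example {
	struct example_cache local_cache[4];	/* old ABI: embedded array */
};
#endif
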
 app/test/test_mempool.c                     |  5 ++
 config/defconfig_x86_64-native-linuxapp-gcc |  5 ++
 lib/librte_mempool/rte_mempool.c            | 83 ++++++++++++++++++++++++++---
 lib/librte_mempool/rte_mempool.h            | 57 +++++++++++++++++++-
 4 files changed, 143 insertions(+), 7 deletions(-)

diff --git a/app/test/test_mempool.c b/app/test/test_mempool.c
index 72f8fb6..2829d40 100644
--- a/app/test/test_mempool.c
+++ b/app/test/test_mempool.c
@@ -122,8 +122,13 @@ test_mempool_basic(void)
 		return -1;
 
 	printf("get private data\n");
+#ifdef RTE_NEXT_ABI
+	if (rte_mempool_get_priv(mp) != (char *)mp +
+			MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size))
+#else
 	if (rte_mempool_get_priv(mp) !=
 			(char*) mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num))
+#endif
 		return -1;
 
 	printf("get physical address of an object\n");
diff --git a/config/defconfig_x86_64-native-linuxapp-gcc b/config/defconfig_x86_64-native-linuxapp-gcc
index 60baf5b..02e9ace 100644
--- a/config/defconfig_x86_64-native-linuxapp-gcc
+++ b/config/defconfig_x86_64-native-linuxapp-gcc
@@ -40,3 +40,8 @@ CONFIG_RTE_ARCH_64=y
 
 CONFIG_RTE_TOOLCHAIN="gcc"
 CONFIG_RTE_TOOLCHAIN_GCC=y
+CONFIG_RTE_BUILD_SHARED_LIB=y
+CONFIG_RTE_NEXT_ABI=n
+CONFIG_RTE_EAL_IGB_UIO=n
+CONFIG_RTE_LIBRTE_KNI=n
+CONFIG_RTE_KNI_KMOD=n
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index aff5f6d..c61dc44 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -452,12 +452,17 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 	/* compilation-time checks */
 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool) &
 			  RTE_CACHE_LINE_MASK) != 0);
+#ifdef RTE_NEXT_ABI
+	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_cache) &
+			  RTE_CACHE_LINE_MASK) != 0);
+#else
 #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_cache) &
 			  RTE_CACHE_LINE_MASK) != 0);
 	RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, local_cache) &
 			  RTE_CACHE_LINE_MASK) != 0);
 #endif
+#endif /* RTE_NEXT_ABI */
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_debug_stats) &
 			  RTE_CACHE_LINE_MASK) != 0);
@@ -527,9 +532,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 		 */
 		int head = sizeof(struct rte_mempool);
 		int new_size = (private_data_size + head) % page_size;
-		if (new_size) {
+		if (new_size)
 			private_data_size += page_size - new_size;
-		}
 	}
 
 	/* try to allocate tailq entry */
@@ -544,7 +548,12 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 	 * store mempool objects. Otherwise reserve a memzone that is large
 	 * enough to hold mempool header and metadata plus mempool objects.
 	 */
+#ifdef RTE_NEXT_ABI
+	mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size);
+	mempool_size += private_data_size;
+#else
 	mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num) + private_data_size;
+#endif /* RTE_NEXT_ABI */
 	mempool_size = RTE_ALIGN_CEIL(mempool_size, RTE_MEMPOOL_ALIGN);
 	if (vaddr == NULL)
 		mempool_size += (size_t)objsz.total_size * n;
@@ -598,9 +607,22 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 	mp->cache_flushthresh = CALC_CACHE_FLUSHTHRESH(cache_size);
 	mp->private_data_size = private_data_size;
 
+#ifdef RTE_NEXT_ABI
+	/*
+	 * local_cache pointer is set even if cache_size is zero.
+	 * The local_cache points to just past the elt_pa[] array.
+	 */
+	mp->local_cache = (struct rte_mempool_cache *)
+			((char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num, 0));
+
+	/* calculate address of the first element for continuous mempool. */
+	obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size) +
+		private_data_size;
+#else
 	/* calculate address of the first element for continuous mempool. */
 	obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num) +
 		private_data_size;
+#endif /* RTE_NEXT_ABI */
 	obj = RTE_PTR_ALIGN_CEIL(obj, RTE_MEMPOOL_ALIGN);
 
 	/* populate address translation fields. */
@@ -613,9 +635,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 		mp->elt_va_start = (uintptr_t)obj;
 		mp->elt_pa[0] = mp->phys_addr +
 			(mp->elt_va_start - (uintptr_t)mp);
-
-	/* mempool elements in a separate chunk of memory. */
 	} else {
+		/* mempool elements in a separate chunk of memory. */
 		mp->elt_va_start = (uintptr_t)vaddr;
 		memcpy(mp->elt_pa, paddr, sizeof (mp->elt_pa[0]) * pg_num);
 	}
@@ -645,10 +666,21 @@ unsigned
 rte_mempool_count(const struct rte_mempool *mp)
 {
 	unsigned count;
+#ifdef RTE_NEXT_ABI
+	unsigned lcore_id;
 
 	count = rte_ring_count(mp->ring);
 
+	if (mp->cache_size == 0)
+		return count;
+
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
+		count += mp->local_cache[lcore_id].len;
+#else
 #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
+
+	count = rte_ring_count(mp->ring);
+
 	{
 		unsigned lcore_id;
 		if (mp->cache_size == 0)
@@ -658,7 +690,7 @@ rte_mempool_count(const struct rte_mempool *mp)
 			count += mp->local_cache[lcore_id].len;
 	}
 #endif
-
+#endif /* RTE_NEXT_ABI */
 	/*
 	 * due to race condition (access to len is not locked), the
 	 * total can be greater than size... so fix the result
@@ -672,6 +704,24 @@ rte_mempool_count(const struct rte_mempool *mp)
 static unsigned
 rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
 {
+#ifdef RTE_NEXT_ABI
+	unsigned lcore_id;
+	unsigned count = 0;
+	unsigned cache_count;
+
+	fprintf(f, "  cache infos:\n");
+	fprintf(f, "    cache_size=%"PRIu32"\n", mp->cache_size);
+	if (mp->cache_size == 0)
+		return count;
+
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+		cache_count = mp->local_cache[lcore_id].len;
+		fprintf(f, "    cache_count[%u]=%u\n", lcore_id, cache_count);
+		count += cache_count;
+	}
+	fprintf(f, "    total_cache_count=%u\n", count);
+	return count;
+#else
 #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	unsigned lcore_id;
 	unsigned count = 0;
@@ -691,6 +741,7 @@ rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
 	fprintf(f, "  cache disabled\n");
 	return 0;
 #endif
+#endif /* RTE_NEXT_ABI */
 }
 
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
@@ -755,6 +806,26 @@ mempool_audit_cookies(const struct rte_mempool *mp)
 #define mempool_audit_cookies(mp) do {} while(0)
 #endif
 
+#ifdef RTE_NEXT_ABI
+/* check cookies before and after objects */
+static void
+mempool_audit_cache(const struct rte_mempool *mp)
+{
+	/* check cache size consistency */
+	unsigned lcore_id;
+
+	if (mp->cache_size == 0)
+		return;
+
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+		if (mp->local_cache[lcore_id].len > mp->cache_flushthresh) {
+			RTE_LOG(CRIT, MEMPOOL, "badness on cache[%u]\n",
+				lcore_id);
+			rte_panic("MEMPOOL: invalid cache len\n");
+		}
+	}
+}
+#else
 #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 /* check cookies before and after objects */
 static void
@@ -773,7 +844,7 @@ mempool_audit_cache(const struct rte_mempool *mp)
 #else
 #define mempool_audit_cache(mp) do {} while(0)
 #endif
-
+#endif /* RTE_NEXT_ABI */
 
 /* check the consistency of mempool (size, cookies, ...) */
 void
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 6e2390a..fc9b595 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -95,6 +95,19 @@ struct rte_mempool_debug_stats {
 } __rte_cache_aligned;
 #endif
 
+#ifdef RTE_NEXT_ABI
+/**
+ * A structure that stores a per-core object cache.
+ */
+struct rte_mempool_cache {
+	unsigned len; /**< Cache len */
+	/*
+	 * Cache is allocated to this size to allow it to overflow in certain
+	 * cases to avoid needless emptying of cache.
+	 */
+	void *objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 3]; /**< Cache objects */
+} __rte_cache_aligned;
+#else
 #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 /**
  * A structure that stores a per-core object cache.
@@ -108,6 +121,7 @@ struct rte_mempool_cache {
 	void *objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 3]; /**< Cache objects */
 } __rte_cache_aligned;
 #endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
+#endif /* RTE_NEXT_ABI */
 
 /**
  * A structure that stores the size of mempool elements.
@@ -194,10 +208,14 @@ struct rte_mempool {
 
 	unsigned private_data_size;      /**< Size of private data. */
 
+#ifdef RTE_NEXT_ABI
+	struct rte_mempool_cache *local_cache; /**< Per-lcore local cache */
+#else
 #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	/** Per-lcore local cache. */
 	struct rte_mempool_cache local_cache[RTE_MAX_LCORE];
 #endif
+#endif  /* RTE_NEXT_ABI */
 
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
 	/** Per-lcore statistics. */
@@ -246,6 +264,26 @@ struct rte_mempool {
 #define __MEMPOOL_STAT_ADD(mp, name, n) do {} while(0)
 #endif
 
+#ifdef RTE_NEXT_ABI
+/**
+ * Size of the elt_pa array, based on the number of pages. (Internal use)
+ */
+#define __PA_SIZE(mp, pgn) \
+	RTE_ALIGN_CEIL((((pgn) - RTE_DIM((mp)->elt_pa)) * \
+	sizeof((mp)->elt_pa[0])), RTE_CACHE_LINE_SIZE)
+
+/**
+ * Calculate the size of the mempool header.
+ *
+ * @param mp
+ *   Pointer to the memory pool.
+ * @param pgn
+ *   Number of pages used to store mempool objects.
+ */
+#define MEMPOOL_HEADER_SIZE(mp, pgn, cs) \
+	(sizeof(*(mp)) + __PA_SIZE(mp, pgn) + (((cs) == 0) ? 0 : \
+	(sizeof(struct rte_mempool_cache) * RTE_MAX_LCORE)))
+#else
 /**
  * Calculate the size of the mempool header.
  *
@@ -257,7 +295,7 @@ struct rte_mempool {
 #define	MEMPOOL_HEADER_SIZE(mp, pgn)	(sizeof(*(mp)) + \
 	RTE_ALIGN_CEIL(((pgn) - RTE_DIM((mp)->elt_pa)) * \
 	sizeof ((mp)->elt_pa[0]), RTE_CACHE_LINE_SIZE))
-
+#endif /* RTE_NEXT_ABI */
 /**
  * Return true if the whole mempool is in contiguous memory.
  */
@@ -755,19 +793,25 @@ static inline void __attribute__((always_inline))
 __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
 		    unsigned n, int is_mp)
 {
+#ifndef RTE_NEXT_ABI	/* Note: ifndef */
 #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
+#endif /* RTE_NEXT_ABI */
 	struct rte_mempool_cache *cache;
 	uint32_t index;
 	void **cache_objs;
 	unsigned lcore_id = rte_lcore_id();
 	uint32_t cache_size = mp->cache_size;
 	uint32_t flushthresh = mp->cache_flushthresh;
+#ifndef RTE_NEXT_ABI	/* Note: ifndef */
 #endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
+#endif /* RTE_NEXT_ABI */
 
 	/* increment stat now, adding in mempool always success */
 	__MEMPOOL_STAT_ADD(mp, put, n);
 
+#ifndef RTE_NEXT_ABI	/* Note: ifndef */
 #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
+#endif /* RTE_NEXT_ABI */
 	/* cache is not enabled or single producer or non-EAL thread */
 	if (unlikely(cache_size == 0 || is_mp == 0 ||
 		     lcore_id >= RTE_MAX_LCORE))
@@ -802,7 +846,9 @@ __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
 	return;
 
 ring_enqueue:
+#ifndef RTE_NEXT_ABI	/* Note: ifndef */
 #endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
+#endif /* RTE_NEXT_ABI */
 
 	/* push remaining objects in ring */
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
@@ -946,7 +992,9 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
 		   unsigned n, int is_mc)
 {
 	int ret;
+#ifndef RTE_NEXT_ABI	/* Note: ifndef */
 #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
+#endif /* RTE_NEXT_ABI */
 	struct rte_mempool_cache *cache;
 	uint32_t index, len;
 	void **cache_objs;
@@ -992,7 +1040,9 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
 	return 0;
 
 ring_dequeue:
+#ifndef RTE_NEXT_ABI	/* Note: ifndef */
 #endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
+#endif /* RTE_NEXT_ABI */
 
 	/* get remaining objects from ring */
 	if (is_mc)
@@ -1293,7 +1343,12 @@ void rte_mempool_audit(const struct rte_mempool *mp);
  */
 static inline void *rte_mempool_get_priv(struct rte_mempool *mp)
 {
+#ifdef RTE_NEXT_ABI
+	return (char *)mp +
+		MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size);
+#else
 	return (char *)mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num);
+#endif /* RTE_NEXT_ABI */
 }
 
 /**
-- 
2.7.0


* Re: [PATCH v2] mempool: reduce rte_mempool structure size
  2016-02-09 17:30 ` [PATCH v2] mempool: reduce " Keith Wiles
@ 2016-02-10 16:59   ` Olivier MATZ
  2016-02-10 17:22     ` Wiles, Keith
  2016-02-10 18:35     ` Wiles, Keith
  2016-02-10 21:18   ` [PATCH v3] " Keith Wiles
  2016-02-12 18:36   ` [PATCH v4] " Keith Wiles
  2 siblings, 2 replies; 32+ messages in thread
From: Olivier MATZ @ 2016-02-10 16:59 UTC (permalink / raw)
  To: Keith Wiles, dev

Hi Keith,

Thank you for adding the RTE_NEXT_ABI. I think this is the way
described in the process. Your changes will be available in the next
version (16.04) for people compiling with RTE_NEXT_ABI=y, and in
16.07 without the option (I'm just surprised that RTE_NEXT_ABI=y in
the default configs...).

I think a deprecation notice should also be added in this commit
in doc/guides/rel_notes/deprecation.rst.

Please also find comments below.

On 02/09/2016 06:30 PM, Keith Wiles wrote:

> diff --git a/config/defconfig_x86_64-native-linuxapp-gcc b/config/defconfig_x86_64-native-linuxapp-gcc
> index 60baf5b..02e9ace 100644
> --- a/config/defconfig_x86_64-native-linuxapp-gcc
> +++ b/config/defconfig_x86_64-native-linuxapp-gcc
> @@ -40,3 +40,8 @@ CONFIG_RTE_ARCH_64=y
>  
>  CONFIG_RTE_TOOLCHAIN="gcc"
>  CONFIG_RTE_TOOLCHAIN_GCC=y
> +CONFIG_RTE_BUILD_SHARED_LIB=y
> +CONFIG_RTE_NEXT_ABI=n
> +CONFIG_RTE_EAL_IGB_UIO=n
> +CONFIG_RTE_LIBRTE_KNI=n
> +CONFIG_RTE_KNI_KMOD=n

I think this should not be part of the patch.

> @@ -672,6 +704,24 @@ rte_mempool_count(const struct rte_mempool *mp)
>  static unsigned
>  rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
>  {
> +#ifdef RTE_NEXT_ABI
> +	unsigned lcore_id;
> +	unsigned count = 0;
> +	unsigned cache_count;
> +
> +	fprintf(f, "  cache infos:\n");
> +	fprintf(f, "    cache_size=%"PRIu32"\n", mp->cache_size);
> +	if (mp->cache_size == 0)
> +		return count;
> +
> +	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
> +		cache_count = mp->local_cache[lcore_id].len;
> +		fprintf(f, "    cache_count[%u]=%u\n", lcore_id, cache_count);
> +		count += cache_count;
> +	}
> +	fprintf(f, "    total_cache_count=%u\n", count);
> +	return count;
> +#else
>  #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>  	unsigned lcore_id;
>  	unsigned count = 0;

I think in this case we could avoid duplicating the code without
being unclear, by using the proper #ifdefs:

#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 || defined(RTE_NEXT_ABI)
	/* common code */
#ifdef RTE_NEXT_ABI
	if (mp->cache_size == 0)
		return count;
#endif
	/* common code */
#else
...
#endif


> @@ -755,6 +806,26 @@ mempool_audit_cookies(const struct rte_mempool *mp)
>  #define mempool_audit_cookies(mp) do {} while(0)
>  #endif
>  
> +#ifdef RTE_NEXT_ABI
> +/* check cookies before and after objects */
> +static void
> +mempool_audit_cache(const struct rte_mempool *mp)
> +{
> +	/* check cache size consistency */
> +	unsigned lcore_id;
> +
> +	if (mp->cache_size == 0)
> +		return;
> +
> +	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
> +		if (mp->local_cache[lcore_id].len > mp->cache_flushthresh) {
> +			RTE_LOG(CRIT, MEMPOOL, "badness on cache[%u]\n",
> +				lcore_id);
> +			rte_panic("MEMPOOL: invalid cache len\n");
> +		}
> +	}
> +}
> +#else

same here

> diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
> index 6e2390a..fc9b595 100644
> --- a/lib/librte_mempool/rte_mempool.h
> +++ b/lib/librte_mempool/rte_mempool.h
> @@ -95,6 +95,19 @@ struct rte_mempool_debug_stats {
>  } __rte_cache_aligned;
>  #endif
>  
> +#ifdef RTE_NEXT_ABI
> +/**
> + * A structure that stores a per-core object cache.
> + */
> +struct rte_mempool_cache {
> +	unsigned len; /**< Cache len */
> +	/*
> +	 * Cache is allocated to this size to allow it to overflow in certain
> +	 * cases to avoid needless emptying of cache.
> +	 */
> +	void *objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 3]; /**< Cache objects */
> +} __rte_cache_aligned;
> +#else

same here



> @@ -755,19 +793,25 @@ static inline void __attribute__((always_inline))
>  __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
>  		    unsigned n, int is_mp)
>  {
> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>  #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> +#endif /* RTE_NEXT_ABI */
>  	struct rte_mempool_cache *cache;
>  	uint32_t index;
>  	void **cache_objs;
>  	unsigned lcore_id = rte_lcore_id();
>  	uint32_t cache_size = mp->cache_size;
>  	uint32_t flushthresh = mp->cache_flushthresh;
> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>  #endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
> +#endif /* RTE_NEXT_ABI */

this looks strange... I think it does not work properly.
Why not
#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 || defined(RTE_NEXT_ABI)

>  	/* increment stat now, adding in mempool always success */
>  	__MEMPOOL_STAT_ADD(mp, put, n);
>  
> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>  #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> +#endif /* RTE_NEXT_ABI */
>  	/* cache is not enabled or single producer or non-EAL thread */
>  	if (unlikely(cache_size == 0 || is_mp == 0 ||
>  		     lcore_id >= RTE_MAX_LCORE))
> @@ -802,7 +846,9 @@ __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
>  	return;
>  
>  ring_enqueue:
> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>  #endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
> +#endif /* RTE_NEXT_ABI */
>  
>  	/* push remaining objects in ring */
>  #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
> @@ -946,7 +992,9 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
>  		   unsigned n, int is_mc)
>  {
>  	int ret;
> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>  #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> +#endif /* RTE_NEXT_ABI */
>  	struct rte_mempool_cache *cache;
>  	uint32_t index, len;
>  	void **cache_objs;
> @@ -992,7 +1040,9 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
>  	return 0;
>  
>  ring_dequeue:
> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>  #endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
> +#endif /* RTE_NEXT_ABI */
>  
>  	/* get remaining objects from ring */
>  	if (is_mc)

Same in those cases.



Regards,
Olivier


* Re: [PATCH v2] mempool: reduce rte_mempool structure size
  2016-02-10 16:59   ` Olivier MATZ
@ 2016-02-10 17:22     ` Wiles, Keith
  2016-02-10 18:35     ` Wiles, Keith
  1 sibling, 0 replies; 32+ messages in thread
From: Wiles, Keith @ 2016-02-10 17:22 UTC (permalink / raw)
  To: Olivier MATZ, dev

>Hi Keith,
>
>Thank you for adding the RTE_NEXT_ABI. I think this is the way
>described in the process. Your changes will be available in the next
>version (16.04) for people compiling with RTE_NEXT_ABI=y, and in
>16.07 without the option (I'm just surprised that RTE_NEXT_ABI=y in
>the default configs...).
>
>I think a deprecation notice should also be added in this commit
>in doc/guides/rel_notes/deprecation.rst.

Will add the text.
>
>Please also find comments below.
>
>On 02/09/2016 06:30 PM, Keith Wiles wrote:
>
>> diff --git a/config/defconfig_x86_64-native-linuxapp-gcc b/config/defconfig_x86_64-native-linuxapp-gcc
>> index 60baf5b..02e9ace 100644
>> --- a/config/defconfig_x86_64-native-linuxapp-gcc
>> +++ b/config/defconfig_x86_64-native-linuxapp-gcc
>> @@ -40,3 +40,8 @@ CONFIG_RTE_ARCH_64=y
>>  
>>  CONFIG_RTE_TOOLCHAIN="gcc"
>>  CONFIG_RTE_TOOLCHAIN_GCC=y
>> +CONFIG_RTE_BUILD_SHARED_LIB=y
>> +CONFIG_RTE_NEXT_ABI=n
>> +CONFIG_RTE_EAL_IGB_UIO=n
>> +CONFIG_RTE_LIBRTE_KNI=n
>> +CONFIG_RTE_KNI_KMOD=n

>
>I think this should not be part of the patch.

Hmm, not sure where this came from, but will remove it.
>
>> @@ -672,6 +704,24 @@ rte_mempool_count(const struct rte_mempool *mp)
>>  static unsigned
>>  rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
>>  {
>> +#ifdef RTE_NEXT_ABI
>> +	unsigned lcore_id;
>> +	unsigned count = 0;
>> +	unsigned cache_count;
>> +
>> +	fprintf(f, "  cache infos:\n");
>> +	fprintf(f, "    cache_size=%"PRIu32"\n", mp->cache_size);
>> +	if (mp->cache_size == 0)
>> +		return count;
>> +
>> +	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
>> +		cache_count = mp->local_cache[lcore_id].len;
>> +		fprintf(f, "    cache_count[%u]=%u\n", lcore_id, cache_count);
>> +		count += cache_count;
>> +	}
>> +	fprintf(f, "    total_cache_count=%u\n", count);
>> +	return count;
>> +#else
>>  #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>>  	unsigned lcore_id;
>>  	unsigned count = 0;
>
>I think in this case we could avoid duplicating the code without
>being unclear, by using the proper #ifdefs:

I was struggling with how it should be done. I like to see clear ifdefs and be able to see the complete code for a given case. In these cases I wanted to make it simple to remove the code quickly by just deleting lines instead of editing lines. I will follow your suggestion.
>
>#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 || defined(RTE_NEXT_ABI)
>	/* common code */
>#ifdef RTE_NEXT_ABI
>	if (mp->cache_size == 0)
>		return count;
>#endif
>	/* common code */
>#else
>...
>#endif
>
>
>> @@ -755,6 +806,26 @@ mempool_audit_cookies(const struct rte_mempool *mp)
>>  #define mempool_audit_cookies(mp) do {} while(0)
>>  #endif
>>  
>> +#ifdef RTE_NEXT_ABI
>> +/* check cookies before and after objects */
>> +static void
>> +mempool_audit_cache(const struct rte_mempool *mp)
>> +{
>> +	/* check cache size consistency */
>> +	unsigned lcore_id;
>> +
>> +	if (mp->cache_size == 0)
>> +		return;
>> +
>> +	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
>> +		if (mp->local_cache[lcore_id].len > mp->cache_flushthresh) {
>> +			RTE_LOG(CRIT, MEMPOOL, "badness on cache[%u]\n",
>> +				lcore_id);
>> +			rte_panic("MEMPOOL: invalid cache len\n");
>> +		}
>> +	}
>> +}
>> +#else
>
>same here
>
>> diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
>> index 6e2390a..fc9b595 100644
>> --- a/lib/librte_mempool/rte_mempool.h
>> +++ b/lib/librte_mempool/rte_mempool.h
>> @@ -95,6 +95,19 @@ struct rte_mempool_debug_stats {
>>  } __rte_cache_aligned;
>>  #endif
>>  
>> +#ifdef RTE_NEXT_ABI
>> +/**
>> + * A structure that stores a per-core object cache.
>> + */
>> +struct rte_mempool_cache {
>> +	unsigned len; /**< Cache len */
>> +	/*
>> +	 * Cache is allocated to this size to allow it to overflow in certain
>> +	 * cases to avoid needless emptying of cache.
>> +	 */
>> +	void *objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 3]; /**< Cache objects */
>> +} __rte_cache_aligned;
>> +#else
>
>same here
>
>
>
>> @@ -755,19 +793,25 @@ static inline void __attribute__((always_inline))
>>  __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
>>  		    unsigned n, int is_mp)
>>  {
>> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>>  #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>> +#endif /* RTE_NEXT_ABI */
>>  	struct rte_mempool_cache *cache;
>>  	uint32_t index;
>>  	void **cache_objs;
>>  	unsigned lcore_id = rte_lcore_id();
>>  	uint32_t cache_size = mp->cache_size;
>>  	uint32_t flushthresh = mp->cache_flushthresh;
>> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>>  #endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
>> +#endif /* RTE_NEXT_ABI */
>
>this looks strange... I think it does not work properly.
>Why not
>#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 || defined(RTE_NEXT_ABI)

Yes, it is strange :-(
>
>>  	/* increment stat now, adding in mempool always success */
>>  	__MEMPOOL_STAT_ADD(mp, put, n);
>>  
>> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>>  #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>> +#endif /* RTE_NEXT_ABI */
>>  	/* cache is not enabled or single producer or non-EAL thread */
>>  	if (unlikely(cache_size == 0 || is_mp == 0 ||
>>  		     lcore_id >= RTE_MAX_LCORE))
>> @@ -802,7 +846,9 @@ __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
>>  	return;
>>  
>>  ring_enqueue:
>> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>>  #endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
>> +#endif /* RTE_NEXT_ABI */
>>  
>>  	/* push remaining objects in ring */
>>  #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
>> @@ -946,7 +992,9 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
>>  		   unsigned n, int is_mc)
>>  {
>>  	int ret;
>> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>>  #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>> +#endif /* RTE_NEXT_ABI */
>>  	struct rte_mempool_cache *cache;
>>  	uint32_t index, len;
>>  	void **cache_objs;
>> @@ -992,7 +1040,9 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
>>  	return 0;
>>  
>>  ring_dequeue:
>> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>>  #endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
>> +#endif /* RTE_NEXT_ABI */
>>  
>>  	/* get remaining objects from ring */
>>  	if (is_mc)
>
>Same in those cases.
>
>
>
>Regards,
>Olivier
>


Regards,
Keith






* Re: [PATCH v2] mempool: reduce rte_mempool structure size
  2016-02-10 16:59   ` Olivier MATZ
  2016-02-10 17:22     ` Wiles, Keith
@ 2016-02-10 18:35     ` Wiles, Keith
  2016-02-10 20:06       ` Olivier MATZ
  1 sibling, 1 reply; 32+ messages in thread
From: Wiles, Keith @ 2016-02-10 18:35 UTC (permalink / raw)
  To: Olivier MATZ, dev

>Hi Keith,
>
>Thank you for adding the RTE_NEXT_ABI. I think this is the way
>described in the process. Your changes will be available in the next
>version (16.04) for people compiling with RTE_NEXT_ABI=y, and in
>16.07 without the option (I'm just surprised that RTE_NEXT_ABI=y in
>the default configs...).
>
>I think a deprecation notice should also be added in this commit
>in doc/guides/rel_notes/deprecation.rst.
>
>Please also find comments below.
>
>On 02/09/2016 06:30 PM, Keith Wiles wrote:
>
>> diff --git a/config/defconfig_x86_64-native-linuxapp-gcc b/config/defconfig_x86_64-native-linuxapp-gcc
>> index 60baf5b..02e9ace 100644
>> --- a/config/defconfig_x86_64-native-linuxapp-gcc
>> +++ b/config/defconfig_x86_64-native-linuxapp-gcc
>> @@ -40,3 +40,8 @@ CONFIG_RTE_ARCH_64=y
>>  
>>  CONFIG_RTE_TOOLCHAIN="gcc"
>>  CONFIG_RTE_TOOLCHAIN_GCC=y
>> +CONFIG_RTE_BUILD_SHARED_LIB=y
>> +CONFIG_RTE_NEXT_ABI=n
>> +CONFIG_RTE_EAL_IGB_UIO=n
>> +CONFIG_RTE_LIBRTE_KNI=n
>> +CONFIG_RTE_KNI_KMOD=n
>
>I think this should not be part of the patch.
>
>> @@ -672,6 +704,24 @@ rte_mempool_count(const struct rte_mempool *mp)
>>  static unsigned
>>  rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
>>  {
>> +#ifdef RTE_NEXT_ABI
>> +	unsigned lcore_id;
>> +	unsigned count = 0;
>> +	unsigned cache_count;
>> +
>> +	fprintf(f, "  cache infos:\n");
>> +	fprintf(f, "    cache_size=%"PRIu32"\n", mp->cache_size);
>> +	if (mp->cache_size == 0)
>> +		return count;
>> +
>> +	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
>> +		cache_count = mp->local_cache[lcore_id].len;
>> +		fprintf(f, "    cache_count[%u]=%u\n", lcore_id, cache_count);
>> +		count += cache_count;
>> +	}
>> +	fprintf(f, "    total_cache_count=%u\n", count);
>> +	return count;
>> +#else
>>  #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>>  	unsigned lcore_id;
>>  	unsigned count = 0;
>
>I think in this case we could avoid duplicating the code without
>being unclear, by using the proper #ifdefs:
>
>#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 || defined(RTE_NEXT_ABI)
>	/* common code */
>#ifdef RTE_NEXT_ABI
>	if (mp->cache_size == 0)
>		return count;
>#endif
>	/* common code */
>#else
>...
>#endif

Started looking at this change, and the problem is I want to remove the ifdef RTE_MEMPOOL.. as well as the #else/#endif code. If I do as you suggest it does not appear to be clearer: when someone goes back to remove the code, they may think the #ifdef RTE_MEMPOOL/#else/#endif lines are still required.

>
>
>> @@ -755,6 +806,26 @@ mempool_audit_cookies(const struct rte_mempool *mp)
>>  #define mempool_audit_cookies(mp) do {} while(0)
>>  #endif
>>  
>> +#ifdef RTE_NEXT_ABI
>> +/* check cookies before and after objects */
>> +static void
>> +mempool_audit_cache(const struct rte_mempool *mp)
>> +{
>> +	/* check cache size consistency */
>> +	unsigned lcore_id;
>> +
>> +	if (mp->cache_size == 0)
>> +		return;
>> +
>> +	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
>> +		if (mp->local_cache[lcore_id].len > mp->cache_flushthresh) {
>> +			RTE_LOG(CRIT, MEMPOOL, "badness on cache[%u]\n",
>> +				lcore_id);
>> +			rte_panic("MEMPOOL: invalid cache len\n");
>> +		}
>> +	}
>> +}
>> +#else
>
>same here

The same comment here.
>
>> diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
>> index 6e2390a..fc9b595 100644
>> --- a/lib/librte_mempool/rte_mempool.h
>> +++ b/lib/librte_mempool/rte_mempool.h
>> @@ -95,6 +95,19 @@ struct rte_mempool_debug_stats {
>>  } __rte_cache_aligned;
>>  #endif
>>  
>> +#ifdef RTE_NEXT_ABI
>> +/**
>> + * A structure that stores a per-core object cache.
>> + */
>> +struct rte_mempool_cache {
>> +	unsigned len; /**< Cache len */
>> +	/*
>> +	 * Cache is allocated to this size to allow it to overflow in certain
>> +	 * cases to avoid needless emptying of cache.
>> +	 */
>> +	void *objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 3]; /**< Cache objects */
>> +} __rte_cache_aligned;
>> +#else
>
>same here

Same for this one.
>
>
>
>> @@ -755,19 +793,25 @@ static inline void __attribute__((always_inline))
>>  __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
>>  		    unsigned n, int is_mp)
>>  {
>> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>>  #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>> +#endif /* RTE_NEXT_ABI */
>>  	struct rte_mempool_cache *cache;
>>  	uint32_t index;
>>  	void **cache_objs;
>>  	unsigned lcore_id = rte_lcore_id();
>>  	uint32_t cache_size = mp->cache_size;
>>  	uint32_t flushthresh = mp->cache_flushthresh;
>> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>>  #endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
>> +#endif /* RTE_NEXT_ABI */
>
>this looks strange... I think it does not work properly.
>Why not
>#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 || defined(RTE_NEXT_ABI)

Yes, I agree the ifndef looks strange, but it should work, as we want to remove the #ifdef RTE_MEMPOOL/#endif lines. This was the reason for the comment that it was an ifndef.
>
>>  	/* increment stat now, adding in mempool always success */
>>  	__MEMPOOL_STAT_ADD(mp, put, n);
>>  
>> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>>  #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>> +#endif /* RTE_NEXT_ABI */
>>  	/* cache is not enabled or single producer or non-EAL thread */
>>  	if (unlikely(cache_size == 0 || is_mp == 0 ||
>>  		     lcore_id >= RTE_MAX_LCORE))
>> @@ -802,7 +846,9 @@ __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
>>  	return;
>>  
>>  ring_enqueue:
>> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>>  #endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
>> +#endif /* RTE_NEXT_ABI */
>>  
>>  	/* push remaining objects in ring */
>>  #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
>> @@ -946,7 +992,9 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
>>  		   unsigned n, int is_mc)
>>  {
>>  	int ret;
>> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>>  #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>> +#endif /* RTE_NEXT_ABI */
>>  	struct rte_mempool_cache *cache;
>>  	uint32_t index, len;
>>  	void **cache_objs;
>> @@ -992,7 +1040,9 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
>>  	return 0;
>>  
>>  ring_dequeue:
>> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>>  #endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
>> +#endif /* RTE_NEXT_ABI */
>>  
>>  	/* get remaining objects from ring */
>>  	if (is_mc)
>
>Same in those cases.

The #ifdef RTE_MEMPOOL/#endif lines need to be removed when deprecated.
>
>
>Regards,
>Olivier
>


Regards,
Keith






* Re: [PATCH v2] mempool: reduce rte_mempool structure size
  2016-02-10 18:35     ` Wiles, Keith
@ 2016-02-10 20:06       ` Olivier MATZ
  0 siblings, 0 replies; 32+ messages in thread
From: Olivier MATZ @ 2016-02-10 20:06 UTC (permalink / raw)
  To: Wiles, Keith, dev

Hi Keith,

On 02/10/2016 07:35 PM, Wiles, Keith wrote:
>>> @@ -672,6 +704,24 @@ rte_mempool_count(const struct rte_mempool *mp)
>>>  static unsigned
>>>  rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
>>>  {
>>> +#ifdef RTE_NEXT_ABI
>>> +	unsigned lcore_id;
>>> +	unsigned count = 0;
>>> +	unsigned cache_count;
>>> +
>>> +	fprintf(f, "  cache infos:\n");
>>> +	fprintf(f, "    cache_size=%"PRIu32"\n", mp->cache_size);
>>> +	if (mp->cache_size == 0)
>>> +		return count;
>>> +
>>> +	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
>>> +		cache_count = mp->local_cache[lcore_id].len;
>>> +		fprintf(f, "    cache_count[%u]=%u\n", lcore_id, cache_count);
>>> +		count += cache_count;
>>> +	}
>>> +	fprintf(f, "    total_cache_count=%u\n", count);
>>> +	return count;
>>> +#else
>>>  #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>>>  	unsigned lcore_id;
>>>  	unsigned count = 0;
>>
>> I think in this case we could avoid duplicating the code without
>> being unclear, by using the proper #ifdefs:
>>
>> #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 || defined(RTE_NEXT_ABI)
>> 	/* common code */
>> #ifdef RTE_NEXT_ABI
>> 	if (mp->cache_size == 0)
>> 		return count;
>> #endif
>> 	/* common code */
>> #else
>> ...
>> #endif
> 
> Started looking at this change, and the problem is I want to remove the ifdef RTE_MEMPOOL.. as well as the #else/#endif code. If I do as you suggest it does not appear to be clearer: when someone goes back to remove the code, they may think the #ifdef RTE_MEMPOOL/#else/#endif lines are still required.

OK, makes sense.

>>> @@ -755,19 +793,25 @@ static inline void __attribute__((always_inline))
>>>  __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
>>>  		    unsigned n, int is_mp)
>>>  {
>>> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>>>  #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>>> +#endif /* RTE_NEXT_ABI */
>>>  	struct rte_mempool_cache *cache;
>>>  	uint32_t index;
>>>  	void **cache_objs;
>>>  	unsigned lcore_id = rte_lcore_id();
>>>  	uint32_t cache_size = mp->cache_size;
>>>  	uint32_t flushthresh = mp->cache_flushthresh;
>>> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>>>  #endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
>>> +#endif /* RTE_NEXT_ABI */
>>
>> this looks strange... I think it does not work properly.
>> Why not
>> #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 || defined(RTE_NEXT_ABI)
> 
> Yes, I agree the ifndef looks strange, but it should work, as we want to remove the #ifdef RTE_MEMPOOL/#endif lines. This was the reason for the comment that it was an ifndef.

It's not only strange, it's also probably not what you want to do:

	#ifndef RTE_NEXT_ABI	/* Note: ifndef */
	#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
	#endif /* RTE_NEXT_ABI */
	...

Here, the #endif corresponds to the second #if, not the first #ifdef.
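
A minimal illustration (never_compiled is a placeholder): the
preprocessor tracks conditional nesting even inside a skipped group,
so with RTE_NEXT_ABI defined the skip that starts at the #ifndef runs
straight past the first #endif:

#ifndef RTE_NEXT_ABI			/* false: start skipping    */
#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0	/* depth 2, inside the skip */
#endif					/* closes the inner #if     */
int never_compiled;			/* still inside the skip!   */
#endif					/* closes the #ifndef       */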

Regards,
Olivier


* [PATCH v3] mempool: reduce rte_mempool structure size
  2016-02-09 17:30 ` [PATCH v2] mempool: reduce " Keith Wiles
  2016-02-10 16:59   ` Olivier MATZ
@ 2016-02-10 21:18   ` Keith Wiles
  2016-02-12 11:23     ` Panu Matilainen
  2016-02-12 18:36   ` [PATCH v4] " Keith Wiles
  2 siblings, 1 reply; 32+ messages in thread
From: Keith Wiles @ 2016-02-10 21:18 UTC (permalink / raw)
  To: dev

The rte_mempool structure is changed, which will cause an ABI change
for this structure. Providing backward compatibility is not
reasonable here, as this structure is used in multiple defines/inlines.

Allow mempool cache support to be dynamic depending on whether the
mempool being created needs cache support. Saves about 1.5MB of
memory used by the rte_mempool structure.

Allocating small mempools which do not require a cache can consume
large amounts of memory if you have a number of these mempools.

Signed-off-by: Keith Wiles <keith.wiles@intel.com>
---
* Patch v3 fixes up the ifdefs to correct some problems in removing ifdef
  lines. Added the ABI deprecation notice to deprecation.rst.
* Patch v2 to add some comments and setup for RTE_NEXT_ABI changes.

 app/test/test_mempool.c              |  5 +++
 doc/guides/rel_notes/deprecation.rst |  7 +++
 lib/librte_mempool/rte_mempool.c     | 82 +++++++++++++++++++++++++++++++++---
 lib/librte_mempool/rte_mempool.h     | 46 ++++++++++++++++----
 4 files changed, 127 insertions(+), 13 deletions(-)

diff --git a/app/test/test_mempool.c b/app/test/test_mempool.c
index f0f823b..f3fba50 100644
--- a/app/test/test_mempool.c
+++ b/app/test/test_mempool.c
@@ -122,8 +122,13 @@ test_mempool_basic(void)
 		return -1;
 
 	printf("get private data\n");
+#ifdef RTE_NEXT_ABI
+	if (rte_mempool_get_priv(mp) != (char *)mp +
+			MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size))
+#else
 	if (rte_mempool_get_priv(mp) !=
 			(char*) mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num))
+#endif
 		return -1;
 
 	printf("get physical address of an object\n");
diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst
index e94d4a2..1b9d25e 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -49,3 +49,10 @@ Deprecation Notices
   commands (such as RETA update in testpmd).  This should impact
   CMDLINE_PARSE_RESULT_BUFSIZE, STR_TOKEN_SIZE and RDLINE_BUF_SIZE.
   It should be integrated in release 2.3.
+
+* An ABI change is planned for the rte_mempool structure to allow mempool
+  cache support to be dynamic depending on whether the mempool being
+  created needs cache support. This saves about 1.5MB of memory per
+  rte_mempool structure by removing the per-lcore cache memory. The
+  change will occur after the DPDK 16.04 release.
+
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index aff5f6d..5f21eaa 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -452,12 +452,17 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 	/* compilation-time checks */
 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool) &
 			  RTE_CACHE_LINE_MASK) != 0);
+#ifdef RTE_NEXT_ABI
+	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_cache) &
+			  RTE_CACHE_LINE_MASK) != 0);
+#else
 #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_cache) &
 			  RTE_CACHE_LINE_MASK) != 0);
 	RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, local_cache) &
 			  RTE_CACHE_LINE_MASK) != 0);
 #endif
+#endif /* RTE_NEXT_ABI */
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_debug_stats) &
 			  RTE_CACHE_LINE_MASK) != 0);
@@ -527,9 +532,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 		 */
 		int head = sizeof(struct rte_mempool);
 		int new_size = (private_data_size + head) % page_size;
-		if (new_size) {
+		if (new_size)
 			private_data_size += page_size - new_size;
-		}
 	}
 
 	/* try to allocate tailq entry */
@@ -544,7 +548,12 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 	 * store mempool objects. Otherwise reserve a memzone that is large
 	 * enough to hold mempool header and metadata plus mempool objects.
 	 */
+#ifdef RTE_NEXT_ABI
+	mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size);
+	mempool_size += private_data_size;
+#else
 	mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num) + private_data_size;
+#endif /* RTE_NEXT_ABI */
 	mempool_size = RTE_ALIGN_CEIL(mempool_size, RTE_MEMPOOL_ALIGN);
 	if (vaddr == NULL)
 		mempool_size += (size_t)objsz.total_size * n;
@@ -598,9 +607,22 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 	mp->cache_flushthresh = CALC_CACHE_FLUSHTHRESH(cache_size);
 	mp->private_data_size = private_data_size;
 
+#ifdef RTE_NEXT_ABI
+	/*
+	 * local_cache pointer is set even if cache_size is zero.
+	 * The local_cache points to just past the elt_pa[] array.
+	 */
+	mp->local_cache = (struct rte_mempool_cache *)
+			((char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num, 0));
+
+	/* calculate address of the first element for continuous mempool. */
+	obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size) +
+		private_data_size;
+#else
 	/* calculate address of the first element for continuous mempool. */
 	obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num) +
 		private_data_size;
+#endif /* RTE_NEXT_ABI */
 	obj = RTE_PTR_ALIGN_CEIL(obj, RTE_MEMPOOL_ALIGN);
 
 	/* populate address translation fields. */
@@ -613,9 +635,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 		mp->elt_va_start = (uintptr_t)obj;
 		mp->elt_pa[0] = mp->phys_addr +
 			(mp->elt_va_start - (uintptr_t)mp);
-
-	/* mempool elements in a separate chunk of memory. */
 	} else {
+		/* mempool elements in a separate chunk of memory. */
 		mp->elt_va_start = (uintptr_t)vaddr;
 		memcpy(mp->elt_pa, paddr, sizeof (mp->elt_pa[0]) * pg_num);
 	}
@@ -645,10 +666,21 @@ unsigned
 rte_mempool_count(const struct rte_mempool *mp)
 {
 	unsigned count;
+#ifdef RTE_NEXT_ABI
+	unsigned lcore_id;
 
 	count = rte_ring_count(mp->ring);
 
+	if (mp->cache_size == 0)
+		return count;
+
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
+		count += mp->local_cache[lcore_id].len;
+#else
 #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
+
+	count = rte_ring_count(mp->ring);
+
 	{
 		unsigned lcore_id;
 		if (mp->cache_size == 0)
@@ -658,6 +690,7 @@ rte_mempool_count(const struct rte_mempool *mp)
 			count += mp->local_cache[lcore_id].len;
 	}
 #endif
+#endif /* RTE_NEXT_ABI */
 
 	/*
 	 * due to race condition (access to len is not locked), the
@@ -672,6 +705,24 @@ rte_mempool_count(const struct rte_mempool *mp)
 static unsigned
 rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
 {
+#ifdef RTE_NEXT_ABI
+	unsigned lcore_id;
+	unsigned count = 0;
+	unsigned cache_count;
+
+	fprintf(f, "  cache infos:\n");
+	fprintf(f, "    cache_size=%"PRIu32"\n", mp->cache_size);
+	if (mp->cache_size == 0)
+		return count;
+
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+		cache_count = mp->local_cache[lcore_id].len;
+		fprintf(f, "    cache_count[%u]=%u\n", lcore_id, cache_count);
+		count += cache_count;
+	}
+	fprintf(f, "    total_cache_count=%u\n", count);
+	return count;
+#else
 #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	unsigned lcore_id;
 	unsigned count = 0;
@@ -691,6 +742,7 @@ rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
 	fprintf(f, "  cache disabled\n");
 	return 0;
 #endif
+#endif /* RTE_NEXT_ABI */
 }
 
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
@@ -755,6 +807,26 @@ mempool_audit_cookies(const struct rte_mempool *mp)
 #define mempool_audit_cookies(mp) do {} while(0)
 #endif
 
+#ifdef RTE_NEXT_ABI
+/* check cookies before and after objects */
+static void
+mempool_audit_cache(const struct rte_mempool *mp)
+{
+	/* check cache size consistency */
+	unsigned lcore_id;
+
+	if (mp->cache_size == 0)
+		return;
+
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+		if (mp->local_cache[lcore_id].len > mp->cache_flushthresh) {
+			RTE_LOG(CRIT, MEMPOOL, "badness on cache[%u]\n",
+				lcore_id);
+			rte_panic("MEMPOOL: invalid cache len\n");
+		}
+	}
+}
+#else
 #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 /* check cookies before and after objects */
 static void
@@ -773,7 +845,7 @@ mempool_audit_cache(const struct rte_mempool *mp)
 #else
 #define mempool_audit_cache(mp) do {} while(0)
 #endif
-
+#endif /* RTE_NEXT_ABI */
 
 /* check the consistency of mempool (size, cookies, ...) */
 void
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 9745bf0..b12d6a9 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -95,7 +95,7 @@ struct rte_mempool_debug_stats {
 } __rte_cache_aligned;
 #endif
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
+#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 || defined(RTE_NEXT_ABI) /* Remove line */
 /**
  * A structure that stores a per-core object cache.
  */
@@ -107,7 +107,7 @@ struct rte_mempool_cache {
 	 */
 	void *objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 3]; /**< Cache objects */
 } __rte_cache_aligned;
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
+#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */ /* Remove line RTE_NEXT_ABI */
 
 /**
  * A structure that stores the size of mempool elements.
@@ -194,10 +194,14 @@ struct rte_mempool {
 
 	unsigned private_data_size;      /**< Size of private data. */
 
+#ifdef RTE_NEXT_ABI
+	struct rte_mempool_cache *local_cache; /**< Per-lcore local cache */
+#else
 #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	/** Per-lcore local cache. */
 	struct rte_mempool_cache local_cache[RTE_MAX_LCORE];
 #endif
+#endif  /* RTE_NEXT_ABI */
 
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
 	/** Per-lcore statistics. */
@@ -246,6 +250,26 @@ struct rte_mempool {
 #define __MEMPOOL_STAT_ADD(mp, name, n) do {} while(0)
 #endif
 
+#ifdef RTE_NEXT_ABI
+/**
+ * Size of elt_pa array size based on number of pages. (Internal use)
+ */
+#define __PA_SIZE(mp, pgn) \
+	RTE_ALIGN_CEIL((((pgn) - RTE_DIM((mp)->elt_pa)) * \
+	sizeof((mp)->elt_pa[0])), RTE_CACHE_LINE_SIZE)
+
+/**
+ * Calculate the size of the mempool header.
+ *
+ * @param mp
+ *   Pointer to the memory pool.
+ * @param pgn
+ *   Number of pages used to store mempool objects.
+ */
+#define MEMPOOL_HEADER_SIZE(mp, pgn, cs) \
+	(sizeof(*(mp)) + __PA_SIZE(mp, pgn) + (((cs) == 0) ? 0 : \
+	(sizeof(struct rte_mempool_cache) * RTE_MAX_LCORE)))
+#else
 /**
  * Calculate the size of the mempool header.
  *
@@ -257,6 +281,7 @@ struct rte_mempool {
 #define	MEMPOOL_HEADER_SIZE(mp, pgn)	(sizeof(*(mp)) + \
 	RTE_ALIGN_CEIL(((pgn) - RTE_DIM((mp)->elt_pa)) * \
 	sizeof ((mp)->elt_pa[0]), RTE_CACHE_LINE_SIZE))
+#endif /* RTE_NEXT_ABI */
 
 /**
  * Return true if the whole mempool is in contiguous memory.
@@ -755,19 +780,19 @@ static inline void __attribute__((always_inline))
 __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
 		    unsigned n, int is_mp)
 {
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
+#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 || defined(RTE_NEXT_ABI) /* Remove line */
 	struct rte_mempool_cache *cache;
 	uint32_t index;
 	void **cache_objs;
 	unsigned lcore_id = rte_lcore_id();
 	uint32_t cache_size = mp->cache_size;
 	uint32_t flushthresh = mp->cache_flushthresh;
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
+#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */   /* Remove line RTE_NEXT_ABI */
 
 	/* increment stat now, adding in mempool always success */
 	__MEMPOOL_STAT_ADD(mp, put, n);
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
+#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 || defined(RTE_NEXT_ABI) /* Remove line */
 	/* cache is not enabled or single producer or non-EAL thread */
 	if (unlikely(cache_size == 0 || is_mp == 0 ||
 		     lcore_id >= RTE_MAX_LCORE))
@@ -802,7 +827,7 @@ __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
 	return;
 
 ring_enqueue:
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
+#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */ /* Remove line RTE_NEXT_ABI */
 
 	/* push remaining objects in ring */
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
@@ -946,7 +971,7 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
 		   unsigned n, int is_mc)
 {
 	int ret;
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
+#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 || defined(RTE_NEXT_ABI) /* Remove line */
 	struct rte_mempool_cache *cache;
 	uint32_t index, len;
 	void **cache_objs;
@@ -992,7 +1017,7 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
 	return 0;
 
 ring_dequeue:
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
+#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */ /* Remove line RTE_NEXT_ABI */
 
 	/* get remaining objects from ring */
 	if (is_mc)
@@ -1293,7 +1318,12 @@ void rte_mempool_audit(const struct rte_mempool *mp);
  */
 static inline void *rte_mempool_get_priv(struct rte_mempool *mp)
 {
+#ifdef RTE_NEXT_ABI
+	return (char *)mp +
+		MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size);
+#else
 	return (char *)mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num);
+#endif /* RTE_NEXT_ABI */
 }
 
 /**
-- 
2.5.4 (Apple Git-61)

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: [PATCH v3] mempool: reduce rte_mempool structure size
  2016-02-10 21:18   ` [PATCH v3] " Keith Wiles
@ 2016-02-12 11:23     ` Panu Matilainen
  2016-02-12 13:57       ` Thomas Monjalon
  0 siblings, 1 reply; 32+ messages in thread
From: Panu Matilainen @ 2016-02-12 11:23 UTC (permalink / raw)
  To: Keith Wiles, dev

On 02/10/2016 11:18 PM, Keith Wiles wrote:
> The rte_mempool structure is changed, which will cause an ABI change
> for this structure. Providing backward compat is not reasonable
> here as this structure is used in multiple defines/inlines.
>
> Allow mempool cache support to be dynamic depending on if the
> mempool being created needs cache support. Saves about 1.5M of
> memory used by the rte_mempool structure.
[...]
>   static inline void *rte_mempool_get_priv(struct rte_mempool *mp)
>   {
> +#ifdef RTE_NEXT_ABI
> +	return (char *)mp +
> +		MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size);
> +#else
>   	return (char *)mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num);
> +#endif /* RTE_NEXT_ABI */
>   }
[...]

This is not RTE_NEXT_ABI material IMO, the added ifdef clutter is just 
too much.

I'd suggest adding a deprecation notice for the change now and after 
16.04 is released, just resend the patch without messing with RTE_NEXT_ABI.

	- Panu -

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v3] mempool: reduce rte_mempool structure size
  2016-02-12 11:23     ` Panu Matilainen
@ 2016-02-12 13:57       ` Thomas Monjalon
  2016-02-12 14:19         ` Panu Matilainen
  0 siblings, 1 reply; 32+ messages in thread
From: Thomas Monjalon @ 2016-02-12 13:57 UTC (permalink / raw)
  To: Panu Matilainen, Keith Wiles; +Cc: dev

2016-02-12 13:23, Panu Matilainen:
> On 02/10/2016 11:18 PM, Keith Wiles wrote:
> >   static inline void *rte_mempool_get_priv(struct rte_mempool *mp)
> >   {
> > +#ifdef RTE_NEXT_ABI
> > +	return (char *)mp +
> > +		MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size);
> > +#else
> >   	return (char *)mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num);
> > +#endif /* RTE_NEXT_ABI */
> >   }
> 
> This is not RTE_NEXT_ABI material IMO, the added ifdef clutter is just 
> too much.

The changes are restricted to the mempool files.
I think it is not so much. However I wonder whether the feature is important
enough to justify the use of NEXT_ABI.

> I'd suggest adding a deprecation notice for the change now and after 
> 16.04 is released, just resend the patch without messing with RTE_NEXT_ABI.

When adding a deprecation notice, it is really better to provide a reference
to the code change.
So if you give up on NEXT_ABI, please add a link to this code change in
the new commit message. Thanks

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v3] mempool: reduce rte_mempool structure size
  2016-02-12 13:57       ` Thomas Monjalon
@ 2016-02-12 14:19         ` Panu Matilainen
  2016-02-12 15:07           ` Wiles, Keith
  0 siblings, 1 reply; 32+ messages in thread
From: Panu Matilainen @ 2016-02-12 14:19 UTC (permalink / raw)
  To: Thomas Monjalon, Keith Wiles; +Cc: dev

On 02/12/2016 03:57 PM, Thomas Monjalon wrote:
> 2016-02-12 13:23, Panu Matilainen:
>> On 02/10/2016 11:18 PM, Keith Wiles wrote:
>>>    static inline void *rte_mempool_get_priv(struct rte_mempool *mp)
>>>    {
>>> +#ifdef RTE_NEXT_ABI
>>> +	return (char *)mp +
>>> +		MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size);
>>> +#else
>>>    	return (char *)mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num);
>>> +#endif /* RTE_NEXT_ABI */
>>>    }
>>
>> This is not RTE_NEXT_ABI material IMO, the added ifdef clutter is just
>> too much.
>
> The changes are restricted to the mempool files.
> I think it is not so much. However I wonder whether the feature is important
> enough to justify the use of NEXT_ABI.

Well yes, to be precise: for the benefit of this patch, the ifdef 
clutter seems too much.

It's not as if every change is expected to go through a NEXT_ABI phase, 
based on http://dpdk.org/ml/archives/dev/2016-February/032866.html there 
might be some confusion regarding that.

>
>> I'd suggest adding a deprecation notice for the change now and after
>> 16.04 is released, just resend the patch without messing with RTE_NEXT_ABI.
>
> When adding a deprecation notice, it is really better to provide a reference
> to the code change.
> So if you give up on NEXT_ABI, please add a link to this code change in
> the new commit message. Thanks
>

Nod.

	- Panu -

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v3] mempool: reduce rte_mempool structure size
  2016-02-12 14:19         ` Panu Matilainen
@ 2016-02-12 15:07           ` Wiles, Keith
  2016-02-12 15:38             ` Thomas Monjalon
  0 siblings, 1 reply; 32+ messages in thread
From: Wiles, Keith @ 2016-02-12 15:07 UTC (permalink / raw)
  To: Panu Matilainen, Thomas Monjalon; +Cc: dev

>On 02/12/2016 03:57 PM, Thomas Monjalon wrote:
>> 2016-02-12 13:23, Panu Matilainen:
>>> On 02/10/2016 11:18 PM, Keith Wiles wrote:
>>>>    static inline void *rte_mempool_get_priv(struct rte_mempool *mp)
>>>>    {
>>>> +#ifdef RTE_NEXT_ABI
>>>> +	return (char *)mp +
>>>> +		MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size);
>>>> +#else
>>>>    	return (char *)mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num);
>>>> +#endif /* RTE_NEXT_ABI */
>>>>    }
>>>
>>> This is not RTE_NEXT_ABI material IMO, the added ifdef clutter is just
>>> too much.
>>
>> The changes are restricted to the mempool files.
>> I think it is not so much. However I wonder whether the feature is important
>> enough to justify the use of NEXT_ABI.
>
>Well yes, to be precise: for the benefit of this patch, the ifdef 
>clutter seems too much.
>
>It's not as if every change is expected to go through a NEXT_ABI phase, 
>based on http://dpdk.org/ml/archives/dev/2016-February/032866.html there 
>might be some confusion regarding that.

I think the NEXT_ABI is reasonable in this case as it does change a structure everyone uses and the ifdef clutter is caused by having to remove old ifdefs, which is a good thing for DPDK. The NEXT_ABI ifdefs only exist for one release and then they will disappear, which I think is more than reasonable.
>
>>
>>> I'd suggest adding a deprecation notice for the change now and after
>>> 16.04 is released, just resend the patch without messing with RTE_NEXT_ABI.
>>
>> When adding a deprecation notice, it is really better to provide a reference
>> to the code change.
>> So if you give up on NEXT_ABI, please add a link to this code change in
>> the new commit message. Thanks
>>
>
>Nod.
>
>	- Panu -
>


Regards,
Keith





^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v3] mempool: reduce rte_mempool structure size
  2016-02-12 15:07           ` Wiles, Keith
@ 2016-02-12 15:38             ` Thomas Monjalon
  2016-02-12 15:50               ` Olivier MATZ
  2016-02-12 15:54               ` Wiles, Keith
  0 siblings, 2 replies; 32+ messages in thread
From: Thomas Monjalon @ 2016-02-12 15:38 UTC (permalink / raw)
  To: Wiles, Keith; +Cc: dev

2016-02-12 15:07, Wiles, Keith:
> >On 02/12/2016 03:57 PM, Thomas Monjalon wrote:
> >> 2016-02-12 13:23, Panu Matilainen:
> >>> On 02/10/2016 11:18 PM, Keith Wiles wrote:
> >>>>    static inline void *rte_mempool_get_priv(struct rte_mempool *mp)
> >>>>    {
> >>>> +#ifdef RTE_NEXT_ABI
> >>>> +	return (char *)mp +
> >>>> +		MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size);
> >>>> +#else
> >>>>    	return (char *)mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num);
> >>>> +#endif /* RTE_NEXT_ABI */
> >>>>    }
> >>>
> >>> This is not RTE_NEXT_ABI material IMO, the added ifdef clutter is just
> >>> too much.
> >>
> >> The changes are restricted to the mempool files.
> >> I think it is not so much. However I wonder whether the feature is important
> >> enough to justify the use of NEXT_ABI.
> >
> >Well yes, to be precise: for the benefit of this patch, the ifdef 
> >clutter seems too much.
> >
> >It's not as if every change is expected to go through a NEXT_ABI phase, 
> >based on http://dpdk.org/ml/archives/dev/2016-February/032866.html there 
> >might be some confusion regarding that.
> 
> I think the NEXT_ABI is reasonable in this case as it does change a structure everyone uses and the ifdef clutter is caused by having to remove old ifdefs, which is a good thing for DPDK. The NEXT_ABI ifdefs only exist for one release and then they will disappear, which I think is more than reasonable.

OK, I'm going to sum it up with new words and let the conclusion come
from Keith, Panu and Olivier.

We agreed to allow ABI breaking if a notification was done in the
previous release.
Keith has sent a notification for 16.04 so the "official" ABI will be
changed in 16.07.
It is also encouraged to show how the ABI will be broken when sending
a notification. It allows reviewers to give an informed opinion before ack'ing.
The code snippet will also be useful to app developers when preparing
a future upgrade.
Keith has sent the whole code change.
This code change may be submitted in the current release without waiting
for the deprecation period if gated in the NEXT_ABI ifdefs.
It allows the feature to be provided to app developers who don't care about
versioning. But the price is more complicated code to read and manage.

To make it short, the rules to use NEXT_ABI are not strict and may change.
So now you have to decide if this change can be integrated in 16.04
as NEXT_ABI.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v3] mempool: reduce rte_mempool structure size
  2016-02-12 15:38             ` Thomas Monjalon
@ 2016-02-12 15:50               ` Olivier MATZ
  2016-02-12 15:58                 ` Wiles, Keith
  2016-02-15  9:58                 ` Hunt, David
  2016-02-12 15:54               ` Wiles, Keith
  1 sibling, 2 replies; 32+ messages in thread
From: Olivier MATZ @ 2016-02-12 15:50 UTC (permalink / raw)
  To: Thomas Monjalon, Wiles, Keith; +Cc: dev

Hi,

On 02/12/2016 04:38 PM, Thomas Monjalon wrote:
> OK, I'm going to sum it up with new words and let the conclusion come
> from Keith, Panu and Olivier.
> 
> We agreed to allow ABI breaking if a notification was done in the
> previous release.
> Keith has sent a notification for 16.04 so the "official" ABI will be
> changed in 16.07.
> It is also encouraged to show how the ABI will be broken when sending
> a notification. It allows reviewers to give an informed opinion before ack'ing.
> The code snippet will also be useful to app developers when preparing
> a future upgrade.
> Keith has sent the whole code change.
> This code change may be submitted in the current release without waiting
> for the deprecation period if gated in the NEXT_ABI ifdefs.
> It allows the feature to be provided to app developers who don't care about
> versioning. But the price is more complicated code to read and manage.
> 
> To make it short, the rules to use NEXT_ABI are not strict and may change.
> So now you have to decide if this change can be integrated in 16.04
> as NEXT_ABI.

Thank you Thomas for this summary. Then my vote would be in favor of
only keeping the deprecation notice for 16.04 and pushing the code without
the NEXT_ABI ifdefs for 16.07 because:

- although it's a valuable patch, there is no urgency in having it for
  the next release
- NEXT_ABI does make the code harder to read in this case, and I'm
  thinking about the patchset from David Hunt (external mempool handler)
  that will be in the same situation, and maybe also another patchset
  I'm working on.

Regards,
Olivier

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v3] mempool: reduce rte_mempool structure size
  2016-02-12 15:38             ` Thomas Monjalon
  2016-02-12 15:50               ` Olivier MATZ
@ 2016-02-12 15:54               ` Wiles, Keith
  1 sibling, 0 replies; 32+ messages in thread
From: Wiles, Keith @ 2016-02-12 15:54 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: dev

>2016-02-12 15:07, Wiles, Keith:
>> [...]
>> 
>> I think the NEXT_ABI is reasonable in this case as it does change a structure everyone uses and the ifdef clutter is caused by having to remove old ifdefs, which is a good thing for DPDK. The NEXT_ABI ifdefs only exist for one release and then they will disappear, which I think is more than reasonable.
>
>OK, I'm going to sum it up with new words and let the conclusion come
>from Keith, Panu and Olivier.
>
>We agreed to allow ABI breaking if a notification was done in the
>previous release.
>Keith has sent a notification for 16.04 so the "official" ABI will be
>changed in 16.07.
>It is also encouraged to show how the ABI will be broken when sending
>a notification. It allows reviewers to give an informed opinion before ack'ing.
>The code snippet will also be useful to app developers when preparing
>a future upgrade.
>Keith has sent the whole code change.
>This code change may be submitted in the current release without waiting
>for the deprecation period if gated in the NEXT_ABI ifdefs.
>It allows the feature to be provided to app developers who don't care about
>versioning. But the price is more complicated code to read and manage.
>
>To make it short, the rules to use NEXT_ABI are not strict and may change.
>So now you have to decide if this change can be integrated in 16.04
>as NEXT_ABI.

I would personally go ahead with the NEXT_ABI in 16.04 as it seems more reasonable for developers. I do not know what would break in a developer's project if we made this patch in 16.04 without NEXT_ABI, which to me means we need to err on the side of caution by using NEXT_ABI.

I am willing to submit a v4 patch without the NEXT_ABI ifdefs, but that is something everyone needs to agree on.
>
>


Regards,
Keith





^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v3] mempool: reduce rte_mempool structure size
  2016-02-12 15:50               ` Olivier MATZ
@ 2016-02-12 15:58                 ` Wiles, Keith
  2016-02-15  9:58                 ` Hunt, David
  1 sibling, 0 replies; 32+ messages in thread
From: Wiles, Keith @ 2016-02-12 15:58 UTC (permalink / raw)
  To: Olivier MATZ, Thomas Monjalon; +Cc: dev

>Hi,
>
>On 02/12/2016 04:38 PM, Thomas Monjalon wrote:
>> [...]
>
>Thank you Thomas for this summary. Then my vote would be in favor of
>only keeping the deprecation notice for 16.04 and pushing the code without
>the NEXT_ABI ifdefs for 16.07 because:
>
>- although it's a valuable patch, there is no urgency in having it for
>  the next release
>- NEXT_ABI does make the code harder to read in this case, and I'm
>  thinking about the patchset from David Hunt (external mempool handler)
>  that will be in the same situation, and maybe also another patchset
>  I'm working on.

As I stated in my previous email, I can submit a v4 patch. Do you need two patches, one for the notice in 16.04 and one for 16.07?
>
>Regards,
>Olivier
>


Regards,
Keith





^ permalink raw reply	[flat|nested] 32+ messages in thread

* [PATCH v4] mempool: reduce rte_mempool structure size
  2016-02-09 17:30 ` [PATCH v2] mempool: reduce " Keith Wiles
  2016-02-10 16:59   ` Olivier MATZ
  2016-02-10 21:18   ` [PATCH v3] " Keith Wiles
@ 2016-02-12 18:36   ` Keith Wiles
  2016-02-15  9:20     ` Olivier MATZ
  2016-04-14  9:42     ` [PATCH v5] " Olivier Matz
  2 siblings, 2 replies; 32+ messages in thread
From: Keith Wiles @ 2016-02-12 18:36 UTC (permalink / raw)
  To: dev

The rte_mempool structure is changed, which will cause an ABI change
for this structure. Providing backward compat is not reasonable
here as this structure is used in multiple defines/inlines.

Allow mempool cache support to be dynamic depending on if the
mempool being created needs cache support. Saves about 1.5M of
memory used by the rte_mempool structure.

Allocating small mempools which do not require cache can consume
large amounts of memory if you have a number of these mempools.
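
(As an illustration of the case this targets, a minimal sketch with
made-up names and sizes, not code from this patch:

	#include <rte_mempool.h>

	/* A small control pool with cache_size == 0: after this change,
	 * no rte_mempool_cache array is reserved behind the header. */
	struct rte_mempool *mp = rte_mempool_create("small_pool",
			1024,	/* n: number of elements */
			64,	/* elt_size */
			0,	/* cache_size: no per-lcore cache */
			0,	/* private_data_size */
			NULL, NULL,	/* mp_init, mp_init_arg */
			NULL, NULL,	/* obj_init, obj_init_arg */
			SOCKET_ID_ANY, 0);

Before this change every such pool embedded the full per-lcore cache
array whether it was used or not.)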

Change to be effective in release 16.07.
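
(For reference, a sketch of the memzone layout this produces, derived
from the new MEMPOOL_HEADER_SIZE()/__PA_SIZE() macros; offsets are
illustrative, each region cache-line aligned per the macros:

	(char *)mp
	 |- struct rte_mempool            sizeof(*mp), ends with elt_pa[1]
	 |- remainder of elt_pa[]         __PA_SIZE(mp, pg_num)
	 |- local_cache[RTE_MAX_LCORE]    reserved only when cache_size != 0;
	 |                                mp->local_cache always points here
	 |- private data                  rte_mempool_get_priv(mp)
	 |- objects                       for a contiguous pool (vaddr == NULL)

so MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size) covers everything before
the private data, and the cs == 0 variant used to set mp->local_cache
ends right after elt_pa[].)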

Signed-off-by: Keith Wiles <keith.wiles@intel.com>
---
* Patch v4 removes RTE_NEXT_ABI ifdefs for 16.07 integration, plus splits
  out the deprecation notice into a separate patch email for the 16.04 release.
* Patch v3 fixes up the ifdefs to correct some problems in removing ifdef
  lines. Added the ABI deprecation notice to the document file.
* Patch v2 adds some comments and setup for RTE_NEXT_ABI changes.

 app/test/test_mempool.c          |  4 +--
 lib/librte_mempool/rte_mempool.c | 55 ++++++++++++++++++----------------------
 lib/librte_mempool/rte_mempool.h | 29 ++++++++++-----------
 3 files changed, 40 insertions(+), 48 deletions(-)

diff --git a/app/test/test_mempool.c b/app/test/test_mempool.c
index f0f823b..10e1fa4 100644
--- a/app/test/test_mempool.c
+++ b/app/test/test_mempool.c
@@ -122,8 +122,8 @@ test_mempool_basic(void)
 		return -1;
 
 	printf("get private data\n");
-	if (rte_mempool_get_priv(mp) !=
-			(char*) mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num))
+	if (rte_mempool_get_priv(mp) != (char *)mp +
+			MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size))
 		return -1;
 
 	printf("get physical address of an object\n");
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index aff5f6d..6f067f3 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -452,12 +452,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 	/* compilation-time checks */
 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool) &
 			  RTE_CACHE_LINE_MASK) != 0);
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_cache) &
 			  RTE_CACHE_LINE_MASK) != 0);
-	RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, local_cache) &
-			  RTE_CACHE_LINE_MASK) != 0);
-#endif
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_debug_stats) &
 			  RTE_CACHE_LINE_MASK) != 0);
@@ -527,9 +523,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 		 */
 		int head = sizeof(struct rte_mempool);
 		int new_size = (private_data_size + head) % page_size;
-		if (new_size) {
+		if (new_size)
 			private_data_size += page_size - new_size;
-		}
 	}
 
 	/* try to allocate tailq entry */
@@ -544,7 +539,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 	 * store mempool objects. Otherwise reserve a memzone that is large
 	 * enough to hold mempool header and metadata plus mempool objects.
 	 */
-	mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num) + private_data_size;
+	mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size);
+	mempool_size += private_data_size;
 	mempool_size = RTE_ALIGN_CEIL(mempool_size, RTE_MEMPOOL_ALIGN);
 	if (vaddr == NULL)
 		mempool_size += (size_t)objsz.total_size * n;
@@ -598,8 +594,15 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 	mp->cache_flushthresh = CALC_CACHE_FLUSHTHRESH(cache_size);
 	mp->private_data_size = private_data_size;
 
+	/*
+	 * local_cache pointer is set even if cache_size is zero.
+	 * The local_cache points to just past the elt_pa[] array.
+	 */
+	mp->local_cache = (struct rte_mempool_cache *)
+			((char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num, 0));
+
 	/* calculate address of the first element for continuous mempool. */
-	obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num) +
+	obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size) +
 		private_data_size;
 	obj = RTE_PTR_ALIGN_CEIL(obj, RTE_MEMPOOL_ALIGN);
 
@@ -613,9 +616,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 		mp->elt_va_start = (uintptr_t)obj;
 		mp->elt_pa[0] = mp->phys_addr +
 			(mp->elt_va_start - (uintptr_t)mp);
-
-	/* mempool elements in a separate chunk of memory. */
 	} else {
+		/* mempool elements in a separate chunk of memory. */
 		mp->elt_va_start = (uintptr_t)vaddr;
 		memcpy(mp->elt_pa, paddr, sizeof (mp->elt_pa[0]) * pg_num);
 	}
@@ -645,19 +647,15 @@ unsigned
 rte_mempool_count(const struct rte_mempool *mp)
 {
 	unsigned count;
+	unsigned lcore_id;
 
 	count = rte_ring_count(mp->ring);
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
-	{
-		unsigned lcore_id;
-		if (mp->cache_size == 0)
-			return count;
+	if (mp->cache_size == 0)
+		return count;
 
-		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
-			count += mp->local_cache[lcore_id].len;
-	}
-#endif
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
+		count += mp->local_cache[lcore_id].len;
 
 	/*
 	 * due to race condition (access to len is not locked), the
@@ -672,13 +670,16 @@ rte_mempool_count(const struct rte_mempool *mp)
 static unsigned
 rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
 {
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	unsigned lcore_id;
 	unsigned count = 0;
 	unsigned cache_count;
 
 	fprintf(f, "  cache infos:\n");
 	fprintf(f, "    cache_size=%"PRIu32"\n", mp->cache_size);
+
+	if (mp->cache_size == 0)
+		return count;
+
 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
 		cache_count = mp->local_cache[lcore_id].len;
 		fprintf(f, "    cache_count[%u]=%u\n", lcore_id, cache_count);
@@ -686,11 +687,6 @@ rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
 	}
 	fprintf(f, "    total_cache_count=%u\n", count);
 	return count;
-#else
-	RTE_SET_USED(mp);
-	fprintf(f, "  cache disabled\n");
-	return 0;
-#endif
 }
 
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
@@ -755,13 +751,16 @@ mempool_audit_cookies(const struct rte_mempool *mp)
 #define mempool_audit_cookies(mp) do {} while(0)
 #endif
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 /* check cookies before and after objects */
 static void
 mempool_audit_cache(const struct rte_mempool *mp)
 {
 	/* check cache size consistency */
 	unsigned lcore_id;
+
+	if (mp->cache_size == 0)
+		return;
+
 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
 		if (mp->local_cache[lcore_id].len > mp->cache_flushthresh) {
 			RTE_LOG(CRIT, MEMPOOL, "badness on cache[%u]\n",
@@ -770,10 +769,6 @@ mempool_audit_cache(const struct rte_mempool *mp)
 		}
 	}
 }
-#else
-#define mempool_audit_cache(mp) do {} while(0)
-#endif
-
 
 /* check the consistency of mempool (size, cookies, ...) */
 void
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 9745bf0..8595e77 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -95,7 +95,6 @@ struct rte_mempool_debug_stats {
 } __rte_cache_aligned;
 #endif
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 /**
  * A structure that stores a per-core object cache.
  */
@@ -107,7 +106,6 @@ struct rte_mempool_cache {
 	 */
 	void *objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 3]; /**< Cache objects */
 } __rte_cache_aligned;
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
 
 /**
  * A structure that stores the size of mempool elements.
@@ -194,10 +192,7 @@ struct rte_mempool {
 
 	unsigned private_data_size;      /**< Size of private data. */
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
-	/** Per-lcore local cache. */
-	struct rte_mempool_cache local_cache[RTE_MAX_LCORE];
-#endif
+	struct rte_mempool_cache *local_cache; /**< Per-lcore local cache */
 
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
 	/** Per-lcore statistics. */
@@ -247,6 +242,13 @@ struct rte_mempool {
 #endif
 
 /**
+ * Size of elt_pa array size based on number of pages. (Internal use)
+ */
+#define __PA_SIZE(mp, pgn) \
+	RTE_ALIGN_CEIL((((pgn) - RTE_DIM((mp)->elt_pa)) * \
+	sizeof((mp)->elt_pa[0])), RTE_CACHE_LINE_SIZE)
+
+/**
  * Calculate the size of the mempool header.
  *
  * @param mp
@@ -254,9 +256,9 @@ struct rte_mempool {
  * @param pgn
  *   Number of pages used to store mempool objects.
  */
-#define	MEMPOOL_HEADER_SIZE(mp, pgn)	(sizeof(*(mp)) + \
-	RTE_ALIGN_CEIL(((pgn) - RTE_DIM((mp)->elt_pa)) * \
-	sizeof ((mp)->elt_pa[0]), RTE_CACHE_LINE_SIZE))
+#define MEMPOOL_HEADER_SIZE(mp, pgn, cs) \
+	(sizeof(*(mp)) + __PA_SIZE(mp, pgn) + (((cs) == 0) ? 0 : \
+	(sizeof(struct rte_mempool_cache) * RTE_MAX_LCORE)))
 
 /**
  * Return true if the whole mempool is in contiguous memory.
@@ -755,19 +757,16 @@ static inline void __attribute__((always_inline))
 __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
 		    unsigned n, int is_mp)
 {
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	struct rte_mempool_cache *cache;
 	uint32_t index;
 	void **cache_objs;
 	unsigned lcore_id = rte_lcore_id();
 	uint32_t cache_size = mp->cache_size;
 	uint32_t flushthresh = mp->cache_flushthresh;
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
 
 	/* increment stat now, adding in mempool always success */
 	__MEMPOOL_STAT_ADD(mp, put, n);
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	/* cache is not enabled or single producer or non-EAL thread */
 	if (unlikely(cache_size == 0 || is_mp == 0 ||
 		     lcore_id >= RTE_MAX_LCORE))
@@ -802,7 +801,6 @@ __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
 	return;
 
 ring_enqueue:
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
 
 	/* push remaining objects in ring */
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
@@ -946,7 +944,6 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
 		   unsigned n, int is_mc)
 {
 	int ret;
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	struct rte_mempool_cache *cache;
 	uint32_t index, len;
 	void **cache_objs;
@@ -992,7 +989,6 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
 	return 0;
 
 ring_dequeue:
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
 
 	/* get remaining objects from ring */
 	if (is_mc)
@@ -1293,7 +1289,8 @@ void rte_mempool_audit(const struct rte_mempool *mp);
  */
 static inline void *rte_mempool_get_priv(struct rte_mempool *mp)
 {
-	return (char *)mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num);
+	return (char *)mp +
+		MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size);
 }
 
 /**
-- 
2.5.4 (Apple Git-61)

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: [PATCH v4] mempool: reduce rte_mempool structure size
  2016-02-12 18:36   ` [PATCH v4] " Keith Wiles
@ 2016-02-15  9:20     ` Olivier MATZ
  2016-04-14  9:42     ` [PATCH v5] " Olivier Matz
  1 sibling, 0 replies; 32+ messages in thread
From: Olivier MATZ @ 2016-02-15  9:20 UTC (permalink / raw)
  To: Keith Wiles, dev



On 02/12/2016 07:36 PM, Keith Wiles wrote:
> The rte_mempool structure is changed, which will cause an ABI change
> for this structure. Providing backward compat is not reasonable
> here as this structure is used in multiple defines/inlines.
> 
> Allow mempool cache support to be dynamic depending on if the
> mempool being created needs cache support. Saves about 1.5M of
> memory used by the rte_mempool structure.
> 
> Allocating small mempools which do not require cache can consume
> larges amounts of memory if you have a number of these mempools.
> 
> Change to be effective in release 16.07.
> 
> Signed-off-by: Keith Wiles <keith.wiles@intel.com>

Acked-by: Olivier Matz <olivier.matz@6wind.com>
(for 16.07)

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v3] mempool: reduce rte_mempool structure size
  2016-02-12 15:50               ` Olivier MATZ
  2016-02-12 15:58                 ` Wiles, Keith
@ 2016-02-15  9:58                 ` Hunt, David
  2016-02-15 10:15                   ` Olivier MATZ
  1 sibling, 1 reply; 32+ messages in thread
From: Hunt, David @ 2016-02-15  9:58 UTC (permalink / raw)
  To: Olivier MATZ, Thomas Monjalon, Wiles, Keith; +Cc: dev

On 12/02/2016 15:50, Olivier MATZ wrote:
> - NEXT_ABI does make the code harder to read in this case, and I'm
>    thinking about the patchset from David Hunt (external mempool handler)
>    that will be in the same situation, and maybe also another patchset
>    I'm working on.

Olivier,
     I'm working on that at the moment with the external mempool handler 
code. However, it crossed my mind that we have a choice to use symbol 
versioning OR use NEXT_ABI. Would one method be preferred over the other?
Regards,
David.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v3] mempool: reduce rte_mempool structure size
  2016-02-15  9:58                 ` Hunt, David
@ 2016-02-15 10:15                   ` Olivier MATZ
  2016-02-15 10:21                     ` Hunt, David
  0 siblings, 1 reply; 32+ messages in thread
From: Olivier MATZ @ 2016-02-15 10:15 UTC (permalink / raw)
  To: Hunt, David, Thomas Monjalon, Wiles, Keith; +Cc: dev

Hi David,

On 02/15/2016 10:58 AM, Hunt, David wrote:
> On 12/02/2016 15:50, Olivier MATZ wrote:
>> - NEXT_ABI does make the code harder to read in this case, and I'm
>>    thinking about the patchset from David Hunt (external mempool handler)
>>    that will be in the same situation, and maybe also another patchset
>>    I'm working on.
> 
> Olivier,
>     I'm working on that at the moment with the external mempool handler
> code. However, it crossed my mind that we have a choice to use symbol
> versioning OR use NEXT_ABI. Would one method be preferred over the other?

I think symbol versioning should always be preferred when possible.

In your case, as far as I remember, you are updating the rte_mempool
structure, which is accessed by static inline functions. I don't think
it is easily manageable with symbol versioning. Moreover, the ABI will
already be broken by Keith's patch, so I think it's less problematic
to have other patches breaking the ABI at the same time.
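
(To make the inline problem concrete, a hypothetical app-side snippet,
not code from the patch:

	/* Inlined into the application binary at build time: */
	static inline uint32_t
	app_cache_size(const struct rte_mempool *mp)
	{
		/* The offset of cache_size is frozen here. */
		return mp->cache_size;
	}

Symbol versioning can export two versions of a function from the
library, but it cannot fix a field offset that was already inlined
into an application built against the old struct layout.)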

Regards,
Olivier

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v3] mempool: reduce rte_mempool structure size
  2016-02-15 10:15                   ` Olivier MATZ
@ 2016-02-15 10:21                     ` Hunt, David
  2016-02-15 12:31                       ` Olivier MATZ
  0 siblings, 1 reply; 32+ messages in thread
From: Hunt, David @ 2016-02-15 10:21 UTC (permalink / raw)
  To: Olivier MATZ, Thomas Monjalon, Wiles, Keith; +Cc: dev

On 15/02/2016 10:15, Olivier MATZ wrote:
> Hi David,
>
> On 02/15/2016 10:58 AM, Hunt, David wrote:
>> On 12/02/2016 15:50, Olivier MATZ wrote:
>>> - NEXT_ABI does make the code harder to read in this case, and I'm
>>>     thinking about the patchset from David Hunt (external mempool handler)
>>>     that will be in the same situation, and maybe also another patchset
>>>     I'm working on.
>>
>> Olivier,
>>      I'm working on that at the moment with the external mempool handler
>> code. However, it crossed my mind that we have a choice to use symbol
>> versioning OR use NEXT_ABI. Would one method be preferred over the other?
>
> I think symbol versioning should always be preferred when possible.
>
> In your case, as far as I remember, you are updating the rte_mempool
> structure, which is accessed by static inline functions. I don't think
> it is easily manageable with symbol versioning. Moreover, the ABI will
> already be broken by Keith's patch, so I think it's less problematic
> to have other patches breaking the ABI at the same time.

OK, Thanks for that. I'll use NEXT_ABI in this case so. :)

Regards,
David.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v3] mempool: reduce rte_mempool structure size
  2016-02-15 10:21                     ` Hunt, David
@ 2016-02-15 12:31                       ` Olivier MATZ
  0 siblings, 0 replies; 32+ messages in thread
From: Olivier MATZ @ 2016-02-15 12:31 UTC (permalink / raw)
  To: Hunt, David, Thomas Monjalon, Wiles, Keith; +Cc: dev

Hi David,

On 02/15/2016 11:21 AM, Hunt, David wrote:
> On 15/02/2016 10:15, Olivier MATZ wrote:
>> On 02/15/2016 10:58 AM, Hunt, David wrote:
>>>      I'm working on that at the moment with the external mempool handler
>>> code. However, it crossed my mind that we have a choice to use symbol
>>> versioning OR use NEXT_ABI. Would one method be preferred over the
>>> other?
>>
>> I think symbol versioning should always be preferred when possible.
>>
>> In your case, as far as I remember, you are updating the rte_mempool
>> structure, which is accessed by static inline functions. I don't think
>> it is easily manageable with symbol versioning. Moreover, the ABI will
>> already be broken by Keith's patch, so I think it's less problematic
>> to have other patches breaking the ABI at the same time.
> 
> OK, Thanks for that. I'll use NEXT_ABI in this case so. :)

Just to let you know in case you missed it: Keith's patch (v3 [1] and
v4 [2]) ended up without the NEXT_ABI ifdefs, because they were
too heavy.

So for your patches it will also depend on the complexity of the
changes. You can have a try with NEXT_ABI and see if the code is
still maintainable or not. If not, the process is to push a deprecation
notice for 16.04 and the code for 16.07.

Regards,
Olivier

[1] v3: http://dpdk.org/ml/archives/dev/2016-February/033004.html
[2] v4: http://dpdk.org/ml/archives/dev/2016-February/033102.html

^ permalink raw reply	[flat|nested] 32+ messages in thread

* [PATCH v5] mempool: reduce rte_mempool structure size
  2016-02-12 18:36   ` [PATCH v4] " Keith Wiles
  2016-02-15  9:20     ` Olivier MATZ
@ 2016-04-14  9:42     ` Olivier Matz
  2016-04-14 13:28       ` Wiles, Keith
                         ` (2 more replies)
  1 sibling, 3 replies; 32+ messages in thread
From: Olivier Matz @ 2016-04-14  9:42 UTC (permalink / raw)
  To: dev, keith.wiles; +Cc: thomas.monjalon, pmatilai

From: Keith Wiles <keith.wiles@intel.com>

The rte_mempool structure is changed, which will cause an ABI change
for this structure. Providing backward compat is not reasonable
here as this structure is used in multiple defines/inlines.

Allow mempool cache support to be dynamic, depending on whether the
mempool being created needs cache support. This saves about 1.5 MB of
memory per rte_mempool structure.

Allocating many small mempools which do not require a cache can
otherwise consume large amounts of memory.

Change to be effective in release 16.07.

Signed-off-by: Keith Wiles <keith.wiles@intel.com>
Acked-by: Olivier Matz <olivier.matz@6wind.com>
---

Changes in v5:

- use RTE_PTR_ADD() instead of a cast to (char *) to fix compilation on Tilera.
  Error log was:

  rte_mempool.c: In function ‘rte_mempool_xmem_create’:
  rte_mempool.c:595: error: cast increases required alignment of target type


 app/test/test_mempool.c          |  4 +--
 lib/librte_mempool/rte_mempool.c | 55 ++++++++++++++++++----------------------
 lib/librte_mempool/rte_mempool.h | 29 ++++++++++-----------
 3 files changed, 40 insertions(+), 48 deletions(-)

diff --git a/app/test/test_mempool.c b/app/test/test_mempool.c
index f0f823b..10e1fa4 100644
--- a/app/test/test_mempool.c
+++ b/app/test/test_mempool.c
@@ -122,8 +122,8 @@ test_mempool_basic(void)
 		return -1;
 
 	printf("get private data\n");
-	if (rte_mempool_get_priv(mp) !=
-			(char*) mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num))
+	if (rte_mempool_get_priv(mp) != (char *)mp +
+			MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size))
 		return -1;
 
 	printf("get physical address of an object\n");
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index f8781e1..7a0e07e 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -452,12 +452,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 	/* compilation-time checks */
 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool) &
 			  RTE_CACHE_LINE_MASK) != 0);
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_cache) &
 			  RTE_CACHE_LINE_MASK) != 0);
-	RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, local_cache) &
-			  RTE_CACHE_LINE_MASK) != 0);
-#endif
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_debug_stats) &
 			  RTE_CACHE_LINE_MASK) != 0);
@@ -527,9 +523,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 		 */
 		int head = sizeof(struct rte_mempool);
 		int new_size = (private_data_size + head) % page_size;
-		if (new_size) {
+		if (new_size)
 			private_data_size += page_size - new_size;
-		}
 	}
 
 	/* try to allocate tailq entry */
@@ -544,7 +539,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 	 * store mempool objects. Otherwise reserve a memzone that is large
 	 * enough to hold mempool header and metadata plus mempool objects.
 	 */
-	mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num) + private_data_size;
+	mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size);
+	mempool_size += private_data_size;
 	mempool_size = RTE_ALIGN_CEIL(mempool_size, RTE_MEMPOOL_ALIGN);
 	if (vaddr == NULL)
 		mempool_size += (size_t)objsz.total_size * n;
@@ -591,8 +587,15 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 	mp->cache_flushthresh = CALC_CACHE_FLUSHTHRESH(cache_size);
 	mp->private_data_size = private_data_size;
 
+	/*
+	 * local_cache pointer is set even if cache_size is zero.
+	 * The local_cache points to just past the elt_pa[] array.
+	 */
+	mp->local_cache = (struct rte_mempool_cache *)
+		RTE_PTR_ADD(mp, MEMPOOL_HEADER_SIZE(mp, pg_num, 0));
+
 	/* calculate address of the first element for continuous mempool. */
-	obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num) +
+	obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size) +
 		private_data_size;
 	obj = RTE_PTR_ALIGN_CEIL(obj, RTE_MEMPOOL_ALIGN);
 
@@ -606,9 +609,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 		mp->elt_va_start = (uintptr_t)obj;
 		mp->elt_pa[0] = mp->phys_addr +
 			(mp->elt_va_start - (uintptr_t)mp);
-
-	/* mempool elements in a separate chunk of memory. */
 	} else {
+		/* mempool elements in a separate chunk of memory. */
 		mp->elt_va_start = (uintptr_t)vaddr;
 		memcpy(mp->elt_pa, paddr, sizeof (mp->elt_pa[0]) * pg_num);
 	}
@@ -643,19 +645,15 @@ unsigned
 rte_mempool_count(const struct rte_mempool *mp)
 {
 	unsigned count;
+	unsigned lcore_id;
 
 	count = rte_ring_count(mp->ring);
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
-	{
-		unsigned lcore_id;
-		if (mp->cache_size == 0)
-			return count;
+	if (mp->cache_size == 0)
+		return count;
 
-		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
-			count += mp->local_cache[lcore_id].len;
-	}
-#endif
+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
+		count += mp->local_cache[lcore_id].len;
 
 	/*
 	 * due to race condition (access to len is not locked), the
@@ -670,13 +668,16 @@ rte_mempool_count(const struct rte_mempool *mp)
 static unsigned
 rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
 {
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	unsigned lcore_id;
 	unsigned count = 0;
 	unsigned cache_count;
 
 	fprintf(f, "  cache infos:\n");
 	fprintf(f, "    cache_size=%"PRIu32"\n", mp->cache_size);
+
+	if (mp->cache_size == 0)
+		return count;
+
 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
 		cache_count = mp->local_cache[lcore_id].len;
 		fprintf(f, "    cache_count[%u]=%u\n", lcore_id, cache_count);
@@ -684,11 +685,6 @@ rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
 	}
 	fprintf(f, "    total_cache_count=%u\n", count);
 	return count;
-#else
-	RTE_SET_USED(mp);
-	fprintf(f, "  cache disabled\n");
-	return 0;
-#endif
 }
 
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
@@ -753,13 +749,16 @@ mempool_audit_cookies(const struct rte_mempool *mp)
 #define mempool_audit_cookies(mp) do {} while(0)
 #endif
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 /* check cookies before and after objects */
 static void
 mempool_audit_cache(const struct rte_mempool *mp)
 {
 	/* check cache size consistency */
 	unsigned lcore_id;
+
+	if (mp->cache_size == 0)
+		return;
+
 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
 		if (mp->local_cache[lcore_id].len > mp->cache_flushthresh) {
 			RTE_LOG(CRIT, MEMPOOL, "badness on cache[%u]\n",
@@ -768,10 +767,6 @@ mempool_audit_cache(const struct rte_mempool *mp)
 		}
 	}
 }
-#else
-#define mempool_audit_cache(mp) do {} while(0)
-#endif
-
 
 /* check the consistency of mempool (size, cookies, ...) */
 void
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 9745bf0..8595e77 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -95,7 +95,6 @@ struct rte_mempool_debug_stats {
 } __rte_cache_aligned;
 #endif
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 /**
  * A structure that stores a per-core object cache.
  */
@@ -107,7 +106,6 @@ struct rte_mempool_cache {
 	 */
 	void *objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 3]; /**< Cache objects */
 } __rte_cache_aligned;
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
 
 /**
  * A structure that stores the size of mempool elements.
@@ -194,10 +192,7 @@ struct rte_mempool {
 
 	unsigned private_data_size;      /**< Size of private data. */
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
-	/** Per-lcore local cache. */
-	struct rte_mempool_cache local_cache[RTE_MAX_LCORE];
-#endif
+	struct rte_mempool_cache *local_cache; /**< Per-lcore local cache */
 
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
 	/** Per-lcore statistics. */
@@ -247,6 +242,13 @@ struct rte_mempool {
 #endif
 
 /**
+ * Size of the elt_pa array, based on the number of pages. (Internal use)
+ */
+#define __PA_SIZE(mp, pgn) \
+	RTE_ALIGN_CEIL((((pgn) - RTE_DIM((mp)->elt_pa)) * \
+	sizeof((mp)->elt_pa[0])), RTE_CACHE_LINE_SIZE)
+
+/**
  * Calculate the size of the mempool header.
  *
  * @param mp
@@ -254,9 +256,9 @@ struct rte_mempool {
  * @param pgn
  *   Number of pages used to store mempool objects.
  */
-#define	MEMPOOL_HEADER_SIZE(mp, pgn)	(sizeof(*(mp)) + \
-	RTE_ALIGN_CEIL(((pgn) - RTE_DIM((mp)->elt_pa)) * \
-	sizeof ((mp)->elt_pa[0]), RTE_CACHE_LINE_SIZE))
+#define MEMPOOL_HEADER_SIZE(mp, pgn, cs) \
+	(sizeof(*(mp)) + __PA_SIZE(mp, pgn) + (((cs) == 0) ? 0 : \
+	(sizeof(struct rte_mempool_cache) * RTE_MAX_LCORE)))
 
 /**
  * Return true if the whole mempool is in contiguous memory.
@@ -755,19 +757,16 @@ static inline void __attribute__((always_inline))
 __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
 		    unsigned n, int is_mp)
 {
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	struct rte_mempool_cache *cache;
 	uint32_t index;
 	void **cache_objs;
 	unsigned lcore_id = rte_lcore_id();
 	uint32_t cache_size = mp->cache_size;
 	uint32_t flushthresh = mp->cache_flushthresh;
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
 
 	/* increment stat now, adding in mempool always success */
 	__MEMPOOL_STAT_ADD(mp, put, n);
 
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	/* cache is not enabled or single producer or non-EAL thread */
 	if (unlikely(cache_size == 0 || is_mp == 0 ||
 		     lcore_id >= RTE_MAX_LCORE))
@@ -802,7 +801,6 @@ __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
 	return;
 
 ring_enqueue:
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
 
 	/* push remaining objects in ring */
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
@@ -946,7 +944,6 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
 		   unsigned n, int is_mc)
 {
 	int ret;
-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
 	struct rte_mempool_cache *cache;
 	uint32_t index, len;
 	void **cache_objs;
@@ -992,7 +989,6 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
 	return 0;
 
 ring_dequeue:
-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
 
 	/* get remaining objects from ring */
 	if (is_mc)
@@ -1293,7 +1289,8 @@ void rte_mempool_audit(const struct rte_mempool *mp);
  */
 static inline void *rte_mempool_get_priv(struct rte_mempool *mp)
 {
-	return (char *)mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num);
+	return (char *)mp +
+		MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size);
 }
 
 /**
-- 
2.1.4
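
For scale, assuming the default build-time values of this era
(RTE_MEMPOOL_CACHE_MAX_SIZE = 512, RTE_MAX_LCORE = 128, 64-bit
pointers), the previously embedded cache array cost roughly:

  sizeof(struct rte_mempool_cache) ~= 512 * 3 * 8 B + len, cache-aligned
                                   ~= 12 kB
  12 kB * 128 lcores               ~= 1.5 MB per mempool

which is where the "about 1.5 MB" figure above comes from; with this
patch the cache block is only reserved when cache_size is non-zero.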

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: [PATCH v5] mempool: reduce rte_mempool structure size
  2016-04-14  9:42     ` [PATCH v5] " Olivier Matz
@ 2016-04-14 13:28       ` Wiles, Keith
  2016-04-14 13:43         ` Olivier MATZ
  2016-04-14 13:53       ` Wiles, Keith
  2016-05-17  5:31       ` Thomas Monjalon
  2 siblings, 1 reply; 32+ messages in thread
From: Wiles, Keith @ 2016-04-14 13:28 UTC (permalink / raw)
  To: Olivier Matz, dev; +Cc: thomas.monjalon, pmatilai

>From: Keith Wiles <keith.wiles@intel.com>
>
>The rte_mempool structure is changed, which will cause an ABI change
>for this structure. Providing backward compat is not reasonable
>here as this structure is used in multiple defines/inlines.
>
>Allow mempool cache support to be dynamic, depending on whether the
>mempool being created needs cache support. This saves about 1.5 MB of
>memory per rte_mempool structure.
>
>Allocating many small mempools which do not require a cache can
>otherwise consume large amounts of memory.
>
>Change to be effective in release 16.07.
>
>Signed-off-by: Keith Wiles <keith.wiles@intel.com>
>Acked-by: Olivier Matz <olivier.matz@6wind.com>
>---
>
>Changes in v5:
>
>- use RTE_PTR_ADD() instead of a cast to (char *) to fix compilation on Tilera.
>  Error log was:
>
>  rte_mempool.c: In function ‘rte_mempool_xmem_create’:
>  rte_mempool.c:595: error: cast increases required alignment of target type
>
>
> app/test/test_mempool.c          |  4 +--
> lib/librte_mempool/rte_mempool.c | 55 ++++++++++++++++++----------------------
> lib/librte_mempool/rte_mempool.h | 29 ++++++++++-----------
> 3 files changed, 40 insertions(+), 48 deletions(-)
>
>diff --git a/app/test/test_mempool.c b/app/test/test_mempool.c
>index f0f823b..10e1fa4 100644
>--- a/app/test/test_mempool.c
>+++ b/app/test/test_mempool.c
>@@ -122,8 +122,8 @@ test_mempool_basic(void)
> 		return -1;
> 
> 	printf("get private data\n");
>-	if (rte_mempool_get_priv(mp) !=
>-			(char*) mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num))
>+	if (rte_mempool_get_priv(mp) != (char *)mp +
>+			MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size))

Should we not add the RTE_PTR_ADD() here as well?

> 		return -1;
> 
> 	printf("get physical address of an object\n");
>diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
>index f8781e1..7a0e07e 100644
>--- a/lib/librte_mempool/rte_mempool.c
>+++ b/lib/librte_mempool/rte_mempool.c
>@@ -452,12 +452,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
> 	/* compilation-time checks */
> 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool) &
> 			  RTE_CACHE_LINE_MASK) != 0);
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_cache) &
> 			  RTE_CACHE_LINE_MASK) != 0);
>-	RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, local_cache) &
>-			  RTE_CACHE_LINE_MASK) != 0);
>-#endif
> #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
> 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_debug_stats) &
> 			  RTE_CACHE_LINE_MASK) != 0);
>@@ -527,9 +523,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
> 		 */
> 		int head = sizeof(struct rte_mempool);
> 		int new_size = (private_data_size + head) % page_size;
>-		if (new_size) {
>+		if (new_size)
> 			private_data_size += page_size - new_size;
>-		}
> 	}
> 
> 	/* try to allocate tailq entry */
>@@ -544,7 +539,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
> 	 * store mempool objects. Otherwise reserve a memzone that is large
> 	 * enough to hold mempool header and metadata plus mempool objects.
> 	 */
>-	mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num) + private_data_size;
>+	mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size);
>+	mempool_size += private_data_size;
> 	mempool_size = RTE_ALIGN_CEIL(mempool_size, RTE_MEMPOOL_ALIGN);
> 	if (vaddr == NULL)
> 		mempool_size += (size_t)objsz.total_size * n;
>@@ -591,8 +587,15 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
> 	mp->cache_flushthresh = CALC_CACHE_FLUSHTHRESH(cache_size);
> 	mp->private_data_size = private_data_size;
> 
>+	/*
>+	 * local_cache pointer is set even if cache_size is zero.
>+	 * The local_cache points to just past the elt_pa[] array.
>+	 */
>+	mp->local_cache = (struct rte_mempool_cache *)
>+		RTE_PTR_ADD(mp, MEMPOOL_HEADER_SIZE(mp, pg_num, 0));
>+
> 	/* calculate address of the first element for continuous mempool. */
>-	obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num) +
>+	obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size) +
> 		private_data_size;
> 	obj = RTE_PTR_ALIGN_CEIL(obj, RTE_MEMPOOL_ALIGN);
> 
>@@ -606,9 +609,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
> 		mp->elt_va_start = (uintptr_t)obj;
> 		mp->elt_pa[0] = mp->phys_addr +
> 			(mp->elt_va_start - (uintptr_t)mp);
>-
>-	/* mempool elements in a separate chunk of memory. */
> 	} else {
>+		/* mempool elements in a separate chunk of memory. */
> 		mp->elt_va_start = (uintptr_t)vaddr;
> 		memcpy(mp->elt_pa, paddr, sizeof (mp->elt_pa[0]) * pg_num);
> 	}
>@@ -643,19 +645,15 @@ unsigned
> rte_mempool_count(const struct rte_mempool *mp)
> {
> 	unsigned count;
>+	unsigned lcore_id;
> 
> 	count = rte_ring_count(mp->ring);
> 
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>-	{
>-		unsigned lcore_id;
>-		if (mp->cache_size == 0)
>-			return count;
>+	if (mp->cache_size == 0)
>+		return count;
> 
>-		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
>-			count += mp->local_cache[lcore_id].len;
>-	}
>-#endif
>+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
>+		count += mp->local_cache[lcore_id].len;
> 
> 	/*
> 	 * due to race condition (access to len is not locked), the
>@@ -670,13 +668,16 @@ rte_mempool_count(const struct rte_mempool *mp)
> static unsigned
> rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
> {
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> 	unsigned lcore_id;
> 	unsigned count = 0;
> 	unsigned cache_count;
> 
> 	fprintf(f, "  cache infos:\n");
> 	fprintf(f, "    cache_size=%"PRIu32"\n", mp->cache_size);
>+
>+	if (mp->cache_size == 0)
>+		return count;
>+
> 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
> 		cache_count = mp->local_cache[lcore_id].len;
> 		fprintf(f, "    cache_count[%u]=%u\n", lcore_id, cache_count);
>@@ -684,11 +685,6 @@ rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
> 	}
> 	fprintf(f, "    total_cache_count=%u\n", count);
> 	return count;
>-#else
>-	RTE_SET_USED(mp);
>-	fprintf(f, "  cache disabled\n");
>-	return 0;
>-#endif
> }
> 
> #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
>@@ -753,13 +749,16 @@ mempool_audit_cookies(const struct rte_mempool *mp)
> #define mempool_audit_cookies(mp) do {} while(0)
> #endif
> 
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> /* check cookies before and after objects */
> static void
> mempool_audit_cache(const struct rte_mempool *mp)
> {
> 	/* check cache size consistency */
> 	unsigned lcore_id;
>+
>+	if (mp->cache_size == 0)
>+		return;
>+
> 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
> 		if (mp->local_cache[lcore_id].len > mp->cache_flushthresh) {
> 			RTE_LOG(CRIT, MEMPOOL, "badness on cache[%u]\n",
>@@ -768,10 +767,6 @@ mempool_audit_cache(const struct rte_mempool *mp)
> 		}
> 	}
> }
>-#else
>-#define mempool_audit_cache(mp) do {} while(0)
>-#endif
>-
> 
> /* check the consistency of mempool (size, cookies, ...) */
> void
>diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
>index 9745bf0..8595e77 100644
>--- a/lib/librte_mempool/rte_mempool.h
>+++ b/lib/librte_mempool/rte_mempool.h
>@@ -95,7 +95,6 @@ struct rte_mempool_debug_stats {
> } __rte_cache_aligned;
> #endif
> 
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> /**
>  * A structure that stores a per-core object cache.
>  */
>@@ -107,7 +106,6 @@ struct rte_mempool_cache {
> 	 */
> 	void *objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 3]; /**< Cache objects */
> } __rte_cache_aligned;
>-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
> 
> /**
>  * A structure that stores the size of mempool elements.
>@@ -194,10 +192,7 @@ struct rte_mempool {
> 
> 	unsigned private_data_size;      /**< Size of private data. */
> 
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>-	/** Per-lcore local cache. */
>-	struct rte_mempool_cache local_cache[RTE_MAX_LCORE];
>-#endif
>+	struct rte_mempool_cache *local_cache; /**< Per-lcore local cache */
> 
> #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
> 	/** Per-lcore statistics. */
>@@ -247,6 +242,13 @@ struct rte_mempool {
> #endif
> 
> /**
>+ * Size of the elt_pa array, based on the number of pages. (Internal use)
>+ */
>+#define __PA_SIZE(mp, pgn) \
>+	RTE_ALIGN_CEIL((((pgn) - RTE_DIM((mp)->elt_pa)) * \
>+	sizeof((mp)->elt_pa[0])), RTE_CACHE_LINE_SIZE)
>+
>+/**
>  * Calculate the size of the mempool header.
>  *
>  * @param mp
>@@ -254,9 +256,9 @@ struct rte_mempool {
>  * @param pgn
>  *   Number of pages used to store mempool objects.
>  */
>-#define	MEMPOOL_HEADER_SIZE(mp, pgn)	(sizeof(*(mp)) + \
>-	RTE_ALIGN_CEIL(((pgn) - RTE_DIM((mp)->elt_pa)) * \
>-	sizeof ((mp)->elt_pa[0]), RTE_CACHE_LINE_SIZE))
>+#define MEMPOOL_HEADER_SIZE(mp, pgn, cs) \
>+	(sizeof(*(mp)) + __PA_SIZE(mp, pgn) + (((cs) == 0) ? 0 : \
>+	(sizeof(struct rte_mempool_cache) * RTE_MAX_LCORE)))
> 
> /**
>  * Return true if the whole mempool is in contiguous memory.
>@@ -755,19 +757,16 @@ static inline void __attribute__((always_inline))
> __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
> 		    unsigned n, int is_mp)
> {
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> 	struct rte_mempool_cache *cache;
> 	uint32_t index;
> 	void **cache_objs;
> 	unsigned lcore_id = rte_lcore_id();
> 	uint32_t cache_size = mp->cache_size;
> 	uint32_t flushthresh = mp->cache_flushthresh;
>-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
> 
> 	/* increment stat now, adding in mempool always success */
> 	__MEMPOOL_STAT_ADD(mp, put, n);
> 
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> 	/* cache is not enabled or single producer or non-EAL thread */
> 	if (unlikely(cache_size == 0 || is_mp == 0 ||
> 		     lcore_id >= RTE_MAX_LCORE))
>@@ -802,7 +801,6 @@ __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
> 	return;
> 
> ring_enqueue:
>-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
> 
> 	/* push remaining objects in ring */
> #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
>@@ -946,7 +944,6 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
> 		   unsigned n, int is_mc)
> {
> 	int ret;
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> 	struct rte_mempool_cache *cache;
> 	uint32_t index, len;
> 	void **cache_objs;
>@@ -992,7 +989,6 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
> 	return 0;
> 
> ring_dequeue:
>-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
> 
> 	/* get remaining objects from ring */
> 	if (is_mc)
>@@ -1293,7 +1289,8 @@ void rte_mempool_audit(const struct rte_mempool *mp);
>  */
> static inline void *rte_mempool_get_priv(struct rte_mempool *mp)
> {
>-	return (char *)mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num);
>+	return (char *)mp +
>+		MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size);

And here?

> }
> 
> /**
>-- 
>2.1.4
>
>


Regards,
Keith

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v5] mempool: reduce rte_mempool structure size
  2016-04-14 13:28       ` Wiles, Keith
@ 2016-04-14 13:43         ` Olivier MATZ
  0 siblings, 0 replies; 32+ messages in thread
From: Olivier MATZ @ 2016-04-14 13:43 UTC (permalink / raw)
  To: Wiles, Keith, dev; +Cc: thomas.monjalon, pmatilai

Hi,

On 04/14/2016 03:28 PM, Wiles, Keith wrote:
>> From: Keith Wiles <keith.wiles@intel.com>
>> --- a/app/test/test_mempool.c
>> +++ b/app/test/test_mempool.c
>> @@ -122,8 +122,8 @@ test_mempool_basic(void)
>> 		return -1;
>>
>> 	printf("get private data\n");
>> -	if (rte_mempool_get_priv(mp) !=
>> -			(char*) mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num))
>> +	if (rte_mempool_get_priv(mp) != (char *)mp +
>> +			MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size))
>
> Should we not add the RTE_PTR_ADD() here as well?

The displayed error message was "cast increases required alignment
of target type". Here the cast is to char *, whose alignment
constraint (1) is the lowest possible, so the cast cannot increase
the required alignment. I think there is no issue here... at least
I can say it compiles without error.
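
A small sketch of the distinction (illustrative only; the helper name
and the offset parameter are hypothetical):

	#include <rte_common.h>
	#include <rte_mempool.h>

	static void *
	cache_ptr(struct rte_mempool *mp, size_t off)
	{
		/* Casting down to char * never raises the required
		 * alignment, since the alignment of char is 1: */
		char *base = (char *)mp + off;
		(void)base;

		/* Casting that char * back up to a more strictly aligned
		 * type, e.g. (struct rte_mempool_cache *)base, is what the
		 * Tilera gcc flagged. Going through void * with
		 * RTE_PTR_ADD() avoids the warning, which is what v5 does: */
		return RTE_PTR_ADD(mp, off);
	}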

Regards,
Olivier

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v5] mempool: reduce rte_mempool structure size
  2016-04-14  9:42     ` [PATCH v5] " Olivier Matz
  2016-04-14 13:28       ` Wiles, Keith
@ 2016-04-14 13:53       ` Wiles, Keith
  2016-05-17  5:31       ` Thomas Monjalon
  2 siblings, 0 replies; 32+ messages in thread
From: Wiles, Keith @ 2016-04-14 13:53 UTC (permalink / raw)
  To: Olivier Matz, dev; +Cc: thomas.monjalon, pmatilai

>From: Keith Wiles <keith.wiles@intel.com>
>
>The rte_mempool structure is changed, which will cause an ABI change
>for this structure. Providing backward compat is not reasonable
>here as this structure is used in multiple defines/inlines.
>
>Allow mempool cache support to be dynamic, depending on whether the
>mempool being created needs cache support. This saves about 1.5 MB of
>memory per rte_mempool structure.
>
>Allocating many small mempools which do not require a cache can
>otherwise consume large amounts of memory.
>
>Change to be effective in release 16.07.
>
>Signed-off-by: Keith Wiles <keith.wiles@intel.com>
>Acked-by: Olivier Matz <olivier.matz@6wind.com>

For the change to this patch:
Acked-by: Keith Wiles <keith.wiles@intel.com>

>---
>
>Changes in v5:
>
>- use RTE_PTR_ADD() instead of a cast to (char *) to fix compilation on Tilera.
>  Error log was:
>
>  rte_mempool.c: In function ‘rte_mempool_xmem_create’:
>  rte_mempool.c:595: error: cast increases required alignment of target type
>
>
> app/test/test_mempool.c          |  4 +--
> lib/librte_mempool/rte_mempool.c | 55 ++++++++++++++++++----------------------
> lib/librte_mempool/rte_mempool.h | 29 ++++++++++-----------
> 3 files changed, 40 insertions(+), 48 deletions(-)
>
>diff --git a/app/test/test_mempool.c b/app/test/test_mempool.c
>index f0f823b..10e1fa4 100644
>--- a/app/test/test_mempool.c
>+++ b/app/test/test_mempool.c
>@@ -122,8 +122,8 @@ test_mempool_basic(void)
> 		return -1;
> 
> 	printf("get private data\n");
>-	if (rte_mempool_get_priv(mp) !=
>-			(char*) mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num))
>+	if (rte_mempool_get_priv(mp) != (char *)mp +
>+			MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size))
> 		return -1;
> 
> 	printf("get physical address of an object\n");
>diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
>index f8781e1..7a0e07e 100644
>--- a/lib/librte_mempool/rte_mempool.c
>+++ b/lib/librte_mempool/rte_mempool.c
>@@ -452,12 +452,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
> 	/* compilation-time checks */
> 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool) &
> 			  RTE_CACHE_LINE_MASK) != 0);
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_cache) &
> 			  RTE_CACHE_LINE_MASK) != 0);
>-	RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, local_cache) &
>-			  RTE_CACHE_LINE_MASK) != 0);
>-#endif
> #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
> 	RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_debug_stats) &
> 			  RTE_CACHE_LINE_MASK) != 0);
>@@ -527,9 +523,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
> 		 */
> 		int head = sizeof(struct rte_mempool);
> 		int new_size = (private_data_size + head) % page_size;
>-		if (new_size) {
>+		if (new_size)
> 			private_data_size += page_size - new_size;
>-		}
> 	}
> 
> 	/* try to allocate tailq entry */
>@@ -544,7 +539,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
> 	 * store mempool objects. Otherwise reserve a memzone that is large
> 	 * enough to hold mempool header and metadata plus mempool objects.
> 	 */
>-	mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num) + private_data_size;
>+	mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size);
>+	mempool_size += private_data_size;
> 	mempool_size = RTE_ALIGN_CEIL(mempool_size, RTE_MEMPOOL_ALIGN);
> 	if (vaddr == NULL)
> 		mempool_size += (size_t)objsz.total_size * n;
>@@ -591,8 +587,15 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
> 	mp->cache_flushthresh = CALC_CACHE_FLUSHTHRESH(cache_size);
> 	mp->private_data_size = private_data_size;
> 
>+	/*
>+	 * local_cache pointer is set even if cache_size is zero.
>+	 * The local_cache points to just past the elt_pa[] array.
>+	 */
>+	mp->local_cache = (struct rte_mempool_cache *)
>+		RTE_PTR_ADD(mp, MEMPOOL_HEADER_SIZE(mp, pg_num, 0));
>+
> 	/* calculate address of the first element for continuous mempool. */
>-	obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num) +
>+	obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num, cache_size) +
> 		private_data_size;
> 	obj = RTE_PTR_ALIGN_CEIL(obj, RTE_MEMPOOL_ALIGN);
> 
>@@ -606,9 +609,8 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
> 		mp->elt_va_start = (uintptr_t)obj;
> 		mp->elt_pa[0] = mp->phys_addr +
> 			(mp->elt_va_start - (uintptr_t)mp);
>-
>-	/* mempool elements in a separate chunk of memory. */
> 	} else {
>+		/* mempool elements in a separate chunk of memory. */
> 		mp->elt_va_start = (uintptr_t)vaddr;
> 		memcpy(mp->elt_pa, paddr, sizeof (mp->elt_pa[0]) * pg_num);
> 	}
>@@ -643,19 +645,15 @@ unsigned
> rte_mempool_count(const struct rte_mempool *mp)
> {
> 	unsigned count;
>+	unsigned lcore_id;
> 
> 	count = rte_ring_count(mp->ring);
> 
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>-	{
>-		unsigned lcore_id;
>-		if (mp->cache_size == 0)
>-			return count;
>+	if (mp->cache_size == 0)
>+		return count;
> 
>-		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
>-			count += mp->local_cache[lcore_id].len;
>-	}
>-#endif
>+	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
>+		count += mp->local_cache[lcore_id].len;
> 
> 	/*
> 	 * due to race condition (access to len is not locked), the
>@@ -670,13 +668,16 @@ rte_mempool_count(const struct rte_mempool *mp)
> static unsigned
> rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
> {
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> 	unsigned lcore_id;
> 	unsigned count = 0;
> 	unsigned cache_count;
> 
> 	fprintf(f, "  cache infos:\n");
> 	fprintf(f, "    cache_size=%"PRIu32"\n", mp->cache_size);
>+
>+	if (mp->cache_size == 0)
>+		return count;
>+
> 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
> 		cache_count = mp->local_cache[lcore_id].len;
> 		fprintf(f, "    cache_count[%u]=%u\n", lcore_id, cache_count);
>@@ -684,11 +685,6 @@ rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
> 	}
> 	fprintf(f, "    total_cache_count=%u\n", count);
> 	return count;
>-#else
>-	RTE_SET_USED(mp);
>-	fprintf(f, "  cache disabled\n");
>-	return 0;
>-#endif
> }
> 
> #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
>@@ -753,13 +749,16 @@ mempool_audit_cookies(const struct rte_mempool *mp)
> #define mempool_audit_cookies(mp) do {} while(0)
> #endif
> 
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> /* check cookies before and after objects */
> static void
> mempool_audit_cache(const struct rte_mempool *mp)
> {
> 	/* check cache size consistency */
> 	unsigned lcore_id;
>+
>+	if (mp->cache_size == 0)
>+		return;
>+
> 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
> 		if (mp->local_cache[lcore_id].len > mp->cache_flushthresh) {
> 			RTE_LOG(CRIT, MEMPOOL, "badness on cache[%u]\n",
>@@ -768,10 +767,6 @@ mempool_audit_cache(const struct rte_mempool *mp)
> 		}
> 	}
> }
>-#else
>-#define mempool_audit_cache(mp) do {} while(0)
>-#endif
>-
> 
> /* check the consistency of mempool (size, cookies, ...) */
> void
>diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
>index 9745bf0..8595e77 100644
>--- a/lib/librte_mempool/rte_mempool.h
>+++ b/lib/librte_mempool/rte_mempool.h
>@@ -95,7 +95,6 @@ struct rte_mempool_debug_stats {
> } __rte_cache_aligned;
> #endif
> 
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> /**
>  * A structure that stores a per-core object cache.
>  */
>@@ -107,7 +106,6 @@ struct rte_mempool_cache {
> 	 */
> 	void *objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 3]; /**< Cache objects */
> } __rte_cache_aligned;
>-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
> 
> /**
>  * A structure that stores the size of mempool elements.
>@@ -194,10 +192,7 @@ struct rte_mempool {
> 
> 	unsigned private_data_size;      /**< Size of private data. */
> 
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>-	/** Per-lcore local cache. */
>-	struct rte_mempool_cache local_cache[RTE_MAX_LCORE];
>-#endif
>+	struct rte_mempool_cache *local_cache; /**< Per-lcore local cache */
> 
> #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
> 	/** Per-lcore statistics. */
>@@ -247,6 +242,13 @@ struct rte_mempool {
> #endif
> 
> /**
>+ * Size of the elt_pa array, based on the number of pages. (Internal use)
>+ */
>+#define __PA_SIZE(mp, pgn) \
>+	RTE_ALIGN_CEIL((((pgn) - RTE_DIM((mp)->elt_pa)) * \
>+	sizeof((mp)->elt_pa[0])), RTE_CACHE_LINE_SIZE)
>+
>+/**
>  * Calculate the size of the mempool header.
>  *
>  * @param mp
>@@ -254,9 +256,9 @@ struct rte_mempool {
>  * @param pgn
>  *   Number of pages used to store mempool objects.
>  */
>-#define	MEMPOOL_HEADER_SIZE(mp, pgn)	(sizeof(*(mp)) + \
>-	RTE_ALIGN_CEIL(((pgn) - RTE_DIM((mp)->elt_pa)) * \
>-	sizeof ((mp)->elt_pa[0]), RTE_CACHE_LINE_SIZE))
>+#define MEMPOOL_HEADER_SIZE(mp, pgn, cs) \
>+	(sizeof(*(mp)) + __PA_SIZE(mp, pgn) + (((cs) == 0) ? 0 : \
>+	(sizeof(struct rte_mempool_cache) * RTE_MAX_LCORE)))
> 
> /**
>  * Return true if the whole mempool is in contiguous memory.
>@@ -755,19 +757,16 @@ static inline void __attribute__((always_inline))
> __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
> 		    unsigned n, int is_mp)
> {
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> 	struct rte_mempool_cache *cache;
> 	uint32_t index;
> 	void **cache_objs;
> 	unsigned lcore_id = rte_lcore_id();
> 	uint32_t cache_size = mp->cache_size;
> 	uint32_t flushthresh = mp->cache_flushthresh;
>-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
> 
> 	/* increment stat now, adding in mempool always success */
> 	__MEMPOOL_STAT_ADD(mp, put, n);
> 
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> 	/* cache is not enabled or single producer or non-EAL thread */
> 	if (unlikely(cache_size == 0 || is_mp == 0 ||
> 		     lcore_id >= RTE_MAX_LCORE))
>@@ -802,7 +801,6 @@ __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
> 	return;
> 
> ring_enqueue:
>-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
> 
> 	/* push remaining objects in ring */
> #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
>@@ -946,7 +944,6 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
> 		   unsigned n, int is_mc)
> {
> 	int ret;
>-#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> 	struct rte_mempool_cache *cache;
> 	uint32_t index, len;
> 	void **cache_objs;
>@@ -992,7 +989,6 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
> 	return 0;
> 
> ring_dequeue:
>-#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
> 
> 	/* get remaining objects from ring */
> 	if (is_mc)
>@@ -1293,7 +1289,8 @@ void rte_mempool_audit(const struct rte_mempool *mp);
>  */
> static inline void *rte_mempool_get_priv(struct rte_mempool *mp)
> {
>-	return (char *)mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num);
>+	return (char *)mp +
>+		MEMPOOL_HEADER_SIZE(mp, mp->pg_num, mp->cache_size);
> }
> 
> /**
>-- 
>2.1.4
>
>


Regards,
Keith

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v5] mempool: reduce rte_mempool structure size
  2016-04-14  9:42     ` [PATCH v5] " Olivier Matz
  2016-04-14 13:28       ` Wiles, Keith
  2016-04-14 13:53       ` Wiles, Keith
@ 2016-05-17  5:31       ` Thomas Monjalon
  2 siblings, 0 replies; 32+ messages in thread
From: Thomas Monjalon @ 2016-05-17  5:31 UTC (permalink / raw)
  To: Olivier Matz, keith.wiles; +Cc: dev, pmatilai

2016-04-14 11:42, Olivier Matz:
> From: Keith Wiles <keith.wiles@intel.com>
> 
> The rte_mempool structure is changed, which will cause an ABI change
> for this structure. Providing backward compat is not reasonable
> here as this structure is used in multiple defines/inlines.

The deprecation notice must be removed by this patch.

[...]
> +/**
>   * Calculate the size of the mempool header.
>   *
>   * @param mp
> @@ -254,9 +256,9 @@ struct rte_mempool {
>   * @param pgn
>   *   Number of pages used to store mempool objects.

A new parameter has been forgotten:
 * @param cs
 *   Size of the per-lcore cache.

>   */
> -#define	MEMPOOL_HEADER_SIZE(mp, pgn)	(sizeof(*(mp)) + \
> -	RTE_ALIGN_CEIL(((pgn) - RTE_DIM((mp)->elt_pa)) * \
> -	sizeof ((mp)->elt_pa[0]), RTE_CACHE_LINE_SIZE))
> +#define MEMPOOL_HEADER_SIZE(mp, pgn, cs) \
> +	(sizeof(*(mp)) + __PA_SIZE(mp, pgn) + (((cs) == 0) ? 0 : \
> +	(sizeof(struct rte_mempool_cache) * RTE_MAX_LCORE)))

Applied with above changes
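
With both fixups folded in, the applied documentation reads roughly
(the @param mp wording is paraphrased):

	/**
	 * Calculate the size of the mempool header.
	 *
	 * @param mp
	 *   Pointer to the memory pool.
	 * @param pgn
	 *   Number of pages used to store mempool objects.
	 * @param cs
	 *   Size of the per-lcore cache.
	 */
	#define MEMPOOL_HEADER_SIZE(mp, pgn, cs) \
		(sizeof(*(mp)) + __PA_SIZE(mp, pgn) + (((cs) == 0) ? 0 : \
		(sizeof(struct rte_mempool_cache) * RTE_MAX_LCORE)))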

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v2] mempool: reduce rte_mempool structure size
  2016-02-10 18:02 ` Thomas Monjalon
@ 2016-02-12 11:52   ` Panu Matilainen
  0 siblings, 0 replies; 32+ messages in thread
From: Panu Matilainen @ 2016-02-12 11:52 UTC (permalink / raw)
  To: Thomas Monjalon, Wiles, Keith; +Cc: dev, Neil Horman

On 02/10/2016 08:02 PM, Thomas Monjalon wrote:
> 2016-02-10 18:01, Wiles, Keith:
>>>>> --- a/config/defconfig_x86_64-native-linuxapp-gcc
>>>>> +++ b/config/defconfig_x86_64-native-linuxapp-gcc
>>>>> @@ -40,3 +40,8 @@ CONFIG_RTE_ARCH_64=y
>>>>>
>>>>>   CONFIG_RTE_TOOLCHAIN="gcc"
>>>>>   CONFIG_RTE_TOOLCHAIN_GCC=y
>>>>> +CONFIG_RTE_BUILD_SHARED_LIB=y
>>>>> +CONFIG_RTE_NEXT_ABI=n
>>>>> +CONFIG_RTE_EAL_IGB_UIO=n
>>>>> +CONFIG_RTE_LIBRTE_KNI=n
>>>>> +CONFIG_RTE_KNI_KMOD=n
>>>
>>> Hmm, not sure where this came from, but will remove it.
>>
>> I think this came from the ABI checker I ran; the tool should leave the repo in its original state.
>
> Yes, you're right. The ABI checker modifies the defconfig instead of
> modifying the generated .config file.

Its "by design" according to Neil (this was discussed when the abi 
checker was introduced, see 
http://dpdk.org/ml/archives/dev/2015-March/014636.html)

I actually agree with Neil that changing .config after make config seems 
counter-intuitive compared to how this works in other projects, but that 
doesn't make modifying the default templates the right thing to do in DPDK.

> Anyone for a patch?

I can add it to my TODO among a couple of other things - it should not 
leave clutter like this behind no matter what. As to how exactly it 
should be fixed, it could of course just change .config, or 
alternatively create (and later clean up) a temporary defconfig file 
with the necessary contents, which would be closer to the current 
approach I guess.

	- Panu -

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v2] mempool: reduce rte_mempool structure size
  2016-02-10 18:01 [PATCH v2] " Wiles, Keith
@ 2016-02-10 18:02 ` Thomas Monjalon
  2016-02-12 11:52   ` Panu Matilainen
  0 siblings, 1 reply; 32+ messages in thread
From: Thomas Monjalon @ 2016-02-10 18:02 UTC (permalink / raw)
  To: Wiles, Keith; +Cc: dev, Neil Horman

2016-02-10 18:01, Wiles, Keith:
> >>> --- a/config/defconfig_x86_64-native-linuxapp-gcc
> >>> +++ b/config/defconfig_x86_64-native-linuxapp-gcc
> >>> @@ -40,3 +40,8 @@ CONFIG_RTE_ARCH_64=y
> >>>  
> >>>  CONFIG_RTE_TOOLCHAIN="gcc"
> >>>  CONFIG_RTE_TOOLCHAIN_GCC=y
> >>> +CONFIG_RTE_BUILD_SHARED_LIB=y
> >>> +CONFIG_RTE_NEXT_ABI=n
> >>> +CONFIG_RTE_EAL_IGB_UIO=n
> >>> +CONFIG_RTE_LIBRTE_KNI=n
> >>> +CONFIG_RTE_KNI_KMOD=n
> >
> >Hmm, not sure where this came from, but will remove it.
> 
> I think this came from the ABI checker I ran; the tool should leave the repo in its original state.

Yes, you're right. The ABI checker modifies the defconfig instead of
modifying the generated .config file.
Anyone for a patch?

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH v2] mempool: reduce rte_mempool structure size
@ 2016-02-10 18:01 Wiles, Keith
  2016-02-10 18:02 ` Thomas Monjalon
  0 siblings, 1 reply; 32+ messages in thread
From: Wiles, Keith @ 2016-02-10 18:01 UTC (permalink / raw)
  To: Olivier MATZ, dev

>>Hi Keith,
>>
>>Thank you for adding the RTE_NEXT_ABI. I think this is the way
>>described in the process. Your changes will be available in the next
>>version (16.04) for people compiling with RTE_NEXT_ABI=y, and in
>>16.07 without the option (I'm just surprised that RTE_NEXT_ABI=y is
>>set in the default configs...).
>>
>>I think a deprecation notice should also be added in this commit
>>in doc/guides/rel_notes/deprecation.rst.
>
>Will add the text.
>>
>>Please also find comments below.
>>
>>On 02/09/2016 06:30 PM, Keith Wiles wrote:
>>
>>> diff --git a/config/defconfig_x86_64-native-linuxapp-gcc b/config/defconfig_x86_64-native-linuxapp-gcc
>>> index 60baf5b..02e9ace 100644
>>> --- a/config/defconfig_x86_64-native-linuxapp-gcc
>>> +++ b/config/defconfig_x86_64-native-linuxapp-gcc
>>> @@ -40,3 +40,8 @@ CONFIG_RTE_ARCH_64=y
>>>  
>>>  CONFIG_RTE_TOOLCHAIN="gcc"
>>>  CONFIG_RTE_TOOLCHAIN_GCC=y
>>> +CONFIG_RTE_BUILD_SHARED_LIB=y
>>> +CONFIG_RTE_NEXT_ABI=n
>>> +CONFIG_RTE_EAL_IGB_UIO=n
>>> +CONFIG_RTE_LIBRTE_KNI=n
>>> +CONFIG_RTE_KNI_KMOD=n
>
>Hmm, not sure where this came from, but will remove it.

I think this came from the ABI checker I ran; the tool should leave the repo in its original state.

>>
>>I think this should not be part of the patch.
>>
>>> @@ -672,6 +704,24 @@ rte_mempool_count(const struct rte_mempool *mp)
>>>  static unsigned
>>>  rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp)
>>>  {
>>> +#ifdef RTE_NEXT_ABI
>>> +	unsigned lcore_id;
>>> +	unsigned count = 0;
>>> +	unsigned cache_count;
>>> +
>>> +	fprintf(f, "  cache infos:\n");
>>> +	fprintf(f, "    cache_size=%"PRIu32"\n", mp->cache_size);
>>> +	if (mp->cache_size == 0)
>>> +		return count;
>>> +
>>> +	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
>>> +		cache_count = mp->local_cache[lcore_id].len;
>>> +		fprintf(f, "    cache_count[%u]=%u\n", lcore_id, cache_count);
>>> +		count += cache_count;
>>> +	}
>>> +	fprintf(f, "    total_cache_count=%u\n", count);
>>> +	return count;
>>> +#else
>>>  #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>>>  	unsigned lcore_id;
>>>  	unsigned count = 0;
>>
>>I think in this case we could avoid duplicating the code without
>>being unclear by using the proper #ifdefs:
>
>I was struggling with how it should be done. I like to see clear ifdefs and be able to see the complete code for a given case. In these cases I wanted to make it simple to remove the code quickly by just deleting lines instead of editing lines. I will follow your suggestion.
>>
>>#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 || defined(RTE_NEXT_ABI)
>>	/* common code */
>>#ifdef RTE_NEXT_ABI
>>	if (mp->cache_size == 0)
>>		return count;
>>#endif
>>	/* common code */
>>#else
>>...
>>#endif
>>
>>
>>> @@ -755,6 +806,26 @@ mempool_audit_cookies(const struct rte_mempool *mp)
>>>  #define mempool_audit_cookies(mp) do {} while(0)
>>>  #endif
>>>  
>>> +#ifdef RTE_NEXT_ABI
>>> +/* check cookies before and after objects */
>>> +static void
>>> +mempool_audit_cache(const struct rte_mempool *mp)
>>> +{
>>> +	/* check cache size consistency */
>>> +	unsigned lcore_id;
>>> +
>>> +	if (mp->cache_size == 0)
>>> +		return;
>>> +
>>> +	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
>>> +		if (mp->local_cache[lcore_id].len > mp->cache_flushthresh) {
>>> +			RTE_LOG(CRIT, MEMPOOL, "badness on cache[%u]\n",
>>> +				lcore_id);
>>> +			rte_panic("MEMPOOL: invalid cache len\n");
>>> +		}
>>> +	}
>>> +}
>>> +#else
>>
>>same here
>>
>>> diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
>>> index 6e2390a..fc9b595 100644
>>> --- a/lib/librte_mempool/rte_mempool.h
>>> +++ b/lib/librte_mempool/rte_mempool.h
>>> @@ -95,6 +95,19 @@ struct rte_mempool_debug_stats {
>>>  } __rte_cache_aligned;
>>>  #endif
>>>  
>>> +#ifdef RTE_NEXT_ABI
>>> +/**
>>> + * A structure that stores a per-core object cache.
>>> + */
>>> +struct rte_mempool_cache {
>>> +	unsigned len; /**< Cache len */
>>> +	/*
>>> +	 * Cache is allocated to this size to allow it to overflow in certain
>>> +	 * cases to avoid needless emptying of cache.
>>> +	 */
>>> +	void *objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 3]; /**< Cache objects */
>>> +} __rte_cache_aligned;
>>> +#else
>>
>>same here
>>
>>
>>
>>> @@ -755,19 +793,25 @@ static inline void __attribute__((always_inline))
>>>  __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
>>>  		    unsigned n, int is_mp)
>>>  {
>>> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>>>  #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>>> +#endif /* RTE_NEXT_ABI */
>>>  	struct rte_mempool_cache *cache;
>>>  	uint32_t index;
>>>  	void **cache_objs;
>>>  	unsigned lcore_id = rte_lcore_id();
>>>  	uint32_t cache_size = mp->cache_size;
>>>  	uint32_t flushthresh = mp->cache_flushthresh;
>>> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>>>  #endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
>>> +#endif /* RTE_NEXT_ABI */
>>
>>this looks strange... I think it does not work properly.
>>Why not
>>#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 || defined(RTE_NEXT_ABI)
>
>Yes, it is strange :-(
>>
>>>  	/* increment stat now, adding in mempool always success */
>>>  	__MEMPOOL_STAT_ADD(mp, put, n);
>>>  
>>> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>>>  #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>>> +#endif /* RTE_NEXT_ABI */
>>>  	/* cache is not enabled or single producer or non-EAL thread */
>>>  	if (unlikely(cache_size == 0 || is_mp == 0 ||
>>>  		     lcore_id >= RTE_MAX_LCORE))
>>> @@ -802,7 +846,9 @@ __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
>>>  	return;
>>>  
>>>  ring_enqueue:
>>> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>>>  #endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
>>> +#endif /* RTE_NEXT_ABI */
>>>  
>>>  	/* push remaining objects in ring */
>>>  #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
>>> @@ -946,7 +992,9 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
>>>  		   unsigned n, int is_mc)
>>>  {
>>>  	int ret;
>>> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>>>  #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
>>> +#endif /* RTE_NEXT_ABI */
>>>  	struct rte_mempool_cache *cache;
>>>  	uint32_t index, len;
>>>  	void **cache_objs;
>>> @@ -992,7 +1040,9 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
>>>  	return 0;
>>>  
>>>  ring_dequeue:
>>> +#ifndef RTE_NEXT_ABI	/* Note: ifndef */
>>>  #endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */
>>> +#endif /* RTE_NEXT_ABI */
>>>  
>>>  	/* get remaining objects from ring */
>>>  	if (is_mc)
>>
>>Same in those cases.
>>
>>
>>
>>Regards,
>>Olivier
>>
>
>
>Regards,
>Keith
>
>
>
>
>


Regards,
Keith

^ permalink raw reply	[flat|nested] 32+ messages in thread

end of thread, other threads:[~2016-05-17  5:31 UTC | newest]

Thread overview: 32+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-02-02 23:02 [PATCH] mempool: Reduce rte_mempool structure size Keith Wiles
2016-02-03 17:11 ` Ananyev, Konstantin
2016-02-08 11:02 ` Olivier MATZ
2016-02-08 15:57   ` Wiles, Keith
2016-02-09 17:30 ` [PATCH v2] mempool: reduce " Keith Wiles
2016-02-10 16:59   ` Olivier MATZ
2016-02-10 17:22     ` Wiles, Keith
2016-02-10 18:35     ` Wiles, Keith
2016-02-10 20:06       ` Olivier MATZ
2016-02-10 21:18   ` [PATCH v3] " Keith Wiles
2016-02-12 11:23     ` Panu Matilainen
2016-02-12 13:57       ` Thomas Monjalon
2016-02-12 14:19         ` Panu Matilainen
2016-02-12 15:07           ` Wiles, Keith
2016-02-12 15:38             ` Thomas Monjalon
2016-02-12 15:50               ` Olivier MATZ
2016-02-12 15:58                 ` Wiles, Keith
2016-02-15  9:58                 ` Hunt, David
2016-02-15 10:15                   ` Olivier MATZ
2016-02-15 10:21                     ` Hunt, David
2016-02-15 12:31                       ` Olivier MATZ
2016-02-12 15:54               ` Wiles, Keith
2016-02-12 18:36   ` [PATCH v4] " Keith Wiles
2016-02-15  9:20     ` Olivier MATZ
2016-04-14  9:42     ` [PATCH v5] " Olivier Matz
2016-04-14 13:28       ` Wiles, Keith
2016-04-14 13:43         ` Olivier MATZ
2016-04-14 13:53       ` Wiles, Keith
2016-05-17  5:31       ` Thomas Monjalon
2016-02-10 18:01 [PATCH v2] " Wiles, Keith
2016-02-10 18:02 ` Thomas Monjalon
2016-02-12 11:52   ` Panu Matilainen
