From: Dharmik Thakkar <Dharmik.Thakkar@arm.com>
To: Olivier Matz <olivier.matz@6wind.com>
Cc: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>,
	dev <dev@dpdk.org>, nd <nd@arm.com>,
	Joyce Kong <Joyce.Kong@arm.com>, "Kinsella, Ray" <mdr@ashroe.eu>,
	Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>
Subject: Re: [dpdk-dev] [PATCH v3 2/2] lib/mempool: distinguish debug counters from cache and pool
Date: Thu, 22 Apr 2021 21:27:02 +0000	[thread overview]
Message-ID: <E4C639A8-22E2-460E-9A88-52FEDA0D3A96@arm.com> (raw)
In-Reply-To: <20210421162938.GG1726@platinum>

Hi Olivier,

Thank you for your comments!

> On Apr 21, 2021, at 11:29 AM, Olivier Matz <olivier.matz@6wind.com> wrote:
> 
> Hi Dharmik,
> 
> Please see some comments below.
> 
> On Mon, Apr 19, 2021 at 07:08:00PM -0500, Dharmik Thakkar wrote:
>> From: Joyce Kong <joyce.kong@arm.com>
>> 
>> If the cache is enabled, objects are retrieved from and put into the
>> cache first, and only subsequently from and into the common pool.
>> Currently the debug stats count the objects retrieved from and put
>> into the cache and the common pool together; it is better to
>> distinguish them.
>> 
>> Signed-off-by: Joyce Kong <joyce.kong@arm.com>
>> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
>> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
>> ---
>> lib/librte_mempool/rte_mempool.c | 24 ++++++++++++++++
>> lib/librte_mempool/rte_mempool.h | 47 ++++++++++++++++++++++----------
>> 2 files changed, 57 insertions(+), 14 deletions(-)
>> 
>> diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
>> index afb1239c8d48..339f14455624 100644
>> --- a/lib/librte_mempool/rte_mempool.c
>> +++ b/lib/librte_mempool/rte_mempool.c
>> @@ -1244,6 +1244,18 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp)
>> 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
>> 		sum.put_bulk += mp->stats[lcore_id].put_bulk;
>> 		sum.put_objs += mp->stats[lcore_id].put_objs;
>> +		sum.put_common_pool_bulk +=
>> +			mp->stats[lcore_id].put_common_pool_bulk;
>> +		sum.put_common_pool_objs +=
>> +			mp->stats[lcore_id].put_common_pool_objs;
>> +		sum.put_cache_bulk += mp->stats[lcore_id].put_cache_bulk;
>> +		sum.put_cache_objs += mp->stats[lcore_id].put_cache_objs;
>> +		sum.get_common_pool_bulk +=
>> +			mp->stats[lcore_id].get_common_pool_bulk;
>> +		sum.get_common_pool_objs +=
>> +			mp->stats[lcore_id].get_common_pool_objs;
>> +		sum.get_cache_bulk += mp->stats[lcore_id].get_cache_bulk;
>> +		sum.get_cache_objs += mp->stats[lcore_id].get_cache_objs;
>> 		sum.get_success_bulk += mp->stats[lcore_id].get_success_bulk;
>> 		sum.get_success_objs += mp->stats[lcore_id].get_success_objs;
>> 		sum.get_fail_bulk += mp->stats[lcore_id].get_fail_bulk;
>> @@ -1254,6 +1266,18 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp)
>> 	fprintf(f, "  stats:\n");
>> 	fprintf(f, "    put_bulk=%"PRIu64"\n", sum.put_bulk);
>> 	fprintf(f, "    put_objs=%"PRIu64"\n", sum.put_objs);
>> +	fprintf(f, "    put_common_pool_bulk=%"PRIu64"\n",
>> +						sum.put_common_pool_bulk);
>> +	fprintf(f, "    put_common_pool_objs=%"PRIu64"\n",
>> +						sum.put_common_pool_objs);
>> +	fprintf(f, "    put_cache_bulk=%"PRIu64"\n", sum.put_cache_bulk);
>> +	fprintf(f, "    put_cache_objs=%"PRIu64"\n", sum.put_cache_objs);
>> +	fprintf(f, "    get_common_pool_bulk=%"PRIu64"\n",
>> +						sum.get_common_pool_bulk);
>> +	fprintf(f, "    get_common_pool_objs=%"PRIu64"\n",
>> +						sum.get_common_pool_objs);
>> +	fprintf(f, "    get_cache_bulk=%"PRIu64"\n", sum.get_cache_bulk);
>> +	fprintf(f, "    get_cache_objs=%"PRIu64"\n", sum.get_cache_objs);
>> 	fprintf(f, "    get_success_bulk=%"PRIu64"\n", sum.get_success_bulk);
>> 	fprintf(f, "    get_success_objs=%"PRIu64"\n", sum.get_success_objs);
>> 	fprintf(f, "    get_fail_bulk=%"PRIu64"\n", sum.get_fail_bulk);
>> diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
>> index 848a19226149..0959f8a3f367 100644
>> --- a/lib/librte_mempool/rte_mempool.h
>> +++ b/lib/librte_mempool/rte_mempool.h
>> @@ -66,12 +66,20 @@ extern "C" {
>>  * A structure that stores the mempool statistics (per-lcore).
>>  */
>> struct rte_mempool_debug_stats {
>> -	uint64_t put_bulk;         /**< Number of puts. */
>> -	uint64_t put_objs;         /**< Number of objects successfully put. */
>> -	uint64_t get_success_bulk; /**< Successful allocation number. */
>> -	uint64_t get_success_objs; /**< Objects successfully allocated. */
>> -	uint64_t get_fail_bulk;    /**< Failed allocation number. */
>> -	uint64_t get_fail_objs;    /**< Objects that failed to be allocated. */
>> +	uint64_t put_bulk;		  /**< Number of puts. */
>> +	uint64_t put_objs;		  /**< Number of objects successfully put. */
>> +	uint64_t put_common_pool_bulk;	  /**< Number of bulks enqueued in common pool. */
>> +	uint64_t put_common_pool_objs;	  /**< Number of objects enqueued in common pool. */
>> +	uint64_t put_cache_bulk;	  /**< Number of bulks enqueued in cache. */
>> +	uint64_t put_cache_objs;	  /**< Number of objects enqueued in cache. */
>> +	uint64_t get_common_pool_bulk;    /**< Number of bulks dequeued from common pool. */
>> +	uint64_t get_common_pool_objs;	  /**< Number of objects dequeued from common pool. */
>> +	uint64_t get_cache_bulk;	  /**< Number of bulks dequeued from cache. */
>> +	uint64_t get_cache_objs;	  /**< Number of objects dequeued from cache. */
>> +	uint64_t get_success_bulk;	  /**< Successful allocation number. */
>> +	uint64_t get_success_objs;	  /**< Objects successfully allocated. */
>> +	uint64_t get_fail_bulk;		  /**< Failed allocation number. */
>> +	uint64_t get_fail_objs;		  /**< Objects that failed to be allocated. */
> 
> I missed it the first time, but this changes the size of the
> rte_mempool_debug_stats structure. I think we don't care about this ABI
> breakage because this structure is only defined if
> RTE_LIBRTE_MEMPOOL_DEBUG is set. But just in case, adding Ray as Cc.

Agreed, thank you!
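
For context (a simplified sketch of the gating in rte_mempool.h, not a
verbatim copy of the macro after patch 1/2), the per-lcore stats updates
compile to nothing unless RTE_LIBRTE_MEMPOOL_DEBUG is defined:

#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
/* Update one field of the per-lcore rte_mempool_debug_stats. */
#define __MEMPOOL_STAT_ADD(mp, name, n) do {                       \
		unsigned int __lcore_id = rte_lcore_id();          \
		if (__lcore_id < RTE_MAX_LCORE)                    \
			(mp)->stats[__lcore_id].name += (n);       \
	} while (0)
#else
/* Debug disabled: no stats fields are touched, so the struct layout
 * change is invisible to non-debug builds. */
#define __MEMPOOL_STAT_ADD(mp, name, n) do {} while (0)
#endif

so only builds that opt into the debug stats see the size change.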

> 
> About the fields themselves, I'm not certain that there is added value
> in having stats for cache gets and puts. My feeling is that the
> important stat to monitor is the access to the common pool, because it
> is the one that highlights a possible performance impact (contention).
> The cache stats are more or less equal to "success + fail - common".
> Moreover, dropping them will simplify the patch and avoid the risk of
> mistakes.
> 
> What do you think?

Yes, I think the cache stats can be removed.
Also, please correct me if I’m wrong, but in my understanding the
cache stats are equal to “success - common”. Is adding “fail” required?
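
To illustrate what I mean (a hypothetical sketch of the get path using
the counters from this patch; mempool_get_sketch() and cache_can_serve()
are made-up names, not the real code):

static int
mempool_get_sketch(struct rte_mempool *mp, void **objs, unsigned int n)
{
	if (cache_can_serve(mp, n)) {
		/* Served entirely from the per-lcore cache. */
		__MEMPOOL_STAT_ADD(mp, get_cache_bulk, 1);
		__MEMPOOL_STAT_ADD(mp, get_success_bulk, 1);
		return 0;
	}
	/* Fall back to the common pool; on success the ops wrapper
	 * already bumps get_common_pool_* and get_success_*. */
	if (rte_mempool_ops_dequeue_bulk(mp, objs, n) == 0)
		return 0;
	/* Neither path succeeded. */
	__MEMPOOL_STAT_ADD(mp, get_fail_bulk, 1);
	return -ENOENT;
}

Every successful get is counted once in get_success_* and once as
either a cache hit or a common-pool dequeue, so (more or less)
success = cache + common, i.e. cache = success - common; the fail
counters sit outside that identity.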

> 
>> 	/** Successful allocation number of contiguous blocks. */
>> 	uint64_t get_success_blks;
>> 	/** Failed allocation number of contiguous blocks. */
>> @@ -699,10 +707,18 @@ rte_mempool_ops_dequeue_bulk(struct rte_mempool *mp,
>> 		void **obj_table, unsigned n)
>> {
>> 	struct rte_mempool_ops *ops;
>> +	int ret;
>> 
>> 	rte_mempool_trace_ops_dequeue_bulk(mp, obj_table, n);
>> 	ops = rte_mempool_get_ops(mp->ops_index);
>> -	return ops->dequeue(mp, obj_table, n);
>> +	ret = ops->dequeue(mp, obj_table, n);
>> +	if (ret == 0) {
>> +		__MEMPOOL_STAT_ADD(mp, get_common_pool_bulk, 1);
>> +		__MEMPOOL_STAT_ADD(mp, get_common_pool_objs, n);
>> +		__MEMPOOL_STAT_ADD(mp, get_success_bulk, 1);
>> +		__MEMPOOL_STAT_ADD(mp, get_success_objs, n);
>> +	}
>> +	return ret;
>> }
>> 
>> /**
>> @@ -749,6 +765,8 @@ rte_mempool_ops_enqueue_bulk(struct rte_mempool *mp, void * const *obj_table,
>> {
>> 	struct rte_mempool_ops *ops;
>> 
>> +	__MEMPOOL_STAT_ADD(mp, put_common_pool_bulk, 1);
>> +	__MEMPOOL_STAT_ADD(mp, put_common_pool_objs, n);
>> 	rte_mempool_trace_ops_enqueue_bulk(mp, obj_table, n);
>> 	ops = rte_mempool_get_ops(mp->ops_index);
>> 	return ops->enqueue(mp, obj_table, n);
>> @@ -1297,14 +1315,18 @@ __mempool_generic_put(struct rte_mempool *mp, void * const *obj_table,
>> 
>> 	/* Add elements back into the cache */
>> 	rte_memcpy(&cache_objs[0], obj_table, sizeof(void *) * n);
>> -
>> 	cache->len += n;
>> 
>> +	__MEMPOOL_STAT_ADD(mp, put_cache_bulk, 1);
>> +
>> 	if (cache->len >= cache->flushthresh) {
>> +		__MEMPOOL_STAT_ADD(mp, put_cache_objs,
>> +				   n - (cache->len - cache->size));
>> 		rte_mempool_ops_enqueue_bulk(mp, &cache->objs[cache->size],
>> 				cache->len - cache->size);
>> 		cache->len = cache->size;
>> -	}
>> +	} else
>> +		__MEMPOOL_STAT_ADD(mp, put_cache_objs, n);
>> 
> 
> In case we keep cache stats, I'd add {} after the else to be consistent
> with the if().

Ack.
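
Something like this, then (the same logic as in the patch, just with
braces on both branches):

	if (cache->len >= cache->flushthresh) {
		__MEMPOOL_STAT_ADD(mp, put_cache_objs,
				   n - (cache->len - cache->size));
		rte_mempool_ops_enqueue_bulk(mp, &cache->objs[cache->size],
				cache->len - cache->size);
		cache->len = cache->size;
	} else {
		__MEMPOOL_STAT_ADD(mp, put_cache_objs, n);
	}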

> 
>> 	return;
>> 
>> @@ -1438,8 +1460,8 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table,
>> 
>> 	cache->len -= n;
>> 
>> -	__MEMPOOL_STAT_ADD(mp, get_success_bulk, 1);
>> -	__MEMPOOL_STAT_ADD(mp, get_success_objs, n);
>> +	__MEMPOOL_STAT_ADD(mp, get_cache_bulk, 1);
>> +	__MEMPOOL_STAT_ADD(mp, get_cache_objs, n);
> 
> In case we keep cache stats, I don't think we should remove the
> get_success stats increment. Otherwise, the success stats will never
> be incremented when objects are retrieved from the cache.
> 

Good catch. Thanks!
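
If the cache stats stay, I'll make the cache hit path bump both
counters, i.e. something along these lines:

	cache->len -= n;

	/* Count the cache hit and the overall success. */
	__MEMPOOL_STAT_ADD(mp, get_cache_bulk, 1);
	__MEMPOOL_STAT_ADD(mp, get_cache_objs, n);
	__MEMPOOL_STAT_ADD(mp, get_success_bulk, 1);
	__MEMPOOL_STAT_ADD(mp, get_success_objs, n);

	return 0;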

> 
>> 
>> 	return 0;
>> 
>> @@ -1451,9 +1473,6 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table,
>> 	if (ret < 0) {
>> 		__MEMPOOL_STAT_ADD(mp, get_fail_bulk, 1);
>> 		__MEMPOOL_STAT_ADD(mp, get_fail_objs, n);
>> -	} else {
>> -		__MEMPOOL_STAT_ADD(mp, get_success_bulk, 1);
>> -		__MEMPOOL_STAT_ADD(mp, get_success_objs, n);
>> 	}
>> 
>> 	return ret;
>> -- 
>> 2.17.1


Thread overview: 22+ messages
2021-03-18 11:20 [dpdk-dev] [PATCH v2] lib/mempool: distinguish debug counters from cache and pool Joyce Kong
2021-04-07 14:28 ` Olivier Matz
2021-04-20  0:31   ` Dharmik Thakkar
2021-04-20  0:07 ` [dpdk-dev] [PATCH v3 0/2] lib/mempool: add debug stats Dharmik Thakkar
2021-04-20  0:07   ` [dpdk-dev] [PATCH v3 1/2] lib/mempool: make stats macro generic Dharmik Thakkar
2021-04-21 16:09     ` Olivier Matz
2021-04-20  0:08   ` [dpdk-dev] [PATCH v3 2/2] lib/mempool: distinguish debug counters from cache and pool Dharmik Thakkar
2021-04-21 16:29     ` Olivier Matz
2021-04-22 21:27       ` Dharmik Thakkar [this message]
2021-04-22 21:47         ` Honnappa Nagarahalli
2021-04-23 10:41       ` Kinsella, Ray
2021-04-23  1:29   ` [dpdk-dev] [PATCH v4 0/2] lib/mempool: add debug stats Dharmik Thakkar
2021-04-23  1:29     ` [dpdk-dev] [PATCH v4 1/2] lib/mempool: make stats macro generic Dharmik Thakkar
2021-04-23  1:29     ` [dpdk-dev] [PATCH v4 2/2] lib/mempool: distinguish debug counters from cache and pool Dharmik Thakkar
2021-04-23 20:29       ` Dharmik Thakkar
2021-04-27 12:18       ` Olivier Matz
2021-04-27 12:28     ` [dpdk-dev] [PATCH v4 0/2] lib/mempool: add debug stats Olivier Matz
2021-04-27 16:01     ` [dpdk-dev] [PATCH v5 0/2] mempool: " Dharmik Thakkar
2021-04-27 16:01       ` [dpdk-dev] [PATCH v5 1/2] mempool: make stats macro generic Dharmik Thakkar
2021-04-27 16:01       ` [dpdk-dev] [PATCH v5 2/2] mempool: distinguish debug counters from cache and pool Dharmik Thakkar
2021-05-04  6:54         ` Olivier Matz
2021-05-04  7:02       ` [dpdk-dev] [PATCH v5 0/2] mempool: add debug stats David Marchand
