From: Jesper Dangaard Brouer <brouer@redhat.com>
To: Jonathan Lemon <jonathan.lemon@gmail.com>
Cc: <ilias.apalodimas@linaro.org>, <saeedm@mellanox.com>,
	<tariqt@mellanox.com>, <netdev@vger.kernel.org>,
	<kernel-team@fb.com>,
	brouer@redhat.com
Subject: Re: [PATCH 08/10 net-next] page_pool: Add statistics
Date: Thu, 17 Oct 2019 10:29:01 +0200
Message-ID: <20191017102901.43a09b35@carbon>
In-Reply-To: <20191016225028.2100206-9-jonathan.lemon@gmail.com>

On Wed, 16 Oct 2019 15:50:26 -0700
Jonathan Lemon <jonathan.lemon@gmail.com> wrote:

> Add statistics to the page pool, providing visibility into its operation.
> 
> Callers can provide a location where the stats are stored, otherwise
> the page pool will allocate a statistic area.
> 
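
For illustration, a driver using the caller-provided case described
above might set up its pool roughly as in the sketch below; only the
.stats member comes from this patch, every other value and name is
made up.

/* Hypothetical driver setup: supply a stats area owned by the driver
 * so page_pool does not allocate its own.
 */
static struct page_pool_stats my_rxq_stats;

struct page_pool_params pp_params = {
	.order		= 0,
	.flags		= PP_FLAG_DMA_MAP,
	.pool_size	= 1024,
	.nid		= NUMA_NO_NODE,
	.dma_dir	= DMA_FROM_DEVICE,
	.dev		= dev,			/* the driver's struct device */
	.stats		= &my_rxq_stats,	/* caller-provided stats area */
};
struct page_pool *pool = page_pool_create(&pp_params);
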
> Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
> ---
>  include/net/page_pool.h | 21 +++++++++++++---
>  net/core/page_pool.c    | 55 +++++++++++++++++++++++++++++++++++------
>  2 files changed, 65 insertions(+), 11 deletions(-)
> 
> diff --git a/include/net/page_pool.h b/include/net/page_pool.h
> index fc340db42f9a..4f383522b141 100644
> --- a/include/net/page_pool.h
> +++ b/include/net/page_pool.h
> @@ -34,8 +34,11 @@
>  #include <linux/ptr_ring.h>
>  #include <linux/dma-direction.h>
>  
> -#define PP_FLAG_DMA_MAP 1 /* Should page_pool do the DMA map/unmap */
> -#define PP_FLAG_ALL	PP_FLAG_DMA_MAP
> +#define PP_FLAG_DMA_MAP		BIT(0) /* page_pool does the DMA map/unmap */
> +#define PP_FLAG_ALL		(PP_FLAG_DMA_MAP)
> +
> +/* internal flags, not expoed to user */
> +#define PP_FLAG_INTERNAL_STATS	BIT(8)
>  
>  /*
>   * Fast allocation side cache array/stack
> @@ -57,6 +60,17 @@
>  #define PP_ALLOC_POOL_DEFAULT	1024
>  #define PP_ALLOC_POOL_LIMIT	32768
>  
> +struct page_pool_stats {
> +	u64 cache_hit;
> +	u64 cache_full;
> +	u64 cache_empty;
> +	u64 ring_produce;
> +	u64 ring_consume;

You are placing producer and consumer counters on the same cache line.
This is not acceptable.

The page pool and ptr_ring are specifically designed to avoid
cache-line contention between consumers and producers.  This patch
kills that work.
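
One way to keep that separation, if per-pool counters are wanted, is
to group the counters by which side updates them and pad each group to
a cache line.  A minimal sketch of such a layout (purely illustrative,
not part of the posted patch; the exact grouping of the individual
counters is an assumption):

#include <linux/cache.h>	/* ____cacheline_aligned_in_smp */
#include <linux/types.h>

/* Illustrative layout: counters updated on the allocation/consumer
 * side live on one cache line, counters updated on the recycle/
 * producer side on another, so the two sides never write to the
 * same line.
 */
struct page_pool_stats {
	struct {
		u64 cache_hit;
		u64 cache_empty;
		u64 ring_consume;
		u64 node_change;
	} alloc ____cacheline_aligned_in_smp;	/* consumer side */

	struct {
		u64 cache_full;
		u64 ring_produce;
		u64 ring_return;
		u64 flush;
	} recycle ____cacheline_aligned_in_smp;	/* producer side */
};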


> +	u64 ring_return;
> +	u64 flush;
> +	u64 node_change;
> +};

Another example of carefully avoiding cache-line bouncing is the
in-flight accounting: note the struct placement of pages_state_hold_cnt
and pages_state_release_cnt, which keeps each counter on the cache line
of the side that updates it.
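
Roughly, the hold counter is only written by the allocation side and
the release counter only by the freeing side, and the in-flight number
is computed on demand, along these lines (a simplified sketch of the
idea in net/core/page_pool.c, with tracing details left out):

/* Each side increments only its own counter, so the hot paths never
 * share a cache line; the difference is computed only when asked for.
 */
static s32 page_pool_inflight(struct page_pool *pool)
{
	u32 release_cnt = atomic_read(&pool->pages_state_release_cnt);
	u32 hold_cnt = READ_ONCE(pool->pages_state_hold_cnt);

	return (s32)(hold_cnt - release_cnt);
}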


> +
>  struct page_pool_params {
>  	unsigned int	flags;
>  	unsigned int	order;
> @@ -65,6 +79,7 @@ struct page_pool_params {
>  	int		nid;  /* Numa node id to allocate from pages from */
>  	enum dma_data_direction dma_dir; /* DMA mapping direction */
>  	struct device	*dev; /* device, for DMA pre-mapping purposes */
> +	struct page_pool_stats *stats; /* pool stats stored externally */
>  };
>  
>  struct page_pool {
> @@ -230,8 +245,8 @@ static inline bool page_pool_put(struct page_pool *pool)
>  static inline void page_pool_update_nid(struct page_pool *pool, int new_nid)
>  {
>  	if (unlikely(pool->p.nid != new_nid)) {
> -		/* TODO: Add statistics/trace */
>  		pool->p.nid = new_nid;
> +		pool->p.stats->node_change++;
>  	}
>  }
>  #endif /* _NET_PAGE_POOL_H */
> diff --git a/net/core/page_pool.c b/net/core/page_pool.c
> index f8fedecddb6f..ea6202813584 100644
> --- a/net/core/page_pool.c
> +++ b/net/core/page_pool.c
> @@ -20,9 +20,10 @@
>  
>  static int page_pool_init(struct page_pool *pool)
>  {
> +	int size;
>  
>  	/* Validate only known flags were used */
> -	if (pool->p.flags & ~(PP_FLAG_ALL))
> +	if (pool->p.flags & ~PP_FLAG_ALL)
>  		return -EINVAL;
>  
>  	if (!pool->p.pool_size)
> @@ -40,8 +41,16 @@ static int page_pool_init(struct page_pool *pool)
>  	    (pool->p.dma_dir != DMA_BIDIRECTIONAL))
>  		return -EINVAL;
>  
> +	if (!pool->p.stats) {
> +		size  = sizeof(struct page_pool_stats);
> +		pool->p.stats = kzalloc_node(size, GFP_KERNEL, pool->p.nid);
> +		if (!pool->p.stats)
> +			return -ENOMEM;
> +		pool->p.flags |= PP_FLAG_INTERNAL_STATS;
> +	}
> +
>  	if (ptr_ring_init(&pool->ring, pool->p.pool_size, GFP_KERNEL) < 0)
> -		return -ENOMEM;
> +		goto fail;
>  
>  	atomic_set(&pool->pages_state_release_cnt, 0);
>  
> @@ -52,6 +61,12 @@ static int page_pool_init(struct page_pool *pool)
>  		get_device(pool->p.dev);
>  
>  	return 0;
> +
> +fail:
> +	if (pool->p.flags & PP_FLAG_INTERNAL_STATS)
> +		kfree(pool->p.stats);
> +
> +	return -ENOMEM;
>  }
>  
>  struct page_pool *page_pool_create(const struct page_pool_params *params)
> @@ -98,9 +113,11 @@ static struct page *__page_pool_get_cached(struct page_pool *pool)
>  	if (likely(in_serving_softirq())) {
>  		if (likely(pool->alloc_count)) {
>  			/* Fast-path */
> +			pool->p.stats->cache_hit++;
>  			page = pool->alloc_cache[--pool->alloc_count];
>  			return page;
>  		}
> +		pool->p.stats->cache_empty++;
>  		refill = true;
>  	}
>  
> @@ -113,10 +130,13 @@ static struct page *__page_pool_get_cached(struct page_pool *pool)
>  	 */
>  	spin_lock(&r->consumer_lock);
>  	page = __ptr_ring_consume(r);
> -	if (refill)
> +	if (refill) {
>  		pool->alloc_count = __ptr_ring_consume_batched(r,
>  							pool->alloc_cache,
>  							PP_ALLOC_CACHE_REFILL);
> +		pool->p.stats->ring_consume += pool->alloc_count;
> +	}
> +	pool->p.stats->ring_consume += !!page;
>  	spin_unlock(&r->consumer_lock);
>  	return page;
>  }
> @@ -266,15 +286,23 @@ static void __page_pool_return_page(struct page_pool *pool, struct page *page)
>  static bool __page_pool_recycle_into_ring(struct page_pool *pool,
>  				   struct page *page)
>  {
> +	struct ptr_ring *r = &pool->ring;
>  	int ret;
>  
> -	/* BH protection not needed if current is serving softirq */
>  	if (in_serving_softirq())
> -		ret = ptr_ring_produce(&pool->ring, page);
> +		spin_lock(&r->producer_lock);
>  	else
> -		ret = ptr_ring_produce_bh(&pool->ring, page);
> +		spin_lock_bh(&r->producer_lock);
>  
> -	return (ret == 0) ? true : false;
> +	ret = __ptr_ring_produce(r, page);
> +	pool->p.stats->ring_produce++;
> +
> +	if (in_serving_softirq())
> +		spin_unlock(&r->producer_lock);
> +	else
> +		spin_unlock_bh(&r->producer_lock);
> +
> +	return ret == 0;
>  }
>  
>  /* Only allow direct recycling in special circumstances, into the
> @@ -285,8 +313,10 @@ static bool __page_pool_recycle_into_ring(struct page_pool *pool,
>  static bool __page_pool_recycle_into_cache(struct page *page,
>  					   struct page_pool *pool)
>  {
> -	if (unlikely(pool->alloc_count == pool->p.cache_size))
> +	if (unlikely(pool->alloc_count == pool->p.cache_size)) {
> +		pool->p.stats->cache_full++;
>  		return false;
> +	}
>  
>  	/* Caller MUST have verified/know (page_ref_count(page) == 1) */
>  	pool->alloc_cache[pool->alloc_count++] = page;
> @@ -343,6 +373,7 @@ EXPORT_SYMBOL(__page_pool_put_page);
>  static void __page_pool_empty_ring(struct page_pool *pool)
>  {
>  	struct page *page;
> +	int count = 0;
>  
>  	/* Empty recycle ring */
>  	while ((page = ptr_ring_consume_bh(&pool->ring))) {
> @@ -351,8 +382,11 @@ static void __page_pool_empty_ring(struct page_pool *pool)
>  			pr_crit("%s() page_pool refcnt %d violation\n",
>  				__func__, page_ref_count(page));
>  
> +		count++;
>  		__page_pool_return_page(pool, page);
>  	}
> +
> +	pool->p.stats->ring_return += count;
>  }
>  
>  static void __warn_in_flight(struct page_pool *pool)
> @@ -381,6 +415,9 @@ void __page_pool_free(struct page_pool *pool)
>  	if (!__page_pool_safe_to_destroy(pool))
>  		__warn_in_flight(pool);
>  
> +	if (pool->p.flags & PP_FLAG_INTERNAL_STATS)
> +		kfree(pool->p.stats);
> +
>  	ptr_ring_cleanup(&pool->ring, NULL);
>  
>  	if (pool->p.flags & PP_FLAG_DMA_MAP)
> @@ -394,6 +431,8 @@ static void page_pool_flush(struct page_pool *pool)
>  {
>  	struct page *page;
>  
> +	pool->p.stats->flush++;
> +
>  	/* Empty alloc cache, assume caller made sure this is
>  	 * no-longer in use, and page_pool_alloc_pages() cannot be
>  	 * called concurrently.

Have you benchmarked the overhead of adding this accounting?

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

Thread overview: 34+ messages
2019-10-16 22:50 [PATCH 00/10 net-next] page_pool cleanups Jonathan Lemon
2019-10-16 22:50 ` [PATCH 01/10 net-next] net/mlx5e: RX, Remove RX page-cache Jonathan Lemon
2019-10-17  1:30   ` Jakub Kicinski
2019-10-18 20:03     ` Jonathan Lemon
2019-10-18 20:53   ` Saeed Mahameed
2019-10-19  0:10     ` Jonathan Lemon
2019-10-20  7:29       ` Tariq Toukan
2019-10-16 22:50 ` [PATCH 02/10 net-next] net/mlx5e: RX, Manage RX pages only via page pool API Jonathan Lemon
2019-10-16 22:50 ` [PATCH 03/10 net-next] net/mlx5e: RX, Internal DMA mapping in page_pool Jonathan Lemon
2019-10-17  1:33   ` Jakub Kicinski
2019-10-18 20:04     ` Jonathan Lemon
2019-10-18 21:01   ` Saeed Mahameed
2019-10-16 22:50 ` [PATCH 04/10 net-next] page_pool: Add API to update numa node and flush page caches Jonathan Lemon
2019-10-17 12:06   ` Ilias Apalodimas
2019-10-18 21:07     ` Saeed Mahameed
2019-10-18 23:38       ` Jonathan Lemon
2019-10-16 22:50 ` [PATCH 05/10 net-next] net/mlx5e: Rx, Update page pool numa node when changed Jonathan Lemon
2019-10-16 22:50 ` [PATCH 06/10 net-next] page_pool: Add page_pool_keep_page Jonathan Lemon
2019-10-16 22:50 ` [PATCH 07/10 net-next] page_pool: allow configurable linear cache size Jonathan Lemon
2019-10-17  8:51   ` Jesper Dangaard Brouer
2019-10-18 20:31     ` Jonathan Lemon
2019-10-16 22:50 ` [PATCH 08/10 net-next] page_pool: Add statistics Jonathan Lemon
2019-10-17  8:29   ` Jesper Dangaard Brouer [this message]
2019-10-18 21:29   ` Saeed Mahameed
2019-10-18 23:37     ` Jonathan Lemon
2019-10-16 22:50 ` [PATCH 09/10 net-next] net/mlx5: Add page_pool stats to the Mellanox driver Jonathan Lemon
2019-10-17 11:09   ` Jesper Dangaard Brouer
2019-10-18 20:45     ` Jonathan Lemon
2019-10-16 22:50 ` [PATCH 10/10 net-next] page_pool: Cleanup and rename page_pool functions Jonathan Lemon
2019-10-18 20:50 ` [PATCH 00/10 net-next] page_pool cleanups Saeed Mahameed
2019-10-18 21:21   ` Saeed Mahameed
2019-10-18 23:32   ` Jonathan Lemon
2019-10-21 19:08     ` Saeed Mahameed
2019-10-21 21:45       ` Jonathan Lemon
