* [PATCH 00/10 net-next] page_pool cleanups
@ 2019-10-16 22:50 Jonathan Lemon
  2019-10-16 22:50 ` [PATCH 01/10 net-next] net/mlx5e: RX, Remove RX page-cache Jonathan Lemon
                   ` (10 more replies)
  0 siblings, 11 replies; 34+ messages in thread
From: Jonathan Lemon @ 2019-10-16 22:50 UTC (permalink / raw)
  To: brouer, ilias.apalodimas, saeedm, tariqt; +Cc: netdev, kernel-team

This series combines work from several people:
- part of Tariq's work to move the DMA mapping from
  the mlx5 driver into the page pool.  This does not
  include the later patches which remove the DMA address
  from the driver, as those conflict with AF_XDP.

- Saeed's changes to check the numa node before
  placing a page in the pool, and to flush the
  pool on a node change.

- Statistics and cleanup for the page pool.

Jonathan Lemon (5):
  page_pool: Add page_pool_keep_page
  page_pool: allow configurable linear cache size
  page_pool: Add statistics
  net/mlx5: Add page_pool stats to the Mellanox driver
  page_pool: Cleanup and rename page_pool functions.

Saeed Mahameed (2):
  page_pool: Add API to update numa node and flush page caches
  net/mlx5e: Rx, Update page pool numa node when changed

Tariq Toukan (3):
  net/mlx5e: RX, Remove RX page-cache
  net/mlx5e: RX, Manage RX pages only via page pool API
  net/mlx5e: RX, Internal DMA mapping in page_pool

 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  18 +-
 .../net/ethernet/mellanox/mlx5/core/en/xdp.c  |  12 +-
 .../net/ethernet/mellanox/mlx5/core/en_main.c |  19 +-
 .../net/ethernet/mellanox/mlx5/core/en_rx.c   | 128 ++--------
 .../ethernet/mellanox/mlx5/core/en_stats.c    |  39 ++--
 .../ethernet/mellanox/mlx5/core/en_stats.h    |  19 +-
 include/net/page_pool.h                       | 216 +++++++++--------
 net/core/page_pool.c                          | 221 +++++++++++-------
 8 files changed, 319 insertions(+), 353 deletions(-)

-- 
2.17.1



* [PATCH 01/10 net-next] net/mlx5e: RX, Remove RX page-cache
  2019-10-16 22:50 [PATCH 00/10 net-next] page_pool cleanups Jonathan Lemon
@ 2019-10-16 22:50 ` Jonathan Lemon
  2019-10-17  1:30   ` Jakub Kicinski
  2019-10-18 20:53   ` Saeed Mahameed
  2019-10-16 22:50 ` [PATCH 02/10 net-next] net/mlx5e: RX, Manage RX pages only via page pool API Jonathan Lemon
                   ` (9 subsequent siblings)
  10 siblings, 2 replies; 34+ messages in thread
From: Jonathan Lemon @ 2019-10-16 22:50 UTC (permalink / raw)
  To: brouer, ilias.apalodimas, saeedm, tariqt; +Cc: netdev, kernel-team

From: Tariq Toukan <tariqt@mellanox.com>

Obsolete the RX page-cache layer; pages are now recycled
by the page_pool.

This patch introduces a temporary degradation, as recycling
no longer keeps the pages DMA-mapped.  That is fixed in a
later patch in this series.

Issue: 1487631
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>

Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  | 13 ----
 .../net/ethernet/mellanox/mlx5/core/en_main.c | 16 -----
 .../net/ethernet/mellanox/mlx5/core/en_rx.c   | 67 ++-----------------
 3 files changed, 4 insertions(+), 92 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 8d76452cacdc..0595cdcff594 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -583,18 +583,6 @@ struct mlx5e_mpw_info {
 
 #define MLX5E_MAX_RX_FRAGS 4
 
-/* a single cache unit is capable to serve one napi call (for non-striding rq)
- * or a MPWQE (for striding rq).
- */
-#define MLX5E_CACHE_UNIT	(MLX5_MPWRQ_PAGES_PER_WQE > NAPI_POLL_WEIGHT ? \
-				 MLX5_MPWRQ_PAGES_PER_WQE : NAPI_POLL_WEIGHT)
-#define MLX5E_CACHE_SIZE	(4 * roundup_pow_of_two(MLX5E_CACHE_UNIT))
-struct mlx5e_page_cache {
-	u32 head;
-	u32 tail;
-	struct mlx5e_dma_info page_cache[MLX5E_CACHE_SIZE];
-};
-
 struct mlx5e_rq;
 typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq*, struct mlx5_cqe64*);
 typedef struct sk_buff *
@@ -658,7 +646,6 @@ struct mlx5e_rq {
 	struct mlx5e_rq_stats *stats;
 	struct mlx5e_cq        cq;
 	struct mlx5e_cq_decomp cqd;
-	struct mlx5e_page_cache page_cache;
 	struct hwtstamp_config *tstamp;
 	struct mlx5_clock      *clock;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 7569287f8f3c..168be1f800a3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -612,9 +612,6 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 		rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
 	}
 
-	rq->page_cache.head = 0;
-	rq->page_cache.tail = 0;
-
 	return 0;
 
 err_free:
@@ -640,8 +637,6 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 
 static void mlx5e_free_rq(struct mlx5e_rq *rq)
 {
-	int i;
-
 	if (rq->xdp_prog)
 		bpf_prog_put(rq->xdp_prog);
 
@@ -655,17 +650,6 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq)
 		mlx5e_free_di_list(rq);
 	}
 
-	for (i = rq->page_cache.head; i != rq->page_cache.tail;
-	     i = (i + 1) & (MLX5E_CACHE_SIZE - 1)) {
-		struct mlx5e_dma_info *dma_info = &rq->page_cache.page_cache[i];
-
-		/* With AF_XDP, page_cache is not used, so this loop is not
-		 * entered, and it's safe to call mlx5e_page_release_dynamic
-		 * directly.
-		 */
-		mlx5e_page_release_dynamic(rq, dma_info, false);
-	}
-
 	xdp_rxq_info_unreg(&rq->xdp_rxq);
 	page_pool_destroy(rq->page_pool);
 	mlx5_wq_destroy(&rq->wq_ctrl);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index d6a547238de0..a3773f8a4931 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -184,65 +184,9 @@ static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq,
 	return mlx5e_decompress_cqes_cont(rq, wq, 1, budget_rem) - 1;
 }
 
-static inline bool mlx5e_page_is_reserved(struct page *page)
-{
-	return page_is_pfmemalloc(page) || page_to_nid(page) != numa_mem_id();
-}
-
-static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq,
-				      struct mlx5e_dma_info *dma_info)
-{
-	struct mlx5e_page_cache *cache = &rq->page_cache;
-	u32 tail_next = (cache->tail + 1) & (MLX5E_CACHE_SIZE - 1);
-	struct mlx5e_rq_stats *stats = rq->stats;
-
-	if (tail_next == cache->head) {
-		stats->cache_full++;
-		return false;
-	}
-
-	if (unlikely(mlx5e_page_is_reserved(dma_info->page))) {
-		stats->cache_waive++;
-		return false;
-	}
-
-	cache->page_cache[cache->tail] = *dma_info;
-	cache->tail = tail_next;
-	return true;
-}
-
-static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq,
-				      struct mlx5e_dma_info *dma_info)
-{
-	struct mlx5e_page_cache *cache = &rq->page_cache;
-	struct mlx5e_rq_stats *stats = rq->stats;
-
-	if (unlikely(cache->head == cache->tail)) {
-		stats->cache_empty++;
-		return false;
-	}
-
-	if (page_ref_count(cache->page_cache[cache->head].page) != 1) {
-		stats->cache_busy++;
-		return false;
-	}
-
-	*dma_info = cache->page_cache[cache->head];
-	cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1);
-	stats->cache_reuse++;
-
-	dma_sync_single_for_device(rq->pdev, dma_info->addr,
-				   PAGE_SIZE,
-				   DMA_FROM_DEVICE);
-	return true;
-}
-
 static inline int mlx5e_page_alloc_pool(struct mlx5e_rq *rq,
 					struct mlx5e_dma_info *dma_info)
 {
-	if (mlx5e_rx_cache_get(rq, dma_info))
-		return 0;
-
 	dma_info->page = page_pool_dev_alloc_pages(rq->page_pool);
 	if (unlikely(!dma_info->page))
 		return -ENOMEM;
@@ -276,14 +220,11 @@ void mlx5e_page_release_dynamic(struct mlx5e_rq *rq,
 				struct mlx5e_dma_info *dma_info,
 				bool recycle)
 {
-	if (likely(recycle)) {
-		if (mlx5e_rx_cache_put(rq, dma_info))
-			return;
+	mlx5e_page_dma_unmap(rq, dma_info);
 
-		mlx5e_page_dma_unmap(rq, dma_info);
+	if (likely(recycle)) {
 		page_pool_recycle_direct(rq->page_pool, dma_info->page);
 	} else {
-		mlx5e_page_dma_unmap(rq, dma_info);
 		page_pool_release_page(rq->page_pool, dma_info->page);
 		put_page(dma_info->page);
 	}
@@ -1167,7 +1108,7 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 	if (!skb) {
 		/* probably for XDP */
 		if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
-			/* do not return page to cache,
+			/* do not return page to pool,
 			 * it will be returned on XDP_TX completion.
 			 */
 			goto wq_cyc_pop;
@@ -1210,7 +1151,7 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 	if (!skb) {
 		/* probably for XDP */
 		if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
-			/* do not return page to cache,
+			/* do not return page to pool,
 			 * it will be returned on XDP_TX completion.
 			 */
 			goto wq_cyc_pop;
-- 
2.17.1



* [PATCH 02/10 net-next] net/mlx5e: RX, Manage RX pages only via page pool API
  2019-10-16 22:50 [PATCH 00/10 net-next] page_pool cleanups Jonathan Lemon
  2019-10-16 22:50 ` [PATCH 01/10 net-next] net/mlx5e: RX, Remove RX page-cache Jonathan Lemon
@ 2019-10-16 22:50 ` Jonathan Lemon
  2019-10-16 22:50 ` [PATCH 03/10 net-next] net/mlx5e: RX, Internal DMA mapping in page_pool Jonathan Lemon
                   ` (8 subsequent siblings)
  10 siblings, 0 replies; 34+ messages in thread
From: Jonathan Lemon @ 2019-10-16 22:50 UTC (permalink / raw)
  To: brouer, ilias.apalodimas, saeedm, tariqt; +Cc: netdev, kernel-team

From: Tariq Toukan <tariqt@mellanox.com>

Do not call the page allocator functions directly; use
only the page pool API.

This obsoletes the recycle parameter.

Issue: 1487631
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>

Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  4 +-
 .../net/ethernet/mellanox/mlx5/core/en/xdp.c  |  9 ++--
 .../net/ethernet/mellanox/mlx5/core/en_rx.c   | 46 +++++++------------
 3 files changed, 23 insertions(+), 36 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 0595cdcff594..a1ab5c76177d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -928,8 +928,8 @@ bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev,
 
 void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info);
 void mlx5e_page_release_dynamic(struct mlx5e_rq *rq,
-				struct mlx5e_dma_info *dma_info,
-				bool recycle);
+				struct mlx5e_dma_info *dma_info);
+void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info);
 void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
 void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
 bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index f049e0ac308a..1b26061cb959 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -354,8 +354,7 @@ static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq,
 
 static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
 				  struct mlx5e_xdp_wqe_info *wi,
-				  u32 *xsk_frames,
-				  bool recycle)
+				  u32 *xsk_frames)
 {
 	struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
 	u16 i;
@@ -372,7 +371,7 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
 			break;
 		case MLX5E_XDP_XMIT_MODE_PAGE:
 			/* XDP_TX from the regular RQ */
-			mlx5e_page_release_dynamic(xdpi.page.rq, &xdpi.page.di, recycle);
+			mlx5e_page_release_dynamic(xdpi.page.rq, &xdpi.page.di);
 			break;
 		case MLX5E_XDP_XMIT_MODE_XSK:
 			/* AF_XDP send */
@@ -430,7 +429,7 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
 
 			sqcc += wi->num_wqebbs;
 
-			mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, true);
+			mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames);
 		} while (!last_wqe);
 	} while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
 
@@ -461,7 +460,7 @@ void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq)
 
 		sq->cc += wi->num_wqebbs;
 
-		mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, false);
+		mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames);
 	}
 
 	if (xsk_frames)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index a3773f8a4931..033b8264a4e4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -217,31 +217,20 @@ void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info)
 }
 
 void mlx5e_page_release_dynamic(struct mlx5e_rq *rq,
-				struct mlx5e_dma_info *dma_info,
-				bool recycle)
+				struct mlx5e_dma_info *dma_info)
 {
 	mlx5e_page_dma_unmap(rq, dma_info);
 
-	if (likely(recycle)) {
-		page_pool_recycle_direct(rq->page_pool, dma_info->page);
-	} else {
-		page_pool_release_page(rq->page_pool, dma_info->page);
-		put_page(dma_info->page);
-	}
+	page_pool_recycle_direct(rq->page_pool, dma_info->page);
 }
 
 static inline void mlx5e_page_release(struct mlx5e_rq *rq,
-				      struct mlx5e_dma_info *dma_info,
-				      bool recycle)
+				      struct mlx5e_dma_info *dma_info)
 {
 	if (rq->umem)
-		/* The `recycle` parameter is ignored, and the page is always
-		 * put into the Reuse Ring, because there is no way to return
-		 * the page to the userspace when the interface goes down.
-		 */
 		mlx5e_xsk_page_release(rq, dma_info);
 	else
-		mlx5e_page_release_dynamic(rq, dma_info, recycle);
+		mlx5e_page_release_dynamic(rq, dma_info);
 }
 
 static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq,
@@ -261,11 +250,10 @@ static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq,
 }
 
 static inline void mlx5e_put_rx_frag(struct mlx5e_rq *rq,
-				     struct mlx5e_wqe_frag_info *frag,
-				     bool recycle)
+				     struct mlx5e_wqe_frag_info *frag)
 {
 	if (frag->last_in_page)
-		mlx5e_page_release(rq, frag->di, recycle);
+		mlx5e_page_release(rq, frag->di);
 }
 
 static inline struct mlx5e_wqe_frag_info *get_frag(struct mlx5e_rq *rq, u16 ix)
@@ -293,26 +281,25 @@ static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe_cyc *wqe,
 
 free_frags:
 	while (--i >= 0)
-		mlx5e_put_rx_frag(rq, --frag, true);
+		mlx5e_put_rx_frag(rq, --frag);
 
 	return err;
 }
 
 static inline void mlx5e_free_rx_wqe(struct mlx5e_rq *rq,
-				     struct mlx5e_wqe_frag_info *wi,
-				     bool recycle)
+				     struct mlx5e_wqe_frag_info *wi)
 {
 	int i;
 
 	for (i = 0; i < rq->wqe.info.num_frags; i++, wi++)
-		mlx5e_put_rx_frag(rq, wi, recycle);
+		mlx5e_put_rx_frag(rq, wi);
 }
 
 void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix)
 {
 	struct mlx5e_wqe_frag_info *wi = get_frag(rq, ix);
 
-	mlx5e_free_rx_wqe(rq, wi, false);
+	mlx5e_free_rx_wqe(rq, wi);
 }
 
 static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, u8 wqe_bulk)
@@ -388,7 +375,7 @@ mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, bool recycle
 
 	for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++)
 		if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap))
-			mlx5e_page_release(rq, &dma_info[i], recycle);
+			mlx5e_page_release(rq, &dma_info[i]);
 }
 
 static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq, u8 n)
@@ -476,7 +463,7 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 err_unmap:
 	while (--i >= 0) {
 		dma_info--;
-		mlx5e_page_release(rq, dma_info, true);
+		mlx5e_page_release(rq, dma_info);
 	}
 
 err:
@@ -1120,7 +1107,7 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 	napi_gro_receive(rq->cq.napi, skb);
 
 free_wqe:
-	mlx5e_free_rx_wqe(rq, wi, true);
+	mlx5e_free_rx_wqe(rq, wi);
 wq_cyc_pop:
 	mlx5_wq_cyc_pop(wq);
 }
@@ -1167,7 +1154,7 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 	napi_gro_receive(rq->cq.napi, skb);
 
 free_wqe:
-	mlx5e_free_rx_wqe(rq, wi, true);
+	mlx5e_free_rx_wqe(rq, wi);
 wq_cyc_pop:
 	mlx5_wq_cyc_pop(wq);
 }
@@ -1478,7 +1465,7 @@ void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 	napi_gro_receive(rq->cq.napi, skb);
 
 wq_free_wqe:
-	mlx5e_free_rx_wqe(rq, wi, true);
+	mlx5e_free_rx_wqe(rq, wi);
 	mlx5_wq_cyc_pop(wq);
 }
 
@@ -1518,7 +1505,8 @@ void mlx5e_ipsec_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 	napi_gro_receive(rq->cq.napi, skb);
 
 wq_free_wqe:
-	mlx5e_free_rx_wqe(rq, wi, true);
+	mlx5e_free_rx_wqe(rq, wi);
+wq_cyc_pop:
 	mlx5_wq_cyc_pop(wq);
 }
 
-- 
2.17.1



* [PATCH 03/10 net-next] net/mlx5e: RX, Internal DMA mapping in page_pool
  2019-10-16 22:50 [PATCH 00/10 net-next] page_pool cleanups Jonathan Lemon
  2019-10-16 22:50 ` [PATCH 01/10 net-next] net/mlx5e: RX, Remove RX page-cache Jonathan Lemon
  2019-10-16 22:50 ` [PATCH 02/10 net-next] net/mlx5e: RX, Manage RX pages only via page pool API Jonathan Lemon
@ 2019-10-16 22:50 ` Jonathan Lemon
  2019-10-17  1:33   ` Jakub Kicinski
  2019-10-18 21:01   ` Saeed Mahameed
  2019-10-16 22:50 ` [PATCH 04/10 net-next] page_pool: Add API to update numa node and flush page caches Jonathan Lemon
                   ` (7 subsequent siblings)
  10 siblings, 2 replies; 34+ messages in thread
From: Jonathan Lemon @ 2019-10-16 22:50 UTC (permalink / raw)
  To: brouer, ilias.apalodimas, saeedm, tariqt; +Cc: netdev, kernel-team

From: Tariq Toukan <tariqt@mellanox.com>

Now that the RX page-cache was removed in the previous patch, let
the page_pool be responsible for the DMA mapping.
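
For reference, a minimal sketch of the driver-facing pattern after this
change, assuming the page_pool API shown in the diffs below (the helper
names, pool_size value and error handling are illustrative only):

#include <linux/device.h>
#include <linux/dma-direction.h>
#include <net/page_pool.h>

/* Pool creation: let the page_pool own the DMA mapping. */
static struct page_pool *example_create_rx_pool(struct device *dma_dev,
						int numa_node)
{
	struct page_pool_params pp_params = {
		.order     = 0,
		.flags     = PP_FLAG_DMA_MAP,	/* pool does DMA map/unmap */
		.pool_size = 1024,
		.nid       = numa_node,
		.dev       = dma_dev,
		.dma_dir   = DMA_FROM_DEVICE,
	};

	return page_pool_create(&pp_params);
}

/* RX refill: the DMA address is read back from the pool, with no
 * per-page dma_map_page()/dma_unmap_page() calls in the driver.
 */
static int example_refill(struct page_pool *pool, dma_addr_t *addr)
{
	struct page *page = page_pool_dev_alloc_pages(pool);

	if (unlikely(!page))
		return -ENOMEM;
	*addr = page_pool_get_dma_addr(page);
	return 0;
}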

Issue: 1487631
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>

Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h     |  2 --
 drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c |  3 +--
 .../net/ethernet/mellanox/mlx5/core/en_main.c    |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c  | 16 +---------------
 4 files changed, 3 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index a1ab5c76177d..2e281c755b65 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -926,10 +926,8 @@ bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev);
 bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev,
 				struct mlx5e_params *params);
 
-void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info);
 void mlx5e_page_release_dynamic(struct mlx5e_rq *rq,
 				struct mlx5e_dma_info *dma_info);
-void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info);
 void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
 void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
 bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index 1b26061cb959..8376b2789575 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -161,8 +161,7 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
 			goto xdp_abort;
 		__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags);
 		__set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
-		if (!xsk)
-			mlx5e_page_dma_unmap(rq, di);
+		/* xdp maps call xdp_release_frame() if needed */
 		rq->stats->xdp_redirect++;
 		return true;
 	default:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 168be1f800a3..2b828de1adf0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -546,7 +546,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 	} else {
 		/* Create a page_pool and register it with rxq */
 		pp_params.order     = 0;
-		pp_params.flags     = 0; /* No-internal DMA mapping in page_pool */
+		pp_params.flags     = PP_FLAG_DMA_MAP;
 		pp_params.pool_size = pool_size;
 		pp_params.nid       = cpu_to_node(c->cpu);
 		pp_params.dev       = c->pdev;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 033b8264a4e4..1b74d03fdf06 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -190,14 +190,7 @@ static inline int mlx5e_page_alloc_pool(struct mlx5e_rq *rq,
 	dma_info->page = page_pool_dev_alloc_pages(rq->page_pool);
 	if (unlikely(!dma_info->page))
 		return -ENOMEM;
-
-	dma_info->addr = dma_map_page(rq->pdev, dma_info->page, 0,
-				      PAGE_SIZE, rq->buff.map_dir);
-	if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) {
-		page_pool_recycle_direct(rq->page_pool, dma_info->page);
-		dma_info->page = NULL;
-		return -ENOMEM;
-	}
+	dma_info->addr = page_pool_get_dma_addr(dma_info->page);
 
 	return 0;
 }
@@ -211,16 +204,9 @@ static inline int mlx5e_page_alloc(struct mlx5e_rq *rq,
 		return mlx5e_page_alloc_pool(rq, dma_info);
 }
 
-void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info)
-{
-	dma_unmap_page(rq->pdev, dma_info->addr, PAGE_SIZE, rq->buff.map_dir);
-}
-
 void mlx5e_page_release_dynamic(struct mlx5e_rq *rq,
 				struct mlx5e_dma_info *dma_info)
 {
-	mlx5e_page_dma_unmap(rq, dma_info);
-
 	page_pool_recycle_direct(rq->page_pool, dma_info->page);
 }
 
-- 
2.17.1



* [PATCH 04/10 net-next] page_pool: Add API to update numa node and flush page caches
  2019-10-16 22:50 [PATCH 00/10 net-next] page_pool cleanups Jonathan Lemon
                   ` (2 preceding siblings ...)
  2019-10-16 22:50 ` [PATCH 03/10 net-next] net/mlx5e: RX, Internal DMA mapping in page_pool Jonathan Lemon
@ 2019-10-16 22:50 ` Jonathan Lemon
  2019-10-17 12:06   ` Ilias Apalodimas
  2019-10-16 22:50 ` [PATCH 05/10 net-next] net/mlx5e: Rx, Update page pool numa node when changed Jonathan Lemon
                   ` (6 subsequent siblings)
  10 siblings, 1 reply; 34+ messages in thread
From: Jonathan Lemon @ 2019-10-16 22:50 UTC (permalink / raw)
  To: brouer, ilias.apalodimas, saeedm, tariqt; +Cc: netdev, kernel-team

From: Saeed Mahameed <saeedm@mellanox.com>

Add page_pool_update_nid(), to be called from drivers when they detect
a numa node change.

It will:
1) Flush the pool's page cache and ptr_ring.
2) Update the page pool nid value so that pages are allocated from the
   new numa node (a driver-side usage sketch is shown below).
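
A rough sketch of the intended driver usage, mirroring the mlx5e change
made later in this series (the rq structure and function name here are
illustrative only):

#include <linux/topology.h>	/* numa_mem_id() */
#include <net/page_pool.h>

struct example_rq {
	struct page_pool *page_pool;
};

/* Called at the top of the RX NAPI poll loop, which satisfies the
 * "napi context or otherwise thread safe" requirement documented
 * for page_pool_update_nid().
 */
static void example_rx_poll_update_nid(struct example_rq *rq)
{
	if (rq->page_pool)
		page_pool_update_nid(rq->page_pool, numa_mem_id());
}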

Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
---
 include/net/page_pool.h | 10 ++++++++++
 net/core/page_pool.c    | 16 +++++++++++-----
 2 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index 2cbcdbdec254..fb13cf6055ff 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -226,4 +226,14 @@ static inline bool page_pool_put(struct page_pool *pool)
 	return refcount_dec_and_test(&pool->user_cnt);
 }
 
+/* Only safe from napi context or when user guarantees it is thread safe */
+void __page_pool_flush(struct page_pool *pool);
+static inline void page_pool_update_nid(struct page_pool *pool, int new_nid)
+{
+	if (unlikely(pool->p.nid != new_nid)) {
+		/* TODO: Add statistics/trace */
+		__page_pool_flush(pool);
+		pool->p.nid = new_nid;
+	}
+}
 #endif /* _NET_PAGE_POOL_H */
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 5bc65587f1c4..678cf85f273a 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -373,16 +373,13 @@ void __page_pool_free(struct page_pool *pool)
 }
 EXPORT_SYMBOL(__page_pool_free);
 
-/* Request to shutdown: release pages cached by page_pool, and check
- * for in-flight pages
- */
-bool __page_pool_request_shutdown(struct page_pool *pool)
+void __page_pool_flush(struct page_pool *pool)
 {
 	struct page *page;
 
 	/* Empty alloc cache, assume caller made sure this is
 	 * no-longer in use, and page_pool_alloc_pages() cannot be
-	 * call concurrently.
+	 * called concurrently.
 	 */
 	while (pool->alloc.count) {
 		page = pool->alloc.cache[--pool->alloc.count];
@@ -393,6 +390,15 @@ bool __page_pool_request_shutdown(struct page_pool *pool)
 	 * be in-flight.
 	 */
 	__page_pool_empty_ring(pool);
+}
+EXPORT_SYMBOL(__page_pool_flush);
+
+/* Request to shutdown: release pages cached by page_pool, and check
+ * for in-flight pages
+ */
+bool __page_pool_request_shutdown(struct page_pool *pool)
+{
+	__page_pool_flush(pool);
 
 	return __page_pool_safe_to_destroy(pool);
 }
-- 
2.17.1



* [PATCH 05/10 net-next] net/mlx5e: Rx, Update page pool numa node when changed
  2019-10-16 22:50 [PATCH 00/10 net-next] page_pool cleanups Jonathan Lemon
                   ` (3 preceding siblings ...)
  2019-10-16 22:50 ` [PATCH 04/10 net-next] page_pool: Add API to update numa node and flush page caches Jonathan Lemon
@ 2019-10-16 22:50 ` Jonathan Lemon
  2019-10-16 22:50 ` [PATCH 06/10 net-next] page_pool: Add page_pool_keep_page Jonathan Lemon
                   ` (5 subsequent siblings)
  10 siblings, 0 replies; 34+ messages in thread
From: Jonathan Lemon @ 2019-10-16 22:50 UTC (permalink / raw)
  To: brouer, ilias.apalodimas, saeedm, tariqt; +Cc: netdev, kernel-team

From: Saeed Mahameed <saeedm@mellanox.com>

On each RX ring NAPI poll cycle, check whether the local numa node
differs from the page pool nid, and if so update it using
page_pool_update_nid().

Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 1b74d03fdf06..57a5fe1e8055 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -1300,6 +1300,9 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
 	if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
 		return 0;
 
+	if (rq->page_pool)
+		page_pool_update_nid(rq->page_pool, numa_mem_id());
+
 	if (rq->cqd.left)
 		work_done += mlx5e_decompress_cqes_cont(rq, cqwq, 0, budget);
 
-- 
2.17.1



* [PATCH 06/10 net-next] page_pool: Add page_pool_keep_page
  2019-10-16 22:50 [PATCH 00/10 net-next] page_pool cleanups Jonathan Lemon
                   ` (4 preceding siblings ...)
  2019-10-16 22:50 ` [PATCH 05/10 net-next] net/mlx5e: Rx, Update page pool numa node when changed Jonathan Lemon
@ 2019-10-16 22:50 ` Jonathan Lemon
  2019-10-16 22:50 ` [PATCH 07/10 net-next] page_pool: allow configurable linear cache size Jonathan Lemon
                   ` (4 subsequent siblings)
  10 siblings, 0 replies; 34+ messages in thread
From: Jonathan Lemon @ 2019-10-16 22:50 UTC (permalink / raw)
  To: brouer, ilias.apalodimas, saeedm, tariqt; +Cc: netdev, kernel-team

When releasing a page to the pool, only retain the
page if page_pool_keep_page() returns true.

Do not flush the page pool on node changes, but instead
lazily discard the pages as they are returned.

Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
---
 include/net/page_pool.h |  2 --
 net/core/page_pool.c    | 39 +++++++++++++++++++++++----------------
 2 files changed, 23 insertions(+), 18 deletions(-)

diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index fb13cf6055ff..89bc91294b53 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -227,12 +227,10 @@ static inline bool page_pool_put(struct page_pool *pool)
 }
 
 /* Only safe from napi context or when user guarantees it is thread safe */
-void __page_pool_flush(struct page_pool *pool);
 static inline void page_pool_update_nid(struct page_pool *pool, int new_nid)
 {
 	if (unlikely(pool->p.nid != new_nid)) {
 		/* TODO: Add statistics/trace */
-		__page_pool_flush(pool);
 		pool->p.nid = new_nid;
 	}
 }
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 678cf85f273a..ea56823236c5 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -258,6 +258,7 @@ static bool __page_pool_recycle_into_ring(struct page_pool *pool,
 				   struct page *page)
 {
 	int ret;
+
 	/* BH protection not needed if current is serving softirq */
 	if (in_serving_softirq())
 		ret = ptr_ring_produce(&pool->ring, page);
@@ -272,8 +273,8 @@ static bool __page_pool_recycle_into_ring(struct page_pool *pool,
  *
  * Caller must provide appropriate safe context.
  */
-static bool __page_pool_recycle_direct(struct page *page,
-				       struct page_pool *pool)
+static bool __page_pool_recycle_into_cache(struct page *page,
+					   struct page_pool *pool)
 {
 	if (unlikely(pool->alloc.count == PP_ALLOC_CACHE_SIZE))
 		return false;
@@ -283,27 +284,35 @@ static bool __page_pool_recycle_direct(struct page *page,
 	return true;
 }
 
+/* Determine whether this page should be kept or returned
+ *
+ * refcnt == 1 means page_pool owns page.
+ */
+static bool page_pool_keep_page(struct page_pool *pool, struct page *page)
+{
+	return page_ref_count(page) == 1 &&
+	       page_to_nid(page) == pool->p.nid &&
+	       !page_is_pfmemalloc(page);
+}
+
 void __page_pool_put_page(struct page_pool *pool,
 			  struct page *page, bool allow_direct)
 {
 	/* This allocator is optimized for the XDP mode that uses
 	 * one-frame-per-page, but have fallbacks that act like the
 	 * regular page allocator APIs.
-	 *
-	 * refcnt == 1 means page_pool owns page, and can recycle it.
 	 */
-	if (likely(page_ref_count(page) == 1)) {
+	if (likely(page_pool_keep_page(pool, page))) {
 		/* Read barrier done in page_ref_count / READ_ONCE */
 
 		if (allow_direct && in_serving_softirq())
-			if (__page_pool_recycle_direct(page, pool))
+			if (__page_pool_recycle_into_cache(page, pool))
 				return;
 
-		if (!__page_pool_recycle_into_ring(pool, page)) {
-			/* Cache full, fallback to free pages */
-			__page_pool_return_page(pool, page);
-		}
-		return;
+		if (__page_pool_recycle_into_ring(pool, page))
+			return;
+
+		/* Cache full, fallback to return pages */
 	}
 	/* Fallback/non-XDP mode: API user have elevated refcnt.
 	 *
@@ -318,8 +327,7 @@ void __page_pool_put_page(struct page_pool *pool,
 	 * doing refcnt based recycle tricks, meaning another process
 	 * will be invoking put_page.
 	 */
-	__page_pool_clean_page(pool, page);
-	put_page(page);
+	__page_pool_return_page(pool, page);
 }
 EXPORT_SYMBOL(__page_pool_put_page);
 
@@ -373,7 +381,7 @@ void __page_pool_free(struct page_pool *pool)
 }
 EXPORT_SYMBOL(__page_pool_free);
 
-void __page_pool_flush(struct page_pool *pool)
+static void page_pool_flush(struct page_pool *pool)
 {
 	struct page *page;
 
@@ -391,14 +399,13 @@ void __page_pool_flush(struct page_pool *pool)
 	 */
 	__page_pool_empty_ring(pool);
 }
-EXPORT_SYMBOL(__page_pool_flush);
 
 /* Request to shutdown: release pages cached by page_pool, and check
  * for in-flight pages
  */
 bool __page_pool_request_shutdown(struct page_pool *pool)
 {
-	__page_pool_flush(pool);
+	page_pool_flush(pool);
 
 	return __page_pool_safe_to_destroy(pool);
 }
-- 
2.17.1



* [PATCH 07/10 net-next] page_pool: allow configurable linear cache size
  2019-10-16 22:50 [PATCH 00/10 net-next] page_pool cleanups Jonathan Lemon
                   ` (5 preceding siblings ...)
  2019-10-16 22:50 ` [PATCH 06/10 net-next] page_pool: Add page_pool_keep_page Jonathan Lemon
@ 2019-10-16 22:50 ` Jonathan Lemon
  2019-10-17  8:51   ` Jesper Dangaard Brouer
  2019-10-16 22:50 ` [PATCH 08/10 net-next] page_pool: Add statistics Jonathan Lemon
                   ` (3 subsequent siblings)
  10 siblings, 1 reply; 34+ messages in thread
From: Jonathan Lemon @ 2019-10-16 22:50 UTC (permalink / raw)
  To: brouer, ilias.apalodimas, saeedm, tariqt; +Cc: netdev, kernel-team

Some drivers may use more than one page per RX work entry.
Allow a configurable cache size, keeping the current defaults
when the requested size is zero.

Convert magic numbers into descriptive constants.

Re-arrange the page_pool structure for efficiency.
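
As a hedged example of the new knob (the values and helper name below
are illustrative, not recommendations), a driver that consumes several
pages per work entry could size the cache explicitly:

#include <linux/device.h>
#include <linux/dma-direction.h>
#include <net/page_pool.h>

static struct page_pool *example_create_pool(struct device *dev, int nid)
{
	struct page_pool_params pp_params = {
		.order      = 0,
		.flags      = PP_FLAG_DMA_MAP,
		.pool_size  = 2048,	/* ptr_ring size, 0 => 1024 default */
		.cache_size = 256,	/* 0 => PP_ALLOC_CACHE_DEFAULT (128) */
		.nid        = nid,
		.dev        = dev,
		.dma_dir    = DMA_FROM_DEVICE,
	};

	return page_pool_create(&pp_params);
}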

Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
---
 include/net/page_pool.h | 50 ++++++++++++++++++++---------------------
 net/core/page_pool.c    | 49 +++++++++++++++++++++++-----------------
 2 files changed, 54 insertions(+), 45 deletions(-)

diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index 89bc91294b53..fc340db42f9a 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -51,41 +51,34 @@
  * cache is already full (or partly full) then the XDP_DROP recycles
  * would have to take a slower code path.
  */
-#define PP_ALLOC_CACHE_SIZE	128
 #define PP_ALLOC_CACHE_REFILL	64
-struct pp_alloc_cache {
-	u32 count;
-	void *cache[PP_ALLOC_CACHE_SIZE];
-};
+#define PP_ALLOC_CACHE_DEFAULT	(2 * PP_ALLOC_CACHE_REFILL)
+#define PP_ALLOC_CACHE_LIMIT	512
+#define PP_ALLOC_POOL_DEFAULT	1024
+#define PP_ALLOC_POOL_LIMIT	32768
 
 struct page_pool_params {
 	unsigned int	flags;
 	unsigned int	order;
 	unsigned int	pool_size;
+	unsigned int	cache_size;
 	int		nid;  /* Numa node id to allocate from pages from */
-	struct device	*dev; /* device, for DMA pre-mapping purposes */
 	enum dma_data_direction dma_dir; /* DMA mapping direction */
+	struct device	*dev; /* device, for DMA pre-mapping purposes */
 };
 
 struct page_pool {
 	struct page_pool_params p;
 
+	u32 alloc_count;
         u32 pages_state_hold_cnt;
+	atomic_t pages_state_release_cnt;
 
-	/*
-	 * Data structure for allocation side
-	 *
-	 * Drivers allocation side usually already perform some kind
-	 * of resource protection.  Piggyback on this protection, and
-	 * require driver to protect allocation side.
-	 *
-	 * For NIC drivers this means, allocate a page_pool per
-	 * RX-queue. As the RX-queue is already protected by
-	 * Softirq/BH scheduling and napi_schedule. NAPI schedule
-	 * guarantee that a single napi_struct will only be scheduled
-	 * on a single CPU (see napi_schedule).
+	/* A page_pool is strictly tied to a single RX-queue being
+	 * protected by NAPI, due to above pp_alloc_cache. This
+	 * refcnt serves purpose is to simplify drivers error handling.
 	 */
-	struct pp_alloc_cache alloc ____cacheline_aligned_in_smp;
+	refcount_t user_cnt;
 
 	/* Data structure for storing recycled pages.
 	 *
@@ -100,13 +93,20 @@ struct page_pool {
 	 */
 	struct ptr_ring ring;
 
-	atomic_t pages_state_release_cnt;
-
-	/* A page_pool is strictly tied to a single RX-queue being
-	 * protected by NAPI, due to above pp_alloc_cache. This
-	 * refcnt serves purpose is to simplify drivers error handling.
+	/*
+	 * Data structure for allocation side
+	 *
+	 * Drivers allocation side usually already perform some kind
+	 * of resource protection.  Piggyback on this protection, and
+	 * require driver to protect allocation side.
+	 *
+	 * For NIC drivers this means, allocate a page_pool per
+	 * RX-queue. As the RX-queue is already protected by
+	 * Softirq/BH scheduling and napi_schedule. NAPI schedule
+	 * guarantee that a single napi_struct will only be scheduled
+	 * on a single CPU (see napi_schedule).
 	 */
-	refcount_t user_cnt;
+	void *alloc_cache[];
 };
 
 struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index ea56823236c5..f8fedecddb6f 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -18,22 +18,18 @@
 
 #include <trace/events/page_pool.h>
 
-static int page_pool_init(struct page_pool *pool,
-			  const struct page_pool_params *params)
+static int page_pool_init(struct page_pool *pool)
 {
-	unsigned int ring_qsize = 1024; /* Default */
-
-	memcpy(&pool->p, params, sizeof(pool->p));
 
 	/* Validate only known flags were used */
 	if (pool->p.flags & ~(PP_FLAG_ALL))
 		return -EINVAL;
 
-	if (pool->p.pool_size)
-		ring_qsize = pool->p.pool_size;
+	if (!pool->p.pool_size)
+		pool->p.pool_size = PP_ALLOC_POOL_DEFAULT;
 
 	/* Sanity limit mem that can be pinned down */
-	if (ring_qsize > 32768)
+	if (pool->p.pool_size > PP_ALLOC_POOL_LIMIT)
 		return -E2BIG;
 
 	/* DMA direction is either DMA_FROM_DEVICE or DMA_BIDIRECTIONAL.
@@ -44,7 +40,7 @@ static int page_pool_init(struct page_pool *pool,
 	    (pool->p.dma_dir != DMA_BIDIRECTIONAL))
 		return -EINVAL;
 
-	if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0)
+	if (ptr_ring_init(&pool->ring, pool->p.pool_size, GFP_KERNEL) < 0)
 		return -ENOMEM;
 
 	atomic_set(&pool->pages_state_release_cnt, 0);
@@ -61,13 +57,26 @@ static int page_pool_init(struct page_pool *pool,
 struct page_pool *page_pool_create(const struct page_pool_params *params)
 {
 	struct page_pool *pool;
+	u32 cache_size, size;
 	int err;
 
-	pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, params->nid);
+	cache_size = params->cache_size;
+	if (!cache_size)
+		cache_size = PP_ALLOC_CACHE_DEFAULT;
+
+	/* Sanity limit mem that can be pinned down */
+	if (cache_size > PP_ALLOC_CACHE_LIMIT)
+		return ERR_PTR(-E2BIG);
+
+	size = sizeof(*pool) + cache_size * sizeof(void *);
+	pool = kzalloc_node(size, GFP_KERNEL, params->nid);
 	if (!pool)
 		return ERR_PTR(-ENOMEM);
 
-	err = page_pool_init(pool, params);
+	memcpy(&pool->p, params, sizeof(pool->p));
+	pool->p.cache_size = cache_size;
+
+	err = page_pool_init(pool);
 	if (err < 0) {
 		pr_warn("%s() gave up with errno %d\n", __func__, err);
 		kfree(pool);
@@ -87,9 +96,9 @@ static struct page *__page_pool_get_cached(struct page_pool *pool)
 
 	/* Test for safe-context, caller should provide this guarantee */
 	if (likely(in_serving_softirq())) {
-		if (likely(pool->alloc.count)) {
+		if (likely(pool->alloc_count)) {
 			/* Fast-path */
-			page = pool->alloc.cache[--pool->alloc.count];
+			page = pool->alloc_cache[--pool->alloc_count];
 			return page;
 		}
 		refill = true;
@@ -105,8 +114,8 @@ static struct page *__page_pool_get_cached(struct page_pool *pool)
 	spin_lock(&r->consumer_lock);
 	page = __ptr_ring_consume(r);
 	if (refill)
-		pool->alloc.count = __ptr_ring_consume_batched(r,
-							pool->alloc.cache,
+		pool->alloc_count = __ptr_ring_consume_batched(r,
+							pool->alloc_cache,
 							PP_ALLOC_CACHE_REFILL);
 	spin_unlock(&r->consumer_lock);
 	return page;
@@ -276,11 +285,11 @@ static bool __page_pool_recycle_into_ring(struct page_pool *pool,
 static bool __page_pool_recycle_into_cache(struct page *page,
 					   struct page_pool *pool)
 {
-	if (unlikely(pool->alloc.count == PP_ALLOC_CACHE_SIZE))
+	if (unlikely(pool->alloc_count == pool->p.cache_size))
 		return false;
 
 	/* Caller MUST have verified/know (page_ref_count(page) == 1) */
-	pool->alloc.cache[pool->alloc.count++] = page;
+	pool->alloc_cache[pool->alloc_count++] = page;
 	return true;
 }
 
@@ -365,7 +374,7 @@ void __page_pool_free(struct page_pool *pool)
 	if (!page_pool_put(pool))
 		return;
 
-	WARN(pool->alloc.count, "API usage violation");
+	WARN(pool->alloc_count, "API usage violation");
 	WARN(!ptr_ring_empty(&pool->ring), "ptr_ring is not empty");
 
 	/* Can happen due to forced shutdown */
@@ -389,8 +398,8 @@ static void page_pool_flush(struct page_pool *pool)
 	 * no-longer in use, and page_pool_alloc_pages() cannot be
 	 * called concurrently.
 	 */
-	while (pool->alloc.count) {
-		page = pool->alloc.cache[--pool->alloc.count];
+	while (pool->alloc_count) {
+		page = pool->alloc_cache[--pool->alloc_count];
 		__page_pool_return_page(pool, page);
 	}
 
-- 
2.17.1



* [PATCH 08/10 net-next] page_pool: Add statistics
  2019-10-16 22:50 [PATCH 00/10 net-next] page_pool cleanups Jonathan Lemon
                   ` (6 preceding siblings ...)
  2019-10-16 22:50 ` [PATCH 07/10 net-next] page_pool: allow configurable linear cache size Jonathan Lemon
@ 2019-10-16 22:50 ` Jonathan Lemon
  2019-10-17  8:29   ` Jesper Dangaard Brouer
  2019-10-18 21:29   ` Saeed Mahameed
  2019-10-16 22:50 ` [PATCH 09/10 net-next] net/mlx5: Add page_pool stats to the Mellanox driver Jonathan Lemon
                   ` (2 subsequent siblings)
  10 siblings, 2 replies; 34+ messages in thread
From: Jonathan Lemon @ 2019-10-16 22:50 UTC (permalink / raw)
  To: brouer, ilias.apalodimas, saeedm, tariqt; +Cc: netdev, kernel-team

Add statistics to the page pool, providing visibility into its operation.

Callers can provide a location where the stats are stored; otherwise,
the page pool allocates its own statistics area.
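
A minimal sketch of the external-stats option, mirroring the mlx5 usage
in a later patch of this series (the stats container and helper name
here are illustrative only):

#include <net/page_pool.h>

struct example_rq_stats {
	struct page_pool_stats pool;	/* e.g. exported via ethtool */
};

static void example_set_pool_stats(struct page_pool_params *pp_params,
				   struct example_rq_stats *stats)
{
	/* If .stats is left NULL, the pool allocates its own area
	 * (marked internally with PP_FLAG_INTERNAL_STATS) and frees
	 * it when the pool is freed.
	 */
	pp_params->stats = &stats->pool;
}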

Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
---
 include/net/page_pool.h | 21 +++++++++++++---
 net/core/page_pool.c    | 55 +++++++++++++++++++++++++++++++++++------
 2 files changed, 65 insertions(+), 11 deletions(-)

diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index fc340db42f9a..4f383522b141 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -34,8 +34,11 @@
 #include <linux/ptr_ring.h>
 #include <linux/dma-direction.h>
 
-#define PP_FLAG_DMA_MAP 1 /* Should page_pool do the DMA map/unmap */
-#define PP_FLAG_ALL	PP_FLAG_DMA_MAP
+#define PP_FLAG_DMA_MAP		BIT(0) /* page_pool does the DMA map/unmap */
+#define PP_FLAG_ALL		(PP_FLAG_DMA_MAP)
+
+/* internal flags, not exposed to user */
+#define PP_FLAG_INTERNAL_STATS	BIT(8)
 
 /*
  * Fast allocation side cache array/stack
@@ -57,6 +60,17 @@
 #define PP_ALLOC_POOL_DEFAULT	1024
 #define PP_ALLOC_POOL_LIMIT	32768
 
+struct page_pool_stats {
+	u64 cache_hit;
+	u64 cache_full;
+	u64 cache_empty;
+	u64 ring_produce;
+	u64 ring_consume;
+	u64 ring_return;
+	u64 flush;
+	u64 node_change;
+};
+
 struct page_pool_params {
 	unsigned int	flags;
 	unsigned int	order;
@@ -65,6 +79,7 @@ struct page_pool_params {
 	int		nid;  /* Numa node id to allocate from pages from */
 	enum dma_data_direction dma_dir; /* DMA mapping direction */
 	struct device	*dev; /* device, for DMA pre-mapping purposes */
+	struct page_pool_stats *stats; /* pool stats stored externally */
 };
 
 struct page_pool {
@@ -230,8 +245,8 @@ static inline bool page_pool_put(struct page_pool *pool)
 static inline void page_pool_update_nid(struct page_pool *pool, int new_nid)
 {
 	if (unlikely(pool->p.nid != new_nid)) {
-		/* TODO: Add statistics/trace */
 		pool->p.nid = new_nid;
+		pool->p.stats->node_change++;
 	}
 }
 #endif /* _NET_PAGE_POOL_H */
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index f8fedecddb6f..ea6202813584 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -20,9 +20,10 @@
 
 static int page_pool_init(struct page_pool *pool)
 {
+	int size;
 
 	/* Validate only known flags were used */
-	if (pool->p.flags & ~(PP_FLAG_ALL))
+	if (pool->p.flags & ~PP_FLAG_ALL)
 		return -EINVAL;
 
 	if (!pool->p.pool_size)
@@ -40,8 +41,16 @@ static int page_pool_init(struct page_pool *pool)
 	    (pool->p.dma_dir != DMA_BIDIRECTIONAL))
 		return -EINVAL;
 
+	if (!pool->p.stats) {
+		size  = sizeof(struct page_pool_stats);
+		pool->p.stats = kzalloc_node(size, GFP_KERNEL, pool->p.nid);
+		if (!pool->p.stats)
+			return -ENOMEM;
+		pool->p.flags |= PP_FLAG_INTERNAL_STATS;
+	}
+
 	if (ptr_ring_init(&pool->ring, pool->p.pool_size, GFP_KERNEL) < 0)
-		return -ENOMEM;
+		goto fail;
 
 	atomic_set(&pool->pages_state_release_cnt, 0);
 
@@ -52,6 +61,12 @@ static int page_pool_init(struct page_pool *pool)
 		get_device(pool->p.dev);
 
 	return 0;
+
+fail:
+	if (pool->p.flags & PP_FLAG_INTERNAL_STATS)
+		kfree(pool->p.stats);
+
+	return -ENOMEM;
 }
 
 struct page_pool *page_pool_create(const struct page_pool_params *params)
@@ -98,9 +113,11 @@ static struct page *__page_pool_get_cached(struct page_pool *pool)
 	if (likely(in_serving_softirq())) {
 		if (likely(pool->alloc_count)) {
 			/* Fast-path */
+			pool->p.stats->cache_hit++;
 			page = pool->alloc_cache[--pool->alloc_count];
 			return page;
 		}
+		pool->p.stats->cache_empty++;
 		refill = true;
 	}
 
@@ -113,10 +130,13 @@ static struct page *__page_pool_get_cached(struct page_pool *pool)
 	 */
 	spin_lock(&r->consumer_lock);
 	page = __ptr_ring_consume(r);
-	if (refill)
+	if (refill) {
 		pool->alloc_count = __ptr_ring_consume_batched(r,
 							pool->alloc_cache,
 							PP_ALLOC_CACHE_REFILL);
+		pool->p.stats->ring_consume += pool->alloc_count;
+	}
+	pool->p.stats->ring_consume += !!page;
 	spin_unlock(&r->consumer_lock);
 	return page;
 }
@@ -266,15 +286,23 @@ static void __page_pool_return_page(struct page_pool *pool, struct page *page)
 static bool __page_pool_recycle_into_ring(struct page_pool *pool,
 				   struct page *page)
 {
+	struct ptr_ring *r = &pool->ring;
 	int ret;
 
-	/* BH protection not needed if current is serving softirq */
 	if (in_serving_softirq())
-		ret = ptr_ring_produce(&pool->ring, page);
+		spin_lock(&r->producer_lock);
 	else
-		ret = ptr_ring_produce_bh(&pool->ring, page);
+		spin_lock_bh(&r->producer_lock);
 
-	return (ret == 0) ? true : false;
+	ret = __ptr_ring_produce(r, page);
+	pool->p.stats->ring_produce++;
+
+	if (in_serving_softirq())
+		spin_unlock(&r->producer_lock);
+	else
+		spin_unlock_bh(&r->producer_lock);
+
+	return ret == 0;
 }
 
 /* Only allow direct recycling in special circumstances, into the
@@ -285,8 +313,10 @@ static bool __page_pool_recycle_into_ring(struct page_pool *pool,
 static bool __page_pool_recycle_into_cache(struct page *page,
 					   struct page_pool *pool)
 {
-	if (unlikely(pool->alloc_count == pool->p.cache_size))
+	if (unlikely(pool->alloc_count == pool->p.cache_size)) {
+		pool->p.stats->cache_full++;
 		return false;
+	}
 
 	/* Caller MUST have verified/know (page_ref_count(page) == 1) */
 	pool->alloc_cache[pool->alloc_count++] = page;
@@ -343,6 +373,7 @@ EXPORT_SYMBOL(__page_pool_put_page);
 static void __page_pool_empty_ring(struct page_pool *pool)
 {
 	struct page *page;
+	int count = 0;
 
 	/* Empty recycle ring */
 	while ((page = ptr_ring_consume_bh(&pool->ring))) {
@@ -351,8 +382,11 @@ static void __page_pool_empty_ring(struct page_pool *pool)
 			pr_crit("%s() page_pool refcnt %d violation\n",
 				__func__, page_ref_count(page));
 
+		count++;
 		__page_pool_return_page(pool, page);
 	}
+
+	pool->p.stats->ring_return += count;
 }
 
 static void __warn_in_flight(struct page_pool *pool)
@@ -381,6 +415,9 @@ void __page_pool_free(struct page_pool *pool)
 	if (!__page_pool_safe_to_destroy(pool))
 		__warn_in_flight(pool);
 
+	if (pool->p.flags & PP_FLAG_INTERNAL_STATS)
+		kfree(pool->p.stats);
+
 	ptr_ring_cleanup(&pool->ring, NULL);
 
 	if (pool->p.flags & PP_FLAG_DMA_MAP)
@@ -394,6 +431,8 @@ static void page_pool_flush(struct page_pool *pool)
 {
 	struct page *page;
 
+	pool->p.stats->flush++;
+
 	/* Empty alloc cache, assume caller made sure this is
 	 * no-longer in use, and page_pool_alloc_pages() cannot be
 	 * called concurrently.
-- 
2.17.1



* [PATCH 09/10 net-next] net/mlx5: Add page_pool stats to the Mellanox driver
  2019-10-16 22:50 [PATCH 00/10 net-next] page_pool cleanups Jonathan Lemon
                   ` (7 preceding siblings ...)
  2019-10-16 22:50 ` [PATCH 08/10 net-next] page_pool: Add statistics Jonathan Lemon
@ 2019-10-16 22:50 ` Jonathan Lemon
  2019-10-17 11:09   ` Jesper Dangaard Brouer
  2019-10-16 22:50 ` [PATCH 10/10 net-next] page_pool: Cleanup and rename page_pool functions Jonathan Lemon
  2019-10-18 20:50 ` [PATCH 00/10 net-next] page_pool cleanups Saeed Mahameed
  10 siblings, 1 reply; 34+ messages in thread
From: Jonathan Lemon @ 2019-10-16 22:50 UTC (permalink / raw)
  To: brouer, ilias.apalodimas, saeedm, tariqt; +Cc: netdev, kernel-team

Replace the now-deprecated internal cache stats with the page pool stats.

# ethtool -S eth0 | grep rx_pool
     rx_pool_cache_hit: 1646798
     rx_pool_cache_full: 0
     rx_pool_cache_empty: 15723566
     rx_pool_ring_produce: 474958
     rx_pool_ring_consume: 0
     rx_pool_ring_return: 474958
     rx_pool_flush: 144
     rx_pool_node_change: 0

This shows roughly a 10% cache hit rate for the page pool.

Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  1 +
 .../net/ethernet/mellanox/mlx5/core/en_main.c |  1 +
 .../ethernet/mellanox/mlx5/core/en_stats.c    | 39 ++++++++++++-------
 .../ethernet/mellanox/mlx5/core/en_stats.h    | 19 +++++----
 4 files changed, 35 insertions(+), 25 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 2e281c755b65..b34519061d12 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -50,6 +50,7 @@
 #include <net/xdp.h>
 #include <linux/dim.h>
 #include <linux/bits.h>
+#include <net/page_pool.h>
 #include "wq.h"
 #include "mlx5_core.h"
 #include "en_stats.h"
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 2b828de1adf0..f10b5838fb17 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -551,6 +551,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 		pp_params.nid       = cpu_to_node(c->cpu);
 		pp_params.dev       = c->pdev;
 		pp_params.dma_dir   = rq->buff.map_dir;
+		pp_params.stats     = &rq->stats->pool;
 
 		/* page_pool can be used even when there is no rq->xdp_prog,
 		 * given page_pool does not handle DMA mapping there is no
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index ac6fdcda7019..ad42d965d786 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -102,11 +102,14 @@ static const struct counter_desc sw_stats_desc[] = {
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_reuse) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_full) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_empty) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_busy) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_waive) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pool_cache_hit) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pool_cache_full) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pool_cache_empty) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pool_ring_produce) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pool_ring_consume) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pool_ring_return) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pool_flush) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pool_node_change) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_congst_umr) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_arfs_err) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_recover) },
@@ -214,11 +217,14 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
 		s->rx_buff_alloc_err += rq_stats->buff_alloc_err;
 		s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks;
 		s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts;
-		s->rx_cache_reuse += rq_stats->cache_reuse;
-		s->rx_cache_full  += rq_stats->cache_full;
-		s->rx_cache_empty += rq_stats->cache_empty;
-		s->rx_cache_busy  += rq_stats->cache_busy;
-		s->rx_cache_waive += rq_stats->cache_waive;
+		s->rx_pool_cache_hit += rq_stats->pool.cache_hit;
+		s->rx_pool_cache_full += rq_stats->pool.cache_full;
+		s->rx_pool_cache_empty += rq_stats->pool.cache_empty;
+		s->rx_pool_ring_produce += rq_stats->pool.ring_produce;
+		s->rx_pool_ring_consume += rq_stats->pool.ring_consume;
+		s->rx_pool_ring_return += rq_stats->pool.ring_return;
+		s->rx_pool_flush += rq_stats->pool.flush;
+		s->rx_pool_node_change += rq_stats->pool.node_change;
 		s->rx_congst_umr  += rq_stats->congst_umr;
 		s->rx_arfs_err    += rq_stats->arfs_err;
 		s->rx_recover     += rq_stats->recover;
@@ -1446,11 +1452,14 @@ static const struct counter_desc rq_stats_desc[] = {
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) },
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_reuse) },
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_full) },
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_empty) },
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_busy) },
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_waive) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pool.cache_hit) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pool.cache_full) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pool.cache_empty) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pool.ring_produce) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pool.ring_consume) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pool.ring_return) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pool.flush) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pool.node_change) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, congst_umr) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, arfs_err) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, recover) },
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index 79f261bf86ac..7d6001969400 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -109,11 +109,14 @@ struct mlx5e_sw_stats {
 	u64 rx_buff_alloc_err;
 	u64 rx_cqe_compress_blks;
 	u64 rx_cqe_compress_pkts;
-	u64 rx_cache_reuse;
-	u64 rx_cache_full;
-	u64 rx_cache_empty;
-	u64 rx_cache_busy;
-	u64 rx_cache_waive;
+	u64 rx_pool_cache_hit;
+	u64 rx_pool_cache_full;
+	u64 rx_pool_cache_empty;
+	u64 rx_pool_ring_produce;
+	u64 rx_pool_ring_consume;
+	u64 rx_pool_ring_return;
+	u64 rx_pool_flush;
+	u64 rx_pool_node_change;
 	u64 rx_congst_umr;
 	u64 rx_arfs_err;
 	u64 rx_recover;
@@ -245,14 +248,10 @@ struct mlx5e_rq_stats {
 	u64 buff_alloc_err;
 	u64 cqe_compress_blks;
 	u64 cqe_compress_pkts;
-	u64 cache_reuse;
-	u64 cache_full;
-	u64 cache_empty;
-	u64 cache_busy;
-	u64 cache_waive;
 	u64 congst_umr;
 	u64 arfs_err;
 	u64 recover;
+	struct page_pool_stats pool;
 };
 
 struct mlx5e_sq_stats {
-- 
2.17.1



* [PATCH 10/10 net-next] page_pool: Cleanup and rename page_pool functions.
  2019-10-16 22:50 [PATCH 00/10 net-next] page_pool cleanups Jonathan Lemon
                   ` (8 preceding siblings ...)
  2019-10-16 22:50 ` [PATCH 09/10 net-next] net/mlx5: Add page_pool stats to the Mellanox driver Jonathan Lemon
@ 2019-10-16 22:50 ` Jonathan Lemon
  2019-10-18 20:50 ` [PATCH 00/10 net-next] page_pool cleanups Saeed Mahameed
  10 siblings, 0 replies; 34+ messages in thread
From: Jonathan Lemon @ 2019-10-16 22:50 UTC (permalink / raw)
  To: brouer, ilias.apalodimas, saeedm, tariqt; +Cc: netdev, kernel-team

Functions starting with __ usually indicate those that are called
while holding a lock, or that are exported but should not be called
directly.  The internal static functions meet neither of these
criteria, so rename them to make the code more readable.

Move the stub functions to the end of the file in their own section,
for readability and ease of maintenance.  Add the missing
page_pool_get() stub function.

Checked by compiling the kernel without CONFIG_PAGE_POOL defined.

Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
---
 include/net/page_pool.h | 141 ++++++++++++++++++++--------------------
 net/core/page_pool.c    |  76 ++++++++++------------
 2 files changed, 102 insertions(+), 115 deletions(-)

diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index 4f383522b141..3d1590be5638 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -124,6 +124,8 @@ struct page_pool {
 	void *alloc_cache[];
 };
 
+#ifdef CONFIG_PAGE_POOL
+
 struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
 
 static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool)
@@ -133,8 +135,8 @@ static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool)
 	return page_pool_alloc_pages(pool, gfp);
 }
 
-/* get the stored dma direction. A driver might decide to treat this locally and
- * avoid the extra cache line from page_pool to determine the direction
+/* get the stored dma direction. A driver might decide to treat this locally
+ * and avoid the extra cache line from page_pool to determine the direction.
  */
 static
 inline enum dma_data_direction page_pool_get_dma_dir(struct page_pool *pool)
@@ -143,16 +145,36 @@ inline enum dma_data_direction page_pool_get_dma_dir(struct page_pool *pool)
 }
 
 struct page_pool *page_pool_create(const struct page_pool_params *params);
+void page_pool_free(struct page_pool *pool);
+void page_pool_put_page(struct page_pool *pool,
+			struct page *page, bool allow_direct);
 
-void __page_pool_free(struct page_pool *pool);
-static inline void page_pool_free(struct page_pool *pool)
+/* Very limited use-cases allow recycle direct */
+static inline void page_pool_recycle_direct(struct page_pool *pool,
+					    struct page *page)
 {
-	/* When page_pool isn't compiled-in, net/core/xdp.c doesn't
-	 * allow registering MEM_TYPE_PAGE_POOL, but shield linker.
-	 */
-#ifdef CONFIG_PAGE_POOL
-	__page_pool_free(pool);
-#endif
+	page_pool_put_page(pool, page, true);
+}
+
+/* API user MUST have disconnected alloc-side (not allowed to call
+ * page_pool_alloc_pages()) before calling this.  The free-side can
+ * still run concurrently, to handle in-flight packet-pages.
+ *
+ * A request to shutdown can fail (with false) if there are still
+ * in-flight packet-pages.
+ */
+bool page_pool_request_shutdown(struct page_pool *pool);
+
+/* Disconnects a page (from a page_pool).  API users can have a need
+ * to disconnect a page (from a page_pool), to allow it to be used as
+ * a regular page (that will eventually be returned to the normal
+ * page-allocator via put_page).
+ */
+void page_pool_release_page(struct page_pool *pool, struct page *page);
+
+static inline bool is_page_pool_compiled_in(void)
+{
+	return true;
 }
 
 /* Drivers use this instead of page_pool_free */
@@ -164,73 +186,11 @@ static inline void page_pool_destroy(struct page_pool *pool)
 	page_pool_free(pool);
 }
 
-/* Never call this directly, use helpers below */
-void __page_pool_put_page(struct page_pool *pool,
-			  struct page *page, bool allow_direct);
-
-static inline void page_pool_put_page(struct page_pool *pool,
-				      struct page *page, bool allow_direct)
-{
-	/* When page_pool isn't compiled-in, net/core/xdp.c doesn't
-	 * allow registering MEM_TYPE_PAGE_POOL, but shield linker.
-	 */
-#ifdef CONFIG_PAGE_POOL
-	__page_pool_put_page(pool, page, allow_direct);
-#endif
-}
-/* Very limited use-cases allow recycle direct */
-static inline void page_pool_recycle_direct(struct page_pool *pool,
-					    struct page *page)
-{
-	__page_pool_put_page(pool, page, true);
-}
-
-/* API user MUST have disconnected alloc-side (not allowed to call
- * page_pool_alloc_pages()) before calling this.  The free-side can
- * still run concurrently, to handle in-flight packet-pages.
- *
- * A request to shutdown can fail (with false) if there are still
- * in-flight packet-pages.
- */
-bool __page_pool_request_shutdown(struct page_pool *pool);
-static inline bool page_pool_request_shutdown(struct page_pool *pool)
-{
-	bool safe_to_remove = false;
-
-#ifdef CONFIG_PAGE_POOL
-	safe_to_remove = __page_pool_request_shutdown(pool);
-#endif
-	return safe_to_remove;
-}
-
-/* Disconnects a page (from a page_pool).  API users can have a need
- * to disconnect a page (from a page_pool), to allow it to be used as
- * a regular page (that will eventually be returned to the normal
- * page-allocator via put_page).
- */
-void page_pool_unmap_page(struct page_pool *pool, struct page *page);
-static inline void page_pool_release_page(struct page_pool *pool,
-					  struct page *page)
-{
-#ifdef CONFIG_PAGE_POOL
-	page_pool_unmap_page(pool, page);
-#endif
-}
-
 static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
 {
 	return page->dma_addr;
 }
 
-static inline bool is_page_pool_compiled_in(void)
-{
-#ifdef CONFIG_PAGE_POOL
-	return true;
-#else
-	return false;
-#endif
-}
-
 static inline void page_pool_get(struct page_pool *pool)
 {
 	refcount_inc(&pool->user_cnt);
@@ -249,4 +209,41 @@ static inline void page_pool_update_nid(struct page_pool *pool, int new_nid)
 		pool->p.stats->node_change++;
 	}
 }
+
+#else
+
+/* When page_pool isn't compiled-in, net/core/xdp.c doesn't
+ * allow registering MEM_TYPE_PAGE_POOL, but shield linker.
+ */
+
+static inline void page_pool_free(struct page_pool *pool)
+{
+}
+
+static inline void page_pool_put_page(struct page_pool *pool,
+				      struct page *page, bool allow_direct)
+{
+}
+
+static inline bool page_pool_request_shutdown(struct page_pool *pool)
+{
+	return false;
+}
+
+static inline void page_pool_release_page(struct page_pool *pool,
+					  struct page *page)
+{
+}
+
+static inline bool is_page_pool_compiled_in(void)
+{
+	return false;
+}
+
+static inline void page_pool_get(struct page_pool *pool)
+{
+}
+
+#endif /* CONFIG_PAGE_POOL */
+
 #endif /* _NET_PAGE_POOL_H */
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index ea6202813584..5ef062db61bc 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -103,7 +103,7 @@ struct page_pool *page_pool_create(const struct page_pool_params *params)
 EXPORT_SYMBOL(page_pool_create);
 
 /* fast path */
-static struct page *__page_pool_get_cached(struct page_pool *pool)
+static struct page *page_pool_get_cached(struct page_pool *pool)
 {
 	struct ptr_ring *r = &pool->ring;
 	bool refill = false;
@@ -143,8 +143,8 @@ static struct page *__page_pool_get_cached(struct page_pool *pool)
 
 /* slow path */
 noinline
-static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
-						 gfp_t _gfp)
+static struct page *page_pool_alloc_pages_slow(struct page_pool *pool,
+					       gfp_t _gfp)
 {
 	struct page *page;
 	gfp_t gfp = _gfp;
@@ -203,12 +203,12 @@ struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp)
 	struct page *page;
 
 	/* Fast-path: Get a page from cache */
-	page = __page_pool_get_cached(pool);
+	page = page_pool_get_cached(pool);
 	if (page)
 		return page;
 
 	/* Slow-path: cache empty, do real allocation */
-	page = __page_pool_alloc_pages_slow(pool, gfp);
+	page = page_pool_alloc_pages_slow(pool, gfp);
 	return page;
 }
 EXPORT_SYMBOL(page_pool_alloc_pages);
@@ -230,7 +230,7 @@ static s32 page_pool_inflight(struct page_pool *pool)
 	return distance;
 }
 
-static bool __page_pool_safe_to_destroy(struct page_pool *pool)
+static bool page_pool_safe_to_destroy(struct page_pool *pool)
 {
 	s32 inflight = page_pool_inflight(pool);
 
@@ -241,8 +241,7 @@ static bool __page_pool_safe_to_destroy(struct page_pool *pool)
 }
 
 /* Cleanup page_pool state from page */
-static void __page_pool_clean_page(struct page_pool *pool,
-				   struct page *page)
+void page_pool_release_page(struct page_pool *pool, struct page *page)
 {
 	dma_addr_t dma;
 
@@ -260,31 +259,22 @@ static void __page_pool_clean_page(struct page_pool *pool,
 	trace_page_pool_state_release(pool, page,
 			      atomic_read(&pool->pages_state_release_cnt));
 }
-
-/* unmap the page and clean our state */
-void page_pool_unmap_page(struct page_pool *pool, struct page *page)
-{
-	/* When page is unmapped, this implies page will not be
-	 * returned to page_pool.
-	 */
-	__page_pool_clean_page(pool, page);
-}
-EXPORT_SYMBOL(page_pool_unmap_page);
+EXPORT_SYMBOL(page_pool_release_page);
 
 /* Return a page to the page allocator, cleaning up our state */
-static void __page_pool_return_page(struct page_pool *pool, struct page *page)
+static void page_pool_return_page(struct page_pool *pool, struct page *page)
 {
-	__page_pool_clean_page(pool, page);
+	page_pool_release_page(pool, page);
 
 	put_page(page);
 	/* An optimization would be to call __free_pages(page, pool->p.order)
 	 * knowing page is not part of page-cache (thus avoiding a
-	 * __page_cache_release() call).
+	 * page_cache_release() call).
 	 */
 }
 
-static bool __page_pool_recycle_into_ring(struct page_pool *pool,
-				   struct page *page)
+static bool page_pool_recycle_into_ring(struct page_pool *pool,
+					struct page *page)
 {
 	struct ptr_ring *r = &pool->ring;
 	int ret;
@@ -310,8 +300,8 @@ static bool __page_pool_recycle_into_ring(struct page_pool *pool,
  *
  * Caller must provide appropriate safe context.
  */
-static bool __page_pool_recycle_into_cache(struct page *page,
-					   struct page_pool *pool)
+static bool page_pool_recycle_into_cache(struct page *page,
+					 struct page_pool *pool)
 {
 	if (unlikely(pool->alloc_count == pool->p.cache_size)) {
 		pool->p.stats->cache_full++;
@@ -334,8 +324,8 @@ static bool page_pool_keep_page(struct page_pool *pool, struct page *page)
 	       !page_is_pfmemalloc(page);
 }
 
-void __page_pool_put_page(struct page_pool *pool,
-			  struct page *page, bool allow_direct)
+void page_pool_put_page(struct page_pool *pool,
+			struct page *page, bool allow_direct)
 {
 	/* This allocator is optimized for the XDP mode that uses
 	 * one-frame-per-page, but have fallbacks that act like the
@@ -345,10 +335,10 @@ void __page_pool_put_page(struct page_pool *pool,
 		/* Read barrier done in page_ref_count / READ_ONCE */
 
 		if (allow_direct && in_serving_softirq())
-			if (__page_pool_recycle_into_cache(page, pool))
+			if (page_pool_recycle_into_cache(page, pool))
 				return;
 
-		if (__page_pool_recycle_into_ring(pool, page))
+		if (page_pool_recycle_into_ring(pool, page))
 			return;
 
 		/* Cache full, fallback to return pages */
@@ -366,11 +356,11 @@ void __page_pool_put_page(struct page_pool *pool,
 	 * doing refcnt based recycle tricks, meaning another process
 	 * will be invoking put_page.
 	 */
-	__page_pool_return_page(pool, page);
+	page_pool_return_page(pool, page);
 }
-EXPORT_SYMBOL(__page_pool_put_page);
+EXPORT_SYMBOL(page_pool_put_page);
 
-static void __page_pool_empty_ring(struct page_pool *pool)
+static void page_pool_empty_ring(struct page_pool *pool)
 {
 	struct page *page;
 	int count = 0;
@@ -383,13 +373,13 @@ static void __page_pool_empty_ring(struct page_pool *pool)
 				__func__, page_ref_count(page));
 
 		count++;
-		__page_pool_return_page(pool, page);
+		page_pool_return_page(pool, page);
 	}
 
 	pool->p.stats->ring_return += count;
 }
 
-static void __warn_in_flight(struct page_pool *pool)
+static void warn_in_flight(struct page_pool *pool)
 {
 	u32 release_cnt = atomic_read(&pool->pages_state_release_cnt);
 	u32 hold_cnt = READ_ONCE(pool->pages_state_hold_cnt);
@@ -402,7 +392,7 @@ static void __warn_in_flight(struct page_pool *pool)
 	     distance, hold_cnt, release_cnt);
 }
 
-void __page_pool_free(struct page_pool *pool)
+void page_pool_free(struct page_pool *pool)
 {
 	/* Only last user actually free/release resources */
 	if (!page_pool_put(pool))
@@ -412,8 +402,8 @@ void __page_pool_free(struct page_pool *pool)
 	WARN(!ptr_ring_empty(&pool->ring), "ptr_ring is not empty");
 
 	/* Can happen due to forced shutdown */
-	if (!__page_pool_safe_to_destroy(pool))
-		__warn_in_flight(pool);
+	if (!page_pool_safe_to_destroy(pool))
+		warn_in_flight(pool);
 
 	if (pool->p.flags & PP_FLAG_INTERNAL_STATS)
 		kfree(pool->p.stats);
@@ -425,7 +415,7 @@ void __page_pool_free(struct page_pool *pool)
 
 	kfree(pool);
 }
-EXPORT_SYMBOL(__page_pool_free);
+EXPORT_SYMBOL(page_pool_free);
 
 static void page_pool_flush(struct page_pool *pool)
 {
@@ -439,22 +429,22 @@ static void page_pool_flush(struct page_pool *pool)
 	 */
 	while (pool->alloc_count) {
 		page = pool->alloc_cache[--pool->alloc_count];
-		__page_pool_return_page(pool, page);
+		page_pool_return_page(pool, page);
 	}
 
 	/* No more consumers should exist, but producers could still
 	 * be in-flight.
 	 */
-	__page_pool_empty_ring(pool);
+	page_pool_empty_ring(pool);
 }
 
 /* Request to shutdown: release pages cached by page_pool, and check
  * for in-flight pages
  */
-bool __page_pool_request_shutdown(struct page_pool *pool)
+bool page_pool_request_shutdown(struct page_pool *pool)
 {
 	page_pool_flush(pool);
 
-	return __page_pool_safe_to_destroy(pool);
+	return page_pool_safe_to_destroy(pool);
 }
-EXPORT_SYMBOL(__page_pool_request_shutdown);
+EXPORT_SYMBOL(page_pool_request_shutdown);
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 34+ messages in thread

* Re: [PATCH 01/10 net-next] net/mlx5e: RX, Remove RX page-cache
  2019-10-16 22:50 ` [PATCH 01/10 net-next] net/mlx5e: RX, Remove RX page-cache Jonathan Lemon
@ 2019-10-17  1:30   ` Jakub Kicinski
  2019-10-18 20:03     ` Jonathan Lemon
  2019-10-18 20:53   ` Saeed Mahameed
  1 sibling, 1 reply; 34+ messages in thread
From: Jakub Kicinski @ 2019-10-17  1:30 UTC (permalink / raw)
  To: Jonathan Lemon
  Cc: brouer, ilias.apalodimas, saeedm, tariqt, netdev, kernel-team

On Wed, 16 Oct 2019 15:50:19 -0700, Jonathan Lemon wrote:
> From: Tariq Toukan <tariqt@mellanox.com>
> 
> Obsolete the RX page-cache layer, pages are now recycled
> in page_pool.
> 
> This patch introduce a temporary degradation as recycling
> does not keep the pages DMA-mapped. That is fixed in a
> downstream patch.
> 
> Issue: 1487631

Please drop these Issue identifiers.

> Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
> 

And no empty lines between tags.

> Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 03/10 net-next] net/mlx5e: RX, Internal DMA mapping in page_pool
  2019-10-16 22:50 ` [PATCH 03/10 net-next] net/mlx5e: RX, Internal DMA mapping in page_pool Jonathan Lemon
@ 2019-10-17  1:33   ` Jakub Kicinski
  2019-10-18 20:04     ` Jonathan Lemon
  2019-10-18 21:01   ` Saeed Mahameed
  1 sibling, 1 reply; 34+ messages in thread
From: Jakub Kicinski @ 2019-10-17  1:33 UTC (permalink / raw)
  To: Jonathan Lemon
  Cc: brouer, ilias.apalodimas, saeedm, tariqt, netdev, kernel-team

On Wed, 16 Oct 2019 15:50:21 -0700, Jonathan Lemon wrote:
> From: Tariq Toukan <tariqt@mellanox.com>
> 
> After RX page-cache is removed in previous patch, let the
> page_pool be responsible for the DMA mapping.
> 
> Issue: 1487631
> Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
> 
> Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>

IIUC you'll need to add DMA syncs here, no? map/unmap has syncing as a
side effect.
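
Roughly something like this on the recycle/alloc path, assuming the
pool keeps the mapping and the stored dma_addr (a sketch, not the
actual patch):

    dma_sync_single_for_device(pool->p.dev, page_pool_get_dma_addr(page),
                               PAGE_SIZE << pool->p.order,
                               pool->p.dma_dir);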

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 08/10 net-next] page_pool: Add statistics
  2019-10-16 22:50 ` [PATCH 08/10 net-next] page_pool: Add statistics Jonathan Lemon
@ 2019-10-17  8:29   ` Jesper Dangaard Brouer
  2019-10-18 21:29   ` Saeed Mahameed
  1 sibling, 0 replies; 34+ messages in thread
From: Jesper Dangaard Brouer @ 2019-10-17  8:29 UTC (permalink / raw)
  To: Jonathan Lemon
  Cc: ilias.apalodimas, saeedm, tariqt, netdev, kernel-team, brouer

On Wed, 16 Oct 2019 15:50:26 -0700
Jonathan Lemon <jonathan.lemon@gmail.com> wrote:

> Add statistics to the page pool, providing visibility into its operation.
> 
> Callers can provide a location where the stats are stored, otherwise
> the page pool will allocate a statistic area.
> 
> Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
> ---
>  include/net/page_pool.h | 21 +++++++++++++---
>  net/core/page_pool.c    | 55 +++++++++++++++++++++++++++++++++++------
>  2 files changed, 65 insertions(+), 11 deletions(-)
> 
> diff --git a/include/net/page_pool.h b/include/net/page_pool.h
> index fc340db42f9a..4f383522b141 100644
> --- a/include/net/page_pool.h
> +++ b/include/net/page_pool.h
> @@ -34,8 +34,11 @@
>  #include <linux/ptr_ring.h>
>  #include <linux/dma-direction.h>
>  
> -#define PP_FLAG_DMA_MAP 1 /* Should page_pool do the DMA map/unmap */
> -#define PP_FLAG_ALL	PP_FLAG_DMA_MAP
> +#define PP_FLAG_DMA_MAP		BIT(0) /* page_pool does the DMA map/unmap */
> +#define PP_FLAG_ALL		(PP_FLAG_DMA_MAP)
> +
> +/* internal flags, not exposed to user */
> +#define PP_FLAG_INTERNAL_STATS	BIT(8)
>  
>  /*
>   * Fast allocation side cache array/stack
> @@ -57,6 +60,17 @@
>  #define PP_ALLOC_POOL_DEFAULT	1024
>  #define PP_ALLOC_POOL_LIMIT	32768
>  
> +struct page_pool_stats {
> +	u64 cache_hit;
> +	u64 cache_full;
> +	u64 cache_empty;
> +	u64 ring_produce;
> +	u64 ring_consume;

You are placing producer and consumer counters on the same cache-line.
This is not acceptable.

The page pool and ptr_ring, are specifically designed to avoid
cache-line contention between consumers and producers.  This patch
kills that work.


> +	u64 ring_return;
> +	u64 flush;
> +	u64 node_change;
> +};

Another example of carefully avoiding cache-line bouncing is the
inflight tracking, e.g. the struct placement of pages_state_release_cnt
and pages_state_hold_cnt for inflight accounting.
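
A minimal sketch of the kind of split I mean (field names as in your
patch, the placement itself is just one way to do it):

    struct page_pool_stats {
            /* alloc/consumer side, only touched from NAPI context */
            u64 cache_hit;
            u64 cache_full;
            u64 cache_empty;
            u64 ring_consume;

            /* return/producer side and slow paths, may run on another CPU */
            u64 ring_produce ____cacheline_aligned_in_smp;
            u64 ring_return;
            u64 flush;
            u64 node_change;
    };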


> +
>  struct page_pool_params {
>  	unsigned int	flags;
>  	unsigned int	order;
> @@ -65,6 +79,7 @@ struct page_pool_params {
>  	int		nid;  /* Numa node id to allocate from pages from */
>  	enum dma_data_direction dma_dir; /* DMA mapping direction */
>  	struct device	*dev; /* device, for DMA pre-mapping purposes */
> +	struct page_pool_stats *stats; /* pool stats stored externally */
>  };
>  
>  struct page_pool {
> @@ -230,8 +245,8 @@ static inline bool page_pool_put(struct page_pool *pool)
>  static inline void page_pool_update_nid(struct page_pool *pool, int new_nid)
>  {
>  	if (unlikely(pool->p.nid != new_nid)) {
> -		/* TODO: Add statistics/trace */
>  		pool->p.nid = new_nid;
> +		pool->p.stats->node_change++;
>  	}
>  }
>  #endif /* _NET_PAGE_POOL_H */
> diff --git a/net/core/page_pool.c b/net/core/page_pool.c
> index f8fedecddb6f..ea6202813584 100644
> --- a/net/core/page_pool.c
> +++ b/net/core/page_pool.c
> @@ -20,9 +20,10 @@
>  
>  static int page_pool_init(struct page_pool *pool)
>  {
> +	int size;
>  
>  	/* Validate only known flags were used */
> -	if (pool->p.flags & ~(PP_FLAG_ALL))
> +	if (pool->p.flags & ~PP_FLAG_ALL)
>  		return -EINVAL;
>  
>  	if (!pool->p.pool_size)
> @@ -40,8 +41,16 @@ static int page_pool_init(struct page_pool *pool)
>  	    (pool->p.dma_dir != DMA_BIDIRECTIONAL))
>  		return -EINVAL;
>  
> +	if (!pool->p.stats) {
> +		size  = sizeof(struct page_pool_stats);
> +		pool->p.stats = kzalloc_node(size, GFP_KERNEL, pool->p.nid);
> +		if (!pool->p.stats)
> +			return -ENOMEM;
> +		pool->p.flags |= PP_FLAG_INTERNAL_STATS;
> +	}
> +
>  	if (ptr_ring_init(&pool->ring, pool->p.pool_size, GFP_KERNEL) < 0)
> -		return -ENOMEM;
> +		goto fail;
>  
>  	atomic_set(&pool->pages_state_release_cnt, 0);
>  
> @@ -52,6 +61,12 @@ static int page_pool_init(struct page_pool *pool)
>  		get_device(pool->p.dev);
>  
>  	return 0;
> +
> +fail:
> +	if (pool->p.flags & PP_FLAG_INTERNAL_STATS)
> +		kfree(pool->p.stats);
> +
> +	return -ENOMEM;
>  }
>  
>  struct page_pool *page_pool_create(const struct page_pool_params *params)
> @@ -98,9 +113,11 @@ static struct page *__page_pool_get_cached(struct page_pool *pool)
>  	if (likely(in_serving_softirq())) {
>  		if (likely(pool->alloc_count)) {
>  			/* Fast-path */
> +			pool->p.stats->cache_hit++;
>  			page = pool->alloc_cache[--pool->alloc_count];
>  			return page;
>  		}
> +		pool->p.stats->cache_empty++;
>  		refill = true;
>  	}
>  
> @@ -113,10 +130,13 @@ static struct page *__page_pool_get_cached(struct page_pool *pool)
>  	 */
>  	spin_lock(&r->consumer_lock);
>  	page = __ptr_ring_consume(r);
> -	if (refill)
> +	if (refill) {
>  		pool->alloc_count = __ptr_ring_consume_batched(r,
>  							pool->alloc_cache,
>  							PP_ALLOC_CACHE_REFILL);
> +		pool->p.stats->ring_consume += pool->alloc_count;
> +	}
> +	pool->p.stats->ring_consume += !!page;
>  	spin_unlock(&r->consumer_lock);
>  	return page;
>  }
> @@ -266,15 +286,23 @@ static void __page_pool_return_page(struct page_pool *pool, struct page *page)
>  static bool __page_pool_recycle_into_ring(struct page_pool *pool,
>  				   struct page *page)
>  {
> +	struct ptr_ring *r = &pool->ring;
>  	int ret;
>  
> -	/* BH protection not needed if current is serving softirq */
>  	if (in_serving_softirq())
> -		ret = ptr_ring_produce(&pool->ring, page);
> +		spin_lock(&r->producer_lock);
>  	else
> -		ret = ptr_ring_produce_bh(&pool->ring, page);
> +		spin_lock_bh(&r->producer_lock);
>  
> -	return (ret == 0) ? true : false;
> +	ret = __ptr_ring_produce(r, page);
> +	pool->p.stats->ring_produce++;
> +
> +	if (in_serving_softirq())
> +		spin_unlock(&r->producer_lock);
> +	else
> +		spin_unlock_bh(&r->producer_lock);
> +
> +	return ret == 0;
>  }
>  
>  /* Only allow direct recycling in special circumstances, into the
> @@ -285,8 +313,10 @@ static bool __page_pool_recycle_into_ring(struct page_pool *pool,
>  static bool __page_pool_recycle_into_cache(struct page *page,
>  					   struct page_pool *pool)
>  {
> -	if (unlikely(pool->alloc_count == pool->p.cache_size))
> +	if (unlikely(pool->alloc_count == pool->p.cache_size)) {
> +		pool->p.stats->cache_full++;
>  		return false;
> +	}
>  
>  	/* Caller MUST have verified/know (page_ref_count(page) == 1) */
>  	pool->alloc_cache[pool->alloc_count++] = page;
> @@ -343,6 +373,7 @@ EXPORT_SYMBOL(__page_pool_put_page);
>  static void __page_pool_empty_ring(struct page_pool *pool)
>  {
>  	struct page *page;
> +	int count = 0;
>  
>  	/* Empty recycle ring */
>  	while ((page = ptr_ring_consume_bh(&pool->ring))) {
> @@ -351,8 +382,11 @@ static void __page_pool_empty_ring(struct page_pool *pool)
>  			pr_crit("%s() page_pool refcnt %d violation\n",
>  				__func__, page_ref_count(page));
>  
> +		count++;
>  		__page_pool_return_page(pool, page);
>  	}
> +
> +	pool->p.stats->ring_return += count;
>  }
>  
>  static void __warn_in_flight(struct page_pool *pool)
> @@ -381,6 +415,9 @@ void __page_pool_free(struct page_pool *pool)
>  	if (!__page_pool_safe_to_destroy(pool))
>  		__warn_in_flight(pool);
>  
> +	if (pool->p.flags & PP_FLAG_INTERNAL_STATS)
> +		kfree(pool->p.stats);
> +
>  	ptr_ring_cleanup(&pool->ring, NULL);
>  
>  	if (pool->p.flags & PP_FLAG_DMA_MAP)
> @@ -394,6 +431,8 @@ static void page_pool_flush(struct page_pool *pool)
>  {
>  	struct page *page;
>  
> +	pool->p.stats->flush++;
> +
>  	/* Empty alloc cache, assume caller made sure this is
>  	 * no-longer in use, and page_pool_alloc_pages() cannot be
>  	 * called concurrently.

Have you benchmarked the overhead of adding this accounting?

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 07/10 net-next] page_pool: allow configurable linear cache size
  2019-10-16 22:50 ` [PATCH 07/10 net-next] page_pool: allow configurable linear cache size Jonathan Lemon
@ 2019-10-17  8:51   ` Jesper Dangaard Brouer
  2019-10-18 20:31     ` Jonathan Lemon
  0 siblings, 1 reply; 34+ messages in thread
From: Jesper Dangaard Brouer @ 2019-10-17  8:51 UTC (permalink / raw)
  To: Jonathan Lemon
  Cc: ilias.apalodimas, saeedm, tariqt, netdev, kernel-team, brouer

On Wed, 16 Oct 2019 15:50:25 -0700
Jonathan Lemon <jonathan.lemon@gmail.com> wrote:

> Some drivers may utilize more than one page per RX work entry.
> Allow a configurable cache size, with the same defaults if the
> size is zero.
> 
> Convert magic numbers into descriptive entries.
> 
> Re-arrange the page_pool structure for efficiency.

IMHO the re-arrange you did does not improve efficiency, it kills the
efficiency...

> Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
> ---
>  include/net/page_pool.h | 50 ++++++++++++++++++++---------------------
>  net/core/page_pool.c    | 49 +++++++++++++++++++++++-----------------
>  2 files changed, 54 insertions(+), 45 deletions(-)
> 
> diff --git a/include/net/page_pool.h b/include/net/page_pool.h
> index 89bc91294b53..fc340db42f9a 100644
> --- a/include/net/page_pool.h
> +++ b/include/net/page_pool.h
> @@ -51,41 +51,34 @@
>   * cache is already full (or partly full) then the XDP_DROP recycles
>   * would have to take a slower code path.
>   */
> -#define PP_ALLOC_CACHE_SIZE	128
>  #define PP_ALLOC_CACHE_REFILL	64
> -struct pp_alloc_cache {
> -	u32 count;
> -	void *cache[PP_ALLOC_CACHE_SIZE];
> -};
> +#define PP_ALLOC_CACHE_DEFAULT	(2 * PP_ALLOC_CACHE_REFILL)
> +#define PP_ALLOC_CACHE_LIMIT	512
> +#define PP_ALLOC_POOL_DEFAULT	1024
> +#define PP_ALLOC_POOL_LIMIT	32768
>  
>  struct page_pool_params {
>  	unsigned int	flags;
>  	unsigned int	order;
>  	unsigned int	pool_size;
> +	unsigned int	cache_size;
>  	int		nid;  /* Numa node id to allocate from pages from */
> -	struct device	*dev; /* device, for DMA pre-mapping purposes */
>  	enum dma_data_direction dma_dir; /* DMA mapping direction */
> +	struct device	*dev; /* device, for DMA pre-mapping purposes */
>  };
>  
>  struct page_pool {
>  	struct page_pool_params p;
>  
> +	u32 alloc_count;
>          u32 pages_state_hold_cnt;
> +	atomic_t pages_state_release_cnt;
  
The struct members pages_state_hold_cnt and pages_state_release_cnt,
MUST be kept on different cachelines, else the point of tracking
inflight pages this way is lost.


> -	/*
> -	 * Data structure for allocation side
> -	 *
> -	 * Drivers allocation side usually already perform some kind
> -	 * of resource protection.  Piggyback on this protection, and
> -	 * require driver to protect allocation side.
> -	 *
> -	 * For NIC drivers this means, allocate a page_pool per
> -	 * RX-queue. As the RX-queue is already protected by
> -	 * Softirq/BH scheduling and napi_schedule. NAPI schedule
> -	 * guarantee that a single napi_struct will only be scheduled
> -	 * on a single CPU (see napi_schedule).
> +	/* A page_pool is strictly tied to a single RX-queue being
> +	 * protected by NAPI, due to above pp_alloc_cache. This
                                           ^^^^^^^^^^^^^^    
You remove the 'pp_alloc_cache' in this patch, and still mention it in
the comments.

> +	 * refcnt serves purpose is to simplify drivers error handling.
>  	 */
> -	struct pp_alloc_cache alloc ____cacheline_aligned_in_smp;
> +	refcount_t user_cnt;
>  
>  	/* Data structure for storing recycled pages.
>  	 *
> @@ -100,13 +93,20 @@ struct page_pool {
>  	 */
>  	struct ptr_ring ring;
>  
> -	atomic_t pages_state_release_cnt;
> -
> -	/* A page_pool is strictly tied to a single RX-queue being
> -	 * protected by NAPI, due to above pp_alloc_cache. This
> -	 * refcnt serves purpose is to simplify drivers error handling.
> +	/*
> +	 * Data structure for allocation side
> +	 *
> +	 * Drivers allocation side usually already perform some kind
> +	 * of resource protection.  Piggyback on this protection, and
> +	 * require driver to protect allocation side.
> +	 *
> +	 * For NIC drivers this means, allocate a page_pool per
> +	 * RX-queue. As the RX-queue is already protected by
> +	 * Softirq/BH scheduling and napi_schedule. NAPI schedule
> +	 * guarantee that a single napi_struct will only be scheduled
> +	 * on a single CPU (see napi_schedule).
>  	 */
> -	refcount_t user_cnt;
> +	void *alloc_cache[];
>  };
>  
>  struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
> diff --git a/net/core/page_pool.c b/net/core/page_pool.c
> index ea56823236c5..f8fedecddb6f 100644
> --- a/net/core/page_pool.c
> +++ b/net/core/page_pool.c
> @@ -18,22 +18,18 @@
>  
>  #include <trace/events/page_pool.h>
>  
> -static int page_pool_init(struct page_pool *pool,
> -			  const struct page_pool_params *params)
> +static int page_pool_init(struct page_pool *pool)
>  {
> -	unsigned int ring_qsize = 1024; /* Default */
> -
> -	memcpy(&pool->p, params, sizeof(pool->p));
>  
>  	/* Validate only known flags were used */
>  	if (pool->p.flags & ~(PP_FLAG_ALL))
>  		return -EINVAL;
>  
> -	if (pool->p.pool_size)
> -		ring_qsize = pool->p.pool_size;
> +	if (!pool->p.pool_size)
> +		pool->p.pool_size = PP_ALLOC_POOL_DEFAULT;
>  
>  	/* Sanity limit mem that can be pinned down */
> -	if (ring_qsize > 32768)
> +	if (pool->p.pool_size > PP_ALLOC_POOL_LIMIT)
>  		return -E2BIG;
>  
>  	/* DMA direction is either DMA_FROM_DEVICE or DMA_BIDIRECTIONAL.
> @@ -44,7 +40,7 @@ static int page_pool_init(struct page_pool *pool,
>  	    (pool->p.dma_dir != DMA_BIDIRECTIONAL))
>  		return -EINVAL;
>  
> -	if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0)
> +	if (ptr_ring_init(&pool->ring, pool->p.pool_size, GFP_KERNEL) < 0)
>  		return -ENOMEM;
>  
>  	atomic_set(&pool->pages_state_release_cnt, 0);
> @@ -61,13 +57,26 @@ static int page_pool_init(struct page_pool *pool,
>  struct page_pool *page_pool_create(const struct page_pool_params *params)
>  {
>  	struct page_pool *pool;
> +	u32 cache_size, size;
>  	int err;
>  
> -	pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, params->nid);
> +	cache_size = params->cache_size;
> +	if (!cache_size)
> +		cache_size = PP_ALLOC_CACHE_DEFAULT;
> +
> +	/* Sanity limit mem that can be pinned down */
> +	if (cache_size > PP_ALLOC_CACHE_LIMIT)
> +		return ERR_PTR(-E2BIG);
> +
> +	size = sizeof(*pool) + cache_size * sizeof(void *);
> +	pool = kzalloc_node(size, GFP_KERNEL, params->nid);

You have now placed alloc_cache at the end of page_pool struct, this
allows for dynamic changing the size, that is kind of nice.  Before I
placed pp_alloc_cache in struct to make sure that we didn't have
false-sharing.  Now, I'm unsure if false-sharing can happen?
(depend on alignment from kzalloc_node).
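
One way to make the intent explicit -- a sketch, and it still assumes
the enclosing allocation starts on a cache-line boundary:

    struct page_pool {
            struct page_pool_params p;
            /* counters and user_cnt as in your patch */
            struct ptr_ring ring;

            /* NAPI/softirq-only alloc side: force its offset onto its
             * own cache line so it cannot share one with the ring fields
             */
            u32 alloc_count ____cacheline_aligned_in_smp;
            void *alloc_cache[];
    };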


>  	if (!pool)
>  		return ERR_PTR(-ENOMEM);
>  
> -	err = page_pool_init(pool, params);
> +	memcpy(&pool->p, params, sizeof(pool->p));
> +	pool->p.cache_size = cache_size;
> +
> +	err = page_pool_init(pool);
>  	if (err < 0) {
>  		pr_warn("%s() gave up with errno %d\n", __func__, err);
>  		kfree(pool);
> @@ -87,9 +96,9 @@ static struct page *__page_pool_get_cached(struct page_pool *pool)
>  
>  	/* Test for safe-context, caller should provide this guarantee */
>  	if (likely(in_serving_softirq())) {
> -		if (likely(pool->alloc.count)) {
> +		if (likely(pool->alloc_count)) {
>  			/* Fast-path */
> -			page = pool->alloc.cache[--pool->alloc.count];
> +			page = pool->alloc_cache[--pool->alloc_count];
>  			return page;
>  		}
>  		refill = true;
> @@ -105,8 +114,8 @@ static struct page *__page_pool_get_cached(struct page_pool *pool)
>  	spin_lock(&r->consumer_lock);
>  	page = __ptr_ring_consume(r);
>  	if (refill)
> -		pool->alloc.count = __ptr_ring_consume_batched(r,
> -							pool->alloc.cache,
> +		pool->alloc_count = __ptr_ring_consume_batched(r,
> +							pool->alloc_cache,
>  							PP_ALLOC_CACHE_REFILL);
>  	spin_unlock(&r->consumer_lock);
>  	return page;
> @@ -276,11 +285,11 @@ static bool __page_pool_recycle_into_ring(struct page_pool *pool,
>  static bool __page_pool_recycle_into_cache(struct page *page,
>  					   struct page_pool *pool)
>  {
> -	if (unlikely(pool->alloc.count == PP_ALLOC_CACHE_SIZE))
> +	if (unlikely(pool->alloc_count == pool->p.cache_size))
>  		return false;
>  
>  	/* Caller MUST have verified/know (page_ref_count(page) == 1) */
> -	pool->alloc.cache[pool->alloc.count++] = page;
> +	pool->alloc_cache[pool->alloc_count++] = page;
>  	return true;
>  }
>  
> @@ -365,7 +374,7 @@ void __page_pool_free(struct page_pool *pool)
>  	if (!page_pool_put(pool))
>  		return;
>  
> -	WARN(pool->alloc.count, "API usage violation");
> +	WARN(pool->alloc_count, "API usage violation");
>  	WARN(!ptr_ring_empty(&pool->ring), "ptr_ring is not empty");
>  
>  	/* Can happen due to forced shutdown */
> @@ -389,8 +398,8 @@ static void page_pool_flush(struct page_pool *pool)
>  	 * no-longer in use, and page_pool_alloc_pages() cannot be
>  	 * called concurrently.
>  	 */
> -	while (pool->alloc.count) {
> -		page = pool->alloc.cache[--pool->alloc.count];
> +	while (pool->alloc_count) {
> +		page = pool->alloc_cache[--pool->alloc_count];
>  		__page_pool_return_page(pool, page);
>  	}
>  



-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 09/10 net-next] net/mlx5: Add page_pool stats to the Mellanox driver
  2019-10-16 22:50 ` [PATCH 09/10 net-next] net/mlx5: Add page_pool stats to the Mellanox driver Jonathan Lemon
@ 2019-10-17 11:09   ` Jesper Dangaard Brouer
  2019-10-18 20:45     ` Jonathan Lemon
  0 siblings, 1 reply; 34+ messages in thread
From: Jesper Dangaard Brouer @ 2019-10-17 11:09 UTC (permalink / raw)
  To: Jonathan Lemon
  Cc: ilias.apalodimas, saeedm, tariqt, netdev, kernel-team, brouer

On Wed, 16 Oct 2019 15:50:27 -0700
Jonathan Lemon <jonathan.lemon@gmail.com> wrote:

> Replace the now deprecated internal cache stats with the page pool stats.

I can see that the stats you introduced are useful, but they have to be
implemented in a way that does not hurt performance.


> # ethtool -S eth0 | grep rx_pool
>      rx_pool_cache_hit: 1646798
>      rx_pool_cache_full: 0
>      rx_pool_cache_empty: 15723566
>      rx_pool_ring_produce: 474958
>      rx_pool_ring_consume: 0
>      rx_pool_ring_return: 474958
>      rx_pool_flush: 144
>      rx_pool_node_change: 0
> 
> Showing about a 10% hit rate for the page pool.

What is the workload behind the above stats?


> Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
> ---
>  drivers/net/ethernet/mellanox/mlx5/core/en.h  |  1 +
>  .../net/ethernet/mellanox/mlx5/core/en_main.c |  1 +
>  .../ethernet/mellanox/mlx5/core/en_stats.c    | 39 ++++++++++++-------
>  .../ethernet/mellanox/mlx5/core/en_stats.h    | 19 +++++----
>  4 files changed, 35 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
> index 2e281c755b65..b34519061d12 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
> @@ -50,6 +50,7 @@
>  #include <net/xdp.h>
>  #include <linux/dim.h>
>  #include <linux/bits.h>
> +#include <net/page_pool.h>
>  #include "wq.h"
>  #include "mlx5_core.h"
>  #include "en_stats.h"
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> index 2b828de1adf0..f10b5838fb17 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> @@ -551,6 +551,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
>  		pp_params.nid       = cpu_to_node(c->cpu);
>  		pp_params.dev       = c->pdev;
>  		pp_params.dma_dir   = rq->buff.map_dir;
> +		pp_params.stats     = &rq->stats->pool;
>  
>  		/* page_pool can be used even when there is no rq->xdp_prog,
>  		 * given page_pool does not handle DMA mapping there is no
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
> index ac6fdcda7019..ad42d965d786 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
> @@ -102,11 +102,14 @@ static const struct counter_desc sw_stats_desc[] = {
>  	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) },
>  	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) },
>  	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) },
> -	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_reuse) },
> -	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_full) },
> -	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_empty) },
> -	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_busy) },
> -	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_waive) },
> +	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pool_cache_hit) },
> +	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pool_cache_full) },
> +	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pool_cache_empty) },
> +	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pool_ring_produce) },
> +	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pool_ring_consume) },
> +	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pool_ring_return) },
> +	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pool_flush) },
> +	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pool_node_change) },
>  	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_congst_umr) },
>  	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_arfs_err) },
>  	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_recover) },
> @@ -214,11 +217,14 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
>  		s->rx_buff_alloc_err += rq_stats->buff_alloc_err;
>  		s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks;
>  		s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts;
> -		s->rx_cache_reuse += rq_stats->cache_reuse;
> -		s->rx_cache_full  += rq_stats->cache_full;
> -		s->rx_cache_empty += rq_stats->cache_empty;
> -		s->rx_cache_busy  += rq_stats->cache_busy;
> -		s->rx_cache_waive += rq_stats->cache_waive;
> +		s->rx_pool_cache_hit += rq_stats->pool.cache_hit;
> +		s->rx_pool_cache_full += rq_stats->pool.cache_full;
> +		s->rx_pool_cache_empty += rq_stats->pool.cache_empty;
> +		s->rx_pool_ring_produce += rq_stats->pool.ring_produce;
> +		s->rx_pool_ring_consume += rq_stats->pool.ring_consume;
> +		s->rx_pool_ring_return += rq_stats->pool.ring_return;
> +		s->rx_pool_flush += rq_stats->pool.flush;
> +		s->rx_pool_node_change += rq_stats->pool.node_change;
>  		s->rx_congst_umr  += rq_stats->congst_umr;
>  		s->rx_arfs_err    += rq_stats->arfs_err;
>  		s->rx_recover     += rq_stats->recover;
> @@ -1446,11 +1452,14 @@ static const struct counter_desc rq_stats_desc[] = {
>  	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) },
>  	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) },
>  	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) },
> -	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_reuse) },
> -	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_full) },
> -	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_empty) },
> -	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_busy) },
> -	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_waive) },
> +	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pool.cache_hit) },
> +	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pool.cache_full) },
> +	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pool.cache_empty) },
> +	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pool.ring_produce) },
> +	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pool.ring_consume) },
> +	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pool.ring_return) },
> +	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pool.flush) },
> +	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pool.node_change) },
>  	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, congst_umr) },
>  	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, arfs_err) },
>  	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, recover) },
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
> index 79f261bf86ac..7d6001969400 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
> @@ -109,11 +109,14 @@ struct mlx5e_sw_stats {
>  	u64 rx_buff_alloc_err;
>  	u64 rx_cqe_compress_blks;
>  	u64 rx_cqe_compress_pkts;
> -	u64 rx_cache_reuse;
> -	u64 rx_cache_full;
> -	u64 rx_cache_empty;
> -	u64 rx_cache_busy;
> -	u64 rx_cache_waive;
> +	u64 rx_pool_cache_hit;
> +	u64 rx_pool_cache_full;
> +	u64 rx_pool_cache_empty;
> +	u64 rx_pool_ring_produce;
> +	u64 rx_pool_ring_consume;
> +	u64 rx_pool_ring_return;
> +	u64 rx_pool_flush;
> +	u64 rx_pool_node_change;
>  	u64 rx_congst_umr;
>  	u64 rx_arfs_err;
>  	u64 rx_recover;
> @@ -245,14 +248,10 @@ struct mlx5e_rq_stats {
>  	u64 buff_alloc_err;
>  	u64 cqe_compress_blks;
>  	u64 cqe_compress_pkts;
> -	u64 cache_reuse;
> -	u64 cache_full;
> -	u64 cache_empty;
> -	u64 cache_busy;
> -	u64 cache_waive;
>  	u64 congst_umr;
>  	u64 arfs_err;
>  	u64 recover;
> +	struct page_pool_stats pool;
>  };
>  
>  struct mlx5e_sq_stats {



-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 04/10 net-next] page_pool: Add API to update numa node and flush page caches
  2019-10-16 22:50 ` [PATCH 04/10 net-next] page_pool: Add API to update numa node and flush page caches Jonathan Lemon
@ 2019-10-17 12:06   ` Ilias Apalodimas
  2019-10-18 21:07     ` Saeed Mahameed
  0 siblings, 1 reply; 34+ messages in thread
From: Ilias Apalodimas @ 2019-10-17 12:06 UTC (permalink / raw)
  To: Jonathan Lemon; +Cc: brouer, saeedm, tariqt, netdev, kernel-team

Hi Saeed,

On Wed, Oct 16, 2019 at 03:50:22PM -0700, Jonathan Lemon wrote:
> From: Saeed Mahameed <saeedm@mellanox.com>
> 
> Add page_pool_update_nid() to be called from drivers when they detect
> numa node changes.
> 
> It will do:
> 1) Flush the pool's page cache and ptr_ring.
> 2) Update page pool nid value to start allocating from the new numa
> node.
> 
> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
> Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
> ---
>  include/net/page_pool.h | 10 ++++++++++
>  net/core/page_pool.c    | 16 +++++++++++-----
>  2 files changed, 21 insertions(+), 5 deletions(-)
> 
> diff --git a/include/net/page_pool.h b/include/net/page_pool.h
> index 2cbcdbdec254..fb13cf6055ff 100644
> --- a/include/net/page_pool.h
> +++ b/include/net/page_pool.h
> @@ -226,4 +226,14 @@ static inline bool page_pool_put(struct page_pool *pool)
>  	return refcount_dec_and_test(&pool->user_cnt);
>  }
>  
> +/* Only safe from napi context or when user guarantees it is thread safe */
> +void __page_pool_flush(struct page_pool *pool);

This should be called per packet, right? Any noticeable impact on performance?

> +static inline void page_pool_update_nid(struct page_pool *pool, int new_nid)
> +{
> +	if (unlikely(pool->p.nid != new_nid)) {
> +		/* TODO: Add statistics/trace */
> +		__page_pool_flush(pool);
> +		pool->p.nid = new_nid;
> +	}
> +}
>  #endif /* _NET_PAGE_POOL_H */
> diff --git a/net/core/page_pool.c b/net/core/page_pool.c
> index 5bc65587f1c4..678cf85f273a 100644
> --- a/net/core/page_pool.c
> +++ b/net/core/page_pool.c
> @@ -373,16 +373,13 @@ void __page_pool_free(struct page_pool *pool)
>  }
>  EXPORT_SYMBOL(__page_pool_free);
>  
> -/* Request to shutdown: release pages cached by page_pool, and check
> - * for in-flight pages
> - */
> -bool __page_pool_request_shutdown(struct page_pool *pool)
> +void __page_pool_flush(struct page_pool *pool)
>  {
>  	struct page *page;
>  
>  	/* Empty alloc cache, assume caller made sure this is
>  	 * no-longer in use, and page_pool_alloc_pages() cannot be
> -	 * call concurrently.
> +	 * called concurrently.
>  	 */
>  	while (pool->alloc.count) {
>  		page = pool->alloc.cache[--pool->alloc.count];
> @@ -393,6 +390,15 @@ bool __page_pool_request_shutdown(struct page_pool *pool)
>  	 * be in-flight.
>  	 */
>  	__page_pool_empty_ring(pool);
> +}
> +EXPORT_SYMBOL(__page_pool_flush);

A later patch removes this, do we actually need it here?

> +
> +/* Request to shutdown: release pages cached by page_pool, and check
> + * for in-flight pages
> + */
> +bool __page_pool_request_shutdown(struct page_pool *pool)
> +{
> +	__page_pool_flush(pool);
>  
>  	return __page_pool_safe_to_destroy(pool);
>  }
> -- 
> 2.17.1
> 


Thanks
/Ilias

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 01/10 net-next] net/mlx5e: RX, Remove RX page-cache
  2019-10-17  1:30   ` Jakub Kicinski
@ 2019-10-18 20:03     ` Jonathan Lemon
  0 siblings, 0 replies; 34+ messages in thread
From: Jonathan Lemon @ 2019-10-18 20:03 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: brouer, ilias.apalodimas, saeedm, tariqt, netdev, kernel-team

On 16 Oct 2019, at 18:30, Jakub Kicinski wrote:

> On Wed, 16 Oct 2019 15:50:19 -0700, Jonathan Lemon wrote:
>> From: Tariq Toukan <tariqt@mellanox.com>
>>
>> Obsolete the RX page-cache layer, pages are now recycled
>> in page_pool.
>>
>> This patch introduce a temporary degradation as recycling
>> does not keep the pages DMA-mapped. That is fixed in a
>> downstream patch.
>>
>> Issue: 1487631
>
> Please drop these Issue identifiers.
>
>> Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
>>
>
> And no empty lines between tags.
>
>> Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>

Will fix.
-- 
Jonathan

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 03/10 net-next] net/mlx5e: RX, Internal DMA mapping in page_pool
  2019-10-17  1:33   ` Jakub Kicinski
@ 2019-10-18 20:04     ` Jonathan Lemon
  0 siblings, 0 replies; 34+ messages in thread
From: Jonathan Lemon @ 2019-10-18 20:04 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: brouer, ilias.apalodimas, saeedm, tariqt, netdev, kernel-team

On 16 Oct 2019, at 18:33, Jakub Kicinski wrote:

> On Wed, 16 Oct 2019 15:50:21 -0700, Jonathan Lemon wrote:
>> From: Tariq Toukan <tariqt@mellanox.com>
>>
>> After RX page-cache is removed in previous patch, let the
>> page_pool be responsible for the DMA mapping.
>>
>> Issue: 1487631
>> Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
>>
>> Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
>
> IIUC you'll need to add DMA syncs here, no? map/unmap has syncing as a
> side effect.

The driver still needs to do DMA sync for data transfers; this
only covers the initial mapping.
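
i.e. the RX path keeps doing something along these lines before the
CPU reads the payload (a sketch, sizes/offsets illustrative):

    dma_sync_single_range_for_cpu(rq->pdev, di->addr, wi->offset,
                                  frag_size, DMA_FROM_DEVICE);

and the matching dma_sync_single_for_device() before a page the CPU
wrote to (e.g. XDP_TX) is handed back to the HW.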
-- 
Jonathan

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 07/10 net-next] page_pool: allow configurable linear cache size
  2019-10-17  8:51   ` Jesper Dangaard Brouer
@ 2019-10-18 20:31     ` Jonathan Lemon
  0 siblings, 0 replies; 34+ messages in thread
From: Jonathan Lemon @ 2019-10-18 20:31 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: ilias.apalodimas, saeedm, tariqt, netdev, kernel-team



On 17 Oct 2019, at 1:51, Jesper Dangaard Brouer wrote:

> On Wed, 16 Oct 2019 15:50:25 -0700
> Jonathan Lemon <jonathan.lemon@gmail.com> wrote:
>
>> Some drivers may utilize more than one page per RX work entry.
>> Allow a configurable cache size, with the same defaults if the
>> size is zero.
>>
>> Convert magic numbers into descriptive entries.
>>
>> Re-arrange the page_pool structure for efficiency.
>
> IMHO the re-arrange you did does not improve efficiency, it kills the
> efficiency...
>
>> Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
>> ---
>>  include/net/page_pool.h | 50 
>> ++++++++++++++++++++---------------------
>>  net/core/page_pool.c    | 49 
>> +++++++++++++++++++++++-----------------
>>  2 files changed, 54 insertions(+), 45 deletions(-)
>>
>> diff --git a/include/net/page_pool.h b/include/net/page_pool.h
>> index 89bc91294b53..fc340db42f9a 100644
>> --- a/include/net/page_pool.h
>> +++ b/include/net/page_pool.h
>> @@ -51,41 +51,34 @@
>>   * cache is already full (or partly full) then the XDP_DROP recycles
>>   * would have to take a slower code path.
>>   */
>> -#define PP_ALLOC_CACHE_SIZE	128
>>  #define PP_ALLOC_CACHE_REFILL	64
>> -struct pp_alloc_cache {
>> -	u32 count;
>> -	void *cache[PP_ALLOC_CACHE_SIZE];
>> -};
>> +#define PP_ALLOC_CACHE_DEFAULT	(2 * PP_ALLOC_CACHE_REFILL)
>> +#define PP_ALLOC_CACHE_LIMIT	512
>> +#define PP_ALLOC_POOL_DEFAULT	1024
>> +#define PP_ALLOC_POOL_LIMIT	32768
>>
>>  struct page_pool_params {
>>  	unsigned int	flags;
>>  	unsigned int	order;
>>  	unsigned int	pool_size;
>> +	unsigned int	cache_size;
>>  	int		nid;  /* Numa node id to allocate from pages from */
>> -	struct device	*dev; /* device, for DMA pre-mapping purposes */
>>  	enum dma_data_direction dma_dir; /* DMA mapping direction */
>> +	struct device	*dev; /* device, for DMA pre-mapping purposes */
>>  };
>>
>>  struct page_pool {
>>  	struct page_pool_params p;
>>
>> +	u32 alloc_count;
>>          u32 pages_state_hold_cnt;
>> +	atomic_t pages_state_release_cnt;
>
> The struct members pages_state_hold_cnt and pages_state_release_cnt,
> MUST be kept on different cachelines, else the point of tracking
> inflight pages this way is lost.

Will update, but there are other items that make this get lost
in the noise.


>> -	/*
>> -	 * Data structure for allocation side
>> -	 *
>> -	 * Drivers allocation side usually already perform some kind
>> -	 * of resource protection.  Piggyback on this protection, and
>> -	 * require driver to protect allocation side.
>> -	 *
>> -	 * For NIC drivers this means, allocate a page_pool per
>> -	 * RX-queue. As the RX-queue is already protected by
>> -	 * Softirq/BH scheduling and napi_schedule. NAPI schedule
>> -	 * guarantee that a single napi_struct will only be scheduled
>> -	 * on a single CPU (see napi_schedule).
>> +	/* A page_pool is strictly tied to a single RX-queue being
>> +	 * protected by NAPI, due to above pp_alloc_cache. This
>                                            ^^^^^^^^^^^^^^
> You remove the 'pp_alloc_cache' in this patch, and still mention it in
> the comments.

Will fix.


>> +	 * refcnt serves purpose is to simplify drivers error handling.
>>  	 */
>> -	struct pp_alloc_cache alloc ____cacheline_aligned_in_smp;
>> +	refcount_t user_cnt;
>>
>>  	/* Data structure for storing recycled pages.
>>  	 *
>> @@ -100,13 +93,20 @@ struct page_pool {
>>  	 */
>>  	struct ptr_ring ring;
>>
>> -	atomic_t pages_state_release_cnt;
>> -
>> -	/* A page_pool is strictly tied to a single RX-queue being
>> -	 * protected by NAPI, due to above pp_alloc_cache. This
>> -	 * refcnt serves purpose is to simplify drivers error handling.
>> +	/*
>> +	 * Data structure for allocation side
>> +	 *
>> +	 * Drivers allocation side usually already perform some kind
>> +	 * of resource protection.  Piggyback on this protection, and
>> +	 * require driver to protect allocation side.
>> +	 *
>> +	 * For NIC drivers this means, allocate a page_pool per
>> +	 * RX-queue. As the RX-queue is already protected by
>> +	 * Softirq/BH scheduling and napi_schedule. NAPI schedule
>> +	 * guarantee that a single napi_struct will only be scheduled
>> +	 * on a single CPU (see napi_schedule).
>>  	 */
>> -	refcount_t user_cnt;
>> +	void *alloc_cache[];
>>  };
>>
>>  struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t 
>> gfp);
>> diff --git a/net/core/page_pool.c b/net/core/page_pool.c
>> index ea56823236c5..f8fedecddb6f 100644
>> --- a/net/core/page_pool.c
>> +++ b/net/core/page_pool.c
>> @@ -18,22 +18,18 @@
>>
>>  #include <trace/events/page_pool.h>
>>
>> -static int page_pool_init(struct page_pool *pool,
>> -			  const struct page_pool_params *params)
>> +static int page_pool_init(struct page_pool *pool)
>>  {
>> -	unsigned int ring_qsize = 1024; /* Default */
>> -
>> -	memcpy(&pool->p, params, sizeof(pool->p));
>>
>>  	/* Validate only known flags were used */
>>  	if (pool->p.flags & ~(PP_FLAG_ALL))
>>  		return -EINVAL;
>>
>> -	if (pool->p.pool_size)
>> -		ring_qsize = pool->p.pool_size;
>> +	if (!pool->p.pool_size)
>> +		pool->p.pool_size = PP_ALLOC_POOL_DEFAULT;
>>
>>  	/* Sanity limit mem that can be pinned down */
>> -	if (ring_qsize > 32768)
>> +	if (pool->p.pool_size > PP_ALLOC_POOL_LIMIT)
>>  		return -E2BIG;
>>
>>  	/* DMA direction is either DMA_FROM_DEVICE or DMA_BIDIRECTIONAL.
>> @@ -44,7 +40,7 @@ static int page_pool_init(struct page_pool *pool,
>>  	    (pool->p.dma_dir != DMA_BIDIRECTIONAL))
>>  		return -EINVAL;
>>
>> -	if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0)
>> +	if (ptr_ring_init(&pool->ring, pool->p.pool_size, GFP_KERNEL) < 0)
>>  		return -ENOMEM;
>>
>>  	atomic_set(&pool->pages_state_release_cnt, 0);
>> @@ -61,13 +57,26 @@ static int page_pool_init(struct page_pool *pool,
>>  struct page_pool *page_pool_create(const struct page_pool_params 
>> *params)
>>  {
>>  	struct page_pool *pool;
>> +	u32 cache_size, size;
>>  	int err;
>>
>> -	pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, params->nid);
>> +	cache_size = params->cache_size;
>> +	if (!cache_size)
>> +		cache_size = PP_ALLOC_CACHE_DEFAULT;
>> +
>> +	/* Sanity limit mem that can be pinned down */
>> +	if (cache_size > PP_ALLOC_CACHE_LIMIT)
>> +		return ERR_PTR(-E2BIG);
>> +
>> +	size = sizeof(*pool) + cache_size * sizeof(void *);
>> +	pool = kzalloc_node(size, GFP_KERNEL, params->nid);
>
> You have now placed alloc_cache at the end of page_pool struct, this
> allows for dynamic changing the size, that is kind of nice.  Before I
> placed pp_alloc_cache in struct to make sure that we didn't have
> false-sharing.  Now, I'm unsure if false-sharing can happen?
> (depend on alignment from kzalloc_node).


>
>>  	if (!pool)
>>  		return ERR_PTR(-ENOMEM);
>>
>> -	err = page_pool_init(pool, params);
>> +	memcpy(&pool->p, params, sizeof(pool->p));
>> +	pool->p.cache_size = cache_size;
>> +
>> +	err = page_pool_init(pool);
>>  	if (err < 0) {
>>  		pr_warn("%s() gave up with errno %d\n", __func__, err);
>>  		kfree(pool);
>> @@ -87,9 +96,9 @@ static struct page *__page_pool_get_cached(struct 
>> page_pool *pool)
>>
>>  	/* Test for safe-context, caller should provide this guarantee */
>>  	if (likely(in_serving_softirq())) {
>> -		if (likely(pool->alloc.count)) {
>> +		if (likely(pool->alloc_count)) {
>>  			/* Fast-path */
>> -			page = pool->alloc.cache[--pool->alloc.count];
>> +			page = pool->alloc_cache[--pool->alloc_count];
>>  			return page;
>>  		}
>>  		refill = true;
>> @@ -105,8 +114,8 @@ static struct page *__page_pool_get_cached(struct 
>> page_pool *pool)
>>  	spin_lock(&r->consumer_lock);
>>  	page = __ptr_ring_consume(r);
>>  	if (refill)
>> -		pool->alloc.count = __ptr_ring_consume_batched(r,
>> -							pool->alloc.cache,
>> +		pool->alloc_count = __ptr_ring_consume_batched(r,
>> +							pool->alloc_cache,
>>  							PP_ALLOC_CACHE_REFILL);
>>  	spin_unlock(&r->consumer_lock);
>>  	return page;
>> @@ -276,11 +285,11 @@ static bool 
>> __page_pool_recycle_into_ring(struct page_pool *pool,
>>  static bool __page_pool_recycle_into_cache(struct page *page,
>>  					   struct page_pool *pool)
>>  {
>> -	if (unlikely(pool->alloc.count == PP_ALLOC_CACHE_SIZE))
>> +	if (unlikely(pool->alloc_count == pool->p.cache_size))
>>  		return false;
>>
>>  	/* Caller MUST have verified/know (page_ref_count(page) == 1) */
>> -	pool->alloc.cache[pool->alloc.count++] = page;
>> +	pool->alloc_cache[pool->alloc_count++] = page;
>>  	return true;
>>  }
>>
>> @@ -365,7 +374,7 @@ void __page_pool_free(struct page_pool *pool)
>>  	if (!page_pool_put(pool))
>>  		return;
>>
>> -	WARN(pool->alloc.count, "API usage violation");
>> +	WARN(pool->alloc_count, "API usage violation");
>>  	WARN(!ptr_ring_empty(&pool->ring), "ptr_ring is not empty");
>>
>>  	/* Can happen due to forced shutdown */
>> @@ -389,8 +398,8 @@ static void page_pool_flush(struct page_pool 
>> *pool)
>>  	 * no-longer in use, and page_pool_alloc_pages() cannot be
>>  	 * called concurrently.
>>  	 */
>> -	while (pool->alloc.count) {
>> -		page = pool->alloc.cache[--pool->alloc.count];
>> +	while (pool->alloc_count) {
>> +		page = pool->alloc_cache[--pool->alloc_count];
>>  		__page_pool_return_page(pool, page);
>>  	}
>>
>
>
>
> -- 
> Best regards,
>   Jesper Dangaard Brouer
>   MSc.CS, Principal Kernel Engineer at Red Hat
>   LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 09/10 net-next] net/mlx5: Add page_pool stats to the Mellanox driver
  2019-10-17 11:09   ` Jesper Dangaard Brouer
@ 2019-10-18 20:45     ` Jonathan Lemon
  0 siblings, 0 replies; 34+ messages in thread
From: Jonathan Lemon @ 2019-10-18 20:45 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: ilias.apalodimas, saeedm, tariqt, netdev, kernel-team



On 17 Oct 2019, at 4:09, Jesper Dangaard Brouer wrote:

> On Wed, 16 Oct 2019 15:50:27 -0700
> Jonathan Lemon <jonathan.lemon@gmail.com> wrote:
>
>> Replace the now deprecated internal cache stats with the page pool
>> stats.
>
> I can see that the stats you introduced are useful, but they have to be
> implemented in a way that does not hurt performance.

They're not noticeable, but even if they were, they are needed
for production; otherwise there's no way to identify problems.

I can separate the ring consume/produce counters so they
are always separated by a cache line distance.



>> # ethtool -S eth0 | grep rx_pool
>>      rx_pool_cache_hit: 1646798
>>      rx_pool_cache_full: 0
>>      rx_pool_cache_empty: 15723566
>>      rx_pool_ring_produce: 474958
>>      rx_pool_ring_consume: 0
>>      rx_pool_ring_return: 474958
>>      rx_pool_flush: 144
>>      rx_pool_node_change: 0
>>
>> Showing about a 10% hit rate for the page pool.
>
> What is the workload from above stats?

Network traffic from a proxygen load balancer.  From
this, we see the ptr_ring is completely unused except
for flushing operations.
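
For the record, that is read straight off the counters quoted above:

    cache_hit / (cache_hit + cache_empty)
      = 1646798 / (1646798 + 15723566)
      ~ 9.5%

which appears to be where the "about a 10% hit rate" figure comes from,
and ring_consume = 0 is what shows the refill path never pulls anything
back out of the ptr_ring.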
-- 
Jonathan

>
>
>> Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
>> ---
>>  drivers/net/ethernet/mellanox/mlx5/core/en.h  |  1 +
>>  .../net/ethernet/mellanox/mlx5/core/en_main.c |  1 +
>>  .../ethernet/mellanox/mlx5/core/en_stats.c    | 39 
>> ++++++++++++-------
>>  .../ethernet/mellanox/mlx5/core/en_stats.h    | 19 +++++----
>>  4 files changed, 35 insertions(+), 25 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h 
>> b/drivers/net/ethernet/mellanox/mlx5/core/en.h
>> index 2e281c755b65..b34519061d12 100644
>> --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
>> @@ -50,6 +50,7 @@
>>  #include <net/xdp.h>
>>  #include <linux/dim.h>
>>  #include <linux/bits.h>
>> +#include <net/page_pool.h>
>>  #include "wq.h"
>>  #include "mlx5_core.h"
>>  #include "en_stats.h"
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
>> b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
>> index 2b828de1adf0..f10b5838fb17 100644
>> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
>> @@ -551,6 +551,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel 
>> *c,
>>  		pp_params.nid       = cpu_to_node(c->cpu);
>>  		pp_params.dev       = c->pdev;
>>  		pp_params.dma_dir   = rq->buff.map_dir;
>> +		pp_params.stats     = &rq->stats->pool;
>>
>>  		/* page_pool can be used even when there is no rq->xdp_prog,
>>  		 * given page_pool does not handle DMA mapping there is no
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c 
>> b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
>> index ac6fdcda7019..ad42d965d786 100644
>> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
>> @@ -102,11 +102,14 @@ static const struct counter_desc 
>> sw_stats_desc[] = {
>>  	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) },
>>  	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) 
>> },
>>  	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) 
>> },
>> -	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_reuse) },
>> -	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_full) },
>> -	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_empty) },
>> -	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_busy) },
>> -	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_waive) },
>> +	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pool_cache_hit) },
>> +	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pool_cache_full) },
>> +	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pool_cache_empty) },
>> +	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pool_ring_produce) 
>> },
>> +	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pool_ring_consume) 
>> },
>> +	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pool_ring_return) },
>> +	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pool_flush) },
>> +	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pool_node_change) },
>>  	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_congst_umr) },
>>  	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_arfs_err) },
>>  	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_recover) },
>> @@ -214,11 +217,14 @@ static void mlx5e_grp_sw_update_stats(struct 
>> mlx5e_priv *priv)
>>  		s->rx_buff_alloc_err += rq_stats->buff_alloc_err;
>>  		s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks;
>>  		s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts;
>> -		s->rx_cache_reuse += rq_stats->cache_reuse;
>> -		s->rx_cache_full  += rq_stats->cache_full;
>> -		s->rx_cache_empty += rq_stats->cache_empty;
>> -		s->rx_cache_busy  += rq_stats->cache_busy;
>> -		s->rx_cache_waive += rq_stats->cache_waive;
>> +		s->rx_pool_cache_hit += rq_stats->pool.cache_hit;
>> +		s->rx_pool_cache_full += rq_stats->pool.cache_full;
>> +		s->rx_pool_cache_empty += rq_stats->pool.cache_empty;
>> +		s->rx_pool_ring_produce += rq_stats->pool.ring_produce;
>> +		s->rx_pool_ring_consume += rq_stats->pool.ring_consume;
>> +		s->rx_pool_ring_return += rq_stats->pool.ring_return;
>> +		s->rx_pool_flush += rq_stats->pool.flush;
>> +		s->rx_pool_node_change += rq_stats->pool.node_change;
>>  		s->rx_congst_umr  += rq_stats->congst_umr;
>>  		s->rx_arfs_err    += rq_stats->arfs_err;
>>  		s->rx_recover     += rq_stats->recover;
>> @@ -1446,11 +1452,14 @@ static const struct counter_desc 
>> rq_stats_desc[] = {
>>  	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) },
>>  	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) 
>> },
>>  	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) 
>> },
>> -	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_reuse) },
>> -	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_full) },
>> -	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_empty) },
>> -	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_busy) },
>> -	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_waive) },
>> +	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pool.cache_hit) },
>> +	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pool.cache_full) },
>> +	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pool.cache_empty) },
>> +	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pool.ring_produce) 
>> },
>> +	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pool.ring_consume) 
>> },
>> +	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pool.ring_return) },
>> +	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pool.flush) },
>> +	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pool.node_change) },
>>  	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, congst_umr) },
>>  	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, arfs_err) },
>>  	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, recover) },
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h 
>> b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
>> index 79f261bf86ac..7d6001969400 100644
>> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
>> @@ -109,11 +109,14 @@ struct mlx5e_sw_stats {
>>  	u64 rx_buff_alloc_err;
>>  	u64 rx_cqe_compress_blks;
>>  	u64 rx_cqe_compress_pkts;
>> -	u64 rx_cache_reuse;
>> -	u64 rx_cache_full;
>> -	u64 rx_cache_empty;
>> -	u64 rx_cache_busy;
>> -	u64 rx_cache_waive;
>> +	u64 rx_pool_cache_hit;
>> +	u64 rx_pool_cache_full;
>> +	u64 rx_pool_cache_empty;
>> +	u64 rx_pool_ring_produce;
>> +	u64 rx_pool_ring_consume;
>> +	u64 rx_pool_ring_return;
>> +	u64 rx_pool_flush;
>> +	u64 rx_pool_node_change;
>>  	u64 rx_congst_umr;
>>  	u64 rx_arfs_err;
>>  	u64 rx_recover;
>> @@ -245,14 +248,10 @@ struct mlx5e_rq_stats {
>>  	u64 buff_alloc_err;
>>  	u64 cqe_compress_blks;
>>  	u64 cqe_compress_pkts;
>> -	u64 cache_reuse;
>> -	u64 cache_full;
>> -	u64 cache_empty;
>> -	u64 cache_busy;
>> -	u64 cache_waive;
>>  	u64 congst_umr;
>>  	u64 arfs_err;
>>  	u64 recover;
>> +	struct page_pool_stats pool;
>>  };
>>
>>  struct mlx5e_sq_stats {
>
>
>
> -- 
> Best regards,
>   Jesper Dangaard Brouer
>   MSc.CS, Principal Kernel Engineer at Red Hat
>   LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 00/10 net-next] page_pool cleanups
  2019-10-16 22:50 [PATCH 00/10 net-next] page_pool cleanups Jonathan Lemon
                   ` (9 preceding siblings ...)
  2019-10-16 22:50 ` [PATCH 10/10 net-next] page_pool: Cleanup and rename page_pool functions Jonathan Lemon
@ 2019-10-18 20:50 ` Saeed Mahameed
  2019-10-18 21:21   ` Saeed Mahameed
  2019-10-18 23:32   ` Jonathan Lemon
  10 siblings, 2 replies; 34+ messages in thread
From: Saeed Mahameed @ 2019-10-18 20:50 UTC (permalink / raw)
  To: jonathan.lemon, ilias.apalodimas, Tariq Toukan, brouer
  Cc: kernel-team, netdev

On Wed, 2019-10-16 at 15:50 -0700, Jonathan Lemon wrote:
> This patch combines work from various people:
> - part of Tariq's work to move the DMA mapping from
>   the mlx5 driver into the page pool.  This does not
>   include later patches which remove the dma address
>   from the driver, as this conflicts with AF_XDP.
> 
> - Saeed's changes to check the numa node before
>   including the page in the pool, and flushing the
>   pool on a node change.
> 

Hi Jonathan, thanks for submitting this.
The patches you have are not up to date; I have new ones with tracing
support and some fixes from offlist review iterations, plus performance
numbers and a cover letter.

I will send them to you and you can post it as v2?


> - Statistics and cleanup for page pool.
> 
> Jonathan Lemon (5):
>   page_pool: Add page_pool_keep_page
>   page_pool: allow configurable linear cache size
>   page_pool: Add statistics
>   net/mlx5: Add page_pool stats to the Mellanox driver
>   page_pool: Cleanup and rename page_pool functions.
> 
> Saeed Mahameed (2):
>   page_pool: Add API to update numa node and flush page caches
>   net/mlx5e: Rx, Update page pool numa node when changed
> 
> Tariq Toukan (3):
>   net/mlx5e: RX, Remove RX page-cache
>   net/mlx5e: RX, Manage RX pages only via page pool API
>   net/mlx5e: RX, Internal DMA mapping in page_pool
> 
>  drivers/net/ethernet/mellanox/mlx5/core/en.h  |  18 +-
>  .../net/ethernet/mellanox/mlx5/core/en/xdp.c  |  12 +-
>  .../net/ethernet/mellanox/mlx5/core/en_main.c |  19 +-
>  .../net/ethernet/mellanox/mlx5/core/en_rx.c   | 128 ++--------
>  .../ethernet/mellanox/mlx5/core/en_stats.c    |  39 ++--
>  .../ethernet/mellanox/mlx5/core/en_stats.h    |  19 +-
>  include/net/page_pool.h                       | 216 +++++++++-------
> -
>  net/core/page_pool.c                          | 221 +++++++++++-----
> --
>  8 files changed, 319 insertions(+), 353 deletions(-)
> 

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 01/10 net-next] net/mlx5e: RX, Remove RX page-cache
  2019-10-16 22:50 ` [PATCH 01/10 net-next] net/mlx5e: RX, Remove RX page-cache Jonathan Lemon
  2019-10-17  1:30   ` Jakub Kicinski
@ 2019-10-18 20:53   ` Saeed Mahameed
  2019-10-19  0:10     ` Jonathan Lemon
  1 sibling, 1 reply; 34+ messages in thread
From: Saeed Mahameed @ 2019-10-18 20:53 UTC (permalink / raw)
  To: jonathan.lemon, ilias.apalodimas, Tariq Toukan, brouer
  Cc: kernel-team, netdev

On Wed, 2019-10-16 at 15:50 -0700, Jonathan Lemon wrote:
> From: Tariq Toukan <tariqt@mellanox.com>
> 
> Obsolete the RX page-cache layer, pages are now recycled
> in page_pool.
> 
> This patch introduces a temporary degradation as recycling
> does not keep the pages DMA-mapped. That is fixed in a
> downstream patch.
Tariq,

I think we need to have performance numbers here to show the degradation.
I am sure that non-XDP TCP/UDP traffic performance will be hit.

> 
> Issue: 1487631
> Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
> 
> Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
> ---
>  drivers/net/ethernet/mellanox/mlx5/core/en.h  | 13 ----
>  .../net/ethernet/mellanox/mlx5/core/en_main.c | 16 -----
>  .../net/ethernet/mellanox/mlx5/core/en_rx.c   | 67 ++---------------
> --
>  3 files changed, 4 insertions(+), 92 deletions(-)
> 
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h
> b/drivers/net/ethernet/mellanox/mlx5/core/en.h
> index 8d76452cacdc..0595cdcff594 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
> @@ -583,18 +583,6 @@ struct mlx5e_mpw_info {
>  
>  #define MLX5E_MAX_RX_FRAGS 4
>  
> -/* a single cache unit is capable to serve one napi call (for non-
> striding rq)
> - * or a MPWQE (for striding rq).
> - */
> -#define MLX5E_CACHE_UNIT	(MLX5_MPWRQ_PAGES_PER_WQE >
> NAPI_POLL_WEIGHT ? \
> -				 MLX5_MPWRQ_PAGES_PER_WQE :
> NAPI_POLL_WEIGHT)
> -#define MLX5E_CACHE_SIZE	(4 *
> roundup_pow_of_two(MLX5E_CACHE_UNIT))
> -struct mlx5e_page_cache {
> -	u32 head;
> -	u32 tail;
> -	struct mlx5e_dma_info page_cache[MLX5E_CACHE_SIZE];
> -};
> -
>  struct mlx5e_rq;
>  typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq*, struct
> mlx5_cqe64*);
>  typedef struct sk_buff *
> @@ -658,7 +646,6 @@ struct mlx5e_rq {
>  	struct mlx5e_rq_stats *stats;
>  	struct mlx5e_cq        cq;
>  	struct mlx5e_cq_decomp cqd;
> -	struct mlx5e_page_cache page_cache;
>  	struct hwtstamp_config *tstamp;
>  	struct mlx5_clock      *clock;
>  
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> index 7569287f8f3c..168be1f800a3 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> @@ -612,9 +612,6 @@ static int mlx5e_alloc_rq(struct mlx5e_channel
> *c,
>  		rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
>  	}
>  
> -	rq->page_cache.head = 0;
> -	rq->page_cache.tail = 0;
> -
>  	return 0;
>  
>  err_free:
> @@ -640,8 +637,6 @@ static int mlx5e_alloc_rq(struct mlx5e_channel
> *c,
>  
>  static void mlx5e_free_rq(struct mlx5e_rq *rq)
>  {
> -	int i;
> -
>  	if (rq->xdp_prog)
>  		bpf_prog_put(rq->xdp_prog);
>  
> @@ -655,17 +650,6 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq)
>  		mlx5e_free_di_list(rq);
>  	}
>  
> -	for (i = rq->page_cache.head; i != rq->page_cache.tail;
> -	     i = (i + 1) & (MLX5E_CACHE_SIZE - 1)) {
> -		struct mlx5e_dma_info *dma_info = &rq-
> >page_cache.page_cache[i];
> -
> -		/* With AF_XDP, page_cache is not used, so this loop is
> not
> -		 * entered, and it's safe to call
> mlx5e_page_release_dynamic
> -		 * directly.
> -		 */
> -		mlx5e_page_release_dynamic(rq, dma_info, false);
> -	}
> -
>  	xdp_rxq_info_unreg(&rq->xdp_rxq);
>  	page_pool_destroy(rq->page_pool);
>  	mlx5_wq_destroy(&rq->wq_ctrl);
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
> b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
> index d6a547238de0..a3773f8a4931 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
> @@ -184,65 +184,9 @@ static inline u32
> mlx5e_decompress_cqes_start(struct mlx5e_rq *rq,
>  	return mlx5e_decompress_cqes_cont(rq, wq, 1, budget_rem) - 1;
>  }
>  
> -static inline bool mlx5e_page_is_reserved(struct page *page)
> -{
> -	return page_is_pfmemalloc(page) || page_to_nid(page) !=
> numa_mem_id();
> -}
> -
> -static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq,
> -				      struct mlx5e_dma_info *dma_info)
> -{
> -	struct mlx5e_page_cache *cache = &rq->page_cache;
> -	u32 tail_next = (cache->tail + 1) & (MLX5E_CACHE_SIZE - 1);
> -	struct mlx5e_rq_stats *stats = rq->stats;
> -
> -	if (tail_next == cache->head) {
> -		stats->cache_full++;
> -		return false;
> -	}
> -
> -	if (unlikely(mlx5e_page_is_reserved(dma_info->page))) {
> -		stats->cache_waive++;
> -		return false;
> -	}
> -
> -	cache->page_cache[cache->tail] = *dma_info;
> -	cache->tail = tail_next;
> -	return true;
> -}
> -
> -static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq,
> -				      struct mlx5e_dma_info *dma_info)
> -{
> -	struct mlx5e_page_cache *cache = &rq->page_cache;
> -	struct mlx5e_rq_stats *stats = rq->stats;
> -
> -	if (unlikely(cache->head == cache->tail)) {
> -		stats->cache_empty++;
> -		return false;
> -	}
> -
> -	if (page_ref_count(cache->page_cache[cache->head].page) != 1) {
> -		stats->cache_busy++;
> -		return false;
> -	}
> -
> -	*dma_info = cache->page_cache[cache->head];
> -	cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1);
> -	stats->cache_reuse++;
> -
> -	dma_sync_single_for_device(rq->pdev, dma_info->addr,
> -				   PAGE_SIZE,
> -				   DMA_FROM_DEVICE);
> -	return true;
> -}
> -
>  static inline int mlx5e_page_alloc_pool(struct mlx5e_rq *rq,
>  					struct mlx5e_dma_info
> *dma_info)
>  {
> -	if (mlx5e_rx_cache_get(rq, dma_info))
> -		return 0;
> -
>  	dma_info->page = page_pool_dev_alloc_pages(rq->page_pool);
>  	if (unlikely(!dma_info->page))
>  		return -ENOMEM;
> @@ -276,14 +220,11 @@ void mlx5e_page_release_dynamic(struct mlx5e_rq
> *rq,
>  				struct mlx5e_dma_info *dma_info,
>  				bool recycle)
>  {
> -	if (likely(recycle)) {
> -		if (mlx5e_rx_cache_put(rq, dma_info))
> -			return;
> +	mlx5e_page_dma_unmap(rq, dma_info);
>  
> -		mlx5e_page_dma_unmap(rq, dma_info);
> +	if (likely(recycle)) {
>  		page_pool_recycle_direct(rq->page_pool, dma_info-
> >page);
>  	} else {
> -		mlx5e_page_dma_unmap(rq, dma_info);
>  		page_pool_release_page(rq->page_pool, dma_info->page);
>  		put_page(dma_info->page);
>  	}
> @@ -1167,7 +1108,7 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq,
> struct mlx5_cqe64 *cqe)
>  	if (!skb) {
>  		/* probably for XDP */
>  		if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq-
> >flags)) {
> -			/* do not return page to cache,
> +			/* do not return page to pool,
>  			 * it will be returned on XDP_TX completion.
>  			 */
>  			goto wq_cyc_pop;
> @@ -1210,7 +1151,7 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq
> *rq, struct mlx5_cqe64 *cqe)
>  	if (!skb) {
>  		/* probably for XDP */
>  		if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq-
> >flags)) {
> -			/* do not return page to cache,
> +			/* do not return page to pool,
>  			 * it will be returned on XDP_TX completion.
>  			 */
>  			goto wq_cyc_pop;

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 03/10 net-next] net/mlx5e: RX, Internal DMA mapping in page_pool
  2019-10-16 22:50 ` [PATCH 03/10 net-next] net/mlx5e: RX, Internal DMA mapping in page_pool Jonathan Lemon
  2019-10-17  1:33   ` Jakub Kicinski
@ 2019-10-18 21:01   ` Saeed Mahameed
  1 sibling, 0 replies; 34+ messages in thread
From: Saeed Mahameed @ 2019-10-18 21:01 UTC (permalink / raw)
  To: jonathan.lemon, ilias.apalodimas, Tariq Toukan, brouer
  Cc: kernel-team, netdev

On Wed, 2019-10-16 at 15:50 -0700, Jonathan Lemon wrote:
> From: Tariq Toukan <tariqt@mellanox.com>
> 
> After RX page-cache is removed in previous patch, let the
> page_pool be responsible for the DMA mapping.
> 
> Issue: 1487631
> Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
> 
> Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
> ---
>  drivers/net/ethernet/mellanox/mlx5/core/en.h     |  2 --
>  drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c |  3 +--
>  .../net/ethernet/mellanox/mlx5/core/en_main.c    |  2 +-
>  drivers/net/ethernet/mellanox/mlx5/core/en_rx.c  | 16 +-------------
> --
>  4 files changed, 3 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h
> b/drivers/net/ethernet/mellanox/mlx5/core/en.h
> index a1ab5c76177d..2e281c755b65 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
> @@ -926,10 +926,8 @@ bool
> mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev);
>  bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev,
>  				struct mlx5e_params *params);
>  
> -void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info
> *dma_info);
>  void mlx5e_page_release_dynamic(struct mlx5e_rq *rq,
>  				struct mlx5e_dma_info *dma_info);
> -void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info
> *dma_info);
>  void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64
> *cqe);
>  void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct
> mlx5_cqe64 *cqe);
>  bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq);
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
> b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
> index 1b26061cb959..8376b2789575 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
> @@ -161,8 +161,7 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct
> mlx5e_dma_info *di,
>  			goto xdp_abort;
>  		__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags);
>  		__set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
> -		if (!xsk)
> -			mlx5e_page_dma_unmap(rq, di);
> +		/* xdp maps call xdp_release_frame() if needed */
>  		rq->stats->xdp_redirect++;
>  		return true;
>  	default:
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> index 168be1f800a3..2b828de1adf0 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> @@ -546,7 +546,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel
> *c,
>  	} else {
>  		/* Create a page_pool and register it with rxq */
>  		pp_params.order     = 0;
> -		pp_params.flags     = 0; /* No-internal DMA mapping in
> page_pool */
> +		pp_params.flags     = PP_FLAG_DMA_MAP;
>  		pp_params.pool_size = pool_size;
>  		pp_params.nid       = cpu_to_node(c->cpu);
>  		pp_params.dev       = c->pdev;
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
> b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
> index 033b8264a4e4..1b74d03fdf06 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
> @@ -190,14 +190,7 @@ static inline int mlx5e_page_alloc_pool(struct
> mlx5e_rq *rq,
>  	dma_info->page = page_pool_dev_alloc_pages(rq->page_pool);
>  	if (unlikely(!dma_info->page))
>  		return -ENOMEM;

Jakub is right, you need to dma_sync for device after allocating.
Pages recycled from the cache may have been synced for CPU, so you
need to sync them for device here.
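
Something along these lines in mlx5e_page_alloc_pool() should do it
(an illustrative sketch only, reusing the dma_sync_single_for_device()
call that the removed cache path had):

	dma_info->page = page_pool_dev_alloc_pages(rq->page_pool);
	if (unlikely(!dma_info->page))
		return -ENOMEM;
	dma_info->addr = page_pool_get_dma_addr(dma_info->page);
	/* a recycled page may have been synced for CPU;
	 * hand the buffer back to the device before posting it
	 */
	dma_sync_single_for_device(rq->pdev, dma_info->addr,
				   PAGE_SIZE, DMA_FROM_DEVICE);
	return 0;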

> -
> -	dma_info->addr = dma_map_page(rq->pdev, dma_info->page, 0,
> -				      PAGE_SIZE, rq->buff.map_dir);
> -	if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) {
> -		page_pool_recycle_direct(rq->page_pool, dma_info-
> >page);
> -		dma_info->page = NULL;
> -		return -ENOMEM;
> -	}
> +	dma_info->addr = page_pool_get_dma_addr(dma_info->page);
>  
>  	return 0;
>  }
> @@ -211,16 +204,9 @@ static inline int mlx5e_page_alloc(struct
> mlx5e_rq *rq,
>  		return mlx5e_page_alloc_pool(rq, dma_info);
>  }
>  
> -void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info
> *dma_info)
> -{
> -	dma_unmap_page(rq->pdev, dma_info->addr, PAGE_SIZE, rq-
> >buff.map_dir);
> -}
> -
>  void mlx5e_page_release_dynamic(struct mlx5e_rq *rq,
>  				struct mlx5e_dma_info *dma_info)
>  {
> -	mlx5e_page_dma_unmap(rq, dma_info);
> -
>  	page_pool_recycle_direct(rq->page_pool, dma_info->page);
>  }
>  

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 04/10 net-next] page_pool: Add API to update numa node and flush page caches
  2019-10-17 12:06   ` Ilias Apalodimas
@ 2019-10-18 21:07     ` Saeed Mahameed
  2019-10-18 23:38       ` Jonathan Lemon
  0 siblings, 1 reply; 34+ messages in thread
From: Saeed Mahameed @ 2019-10-18 21:07 UTC (permalink / raw)
  To: jonathan.lemon, ilias.apalodimas
  Cc: kernel-team, netdev, Tariq Toukan, brouer

On Thu, 2019-10-17 at 15:06 +0300, Ilias Apalodimas wrote:
> Hi Saeed,
> 
> On Wed, Oct 16, 2019 at 03:50:22PM -0700, Jonathan Lemon wrote:
> > From: Saeed Mahameed <saeedm@mellanox.com>
> > 
> > Add page_pool_update_nid() to be called from drivers when they
> > detect
> > numa node changes.
> > 
> > It will do:
> > 1) Flush the pool's page cache and ptr_ring.
> > 2) Update page pool nid value to start allocating from the new numa
> > node.
> > 
> > Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
> > Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
> > ---
> >  include/net/page_pool.h | 10 ++++++++++
> >  net/core/page_pool.c    | 16 +++++++++++-----
> >  2 files changed, 21 insertions(+), 5 deletions(-)
> > 
> > diff --git a/include/net/page_pool.h b/include/net/page_pool.h
> > index 2cbcdbdec254..fb13cf6055ff 100644
> > --- a/include/net/page_pool.h
> > +++ b/include/net/page_pool.h
> > @@ -226,4 +226,14 @@ static inline bool page_pool_put(struct
> > page_pool *pool)
> >  	return refcount_dec_and_test(&pool->user_cnt);
> >  }
> >  
> > +/* Only safe from napi context or when user guarantees it is
> > thread safe */
> > +void __page_pool_flush(struct page_pool *pool);
> 
> This should be called per packet right? Any noticeable impact on
> performance?
> 
No, once per napi poll, and only if a change in numa node is detected,
so very, very rare!
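
For context, the driver side is just a cheap check from the RX NAPI
path, something like (illustrative):

	/* e.g. once per mlx5e NAPI poll, before refilling the RQ */
	page_pool_update_nid(rq->page_pool, numa_mem_id());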

> > +static inline void page_pool_update_nid(struct page_pool *pool,
> > int new_nid)
> > +{
> > +	if (unlikely(pool->p.nid != new_nid)) {
> > +		/* TODO: Add statistics/trace */
> > +		__page_pool_flush(pool);
> > +		pool->p.nid = new_nid;
> > +	}
> > +}
> >  #endif /* _NET_PAGE_POOL_H */
> > diff --git a/net/core/page_pool.c b/net/core/page_pool.c
> > index 5bc65587f1c4..678cf85f273a 100644
> > --- a/net/core/page_pool.c
> > +++ b/net/core/page_pool.c
> > @@ -373,16 +373,13 @@ void __page_pool_free(struct page_pool *pool)
> >  }
> >  EXPORT_SYMBOL(__page_pool_free);
> >  
> > -/* Request to shutdown: release pages cached by page_pool, and
> > check
> > - * for in-flight pages
> > - */
> > -bool __page_pool_request_shutdown(struct page_pool *pool)
> > +void __page_pool_flush(struct page_pool *pool)
> >  {
> >  	struct page *page;
> >  
> >  	/* Empty alloc cache, assume caller made sure this is
> >  	 * no-longer in use, and page_pool_alloc_pages() cannot be
> > -	 * call concurrently.
> > +	 * called concurrently.
> >  	 */
> >  	while (pool->alloc.count) {
> >  		page = pool->alloc.cache[--pool->alloc.count];
> > @@ -393,6 +390,15 @@ bool __page_pool_request_shutdown(struct
> > page_pool *pool)
> >  	 * be in-flight.
> >  	 */
> >  	__page_pool_empty_ring(pool);
> > +}
> > +EXPORT_SYMBOL(__page_pool_flush);
> 
> A later patch removes this, do we actually need it here?

I agree. Jonathan changed the design of my last patch in this series,
and this became redundant since he is going to do a lazy release of
unwanted pages rather than flushing the cache.

> 
> > +
> > +/* Request to shutdown: release pages cached by page_pool, and
> > check
> > + * for in-flight pages
> > + */
> > +bool __page_pool_request_shutdown(struct page_pool *pool)
> > +{
> > +	__page_pool_flush(pool);
> >  
> >  	return __page_pool_safe_to_destroy(pool);
> >  }
> > -- 
> > 2.17.1
> > 
> 
> Thanks
> /Ilias

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 00/10 net-next] page_pool cleanups
  2019-10-18 20:50 ` [PATCH 00/10 net-next] page_pool cleanups Saeed Mahameed
@ 2019-10-18 21:21   ` Saeed Mahameed
  2019-10-18 23:32   ` Jonathan Lemon
  1 sibling, 0 replies; 34+ messages in thread
From: Saeed Mahameed @ 2019-10-18 21:21 UTC (permalink / raw)
  To: jonathan.lemon, ilias.apalodimas, Tariq Toukan, brouer
  Cc: kernel-team, netdev

On Fri, 2019-10-18 at 13:50 -0700, Saeed Mahameed wrote:
> On Wed, 2019-10-16 at 15:50 -0700, Jonathan Lemon wrote:
> > This patch combines work from various people:
> > - part of Tariq's work to move the DMA mapping from
> >   the mlx5 driver into the page pool.  This does not
> >   include later patches which remove the dma address
> >   from the driver, as this conflicts with AF_XDP.
> > 
> > - Saeed's changes to check the numa node before
> >   including the page in the pool, and flushing the
> >   pool on a node change.
> > 
> 
> Hi Jonathan, thanks for submitting this,
> the patches you have are not up to date, i have new ones with tracing
> support and some fixes from offlist review iterations, plus
> performance
> numbers and a  cover letter. 
> 
> I will send it to you and you can post it as v2 ? 

Actually, I suggest taking my 3 patches out of this series and submitting
them standalone; they are not directly related to the other stuff here
and work perfectly well without it, since my 3 patches address a real
issue with page pool numa node migration.

> 
> 
> > - Statistics and cleanup for page pool.
> > 
> > Jonathan Lemon (5):
> >   page_pool: Add page_pool_keep_page
> >   page_pool: allow configurable linear cache size
> >   page_pool: Add statistics
> >   net/mlx5: Add page_pool stats to the Mellanox driver
> >   page_pool: Cleanup and rename page_pool functions.
> > 
> > Saeed Mahameed (2):
> >   page_pool: Add API to update numa node and flush page caches
> >   net/mlx5e: Rx, Update page pool numa node when changed
> > 
> > Tariq Toukan (3):
> >   net/mlx5e: RX, Remove RX page-cache
> >   net/mlx5e: RX, Manage RX pages only via page pool API
> >   net/mlx5e: RX, Internal DMA mapping in page_pool
> > 
> >  drivers/net/ethernet/mellanox/mlx5/core/en.h  |  18 +-
> >  .../net/ethernet/mellanox/mlx5/core/en/xdp.c  |  12 +-
> >  .../net/ethernet/mellanox/mlx5/core/en_main.c |  19 +-
> >  .../net/ethernet/mellanox/mlx5/core/en_rx.c   | 128 ++--------
> >  .../ethernet/mellanox/mlx5/core/en_stats.c    |  39 ++--
> >  .../ethernet/mellanox/mlx5/core/en_stats.h    |  19 +-
> >  include/net/page_pool.h                       | 216 +++++++++-----
> > --
> > -
> >  net/core/page_pool.c                          | 221 +++++++++++---
> > --
> > --
> >  8 files changed, 319 insertions(+), 353 deletions(-)
> > 

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 08/10 net-next] page_pool: Add statistics
  2019-10-16 22:50 ` [PATCH 08/10 net-next] page_pool: Add statistics Jonathan Lemon
  2019-10-17  8:29   ` Jesper Dangaard Brouer
@ 2019-10-18 21:29   ` Saeed Mahameed
  2019-10-18 23:37     ` Jonathan Lemon
  1 sibling, 1 reply; 34+ messages in thread
From: Saeed Mahameed @ 2019-10-18 21:29 UTC (permalink / raw)
  To: jonathan.lemon, ilias.apalodimas, Tariq Toukan, brouer
  Cc: kernel-team, netdev

On Wed, 2019-10-16 at 15:50 -0700, Jonathan Lemon wrote:
> Add statistics to the page pool, providing visibility into its
> operation.
> 
> Callers can provide a location where the stats are stored, otherwise
> the page pool will allocate a statistic area.
> 
> Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
> ---
>  include/net/page_pool.h | 21 +++++++++++++---
>  net/core/page_pool.c    | 55 +++++++++++++++++++++++++++++++++++--
> ----
>  2 files changed, 65 insertions(+), 11 deletions(-)
> 
> diff --git a/include/net/page_pool.h b/include/net/page_pool.h
> index fc340db42f9a..4f383522b141 100644
> --- a/include/net/page_pool.h
> +++ b/include/net/page_pool.h
> @@ -34,8 +34,11 @@
>  #include <linux/ptr_ring.h>
>  #include <linux/dma-direction.h>
>  
> -#define PP_FLAG_DMA_MAP 1 /* Should page_pool do the DMA map/unmap
> */
> -#define PP_FLAG_ALL	PP_FLAG_DMA_MAP
> +#define PP_FLAG_DMA_MAP		BIT(0) /* page_pool does the
> DMA map/unmap */
> +#define PP_FLAG_ALL		(PP_FLAG_DMA_MAP)
> +
> +/* internal flags, not exposed to user */
> +#define PP_FLAG_INTERNAL_STATS	BIT(8)
>  
>  /*
>   * Fast allocation side cache array/stack
> @@ -57,6 +60,17 @@
>  #define PP_ALLOC_POOL_DEFAULT	1024
>  #define PP_ALLOC_POOL_LIMIT	32768
>  
> +struct page_pool_stats {
> +	u64 cache_hit;
> +	u64 cache_full;
> +	u64 cache_empty;
> +	u64 ring_produce;
> +	u64 ring_consume;
> +	u64 ring_return;
> +	u64 flush;
> +	u64 node_change;
> +};
> +
>  struct page_pool_params {
>  	unsigned int	flags;
>  	unsigned int	order;
> @@ -65,6 +79,7 @@ struct page_pool_params {
>  	int		nid;  /* Numa node id to allocate from pages
> from */
>  	enum dma_data_direction dma_dir; /* DMA mapping direction */
>  	struct device	*dev; /* device, for DMA pre-mapping purposes
> */
> +	struct page_pool_stats *stats; /* pool stats stored externally
> */
>  };
>  
>  struct page_pool {
> @@ -230,8 +245,8 @@ static inline bool page_pool_put(struct page_pool
> *pool)
>  static inline void page_pool_update_nid(struct page_pool *pool, int
> new_nid)
>  {
>  	if (unlikely(pool->p.nid != new_nid)) {
> -		/* TODO: Add statistics/trace */
>  		pool->p.nid = new_nid;
> +		pool->p.stats->node_change++;
>  	}
>  }
>  #endif /* _NET_PAGE_POOL_H */
> diff --git a/net/core/page_pool.c b/net/core/page_pool.c
> index f8fedecddb6f..ea6202813584 100644
> --- a/net/core/page_pool.c
> +++ b/net/core/page_pool.c
> @@ -20,9 +20,10 @@
>  
>  static int page_pool_init(struct page_pool *pool)
>  {
> +	int size;
>  
>  	/* Validate only known flags were used */
> -	if (pool->p.flags & ~(PP_FLAG_ALL))
> +	if (pool->p.flags & ~PP_FLAG_ALL)
>  		return -EINVAL;
>  
>  	if (!pool->p.pool_size)
> @@ -40,8 +41,16 @@ static int page_pool_init(struct page_pool *pool)
>  	    (pool->p.dma_dir != DMA_BIDIRECTIONAL))
>  		return -EINVAL;
>  
> +	if (!pool->p.stats) {
> +		size  = sizeof(struct page_pool_stats);
> +		pool->p.stats = kzalloc_node(size, GFP_KERNEL, pool-
> >p.nid);
> +		if (!pool->p.stats)
> +			return -ENOMEM;
> +		pool->p.flags |= PP_FLAG_INTERNAL_STATS;
> +	}
> +
>  	if (ptr_ring_init(&pool->ring, pool->p.pool_size, GFP_KERNEL) <
> 0)
> -		return -ENOMEM;
> +		goto fail;
>  
>  	atomic_set(&pool->pages_state_release_cnt, 0);
>  
> @@ -52,6 +61,12 @@ static int page_pool_init(struct page_pool *pool)
>  		get_device(pool->p.dev);
>  
>  	return 0;
> +
> +fail:
> +	if (pool->p.flags & PP_FLAG_INTERNAL_STATS)
> +		kfree(pool->p.stats);
> +
> +	return -ENOMEM;
>  }
>  
>  struct page_pool *page_pool_create(const struct page_pool_params
> *params)
> @@ -98,9 +113,11 @@ static struct page *__page_pool_get_cached(struct
> page_pool *pool)
>  	if (likely(in_serving_softirq())) {
>  		if (likely(pool->alloc_count)) {
>  			/* Fast-path */
> +			pool->p.stats->cache_hit++;
>  			page = pool->alloc_cache[--pool->alloc_count];
>  			return page;
>  		}
> +		pool->p.stats->cache_empty++;

This is problematic for 32-bit SMP archs; you need to use the
u64_stats_sync API.
In mlx5 we only support 64-bit, unlike the page pool core, which must
be protected here.
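
For reference, a minimal sketch of what that would look like
(illustrative only; on 64-bit the begin/end pair compiles away):

	struct page_pool_stats {
		struct u64_stats_sync syncp;	/* init with u64_stats_init() */
		u64 cache_hit;
		u64 cache_empty;
		/* ... remaining counters ... */
	};

	/* updater, e.g. in __page_pool_get_cached() */
	u64_stats_update_begin(&pool->p.stats->syncp);
	pool->p.stats->cache_empty++;
	u64_stats_update_end(&pool->p.stats->syncp);

and readers wrap their copy in a u64_stats_fetch_begin() /
u64_stats_fetch_retry() loop.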

>  		refill = true;
>  	}
>  
> @@ -113,10 +130,13 @@ static struct page
> *__page_pool_get_cached(struct page_pool *pool)
>  	 */
>  	spin_lock(&r->consumer_lock);
>  	page = __ptr_ring_consume(r);
> -	if (refill)
> +	if (refill) {
>  		pool->alloc_count = __ptr_ring_consume_batched(r,
>  							pool-
> >alloc_cache,
>  							PP_ALLOC_CACHE_
> REFILL);
> +		pool->p.stats->ring_consume += pool->alloc_count;
> +	}
> +	pool->p.stats->ring_consume += !!page;
>  	spin_unlock(&r->consumer_lock);
>  	return page;
>  }
> @@ -266,15 +286,23 @@ static void __page_pool_return_page(struct
> page_pool *pool, struct page *page)
>  static bool __page_pool_recycle_into_ring(struct page_pool *pool,
>  				   struct page *page)
>  {
> +	struct ptr_ring *r = &pool->ring;
>  	int ret;
>  
> -	/* BH protection not needed if current is serving softirq */
>  	if (in_serving_softirq())
> -		ret = ptr_ring_produce(&pool->ring, page);
> +		spin_lock(&r->producer_lock);
>  	else
> -		ret = ptr_ring_produce_bh(&pool->ring, page);
> +		spin_lock_bh(&r->producer_lock);
>  
> -	return (ret == 0) ? true : false;
> +	ret = __ptr_ring_produce(r, page);
> +	pool->p.stats->ring_produce++;
> +
> +	if (in_serving_softirq())
> +		spin_unlock(&r->producer_lock);
> +	else
> +		spin_unlock_bh(&r->producer_lock);
> +
> +	return ret == 0;
>  }
>  
>  /* Only allow direct recycling in special circumstances, into the
> @@ -285,8 +313,10 @@ static bool __page_pool_recycle_into_ring(struct
> page_pool *pool,
>  static bool __page_pool_recycle_into_cache(struct page *page,
>  					   struct page_pool *pool)
>  {
> -	if (unlikely(pool->alloc_count == pool->p.cache_size))
> +	if (unlikely(pool->alloc_count == pool->p.cache_size)) {
> +		pool->p.stats->cache_full++;
>  		return false;
> +	}
>  
>  	/* Caller MUST have verified/know (page_ref_count(page) == 1)
> */
>  	pool->alloc_cache[pool->alloc_count++] = page;
> @@ -343,6 +373,7 @@ EXPORT_SYMBOL(__page_pool_put_page);
>  static void __page_pool_empty_ring(struct page_pool *pool)
>  {
>  	struct page *page;
> +	int count = 0;
>  
>  	/* Empty recycle ring */
>  	while ((page = ptr_ring_consume_bh(&pool->ring))) {
> @@ -351,8 +382,11 @@ static void __page_pool_empty_ring(struct
> page_pool *pool)
>  			pr_crit("%s() page_pool refcnt %d violation\n",
>  				__func__, page_ref_count(page));
>  
> +		count++;
>  		__page_pool_return_page(pool, page);
>  	}
> +
> +	pool->p.stats->ring_return += count;
>  }
>  
>  static void __warn_in_flight(struct page_pool *pool)
> @@ -381,6 +415,9 @@ void __page_pool_free(struct page_pool *pool)
>  	if (!__page_pool_safe_to_destroy(pool))
>  		__warn_in_flight(pool);
>  
> +	if (pool->p.flags & PP_FLAG_INTERNAL_STATS)
> +		kfree(pool->p.stats);
> +
>  	ptr_ring_cleanup(&pool->ring, NULL);
>  
>  	if (pool->p.flags & PP_FLAG_DMA_MAP)
> @@ -394,6 +431,8 @@ static void page_pool_flush(struct page_pool
> *pool)
>  {
>  	struct page *page;
>  
> +	pool->p.stats->flush++;
> +
>  	/* Empty alloc cache, assume caller made sure this is
>  	 * no-longer in use, and page_pool_alloc_pages() cannot be
>  	 * called concurrently.

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 00/10 net-next] page_pool cleanups
  2019-10-18 20:50 ` [PATCH 00/10 net-next] page_pool cleanups Saeed Mahameed
  2019-10-18 21:21   ` Saeed Mahameed
@ 2019-10-18 23:32   ` Jonathan Lemon
  2019-10-21 19:08     ` Saeed Mahameed
  1 sibling, 1 reply; 34+ messages in thread
From: Jonathan Lemon @ 2019-10-18 23:32 UTC (permalink / raw)
  To: Saeed Mahameed
  Cc: ilias.apalodimas, Tariq Toukan, brouer, kernel-team, netdev



On 18 Oct 2019, at 13:50, Saeed Mahameed wrote:

> On Wed, 2019-10-16 at 15:50 -0700, Jonathan Lemon wrote:
>> This patch combines work from various people:
>> - part of Tariq's work to move the DMA mapping from
>>   the mlx5 driver into the page pool.  This does not
>>   include later patches which remove the dma address
>>   from the driver, as this conflicts with AF_XDP.
>>
>> - Saeed's changes to check the numa node before
>>   including the page in the pool, and flushing the
>>   pool on a node change.
>>
>
> Hi Jonathan, thanks for submitting this,
> the patches you have are not up to date, i have new ones with tracing
> support and some fixes from offlist review iterations, plus performance
> numbers and a  cover letter.
>
> I will send it to you and you can post it as v2 ?

Sure, I have some other cleanups to do and have a concern about
the cache effectiveness for some workloads.
-- 
Jonathan


>
>
>> - Statistics and cleanup for page pool.
>>
>> Jonathan Lemon (5):
>>   page_pool: Add page_pool_keep_page
>>   page_pool: allow configurable linear cache size
>>   page_pool: Add statistics
>>   net/mlx5: Add page_pool stats to the Mellanox driver
>>   page_pool: Cleanup and rename page_pool functions.
>>
>> Saeed Mahameed (2):
>>   page_pool: Add API to update numa node and flush page caches
>>   net/mlx5e: Rx, Update page pool numa node when changed
>>
>> Tariq Toukan (3):
>>   net/mlx5e: RX, Remove RX page-cache
>>   net/mlx5e: RX, Manage RX pages only via page pool API
>>   net/mlx5e: RX, Internal DMA mapping in page_pool
>>
>>  drivers/net/ethernet/mellanox/mlx5/core/en.h  |  18 +-
>>  .../net/ethernet/mellanox/mlx5/core/en/xdp.c  |  12 +-
>>  .../net/ethernet/mellanox/mlx5/core/en_main.c |  19 +-
>>  .../net/ethernet/mellanox/mlx5/core/en_rx.c   | 128 ++--------
>>  .../ethernet/mellanox/mlx5/core/en_stats.c    |  39 ++--
>>  .../ethernet/mellanox/mlx5/core/en_stats.h    |  19 +-
>>  include/net/page_pool.h                       | 216 +++++++++-------
>> -
>>  net/core/page_pool.c                          | 221 +++++++++++-----
>> --
>>  8 files changed, 319 insertions(+), 353 deletions(-)
>>

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 08/10 net-next] page_pool: Add statistics
  2019-10-18 21:29   ` Saeed Mahameed
@ 2019-10-18 23:37     ` Jonathan Lemon
  0 siblings, 0 replies; 34+ messages in thread
From: Jonathan Lemon @ 2019-10-18 23:37 UTC (permalink / raw)
  To: Saeed Mahameed
  Cc: ilias.apalodimas, Tariq Toukan, brouer, kernel-team, netdev



On 18 Oct 2019, at 14:29, Saeed Mahameed wrote:

> On Wed, 2019-10-16 at 15:50 -0700, Jonathan Lemon wrote:
>> Add statistics to the page pool, providing visibility into its
>> operation.
>>
>> Callers can provide a location where the stats are stored, otherwise
>> the page pool will allocate a statistic area.
>>
>> Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
>> ---
>>  include/net/page_pool.h | 21 +++++++++++++---
>>  net/core/page_pool.c    | 55 +++++++++++++++++++++++++++++++++++--
>> ----
>>  2 files changed, 65 insertions(+), 11 deletions(-)
>>
>> diff --git a/include/net/page_pool.h b/include/net/page_pool.h
>> index fc340db42f9a..4f383522b141 100644
>> --- a/include/net/page_pool.h
>> +++ b/include/net/page_pool.h
>> @@ -34,8 +34,11 @@
>>  #include <linux/ptr_ring.h>
>>  #include <linux/dma-direction.h>
>>
>> -#define PP_FLAG_DMA_MAP 1 /* Should page_pool do the DMA map/unmap
>> */
>> -#define PP_FLAG_ALL	PP_FLAG_DMA_MAP
>> +#define PP_FLAG_DMA_MAP		BIT(0) /* page_pool does the
>> DMA map/unmap */
>> +#define PP_FLAG_ALL		(PP_FLAG_DMA_MAP)
>> +
>> +/* internal flags, not exposed to user */
>> +#define PP_FLAG_INTERNAL_STATS	BIT(8)
>>
>>  /*
>>   * Fast allocation side cache array/stack
>> @@ -57,6 +60,17 @@
>>  #define PP_ALLOC_POOL_DEFAULT	1024
>>  #define PP_ALLOC_POOL_LIMIT	32768
>>
>> +struct page_pool_stats {
>> +	u64 cache_hit;
>> +	u64 cache_full;
>> +	u64 cache_empty;
>> +	u64 ring_produce;
>> +	u64 ring_consume;
>> +	u64 ring_return;
>> +	u64 flush;
>> +	u64 node_change;
>> +};
>> +
>>  struct page_pool_params {
>>  	unsigned int	flags;
>>  	unsigned int	order;
>> @@ -65,6 +79,7 @@ struct page_pool_params {
>>  	int		nid;  /* Numa node id to allocate from pages
>> from */
>>  	enum dma_data_direction dma_dir; /* DMA mapping direction */
>>  	struct device	*dev; /* device, for DMA pre-mapping purposes
>> */
>> +	struct page_pool_stats *stats; /* pool stats stored externally
>> */
>>  };
>>
>>  struct page_pool {
>> @@ -230,8 +245,8 @@ static inline bool page_pool_put(struct page_pool
>> *pool)
>>  static inline void page_pool_update_nid(struct page_pool *pool, int
>> new_nid)
>>  {
>>  	if (unlikely(pool->p.nid != new_nid)) {
>> -		/* TODO: Add statistics/trace */
>>  		pool->p.nid = new_nid;
>> +		pool->p.stats->node_change++;
>>  	}
>>  }
>>  #endif /* _NET_PAGE_POOL_H */
>> diff --git a/net/core/page_pool.c b/net/core/page_pool.c
>> index f8fedecddb6f..ea6202813584 100644
>> --- a/net/core/page_pool.c
>> +++ b/net/core/page_pool.c
>> @@ -20,9 +20,10 @@
>>
>>  static int page_pool_init(struct page_pool *pool)
>>  {
>> +	int size;
>>
>>  	/* Validate only known flags were used */
>> -	if (pool->p.flags & ~(PP_FLAG_ALL))
>> +	if (pool->p.flags & ~PP_FLAG_ALL)
>>  		return -EINVAL;
>>
>>  	if (!pool->p.pool_size)
>> @@ -40,8 +41,16 @@ static int page_pool_init(struct page_pool *pool)
>>  	    (pool->p.dma_dir != DMA_BIDIRECTIONAL))
>>  		return -EINVAL;
>>
>> +	if (!pool->p.stats) {
>> +		size  = sizeof(struct page_pool_stats);
>> +		pool->p.stats = kzalloc_node(size, GFP_KERNEL, pool-
>>> p.nid);
>> +		if (!pool->p.stats)
>> +			return -ENOMEM;
>> +		pool->p.flags |= PP_FLAG_INTERNAL_STATS;
>> +	}
>> +
>>  	if (ptr_ring_init(&pool->ring, pool->p.pool_size, GFP_KERNEL) <
>> 0)
>> -		return -ENOMEM;
>> +		goto fail;
>>
>>  	atomic_set(&pool->pages_state_release_cnt, 0);
>>
>> @@ -52,6 +61,12 @@ static int page_pool_init(struct page_pool *pool)
>>  		get_device(pool->p.dev);
>>
>>  	return 0;
>> +
>> +fail:
>> +	if (pool->p.flags & PP_FLAG_INTERNAL_STATS)
>> +		kfree(pool->p.stats);
>> +
>> +	return -ENOMEM;
>>  }
>>
>>  struct page_pool *page_pool_create(const struct page_pool_params
>> *params)
>> @@ -98,9 +113,11 @@ static struct page *__page_pool_get_cached(struct
>> page_pool *pool)
>>  	if (likely(in_serving_softirq())) {
>>  		if (likely(pool->alloc_count)) {
>>  			/* Fast-path */
>> +			pool->p.stats->cache_hit++;
>>  			page = pool->alloc_cache[--pool->alloc_count];
>>  			return page;
>>  		}
>> +		pool->p.stats->cache_empty++;
>
> this is problematic for 32bit SMP archs, you need to use
> u64_stats_sync API.
> in mlx5 we only support 64bits, unlike page cache which must be
> protected here.

Oooh, hmm.

I think Apple had the right idea and discarded 32-bits
along with the iPhone 5.

Tempted to just make stats a NOP on 32-bit machines.
-- 
Jonathan


>
>>  		refill = true;
>>  	}
>>
>> @@ -113,10 +130,13 @@ static struct page
>> *__page_pool_get_cached(struct page_pool *pool)
>>  	 */
>>  	spin_lock(&r->consumer_lock);
>>  	page = __ptr_ring_consume(r);
>> -	if (refill)
>> +	if (refill) {
>>  		pool->alloc_count = __ptr_ring_consume_batched(r,
>>  							pool-
>>> alloc_cache,
>>  							PP_ALLOC_CACHE_
>> REFILL);
>> +		pool->p.stats->ring_consume += pool->alloc_count;
>> +	}
>> +	pool->p.stats->ring_consume += !!page;
>>  	spin_unlock(&r->consumer_lock);
>>  	return page;
>>  }
>> @@ -266,15 +286,23 @@ static void __page_pool_return_page(struct
>> page_pool *pool, struct page *page)
>>  static bool __page_pool_recycle_into_ring(struct page_pool *pool,
>>  				   struct page *page)
>>  {
>> +	struct ptr_ring *r = &pool->ring;
>>  	int ret;
>>
>> -	/* BH protection not needed if current is serving softirq */
>>  	if (in_serving_softirq())
>> -		ret = ptr_ring_produce(&pool->ring, page);
>> +		spin_lock(&r->producer_lock);
>>  	else
>> -		ret = ptr_ring_produce_bh(&pool->ring, page);
>> +		spin_lock_bh(&r->producer_lock);
>>
>> -	return (ret == 0) ? true : false;
>> +	ret = __ptr_ring_produce(r, page);
>> +	pool->p.stats->ring_produce++;
>> +
>> +	if (in_serving_softirq())
>> +		spin_unlock(&r->producer_lock);
>> +	else
>> +		spin_unlock_bh(&r->producer_lock);
>> +
>> +	return ret == 0;
>>  }
>>
>>  /* Only allow direct recycling in special circumstances, into the
>> @@ -285,8 +313,10 @@ static bool __page_pool_recycle_into_ring(struct
>> page_pool *pool,
>>  static bool __page_pool_recycle_into_cache(struct page *page,
>>  					   struct page_pool *pool)
>>  {
>> -	if (unlikely(pool->alloc_count == pool->p.cache_size))
>> +	if (unlikely(pool->alloc_count == pool->p.cache_size)) {
>> +		pool->p.stats->cache_full++;
>>  		return false;
>> +	}
>>
>>  	/* Caller MUST have verified/know (page_ref_count(page) == 1)
>> */
>>  	pool->alloc_cache[pool->alloc_count++] = page;
>> @@ -343,6 +373,7 @@ EXPORT_SYMBOL(__page_pool_put_page);
>>  static void __page_pool_empty_ring(struct page_pool *pool)
>>  {
>>  	struct page *page;
>> +	int count = 0;
>>
>>  	/* Empty recycle ring */
>>  	while ((page = ptr_ring_consume_bh(&pool->ring))) {
>> @@ -351,8 +382,11 @@ static void __page_pool_empty_ring(struct
>> page_pool *pool)
>>  			pr_crit("%s() page_pool refcnt %d violation\n",
>>  				__func__, page_ref_count(page));
>>
>> +		count++;
>>  		__page_pool_return_page(pool, page);
>>  	}
>> +
>> +	pool->p.stats->ring_return += count;
>>  }
>>
>>  static void __warn_in_flight(struct page_pool *pool)
>> @@ -381,6 +415,9 @@ void __page_pool_free(struct page_pool *pool)
>>  	if (!__page_pool_safe_to_destroy(pool))
>>  		__warn_in_flight(pool);
>>
>> +	if (pool->p.flags & PP_FLAG_INTERNAL_STATS)
>> +		kfree(pool->p.stats);
>> +
>>  	ptr_ring_cleanup(&pool->ring, NULL);
>>
>>  	if (pool->p.flags & PP_FLAG_DMA_MAP)
>> @@ -394,6 +431,8 @@ static void page_pool_flush(struct page_pool
>> *pool)
>>  {
>>  	struct page *page;
>>
>> +	pool->p.stats->flush++;
>> +
>>  	/* Empty alloc cache, assume caller made sure this is
>>  	 * no-longer in use, and page_pool_alloc_pages() cannot be
>>  	 * called concurrently.

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 04/10 net-next] page_pool: Add API to update numa node and flush page caches
  2019-10-18 21:07     ` Saeed Mahameed
@ 2019-10-18 23:38       ` Jonathan Lemon
  0 siblings, 0 replies; 34+ messages in thread
From: Jonathan Lemon @ 2019-10-18 23:38 UTC (permalink / raw)
  To: Saeed Mahameed
  Cc: ilias.apalodimas, kernel-team, netdev, Tariq Toukan, brouer



On 18 Oct 2019, at 14:07, Saeed Mahameed wrote:

> On Thu, 2019-10-17 at 15:06 +0300, Ilias Apalodimas wrote:
>> Hi Saeed,
>>
>> On Wed, Oct 16, 2019 at 03:50:22PM -0700, Jonathan Lemon wrote:
>>> From: Saeed Mahameed <saeedm@mellanox.com>
>>>
>>> Add page_pool_update_nid() to be called from drivers when they
>>> detect
>>> numa node changes.
>>>
>>> It will do:
>>> 1) Flush the pool's page cache and ptr_ring.
>>> 2) Update page pool nid value to start allocating from the new numa
>>> node.
>>>
>>> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
>>> Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
>>> ---
>>>  include/net/page_pool.h | 10 ++++++++++
>>>  net/core/page_pool.c    | 16 +++++++++++-----
>>>  2 files changed, 21 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/include/net/page_pool.h b/include/net/page_pool.h
>>> index 2cbcdbdec254..fb13cf6055ff 100644
>>> --- a/include/net/page_pool.h
>>> +++ b/include/net/page_pool.h
>>> @@ -226,4 +226,14 @@ static inline bool page_pool_put(struct
>>> page_pool *pool)
>>>  	return refcount_dec_and_test(&pool->user_cnt);
>>>  }
>>>
>>> +/* Only safe from napi context or when user guarantees it is
>>> thread safe */
>>> +void __page_pool_flush(struct page_pool *pool);
>>
>> This should be called per packet right? Any noticeable impact on
>> performance?
>>
> no, once per napi and only if a change in numa node is detected, so
> very very rare !
>
>>> +static inline void page_pool_update_nid(struct page_pool *pool,
>>> int new_nid)
>>> +{
>>> +	if (unlikely(pool->p.nid != new_nid)) {
>>> +		/* TODO: Add statistics/trace */
>>> +		__page_pool_flush(pool);
>>> +		pool->p.nid = new_nid;
>>> +	}
>>> +}
>>>  #endif /* _NET_PAGE_POOL_H */
>>> diff --git a/net/core/page_pool.c b/net/core/page_pool.c
>>> index 5bc65587f1c4..678cf85f273a 100644
>>> --- a/net/core/page_pool.c
>>> +++ b/net/core/page_pool.c
>>> @@ -373,16 +373,13 @@ void __page_pool_free(struct page_pool *pool)
>>>  }
>>>  EXPORT_SYMBOL(__page_pool_free);
>>>
>>> -/* Request to shutdown: release pages cached by page_pool, and
>>> check
>>> - * for in-flight pages
>>> - */
>>> -bool __page_pool_request_shutdown(struct page_pool *pool)
>>> +void __page_pool_flush(struct page_pool *pool)
>>>  {
>>>  	struct page *page;
>>>
>>>  	/* Empty alloc cache, assume caller made sure this is
>>>  	 * no-longer in use, and page_pool_alloc_pages() cannot be
>>> -	 * call concurrently.
>>> +	 * called concurrently.
>>>  	 */
>>>  	while (pool->alloc.count) {
>>>  		page = pool->alloc.cache[--pool->alloc.count];
>>> @@ -393,6 +390,15 @@ bool __page_pool_request_shutdown(struct
>>> page_pool *pool)
>>>  	 * be in-flight.
>>>  	 */
>>>  	__page_pool_empty_ring(pool);
>>> +}
>>> +EXPORT_SYMBOL(__page_pool_flush);
>>
>> A later patch removes this, do we actually need it here?
>
> I agree, Jonathan changed the design of my last patch in this series
> and this became redundant as he is going to do lazy release of unwanted
> pages, rather than flushing the cache.

Yeah - I didn't want to take the latency hit when the node changed,
and would prefer to just amortize the cost over time.
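
Roughly, the recycle path just checks the node and lets stale pages
fall out one at a time instead of flushing (a sketch of the idea, not
the final code):

	/* in the page_pool recycle path: drop pages that came from
	 * the wrong node; the cache refills from the new node over time
	 */
	if (unlikely(page_to_nid(page) != pool->p.nid)) {
		__page_pool_return_page(pool, page);
		return;
	}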
-- 
Jonathan


>
>>
>>> +
>>> +/* Request to shutdown: release pages cached by page_pool, and
>>> check
>>> + * for in-flight pages
>>> + */
>>> +bool __page_pool_request_shutdown(struct page_pool *pool)
>>> +{
>>> +	__page_pool_flush(pool);
>>>
>>>  	return __page_pool_safe_to_destroy(pool);
>>>  }
>>> -- 
>>> 2.17.1
>>>
>>
>> Thanks
>> /Ilias

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 01/10 net-next] net/mlx5e: RX, Remove RX page-cache
  2019-10-18 20:53   ` Saeed Mahameed
@ 2019-10-19  0:10     ` Jonathan Lemon
  2019-10-20  7:29       ` Tariq Toukan
  0 siblings, 1 reply; 34+ messages in thread
From: Jonathan Lemon @ 2019-10-19  0:10 UTC (permalink / raw)
  To: Saeed Mahameed
  Cc: ilias.apalodimas, Tariq Toukan, brouer, Netdev, kernel-team

I was running the updated patches on machines with various workloads, and
have a bunch of different results.

For the following numbers,
  Effective = hit / (hit + empty + stall) * 100

In other words, show the hit rate for every trip to the cache,
and the cache full stat is ignored.

On a webserver:

[web] # ./eff
('rx_pool_cache_hit:', '360127643')
('rx_pool_cache_full:', '0')
('rx_pool_cache_empty:', '6455735977')
('rx_pool_ring_produce:', '474958')
('rx_pool_ring_consume:', '0')
('rx_pool_ring_return:', '474958')
('rx_pool_flush:', '144')
('rx_pool_node_change:', '0')
cache effectiveness:  5.28
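(That is 360127643 / (360127643 + 6455735977) * 100; there is no stall
counter in this run.)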

On a proxygen:
# ethtool -S eth0 | grep rx_pool
     rx_pool_cache_hit: 1646798
     rx_pool_cache_full: 0
     rx_pool_cache_empty: 15723566
     rx_pool_ring_produce: 474958
     rx_pool_ring_consume: 0
     rx_pool_ring_return: 474958
     rx_pool_flush: 144
     rx_pool_node_change: 0
cache effectiveness:  9.48

On both of these, only pages with refcount = 1 are being kept.


I changed things around in the page pool so:

1) the cache behaves like a ring instead of a stack, this
   sacrifices temporal locality.

2) it caches all pages returned regardless of refcount, but
   only returns pages with refcount=1.

This is the same behavior as the mlx5 cache.  Some gains
would come about if the sojourn time through the cache is
greater than the lifetime of the page usage by the networking
stack, as it provides a fixed working set of mapped pages.
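
In page_pool terms the experiment looks roughly like this (a sketch of
the idea only; the function names and the alloc_head/alloc_tail fields
are made up, and cache_size is assumed to be a power of two):

	/* recycle: always cache the page, like the mlx5 ring cache did */
	static bool recycle_into_cache(struct page_pool *pool, struct page *page)
	{
		u32 mask = pool->p.cache_size - 1;

		if (pool->alloc_count == pool->p.cache_size) {
			pool->p.stats->cache_full++;
			return false;
		}
		pool->alloc_cache[pool->alloc_tail++ & mask] = page;
		pool->alloc_count++;
		return true;
	}

	/* alloc: FIFO order, only hand out pages the stack has released */
	static struct page *get_cached(struct page_pool *pool)
	{
		u32 mask = pool->p.cache_size - 1;
		struct page *page;

		if (!pool->alloc_count) {
			pool->p.stats->cache_empty++;
			return NULL;
		}
		page = pool->alloc_cache[pool->alloc_head & mask];
		if (page_ref_count(page) != 1) {
			pool->p.stats->cache_stall++;	/* HOL blocking */
			return NULL;
		}
		pool->alloc_head++;
		pool->alloc_count--;
		pool->p.stats->cache_hit++;
		return page;
	}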

On the web server, this is a net loss:
[web] # ./eff
('rx_pool_cache_hit:', '6052662')
('rx_pool_cache_full:', '156355415')
('rx_pool_cache_empty:', '409600')
('rx_pool_cache_stall:', '302787473')
('rx_pool_ring_produce:', '156633847')
('rx_pool_ring_consume:', '9925520')
('rx_pool_ring_return:', '278788')
('rx_pool_flush:', '96')
('rx_pool_node_change:', '0')
cache effectiveness:  1.95720846778

For proxygen on the other hand, it's a win:
[proxy] # ./eff
('rx_pool_cache_hit:', '69235177')
('rx_pool_cache_full:', '35404387')
('rx_pool_cache_empty:', '460800')
('rx_pool_cache_stall:', '42932530')
('rx_pool_ring_produce:', '35717618')
('rx_pool_ring_consume:', '27879469')
('rx_pool_ring_return:', '404800')
('rx_pool_flush:', '108')
('rx_pool_node_change:', '0')
cache effectiveness:  61.4721608624

So the correct behavior isn't quite clear cut here - caching a
working set of mapped pages is beneficial in spite of the HOL
blocking stalls for some workloads, but I'm sure that it wouldn't
be too difficult to exceed the WS size.

Thoughts?
-- 
Jonathan


^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 01/10 net-next] net/mlx5e: RX, Remove RX page-cache
  2019-10-19  0:10     ` Jonathan Lemon
@ 2019-10-20  7:29       ` Tariq Toukan
  0 siblings, 0 replies; 34+ messages in thread
From: Tariq Toukan @ 2019-10-20  7:29 UTC (permalink / raw)
  To: Jonathan Lemon, Saeed Mahameed
  Cc: ilias.apalodimas, Tariq Toukan, brouer, Netdev, kernel-team



On 10/19/2019 3:10 AM, Jonathan Lemon wrote:
> I was running the updated patches on machines with various workloads, and
> have a bunch of different results.
> 
> For the following numbers,
>    Effective = hit / (hit + empty + stall) * 100
> 
> In other words, show the hit rate for every trip to the cache,
> and the cache full stat is ignored.
> 
> On a webserver:
> 
> [web] # ./eff
> ('rx_pool_cache_hit:', '360127643')
> ('rx_pool_cache_full:', '0')
> ('rx_pool_cache_empty:', '6455735977')
> ('rx_pool_ring_produce:', '474958')
> ('rx_pool_ring_consume:', '0')
> ('rx_pool_ring_return:', '474958')
> ('rx_pool_flush:', '144')
> ('rx_pool_node_change:', '0')
> cache effectiveness:  5.28
> 
> On a proxygen:
> # ethtool -S eth0 | grep rx_pool
>       rx_pool_cache_hit: 1646798
>       rx_pool_cache_full: 0
>       rx_pool_cache_empty: 15723566
>       rx_pool_ring_produce: 474958
>       rx_pool_ring_consume: 0
>       rx_pool_ring_return: 474958
>       rx_pool_flush: 144
>       rx_pool_node_change: 0
> cache effectiveness:  9.48
> 
> On both of these, only pages with refcount = 1 are being kept.
> 
> 
> I changed things around in the page pool so:
> 
> 1) the cache behaves like a ring instead of a stack, this
>     sacrifices temporal locality.
> 
> 2) it caches all pages returned regardless of refcount, but
>     only returns pages with refcount=1.
> 
> This is the same behavior as the mlx5 cache.  Some gains
> would come about if the sojourn time though the cache is
> greater than the lifetime of the page usage by the networking
> stack, as it provides a fixed working set of mapped pages.
> 
> On the web server, this is a net loss:
> [web] # ./eff
> ('rx_pool_cache_hit:', '6052662')
> ('rx_pool_cache_full:', '156355415')
> ('rx_pool_cache_empty:', '409600')
> ('rx_pool_cache_stall:', '302787473')
> ('rx_pool_ring_produce:', '156633847')
> ('rx_pool_ring_consume:', '9925520')
> ('rx_pool_ring_return:', '278788')
> ('rx_pool_flush:', '96')
> ('rx_pool_node_change:', '0')
> cache effectiveness:  1.95720846778
> 
> For proxygen on the other hand, it's a win:
> [proxy] # ./eff
> ('rx_pool_cache_hit:', '69235177')
> ('rx_pool_cache_full:', '35404387')
> ('rx_pool_cache_empty:', '460800')
> ('rx_pool_cache_stall:', '42932530')
> ('rx_pool_ring_produce:', '35717618')
> ('rx_pool_ring_consume:', '27879469')
> ('rx_pool_ring_return:', '404800')
> ('rx_pool_flush:', '108')
> ('rx_pool_node_change:', '0')
> cache effectiveness:  61.4721608624
> 
> So the correct behavior isn't quite clear cut here - caching a
> working set of mapped pages is beneficial in spite of the HOL
> blocking stalls for some workloads, but I'm sure that it wouldn't
> be too difficult to exceed the WS size.
> 
> Thoughts?
> 

We have a WIP in which we avoid the HOL block by having pages returned 
to the available queue only when their refcnt reaches back to 1. This 
requires catching this case in the page/skb release path.

See:
https://github.com/xdp-project/xdp-project/tree/master/areas/mem

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 00/10 net-next] page_pool cleanups
  2019-10-18 23:32   ` Jonathan Lemon
@ 2019-10-21 19:08     ` Saeed Mahameed
  2019-10-21 21:45       ` Jonathan Lemon
  0 siblings, 1 reply; 34+ messages in thread
From: Saeed Mahameed @ 2019-10-21 19:08 UTC (permalink / raw)
  To: jonathan.lemon
  Cc: kernel-team, ilias.apalodimas, Tariq Toukan, brouer, netdev

On Fri, 2019-10-18 at 16:32 -0700, Jonathan Lemon wrote:
> 
> On 18 Oct 2019, at 13:50, Saeed Mahameed wrote:
> 
> > On Wed, 2019-10-16 at 15:50 -0700, Jonathan Lemon wrote:
> > > This patch combines work from various people:
> > > - part of Tariq's work to move the DMA mapping from
> > >   the mlx5 driver into the page pool.  This does not
> > >   include later patches which remove the dma address
> > >   from the driver, as this conflicts with AF_XDP.
> > > 
> > > - Saeed's changes to check the numa node before
> > >   including the page in the pool, and flushing the
> > >   pool on a node change.
> > > 
> > 
> > Hi Jonathan, thanks for submitting this,
> > the patches you have are not up to date; I have new ones with
> > tracing support and some fixes from offlist review iterations,
> > plus performance numbers and a cover letter.
> > 
> > I will send them to you and you can post them as v2?
> 
> Sure, I have some other cleanups to do and have a concern about
> the cache effectiveness for some workloads.

Ok then, I will submit the page pool NUMA change patches separately.
I will remove the flush mechanism and will add your changes.

For the other patches, mlx5 cache and page pool statistics, I think
they need some more work and a lot of pieces are still WIP. I don't
want to block the NUMA change API patches.

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 00/10 net-next] page_pool cleanups
  2019-10-21 19:08     ` Saeed Mahameed
@ 2019-10-21 21:45       ` Jonathan Lemon
  0 siblings, 0 replies; 34+ messages in thread
From: Jonathan Lemon @ 2019-10-21 21:45 UTC (permalink / raw)
  To: Saeed Mahameed
  Cc: kernel-team, ilias.apalodimas, Tariq Toukan, brouer, netdev



On 21 Oct 2019, at 12:08, Saeed Mahameed wrote:

> On Fri, 2019-10-18 at 16:32 -0700, Jonathan Lemon wrote:
>>
>> On 18 Oct 2019, at 13:50, Saeed Mahameed wrote:
>>
>>> On Wed, 2019-10-16 at 15:50 -0700, Jonathan Lemon wrote:
>>>> This patch combines work from various people:
>>>> - part of Tariq's work to move the DMA mapping from
>>>>   the mlx5 driver into the page pool.  This does not
>>>>   include later patches which remove the dma address
>>>>   from the driver, as this conflicts with AF_XDP.
>>>>
>>>> - Saeed's changes to check the numa node before
>>>>   including the page in the pool, and flushing the
>>>>   pool on a node change.
>>>>
>>>
>>> Hi Jonathan, thanks for submitting this,
>>> the patches you have are not up to date; I have new ones with
>>> tracing support and some fixes from offlist review iterations,
>>> plus performance numbers and a cover letter.
>>>
>>> I will send them to you and you can post them as v2?
>>
>> Sure, I have some other cleanups to do and have a concern about
>> the cache effectiveness for some workloads.
>
> Ok then, I will submit the page pool NUMA change patches separately.
> I will remove the flush mechanism and will add your changes.
>
> For the other patches, mlx5 cache and page pool statistics, I think
> they need some more work and a lot of pieces are still WIP. I don't
> want to block the NUMA change API patches.

Sounds good - the stats are only really needed once the mlx5 private
cache goes away, and it doesn't look like that will happen immediately.

The private cache and the page pool are performing two different functions
at the moment.
-- 
Jonathan

^ permalink raw reply	[flat|nested] 34+ messages in thread

end of thread, other threads:[~2019-10-21 21:45 UTC | newest]

Thread overview: 34+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-10-16 22:50 [PATCH 00/10 net-next] page_pool cleanups Jonathan Lemon
2019-10-16 22:50 ` [PATCH 01/10 net-next] net/mlx5e: RX, Remove RX page-cache Jonathan Lemon
2019-10-17  1:30   ` Jakub Kicinski
2019-10-18 20:03     ` Jonathan Lemon
2019-10-18 20:53   ` Saeed Mahameed
2019-10-19  0:10     ` Jonathan Lemon
2019-10-20  7:29       ` Tariq Toukan
2019-10-16 22:50 ` [PATCH 02/10 net-next] net/mlx5e: RX, Manage RX pages only via page pool API Jonathan Lemon
2019-10-16 22:50 ` [PATCH 03/10 net-next] net/mlx5e: RX, Internal DMA mapping in page_pool Jonathan Lemon
2019-10-17  1:33   ` Jakub Kicinski
2019-10-18 20:04     ` Jonathan Lemon
2019-10-18 21:01   ` Saeed Mahameed
2019-10-16 22:50 ` [PATCH 04/10 net-next] page_pool: Add API to update numa node and flush page caches Jonathan Lemon
2019-10-17 12:06   ` Ilias Apalodimas
2019-10-18 21:07     ` Saeed Mahameed
2019-10-18 23:38       ` Jonathan Lemon
2019-10-16 22:50 ` [PATCH 05/10 net-next] net/mlx5e: Rx, Update page pool numa node when changed Jonathan Lemon
2019-10-16 22:50 ` [PATCH 06/10 net-next] page_pool: Add page_pool_keep_page Jonathan Lemon
2019-10-16 22:50 ` [PATCH 07/10 net-next] page_pool: allow configurable linear cache size Jonathan Lemon
2019-10-17  8:51   ` Jesper Dangaard Brouer
2019-10-18 20:31     ` Jonathan Lemon
2019-10-16 22:50 ` [PATCH 08/10 net-next] page_pool: Add statistics Jonathan Lemon
2019-10-17  8:29   ` Jesper Dangaard Brouer
2019-10-18 21:29   ` Saeed Mahameed
2019-10-18 23:37     ` Jonathan Lemon
2019-10-16 22:50 ` [PATCH 09/10 net-next] net/mlx5: Add page_pool stats to the Mellanox driver Jonathan Lemon
2019-10-17 11:09   ` Jesper Dangaard Brouer
2019-10-18 20:45     ` Jonathan Lemon
2019-10-16 22:50 ` [PATCH 10/10 net-next] page_pool: Cleanup and rename page_pool functions Jonathan Lemon
2019-10-18 20:50 ` [PATCH 00/10 net-next] page_pool cleanups Saeed Mahameed
2019-10-18 21:21   ` Saeed Mahameed
2019-10-18 23:32   ` Jonathan Lemon
2019-10-21 19:08     ` Saeed Mahameed
2019-10-21 21:45       ` Jonathan Lemon
