* [RFC PATCH 0/1] Change page_pool timeout handling
@ 2019-11-11  6:20 Jonathan Lemon
  2019-11-11  6:20 ` [RFC PATCH 1/1] page_pool: do not release pool until inflight == 0 Jonathan Lemon
  0 siblings, 1 reply; 7+ messages in thread
From: Jonathan Lemon @ 2019-11-11  6:20 UTC (permalink / raw)
  To: netdev, brouer, ilias.apalodimas; +Cc: kernel-team

It isn't safe to remove the page pool until all in-flight pages 
are returned.  If the pages are handed up the stack, it might be
a while before they are returned.

The page pool can also be used independently of XDP, so it shouldn't
depend on XDP's timeout handling.  Change things around so the pool
handles its own timeout.

I wanted to send this out to clarify how I see things working.  This
is currently being tested with the mlx4/mlx5 drivers for attaching 
DMA-mapped pages to skbs, sending them up the stack, and then putting
them back in the page pool afterwards.
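
For reference, the driver-facing flow after this change looks roughly like
the sketch below (illustrative only; the priv/rxq names are placeholders,
not actual mlx4/mlx5 code):

	struct page_pool_params pp_params = {
		.flags		= PP_FLAG_DMA_MAP,
		.order		= 0,
		.pool_size	= 256,
		.nid		= NUMA_NO_NODE,
		.dev		= priv->dev,
		.dma_dir	= DMA_FROM_DEVICE,
	};

	rxq->page_pool = page_pool_create(&pp_params);
	err = xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq, MEM_TYPE_PAGE_POOL,
					 rxq->page_pool);

	/* Teardown: no page_pool_request_shutdown() polling any more; the
	 * pool frees itself once the last in-flight page comes back.
	 */
	xdp_rxq_info_unreg(&rxq->xdp_rxq);
	page_pool_destroy(rxq->page_pool);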

Jonathan Lemon (1):
  page_pool: do not release pool until inflight == 0.

 .../net/ethernet/stmicro/stmmac/stmmac_main.c |   4 +-
 include/net/page_pool.h                       |  55 +++-----
 include/net/xdp_priv.h                        |   4 -
 include/trace/events/xdp.h                    |  19 +--
 net/core/page_pool.c                          | 115 ++++++++++------
 net/core/xdp.c                                | 130 +++++++-----------
 6 files changed, 138 insertions(+), 189 deletions(-)

-- 
2.17.1


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [RFC PATCH 1/1] page_pool: do not release pool until inflight == 0.
  2019-11-11  6:20 [RFC PATCH 0/1] Change page_pool timeout handling Jonathan Lemon
@ 2019-11-11  6:20 ` Jonathan Lemon
  2019-11-11  9:21   ` Jesper Dangaard Brouer
  2019-11-11 11:47   ` Jesper Dangaard Brouer
  0 siblings, 2 replies; 7+ messages in thread
From: Jonathan Lemon @ 2019-11-11  6:20 UTC (permalink / raw)
  To: netdev, brouer, ilias.apalodimas; +Cc: kernel-team

The page pool keeps track of the number of pages in flight, and
it isn't safe to remove the pool until all pages are returned.

Disallow removing the pool until all pages are back, so the pool
is always available for page producers.

Make the page pool responsible for its own delayed destruction
instead of relying on XDP, so the page pool can be used without
xdp.

When all pages are returned, free the pool and notify XDP if the
pool is being used by XDP.  Perform a table walk since some
drivers (cpsw) may share the pool among multiple xdp_rxq_info.
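
In outline, the destruction path added here works as follows (a compressed
sketch of the code in this patch, not additional API):

	page_pool_destroy(pool);
	/* -> page_pool_put(pool): drop the caller's reference; the last
	 *    user carries on with the release
	 * -> page_pool_release(pool): scrub the alloc cache and ptr_ring,
	 *    then compute inflight = hold_cnt - release_cnt
	 * -> inflight == 0: page_pool_free() runs pool->disconnect()
	 *    (mem_allocator_disconnect() when registered via XDP) and
	 *    frees the pool
	 * -> inflight != 0: schedule release_dw, retried every DEFER_TIME
	 *    with a periodic stall warning
	 */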

Fixes: d956a048cd3f ("xdp: force mem allocator removal and periodic warning")

Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
---
 .../net/ethernet/stmicro/stmmac/stmmac_main.c |   4 +-
 include/net/page_pool.h                       |  55 +++-----
 include/net/xdp_priv.h                        |   4 -
 include/trace/events/xdp.h                    |  19 +--
 net/core/page_pool.c                          | 115 ++++++++++------
 net/core/xdp.c                                | 130 +++++++-----------
 6 files changed, 138 insertions(+), 189 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 199c4f938bb2..cd0d8a4bbeb3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1502,10 +1502,8 @@ static void free_dma_rx_desc_resources(struct stmmac_priv *priv)
 					  rx_q->dma_erx, rx_q->dma_rx_phy);
 
 		kfree(rx_q->buf_pool);
-		if (rx_q->page_pool) {
-			page_pool_request_shutdown(rx_q->page_pool);
+		if (rx_q->page_pool)
 			page_pool_destroy(rx_q->page_pool);
-		}
 	}
 }
 
diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index 2cbcdbdec254..7353ca5abb63 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -33,6 +33,7 @@
 #include <linux/mm.h> /* Needed by ptr_ring */
 #include <linux/ptr_ring.h>
 #include <linux/dma-direction.h>
+#include <net/xdp.h>
 
 #define PP_FLAG_DMA_MAP 1 /* Should page_pool do the DMA map/unmap */
 #define PP_FLAG_ALL	PP_FLAG_DMA_MAP
@@ -70,7 +71,12 @@ struct page_pool_params {
 struct page_pool {
 	struct page_pool_params p;
 
-        u32 pages_state_hold_cnt;
+	struct delayed_work release_dw;
+	void (*disconnect)(void *);
+	unsigned long defer_start;
+	unsigned long defer_warn;
+
+	u32 pages_state_hold_cnt;
 
 	/*
 	 * Data structure for allocation side
@@ -129,26 +135,20 @@ inline enum dma_data_direction page_pool_get_dma_dir(struct page_pool *pool)
 
 struct page_pool *page_pool_create(const struct page_pool_params *params);
 
-void __page_pool_free(struct page_pool *pool);
-static inline void page_pool_free(struct page_pool *pool)
-{
-	/* When page_pool isn't compiled-in, net/core/xdp.c doesn't
-	 * allow registering MEM_TYPE_PAGE_POOL, but shield linker.
-	 */
 #ifdef CONFIG_PAGE_POOL
-	__page_pool_free(pool);
-#endif
-}
-
-/* Drivers use this instead of page_pool_free */
+void page_pool_destroy(struct page_pool *pool);
+void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *));
+#else
 static inline void page_pool_destroy(struct page_pool *pool)
 {
-	if (!pool)
-		return;
-
-	page_pool_free(pool);
 }
 
+static inline void page_pool_use_xdp_mem(struct page_pool *pool,
+					 void (*disconnect)(void *))
+{
+}
+#endif
+
 /* Never call this directly, use helpers below */
 void __page_pool_put_page(struct page_pool *pool,
 			  struct page *page, bool allow_direct);
@@ -170,24 +170,6 @@ static inline void page_pool_recycle_direct(struct page_pool *pool,
 	__page_pool_put_page(pool, page, true);
 }
 
-/* API user MUST have disconnected alloc-side (not allowed to call
- * page_pool_alloc_pages()) before calling this.  The free-side can
- * still run concurrently, to handle in-flight packet-pages.
- *
- * A request to shutdown can fail (with false) if there are still
- * in-flight packet-pages.
- */
-bool __page_pool_request_shutdown(struct page_pool *pool);
-static inline bool page_pool_request_shutdown(struct page_pool *pool)
-{
-	bool safe_to_remove = false;
-
-#ifdef CONFIG_PAGE_POOL
-	safe_to_remove = __page_pool_request_shutdown(pool);
-#endif
-	return safe_to_remove;
-}
-
 /* Disconnects a page (from a page_pool).  API users can have a need
  * to disconnect a page (from a page_pool), to allow it to be used as
  * a regular page (that will eventually be returned to the normal
@@ -216,11 +198,6 @@ static inline bool is_page_pool_compiled_in(void)
 #endif
 }
 
-static inline void page_pool_get(struct page_pool *pool)
-{
-	refcount_inc(&pool->user_cnt);
-}
-
 static inline bool page_pool_put(struct page_pool *pool)
 {
 	return refcount_dec_and_test(&pool->user_cnt);
diff --git a/include/net/xdp_priv.h b/include/net/xdp_priv.h
index 6a8cba6ea79a..a9d5b7603b89 100644
--- a/include/net/xdp_priv.h
+++ b/include/net/xdp_priv.h
@@ -12,12 +12,8 @@ struct xdp_mem_allocator {
 		struct page_pool *page_pool;
 		struct zero_copy_allocator *zc_alloc;
 	};
-	int disconnect_cnt;
-	unsigned long defer_start;
 	struct rhash_head node;
 	struct rcu_head rcu;
-	struct delayed_work defer_wq;
-	unsigned long defer_warn;
 };
 
 #endif /* __LINUX_NET_XDP_PRIV_H__ */
diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h
index c7e3c9c5bad3..a7378bcd9928 100644
--- a/include/trace/events/xdp.h
+++ b/include/trace/events/xdp.h
@@ -317,19 +317,15 @@ __MEM_TYPE_MAP(__MEM_TYPE_TP_FN)
 
 TRACE_EVENT(mem_disconnect,
 
-	TP_PROTO(const struct xdp_mem_allocator *xa,
-		 bool safe_to_remove, bool force),
+	TP_PROTO(const struct xdp_mem_allocator *xa),
 
-	TP_ARGS(xa, safe_to_remove, force),
+	TP_ARGS(xa),
 
 	TP_STRUCT__entry(
 		__field(const struct xdp_mem_allocator *,	xa)
 		__field(u32,		mem_id)
 		__field(u32,		mem_type)
 		__field(const void *,	allocator)
-		__field(bool,		safe_to_remove)
-		__field(bool,		force)
-		__field(int,		disconnect_cnt)
 	),
 
 	TP_fast_assign(
@@ -337,19 +333,12 @@ TRACE_EVENT(mem_disconnect,
 		__entry->mem_id		= xa->mem.id;
 		__entry->mem_type	= xa->mem.type;
 		__entry->allocator	= xa->allocator;
-		__entry->safe_to_remove	= safe_to_remove;
-		__entry->force		= force;
-		__entry->disconnect_cnt	= xa->disconnect_cnt;
 	),
 
-	TP_printk("mem_id=%d mem_type=%s allocator=%p"
-		  " safe_to_remove=%s force=%s disconnect_cnt=%d",
+	TP_printk("mem_id=%d mem_type=%s allocator=%p",
 		  __entry->mem_id,
 		  __print_symbolic(__entry->mem_type, __MEM_TYPE_SYM_TAB),
-		  __entry->allocator,
-		  __entry->safe_to_remove ? "true" : "false",
-		  __entry->force ? "true" : "false",
-		  __entry->disconnect_cnt
+		  __entry->allocator
 	)
 );
 
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 5bc65587f1c4..bfe96326335d 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -18,6 +18,9 @@
 
 #include <trace/events/page_pool.h>
 
+#define DEFER_TIME (msecs_to_jiffies(1000))
+#define DEFER_WARN_INTERVAL (60 * HZ)
+
 static int page_pool_init(struct page_pool *pool,
 			  const struct page_pool_params *params)
 {
@@ -193,22 +196,14 @@ static s32 page_pool_inflight(struct page_pool *pool)
 {
 	u32 release_cnt = atomic_read(&pool->pages_state_release_cnt);
 	u32 hold_cnt = READ_ONCE(pool->pages_state_hold_cnt);
-	s32 distance;
+	s32 inflight;
 
-	distance = _distance(hold_cnt, release_cnt);
+	inflight = _distance(hold_cnt, release_cnt);
 
-	trace_page_pool_inflight(pool, distance, hold_cnt, release_cnt);
-	return distance;
-}
-
-static bool __page_pool_safe_to_destroy(struct page_pool *pool)
-{
-	s32 inflight = page_pool_inflight(pool);
-
-	/* The distance should not be able to become negative */
+	trace_page_pool_inflight(pool, inflight, hold_cnt, release_cnt);
 	WARN(inflight < 0, "Negative(%d) inflight packet-pages", inflight);
 
-	return (inflight == 0);
+	return inflight;
 }
 
 /* Cleanup page_pool state from page */
@@ -338,31 +333,10 @@ static void __page_pool_empty_ring(struct page_pool *pool)
 	}
 }
 
-static void __warn_in_flight(struct page_pool *pool)
+static void page_pool_free(struct page_pool *pool)
 {
-	u32 release_cnt = atomic_read(&pool->pages_state_release_cnt);
-	u32 hold_cnt = READ_ONCE(pool->pages_state_hold_cnt);
-	s32 distance;
-
-	distance = _distance(hold_cnt, release_cnt);
-
-	/* Drivers should fix this, but only problematic when DMA is used */
-	WARN(1, "Still in-flight pages:%d hold:%u released:%u",
-	     distance, hold_cnt, release_cnt);
-}
-
-void __page_pool_free(struct page_pool *pool)
-{
-	/* Only last user actually free/release resources */
-	if (!page_pool_put(pool))
-		return;
-
-	WARN(pool->alloc.count, "API usage violation");
-	WARN(!ptr_ring_empty(&pool->ring), "ptr_ring is not empty");
-
-	/* Can happen due to forced shutdown */
-	if (!__page_pool_safe_to_destroy(pool))
-		__warn_in_flight(pool);
+	if (pool->disconnect)
+		pool->disconnect(pool);
 
 	ptr_ring_cleanup(&pool->ring, NULL);
 
@@ -371,12 +345,8 @@ void __page_pool_free(struct page_pool *pool)
 
 	kfree(pool);
 }
-EXPORT_SYMBOL(__page_pool_free);
 
-/* Request to shutdown: release pages cached by page_pool, and check
- * for in-flight pages
- */
-bool __page_pool_request_shutdown(struct page_pool *pool)
+static void page_pool_scrub(struct page_pool *pool)
 {
 	struct page *page;
 
@@ -393,7 +363,64 @@ bool __page_pool_request_shutdown(struct page_pool *pool)
 	 * be in-flight.
 	 */
 	__page_pool_empty_ring(pool);
-
-	return __page_pool_safe_to_destroy(pool);
 }
-EXPORT_SYMBOL(__page_pool_request_shutdown);
+
+static int page_pool_release(struct page_pool *pool)
+{
+	int inflight;
+
+	page_pool_scrub(pool);
+	inflight = page_pool_inflight(pool);
+	if (!inflight)
+		page_pool_free(pool);
+
+	return inflight;
+}
+
+static void page_pool_release_retry(struct work_struct *wq)
+{
+	struct delayed_work *dwq = to_delayed_work(wq);
+	struct page_pool *pool = container_of(dwq, typeof(*pool), release_dw);
+	int inflight;
+
+	inflight = page_pool_release(pool);
+	if (!inflight)
+		return;
+
+	/* Periodic warning */
+	if (time_after_eq(jiffies, pool->defer_warn)) {
+		int sec = (s32)((u32)jiffies - (u32)pool->defer_start) / HZ;
+
+		pr_warn("%s() stalled pool shutdown %d inflight %d sec\n",
+			__func__, inflight, sec);
+		pool->defer_warn = jiffies + DEFER_WARN_INTERVAL;
+	}
+
+	/* Still not ready to be disconnected, retry later */
+	schedule_delayed_work(&pool->release_dw, DEFER_TIME);
+}
+
+void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *))
+{
+	refcount_inc(&pool->user_cnt);
+	pool->disconnect = disconnect;
+}
+
+void page_pool_destroy(struct page_pool *pool)
+{
+	if (!pool)
+		return;
+
+	if (!page_pool_put(pool))
+		return;
+
+	if (!page_pool_release(pool))
+		return;
+
+	pool->defer_start = jiffies;
+	pool->defer_warn  = jiffies + DEFER_WARN_INTERVAL;
+
+	INIT_DELAYED_WORK(&pool->release_dw, page_pool_release_retry);
+	schedule_delayed_work(&pool->release_dw, DEFER_TIME);
+}
+EXPORT_SYMBOL(page_pool_destroy);
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 20781ad5f9c3..e334fad0a6b8 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -70,25 +70,47 @@ static void __xdp_mem_allocator_rcu_free(struct rcu_head *rcu)
 
 	xa = container_of(rcu, struct xdp_mem_allocator, rcu);
 
-	/* Allocator have indicated safe to remove before this is called */
-	if (xa->mem.type == MEM_TYPE_PAGE_POOL)
-		page_pool_free(xa->page_pool);
-
 	/* Allow this ID to be reused */
 	ida_simple_remove(&mem_id_pool, xa->mem.id);
 
-	/* Poison memory */
-	xa->mem.id = 0xFFFF;
-	xa->mem.type = 0xF0F0;
-	xa->allocator = (void *)0xDEAD9001;
-
 	kfree(xa);
 }
 
-static bool __mem_id_disconnect(int id, bool force)
+static void mem_xa_remove(struct xdp_mem_allocator *xa)
+{
+	trace_mem_disconnect(xa);
+
+	mutex_lock(&mem_id_lock);
+
+	if (!rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params))
+		call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free);
+
+	mutex_unlock(&mem_id_lock);
+}
+
+static void mem_allocator_disconnect(void *allocator)
+{
+	struct xdp_mem_allocator *xa;
+	struct rhashtable_iter iter;
+
+	rhashtable_walk_enter(mem_id_ht, &iter);
+	do {
+		rhashtable_walk_start(&iter);
+
+		while ((xa = rhashtable_walk_next(&iter)) && !IS_ERR(xa)) {
+			if (xa->allocator == allocator)
+				mem_xa_remove(xa);
+		}
+
+		rhashtable_walk_stop(&iter);
+
+	} while (xa == ERR_PTR(-EAGAIN));
+	rhashtable_walk_exit(&iter);
+}
+
+static void mem_id_disconnect(int id)
 {
 	struct xdp_mem_allocator *xa;
-	bool safe_to_remove = true;
 
 	mutex_lock(&mem_id_lock);
 
@@ -96,51 +118,15 @@ static bool __mem_id_disconnect(int id, bool force)
 	if (!xa) {
 		mutex_unlock(&mem_id_lock);
 		WARN(1, "Request remove non-existing id(%d), driver bug?", id);
-		return true;
+		return;
 	}
-	xa->disconnect_cnt++;
 
-	/* Detects in-flight packet-pages for page_pool */
-	if (xa->mem.type == MEM_TYPE_PAGE_POOL)
-		safe_to_remove = page_pool_request_shutdown(xa->page_pool);
+	trace_mem_disconnect(xa);
 
-	trace_mem_disconnect(xa, safe_to_remove, force);
-
-	if ((safe_to_remove || force) &&
-	    !rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params))
+	if (!rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params))
 		call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free);
 
 	mutex_unlock(&mem_id_lock);
-	return (safe_to_remove|force);
-}
-
-#define DEFER_TIME (msecs_to_jiffies(1000))
-#define DEFER_WARN_INTERVAL (30 * HZ)
-#define DEFER_MAX_RETRIES 120
-
-static void mem_id_disconnect_defer_retry(struct work_struct *wq)
-{
-	struct delayed_work *dwq = to_delayed_work(wq);
-	struct xdp_mem_allocator *xa = container_of(dwq, typeof(*xa), defer_wq);
-	bool force = false;
-
-	if (xa->disconnect_cnt > DEFER_MAX_RETRIES)
-		force = true;
-
-	if (__mem_id_disconnect(xa->mem.id, force))
-		return;
-
-	/* Periodic warning */
-	if (time_after_eq(jiffies, xa->defer_warn)) {
-		int sec = (s32)((u32)jiffies - (u32)xa->defer_start) / HZ;
-
-		pr_warn("%s() stalled mem.id=%u shutdown %d attempts %d sec\n",
-			__func__, xa->mem.id, xa->disconnect_cnt, sec);
-		xa->defer_warn = jiffies + DEFER_WARN_INTERVAL;
-	}
-
-	/* Still not ready to be disconnected, retry later */
-	schedule_delayed_work(&xa->defer_wq, DEFER_TIME);
 }
 
 void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
@@ -153,38 +139,21 @@ void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
 		return;
 	}
 
-	if (xdp_rxq->mem.type != MEM_TYPE_PAGE_POOL &&
-	    xdp_rxq->mem.type != MEM_TYPE_ZERO_COPY) {
-		return;
-	}
-
 	if (id == 0)
 		return;
 
-	if (__mem_id_disconnect(id, false))
-		return;
+	if (xdp_rxq->mem.type == MEM_TYPE_ZERO_COPY)
+		return mem_id_disconnect(id);
 
-	/* Could not disconnect, defer new disconnect attempt to later */
-	mutex_lock(&mem_id_lock);
-
-	xa = rhashtable_lookup_fast(mem_id_ht, &id, mem_id_rht_params);
-	if (!xa) {
-		mutex_unlock(&mem_id_lock);
-		return;
+	if (xdp_rxq->mem.type == MEM_TYPE_PAGE_POOL) {
+		rcu_read_lock();
+		xa = rhashtable_lookup(mem_id_ht, &id, mem_id_rht_params);
+		page_pool_destroy(xa->page_pool);
+		rcu_read_unlock();
 	}
-	xa->defer_start = jiffies;
-	xa->defer_warn  = jiffies + DEFER_WARN_INTERVAL;
-
-	INIT_DELAYED_WORK(&xa->defer_wq, mem_id_disconnect_defer_retry);
-	mutex_unlock(&mem_id_lock);
-	schedule_delayed_work(&xa->defer_wq, DEFER_TIME);
 }
 EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg_mem_model);
 
-/* This unregister operation will also cleanup and destroy the
- * allocator. The page_pool_free() operation is first called when it's
- * safe to remove, possibly deferred to a workqueue.
- */
 void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
 {
 	/* Simplify driver cleanup code paths, allow unreg "unused" */
@@ -371,7 +340,7 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
 	}
 
 	if (type == MEM_TYPE_PAGE_POOL)
-		page_pool_get(xdp_alloc->page_pool);
+		page_pool_use_xdp_mem(allocator, mem_allocator_disconnect);
 
 	mutex_unlock(&mem_id_lock);
 
@@ -402,15 +371,8 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
 		/* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */
 		xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
 		page = virt_to_head_page(data);
-		if (likely(xa)) {
-			napi_direct &= !xdp_return_frame_no_direct();
-			page_pool_put_page(xa->page_pool, page, napi_direct);
-		} else {
-			/* Hopefully stack show who to blame for late return */
-			WARN_ONCE(1, "page_pool gone mem.id=%d", mem->id);
-			trace_mem_return_failed(mem, page);
-			put_page(page);
-		}
+		napi_direct &= !xdp_return_frame_no_direct();
+		page_pool_put_page(xa->page_pool, page, napi_direct);
 		rcu_read_unlock();
 		break;
 	case MEM_TYPE_PAGE_SHARED:
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [RFC PATCH 1/1] page_pool: do not release pool until inflight == 0.
  2019-11-11  6:20 ` [RFC PATCH 1/1] page_pool: do not release pool until inflight == 0 Jonathan Lemon
@ 2019-11-11  9:21   ` Jesper Dangaard Brouer
  2019-11-11 16:16     ` Jonathan Lemon
  2019-11-11 11:47   ` Jesper Dangaard Brouer
  1 sibling, 1 reply; 7+ messages in thread
From: Jesper Dangaard Brouer @ 2019-11-11  9:21 UTC (permalink / raw)
  To: Jonathan Lemon; +Cc: netdev, ilias.apalodimas, kernel-team, brouer

On Sun, 10 Nov 2019 22:20:38 -0800
Jonathan Lemon <jonathan.lemon@gmail.com> wrote:

> diff --git a/net/core/xdp.c b/net/core/xdp.c
> index 20781ad5f9c3..e334fad0a6b8 100644
> --- a/net/core/xdp.c
> +++ b/net/core/xdp.c
> @@ -70,25 +70,47 @@ static void __xdp_mem_allocator_rcu_free(struct rcu_head *rcu)
>  
>  	xa = container_of(rcu, struct xdp_mem_allocator, rcu);
>  
> -	/* Allocator have indicated safe to remove before this is called */
> -	if (xa->mem.type == MEM_TYPE_PAGE_POOL)
> -		page_pool_free(xa->page_pool);
> -
>  	/* Allow this ID to be reused */
>  	ida_simple_remove(&mem_id_pool, xa->mem.id);
>  
> -	/* Poison memory */
> -	xa->mem.id = 0xFFFF;
> -	xa->mem.type = 0xF0F0;
> -	xa->allocator = (void *)0xDEAD9001;
> -
>  	kfree(xa);
>  }

Can you PLEASE leave the memory poisonings that I have added alone.
Removing these is irrelevant to the current patch. You clearly don't
like this approach, but I've also clearly said that I disagree.  I'm
the maintainer of this code and I prefer letting them stay. I'm the
one that signed up for dealing with hard-to-find bugs in the code.

I'll try to explain again, hopefully one last time.  You argue that the
memory subsystem already has use-after-free detection, e.g. via
kmemleak.  I argue that these facilities change the timing so much
that the race condition will not be provoked when they are enabled.
This is not theoretical: I've seen bugzilla cases consume a huge
amount of support and engineering resources trying to track down bugs
that would disappear once the debug facility was enabled.

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC PATCH 1/1] page_pool: do not release pool until inflight == 0.
  2019-11-11  6:20 ` [RFC PATCH 1/1] page_pool: do not release pool until inflight == 0 Jonathan Lemon
  2019-11-11  9:21   ` Jesper Dangaard Brouer
@ 2019-11-11 11:47   ` Jesper Dangaard Brouer
  2019-11-11 14:04     ` Ilias Apalodimas
  2019-11-12  6:00     ` Jonathan Lemon
  1 sibling, 2 replies; 7+ messages in thread
From: Jesper Dangaard Brouer @ 2019-11-11 11:47 UTC (permalink / raw)
  To: Jonathan Lemon; +Cc: netdev, ilias.apalodimas, kernel-team, brouer

On Sun, 10 Nov 2019 22:20:38 -0800
Jonathan Lemon <jonathan.lemon@gmail.com> wrote:

> The page pool keeps track of the number of pages in flight, and
> it isn't safe to remove the pool until all pages are returned.
> 
> Disallow removing the pool until all pages are back, so the pool
> is always available for page producers.
> 
> Make the page pool responsible for its own delayed destruction

I like this part, making page_pool responsible for its own delayed
destruction.  I originally also wanted to do this, but got stuck on
mem.id getting removed prematurely from rhashtable.  You actually
solved this, via introducing a disconnect callback, from page_pool into
mem_allocator_disconnect(). I like it.

> instead of relying on XDP, so the page pool can be used without
> xdp.

This is a misconception: the xdp_rxq_info_reg_mem_model API does not
imply the driver is using XDP.  Yes, I know the naming is sort of
wrong, as it contains "xdp"; the same goes for the xdp_mem_info name.
Ilias and I have discussed renaming these several times.

The longer term plan is/was to use this (xdp_)mem_info as a generic
return path for SKBs, creating a more flexible memory model for
networking.  This patch is fine and in itself does not disrupt/change
that, but your offlist changes do.  As your offlist changes do imply
a performance gain, I will likely accept this (and then find another
plan for a more flexible memory model for networking).


> When all pages are returned, free the pool and notify xdp if the
> pool is being being used by xdp.  Perform a table walk since some
> drivers (cpsw) may share the pool among multiple xdp_rxq_info.

I misunderstood this description; only after reading the code in
detail did I realize that it describes your disconnect callback, and
how the mem.id removal is made safe by being delayed until after all
pages are returned.  The notes below on the code were just for me to
follow this disconnect callback system, which I think is fine... I've
left them in case others also want to double-check the correctness.
 
> Fixes: d956a048cd3f ("xdp: force mem allocator removal and periodic warning")
> 
No newline between "Fixes" line and :Signed-off-by:

> Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
> ---
>  .../net/ethernet/stmicro/stmmac/stmmac_main.c |   4 +-
>  include/net/page_pool.h                       |  55 +++-----
>  include/net/xdp_priv.h                        |   4 -
>  include/trace/events/xdp.h                    |  19 +--
>  net/core/page_pool.c                          | 115 ++++++++++------
>  net/core/xdp.c                                | 130 +++++++-----------
[...]


> diff --git a/net/core/page_pool.c b/net/core/page_pool.c
> index 5bc65587f1c4..bfe96326335d 100644
> --- a/net/core/page_pool.c
> +++ b/net/core/page_pool.c
[...]
>  /* Cleanup page_pool state from page */
> @@ -338,31 +333,10 @@ static void __page_pool_empty_ring(struct page_pool *pool)
>  	}
>  }
>  
> -static void __warn_in_flight(struct page_pool *pool)
> +static void page_pool_free(struct page_pool *pool)
>  {
> -	u32 release_cnt = atomic_read(&pool->pages_state_release_cnt);
> -	u32 hold_cnt = READ_ONCE(pool->pages_state_hold_cnt);
> -	s32 distance;
> -
> -	distance = _distance(hold_cnt, release_cnt);
> -
> -	/* Drivers should fix this, but only problematic when DMA is used */
> -	WARN(1, "Still in-flight pages:%d hold:%u released:%u",
> -	     distance, hold_cnt, release_cnt);
> -}
> -
> -void __page_pool_free(struct page_pool *pool)
> -{
> -	/* Only last user actually free/release resources */
> -	if (!page_pool_put(pool))
> -		return;
> -
> -	WARN(pool->alloc.count, "API usage violation");
> -	WARN(!ptr_ring_empty(&pool->ring), "ptr_ring is not empty");
> -
> -	/* Can happen due to forced shutdown */
> -	if (!__page_pool_safe_to_destroy(pool))
> -		__warn_in_flight(pool);
> +	if (pool->disconnect)
> +		pool->disconnect(pool);

Callback to mem reg system.

>  
>  	ptr_ring_cleanup(&pool->ring, NULL);
>  
> @@ -371,12 +345,8 @@ void __page_pool_free(struct page_pool *pool)
>  
>  	kfree(pool);
>  }
> -EXPORT_SYMBOL(__page_pool_free);
>  
> -/* Request to shutdown: release pages cached by page_pool, and check
> - * for in-flight pages
> - */
> -bool __page_pool_request_shutdown(struct page_pool *pool)
> +static void page_pool_scrub(struct page_pool *pool)
>  {
>  	struct page *page;
>  
> @@ -393,7 +363,64 @@ bool __page_pool_request_shutdown(struct page_pool *pool)
>  	 * be in-flight.
>  	 */
>  	__page_pool_empty_ring(pool);
> -
> -	return __page_pool_safe_to_destroy(pool);
>  }
> -EXPORT_SYMBOL(__page_pool_request_shutdown);
> +
> +static int page_pool_release(struct page_pool *pool)
> +{
> +	int inflight;
> +
> +	page_pool_scrub(pool);
> +	inflight = page_pool_inflight(pool);
> +	if (!inflight)
> +		page_pool_free(pool);
> +
> +	return inflight;
> +}
> +
> +static void page_pool_release_retry(struct work_struct *wq)
> +{
> +	struct delayed_work *dwq = to_delayed_work(wq);
> +	struct page_pool *pool = container_of(dwq, typeof(*pool), release_dw);
> +	int inflight;
> +
> +	inflight = page_pool_release(pool);
> +	if (!inflight)
> +		return;
> +
> +	/* Periodic warning */
> +	if (time_after_eq(jiffies, pool->defer_warn)) {
> +		int sec = (s32)((u32)jiffies - (u32)pool->defer_start) / HZ;
> +
> +		pr_warn("%s() stalled pool shutdown %d inflight %d sec\n",
> +			__func__, inflight, sec);
> +		pool->defer_warn = jiffies + DEFER_WARN_INTERVAL;
> +	}
> +
> +	/* Still not ready to be disconnected, retry later */
> +	schedule_delayed_work(&pool->release_dw, DEFER_TIME);
> +}
> +
> +void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *))
> +{
> +	refcount_inc(&pool->user_cnt);
> +	pool->disconnect = disconnect;
> +}

Function page_pool_use_xdp_mem is used by xdp.c to register the callback.

> +void page_pool_destroy(struct page_pool *pool)
> +{
> +	if (!pool)
> +		return;
> +
> +	if (!page_pool_put(pool))
> +		return;
> +
> +	if (!page_pool_release(pool))
> +		return;
> +
> +	pool->defer_start = jiffies;
> +	pool->defer_warn  = jiffies + DEFER_WARN_INTERVAL;
> +
> +	INIT_DELAYED_WORK(&pool->release_dw, page_pool_release_retry);
> +	schedule_delayed_work(&pool->release_dw, DEFER_TIME);
> +}
> +EXPORT_SYMBOL(page_pool_destroy);
> diff --git a/net/core/xdp.c b/net/core/xdp.c
> index 20781ad5f9c3..e334fad0a6b8 100644
> --- a/net/core/xdp.c
> +++ b/net/core/xdp.c
>  
>  void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
> @@ -153,38 +139,21 @@ void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
[...]
> +	if (xdp_rxq->mem.type == MEM_TYPE_PAGE_POOL) {
> +		rcu_read_lock();
> +		xa = rhashtable_lookup(mem_id_ht, &id, mem_id_rht_params);
> +		page_pool_destroy(xa->page_pool);
> +		rcu_read_unlock();
>  	}
[...]

Calling page_pool_destroy() instead of mem_allocator_disconnect().


> @@ -371,7 +340,7 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
>  	}
>  
>  	if (type == MEM_TYPE_PAGE_POOL)
> -		page_pool_get(xdp_alloc->page_pool);
> +		page_pool_use_xdp_mem(allocator, mem_allocator_disconnect);

Register callback to mem_allocator_disconnect().

>  
>  	mutex_unlock(&mem_id_lock);
>  
> @@ -402,15 +371,8 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
>  		/* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */
>  		xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
>  		page = virt_to_head_page(data);
> -		if (likely(xa)) {
> -			napi_direct &= !xdp_return_frame_no_direct();
> -			page_pool_put_page(xa->page_pool, page, napi_direct);
> -		} else {
> -			/* Hopefully stack show who to blame for late return */
> -			WARN_ONCE(1, "page_pool gone mem.id=%d", mem->id);
> -			trace_mem_return_failed(mem, page);
> -			put_page(page);
> -		}
> +		napi_direct &= !xdp_return_frame_no_direct();
> +		page_pool_put_page(xa->page_pool, page, napi_direct);
>  		rcu_read_unlock();
>  		break;
>  	case MEM_TYPE_PAGE_SHARED:

This should be correct.

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC PATCH 1/1] page_pool: do not release pool until inflight == 0.
  2019-11-11 11:47   ` Jesper Dangaard Brouer
@ 2019-11-11 14:04     ` Ilias Apalodimas
  2019-11-12  6:00     ` Jonathan Lemon
  1 sibling, 0 replies; 7+ messages in thread
From: Ilias Apalodimas @ 2019-11-11 14:04 UTC (permalink / raw)
  To: Jesper Dangaard Brouer; +Cc: Jonathan Lemon, netdev, kernel-team

Hi Jesper, Jonathan, 

On Mon, Nov 11, 2019 at 12:47:21PM +0100, Jesper Dangaard Brouer wrote:
> On Sun, 10 Nov 2019 22:20:38 -0800
> Jonathan Lemon <jonathan.lemon@gmail.com> wrote:
> 
> > The page pool keeps track of the number of pages in flight, and
> > it isn't safe to remove the pool until all pages are returned.
> > 
> > Disallow removing the pool until all pages are back, so the pool
> > is always available for page producers.
> > 
> > Make the page pool responsible for its own delayed destruction
> 
> I like this part, making page_pool responsible for its own delayed
> destruction.  I originally also wanted to do this, but got stuck on
> mem.id getting removed prematurely from rhashtable.  You actually
> solved this, via introducing a disconnect callback, from page_pool into
> mem_allocator_disconnect(). I like it.

+1 here. It seems cleaner since we'll keep the pool alive until all pages have
been dealt with.

> 
> > instead of relying on XDP, so the page pool can be used without
> > xdp.
> 
> This is a misconception, the xdp_rxq_info_reg_mem_model API does not
> imply driver is using XDP.  Yes, I know the naming is sort of wrong,
> contains "xdp". Also the xdp_mem_info name.  Ilias and I have discussed
> to rename this several times.

Yes. We'll try to document that since it's a bit confusing :(

> 
> The longer term plan is/was to use this (xdp_)mem_info as generic
> return path for SKBs, creating a more flexible memory model for
> networking.  This patch is fine and in itself does not disrupt/change
> that, but your offlist changes does.  As your offlist changes does
> imply a performance gain, I will likely accept this (and then find
> another plan for more flexible memory model for networking).
> 

Keep in mind the stmmac driver is using the pool differently.
They figured out that mapping and unmapping was more expensive than
copying the memory on that hardware.
So the NAPI Rx handler copies the page pool data into an skb and recycles the
buffer immediately.  They should have 0 in-flight pages during teardown.
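
The pattern is roughly the following (a sketch of the idea, not the actual
stmmac code; the napi/pool/page/len names are assumed):

	/* Copy the payload out and hand the page straight back to the
	 * pool, so nothing stays in flight past the RX handler.
	 */
	skb = napi_alloc_skb(napi, len);
	if (skb) {
		skb_copy_to_linear_data(skb, page_address(page), len);
		skb_put(skb, len);
		napi_gro_receive(napi, skb);
	}
	page_pool_recycle_direct(pool, page);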

> 
> > When all pages are returned, free the pool and notify xdp if the
> > pool is being being used by xdp.  Perform a table walk since some
> > drivers (cpsw) may share the pool among multiple xdp_rxq_info.
> 
> I misunderstood this description, first after reading the code in
> details, I realized that this describe your disconnect callback.  And
> how the mem.id removal is safe, by being delayed until after all pages
> are returned.   The notes below is the code, was just for me to follow
> this disconnect callback system, which I think is fine... left it if
> others also want to double check the correctness.
>  
> > Fixes: d956a048cd3f ("xdp: force mem allocator removal and periodic warning")
> > 
> No newline between "Fixes" line and :Signed-off-by:
> 

Thanks for working on this
/Ilias

> > Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
> > ---
> >  .../net/ethernet/stmicro/stmmac/stmmac_main.c |   4 +-
> >  include/net/page_pool.h                       |  55 +++-----
> >  include/net/xdp_priv.h                        |   4 -
> >  include/trace/events/xdp.h                    |  19 +--
> >  net/core/page_pool.c                          | 115 ++++++++++------
> >  net/core/xdp.c                                | 130 +++++++-----------
> [...]
> 
> 
> > diff --git a/net/core/page_pool.c b/net/core/page_pool.c
> > index 5bc65587f1c4..bfe96326335d 100644
> > --- a/net/core/page_pool.c
> > +++ b/net/core/page_pool.c
> [...]
> >  /* Cleanup page_pool state from page */
> > @@ -338,31 +333,10 @@ static void __page_pool_empty_ring(struct page_pool *pool)
> >  	}
> >  }
> >  
> > -static void __warn_in_flight(struct page_pool *pool)
> > +static void page_pool_free(struct page_pool *pool)
> >  {
> > -	u32 release_cnt = atomic_read(&pool->pages_state_release_cnt);
> > -	u32 hold_cnt = READ_ONCE(pool->pages_state_hold_cnt);
> > -	s32 distance;
> > -
> > -	distance = _distance(hold_cnt, release_cnt);
> > -
> > -	/* Drivers should fix this, but only problematic when DMA is used */
> > -	WARN(1, "Still in-flight pages:%d hold:%u released:%u",
> > -	     distance, hold_cnt, release_cnt);
> > -}
> > -
> > -void __page_pool_free(struct page_pool *pool)
> > -{
> > -	/* Only last user actually free/release resources */
> > -	if (!page_pool_put(pool))
> > -		return;
> > -
> > -	WARN(pool->alloc.count, "API usage violation");
> > -	WARN(!ptr_ring_empty(&pool->ring), "ptr_ring is not empty");
> > -
> > -	/* Can happen due to forced shutdown */
> > -	if (!__page_pool_safe_to_destroy(pool))
> > -		__warn_in_flight(pool);
> > +	if (pool->disconnect)
> > +		pool->disconnect(pool);
> 
> Callback to mem reg system.
> 
> >  
> >  	ptr_ring_cleanup(&pool->ring, NULL);
> >  
> > @@ -371,12 +345,8 @@ void __page_pool_free(struct page_pool *pool)
> >  
> >  	kfree(pool);
> >  }
> > -EXPORT_SYMBOL(__page_pool_free);
> >  
> > -/* Request to shutdown: release pages cached by page_pool, and check
> > - * for in-flight pages
> > - */
> > -bool __page_pool_request_shutdown(struct page_pool *pool)
> > +static void page_pool_scrub(struct page_pool *pool)
> >  {
> >  	struct page *page;
> >  
> > @@ -393,7 +363,64 @@ bool __page_pool_request_shutdown(struct page_pool *pool)
> >  	 * be in-flight.
> >  	 */
> >  	__page_pool_empty_ring(pool);
> > -
> > -	return __page_pool_safe_to_destroy(pool);
> >  }
> > -EXPORT_SYMBOL(__page_pool_request_shutdown);
> > +
> > +static int page_pool_release(struct page_pool *pool)
> > +{
> > +	int inflight;
> > +
> > +	page_pool_scrub(pool);
> > +	inflight = page_pool_inflight(pool);
> > +	if (!inflight)
> > +		page_pool_free(pool);
> > +
> > +	return inflight;
> > +}
> > +
> > +static void page_pool_release_retry(struct work_struct *wq)
> > +{
> > +	struct delayed_work *dwq = to_delayed_work(wq);
> > +	struct page_pool *pool = container_of(dwq, typeof(*pool), release_dw);
> > +	int inflight;
> > +
> > +	inflight = page_pool_release(pool);
> > +	if (!inflight)
> > +		return;
> > +
> > +	/* Periodic warning */
> > +	if (time_after_eq(jiffies, pool->defer_warn)) {
> > +		int sec = (s32)((u32)jiffies - (u32)pool->defer_start) / HZ;
> > +
> > +		pr_warn("%s() stalled pool shutdown %d inflight %d sec\n",
> > +			__func__, inflight, sec);
> > +		pool->defer_warn = jiffies + DEFER_WARN_INTERVAL;
> > +	}
> > +
> > +	/* Still not ready to be disconnected, retry later */
> > +	schedule_delayed_work(&pool->release_dw, DEFER_TIME);
> > +}
> > +
> > +void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *))
> > +{
> > +	refcount_inc(&pool->user_cnt);
> > +	pool->disconnect = disconnect;
> > +}
> 
> Function page_pool_use_xdp_mem is used by xdp.c to register the callback.
> 
> > +void page_pool_destroy(struct page_pool *pool)
> > +{
> > +	if (!pool)
> > +		return;
> > +
> > +	if (!page_pool_put(pool))
> > +		return;
> > +
> > +	if (!page_pool_release(pool))
> > +		return;
> > +
> > +	pool->defer_start = jiffies;
> > +	pool->defer_warn  = jiffies + DEFER_WARN_INTERVAL;
> > +
> > +	INIT_DELAYED_WORK(&pool->release_dw, page_pool_release_retry);
> > +	schedule_delayed_work(&pool->release_dw, DEFER_TIME);
> > +}
> > +EXPORT_SYMBOL(page_pool_destroy);
> > diff --git a/net/core/xdp.c b/net/core/xdp.c
> > index 20781ad5f9c3..e334fad0a6b8 100644
> > --- a/net/core/xdp.c
> > +++ b/net/core/xdp.c
> >  
> >  void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
> > @@ -153,38 +139,21 @@ void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
> [...]
> > +	if (xdp_rxq->mem.type == MEM_TYPE_PAGE_POOL) {
> > +		rcu_read_lock();
> > +		xa = rhashtable_lookup(mem_id_ht, &id, mem_id_rht_params);
> > +		page_pool_destroy(xa->page_pool);
> > +		rcu_read_unlock();
> >  	}
> [...]
> 
> Calling page_pool_destroy() instead of mem_allocator_disconnect().
> 
> 
> > @@ -371,7 +340,7 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
> >  	}
> >  
> >  	if (type == MEM_TYPE_PAGE_POOL)
> > -		page_pool_get(xdp_alloc->page_pool);
> > +		page_pool_use_xdp_mem(allocator, mem_allocator_disconnect);
> 
> Register callback to mem_allocator_disconnect().
> 
> >  
> >  	mutex_unlock(&mem_id_lock);
> >  
> > @@ -402,15 +371,8 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
> >  		/* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */
> >  		xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
> >  		page = virt_to_head_page(data);
> > -		if (likely(xa)) {
> > -			napi_direct &= !xdp_return_frame_no_direct();
> > -			page_pool_put_page(xa->page_pool, page, napi_direct);
> > -		} else {
> > -			/* Hopefully stack show who to blame for late return */
> > -			WARN_ONCE(1, "page_pool gone mem.id=%d", mem->id);
> > -			trace_mem_return_failed(mem, page);
> > -			put_page(page);
> > -		}
> > +		napi_direct &= !xdp_return_frame_no_direct();
> > +		page_pool_put_page(xa->page_pool, page, napi_direct);
> >  		rcu_read_unlock();
> >  		break;
> >  	case MEM_TYPE_PAGE_SHARED:
> 
> This should be correct.
> 
> -- 
> Best regards,
>   Jesper Dangaard Brouer
>   MSc.CS, Principal Kernel Engineer at Red Hat
>   LinkedIn: http://www.linkedin.com/in/brouer
> 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC PATCH 1/1] page_pool: do not release pool until inflight == 0.
  2019-11-11  9:21   ` Jesper Dangaard Brouer
@ 2019-11-11 16:16     ` Jonathan Lemon
  0 siblings, 0 replies; 7+ messages in thread
From: Jonathan Lemon @ 2019-11-11 16:16 UTC (permalink / raw)
  To: Jesper Dangaard Brouer; +Cc: netdev, ilias.apalodimas, kernel-team



On 11 Nov 2019, at 1:21, Jesper Dangaard Brouer wrote:

> On Sun, 10 Nov 2019 22:20:38 -0800
> Jonathan Lemon <jonathan.lemon@gmail.com> wrote:
>
>> diff --git a/net/core/xdp.c b/net/core/xdp.c
>> index 20781ad5f9c3..e334fad0a6b8 100644
>> --- a/net/core/xdp.c
>> +++ b/net/core/xdp.c
>> @@ -70,25 +70,47 @@ static void __xdp_mem_allocator_rcu_free(struct 
>> rcu_head *rcu)
>>
>>  	xa = container_of(rcu, struct xdp_mem_allocator, rcu);
>>
>> -	/* Allocator have indicated safe to remove before this is called */
>> -	if (xa->mem.type == MEM_TYPE_PAGE_POOL)
>> -		page_pool_free(xa->page_pool);
>> -
>>  	/* Allow this ID to be reused */
>>  	ida_simple_remove(&mem_id_pool, xa->mem.id);
>>
>> -	/* Poison memory */
>> -	xa->mem.id = 0xFFFF;
>> -	xa->mem.type = 0xF0F0;
>> -	xa->allocator = (void *)0xDEAD9001;
>> -
>>  	kfree(xa);
>>  }
>
> Can you PLEASE leave the memory poisonings that I have added alone.
> Removing these are irrelevant for current patch. You clearly don't 
> like
> this approach, but I've also clearly told that I disagree.  I'm the
> maintainer of this code and I prefer letting them stay. I'm the one
> that signed up for dealing with hard to find bugs in the code.

As you're the maintainer, I'll put this back.  However, I strongly
feel that this is poor practice; detecting use-after-free in cases
like this should be the job of the page and memory allocator: not
kmemleak (which detects lost memory), but page poisoning and redzone
support.

If this is really useful, it should specifically be under DEBUG
support, rather than in the mainline code.  It's not a performance
issue - just more techdebt, from my POV.
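
To make that concrete, something along these lines would keep the
poisoning while taking it out of the common path (the CONFIG_XDP_DEBUG_MEM
symbol is made up for illustration; no such Kconfig option exists):

	/* Allow this ID to be reused */
	ida_simple_remove(&mem_id_pool, xa->mem.id);

#ifdef CONFIG_XDP_DEBUG_MEM
	/* Poison memory, to catch late returns into a dead allocator */
	xa->mem.id = 0xFFFF;
	xa->mem.type = 0xF0F0;
	xa->allocator = (void *)0xDEAD9001;
#endif

	kfree(xa);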
-- 
Jonathan

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC PATCH 1/1] page_pool: do not release pool until inflight == 0.
  2019-11-11 11:47   ` Jesper Dangaard Brouer
  2019-11-11 14:04     ` Ilias Apalodimas
@ 2019-11-12  6:00     ` Jonathan Lemon
  1 sibling, 0 replies; 7+ messages in thread
From: Jonathan Lemon @ 2019-11-12  6:00 UTC (permalink / raw)
  To: Jesper Dangaard Brouer; +Cc: netdev, ilias.apalodimas, kernel-team


On 11 Nov 2019, at 3:47, Jesper Dangaard Brouer wrote:

> On Sun, 10 Nov 2019 22:20:38 -0800
> Jonathan Lemon <jonathan.lemon@gmail.com> wrote:
>
>> The page pool keeps track of the number of pages in flight, and
>> it isn't safe to remove the pool until all pages are returned.
>>
>> Disallow removing the pool until all pages are back, so the pool
>> is always available for page producers.
>>
>> Make the page pool responsible for its own delayed destruction
>
> I like this part, making page_pool responsible for its own delayed
> destruction.  I originally also wanted to do this, but got stuck on
> mem.id getting removed prematurely from rhashtable.  You actually
> solved this, via introducing a disconnect callback, from page_pool into
> mem_allocator_disconnect(). I like it.
>
>> instead of relying on XDP, so the page pool can be used without
>> xdp.
>
> This is a misconception, the xdp_rxq_info_reg_mem_model API does not
> imply driver is using XDP.  Yes, I know the naming is sort of wrong,
> contains "xdp". Also the xdp_mem_info name.  Ilias and I have discussed
> to rename this several times.
>
> The longer term plan is/was to use this (xdp_)mem_info as generic
> return path for SKBs, creating a more flexible memory model for
> networking.  This patch is fine and in itself does not disrupt/change
> that, but your offlist changes does.  As your offlist changes does
> imply a performance gain, I will likely accept this (and then find
> another plan for more flexible memory model for networking).

Are you referring to the patch which encodes the page pool pointer
in the page, and then sends it directly to the pool on skb free
instead of performing a mem id lookup and indirection through the
memory model?

It could be done either way.  I'm not seeing any advantages of
the additional indirection, as the pool lifetime is guaranteed.

All that is needed is:
1) A way to differentiate this page as coming from the page pool.

   The current plan of setting a bit on the skb which indicates that
   the pages should be returned via the page pool is workable, but there
   will be some pages returned which came from the system page allocator,
   and these need to be filtered out.

   There must be some type of signature on the page that permits filtering
   and returning non-matching pages back to the page allocator.


2) Identifying exactly which page pool the page belongs to.

   This could be done by just placing the pool pointer on the page,
   or putting the mem info there and indirecting through the lookup.
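
As a purely illustrative sketch of 1) and 2) together (page->private and
the low tag bit here are assumptions for this example, not code from this
patch or any driver):

#define PP_OWNER_TAG	0x1UL	/* pool pointers are kmalloc'ed and at least
				 * 8-byte aligned, so bit 0 is free as a marker
				 */

static inline void page_pool_set_owner(struct page *page,
					struct page_pool *pool)
{
	set_page_private(page, (unsigned long)pool | PP_OWNER_TAG);
}

/* Returns NULL for pages that came from the normal page allocator */
static inline struct page_pool *page_pool_owner(struct page *page)
{
	unsigned long priv = page_private(page);

	if (!(priv & PP_OWNER_TAG))
		return NULL;
	return (struct page_pool *)(priv & ~PP_OWNER_TAG);
}

A single tag bit is a weak signature, of course; a real implementation
would want something more robust against stray page->private values, but
the lookup-vs-indirection question is the same either way.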

-- 
Jonathan

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread

Thread overview: 7+ messages
2019-11-11  6:20 [RFC PATCH 0/1] Change page_pool timeout handling Jonathan Lemon
2019-11-11  6:20 ` [RFC PATCH 1/1] page_pool: do not release pool until inflight == 0 Jonathan Lemon
2019-11-11  9:21   ` Jesper Dangaard Brouer
2019-11-11 16:16     ` Jonathan Lemon
2019-11-11 11:47   ` Jesper Dangaard Brouer
2019-11-11 14:04     ` Ilias Apalodimas
2019-11-12  6:00     ` Jonathan Lemon
