From: Jesper Dangaard Brouer <brouer@redhat.com>
To: netdev@vger.kernel.org,
	"Ilias Apalodimas" <ilias.apalodimas@linaro.org>,
	"Toke Høiland-Jørgensen" <toke@toke.dk>,
	"Tariq Toukan" <tariqt@mellanox.com>,
	"Jesper Dangaard Brouer" <brouer@redhat.com>
Cc: toshiaki.makita1@gmail.com, grygorii.strashko@ti.com,
	ivan.khoronzhuk@linaro.org, mcroce@redhat.com
Subject: [PATCH net-next v2 09/12] xdp: force mem allocator removal and periodic warning
Date: Tue, 18 Jun 2019 15:05:53 +0200
Message-ID: <156086315298.27760.5679537697366392825.stgit@firesoul>
In-Reply-To: <156086304827.27760.11339786046465638081.stgit@firesoul>

If bugs exist, or are introduced later e.g. by drivers misusing the API,
then we want to warn about the issue, so that developers notice. This patch
will generate a bit of noise in the form of a periodic pr_warn every 30 seconds.

It is not nice to have this stall warning running forever, so this patch
will (after 120 attempts) force disconnect the mem id (from the rhashtable)
and free the page_pool object. With the one-second retry interval defined
below, 120 attempts correspond to roughly two minutes, i.e. about four
periodic warnings, before the forced disconnect. This causes a fallback to
put_page() as before, which can only potentially leak DMA mappings if
objects really are stuck for this long. In that unlikely case, a WARN_ONCE
should show us the call stack.
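
For illustration, the periodic warning added below will look roughly like
this in the kernel log (the id, attempt count, and timing values here are
made up, matching the pr_warn format string in the patch):

  mem_id_disconnect_defer_retry() stalled mem.id=5 shutdown 31 attempts 30 sec

and if the pool is finally freed with pages still outstanding, the message
line of the WARN in __page_pool_free() reports the counters:

  Still in-flight pages:2 hold:1024 released:1022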

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
---
 net/core/page_pool.c |   18 +++++++++++++++++-
 net/core/xdp.c       |   37 +++++++++++++++++++++++++++++++------
 2 files changed, 48 insertions(+), 7 deletions(-)

diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 8679e24fd665..42c3b0a5a259 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -330,11 +330,27 @@ static void __page_pool_empty_ring(struct page_pool *pool)
 	}
 }
 
+static void __warn_in_flight(struct page_pool *pool)
+{
+	u32 release_cnt = atomic_read(&pool->pages_state_release_cnt);
+	u32 hold_cnt = READ_ONCE(pool->pages_state_hold_cnt);
+	s32 distance;
+
+	distance = _distance(hold_cnt, release_cnt);
+
+	/* Drivers should fix this, but only problematic when DMA is used */
+	WARN(1, "Still in-flight pages:%d hold:%u released:%u",
+	     distance, hold_cnt, release_cnt);
+}
+
 void __page_pool_free(struct page_pool *pool)
 {
 	WARN(pool->alloc.count, "API usage violation");
 	WARN(!ptr_ring_empty(&pool->ring), "ptr_ring is not empty");
-	WARN(!__page_pool_safe_to_destroy(pool), "still in-flight pages");
+
+	/* Can happen due to forced shutdown */
+	if (!__page_pool_safe_to_destroy(pool))
+		__warn_in_flight(pool);
 
 	ptr_ring_cleanup(&pool->ring, NULL);
 	kfree(pool);
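
Side note on __warn_in_flight() above: the in-flight count stays correct
even if the u32 counters wrap around, because _distance() (introduced
earlier in this series) casts the difference of the unsigned counters to a
signed value. The same idiom is used for the stall-time calculation in
mem_id_disconnect_defer_retry() further down. A minimal stand-alone
illustration with made-up counter values:

	#include <stdio.h>
	#include <stdint.h>

	/* Same idea as the kernel's _distance(a, b): the signed cast of
	 * an unsigned subtraction yields the correct distance as long
	 * as it fits in 31 bits, even after one counter has wrapped.
	 */
	#define _distance(a, b) ((int32_t)((uint32_t)(a) - (uint32_t)(b)))

	int main(void)
	{
		uint32_t hold_cnt    = 5;           /* wrapped past zero */
		uint32_t release_cnt = 4294967290u; /* close to UINT32_MAX */

		/* prints "in-flight: 11", not a huge bogus number */
		printf("in-flight: %d\n", _distance(hold_cnt, release_cnt));
		return 0;
	}
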
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 2b7bad227030..53bce4fa776a 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -39,6 +39,9 @@ struct xdp_mem_allocator {
 	struct rhash_head node;
 	struct rcu_head rcu;
 	struct delayed_work defer_wq;
+	unsigned long defer_start;
+	unsigned long defer_warn;
+	int disconnect_cnt;
 };
 
 static u32 xdp_mem_id_hashfn(const void *data, u32 len, u32 seed)
@@ -95,7 +98,7 @@ static void __xdp_mem_allocator_rcu_free(struct rcu_head *rcu)
 	kfree(xa);
 }
 
-bool __mem_id_disconnect(int id)
+bool __mem_id_disconnect(int id, bool force)
 {
 	struct xdp_mem_allocator *xa;
 	bool safe_to_remove = true;
@@ -108,29 +111,47 @@ bool __mem_id_disconnect(int id)
 		WARN(1, "Request remove non-existing id(%d), driver bug?", id);
 		return true;
 	}
+	xa->disconnect_cnt++;
 
 	/* Detects in-flight packet-pages for page_pool */
 	if (xa->mem.type == MEM_TYPE_PAGE_POOL)
 		safe_to_remove = page_pool_request_shutdown(xa->page_pool);
 
-	if (safe_to_remove &&
+	/* TODO: Tracepoint will be added here in the next patch */
+
+	if ((safe_to_remove || force) &&
 	    !rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params))
 		call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free);
 
 	mutex_unlock(&mem_id_lock);
-	return safe_to_remove;
+	return (safe_to_remove || force);
 }
 
 #define DEFER_TIME (msecs_to_jiffies(1000))
+#define DEFER_WARN_INTERVAL (30 * HZ)
+#define DEFER_MAX_RETRIES 120
 
 static void mem_id_disconnect_defer_retry(struct work_struct *wq)
 {
 	struct delayed_work *dwq = to_delayed_work(wq);
 	struct xdp_mem_allocator *xa = container_of(dwq, typeof(*xa), defer_wq);
+	bool force = false;
+
+	if (xa->disconnect_cnt > DEFER_MAX_RETRIES)
+		force = true;
 
-	if (__mem_id_disconnect(xa->mem.id))
+	if (__mem_id_disconnect(xa->mem.id, force))
 		return;
 
+	/* Periodic warning */
+	if (time_after_eq(jiffies, xa->defer_warn)) {
+		int sec = (s32)((u32)jiffies - (u32)xa->defer_start) / HZ;
+
+		pr_warn("%s() stalled mem.id=%u shutdown %d attempts %d sec\n",
+			__func__, xa->mem.id, xa->disconnect_cnt, sec);
+		xa->defer_warn = jiffies + DEFER_WARN_INTERVAL;
+	}
+
 	/* Still not ready to be disconnected, retry later */
 	schedule_delayed_work(&xa->defer_wq, DEFER_TIME);
 }
@@ -153,7 +174,7 @@ void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
 	if (id == 0)
 		return;
 
-	if (__mem_id_disconnect(id))
+	if (__mem_id_disconnect(id, false))
 		return;
 
 	/* Could not disconnect, defer new disconnect attempt to later */
@@ -164,6 +185,8 @@ void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
 		mutex_unlock(&mem_id_lock);
 		return;
 	}
+	xa->defer_start = jiffies;
+	xa->defer_warn  = jiffies + DEFER_WARN_INTERVAL;
 
 	INIT_DELAYED_WORK(&xa->defer_wq, mem_id_disconnect_defer_retry);
 	mutex_unlock(&mem_id_lock);
@@ -388,10 +411,12 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
 		/* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */
 		xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
 		page = virt_to_head_page(data);
-		if (xa) {
+		if (likely(xa)) {
 			napi_direct &= !xdp_return_frame_no_direct();
 			page_pool_put_page(xa->page_pool, page, napi_direct);
 		} else {
+			/* Hopefully the stack trace shows who to blame for the late return */
+			WARN_ONCE(1, "page_pool gone mem.id=%d", mem->id);
 			put_page(page);
 		}
 		rcu_read_unlock();
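
For context, here is a rough sketch of the driver-side sequence that
exercises this shutdown path. This is not part of the patch; names like
priv, rxq and queue_index are placeholders, the APIs are used as they
stand at this point in the series, and error handling is omitted:

	/* Setup: create a page_pool and register it as the memory
	 * model for an RX queue.
	 */
	struct page_pool_params pp_params = {
		.order     = 0,
		.pool_size = 256,
		.nid       = NUMA_NO_NODE,
		.dev       = priv->dev,         /* enables DMA mapping */
		.dma_dir   = DMA_FROM_DEVICE,
	};
	struct page_pool *pool = page_pool_create(&pp_params);

	xdp_rxq_info_reg(&rxq, priv->netdev, queue_index);
	xdp_rxq_info_reg_mem_model(&rxq, MEM_TYPE_PAGE_POOL, pool);

	/* ... RX runs; redirected frames can sit in e.g. a cpumap
	 * queue and are returned later via xdp_return_frame() ...
	 */

	/* Teardown: with pages still in-flight this schedules the
	 * deferred disconnect above instead of freeing the pool right
	 * away; after DEFER_MAX_RETRIES the disconnect is forced, and
	 * a late xdp_return_frame() then hits the WARN_ONCE fallback
	 * in __xdp_return().
	 */
	xdp_rxq_info_unreg(&rxq);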

