All of lore.kernel.org
 help / color / mirror / Atom feed
From: Eric Dumazet <eric.dumazet@gmail.com>
To: "David S . Miller" <davem@davemloft.net>,
	Jakub Kicinski <kuba@kernel.org>
Cc: netdev <netdev@vger.kernel.org>,
	Eric Dumazet <edumazet@google.com>,
	Eric Dumazet <eric.dumazet@gmail.com>,
	Dmitry Vyukov <dvyukov@google.com>
Subject: [PATCH v3 net-next 03/23] net: add net device refcount tracker infrastructure
Date: Sat,  4 Dec 2021 20:21:57 -0800	[thread overview]
Message-ID: <20211205042217.982127-4-eric.dumazet@gmail.com> (raw)
In-Reply-To: <20211205042217.982127-1-eric.dumazet@gmail.com>

From: Eric Dumazet <edumazet@google.com>

net device are refcounted. Over the years we had numerous bugs
caused by imbalanced dev_hold() and dev_put() calls.

The general idea is to be able to precisely pair each decrement with
a corresponding prior increment. Both share a cookie, basically
a pointer to private data storing stack traces.

This patch adds dev_hold_track() and dev_put_track().

To use these helpers, each data structure owning a refcount
should also use a "netdevice_tracker" to pair the hold and put.

netdevice_tracker dev_tracker;
...
dev_hold_track(dev, &dev_tracker, GFP_ATOMIC);
...
dev_put_track(dev, &dev_tracker);

Whenever a leak happens, we will get precise stack traces
of the point dev_hold_track() happened, at device dismantle phase.

We will also get a stack trace if too many dev_put_track() for the same
netdevice_tracker are attempted.

This is guarded by CONFIG_NET_DEV_REFCNT_TRACKER option.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 include/linux/netdevice.h | 45 +++++++++++++++++++++++++++++++++++++++
 lib/Kconfig.debug         |  5 +++++
 net/Kconfig.debug         | 11 ++++++++++
 net/core/dev.c            |  3 +++
 4 files changed, 64 insertions(+)
 create mode 100644 net/Kconfig.debug

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 65117f01d5f2a9c9e815b6c967d5e9e4c94af0ae..143d60ed004732e4a086e66fdcf7b3d362c1dc20 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -48,6 +48,7 @@
 #include <uapi/linux/pkt_cls.h>
 #include <linux/hashtable.h>
 #include <linux/rbtree.h>
+#include <linux/ref_tracker.h>
 
 struct netpoll_info;
 struct device;
@@ -300,6 +301,12 @@ enum netdev_state_t {
 };
 
 
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+typedef struct ref_tracker *netdevice_tracker;
+#else
+typedef struct {} netdevice_tracker;
+#endif
+
 struct gro_list {
 	struct list_head	list;
 	int			count;
@@ -1865,6 +1872,7 @@ enum netdev_ml_priv_type {
  *	@proto_down_reason:	reason a netdev interface is held down
  *	@pcpu_refcnt:		Number of references to this device
  *	@dev_refcnt:		Number of references to this device
+ *	@refcnt_tracker:	Tracker directory for tracked references to this device
  *	@todo_list:		Delayed register/unregister
  *	@link_watch_list:	XXX: need comments on this one
  *
@@ -2178,6 +2186,7 @@ struct net_device {
 #else
 	refcount_t		dev_refcnt;
 #endif
+	struct ref_tracker_dir	refcnt_tracker;
 
 	struct list_head	link_watch_list;
 
@@ -3805,6 +3814,7 @@ void netdev_run_todo(void);
  *	@dev: network device
  *
  * Release reference to device to allow it to be freed.
+ * Try using dev_put_track() instead.
  */
 static inline void dev_put(struct net_device *dev)
 {
@@ -3822,6 +3832,7 @@ static inline void dev_put(struct net_device *dev)
  *	@dev: network device
  *
  * Hold reference to device to keep it from being freed.
+ * Try using dev_hold_track() instead.
  */
 static inline void dev_hold(struct net_device *dev)
 {
@@ -3834,6 +3845,40 @@ static inline void dev_hold(struct net_device *dev)
 	}
 }
 
+static inline void netdev_tracker_alloc(struct net_device *dev,
+					netdevice_tracker *tracker, gfp_t gfp)
+{
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+	ref_tracker_alloc(&dev->refcnt_tracker, tracker, gfp);
+#endif
+}
+
+static inline void netdev_tracker_free(struct net_device *dev,
+				       netdevice_tracker *tracker)
+{
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+	ref_tracker_free(&dev->refcnt_tracker, tracker);
+#endif
+}
+
+static inline void dev_hold_track(struct net_device *dev,
+				  netdevice_tracker *tracker, gfp_t gfp)
+{
+	if (dev) {
+		dev_hold(dev);
+		netdev_tracker_alloc(dev, tracker, gfp);
+	}
+}
+
+static inline void dev_put_track(struct net_device *dev,
+				 netdevice_tracker *tracker)
+{
+	if (dev) {
+		netdev_tracker_free(dev, tracker);
+		dev_put(dev);
+	}
+}
+
 /* Carrier loss detection, dial on demand. The functions netif_carrier_on
  * and _off may be called from IRQ context, but it is caller
  * who is responsible for serialization of these calls.
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 633c2c5cb45bd435a7684dbe2f2eca477c871463..6504b97f8dfd786b6a7b9cdb2c2eda5aaa2c2b03 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -598,6 +598,11 @@ config DEBUG_MISC
 	  Say Y here if you need to enable miscellaneous debug code that should
 	  be under a more specific debug option but isn't.
 
+menu "Networking Debugging"
+
+source "net/Kconfig.debug"
+
+endmenu # "Networking Debugging"
 
 menu "Memory Debugging"
 
diff --git a/net/Kconfig.debug b/net/Kconfig.debug
new file mode 100644
index 0000000000000000000000000000000000000000..ee8d67a784291fb126d95324db05183b5a78ab70
--- /dev/null
+++ b/net/Kconfig.debug
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config NET_DEV_REFCNT_TRACKER
+	bool "Enable net device refcount tracking"
+	depends on DEBUG_KERNEL && STACKTRACE_SUPPORT
+	select REF_TRACKER
+	default n
+	help
+	  Enable debugging feature to track device references.
+	  This adds memory and cpu costs.
+
diff --git a/net/core/dev.c b/net/core/dev.c
index aba8acc1238c2213b9fc0c47c93568f731f375e5..1740d6cfe86b58359cceaec7ee9cc015a3843723 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -9864,6 +9864,7 @@ static void netdev_wait_allrefs(struct net_device *dev)
 			       netdev_unregister_timeout_secs * HZ)) {
 			pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
 				 dev->name, refcnt);
+			ref_tracker_dir_print(&dev->refcnt_tracker, 10);
 			warning_time = jiffies;
 		}
 	}
@@ -10154,6 +10155,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 	dev = PTR_ALIGN(p, NETDEV_ALIGN);
 	dev->padded = (char *)dev - (char *)p;
 
+	ref_tracker_dir_init(&dev->refcnt_tracker, 128);
 #ifdef CONFIG_PCPU_DEV_REFCNT
 	dev->pcpu_refcnt = alloc_percpu(int);
 	if (!dev->pcpu_refcnt)
@@ -10270,6 +10272,7 @@ void free_netdev(struct net_device *dev)
 	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
 		netif_napi_del(p);
 
+	ref_tracker_dir_exit(&dev->refcnt_tracker);
 #ifdef CONFIG_PCPU_DEV_REFCNT
 	free_percpu(dev->pcpu_refcnt);
 	dev->pcpu_refcnt = NULL;
-- 
2.34.1.400.ga245620fadb-goog


  parent reply	other threads:[~2021-12-05  4:22 UTC|newest]

Thread overview: 52+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-12-05  4:21 [PATCH v3 net-next 00/23] net: add preliminary netdev refcount tracking Eric Dumazet
2021-12-05  4:21 ` [PATCH v3 net-next 01/23] lib: add reference counting tracking infrastructure Eric Dumazet
2021-12-08 14:09   ` Andrzej Hajda
2021-12-08 14:27     ` Dmitry Vyukov
2021-12-08 15:04       ` Andrzej Hajda
2021-12-08 14:59     ` Jakub Kicinski
2021-12-08 15:11       ` Andrzej Hajda
2021-12-15 10:18   ` Jiri Slaby
2021-12-15 10:38     ` Eric Dumazet
2021-12-15 10:41       ` Eric Dumazet
2021-12-15 10:57         ` Vlastimil Babka
2021-12-15 11:08           ` Eric Dumazet
2021-12-15 11:09             ` Jiri Slaby
2021-12-15 11:25               ` Eric Dumazet
2021-12-05  4:21 ` [PATCH v3 net-next 02/23] lib: add tests for reference tracker Eric Dumazet
2021-12-05  4:21 ` Eric Dumazet [this message]
2021-12-05  4:21 ` [PATCH v3 net-next 04/23] net: add net device refcount tracker to struct netdev_rx_queue Eric Dumazet
2021-12-05  4:21 ` [PATCH v3 net-next 05/23] net: add net device refcount tracker to struct netdev_queue Eric Dumazet
2021-12-05  4:22 ` [PATCH v3 net-next 06/23] net: add net device refcount tracker to ethtool_phys_id() Eric Dumazet
2021-12-05  4:22 ` [PATCH v3 net-next 07/23] net: add net device refcount tracker to dev_ifsioc() Eric Dumazet
2021-12-05  4:22 ` [PATCH v3 net-next 08/23] drop_monitor: add net device refcount tracker Eric Dumazet
2021-12-05  4:22 ` [PATCH v3 net-next 09/23] net: dst: add net device refcount tracking to dst_entry Eric Dumazet
2021-12-05  4:22 ` [PATCH v3 net-next 10/23] ipv6: add net device refcount tracker to rt6_probe_deferred() Eric Dumazet
2021-12-05  4:22 ` [PATCH v3 net-next 11/23] sit: add net device refcount tracking to ip_tunnel Eric Dumazet
2021-12-05  4:22 ` [PATCH v3 net-next 12/23] ipv6: add net device refcount tracker to struct ip6_tnl Eric Dumazet
2021-12-05  4:22 ` [PATCH v3 net-next 13/23] net: add net device refcount tracker to struct neighbour Eric Dumazet
2021-12-05  4:22 ` [PATCH v3 net-next 14/23] net: add net device refcount tracker to struct pneigh_entry Eric Dumazet
2021-12-05  4:22 ` [PATCH v3 net-next 15/23] net: add net device refcount tracker to struct neigh_parms Eric Dumazet
2021-12-05  4:22 ` [PATCH v3 net-next 16/23] net: add net device refcount tracker to struct netdev_adjacent Eric Dumazet
2021-12-05  4:22 ` [PATCH v3 net-next 17/23] ipv6: add net device refcount tracker to struct inet6_dev Eric Dumazet
2021-12-05  4:22 ` [PATCH v3 net-next 18/23] ipv4: add net device refcount tracker to struct in_device Eric Dumazet
2021-12-05  4:22 ` [PATCH v3 net-next 19/23] net/sched: add net device refcount tracker to struct Qdisc Eric Dumazet
2021-12-05  4:22 ` [PATCH v3 net-next 20/23] net: linkwatch: add net device refcount tracker Eric Dumazet
2021-12-05  4:22 ` [PATCH v3 net-next 21/23] net: failover: " Eric Dumazet
2021-12-05  4:22 ` [PATCH v3 net-next 22/23] ipmr, ip6mr: add net device refcount tracker to struct vif_device Eric Dumazet
2021-12-05  4:22 ` [PATCH v3 net-next 23/23] netpoll: add net device refcount tracker to struct netpoll Eric Dumazet
2021-12-06 23:23 ` [PATCH v3 net-next 00/23] net: add preliminary netdev refcount tracking Andrew Lunn
2021-12-06 23:44   ` Eric Dumazet
2021-12-06 23:48     ` Eric Dumazet
2021-12-07  0:00     ` Andrew Lunn
2021-12-07  0:04       ` Eric Dumazet
2021-12-07  0:12         ` Andrew Lunn
2021-12-07  0:17           ` Eric Dumazet
2021-12-07  0:21             ` Eric Dumazet
2021-12-07  0:27             ` Andrew Lunn
2021-12-07  0:53               ` Eric Dumazet
2021-12-07 19:52                 ` Andrew Lunn
2021-12-07 20:00                   ` Eric Dumazet
2021-12-08 17:29                     ` Andrew Lunn
2021-12-08 18:21                       ` Eric Dumazet
2021-12-08 18:53                         ` Eric Dumazet
2021-12-07  0:26 ` Jakub Kicinski

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211205042217.982127-4-eric.dumazet@gmail.com \
    --to=eric.dumazet@gmail.com \
    --cc=davem@davemloft.net \
    --cc=dvyukov@google.com \
    --cc=edumazet@google.com \
    --cc=kuba@kernel.org \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.