All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC -next 0/2] refcount: add tracking infrastructure
@ 2021-11-17 19:20 Eric Dumazet
  2021-11-17 19:20 ` [RFC -next 1/2] lib: add reference counting infrastructure Eric Dumazet
  2021-11-17 19:20 ` [RFC -next 2/2] net: add dev_hold_track() and dev_put_track() helpers Eric Dumazet
  0 siblings, 2 replies; 14+ messages in thread
From: Eric Dumazet @ 2021-11-17 19:20 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski; +Cc: netdev, Eric Dumazet, Eric Dumazet

From: Eric Dumazet <edumazet@google.com>

RFC only; I have yet to convert some dev_put()/dev_hold() calls to show
how this works.

I am posting this because Jakub is working on the same issue.

Eric Dumazet (2):
  lib: add reference counting infrastructure
  net: add dev_hold_track() and dev_put_track() helpers

 include/linux/netdevice.h   |  23 +++++++
 include/linux/ref_tracker.h |  78 ++++++++++++++++++++++++
 lib/Kconfig                 |   4 ++
 lib/Makefile                |   2 +
 lib/ref_tracker.c           | 116 ++++++++++++++++++++++++++++++++++++
 net/Kconfig                 |   8 +++
 net/core/dev.c              |   3 +
 7 files changed, 234 insertions(+)
 create mode 100644 include/linux/ref_tracker.h
 create mode 100644 lib/ref_tracker.c

-- 
2.34.0.rc1.387.gb447b232ab-goog


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [RFC -next 1/2] lib: add reference counting infrastructure
  2021-11-17 19:20 [RFC -next 0/2] refcount: add tracking infrastructure Eric Dumazet
@ 2021-11-17 19:20 ` Eric Dumazet
  2021-11-17 20:03   ` Jakub Kicinski
  2021-11-30  9:39   ` Dmitry Vyukov
  2021-11-17 19:20 ` [RFC -next 2/2] net: add dev_hold_track() and dev_put_track() helpers Eric Dumazet
  1 sibling, 2 replies; 14+ messages in thread
From: Eric Dumazet @ 2021-11-17 19:20 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski; +Cc: netdev, Eric Dumazet, Eric Dumazet

From: Eric Dumazet <edumazet@google.com>

It can be hard to track where references are taken and released.

In networking, we have annoying issues at device dismantles,
and we had various proposals to ease root causing them.

This patch adds new infrastructure pairing refcount increases
and decreases. This will make the code self-documenting, because
programmers will have to associate increments with decrements.

This is controlled by CONFIG_REF_TRACKER which can be selected
by users of this feature.

This adds both cpu and memory costs, and thus should be reserved
for debug kernel builds, or be enabled on demand with a static key.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 include/linux/ref_tracker.h |  78 ++++++++++++++++++++++++
 lib/Kconfig                 |   4 ++
 lib/Makefile                |   2 +
 lib/ref_tracker.c           | 116 ++++++++++++++++++++++++++++++++++++
 4 files changed, 200 insertions(+)
 create mode 100644 include/linux/ref_tracker.h
 create mode 100644 lib/ref_tracker.c

diff --git a/include/linux/ref_tracker.h b/include/linux/ref_tracker.h
new file mode 100644
index 0000000000000000000000000000000000000000..1a2a3696682d40b38f9f1dd2b14663716e37d9d3
--- /dev/null
+++ b/include/linux/ref_tracker.h
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#ifndef _LINUX_REF_TRACKER_H
+#define _LINUX_REF_TRACKER_H
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <linux/stackdepot.h>
+
+struct ref_tracker {
+#ifdef CONFIG_REF_TRACKER
+	struct list_head	head;   /* anchor into dir->list or dir->quarantine */
+	bool			dead;
+	depot_stack_handle_t	alloc_stack_handle;
+	depot_stack_handle_t	free_stack_handle;
+#endif
+};
+
+struct ref_tracker_dir {
+#ifdef CONFIG_REF_TRACKER
+	spinlock_t		lock;
+	unsigned int		quarantine_avail;
+	struct list_head	list; /* List of active trackers */
+	struct list_head	quarantine; /* List of dead trackers */
+#endif
+};
+
+#ifdef CONFIG_REF_TRACKER
+static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir,
+					unsigned int quarantine_count)
+{
+	INIT_LIST_HEAD(&dir->list);
+	INIT_LIST_HEAD(&dir->quarantine);
+	spin_lock_init(&dir->lock);
+	dir->quarantine_avail = quarantine_count;
+}
+
+void ref_tracker_dir_exit(struct ref_tracker_dir *dir);
+
+void ref_tracker_dir_print(struct ref_tracker_dir *dir,
+			   unsigned int display_limit);
+
+int ref_tracker_alloc(struct ref_tracker_dir *dir,
+		      struct ref_tracker **trackerp, gfp_t gfp);
+
+int ref_tracker_free(struct ref_tracker_dir *dir,
+		     struct ref_tracker **trackerp);
+
+#else /* CONFIG_REF_TRACKER */
+
+static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir,
+					unsigned int quarantine_count)
+{
+}
+
+static inline void ref_tracker_dir_exit(struct ref_tracker_dir *dir)
+{
+}
+
+static inline void ref_tracker_dir_print(struct ref_tracker_dir *dir,
+					 unsigned int display_limit)
+{
+}
+
+static inline int ref_tracker_alloc(struct ref_tracker_dir *dir,
+				    struct ref_tracker **trackerp,
+				    gfp_t gfp)
+{
+	return 0;
+}
+
+static inline int ref_tracker_free(struct ref_tracker_dir *dir,
+				   struct ref_tracker **trackerp)
+{
+	return 0;
+}
+
+#endif
+
+#endif /* _LINUX_REF_TRACKER_H */
diff --git a/lib/Kconfig b/lib/Kconfig
index 5e7165e6a346c9bec878b78c8c8c3d175fc98dfd..d01be8e9593992a7d94a46bd1716460bc33c3ae1 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -680,6 +680,10 @@ config STACK_HASH_ORDER
 	 Select the hash size as a power of 2 for the stackdepot hash table.
 	 Choose a lower value to reduce the memory impact.
 
+config REF_TRACKER
+	bool
+	select STACKDEPOT
+
 config SBITMAP
 	bool
 
diff --git a/lib/Makefile b/lib/Makefile
index 364c23f1557816f73aebd8304c01224a4846ac6c..c1fd9243ddb9cc1ac5252d7eb8009f9290782c4a 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -270,6 +270,8 @@ obj-$(CONFIG_STACKDEPOT) += stackdepot.o
 KASAN_SANITIZE_stackdepot.o := n
 KCOV_INSTRUMENT_stackdepot.o := n
 
+obj-$(CONFIG_REF_TRACKER) += ref_tracker.o
+
 libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \
 	       fdt_empty_tree.o fdt_addresses.o
 $(foreach file, $(libfdt_files), \
diff --git a/lib/ref_tracker.c b/lib/ref_tracker.c
new file mode 100644
index 0000000000000000000000000000000000000000..e907c58c31ed49719e31c6e46abd1715d9884924
--- /dev/null
+++ b/lib/ref_tracker.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/export.h>
+#include <linux/ref_tracker.h>
+#include <linux/slab.h>
+#include <linux/stacktrace.h>
+
+#define REF_TRACKER_STACK_ENTRIES 16
+
+void ref_tracker_dir_exit(struct ref_tracker_dir *dir)
+{
+	struct ref_tracker *tracker, *n;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dir->lock, flags);
+	list_for_each_entry_safe(tracker, n, &dir->quarantine, head) {
+		list_del(&tracker->head);
+		kfree(tracker);
+		dir->quarantine_avail++;
+	}
+	list_for_each_entry_safe(tracker, n, &dir->list, head) {
+		pr_err("leaked reference.\n");
+		if (tracker->alloc_stack_handle)
+			stack_depot_print(tracker->alloc_stack_handle);
+		list_del(&tracker->head);
+		kfree(tracker);
+	}
+	spin_unlock_irqrestore(&dir->lock, flags);
+}
+EXPORT_SYMBOL(ref_tracker_dir_exit);
+
+void ref_tracker_dir_print(struct ref_tracker_dir *dir,
+			   unsigned int display_limit)
+{
+	struct ref_tracker *tracker;
+	unsigned long flags;
+	unsigned int i = 0;
+
+	spin_lock_irqsave(&dir->lock, flags);
+	list_for_each_entry(tracker, &dir->list, head) {
+		tracker->dead = true;
+		if (i < display_limit) {
+			pr_err("leaked reference.\n");
+			if (tracker->alloc_stack_handle)
+				stack_depot_print(tracker->alloc_stack_handle);
+		}
+		i++;
+	}
+	spin_unlock_irqrestore(&dir->lock, flags);
+}
+EXPORT_SYMBOL(ref_tracker_dir_print);
+
+int ref_tracker_alloc(struct ref_tracker_dir *dir,
+		      struct ref_tracker **trackerp,
+		      gfp_t gfp)
+{
+	unsigned long entries[REF_TRACKER_STACK_ENTRIES];
+	struct ref_tracker *tracker;
+	unsigned int nr_entries;
+	unsigned long flags;
+
+	*trackerp = tracker = kzalloc(sizeof(*tracker), gfp);
+	if (!tracker) {
+		pr_err_once("memory allocation failure, unreliable refcount tracker.\n");
+		return -ENOMEM;
+	}
+	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
+	tracker->alloc_stack_handle = stack_depot_save(entries, nr_entries, gfp);
+
+	spin_lock_irqsave(&dir->lock, flags);
+	list_add(&tracker->head, &dir->list);
+	spin_unlock_irqrestore(&dir->lock, flags);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ref_tracker_alloc);
+
+int ref_tracker_free(struct ref_tracker_dir *dir,
+		     struct ref_tracker **trackerp)
+{
+	unsigned long entries[REF_TRACKER_STACK_ENTRIES];
+	struct ref_tracker *tracker = *trackerp;
+	unsigned int nr_entries;
+	unsigned long flags;
+
+	if (!tracker)
+		return -EEXIST;
+	spin_lock_irqsave(&dir->lock, flags);
+	if (tracker->dead) {
+		pr_err("reference already released.\n");
+		if (tracker->alloc_stack_handle) {
+			pr_err("allocated in:\n");
+			stack_depot_print(tracker->alloc_stack_handle);
+		}
+		if (tracker->free_stack_handle) {
+			pr_err("freed in:\n");
+			stack_depot_print(tracker->free_stack_handle);
+		}
+		spin_unlock_irqrestore(&dir->lock, flags);
+		return -EINVAL;
+	}
+	tracker->dead = true;
+
+	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
+	tracker->free_stack_handle = stack_depot_save(entries, nr_entries, GFP_ATOMIC);
+
+	list_move_tail(&tracker->head, &dir->quarantine);
+	if (!dir->quarantine_avail) {
+		tracker = list_first_entry(&dir->quarantine, struct ref_tracker, head);
+		list_del(&tracker->head);
+		kfree(tracker);
+	} else {
+		dir->quarantine_avail--;
+	}
+	spin_unlock_irqrestore(&dir->lock, flags);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ref_tracker_free);
-- 
2.34.0.rc1.387.gb447b232ab-goog


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [RFC -next 2/2] net: add dev_hold_track() and dev_put_track() helpers
  2021-11-17 19:20 [RFC -next 0/2] refcount: add tracking infrastructure Eric Dumazet
  2021-11-17 19:20 ` [RFC -next 1/2] lib: add reference counting infrastructure Eric Dumazet
@ 2021-11-17 19:20 ` Eric Dumazet
  2021-11-25 14:24   ` kernel test robot
  1 sibling, 1 reply; 14+ messages in thread
From: Eric Dumazet @ 2021-11-17 19:20 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski; +Cc: netdev, Eric Dumazet, Eric Dumazet

From: Eric Dumazet <edumazet@google.com>

They should replace dev_hold() and dev_put().

To use these helpers, each data structure owning a refcount
should also use a "struct ref_tracker" to pair the hold and put.

Whenever a leak happens, we will get precise stack traces of where
dev_hold_track() was called, during the device dismantle phase.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 include/linux/netdevice.h | 23 +++++++++++++++++++++++
 net/Kconfig               |  8 ++++++++
 net/core/dev.c            |  3 +++
 3 files changed, 34 insertions(+)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 4f4a299e92de7ba9f61507ad4df7e334775c07a6..91957aa0779195a962ec95f491d826bdd536808e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -48,6 +48,7 @@
 #include <uapi/linux/pkt_cls.h>
 #include <linux/hashtable.h>
 #include <linux/rbtree.h>
+#include <linux/ref_tracker.h>
 
 struct netpoll_info;
 struct device;
@@ -2181,6 +2182,7 @@ struct net_device {
 #else
 	refcount_t		dev_refcnt;
 #endif
+	struct ref_tracker_dir	refcnt_tracker;
 
 	struct list_head	link_watch_list;
 
@@ -3807,6 +3809,7 @@ void netdev_run_todo(void);
  *	@dev: network device
  *
  * Release reference to device to allow it to be freed.
+ * Try using dev_put_track() instead.
  */
 static inline void dev_put(struct net_device *dev)
 {
@@ -3824,6 +3827,7 @@ static inline void dev_put(struct net_device *dev)
  *	@dev: network device
  *
  * Hold reference to device to keep it from being freed.
+ * Try using dev_hold_track() instead.
  */
 static inline void dev_hold(struct net_device *dev)
 {
@@ -3836,6 +3840,25 @@ static inline void dev_hold(struct net_device *dev)
 	}
 }
 
+static inline void dev_hold_track(struct net_device *dev,
+				  struct ref_tracker **tracker,
+				  gfp_t gfp)
+{
+	if (dev) {
+		dev_hold(dev);
+		ref_tracker_alloc(&dev->refcnt_tracker, tracker, gfp);
+	}
+}
+
+static inline void dev_put_track(struct net_device *dev,
+				 struct ref_tracker **tracker)
+{
+	if (dev) {
+		ref_tracker_free(&dev->refcnt_tracker, tracker);
+		dev_put(dev);
+	}
+}
+
 /* Carrier loss detection, dial on demand. The functions netif_carrier_on
  * and _off may be called from IRQ context, but it is caller
  * who is responsible for serialization of these calls.
diff --git a/net/Kconfig b/net/Kconfig
index 074472dfa94ae78081b7391b8ca4a73b9d0be7b7..a4743e59a35c2978ecc6d704b388ca07efe3e95c 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -253,6 +253,14 @@ config PCPU_DEV_REFCNT
 	  network device refcount are using per cpu variables if this option is set.
 	  This can be forced to N to detect underflows (with a performance drop).
 
+config NET_DEV_REFCNT_TRACKER
+	bool "Enable tracking in dev_put_track() and dev_hold_track()"
+	select REF_TRACKER
+	default n
+	help
+	  Enable debugging feature to track leaked device references.
+	  This adds memory and cpu costs.
+
 config RPS
 	bool
 	depends on SMP && SYSFS
diff --git a/net/core/dev.c b/net/core/dev.c
index 92c9258cbf28556e68f9112343f5ebc98b2c163b..90ee2ba8717bf74bd3a1f72f7034744773ef69c4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -9878,6 +9878,7 @@ static void netdev_wait_allrefs(struct net_device *dev)
 			       netdev_unregister_timeout_secs * HZ)) {
 			pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
 				 dev->name, refcnt);
+			ref_tracker_dir_print(&dev->refcnt_tracker, 10);
 			warning_time = jiffies;
 		}
 	}
@@ -10168,6 +10169,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 	dev = PTR_ALIGN(p, NETDEV_ALIGN);
 	dev->padded = (char *)dev - (char *)p;
 
+	ref_tracker_dir_init(&dev->refcnt_tracker, 128);
 #ifdef CONFIG_PCPU_DEV_REFCNT
 	dev->pcpu_refcnt = alloc_percpu(int);
 	if (!dev->pcpu_refcnt)
@@ -10284,6 +10286,7 @@ void free_netdev(struct net_device *dev)
 	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
 		netif_napi_del(p);
 
+	ref_tracker_dir_exit(&dev->refcnt_tracker);
 #ifdef CONFIG_PCPU_DEV_REFCNT
 	free_percpu(dev->pcpu_refcnt);
 	dev->pcpu_refcnt = NULL;
-- 
2.34.0.rc1.387.gb447b232ab-goog


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [RFC -next 1/2] lib: add reference counting infrastructure
  2021-11-17 19:20 ` [RFC -next 1/2] lib: add reference counting infrastructure Eric Dumazet
@ 2021-11-17 20:03   ` Jakub Kicinski
  2021-11-17 20:16     ` Eric Dumazet
  2021-11-30  9:39   ` Dmitry Vyukov
  1 sibling, 1 reply; 14+ messages in thread
From: Jakub Kicinski @ 2021-11-17 20:03 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David S . Miller, netdev, Eric Dumazet

On Wed, 17 Nov 2021 11:20:30 -0800 Eric Dumazet wrote:
> From: Eric Dumazet <edumazet@google.com>
> 
> It can be hard to track where references are taken and released.
> 
> In networking, we have annoying issues at device dismantles,
> and we had various proposals to ease root causing them.
> 
> This patch adds new infrastructure pairing refcount increases
> and decreases. This will self document code, because programmer
> will have to associate increments/decrements.
> 
> This is controled by CONFIG_REF_TRACKER which can be selected
> by users of this feature.
> 
> This adds both cpu and memory costs, and thus should be reserved
> for debug kernel builds, or be enabled on demand with a static key.
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>

Looks great, this is what I had in mind when I said:

| In the future we can extend this structure to also catch those
| who fail to release the ref on unregistering notification.

I realized today we can get quite a lot of coverage by just plugging 
in object debug infra.

The main differences I see:
 - do we ever want to use this in prod? - if not why allocate the
   tracker itself dynamically? The double pointer interface seems
   harder to compile out completely
 - whether one stored netdev ptr can hold multiple refs
 - do we want to wrap the pointer itself or have the "tracker" object
   be a separate entity
 - do we want to catch "use after free" when ref is accessed after
   it was already released

No strong preference either way.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC -next 1/2] lib: add reference counting infrastructure
  2021-11-17 20:03   ` Jakub Kicinski
@ 2021-11-17 20:16     ` Eric Dumazet
  2021-11-17 20:47       ` Jakub Kicinski
  0 siblings, 1 reply; 14+ messages in thread
From: Eric Dumazet @ 2021-11-17 20:16 UTC (permalink / raw)
  To: Jakub Kicinski; +Cc: Eric Dumazet, David S . Miller, netdev

On Wed, Nov 17, 2021 at 12:03 PM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Wed, 17 Nov 2021 11:20:30 -0800 Eric Dumazet wrote:
> > From: Eric Dumazet <edumazet@google.com>
> >
> > It can be hard to track where references are taken and released.
> >
> > In networking, we have annoying issues at device dismantles,
> > and we had various proposals to ease root causing them.
> >
> > This patch adds new infrastructure pairing refcount increases
> > and decreases. This will self document code, because programmer
> > will have to associate increments/decrements.
> >
> > This is controled by CONFIG_REF_TRACKER which can be selected
> > by users of this feature.
> >
> > This adds both cpu and memory costs, and thus should be reserved
> > for debug kernel builds, or be enabled on demand with a static key.
> >
> > Signed-off-by: Eric Dumazet <edumazet@google.com>
>
> Looks great, this is what I had in mind when I said:
>
> | In the future we can extend this structure to also catch those
> | who fail to release the ref on unregistering notification.
>
> I realized today we can get quite a lot of coverage by just plugging
> in object debug infra.
>
> The main differences I see:
>  - do we ever want to use this in prod? - if not why allocate the
>    tracker itself dynamically? The double pointer interface seems
>    harder to compile out completely

I think that maintaining the tracking state in separate storage would
detect cases
where the object has been freed, without the help of KASAN.

>  - whether one stored netdev ptr can hold multiple refs

For the same stack depot, then?

Problem is that at the time of dev_hold(), we do not know if
there is one associated dev_put() or multiple ones (different stack depot)


>  - do we want to wrap the pointer itself or have the "tracker" object
>    be a separate entity
>  - do we want to catch "use after free" when ref is accessed after
>    it was already released
>
> No strong preference either way.

BTW my current suspicion about reported leaks is in
rt6_uncached_list_flush_dev()

I was considering something like

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 5e8f2f15607db7e6589b8bdb984e62512ad30589..233931b7c547d852ed3adeaa15f0a48f437b6596
100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -163,9 +163,6 @@ static void rt6_uncached_list_flush_dev(struct net
*net, struct net_device *dev)
        struct net_device *loopback_dev = net->loopback_dev;
        int cpu;

-       if (dev == loopback_dev)
-               return;
-
        for_each_possible_cpu(cpu) {
                struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
                struct rt6_info *rt;
@@ -175,7 +172,7 @@ static void rt6_uncached_list_flush_dev(struct net
*net, struct net_device *dev)
                        struct inet6_dev *rt_idev = rt->rt6i_idev;
                        struct net_device *rt_dev = rt->dst.dev;

-                       if (rt_idev->dev == dev) {
+                       if (rt_idev->dev == dev && dev != loopback_dev) {
                                rt->rt6i_idev = in6_dev_get(loopback_dev);
                                in6_dev_put(rt_idev);
                        }

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC -next 1/2] lib: add reference counting infrastructure
  2021-11-17 20:16     ` Eric Dumazet
@ 2021-11-17 20:47       ` Jakub Kicinski
  2021-11-17 22:43         ` Eric Dumazet
  0 siblings, 1 reply; 14+ messages in thread
From: Jakub Kicinski @ 2021-11-17 20:47 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Eric Dumazet, David S . Miller, netdev

On Wed, 17 Nov 2021 12:16:15 -0800 Eric Dumazet wrote:
> On Wed, Nov 17, 2021 at 12:03 PM Jakub Kicinski <kuba@kernel.org> wrote:
> > Looks great, this is what I had in mind when I said:
> >
> > | In the future we can extend this structure to also catch those
> > | who fail to release the ref on unregistering notification.
> >
> > I realized today we can get quite a lot of coverage by just plugging
> > in object debug infra.
> >
> > The main differences I see:
> >  - do we ever want to use this in prod? - if not why allocate the
> >    tracker itself dynamically? The double pointer interface seems
> >    harder to compile out completely  
> 
> I think that maintaining the tracking state in separate storage would
> detect cases where the object has been freed, without the help of KASAN.

Makes sense, I guess we can hang more of the information off a secondary
object?

Maybe I'm missing a trick on how to make the feature consume no space
when disabled via Kconfig.

> >  - whether one stored netdev ptr can hold multiple refs  
> 
> For a same stack depot then ?

Not necessarily.

> Problem is that at the time of dev_hold(), we do not know if
> there is one associated dev_put() or multiple ones (different stack depot)

Ack. My thinking was hold all stacks until tracker is completely
drained of refs. We'd have to collect both hold and put stacks in
that case and if ref leak happens try to match them up manually 
later (manually == human).

But if we can get away without allowing multiple refs with one tracker
that makes life easier, and is probably a cleaner API, anyway.

> >  - do we want to wrap the pointer itself or have the "tracker" object
> >    be a separate entity
> >  - do we want to catch "use after free" when ref is accessed after
> >    it was already released
> >
> > No strong preference either way.  
> 
> BTW my current suspicion about reported leaks is in
> rt6_uncached_list_flush_dev()
> 
> I was considering something like
> 
> diff --git a/net/ipv6/route.c b/net/ipv6/route.c
> index 5e8f2f15607db7e6589b8bdb984e62512ad30589..233931b7c547d852ed3adeaa15f0a48f437b6596
> 100644
> --- a/net/ipv6/route.c
> +++ b/net/ipv6/route.c
> @@ -163,9 +163,6 @@ static void rt6_uncached_list_flush_dev(struct net
> *net, struct net_device *dev)
>         struct net_device *loopback_dev = net->loopback_dev;
>         int cpu;
> 
> -       if (dev == loopback_dev)
> -               return;
> -
>         for_each_possible_cpu(cpu) {
>                 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
>                 struct rt6_info *rt;
> @@ -175,7 +172,7 @@ static void rt6_uncached_list_flush_dev(struct net
> *net, struct net_device *dev)
>                         struct inet6_dev *rt_idev = rt->rt6i_idev;
>                         struct net_device *rt_dev = rt->dst.dev;
> 
> -                       if (rt_idev->dev == dev) {
> +                       if (rt_idev->dev == dev && dev != loopback_dev) {
>                                 rt->rt6i_idev = in6_dev_get(loopback_dev);
>                                 in6_dev_put(rt_idev);
>                         }

Interesting.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC -next 1/2] lib: add reference counting infrastructure
  2021-11-17 20:47       ` Jakub Kicinski
@ 2021-11-17 22:43         ` Eric Dumazet
  2021-11-18  0:26           ` Jakub Kicinski
  2021-11-30  9:09           ` Dmitry Vyukov
  0 siblings, 2 replies; 14+ messages in thread
From: Eric Dumazet @ 2021-11-17 22:43 UTC (permalink / raw)
  To: Jakub Kicinski, Eric Dumazet; +Cc: David S . Miller, netdev



On 11/17/21 12:47 PM, Jakub Kicinski wrote:
> On Wed, 17 Nov 2021 12:16:15 -0800 Eric Dumazet wrote:
>> On Wed, Nov 17, 2021 at 12:03 PM Jakub Kicinski <kuba@kernel.org> wrote:
>>> Looks great, this is what I had in mind when I said:
>>>
>>> | In the future we can extend this structure to also catch those
>>> | who fail to release the ref on unregistering notification.
>>>
>>> I realized today we can get quite a lot of coverage by just plugging
>>> in object debug infra.
>>>
>>> The main differences I see:
>>>  - do we ever want to use this in prod? - if not why allocate the
>>>    tracker itself dynamically? The double pointer interface seems
>>>    harder to compile out completely  
>>
>> I think that maintaining the tracking state in separate storage would
>> detect cases where the object has been freed, without the help of KASAN.
> 
> Makes sense, I guess we can hang more of the information of a secondary
> object?
> 
> Maybe I'm missing a trick on how to make the feature consume no space
> when disabled via Kconfig.

If not enabled in Kconfig, the structures are empty, so consume no space.

Basically this should be a nop.


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC -next 1/2] lib: add reference counting infrastructure
  2021-11-17 22:43         ` Eric Dumazet
@ 2021-11-18  0:26           ` Jakub Kicinski
  2021-11-30  9:09           ` Dmitry Vyukov
  1 sibling, 0 replies; 14+ messages in thread
From: Jakub Kicinski @ 2021-11-18  0:26 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Eric Dumazet, David S . Miller, netdev

On Wed, 17 Nov 2021 14:43:24 -0800 Eric Dumazet wrote:
> On 11/17/21 12:47 PM, Jakub Kicinski wrote:
> > On Wed, 17 Nov 2021 12:16:15 -0800 Eric Dumazet wrote:  
> >> I think that maintaining the tracking state in separate storage would
> >> detect cases where the object has been freed, without the help of KASAN.  
> > 
> > Makes sense, I guess we can hang more of the information of a secondary
> > object?
> > 
> > Maybe I'm missing a trick on how to make the feature consume no space
> > when disabled via Kconfig.  
> 
> If not enabled in Kconfig, the structures are empty, so consume no space.
> 
> Basically this should a nop.

Right, probably not worth going back and forth, example use will clarify
this.

I feel like the two approaches are somewhat complementary: object debug
can help us pinpoint where a ref got freed / lost. Could be useful if
there are many release paths for the same struct.

How do you feel about the struct netdev_ref wrapper I made?  Do you
prefer to keep the tracking independent or can we provide the sort of
API I had in mind as well as yours:

void netdev_hold(struct netdev_ref *ref, struct net_device *dev)
void netdev_put(struct netdev_ref *ref)

struct net_device *netdev_ref_ptr(const struct netdev_ref *ref)

(doing both your tracking and object debug behind the scenes)

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC -next 2/2] net: add dev_hold_track() and dev_put_track() helpers
  2021-11-17 19:20 ` [RFC -next 2/2] net: add dev_hold_track() and dev_put_track() helpers Eric Dumazet
@ 2021-11-25 14:24   ` kernel test robot
  0 siblings, 0 replies; 14+ messages in thread
From: kernel test robot @ 2021-11-25 14:24 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 1716 bytes --]

Hi Eric,

[FYI, it's a private test report for your RFC patch.]
[auto build test ERROR on next-20211117]

url:    https://github.com/0day-ci/linux/commits/Eric-Dumazet/refcount-add-tracking-infrastructure/20211118-032232
base:    fd96a4057bd015d194a4b87e7c149fc2fef3c166
config: m68k-allyesconfig (https://download.01.org/0day-ci/archive/20211125/202111252211.poJt8F5J-lkp(a)intel.com/config)
compiler: m68k-linux-gcc (GCC) 11.2.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/6fbd7498f70d011c3a0fb8fc18f4868a3c4bef40
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Eric-Dumazet/refcount-add-tracking-infrastructure/20211118-032232
        git checkout 6fbd7498f70d011c3a0fb8fc18f4868a3c4bef40
        # save the config file to linux build tree
        mkdir build_dir
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross O=build_dir ARCH=m68k SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   m68k-linux-ld: kernel/stacktrace.o: in function `stack_trace_save':
>> (.text+0x1e4): undefined reference to `save_stack_trace'

Kconfig warnings: (for reference only)
   WARNING: unmet direct dependencies detected for STACKTRACE
   Depends on STACKTRACE_SUPPORT
   Selected by
   - STACKDEPOT

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC -next 1/2] lib: add reference counting infrastructure
  2021-11-17 22:43         ` Eric Dumazet
  2021-11-18  0:26           ` Jakub Kicinski
@ 2021-11-30  9:09           ` Dmitry Vyukov
  2021-11-30 15:02             ` Jakub Kicinski
  2021-11-30 15:07             ` Eric Dumazet
  1 sibling, 2 replies; 14+ messages in thread
From: Dmitry Vyukov @ 2021-11-30  9:09 UTC (permalink / raw)
  To: eric.dumazet; +Cc: davem, edumazet, kuba, netdev

Hi Eric, Jakub,

How strongly do you want to make this work w/o KASAN?
I am asking because KASAN will already memorize alloc/free stacks for every
heap object (+ pids + 2 aux stacks with kasan_record_aux_stack()).
So basically we just need to alloc struct list_head and won't need
quarantine/quarantine_avail in ref_tracker_dir.
If there are some refcount bugs, they may be due to a previous use-after-free,
so debugging a refcount bug w/o KASAN may be a waste of time.


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC -next 1/2] lib: add reference counting infrastructure
  2021-11-17 19:20 ` [RFC -next 1/2] lib: add reference counting infrastructure Eric Dumazet
  2021-11-17 20:03   ` Jakub Kicinski
@ 2021-11-30  9:39   ` Dmitry Vyukov
  1 sibling, 0 replies; 14+ messages in thread
From: Dmitry Vyukov @ 2021-11-30  9:39 UTC (permalink / raw)
  To: eric.dumazet; +Cc: davem, edumazet, kuba, netdev

Hi Eric,

Nice! Especially ref_tracker_dir_print() in netdev_wait_allrefs().

> +	*trackerp = tracker = kzalloc(sizeof(*tracker), gfp);

This may benefit from __GFP_NOFAIL. syzkaller will use fault injection to fail
this, and I think that will do more harm than good.

We could also note this condition in dir, along the lines of:

	if (!tracker) {
		dir->failed = true;

To print on any errors and to check in ref_tracker_free():

int ref_tracker_free(struct ref_tracker_dir *dir,
		     struct ref_tracker **trackerp)
{
...
	if (!tracker) {
		WARN_ON(!dir->failed);
		return -EEXIST;
	}

This would be a bug, right?
Or:

	*trackerp = tracker = kzalloc(sizeof(*tracker), gfp);
	if (!tracker) {
		*tracker = TRACKERP_ALLOC_FAILED;
		 return -ENOMEM;
	}

and then check TRACKERP_ALLOC_FAILED in ref_tracker_free().
dev_hold_track() ignores the return value, so it would be useful to note
this condition.

> +	if (tracker->dead) {
> +		pr_err("reference already released.\n");

This and other custom prints won't be detected as bugs by syzkaller and other
testing systems, they detect the standard BUG/WARNING. Please use these.

ref_tracker_free() uses unnecessarily long critical sections. I understand this
is debugging code, but frequently debugging code is so pessimistic that nobody
uses it. If we enable this on syzbot, it will also slow down all fuzzing.
I think that with just a little code shuffling, the critical sections can be
significantly reduced:

	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
	tracker->free_stack_handle = stack_depot_save(entries, nr_entries, GFP_ATOMIC);

	spin_lock_irqsave(&dir->lock, flags);
	if (tracker->dead)
		...
	tracker->dead = true;

	list_move_tail(&tracker->head, &dir->quarantine);
	if (!dir->quarantine_avail) {
		tracker = list_first_entry(&dir->quarantine, struct ref_tracker, head);
		list_del(&tracker->head);
	} else {
		dir->quarantine_avail--;
		tracker = NULL;
	}
	spin_unlock_irqrestore(&dir->lock, flags);

	kfree(tracker);

> +#define REF_TRACKER_STACK_ENTRIES 16
> +	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
> +	tracker->alloc_stack_handle = stack_depot_save(entries, nr_entries, gfp);

The saved stacks can be longer because they are de-duped. But stacks inserted
into stack_depot need to be trimmed with filter_irq_stacks(). It seems that
almost all current users got it wrong. We are considering moving
filter_irq_stacks() into stack_depot_save(), but it's not done yet.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC -next 1/2] lib: add reference counting infrastructure
  2021-11-30  9:09           ` Dmitry Vyukov
@ 2021-11-30 15:02             ` Jakub Kicinski
  2021-11-30 15:07             ` Eric Dumazet
  1 sibling, 0 replies; 14+ messages in thread
From: Jakub Kicinski @ 2021-11-30 15:02 UTC (permalink / raw)
  To: Dmitry Vyukov; +Cc: eric.dumazet, davem, edumazet, netdev

On Tue, 30 Nov 2021 10:09:52 +0100 Dmitry Vyukov wrote:
> Hi Eric, Jakub,
> 
> How strongly do you want to make this work w/o KASAN?
> I am asking because KASAN will already memorize alloc/free stacks for every
> heap object (+ pids + 2 aux stacks with kasan_record_aux_stack()).
> So basically we just need to alloc struct list_head and won't need
> quarantine/quarantine_avail in ref_tracker_dir.
> If there are some refcount bugs, it may be due to a previous use-after-free,
> so debugging a refcount bug w/o KASAN may be waste of time.

I don't mind, I was primarily targeting syzbot instances which will
have KASAN enabled AFAIU.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC -next 1/2] lib: add reference counting infrastructure
  2021-11-30  9:09           ` Dmitry Vyukov
  2021-11-30 15:02             ` Jakub Kicinski
@ 2021-11-30 15:07             ` Eric Dumazet
  2021-12-01  7:23               ` Dmitry Vyukov
  1 sibling, 1 reply; 14+ messages in thread
From: Eric Dumazet @ 2021-11-30 15:07 UTC (permalink / raw)
  To: Dmitry Vyukov; +Cc: eric.dumazet, davem, kuba, netdev

On Tue, Nov 30, 2021 at 1:09 AM Dmitry Vyukov <dvyukov@google.com> wrote:
>
> Hi Eric, Jakub,
>
> How strongly do you want to make this work w/o KASAN?
> I am asking because KASAN will already memorize alloc/free stacks for every
> heap object (+ pids + 2 aux stacks with kasan_record_aux_stack()).
> So basically we just need to alloc struct list_head and won't need
> quarantine/quarantine_avail in ref_tracker_dir.
> If there are some refcount bugs, it may be due to a previous use-after-free,
> so debugging a refcount bug w/o KASAN may be waste of time.
>

No strong opinion, we could have the quarantine stuff enabled only if
KASAN is not compiled in.
I was trying to make something that could be used even in a production
environment, for seldom modified refcounts.
As this tracking is optional, we do not have to use it in very small
sections of code, where the inc/dec are happening in obviously correct
and not long living pairs.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC -next 1/2] lib: add reference counting infrastructure
  2021-11-30 15:07             ` Eric Dumazet
@ 2021-12-01  7:23               ` Dmitry Vyukov
  0 siblings, 0 replies; 14+ messages in thread
From: Dmitry Vyukov @ 2021-12-01  7:23 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: eric.dumazet, davem, kuba, netdev

On Tue, 30 Nov 2021 at 16:08, Eric Dumazet <edumazet@google.com> wrote:
> > Hi Eric, Jakub,
> >
> > How strongly do you want to make this work w/o KASAN?
> > I am asking because KASAN will already memorize alloc/free stacks for every
> > heap object (+ pids + 2 aux stacks with kasan_record_aux_stack()).
> > So basically we just need to alloc struct list_head and won't need
> > quarantine/quarantine_avail in ref_tracker_dir.
> > If there are some refcount bugs, it may be due to a previous use-after-free,
> > so debugging a refcount bug w/o KASAN may be waste of time.
> >
>
> No strong opinion, we could have the quarantine stuff enabled only if
> KASAN is not compiled in.
> I was trying to make something that could be used even in a production
> environment, for seldom modified refcounts.
> As this tracking is optional, we do not have to use it in very small
> sections of code, where the inc/dec are happening in obviously correct
> and not long living pairs.

If it won't be used on very frequent paths, then it probably does not
matter much for syzbot either, and additional ifdefs are not worth
it. Then try to go with your current version.

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2021-12-01  7:23 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-11-17 19:20 [RFC -next 0/2] refcount: add tracking infrastructure Eric Dumazet
2021-11-17 19:20 ` [RFC -next 1/2] lib: add reference counting infrastructure Eric Dumazet
2021-11-17 20:03   ` Jakub Kicinski
2021-11-17 20:16     ` Eric Dumazet
2021-11-17 20:47       ` Jakub Kicinski
2021-11-17 22:43         ` Eric Dumazet
2021-11-18  0:26           ` Jakub Kicinski
2021-11-30  9:09           ` Dmitry Vyukov
2021-11-30 15:02             ` Jakub Kicinski
2021-11-30 15:07             ` Eric Dumazet
2021-12-01  7:23               ` Dmitry Vyukov
2021-11-30  9:39   ` Dmitry Vyukov
2021-11-17 19:20 ` [RFC -next 2/2] net: add dev_hold_track() and dev_put_track() helpers Eric Dumazet
2021-11-25 14:24   ` kernel test robot

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.