linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 2.6.17-rc6 0/9] Kernel memory leak detector 0.7
@ 2006-06-11 11:18 Catalin Marinas
  2006-06-11 11:21 ` [PATCH 2.6.17-rc6 1/9] Base support for kmemleak Catalin Marinas
                   ` (8 more replies)
  0 siblings, 9 replies; 52+ messages in thread
From: Catalin Marinas @ 2006-06-11 11:18 UTC (permalink / raw)
  To: linux-kernel

This is a new version (0.7) of the kernel memory leak detector. See
the Documentation/kmemleak.txt file for a more detailed
description. The patches are downloadable from (the bundled patch or
the series):

http://homepage.ntlworld.com/cmarinas/kmemleak/patch-2.6.17-rc6-kmemleak-0.7.bz2
http://homepage.ntlworld.com/cmarinas/kmemleak/patches-kmemleak-0.7.tar.bz2

What's new in this version:

- different configuration options added for tweaking kmemleak
- reduced the length of time running with interrupts disabled (the
  interrupts are now only disabled for individual memory block
  scanning rather than for the whole memory)
- fixed bug in the memleak_seq_* functions (pointer structure accessed
  after freeing)
- more false positives ignored

To do:

- more testing
- more investigation into the task stacks scanning
- NUMA support
- (support for ioremap tracking)

Many thanks to Michal Piotrowski for stress-testing kmemleak and
reporting various issues.

-- 
Catalin

^ permalink raw reply	[flat|nested] 52+ messages in thread

* [PATCH 2.6.17-rc6 1/9] Base support for kmemleak
  2006-06-11 11:18 [PATCH 2.6.17-rc6 0/9] Kernel memory leak detector 0.7 Catalin Marinas
@ 2006-06-11 11:21 ` Catalin Marinas
  2006-06-13 11:14   ` Pekka Enberg
  2006-06-11 11:21 ` [PATCH 2.6.17-rc6 2/9] Some documentation " Catalin Marinas
                   ` (7 subsequent siblings)
  8 siblings, 1 reply; 52+ messages in thread
From: Catalin Marinas @ 2006-06-11 11:21 UTC (permalink / raw)
  To: linux-kernel

From: Catalin Marinas <catalin.marinas@arm.com>

This patch adds the base support for the kernel memory leak detector. It
traces the memory allocation/freeing in a way similar to the Boehm's
conservative garbage collector, the difference being that the orphan
pointers are not freed but only shown in /proc/memleak. Enabling this
feature would introduce an overhead to memory allocations.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---

 include/linux/kernel.h  |    7 
 include/linux/memleak.h |   83 +++
 init/main.c             |    3 
 lib/Kconfig.debug       |   65 +++
 mm/Makefile             |    2 
 mm/memleak.c            | 1166 +++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 1323 insertions(+), 3 deletions(-)

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index f4fc576..9155457 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -13,6 +13,7 @@ #include <linux/stddef.h>
 #include <linux/types.h>
 #include <linux/compiler.h>
 #include <linux/bitops.h>
+#include <linux/memleak.h>
 #include <asm/byteorder.h>
 #include <asm/bug.h>
 
@@ -284,9 +285,13 @@ #define max_t(type,x,y) \
  * @member:	the name of the member within the struct.
  *
  */
-#define container_of(ptr, type, member) ({			\
+#define __container_of(ptr, type, member) ({			\
         const typeof( ((type *)0)->member ) *__mptr = (ptr);	\
         (type *)( (char *)__mptr - offsetof(type,member) );})
+#define container_of(ptr, type, member) ({			\
+	DECLARE_MEMLEAK_OFFSET(container_of, type, member);	\
+	__container_of(ptr, type, member);			\
+})
 
 /*
  * Check at compile time that something is of a particular type.
diff --git a/include/linux/memleak.h b/include/linux/memleak.h
new file mode 100644
index 0000000..afd0637
--- /dev/null
+++ b/include/linux/memleak.h
@@ -0,0 +1,83 @@
+/*
+ * include/linux/memleak.h
+ *
+ * Copyright (C) 2006 ARM Limited
+ * Written by Catalin Marinas <catalin.marinas@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __MEMLEAK_H
+#define __MEMLEAK_H
+
+#include <linux/stddef.h>
+
+#ifdef CONFIG_DEBUG_MEMLEAK
+
+struct memleak_offset {
+	unsigned long offset;
+	unsigned long size;
+	unsigned long member_size;
+};
+
+/* if offsetof(type, member) is not a constant known at compile time,
+ * just use 0 instead since we cannot add it to the
+ * .init.memleak_offsets section
+ */
+#define memleak_offsetof(type, member)				\
+	(__builtin_constant_p(offsetof(type, member)) ?		\
+	 offsetof(type, member) : 0)
+
+#define DECLARE_MEMLEAK_OFFSET(name, type, member)		\
+	static const struct memleak_offset			\
+	__attribute__ ((__section__ (".init.memleak_offsets")))	\
+	__attribute_used__ __memleak_offset__##name = {		\
+		memleak_offsetof(type, member),			\
+		sizeof(type),					\
+		sizeof(((type *)0)->member)			\
+	}
+
+extern void memleak_init(void);
+extern void memleak_alloc(const void *ptr, size_t size, int ref_count);
+extern void memleak_free(const void *ptr);
+extern void memleak_padding(const void *ptr, unsigned long offset, size_t size);
+extern void memleak_not_leak(const void *ptr);
+extern void memleak_ignore(const void *ptr);
+extern void memleak_scan_area(const void *ptr, unsigned long offset, size_t length);
+extern void memleak_insert_aliases(struct memleak_offset *ml_off_start,
+				   struct memleak_offset *ml_off_end);
+
+#define memleak_erase(ptr)	do { (ptr) = NULL; } while (0)
+#define memleak_container(type, member)	{			\
+	DECLARE_MEMLEAK_OFFSET(container_of, type, member);	\
+}
+
+#else
+
+#define DECLARE_MEMLEAK_OFFSET(name, type, member)
+
+#define memleak_init()
+#define memleak_alloc(ptr, size, ref_count)
+#define memleak_free(ptr)
+#define memleak_padding(ptr, offset, size)
+#define memleak_not_leak(ptr)
+#define memleak_ignore(ptr)
+#define memleak_scan_area(ptr, offset, length)
+#define memleak_insert_aliases(ml_off_start, ml_off_end)
+#define memleak_erase(ptr)
+#define memleak_container(type, member)
+
+#endif	/* CONFIG_DEBUG_MEMLEAK */
+
+#endif	/* __MEMLEAK_H */
diff --git a/init/main.c b/init/main.c
index f715b9b..986487c 100644
--- a/init/main.c
+++ b/init/main.c
@@ -513,6 +513,8 @@ #endif
 	cpuset_init_early();
 	mem_init();
 	kmem_cache_init();
+	radix_tree_init();
+	memleak_init();
 	setup_per_cpu_pageset();
 	numa_policy_init();
 	if (late_time_init)
@@ -533,7 +535,6 @@ #endif
 	key_init();
 	security_init();
 	vfs_caches_init(num_physpages);
-	radix_tree_init();
 	signals_init();
 	/* rootfs populating might need page-writeback */
 	page_writeback_init();
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index ccb0c1f..8bfc665 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -89,6 +89,71 @@ config DEBUG_SLAB_LEAK
 	bool "Memory leak debugging"
 	depends on DEBUG_SLAB
 
+menuconfig DEBUG_MEMLEAK
+	bool "Kernel memory leak detector"
+	default n
+	depends on EXPERIMENTAL && DEBUG_SLAB
+	depends on !NUMA
+	select DEBUG_FS
+	help
+	  Say Y here if you want to enable the memory leak
+	  detector. The memory allocation/freeing is traced in a way
+	  similar to the Boehm's conservative garbage collector, the
+	  difference being that the orphan pointers are not freed but
+	  only shown in /sys/kernel/debug/memleak. Enabling this
+	  feature will introduce an overhead to memory allocations.
+
+	  In order to access the memleak file, debugfs needs to be
+	  mounted (usually at /sys/kernel/debug).
+
+config DEBUG_MEMLEAK_TRACE_LENGTH
+	int "KMemLeak stack trace length"
+	default 4
+	depends on DEBUG_MEMLEAK && FRAME_POINTER
+	help
+	  This option sets the length of the stack trace for the
+	  allocated pointers tracked by kmemleak.
+
+config DEBUG_MEMLEAK_PREINIT_POINTERS
+	int "KMemLeak pre-init actions buffer size"
+	default 512
+	depends on DEBUG_MEMLEAK
+	help
+	  This is the buffer for storing the memory allocation/freeing
+	  calls before kmemleak is fully initialized. Each element in
+	  the buffer takes 20 bytes on a 32 bit architecture. This
+	  buffer will be freed once the system initialization is
+	  completed.
+
+config DEBUG_MEMLEAK_SECONDARY_ALIASES
+	bool "Create secondary level pointer aliases"
+	default y
+	depends on DEBUG_MEMLEAK
+	help
+	  This option creates aliases for container_of(container_of(member))
+	  access to pointers. Disabling this option reduces the chances of
+	  false negatives but it can slightly increase the number of false
+	  positives.
+
+config DEBUG_MEMLEAK_TASK_STACKS
+	bool "Scan task kernel stacks"
+	default n
+	depends on DEBUG_MEMLEAK
+	help
+	  This option enables the scanning of the task kernel
+	  stacks. Note that this option can introduce a lot of false
+	  negatives because of the randomness of stacks content.
+
+config DEBUG_MEMLEAK_ORPHAN_FREEING
+	bool "Notify when freeing orphan pointers"
+	default n
+	depends on DEBUG_MEMLEAK
+	help
+	  This option enables the notification when pointers
+	  considered leaks are freed. The stack dump and the pointer
+	  information displayed allow an easier identification of
+	  false positives.
+
 config DEBUG_PREEMPT
 	bool "Debug preemptible kernel"
 	depends on DEBUG_KERNEL && PREEMPT
diff --git a/mm/Makefile b/mm/Makefile
index 0b8f73f..d487d96 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -23,4 +23,4 @@ obj-$(CONFIG_SLAB) += slab.o
 obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
 obj-$(CONFIG_FS_XIP) += filemap_xip.o
 obj-$(CONFIG_MIGRATION) += migrate.o
-
+obj-$(CONFIG_DEBUG_MEMLEAK) += memleak.o
diff --git a/mm/memleak.c b/mm/memleak.c
new file mode 100644
index 0000000..086124f
--- /dev/null
+++ b/mm/memleak.c
@@ -0,0 +1,1166 @@
+/*
+ * mm/memleak.c
+ *
+ * Copyright (C) 2006 ARM Limited
+ * Written by Catalin Marinas <catalin.marinas@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/* #define DEBUG */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/radix-tree.h>
+#include <linux/gfp.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/kallsyms.h>
+#include <linux/mman.h>
+#include <linux/nodemask.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/vmalloc.h>
+#include <linux/mutex.h>
+#include <linux/cpumask.h>
+
+#include <asm/bitops.h>
+#include <asm/sections.h>
+#include <asm/percpu.h>
+#include <asm/processor.h>
+#include <asm/thread_info.h>
+#include <asm/atomic.h>
+
+#include <linux/memleak.h>
+
+#ifdef CONFIG_FRAME_POINTER
+#define MAX_TRACE		CONFIG_DEBUG_MEMLEAK_TRACE_LENGTH
+#else
+#define MAX_TRACE		1
+#endif
+
+#define SCAN_BLOCK_SIZE		4096		/* maximum scan length with interrupts disabled */
+#define PREINIT_POINTERS	CONFIG_DEBUG_MEMLEAK_PREINIT_POINTERS
+#define BYTES_PER_WORD		sizeof(void *)
+
+extern struct memleak_offset __memleak_offsets_start[];
+extern struct memleak_offset __memleak_offsets_end[];
+
+struct memleak_alias {
+	struct hlist_node node;
+	unsigned long offset;
+};
+
+struct memleak_scan_area {
+	struct hlist_node node;
+	unsigned long offset;
+	size_t length;
+};
+
+struct memleak_pointer {
+	struct list_head pointer_list;
+	struct list_head gray_list;
+	int use_count;
+	unsigned long pointer;
+	unsigned long offset;		/* padding */
+	size_t size;
+	int ref_count;			/* the minimum encounters of the value */
+	int count;			/* the ecounters of the value */
+	struct hlist_head *alias_list;
+	struct hlist_head area_list;	/* areas to be scanned (or empty for all) */
+	unsigned long trace[MAX_TRACE];
+};
+
+typedef enum {
+	MEMLEAK_ALLOC,
+	MEMLEAK_FREE,
+	MEMLEAK_PADDING,
+	MEMLEAK_NOT_LEAK,
+	MEMLEAK_IGNORE,
+	MEMLEAK_SCAN_AREA
+} memleak_action_t;
+
+struct memleak_preinit_pointer {
+	memleak_action_t type;
+	const void *pointer;
+	unsigned long offset;
+	size_t size;
+	int ref_count;
+};
+
+/* Pointer colors, encoded with count and ref_count:
+ *   - white - orphan block, i.e. not enough references to it (ref_count >= 1)
+ *   - gray  - referred at least once and therefore non-orphan (ref_count == 0)
+ *   - black - ignore; it doesn't contain references (text section) (ref_count == -1)
+ */
+#define COLOR_WHITE(pointer)	((pointer)->count != -1 && (pointer)->count < (pointer)->ref_count)
+#define COLOR_GRAY(pointer)	((pointer)->ref_count != -1 && (pointer)->count >= (pointer)->ref_count)
+#define COLOR_BLACK(pointer)	((pointer)->ref_count == -1)
+
+/* Tree storing the pointer aliases indexed by size */
+static RADIX_TREE(alias_tree, GFP_ATOMIC);
+/* Tree storing all the possible pointers, indexed by the pointer value */
+static RADIX_TREE(pointer_tree, GFP_ATOMIC);
+/* The list of all allocated pointers */
+static LIST_HEAD(pointer_list);
+/* The list of the gray pointers */
+static LIST_HEAD(gray_list);
+
+static kmem_cache_t *pointer_cache;
+/* Used to avoid recursive call via the kmalloc/kfree functions */
+static spinlock_t memleak_lock = SPIN_LOCK_UNLOCKED;
+static cpumask_t memleak_cpu_mask = CPU_MASK_NONE;
+static DEFINE_MUTEX(memleak_mutex);
+static atomic_t memleak_initialized = ATOMIC_INIT(0);
+static int __initdata preinit_pos = 0;
+static struct memleak_preinit_pointer __initdata preinit_pointers[PREINIT_POINTERS];
+/* last allocated pointer (optimization); protected by memleak_lock */
+static struct memleak_pointer *last_pointer = NULL;
+
+static void dump_pointer_info(struct memleak_pointer *pointer)
+{
+#ifdef CONFIG_KALLSYMS
+	char namebuf[KSYM_NAME_LEN + 1] = "";
+	char *modname;
+	unsigned long symsize;
+	unsigned long offset = 0;
+#endif
+#ifdef DEBUG
+	struct memleak_alias *alias;
+	struct hlist_node *elem;
+#endif
+	int i;
+
+	printk(KERN_NOTICE "kmemleak: pointer 0x%08lx:\n", pointer->pointer);
+#ifdef DEBUG
+	printk(KERN_NOTICE "  size = %d\n", pointer->size);
+	printk(KERN_NOTICE "  ref_count = %d\n", pointer->ref_count);
+	printk(KERN_NOTICE "  count = %d\n", pointer->count);
+	printk(KERN_NOTICE "  aliases:\n");
+	if (pointer->alias_list)
+		hlist_for_each_entry(alias, elem, pointer->alias_list, node)
+			printk(KERN_NOTICE "    0x%lx\n", alias->offset);
+	printk(KERN_NOTICE "  trace:\n");
+#endif
+	for (i = 0; i < MAX_TRACE; i++) {
+		unsigned long trace = pointer->trace[i];
+
+		if (!trace)
+			break;
+#ifdef CONFIG_KALLSYMS
+		kallsyms_lookup(trace, &symsize, &offset, &modname, namebuf);
+		printk(KERN_NOTICE "    %lx: <%s>\n", trace, namebuf);
+#else
+		printk(KERN_NOTICE "    %lx\n", trace);
+#endif
+	}
+}
+
+/* Insert an element into the aliases radix tree.
+ * Return 0 on success.
+ */
+static int insert_alias(unsigned long size, unsigned long offset)
+{
+	int ret = 0;
+	struct hlist_head *alias_list;
+	struct memleak_alias *alias;
+
+	if (size == 0 || offset == 0 || offset >= size) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	offset &= ~(BYTES_PER_WORD - 1);
+
+	alias_list = radix_tree_lookup(&alias_tree, size);
+	if (alias_list) {
+		struct hlist_node *elem;
+
+		hlist_for_each_entry(alias, elem, alias_list, node) {
+			if (alias->offset == offset) {
+				ret = -EEXIST;
+				goto out;
+			}
+		}
+	} else {
+		alias_list = kmalloc(sizeof(*alias_list), GFP_ATOMIC);
+		if (!alias_list)
+			panic("kmemleak: cannot allocate initial memory\n");
+		INIT_HLIST_HEAD(alias_list);
+
+		ret = radix_tree_insert(&alias_tree, size, alias_list);
+		if (ret)
+			panic("kmemleak: cannot insert into the aliases radix tree: %d\n", ret);
+	}
+
+	alias = kmalloc(sizeof(*alias), GFP_ATOMIC);
+	if (!alias)
+		panic("kmemleak: cannot allocate initial memory\n");
+	INIT_HLIST_NODE(&alias->node);
+	alias->offset = offset;
+
+	hlist_add_head(&alias->node, alias_list);
+
+ out:
+	return ret;
+}
+
+/* Insert pointer aliases the from the given array */
+void memleak_insert_aliases(struct memleak_offset *ml_off_start,
+				  struct memleak_offset *ml_off_end)
+{
+	struct memleak_offset *ml_off;
+	int i = 0;
+	unsigned long flags;
+	unsigned int cpu_id;
+
+	pr_debug("%s(0x%p, 0x%p)\n", __FUNCTION__, ml_off_start, ml_off_end);
+
+	spin_lock_irqsave(&memleak_lock, flags);
+
+	/* do not track the kmemleak allocated pointers */
+	cpu_id = smp_processor_id();
+	if (cpu_test_and_set(cpu_id, memleak_cpu_mask))
+		BUG();
+
+	/* primary aliases - container_of(member) */
+	for (ml_off = ml_off_start; ml_off < ml_off_end; ml_off++)
+		if (!insert_alias(ml_off->size, ml_off->offset))
+			i++;
+	pr_debug("kmemleak: found %d primary alias(es)\n", i);
+
+	/* secondary aliases - container_of(container_of(member)) */
+#ifdef CONFIG_DEBUG_MEMLEAK_SECONDARY_ALIASES
+	for (ml_off = ml_off_start; ml_off < ml_off_end; ml_off++) {
+		struct hlist_head *alias_list;
+		struct memleak_alias *alias;
+		struct hlist_node *elem;
+
+		alias_list = radix_tree_lookup(&alias_tree, ml_off->member_size);
+		if (!alias_list)
+			continue;
+
+		hlist_for_each_entry(alias, elem, alias_list, node)
+			if (!insert_alias(ml_off->size, ml_off->offset + alias->offset))
+				i++;
+	}
+	pr_debug("kmemleak: found %d alias(es)\n", i);
+#endif
+
+	cpu_clear(cpu_id, memleak_cpu_mask);
+	spin_unlock_irqrestore(&memleak_lock, flags);
+}
+EXPORT_SYMBOL_GPL(memleak_insert_aliases);
+
+static inline struct memleak_pointer *get_cached_pointer(unsigned long ptr)
+{
+	if (!last_pointer || ptr != last_pointer->pointer)
+		last_pointer = radix_tree_lookup(&pointer_tree, ptr);
+	return last_pointer;
+}
+
+static void create_pointer_aliases(struct memleak_pointer *pointer)
+{
+	struct memleak_alias *alias;
+	struct hlist_node *elem;
+	unsigned long ptr = pointer->pointer;
+	int err;
+
+	if (pointer->offset) {
+		err = radix_tree_insert(&pointer_tree, ptr + pointer->offset, pointer);
+		if (err) {
+			dump_stack();
+			panic("kmemleak: cannot insert alias into the pointer radix tree: %d\n", err);
+		}
+	}
+
+	if (pointer->alias_list) {
+		hlist_for_each_entry(alias, elem, pointer->alias_list, node) {
+			if (alias->offset >= pointer->size)
+				BUG();
+
+			err = radix_tree_insert(&pointer_tree, ptr
+						+ pointer->offset + alias->offset,
+						pointer);
+			if (err) {
+				dump_stack();
+				panic("kmemleak: cannot insert alias into the pointer radix tree: %d\n", err);
+			}
+		}
+	}
+}
+
+static void delete_pointer_aliases(struct memleak_pointer *pointer)
+{
+	struct memleak_alias *alias;
+	struct hlist_node *elem;
+	unsigned long ptr = pointer->pointer;
+
+	if (pointer->offset)
+		radix_tree_delete(&pointer_tree, ptr + pointer->offset);
+
+	if (pointer->alias_list)
+		hlist_for_each_entry(alias, elem, pointer->alias_list, node)
+			radix_tree_delete(&pointer_tree,
+					  ptr + pointer->offset + alias->offset);
+}
+
+/* no need for atomic operations since memleak_lock is held anyway */
+static inline void get_pointer(struct memleak_pointer *pointer)
+{
+	pointer->use_count++;
+}
+
+/* called with memleak_lock held for pointer_list modification and
+ * memleak_cpu_mask set to avoid entering memleak_free (and deadlock)
+ */
+static void __put_pointer(struct memleak_pointer *pointer)
+{
+	struct hlist_node *elem, *tmp;
+	struct memleak_scan_area *area;
+
+	if (--pointer->use_count > 0)
+		return;
+
+	/* free the scanning areas */
+	hlist_for_each_entry_safe(area, elem, tmp, &pointer->area_list, node) {
+		hlist_del(elem);
+		kfree(area);
+	}
+
+	list_del(&pointer->pointer_list);
+	kmem_cache_free(pointer_cache, pointer);
+}
+
+static void put_pointer(struct memleak_pointer *pointer)
+{
+	unsigned long flags;
+	unsigned int cpu_id;
+
+	spin_lock_irqsave(&memleak_lock, flags);
+	cpu_id = smp_processor_id();
+	if (cpu_test_and_set(cpu_id, memleak_cpu_mask))
+		BUG();
+
+	__put_pointer(pointer);
+
+	cpu_clear(cpu_id, memleak_cpu_mask);
+	spin_unlock_irqrestore(&memleak_lock, flags);
+}
+
+/* Insert a pointer and its aliases into the pointer radix tree */
+static inline void create_pointer(unsigned long ptr, size_t size, int ref_count)
+{
+	struct memleak_pointer *pointer;
+	int err;
+#ifdef CONFIG_FRAME_POINTER
+	int i;
+	void *frame;
+#endif
+
+	pointer = kmem_cache_alloc(pointer_cache, SLAB_ATOMIC);
+	if (!pointer)
+		panic("kmemleak: cannot allocate a memleak_pointer structure\n");
+
+	last_pointer = pointer;
+
+	INIT_LIST_HEAD(&pointer->pointer_list);
+	INIT_LIST_HEAD(&pointer->gray_list);
+	INIT_HLIST_HEAD(&pointer->area_list);
+	pointer->use_count = 0;
+	pointer->pointer = ptr;
+	pointer->offset = 0;
+	pointer->size = size;
+	pointer->ref_count = ref_count;
+	pointer->count = -1;
+	pointer->alias_list = radix_tree_lookup(&alias_tree, size);
+
+#ifdef CONFIG_FRAME_POINTER
+	frame = __builtin_frame_address(0);
+	for (i = 0; i < MAX_TRACE; i++) {
+		void *stack = task_stack_page(current);
+
+		if (frame < stack || frame > stack + THREAD_SIZE - BYTES_PER_WORD) {
+			pointer->trace[i] = 0;
+			continue;
+		}
+
+		pointer->trace[i] = arch_call_address(frame);
+		frame = arch_prev_frame(frame);
+		/* we don't need the return to do_exit() */
+		if (kstack_end(frame))
+			pointer->trace[i] = 0;
+	}
+#else
+	pointer->trace[0] = (unsigned long)__builtin_return_address(0);
+#endif
+
+	err = radix_tree_insert(&pointer_tree, ptr, pointer);
+	if (err) {
+		dump_stack();
+		panic("kmemleak: cannot insert into the pointer radix tree: %d\n", err);
+	}
+
+	create_pointer_aliases(pointer);
+
+	list_add_tail(&pointer->pointer_list, &pointer_list);
+	get_pointer(pointer);
+}
+
+/* Remove a pointer and its aliases from the pointer radix tree */
+static inline void delete_pointer(unsigned long ptr)
+{
+	struct memleak_pointer *pointer;
+
+	pointer = radix_tree_delete(&pointer_tree, ptr);
+	if (!pointer) {
+		dump_stack();
+		printk(KERN_WARNING "kmemleak: freeing unknown pointer value 0x%08lx\n", ptr);
+		return;
+	}
+	if (pointer->pointer != ptr) {
+		dump_stack();
+		dump_pointer_info(pointer);
+		panic("kmemleak: freeing pointer by alias 0x%08lx\n", ptr);
+	}
+
+	if (last_pointer && ptr == last_pointer->pointer)
+		last_pointer = NULL;
+
+#ifdef CONFIG_DEBUG_MEMLEAK_ORPHAN_FREEING
+	if (COLOR_WHITE(pointer)) {
+		dump_stack();
+		dump_pointer_info(pointer);
+		printk(KERN_WARNING "kmemleak: freeing orphan pointer 0x%08lx\n", ptr);
+	}
+#endif
+
+	delete_pointer_aliases(pointer);
+
+	pointer->pointer = 0;
+	__put_pointer(pointer);
+}
+
+/* Re-create the pointer aliases according to the new size/offset
+ * information */
+static inline void unpad_pointer(unsigned long ptr, unsigned long offset,
+				 size_t size)
+{
+	struct memleak_pointer *pointer;
+
+	pointer = get_cached_pointer(ptr);
+	if (!pointer) {
+		dump_stack();
+		panic("kmemleak: resizing unknown pointer value 0x%08lx\n", ptr);
+	}
+	if (pointer->pointer != ptr) {
+		dump_stack();
+		dump_pointer_info(pointer);
+		panic("kmemleak: resizing pointer by alias 0x%08lx\n", ptr);
+	}
+
+	if (offset == pointer->offset && size == pointer->size)
+		return;
+
+	delete_pointer_aliases(pointer);
+
+	/* we don't update the pointer->size because the real block
+	 * size should be scanned. We don't worry about random data in
+	 * the unused area because of the object poisoning */
+	pointer->offset = offset;
+	pointer->alias_list = radix_tree_lookup(&alias_tree, size);
+
+	create_pointer_aliases(pointer);
+}
+
+/* Make a pointer permanently gray (false positive) */
+static inline void make_gray_pointer(unsigned long ptr)
+{
+	struct memleak_pointer *pointer;
+
+	pointer = get_cached_pointer(ptr);
+	if (!pointer) {
+		dump_stack();
+		panic("kmemleak: graying unknown pointer value 0x%08lx\n", ptr);
+	}
+	if (pointer->pointer != ptr) {
+		dump_stack();
+		dump_pointer_info(pointer);
+		panic("kmemleak: graying pointer by alias 0x%08lx\n", ptr);
+	}
+
+	pointer->ref_count = 0;
+}
+
+/* Mark the pointer as black */
+static inline void make_black_pointer(unsigned long ptr)
+{
+	struct memleak_pointer *pointer;
+
+	pointer = get_cached_pointer(ptr);
+	if (!pointer) {
+		dump_stack();
+		panic("kmemleak: blacking unknown pointer value 0x%08lx\n", ptr);
+	}
+	if (pointer->pointer != ptr) {
+		dump_stack();
+		dump_pointer_info(pointer);
+		panic("kmemleak: blacking pointer by alias 0x%08lx\n", ptr);
+	}
+
+	pointer->ref_count = -1;
+}
+
+/* Add a scanning area to the pointer */
+static inline void add_scan_area(unsigned long ptr, unsigned long offset, size_t length)
+{
+	struct memleak_pointer *pointer;
+	struct memleak_scan_area *area;
+
+	pointer = get_cached_pointer(ptr);
+	if (!pointer) {
+		dump_stack();
+		panic("kmemleak: adding scan area to unknown pointer value 0x%08lx\n", ptr);
+	}
+	if (pointer->pointer != ptr) {
+		dump_stack();
+		dump_pointer_info(pointer);
+		panic("kmemleak: adding scan area to pointer by alias 0x%08lx\n", ptr);
+	}
+	if (offset + length > pointer->size) {
+		dump_stack();
+		dump_pointer_info(pointer);
+		panic("kmemleak: scan area larger than block 0x%08lx\n", ptr);
+	}
+
+	area = kmalloc(sizeof(*area), GFP_ATOMIC);
+	if (!area)
+		panic("kmemleak: cannot allocate a scan area\n");
+
+	INIT_HLIST_NODE(&area->node);
+	area->offset = offset;
+	area->length = length;
+
+	hlist_add_head(&area->node, &pointer->area_list);
+}
+
+/* Allocation function hook */
+void memleak_alloc(const void *ptr, size_t size, int ref_count)
+{
+	unsigned long flags;
+	unsigned int cpu_id;
+
+	if (!ptr)
+		return;
+
+	local_irq_save(flags);
+	cpu_id = get_cpu();
+
+	/* avoid recursive calls. After disabling the interrupts, the
+	 * only calls to this function on the same CPU should be from
+	 * kmemleak itself and we ignore them. Calls from other CPU's
+	 * would wait on the spin_lock.
+	 */
+	if (cpu_test_and_set(cpu_id, memleak_cpu_mask))
+		goto out;
+
+	pr_debug("%s(0x%p, %d, %d)\n", __FUNCTION__, ptr, size, ref_count);
+
+	if (!atomic_read(&memleak_initialized)) {
+		/* no need for SMP locking since this block is
+		 * executed before the other CPUs are started */
+		struct memleak_preinit_pointer *pointer;
+
+		BUG_ON(cpu_id != 0);
+
+		if (preinit_pos >= PREINIT_POINTERS)
+			panic("kmemleak: preinit pointers buffer overflow\n");
+		pointer = &preinit_pointers[preinit_pos++];
+
+		pointer->type = MEMLEAK_ALLOC;
+		pointer->pointer = ptr;
+		pointer->size = size;
+		pointer->ref_count = ref_count;
+
+		goto unmask;
+	}
+
+	spin_lock(&memleak_lock);
+	create_pointer((unsigned long)ptr, size, ref_count);
+	spin_unlock(&memleak_lock);
+
+ unmask:
+	cpu_clear(cpu_id, memleak_cpu_mask);
+ out:
+	put_cpu_no_resched();
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(memleak_alloc);
+
+/* Freeing function hook */
+void memleak_free(const void *ptr)
+{
+	unsigned long flags;
+	unsigned int cpu_id;
+
+	if (!ptr)
+		return;
+
+	local_irq_save(flags);
+	cpu_id = get_cpu();
+
+	/* avoid recursive calls. See memleak_alloc() for an explanation */
+	if (cpu_test_and_set(cpu_id, memleak_cpu_mask))
+		goto out;
+
+	pr_debug("%s(0x%p)\n", __FUNCTION__, ptr);
+
+	if (!atomic_read(&memleak_initialized)) {
+		struct memleak_preinit_pointer *pointer;
+
+		BUG_ON(cpu_id != 0);
+
+		if (preinit_pos >= PREINIT_POINTERS)
+			panic("kmemleak: preinit pointers buffer overflow\n");
+		pointer = &preinit_pointers[preinit_pos++];
+
+		pointer->type = MEMLEAK_FREE;
+		pointer->pointer = ptr;
+
+		goto unmask;
+	}
+
+	spin_lock(&memleak_lock);
+	delete_pointer((unsigned long)ptr);
+	spin_unlock(&memleak_lock);
+
+ unmask:
+	cpu_clear(cpu_id, memleak_cpu_mask);
+ out:
+	put_cpu_no_resched();
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(memleak_free);
+
+/* Change the size and location information of an allocated memory
+ * block (this is needed for allocations padding the object) */
+void memleak_padding(const void *ptr, unsigned long offset, size_t size)
+{
+	unsigned long flags;
+	unsigned int cpu_id;
+
+	if (!ptr)
+		return;
+
+	local_irq_save(flags);
+	cpu_id = get_cpu();
+
+	/* avoid recursive calls. See memleak_alloc() for an explanation */
+	if (cpu_test_and_set(cpu_id, memleak_cpu_mask))
+		goto out;
+
+	pr_debug("%s(0x%p, %d)\n", __FUNCTION__, ptr, size);
+
+	if (!atomic_read(&memleak_initialized)) {
+		struct memleak_preinit_pointer *pointer;
+
+		BUG_ON(cpu_id != 0);
+
+		if (preinit_pos >= PREINIT_POINTERS)
+			panic("kmemleak: preinit pointers buffer overflow\n");
+		pointer = &preinit_pointers[preinit_pos++];
+
+		pointer->type = MEMLEAK_PADDING;
+		pointer->pointer = ptr;
+		pointer->offset = offset;
+		pointer->size = size;
+
+		goto unmask;
+	}
+
+	spin_lock(&memleak_lock);
+	unpad_pointer((unsigned long)ptr, offset, size);
+	spin_unlock(&memleak_lock);
+
+ unmask:
+	cpu_clear(cpu_id, memleak_cpu_mask);
+ out:
+	put_cpu_no_resched();
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL(memleak_padding);
+
+/* Mark a pointer as a false positive */
+void memleak_not_leak(const void *ptr)
+{
+	unsigned long flags;
+	unsigned int cpu_id;
+
+	if (!ptr)
+		return;
+
+	local_irq_save(flags);
+	cpu_id = get_cpu();
+
+	/* avoid recursive calls. See memleak_alloc() for an explanation */
+	if (cpu_test_and_set(cpu_id, memleak_cpu_mask))
+		goto out;
+
+	pr_debug("%s(0x%p)\n", __FUNCTION__, ptr);
+
+	if (!atomic_read(&memleak_initialized)) {
+		struct memleak_preinit_pointer *pointer;
+
+		BUG_ON(cpu_id != 0);
+
+		if (preinit_pos >= PREINIT_POINTERS)
+			panic("kmemleak: preinit pointers buffer overflow\n");
+		pointer = &preinit_pointers[preinit_pos++];
+
+		pointer->type = MEMLEAK_NOT_LEAK;
+		pointer->pointer = ptr;
+
+		goto unmask;
+	}
+
+	spin_lock(&memleak_lock);
+	make_gray_pointer((unsigned long)ptr);
+	spin_unlock(&memleak_lock);
+
+ unmask:
+	cpu_clear(cpu_id, memleak_cpu_mask);
+ out:
+	put_cpu_no_resched();
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL(memleak_not_leak);
+
+/* Ignore this memory block */
+void memleak_ignore(const void *ptr)
+{
+	unsigned long flags;
+	unsigned int cpu_id;
+
+	if (!ptr)
+		return;
+
+	local_irq_save(flags);
+	cpu_id = get_cpu();
+
+	/* avoid recursive calls. See memleak_alloc() for an explanation */
+	if (cpu_test_and_set(cpu_id, memleak_cpu_mask))
+		goto out;
+
+	pr_debug("%s(0x%p)\n", __FUNCTION__, ptr);
+
+	if (!atomic_read(&memleak_initialized)) {
+		struct memleak_preinit_pointer *pointer;
+
+		BUG_ON(cpu_id != 0);
+
+		if (preinit_pos >= PREINIT_POINTERS)
+			panic("kmemleak: preinit pointers buffer overflow\n");
+		pointer = &preinit_pointers[preinit_pos++];
+
+		pointer->type = MEMLEAK_IGNORE;
+		pointer->pointer = ptr;
+
+		goto unmask;
+	}
+
+	spin_lock(&memleak_lock);
+	make_black_pointer((unsigned long)ptr);
+	spin_unlock(&memleak_lock);
+
+ unmask:
+	cpu_clear(cpu_id, memleak_cpu_mask);
+ out:
+	put_cpu_no_resched();
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL(memleak_ignore);
+
+/* Add a scanning area to a pointer */
+void memleak_scan_area(const void *ptr, unsigned long offset, size_t length)
+{
+	unsigned long flags;
+	unsigned int cpu_id;
+
+	if (!ptr)
+		return;
+
+	local_irq_save(flags);
+	cpu_id = get_cpu();
+
+	/* avoid recursive calls. See memleak_alloc() for an explanation */
+	if (cpu_test_and_set(cpu_id, memleak_cpu_mask))
+		goto out;
+
+	pr_debug("%s(0x%p)\n", __FUNCTION__, ptr);
+
+	if (!atomic_read(&memleak_initialized)) {
+		struct memleak_preinit_pointer *pointer;
+
+		BUG_ON(cpu_id != 0);
+
+		if (preinit_pos >= PREINIT_POINTERS)
+			panic("kmemleak: preinit pointers buffer overflow\n");
+		pointer = &preinit_pointers[preinit_pos++];
+
+		pointer->type = MEMLEAK_SCAN_AREA;
+		pointer->pointer = ptr;
+		pointer->offset = offset;
+		pointer->size = length;
+
+		goto unmask;
+	}
+
+	spin_lock(&memleak_lock);
+	add_scan_area((unsigned long)ptr, offset, length);
+	spin_unlock(&memleak_lock);
+
+ unmask:
+	cpu_clear(cpu_id, memleak_cpu_mask);
+ out:
+	put_cpu_no_resched();
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL(memleak_scan_area);
+
+/* Scan a block of memory (exclusive range) for pointers and move
+ * those found to the gray list. This function is called with
+ * memleak_lock held
+ */
+static void __scan_block(void *_start, void *_end)
+{
+	unsigned long *ptr;
+	unsigned long *start = (unsigned long *)ALIGN((unsigned long)_start,
+						      BYTES_PER_WORD);
+	unsigned long *end = _end;
+
+	for (ptr = start; ptr < end; ptr++) {
+		struct memleak_pointer *pointer =
+			radix_tree_lookup(&pointer_tree,
+					  (*ptr) & ~(BYTES_PER_WORD - 1));
+		if (!pointer)
+			continue;
+		if (!COLOR_WHITE(pointer))
+			continue;
+
+		pointer->count++;
+		/* this can happen during the grey_list traversal */
+		if (COLOR_GRAY(pointer)) {
+			get_pointer(pointer);
+			list_add_tail_rcu(&pointer->gray_list, &gray_list);
+		}
+	}
+}
+
+static void scan_block(void *start, void *end)
+{
+	unsigned long flags;
+	void *s, *e;
+
+	s = start;
+	while (s < end) {
+		e = s + SCAN_BLOCK_SIZE;
+
+		spin_lock_irqsave(&memleak_lock, flags);
+		__scan_block(s, e < end ? e : end);
+		spin_unlock_irqrestore(&memleak_lock, flags);
+
+		s = e;
+	}
+}
+
+/* Scan a memory block represented by a memleak_pointer */
+static inline void scan_pointer(struct memleak_pointer *pointer)
+{
+	struct memleak_scan_area *area;
+	struct hlist_node *elem;
+	unsigned long flags;
+
+	spin_lock_irqsave(&memleak_lock, flags);
+
+	/* freed pointer */
+	if (!pointer->pointer)
+		goto out;
+
+	if (hlist_empty(&pointer->area_list))
+		__scan_block((void *)pointer->pointer,
+			     (void *)(pointer->pointer + pointer->size));
+	else
+		hlist_for_each_entry(area, elem, &pointer->area_list, node) {
+			unsigned long ptr = pointer->pointer + area->offset;
+
+			__scan_block((void *)ptr, (void *)(ptr + area->length));
+		}
+
+ out:
+	spin_unlock_irqrestore(&memleak_lock, flags);
+}
+
+/* Scan the memory and print the orphan pointers */
+static void memleak_scan(void)
+{
+	unsigned long flags;
+	struct memleak_pointer *pointer, *tmp;
+#ifdef CONFIG_DEBUG_MEMLEAK_TASK_STACKS
+	struct task_struct *task;
+#endif
+	int i;
+
+	/* initialize pointers (make them white) and build the initial
+	 * gray list */
+	spin_lock_irqsave(&memleak_lock, flags);
+	list_for_each_entry(pointer, &pointer_list, pointer_list) {
+		pointer->count = 0;
+		if (COLOR_GRAY(pointer)) {
+			get_pointer(pointer);
+			list_add_tail(&pointer->gray_list, &gray_list);
+		}
+	}
+	spin_unlock_irqrestore(&memleak_lock, flags);
+
+	/* data/bss scanning */
+	scan_block(_sdata, _edata);
+	scan_block(__bss_start, __bss_stop);
+
+#ifdef CONFIG_SMP
+	/* per-cpu scanning */
+	for (i = 0; i < NR_CPUS; i++)
+		scan_block(__per_cpu_offset[i] + __per_cpu_start,
+			   __per_cpu_offset[i] + __per_cpu_end);
+#endif
+
+	/* mem_map scanning */
+	for_each_online_node(i) {
+		struct page *page, *end;
+
+		page = NODE_MEM_MAP(i);
+		end  = page + NODE_DATA(i)->node_spanned_pages;
+
+		scan_block(page, end);
+	}
+
+#ifdef CONFIG_DEBUG_MEMLEAK_TASK_STACKS
+	read_lock(&tasklist_lock);
+	for_each_process(task)
+		scan_block(task_stack_page(task),
+			   task_stack_page(task) + THREAD_SIZE);
+	read_unlock(&tasklist_lock);
+#endif
+
+	/* gray_list scanning. RCU is needed because new elements can
+	 * be added to the list during scanning */
+	rcu_read_lock();
+	list_for_each_entry_rcu(pointer, &gray_list, gray_list)
+		scan_pointer(pointer);
+	rcu_read_unlock();
+
+	/* empty the gray list. It needs the "safe" version because
+	 * put_pointer() can free the structure */
+	list_for_each_entry_safe(pointer, tmp, &gray_list, gray_list) {
+		list_del(&pointer->gray_list);
+		put_pointer(pointer);
+	}
+}
+
+#ifdef CONFIG_DEBUG_FS
+static void *memleak_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct memleak_pointer *pointer;
+	loff_t n = *pos;
+	unsigned long flags;
+
+	mutex_lock(&memleak_mutex);
+
+	if (!n)
+		memleak_scan();
+
+	spin_lock_irqsave(&memleak_lock, flags);
+
+	list_for_each_entry(pointer, &pointer_list, pointer_list)
+		if (!n--) {
+			get_pointer(pointer);
+			goto out;
+		}
+	pointer = NULL;
+
+ out:
+	spin_unlock_irqrestore(&memleak_lock, flags);
+	return pointer;
+}
+
+static void *memleak_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct list_head *n;
+	struct memleak_pointer *next = NULL;
+	unsigned long flags;
+
+	++(*pos);
+
+	spin_lock_irqsave(&memleak_lock, flags);
+
+	n = ((struct memleak_pointer *)v)->pointer_list.next;
+	if (n != &pointer_list) {
+		next = list_entry(n, struct memleak_pointer, pointer_list);
+		get_pointer(next);
+	}
+
+	spin_unlock_irqrestore(&memleak_lock, flags);
+
+	put_pointer(v);
+	return next;
+}
+
+static void memleak_seq_stop(struct seq_file *seq, void *v)
+{
+	if (v)
+		put_pointer(v);
+	mutex_unlock(&memleak_mutex);
+}
+
+static int memleak_seq_show(struct seq_file *seq, void *v)
+{
+	const struct memleak_pointer *pointer = v;
+#ifdef CONFIG_KALLSYMS
+	char namebuf[KSYM_NAME_LEN + 1] = "";
+	char *modname;
+	unsigned long symsize;
+	unsigned long offset = 0;
+#endif
+	int i;
+
+	if (!COLOR_WHITE(pointer))
+		return 0;
+	/* freed in the meantime (false positive) */
+	if (!pointer->pointer)
+		return 0;
+
+	seq_printf(seq, "orphan pointer 0x%08lx (size %d):\n",
+		   pointer->pointer, pointer->size);
+
+	for (i = 0; i < MAX_TRACE; i++) {
+		unsigned long trace = pointer->trace[i];
+		if (!trace)
+			break;
+
+#ifdef CONFIG_KALLSYMS
+		kallsyms_lookup(trace, &symsize, &offset, &modname, namebuf);
+		seq_printf(seq, "  %lx: <%s>\n", trace, namebuf);
+#else
+		seq_printf(seq, "  %lx\n", trace);
+#endif
+	}
+
+	return 0;
+}
+
+static struct seq_operations memleak_seq_ops = {
+	.start = memleak_seq_start,
+	.next  = memleak_seq_next,
+	.stop  = memleak_seq_stop,
+	.show  = memleak_seq_show,
+};
+
+static int memleak_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &memleak_seq_ops);
+}
+
+static struct file_operations memleak_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = memleak_seq_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release,
+};
+#endif					/* CONFIG_DEBUG_FS */
+
+/* KMemLeak initialization. Set up the radix tree for the pointer aliases */
+void __init memleak_init(void)
+{
+	int i = 0;
+	unsigned long flags;
+
+	pointer_cache = kmem_cache_create("pointer_cache", sizeof(struct memleak_pointer),
+					  0, SLAB_PANIC, NULL, NULL);
+	if (!pointer_cache)
+		panic("kmemleak: cannot create the pointer cache\n");
+
+	memleak_insert_aliases(__memleak_offsets_start, __memleak_offsets_end);
+
+	/* no need to hold the spinlock as SMP is not initialized
+	 * yet. Holding it here would lead to a deadlock */
+	local_irq_save(flags);
+
+	atomic_set(&memleak_initialized, 1);
+
+	/* execute the buffered memleak actions */
+	pr_debug("kmemleak: %d preinit actions\n", preinit_pos);
+	for (i = 0; i < preinit_pos; i++) {
+		struct memleak_preinit_pointer *pointer = &preinit_pointers[i];
+
+		switch (pointer->type) {
+		case MEMLEAK_ALLOC:
+			memleak_alloc(pointer->pointer, pointer->size,
+				      pointer->ref_count);
+			break;
+		case MEMLEAK_FREE:
+			memleak_free(pointer->pointer);
+			break;
+		case MEMLEAK_PADDING:
+			memleak_padding(pointer->pointer, pointer->offset,
+					pointer->size);
+			break;
+		case MEMLEAK_NOT_LEAK:
+			memleak_not_leak(pointer->pointer);
+			break;
+		case MEMLEAK_IGNORE:
+			memleak_ignore(pointer->pointer);
+			break;
+		case MEMLEAK_SCAN_AREA:
+			memleak_scan_area(pointer->pointer,
+					  pointer->offset, pointer->size);
+			break;
+		default:
+			BUG();
+		}
+	}
+
+	local_irq_restore(flags);
+
+	printk(KERN_INFO "Kernel memory leak detector initialized\n");
+}
+
+/* Late initialization function */
+int __init memleak_late_init(void)
+{
+#ifdef CONFIG_DEBUG_FS
+	struct dentry *dentry;
+
+	dentry = debugfs_create_file("memleak", S_IRUGO, NULL, NULL,
+				     &memleak_fops);
+	if (!dentry)
+		return -ENOMEM;
+#endif
+	pr_debug("kmemleak: late initialization completed\n");
+
+	return 0;
+}
+late_initcall(memleak_late_init);

^ permalink raw reply related	[flat|nested] 52+ messages in thread

* [PATCH 2.6.17-rc6 2/9] Some documentation for kmemleak
  2006-06-11 11:18 [PATCH 2.6.17-rc6 0/9] Kernel memory leak detector 0.7 Catalin Marinas
  2006-06-11 11:21 ` [PATCH 2.6.17-rc6 1/9] Base support for kmemleak Catalin Marinas
@ 2006-06-11 11:21 ` Catalin Marinas
  2006-06-11 11:21 ` [PATCH 2.6.17-rc6 3/9] Add the memory allocation/freeing hooks " Catalin Marinas
                   ` (6 subsequent siblings)
  8 siblings, 0 replies; 52+ messages in thread
From: Catalin Marinas @ 2006-06-11 11:21 UTC (permalink / raw)
  To: linux-kernel

From: Catalin Marinas <catalin.marinas@arm.com>

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---

 Documentation/kmemleak.txt |   92 ++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 92 insertions(+), 0 deletions(-)

diff --git a/Documentation/kmemleak.txt b/Documentation/kmemleak.txt
new file mode 100644
index 0000000..7072325
--- /dev/null
+++ b/Documentation/kmemleak.txt
@@ -0,0 +1,92 @@
+Kernel Memory Leak Detector
+===========================
+
+
+Introduction
+------------
+
+Kmemleak provides a way of detecting possible kernel memory leaks in a
+way similar to a tracing garbage collector
+(http://en.wikipedia.org/wiki/Garbage_collection_%28computer_science%29#Tracing_garbage_collectors),
+with the difference that the orphan pointers are not freed but only
+reported via /sys/kernel/debug/memleak. A similar method is used by
+the Valgrind tool (memcheck --leak-check) to detect the memory leaks
+in user-space applications.
+
+
+Basic Algorithm
+---------------
+
+The memory allocations via kmalloc, vmalloc, kmem_cache_alloc and
+friends are tracked and the pointers, together with additional
+information like size and stack trace, are stored in a radix tree. The
+corresponding freeing function calls are tracked and the pointers
+removed from the radix tree.
+
+An allocated block of memory is considered orphan if a pointer to its
+start address or to an alias (pointer aliases are explained later)
+cannot be found by scanning the memory (including saved
+registers). This means that there might be no way for the kernel to
+pass the address of the allocated block to a freeing function and
+therefore the block is considered a leak.
+
+The scanning algorithm steps:
+
+  1. mark all pointers as white (remaining white pointers will later
+     be considered orphan)
+  2. scan the memory starting with the data section and stacks,
+     checking the values against the addresses stored in the radix
+     tree. If a white pointer is found, it is added to the grey list
+  3. scan the grey pointers for matching addresses (some white
+     pointers can become grey and added at the end of the grey list)
+     until the grey set is finished
+  4. the remaining white pointers are considered orphan and reported
+     via /sys/kernel/debug/memleak
+
+
+Improvements
+------------
+
+Because the Linux kernel calculates many pointers at run-time via the
+container_of macro (see the lists implementation), a lot of false
+positives would be reported. This tool re-writes the container_of
+macro so that the offset and size information is stored in the
+.init.memleak_offsets section. The memleak_init() function creates a
+radix tree with corresponding offsets for every encountered block
+size. The memory allocations hook stores the pointer address together
+with its aliases based on the size of the allocated block.
+
+While one level of offsets should be enough for most cases, two levels
+are considered, i.e. container_of(container_of(...)) (one false
+positive is the "struct socket_alloc" allocation in the
+sock_alloc_inode() function).
+
+Some allocated memory blocks have pointers stored in the kernel's
+internal data structures and they cannot be detected as orphans. To
+avoid this, kmemleak can also store the number of values equal to the
+pointer (or aliases) that need to be found so that the block is not
+considered a leak. One example is __vmalloc().
+
+
+Limitations and Drawbacks
+-------------------------
+
+The biggest drawback is the reduced performance of memory allocation
+and freeing. To avoid other penalties, the memory scanning is only
+performed when the /sys/kernel/debug/memleak file is read. Anyway,
+this tool is intended for debugging purposes where the performance
+might not be the most important requirement.
+
+The tool can report false positives. These are cases where an
+allocated block doesn't need to be freed (some cases in the init_call
+functions), the pointer is calculated by other methods than the
+container_of macro or the pointer is stored in a location not scanned
+by kmemleak. If the "member" argument in the offsetof(type, member)
+call is not constant, kmemleak considers the offset as zero since it
+cannot be determined at compilation time (as a side node, it seems
+that gcc-4.0 doesn't compile these offsetof constructs either).
+
+Page allocations and ioremap are not tracked. NUMA architectures are
+not supported yet.
+
+Only the ARM and i386 architectures are currently supported.

^ permalink raw reply related	[flat|nested] 52+ messages in thread

* [PATCH 2.6.17-rc6 3/9] Add the memory allocation/freeing hooks for kmemleak
  2006-06-11 11:18 [PATCH 2.6.17-rc6 0/9] Kernel memory leak detector 0.7 Catalin Marinas
  2006-06-11 11:21 ` [PATCH 2.6.17-rc6 1/9] Base support for kmemleak Catalin Marinas
  2006-06-11 11:21 ` [PATCH 2.6.17-rc6 2/9] Some documentation " Catalin Marinas
@ 2006-06-11 11:21 ` Catalin Marinas
  2006-06-11 11:21 ` [PATCH 2.6.17-rc6 4/9] Modules support " Catalin Marinas
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 52+ messages in thread
From: Catalin Marinas @ 2006-06-11 11:21 UTC (permalink / raw)
  To: linux-kernel

From: Catalin Marinas <catalin.marinas@arm.com>

This patch adds the callbacks to memleak_(alloc|free) functions from
kmalloc/kfree, kmem_cache_(alloc|free), vmalloc/vfree etc.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---

 include/linux/slab.h |    4 ++++
 mm/page_alloc.c      |    2 ++
 mm/slab.c            |   22 ++++++++++++++++++++--
 mm/vmalloc.c         |   24 ++++++++++++++++++++++--
 4 files changed, 48 insertions(+), 4 deletions(-)

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 2d985d5..aa37216 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -89,6 +89,7 @@ #endif
 
 static inline void *kmalloc(size_t size, gfp_t flags)
 {
+#ifndef CONFIG_DEBUG_MEMLEAK
 	if (__builtin_constant_p(size)) {
 		int i = 0;
 #define CACHE(x) \
@@ -107,6 +108,7 @@ found:
 			malloc_sizes[i].cs_dmacachep :
 			malloc_sizes[i].cs_cachep, flags);
 	}
+#endif
 	return __kmalloc(size, flags);
 }
 
@@ -114,6 +116,7 @@ extern void *__kzalloc(size_t, gfp_t);
 
 static inline void *kzalloc(size_t size, gfp_t flags)
 {
+#ifndef CONFIG_DEBUG_MEMLEAK
 	if (__builtin_constant_p(size)) {
 		int i = 0;
 #define CACHE(x) \
@@ -132,6 +135,7 @@ found:
 			malloc_sizes[i].cs_dmacachep :
 			malloc_sizes[i].cs_cachep, flags);
 	}
+#endif
 	return __kzalloc(size, flags);
 }
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 253a450..4a65aa9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2800,6 +2800,8 @@ void *__init alloc_large_system_hash(con
 	if (_hash_mask)
 		*_hash_mask = (1 << log2qty) - 1;
 
+	memleak_alloc(table, size, 1);
+
 	return table;
 }
 
diff --git a/mm/slab.c b/mm/slab.c
index f1b644e..0d38f74 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2878,6 +2878,7 @@ #endif
 		STATS_INC_ALLOCMISS(cachep);
 		objp = cache_alloc_refill(cachep, flags);
 	}
+	memleak_erase(ac->entry[ac->avail]);
 	return objp;
 }
 
@@ -3143,7 +3144,11 @@ #endif
  */
 void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
 {
-	return __cache_alloc(cachep, flags, __builtin_return_address(0));
+	void *ptr = __cache_alloc(cachep, flags, __builtin_return_address(0));
+
+	memleak_alloc(ptr, cachep->obj_size, 1);
+
+	return ptr;
 }
 EXPORT_SYMBOL(kmem_cache_alloc);
 
@@ -3158,6 +3163,9 @@ EXPORT_SYMBOL(kmem_cache_alloc);
 void *kmem_cache_zalloc(struct kmem_cache *cache, gfp_t flags)
 {
 	void *ret = __cache_alloc(cache, flags, __builtin_return_address(0));
+
+	memleak_alloc(ret, cache->obj_size, 1);
+
 	if (ret)
 		memset(ret, 0, obj_size(cache));
 	return ret;
@@ -3279,6 +3287,7 @@ static __always_inline void *__do_kmallo
 					  void *caller)
 {
 	struct kmem_cache *cachep;
+	void *ptr;
 
 	/* If you want to save a few bytes .text space: replace
 	 * __ with kmem_.
@@ -3288,7 +3297,11 @@ static __always_inline void *__do_kmallo
 	cachep = __find_general_cachep(size, flags);
 	if (unlikely(cachep == NULL))
 		return NULL;
-	return __cache_alloc(cachep, flags, caller);
+	ptr = __cache_alloc(cachep, flags, caller);
+
+	memleak_alloc(ptr, size, 1);
+
+	return ptr;
 }
 
 
@@ -3372,6 +3385,9 @@ void kmem_cache_free(struct kmem_cache *
 	unsigned long flags;
 
 	local_irq_save(flags);
+
+	memleak_free(objp);
+
 	__cache_free(cachep, objp);
 	local_irq_restore(flags);
 }
@@ -3395,6 +3411,8 @@ void kfree(const void *objp)
 		return;
 	local_irq_save(flags);
 	kfree_debugcheck(objp);
+	memleak_free(objp);
+
 	c = virt_to_cache(objp);
 	mutex_debug_check_no_locks_freed(objp, obj_size(c));
 	__cache_free(c, (void *)objp);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index c0504f1..b7a9db3 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -349,6 +349,9 @@ void __vunmap(void *addr, int deallocate
 void vfree(void *addr)
 {
 	BUG_ON(in_interrupt());
+
+	memleak_free(addr);
+
 	__vunmap(addr, 1);
 }
 EXPORT_SYMBOL(vfree);
@@ -447,7 +450,14 @@ fail:
 
 void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot)
 {
-	return __vmalloc_area_node(area, gfp_mask, prot, -1);
+	void *addr = __vmalloc_area_node(area, gfp_mask, prot, -1);
+
+	/* this needs ref_count = 2 since vm_struct also contains a
+	   pointer to this address. The guard page is also subtracted
+	   from the size */
+	memleak_alloc(addr, area->size - PAGE_SIZE, 2);
+
+	return addr;
 }
 
 /**
@@ -466,6 +476,10 @@ void *__vmalloc_node(unsigned long size,
 			int node)
 {
 	struct vm_struct *area;
+	void *addr;
+#ifdef CONFIG_DEBUG_MEMLEAK
+	unsigned long real_size = size;
+#endif
 
 	size = PAGE_ALIGN(size);
 	if (!size || (size >> PAGE_SHIFT) > num_physpages)
@@ -475,7 +489,13 @@ void *__vmalloc_node(unsigned long size,
 	if (!area)
 		return NULL;
 
-	return __vmalloc_area_node(area, gfp_mask, prot, node);
+	addr = __vmalloc_area_node(area, gfp_mask, prot, node);
+
+	/* this needs ref_count = 2 since the vm_struct also contains
+	   a pointer to this address */
+	memleak_alloc(addr, real_size, 2);
+
+	return addr;
 }
 EXPORT_SYMBOL(__vmalloc_node);
 

^ permalink raw reply related	[flat|nested] 52+ messages in thread

* [PATCH 2.6.17-rc6 4/9] Modules support for kmemleak
  2006-06-11 11:18 [PATCH 2.6.17-rc6 0/9] Kernel memory leak detector 0.7 Catalin Marinas
                   ` (2 preceding siblings ...)
  2006-06-11 11:21 ` [PATCH 2.6.17-rc6 3/9] Add the memory allocation/freeing hooks " Catalin Marinas
@ 2006-06-11 11:21 ` Catalin Marinas
  2006-06-11 11:21 ` [PATCH 2.6.17-rc6 5/9] Add kmemleak support for i386 Catalin Marinas
                   ` (4 subsequent siblings)
  8 siblings, 0 replies; 52+ messages in thread
From: Catalin Marinas @ 2006-06-11 11:21 UTC (permalink / raw)
  To: linux-kernel

From: Catalin Marinas <catalin.marinas@arm.com>

This patch handles the kmemleak operations needed for modules loading so
that memory allocations from inside a module are properly tracked.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---

 kernel/module.c |   32 ++++++++++++++++++++++++++++++++
 1 files changed, 32 insertions(+), 0 deletions(-)

diff --git a/kernel/module.c b/kernel/module.c
index bbe0486..2b66374 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1413,6 +1413,9 @@ static struct module *load_module(void _
 		exportindex, modindex, obsparmindex, infoindex, gplindex,
 		crcindex, gplcrcindex, versindex, pcpuindex, gplfutureindex,
 		gplfuturecrcindex;
+#ifdef CONFIG_DEBUG_MEMLEAK
+	unsigned int dataindex, bssindex, mloffindex;
+#endif
 	struct module *mod;
 	long err = 0;
 	void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
@@ -1510,6 +1513,11 @@ #ifdef CONFIG_KALLSYMS
 	sechdrs[symindex].sh_flags |= SHF_ALLOC;
 	sechdrs[strindex].sh_flags |= SHF_ALLOC;
 #endif
+#ifdef CONFIG_DEBUG_MEMLEAK
+	dataindex = find_sec(hdr, sechdrs, secstrings, ".data");
+	bssindex = find_sec(hdr, sechdrs, secstrings, ".bss");
+	mloffindex = find_sec(hdr, sechdrs, secstrings, ".init.memleak_offsets");
+#endif
 
 	/* Check module struct version now, before we try to use module. */
 	if (!check_modstruct_version(sechdrs, versindex, mod)) {
@@ -1569,6 +1577,7 @@ #endif
 
 	/* Do the allocs. */
 	ptr = module_alloc(mod->core_size);
+	memleak_not_leak(ptr);
 	if (!ptr) {
 		err = -ENOMEM;
 		goto free_percpu;
@@ -1577,6 +1586,7 @@ #endif
 	mod->module_core = ptr;
 
 	ptr = module_alloc(mod->init_size);
+	memleak_ignore(ptr);
 	if (!ptr && mod->init_size) {
 		err = -ENOMEM;
 		goto free_core;
@@ -1608,6 +1618,28 @@ #endif
 	/* Module has been moved. */
 	mod = (void *)sechdrs[modindex].sh_addr;
 
+#ifdef CONFIG_DEBUG_MEMLEAK
+	if (mloffindex)
+		memleak_insert_aliases((void *)sechdrs[mloffindex].sh_addr,
+				       (void *)sechdrs[mloffindex].sh_addr
+				         + sechdrs[mloffindex].sh_size);
+
+	/* only scan the sections containing data */
+	memleak_scan_area(mod->module_core,
+			  (unsigned long)mod - (unsigned long)mod->module_core,
+			  sizeof(struct module));
+	if (dataindex)
+		memleak_scan_area(mod->module_core,
+				  sechdrs[dataindex].sh_addr
+				    - (unsigned long)mod->module_core,
+				  sechdrs[dataindex].sh_size);
+	if (bssindex)
+		memleak_scan_area(mod->module_core,
+				  sechdrs[bssindex].sh_addr
+				    - (unsigned long)mod->module_core,
+				  sechdrs[bssindex].sh_size);
+#endif
+
 	/* Now we've moved module, initialize linked lists, etc. */
 	module_unload_init(mod);
 

^ permalink raw reply related	[flat|nested] 52+ messages in thread

* [PATCH 2.6.17-rc6 5/9] Add kmemleak support for i386
  2006-06-11 11:18 [PATCH 2.6.17-rc6 0/9] Kernel memory leak detector 0.7 Catalin Marinas
                   ` (3 preceding siblings ...)
  2006-06-11 11:21 ` [PATCH 2.6.17-rc6 4/9] Modules support " Catalin Marinas
@ 2006-06-11 11:21 ` Catalin Marinas
  2006-06-11 11:21 ` [PATCH 2.6.17-rc6 6/9] Add kmemleak support for ARM Catalin Marinas
                   ` (3 subsequent siblings)
  8 siblings, 0 replies; 52+ messages in thread
From: Catalin Marinas @ 2006-06-11 11:21 UTC (permalink / raw)
  To: linux-kernel

From: Catalin Marinas <catalin.marinas@arm.com>

This patch modifies the vmlinux.lds.S script and adds the backtrace support
for i386 to be used with kmemleak.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---

 arch/i386/kernel/vmlinux.lds.S |    4 ++++
 include/asm-i386/processor.h   |   12 ++++++++++++
 include/asm-i386/thread_info.h |   10 +++++++++-
 3 files changed, 25 insertions(+), 1 deletions(-)

diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
index 8831303..370480e 100644
--- a/arch/i386/kernel/vmlinux.lds.S
+++ b/arch/i386/kernel/vmlinux.lds.S
@@ -38,6 +38,7 @@ SECTIONS
   RODATA
 
   /* writeable */
+  _sdata = .;			/* Start of data section */
   .data : AT(ADDR(.data) - LOAD_OFFSET) {	/* Data */
 	*(.data)
 	CONSTRUCTORS
@@ -140,6 +141,9 @@ SECTIONS
   __per_cpu_start = .;
   .data.percpu  : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) }
   __per_cpu_end = .;
+  __memleak_offsets_start = .;
+  .init.memleak_offsets : AT(ADDR(.init.memleak_offsets) - LOAD_OFFSET) { *(.init.memleak_offsets) }
+  __memleak_offsets_end = .;
   . = ALIGN(4096);
   __init_end = .;
   /* freed after init ends here */
diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h
index 805f0dc..9b6568a 100644
--- a/include/asm-i386/processor.h
+++ b/include/asm-i386/processor.h
@@ -743,4 +743,16 @@ #else
 #define mcheck_init(c) do {} while(0)
 #endif
 
+#ifdef CONFIG_FRAME_POINTER
+static inline unsigned long arch_call_address(void *frame)
+{
+	return *(unsigned long *) (frame + 4);
+}
+
+static inline void *arch_prev_frame(void *frame)
+{
+	return *(void **) frame;
+}
+#endif
+
 #endif /* __ASM_I386_PROCESSOR_H */
diff --git a/include/asm-i386/thread_info.h b/include/asm-i386/thread_info.h
index 1f7d48c..7e7c508 100644
--- a/include/asm-i386/thread_info.h
+++ b/include/asm-i386/thread_info.h
@@ -102,12 +102,20 @@ #define alloc_thread_info(tsk)					\
 		struct thread_info *ret;			\
 								\
 		ret = kmalloc(THREAD_SIZE, GFP_KERNEL);		\
+		memleak_ignore(ret);				\
 		if (ret)					\
 			memset(ret, 0, THREAD_SIZE);		\
 		ret;						\
 	})
 #else
-#define alloc_thread_info(tsk) kmalloc(THREAD_SIZE, GFP_KERNEL)
+#define alloc_thread_info(tsk)					\
+	({							\
+		struct thread_info *ret;			\
+								\
+		ret = kmalloc(THREAD_SIZE, GFP_KERNEL);		\
+		memleak_ignore(ret);				\
+		ret;						\
+	})
 #endif
 
 #define free_thread_info(info)	kfree(info)

^ permalink raw reply related	[flat|nested] 52+ messages in thread

* [PATCH 2.6.17-rc6 6/9] Add kmemleak support for ARM
  2006-06-11 11:18 [PATCH 2.6.17-rc6 0/9] Kernel memory leak detector 0.7 Catalin Marinas
                   ` (4 preceding siblings ...)
  2006-06-11 11:21 ` [PATCH 2.6.17-rc6 5/9] Add kmemleak support for i386 Catalin Marinas
@ 2006-06-11 11:21 ` Catalin Marinas
  2006-06-11 11:21 ` [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives Catalin Marinas
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 52+ messages in thread
From: Catalin Marinas @ 2006-06-11 11:21 UTC (permalink / raw)
  To: linux-kernel

From: Catalin Marinas <catalin.marinas@arm.com>

This patch modifies the vmlinux.lds.S script and adds the backtrace support
for ARM to be used with kmemleak.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---

 arch/arm/kernel/vmlinux.lds.S |    7 +++++++
 include/asm-arm/processor.h   |   12 ++++++++++++
 2 files changed, 19 insertions(+), 0 deletions(-)

diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 2b254e8..c6f038c 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -68,6 +68,11 @@ #endif
 		__per_cpu_start = .;
 			*(.data.percpu)
 		__per_cpu_end = .;
+#ifdef CONFIG_DEBUG_MEMLEAK
+		__memleak_offsets_start = .;
+			*(.init.memleak_offsets)
+		__memleak_offsets_end = .;
+#endif
 #ifndef CONFIG_XIP_KERNEL
 		__init_begin = _stext;
 		*(.init.data)
@@ -110,6 +115,7 @@ #endif
 
 	.data : AT(__data_loc) {
 		__data_start = .;	/* address in memory */
+		_sdata = .;
 
 		/*
 		 * first, the init task union, aligned
@@ -158,6 +164,7 @@ #endif
 		__bss_start = .;	/* BSS				*/
 		*(.bss)
 		*(COMMON)
+		__bss_stop = .;
 		_end = .;
 	}
 					/* Stabs debugging sections.	*/
diff --git a/include/asm-arm/processor.h b/include/asm-arm/processor.h
index 04f4d34..feaf017 100644
--- a/include/asm-arm/processor.h
+++ b/include/asm-arm/processor.h
@@ -121,6 +121,18 @@ #define spin_lock_prefetch(x) do { } whi
 
 #endif
 
+#ifdef CONFIG_FRAME_POINTER
+static inline unsigned long arch_call_address(void *frame)
+{
+	return *(unsigned long *) (frame - 4) - 4;
+}
+
+static inline void *arch_prev_frame(void *frame)
+{
+	return *(void **) (frame - 12);
+}
+#endif
+
 #endif
 
 #endif /* __ASM_ARM_PROCESSOR_H */

^ permalink raw reply related	[flat|nested] 52+ messages in thread

* [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives
  2006-06-11 11:18 [PATCH 2.6.17-rc6 0/9] Kernel memory leak detector 0.7 Catalin Marinas
                   ` (5 preceding siblings ...)
  2006-06-11 11:21 ` [PATCH 2.6.17-rc6 6/9] Add kmemleak support for ARM Catalin Marinas
@ 2006-06-11 11:21 ` Catalin Marinas
  2006-06-12  5:19   ` Pekka Enberg
  2006-06-11 11:22 ` [PATCH 2.6.17-rc6 8/9] Simple testing for kmemleak Catalin Marinas
  2006-06-11 11:22 ` [PATCH 2.6.17-rc6 9/9] Keep the __init functions after initialization Catalin Marinas
  8 siblings, 1 reply; 52+ messages in thread
From: Catalin Marinas @ 2006-06-11 11:21 UTC (permalink / raw)
  To: linux-kernel

From: Catalin Marinas <catalin.marinas@arm.com>

There are allocations for which the main pointer cannot be found but they
are not memory leaks. This patch fixes some of them.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---

 arch/i386/kernel/setup.c |    1 +
 drivers/base/platform.c  |    1 +
 drivers/hwmon/w83627hf.c |    1 +
 drivers/scsi/hosts.c     |    1 +
 fs/ext3/dir.c            |    1 +
 ipc/util.c               |    2 ++
 kernel/params.c          |    7 +++++--
 mm/slab.c                |    1 +
 net/core/dev.c           |    1 +
 net/sched/sch_generic.c  |    1 +
 10 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index dd6b0e3..0866fcf 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -1323,6 +1323,7 @@ legacy_init_iomem_resources(struct resou
 		if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
 			continue;
 		res = kzalloc(sizeof(struct resource), GFP_ATOMIC);
+		memleak_not_leak(res);
 		switch (e820.map[i].type) {
 		case E820_RAM:	res->name = "System RAM"; break;
 		case E820_ACPI:	res->name = "ACPI Tables"; break;
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 83f5c59..f818909 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -166,6 +166,7 @@ struct platform_device *platform_device_
 	struct platform_object *pa;
 
 	pa = kzalloc(sizeof(struct platform_object) + strlen(name), GFP_KERNEL);
+	memleak_padding(pa, 0, sizeof(struct platform_object));
 	if (pa) {
 		strcpy(pa->name, name);
 		pa->pdev.name = pa->name;
diff --git a/drivers/hwmon/w83627hf.c b/drivers/hwmon/w83627hf.c
index 71fb7f1..4208d37 100644
--- a/drivers/hwmon/w83627hf.c
+++ b/drivers/hwmon/w83627hf.c
@@ -1065,6 +1065,7 @@ static int w83627hf_detect(struct i2c_ad
 		err = -ENOMEM;
 		goto ERROR1;
 	}
+	memleak_container(struct w83627hf_data, client);
 
 	new_client = &data->client;
 	i2c_set_clientdata(new_client, data);
diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index dfcb96f..0a05151 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -297,6 +297,7 @@ struct Scsi_Host *scsi_host_alloc(struct
 	shost = kzalloc(sizeof(struct Scsi_Host) + privsize, gfp_mask);
 	if (!shost)
 		return NULL;
+	memleak_padding(shost, 0, sizeof(struct Scsi_Host));
 
 	spin_lock_init(&shost->default_lock);
 	scsi_assign_lock(shost, &shost->default_lock);
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index f37528e..f66fbed 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -346,6 +346,7 @@ int ext3_htree_store_dirent(struct file 
 	new_fn = kmalloc(len, GFP_KERNEL);
 	if (!new_fn)
 		return -ENOMEM;
+	memleak_padding(new_fn, 0, sizeof(struct fname));
 	memset(new_fn, 0, len);
 	new_fn->hash = hash;
 	new_fn->minor_hash = minor_hash;
diff --git a/ipc/util.c b/ipc/util.c
index 8193299..dcf3e2d 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -389,6 +389,7 @@ void* ipc_rcu_alloc(int size)
 	 */
 	if (rcu_use_vmalloc(size)) {
 		out = vmalloc(HDRLEN_VMALLOC + size);
+		memleak_not_leak(out);
 		if (out) {
 			out += HDRLEN_VMALLOC;
 			container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 1;
@@ -396,6 +397,7 @@ void* ipc_rcu_alloc(int size)
 		}
 	} else {
 		out = kmalloc(HDRLEN_KMALLOC + size, GFP_KERNEL);
+		memleak_not_leak(out);
 		if (out) {
 			out += HDRLEN_KMALLOC;
 			container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 0;
diff --git a/kernel/params.c b/kernel/params.c
index af43ecd..a30beaf 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -548,6 +548,7 @@ static void __init kernel_param_sysfs_se
 					    unsigned int name_skip)
 {
 	struct module_kobject *mk;
+	struct module_param_attrs *mp;
 
 	mk = kzalloc(sizeof(struct module_kobject), GFP_KERNEL);
 	BUG_ON(!mk);
@@ -557,11 +558,13 @@ static void __init kernel_param_sysfs_se
 	kobject_set_name(&mk->kobj, name);
 	kobject_register(&mk->kobj);
 
+	mp = param_sysfs_setup(mk, kparam, num_params, name_skip);
 	/* no need to keep the kobject if no parameter is exported */
-	if (!param_sysfs_setup(mk, kparam, num_params, name_skip)) {
+	if (!mp) {
 		kobject_unregister(&mk->kobj);
 		kfree(mk);
-	}
+	} else
+		memleak_not_leak(mp);
 }
 
 /*
diff --git a/mm/slab.c b/mm/slab.c
index 0d38f74..395e7bb 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3357,6 +3357,7 @@ void *__alloc_percpu(size_t size)
 		memset(pdata->ptrs[i], 0, size);
 	}
 
+	memleak_not_leak(pdata);
 	/* Catch derefs w/o wrappers */
 	return (void *)(~(unsigned long)pdata);
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 4fba549..2ab745f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3103,6 +3103,7 @@ struct net_device *alloc_netdev(int size
 	dev = (struct net_device *)
 		(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
 	dev->padded = (char *)dev - (char *)p;
+	memleak_padding(p, dev->padded, sizeof(struct net_device));
 
 	if (sizeof_priv)
 		dev->priv = netdev_priv(dev);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 138ea92..5422889 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -427,6 +427,7 @@ struct Qdisc *qdisc_alloc(struct net_dev
 	memset(p, 0, size);
 	sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
 	sch->padded = (char *) sch - (char *) p;
+	memleak_padding(p, sch->padded, sizeof(struct Qdisc));
 
 	INIT_LIST_HEAD(&sch->list);
 	skb_queue_head_init(&sch->q);

^ permalink raw reply related	[flat|nested] 52+ messages in thread

* [PATCH 2.6.17-rc6 8/9] Simple testing for kmemleak
  2006-06-11 11:18 [PATCH 2.6.17-rc6 0/9] Kernel memory leak detector 0.7 Catalin Marinas
                   ` (6 preceding siblings ...)
  2006-06-11 11:21 ` [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives Catalin Marinas
@ 2006-06-11 11:22 ` Catalin Marinas
  2006-06-11 11:22 ` [PATCH 2.6.17-rc6 9/9] Keep the __init functions after initialization Catalin Marinas
  8 siblings, 0 replies; 52+ messages in thread
From: Catalin Marinas @ 2006-06-11 11:22 UTC (permalink / raw)
  To: linux-kernel

From: Catalin Marinas <catalin.marinas@arm.com>

This patch only contains some very simple testing at the moment. Proper
testing will be needed.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---

 lib/Kconfig.debug |    9 ++++++
 mm/Makefile       |    1 +
 mm/memleak-test.c |   83 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 93 insertions(+), 0 deletions(-)

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 8bfc665..36e001b 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -154,6 +154,15 @@ config DEBUG_MEMLEAK_ORPHAN_FREEING
 	  information displayed allow an easier identification of
 	  false positives.
 
+config DEBUG_MEMLEAK_TEST
+	tristate "Test the kernel memory leak detector"
+	default n
+	depends on DEBUG_MEMLEAK
+	help
+	  Say Y here to build the test harness for the kernel memory
+	  leak detector. At the moment, this option enables a module
+	  that explicitly leaks memory.
+
 config DEBUG_PREEMPT
 	bool "Debug preemptible kernel"
 	depends on DEBUG_KERNEL && PREEMPT
diff --git a/mm/Makefile b/mm/Makefile
index d487d96..aef1bd8 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -24,3 +24,4 @@ obj-$(CONFIG_MEMORY_HOTPLUG) += memory_h
 obj-$(CONFIG_FS_XIP) += filemap_xip.o
 obj-$(CONFIG_MIGRATION) += migrate.o
 obj-$(CONFIG_DEBUG_MEMLEAK) += memleak.o
+obj-$(CONFIG_DEBUG_MEMLEAK_TEST) += memleak-test.o
diff --git a/mm/memleak-test.c b/mm/memleak-test.c
new file mode 100644
index 0000000..4061f99
--- /dev/null
+++ b/mm/memleak-test.c
@@ -0,0 +1,83 @@
+/*
+ * mm/memleak-test.c
+ *
+ * Copyright (C) 2006 ARM Limited
+ * Written by Catalin Marinas <catalin.marinas@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/list.h>
+
+#include <linux/memleak.h>
+
+struct test_node {
+	long header[25];
+	struct list_head list;
+	long footer[25];
+};
+
+static LIST_HEAD(test_list);
+
+/* Some very simple testing. This function needs to be extended for
+ * proper testing */
+static int __init memleak_test_init(void)
+{
+	struct test_node *elem;
+	int i;
+
+	printk(KERN_INFO "KMemLeak testing\n");
+
+	/* make some orphan pointers */
+	kmalloc(32, GFP_KERNEL);
+	kmalloc(32, GFP_KERNEL);
+#ifndef CONFIG_MODULES
+	kmem_cache_alloc(files_cachep, GFP_KERNEL);
+	kmem_cache_alloc(files_cachep, GFP_KERNEL);
+#endif
+	vmalloc(64);
+	vmalloc(64);
+
+	/* add elements to a list. They should only appear as orphan
+	 * after the module is removed */
+	for (i = 0; i < 10; i++) {
+		elem = kmalloc(sizeof(*elem), GFP_KERNEL);
+		if (!elem)
+			return -ENOMEM;
+		memset(elem, 0, sizeof(*elem));
+		INIT_LIST_HEAD(&elem->list);
+
+		list_add_tail(&elem->list, &test_list);
+	}
+
+	return 0;
+}
+module_init(memleak_test_init);
+
+static void __exit memleak_test_exit(void)
+{
+	struct test_node *elem, *tmp;
+
+	/* remove the list elements without actually freeing the memory */
+	list_for_each_entry_safe(elem, tmp, &test_list, list)
+		list_del(&elem->list);
+}
+module_exit(memleak_test_exit);
+
+MODULE_LICENSE("GPL");

^ permalink raw reply related	[flat|nested] 52+ messages in thread

* [PATCH 2.6.17-rc6 9/9] Keep the __init functions after initialization
  2006-06-11 11:18 [PATCH 2.6.17-rc6 0/9] Kernel memory leak detector 0.7 Catalin Marinas
                   ` (7 preceding siblings ...)
  2006-06-11 11:22 ` [PATCH 2.6.17-rc6 8/9] Simple testing for kmemleak Catalin Marinas
@ 2006-06-11 11:22 ` Catalin Marinas
  8 siblings, 0 replies; 52+ messages in thread
From: Catalin Marinas @ 2006-06-11 11:22 UTC (permalink / raw)
  To: linux-kernel

From: Catalin Marinas <catalin.marinas@arm.com>

This patch adds the CONFIG_DEBUG_KEEP_INIT option which preserves the
.init.text section after initialization. Memory leaks happening during this
phase can be more easily tracked.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---

 include/linux/init.h |    4 ++++
 lib/Kconfig.debug    |   10 ++++++++++
 2 files changed, 14 insertions(+), 0 deletions(-)

diff --git a/include/linux/init.h b/include/linux/init.h
index 93dcbe1..198edb5 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -41,7 +41,11 @@ #include <linux/compiler.h>
 
 /* These are for everybody (although not all archs will actually
    discard it in modules) */
+#ifdef CONFIG_DEBUG_KEEP_INIT
+#define __init
+#else
 #define __init		__attribute__ ((__section__ (".init.text")))
+#endif
 #define __initdata	__attribute__ ((__section__ (".init.data")))
 #define __exitdata	__attribute__ ((__section__(".exit.data")))
 #define __exit_call	__attribute_used__ __attribute__ ((__section__ (".exitcall.exit")))
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 36e001b..54b3d0d 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -154,6 +154,16 @@ config DEBUG_MEMLEAK_ORPHAN_FREEING
 	  information displayed allow an easier identification of
 	  false positives.
 
+config DEBUG_KEEP_INIT
+	bool "Do not free the __init functions"
+	default n
+	depends on DEBUG_MEMLEAK
+	help
+	  This option moves the __init functions out of the .init.text
+	  section and therefore they are no longer freed after the
+	  kernel initialization. It is useful for identifying memory
+	  leaks happening during the kernel or modules initialization.
+
 config DEBUG_MEMLEAK_TEST
 	tristate "Test the kernel memory leak detector"
 	default n

^ permalink raw reply related	[flat|nested] 52+ messages in thread

* Re: [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives
  2006-06-11 11:21 ` [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives Catalin Marinas
@ 2006-06-12  5:19   ` Pekka Enberg
  2006-06-12  8:11     ` Catalin Marinas
  2006-06-24 10:20     ` Catalin Marinas
  0 siblings, 2 replies; 52+ messages in thread
From: Pekka Enberg @ 2006-06-12  5:19 UTC (permalink / raw)
  To: Catalin Marinas; +Cc: linux-kernel

Hi Catalin,

On 6/11/06, Catalin Marinas <catalin.marinas@gmail.com> wrote:
> There are allocations for which the main pointer cannot be found but they
> are not memory leaks. This patch fixes some of them.

Can we fix this by looking for pointers to anywhere in the allocated
memory block instead of just looking for the start?

                                                     Pekka

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives
  2006-06-12  5:19   ` Pekka Enberg
@ 2006-06-12  8:11     ` Catalin Marinas
  2006-06-12  8:17       ` Pekka J Enberg
  2006-06-12  9:17       ` Peter Zijlstra
  2006-06-24 10:20     ` Catalin Marinas
  1 sibling, 2 replies; 52+ messages in thread
From: Catalin Marinas @ 2006-06-12  8:11 UTC (permalink / raw)
  To: Pekka Enberg; +Cc: linux-kernel

On 12/06/06, Pekka Enberg <penberg@cs.helsinki.fi> wrote:
> Hi Catalin,
>
> On 6/11/06, Catalin Marinas <catalin.marinas@gmail.com> wrote:
> > There are allocations for which the main pointer cannot be found but they
> > are not memory leaks. This patch fixes some of them.
>
> Can we fix this by looking for pointers to anywhere in the allocated
> memory block instead of just looking for the start?

I thought about this as well (I think that's how Valgrind works) but
it would increase the chances of missing real leaks. It currently
looks for the start of the block and a few locations inside the block
(those from which the main pointer is computed using the
container_of() macro).

I need to do some tests to see how it works but I won't be able to use
the radix_tree (as storing each location in the block would lead to a
huge tree).

-- 
Catalin

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives
  2006-06-12  8:11     ` Catalin Marinas
@ 2006-06-12  8:17       ` Pekka J Enberg
  2006-06-12  8:43         ` Catalin Marinas
  2006-06-12 10:53         ` Ingo Molnar
  2006-06-12  9:17       ` Peter Zijlstra
  1 sibling, 2 replies; 52+ messages in thread
From: Pekka J Enberg @ 2006-06-12  8:17 UTC (permalink / raw)
  To: Catalin Marinas; +Cc: linux-kernel

Hi,

On 12/06/06, Pekka Enberg <penberg@cs.helsinki.fi> wrote:
> > Can we fix this by looking for pointers to anywhere in the allocated
> > memory block instead of just looking for the start?

On Mon, 12 Jun 2006, Catalin Marinas wrote:
> I thought about this as well (I think that's how Valgrind works) but
> it would increase the chances of missing real leaks.

Yeah but that's far better than adding bunch of 'not a leak' annotations 
around the kernel which is very impractical to maintain.  I would like to 
see your leak detector in the kernel so we can finally get rid of all 
those per-subsystem magic allocators.  This patch, however, is 
unacceptable for inclusion IMHO.

				Pekka

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives
  2006-06-12  8:17       ` Pekka J Enberg
@ 2006-06-12  8:43         ` Catalin Marinas
  2006-06-12 10:53         ` Ingo Molnar
  1 sibling, 0 replies; 52+ messages in thread
From: Catalin Marinas @ 2006-06-12  8:43 UTC (permalink / raw)
  To: Pekka J Enberg; +Cc: linux-kernel

On 12/06/06, Pekka J Enberg <penberg@cs.helsinki.fi> wrote:
> On 12/06/06, Pekka Enberg <penberg@cs.helsinki.fi> wrote:
> > > Can we fix this by looking for pointers to anywhere in the allocated
> > > memory block instead of just looking for the start?
>
> On Mon, 12 Jun 2006, Catalin Marinas wrote:
> > I thought about this as well (I think that's how Valgrind works) but
> > it would increase the chances of missing real leaks.
>
> Yeah but that's far better than adding bunch of 'not a leak' annotations
> around the kernel which is very impractical to maintain.  I would like to
> see your leak detector in the kernel so we can finally get rid of all
> those per-subsystem magic allocators.  This patch, however, is
> unacceptable for inclusion IMHO.

My initial hope was that simply tweaking the container_of macro would
be enough to get the inside-block pointers (or, at least, modify some
key places like net/core/dev.c) but it looks like it wasn't and I
introduced the memleak_not_leak() function which had to be added to
some individual drivers as well.

As I said, I'll do some tests first because looking at all the
locations inside a block might make the tool less useful.

-- 
Catalin

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives
  2006-06-12  8:11     ` Catalin Marinas
  2006-06-12  8:17       ` Pekka J Enberg
@ 2006-06-12  9:17       ` Peter Zijlstra
  2006-06-12  9:35         ` Catalin Marinas
  1 sibling, 1 reply; 52+ messages in thread
From: Peter Zijlstra @ 2006-06-12  9:17 UTC (permalink / raw)
  To: Catalin Marinas; +Cc: Pekka Enberg, linux-kernel

On Mon, 2006-06-12 at 09:11 +0100, Catalin Marinas wrote:
> On 12/06/06, Pekka Enberg <penberg@cs.helsinki.fi> wrote:
> > Hi Catalin,
> >
> > On 6/11/06, Catalin Marinas <catalin.marinas@gmail.com> wrote:
> > > There are allocations for which the main pointer cannot be found but they
> > > are not memory leaks. This patch fixes some of them.
> >
> > Can we fix this by looking for pointers to anywhere in the allocated
> > memory block instead of just looking for the start?
> 
> I thought about this as well (I think that's how Valgrind works) but
> it would increase the chances of missing real leaks. It currently
> looks for the start of the block and a few locations inside the block
> (those from which the main pointer is computed using the
> container_of() macro).
> 
> I need to do some tests to see how it works but I won't be able to use
> the radix_tree (as storing each location in the block would lead to a
> huge tree).

A radix-priority-search-tree would allow to store intervals and query
addresses.

Peter


^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives
  2006-06-12  9:17       ` Peter Zijlstra
@ 2006-06-12  9:35         ` Catalin Marinas
  0 siblings, 0 replies; 52+ messages in thread
From: Catalin Marinas @ 2006-06-12  9:35 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Pekka Enberg, linux-kernel

On 12/06/06, Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> > I need to do some tests to see how it works but I won't be able to use
> > the radix_tree (as storing each location in the block would lead to a
> > huge tree).
>
> A radix-priority-search-tree would allow to store intervals and query
> addresses.

Great, this would simplify things. I'll post a new version using this
in the next days.

Thanks.

-- 
Catalin

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives
  2006-06-12  8:17       ` Pekka J Enberg
  2006-06-12  8:43         ` Catalin Marinas
@ 2006-06-12 10:53         ` Ingo Molnar
  2006-06-12 11:08           ` Pekka J Enberg
  2006-06-12 12:56           ` Catalin Marinas
  1 sibling, 2 replies; 52+ messages in thread
From: Ingo Molnar @ 2006-06-12 10:53 UTC (permalink / raw)
  To: Pekka J Enberg; +Cc: Catalin Marinas, linux-kernel


* Pekka J Enberg <penberg@cs.Helsinki.FI> wrote:

> On Mon, 12 Jun 2006, Catalin Marinas wrote:
> > I thought about this as well (I think that's how Valgrind works) but
> > it would increase the chances of missing real leaks.
> 
> Yeah but that's far better than adding bunch of 'not a leak' 
> annotations around the kernel which is very impractical to maintain.  
> I would like to see your leak detector in the kernel so we can finally 
> get rid of all those per-subsystem magic allocators.  This patch, 
> however, is unacceptable for inclusion IMHO.

While it's always good to reduce the amount of false positives, i dont 
think it's unacceptable for inclusion right now. A few dozen annotations 
out of 7000+ allocation call sites isnt a big maintainance problem - and 
the benefits of automatic leak-checking are really huge. The impact only 
appears to be large because the patch is trying to cover an initial 7+ 
million lines of codebase. The followup per-kernel-release overhead will 
be minimal, and will be offset by the quality increase of the kernel and 
by the productivity increase that comes due to developers not having to 
do long searches for kernel memory leaks. The debugging cost of a single 
leak found can far outweigh the cost of 10 annotations (or more).

What i'd like to see though are clear explanations about why an 
allocation is not considered a leak, in terms of comments added to the 
code. That will also help us reduce the number of annotations later on.

	Ingo

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives
  2006-06-12 10:53         ` Ingo Molnar
@ 2006-06-12 11:08           ` Pekka J Enberg
  2006-06-12 11:36             ` Ingo Molnar
  2006-06-12 12:56           ` Catalin Marinas
  1 sibling, 1 reply; 52+ messages in thread
From: Pekka J Enberg @ 2006-06-12 11:08 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Catalin Marinas, linux-kernel

Hi Ingo,

On Mon, 12 Jun 2006, Ingo Molnar wrote:
> While it's always good to reduce the amount of false positives, i dont 
> think it's unacceptable for inclusion right now. A few dozen annotations 
> out of 7000+ allocation call sites isnt a big maintainance problem - and 
> the benefits of automatic leak-checking are really huge.

Did you look at the call sites?  It seems clear that kmemleak doesn't 
support existing kernel coding style yet (see below) which means we're not 
covering all false positives.

On Mon, 12 Jun 2006, Ingo Molnar wrote:
> What i'd like to see though are clear explanations about why an 
> allocation is not considered a leak, in terms of comments added to the 
> code. That will also help us reduce the number of annotations later on.

I found at least two unacceptable false positive classes:

  - arch/i386/kernel/setup.c:
    False positive because res pointer is stored in a global instance of
    struct resource.

  - drivers/base/platform.c and fs/ext3/dir.c:
    False positive because we allocate memory for struct + some extra
    stuff.

At least the latter can be fixed as outlined by Catalin in another mail.

					Pekka

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives
  2006-06-12 11:08           ` Pekka J Enberg
@ 2006-06-12 11:36             ` Ingo Molnar
  2006-06-12 11:56               ` Pekka J Enberg
  0 siblings, 1 reply; 52+ messages in thread
From: Ingo Molnar @ 2006-06-12 11:36 UTC (permalink / raw)
  To: Pekka J Enberg; +Cc: Catalin Marinas, linux-kernel


* Pekka J Enberg <penberg@cs.Helsinki.FI> wrote:

> On Mon, 12 Jun 2006, Ingo Molnar wrote:
> > While it's always good to reduce the amount of false positives, i dont 
> > think it's unacceptable for inclusion right now. A few dozen annotations 
> > out of 7000+ allocation call sites isnt a big maintainance problem - and 
> > the benefits of automatic leak-checking are really huge.
> 
> Did you look at the call sites?  It seems clear that kmemleak doesn't 
> support existing kernel coding style yet (see below) which means we're 
> not covering all false positives.

but "supporting existing kernel coding style as-is" is not a must-have 
criterium for inclusion. While preserving semantics is strongly 
encouraged of course, a patch can change semantics (or can introduce 
restrictions) as long as it's common-sense or there is no other way out. 
The question is benefit vs. disadvantage, not a rigid "does it change 
semantics" rule.

Just look at Sparse annotations: they add maintainance overhead, but the 
overhead is well worth the trouble: they avoid bugs and they also serve 
as documentation/specification. And Sparse annotations are alot more 
numerous than kmemleak annotations will ever be!

> On Mon, 12 Jun 2006, Ingo Molnar wrote:
> > What i'd like to see though are clear explanations about why an 
> > allocation is not considered a leak, in terms of comments added to the 
> > code. That will also help us reduce the number of annotations later on.
> 
> I found at least two unacceptable false positive classes:
> 
>   - arch/i386/kernel/setup.c:
>     False positive because res pointer is stored in a global instance of
>     struct resource.

there's no good way around this one but to annotate it in one way or 
another.

We could express the non-leak nature of this allocation in a cleaner way 
by introducing the following API:

	memleak_boot_time_alloc(res);

Here kmemleak should build a global list of such allocations, with the 
following rules: these allocations must not be freed after that point, 
but these allocations will be searched for further pointers.

Via this method we will both document the special nature of these 
allocations and we'll enforce that it's not freed. (not that there is a 
high likelyhood of freeing a buffer whose pointer nobody knows - the 
purpose is rather to make sure that the annotation is correct)

>   - drivers/base/platform.c and fs/ext3/dir.c:
>     False positive because we allocate memory for struct + some extra
>     stuff.
> 
> At least the latter can be fixed as outlined by Catalin in another 
> mail.

yes.

my "document the exceptions" request was to make sure that such special 
rules are not forgotten. As long as they are documented clearly, if 
there's some common pattern between a number of them then they can be 
moved into the kmemleak infrastructure, to further reduce the annotation 
overhead.

	Ingo

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives
  2006-06-12 11:36             ` Ingo Molnar
@ 2006-06-12 11:56               ` Pekka J Enberg
  2006-06-12 12:53                 ` Catalin Marinas
  2006-06-12 13:12                 ` Ingo Molnar
  0 siblings, 2 replies; 52+ messages in thread
From: Pekka J Enberg @ 2006-06-12 11:56 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Catalin Marinas, linux-kernel

On Mon, 12 Jun 2006, Ingo Molnar wrote:
> but "supporting existing kernel coding style as-is" is not a must-have 
> criterium for inclusion. While preserving semantics is strongly 
> encouraged of course, a patch can change semantics (or can introduce 
> restrictions) as long as it's common-sense or there is no other way out. 
> The question is benefit vs. disadvantage, not a rigid "does it change 
> semantics" rule.

Agreed.  I wasn't talking about general principles though but about the 
current kmemleak annotations, which I still find lacking as-is.

On Mon, 12 Jun 2006, Ingo Molnar wrote:
> > I found at least two unacceptable false positive classes:
> > 
> >   - arch/i386/kernel/setup.c:
> >     False positive because res pointer is stored in a global instance of
> >     struct resource.
> 
> there's no good way around this one but to annotate it in one way or 
> another.

Scanning bss and data sections is too expensive, I guess.  I would prefer 
we create a separate section for gc roots but what you're suggesting is 
ok as well.
 
On Mon, 12 Jun 2006, Ingo Molnar wrote:
> >   - drivers/base/platform.c and fs/ext3/dir.c:
> >     False positive because we allocate memory for struct + some extra
> >     stuff.
> > 
> > At least the latter can be fixed as outlined by Catalin in another 
> > mail.
> 
> yes.

Indeed and should be fixed before inclusion.

				Pekka

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives
  2006-06-12 11:56               ` Pekka J Enberg
@ 2006-06-12 12:53                 ` Catalin Marinas
  2006-06-12 13:12                 ` Ingo Molnar
  1 sibling, 0 replies; 52+ messages in thread
From: Catalin Marinas @ 2006-06-12 12:53 UTC (permalink / raw)
  To: Pekka J Enberg; +Cc: Ingo Molnar, linux-kernel

On 12/06/06, Pekka J Enberg <penberg@cs.helsinki.fi> wrote:
> On Mon, 12 Jun 2006, Ingo Molnar wrote:
> > > I found at least two unacceptable false positive classes:
> > >
> > >   - arch/i386/kernel/setup.c:
> > >     False positive because res pointer is stored in a global instance of
> > >     struct resource.
> >
> > there's no good way around this one but to annotate it in one way or
> > another.
>
> Scanning bss and data sections is too expensive, I guess.  I would prefer
> we create a separate section for gc roots but what you're suggesting is
> ok as well.

Data and BSS sections are scanned, otherwise it would report a lot of
false positives.

Looking again at this false positive, I might have been wrong in the
first place. Since the iomem_resource is scanned (being in the data
section), there should be a way to get to the "res" pointer, unless
the request_resource() doesn't return NULL (or some other sibling is
freed without calling release_resource).

> On Mon, 12 Jun 2006, Ingo Molnar wrote:
> > >   - drivers/base/platform.c and fs/ext3/dir.c:
> > >     False positive because we allocate memory for struct + some extra
> > >     stuff.
> > >
> > > At least the latter can be fixed as outlined by Catalin in another
> > > mail.
> >
> > yes.
>
> Indeed and should be fixed before inclusion.

... with a slight risk of increasing the false negatives (but reducing
the memleak_padding calls).

The reason for the current implementation - because the type
information is lost in alloc calls, there is no way to know what type
of structure it is and what container_of() macros are used on its
members. The sizeof() value provides an approximation to this but this
assumption is broken once the allocated memory is different from the
size of the allocated structure. The above idea would eliminate these
issues

-- 
Catalin

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives
  2006-06-12 10:53         ` Ingo Molnar
  2006-06-12 11:08           ` Pekka J Enberg
@ 2006-06-12 12:56           ` Catalin Marinas
  2006-06-12 19:22             ` Ingo Molnar
  1 sibling, 1 reply; 52+ messages in thread
From: Catalin Marinas @ 2006-06-12 12:56 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Pekka J Enberg, linux-kernel

On 12/06/06, Ingo Molnar <mingo@elte.hu> wrote:
> What i'd like to see though are clear explanations about why an
> allocation is not considered a leak, in terms of comments added to the
> code. That will also help us reduce the number of annotations later on.

I'll document them in both Documentation/kmemleak.txt and inside the
code. If I implement the "any pointer inside the block" method, all
the memleak_padding() false positives will disappear.

-- 
Catalin

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives
  2006-06-12 11:56               ` Pekka J Enberg
  2006-06-12 12:53                 ` Catalin Marinas
@ 2006-06-12 13:12                 ` Ingo Molnar
  2006-06-12 14:38                   ` Catalin Marinas
  2006-06-12 22:29                   ` Catalin Marinas
  1 sibling, 2 replies; 52+ messages in thread
From: Ingo Molnar @ 2006-06-12 13:12 UTC (permalink / raw)
  To: Pekka J Enberg; +Cc: Catalin Marinas, linux-kernel


* Pekka J Enberg <penberg@cs.Helsinki.FI> wrote:

> > >   - arch/i386/kernel/setup.c:
> > >     False positive because res pointer is stored in a global instance of
> > >     struct resource.
> > 
> > there's no good way around this one but to annotate it in one way or 
> > another.
> 
> Scanning bss and data sections is too expensive, I guess.  I would 
> prefer we create a separate section for gc roots but what you're 
> suggesting is ok as well.

kmemleak does scan global data sections. I dont know why we dont 
discover this particular pointer though: the resource pointer ought to 
be accessible via the iomem_resource.parent/sibling/child sorted list. 
Hm.

	Ingo

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives
  2006-06-12 13:12                 ` Ingo Molnar
@ 2006-06-12 14:38                   ` Catalin Marinas
  2006-06-12 22:29                   ` Catalin Marinas
  1 sibling, 0 replies; 52+ messages in thread
From: Catalin Marinas @ 2006-06-12 14:38 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Pekka J Enberg, linux-kernel

On 12/06/06, Ingo Molnar <mingo@elte.hu> wrote:
> * Pekka J Enberg <penberg@cs.Helsinki.FI> wrote:
>
> > > >   - arch/i386/kernel/setup.c:
> > > >     False positive because res pointer is stored in a global instance of
> > > >     struct resource.
> > >
> > > there's no good way around this one but to annotate it in one way or
> > > another.
> >
> > Scanning bss and data sections is too expensive, I guess.  I would
> > prefer we create a separate section for gc roots but what you're
> > suggesting is ok as well.
>
> kmemleak does scan global data sections. I dont know why we dont
> discover this particular pointer though: the resource pointer ought to
> be accessible via the iomem_resource.parent/sibling/child sorted list.
> Hm.

Maybe it only appeared in the initial versions of kmemleak due to bugs
in the tool and I never removed it. I'll check it again this evening
on my home machine (that's where I got it). However, if
request_resource() returns -EBUSY, that's a real leak.

-- 
Catalin

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives
  2006-06-12 12:56           ` Catalin Marinas
@ 2006-06-12 19:22             ` Ingo Molnar
  2006-06-12 22:24               ` Catalin Marinas
  2006-06-13  5:53               ` Pekka J Enberg
  0 siblings, 2 replies; 52+ messages in thread
From: Ingo Molnar @ 2006-06-12 19:22 UTC (permalink / raw)
  To: Catalin Marinas; +Cc: Pekka J Enberg, linux-kernel


* Catalin Marinas <catalin.marinas@gmail.com> wrote:

> On 12/06/06, Ingo Molnar <mingo@elte.hu> wrote:
> >What i'd like to see though are clear explanations about why an
> >allocation is not considered a leak, in terms of comments added to the
> >code. That will also help us reduce the number of annotations later on.
> 
> I'll document them in both Documentation/kmemleak.txt and inside the 
> code. If I implement the "any pointer inside the block" method, all 
> the memleak_padding() false positives will disappear.

i dont know - i feel uneasy about the 'any pointer' method - it has a 
high potential for false negatives, especially for structures that 
contain strings (or other random data), etc.

did you consider the tracking of the types of allocated blocks 
explicitly? I'd expect that most blocks dont have pointers embedded in 
them that point to allocated blocks. For the ones that do, the 
allocation could be extended with the type information. For each 
affected type, we could annotate the structures themselves with offset 
information. How intrusive would such a method be?

	Ingo

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives
  2006-06-12 19:22             ` Ingo Molnar
@ 2006-06-12 22:24               ` Catalin Marinas
  2006-06-13  5:53               ` Pekka J Enberg
  1 sibling, 0 replies; 52+ messages in thread
From: Catalin Marinas @ 2006-06-12 22:24 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Pekka J Enberg, linux-kernel

On 12/06/06, Ingo Molnar <mingo@elte.hu> wrote:
>
> * Catalin Marinas <catalin.marinas@gmail.com> wrote:
>
> > On 12/06/06, Ingo Molnar <mingo@elte.hu> wrote:
> > >What i'd like to see though are clear explanations about why an
> > >allocation is not considered a leak, in terms of comments added to the
> > >code. That will also help us reduce the number of annotations later on.
> >
> > I'll document them in both Documentation/kmemleak.txt and inside the
> > code. If I implement the "any pointer inside the block" method, all
> > the memleak_padding() false positives will disappear.
>
> i dont know - i feel uneasy about the 'any pointer' method - it has a
> high potential for false negatives, especially for structures that
> contain strings (or other random data), etc.

That's my concern as well. The advantage is that it simplifies
kmemleak but I can't tell how good the detection would be. I can add
some code to the current implementation to show how many values (32
bit aligned) found during scanning look like valid pointers (i.e.
PAGE_OFFSET < x < PAGE_OFFSET + ram_size) but cannot be found in the
radix_tree. It might not be that bad (I'll post tomorrow some
statistics).

> did you consider the tracking of the types of allocated blocks
> explicitly? I'd expect that most blocks dont have pointers embedded in
> them that point to allocated blocks. For the ones that do, the
> allocation could be extended with the type information. For each
> affected type, we could annotate the structures themselves with offset
> information. How intrusive would such a method be?

Do you mean that when scanning it should only consider at the pointer
members in a structure? I don't think this can be easily achieved
because of the amount of structures in the kernel. There are places
where a pointer is stored as a long. There is also no way in C to
quantify the type of an object (similar to "typeid" in C++). The
closest approximation I could get was the size.

-- 
Catalin

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives
  2006-06-12 13:12                 ` Ingo Molnar
  2006-06-12 14:38                   ` Catalin Marinas
@ 2006-06-12 22:29                   ` Catalin Marinas
  1 sibling, 0 replies; 52+ messages in thread
From: Catalin Marinas @ 2006-06-12 22:29 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Pekka J Enberg, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 1018 bytes --]

On 12/06/06, Ingo Molnar <mingo@elte.hu> wrote:
>
> * Pekka J Enberg <penberg@cs.Helsinki.FI> wrote:
>
> > > >   - arch/i386/kernel/setup.c:
> > > >     False positive because res pointer is stored in a global instance of
> > > >     struct resource.
> > >
> > > there's no good way around this one but to annotate it in one way or
> > > another.
> >
> > Scanning bss and data sections is too expensive, I guess.  I would
> > prefer we create a separate section for gc roots but what you're
> > suggesting is ok as well.
>
> kmemleak does scan global data sections. I dont know why we dont
> discover this particular pointer though: the resource pointer ought to
> be accessible via the iomem_resource.parent/sibling/child sorted list.
> Hm.

I tested it on my x86 machine and kmemleak is right, this is a leak
because request_resource() returns -EBUSY. Something like the attached
patch fixes it (I'm not sure that's the correct fix, maybe we should
check why the resource overlaps with something else).

-- 
Catalin

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: setup-leak.patch --]
[-- Type: text/x-patch; name="setup-leak.patch", Size: 588 bytes --]

diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index dd6b0e3..60f5d99 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -1332,7 +1332,10 @@ legacy_init_iomem_resources(struct resou
 		res->start = e820.map[i].addr;
 		res->end = res->start + e820.map[i].size - 1;
 		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
-		request_resource(&iomem_resource, res);
+		if (request_resource(&iomem_resource, res)) {
+			kfree(res);
+			continue;
+		}
 		if (e820.map[i].type == E820_RAM) {
 			/*
 			 *  We don't know which RAM region contains kernel data,

^ permalink raw reply related	[flat|nested] 52+ messages in thread

* Re: [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives
  2006-06-12 19:22             ` Ingo Molnar
  2006-06-12 22:24               ` Catalin Marinas
@ 2006-06-13  5:53               ` Pekka J Enberg
  2006-06-13  6:59                 ` Catalin Marinas
  2006-06-13  7:26                 ` Ingo Molnar
  1 sibling, 2 replies; 52+ messages in thread
From: Pekka J Enberg @ 2006-06-13  5:53 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Catalin Marinas, linux-kernel

Hi Ingo,

On Mon, 12 Jun 2006, Ingo Molnar wrote:
> i dont know - i feel uneasy about the 'any pointer' method - it has a 
> high potential for false negatives, especially for structures that 
> contain strings (or other random data), etc.

Is that a problem in practice?  Structures that contain data are usually 
allocated from the slab.  There needs to be a link to that struct from the 
gc roots to get a false negative.  Or am I missing something here?

				Pekka

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives
  2006-06-13  5:53               ` Pekka J Enberg
@ 2006-06-13  6:59                 ` Catalin Marinas
  2006-06-13  7:57                   ` Pekka J Enberg
  2006-06-13  7:26                 ` Ingo Molnar
  1 sibling, 1 reply; 52+ messages in thread
From: Catalin Marinas @ 2006-06-13  6:59 UTC (permalink / raw)
  To: Pekka J Enberg; +Cc: Ingo Molnar, linux-kernel

On 13/06/06, Pekka J Enberg <penberg@cs.helsinki.fi> wrote:
> Hi Ingo,
>
> On Mon, 12 Jun 2006, Ingo Molnar wrote:
> > i dont know - i feel uneasy about the 'any pointer' method - it has a
> > high potential for false negatives, especially for structures that
> > contain strings (or other random data), etc.
>
> Is that a problem in practice?  Structures that contain data are usually
> allocated from the slab.  There needs to be a link to that struct from the
> gc roots to get a false negative.  Or am I missing something here?

The gc roots are the data and bss sections (and maybe task kernel
stacks) and all the slab-allocated blocks are scanned if a link to
them is found from the roots (and all of them are usually scanned). If
no link is found, they would be reported as memory leaks (and not
scanned). You can't really avoid the scanning of allocated blocks
since they may contain pointers to other blocks.

-- 
Catalin

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives
  2006-06-13  5:53               ` Pekka J Enberg
  2006-06-13  6:59                 ` Catalin Marinas
@ 2006-06-13  7:26                 ` Ingo Molnar
  2006-06-13  8:11                   ` Pekka J Enberg
  2006-06-13 10:49                   ` Catalin Marinas
  1 sibling, 2 replies; 52+ messages in thread
From: Ingo Molnar @ 2006-06-13  7:26 UTC (permalink / raw)
  To: Pekka J Enberg; +Cc: Catalin Marinas, linux-kernel


* Pekka J Enberg <penberg@cs.Helsinki.FI> wrote:

> Hi Ingo,
> 
> On Mon, 12 Jun 2006, Ingo Molnar wrote:
> > i dont know - i feel uneasy about the 'any pointer' method - it has a 
> > high potential for false negatives, especially for structures that 
> > contain strings (or other random data), etc.
> 
> Is that a problem in practice?  Structures that contain data are 
> usually allocated from the slab.  There needs to be a link to that 
> struct from the gc roots to get a false negative.  Or am I missing 
> something here?

you should think of this in terms of a 'graph of data', where each node 
is a block of memory. The edges between nodes are represented by 
pointers. The graph roots from .data/bss, but it may go indefinitely 
into dynamically allocated blocks as well - just think of a hash-list 
where the hash list table is in .data, but all the chain entries are in 
allocated blocks and the chaining can be arbitrarily deep.

Furtermore, each block of data has a couple of fields within it that 
contain 'outgoing pointers', and each block of data has a couple of 
addresses associated with it that are valid targets for 'incoming 
pointers'.

The task of kmemleak is to find orphan blocks of memory - the ones that 
are not connected to the graph via any edge. For that it starts scanning 
in .data/bss and recursively searches through the blocks of memory 
(marking all scanned blocks, to avoid circular walking of the graph) 
until it has walked the whole graph. Blocks that were registered but 
were not touched during this recursive walking are the leaks.

Currently kmemleak does not track the per-block position of 'outgoing 
pointers': it assumes that all fields within a block may be an outgoing 
pointer. This is a source of false negatives. (fields that do not 
contain a real pointer might accidentally contain a value that is 
interpreted as a false edge - falsely connecting a leaked block to the 
graph.)

Kmemleak does recognize 'incoming pointers' via the offsetof tracking 
method, but it's limited in that it is not a type-accurate method 
either: it tracks per-size offsets, so two types accidentally having the 
same size merges their 'possible incoming pointer offset' lists, which 
introduces false negatives. (a pointer may be considered an incoming 
edge while in reality the pointer is not validly pointing into this 
structure)

The full matching that was suggested before would further weaken the 
'incoming pointers' logic and would introduce yet another source of 
false negatives: we'd match every block pointer against every possible 
target address that points to within another block.

My suggestion would be to attempt to achieve perfect matches: annotate 
structures to figure out the offset of pointers, and thus to figure out 
the precise source addresses and a precise list of valid target 
addresses. This is a quite elaborate task to pull off though, and i'm 
not sure it's possible without intolerable maintainance overhead, but we 
should consider it nevertheless. It will also be _much_ faster, because 
per block we'd only have to scan a handful of outgoing pointers.

Perhaps a hybrid method could be used: by default we assume the most 
lenient structure: if the block type is 'unknown' (which is the default 
for not-yet-annotated structures) then we'd assume that all fields are 
pointers, and that they could all be targets too.

Once a structure is annotated, the scope of scanning is drastically 
reduced: only the annotated fields are scanned for pointers (and at that 
point we'd also _enforce_ that those pointers do indeed point to valid 
blocks of memory - i.e. this would also serve as a pointer-correctness 
checker), and annotated blocks will also restrict the scope of 'incoming 
pointers'.

Naturally, there would be two types of annotations: one that finetunes 
the scanning of outgoing pointers to happen only for fields that are 
true pointers, and one that finetunes incoming pointer matching to only 
those addresses within the block that program logic allows. All in a 
strictly per-type manner.

This also means that by default we'd have no false positives at all, but 
that there is a capable annotation method to reduce the amount of false 
negatives, in a gradual and managable way - down to zero if everything 
is annotated.

	Ingo

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives
  2006-06-13  6:59                 ` Catalin Marinas
@ 2006-06-13  7:57                   ` Pekka J Enberg
  2006-06-13  9:45                     ` Catalin Marinas
  0 siblings, 1 reply; 52+ messages in thread
From: Pekka J Enberg @ 2006-06-13  7:57 UTC (permalink / raw)
  To: Catalin Marinas; +Cc: Ingo Molnar, linux-kernel

Hi Catalin,

On Tue, 13 Jun 2006, Catalin Marinas wrote:
> The gc roots are the data and bss sections (and maybe task kernel
> stacks) and all the slab-allocated blocks are scanned if a link to
> them is found from the roots (and all of them are usually scanned). If
> no link is found, they would be reported as memory leaks (and not
> scanned). You can't really avoid the scanning of allocated blocks
> since they may contain pointers to other blocks.

I am not sure you're agreeing or disagreeing :-).  As far as I understood, 
Ingo is worried about:

	struct s { /* some fields */; char *buf; };

	struct s *p = kmalloc(sizeof(struct s) + BUF_SIZE);
	p->buf = p + sizeof(struct s);

Which could lead to false negative due to p->buf pointing to p.  However, 
for us to even _find_ p->buf, we would need an incoming pointer _to_ p 
which makes me think this is not a problem in practice.  Hmm?

					Pekka

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives
  2006-06-13  7:26                 ` Ingo Molnar
@ 2006-06-13  8:11                   ` Pekka J Enberg
  2006-06-13 10:49                   ` Catalin Marinas
  1 sibling, 0 replies; 52+ messages in thread
From: Pekka J Enberg @ 2006-06-13  8:11 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Catalin Marinas, linux-kernel

On Tue, 13 Jun 2006, Ingo Molnar wrote:
> This also means that by default we'd have no false positives at all, but 
> that there is a capable annotation method to reduce the amount of false 
> negatives, in a gradual and managable way - down to zero if everything 
> is annotated.

Yeah, sounds much better to me.  However, I am unable to figure out any 
real examples where we'd actually need the annotations for reasonably 
sane code.  Do you have some in mind?

					Pekka

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives
  2006-06-13  7:57                   ` Pekka J Enberg
@ 2006-06-13  9:45                     ` Catalin Marinas
  2006-06-13 10:04                       ` Pekka Enberg
  0 siblings, 1 reply; 52+ messages in thread
From: Catalin Marinas @ 2006-06-13  9:45 UTC (permalink / raw)
  To: Pekka J Enberg; +Cc: Ingo Molnar, linux-kernel

On 13/06/06, Pekka J Enberg <penberg@cs.helsinki.fi> wrote:
> Hi Catalin,
>
> On Tue, 13 Jun 2006, Catalin Marinas wrote:
> > The gc roots are the data and bss sections (and maybe task kernel
> > stacks) and all the slab-allocated blocks are scanned if a link to
> > them is found from the roots (and all of them are usually scanned). If
> > no link is found, they would be reported as memory leaks (and not
> > scanned). You can't really avoid the scanning of allocated blocks
> > since they may contain pointers to other blocks.
>
> I am not sure you're agreeing or disagreeing :-).

I'm not sure either :-). Doing some quick statistics on an ARM
platform shows that there are plenty of values (almost 15%) in the
scanned memory that look like pointers (i.e. between 3GB and
3GB+128MB) but do not point to any allocated block:

total scanned values = 932102
total detected pointers = 6270
detected by alias (i.e. in-block address) = 2096
looking like pointer = 135675

I'll add a test to see how many of the look-like pointers actually
point to a valid in-block address. If this is considerable larger than
the detected by alias above, it shouldn't be implemented.

> As far as I understood, Ingo is worried about:
>
>         struct s { /* some fields */; char *buf; };
>
>         struct s *p = kmalloc(sizeof(struct s) + BUF_SIZE);
>         p->buf = p + sizeof(struct s);
>
> Which could lead to false negative due to p->buf pointing to p.  However,
> for us to even _find_ p->buf, we would need an incoming pointer _to_ p
> which makes me think this is not a problem in practice.  Hmm?

Not exactly. In the above case, Ingo (and me) is worried about having
a incoming pointer (from other block) equal to p->buf and therefore
inside the block allocated with kmalloc. It doesn't matter whether any
value in a block point to locations inside the block since the actual
block first needs to be accessible on the directed graph starting from
the root.

-- 
Catalin

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives
  2006-06-13  9:45                     ` Catalin Marinas
@ 2006-06-13 10:04                       ` Pekka Enberg
  2006-06-13 10:37                         ` Catalin Marinas
  0 siblings, 1 reply; 52+ messages in thread
From: Pekka Enberg @ 2006-06-13 10:04 UTC (permalink / raw)
  To: Catalin Marinas; +Cc: Ingo Molnar, linux-kernel

On 13/06/06, Pekka J Enberg <penberg@cs.helsinki.fi> wrote:
> > As far as I understood, Ingo is worried about:
> >
> >         struct s { /* some fields */; char *buf; };
> >
> >         struct s *p = kmalloc(sizeof(struct s) + BUF_SIZE);
> >         p->buf = p + sizeof(struct s);
> >
> > Which could lead to false negative due to p->buf pointing to p.  However,
> > for us to even _find_ p->buf, we would need an incoming pointer _to_ p
> > which makes me think this is not a problem in practice.  Hmm?

On 6/13/06, Catalin Marinas <catalin.marinas@gmail.com> wrote:
> Not exactly. In the above case, Ingo (and me) is worried about having
> a incoming pointer (from other block) equal to p->buf and therefore
> inside the block allocated with kmalloc.

Ah, right, I overlooked that case. But, is it really a leak? That is,
even though we currently don't have a pointer to the beginning fo the
block, we don't know for sure it was a leak. You're now allowed to do:

    p = kmalloc(...);
    p = p + HDR_SIZE;

    /* ... */

    kfree(p - HDR_SIZE);

Do you think we should ban the above?

                                     Pekka

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives
  2006-06-13 10:04                       ` Pekka Enberg
@ 2006-06-13 10:37                         ` Catalin Marinas
  0 siblings, 0 replies; 52+ messages in thread
From: Catalin Marinas @ 2006-06-13 10:37 UTC (permalink / raw)
  To: Pekka Enberg; +Cc: Ingo Molnar, linux-kernel

On 13/06/06, Pekka Enberg <penberg@cs.helsinki.fi> wrote:
> On 13/06/06, Pekka J Enberg <penberg@cs.helsinki.fi> wrote:
> > > As far as I understood, Ingo is worried about:
> > >
> > >         struct s { /* some fields */; char *buf; };
> > >
> > >         struct s *p = kmalloc(sizeof(struct s) + BUF_SIZE);
> > >         p->buf = p + sizeof(struct s);
> > >
> > > Which could lead to false negative due to p->buf pointing to p.  However,
> > > for us to even _find_ p->buf, we would need an incoming pointer _to_ p
> > > which makes me think this is not a problem in practice.  Hmm?
>
> On 6/13/06, Catalin Marinas <catalin.marinas@gmail.com> wrote:
> > Not exactly. In the above case, Ingo (and me) is worried about having
> > a incoming pointer (from other block) equal to p->buf and therefore
> > inside the block allocated with kmalloc.
>
> Ah, right, I overlooked that case. But, is it really a leak? That is,
> even though we currently don't have a pointer to the beginning fo the
> block, we don't know for sure it was a leak. You're now allowed to do:
>
>     p = kmalloc(...);
>     p = p + HDR_SIZE;
>
>     /* ... */
>
>     kfree(p - HDR_SIZE);
>
> Do you think we should ban the above?

I don't think you can ban this because of places where the structure
needs to be aligned to a certain value. Look in the false positives
patch for the memleak_padding() calls with the 2nd argument not zero.

-- 
Catalin

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives
  2006-06-13  7:26                 ` Ingo Molnar
  2006-06-13  8:11                   ` Pekka J Enberg
@ 2006-06-13 10:49                   ` Catalin Marinas
  2006-06-14  4:07                     ` Ingo Molnar
  1 sibling, 1 reply; 52+ messages in thread
From: Catalin Marinas @ 2006-06-13 10:49 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Pekka J Enberg, linux-kernel

On 13/06/06, Ingo Molnar <mingo@elte.hu> wrote:
>
> * Pekka J Enberg <penberg@cs.Helsinki.FI> wrote:
>
> > Hi Ingo,
> >
> > On Mon, 12 Jun 2006, Ingo Molnar wrote:
> > > i dont know - i feel uneasy about the 'any pointer' method - it has a
> > > high potential for false negatives, especially for structures that
> > > contain strings (or other random data), etc.
> >
> > Is that a problem in practice?  Structures that contain data are
> > usually allocated from the slab.  There needs to be a link to that
> > struct from the gc roots to get a false negative.  Or am I missing
> > something here?
>
> you should think of this in terms of a 'graph of data', where each node
> is a block of memory. The edges between nodes are represented by
> pointers. The graph roots from .data/bss, but it may go indefinitely
> into dynamically allocated blocks as well - just think of a hash-list
> where the hash list table is in .data, but all the chain entries are in
> allocated blocks and the chaining can be arbitrarily deep.
[...]

Nice description, I should add it to the kmemleak doc :-)

> Currently kmemleak does not track the per-block position of 'outgoing
> pointers': it assumes that all fields within a block may be an outgoing
> pointer. This is a source of false negatives. (fields that do not
> contain a real pointer might accidentally contain a value that is
> interpreted as a false edge - falsely connecting a leaked block to the
> graph.)

That's correct but it might not be a real issue in practice. Many
people use the Boehm's GC and haven't complained about the amount of
false negatives (AFAIK, there is even a proposal to include it in the
next C++ standard).

> Kmemleak does recognize 'incoming pointers' via the offsetof tracking
> method, but it's limited in that it is not a type-accurate method
> either: it tracks per-size offsets, so two types accidentally having the
> same size merges their 'possible incoming pointer offset' lists, which
> introduces false negatives. (a pointer may be considered an incoming
> edge while in reality the pointer is not validly pointing into this
> structure)

The number of collisions would need to be investigated. On my system,
there are 158 distinct sizeof values generated by container_of. Of
this, 90 have at least two aliases (incoming pointer offsets). I'm not
sure how many different structures are in the kernel but I can't find
an easy (gcc magic) way to get a unique id for a type (apart from
modifying all the container_of calls and add a 4th argument - maybe a
string with the name of the type).

> The full matching that was suggested before would further weaken the
> 'incoming pointers' logic and would introduce yet another source of
> false negatives: we'd match every block pointer against every possible
> target address that points to within another block.

That's correct.

> My suggestion would be to attempt to achieve perfect matches: annotate
> structures to figure out the offset of pointers, and thus to figure out
> the precise source addresses and a precise list of valid target
> addresses. This is a quite elaborate task to pull off though, and i'm
> not sure it's possible without intolerable maintainance overhead, but we
> should consider it nevertheless. It will also be _much_ faster, because
> per block we'd only have to scan a handful of outgoing pointers.

The problem would be simpler if you get a reliable typeid. If we
consider the sizeof method, a script could scan the kernel and
generate a list of sizeof-pointer_offset pairs which is added to a
radix tree (similar to the aliases tree we have) at boot time. But
this would assume adding the memleak_padding() call not only for
incoming pointers but also for outgoing ones. The method, however,
would eliminate the need for annotating each structure in the kernel.

> This also means that by default we'd have no false positives at all,

You can still have a scenario like this - a pointer is freed but the
value remains in a valid member; it is afterwards re-allocated and
leaks but the value is found in a previous allocation. I think it's a
very low risk for this to happen and not worth the hassle.

> but
> that there is a capable annotation method to reduce the amount of false
> negatives, in a gradual and managable way - down to zero if everything
> is annotated.

I'm not sure this could be achieved in a maintainable way, at least
not without support from the compiler.

-- 
Catalin

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 2.6.17-rc6 1/9] Base support for kmemleak
  2006-06-11 11:21 ` [PATCH 2.6.17-rc6 1/9] Base support for kmemleak Catalin Marinas
@ 2006-06-13 11:14   ` Pekka Enberg
  2006-06-13 12:47     ` Catalin Marinas
  0 siblings, 1 reply; 52+ messages in thread
From: Pekka Enberg @ 2006-06-13 11:14 UTC (permalink / raw)
  To: Catalin Marinas; +Cc: linux-kernel

Hi Catalin,

On 6/11/06, Catalin Marinas <catalin.marinas@gmail.com> wrote:
> This patch adds the base support for the kernel memory leak detector. It
> traces the memory allocation/freeing in a way similar to the Boehm's
> conservative garbage collector, the difference being that the orphan
> pointers are not freed but only shown in /proc/memleak. Enabling this
> feature would introduce an overhead to memory allocations.

Some coding style comments below.

> --- /dev/null
> +++ b/include/linux/memleak.h
> @@ -0,0 +1,83 @@
> +extern void memleak_scan_area(const void *ptr, unsigned long offset, size_t length);
> +extern void memleak_insert_aliases(struct memleak_offset *ml_off_start,
> +                                  struct memleak_offset *ml_off_end);
> +
> +#define memleak_erase(ptr)     do { (ptr) = NULL; } while (0)

Use static inline functions instead of macros, please.

> +#define memleak_init()
> +#define memleak_alloc(ptr, size, ref_count)
> +#define memleak_free(ptr)
> +#define memleak_padding(ptr, offset, size)
> +#define memleak_not_leak(ptr)
> +#define memleak_ignore(ptr)
> +#define memleak_scan_area(ptr, offset, length)
> +#define memleak_insert_aliases(ml_off_start, ml_off_end)
> +#define memleak_erase(ptr)
> +#define memleak_container(type, member)

Ditto.

> --- /dev/null
> +++ b/mm/memleak.c
> @@ -0,0 +1,1166 @@
> +typedef enum {
> +       MEMLEAK_ALLOC,
> +       MEMLEAK_FREE,
> +       MEMLEAK_PADDING,
> +       MEMLEAK_NOT_LEAK,
> +       MEMLEAK_IGNORE,
> +       MEMLEAK_SCAN_AREA
> +} memleak_action_t;

Please drop the typedef.

> +/* Pointer colors, encoded with count and ref_count:
> + *   - white - orphan block, i.e. not enough references to it (ref_count >= 1)
> + *   - gray  - referred at least once and therefore non-orphan (ref_count == 0)
> + *   - black - ignore; it doesn't contain references (text section) (ref_count == -1)
> + */
> +#define COLOR_WHITE(pointer)   ((pointer)->count != -1 && (pointer)->count < (pointer)->ref_count)
> +#define COLOR_GRAY(pointer)    ((pointer)->ref_count != -1 && (pointer)->count >= (pointer)->ref_count)
> +#define COLOR_BLACK(pointer)   ((pointer)->ref_count == -1)

Use static inline functions instead of macros, please.

> +
> +static kmem_cache_t *pointer_cache;

Please use struct kmem_cache instead of the typedef.

> +static int __initdata preinit_pos = 0;

Unnecessary initialization to zero.

> +static struct memleak_pointer *last_pointer = NULL;

Same here for NULL.

> +
> +static void dump_pointer_info(struct memleak_pointer *pointer)
> +{
> +#ifdef CONFIG_KALLSYMS
> +       char namebuf[KSYM_NAME_LEN + 1] = "";
> +       char *modname;
> +       unsigned long symsize;
> +       unsigned long offset = 0;
> +#endif
> +#ifdef DEBUG
> +       struct memleak_alias *alias;
> +       struct hlist_node *elem;
> +#endif
> +       int i;
> +
> +       printk(KERN_NOTICE "kmemleak: pointer 0x%08lx:\n", pointer->pointer);
> +#ifdef DEBUG
> +       printk(KERN_NOTICE "  size = %d\n", pointer->size);
> +       printk(KERN_NOTICE "  ref_count = %d\n", pointer->ref_count);
> +       printk(KERN_NOTICE "  count = %d\n", pointer->count);
> +       printk(KERN_NOTICE "  aliases:\n");
> +       if (pointer->alias_list)
> +               hlist_for_each_entry(alias, elem, pointer->alias_list, node)
> +                       printk(KERN_NOTICE "    0x%lx\n", alias->offset);
> +       printk(KERN_NOTICE "  trace:\n");
> +#endif
> +       for (i = 0; i < MAX_TRACE; i++) {
> +               unsigned long trace = pointer->trace[i];
> +
> +               if (!trace)
> +                       break;
> +#ifdef CONFIG_KALLSYMS
> +               kallsyms_lookup(trace, &symsize, &offset, &modname, namebuf);
> +               printk(KERN_NOTICE "    %lx: <%s>\n", trace, namebuf);
> +#else
> +               printk(KERN_NOTICE "    %lx\n", trace);
> +#endif

Please move both #ifdef cases into separate functions and provide a
stub for the case where they're not enabled for readability.

> +static int insert_alias(unsigned long size, unsigned long offset)
> +{
> +       int ret = 0;

Unnecessary initialization to zero.

> +/* KMemLeak initialization. Set up the radix tree for the pointer aliases */
> +void __init memleak_init(void)
> +{
> +       int i = 0;

Unnecessary initialization to zero.

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 2.6.17-rc6 1/9] Base support for kmemleak
  2006-06-13 11:14   ` Pekka Enberg
@ 2006-06-13 12:47     ` Catalin Marinas
  0 siblings, 0 replies; 52+ messages in thread
From: Catalin Marinas @ 2006-06-13 12:47 UTC (permalink / raw)
  To: Pekka Enberg; +Cc: linux-kernel

On 13/06/06, Pekka Enberg <penberg@cs.helsinki.fi> wrote:
> On 6/11/06, Catalin Marinas <catalin.marinas@gmail.com> wrote:
> > --- /dev/null
> > +++ b/include/linux/memleak.h
> > @@ -0,0 +1,83 @@
> > +extern void memleak_scan_area(const void *ptr, unsigned long offset, size_t length);
> > +extern void memleak_insert_aliases(struct memleak_offset *ml_off_start,
> > +                                  struct memleak_offset *ml_off_end);
> > +
> > +#define memleak_erase(ptr)     do { (ptr) = NULL; } while (0)
>
> Use static inline functions instead of macros, please.

That was the simplest way in some cases when kmemleak was not
dependent on DEBUG_SLAB obj_size() in mm/slab.c was not always
defined. It works correctly with inlining now so I'll change them.

> > +#define memleak_container(type, member)

This cannot be an inline function as it takes a type as argument.

> > +static int __initdata preinit_pos = 0;
>
> Unnecessary initialization to zero.
>
> > +static struct memleak_pointer *last_pointer = NULL;
>
> Same here for NULL.

True, it is unnecessary. That's just my style of marking them as initialised.

> > +static int insert_alias(unsigned long size, unsigned long offset)
> > +{
> > +       int ret = 0;
>
> Unnecessary initialization to zero.

That's needed because there is an exit path which doesn't set this variable.

I am OK with the other comments. Thanks.

-- 
Catalin

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives
  2006-06-13 10:49                   ` Catalin Marinas
@ 2006-06-14  4:07                     ` Ingo Molnar
  2006-06-14  5:46                       ` Andi Kleen
                                         ` (2 more replies)
  0 siblings, 3 replies; 52+ messages in thread
From: Ingo Molnar @ 2006-06-14  4:07 UTC (permalink / raw)
  To: Catalin Marinas; +Cc: Pekka J Enberg, linux-kernel


* Catalin Marinas <catalin.marinas@gmail.com> wrote:

> On 13/06/06, Ingo Molnar <mingo@elte.hu> wrote:
> >
> >you should think of this in terms of a 'graph of data', where each node
> >is a block of memory. The edges between nodes are represented by
> >pointers. The graph roots from .data/bss, but it may go indefinitely
> >into dynamically allocated blocks as well - just think of a hash-list
> >where the hash list table is in .data, but all the chain entries are in
> >allocated blocks and the chaining can be arbitrarily deep.
> [...]
> 
> Nice description, I should add it to the kmemleak doc :-)

feel free :-)

> >Currently kmemleak does not track the per-block position of 'outgoing
> >pointers': it assumes that all fields within a block may be an outgoing
> >pointer. This is a source of false negatives. (fields that do not
> >contain a real pointer might accidentally contain a value that is
> >interpreted as a false edge - falsely connecting a leaked block to the
> >graph.)
> 
> That's correct but it might not be a real issue in practice. Many 
> people use the Boehm's GC and haven't complained about the amount of 
> false negatives (AFAIK, there is even a proposal to include it in the 
> next C++ standard).

For a GC a false negative is no big problem - it will reduce the 
efficiency of the GC a bit, but that's all. For leak detection, if we 
happen to have a persistent false pointer in .data (or any other 
persistently allocated memory), it may prevent the detection of a leak 
permanently - at least for that bootup. Statistically it could still be 
found on other systems, but it would be better to have a design that 
will eventually lead to having no false negatives.

But it's not just about the amount of false negatives, but also about 
the overhead of scanning. You are concentrated on embedded systems with 
small RAM - but most of the testers will be running this with at last 
1GB of RAM - which is _alot_ of memory to scan.

(But, if it's not possible to implement it in a sane manner then that's 
not an issue either - it's rather the false positives that must be 
avoided.)

> >Kmemleak does recognize 'incoming pointers' via the offsetof tracking
> >method, but it's limited in that it is not a type-accurate method
> >either: it tracks per-size offsets, so two types accidentally having the
> >same size merges their 'possible incoming pointer offset' lists, which
> >introduces false negatives. (a pointer may be considered an incoming
> >edge while in reality the pointer is not validly pointing into this
> >structure)
> 
> The number of collisions would need to be investigated. On my system, 
> there are 158 distinct sizeof values generated by container_of. Of 
> this, 90 have at least two aliases (incoming pointer offsets). I'm not 
> sure how many different structures are in the kernel but I can't find 
> an easy (gcc magic) way to get a unique id for a type (apart from 
> modifying all the container_of calls and add a 4th argument - maybe a 
> string with the name of the type).

there are a couple of possibilities.

If the ID is string based then you dont even have to touch containr_of() 
calls - just generate the typename string via the "#y" stringification 
preprocessor directive, where 'y' is the second parameter of 
container_of().

there's another, much faster solution as well that assigns IDs 
build-time for globally visible types: the __builtin_types_compatible_p 
gcc extension to match the type against a global registry of types. I.e.
here's what i use in PREEMPT_RT:

#undef TYPE_EQUAL
#define TYPE_EQUAL(lock, type) \
                __builtin_types_compatible_p(typeof(lock), type *)

#define PICK_OP(type, optype, op, lock)                         \
do {                                                            \
        if (TYPE_EQUAL((lock), type))                           \
                _raw_##optype##op((type *)(lock));              \
        else if (TYPE_EQUAL(lock, spinlock_t))                  \
                _spin##op((spinlock_t *)(lock));                \
        else __bad_spinlock_type();                             \
} while (0)

so you can generate a (really) long branch that checks every known type 
that assigns an ID to a type build-time:

	if (TYPE_EQUAL(type, struct skb_head))
		type_id = 1;
	else if (TYPE_EQUAL(type, struct ))
		type_id = 2;
	...
	else
		type_id = UNKNOWN_TYPE;

despite this branch having hundreds of checks, the compiler will 
eliminate it at build time and only a single static type ID assignment 
remains.

this long branch could be auto-generated build-time (just like 
asm-offsets.c) in a maintainable way by putting a single "register type" 
line after every structure definition in global .h files:

REGISTER_TYPE(struct skb_head)

where REGISTER_TYPE(x) maps to nothing during normal kernel builds, but 
if a special flag is set it generates the type string into a special 
section:

#ifdef GENERATE_TYPE_REGISTRY
# define REGISTER_TYPE(x) \
	static char x __attribute__((section(".type.registry")) = #x;
#endif

so you can build and execute a special utility early during kernel build 
that prints out the generated code. (again, like asm-offsets.c)

it needs some thought, but this way it's quite possible to build-time 
map types to IDs.

> >but that there is a capable annotation method to reduce the amount of 
> >false negatives, in a gradual and managable way - down to zero if 
> >everything is annotated.
> 
> I'm not sure this could be achieved in a maintainable way, at least 
> not without support from the compiler.

it's possible with gcc (for global types), just hidden a bit :-)

	Ingo

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives
  2006-06-14  4:07                     ` Ingo Molnar
@ 2006-06-14  5:46                       ` Andi Kleen
  2006-06-14  5:53                       ` Jeremy Fitzhardinge
  2006-06-14 13:21                       ` Catalin Marinas
  2 siblings, 0 replies; 52+ messages in thread
From: Andi Kleen @ 2006-06-14  5:46 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Pekka J Enberg, linux-kernel, catalin.marinas

Ingo Molnar <mingo@elte.hu> writes:
> But it's not just about the amount of false negatives, but also about 
> the overhead of scanning. You are concentrated on embedded systems with 
> small RAM - but most of the testers will be running this with at last 
> 1GB of RAM - which is _alot_ of memory to scan.

Most of this should be normally in page cache which doesn't need to be scanned?

There might be some extreme loads with a lot more kernel data, but they
are probably rare.

-Andi 
7

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives
  2006-06-14  4:07                     ` Ingo Molnar
  2006-06-14  5:46                       ` Andi Kleen
@ 2006-06-14  5:53                       ` Jeremy Fitzhardinge
  2006-06-14 12:03                         ` Ingo Molnar
  2006-06-14 13:35                         ` Catalin Marinas
  2006-06-14 13:21                       ` Catalin Marinas
  2 siblings, 2 replies; 52+ messages in thread
From: Jeremy Fitzhardinge @ 2006-06-14  5:53 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Catalin Marinas, Pekka J Enberg, linux-kernel

Ingo Molnar wrote:
> For a GC a false negative is no big problem - it will reduce the 
> efficiency of the GC a bit, but that's all. For leak detection, if we 
> happen to have a persistent false pointer in .data (or any other 
> persistently allocated memory), it may prevent the detection of a leak 
> permanently - at least for that bootup. Statistically it could still be 
> found on other systems, but it would be better to have a design that 
> will eventually lead to having no false negatives.
>
> But it's not just about the amount of false negatives, but also about 
> the overhead of scanning. You are concentrated on embedded systems with 
> small RAM - but most of the testers will be running this with at last 
> 1GB of RAM - which is _alot_ of memory to scan.
>   
It seems to me that most types with any pointers are fairly 
pointer-dense.  There's not much point in trying to skip a couple of 
non-pointers nested among a dozen others; once you've worn the cost of 
pulling in the cache line there's not much else to worry about.  The 
most useful thing is to distinguish between completely pointerless 
allocations and allocations which have pointers.  Pointerless 
allocations are generally just data (strings, numbers, user data), and 
so are a waste of effort to scan, and possibly full of false pointers.  
In the kernel, you could probably do it by making it a property of 
slabs, assume all kmalloc allocations are pointerful (perhaps add 
GFP_POINTERLESS), and make sure all user-data pages are considered 
pointerless.

False pointers in kernel allocations can be avoided in a few ways.  The 
first, obviously, is the make sure all memory is initialized to a known 
non-pointer value.  The second is to ignore pointers which don't point 
near the start of an allocated region (possibly unsafe in the kernel, 
depending on the definition of "near").  You can get more sophisticated 
from there; the Boehm GC keeps tracks of things which look like pointers 
but turn out not to be (they don't point to allocated memory); it marks 
that memory as being unusable, so that the false pointer won't get 
mistaken for one later on, with the obvious risk that lots of false 
pointers can render large parts of your heap address space unusable.

In general, false pointers aren't a huge problem.  They'll generally 
lead to a bounded number of allocations being unreported as leaks; its 
highly unlikely that a large heap graph will remain hidden from a leak 
checker forever; espectially since kernel pointers are fairly unlike 
other kinds of data (large enough to not be aliased to most normal 
integer values, don't look like strings, and there are no FP numbers in 
the kernel).

> (But, if it's not possible to implement it in a sane manner then that's 
> not an issue either - it's rather the false positives that must be 
> avoided.)
>   

There's some risk of false positives.  You can imagine cases where the 
last reference to a block is transformed into a bus address, and in 
effect a piece of hardware holds it.  You don't get to know about the 
pointer until the hardware gives it back.  You might want a GFP_ROOT 
flag (or whatever), to mark a block as being always referenced in order 
to suppress these cases.

> there are a couple of possibilities.
>
> If the ID is string based then you dont even have to touch containr_of() 
> calls - just generate the typename string via the "#y" stringification 
> preprocessor directive, where 'y' is the second parameter of 
> container_of().
> [...]
> it needs some thought, but this way it's quite possible to build-time 
> map types to IDs.
>   

This seems pretty over-engineered.  I wouldn't go this far unless you're 
actually seeing performance/correctness problems, and a simple 
with/without pointers flag isn't enough.  It also doesn't address the 
most troublesome source of false pointers: stacks.  There is all sorts 
of junk lying around on stacks, and you can have an old dead pointer 
sitting there pinning old dead memory for a long time.

    J

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives
  2006-06-14  5:53                       ` Jeremy Fitzhardinge
@ 2006-06-14 12:03                         ` Ingo Molnar
  2006-06-14 13:46                           ` Catalin Marinas
  2006-06-14 13:35                         ` Catalin Marinas
  1 sibling, 1 reply; 52+ messages in thread
From: Ingo Molnar @ 2006-06-14 12:03 UTC (permalink / raw)
  To: Jeremy Fitzhardinge; +Cc: Catalin Marinas, Pekka J Enberg, linux-kernel


* Jeremy Fitzhardinge <jeremy@goop.org> wrote:

> This seems pretty over-engineered.  I wouldn't go this far unless 
> you're actually seeing performance/correctness problems, and a simple 
> with/without pointers flag isn't enough.  It also doesn't address the 
> most troublesome source of false pointers: stacks.  There is all sorts 
> of junk lying around on stacks, and you can have an old dead pointer 
> sitting there pinning old dead memory for a long time.

in an earlier thread i suggested to not scan kernel stacks at all, but 
to delay the registration of new blocks to return-from-syscall time (via 
having a per-task list of newly allocated but not yet registered 
blocks). That way we dont get false positives for stuff that is on the 
kernel stack temporarily and then freed, and we correctly register newly 
allocated blocks as well.

	Ingo

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives
  2006-06-14  4:07                     ` Ingo Molnar
  2006-06-14  5:46                       ` Andi Kleen
  2006-06-14  5:53                       ` Jeremy Fitzhardinge
@ 2006-06-14 13:21                       ` Catalin Marinas
  2 siblings, 0 replies; 52+ messages in thread
From: Catalin Marinas @ 2006-06-14 13:21 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Pekka J Enberg, linux-kernel

On 14/06/06, Ingo Molnar <mingo@elte.hu> wrote:
> * Catalin Marinas <catalin.marinas@gmail.com> wrote:
>
> But it's not just about the amount of false negatives, but also about
> the overhead of scanning. You are concentrated on embedded systems with
> small RAM - but most of the testers will be running this with at last
> 1GB of RAM - which is _alot_ of memory to scan.

As Andi mentioned, it might not be a big issue since the page cache is
not scanned.

> > The number of collisions would need to be investigated. On my system,
> > there are 158 distinct sizeof values generated by container_of. Of
> > this, 90 have at least two aliases (incoming pointer offsets). I'm not
> > sure how many different structures are in the kernel but I can't find
> > an easy (gcc magic) way to get a unique id for a type (apart from
> > modifying all the container_of calls and add a 4th argument - maybe a
> > string with the name of the type).

Looking at these collisions in more detail, from the above 90 only
about 40 are real collisions that introduce new aliases. The others
are container_of() usages on other members (task_struct has about 4
different aliases).

> If the ID is string based then you dont even have to touch containr_of()
> calls - just generate the typename string via the "#y" stringification
> preprocessor directive, where 'y' is the second parameter of
> container_of().

Since the linker seems to be smart enough to eliminate string
duplicates, a "const char *" can be used as a unique id (assuming that
the container_of is always called with the type name and not
"typeof").

> there's another, much faster solution as well that assigns IDs
> build-time for globally visible types: the __builtin_types_compatible_p
> gcc extension to match the type against a global registry of types. I.e.
> here's what i use in PREEMPT_RT:
[...]
> it needs some thought, but this way it's quite possible to build-time
> map types to IDs.

That's an interesting approach and it would work even with "typeof()"
constructs.

Anyway, in both the incoming and outgoing pointers case, the type
information needs to be associated with the allocated memory block.
This would mean that all the kmalloc/kmem_cache_alloc etc. calls in
the kernel have to be modified to pass the type information. Would
such a (big) patch be acceptable for the kernel? The sizeof is the
only information that could get close to type identification and is
passed to these functions, though it has its risks.

Is it also worth the effort? With the large number of platforms Linux
runs on, a leak would be spotted sooner or later.

-- 
Catalin

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives
  2006-06-14  5:53                       ` Jeremy Fitzhardinge
  2006-06-14 12:03                         ` Ingo Molnar
@ 2006-06-14 13:35                         ` Catalin Marinas
  1 sibling, 0 replies; 52+ messages in thread
From: Catalin Marinas @ 2006-06-14 13:35 UTC (permalink / raw)
  To: Jeremy Fitzhardinge; +Cc: Ingo Molnar, Pekka J Enberg, linux-kernel

Hi Jeremy,

It's good to hear some experience from the Valgrind world :-)

On 14/06/06, Jeremy Fitzhardinge <jeremy@goop.org> wrote:
> Ingo Molnar wrote:
> > But it's not just about the amount of false negatives, but also about
> > the overhead of scanning. You are concentrated on embedded systems with
> > small RAM - but most of the testers will be running this with at last
> > 1GB of RAM - which is _alot_ of memory to scan.
> >
> It seems to me that most types with any pointers are fairly
> pointer-dense.

There seem to be many structures containing mostly data and only one
or two list_head structures with pointers. Here only the list_head
member would need to be scanned.

> False pointers in kernel allocations can be avoided in a few ways.  The
> first, obviously, is the make sure all memory is initialized to a known
> non-pointer value.

This is done already by the DEBUG_SLAB configuration.

> The second is to ignore pointers which don't point
> near the start of an allocated region (possibly unsafe in the kernel,
> depending on the definition of "near").

Kmemleak only accepts values that point to either the beginning of the
allocated block or to one of its aliases (a offset from the beginning
which is determined from the container_of() usages on that
structure/member - most of the list usages). There is a discussion one
whether to allow a value to point anywhere inside a block but I would
need to investigate the risks.

> You can get more sophisticated
> from there; the Boehm GC keeps tracks of things which look like pointers
> but turn out not to be (they don't point to allocated memory); it marks
> that memory as being unusable, so that the false pointer won't get
> mistaken for one later on, with the obvious risk that lots of false
> pointers can render large parts of your heap address space unusable.

This would increase the overhead as in my tests around 15% of all the
values scanned look like pointers (i.e. between PAGE_OFFSET and
PAGE_OFFSET + ram_size).

> There's some risk of false positives.  You can imagine cases where the
> last reference to a block is transformed into a bus address, and in
> effect a piece of hardware holds it.  You don't get to know about the
> pointer until the hardware gives it back.

This is pretty rare and you can easily tell kmemleak to ignore it.

-- 
Catalin

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives
  2006-06-14 12:03                         ` Ingo Molnar
@ 2006-06-14 13:46                           ` Catalin Marinas
  0 siblings, 0 replies; 52+ messages in thread
From: Catalin Marinas @ 2006-06-14 13:46 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Jeremy Fitzhardinge, Pekka J Enberg, linux-kernel

On 14/06/06, Ingo Molnar <mingo@elte.hu> wrote:
>
> * Jeremy Fitzhardinge <jeremy@goop.org> wrote:
>
> > This seems pretty over-engineered.  I wouldn't go this far unless
> > you're actually seeing performance/correctness problems, and a simple
> > with/without pointers flag isn't enough.  It also doesn't address the
> > most troublesome source of false pointers: stacks.  There is all sorts
> > of junk lying around on stacks, and you can have an old dead pointer
> > sitting there pinning old dead memory for a long time.
>
> in an earlier thread i suggested to not scan kernel stacks at all, but
> to delay the registration of new blocks to return-from-syscall time (via
> having a per-task list of newly allocated but not yet registered
> blocks). That way we dont get false positives for stuff that is on the
> kernel stack temporarily and then freed, and we correctly register newly
> allocated blocks as well.

I didn't have time to try this idea. However, the number of false
positives doesn't seem to be increased (or they are only reported
temporarily) if you don't scan the stacks at all (especially if you
scan the memory at a relatively quiet time). That's why I added a
config option for this.

The problem looks a bit more complicated for kernel threads as they
always use the kernel stack.

Another idea would be to only scan the stacks of the sleeping tasks
(and you could also get the frame pointer from the call to the
schedule function).

-- 
Catalin

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives
  2006-06-12  5:19   ` Pekka Enberg
  2006-06-12  8:11     ` Catalin Marinas
@ 2006-06-24 10:20     ` Catalin Marinas
  2006-06-24 10:22       ` Ingo Molnar
  1 sibling, 1 reply; 52+ messages in thread
From: Catalin Marinas @ 2006-06-24 10:20 UTC (permalink / raw)
  To: Pekka Enberg; +Cc: linux-kernel, Ingo Molnar

Hi Pekka,

On 12/06/06, Pekka Enberg <penberg@cs.helsinki.fi> wrote:
> On 6/11/06, Catalin Marinas <catalin.marinas@gmail.com> wrote:
> > There are allocations for which the main pointer cannot be found but they
> > are not memory leaks. This patch fixes some of them.
>
> Can we fix this by looking for pointers to anywhere in the allocated
> memory block instead of just looking for the start?

I eventually got some spare time to continue the investigation on this.

I did some statistics with using a priority radix tree for pointer
detection. This would eliminate the container_of hack and the
memleak_padding calls. The statistics on qemu x86 (with just a prompt,
no X or other applications started and unaligned values ignored) look
like this:

allocated/detected pointers = 30433
detected via aliases = 6355

locations scanned = 1654781
pointer like values = 476145
values found in the radix tree = 157011
values found in the prio tree = 283283

>From the scanned values, 9.5% were found in the radix tree (current
implementation). There is a total of 28.8% values that look like
pointers. This means that 67% of the values that look like pointers
aren't pointing to any block (the other 33% are pointing to a block
start or to certain offsets inside the block).

However, using a priority radix tree, 17% from the scanned values
would match allocated blocks (almost double compared to the current
method). 41% of the values that look like pointers would not be found
pointing to a block or somewhere inside one. This means that an
additional 26% of the values that look like pointers (compared to the
radix tree method) have the potential of creating false negatives.

My opinion is not to implement the "anywhere inside a block" method as
it would increase the risk of false negatives with a little benefit
(removing some false positive notifications, probably less than 30).

To the other extreme is Ingo's suggestion of using exact type
identification but I don't think this would be acceptable for the
kernel as it would to modify all the memory alloc calls in the kernel
to either pass an additional parameter (the type id) or another
post-allocation call to kmemleak to update the id.

A compromise would be to use the "sizeof" type approximation for both
incoming and outgoing pointers (kmemleak currently does this for
incoming pointers). This would require an external tool for scanning
all the structure definitions in the kernel and generate
sizeof-pointer_offset pairs. It could also compare whether the size of
the dereferenced outgoing pointer is the same as that of the detected
memory block. I think this method would add the need of additional
false-positives covering in the kernel.

Anyway, the current implementation (I'll update it for 2.6.17) detects
real memory leaks. I suspect that a wide range of leaks would be
covered if it is used on different platforms and different conditions.

-- 
Catalin

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives
  2006-06-24 10:20     ` Catalin Marinas
@ 2006-06-24 10:22       ` Ingo Molnar
  2006-06-24 10:55         ` Catalin Marinas
  2006-07-24 11:15         ` Ingo Molnar
  0 siblings, 2 replies; 52+ messages in thread
From: Ingo Molnar @ 2006-06-24 10:22 UTC (permalink / raw)
  To: Catalin Marinas; +Cc: Pekka Enberg, linux-kernel


* Catalin Marinas <catalin.marinas@gmail.com> wrote:

> My opinion is not to implement the "anywhere inside a block" method as 
> it would increase the risk of false negatives with a little benefit 
> (removing some false positive notifications, probably less than 30).

agreed.

> To the other extreme is Ingo's suggestion of using exact type 
> identification but I don't think this would be acceptable for the 
> kernel as it would to modify all the memory alloc calls in the kernel 
> to either pass an additional parameter (the type id) or another 
> post-allocation call to kmemleak to update the id.

passing in the type ID wouldnt be that bad and it would have other 
advantages as well: for example we could do strict type-checking of 
allocation size versus type-we-use-it-for.

As long as the conversion is gradual i think we could try this. I.e. 
we'd default to 'no ID passed', and in that case we would fall back to 
the size-based method and generate an ID out of the structure size.

> Anyway, the current implementation (I'll update it for 2.6.17) detects 
> real memory leaks. I suspect that a wide range of leaks would be 
> covered if it is used on different platforms and different conditions.

btw., what leaks were found so far? I know about the ACPI one - any 
other ones?

	Ingo

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives
  2006-06-24 10:22       ` Ingo Molnar
@ 2006-06-24 10:55         ` Catalin Marinas
  2006-07-24 11:15         ` Ingo Molnar
  1 sibling, 0 replies; 52+ messages in thread
From: Catalin Marinas @ 2006-06-24 10:55 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Pekka Enberg, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 1822 bytes --]

On 24/06/06, Ingo Molnar <mingo@elte.hu> wrote:
>
> * Catalin Marinas <catalin.marinas@gmail.com> wrote:
> > To the other extreme is Ingo's suggestion of using exact type
> > identification but I don't think this would be acceptable for the
> > kernel as it would to modify all the memory alloc calls in the kernel
> > to either pass an additional parameter (the type id) or another
> > post-allocation call to kmemleak to update the id.
>
> passing in the type ID wouldnt be that bad and it would have other
> advantages as well: for example we could do strict type-checking of
> allocation size versus type-we-use-it-for.

There are valid places in the kernel where the allocated size is
different from the type's one. That's why I added the
memleak_padding().

> As long as the conversion is gradual i think we could try this. I.e.
> we'd default to 'no ID passed', and in that case we would fall back to
> the size-based method and generate an ID out of the structure size.

OK, I'll try to add the infrastructure for type ids but default to
sizeof initially.

> > Anyway, the current implementation (I'll update it for 2.6.17) detects
> > real memory leaks. I suspect that a wide range of leaks would be
> > covered if it is used on different platforms and different conditions.
>
> btw., what leaks were found so far? I know about the ACPI one - any
> other ones?

There are two leaks reported in ACPI but I only posted a patch for one
as I didn't have time to track the other (a reference count doesn't
get to zero and the structure not freed).

There is another leak in legacy_init_iomem_resources() in
arch/i386/setup.c (request_resource() fails but the memory is not
freed - see the attached patch). I initially marked this as a false
positive but it wasn't (I have to revisit the false positives).

-- 
Catalin

[-- Attachment #2: i386-setup-leak.patch --]
[-- Type: text/x-patch, Size: 835 bytes --]

Fix a memory leak in the i386 setup code

From: Catalin Marinas <catalin.marinas@gmail.com>

Signed-off-by: Catalin Marinas <catalin.marinas@gmail.com>
---

 arch/i386/kernel/setup.c |    5 ++++-
 1 files changed, 4 insertions(+), 1 deletions(-)

diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index dd6b0e3..60f5d99 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -1332,7 +1332,10 @@ legacy_init_iomem_resources(struct resou
 		res->start = e820.map[i].addr;
 		res->end = res->start + e820.map[i].size - 1;
 		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
-		request_resource(&iomem_resource, res);
+		if (request_resource(&iomem_resource, res)) {
+			kfree(res);
+			continue;
+		}
 		if (e820.map[i].type == E820_RAM) {
 			/*
 			 *  We don't know which RAM region contains kernel data,

^ permalink raw reply related	[flat|nested] 52+ messages in thread

* Re: [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives
  2006-06-24 10:22       ` Ingo Molnar
  2006-06-24 10:55         ` Catalin Marinas
@ 2006-07-24 11:15         ` Ingo Molnar
  2006-07-24 13:28           ` Catalin Marinas
  1 sibling, 1 reply; 52+ messages in thread
From: Ingo Molnar @ 2006-07-24 11:15 UTC (permalink / raw)
  To: Catalin Marinas; +Cc: Pekka Enberg, linux-kernel, Arjan van de Ven


* Ingo Molnar <mingo@elte.hu> wrote:

> > To the other extreme is Ingo's suggestion of using exact type 
> > identification but I don't think this would be acceptable for the 
> > kernel as it would to modify all the memory alloc calls in the 
> > kernel to either pass an additional parameter (the type id) or 
> > another post-allocation call to kmemleak to update the id.
> 
> passing in the type ID wouldnt be that bad and it would have other 
> advantages as well: for example we could do strict type-checking of 
> allocation size versus type-we-use-it-for.
> 
> As long as the conversion is gradual i think we could try this. I.e. 
> we'd default to 'no ID passed', and in that case we would fall back to 
> the size-based method and generate an ID out of the structure size.

update: there's also a neat gcc extension trick suggested by Arjan: 
__builtin_classify_type(). This converts types into integers!

	Ingo

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives
  2006-07-24 11:15         ` Ingo Molnar
@ 2006-07-24 13:28           ` Catalin Marinas
  2006-08-03  6:32             ` Jakub Jelinek
  0 siblings, 1 reply; 52+ messages in thread
From: Catalin Marinas @ 2006-07-24 13:28 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Pekka Enberg, linux-kernel, Arjan van de Ven

[-- Attachment #1: Type: text/plain, Size: 731 bytes --]

On 24/07/06, Ingo Molnar <mingo@elte.hu> wrote:
> update: there's also a neat gcc extension trick suggested by Arjan:
> __builtin_classify_type(). This converts types into integers!

It's not really reliable as it doesn't distinguish well between types.
All the structures, no matter what they contain, have the same id
(which I think only refers to the fact that it is built-in type,
pointer or structure, without differentiation).

The attached code gives the following results:

typeof(a) = 12, sizeof(a) = 136
typeof(b) = 12, sizeof(b) = 8
typeof(c) = 1, sizeof(c) = 4
typeof(d) = 1, sizeof(d) = 4
typeof(e) = 1, sizeof(e) = 4
typeof(f) = 1, sizeof(f) = 1
typeof(g) = 5, sizeof(g) = 4
typeof(h) = 5, sizeof(h) = 4

-- 
Catalin

[-- Attachment #2: builtin_classify_type.c --]
[-- Type: text/x-csrc, Size: 506 bytes --]

#include <stdio.h>

struct A {
	void *a;
	long b;
	char c[128];
};

struct B {
	long a;
	void *b;
};

enum C {
	a,
	b
};

#define PRINT_TYPE(val)							\
	printf("typeof(" #val ") = %d, sizeof(" #val ") = %d\n",	\
			__builtin_classify_type(val),			\
			sizeof(val))

int main()
{
	struct A a;
	struct B b;
	enum C c;
	int d;
	long e;
	char f;
	int *g;
	char *h;

	PRINT_TYPE(a);
	PRINT_TYPE(b);
	PRINT_TYPE(c);
	PRINT_TYPE(d);
	PRINT_TYPE(e);
	PRINT_TYPE(f);
	PRINT_TYPE(g);
	PRINT_TYPE(h);

	return 0;
}

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives
  2006-07-24 13:28           ` Catalin Marinas
@ 2006-08-03  6:32             ` Jakub Jelinek
  2006-08-03  8:31               ` Catalin Marinas
  0 siblings, 1 reply; 52+ messages in thread
From: Jakub Jelinek @ 2006-08-03  6:32 UTC (permalink / raw)
  To: Catalin Marinas; +Cc: Ingo Molnar, Pekka Enberg, linux-kernel, Arjan van de Ven

On Mon, Jul 24, 2006 at 02:28:03PM +0100, Catalin Marinas wrote:
> On 24/07/06, Ingo Molnar <mingo@elte.hu> wrote:
> >update: there's also a neat gcc extension trick suggested by Arjan:
> >__builtin_classify_type(). This converts types into integers!
> 
> It's not really reliable as it doesn't distinguish well between types.
> All the structures, no matter what they contain, have the same id
> (which I think only refers to the fact that it is built-in type,
> pointer or structure, without differentiation).

__builtin_classify_type () doesn't give types unique ID, it only classifies
them:

/* Values returned by __builtin_classify_type.  */

enum type_class
{
  no_type_class = -1,
  void_type_class, integer_type_class, char_type_class,
  enumeral_type_class, boolean_type_class,
  pointer_type_class, reference_type_class, offset_type_class,
  real_type_class, complex_type_class,
  function_type_class, method_type_class,
  record_type_class, union_type_class,
  array_type_class, string_type_class,
  lang_type_class
};

All structures are given record_type_class, all unions union_type_class,
all pointers pointer_type_class, etc.
That doesn't mean you can't use it in the kernel as additional source
of type checking (in addition to e.g. sizeof and __alignof__).

	Jakub

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives
  2006-08-03  6:32             ` Jakub Jelinek
@ 2006-08-03  8:31               ` Catalin Marinas
  0 siblings, 0 replies; 52+ messages in thread
From: Catalin Marinas @ 2006-08-03  8:31 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Ingo Molnar, Pekka Enberg, linux-kernel, Arjan van de Ven

On 03/08/06, Jakub Jelinek <jakub@redhat.com> wrote:
> On Mon, Jul 24, 2006 at 02:28:03PM +0100, Catalin Marinas wrote:
> > On 24/07/06, Ingo Molnar <mingo@elte.hu> wrote:
> > >update: there's also a neat gcc extension trick suggested by Arjan:
> > >__builtin_classify_type(). This converts types into integers!
> >
> > It's not really reliable as it doesn't distinguish well between types.
> > All the structures, no matter what they contain, have the same id
> > (which I think only refers to the fact that it is built-in type,
> > pointer or structure, without differentiation).
>
> __builtin_classify_type () doesn't give types unique ID, it only classifies
> them:
[...]
> All structures are given record_type_class, all unions union_type_class,
> all pointers pointer_type_class, etc.

Yes, I noticed this.

> That doesn't mean you can't use it in the kernel as additional source
> of type checking (in addition to e.g. sizeof and __alignof__).

It is useful indeed, especially for identifying which types could
actually be or contain pointers. However, with the current Linux API,
the only information that can be passed to kmemleak via the alloc
functions is the size information.

My plan is to post a kmemleak update (some bug-fixes) this weekend and
get some wider testing (maybe in the -mm tree). Once people are happy
with the current implementation, I'll try to add more precise type
checking and introduce new functions (macros) like kmalloc_typed and
kmem_cache_create_typed together with Ingo's algorithm for unique type
ids.

There is another improvement that could be made to reduce the false
negatives - allocate slab objects larger than the required size and
change the offset every time the object is re-allocated. I think this
would be useful not only for kmemleak but also for catching possible
object alterations after it was re-allocated.

-- 
Catalin

^ permalink raw reply	[flat|nested] 52+ messages in thread

end of thread, other threads:[~2006-08-03  8:31 UTC | newest]

Thread overview: 52+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-06-11 11:18 [PATCH 2.6.17-rc6 0/9] Kernel memory leak detector 0.7 Catalin Marinas
2006-06-11 11:21 ` [PATCH 2.6.17-rc6 1/9] Base support for kmemleak Catalin Marinas
2006-06-13 11:14   ` Pekka Enberg
2006-06-13 12:47     ` Catalin Marinas
2006-06-11 11:21 ` [PATCH 2.6.17-rc6 2/9] Some documentation " Catalin Marinas
2006-06-11 11:21 ` [PATCH 2.6.17-rc6 3/9] Add the memory allocation/freeing hooks " Catalin Marinas
2006-06-11 11:21 ` [PATCH 2.6.17-rc6 4/9] Modules support " Catalin Marinas
2006-06-11 11:21 ` [PATCH 2.6.17-rc6 5/9] Add kmemleak support for i386 Catalin Marinas
2006-06-11 11:21 ` [PATCH 2.6.17-rc6 6/9] Add kmemleak support for ARM Catalin Marinas
2006-06-11 11:21 ` [PATCH 2.6.17-rc6 7/9] Remove some of the kmemleak false positives Catalin Marinas
2006-06-12  5:19   ` Pekka Enberg
2006-06-12  8:11     ` Catalin Marinas
2006-06-12  8:17       ` Pekka J Enberg
2006-06-12  8:43         ` Catalin Marinas
2006-06-12 10:53         ` Ingo Molnar
2006-06-12 11:08           ` Pekka J Enberg
2006-06-12 11:36             ` Ingo Molnar
2006-06-12 11:56               ` Pekka J Enberg
2006-06-12 12:53                 ` Catalin Marinas
2006-06-12 13:12                 ` Ingo Molnar
2006-06-12 14:38                   ` Catalin Marinas
2006-06-12 22:29                   ` Catalin Marinas
2006-06-12 12:56           ` Catalin Marinas
2006-06-12 19:22             ` Ingo Molnar
2006-06-12 22:24               ` Catalin Marinas
2006-06-13  5:53               ` Pekka J Enberg
2006-06-13  6:59                 ` Catalin Marinas
2006-06-13  7:57                   ` Pekka J Enberg
2006-06-13  9:45                     ` Catalin Marinas
2006-06-13 10:04                       ` Pekka Enberg
2006-06-13 10:37                         ` Catalin Marinas
2006-06-13  7:26                 ` Ingo Molnar
2006-06-13  8:11                   ` Pekka J Enberg
2006-06-13 10:49                   ` Catalin Marinas
2006-06-14  4:07                     ` Ingo Molnar
2006-06-14  5:46                       ` Andi Kleen
2006-06-14  5:53                       ` Jeremy Fitzhardinge
2006-06-14 12:03                         ` Ingo Molnar
2006-06-14 13:46                           ` Catalin Marinas
2006-06-14 13:35                         ` Catalin Marinas
2006-06-14 13:21                       ` Catalin Marinas
2006-06-12  9:17       ` Peter Zijlstra
2006-06-12  9:35         ` Catalin Marinas
2006-06-24 10:20     ` Catalin Marinas
2006-06-24 10:22       ` Ingo Molnar
2006-06-24 10:55         ` Catalin Marinas
2006-07-24 11:15         ` Ingo Molnar
2006-07-24 13:28           ` Catalin Marinas
2006-08-03  6:32             ` Jakub Jelinek
2006-08-03  8:31               ` Catalin Marinas
2006-06-11 11:22 ` [PATCH 2.6.17-rc6 8/9] Simple testing for kmemleak Catalin Marinas
2006-06-11 11:22 ` [PATCH 2.6.17-rc6 9/9] Keep the __init functions after initialization Catalin Marinas

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).