linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* 2.6.16rc1-git4 slab corruption.
@ 2006-01-31 18:03 Dave Jones
  2006-01-31 19:08 ` Chris Mason
  2006-02-02  5:07 ` Dave Jones
  0 siblings, 2 replies; 11+ messages in thread
From: Dave Jones @ 2006-01-31 18:03 UTC (permalink / raw)
  To: Linux Kernel

Slab corruption: start=ffff81000057a360, len=88
Redzone: 0x5a2cf071/0x5a2cf071.
Last user: [<ffffffff80181cc0>](free_buffer_head+0x2a/0x43)

Call Trace: <ffffffff8017b4d0>{check_poison_obj+127}
       <ffffffff80181cea>{alloc_buffer_head+17} <ffffffff8017b638>{cache_alloc_debugcheck_after+48}
       <ffffffff8017b828>{kmem_cache_alloc+231} <ffffffff80181cea>{alloc_buffer_head+17}
       <ffffffff801824b1>{alloc_page_buffers+53} <ffffffff8018255c>{create_empty_buffers+20}
       <ffffffff801831c8>{__block_prepare_write+148} <ffffffff8807e5f4>{:ext3:ext3_get_block+0}
       <ffffffff8017b0cc>{poison_obj+38} <ffffffff8017b6f7>{cache_alloc_debugcheck_after+239}
       <ffffffff80183536>{block_prepare_write+26} <ffffffff8807fcd1>{:ext3:ext3_prepare_write+148}
       <ffffffff80340386>{_write_unlock_irq+9} <ffffffff8015e3b7>{generic_file_buffered_write+603}
       <ffffffff80137896>{current_fs_time+59} <ffffffff80137896>{current_fs_time+59}
       <ffffffff8015ea13>{__generic_file_aio_write_nolock+767}
       <ffffffff8015ee22>{generic_file_aio_write+78} <ffffffff80149d23>{debug_mutex_add_waiter+159}
       <ffffffff8033fb23>{__mutex_lock_slowpath+817} <ffffffff8015ee39>{generic_file_aio_write+101}
       <ffffffff8807be5e>{:ext3:ext3_file_write+22} <ffffffff8018054e>{do_sync_write+199}
       <ffffffff801464d6>{autoremove_wake_function+0} <ffffffff8015ae1f>{audit_syscall_entry+301}
       <ffffffff80180e48>{vfs_write+206} <ffffffff801813fa>{sys_write+69}
       <ffffffff8010aa78>{tracesys+209}
020: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 01 00 00 00
Prev obj: start=ffff81000057a2f0, len=88
Redzone: 0x170fc2a5/0x170fc2a5.
Last user: [<ffffffff80181cea>](alloc_buffer_head+0x11/0x36)
000: 23 40 00 00 00 00 00 00 f0 a2 57 00 00 81 ff ff
010: 80 6c 11 01 00 81 ff ff 01 00 00 00 00 10 00 00
Next obj: start=ffff81000057a3d0, len=88
Redzone: 0x5a2cf071/0x5a2cf071.
Last user: [<ffffffff80181cc0>](free_buffer_head+0x2a/0x43)
000: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b
010: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: 2.6.16rc1-git4 slab corruption.
  2006-01-31 18:03 2.6.16rc1-git4 slab corruption Dave Jones
@ 2006-01-31 19:08 ` Chris Mason
  2006-01-31 22:15   ` Dave Jones
  2006-02-02  5:07 ` Dave Jones
  1 sibling, 1 reply; 11+ messages in thread
From: Chris Mason @ 2006-01-31 19:08 UTC (permalink / raw)
  To: Dave Jones; +Cc: Linux Kernel

On Tuesday 31 January 2006 13:03, Dave Jones wrote:
> Redzone: 0x5a2cf071/0x5a2cf071.
> Last user: [<ffffffff80181cc0>](free_buffer_head+0x2a/0x43)

Haven't seen this one yet, but we have an assortment of strange bugs on 2.6.16-rc1-git.  What were you doing to trigger it?

I've been trying to hammer on things with the slab exerciser below, but haven't had much luck in getting a nice reliable test case.

modprobe crasher threads=X

X defaults to 1.

-chris

diff -r abc01241b9e0 drivers/char/Kconfig
--- a/drivers/char/Kconfig	Tue Jan 24 15:03:39 2006 -0500
+++ b/drivers/char/Kconfig	Wed Jan 25 08:03:06 2006 -0500
@@ -1020,5 +1020,10 @@ config TELCLOCK
 	  sysfs directory, /sys/devices/platform/telco_clock, with a number of
 	  files for controlling the behavior of this hardware.
 
+config CRASHER
+	tristate "Crasher Module"
+	help
+	  Slab cache memory tester.  Only use this as a module
+
 endmenu
 
diff -r abc01241b9e0 drivers/char/Makefile
--- a/drivers/char/Makefile	Tue Jan 24 15:03:39 2006 -0500
+++ b/drivers/char/Makefile	Wed Jan 25 08:03:06 2006 -0500
@@ -95,6 +95,7 @@ obj-$(CONFIG_IPMI_HANDLER) += ipmi/
 
 obj-$(CONFIG_HANGCHECK_TIMER) += hangcheck-timer.o
 obj-$(CONFIG_TCG_TPM) += tpm/
+obj-$(CONFIG_CRASHER) += crasher.o
 # Files generated that shall be removed upon make clean
 clean-files := consolemap_deftbl.c defkeymap.c qtronixmap.c
 
diff -r abc01241b9e0 drivers/char/crasher.c
--- /dev/null	Thu Jan  1 00:00:00 1970 +0000
+++ b/drivers/char/crasher.c	Wed Jan 25 08:03:06 2006 -0500
@@ -0,0 +1,148 @@
+/*
+ * crasher.c, it breaks things
+ */
+
+
+#include <linux/module.h>
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/moduleparam.h>
+
+static int module_exiting;
+static struct completion startup = COMPLETION_INITIALIZER(startup);
+static unsigned long rand_seed = 152L;
+static unsigned long seed = 152L;
+static int threads = 1;
+
+module_param(seed, ulong, 0);
+module_param(threads, int, 0);
+MODULE_PARM_DESC(seed, "random seed for memory tests");
+MODULE_PARM_DESC(threads, "number of threads to run");
+MODULE_LICENSE("GPL");
+
+#define NUM_ALLOC 24
+#define NUM_SIZES 8
+static int sizes[]  = { 32, 64, 128, 192, 256, 1024, 2048, 4096 };
+
+struct mem_buf {
+    char *buf;
+    int size;
+};
+
+static unsigned long crasher_random(void)
+{
+        rand_seed = rand_seed*69069L+1;
+        return rand_seed^jiffies;
+}
+
+void crasher_srandom(unsigned long entropy)
+{
+        rand_seed ^= entropy;
+        crasher_random();
+}
+
+static char *mem_alloc(int size) {
+	char *p = kmalloc(size, GFP_KERNEL);
+	int i;
+	if (!p)
+		return p;
+	for (i = 0 ; i < size; i++)
+		p[i] = (i % 119) + 8;
+	return p;
+}
+
+static void mem_check(char *p, int size) {
+	int i;
+	if (!p) 
+		return;
+	for (i = 0 ; i < size; i++) {
+        	if (p[i] != ((i % 119) + 8)) {
+			printk(KERN_CRIT "verify error at %lX offset %d " 
+			       " wanted %d found %d size %d\n", 
+			       (unsigned long)(p + i), i, (i % 119) + 8, 
+			       p[i], size);
+		}
+	}
+	// try and trigger slab poisoning for people using this buffer
+	// wrong
+	memset(p, 0, size);
+}
+
+static void mem_verify(void) {
+	struct mem_buf bufs[NUM_ALLOC];
+	struct mem_buf *b;
+	int index;
+	int size;
+	unsigned long sleep;
+	memset(bufs, 0, sizeof(struct mem_buf) * NUM_ALLOC);
+	while(!module_exiting) {
+		index = crasher_random() % NUM_ALLOC;
+		b = bufs + index;
+		if (b->size) {
+			mem_check(b->buf, b->size);
+			kfree(b->buf);
+			b->buf = NULL;
+			b->size = 0;
+		} else {
+			size = crasher_random() % NUM_SIZES;
+			size = sizes[size];
+			b->buf = mem_alloc(size);
+			b->size = size;
+		}
+		sleep = crasher_random() % (HZ / 10);
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule_timeout(sleep);
+		set_current_state(TASK_RUNNING);
+	}
+	for (index = 0 ; index < NUM_ALLOC ; index++) {
+		b = bufs + index;
+		if (b->size) {
+			mem_check(b->buf, b->size);
+			kfree(b->buf);
+		}
+	}
+}
+
+static int crasher_thread(void *unused) 
+{
+	daemonize("crasher");
+	complete(&startup);
+	mem_verify();
+	complete(&startup);
+	return 0;
+}
+
+static int __init crasher_init(void)
+{
+	int i;
+	init_completion(&startup);
+	crasher_srandom(seed);
+
+	printk("crasher module (%d threads).  Testing sizes: ", threads);
+	for (i = 0 ; i < NUM_SIZES ; i++)
+		printk("%d ", sizes[i]);
+	printk("\n");
+
+	for (i = 0 ; i < threads ; i++) 
+		kernel_thread(crasher_thread, crasher_thread, 
+			      CLONE_FS | CLONE_FILES);
+	for (i = 0 ; i < threads ; i++) 
+		wait_for_completion(&startup);
+	return 0;
+}
+
+static void __exit crasher_exit(void)
+{
+	int i;
+	module_exiting = 1;
+	for (i = 0 ; i < threads ; i++) 
+		wait_for_completion(&startup);
+	printk("all crasher threads done\n");
+	return;
+}
+
+module_init(crasher_init);
+module_exit(crasher_exit);

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: 2.6.16rc1-git4 slab corruption.
  2006-01-31 19:08 ` Chris Mason
@ 2006-01-31 22:15   ` Dave Jones
  2006-02-01  7:27     ` Pekka Enberg
  0 siblings, 1 reply; 11+ messages in thread
From: Dave Jones @ 2006-01-31 22:15 UTC (permalink / raw)
  To: Chris Mason; +Cc: Linux Kernel

On Tue, Jan 31, 2006 at 02:08:35PM -0500, Chris Mason wrote:
 > On Tuesday 31 January 2006 13:03, Dave Jones wrote:
 > > Redzone: 0x5a2cf071/0x5a2cf071.
 > > Last user: [<ffffffff80181cc0>](free_buffer_head+0x2a/0x43)
 > 
 > Haven't seen this one yet, but we have an assortment of strange bugs on
 > 2.6.16-rc1-git.  What were you doing to trigger it?

Running fetchmail/procmail/spamassassin to pick up ~1000 mails.

 > I've been trying to hammer on things with the slab exerciser below, but haven't had much luck in getting a nice reliable test case.

Manfred had a nice 'check all slabs before they're freed' patch, which might
be worth resurrecting for some tests. It may be that we're corrupting rarely
free'd slabs, making them hard to hit.

		Dave


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: 2.6.16rc1-git4 slab corruption.
  2006-01-31 22:15   ` Dave Jones
@ 2006-02-01  7:27     ` Pekka Enberg
  2006-02-01 16:09       ` Dave Jones
  0 siblings, 1 reply; 11+ messages in thread
From: Pekka Enberg @ 2006-02-01  7:27 UTC (permalink / raw)
  To: Dave Jones, Chris Mason, Linux Kernel

Hi,

On 2/1/06, Dave Jones <davej@redhat.com> wrote:
> Manfred had a nice 'check all slabs before they're freed' patch, which might
> be worth resurrecting for some tests. It may be that we're corrupting rarely
> free'd slabs, making them hard to hit.

Do you know where I can find that patch? I would like to try to sneak
that past Andrew. It seems silly not to have these useful slab
debugging patches within mainline.

                                Pekka

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: 2.6.16rc1-git4 slab corruption.
  2006-02-01  7:27     ` Pekka Enberg
@ 2006-02-01 16:09       ` Dave Jones
  2006-02-01 16:27         ` Pekka Enberg
                           ` (2 more replies)
  0 siblings, 3 replies; 11+ messages in thread
From: Dave Jones @ 2006-02-01 16:09 UTC (permalink / raw)
  To: Pekka Enberg; +Cc: Chris Mason, Linux Kernel

On Wed, Feb 01, 2006 at 09:27:02AM +0200, Pekka Enberg wrote:
 > Hi,
 > 
 > On 2/1/06, Dave Jones <davej@redhat.com> wrote:
 > > Manfred had a nice 'check all slabs before they're freed' patch, which might
 > > be worth resurrecting for some tests. It may be that we're corrupting rarely
 > > free'd slabs, making them hard to hit.
 > 
 > Do you know where I can find that patch? I would like to try to sneak
 > that past Andrew. It seems silly not to have these useful slab
 > debugging patches within mainline.

Here's the last version that I had that was rediffed against
2.6.13 or .14 (I forget which, it's been a while since I used it).

		Dave

 
diff -urNp --exclude-from=/home/davej/.exclude linux-1000/mm/slab.c linux-1010/mm/slab.c
--- linux-1000/mm/slab.c
+++ linux-1010/mm/slab.c
@@ -189,7 +189,7 @@
  */
 
 #define BUFCTL_END	(((kmem_bufctl_t)(~0U))-0)
-#define BUFCTL_FREE	(((kmem_bufctl_t)(~0U))-1)
+#define BUFCTL_ALLOC	(((kmem_bufctl_t)(~0U))-1)
 #define	SLAB_LIMIT	(((kmem_bufctl_t)(~0U))-2)
 
 /* Max number of objs-per-slab for caches which use off-slab slabs.
@@ -355,6 +355,7 @@ struct kmem_cache_s {
 #if DEBUG
 	int			dbghead;
 	int			reallen;
+	unsigned long		redzonetest;
 #endif
 };
 
@@ -370,6 +371,7 @@ struct kmem_cache_s {
  */
 #define REAPTIMEOUT_CPUC	(2*HZ)
 #define REAPTIMEOUT_LIST3	(4*HZ)
+#define REDZONETIMEOUT		(300*HZ)
 
 #if STATS
 #define	STATS_INC_ACTIVE(x)	((x)->num_active++)
@@ -1446,7 +1448,11 @@ next:
 	} 
 
 	cachep->lists.next_reap = jiffies + REAPTIMEOUT_LIST3 +
-					((unsigned long)cachep)%REAPTIMEOUT_LIST3;
+					((unsigned long)cachep/L1_CACHE_BYTES)%REAPTIMEOUT_LIST3;
+#if DEBUG
+	cachep->redzonetest = jiffies + REDZONETIMEOUT +
+					((unsigned long)cachep/L1_CACHE_BYTES)%REDZONETIMEOUT;
+#endif
 
 	/* Need the semaphore to access the chain. */
 	down(&cache_chain_sem);
@@ -2043,7 +2049,7 @@ retry:
 			slabp->inuse++;
 			next = slab_bufctl(slabp)[slabp->free];
 #if DEBUG
-			slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;
+			slab_bufctl(slabp)[slabp->free] = BUFCTL_ALLOC;
 #endif
 		       	slabp->free = next;
 		}
@@ -2181,7 +2187,7 @@ static void free_block(kmem_cache_t *cac
 		objnr = (objp - slabp->s_mem) / cachep->objsize;
 		check_slabp(cachep, slabp);
 #if DEBUG
-		if (slab_bufctl(slabp)[objnr] != BUFCTL_FREE) {
+		if (slab_bufctl(slabp)[objnr] != BUFCTL_ALLOC) {
 			printk(KERN_ERR "slab: double free detected in cache '%s', objp %p.\n",
 						cachep->name, objp);
 			BUG();
@@ -2409,7 +2415,7 @@ got_slabp:
 	slabp->inuse++;
 	next = slab_bufctl(slabp)[slabp->free];
 #if DEBUG
-	slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;
+	slab_bufctl(slabp)[slabp->free] = BUFCTL_ALLOC;
 #endif
 	slabp->free = next;
 	check_slabp(cachep, slabp);
@@ -2615,6 +2621,94 @@ unsigned int kmem_cache_size(kmem_cache_
 
 EXPORT_SYMBOL(kmem_cache_size);
 
+#if DEBUG
+static void check_slabuse(kmem_cache_t *cachep, struct slab *slabp)
+{
+	int i;
+
+	if (!(cachep->flags & SLAB_RED_ZONE))
+		return;	/* no redzone data to check */
+
+#if CONFIG_DEBUG_PAGEALLOC
+	/* Page alloc debugging on for this cache. Mapping & Unmapping happens
+	 * without any locking, thus parallel checks are impossible.
+	 */
+	if ((cachep->objsize%PAGE_SIZE)==0 && OFF_SLAB(cachep))
+		return;
+#endif
+
+	for (i=0;i<cachep->num;i++) {
+		void *objp = slabp->s_mem + cachep->objsize * i;
+		unsigned long red1, red2;
+
+		red1 = *dbg_redzone1(cachep, objp);
+		red2 = *dbg_redzone2(cachep, objp);
+
+		/* simplest case: marked as inactive */
+		if (red1 == RED_INACTIVE && red2 == RED_INACTIVE)
+			continue;
+
+		/* tricky case: if the bufctl value is BUFCTL_ALLOC, then
+		 * the object is either allocated or somewhere in a cpu
+		 * cache. The cpu caches are lockless and there might be
+		 * a concurrent alloc/free call, thus we must accept random
+		 * combinations of RED_ACTIVE and _INACTIVE
+		 */
+		if (slab_bufctl(slabp)[i] == BUFCTL_ALLOC &&
+				(red1 == RED_INACTIVE || red1 == RED_ACTIVE) &&
+				(red2 == RED_INACTIVE || red2 == RED_ACTIVE))
+			continue;
+
+		printk(KERN_ERR "slab %s: redzone mismatch in slabp %p, objp %p, bufctl 0x%x\n",
+				cachep->name, slabp, objp, slab_bufctl(slabp)[i]);
+		print_objinfo(cachep, objp, 2);
+	}
+}
+
+/*
+ * Perform a self test on all slabs from a cache
+ */
+static void check_redzone(kmem_cache_t *cachep)
+{
+	struct list_head *q;
+	struct slab *slabp;
+
+	check_spinlock_acquired(cachep);
+
+	list_for_each(q,&cachep->lists.slabs_full) {
+		slabp = list_entry(q, struct slab, list);
+
+		if (slabp->inuse != cachep->num) {
+			printk(KERN_INFO "slab %s: wrong slabp found in full slab chain at %p (%d/%d).\n",
+					cachep->name, slabp, slabp->inuse, cachep->num);
+		}
+		check_slabp(cachep, slabp);
+		check_slabuse(cachep, slabp);
+	}
+	list_for_each(q,&cachep->lists.slabs_partial) {
+		slabp = list_entry(q, struct slab, list);
+
+		if (slabp->inuse == cachep->num || slabp->inuse == 0) {
+			printk(KERN_INFO "slab %s: wrong slab found in partial chain at %p (%d/%d).\n",
+					cachep->name, slabp, slabp->inuse, cachep->num);
+		}
+		check_slabp(cachep, slabp);
+		check_slabuse(cachep, slabp);
+	}
+	list_for_each(q,&cachep->lists.slabs_free) {
+		slabp = list_entry(q, struct slab, list);
+
+		if (slabp->inuse != 0) {
+			printk(KERN_INFO "slab %s: wrong slab found in free chain at %p (%d/%d).\n",
+					cachep->name, slabp, slabp->inuse, cachep->num);
+		}
+		check_slabp(cachep, slabp);
+		check_slabuse(cachep, slabp);
+	}
+}
+
+#endif
+
 struct ccupdate_struct {
 	kmem_cache_t *cachep;
 	struct array_cache *new[NR_CPUS];
@@ -2798,6 +2892,12 @@ static void cache_reap(void *unused)
 
 		drain_array_locked(searchp, ac_data(searchp), 0);
 
+#if DEBUG
+		if(time_before(searchp->redzonetest, jiffies)) {
+			searchp->redzonetest = jiffies + REDZONETIMEOUT;
+			check_redzone(searchp);
+		}
+#endif
 		if(time_after(searchp->lists.next_reap, jiffies))
 			goto next_unlock;
 



^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: 2.6.16rc1-git4 slab corruption.
  2006-02-01 16:09       ` Dave Jones
@ 2006-02-01 16:27         ` Pekka Enberg
  2006-02-01 16:29         ` Pekka Enberg
  2006-02-01 17:09         ` Pekka Enberg
  2 siblings, 0 replies; 11+ messages in thread
From: Pekka Enberg @ 2006-02-01 16:27 UTC (permalink / raw)
  To: manfred; +Cc: Chris Mason, Linux Kernel, Dave Jones

On 2/1/06, Dave Jones <davej@redhat.com> wrote:
> > > Manfred had a nice 'check all slabs before they're freed' patch, which might
> > > be worth resurrecting for some tests. It may be that we're corrupting rarely
> > > free'd slabs, making them hard to hit.

On Wed, Feb 01, 2006 at 09:27:02AM +0200, Pekka Enberg wrote:
> > Do you know where I can find that patch? I would like to try to sneak
> > that past Andrew. It seems silly not to have these useful slab
> > debugging patches within mainline.

On Wed, 2006-02-01 at 11:09 -0500, Dave Jones wrote:
> Here's the last version that I had that was rediffed against
> 2.6.13 or .14 (I forget which, it's been a while since I used it).

Thanks Dave. Manfred, is there a reason this wasn't merged with
mainline? Needs a bit of cleanup but seems useful for detecting slab
corruption early.

			Pekka

>  
> diff -urNp --exclude-from=/home/davej/.exclude linux-1000/mm/slab.c linux-1010/mm/slab.c
> --- linux-1000/mm/slab.c
> +++ linux-1010/mm/slab.c
> @@ -189,7 +189,7 @@
>   */
>  
>  #define BUFCTL_END	(((kmem_bufctl_t)(~0U))-0)
> -#define BUFCTL_FREE	(((kmem_bufctl_t)(~0U))-1)
> +#define BUFCTL_ALLOC	(((kmem_bufctl_t)(~0U))-1)
>  #define	SLAB_LIMIT	(((kmem_bufctl_t)(~0U))-2)
>  
>  /* Max number of objs-per-slab for caches which use off-slab slabs.
> @@ -355,6 +355,7 @@ struct kmem_cache_s {
>  #if DEBUG
>  	int			dbghead;
>  	int			reallen;
> +	unsigned long		redzonetest;
>  #endif
>  };
>  
> @@ -370,6 +371,7 @@ struct kmem_cache_s {
>   */
>  #define REAPTIMEOUT_CPUC	(2*HZ)
>  #define REAPTIMEOUT_LIST3	(4*HZ)
> +#define REDZONETIMEOUT		(300*HZ)
>  
>  #if STATS
>  #define	STATS_INC_ACTIVE(x)	((x)->num_active++)
> @@ -1446,7 +1448,11 @@ next:
>  	} 
>  
>  	cachep->lists.next_reap = jiffies + REAPTIMEOUT_LIST3 +
> -					((unsigned long)cachep)%REAPTIMEOUT_LIST3;
> +					((unsigned long)cachep/L1_CACHE_BYTES)%REAPTIMEOUT_LIST3;
> +#if DEBUG
> +	cachep->redzonetest = jiffies + REDZONETIMEOUT +
> +					((unsigned long)cachep/L1_CACHE_BYTES)%REDZONETIMEOUT;
> +#endif
>  
>  	/* Need the semaphore to access the chain. */
>  	down(&cache_chain_sem);
> @@ -2043,7 +2049,7 @@ retry:
>  			slabp->inuse++;
>  			next = slab_bufctl(slabp)[slabp->free];
>  #if DEBUG
> -			slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;
> +			slab_bufctl(slabp)[slabp->free] = BUFCTL_ALLOC;
>  #endif
>  		       	slabp->free = next;
>  		}
> @@ -2181,7 +2187,7 @@ static void free_block(kmem_cache_t *cac
>  		objnr = (objp - slabp->s_mem) / cachep->objsize;
>  		check_slabp(cachep, slabp);
>  #if DEBUG
> -		if (slab_bufctl(slabp)[objnr] != BUFCTL_FREE) {
> +		if (slab_bufctl(slabp)[objnr] != BUFCTL_ALLOC) {
>  			printk(KERN_ERR "slab: double free detected in cache '%s', objp %p.\n",
>  						cachep->name, objp);
>  			BUG();
> @@ -2409,7 +2415,7 @@ got_slabp:
>  	slabp->inuse++;
>  	next = slab_bufctl(slabp)[slabp->free];
>  #if DEBUG
> -	slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;
> +	slab_bufctl(slabp)[slabp->free] = BUFCTL_ALLOC;
>  #endif
>  	slabp->free = next;
>  	check_slabp(cachep, slabp);
> @@ -2615,6 +2621,94 @@ unsigned int kmem_cache_size(kmem_cache_
>  
>  EXPORT_SYMBOL(kmem_cache_size);
>  
> +#if DEBUG
> +static void check_slabuse(kmem_cache_t *cachep, struct slab *slabp)
> +{
> +	int i;
> +
> +	if (!(cachep->flags & SLAB_RED_ZONE))
> +		return;	/* no redzone data to check */
> +
> +#if CONFIG_DEBUG_PAGEALLOC
> +	/* Page alloc debugging on for this cache. Mapping & Unmapping happens
> +	 * without any locking, thus parallel checks are impossible.
> +	 */
> +	if ((cachep->objsize%PAGE_SIZE)==0 && OFF_SLAB(cachep))
> +		return;
> +#endif
> +
> +	for (i=0;i<cachep->num;i++) {
> +		void *objp = slabp->s_mem + cachep->objsize * i;
> +		unsigned long red1, red2;
> +
> +		red1 = *dbg_redzone1(cachep, objp);
> +		red2 = *dbg_redzone2(cachep, objp);
> +
> +		/* simplest case: marked as inactive */
> +		if (red1 == RED_INACTIVE && red2 == RED_INACTIVE)
> +			continue;
> +
> +		/* tricky case: if the bufctl value is BUFCTL_ALLOC, then
> +		 * the object is either allocated or somewhere in a cpu
> +		 * cache. The cpu caches are lockless and there might be
> +		 * a concurrent alloc/free call, thus we must accept random
> +		 * combinations of RED_ACTIVE and _INACTIVE
> +		 */
> +		if (slab_bufctl(slabp)[i] == BUFCTL_ALLOC &&
> +				(red1 == RED_INACTIVE || red1 == RED_ACTIVE) &&
> +				(red2 == RED_INACTIVE || red2 == RED_ACTIVE))
> +			continue;
> +
> +		printk(KERN_ERR "slab %s: redzone mismatch in slabp %p, objp %p, bufctl 0x%x\n",
> +				cachep->name, slabp, objp, slab_bufctl(slabp)[i]);
> +		print_objinfo(cachep, objp, 2);
> +	}
> +}
> +
> +/*
> + * Perform a self test on all slabs from a cache
> + */
> +static void check_redzone(kmem_cache_t *cachep)
> +{
> +	struct list_head *q;
> +	struct slab *slabp;
> +
> +	check_spinlock_acquired(cachep);
> +
> +	list_for_each(q,&cachep->lists.slabs_full) {
> +		slabp = list_entry(q, struct slab, list);
> +
> +		if (slabp->inuse != cachep->num) {
> +			printk(KERN_INFO "slab %s: wrong slabp found in full slab chain at %p (%d/%d).\n",
> +					cachep->name, slabp, slabp->inuse, cachep->num);
> +		}
> +		check_slabp(cachep, slabp);
> +		check_slabuse(cachep, slabp);
> +	}
> +	list_for_each(q,&cachep->lists.slabs_partial) {
> +		slabp = list_entry(q, struct slab, list);
> +
> +		if (slabp->inuse == cachep->num || slabp->inuse == 0) {
> +			printk(KERN_INFO "slab %s: wrong slab found in partial chain at %p (%d/%d).\n",
> +					cachep->name, slabp, slabp->inuse, cachep->num);
> +		}
> +		check_slabp(cachep, slabp);
> +		check_slabuse(cachep, slabp);
> +	}
> +	list_for_each(q,&cachep->lists.slabs_free) {
> +		slabp = list_entry(q, struct slab, list);
> +
> +		if (slabp->inuse != 0) {
> +			printk(KERN_INFO "slab %s: wrong slab found in free chain at %p (%d/%d).\n",
> +					cachep->name, slabp, slabp->inuse, cachep->num);
> +		}
> +		check_slabp(cachep, slabp);
> +		check_slabuse(cachep, slabp);
> +	}
> +}
> +
> +#endif
> +
>  struct ccupdate_struct {
>  	kmem_cache_t *cachep;
>  	struct array_cache *new[NR_CPUS];
> @@ -2798,6 +2892,12 @@ static void cache_reap(void *unused)
>  
>  		drain_array_locked(searchp, ac_data(searchp), 0);
>  
> +#if DEBUG
> +		if(time_before(searchp->redzonetest, jiffies)) {
> +			searchp->redzonetest = jiffies + REDZONETIMEOUT;
> +			check_redzone(searchp);
> +		}
> +#endif
>  		if(time_after(searchp->lists.next_reap, jiffies))
>  			goto next_unlock;
>  
> 
> 


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: 2.6.16rc1-git4 slab corruption.
  2006-02-01 16:09       ` Dave Jones
  2006-02-01 16:27         ` Pekka Enberg
@ 2006-02-01 16:29         ` Pekka Enberg
  2006-02-01 16:38           ` Dave Jones
  2006-02-01 17:09         ` Pekka Enberg
  2 siblings, 1 reply; 11+ messages in thread
From: Pekka Enberg @ 2006-02-01 16:29 UTC (permalink / raw)
  To: Dave Jones; +Cc: Chris Mason, Linux Kernel

Hi Dave,

On Wed, 2006-02-01 at 11:09 -0500, Dave Jones wrote:
> @@ -1446,7 +1448,11 @@ next:
>  	} 
>  
>  	cachep->lists.next_reap = jiffies + REAPTIMEOUT_LIST3 +
> -					((unsigned long)cachep)%REAPTIMEOUT_LIST3;
> +					((unsigned long)cachep/L1_CACHE_BYTES)%REAPTIMEOUT_LIST3;

Hmm. This bit seems unrelated. Was it in the original patch?

			Pekka


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: 2.6.16rc1-git4 slab corruption.
  2006-02-01 16:29         ` Pekka Enberg
@ 2006-02-01 16:38           ` Dave Jones
  0 siblings, 0 replies; 11+ messages in thread
From: Dave Jones @ 2006-02-01 16:38 UTC (permalink / raw)
  To: Pekka Enberg; +Cc: Chris Mason, Linux Kernel

On Wed, Feb 01, 2006 at 06:29:24PM +0200, Pekka Enberg wrote:
 > Hi Dave,
 > 
 > On Wed, 2006-02-01 at 11:09 -0500, Dave Jones wrote:
 > > @@ -1446,7 +1448,11 @@ next:
 > >  	} 
 > >  
 > >  	cachep->lists.next_reap = jiffies + REAPTIMEOUT_LIST3 +
 > > -					((unsigned long)cachep)%REAPTIMEOUT_LIST3;
 > > +					((unsigned long)cachep/L1_CACHE_BYTES)%REAPTIMEOUT_LIST3;
 > 
 > Hmm. This bit seems unrelated. Was it in the original patch?

as far as I recall, yes.

		Dave


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: 2.6.16rc1-git4 slab corruption.
  2006-02-01 16:09       ` Dave Jones
  2006-02-01 16:27         ` Pekka Enberg
  2006-02-01 16:29         ` Pekka Enberg
@ 2006-02-01 17:09         ` Pekka Enberg
  2 siblings, 0 replies; 11+ messages in thread
From: Pekka Enberg @ 2006-02-01 17:09 UTC (permalink / raw)
  To: Dave Jones; +Cc: Chris Mason, Linux Kernel, manfred

Hi,

On Wed, 2006-02-01 at 11:09 -0500, Dave Jones wrote:
> Here's the last version that I had that was rediffed against
> 2.6.13 or .14 (I forget which, it's been a while since I used it).

Here's an untested rediff for 2.6.16-rc1-mm4. The patch should apply to
mainline when Linus merges the slab bits from Andrew. I am wondering if
this should be a separate config option, CONFIG_VERIFY_SLAB?

			Pekka

 mm/slab.c |  127 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 124 insertions(+), 3 deletions(-)

Index: 2.6-mm/mm/slab.c
===================================================================
--- 2.6-mm.orig/mm/slab.c
+++ 2.6-mm/mm/slab.c
@@ -202,7 +202,7 @@
 
 typedef unsigned long kmem_bufctl_t;
 #define BUFCTL_END	(((kmem_bufctl_t)(~0U))-0)
-#define BUFCTL_FREE	(((kmem_bufctl_t)(~0U))-1)
+#define BUFCTL_ALLOC	(((kmem_bufctl_t)(~0U))-1)
 #define	SLAB_LIMIT	(((kmem_bufctl_t)(~0U))-2)
 
 /* Max number of objs-per-slab for caches which use off-slab slabs.
@@ -433,6 +433,7 @@ struct kmem_cache {
 	 */
 	int obj_offset;
 	int obj_size;
+	unsigned long redzonetest;
 #endif
 };
 
@@ -448,6 +449,7 @@ struct kmem_cache {
  */
 #define REAPTIMEOUT_CPUC	(2*HZ)
 #define REAPTIMEOUT_LIST3	(4*HZ)
+#define REDZONETIMEOUT		(300*HZ)
 
 #if STATS
 #define	STATS_INC_ACTIVE(x)	((x)->num_active++)
@@ -1932,6 +1934,11 @@ kmem_cache_create (const char *name, siz
 		cachep->limit = BOOT_CPUCACHE_ENTRIES;
 	}
 
+#if DEBUG
+	cachep->redzonetest = jiffies + REDZONETIMEOUT +
+		((unsigned long)cachep/L1_CACHE_BYTES)%REDZONETIMEOUT;
+#endif
+
 	/* cache setup completed, link it into the list */
 	list_add(&cachep->next, &cache_chain);
       oops:
@@ -2261,7 +2268,7 @@ static void *slab_get_obj(struct kmem_ca
 	slabp->inuse++;
 	next = slab_bufctl(slabp)[slabp->free];
 #if DEBUG
-	slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;
+	slab_bufctl(slabp)[slabp->free] = BUFCTL_ALLOC;
 	WARN_ON(slabp->nodeid != nodeid);
 #endif
 	slabp->free = next;
@@ -2278,7 +2285,7 @@ static void slab_put_obj(struct kmem_cac
 	/* Verify that the slab belongs to the intended node */
 	WARN_ON(slabp->nodeid != nodeid);
 
-	if (slab_bufctl(slabp)[objnr] != BUFCTL_FREE) {
+	if (slab_bufctl(slabp)[objnr] != BUFCTL_ALLOC) {
 		printk(KERN_ERR "slab: double free detected in cache "
 		       "'%s', objp %p\n", cachep->name, objp);
 		BUG();
@@ -3285,6 +3292,113 @@ static int alloc_kmemlist(struct kmem_ca
 	return err;
 }
 
+#if DEBUG
+
+static void check_slabuse(kmem_cache_t *cachep, struct slab *slabp)
+{
+	int i;
+
+	if (!(cachep->flags & SLAB_RED_ZONE))
+		return;	/* no redzone data to check */
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+	/* Page alloc debugging on for this cache. Mapping & Unmapping happens
+	 * without any locking, thus parallel checks are impossible.
+	 */
+	if ((cachep->buffer_size % PAGE_SIZE) == 0 && OFF_SLAB(cachep))
+		return;
+#endif
+
+	for (i=0;i<cachep->num;i++) {
+		void *objp = slabp->s_mem + cachep->buffer_size * i;
+		unsigned long red1, red2;
+
+		red1 = *dbg_redzone1(cachep, objp);
+		red2 = *dbg_redzone2(cachep, objp);
+
+		/* simplest case: marked as inactive */
+		if (red1 == RED_INACTIVE && red2 == RED_INACTIVE)
+			continue;
+
+		/* tricky case: if the bufctl value is BUFCTL_ALLOC, then
+		 * the object is either allocated or somewhere in a cpu
+		 * cache. The cpu caches are lockless and there might be
+		 * a concurrent alloc/free call, thus we must accept random
+		 * combinations of RED_ACTIVE and _INACTIVE
+		 */
+		if (slab_bufctl(slabp)[i] == BUFCTL_ALLOC &&
+				(red1 == RED_INACTIVE || red1 == RED_ACTIVE) &&
+				(red2 == RED_INACTIVE || red2 == RED_ACTIVE))
+			continue;
+
+		printk(KERN_ERR "slab %s: redzone mismatch in slabp %p, objp %p, bufctl 0x%lx\n",
+				cachep->name, slabp, objp, slab_bufctl(slabp)[i]);
+		print_objinfo(cachep, objp, 2);
+	}
+}
+
+static void print_invalid_slab(const char *list_name, struct kmem_cache *cache,
+			     struct slab *slab)
+{
+	printk(KERN_ERR "slab %s: invalid slab found in %s list at %p (%d/%d).\n",
+	       cache->name, list_name, slab, slab->inuse, cache->num);
+}
+
+static void verify_node_redzone(struct kmem_cache *cache,
+				struct kmem_list3 *lists)
+{
+	struct list_head *q;
+	struct slab *slab;
+
+	list_for_each(q, &lists->slabs_full) {
+		slab = list_entry(q, struct slab, list);
+
+		if (slab->inuse != cache->num)
+			print_invalid_slab("full", cache, slab);
+
+		check_slabp(cache, slab);
+		check_slabuse(cache, slab);
+	}
+	list_for_each(q, &lists->slabs_partial) {
+		slab = list_entry(q, struct slab, list);
+
+		if (slab->inuse == cache->num || slab->inuse == 0)
+			print_invalid_slab("partial", cache, slab);
+
+		check_slabp(cache, slab);
+		check_slabuse(cache, slab);
+	}
+	list_for_each(q, &lists->slabs_free) {
+		slab = list_entry(q, struct slab, list);
+
+		if (slab->inuse != 0)
+			print_invalid_slab("free", cache, slab);
+
+		check_slabp(cache, slab);
+		check_slabuse(cache, slab);
+	}
+}
+
+/*
+ * Perform a self test on all slabs from a cache
+ */
+static void verify_redzone(struct kmem_cache *cache)
+{
+	int node;
+
+	check_spinlock_acquired(cache);
+
+	for_each_online_node(node) {
+		struct kmem_list3 *lists = cache->nodelists[node];
+
+		if (!lists)
+			continue;
+		verify_node_redzone(cache, lists);
+	}
+}
+
+#endif
+
 struct ccupdate_struct {
 	struct kmem_cache *cachep;
 	struct array_cache *new[NR_CPUS];
@@ -3465,6 +3579,13 @@ static void cache_reap(void *unused)
 		drain_array_locked(searchp, cpu_cache_get(searchp), 0,
 				   numa_node_id());
 
+#if DEBUG
+		if (time_before(searchp->redzonetest, jiffies)) {
+			searchp->redzonetest = jiffies + REDZONETIMEOUT;
+			verify_redzone(searchp);
+		}
+#endif
+
 		if (time_after(l3->next_reap, jiffies))
 			goto next_unlock;
 



^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: 2.6.16rc1-git4 slab corruption.
  2006-01-31 18:03 2.6.16rc1-git4 slab corruption Dave Jones
  2006-01-31 19:08 ` Chris Mason
@ 2006-02-02  5:07 ` Dave Jones
  2006-02-02  7:11   ` Pekka Enberg
  1 sibling, 1 reply; 11+ messages in thread
From: Dave Jones @ 2006-02-02  5:07 UTC (permalink / raw)
  To: Linux Kernel

On Tue, Jan 31, 2006 at 01:03:19PM -0500, Dave Jones wrote:
 > Slab corruption: start=ffff81000057a360, len=88
 > Redzone: 0x5a2cf071/0x5a2cf071.
 > Last user: [<ffffffff80181cc0>](free_buffer_head+0x2a/0x43)
 > 
 > Call Trace: <ffffffff8017b4d0>{check_poison_obj+127}
 >        <ffffffff80181cea>{alloc_buffer_head+17} <ffffffff8017b638>{cache_alloc_debugcheck_after+48}
 >        <ffffffff8017b828>{kmem_cache_alloc+231} <ffffffff80181cea>{alloc_buffer_head+17}
 >        <ffffffff801824b1>{alloc_page_buffers+53} <ffffffff8018255c>{create_empty_buffers+20}
 >        <ffffffff801831c8>{__block_prepare_write+148} <ffffffff8807e5f4>{:ext3:ext3_get_block+0}
 >        <ffffffff8017b0cc>{poison_obj+38} <ffffffff8017b6f7>{cache_alloc_debugcheck_after+239}
 >        <ffffffff80183536>{block_prepare_write+26} <ffffffff8807fcd1>{:ext3:ext3_prepare_write+148}
 >        <ffffffff80340386>{_write_unlock_irq+9} <ffffffff8015e3b7>{generic_file_buffered_write+603}
 >        <ffffffff80137896>{current_fs_time+59} <ffffffff80137896>{current_fs_time+59}
 >        <ffffffff8015ea13>{__generic_file_aio_write_nolock+767}
 >        <ffffffff8015ee22>{generic_file_aio_write+78} <ffffffff80149d23>{debug_mutex_add_waiter+159}
 >        <ffffffff8033fb23>{__mutex_lock_slowpath+817} <ffffffff8015ee39>{generic_file_aio_write+101}
 >        <ffffffff8807be5e>{:ext3:ext3_file_write+22} <ffffffff8018054e>{do_sync_write+199}
 >        <ffffffff801464d6>{autoremove_wake_function+0} <ffffffff8015ae1f>{audit_syscall_entry+301}
 >        <ffffffff80180e48>{vfs_write+206} <ffffffff801813fa>{sys_write+69}
 >        <ffffffff8010aa78>{tracesys+209}
 > 020: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 01 00 00 00

I just hit corruption again (I had rebooted since), but this time with
a completely different trace.

Slab corruption: start=ffff81000057a000, len=4096

Call Trace: <ffffffff8017b4f0>{check_poison_obj+127}
        <ffffffff802dd12a>{__alloc_skb+92} <ffffffff8017b658>{cache_alloc_debugcheck_after+48}
        <ffffffff8017c271>{__kmalloc+294} <ffffffff802dd12a>{__alloc_skb+92}
        <ffffffff802d9ba3>{sock_alloc_send_skb+101} <ffffffff801dd163>{avc_has_perm+67}
        <ffffffff8017b340>{cache_free_debugcheck+554} <ffffffff8033952e>{unix_stream_sendmsg+348}
        <ffffffff801dd4c2>{socket_has_perm+93} <ffffffff802d70e2>{do_sock_write+193}
        <ffffffff802d90ad>{sock_writev+183} <ffffffff801464de>{autoremove_wake_function+0}
        <ffffffff801dd818>{inode_has_perm+86} <ffffffff802d7668>{sock_aio_read+81}
        <ffffffff801dd8bb>{file_has_perm+150} <ffffffff80180bf7>{do_readv_writev+381}
        <ffffffff801811a7>{sys_writev+69} <ffffffff8010a906>{system_call+126}
380: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 01 00 00 00

What I find interesting here is the corruption pattern is the same both times.
Strange, and very scary.

		Dave


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: 2.6.16rc1-git4 slab corruption.
  2006-02-02  5:07 ` Dave Jones
@ 2006-02-02  7:11   ` Pekka Enberg
  0 siblings, 0 replies; 11+ messages in thread
From: Pekka Enberg @ 2006-02-02  7:11 UTC (permalink / raw)
  To: Dave Jones, Linux Kernel

Hi,

On 2/2/06, Dave Jones <davej@redhat.com> wrote:
> I just hit corruption again (I had rebooted since), but this time with
> a completely different trace.

[snip, snip]

> What I find interesting here is the corruption pattern is the same both times.
> Strange, and very scary.

Did you try out the slab verifier? The patch I sent now applies to
latest -linus.

                            Pekka

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2006-02-02  7:11 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-01-31 18:03 2.6.16rc1-git4 slab corruption Dave Jones
2006-01-31 19:08 ` Chris Mason
2006-01-31 22:15   ` Dave Jones
2006-02-01  7:27     ` Pekka Enberg
2006-02-01 16:09       ` Dave Jones
2006-02-01 16:27         ` Pekka Enberg
2006-02-01 16:29         ` Pekka Enberg
2006-02-01 16:38           ` Dave Jones
2006-02-01 17:09         ` Pekka Enberg
2006-02-02  5:07 ` Dave Jones
2006-02-02  7:11   ` Pekka Enberg

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).