linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH][2.5.73] stack corruption in devfs_lookup
       [not found]   ` <Pine.LNX.4.55.0305050005230.1278@marabou.research.att.com>
@ 2003-07-06 17:06     ` Andrey Borzenkov
  2003-07-06 19:03       ` Andrew Morton
  0 siblings, 1 reply; 11+ messages in thread
From: Andrey Borzenkov @ 2003-07-06 17:06 UTC (permalink / raw)
  To: linux-kernel; +Cc: devfs

[-- Attachment #1: Type: text/plain, Size: 2536 bytes --]

Doing concurrent lookups for the same name in devfs with devfsd and modules 
enabled may result in stack coruption.

When devfs_lookup needs to call devfsd it arranges for other lookups for the 
same name to wait. It is using local variable as wait queue head. After 
devfsd returns devfs_lookup wakes up all waiters and returns. Unfortunately 
there is no garantee all waiters will actually get chance to run and clean up 
before devfs_lookup returns. so some of them attempt to access already freed 
storage on stack.

It is trivial to trigger with SMP kernel (I have single-CPU system if it 
matters) doing

while true
do
  ls /dev/foo &
done

With spinlock debug enabled this results in large number of stacks similar to

------------[ cut here ]------------
kernel BUG at include/asm/spinlock.h:120!
invalid operand: 0000 [#1]
CPU:    0
EIP:    0060:[<c012004c>]    Tainted: G S
EFLAGS: 00010082
EIP is at remove_wait_queue+0xac/0xc0
eax: 0000000e   ebx: f6617e4c   ecx: 00000000   edx: 00000001
esi: f6747dd0   edi: f6616000   ebp: f6617e10   esp: f6617df0
ds: 007b   es: 007b   ss: 0068
Process ls (pid: 1517, threadinfo=f6616000 task=f6619900)
Stack: c03eb9d5 c011ffa0 00000286 f6617e24 c0443880 f6747dd0 f6616000 f6617e4c 
       f6617e78 c01cb3e6 c04470c0 f6616000 00000246 f6747dcc c1a6f1dc 00000000 
       f6619900 c011d4e0 00000000 00000000 f7d4b73c f663d005 f6759828 00000000
Call Trace:
 [<c011ffa0>] remove_wait_queue+0x0/0xc0
 [<c01cb3e6>] devfs_d_revalidate_wait+0x1d6/0x1f0
 [<c011d4e0>] default_wake_function+0x0/0x30
 [<c011d4e0>] default_wake_function+0x0/0x30
 [<c017201a>] do_lookup+0x5a/0xa0
 [<c017261e>] link_path_walk+0x5be/0xb20
 [<c0148ceb>] kmem_cache_alloc+0x14b/0x190
 [<c01730fe>] __user_walk+0x3e/0x60
 [<c016d13e>] vfs_stat+0x1e/0x60
 [<c0154c5b>] do_brk+0x12b/0x200
 [<c016d7bb>] sys_stat64+0x1b/0x40
 [<c01532e2>] sys_brk+0xf2/0x120
 [<c011a820>] do_page_fault+0x0/0x4c5
 [<c0109919>] sysenter_past_esp+0x52/0x71

Code: 0f 0b 78 00 6c b0 3e c0 e9 72 ff ff ff 8d b4 26 00 00 00 00
 <6>note: ls[1517] exited with preempt_count 1
eip: c011ffa0

without spinlock debug system usually hung dead with reset button as the only 
possibility.

I was not able to reproduce it on 2.4 on single-CPU system - in 2.4 
devfs_d_revalidate_wait does not attempt to remove itself from wait queue so 
it appears to be safe.

attached patch is against 2.5.73 but applies to 2.5.74 as well. It makes 
lookup struct be allocated from heap and adds reference counter to free it 
when no more needed.

regards

-andrey



[-- Attachment #2: 2.5.73-devfs_stack_corruption.patch --]
[-- Type: text/x-diff, Size: 4028 bytes --]

--- linux-2.5.73/fs/devfs/base.c.stack	2003-06-23 19:46:44.000000000 +0400
+++ linux-2.5.73/fs/devfs/base.c	2003-06-26 21:01:45.000000000 +0400
@@ -2208,8 +2208,46 @@ struct devfs_lookup_struct
 {
     devfs_handle_t de;
     wait_queue_head_t wait_queue;
+    atomic_t count;
 };
 
+static struct devfs_lookup_struct *
+new_devfs_lookup_struct(void)
+{
+	struct devfs_lookup_struct *p = kmalloc(sizeof(*p), GFP_KERNEL);
+
+	if (!p)
+		return NULL;
+
+	init_waitqueue_head (&p->wait_queue);
+	atomic_set(&p->count, 1);
+	return p;
+}
+
+static void
+get_devfs_lookup_struct(struct devfs_lookup_struct *info)
+{
+	if (info)
+		atomic_inc(&info->count);
+	else {
+		printk(KERN_ERR "null devfs_lookup_struct pointer\n");
+		dump_stack();
+	}
+}
+
+static void
+put_devfs_lookup_struct(struct devfs_lookup_struct *info)
+{
+	if (info) {
+		if (!atomic_dec_and_test(&info->count))
+			return;
+		kfree(info);
+	} else {
+		printk(KERN_ERR "null devfs_lookup_struct pointer\n");
+		dump_stack();
+	}
+}
+
 /* XXX: this doesn't handle the case where we got a negative dentry
         but a devfs entry has been registered in the meanwhile */
 static int devfs_d_revalidate_wait (struct dentry *dentry, int flags)
@@ -2252,11 +2290,13 @@ static int devfs_d_revalidate_wait (stru
     read_lock (&parent->u.dir.lock);
     if (dentry->d_fsdata)
     {
+	get_devfs_lookup_struct(lookup_info);
 	set_current_state (TASK_UNINTERRUPTIBLE);
 	add_wait_queue (&lookup_info->wait_queue, &wait);
 	read_unlock (&parent->u.dir.lock);
 	schedule ();
 	remove_wait_queue (&lookup_info->wait_queue, &wait);
+	put_devfs_lookup_struct(lookup_info);
     }
     else read_unlock (&parent->u.dir.lock);
     return 1;
@@ -2268,7 +2308,7 @@ static int devfs_d_revalidate_wait (stru
 static struct dentry *devfs_lookup (struct inode *dir, struct dentry *dentry)
 {
     struct devfs_entry tmp;  /*  Must stay in scope until devfsd idle again  */
-    struct devfs_lookup_struct lookup_info;
+    struct devfs_lookup_struct *lookup_info;
     struct fs_info *fs_info = dir->i_sb->s_fs_info;
     struct devfs_entry *parent, *de;
     struct inode *inode;
@@ -2285,9 +2325,10 @@ static struct dentry *devfs_lookup (stru
     read_lock (&parent->u.dir.lock);
     de = _devfs_search_dir (parent, dentry->d_name.name, dentry->d_name.len);
     read_unlock (&parent->u.dir.lock);
-    lookup_info.de = de;
-    init_waitqueue_head (&lookup_info.wait_queue);
-    dentry->d_fsdata = &lookup_info;
+    lookup_info = new_devfs_lookup_struct();
+    if (!lookup_info) return ERR_PTR(-ENOMEM);
+    lookup_info->de = de;
+    dentry->d_fsdata = lookup_info;
     if (de == NULL)
     {   /*  Try with devfsd. For any kind of failure, leave a negative dentry
 	    so someone else can deal with it (in the case where the sysadmin
@@ -2297,6 +2338,7 @@ static struct dentry *devfs_lookup (stru
 	if (try_modload (parent, fs_info,
 			 dentry->d_name.name, dentry->d_name.len, &tmp) < 0)
 	{   /*  Lookup event was not queued to devfsd  */
+	    put_devfs_lookup_struct(lookup_info);
 	    d_add (dentry, NULL);
 	    return NULL;
 	}
@@ -2309,7 +2351,7 @@ static struct dentry *devfs_lookup (stru
     up (&dir->i_sem);
     wait_for_devfsd_finished (fs_info);  /*  If I'm not devfsd, must wait  */
     down (&dir->i_sem);      /*  Grab it again because them's the rules  */
-    de = lookup_info.de;
+    de = lookup_info->de;
     /*  If someone else has been so kind as to make the inode, we go home
 	early  */
     if (dentry->d_inode) goto out;
@@ -2333,10 +2375,11 @@ static struct dentry *devfs_lookup (stru
 	     de->name, de->inode.ino, inode, de, current->comm);
     d_instantiate (dentry, inode);
 out:
+    write_lock (&parent->u.dir.lock);
     dentry->d_op = &devfs_dops;
     dentry->d_fsdata = NULL;
-    write_lock (&parent->u.dir.lock);
-    wake_up (&lookup_info.wait_queue);
+    wake_up (&lookup_info->wait_queue);
+    put_devfs_lookup_struct(lookup_info);
     write_unlock (&parent->u.dir.lock);
     devfs_put (de);
     return retval;

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH][2.5.73] stack corruption in devfs_lookup
  2003-07-06 17:06     ` [PATCH][2.5.73] stack corruption in devfs_lookup Andrey Borzenkov
@ 2003-07-06 19:03       ` Andrew Morton
       [not found]         ` <20030706175405.518f680d.akpm@osdl.org>
  2003-07-26 14:58         ` [PATCH][2.6.0-test1] redesign - stack corruption in devfs_lookup Andrey Borzenkov
  0 siblings, 2 replies; 11+ messages in thread
From: Andrew Morton @ 2003-07-06 19:03 UTC (permalink / raw)
  To: Andrey Borzenkov; +Cc: linux-kernel, devfs

Andrey Borzenkov <arvidjaar@mail.ru> wrote:
>
> When devfs_lookup needs to call devfsd it arranges for other lookups for the 
>  same name to wait. It is using local variable as wait queue head. After 
>  devfsd returns devfs_lookup wakes up all waiters and returns. Unfortunately 
>  there is no garantee all waiters will actually get chance to run and clean up 
>  before devfs_lookup returns. so some of them attempt to access already freed 
>  storage on stack.

OK, but I think there is a simpler fix.  We can rely on the side-effects of
prepare_to_wait() and finish_wait().

The wakeup() will remove all wait_queue_t's from the wait_queue_head, and
so when the waiters wake up and call finish_wait(), they will never touch
the now-out-of-scope waitqueue head.

It is a little faster than the currentcode, too.

Could you please test this?



diff -puN fs/devfs/base.c~devfs-oops-fix-2 fs/devfs/base.c
--- 25/fs/devfs/base.c~devfs-oops-fix-2	2003-07-06 11:55:38.000000000 -0700
+++ 25-akpm/fs/devfs/base.c	2003-07-06 11:59:23.000000000 -0700
@@ -2218,7 +2218,6 @@ static int devfs_d_revalidate_wait (stru
     struct fs_info *fs_info = dir->i_sb->s_fs_info;
     devfs_handle_t parent = get_devfs_entry_from_vfs_inode (dir);
     struct devfs_lookup_struct *lookup_info = dentry->d_fsdata;
-    DECLARE_WAITQUEUE (wait, current);
 
     if ( is_devfsd_or_child (fs_info) )
     {
@@ -2252,11 +2251,12 @@ static int devfs_d_revalidate_wait (stru
     read_lock (&parent->u.dir.lock);
     if (dentry->d_fsdata)
     {
-	set_current_state (TASK_UNINTERRUPTIBLE);
-	add_wait_queue (&lookup_info->wait_queue, &wait);
-	read_unlock (&parent->u.dir.lock);
-	schedule ();
-	remove_wait_queue (&lookup_info->wait_queue, &wait);
+	DEFINE_WAIT(wait);
+
+	prepare_to_wait(&lookup_info->wait_queue, &wait, TASK_UNINTERRUPTIBLE);
+	read_unlock(&parent->u.dir.lock);
+	schedule();
+	finish_wait(&lookup_info->wait_queue, &wait);
     }
     else read_unlock (&parent->u.dir.lock);
     return 1;
@@ -2336,6 +2336,12 @@ out:
     dentry->d_op = &devfs_dops;
     dentry->d_fsdata = NULL;
     write_lock (&parent->u.dir.lock);
+    /*
+     * This wakeup will remove all waiters' wait_queue_t's from the waitqueue
+     * head, because the waiters use prepare_to_wait()/finished_wait().
+     * Hence it is OK that the waitqueue_head goes out of scope immediately
+     * after the wakeup is delivered
+     */
     wake_up (&lookup_info.wait_queue);
     write_unlock (&parent->u.dir.lock);
     devfs_put (de);

_


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH][2.5.74] devfs lookup deadlock/stack corruption combined patch
       [not found]         ` <20030706175405.518f680d.akpm@osdl.org>
@ 2003-07-07 19:06           ` Andrey Borzenkov
  2003-07-07 21:00             ` Andrew Morton
  2003-07-07 21:41             ` Pavel Roskin
  0 siblings, 2 replies; 11+ messages in thread
From: Andrey Borzenkov @ 2003-07-07 19:06 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, devfs

[-- Attachment #1: Type: text/plain, Size: 1783 bytes --]

On Monday 07 July 2003 04:54, you wrote:
> Actually, don't bother.  This idea can be made to work, but
> we already have enough tricky stuff in the wait/wakeup area.
>
> Let's run with your original patch.
>

I finally hit a painfully trivial way to reproduce another long standing devfs 
problem - deadlock between devfs_lookup and devfs_d_revalidate_wait. When 
devfs_lookup releases directory i_sem devfs_d_revalidate_wait grabs it (it 
happens not for every path) and goes to wait to be waked up. Unfortunately, 
devfs_lookup attempts to acquire directory i_sem before ever waking it up ...

To reproduce (2.5.74 UP or SMP - does not matter, single CPU system)

ls /dev/foo & rm -f /dev/foo &

or possibly in a loop but then it easily fills up process table. In my case it 
hangs 100% reliably - on 2.5 OR 2.4.

The current fix is to move re-acquire of i_sem after all 
devfs_d_revalidate_wait waiters have been waked up. Much better fix would be 
to ensure that ->d_revalidate either is always called under i_sem or always 
without. But that means the very heart of VFS and I do not dare to touch it.

The fix has been tested on 2.4 (and is part of unofficial Mandrake Club 
kernel); I expected the same bug is in 2.5; I just was stupid not seeing the 
way to reproduce it before.

Attached is combined patch and fix for deadlock only (to show it alone). 
Andrew, I slightly polished original stack corruption version to look more 
consistent with the rest of devfs; also removed NULL pointer checks - let it 
just BUG in this case if it happens.

I have already sent the patch for 2.4 two times - please, could somebody 
finally either apply it or explain what is wrong with it. Richard is out of 
reach apparently and the bug is real and seen by many people.

regards

-andrey

[-- Attachment #2: 2.5.74-devfs_combined.patch --]
[-- Type: text/x-diff, Size: 4211 bytes --]

--- linux-2.5.74-smp/fs/devfs/base.c.deadlock	2003-06-23 19:46:44.000000000 +0400
+++ linux-2.5.74-smp/fs/devfs/base.c	2003-07-07 22:38:35.000000000 +0400
@@ -2208,8 +2208,38 @@ struct devfs_lookup_struct
 {
     devfs_handle_t de;
     wait_queue_head_t wait_queue;
+    atomic_t count;
 };
 
+static struct devfs_lookup_struct *new_devfs_lookup_struct (void)
+{
+    struct devfs_lookup_struct *info = kmalloc (sizeof (*info), GFP_KERNEL);
+
+    if (info == NULL) {
+	PRINTK ("(): cannot allocate memory\n");
+	return NULL;
+    }
+
+    init_waitqueue_head (&info->wait_queue);
+    atomic_set (&info->count, 1);
+    DPRINTK (DEBUG_I_LOOKUP, "(%p): allocated\n", info);
+    return info;
+}
+
+static inline void get_devfs_lookup_struct (struct devfs_lookup_struct *info)
+{
+    atomic_inc (&info->count);
+}
+
+static inline void put_devfs_lookup_struct (struct devfs_lookup_struct *info)
+{
+    if (!atomic_dec_and_test (&info->count))
+	return;
+
+    DPRINTK (DEBUG_I_LOOKUP, "(%p): freed\n", info);
+    kfree (info);
+}
+
 /* XXX: this doesn't handle the case where we got a negative dentry
         but a devfs entry has been registered in the meanwhile */
 static int devfs_d_revalidate_wait (struct dentry *dentry, int flags)
@@ -2252,11 +2282,13 @@ static int devfs_d_revalidate_wait (stru
     read_lock (&parent->u.dir.lock);
     if (dentry->d_fsdata)
     {
+	get_devfs_lookup_struct (lookup_info);
 	set_current_state (TASK_UNINTERRUPTIBLE);
 	add_wait_queue (&lookup_info->wait_queue, &wait);
 	read_unlock (&parent->u.dir.lock);
 	schedule ();
 	remove_wait_queue (&lookup_info->wait_queue, &wait);
+	put_devfs_lookup_struct (lookup_info);
     }
     else read_unlock (&parent->u.dir.lock);
     return 1;
@@ -2268,7 +2300,7 @@ static int devfs_d_revalidate_wait (stru
 static struct dentry *devfs_lookup (struct inode *dir, struct dentry *dentry)
 {
     struct devfs_entry tmp;  /*  Must stay in scope until devfsd idle again  */
-    struct devfs_lookup_struct lookup_info;
+    struct devfs_lookup_struct *lookup_info;
     struct fs_info *fs_info = dir->i_sb->s_fs_info;
     struct devfs_entry *parent, *de;
     struct inode *inode;
@@ -2285,9 +2317,10 @@ static struct dentry *devfs_lookup (stru
     read_lock (&parent->u.dir.lock);
     de = _devfs_search_dir (parent, dentry->d_name.name, dentry->d_name.len);
     read_unlock (&parent->u.dir.lock);
-    lookup_info.de = de;
-    init_waitqueue_head (&lookup_info.wait_queue);
-    dentry->d_fsdata = &lookup_info;
+    lookup_info = new_devfs_lookup_struct ();
+    if (lookup_info == NULL) return ERR_PTR (-ENOMEM);
+    lookup_info->de = de;
+    dentry->d_fsdata = lookup_info;
     if (de == NULL)
     {   /*  Try with devfsd. For any kind of failure, leave a negative dentry
 	    so someone else can deal with it (in the case where the sysadmin
@@ -2297,6 +2330,7 @@ static struct dentry *devfs_lookup (stru
 	if (try_modload (parent, fs_info,
 			 dentry->d_name.name, dentry->d_name.len, &tmp) < 0)
 	{   /*  Lookup event was not queued to devfsd  */
+	    put_devfs_lookup_struct (lookup_info);
 	    d_add (dentry, NULL);
 	    return NULL;
 	}
@@ -2308,8 +2342,7 @@ static struct dentry *devfs_lookup (stru
 	revalidation  */
     up (&dir->i_sem);
     wait_for_devfsd_finished (fs_info);  /*  If I'm not devfsd, must wait  */
-    down (&dir->i_sem);      /*  Grab it again because them's the rules  */
-    de = lookup_info.de;
+    de = lookup_info->de;
     /*  If someone else has been so kind as to make the inode, we go home
 	early  */
     if (dentry->d_inode) goto out;
@@ -2333,11 +2366,13 @@ static struct dentry *devfs_lookup (stru
 	     de->name, de->inode.ino, inode, de, current->comm);
     d_instantiate (dentry, inode);
 out:
+    write_lock (&parent->u.dir.lock);
     dentry->d_op = &devfs_dops;
     dentry->d_fsdata = NULL;
-    write_lock (&parent->u.dir.lock);
-    wake_up (&lookup_info.wait_queue);
+    wake_up (&lookup_info->wait_queue);
+    put_devfs_lookup_struct (lookup_info);
     write_unlock (&parent->u.dir.lock);
+    down (&dir->i_sem);      /*  Grab it again because them's the rules  */
     devfs_put (de);
     return retval;
 }   /*  End Function devfs_lookup  */

[-- Attachment #3: 2.5.74-devfs_lookup_deadlock.patch --]
[-- Type: text/x-diff, Size: 829 bytes --]

--- linux-2.5.74-smp/fs/devfs/base.c.orig	2003-07-07 20:58:36.000000000 +0400
+++ linux-2.5.74-smp/fs/devfs/base.c	2003-07-07 20:58:50.000000000 +0400
@@ -2308,7 +2308,6 @@ static struct dentry *devfs_lookup (stru
 	revalidation  */
     up (&dir->i_sem);
     wait_for_devfsd_finished (fs_info);  /*  If I'm not devfsd, must wait  */
-    down (&dir->i_sem);      /*  Grab it again because them's the rules  */
     de = lookup_info.de;
     /*  If someone else has been so kind as to make the inode, we go home
 	early  */
@@ -2338,6 +2337,7 @@ out:
     write_lock (&parent->u.dir.lock);
     wake_up (&lookup_info.wait_queue);
     write_unlock (&parent->u.dir.lock);
+    down (&dir->i_sem);      /*  Grab it again because them's the rules  */
     devfs_put (de);
     return retval;
 }   /*  End Function devfs_lookup  */

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH][2.5.74] devfs lookup deadlock/stack corruption combined patch
  2003-07-07 19:06           ` [PATCH][2.5.74] devfs lookup deadlock/stack corruption combined patch Andrey Borzenkov
@ 2003-07-07 21:00             ` Andrew Morton
  2003-07-08 17:49               ` Andrey Borzenkov
  2003-07-07 21:41             ` Pavel Roskin
  1 sibling, 1 reply; 11+ messages in thread
From: Andrew Morton @ 2003-07-07 21:00 UTC (permalink / raw)
  To: Andrey Borzenkov; +Cc: linux-kernel, devfs

Andrey Borzenkov <arvidjaar@mail.ru> wrote:
>
> I finally hit a painfully trivial way to reproduce another long standing devfs 
> problem - deadlock between devfs_lookup and devfs_d_revalidate_wait.

uh.

> The current fix is to move re-acquire of i_sem after all 
> devfs_d_revalidate_wait waiters have been waked up.

Directory modifications appear to be under write_lock(&dir->u.dir.lock); so
that obvious problem is covered.  Your patch might introduce a race around
_devfs_get_vfs_inode() - two CPUs running that against the same inode at
the same time?

> Andrew, I slightly polished original stack corruption version to look more 
> consistent with the rest of devfs;

I think it's Lindent time.



^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH][2.5.74] devfs lookup deadlock/stack corruption combined patch
  2003-07-07 19:06           ` [PATCH][2.5.74] devfs lookup deadlock/stack corruption combined patch Andrey Borzenkov
  2003-07-07 21:00             ` Andrew Morton
@ 2003-07-07 21:41             ` Pavel Roskin
  1 sibling, 0 replies; 11+ messages in thread
From: Pavel Roskin @ 2003-07-07 21:41 UTC (permalink / raw)
  To: Andrey Borzenkov; +Cc: Andrew Morton, linux-kernel, devfs

On Mon, 7 Jul 2003, Andrey Borzenkov wrote:

> To reproduce (2.5.74 UP or SMP - does not matter, single CPU system)
>
> ls /dev/foo & rm -f /dev/foo &
>
> or possibly in a loop but then it easily fills up process table. In my case it
> hangs 100% reliably - on 2.5 OR 2.4.

I've done some testing too.  I was using this script:

while :; do ls /dev/foo & rm -f /dev/foo & done

On Linux 2.5.74-bk5 without patches, running this script on a local
virtual console makes the system very slow.  I could not even login on
another virtual console.  However, I could interrupt the script by Ctrl-C.
After that I had a large number of processes in the D state.  Here's an
excerpt from the output of "ps ax":

31011 vc/1     D      0:00 ls /dev/foo
31012 vc/1     D      0:00 rm -f /dev/foo
31013 vc/1     D      0:00 ls /dev/foo
31014 vc/1     D      0:00 rm -f /dev/foo

I couldn't reboot the system cleanly.  My guess is that no new processes
could be run.

Linux 2.5.74-bk5 with the "combined" patch is OK.  Running the test
script doesn't prevent new logins.  There are no hanging processes after
interrupting the script.

> I have already sent the patch for 2.4 two times - please, could somebody
> finally either apply it or explain what is wrong with it. Richard is out of
> reach apparently and the bug is real and seen by many people.

I confirm seeing the bug on two systems with recent 2.4.x kernels with
probability over 50%.  Upgrading both systems to 2.5.x cured the problem
(of course, it's just a race condition that stopped happening).

Yes, it's an important problem to fix, and maybe we could remove the
"experimental" mark from CONFIG_DEVFS once it's done.  Maybe it's better
to have the patch accepted in the 2.5 series first just for "methodical"
reasons, but in practical terms, it's 2.4 kernels that need the deadlock
fix very badly.

-- 
Regards,
Pavel Roskin

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH][2.5.74] devfs lookup deadlock/stack corruption combined patch
  2003-07-07 21:00             ` Andrew Morton
@ 2003-07-08 17:49               ` Andrey Borzenkov
  2003-07-09  1:20                 ` Herbert Poetzl
  0 siblings, 1 reply; 11+ messages in thread
From: Andrey Borzenkov @ 2003-07-08 17:49 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, devfs

On Tuesday 08 July 2003 01:00, Andrew Morton wrote:
> Andrey Borzenkov <arvidjaar@mail.ru> wrote:
> > I finally hit a painfully trivial way to reproduce another long standing
> > devfs problem - deadlock between devfs_lookup and
> > devfs_d_revalidate_wait.
>
> uh.
>
> > The current fix is to move re-acquire of i_sem after all
> > devfs_d_revalidate_wait waiters have been waked up.
>
> Directory modifications appear to be under write_lock(&dir->u.dir.lock); so
> that obvious problem is covered.  Your patch might introduce a race around
> _devfs_get_vfs_inode() - two CPUs running that against the same inode at
> the same time?
>

Actually it just makes it marginally more probable.

Normal open without O_CREATE runs ->d_revalidate outside of i_sem i.e. neither 
devfs_lookup vs. devfs_d_revalidate_wait nor devfs_d_revalidate_wait vs. 
itself are protected  by i_sem and this is (should be) the most common case 
for /dev access.

This race happens under non-trivial conditions. devfsd descendant (i.e. some 
action) should be involved; and action triggered by devfs_lookup does not 
race with it by definition because devfs_lookup waits for action to finish. 
I.e. it needs another devfsd action that would access /dev entry after it 
just has been created or two concurrent lookups in LOOKUP action itself. 
Quite unlikely in real life and race window is very small.

I do not want to sound like it has to be ignored - but devfs code is so messy 
that no trivial fix exists that would not make code even more messy. So I 
would still apply original fixes and let this problem be solved later - it is 
not so important as to delay two other.

-andrey

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH][2.5.74] devfs lookup deadlock/stack corruption combined patch
  2003-07-08 17:49               ` Andrey Borzenkov
@ 2003-07-09  1:20                 ` Herbert Poetzl
  2003-07-09  1:26                   ` Andrew Morton
  0 siblings, 1 reply; 11+ messages in thread
From: Herbert Poetzl @ 2003-07-09  1:20 UTC (permalink / raw)
  To: Andrey Borzenkov; +Cc: Andrew Morton, linux-kernel

On Tue, Jul 08, 2003 at 09:49:17PM +0400, Andrey Borzenkov wrote:
> 
> I do not want to sound like it has to be ignored - but devfs code is so messy 
> that no trivial fix exists that would not make code even more messy. So I 

sorry to interrupt, but wasn't there an ongoing
efford to replace the devfs with smalldevfs or 
something even better? *hint*

please ignore me, if I'm talking nonsense ...

best,
Herbert

> would still apply original fixes and let this problem be solved later - it is 
> not so important as to delay two other.
> 
> -andrey

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH][2.5.74] devfs lookup deadlock/stack corruption combined patch
  2003-07-09  1:20                 ` Herbert Poetzl
@ 2003-07-09  1:26                   ` Andrew Morton
  2003-07-09  2:09                     ` Herbert Poetzl
  0 siblings, 1 reply; 11+ messages in thread
From: Andrew Morton @ 2003-07-09  1:26 UTC (permalink / raw)
  To: herbert; +Cc: arvidjaar, linux-kernel

Herbert Poetzl <herbert@13thfloor.at> wrote:
>
> On Tue, Jul 08, 2003 at 09:49:17PM +0400, Andrey Borzenkov wrote:
> > 
> > I do not want to sound like it has to be ignored - but devfs code is so messy 
> > that no trivial fix exists that would not make code even more messy. So I 
> 
> sorry to interrupt, but wasn't there an ongoing
> efford to replace the devfs with smalldevfs or 
> something even better? *hint*
> 

Yes, but

a) It didn't have a compatible solution for the legacy device names
   (/dev/hda, etc).  Could have been fixed up in userspace but the work was
   not done.

b) Certain parties youknowwhoyouare seem to have been stricken by smalldevfs
   amnesia.

I'm hoping that smalldevfs comes back.  The current thing is a running
sore.


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH][2.5.74] devfs lookup deadlock/stack corruption combined patch
  2003-07-09  1:26                   ` Andrew Morton
@ 2003-07-09  2:09                     ` Herbert Poetzl
  2003-07-09 10:34                       ` Thierry Vignaud
  0 siblings, 1 reply; 11+ messages in thread
From: Herbert Poetzl @ 2003-07-09  2:09 UTC (permalink / raw)
  To: Andrew Morton; +Cc: arvidjaar, linux-kernel

On Tue, Jul 08, 2003 at 06:26:20PM -0700, Andrew Morton wrote:
> Herbert Poetzl <herbert@13thfloor.at> wrote:
> >
> > On Tue, Jul 08, 2003 at 09:49:17PM +0400, Andrey Borzenkov wrote:
> > > 
> > > I do not want to sound like it has to be ignored - 
> > > but devfs code is so messy that no trivial fix exists 
> > > that would not make code even more messy.
> > 
> > sorry to interrupt, but wasn't there an ongoing
> > efford to replace the devfs with smalldevfs or 
> > something even better? *hint*
> > 
> 
> Yes, but
> 
> a) It didn't have a compatible solution for the legacy device 
>    names (/dev/hda, etc).  Could have been fixed up in userspace 
>    but the work was not done.

I might be totally wrong, as I can only speak
for 2.4 (which has no ongoing/forgotten smalldevfs 
efford ;), but devfs has definitely divided the
users into two groups (think religion/war) ...

the group using devfs, usually doesn't care about
the 'compatibility' issue, the other doesn't care
at all ... so this isn't an issue at all ...

> b) Certain parties youknowwhoyouare seem to have been stricken 
>    by smalldevfs amnesia.

maybe this helps youknowwhoyoumean to remember ...

> I'm hoping that smalldevfs comes back.  
> The current thing is a running sore.

I'm hoping too, and I would like to see it on
2.6 as well as 2.4 ...

using 2.4 I'm currently bound to devfs, as I'm
one of the pro-devfs guys, and I think Richard
Gooch did a great work with it ... (maybe a 
little too much work actually ;)

best,
Herbert

> 
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH][2.5.74] devfs lookup deadlock/stack corruption combined patch
  2003-07-09  2:09                     ` Herbert Poetzl
@ 2003-07-09 10:34                       ` Thierry Vignaud
  0 siblings, 0 replies; 11+ messages in thread
From: Thierry Vignaud @ 2003-07-09 10:34 UTC (permalink / raw)
  To: Andrew Morton; +Cc: arvidjaar, linux-kernel

Herbert Poetzl <herbert@13thfloor.at> writes:

> the group using devfs, usually doesn't care about
> the 'compatibility' issue,

i disagree: distro vendors wants that compatibility.
we want both the features of devfsd+hotplug+dynamic and the
compatibility devices.

> > I'm hoping that smalldevfs comes back.  
> > The current thing is a running sore.
> 
> I'm hoping too, and I would like to see it on 2.6 as well as 2.4 ...

there's been quite some cleanups on the 2.5.x side.
maybe can we complete this work before thinking about a new system ?
(remember the kbuild2 destiny ...)

if we do not want to create a compatibility layer such as devfsd for
smalldevfs, we can still alter drivers to create /dev/hda3 instead of
/dev/ide/host0/bus0/target0/lun0/part3 and the like

withouth any compatibility, smalldevfs will be a pain in th *ss for
distro vendors.


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH][2.6.0-test1] redesign - stack corruption in devfs_lookup
  2003-07-06 19:03       ` Andrew Morton
       [not found]         ` <20030706175405.518f680d.akpm@osdl.org>
@ 2003-07-26 14:58         ` Andrey Borzenkov
  1 sibling, 0 replies; 11+ messages in thread
From: Andrey Borzenkov @ 2003-07-26 14:58 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, devfs

[-- Attachment #1: Type: text/plain, Size: 1298 bytes --]

On Sunday 06 July 2003 23:03, Andrew Morton wrote:
> Andrey Borzenkov <arvidjaar@mail.ru> wrote:
> > When devfs_lookup needs to call devfsd it arranges for other lookups for
> > the same name to wait. It is using local variable as wait queue head.
> > After devfsd returns devfs_lookup wakes up all waiters and returns.
> > Unfortunately there is no garantee all waiters will actually get chance
> > to run and clean up before devfs_lookup returns. so some of them attempt
> > to access already freed storage on stack.
>
> OK, but I think there is a simpler fix.  We can rely on the side-effects of
> prepare_to_wait() and finish_wait().
>

and here is even more simple fix. there is no need to ever bother about wait 
queue because it dies soon without our intervention. This is exactly what 
code in 2.4 does - somebody "improved" code in 2.5 (again) :(

the patch against 2.6.0-test1 removes my previous patch and adds comment in 
revalidate_wait to prevent it from hapenning again. It is basically 2.4 code 
except one spinlock acquisition has been moved a bit earlier (probably does 
not matter just looks better). This probably should use wake_up_all instead 
of wake_up to make intention more clear.

It is tested using the same test case. Also 2.4 never had this problem as 
well.

-andrey

[-- Attachment #2: 2.6.0-test1-devfs_stack_corruption-3.patch --]
[-- Type: text/x-diff, Size: 4275 bytes --]

--- linux-2.6.0-test1/fs/devfs/base.c.2.6.0-test1	2003-07-26 12:11:38.000000000 +0400
+++ linux-2.6.0-test1/fs/devfs/base.c	2003-07-26 12:11:06.000000000 +0400
@@ -2208,46 +2208,8 @@ struct devfs_lookup_struct
 {
     devfs_handle_t de;
     wait_queue_head_t wait_queue;
-    atomic_t count;
 };
 
-static struct devfs_lookup_struct *
-new_devfs_lookup_struct(void)
-{
-	struct devfs_lookup_struct *p = kmalloc(sizeof(*p), GFP_KERNEL);
-
-	if (!p)
-		return NULL;
-
-	init_waitqueue_head (&p->wait_queue);
-	atomic_set(&p->count, 1);
-	return p;
-}
-
-static void
-get_devfs_lookup_struct(struct devfs_lookup_struct *info)
-{
-	if (info)
-		atomic_inc(&info->count);
-	else {
-		printk(KERN_ERR "null devfs_lookup_struct pointer\n");
-		dump_stack();
-	}
-}
-
-static void
-put_devfs_lookup_struct(struct devfs_lookup_struct *info)
-{
-	if (info) {
-		if (!atomic_dec_and_test(&info->count))
-			return;
-		kfree(info);
-	} else {
-		printk(KERN_ERR "null devfs_lookup_struct pointer\n");
-		dump_stack();
-	}
-}
-
 /* XXX: this doesn't handle the case where we got a negative dentry
         but a devfs entry has been registered in the meanwhile */
 static int devfs_d_revalidate_wait (struct dentry *dentry, struct nameidata *nd)
@@ -2290,13 +2252,19 @@ static int devfs_d_revalidate_wait (stru
     read_lock (&parent->u.dir.lock);
     if (dentry->d_fsdata)
     {
-	get_devfs_lookup_struct(lookup_info);
 	set_current_state (TASK_UNINTERRUPTIBLE);
 	add_wait_queue (&lookup_info->wait_queue, &wait);
 	read_unlock (&parent->u.dir.lock);
 	schedule ();
-	remove_wait_queue (&lookup_info->wait_queue, &wait);
-	put_devfs_lookup_struct(lookup_info);
+	/*
+	 * This does not need nor should remove wait from wait_queue.
+	 * Wait queue head is never reused - nothing is ever added to it
+	 * after all waiters have been waked up and head itself disappears
+	 * very soon after it. Moreover it is local variable on stack that
+	 * is likely to have already disappeared so any reference to it
+	 * at this point is buggy.
+	 */
+
     }
     else read_unlock (&parent->u.dir.lock);
     return 1;
@@ -2308,7 +2276,7 @@ static int devfs_d_revalidate_wait (stru
 static struct dentry *devfs_lookup (struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 {
     struct devfs_entry tmp;  /*  Must stay in scope until devfsd idle again  */
-    struct devfs_lookup_struct *lookup_info;
+    struct devfs_lookup_struct lookup_info;
     struct fs_info *fs_info = dir->i_sb->s_fs_info;
     struct devfs_entry *parent, *de;
     struct inode *inode;
@@ -2325,10 +2293,9 @@ static struct dentry *devfs_lookup (stru
     read_lock (&parent->u.dir.lock);
     de = _devfs_search_dir (parent, dentry->d_name.name, dentry->d_name.len);
     read_unlock (&parent->u.dir.lock);
-    lookup_info = new_devfs_lookup_struct();
-    if (!lookup_info) return ERR_PTR(-ENOMEM);
-    lookup_info->de = de;
-    dentry->d_fsdata = lookup_info;
+    lookup_info.de = de;
+    init_waitqueue_head (&lookup_info.wait_queue);
+    dentry->d_fsdata = &lookup_info;
     if (de == NULL)
     {   /*  Try with devfsd. For any kind of failure, leave a negative dentry
 	    so someone else can deal with it (in the case where the sysadmin
@@ -2338,7 +2305,6 @@ static struct dentry *devfs_lookup (stru
 	if (try_modload (parent, fs_info,
 			 dentry->d_name.name, dentry->d_name.len, &tmp) < 0)
 	{   /*  Lookup event was not queued to devfsd  */
-	    put_devfs_lookup_struct(lookup_info);
 	    d_add (dentry, NULL);
 	    return NULL;
 	}
@@ -2350,7 +2316,7 @@ static struct dentry *devfs_lookup (stru
 	revalidation  */
     up (&dir->i_sem);
     wait_for_devfsd_finished (fs_info);  /*  If I'm not devfsd, must wait  */
-    de = lookup_info->de;
+    de = lookup_info.de;
     /*  If someone else has been so kind as to make the inode, we go home
 	early  */
     if (dentry->d_inode) goto out;
@@ -2377,8 +2343,7 @@ out:
     write_lock (&parent->u.dir.lock);
     dentry->d_op = &devfs_dops;
     dentry->d_fsdata = NULL;
-    wake_up (&lookup_info->wait_queue);
-    put_devfs_lookup_struct(lookup_info);
+    wake_up (&lookup_info.wait_queue);
     write_unlock (&parent->u.dir.lock);
     down (&dir->i_sem);      /*  Grab it again because them's the rules  */
     devfs_put (de);

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2003-07-26 16:04 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <E198K0q-000Am8-00.arvidjaar-mail-ru@f23.mail.ru>
     [not found] ` <Pine.LNX.4.55.0304231157560.1309@marabou.research.att.com>
     [not found]   ` <Pine.LNX.4.55.0305050005230.1278@marabou.research.att.com>
2003-07-06 17:06     ` [PATCH][2.5.73] stack corruption in devfs_lookup Andrey Borzenkov
2003-07-06 19:03       ` Andrew Morton
     [not found]         ` <20030706175405.518f680d.akpm@osdl.org>
2003-07-07 19:06           ` [PATCH][2.5.74] devfs lookup deadlock/stack corruption combined patch Andrey Borzenkov
2003-07-07 21:00             ` Andrew Morton
2003-07-08 17:49               ` Andrey Borzenkov
2003-07-09  1:20                 ` Herbert Poetzl
2003-07-09  1:26                   ` Andrew Morton
2003-07-09  2:09                     ` Herbert Poetzl
2003-07-09 10:34                       ` Thierry Vignaud
2003-07-07 21:41             ` Pavel Roskin
2003-07-26 14:58         ` [PATCH][2.6.0-test1] redesign - stack corruption in devfs_lookup Andrey Borzenkov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).