linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH -mm] swsusp: freeze user space processes first
@ 2006-02-05  9:14 Rafael J. Wysocki
  2006-02-05  9:38 ` Andrew Morton
  0 siblings, 1 reply; 12+ messages in thread
From: Rafael J. Wysocki @ 2006-02-05  9:14 UTC (permalink / raw)
  To: Andrew Morton; +Cc: LKML, Pavel Machek, Nigel Cunningham

Hi,

This patch allows swsusp to freeze processes successfully under heavy load
by freezing userspace processes before kernel threads.

[Thanks to Nigel Cunningham <nigel@suspend2.net> for suggesting the
way to go.]

Please consider for applying.

Greetings,
Rafael


Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@suse.cz>

 kernel/power/disk.c    |    1 
 kernel/power/process.c |   61 ++++++++++++++++++++++++++++++++++++-------------
 kernel/power/user.c    |    1 
 3 files changed, 46 insertions(+), 17 deletions(-)

Index: linux-2.6.16-rc1-mm5/kernel/power/process.c
===================================================================
--- linux-2.6.16-rc1-mm5.orig/kernel/power/process.c
+++ linux-2.6.16-rc1-mm5/kernel/power/process.c
@@ -12,11 +12,12 @@
 #include <linux/interrupt.h>
 #include <linux/suspend.h>
 #include <linux/module.h>
+#include <linux/syscalls.h>
 
 /* 
  * Timeout for stopping processes
  */
-#define TIMEOUT	(6 * HZ)
+#define TIMEOUT	(20 * HZ)
 
 
 static inline int freezeable(struct task_struct * p)
@@ -54,38 +55,62 @@ void refrigerator(void)
 	current->state = save;
 }
 
+static inline void freeze_process(struct task_struct *p)
+{
+	unsigned long flags;
+
+	if (!freezing(p)) {
+		freeze(p);
+		spin_lock_irqsave(&p->sighand->siglock, flags);
+		signal_wake_up(p, 0);
+		spin_unlock_irqrestore(&p->sighand->siglock, flags);
+	}
+}
+
 /* 0 = success, else # of processes that we failed to stop */
 int freeze_processes(void)
 {
-	int todo;
+	int todo, nr_user, user_frozen;
 	unsigned long start_time;
 	struct task_struct *g, *p;
 	unsigned long flags;
 
 	printk( "Stopping tasks: " );
 	start_time = jiffies;
+	user_frozen = 0;
 	do {
-		todo = 0;
+		nr_user = todo = 0;
 		read_lock(&tasklist_lock);
 		do_each_thread(g, p) {
 			if (!freezeable(p))
 				continue;
 			if (frozen(p))
 				continue;
-
-			freeze(p);
-			spin_lock_irqsave(&p->sighand->siglock, flags);
-			signal_wake_up(p, 0);
-			spin_unlock_irqrestore(&p->sighand->siglock, flags);
-			todo++;
+			if (p->mm && !(p->flags & PF_BORROWED_MM)) {
+				/* The task is a user-space one.
+				 * Freeze it unless there's a vfork completion
+				 * pending
+				 */
+				if (!p->vfork_done)
+					freeze_process(p);
+				nr_user++;
+			} else {
+				/* Freeze only if the user space is frozen */
+				if (user_frozen)
+					freeze_process(p);
+				todo++;
+			}
 		} while_each_thread(g, p);
 		read_unlock(&tasklist_lock);
+		todo += nr_user;
+		if (!user_frozen && !nr_user) {
+			sys_sync();
+			start_time = jiffies;
+		}
+		user_frozen = !nr_user;
 		yield();			/* Yield is okay here */
-		if (todo && time_after(jiffies, start_time + TIMEOUT)) {
-			printk( "\n" );
-			printk(KERN_ERR " stopping tasks timed out (%d tasks remaining)\n", todo );
+		if (todo && time_after(jiffies, start_time + TIMEOUT))
 			break;
-		}
 	} while(todo);
 
 	/* This does not unfreeze processes that are already frozen
@@ -94,8 +119,14 @@ int freeze_processes(void)
 	 * but it cleans up leftover PF_FREEZE requests.
 	 */
 	if (todo) {
+		printk( "\n" );
+		printk(KERN_ERR " stopping tasks timed out "
+			"after %d seconds (%d tasks remaining):\n",
+			TIMEOUT / HZ, todo);
 		read_lock(&tasklist_lock);
-		do_each_thread(g, p)
+		do_each_thread(g, p) {
+			if (freezeable(p) && !frozen(p))
+				printk(KERN_ERR "  %s\n", p->comm);
 			if (freezing(p)) {
 				pr_debug("  clean up: %s\n", p->comm);
 				p->flags &= ~PF_FREEZE;
@@ -103,7 +134,7 @@ int freeze_processes(void)
 				recalc_sigpending_tsk(p);
 				spin_unlock_irqrestore(&p->sighand->siglock, flags);
 			}
-		while_each_thread(g, p);
+		} while_each_thread(g, p);
 		read_unlock(&tasklist_lock);
 		return todo;
 	}
Index: linux-2.6.16-rc1-mm5/kernel/power/disk.c
===================================================================
--- linux-2.6.16-rc1-mm5.orig/kernel/power/disk.c
+++ linux-2.6.16-rc1-mm5/kernel/power/disk.c
@@ -73,7 +73,6 @@ static int prepare_processes(void)
 	int error;
 
 	pm_prepare_console();
-	sys_sync();
 	disable_nonboot_cpus();
 
 	if (freeze_processes()) {
Index: linux-2.6.16-rc1-mm5/kernel/power/user.c
===================================================================
--- linux-2.6.16-rc1-mm5.orig/kernel/power/user.c
+++ linux-2.6.16-rc1-mm5/kernel/power/user.c
@@ -137,7 +137,6 @@ static int snapshot_ioctl(struct inode *
 	case SNAPSHOT_FREEZE:
 		if (data->frozen)
 			break;
-		sys_sync();
 		down(&pm_sem);
 		pm_prepare_console();
 		disable_nonboot_cpus();

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH -mm] swsusp: freeze user space processes first
  2006-02-05  9:14 [PATCH -mm] swsusp: freeze user space processes first Rafael J. Wysocki
@ 2006-02-05  9:38 ` Andrew Morton
  2006-02-05 10:34   ` Rafael J. Wysocki
  0 siblings, 1 reply; 12+ messages in thread
From: Andrew Morton @ 2006-02-05  9:38 UTC (permalink / raw)
  To: Rafael J. Wysocki; +Cc: linux-kernel, pavel, nigel

"Rafael J. Wysocki" <rjw@sisk.pl> wrote:
>
> Hi,
> 
> This patch allows swsusp to freeze processes successfully under heavy load
> by freezing userspace processes before kernel threads.
> 
> ...
>
>  /* 0 = success, else # of processes that we failed to stop */
>  int freeze_processes(void)
>  {
> -	int todo;
> +	int todo, nr_user, user_frozen;
>  	unsigned long start_time;
>  	struct task_struct *g, *p;
>  	unsigned long flags;
>  
>  	printk( "Stopping tasks: " );
>  	start_time = jiffies;
> +	user_frozen = 0;
>  	do {
> -		todo = 0;
> +		nr_user = todo = 0;
>  		read_lock(&tasklist_lock);
>  		do_each_thread(g, p) {
>  			if (!freezeable(p))
>  				continue;
>  			if (frozen(p))
>  				continue;
> -
> -			freeze(p);
> -			spin_lock_irqsave(&p->sighand->siglock, flags);
> -			signal_wake_up(p, 0);
> -			spin_unlock_irqrestore(&p->sighand->siglock, flags);
> -			todo++;
> +			if (p->mm && !(p->flags & PF_BORROWED_MM)) {
> +				/* The task is a user-space one.
> +				 * Freeze it unless there's a vfork completion
> +				 * pending
> +				 */
> +				if (!p->vfork_done)
> +					freeze_process(p);
> +				nr_user++;
> +			} else {
> +				/* Freeze only if the user space is frozen */
> +				if (user_frozen)
> +					freeze_process(p);
> +				todo++;
> +			}
>  		} while_each_thread(g, p);
>  		read_unlock(&tasklist_lock);
> +		todo += nr_user;
> +		if (!user_frozen && !nr_user) {
> +			sys_sync();
> +			start_time = jiffies;
> +		}
> +		user_frozen = !nr_user;
>  		yield();			/* Yield is okay here */
> -		if (todo && time_after(jiffies, start_time + TIMEOUT)) {
> -			printk( "\n" );
> -			printk(KERN_ERR " stopping tasks timed out (%d tasks remaining)\n", todo );
> +		if (todo && time_after(jiffies, start_time + TIMEOUT))
>  			break;

The logic in that loop makes my brain burst.

What happens if a process does vfork();sleep(100000000)?


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH -mm] swsusp: freeze user space processes first
  2006-02-05  9:38 ` Andrew Morton
@ 2006-02-05 10:34   ` Rafael J. Wysocki
  2006-02-05 10:50     ` Ingo Molnar
  0 siblings, 1 reply; 12+ messages in thread
From: Rafael J. Wysocki @ 2006-02-05 10:34 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, pavel, nigel

Hi,

On Sunday 05 February 2006 10:38, Andrew Morton wrote:
> "Rafael J. Wysocki" <rjw@sisk.pl> wrote:
> > This patch allows swsusp to freeze processes successfully under heavy load
> > by freezing userspace processes before kernel threads.
> > 
> > ...
> >
> >  /* 0 = success, else # of processes that we failed to stop */
> >  int freeze_processes(void)
> >  {
> > -	int todo;
> > +	int todo, nr_user, user_frozen;
> >  	unsigned long start_time;
> >  	struct task_struct *g, *p;
> >  	unsigned long flags;
> >  
> >  	printk( "Stopping tasks: " );
> >  	start_time = jiffies;
> > +	user_frozen = 0;
> >  	do {
> > -		todo = 0;
> > +		nr_user = todo = 0;
> >  		read_lock(&tasklist_lock);
> >  		do_each_thread(g, p) {
> >  			if (!freezeable(p))
> >  				continue;
> >  			if (frozen(p))
> >  				continue;
> > -
> > -			freeze(p);
> > -			spin_lock_irqsave(&p->sighand->siglock, flags);
> > -			signal_wake_up(p, 0);
> > -			spin_unlock_irqrestore(&p->sighand->siglock, flags);
> > -			todo++;
> > +			if (p->mm && !(p->flags & PF_BORROWED_MM)) {
> > +				/* The task is a user-space one.
> > +				 * Freeze it unless there's a vfork completion
> > +				 * pending
> > +				 */
> > +				if (!p->vfork_done)
> > +					freeze_process(p);
> > +				nr_user++;
> > +			} else {
> > +				/* Freeze only if the user space is frozen */
> > +				if (user_frozen)
> > +					freeze_process(p);
> > +				todo++;
> > +			}
> >  		} while_each_thread(g, p);
> >  		read_unlock(&tasklist_lock);
> > +		todo += nr_user;
> > +		if (!user_frozen && !nr_user) {
> > +			sys_sync();
> > +			start_time = jiffies;
> > +		}
> > +		user_frozen = !nr_user;
> >  		yield();			/* Yield is okay here */
> > -		if (todo && time_after(jiffies, start_time + TIMEOUT)) {
> > -			printk( "\n" );
> > -			printk(KERN_ERR " stopping tasks timed out (%d tasks remaining)\n", todo );
> > +		if (todo && time_after(jiffies, start_time + TIMEOUT))
> >  			break;
> 
> The logic in that loop makes my brain burst.
> 
> What happens if a process does vfork();sleep(100000000)?

The freezing of processes will fail due to the timeout.

Without the if (!p->vfork_done) it would fail too, because the child would
be frozen and the parent would wait for the vfork completion in the
TASK_UNINTERRUPTIBLE state (ie. unfreezeable).  But in that case
we have a race between the "freezer" and the child process
(ie. if the child gets frozen before it completes the vfork completion, the
paret will be unfreezeable) which sometimes leads to a failure when it
should not.  [We have a test case showing this.]

Greetings,
Rafael

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH -mm] swsusp: freeze user space processes first
  2006-02-05 10:34   ` Rafael J. Wysocki
@ 2006-02-05 10:50     ` Ingo Molnar
  2006-02-05 11:11       ` Rafael J. Wysocki
  2006-02-05 11:11       ` [PATCH -mm] swsusp: freeze user space processes first Pavel Machek
  0 siblings, 2 replies; 12+ messages in thread
From: Ingo Molnar @ 2006-02-05 10:50 UTC (permalink / raw)
  To: Rafael J. Wysocki; +Cc: Andrew Morton, linux-kernel, pavel, nigel


* Rafael J. Wysocki <rjw@sisk.pl> wrote:

> > The logic in that loop makes my brain burst.
> > 
> > What happens if a process does vfork();sleep(100000000)?
> 
> The freezing of processes will fail due to the timeout.
> 
> Without the if (!p->vfork_done) it would fail too, because the child 
> would be frozen and the parent would wait for the vfork completion in 
> the TASK_UNINTERRUPTIBLE state (ie. unfreezeable).  But in that case 
> we have a race between the "freezer" and the child process (ie. if the 
> child gets frozen before it completes the vfork completion, the paret 
> will be unfreezeable) which sometimes leads to a failure when it 
> should not.  [We have a test case showing this.]

then i'd suggest to change the vfork implementation to make this code 
freezable. Nothing that userspace does should cause freezing to fail.  
If it does, we've designed things incorrectly on the kernel side.

	Ingo

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH -mm] swsusp: freeze user space processes first
  2006-02-05 10:50     ` Ingo Molnar
@ 2006-02-05 11:11       ` Rafael J. Wysocki
  2006-02-05 11:18         ` Pavel Machek
  2006-02-05 11:11       ` [PATCH -mm] swsusp: freeze user space processes first Pavel Machek
  1 sibling, 1 reply; 12+ messages in thread
From: Rafael J. Wysocki @ 2006-02-05 11:11 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Andrew Morton, linux-kernel, pavel, nigel

On Sunday 05 February 2006 11:50, Ingo Molnar wrote:
> 
> * Rafael J. Wysocki <rjw@sisk.pl> wrote:
> 
> > > The logic in that loop makes my brain burst.
> > > 
> > > What happens if a process does vfork();sleep(100000000)?
> > 
> > The freezing of processes will fail due to the timeout.
> > 
> > Without the if (!p->vfork_done) it would fail too, because the child 
> > would be frozen and the parent would wait for the vfork completion in 
> > the TASK_UNINTERRUPTIBLE state (ie. unfreezeable).  But in that case 
> > we have a race between the "freezer" and the child process (ie. if the 
> > child gets frozen before it completes the vfork completion, the paret 
> > will be unfreezeable) which sometimes leads to a failure when it 
> > should not.  [We have a test case showing this.]
> 
> then i'd suggest to change the vfork implementation to make this code 
> freezable.

I think you are right, but I don't know how to do this.

> Nothing that userspace does should cause freezing to fail.   If it does,
> we've designed things incorrectly on the kernel side. 

I tend to agree.

Generally, the problem is due to the use of completions where userland
processes are waited for.  The two places I know of are the vfork
implementation and the usermode helper code.

Greetings,
Rafael

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH -mm] swsusp: freeze user space processes first
  2006-02-05 10:50     ` Ingo Molnar
  2006-02-05 11:11       ` Rafael J. Wysocki
@ 2006-02-05 11:11       ` Pavel Machek
  2006-02-05 14:22         ` Ingo Molnar
  1 sibling, 1 reply; 12+ messages in thread
From: Pavel Machek @ 2006-02-05 11:11 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Rafael J. Wysocki, Andrew Morton, linux-kernel, nigel

On Ne 05-02-06 11:50:37, Ingo Molnar wrote:
> 
> * Rafael J. Wysocki <rjw@sisk.pl> wrote:
> 
> > > The logic in that loop makes my brain burst.
> > > 
> > > What happens if a process does vfork();sleep(100000000)?
> > 
> > The freezing of processes will fail due to the timeout.
> > 
> > Without the if (!p->vfork_done) it would fail too, because the child 
> > would be frozen and the parent would wait for the vfork completion in 
> > the TASK_UNINTERRUPTIBLE state (ie. unfreezeable).  But in that case 
> > we have a race between the "freezer" and the child process (ie. if the 
> > child gets frozen before it completes the vfork completion, the paret 
> > will be unfreezeable) which sometimes leads to a failure when it 
> > should not.  [We have a test case showing this.]
> 
> then i'd suggest to change the vfork implementation to make this code 
> freezable. Nothing that userspace does should cause freezing to fail.  
> If it does, we've designed things incorrectly on the kernel side.

Does that also mean we have bugs with signal delivery? If vfork();
sleep(100000); causes process to be uninterruptible for few days, it
will not be killable and increase load average...
								Pavel
-- 
Thanks, Sharp!

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH -mm] swsusp: freeze user space processes first
  2006-02-05 11:11       ` Rafael J. Wysocki
@ 2006-02-05 11:18         ` Pavel Machek
  2006-02-05 11:39           ` Rafael J. Wysocki
  0 siblings, 1 reply; 12+ messages in thread
From: Pavel Machek @ 2006-02-05 11:18 UTC (permalink / raw)
  To: Rafael J. Wysocki; +Cc: Ingo Molnar, Andrew Morton, linux-kernel, nigel

On Ne 05-02-06 12:11:06, Rafael J. Wysocki wrote:
> On Sunday 05 February 2006 11:50, Ingo Molnar wrote:
> > 
> > * Rafael J. Wysocki <rjw@sisk.pl> wrote:
> > 
> > > > The logic in that loop makes my brain burst.
> > > > 
> > > > What happens if a process does vfork();sleep(100000000)?
> > > 
> > > The freezing of processes will fail due to the timeout.
> > > 
> > > Without the if (!p->vfork_done) it would fail too, because the child 
> > > would be frozen and the parent would wait for the vfork completion in 
> > > the TASK_UNINTERRUPTIBLE state (ie. unfreezeable).  But in that case 
> > > we have a race between the "freezer" and the child process (ie. if the 
> > > child gets frozen before it completes the vfork completion, the paret 
> > > will be unfreezeable) which sometimes leads to a failure when it 
> > > should not.  [We have a test case showing this.]
> > 
> > then i'd suggest to change the vfork implementation to make this code 
> > freezable.
> 
> I think you are right, but I don't know how to do this.
> 
> > Nothing that userspace does should cause freezing to fail.   If it does,
> > we've designed things incorrectly on the kernel side. 
> 
> I tend to agree.
> 
> Generally, the problem is due to the use of completions where userland
> processes are waited for.  The two places I know of are the vfork
> implementation and the usermode helper code.

Can you produce userland testcase? If we have uninterruptible process for
days... that's a bug in kernel, suspend or not.
								Pavel
-- 
Thanks, Sharp!

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH -mm] swsusp: freeze user space processes first
  2006-02-05 11:18         ` Pavel Machek
@ 2006-02-05 11:39           ` Rafael J. Wysocki
  2006-02-05 13:34             ` Rafael J. Wysocki
  2006-02-10 20:20             ` vfork makes processes uninterruptible [was Re: [PATCH -mm] swsusp: freeze user space processes first] Pavel Machek
  0 siblings, 2 replies; 12+ messages in thread
From: Rafael J. Wysocki @ 2006-02-05 11:39 UTC (permalink / raw)
  To: Pavel Machek; +Cc: Ingo Molnar, Andrew Morton, linux-kernel, nigel

On Sunday 05 February 2006 12:18, Pavel Machek wrote:
> On Ne 05-02-06 12:11:06, Rafael J. Wysocki wrote:
> > On Sunday 05 February 2006 11:50, Ingo Molnar wrote:
> > > 
> > > * Rafael J. Wysocki <rjw@sisk.pl> wrote:
> > > 
> > > > > The logic in that loop makes my brain burst.
> > > > > 
> > > > > What happens if a process does vfork();sleep(100000000)?
> > > > 
> > > > The freezing of processes will fail due to the timeout.
> > > > 
> > > > Without the if (!p->vfork_done) it would fail too, because the child 
> > > > would be frozen and the parent would wait for the vfork completion in 
> > > > the TASK_UNINTERRUPTIBLE state (ie. unfreezeable).  But in that case 
> > > > we have a race between the "freezer" and the child process (ie. if the 
> > > > child gets frozen before it completes the vfork completion, the paret 
> > > > will be unfreezeable) which sometimes leads to a failure when it 
> > > > should not.  [We have a test case showing this.]
> > > 
> > > then i'd suggest to change the vfork implementation to make this code 
> > > freezable.
> > 
> > I think you are right, but I don't know how to do this.
> > 
> > > Nothing that userspace does should cause freezing to fail.   If it does,
> > > we've designed things incorrectly on the kernel side. 
> > 
> > I tend to agree.
> > 
> > Generally, the problem is due to the use of completions where userland
> > processes are waited for.  The two places I know of are the vfork
> > implementation and the usermode helper code.
> 
> Can you produce userland testcase? If we have uninterruptible process for
> days... that's a bug in kernel, suspend or not.

Sure, no problem.  [Pretty scary, no?]

The test code:

#include <sys/types.h>
#include <unistd.h>

int main(int argc, char *argv[])
{
	vfork();
	sleep(300);

	return 0;
}

The result:

rafael@albercik:~/programming/c> ./vfork_test &
[1] 12288
rafael@albercik:~/programming/c> ps l
F   UID   PID  PPID PRI  NI    VSZ   RSS WCHAN  STAT TTY        TIME COMMAND
0   500  6937  6931  17   0  10048  2368 read_c Ss+  pts/1      0:00 /bin/bash
0   500 12139 12133  15   0  10052  2380 wait   Ss   pts/2      0:00 /bin/bash
0   500 12288 12139  15   0   2420   304 fork   D    pts/2      0:00 ./vfork_tes
1   500 12291 12288  18   0   2420   304 hrtime S    pts/2      0:00 ./vfork_tes
0   500 12372 12139  15   0   3596   820 -      R+   pts/2      0:00 ps l
rafael@albercik:~/programming/c> kill 12288
rafael@albercik:~/programming/c> ps l
F   UID   PID  PPID PRI  NI    VSZ   RSS WCHAN  STAT TTY        TIME COMMAND
0   500  6937  6931  17   0  10048  2368 read_c Ss+  pts/1      0:00 /bin/bash
0   500 12139 12133  15   0  10052  2380 wait   Ss   pts/2      0:00 /bin/bash
0   500 12288 12139  15   0   2420   304 fork   D    pts/2      0:00 ./vfork_tes
1   500 12291 12288  18   0   2420   304 hrtime S    pts/2      0:00 ./vfork_tes
0   500 12380 12139  17   0   3600   820 -      R+   pts/2      0:00 ps l
rafael@albercik:~/programming/c> kill -9 12288
rafael@albercik:~/programming/c> ps l
F   UID   PID  PPID PRI  NI    VSZ   RSS WCHAN  STAT TTY        TIME COMMAND
0   500  6937  6931  17   0  10048  2368 read_c Ss+  pts/1      0:00 /bin/bash
0   500 12139 12133  15   0  10052  2380 wait   Ss   pts/2      0:00 /bin/bash
0   500 12288 12139  15   0   2420   304 fork   D    pts/2      0:00 ./vfork_tes
1   500 12291 12288  18   0   2420   304 hrtime S    pts/2      0:00 ./vfork_tes
0   500 12387 12139  16   0   3596   816 -      R+   pts/2      0:00 ps l
rafael@albercik:~/programming/c>

Greetings,
Rafael

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH -mm] swsusp: freeze user space processes first
  2006-02-05 11:39           ` Rafael J. Wysocki
@ 2006-02-05 13:34             ` Rafael J. Wysocki
  2006-02-10 20:20             ` vfork makes processes uninterruptible [was Re: [PATCH -mm] swsusp: freeze user space processes first] Pavel Machek
  1 sibling, 0 replies; 12+ messages in thread
From: Rafael J. Wysocki @ 2006-02-05 13:34 UTC (permalink / raw)
  To: Pavel Machek; +Cc: Ingo Molnar, Andrew Morton, linux-kernel, nigel

On Sunday 05 February 2006 12:39, Rafael J. Wysocki wrote:
> On Sunday 05 February 2006 12:18, Pavel Machek wrote:
> > On Ne 05-02-06 12:11:06, Rafael J. Wysocki wrote:
> > > On Sunday 05 February 2006 11:50, Ingo Molnar wrote:
> > > > 
> > > > * Rafael J. Wysocki <rjw@sisk.pl> wrote:
> > > > 
> > > > > > The logic in that loop makes my brain burst.
> > > > > > 
> > > > > > What happens if a process does vfork();sleep(100000000)?
> > > > > 
> > > > > The freezing of processes will fail due to the timeout.
> > > > > 
> > > > > Without the if (!p->vfork_done) it would fail too, because the child 
> > > > > would be frozen and the parent would wait for the vfork completion in 
> > > > > the TASK_UNINTERRUPTIBLE state (ie. unfreezeable).  But in that case 
> > > > > we have a race between the "freezer" and the child process (ie. if the 
> > > > > child gets frozen before it completes the vfork completion, the paret 
> > > > > will be unfreezeable) which sometimes leads to a failure when it 
> > > > > should not.  [We have a test case showing this.]
> > > > 
> > > > then i'd suggest to change the vfork implementation to make this code 
> > > > freezable.
> > > 
> > > I think you are right, but I don't know how to do this.
> > > 
> > > > Nothing that userspace does should cause freezing to fail.   If it does,
> > > > we've designed things incorrectly on the kernel side. 
> > > 
> > > I tend to agree.
> > > 
> > > Generally, the problem is due to the use of completions where userland
> > > processes are waited for.  The two places I know of are the vfork
> > > implementation and the usermode helper code.
> > 
> > Can you produce userland testcase? If we have uninterruptible process for
> > days... that's a bug in kernel, suspend or not.
> 
> Sure, no problem.  [Pretty scary, no?]

Actually it's not that bad, because the parent will be killable when the child
exit()s (or gets killed).

Greetings,
Rafael

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH -mm] swsusp: freeze user space processes first
  2006-02-05 11:11       ` [PATCH -mm] swsusp: freeze user space processes first Pavel Machek
@ 2006-02-05 14:22         ` Ingo Molnar
  2006-02-10 20:36           ` Pavel Machek
  0 siblings, 1 reply; 12+ messages in thread
From: Ingo Molnar @ 2006-02-05 14:22 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Rafael J. Wysocki, Andrew Morton, linux-kernel, nigel,
	Ulrich Drepper, Linus Torvalds


* Pavel Machek <pavel@suse.cz> wrote:

> > then i'd suggest to change the vfork implementation to make this code 
> > freezable. Nothing that userspace does should cause freezing to fail.  
> > If it does, we've designed things incorrectly on the kernel side.
> 
> Does that also mean we have bugs with signal delivery? If vfork(); 
> sleep(100000); causes process to be uninterruptible for few days, it 
> will not be killable and increase load average...

"half-done" vforks are indeed in uninterruptible sleep. They are not 
directly killable, but they are killable indirectly through their 
parent. But yes, in theory it would be cleaner if the vfork code used 
wait_for_completion_interruptible(). It has to be done carefully though, 
for two reasons:

- implementational: use task_lock()/task_unlock() to protect
  p->vfork_done in mm_release() and in do_fork().

- semantical: signals to a vfork-ing parent are defined to be delayed
  to after the child has released the parent/MM.

the (untested) patch below handles issue #1, but doesnt handle issue #2: 
this patch opens up a vfork parent to get interrupted early, with any 
signal.

a full approach would probably be to set up a restart handler perhaps, 
and restart the wait later on? This would also require the &vfork 
completion structure [which is on the kernel stack right now] to be 
embedded in task_struct, to make sure the parent can wait on the child 
without extra state. (one more detail is nesting: a signal handler could 
do another vfork(), which thus needs to initialize the &vfork safely and 
in a race-free manner.)

i'm wondering whether signals handled in the parent during a vfork wait 
would be the correct semantics though. Ulrich?

	Ingo

--- linux/kernel/fork.c.orig
+++ linux/kernel/fork.c
@@ -423,16 +423,24 @@ EXPORT_SYMBOL_GPL(get_task_mm);
  */
 void mm_release(struct task_struct *tsk, struct mm_struct *mm)
 {
-	struct completion *vfork_done = tsk->vfork_done;
-
 	/* Get rid of any cached register state */
 	deactivate_mm(tsk, mm);
 
-	/* notify parent sleeping on vfork() */
-	if (vfork_done) {
-		tsk->vfork_done = NULL;
-		complete(vfork_done);
+	if (unlikely(tsk->vfork_done)) {
+		task_lock(tsk);
+		/*
+		 * notify parent sleeping on vfork().
+		 *
+		 * (re-check vfork_done under the task lock, to make sure
+		 *  we did not race with a signal sent to the parent)
+		 */
+		if (tsk->vfork_done) {
+			complete(tsk->vfork_done);
+			tsk->vfork_done = NULL;
+		}
+		task_unlock(tsk);
 	}
+
 	if (tsk->clear_child_tid && atomic_read(&mm->mm_users) > 1) {
 		u32 __user * tidptr = tsk->clear_child_tid;
 		tsk->clear_child_tid = NULL;
@@ -1287,7 +1295,21 @@ long do_fork(unsigned long clone_flags,
 		}
 
 		if (clone_flags & CLONE_VFORK) {
-			wait_for_completion(&vfork);
+			int ret = wait_for_completion_interruptible(&vfork);
+			if (unlikely(ret)) {
+				/*
+				 * make sure we did not race with
+				 * mm_release():
+				 */
+				task_lock(p);
+				if (p->vfork_done)
+					p->vfork_done = NULL;
+				else
+					ret = 0;
+				task_unlock(p);
+				if (ret)
+					return -EINTR;
+			}
 			if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE))
 				ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP);
 		}

^ permalink raw reply	[flat|nested] 12+ messages in thread

* vfork makes processes uninterruptible [was Re: [PATCH -mm] swsusp: freeze user space processes first]
  2006-02-05 11:39           ` Rafael J. Wysocki
  2006-02-05 13:34             ` Rafael J. Wysocki
@ 2006-02-10 20:20             ` Pavel Machek
  1 sibling, 0 replies; 12+ messages in thread
From: Pavel Machek @ 2006-02-10 20:20 UTC (permalink / raw)
  To: Rafael J. Wysocki; +Cc: Ingo Molnar, Andrew Morton, linux-kernel, nigel

Hi!

> > Can you produce userland testcase? If we have uninterruptible process for
> > days... that's a bug in kernel, suspend or not.
> 
> Sure, no problem.  [Pretty scary, no?]

Yes, pretty scary. It will also raise system load for 300 seconds
without any real load.

> The test code:
> 
> #include <sys/types.h>
> #include <unistd.h>
> 
> int main(int argc, char *argv[])
> {
> 	vfork();
> 	sleep(300);
> 
> 	return 0;
> }
> 
> The result:
> 
> rafael@albercik:~/programming/c> ./vfork_test &
> [1] 12288
> rafael@albercik:~/programming/c> ps l
> F   UID   PID  PPID PRI  NI    VSZ   RSS WCHAN  STAT TTY        TIME COMMAND
> 0   500  6937  6931  17   0  10048  2368 read_c Ss+  pts/1      0:00 /bin/bash
> 0   500 12139 12133  15   0  10052  2380 wait   Ss   pts/2      0:00 /bin/bash
> 0   500 12288 12139  15   0   2420   304 fork   D    pts/2      0:00 ./vfork_tes
> 1   500 12291 12288  18   0   2420   304 hrtime S    pts/2      0:00 ./vfork_tes
> 0   500 12372 12139  15   0   3596   820 -      R+   pts/2      0:00 ps l
> rafael@albercik:~/programming/c> kill 12288
> rafael@albercik:~/programming/c> ps l
> F   UID   PID  PPID PRI  NI    VSZ   RSS WCHAN  STAT TTY        TIME COMMAND
> 0   500  6937  6931  17   0  10048  2368 read_c Ss+  pts/1      0:00 /bin/bash
> 0   500 12139 12133  15   0  10052  2380 wait   Ss   pts/2      0:00 /bin/bash
> 0   500 12288 12139  15   0   2420   304 fork   D    pts/2      0:00 ./vfork_tes
> 1   500 12291 12288  18   0   2420   304 hrtime S    pts/2      0:00 ./vfork_tes
> 0   500 12380 12139  17   0   3600   820 -      R+   pts/2      0:00 ps l
> rafael@albercik:~/programming/c> kill -9 12288
> rafael@albercik:~/programming/c> ps l
> F   UID   PID  PPID PRI  NI    VSZ   RSS WCHAN  STAT TTY        TIME COMMAND
> 0   500  6937  6931  17   0  10048  2368 read_c Ss+  pts/1      0:00 /bin/bash
> 0   500 12139 12133  15   0  10052  2380 wait   Ss   pts/2      0:00 /bin/bash
> 0   500 12288 12139  15   0   2420   304 fork   D    pts/2      0:00 ./vfork_tes
> 1   500 12291 12288  18   0   2420   304 hrtime S    pts/2      0:00 ./vfork_tes
> 0   500 12387 12139  16   0   3596   816 -      R+   pts/2      0:00 ps l
> rafael@albercik:~/programming/c>
> 
> Greetings,
> Rafael

-- 
Web maintainer for suspend.sf.net (www.sf.net/projects/suspend) wanted...

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH -mm] swsusp: freeze user space processes first
  2006-02-05 14:22         ` Ingo Molnar
@ 2006-02-10 20:36           ` Pavel Machek
  0 siblings, 0 replies; 12+ messages in thread
From: Pavel Machek @ 2006-02-10 20:36 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Rafael J. Wysocki, Andrew Morton, linux-kernel, nigel,
	Ulrich Drepper, Linus Torvalds

Hi!

> > > then i'd suggest to change the vfork implementation to make this code 
> > > freezable. Nothing that userspace does should cause freezing to fail.  
> > > If it does, we've designed things incorrectly on the kernel side.
> > 
> > Does that also mean we have bugs with signal delivery? If vfork(); 
> > sleep(100000); causes process to be uninterruptible for few days, it 
> > will not be killable and increase load average...
> 
> "half-done" vforks are indeed in uninterruptible sleep. They are not 
> directly killable, but they are killable indirectly through their 
> parent. But yes, in theory it would be cleaner if the vfork code used 
> wait_for_completion_interruptible(). It has to be done carefully though, 
> for two reasons:
> 
> - implementational: use task_lock()/task_unlock() to protect
>   p->vfork_done in mm_release() and in do_fork().
> 
> - semantical: signals to a vfork-ing parent are defined to be delayed
>   to after the child has released the parent/MM.

We could still deliver sigkill and stopping for the freezer, no?

> the (untested) patch below handles issue #1, but doesnt handle issue #2: 
> this patch opens up a vfork parent to get interrupted early, with any 
> signal.

It seems to fix D state for me, and does not seem to have any ill
effects.

							Pavel

-- 
Web maintainer for suspend.sf.net (www.sf.net/projects/suspend) wanted...

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2006-02-10 20:36 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-02-05  9:14 [PATCH -mm] swsusp: freeze user space processes first Rafael J. Wysocki
2006-02-05  9:38 ` Andrew Morton
2006-02-05 10:34   ` Rafael J. Wysocki
2006-02-05 10:50     ` Ingo Molnar
2006-02-05 11:11       ` Rafael J. Wysocki
2006-02-05 11:18         ` Pavel Machek
2006-02-05 11:39           ` Rafael J. Wysocki
2006-02-05 13:34             ` Rafael J. Wysocki
2006-02-10 20:20             ` vfork makes processes uninterruptible [was Re: [PATCH -mm] swsusp: freeze user space processes first] Pavel Machek
2006-02-05 11:11       ` [PATCH -mm] swsusp: freeze user space processes first Pavel Machek
2006-02-05 14:22         ` Ingo Molnar
2006-02-10 20:36           ` Pavel Machek

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).