All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] softlockup: remove timestamp checking from hung_task
@ 2009-02-06 23:37 Mandeep Singh Baines
  2009-02-07 16:23 ` Frederic Weisbecker
  2009-02-09 10:04 ` Ingo Molnar
  0 siblings, 2 replies; 8+ messages in thread
From: Mandeep Singh Baines @ 2009-02-06 23:37 UTC (permalink / raw)
  To: mingo, fweisbec; +Cc: linux-kernel, rientjes, mbligh, thockin, akpm

Patch against tip/core/softlockup

---
Impact: saves sizeof(long) bytes per task_struct

By guaranteeing that sysctl_hung_task_timeout_secs seconds have elapsed between
tasklist scans, we can avoid using timestamps.

Signed-off-by: Mandeep Singh Baines <msb@google.com>
---
 include/linux/sched.h |    1 -
 kernel/fork.c         |    8 +++-----
 kernel/hung_task.c    |   48 +++++++++---------------------------------------
 3 files changed, 12 insertions(+), 45 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2a2811c..e0d723f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1241,7 +1241,6 @@ struct task_struct {
 #endif
 #ifdef CONFIG_DETECT_HUNG_TASK
 /* hung task detection */
-	unsigned long last_switch_timestamp;
 	unsigned long last_switch_count;
 #endif
 /* CPU-specific state of this task */
diff --git a/kernel/fork.c b/kernel/fork.c
index fb94442..bf582f7 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -639,6 +639,9 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
 
 	tsk->min_flt = tsk->maj_flt = 0;
 	tsk->nvcsw = tsk->nivcsw = 0;
+#ifdef CONFIG_DETECT_HUNG_TASK
+	tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
+#endif
 
 	tsk->mm = NULL;
 	tsk->active_mm = NULL;
@@ -1041,11 +1044,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
 	p->default_timer_slack_ns = current->timer_slack_ns;
 
-#ifdef CONFIG_DETECT_HUNG_TASK
-	p->last_switch_count = 0;
-	p->last_switch_timestamp = 0;
-#endif
-
 	task_io_accounting_init(&p->ioac);
 	acct_clear_integrals(p);
 
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 3951a80..4a10756 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -34,7 +34,6 @@ unsigned long __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
  * Zero means infinite timeout - no checking done:
  */
 unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120;
-static unsigned long __read_mostly hung_task_poll_jiffies;
 
 unsigned long __read_mostly sysctl_hung_task_warnings = 10;
 
@@ -69,33 +68,17 @@ static struct notifier_block panic_block = {
 	.notifier_call = hung_task_panic,
 };
 
-/*
- * Returns seconds, approximately.  We don't need nanosecond
- * resolution, and we don't need to waste time with a big divide when
- * 2^30ns == 1.074s.
- */
-static unsigned long get_timestamp(void)
-{
-	int this_cpu = raw_smp_processor_id();
-
-	return cpu_clock(this_cpu) >> 30LL;  /* 2^30 ~= 10^9 */
-}
-
-static void check_hung_task(struct task_struct *t, unsigned long now,
-			    unsigned long timeout)
+static void check_hung_task(struct task_struct *t, unsigned long timeout)
 {
 	unsigned long switch_count = t->nvcsw + t->nivcsw;
 
 	if (t->flags & PF_FROZEN)
 		return;
 
-	if (switch_count != t->last_switch_count || !t->last_switch_timestamp) {
+	if (switch_count != t->last_switch_count) {
 		t->last_switch_count = switch_count;
-		t->last_switch_timestamp = now;
 		return;
 	}
-	if ((long)(now - t->last_switch_timestamp) < timeout)
-		return;
 	if (!sysctl_hung_task_warnings)
 		return;
 	sysctl_hung_task_warnings--;
@@ -111,7 +94,6 @@ static void check_hung_task(struct task_struct *t, unsigned long now,
 	sched_show_task(t);
 	__debug_show_held_locks(t);
 
-	t->last_switch_timestamp = now;
 	touch_nmi_watchdog();
 
 	if (sysctl_hung_task_panic)
@@ -145,7 +127,6 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
 {
 	int max_count = sysctl_hung_task_check_count;
 	int batch_count = HUNG_TASK_BATCHING;
-	unsigned long now = get_timestamp();
 	struct task_struct *g, *t;
 
 	/*
@@ -168,19 +149,16 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
 		}
 		/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
 		if (t->state == TASK_UNINTERRUPTIBLE)
-			check_hung_task(t, now, timeout);
+			check_hung_task(t, timeout);
 	} while_each_thread(g, t);
  unlock:
 	rcu_read_unlock();
 }
 
-static void update_poll_jiffies(void)
+static unsigned long timeout_jiffies(unsigned long timeout)
 {
 	/* timeout of 0 will disable the watchdog */
-	if (sysctl_hung_task_timeout_secs == 0)
-		hung_task_poll_jiffies = MAX_SCHEDULE_TIMEOUT;
-	else
-		hung_task_poll_jiffies = sysctl_hung_task_timeout_secs * HZ / 2;
+	return (timeout ? timeout * HZ : MAX_SCHEDULE_TIMEOUT);
 }
 
 /*
@@ -197,8 +175,6 @@ int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
 	if (ret || !write)
 		goto out;
 
-	update_poll_jiffies();
-
 	wake_up_process(watchdog_task);
 
  out:
@@ -211,20 +187,14 @@ int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
 static int watchdog(void *dummy)
 {
 	set_user_nice(current, 0);
-	update_poll_jiffies();
 
 	for ( ; ; ) {
-		unsigned long timeout;
+		unsigned long timeout = sysctl_hung_task_timeout_secs;
 
-		while (schedule_timeout_interruptible(hung_task_poll_jiffies));
+		while (schedule_timeout_interruptible(timeout_jiffies(timeout)))
+			timeout = sysctl_hung_task_timeout_secs;
 
-		/*
-		 * Need to cache timeout here to avoid timeout being set
-		 * to 0 via sysctl while inside check_hung_*_tasks().
-		 */
-		timeout = sysctl_hung_task_timeout_secs;
-		if (timeout)
-			check_hung_uninterruptible_tasks(timeout);
+		check_hung_uninterruptible_tasks(timeout);
 	}
 
 	return 0;
-- 
1.5.4.5


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] softlockup: remove timestamp checking from hung_task
  2009-02-06 23:37 [PATCH] softlockup: remove timestamp checking from hung_task Mandeep Singh Baines
@ 2009-02-07 16:23 ` Frederic Weisbecker
  2009-02-07 16:34   ` Frederic Weisbecker
  2009-02-09 10:04 ` Ingo Molnar
  1 sibling, 1 reply; 8+ messages in thread
From: Frederic Weisbecker @ 2009-02-07 16:23 UTC (permalink / raw)
  To: Mandeep Singh Baines; +Cc: mingo, linux-kernel, rientjes, mbligh, thockin, akpm

Hi Mandeep,

On Fri, Feb 06, 2009 at 03:37:47PM -0800, Mandeep Singh Baines wrote:
> Patch against tip/core/softlockup
> 
> ---
> Impact: saves sizeof(long) bytes per task_struct
> 
> By guaranteeing that sysctl_hung_task_timeout_secs have elapsed between
> tasklist scans we can avoid using timestamps.
> 
> Signed-off-by: Mandeep Singh Baines <msb@google.com>


Good idea.
BTW, why haven't you put your name on top of this file?
That would help those who send patches know to whom they should
route their mails.

I made some comments below about small things...

> ---
>  include/linux/sched.h |    1 -
>  kernel/fork.c         |    8 +++-----
>  kernel/hung_task.c    |   48 +++++++++---------------------------------------
>  3 files changed, 12 insertions(+), 45 deletions(-)
> 
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 2a2811c..e0d723f 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1241,7 +1241,6 @@ struct task_struct {
>  #endif
>  #ifdef CONFIG_DETECT_HUNG_TASK
>  /* hung task detection */
> -	unsigned long last_switch_timestamp;
>  	unsigned long last_switch_count;
>  #endif
>  /* CPU-specific state of this task */
> diff --git a/kernel/fork.c b/kernel/fork.c
> index fb94442..bf582f7 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -639,6 +639,9 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
>  
>  	tsk->min_flt = tsk->maj_flt = 0;
>  	tsk->nvcsw = tsk->nivcsw = 0;
> +#ifdef CONFIG_DETECT_HUNG_TASK
> +	tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
> +#endif


I think you can directly assign a zero here :-)
Or you want to let it as is to give some sense and explanation
about the role of this field?
Why not, I guess gcc will optimize it anyway.


>  	tsk->mm = NULL;
>  	tsk->active_mm = NULL;
> @@ -1041,11 +1044,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
>  
>  	p->default_timer_slack_ns = current->timer_slack_ns;
>  
> -#ifdef CONFIG_DETECT_HUNG_TASK
> -	p->last_switch_count = 0;
> -	p->last_switch_timestamp = 0;
> -#endif
> -
>  	task_io_accounting_init(&p->ioac);
>  	acct_clear_integrals(p);
>  
> diff --git a/kernel/hung_task.c b/kernel/hung_task.c
> index 3951a80..4a10756 100644
> --- a/kernel/hung_task.c
> +++ b/kernel/hung_task.c
> @@ -34,7 +34,6 @@ unsigned long __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
>   * Zero means infinite timeout - no checking done:
>   */
>  unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120;
> -static unsigned long __read_mostly hung_task_poll_jiffies;
>  
>  unsigned long __read_mostly sysctl_hung_task_warnings = 10;
>  
> @@ -69,33 +68,17 @@ static struct notifier_block panic_block = {
>  	.notifier_call = hung_task_panic,
>  };
>  
> -/*
> - * Returns seconds, approximately.  We don't need nanosecond
> - * resolution, and we don't need to waste time with a big divide when
> - * 2^30ns == 1.074s.
> - */
> -static unsigned long get_timestamp(void)
> -{
> -	int this_cpu = raw_smp_processor_id();
> -
> -	return cpu_clock(this_cpu) >> 30LL;  /* 2^30 ~= 10^9 */
> -}
> -
> -static void check_hung_task(struct task_struct *t, unsigned long now,
> -			    unsigned long timeout)
> +static void check_hung_task(struct task_struct *t, unsigned long timeout)
>  {
>  	unsigned long switch_count = t->nvcsw + t->nivcsw;
>  
>  	if (t->flags & PF_FROZEN)
>  		return;
>  
> -	if (switch_count != t->last_switch_count || !t->last_switch_timestamp) {
> +	if (switch_count != t->last_switch_count) {
>  		t->last_switch_count = switch_count;
> -		t->last_switch_timestamp = now;
>  		return;
>  	}



What happens here if khungtaskd is scheduled in after tsk is inserted on the task_list
in copy_process() but before tsk has been scheduled once?

tsk->last_switch_count and  tsk->nvcsw + tsk->nivcsw will still be equal to zero right?

Perhaps you could add another check such as

if (!switch_count)
	return;


> -	if ((long)(now - t->last_switch_timestamp) < timeout)
> -		return;
>  	if (!sysctl_hung_task_warnings)
>  		return;
>  	sysctl_hung_task_warnings--;
> @@ -111,7 +94,6 @@ static void check_hung_task(struct task_struct *t, unsigned long now,
>  	sched_show_task(t);
>  	__debug_show_held_locks(t);
>  
> -	t->last_switch_timestamp = now;
>  	touch_nmi_watchdog();
>  
>  	if (sysctl_hung_task_panic)
> @@ -145,7 +127,6 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
>  {
>  	int max_count = sysctl_hung_task_check_count;
>  	int batch_count = HUNG_TASK_BATCHING;
> -	unsigned long now = get_timestamp();
>  	struct task_struct *g, *t;
>  
>  	/*
> @@ -168,19 +149,16 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
>  		}
>  		/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
>  		if (t->state == TASK_UNINTERRUPTIBLE)
> -			check_hung_task(t, now, timeout);
> +			check_hung_task(t, timeout);
>  	} while_each_thread(g, t);
>   unlock:
>  	rcu_read_unlock();
>  }
>  
> -static void update_poll_jiffies(void)
> +static unsigned long timeout_jiffies(unsigned long timeout)
>  {
>  	/* timeout of 0 will disable the watchdog */
> -	if (sysctl_hung_task_timeout_secs == 0)
> -		hung_task_poll_jiffies = MAX_SCHEDULE_TIMEOUT;
> -	else
> -		hung_task_poll_jiffies = sysctl_hung_task_timeout_secs * HZ / 2;
> +	return (timeout ? timeout * HZ : MAX_SCHEDULE_TIMEOUT);
>  }
>  
>  /*
> @@ -197,8 +175,6 @@ int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
>  	if (ret || !write)
>  		goto out;
>  
> -	update_poll_jiffies();
> -
>  	wake_up_process(watchdog_task);


I'm not sure what this function does now that you dropped update_poll_jiffies()
So if the user sets up a new timeout value, the only effect will be that khungtaskd will
be awakened?

But actually the /sys file doesn't seem to be set up.

Other than these comments, that looks good!
Thanks.

Frederic.


>  
>   out:
> @@ -211,20 +187,14 @@ int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
>  static int watchdog(void *dummy)
>  {
>  	set_user_nice(current, 0);
> -	update_poll_jiffies();
>  
>  	for ( ; ; ) {
> -		unsigned long timeout;
> +		unsigned long timeout = sysctl_hung_task_timeout_secs;
>  
> -		while (schedule_timeout_interruptible(hung_task_poll_jiffies));
> +		while (schedule_timeout_interruptible(timeout_jiffies(timeout)))
> +			timeout = sysctl_hung_task_timeout_secs;
>  
> -		/*
> -		 * Need to cache timeout here to avoid timeout being set
> -		 * to 0 via sysctl while inside check_hung_*_tasks().
> -		 */
> -		timeout = sysctl_hung_task_timeout_secs;
> -		if (timeout)
> -			check_hung_uninterruptible_tasks(timeout);
> +		check_hung_uninterruptible_tasks(timeout);
>  	}
>  
>  	return 0;
> -- 
> 1.5.4.5
> 


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] softlockup: remove timestamp checking from hung_task
  2009-02-07 16:23 ` Frederic Weisbecker
@ 2009-02-07 16:34   ` Frederic Weisbecker
  2009-02-07 16:51     ` Frederic Weisbecker
  0 siblings, 1 reply; 8+ messages in thread
From: Frederic Weisbecker @ 2009-02-07 16:34 UTC (permalink / raw)
  To: Mandeep Singh Baines; +Cc: mingo, linux-kernel, rientjes, mbligh, thockin, akpm

On Sat, Feb 07, 2009 at 05:23:28PM +0100, Frederic Weisbecker wrote:
> Hi Mandeep,
> 
> On Fri, Feb 06, 2009 at 03:37:47PM -0800, Mandeep Singh Baines wrote:
> > Patch against tip/core/softlockup
> > 
> > ---
> > Impact: saves sizeof(long) bytes per task_struct
> > 
> > By guaranteeing that sysctl_hung_task_timeout_secs have elapsed between
> > tasklist scans we can avoid using timestamps.
> > 
> > Signed-off-by: Mandeep Singh Baines <msb@google.com>
> 
> 
> Good idea.
> BTW, why haven't you put your name on top of this file?
> That would help those who will send patches knowing to whom they have to
> route their mails.
> 
> I made some comments below about small things...
> 
> > ---
> >  include/linux/sched.h |    1 -
> >  kernel/fork.c         |    8 +++-----
> >  kernel/hung_task.c    |   48 +++++++++---------------------------------------
> >  3 files changed, 12 insertions(+), 45 deletions(-)
> > 
> > diff --git a/include/linux/sched.h b/include/linux/sched.h
> > index 2a2811c..e0d723f 100644
> > --- a/include/linux/sched.h
> > +++ b/include/linux/sched.h
> > @@ -1241,7 +1241,6 @@ struct task_struct {
> >  #endif
> >  #ifdef CONFIG_DETECT_HUNG_TASK
> >  /* hung task detection */
> > -	unsigned long last_switch_timestamp;
> >  	unsigned long last_switch_count;
> >  #endif
> >  /* CPU-specific state of this task */
> > diff --git a/kernel/fork.c b/kernel/fork.c
> > index fb94442..bf582f7 100644
> > --- a/kernel/fork.c
> > +++ b/kernel/fork.c
> > @@ -639,6 +639,9 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
> >  
> >  	tsk->min_flt = tsk->maj_flt = 0;
> >  	tsk->nvcsw = tsk->nivcsw = 0;
> > +#ifdef CONFIG_DETECT_HUNG_TASK
> > +	tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
> > +#endif
> 
> 
> I think you can directly assign a zero here :-)
> Or you want to let it as is to give some sense and explanation
> about the role of this field?
> Why not, I guess gcc will optimize it anyway.
> 
> 
> >  	tsk->mm = NULL;
> >  	tsk->active_mm = NULL;
> > @@ -1041,11 +1044,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
> >  
> >  	p->default_timer_slack_ns = current->timer_slack_ns;
> >  
> > -#ifdef CONFIG_DETECT_HUNG_TASK
> > -	p->last_switch_count = 0;
> > -	p->last_switch_timestamp = 0;
> > -#endif
> > -
> >  	task_io_accounting_init(&p->ioac);
> >  	acct_clear_integrals(p);
> >  
> > diff --git a/kernel/hung_task.c b/kernel/hung_task.c
> > index 3951a80..4a10756 100644
> > --- a/kernel/hung_task.c
> > +++ b/kernel/hung_task.c
> > @@ -34,7 +34,6 @@ unsigned long __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
> >   * Zero means infinite timeout - no checking done:
> >   */
> >  unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120;
> > -static unsigned long __read_mostly hung_task_poll_jiffies;
> >  
> >  unsigned long __read_mostly sysctl_hung_task_warnings = 10;
> >  
> > @@ -69,33 +68,17 @@ static struct notifier_block panic_block = {
> >  	.notifier_call = hung_task_panic,
> >  };
> >  
> > -/*
> > - * Returns seconds, approximately.  We don't need nanosecond
> > - * resolution, and we don't need to waste time with a big divide when
> > - * 2^30ns == 1.074s.
> > - */
> > -static unsigned long get_timestamp(void)
> > -{
> > -	int this_cpu = raw_smp_processor_id();
> > -
> > -	return cpu_clock(this_cpu) >> 30LL;  /* 2^30 ~= 10^9 */
> > -}
> > -
> > -static void check_hung_task(struct task_struct *t, unsigned long now,
> > -			    unsigned long timeout)
> > +static void check_hung_task(struct task_struct *t, unsigned long timeout)
> >  {
> >  	unsigned long switch_count = t->nvcsw + t->nivcsw;
> >  
> >  	if (t->flags & PF_FROZEN)
> >  		return;
> >  
> > -	if (switch_count != t->last_switch_count || !t->last_switch_timestamp) {
> > +	if (switch_count != t->last_switch_count) {
> >  		t->last_switch_count = switch_count;
> > -		t->last_switch_timestamp = now;
> >  		return;
> >  	}
> 
> 
> 
> What happens here if khungtaskd is scheduled in after tsk is inserted on the task_list
> in copy_process() but before tsk has been scheduled once?
> 
> tsk->last_switch_count and  tsk->nvcsw + tsk->nivcsw will still be equal to zero right?
> 
> Perhaps you could add another check such as
> 
> if (!switch_count)
> 	return;
> 
> 
> > -	if ((long)(now - t->last_switch_timestamp) < timeout)
> > -		return;
> >  	if (!sysctl_hung_task_warnings)
> >  		return;
> >  	sysctl_hung_task_warnings--;
> > @@ -111,7 +94,6 @@ static void check_hung_task(struct task_struct *t, unsigned long now,
> >  	sched_show_task(t);
> >  	__debug_show_held_locks(t);
> >  
> > -	t->last_switch_timestamp = now;
> >  	touch_nmi_watchdog();
> >  
> >  	if (sysctl_hung_task_panic)
> > @@ -145,7 +127,6 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
> >  {
> >  	int max_count = sysctl_hung_task_check_count;
> >  	int batch_count = HUNG_TASK_BATCHING;
> > -	unsigned long now = get_timestamp();
> >  	struct task_struct *g, *t;
> >  
> >  	/*
> > @@ -168,19 +149,16 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
> >  		}
> >  		/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
> >  		if (t->state == TASK_UNINTERRUPTIBLE)
> > -			check_hung_task(t, now, timeout);
> > +			check_hung_task(t, timeout);
> >  	} while_each_thread(g, t);
> >   unlock:
> >  	rcu_read_unlock();
> >  }
> >  
> > -static void update_poll_jiffies(void)
> > +static unsigned long timeout_jiffies(unsigned long timeout)
> >  {
> >  	/* timeout of 0 will disable the watchdog */
> > -	if (sysctl_hung_task_timeout_secs == 0)
> > -		hung_task_poll_jiffies = MAX_SCHEDULE_TIMEOUT;
> > -	else
> > -		hung_task_poll_jiffies = sysctl_hung_task_timeout_secs * HZ / 2;
> > +	return (timeout ? timeout * HZ : MAX_SCHEDULE_TIMEOUT);
> >  }
> >  
> >  /*
> > @@ -197,8 +175,6 @@ int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
> >  	if (ret || !write)
> >  		goto out;
> >  
> > -	update_poll_jiffies();
> > -
> >  	wake_up_process(watchdog_task);
> 
> 
> I'm not sure what does this function now that you dropped update_poll_jiffies()
> So if the user sets up a new timeout value, the only effect will be that khungtaskd will
> be awakened?
> 
> But actually the /sys file doesn't seem to be set up.


Oops, I should have grepped for proc_dohung_task_timeout_secs, which is set up in kernel/sysctl.c.
Sorry.
But still, sysctl_hung_task_timeout_secs doesn't seem to be set :-)


> 
> Other than these comments, that looks good!
> Thanks.
> 
> Frederic.
> 
> 
> >  
> >   out:
> > @@ -211,20 +187,14 @@ int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
> >  static int watchdog(void *dummy)
> >  {
> >  	set_user_nice(current, 0);
> > -	update_poll_jiffies();
> >  
> >  	for ( ; ; ) {
> > -		unsigned long timeout;
> > +		unsigned long timeout = sysctl_hung_task_timeout_secs;
> >  
> > -		while (schedule_timeout_interruptible(hung_task_poll_jiffies));
> > +		while (schedule_timeout_interruptible(timeout_jiffies(timeout)))
> > +			timeout = sysctl_hung_task_timeout_secs;
> >  
> > -		/*
> > -		 * Need to cache timeout here to avoid timeout being set
> > -		 * to 0 via sysctl while inside check_hung_*_tasks().
> > -		 */
> > -		timeout = sysctl_hung_task_timeout_secs;
> > -		if (timeout)
> > -			check_hung_uninterruptible_tasks(timeout);
> > +		check_hung_uninterruptible_tasks(timeout);
> >  	}
> >  
> >  	return 0;
> > -- 
> > 1.5.4.5
> > 


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] softlockup: remove timestamp checking from hung_task
  2009-02-07 16:34   ` Frederic Weisbecker
@ 2009-02-07 16:51     ` Frederic Weisbecker
  2009-02-07 17:02       ` Frederic Weisbecker
  2009-02-09 19:29       ` Mandeep Singh Baines
  0 siblings, 2 replies; 8+ messages in thread
From: Frederic Weisbecker @ 2009-02-07 16:51 UTC (permalink / raw)
  To: Mandeep Singh Baines; +Cc: mingo, linux-kernel, rientjes, mbligh, thockin, akpm

On Sat, Feb 07, 2009 at 05:34:40PM +0100, Frederic Weisbecker wrote:
> On Sat, Feb 07, 2009 at 05:23:28PM +0100, Frederic Weisbecker wrote:
> > Hi Mandeep,
> > 
> > On Fri, Feb 06, 2009 at 03:37:47PM -0800, Mandeep Singh Baines wrote:
> > > Patch against tip/core/softlockup
> > > 
> > > ---
> > > Impact: saves sizeof(long) bytes per task_struct
> > > 
> > > By guaranteeing that sysctl_hung_task_timeout_secs have elapsed between
> > > tasklist scans we can avoid using timestamps.
> > > 
> > > Signed-off-by: Mandeep Singh Baines <msb@google.com>
> > 
> > 
> > Good idea.
> > BTW, why haven't you put your name on top of this file?
> > That would help those who will send patches knowing to whom they have to
> > route their mails.
> > 
> > I made some comments below about small things...
> > 
> > > ---
> > >  include/linux/sched.h |    1 -
> > >  kernel/fork.c         |    8 +++-----
> > >  kernel/hung_task.c    |   48 +++++++++---------------------------------------
> > >  3 files changed, 12 insertions(+), 45 deletions(-)
> > > 
> > > diff --git a/include/linux/sched.h b/include/linux/sched.h
> > > index 2a2811c..e0d723f 100644
> > > --- a/include/linux/sched.h
> > > +++ b/include/linux/sched.h
> > > @@ -1241,7 +1241,6 @@ struct task_struct {
> > >  #endif
> > >  #ifdef CONFIG_DETECT_HUNG_TASK
> > >  /* hung task detection */
> > > -	unsigned long last_switch_timestamp;
> > >  	unsigned long last_switch_count;
> > >  #endif
> > >  /* CPU-specific state of this task */
> > > diff --git a/kernel/fork.c b/kernel/fork.c
> > > index fb94442..bf582f7 100644
> > > --- a/kernel/fork.c
> > > +++ b/kernel/fork.c
> > > @@ -639,6 +639,9 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
> > >  
> > >  	tsk->min_flt = tsk->maj_flt = 0;
> > >  	tsk->nvcsw = tsk->nivcsw = 0;
> > > +#ifdef CONFIG_DETECT_HUNG_TASK
> > > +	tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
> > > +#endif
> > 
> > 
> > I think you can directly assign a zero here :-)
> > Or you want to let it as is to give some sense and explanation
> > about the role of this field?
> > Why not, I guess gcc will optimize it anyway.
> > 
> > 
> > >  	tsk->mm = NULL;
> > >  	tsk->active_mm = NULL;
> > > @@ -1041,11 +1044,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
> > >  
> > >  	p->default_timer_slack_ns = current->timer_slack_ns;
> > >  
> > > -#ifdef CONFIG_DETECT_HUNG_TASK
> > > -	p->last_switch_count = 0;
> > > -	p->last_switch_timestamp = 0;
> > > -#endif
> > > -
> > >  	task_io_accounting_init(&p->ioac);
> > >  	acct_clear_integrals(p);
> > >  
> > > diff --git a/kernel/hung_task.c b/kernel/hung_task.c
> > > index 3951a80..4a10756 100644
> > > --- a/kernel/hung_task.c
> > > +++ b/kernel/hung_task.c
> > > @@ -34,7 +34,6 @@ unsigned long __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
> > >   * Zero means infinite timeout - no checking done:
> > >   */
> > >  unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120;
> > > -static unsigned long __read_mostly hung_task_poll_jiffies;
> > >  
> > >  unsigned long __read_mostly sysctl_hung_task_warnings = 10;
> > >  
> > > @@ -69,33 +68,17 @@ static struct notifier_block panic_block = {
> > >  	.notifier_call = hung_task_panic,
> > >  };
> > >  
> > > -/*
> > > - * Returns seconds, approximately.  We don't need nanosecond
> > > - * resolution, and we don't need to waste time with a big divide when
> > > - * 2^30ns == 1.074s.
> > > - */
> > > -static unsigned long get_timestamp(void)
> > > -{
> > > -	int this_cpu = raw_smp_processor_id();
> > > -
> > > -	return cpu_clock(this_cpu) >> 30LL;  /* 2^30 ~= 10^9 */
> > > -}
> > > -
> > > -static void check_hung_task(struct task_struct *t, unsigned long now,
> > > -			    unsigned long timeout)
> > > +static void check_hung_task(struct task_struct *t, unsigned long timeout)
> > >  {
> > >  	unsigned long switch_count = t->nvcsw + t->nivcsw;
> > >  
> > >  	if (t->flags & PF_FROZEN)
> > >  		return;
> > >  
> > > -	if (switch_count != t->last_switch_count || !t->last_switch_timestamp) {
> > > +	if (switch_count != t->last_switch_count) {
> > >  		t->last_switch_count = switch_count;
> > > -		t->last_switch_timestamp = now;
> > >  		return;
> > >  	}
> > 
> > 
> > 
> > What happens here if khungtaskd is scheduled in after tsk is inserted on the task_list
> > in copy_process() but before tsk has been scheduled once?
> > 
> > tsk->last_switch_count and  tsk->nvcsw + tsk->nivcsw will still be equal to zero right?
> > 
> > Perhaps you could add another check such as
> > 
> > if (!switch_count)
> > 	return;
> > 
> > 
> > > -	if ((long)(now - t->last_switch_timestamp) < timeout)
> > > -		return;
> > >  	if (!sysctl_hung_task_warnings)
> > >  		return;
> > >  	sysctl_hung_task_warnings--;
> > > @@ -111,7 +94,6 @@ static void check_hung_task(struct task_struct *t, unsigned long now,
> > >  	sched_show_task(t);
> > >  	__debug_show_held_locks(t);
> > >  
> > > -	t->last_switch_timestamp = now;
> > >  	touch_nmi_watchdog();
> > >  
> > >  	if (sysctl_hung_task_panic)
> > > @@ -145,7 +127,6 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
> > >  {
> > >  	int max_count = sysctl_hung_task_check_count;
> > >  	int batch_count = HUNG_TASK_BATCHING;
> > > -	unsigned long now = get_timestamp();
> > >  	struct task_struct *g, *t;
> > >  
> > >  	/*
> > > @@ -168,19 +149,16 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
> > >  		}
> > >  		/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
> > >  		if (t->state == TASK_UNINTERRUPTIBLE)
> > > -			check_hung_task(t, now, timeout);
> > > +			check_hung_task(t, timeout);
> > >  	} while_each_thread(g, t);
> > >   unlock:
> > >  	rcu_read_unlock();
> > >  }
> > >  
> > > -static void update_poll_jiffies(void)
> > > +static unsigned long timeout_jiffies(unsigned long timeout)
> > >  {
> > >  	/* timeout of 0 will disable the watchdog */
> > > -	if (sysctl_hung_task_timeout_secs == 0)
> > > -		hung_task_poll_jiffies = MAX_SCHEDULE_TIMEOUT;
> > > -	else
> > > -		hung_task_poll_jiffies = sysctl_hung_task_timeout_secs * HZ / 2;
> > > +	return (timeout ? timeout * HZ : MAX_SCHEDULE_TIMEOUT);
> > >  }
> > >  
> > >  /*
> > > @@ -197,8 +175,6 @@ int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
> > >  	if (ret || !write)
> > >  		goto out;
> > >  
> > > -	update_poll_jiffies();
> > > -
> > >  	wake_up_process(watchdog_task);
> > 
> > 
> > I'm not sure what does this function now that you dropped update_poll_jiffies()
> > So if the user sets up a new timeout value, the only effect will be that khungtaskd will
> > be awakened?
> > 
> > But actually the /sys file doesn't seem to be set up.
> 
> 
> Oops, I should have grep on proc_dohung_task_timeout_secs which is set on kernel/sysctl.
> Sorry.
> But still, sysctl_hung_task_timeout_secs doesn't seem to be set :-)


And once again I'm wrong, I should read sysctl.c or at least read the sysfs documentation.
Sorry for the noise.

BTW, here is a small fixlet on top of your patch about what I commented concerning
the tasks than weren't yet scheduled once:

--
>From e7120e424b031978e482b5fe311d90916ffb8b7e Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sat, 7 Feb 2009 17:45:12 +0100
Subject: [PATCH] softlockup: ensure the task has been scheduled once

When we check if the task has been scheduled since the last scan, we might
have a race condition if the task has been inserted on the task list but not
yet scheduled once. So we just add a small check to ensure it has been switched
in at least one time to avoid false positive.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 kernel/hung_task.c |    6 +++++-
 1 files changed, 5 insertions(+), 1 deletions(-)

diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 4a10756..7f57a71 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -72,7 +72,11 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
 {
 	unsigned long switch_count = t->nvcsw + t->nivcsw;
 
-	if (t->flags & PF_FROZEN)
+	/*
+	 * Ensure the task is not frozen and that it has been scheduled
+	 * at least once.
+	 */
+	if (t->flags & PF_FROZEN || !switch_count)
 		return;
 
 	if (switch_count != t->last_switch_count) {
-- 
1.6.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] softlockup: remove timestamp checking from hung_task
  2009-02-07 16:51     ` Frederic Weisbecker
@ 2009-02-07 17:02       ` Frederic Weisbecker
  2009-02-09 19:29       ` Mandeep Singh Baines
  1 sibling, 0 replies; 8+ messages in thread
From: Frederic Weisbecker @ 2009-02-07 17:02 UTC (permalink / raw)
  To: Mandeep Singh Baines; +Cc: mingo, linux-kernel, rientjes, mbligh, thockin, akpm

On Sat, Feb 07, 2009 at 05:51:55PM +0100, Frederic Weisbecker wrote:
> On Sat, Feb 07, 2009 at 05:34:40PM +0100, Frederic Weisbecker wrote:
> > On Sat, Feb 07, 2009 at 05:23:28PM +0100, Frederic Weisbecker wrote:
> > > Hi Mandeep,
> > > 
> > > On Fri, Feb 06, 2009 at 03:37:47PM -0800, Mandeep Singh Baines wrote:
> > > > Patch against tip/core/softlockup
> > > > 
> > > > ---
> > > > Impact: saves sizeof(long) bytes per task_struct
> > > > 
> > > > By guaranteeing that sysctl_hung_task_timeout_secs have elapsed between
> > > > tasklist scans we can avoid using timestamps.
> > > > 
> > > > Signed-off-by: Mandeep Singh Baines <msb@google.com>
> > > 
> > > 
> > > Good idea.
> > > BTW, why haven't you put your name on top of this file?
> > > That would help those who will send patches knowing to whom they have to
> > > route their mails.
> > > 
> > > I made some comments below about small things...
> > > 
> > > > ---
> > > >  include/linux/sched.h |    1 -
> > > >  kernel/fork.c         |    8 +++-----
> > > >  kernel/hung_task.c    |   48 +++++++++---------------------------------------
> > > >  3 files changed, 12 insertions(+), 45 deletions(-)
> > > > 
> > > > diff --git a/include/linux/sched.h b/include/linux/sched.h
> > > > index 2a2811c..e0d723f 100644
> > > > --- a/include/linux/sched.h
> > > > +++ b/include/linux/sched.h
> > > > @@ -1241,7 +1241,6 @@ struct task_struct {
> > > >  #endif
> > > >  #ifdef CONFIG_DETECT_HUNG_TASK
> > > >  /* hung task detection */
> > > > -	unsigned long last_switch_timestamp;
> > > >  	unsigned long last_switch_count;
> > > >  #endif
> > > >  /* CPU-specific state of this task */
> > > > diff --git a/kernel/fork.c b/kernel/fork.c
> > > > index fb94442..bf582f7 100644
> > > > --- a/kernel/fork.c
> > > > +++ b/kernel/fork.c
> > > > @@ -639,6 +639,9 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
> > > >  
> > > >  	tsk->min_flt = tsk->maj_flt = 0;
> > > >  	tsk->nvcsw = tsk->nivcsw = 0;
> > > > +#ifdef CONFIG_DETECT_HUNG_TASK
> > > > +	tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
> > > > +#endif
> > > 
> > > 
> > > I think you can directly assign a zero here :-)
> > > Or you want to let it as is to give some sense and explanation
> > > about the role of this field?
> > > Why not, I guess gcc will optimize it anyway.
> > > 
> > > 
> > > >  	tsk->mm = NULL;
> > > >  	tsk->active_mm = NULL;
> > > > @@ -1041,11 +1044,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
> > > >  
> > > >  	p->default_timer_slack_ns = current->timer_slack_ns;
> > > >  
> > > > -#ifdef CONFIG_DETECT_HUNG_TASK
> > > > -	p->last_switch_count = 0;
> > > > -	p->last_switch_timestamp = 0;
> > > > -#endif
> > > > -
> > > >  	task_io_accounting_init(&p->ioac);
> > > >  	acct_clear_integrals(p);
> > > >  
> > > > diff --git a/kernel/hung_task.c b/kernel/hung_task.c
> > > > index 3951a80..4a10756 100644
> > > > --- a/kernel/hung_task.c
> > > > +++ b/kernel/hung_task.c
> > > > @@ -34,7 +34,6 @@ unsigned long __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
> > > >   * Zero means infinite timeout - no checking done:
> > > >   */
> > > >  unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120;
> > > > -static unsigned long __read_mostly hung_task_poll_jiffies;
> > > >  
> > > >  unsigned long __read_mostly sysctl_hung_task_warnings = 10;
> > > >  
> > > > @@ -69,33 +68,17 @@ static struct notifier_block panic_block = {
> > > >  	.notifier_call = hung_task_panic,
> > > >  };
> > > >  
> > > > -/*
> > > > - * Returns seconds, approximately.  We don't need nanosecond
> > > > - * resolution, and we don't need to waste time with a big divide when
> > > > - * 2^30ns == 1.074s.
> > > > - */
> > > > -static unsigned long get_timestamp(void)
> > > > -{
> > > > -	int this_cpu = raw_smp_processor_id();
> > > > -
> > > > -	return cpu_clock(this_cpu) >> 30LL;  /* 2^30 ~= 10^9 */
> > > > -}
> > > > -
> > > > -static void check_hung_task(struct task_struct *t, unsigned long now,
> > > > -			    unsigned long timeout)
> > > > +static void check_hung_task(struct task_struct *t, unsigned long timeout)
> > > >  {
> > > >  	unsigned long switch_count = t->nvcsw + t->nivcsw;
> > > >  
> > > >  	if (t->flags & PF_FROZEN)
> > > >  		return;
> > > >  
> > > > -	if (switch_count != t->last_switch_count || !t->last_switch_timestamp) {
> > > > +	if (switch_count != t->last_switch_count) {
> > > >  		t->last_switch_count = switch_count;
> > > > -		t->last_switch_timestamp = now;
> > > >  		return;
> > > >  	}
> > > 
> > > 
> > > 
> > > What happens here if khungtaskd is scheduled in after tsk is inserted on the task_list
> > > in copy_process() but before tsk has been scheduled once?
> > > 
> > > tsk->last_switch_count and  tsk->nvcsw + tsk->nivcsw will still be equal to zero right?
> > > 
> > > Perhaps you could add another check such as
> > > 
> > > if (!switch_count)
> > > 	return;
> > > 
> > > 
> > > > -	if ((long)(now - t->last_switch_timestamp) < timeout)
> > > > -		return;
> > > >  	if (!sysctl_hung_task_warnings)
> > > >  		return;
> > > >  	sysctl_hung_task_warnings--;
> > > > @@ -111,7 +94,6 @@ static void check_hung_task(struct task_struct *t, unsigned long now,
> > > >  	sched_show_task(t);
> > > >  	__debug_show_held_locks(t);
> > > >  
> > > > -	t->last_switch_timestamp = now;
> > > >  	touch_nmi_watchdog();
> > > >  
> > > >  	if (sysctl_hung_task_panic)
> > > > @@ -145,7 +127,6 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
> > > >  {
> > > >  	int max_count = sysctl_hung_task_check_count;
> > > >  	int batch_count = HUNG_TASK_BATCHING;
> > > > -	unsigned long now = get_timestamp();
> > > >  	struct task_struct *g, *t;
> > > >  
> > > >  	/*
> > > > @@ -168,19 +149,16 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
> > > >  		}
> > > >  		/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
> > > >  		if (t->state == TASK_UNINTERRUPTIBLE)
> > > > -			check_hung_task(t, now, timeout);
> > > > +			check_hung_task(t, timeout);
> > > >  	} while_each_thread(g, t);
> > > >   unlock:
> > > >  	rcu_read_unlock();
> > > >  }
> > > >  
> > > > -static void update_poll_jiffies(void)
> > > > +static unsigned long timeout_jiffies(unsigned long timeout)
> > > >  {
> > > >  	/* timeout of 0 will disable the watchdog */
> > > > -	if (sysctl_hung_task_timeout_secs == 0)
> > > > -		hung_task_poll_jiffies = MAX_SCHEDULE_TIMEOUT;
> > > > -	else
> > > > -		hung_task_poll_jiffies = sysctl_hung_task_timeout_secs * HZ / 2;
> > > > +	return (timeout ? timeout * HZ : MAX_SCHEDULE_TIMEOUT);
> > > >  }
> > > >  
> > > >  /*
> > > > @@ -197,8 +175,6 @@ int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
> > > >  	if (ret || !write)
> > > >  		goto out;
> > > >  
> > > > -	update_poll_jiffies();
> > > > -
> > > >  	wake_up_process(watchdog_task);
> > > 
> > > 
> > > I'm not sure what does this function now that you dropped update_poll_jiffies()
> > > So if the user sets up a new timeout value, the only effect will be that khungtaskd will
> > > be awakened?
> > > 
> > > But actually the /sys file doesn't seem to be set up.
> > 
> > 
> > Oops, I should have grep on proc_dohung_task_timeout_secs which is set on kernel/sysctl.
> > Sorry.
> > But still, sysctl_hung_task_timeout_secs doesn't seem to be set :-)
> 
> 
> And once again I'm wrong, I should read sysctl.c or at least read the sysfs documentation.
> Sorry for the noise.
> 
> BTW, here is a small fixlet on top of your patch about what I commented concerning
> the tasks that weren't yet scheduled once:


But it is not supposed to be TASK_UNINTERRUPTIBLE.
Forget this fixlet.
So, again sorry for the noise, I'm going to hide somewhere...

 
> --
> From e7120e424b031978e482b5fe311d90916ffb8b7e Mon Sep 17 00:00:00 2001
> From: Frederic Weisbecker <fweisbec@gmail.com>
> Date: Sat, 7 Feb 2009 17:45:12 +0100
> Subject: [PATCH] softlockup: ensure the task has been scheduled once
> 
> When we check if the task has been scheduled since the last scan, we might
> have a race condition if the task has been inserted on the task list but not
> yet scheduled once. So we just add a small check to ensure it has been switched
> in at least one time to avoid false positive.
> 
> Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
> ---
>  kernel/hung_task.c |    6 +++++-
>  1 files changed, 5 insertions(+), 1 deletions(-)
> 
> diff --git a/kernel/hung_task.c b/kernel/hung_task.c
> index 4a10756..7f57a71 100644
> --- a/kernel/hung_task.c
> +++ b/kernel/hung_task.c
> @@ -72,7 +72,11 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
>  {
>  	unsigned long switch_count = t->nvcsw + t->nivcsw;
>  
> -	if (t->flags & PF_FROZEN)
> +	/*
> +	 * Ensure the task is not frozen and that it has been scheduled
> +	 * at least once.
> +	 */
> +	if (t->flags & PF_FROZEN || !switch_count)
>  		return;
>  
>  	if (switch_count != t->last_switch_count) {
> -- 
> 1.6.1
> 


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] softlockup: remove timestamp checking from hung_task
  2009-02-06 23:37 [PATCH] softlockup: remove timestamp checking from hung_task Mandeep Singh Baines
  2009-02-07 16:23 ` Frederic Weisbecker
@ 2009-02-09 10:04 ` Ingo Molnar
  1 sibling, 0 replies; 8+ messages in thread
From: Ingo Molnar @ 2009-02-09 10:04 UTC (permalink / raw)
  To: Mandeep Singh Baines
  Cc: fweisbec, linux-kernel, rientjes, mbligh, thockin, akpm


* Mandeep Singh Baines <msb@google.com> wrote:

> Patch against tip/core/softlockup
> 
> ---
> Impact: saves sizeof(long) bytes per task_struct
> 
> By guaranteeing that sysctl_hung_task_timeout_secs have elapsed between
> tasklist scans we can avoid using timestamps.
> 
> Signed-off-by: Mandeep Singh Baines <msb@google.com>
> ---
>  include/linux/sched.h |    1 -
>  kernel/fork.c         |    8 +++-----
>  kernel/hung_task.c    |   48 +++++++++---------------------------------------
>  3 files changed, 12 insertions(+), 45 deletions(-)

Nice simplification!

Applied to tip/core/softlockup, thanks Mandeep.

(i fixed the one scripts/checkpatch.pl error that your patch triggered)

	Ingo

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] softlockup: remove timestamp checking from hung_task
  2009-02-07 16:51     ` Frederic Weisbecker
  2009-02-07 17:02       ` Frederic Weisbecker
@ 2009-02-09 19:29       ` Mandeep Singh Baines
  2009-02-09 19:54         ` Frederic Weisbecker
  1 sibling, 1 reply; 8+ messages in thread
From: Mandeep Singh Baines @ 2009-02-09 19:29 UTC (permalink / raw)
  To: Frederic Weisbecker; +Cc: mingo, linux-kernel, rientjes, mbligh, thockin, akpm

Frederic Weisbecker (fweisbec@gmail.com) wrote:
> BTW, here is a small fixlet on top of your patch about what I commented concerning
> the tasks that weren't yet scheduled once:
> 
> --
> From e7120e424b031978e482b5fe311d90916ffb8b7e Mon Sep 17 00:00:00 2001
> From: Frederic Weisbecker <fweisbec@gmail.com>
> Date: Sat, 7 Feb 2009 17:45:12 +0100
> Subject: [PATCH] softlockup: ensure the task has been scheduled once
> 
> When we check if the task has been scheduled since the last scan, we might
> have a race condition if the task has been inserted on the task list but not
> yet scheduled once. So we just add a small check to ensure it has been switched
> in at least one time to avoid false positive.
> 
> Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
> ---
>  kernel/hung_task.c |    6 +++++-
>  1 files changed, 5 insertions(+), 1 deletions(-)
> 
> diff --git a/kernel/hung_task.c b/kernel/hung_task.c
> index 4a10756..7f57a71 100644
> --- a/kernel/hung_task.c
> +++ b/kernel/hung_task.c
> @@ -72,7 +72,11 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
>  {
>  	unsigned long switch_count = t->nvcsw + t->nivcsw;
>  
> -	if (t->flags & PF_FROZEN)
> +	/*
> +	 * Ensure the task is not frozen and that it has been scheduled
> +	 * at least once.
> +	 */
> +	if (t->flags & PF_FROZEN || !switch_count)
>  		return;
>  
>  	if (switch_count != t->last_switch_count) {
> -- 
> 1.6.1
> 

Good catch! I would change the description though. The race is a little
more subtle than your current description. For a newly forked process, the race
can occur if check_hung_task() processes the task in the time between the task
changing its state to UNINTERRUPTIBLE and the scheduler updating the
switch_count (nivcsw or nvcsw).

One minor change to the  code comment. You want to ensure that the task has
context switched at least once, NOT that the task has been scheduled at least
once.

If you re-send the patch with the fixed comment and description, I'll
ack it.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] softlockup: remove timestamp checking from hung_task
  2009-02-09 19:29       ` Mandeep Singh Baines
@ 2009-02-09 19:54         ` Frederic Weisbecker
  0 siblings, 0 replies; 8+ messages in thread
From: Frederic Weisbecker @ 2009-02-09 19:54 UTC (permalink / raw)
  To: Mandeep Singh Baines; +Cc: mingo, linux-kernel, rientjes, mbligh, thockin, akpm

On Mon, Feb 09, 2009 at 11:29:56AM -0800, Mandeep Singh Baines wrote:
> Frederic Weisbecker (fweisbec@gmail.com) wrote:
> > BTW, here is a small fixlet on top of your patch about what I commented concerning
> > the tasks that weren't yet scheduled once:
> > 
> > --
> > From e7120e424b031978e482b5fe311d90916ffb8b7e Mon Sep 17 00:00:00 2001
> > From: Frederic Weisbecker <fweisbec@gmail.com>
> > Date: Sat, 7 Feb 2009 17:45:12 +0100
> > Subject: [PATCH] softlockup: ensure the task has been scheduled once
> > 
> > When we check if the task has been scheduled since the last scan, we might
> > have a race condition if the task has been inserted on the task list but not
> > yet scheduled once. So we just add a small check to ensure it has been switched
> > in at least one time to avoid false positive.
> > 
> > Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
> > ---
> >  kernel/hung_task.c |    6 +++++-
> >  1 files changed, 5 insertions(+), 1 deletions(-)
> > 
> > diff --git a/kernel/hung_task.c b/kernel/hung_task.c
> > index 4a10756..7f57a71 100644
> > --- a/kernel/hung_task.c
> > +++ b/kernel/hung_task.c
> > @@ -72,7 +72,11 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
> >  {
> >  	unsigned long switch_count = t->nvcsw + t->nivcsw;
> >  
> > -	if (t->flags & PF_FROZEN)
> > +	/*
> > +	 * Ensure the task is not frozen and that it has been scheduled
> > +	 * at least once.
> > +	 */
> > +	if (t->flags & PF_FROZEN || !switch_count)
> >  		return;
> >  
> >  	if (switch_count != t->last_switch_count) {
> > -- 
> > 1.6.1
> > 
> 
> Good catch! I would change the description though. The race is a little
> more subtle than your current description. For a newly forked process, the race
> can occur if check_hung_task() processes the task in the time between the task
> changing its state to UNINTERRUPTIBLE and the scheduler updating the
> switch_count (nivcsw or nvcsw).
> 
> One minor change to the  code comment. You want to ensure that the task has
> context switched at least once, NOT that the task has been scheduled at least
> once.

Ooh ok. I thought that the first schedule updated nivcsw/nvcsw, that's why
I cancelled my comment.

Ok so I will resend the patch with the appropriate comment.


> 
> If you re-send the patch with the fixed comment and description, I'll
> ack it.

Thanks! :-)


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2009-02-09 19:55 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-02-06 23:37 [PATCH] softlockup: remove timestamp checking from hung_task Mandeep Singh Baines
2009-02-07 16:23 ` Frederic Weisbecker
2009-02-07 16:34   ` Frederic Weisbecker
2009-02-07 16:51     ` Frederic Weisbecker
2009-02-07 17:02       ` Frederic Weisbecker
2009-02-09 19:29       ` Mandeep Singh Baines
2009-02-09 19:54         ` Frederic Weisbecker
2009-02-09 10:04 ` Ingo Molnar

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.