LKML Archive on lore.kernel.org
 help / Atom feed
* [PATCH] kernel/hung_task.c: disable on suspend
@ 2018-09-13 16:08 Vitaly Kuznetsov
  2018-09-14 11:26 ` Rafael J. Wysocki
  0 siblings, 1 reply; 10+ messages in thread
From: Vitaly Kuznetsov @ 2018-09-13 16:08 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-pm, Rafael J. Wysocki, Andrew Morton, Dmitry Vyukov,
	Paul E. McKenney, Oleg Nesterov

It is possible to observe hung_task complaints when the system goes into
the suspend-to-idle state:

 PM: Syncing filesystems ... done.
 Freezing user space processes ... (elapsed 0.001 seconds) done.
 OOM killer disabled.
 Freezing remaining freezable tasks ... (elapsed 0.002 seconds) done.
 sd 0:0:0:0: [sda] Synchronizing SCSI cache
 INFO: task bash:1569 blocked for more than 120 seconds.
       Not tainted 4.19.0-rc3_+ #687
 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
 bash            D    0  1569    604 0x00000000
 Call Trace:
  ? __schedule+0x1fe/0x7e0
  schedule+0x28/0x80
  suspend_devices_and_enter+0x4ac/0x750
  pm_suspend+0x2c0/0x310

Register a PM notifier to disable the detector on suspend and re-enable
it on wakeup.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
Changes since RFC:
- Add PM_HIBERNATION_PREPARE/PM_POST_HIBERNATION for consistency
  [Rafael J. Wysocki]
---
 kernel/hung_task.c | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index b9132d1269ef..41955c5d8427 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -15,6 +15,7 @@
 #include <linux/lockdep.h>
 #include <linux/export.h>
 #include <linux/sysctl.h>
+#include <linux/suspend.h>
 #include <linux/utsname.h>
 #include <linux/sched/signal.h>
 #include <linux/sched/debug.h>
@@ -242,6 +243,26 @@ void reset_hung_task_detector(void)
 }
 EXPORT_SYMBOL_GPL(reset_hung_task_detector);
 
+static bool hung_detector_suspended;
+
+static int hungtask_pm_notify(struct notifier_block *self,
+			      unsigned long action, void *hcpu)
+{
+	switch (action) {
+	case PM_SUSPEND_PREPARE:
+	case PM_HIBERNATION_PREPARE:
+		hung_detector_suspended = true;
+		break;
+	case PM_POST_SUSPEND:
+	case PM_POST_HIBERNATION:
+		hung_detector_suspended = false;
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_OK;
+}
+
 /*
  * kthread which checks for tasks stuck in D state
  */
@@ -261,7 +282,8 @@ static int watchdog(void *dummy)
 		interval = min_t(unsigned long, interval, timeout);
 		t = hung_timeout_jiffies(hung_last_checked, interval);
 		if (t <= 0) {
-			if (!atomic_xchg(&reset_hung_task, 0))
+			if (!atomic_xchg(&reset_hung_task, 0) &&
+			    !hung_detector_suspended)
 				check_hung_uninterruptible_tasks(timeout);
 			hung_last_checked = jiffies;
 			continue;
@@ -275,6 +297,10 @@ static int watchdog(void *dummy)
 static int __init hung_task_init(void)
 {
 	atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
+
+	/* Disable hung task detector on suspend */
+	pm_notifier(hungtask_pm_notify, 0);
+
 	watchdog_task = kthread_run(watchdog, NULL, "khungtaskd");
 
 	return 0;
-- 
2.14.4


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] kernel/hung_task.c: disable on suspend
  2018-09-13 16:08 [PATCH] kernel/hung_task.c: disable on suspend Vitaly Kuznetsov
@ 2018-09-14 11:26 ` Rafael J. Wysocki
  2018-09-14 14:19   ` Vitaly Kuznetsov
  0 siblings, 1 reply; 10+ messages in thread
From: Rafael J. Wysocki @ 2018-09-14 11:26 UTC (permalink / raw)
  To: Vitaly Kuznetsov
  Cc: linux-kernel, linux-pm, Andrew Morton, Dmitry Vyukov,
	Paul E. McKenney, Oleg Nesterov

On Thursday, September 13, 2018 6:08:51 PM CEST Vitaly Kuznetsov wrote:
> It is possible to observe hung_task complaints when system goes to
> suspend-to-idle state:
> 
>  PM: Syncing filesystems ... done.
>  Freezing user space processes ... (elapsed 0.001 seconds) done.
>  OOM killer disabled.
>  Freezing remaining freezable tasks ... (elapsed 0.002 seconds) done.
>  sd 0:0:0:0: [sda] Synchronizing SCSI cache
>  INFO: task bash:1569 blocked for more than 120 seconds.
>        Not tainted 4.19.0-rc3_+ #687
>  "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
>  bash            D    0  1569    604 0x00000000
>  Call Trace:
>   ? __schedule+0x1fe/0x7e0
>   schedule+0x28/0x80
>   suspend_devices_and_enter+0x4ac/0x750
>   pm_suspend+0x2c0/0x310
> 
> Register a PM notifier to disable the detector on suspend and re-enable
> back on wakeup.
> 
> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
> ---
> Changes since RFC:
> - Add PM_HIBERNATION_PREPARE/PM_POST_HIBERNATION for consistency
>   [Rafael J. Wysocki]
> ---
>  kernel/hung_task.c | 28 +++++++++++++++++++++++++++-
>  1 file changed, 27 insertions(+), 1 deletion(-)
> 
> diff --git a/kernel/hung_task.c b/kernel/hung_task.c
> index b9132d1269ef..41955c5d8427 100644
> --- a/kernel/hung_task.c
> +++ b/kernel/hung_task.c
> @@ -15,6 +15,7 @@
>  #include <linux/lockdep.h>
>  #include <linux/export.h>
>  #include <linux/sysctl.h>
> +#include <linux/suspend.h>
>  #include <linux/utsname.h>
>  #include <linux/sched/signal.h>
>  #include <linux/sched/debug.h>
> @@ -242,6 +243,26 @@ void reset_hung_task_detector(void)
>  }
>  EXPORT_SYMBOL_GPL(reset_hung_task_detector);
>  
> +static bool hung_detector_suspended;
> +
> +static int hungtask_pm_notify(struct notifier_block *self,
> +			      unsigned long action, void *hcpu)
> +{
> +	switch (action) {
> +	case PM_SUSPEND_PREPARE:
> +	case PM_HIBERNATION_PREPARE:
> +		hung_detector_suspended = true;
> +		break;
> +	case PM_POST_SUSPEND:
> +	case PM_POST_HIBERNATION:
> +		hung_detector_suspended = false;
> +		break;
> +	default:
> +		break;
> +	}
> +	return NOTIFY_OK;
> +}
> +
>  /*
>   * kthread which checks for tasks stuck in D state
>   */
> @@ -261,7 +282,8 @@ static int watchdog(void *dummy)
>  		interval = min_t(unsigned long, interval, timeout);
>  		t = hung_timeout_jiffies(hung_last_checked, interval);

Since you are adding the notifier anyway, what about designing it to make
the thread wait on _PREPARE until the notifier kicks it again on exit
from suspend/hibernation?

>  		if (t <= 0) {
> -			if (!atomic_xchg(&reset_hung_task, 0))
> +			if (!atomic_xchg(&reset_hung_task, 0) &&
> +			    !hung_detector_suspended)
>  				check_hung_uninterruptible_tasks(timeout);
>  			hung_last_checked = jiffies;
>  			continue;
> @@ -275,6 +297,10 @@ static int watchdog(void *dummy)
>  static int __init hung_task_init(void)
>  {
>  	atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
> +
> +	/* Disable hung task detector on suspend */
> +	pm_notifier(hungtask_pm_notify, 0);
> +
>  	watchdog_task = kthread_run(watchdog, NULL, "khungtaskd");
>  
>  	return 0;
> 


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] kernel/hung_task.c: disable on suspend
  2018-09-14 11:26 ` Rafael J. Wysocki
@ 2018-09-14 14:19   ` Vitaly Kuznetsov
  2018-09-14 16:21     ` Oleg Nesterov
  0 siblings, 1 reply; 10+ messages in thread
From: Vitaly Kuznetsov @ 2018-09-14 14:19 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: linux-kernel, linux-pm, Andrew Morton, Dmitry Vyukov,
	Paul E. McKenney, Oleg Nesterov

"Rafael J. Wysocki" <rjw@rjwysocki.net> writes:

> On Thursday, September 13, 2018 6:08:51 PM CEST Vitaly Kuznetsov wrote:
...

>> +static int hungtask_pm_notify(struct notifier_block *self,
>> +			      unsigned long action, void *hcpu)
>> +{
>> +	switch (action) {
>> +	case PM_SUSPEND_PREPARE:
>> +	case PM_HIBERNATION_PREPARE:
>> +		hung_detector_suspended = true;
>> +		break;
>> +	case PM_POST_SUSPEND:
>> +	case PM_POST_HIBERNATION:
>> +		hung_detector_suspended = false;
>> +		break;
>> +	default:
>> +		break;
>> +	}
>> +	return NOTIFY_OK;
>> +}
>> +
>>  /*
>>   * kthread which checks for tasks stuck in D state
>>   */
>> @@ -261,7 +282,8 @@ static int watchdog(void *dummy)
>>  		interval = min_t(unsigned long, interval, timeout);
>>  		t = hung_timeout_jiffies(hung_last_checked, interval);
>
> Since you are adding the notifier anyway, what about designing it to make
> the thread wait on _PREPARE until the notifier kicks it again on exit
> from suspend/hibernation?
>

We can either park the kthread (kthread_park/unpark) or make it go away
completely by doing kthread_stop/kthread_run(). I'm leaning toward
parking, what do you think?

-- 
  Vitaly

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] kernel/hung_task.c: disable on suspend
  2018-09-14 14:19   ` Vitaly Kuznetsov
@ 2018-09-14 16:21     ` Oleg Nesterov
  2018-09-17  8:25       ` Rafael J. Wysocki
  0 siblings, 1 reply; 10+ messages in thread
From: Oleg Nesterov @ 2018-09-14 16:21 UTC (permalink / raw)
  To: Vitaly Kuznetsov
  Cc: Rafael J. Wysocki, linux-kernel, linux-pm, Andrew Morton,
	Dmitry Vyukov, Paul E. McKenney

On 09/14, Vitaly Kuznetsov wrote:
>
> "Rafael J. Wysocki" <rjw@rjwysocki.net> writes:
> 
> > On Thursday, September 13, 2018 6:08:51 PM CEST Vitaly Kuznetsov wrote:
> ...
> 
> >> +static int hungtask_pm_notify(struct notifier_block *self,
> >> +			      unsigned long action, void *hcpu)
> >> +{
> >> +	switch (action) {
> >> +	case PM_SUSPEND_PREPARE:
> >> +	case PM_HIBERNATION_PREPARE:
> >> +		hung_detector_suspended = true;
> >> +		break;
> >> +	case PM_POST_SUSPEND:
> >> +	case PM_POST_HIBERNATION:
> >> +		hung_detector_suspended = false;
> >> +		break;
> >> +	default:
> >> +		break;
> >> +	}
> >> +	return NOTIFY_OK;
> >> +}
> >> +
> >>  /*
> >>   * kthread which checks for tasks stuck in D state
> >>   */
> >> @@ -261,7 +282,8 @@ static int watchdog(void *dummy)
> >>  		interval = min_t(unsigned long, interval, timeout);
> >>  		t = hung_timeout_jiffies(hung_last_checked, interval);
> >
> > Since you are adding the notifier anyway, what about designing it to make
> > the thread wait on _PREPARE until the notifier kicks it again on exit
> > from suspend/hibernation?

Well. I agree that freezable kthreads are not nice, but it seems you are
going to add another questionable interface ;)

Vitaly, could you please update the changelog to explain in detail what's
going on?

Where does the caller of pm_suspend() sleep in D state? Why does it sleep
for more than 120 seconds?

And, given that it takes system_transition_mutex anyway, can't it use
lock_system_sleep() which marks the caller as PF_FREEZER_SKIP (checked
in check_hung_task()) ?

I have to admit I got lost...

> We can either park the kthread (kthread_park/unpark)

No, no, please don't. Nobody outside of smpboot.c should use this (and
this interface should be reworked). Yes, there are already abusers, but
please don't add new users.

Oleg.


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] kernel/hung_task.c: disable on suspend
  2018-09-14 16:21     ` Oleg Nesterov
@ 2018-09-17  8:25       ` Rafael J. Wysocki
  2018-09-17 16:55         ` Oleg Nesterov
  0 siblings, 1 reply; 10+ messages in thread
From: Rafael J. Wysocki @ 2018-09-17  8:25 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Vitaly Kuznetsov, Rafael J. Wysocki, Linux Kernel Mailing List,
	Linux PM, Andrew Morton, Dmitry Vyukov, Paul McKenney

On Fri, Sep 14, 2018 at 6:21 PM Oleg Nesterov <oleg@redhat.com> wrote:
>
> On 09/14, Vitaly Kuznetsov wrote:
> >
> > "Rafael J. Wysocki" <rjw@rjwysocki.net> writes:
> >
> > > On Thursday, September 13, 2018 6:08:51 PM CEST Vitaly Kuznetsov wrote:
> > ...
> >
> > >> +static int hungtask_pm_notify(struct notifier_block *self,
> > >> +                        unsigned long action, void *hcpu)
> > >> +{
> > >> +  switch (action) {
> > >> +  case PM_SUSPEND_PREPARE:
> > >> +  case PM_HIBERNATION_PREPARE:
> > >> +          hung_detector_suspended = true;
> > >> +          break;
> > >> +  case PM_POST_SUSPEND:
> > >> +  case PM_POST_HIBERNATION:
> > >> +          hung_detector_suspended = false;
> > >> +          break;
> > >> +  default:
> > >> +          break;
> > >> +  }
> > >> +  return NOTIFY_OK;
> > >> +}
> > >> +
> > >>  /*
> > >>   * kthread which checks for tasks stuck in D state
> > >>   */
> > >> @@ -261,7 +282,8 @@ static int watchdog(void *dummy)
> > >>            interval = min_t(unsigned long, interval, timeout);
> > >>            t = hung_timeout_jiffies(hung_last_checked, interval);
> > >
> > > Since you are adding the notifier anyway, what about designing it to make
> > > the thread wait on _PREPARE until the notifier kicks it again on exit
> > > from suspend/hibernation?
>
> Well. I agree that freezable kthreads are not nice, but it seems you are
> going to add another questionable interface ;)

Why would it be questionable?

The watchdog needs to be disarmed somehow before tasks are frozen and
re-armed after they have been thawed or it may report false-positives
on the way out.  PM notifiers can be used for that.

Or do you mean that the synchronization between it and the freezer
that's already there should be sufficient?

> Vitaly, could you please update the changelog to explain in details whats
> going on?
>
> Where does the caller of pm_suspend() sleep in D state? Why it sleeps more
> than 120 seconds?

It need not be sleeping for over 2 minutes, but if suspend-to-idle
advances the clock sufficiently, the watchdog will regard that as the
task sleep time.

> And. given that it takes system_transition_mutex anyway, can't it use
> lock_system_sleep() which marks the caller as PF_FREEZER_SKIP (checked
> in check_hung_task()) ?

Well, it could, but that would be somewhat confusing and slightly
abusing the flag IMO.

Also, if the watchdog is stopped before the task freezing kicks in and
restarted after they have been all thawed, it will not have to
synchronize with the freezer any more I suppose?

Cheers,
Rafael

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] kernel/hung_task.c: disable on suspend
  2018-09-17  8:25       ` Rafael J. Wysocki
@ 2018-09-17 16:55         ` Oleg Nesterov
  2018-09-17 21:09           ` Rafael J. Wysocki
  2018-09-21 13:11           ` Vitaly Kuznetsov
  0 siblings, 2 replies; 10+ messages in thread
From: Oleg Nesterov @ 2018-09-17 16:55 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Vitaly Kuznetsov, Rafael J. Wysocki, Linux Kernel Mailing List,
	Linux PM, Andrew Morton, Dmitry Vyukov, Paul McKenney

On 09/17, Rafael J. Wysocki wrote:
>
> On Fri, Sep 14, 2018 at 6:21 PM Oleg Nesterov <oleg@redhat.com> wrote:
> >
> > > > Since you are adding the notifier anyway, what about designing it to make
> > > > the thread wait on _PREPARE until the notifier kicks it again on exit
> > > > from suspend/hibernation?
> >
> > Well. I agree that freezable kthreads are not nice, but it seems you are
> > going to add another questionable interface ;)
>
> Why would it be questionable?
>
> The watchdog needs to be disarmed somehow before tasks are frozen and
> re-armed after they have been thawed or it may report false-positives
> on the way out.  PM notifiers can be used for that.

Or watchdog() can simply use set_freezable/freezing interface we already
have, without additional complications.

Yes, this is not "before tasks are frozen", but probably should work?

OK, I won't argue.

> > Where does the caller of pm_suspend() sleep in D state? Why it sleeps more
> > than 120 seconds?
>
> It need not be sleeping for over 2 minutes, but if suspend-to-idle
> advances the clock sufficiently, the watchdog will regard that as the
> task sleep time.

As I already said, I don't understand this magic, so you can ignore me.

But again, it would be nice to explain this in the changelog, I mean, how
exactly (and why) jiffies can grow for over 2 minutes in this case.

> > And. given that it takes system_transition_mutex anyway, can't it use
> > lock_system_sleep() which marks the caller as PF_FREEZER_SKIP (checked
> > in check_hung_task()) ?
>
> Well, it could, but that would be somewhat confusing and slightly
> abusing the flag IMO.

OK, I won't insist.

Oleg.


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] kernel/hung_task.c: disable on suspend
  2018-09-17 16:55         ` Oleg Nesterov
@ 2018-09-17 21:09           ` Rafael J. Wysocki
  2018-09-21 13:11           ` Vitaly Kuznetsov
  1 sibling, 0 replies; 10+ messages in thread
From: Rafael J. Wysocki @ 2018-09-17 21:09 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Rafael J. Wysocki, Vitaly Kuznetsov, Rafael J. Wysocki,
	Linux Kernel Mailing List, Linux PM, Andrew Morton,
	Dmitry Vyukov, Paul McKenney

On Mon, Sep 17, 2018 at 6:55 PM Oleg Nesterov <oleg@redhat.com> wrote:
>
> On 09/17, Rafael J. Wysocki wrote:
> >
> > On Fri, Sep 14, 2018 at 6:21 PM Oleg Nesterov <oleg@redhat.com> wrote:
> > >
> > > > > Since you are adding the notifier anyway, what about designing it to make
> > > > > the thread wait on _PREPARE until the notifier kicks it again on exit
> > > > > from suspend/hibernation?
> > >
> > > Well. I agree that freezable kthreads are not nice, but it seems you are
> > > going to add another questionable interface ;)
> >
> > Why would it be questionable?
> >
> > The watchdog needs to be disarmed somehow before tasks are frozen and
> > re-armed after they have been thawed or it may report false-positives
> > on the way out.  PM notifiers can be used for that.
>
> Or watchdog() can simply use set_freezable/freezing interface we already
> have, without additional complications.
>
> Yes, this is not "before tasks are frozen", but probably should work?

Well, not really.

It is a kernel thread and therefore it is frozen after all user space
and thawed before it.

> OK, I won't argue.
>
> > > Where does the caller of pm_suspend() sleep in D state? Why it sleeps more
> > > than 120 seconds?
> >
> > It need not be sleeping for over 2 minutes, but if suspend-to-idle
> > advances the clock sufficiently, the watchdog will regard that as the
> > task sleep time.
>
> As I already said, I don't understand this magic, so you can ignore me.

Suspend-to-RAM suspends timekeeping (among other things) on the way to
system-wide suspend and resumes it on the way back to the working
state.  The time between those two events is not added to the
monotonic clock and jiffies is not updated while timekeeping is
suspended. As a result, the new jiffies value doesn't include the time
when the system is in the sleep state.  In that case the 2 minutes
interval is more than enough to cover the two system transitions (into
system-wide suspend and back) and the sleep time doesn't count.

Suspend-to-idle, OTOH, only suspends timekeeping when the last CPU
goes idle and resumes it when the first CPU is woken up.  That may
take place multiple times in a row while the system is regarded as
suspended, due to spurious wakeups.  While the time when timekeeping
is suspended still doesn't count (the monotonic clock is not advanced
and jiffies is not updated then), the time when at least one CPU is
not idle counts.  Hence, if the system is in suspend-to-idle for a
sufficiently long time and there are sufficiently many spurious
wakeups during that period, the monotonic clock and jiffies may be
advanced by over 2 minutes while the system is regarded as suspended.

> But again, it would be nice to explain this in the changelog, I mean, how
> exactly (and why) jiffies can grow for over 2 minutes in this case.

Agreed, the changelog should explain that.

> > > And. given that it takes system_transition_mutex anyway, can't it use
> > > lock_system_sleep() which marks the caller as PF_FREEZER_SKIP (checked
> > > in check_hung_task()) ?
> >
> > Well, it could, but that would be somewhat confusing and slightly
> > abusing the flag IMO.
>
> OK, I won't insist.

OK :-)

Cheers,
Rafael

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] kernel/hung_task.c: disable on suspend
  2018-09-17 16:55         ` Oleg Nesterov
  2018-09-17 21:09           ` Rafael J. Wysocki
@ 2018-09-21 13:11           ` Vitaly Kuznetsov
  2018-09-21 13:15             ` Rafael J. Wysocki
  1 sibling, 1 reply; 10+ messages in thread
From: Vitaly Kuznetsov @ 2018-09-21 13:11 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Rafael J. Wysocki, Rafael J. Wysocki, Linux Kernel Mailing List,
	Linux PM, Andrew Morton, Dmitry Vyukov, Paul McKenney

Oleg Nesterov <oleg@redhat.com> writes:

> On 09/17, Rafael J. Wysocki wrote:
>>
>> On Fri, Sep 14, 2018 at 6:21 PM Oleg Nesterov <oleg@redhat.com> wrote:
>> >
>> > > > Since you are adding the notifier anyway, what about designing it to make
>> > > > the thread wait on _PREPARE until the notifier kicks it again on exit
>> > > > from suspend/hibernation?
>> >
>> > Well. I agree that freezable kthreads are not nice, but it seems you are
>> > going to add another questionable interface ;)
>>
>> Why would it be questionable?
>>
>> The watchdog needs to be disarmed somehow before tasks are frozen and
>> re-armed after they have been thawed or it may report false-positives
>> on the way out.  PM notifiers can be used for that.
>
> Or watchdog() can simply use set_freezable/freezing interface we already
> have, without additional complications.
>
> Yes, this is not "before tasks are frozen", but probably should work?
>
> OK, I won't argue.

I was hoping you and Rafael would come to an agreement but the discussion
just died ... so where do we stand on this? I see the following options:

1) The v1 patch is good, no freezing/disabling/parking required.
2) Make the kthread freezable (btw, I tested your patch and it seems to
work).
3) kthread_stop/kthread_run() (as you said 'no parking').
4) Drop the patch and wait for the root cause (increasing jiffies) to
dissolve.
5) ???

Ideas?

-- 
  Vitaly

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] kernel/hung_task.c: disable on suspend
  2018-09-21 13:11           ` Vitaly Kuznetsov
@ 2018-09-21 13:15             ` Rafael J. Wysocki
  2018-09-21 15:18               ` Vitaly Kuznetsov
  0 siblings, 1 reply; 10+ messages in thread
From: Rafael J. Wysocki @ 2018-09-21 13:15 UTC (permalink / raw)
  To: Vitaly Kuznetsov
  Cc: Oleg Nesterov, Rafael J. Wysocki, Rafael J. Wysocki,
	Linux Kernel Mailing List, Linux PM, Andrew Morton,
	Dmitry Vyukov, Paul McKenney

On Fri, Sep 21, 2018 at 3:11 PM Vitaly Kuznetsov <vkuznets@redhat.com> wrote:
>
> Oleg Nesterov <oleg@redhat.com> writes:
>
> > On 09/17, Rafael J. Wysocki wrote:
> >>
> >> On Fri, Sep 14, 2018 at 6:21 PM Oleg Nesterov <oleg@redhat.com> wrote:
> >> >
> >> > > > Since you are adding the notifier anyway, what about designing it to make
> >> > > > the thread wait on _PREPARE until the notifier kicks it again on exit
> >> > > > from suspend/hibernation?
> >> >
> >> > Well. I agree that freezable kthreads are not nice, but it seems you are
> >> > going to add another questionable interface ;)
> >>
> >> Why would it be questionable?
> >>
> >> The watchdog needs to be disarmed somehow before tasks are frozen and
> >> re-armed after they have been thawed or it may report false-positives
> >> on the way out.  PM notifiers can be used for that.
> >
> > Or watchdog() can simply use set_freezable/freezing interface we already
> > have, without additional complications.
> >
> > Yes, this is not "before tasks are frozen", but probably should work?
> >
> > OK, I won't argue.
>
> I was hoping you and Rafael will come to an agreement but the discussion
> just died ... so where do we stand on this? I see the following options:
>
> 1) The v1 patch is good, no freezing/disabling/parking required.

This would work IMO, but it also is somewhat wasteful to run this
thread when "suspended", because it doesn't do anything then.

Maybe you could simply force "timeout" to be zero for the "suspension"
time?  That should effectively make the thread sleep then, right?

Thanks,
Rafael

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] kernel/hung_task.c: disable on suspend
  2018-09-21 13:15             ` Rafael J. Wysocki
@ 2018-09-21 15:18               ` Vitaly Kuznetsov
  0 siblings, 0 replies; 10+ messages in thread
From: Vitaly Kuznetsov @ 2018-09-21 15:18 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Oleg Nesterov, Rafael J. Wysocki, Linux Kernel Mailing List,
	Linux PM, Andrew Morton, Dmitry Vyukov, Paul McKenney

"Rafael J. Wysocki" <rafael@kernel.org> writes:

> On Fri, Sep 21, 2018 at 3:11 PM Vitaly Kuznetsov <vkuznets@redhat.com> wrote:
>>
>> Oleg Nesterov <oleg@redhat.com> writes:
>>
>> > On 09/17, Rafael J. Wysocki wrote:
>> >>
>> >> On Fri, Sep 14, 2018 at 6:21 PM Oleg Nesterov <oleg@redhat.com> wrote:
>> >> >
>> >> > > > Since you are adding the notifier anyway, what about designing it to make
>> >> > > > the thread wait on _PREPARE until the notifier kicks it again on exit
>> >> > > > from suspend/hibernation?
>> >> >
>> >> > Well. I agree that freezable kthreads are not nice, but it seems you are
>> >> > going to add another questionable interface ;)
>> >>
>> >> Why would it be questionable?
>> >>
>> >> The watchdog needs to be disarmed somehow before tasks are frozen and
>> >> re-armed after they have been thawed or it may report false-positives
>> >> on the way out.  PM notifiers can be used for that.
>> >
>> > Or watchdog() can simply use set_freezable/freezing interface we already
>> > have, without additional complications.
>> >
>> > Yes, this is not "before tasks are frozen", but probably should work?
>> >
>> > OK, I won't argue.
>>
>> I was hoping you and Rafael will come to an agreement but the discussion
>> just died ... so where do we stand on this? I see the following options:
>>
>> 1) The v1 patch is good, no freezing/disabling/parking required.
>
> This would work IMO, but it also is somewhat wasteful to run this
> thread when "suspended", because it doesn't do anything then.
>
> Maybe you could simply force "timeout" to be zero for the "suspension"
> time?  That should effectively make the thread sleep then, right?
>

Right, we'll have to zero both interval and timeout (if we want to keep
the current logic picking the minimum of two). The following patch seems
to be working for me; I'll send it out as v2 if there are no objections:

diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index b9132d1269ef..ac6e8c9306bd 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -15,6 +15,7 @@
 #include <linux/lockdep.h>
 #include <linux/export.h>
 #include <linux/sysctl.h>
+#include <linux/suspend.h>
 #include <linux/utsname.h>
 #include <linux/sched/signal.h>
 #include <linux/sched/debug.h>
@@ -242,12 +243,14 @@ void reset_hung_task_detector(void)
 }
 EXPORT_SYMBOL_GPL(reset_hung_task_detector);
 
+static unsigned long hung_last_checked;
+
 /*
  * kthread which checks for tasks stuck in D state
  */
 static int watchdog(void *dummy)
 {
-	unsigned long hung_last_checked = jiffies;
+	hung_last_checked = jiffies;
 
 	set_user_nice(current, 0);
 
@@ -272,9 +275,40 @@ static int watchdog(void *dummy)
 	return 0;
 }
 
+static int hungtask_pm_notify(struct notifier_block *self,
+			      unsigned long action, void *hcpu)
+{
+	static unsigned long saved_timeout, saved_interval;
+
+	switch (action) {
+	case PM_SUSPEND_PREPARE:
+	case PM_HIBERNATION_PREPARE:
+		saved_timeout = sysctl_hung_task_timeout_secs;
+		saved_interval = sysctl_hung_task_check_interval_secs;
+		sysctl_hung_task_timeout_secs = 0;
+		sysctl_hung_task_check_interval_secs = 0;
+		wake_up_process(watchdog_task);
+		break;
+	case PM_POST_SUSPEND:
+	case PM_POST_HIBERNATION:
+		sysctl_hung_task_timeout_secs = saved_timeout;
+		sysctl_hung_task_check_interval_secs = saved_interval;
+		hung_last_checked = jiffies;
+		wake_up_process(watchdog_task);
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_OK;
+}
+
 static int __init hung_task_init(void)
 {
 	atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
+
+	/* Disable hung task detector on suspend */
+	pm_notifier(hungtask_pm_notify, 0);
+
 	watchdog_task = kthread_run(watchdog, NULL, "khungtaskd");
 
 	return 0;

-- 
  Vitaly

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, back to index

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-09-13 16:08 [PATCH] kernel/hung_task.c: disable on suspend Vitaly Kuznetsov
2018-09-14 11:26 ` Rafael J. Wysocki
2018-09-14 14:19   ` Vitaly Kuznetsov
2018-09-14 16:21     ` Oleg Nesterov
2018-09-17  8:25       ` Rafael J. Wysocki
2018-09-17 16:55         ` Oleg Nesterov
2018-09-17 21:09           ` Rafael J. Wysocki
2018-09-21 13:11           ` Vitaly Kuznetsov
2018-09-21 13:15             ` Rafael J. Wysocki
2018-09-21 15:18               ` Vitaly Kuznetsov

LKML Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/lkml/0 lkml/git/0.git
	git clone --mirror https://lore.kernel.org/lkml/1 lkml/git/1.git
	git clone --mirror https://lore.kernel.org/lkml/2 lkml/git/2.git
	git clone --mirror https://lore.kernel.org/lkml/3 lkml/git/3.git
	git clone --mirror https://lore.kernel.org/lkml/4 lkml/git/4.git
	git clone --mirror https://lore.kernel.org/lkml/5 lkml/git/5.git
	git clone --mirror https://lore.kernel.org/lkml/6 lkml/git/6.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 lkml lkml/ https://lore.kernel.org/lkml \
		linux-kernel@vger.kernel.org linux-kernel@archiver.kernel.org
	public-inbox-index lkml


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-kernel


AGPL code for this site: git clone https://public-inbox.org/ public-inbox