All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] hungtask: add filter kthread/check comm
@ 2021-05-21 13:25 chenguanyou
  2021-05-21 16:37 ` Randy Dunlap
  0 siblings, 1 reply; 11+ messages in thread
From: chenguanyou @ 2021-05-21 13:25 UTC (permalink / raw)
  To: linux-kernel
  Cc: akpm, keescook, mhocko, lukas.bulwahn, vbabka, gpiccoli, chenguanyou

Some kernel threads are always in the D state. When hung_task is enabled,
they are misreported as hung, so we should skip them to narrow the scope.

Example from an MTK mobile phone:
root            420   420      2       0      0 kwdt_thread         0 D wdtk-0
root            421   421      2       0      0 kwdt_thread         0 D wdtk-1
root            422   422      2       0      0 kwdt_thread         0 D wdtk-2
root            423   423      2       0      0 kwdt_thread         0 D wdtk-3
root            424   424      2       0      0 kwdt_thread         0 D wdtk-4
root            425   425      2       0      0 kwdt_thread         0 D wdtk-5
root            426   426      2       0      0 kwdt_thread         0 D wdtk-6
root            427   427      2       0      0 kwdt_thread         0 D wdtk-7
root            435   435      2       0      0 mtk_lpm_monitor_thread 0 D LPM-0
root            436   436      2       0      0 mtk_lpm_monitor_thread 0 D LPM-1
root            437   437      2       0      0 mtk_lpm_monitor_thread 0 D LPM-2
root            438   438      2       0      0 mtk_lpm_monitor_thread 0 D LPM-3
root            439   439      2       0      0 mtk_lpm_monitor_thread 0 D LPM-4
root            440   440      2       0      0 mtk_lpm_monitor_thread 0 D LPM-5
root            441   441      2       0      0 mtk_lpm_monitor_thread 0 D LPM-6
root            442   442      2       0      0 mtk_lpm_monitor_thread 0 D LPM-7

Signed-off-by: chenguanyou <chenguanyou@xiaomi.com>
---
 include/linux/sched/sysctl.h |  4 ++++
 kernel/hung_task.c           | 17 +++++++++++++++++
 kernel/sysctl.c              | 16 ++++++++++++++++
 lib/Kconfig.debug            | 24 ++++++++++++++++++++++++
 4 files changed, 61 insertions(+)

diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 3c31ba88aca5..e8a9a28215bf 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -14,11 +14,15 @@ extern unsigned int sysctl_hung_task_all_cpu_backtrace;
 #define sysctl_hung_task_all_cpu_backtrace 0
 #endif /* CONFIG_SMP */
 
+#define TASK_COMM_LEN 16
+
 extern int	     sysctl_hung_task_check_count;
 extern unsigned int  sysctl_hung_task_panic;
 extern unsigned long sysctl_hung_task_timeout_secs;
 extern unsigned long sysctl_hung_task_check_interval_secs;
 extern int sysctl_hung_task_warnings;
+extern unsigned int sysctl_hung_task_filter_kthread;
+extern char sysctl_hung_task_check_comm[TASK_COMM_LEN];
 int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
 		void *buffer, size_t *lenp, loff_t *ppos);
 #else
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 396ebaebea3f..e018563d4882 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -23,6 +23,7 @@
 #include <linux/sched/sysctl.h>
 
 #include <trace/events/sched.h>
+#include <linux/string.h>
 
 /*
  * The number of tasks checked:
@@ -48,6 +49,16 @@ unsigned long __read_mostly sysctl_hung_task_timeout_secs = CONFIG_DEFAULT_HUNG_
  */
 unsigned long __read_mostly sysctl_hung_task_check_interval_secs;
 
+/*
+ * Non zero means no checking kthread
+ */
+unsigned int __read_mostly sysctl_hung_task_filter_kthread = CONFIG_DEFAULT_HUNG_TASK_FILTER_KTHREAD;
+
+/*
+ * Only one
+ */
+char __read_mostly sysctl_hung_task_check_comm[TASK_COMM_LEN] = CONFIG_DEFAULT_HUNG_TASK_CHECK_COMM;
+
 int __read_mostly sysctl_hung_task_warnings = 10;
 
 static int __read_mostly did_panic;
@@ -88,6 +99,12 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
 {
 	unsigned long switch_count = t->nvcsw + t->nivcsw;
 
+	if (unlikely(strlen(sysctl_hung_task_check_comm) && !strstr(t->comm, sysctl_hung_task_check_comm)))
+		return;
+
+	if (unlikely(sysctl_hung_task_filter_kthread && t->flags & PF_KTHREAD))
+		return;
+
 	/*
 	 * Ensure the task is not frozen.
 	 * Also, skip vfork and any other user process that freezer should skip.
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 62fbd09b5dc1..1daede87c88d 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2519,6 +2519,22 @@ static struct ctl_table kern_table[] = {
 		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= &neg_one,
 	},
+	{
+		.procname	= "hung_task_filter_kthread",
+		.data		= &sysctl_hung_task_filter_kthread,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler   = proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+	{
+		.procname	= "hung_task_check_comm",
+		.data		= &sysctl_hung_task_check_comm,
+		.maxlen		= TASK_COMM_LEN,
+		.mode		= 0644,
+		.proc_handler	= proc_dostring,
+	},
 #endif
 #ifdef CONFIG_RT_MUTEXES
 	{
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 2c7f46b366f1..6eab8cf0c37f 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1101,6 +1101,30 @@ config DEFAULT_HUNG_TASK_TIMEOUT
 	  A timeout of 0 disables the check.  The default is two minutes.
 	  Keeping the default should be fine in most cases.
 
+config DEFAULT_HUNG_TASK_FILTER_KTHREAD
+	int "Default filter kthread for hung task"
+	depends on DETECT_HUNG_TASK
+	range 0 1
+	default 0
+	help
+	  This option controls filter kthread used to determine when
+	  a kernel task has become "state=TASK_UNINTERRUPTIBLE" and should be skip.
+
+	  It can be adjusted at runtime via the kernel.hung_task_filter_kthread
+	  sysctl or by writing a value to
+	  /proc/sys/kernel/hung_task_filter_kthread.
+
+	  A filter of 1 disables the check
+
+config DEFAULT_HUNG_TASK_CHECK_COMM
+	string "Default check only one comm"
+	depends on DETECT_HUNG_TASK
+	default ""
+	help
+	  It can be adjusted at runtime via the kernel.hung_task_check_comm
+	  sysctl or by writing a value to
+	  /proc/sys/kernel/hung_task_check_comm.
+
 config BOOTPARAM_HUNG_TASK_PANIC
 	bool "Panic (Reboot) On Hung Tasks"
 	depends on DETECT_HUNG_TASK
-- 
2.17.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] hungtask: add filter kthread/check comm
  2021-05-21 13:25 [PATCH] hungtask: add filter kthread/check comm chenguanyou
@ 2021-05-21 16:37 ` Randy Dunlap
  0 siblings, 0 replies; 11+ messages in thread
From: Randy Dunlap @ 2021-05-21 16:37 UTC (permalink / raw)
  To: chenguanyou, linux-kernel
  Cc: akpm, keescook, mhocko, lukas.bulwahn, vbabka, gpiccoli, chenguanyou

On 5/21/21 6:25 AM, chenguanyou wrote:
> Some kernel threads are always in D state, when we enable hung_task,
> it will misjudge, we should skip these to narrow the scope.
> 
> exp mtk mobilephone:
> root            420   420      2       0      0 kwdt_thread         0 D wdtk-0
> root            421   421      2       0      0 kwdt_thread         0 D wdtk-1
> root            422   422      2       0      0 kwdt_thread         0 D wdtk-2
> root            423   423      2       0      0 kwdt_thread         0 D wdtk-3
> root            424   424      2       0      0 kwdt_thread         0 D wdtk-4
> root            425   425      2       0      0 kwdt_thread         0 D wdtk-5
> root            426   426      2       0      0 kwdt_thread         0 D wdtk-6
> root            427   427      2       0      0 kwdt_thread         0 D wdtk-7
> root            435   435      2       0      0 mtk_lpm_monitor_thread 0 D LPM-0
> root            436   436      2       0      0 mtk_lpm_monitor_thread 0 D LPM-1
> root            437   437      2       0      0 mtk_lpm_monitor_thread 0 D LPM-2
> root            438   438      2       0      0 mtk_lpm_monitor_thread 0 D LPM-3
> root            439   439      2       0      0 mtk_lpm_monitor_thread 0 D LPM-4
> root            440   440      2       0      0 mtk_lpm_monitor_thread 0 D LPM-5
> root            441   441      2       0      0 mtk_lpm_monitor_thread 0 D LPM-6
> root            442   442      2       0      0 mtk_lpm_monitor_thread 0 D LPM-7
> 
> Signed-off-by: chenguanyou <chenguanyou@xiaomi.com>
> ---
>  include/linux/sched/sysctl.h |  4 ++++
>  kernel/hung_task.c           | 17 +++++++++++++++++
>  kernel/sysctl.c              | 16 ++++++++++++++++
>  lib/Kconfig.debug            | 24 ++++++++++++++++++++++++
>  4 files changed, 61 insertions(+)
> 
> diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
> index 3c31ba88aca5..e8a9a28215bf 100644
> --- a/include/linux/sched/sysctl.h
> +++ b/include/linux/sched/sysctl.h
> @@ -14,11 +14,15 @@ extern unsigned int sysctl_hung_task_all_cpu_backtrace;
>  #define sysctl_hung_task_all_cpu_backtrace 0
>  #endif /* CONFIG_SMP */
>  
> +#define TASK_COMM_LEN 16
> +
>  extern int	     sysctl_hung_task_check_count;
>  extern unsigned int  sysctl_hung_task_panic;
>  extern unsigned long sysctl_hung_task_timeout_secs;
>  extern unsigned long sysctl_hung_task_check_interval_secs;
>  extern int sysctl_hung_task_warnings;
> +extern unsigned int sysctl_hung_task_filter_kthread;
> +extern char sysctl_hung_task_check_comm[TASK_COMM_LEN];
>  int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
>  		void *buffer, size_t *lenp, loff_t *ppos);
>  #else
> diff --git a/kernel/hung_task.c b/kernel/hung_task.c
> index 396ebaebea3f..e018563d4882 100644
> --- a/kernel/hung_task.c
> +++ b/kernel/hung_task.c
> @@ -23,6 +23,7 @@
>  #include <linux/sched/sysctl.h>
>  
>  #include <trace/events/sched.h>
> +#include <linux/string.h>
>  
>  /*
>   * The number of tasks checked:
> @@ -48,6 +49,16 @@ unsigned long __read_mostly sysctl_hung_task_timeout_secs = CONFIG_DEFAULT_HUNG_
>   */
>  unsigned long __read_mostly sysctl_hung_task_check_interval_secs;
>  
> +/*
> + * Non zero means no checking kthread

      Non-zero

(also mentioned in v1 review, whereas this is v2, I guess, although
it doesn't say that it is v2, but it should say that.)

> + */
> +unsigned int __read_mostly sysctl_hung_task_filter_kthread = CONFIG_DEFAULT_HUNG_TASK_FILTER_KTHREAD;
> +
> +/*
> + * Only one
> + */
> +char __read_mostly sysctl_hung_task_check_comm[TASK_COMM_LEN] = CONFIG_DEFAULT_HUNG_TASK_CHECK_COMM;
> +
>  int __read_mostly sysctl_hung_task_warnings = 10;
>  
>  static int __read_mostly did_panic;
> @@ -88,6 +99,12 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
>  {
>  	unsigned long switch_count = t->nvcsw + t->nivcsw;
>  
> +	if (unlikely(strlen(sysctl_hung_task_check_comm) && !strstr(t->comm, sysctl_hung_task_check_comm)))
> +		return;
> +
> +	if (unlikely(sysctl_hung_task_filter_kthread && t->flags & PF_KTHREAD))
> +		return;
> +
>  	/*
>  	 * Ensure the task is not frozen.
>  	 * Also, skip vfork and any other user process that freezer should skip.
> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> index 62fbd09b5dc1..1daede87c88d 100644
> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> @@ -2519,6 +2519,22 @@ static struct ctl_table kern_table[] = {
>  		.proc_handler	= proc_dointvec_minmax,
>  		.extra1		= &neg_one,
>  	},
> +	{
> +		.procname	= "hung_task_filter_kthread",
> +		.data		= &sysctl_hung_task_filter_kthread,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler   = proc_dointvec_minmax,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= SYSCTL_ONE,
> +	},
> +	{
> +		.procname	= "hung_task_check_comm",
> +		.data		= &sysctl_hung_task_check_comm,
> +		.maxlen		= TASK_COMM_LEN,
> +		.mode		= 0644,
> +		.proc_handler	= proc_dostring,
> +	},

(copy-paste from v1 review:)

These new sysctls should be documented in Documentation/admin-guide/sysctl/kernel.rst.


>  #endif
>  #ifdef CONFIG_RT_MUTEXES
>  	{
> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> index 2c7f46b366f1..6eab8cf0c37f 100644
> --- a/lib/Kconfig.debug
> +++ b/lib/Kconfig.debug
> @@ -1101,6 +1101,30 @@ config DEFAULT_HUNG_TASK_TIMEOUT
>  	  A timeout of 0 disables the check.  The default is two minutes.
>  	  Keeping the default should be fine in most cases.
>  
> +config DEFAULT_HUNG_TASK_FILTER_KTHREAD
> +	int "Default filter kthread for hung task"
> +	depends on DETECT_HUNG_TASK
> +	range 0 1
> +	default 0
> +	help
> +	  This option controls filter kthread used to determine when

(again:)
	                                      uses

> +	  a kernel task has become "state=TASK_UNINTERRUPTIBLE" and should be skip.

(again:)
	                                                                      skipped.

> +
> +	  It can be adjusted at runtime via the kernel.hung_task_filter_kthread
> +	  sysctl or by writing a value to
> +	  /proc/sys/kernel/hung_task_filter_kthread.
> +
> +	  A filter of 1 disables the check

(again:)
	                             check.

> +
> +config DEFAULT_HUNG_TASK_CHECK_COMM
> +	string "Default check only one comm"
> +	depends on DETECT_HUNG_TASK
> +	default ""
> +	help
> +	  It can be adjusted at runtime via the kernel.hung_task_check_comm
> +	  sysctl or by writing a value to
> +	  /proc/sys/kernel/hung_task_check_comm.
> +

(again:)
That help text doesn't tell how the Kconfig symbol is used.

>  config BOOTPARAM_HUNG_TASK_PANIC
>  	bool "Panic (Reboot) On Hung Tasks"
>  	depends on DETECT_HUNG_TASK
> 


-- 
~Randy

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] hungtask: add filter kthread/check comm
       [not found]   ` <CAHS3RMWjuB98TzvcYyQ0qtNYOxDeM7W1YmyqDYs=H-cn-VsVdw@mail.gmail.com>
@ 2021-05-25  7:26     ` Michal Hocko
  0 siblings, 0 replies; 11+ messages in thread
From: Michal Hocko @ 2021-05-25  7:26 UTC (permalink / raw)
  To: 陈冠有
  Cc: Andrew Morton, linux-kernel, Kees Cook, lukas.bulwahn, vbabka,
	gpiccoli, chenguanyou

On Tue 25-05-21 11:27:16, 陈冠有 wrote:
[...]
> These are MTK's LPM designs.

No idea what those are, but it seems like out-of-tree code.

> If we have linux-api, hungtask can choose to skip kthread, helpful for us
> to debug user space threads in "state=D" when enable hungtask panic.

No, this approach is wrong. You are trying to work around incorrect
out-of-tree code by creating a user-visible API. I have already brought
that up in an earlier version of this patch.

Nacked-by: Michal Hocko <mhocko@suse.com>

Andrew has already given you a lead on how to fix the said code. Use
TASK_IDLE when waiting for an event. This will both hide that waiting
task from load average accounting and also from the hung task detector.
-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] hungtask: add filter kthread/check comm
  2021-05-21 13:25 chenguanyou
@ 2021-05-22 21:51 ` Andrew Morton
       [not found]   ` <CAHS3RMWjuB98TzvcYyQ0qtNYOxDeM7W1YmyqDYs=H-cn-VsVdw@mail.gmail.com>
  0 siblings, 1 reply; 11+ messages in thread
From: Andrew Morton @ 2021-05-22 21:51 UTC (permalink / raw)
  To: chenguanyou
  Cc: linux-kernel, keescook, mhocko, lukas.bulwahn, vbabka, gpiccoli,
	chenguanyou

On Fri, 21 May 2021 21:25:44 +0800 chenguanyou <chenguanyou9338@gmail.com> wrote:

> Some kernel threads are always in D state, when we enable hung_task,
> it will misjudge, we should skip these to narrow the scope.
> 
> exp mtk mobilephone:
> root            435   435      2       0      0 mtk_lpm_monitor_thread 0 D LPM-0
> root            436   436      2       0      0 mtk_lpm_monitor_thread 0 D LPM-1
> root            437   437      2       0      0 mtk_lpm_monitor_thread 0 D LPM-2
> root            438   438      2       0      0 mtk_lpm_monitor_thread 0 D LPM-3
> root            439   439      2       0      0 mtk_lpm_monitor_thread 0 D LPM-4
> root            440   440      2       0      0 mtk_lpm_monitor_thread 0 D LPM-5
> root            441   441      2       0      0 mtk_lpm_monitor_thread 0 D LPM-6
> root            442   442      2       0      0 mtk_lpm_monitor_thread 0 D LPM-7

Maybe convert these threads to use TASK_IDLE?

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH] hungtask: add filter kthread/check comm
@ 2021-05-21 13:25 chenguanyou
  2021-05-22 21:51 ` Andrew Morton
  0 siblings, 1 reply; 11+ messages in thread
From: chenguanyou @ 2021-05-21 13:25 UTC (permalink / raw)
  To: linux-kernel
  Cc: akpm, keescook, mhocko, lukas.bulwahn, vbabka, gpiccoli, chenguanyou

Some kernel threads are always in the D state. When hung_task is enabled,
they are misreported as hung, so we should skip them to narrow the scope.

Example from an MTK mobile phone:
root            435   435      2       0      0 mtk_lpm_monitor_thread 0 D LPM-0
root            436   436      2       0      0 mtk_lpm_monitor_thread 0 D LPM-1
root            437   437      2       0      0 mtk_lpm_monitor_thread 0 D LPM-2
root            438   438      2       0      0 mtk_lpm_monitor_thread 0 D LPM-3
root            439   439      2       0      0 mtk_lpm_monitor_thread 0 D LPM-4
root            440   440      2       0      0 mtk_lpm_monitor_thread 0 D LPM-5
root            441   441      2       0      0 mtk_lpm_monitor_thread 0 D LPM-6
root            442   442      2       0      0 mtk_lpm_monitor_thread 0 D LPM-7

Signed-off-by: chenguanyou <chenguanyou@xiaomi.com>
---
 Documentation/admin-guide/sysctl/kernel.rst | 16 ++++++++++++
 include/linux/sched/sysctl.h                |  4 +++
 kernel/hung_task.c                          | 17 +++++++++++++
 kernel/sysctl.c                             | 16 ++++++++++++
 lib/Kconfig.debug                           | 27 +++++++++++++++++++++
 5 files changed, 80 insertions(+)

diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index 1d56a6b73a4e..081ca22db4d5 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -405,6 +405,22 @@ This file shows up if ``CONFIG_DETECT_HUNG_TASK`` is enabled.
 
 -1: report an infinite number of warnings.
 
+hung_task_filter_kthread
+========================
+
+Controls whether the hung task detector skips kernel threads.
+This file shows up if ``CONFIG_DETECT_HUNG_TASK`` is enabled.
+
+= =========================================================
+0 Kernel threads are checked.
+1 Kernel threads are skipped.
+= =========================================================
+
+hung_task_check_comm
+====================
+
+When set, only tasks whose comm contains this string are checked for hangs.
+This file shows up if ``CONFIG_DETECT_HUNG_TASK`` is enabled.
 
 hyperv_record_panic_msg
 =======================
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 3c31ba88aca5..e8a9a28215bf 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -14,11 +14,15 @@ extern unsigned int sysctl_hung_task_all_cpu_backtrace;
 #define sysctl_hung_task_all_cpu_backtrace 0
 #endif /* CONFIG_SMP */
 
+#define TASK_COMM_LEN 16
+
 extern int	     sysctl_hung_task_check_count;
 extern unsigned int  sysctl_hung_task_panic;
 extern unsigned long sysctl_hung_task_timeout_secs;
 extern unsigned long sysctl_hung_task_check_interval_secs;
 extern int sysctl_hung_task_warnings;
+extern unsigned int sysctl_hung_task_filter_kthread;
+extern char sysctl_hung_task_check_comm[TASK_COMM_LEN];
 int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
 		void *buffer, size_t *lenp, loff_t *ppos);
 #else
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 396ebaebea3f..baee8466b902 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -23,6 +23,7 @@
 #include <linux/sched/sysctl.h>
 
 #include <trace/events/sched.h>
+#include <linux/string.h>
 
 /*
  * The number of tasks checked:
@@ -48,6 +49,16 @@ unsigned long __read_mostly sysctl_hung_task_timeout_secs = CONFIG_DEFAULT_HUNG_
  */
 unsigned long __read_mostly sysctl_hung_task_check_interval_secs;
 
+/*
+ * Non-zero means kernel threads are not checked
+ */
+unsigned int __read_mostly sysctl_hung_task_filter_kthread = CONFIG_DEFAULT_HUNG_TASK_FILTER_KTHREAD;
+
+/*
+ * When non-empty, only tasks whose comm contains this string are checked
+ */
+char __read_mostly sysctl_hung_task_check_comm[TASK_COMM_LEN] = CONFIG_DEFAULT_HUNG_TASK_CHECK_COMM;
+
 int __read_mostly sysctl_hung_task_warnings = 10;
 
 static int __read_mostly did_panic;
@@ -88,6 +99,12 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
 {
 	unsigned long switch_count = t->nvcsw + t->nivcsw;
 
+	if (unlikely(strlen(sysctl_hung_task_check_comm) && !strstr(t->comm, sysctl_hung_task_check_comm)))
+		return;
+
+	if (unlikely(sysctl_hung_task_filter_kthread && t->flags & PF_KTHREAD))
+		return;
+
 	/*
 	 * Ensure the task is not frozen.
 	 * Also, skip vfork and any other user process that freezer should skip.
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 62fbd09b5dc1..1daede87c88d 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2519,6 +2519,22 @@ static struct ctl_table kern_table[] = {
 		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= &neg_one,
 	},
+	{
+		.procname	= "hung_task_filter_kthread",
+		.data		= &sysctl_hung_task_filter_kthread,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler   = proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+	{
+		.procname	= "hung_task_check_comm",
+		.data		= &sysctl_hung_task_check_comm,
+		.maxlen		= TASK_COMM_LEN,
+		.mode		= 0644,
+		.proc_handler	= proc_dostring,
+	},
 #endif
 #ifdef CONFIG_RT_MUTEXES
 	{
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 2c7f46b366f1..63570b1fec35 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1101,6 +1101,33 @@ config DEFAULT_HUNG_TASK_TIMEOUT
 	  A timeout of 0 disables the check.  The default is two minutes.
 	  Keeping the default should be fine in most cases.
 
+config DEFAULT_HUNG_TASK_FILTER_KTHREAD
+	int "Default filter kthread for hung task"
+	depends on DETECT_HUNG_TASK
+	range 0 1
+	default 0
+	help
+	  This option sets the default for whether the hung task detector
+	  skips kernel threads that are in the TASK_UNINTERRUPTIBLE state.
+
+	  It can be adjusted at runtime via the kernel.hung_task_filter_kthread
+	  sysctl or by writing a value to
+	  /proc/sys/kernel/hung_task_filter_kthread.
+
+	  A value of 1 makes the detector skip kernel threads.
+
+config DEFAULT_HUNG_TASK_CHECK_COMM
+	string "Default check only one comm"
+	depends on DETECT_HUNG_TASK
+	default ""
+	help
+	  When this string is non-empty, the hung task detector only checks
+	  tasks whose comm contains it; all other tasks are skipped.
+
+	  It can be adjusted at runtime via the kernel.hung_task_check_comm
+	  sysctl or by writing a value to
+	  /proc/sys/kernel/hung_task_check_comm.
+
 config BOOTPARAM_HUNG_TASK_PANIC
 	bool "Panic (Reboot) On Hung Tasks"
 	depends on DETECT_HUNG_TASK
-- 
2.17.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] hungtask: add filter kthread/check comm
       [not found]   ` <CAHS3RMVYbHTzD6JnOmE331qSbcnvuYnBe0jraNuuLc0Z2NnStg@mail.gmail.com>
@ 2021-05-21  8:31     ` Michal Hocko
  0 siblings, 0 replies; 11+ messages in thread
From: Michal Hocko @ 2021-05-21  8:31 UTC (permalink / raw)
  To: 陈冠有
  Cc: Randy Dunlap, linux-kernel, akpm, Kees Cook, lukas.bulwahn,
	vbabka, gpiccoli, chenguanyou

On Fri 21-05-21 14:38:06, 陈冠有 wrote:
> when we no skip kthread, exp log:
> 3,25164,832483138,-; (1)[67:khungtaskd]INFO: task amms_task:51 blocked for
> more than 120 seconds.
> 3,25165,832483169,-; (1)[67:khungtaskd]      Tainted: P S      W  O
>  4.14.186-g9d5f2ff-dirty #4
> 3,25166,832483186,-; (1)[67:khungtaskd]"echo 0 >
> /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> 6,25167,832483205,-; (1)[67:khungtaskd]amms_task       D    0    51      2
> 0x00000000
> 4,25168,832483232,-; (1)[67:khungtaskd]Call trace:
> 4,25169,832483268,-; (1)[67:khungtaskd] __switch_to+0x134/0x150
> 4,25170,832483297,-; (1)[67:khungtaskd] __schedule+0xd5c/0x1100
> 4,25171,832483318,-; (1)[67:khungtaskd] schedule+0x70/0x90
> 4,25172,832483343,-; (1)[67:khungtaskd] kthread+0xfc/0x18c
> 4,25173,832483365,-; (1)[67:khungtaskd] ret_from_fork+0x10/0x18
> 3,25174,832483482,-; (1)[67:khungtaskd]INFO: task mdrt_thread:123 blocked
> for more than 120 seconds.
> 3,25175,832483501,-; (1)[67:khungtaskd]      Tainted: P S      W  O
>  4.14.186-g9d5f2ff-dirty #4
> 3,25176,832483516,-; (1)[67:khungtaskd]"echo 0 >
> /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> 6,25177,832483533,-; (1)[67:khungtaskd]mdrt_thread     D    0   123      2
> 0x00000000
> 4,25178,832483555,-; (1)[67:khungtaskd]Call trace:
> 4,25179,832483574,-; (1)[67:khungtaskd] __switch_to+0x134/0x150
> 4,25180,832483595,-; (1)[67:khungtaskd] __schedule+0xd5c/0x1100
> 4,25181,832483615,-; (1)[67:khungtaskd] schedule+0x70/0x90
> 4,25182,832483635,-; (1)[67:khungtaskd] kthread+0xfc/0x18c
> 4,25183,832483655,-; (1)[67:khungtaskd] ret_from_fork+0x10/0x18
> 3,25184,832483737,-; (1)[67:khungtaskd]INFO: task scp_power_reset:227
> blocked for more than 120 seconds.

What are all these kernel threads doing, and why is it OK to inhibit them
in the report?
-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] hungtask: add filter kthread/check comm
  2021-05-19 12:13 chenguanyou
  2021-05-19 13:06 ` Michal Hocko
@ 2021-05-19 17:53 ` Randy Dunlap
       [not found]   ` <CAHS3RMVYbHTzD6JnOmE331qSbcnvuYnBe0jraNuuLc0Z2NnStg@mail.gmail.com>
  1 sibling, 1 reply; 11+ messages in thread
From: Randy Dunlap @ 2021-05-19 17:53 UTC (permalink / raw)
  To: chenguanyou, linux-kernel
  Cc: akpm, keescook, mhocko, lukas.bulwahn, vbabka, gpiccoli, chenguanyou

Hi,

Along with what Michal has said, please see below.

On 5/19/21 5:13 AM, chenguanyou wrote:
> Some kernel threads are always in D state, when we enable hung_task,
> it will misjudge, we should skip these to narrow the scope.
> 
> exp mtk mobilephone:
> root            420   420      2       0      0 kwdt_thread         0 D wdtk-0
> root            421   421      2       0      0 kwdt_thread         0 D wdtk-1
> root            422   422      2       0      0 kwdt_thread         0 D wdtk-2
> root            423   423      2       0      0 kwdt_thread         0 D wdtk-3
> root            424   424      2       0      0 kwdt_thread         0 D wdtk-4
> root            425   425      2       0      0 kwdt_thread         0 D wdtk-5
> root            426   426      2       0      0 kwdt_thread         0 D wdtk-6
> root            427   427      2       0      0 kwdt_thread         0 D wdtk-7
> root            435   435      2       0      0 mtk_lpm_monitor_thread 0 D LPM-0
> root            436   436      2       0      0 mtk_lpm_monitor_thread 0 D LPM-1
> root            437   437      2       0      0 mtk_lpm_monitor_thread 0 D LPM-2
> root            438   438      2       0      0 mtk_lpm_monitor_thread 0 D LPM-3
> root            439   439      2       0      0 mtk_lpm_monitor_thread 0 D LPM-4
> root            440   440      2       0      0 mtk_lpm_monitor_thread 0 D LPM-5
> root            441   441      2       0      0 mtk_lpm_monitor_thread 0 D LPM-6
> root            442   442      2       0      0 mtk_lpm_monitor_thread 0 D LPM-7
> 
> Signed-off-by: chenguanyou <chenguanyou@xiaomi.com>
> ---
>  include/linux/sched/sysctl.h |  4 ++++
>  kernel/hung_task.c           | 17 +++++++++++++++++
>  kernel/sysctl.c              | 15 +++++++++++++++
>  lib/Kconfig.debug            | 23 +++++++++++++++++++++++
>  4 files changed, 59 insertions(+)
> 

> diff --git a/kernel/hung_task.c b/kernel/hung_task.c
> index 396ebaebea3f..e018563d4882 100644
> --- a/kernel/hung_task.c
> +++ b/kernel/hung_task.c
> @@ -23,6 +23,7 @@
>  #include <linux/sched/sysctl.h>
>  
>  #include <trace/events/sched.h>
> +#include <linux/string.h>
>  
>  /*
>   * The number of tasks checked:
> @@ -48,6 +49,16 @@ unsigned long __read_mostly sysctl_hung_task_timeout_secs = CONFIG_DEFAULT_HUNG_
>   */
>  unsigned long __read_mostly sysctl_hung_task_check_interval_secs;
>  
> +/*
> + * Non zero means no checking kthread

      Non-zero

> + */
> +unsigned int __read_mostly sysctl_hung_task_filter_kthread = CONFIG_DEFAULT_HUNG_TASK_FILTER_KTHREAD;
> +
> +/*
> + * Only one
> + */
> +char __read_mostly sysctl_hung_task_check_comm[TASK_COMM_LEN] = CONFIG_DEFAULT_HUNG_TASK_CHECK_COMM;
> +
>  int __read_mostly sysctl_hung_task_warnings = 10;
>  
>  static int __read_mostly did_panic;
> @@ -88,6 +99,12 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
>  {
>  	unsigned long switch_count = t->nvcsw + t->nivcsw;
>  
> +	if (unlikely(strlen(sysctl_hung_task_check_comm) && !strstr(t->comm, sysctl_hung_task_check_comm)))
> +		return;
> +
> +	if (unlikely(sysctl_hung_task_filter_kthread && t->flags & PF_KTHREAD))
> +		return;
> +
>  	/*
>  	 * Ensure the task is not frozen.
>  	 * Also, skip vfork and any other user process that freezer should skip.
> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> index 62fbd09b5dc1..157c47a8430a 100644
> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> @@ -2519,6 +2519,21 @@ static struct ctl_table kern_table[] = {
>  		.proc_handler	= proc_dointvec_minmax,
>  		.extra1		= &neg_one,
>  	},
> +	{
> +		.procname	= "hung_task_filter_kthread",
> +		.data		= &sysctl_hung_task_filter_kthread,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= SYSCTL_ONE,
> +	},
> +	{
> +		.procname	= "hung_task_check_comm",
> +		.data		= &sysctl_hung_task_check_comm,
> +		.maxlen		= TASK_COMM_LEN,
> +		.mode		= 0644,
> +		.proc_handler	= proc_dostring,
> +	},
>  #endif

These new sysctls should be documented in Documentation/admin-guide/sysctl/kernel.rst.

>  #ifdef CONFIG_RT_MUTEXES
>  	{
> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> index 2c7f46b366f1..59cfa9e230ee 100644
> --- a/lib/Kconfig.debug
> +++ b/lib/Kconfig.debug
> @@ -1101,6 +1101,29 @@ config DEFAULT_HUNG_TASK_TIMEOUT
>  	  A timeout of 0 disables the check.  The default is two minutes.
>  	  Keeping the default should be fine in most cases.
>  
> +config DEFAULT_HUNG_TASK_FILTER_KTHREAD
> +	bool "Default filter kthread for hung task"
> +	depends on DETECT_HUNG_TASK
> +	default 0
> +	help
> +	  This option controls filter kthread used to determine when

	                                      uses

> +	  a kernel task has become "state=TASK_UNINTERRUPTIBLE" and should be skip.

	                                                                      skipped.

> +
> +	  It can be adjusted at runtime via the kernel.hung_task_filter_kthread
> +	  sysctl or by writing a value to
> +	  /proc/sys/kernel/hung_task_filter_kthread.
> +
> +	  A filter of 1 disables the check

	                             check.

> +
> +config DEFAULT_HUNG_TASK_CHECK_COMM
> +	string "Default check only one comm"
> +	depends on DETECT_HUNG_TASK
> +	default ""
> +	help
> +	  It can be adjusted at runtime via the kernel.hung_task_check_comm
> +	  sysctl or by writing a value to
> +	  /proc/sys/kernel/hung_task_check_comm.

That help text doesn't tell how the Kconfig symbol is used.

> +
>  config BOOTPARAM_HUNG_TASK_PANIC
>  	bool "Panic (Reboot) On Hung Tasks"
>  	depends on DETECT_HUNG_TASK
> 


thanks.
-- 
~Randy


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] hungtask: add filter kthread/check comm
  2021-05-19 12:13 chenguanyou
@ 2021-05-19 13:06 ` Michal Hocko
  2021-05-19 17:53 ` Randy Dunlap
  1 sibling, 0 replies; 11+ messages in thread
From: Michal Hocko @ 2021-05-19 13:06 UTC (permalink / raw)
  To: chenguanyou
  Cc: linux-kernel, akpm, keescook, lukas.bulwahn, vbabka, gpiccoli,
	chenguanyou

On Wed 19-05-21 20:13:25, chenguanyou wrote:
> Some kernel threads are always in D state, when we enable hung_task,
> it will misjudge, we should skip these to narrow the scope.

Why are they in the D state in the first place?

Also, you are proposing a new user interface which would need to be
maintained forever. This means that any such proposal has to be very
carefully justified. Always make sure to cc the linux-api mailing list in
these cases.

From the (not much) information you have provided this interface seems
more like a workaround for buggy code so far.
-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH] hungtask: add filter kthread/check comm
@ 2021-05-19 12:13 chenguanyou
  2021-05-19 13:06 ` Michal Hocko
  2021-05-19 17:53 ` Randy Dunlap
  0 siblings, 2 replies; 11+ messages in thread
From: chenguanyou @ 2021-05-19 12:13 UTC (permalink / raw)
  To: linux-kernel
  Cc: akpm, keescook, mhocko, lukas.bulwahn, vbabka, gpiccoli, chenguanyou

Some kernel threads are always in D state. When we enable hung_task,
it misjudges them as hung, so we should skip these to narrow the scope.

Example from an MTK mobile phone:
root            420   420      2       0      0 kwdt_thread         0 D wdtk-0
root            421   421      2       0      0 kwdt_thread         0 D wdtk-1
root            422   422      2       0      0 kwdt_thread         0 D wdtk-2
root            423   423      2       0      0 kwdt_thread         0 D wdtk-3
root            424   424      2       0      0 kwdt_thread         0 D wdtk-4
root            425   425      2       0      0 kwdt_thread         0 D wdtk-5
root            426   426      2       0      0 kwdt_thread         0 D wdtk-6
root            427   427      2       0      0 kwdt_thread         0 D wdtk-7
root            435   435      2       0      0 mtk_lpm_monitor_thread 0 D LPM-0
root            436   436      2       0      0 mtk_lpm_monitor_thread 0 D LPM-1
root            437   437      2       0      0 mtk_lpm_monitor_thread 0 D LPM-2
root            438   438      2       0      0 mtk_lpm_monitor_thread 0 D LPM-3
root            439   439      2       0      0 mtk_lpm_monitor_thread 0 D LPM-4
root            440   440      2       0      0 mtk_lpm_monitor_thread 0 D LPM-5
root            441   441      2       0      0 mtk_lpm_monitor_thread 0 D LPM-6
root            442   442      2       0      0 mtk_lpm_monitor_thread 0 D LPM-7

Signed-off-by: chenguanyou <chenguanyou@xiaomi.com>
---
 include/linux/sched/sysctl.h |  4 ++++
 kernel/hung_task.c           | 17 +++++++++++++++++
 kernel/sysctl.c              | 15 +++++++++++++++
 lib/Kconfig.debug            | 23 +++++++++++++++++++++++
 4 files changed, 59 insertions(+)

diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 3c31ba88aca5..e8a9a28215bf 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -14,11 +14,15 @@ extern unsigned int sysctl_hung_task_all_cpu_backtrace;
 #define sysctl_hung_task_all_cpu_backtrace 0
 #endif /* CONFIG_SMP */
 
+#define TASK_COMM_LEN 16
+
 extern int	     sysctl_hung_task_check_count;
 extern unsigned int  sysctl_hung_task_panic;
 extern unsigned long sysctl_hung_task_timeout_secs;
 extern unsigned long sysctl_hung_task_check_interval_secs;
 extern int sysctl_hung_task_warnings;
+extern unsigned int sysctl_hung_task_filter_kthread;
+extern char sysctl_hung_task_check_comm[TASK_COMM_LEN];
 int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
 		void *buffer, size_t *lenp, loff_t *ppos);
 #else
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 396ebaebea3f..e018563d4882 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -23,6 +23,7 @@
 #include <linux/sched/sysctl.h>
 
 #include <trace/events/sched.h>
+#include <linux/string.h>
 
 /*
  * The number of tasks checked:
@@ -48,6 +49,16 @@ unsigned long __read_mostly sysctl_hung_task_timeout_secs = CONFIG_DEFAULT_HUNG_
  */
 unsigned long __read_mostly sysctl_hung_task_check_interval_secs;
 
+/*
+ * Non zero means no checking kthread
+ */
+unsigned int __read_mostly sysctl_hung_task_filter_kthread = CONFIG_DEFAULT_HUNG_TASK_FILTER_KTHREAD;
+
+/*
+ * Only one
+ */
+char __read_mostly sysctl_hung_task_check_comm[TASK_COMM_LEN] = CONFIG_DEFAULT_HUNG_TASK_CHECK_COMM;
+
 int __read_mostly sysctl_hung_task_warnings = 10;
 
 static int __read_mostly did_panic;
@@ -88,6 +99,12 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
 {
 	unsigned long switch_count = t->nvcsw + t->nivcsw;
 
+	if (unlikely(strlen(sysctl_hung_task_check_comm) && !strstr(t->comm, sysctl_hung_task_check_comm)))
+		return;
+
+	if (unlikely(sysctl_hung_task_filter_kthread && t->flags & PF_KTHREAD))
+		return;
+
 	/*
 	 * Ensure the task is not frozen.
 	 * Also, skip vfork and any other user process that freezer should skip.
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 62fbd09b5dc1..157c47a8430a 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2519,6 +2519,21 @@ static struct ctl_table kern_table[] = {
 		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= &neg_one,
 	},
+	{
+		.procname	= "hung_task_filter_kthread",
+		.data		= &sysctl_hung_task_filter_kthread,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+	{
+		.procname	= "hung_task_check_comm",
+		.data		= &sysctl_hung_task_check_comm,
+		.maxlen		= TASK_COMM_LEN,
+		.mode		= 0644,
+		.proc_handler	= proc_dostring,
+	},
 #endif
 #ifdef CONFIG_RT_MUTEXES
 	{
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 2c7f46b366f1..59cfa9e230ee 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1101,6 +1101,29 @@ config DEFAULT_HUNG_TASK_TIMEOUT
 	  A timeout of 0 disables the check.  The default is two minutes.
 	  Keeping the default should be fine in most cases.
 
+config DEFAULT_HUNG_TASK_FILTER_KTHREAD
+	bool "Default filter kthread for hung task"
+	depends on DETECT_HUNG_TASK
+	default 0
+	help
+	  This option controls filter kthread used to determine when
+	  a kernel task has become "state=TASK_UNINTERRUPTIBLE" and should be skip.
+
+	  It can be adjusted at runtime via the kernel.hung_task_filter_kthread
+	  sysctl or by writing a value to
+	  /proc/sys/kernel/hung_task_filter_kthread.
+
+	  A filter of 1 disables the check
+
+config DEFAULT_HUNG_TASK_CHECK_COMM
+	string "Default check only one comm"
+	depends on DETECT_HUNG_TASK
+	default ""
+	help
+	  It can be adjusted at runtime via the kernel.hung_task_check_comm
+	  sysctl or by writing a value to
+	  /proc/sys/kernel/hung_task_check_comm.
+
 config BOOTPARAM_HUNG_TASK_PANIC
 	bool "Panic (Reboot) On Hung Tasks"
 	depends on DETECT_HUNG_TASK
-- 
2.17.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] hungtask: add filter kthread/check comm
  2021-05-18 11:16 chenguanyou
@ 2021-05-19 11:28 ` Vlastimil Babka
  0 siblings, 0 replies; 11+ messages in thread
From: Vlastimil Babka @ 2021-05-19 11:28 UTC (permalink / raw)
  To: chenguanyou, linux-kernel
  Cc: akpm, keescook, mhocko, lukas.bulwahn, gpiccoli, chenguanyou

On 5/18/21 1:16 PM, chenguanyou wrote:

The description is missing.

> Signed-off-by: chenguanyou <chenguanyou@xiaomi.com>
> ---
>  include/linux/sched/sysctl.h |  4 ++++
>  kernel/hung_task.c           | 17 +++++++++++++++++
>  kernel/sysctl.c              | 15 +++++++++++++++
>  lib/Kconfig.debug            | 23 +++++++++++++++++++++++
>  4 files changed, 59 insertions(+)
> 
> diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
> index 3c31ba88aca5..e8a9a28215bf 100644
> --- a/include/linux/sched/sysctl.h
> +++ b/include/linux/sched/sysctl.h
> @@ -14,11 +14,15 @@ extern unsigned int sysctl_hung_task_all_cpu_backtrace;
>  #define sysctl_hung_task_all_cpu_backtrace 0
>  #endif /* CONFIG_SMP */
>  
> +#define TASK_COMM_LEN 16
> +
>  extern int	     sysctl_hung_task_check_count;
>  extern unsigned int  sysctl_hung_task_panic;
>  extern unsigned long sysctl_hung_task_timeout_secs;
>  extern unsigned long sysctl_hung_task_check_interval_secs;
>  extern int sysctl_hung_task_warnings;
> +extern unsigned int sysctl_hung_task_filter_kthread;
> +extern char sysctl_hung_task_check_comm[TASK_COMM_LEN];
>  int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
>  		void *buffer, size_t *lenp, loff_t *ppos);
>  #else
> diff --git a/kernel/hung_task.c b/kernel/hung_task.c
> index 396ebaebea3f..e018563d4882 100644
> --- a/kernel/hung_task.c
> +++ b/kernel/hung_task.c
> @@ -23,6 +23,7 @@
>  #include <linux/sched/sysctl.h>
>  
>  #include <trace/events/sched.h>
> +#include <linux/string.h>
>  
>  /*
>   * The number of tasks checked:
> @@ -48,6 +49,16 @@ unsigned long __read_mostly sysctl_hung_task_timeout_secs = CONFIG_DEFAULT_HUNG_
>   */
>  unsigned long __read_mostly sysctl_hung_task_check_interval_secs;
>  
> +/*
> + * Non zero means no checking kthread
> + */
> +unsigned int __read_mostly sysctl_hung_task_filter_kthread = CONFIG_DEFAULT_HUNG_TASK_FILTER_KTHREAD;
> +
> +/*
> + * Only one
> + */
> +char __read_mostly sysctl_hung_task_check_comm[TASK_COMM_LEN] = CONFIG_DEFAULT_HUNG_TASK_CHECK_COMM;
> +
>  int __read_mostly sysctl_hung_task_warnings = 10;
>  
>  static int __read_mostly did_panic;
> @@ -88,6 +99,12 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
>  {
>  	unsigned long switch_count = t->nvcsw + t->nivcsw;
>  
> +	if (unlikely(strlen(sysctl_hung_task_check_comm) && !strstr(t->comm, sysctl_hung_task_check_comm)))
> +		return;
> +
> +	if (unlikely(sysctl_hung_task_filter_kthread && t->flags & PF_KTHREAD))
> +		return;
> +
>  	/*
>  	 * Ensure the task is not frozen.
>  	 * Also, skip vfork and any other user process that freezer should skip.
> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> index 62fbd09b5dc1..157c47a8430a 100644
> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> @@ -2519,6 +2519,21 @@ static struct ctl_table kern_table[] = {
>  		.proc_handler	= proc_dointvec_minmax,
>  		.extra1		= &neg_one,
>  	},
> +	{
> +		.procname	= "hung_task_filter_kthread",
> +		.data		= &sysctl_hung_task_filter_kthread,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= SYSCTL_ONE,
> +	},
> +	{
> +		.procname	= "hung_task_check_comm",
> +		.data		= &sysctl_hung_task_check_comm,
> +		.maxlen		= TASK_COMM_LEN,
> +		.mode		= 0644,
> +		.proc_handler	= proc_dostring,
> +	},
>  #endif
>  #ifdef CONFIG_RT_MUTEXES
>  	{
> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> index 2c7f46b366f1..59cfa9e230ee 100644
> --- a/lib/Kconfig.debug
> +++ b/lib/Kconfig.debug
> @@ -1101,6 +1101,29 @@ config DEFAULT_HUNG_TASK_TIMEOUT
>  	  A timeout of 0 disables the check.  The default is two minutes.
>  	  Keeping the default should be fine in most cases.
>  
> +config DEFAULT_HUNG_TASK_FILTER_KTHREAD
> +	bool "Default filter kthread for hung task"
> +	depends on DETECT_HUNG_TASK
> +	default 0
> +	help
> +	  This option controls filter kthread used to determine when
> +	  a kernel task has become "state=TASK_UNINTERRUPTIBLE" and should be skip.
> +
> +	  It can be adjusted at runtime via the kernel.hung_task_filter_kthread
> +	  sysctl or by writing a value to
> +	  /proc/sys/kernel/hung_task_filter_kthread.
> +
> +	  A filter of 1 disables the check
> +
> +config DEFAULT_HUNG_TASK_CHECK_COMM
> +	string "Default check only one comm"
> +	depends on DETECT_HUNG_TASK
> +	default ""
> +	help
> +	  It can be adjusted at runtime via the kernel.hung_task_check_comm
> +	  sysctl or by writing a value to
> +	  /proc/sys/kernel/hung_task_check_comm.
> +
>  config BOOTPARAM_HUNG_TASK_PANIC
>  	bool "Panic (Reboot) On Hung Tasks"
>  	depends on DETECT_HUNG_TASK
> 


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH] hungtask: add filter kthread/check comm
@ 2021-05-18 11:16 chenguanyou
  2021-05-19 11:28 ` Vlastimil Babka
  0 siblings, 1 reply; 11+ messages in thread
From: chenguanyou @ 2021-05-18 11:16 UTC (permalink / raw)
  To: linux-kernel
  Cc: akpm, keescook, mhocko, lukas.bulwahn, vbabka, gpiccoli, chenguanyou

Signed-off-by: chenguanyou <chenguanyou@xiaomi.com>
---
 include/linux/sched/sysctl.h |  4 ++++
 kernel/hung_task.c           | 17 +++++++++++++++++
 kernel/sysctl.c              | 15 +++++++++++++++
 lib/Kconfig.debug            | 23 +++++++++++++++++++++++
 4 files changed, 59 insertions(+)

diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 3c31ba88aca5..e8a9a28215bf 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -14,11 +14,15 @@ extern unsigned int sysctl_hung_task_all_cpu_backtrace;
 #define sysctl_hung_task_all_cpu_backtrace 0
 #endif /* CONFIG_SMP */
 
+#define TASK_COMM_LEN 16
+
 extern int	     sysctl_hung_task_check_count;
 extern unsigned int  sysctl_hung_task_panic;
 extern unsigned long sysctl_hung_task_timeout_secs;
 extern unsigned long sysctl_hung_task_check_interval_secs;
 extern int sysctl_hung_task_warnings;
+extern unsigned int sysctl_hung_task_filter_kthread;
+extern char sysctl_hung_task_check_comm[TASK_COMM_LEN];
 int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
 		void *buffer, size_t *lenp, loff_t *ppos);
 #else
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 396ebaebea3f..e018563d4882 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -23,6 +23,7 @@
 #include <linux/sched/sysctl.h>
 
 #include <trace/events/sched.h>
+#include <linux/string.h>
 
 /*
  * The number of tasks checked:
@@ -48,6 +49,16 @@ unsigned long __read_mostly sysctl_hung_task_timeout_secs = CONFIG_DEFAULT_HUNG_
  */
 unsigned long __read_mostly sysctl_hung_task_check_interval_secs;
 
+/*
+ * Non zero means no checking kthread
+ */
+unsigned int __read_mostly sysctl_hung_task_filter_kthread = CONFIG_DEFAULT_HUNG_TASK_FILTER_KTHREAD;
+
+/*
+ * Only one
+ */
+char __read_mostly sysctl_hung_task_check_comm[TASK_COMM_LEN] = CONFIG_DEFAULT_HUNG_TASK_CHECK_COMM;
+
 int __read_mostly sysctl_hung_task_warnings = 10;
 
 static int __read_mostly did_panic;
@@ -88,6 +99,12 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
 {
 	unsigned long switch_count = t->nvcsw + t->nivcsw;
 
+	if (unlikely(strlen(sysctl_hung_task_check_comm) && !strstr(t->comm, sysctl_hung_task_check_comm)))
+		return;
+
+	if (unlikely(sysctl_hung_task_filter_kthread && t->flags & PF_KTHREAD))
+		return;
+
 	/*
 	 * Ensure the task is not frozen.
 	 * Also, skip vfork and any other user process that freezer should skip.
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 62fbd09b5dc1..157c47a8430a 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2519,6 +2519,21 @@ static struct ctl_table kern_table[] = {
 		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= &neg_one,
 	},
+	{
+		.procname	= "hung_task_filter_kthread",
+		.data		= &sysctl_hung_task_filter_kthread,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+	{
+		.procname	= "hung_task_check_comm",
+		.data		= &sysctl_hung_task_check_comm,
+		.maxlen		= TASK_COMM_LEN,
+		.mode		= 0644,
+		.proc_handler	= proc_dostring,
+	},
 #endif
 #ifdef CONFIG_RT_MUTEXES
 	{
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 2c7f46b366f1..59cfa9e230ee 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1101,6 +1101,29 @@ config DEFAULT_HUNG_TASK_TIMEOUT
 	  A timeout of 0 disables the check.  The default is two minutes.
 	  Keeping the default should be fine in most cases.
 
+config DEFAULT_HUNG_TASK_FILTER_KTHREAD
+	bool "Default filter kthread for hung task"
+	depends on DETECT_HUNG_TASK
+	default 0
+	help
+	  This option controls filter kthread used to determine when
+	  a kernel task has become "state=TASK_UNINTERRUPTIBLE" and should be skip.
+
+	  It can be adjusted at runtime via the kernel.hung_task_filter_kthread
+	  sysctl or by writing a value to
+	  /proc/sys/kernel/hung_task_filter_kthread.
+
+	  A filter of 1 disables the check
+
+config DEFAULT_HUNG_TASK_CHECK_COMM
+	string "Default check only one comm"
+	depends on DETECT_HUNG_TASK
+	default ""
+	help
+	  It can be adjusted at runtime via the kernel.hung_task_check_comm
+	  sysctl or by writing a value to
+	  /proc/sys/kernel/hung_task_check_comm.
+
 config BOOTPARAM_HUNG_TASK_PANIC
 	bool "Panic (Reboot) On Hung Tasks"
 	depends on DETECT_HUNG_TASK
-- 
2.17.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2021-05-25  7:26 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-21 13:25 [PATCH] hungtask: add filter kthread/check comm chenguanyou
2021-05-21 16:37 ` Randy Dunlap
  -- strict thread matches above, loose matches on Subject: below --
2021-05-21 13:25 chenguanyou
2021-05-22 21:51 ` Andrew Morton
     [not found]   ` <CAHS3RMWjuB98TzvcYyQ0qtNYOxDeM7W1YmyqDYs=H-cn-VsVdw@mail.gmail.com>
2021-05-25  7:26     ` Michal Hocko
2021-05-19 12:13 chenguanyou
2021-05-19 13:06 ` Michal Hocko
2021-05-19 17:53 ` Randy Dunlap
     [not found]   ` <CAHS3RMVYbHTzD6JnOmE331qSbcnvuYnBe0jraNuuLc0Z2NnStg@mail.gmail.com>
2021-05-21  8:31     ` Michal Hocko
2021-05-18 11:16 chenguanyou
2021-05-19 11:28 ` Vlastimil Babka

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.