All of lore.kernel.org
 help / color / mirror / Atom feed
From: yang che <chey84736@gmail.com>
To: mcgrof@kernel.org, keescook@chromium.org, yzaikin@google.com
Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	yang che <chey84736@gmail.com>
Subject: [PATCH v2] hung_task:add detecting task in D state milliseconds timeout
Date: Sun,  5 Jul 2020 20:48:52 +0800	[thread overview]
Message-ID: <1593953332-29404-1-git-send-email-chey84736@gmail.com> (raw)

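Currently, hung_task_check_interval_secs and hung_task_timeout_secs
only support a granularity of seconds. In some cases a task is expected
to stay in the TASK_UNINTERRUPTIBLE state for less than 1 second, so a
sub-second timeout may be needed for the hung task detector to trigger
a panic (to get a ramdump) or to print the tasks on all CPUs.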

Rename hung_task_check_interval_secs to hung_task_check_interval_millisecs,
so that the check interval is given in milliseconds. Add a
hung_task_timeout_millisecs file to set the millisecond part of the
timeout:
task timeout = hung_task_timeout_secs * 1000 + hung_task_timeout_millisecs.
(timeout * HZ / 1000) gives the number of jiffies in timeout
milliseconds.

Signed-off-by: yang che <chey84736@gmail.com>
---

v1->v2:
 - Add hung_task_check_interval_millisecs and hung_task_timeout_millisecs.
 - Fix: writing to the milliseconds file no longer silently overrides
   the setting in the seconds file.

 [1]https://lore.kernel.org/lkml/CAN_w4MWMfoDGfpON-bYHrU=KuJG2vpFj01ZbN4r-iwM4AyyuGw@mail.gmail.com

 include/linux/sched/sysctl.h |  3 ++-
 kernel/hung_task.c           | 25 ++++++++++++++++++-------
 kernel/sysctl.c              | 12 ++++++++++--
 3 files changed, 30 insertions(+), 10 deletions(-)

diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 660ac49..179c331 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -16,8 +16,9 @@ extern unsigned int sysctl_hung_task_all_cpu_backtrace;
 
 extern int	     sysctl_hung_task_check_count;
 extern unsigned int  sysctl_hung_task_panic;
+extern unsigned long  sysctl_hung_task_timeout_millisecs;
 extern unsigned long sysctl_hung_task_timeout_secs;
-extern unsigned long sysctl_hung_task_check_interval_secs;
+extern unsigned long sysctl_hung_task_check_interval_millisecs;
 extern int sysctl_hung_task_warnings;
 int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
 		void *buffer, size_t *lenp, loff_t *ppos);
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index ce76f49..809c999 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -37,6 +37,7 @@ int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
  * the RCU grace period. So it needs to be upper-bound.
  */
 #define HUNG_TASK_LOCK_BREAK (HZ / 10)
+#define SECONDS 1000
 
 /*
  * Zero means infinite timeout - no checking done:
@@ -44,9 +45,14 @@ int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
 unsigned long __read_mostly sysctl_hung_task_timeout_secs = CONFIG_DEFAULT_HUNG_TASK_TIMEOUT;
 
 /*
+ * Zero means only use sysctl_hung_task_timeout_secs
+ */
+unsigned long  __read_mostly sysctl_hung_task_timeout_millisecs;
+
+/*
  * Zero (default value) means use sysctl_hung_task_timeout_secs:
  */
-unsigned long __read_mostly sysctl_hung_task_check_interval_secs;
+unsigned long __read_mostly sysctl_hung_task_check_interval_millisecs;
 
 int __read_mostly sysctl_hung_task_warnings = 10;
 
@@ -108,7 +114,8 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
 		t->last_switch_time = jiffies;
 		return;
 	}
-	if (time_is_after_jiffies(t->last_switch_time + timeout * HZ))
+
+	if (time_is_after_jiffies(t->last_switch_time + (timeout * HZ) / SECONDS))
 		return;
 
 	trace_sched_process_hang(t);
@@ -126,13 +133,16 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
 	if (sysctl_hung_task_warnings) {
 		if (sysctl_hung_task_warnings > 0)
 			sysctl_hung_task_warnings--;
-		pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n",
-		       t->comm, t->pid, (jiffies - t->last_switch_time) / HZ);
+
+		pr_err("INFO: task %s:%d blocked for more than %ld seconds %ld milliseconds.\n",
+			t->comm, t->pid, (jiffies - t->last_switch_time) / HZ,
+			(jiffies - t->last_switch_time) % HZ * (SECONDS / HZ));
 		pr_err("      %s %s %.*s\n",
 			print_tainted(), init_utsname()->release,
 			(int)strcspn(init_utsname()->version, " "),
 			init_utsname()->version);
 		pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
+			"\"echo 0 > /proc/sys/kernel/hung_task_timeout_millisecs\""
 			" disables this message.\n");
 		sched_show_task(t);
 		hung_task_show_lock = true;
@@ -217,7 +227,7 @@ static long hung_timeout_jiffies(unsigned long last_checked,
 				 unsigned long timeout)
 {
 	/* timeout of 0 will disable the watchdog */
-	return timeout ? last_checked - jiffies + timeout * HZ :
+	return timeout ? last_checked - jiffies + (timeout * HZ) / SECONDS :
 		MAX_SCHEDULE_TIMEOUT;
 }
 
@@ -281,8 +291,9 @@ static int watchdog(void *dummy)
 	set_user_nice(current, 0);
 
 	for ( ; ; ) {
-		unsigned long timeout = sysctl_hung_task_timeout_secs;
-		unsigned long interval = sysctl_hung_task_check_interval_secs;
+		unsigned long timeout = sysctl_hung_task_timeout_secs * SECONDS +
+					sysctl_hung_task_timeout_millisecs;
+		unsigned long interval = sysctl_hung_task_check_interval_millisecs;
 		long t;
 
 		if (interval == 0)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index db1ce7a..8f7ac33 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2476,6 +2476,14 @@ static struct ctl_table kern_table[] = {
 		.extra1		= SYSCTL_ZERO,
 	},
 	{
+		.procname       = "hung_task_timeout_millisecs",
+		.data           = &sysctl_hung_task_timeout_millisecs,
+		.maxlen         = sizeof(unsigned long),
+		.mode           = 0644,
+		.proc_handler   = proc_dohung_task_timeout_secs,
+		.extra2         = &hung_task_timeout_max,
+	},
+	{
 		.procname	= "hung_task_timeout_secs",
 		.data		= &sysctl_hung_task_timeout_secs,
 		.maxlen		= sizeof(unsigned long),
@@ -2484,8 +2492,8 @@ static struct ctl_table kern_table[] = {
 		.extra2		= &hung_task_timeout_max,
 	},
 	{
-		.procname	= "hung_task_check_interval_secs",
-		.data		= &sysctl_hung_task_check_interval_secs,
+		.procname	= "hung_task_check_interval_millisecs",
+		.data		= &sysctl_hung_task_check_interval_millisecs,
 		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
 		.proc_handler	= proc_dohung_task_timeout_secs,
-- 
2.7.4


             reply	other threads:[~2020-07-05 12:50 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-07-05 12:48 yang che [this message]
2020-07-05 17:16 ` [PATCH v2] hung_task:add detecting task in D state milliseconds timeout Matthew Wilcox

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1593953332-29404-1-git-send-email-chey84736@gmail.com \
    --to=chey84736@gmail.com \
    --cc=keescook@chromium.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mcgrof@kernel.org \
    --cc=yzaikin@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.