All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] kernel/hung_task: Report top CPU consumers
@ 2021-05-14 12:56 zhouchuangao
  2021-05-15  8:33 ` Tetsuo Handa
  0 siblings, 1 reply; 2+ messages in thread
From: zhouchuangao @ 2021-05-14 12:56 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman,
	Daniel Bristot de Oliveira, Andrew Morton, Kees Cook,
	Stephen Rothwell, Guilherme G. Piccoli, Michal Hocko,
	Tetsuo Handa, Lukas Bulwahn, zhouchuangao, Vlastimil Babka,
	linux-kernel

1. If the task did not get scheduled for more than 2 minutes,
report top 3(By default) CPU consumers.

2. By default, the CPU utilization of each process in one minute
is calculated.

3. Add a new member last_cpu_time to task_struct to record the CPU
usage of the process at the beginning of the computation.

Signed-off-by: zhouchuangao <zhouchuangao@vivo.com>
---
 include/linux/sched.h |   1 +
 kernel/hung_task.c    | 161 +++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 160 insertions(+), 2 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8d5264b..103f98f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -973,6 +973,7 @@ struct task_struct {
 	unsigned long			last_switch_count;
 	unsigned long			last_switch_time;
 	unsigned long			killed_time;
+	u64				last_cpu_time;
 #endif
 	/* Filesystem information: */
 	struct fs_struct		*fs;
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index bb2e3e1..fb5f944 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -10,6 +10,7 @@
 #include <linux/cpu.h>
 #include <linux/nmi.h>
 #include <linux/init.h>
+#include <linux/tick.h>
 #include <linux/delay.h>
 #include <linux/freezer.h>
 #include <linux/kthread.h>
@@ -21,7 +22,7 @@
 #include <linux/sched/signal.h>
 #include <linux/sched/debug.h>
 #include <linux/sched/sysctl.h>
-
+#include <linux/sched/cputime.h>
 #include <trace/events/sched.h>
 
 /*
@@ -55,6 +56,16 @@ static bool hung_task_show_lock;
 static bool hung_task_call_panic;
 static bool hung_task_show_all_bt;
 
+static u64	last_cpu_usage;		/* total CPU time saved by all_cpu_usage(false) */
+static u64	interval_cpu_usage;	/* growth of that total, set by all_cpu_usage(true) */
+
+#define NUM_CONSUMERS   3		/* number of tasks ranked and printed */
+struct cpu_consumer {			/* one slot of the top-consumer ranking */
+	char	comm[TASK_COMM_LEN];	/* copy of the task's comm */
+	pid_t	pid;			/* the task's pid */
+	u64	cpu_used;		/* CPU time gained since the snapshot pass */
+};
+
 static struct task_struct *watchdog_task;
 
 #ifdef CONFIG_SMP
@@ -72,6 +83,145 @@ unsigned int __read_mostly sysctl_hung_task_all_cpu_backtrace;
 unsigned int __read_mostly sysctl_hung_task_panic =
 				CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE;
 
+#ifdef arch_idle_time
+/* Idle time of @cpu: cpustat idle from @kcs, plus arch idle if no I/O waits. */
+static u64 get_idle_time(struct kernel_cpustat *kcs, int cpu)
+{
+	u64 base = kcs->cpustat[CPUTIME_IDLE];
+
+	if (!cpu_online(cpu) || nr_iowait_cpu(cpu))
+		return base;
+	return base + arch_idle_time(cpu);
+}
+
+/* Iowait time of @cpu: cpustat iowait from @kcs, plus arch idle if I/O waits. */
+static u64 get_iowait_time(struct kernel_cpustat *kcs, int cpu)
+{
+	u64 base = kcs->cpustat[CPUTIME_IOWAIT];
+
+	if (!cpu_online(cpu) || !nr_iowait_cpu(cpu))
+		return base;
+	return base + arch_idle_time(cpu);
+}
+#else
+/*
+ * Idle time of @cpu. An online CPU is asked for its get_cpu_idle_time_us()
+ * counter, which is scaled by NSEC_PER_USEC; if that yields -1, or the CPU
+ * is offline, the cpustat idle value in @kcs is returned instead.
+ */
+static u64 get_idle_time(struct kernel_cpustat *kcs, int cpu)
+{
+	u64 usecs = cpu_online(cpu) ? get_cpu_idle_time_us(cpu, NULL) : -1ULL;
+
+	/* !NO_HZ or cpu offline so we can rely on cpustat.idle */
+	if (usecs == -1ULL)
+		return kcs->cpustat[CPUTIME_IDLE];
+
+	return usecs * NSEC_PER_USEC;
+}
+
+/*
+ * Iowait time of @cpu. An online CPU is asked for its
+ * get_cpu_iowait_time_us() counter, which is scaled by NSEC_PER_USEC; if that
+ * yields -1, or the CPU is offline, the cpustat iowait value in @kcs is used.
+ */
+static u64 get_iowait_time(struct kernel_cpustat *kcs, int cpu)
+{
+	u64 usecs = cpu_online(cpu) ? get_cpu_iowait_time_us(cpu, NULL) : -1ULL;
+
+	/* !NO_HZ or cpu offline so we can rely on cpustat.iowait */
+	if (usecs == -1ULL)
+		return kcs->cpustat[CPUTIME_IOWAIT];
+
+	return usecs * NSEC_PER_USEC;
+}
+#endif
+
+/*
+ * Pass 1 (@compute false) snapshots each thread's CPU time in last_cpu_time.
+ * Pass 2 (@compute true) prints the NUM_CONSUMERS threads that gained most.
+ */
+static void show_top_cpu_consumers(bool compute)
+{
+	int i, j;
+	struct task_struct *g, *t;
+	u64 utime, stime, task_cpu_time, interval_time;
+	struct cpu_consumer tcc[NUM_CONSUMERS];
+
+	memset(tcc, 0, sizeof(tcc));
+	/* The task list may only be walked under RCU (or tasklist_lock). */
+	rcu_read_lock();
+	for_each_process_thread(g, t) {
+		task_cputime_adjusted(t, &utime, &stime);
+		task_cpu_time = t->signal->cutime + t->signal->cstime + utime + stime;
+		if (!compute) {
+			t->last_cpu_time = task_cpu_time;
+			continue;
+		}
+		/* A task without a valid snapshot must not wrap the u64 delta. */
+		interval_time = task_cpu_time > t->last_cpu_time ?
+				task_cpu_time - t->last_cpu_time : 0;
+		for (i = 0; i < NUM_CONSUMERS; i++) {
+			if (interval_time <= tcc[i].cpu_used)
+				continue;
+			/* Shift lower-ranked entries down to free slot i. */
+			for (j = NUM_CONSUMERS - 1; j > i; j--)
+				tcc[j] = tcc[j - 1];
+			get_task_comm(tcc[i].comm, t);
+			tcc[i].pid = t->pid;
+			tcc[i].cpu_used = interval_time;
+			break;
+		}
+	}
+	rcu_read_unlock();
+	if (!compute)
+		return;
+
+	pr_info("hung task report top %d CPU consumers:\n", NUM_CONSUMERS);
+	pr_info("TOP    COMM    PID    [TASK_CPU_TIME/ALL_CPU_TIME]\n");
+	for (i = 0; i < NUM_CONSUMERS; i++)
+		pr_info("Top%d   %s    %d    [%llu/%llu]\n", i + 1,
+			tcc[i].comm, tcc[i].pid,
+			nsec_to_clock_t(tcc[i].cpu_used),
+			nsec_to_clock_t(interval_cpu_usage));
+}
+
+/*
+ * Pass 1 (@compute false) saves the CPU time summed over all possible CPUs
+ * in last_cpu_usage; pass 2 (@compute true) stores the growth since then in
+ * interval_cpu_usage.
+ */
+static void all_cpu_usage(bool compute)
+{
+	int cpu;
+	u64 current_cpu_usage = 0;
+
+	for_each_possible_cpu(cpu) {
+		struct kernel_cpustat kcpustat;
+		u64 *cpustat = kcpustat.cpustat;
+
+		kcpustat_cpu_fetch(&kcpustat, cpu);
+
+		/*
+		 * CPUTIME_GUEST and CPUTIME_GUEST_NICE are left out: guest time
+		 * is already part of USER and NICE and would be counted twice.
+		 */
+		current_cpu_usage += cpustat[CPUTIME_USER] +
+				     cpustat[CPUTIME_NICE] +
+				     cpustat[CPUTIME_SYSTEM] +
+				     get_idle_time(&kcpustat, cpu) +
+				     get_iowait_time(&kcpustat, cpu) +
+				     cpustat[CPUTIME_IRQ] +
+				     cpustat[CPUTIME_SOFTIRQ] +
+				     cpustat[CPUTIME_STEAL];
+	}
+
+	if (compute)
+		interval_cpu_usage = current_cpu_usage - last_cpu_usage;
+	else
+		last_cpu_usage = current_cpu_usage;
+}
+
 static int
 hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr)
 {
@@ -253,8 +403,15 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
 		trigger_all_cpu_backtrace();
 	}
 
-	if (hung_task_call_panic)
+	if (hung_task_call_panic) {
+		all_cpu_usage(false);
+		show_top_cpu_consumers(false);
+		msleep(1000);
+		all_cpu_usage(true);
+		show_top_cpu_consumers(true);
+
 		panic("hung_task: blocked tasks");
+	}
 }
 
 static long hung_timeout_jiffies(unsigned long last_checked,
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH] kernel/hung_task: Report top CPU consumers
  2021-05-14 12:56 [PATCH] kernel/hung_task: Report top CPU consumers zhouchuangao
@ 2021-05-15  8:33 ` Tetsuo Handa
  0 siblings, 0 replies; 2+ messages in thread
From: Tetsuo Handa @ 2021-05-15  8:33 UTC (permalink / raw)
  To: zhouchuangao, Dmitry Vyukov
  Cc: Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman,
	Daniel Bristot de Oliveira, Andrew Morton, Kees Cook,
	Stephen Rothwell, Guilherme G. Piccoli, Michal Hocko,
	Lukas Bulwahn, Vlastimil Babka, linux-kernel

On 2021/05/14 21:56, zhouchuangao wrote:
> 1. If the task did not get scheduled for more than 2 minutes,
> report top 3(By default) CPU consumers.
> 
> 2. By default, the CPU utilization of each process in one minute
> is calculated.

+		all_cpu_usage(false);
+		show_top_cpu_consumers(false);
+		msleep(1000);
+		all_cpu_usage(true);
+		show_top_cpu_consumers(true);

1 second rather than 1 minute? Isn't that too short to determine the top CPU consumers?

> 
> 3. Add a new member last_cpu_time to task_struct to record the CPU
> usage of the process at the beginning of the computation.

Speaking of syzbot testing, in many cases the cause of a hung task is simply
that somebody else was consuming too much CPU. Therefore, without
backtraces of the top CPU consumer processes, I think this is no different from
calling

  call_usermodehelper("/bin/sh", { "sh", "-c", "exec top -b -d 1 -n 1 > /dev/klog", NULL }, { NULL }, UMH_WAIT_PROC | UMH_KILLABLE);

before panic().

Maybe a hook for executing some userspace commands with some timeout before panic() is more useful?

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2021-05-15  8:35 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-14 12:56 [PATCH] kernel/hung_task: Report top CPU consumers zhouchuangao
2021-05-15  8:33 ` Tetsuo Handa

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.