linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] sched: watchdog: Touch kernel watchdog in sched code
@ 2020-03-04 21:39 Xi Wang
  2020-03-05  3:11 ` Steven Rostedt
  2020-03-05  7:57 ` Peter Zijlstra
  0 siblings, 2 replies; 12+ messages in thread
From: Xi Wang @ 2020-03-04 21:39 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra
  Cc: Juri Lelli, Vincent Guittot, Dietmar Eggemann, Steven Rostedt,
	Josh Don, linux-kernel, linux-fsdevel, Xi Wang, Paul Turner

The main purpose of kernel watchdog is to test whether scheduler can
still schedule tasks on a cpu. In order to reduce latency from
periodically invoking watchdog reset in thread context, we can simply
touch watchdog from pick_next_task in scheduler. Compared to actually
resetting watchdog from cpu stop / migration threads, we lose coverage
on: a migration thread actually gets picked and we actually context
switch to the migration thread. Both steps are heavily protected by
kernel locks and unlikely to silently fail. Thus the change would
provide the same level of protection with less overhead.

The new way vs the old way to touch the watchdogs is configurable
from:

/proc/sys/kernel/watchdog_touch_in_thread_interval

The value means:
0: Always touch watchdog from pick_next_task
1: Always touch watchdog from migration thread
N (N>1): Touch watchdog from migration thread once in every N
         invocations, and touch watchdog from pick_next_task for
         other invocations.

Suggested-by: Paul Turner <pjt@google.com>
Signed-off-by: Xi Wang <xii@google.com>
---
 kernel/sched/core.c | 36 ++++++++++++++++++++++++++++++++++--
 kernel/sysctl.c     | 11 ++++++++++-
 kernel/watchdog.c   | 39 ++++++++++++++++++++++++++++++++++-----
 3 files changed, 78 insertions(+), 8 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1a9983da4408..9d8e00760d1c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3898,6 +3898,27 @@ static inline void schedule_debug(struct task_struct *prev, bool preempt)
 	schedstat_inc(this_rq()->sched_count);
 }
 
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
+
+DEFINE_PER_CPU(bool, sched_should_touch_watchdog);
+
+void touch_watchdog_from_sched(void);
+
+/* Helper called by watchdog code */
+void resched_for_watchdog(void)
+{
+	unsigned long flags;
+	struct rq *rq = this_rq();
+
+	this_cpu_write(sched_should_touch_watchdog, true);
+	raw_spin_lock_irqsave(&rq->lock, flags);
+	/* Trigger resched for code in pick_next_task to touch watchdog */
+	resched_curr(rq);
+	raw_spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+#endif /* CONFIG_SOFTLOCKUP_DETECTOR */
+
 /*
  * Pick up the highest-prio task:
  */
@@ -3927,7 +3948,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 			p = pick_next_task_idle(rq);
 		}
 
-		return p;
+		goto out;
 	}
 
 restart:
@@ -3951,11 +3972,22 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 	for_each_class(class) {
 		p = class->pick_next_task(rq);
 		if (p)
-			return p;
+			goto out;
 	}
 
 	/* The idle class should always have a runnable task: */
 	BUG();
+
+out:
+
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
+	if (this_cpu_read(sched_should_touch_watchdog)) {
+		touch_watchdog_from_sched();
+		this_cpu_write(sched_should_touch_watchdog, false);
+	}
+#endif
+
+	return p;
 }
 
 /*
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index ad5b88a53c5a..adb4b11fbccb 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -118,6 +118,9 @@ extern unsigned int sysctl_nr_open_min, sysctl_nr_open_max;
 #ifndef CONFIG_MMU
 extern int sysctl_nr_trim_pages;
 #endif
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
+extern unsigned int sysctl_watchdog_touch_in_thread_interval;
+#endif
 
 /* Constants used for minimum and  maximum */
 #ifdef CONFIG_LOCKUP_DETECTOR
@@ -961,6 +964,13 @@ static struct ctl_table kern_table[] = {
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= SYSCTL_ONE,
 	},
+	{
+		.procname	= "watchdog_touch_in_thread_interval",
+		.data		= &sysctl_watchdog_touch_in_thread_interval,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 #ifdef CONFIG_SMP
 	{
 		.procname	= "softlockup_all_cpu_backtrace",
@@ -996,7 +1006,6 @@ static struct ctl_table kern_table[] = {
 #endif /* CONFIG_SMP */
 #endif
 #endif
-
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
 	{
 		.procname       = "unknown_nmi_panic",
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index b6b1f54a7837..f9138c29db48 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -49,6 +49,16 @@ static struct cpumask watchdog_allowed_mask __read_mostly;
 struct cpumask watchdog_cpumask __read_mostly;
 unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
 
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
+/*
+ * 0: Always touch watchdog from pick_next_task
+ * 1: Always touch watchdog from migration thread
+ * N (N>0): Touch watchdog from migration thread once in every N invocations,
+ *          and touch watchdog from pick_next_task for other invocations.
+ */
+unsigned int sysctl_watchdog_touch_in_thread_interval = 10;
+#endif
+
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
 /*
  * Should we panic when a soft-lockup or hard-lockup occurs:
@@ -356,6 +366,9 @@ static int softlockup_fn(void *data)
 	return 0;
 }
 
+static DEFINE_PER_CPU(unsigned int, num_watchdog_wakeup_skipped);
+void resched_for_watchdog(void);
+
 /* watchdog kicker functions */
 static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 {
@@ -371,11 +384,20 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	watchdog_interrupt_count();
 
 	/* kick the softlockup detector */
-	if (completion_done(this_cpu_ptr(&softlockup_completion))) {
-		reinit_completion(this_cpu_ptr(&softlockup_completion));
-		stop_one_cpu_nowait(smp_processor_id(),
-				softlockup_fn, NULL,
-				this_cpu_ptr(&softlockup_stop_work));
+	if ((!sysctl_watchdog_touch_in_thread_interval ||
+	  sysctl_watchdog_touch_in_thread_interval > this_cpu_read(num_watchdog_wakeup_skipped) + 1)) {
+		this_cpu_write(num_watchdog_wakeup_skipped, sysctl_watchdog_touch_in_thread_interval ?
+		  this_cpu_read(num_watchdog_wakeup_skipped) + 1 : 0);
+		/* touch watchdog from pick_next_task */
+		resched_for_watchdog();
+	} else {
+		this_cpu_write(num_watchdog_wakeup_skipped, 0);
+		if (completion_done(this_cpu_ptr(&softlockup_completion))) {
+			reinit_completion(this_cpu_ptr(&softlockup_completion));
+			stop_one_cpu_nowait(smp_processor_id(),
+					softlockup_fn, NULL,
+					this_cpu_ptr(&softlockup_stop_work));
+		}
 	}
 
 	/* .. and repeat */
@@ -526,6 +548,13 @@ static int softlockup_start_fn(void *data)
 	return 0;
 }
 
+
+/* Similar to watchdog thread function but called from pick_next_task */
+void touch_watchdog_from_sched(void)
+{
+	__touch_watchdog();
+}
+
 static void softlockup_start_all(void)
 {
 	int cpu;
-- 
2.25.1.481.gfbce0eb801-goog


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH] sched: watchdog: Touch kernel watchdog in sched code
  2020-03-04 21:39 [PATCH] sched: watchdog: Touch kernel watchdog in sched code Xi Wang
@ 2020-03-05  3:11 ` Steven Rostedt
  2020-03-05  7:57 ` Peter Zijlstra
  1 sibling, 0 replies; 12+ messages in thread
From: Steven Rostedt @ 2020-03-05  3:11 UTC (permalink / raw)
  To: Xi Wang
  Cc: Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Josh Don, linux-kernel, linux-fsdevel,
	Paul Turner

On Wed,  4 Mar 2020 13:39:41 -0800
Xi Wang <xii@google.com> wrote:

> The main purpose of kernel watchdog is to test whether scheduler can
> still schedule tasks on a cpu. In order to reduce latency from
> periodically invoking watchdog reset in thread context, we can simply
> touch watchdog from pick_next_task in scheduler. Compared to actually
> resetting watchdog from cpu stop / migration threads, we lose coverage
> on: a migration thread actually get picked and we actually context
> switch to the migration thread. Both steps are heavily protected by
> kernel locks and unlikely to silently fail. Thus the change would
> provide the same level of protection with less overhead.

Have any measurements showing the drop in overhead?

> 
> The new way vs the old way to touch the watchdogs is configurable
> from:
> 
> /proc/sys/kernel/watchdog_touch_in_thread_interval
> 
> The value means:
> 0: Always touch watchdog from pick_next_task
> 1: Always touch watchdog from migration thread
> N (N>0): Touch watchdog from migration thread once in every N
>          invocations, and touch watchdog from pick_next_task for
>          other invocations.
> 
> Suggested-by: Paul Turner <pjt@google.com>
> Signed-off-by: Xi Wang <xii@google.com>
> ---
>  kernel/sched/core.c | 36 ++++++++++++++++++++++++++++++++++--
>  kernel/sysctl.c     | 11 ++++++++++-
>  kernel/watchdog.c   | 39 ++++++++++++++++++++++++++++++++++-----
>  3 files changed, 78 insertions(+), 8 deletions(-)
> 
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 1a9983da4408..9d8e00760d1c 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -3898,6 +3898,27 @@ static inline void schedule_debug(struct task_struct *prev, bool preempt)
>  	schedstat_inc(this_rq()->sched_count);
>  }
>  
> +#ifdef CONFIG_SOFTLOCKUP_DETECTOR
> +
> +DEFINE_PER_CPU(bool, sched_should_touch_watchdog);
> +
> +void touch_watchdog_from_sched(void);
> +
> +/* Helper called by watchdog code */
> +void resched_for_watchdog(void)
> +{
> +	unsigned long flags;
> +	struct rq *rq = this_rq();
> +
> +	this_cpu_write(sched_should_touch_watchdog, true);

Perhaps we should have a preempt_disable, otherwise it is possible
to get preempted here.

-- Steve

> +	raw_spin_lock_irqsave(&rq->lock, flags);
> +	/* Trigger resched for code in pick_next_task to touch watchdog */
> +	resched_curr(rq);
> +	raw_spin_unlock_irqrestore(&rq->lock, flags);
> +}
> +
> +#endif /* CONFIG_SOFTLOCKUP_DETECTOR */
> +


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] sched: watchdog: Touch kernel watchdog in sched code
  2020-03-04 21:39 [PATCH] sched: watchdog: Touch kernel watchdog in sched code Xi Wang
  2020-03-05  3:11 ` Steven Rostedt
@ 2020-03-05  7:57 ` Peter Zijlstra
  2020-03-05 18:07   ` Thomas Gleixner
  2020-03-05 22:11   ` Paul Turner
  1 sibling, 2 replies; 12+ messages in thread
From: Peter Zijlstra @ 2020-03-05  7:57 UTC (permalink / raw)
  To: Xi Wang
  Cc: Ingo Molnar, Juri Lelli, Vincent Guittot, Dietmar Eggemann,
	Steven Rostedt, Josh Don, linux-kernel, linux-fsdevel,
	Paul Turner

On Wed, Mar 04, 2020 at 01:39:41PM -0800, Xi Wang wrote:
> The main purpose of kernel watchdog is to test whether scheduler can
> still schedule tasks on a cpu. In order to reduce latency from
> periodically invoking watchdog reset in thread context, we can simply
> touch watchdog from pick_next_task in scheduler. Compared to actually
> resetting watchdog from cpu stop / migration threads, we lose coverage
> on: a migration thread actually get picked and we actually context
> switch to the migration thread. Both steps are heavily protected by
> kernel locks and unlikely to silently fail. Thus the change would
> provide the same level of protection with less overhead.
> 
> The new way vs the old way to touch the watchdogs is configurable
> from:
> 
> /proc/sys/kernel/watchdog_touch_in_thread_interval
> 
> The value means:
> 0: Always touch watchdog from pick_next_task
> 1: Always touch watchdog from migration thread
> N (N>0): Touch watchdog from migration thread once in every N
>          invocations, and touch watchdog from pick_next_task for
>          other invocations.
> 

This is configurable madness. What are we really trying to do here?

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] sched: watchdog: Touch kernel watchdog in sched code
  2020-03-05  7:57 ` Peter Zijlstra
@ 2020-03-05 18:07   ` Thomas Gleixner
  2020-03-05 21:41     ` Xi Wang
                       ` (2 more replies)
  2020-03-05 22:11   ` Paul Turner
  1 sibling, 3 replies; 12+ messages in thread
From: Thomas Gleixner @ 2020-03-05 18:07 UTC (permalink / raw)
  To: Peter Zijlstra, Xi Wang
  Cc: Ingo Molnar, Juri Lelli, Vincent Guittot, Dietmar Eggemann,
	Steven Rostedt, Josh Don, linux-kernel, linux-fsdevel,
	Paul Turner

Peter Zijlstra <peterz@infradead.org> writes:

> On Wed, Mar 04, 2020 at 01:39:41PM -0800, Xi Wang wrote:
>> The main purpose of kernel watchdog is to test whether scheduler can
>> still schedule tasks on a cpu. In order to reduce latency from
>> periodically invoking watchdog reset in thread context, we can simply
>> touch watchdog from pick_next_task in scheduler. Compared to actually
>> resetting watchdog from cpu stop / migration threads, we lose coverage
>> on: a migration thread actually get picked and we actually context
>> switch to the migration thread. Both steps are heavily protected by
>> kernel locks and unlikely to silently fail. Thus the change would
>> provide the same level of protection with less overhead.
>> 
>> The new way vs the old way to touch the watchdogs is configurable
>> from:
>> 
>> /proc/sys/kernel/watchdog_touch_in_thread_interval
>> 
>> The value means:
>> 0: Always touch watchdog from pick_next_task
>> 1: Always touch watchdog from migration thread
>> N (N>0): Touch watchdog from migration thread once in every N
>>          invocations, and touch watchdog from pick_next_task for
>>          other invocations.
>> 
>
> This is configurable madness. What are we really trying to do here?

Create yet another knob which will be advertised in random web blogs to
solve all problems of the world and some more. Like the one which got
silently turned into a NOOP ~10 years ago :)



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] sched: watchdog: Touch kernel watchdog in sched code
  2020-03-05 18:07   ` Thomas Gleixner
@ 2020-03-05 21:41     ` Xi Wang
  2020-03-05 22:07     ` Paul Turner
       [not found]     ` <CAOBoifgHNag0P33PKg81iNoCjxenJHfBZG-t-8aEkr_Tjf7o_w@mail.gmail.com>
  2 siblings, 0 replies; 12+ messages in thread
From: Xi Wang @ 2020-03-05 21:41 UTC (permalink / raw)
  Cc: linux-kernel, linux-fsdevel

Measuring jitter from a userspace busy loop showed a 4us peak was
flattened in the histogram (cascadelake). So the effect is likely
reducing overhead/jitter by 4us.

Code in resched_for_watchdog should be ok since it is always called
from the watchdog hrtimer function?

Why supporting the option to alternate between thread context and
touch in sched: Might be a little risky to completely switch to the
touch in sched method. Touch in sched for 9 out of 10 times still
captures most of the latency benefit. I can remove it or change it to
on/off if desired.

Advertising the knob on random blogs: Maybe I should create a blog :)

-Xi


On Thu, Mar 5, 2020 at 10:07 AM Thomas Gleixner <tglx@linutronix.de> wrote:
>
> Peter Zijlstra <peterz@infradead.org> writes:
>
> > On Wed, Mar 04, 2020 at 01:39:41PM -0800, Xi Wang wrote:
> >> The main purpose of kernel watchdog is to test whether scheduler can
> >> still schedule tasks on a cpu. In order to reduce latency from
> >> periodically invoking watchdog reset in thread context, we can simply
> >> touch watchdog from pick_next_task in scheduler. Compared to actually
> >> resetting watchdog from cpu stop / migration threads, we lose coverage
> >> on: a migration thread actually get picked and we actually context
> >> switch to the migration thread. Both steps are heavily protected by
> >> kernel locks and unlikely to silently fail. Thus the change would
> >> provide the same level of protection with less overhead.
> >>
> >> The new way vs the old way to touch the watchdogs is configurable
> >> from:
> >>
> >> /proc/sys/kernel/watchdog_touch_in_thread_interval
> >>
> >> The value means:
> >> 0: Always touch watchdog from pick_next_task
> >> 1: Always touch watchdog from migration thread
> >> N (N>0): Touch watchdog from migration thread once in every N
> >>          invocations, and touch watchdog from pick_next_task for
> >>          other invocations.
> >>
> >
> > This is configurable madness. What are we really trying to do here?
>
> Create yet another knob which will be advertised in random web blogs to
> solve all problems of the world and some more. Like the one which got
> silently turned into a NOOP ~10 years ago :)
>
>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] sched: watchdog: Touch kernel watchdog in sched code
  2020-03-05 18:07   ` Thomas Gleixner
  2020-03-05 21:41     ` Xi Wang
@ 2020-03-05 22:07     ` Paul Turner
       [not found]     ` <CAOBoifgHNag0P33PKg81iNoCjxenJHfBZG-t-8aEkr_Tjf7o_w@mail.gmail.com>
  2 siblings, 0 replies; 12+ messages in thread
From: Paul Turner @ 2020-03-05 22:07 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Peter Zijlstra, Xi Wang, Ingo Molnar, Juri Lelli,
	Vincent Guittot, Dietmar Eggemann, Steven Rostedt, Josh Don,
	LKML, linux-fsdevel

On Thu, Mar 5, 2020 at 10:07 AM Thomas Gleixner <tglx@linutronix.de> wrote:
>
> Peter Zijlstra <peterz@infradead.org> writes:
>
> > On Wed, Mar 04, 2020 at 01:39:41PM -0800, Xi Wang wrote:
> >> The main purpose of kernel watchdog is to test whether scheduler can
> >> still schedule tasks on a cpu. In order to reduce latency from
> >> periodically invoking watchdog reset in thread context, we can simply
> >> touch watchdog from pick_next_task in scheduler. Compared to actually
> >> resetting watchdog from cpu stop / migration threads, we lose coverage
> >> on: a migration thread actually get picked and we actually context
> >> switch to the migration thread. Both steps are heavily protected by
> >> kernel locks and unlikely to silently fail. Thus the change would
> >> provide the same level of protection with less overhead.
> >>
> >> The new way vs the old way to touch the watchdogs is configurable
> >> from:
> >>
> >> /proc/sys/kernel/watchdog_touch_in_thread_interval
> >>
> >> The value means:
> >> 0: Always touch watchdog from pick_next_task
> >> 1: Always touch watchdog from migration thread
> >> N (N>0): Touch watchdog from migration thread once in every N
> >>          invocations, and touch watchdog from pick_next_task for
> >>          other invocations.
> >>
> >
> > This is configurable madness. What are we really trying to do here?
>
> Create yet another knob which will be advertised in random web blogs to
> solve all problems of the world and some more. Like the one which got
> silently turned into a NOOP ~10 years ago :)
>

The knob can obviously be removed, it's vestigial and reflects caution
from when we were implementing / rolling things over to it.  We have
default values that we know work at scale. I don't think this actually
needs or wants to be tunable beyond on or off (and even that could be
strictly compile or boot time only).

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] sched: watchdog: Touch kernel watchdog in sched code
  2020-03-05  7:57 ` Peter Zijlstra
  2020-03-05 18:07   ` Thomas Gleixner
@ 2020-03-05 22:11   ` Paul Turner
  2020-03-06  8:40     ` Peter Zijlstra
  1 sibling, 1 reply; 12+ messages in thread
From: Paul Turner @ 2020-03-05 22:11 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Xi Wang, Ingo Molnar, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Josh Don, LKML, linux-fsdevel

On Wed, Mar 4, 2020 at 11:57 PM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Wed, Mar 04, 2020 at 01:39:41PM -0800, Xi Wang wrote:
> > The main purpose of kernel watchdog is to test whether scheduler can
> > still schedule tasks on a cpu. In order to reduce latency from
> > periodically invoking watchdog reset in thread context, we can simply
> > touch watchdog from pick_next_task in scheduler. Compared to actually
> > resetting watchdog from cpu stop / migration threads, we lose coverage
> > on: a migration thread actually get picked and we actually context
> > switch to the migration thread. Both steps are heavily protected by
> > kernel locks and unlikely to silently fail. Thus the change would
> > provide the same level of protection with less overhead.
> >
> > The new way vs the old way to touch the watchdogs is configurable
> > from:
> >
> > /proc/sys/kernel/watchdog_touch_in_thread_interval
> >
> > The value means:
> > 0: Always touch watchdog from pick_next_task
> > 1: Always touch watchdog from migration thread
> > N (N>0): Touch watchdog from migration thread once in every N
> >          invocations, and touch watchdog from pick_next_task for
> >          other invocations.
> >
>
> This is configurable madness. What are we really trying to do here?

See reply to Thomas, no config is actually required here.  Focusing on
the intended outcome:

The goal is to improve jitter since we're constantly periodically
preempting other classes to run the watchdog.   Even on a single CPU
this is measurable as jitter in the us range.  But, what increases the
motivation is this disruption has been recently magnified by CPU
"gifts" which require evicting the whole core when one of the siblings
schedules one of these watchdog threads.

The majority outcome being asserted here is that we could actually
exercise pick_next_task if required -- there are other potential
things this will catch, but they are much more braindead generally
speaking (e.g. a bug in pick_next_task itself).

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] sched: watchdog: Touch kernel watchdog in sched code
       [not found]     ` <CAOBoifgHNag0P33PKg81iNoCjxenJHfBZG-t-8aEkr_Tjf7o_w@mail.gmail.com>
@ 2020-03-06  8:28       ` Peter Zijlstra
  0 siblings, 0 replies; 12+ messages in thread
From: Peter Zijlstra @ 2020-03-06  8:28 UTC (permalink / raw)
  To: Xi Wang
  Cc: Thomas Gleixner, Ingo Molnar, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Josh Don, linux-kernel,
	linux-fsdevel, Paul Turner



A: Because it messes up the order in which people normally read text.
Q: Why is top-posting such a bad thing?
A: Top-posting.
Q: What is the most annoying thing in e-mail?

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] sched: watchdog: Touch kernel watchdog in sched code
  2020-03-05 22:11   ` Paul Turner
@ 2020-03-06  8:40     ` Peter Zijlstra
  2020-03-06 22:34       ` Xi Wang
  0 siblings, 1 reply; 12+ messages in thread
From: Peter Zijlstra @ 2020-03-06  8:40 UTC (permalink / raw)
  To: Paul Turner
  Cc: Xi Wang, Ingo Molnar, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Josh Don, LKML, linux-fsdevel

On Thu, Mar 05, 2020 at 02:11:49PM -0800, Paul Turner wrote:
> The goal is to improve jitter since we're constantly periodically
> preempting other classes to run the watchdog.   Even on a single CPU
> this is measurable as jitter in the us range.  But, what increases the
> motivation is this disruption has been recently magnified by CPU
> "gifts" which require evicting the whole core when one of the siblings
> schedules one of these watchdog threads.
> 
> The majority outcome being asserted here is that we could actually
> exercise pick_next_task if required -- there are other potential
> things this will catch, but they are much more braindead generally
> speaking (e.g. a bug in pick_next_task itself).

I still utterly hate what the patch does though; there is no way I'll
have watchdog code hook in the scheduler like this. That's just asking
for trouble.

Why isn't it sufficient to sample the existing context switch counters
from the watchdog? And why can't we fix that?

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] sched: watchdog: Touch kernel watchdog in sched code
  2020-03-06  8:40     ` Peter Zijlstra
@ 2020-03-06 22:34       ` Xi Wang
  2020-10-05 11:19         ` Peter Zijlstra
  0 siblings, 1 reply; 12+ messages in thread
From: Xi Wang @ 2020-03-06 22:34 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Paul Turner, Ingo Molnar, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Josh Don, LKML, linux-fsdevel

On Fri, Mar 6, 2020 at 12:40 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Thu, Mar 05, 2020 at 02:11:49PM -0800, Paul Turner wrote:
> > The goal is to improve jitter since we're constantly periodically
> > preempting other classes to run the watchdog.   Even on a single CPU
> > this is measurable as jitter in the us range.  But, what increases the
> > motivation is this disruption has been recently magnified by CPU
> > "gifts" which require evicting the whole core when one of the siblings
> > schedules one of these watchdog threads.
> >
> > The majority outcome being asserted here is that we could actually
> > exercise pick_next_task if required -- there are other potential
> > things this will catch, but they are much more braindead generally
> > speaking (e.g. a bug in pick_next_task itself).
>
> I still utterly hate what the patch does though; there is no way I'll
> have watchdog code hook in the scheduler like this. That's just asking
> for trouble.
>
> Why isn't it sufficient to sample the existing context switch counters
> from the watchdog? And why can't we fix that?

We could go to pick next and repick the same task. There won't be a
context switch but we still want to hold the watchdog. I assume such a
counter also needs to be per cpu and inside the rq lock. There doesn't
seem to be an existing one that fits this purpose.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] sched: watchdog: Touch kernel watchdog in sched code
  2020-03-06 22:34       ` Xi Wang
@ 2020-10-05 11:19         ` Peter Zijlstra
  2020-10-06  2:21           ` Xi Wang
  0 siblings, 1 reply; 12+ messages in thread
From: Peter Zijlstra @ 2020-10-05 11:19 UTC (permalink / raw)
  To: Xi Wang
  Cc: Paul Turner, Ingo Molnar, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Josh Don, LKML, linux-fsdevel,
	Stephane Eranian, Arnaldo Carvalho de Melo

On Fri, Mar 06, 2020 at 02:34:20PM -0800, Xi Wang wrote:
> On Fri, Mar 6, 2020 at 12:40 AM Peter Zijlstra <peterz@infradead.org> wrote:
> >
> > On Thu, Mar 05, 2020 at 02:11:49PM -0800, Paul Turner wrote:
> > > The goal is to improve jitter since we're constantly periodically
> > > preempting other classes to run the watchdog.   Even on a single CPU
> > > this is measurable as jitter in the us range.  But, what increases the
> > > motivation is this disruption has been recently magnified by CPU
> > > "gifts" which require evicting the whole core when one of the siblings
> > > schedules one of these watchdog threads.
> > >
> > > The majority outcome being asserted here is that we could actually
> > > exercise pick_next_task if required -- there are other potential
> > > things this will catch, but they are much more braindead generally
> > > speaking (e.g. a bug in pick_next_task itself).
> >
> > I still utterly hate what the patch does though; there is no way I'll
> > have watchdog code hook in the scheduler like this. That's just asking
> > for trouble.
> >
> > Why isn't it sufficient to sample the existing context switch counters
> > from the watchdog? And why can't we fix that?
> 
> We could go to pick next and repick the same task. There won't be a
> context switch but we still want to hold the watchdog. I assume such a
> counter also needs to be per cpu and inside the rq lock. There doesn't
> seem to be an existing one that fits this purpose.

Sorry, your reply got lost, but I just ran into something that reminded
me of this.

There's sched_count. That's currently schedstat, but if you can find a
spot in a hot cacheline (from schedule()'s perspective) then it
should be cheap to incremenent unconditionally.

If only someone were to write a useful cacheline perf tool (and no that
c2c trainwreck doesn't count).


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] sched: watchdog: Touch kernel watchdog in sched code
  2020-10-05 11:19         ` Peter Zijlstra
@ 2020-10-06  2:21           ` Xi Wang
  0 siblings, 0 replies; 12+ messages in thread
From: Xi Wang @ 2020-10-06  2:21 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Paul Turner, Ingo Molnar, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Steven Rostedt, Josh Don, LKML, linux-fsdevel,
	Stephane Eranian, Arnaldo Carvalho de Melo

On Mon, Oct 5, 2020 at 4:19 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Fri, Mar 06, 2020 at 02:34:20PM -0800, Xi Wang wrote:
> > On Fri, Mar 6, 2020 at 12:40 AM Peter Zijlstra <peterz@infradead.org> wrote:
> > >
> > > On Thu, Mar 05, 2020 at 02:11:49PM -0800, Paul Turner wrote:
> > > > The goal is to improve jitter since we're constantly periodically
> > > > preempting other classes to run the watchdog.   Even on a single CPU
> > > > this is measurable as jitter in the us range.  But, what increases the
> > > > motivation is this disruption has been recently magnified by CPU
> > > > "gifts" which require evicting the whole core when one of the siblings
> > > > schedules one of these watchdog threads.
> > > >
> > > > The majority outcome being asserted here is that we could actually
> > > > exercise pick_next_task if required -- there are other potential
> > > > things this will catch, but they are much more braindead generally
> > > > speaking (e.g. a bug in pick_next_task itself).
> > >
> > > I still utterly hate what the patch does though; there is no way I'll
> > > have watchdog code hook in the scheduler like this. That's just asking
> > > for trouble.
> > >
> > > Why isn't it sufficient to sample the existing context switch counters
> > > from the watchdog? And why can't we fix that?
> >
> > We could go to pick next and repick the same task. There won't be a
> > context switch but we still want to hold the watchdog. I assume such a
> > counter also needs to be per cpu and inside the rq lock. There doesn't
> > seem to be an existing one that fits this purpose.
>
> Sorry, your reply got lost, but I just ran into something that reminded
> me of this.
>
> There's sched_count. That's currently schedstat, but if you can find a
> spot in a hot cacheline (from schedule()'s perspective) then it
> should be cheap to incremenent unconditionally.
>
> If only someone were to write a useful cacheline perf tool (and no that
> c2c trainwreck doesn't count).
>

Thanks, I'll try the alternative implementation.

-Xi

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2020-10-06  2:21 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-03-04 21:39 [PATCH] sched: watchdog: Touch kernel watchdog in sched code Xi Wang
2020-03-05  3:11 ` Steven Rostedt
2020-03-05  7:57 ` Peter Zijlstra
2020-03-05 18:07   ` Thomas Gleixner
2020-03-05 21:41     ` Xi Wang
2020-03-05 22:07     ` Paul Turner
     [not found]     ` <CAOBoifgHNag0P33PKg81iNoCjxenJHfBZG-t-8aEkr_Tjf7o_w@mail.gmail.com>
2020-03-06  8:28       ` Peter Zijlstra
2020-03-05 22:11   ` Paul Turner
2020-03-06  8:40     ` Peter Zijlstra
2020-03-06 22:34       ` Xi Wang
2020-10-05 11:19         ` Peter Zijlstra
2020-10-06  2:21           ` Xi Wang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).