[GIT,pull] core/core for
diff mbox series

Message ID 158016896589.31887.11649925452756898441.tglx@nanos.tec.linutronix.de
State New, archived
Headers show
Series
  • [GIT,pull] core/core for
Related show

Commit Message

Thomas Gleixner Jan. 27, 2020, 11:49 p.m. UTC
Linus,

please pull the latest core/core branch from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git core-core-2020-01-28

up to:  11e31f608b49: watchdog/softlockup: Enforce that timestamp is valid on boot

A set of watchdog/softlockup related improvements:

 - Enforce that the watchdog timestamp is always valid on boot. The
   original implementation caused a watchdog disabled gap of one second in
   the boot process due to truncation of the underlying sched clock. The
   sched clock is divided by 1e9 to convert nanoseconds to seconds. So for
   the first second of the boot process the result is 0 which is at the
   same time the indicator to disable the watchdog. The trivial fix is to
   change the disabled indicator to ULONG_MAX.

 - Two cleanup patches removing unused and redundant code which got
   forgotten to be cleaned up in previous changes.

Thanks,

	tglx

------------------>
Jisheng Zhang (1):
      watchdog: Remove soft_lockup_hrtimer_cnt and related code

Petr Mladek (1):
      watchdog/softlockup: Remove obsolete check of last reported task

Thomas Gleixner (1):
      watchdog/softlockup: Enforce that timestamp is valid on boot


 kernel/watchdog.c | 31 +++++++------------------------
 1 file changed, 7 insertions(+), 24 deletions(-)

Comments

Linus Torvalds Jan. 28, 2020, 12:45 a.m. UTC | #1
On Mon, Jan 27, 2020 at 4:06 PM Thomas Gleixner <tglx@linutronix.de> wrote:
>
>    git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git core-core-2020-01-28

Hmm. Maybe this is related to the subject line after all.

I think that your tag naming has some bug in it, and then that bug
causes problems for the subject line?

"core-core" doesn't make much sense as a name, since it is about
watchdog logic.

My guess is that you automated this and something escaped through the cracks.

Again, not a big deal - everything _works_, it just has some oddities there.

            Linus
pr-tracker-bot@kernel.org Jan. 28, 2020, 1:35 a.m. UTC | #2
The pull request you sent on Mon, 27 Jan 2020 23:49:25 -0000:

> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git core-core-2020-01-28

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/b11c89a158f29a9b9d740f8f60b74f261ce6557f

Thank you!

Patch
diff mbox series

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index f41334ef0971..b6b1f54a7837 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -161,6 +161,8 @@  static void lockup_detector_update_enable(void)
 
 #ifdef CONFIG_SOFTLOCKUP_DETECTOR
 
+#define SOFTLOCKUP_RESET	ULONG_MAX
+
 /* Global variables, exported for sysctl */
 unsigned int __read_mostly softlockup_panic =
 			CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
@@ -173,8 +175,6 @@  static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
 static DEFINE_PER_CPU(bool, softlockup_touch_sync);
 static DEFINE_PER_CPU(bool, soft_watchdog_warn);
 static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
-static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
-static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved);
 static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
 static unsigned long soft_lockup_nmi_warn;
 
@@ -274,7 +274,7 @@  notrace void touch_softlockup_watchdog_sched(void)
 	 * Preemption can be enabled.  It doesn't matter which CPU's timestamp
 	 * gets zeroed here, so use the raw_ operation.
 	 */
-	raw_cpu_write(watchdog_touch_ts, 0);
+	raw_cpu_write(watchdog_touch_ts, SOFTLOCKUP_RESET);
 }
 
 notrace void touch_softlockup_watchdog(void)
@@ -298,14 +298,14 @@  void touch_all_softlockup_watchdogs(void)
 	 * the softlockup check.
 	 */
 	for_each_cpu(cpu, &watchdog_allowed_mask)
-		per_cpu(watchdog_touch_ts, cpu) = 0;
+		per_cpu(watchdog_touch_ts, cpu) = SOFTLOCKUP_RESET;
 	wq_watchdog_touch(-1);
 }
 
 void touch_softlockup_watchdog_sync(void)
 {
 	__this_cpu_write(softlockup_touch_sync, true);
-	__this_cpu_write(watchdog_touch_ts, 0);
+	__this_cpu_write(watchdog_touch_ts, SOFTLOCKUP_RESET);
 }
 
 static int is_softlockup(unsigned long touch_ts)
@@ -350,8 +350,6 @@  static DEFINE_PER_CPU(struct cpu_stop_work, softlockup_stop_work);
  */
 static int softlockup_fn(void *data)
 {
-	__this_cpu_write(soft_lockup_hrtimer_cnt,
-			 __this_cpu_read(hrtimer_interrupts));
 	__touch_watchdog();
 	complete(this_cpu_ptr(&softlockup_completion));
 
@@ -383,7 +381,7 @@  static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	/* .. and repeat */
 	hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
 
-	if (touch_ts == 0) {
+	if (touch_ts == SOFTLOCKUP_RESET) {
 		if (unlikely(__this_cpu_read(softlockup_touch_sync))) {
 			/*
 			 * If the time stamp was touched atomically
@@ -416,22 +414,8 @@  static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 			return HRTIMER_RESTART;
 
 		/* only warn once */
-		if (__this_cpu_read(soft_watchdog_warn) == true) {
-			/*
-			 * When multiple processes are causing softlockups the
-			 * softlockup detector only warns on the first one
-			 * because the code relies on a full quiet cycle to
-			 * re-arm.  The second process prevents the quiet cycle
-			 * and never gets reported.  Use task pointers to detect
-			 * this.
-			 */
-			if (__this_cpu_read(softlockup_task_ptr_saved) !=
-			    current) {
-				__this_cpu_write(soft_watchdog_warn, false);
-				__touch_watchdog();
-			}
+		if (__this_cpu_read(soft_watchdog_warn) == true)
 			return HRTIMER_RESTART;
-		}
 
 		if (softlockup_all_cpu_backtrace) {
 			/* Prevent multiple soft-lockup reports if one cpu is already
@@ -447,7 +431,6 @@  static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 		pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
 			smp_processor_id(), duration,
 			current->comm, task_pid_nr(current));
-		__this_cpu_write(softlockup_task_ptr_saved, current);
 		print_modules();
 		print_irqtrace_events(current);
 		if (regs)