* [PATCH 0/3] lockup detector fixes
From: Don Zickus @ 2010-05-17 22:06 UTC (permalink / raw)
  To: fweisbec, mingo; +Cc: a.p.zijlstra, gorcunov, linux-kernel, Don Zickus

Another round of cleanups thanks to Frederic W.'s review.

The biggest churn comes from trying to clean up the sysctl interface.

Cheers,
Don

Don Zickus (3):
  lockup_detector: convert per_cpu to __get_cpu_var for readability
  [watchdog] separate hardlockup/softlockup enable paths
  [watchdog] re-introduce support for nmi_watchdog, nosoftlockup

 arch/x86/include/asm/nmi.h |    2 -
 include/linux/nmi.h        |    4 +-
 kernel/sysctl.c            |   15 +++-
 kernel/watchdog.c          |  230 +++++++++++++++++++++++++++-----------------
 4 files changed, 156 insertions(+), 95 deletions(-)



* [PATCH 1/3] lockup_detector: convert per_cpu to __get_cpu_var for readability
From: Don Zickus @ 2010-05-17 22:06 UTC (permalink / raw)
  To: fweisbec, mingo; +Cc: a.p.zijlstra, gorcunov, linux-kernel, Don Zickus

Just a bunch of conversions as suggested by Frederic W.
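
For reference, a minimal sketch contrasting the two accessors (illustrative
only, not part of the diff below):

	DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);

	/* per_cpu() indexes any cpu's copy; the caller supplies the cpu id */
	static unsigned long read_remote(int cpu)
	{
		return per_cpu(hrtimer_interrupts, cpu);
	}

	/*
	 * __get_cpu_var() always means "this cpu's copy" and, with
	 * CONFIG_DEBUG_PREEMPT, complains if preemption is enabled, so the
	 * conversion also documents that these paths run with preemption
	 * disabled (timer or NMI context).
	 */
	static unsigned long read_local(void)
	{
		return __get_cpu_var(hrtimer_interrupts);
	}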

Signed-off-by: Don Zickus <dzickus@redhat.com>
---
 kernel/watchdog.c |   35 +++++++++++++++++------------------
 1 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index e53622c..91b0b26 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -115,7 +115,7 @@ static unsigned long get_sample_period(void)
 /* Commands for resetting the watchdog */
 static void __touch_watchdog(void)
 {
-	int this_cpu = raw_smp_processor_id();
+	int this_cpu = smp_processor_id();
 
 	__get_cpu_var(watchdog_touch_ts) = get_timestamp(this_cpu);
 }
@@ -157,21 +157,21 @@ void touch_softlockup_watchdog_sync(void)
 
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
 /* watchdog detector functions */
-static int is_hardlockup(int cpu)
+static int is_hardlockup(void)
 {
-	unsigned long hrint = per_cpu(hrtimer_interrupts, cpu);
+	unsigned long hrint = __get_cpu_var(hrtimer_interrupts);
 
-	if (per_cpu(hrtimer_interrupts_saved, cpu) == hrint)
+	if (__get_cpu_var(hrtimer_interrupts_saved) == hrint)
 		return 1;
 
-	per_cpu(hrtimer_interrupts_saved, cpu) = hrint;
+	__get_cpu_var(hrtimer_interrupts_saved) = hrint;
 	return 0;
 }
 #endif
 
-static int is_softlockup(unsigned long touch_ts, int cpu)
+static int is_softlockup(unsigned long touch_ts)
 {
-	unsigned long now = get_timestamp(cpu);
+	unsigned long now = get_timestamp(smp_processor_id());
 
 	/* Warn about unreasonable delays: */
 	if (time_after(now, touch_ts + softlockup_thresh))
@@ -206,8 +206,6 @@ void watchdog_overflow_callback(struct perf_event *event, int nmi,
 		 struct perf_sample_data *data,
 		 struct pt_regs *regs)
 {
-	int this_cpu = smp_processor_id();
-
 	if (__get_cpu_var(watchdog_nmi_touch) == true) {
 		__get_cpu_var(watchdog_nmi_touch) = false;
 		return;
@@ -219,7 +217,9 @@ void watchdog_overflow_callback(struct perf_event *event, int nmi,
 	 * fired multiple times before we overflow'd.  If it hasn't
 	 * then this is a good indication the cpu is stuck
 	 */
-	if (is_hardlockup(this_cpu)) {
+	if (is_hardlockup()) {
+		int this_cpu = smp_processor_id();
+
 		/* only print hardlockups once */
 		if (__get_cpu_var(hard_watchdog_warn) == true)
 			return;
@@ -247,7 +247,6 @@ static inline void watchdog_interrupt_count(void) { return; }
 /* watchdog kicker functions */
 static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 {
-	int this_cpu = smp_processor_id();
 	unsigned long touch_ts = __get_cpu_var(watchdog_touch_ts);
 	struct pt_regs *regs = get_irq_regs();
 	int duration;
@@ -262,12 +261,12 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	hrtimer_forward_now(hrtimer, ns_to_ktime(get_sample_period()));
 
 	if (touch_ts == 0) {
-		if (unlikely(per_cpu(softlockup_touch_sync, this_cpu))) {
+		if (unlikely(__get_cpu_var(softlockup_touch_sync))) {
 			/*
 			 * If the time stamp was touched atomically
 			 * make sure the scheduler tick is up to date.
 			 */
-			per_cpu(softlockup_touch_sync, this_cpu) = false;
+			__get_cpu_var(softlockup_touch_sync) = false;
 			sched_clock_tick();
 		}
 		__touch_watchdog();
@@ -280,14 +279,14 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	 * indicate it is getting cpu time.  If it hasn't then
 	 * this is a good indication some task is hogging the cpu
 	 */
-	duration = is_softlockup(touch_ts, this_cpu);
+	duration = is_softlockup(touch_ts);
 	if (unlikely(duration)) {
 		/* only warn once */
 		if (__get_cpu_var(soft_watchdog_warn) == true)
 			return HRTIMER_RESTART;
 
 		printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
-			this_cpu, duration,
+			smp_processor_id(), duration,
 			current->comm, task_pid_nr(current));
 		print_modules();
 		print_irqtrace_events(current);
@@ -309,10 +308,10 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 /*
  * The watchdog thread - touches the timestamp.
  */
-static int watchdog(void *__bind_cpu)
+static int watchdog(void *unused)
 {
 	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
-	struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, (unsigned long)__bind_cpu);
+	struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
 
 	sched_setscheduler(current, SCHED_FIFO, &param);
 
@@ -328,7 +327,7 @@ static int watchdog(void *__bind_cpu)
 	/*
 	 * Run briefly once per second to reset the softlockup timestamp.
 	 * If this gets delayed for more than 60 seconds then the
-	 * debug-printout triggers in softlockup_tick().
+	 * debug-printout triggers in watchdog_timer_fn().
 	 */
 	while (!kthread_should_stop()) {
 		__touch_watchdog();
-- 
1.7.0.1



* [PATCH 2/3] [watchdog] separate hardlockup/softlockup enable paths
From: Don Zickus @ 2010-05-17 22:06 UTC (permalink / raw)
  To: fweisbec, mingo; +Cc: a.p.zijlstra, gorcunov, linux-kernel, Don Zickus

In preparation for supporting the backwards-compatible option nmi_watchdog
properly from the kernel command line, the enable/disable paths for the
hardlockup and softlockup code needed to be separated more cleanly.

The code is re-arranged a bit to create a watchdog_softlockup_enable/disable
function pair to mimic the hardlockup counterpart.  In addition, a softlockup
callback is created to make it easy to turn the softlockup code on/off
without interfering with the hardlockup code.

The functionality should still be the same.
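
Condensed, the mechanism is a per-cpu function pointer that the shared
hrtimer consults; a sketch of the idea, using the names from the patch below:

	typedef void (*callback_t)(void);
	static DEFINE_PER_CPU(callback_t, softlockup_callback);

	static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
	{
		callback_t cb = __get_cpu_var(softlockup_callback);

		/* setup next timer */
		hrtimer_forward_now(hrtimer, ns_to_ktime(get_sample_period()));

		/* the hardlockup side always runs ... */
		watchdog_interrupt_count();

		/* ... the softlockup side only if its callback is installed */
		if (cb)
			cb();

		return HRTIMER_RESTART;
	}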

Signed-off-by: Don Zickus <dzickus@redhat.com>
---
 kernel/watchdog.c |   92 ++++++++++++++++++++++++++++++++++++-----------------
 1 files changed, 63 insertions(+), 29 deletions(-)

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 91b0b26..0a6bdb7 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -30,9 +30,12 @@
 int watchdog_enabled;
 int __read_mostly softlockup_thresh = 60;
 
+typedef void (*callback_t)(void);
+
 static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
-static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
+static DEFINE_PER_CPU(struct task_struct *, watchdog_thread);
 static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
+static DEFINE_PER_CPU(callback_t, softlockup_callback);
 static DEFINE_PER_CPU(bool, softlockup_touch_sync);
 static DEFINE_PER_CPU(bool, soft_watchdog_warn);
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
@@ -244,21 +247,14 @@ static void watchdog_interrupt_count(void)
 static inline void watchdog_interrupt_count(void) { return; }
 #endif /* CONFIG_HARDLOCKUP_DETECTOR */
 
-/* watchdog kicker functions */
-static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
+static void watchdog_softlockup_callback(void)
 {
 	unsigned long touch_ts = __get_cpu_var(watchdog_touch_ts);
 	struct pt_regs *regs = get_irq_regs();
 	int duration;
 
-	/* kick the hardlockup detector */
-	watchdog_interrupt_count();
-
 	/* kick the softlockup detector */
-	wake_up_process(__get_cpu_var(softlockup_watchdog));
-
-	/* .. and repeat */
-	hrtimer_forward_now(hrtimer, ns_to_ktime(get_sample_period()));
+	wake_up_process(__get_cpu_var(watchdog_thread));
 
 	if (touch_ts == 0) {
 		if (unlikely(__get_cpu_var(softlockup_touch_sync))) {
@@ -270,7 +266,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 			sched_clock_tick();
 		}
 		__touch_watchdog();
-		return HRTIMER_RESTART;
+		return;
 	}
 
 	/* check for a softlockup
@@ -283,7 +279,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	if (unlikely(duration)) {
 		/* only warn once */
 		if (__get_cpu_var(soft_watchdog_warn) == true)
-			return HRTIMER_RESTART;
+			return;
 
 		printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
 			smp_processor_id(), duration,
@@ -301,6 +297,24 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	} else
 		__get_cpu_var(soft_watchdog_warn) = false;
 
+	return;
+}
+
+/* watchdog kicker functions */
+static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
+{
+	callback_t cb = __get_cpu_var(softlockup_callback);
+
+	/* setup next timer */
+	hrtimer_forward_now(hrtimer, ns_to_ktime(get_sample_period()));
+
+	/* kick the hardlockup detector */
+	watchdog_interrupt_count();
+
+	/* check the softlockup detector */
+	if (cb)
+		cb();
+
 	return HRTIMER_RESTART;
 }
 
@@ -397,12 +411,27 @@ static int watchdog_nmi_enable(int cpu) { return 0; }
 static void watchdog_nmi_disable(int cpu) { return; }
 #endif /* CONFIG_HARDLOCKUP_DETECTOR */
 
+static int watchdog_softlockup_enable(int cpu)
+{
+	/* if any cpu succeeds, watchdog is considered enabled for the system */
+	per_cpu(softlockup_callback, cpu) = watchdog_softlockup_callback;
+	wake_up_process(per_cpu(watchdog_thread, cpu));
+
+	return 0;
+}
+
+static void watchdog_softlockup_disable(int cpu)
+{
+	per_cpu(softlockup_callback, cpu) = NULL;
+	return;
+}
+
 /* prepare/enable/disable routines */
 static int watchdog_prepare_cpu(int cpu)
 {
 	struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu);
 
-	WARN_ON(per_cpu(softlockup_watchdog, cpu));
+	WARN_ON(per_cpu(watchdog_thread, cpu));
 	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	hrtimer->function = watchdog_timer_fn;
 
@@ -411,11 +440,8 @@ static int watchdog_prepare_cpu(int cpu)
 
 static int watchdog_enable(int cpu)
 {
-	struct task_struct *p = per_cpu(softlockup_watchdog, cpu);
-
-	/* enable the perf event */
-	if (watchdog_nmi_enable(cpu) != 0)
-		return -1;
+	struct task_struct *p = per_cpu(watchdog_thread, cpu);
+	int result = 0;
 
 	/* create the watchdog thread */
 	if (!p) {
@@ -426,35 +452,43 @@ static int watchdog_enable(int cpu)
 		}
 		kthread_bind(p, cpu);
 		per_cpu(watchdog_touch_ts, cpu) = 0;
-		per_cpu(softlockup_watchdog, cpu) = p;
-		wake_up_process(p);
+		per_cpu(watchdog_thread, cpu) = p;
+
 	}
 
-	return 0;
+	/* enable the hardlockup detector */
+	if (watchdog_nmi_enable(cpu) != 0)
+		result += 1;
+
+	/* enable the softlockup detector */
+	if (watchdog_softlockup_enable(cpu) != 0)
+		result += 1;
+
+	return result;
 }
 
 static void watchdog_disable(int cpu)
 {
-	struct task_struct *p = per_cpu(softlockup_watchdog, cpu);
+	struct task_struct *p = per_cpu(watchdog_thread, cpu);
 	struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu);
 
+	/* disable the hardlockup detector */
+	watchdog_nmi_disable(cpu);
+
+	/* disable the softlockup detector */
+	watchdog_softlockup_disable(cpu);
+
 	/*
 	 * cancel the timer first to stop incrementing the stats
 	 * and waking up the kthread
 	 */
 	hrtimer_cancel(hrtimer);
 
-	/* disable the perf event */
-	watchdog_nmi_disable(cpu);
-
 	/* stop the watchdog thread */
 	if (p) {
-		per_cpu(softlockup_watchdog, cpu) = NULL;
+		per_cpu(watchdog_thread, cpu) = NULL;
 		kthread_stop(p);
 	}
-
-	/* if any cpu succeeds, watchdog is considered enabled for the system */
-	watchdog_enabled = 1;
 }
 
 static void watchdog_enable_all_cpus(void)
-- 
1.7.0.1



* [PATCH 3/3] [watchdog] re-introduce support for nmi_watchdog, nosoftlockup
From: Don Zickus @ 2010-05-17 22:06 UTC (permalink / raw)
  To: fweisbec, mingo; +Cc: a.p.zijlstra, gorcunov, linux-kernel, Don Zickus

These options broke during this rewrite.  The previous patch cleaned
up the internals.  This patch actually uses those changes to allow
one to properly use nmi_watchdog=0 and nosoftlockup from the kernel
command line and the bash shell.

The downside of these changes is that I removed the global 'watchdog'
option that would have enabled/disabled both the hardlockup and softlockup
detectors (as if they were one).
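
The enable flags become a tri-state: -1 means pinned off from the boot
line, while 0/1 track the current runtime state.  A condensed sketch of how
the boot option gates later enable requests, as in the patch below:

	#define LOCKUP_DETECT_DISABLED	-1
	#define LOCKUP_DETECT_OFF	0
	#define LOCKUP_DETECT_ON	1

	static int watchdog_nmi_enable(int cpu)
	{
		/* "nmi_watchdog=0" turns every later enable into a no-op */
		if (nmi_watchdog_enabled == LOCKUP_DETECT_DISABLED)
			return 0;

		/* ... set up and enable the per-cpu perf event ... */

		nmi_watchdog_enabled = LOCKUP_DETECT_ON;
		return 0;
	}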

Signed-off-by: Don Zickus <dzickus@redhat.com>
---
 arch/x86/include/asm/nmi.h |    2 -
 include/linux/nmi.h        |    4 +-
 kernel/sysctl.c            |   15 +++++-
 kernel/watchdog.c          |  103 ++++++++++++++++++++++++++-----------------
 4 files changed, 76 insertions(+), 48 deletions(-)

diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 932f0f8..93da9c3 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -17,9 +17,7 @@ int do_nmi_callback(struct pt_regs *regs, int cpu);
 
 extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
 extern int check_nmi_watchdog(void);
-#if !defined(CONFIG_LOCKUP_DETECTOR)
 extern int nmi_watchdog_enabled;
-#endif
 extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
 extern int reserve_perfctr_nmi(unsigned int);
 extern void release_perfctr_nmi(unsigned int);
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 06aab5e..d075b3a 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -54,9 +54,9 @@ static inline bool trigger_all_cpu_backtrace(void)
 #ifdef CONFIG_LOCKUP_DETECTOR
 int hw_nmi_is_cpu_stuck(struct pt_regs *);
 u64 hw_nmi_get_sample_period(void);
-extern int watchdog_enabled;
+extern int softlockup_watchdog_enabled;
 struct ctl_table;
-extern int proc_dowatchdog_enabled(struct ctl_table *, int ,
+extern int proc_softlockup_enabled(struct ctl_table *, int ,
 			void __user *, size_t *, loff_t *);
 #endif
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 04bcd8a..e856655 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -688,11 +688,11 @@ static struct ctl_table kern_table[] = {
 	},
 #if defined(CONFIG_LOCKUP_DETECTOR)
 	{
-		.procname       = "watchdog",
-		.data           = &watchdog_enabled,
+		.procname       = "softlockup_watchdog",
+		.data           = &softlockup_watchdog_enabled,
 		.maxlen         = sizeof (int),
 		.mode           = 0644,
-		.proc_handler   = proc_dowatchdog_enabled,
+		.proc_handler   = proc_softlockup_enabled
 	},
 	{
 		.procname	= "watchdog_thresh",
@@ -712,6 +712,15 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
+	{
+		.procname       = "nmi_watchdog",
+		.data           = &nmi_watchdog_enabled,
+		.maxlen         = sizeof (int),
+		.mode           = 0644,
+		.proc_handler   = proc_nmi_enabled,
+	},
+#endif
 #endif
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) && !defined(CONFIG_LOCKUP_DETECTOR)
 	{
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 0a6bdb7..f3c63a8 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -27,7 +27,6 @@
 #include <asm/irq_regs.h>
 #include <linux/perf_event.h>
 
-int watchdog_enabled;
 int __read_mostly softlockup_thresh = 60;
 
 typedef void (*callback_t)(void);
@@ -47,7 +46,14 @@ static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
 #endif
 
 static int __read_mostly did_panic;
-static int __initdata no_watchdog;
+
+#define LOCKUP_DETECT_DISABLED	-1
+#define LOCKUP_DETECT_OFF	0
+#define LOCKUP_DETECT_ON	1
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
+int nmi_watchdog_enabled = LOCKUP_DETECT_OFF;
+#endif
+int softlockup_watchdog_enabled = LOCKUP_DETECT_OFF;
 
 
 /* boot commands */
@@ -59,8 +65,19 @@ static int hardlockup_panic;
 
 static int __init hardlockup_panic_setup(char *str)
 {
-	if (!strncmp(str, "panic", 5))
+	unsigned int nmi = 1;	/* default: don't disable */
+
+	if (!strncmp(str, "panic", 5)) {
 		hardlockup_panic = 1;
+		str = strchr(str, ',');
+		if (!str)
+			return 1;
+		++str;
+	}
+	get_option(&str, &nmi);
+	if (nmi == 0)
+		nmi_watchdog_enabled = LOCKUP_DETECT_DISABLED;
+
 	return 1;
 }
 __setup("nmi_watchdog=", hardlockup_panic_setup);
@@ -77,21 +94,12 @@ static int __init softlockup_panic_setup(char *str)
 }
 __setup("softlockup_panic=", softlockup_panic_setup);
 
-static int __init nowatchdog_setup(char *str)
-{
-	no_watchdog = 1;
-	return 1;
-}
-__setup("nowatchdog", nowatchdog_setup);
-
-/* deprecated */
 static int __init nosoftlockup_setup(char *str)
 {
-	no_watchdog = 1;
+	softlockup_watchdog_enabled = LOCKUP_DETECT_DISABLED;
 	return 1;
 }
 __setup("nosoftlockup", nosoftlockup_setup);
-/*  */
 
 
 /*
@@ -338,6 +346,7 @@ static int watchdog(void *unused)
 		      HRTIMER_MODE_REL_PINNED);
 
 	set_current_state(TASK_INTERRUPTIBLE);
+
 	/*
 	 * Run briefly once per second to reset the softlockup timestamp.
 	 * If this gets delayed for more than 60 seconds then the
@@ -364,6 +373,10 @@ static int watchdog_nmi_enable(int cpu)
 	struct perf_event_attr *wd_attr;
 	struct perf_event *event = per_cpu(watchdog_ev, cpu);
 
+	/* boot param says don't enable */
+	if (nmi_watchdog_enabled == LOCKUP_DETECT_DISABLED)
+		return 0;
+
 	/* is it already setup and enabled? */
 	if (event && event->state > PERF_EVENT_STATE_OFF)
 		goto out;
@@ -390,6 +403,7 @@ out_save:
 out_enable:
 	perf_event_enable(per_cpu(watchdog_ev, cpu));
 out:
+	nmi_watchdog_enabled = LOCKUP_DETECT_ON;
 	return 0;
 }
 
@@ -414,8 +428,13 @@ static void watchdog_nmi_disable(int cpu) { return; }
 static int watchdog_softlockup_enable(int cpu)
 {
 	/* if any cpu succeeds, watchdog is considered enabled for the system */
+	/* skip if DISABLED */
+	if (softlockup_watchdog_enabled == LOCKUP_DETECT_DISABLED)
+		return 0;
+
 	per_cpu(softlockup_callback, cpu) = watchdog_softlockup_callback;
 	wake_up_process(per_cpu(watchdog_thread, cpu));
+	softlockup_watchdog_enabled = LOCKUP_DETECT_ON;
 
 	return 0;
 }
@@ -491,46 +510,51 @@ static void watchdog_disable(int cpu)
 	}
 }
 
-static void watchdog_enable_all_cpus(void)
+/* sysctl functions */
+#ifdef CONFIG_SYSCTL
+/*
+ * proc handler for /proc/sys/kernel/nmi_watchdog
+ */
+
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
+int proc_nmi_enabled(struct ctl_table *table, int write,
+		     void __user *buffer, size_t *length, loff_t *ppos)
 {
 	int cpu;
-	int result;
+	int result = 0;
+
+	proc_dointvec(table, write, buffer, length, ppos);
 
 	for_each_online_cpu(cpu)
-		result += watchdog_enable(cpu);
+		if (nmi_watchdog_enabled)
+			result += watchdog_nmi_enable(cpu);
+		else
+			watchdog_nmi_disable(cpu);
 
 	if (result)
-		printk(KERN_ERR "watchdog: failed to be enabled on some cpus\n");
+		printk(KERN_ERR "watchdog: hardlockup failed to be enabled on some cpus\n");
 
+	return 0;
 }
+#endif
 
-static void watchdog_disable_all_cpus(void)
+int proc_softlockup_enabled(struct ctl_table *table, int write,
+		     void __user *buffer, size_t *length, loff_t *ppos)
 {
 	int cpu;
+	int result = 0;
 
-	for_each_online_cpu(cpu)
-		watchdog_disable(cpu);
-
-	/* if all watchdogs are disabled, then they are disabled for the system */
-	watchdog_enabled = 0;
-}
-
+	proc_dointvec(table, write, buffer, length, ppos);
 
-/* sysctl functions */
-#ifdef CONFIG_SYSCTL
-/*
- * proc handler for /proc/sys/kernel/nmi_watchdog
- */
+	for_each_online_cpu(cpu)
+		if (softlockup_watchdog_enabled)
+			result += watchdog_softlockup_enable(cpu);
+		else
+			watchdog_softlockup_disable(cpu);
 
-int proc_dowatchdog_enabled(struct ctl_table *table, int write,
-		     void __user *buffer, size_t *length, loff_t *ppos)
-{
-	proc_dointvec(table, write, buffer, length, ppos);
+	if (result)
+		printk(KERN_ERR "watchdog: softlockup failed to be enabled on some cpus\n");
 
-	if (watchdog_enabled)
-		watchdog_enable_all_cpus();
-	else
-		watchdog_disable_all_cpus();
 	return 0;
 }
 
@@ -585,9 +609,6 @@ static int __init spawn_watchdog_task(void)
 	void *cpu = (void *)(long)smp_processor_id();
 	int err;
 
-	if (no_watchdog)
-		return 0;
-
 	err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
 	WARN_ON(err == NOTIFY_BAD);
 
-- 
1.7.0.1



* Re: [PATCH 2/3] [watchdog] separate hardlockup/softlockup enable paths
From: Frederic Weisbecker @ 2010-05-19  8:46 UTC (permalink / raw)
  To: Don Zickus; +Cc: mingo, a.p.zijlstra, gorcunov, linux-kernel

On Mon, May 17, 2010 at 06:06:05PM -0400, Don Zickus wrote:
> In preparation for supporting the backwards-compatible option nmi_watchdog
> properly from the kernel command line, the enable/disable paths for the
> hardlockup and softlockup code needed to be separated more cleanly.
> 
> The code is re-arranged a bit to create a watchdog_softlockup_enable/disable
> function pair to mimic the hardlockup counterpart.  In addition, a softlockup
> callback is created to make it easy to turn the softlockup code on/off
> without interfering with the hardlockup code.
> 
> The functionality should still be the same.



I don't think we really want this.  The unification is a good thing not
only for maintenance and genericity of the code, but also because we no
longer need to worry about which watchdog to turn on/off.

The fact is, when you have a lockup you often don't even know whether it
is soft or hard.  If you are in X, you won't know.

So people just don't bother with that granularity of control; they simply
enable or disable both detectors.

I would suggest leaving things as they are and not making the code more
complicated for something that won't be used.

Just forget about the nmi_watchdog file. A simple watchdog file to control
everything is much better.

Thanks.



* Re: [PATCH 1/3] lockup_detector: convert per_cpu to __get_cpu_var for readability
From: Frederic Weisbecker @ 2010-05-19  8:47 UTC (permalink / raw)
  To: Don Zickus; +Cc: mingo, a.p.zijlstra, gorcunov, linux-kernel

On Mon, May 17, 2010 at 06:06:04PM -0400, Don Zickus wrote:
> Just a bunch of conversions as suggested by Frederic W.
> 
> Signed-off-by: Don Zickus <dzickus@redhat.com>
> ---


I'm taking this one,

Thanks.



>  kernel/watchdog.c |   35 +++++++++++++++++------------------
>  1 files changed, 17 insertions(+), 18 deletions(-)
> 
> diff --git a/kernel/watchdog.c b/kernel/watchdog.c
> index e53622c..91b0b26 100644
> --- a/kernel/watchdog.c
> +++ b/kernel/watchdog.c
> @@ -115,7 +115,7 @@ static unsigned long get_sample_period(void)
>  /* Commands for resetting the watchdog */
>  static void __touch_watchdog(void)
>  {
> -	int this_cpu = raw_smp_processor_id();
> +	int this_cpu = smp_processor_id();
>  
>  	__get_cpu_var(watchdog_touch_ts) = get_timestamp(this_cpu);
>  }
> @@ -157,21 +157,21 @@ void touch_softlockup_watchdog_sync(void)
>  
>  #ifdef CONFIG_HARDLOCKUP_DETECTOR
>  /* watchdog detector functions */
> -static int is_hardlockup(int cpu)
> +static int is_hardlockup(void)
>  {
> -	unsigned long hrint = per_cpu(hrtimer_interrupts, cpu);
> +	unsigned long hrint = __get_cpu_var(hrtimer_interrupts);
>  
> -	if (per_cpu(hrtimer_interrupts_saved, cpu) == hrint)
> +	if (__get_cpu_var(hrtimer_interrupts_saved) == hrint)
>  		return 1;
>  
> -	per_cpu(hrtimer_interrupts_saved, cpu) = hrint;
> +	__get_cpu_var(hrtimer_interrupts_saved) = hrint;
>  	return 0;
>  }
>  #endif
>  
> -static int is_softlockup(unsigned long touch_ts, int cpu)
> +static int is_softlockup(unsigned long touch_ts)
>  {
> -	unsigned long now = get_timestamp(cpu);
> +	unsigned long now = get_timestamp(smp_processor_id());
>  
>  	/* Warn about unreasonable delays: */
>  	if (time_after(now, touch_ts + softlockup_thresh))
> @@ -206,8 +206,6 @@ void watchdog_overflow_callback(struct perf_event *event, int nmi,
>  		 struct perf_sample_data *data,
>  		 struct pt_regs *regs)
>  {
> -	int this_cpu = smp_processor_id();
> -
>  	if (__get_cpu_var(watchdog_nmi_touch) == true) {
>  		__get_cpu_var(watchdog_nmi_touch) = false;
>  		return;
> @@ -219,7 +217,9 @@ void watchdog_overflow_callback(struct perf_event *event, int nmi,
>  	 * fired multiple times before we overflow'd.  If it hasn't
>  	 * then this is a good indication the cpu is stuck
>  	 */
> -	if (is_hardlockup(this_cpu)) {
> +	if (is_hardlockup()) {
> +		int this_cpu = smp_processor_id();
> +
>  		/* only print hardlockups once */
>  		if (__get_cpu_var(hard_watchdog_warn) == true)
>  			return;
> @@ -247,7 +247,6 @@ static inline void watchdog_interrupt_count(void) { return; }
>  /* watchdog kicker functions */
>  static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
>  {
> -	int this_cpu = smp_processor_id();
>  	unsigned long touch_ts = __get_cpu_var(watchdog_touch_ts);
>  	struct pt_regs *regs = get_irq_regs();
>  	int duration;
> @@ -262,12 +261,12 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
>  	hrtimer_forward_now(hrtimer, ns_to_ktime(get_sample_period()));
>  
>  	if (touch_ts == 0) {
> -		if (unlikely(per_cpu(softlockup_touch_sync, this_cpu))) {
> +		if (unlikely(__get_cpu_var(softlockup_touch_sync))) {
>  			/*
>  			 * If the time stamp was touched atomically
>  			 * make sure the scheduler tick is up to date.
>  			 */
> -			per_cpu(softlockup_touch_sync, this_cpu) = false;
> +			__get_cpu_var(softlockup_touch_sync) = false;
>  			sched_clock_tick();
>  		}
>  		__touch_watchdog();
> @@ -280,14 +279,14 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
>  	 * indicate it is getting cpu time.  If it hasn't then
>  	 * this is a good indication some task is hogging the cpu
>  	 */
> -	duration = is_softlockup(touch_ts, this_cpu);
> +	duration = is_softlockup(touch_ts);
>  	if (unlikely(duration)) {
>  		/* only warn once */
>  		if (__get_cpu_var(soft_watchdog_warn) == true)
>  			return HRTIMER_RESTART;
>  
>  		printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
> -			this_cpu, duration,
> +			smp_processor_id(), duration,
>  			current->comm, task_pid_nr(current));
>  		print_modules();
>  		print_irqtrace_events(current);
> @@ -309,10 +308,10 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
>  /*
>   * The watchdog thread - touches the timestamp.
>   */
> -static int watchdog(void *__bind_cpu)
> +static int watchdog(void *unused)
>  {
>  	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
> -	struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, (unsigned long)__bind_cpu);
> +	struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
>  
>  	sched_setscheduler(current, SCHED_FIFO, &param);
>  
> @@ -328,7 +327,7 @@ static int watchdog(void *__bind_cpu)
>  	/*
>  	 * Run briefly once per second to reset the softlockup timestamp.
>  	 * If this gets delayed for more than 60 seconds then the
> -	 * debug-printout triggers in softlockup_tick().
> +	 * debug-printout triggers in watchdog_timer_fn().
>  	 */
>  	while (!kthread_should_stop()) {
>  		__touch_watchdog();
> -- 
> 1.7.0.1
> 



* [tip:perf/nmi] lockup_detector: Convert per_cpu to __get_cpu_var for readability
From: tip-bot for Don Zickus @ 2010-05-19 17:06 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, hpa, mingo, a.p.zijlstra, gorcunov, fweisbec, tglx,
	dzickus, mingo

Commit-ID:  26e09c6eee14f4827b55137ba0eedc4e77cd50ab
Gitweb:     http://git.kernel.org/tip/26e09c6eee14f4827b55137ba0eedc4e77cd50ab
Author:     Don Zickus <dzickus@redhat.com>
AuthorDate: Mon, 17 May 2010 18:06:04 -0400
Committer:  Frederic Weisbecker <fweisbec@gmail.com>
CommitDate: Wed, 19 May 2010 11:32:14 +0200

lockup_detector: Convert per_cpu to __get_cpu_var for readability

Just a bunch of conversions as suggested by Frederic W.
__get_cpu_var() provides preemption-disabled checks.

Plus it gives more readability, as it makes it obvious
that we are now dealing with these variables locally.

Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
LKML-Reference: <1274133966-18415-2-git-send-email-dzickus@redhat.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 kernel/watchdog.c |   35 +++++++++++++++++------------------
 1 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index e53622c..91b0b26 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -115,7 +115,7 @@ static unsigned long get_sample_period(void)
 /* Commands for resetting the watchdog */
 static void __touch_watchdog(void)
 {
-	int this_cpu = raw_smp_processor_id();
+	int this_cpu = smp_processor_id();
 
 	__get_cpu_var(watchdog_touch_ts) = get_timestamp(this_cpu);
 }
@@ -157,21 +157,21 @@ void touch_softlockup_watchdog_sync(void)
 
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
 /* watchdog detector functions */
-static int is_hardlockup(int cpu)
+static int is_hardlockup(void)
 {
-	unsigned long hrint = per_cpu(hrtimer_interrupts, cpu);
+	unsigned long hrint = __get_cpu_var(hrtimer_interrupts);
 
-	if (per_cpu(hrtimer_interrupts_saved, cpu) == hrint)
+	if (__get_cpu_var(hrtimer_interrupts_saved) == hrint)
 		return 1;
 
-	per_cpu(hrtimer_interrupts_saved, cpu) = hrint;
+	__get_cpu_var(hrtimer_interrupts_saved) = hrint;
 	return 0;
 }
 #endif
 
-static int is_softlockup(unsigned long touch_ts, int cpu)
+static int is_softlockup(unsigned long touch_ts)
 {
-	unsigned long now = get_timestamp(cpu);
+	unsigned long now = get_timestamp(smp_processor_id());
 
 	/* Warn about unreasonable delays: */
 	if (time_after(now, touch_ts + softlockup_thresh))
@@ -206,8 +206,6 @@ void watchdog_overflow_callback(struct perf_event *event, int nmi,
 		 struct perf_sample_data *data,
 		 struct pt_regs *regs)
 {
-	int this_cpu = smp_processor_id();
-
 	if (__get_cpu_var(watchdog_nmi_touch) == true) {
 		__get_cpu_var(watchdog_nmi_touch) = false;
 		return;
@@ -219,7 +217,9 @@ void watchdog_overflow_callback(struct perf_event *event, int nmi,
 	 * fired multiple times before we overflow'd.  If it hasn't
 	 * then this is a good indication the cpu is stuck
 	 */
-	if (is_hardlockup(this_cpu)) {
+	if (is_hardlockup()) {
+		int this_cpu = smp_processor_id();
+
 		/* only print hardlockups once */
 		if (__get_cpu_var(hard_watchdog_warn) == true)
 			return;
@@ -247,7 +247,6 @@ static inline void watchdog_interrupt_count(void) { return; }
 /* watchdog kicker functions */
 static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 {
-	int this_cpu = smp_processor_id();
 	unsigned long touch_ts = __get_cpu_var(watchdog_touch_ts);
 	struct pt_regs *regs = get_irq_regs();
 	int duration;
@@ -262,12 +261,12 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	hrtimer_forward_now(hrtimer, ns_to_ktime(get_sample_period()));
 
 	if (touch_ts == 0) {
-		if (unlikely(per_cpu(softlockup_touch_sync, this_cpu))) {
+		if (unlikely(__get_cpu_var(softlockup_touch_sync))) {
 			/*
 			 * If the time stamp was touched atomically
 			 * make sure the scheduler tick is up to date.
 			 */
-			per_cpu(softlockup_touch_sync, this_cpu) = false;
+			__get_cpu_var(softlockup_touch_sync) = false;
 			sched_clock_tick();
 		}
 		__touch_watchdog();
@@ -280,14 +279,14 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	 * indicate it is getting cpu time.  If it hasn't then
 	 * this is a good indication some task is hogging the cpu
 	 */
-	duration = is_softlockup(touch_ts, this_cpu);
+	duration = is_softlockup(touch_ts);
 	if (unlikely(duration)) {
 		/* only warn once */
 		if (__get_cpu_var(soft_watchdog_warn) == true)
 			return HRTIMER_RESTART;
 
 		printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
-			this_cpu, duration,
+			smp_processor_id(), duration,
 			current->comm, task_pid_nr(current));
 		print_modules();
 		print_irqtrace_events(current);
@@ -309,10 +308,10 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 /*
  * The watchdog thread - touches the timestamp.
  */
-static int watchdog(void *__bind_cpu)
+static int watchdog(void *unused)
 {
 	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
-	struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, (unsigned long)__bind_cpu);
+	struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
 
 	sched_setscheduler(current, SCHED_FIFO, &param);
 
@@ -328,7 +327,7 @@ static int watchdog(void *__bind_cpu)
 	/*
 	 * Run briefly once per second to reset the softlockup timestamp.
 	 * If this gets delayed for more than 60 seconds then the
-	 * debug-printout triggers in softlockup_tick().
+	 * debug-printout triggers in watchdog_timer_fn().
 	 */
 	while (!kthread_should_stop()) {
 		__touch_watchdog();


* [PATCH 0/3] lockup detector fixes
From: Don Zickus @ 2010-09-01  3:00 UTC (permalink / raw)
  To: mingo; +Cc: peterz, gorcunov, fweisbec, linux-kernel, Don Zickus

A bunch of fixes for the lockup detector.

Tested on Intel/AMD on top of Ingo's tip branch.

Akinobu Mita (2):
  lockup_detector: convert cpu notifier to return encapsulate errno
    value
  lockup_detector: remove unnecessary panic_notifier

Don Zickus (1):
  [lockup detector] sync touch_*_watchdog back to old semantics

 kernel/watchdog.c |   53 +++++++++++++++++++++++------------------------------
 1 files changed, 23 insertions(+), 30 deletions(-)

-- 
1.7.2.2

