* [RFC PATCH 1/4] watchdog: Remove hardlockup handler references
2016-10-26 16:02 [RFC PATCH 0/4] Clean up watchdog handlers Babu Moger
@ 2016-10-26 16:02 ` Babu Moger
2016-10-26 16:02 ` [RFC PATCH 2/4] watchdog: Move shared definitions to nmi.h Babu Moger
` (4 subsequent siblings)
5 siblings, 0 replies; 11+ messages in thread
From: Babu Moger @ 2016-10-26 16:02 UTC (permalink / raw)
To: mingo, akpm, ak, jkosina, baiyaowei, dzickus, atomlin, uobergfe,
tj, hidehiro.kawai.ez, johunt, davem, sparclinux
Cc: bp, bywxiaobai, cmetcalf, keescook, ebiederm, huawei.libin, ralf,
dvyukov, linux-kernel, sam, babu.moger
Separate hardlockup code from watchdog.c. It is mostly straightforward.
Remove everything inside CONFIG_HARDLOCKUP_DETECTOR. This code will go
to file watchdog_hld.c.
We also define weak handlers watchdog_nmi_enable and watchdog_nmi_disable.
Signed-off-by: Babu Moger <babu.moger@oracle.com>
---
kernel/watchdog.c | 251 ++---------------------------------------------------
1 files changed, 7 insertions(+), 244 deletions(-)
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 9acb29f..a88e179 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -24,7 +24,6 @@
#include <asm/irq_regs.h>
#include <linux/kvm_para.h>
-#include <linux/perf_event.h>
#include <linux/kthread.h>
/*
@@ -100,50 +99,8 @@
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved);
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-static DEFINE_PER_CPU(bool, hard_watchdog_warn);
-static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
-static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
-static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
-#endif
static unsigned long soft_lockup_nmi_warn;
-/* boot commands */
-/*
- * Should we panic when a soft-lockup or hard-lockup occurs:
- */
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-unsigned int __read_mostly hardlockup_panic =
- CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
-static unsigned long hardlockup_allcpu_dumped;
-/*
- * We may not want to enable hard lockup detection by default in all cases,
- * for example when running the kernel as a guest on a hypervisor. In these
- * cases this function can be called to disable hard lockup detection. This
- * function should only be executed once by the boot processor before the
- * kernel command line parameters are parsed, because otherwise it is not
- * possible to override this in hardlockup_panic_setup().
- */
-void hardlockup_detector_disable(void)
-{
- watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
-}
-
-static int __init hardlockup_panic_setup(char *str)
-{
- if (!strncmp(str, "panic", 5))
- hardlockup_panic = 1;
- else if (!strncmp(str, "nopanic", 7))
- hardlockup_panic = 0;
- else if (!strncmp(str, "0", 1))
- watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
- else if (!strncmp(str, "1", 1))
- watchdog_enabled |= NMI_WATCHDOG_ENABLED;
- return 1;
-}
-__setup("nmi_watchdog=", hardlockup_panic_setup);
-#endif
-
unsigned int __read_mostly softlockup_panic =
CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
@@ -264,43 +221,12 @@ void touch_all_softlockup_watchdogs(void)
wq_watchdog_touch(-1);
}
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-void touch_nmi_watchdog(void)
-{
- /*
- * Using __raw here because some code paths have
- * preemption enabled. If preemption is enabled
- * then interrupts should be enabled too, in which
- * case we shouldn't have to worry about the watchdog
- * going off.
- */
- raw_cpu_write(watchdog_nmi_touch, true);
- touch_softlockup_watchdog();
-}
-EXPORT_SYMBOL(touch_nmi_watchdog);
-
-#endif
-
void touch_softlockup_watchdog_sync(void)
{
__this_cpu_write(softlockup_touch_sync, true);
__this_cpu_write(watchdog_touch_ts, 0);
}
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-/* watchdog detector functions */
-static bool is_hardlockup(void)
-{
- unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
-
- if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
- return true;
-
- __this_cpu_write(hrtimer_interrupts_saved, hrint);
- return false;
-}
-#endif
-
static int is_softlockup(unsigned long touch_ts)
{
unsigned long now = get_timestamp();
@@ -313,78 +239,18 @@ static int is_softlockup(unsigned long touch_ts)
return 0;
}
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-
-static struct perf_event_attr wd_hw_attr = {
- .type = PERF_TYPE_HARDWARE,
- .config = PERF_COUNT_HW_CPU_CYCLES,
- .size = sizeof(struct perf_event_attr),
- .pinned = 1,
- .disabled = 1,
-};
-
-/* Callback function for perf event subsystem */
-static void watchdog_overflow_callback(struct perf_event *event,
- struct perf_sample_data *data,
- struct pt_regs *regs)
-{
- /* Ensure the watchdog never gets throttled */
- event->hw.interrupts = 0;
-
- if (__this_cpu_read(watchdog_nmi_touch) == true) {
- __this_cpu_write(watchdog_nmi_touch, false);
- return;
- }
-
- /* check for a hardlockup
- * This is done by making sure our timer interrupt
- * is incrementing. The timer interrupt should have
- * fired multiple times before we overflow'd. If it hasn't
- * then this is a good indication the cpu is stuck
- */
- if (is_hardlockup()) {
- int this_cpu = smp_processor_id();
- struct pt_regs *regs = get_irq_regs();
-
- /* only print hardlockups once */
- if (__this_cpu_read(hard_watchdog_warn) == true)
- return;
-
- pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
- print_modules();
- print_irqtrace_events(current);
- if (regs)
- show_regs(regs);
- else
- dump_stack();
-
- /*
- * Perform all-CPU dump only once to avoid multiple hardlockups
- * generating interleaving traces
- */
- if (sysctl_hardlockup_all_cpu_backtrace &&
- !test_and_set_bit(0, &hardlockup_allcpu_dumped))
- trigger_allbutself_cpu_backtrace();
-
- if (hardlockup_panic)
- nmi_panic(regs, "Hard LOCKUP");
-
- __this_cpu_write(hard_watchdog_warn, true);
- return;
- }
-
- __this_cpu_write(hard_watchdog_warn, false);
- return;
-}
-#endif /* CONFIG_HARDLOCKUP_DETECTOR */
-
static void watchdog_interrupt_count(void)
{
__this_cpu_inc(hrtimer_interrupts);
}
-static int watchdog_nmi_enable(unsigned int cpu);
-static void watchdog_nmi_disable(unsigned int cpu);
+int __weak watchdog_nmi_enable(unsigned int cpu)
+{
+ return 0;
+}
+void __weak watchdog_nmi_disable(unsigned int cpu)
+{
+}
static int watchdog_enable_all_cpus(void);
static void watchdog_disable_all_cpus(void);
@@ -577,109 +443,6 @@ static void watchdog(unsigned int cpu)
watchdog_nmi_disable(cpu);
}
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-/*
- * People like the simple clean cpu node info on boot.
- * Reduce the watchdog noise by only printing messages
- * that are different from what cpu0 displayed.
- */
-static unsigned long cpu0_err;
-
-static int watchdog_nmi_enable(unsigned int cpu)
-{
- struct perf_event_attr *wd_attr;
- struct perf_event *event = per_cpu(watchdog_ev, cpu);
-
- /* nothing to do if the hard lockup detector is disabled */
- if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
- goto out;
-
- /* is it already setup and enabled? */
- if (event && event->state > PERF_EVENT_STATE_OFF)
- goto out;
-
- /* it is setup but not enabled */
- if (event != NULL)
- goto out_enable;
-
- wd_attr = &wd_hw_attr;
- wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
-
- /* Try to register using hardware perf events */
- event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL);
-
- /* save cpu0 error for future comparision */
- if (cpu == 0 && IS_ERR(event))
- cpu0_err = PTR_ERR(event);
-
- if (!IS_ERR(event)) {
- /* only print for cpu0 or different than cpu0 */
- if (cpu == 0 || cpu0_err)
- pr_info("enabled on all CPUs, permanently consumes one hw-PMU counter.\n");
- goto out_save;
- }
-
- /*
- * Disable the hard lockup detector if _any_ CPU fails to set up
- * set up the hardware perf event. The watchdog() function checks
- * the NMI_WATCHDOG_ENABLED bit periodically.
- *
- * The barriers are for syncing up watchdog_enabled across all the
- * cpus, as clear_bit() does not use barriers.
- */
- smp_mb__before_atomic();
- clear_bit(NMI_WATCHDOG_ENABLED_BIT, &watchdog_enabled);
- smp_mb__after_atomic();
-
- /* skip displaying the same error again */
- if (cpu > 0 && (PTR_ERR(event) == cpu0_err))
- return PTR_ERR(event);
-
- /* vary the KERN level based on the returned errno */
- if (PTR_ERR(event) == -EOPNOTSUPP)
- pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu);
- else if (PTR_ERR(event) == -ENOENT)
- pr_warn("disabled (cpu%i): hardware events not enabled\n",
- cpu);
- else
- pr_err("disabled (cpu%i): unable to create perf event: %ld\n",
- cpu, PTR_ERR(event));
-
- pr_info("Shutting down hard lockup detector on all cpus\n");
-
- return PTR_ERR(event);
-
- /* success path */
-out_save:
- per_cpu(watchdog_ev, cpu) = event;
-out_enable:
- perf_event_enable(per_cpu(watchdog_ev, cpu));
-out:
- return 0;
-}
-
-static void watchdog_nmi_disable(unsigned int cpu)
-{
- struct perf_event *event = per_cpu(watchdog_ev, cpu);
-
- if (event) {
- perf_event_disable(event);
- per_cpu(watchdog_ev, cpu) = NULL;
-
- /* should be in cleanup, but blocks oprofile */
- perf_event_release_kernel(event);
- }
- if (cpu == 0) {
- /* watchdog_nmi_enable() expects this to be zero initially. */
- cpu0_err = 0;
- }
-}
-
-#else
-static int watchdog_nmi_enable(unsigned int cpu) { return 0; }
-static void watchdog_nmi_disable(unsigned int cpu) { return; }
-#endif /* CONFIG_HARDLOCKUP_DETECTOR */
-
static struct smp_hotplug_thread watchdog_threads = {
.store = &softlockup_watchdog,
.thread_should_run = watchdog_should_run,
--
1.7.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [RFC PATCH 2/4] watchdog: Move shared definitions to nmi.h
2016-10-26 16:02 [RFC PATCH 0/4] Clean up watchdog handlers Babu Moger
2016-10-26 16:02 ` [RFC PATCH 1/4] watchdog: Remove hardlockup handler references Babu Moger
@ 2016-10-26 16:02 ` Babu Moger
2016-10-26 16:02 ` [RFC PATCH 3/4] watchdog: Move hardlockup detector to separate file Babu Moger
` (3 subsequent siblings)
5 siblings, 0 replies; 11+ messages in thread
From: Babu Moger @ 2016-10-26 16:02 UTC (permalink / raw)
To: mingo, akpm, ak, jkosina, baiyaowei, dzickus, atomlin, uobergfe,
tj, hidehiro.kawai.ez, johunt, davem, sparclinux
Cc: bp, bywxiaobai, cmetcalf, keescook, ebiederm, huawei.libin, ralf,
dvyukov, linux-kernel, sam, babu.moger
Move shared macros and definitions to nmi.h so that watchdog.c,
watchdog_hld.c or any other architecture specific handler can use
those definitions.
Signed-off-by: Babu Moger <babu.moger@oracle.com>
---
include/linux/nmi.h | 19 +++++++++++++++++++
kernel/watchdog.c | 25 ++++---------------------
2 files changed, 23 insertions(+), 21 deletions(-)
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index a78c35c..0ea0a38 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -7,6 +7,23 @@
#include <linux/sched.h>
#include <asm/irq.h>
+/*
+ * The run state of the lockup detectors is controlled by the content of the
+ * 'watchdog_enabled' variable. Each lockup detector has its dedicated bit -
+ * bit 0 for the hard lockup detector and bit 1 for the soft lockup detector.
+ *
+ * 'watchdog_user_enabled', 'nmi_watchdog_enabled' and 'soft_watchdog_enabled'
+ * are variables that are only used as an 'interface' between the parameters
+ * in /proc/sys/kernel and the internal state bits in 'watchdog_enabled'. The
+ * 'watchdog_thresh' variable is handled differently because its value is not
+ * boolean, and the lockup detectors are 'suspended' while 'watchdog_thresh'
+ * is equal zero.
+ */
+#define NMI_WATCHDOG_ENABLED_BIT 0
+#define SOFT_WATCHDOG_ENABLED_BIT 1
+#define NMI_WATCHDOG_ENABLED (1 << NMI_WATCHDOG_ENABLED_BIT)
+#define SOFT_WATCHDOG_ENABLED (1 << SOFT_WATCHDOG_ENABLED_BIT)
+
/**
* touch_nmi_watchdog - restart NMI watchdog timeout.
*
@@ -91,6 +108,8 @@ static inline bool trigger_single_cpu_backtrace(int cpu)
extern int soft_watchdog_enabled;
extern int watchdog_user_enabled;
extern int watchdog_thresh;
+extern unsigned long watchdog_enabled;
+extern DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
extern unsigned long *watchdog_cpumask_bits;
extern int sysctl_softlockup_all_cpu_backtrace;
extern int sysctl_hardlockup_all_cpu_backtrace;
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index a88e179..4ea7752 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -26,29 +26,12 @@
#include <linux/kvm_para.h>
#include <linux/kthread.h>
-/*
- * The run state of the lockup detectors is controlled by the content of the
- * 'watchdog_enabled' variable. Each lockup detector has its dedicated bit -
- * bit 0 for the hard lockup detector and bit 1 for the soft lockup detector.
- *
- * 'watchdog_user_enabled', 'nmi_watchdog_enabled' and 'soft_watchdog_enabled'
- * are variables that are only used as an 'interface' between the parameters
- * in /proc/sys/kernel and the internal state bits in 'watchdog_enabled'. The
- * 'watchdog_thresh' variable is handled differently because its value is not
- * boolean, and the lockup detectors are 'suspended' while 'watchdog_thresh'
- * is equal zero.
- */
-#define NMI_WATCHDOG_ENABLED_BIT 0
-#define SOFT_WATCHDOG_ENABLED_BIT 1
-#define NMI_WATCHDOG_ENABLED (1 << NMI_WATCHDOG_ENABLED_BIT)
-#define SOFT_WATCHDOG_ENABLED (1 << SOFT_WATCHDOG_ENABLED_BIT)
-
static DEFINE_MUTEX(watchdog_proc_mutex);
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED;
+#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
+unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED;
#else
-static unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED;
+unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED;
#endif
int __read_mostly nmi_watchdog_enabled;
int __read_mostly soft_watchdog_enabled;
@@ -96,7 +79,7 @@
static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
static DEFINE_PER_CPU(bool, softlockup_touch_sync);
static DEFINE_PER_CPU(bool, soft_watchdog_warn);
-static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
+DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved);
static unsigned long soft_lockup_nmi_warn;
--
1.7.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [RFC PATCH 3/4] watchdog: Move hardlockup detector to separate file
2016-10-26 16:02 [RFC PATCH 0/4] Clean up watchdog handlers Babu Moger
2016-10-26 16:02 ` [RFC PATCH 1/4] watchdog: Remove hardlockup handler references Babu Moger
2016-10-26 16:02 ` [RFC PATCH 2/4] watchdog: Move shared definitions to nmi.h Babu Moger
@ 2016-10-26 16:02 ` Babu Moger
2016-10-26 16:02 ` [RFC PATCH 4/4] sparc: Implement watchdog_nmi_enable and watchdog_nmi_disable Babu Moger
` (2 subsequent siblings)
5 siblings, 0 replies; 11+ messages in thread
From: Babu Moger @ 2016-10-26 16:02 UTC (permalink / raw)
To: mingo, akpm, ak, jkosina, baiyaowei, dzickus, atomlin, uobergfe,
tj, hidehiro.kawai.ez, johunt, davem, sparclinux
Cc: bp, bywxiaobai, cmetcalf, keescook, ebiederm, huawei.libin, ralf,
dvyukov, linux-kernel, sam, babu.moger
Move hardlockup detector code to watchdog_hld.c.
Also update the makefile accordingly.
Signed-off-by: Babu Moger <babu.moger@oracle.com>
---
kernel/Makefile | 1 +
kernel/watchdog_hld.c | 238 +++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 239 insertions(+), 0 deletions(-)
create mode 100644 kernel/watchdog_hld.c
diff --git a/kernel/Makefile b/kernel/Makefile
index eb26e12..314e7d6 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -84,6 +84,7 @@ obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_KGDB) += debug/
obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o
+obj-$(CONFIG_HARDLOCKUP_DETECTOR) += watchdog_hld.o
obj-$(CONFIG_SECCOMP) += seccomp.o
obj-$(CONFIG_RELAY) += relay.o
obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c
new file mode 100644
index 0000000..cd690fb
--- /dev/null
+++ b/kernel/watchdog_hld.c
@@ -0,0 +1,238 @@
+/*
+ * Detect hard and soft lockups on a system
+ *
+ * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
+ *
+ * Note: Most of this code is borrowed heavily from the original softlockup
+ * detector, so thanks to Ingo for the initial implementation.
+ * Some chunks also taken from the old x86-specific nmi watchdog code, thanks
+ * to those contributors as well.
+ */
+
+#include <linux/nmi.h>
+#include <linux/module.h>
+#include <asm/irq_regs.h>
+#include <linux/perf_event.h>
+
+static DEFINE_PER_CPU(bool, hard_watchdog_warn);
+static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
+static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
+static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
+
+/* boot commands */
+/*
+ * Should we panic when a soft-lockup or hard-lockup occurs:
+ */
+unsigned int __read_mostly hardlockup_panic =
+ CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
+static unsigned long hardlockup_allcpu_dumped;
+/*
+ * We may not want to enable hard lockup detection by default in all cases,
+ * for example when running the kernel as a guest on a hypervisor. In these
+ * cases this function can be called to disable hard lockup detection. This
+ * function should only be executed once by the boot processor before the
+ * kernel command line parameters are parsed, because otherwise it is not
+ * possible to override this in hardlockup_panic_setup().
+ */
+void hardlockup_detector_disable(void)
+{
+ watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
+}
+
+static int __init hardlockup_panic_setup(char *str)
+{
+ if (!strncmp(str, "panic", 5))
+ hardlockup_panic = 1;
+ else if (!strncmp(str, "nopanic", 7))
+ hardlockup_panic = 0;
+ else if (!strncmp(str, "0", 1))
+ watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
+ else if (!strncmp(str, "1", 1))
+ watchdog_enabled |= NMI_WATCHDOG_ENABLED;
+ return 1;
+}
+__setup("nmi_watchdog=", hardlockup_panic_setup);
+
+void touch_nmi_watchdog(void)
+{
+ /*
+ * Using __raw here because some code paths have
+ * preemption enabled. If preemption is enabled
+ * then interrupts should be enabled too, in which
+ * case we shouldn't have to worry about the watchdog
+ * going off.
+ */
+ raw_cpu_write(watchdog_nmi_touch, true);
+ touch_softlockup_watchdog();
+}
+EXPORT_SYMBOL(touch_nmi_watchdog);
+
+/* watchdog detector functions */
+static bool is_hardlockup(void)
+{
+ unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
+
+ if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
+ return true;
+
+ __this_cpu_write(hrtimer_interrupts_saved, hrint);
+ return false;
+}
+
+static struct perf_event_attr wd_hw_attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES,
+ .size = sizeof(struct perf_event_attr),
+ .pinned = 1,
+ .disabled = 1,
+};
+
+/* Callback function for perf event subsystem */
+static void watchdog_overflow_callback(struct perf_event *event,
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
+{
+ /* Ensure the watchdog never gets throttled */
+ event->hw.interrupts = 0;
+
+ if (__this_cpu_read(watchdog_nmi_touch) == true) {
+ __this_cpu_write(watchdog_nmi_touch, false);
+ return;
+ }
+
+ /* check for a hardlockup
+ * This is done by making sure our timer interrupt
+ * is incrementing. The timer interrupt should have
+ * fired multiple times before we overflow'd. If it hasn't
+ * then this is a good indication the cpu is stuck
+ */
+ if (is_hardlockup()) {
+ int this_cpu = smp_processor_id();
+ struct pt_regs *regs = get_irq_regs();
+
+ /* only print hardlockups once */
+ if (__this_cpu_read(hard_watchdog_warn) == true)
+ return;
+
+ pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
+ print_modules();
+ print_irqtrace_events(current);
+ if (regs)
+ show_regs(regs);
+ else
+ dump_stack();
+
+ /*
+ * Perform all-CPU dump only once to avoid multiple hardlockups
+ * generating interleaving traces
+ */
+ if (sysctl_hardlockup_all_cpu_backtrace &&
+ !test_and_set_bit(0, &hardlockup_allcpu_dumped))
+ trigger_allbutself_cpu_backtrace();
+
+ if (hardlockup_panic)
+ nmi_panic(regs, "Hard LOCKUP");
+
+ __this_cpu_write(hard_watchdog_warn, true);
+ return;
+ }
+
+ __this_cpu_write(hard_watchdog_warn, false);
+}
+
+/*
+ * People like the simple clean cpu node info on boot.
+ * Reduce the watchdog noise by only printing messages
+ * that are different from what cpu0 displayed.
+ */
+static unsigned long cpu0_err;
+
+int watchdog_nmi_enable(unsigned int cpu)
+{
+ struct perf_event_attr *wd_attr;
+ struct perf_event *event = per_cpu(watchdog_ev, cpu);
+
+ /* nothing to do if the hard lockup detector is disabled */
+ if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
+ goto out;
+
+ /* is it already setup and enabled? */
+ if (event && event->state > PERF_EVENT_STATE_OFF)
+ goto out;
+
+ /* it is setup but not enabled */
+ if (event != NULL)
+ goto out_enable;
+
+ wd_attr = &wd_hw_attr;
+ wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
+
+ /* Try to register using hardware perf events */
+ event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL);
+
+ /* save cpu0 error for future comparision */
+ if (cpu == 0 && IS_ERR(event))
+ cpu0_err = PTR_ERR(event);
+
+ if (!IS_ERR(event)) {
+ /* only print for cpu0 or different than cpu0 */
+ if (cpu == 0 || cpu0_err)
+ pr_info("enabled on all CPUs, permanently consumes one hw-PMU counter.\n");
+ goto out_save;
+ }
+
+ /*
+ * Disable the hard lockup detector if _any_ CPU fails to set up
+ * set up the hardware perf event. The watchdog() function checks
+ * the NMI_WATCHDOG_ENABLED bit periodically.
+ *
+ * The barriers are for syncing up watchdog_enabled across all the
+ * cpus, as clear_bit() does not use barriers.
+ */
+ smp_mb__before_atomic();
+ clear_bit(NMI_WATCHDOG_ENABLED_BIT, &watchdog_enabled);
+ smp_mb__after_atomic();
+
+ /* skip displaying the same error again */
+ if (cpu > 0 && (PTR_ERR(event) == cpu0_err))
+ return PTR_ERR(event);
+
+ /* vary the KERN level based on the returned errno */
+ if (PTR_ERR(event) == -EOPNOTSUPP)
+ pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu);
+ else if (PTR_ERR(event) == -ENOENT)
+ pr_warn("disabled (cpu%i): hardware events not enabled\n",
+ cpu);
+ else
+ pr_err("disabled (cpu%i): unable to create perf event: %ld\n",
+ cpu, PTR_ERR(event));
+
+ pr_info("Shutting down hard lockup detector on all cpus\n");
+
+ return PTR_ERR(event);
+
+ /* success path */
+out_save:
+ per_cpu(watchdog_ev, cpu) = event;
+out_enable:
+ perf_event_enable(per_cpu(watchdog_ev, cpu));
+out:
+ return 0;
+}
+
+void watchdog_nmi_disable(unsigned int cpu)
+{
+ struct perf_event *event = per_cpu(watchdog_ev, cpu);
+
+ if (event) {
+ perf_event_disable(event);
+ per_cpu(watchdog_ev, cpu) = NULL;
+
+ /* should be in cleanup, but blocks oprofile */
+ perf_event_release_kernel(event);
+ }
+ if (cpu == 0) {
+ /* watchdog_nmi_enable() expects this to be zero initially. */
+ cpu0_err = 0;
+ }
+}
--
1.7.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [RFC PATCH 4/4] sparc: Implement watchdog_nmi_enable and watchdog_nmi_disable
2016-10-26 16:02 [RFC PATCH 0/4] Clean up watchdog handlers Babu Moger
` (2 preceding siblings ...)
2016-10-26 16:02 ` [RFC PATCH 3/4] watchdog: Move hardlockup detector to separate file Babu Moger
@ 2016-10-26 16:02 ` Babu Moger
2016-10-27 14:13 ` [RFC PATCH 0/4] Clean up watchdog handlers Don Zickus
2016-10-31 21:00 ` Don Zickus
5 siblings, 0 replies; 11+ messages in thread
From: Babu Moger @ 2016-10-26 16:02 UTC (permalink / raw)
To: mingo, akpm, ak, jkosina, baiyaowei, dzickus, atomlin, uobergfe,
tj, hidehiro.kawai.ez, johunt, davem, sparclinux
Cc: bp, bywxiaobai, cmetcalf, keescook, ebiederm, huawei.libin, ralf,
dvyukov, linux-kernel, sam, babu.moger
Implement the functions watchdog_nmi_enable and watchdog_nmi_disable
to enable/disable the NMI watchdog. Sparc uses an arch-specific NMI watchdog
handler. Currently, we do not have a way to enable/disable the NMI watchdog
dynamically. With these patches we can enable or disable arch-specific
NMI watchdogs using the proc or sysctl interface.
Example commands.
To enable: echo 1 > /proc/sys/kernel/nmi_watchdog
To disable: echo 0 > /proc/sys/kernel/nmi_watchdog
It can also be achieved using the sysctl parameter kernel.nmi_watchdog
Signed-off-by: Babu Moger <babu.moger@oracle.com>
---
arch/sparc/kernel/nmi.c | 44 +++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 43 insertions(+), 1 deletions(-)
diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c
index a9973bb..95e73c6 100644
--- a/arch/sparc/kernel/nmi.c
+++ b/arch/sparc/kernel/nmi.c
@@ -42,7 +42,7 @@
*/
atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
EXPORT_SYMBOL(nmi_active);
-
+static int nmi_init_done;
static unsigned int nmi_hz = HZ;
static DEFINE_PER_CPU(short, wd_enabled);
static int endflag __initdata;
@@ -153,6 +153,8 @@ static void report_broken_nmi(int cpu, int *prev_nmi_count)
void stop_nmi_watchdog(void *unused)
{
+ if (!__this_cpu_read(wd_enabled))
+ return;
pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);
__this_cpu_write(wd_enabled, 0);
atomic_dec(&nmi_active);
@@ -207,6 +209,9 @@ static int __init check_nmi_watchdog(void)
void start_nmi_watchdog(void *unused)
{
+ if (__this_cpu_read(wd_enabled))
+ return;
+
__this_cpu_write(wd_enabled, 1);
atomic_inc(&nmi_active);
@@ -259,6 +264,8 @@ int __init nmi_init(void)
}
}
+ nmi_init_done = 1;
+
return err;
}
@@ -270,3 +277,38 @@ static int __init setup_nmi_watchdog(char *str)
return 0;
}
__setup("nmi_watchdog=", setup_nmi_watchdog);
+
+/*
+ * sparc specific NMI watchdog enable function.
+ * Enables watchdog if it is not enabled already.
+ */
+int watchdog_nmi_enable(unsigned int cpu)
+{
+ if (atomic_read(&nmi_active) == -1) {
+ pr_warn("NMI watchdog cannot be enabled or disabled\n");
+ return -1;
+ }
+
+ /*
+ * watchdog thread could start even before nmi_init is called.
+ * Just Return in that case. Let nmi_init finish the init
+ * process first.
+ */
+ if (!nmi_init_done)
+ return 0;
+
+ smp_call_function_single(cpu, start_nmi_watchdog, NULL, 1);
+
+ return 0;
+}
+/*
+ * sparc specific NMI watchdog disable function.
+ * Disables watchdog if it is not disabled already.
+ */
+void watchdog_nmi_disable(unsigned int cpu)
+{
+ if (atomic_read(&nmi_active) == -1)
+ pr_warn_once("NMI watchdog cannot be enabled or disabled\n");
+ else
+ smp_call_function_single(cpu, stop_nmi_watchdog, NULL, 1);
+}
--
1.7.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [RFC PATCH 0/4] Clean up watchdog handlers
2016-10-26 16:02 [RFC PATCH 0/4] Clean up watchdog handlers Babu Moger
` (3 preceding siblings ...)
2016-10-26 16:02 ` [RFC PATCH 4/4] sparc: Implement watchdog_nmi_enable and watchdog_nmi_disable Babu Moger
@ 2016-10-27 14:13 ` Don Zickus
2016-10-31 21:00 ` Don Zickus
5 siblings, 0 replies; 11+ messages in thread
From: Don Zickus @ 2016-10-27 14:13 UTC (permalink / raw)
To: Babu Moger
Cc: mingo, akpm, ak, jkosina, baiyaowei, atomlin, uobergfe, tj,
hidehiro.kawai.ez, johunt, davem, sparclinux, bp, bywxiaobai,
cmetcalf, keescook, ebiederm, huawei.libin, ralf, dvyukov,
linux-kernel, sam
On Wed, Oct 26, 2016 at 09:02:19AM -0700, Babu Moger wrote:
> This is an attempt to cleanup watchdog handlers. Right now,
> kernel/watchdog.c implements both softlockup and hardlockup detectors.
> Softlockup code is generic. Hardlockup code is arch specific. Some
> architectures don't use hardlockup detectors. They use their own watchdog
> detectors. To make both these combination work, we have numerous #ifdefs
> in kernel/watchdog.c.
>
> We are trying here to make these handlers independent of each other.
> Also provide an interface for architectures to implement their own
> handlers. watchdog_nmi_enable and watchdog_nmi_disable will be defined
> as weak such that architectures can override its definitions.
Thanks for the patches Babu. I will try to get to them today or tomorrow.
Cheers,
Don
>
> Thanks to Don Zickus for his suggestions.
> Here is the previous discussion
> http://www.spinics.net/lists/sparclinux/msg16441.html
>
> Babu Moger (4):
> watchdog: Remove hardlockup handler references
> watchdog: Move shared definitions to nmi.h
> watchdog: Move hardlockup detector in separate file
> sparc: Implement watchdog_nmi_enable and watchdog_nmi_disable
>
> arch/sparc/kernel/nmi.c | 44 ++++++++-
> include/linux/nmi.h | 19 ++++
> kernel/Makefile | 1 +
> kernel/watchdog.c | 276 ++---------------------------------------------
> kernel/watchdog_hld.c | 238 ++++++++++++++++++++++++++++++++++++++++
> 5 files changed, 312 insertions(+), 266 deletions(-)
> create mode 100644 kernel/watchdog_hld.c
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [RFC PATCH 0/4] Clean up watchdog handlers
2016-10-26 16:02 [RFC PATCH 0/4] Clean up watchdog handlers Babu Moger
` (4 preceding siblings ...)
2016-10-27 14:13 ` [RFC PATCH 0/4] Clean up watchdog handlers Don Zickus
@ 2016-10-31 21:00 ` Don Zickus
2016-10-31 21:26 ` Babu Moger
2016-10-31 21:30 ` Babu Moger
5 siblings, 2 replies; 11+ messages in thread
From: Don Zickus @ 2016-10-31 21:00 UTC (permalink / raw)
To: Babu Moger
Cc: mingo, akpm, ak, jkosina, baiyaowei, atomlin, uobergfe, tj,
hidehiro.kawai.ez, johunt, davem, sparclinux, bp, bywxiaobai,
cmetcalf, keescook, ebiederm, huawei.libin, ralf, dvyukov,
linux-kernel, sam
On Wed, Oct 26, 2016 at 09:02:19AM -0700, Babu Moger wrote:
> This is an attempt to cleanup watchdog handlers. Right now,
> kernel/watchdog.c implements both softlockup and hardlockup detectors.
> Softlockup code is generic. Hardlockup code is arch specific. Some
> architectures don't use hardlockup detectors. They use their own watchdog
> detectors. To make both these combination work, we have numerous #ifdefs
> in kernel/watchdog.c.
>
> We are trying here to make these handlers independent of each other.
> Also provide an interface for architectures to implement their own
> handlers. watchdog_nmi_enable and watchdog_nmi_disable will be defined
> as weak such that architectures can override its definitions.
>
> Thanks to Don Zickus for his suggestions.
> Here is the previous discussion
> http://www.spinics.net/lists/sparclinux/msg16441.html
Hi Babu,
I finally got some cycles to poke at this today. Good work. A couple of
suggestions. For bisectability, I am thinking patch2 should be first and
patch1 and patch3 should be combined. Also watchdog_hld.c is going to need
up top:
#define pr_fmt(fmt) "NMI watchdog: " fmt
otherwise the error messages miss the header.
Though I don't think watchdog.c and watchdog_hld.c should have the same
header. A good solution isn't coming to me right now. I will try to run
some tests on this tomorrow.
Cheers,
Don
>
> Babu Moger (4):
> watchdog: Remove hardlockup handler references
> watchdog: Move shared definitions to nmi.h
> watchdog: Move hardlockup detector in separate file
> sparc: Implement watchdog_nmi_enable and watchdog_nmi_disable
>
> arch/sparc/kernel/nmi.c | 44 ++++++++-
> include/linux/nmi.h | 19 ++++
> kernel/Makefile | 1 +
> kernel/watchdog.c | 276 ++---------------------------------------------
> kernel/watchdog_hld.c | 238 ++++++++++++++++++++++++++++++++++++++++
> 5 files changed, 312 insertions(+), 266 deletions(-)
> create mode 100644 kernel/watchdog_hld.c
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [RFC PATCH 0/4] Clean up watchdog handlers
2016-10-31 21:00 ` Don Zickus
@ 2016-10-31 21:26 ` Babu Moger
2016-10-31 21:30 ` Babu Moger
1 sibling, 0 replies; 11+ messages in thread
From: Babu Moger @ 2016-10-31 21:26 UTC (permalink / raw)
To: Don Zickus
Cc: mingo, akpm, ak, jkosina, baiyaowei, atomlin, uobergfe, tj,
hidehiro.kawai.ez, johunt, davem, sparclinux, bp, bywxiaobai,
cmetcalf, keescook, ebiederm, huawei.libin, ralf, dvyukov,
linux-kernel, sam
On 10/31/2016 4:00 PM, Don Zickus wrote:
> On Wed, Oct 26, 2016 at 09:02:19AM -0700, Babu Moger wrote:
>> This is an attempt to cleanup watchdog handlers. Right now,
>> kernel/watchdog.c implements both softlockup and hardlockup detectors.
>> Softlockup code is generic. Hardlockup code is arch specific. Some
>> architectures don't use hardlockup detectors. They use their own watchdog
>> detectors. To make both these combination work, we have numerous #ifdefs
>> in kernel/watchdog.c.
>>
>> We are trying here to make these handlers independent of each other.
>> Also provide an interface for architectures to implement their own
>> handlers. watchdog_nmi_enable and watchdog_nmi_disable will be defined
>> as weak such that architectures can override its definitions.
>>
>> Thanks to Don Zickus for his suggestions.
>> Here is the previous discussion
>> http://www.spinics.net/lists/sparclinux/msg16441.html
> Hi Babu,
>
> I finally got some cycles to poke at this today. Good work. A couple of
> suggestions. For bisectability, I am thinking patch2 should be first and
> patch1 and patch3 should be combined. Also watchdog_hld.c is going to need
> up top:
>
> #define pr_fmt(fmt) "NMI watchdog: " fmt
>
> otherwise the error messages miss the header.
>
> Though I don't think watchdog.c and watchdog_hld.c should have the same
> header. A good solution isn't coming to me right now. I will try to run
> some tests on this tomorrow.
Don, Thanks for the feedback. Let me know if you run into issues with
your tests. I will start working on the
review comments.
>
>
> Cheers,
> Don
>
>> Babu Moger (4):
>> watchdog: Remove hardlockup handler references
>> watchdog: Move shared definitions to nmi.h
>> watchdog: Move hardlockup detector in separate file
>> sparc: Implement watchdog_nmi_enable and watchdog_nmi_disable
>>
>> arch/sparc/kernel/nmi.c | 44 ++++++++-
>> include/linux/nmi.h | 19 ++++
>> kernel/Makefile | 1 +
>> kernel/watchdog.c | 276 ++---------------------------------------------
>> kernel/watchdog_hld.c | 238 ++++++++++++++++++++++++++++++++++++++++
>> 5 files changed, 312 insertions(+), 266 deletions(-)
>> create mode 100644 kernel/watchdog_hld.c
>>
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [RFC PATCH 0/4] Clean up watchdog handlers
2016-10-31 21:00 ` Don Zickus
2016-10-31 21:26 ` Babu Moger
@ 2016-10-31 21:30 ` Babu Moger
2016-11-01 13:20 ` Don Zickus
1 sibling, 1 reply; 11+ messages in thread
From: Babu Moger @ 2016-10-31 21:30 UTC (permalink / raw)
To: Don Zickus
Cc: mingo, akpm, ak, jkosina, baiyaowei, atomlin, uobergfe, tj,
hidehiro.kawai.ez, johunt, davem, sparclinux, bp, bywxiaobai,
cmetcalf, keescook, ebiederm, huawei.libin, ralf, dvyukov,
linux-kernel, sam
On 10/31/2016 4:00 PM, Don Zickus wrote:
> On Wed, Oct 26, 2016 at 09:02:19AM -0700, Babu Moger wrote:
>> This is an attempt to cleanup watchdog handlers. Right now,
>> kernel/watchdog.c implements both softlockup and hardlockup detectors.
>> Softlockup code is generic. Hardlockup code is arch specific. Some
>> architectures don't use hardlockup detectors. They use their own watchdog
>> detectors. To make both these combination work, we have numerous #ifdefs
>> in kernel/watchdog.c.
>>
>> We are trying here to make these handlers independent of each other.
>> Also provide an interface for architectures to implement their own
>> handlers. watchdog_nmi_enable and watchdog_nmi_disable will be defined
>> as weak such that architectures can override its definitions.
>>
>> Thanks to Don Zickus for his suggestions.
>> Here is the previous discussion
>> http://www.spinics.net/lists/sparclinux/msg16441.html
> Hi Babu,
>
> I finally got some cycles to poke at this today. Good work. A couple of
> suggestions. For bisectability, I am thinking patch2 should be first and
> patch1 and patch3 should be combined. Also watchdog_hld.c is going to need
> up top:
>
> #define pr_fmt(fmt) "NMI watchdog: " fmt
>
> otherwise the error messages miss the header.
>
> Though I don't think watchdog.c and watchdog_hld.c should have the same
> header. A good solution isn't coming to me right now. I will try to run
> some tests on this tomorrow.
Don, Thanks for the feedback. Let me know if you run into problems with
your tests.
I will start working on the comments.
Thanks
Babu
>
> Cheers,
> Don
>
>> Babu Moger (4):
>> watchdog: Remove hardlockup handler references
>> watchdog: Move shared definitions to nmi.h
>> watchdog: Move hardlockup detector in separate file
>> sparc: Implement watchdog_nmi_enable and watchdog_nmi_disable
>>
>> arch/sparc/kernel/nmi.c | 44 ++++++++-
>> include/linux/nmi.h | 19 ++++
>> kernel/Makefile | 1 +
>> kernel/watchdog.c | 276 ++---------------------------------------------
>> kernel/watchdog_hld.c | 238 ++++++++++++++++++++++++++++++++++++++++
>> 5 files changed, 312 insertions(+), 266 deletions(-)
>> create mode 100644 kernel/watchdog_hld.c
>>
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [RFC PATCH 0/4] Clean up watchdog handlers
2016-10-31 21:30 ` Babu Moger
@ 2016-11-01 13:20 ` Don Zickus
2016-11-01 14:58 ` Babu Moger
0 siblings, 1 reply; 11+ messages in thread
From: Don Zickus @ 2016-11-01 13:20 UTC (permalink / raw)
To: Babu Moger
Cc: mingo, akpm, ak, jkosina, baiyaowei, atomlin, uobergfe, tj,
hidehiro.kawai.ez, johunt, davem, sparclinux, bp, bywxiaobai,
cmetcalf, keescook, ebiederm, huawei.libin, ralf, dvyukov,
linux-kernel, sam
On Mon, Oct 31, 2016 at 04:30:59PM -0500, Babu Moger wrote:
>
> On 10/31/2016 4:00 PM, Don Zickus wrote:
> >On Wed, Oct 26, 2016 at 09:02:19AM -0700, Babu Moger wrote:
> >>This is an attempt to cleanup watchdog handlers. Right now,
> >>kernel/watchdog.c implements both softlockup and hardlockup detectors.
> >>Softlockup code is generic. Hardlockup code is arch specific. Some
> >>architectures don't use hardlockup detectors. They use their own watchdog
> >>detectors. To make both these combination work, we have numerous #ifdefs
> >>in kernel/watchdog.c.
> >>
> >>We are trying here to make these handlers independent of each other.
> >>Also provide an interface for architectures to implement their own
> >>handlers. watchdog_nmi_enable and watchdog_nmi_disable will be defined
> >>as weak such that architectures can override its definitions.
> >>
> >>Thanks to Don Zickus for his suggestions.
> >>Here is the previous discussion
> >>http://www.spinics.net/lists/sparclinux/msg16441.html
> >Hi Babu,
> >
> >I finally got some cycles to poke at this today. Good work. A couple of
> >suggestions. For bisectability, I am thinking patch2 should be first and
> >patch1 and patch3 should be combined. Also watchdog_hld.c is going to need
> >up top:
> >
> >#define pr_fmt(fmt) "NMI watchdog: " fmt
> >
> >otherwise the error messages miss the header.
> >
> >Though I don't think watchdog.c and watchdog_hld.c should have the same
> >header. A good solution isn't coming to me right now. I will try to run
> >some tests on this tomorrow.
> Don, Thanks for the feedback. Let me know if you run into problems with your
> tests.
Hi Babu,
My tests passed. I just have to tweak the expected output lines as they
constantly change. :-(
I am going to play with different config options to see if things break from
a compile perspective.
> I will start working on the comments.
Great.
Cheers,
Don
> Thanks
> Babu
>
> >
> >Cheers,
> >Don
> >
> >>Babu Moger (4):
> >> watchdog: Remove hardlockup handler references
> >> watchdog: Move shared definitions to nmi.h
> >> watchdog: Move hardlockup detector in separate file
> >> sparc: Implement watchdog_nmi_enable and watchdog_nmi_disable
> >>
> >> arch/sparc/kernel/nmi.c | 44 ++++++++-
> >> include/linux/nmi.h | 19 ++++
> >> kernel/Makefile | 1 +
> >> kernel/watchdog.c | 276 ++---------------------------------------------
> >> kernel/watchdog_hld.c | 238 ++++++++++++++++++++++++++++++++++++++++
> >> 5 files changed, 312 insertions(+), 266 deletions(-)
> >> create mode 100644 kernel/watchdog_hld.c
> >>
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [RFC PATCH 0/4] Clean up watchdog handlers
2016-11-01 13:20 ` Don Zickus
@ 2016-11-01 14:58 ` Babu Moger
0 siblings, 0 replies; 11+ messages in thread
From: Babu Moger @ 2016-11-01 14:58 UTC (permalink / raw)
To: Don Zickus
Cc: mingo, akpm, ak, jkosina, baiyaowei, atomlin, uobergfe, tj,
hidehiro.kawai.ez, johunt, davem, sparclinux, bp, bywxiaobai,
cmetcalf, keescook, ebiederm, huawei.libin, ralf, dvyukov,
linux-kernel, sam
On 11/1/2016 8:20 AM, Don Zickus wrote:
> On Mon, Oct 31, 2016 at 04:30:59PM -0500, Babu Moger wrote:
>> On 10/31/2016 4:00 PM, Don Zickus wrote:
>>> On Wed, Oct 26, 2016 at 09:02:19AM -0700, Babu Moger wrote:
>>>> This is an attempt to cleanup watchdog handlers. Right now,
>>>> kernel/watchdog.c implements both softlockup and hardlockup detectors.
>>>> Softlockup code is generic. Hardlockup code is arch specific. Some
>>>> architectures don't use hardlockup detectors. They use their own watchdog
>>>> detectors. To make both these combination work, we have numerous #ifdefs
>>>> in kernel/watchdog.c.
>>>>
>>>> We are trying here to make these handlers independent of each other.
>>>> Also provide an interface for architectures to implement their own
>>>> handlers. watchdog_nmi_enable and watchdog_nmi_disable will be defined
>>>> as weak such that architectures can override its definitions.
>>>>
>>>> Thanks to Don Zickus for his suggestions.
>>>> Here is the previous discussion
>>>> http://www.spinics.net/lists/sparclinux/msg16441.html
>>> Hi Babu,
>>>
>>> I finally got some cycles to poke at this today. Good work. A couple of
>>> suggestions. For bisectability, I am thinking patch2 should be first and
>>> patch1 and patch3 should be combined. Also watchdog_hld.c is going to need
>>> up top:
>>>
>>> #define pr_fmt(fmt) "NMI watchdog: " fmt
>>>
>>> otherwise the error messages miss the header.
>>>
>>> Though I don't think watchdog.c and watchdog_hld.c should have the same
>>> header. A good solution isn't coming to me right now. I will try to run
>>> some tests on this tomorrow.
>> Don, Thanks for the feedback. Let me know if you run into problems with your
>> tests.
> Hi Babu,
>
> My tests passed. I just have to tweak the expected output lines as they
> constantly change. :-(
>
> I am going to play with different config options to see if things break from
> a compile perspective.
Don, Great. Thanks for the update. I had a couple of compilation issues
with different config options.
1. drivers/edac/edac_device.o:(.discard+0x0): multiple definition of
`__pcpu_unique_hrtimer_interrupts'
drivers/edac/edac_mc.o:(.discard+0x0): first defined here
This was a problem with the uniprocessor config. I am thinking of moving the
definitions of hrtimer_interrupts and is_hardlockup
into watchdog.c, as the softlockup code does most of the work here.
2. kernel/built-in.o: In function `watchdog_overflow_callback':
>> watchdog_hld.c:(.text+0x56940): undefined reference to
`sysctl_hardlockup_all_cpu_backtrace'
Moved this definition to nmi.h.
Will post the v2 version soon with all the comments included.
Thanks
Babu
>
>> I will start working on the comments.
> Great.
>
> Cheers,
> Don
>
>> Thanks
>> Babu
>>
>>> Cheers,
>>> Don
>>>
>>>> Babu Moger (4):
>>>> watchdog: Remove hardlockup handler references
>>>> watchdog: Move shared definitions to nmi.h
>>>> watchdog: Move hardlockup detector in separate file
>>>> sparc: Implement watchdog_nmi_enable and watchdog_nmi_disable
>>>>
>>>> arch/sparc/kernel/nmi.c | 44 ++++++++-
>>>> include/linux/nmi.h | 19 ++++
>>>> kernel/Makefile | 1 +
>>>> kernel/watchdog.c | 276 ++---------------------------------------------
>>>> kernel/watchdog_hld.c | 238 ++++++++++++++++++++++++++++++++++++++++
>>>> 5 files changed, 312 insertions(+), 266 deletions(-)
>>>> create mode 100644 kernel/watchdog_hld.c
>>>>
^ permalink raw reply [flat|nested] 11+ messages in thread