All of lore.kernel.org
 help / color / mirror / Atom feed
* [Xenomai-core] [PATCH 1/3] Update NMI watchdog for latest Intel CPUs
  2008-12-19  8:44 [Xenomai-core] [PATCH 0/3] NMI watchdog fixes / enhancements Jan Kiszka
  2008-12-19  8:44 ` [Xenomai-core] [PATCH 2/3] NMI watchdog support for x86-64 Jan Kiszka
  2008-12-19  8:44 ` [Xenomai-core] [PATCH 3/3] Rework x86 NMI watchdog pass-through Jan Kiszka
@ 2008-12-19  8:44 ` Jan Kiszka
  2008-12-20 16:35 ` [Xenomai-core] [PATCH 0/3] NMI watchdog fixes / enhancements Gilles Chanteperdrix
  3 siblings, 0 replies; 10+ messages in thread
From: Jan Kiszka @ 2008-12-19  8:44 UTC (permalink / raw)
  To: xenomai

Add performance-counter NMI watchdog support for recent Intel CPUs. This
should also fix potential overrun (corner) cases for P6-type CPUs as the
current code incorrectly assumes that more than 31 bits are available
for the watchdog delay counter.

Refactor some dispatching paths while at it.

Signed-off-by: Jan Kiszka <jan.kiszka@domain.hid>
---

 include/asm-x86/bits/timer.h |    2 +-
 ksrc/arch/x86/nmi_32.c       |  128 ++++++++++++++++++++++++++----------------
 2 files changed, 81 insertions(+), 49 deletions(-)

diff --git a/include/asm-x86/bits/timer.h b/include/asm-x86/bits/timer.h
index b742763..d957c54 100644
--- a/include/asm-x86/bits/timer.h
+++ b/include/asm-x86/bits/timer.h
@@ -37,7 +37,7 @@ static inline void xnarch_program_timer_shot(unsigned long delay)
 #ifdef CONFIG_XENO_HW_NMI_DEBUG_LATENCY
 	{
 		extern unsigned long rthal_maxlat_tsc;
-		if (delay <= (ULONG_MAX - rthal_maxlat_tsc))
+		if (delay <= (LONG_MAX - rthal_maxlat_tsc))
 			rthal_nmi_arm(delay + rthal_maxlat_tsc);
 	}
 #endif /* CONFIG_XENO_HW_NMI_DEBUG_LATENCY */
diff --git a/ksrc/arch/x86/nmi_32.c b/ksrc/arch/x86/nmi_32.c
index f3b3290..78ba905 100644
--- a/ksrc/arch/x86/nmi_32.c
+++ b/ksrc/arch/x86/nmi_32.c
@@ -29,11 +29,19 @@
 #include <linux/version.h>
 #include <linux/nmi.h>
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,19)
+#include <asm/intel_arch_perfmon.h>
+#endif /* Linux < 2.6.19 */
 #include <asm/nmi.h>
 #endif /* Linux < 2.6 */
 #include <asm/msr.h>
 #include <asm/xenomai/hal.h>
 
+#define NMI_WD_ARMED		0x0001
+#define NMI_WD_31BITS		0x1000
+#define NMI_WD_P4		0x2000
+#define NMI_WD_P6_OR_LATER	0x4000
+
 #define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
 #define P4_ESCR_OS              (1<<3)
 #define P4_ESCR_USR             (1<<2)
@@ -57,7 +65,7 @@
 typedef union {
 	struct {
 		/* Xenomai watchdog data. */
-		unsigned armed;
+		unsigned int flags;
 		unsigned long perfctr_msr;
 		unsigned long long next_linux_check;
 		unsigned int p4_cccr_val;
@@ -69,11 +77,11 @@ typedef union {
 } rthal_nmi_wd_t ____cacheline_aligned;
 
 static rthal_nmi_wd_t rthal_nmi_wds[NR_CPUS];
-static unsigned long rthal_nmi_perfctr_msr;
-static unsigned int rthal_nmi_p4_cccr_val;
 static void (*rthal_nmi_emergency) (struct pt_regs *);
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
+#define MSR_ARCH_PERFMON_PERFCTR0	0xc1
+#define MSR_ARCH_PERFMON_PERFCTR1	0xc2
 static void (*rthal_linux_nmi_tick) (struct pt_regs *);
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
@@ -96,23 +104,6 @@ static int (*rthal_linux_nmi_tick) (struct pt_regs *, unsigned);
 #define rthal_nmi_active	atomic_read(&nmi_active)
 #endif /* Linux >= 2.6.19 */
 
-static void rthal_touch_nmi_watchdog(void)
-{
-	unsigned long long next_linux_check;
-	int i;
-
-	next_linux_check = rthal_rdtsc() + RTHAL_CPU_FREQ;
-
-	for (i = 0; i < NR_CPUS; i++) {
-		rthal_nmi_wd_t *wd = &rthal_nmi_wds[i];
-
-		wd->perfctr_msr = rthal_nmi_perfctr_msr;
-		wd->p4_cccr_val = rthal_nmi_p4_cccr_val;
-		wd->armed = 0;
-		wd->next_linux_check = next_linux_check;
-	}
-}
-
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
 #define CALL_LINUX_NMI		rthal_linux_nmi_tick(regs)
 #define NMI_RETURN		return
@@ -127,7 +118,7 @@ static int rthal_nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
 	rthal_nmi_wd_t *wd = &rthal_nmi_wds[cpu];
 	unsigned long long now;
 
-	if (wd->armed) {
+	if (wd->flags & NMI_WD_ARMED) {
 		if (rthal_rdtsc() - wd->tick_date < rthal_maxlat_tsc) {
 			++wd->early_shots;
 			wd->next_linux_check = wd->tick_date + rthal_maxlat_tsc;
@@ -148,7 +139,7 @@ static int rthal_nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
 		} while ((long long)(now - wd->next_linux_check) >= 0);
 	}
 
-	if (wd->perfctr_msr == MSR_P4_IQ_COUNTER0) {
+	if (wd->flags & NMI_WD_P4) {
 		/*
 		 * P4 quirks:
 		 * - An overflown perfctr will assert its interrupt
@@ -158,14 +149,19 @@ static int rthal_nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
 		 */
 		wrmsr(MSR_P4_IQ_CCCR0, wd->p4_cccr_val, 0);
 		apic_write(APIC_LVTPC, APIC_DM_NMI);
-	} else if (rthal_nmi_perfctr_msr == MSR_P6_PERFCTR0) {
-		/* Only P6 based Pentium M need to re-unmask
+	} else if (wd->flags & NMI_WD_P6_OR_LATER) {
+		/* P6 based Pentium M need to re-unmask
 		 * the apic vector but it doesn't hurt
-		 * other P6 variant */
+		 * other P6 variant.
+		 * ArchPerfom/Core Duo also needs this */
 		apic_write(APIC_LVTPC, APIC_DM_NMI);
 	}
-	
-	wrmsrl(wd->perfctr_msr, now - wd->next_linux_check);
+
+	if (wd->flags & NMI_WD_31BITS)
+		wrmsr(wd->perfctr_msr, (u32)(now - wd->next_linux_check), 0);
+	else
+		wrmsrl(wd->perfctr_msr, now - wd->next_linux_check);
+
 	NMI_RETURN;
 }
 
@@ -194,6 +190,12 @@ static int earlyshots_read_proc(char *page,
 
 int rthal_nmi_request(void (*emergency) (struct pt_regs *))
 {
+	unsigned long long next_linux_check;
+	unsigned long perfctr_msr;
+	unsigned int wd_flags = 0;
+	unsigned int p4_cccr_val = 0;
+	int i;
+
 	if (!rthal_nmi_active || !nmi_watchdog_tick)
 		return -ENODEV;
 
@@ -202,31 +204,50 @@ int rthal_nmi_request(void (*emergency) (struct pt_regs *))
 
 	switch (boot_cpu_data.x86_vendor) {
         case X86_VENDOR_AMD:
-		rthal_nmi_perfctr_msr = MSR_K7_PERFCTR0;
+		perfctr_msr = MSR_K7_PERFCTR0;
 		break;
         case X86_VENDOR_INTEL:
-		switch (boot_cpu_data.x86) {
-                case 6:
-			rthal_nmi_perfctr_msr = MSR_P6_PERFCTR0;
-			break;
-                case 15:
-			rthal_nmi_perfctr_msr = MSR_P4_IQ_COUNTER0;
-			rthal_nmi_p4_cccr_val = P4_NMI_IQ_CCCR0;
+		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+			if (boot_cpu_data.x86 == 6 &&
+			    boot_cpu_data.x86_model == 14)
+				perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
+			else
+				perfctr_msr = MSR_ARCH_PERFMON_PERFCTR1;
+			wd_flags = NMI_WD_P6_OR_LATER | NMI_WD_31BITS;
+		} else
+			switch (boot_cpu_data.x86) {
+	                case 6:
+				perfctr_msr = MSR_P6_PERFCTR0;
+				wd_flags = NMI_WD_P6_OR_LATER | NMI_WD_31BITS;
+				break;
+	                case 15:
+				perfctr_msr = MSR_P4_IQ_COUNTER0;
+				p4_cccr_val = P4_NMI_IQ_CCCR0;
 #ifdef CONFIG_SMP
-			if (smp_num_siblings == 2)
-				rthal_nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1;
+				if (smp_num_siblings == 2)
+					p4_cccr_val |= P4_CCCR_OVF_PMI1;
 #endif
-			break;
-                default:
-			return -ENODEV;
-		}
+				break;
+	                default:
+				return -ENODEV;
+			}
 		break;
         default:
 		return -ENODEV;
 	}
 
 	rthal_nmi_emergency = emergency;
-	rthal_touch_nmi_watchdog();
+
+	next_linux_check = rthal_rdtsc() + RTHAL_CPU_FREQ;
+	for (i = 0; i < NR_CPUS; i++) {
+		rthal_nmi_wd_t *wd = &rthal_nmi_wds[i];
+
+		wd->flags = wd_flags;
+		wd->perfctr_msr = perfctr_msr;
+		wd->p4_cccr_val = p4_cccr_val;
+		wd->next_linux_check = next_linux_check;
+	}
+
 	rthal_linux_nmi_tick = nmi_watchdog_tick;
 	wmb();
 	nmi_watchdog_tick = &rthal_nmi_watchdog_tick;
@@ -242,6 +263,8 @@ int rthal_nmi_request(void (*emergency) (struct pt_regs *))
 
 void rthal_nmi_release(void)
 {
+	rthal_nmi_wd_t *wd = &rthal_nmi_wds[rthal_processor_id()];
+
 	if (!rthal_linux_nmi_tick)
 		return;
 
@@ -249,7 +272,10 @@ void rthal_nmi_release(void)
 	remove_proc_entry("nmi_early_shots", rthal_proc_root);
 #endif /* CONFIG_PROC_FS */
 
-	wrmsrl(rthal_nmi_perfctr_msr, 0 - RTHAL_CPU_FREQ);
+	if (wd->flags & NMI_WD_31BITS)
+		wrmsr(wd->perfctr_msr, (u32)(0 - RTHAL_CPU_FREQ), 0);
+	else
+		wrmsrl(wd->perfctr_msr, 0 - RTHAL_CPU_FREQ);
 	touch_nmi_watchdog();
 	wmb();
 	nmi_watchdog_tick = rthal_linux_nmi_tick;
@@ -269,23 +295,29 @@ void rthal_nmi_arm(unsigned long delay)
 
 		/* Protect from an interrupt handler calling rthal_nmi_arm. */
 		rthal_local_irq_save(flags);
-		wd->armed = 0;
+		wd->flags &= ~NMI_WD_ARMED;
 		wmb();
-		wrmsrl(wd->perfctr_msr, -1);
+		if (wd->flags & NMI_WD_31BITS)
+			wrmsr(wd->perfctr_msr, (u32)-1, 0);
+		else
+			wrmsrl(wd->perfctr_msr, -1);
 		asm("nop");
 		rthal_local_irq_restore(flags);
 	}
 
 	wd->tick_date = rthal_rdtsc() + (delay - rthal_maxlat_tsc);
 	wmb();
-	wrmsrl(wd->perfctr_msr, 0 - delay);
+	if (wd->flags & NMI_WD_31BITS)
+		wrmsr(wd->perfctr_msr, (u32)(0 - delay), 0);
+	else
+		wrmsrl(wd->perfctr_msr, 0 - delay);
 	wmb();
-	wd->armed = 1;
+	wd->flags |= NMI_WD_ARMED;
 }
 
 void rthal_nmi_disarm(void)
 {
-	rthal_nmi_wds[rthal_processor_id()].armed = 0;
+	rthal_nmi_wds[rthal_processor_id()].flags &= ~NMI_WD_ARMED;
 }
 
 EXPORT_SYMBOL(rthal_nmi_request);



^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [Xenomai-core] [PATCH  0/3] NMI watchdog fixes / enhancements
@ 2008-12-19  8:44 Jan Kiszka
  2008-12-19  8:44 ` [Xenomai-core] [PATCH 2/3] NMI watchdog support for x86-64 Jan Kiszka
                   ` (3 more replies)
  0 siblings, 4 replies; 10+ messages in thread
From: Jan Kiszka @ 2008-12-19  8:44 UTC (permalink / raw)
  To: xenomai

This is basically a repost of the NMI watchdog series I sent out a few
weeks ago. I just rebased things over latest trunk and fixed some
warnings.

All patches are also available at
git://git.kiszka.org/xenomai.git nmi-wd-queue

Jan


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [Xenomai-core] [PATCH 2/3] NMI watchdog support for x86-64
  2008-12-19  8:44 [Xenomai-core] [PATCH 0/3] NMI watchdog fixes / enhancements Jan Kiszka
@ 2008-12-19  8:44 ` Jan Kiszka
  2008-12-19  8:44 ` [Xenomai-core] [PATCH 3/3] Rework x86 NMI watchdog pass-through Jan Kiszka
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 10+ messages in thread
From: Jan Kiszka @ 2008-12-19  8:44 UTC (permalink / raw)
  To: xenomai

No need to lock the NMI watchdog away from x86-64 boxes; it just takes a
bit of refactoring.

NOTE: Whoever applies this to SVN, make sure to MOVE nmi_32.c to nmi.c!

Signed-off-by: Jan Kiszka <jan.kiszka@domain.hid>
---

 include/asm-x86/hal.h             |    2 +
 include/asm-x86/hal_32.h          |    2 -
 ksrc/arch/x86/Kconfig             |    4 ---
 ksrc/arch/x86/Makefile            |    4 +-
 ksrc/arch/x86/hal-common.c        |   49 +++++++++++++++++++++++++++++++++++++
 ksrc/arch/x86/hal_32.c            |   45 ----------------------------------
 ksrc/arch/x86/{nmi_32.c => nmi.c} |    0 
 7 files changed, 53 insertions(+), 53 deletions(-)
 rename ksrc/arch/x86/{nmi_32.c => nmi.c} (100%)

diff --git a/include/asm-x86/hal.h b/include/asm-x86/hal.h
index 4df6bf6..158e3e6 100644
--- a/include/asm-x86/hal.h
+++ b/include/asm-x86/hal.h
@@ -69,6 +69,8 @@ typedef int (*compat_emutick_t)(unsigned long evt,
 
 extern enum rthal_ktimer_mode rthal_ktimer_saved_mode;
 
+void rthal_latency_above_max(struct pt_regs *regs);
+
 #ifdef __i386__
 #include "hal_32.h"
 #else
diff --git a/include/asm-x86/hal_32.h b/include/asm-x86/hal_32.h
index ddcec08..9a707d0 100644
--- a/include/asm-x86/hal_32.h
+++ b/include/asm-x86/hal_32.h
@@ -234,6 +234,4 @@ static inline void rthal_setup_oneshot_apic(int vector)
 
 long rthal_strncpy_from_user(char *dst, const char __user * src, long count);
 
-void rthal_latency_above_max(struct pt_regs *regs);
-
 #endif /* !_XENO_ASM_X86_HAL_32_H */
diff --git a/ksrc/arch/x86/Kconfig b/ksrc/arch/x86/Kconfig
index 865ade7..ad8a5de 100644
--- a/ksrc/arch/x86/Kconfig
+++ b/ksrc/arch/x86/Kconfig
@@ -27,8 +27,6 @@ config XENO_HW_FPU
 	Float-Point Unit on the x86 platform at the following URL:
 	http://www.intel.com/design/intarch/techinfo/Pentium/fpu.htm
 
-if !X86_64
-
 menu "NMI watchdog"
 
 config XENO_HW_NMI_DEBUG_LATENCY
@@ -59,8 +57,6 @@ config XENO_HW_NMI_DEBUG_LATENCY_MAX
 
 endmenu
 
-endif
-
 menu "SMI workaround"
 
 config XENO_HW_SMI_DETECT_DISABLE
diff --git a/ksrc/arch/x86/Makefile b/ksrc/arch/x86/Makefile
index 71cc8ec..3296206 100644
--- a/ksrc/arch/x86/Makefile
+++ b/ksrc/arch/x86/Makefile
@@ -12,7 +12,7 @@ obj-$(CONFIG_XENOMAI) += xeno_hal.o
 
 xeno_hal-y := hal_$(X86_MODE).o hal-common.o usercopy_$(X86_MODE).o
 
-xeno_hal-$(CONFIG_XENO_HW_NMI_DEBUG_LATENCY) += nmi_$(X86_MODE).o
+xeno_hal-$(CONFIG_XENO_HW_NMI_DEBUG_LATENCY) += nmi.o
 
 xeno_hal-$(CONFIG_XENO_HW_SMI_DETECT) += smi.o
 
@@ -28,7 +28,7 @@ O_TARGET := built-in.o
 
 obj-y := hal_32.o hal-common.o
 
-obj-$(CONFIG_XENO_HW_NMI_DEBUG_LATENCY) += nmi_32.o
+obj-$(CONFIG_XENO_HW_NMI_DEBUG_LATENCY) += nmi.o
 
 obj-$(CONFIG_XENO_HW_SMI_DETECT) += smi.o
 
diff --git a/ksrc/arch/x86/hal-common.c b/ksrc/arch/x86/hal-common.c
index 2b04dcf..f2b3b4c 100644
--- a/ksrc/arch/x86/hal-common.c
+++ b/ksrc/arch/x86/hal-common.c
@@ -278,6 +278,55 @@ void rthal_timer_release(int cpu)
 		rthal_timer_set_oneshot(0);
 }
 
+#ifdef CONFIG_XENO_HW_NMI_DEBUG_LATENCY
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+
+#include <linux/vt_kern.h>
+
+extern void show_registers(struct pt_regs *regs);
+
+extern spinlock_t nmi_print_lock;
+
+void die_nmi(struct pt_regs *regs, const char *msg)
+{
+	spin_lock(&nmi_print_lock);
+	/*
+	 * We are in trouble anyway, lets at least try
+	 * to get a message out.
+	 */
+	bust_spinlocks(1);
+	printk(msg);
+	show_registers(regs);
+	printk("console shuts up ...\n");
+	console_silent();
+	spin_unlock(&nmi_print_lock);
+	bust_spinlocks(0);
+	do_exit(SIGSEGV);
+}
+
+#endif /* Linux < 2.6 */
+
+#ifdef CONFIG_X86_64
+#include <asm/nmi.h>
+#define die_nmi(regs, msg)	die_nmi(msg, regs, 1)
+#endif /* CONFIG_X86_64 */
+
+void rthal_latency_above_max(struct pt_regs *regs)
+{
+	/* Try to report via latency tracer first, then fall back to panic. */
+	if (rthal_trace_user_freeze(rthal_maxlat_us, 1) < 0) {
+		char buf[128];
+
+		snprintf(buf,
+			 sizeof(buf),
+			 "NMI watchdog detected timer latency above %u us\n",
+			 rthal_maxlat_us);
+		die_nmi(regs, buf);
+	}
+}
+
+#endif /* CONFIG_XENO_HW_NMI_DEBUG_LATENCY */
 
 #endif /* CONFIG_X86_LOCAL_APIC */
 
diff --git a/ksrc/arch/x86/hal_32.c b/ksrc/arch/x86/hal_32.c
index e8e1258..026b03f 100644
--- a/ksrc/arch/x86/hal_32.c
+++ b/ksrc/arch/x86/hal_32.c
@@ -97,51 +97,6 @@ unsigned long rthal_timer_calibrate(void)
 	return rthal_imuldiv(dt, 20, RTHAL_CPU_FREQ);
 }
 
-#ifdef CONFIG_XENO_HW_NMI_DEBUG_LATENCY
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-
-#include <linux/vt_kern.h>
-
-extern void show_registers(struct pt_regs *regs);
-
-extern spinlock_t nmi_print_lock;
-
-void die_nmi(struct pt_regs *regs, const char *msg)
-{
-	spin_lock(&nmi_print_lock);
-	/*
-	 * We are in trouble anyway, lets at least try
-	 * to get a message out.
-	 */
-	bust_spinlocks(1);
-	printk(msg);
-	show_registers(regs);
-	printk("console shuts up ...\n");
-	console_silent();
-	spin_unlock(&nmi_print_lock);
-	bust_spinlocks(0);
-	do_exit(SIGSEGV);
-}
-
-#endif /* Linux < 2.6 */
-
-void rthal_latency_above_max(struct pt_regs *regs)
-{
-	/* Try to report via latency tracer first, then fall back to panic. */
-	if (rthal_trace_user_freeze(rthal_maxlat_us, 1) < 0) {
-		char buf[128];
-
-		snprintf(buf,
-			 sizeof(buf),
-			 "NMI watchdog detected timer latency above %u us\n",
-			 rthal_maxlat_us);
-		die_nmi(regs, buf);
-	}
-}
-
-#endif /* CONFIG_XENO_HW_NMI_DEBUG_LATENCY */
-
 #else /* !CONFIG_X86_LOCAL_APIC */
 
 unsigned long rthal_timer_calibrate(void)
diff --git a/ksrc/arch/x86/nmi_32.c b/ksrc/arch/x86/nmi.c
similarity index 100%
rename from ksrc/arch/x86/nmi_32.c
rename to ksrc/arch/x86/nmi.c



^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [Xenomai-core] [PATCH 3/3] Rework x86 NMI watchdog pass-through
  2008-12-19  8:44 [Xenomai-core] [PATCH 0/3] NMI watchdog fixes / enhancements Jan Kiszka
  2008-12-19  8:44 ` [Xenomai-core] [PATCH 2/3] NMI watchdog support for x86-64 Jan Kiszka
@ 2008-12-19  8:44 ` Jan Kiszka
  2008-12-19  8:44 ` [Xenomai-core] [PATCH 1/3] Update NMI watchdog for latest Intel CPUs Jan Kiszka
  2008-12-20 16:35 ` [Xenomai-core] [PATCH 0/3] NMI watchdog fixes / enhancements Gilles Chanteperdrix
  3 siblings, 0 replies; 10+ messages in thread
From: Jan Kiszka @ 2008-12-19  8:44 UTC (permalink / raw)
  To: xenomai

Currently, Xenomai's NMI watchdog handler assumes that it is called only on
watchdog events. Other reasons are considered spurious, and a TSC-based
method is used to detect such conditions. This has several issues:
 - the return code of the Linux handler is ignored
 - KGDB's NMI events (CPU roundups) are not passed through
 - early_shot mechanism suffers from a signedness bug and misses too
   early shots
 - printk from NMI can cause lock-ups, but we also support non-fatal
   reports (ipipe tracer active)

This patch therefore switches to the watchdog detection pattern that
Linux uses: Check for the highest perfctr bit being zero for true
timeouts. In case the watchdog did not time out, the Linux handler is
invoked and its return code is properly forwarded. Finally, the
early_shot reporting is dropped as it becomes pointless when KGDB is in
use (and I suspect that patch 1 of this series fixes most of the
original reasons).

Signed-off-by: Jan Kiszka <jan.kiszka@domain.hid>
---

 ksrc/arch/x86/nmi.c |   95 ++++++++++++++++++++++----------------------------
 1 files changed, 42 insertions(+), 53 deletions(-)

diff --git a/ksrc/arch/x86/nmi.c b/ksrc/arch/x86/nmi.c
index 78ba905..9f7a2ef 100644
--- a/ksrc/arch/x86/nmi.c
+++ b/ksrc/arch/x86/nmi.c
@@ -65,13 +65,11 @@
 typedef union {
 	struct {
 		/* Xenomai watchdog data. */
-		unsigned int flags;
-		unsigned long perfctr_msr;
 		unsigned long long next_linux_check;
+		unsigned long perfctr_msr;
+		u64 perfctr_checkmask;
 		unsigned int p4_cccr_val;
-
-		unsigned early_shots;
-		unsigned long long tick_date;
+		unsigned int flags;
 	};
 	char __pad[SMP_CACHE_BYTES];
 } rthal_nmi_wd_t ____cacheline_aligned;
@@ -82,6 +80,15 @@ static void (*rthal_nmi_emergency) (struct pt_regs *);
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
 #define MSR_ARCH_PERFMON_PERFCTR0	0xc1
 #define MSR_ARCH_PERFMON_PERFCTR1	0xc2
+union cpuid10_eax {
+	struct {
+		unsigned int version_id:8;
+		unsigned int num_counters:8;
+		unsigned int bit_width:8;
+		unsigned int mask_length:8;
+	} split;
+	unsigned int full;
+};
 static void (*rthal_linux_nmi_tick) (struct pt_regs *);
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
@@ -105,28 +112,28 @@ static int (*rthal_linux_nmi_tick) (struct pt_regs *, unsigned);
 #endif /* Linux >= 2.6.19 */
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
-#define CALL_LINUX_NMI		rthal_linux_nmi_tick(regs)
-#define NMI_RETURN		return
+#define CALL_LINUX_NMI		({ rthal_linux_nmi_tick(regs); 1; })
+#define NMI_RETURN(code)	return
 static void rthal_nmi_watchdog_tick(struct pt_regs *regs)
 #else /* Linux >= 2.6.19 */
 #define CALL_LINUX_NMI		rthal_linux_nmi_tick(regs, reason)
-#define NMI_RETURN		return 1
+#define NMI_RETURN(code)	return (code)
 static int rthal_nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
 #endif /* Linux >= 2.6.19 */
 {
 	int cpu = rthal_processor_id();
 	rthal_nmi_wd_t *wd = &rthal_nmi_wds[cpu];
 	unsigned long long now;
+	u64 perfctr;
 
-	if (wd->flags & NMI_WD_ARMED) {
-		if (rthal_rdtsc() - wd->tick_date < rthal_maxlat_tsc) {
-			++wd->early_shots;
-			wd->next_linux_check = wd->tick_date + rthal_maxlat_tsc;
-		} else {
-			printk("NMI early shots: %d\n", wd->early_shots);
-			rthal_nmi_emergency(regs);
-		}
-	}
+	rdmsrl(wd->perfctr_msr, perfctr);
+
+	if (perfctr & wd->perfctr_checkmask)
+		/* No watchdog tick, let Linux handle it. */
+		NMI_RETURN(CALL_LINUX_NMI);
+
+	if (wd->flags & NMI_WD_ARMED)
+		rthal_nmi_emergency(regs);
 
 	now = rthal_rdtsc();
 
@@ -162,36 +169,14 @@ static int rthal_nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
 	else
 		wrmsrl(wd->perfctr_msr, now - wd->next_linux_check);
 
-	NMI_RETURN;
+	NMI_RETURN(1);
 }
 
-#ifdef CONFIG_PROC_FS
-static int earlyshots_read_proc(char *page,
-				char **start,
-				off_t off, int count, int *eof, void *data)
-{
-	int i, len = 0;
-
-	for_each_online_cpu(i)
-		len += sprintf(page + len, "CPU#%d: %u\n",
-			       i, rthal_nmi_wds[i].early_shots);
-	len -= off;
-	if (len <= off + count)
-		*eof = 1;
-	*start = page + off;
-	if (len > count)
-		len = count;
-	if (len < 0)
-		len = 0;
-
-	return len;
-}
-#endif /* CONFIG_PROC_FS */
-
 int rthal_nmi_request(void (*emergency) (struct pt_regs *))
 {
 	unsigned long long next_linux_check;
 	unsigned long perfctr_msr;
+	u64 perfctr_checkmask;
 	unsigned int wd_flags = 0;
 	unsigned int p4_cccr_val = 0;
 	int i;
@@ -205,23 +190,30 @@ int rthal_nmi_request(void (*emergency) (struct pt_regs *))
 	switch (boot_cpu_data.x86_vendor) {
         case X86_VENDOR_AMD:
 		perfctr_msr = MSR_K7_PERFCTR0;
+		perfctr_checkmask = 1UL << 47;
 		break;
         case X86_VENDOR_INTEL:
 		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+			union cpuid10_eax eax;
+
 			if (boot_cpu_data.x86 == 6 &&
 			    boot_cpu_data.x86_model == 14)
 				perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
 			else
 				perfctr_msr = MSR_ARCH_PERFMON_PERFCTR1;
+			cpuid(10, &eax.full, &i, &i, &i);
+			perfctr_checkmask = 1UL << (eax.split.bit_width - 1);
 			wd_flags = NMI_WD_P6_OR_LATER | NMI_WD_31BITS;
 		} else
 			switch (boot_cpu_data.x86) {
 	                case 6:
 				perfctr_msr = MSR_P6_PERFCTR0;
+				perfctr_checkmask = 1UL << 39;
 				wd_flags = NMI_WD_P6_OR_LATER | NMI_WD_31BITS;
 				break;
 	                case 15:
 				perfctr_msr = MSR_P4_IQ_COUNTER0;
+				perfctr_checkmask = 1UL << 39;
 				p4_cccr_val = P4_NMI_IQ_CCCR0;
 #ifdef CONFIG_SMP
 				if (smp_num_siblings == 2)
@@ -244,6 +236,7 @@ int rthal_nmi_request(void (*emergency) (struct pt_regs *))
 
 		wd->flags = wd_flags;
 		wd->perfctr_msr = perfctr_msr;
+		wd->perfctr_checkmask = perfctr_checkmask;
 		wd->p4_cccr_val = p4_cccr_val;
 		wd->next_linux_check = next_linux_check;
 	}
@@ -252,12 +245,6 @@ int rthal_nmi_request(void (*emergency) (struct pt_regs *))
 	wmb();
 	nmi_watchdog_tick = &rthal_nmi_watchdog_tick;
 
-#ifdef CONFIG_PROC_FS
-	rthal_add_proc_leaf("nmi_early_shots",
-			    &earlyshots_read_proc,
-			    NULL, NULL, rthal_proc_root);
-#endif /* CONFIG_PROC_FS */
-
 	return 0;
 }
 
@@ -268,10 +255,6 @@ void rthal_nmi_release(void)
 	if (!rthal_linux_nmi_tick)
 		return;
 
-#ifdef CONFIG_PROC_FS
-	remove_proc_entry("nmi_early_shots", rthal_proc_root);
-#endif /* CONFIG_PROC_FS */
-
 	if (wd->flags & NMI_WD_31BITS)
 		wrmsr(wd->perfctr_msr, (u32)(0 - RTHAL_CPU_FREQ), 0);
 	else
@@ -296,6 +279,10 @@ void rthal_nmi_arm(unsigned long delay)
 		/* Protect from an interrupt handler calling rthal_nmi_arm. */
 		rthal_local_irq_save(flags);
 		wd->flags &= ~NMI_WD_ARMED;
+		/*
+		 * Our watchdog must be declared unarmed before we triger the
+		 * Linux watchdog NMI, entering rthal_nmi_watchdog_tick.
+		 */
 		wmb();
 		if (wd->flags & NMI_WD_31BITS)
 			wrmsr(wd->perfctr_msr, (u32)-1, 0);
@@ -305,12 +292,14 @@ void rthal_nmi_arm(unsigned long delay)
 		rthal_local_irq_restore(flags);
 	}
 
-	wd->tick_date = rthal_rdtsc() + (delay - rthal_maxlat_tsc);
-	wmb();
 	if (wd->flags & NMI_WD_31BITS)
 		wrmsr(wd->perfctr_msr, (u32)(0 - delay), 0);
 	else
 		wrmsrl(wd->perfctr_msr, 0 - delay);
+	/*
+	 * New perfctr must have been written before we can declare the
+	 * watchdog armed (avoid race with previously programmed value).
+	 */
 	wmb();
 	wd->flags |= NMI_WD_ARMED;
 }



^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [Xenomai-core] [PATCH  0/3] NMI watchdog fixes / enhancements
  2008-12-19  8:44 [Xenomai-core] [PATCH 0/3] NMI watchdog fixes / enhancements Jan Kiszka
                   ` (2 preceding siblings ...)
  2008-12-19  8:44 ` [Xenomai-core] [PATCH 1/3] Update NMI watchdog for latest Intel CPUs Jan Kiszka
@ 2008-12-20 16:35 ` Gilles Chanteperdrix
  2008-12-20 20:48   ` Jan Kiszka
  3 siblings, 1 reply; 10+ messages in thread
From: Gilles Chanteperdrix @ 2008-12-20 16:35 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: xenomai

Jan Kiszka wrote:
> This is basically a repost of the NMI watchdog series I sent out a few
> weeks ago. I just rebased things over latest trunk and fixed some
> warnings.
> 
> All patches are also available at
> git://git.kiszka.org/xenomai.git nmi-wd-queue

That is a lot of stuff to review. I am afraid it is impossible to review
everything, so the only thing we can rely on is testing, hence the next
question: have these patches been tested in every configuration
(enabled, disabled, built-in, module, voluntary overrun)?

As for the 32nd bit issue, I am afraid it can not explain the spurious
shots observed on some platforms (note that I implemented the early shot
thing a bit in the dark: I never observed the spurious shots myself),
that is because the nmi timer is programmed for durations between 100us
and 10ms, which should be far from using the 32nd bit.

-- 
					    Gilles.


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Xenomai-core] [PATCH  0/3] NMI watchdog fixes / enhancements
  2008-12-20 16:35 ` [Xenomai-core] [PATCH 0/3] NMI watchdog fixes / enhancements Gilles Chanteperdrix
@ 2008-12-20 20:48   ` Jan Kiszka
  2008-12-20 21:00     ` Gilles Chanteperdrix
  0 siblings, 1 reply; 10+ messages in thread
From: Jan Kiszka @ 2008-12-20 20:48 UTC (permalink / raw)
  To: Gilles Chanteperdrix; +Cc: xenomai

[-- Attachment #1: Type: text/plain, Size: 1574 bytes --]

Gilles Chanteperdrix wrote:
> Jan Kiszka wrote:
>> This is basically a repost of the NMI watchdog series I sent out a few
>> weeks ago. I just rebased things over latest trunk and fixed some
>> warnings.
>>
>> All patches are also available at
>> git://git.kiszka.org/xenomai.git nmi-wd-queue
> 
> That is a lot of stuff to review. I am afraid it is impossible to review
> everything, so the only thing we can rely on is testing, hence the next
> question: have these patches been tested in every configuration
> (enabled, disabled, built-in, module, voluntary overrun)?

In most configurations, but definitely not in all (they are too many).

This is a debugging tool, so first of all the disabled case must not
cause harm, and I'm quite sure I haven't changed anything regarding
this. Moreover, the enabled case was not working for many recent
platforms anymore as we were lacking P6 support. So there shouldn't be
much to lose.

> 
> As for the 32nd bit issue, I am afraid it can not explain the spurious
> shots observed on some platforms (note that I implemented the early shot
> thing a bit in the dark: I never observed the spurious shots myself),
> that is because the nmi timer is programmed for durations between 100us
> and 10ms, which should be far from using the 32nd bit.

Yes, the signedness issue that is fixed in patch 1 has likely nothing to
do with the spurious invocations. But wherever they may still come from,
patch 3 ensures that they are now properly ignored (ie. forwarded to the
next handling instance).

Jan


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 257 bytes --]

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Xenomai-core] [PATCH  0/3] NMI watchdog fixes / enhancements
  2008-12-20 20:48   ` Jan Kiszka
@ 2008-12-20 21:00     ` Gilles Chanteperdrix
  2008-12-20 21:05       ` Jan Kiszka
  0 siblings, 1 reply; 10+ messages in thread
From: Gilles Chanteperdrix @ 2008-12-20 21:00 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: xenomai

Jan Kiszka wrote:
> Gilles Chanteperdrix wrote:
>> Jan Kiszka wrote:
>>> This is basically a repost of the NMI watchdog series I sent out a few
>>> weeks ago. I just rebased things over latest trunk and fixed some
>>> warnings.
>>>
>>> All patches are also available at
>>> git://git.kiszka.org/xenomai.git nmi-wd-queue
>> That is a lot of stuff to review. I am afraid it is impossible to review
>> everything, so the only thing we can rely on is testing, hence the next
>> question: have these patches been tested in every configuration
>> (enabled, disabled, built-in, module, voluntary overrun)?
> 
> In most configurations, but definitely not in all (they are too many).
> 
> This is a debugging tool, so first of all the disabled case must not
> cause harm, and I'm quite sure I haven't changed anything regarding
> this. Moreover, the enabled case was not working for many recent
> platforms anymore as we were lacking P6 support. So there shouldn't be
> much to lose.

I disagree: the current version compiles in all configurations tested
until now, and happened to work when enabled at some point in the past.
I find it annoying, to say the least, when I want to test something on
trunk, that some previous unrelated commit breaks a configuration
because it was not tested. So, please test your patch. The
configurations to test are not so numerous: disabled, enabled built-in
with an overrun check, enabled in module with an overrun check, repeat
for x86_64. That makes 6 configurations, not that much.

-- 
					    Gilles.


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Xenomai-core] [PATCH  0/3] NMI watchdog fixes / enhancements
  2008-12-20 21:00     ` Gilles Chanteperdrix
@ 2008-12-20 21:05       ` Jan Kiszka
  2008-12-20 21:37         ` Jan Kiszka
  0 siblings, 1 reply; 10+ messages in thread
From: Jan Kiszka @ 2008-12-20 21:05 UTC (permalink / raw)
  To: Gilles Chanteperdrix; +Cc: xenomai

[-- Attachment #1: Type: text/plain, Size: 1838 bytes --]

Gilles Chanteperdrix wrote:
> Jan Kiszka wrote:
>> Gilles Chanteperdrix wrote:
>>> Jan Kiszka wrote:
>>>> This is basically a repost of the NMI watchdog series I sent out a few
>>>> weeks ago. I just rebased things over latest trunk and fixed some
>>>> warnings.
>>>>
>>>> All patches are also available at
>>>> git://git.kiszka.org/xenomai.git nmi-wd-queue
>>> That is a lot of stuff to review. I am afraid it is impossible to review
>>> everything, so the only thing we can rely on is testing, hence the next
>>> question: have these patches been tested in every configuration
>>> (enabled, disabled, built-in, module, voluntary overrun)?
>> In most configurations, but definitely not in all (they are too many).
>>
>> This is a debugging tool, so first of all the disabled case must not
>> cause harm, and I'm quite sure I haven't changed anything regarding
>> this. Moreover, the enabled case was not working for many recent
>> platforms anymore as we were lacking P6 support. So there shouldn't be
>> much to lose.
> 
> I disagree: the current version compiles in all configurations tested
> until now, and happened to work when enabled at some point in the past.
> I find it annoying, to say the least, when I want to test something on
> trunk, that some previous unrelated commit breaks a configuration
> because it was not tested. So, please test your patch. The
> configurations to test are not so numerous: disabled, enabled built-in
> with an overrun check, enabled in module with an overrun check, repeat
> for x86_64. That makes 6 configurations, not that much.

The trivial ones have been tested, of course. But keep in mind that
there are more variables (CPU types, kernel versions, interfering
config settings, etc.) that create many more than 6 variants to be built
and run.

Jan


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 257 bytes --]

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Xenomai-core] [PATCH  0/3] NMI watchdog fixes / enhancements
  2008-12-20 21:05       ` Jan Kiszka
@ 2008-12-20 21:37         ` Jan Kiszka
  0 siblings, 0 replies; 10+ messages in thread
From: Jan Kiszka @ 2008-12-20 21:37 UTC (permalink / raw)
  To: Gilles Chanteperdrix; +Cc: xenomai

[-- Attachment #1: Type: text/plain, Size: 2211 bytes --]

Jan Kiszka wrote:
> Gilles Chanteperdrix wrote:
>> Jan Kiszka wrote:
>>> Gilles Chanteperdrix wrote:
>>>> Jan Kiszka wrote:
>>>>> This is basically a repost of the NMI watchdog series I sent out a few
>>>>> weeks ago. I just rebased things over latest trunk and fixed some
>>>>> warnings.
>>>>>
>>>>> All patches are also available at
>>>>> git://git.kiszka.org/xenomai.git nmi-wd-queue
>>>> That is a lot of stuff to review. I am afraid it is impossible to review
>>>> everything, so the only thing we can rely on is testing, hence the next
>>>> question: have these patches been tested in every configuration
>>>> (enabled, disabled, built-in, module, voluntary overrun)?
>>> In most configurations, but definitely not in all (they are too many).
>>>
>>> This is a debugging tool, so first of all the disabled case must not
>>> cause harm, and I'm quite sure I haven't changed anything regarding
>>> this. Moreover, the enabled case was not working for many recent
>>> platforms anymore as we were lacking P6 support. So there shouldn't be
>>> much to lose.
>> I disagree: the current version compiles in all configurations tested
>> until now, and happened to work when enabled at some point in the past.
>> I find it annoying, to say the least, when I want to test something on
>> trunk, that some previous unrelated commit breaks a configuration
>> because it was not tested. So, please test your patch. The
>> configurations to test are not so numerous: disabled, enabled built-in
>> with an overrun check, enabled in module with an overrun check, repeat
>> for x86_64. That makes 6 configurations, not that much.
> 
> The trivial ones have been tested, of course. But keep in mind that
> there are more variables (CPU types, kernel versions, interfering
> config settings, etc.) that create many more than 6 variants to be built
> and run.

To give an example of untested scenarios: Current trunk does not build
against 2.6.27 for x86-32 with NMI watchdog enabled (die_nmi gained
another argument due to unification with x86-64). Well, my patch does
not change this picture yet (I once tested against 2.6.26 for 32-bit
support). Will fix this as well.

Jan


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 257 bytes --]

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [Xenomai-core] [PATCH 3/3] Rework x86 NMI watchdog pass-through
  2008-10-26 14:43 [Xenomai-core] [PATCH 0/3] x86: Fix & update NMI watchdog Jan Kiszka
@ 2008-10-26 14:43 ` Jan Kiszka
  0 siblings, 0 replies; 10+ messages in thread
From: Jan Kiszka @ 2008-10-26 14:43 UTC (permalink / raw)
  To: xenomai; +Cc: Jan Kiszka


Currently, Xenomai's NMI watchdog handler assumes that it is called only
on watchdog events; other reasons are considered spurious, and a TSC-based
method is used to detect such conditions. This has several issues:
 - the return code of the Linux handler is ignored
 - KGDB's NMI events (CPU roundups) are not passed through
 - early_shot mechanism suffers from a signedness bug and misses too
   early shots
 - printk from NMI can cause lock-ups, but we also support non-fatal
   reports (ipipe tracer active)

This patch therefore switches to the watchdog detection pattern that
Linux uses: Check for the highest perfctr bit being zero for true
timeouts. In case the watchdog did not time out, the Linux handler is
invoked and its return code is properly forwarded. Finally, the
early_shot reporting is dropped as it becomes pointless when KGDB is in
use (and I suspect that patch 1 of this series fixes most of the
original reasons).

Signed-off-by: Jan Kiszka <jan.kiszka@domain.hid>
---
 ksrc/arch/x86/nmi.c |   95 ++++++++++++++++++++++------------------------------
 1 file changed, 42 insertions(+), 53 deletions(-)

Index: b/ksrc/arch/x86/nmi.c
===================================================================
--- a/ksrc/arch/x86/nmi.c
+++ b/ksrc/arch/x86/nmi.c
@@ -65,13 +65,11 @@
 typedef union {
 	struct {
 		/* Xenomai watchdog data. */
-		unsigned int flags;
-		unsigned long perfctr_msr;
 		unsigned long long next_linux_check;
+		unsigned long perfctr_msr;
+		unsigned int perfctr_checkbit;
 		unsigned int p4_cccr_val;
-
-		unsigned early_shots;
-		unsigned long long tick_date;
+		unsigned int flags;
 	};
 	char __pad[SMP_CACHE_BYTES];
 } rthal_nmi_wd_t ____cacheline_aligned;
@@ -82,6 +80,15 @@ static void (*rthal_nmi_emergency) (stru
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
 #define MSR_ARCH_PERFMON_PERFCTR0	0xc1
 #define MSR_ARCH_PERFMON_PERFCTR1	0xc2
+union cpuid10_eax {
+	struct {
+		unsigned int version_id:8;
+		unsigned int num_counters:8;
+		unsigned int bit_width:8;
+		unsigned int mask_length:8;
+	} split;
+	unsigned int full;
+};
 static void (*rthal_linux_nmi_tick) (struct pt_regs *);
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
@@ -105,28 +112,28 @@ static int (*rthal_linux_nmi_tick) (stru
 #endif /* Linux >= 2.6.19 */
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
-#define CALL_LINUX_NMI		rthal_linux_nmi_tick(regs)
-#define NMI_RETURN		return
+#define CALL_LINUX_NMI		({ rthal_linux_nmi_tick(regs); 1; })
+#define NMI_RETURN(code)	return
 static void rthal_nmi_watchdog_tick(struct pt_regs *regs)
 #else /* Linux >= 2.6.19 */
 #define CALL_LINUX_NMI		rthal_linux_nmi_tick(regs, reason)
-#define NMI_RETURN		return 1
+#define NMI_RETURN(code)	return (code)
 static int rthal_nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
 #endif /* Linux >= 2.6.19 */
 {
 	int cpu = rthal_processor_id();
 	rthal_nmi_wd_t *wd = &rthal_nmi_wds[cpu];
 	unsigned long long now;
+	u64 perfctr;
 
-	if (wd->flags & NMI_WD_ARMED) {
-		if (rthal_rdtsc() - wd->tick_date < rthal_maxlat_tsc) {
-			++wd->early_shots;
-			wd->next_linux_check = wd->tick_date + rthal_maxlat_tsc;
-		} else {
-			printk("NMI early shots: %d\n", wd->early_shots);
-			rthal_nmi_emergency(regs);
-		}
-	}
+	rdmsrl(wd->perfctr_msr, perfctr);
+
+	if (test_bit(wd->perfctr_checkbit, &perfctr))
+		/* No watchdog tick, let Linux handle it. */
+		NMI_RETURN(CALL_LINUX_NMI);
+
+	if (wd->flags & NMI_WD_ARMED)
+		rthal_nmi_emergency(regs);
 
 	now = rthal_rdtsc();
 
@@ -162,36 +169,14 @@ static int rthal_nmi_watchdog_tick(struc
 	else
 		wrmsrl(wd->perfctr_msr, now - wd->next_linux_check);
 
-	NMI_RETURN;
-}
-
-#ifdef CONFIG_PROC_FS
-static int earlyshots_read_proc(char *page,
-				char **start,
-				off_t off, int count, int *eof, void *data)
-{
-	int i, len = 0;
-
-	for_each_online_cpu(i)
-		len += sprintf(page + len, "CPU#%d: %u\n",
-			       i, rthal_nmi_wds[i].early_shots);
-	len -= off;
-	if (len <= off + count)
-		*eof = 1;
-	*start = page + off;
-	if (len > count)
-		len = count;
-	if (len < 0)
-		len = 0;
-
-	return len;
+	NMI_RETURN(1);
 }
-#endif /* CONFIG_PROC_FS */
 
 int rthal_nmi_request(void (*emergency) (struct pt_regs *))
 {
 	unsigned long long next_linux_check;
 	unsigned long perfctr_msr;
+ 	unsigned int perfctr_checkbit;
 	unsigned int wd_flags = 0;
 	unsigned int p4_cccr_val = 0;
 	int i;
@@ -205,23 +190,30 @@ int rthal_nmi_request(void (*emergency)
 	switch (boot_cpu_data.x86_vendor) {
         case X86_VENDOR_AMD:
 		perfctr_msr = MSR_K7_PERFCTR0;
+		perfctr_checkbit = 47;
 		break;
         case X86_VENDOR_INTEL:
 		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+			union cpuid10_eax eax;
+
 			if (boot_cpu_data.x86 == 6 &&
 			    boot_cpu_data.x86_model == 14)
 				perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
 			else
 				perfctr_msr = MSR_ARCH_PERFMON_PERFCTR1;
+			cpuid(10, &eax.full, &i, &i, &i);
+			perfctr_checkbit = eax.split.bit_width - 1;
 			wd_flags = NMI_WD_P6_OR_LATER | NMI_WD_31BITS;
 		} else
 			switch (boot_cpu_data.x86) {
 	                case 6:
 				perfctr_msr = MSR_P6_PERFCTR0;
+				perfctr_checkbit = 39;
 				wd_flags = NMI_WD_P6_OR_LATER | NMI_WD_31BITS;
 				break;
 	                case 15:
 				perfctr_msr = MSR_P4_IQ_COUNTER0;
+				perfctr_checkbit = 39;
 				p4_cccr_val = P4_NMI_IQ_CCCR0;
 #ifdef CONFIG_SMP
 				if (smp_num_siblings == 2)
@@ -244,6 +236,7 @@ int rthal_nmi_request(void (*emergency)
 
 		wd->flags = wd_flags;
 		wd->perfctr_msr = perfctr_msr;
+ 		wd->perfctr_checkbit = perfctr_checkbit;
 		wd->p4_cccr_val = p4_cccr_val;
 		wd->next_linux_check = next_linux_check;
 	}
@@ -252,12 +245,6 @@ int rthal_nmi_request(void (*emergency)
 	wmb();
 	nmi_watchdog_tick = &rthal_nmi_watchdog_tick;
 
-#ifdef CONFIG_PROC_FS
-	rthal_add_proc_leaf("nmi_early_shots",
-			    &earlyshots_read_proc,
-			    NULL, NULL, rthal_proc_root);
-#endif /* CONFIG_PROC_FS */
-
 	return 0;
 }
 
@@ -268,10 +255,6 @@ void rthal_nmi_release(void)
 	if (!rthal_linux_nmi_tick)
 		return;
 
-#ifdef CONFIG_PROC_FS
-	remove_proc_entry("nmi_early_shots", rthal_proc_root);
-#endif /* CONFIG_PROC_FS */
-
 	if (wd->flags & NMI_WD_31BITS)
 		wrmsr(wd->perfctr_msr, (u32)(0 - RTHAL_CPU_FREQ), 0);
 	else
@@ -296,6 +279,10 @@ void rthal_nmi_arm(unsigned long delay)
 		/* Protect from an interrupt handler calling rthal_nmi_arm. */
 		rthal_local_irq_save(flags);
 		wd->flags &= ~NMI_WD_ARMED;
+		/*
+		 * Our watchdog must be declared unarmed before we trigger the
+		 * Linux watchdog NMI, entering rthal_nmi_watchdog_tick.
+		 */
 		wmb();
 		if (wd->flags & NMI_WD_31BITS)
 			wrmsr(wd->perfctr_msr, (u32)-1, 0);
@@ -305,12 +292,14 @@ void rthal_nmi_arm(unsigned long delay)
 		rthal_local_irq_restore(flags);
 	}
 
-	wd->tick_date = rthal_rdtsc() + (delay - rthal_maxlat_tsc);
-	wmb();
 	if (wd->flags & NMI_WD_31BITS)
 		wrmsr(wd->perfctr_msr, (u32)(0 - delay), 0);
 	else
 		wrmsrl(wd->perfctr_msr, 0 - delay);
+	/*
+	 * New perfctr must have been written before we can declare the
+	 * watchdog armed (avoid race with previously programmed value).
+	 */
 	wmb();
 	wd->flags |= NMI_WD_ARMED;
 }



^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2008-12-20 21:37 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-12-19  8:44 [Xenomai-core] [PATCH 0/3] NMI watchdog fixes / enhancements Jan Kiszka
2008-12-19  8:44 ` [Xenomai-core] [PATCH 2/3] NMI watchdog support for x86-64 Jan Kiszka
2008-12-19  8:44 ` [Xenomai-core] [PATCH 3/3] Rework x86 NMI watchdog pass-through Jan Kiszka
2008-12-19  8:44 ` [Xenomai-core] [PATCH 1/3] Update NMI watchdog for latest Intel CPUs Jan Kiszka
2008-12-20 16:35 ` [Xenomai-core] [PATCH 0/3] NMI watchdog fixes / enhancements Gilles Chanteperdrix
2008-12-20 20:48   ` Jan Kiszka
2008-12-20 21:00     ` Gilles Chanteperdrix
2008-12-20 21:05       ` Jan Kiszka
2008-12-20 21:37         ` Jan Kiszka
  -- strict thread matches above, loose matches on Subject: below --
2008-10-26 14:43 [Xenomai-core] [PATCH 0/3] x86: Fix & update NMI watchdog Jan Kiszka
2008-10-26 14:43 ` [Xenomai-core] [PATCH 3/3] Rework x86 NMI watchdog pass-through Jan Kiszka

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.