From mboxrd@z Thu Jan 1 00:00:00 1970 Resent-To: xenomai-core Resent-Message-Id: <49513833.7050407@domain.hid> From: Jan Kiszka Date: Tue, 23 Dec 2008 20:06:50 +0100 Message-ID: <20081223190649.26094.68753.stgit@domain.hid> In-Reply-To: <20081223190649.26094.61372.stgit@domain.hid> References: <20081223190649.26094.61372.stgit@domain.hid> MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 7bit Sender: jan.kiszka@domain.hid Subject: [Xenomai-core] [PATCH v2 5/5] Rework x86 NMI watchdog pass-through List-Id: "Xenomai life and development \(bug reports, patches, discussions\)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: xenomai@xenomai.org Currently, Xenomai's NMI watchdog handler assumes to be called only on watchdog events. Other reasons are considered spurious, and a TSC-based method is used to detect such conditions. This has several issues - the return code of the Linux handler is ignored - KGDB's NMI events (CPU roundups) are not passed through - early_shot mechanism suffers from a signedness bug and misses too early shots - printk from NMI can cause lock-ups, but we also support non-fatal reports (ipipe tracer active) This patch therefore switches to the watchdog detection pattern that Linux uses: Check for the highest perfctr bit being zero for true timeouts. In case the watchdog did not time out, the Linux handler is invoked and its return code is properly forwarded. Finally, the early_shot reporting is dropped as it becomes pointless when KGDB is in use (and I suspect that patch 1 of this series fixes most of the original reasons). Signed-off-by: Jan Kiszka --- ksrc/arch/x86/nmi.c | 95 ++++++++++++++++++++++---------------------------- 1 files changed, 42 insertions(+), 53 deletions(-) diff --git a/ksrc/arch/x86/nmi.c b/ksrc/arch/x86/nmi.c index 69a392a..a9d9380 100644 --- a/ksrc/arch/x86/nmi.c +++ b/ksrc/arch/x86/nmi.c @@ -65,13 +65,11 @@ typedef union { struct { /* Xenomai watchdog data. */ - unsigned int flags; - unsigned long perfctr_msr; unsigned long long next_linux_check; + unsigned long perfctr_msr; + u64 perfctr_checkmask; unsigned int p4_cccr_val; - - unsigned early_shots; - unsigned long long tick_date; + unsigned int flags; }; char __pad[SMP_CACHE_BYTES]; } rthal_nmi_wd_t ____cacheline_aligned; @@ -82,6 +80,15 @@ static void (*rthal_nmi_emergency) (struct pt_regs *); #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) #define MSR_ARCH_PERFMON_PERFCTR0 0xc1 #define MSR_ARCH_PERFMON_PERFCTR1 0xc2 +union cpuid10_eax { + struct { + unsigned int version_id:8; + unsigned int num_counters:8; + unsigned int bit_width:8; + unsigned int mask_length:8; + } split; + unsigned int full; +}; static void (*rthal_linux_nmi_tick) (struct pt_regs *); #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) @@ -105,28 +112,28 @@ static int (*rthal_linux_nmi_tick) (struct pt_regs *, unsigned); #endif /* Linux >= 2.6.19 */ #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) -#define CALL_LINUX_NMI rthal_linux_nmi_tick(regs) -#define NMI_RETURN return +#define CALL_LINUX_NMI ({ rthal_linux_nmi_tick(regs); 1; }) +#define NMI_RETURN(code) return static void rthal_nmi_watchdog_tick(struct pt_regs *regs) #else /* Linux >= 2.6.19 */ #define CALL_LINUX_NMI rthal_linux_nmi_tick(regs, reason) -#define NMI_RETURN return 1 +#define NMI_RETURN(code) return (code) static int rthal_nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) #endif /* Linux >= 2.6.19 */ { int cpu = rthal_processor_id(); rthal_nmi_wd_t *wd = &rthal_nmi_wds[cpu]; unsigned long long now; + u64 perfctr; - if (wd->flags & NMI_WD_ARMED) { - if (rthal_rdtsc() - wd->tick_date < rthal_maxlat_tsc) { - ++wd->early_shots; - wd->next_linux_check = wd->tick_date + rthal_maxlat_tsc; - } else { - printk("NMI early shots: %d\n", wd->early_shots); - rthal_nmi_emergency(regs); - } - } + rdmsrl(wd->perfctr_msr, perfctr); + + if (perfctr & wd->perfctr_checkmask) + /* No watchdog tick, let Linux handle it. */ + NMI_RETURN(CALL_LINUX_NMI); + + if (wd->flags & NMI_WD_ARMED) + rthal_nmi_emergency(regs); now = rthal_rdtsc(); @@ -162,36 +169,14 @@ static int rthal_nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) else wrmsrl(wd->perfctr_msr, now - wd->next_linux_check); - NMI_RETURN; + NMI_RETURN(1); } -#ifdef CONFIG_PROC_FS -static int earlyshots_read_proc(char *page, - char **start, - off_t off, int count, int *eof, void *data) -{ - int i, len = 0; - - for_each_online_cpu(i) - len += sprintf(page + len, "CPU#%d: %u\n", - i, rthal_nmi_wds[i].early_shots); - len -= off; - if (len <= off + count) - *eof = 1; - *start = page + off; - if (len > count) - len = count; - if (len < 0) - len = 0; - - return len; -} -#endif /* CONFIG_PROC_FS */ - int rthal_nmi_request(void (*emergency) (struct pt_regs *)) { unsigned long long next_linux_check; unsigned long perfctr_msr; + u64 perfctr_checkmask; unsigned int wd_flags = 0; unsigned int p4_cccr_val = 0; int i; @@ -205,23 +190,30 @@ int rthal_nmi_request(void (*emergency) (struct pt_regs *)) switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_AMD: perfctr_msr = MSR_K7_PERFCTR0; + perfctr_checkmask = 1ULL << 47; break; case X86_VENDOR_INTEL: if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { + union cpuid10_eax eax; + if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0; else perfctr_msr = MSR_ARCH_PERFMON_PERFCTR1; + cpuid(10, &eax.full, &i, &i, &i); + perfctr_checkmask = 1ULL << (eax.split.bit_width - 1); wd_flags = NMI_WD_P6_OR_LATER | NMI_WD_31BITS; } else switch (boot_cpu_data.x86) { case 6: perfctr_msr = MSR_P6_PERFCTR0; + perfctr_checkmask = 1ULL << 39; wd_flags = NMI_WD_P6_OR_LATER | NMI_WD_31BITS; break; case 15: perfctr_msr = MSR_P4_IQ_COUNTER0; + perfctr_checkmask = 1ULL << 39; p4_cccr_val = P4_NMI_IQ_CCCR0; #ifdef CONFIG_SMP if (smp_num_siblings == 2) @@ -244,6 +236,7 @@ int rthal_nmi_request(void (*emergency) (struct pt_regs *)) wd->flags = wd_flags; wd->perfctr_msr = perfctr_msr; + wd->perfctr_checkmask = perfctr_checkmask; wd->p4_cccr_val = p4_cccr_val; wd->next_linux_check = next_linux_check; } @@ -252,12 +245,6 @@ int rthal_nmi_request(void (*emergency) (struct pt_regs *)) wmb(); nmi_watchdog_tick = &rthal_nmi_watchdog_tick; -#ifdef CONFIG_PROC_FS - rthal_add_proc_leaf("nmi_early_shots", - &earlyshots_read_proc, - NULL, NULL, rthal_proc_root); -#endif /* CONFIG_PROC_FS */ - return 0; } @@ -269,10 +256,6 @@ void rthal_nmi_release(void) if (!rthal_linux_nmi_tick) return; -#ifdef CONFIG_PROC_FS - remove_proc_entry("nmi_early_shots", rthal_proc_root); -#endif /* CONFIG_PROC_FS */ - if (wd->flags & NMI_WD_31BITS) wrmsr(wd->perfctr_msr, (u32)(0 - RTHAL_CPU_FREQ), 0); else @@ -299,6 +282,10 @@ void rthal_nmi_arm(unsigned long delay) /* Protect from an interrupt handler calling rthal_nmi_arm. */ rthal_local_irq_save(flags); wd->flags &= ~NMI_WD_ARMED; + /* + * Our watchdog must be declared unarmed before we triger the + * Linux watchdog NMI, entering rthal_nmi_watchdog_tick. + */ wmb(); if (wd->flags & NMI_WD_31BITS) wrmsr(wd->perfctr_msr, (u32)-1, 0); @@ -308,12 +295,14 @@ void rthal_nmi_arm(unsigned long delay) rthal_local_irq_restore(flags); } - wd->tick_date = rthal_rdtsc() + (delay - rthal_maxlat_tsc); - wmb(); if (wd->flags & NMI_WD_31BITS) wrmsr(wd->perfctr_msr, (u32)(0 - delay), 0); else wrmsrl(wd->perfctr_msr, 0 - delay); + /* + * New perfctr must have been written before we can declare the + * watchdog armed (avoid race with previously programmed value). + */ wmb(); wd->flags |= NMI_WD_ARMED; }