All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC] perf: Clear MSRs on kexec
@ 2015-08-03 21:32 ` Jiri Olsa
  0 siblings, 0 replies; 10+ messages in thread
From: Jiri Olsa @ 2015-08-03 21:32 UTC (permalink / raw)
  To: linux-kernel, kexec
  Cc: Ingo Molnar, Peter Zijlstra, dong.su, Vivek Goyal, Haren Myneni,
	Don Zickus

hi,
I'm getting the following message on kdump kernel start

  Broken BIOS detected, complain to your hardware vendor.\
  [Firmware Bug]: the BIOS has corrupted hw-PMU resources (MSR 38d is b0)

it seems to be caused by the NMI watchdog being configured:
the fixed counter values stay in the MSRs, which triggers the
warning in check_hw_exists() and disables perf support
in the kdump kernel.. which probably does not hurt ;-)

zeroing the MSRs during kdump shutdown seems to work (attached)
but I'm not sure that's the correct place for the kdump perf callback

thanks,
jirka


---
 arch/x86/include/asm/perf_event.h |  2 ++
 arch/x86/kernel/cpu/perf_event.c  | 23 +++++++++++++++++++++++
 arch/x86/kernel/cpu/perf_event.h  |  1 +
 arch/x86/kernel/crash.c           |  3 +++
 4 files changed, 29 insertions(+)

diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index dc0f6ed35b08..8e49668cf8fe 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -217,6 +217,7 @@ static inline u32 get_ibs_caps(void) { return 0; }
 
 #ifdef CONFIG_PERF_EVENTS
 extern void perf_events_lapic_init(void);
+extern void perf_clear_msrs(void);
 
 /*
  * Abuse bits {3,5} of the cpu eflags register. These flags are otherwise
@@ -275,6 +276,7 @@ static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
 
 static inline void perf_events_lapic_init(void)	{ }
 static inline void perf_check_microcode(void) { }
+static inline void perf_clear_msrs(void) { }
 #endif
 
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 3658de47900f..f30dbcfb6905 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -269,6 +269,27 @@ msr_fail:
 	return false;
 }
 
+void perf_clear_msrs(void)
+{
+	int i, reg, ret;
+
+	if (!x86_pmu.enabled)
+		return;
+
+	for (i = 0; i < x86_pmu.num_counters; i++) {
+		reg = x86_pmu_config_addr(i);
+		ret = wrmsrl_safe(reg, 0);
+		if (WARN_ONCE(ret, "failed to zero perf counter msr, reg %x\n", reg))
+			break;
+	}
+
+	if (x86_pmu.num_counters_fixed) {
+		reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
+		ret = wrmsrl_safe(reg, 0);
+		WARN_ONCE(ret, "failed to zero perf fixed counters msr\n");
+	}
+}
+
 static void hw_perf_event_destroy(struct perf_event *event)
 {
 	x86_release_hardware();
@@ -1689,6 +1710,8 @@ static int __init init_hw_perf_events(void)
 	if (!check_hw_exists())
 		return 0;
 
+	x86_pmu.enabled = true;
+
 	pr_cont("%s PMU driver.\n", x86_pmu.name);
 
 	x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 3e7fd27dfe20..ca8a5068f8a0 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -508,6 +508,7 @@ struct x86_pmu {
 	 */
 	const char	*name;
 	int		version;
+	bool		enabled;
 	int		(*handle_irq)(struct pt_regs *);
 	void		(*disable_all)(void);
 	void		(*enable_all)(int added);
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index e068d6683dba..20ed1ffdab8c 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -35,6 +35,7 @@
 #include <asm/cpu.h>
 #include <asm/reboot.h>
 #include <asm/virtext.h>
+#include <asm/perf_event.h>
 
 /* Alignment required for elf header segment */
 #define ELF_CORE_HEADER_ALIGN   4096
@@ -128,6 +129,7 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
 	cpu_emergency_svm_disable();
 
 	disable_local_APIC();
+	perf_clear_msrs();
 }
 
 static void kdump_nmi_shootdown_cpus(void)
@@ -182,6 +184,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
 	hpet_disable();
 #endif
 	crash_save_cpu(regs, safe_smp_processor_id());
+	perf_clear_msrs();
 }
 
 #ifdef CONFIG_KEXEC_FILE
-- 
2.4.3


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [RFC] perf: Clear MSRs on kexec
@ 2015-08-03 21:32 ` Jiri Olsa
  0 siblings, 0 replies; 10+ messages in thread
From: Jiri Olsa @ 2015-08-03 21:32 UTC (permalink / raw)
  To: linux-kernel, kexec
  Cc: Don Zickus, Peter Zijlstra, dong.su, Haren Myneni, Ingo Molnar,
	Vivek Goyal

hi,
I'm getting the following message on kdump kernel start

  Broken BIOS detected, complain to your hardware vendor.\
  [Firmware Bug]: the BIOS has corrupted hw-PMU resources (MSR 38d is b0)

it seems to be caused by the NMI watchdog being configured:
the fixed counter values stay in the MSRs, which triggers the
warning in check_hw_exists() and disables perf support
in the kdump kernel.. which probably does not hurt ;-)

zeroing the MSRs during kdump shutdown seems to work (attached)
but I'm not sure that's the correct place for the kdump perf callback

thanks,
jirka


---
 arch/x86/include/asm/perf_event.h |  2 ++
 arch/x86/kernel/cpu/perf_event.c  | 23 +++++++++++++++++++++++
 arch/x86/kernel/cpu/perf_event.h  |  1 +
 arch/x86/kernel/crash.c           |  3 +++
 4 files changed, 29 insertions(+)

diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index dc0f6ed35b08..8e49668cf8fe 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -217,6 +217,7 @@ static inline u32 get_ibs_caps(void) { return 0; }
 
 #ifdef CONFIG_PERF_EVENTS
 extern void perf_events_lapic_init(void);
+extern void perf_clear_msrs(void);
 
 /*
  * Abuse bits {3,5} of the cpu eflags register. These flags are otherwise
@@ -275,6 +276,7 @@ static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
 
 static inline void perf_events_lapic_init(void)	{ }
 static inline void perf_check_microcode(void) { }
+static inline void perf_clear_msrs(void) { }
 #endif
 
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 3658de47900f..f30dbcfb6905 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -269,6 +269,27 @@ msr_fail:
 	return false;
 }
 
+void perf_clear_msrs(void)
+{
+	int i, reg, ret;
+
+	if (!x86_pmu.enabled)
+		return;
+
+	for (i = 0; i < x86_pmu.num_counters; i++) {
+		reg = x86_pmu_config_addr(i);
+		ret = wrmsrl_safe(reg, 0);
+		if (WARN_ONCE(ret, "failed to zero perf counter msr, reg %x\n", reg))
+			break;
+	}
+
+	if (x86_pmu.num_counters_fixed) {
+		reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
+		ret = wrmsrl_safe(reg, 0);
+		WARN_ONCE(ret, "failed to zero perf fixed counters msr\n");
+	}
+}
+
 static void hw_perf_event_destroy(struct perf_event *event)
 {
 	x86_release_hardware();
@@ -1689,6 +1710,8 @@ static int __init init_hw_perf_events(void)
 	if (!check_hw_exists())
 		return 0;
 
+	x86_pmu.enabled = true;
+
 	pr_cont("%s PMU driver.\n", x86_pmu.name);
 
 	x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 3e7fd27dfe20..ca8a5068f8a0 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -508,6 +508,7 @@ struct x86_pmu {
 	 */
 	const char	*name;
 	int		version;
+	bool		enabled;
 	int		(*handle_irq)(struct pt_regs *);
 	void		(*disable_all)(void);
 	void		(*enable_all)(int added);
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index e068d6683dba..20ed1ffdab8c 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -35,6 +35,7 @@
 #include <asm/cpu.h>
 #include <asm/reboot.h>
 #include <asm/virtext.h>
+#include <asm/perf_event.h>
 
 /* Alignment required for elf header segment */
 #define ELF_CORE_HEADER_ALIGN   4096
@@ -128,6 +129,7 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
 	cpu_emergency_svm_disable();
 
 	disable_local_APIC();
+	perf_clear_msrs();
 }
 
 static void kdump_nmi_shootdown_cpus(void)
@@ -182,6 +184,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
 	hpet_disable();
 #endif
 	crash_save_cpu(regs, safe_smp_processor_id());
+	perf_clear_msrs();
 }
 
 #ifdef CONFIG_KEXEC_FILE
-- 
2.4.3


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [RFC] perf: Clear MSRs on kexec
  2015-08-03 21:32 ` Jiri Olsa
@ 2015-08-03 21:54   ` Peter Zijlstra
  -1 siblings, 0 replies; 10+ messages in thread
From: Peter Zijlstra @ 2015-08-03 21:54 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: linux-kernel, kexec, Ingo Molnar, dong.su, Vivek Goyal,
	Haren Myneni, Don Zickus

On Mon, Aug 03, 2015 at 11:32:28PM +0200, Jiri Olsa wrote:
> hi,
> I'm getting following message on the kdump kernel start
> 
>   Broken BIOS detected, complain to your hardware vendor.\
>   [Firmware Bug]: the BIOS has corrupted hw-PMU resources (MSR 38d is b0)
> 
> it seems to be caused by NMI watchdog being configured
> and fixed counter values stays in MSRs, which triggers
> warning in check_hw_exists and disables perf support
> in kdump kernel.. which probably does not hurt ;-)
> 
> zeroing MSRs during kdump shutdown seems to work (attached)
> but I'm not sure thats correct place for kdump perf callback

Right, but why bother? All that kernel needs to do is write a memory
dump to someplace and reboot, right? The less you do, the less can go
wrong.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC] perf: Clear MSRs on kexec
@ 2015-08-03 21:54   ` Peter Zijlstra
  0 siblings, 0 replies; 10+ messages in thread
From: Peter Zijlstra @ 2015-08-03 21:54 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Don Zickus, kexec, linux-kernel, Ingo Molnar, Haren Myneni,
	dong.su, Vivek Goyal

On Mon, Aug 03, 2015 at 11:32:28PM +0200, Jiri Olsa wrote:
> hi,
> I'm getting following message on the kdump kernel start
> 
>   Broken BIOS detected, complain to your hardware vendor.\
>   [Firmware Bug]: the BIOS has corrupted hw-PMU resources (MSR 38d is b0)
> 
> it seems to be caused by NMI watchdog being configured
> and fixed counter values stays in MSRs, which triggers
> warning in check_hw_exists and disables perf support
> in kdump kernel.. which probably does not hurt ;-)
> 
> zeroing MSRs during kdump shutdown seems to work (attached)
> but I'm not sure thats correct place for kdump perf callback

Right, but why bother? All that kernel needs to do is write a memory
dump to someplace and reboot, right? The less you do, the less can go
wrong.

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC] perf: Clear MSRs on kexec
  2015-08-03 21:54   ` Peter Zijlstra
@ 2015-08-04  5:52     ` Jiri Olsa
  -1 siblings, 0 replies; 10+ messages in thread
From: Jiri Olsa @ 2015-08-04  5:52 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-kernel, kexec, Ingo Molnar, dong.su, Vivek Goyal,
	Haren Myneni, Don Zickus

On Mon, Aug 03, 2015 at 11:54:17PM +0200, Peter Zijlstra wrote:
> On Mon, Aug 03, 2015 at 11:32:28PM +0200, Jiri Olsa wrote:
> > hi,
> > I'm getting following message on the kdump kernel start
> > 
> >   Broken BIOS detected, complain to your hardware vendor.\
> >   [Firmware Bug]: the BIOS has corrupted hw-PMU resources (MSR 38d is b0)
> > 
> > it seems to be caused by NMI watchdog being configured
> > and fixed counter values stays in MSRs, which triggers
> > warning in check_hw_exists and disables perf support
> > in kdump kernel.. which probably does not hurt ;-)
> > 
> > zeroing MSRs during kdump shutdown seems to work (attached)
> > but I'm not sure thats correct place for kdump perf callback
> 
> Right, but why bother? All that kernel needs to do is write a memory
> dump to someplace and reboot, right? The less you do, the less can go
> wrong.

well, I was hunting that 'Broken BIOS..' message which is wrong

I wouldn't think anyone wants to use perf under kdump kernel,
but you never know ;-)

jirka

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC] perf: Clear MSRs on kexec
@ 2015-08-04  5:52     ` Jiri Olsa
  0 siblings, 0 replies; 10+ messages in thread
From: Jiri Olsa @ 2015-08-04  5:52 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Don Zickus, kexec, linux-kernel, Ingo Molnar, Haren Myneni,
	dong.su, Vivek Goyal

On Mon, Aug 03, 2015 at 11:54:17PM +0200, Peter Zijlstra wrote:
> On Mon, Aug 03, 2015 at 11:32:28PM +0200, Jiri Olsa wrote:
> > hi,
> > I'm getting following message on the kdump kernel start
> > 
> >   Broken BIOS detected, complain to your hardware vendor.\
> >   [Firmware Bug]: the BIOS has corrupted hw-PMU resources (MSR 38d is b0)
> > 
> > it seems to be caused by NMI watchdog being configured
> > and fixed counter values stays in MSRs, which triggers
> > warning in check_hw_exists and disables perf support
> > in kdump kernel.. which probably does not hurt ;-)
> > 
> > zeroing MSRs during kdump shutdown seems to work (attached)
> > but I'm not sure thats correct place for kdump perf callback
> 
> Right, but why bother? All that kernel needs to do is write a memory
> dump to someplace and reboot, right? The less you do, the less can go
> wrong.

well, I was hunting that 'Broken BIOS..' message which is wrong

I wouldn't think anyone wants to use perf under kdump kernel,
but you never know ;-)

jirka

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC] perf: Clear MSRs on kexec
  2015-08-04  5:52     ` Jiri Olsa
@ 2015-08-04  7:46       ` Peter Zijlstra
  -1 siblings, 0 replies; 10+ messages in thread
From: Peter Zijlstra @ 2015-08-04  7:46 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: linux-kernel, kexec, Ingo Molnar, dong.su, Vivek Goyal,
	Haren Myneni, Don Zickus

On Tue, Aug 04, 2015 at 07:52:29AM +0200, Jiri Olsa wrote:
> On Mon, Aug 03, 2015 at 11:54:17PM +0200, Peter Zijlstra wrote:
> > On Mon, Aug 03, 2015 at 11:32:28PM +0200, Jiri Olsa wrote:
> > > hi,
> > > I'm getting following message on the kdump kernel start
> > > 
> > >   Broken BIOS detected, complain to your hardware vendor.\
> > >   [Firmware Bug]: the BIOS has corrupted hw-PMU resources (MSR 38d is b0)
> > > 
> > > it seems to be caused by NMI watchdog being configured
> > > and fixed counter values stays in MSRs, which triggers
> > > warning in check_hw_exists and disables perf support
> > > in kdump kernel.. which probably does not hurt ;-)
> > > 
> > > zeroing MSRs during kdump shutdown seems to work (attached)
> > > but I'm not sure thats correct place for kdump perf callback
> > 
> > Right, but why bother? All that kernel needs to do is write a memory
> > dump to someplace and reboot, right? The less you do, the less can go
> > wrong.
> 
> well, I was hunting that 'Broken BIOS..' message which is wrong

Not really. The previous kernel being the BIOS in this case did
leave the counters in a funky state.

> I wouldn't think anyone wants to use perf under kdump kernel,
> but you never know ;-)

Yeah, I think we knew about this back then (might've been 2010) and
chose to not 'fix' it.

http://lkml.iu.edu/hypermail/linux/kernel/1012.1/00380.html

Is what google finds me.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC] perf: Clear MSRs on kexec
@ 2015-08-04  7:46       ` Peter Zijlstra
  0 siblings, 0 replies; 10+ messages in thread
From: Peter Zijlstra @ 2015-08-04  7:46 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Don Zickus, kexec, linux-kernel, Ingo Molnar, Haren Myneni,
	dong.su, Vivek Goyal

On Tue, Aug 04, 2015 at 07:52:29AM +0200, Jiri Olsa wrote:
> On Mon, Aug 03, 2015 at 11:54:17PM +0200, Peter Zijlstra wrote:
> > On Mon, Aug 03, 2015 at 11:32:28PM +0200, Jiri Olsa wrote:
> > > hi,
> > > I'm getting following message on the kdump kernel start
> > > 
> > >   Broken BIOS detected, complain to your hardware vendor.\
> > >   [Firmware Bug]: the BIOS has corrupted hw-PMU resources (MSR 38d is b0)
> > > 
> > > it seems to be caused by NMI watchdog being configured
> > > and fixed counter values stays in MSRs, which triggers
> > > warning in check_hw_exists and disables perf support
> > > in kdump kernel.. which probably does not hurt ;-)
> > > 
> > > zeroing MSRs during kdump shutdown seems to work (attached)
> > > but I'm not sure thats correct place for kdump perf callback
> > 
> > Right, but why bother? All that kernel needs to do is write a memory
> > dump to someplace and reboot, right? The less you do, the less can go
> > wrong.
> 
> well, I was hunting that 'Broken BIOS..' message which is wrong

Not really. The previous kernel being the BIOS in this case did
leave the counters in a funky state.

> I wouldn't think anyone wants to use perf under kdump kernel,
> but you never know ;-)

Yeah, I think we knew about this back then (might've been 2010) and
chose to not 'fix' it.

http://lkml.iu.edu/hypermail/linux/kernel/1012.1/00380.html

Is what google finds me.

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC] perf: Clear MSRs on kexec
  2015-08-04  5:52     ` Jiri Olsa
@ 2015-08-04 13:57       ` Don Zickus
  -1 siblings, 0 replies; 10+ messages in thread
From: Don Zickus @ 2015-08-04 13:57 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Peter Zijlstra, linux-kernel, kexec, Ingo Molnar, dong.su,
	Vivek Goyal, Haren Myneni

On Tue, Aug 04, 2015 at 07:52:29AM +0200, Jiri Olsa wrote:
> On Mon, Aug 03, 2015 at 11:54:17PM +0200, Peter Zijlstra wrote:
> > On Mon, Aug 03, 2015 at 11:32:28PM +0200, Jiri Olsa wrote:
> > > hi,
> > > I'm getting following message on the kdump kernel start
> > > 
> > >   Broken BIOS detected, complain to your hardware vendor.\
> > >   [Firmware Bug]: the BIOS has corrupted hw-PMU resources (MSR 38d is b0)
> > > 
> > > it seems to be caused by NMI watchdog being configured
> > > and fixed counter values stays in MSRs, which triggers
> > > warning in check_hw_exists and disables perf support
> > > in kdump kernel.. which probably does not hurt ;-)
> > > 
> > > zeroing MSRs during kdump shutdown seems to work (attached)
> > > but I'm not sure thats correct place for kdump perf callback
> > 
> > Right, but why bother? All that kernel needs to do is write a memory
> > dump to someplace and reboot, right? The less you do, the less can go
> > wrong.
> 
> well, I was hunting that 'Broken BIOS..' message which is wrong

It was a limitation we understood when we implemented
check_hw_exists() years ago.

> 
> I wouldn't think anyone wants to use perf under kdump kernel,
> but you never know ;-)

The number of warning messages in the kdump kernel is significant; this
message is just another one to add to the noise.  Heck, we have to poll
irqs because if we enabled them, drivers would crash left and right due to
leftover pending irqs from the previous kernel.

As Peter said, how much code are we going to put in the crash path to make
kdump less noisy?

Eric B.  usually frowns pretty heavily on this.

Cheers,
Don

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC] perf: Clear MSRs on kexec
@ 2015-08-04 13:57       ` Don Zickus
  0 siblings, 0 replies; 10+ messages in thread
From: Don Zickus @ 2015-08-04 13:57 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Peter Zijlstra, kexec, linux-kernel, Ingo Molnar, Haren Myneni,
	dong.su, Vivek Goyal

On Tue, Aug 04, 2015 at 07:52:29AM +0200, Jiri Olsa wrote:
> On Mon, Aug 03, 2015 at 11:54:17PM +0200, Peter Zijlstra wrote:
> > On Mon, Aug 03, 2015 at 11:32:28PM +0200, Jiri Olsa wrote:
> > > hi,
> > > I'm getting following message on the kdump kernel start
> > > 
> > >   Broken BIOS detected, complain to your hardware vendor.\
> > >   [Firmware Bug]: the BIOS has corrupted hw-PMU resources (MSR 38d is b0)
> > > 
> > > it seems to be caused by NMI watchdog being configured
> > > and fixed counter values stays in MSRs, which triggers
> > > warning in check_hw_exists and disables perf support
> > > in kdump kernel.. which probably does not hurt ;-)
> > > 
> > > zeroing MSRs during kdump shutdown seems to work (attached)
> > > but I'm not sure thats correct place for kdump perf callback
> > 
> > Right, but why bother? All that kernel needs to do is write a memory
> > dump to someplace and reboot, right? The less you do, the less can go
> > wrong.
> 
> well, I was hunting that 'Broken BIOS..' message which is wrong

It was a limitation we understood when we implemented
check_hw_exists() years ago.

> 
> I wouldn't think anyone wants to use perf under kdump kernel,
> but you never know ;-)

The number of warning messages in the kdump kernel is significant; this
message is just another one to add to the noise.  Heck, we have to poll
irqs because if we enabled them, drivers would crash left and right due to
leftover pending irqs from the previous kernel.

As Peter said, how much code are we going to put in the crash path to make
kdump less noisy?

Eric B.  usually frowns pretty heavily on this.

Cheers,
Don

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2015-08-04 13:58 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-08-03 21:32 [RFC] perf: Clear MSRs on kexec Jiri Olsa
2015-08-03 21:32 ` Jiri Olsa
2015-08-03 21:54 ` Peter Zijlstra
2015-08-03 21:54   ` Peter Zijlstra
2015-08-04  5:52   ` Jiri Olsa
2015-08-04  5:52     ` Jiri Olsa
2015-08-04  7:46     ` Peter Zijlstra
2015-08-04  7:46       ` Peter Zijlstra
2015-08-04 13:57     ` Don Zickus
2015-08-04 13:57       ` Don Zickus

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.