On Wed, May 20, 2015 at 11:28:00AM +0200, Borislav Petkov wrote: > Date: Wed, 20 May 2015 11:28:00 +0200 > From: Borislav Petkov > To: "Chen, Gong" , tony.luck@intel.com > Cc: linux-kernel@vger.kernel.org > Subject: Re: [PATCH 4/4 Rebase] x86, MCE: Avoid potential deadlock in MCE > context > User-Agent: Mutt/1.5.23 (2014-03-12) > > On Wed, May 20, 2015 at 03:35:38PM -0400, Chen, Gong wrote: > > Printing in MCE context is a no-no, currently, as printk is not > > NMI-safe. If some of the notifiers on the MCE chain call *printk*, we > > may deadlock. In order to avoid that, delay printk into process context > > to fix it. > > > > Background info at: https://lkml.org/lkml/2014/6/27/26 > > > > Reported-by: Xie XiuQi > > Signed-off-by: Chen, Gong > > Link: http://lkml.kernel.org/r/1406797523-28710-6-git-send-email-gong.chen@linux.intel.com > > [ Boris: rewrite a bit. ] > > Signed-off-by: Borislav Petkov > > --- > > arch/x86/include/asm/mce.h | 1 + > > arch/x86/kernel/cpu/mcheck/mce-apei.c | 2 +- > > arch/x86/kernel/cpu/mcheck/mce.c | 8 ++++++-- > > arch/x86/kernel/cpu/mcheck/mce_intel.c | 1 - > > arch/x86/kernel/cpu/mcheck/therm_throt.c | 1 + > > arch/x86/kernel/cpu/mcheck/threshold.c | 1 + > > 6 files changed, 10 insertions(+), 4 deletions(-) > > .... > > > diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c > > index 1af51b1586d7..2733f275237d 100644 > > --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c > > +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c > > @@ -427,6 +427,7 @@ static inline void __smp_thermal_interrupt(void) > > { > > inc_irq_stat(irq_thermal_count); > > smp_thermal_vector(); > > + mce_queue_irq_work(); > > Hmm, at a second glance, this looks wrong. I think we should do that > call in intel_thermal_interrupt(). 
> > > asmlinkage __visible void smp_thermal_interrupt(struct pt_regs *regs) > > diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c > > index 7245980186ee..d695faa234eb 100644 > > --- a/arch/x86/kernel/cpu/mcheck/threshold.c > > +++ b/arch/x86/kernel/cpu/mcheck/threshold.c > > @@ -22,6 +22,7 @@ static inline void __smp_threshold_interrupt(void) > > { > > inc_irq_stat(irq_threshold_count); > > mce_threshold_vector(); > > + mce_queue_irq_work(); > > Same here. > > mce_queue_irq_work() call should be issued in both AMD and Intel > threshold handlers but not in the generic one which is unlikely to queue > any MCE... > > Right? > Since AMD doesn't queue any MCE, it should definitely only cover Intel. Please check out the following patch: ----8<---- From: "Chen, Gong" Date: Fri, 22 May 2015 17:02:50 -0400 Subject: [PATCH 4/4 rebase v2] x86, MCE: Avoid potential deadlock in MCE context Printing in MCE context is a no-no, currently, as printk is not NMI-safe. If some of the notifiers on the MCE chain call *printk*, we may deadlock. In order to avoid that, delay printk into process context to fix it. Background info at: https://lkml.org/lkml/2014/6/27/26 v1 -> v2: move mce_queue_irq_work call into Intel specific logic Reported-by: Xie XiuQi Signed-off-by: Chen, Gong Link: http://lkml.kernel.org/r/1406797523-28710-6-git-send-email-gong.chen@linux.intel.com [ Boris: rewrite a bit. 
] Signed-off-by: Borislav Petkov --- arch/x86/include/asm/mce.h | 1 + arch/x86/kernel/cpu/mcheck/mce-apei.c | 2 +- arch/x86/kernel/cpu/mcheck/mce.c | 8 ++++++-- arch/x86/kernel/cpu/mcheck/mce_intel.c | 2 +- arch/x86/kernel/cpu/mcheck/therm_throt.c | 2 ++ 5 files changed, 11 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index d16f983f46f5..781432dd8123 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -198,6 +198,7 @@ enum mcp_flags { bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b); int mce_notify_irq(void); +void mce_queue_irq_work(void); DECLARE_PER_CPU(struct mce, injectm); diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c index a1aef9533154..380e3ac8fb62 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-apei.c +++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c @@ -57,7 +57,7 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err) m.addr = mem_err->physical_addr; mce_log(&m); - mce_notify_irq(); + mce_queue_irq_work(); } EXPORT_SYMBOL_GPL(apei_mce_report_mem_error); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index b369b5fcda1d..a3be97961e22 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -156,7 +156,7 @@ void mce_log(struct mce *mce) /* Emit the trace record: */ trace_mce_record(mce); - atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce); + mce_genpool_add(mce); mce->finished = 0; wmb(); @@ -486,6 +486,11 @@ static void mce_irq_work_cb(struct irq_work *entry) mce_schedule_work(); } +void mce_queue_irq_work(void) +{ + irq_work_queue(&mce_irq_work); +} + static void mce_report_event(struct pt_regs *regs) { if (regs->flags & (X86_VM_MASK|X86_EFLAGS_IF)) { @@ -1105,7 +1110,6 @@ void do_machine_check(struct pt_regs *regs, long error_code) /* assuming valid severity level != 0 */ m.severity = severity; m.usable_addr = mce_usable_address(&m); 
- mce_genpool_add(&m); mce_log(&m); diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index b4a41cf030ed..3392002e2911 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -216,7 +216,7 @@ static void intel_threshold_interrupt(void) return; machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned)); - mce_notify_irq(); + mce_queue_irq_work(); } /* diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 1af51b1586d7..809a1e8f7fbd 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -413,6 +413,8 @@ static void intel_thermal_interrupt(void) POWER_LIMIT_EVENT, PACKAGE_LEVEL); } + + mce_queue_irq_work(); } static void unexpected_thermal_interrupt(void) -- 2.3.2