From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S934039AbcI0OYs (ORCPT ); Tue, 27 Sep 2016 10:24:48 -0400 Received: from mail-pf0-f193.google.com ([209.85.192.193]:36827 "EHLO mail-pf0-f193.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S933838AbcI0OYM (ORCPT ); Tue, 27 Sep 2016 10:24:12 -0400 From: Sergey Senozhatsky To: Petr Mladek , Jan Kara Cc: Andrew Morton , Tejun Heo , Calvin Owens , linux-kernel@vger.kernel.org, Sergey Senozhatsky , Sergey Senozhatsky Subject: [RFC][PATCH 7/7] printk: new printk() recursion detection Date: Tue, 27 Sep 2016 23:22:37 +0900 Message-Id: <20160927142237.5539-8-sergey.senozhatsky@gmail.com> X-Mailer: git-send-email 2.10.0.372.g6fe1b14 In-Reply-To: <20160927142237.5539-1-sergey.senozhatsky@gmail.com> References: <20160927142237.5539-1-sergey.senozhatsky@gmail.com> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org We can't use alt_printk when we are flushing alt_printk buffers to logbuf, because otherwise we would just flush messages back to alt_printk buffers. At the same time, this opens a door for potential deadlock, should flushing endup in recursive printk() call: irq_work_run() alt_printk_flush_line() printk() WARN_ON() ^^^^^^^^^ depending on circumstances this can result in printk-deadlock. To cope with it, we can use alt_printk context ->entry_count: - __alt_printk_flush() sets ->entry_count to 1 - every printk() call from alt_printk_flush_foo() calls alt_printk_enter() and alt_printk_exit() - alt_printk_enter() increments ->entry_count and tests the value: - if it's `==2' then we are in normal alt_printk_flush()->printk() path - if it's `>=3' then we are in recursion. The first time we detect recursion (->entry_count == 3) we switch from default printk() to alt_printk(), to avoid possible deadlocks. - in alt_printk_exit() we test if we were in normal printk() path or in printk() recursion and rollback to default printk() if so; and decrement ->entry_count. ->entry_count is a per-cpu variable, which must be touched only with local IRQs disabled. NMI should never touch ->entry_count. An example: WARN_ON() from printk() while alt_printk_flush() flushed messages from alt_printk buffer to a logbuf: ------------[ cut here ]------------ WARNING: CPU: 1 PID: 332 at kernel/printk/printk.c:1775 vprintk_emit+0x73/0x398 CPU: 1 PID: 332 Comm: bash Call Trace: [] dump_stack+0x68/0x92 [] __warn+0xb8/0xd3 [] warn_slowpath_null+0x18/0x1a [] vprintk_emit+0x73/0x398 [] vprintk_default+0x18/0x1a [] printk+0x43/0x4b [] alt_printk_flush_line+0x27/0x29 [] __alt_printk_flush+0xd7/0x12d [] irq_work_run_list+0x43/0x67 [] irq_work_run+0x2a/0x44 [] smp_trace_irq_work_interrupt+0x29/0x30 [] smp_irq_work_interrupt+0x9/0xb [] irq_work_interrupt+0x86/0x90 [] ? _raw_spin_unlock+0x2c/0x3f [] ? console_unlock+0x528/0x559 [] vprintk_emit+0x387/0x398 [] vprintk_default+0x18/0x1a [] printk+0x43/0x4b [..] [] entry_SYSCALL_64_fastpath+0x18/0xad ---[ end trace ]--- Signed-off-by: Sergey Senozhatsky --- kernel/printk/alt_printk.c | 26 +++++++++++++++++++++++--- kernel/printk/internal.h | 5 +++++ kernel/printk/printk.c | 38 ++------------------------------------ 3 files changed, 30 insertions(+), 39 deletions(-) diff --git a/kernel/printk/alt_printk.c b/kernel/printk/alt_printk.c index 8ac1daa..0302ea3 100644 --- a/kernel/printk/alt_printk.c +++ b/kernel/printk/alt_printk.c @@ -69,6 +69,9 @@ atomic_t nmi_message_lost; #define MAX_ALT_PRINTK_CTX 1 #endif +#define CTX_ENTRY_FLUSHING 2 +#define CTX_ENTRY_FLUSHING_RECURSION (CTX_ENTRY_FLUSHING + 1) + struct alt_printk_ctx { atomic_t idx; unsigned int entry_count; @@ -189,10 +192,15 @@ void alt_printk_enter(void) * will see ->entry_count > 2. */ ctx->entry_count++; - if (ctx->entry_count > 1) + if (ctx->entry_count == CTX_ENTRY_FLUSHING) return; - /* @TODO: do something sensible in case of printk() recursion */ + /* + * Once ->entry_coun == CTX_ENTRY_FLUSHING_RECURSION printk() + * switches to alt_printk() buffers. + */ + if (ctx->entry_count > CTX_ENTRY_FLUSHING_RECURSION) + return; __lockless_printk_enter(vprintk_alt); } @@ -202,11 +210,23 @@ void alt_printk_exit(void) { struct alt_printk_ctx *ctx = this_cpu_ptr(&alt_printk_ctx); - if (ctx->entry_count == 1) + /* + * If we are returning from a normal alt_printk context or + * from printk() recursion -- switch back to default printk. + */ + if (ctx->entry_count == 1 || + ctx->entry_count == CTX_ENTRY_FLUSHING_RECURSION) __lockless_printk_exit(); ctx->entry_count--; } +bool recursed_printk_call(void) +{ + struct alt_printk_ctx *ctx = this_cpu_ptr(&alt_printk_ctx); + + return oops_in_progress && (ctx->entry_count > CTX_ENTRY_FLUSHING); +} + static void alt_printk_flush_line(const char *text, int len) { /* diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index b030ca0..7d082b7 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -34,11 +34,16 @@ DECLARE_PER_CPU(printk_func_t, printk_func); void alt_printk_enter(void); void alt_printk_exit(void); +bool recursed_printk_call(void); #else void alt_printk_enter(void) { } void alt_printk_exit(void) { } +bool recursed_printk_call(void) +{ + return false; +} #endif /* CONFIG_PRINTK */ diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index e5dacfb..47f1e0e 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1748,18 +1748,14 @@ asmlinkage int vprintk_emit(int facility, int level, const char *dict, size_t dictlen, const char *fmt, va_list args) { - static bool recursion_bug; static char textbuf[LOG_LINE_MAX]; char *text = textbuf; size_t text_len = 0; enum log_flags lflags = 0; unsigned long flags; - int this_cpu; int printed_len = 0; int nmi_message_lost; bool in_sched = false; - /* cpu currently holding logbuf_lock in this function */ - static unsigned int logbuf_cpu = UINT_MAX; if (level == LOGLEVEL_SCHED) { level = LOGLEVEL_DEFAULT; @@ -1770,42 +1766,13 @@ asmlinkage int vprintk_emit(int facility, int level, printk_delay(); local_irq_save(flags); - this_cpu = smp_processor_id(); + alt_printk_enter(); - /* - * Ouch, printk recursed into itself! - */ - if (unlikely(logbuf_cpu == this_cpu)) { - /* - * If a crash is occurring during printk() on this CPU, - * then try to get the crash message out but make sure - * we can't deadlock. Otherwise just return to avoid the - * recursion and return - but flag the recursion so that - * it can be printed at the next appropriate moment: - */ - if (!oops_in_progress && !lockdep_recursing(current)) { - recursion_bug = true; - local_irq_restore(flags); - return 0; - } + if (recursed_printk_call()) zap_locks(); - } - alt_printk_enter(); /* This stops the holder of console_sem just where we want him */ raw_spin_lock(&logbuf_lock); - logbuf_cpu = this_cpu; - - if (unlikely(recursion_bug)) { - static const char recursion_msg[] = - "BUG: recent printk recursion!"; - - recursion_bug = false; - /* emit KERN_CRIT message */ - printed_len += log_store(0, 2, LOG_PREFIX|LOG_NEWLINE, 0, - NULL, 0, recursion_msg, - strlen(recursion_msg)); - } nmi_message_lost = get_nmi_message_lost(); if (unlikely(nmi_message_lost)) { @@ -1898,7 +1865,6 @@ asmlinkage int vprintk_emit(int facility, int level, dict, dictlen, text, text_len); } - logbuf_cpu = UINT_MAX; raw_spin_unlock(&logbuf_lock); alt_printk_exit(); local_irq_restore(flags); -- 2.10.0.372.g6fe1b14