From: Sergey Senozhatsky
To: Petr Mladek, Jan Kara
Cc: Andrew Morton, Tejun Heo, Calvin Owens, linux-kernel@vger.kernel.org,
	Sergey Senozhatsky, Sergey Senozhatsky
Subject: [RFC][PATCH 3/7] printk: introduce per-cpu alt_print seq buffer
Date: Tue, 27 Sep 2016 23:22:33 +0900
Message-Id: <20160927142237.5539-4-sergey.senozhatsky@gmail.com>
X-Mailer: git-send-email 2.10.0.372.g6fe1b14
In-Reply-To: <20160927142237.5539-1-sergey.senozhatsky@gmail.com>
References: <20160927142237.5539-1-sergey.senozhatsky@gmail.com>

This patch extends the idea of NMI per-CPU buffers to regions
that may cause recursive printk() calls and possible deadlocks.
Namely, printk() can't handle printk() calls from scheduler code
or from lock debugging code (spin_dump(), for instance), because
those may be called with `sem->lock' already taken, or with any of
the critical scheduler locks (p->pi_lock, etc.) held. An example
of such a deadlock:

 vprintk_emit()
  console_unlock()
   up()                  << raw_spin_lock_irqsave(&sem->lock, flags)
    wake_up_process()
     try_to_wake_up()
      ttwu_queue()
       ttwu_do_activate()
        ttwu_activate()
         activate_task()
          enqueue_task()
           enqueue_task_fair()
            cfs_rq_of()
             task_of()
              WARN_ON_ONCE(!entity_is_task(se))
               vprintk_emit()
                console_trylock()
                 down_trylock()
                  raw_spin_lock_irqsave(&sem->lock, flags)
                  ^^^^ deadlock

and there are other cases.

Just like the NMI implementation, the solution uses a per-CPU
`printk_func' pointer to redirect printk() calls to a 'safe'
callback, which stores messages in a per-CPU buffer and flushes
them back to the logbuf buffer later.

Usage example:

 printk()
  local_irq_save()
  alt_printk_enter()
  //
  //  any printk() call from here will end up in vprintk_alt(),
  //  which stores messages in a special per-CPU buffer
  //
  alt_printk_exit()
  local_irq_restore()

This patch only adds alt_printk support; we don't use it yet.

Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
---
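Not part of the commit message: a minimal caller-side sketch of the
intended usage, assuming later patches in the series wrap
deadlock-prone regions like this; the function name is made up for
illustration. A standalone userspace model of the `printk_func'
save/restore nesting follows the patch.

	#include <linux/irqflags.h>
	#include <linux/printk.h>
	#include "internal.h"	/* alt_printk_enter()/alt_printk_exit() */

	static void example_deadlock_prone_section(void)	/* hypothetical */
	{
		unsigned long flags;

		/*
		 * IRQs must be off: alt_printk_enter()/exit() touch
		 * per-CPU state and may only be preempted by NMI.
		 */
		local_irq_save(flags);
		alt_printk_enter();

		/*
		 * Redirected to vprintk_alt(): stored in the per-CPU
		 * alt_print_seq buffer, no logbuf/semaphore locks taken.
		 */
		printk("deadlock-prone message\n");

		alt_printk_exit();
		local_irq_restore(flags);

		/* The buffer is flushed back to logbuf later via irq_work. */
	}
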
 kernel/printk/alt_printk.c | 160 +++++++++++++++++++++++++++++++++++++++++----
 kernel/printk/internal.h   |  12 ++++
 2 files changed, 158 insertions(+), 14 deletions(-)

diff --git a/kernel/printk/alt_printk.c b/kernel/printk/alt_printk.c
index 7178661..8978390 100644
--- a/kernel/printk/alt_printk.c
+++ b/kernel/printk/alt_printk.c
@@ -1,5 +1,5 @@
 /*
- * alt_printk.c - Safe printk in NMI context
+ * alt_printk.c - Safe printk for printk-deadlock-prone contexts
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -53,24 +53,39 @@ struct alt_printk_seq_buf {
 static DEFINE_PER_CPU(struct alt_printk_seq_buf, nmi_print_seq);
 
 /*
- * Safe printk() for NMI context. It uses a per-CPU buffer to
- * store the message. NMIs are not nested, so there is always only
- * one writer running. But the buffer might get flushed from another
- * CPU, so we need to be careful.
+ * There can be two alt_printk contexts at most - a `normal' alt_printk
+ * context and an NMI alt_printk context. The normal alt_printk context
+ * is the one that a direct caller of printk() sets up (either a process
+ * or an IRQ); it can be preempted only by NMI (if the platform supports
+ * NMI). The NMI context can preempt the normal alt_printk context, but
+ * cannot be preempted on its own.
  */
-static int vprintk_nmi(const char *fmt, va_list args)
+#ifdef CONFIG_PRINTK_NMI
+#define MAX_ALT_PRINTK_CTX	2
+#else
+#define MAX_ALT_PRINTK_CTX	1
+#endif
+
+struct alt_printk_ctx {
+	atomic_t	idx;
+	unsigned int	entry_count;
+	printk_func_t	saved_printk_func[MAX_ALT_PRINTK_CTX];
+};
+
+static DEFINE_PER_CPU(struct alt_printk_seq_buf, alt_print_seq);
+static DEFINE_PER_CPU(struct alt_printk_ctx, alt_printk_ctx);
+
+static int alt_printk_log_store(struct alt_printk_seq_buf *s,
+		const char *fmt, va_list args)
 {
-	struct alt_printk_seq_buf *s = this_cpu_ptr(&nmi_print_seq);
-	int add = 0;
+	int add;
 	size_t len;
 
 again:
 	len = atomic_read(&s->len);
 
-	if (len >= sizeof(s->buffer)) {
-		atomic_inc(&nmi_message_lost);
+	if (len >= sizeof(s->buffer))
 		return 0;
-	}
 
 	/*
 	 * Make sure that all old data have been read before the buffer was
@@ -99,6 +114,96 @@ static int vprintk_nmi(const char *fmt, va_list args)
 	return add;
 }
 
+/*
+ * Lockless printk(), to avoid deadlocks should printk() recurse
+ * into itself. It uses a per-CPU buffer to store the message, just
+ * like NMI.
+ */
+static int vprintk_alt(const char *fmt, va_list args)
+{
+	struct alt_printk_seq_buf *s = this_cpu_ptr(&alt_print_seq);
+
+	return alt_printk_log_store(s, fmt, args);
+}
+
+/*
+ * We must keep track of `printk_func' because the alt_printk
+ * context can be preempted by the NMI alt_printk context.
+ *
+ * Consider the following example:
+ *
+ * vprintk_emit()
+ *  alt_printk_enter()
+ *   printk_func = vprintk_alt;
+ *
+ *   -> NMI
+ *       printk_nmi_enter()
+ *        printk_func = vprintk_nmi;
+ *       printk_nmi_exit()
+ *        printk_func = vprintk_default;
+ *                      ^^^^^^^^^^^^^^^
+ *   <- NMI
+ *
+ *  printk("foo") -> vprintk_default();
+ *
+ * Thus we must restore the original `printk_func' value, the one
+ * the NMI saw at printk_nmi_enter() time.
+ */
+static void __lockless_printk_enter(printk_func_t new_func)
+{
+	struct alt_printk_ctx *ctx = this_cpu_ptr(&alt_printk_ctx);
+	int idx = atomic_inc_return(&ctx->idx) - 1;
+
+	ctx->saved_printk_func[idx] = this_cpu_read(printk_func);
+	this_cpu_write(printk_func, new_func);
+}
+
+static void __lockless_printk_exit(void)
+{
+	struct alt_printk_ctx *ctx = this_cpu_ptr(&alt_printk_ctx);
+	int idx = atomic_read(&ctx->idx) - 1;
+
+	this_cpu_write(printk_func, ctx->saved_printk_func[idx]);
+	atomic_dec(&ctx->idx);
+}
+
+/* Local IRQs must be disabled; can be preempted by NMI. */
+void alt_printk_enter(void)
+{
+	struct alt_printk_ctx *ctx = this_cpu_ptr(&alt_printk_ctx);
+
+	/*
+	 * We can't switch `printk_func' while the CPU is flushing its
+	 * alternative buffers. At the same time, we leave the flushing
+	 * `unprotected', because we always use vprintk_default() there.
+	 *
+	 * ->entry_count can detect printk() recursion from flushing context:
+	 * -- alt_printk_flush() sets ->entry_count to 1
+	 * -- every vprintk_default() call from alt_printk_flush() increments
+	 *    ->entry_count to 2 when it enters the recursion-unsafe region
+	 *    and decrements it back to 1 when it leaves that region
+	 * -- thus, if printk() recurses from the recursion-unsafe region, we
+	 *    will see ->entry_count > 2.
+	 */
+	ctx->entry_count++;
+	if (ctx->entry_count > 1)
+		return;
+
+	/* @TODO: do something sensible in case of printk() recursion */
+
+	__lockless_printk_enter(vprintk_alt);
+}
+
+/* Local IRQs must be disabled; can be preempted by NMI. */
+void alt_printk_exit(void)
+{
+	struct alt_printk_ctx *ctx = this_cpu_ptr(&alt_printk_ctx);
+
+	if (ctx->entry_count == 1)
+		__lockless_printk_exit();
+	ctx->entry_count--;
+}
+
 static void alt_printk_flush_line(const char *text, int len)
 {
 	/*
@@ -110,7 +215,6 @@ static void alt_printk_flush_line(const char *text, int len)
 		printk_deferred("%.*s", len, text);
 	else
 		printk("%.*s", len, text);
-
 }
 
 /*
@@ -135,6 +239,7 @@ static void __alt_printk_flush(struct irq_work *work)
 		__RAW_SPIN_LOCK_INITIALIZER(read_lock);
 	struct alt_printk_seq_buf *s =
 		container_of(work, struct alt_printk_seq_buf, work);
+	struct alt_printk_ctx *ctx = this_cpu_ptr(&alt_printk_ctx);
 	unsigned long flags;
 	size_t len, size;
 	int i, last_i;
@@ -147,6 +252,11 @@ static void __alt_printk_flush(struct irq_work *work)
 	 * a backtrace.
 	 */
 	raw_spin_lock_irqsave(&read_lock, flags);
+	/*
+	 * Forbid alt_printk on this CPU: we want to flush messages to
+	 * logbuf, not to the alt_printk buffer again.
+	 */
+	ctx->entry_count++;
 
 	i = 0;
 more:
@@ -195,6 +305,7 @@ static void __alt_printk_flush(struct irq_work *work)
 		goto more;
 
 out:
+	ctx->entry_count--;
 	raw_spin_unlock_irqrestore(&read_lock, flags);
 }
 
@@ -248,6 +359,9 @@ void __init alt_printk_init(void)
 		struct alt_printk_seq_buf *s = &per_cpu(nmi_print_seq, cpu);
 
 		init_irq_work(&s->work, __alt_printk_flush);
+
+		s = &per_cpu(alt_print_seq, cpu);
+		init_irq_work(&s->work, __alt_printk_flush);
 	}
 
 	/* Make sure that IRQ works are initialized before enabling. */
@@ -258,12 +372,30 @@ void __init alt_printk_init(void)
 	alt_printk_flush();
 }
 
+/*
+ * Safe printk() for NMI context. It uses a per-CPU buffer to
+ * store the message. NMIs are not nested, so there is always only
+ * one writer running. But the buffer might get flushed from another
+ * CPU, so we need to be careful.
+ */
+static int vprintk_nmi(const char *fmt, va_list args)
+{
+	struct alt_printk_seq_buf *s = this_cpu_ptr(&nmi_print_seq);
+	int add;
+
+	add = alt_printk_log_store(s, fmt, args);
+	if (!add)
+		atomic_inc(&nmi_message_lost);
+
+	return add;
+}
+
 void printk_nmi_enter(void)
 {
-	this_cpu_write(printk_func, vprintk_nmi);
+	__lockless_printk_enter(vprintk_nmi);
 }
 
 void printk_nmi_exit(void)
 {
-	this_cpu_write(printk_func, vprintk_default);
+	__lockless_printk_exit();
 }
diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h
index 7fd2838..c5f970e 100644
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -20,6 +20,18 @@ typedef __printf(1, 0) int (*printk_func_t)(const char *fmt, va_list args);
 
 int __printf(1, 0) vprintk_default(const char *fmt, va_list args);
 
+#ifdef CONFIG_PRINTK
+
+void alt_printk_enter(void);
+void alt_printk_exit(void);
+
+#else
+
+static inline void alt_printk_enter(void) { }
+static inline void alt_printk_exit(void) { }
+
+#endif /* CONFIG_PRINTK */
+
 #ifdef CONFIG_PRINTK_NMI
 
 extern raw_spinlock_t logbuf_lock;
-- 
2.10.0.372.g6fe1b14
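
P.S. the promised standalone userspace model (illustration only, not
kernel code; all names are simplified stand-ins) of the `printk_func'
save/restore nesting described above __lockless_printk_enter(). It
shows why a single saved pointer is not enough once NMI can preempt
the normal alt_printk context: the NMI must restore what *it* saw on
entry, so the saved values form a small per-CPU stack.

	#include <stdio.h>

	#define MAX_CTX 2	/* mirrors MAX_ALT_PRINTK_CTX */

	typedef void (*print_func_t)(const char *msg);

	static void print_default(const char *msg) { printf("default: %s\n", msg); }
	static void print_alt(const char *msg)     { printf("alt:     %s\n", msg); }
	static void print_nmi(const char *msg)     { printf("nmi:     %s\n", msg); }

	/* models the per-CPU printk_func pointer and alt_printk_ctx */
	static print_func_t print_func = print_default;
	static print_func_t saved_print_func[MAX_CTX];
	static int idx;

	static void ctx_enter(print_func_t new_func)
	{
		saved_print_func[idx++] = print_func;	/* save what we preempted */
		print_func = new_func;
	}

	static void ctx_exit(void)
	{
		print_func = saved_print_func[--idx];	/* restore for preempted ctx */
	}

	int main(void)
	{
		ctx_enter(print_alt);	/* alt_printk_enter()              */
		print_func("one");	/* -> alt                          */

		ctx_enter(print_nmi);	/* NMI arrives: printk_nmi_enter() */
		print_func("two");	/* -> nmi                          */
		ctx_exit();		/* printk_nmi_exit()               */

		print_func("three");	/* -> alt again, not default       */

		ctx_exit();		/* alt_printk_exit()               */
		print_func("four");	/* -> default                      */
		return 0;
	}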