From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1760302AbcLUOls (ORCPT ); Wed, 21 Dec 2016 09:41:48 -0500 Received: from mail-pf0-f196.google.com ([209.85.192.196]:35740 "EHLO mail-pf0-f196.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S935122AbcLUOhD (ORCPT ); Wed, 21 Dec 2016 09:37:03 -0500 From: Sergey Senozhatsky To: Andrew Morton Cc: Petr Mladek , Jan Kara , Tejun Heo , Calvin Owens , Steven Rostedt , Ingo Molnar , Peter Zijlstra , Andy Lutomirski , Linus Torvalds , Peter Hurley , linux-kernel@vger.kernel.org, Sergey Senozhatsky , Sergey Senozhatsky Subject: [PATCHv6 3/7] printk: introduce per-cpu safe_print seq buffer Date: Wed, 21 Dec 2016 23:36:01 +0900 Message-Id: <20161221143605.2272-4-sergey.senozhatsky@gmail.com> X-Mailer: git-send-email 2.11.0 In-Reply-To: <20161221143605.2272-1-sergey.senozhatsky@gmail.com> References: <20161221143605.2272-1-sergey.senozhatsky@gmail.com> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org This patch extends the idea of NMI per-cpu buffers to regions that may cause recursive printk() calls and possible deadlocks. Namely, printk() can't handle printk calls from schedule code or printk() calls from lock debugging code (spin_dump() for instance); because those may be called with `sem->lock' already taken or any other `critical' locks (p->pi_lock, etc.). An example of deadlock can be vprintk_emit() console_unlock() up() << raw_spin_lock_irqsave(&sem->lock, flags); wake_up_process() try_to_wake_up() ttwu_queue() ttwu_activate() activate_task() enqueue_task() enqueue_task_fair() cfs_rq_of() task_of() WARN_ON_ONCE(!entity_is_task(se)) vprintk_emit() console_trylock() down_trylock() raw_spin_lock_irqsave(&sem->lock, flags) ^^^^ deadlock and some other cases. 
Just like in the NMI implementation, the solution uses a per-cpu `printk_func' pointer to 'redirect' printk() calls to a 'safe' callback, that stores messages in a per-cpu buffer and flushes them back to the logbuf buffer later. Usage example: printk() printk_safe_enter(flags) // // any printk() call from here will end up in vprintk_safe(), // that stores messages in a special per-CPU buffer. // printk_safe_exit(flags) The 'redirection' mechanism, though, has been reworked, as suggested by Petr Mladek. Instead of using a per-cpu @printk_func callback we now keep a per-cpu printk-context variable and call either the default, the safe, or the NMI vprintk function depending on its value. printk_safe_nmi_enter/exit and printk_safe_enter/exit, thus, just set/clear the corresponding bits in the printk-context variable. The patch only adds printk_safe support; we don't use it yet. Signed-off-by: Sergey Senozhatsky --- include/linux/printk.h | 21 +++++-- kernel/printk/Makefile | 2 +- kernel/printk/internal.h | 56 +++++++++++-------- kernel/printk/printk.c | 3 - kernel/printk/printk_safe.c | 130 +++++++++++++++++++++++++++++++++++--------- 5 files changed, 153 insertions(+), 59 deletions(-) diff --git a/include/linux/printk.h b/include/linux/printk.h index 67df41eca51c..f7d291cf61d9 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -147,17 +147,11 @@ void early_printk(const char *s, ...) 
{ } #endif #ifdef CONFIG_PRINTK_NMI -extern void printk_safe_init(void); extern void printk_safe_nmi_enter(void); extern void printk_safe_nmi_exit(void); -extern void printk_safe_flush(void); -extern void printk_safe_flush_on_panic(void); #else -static inline void printk_safe_init(void) { } static inline void printk_safe_nmi_enter(void) { } static inline void printk_safe_nmi_exit(void) { } -static inline void printk_safe_flush(void) { } -static inline void printk_safe_flush_on_panic(void) { } #endif /* PRINTK_NMI */ #ifdef CONFIG_PRINTK @@ -209,6 +203,9 @@ void __init setup_log_buf(int early); __printf(1, 2) void dump_stack_set_arch_desc(const char *fmt, ...); void dump_stack_print_info(const char *log_lvl); void show_regs_print_info(const char *log_lvl); +extern void printk_safe_init(void); +extern void printk_safe_flush(void); +extern void printk_safe_flush_on_panic(void); #else static inline __printf(1, 0) int vprintk(const char *s, va_list args) @@ -268,6 +265,18 @@ static inline void dump_stack_print_info(const char *log_lvl) static inline void show_regs_print_info(const char *log_lvl) { } + +static inline void printk_safe_init(void) +{ +} + +static inline void printk_safe_flush(void) +{ +} + +static inline void printk_safe_flush_on_panic(void) +{ +} #endif extern asmlinkage void dump_stack(void) __cold; diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile index 607928119f26..4a2ffc39eb95 100644 --- a/kernel/printk/Makefile +++ b/kernel/printk/Makefile @@ -1,3 +1,3 @@ obj-y = printk.o -obj-$(CONFIG_PRINTK_NMI) += printk_safe.o +obj-$(CONFIG_PRINTK) += printk_safe.o obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index 7fd2838fa417..97cee4f482f4 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -16,26 +16,8 @@ */ #include -typedef __printf(1, 0) int (*printk_func_t)(const char *fmt, va_list args); - -int __printf(1, 0) vprintk_default(const char *fmt, va_list args); - 
#ifdef CONFIG_PRINTK_NMI -extern raw_spinlock_t logbuf_lock; - -/* - * printk() could not take logbuf_lock in NMI context. Instead, - * it temporary stores the strings into a per-CPU buffer. - * The alternative implementation is chosen transparently - * via per-CPU variable. - */ -DECLARE_PER_CPU(printk_func_t, printk_func); -static inline __printf(1, 0) int vprintk_func(const char *fmt, va_list args) -{ - return this_cpu_read(printk_func)(fmt, args); -} - extern atomic_t nmi_message_lost; static inline int get_nmi_message_lost(void) { @@ -44,14 +26,42 @@ static inline int get_nmi_message_lost(void) #else /* CONFIG_PRINTK_NMI */ -static inline __printf(1, 0) int vprintk_func(const char *fmt, va_list args) -{ - return vprintk_default(fmt, args); -} - static inline int get_nmi_message_lost(void) { return 0; } #endif /* CONFIG_PRINTK_NMI */ + +#ifdef CONFIG_PRINTK + +#define PRINTK_SAFE_CONTEXT_MASK 0x7fffffff +#define PRINTK_NMI_CONTEXT_MASK 0x80000000 + +extern raw_spinlock_t logbuf_lock; + +__printf(1, 0) int vprintk_default(const char *fmt, va_list args); +__printf(1, 0) int vprintk_func(const char *fmt, va_list args); +void __printk_safe_enter(void); +void __printk_safe_exit(void); + +#define printk_safe_enter(flags) \ + do { \ + local_irq_save(flags); \ + __printk_safe_enter(); \ + } while (0) + +#define printk_safe_exit(flags) \ + do { \ + __printk_safe_exit(); \ + local_irq_restore(flags); \ + } while (0) + +#else + +__printf(1, 0) int vprintk_func(const char *fmt, va_list args) { return 0; } + +#define printk_safe_enter(f) ((void)(f)) +#define printk_safe_exit(f) ((void)(f)) + +#endif /* CONFIG_PRINTK */ diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 265e815730f9..335b0fcb538e 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1902,9 +1902,6 @@ static size_t msg_print_text(const struct printk_log *msg, bool syslog, char *buf, size_t size) { return 0; } static bool suppress_message_printing(int level) { return false; } -/* 
Still needs to be defined for users */ -DEFINE_PER_CPU(printk_func_t, printk_func); - #endif /* CONFIG_PRINTK */ #ifdef CONFIG_EARLY_PRINTK diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c index 09e5552f5b58..a041927a127b 100644 --- a/kernel/printk/printk_safe.c +++ b/kernel/printk/printk_safe.c @@ -1,5 +1,5 @@ /* - * printk_safe.c - Safe printk in NMI context + * printk_safe.c - Safe printk for printk-deadlock-prone contexts * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -32,15 +32,14 @@ * is later flushed into the main ring buffer via IRQ work. * * The alternative implementation is chosen transparently - * via @printk_func per-CPU variable. + * by examining the current printk() context mask stored in @printk_context + * per-CPU variable. * * The implementation allows to flush the strings also from another CPU. * There are situations when we want to make sure that all buffers * were handled or when IRQs are blocked. */ -DEFINE_PER_CPU(printk_func_t, printk_func) = vprintk_default; static int printk_safe_irq_ready; -atomic_t nmi_message_lost; #define SAFE_LOG_BUF_LEN ((1 << CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT) - \ sizeof(atomic_t) - sizeof(struct irq_work)) @@ -50,18 +49,29 @@ struct printk_safe_seq_buf { struct irq_work work; /* IRQ work that flushes the buffer */ unsigned char buffer[SAFE_LOG_BUF_LEN]; }; + +static DEFINE_PER_CPU(struct printk_safe_seq_buf, safe_print_seq); +static DEFINE_PER_CPU(int, printk_context); + +#ifdef CONFIG_PRINTK_NMI static DEFINE_PER_CPU(struct printk_safe_seq_buf, nmi_print_seq); +atomic_t nmi_message_lost; +#endif /* - * Safe printk() for NMI context. It uses a per-CPU buffer to - * store the message. NMIs are not nested, so there is always only - * one writer running. But the buffer might get flushed from another - * CPU, so we need to be careful. + * Add a message to per-CPU context-dependent buffer. 
NMI and printk-safe + * have dedicated buffers, because otherwise printk-safe preempted by + * NMI-printk would have overwritten the NMI messages. + * + * The messages are flushed from irq work (or from panic()), possibly, + * from another CPU, concurrently with printk_safe_log_store(). Should this + * happen, printk_safe_log_store() will notice the buffer->len mismatch + * and repeat the write. */ -static int vprintk_safe_nmi(const char *fmt, va_list args) +static int printk_safe_log_store(struct printk_safe_seq_buf *s, + const char *fmt, va_list args) { - struct printk_safe_seq_buf *s = this_cpu_ptr(&nmi_print_seq); - int add = 0; + int add; size_t len; again: @@ -74,8 +84,8 @@ static int vprintk_safe_nmi(const char *fmt, va_list args) } /* - * Make sure that all old data have been read before the buffer was - * reseted. This is not needed when we just append data. + * Make sure that all old data have been read before the buffer + * was reset. This is not needed when we just append data. */ if (!len) smp_rmb(); @@ -161,7 +171,7 @@ static int printk_safe_flush_buffer(const char *start, size_t len) } /* - * Flush data from the associated per_CPU buffer. The function + * Flush data from the associated per-CPU buffer. The function * can be called either via IRQ work or independently. */ static void __printk_safe_flush(struct irq_work *work) @@ -231,8 +241,12 @@ void printk_safe_flush(void) { int cpu; - for_each_possible_cpu(cpu) + for_each_possible_cpu(cpu) { +#ifdef CONFIG_PRINTK_NMI __printk_safe_flush(&per_cpu(nmi_print_seq, cpu).work); +#endif + __printk_safe_flush(&per_cpu(safe_print_seq, cpu).work); + } } /** @@ -262,14 +276,88 @@ void printk_safe_flush_on_panic(void) printk_safe_flush(); } +#ifdef CONFIG_PRINTK_NMI +/* + * Safe printk() for NMI context. It uses a per-CPU buffer to + * store the message. NMIs are not nested, so there is always only + * one writer running. But the buffer might get flushed from another + * CPU, so we need to be careful. 
+ */ +static int vprintk_safe_nmi(const char *fmt, va_list args) +{ + struct printk_safe_seq_buf *s = this_cpu_ptr(&nmi_print_seq); + + return printk_safe_log_store(s, fmt, args); +} + +void printk_safe_nmi_enter(void) +{ + this_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK); +} + +void printk_safe_nmi_exit(void) +{ + this_cpu_and(printk_context, ~PRINTK_NMI_CONTEXT_MASK); +} + +#else + +static int vprintk_safe_nmi(const char *fmt, va_list args) +{ + return 0; +} + +#endif /* CONFIG_PRINTK_NMI */ + +/* + * Lock-less printk(), to avoid deadlocks should the printk() recurse + * into itself. It uses a per-CPU buffer to store the message, just like + * NMI. + */ +static int vprintk_safe(const char *fmt, va_list args) +{ + struct printk_safe_seq_buf *s = this_cpu_ptr(&safe_print_seq); + + return printk_safe_log_store(s, fmt, args); +} + +/* Can be preempted by NMI. */ +void __printk_safe_enter(void) +{ + this_cpu_inc(printk_context); +} + +/* Can be preempted by NMI. */ +void __printk_safe_exit(void) +{ + this_cpu_dec(printk_context); +} + +__printf(1, 0) int vprintk_func(const char *fmt, va_list args) +{ + if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK) + return vprintk_safe_nmi(fmt, args); + + if (this_cpu_read(printk_context) & PRINTK_SAFE_CONTEXT_MASK) + return vprintk_safe(fmt, args); + + return vprintk_default(fmt, args); +} + void __init printk_safe_init(void) { int cpu; for_each_possible_cpu(cpu) { - struct printk_safe_seq_buf *s = &per_cpu(nmi_print_seq, cpu); + struct printk_safe_seq_buf *s; + + s = &per_cpu(safe_print_seq, cpu); + init_irq_work(&s->work, __printk_safe_flush); +#ifdef CONFIG_PRINTK_NMI + s = &per_cpu(nmi_print_seq, cpu); init_irq_work(&s->work, __printk_safe_flush); +#endif } /* Make sure that IRQ works are initialized before enabling. */ @@ -279,13 +367,3 @@ void __init printk_safe_init(void) /* Flush pending messages that did not have scheduled IRQ works. 
*/ printk_safe_flush(); } - -void printk_safe_nmi_enter(void) -{ - this_cpu_write(printk_func, vprintk_safe_nmi); -} - -void printk_safe_nmi_exit(void) -{ - this_cpu_write(printk_func, vprintk_default); -} -- 2.11.0