linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC] printk/sysrq: Don't play with console_loglevel
@ 2019-05-28  0:24 Dmitry Safonov
  2019-05-28  3:21 ` Tetsuo Handa
  2019-05-28  4:15 ` Sergey Senozhatsky
  0 siblings, 2 replies; 26+ messages in thread
From: Dmitry Safonov @ 2019-05-28  0:24 UTC (permalink / raw)
  To: linux-kernel
  Cc: Dmitry Safonov, Greg Kroah-Hartman, Jiri Slaby, Petr Mladek,
	Sergey Senozhatsky, Steven Rostedt, Tetsuo Handa

While handling sysrq the console_loglevel is bumped to default to print
sysrq headers. It's done to print sysrq messages with WARNING level for
consumers of /proc/kmsg, though it sucks by the following reasons:
- changing console_loglevel may produce tons of messages (especially on
  bloated with debug/info prints systems)
- it doesn't guarantee that the message will be printed as printk may
  deffer the actual console output from buffer (see the comment near
  printk() in kernel/printk/printk.c)

Provide KERN_UNSUPPRESSED printk() annotation for such legacy places.
Make sysrq print the headers unsuppressed instead of changing
console_loglevel.

Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jiri Slaby <jslaby@suse.com>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Dmitry Safonov <dima@arista.com>
---
(I'm not fond at all of this patch - sending as RFC just to touch
 how you all feel about this..)

 drivers/tty/sysrq.c         | 34 ++++++++++++++++++----------------
 include/linux/kern_levels.h |  6 ++++++
 include/linux/printk.h      |  1 +
 kernel/printk/printk.c      | 17 ++++++++++++-----
 4 files changed, 37 insertions(+), 21 deletions(-)

diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index 59e82e6d776d..ea37e3aa84b3 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -523,23 +523,28 @@ static void __sysrq_put_key_op(int key, struct sysrq_key_op *op_p)
                 sysrq_key_table[i] = op_p;
 }
 
+#if (CONSOLE_LOGLEVEL_DEFAULT > LOGLEVEL_INFO)
+#define KERN_SYSRQ		KERN_UNSUPPRESSED
+#else
+#define KERN_SYSRQ
+#endif
+#define sysrq_info(fmt, ...)	\
+	printk(KERN_SYSRQ KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
+#define sysrq_cont(fmt, ...)	\
+	printk(KERN_SYSRQ KERN_CONT pr_fmt(fmt), ##__VA_ARGS__)
+
 void __handle_sysrq(int key, bool check_mask)
 {
 	struct sysrq_key_op *op_p;
-	int orig_log_level;
 	int i;
 
 	rcu_sysrq_start();
 	rcu_read_lock();
+
 	/*
-	 * Raise the apparent loglevel to maximum so that the sysrq header
-	 * is shown to provide the user with positive feedback.  We do not
-	 * simply emit this at KERN_EMERG as that would change message
-	 * routing in the consumers of /proc/kmsg.
+	 * We do not simply emit this at KERN_EMERG as that would change
+	 * message routing in the consumers of /proc/kmsg.
 	 */
-	orig_log_level = console_loglevel;
-	console_loglevel = CONSOLE_LOGLEVEL_DEFAULT;
-
         op_p = __sysrq_get_key_op(key);
         if (op_p) {
 		/*
@@ -547,15 +552,13 @@ void __handle_sysrq(int key, bool check_mask)
 		 * should not) and is the invoked operation enabled?
 		 */
 		if (!check_mask || sysrq_on_mask(op_p->enable_mask)) {
-			pr_info("%s\n", op_p->action_msg);
-			console_loglevel = orig_log_level;
+			sysrq_info("%s\n", op_p->action_msg);
 			op_p->handler(key);
 		} else {
-			pr_info("This sysrq operation is disabled.\n");
-			console_loglevel = orig_log_level;
+			sysrq_info("This sysrq operation is disabled.\n");
 		}
 	} else {
-		pr_info("HELP : ");
+		sysrq_info("HELP : ");
 		/* Only print the help msg once per handler */
 		for (i = 0; i < ARRAY_SIZE(sysrq_key_table); i++) {
 			if (sysrq_key_table[i]) {
@@ -566,11 +569,10 @@ void __handle_sysrq(int key, bool check_mask)
 					;
 				if (j != i)
 					continue;
-				pr_cont("%s ", sysrq_key_table[i]->help_msg);
+				sysrq_cont("%s ", sysrq_key_table[i]->help_msg);
 			}
 		}
-		pr_cont("\n");
-		console_loglevel = orig_log_level;
+		sysrq_cont("\n");
 	}
 	rcu_read_unlock();
 	rcu_sysrq_end();
diff --git a/include/linux/kern_levels.h b/include/linux/kern_levels.h
index bf2389c26ae3..2f40d6ace60d 100644
--- a/include/linux/kern_levels.h
+++ b/include/linux/kern_levels.h
@@ -23,6 +23,12 @@
  */
 #define KERN_CONT	KERN_SOH "c"
 
+/*
+ * Annotation for a message that will be printed regardless current
+ * console_loglevel. _DO_NOT_USE_IT! Exists for legacy code.
+ */
+#define KERN_UNSUPPRESSED	KERN_SOH "u"
+
 /* integer equivalents of KERN_<LEVEL> */
 #define LOGLEVEL_SCHED		-2	/* Deferred messages from sched code
 					 * are set to this special level */
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 84ea4d094af3..60a5cfde1488 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -19,6 +19,7 @@ static inline int printk_get_level(const char *buffer)
 		switch (buffer[1]) {
 		case '0' ... '7':
 		case 'c':	/* KERN_CONT */
+		case 'u':	/* KERN_UNSUPPRESSED */
 			return buffer[1];
 		}
 	}
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 02ca827b8fac..c6dd82c37382 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -345,6 +345,7 @@ static int console_msg_format = MSG_FORMAT_DEFAULT;
 
 enum log_flags {
 	LOG_NEWLINE	= 2,	/* text ended with a newline */
+	LOG_DONT_SUPPRESS = 4,	/* ignore current console_loglevel */
 	LOG_CONT	= 8,	/* text is a fragment of a continuation line */
 };
 
@@ -1179,9 +1180,12 @@ module_param(ignore_loglevel, bool, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(ignore_loglevel,
 		 "ignore loglevel setting (prints all kernel messages to the console)");
 
-static bool suppress_message_printing(int level)
+static bool suppress_message_printing(int level, u8 lflags)
 {
-	return (level >= console_loglevel && !ignore_loglevel);
+	if (ignore_loglevel || (lflags & LOG_DONT_SUPPRESS))
+		return false;
+
+	return level >= console_loglevel;
 }
 
 #ifdef CONFIG_BOOT_PRINTK_DELAY
@@ -1213,7 +1217,7 @@ static void boot_delay_msec(int level)
 	unsigned long timeout;
 
 	if ((boot_delay == 0 || system_state >= SYSTEM_RUNNING)
-		|| suppress_message_printing(level)) {
+		|| suppress_message_printing(level, 0)) {
 		return;
 	}
 
@@ -1917,6 +1921,9 @@ int vprintk_store(int facility, int level,
 				break;
 			case 'c':	/* KERN_CONT */
 				lflags |= LOG_CONT;
+				break;
+			case 'u':	/* KERN_UNSUPPRESSED */
+				lflags |= LOG_DONT_SUPPRESS;
 			}
 
 			text_len -= 2;
@@ -2069,7 +2076,7 @@ static void call_console_drivers(const char *ext_text, size_t ext_len,
 				 const char *text, size_t len) {}
 static size_t msg_print_text(const struct printk_log *msg, bool syslog,
 			     bool time, char *buf, size_t size) { return 0; }
-static bool suppress_message_printing(int level) { return false; }
+static bool suppress_message_printing(int level, u8 lflags) { return false; }
 
 #endif /* CONFIG_PRINTK */
 
@@ -2407,7 +2414,7 @@ void console_unlock(void)
 			break;
 
 		msg = log_from_idx(console_idx);
-		if (suppress_message_printing(msg->level)) {
+		if (suppress_message_printing(msg->level, msg->flags)) {
 			/*
 			 * Skip record we have buffered and already printed
 			 * directly to the console when we received it, and
-- 
2.21.0


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [RFC] printk/sysrq: Don't play with console_loglevel
  2019-05-28  0:24 [RFC] printk/sysrq: Don't play with console_loglevel Dmitry Safonov
@ 2019-05-28  3:21 ` Tetsuo Handa
  2019-05-28  4:22   ` Sergey Senozhatsky
  2019-05-28  4:15 ` Sergey Senozhatsky
  1 sibling, 1 reply; 26+ messages in thread
From: Tetsuo Handa @ 2019-05-28  3:21 UTC (permalink / raw)
  To: Dmitry Safonov, linux-kernel
  Cc: Greg Kroah-Hartman, Jiri Slaby, Petr Mladek, Sergey Senozhatsky,
	Steven Rostedt

On 2019/05/28 9:24, Dmitry Safonov wrote:
> Provide KERN_UNSUPPRESSED printk() annotation for such legacy places.
> Make sysrq print the headers unsuppressed instead of changing
> console_loglevel.

I think that kdb also wants to use KERN_UNSUPPRESSED for making sure
that messages are printed. But that user calls dump function which is
indirectly calling printk() many times. Thus, I think that we need a
way to explicitly pass "how the message should be treated" as a
function argument.

What I suggested in my proposal ("printk: Introduce "store now but print later" prefix." at
https://lore.kernel.org/lkml/1550896930-12324-1-git-send-email-penguin-kernel@I-love.SAKURA.ne.jp/T/#u )
is "whether the caller wants to defer printing to consoles regarding
this printk() call". And your suggestion is "whether the caller wants
to apply ignore_loglevel regarding this printk() call".


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC] printk/sysrq: Don't play with console_loglevel
  2019-05-28  0:24 [RFC] printk/sysrq: Don't play with console_loglevel Dmitry Safonov
  2019-05-28  3:21 ` Tetsuo Handa
@ 2019-05-28  4:15 ` Sergey Senozhatsky
  2019-05-28  4:46   ` Sergey Senozhatsky
  1 sibling, 1 reply; 26+ messages in thread
From: Sergey Senozhatsky @ 2019-05-28  4:15 UTC (permalink / raw)
  To: Dmitry Safonov
  Cc: linux-kernel, Greg Kroah-Hartman, Jiri Slaby, Petr Mladek,
	Sergey Senozhatsky, Steven Rostedt, Tetsuo Handa

On (05/28/19 01:24), Dmitry Safonov wrote:
[..]
> While handling sysrq the console_loglevel is bumped to default to print
> sysrq headers. It's done to print sysrq messages with WARNING level for
> consumers of /proc/kmsg, though it sucks by the following reasons:
> - changing console_loglevel may produce tons of messages (especially on
>   bloated with debug/info prints systems)
> - it doesn't guarantee that the message will be printed as printk may
>   deffer the actual console output from buffer (see the comment near
>   printk() in kernel/printk/printk.c)
> 
> Provide KERN_UNSUPPRESSED printk() annotation for such legacy places.
> Make sysrq print the headers unsuppressed instead of changing
> console_loglevel.

I've been thinking about this a while ago... So what I thought back
then was that affected paths are atomic: sysrq, irqs, NMI, etc. Well
at leasted it seemed to be so. Hence we can use per-CPU flag to tell
printk that whatever comes from this-CPU is important and printk should
eventually print it (next time it hits console_unlock()). One candidate
for such per-CPU flag was this_cpu(printk_context). We can steal high
bit (next to NMI printk_safe bit). So the intended use case was something
like this

	sysrq/etc  /* atomic context */
	{
		printk_blah_enter();

		for (...)
			printk();
		...
		dump_bar();

		prinkt_blah_exit();
	}

printk_blah_enter() would set that special - printk_safe_mask_blah - bit,
and prinkt_blah_exit() would clear it. Whenever prinkt->vprintk_store()
would see printk_safe_mask_blah bit set it would mark the log_stored message
as "important, always print!", and console_unlock() would always print those
"important" messages.

	-ss

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC] printk/sysrq: Don't play with console_loglevel
  2019-05-28  3:21 ` Tetsuo Handa
@ 2019-05-28  4:22   ` Sergey Senozhatsky
  2019-05-28  8:02     ` Tetsuo Handa
  0 siblings, 1 reply; 26+ messages in thread
From: Sergey Senozhatsky @ 2019-05-28  4:22 UTC (permalink / raw)
  To: Tetsuo Handa
  Cc: Dmitry Safonov, linux-kernel, Greg Kroah-Hartman, Jiri Slaby,
	Petr Mladek, Sergey Senozhatsky, Steven Rostedt

On (05/28/19 12:21), Tetsuo Handa wrote:
[..]
> What I suggested in my proposal ("printk: Introduce "store now but print later" prefix." at
> https://lore.kernel.org/lkml/1550896930-12324-1-git-send-email-penguin-kernel@I-love.SAKURA.ne.jp/T/#u )
> is "whether the caller wants to defer printing to consoles regarding
> this printk() call". And your suggestion is "whether the caller wants
> to apply ignore_loglevel regarding this printk() call".

I'm not sure about "store now but print later" here. What Dmitry is
talking about:

     bump console_loglevel on *this* particular CPU only,
     not system-wide.
     /* Which is implemented in a form of - all messages from this-CPU
      * only should be printed regardless the loglevel, the rest should
      * pass the usual suppress_message_printing() check. */

	-ss

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC] printk/sysrq: Don't play with console_loglevel
  2019-05-28  4:15 ` Sergey Senozhatsky
@ 2019-05-28  4:46   ` Sergey Senozhatsky
  2019-05-28 13:42     ` Petr Mladek
  0 siblings, 1 reply; 26+ messages in thread
From: Sergey Senozhatsky @ 2019-05-28  4:46 UTC (permalink / raw)
  To: Dmitry Safonov
  Cc: linux-kernel, Greg Kroah-Hartman, Jiri Slaby, Petr Mladek,
	Sergey Senozhatsky, Steven Rostedt, Tetsuo Handa,
	Sergey Senozhatsky

On (05/28/19 13:15), Sergey Senozhatsky wrote:
> On (05/28/19 01:24), Dmitry Safonov wrote:
> [..]
> > While handling sysrq the console_loglevel is bumped to default to print
> > sysrq headers. It's done to print sysrq messages with WARNING level for
> > consumers of /proc/kmsg, though it sucks by the following reasons:
> > - changing console_loglevel may produce tons of messages (especially on
> >   bloated with debug/info prints systems)
> > - it doesn't guarantee that the message will be printed as printk may
> >   deffer the actual console output from buffer (see the comment near
> >   printk() in kernel/printk/printk.c)
> > 
> > Provide KERN_UNSUPPRESSED printk() annotation for such legacy places.
> > Make sysrq print the headers unsuppressed instead of changing
> > console_loglevel.
> 
> I've been thinking about this a while ago... So what I thought back
> then was that affected paths are atomic: sysrq, irqs, NMI, etc. Well
> at leasted it seemed to be so.

Ahh.. OK, now I sort of remember why I gave up on this idea (see [1]
at the bottom, when it comes to uv_nmi_dump_state()) - printk_NMI and
printk-safe redirections.

	NMI
		loglevel = NEW
		printk -> printk_safe_nmi
		loglevel = OLD

	iret

	IRQ
		flush printk_safe_nmi -> printk
		// At this point we don't remember about
		// loglevel manipulation anymore
	iret

We, probably, still need some flags to pass the "this was supposed to
be an important messages" info from printk-safe to normal printk. On
the other hand, if NMI printk-s then it's something rather important,
so we probably better print it anyway and avoid suppress_message_printing()
check for messages which are coming from printk-NMI buffers. To some
extent, it's the same with printk-safe: we don't use it unless we have
a very good reason. So if there is something in printk-safe buffers
then it better end up on consoles. So, maybe, we still can use that
per-CPU printk_context thing.

[1] https://lore.kernel.org/lkml/20180601044050.GA5687@jagdpanzerIV/

	-ss

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC] printk/sysrq: Don't play with console_loglevel
  2019-05-28  4:22   ` Sergey Senozhatsky
@ 2019-05-28  8:02     ` Tetsuo Handa
  2019-05-28  8:51       ` Sergey Senozhatsky
  0 siblings, 1 reply; 26+ messages in thread
From: Tetsuo Handa @ 2019-05-28  8:02 UTC (permalink / raw)
  To: Sergey Senozhatsky
  Cc: Dmitry Safonov, linux-kernel, Greg Kroah-Hartman, Jiri Slaby,
	Petr Mladek, Sergey Senozhatsky, Steven Rostedt

On 2019/05/28 13:22, Sergey Senozhatsky wrote:
> On (05/28/19 12:21), Tetsuo Handa wrote:
> [..]
>> What I suggested in my proposal ("printk: Introduce "store now but print later" prefix." at
>> https://lore.kernel.org/lkml/1550896930-12324-1-git-send-email-penguin-kernel@I-love.SAKURA.ne.jp/T/#u )
>> is "whether the caller wants to defer printing to consoles regarding
>> this printk() call". And your suggestion is "whether the caller wants
>> to apply ignore_loglevel regarding this printk() call".
> 
> I'm not sure about "store now but print later" here. What Dmitry is
> talking about:
> 
>      bump console_loglevel on *this* particular CPU only,
>      not system-wide.
>      /* Which is implemented in a form of - all messages from this-CPU
>       * only should be printed regardless the loglevel, the rest should
>       * pass the usual suppress_message_printing() check. */

Dmitry's patch is changing only the header line (in other words, per printk() call).
Since op_p->handler(key) is out of KERN_UNSUPPRESSED effect, the body lines might
not be printed. I think that we need a way to pass KERN_UNSUPPRESSED from printk()
calls invoked from op_p->handler(key).

You are trying to omit passing KERN_UNSUPPRESSED by utilizing implicit printk
context information. But doesn't such attempt resemble find_printk_buffer() ?

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC] printk/sysrq: Don't play with console_loglevel
  2019-05-28  8:02     ` Tetsuo Handa
@ 2019-05-28  8:51       ` Sergey Senozhatsky
  2019-05-28 10:15         ` Tetsuo Handa
  0 siblings, 1 reply; 26+ messages in thread
From: Sergey Senozhatsky @ 2019-05-28  8:51 UTC (permalink / raw)
  To: Tetsuo Handa
  Cc: Sergey Senozhatsky, Dmitry Safonov, linux-kernel,
	Greg Kroah-Hartman, Jiri Slaby, Petr Mladek, Sergey Senozhatsky,
	Steven Rostedt

On (05/28/19 17:02), Tetsuo Handa wrote:
> On 2019/05/28 13:22, Sergey Senozhatsky wrote:
> > On (05/28/19 12:21), Tetsuo Handa wrote:
> > [..]
> Dmitry's patch is changing only the header line (in other words, per printk() call).
> Since op_p->handler(key) is out of KERN_UNSUPPRESSED effect, the body lines might
> not be printed.

Right.

> I think that we need a way to pass KERN_UNSUPPRESSED from printk()
> calls invoked from op_p->handler(key).

Right. That's what the per-CPU context bit address.

> You are trying to omit passing KERN_UNSUPPRESSED by utilizing implicit printk
> context information. But doesn't such attempt resemble find_printk_buffer() ?

Adding KERN_UNSUPPRESSED to all printks down the op_p->handler()
line is hardly possible. At the same time I'd really prefer not
to have buffering for sysrq.

	-ss

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC] printk/sysrq: Don't play with console_loglevel
  2019-05-28  8:51       ` Sergey Senozhatsky
@ 2019-05-28 10:15         ` Tetsuo Handa
  2019-05-28 13:49           ` Petr Mladek
  2019-06-07 17:09           ` Pavel Machek
  0 siblings, 2 replies; 26+ messages in thread
From: Tetsuo Handa @ 2019-05-28 10:15 UTC (permalink / raw)
  To: Sergey Senozhatsky
  Cc: Dmitry Safonov, linux-kernel, Greg Kroah-Hartman, Jiri Slaby,
	Petr Mladek, Sergey Senozhatsky, Steven Rostedt, Andrew Morton,
	Linus Torvalds

On 2019/05/28 17:51, Sergey Senozhatsky wrote:
>> You are trying to omit passing KERN_UNSUPPRESSED by utilizing implicit printk
>> context information. But doesn't such attempt resemble find_printk_buffer() ?
> 
> Adding KERN_UNSUPPRESSED to all printks down the op_p->handler()
> line is hardly possible. At the same time I'd really prefer not
> to have buffering for sysrq.

I don't think it is hardly possible. And I really prefer having
deferred printing for SysRq.

SysRq is triggered by writing to /proc/sysrq-trigger or typing special keys
on the keyboard. This means that SysRq can be triggered by administrator's
will, and SysRq can be repeated/retried by administrator's will. Therefore,
allowing SysRq-t to write to consoles after leaving the atomic context is
an improvement. My proposal allows deferred printing for SysRq, dump_header()
and warn_alloc(). We can try to wake up console_writer kernel thread upon
leaving the atomic context. If the kernel is unhealthy enough to make
console_writer kernel thread defunctional, administrator can issue other
SysRq in order to forcibly write to consoles.

Is the attempt of making printk() completely asynchronous going to be resumed?
I think that "automatically asynchronous" won't be accepted, and I think that
at best "explicitly asynchronous" (by allowing the caller to explicitly say
it using printk() argument) is possible. (Maybe "asynchronous by default" and
"explicitly synchronous" is possible. But "explicitly synchronous" would be
told by using printk() argument rather than by implicit/global printk context
information...)

Anyway, forcing SysRq output to apply ignore_loglevel will be doable using
printk() argument.


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC] printk/sysrq: Don't play with console_loglevel
  2019-05-28  4:46   ` Sergey Senozhatsky
@ 2019-05-28 13:42     ` Petr Mladek
  2019-05-28 14:21       ` Tetsuo Handa
                         ` (2 more replies)
  0 siblings, 3 replies; 26+ messages in thread
From: Petr Mladek @ 2019-05-28 13:42 UTC (permalink / raw)
  To: Sergey Senozhatsky
  Cc: Dmitry Safonov, linux-kernel, Greg Kroah-Hartman, Jiri Slaby,
	Sergey Senozhatsky, Steven Rostedt, Tetsuo Handa

On Tue 2019-05-28 13:46:19, Sergey Senozhatsky wrote:
> On (05/28/19 13:15), Sergey Senozhatsky wrote:
> > On (05/28/19 01:24), Dmitry Safonov wrote:
> > [..]
> > > While handling sysrq the console_loglevel is bumped to default to print
> > > sysrq headers. It's done to print sysrq messages with WARNING level for
> > > consumers of /proc/kmsg, though it sucks by the following reasons:
> > > - changing console_loglevel may produce tons of messages (especially on
> > >   bloated with debug/info prints systems)
> > > - it doesn't guarantee that the message will be printed as printk may
> > >   deffer the actual console output from buffer (see the comment near
> > >   printk() in kernel/printk/printk.c)
> > > 
> > > Provide KERN_UNSUPPRESSED printk() annotation for such legacy places.
> > > Make sysrq print the headers unsuppressed instead of changing
> > > console_loglevel.

I like this idea. console_loglevel is temporary manipulated only
when some messages should or should never appear on the console.
Storing this information in the message flags would help
to solve all the related races.

> > I've been thinking about this a while ago... So what I thought back
> > then was that affected paths are atomic: sysrq, irqs, NMI, etc. Well
> > at leasted it seemed to be so.

Yes, this would be useful for the other situations when all printk's
in a particular context should be handled this way. We could build
it on top of this patch.

> Ahh.. OK, now I sort of remember why I gave up on this idea (see [1]
> at the bottom, when it comes to uv_nmi_dump_state()) - printk_NMI and
> printk-safe redirections.
> 
> 	NMI
> 		loglevel = NEW
> 		printk -> printk_safe_nmi
> 		loglevel = OLD
> 
> 	iret
> 
> 	IRQ
> 		flush printk_safe_nmi -> printk
> 		// At this point we don't remember about
> 		// loglevel manipulation anymore
> 	iret

printk_safe buffer preserves KERN_* headers. It should be
possible to insert KERN_UNSUPPRESSED there.

The patch itself looks fine on the first sight. If we go
forward, please, split the printk and sysrq part
into two patches.

Best Regards,
Petr

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC] printk/sysrq: Don't play with console_loglevel
  2019-05-28 10:15         ` Tetsuo Handa
@ 2019-05-28 13:49           ` Petr Mladek
  2019-06-07 17:09           ` Pavel Machek
  1 sibling, 0 replies; 26+ messages in thread
From: Petr Mladek @ 2019-05-28 13:49 UTC (permalink / raw)
  To: Tetsuo Handa
  Cc: Sergey Senozhatsky, Dmitry Safonov, linux-kernel,
	Greg Kroah-Hartman, Jiri Slaby, Sergey Senozhatsky,
	Steven Rostedt, Andrew Morton, Linus Togyrvalds

On Tue 2019-05-28 19:15:43, Tetsuo Handa wrote:
> On 2019/05/28 17:51, Sergey Senozhatsky wrote:
> >> You are trying to omit passing KERN_UNSUPPRESSED by utilizing implicit printk
> >> context information. But doesn't such attempt resemble find_printk_buffer() ?
> > 
> > Adding KERN_UNSUPPRESSED to all printks down the op_p->handler()
> > line is hardly possible. At the same time I'd really prefer not
> > to have buffering for sysrq.
> 
> I don't think it is hardly possible. And I really prefer having
> deferred printing for SysRq.

This thread is about problems with manipulating console_loglevel.

"Deferred printk" is another very complicated and controversial
problem. Please, discuss it in a separate thread.

Thanks in advance.

Best Regards,
Petr Mladek

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC] printk/sysrq: Don't play with console_loglevel
  2019-05-28 13:42     ` Petr Mladek
@ 2019-05-28 14:21       ` Tetsuo Handa
  2019-05-28 15:03         ` Petr Mladek
  2019-05-31 14:11       ` Tetsuo Handa
  2019-06-03  6:51       ` [RFC] printk/sysrq: Don't play with console_loglevel Sergey Senozhatsky
  2 siblings, 1 reply; 26+ messages in thread
From: Tetsuo Handa @ 2019-05-28 14:21 UTC (permalink / raw)
  To: Petr Mladek, Sergey Senozhatsky
  Cc: Dmitry Safonov, linux-kernel, Greg Kroah-Hartman, Jiri Slaby,
	Sergey Senozhatsky, Steven Rostedt

On 2019/05/28 22:42, Petr Mladek wrote:
>> Ahh.. OK, now I sort of remember why I gave up on this idea (see [1]
>> at the bottom, when it comes to uv_nmi_dump_state()) - printk_NMI and
>> printk-safe redirections.
>>
>> 	NMI
>> 		loglevel = NEW
>> 		printk -> printk_safe_nmi
>> 		loglevel = OLD
>>
>> 	iret
>>
>> 	IRQ
>> 		flush printk_safe_nmi -> printk
>> 		// At this point we don't remember about
>> 		// loglevel manipulation anymore
>> 	iret
> 
> printk_safe buffer preserves KERN_* headers. It should be
> possible to insert KERN_UNSUPPRESSED there.

But is context dependent buffer large enough to hold SysRq-t output?
I think that only main logbuf can become large enough to hold SysRq-t output.

We can add KERN_UNSUPPRESSED to SysRq's header line. But I don't think
that we can automatically add KERN_UNSUPPRESSED to SysRq's body lines
based on some context information. If we want to avoid manipulating
console_loglevel, we need to think about how to make sure that
KERN_UNSUPPRESSED is added to all lines from such context without
overflowing capacity of that buffer.


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC] printk/sysrq: Don't play with console_loglevel
  2019-05-28 14:21       ` Tetsuo Handa
@ 2019-05-28 15:03         ` Petr Mladek
  2019-05-28 15:21           ` Tetsuo Handa
  0 siblings, 1 reply; 26+ messages in thread
From: Petr Mladek @ 2019-05-28 15:03 UTC (permalink / raw)
  To: Tetsuo Handa
  Cc: Sergey Senozhatsky, Dmitry Safonov, linux-kernel,
	Greg Kroah-Hartman, Jiri Slaby, Sergey Senozhatsky,
	Steven Rostedt

On Tue 2019-05-28 23:21:17, Tetsuo Handa wrote:
> On 2019/05/28 22:42, Petr Mladek wrote:
> >> Ahh.. OK, now I sort of remember why I gave up on this idea (see [1]
> >> at the bottom, when it comes to uv_nmi_dump_state()) - printk_NMI and
> >> printk-safe redirections.
> >>
> >> 	NMI
> >> 		loglevel = NEW
> >> 		printk -> printk_safe_nmi
> >> 		loglevel = OLD
> >>
> >> 	iret
> >>
> >> 	IRQ
> >> 		flush printk_safe_nmi -> printk
> >> 		// At this point we don't remember about
> >> 		// loglevel manipulation anymore
> >> 	iret
> > 
> > printk_safe buffer preserves KERN_* headers. It should be
> > possible to insert KERN_UNSUPPRESSED there.
> 
> But is context dependent buffer large enough to hold SysRq-t output?
> I think that only main logbuf can become large enough to hold SysRq-t output.

SysRq messages are stored directly into the main log buffer.

The limited per-CPU buffers are needed only in printk_safe
and NMI context. We discussed it here because KERN_UNSUPPRESSED
allows to pass the information even from this context.

> We can add KERN_UNSUPPRESSED to SysRq's header line. But I don't think
> that we can automatically add KERN_UNSUPPRESSED to SysRq's body lines
> based on some context information. If we want to avoid manipulating
> console_loglevel, we need to think about how to make sure that
> KERN_UNSUPPRESSED is added to all lines from such context without
> overflowing capacity of that buffer.

We could set this context in printk_context per-CPU variable.

Then we could easily add the set per-message flag in
vprintk_store() for the normal/atomic context. And we
could store an extra KERN_UNSUPPRESSED in printk_safe_log_store()
for printk_safe and NMI context.

Best Regards,
Petr

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC] printk/sysrq: Don't play with console_loglevel
  2019-05-28 15:03         ` Petr Mladek
@ 2019-05-28 15:21           ` Tetsuo Handa
  2019-05-28 15:58             ` Dmitry Safonov
  0 siblings, 1 reply; 26+ messages in thread
From: Tetsuo Handa @ 2019-05-28 15:21 UTC (permalink / raw)
  To: Petr Mladek
  Cc: Sergey Senozhatsky, Dmitry Safonov, linux-kernel,
	Greg Kroah-Hartman, Jiri Slaby, Sergey Senozhatsky,
	Steven Rostedt

On 2019/05/29 0:03, Petr Mladek wrote:
>> But is context dependent buffer large enough to hold SysRq-t output?
>> I think that only main logbuf can become large enough to hold SysRq-t output.
> 
> SysRq messages are stored directly into the main log buffer.
> 
> The limited per-CPU buffers are needed only in printk_safe
> and NMI context. We discussed it here because KERN_UNSUPPRESSED
> allows to pass the information even from this context.
> 
>> We can add KERN_UNSUPPRESSED to SysRq's header line. But I don't think
>> that we can automatically add KERN_UNSUPPRESSED to SysRq's body lines
>> based on some context information. If we want to avoid manipulating
>> console_loglevel, we need to think about how to make sure that
>> KERN_UNSUPPRESSED is added to all lines from such context without
>> overflowing capacity of that buffer.
> 
> We could set this context in printk_context per-CPU variable.
> 
> Then we could easily add the set per-message flag in
> vprintk_store() for the normal/atomic context. And we
> could store an extra KERN_UNSUPPRESSED in printk_safe_log_store()
> for printk_safe and NMI context.

Now I got what you are trying to do. Borrow per-CPU printk_context
flags for automatically prefixing KERN_UNSUPPRESSED, based on an
assumption that any message sent during that per-CPU printk_context
flag set is important enough. Then, what we need to be careful is
nesting of setting/clearing of that flag, for NMI handler might be
called during SysRq operation is in progress. We unconditionally
prefix KERN_UNSUPPRESSED if NMI?


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC] printk/sysrq: Don't play with console_loglevel
  2019-05-28 15:21           ` Tetsuo Handa
@ 2019-05-28 15:58             ` Dmitry Safonov
  0 siblings, 0 replies; 26+ messages in thread
From: Dmitry Safonov @ 2019-05-28 15:58 UTC (permalink / raw)
  To: Tetsuo Handa, Petr Mladek
  Cc: Sergey Senozhatsky, linux-kernel, Greg Kroah-Hartman, Jiri Slaby,
	Sergey Senozhatsky, Steven Rostedt

Hi guys,

I see that the thread is ongoing and you understand printk code much
better than me or probably anybody :)

So, feel free to reuse it. Or I can send v1 with split sysrq/printk
parts if you think it's worth being shaped further.

I think worth to mention three "features" that you might had a chance to
miss from the current code:

1. op_p->handler(key) is not printed under console_loglevel hackery.
   So, under RFC I preserved the behavior. Probably, you don't miss it
   and just looking into ways to change it, but I thought worth
   mentioning.
2. I've found it surprising how pr_info() interacts with pr_cont():
   Basically, pr_cont() without KERN_<LEVEL> prefix will print the
   resulting line with default_message_loglevel AFAIU from cont.level.
   Which might be higher than warning.
   I might miss a part that corrects cont.level to the first
   message's level?
3. CONSOLE_LOGLEVEL_DEFAULT is config-based, so having in mind that it
   can be changed and (2) - it gives me hard time to understand when
   the sysrq message will be printed and when will be suppressed.

Thanks,
          Dmitry

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC] printk/sysrq: Don't play with console_loglevel
  2019-05-28 13:42     ` Petr Mladek
  2019-05-28 14:21       ` Tetsuo Handa
@ 2019-05-31 14:11       ` Tetsuo Handa
  2019-06-02  6:13         ` [RFC] printk: Introduce per context console loglevel Tetsuo Handa
  2019-06-03  6:51       ` [RFC] printk/sysrq: Don't play with console_loglevel Sergey Senozhatsky
  2 siblings, 1 reply; 26+ messages in thread
From: Tetsuo Handa @ 2019-05-31 14:11 UTC (permalink / raw)
  To: Petr Mladek, Sergey Senozhatsky
  Cc: Dmitry Safonov, linux-kernel, Greg Kroah-Hartman, Jiri Slaby,
	Sergey Senozhatsky, Steven Rostedt

On 2019/05/28 22:42, Petr Mladek wrote:
> On Tue 2019-05-28 13:46:19, Sergey Senozhatsky wrote:
>> On (05/28/19 13:15), Sergey Senozhatsky wrote:
>>> On (05/28/19 01:24), Dmitry Safonov wrote:
>>> [..]
>>>> While handling sysrq the console_loglevel is bumped to default to print
>>>> sysrq headers. It's done to print sysrq messages with WARNING level for
>>>> consumers of /proc/kmsg, though it sucks by the following reasons:
>>>> - changing console_loglevel may produce tons of messages (especially on
>>>>   bloated with debug/info prints systems)
>>>> - it doesn't guarantee that the message will be printed as printk may
>>>>   deffer the actual console output from buffer (see the comment near
>>>>   printk() in kernel/printk/printk.c)
>>>>
>>>> Provide KERN_UNSUPPRESSED printk() annotation for such legacy places.
>>>> Make sysrq print the headers unsuppressed instead of changing
>>>> console_loglevel.
> 
> I like this idea. console_loglevel is temporary manipulated only
> when some messages should or should never appear on the console.
> Storing this information in the message flags would help
> to solve all the related races.

Something like this?

---
 arch/ia64/kernel/mca.c        | 37 +++++++++++++++------------
 arch/x86/platform/uv/uv_nmi.c |  6 ++---
 drivers/tty/sysrq.c           |  9 +++----
 include/linux/printk.h        | 24 +++++++++---------
 include/linux/sched.h         |  3 +++
 kernel/debug/kdb/kdb_bt.c     |  5 ++--
 kernel/debug/kdb/kdb_io.c     |  5 ++--
 kernel/debug/kdb/kdb_main.c   |  5 ++--
 kernel/printk/printk.c        | 59 +++++++++++++++++++++++++++++++++++++++----
 kernel/printk/printk_safe.c   |  2 ++
 10 files changed, 105 insertions(+), 50 deletions(-)

diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 6a52d76..5f3968c 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -189,19 +189,24 @@
 static unsigned long mlogbuf_timestamp = 0;
 
 static int loglevel_save = -1;
-#define BREAK_LOGLEVEL(__console_loglevel)		\
-	oops_in_progress = 1;				\
-	if (loglevel_save < 0)				\
-		loglevel_save = __console_loglevel;	\
-	__console_loglevel = 15;
-
-#define RESTORE_LOGLEVEL(__console_loglevel)		\
-	if (loglevel_save >= 0) {			\
-		__console_loglevel = loglevel_save;	\
-		loglevel_save = -1;			\
-	}						\
-	mlogbuf_finished = 0;				\
-	oops_in_progress = 0;
+#define BREAK_LOGLEVEL()						\
+	do {								\
+		int ret;						\
+		oops_in_progress = 1;					\
+		ret = set_local_loglevel(CONSOLE_LOGLEVEL_MOTORMOUTH);	\
+		if (loglevel_save < 0)					\
+			loglevel_save = ret;				\
+	} while (0)
+
+#define RESTORE_LOGLEVEL()					\
+	do {							\
+		if (loglevel_save >= 0) {			\
+			set_local_loglevel(loglevel_save);	\
+			loglevel_save = -1;			\
+		}						\
+		mlogbuf_finished = 0;				\
+		oops_in_progress = 0;				\
+	} while (0)
 
 /*
  * Push messages into buffer, print them later if not urgent.
@@ -288,7 +293,7 @@ void ia64_mlogbuf_dump(void)
  */
 static void ia64_mlogbuf_finish(int wait)
 {
-	BREAK_LOGLEVEL(console_loglevel);
+	BREAK_LOGLEVEL();
 
 	spin_lock_init(&mlogbuf_rlock);
 	ia64_mlogbuf_dump();
@@ -1623,7 +1628,7 @@ static void mca_insert_tr(u64 iord)
 	 * To enable show_stack from INIT, we use oops_in_progress which should
 	 * be used in real oops. This would cause something wrong after INIT.
 	 */
-	BREAK_LOGLEVEL(console_loglevel);
+	BREAK_LOGLEVEL();
 	ia64_mlogbuf_dump_from_init();
 
 	printk(KERN_ERR "Processes interrupted by INIT -");
@@ -1648,7 +1653,7 @@ static void mca_insert_tr(u64 iord)
 		read_unlock(&tasklist_lock);
 	}
 	/* FIXME: This will not restore zapped printk locks. */
-	RESTORE_LOGLEVEL(console_loglevel);
+	RESTORE_LOGLEVEL();
 	return NOTIFY_DONE;
 }
 
diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c
index b21a932..48592d1 100644
--- a/arch/x86/platform/uv/uv_nmi.c
+++ b/arch/x86/platform/uv/uv_nmi.c
@@ -766,13 +766,13 @@ static void uv_nmi_dump_state(int cpu, struct pt_regs *regs, int master)
 	if (master) {
 		int tcpu;
 		int ignored = 0;
-		int saved_console_loglevel = console_loglevel;
+		int saved_loglevel;
 
 		pr_alert("UV: tracing %s for %d CPUs from CPU %d\n",
 			uv_nmi_action_is("ips") ? "IPs" : "processes",
 			atomic_read(&uv_nmi_cpus_in_nmi), cpu);
 
-		console_loglevel = uv_nmi_loglevel;
+		saved_loglevel = set_local_loglevel(uv_nmi_loglevel);
 		atomic_set(&uv_nmi_slave_continue, SLAVE_EXIT);
 		for_each_online_cpu(tcpu) {
 			if (cpumask_test_cpu(tcpu, uv_nmi_cpu_mask))
@@ -785,7 +785,7 @@ static void uv_nmi_dump_state(int cpu, struct pt_regs *regs, int master)
 		if (ignored)
 			pr_alert("UV: %d CPUs ignored NMI\n", ignored);
 
-		console_loglevel = saved_console_loglevel;
+		set_local_loglevel(saved_loglevel);
 		pr_alert("UV: process trace complete\n");
 	} else {
 		while (!atomic_read(&uv_nmi_slave_continue))
diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index 573b205..5675977 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -541,8 +541,7 @@ void __handle_sysrq(int key, bool check_mask)
 	 * simply emit this at KERN_EMERG as that would change message
 	 * routing in the consumers of /proc/kmsg.
 	 */
-	orig_log_level = console_loglevel;
-	console_loglevel = CONSOLE_LOGLEVEL_DEFAULT;
+	orig_log_level = set_local_loglevel(CONSOLE_LOGLEVEL_DEFAULT);
 
         op_p = __sysrq_get_key_op(key);
         if (op_p) {
@@ -552,11 +551,11 @@ void __handle_sysrq(int key, bool check_mask)
 		 */
 		if (!check_mask || sysrq_on_mask(op_p->enable_mask)) {
 			pr_info("%s\n", op_p->action_msg);
-			console_loglevel = orig_log_level;
+			set_local_loglevel(orig_log_level);
 			op_p->handler(key);
 		} else {
 			pr_info("This sysrq operation is disabled.\n");
-			console_loglevel = orig_log_level;
+			set_local_loglevel(orig_log_level);
 		}
 	} else {
 		pr_info("HELP : ");
@@ -574,7 +573,7 @@ void __handle_sysrq(int key, bool check_mask)
 			}
 		}
 		pr_cont("\n");
-		console_loglevel = orig_log_level;
+		set_local_loglevel(orig_log_level);
 	}
 	rcu_read_unlock();
 	rcu_sysrq_end();
diff --git a/include/linux/printk.h b/include/linux/printk.h
index cefd374..b2dd248 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -47,7 +47,7 @@ static inline const char *printk_skip_headers(const char *buffer)
 #define MESSAGE_LOGLEVEL_DEFAULT CONFIG_MESSAGE_LOGLEVEL_DEFAULT
 
 /* We show everything that is MORE important than this.. */
-#define CONSOLE_LOGLEVEL_SILENT  0 /* Mum's the word */
+#define CONSOLE_LOGLEVEL_SILENT -1 /* Mum's the word */
 #define CONSOLE_LOGLEVEL_MIN	 1 /* Minimum loglevel we let people use */
 #define CONSOLE_LOGLEVEL_DEBUG	10 /* issue debug messages */
 #define CONSOLE_LOGLEVEL_MOTORMOUTH 15	/* You can't shut this one up */
@@ -66,17 +66,6 @@ static inline const char *printk_skip_headers(const char *buffer)
 #define minimum_console_loglevel (console_printk[2])
 #define default_console_loglevel (console_printk[3])
 
-static inline void console_silent(void)
-{
-	console_loglevel = CONSOLE_LOGLEVEL_SILENT;
-}
-
-static inline void console_verbose(void)
-{
-	if (console_loglevel)
-		console_loglevel = CONSOLE_LOGLEVEL_MOTORMOUTH;
-}
-
 /* strlen("ratelimit") + 1 */
 #define DEVKMSG_STR_MAX_SIZE 10
 extern char devkmsg_log_str[];
@@ -205,6 +194,7 @@ extern bool printk_timed_ratelimit(unsigned long *caller_jiffies,
 extern void printk_safe_init(void);
 extern void printk_safe_flush(void);
 extern void printk_safe_flush_on_panic(void);
+char set_local_loglevel(char level);
 #else
 static inline __printf(1, 0)
 int vprintk(const char *s, va_list args)
@@ -280,8 +270,18 @@ static inline void printk_safe_flush(void)
 static inline void printk_safe_flush_on_panic(void)
 {
 }
+
+static inline char set_local_loglevel(char level)
+{
+	return 0;
+}
 #endif
 
+static inline void console_verbose(void)
+{
+	set_local_loglevel(CONSOLE_LOGLEVEL_MOTORMOUTH);
+}
+
 extern int kptr_restrict;
 
 #ifndef pr_fmt
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1183741..283d0d2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -732,6 +732,9 @@ struct task_struct {
 	/* to be used once the psi infrastructure lands upstream. */
 	unsigned			use_memdelay:1;
 #endif
+#ifdef CONFIG_PRINTK
+	unsigned			printk_loglevel:8;
+#endif
 
 	unsigned long			atomic_flags; /* Flags requiring atomic access. */
 
diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c
index 7e2379a..b385a7e 100644
--- a/kernel/debug/kdb/kdb_bt.c
+++ b/kernel/debug/kdb/kdb_bt.c
@@ -21,8 +21,7 @@
 
 static void kdb_show_stack(struct task_struct *p, void *addr)
 {
-	int old_lvl = console_loglevel;
-	console_loglevel = CONSOLE_LOGLEVEL_MOTORMOUTH;
+	int old_lvl = set_local_loglevel(CONSOLE_LOGLEVEL_MOTORMOUTH);
 	kdb_trap_printk++;
 	kdb_set_current_task(p);
 	if (addr) {
@@ -36,7 +35,7 @@ static void kdb_show_stack(struct task_struct *p, void *addr)
 	} else {
 		show_stack(p, NULL);
 	}
-	console_loglevel = old_lvl;
+	set_local_loglevel(old_lvl);
 	kdb_trap_printk--;
 }
 
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index 3a5184e..8b76103 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -715,8 +715,7 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap)
 		}
 	}
 	if (logging) {
-		saved_loglevel = console_loglevel;
-		console_loglevel = CONSOLE_LOGLEVEL_SILENT;
+		saved_loglevel = set_local_loglevel(CONSOLE_LOGLEVEL_SILENT);
 		if (printk_get_level(kdb_buffer) || src == KDB_MSGSRC_PRINTK)
 			printk("%s", kdb_buffer);
 		else
@@ -845,7 +844,7 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap)
 kdb_print_out:
 	suspend_grep = 0; /* end of what may have been a recursive call */
 	if (logging)
-		console_loglevel = saved_loglevel;
+		set_local_loglevel(saved_loglevel);
 	/* kdb_printf_cpu locked the code above. */
 	smp_store_release(&kdb_printf_cpu, old_cpu);
 	local_irq_restore(flags);
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 9ecfa37..395eb98 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -1130,13 +1130,12 @@ static int kdb_reboot(int argc, const char **argv)
 
 static void kdb_dumpregs(struct pt_regs *regs)
 {
-	int old_lvl = console_loglevel;
-	console_loglevel = CONSOLE_LOGLEVEL_MOTORMOUTH;
+	int old_lvl = set_local_loglevel(CONSOLE_LOGLEVEL_MOTORMOUTH);
 	kdb_trap_printk++;
 	show_regs(regs);
 	kdb_trap_printk--;
 	kdb_printf("\n");
-	console_loglevel = old_lvl;
+	set_local_loglevel(old_lvl);
 }
 
 void kdb_set_current_task(struct task_struct *p)
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 1888f6a..dfdefe9 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -351,7 +351,9 @@ enum con_msg_format_flags {
  */
 
 enum log_flags {
-	LOG_NEWLINE	= 2,	/* text ended with a newline */
+	LOG_ALWAYS_CON  = 1,    /* Force suppress_message_printing() = false */
+	LOG_NEVER_CON   = 2,    /* Force suppress_message_printing() = true */
+	LOG_NEWLINE	= 4,	/* text ended with a newline */
 	LOG_CONT	= 8,	/* text is a fragment of a continuation line */
 };
 
@@ -1172,6 +1174,9 @@ void __init setup_log_buf(int early)
 }
 
 static bool __read_mostly ignore_loglevel;
+static DEFINE_PER_CPU(char, printk_loglevel_nmi);
+static DEFINE_PER_CPU(char, printk_loglevel_irq);
+static DEFINE_PER_CPU(char, printk_loglevel_softirq);
 
 static int __init ignore_loglevel_setup(char *str)
 {
@@ -1186,11 +1191,36 @@ static int __init ignore_loglevel_setup(char *str)
 MODULE_PARM_DESC(ignore_loglevel,
 		 "ignore loglevel setting (prints all kernel messages to the console)");
 
-static bool suppress_message_printing(int level)
+static bool suppress_message_printing(int flags, int level)
 {
+	if (flags & LOG_ALWAYS_CON)
+		return false;
+	if (flags & LOG_NEVER_CON)
+		return true;
 	return (level >= console_loglevel && !ignore_loglevel);
 }
 
+char set_local_loglevel(char level)
+{
+	char old;
+
+	if (in_nmi()) {
+		old = this_cpu_read(printk_loglevel_nmi);
+		this_cpu_write(printk_loglevel_nmi, level);
+	} else if (in_irq()) {
+		old = this_cpu_read(printk_loglevel_irq);
+		this_cpu_write(printk_loglevel_irq, level);
+	} else if (in_serving_softirq()) {
+		old = this_cpu_read(printk_loglevel_softirq);
+		this_cpu_write(printk_loglevel_softirq, level);
+	} else {
+		old = current->printk_loglevel;
+		current->printk_loglevel = level;
+	}
+	return old;
+}
+EXPORT_SYMBOL(set_local_loglevel);
+
 #ifdef CONFIG_BOOT_PRINTK_DELAY
 
 static int boot_delay; /* msecs delay after each printk during bootup */
@@ -1220,7 +1250,7 @@ static void boot_delay_msec(int level)
 	unsigned long timeout;
 
 	if ((boot_delay == 0 || system_state >= SYSTEM_RUNNING)
-		|| suppress_message_printing(level)) {
+	    || suppress_message_printing(0, level)) {
 		return;
 	}
 
@@ -1934,6 +1964,25 @@ int vprintk_store(int facility, int level,
 	if (level == LOGLEVEL_DEFAULT)
 		level = default_message_loglevel;
 
+	{
+		char loglevel;
+
+		if (in_nmi())
+			loglevel = this_cpu_read(printk_loglevel_nmi);
+		else if (in_irq())
+			loglevel = this_cpu_read(printk_loglevel_irq);
+		else if (in_serving_softirq())
+			loglevel = this_cpu_read(printk_loglevel_softirq);
+		else
+			loglevel = current->printk_loglevel;
+		if (loglevel) {
+			if (level >= (int) loglevel && !ignore_loglevel)
+				lflags |= LOG_NEVER_CON;
+			else
+				lflags |= LOG_ALWAYS_CON;
+		}
+	}
+
 	if (dict)
 		lflags |= LOG_NEWLINE;
 
@@ -2080,7 +2129,7 @@ static void call_console_drivers(const char *ext_text, size_t ext_len,
 				 const char *text, size_t len) {}
 static size_t msg_print_text(const struct printk_log *msg, bool syslog,
 			     bool time, char *buf, size_t size) { return 0; }
-static bool suppress_message_printing(int level) { return false; }
+static bool suppress_message_printing(int flags, int level) { return false; }
 
 #endif /* CONFIG_PRINTK */
 
@@ -2418,7 +2467,7 @@ void console_unlock(void)
 			break;
 
 		msg = log_from_idx(console_idx);
-		if (suppress_message_printing(msg->level)) {
+		if (suppress_message_printing(msg->flags, msg->level)) {
 			/*
 			 * Skip record we have buffered and already printed
 			 * directly to the console when we received it, and
diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c
index b4045e7..d33bea5 100644
--- a/kernel/printk/printk_safe.c
+++ b/kernel/printk/printk_safe.c
@@ -186,6 +186,7 @@ static void __printk_safe_flush(struct irq_work *work)
 	unsigned long flags;
 	size_t len;
 	int i;
+	char level = set_local_loglevel(CONSOLE_LOGLEVEL_MOTORMOUTH);
 
 	/*
 	 * The lock has two functions. First, one reader has to flush all
@@ -232,6 +233,7 @@ static void __printk_safe_flush(struct irq_work *work)
 out:
 	report_message_lost(s);
 	raw_spin_unlock_irqrestore(&read_lock, flags);
+	set_local_loglevel(level);
 }
 
 /**
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [RFC] printk: Introduce per context console loglevel.
  2019-05-31 14:11       ` Tetsuo Handa
@ 2019-06-02  6:13         ` Tetsuo Handa
  2019-06-06  7:59           ` Petr Mladek
  0 siblings, 1 reply; 26+ messages in thread
From: Tetsuo Handa @ 2019-06-02  6:13 UTC (permalink / raw)
  To: Petr Mladek, Sergey Senozhatsky
  Cc: Dmitry Safonov, linux-kernel, Greg Kroah-Hartman, Jiri Slaby,
	Sergey Senozhatsky, Steven Rostedt, x86, linux-ia64,
	kgdb-bugreport

Dmitry Safonov proposed KERN_UNSUPPRESSED loglevel which pretends as if
ignore_loglevel was specified for per printk() basis, for we can fail to
apply temporarily manipulated console loglevel because console loglevel
is evaluated when the message is printed to consoles rather than when
the message is stored into the buffer [1].

Temporary manipulation of console loglevel for SysRq is applied to only
the header line. At first I though that we also want to apply temporary
manipulation of console loglevel for SysRq to the body lines, for showing
only the header line is hardly helpful. But I realized that we should not
force showing the body lines because some users might be triggering SysRq
 from /proc and reading via syslog rather than via console output. Users
who need to read via console output should be able to manipulate console
loglevel by triggering SysRq from console.

Since we currently defer storing of the messages from NMI context and
recursive context, we would need to explicitly pass KERN_UNSUPPRESSED.
But Sergey Senozhatsky thinks that it might be fine to automatically
apply KERN_UNSUPPRESSED to printk() from NMI context and recursive
context, for messages from these contexts are likely important [2].
Then, we could avoid explicitly passing KERN_UNSUPPRESSED, by introducing
per context console loglevel.

This patch introduces per CPU console loglevel (for in_nmi(), in_irq() and
in_serving_softirq()) and per thread console loglevel (for in_task()), and
replaces temporary manipulation of global console_loglevel with temporary
manipulation of per context console_loglevel based on an assumption that
users who are temporarily manipulating global console_loglevel needs to
apply it to only current context. (Note that triggering SysRq-t from /proc
runs in in_task() context, and it should not disable preemption because it
may take long period. Thus, per thread console loglevel is used.)

If per context console_loglevel is 0 when printk() is called, global
console_loglevel is evaluated when the message is printed to consoles.
If per context console_loglevel is not 0 when printk() is called, per
context console_loglevel is evaluated when the message is stored, and
that result supersedes global console_loglevel evaluation.

With the combination of automatically applying CONSOLE_LOGLEVEL_MOTORMOUTH
for printk() from NMI context and recursive context, this patch does not
introduce KERN_UNSUPPRESSED loglevel. (Note that CONSOLE_LOGLEVEL_SILENT
is changed from 0 to -1 in order to make per context console_loglevel
evaluation simpler.)

[1] https://lkml.kernel.org/r/20190528002412.1625-1-dima@arista.com
[2] https://lkml.kernel.org/r/20190528044619.GA3429@jagdpanzerIV

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
---
 arch/ia64/kernel/mca.c        | 36 ++++++++++++++-----------
 arch/x86/platform/uv/uv_nmi.c |  6 ++---
 drivers/tty/sysrq.c           | 10 +++----
 include/linux/printk.h        | 29 +++++++++++---------
 include/linux/sched.h         |  3 +++
 kernel/debug/kdb/kdb_bt.c     |  7 ++---
 kernel/debug/kdb/kdb_io.c     |  6 ++---
 kernel/debug/kdb/kdb_main.c   |  7 ++---
 kernel/printk/printk.c        | 63 +++++++++++++++++++++++++++++++++++++++----
 kernel/printk/printk_safe.c   |  4 +++
 10 files changed, 121 insertions(+), 50 deletions(-)

diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 6a52d76..57b2c7c 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -189,19 +189,23 @@
 static unsigned long mlogbuf_timestamp = 0;
 
 static int loglevel_save = -1;
-#define BREAK_LOGLEVEL(__console_loglevel)		\
-	oops_in_progress = 1;				\
-	if (loglevel_save < 0)				\
-		loglevel_save = __console_loglevel;	\
-	__console_loglevel = 15;
-
-#define RESTORE_LOGLEVEL(__console_loglevel)		\
-	if (loglevel_save >= 0) {			\
-		__console_loglevel = loglevel_save;	\
-		loglevel_save = -1;			\
-	}						\
-	mlogbuf_finished = 0;				\
-	oops_in_progress = 0;
+#define BREAK_LOGLEVEL()					\
+	do {							\
+		oops_in_progress = 1;				\
+		if (loglevel_save < 0)				\
+			loglevel_save = get_local_loglevel();	\
+		set_local_loglevel(CONSOLE_LOGLEVEL_MOTORMOUTH);\
+	} while (0)
+
+#define RESTORE_LOGLEVEL()					\
+	do {							\
+		if (loglevel_save >= 0) {			\
+			set_local_loglevel(loglevel_save);	\
+			loglevel_save = -1;			\
+		}						\
+		mlogbuf_finished = 0;				\
+		oops_in_progress = 0;				\
+	} while (0)
 
 /*
  * Push messages into buffer, print them later if not urgent.
@@ -288,7 +292,7 @@ void ia64_mlogbuf_dump(void)
  */
 static void ia64_mlogbuf_finish(int wait)
 {
-	BREAK_LOGLEVEL(console_loglevel);
+	BREAK_LOGLEVEL();
 
 	spin_lock_init(&mlogbuf_rlock);
 	ia64_mlogbuf_dump();
@@ -1623,7 +1627,7 @@ static void mca_insert_tr(u64 iord)
 	 * To enable show_stack from INIT, we use oops_in_progress which should
 	 * be used in real oops. This would cause something wrong after INIT.
 	 */
-	BREAK_LOGLEVEL(console_loglevel);
+	BREAK_LOGLEVEL();
 	ia64_mlogbuf_dump_from_init();
 
 	printk(KERN_ERR "Processes interrupted by INIT -");
@@ -1648,7 +1652,7 @@ static void mca_insert_tr(u64 iord)
 		read_unlock(&tasklist_lock);
 	}
 	/* FIXME: This will not restore zapped printk locks. */
-	RESTORE_LOGLEVEL(console_loglevel);
+	RESTORE_LOGLEVEL();
 	return NOTIFY_DONE;
 }
 
diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c
index 9d08ff5..a6c1a86 100644
--- a/arch/x86/platform/uv/uv_nmi.c
+++ b/arch/x86/platform/uv/uv_nmi.c
@@ -753,13 +753,13 @@ static void uv_nmi_dump_state(int cpu, struct pt_regs *regs, int master)
 	if (master) {
 		int tcpu;
 		int ignored = 0;
-		int saved_console_loglevel = console_loglevel;
+		int saved_loglevel = get_local_loglevel();
 
 		pr_alert("UV: tracing %s for %d CPUs from CPU %d\n",
 			uv_nmi_action_is("ips") ? "IPs" : "processes",
 			atomic_read(&uv_nmi_cpus_in_nmi), cpu);
 
-		console_loglevel = uv_nmi_loglevel;
+		set_local_loglevel(uv_nmi_loglevel);
 		atomic_set(&uv_nmi_slave_continue, SLAVE_EXIT);
 		for_each_online_cpu(tcpu) {
 			if (cpumask_test_cpu(tcpu, uv_nmi_cpu_mask))
@@ -772,7 +772,7 @@ static void uv_nmi_dump_state(int cpu, struct pt_regs *regs, int master)
 		if (ignored)
 			pr_alert("UV: %d CPUs ignored NMI\n", ignored);
 
-		console_loglevel = saved_console_loglevel;
+		set_local_loglevel(saved_loglevel);
 		pr_alert("UV: process trace complete\n");
 	} else {
 		while (!atomic_read(&uv_nmi_slave_continue))
diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index 573b205..18cb58e 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -541,8 +541,8 @@ void __handle_sysrq(int key, bool check_mask)
 	 * simply emit this at KERN_EMERG as that would change message
 	 * routing in the consumers of /proc/kmsg.
 	 */
-	orig_log_level = console_loglevel;
-	console_loglevel = CONSOLE_LOGLEVEL_DEFAULT;
+	orig_log_level = get_local_loglevel();
+	set_local_loglevel(CONSOLE_LOGLEVEL_DEFAULT);
 
         op_p = __sysrq_get_key_op(key);
         if (op_p) {
@@ -552,11 +552,11 @@ void __handle_sysrq(int key, bool check_mask)
 		 */
 		if (!check_mask || sysrq_on_mask(op_p->enable_mask)) {
 			pr_info("%s\n", op_p->action_msg);
-			console_loglevel = orig_log_level;
+			set_local_loglevel(orig_log_level);
 			op_p->handler(key);
 		} else {
 			pr_info("This sysrq operation is disabled.\n");
-			console_loglevel = orig_log_level;
+			set_local_loglevel(orig_log_level);
 		}
 	} else {
 		pr_info("HELP : ");
@@ -574,7 +574,7 @@ void __handle_sysrq(int key, bool check_mask)
 			}
 		}
 		pr_cont("\n");
-		console_loglevel = orig_log_level;
+		set_local_loglevel(orig_log_level);
 	}
 	rcu_read_unlock();
 	rcu_sysrq_end();
diff --git a/include/linux/printk.h b/include/linux/printk.h
index cefd374..78b357a 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -47,7 +47,7 @@ static inline const char *printk_skip_headers(const char *buffer)
 #define MESSAGE_LOGLEVEL_DEFAULT CONFIG_MESSAGE_LOGLEVEL_DEFAULT
 
 /* We show everything that is MORE important than this.. */
-#define CONSOLE_LOGLEVEL_SILENT  0 /* Mum's the word */
+#define CONSOLE_LOGLEVEL_SILENT -1 /* Mum's the word */
 #define CONSOLE_LOGLEVEL_MIN	 1 /* Minimum loglevel we let people use */
 #define CONSOLE_LOGLEVEL_DEBUG	10 /* issue debug messages */
 #define CONSOLE_LOGLEVEL_MOTORMOUTH 15	/* You can't shut this one up */
@@ -66,17 +66,6 @@ static inline const char *printk_skip_headers(const char *buffer)
 #define minimum_console_loglevel (console_printk[2])
 #define default_console_loglevel (console_printk[3])
 
-static inline void console_silent(void)
-{
-	console_loglevel = CONSOLE_LOGLEVEL_SILENT;
-}
-
-static inline void console_verbose(void)
-{
-	if (console_loglevel)
-		console_loglevel = CONSOLE_LOGLEVEL_MOTORMOUTH;
-}
-
 /* strlen("ratelimit") + 1 */
 #define DEVKMSG_STR_MAX_SIZE 10
 extern char devkmsg_log_str[];
@@ -205,6 +194,8 @@ extern bool printk_timed_ratelimit(unsigned long *caller_jiffies,
 extern void printk_safe_init(void);
 extern void printk_safe_flush(void);
 extern void printk_safe_flush_on_panic(void);
+int get_local_loglevel(void);
+void set_local_loglevel(int level);
 #else
 static inline __printf(1, 0)
 int vprintk(const char *s, va_list args)
@@ -280,8 +271,22 @@ static inline void printk_safe_flush(void)
 static inline void printk_safe_flush_on_panic(void)
 {
 }
+
+static inline int get_local_loglevel(void)
+{
+	return 0;
+}
+
+static inline void set_local_loglevel(int level)
+{
+}
 #endif
 
+static inline void console_verbose(void)
+{
+	set_local_loglevel(CONSOLE_LOGLEVEL_MOTORMOUTH);
+}
+
 extern int kptr_restrict;
 
 #ifndef pr_fmt
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1183741..283d0d2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -732,6 +732,9 @@ struct task_struct {
 	/* to be used once the psi infrastructure lands upstream. */
 	unsigned			use_memdelay:1;
 #endif
+#ifdef CONFIG_PRINTK
+	unsigned			printk_loglevel:8;
+#endif
 
 	unsigned long			atomic_flags; /* Flags requiring atomic access. */
 
diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c
index 7e2379a..e2580d3 100644
--- a/kernel/debug/kdb/kdb_bt.c
+++ b/kernel/debug/kdb/kdb_bt.c
@@ -21,8 +21,9 @@
 
 static void kdb_show_stack(struct task_struct *p, void *addr)
 {
-	int old_lvl = console_loglevel;
-	console_loglevel = CONSOLE_LOGLEVEL_MOTORMOUTH;
+	int old_lvl = get_local_loglevel();
+
+	set_local_loglevel(CONSOLE_LOGLEVEL_MOTORMOUTH);
 	kdb_trap_printk++;
 	kdb_set_current_task(p);
 	if (addr) {
@@ -36,7 +37,7 @@ static void kdb_show_stack(struct task_struct *p, void *addr)
 	} else {
 		show_stack(p, NULL);
 	}
-	console_loglevel = old_lvl;
+	set_local_loglevel(old_lvl);
 	kdb_trap_printk--;
 }
 
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index 3a5184e..723d62d 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -715,8 +715,8 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap)
 		}
 	}
 	if (logging) {
-		saved_loglevel = console_loglevel;
-		console_loglevel = CONSOLE_LOGLEVEL_SILENT;
+		saved_loglevel = get_local_loglevel();
+		set_local_loglevel(CONSOLE_LOGLEVEL_SILENT);
 		if (printk_get_level(kdb_buffer) || src == KDB_MSGSRC_PRINTK)
 			printk("%s", kdb_buffer);
 		else
@@ -845,7 +845,7 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap)
 kdb_print_out:
 	suspend_grep = 0; /* end of what may have been a recursive call */
 	if (logging)
-		console_loglevel = saved_loglevel;
+		set_local_loglevel(saved_loglevel);
 	/* kdb_printf_cpu locked the code above. */
 	smp_store_release(&kdb_printf_cpu, old_cpu);
 	local_irq_restore(flags);
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 9ecfa37..e1f55d8 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -1130,13 +1130,14 @@ static int kdb_reboot(int argc, const char **argv)
 
 static void kdb_dumpregs(struct pt_regs *regs)
 {
-	int old_lvl = console_loglevel;
-	console_loglevel = CONSOLE_LOGLEVEL_MOTORMOUTH;
+	int old_lvl = get_local_loglevel();
+
+	set_local_loglevel(CONSOLE_LOGLEVEL_MOTORMOUTH);
 	kdb_trap_printk++;
 	show_regs(regs);
 	kdb_trap_printk--;
 	kdb_printf("\n");
-	console_loglevel = old_lvl;
+	set_local_loglevel(old_lvl);
 }
 
 void kdb_set_current_task(struct task_struct *p)
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 1888f6a..9adb180 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -351,7 +351,9 @@ enum con_msg_format_flags {
  */
 
 enum log_flags {
-	LOG_NEWLINE	= 2,	/* text ended with a newline */
+	LOG_ALWAYS_CON  = 1,    /* Force suppress_message_printing() = false */
+	LOG_NEVER_CON   = 2,    /* Force suppress_message_printing() = true */
+	LOG_NEWLINE	= 4,	/* text ended with a newline */
 	LOG_CONT	= 8,	/* text is a fragment of a continuation line */
 };
 
@@ -1172,6 +1174,9 @@ void __init setup_log_buf(int early)
 }
 
 static bool __read_mostly ignore_loglevel;
+static DEFINE_PER_CPU(char, printk_loglevel_nmi);
+static DEFINE_PER_CPU(char, printk_loglevel_irq);
+static DEFINE_PER_CPU(char, printk_loglevel_softirq);
 
 static int __init ignore_loglevel_setup(char *str)
 {
@@ -1186,11 +1191,40 @@ static int __init ignore_loglevel_setup(char *str)
 MODULE_PARM_DESC(ignore_loglevel,
 		 "ignore loglevel setting (prints all kernel messages to the console)");
 
-static bool suppress_message_printing(int level)
+static bool suppress_message_printing(int flags, int level)
 {
+	if (flags & LOG_ALWAYS_CON)
+		return false;
+	if (flags & LOG_NEVER_CON)
+		return true;
 	return (level >= console_loglevel && !ignore_loglevel);
 }
 
+int get_local_loglevel(void)
+{
+	if (in_nmi())
+		return this_cpu_read(printk_loglevel_nmi);
+	if (in_irq())
+		return this_cpu_read(printk_loglevel_irq);
+	if (in_serving_softirq())
+		return this_cpu_read(printk_loglevel_softirq);
+	return current->printk_loglevel;
+}
+EXPORT_SYMBOL(get_local_loglevel);
+
+void set_local_loglevel(int level)
+{
+	if (in_nmi())
+		this_cpu_write(printk_loglevel_nmi, (char) level);
+	else if (in_irq())
+		this_cpu_write(printk_loglevel_irq, (char) level);
+	else if (in_serving_softirq())
+		this_cpu_write(printk_loglevel_softirq, (char) level);
+	else
+		current->printk_loglevel = (char) level;
+}
+EXPORT_SYMBOL(set_local_loglevel);
+
 #ifdef CONFIG_BOOT_PRINTK_DELAY
 
 static int boot_delay; /* msecs delay after each printk during bootup */
@@ -1220,7 +1254,7 @@ static void boot_delay_msec(int level)
 	unsigned long timeout;
 
 	if ((boot_delay == 0 || system_state >= SYSTEM_RUNNING)
-		|| suppress_message_printing(level)) {
+	    || suppress_message_printing(0, level)) {
 		return;
 	}
 
@@ -1934,6 +1968,25 @@ int vprintk_store(int facility, int level,
 	if (level == LOGLEVEL_DEFAULT)
 		level = default_message_loglevel;
 
+	{
+		int loglevel;
+
+		if (in_nmi())
+			loglevel = this_cpu_read(printk_loglevel_nmi);
+		else if (in_irq())
+			loglevel = this_cpu_read(printk_loglevel_irq);
+		else if (in_serving_softirq())
+			loglevel = this_cpu_read(printk_loglevel_softirq);
+		else
+			loglevel = current->printk_loglevel;
+		if (loglevel) {
+			if (level >= loglevel && !ignore_loglevel)
+				lflags |= LOG_NEVER_CON;
+			else
+				lflags |= LOG_ALWAYS_CON;
+		}
+	}
+
 	if (dict)
 		lflags |= LOG_NEWLINE;
 
@@ -2080,7 +2133,7 @@ static void call_console_drivers(const char *ext_text, size_t ext_len,
 				 const char *text, size_t len) {}
 static size_t msg_print_text(const struct printk_log *msg, bool syslog,
 			     bool time, char *buf, size_t size) { return 0; }
-static bool suppress_message_printing(int level) { return false; }
+static bool suppress_message_printing(int flags, int level) { return false; }
 
 #endif /* CONFIG_PRINTK */
 
@@ -2418,7 +2471,7 @@ void console_unlock(void)
 			break;
 
 		msg = log_from_idx(console_idx);
-		if (suppress_message_printing(msg->level)) {
+		if (suppress_message_printing(msg->flags, msg->level)) {
 			/*
 			 * Skip record we have buffered and already printed
 			 * directly to the console when we received it, and
diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c
index b4045e7..0104ed6 100644
--- a/kernel/printk/printk_safe.c
+++ b/kernel/printk/printk_safe.c
@@ -186,6 +186,9 @@ static void __printk_safe_flush(struct irq_work *work)
 	unsigned long flags;
 	size_t len;
 	int i;
+	int level = get_local_loglevel();
+
+	set_local_loglevel(CONSOLE_LOGLEVEL_MOTORMOUTH);
 
 	/*
 	 * The lock has two functions. First, one reader has to flush all
@@ -232,6 +235,7 @@ static void __printk_safe_flush(struct irq_work *work)
 out:
 	report_message_lost(s);
 	raw_spin_unlock_irqrestore(&read_lock, flags);
+	set_local_loglevel(level);
 }
 
 /**
-- 
1.8.3.1



^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [RFC] printk/sysrq: Don't play with console_loglevel
  2019-05-28 13:42     ` Petr Mladek
  2019-05-28 14:21       ` Tetsuo Handa
  2019-05-31 14:11       ` Tetsuo Handa
@ 2019-06-03  6:51       ` Sergey Senozhatsky
  2019-06-06  7:10         ` Petr Mladek
  2 siblings, 1 reply; 26+ messages in thread
From: Sergey Senozhatsky @ 2019-06-03  6:51 UTC (permalink / raw)
  To: Petr Mladek
  Cc: Sergey Senozhatsky, Dmitry Safonov, linux-kernel,
	Greg Kroah-Hartman, Jiri Slaby, Sergey Senozhatsky,
	Steven Rostedt, Tetsuo Handa

On (05/28/19 15:42), Petr Mladek wrote:
> On Tue 2019-05-28 13:46:19, Sergey Senozhatsky wrote:
> > On (05/28/19 13:15), Sergey Senozhatsky wrote:
> > > On (05/28/19 01:24), Dmitry Safonov wrote:
> > > [..]
> > > > While handling sysrq the console_loglevel is bumped to default to print
> > > > sysrq headers. It's done to print sysrq messages with WARNING level for
> > > > consumers of /proc/kmsg, though it sucks by the following reasons:
> > > > - changing console_loglevel may produce tons of messages (especially on
> > > >   bloated with debug/info prints systems)
> > > > - it doesn't guarantee that the message will be printed as printk may
> > > >   deffer the actual console output from buffer (see the comment near
> > > >   printk() in kernel/printk/printk.c)
> > > > 
> > > > Provide KERN_UNSUPPRESSED printk() annotation for such legacy places.
> > > > Make sysrq print the headers unsuppressed instead of changing
> > > > console_loglevel.
> 
> I like this idea. console_loglevel is temporary manipulated only
> when some messages should or should never appear on the console.
> Storing this information in the message flags would help
> to solve all the related races.

I don't really like the whole system-wide console_loglevel manipulation
thing, expect for console_verbose(), which seems the be the only legit
case. Maybe we better do something about it. I like the idea of
KERN_UNSUPPRESSED, especially if it will let us to completely remove
those console_loglevel manipulations.
E.g.

	console_loglevel = NEW;
	foo()
	 bar()
	  dump_stack();
	  ....
        console_loglevel = OLD;

    I understand the intent, but printk() is deferred (not always but still),
    so this code is not really expected to do the same thing all the time.
    Especially when it comes to NMI, etc.

If KERN_UNSUPPRESSED is going to be yet-another-way-to-print-important-messages
then I'm slightly less excited.

	-ss

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC] printk/sysrq: Don't play with console_loglevel
  2019-06-03  6:51       ` [RFC] printk/sysrq: Don't play with console_loglevel Sergey Senozhatsky
@ 2019-06-06  7:10         ` Petr Mladek
  2019-06-12  8:36           ` Sergey Senozhatsky
  0 siblings, 1 reply; 26+ messages in thread
From: Petr Mladek @ 2019-06-06  7:10 UTC (permalink / raw)
  To: Sergey Senozhatsky
  Cc: Dmitry Safonov, linux-kernel, Greg Kroah-Hartman, Jiri Slaby,
	Sergey Senozhatsky, Steven Rostedt, Tetsuo Handa

On Mon 2019-06-03 15:51:53, Sergey Senozhatsky wrote:
> On (05/28/19 15:42), Petr Mladek wrote:
> > On Tue 2019-05-28 13:46:19, Sergey Senozhatsky wrote:
> > > On (05/28/19 13:15), Sergey Senozhatsky wrote:
> > > > On (05/28/19 01:24), Dmitry Safonov wrote:
> > > > [..]
> > > > > While handling sysrq the console_loglevel is bumped to default to print
> > > > > sysrq headers. It's done to print sysrq messages with WARNING level for
> > > > > consumers of /proc/kmsg, though it sucks by the following reasons:
> > > > > - changing console_loglevel may produce tons of messages (especially on
> > > > >   bloated with debug/info prints systems)
> > > > > - it doesn't guarantee that the message will be printed as printk may
> > > > >   deffer the actual console output from buffer (see the comment near
> > > > >   printk() in kernel/printk/printk.c)
> > > > > 
> > > > > Provide KERN_UNSUPPRESSED printk() annotation for such legacy places.
> > > > > Make sysrq print the headers unsuppressed instead of changing
> > > > > console_loglevel.
> > 
> > I like this idea. console_loglevel is temporary manipulated only
> > when some messages should or should never appear on the console.
> > Storing this information in the message flags would help
> > to solve all the related races.
> 
> I don't really like the whole system-wide console_loglevel manipulation
> thing,

Just to be sure. I wanted to say that I like the idea with
KERN_UNSUPRESSED. So, I think that we are on the same page.

> except for console_verbose(), which seems the be the only legit
> case.

Note that CONSOLE_LOGLEVEL_SILENT is used in vkdb_printf(). I do not
know the background. But it might make some sense in kdb context.

> If KERN_UNSUPPRESSED is going to be yet-another-way-to-print-important-messages
> then I'm slightly less excited.

Yes, KERN_EMERG would do similar job.

Well, my understanding is that KERN_UNSUPRESSED would be used even
for less critical messages because the visibility is required
by the context or situation in which they are printed.

For example, we want to make all information visible in Oops because
the machine might be about to die. We might call there some shared
functions that produce less important messages in another context.

Also the pr_info() in __handle_sysrq() provides just informative
content. My understanding is that we want to show it just because
sysrq might be called when the system is not responding and we want
to give the user some feedback that the sysrq handler was called.

Now, KERN_EMERG might alarm some monitor of console output. It might
trigger unwanted reaction (forced reboot?) of the monitoring system
even when sysrq was not called in emergency situation.

I am sure that we need to care about such monitors. I have to
think more about it.

Best Regards,
Petr

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC] printk: Introduce per context console loglevel.
  2019-06-02  6:13         ` [RFC] printk: Introduce per context console loglevel Tetsuo Handa
@ 2019-06-06  7:59           ` Petr Mladek
  0 siblings, 0 replies; 26+ messages in thread
From: Petr Mladek @ 2019-06-06  7:59 UTC (permalink / raw)
  To: Tetsuo Handa
  Cc: Sergey Senozhatsky, Dmitry Safonov, linux-kernel,
	Greg Kroah-Hartman, Jiri Slaby, Sergey Senozhatsky,
	Steven Rostedt, x86, linux-ia64, kgdb-bugreport

On Sun 2019-06-02 15:13:35, Tetsuo Handa wrote:
> Dmitry Safonov proposed KERN_UNSUPPRESSED loglevel which pretends as if
> ignore_loglevel was specified for per printk() basis, for we can fail to
> apply temporarily manipulated console loglevel because console loglevel
> is evaluated when the message is printed to consoles rather than when
> the message is stored into the buffer [1].
> 
> Temporary manipulation of console loglevel for SysRq is applied to only
> the header line.

We should ask why this this is handled this way.

My understanding is to give user feedback that something is going
to happen when the system is not responsive and sysrq is the last
chance to get some information, sync, and reboot.

Maybe, it is not needed these days when the console loglevel
might be manipulated by sysrq as well.

> At first I though that we also want to apply temporary
> manipulation of console loglevel for SysRq to the body lines, for showing
> only the header line is hardly helpful. But I realized that we should not
> force showing the body lines because some users might be triggering SysRq
>  from /proc and reading via syslog rather than via console output. Users
> who need to read via console output should be able to manipulate console
> loglevel by triggering SysRq from console.

Sounds reasonable.

> Since we currently defer storing of the messages from NMI context and
> recursive context, we would need to explicitly pass KERN_UNSUPPRESSED.
> But Sergey Senozhatsky thinks that it might be fine to automatically
> apply KERN_UNSUPPRESSED to printk() from NMI context and recursive
> context, for messages from these contexts are likely important [2].

I do not agree with this. Nobody cared about printk() deadlocks in NMI
for a long time. The idea was that people just should not print
anything there.

Reality shown that people just printed from this context and we needed
to make printk() safe there.

IMHO, expecting that all messages in NMI context are super important
is a similar mistake.

Also sysrq-l prints all backtraces from NMI context. It is huge
amount of output. People might want just store it into the logbuffer.
It is the same as with sysrq-t mentioned above.

> Then, we could avoid explicitly passing KERN_UNSUPPRESSED, by introducing
> per context console loglevel.
> 
> This patch introduces per CPU console loglevel (for in_nmi(), in_irq() and
> in_serving_softirq()) and per thread console loglevel (for in_task()), and
> replaces temporary manipulation of global console_loglevel with temporary
> manipulation of per context console_loglevel based on an assumption that
> users who are temporarily manipulating global console_loglevel needs to
> apply it to only current context. (Note that triggering SysRq-t from /proc
> runs in in_task() context, and it should not disable preemption because it
> may take long period. Thus, per thread console loglevel is used.)

This is too generic and complicated.

Only the single pr_info() in __handle_sysrq() seems to be called with
interrupts enabled. And it happens only when it is triggered via
/proc/sysrq-trigger. Manipulating the console loglevel is
questionable there.

Using the existing printk_context is good enough.

Also the final output is either LOG_ALWAYS_CON or LOG_NEVER_CON.
The API should not pretend that it supports any loglevel granularity.

If we end up with the two states, the API should consist of three
functions, e.g.

   int set_console_verbose(unsigned long *flags);
   int set_console_quiet(unsigned long *flags);
   restore_console_loglevel(int loglevel, unsigned long flags);

Where the first two functions should return the original loglevel
and irqflags.

Best Regards,
Petr

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC] printk/sysrq: Don't play with console_loglevel
  2019-05-28 10:15         ` Tetsuo Handa
  2019-05-28 13:49           ` Petr Mladek
@ 2019-06-07 17:09           ` Pavel Machek
  2019-06-08  2:45             ` Tetsuo Handa
  1 sibling, 1 reply; 26+ messages in thread
From: Pavel Machek @ 2019-06-07 17:09 UTC (permalink / raw)
  To: Tetsuo Handa
  Cc: Sergey Senozhatsky, Dmitry Safonov, linux-kernel,
	Greg Kroah-Hartman, Jiri Slaby, Petr Mladek, Sergey Senozhatsky,
	Steven Rostedt, Andrew Morton, Linus Torvalds

On Tue 2019-05-28 19:15:43, Tetsuo Handa wrote:
> On 2019/05/28 17:51, Sergey Senozhatsky wrote:
> >> You are trying to omit passing KERN_UNSUPPRESSED by utilizing implicit printk
> >> context information. But doesn't such attempt resemble find_printk_buffer() ?
> > 
> > Adding KERN_UNSUPPRESSED to all printks down the op_p->handler()
> > line is hardly possible. At the same time I'd really prefer not
> > to have buffering for sysrq.
> 
> I don't think it is hardly possible. And I really prefer having
> deferred printing for SysRq.

Well, magic SysRq was meant for situation where system is in weird/broken state.
"Give me backtrace where it is hung", etc. Direct printing is more likely to work
in that cases.
									Pavel

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC] printk/sysrq: Don't play with console_loglevel
  2019-06-07 17:09           ` Pavel Machek
@ 2019-06-08  2:45             ` Tetsuo Handa
  2019-06-11 15:10               ` Petr Mladek
  0 siblings, 1 reply; 26+ messages in thread
From: Tetsuo Handa @ 2019-06-08  2:45 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Sergey Senozhatsky, Dmitry Safonov, linux-kernel,
	Greg Kroah-Hartman, Jiri Slaby, Petr Mladek, Sergey Senozhatsky,
	Steven Rostedt, Andrew Morton, Linus Torvalds

On 2019/06/08 2:09, Pavel Machek wrote:
> On Tue 2019-05-28 19:15:43, Tetsuo Handa wrote:
>> On 2019/05/28 17:51, Sergey Senozhatsky wrote:
>>>> You are trying to omit passing KERN_UNSUPPRESSED by utilizing implicit printk
>>>> context information. But doesn't such attempt resemble find_printk_buffer() ?
>>>
>>> Adding KERN_UNSUPPRESSED to all printks down the op_p->handler()
>>> line is hardly possible. At the same time I'd really prefer not
>>> to have buffering for sysrq.
>>
>> I don't think it is hardly possible. And I really prefer having
>> deferred printing for SysRq.
> 
> Well, magic SysRq was meant for situation where system is in weird/broken state.
> "Give me backtrace where it is hung", etc. Direct printing is more likely to work
> in that cases.

Magic SysRq from keyboard is for situation where system is in weird/broken state.

But I want to use Magic SysRq from /proc for situation where system is not fatally
weird/broken state. I have trouble getting SysRq-t from /proc when something bad
happened (e.g. some health check process did not return for 60 seconds). Since
/proc/pid/wchan shows only 1 line, it is useless for understanding why that process
got stuck. If direct printing is enforced, "echo t > /proc/sysrq-trigger" might take
many minutes. If direct printing is not enforced, "echo t > /proc/sysrq-trigger"
should complete within less than one second. If syslog is working (which is almost
equivalent to being able to write to /proc/sysrq-trigger), the latter is more helpful
for taking snapshots for multiple times (e.g. 5 times with 10 seconds interval) in
order to understand why that process got stuck. That's why I added

  At first I though that we also want to apply temporary
  manipulation of console loglevel for SysRq to the body lines, for showing
  only the header line is hardly helpful. But I realized that we should not
  force showing the body lines because some users might be triggering SysRq
  from /proc and reading via syslog rather than via console output. Users
  who need to read via console output should be able to manipulate console
  loglevel by triggering SysRq from console.

part in https://lkml.kernel.org/r/c265f674-e293-332b-a037-895025354a69@i-love.sakura.ne.jp .

A snapshot which was taken within less than one second and a snapshot which was taken
across more than many minutes, which one likely shows more accurate "snapshot" ?
I know we need to take a snapshot like vmcore if we need a perfect snapshot which
was taken with CPUs stopped. But in enterprise systems where it is difficult to
do "echo c > /proc/sysrq-trigger" in order to take a perfect snapshot, snapshots
which can be taken without destroying the VM comes in handy. There are situations
where something went wrong but still able to operate.

Also, regarding Magic SysRq from keyboard case, my intent is to allow SysRq
to just store the messages to printk() buffer, in order to avoid stalls and
take better snapshots for multiple times. And my intent of

  And I really prefer having deferred printing for SysRq.

is "we can let some other SysRq command (e.g. SysRq-h) to write to consoles in printk() buffer,
when printk() buffer filled by SysRq-t did not get written to consoles automatically".
We can implement it by introducing "printk() which uses global printk() buffer but
do not try to write to consoles" and "passing context information which tells whether
printk() messages should be written to consoles synchronously". An example is shown below.

 drivers/tty/sysrq.c    |  3 +++
 include/linux/printk.h | 15 ++++++++++++
 include/linux/sched.h  |  1 +
 kernel/printk/printk.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++-
 mm/oom_kill.c          |  3 +++
 mm/page_alloc.c        |  3 +++
 6 files changed, 86 insertions(+), 1 deletion(-)

diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index 18cb58e52e9b..135acbe5c389 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -543,6 +543,7 @@ void __handle_sysrq(int key, bool check_mask)
 	 */
 	orig_log_level = get_local_loglevel();
 	set_local_loglevel(CONSOLE_LOGLEVEL_DEFAULT);
+	enable_deferred_output();
 
         op_p = __sysrq_get_key_op(key);
         if (op_p) {
@@ -576,10 +577,12 @@ void __handle_sysrq(int key, bool check_mask)
 		pr_cont("\n");
 		set_local_loglevel(orig_log_level);
 	}
+	disable_deferred_output();
 	rcu_read_unlock();
 	rcu_sysrq_end();
 
 	suppress_printk = orig_suppress_printk;
+	trigger_deferred_output();
 }
 
 void handle_sysrq(int key)
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 78b357a1b109..18392376932b 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -196,6 +196,9 @@ extern void printk_safe_flush(void);
 extern void printk_safe_flush_on_panic(void);
 int get_local_loglevel(void);
 void set_local_loglevel(int level);
+void enable_deferred_output(void);
+void disable_deferred_output(void);
+void trigger_deferred_output(void);
 #else
 static inline __printf(1, 0)
 int vprintk(const char *s, va_list args)
@@ -280,6 +283,18 @@ static inline int get_local_loglevel(void)
 static inline void set_local_loglevel(int level)
 {
 }
+
+static inline void enable_deferred_output(void)
+{
+}
+
+static inline void disable_deferred_output(void)
+{
+}
+
+static inline void trigger_deferred_output(void)
+{
+}
 #endif
 
 static inline void console_verbose(void)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 283d0d2d4546..fc538ab1f2e2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -734,6 +734,7 @@ struct task_struct {
 #endif
 #ifdef CONFIG_PRINTK
 	unsigned			printk_loglevel:8;
+	unsigned int			printk_deferred_output;
 #endif
 
 	unsigned long			atomic_flags; /* Flags requiring atomic access. */
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 9adb1801ca54..266125ffea4c 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1994,6 +1994,66 @@ int vprintk_store(int facility, int level,
 			  dict, dictlen, text, text_len);
 }
 
+static DEFINE_PER_CPU(unsigned int, deferred_output_nmi);
+static DEFINE_PER_CPU(unsigned int, deferred_output_irq);
+static DEFINE_PER_CPU(unsigned int, deferred_output_softirq);
+
+void enable_deferred_output(void)
+{
+	if (in_nmi())
+		this_cpu_inc(deferred_output_nmi);
+	else if (in_irq())
+		this_cpu_inc(deferred_output_irq);
+	else if (in_serving_softirq())
+		this_cpu_inc(deferred_output_softirq);
+	else
+		current->printk_deferred_output++;
+}
+EXPORT_SYMBOL(enable_deferred_output);
+
+void disable_deferred_output(void)
+{
+	if (in_nmi())
+		this_cpu_dec(deferred_output_nmi);
+	else if (in_irq())
+		this_cpu_dec(deferred_output_irq);
+	else if (in_serving_softirq())
+		this_cpu_dec(deferred_output_softirq);
+	else
+		current->printk_deferred_output--;
+}
+EXPORT_SYMBOL(disable_deferred_output);
+
+static inline bool should_defer_output(void)
+{
+	if (oops_in_progress)
+		return false;
+	if (in_nmi())
+		return this_cpu_read(deferred_output_nmi);
+	if (in_irq())
+		return this_cpu_read(deferred_output_irq);
+	if (in_serving_softirq())
+		return this_cpu_read(deferred_output_softirq);
+	return current->printk_deferred_output;
+}
+
+static void console_writer_work_func(struct irq_work *irq_work)
+{
+	preempt_disable();
+	if (console_trylock_spinning())
+		console_unlock();
+	preempt_enable();
+}
+
+void trigger_deferred_output(void)
+{
+	static DEFINE_IRQ_WORK(console_writer_work, console_writer_work_func);
+
+	if (!in_nmi())
+		irq_work_queue(&console_writer_work);
+}
+EXPORT_SYMBOL(trigger_deferred_output);
+
 asmlinkage int vprintk_emit(int facility, int level,
 			    const char *dict, size_t dictlen,
 			    const char *fmt, va_list args)
@@ -2023,7 +2083,7 @@ asmlinkage int vprintk_emit(int facility, int level,
 	logbuf_unlock_irqrestore(flags);
 
 	/* If called from the scheduler, we can not call up(). */
-	if (!in_sched && pending_output) {
+	if (!in_sched && pending_output && !should_defer_output()) {
 		/*
 		 * Disable preemption to avoid being preempted while holding
 		 * console_sem which would prevent anyone from printing to
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 5a58778c91d4..6ab738061f61 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -961,6 +961,7 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
 	}
 	task_unlock(victim);
 
+	enable_deferred_output();
 	if (__ratelimit(&oom_rs))
 		dump_header(oc, victim);
 
@@ -982,6 +983,8 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
 				      (void*)message);
 		mem_cgroup_put(oom_group);
 	}
+	disable_deferred_output();
+	trigger_deferred_output();
 }
 
 /*
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d66bc8abe0af..c8063c23bb82 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3682,6 +3682,7 @@ void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...)
 	if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs))
 		return;
 
+	enable_deferred_output();
 	va_start(args, fmt);
 	vaf.fmt = fmt;
 	vaf.va = &args;
@@ -3694,6 +3695,8 @@ void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...)
 	pr_cont("\n");
 	dump_stack();
 	warn_alloc_show_mem(gfp_mask, nodemask);
+	disable_deferred_output();
+	trigger_deferred_output();
 }
 
 static inline struct page *
-- 
2.16.5


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [RFC] printk/sysrq: Don't play with console_loglevel
  2019-06-08  2:45             ` Tetsuo Handa
@ 2019-06-11 15:10               ` Petr Mladek
  2019-06-12 10:06                 ` Tetsuo Handa
  0 siblings, 1 reply; 26+ messages in thread
From: Petr Mladek @ 2019-06-11 15:10 UTC (permalink / raw)
  To: Tetsuo Handa
  Cc: Pavel Machek, Sergey Senozhatsky, Dmitry Safonov, linux-kernel,
	Greg Kroah-Hartman, Jiri Slaby, Sergey Senozhatsky,
	Steven Rostedt, Andrew Morton, Linus Torvalds

On Sat 2019-06-08 11:45:45, Tetsuo Handa wrote:
> On 2019/06/08 2:09, Pavel Machek wrote:
> > On Tue 2019-05-28 19:15:43, Tetsuo Handa wrote:
> >> On 2019/05/28 17:51, Sergey Senozhatsky wrote:
> >>>> You are trying to omit passing KERN_UNSUPPRESSED by utilizing implicit printk
> >>>> context information. But doesn't such attempt resemble find_printk_buffer() ?
> >>>
> >>> Adding KERN_UNSUPPRESSED to all printks down the op_p->handler()
> >>> line is hardly possible. At the same time I'd really prefer not
> >>> to have buffering for sysrq.
> >>
> >> I don't think it is hardly possible. And I really prefer having
> >> deferred printing for SysRq.

> > 
> > Well, magic SysRq was meant for situation where system is in weird/broken state.
> > "Give me backtrace where it is hung", etc. Direct printing is more likely to work
> > in that cases.
> 
> Magic SysRq from keyboard is for situation where system is in weird/broken state.
> 
> But I want to use Magic SysRq from /proc for situation where system is not fatally
> weird/broken state. I have trouble getting SysRq-t from /proc when something bad
> happened (e.g. some health check process did not return for 60 seconds). Since
> /proc/pid/wchan shows only 1 line, it is useless for understanding why that process
> got stuck. If direct printing is enforced, "echo t > /proc/sysrq-trigger" might take
> many minutes. If direct printing is not enforced, "echo t > /proc/sysrq-trigger"
> should complete within less than one second. If syslog is working (which is almost
> equivalent to being able to write to /proc/sysrq-trigger), the latter is more helpful
> for taking snapshots for multiple times (e.g. 5 times with 10 seconds interval) in
> order to understand why that process got stuck. That's why I added

This looks like a very specific use case. But the proposal changes
the behavior for anyone. I am not persuaded that everyone would
appreciate the change.

OK, it takes 60 seconds when the messages appear on the console and
1 second when they are just stored to the syslog.

Why the system is configured to show all messages on the console
when syslog is enough?

If sysrq is the preferred interface, it might be used also the other
way. I mean to increase console_loglevel to hide all messages on
the console, trigger output of the system state and eventually
decrease the loglevel again.

> Also, regarding Magic SysRq from keyboard case, my intent is to allow SysRq
> to just store the messages to printk() buffer, in order to avoid stalls and
> take better snapshots for multiple times. And my intent of
> 
>   And I really prefer having deferred printing for SysRq.

Always deferring printk() for everyone is not acceptable. It is one
extreme between printk() reliability and speed in favor of the speed.
But many people prefer reliability (high chance to see messages
when the system goes down.

IMHO, the only acceptable solution need to be configurable:

   + never deffer (debugging corner cases?)
   + avoid softlockup (default on normal systems?)
   + guarantee max time (throughput oriented systems)
   + always deffer (real time, special usecases)

It might be based on Sergey's patchsets adding the offload logic
into console_unlock().

IMHO, the most problematic thing is how to define the limit
(lines?, characters?, time?). A limit bases on the data amount
would also depend on console speed. Time based limit would be
better but measuring time in console_unlock() is problematic
(atomic context, possible deadlocks).

Best Regards,
Petr

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC] printk/sysrq: Don't play with console_loglevel
  2019-06-06  7:10         ` Petr Mladek
@ 2019-06-12  8:36           ` Sergey Senozhatsky
  2019-06-12 12:00             ` Petr Mladek
  0 siblings, 1 reply; 26+ messages in thread
From: Sergey Senozhatsky @ 2019-06-12  8:36 UTC (permalink / raw)
  To: Petr Mladek
  Cc: Sergey Senozhatsky, Dmitry Safonov, linux-kernel,
	Greg Kroah-Hartman, Jiri Slaby, Sergey Senozhatsky,
	Steven Rostedt, Tetsuo Handa

On (06/06/19 09:10), Petr Mladek wrote:
> > > > > > Provide KERN_UNSUPPRESSED printk() annotation for such legacy places.
> > > > > > Make sysrq print the headers unsuppressed instead of changing
> > > > > > console_loglevel.
> > > 
> > > I like this idea. console_loglevel is temporary manipulated only
> > > when some messages should or should never appear on the console.
> > > Storing this information in the message flags would help
> > > to solve all the related races.
> > 
> > I don't really like the whole system-wide console_loglevel manipulation
> > thing,
> 
> Just to be sure. I wanted to say that I like the idea with
> KERN_UNSUPRESSED. So, I think that we are on the same page.

I understand. All I wanted to say is that KERN_UNSUPRESSED is
per-message, while the most interesting (and actually broken)
cases, IMHO, are per-context, IOW things like this one

	console_loglevel = NEW
	foo()
	  dump_stack()
	     printk
	     ...
	     printk
	console_loglevel = OLD

KERN_UNSUPRESSED does not help here. We probably can't convert
dump_stack() to KERN_UNSUPRESSED.

[..]
> Now, KERN_EMERG might alarm some monitor of console output. It might
> trigger unwanted reaction (forced reboot?) of the monitoring system
> even when sysrq was not called in emergency situation.
> 
> I am sure that we need to care about such monitors. I have to
> think more about it.

Sure.

	-ss

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC] printk/sysrq: Don't play with console_loglevel
  2019-06-11 15:10               ` Petr Mladek
@ 2019-06-12 10:06                 ` Tetsuo Handa
  0 siblings, 0 replies; 26+ messages in thread
From: Tetsuo Handa @ 2019-06-12 10:06 UTC (permalink / raw)
  To: Petr Mladek
  Cc: Pavel Machek, Sergey Senozhatsky, Dmitry Safonov, linux-kernel,
	Greg Kroah-Hartman, Jiri Slaby, Sergey Senozhatsky,
	Steven Rostedt, Andrew Morton, Linus Torvalds

On 2019/06/12 0:10, Petr Mladek wrote:
> On Sat 2019-06-08 11:45:45, Tetsuo Handa wrote:
>> On 2019/06/08 2:09, Pavel Machek wrote:
>>> On Tue 2019-05-28 19:15:43, Tetsuo Handa wrote:
>>>> On 2019/05/28 17:51, Sergey Senozhatsky wrote:
>>>>>> You are trying to omit passing KERN_UNSUPPRESSED by utilizing implicit printk
>>>>>> context information. But doesn't such attempt resemble find_printk_buffer() ?
>>>>>
>>>>> Adding KERN_UNSUPPRESSED to all printks down the op_p->handler()
>>>>> line is hardly possible. At the same time I'd really prefer not
>>>>> to have buffering for sysrq.
>>>>
>>>> I don't think it is hardly possible. And I really prefer having
>>>> deferred printing for SysRq.
> 
>>>
>>> Well, magic SysRq was meant for situation where system is in weird/broken state.
>>> "Give me backtrace where it is hung", etc. Direct printing is more likely to work
>>> in that cases.
>>
>> Magic SysRq from keyboard is for situation where system is in weird/broken state.
>>
>> But I want to use Magic SysRq from /proc for situation where system is not fatally
>> weird/broken state. I have trouble getting SysRq-t from /proc when something bad
>> happened (e.g. some health check process did not return for 60 seconds). Since
>> /proc/pid/wchan shows only 1 line, it is useless for understanding why that process
>> got stuck. If direct printing is enforced, "echo t > /proc/sysrq-trigger" might take
>> many minutes. If direct printing is not enforced, "echo t > /proc/sysrq-trigger"
>> should complete within less than one second. If syslog is working (which is almost
>> equivalent to being able to write to /proc/sysrq-trigger), the latter is more helpful
>> for taking snapshots for multiple times (e.g. 5 times with 10 seconds interval) in
>> order to understand why that process got stuck. That's why I added
> 
> This looks like a very specific use case. But the proposal changes
> the behavior for anyone. I am not persuaded that everyone would
> appreciate the change.
> 
> OK, it takes 60 seconds when the messages appear on the console and
> 1 second when they are just stored to the syslog.
> 
> Why the system is configured to show all messages on the console
> when syslog is enough?

We are talking about two things.

First thing is about whether we should automatically add KERN_UNSUPPRESSED
to all printks down the op_p->handler() line. I initially thought that we
should automatically add KERN_UNSUPPRESSED, but I realized that we should
not do it. I want to allow users to configure not to show all messages
on the console when syslog is enough.

> 
> If sysrq is the preferred interface, it might be used also the other
> way. I mean to increase console_loglevel to hide all messages on
> the console, trigger output of the system state and eventually
> decrease the loglevel again.

Right. Some distributions do the opposite thing in order to speed up
the boot sequence; increase console_loglevel to hide most of messages
on the console by default. We can trigger "SysRq-9" -> "SysRq-t" ->
"SysRq-4" from keyboard in order to print all SysRq-t messages to
the consoles when we need to do.

Since vkdb_printf() uses CONSOLE_LOGLEVEL_SILENT, KERN_UNSUPPRESSED alone
is not sufficient for avoid playing with console loglevel for tree wide.
If we want to get rid of temporary manipulation of console loglevel for
only SysRq, introducing KERN_UNSUPPRESSED would be OK. But we want to get
rid of temporary manipulation of console loglevel for tree wide, don't we?
If yes, I think that we will need per context "always print to consoles" /
"never print to consoles" / "print to consoles based on console loglevel"
control, for explicitly passing that control to foo()->dump_stack()->printk()
will be (not impossible but) a huge change.

> 
>> Also, regarding Magic SysRq from keyboard case, my intent is to allow SysRq
>> to just store the messages to printk() buffer, in order to avoid stalls and
>> take better snapshots for multiple times. And my intent of
>>
>>   And I really prefer having deferred printing for SysRq.
> 
> Always deferring printk() for everyone is not acceptable. It is one
> extreme between printk() reliability and speed in favor of the speed.
> But many people prefer reliability (high chance to see messages
> when the system goes down.

Second thing is about whether we can defer console_unlock() from specific
callers.

What I suggest is to enforce "never deferred" on e.g. SysRq-h and enforce
"always deferred" on e.g. SysRq-t. Since console_unlock() enforces flushing
of printk() buffer, users can trigger "SysRq-t" -> "SysRq-h" when SysRq-t
failed to flush printk() buffer.

> 
> IMHO, the only acceptable solution need to be configurable:
> 
>    + never deffer (debugging corner cases?)
>    + avoid softlockup (default on normal systems?)
>    + guarantee max time (throughput oriented systems)
>    + always deffer (real time, special usecases)

Is this about only SysRq messages? Or any printk() messages?
If this is about SysRq messages, I don't think "always defer" is
possible. We need a way to force flushing of printk() buffer when
"always deferred" SysRq messages did not get printed to consoles.

> 
> It might be based on Sergey's patchsets adding the offload logic
> into console_unlock().
> 
> IMHO, the most problematic thing is how to define the limit
> (lines?, characters?, time?). A limit bases on the data amount
> would also depend on console speed. Time based limit would be
> better but measuring time in console_unlock() is problematic
> (atomic context, possible deadlocks).

Which patch does "Sergey's patchsets adding the offload logic into
console_unlock()" refer to? As far as I am aware, any attempt to
"almost always" offload to a in_task() context will be unlikely
accepted (like Andrew Morton worried at
https://lkml.kernel.org/r/20160404155149.a3e3307def2d1315e2099c63@linux-foundation.org ).
I'm not expecting that we can define the limit appropriately, for
the amount of data which needs to be printed on to console is
unpredictable.

My should_defer_output() attempt offloads only when the caller is willing
to defer console_unlock(). oom_kill_process() and warn_alloc() would
generate less amount of data than SysRq-t, but these callers cannot make
forward progress until printk() returns. Thus, should_defer_output() helps
avoiding stalls at at least oom_kill_process() and warn_alloc() because
deferred printk() allows __ratelimit() based on e.g. frequency, amount of
data which is waiting for being printed to consoles. And per context
"defer printing to consoles if oops is not in progress but the caller is
willing to defer printing to consoles" control should be viable.


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC] printk/sysrq: Don't play with console_loglevel
  2019-06-12  8:36           ` Sergey Senozhatsky
@ 2019-06-12 12:00             ` Petr Mladek
  2019-06-12 14:47               ` Dmitry Safonov
  0 siblings, 1 reply; 26+ messages in thread
From: Petr Mladek @ 2019-06-12 12:00 UTC (permalink / raw)
  To: Sergey Senozhatsky
  Cc: Dmitry Safonov, linux-kernel, Greg Kroah-Hartman, Jiri Slaby,
	Sergey Senozhatsky, Steven Rostedt, Tetsuo Handa

On Wed 2019-06-12 17:36:43, Sergey Senozhatsky wrote:
> On (06/06/19 09:10), Petr Mladek wrote:
> > Just to be sure. I wanted to say that I like the idea with
> > KERN_UNSUPRESSED. So, I think that we are on the same page.
> 
> I understand. All I wanted to say is that KERN_UNSUPRESSED is
> per-message, while the most interesting (and actually broken)
> cases, IMHO, are per-context, IOW things like this one
> 
> 	console_loglevel = NEW
> 	foo()
> 	  dump_stack()
> 	     printk
> 	     ...
> 	     printk
> 	console_loglevel = OLD
> 
> KERN_UNSUPRESSED does not help here. We probably can't convert
> dump_stack() to KERN_UNSUPRESSED.

I agree. I take KERN_UNSUPRESSED like a nice trick how to pass
the information about the unsupressed printk context via
printk_safe and printk_nmi per-CPU buffers.

The single line in sysrq __handle_sysrq() seems to be the only
location where KERN_UNSUPRESSED can be used directly.

Best Regards,
Petr

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [RFC] printk/sysrq: Don't play with console_loglevel
  2019-06-12 12:00             ` Petr Mladek
@ 2019-06-12 14:47               ` Dmitry Safonov
  0 siblings, 0 replies; 26+ messages in thread
From: Dmitry Safonov @ 2019-06-12 14:47 UTC (permalink / raw)
  To: Petr Mladek, Sergey Senozhatsky
  Cc: linux-kernel, Greg Kroah-Hartman, Jiri Slaby, Sergey Senozhatsky,
	Steven Rostedt, Tetsuo Handa

On 6/12/19 1:00 PM, Petr Mladek wrote:
> On Wed 2019-06-12 17:36:43, Sergey Senozhatsky wrote:
>> On (06/06/19 09:10), Petr Mladek wrote:
>>> Just to be sure. I wanted to say that I like the idea with
>>> KERN_UNSUPRESSED. So, I think that we are on the same page.
>>
>> I understand. All I wanted to say is that KERN_UNSUPRESSED is
>> per-message, while the most interesting (and actually broken)
>> cases, IMHO, are per-context, IOW things like this one
>>
>> 	console_loglevel = NEW
>> 	foo()
>> 	  dump_stack()
>> 	     printk
>> 	     ...
>> 	     printk
>> 	console_loglevel = OLD
>>
>> KERN_UNSUPRESSED does not help here. We probably can't convert
>> dump_stack() to KERN_UNSUPRESSED.
> 
> I agree. I take KERN_UNSUPRESSED like a nice trick how to pass
> the information about the unsupressed printk context via
> printk_safe and printk_nmi per-CPU buffers.
> 
> The single line in sysrq __handle_sysrq() seems to be the only
> location where KERN_UNSUPRESSED can be used directly.

I likely don't understand all the reasons why it's not possible.

Looking at kdb - prints those can't be converted straight-away are
show_regs() and show_stack().. could we add helpers those take a
loglevel to show the info? (having as an example show_trace_log_lvl()
that can "eat" a modifier already).

Not that I want to sell the idea of KERN_UNSUPRESSED, but to my mind the
alternative patches were kind of too intricate and bring more complexity
to printk.. and there are only ~3(?) places those manipulate
console_loglevel directly in the kernel tree (so we might want something
simple and dumb as hell).

Thanks,
          Dima

^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2019-06-12 14:47 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-05-28  0:24 [RFC] printk/sysrq: Don't play with console_loglevel Dmitry Safonov
2019-05-28  3:21 ` Tetsuo Handa
2019-05-28  4:22   ` Sergey Senozhatsky
2019-05-28  8:02     ` Tetsuo Handa
2019-05-28  8:51       ` Sergey Senozhatsky
2019-05-28 10:15         ` Tetsuo Handa
2019-05-28 13:49           ` Petr Mladek
2019-06-07 17:09           ` Pavel Machek
2019-06-08  2:45             ` Tetsuo Handa
2019-06-11 15:10               ` Petr Mladek
2019-06-12 10:06                 ` Tetsuo Handa
2019-05-28  4:15 ` Sergey Senozhatsky
2019-05-28  4:46   ` Sergey Senozhatsky
2019-05-28 13:42     ` Petr Mladek
2019-05-28 14:21       ` Tetsuo Handa
2019-05-28 15:03         ` Petr Mladek
2019-05-28 15:21           ` Tetsuo Handa
2019-05-28 15:58             ` Dmitry Safonov
2019-05-31 14:11       ` Tetsuo Handa
2019-06-02  6:13         ` [RFC] printk: Introduce per context console loglevel Tetsuo Handa
2019-06-06  7:59           ` Petr Mladek
2019-06-03  6:51       ` [RFC] printk/sysrq: Don't play with console_loglevel Sergey Senozhatsky
2019-06-06  7:10         ` Petr Mladek
2019-06-12  8:36           ` Sergey Senozhatsky
2019-06-12 12:00             ` Petr Mladek
2019-06-12 14:47               ` Dmitry Safonov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).