All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH][RFC] printk: make pr_cont buffer per-cpu
@ 2016-08-22 15:40 Sergey Senozhatsky
  2016-08-22 16:10 ` Joe Perches
  2016-08-23  5:18 ` Sergey Senozhatsky
  0 siblings, 2 replies; 10+ messages in thread
From: Sergey Senozhatsky @ 2016-08-22 15:40 UTC (permalink / raw)
  To: Petr Mladek
  Cc: Jan Kara, Tejun Heo, Calvin Owens, Andrew Morton, linux-kernel,
	Sergey Senozhatsky, Sergey Senozhatsky

Hello,

	RFC and POC

	Petr, I took a very quick look at your series [1]. I think it
won't work on some of the setups I'm toying with, where multiple CPUs
can do a simultaneous pr_cont() output. What do you about the following
approach?

(it's not really tested, I just finished writing the code. Will test it
more tomorrow. But I kinda like that at this point it removes almost
twice the code it adds, which is probably because I missed some cases).

===8<====

	This patch changes pr_cont buffer to be a per-cpu variable, so
CPUs don't compete anymore for a single cont buffer. Thus we minimize
the possibility of preliminary/forced flushes of incomplete pr_cont
buffers.
	The basic idea is that the first time CPU issues a pr_cont or
!LOG_NEWLINE output it starts a non-preemptible region which ends when
CPUs writes a NEW_LINE symbol.
Schematically:

	pr_cont()			/* preempt_disable() */
		<< write to this_cpu_ptr cont buffer
	pr_cont()
		<< write to this_cpu_ptr cont buffer
	...
	pr_cont("\n")			/* log_store(). preempt_enable() */

	The only possible case (unless I'm missing something) for a
race is when IRQ is issuing a pr_cont() output, interrupting the CPU
which is already in the middle of pr_cont() printing.


[1]: http://marc.info/?l=linux-kernel&m=146860197621876

Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
---
 kernel/printk/printk.c | 193 ++++++++++++++++---------------------------------
 1 file changed, 64 insertions(+), 129 deletions(-)

diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 0d3e026..2bf8b85 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -337,10 +337,9 @@ static int console_may_schedule;
  */
 
 enum log_flags {
-	LOG_NOCONS	= 1,	/* already flushed, do not print to console */
-	LOG_NEWLINE	= 2,	/* text ended with a newline */
-	LOG_PREFIX	= 4,	/* text started with a prefix */
-	LOG_CONT	= 8,	/* text is a fragment of a continuation line */
+	LOG_NEWLINE	= 1,	/* text ended with a newline */
+	LOG_PREFIX	= 2,	/* text started with a prefix */
+	LOG_CONT	= 4,	/* text is a fragment of a continuation line */
 };
 
 struct printk_log {
@@ -1661,108 +1660,62 @@ static inline void printk_delay(void)
  * though, are printed immediately to the consoles to ensure everything has
  * reached the console in case of a kernel crash.
  */
-static struct cont {
+struct cont {
 	char buf[LOG_LINE_MAX];
 	size_t len;			/* length == 0 means unused buffer */
-	size_t cons;			/* bytes written to console */
-	struct task_struct *owner;	/* task of first print*/
 	u64 ts_nsec;			/* time of first print */
 	u8 level;			/* log level of first message */
 	u8 facility;			/* log facility of first message */
 	enum log_flags flags;		/* prefix, newline flags */
-	bool flushed:1;			/* buffer sealed and committed */
-} cont;
+};
+
+static DEFINE_PER_CPU(struct cont, pcpu_cont);
+static DEFINE_PER_CPU(bool, cont_printing);
 
-static void cont_flush(enum log_flags flags)
+static void cont_flush(struct cont *cont, enum log_flags flags)
 {
-	if (cont.flushed)
-		return;
-	if (cont.len == 0)
+	if (cont->len == 0)
 		return;
 
-	if (cont.cons) {
-		/*
-		 * If a fragment of this line was directly flushed to the
-		 * console; wait for the console to pick up the rest of the
-		 * line. LOG_NOCONS suppresses a duplicated output.
-		 */
-		log_store(cont.facility, cont.level, flags | LOG_NOCONS,
-			  cont.ts_nsec, NULL, 0, cont.buf, cont.len);
-		cont.flags = flags;
-		cont.flushed = true;
-	} else {
-		/*
-		 * If no fragment of this line ever reached the console,
-		 * just submit it to the store and free the buffer.
-		 */
-		log_store(cont.facility, cont.level, flags, 0,
-			  NULL, 0, cont.buf, cont.len);
-		cont.len = 0;
-	}
+	/*
+	 * If no fragment of this line ever reached the console,
+	 * just submit it to the store and free the buffer.
+	 */
+	log_store(cont->facility, cont->level, flags, 0,
+		  NULL, 0, cont->buf, cont->len);
+	cont->len = 0;
 }
 
 static bool cont_add(int facility, int level, const char *text, size_t len)
 {
-	if (cont.len && cont.flushed)
-		return false;
+	struct cont *cont = this_cpu_ptr(&pcpu_cont);
 
 	/*
 	 * If ext consoles are present, flush and skip in-kernel
 	 * continuation.  See nr_ext_console_drivers definition.  Also, if
 	 * the line gets too long, split it up in separate records.
 	 */
-	if (nr_ext_console_drivers || cont.len + len > sizeof(cont.buf)) {
-		cont_flush(LOG_CONT);
+	if (nr_ext_console_drivers || cont->len + len > sizeof(cont->buf)) {
+		cont_flush(cont, LOG_CONT);
 		return false;
 	}
 
-	if (!cont.len) {
-		cont.facility = facility;
-		cont.level = level;
-		cont.owner = current;
-		cont.ts_nsec = local_clock();
-		cont.flags = 0;
-		cont.cons = 0;
-		cont.flushed = false;
+	if (!cont->len) {
+		cont->facility = facility;
+		cont->level = level;
+		cont->ts_nsec = local_clock();
+		cont->flags = 0;
 	}
 
-	memcpy(cont.buf + cont.len, text, len);
-	cont.len += len;
+	memcpy(cont->buf + cont->len, text, len);
+	cont->len += len;
 
-	if (cont.len > (sizeof(cont.buf) * 80) / 100)
-		cont_flush(LOG_CONT);
+	if (cont->len > (sizeof(cont->buf) * 80) / 100)
+		cont_flush(cont, LOG_CONT);
 
 	return true;
 }
 
-static size_t cont_print_text(char *text, size_t size)
-{
-	size_t textlen = 0;
-	size_t len;
-
-	if (cont.cons == 0 && (console_prev & LOG_NEWLINE)) {
-		textlen += print_time(cont.ts_nsec, text);
-		size -= textlen;
-	}
-
-	len = cont.len - cont.cons;
-	if (len > 0) {
-		if (len+1 > size)
-			len = size-1;
-		memcpy(text + textlen, cont.buf + cont.cons, len);
-		textlen += len;
-		cont.cons = cont.len;
-	}
-
-	if (cont.flushed) {
-		if (cont.flags & LOG_NEWLINE)
-			text[textlen++] = '\n';
-		/* got everything, release buffer */
-		cont.len = 0;
-	}
-	return textlen;
-}
-
 asmlinkage int vprintk_emit(int facility, int level,
 			    const char *dict, size_t dictlen,
 			    const char *fmt, va_list args)
@@ -1779,6 +1732,7 @@ asmlinkage int vprintk_emit(int facility, int level,
 	int printed_len = 0;
 	int nmi_message_lost;
 	bool in_sched = false;
+	struct cont *cont;
 
 	if (level == LOGLEVEL_SCHED) {
 		level = LOGLEVEL_DEFAULT;
@@ -1789,6 +1743,7 @@ asmlinkage int vprintk_emit(int facility, int level,
 	printk_delay();
 
 	local_irq_save(flags);
+	cont = this_cpu_ptr(&pcpu_cont);
 	this_cpu = smp_processor_id();
 
 	/*
@@ -1878,12 +1833,22 @@ asmlinkage int vprintk_emit(int facility, int level,
 		lflags |= LOG_PREFIX|LOG_NEWLINE;
 
 	if (!(lflags & LOG_NEWLINE)) {
+		if (!this_cpu_read(cont_printing)) {
+			bool unsafe_pr_cont = preemptible() &&
+				!rcu_preempt_depth();
+
+			if (unsafe_pr_cont) {
+				this_cpu_write(cont_printing, true);
+				preempt_disable();
+			}
+		}
+
 		/*
 		 * Flush the conflicting buffer. An earlier newline was missing,
 		 * or another task also prints continuation lines.
 		 */
-		if (cont.len && (lflags & LOG_PREFIX || cont.owner != current))
-			cont_flush(LOG_NEWLINE);
+		if (cont->len && (lflags & LOG_PREFIX))
+			cont_flush(cont, LOG_NEWLINE);
 
 		/* buffer line if possible, otherwise store it right away */
 		if (cont_add(facility, level, text, text_len))
@@ -1895,6 +1860,11 @@ asmlinkage int vprintk_emit(int facility, int level,
 	} else {
 		bool stored = false;
 
+		if (this_cpu_read(cont_printing)) {
+			this_cpu_write(cont_printing, false);
+			preempt_enable();
+		}
+
 		/*
 		 * If an earlier newline was missing and it was the same task,
 		 * either merge it with the current buffer and flush, or if
@@ -1903,11 +1873,11 @@ asmlinkage int vprintk_emit(int facility, int level,
 		 * If the preceding printk was from a different task and missed
 		 * a newline, flush and append the newline.
 		 */
-		if (cont.len) {
-			if (cont.owner == current && !(lflags & LOG_PREFIX))
+		if (cont->len) {
+			if (!(lflags & LOG_PREFIX))
 				stored = cont_add(facility, level, text,
 						  text_len);
-			cont_flush(LOG_NEWLINE);
+			cont_flush(cont, LOG_NEWLINE);
 		}
 
 		if (stored)
@@ -2051,7 +2021,6 @@ static struct cont {
 	size_t len;
 	size_t cons;
 	u8 level;
-	bool flushed:1;
 } cont;
 static char *log_text(const struct printk_log *msg) { return NULL; }
 static char *log_dict(const struct printk_log *msg) { return NULL; }
@@ -2344,42 +2313,6 @@ static inline int can_use_console(void)
 	return cpu_online(raw_smp_processor_id()) || have_callable_console();
 }
 
-static void console_cont_flush(char *text, size_t size)
-{
-	unsigned long flags;
-	size_t len;
-
-	raw_spin_lock_irqsave(&logbuf_lock, flags);
-
-	if (!cont.len)
-		goto out;
-
-	if (suppress_message_printing(cont.level)) {
-		cont.cons = cont.len;
-		if (cont.flushed)
-			cont.len = 0;
-		goto out;
-	}
-
-	/*
-	 * We still queue earlier records, likely because the console was
-	 * busy. The earlier ones need to be printed before this one, we
-	 * did not flush any fragment so far, so just let it queue up.
-	 */
-	if (console_seq < log_next_seq && !cont.cons)
-		goto out;
-
-	len = cont_print_text(text, size);
-	raw_spin_unlock(&logbuf_lock);
-	stop_critical_timings();
-	call_console_drivers(cont.level, NULL, 0, text, len);
-	start_critical_timings();
-	local_irq_restore(flags);
-	return;
-out:
-	raw_spin_unlock_irqrestore(&logbuf_lock, flags);
-}
-
 /**
  * console_unlock - unlock the console system
  *
@@ -2433,9 +2366,6 @@ again:
 		return;
 	}
 
-	/* flush buffered message fragment immediately to console */
-	console_cont_flush(text, sizeof(text));
-
 	for (;;) {
 		struct printk_log *msg;
 		size_t ext_len = 0;
@@ -2465,8 +2395,7 @@ skip:
 
 		msg = log_from_idx(console_idx);
 		level = msg->level;
-		if ((msg->flags & LOG_NOCONS) ||
-				suppress_message_printing(level)) {
+		if (suppress_message_printing(level)) {
 			/*
 			 * Skip record we have buffered and already printed
 			 * directly to the console when we received it, and
@@ -2474,12 +2403,6 @@ skip:
 			 */
 			console_idx = log_next(console_idx);
 			console_seq++;
-			/*
-			 * We will get here again when we register a new
-			 * CON_PRINTBUFFER console. Clear the flag so we
-			 * will properly dump everything later.
-			 */
-			msg->flags &= ~LOG_NOCONS;
 			console_prev = msg->flags;
 			goto skip;
 		}
@@ -2581,6 +2504,18 @@ void console_unblank(void)
  */
 void console_flush_on_panic(void)
 {
+	unsigned long flags;
+	unsigned int cpu;
+
+	/*
+	 * Flush the cont messages; no matter complete
+	 * or incomplete.
+	 */
+	local_irq_save(flags);
+	for_each_online_cpu(cpu)
+		cont_flush(per_cpu_ptr(&pcpu_cont, cpu), LOG_NEWLINE);
+	local_irq_restore(flags);
+
 	/*
 	 * If someone else is holding the console lock, trylock will fail
 	 * and may_schedule may be set.  Ignore and proceed to unlock so
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH][RFC] printk: make pr_cont buffer per-cpu
  2016-08-22 15:40 [PATCH][RFC] printk: make pr_cont buffer per-cpu Sergey Senozhatsky
@ 2016-08-22 16:10 ` Joe Perches
  2016-08-23  1:10   ` Sergey Senozhatsky
  2016-08-23  5:18 ` Sergey Senozhatsky
  1 sibling, 1 reply; 10+ messages in thread
From: Joe Perches @ 2016-08-22 16:10 UTC (permalink / raw)
  To: Sergey Senozhatsky, Petr Mladek, Kay Sievers
  Cc: Jan Kara, Tejun Heo, Calvin Owens, Andrew Morton, linux-kernel,
	Sergey Senozhatsky

(adding Kay Sievers who wrote most of this)

On Tue, 2016-08-23 at 00:40 +0900, Sergey Senozhatsky wrote:
> Hello,
> 
> 	RFC and POC
> 
> 	Petr, I took a very quick look at your series [1]. I think it
> won't work on some of the setups I'm toying with, where multiple CPUs
> can do a simultaneous pr_cont() output. What do you about the following
> approach?
> 
> (it's not really tested, I just finished writing the code. Will test it
> more tomorrow. But I kinda like that at this point it removes almost
> twice the code it adds, which is probably because I missed some cases).
> 
> ===8<====
> 
> 	This patch changes pr_cont buffer to be a per-cpu variable, so
> CPUs don't compete anymore for a single cont buffer. Thus we minimize
> the possibility of preliminary/forced flushes of incomplete pr_cont
> buffers.
> 	The basic idea is that the first time CPU issues a pr_cont or
> !LOG_NEWLINE output it starts a non-preemptible region which ends when
> CPUs writes a NEW_LINE symbol.
> Schematically:
> 
> 	pr_cont()			/* preempt_disable() */
> 		<< write to this_cpu_ptr cont buffer
> 	pr_cont()
> 		<< write to this_cpu_ptr cont buffer
> 	...
> 	pr_cont("\n")			/* log_store(). preempt_enable() */
> 
> 	The only possible case (unless I'm missing something) for a
> race is when IRQ is issuing a pr_cont() output, interrupting the CPU
> which is already in the middle of pr_cont() printing.
> 
> 
> [1]: http://marc.info/?l=linux-kernel&m=146860197621876
> 
> Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
> ---
>  kernel/printk/printk.c | 193 ++++++++++++++++---------------------------------
>  1 file changed, 64 insertions(+), 129 deletions(-)
> 
> diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
> index 0d3e026..2bf8b85 100644
> --- a/kernel/printk/printk.c
> +++ b/kernel/printk/printk.c
> @@ -337,10 +337,9 @@ static int console_may_schedule;
>   */
>  
>  enum log_flags {
> -	LOG_NOCONS	= 1,	/* already flushed, do not print to console */
> -	LOG_NEWLINE	= 2,	/* text ended with a newline */
> -	LOG_PREFIX	= 4,	/* text started with a prefix */
> -	LOG_CONT	= 8,	/* text is a fragment of a continuation line */
> +	LOG_NEWLINE	= 1,	/* text ended with a newline */
> +	LOG_PREFIX	= 2,	/* text started with a prefix */
> +	LOG_CONT	= 4,	/* text is a fragment of a continuation line */
>  };
>  
>  struct printk_log {
> @@ -1661,108 +1660,62 @@ static inline void printk_delay(void)
>   * though, are printed immediately to the consoles to ensure everything has
>   * reached the console in case of a kernel crash.
>   */
> -static struct cont {
> +struct cont {
>  	char buf[LOG_LINE_MAX];
>  	size_t len;			/* length == 0 means unused buffer */
> -	size_t cons;			/* bytes written to console */
> -	struct task_struct *owner;	/* task of first print*/
>  	u64 ts_nsec;			/* time of first print */
>  	u8 level;			/* log level of first message */
>  	u8 facility;			/* log facility of first message */
>  	enum log_flags flags;		/* prefix, newline flags */
> -	bool flushed:1;			/* buffer sealed and committed */
> -} cont;
> +};
> +
> +static DEFINE_PER_CPU(struct cont, pcpu_cont);
> +static DEFINE_PER_CPU(bool, cont_printing);
>  
> -static void cont_flush(enum log_flags flags)
> +static void cont_flush(struct cont *cont, enum log_flags flags)
>  {
> -	if (cont.flushed)
> -		return;
> -	if (cont.len == 0)
> +	if (cont->len == 0)
>  		return;
>  
> -	if (cont.cons) {
> -		/*
> -		 * If a fragment of this line was directly flushed to the
> -		 * console; wait for the console to pick up the rest of the
> -		 * line. LOG_NOCONS suppresses a duplicated output.
> -		 */
> -		log_store(cont.facility, cont.level, flags | LOG_NOCONS,
> -			  cont.ts_nsec, NULL, 0, cont.buf, cont.len);
> -		cont.flags = flags;
> -		cont.flushed = true;
> -	} else {
> -		/*
> -		 * If no fragment of this line ever reached the console,
> -		 * just submit it to the store and free the buffer.
> -		 */
> -		log_store(cont.facility, cont.level, flags, 0,
> -			  NULL, 0, cont.buf, cont.len);
> -		cont.len = 0;
> -	}
> +	/*
> +	 * If no fragment of this line ever reached the console,
> +	 * just submit it to the store and free the buffer.
> +	 */
> +	log_store(cont->facility, cont->level, flags, 0,
> +		  NULL, 0, cont->buf, cont->len);
> +	cont->len = 0;
>  }
>  
>  static bool cont_add(int facility, int level, const char *text, size_t len)
>  {
> -	if (cont.len && cont.flushed)
> -		return false;
> +	struct cont *cont = this_cpu_ptr(&pcpu_cont);
>  
>  	/*
>  	 * If ext consoles are present, flush and skip in-kernel
>  	 * continuation.  See nr_ext_console_drivers definition.  Also, if
>  	 * the line gets too long, split it up in separate records.
>  	 */
> -	if (nr_ext_console_drivers || cont.len + len > sizeof(cont.buf)) {
> -		cont_flush(LOG_CONT);
> +	if (nr_ext_console_drivers || cont->len + len > sizeof(cont->buf)) {
> +		cont_flush(cont, LOG_CONT);
>  		return false;
>  	}
>  
> -	if (!cont.len) {
> -		cont.facility = facility;
> -		cont.level = level;
> -		cont.owner = current;
> -		cont.ts_nsec = local_clock();
> -		cont.flags = 0;
> -		cont.cons = 0;
> -		cont.flushed = false;
> +	if (!cont->len) {
> +		cont->facility = facility;
> +		cont->level = level;
> +		cont->ts_nsec = local_clock();
> +		cont->flags = 0;
>  	}
>  
> -	memcpy(cont.buf + cont.len, text, len);
> -	cont.len += len;
> +	memcpy(cont->buf + cont->len, text, len);
> +	cont->len += len;
>  
> -	if (cont.len > (sizeof(cont.buf) * 80) / 100)
> -		cont_flush(LOG_CONT);
> +	if (cont->len > (sizeof(cont->buf) * 80) / 100)
> +		cont_flush(cont, LOG_CONT);
>  
>  	return true;
>  }
>  
> -static size_t cont_print_text(char *text, size_t size)
> -{
> -	size_t textlen = 0;
> -	size_t len;
> -
> -	if (cont.cons == 0 && (console_prev & LOG_NEWLINE)) {
> -		textlen += print_time(cont.ts_nsec, text);
> -		size -= textlen;
> -	}
> -
> -	len = cont.len - cont.cons;
> -	if (len > 0) {
> -		if (len+1 > size)
> -			len = size-1;
> -		memcpy(text + textlen, cont.buf + cont.cons, len);
> -		textlen += len;
> -		cont.cons = cont.len;
> -	}
> -
> -	if (cont.flushed) {
> -		if (cont.flags & LOG_NEWLINE)
> -			text[textlen++] = '\n';
> -		/* got everything, release buffer */
> -		cont.len = 0;
> -	}
> -	return textlen;
> -}
> -
>  asmlinkage int vprintk_emit(int facility, int level,
>  			    const char *dict, size_t dictlen,
>  			    const char *fmt, va_list args)
> @@ -1779,6 +1732,7 @@ asmlinkage int vprintk_emit(int facility, int level,
>  	int printed_len = 0;
>  	int nmi_message_lost;
>  	bool in_sched = false;
> +	struct cont *cont;
>  
>  	if (level == LOGLEVEL_SCHED) {
>  		level = LOGLEVEL_DEFAULT;
> @@ -1789,6 +1743,7 @@ asmlinkage int vprintk_emit(int facility, int level,
>  	printk_delay();
>  
>  	local_irq_save(flags);
> +	cont = this_cpu_ptr(&pcpu_cont);
>  	this_cpu = smp_processor_id();
>  
>  	/*
> @@ -1878,12 +1833,22 @@ asmlinkage int vprintk_emit(int facility, int level,
>  		lflags |= LOG_PREFIX|LOG_NEWLINE;
>  
>  	if (!(lflags & LOG_NEWLINE)) {
> +		if (!this_cpu_read(cont_printing)) {
> +			bool unsafe_pr_cont = preemptible() &&
> +				!rcu_preempt_depth();
> +
> +			if (unsafe_pr_cont) {
> +				this_cpu_write(cont_printing, true);
> +				preempt_disable();
> +			}
> +		}
> +
>  		/*
>  		 * Flush the conflicting buffer. An earlier newline was missing,
>  		 * or another task also prints continuation lines.
>  		 */
> -		if (cont.len && (lflags & LOG_PREFIX || cont.owner != current))
> -			cont_flush(LOG_NEWLINE);
> +		if (cont->len && (lflags & LOG_PREFIX))
> +			cont_flush(cont, LOG_NEWLINE);
>  
>  		/* buffer line if possible, otherwise store it right away */
>  		if (cont_add(facility, level, text, text_len))
> @@ -1895,6 +1860,11 @@ asmlinkage int vprintk_emit(int facility, int level,
>  	} else {
>  		bool stored = false;
>  
> +		if (this_cpu_read(cont_printing)) {
> +			this_cpu_write(cont_printing, false);
> +			preempt_enable();
> +		}
> +
>  		/*
>  		 * If an earlier newline was missing and it was the same task,
>  		 * either merge it with the current buffer and flush, or if
> @@ -1903,11 +1873,11 @@ asmlinkage int vprintk_emit(int facility, int level,
>  		 * If the preceding printk was from a different task and missed
>  		 * a newline, flush and append the newline.
>  		 */
> -		if (cont.len) {
> -			if (cont.owner == current && !(lflags & LOG_PREFIX))
> +		if (cont->len) {
> +			if (!(lflags & LOG_PREFIX))
>  				stored = cont_add(facility, level, text,
>  						  text_len);
> -			cont_flush(LOG_NEWLINE);
> +			cont_flush(cont, LOG_NEWLINE);
>  		}
>  
>  		if (stored)
> @@ -2051,7 +2021,6 @@ static struct cont {
>  	size_t len;
>  	size_t cons;
>  	u8 level;
> -	bool flushed:1;
>  } cont;
>  static char *log_text(const struct printk_log *msg) { return NULL; }
>  static char *log_dict(const struct printk_log *msg) { return NULL; }
> @@ -2344,42 +2313,6 @@ static inline int can_use_console(void)
>  	return cpu_online(raw_smp_processor_id()) || have_callable_console();
>  }
>  
> -static void console_cont_flush(char *text, size_t size)
> -{
> -	unsigned long flags;
> -	size_t len;
> -
> -	raw_spin_lock_irqsave(&logbuf_lock, flags);
> -
> -	if (!cont.len)
> -		goto out;
> -
> -	if (suppress_message_printing(cont.level)) {
> -		cont.cons = cont.len;
> -		if (cont.flushed)
> -			cont.len = 0;
> -		goto out;
> -	}
> -
> -	/*
> -	 * We still queue earlier records, likely because the console was
> -	 * busy. The earlier ones need to be printed before this one, we
> -	 * did not flush any fragment so far, so just let it queue up.
> -	 */
> -	if (console_seq < log_next_seq && !cont.cons)
> -		goto out;
> -
> -	len = cont_print_text(text, size);
> -	raw_spin_unlock(&logbuf_lock);
> -	stop_critical_timings();
> -	call_console_drivers(cont.level, NULL, 0, text, len);
> -	start_critical_timings();
> -	local_irq_restore(flags);
> -	return;
> -out:
> -	raw_spin_unlock_irqrestore(&logbuf_lock, flags);
> -}
> -
>  /**
>   * console_unlock - unlock the console system
>   *
> @@ -2433,9 +2366,6 @@ again:
>  		return;
>  	}
>  
> -	/* flush buffered message fragment immediately to console */
> -	console_cont_flush(text, sizeof(text));
> -
>  	for (;;) {
>  		struct printk_log *msg;
>  		size_t ext_len = 0;
> @@ -2465,8 +2395,7 @@ skip:
>  
>  		msg = log_from_idx(console_idx);
>  		level = msg->level;
> -		if ((msg->flags & LOG_NOCONS) ||
> -				suppress_message_printing(level)) {
> +		if (suppress_message_printing(level)) {
>  			/*
>  			 * Skip record we have buffered and already printed
>  			 * directly to the console when we received it, and
> @@ -2474,12 +2403,6 @@ skip:
>  			 */
>  			console_idx = log_next(console_idx);
>  			console_seq++;
> -			/*
> -			 * We will get here again when we register a new
> -			 * CON_PRINTBUFFER console. Clear the flag so we
> -			 * will properly dump everything later.
> -			 */
> -			msg->flags &= ~LOG_NOCONS;
>  			console_prev = msg->flags;
>  			goto skip;
>  		}
> @@ -2581,6 +2504,18 @@ void console_unblank(void)
>   */
>  void console_flush_on_panic(void)
>  {
> +	unsigned long flags;
> +	unsigned int cpu;
> +
> +	/*
> +	 * Flush the cont messages; no matter complete
> +	 * or incomplete.
> +	 */
> +	local_irq_save(flags);
> +	for_each_online_cpu(cpu)
> +		cont_flush(per_cpu_ptr(&pcpu_cont, cpu), LOG_NEWLINE);
> +	local_irq_restore(flags);
> +
>  	/*
>  	 * If someone else is holding the console lock, trylock will fail
>  	 * and may_schedule may be set.  Ignore and proceed to unlock so

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH][RFC] printk: make pr_cont buffer per-cpu
  2016-08-22 16:10 ` Joe Perches
@ 2016-08-23  1:10   ` Sergey Senozhatsky
  0 siblings, 0 replies; 10+ messages in thread
From: Sergey Senozhatsky @ 2016-08-23  1:10 UTC (permalink / raw)
  To: Joe Perches
  Cc: Sergey Senozhatsky, Petr Mladek, Kay Sievers, Jan Kara,
	Tejun Heo, Calvin Owens, Andrew Morton, linux-kernel,
	Sergey Senozhatsky

On (08/22/16 09:10), Joe Perches wrote:
> 
> (adding Kay Sievers who wrote most of this)

ok, thanks. then I'll add http://marc.info/?l=linux-kernel&m=146717692431893
for more information.

	-ss

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH][RFC] printk: make pr_cont buffer per-cpu
  2016-08-22 15:40 [PATCH][RFC] printk: make pr_cont buffer per-cpu Sergey Senozhatsky
  2016-08-22 16:10 ` Joe Perches
@ 2016-08-23  5:18 ` Sergey Senozhatsky
  2016-08-23 11:47   ` Petr Mladek
  1 sibling, 1 reply; 10+ messages in thread
From: Sergey Senozhatsky @ 2016-08-23  5:18 UTC (permalink / raw)
  To: Petr Mladek, Jan Kara, Kay Sievers
  Cc: Tejun Heo, Calvin Owens, Andrew Morton, linux-kernel,
	Sergey Senozhatsky, Sergey Senozhatsky

On (08/23/16 00:40), Sergey Senozhatsky wrote:
> 	RFC and POC
[..]
>  	if (!(lflags & LOG_NEWLINE)) {
> +		if (!this_cpu_read(cont_printing)) {
> +			bool unsafe_pr_cont = preemptible() &&
> +				!rcu_preempt_depth();

d'oh, how did it get there... this simply disables per-cpu pr_cont(). sorry.
what I wanted to do there is to add a dependency on system_state:

	if (!this_cpu_read(cont_printing) && (system_state == SYSTEM_RUNNING)) {
		....
	}

one of the problems is x86 cpu_up, which does pr_cont()-s in announce_cpu()
and then explicitly calls shedule() in do_boot_cpu(). so we have a

	pr_cont();
		shedule();
	pr_cont();
		shedule();
	...
	pr_cont("\n");

pattern. which is probably OK, given that we are in a non-smp mode yet. thus
I'm enabling per-cpu pr_cont buffers only when the system is SYSTEM_RUNNING.

===8<====

Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
---
 kernel/printk/printk.c | 190 ++++++++++++++++---------------------------------
 1 file changed, 61 insertions(+), 129 deletions(-)

diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 0d3e026..7f48cbf 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -337,10 +337,9 @@ static int console_may_schedule;
  */
 
 enum log_flags {
-	LOG_NOCONS	= 1,	/* already flushed, do not print to console */
-	LOG_NEWLINE	= 2,	/* text ended with a newline */
-	LOG_PREFIX	= 4,	/* text started with a prefix */
-	LOG_CONT	= 8,	/* text is a fragment of a continuation line */
+	LOG_NEWLINE	= 1,	/* text ended with a newline */
+	LOG_PREFIX	= 2,	/* text started with a prefix */
+	LOG_CONT	= 4,	/* text is a fragment of a continuation line */
 };
 
 struct printk_log {
@@ -1661,108 +1660,62 @@ static inline void printk_delay(void)
  * though, are printed immediately to the consoles to ensure everything has
  * reached the console in case of a kernel crash.
  */
-static struct cont {
+struct cont {
 	char buf[LOG_LINE_MAX];
 	size_t len;			/* length == 0 means unused buffer */
-	size_t cons;			/* bytes written to console */
-	struct task_struct *owner;	/* task of first print*/
 	u64 ts_nsec;			/* time of first print */
 	u8 level;			/* log level of first message */
 	u8 facility;			/* log facility of first message */
 	enum log_flags flags;		/* prefix, newline flags */
-	bool flushed:1;			/* buffer sealed and committed */
-} cont;
+};
+
+static DEFINE_PER_CPU(struct cont, pcpu_cont);
+static DEFINE_PER_CPU(bool, cont_printing);
 
-static void cont_flush(enum log_flags flags)
+static void cont_flush(struct cont *cont, enum log_flags flags)
 {
-	if (cont.flushed)
-		return;
-	if (cont.len == 0)
+	if (cont->len == 0)
 		return;
 
-	if (cont.cons) {
-		/*
-		 * If a fragment of this line was directly flushed to the
-		 * console; wait for the console to pick up the rest of the
-		 * line. LOG_NOCONS suppresses a duplicated output.
-		 */
-		log_store(cont.facility, cont.level, flags | LOG_NOCONS,
-			  cont.ts_nsec, NULL, 0, cont.buf, cont.len);
-		cont.flags = flags;
-		cont.flushed = true;
-	} else {
-		/*
-		 * If no fragment of this line ever reached the console,
-		 * just submit it to the store and free the buffer.
-		 */
-		log_store(cont.facility, cont.level, flags, 0,
-			  NULL, 0, cont.buf, cont.len);
-		cont.len = 0;
-	}
+	/*
+	 * If no fragment of this line ever reached the console,
+	 * just submit it to the store and free the buffer.
+	 */
+	log_store(cont->facility, cont->level, flags, 0,
+		  NULL, 0, cont->buf, cont->len);
+	cont->len = 0;
 }
 
 static bool cont_add(int facility, int level, const char *text, size_t len)
 {
-	if (cont.len && cont.flushed)
-		return false;
+	struct cont *cont = this_cpu_ptr(&pcpu_cont);
 
 	/*
 	 * If ext consoles are present, flush and skip in-kernel
 	 * continuation.  See nr_ext_console_drivers definition.  Also, if
 	 * the line gets too long, split it up in separate records.
 	 */
-	if (nr_ext_console_drivers || cont.len + len > sizeof(cont.buf)) {
-		cont_flush(LOG_CONT);
+	if (nr_ext_console_drivers || cont->len + len > sizeof(cont->buf)) {
+		cont_flush(cont, LOG_CONT);
 		return false;
 	}
 
-	if (!cont.len) {
-		cont.facility = facility;
-		cont.level = level;
-		cont.owner = current;
-		cont.ts_nsec = local_clock();
-		cont.flags = 0;
-		cont.cons = 0;
-		cont.flushed = false;
+	if (!cont->len) {
+		cont->facility = facility;
+		cont->level = level;
+		cont->ts_nsec = local_clock();
+		cont->flags = 0;
 	}
 
-	memcpy(cont.buf + cont.len, text, len);
-	cont.len += len;
+	memcpy(cont->buf + cont->len, text, len);
+	cont->len += len;
 
-	if (cont.len > (sizeof(cont.buf) * 80) / 100)
-		cont_flush(LOG_CONT);
+	if (cont->len > (sizeof(cont->buf) * 80) / 100)
+		cont_flush(cont, LOG_CONT);
 
 	return true;
 }
 
-static size_t cont_print_text(char *text, size_t size)
-{
-	size_t textlen = 0;
-	size_t len;
-
-	if (cont.cons == 0 && (console_prev & LOG_NEWLINE)) {
-		textlen += print_time(cont.ts_nsec, text);
-		size -= textlen;
-	}
-
-	len = cont.len - cont.cons;
-	if (len > 0) {
-		if (len+1 > size)
-			len = size-1;
-		memcpy(text + textlen, cont.buf + cont.cons, len);
-		textlen += len;
-		cont.cons = cont.len;
-	}
-
-	if (cont.flushed) {
-		if (cont.flags & LOG_NEWLINE)
-			text[textlen++] = '\n';
-		/* got everything, release buffer */
-		cont.len = 0;
-	}
-	return textlen;
-}
-
 asmlinkage int vprintk_emit(int facility, int level,
 			    const char *dict, size_t dictlen,
 			    const char *fmt, va_list args)
@@ -1779,6 +1732,7 @@ asmlinkage int vprintk_emit(int facility, int level,
 	int printed_len = 0;
 	int nmi_message_lost;
 	bool in_sched = false;
+	struct cont *cont;
 
 	if (level == LOGLEVEL_SCHED) {
 		level = LOGLEVEL_DEFAULT;
@@ -1789,6 +1743,7 @@ asmlinkage int vprintk_emit(int facility, int level,
 	printk_delay();
 
 	local_irq_save(flags);
+	cont = this_cpu_ptr(&pcpu_cont);
 	this_cpu = smp_processor_id();
 
 	/*
@@ -1878,12 +1833,19 @@ asmlinkage int vprintk_emit(int facility, int level,
 		lflags |= LOG_PREFIX|LOG_NEWLINE;
 
 	if (!(lflags & LOG_NEWLINE)) {
+		if (!this_cpu_read(cont_printing)) {
+			if (system_state == SYSTEM_RUNNING) {
+				this_cpu_write(cont_printing, true);
+				preempt_disable();
+			}
+		}
+
 		/*
 		 * Flush the conflicting buffer. An earlier newline was missing,
 		 * or another task also prints continuation lines.
 		 */
-		if (cont.len && (lflags & LOG_PREFIX || cont.owner != current))
-			cont_flush(LOG_NEWLINE);
+		if (cont->len && (lflags & LOG_PREFIX))
+			cont_flush(cont, LOG_NEWLINE);
 
 		/* buffer line if possible, otherwise store it right away */
 		if (cont_add(facility, level, text, text_len))
@@ -1895,6 +1857,11 @@ asmlinkage int vprintk_emit(int facility, int level,
 	} else {
 		bool stored = false;
 
+		if (this_cpu_read(cont_printing)) {
+			this_cpu_write(cont_printing, false);
+			preempt_enable();
+		}
+
 		/*
 		 * If an earlier newline was missing and it was the same task,
 		 * either merge it with the current buffer and flush, or if
@@ -1903,11 +1870,11 @@ asmlinkage int vprintk_emit(int facility, int level,
 		 * If the preceding printk was from a different task and missed
 		 * a newline, flush and append the newline.
 		 */
-		if (cont.len) {
-			if (cont.owner == current && !(lflags & LOG_PREFIX))
+		if (cont->len) {
+			if (!(lflags & LOG_PREFIX))
 				stored = cont_add(facility, level, text,
 						  text_len);
-			cont_flush(LOG_NEWLINE);
+			cont_flush(cont, LOG_NEWLINE);
 		}
 
 		if (stored)
@@ -2051,7 +2018,6 @@ static struct cont {
 	size_t len;
 	size_t cons;
 	u8 level;
-	bool flushed:1;
 } cont;
 static char *log_text(const struct printk_log *msg) { return NULL; }
 static char *log_dict(const struct printk_log *msg) { return NULL; }
@@ -2344,42 +2310,6 @@ static inline int can_use_console(void)
 	return cpu_online(raw_smp_processor_id()) || have_callable_console();
 }
 
-static void console_cont_flush(char *text, size_t size)
-{
-	unsigned long flags;
-	size_t len;
-
-	raw_spin_lock_irqsave(&logbuf_lock, flags);
-
-	if (!cont.len)
-		goto out;
-
-	if (suppress_message_printing(cont.level)) {
-		cont.cons = cont.len;
-		if (cont.flushed)
-			cont.len = 0;
-		goto out;
-	}
-
-	/*
-	 * We still queue earlier records, likely because the console was
-	 * busy. The earlier ones need to be printed before this one, we
-	 * did not flush any fragment so far, so just let it queue up.
-	 */
-	if (console_seq < log_next_seq && !cont.cons)
-		goto out;
-
-	len = cont_print_text(text, size);
-	raw_spin_unlock(&logbuf_lock);
-	stop_critical_timings();
-	call_console_drivers(cont.level, NULL, 0, text, len);
-	start_critical_timings();
-	local_irq_restore(flags);
-	return;
-out:
-	raw_spin_unlock_irqrestore(&logbuf_lock, flags);
-}
-
 /**
  * console_unlock - unlock the console system
  *
@@ -2433,9 +2363,6 @@ again:
 		return;
 	}
 
-	/* flush buffered message fragment immediately to console */
-	console_cont_flush(text, sizeof(text));
-
 	for (;;) {
 		struct printk_log *msg;
 		size_t ext_len = 0;
@@ -2465,8 +2392,7 @@ skip:
 
 		msg = log_from_idx(console_idx);
 		level = msg->level;
-		if ((msg->flags & LOG_NOCONS) ||
-				suppress_message_printing(level)) {
+		if (suppress_message_printing(level)) {
 			/*
 			 * Skip record we have buffered and already printed
 			 * directly to the console when we received it, and
@@ -2474,12 +2400,6 @@ skip:
 			 */
 			console_idx = log_next(console_idx);
 			console_seq++;
-			/*
-			 * We will get here again when we register a new
-			 * CON_PRINTBUFFER console. Clear the flag so we
-			 * will properly dump everything later.
-			 */
-			msg->flags &= ~LOG_NOCONS;
 			console_prev = msg->flags;
 			goto skip;
 		}
@@ -2581,6 +2501,18 @@ void console_unblank(void)
  */
 void console_flush_on_panic(void)
 {
+	unsigned long flags;
+	unsigned int cpu;
+
+	/*
+	 * Flush the cont messages; no matter complete
+	 * or incomplete.
+	 */
+	local_irq_save(flags);
+	for_each_online_cpu(cpu)
+		cont_flush(per_cpu_ptr(&pcpu_cont, cpu), LOG_NEWLINE);
+	local_irq_restore(flags);
+
 	/*
 	 * If someone else is holding the console lock, trylock will fail
 	 * and may_schedule may be set.  Ignore and proceed to unlock so
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH][RFC] printk: make pr_cont buffer per-cpu
  2016-08-23  5:18 ` Sergey Senozhatsky
@ 2016-08-23 11:47   ` Petr Mladek
  2016-08-24  1:14     ` Sergey Senozhatsky
  0 siblings, 1 reply; 10+ messages in thread
From: Petr Mladek @ 2016-08-23 11:47 UTC (permalink / raw)
  To: Sergey Senozhatsky
  Cc: Jan Kara, Kay Sievers, Tejun Heo, Calvin Owens, Andrew Morton,
	linux-kernel, Sergey Senozhatsky

On Tue 2016-08-23 14:18:31, Sergey Senozhatsky wrote:
> On (08/23/16 00:40), Sergey Senozhatsky wrote:
> > 	RFC and POC
> [..]
> >  	if (!(lflags & LOG_NEWLINE)) {
> > +		if (!this_cpu_read(cont_printing)) {
> > +			bool unsafe_pr_cont = preemptible() &&
> > +				!rcu_preempt_depth();
> 
> d'oh, how did it get there... this simply disables per-cpu pr_cont(). sorry.
> what I wanted to do there is to add a dependency on system_state:
> 
> 	if (!this_cpu_read(cont_printing) && (system_state == SYSTEM_RUNNING)) {
> 		....
> 	}
> 
> one of the problems is x86 cpu_up, which does pr_cont()-s in announce_cpu()
> and then explicitly calls shedule() in do_boot_cpu(). so we have a
> 
> 	pr_cont();
> 		shedule();
> 	pr_cont();
> 		shedule();
> 	...
> 	pr_cont("\n");
> 
> pattern. which is probably OK, given that we are in a non-smp mode yet. thus
> I'm enabling per-cpu pr_cont buffers only when the system is SYSTEM_RUNNING.
> 
> ===8<====
> 
> Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
> ---
>  kernel/printk/printk.c | 190 ++++++++++++++++---------------------------------
>  1 file changed, 61 insertions(+), 129 deletions(-)

The simplification is nice but...

> diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
> index 0d3e026..7f48cbf 100644
> --- a/kernel/printk/printk.c
> +++ b/kernel/printk/printk.c
> -	return textlen;
> -}
> -
>  asmlinkage int vprintk_emit(int facility, int level,
>  			    const char *dict, size_t dictlen,
>  			    const char *fmt, va_list args)
> @@ -1779,6 +1732,7 @@ asmlinkage int vprintk_emit(int facility, int level,
>  	int printed_len = 0;
>  	int nmi_message_lost;
>  	bool in_sched = false;
> +	struct cont *cont;
>  
>  	if (level == LOGLEVEL_SCHED) {
>  		level = LOGLEVEL_DEFAULT;
> @@ -1789,6 +1743,7 @@ asmlinkage int vprintk_emit(int facility, int level,
>  	printk_delay();
>  
>  	local_irq_save(flags);
> +	cont = this_cpu_ptr(&pcpu_cont);
>  	this_cpu = smp_processor_id();
>  
>  	/*
> @@ -1878,12 +1833,19 @@ asmlinkage int vprintk_emit(int facility, int level,
>  		lflags |= LOG_PREFIX|LOG_NEWLINE;
>  
>  	if (!(lflags & LOG_NEWLINE)) {
> +		if (!this_cpu_read(cont_printing)) {
> +			if (system_state == SYSTEM_RUNNING) {
> +				this_cpu_write(cont_printing, true);
> +				preempt_disable();
> +			}
> +		}

I am afraid that this is not acceptable. It means that printk() will have
an unexpected side effect. The missing "\n" at the end of a printed
string would disable preemption. See below for more.

> +
>  		/*
>  		 * Flush the conflicting buffer. An earlier newline was missing,
>  		 * or another task also prints continuation lines.
>  		 */
> -		if (cont.len && (lflags & LOG_PREFIX || cont.owner != current))
> -			cont_flush(LOG_NEWLINE);
> +		if (cont->len && (lflags & LOG_PREFIX))
> +			cont_flush(cont, LOG_NEWLINE);
>  
>  		/* buffer line if possible, otherwise store it right away */
>  		if (cont_add(facility, level, text, text_len))
> @@ -1895,6 +1857,11 @@ asmlinkage int vprintk_emit(int facility, int level,
>  	} else {
>  		bool stored = false;
>  
> +		if (this_cpu_read(cont_printing)) {
> +			this_cpu_write(cont_printing, false);
> +			preempt_enable();
> +		}

If "\n" is missing by mistake. The preemption will not get enabled and
the task would stall.

Note that printk() is heavily used when debugging kernel. People would
hate us if every n-th build is unusable because of forgotten '\n'.

>  		/*
>  		 * If an earlier newline was missing and it was the same task,
>  		 * either merge it with the current buffer and flush, or if
> @@ -2051,7 +2018,6 @@ static struct cont {
>  	size_t len;
>  	size_t cons;
>  	u8 level;
> -	bool flushed:1;
>  } cont;
>  static char *log_text(const struct printk_log *msg) { return NULL; }
>  static char *log_dict(const struct printk_log *msg) { return NULL; }
> @@ -2344,42 +2310,6 @@ static inline int can_use_console(void)
>  	return cpu_online(raw_smp_processor_id()) || have_callable_console();
>  }
>  
> -static void console_cont_flush(char *text, size_t size)
> -{
> -	unsigned long flags;
> -	size_t len;
> -
> -	raw_spin_lock_irqsave(&logbuf_lock, flags);
> -
> -	if (!cont.len)
> -		goto out;
> -
> -	if (suppress_message_printing(cont.level)) {
> -		cont.cons = cont.len;
> -		if (cont.flushed)
> -			cont.len = 0;
> -		goto out;
> -	}
> -
> -	/*
> -	 * We still queue earlier records, likely because the console was
> -	 * busy. The earlier ones need to be printed before this one, we
> -	 * did not flush any fragment so far, so just let it queue up.
> -	 */
> -	if (console_seq < log_next_seq && !cont.cons)
> -		goto out;
> -
> -	len = cont_print_text(text, size);
> -	raw_spin_unlock(&logbuf_lock);
> -	stop_critical_timings();
> -	call_console_drivers(cont.level, NULL, 0, text, len);
> -	start_critical_timings();
> -	local_irq_restore(flags);
> -	return;
> -out:
> -	raw_spin_unlock_irqrestore(&logbuf_lock, flags);
> -}

This is a trade off between a "perfect" output and an early output.
I do not feel like deciding about it.

In each case, a buggy line (without \n") would stay "hidden" in
the per-CPU buffer until another line with "\n" is printed
on the given CPU.

>  /**
>   * console_unlock - unlock the console system
>   *
> @@ -2433,9 +2363,6 @@ again:
>  		return;
>  	}
>  
> -	/* flush buffered message fragment immediately to console */
> -	console_cont_flush(text, sizeof(text));
> -
>  	for (;;) {
>  		struct printk_log *msg;
>  		size_t ext_len = 0;

I think that cont lines should be a corner case. There should be only
a limited use of them. We should not make too complicated things to
support them. Also printk() must not get harder to use because of them.
I still see a messed output rather as a cosmetic problem in compare with
possible possible deadlocks or hung tasks.

Best Regards,
Petr

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH][RFC] printk: make pr_cont buffer per-cpu
  2016-08-23 11:47   ` Petr Mladek
@ 2016-08-24  1:14     ` Sergey Senozhatsky
  2016-08-24  8:19       ` Petr Mladek
  0 siblings, 1 reply; 10+ messages in thread
From: Sergey Senozhatsky @ 2016-08-24  1:14 UTC (permalink / raw)
  To: Petr Mladek
  Cc: Sergey Senozhatsky, Jan Kara, Kay Sievers, Tejun Heo,
	Calvin Owens, Andrew Morton, linux-kernel, Sergey Senozhatsky

Hello,

On (08/23/16 13:47), Petr Mladek wrote:
[..]
> >  	if (!(lflags & LOG_NEWLINE)) {
> > +		if (!this_cpu_read(cont_printing)) {
> > +			if (system_state == SYSTEM_RUNNING) {
> > +				this_cpu_write(cont_printing, true);
> > +				preempt_disable();
> > +			}
> > +		}
> 
> I am afraid that this is not acceptable. It means that printk() will have
> an unexpected side effect. The missing "\n" at the end of a printed
> string would disable preemption. See below for more.

missing '\n' must WARN about "sched while atomic" eventually, so it
shouldn't go unnoticed or stay hidden.

> I think that cont lines should be a corner case. There should be only
> a limited use of them. We should not make too complicated things to
> support them. Also printk() must not get harder to use because of them.
> I still see a messed output rather as a cosmetic problem in compare with
> possible possible deadlocks or hung tasks.

oh, I would love it if pr_cont() was never used in SMP. but this is not
the case, unfortunately. and, ironically, where pr_cont really matters
is debugging -- for instance, look at arch/x86/kernel/dumpstack_{32,64}.c
show_regs() or show_stack_log_lvl()

void show_regs(struct pt_regs *regs)
{
	...
	if (!user_mode(regs)) {
		pr_emerg("Stack:\n");
		show_stack_log_lvl(NULL, regs, &regs->sp, 0, KERN_EMERG);

		pr_emerg("Code:");

		ip = (u8 *)regs->ip - code_prologue;
		if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
			/* try starting at IP */
			ip = (u8 *)regs->ip;
			code_len = code_len - code_prologue + 1;
		}
		for (i = 0; i < code_len; i++, ip++) {
			if (ip < (u8 *)PAGE_OFFSET ||
					probe_kernel_address(ip, c)) {
				pr_cont("  Bad EIP value.");
				break;
			}
			if (ip == (u8 *)regs->ip)
				pr_cont(" <%02x>", c);
			else
				pr_cont(" %02x", c);
		}
	}
	pr_cont("\n");
}

or arch/x86/mm/kmemcheck/error.c
... or arch/arm/kernel/traps.c

static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
{
	unsigned int fp, mode;
	int ok = 1;

	printk("Backtrace: ");

	if (!tsk)
		tsk = current;

	if (regs) {
		fp = frame_pointer(regs);
		mode = processor_mode(regs);
	} else if (tsk != current) {
		fp = thread_saved_fp(tsk);
		mode = 0x10;
	} else {
		asm("mov %0, fp" : "=r" (fp) : : "cc");
		mode = 0x10;
	}

	if (!fp) {
		pr_cont("no frame pointer");
		ok = 0;
	} else if (verify_stack(fp)) {
		pr_cont("invalid frame pointer 0x%08x", fp);
		ok = 0;
	} else if (fp < (unsigned long)end_of_stack(tsk))
		pr_cont("frame pointer underflow");
	pr_cont("\n");

	if (ok)
		c_backtrace(fp, mode);
}

or arch/arm/mm/fault.c show_pte()... and so on and so forth.

well, I do understand what you mean and agree with it, but I'm
afraid pr_cont() kinda matters after all and people *probably*
expect it to be SMP safe (I'm not entirely sure whether all of
those pr_cont() calls were put there with the idea that the
output can be messed up and quite hard to read).

	-ss

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH][RFC] printk: make pr_cont buffer per-cpu
  2016-08-24  1:14     ` Sergey Senozhatsky
@ 2016-08-24  8:19       ` Petr Mladek
  2016-08-24 14:27         ` Sergey Senozhatsky
  0 siblings, 1 reply; 10+ messages in thread
From: Petr Mladek @ 2016-08-24  8:19 UTC (permalink / raw)
  To: Sergey Senozhatsky
  Cc: Jan Kara, Kay Sievers, Tejun Heo, Calvin Owens, Andrew Morton,
	linux-kernel, Sergey Senozhatsky

On Wed 2016-08-24 10:14:20, Sergey Senozhatsky wrote:
> Hello,
> 
> On (08/23/16 13:47), Petr Mladek wrote:
> [..]
> > >  	if (!(lflags & LOG_NEWLINE)) {
> > > +		if (!this_cpu_read(cont_printing)) {
> > > +			if (system_state == SYSTEM_RUNNING) {
> > > +				this_cpu_write(cont_printing, true);
> > > +				preempt_disable();
> > > +			}
> > > +		}
> > 
> > I am afraid that this is not acceptable. It means that printk() will have
> > an unexpected side effect. The missing "\n" at the end of a printed
> > string would disable preemption. See below for more.
> 
> missing '\n' must WARN about "sched while atomic" eventually, so it
> shouldn't go unnoticed or stay hidden.

Well, it will still force people to rebuilt a test kernel because they
forget to use '\n" and the test kernel is unusable.

IMHO, the connection between '\n' and preemption is not
intuitive and hard to spot. We should do our best to avoid it.


> > I think that cont lines should be a corner case. There should be only
> > a limited use of them. We should not make too complicated things to
> > support them. Also printk() must not get harder to use because of them.
> > I still see a messed output rather as a cosmetic problem in compare with
> > possible possible deadlocks or hung tasks.
> 
> oh, I would love it if pr_cont() was never used in SMP. but this is not
> the case, unfortunately. and, ironically, where pr_cont really matters
> is debugging -- for instance, look at arch/x86/kernel/dumpstack_{32,64}.c
> show_regs() or show_stack_log_lvl()

Sure but how big is the problem in the daily life? I have never heard
colleagues complaining about messed cont lines. It is rare and it
is always possible to restore it. Checking BUG/WARN messages is
a rather hard detective work anyway.

The most painful situation would be if backtraces from different
CPUs are mixed. But there will be problem even with mixed lines.
Fortunately, this usually happens when printing backtraces from
all CPUs in NMI and the output is serialized.


> well, I do understand what you mean and agree with it, but I'm
> afraid pr_cont() kinda matters after all and people *probably*
> expect it to be SMP safe (I'm not entirely sure whether all of
> those pr_cont() calls were put there with the idea that the
> output can be messed up and quite hard to read).

This was even worse before the cont lines buffer.

Sigh, I do not feel experienced enough to decide about this.
I wonder if this is rather theoretical problem or if there
are many real complains about it.

I feel that we might be trapped by a perfectionism. Perfect output
would be great. But it must not make printk() hard to use
in the daily life.

Best Regards,
Petr

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH][RFC] printk: make pr_cont buffer per-cpu
  2016-08-24  8:19       ` Petr Mladek
@ 2016-08-24 14:27         ` Sergey Senozhatsky
  2016-08-25 21:27           ` Petr Mladek
  0 siblings, 1 reply; 10+ messages in thread
From: Sergey Senozhatsky @ 2016-08-24 14:27 UTC (permalink / raw)
  To: Petr Mladek
  Cc: Sergey Senozhatsky, Jan Kara, Kay Sievers, Tejun Heo,
	Calvin Owens, Andrew Morton, linux-kernel, Sergey Senozhatsky

On (08/24/16 10:19), Petr Mladek wrote:
> > On (08/23/16 13:47), Petr Mladek wrote:
> > [..]
> > > >  	if (!(lflags & LOG_NEWLINE)) {
> > > > +		if (!this_cpu_read(cont_printing)) {
> > > > +			if (system_state == SYSTEM_RUNNING) {
> > > > +				this_cpu_write(cont_printing, true);
> > > > +				preempt_disable();
> > > > +			}
> > > > +		}
> > > 
> > > I am afraid that this is not acceptable. It means that printk() will have
> > > an unexpected side effect. The missing "\n" at the end of a printed
> > > string would disable preemption. See below for more.
> > 
> > missing '\n' must WARN about "sched while atomic" eventually, so it
> > shouldn't go unnoticed or stay hidden.
> 
> Well, it will still force people to rebuilt a test kernel because they
> forget to use '\n" and the test kernel is unusable.

you are right. misusage of printk() will now force user to go and fix
it. the kernel most likely will be rebuilt anyway - there is a missing
\n after all. and rebuild here is pretty much incremental because basically
only one line is getting updated (missing `\n' in printk() message). so
it's like 15 seconds, perhaps.

> IMHO, the connection between '\n' and preemption is not
> intuitive and hard to spot. We should do our best to avoid it.

yes. what can be done is a simple hint -- we are in preempt disabled
path when we report `sched while atomic', so it's safe to check if
this_cpu(cont).len != 0 and if it is then additionally report:
	"incomplete cont line".

in normal life no one _probably_ would see the change. well, I saw broken
backtraces on my laptop before, but didn't consider it to be important.
for the last two days the problem is not theoretical anymore on my side,
as now I'm looking at actual pr_cont()-related bug report [internal].
I'm truing to move the whole thing into 'don't use the cont lines' direction;
there is a mix of numerous pr_cont() calls & missing new-lines [internal code].
the kernel, unfortunately, IMHO, was too nice in the latter case, flushing
incomplete cont buffer with LOG_NEWLINE. so many of those `accidental' cont
lines went unnoticed.

this "don't use the cont lines" is actually a bit tricky and doesn't
look so solid. there is a need for cont lines, and the kernel has cont
printk... but it's sort of broken, so people have to kinda work-around
it. not the best example, but still (arch/arm64/kernel/traps.c)

static void __dump_instr(const char *lvl, struct pt_regs *regs)
{
	unsigned long addr = instruction_pointer(regs);
	char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str;
	int i;

	for (i = -4; i < 1; i++) {
		unsigned int val, bad;

		bad = __get_user(val, &((u32 *)addr)[i]);

		if (!bad)
			p += sprintf(p, i == 0 ? "(%08x) " : "%08x ", val);
		else {
			p += sprintf(p, "bad PC value");
			break;
		}
	}
	printk("%sCode: %s\n", lvl, str);
}

which is fine, but getting a bit boring once you have more than,
let's say, 20 function that want to do cont output.

[..]
> > well, I do understand what you mean and agree with it, but I'm
> > afraid pr_cont() kinda matters after all and people *probably*
> > expect it to be SMP safe (I'm not entirely sure whether all of
> > those pr_cont() calls were put there with the idea that the
> > output can be messed up and quite hard to read).
> 
> This was even worse before the cont lines buffer.
> 
> Sigh, I do not feel experienced enough to decide about this.
> I wonder if this is rather theoretical problem or if there
> are many real complains about it.
>
> I feel that we might be trapped by a perfectionism.

well. may be, not entirely, but may be :)

	-ss

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH][RFC] printk: make pr_cont buffer per-cpu
  2016-08-24 14:27         ` Sergey Senozhatsky
@ 2016-08-25 21:27           ` Petr Mladek
  2016-08-25 21:33             ` Petr Mladek
  0 siblings, 1 reply; 10+ messages in thread
From: Petr Mladek @ 2016-08-25 21:27 UTC (permalink / raw)
  To: Sergey Senozhatsky
  Cc: Sergey Senozhatsky, Jan Kara, Kay Sievers, Tejun Heo,
	Calvin Owens, Andrew Morton, linux-kernel

On Wed 2016-08-24 23:27:29, Sergey Senozhatsky wrote:
> On (08/24/16 10:19), Petr Mladek wrote:
> > > On (08/23/16 13:47), Petr Mladek wrote:
> > > [..]
> > > > >  	if (!(lflags & LOG_NEWLINE)) {
> > > > > +		if (!this_cpu_read(cont_printing)) {
> > > > > +			if (system_state == SYSTEM_RUNNING) {
> > > > > +				this_cpu_write(cont_printing, true);
> > > > > +				preempt_disable();
> > > > > +			}
> > > > > +		}
> > > > 
> > > > I am afraid that this is not acceptable. It means that printk() will have
> > > > an unexpected side effect. The missing "\n" at the end of a printed
> > > > string would disable preemption. See below for more.
> > > 
> > > missing '\n' must WARN about "sched while atomic" eventually, so it
> > > shouldn't go unnoticed or stay hidden.
> > 
> > Well, it will still force people to rebuilt a test kernel because they
> > forget to use '\n" and the test kernel is unusable.
> 
> you are right. misusage of printk() will now force user to go and fix
> it. the kernel most likely will be rebuilt anyway - there is a missing
> \n after all.

It is not a big problem for the final build. But it is a problem when adding
temporary debugging messages. They will get removed anyway. But people
will hate that some debugging builds are unusable just because of
a missing "\n".

> and rebuild here is pretty much incremental because basically
> only one line is getting updated (missing `\n' in printk() message). so
> it's like 15 seconds, perhaps.

This is not completely true. Sometimes the developer is not able to
test it hemself and need to send a patched kernel with some debug
messages to the customer. The turnaround might be hours or even days.

Also we have a build service for building packages here at SUSE.
It uses rather powerful machines. If we need to do testing on
some less powerfull machine, it is sometimes faster to build
the package by the build service. Sometimes it is the only
possibility if the test machine does not have enought disk
or memory. The point is that the build service always does
clean build and the turnaround takes minutes.

Not to say that the missing newline might be in an error path
that is hard to test and might appear even in the released build.


> > IMHO, the connection between '\n' and preemption is not
> > intuitive and hard to spot. We should do our best to avoid it.
> 
> yes. what can be done is a simple hint -- we are in preempt disabled
> path when we report `sched while atomic', so it's safe to check if
> this_cpu(cont).len != 0 and if it is then additionally report:
> 	"incomplete cont line".
> 
> in normal life no one _probably_ would see the change. well, I saw broken
> backtraces on my laptop before, but didn't consider it to be important.
> for the last two days the problem is not theoretical anymore on my side,
> as now I'm looking at actual pr_cont()-related bug report [internal].
> I'm truing to move the whole thing into 'don't use the cont lines' direction;
> there is a mix of numerous pr_cont() calls & missing new-lines [internal code].
> the kernel, unfortunately, IMHO, was too nice in the latter case, flushing
> incomplete cont buffer with LOG_NEWLINE. so many of those `accidental' cont
> lines went unnoticed.
> 
> this "don't use the cont lines" is actually a bit tricky and doesn't
> look so solid. there is a need for cont lines, and the kernel has cont
> printk... but it's sort of broken, so people have to kinda work-around
> it. not the best example, but still (arch/arm64/kernel/traps.c)
> 
> static void __dump_instr(const char *lvl, struct pt_regs *regs)
> {
> 	unsigned long addr = instruction_pointer(regs);
> 	char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str;
> 	int i;
> 
> 	for (i = -4; i < 1; i++) {
> 		unsigned int val, bad;
> 
> 		bad = __get_user(val, &((u32 *)addr)[i]);
> 
> 		if (!bad)
> 			p += sprintf(p, i == 0 ? "(%08x) " : "%08x ", val);
> 		else {
> 			p += sprintf(p, "bad PC value");
> 			break;
> 		}
> 	}
> 	printk("%sCode: %s\n", lvl, str);
> }
> 
> which is fine, but getting a bit boring once you have more than,
> let's say, 20 function that want to do cont output.

Well, there are only two extra lines. The buffer definition and
the final printk().

Of course, it would be great to fix it transparently. But if there must
be a burden, I would prefer to keep it on the "corner" case users
rather than to push it on everyday users.

Best Regards,
Petr

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH][RFC] printk: make pr_cont buffer per-cpu
  2016-08-25 21:27           ` Petr Mladek
@ 2016-08-25 21:33             ` Petr Mladek
  0 siblings, 0 replies; 10+ messages in thread
From: Petr Mladek @ 2016-08-25 21:33 UTC (permalink / raw)
  To: Sergey Senozhatsky
  Cc: Sergey Senozhatsky, Jan Kara, Kay Sievers, Tejun Heo,
	Calvin Owens, Andrew Morton, linux-kernel

On Thu 2016-08-25 23:27:40, Petr Mladek wrote:
> On Wed 2016-08-24 23:27:29, Sergey Senozhatsky wrote:
> > On (08/24/16 10:19), Petr Mladek wrote:
> > > > On (08/23/16 13:47), Petr Mladek wrote:
> > > > [..]
> > > > > >  	if (!(lflags & LOG_NEWLINE)) {
> > > > > > +		if (!this_cpu_read(cont_printing)) {
> > > > > > +			if (system_state == SYSTEM_RUNNING) {
> > > > > > +				this_cpu_write(cont_printing, true);
> > > > > > +				preempt_disable();
> > > > > > +			}
> > > > > > +		}
> > > > > 
> > > > > I am afraid that this is not acceptable. It means that printk() will have
> > > > > an unexpected side effect. The missing "\n" at the end of a printed
> > > > > string would disable preemption. See below for more.
> > > > 
> > > > missing '\n' must WARN about "sched while atomic" eventually, so it
> > > > shouldn't go unnoticed or stay hidden.
> > > 
> > > Well, it will still force people to rebuilt a test kernel because they
> > > forget to use '\n" and the test kernel is unusable.
> > 
> > you are right. misusage of printk() will now force user to go and fix
> > it. the kernel most likely will be rebuilt anyway - there is a missing
> > \n after all.
 
> Of course, it would be great to fix it transparently. But if there must
> be a burden, I would prefer to keep it on the "corner" case users
> rather than to push it on everyday users.

Not to say that a messed log is much less painful than a locked system.

Best Regards,
Petr

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2016-08-25 22:14 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-08-22 15:40 [PATCH][RFC] printk: make pr_cont buffer per-cpu Sergey Senozhatsky
2016-08-22 16:10 ` Joe Perches
2016-08-23  1:10   ` Sergey Senozhatsky
2016-08-23  5:18 ` Sergey Senozhatsky
2016-08-23 11:47   ` Petr Mladek
2016-08-24  1:14     ` Sergey Senozhatsky
2016-08-24  8:19       ` Petr Mladek
2016-08-24 14:27         ` Sergey Senozhatsky
2016-08-25 21:27           ` Petr Mladek
2016-08-25 21:33             ` Petr Mladek

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.