All of lore.kernel.org
 help / color / mirror / Atom feed
From: Joe Perches <joe@perches.com>
To: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>,
	Petr Mladek <pmladek@suse.com>, Kay Sievers <kay@vrfy.org>
Cc: Jan Kara <jack@suse.cz>, Tejun Heo <tj@kernel.org>,
	Calvin Owens <calvinowens@fb.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	linux-kernel@vger.kernel.org,
	Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
Subject: Re: [PATCH][RFC] printk: make pr_cont buffer per-cpu
Date: Mon, 22 Aug 2016 09:10:08 -0700	[thread overview]
Message-ID: <1471882208.3746.50.camel@perches.com> (raw)
In-Reply-To: <20160822154030.2715-1-sergey.senozhatsky@gmail.com>

(adding Kay Sievers who wrote most of this)

On Tue, 2016-08-23 at 00:40 +0900, Sergey Senozhatsky wrote:
> Hello,
> 
> 	RFC and POC
> 
> 	Petr, I took a very quick look at your series [1]. I think it
> won't work on some of the setups I'm toying with, where multiple CPUs
> can do a simultaneous pr_cont() output. What do you about the following
> approach?
> 
> (it's not really tested, I just finished writing the code. Will test it
> more tomorrow. But I kinda like that at this point it removes almost
> twice the code it adds, which is probably because I missed some cases).
> 
> ===8<====
> 
> 	This patch changes pr_cont buffer to be a per-cpu variable, so
> CPUs don't compete anymore for a single cont buffer. Thus we minimize
> the possibility of preliminary/forced flushes of incomplete pr_cont
> buffers.
> 	The basic idea is that the first time CPU issues a pr_cont or
> !LOG_NEWLINE output it starts a non-preemptible region which ends when
> CPUs writes a NEW_LINE symbol.
> Schematically:
> 
> 	pr_cont()			/* preempt_disable() */
> 		<< write to this_cpu_ptr cont buffer
> 	pr_cont()
> 		<< write to this_cpu_ptr cont buffer
> 	...
> 	pr_cont("\n")			/* log_store(). preempt_enable() */
> 
> 	The only possible case (unless I'm missing something) for a
> race is when IRQ is issuing a pr_cont() output, interrupting the CPU
> which is already in the middle of pr_cont() printing.
> 
> 
> [1]: http://marc.info/?l=linux-kernel&m=146860197621876
> 
> Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
> ---
>  kernel/printk/printk.c | 193 ++++++++++++++++---------------------------------
>  1 file changed, 64 insertions(+), 129 deletions(-)
> 
> diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
> index 0d3e026..2bf8b85 100644
> --- a/kernel/printk/printk.c
> +++ b/kernel/printk/printk.c
> @@ -337,10 +337,9 @@ static int console_may_schedule;
>   */
>  
>  enum log_flags {
> -	LOG_NOCONS	= 1,	/* already flushed, do not print to console */
> -	LOG_NEWLINE	= 2,	/* text ended with a newline */
> -	LOG_PREFIX	= 4,	/* text started with a prefix */
> -	LOG_CONT	= 8,	/* text is a fragment of a continuation line */
> +	LOG_NEWLINE	= 1,	/* text ended with a newline */
> +	LOG_PREFIX	= 2,	/* text started with a prefix */
> +	LOG_CONT	= 4,	/* text is a fragment of a continuation line */
>  };
>  
>  struct printk_log {
> @@ -1661,108 +1660,62 @@ static inline void printk_delay(void)
>   * though, are printed immediately to the consoles to ensure everything has
>   * reached the console in case of a kernel crash.
>   */
> -static struct cont {
> +struct cont {
>  	char buf[LOG_LINE_MAX];
>  	size_t len;			/* length == 0 means unused buffer */
> -	size_t cons;			/* bytes written to console */
> -	struct task_struct *owner;	/* task of first print*/
>  	u64 ts_nsec;			/* time of first print */
>  	u8 level;			/* log level of first message */
>  	u8 facility;			/* log facility of first message */
>  	enum log_flags flags;		/* prefix, newline flags */
> -	bool flushed:1;			/* buffer sealed and committed */
> -} cont;
> +};
> +
> +static DEFINE_PER_CPU(struct cont, pcpu_cont);
> +static DEFINE_PER_CPU(bool, cont_printing);
>  
> -static void cont_flush(enum log_flags flags)
> +static void cont_flush(struct cont *cont, enum log_flags flags)
>  {
> -	if (cont.flushed)
> -		return;
> -	if (cont.len == 0)
> +	if (cont->len == 0)
>  		return;
>  
> -	if (cont.cons) {
> -		/*
> -		 * If a fragment of this line was directly flushed to the
> -		 * console; wait for the console to pick up the rest of the
> -		 * line. LOG_NOCONS suppresses a duplicated output.
> -		 */
> -		log_store(cont.facility, cont.level, flags | LOG_NOCONS,
> -			  cont.ts_nsec, NULL, 0, cont.buf, cont.len);
> -		cont.flags = flags;
> -		cont.flushed = true;
> -	} else {
> -		/*
> -		 * If no fragment of this line ever reached the console,
> -		 * just submit it to the store and free the buffer.
> -		 */
> -		log_store(cont.facility, cont.level, flags, 0,
> -			  NULL, 0, cont.buf, cont.len);
> -		cont.len = 0;
> -	}
> +	/*
> +	 * If no fragment of this line ever reached the console,
> +	 * just submit it to the store and free the buffer.
> +	 */
> +	log_store(cont->facility, cont->level, flags, 0,
> +		  NULL, 0, cont->buf, cont->len);
> +	cont->len = 0;
>  }
>  
>  static bool cont_add(int facility, int level, const char *text, size_t len)
>  {
> -	if (cont.len && cont.flushed)
> -		return false;
> +	struct cont *cont = this_cpu_ptr(&pcpu_cont);
>  
>  	/*
>  	 * If ext consoles are present, flush and skip in-kernel
>  	 * continuation.  See nr_ext_console_drivers definition.  Also, if
>  	 * the line gets too long, split it up in separate records.
>  	 */
> -	if (nr_ext_console_drivers || cont.len + len > sizeof(cont.buf)) {
> -		cont_flush(LOG_CONT);
> +	if (nr_ext_console_drivers || cont->len + len > sizeof(cont->buf)) {
> +		cont_flush(cont, LOG_CONT);
>  		return false;
>  	}
>  
> -	if (!cont.len) {
> -		cont.facility = facility;
> -		cont.level = level;
> -		cont.owner = current;
> -		cont.ts_nsec = local_clock();
> -		cont.flags = 0;
> -		cont.cons = 0;
> -		cont.flushed = false;
> +	if (!cont->len) {
> +		cont->facility = facility;
> +		cont->level = level;
> +		cont->ts_nsec = local_clock();
> +		cont->flags = 0;
>  	}
>  
> -	memcpy(cont.buf + cont.len, text, len);
> -	cont.len += len;
> +	memcpy(cont->buf + cont->len, text, len);
> +	cont->len += len;
>  
> -	if (cont.len > (sizeof(cont.buf) * 80) / 100)
> -		cont_flush(LOG_CONT);
> +	if (cont->len > (sizeof(cont->buf) * 80) / 100)
> +		cont_flush(cont, LOG_CONT);
>  
>  	return true;
>  }
>  
> -static size_t cont_print_text(char *text, size_t size)
> -{
> -	size_t textlen = 0;
> -	size_t len;
> -
> -	if (cont.cons == 0 && (console_prev & LOG_NEWLINE)) {
> -		textlen += print_time(cont.ts_nsec, text);
> -		size -= textlen;
> -	}
> -
> -	len = cont.len - cont.cons;
> -	if (len > 0) {
> -		if (len+1 > size)
> -			len = size-1;
> -		memcpy(text + textlen, cont.buf + cont.cons, len);
> -		textlen += len;
> -		cont.cons = cont.len;
> -	}
> -
> -	if (cont.flushed) {
> -		if (cont.flags & LOG_NEWLINE)
> -			text[textlen++] = '\n';
> -		/* got everything, release buffer */
> -		cont.len = 0;
> -	}
> -	return textlen;
> -}
> -
>  asmlinkage int vprintk_emit(int facility, int level,
>  			    const char *dict, size_t dictlen,
>  			    const char *fmt, va_list args)
> @@ -1779,6 +1732,7 @@ asmlinkage int vprintk_emit(int facility, int level,
>  	int printed_len = 0;
>  	int nmi_message_lost;
>  	bool in_sched = false;
> +	struct cont *cont;
>  
>  	if (level == LOGLEVEL_SCHED) {
>  		level = LOGLEVEL_DEFAULT;
> @@ -1789,6 +1743,7 @@ asmlinkage int vprintk_emit(int facility, int level,
>  	printk_delay();
>  
>  	local_irq_save(flags);
> +	cont = this_cpu_ptr(&pcpu_cont);
>  	this_cpu = smp_processor_id();
>  
>  	/*
> @@ -1878,12 +1833,22 @@ asmlinkage int vprintk_emit(int facility, int level,
>  		lflags |= LOG_PREFIX|LOG_NEWLINE;
>  
>  	if (!(lflags & LOG_NEWLINE)) {
> +		if (!this_cpu_read(cont_printing)) {
> +			bool unsafe_pr_cont = preemptible() &&
> +				!rcu_preempt_depth();
> +
> +			if (unsafe_pr_cont) {
> +				this_cpu_write(cont_printing, true);
> +				preempt_disable();
> +			}
> +		}
> +
>  		/*
>  		 * Flush the conflicting buffer. An earlier newline was missing,
>  		 * or another task also prints continuation lines.
>  		 */
> -		if (cont.len && (lflags & LOG_PREFIX || cont.owner != current))
> -			cont_flush(LOG_NEWLINE);
> +		if (cont->len && (lflags & LOG_PREFIX))
> +			cont_flush(cont, LOG_NEWLINE);
>  
>  		/* buffer line if possible, otherwise store it right away */
>  		if (cont_add(facility, level, text, text_len))
> @@ -1895,6 +1860,11 @@ asmlinkage int vprintk_emit(int facility, int level,
>  	} else {
>  		bool stored = false;
>  
> +		if (this_cpu_read(cont_printing)) {
> +			this_cpu_write(cont_printing, false);
> +			preempt_enable();
> +		}
> +
>  		/*
>  		 * If an earlier newline was missing and it was the same task,
>  		 * either merge it with the current buffer and flush, or if
> @@ -1903,11 +1873,11 @@ asmlinkage int vprintk_emit(int facility, int level,
>  		 * If the preceding printk was from a different task and missed
>  		 * a newline, flush and append the newline.
>  		 */
> -		if (cont.len) {
> -			if (cont.owner == current && !(lflags & LOG_PREFIX))
> +		if (cont->len) {
> +			if (!(lflags & LOG_PREFIX))
>  				stored = cont_add(facility, level, text,
>  						  text_len);
> -			cont_flush(LOG_NEWLINE);
> +			cont_flush(cont, LOG_NEWLINE);
>  		}
>  
>  		if (stored)
> @@ -2051,7 +2021,6 @@ static struct cont {
>  	size_t len;
>  	size_t cons;
>  	u8 level;
> -	bool flushed:1;
>  } cont;
>  static char *log_text(const struct printk_log *msg) { return NULL; }
>  static char *log_dict(const struct printk_log *msg) { return NULL; }
> @@ -2344,42 +2313,6 @@ static inline int can_use_console(void)
>  	return cpu_online(raw_smp_processor_id()) || have_callable_console();
>  }
>  
> -static void console_cont_flush(char *text, size_t size)
> -{
> -	unsigned long flags;
> -	size_t len;
> -
> -	raw_spin_lock_irqsave(&logbuf_lock, flags);
> -
> -	if (!cont.len)
> -		goto out;
> -
> -	if (suppress_message_printing(cont.level)) {
> -		cont.cons = cont.len;
> -		if (cont.flushed)
> -			cont.len = 0;
> -		goto out;
> -	}
> -
> -	/*
> -	 * We still queue earlier records, likely because the console was
> -	 * busy. The earlier ones need to be printed before this one, we
> -	 * did not flush any fragment so far, so just let it queue up.
> -	 */
> -	if (console_seq < log_next_seq && !cont.cons)
> -		goto out;
> -
> -	len = cont_print_text(text, size);
> -	raw_spin_unlock(&logbuf_lock);
> -	stop_critical_timings();
> -	call_console_drivers(cont.level, NULL, 0, text, len);
> -	start_critical_timings();
> -	local_irq_restore(flags);
> -	return;
> -out:
> -	raw_spin_unlock_irqrestore(&logbuf_lock, flags);
> -}
> -
>  /**
>   * console_unlock - unlock the console system
>   *
> @@ -2433,9 +2366,6 @@ again:
>  		return;
>  	}
>  
> -	/* flush buffered message fragment immediately to console */
> -	console_cont_flush(text, sizeof(text));
> -
>  	for (;;) {
>  		struct printk_log *msg;
>  		size_t ext_len = 0;
> @@ -2465,8 +2395,7 @@ skip:
>  
>  		msg = log_from_idx(console_idx);
>  		level = msg->level;
> -		if ((msg->flags & LOG_NOCONS) ||
> -				suppress_message_printing(level)) {
> +		if (suppress_message_printing(level)) {
>  			/*
>  			 * Skip record we have buffered and already printed
>  			 * directly to the console when we received it, and
> @@ -2474,12 +2403,6 @@ skip:
>  			 */
>  			console_idx = log_next(console_idx);
>  			console_seq++;
> -			/*
> -			 * We will get here again when we register a new
> -			 * CON_PRINTBUFFER console. Clear the flag so we
> -			 * will properly dump everything later.
> -			 */
> -			msg->flags &= ~LOG_NOCONS;
>  			console_prev = msg->flags;
>  			goto skip;
>  		}
> @@ -2581,6 +2504,18 @@ void console_unblank(void)
>   */
>  void console_flush_on_panic(void)
>  {
> +	unsigned long flags;
> +	unsigned int cpu;
> +
> +	/*
> +	 * Flush the cont messages; no matter complete
> +	 * or incomplete.
> +	 */
> +	local_irq_save(flags);
> +	for_each_online_cpu(cpu)
> +		cont_flush(per_cpu_ptr(&pcpu_cont, cpu), LOG_NEWLINE);
> +	local_irq_restore(flags);
> +
>  	/*
>  	 * If someone else is holding the console lock, trylock will fail
>  	 * and may_schedule may be set.  Ignore and proceed to unlock so

  reply	other threads:[~2016-08-22 16:11 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-08-22 15:40 [PATCH][RFC] printk: make pr_cont buffer per-cpu Sergey Senozhatsky
2016-08-22 16:10 ` Joe Perches [this message]
2016-08-23  1:10   ` Sergey Senozhatsky
2016-08-23  5:18 ` Sergey Senozhatsky
2016-08-23 11:47   ` Petr Mladek
2016-08-24  1:14     ` Sergey Senozhatsky
2016-08-24  8:19       ` Petr Mladek
2016-08-24 14:27         ` Sergey Senozhatsky
2016-08-25 21:27           ` Petr Mladek
2016-08-25 21:33             ` Petr Mladek

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1471882208.3746.50.camel@perches.com \
    --to=joe@perches.com \
    --cc=akpm@linux-foundation.org \
    --cc=calvinowens@fb.com \
    --cc=jack@suse.cz \
    --cc=kay@vrfy.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=pmladek@suse.com \
    --cc=sergey.senozhatsky.work@gmail.com \
    --cc=sergey.senozhatsky@gmail.com \
    --cc=tj@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.