All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] printk: Avoid softlockups in console_unlock()
@ 2013-02-04 22:17 Jan Kara
  2013-02-05 20:38 ` Andrew Morton
  0 siblings, 1 reply; 11+ messages in thread
From: Jan Kara @ 2013-02-04 22:17 UTC (permalink / raw)
  To: Andrew Morton; +Cc: LKML, jslaby, Greg Kroah-Hartman, Jan Kara

A CPU can be caught in console_unlock() for a long time (tens of seconds are
reported by our customers) when other CPUs are using printk heavily and serial
console makes printing slow. Despite serial console drivers are calling
touch_nmi_watchdog() this triggers softlockup warnings because
interrupts are disabled for the whole time console_unlock() runs (e.g.
vprintk() calls console_unlock() with interrupts disabled). Thus IPIs
cannot be processed and other CPUs get stuck spinning in calls like
smp_call_function_many(). Also RCU eventually starts reporting lockups.

In my artifical testing I also managed to trigger a situation when disk
disappeared from the system apparently because commands to / from it
could not be delivered for long enough. This is why just silencing
watchdogs isn't a reliable solution to the problem and we simply have to
avoid spending too long in console_unlock().

We fix the issue by limiting the time we spend in console_unlock() to
watchdog_thresh() / 4 (unless we are in an early boot stage or oops is
happening). The rest of the buffer will be printed either by further
callers to printk() or by a queued work.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 include/linux/nmi.h |   18 +++++++++++++++
 kernel/printk.c     |   59 ++++++++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 69 insertions(+), 8 deletions(-)

So this version has cleverer logic of when to offload the printing work. It
made the lockup warnings go away... Does it look better Andrew?

diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index db50840..c7a01e0 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -51,6 +51,24 @@ extern int watchdog_thresh;
 struct ctl_table;
 extern int proc_dowatchdog(struct ctl_table *, int ,
 			   void __user *, size_t *, loff_t *);
+/*
+ * Return the maximum number of nanosecond for which interrupts may be disabled
+ * on the current CPU
+ */
+static inline u64 max_interrupt_disabled_duration(void)
+{
+	/*
+	 * We give us some headroom because timers need time to fire before the
+	 * watchdog period expires.
+	 */
+	return watchdog_thresh * NSEC_PER_SEC / 2;
+}
+#else
+static inline u64 max_interrupt_disabled_duration(void)
+{
+	/* About the value we'd get with the default watchdog setting */
+	return 5 * NSEC_PER_SEC;
+}
 #endif
 
 #endif
diff --git a/kernel/printk.c b/kernel/printk.c
index 357f714..80172f7 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -246,6 +246,10 @@ static enum log_flags console_prev;
 static u64 clear_seq;
 static u32 clear_idx;
 
+/* Worker to print accumulated data to console when there's too much of it */
+static void printk_worker(struct work_struct *work);
+static DECLARE_WORK(printk_work, printk_worker);
+
 #define PREFIX_MAX		32
 #define LOG_LINE_MAX		1024 - PREFIX_MAX
 
@@ -2028,28 +2032,39 @@ out:
  * Releases the console_lock which the caller holds on the console system
  * and the console driver list.
  *
- * While the console_lock was held, console output may have been buffered
- * by printk().  If this is the case, console_unlock(); emits
- * the output prior to releasing the lock.
+ * While the console_lock was held, console output may have been buffered by
+ * printk(). If this is the case, console_unlock() emits the output prior to
+ * releasing the lock. However we need not write all the data in the buffer if
+ * we would hog the CPU for too long. Function returns true, if there's more
+ * data that needs printing in the buffer.
  *
  * If there is output waiting, we wake /dev/kmsg and syslog() users.
  *
- * console_unlock(); may be called from any context.
+ * __console_unlock() may be called from any context.
  */
-void console_unlock(void)
+static bool __console_unlock(void)
 {
 	static char text[LOG_LINE_MAX + PREFIX_MAX];
 	static u64 seen_seq;
 	unsigned long flags;
 	bool wake_klogd = false;
 	bool retry;
+	u64 end_time, now;
+	int cur_cpu;
 
 	if (console_suspended) {
 		up(&console_sem);
-		return;
+		return false;
 	}
 
 	console_may_schedule = 0;
+	cur_cpu = smp_processor_id();
+	/*
+	 * We give us some headroom because we check the time only after
+	 * printing the whole message
+	 */
+	end_time = sched_clock_cpu(cur_cpu) +
+				max_interrupt_disabled_duration() / 2;
 
 	/* flush buffered message fragment immediately to console */
 	console_cont_flush(text, sizeof(text));
@@ -2072,7 +2087,9 @@ again:
 			console_prev = 0;
 		}
 skip:
-		if (console_seq == log_next_seq)
+		now = sched_clock_cpu(cur_cpu);
+		if (console_seq == log_next_seq ||
+		    (now > end_time && !oops_in_progress && keventd_up()))
 			break;
 
 		msg = log_from_idx(console_idx);
@@ -2127,14 +2144,40 @@ skip:
 	retry = console_seq != log_next_seq;
 	raw_spin_unlock_irqrestore(&logbuf_lock, flags);
 
-	if (retry && console_trylock())
+	if (retry &&
+	    (now <= end_time || oops_in_progress || !keventd_up()) &&
+	    console_trylock())
 		goto again;
 
 	if (wake_klogd)
 		wake_up_klogd();
+	return retry;
+}
+
+void console_unlock(void)
+{
+	if (__console_unlock()) {
+		/* Let worker do the rest of printing */
+		schedule_work(&printk_work);
+	}
 }
 EXPORT_SYMBOL(console_unlock);
 
+/*
+ * This is a worker function to print data from printk buffer when
+ * console_unlock() didn't write it all. The advantage of this function is that
+ * it does the printing in a well known context where we can reschedule to
+ * avoid locking up one CPU with printing.
+ */
+static void printk_worker(struct work_struct *work)
+{
+	console_lock();
+	while (__console_unlock()) {
+		cond_resched();
+		console_lock();
+	}
+}
+
 /**
  * console_conditional_schedule - yield the CPU if required
  *
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] printk: Avoid softlockups in console_unlock()
  2013-02-04 22:17 [PATCH v2] printk: Avoid softlockups in console_unlock() Jan Kara
@ 2013-02-05 20:38 ` Andrew Morton
  2013-02-05 22:56   ` Frederic Weisbecker
  2013-02-06 14:23   ` Jan Kara
  0 siblings, 2 replies; 11+ messages in thread
From: Andrew Morton @ 2013-02-05 20:38 UTC (permalink / raw)
  To: Jan Kara
  Cc: LKML, jslaby, Greg Kroah-Hartman, Frederic Weisbecker, Steven Rostedt

On Mon,  4 Feb 2013 23:17:10 +0100
Jan Kara <jack@suse.cz> wrote:

> A CPU can be caught in console_unlock() for a long time (tens of seconds are
> reported by our customers) when other CPUs are using printk heavily and serial
> console makes printing slow. Despite serial console drivers are calling
> touch_nmi_watchdog() this triggers softlockup warnings because
> interrupts are disabled for the whole time console_unlock() runs (e.g.
> vprintk() calls console_unlock() with interrupts disabled). Thus IPIs
> cannot be processed and other CPUs get stuck spinning in calls like
> smp_call_function_many(). Also RCU eventually starts reporting lockups.
> 
> In my artifical testing I also managed to trigger a situation when disk
> disappeared from the system apparently because commands to / from it
> could not be delivered for long enough. This is why just silencing
> watchdogs isn't a reliable solution to the problem and we simply have to
> avoid spending too long in console_unlock().
> 
> We fix the issue by limiting the time we spend in console_unlock() to
> watchdog_thresh() / 4 (unless we are in an early boot stage or oops is
> happening). The rest of the buffer will be printed either by further
> callers to printk() or by a queued work.

I still hate the patch :(

> ...
>
> +void console_unlock(void)
> +{
> +	if (__console_unlock()) {
> +		/* Let worker do the rest of printing */
> +		schedule_work(&printk_work);
> +	}
>  }

This creates another place from where we cannot call printk(): anywhere
where worker_pool.lock is held.

And as schedule_work() can do a wakeup it creates a third reason why
the sched code cannot call printk (along with rq->lock taken by
wake_up(klogd) and rq->lock taken by up(&console_sem).  Hence
printk_sched().  See the lkml thread "[GIT PULL] printk: Support for
full dynticks mode".

We already have machinery for doing async tickling in printk: the
printk_pending stuff.  Did you consider adding another
PRINTK_PENDING_foo in some fashion?



^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] printk: Avoid softlockups in console_unlock()
  2013-02-05 20:38 ` Andrew Morton
@ 2013-02-05 22:56   ` Frederic Weisbecker
  2013-02-06  0:42     ` Steven Rostedt
  2013-02-06 14:23   ` Jan Kara
  1 sibling, 1 reply; 11+ messages in thread
From: Frederic Weisbecker @ 2013-02-05 22:56 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Jan Kara, LKML, jslaby, Greg Kroah-Hartman, Steven Rostedt

2013/2/5 Andrew Morton <akpm@linux-foundation.org>:
> On Mon,  4 Feb 2013 23:17:10 +0100
> Jan Kara <jack@suse.cz> wrote:
>
>> A CPU can be caught in console_unlock() for a long time (tens of seconds are
>> reported by our customers) when other CPUs are using printk heavily and serial
>> console makes printing slow. Despite serial console drivers are calling
>> touch_nmi_watchdog() this triggers softlockup warnings because
>> interrupts are disabled for the whole time console_unlock() runs (e.g.
>> vprintk() calls console_unlock() with interrupts disabled). Thus IPIs
>> cannot be processed and other CPUs get stuck spinning in calls like
>> smp_call_function_many(). Also RCU eventually starts reporting lockups.
>>
>> In my artifical testing I also managed to trigger a situation when disk
>> disappeared from the system apparently because commands to / from it
>> could not be delivered for long enough. This is why just silencing
>> watchdogs isn't a reliable solution to the problem and we simply have to
>> avoid spending too long in console_unlock().
>>
>> We fix the issue by limiting the time we spend in console_unlock() to
>> watchdog_thresh() / 4 (unless we are in an early boot stage or oops is
>> happening). The rest of the buffer will be printed either by further
>> callers to printk() or by a queued work.
>
> I still hate the patch :(
>
>> ...
>>
>> +void console_unlock(void)
>> +{
>> +     if (__console_unlock()) {
>> +             /* Let worker do the rest of printing */
>> +             schedule_work(&printk_work);
>> +     }
>>  }
>
> This creates another place from where we cannot call printk(): anywhere
> where worker_pool.lock is held.
>
> And as schedule_work() can do a wakeup it creates a third reason why
> the sched code cannot call printk (along with rq->lock taken by
> wake_up(klogd) and rq->lock taken by up(&console_sem).  Hence
> printk_sched().  See the lkml thread "[GIT PULL] printk: Support for
> full dynticks mode".

Agreed, I really wish we avoid that workqueue solution.

>
> We already have machinery for doing async tickling in printk: the
> printk_pending stuff.  Did you consider adding another
> PRINTK_PENDING_foo in some fashion?

Yeah, that would delay until the next timer tick (small exception
after my patchset, if tick is stopped this will trigger as soon as
irqs are re-enabled through a self IPI) but we can probably improve
that behaviour. And that won't mess up with locking scenarios. The
printk tick (or irq work after my patchset) can also re-trigger itself
until the next tick if the batch to send to the console driver is too
big.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] printk: Avoid softlockups in console_unlock()
  2013-02-05 22:56   ` Frederic Weisbecker
@ 2013-02-06  0:42     ` Steven Rostedt
  0 siblings, 0 replies; 11+ messages in thread
From: Steven Rostedt @ 2013-02-06  0:42 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: Andrew Morton, Jan Kara, LKML, jslaby, Greg Kroah-Hartman,
	Ingo Molnar, Peter Zijlstra

On Tue, 2013-02-05 at 23:56 +0100, Frederic Weisbecker wrote:
> 2013/2/5 Andrew Morton <akpm@linux-foundation.org>:
>  callers to printk() or by a queued work.
> >
> > I still hate the patch :(

I hate it too.

> >
> >> ...
> >>
> >> +void console_unlock(void)
> >> +{
> >> +     if (__console_unlock()) {
> >> +             /* Let worker do the rest of printing */
> >> +             schedule_work(&printk_work);
> >> +     }
> >>  }
> >
> > This creates another place from where we cannot call printk(): anywhere
> > where worker_pool.lock is held.
> >
> > And as schedule_work() can do a wakeup it creates a third reason why
> > the sched code cannot call printk (along with rq->lock taken by
> > wake_up(klogd) and rq->lock taken by up(&console_sem).  Hence
> > printk_sched().  See the lkml thread "[GIT PULL] printk: Support for
> > full dynticks mode".
> 
> Agreed, I really wish we avoid that workqueue solution.

Yes, please do not use a workqueue here.

> 
> >
> > We already have machinery for doing async tickling in printk: the
> > printk_pending stuff.  Did you consider adding another
> > PRINTK_PENDING_foo in some fashion?
> 
> Yeah, that would delay until the next timer tick (small exception
> after my patchset, if tick is stopped this will trigger as soon as
> irqs are re-enabled through a self IPI) but we can probably improve
> that behaviour. And that won't mess up with locking scenarios. The
> printk tick (or irq work after my patchset) can also re-trigger itself
> until the next tick if the batch to send to the console driver is too
> big.

Actually, we could have printk_sched() do something similar. Right now,
printk_sched() loads a per cpu buffer (wasting space, as it is seldom
used), and it sets the PRINTK_PENDING_SCHED flag. Then the next timer
interrupt, it calls printk() with the contents in the buffer.

What we could do, is have printk_sched() instead load the printk buffer.
This requires a little tweaking of the logbuf_lock to keep it from being
held when doing any wake ups, otherwise we have the AB BA deadlock issue
between logbuf_lock and the rq locks.

This patch and printk_sched() can use the same flag. If the
PRINTK_PENDING_SCHED is set (we probably should rename that to
PRINTK_PENDING_OUTPUT) it will do the console_trylock()/unlock() to
finish the print to console.

The below patch makes printk_sched() use the printk buffer instead of
its own (and removing the 512 bytes per cpu that it uses), and does a
console_trylock()/console_unlock() instead. Jan's patch could easily be
based off of this one with very little to change.

-- Steve


printk: Remove separate printk_sched buffers and use printk buf instead

To prevent deadlocks with doing a printk inside the scheduler,
printk_sched() was created. The issue is that printk has a console_sem
that it can grab and release. The release does a wake up if there's a
task pending on the sem, and this wake up grabs the rq locks that is
held in the scheduler. This leads to a possible deadlock if the wake up
uses the same rq as the one with the rq lock held already.

What printk_sched() does is to save the printk write in a per cpu buffer
and sets the PRINTK_PENDING_SCHED flag. On a timer tick, if this flag is
set, the printk() is done against the buffer.

There's a couple of issues with this approach.

1) If two printk_sched()s are called before the tick, the second one
will overwrite the first one.

2) The temporary buffer is 512 bytes and is per cpu. This is a quite a
bit of space wasted for something that is seldom used.

In order to remove this, the printk_sched() can instead use the printk
buffer instead, and delay the console_trylock()/console_unlock() to the
tick.

Because printk_sched() would then be taking the logbuf_lock, the
logbuf_lock must not be held while doing anything that may call into the
scheduler functions, which includes wake ups. Unfortunately, printk()
also has a console_sem that it uses, and on release, the
up(&console_sem) may do a wake up of any pending waiters. This must be
avoided while holding the logbuf_lock.

Luckily, there's not many places that do the unlock, or hold the
logbuf_lock. By moving things around a little, the console_sem can be
released without ever holding the logbuf_lock, and we can safely have
printk_sched() use the printk buffer directly.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/printk.c b/kernel/printk.c
index 357f714..ed630fa 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -219,6 +219,9 @@ struct log {
 /*
  * The logbuf_lock protects kmsg buffer, indices, counters. It is also
  * used in interesting ways to provide interlocking in console_unlock();
+ * This can be taken within the scheduler's rq lock. It must be released
+ * before calling console_unlock() or anything else that might wake up
+ * a process.
  */
 static DEFINE_RAW_SPINLOCK(logbuf_lock);
 
@@ -1343,30 +1346,46 @@ static inline int can_use_console(unsigned int cpu)
  * interrupts disabled. It should return with 'lockbuf_lock'
  * released but interrupts still disabled.
  */
-static int console_trylock_for_printk(unsigned int cpu)
+static int console_trylock_for_printk(unsigned int cpu, bool in_sched)
 	__releases(&logbuf_lock)
 {
 	int retval = 0, wake = 0;
 
-	if (console_trylock()) {
-		retval = 1;
+	/* if called from the scheduler, we can not call up() */
+	if (in_sched)
+		goto out;
 
-		/*
-		 * If we can't use the console, we need to release
-		 * the console semaphore by hand to avoid flushing
-		 * the buffer. We need to hold the console semaphore
-		 * in order to do this test safely.
-		 */
-		if (!can_use_console(cpu)) {
-			console_locked = 0;
-			wake = 1;
-			retval = 0;
-		}
+	if (down_trylock(&console_sem))
+		goto out;
+
+	/*
+	 * If we can't use the console, we need to release
+	 * the console semaphore by hand to avoid flushing
+	 * the buffer. We need to hold the console semaphore
+	 * in order to do this test safely.
+	 */
+	if (console_suspended || !can_use_console(cpu)) {
+		wake = 1;
+		goto out;
 	}
+
+	/* console is now locked */
+
+	console_locked = 1;
+	console_may_schedule = 0;
+	mutex_acquire(&console_lock_dep_map, 0, 1, _RET_IP_);
+
+	retval = 1;
+
+out:
 	logbuf_cpu = UINT_MAX;
+	/*
+	 * The logbuf_lock must not be held when doing a wake up,
+	 * which the up(&console_sem) can do.
+	 */
+	raw_spin_unlock(&logbuf_lock);
 	if (wake)
 		up(&console_sem);
-	raw_spin_unlock(&logbuf_lock);
 	return retval;
 }
 
@@ -1495,11 +1514,17 @@ asmlinkage int vprintk_emit(int facility, int level,
 	static int recursion_bug;
 	static char textbuf[LOG_LINE_MAX];
 	char *text = textbuf;
-	size_t text_len;
+	size_t text_len = 0;
 	enum log_flags lflags = 0;
 	unsigned long flags;
 	int this_cpu;
 	int printed_len = 0;
+	bool in_sched = false;
+
+	if (level == -2) {
+		level = -1;
+		in_sched = true;
+	}
 
 	boot_delay_msec(level);
 	printk_delay();
@@ -1545,7 +1570,12 @@ asmlinkage int vprintk_emit(int facility, int level,
 	 * The printf needs to come first; we need the syslog
 	 * prefix which might be passed-in as a parameter.
 	 */
-	text_len = vscnprintf(text, sizeof(textbuf), fmt, args);
+	if (in_sched)
+		text_len = scnprintf(text, sizeof(textbuf),
+				     KERN_WARNING "[sched_delayed] ");
+
+	text_len += vscnprintf(text + text_len,
+			       sizeof(textbuf) - text_len, fmt, args);
 
 	/* mark and strip a trailing newline */
 	if (text_len && text[text_len-1] == '\n') {
@@ -1620,7 +1650,7 @@ asmlinkage int vprintk_emit(int facility, int level,
 	 * The console_trylock_for_printk() function will release 'logbuf_lock'
 	 * regardless of whether it actually gets the console semaphore or not.
 	 */
-	if (console_trylock_for_printk(this_cpu))
+	if (console_trylock_for_printk(this_cpu, in_sched))
 		console_unlock();
 
 	lockdep_on();
@@ -1962,18 +1992,20 @@ int is_console_locked(void)
 #define PRINTK_BUF_SIZE		512
 
 #define PRINTK_PENDING_WAKEUP	0x01
-#define PRINTK_PENDING_SCHED	0x02
+#define PRINTK_PENDING_OUTPUT	0x02
 
 static DEFINE_PER_CPU(int, printk_pending);
-static DEFINE_PER_CPU(char [PRINTK_BUF_SIZE], printk_sched_buf);
 
 void printk_tick(void)
 {
 	if (__this_cpu_read(printk_pending)) {
 		int pending = __this_cpu_xchg(printk_pending, 0);
-		if (pending & PRINTK_PENDING_SCHED) {
-			char *buf = __get_cpu_var(printk_sched_buf);
-			printk(KERN_WARNING "[sched_delayed] %s", buf);
+		if (pending & PRINTK_PENDING_OUTPUT) {
+			if (console_trylock())
+				console_unlock();
+			else
+				/* Try again later */
+				__this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT);
 		}
 		if (pending & PRINTK_PENDING_WAKEUP)
 			wake_up_interruptible(&log_wait);
@@ -2458,20 +2490,14 @@ late_initcall(printk_late_init);
 
 int printk_sched(const char *fmt, ...)
 {
-	unsigned long flags;
 	va_list args;
-	char *buf;
 	int r;
 
-	local_irq_save(flags);
-	buf = __get_cpu_var(printk_sched_buf);
-
 	va_start(args, fmt);
-	r = vsnprintf(buf, PRINTK_BUF_SIZE, fmt, args);
+	r = vprintk_emit(0, -2, NULL, 0, fmt, args);
 	va_end(args);
 
-	__this_cpu_or(printk_pending, PRINTK_PENDING_SCHED);
-	local_irq_restore(flags);
+	__this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT);
 
 	return r;
 }



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] printk: Avoid softlockups in console_unlock()
  2013-02-05 20:38 ` Andrew Morton
  2013-02-05 22:56   ` Frederic Weisbecker
@ 2013-02-06 14:23   ` Jan Kara
  2013-02-06 17:58     ` David Rientjes
  1 sibling, 1 reply; 11+ messages in thread
From: Jan Kara @ 2013-02-06 14:23 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Jan Kara, LKML, jslaby, Greg Kroah-Hartman, Frederic Weisbecker,
	Steven Rostedt

On Tue 05-02-13 12:38:38, Andrew Morton wrote:
> On Mon,  4 Feb 2013 23:17:10 +0100
> Jan Kara <jack@suse.cz> wrote:
> 
> > A CPU can be caught in console_unlock() for a long time (tens of seconds are
> > reported by our customers) when other CPUs are using printk heavily and serial
> > console makes printing slow. Despite serial console drivers are calling
> > touch_nmi_watchdog() this triggers softlockup warnings because
> > interrupts are disabled for the whole time console_unlock() runs (e.g.
> > vprintk() calls console_unlock() with interrupts disabled). Thus IPIs
> > cannot be processed and other CPUs get stuck spinning in calls like
> > smp_call_function_many(). Also RCU eventually starts reporting lockups.
> > 
> > In my artifical testing I also managed to trigger a situation when disk
> > disappeared from the system apparently because commands to / from it
> > could not be delivered for long enough. This is why just silencing
> > watchdogs isn't a reliable solution to the problem and we simply have to
> > avoid spending too long in console_unlock().
> > 
> > We fix the issue by limiting the time we spend in console_unlock() to
> > watchdog_thresh() / 4 (unless we are in an early boot stage or oops is
> > happening). The rest of the buffer will be printed either by further
> > callers to printk() or by a queued work.
> 
> I still hate the patch :(
> 
> > ...
> >
> > +void console_unlock(void)
> > +{
> > +	if (__console_unlock()) {
> > +		/* Let worker do the rest of printing */
> > +		schedule_work(&printk_work);
> > +	}
> >  }
> 
> This creates another place from where we cannot call printk(): anywhere
> where worker_pool.lock is held.
> 
> And as schedule_work() can do a wakeup it creates a third reason why
> the sched code cannot call printk (along with rq->lock taken by
> wake_up(klogd) and rq->lock taken by up(&console_sem).  Hence
> printk_sched().  See the lkml thread "[GIT PULL] printk: Support for
> full dynticks mode".
> 
> We already have machinery for doing async tickling in printk: the
> printk_pending stuff.  Did you consider adding another
> PRINTK_PENDING_foo in some fashion?
  Yes, I noticed that thread just yesterday and also though that using
similar trick might be viable. I'll experiment if we could use the same
method for handling lockup problems I hit. Steven seems to have already
tweaked PRINTK_PENDING stuff to be usable more easily...

								Honza

								Honza
-- 
Jan Kara <jack@suse.cz>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] printk: Avoid softlockups in console_unlock()
  2013-02-06 14:23   ` Jan Kara
@ 2013-02-06 17:58     ` David Rientjes
  2013-02-06 18:52       ` Jan Kara
  0 siblings, 1 reply; 11+ messages in thread
From: David Rientjes @ 2013-02-06 17:58 UTC (permalink / raw)
  To: Jan Kara
  Cc: Andrew Morton, LKML, jslaby, Greg Kroah-Hartman,
	Frederic Weisbecker, Steven Rostedt

On Wed, 6 Feb 2013, Jan Kara wrote:

>   Yes, I noticed that thread just yesterday and also though that using
> similar trick might be viable. I'll experiment if we could use the same
> method for handling lockup problems I hit. Steven seems to have already
> tweaked PRINTK_PENDING stuff to be usable more easily...
> 

Are these new build failures in linux-next coming from this patch?

kernel/printk.c: In function 'console_unlock':
kernel/printk.c:2156:18: error: 'printk_work' undeclared (first use in this function)
kernel/printk.c:2156:18: note: each undeclared identifier is reported only once for each function it appears in
kernel/printk.c: At top level:
kernel/printk.c:2167:13: warning: 'printk_worker' defined but not used [-Wunused-function]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] printk: Avoid softlockups in console_unlock()
  2013-02-06 17:58     ` David Rientjes
@ 2013-02-06 18:52       ` Jan Kara
  2013-02-06 19:26         ` David Rientjes
  2013-02-06 20:19         ` Hugh Dickins
  0 siblings, 2 replies; 11+ messages in thread
From: Jan Kara @ 2013-02-06 18:52 UTC (permalink / raw)
  To: David Rientjes
  Cc: Jan Kara, Andrew Morton, LKML, jslaby, Greg Kroah-Hartman,
	Frederic Weisbecker, Steven Rostedt

[-- Attachment #1: Type: text/plain, Size: 979 bytes --]

On Wed 06-02-13 09:58:48, David Rientjes wrote:
> On Wed, 6 Feb 2013, Jan Kara wrote:
> 
> >   Yes, I noticed that thread just yesterday and also though that using
> > similar trick might be viable. I'll experiment if we could use the same
> > method for handling lockup problems I hit. Steven seems to have already
> > tweaked PRINTK_PENDING stuff to be usable more easily...
> > 
> 
> Are these new build failures in linux-next coming from this patch?
> 
> kernel/printk.c: In function 'console_unlock':
> kernel/printk.c:2156:18: error: 'printk_work' undeclared (first use in this function)
> kernel/printk.c:2156:18: note: each undeclared identifier is reported only once for each function it appears in
> kernel/printk.c: At top level:
> kernel/printk.c:2167:13: warning: 'printk_worker' defined but not used [-Wunused-function]
  Yes, I already sent a patch to fix these (attached if you need it).
Thanks for notice.

							Honza
-- 
Jan Kara <jack@suse.cz>
SUSE Labs, CR

[-- Attachment #2: 0001-printk-Fixup-compilation-with-CONFIG_PRINTK.patch --]
[-- Type: text/x-patch, Size: 1112 bytes --]

>From 313ba66418472fa532faf8c61e8981bd7b29950e Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 6 Feb 2013 14:07:01 +0100
Subject: [PATCH] printk: Fixup compilation with !CONFIG_PRINTK

Signed-off-by: Jan Kara <jack@suse.cz>
---
 kernel/printk.c |    8 ++++----
 1 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/printk.c b/kernel/printk.c
index 80172f7..484fdb4 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -246,10 +246,6 @@ static enum log_flags console_prev;
 static u64 clear_seq;
 static u32 clear_idx;
 
-/* Worker to print accumulated data to console when there's too much of it */
-static void printk_worker(struct work_struct *work);
-static DECLARE_WORK(printk_work, printk_worker);
-
 #define PREFIX_MAX		32
 #define LOG_LINE_MAX		1024 - PREFIX_MAX
 
@@ -2154,6 +2150,10 @@ skip:
 	return retry;
 }
 
+/* Worker to print accumulated data to console when there's too much of it */
+static void printk_worker(struct work_struct *work);
+static DECLARE_WORK(printk_work, printk_worker);
+
 void console_unlock(void)
 {
 	if (__console_unlock()) {
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] printk: Avoid softlockups in console_unlock()
  2013-02-06 18:52       ` Jan Kara
@ 2013-02-06 19:26         ` David Rientjes
  2013-02-06 19:51           ` Jan Kara
  2013-02-06 20:19         ` Hugh Dickins
  1 sibling, 1 reply; 11+ messages in thread
From: David Rientjes @ 2013-02-06 19:26 UTC (permalink / raw)
  To: Jan Kara
  Cc: Andrew Morton, LKML, jslaby, Greg Kroah-Hartman,
	Frederic Weisbecker, Steven Rostedt

On Wed, 6 Feb 2013, Jan Kara wrote:

> On Wed 06-02-13 09:58:48, David Rientjes wrote:
> > On Wed, 6 Feb 2013, Jan Kara wrote:
> > 
> > >   Yes, I noticed that thread just yesterday and also though that using
> > > similar trick might be viable. I'll experiment if we could use the same
> > > method for handling lockup problems I hit. Steven seems to have already
> > > tweaked PRINTK_PENDING stuff to be usable more easily...
> > > 
> > 
> > Are these new build failures in linux-next coming from this patch?
> > 
> > kernel/printk.c: In function 'console_unlock':
> > kernel/printk.c:2156:18: error: 'printk_work' undeclared (first use in this function)
> > kernel/printk.c:2156:18: note: each undeclared identifier is reported only once for each function it appears in
> > kernel/printk.c: At top level:
> > kernel/printk.c:2167:13: warning: 'printk_worker' defined but not used [-Wunused-function]
>   Yes, I already sent a patch to fix these (attached if you need it).
> Thanks for notice.
> 

I don't see your attached 
printk-Fixup-compilation-with-CONFIG_PRINTK.patch in -mm nor do I see it 
on any mailing list.  Regardless,

Acked-by: David Rientjes <rientjes@google.com>

for wherever Andrew has this stashed away.  Thanks.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] printk: Avoid softlockups in console_unlock()
  2013-02-06 19:26         ` David Rientjes
@ 2013-02-06 19:51           ` Jan Kara
  0 siblings, 0 replies; 11+ messages in thread
From: Jan Kara @ 2013-02-06 19:51 UTC (permalink / raw)
  To: David Rientjes
  Cc: Jan Kara, Andrew Morton, LKML, jslaby, Greg Kroah-Hartman,
	Frederic Weisbecker, Steven Rostedt

On Wed 06-02-13 11:26:46, David Rientjes wrote:
> On Wed, 6 Feb 2013, Jan Kara wrote:
> 
> > On Wed 06-02-13 09:58:48, David Rientjes wrote:
> > > On Wed, 6 Feb 2013, Jan Kara wrote:
> > > 
> > > >   Yes, I noticed that thread just yesterday and also though that using
> > > > similar trick might be viable. I'll experiment if we could use the same
> > > > method for handling lockup problems I hit. Steven seems to have already
> > > > tweaked PRINTK_PENDING stuff to be usable more easily...
> > > > 
> > > 
> > > Are these new build failures in linux-next coming from this patch?
> > > 
> > > kernel/printk.c: In function 'console_unlock':
> > > kernel/printk.c:2156:18: error: 'printk_work' undeclared (first use in this function)
> > > kernel/printk.c:2156:18: note: each undeclared identifier is reported only once for each function it appears in
> > > kernel/printk.c: At top level:
> > > kernel/printk.c:2167:13: warning: 'printk_worker' defined but not used [-Wunused-function]
> >   Yes, I already sent a patch to fix these (attached if you need it).
> > Thanks for notice.
> > 
> 
> I don't see your attached 
> printk-Fixup-compilation-with-CONFIG_PRINTK.patch in -mm nor do I see it 
> on any mailing list.  Regardless,
  Yeah, it was a private mail from kernel compilation testing guys with
Andrew in CC.

> Acked-by: David Rientjes <rientjes@google.com>
  Thanks.

> for wherever Andrew has this stashed away.  Thanks.

									Honza
-- 
Jan Kara <jack@suse.cz>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] printk: Avoid softlockups in console_unlock()
  2013-02-06 18:52       ` Jan Kara
  2013-02-06 19:26         ` David Rientjes
@ 2013-02-06 20:19         ` Hugh Dickins
  2013-02-06 21:29           ` Jan Kara
  1 sibling, 1 reply; 11+ messages in thread
From: Hugh Dickins @ 2013-02-06 20:19 UTC (permalink / raw)
  To: Jan Kara
  Cc: David Rientjes, Andrew Morton, LKML, jslaby, Greg Kroah-Hartman,
	Frederic Weisbecker, Steven Rostedt

On Wed, 6 Feb 2013, Jan Kara wrote:
> On Wed 06-02-13 09:58:48, David Rientjes wrote:
> > On Wed, 6 Feb 2013, Jan Kara wrote:
> > 
> > >   Yes, I noticed that thread just yesterday and also though that using
> > > similar trick might be viable. I'll experiment if we could use the same
> > > method for handling lockup problems I hit. Steven seems to have already
> > > tweaked PRINTK_PENDING stuff to be usable more easily...
> > > 
> > 
> > Are these new build failures in linux-next coming from this patch?
> > 
> > kernel/printk.c: In function 'console_unlock':
> > kernel/printk.c:2156:18: error: 'printk_work' undeclared (first use in this function)
> > kernel/printk.c:2156:18: note: each undeclared identifier is reported only once for each function it appears in
> > kernel/printk.c: At top level:
> > kernel/printk.c:2167:13: warning: 'printk_worker' defined but not used [-Wunused-function]
>   Yes, I already sent a patch to fix these (attached if you need it).
> Thanks for notice.

Is the console_unlock patch actually intended to be a console load test?
Could we just revert it until it's baked?

This little hunk from __console_unlock():

 	console_may_schedule = 0;
+	cur_cpu = smp_processor_id();

I have CONFIG_PREEMPT=y CONFIG_DEBUG_PREEMPT=y,
so that fills the console with messages:

BUG: using smp_processor_id() in preemptible [00000000] code: systemd/1
caller is __console_unlock+0x3c/0x39c
Pid: 1, comm: systemd Tainted: G        W    3.8.0-rc6-mm1 #1
Call Trace:
 [<ffffffff81223ee2>] debug_smp_processor_id+0xbe/0xd8
 [<ffffffff810615d8>] __console_unlock+0x3c/0x39c
 [<ffffffff810a6a38>] ? trace_hardirqs_on+0xd/0xf
 [<ffffffff8154a997>] ? _raw_spin_unlock_irqrestore+0x46/0x64
 [<ffffffff81061941>] console_unlock+0x9/0x1b
 [<ffffffff812955ab>] con_shutdown+0x29/0x2d
 [<ffffffff81295582>] ? visual_init+0x10d/0x10d
 [<ffffffff812857ed>] release_tty+0x4d/0x91
 [<ffffffff8128644f>] tty_release+0x421/0x460
 [<ffffffff81120c57>] __fput+0x104/0x1e9
 [<ffffffff81120d45>] ____fput+0x9/0xb
 [<ffffffff8107a6c3>] task_work_run+0x79/0xa6
 [<ffffffff8102f4a6>] do_notify_resume+0x55/0x66
 [<ffffffff8121e29e>] ? trace_hardirqs_on_thunk+0x3a/0x3f
 [<ffffffff8154b898>] int_signal+0x12/0x17

As a quick and obviously wrong hack, I replace the call to
smp_processor_id() by call to raw_smp_processor_id() and then:

+	/*
+	 * We give us some headroom because we check the time only after
+	 * printing the whole message
+	 */
+	end_time = sched_clock_cpu(cur_cpu) +
+				max_interrupt_disabled_duration() / 2;

Oh, sched_clock_cpu() is also giving me such BUGs,
but also (phew, it's a WARN_ON_ONCE):
 
00000] WARNING: at kernel/sched/clock.c:219 sched_clock_cpu+0x2e/0xcf()
00000] Hardware name: 4174AY9
00000] Modules linked in:
00000] Pid: 0, comm: swapper/0 Not tainted 3.8.0-rc6-mm1 #2
00000] Call Trace:
00000]  [<ffffffff8106018b>] warn_slowpath_common+0x7e/0x99
00000]  [<ffffffff810601bb>] warn_slowpath_null+0x15/0x17
00000]  [<ffffffff8108a683>] sched_clock_cpu+0x2e/0xcf
00000]  [<ffffffff810615e4>] __console_unlock+0x48/0x39e
00000]  [<ffffffff8153e648>] ? printk+0x48/0x4a
00000]  [<ffffffff81061943>] console_unlock+0x9/0x1b
00000]  [<ffffffff818f7ebc>] con_init+0x252/0x27d
00000]  [<ffffffff818f75b4>] console_init+0x16/0x27
00000]  [<ffffffff818d4c34>] start_kernel+0x243/0x322
00000]  [<ffffffff818d476b>] ? repair_env_string+0x5a/0x5a
00000]  [<ffffffff818d4485>] x86_64_start_reservations+0x2a/0x2c
00000]  [<ffffffff818d454f>] x86_64_start_kernel+0xc8/0xcc

Please fix or revert; though whether yesterday's mmotm then becomes usable,
I'm not so sure (iwlwifi broken? characters dropped from i915 screen? oops
in find_pid_ns? I'd better move on to something else).

Hugh

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] printk: Avoid softlockups in console_unlock()
  2013-02-06 20:19         ` Hugh Dickins
@ 2013-02-06 21:29           ` Jan Kara
  0 siblings, 0 replies; 11+ messages in thread
From: Jan Kara @ 2013-02-06 21:29 UTC (permalink / raw)
  To: Hugh Dickins
  Cc: Jan Kara, David Rientjes, Andrew Morton, LKML, jslaby,
	Greg Kroah-Hartman, Frederic Weisbecker, Steven Rostedt

On Wed 06-02-13 12:19:19, Hugh Dickins wrote:
> On Wed, 6 Feb 2013, Jan Kara wrote:
> > On Wed 06-02-13 09:58:48, David Rientjes wrote:
> > > On Wed, 6 Feb 2013, Jan Kara wrote:
> > > 
> > > >   Yes, I noticed that thread just yesterday and also though that using
> > > > similar trick might be viable. I'll experiment if we could use the same
> > > > method for handling lockup problems I hit. Steven seems to have already
> > > > tweaked PRINTK_PENDING stuff to be usable more easily...
> > > > 
> > > 
> > > Are these new build failures in linux-next coming from this patch?
> > > 
> > > kernel/printk.c: In function 'console_unlock':
> > > kernel/printk.c:2156:18: error: 'printk_work' undeclared (first use in this function)
> > > kernel/printk.c:2156:18: note: each undeclared identifier is reported only once for each function it appears in
> > > kernel/printk.c: At top level:
> > > kernel/printk.c:2167:13: warning: 'printk_worker' defined but not used [-Wunused-function]
> >   Yes, I already sent a patch to fix these (attached if you need it).
> > Thanks for notice.
> 
> Is the console_unlock patch actually intended to be a console load test?
> Could we just revert it until it's baked?
  Yeah, agreed. I'll probably go sligthly different route anyway.

> This little hunk from __console_unlock():
> 
>  	console_may_schedule = 0;
> +	cur_cpu = smp_processor_id();
> 
> I have CONFIG_PREEMPT=y CONFIG_DEBUG_PREEMPT=y,
> so that fills the console with messages:
  Thanks for report, I'll take this in consideration when working on the next
version of the patch. Somehow console_unlock() was never called from the
context you show below in my testing.

> BUG: using smp_processor_id() in preemptible [00000000] code: systemd/1
> caller is __console_unlock+0x3c/0x39c
> Pid: 1, comm: systemd Tainted: G        W    3.8.0-rc6-mm1 #1
> Call Trace:
>  [<ffffffff81223ee2>] debug_smp_processor_id+0xbe/0xd8
>  [<ffffffff810615d8>] __console_unlock+0x3c/0x39c
>  [<ffffffff810a6a38>] ? trace_hardirqs_on+0xd/0xf
>  [<ffffffff8154a997>] ? _raw_spin_unlock_irqrestore+0x46/0x64
>  [<ffffffff81061941>] console_unlock+0x9/0x1b
>  [<ffffffff812955ab>] con_shutdown+0x29/0x2d
>  [<ffffffff81295582>] ? visual_init+0x10d/0x10d
>  [<ffffffff812857ed>] release_tty+0x4d/0x91
>  [<ffffffff8128644f>] tty_release+0x421/0x460
>  [<ffffffff81120c57>] __fput+0x104/0x1e9
>  [<ffffffff81120d45>] ____fput+0x9/0xb
>  [<ffffffff8107a6c3>] task_work_run+0x79/0xa6
>  [<ffffffff8102f4a6>] do_notify_resume+0x55/0x66
>  [<ffffffff8121e29e>] ? trace_hardirqs_on_thunk+0x3a/0x3f
>  [<ffffffff8154b898>] int_signal+0x12/0x17

								Honza
-- 
Jan Kara <jack@suse.cz>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2013-02-06 21:30 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-02-04 22:17 [PATCH v2] printk: Avoid softlockups in console_unlock() Jan Kara
2013-02-05 20:38 ` Andrew Morton
2013-02-05 22:56   ` Frederic Weisbecker
2013-02-06  0:42     ` Steven Rostedt
2013-02-06 14:23   ` Jan Kara
2013-02-06 17:58     ` David Rientjes
2013-02-06 18:52       ` Jan Kara
2013-02-06 19:26         ` David Rientjes
2013-02-06 19:51           ` Jan Kara
2013-02-06 20:19         ` Hugh Dickins
2013-02-06 21:29           ` Jan Kara

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.