All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] sched/debug: more schedstats fixes
@ 2016-06-07 19:43 Josh Poimboeuf
  2016-06-07 19:43 ` [PATCH 1/2] sched/debug: fix 'schedstats=enable' cmdline option Josh Poimboeuf
  2016-06-07 19:43 ` [PATCH 2/2] sched/debug: fix deadlock when enabling sched events Josh Poimboeuf
  0 siblings, 2 replies; 15+ messages in thread
From: Josh Poimboeuf @ 2016-06-07 19:43 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra
  Cc: linux-kernel, Mel Gorman, Matt Fleming, Srikar Dronamraju

Fixes for two more bugs I found in the schedstats code, in addition to
to the other patches I sent out:
  
  https://lkml.kernel.org/r/cover.1464994423.git.jpoimboe@redhat.com

Josh Poimboeuf (2):
  sched/debug: fix 'schedstats=enable' cmdline option
  sched/debug: fix deadlock when enabling sched events

 include/trace/events/sched.h | 30 +++++++++++++++---------------
 kernel/sched/core.c          | 26 +++++++++++++++++++++-----
 kernel/sched/fair.c          | 29 +++++++----------------------
 3 files changed, 43 insertions(+), 42 deletions(-)

-- 
2.4.11

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 1/2] sched/debug: fix 'schedstats=enable' cmdline option
  2016-06-07 19:43 [PATCH 0/2] sched/debug: more schedstats fixes Josh Poimboeuf
@ 2016-06-07 19:43 ` Josh Poimboeuf
  2016-06-08  8:06   ` Mel Gorman
  2016-06-08 14:21   ` [tip:sched/core] sched/debug: Fix " tip-bot for Josh Poimboeuf
  2016-06-07 19:43 ` [PATCH 2/2] sched/debug: fix deadlock when enabling sched events Josh Poimboeuf
  1 sibling, 2 replies; 15+ messages in thread
From: Josh Poimboeuf @ 2016-06-07 19:43 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra
  Cc: linux-kernel, Mel Gorman, Matt Fleming, Srikar Dronamraju

The 'schedstats=enable' option doesn't work, and also produces the
following warning during boot:

  WARNING: CPU: 0 PID: 0 at /home/jpoimboe/git/linux/kernel/jump_label.c:61 static_key_slow_inc+0x8c/0xa0
  static_key_slow_inc used before call to jump_label_init
  Modules linked in:
  CPU: 0 PID: 0 Comm: swapper Not tainted 4.7.0-rc1+ #25
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.8.1-20150318_183358- 04/01/2014
   0000000000000086 3ae3475a4bea95d4 ffffffff81e03da8 ffffffff8143fc83
   ffffffff81e03df8 0000000000000000 ffffffff81e03de8 ffffffff810b1ffb
   0000003d00000096 ffffffff823514d0 ffff88007ff197c8 0000000000000000
  Call Trace:
   [<ffffffff8143fc83>] dump_stack+0x85/0xc2
   [<ffffffff810b1ffb>] __warn+0xcb/0xf0
   [<ffffffff810b207f>] warn_slowpath_fmt+0x5f/0x80
   [<ffffffff811e9c0c>] static_key_slow_inc+0x8c/0xa0
   [<ffffffff810e07c6>] static_key_enable+0x16/0x40
   [<ffffffff8216d633>] setup_schedstats+0x29/0x94
   [<ffffffff82148a05>] unknown_bootoption+0x89/0x191
   [<ffffffff810d8617>] parse_args+0x297/0x4b0
   [<ffffffff82148d61>] start_kernel+0x1d8/0x4a9
   [<ffffffff8214897c>] ? set_init_arg+0x55/0x55
   [<ffffffff82148120>] ? early_idt_handler_array+0x120/0x120
   [<ffffffff821482db>] x86_64_start_reservations+0x2f/0x31
   [<ffffffff82148427>] x86_64_start_kernel+0x14a/0x16d

The problem is that it tries to update the 'sched_schedstats' static key
before jump labels have been initialized.

Changing jump_label_init() to be called earlier before
parse_early_param() wouldn't fix it: it would still fail trying to
poke_text() because mm isn't yet initialized.

Instead, just create a temporary '__sched_schedstats' variable which can
be copied to the static key later during sched_init() after jump labels
have been initialized.

Fixes: cb2517653fcc ("sched/debug: Make schedstats a runtime tunable that is disabled by default")
Cc: stable@vger.kernel.org
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
---
 kernel/sched/core.c | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7f2cae4..385c947 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2253,9 +2253,11 @@ int sysctl_numa_balancing(struct ctl_table *table, int write,
 #endif
 #endif
 
+#ifdef CONFIG_SCHEDSTATS
+
 DEFINE_STATIC_KEY_FALSE(sched_schedstats);
+static bool __initdata __sched_schedstats = false;
 
-#ifdef CONFIG_SCHEDSTATS
 static void set_schedstats(bool enabled)
 {
 	if (enabled)
@@ -2278,11 +2280,16 @@ static int __init setup_schedstats(char *str)
 	if (!str)
 		goto out;
 
+	/*
+	 * This code is called before jump labels have been set up, so we can't
+	 * change the static branch directly just yet.  Instead set a temporary
+	 * variable so init_schedstats() can do it later.
+	 */
 	if (!strcmp(str, "enable")) {
-		set_schedstats(true);
+		__sched_schedstats = true;
 		ret = 1;
 	} else if (!strcmp(str, "disable")) {
-		set_schedstats(false);
+		__sched_schedstats = false;
 		ret = 1;
 	}
 out:
@@ -2293,6 +2300,11 @@ out:
 }
 __setup("schedstats=", setup_schedstats);
 
+static void __init init_schedstats(void)
+{
+	set_schedstats(__sched_schedstats);
+}
+
 #ifdef CONFIG_PROC_SYSCTL
 int sysctl_schedstats(struct ctl_table *table, int write,
 			 void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -2313,8 +2325,10 @@ int sysctl_schedstats(struct ctl_table *table, int write,
 		set_schedstats(state);
 	return err;
 }
-#endif
-#endif
+#endif /* CONFIG_PROC_SYSCTL */
+#else  /* !CONFIG_SCHEDSTATS */
+static inline void init_schedstats(void) {}
+#endif /* CONFIG_SCHEDSTATS */
 
 /*
  * fork()/clone()-time setup:
@@ -7487,6 +7501,8 @@ void __init sched_init(void)
 #endif
 	init_sched_fair_class();
 
+	init_schedstats();
+
 	scheduler_running = 1;
 }
 
-- 
2.4.11

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 2/2] sched/debug: fix deadlock when enabling sched events
  2016-06-07 19:43 [PATCH 0/2] sched/debug: more schedstats fixes Josh Poimboeuf
  2016-06-07 19:43 ` [PATCH 1/2] sched/debug: fix 'schedstats=enable' cmdline option Josh Poimboeuf
@ 2016-06-07 19:43 ` Josh Poimboeuf
  2016-06-07 19:54   ` Peter Zijlstra
                     ` (3 more replies)
  1 sibling, 4 replies; 15+ messages in thread
From: Josh Poimboeuf @ 2016-06-07 19:43 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra
  Cc: linux-kernel, Mel Gorman, Matt Fleming, Srikar Dronamraju

When enabling sched trace events via:

  echo 1 > /sys/kernel/debug/tracing/events/sched/enable

I see a hang, with the following BUG in the printk buffer:

  BUG: spinlock recursion on CPU#1, swapper/1/0
   lock: 0xffff88007d5d8c00, .magic: dead4ead, .owner: swapper/1/0, .owner_cpu: 1
  CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.7.0-rc2+ #1
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.8.1-20150318_183358- 04/01/2014
   0000000000000086 3a8e170cf69ad23f ffff88007d403898 ffffffff8143d663
   ffff88007c535480 ffff88007d5d8c00 ffff88007d4038b8 ffffffff81115948
   ffff88007d5d8c00 ffff88007d5d8c00 ffff88007d4038e8 ffffffff81115aea
  Call Trace:
   <IRQ>  [<ffffffff8143d663>] dump_stack+0x85/0xc2
   [<ffffffff81115948>] spin_dump+0x78/0xc0
   [<ffffffff81115aea>] do_raw_spin_lock+0x11a/0x150
   [<ffffffff81891471>] _raw_spin_lock+0x61/0x80
   [<ffffffff810e5466>] ? try_to_wake_up+0x256/0x4e0
   [<ffffffff810e5466>] try_to_wake_up+0x256/0x4e0
   [<ffffffff81891a0a>] ? _raw_spin_unlock_irqrestore+0x4a/0x80
   [<ffffffff810e5705>] wake_up_process+0x15/0x20
   [<ffffffff810cebb4>] insert_work+0x84/0xc0
   [<ffffffff810ced7f>] __queue_work+0x18f/0x660
   [<ffffffff810cf9a6>] queue_work_on+0x46/0x90
   [<ffffffffa00cd95b>] drm_fb_helper_dirty.isra.11+0xcb/0xe0 [drm_kms_helper]
   [<ffffffffa00cdac0>] drm_fb_helper_sys_imageblit+0x30/0x40 [drm_kms_helper]
   [<ffffffff814babcd>] soft_cursor+0x1ad/0x230
   [<ffffffff814ba379>] bit_cursor+0x649/0x680
   [<ffffffff814b9d30>] ? update_attr.isra.2+0x90/0x90
   [<ffffffff814b5e6a>] fbcon_cursor+0x14a/0x1c0
   [<ffffffff81555ef8>] hide_cursor+0x28/0x90
   [<ffffffff81558b6f>] vt_console_print+0x3bf/0x3f0
   [<ffffffff81122c63>] call_console_drivers.constprop.24+0x183/0x200
   [<ffffffff811241f4>] console_unlock+0x3d4/0x610
   [<ffffffff811247f5>] vprintk_emit+0x3c5/0x610
   [<ffffffff81124bc9>] vprintk_default+0x29/0x40
   [<ffffffff811e965b>] printk+0x57/0x73
   [<ffffffff810f7a9e>] enqueue_entity+0xc2e/0xc70
   [<ffffffff810f7b39>] enqueue_task_fair+0x59/0xab0
   [<ffffffff8106dcd9>] ? kvm_sched_clock_read+0x9/0x20
   [<ffffffff8103fb39>] ? sched_clock+0x9/0x10
   [<ffffffff810e3fcc>] activate_task+0x5c/0xa0
   [<ffffffff810e4514>] ttwu_do_activate+0x54/0xb0
   [<ffffffff810e5cea>] sched_ttwu_pending+0x7a/0xb0
   [<ffffffff810e5e51>] scheduler_ipi+0x61/0x170
   [<ffffffff81059e7f>] smp_trace_reschedule_interrupt+0x4f/0x2a0
   [<ffffffff81893ba6>] trace_reschedule_interrupt+0x96/0xa0
   <EOI>  [<ffffffff8106e0d6>] ? native_safe_halt+0x6/0x10
   [<ffffffff8110fb1d>] ? trace_hardirqs_on+0xd/0x10
   [<ffffffff81040ac0>] default_idle+0x20/0x1a0
   [<ffffffff8104147f>] arch_cpu_idle+0xf/0x20
   [<ffffffff81102f8f>] default_idle_call+0x2f/0x50
   [<ffffffff8110332e>] cpu_startup_entry+0x37e/0x450
   [<ffffffff8105af70>] start_secondary+0x160/0x1a0

The bug is caused by a deadlock where the task is trying to grab the rq
lock twice because printk()'s aren't safe in sched code.

The offending printk() is the pr_warn_once() in
check_schedstat_required() which warns the user that their
'trace_sched_stat_*' tracepoints aren't going to work because schedstats
is disabled:

	/* Force schedstat enabled if a dependent tracepoint is active */
	if (trace_sched_stat_wait_enabled()    ||
			trace_sched_stat_sleep_enabled()   ||
			trace_sched_stat_iowait_enabled()  ||
			trace_sched_stat_blocked_enabled() ||
			trace_sched_stat_runtime_enabled())  {
		pr_warn_once("Scheduler tracepoints stat_sleep, stat_iowait, "
			     "stat_blocked and stat_runtime require the "
			     "kernel parameter schedstats=enabled or "
			     "kernel.sched_schedstats=1\n");
	}

In addition to the deadlock, I think this code has other issues:

1. Instead of just warning and allowing the tracepoints to be broken,
   I'd argue that it would be better to make them work by forcing
   schedstats enabled and printing a warning about that, which is what's
   already being done in other similar cases (latencytop and profiling).
   Otherwise things like "perf sched record" won't have the intended
   effect.  In fact the comment in the above code snippet seems to agree
   with me, so maybe that was the original intent.

2. It's called in the scheduler hot path from enqueue_entity().

So change the warning to a call to force_schedstat_enabled(), and do it
only when the tracepoint gets enabled.

Fixes: cb2517653fcc ("sched/debug: Make schedstats a runtime tunable that is disabled by default")
Cc: stable@vger.kernel.org
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
---
 include/trace/events/sched.h | 30 +++++++++++++++---------------
 kernel/sched/fair.c          | 29 +++++++----------------------
 2 files changed, 22 insertions(+), 37 deletions(-)

diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 9b90c57..ce12e93 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -341,37 +341,36 @@ DECLARE_EVENT_CLASS(sched_stat_template,
 			(unsigned long long)__entry->delay)
 );
 
+extern void trace_sched_stat_register(void);
+
+#define DEFINE_SCHED_STAT_EVENT(name)					\
+	DEFINE_EVENT_FN(sched_stat_template, name,			\
+			TP_PROTO(struct task_struct *tsk, u64 delay),	\
+			TP_ARGS(tsk, delay),				\
+			trace_sched_stat_register, NULL)
 
 /*
  * Tracepoint for accounting wait time (time the task is runnable
  * but not actually running due to scheduler contention).
  */
-DEFINE_EVENT(sched_stat_template, sched_stat_wait,
-	     TP_PROTO(struct task_struct *tsk, u64 delay),
-	     TP_ARGS(tsk, delay));
+DEFINE_SCHED_STAT_EVENT(sched_stat_wait);
 
 /*
  * Tracepoint for accounting sleep time (time the task is not runnable,
  * including iowait, see below).
  */
-DEFINE_EVENT(sched_stat_template, sched_stat_sleep,
-	     TP_PROTO(struct task_struct *tsk, u64 delay),
-	     TP_ARGS(tsk, delay));
+DEFINE_SCHED_STAT_EVENT(sched_stat_sleep);
 
 /*
  * Tracepoint for accounting iowait time (time the task is not runnable
  * due to waiting on IO to complete).
  */
-DEFINE_EVENT(sched_stat_template, sched_stat_iowait,
-	     TP_PROTO(struct task_struct *tsk, u64 delay),
-	     TP_ARGS(tsk, delay));
+DEFINE_SCHED_STAT_EVENT(sched_stat_iowait);
 
 /*
  * Tracepoint for accounting blocked time (time the task is in uninterruptible).
  */
-DEFINE_EVENT(sched_stat_template, sched_stat_blocked,
-	     TP_PROTO(struct task_struct *tsk, u64 delay),
-	     TP_ARGS(tsk, delay));
+DEFINE_SCHED_STAT_EVENT(sched_stat_blocked);
 
 /*
  * Tracepoint for accounting runtime (time the task is executing
@@ -403,9 +402,10 @@ DECLARE_EVENT_CLASS(sched_stat_runtime,
 			(unsigned long long)__entry->vruntime)
 );
 
-DEFINE_EVENT(sched_stat_runtime, sched_stat_runtime,
-	     TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
-	     TP_ARGS(tsk, runtime, vruntime));
+DEFINE_EVENT_FN(sched_stat_runtime, sched_stat_runtime,
+		TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
+		TP_ARGS(tsk, runtime, vruntime),
+		trace_sched_stat_register, NULL);
 
 /*
  * Tracepoint for showing priority inheritance modifying a tasks
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c6dd8ba..b0cbcc0 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -789,6 +789,13 @@ static void update_curr_fair(struct rq *rq)
 	update_curr(cfs_rq_of(&rq->curr->se));
 }
 
+void trace_sched_stat_register(void)
+{
+#ifdef CONFIG_SCHEDSTATS
+	force_schedstat_enabled();
+#endif
+}
+
 #ifdef CONFIG_SCHEDSTATS
 static inline void
 update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -3208,27 +3215,6 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 
 static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
 
-static inline void check_schedstat_required(void)
-{
-#ifdef CONFIG_SCHEDSTATS
-	if (schedstat_enabled())
-		return;
-
-	/* Force schedstat enabled if a dependent tracepoint is active */
-	if (trace_sched_stat_wait_enabled()    ||
-			trace_sched_stat_sleep_enabled()   ||
-			trace_sched_stat_iowait_enabled()  ||
-			trace_sched_stat_blocked_enabled() ||
-			trace_sched_stat_runtime_enabled())  {
-		pr_warn_once("Scheduler tracepoints stat_sleep, stat_iowait, "
-			     "stat_blocked and stat_runtime require the "
-			     "kernel parameter schedstats=enabled or "
-			     "kernel.sched_schedstats=1\n");
-	}
-#endif
-}
-
-
 /*
  * MIGRATION
  *
@@ -3293,7 +3279,6 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 			enqueue_sleeper(cfs_rq, se);
 	}
 
-	check_schedstat_required();
 	if (schedstat_enabled()) {
 		update_stats_enqueue(cfs_rq, se);
 		check_spread(cfs_rq, se);
-- 
2.4.11

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/2] sched/debug: fix deadlock when enabling sched events
  2016-06-07 19:43 ` [PATCH 2/2] sched/debug: fix deadlock when enabling sched events Josh Poimboeuf
@ 2016-06-07 19:54   ` Peter Zijlstra
  2016-06-07 21:02     ` Josh Poimboeuf
  2016-06-08  8:13     ` Mel Gorman
  2016-06-07 21:06   ` Josh Poimboeuf
                     ` (2 subsequent siblings)
  3 siblings, 2 replies; 15+ messages in thread
From: Peter Zijlstra @ 2016-06-07 19:54 UTC (permalink / raw)
  To: Josh Poimboeuf
  Cc: Ingo Molnar, linux-kernel, Mel Gorman, Matt Fleming, Srikar Dronamraju

On Tue, Jun 07, 2016 at 02:43:17PM -0500, Josh Poimboeuf wrote:
> 1. Instead of just warning and allowing the tracepoints to be broken,
>    I'd argue that it would be better to make them work by forcing
>    schedstats enabled and printing a warning about that, which is what's

Forcing them enabled doesn't make them useful per se; some of these
tracepoint rely on previously recoded state, which now wasn't previously
recorded because back then it was disabled.

But yes, maybe this is better...

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/2] sched/debug: fix deadlock when enabling sched events
  2016-06-07 19:54   ` Peter Zijlstra
@ 2016-06-07 21:02     ` Josh Poimboeuf
  2016-06-08  8:13     ` Mel Gorman
  1 sibling, 0 replies; 15+ messages in thread
From: Josh Poimboeuf @ 2016-06-07 21:02 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Ingo Molnar, linux-kernel, Mel Gorman, Matt Fleming, Srikar Dronamraju

On Tue, Jun 07, 2016 at 09:54:48PM +0200, Peter Zijlstra wrote:
> On Tue, Jun 07, 2016 at 02:43:17PM -0500, Josh Poimboeuf wrote:
> > 1. Instead of just warning and allowing the tracepoints to be broken,
> >    I'd argue that it would be better to make them work by forcing
> >    schedstats enabled and printing a warning about that, which is what's
> 
> Forcing them enabled doesn't make them useful per se; some of these
> tracepoint rely on previously recoded state, which now wasn't previously
> recorded because back then it was disabled.

Ah, that's true.  Though this issue exists regardless of whether we
force enable schedstats or the user manually enables them.  It's a side
effect of allowing schedstats to be enabled after booting.

-- 
Josh

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/2] sched/debug: fix deadlock when enabling sched events
  2016-06-07 19:43 ` [PATCH 2/2] sched/debug: fix deadlock when enabling sched events Josh Poimboeuf
  2016-06-07 19:54   ` Peter Zijlstra
@ 2016-06-07 21:06   ` Josh Poimboeuf
  2016-06-08  7:56   ` Ingo Molnar
  2016-06-08  8:11   ` [PATCH 2/2] sched/debug: fix " Mel Gorman
  3 siblings, 0 replies; 15+ messages in thread
From: Josh Poimboeuf @ 2016-06-07 21:06 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra
  Cc: linux-kernel, Mel Gorman, Matt Fleming, Srikar Dronamraju

On Tue, Jun 07, 2016 at 02:43:17PM -0500, Josh Poimboeuf wrote:
> @@ -403,9 +402,10 @@ DECLARE_EVENT_CLASS(sched_stat_runtime,
>  			(unsigned long long)__entry->vruntime)
>  );
>  
> -DEFINE_EVENT(sched_stat_runtime, sched_stat_runtime,
> -	     TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
> -	     TP_ARGS(tsk, runtime, vruntime));
> +DEFINE_EVENT_FN(sched_stat_runtime, sched_stat_runtime,
> +		TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
> +		TP_ARGS(tsk, runtime, vruntime),
> +		trace_sched_stat_register, NULL);
>  
>  /*
>   * Tracepoint for showing priority inheritance modifying a tasks

Looking closer at the tracepoints, this one doesn't actually seem to
have a dependency on schedstats as far as I can tell.  So this hunk can
probably be removed.

-- 
Josh

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/2] sched/debug: fix deadlock when enabling sched events
  2016-06-07 19:43 ` [PATCH 2/2] sched/debug: fix deadlock when enabling sched events Josh Poimboeuf
  2016-06-07 19:54   ` Peter Zijlstra
  2016-06-07 21:06   ` Josh Poimboeuf
@ 2016-06-08  7:56   ` Ingo Molnar
  2016-06-08 13:12     ` Josh Poimboeuf
  2016-06-08  8:11   ` [PATCH 2/2] sched/debug: fix " Mel Gorman
  3 siblings, 1 reply; 15+ messages in thread
From: Ingo Molnar @ 2016-06-08  7:56 UTC (permalink / raw)
  To: Josh Poimboeuf
  Cc: Ingo Molnar, Peter Zijlstra, linux-kernel, Mel Gorman,
	Matt Fleming, Srikar Dronamraju


* Josh Poimboeuf <jpoimboe@redhat.com> wrote:

> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -789,6 +789,13 @@ static void update_curr_fair(struct rq *rq)
>  	update_curr(cfs_rq_of(&rq->curr->se));
>  }
>  
> +void trace_sched_stat_register(void)
> +{
> +#ifdef CONFIG_SCHEDSTATS
> +	force_schedstat_enabled();
> +#endif
> +}

I think it would be cleaner to provide an empty force_schedstat_enabled() 
definition in sched.h, on !CONFIG_SCHEDSTATS.

But it might make sense to further decouple schedstats from tracing?

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 1/2] sched/debug: fix 'schedstats=enable' cmdline option
  2016-06-07 19:43 ` [PATCH 1/2] sched/debug: fix 'schedstats=enable' cmdline option Josh Poimboeuf
@ 2016-06-08  8:06   ` Mel Gorman
  2016-06-08 14:21   ` [tip:sched/core] sched/debug: Fix " tip-bot for Josh Poimboeuf
  1 sibling, 0 replies; 15+ messages in thread
From: Mel Gorman @ 2016-06-08  8:06 UTC (permalink / raw)
  To: Josh Poimboeuf
  Cc: Ingo Molnar, Peter Zijlstra, linux-kernel, Matt Fleming,
	Srikar Dronamraju

On Tue, Jun 07, 2016 at 02:43:16PM -0500, Josh Poimboeuf wrote:
> The 'schedstats=enable' option doesn't work, and also produces the
> following warning during boot:
> 
>   WARNING: CPU: 0 PID: 0 at /home/jpoimboe/git/linux/kernel/jump_label.c:61 static_key_slow_inc+0x8c/0xa0
>   static_key_slow_inc used before call to jump_label_init
>   Modules linked in:
>   CPU: 0 PID: 0 Comm: swapper Not tainted 4.7.0-rc1+ #25
>   Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.8.1-20150318_183358- 04/01/2014
>    0000000000000086 3ae3475a4bea95d4 ffffffff81e03da8 ffffffff8143fc83
>    ffffffff81e03df8 0000000000000000 ffffffff81e03de8 ffffffff810b1ffb
>    0000003d00000096 ffffffff823514d0 ffff88007ff197c8 0000000000000000
>   Call Trace:
>    [<ffffffff8143fc83>] dump_stack+0x85/0xc2
>    [<ffffffff810b1ffb>] __warn+0xcb/0xf0
>    [<ffffffff810b207f>] warn_slowpath_fmt+0x5f/0x80
>    [<ffffffff811e9c0c>] static_key_slow_inc+0x8c/0xa0
>    [<ffffffff810e07c6>] static_key_enable+0x16/0x40
>    [<ffffffff8216d633>] setup_schedstats+0x29/0x94
>    [<ffffffff82148a05>] unknown_bootoption+0x89/0x191
>    [<ffffffff810d8617>] parse_args+0x297/0x4b0
>    [<ffffffff82148d61>] start_kernel+0x1d8/0x4a9
>    [<ffffffff8214897c>] ? set_init_arg+0x55/0x55
>    [<ffffffff82148120>] ? early_idt_handler_array+0x120/0x120
>    [<ffffffff821482db>] x86_64_start_reservations+0x2f/0x31
>    [<ffffffff82148427>] x86_64_start_kernel+0x14a/0x16d
> 
> The problem is that it tries to update the 'sched_schedstats' static key
> before jump labels have been initialized.
> 

Odd, I thought that particular one had been addressed. At least I saw
errors like that in other areas before and they got fixed later by ftrace.

> Changing jump_label_init() to be called earlier before
> parse_early_param() wouldn't fix it: it would still fail trying to
> poke_text() because mm isn't yet initialized.
> 
> Instead, just create a temporary '__sched_schedstats' variable which can
> be copied to the static key later during sched_init() after jump labels
> have been initialized.
> 
> Fixes: cb2517653fcc ("sched/debug: Make schedstats a runtime tunable that is disabled by default")
> Cc: stable@vger.kernel.org
> Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
> ---
>  kernel/sched/core.c | 26 +++++++++++++++++++++-----
>  1 file changed, 21 insertions(+), 5 deletions(-)
> 
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 7f2cae4..385c947 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -2253,9 +2253,11 @@ int sysctl_numa_balancing(struct ctl_table *table, int write,
>  #endif
>  #endif
>  
> +#ifdef CONFIG_SCHEDSTATS
> +
>  DEFINE_STATIC_KEY_FALSE(sched_schedstats);
> +static bool __initdata __sched_schedstats = false;
>  

Initialisation is unnecessary. Otherwise;

Acked-by: Mel Gorman <mgorman@techsingularity.net>

-- 
Mel Gorman
SUSE Labs

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/2] sched/debug: fix deadlock when enabling sched events
  2016-06-07 19:43 ` [PATCH 2/2] sched/debug: fix deadlock when enabling sched events Josh Poimboeuf
                     ` (2 preceding siblings ...)
  2016-06-08  7:56   ` Ingo Molnar
@ 2016-06-08  8:11   ` Mel Gorman
  3 siblings, 0 replies; 15+ messages in thread
From: Mel Gorman @ 2016-06-08  8:11 UTC (permalink / raw)
  To: Josh Poimboeuf
  Cc: Ingo Molnar, Peter Zijlstra, linux-kernel, Matt Fleming,
	Srikar Dronamraju

On Tue, Jun 07, 2016 at 02:43:17PM -0500, Josh Poimboeuf wrote:
> When enabling sched trace events via:
> 
>   echo 1 > /sys/kernel/debug/tracing/events/sched/enable
> 
> I see a hang, with the following BUG in the printk buffer:
> 

I was certain I had tested this case because I was monitoring to make
sure the stats were not updating until the tuning knob was used. It was
because the context was unsafe that any printing happened because at the
time trying to enable stats in that context blew up.

> In addition to the deadlock, I think this code has other issues:
> 
> 1. Instead of just warning and allowing the tracepoints to be broken,
>    I'd argue that it would be better to make them work by forcing
>    schedstats enabled and printing a warning about that, which is what's
>    already being done in other similar cases (latencytop and profiling).
>    Otherwise things like "perf sched record" won't have the intended
>    effect.  In fact the comment in the above code snippet seems to agree
>    with me, so maybe that was the original intent.
> 
> 2. It's called in the scheduler hot path from enqueue_entity().
> 
> So change the warning to a call to force_schedstat_enabled(), and do it
> only when the tracepoint gets enabled.
> 
> Fixes: cb2517653fcc ("sched/debug: Make schedstats a runtime tunable that is disabled by default")
> Cc: stable@vger.kernel.org
> Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>

I like it! I wasn't aware there was an option to have a registration
callback.

Acked-by: Mel Gorman <mgorman@techsingularity.net>

-- 
Mel Gorman
SUSE Labs

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/2] sched/debug: fix deadlock when enabling sched events
  2016-06-07 19:54   ` Peter Zijlstra
  2016-06-07 21:02     ` Josh Poimboeuf
@ 2016-06-08  8:13     ` Mel Gorman
  1 sibling, 0 replies; 15+ messages in thread
From: Mel Gorman @ 2016-06-08  8:13 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Josh Poimboeuf, Ingo Molnar, linux-kernel, Matt Fleming,
	Srikar Dronamraju

On Tue, Jun 07, 2016 at 09:54:48PM +0200, Peter Zijlstra wrote:
> On Tue, Jun 07, 2016 at 02:43:17PM -0500, Josh Poimboeuf wrote:
> > 1. Instead of just warning and allowing the tracepoints to be broken,
> >    I'd argue that it would be better to make them work by forcing
> >    schedstats enabled and printing a warning about that, which is what's
> 
> Forcing them enabled doesn't make them useful per se; some of these
> tracepoint rely on previously recoded state, which now wasn't previously
> recorded because back then it was disabled.
> 

This is unfortunately true. It means there is a window when the data is
unreliable after the stats are enabled. It is assumed that either someone
enabling the stats knows that or is willing to enable them on the command
line.

-- 
Mel Gorman
SUSE Labs

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/2] sched/debug: fix deadlock when enabling sched events
  2016-06-08  7:56   ` Ingo Molnar
@ 2016-06-08 13:12     ` Josh Poimboeuf
  2016-06-08 13:15       ` Ingo Molnar
  0 siblings, 1 reply; 15+ messages in thread
From: Josh Poimboeuf @ 2016-06-08 13:12 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Ingo Molnar, Peter Zijlstra, linux-kernel, Mel Gorman,
	Matt Fleming, Srikar Dronamraju

On Wed, Jun 08, 2016 at 09:56:12AM +0200, Ingo Molnar wrote:
> 
> * Josh Poimboeuf <jpoimboe@redhat.com> wrote:
> 
> > --- a/kernel/sched/fair.c
> > +++ b/kernel/sched/fair.c
> > @@ -789,6 +789,13 @@ static void update_curr_fair(struct rq *rq)
> >  	update_curr(cfs_rq_of(&rq->curr->se));
> >  }
> >  
> > +void trace_sched_stat_register(void)
> > +{
> > +#ifdef CONFIG_SCHEDSTATS
> > +	force_schedstat_enabled();
> > +#endif
> > +}
> 
> I think it would be cleaner to provide an empty force_schedstat_enabled() 
> definition in sched.h, on !CONFIG_SCHEDSTATS.

Yes, agreed.

> But it might make sense to further decouple schedstats from tracing?

Looking at how we could do that:

- The sched_stat_wait tracepoint is dependent on the wait_start schedstat.

- The sched_stat_sleep tracepoint is dependent on the sleep_start
  schedstat.

- The sched_stat_iowait and sched_stat_blocked tracepoints are dependent
  on the block_start schedstat.

We could move those three schedstats values out of sched_statistics and
into sched_entity, and then always update them regardless of
CONFIG_SCHEDSTATS.

That would ensure these tracepoints always work.  But then again it
would add a little bit of runtime overhead.

I don't have a strong opinion either way.  Thoughts?

-- 
Josh

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 2/2] sched/debug: fix deadlock when enabling sched events
  2016-06-08 13:12     ` Josh Poimboeuf
@ 2016-06-08 13:15       ` Ingo Molnar
  2016-06-13  7:32         ` [PATCH v2] " Josh Poimboeuf
  0 siblings, 1 reply; 15+ messages in thread
From: Ingo Molnar @ 2016-06-08 13:15 UTC (permalink / raw)
  To: Josh Poimboeuf
  Cc: Ingo Molnar, Peter Zijlstra, linux-kernel, Mel Gorman,
	Matt Fleming, Srikar Dronamraju


* Josh Poimboeuf <jpoimboe@redhat.com> wrote:

> On Wed, Jun 08, 2016 at 09:56:12AM +0200, Ingo Molnar wrote:
> > 
> > * Josh Poimboeuf <jpoimboe@redhat.com> wrote:
> > 
> > > --- a/kernel/sched/fair.c
> > > +++ b/kernel/sched/fair.c
> > > @@ -789,6 +789,13 @@ static void update_curr_fair(struct rq *rq)
> > >  	update_curr(cfs_rq_of(&rq->curr->se));
> > >  }
> > >  
> > > +void trace_sched_stat_register(void)
> > > +{
> > > +#ifdef CONFIG_SCHEDSTATS
> > > +	force_schedstat_enabled();
> > > +#endif
> > > +}
> > 
> > I think it would be cleaner to provide an empty force_schedstat_enabled() 
> > definition in sched.h, on !CONFIG_SCHEDSTATS.
> 
> Yes, agreed.
> 
> > But it might make sense to further decouple schedstats from tracing?
> 
> Looking at how we could do that:
> 
> - The sched_stat_wait tracepoint is dependent on the wait_start schedstat.

Can we maintain wait_start unconditionally? Having config dependent tracepoints 
sucks.

> - The sched_stat_sleep tracepoint is dependent on the sleep_start
>   schedstat.

Ditto.

> - The sched_stat_iowait and sched_stat_blocked tracepoints are dependent
>   on the block_start schedstat.

Ditto.

> We could move those three schedstats values out of sched_statistics and
> into sched_entity, and then always update them regardless of
> CONFIG_SCHEDSTATS.
> 
> That would ensure these tracepoints always work.  But then again it
> would add a little bit of runtime overhead.
> 
> I don't have a strong opinion either way.  Thoughts?

Please do a before/after 'size vmlinux' dump of a defconfig build with tracing 
disabled. I.e. how much code overhead does the updating of those variables add?

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [tip:sched/core] sched/debug: Fix 'schedstats=enable' cmdline option
  2016-06-07 19:43 ` [PATCH 1/2] sched/debug: fix 'schedstats=enable' cmdline option Josh Poimboeuf
  2016-06-08  8:06   ` Mel Gorman
@ 2016-06-08 14:21   ` tip-bot for Josh Poimboeuf
  1 sibling, 0 replies; 15+ messages in thread
From: tip-bot for Josh Poimboeuf @ 2016-06-08 14:21 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: matt, mingo, torvalds, jpoimboe, mgorman, srikar, linux-kernel,
	hpa, peterz, tglx

Commit-ID:  4698f88c06b893f2acc0b443004a53bf490fde7c
Gitweb:     http://git.kernel.org/tip/4698f88c06b893f2acc0b443004a53bf490fde7c
Author:     Josh Poimboeuf <jpoimboe@redhat.com>
AuthorDate: Tue, 7 Jun 2016 14:43:16 -0500
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Wed, 8 Jun 2016 14:33:05 +0200

sched/debug: Fix 'schedstats=enable' cmdline option

The 'schedstats=enable' option doesn't work, and also produces the
following warning during boot:

  WARNING: CPU: 0 PID: 0 at /home/jpoimboe/git/linux/kernel/jump_label.c:61 static_key_slow_inc+0x8c/0xa0
  static_key_slow_inc used before call to jump_label_init
  Modules linked in:
  CPU: 0 PID: 0 Comm: swapper Not tainted 4.7.0-rc1+ #25
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.8.1-20150318_183358- 04/01/2014
   0000000000000086 3ae3475a4bea95d4 ffffffff81e03da8 ffffffff8143fc83
   ffffffff81e03df8 0000000000000000 ffffffff81e03de8 ffffffff810b1ffb
   0000003d00000096 ffffffff823514d0 ffff88007ff197c8 0000000000000000
  Call Trace:
   [<ffffffff8143fc83>] dump_stack+0x85/0xc2
   [<ffffffff810b1ffb>] __warn+0xcb/0xf0
   [<ffffffff810b207f>] warn_slowpath_fmt+0x5f/0x80
   [<ffffffff811e9c0c>] static_key_slow_inc+0x8c/0xa0
   [<ffffffff810e07c6>] static_key_enable+0x16/0x40
   [<ffffffff8216d633>] setup_schedstats+0x29/0x94
   [<ffffffff82148a05>] unknown_bootoption+0x89/0x191
   [<ffffffff810d8617>] parse_args+0x297/0x4b0
   [<ffffffff82148d61>] start_kernel+0x1d8/0x4a9
   [<ffffffff8214897c>] ? set_init_arg+0x55/0x55
   [<ffffffff82148120>] ? early_idt_handler_array+0x120/0x120
   [<ffffffff821482db>] x86_64_start_reservations+0x2f/0x31
   [<ffffffff82148427>] x86_64_start_kernel+0x14a/0x16d

The problem is that it tries to update the 'sched_schedstats' static key
before jump labels have been initialized.

Changing jump_label_init() to be called earlier before
parse_early_param() wouldn't fix it: it would still fail trying to
poke_text() because mm isn't yet initialized.

Instead, just create a temporary '__sched_schedstats' variable which can
be copied to the static key later during sched_init() after jump labels
have been initialized.

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: cb2517653fcc ("sched/debug: Make schedstats a runtime tunable that is disabled by default")
Link: http://lkml.kernel.org/r/453775fe3433bed65731a583e228ccea806d18cd.1465322027.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/core.c | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7f2cae4..385c947 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2253,9 +2253,11 @@ int sysctl_numa_balancing(struct ctl_table *table, int write,
 #endif
 #endif
 
+#ifdef CONFIG_SCHEDSTATS
+
 DEFINE_STATIC_KEY_FALSE(sched_schedstats);
+static bool __initdata __sched_schedstats = false;
 
-#ifdef CONFIG_SCHEDSTATS
 static void set_schedstats(bool enabled)
 {
 	if (enabled)
@@ -2278,11 +2280,16 @@ static int __init setup_schedstats(char *str)
 	if (!str)
 		goto out;
 
+	/*
+	 * This code is called before jump labels have been set up, so we can't
+	 * change the static branch directly just yet.  Instead set a temporary
+	 * variable so init_schedstats() can do it later.
+	 */
 	if (!strcmp(str, "enable")) {
-		set_schedstats(true);
+		__sched_schedstats = true;
 		ret = 1;
 	} else if (!strcmp(str, "disable")) {
-		set_schedstats(false);
+		__sched_schedstats = false;
 		ret = 1;
 	}
 out:
@@ -2293,6 +2300,11 @@ out:
 }
 __setup("schedstats=", setup_schedstats);
 
+static void __init init_schedstats(void)
+{
+	set_schedstats(__sched_schedstats);
+}
+
 #ifdef CONFIG_PROC_SYSCTL
 int sysctl_schedstats(struct ctl_table *table, int write,
 			 void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -2313,8 +2325,10 @@ int sysctl_schedstats(struct ctl_table *table, int write,
 		set_schedstats(state);
 	return err;
 }
-#endif
-#endif
+#endif /* CONFIG_PROC_SYSCTL */
+#else  /* !CONFIG_SCHEDSTATS */
+static inline void init_schedstats(void) {}
+#endif /* CONFIG_SCHEDSTATS */
 
 /*
  * fork()/clone()-time setup:
@@ -7487,6 +7501,8 @@ void __init sched_init(void)
 #endif
 	init_sched_fair_class();
 
+	init_schedstats();
+
 	scheduler_running = 1;
 }
 

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH v2] sched/debug: fix deadlock when enabling sched events
  2016-06-08 13:15       ` Ingo Molnar
@ 2016-06-13  7:32         ` Josh Poimboeuf
  2016-06-14 11:35           ` [tip:sched/urgent] sched/debug: Fix " tip-bot for Josh Poimboeuf
  0 siblings, 1 reply; 15+ messages in thread
From: Josh Poimboeuf @ 2016-06-13  7:32 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Ingo Molnar, Peter Zijlstra, linux-kernel, Mel Gorman,
	Matt Fleming, Srikar Dronamraju

On Wed, Jun 08, 2016 at 03:15:59PM +0200, Ingo Molnar wrote:
> 
> * Josh Poimboeuf <jpoimboe@redhat.com> wrote:
> 
> > On Wed, Jun 08, 2016 at 09:56:12AM +0200, Ingo Molnar wrote:
> > > 
> > > * Josh Poimboeuf <jpoimboe@redhat.com> wrote:
> > > 
> > > > --- a/kernel/sched/fair.c
> > > > +++ b/kernel/sched/fair.c
> > > > @@ -789,6 +789,13 @@ static void update_curr_fair(struct rq *rq)
> > > >  	update_curr(cfs_rq_of(&rq->curr->se));
> > > >  }
> > > >  
> > > > +void trace_sched_stat_register(void)
> > > > +{
> > > > +#ifdef CONFIG_SCHEDSTATS
> > > > +	force_schedstat_enabled();
> > > > +#endif
> > > > +}
> > > 
> > > I think it would be cleaner to provide an empty force_schedstat_enabled() 
> > > definition in sched.h, on !CONFIG_SCHEDSTATS.
> > 
> > Yes, agreed.
> > 
> > > But it might make sense to further decouple schedstats from tracing?
> > 
> > Looking at how we could do that:
> > 
> > - The sched_stat_wait tracepoint is dependent on the wait_start schedstat.
> 
> Can we maintain wait_start unconditionally? Having config dependent tracepoints 
> sucks.
> 
> > - The sched_stat_sleep tracepoint is dependent on the sleep_start
> >   schedstat.
> 
> Ditto.
> 
> > - The sched_stat_iowait and sched_stat_blocked tracepoints are dependent
> >   on the block_start schedstat.
> 
> Ditto.
> 
> > We could move those three schedstats values out of sched_statistics and
> > into sched_entity, and then always update them regardless of
> > CONFIG_SCHEDSTATS.
> > 
> > That would ensure these tracepoints always work.  But then again it
> > would add a little bit of runtime overhead.
> > 
> > I don't have a strong opinion either way.  Thoughts?
> 
> Please do a before/after 'size vmlinux' dump of a defconfig build with tracing 
> disabled. I.e. how much code overhead does the updating of those variables add?

When trying to decouple the sched_stat_* tracepoints from schedstats, I
see a significant (~12%) performance degradation in pipetest for the
CONFIG_SCHEDSTATS case.  Still looking into it...

In the meantime, here's a straightforward fix for the deadlock.

---

From: Josh Poimboeuf <jpoimboe@redhat.com>
Subject: [PATCH v2] sched/debug: fix deadlock when enabling sched events

I see a hang when enabling sched events:

  echo 1 > /sys/kernel/debug/tracing/events/sched/enable

The printk buffer shows:

  BUG: spinlock recursion on CPU#1, swapper/1/0
   lock: 0xffff88007d5d8c00, .magic: dead4ead, .owner: swapper/1/0, .owner_cpu: 1
  CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.7.0-rc2+ #1
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.8.1-20150318_183358- 04/01/2014
   0000000000000086 3a8e170cf69ad23f ffff88007d403898 ffffffff8143d663
   ffff88007c535480 ffff88007d5d8c00 ffff88007d4038b8 ffffffff81115948
   ffff88007d5d8c00 ffff88007d5d8c00 ffff88007d4038e8 ffffffff81115aea
  Call Trace:
   <IRQ>  [<ffffffff8143d663>] dump_stack+0x85/0xc2
   [<ffffffff81115948>] spin_dump+0x78/0xc0
   [<ffffffff81115aea>] do_raw_spin_lock+0x11a/0x150
   [<ffffffff81891471>] _raw_spin_lock+0x61/0x80
   [<ffffffff810e5466>] ? try_to_wake_up+0x256/0x4e0
   [<ffffffff810e5466>] try_to_wake_up+0x256/0x4e0
   [<ffffffff81891a0a>] ? _raw_spin_unlock_irqrestore+0x4a/0x80
   [<ffffffff810e5705>] wake_up_process+0x15/0x20
   [<ffffffff810cebb4>] insert_work+0x84/0xc0
   [<ffffffff810ced7f>] __queue_work+0x18f/0x660
   [<ffffffff810cf9a6>] queue_work_on+0x46/0x90
   [<ffffffffa00cd95b>] drm_fb_helper_dirty.isra.11+0xcb/0xe0 [drm_kms_helper]
   [<ffffffffa00cdac0>] drm_fb_helper_sys_imageblit+0x30/0x40 [drm_kms_helper]
   [<ffffffff814babcd>] soft_cursor+0x1ad/0x230
   [<ffffffff814ba379>] bit_cursor+0x649/0x680
   [<ffffffff814b9d30>] ? update_attr.isra.2+0x90/0x90
   [<ffffffff814b5e6a>] fbcon_cursor+0x14a/0x1c0
   [<ffffffff81555ef8>] hide_cursor+0x28/0x90
   [<ffffffff81558b6f>] vt_console_print+0x3bf/0x3f0
   [<ffffffff81122c63>] call_console_drivers.constprop.24+0x183/0x200
   [<ffffffff811241f4>] console_unlock+0x3d4/0x610
   [<ffffffff811247f5>] vprintk_emit+0x3c5/0x610
   [<ffffffff81124bc9>] vprintk_default+0x29/0x40
   [<ffffffff811e965b>] printk+0x57/0x73
   [<ffffffff810f7a9e>] enqueue_entity+0xc2e/0xc70
   [<ffffffff810f7b39>] enqueue_task_fair+0x59/0xab0
   [<ffffffff8106dcd9>] ? kvm_sched_clock_read+0x9/0x20
   [<ffffffff8103fb39>] ? sched_clock+0x9/0x10
   [<ffffffff810e3fcc>] activate_task+0x5c/0xa0
   [<ffffffff810e4514>] ttwu_do_activate+0x54/0xb0
   [<ffffffff810e5cea>] sched_ttwu_pending+0x7a/0xb0
   [<ffffffff810e5e51>] scheduler_ipi+0x61/0x170
   [<ffffffff81059e7f>] smp_trace_reschedule_interrupt+0x4f/0x2a0
   [<ffffffff81893ba6>] trace_reschedule_interrupt+0x96/0xa0
   <EOI>  [<ffffffff8106e0d6>] ? native_safe_halt+0x6/0x10
   [<ffffffff8110fb1d>] ? trace_hardirqs_on+0xd/0x10
   [<ffffffff81040ac0>] default_idle+0x20/0x1a0
   [<ffffffff8104147f>] arch_cpu_idle+0xf/0x20
   [<ffffffff81102f8f>] default_idle_call+0x2f/0x50
   [<ffffffff8110332e>] cpu_startup_entry+0x37e/0x450
   [<ffffffff8105af70>] start_secondary+0x160/0x1a0

Note the hang only occurs when echoing the above from a physical serial
console, not from an ssh session.

The bug is caused by a deadlock where the task is trying to grab the rq
lock twice because printk()'s aren't safe in sched code.

Fixes: cb2517653fcc ("sched/debug: Make schedstats a runtime tunable that is disabled by default")
Cc: stable@vger.kernel.org
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
---
 kernel/sched/fair.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c6dd8ba..52d9b37 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3220,7 +3220,7 @@ static inline void check_schedstat_required(void)
 			trace_sched_stat_iowait_enabled()  ||
 			trace_sched_stat_blocked_enabled() ||
 			trace_sched_stat_runtime_enabled())  {
-		pr_warn_once("Scheduler tracepoints stat_sleep, stat_iowait, "
+		printk_deferred_once("Scheduler tracepoints stat_sleep, stat_iowait, "
 			     "stat_blocked and stat_runtime require the "
 			     "kernel parameter schedstats=enabled or "
 			     "kernel.sched_schedstats=1\n");
-- 
2.4.11

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [tip:sched/urgent] sched/debug: Fix deadlock when enabling sched events
  2016-06-13  7:32         ` [PATCH v2] " Josh Poimboeuf
@ 2016-06-14 11:35           ` tip-bot for Josh Poimboeuf
  0 siblings, 0 replies; 15+ messages in thread
From: tip-bot for Josh Poimboeuf @ 2016-06-14 11:35 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: peterz, matt, jpoimboe, mingo, mgorman, linux-kernel, hpa,
	efault, torvalds, srikar, tglx

Commit-ID:  eda8dca519269c92a0771668b3d5678792de7b78
Gitweb:     http://git.kernel.org/tip/eda8dca519269c92a0771668b3d5678792de7b78
Author:     Josh Poimboeuf <jpoimboe@redhat.com>
AuthorDate: Mon, 13 Jun 2016 02:32:09 -0500
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Tue, 14 Jun 2016 12:47:21 +0200

sched/debug: Fix deadlock when enabling sched events

I see a hang when enabling sched events:

  echo 1 > /sys/kernel/debug/tracing/events/sched/enable

The printk buffer shows:

  BUG: spinlock recursion on CPU#1, swapper/1/0
   lock: 0xffff88007d5d8c00, .magic: dead4ead, .owner: swapper/1/0, .owner_cpu: 1
  CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.7.0-rc2+ #1
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.8.1-20150318_183358- 04/01/2014
  ...
  Call Trace:
   <IRQ>  [<ffffffff8143d663>] dump_stack+0x85/0xc2
   [<ffffffff81115948>] spin_dump+0x78/0xc0
   [<ffffffff81115aea>] do_raw_spin_lock+0x11a/0x150
   [<ffffffff81891471>] _raw_spin_lock+0x61/0x80
   [<ffffffff810e5466>] ? try_to_wake_up+0x256/0x4e0
   [<ffffffff810e5466>] try_to_wake_up+0x256/0x4e0
   [<ffffffff81891a0a>] ? _raw_spin_unlock_irqrestore+0x4a/0x80
   [<ffffffff810e5705>] wake_up_process+0x15/0x20
   [<ffffffff810cebb4>] insert_work+0x84/0xc0
   [<ffffffff810ced7f>] __queue_work+0x18f/0x660
   [<ffffffff810cf9a6>] queue_work_on+0x46/0x90
   [<ffffffffa00cd95b>] drm_fb_helper_dirty.isra.11+0xcb/0xe0 [drm_kms_helper]
   [<ffffffffa00cdac0>] drm_fb_helper_sys_imageblit+0x30/0x40 [drm_kms_helper]
   [<ffffffff814babcd>] soft_cursor+0x1ad/0x230
   [<ffffffff814ba379>] bit_cursor+0x649/0x680
   [<ffffffff814b9d30>] ? update_attr.isra.2+0x90/0x90
   [<ffffffff814b5e6a>] fbcon_cursor+0x14a/0x1c0
   [<ffffffff81555ef8>] hide_cursor+0x28/0x90
   [<ffffffff81558b6f>] vt_console_print+0x3bf/0x3f0
   [<ffffffff81122c63>] call_console_drivers.constprop.24+0x183/0x200
   [<ffffffff811241f4>] console_unlock+0x3d4/0x610
   [<ffffffff811247f5>] vprintk_emit+0x3c5/0x610
   [<ffffffff81124bc9>] vprintk_default+0x29/0x40
   [<ffffffff811e965b>] printk+0x57/0x73
   [<ffffffff810f7a9e>] enqueue_entity+0xc2e/0xc70
   [<ffffffff810f7b39>] enqueue_task_fair+0x59/0xab0
   [<ffffffff8106dcd9>] ? kvm_sched_clock_read+0x9/0x20
   [<ffffffff8103fb39>] ? sched_clock+0x9/0x10
   [<ffffffff810e3fcc>] activate_task+0x5c/0xa0
   [<ffffffff810e4514>] ttwu_do_activate+0x54/0xb0
   [<ffffffff810e5cea>] sched_ttwu_pending+0x7a/0xb0
   [<ffffffff810e5e51>] scheduler_ipi+0x61/0x170
   [<ffffffff81059e7f>] smp_trace_reschedule_interrupt+0x4f/0x2a0
   [<ffffffff81893ba6>] trace_reschedule_interrupt+0x96/0xa0
   <EOI>  [<ffffffff8106e0d6>] ? native_safe_halt+0x6/0x10
   [<ffffffff8110fb1d>] ? trace_hardirqs_on+0xd/0x10
   [<ffffffff81040ac0>] default_idle+0x20/0x1a0
   [<ffffffff8104147f>] arch_cpu_idle+0xf/0x20
   [<ffffffff81102f8f>] default_idle_call+0x2f/0x50
   [<ffffffff8110332e>] cpu_startup_entry+0x37e/0x450
   [<ffffffff8105af70>] start_secondary+0x160/0x1a0

Note the hang only occurs when echoing the above from a physical serial
console, not from an ssh session.

The bug is caused by a deadlock where the task is trying to grab the rq
lock twice because printk()'s aren't safe in sched code.

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Fixes: cb2517653fcc ("sched/debug: Make schedstats a runtime tunable that is disabled by default")
Link: http://lkml.kernel.org/r/20160613073209.gdvdybiruljbkn3p@treble
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/fair.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4e33ad1..a2348de 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3246,7 +3246,7 @@ static inline void check_schedstat_required(void)
 			trace_sched_stat_iowait_enabled()  ||
 			trace_sched_stat_blocked_enabled() ||
 			trace_sched_stat_runtime_enabled())  {
-		pr_warn_once("Scheduler tracepoints stat_sleep, stat_iowait, "
+		printk_deferred_once("Scheduler tracepoints stat_sleep, stat_iowait, "
 			     "stat_blocked and stat_runtime require the "
 			     "kernel parameter schedstats=enabled or "
 			     "kernel.sched_schedstats=1\n");

^ permalink raw reply related	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2016-06-14 11:35 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-06-07 19:43 [PATCH 0/2] sched/debug: more schedstats fixes Josh Poimboeuf
2016-06-07 19:43 ` [PATCH 1/2] sched/debug: fix 'schedstats=enable' cmdline option Josh Poimboeuf
2016-06-08  8:06   ` Mel Gorman
2016-06-08 14:21   ` [tip:sched/core] sched/debug: Fix " tip-bot for Josh Poimboeuf
2016-06-07 19:43 ` [PATCH 2/2] sched/debug: fix deadlock when enabling sched events Josh Poimboeuf
2016-06-07 19:54   ` Peter Zijlstra
2016-06-07 21:02     ` Josh Poimboeuf
2016-06-08  8:13     ` Mel Gorman
2016-06-07 21:06   ` Josh Poimboeuf
2016-06-08  7:56   ` Ingo Molnar
2016-06-08 13:12     ` Josh Poimboeuf
2016-06-08 13:15       ` Ingo Molnar
2016-06-13  7:32         ` [PATCH v2] " Josh Poimboeuf
2016-06-14 11:35           ` [tip:sched/urgent] sched/debug: Fix " tip-bot for Josh Poimboeuf
2016-06-08  8:11   ` [PATCH 2/2] sched/debug: fix " Mel Gorman

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.