linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] rcu: Eliminate softirq processing from rcutree
@ 2013-12-21 19:39 Sebastian Andrzej Siewior
  2013-12-22  3:07 ` Mike Galbraith
  0 siblings, 1 reply; 21+ messages in thread
From: Sebastian Andrzej Siewior @ 2013-12-21 19:39 UTC (permalink / raw)
  To: linux-rt-users
  Cc: Steven Rostedt, linux-kernel, Thomas Gleixner, Paul E. McKenney

From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>

Running RCU out of softirq is a problem for some workloads that would
like to manage RCU core processing independently of other softirq work,
for example, setting kthread priority.  This commit therefore moves the
RCU core work from softirq to a per-CPU/per-flavor SCHED_OTHER kthread
named rcuc.  The SCHED_OTHER approach avoids the scalability problems
that appeared with the earlier attempt to move RCU core processing
from softirq to kthreads.  That said, kernels built with RCU_BOOST=y
will run the rcuc kthreads at the RCU-boosting priority.

Reported-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---

I intend to apply this for the next -RT release. My powerpc test box runs
with this for more than 24h without anything bad happening.

 kernel/rcutree.c        | 113 +++++++++++++++++++++++++++++++++++-----
 kernel/rcutree.h        |   3 +-
 kernel/rcutree_plugin.h | 134 +++++-------------------------------------------
 3 files changed, 113 insertions(+), 137 deletions(-)

diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index f4f61bb..507fab1 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -55,6 +55,11 @@
 #include <linux/random.h>
 #include <linux/ftrace_event.h>
 #include <linux/suspend.h>
+#include <linux/delay.h>
+#include <linux/gfp.h>
+#include <linux/oom.h>
+#include <linux/smpboot.h>
+#include "time/tick-internal.h"
 
 #include "rcutree.h"
 #include <trace/events/rcu.h>
@@ -145,8 +150,6 @@ EXPORT_SYMBOL_GPL(rcu_scheduler_active);
  */
 static int rcu_scheduler_fully_active __read_mostly;
 
-#ifdef CONFIG_RCU_BOOST
-
 /*
  * Control variables for per-CPU and per-rcu_node kthreads.  These
  * handle all flavors of RCU.
@@ -156,8 +159,6 @@ DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
 DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
 DEFINE_PER_CPU(char, rcu_cpu_has_work);
 
-#endif /* #ifdef CONFIG_RCU_BOOST */
-
 static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
 static void invoke_rcu_core(void);
 static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp);
@@ -2226,16 +2227,14 @@ __rcu_process_callbacks(struct rcu_state *rsp)
 /*
  * Do RCU core processing for the current CPU.
  */
-static void rcu_process_callbacks(struct softirq_action *unused)
+static void rcu_process_callbacks(void)
 {
 	struct rcu_state *rsp;
 
 	if (cpu_is_offline(smp_processor_id()))
 		return;
-	trace_rcu_utilization(TPS("Start RCU core"));
 	for_each_rcu_flavor(rsp)
 		__rcu_process_callbacks(rsp);
-	trace_rcu_utilization(TPS("End RCU core"));
 }
 
 /*
@@ -2249,18 +2248,105 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
 {
 	if (unlikely(!ACCESS_ONCE(rcu_scheduler_fully_active)))
 		return;
-	if (likely(!rsp->boost)) {
-		rcu_do_batch(rsp, rdp);
+	rcu_do_batch(rsp, rdp);
+}
+
+static void rcu_wake_cond(struct task_struct *t, int status)
+{
+	/*
+	 * If the thread is yielding, only wake it when this
+	 * is invoked from idle
+	 */
+	if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current)))
+		wake_up_process(t);
+}
+
+/*
+ * Wake up this CPU's rcuc kthread to do RCU core processing.
+ */
+static void invoke_rcu_core(void)
+{
+	unsigned long flags;
+	struct task_struct *t;
+
+	if (!cpu_online(smp_processor_id()))
 		return;
+	local_irq_save(flags);
+	__this_cpu_write(rcu_cpu_has_work, 1);
+	t = __this_cpu_read(rcu_cpu_kthread_task);
+	if (t != NULL && current != t)
+		rcu_wake_cond(t, __this_cpu_read(rcu_cpu_kthread_status));
+	local_irq_restore(flags);
+}
+
+static void rcu_cpu_kthread_park(unsigned int cpu)
+{
+	per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
+}
+
+static int rcu_cpu_kthread_should_run(unsigned int cpu)
+{
+	return __this_cpu_read(rcu_cpu_has_work);
+}
+
+/*
+ * Per-CPU kernel thread that invokes RCU callbacks.  This replaces the
+ * RCU softirq used in flavors and configurations of RCU that do not
+ * support RCU priority boosting.
+ */
+static void rcu_cpu_kthread(unsigned int cpu)
+{
+	unsigned int *statusp = &__get_cpu_var(rcu_cpu_kthread_status);
+	char work, *workp = &__get_cpu_var(rcu_cpu_has_work);
+	int spincnt;
+
+	for (spincnt = 0; spincnt < 10; spincnt++) {
+		trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
+		local_bh_disable();
+		*statusp = RCU_KTHREAD_RUNNING;
+		this_cpu_inc(rcu_cpu_kthread_loops);
+		local_irq_disable();
+		work = *workp;
+		*workp = 0;
+		local_irq_enable();
+		if (work)
+			rcu_process_callbacks();
+		local_bh_enable();
+		if (*workp == 0) {
+			trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
+			*statusp = RCU_KTHREAD_WAITING;
+			return;
+		}
 	}
-	invoke_rcu_callbacks_kthread();
+	*statusp = RCU_KTHREAD_YIELDING;
+	trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
+	schedule_timeout_interruptible(2);
+	trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
+	*statusp = RCU_KTHREAD_WAITING;
 }
 
-static void invoke_rcu_core(void)
+static struct smp_hotplug_thread rcu_cpu_thread_spec = {
+	.store			= &rcu_cpu_kthread_task,
+	.thread_should_run	= rcu_cpu_kthread_should_run,
+	.thread_fn		= rcu_cpu_kthread,
+	.thread_comm		= "rcuc/%u",
+	.setup			= rcu_cpu_kthread_setup,
+	.park			= rcu_cpu_kthread_park,
+};
+
+/*
+ * Spawn per-CPU RCU core processing kthreads.
+ */
+static int __init rcu_spawn_core_kthreads(void)
 {
-	if (cpu_online(smp_processor_id()))
-		raise_softirq(RCU_SOFTIRQ);
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		per_cpu(rcu_cpu_has_work, cpu) = 0;
+	BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
+	return 0;
 }
+early_initcall(rcu_spawn_core_kthreads);
 
 /*
  * Handle any core-RCU processing required by a call_rcu() invocation.
@@ -3325,7 +3411,6 @@ void __init rcu_init(void)
 	rcu_init_one(&rcu_sched_state, &rcu_sched_data);
 	rcu_init_one(&rcu_bh_state, &rcu_bh_data);
 	__rcu_init_preempt();
-	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
 
 	/*
 	 * We don't need protection against CPU-hotplug here because
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index eb4fe67..da1439d 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -526,10 +526,9 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
 static void __init __rcu_init_preempt(void);
 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
 static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
-static void invoke_rcu_callbacks_kthread(void);
 static bool rcu_is_callbacks_kthread(void);
+static void rcu_cpu_kthread_setup(unsigned int cpu);
 #ifdef CONFIG_RCU_BOOST
-static void rcu_preempt_do_callbacks(void);
 static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
 						 struct rcu_node *rnp);
 #endif /* #ifdef CONFIG_RCU_BOOST */
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index c1735a1..34d2a06 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -24,12 +24,6 @@
  *	   Paul E. McKenney <paulmck@linux.vnet.ibm.com>
  */
 
-#include <linux/delay.h>
-#include <linux/gfp.h>
-#include <linux/oom.h>
-#include <linux/smpboot.h>
-#include "time/tick-internal.h"
-
 #define RCU_KTHREAD_PRIO 1
 
 #ifdef CONFIG_RCU_BOOST
@@ -656,15 +650,6 @@ static void rcu_preempt_check_callbacks(int cpu)
 		t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
 }
 
-#ifdef CONFIG_RCU_BOOST
-
-static void rcu_preempt_do_callbacks(void)
-{
-	rcu_do_batch(&rcu_preempt_state, &__get_cpu_var(rcu_preempt_data));
-}
-
-#endif /* #ifdef CONFIG_RCU_BOOST */
-
 /*
  * Queue a preemptible-RCU callback for invocation after a grace period.
  */
@@ -1126,6 +1111,19 @@ void exit_rcu(void)
 
 #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
 
+/*
+ * If boosting, set rcuc kthreads to realtime priority.
+ */
+static void rcu_cpu_kthread_setup(unsigned int cpu)
+{
+#ifdef CONFIG_RCU_BOOST
+	struct sched_param sp;
+
+	sp.sched_priority = RCU_KTHREAD_PRIO;
+	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
+#endif /* #ifdef CONFIG_RCU_BOOST */
+}
+
 #ifdef CONFIG_RCU_BOOST
 
 #include "rtmutex_common.h"
@@ -1157,16 +1155,6 @@ static void rcu_initiate_boost_trace(struct rcu_node *rnp)
 
 #endif /* #else #ifdef CONFIG_RCU_TRACE */
 
-static void rcu_wake_cond(struct task_struct *t, int status)
-{
-	/*
-	 * If the thread is yielding, only wake it when this
-	 * is invoked from idle
-	 */
-	if (status != RCU_KTHREAD_YIELDING || is_idle_task(current))
-		wake_up_process(t);
-}
-
 /*
  * Carry out RCU priority boosting on the task indicated by ->exp_tasks
  * or ->boost_tasks, advancing the pointer to the next task in the
@@ -1310,23 +1298,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
 }
 
 /*
- * Wake up the per-CPU kthread to invoke RCU callbacks.
- */
-static void invoke_rcu_callbacks_kthread(void)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	__this_cpu_write(rcu_cpu_has_work, 1);
-	if (__this_cpu_read(rcu_cpu_kthread_task) != NULL &&
-	    current != __this_cpu_read(rcu_cpu_kthread_task)) {
-		rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task),
-			      __this_cpu_read(rcu_cpu_kthread_status));
-	}
-	local_irq_restore(flags);
-}
-
-/*
  * Is the current CPU running the RCU-callbacks kthread?
  * Caller must have preemption disabled.
  */
@@ -1380,67 +1351,6 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
 	return 0;
 }
 
-static void rcu_kthread_do_work(void)
-{
-	rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data));
-	rcu_do_batch(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
-	rcu_preempt_do_callbacks();
-}
-
-static void rcu_cpu_kthread_setup(unsigned int cpu)
-{
-	struct sched_param sp;
-
-	sp.sched_priority = RCU_KTHREAD_PRIO;
-	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
-}
-
-static void rcu_cpu_kthread_park(unsigned int cpu)
-{
-	per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
-}
-
-static int rcu_cpu_kthread_should_run(unsigned int cpu)
-{
-	return __get_cpu_var(rcu_cpu_has_work);
-}
-
-/*
- * Per-CPU kernel thread that invokes RCU callbacks.  This replaces the
- * RCU softirq used in flavors and configurations of RCU that do not
- * support RCU priority boosting.
- */
-static void rcu_cpu_kthread(unsigned int cpu)
-{
-	unsigned int *statusp = &__get_cpu_var(rcu_cpu_kthread_status);
-	char work, *workp = &__get_cpu_var(rcu_cpu_has_work);
-	int spincnt;
-
-	for (spincnt = 0; spincnt < 10; spincnt++) {
-		trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
-		local_bh_disable();
-		*statusp = RCU_KTHREAD_RUNNING;
-		this_cpu_inc(rcu_cpu_kthread_loops);
-		local_irq_disable();
-		work = *workp;
-		*workp = 0;
-		local_irq_enable();
-		if (work)
-			rcu_kthread_do_work();
-		local_bh_enable();
-		if (*workp == 0) {
-			trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
-			*statusp = RCU_KTHREAD_WAITING;
-			return;
-		}
-	}
-	*statusp = RCU_KTHREAD_YIELDING;
-	trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
-	schedule_timeout_interruptible(2);
-	trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
-	*statusp = RCU_KTHREAD_WAITING;
-}
-
 /*
  * Set the per-rcu_node kthread's affinity to cover all CPUs that are
  * served by the rcu_node in question.  The CPU hotplug lock is still
@@ -1474,27 +1384,14 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
 	free_cpumask_var(cm);
 }
 
-static struct smp_hotplug_thread rcu_cpu_thread_spec = {
-	.store			= &rcu_cpu_kthread_task,
-	.thread_should_run	= rcu_cpu_kthread_should_run,
-	.thread_fn		= rcu_cpu_kthread,
-	.thread_comm		= "rcuc/%u",
-	.setup			= rcu_cpu_kthread_setup,
-	.park			= rcu_cpu_kthread_park,
-};
-
 /*
  * Spawn all kthreads -- called as soon as the scheduler is running.
  */
 static int __init rcu_spawn_kthreads(void)
 {
 	struct rcu_node *rnp;
-	int cpu;
 
 	rcu_scheduler_fully_active = 1;
-	for_each_possible_cpu(cpu)
-		per_cpu(rcu_cpu_has_work, cpu) = 0;
-	BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
 	rnp = rcu_get_root(rcu_state);
 	(void)rcu_spawn_one_boost_kthread(rcu_state, rnp);
 	if (NUM_RCU_NODES > 1) {
@@ -1522,11 +1419,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
 	raw_spin_unlock_irqrestore(&rnp->lock, flags);
 }
 
-static void invoke_rcu_callbacks_kthread(void)
-{
-	WARN_ON_ONCE(1);
-}
-
 static bool rcu_is_callbacks_kthread(void)
 {
 	return false;
-- 
1.8.5.1


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH] rcu: Eliminate softirq processing from rcutree
  2013-12-21 19:39 [PATCH] rcu: Eliminate softirq processing from rcutree Sebastian Andrzej Siewior
@ 2013-12-22  3:07 ` Mike Galbraith
  2013-12-22  8:57   ` Mike Galbraith
  0 siblings, 1 reply; 21+ messages in thread
From: Mike Galbraith @ 2013-12-22  3:07 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-rt-users, Steven Rostedt, linux-kernel, Thomas Gleixner,
	Paul E. McKenney

On Sat, 2013-12-21 at 20:39 +0100, Sebastian Andrzej Siewior wrote: 
> From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
> 
> Running RCU out of softirq is a problem for some workloads that would
> like to manage RCU core processing independently of other softirq work,
> for example, setting kthread priority.  This commit therefore moves the
> RCU core work from softirq to a per-CPU/per-flavor SCHED_OTHER kthread
> named rcuc.  The SCHED_OTHER approach avoids the scalability problems
> that appeared with the earlier attempt to move RCU core processing to
> from softirq to kthreads.  That said, kernels built with RCU_BOOST=y
> will run the rcuc kthreads at the RCU-boosting priority.

I'll take this for a spin on my 64 core test box.

I'm pretty sure I'll still end up having to split softirq threads again
though, as big box has been unable to meet jitter requirements without,
and last upstream rt kernel tested still couldn't.

-Mike

Hm.  Another thing I'll have to check again is btrfs locking fix, and
generic IO deadlocks if you don't pull your plug upon first rtmutex
block.  In 3.0, both were required for box to survive heavy fs pounding.
Oh yeah, and the pain of rt tasks playing idle balance for SCHED_OTHER
tasks, and nohz balancing crud, and cpupri cost when cores are isolated
and and.. sigh, big boxen _suck_ ;-)


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] rcu: Eliminate softirq processing from rcutree
  2013-12-22  3:07 ` Mike Galbraith
@ 2013-12-22  8:57   ` Mike Galbraith
  2013-12-23  4:38     ` Mike Galbraith
  0 siblings, 1 reply; 21+ messages in thread
From: Mike Galbraith @ 2013-12-22  8:57 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-rt-users, Steven Rostedt, linux-kernel, Thomas Gleixner,
	Paul E. McKenney

On Sun, 2013-12-22 at 04:07 +0100, Mike Galbraith wrote: 
> On Sat, 2013-12-21 at 20:39 +0100, Sebastian Andrzej Siewior wrote: 
> > From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
> > 
> > Running RCU out of softirq is a problem for some workloads that would
> > like to manage RCU core processing independently of other softirq work,
> > for example, setting kthread priority.  This commit therefore moves the
> > RCU core work from softirq to a per-CPU/per-flavor SCHED_OTHER kthread
> > named rcuc.  The SCHED_OTHER approach avoids the scalability problems
> > that appeared with the earlier attempt to move RCU core processing to
> > from softirq to kthreads.  That said, kernels built with RCU_BOOST=y
> > will run the rcuc kthreads at the RCU-boosting priority.
> 
> I'll take this for a spin on my 64 core test box.
> 
> I'm pretty sure I'll still end up having to split softirq threads again
> though, as big box has been unable to meet jitter requirements without,
> and last upstream rt kernel tested still couldn't.

Still can't fwiw, but whatever, back to $subject.  I'll let the box give
RCU something to do for a couple days.  No news is good news.

-Mike

30 minute isolated core jitter test says tinkering will definitely be
required.  3.0-rt does single digit worst case on same old box.  Darn.

(test is imperfect, but good enough)

FREQ=960 FRAMES=1728000 LOOP=50000 using CPUs 4 - 23
FREQ=1000 FRAMES=1800000 LOOP=48000 using CPUs 24 - 43
FREQ=300 FRAMES=540000 LOOP=160000 using CPUs 44 - 63
on your marks... get set... POW!
Cpu Frames    Min     Max(Frame)      Avg     Sigma     LastTrans Fliers(Frames) 
4   1727979   0.0159  181.66 (1043545)0.4492  0.5876    0 (0)     16 (828505,828506,859225,859226,889945,..1043546)
5   1727980   0.0159  181.90 (1013305)0.4560  0.6118    0 (0)     16 (798265,798266,828985,828986,859705,..1013306)
6   1727981   0.0159  189.05 (1013785)0.3691  0.6225    0 (0)     16 (798745,798746,829465,829466,860185,..1013786)
7   1727982   0.0159  177.88 (983546) 0.2885  0.5269    0 (0)     16 (768505,768506,799225,799226,829945,..983546)
8   1727984   0.0159  192.63 (984025) 0.3131  0.6307    0 (0)     18 (738265,738266,768985,768986,799705,..984026)
9   1727985   0.0159  16.43 (801406)  0.6562  0.5794    0 (0)     
10  1727986   0.0159  186.94 (954266) 0.3514  0.6252    0 (0)     16 (739225,739226,769945,769946,800665,..954266)
11  1727987   0.0159  194.06 (954745) 0.4341  0.6547    0 (0)     18 (708985,708986,739705,739706,770425,..954746)
12  1727989   0.0159  13.61 (67116)   0.3364  0.4294    0 (0)     
13  1727990   0.0159  186.19 (894265) 0.3955  0.6113    0 (0)     16 (679225,679226,709945,709946,740665,..894266)
14  1727991   0.0159  192.18 (894746) 0.4410  0.6449    0 (0)     18 (648985,648986,679705,679706,710425,..894746)
15  1727993   0.0159  183.36 (833786) 0.5582  0.6655    0 (0)     16 (618745,618746,649465,649466,680185,..833786)
16  1727994   0.0159  193.61 (895706) 0.6073  0.7382    0 (0)     17 (649945,680665,680666,711385,711386,..895706)
17  1727995   0.0159  36.94 (739943)  0.7135  0.7543    0 (0)     6 (173558,173559,739943,739944,1224751,1224752)
18  1727996   0.0159  167.39 (835226) 0.8385  0.8287    0 (0)     16 (620185,620186,650905,650906,681625,..835226)
19  1727997   0.0159  172.84 (804985) 0.5110  0.6959    0 (0)     17 (589946,620665,620666,651385,651386,..835706)
20  1727999   0.0159  180.47 (774745) 0.7566  0.7562    0 (0)     16 (559705,559706,590425,590426,621145,..774746)
21  1728000   0.0159  169.74 (744505) 0.7719  0.8154    0 (0)     16 (560185,560186,590905,590906,621625,..775226)
22  1728000   0.0159  194.80 (836667) 0.6799  0.7063    0 (0)     16 (590906,590907,622105,622106,652346,..836667)
23  1728000   0.0159  183.12 (745466) 0.6733  0.7091    0 (0)     16 (530425,530426,561145,561146,591865,..745466)
24  1800000   0.0725  7.46 (132730)   0.5375  0.4462    0 (0)     
25  1800000   0.0725  7.23 (132730)   0.5725  0.4816    0 (0)     
26  1800000   0.0725  7.23 (132730)   0.5119  0.4194    0 (0)     
27  1800000   0.0725  4.93 (132730)   0.4102  0.3379    0 (0)     
28  1800000   0.0725  5.08 (444312)   0.4275  0.3510    0 (0)     
29  1800000   0.0725  6.75 (132717)   0.5501  0.5232    0 (0)     
30  1800000   0.0725  11.61 (12026)   0.3811  0.3934    0 (0)     
31  1800000   0.0725  11.61 (12526)   0.4054  0.4551    0 (0)     
32  1800000   0.0725  50.95 (13026)   0.6015  0.5617    0 (0)     31 (13026,13027,45026,45027,77026,..909027)
33  1800000   0.0725  62.63 (13526)   0.5643  0.5922    0 (0)     112 (13526,13527,45526,45527,77526,..1773527)
34  1800000   0.0725  70.26 (14026)   0.3698  0.6132    0 (0)     112 (14026,14027,46026,46027,78026,..1774027)
35  1800000   0.0725  84.57 (14526)   0.6490  0.7981    0 (0)     112 (14526,14527,46526,46527,78526,..1774527)
36  1800000   0.0725  81.94 (943026)  0.3917  0.6387    0 (0)     112 (15026,15027,47026,47027,79026,..1775027)
37  1800000   0.0725  93.86 (15526)   0.6346  0.8580    0 (0)     112 (15526,15527,47526,47527,79526,..1775527)
38  1800000   0.0725  82.66 (144026)  0.4776  0.7459    0 (0)     112 (16026,16027,48026,48027,80026,..1776027)
39  1800000   0.0725  96.63 (16527)   0.4559  0.6881    0 (0)     112 (16526,16527,48526,48527,80526,..1776527)
40  1800000   0.0725  169.44 (17026)  0.4103  1.2801    0 (0)     112 (17026,17027,49026,49027,81026,..1777027)
41  1800000   0.0725  172.07 (145526) 0.6840  1.4300    0 (0)     112 (17526,17527,49526,49527,81526,..1777527)
42  1800000   0.0725  180.41 (18026)  0.5174  1.4290    0 (0)     112 (18026,18027,50026,50027,82026,..1778027)
43  1800000   0.0725  193.52 (466526) 1.4156  2.1665    0 (0)     112 (18526,18527,50526,50527,82526,..1778527)
44  540000    0.0032  11.92 (465332)  0.6862  0.7986    0 (0)     
45  540000    0.0032  14.30 (452401)  1.2460  0.9368    0 (0)     
46  540000    0.0032  13.35 (452402)  1.1379  0.9079    0 (0)     
47  540000    0.0032  12.64 (457991)  1.0116  0.8752    0 (0)     
48  540000    0.0032  10.49 (412210)  0.6312  0.7135    0 (0)     
49  540000    0.0032  10.01 (412210)  0.5195  0.6649    0 (0)     
50  540000    0.0032  9.53 (382891)   0.4586  0.6074    0 (0)     
51  540000    0.0032  10.02 (374093)  0.5366  0.6469    0 (0)     
52  540000    0.0032  13.35 (360263)  0.6725  0.7738    0 (0)     
53  540000    0.0032  10.73 (344411)  1.0863  0.9166    0 (0)     
54  540000    0.0032  12.16 (345014)  0.8779  0.7472    0 (0)     
55  540000    0.0032  11.45 (339953)  0.8549  0.7650    0 (0)     
56  540000    0.0032  11.45 (220829)  0.6262  0.7004    0 (0)     
57  540000    0.0032  9.77 (209623)   0.5978  0.6652    0 (0)     
58  540000    0.0032  9.78 (188624)   0.5481  0.6939    0 (0)     
59  540000    0.0032  5.96 (124615)   1.1515  0.8079    0 (0)     
60  540000    0.0032  13.11 (162649)  0.7490  0.7415    0 (0)     
61  540000    0.0032  11.92 (161653)  0.7996  0.8211    0 (0)     
62  540000    0.0032  13.12 (163325)  0.6313  0.8024    0 (0)     
63  540000    0.0032  9.30 (182608)   0.5861  0.6881    0 (0)



^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] rcu: Eliminate softirq processing from rcutree
  2013-12-22  8:57   ` Mike Galbraith
@ 2013-12-23  4:38     ` Mike Galbraith
  2013-12-23  5:12       ` Mike Galbraith
  2013-12-24 19:36       ` Paul E. McKenney
  0 siblings, 2 replies; 21+ messages in thread
From: Mike Galbraith @ 2013-12-23  4:38 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-rt-users, Steven Rostedt, linux-kernel, Thomas Gleixner,
	Paul E. McKenney

[-- Attachment #1: Type: text/plain, Size: 528 bytes --]

On Sun, 2013-12-22 at 09:57 +0100, Mike Galbraith wrote: 
> I'll let the box give
> RCU something to do for a couple days.  No news is good news.

Ho ho hum, merry christmas, gift attached.

I'll beat on virgin -rt7, see if it survives, then re-apply RCU patch
and retest.  This kernel had nohz_full enabled, along with Sebastian's
pending -rt fix for same, so RCU patch was not only not running solo,
box was running a known somewhat buggy config as well.  Box was doing
endless tbench 64 when it started stalling fwiw.

-Mike

[-- Attachment #2: vogelweide-stall.gz --]
[-- Type: application/x-gzip, Size: 14532 bytes --]

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] rcu: Eliminate softirq processing from rcutree
  2013-12-23  4:38     ` Mike Galbraith
@ 2013-12-23  5:12       ` Mike Galbraith
  2014-01-24 19:46         ` Sebastian Andrzej Siewior
  2013-12-24 19:36       ` Paul E. McKenney
  1 sibling, 1 reply; 21+ messages in thread
From: Mike Galbraith @ 2013-12-23  5:12 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-rt-users, Steven Rostedt, linux-kernel, Thomas Gleixner,
	Paul E. McKenney

On Mon, 2013-12-23 at 05:38 +0100, Mike Galbraith wrote: 
> On Sun, 2013-12-22 at 09:57 +0100, Mike Galbraith wrote: 
> > I'll let the box give
> > RCU something to do for a couple days.  No news is good news.
> 
> Ho ho hum, merry christmas, gift attached.
> 
> I'll beat on virgin -rt7, see if it survives, then re-apply RCU patch
> and retest.  This kernel had nohz_full enabled, along with Sebastian's
> pending -rt fix for same, so RCU patch was not only not running solo,
> box was running a known somewhat buggy config as well.  Box was doing
> endless tbench 64 when it started stalling fwiw.
> 
> -Mike

P.S.

virgin -rt7 doing tbench 64 + make -j64

[   97.907960] perf samples too long (3138 > 2500), lowering kernel.perf_event_max_sample_rate to 50000
[  103.047921] perf samples too long (5544 > 5000), lowering kernel.perf_event_max_sample_rate to 25000
[  181.561271] perf samples too long (10318 > 10000), lowering kernel.perf_event_max_sample_rate to 13000
[  184.243750] INFO: NMI handler (perf_event_nmi_handler) took too long to run: 1.084 msecs
[  248.914422] perf samples too long (19719 > 19230), lowering kernel.perf_event_max_sample_rate to 7000
[  382.116674] NOHZ: local_softirq_pending 10
[  405.201593] perf samples too long (36824 > 35714), lowering kernel.perf_event_max_sample_rate to 4000
[  444.704185] NOHZ: local_softirq_pending 08
[  444.704208] NOHZ: local_softirq_pending 08
[  444.704579] NOHZ: local_softirq_pending 08
[  444.704678] NOHZ: local_softirq_pending 08
[  444.705100] NOHZ: local_softirq_pending 08
[  444.705980] NOHZ: local_softirq_pending 08
[  444.705994] NOHZ: local_softirq_pending 08
[  444.708315] NOHZ: local_softirq_pending 08
[  444.710348] NOHZ: local_softirq_pending 08
[  474.435582] INFO: NMI handler (perf_event_nmi_handler) took too long to run: 1.096 msecs
[  475.994055] perf samples too long (63124 > 62500), lowering kernel.perf_event_max_sample_rate to 2000

Those annoying perf gripes are generic, not -rt.


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] rcu: Eliminate softirq processing from rcutree
  2013-12-23  4:38     ` Mike Galbraith
  2013-12-23  5:12       ` Mike Galbraith
@ 2013-12-24 19:36       ` Paul E. McKenney
  2013-12-25  3:07         ` Mike Galbraith
  1 sibling, 1 reply; 21+ messages in thread
From: Paul E. McKenney @ 2013-12-24 19:36 UTC (permalink / raw)
  To: Mike Galbraith
  Cc: Sebastian Andrzej Siewior, linux-rt-users, Steven Rostedt,
	linux-kernel, Thomas Gleixner

On Mon, Dec 23, 2013 at 05:38:53AM +0100, Mike Galbraith wrote:
> On Sun, 2013-12-22 at 09:57 +0100, Mike Galbraith wrote: 
> > I'll let the box give
> > RCU something to do for a couple days.  No news is good news.
> 
> Ho ho hum, merry christmas, gift attached.

Hmmm...  I guess I should take a moment to work out who has been
naughty and nice...

> I'll beat on virgin -rt7, see if it survives, then re-apply RCU patch
> and retest.  This kernel had nohz_full enabled, along with Sebastian's
> pending -rt fix for same, so RCU patch was not only not running solo,
> box was running a known somewhat buggy config as well.  Box was doing
> endless tbench 64 when it started stalling fwiw.

[72788.040872] NMI backtrace for cpu 31
[72788.040874] CPU: 31 PID: 13975 Comm: tbench Tainted: G        W    3.12.6-rt7-nohz #192
[72788.040874] Hardware name: Hewlett-Packard ProLiant DL980 G7, BIOS P66 07/07/2010
[72788.040875] task: ffff8802728e3db0 ti: ffff88026deb2000 task.ti: ffff88026deb2000
[72788.040877] RIP: 0010:[<ffffffff815e34e4>]  [<ffffffff815e34e4>] _raw_spin_trylock+0x14/0x80
[72788.040878] RSP: 0018:ffff8802769e3e58  EFLAGS: 00000002
[72788.040879] RAX: ffff88026deb3fd8 RBX: ffff880273544000 RCX: 000000007bc87bc6
[72788.040879] RDX: 0000000000000000 RSI: ffff8802728e3db0 RDI: ffff880273544000
[72788.040880] RBP: ffff88026deb39f8 R08: 0000063c14effd0f R09: 0000000000000119
[72788.040881] R10: 0000000000000005 R11: ffff8802769f2260 R12: ffff8802728e3db0
[72788.040881] R13: 000000000000001f R14: ffff8802769ebcc0 R15: ffffffff810c4730
[72788.040883] FS:  00007f7cd380a700(0000) GS:ffff8802769e0000(0000) knlGS:0000000000000000
[72788.040883] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[72788.040884] CR2: 000000000068a0e8 CR3: 0000000267ba4000 CR4: 00000000000007e0
[72788.040885] Stack:
[72788.040886]  ffff88026deb39f8 ffffffff815e2aa0 0000000000000000 ffffffff8106711a
[72788.040887]  ffff8802769ec4e0 ffff8802769ec4e0 ffff8802769e3f58 ffffffff810c44bd
[72788.040888]  ffff88026deb39f8 ffff88026deb39f8 000015ed4f5ff89b ffffffff810c476e
[72788.040889] Call Trace:
[72788.040889]  <IRQ> 
[72788.040891]  [<ffffffff815e2aa0>] ? rt_spin_lock_slowunlock_hirq+0x10/0x20
[72788.040893]  [<ffffffff8106711a>] ? update_process_times+0x3a/0x60
[72788.040895]  [<ffffffff810c44bd>] ? tick_sched_handle+0x2d/0x70
[72788.040896]  [<ffffffff810c476e>] ? tick_sched_timer+0x3e/0x70
[72788.040898]  [<ffffffff810839dd>] ? __run_hrtimer+0x13d/0x260
[72788.040900]  [<ffffffff81083c2c>] ? hrtimer_interrupt+0x12c/0x310
[72788.040901]  [<ffffffff8109593e>] ? vtime_account_system+0x4e/0xf0
[72788.040903]  [<ffffffff81035656>] ? smp_apic_timer_interrupt+0x36/0x50
[72788.040904]  [<ffffffff815ebc9d>] ? apic_timer_interrupt+0x6d/0x80
[72788.040905]  <EOI> 
[72788.040906]  [<ffffffff815e338a>] ? _raw_spin_lock+0x2a/0x40
[72788.040908]  [<ffffffff815e23b3>] ? rt_spin_lock_slowlock+0x33/0x2d0
[72788.040910]  [<ffffffff8108ee44>] ? migrate_enable+0xc4/0x220
[72788.040911]  [<ffffffff8152f888>] ? ip_finish_output+0x258/0x450
[72788.040913]  [<ffffffff81067011>] ? lock_timer_base+0x41/0x80
[72788.040914]  [<ffffffff81068db6>] ? mod_timer+0x66/0x290
[72788.040916]  [<ffffffff814df02f>] ? sk_reset_timer+0xf/0x20
[72788.040917]  [<ffffffff81547d7f>] ? tcp_write_xmit+0x1cf/0x5d0
[72788.040919]  [<ffffffff815481e5>] ? __tcp_push_pending_frames+0x25/0x60
[72788.040921]  [<ffffffff81539e34>] ? tcp_sendmsg+0x114/0xbb0
[72788.040923]  [<ffffffff814dbc1f>] ? sock_sendmsg+0xaf/0xf0
[72788.040925]  [<ffffffff811bf5e5>] ? touch_atime+0x65/0x150
[72788.040927]  [<ffffffff814dbd78>] ? SyS_sendto+0x118/0x190
[72788.040929]  [<ffffffff81095b66>] ? vtime_account_user+0x66/0x100
[72788.040930]  [<ffffffff8100f36a>] ? syscall_trace_enter+0x2a/0x260
[72788.040932]  [<ffffffff815eb249>] ? tracesys+0xdd/0xe2

The most likely suspect is the rt_spin_lock_slowlock() that is apparently
being acquired by migrate_enable().  This could be due to:

1.	Massive contention on that lock.

2.	Someone else holding that lock for excessive time periods.
	Evidence in favor: CPU 0 appears to be running within
	migrate_enable().  But isn't migrate_enable() really quite
	lightweight?

3.	Possible looping in the networking stack -- but this seems
	unlikely given that we appear to have caught a lock acquisition
	in the act.  (Not impossible, however, if there are lots of
	migrate_enable() calls in the networking stack, which there
	are due to all the per-CPU work.)

So which code do you think deserves the big lump of coal?  ;-)

							Thanx, Paul


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] rcu: Eliminate softirq processing from rcutree
  2013-12-24 19:36       ` Paul E. McKenney
@ 2013-12-25  3:07         ` Mike Galbraith
  2013-12-25  7:55           ` Paul E. McKenney
  2013-12-26 10:03           ` Mike Galbraith
  0 siblings, 2 replies; 21+ messages in thread
From: Mike Galbraith @ 2013-12-25  3:07 UTC (permalink / raw)
  To: paulmck
  Cc: Sebastian Andrzej Siewior, linux-rt-users, Steven Rostedt,
	linux-kernel, Thomas Gleixner

On Tue, 2013-12-24 at 11:36 -0800, Paul E. McKenney wrote: 
> On Mon, Dec 23, 2013 at 05:38:53AM +0100, Mike Galbraith wrote:
> > On Sun, 2013-12-22 at 09:57 +0100, Mike Galbraith wrote: 
> > > I'll let the box give
> > > RCU something to do for a couple days.  No news is good news.
> > 
> > Ho ho hum, merry christmas, gift attached.
> 
> Hmmm...  I guess I should take a moment to work out who has been
> naughty and nice...
> 
> > I'll beat on virgin -rt7, see if it survives, then re-apply RCU patch
> > and retest.  This kernel had nohz_full enabled, along with Sebastian's
> > pending -rt fix for same, so RCU patch was not only not running solo,
> > box was running a known somewhat buggy config as well.  Box was doing
> > endless tbench 64 when it started stalling fwiw.
> 
> [72788.040872] NMI backtrace for cpu 31
> [72788.040874] CPU: 31 PID: 13975 Comm: tbench Tainted: G        W    3.12.6-rt7-nohz #192
> [72788.040874] Hardware name: Hewlett-Packard ProLiant DL980 G7, BIOS P66 07/07/2010
> [72788.040875] task: ffff8802728e3db0 ti: ffff88026deb2000 task.ti: ffff88026deb2000
> [72788.040877] RIP: 0010:[<ffffffff815e34e4>]  [<ffffffff815e34e4>] _raw_spin_trylock+0x14/0x80
> [72788.040878] RSP: 0018:ffff8802769e3e58  EFLAGS: 00000002
> [72788.040879] RAX: ffff88026deb3fd8 RBX: ffff880273544000 RCX: 000000007bc87bc6
> [72788.040879] RDX: 0000000000000000 RSI: ffff8802728e3db0 RDI: ffff880273544000
> [72788.040880] RBP: ffff88026deb39f8 R08: 0000063c14effd0f R09: 0000000000000119
> [72788.040881] R10: 0000000000000005 R11: ffff8802769f2260 R12: ffff8802728e3db0
> [72788.040881] R13: 000000000000001f R14: ffff8802769ebcc0 R15: ffffffff810c4730
> [72788.040883] FS:  00007f7cd380a700(0000) GS:ffff8802769e0000(0000) knlGS:0000000000000000
> [72788.040883] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [72788.040884] CR2: 000000000068a0e8 CR3: 0000000267ba4000 CR4: 00000000000007e0
> [72788.040885] Stack:
> [72788.040886]  ffff88026deb39f8 ffffffff815e2aa0 0000000000000000 ffffffff8106711a
> [72788.040887]  ffff8802769ec4e0 ffff8802769ec4e0 ffff8802769e3f58 ffffffff810c44bd
> [72788.040888]  ffff88026deb39f8 ffff88026deb39f8 000015ed4f5ff89b ffffffff810c476e
> [72788.040889] Call Trace:
> [72788.040889]  <IRQ> 
> [72788.040891]  [<ffffffff815e2aa0>] ? rt_spin_lock_slowunlock_hirq+0x10/0x20
> [72788.040893]  [<ffffffff8106711a>] ? update_process_times+0x3a/0x60
> [72788.040895]  [<ffffffff810c44bd>] ? tick_sched_handle+0x2d/0x70
> [72788.040896]  [<ffffffff810c476e>] ? tick_sched_timer+0x3e/0x70
> [72788.040898]  [<ffffffff810839dd>] ? __run_hrtimer+0x13d/0x260
> [72788.040900]  [<ffffffff81083c2c>] ? hrtimer_interrupt+0x12c/0x310
> [72788.040901]  [<ffffffff8109593e>] ? vtime_account_system+0x4e/0xf0
> [72788.040903]  [<ffffffff81035656>] ? smp_apic_timer_interrupt+0x36/0x50
> [72788.040904]  [<ffffffff815ebc9d>] ? apic_timer_interrupt+0x6d/0x80
> [72788.040905]  <EOI> 
> [72788.040906]  [<ffffffff815e338a>] ? _raw_spin_lock+0x2a/0x40
> [72788.040908]  [<ffffffff815e23b3>] ? rt_spin_lock_slowlock+0x33/0x2d0
> [72788.040910]  [<ffffffff8108ee44>] ? migrate_enable+0xc4/0x220
> [72788.040911]  [<ffffffff8152f888>] ? ip_finish_output+0x258/0x450
> [72788.040913]  [<ffffffff81067011>] ? lock_timer_base+0x41/0x80
> [72788.040914]  [<ffffffff81068db6>] ? mod_timer+0x66/0x290
> [72788.040916]  [<ffffffff814df02f>] ? sk_reset_timer+0xf/0x20
> [72788.040917]  [<ffffffff81547d7f>] ? tcp_write_xmit+0x1cf/0x5d0
> [72788.040919]  [<ffffffff815481e5>] ? __tcp_push_pending_frames+0x25/0x60
> [72788.040921]  [<ffffffff81539e34>] ? tcp_sendmsg+0x114/0xbb0
> [72788.040923]  [<ffffffff814dbc1f>] ? sock_sendmsg+0xaf/0xf0
> [72788.040925]  [<ffffffff811bf5e5>] ? touch_atime+0x65/0x150
> [72788.040927]  [<ffffffff814dbd78>] ? SyS_sendto+0x118/0x190
> [72788.040929]  [<ffffffff81095b66>] ? vtime_account_user+0x66/0x100
> [72788.040930]  [<ffffffff8100f36a>] ? syscall_trace_enter+0x2a/0x260
> [72788.040932]  [<ffffffff815eb249>] ? tracesys+0xdd/0xe2
> 
> The most likely suspect is the rt_spin_lock_slowlock() that is apparently
> being acquired by migrate_enable().  This could be due to:
> 
> 1.	Massive contention on that lock.
> 
> 2.	Someone else holding that lock for excessive time periods.
> 	Evidence in favor: CPU 0 appears to be running within
> 	migrate_enable().  But isn't migrate_enable() really quite
> 	lightweight?
> 
> 3.	Possible looping in the networking stack -- but this seems
> 	unlikely given that we appear to have caught a lock acquisition
> 	in the act.  (Not impossible, however, if there are lots of
> 	migrate_enable() calls in the networking stack, which there
> 	are due to all the per-CPU work.)
> 
> So which code do you think deserves the big lump of coal?  ;-)

Sebastian's NO_HZ_FULL locking fixes.  Locking is hard, and rt sure
doesn't make it any easier, so lets give him a cookie or three to nibble
on while he ponders that trylock stuff again instead :)

-Mike


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] rcu: Eliminate softirq processing from rcutree
  2013-12-25  3:07         ` Mike Galbraith
@ 2013-12-25  7:55           ` Paul E. McKenney
  2013-12-25 17:37             ` Mike Galbraith
  2013-12-26 10:03           ` Mike Galbraith
  1 sibling, 1 reply; 21+ messages in thread
From: Paul E. McKenney @ 2013-12-25  7:55 UTC (permalink / raw)
  To: Mike Galbraith
  Cc: Sebastian Andrzej Siewior, linux-rt-users, Steven Rostedt,
	linux-kernel, Thomas Gleixner

On Wed, Dec 25, 2013 at 04:07:34AM +0100, Mike Galbraith wrote:
> On Tue, 2013-12-24 at 11:36 -0800, Paul E. McKenney wrote: 
> > On Mon, Dec 23, 2013 at 05:38:53AM +0100, Mike Galbraith wrote:
> > > On Sun, 2013-12-22 at 09:57 +0100, Mike Galbraith wrote: 
> > > > I'll let the box give
> > > > RCU something to do for a couple days.  No news is good news.
> > > 
> > > Ho ho hum, merry christmas, gift attached.
> > 
> > Hmmm...  I guess I should take a moment to work out who has been
> > naughty and nice...
> > 
> > > I'll beat on virgin -rt7, see if it survives, then re-apply RCU patch
> > > and retest.  This kernel had nohz_full enabled, along with Sebastian's
> > > pending -rt fix for same, so RCU patch was not only not running solo,
> > > box was running a known somewhat buggy config as well.  Box was doing
> > > endless tbench 64 when it started stalling fwiw.
> > 
> > [72788.040872] NMI backtrace for cpu 31
> > [72788.040874] CPU: 31 PID: 13975 Comm: tbench Tainted: G        W    3.12.6-rt7-nohz #192
> > [72788.040874] Hardware name: Hewlett-Packard ProLiant DL980 G7, BIOS P66 07/07/2010
> > [72788.040875] task: ffff8802728e3db0 ti: ffff88026deb2000 task.ti: ffff88026deb2000
> > [72788.040877] RIP: 0010:[<ffffffff815e34e4>]  [<ffffffff815e34e4>] _raw_spin_trylock+0x14/0x80
> > [72788.040878] RSP: 0018:ffff8802769e3e58  EFLAGS: 00000002
> > [72788.040879] RAX: ffff88026deb3fd8 RBX: ffff880273544000 RCX: 000000007bc87bc6
> > [72788.040879] RDX: 0000000000000000 RSI: ffff8802728e3db0 RDI: ffff880273544000
> > [72788.040880] RBP: ffff88026deb39f8 R08: 0000063c14effd0f R09: 0000000000000119
> > [72788.040881] R10: 0000000000000005 R11: ffff8802769f2260 R12: ffff8802728e3db0
> > [72788.040881] R13: 000000000000001f R14: ffff8802769ebcc0 R15: ffffffff810c4730
> > [72788.040883] FS:  00007f7cd380a700(0000) GS:ffff8802769e0000(0000) knlGS:0000000000000000
> > [72788.040883] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > [72788.040884] CR2: 000000000068a0e8 CR3: 0000000267ba4000 CR4: 00000000000007e0
> > [72788.040885] Stack:
> > [72788.040886]  ffff88026deb39f8 ffffffff815e2aa0 0000000000000000 ffffffff8106711a
> > [72788.040887]  ffff8802769ec4e0 ffff8802769ec4e0 ffff8802769e3f58 ffffffff810c44bd
> > [72788.040888]  ffff88026deb39f8 ffff88026deb39f8 000015ed4f5ff89b ffffffff810c476e
> > [72788.040889] Call Trace:
> > [72788.040889]  <IRQ> 
> > [72788.040891]  [<ffffffff815e2aa0>] ? rt_spin_lock_slowunlock_hirq+0x10/0x20
> > [72788.040893]  [<ffffffff8106711a>] ? update_process_times+0x3a/0x60
> > [72788.040895]  [<ffffffff810c44bd>] ? tick_sched_handle+0x2d/0x70
> > [72788.040896]  [<ffffffff810c476e>] ? tick_sched_timer+0x3e/0x70
> > [72788.040898]  [<ffffffff810839dd>] ? __run_hrtimer+0x13d/0x260
> > [72788.040900]  [<ffffffff81083c2c>] ? hrtimer_interrupt+0x12c/0x310
> > [72788.040901]  [<ffffffff8109593e>] ? vtime_account_system+0x4e/0xf0
> > [72788.040903]  [<ffffffff81035656>] ? smp_apic_timer_interrupt+0x36/0x50
> > [72788.040904]  [<ffffffff815ebc9d>] ? apic_timer_interrupt+0x6d/0x80
> > [72788.040905]  <EOI> 
> > [72788.040906]  [<ffffffff815e338a>] ? _raw_spin_lock+0x2a/0x40
> > [72788.040908]  [<ffffffff815e23b3>] ? rt_spin_lock_slowlock+0x33/0x2d0
> > [72788.040910]  [<ffffffff8108ee44>] ? migrate_enable+0xc4/0x220
> > [72788.040911]  [<ffffffff8152f888>] ? ip_finish_output+0x258/0x450
> > [72788.040913]  [<ffffffff81067011>] ? lock_timer_base+0x41/0x80
> > [72788.040914]  [<ffffffff81068db6>] ? mod_timer+0x66/0x290
> > [72788.040916]  [<ffffffff814df02f>] ? sk_reset_timer+0xf/0x20
> > [72788.040917]  [<ffffffff81547d7f>] ? tcp_write_xmit+0x1cf/0x5d0
> > [72788.040919]  [<ffffffff815481e5>] ? __tcp_push_pending_frames+0x25/0x60
> > [72788.040921]  [<ffffffff81539e34>] ? tcp_sendmsg+0x114/0xbb0
> > [72788.040923]  [<ffffffff814dbc1f>] ? sock_sendmsg+0xaf/0xf0
> > [72788.040925]  [<ffffffff811bf5e5>] ? touch_atime+0x65/0x150
> > [72788.040927]  [<ffffffff814dbd78>] ? SyS_sendto+0x118/0x190
> > [72788.040929]  [<ffffffff81095b66>] ? vtime_account_user+0x66/0x100
> > [72788.040930]  [<ffffffff8100f36a>] ? syscall_trace_enter+0x2a/0x260
> > [72788.040932]  [<ffffffff815eb249>] ? tracesys+0xdd/0xe2
> > 
> > The most likely suspect is the rt_spin_lock_slowlock() that is apparently
> > being acquired by migrate_enable().  This could be due to:
> > 
> > 1.	Massive contention on that lock.
> > 
> > 2.	Someone else holding that lock for excessive time periods.
> > 	Evidence in favor: CPU 0 appears to be running within
> > 	migrate_enable().  But isn't migrate_enable() really quite
> > 	lightweight?
> > 
> > 3.	Possible looping in the networking stack -- but this seems
> > 	unlikely given that we appear to have caught a lock acquisition
> > 	in the act.  (Not impossible, however, if there are lots of
> > 	migrate_enable() calls in the networking stack, which there
> > 	are due to all the per-CPU work.)
> > 
> > So which code do you think deserves the big lump of coal?  ;-)
> 
> Sebastian's NO_HZ_FULL locking fixes.  Locking is hard, and rt sure
> doesn't make it any easier, so lets give him a cookie or three to nibble
> on while he ponders that trylock stuff again instead :)

Fair enough.  Does Sebastian prefer milk and cookies or the other
tradition of beer and a cigar?  ;-)

							Thanx, Paul


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] rcu: Eliminate softirq processing from rcutree
  2013-12-25  7:55           ` Paul E. McKenney
@ 2013-12-25 17:37             ` Mike Galbraith
  2014-01-17 17:14               ` Sebastian Andrzej Siewior
  0 siblings, 1 reply; 21+ messages in thread
From: Mike Galbraith @ 2013-12-25 17:37 UTC (permalink / raw)
  To: paulmck
  Cc: Sebastian Andrzej Siewior, linux-rt-users, Steven Rostedt,
	linux-kernel, Thomas Gleixner

On Tue, 2013-12-24 at 23:55 -0800, Paul E. McKenney wrote: 
> On Wed, Dec 25, 2013 at 04:07:34AM +0100, Mike Galbraith wrote:

> > > So which code do you think deserves the big lump of coal?  ;-)
> > 
> > Sebastian's NO_HZ_FULL locking fixes.  Locking is hard, and rt sure
> > doesn't make it any easier, so lets give him a cookie or three to nibble
> > on while he ponders that trylock stuff again instead :)
> 
> Fair enough.  Does Sebastian prefer milk and cookies or the other
> tradition of beer and a cigar?  ;-)

Having sufficiently recovered from turkey overdose to be able to slither
upstairs (bump bump bump) to check on the box, commenting..

# timers-do-not-raise-softirq-unconditionally.patch
# rtmutex-use-a-trylock-for-waiter-lock-in-trylock.patch

..those two out does seem to have stabilized the thing.

Merry Christmasss,

-Mike


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] rcu: Eliminate softirq processing from rcutree
  2013-12-25  3:07         ` Mike Galbraith
  2013-12-25  7:55           ` Paul E. McKenney
@ 2013-12-26 10:03           ` Mike Galbraith
  2014-01-17 17:23             ` Sebastian Andrzej Siewior
  1 sibling, 1 reply; 21+ messages in thread
From: Mike Galbraith @ 2013-12-26 10:03 UTC (permalink / raw)
  To: paulmck
  Cc: Sebastian Andrzej Siewior, linux-rt-users, Steven Rostedt,
	linux-kernel, Thomas Gleixner

On Wed, 2013-12-25 at 04:07 +0100, Mike Galbraith wrote:
> On Tue, 2013-12-24 at 11:36 -0800, Paul E. McKenney wrote: 

> > So which code do you think deserves the big lump of coal?  ;-)
> 
> Sebastian's NO_HZ_FULL locking fixes.

Whack-a-mole hasn't yet dug up any new moles.

---
 kernel/timer.c |    4 ++++
 1 file changed, 4 insertions(+)

Index: linux-2.6/kernel/timer.c
===================================================================
--- linux-2.6.orig/kernel/timer.c
+++ linux-2.6/kernel/timer.c
@@ -764,7 +764,9 @@ __mod_timer(struct timer_list *timer, un
 	timer_stats_timer_set_start_info(timer);
 	BUG_ON(!timer->function);
 
+	local_irq_disable_rt();
 	base = lock_timer_base(timer, &flags);
+	local_irq_enable_rt();
 
 	ret = detach_if_pending(timer, base, false);
 	if (!ret && pending_only)
@@ -1198,7 +1200,9 @@ static inline void __run_timers(struct t
 {
 	struct timer_list *timer;
 
+	local_irq_disable_rt();
 	spin_lock_irq(&base->lock);
+	local_irq_enable_rt();
 	while (time_after_eq(jiffies, base->timer_jiffies)) {
 		struct list_head work_list;
 		struct list_head *head = &work_list;
---
 kernel/time/tick-sched.c |    2 ++
 1 file changed, 2 insertions(+)

Index: linux-2.6/kernel/time/tick-sched.c
===================================================================
--- linux-2.6.orig/kernel/time/tick-sched.c
+++ linux-2.6/kernel/time/tick-sched.c
@@ -216,7 +216,9 @@ void __tick_nohz_full_check(void)
 
 static void nohz_full_kick_work_func(struct irq_work *work)
 {
+	local_irq_disable_rt();
 	__tick_nohz_full_check();
+	local_irq_enable_rt();
 }
 
 static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {




^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] rcu: Eliminate softirq processing from rcutree
  2013-12-25 17:37             ` Mike Galbraith
@ 2014-01-17 17:14               ` Sebastian Andrzej Siewior
  2014-01-18  3:25                 ` Mike Galbraith
  0 siblings, 1 reply; 21+ messages in thread
From: Sebastian Andrzej Siewior @ 2014-01-17 17:14 UTC (permalink / raw)
  To: Mike Galbraith
  Cc: paulmck, linux-rt-users, Steven Rostedt, linux-kernel, Thomas Gleixner

* Mike Galbraith | 2013-12-25 18:37:37 [+0100]:

>On Tue, 2013-12-24 at 23:55 -0800, Paul E. McKenney wrote: 
>> On Wed, Dec 25, 2013 at 04:07:34AM +0100, Mike Galbraith wrote:
>
>Having sufficiently recovered from turkey overdose to be able to slither
>upstairs (bump bump bump) to check on the box, commenting..
>
># timers-do-not-raise-softirq-unconditionally.patch
># rtmutex-use-a-trylock-for-waiter-lock-in-trylock.patch
>
>..those two out does seem to have stabilized the thing.

timers-do-not-raise-softirq-unconditionally.patch is on its way out.

rtmutex-use-a-trylock-for-waiter-lock-in-trylock.patch confuses me.
Didn't you report once that your box deadlocks without this patch? Now
your 64way box on the other hand does not work with it?

>Merry Christmasss,
>
>-Mike

Sebastian

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] rcu: Eliminate softirq processing from rcutree
  2013-12-26 10:03           ` Mike Galbraith
@ 2014-01-17 17:23             ` Sebastian Andrzej Siewior
  2014-01-18  3:30               ` Mike Galbraith
  0 siblings, 1 reply; 21+ messages in thread
From: Sebastian Andrzej Siewior @ 2014-01-17 17:23 UTC (permalink / raw)
  To: Mike Galbraith
  Cc: paulmck, linux-rt-users, Steven Rostedt, linux-kernel, Thomas Gleixner

* Mike Galbraith | 2013-12-26 11:03:32 [+0100]:

>On Wed, 2013-12-25 at 04:07 +0100, Mike Galbraith wrote:
>> On Tue, 2013-12-24 at 11:36 -0800, Paul E. McKenney wrote: 
>
>> > So which code do you think deserves the big lump of coal?  ;-)
>> 
>> Sebastian's NO_HZ_FULL locking fixes.
>
>Whack-a-mole hasn't yet dug up any new moles.
>
>---
> kernel/timer.c |    4 ++++
> 1 file changed, 4 insertions(+)
>
>Index: linux-2.6/kernel/timer.c
>===================================================================
>--- linux-2.6.orig/kernel/timer.c
>+++ linux-2.6/kernel/timer.c
>@@ -764,7 +764,9 @@ __mod_timer(struct timer_list *timer, un
> 	timer_stats_timer_set_start_info(timer);
> 	BUG_ON(!timer->function);
> 
>+	local_irq_disable_rt();
> 	base = lock_timer_base(timer, &flags);
>+	local_irq_enable_rt();
> 
> 	ret = detach_if_pending(timer, base, false);
> 	if (!ret && pending_only)
>@@ -1198,7 +1200,9 @@ static inline void __run_timers(struct t
> {
> 	struct timer_list *timer;
> 
>+	local_irq_disable_rt();
> 	spin_lock_irq(&base->lock);
>+	local_irq_enable_rt();
> 	while (time_after_eq(jiffies, base->timer_jiffies)) {
> 		struct list_head work_list;
> 		struct list_head *head = &work_list;
>---
> kernel/time/tick-sched.c |    2 ++
> 1 file changed, 2 insertions(+)

So I had rtmutex-take-the-waiter-lock-with-irqs-off.patch in my queue
which took the waiter lock with irqs off. This should be the same thing
you try do here.

>Index: linux-2.6/kernel/time/tick-sched.c
>===================================================================
>--- linux-2.6.orig/kernel/time/tick-sched.c
>+++ linux-2.6/kernel/time/tick-sched.c
>@@ -216,7 +216,9 @@ void __tick_nohz_full_check(void)
> 
> static void nohz_full_kick_work_func(struct irq_work *work)
> {
>+	local_irq_disable_rt();
> 	__tick_nohz_full_check();
>+	local_irq_enable_rt();
> }

and this should be fixed differently, since we come from a thread and
check "is current running", but by "current" we mean a user task and not
a kernel thread.

> 
> static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
>

Sebastian

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] rcu: Eliminate softirq processing from rcutree
  2014-01-17 17:14               ` Sebastian Andrzej Siewior
@ 2014-01-18  3:25                 ` Mike Galbraith
  2014-01-24 19:50                   ` Sebastian Andrzej Siewior
  0 siblings, 1 reply; 21+ messages in thread
From: Mike Galbraith @ 2014-01-18  3:25 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: paulmck, linux-rt-users, Steven Rostedt, linux-kernel, Thomas Gleixner

On Fri, 2014-01-17 at 18:14 +0100, Sebastian Andrzej Siewior wrote: 
> * Mike Galbraith | 2013-12-25 18:37:37 [+0100]:
> 
> >On Tue, 2013-12-24 at 23:55 -0800, Paul E. McKenney wrote: 
> >> On Wed, Dec 25, 2013 at 04:07:34AM +0100, Mike Galbraith wrote:
> >
> >Having sufficiently recovered from turkey overdose to be able to slither
> >upstairs (bump bump bump) to check on the box, commenting..
> >
> ># timers-do-not-raise-softirq-unconditionally.patch
> ># rtmutex-use-a-trylock-for-waiter-lock-in-trylock.patch
> >
> >..those two out does seem to have stabilized the thing.
> 
> timers-do-not-raise-softirq-unconditionally.patch is on its way out.
> 
> rtmutex-use-a-trylock-for-waiter-lock-in-trylock.patch confues me.
> Didn't you report once that your box deadlocks without this patch? Now
> your 64way box on the other hand does not work with it?

If 'do not raise' is applied, 'use a trylock' won't save you.  If 'do
not raise' is not applied, _and_ you wisely do not try to turn on very
expensive nohz_full, things work fine without 'use a trylock'.

-Mike


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] rcu: Eliminate softirq processing from rcutree
  2014-01-17 17:23             ` Sebastian Andrzej Siewior
@ 2014-01-18  3:30               ` Mike Galbraith
  0 siblings, 0 replies; 21+ messages in thread
From: Mike Galbraith @ 2014-01-18  3:30 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: paulmck, linux-rt-users, Steven Rostedt, linux-kernel, Thomas Gleixner

On Fri, 2014-01-17 at 18:23 +0100, Sebastian Andrzej Siewior wrote:

> So I had rtmutex-take-the-waiter-lock-with-irqs-off.patch in my queue
> which took the waiter lock with irqs off. This should be the same thing
> you try do here.

(yeah, these are just whacked mole body bags;)


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] rcu: Eliminate softirq processing from rcutree
  2013-12-23  5:12       ` Mike Galbraith
@ 2014-01-24 19:46         ` Sebastian Andrzej Siewior
  2014-01-25  5:20           ` Mike Galbraith
  0 siblings, 1 reply; 21+ messages in thread
From: Sebastian Andrzej Siewior @ 2014-01-24 19:46 UTC (permalink / raw)
  To: Mike Galbraith
  Cc: linux-rt-users, Steven Rostedt, linux-kernel, Thomas Gleixner,
	Paul E. McKenney

* Mike Galbraith | 2013-12-23 06:12:39 [+0100]:

>P.S.
>
>virgin -rt7 doing tbench 64 + make -j64
>
>[   97.907960] perf samples too long (3138 > 2500), lowering kernel.perf_event_max_sample_rate to 50000
>[  103.047921] perf samples too long (5544 > 5000), lowering kernel.perf_event_max_sample_rate to 25000
>[  181.561271] perf samples too long (10318 > 10000), lowering kernel.perf_event_max_sample_rate to 13000
>[  184.243750] INFO: NMI handler (perf_event_nmi_handler) took too long to run: 1.084 msecs
>[  248.914422] perf samples too long (19719 > 19230), lowering kernel.perf_event_max_sample_rate to 7000
>[  382.116674] NOHZ: local_softirq_pending 10
This is block

>[  405.201593] perf samples too long (36824 > 35714), lowering kernel.perf_event_max_sample_rate to 4000
>[  444.704185] NOHZ: local_softirq_pending 08
>[  444.704208] NOHZ: local_softirq_pending 08
>[  444.704579] NOHZ: local_softirq_pending 08
>[  444.704678] NOHZ: local_softirq_pending 08
>[  444.705100] NOHZ: local_softirq_pending 08
>[  444.705980] NOHZ: local_softirq_pending 08
>[  444.705994] NOHZ: local_softirq_pending 08
>[  444.708315] NOHZ: local_softirq_pending 08
>[  444.710348] NOHZ: local_softirq_pending 08

and this is RX. Is your testcase heavy disk-io or heavy disk-io +
network?

Sebastian

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] rcu: Eliminate softirq processing from rcutree
  2014-01-18  3:25                 ` Mike Galbraith
@ 2014-01-24 19:50                   ` Sebastian Andrzej Siewior
  2014-01-25  5:12                     ` Mike Galbraith
  0 siblings, 1 reply; 21+ messages in thread
From: Sebastian Andrzej Siewior @ 2014-01-24 19:50 UTC (permalink / raw)
  To: Mike Galbraith
  Cc: paulmck, linux-rt-users, Steven Rostedt, linux-kernel, Thomas Gleixner

* Mike Galbraith | 2014-01-18 04:25:14 [+0100]:

>> ># timers-do-not-raise-softirq-unconditionally.patch
>> ># rtmutex-use-a-trylock-for-waiter-lock-in-trylock.patch
>> >
>> >..those two out does seem to have stabilized the thing.
>> 
>> timers-do-not-raise-softirq-unconditionally.patch is on its way out.
>> 
>> rtmutex-use-a-trylock-for-waiter-lock-in-trylock.patch confuses me.
>> Didn't you report once that your box deadlocks without this patch? Now
>> your 64way box on the other hand does not work with it?
>
>If 'do not raise' is applied, 'use a trylock' won't save you.  If 'do
is this just an observation, or do you know why it won't save me?
Currently I am thinking of going back to the version where the waiter_lock
was taken with irqs off. However, I would prefer to trigger this myself so
I would know what is going on instead of blindly applying patches.

>-Mike

Sebastian

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] rcu: Eliminate softirq processing from rcutree
  2014-01-24 19:50                   ` Sebastian Andrzej Siewior
@ 2014-01-25  5:12                     ` Mike Galbraith
  2014-01-27  5:10                       ` Mike Galbraith
  0 siblings, 1 reply; 21+ messages in thread
From: Mike Galbraith @ 2014-01-25  5:12 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: paulmck, linux-rt-users, Steven Rostedt, linux-kernel, Thomas Gleixner

On Fri, 2014-01-24 at 20:50 +0100, Sebastian Andrzej Siewior wrote: 
> * Mike Galbraith | 2014-01-18 04:25:14 [+0100]:
> 
> >> ># timers-do-not-raise-softirq-unconditionally.patch
> >> ># rtmutex-use-a-trylock-for-waiter-lock-in-trylock.patch
> >> >
> >> >..those two out does seem to have stabilized the thing.
> >> 
> >> timers-do-not-raise-softirq-unconditionally.patch is on its way out.
> >> 
> >> rtmutex-use-a-trylock-for-waiter-lock-in-trylock.patch confuses me.
> >> Didn't you report once that your box deadlocks without this patch? Now
> >> your 64way box on the other hand does not work with it?
> >
> >If 'do not raise' is applied, 'use a trylock' won't save you.  If 'do
> is this just an observation or you do know why it won't save me?

It's an observation from beyond the grave from the 64 core box that it
repeatedly did NOT save :)  Autopsy photos below.

I've built 3.12.8-rt9 with Stevens v2 "timer: Raise softirq if there's
irq_work" to see if it'll survive.

nohz_full_all:
PID: 508    TASK: ffff8802739ba340  CPU: 16  COMMAND: "ksoftirqd/16"
 #0 [ffff880276806a40] machine_kexec at ffffffff8103bc07
 #1 [ffff880276806aa0] crash_kexec at ffffffff810d56b3
 #2 [ffff880276806b70] panic at ffffffff815bf8b0
 #3 [ffff880276806bf0] watchdog_overflow_callback at ffffffff810fed3d
 #4 [ffff880276806c10] __perf_event_overflow at ffffffff81131928
 #5 [ffff880276806ca0] perf_event_overflow at ffffffff81132254
 #6 [ffff880276806cb0] intel_pmu_handle_irq at ffffffff8102078f
 #7 [ffff880276806de0] perf_event_nmi_handler at ffffffff815c5825
 #8 [ffff880276806e10] nmi_handle at ffffffff815c4ed3
 #9 [ffff880276806ea0] default_do_nmi at ffffffff815c5063                                                                                                                                                                                   
#10 [ffff880276806ed0] do_nmi at ffffffff815c5388                                                                                                                                                                                           
#11 [ffff880276806ef0] end_repeat_nmi at ffffffff815c4371                                                                                                                                                                                   
    [exception RIP: _raw_spin_trylock+48]                                                                                                                                                                                                   
    RIP: ffffffff815c3790  RSP: ffff880276803e28  RFLAGS: 00000002                                                                                                                                                                          
    RAX: 0000000000000010  RBX: 0000000000000010  RCX: 0000000000000002                                                                                                                                                                     
    RDX: ffff880276803e28  RSI: 0000000000000018  RDI: 0000000000000001                                                                                                                                                                     
    RBP: ffffffff815c3790   R8: ffffffff815c3790   R9: 0000000000000018
    R10: ffff880276803e28  R11: 0000000000000002  R12: ffffffffffffffff
    R13: ffff880273a0c000  R14: ffff8802739ba340  R15: ffff880273a03fd8
    ORIG_RAX: ffff880273a03fd8  CS: 0010  SS: 0018
--- <RT exception stack> ---
#12 [ffff880276803e28] _raw_spin_trylock at ffffffff815c3790
#13 [ffff880276803e30] rt_spin_lock_slowunlock_hirq at ffffffff815c2cc8
#14 [ffff880276803e50] rt_spin_unlock_after_trylock_in_irq at ffffffff815c3425
#15 [ffff880276803e60] get_next_timer_interrupt at ffffffff810684a7
#16 [ffff880276803ed0] tick_nohz_stop_sched_tick at ffffffff810c5f2e
#17 [ffff880276803f50] tick_nohz_irq_exit at ffffffff810c6333
#18 [ffff880276803f70] irq_exit at ffffffff81060065
#19 [ffff880276803f90] smp_apic_timer_interrupt at ffffffff810358f5
#20 [ffff880276803fb0] apic_timer_interrupt at ffffffff815cbf9d
--- <IRQ stack> ---
#21 [ffff880273a03b28] apic_timer_interrupt at ffffffff815cbf9d
    [exception RIP: _raw_spin_lock+50]
    RIP: ffffffff815c3642  RSP: ffff880273a03bd8  RFLAGS: 00000202
    RAX: 0000000000008b49  RBX: ffff880272157290  RCX: ffff8802739ba340
    RDX: 0000000000008b4a  RSI: 0000000000000010  RDI: ffff880273a0c000
    RBP: ffff880273a03bd8   R8: 0000000000000001   R9: 0000000000000000
    R10: 0000000000000000  R11: 0000000000000001  R12: ffffffff810927b5
    R13: ffff880273a03b68  R14: 0000000000000010  R15: 0000000000000010
    ORIG_RAX: ffffffffffffff10  CS: 0010  SS: 0018
#22 [ffff880273a03be0] rt_spin_lock_slowlock at ffffffff815c2591
#23 [ffff880273a03cc0] rt_spin_lock at ffffffff815c3362
#24 [ffff880273a03cd0] run_timer_softirq at ffffffff81069002
#25 [ffff880273a03d70] handle_softirq at ffffffff81060d0f
#26 [ffff880273a03db0] do_current_softirqs at ffffffff81060f3c
#27 [ffff880273a03e20] run_ksoftirqd at ffffffff81061045
#28 [ffff880273a03e40] smpboot_thread_fn at ffffffff81089c31
#29 [ffff880273a03ec0] kthread at ffffffff810807fe
#30 [ffff880273a03f50] ret_from_fork at ffffffff815cb28c

nohz_tick:
PID: 6948   TASK: ffff880272d1f1c0  CPU: 29  COMMAND: "tbench"
 #0 [ffff8802769a6a40] machine_kexec at ffffffff8103bc07
 #1 [ffff8802769a6aa0] crash_kexec at ffffffff810d3e93
 #2 [ffff8802769a6b70] panic at ffffffff815bce70
 #3 [ffff8802769a6bf0] watchdog_overflow_callback at ffffffff810fd51d
 #4 [ffff8802769a6c10] __perf_event_overflow at ffffffff8112f1f8
 #5 [ffff8802769a6ca0] perf_event_overflow at ffffffff8112fb14
 #6 [ffff8802769a6cb0] intel_pmu_handle_irq at ffffffff8102078f
 #7 [ffff8802769a6de0] perf_event_nmi_handler at ffffffff815c2de5
 #8 [ffff8802769a6e10] nmi_handle at ffffffff815c2493
 #9 [ffff8802769a6ea0] default_do_nmi at ffffffff815c2623
#10 [ffff8802769a6ed0] do_nmi at ffffffff815c2948
#11 [ffff8802769a6ef0] end_repeat_nmi at ffffffff815c1931
    [exception RIP: preempt_schedule+36]
    RIP: ffffffff815be944  RSP: ffff8802769a3d98  RFLAGS: 00000002
    RAX: 0000000000000010  RBX: 0000000000000010  RCX: 0000000000000002
    RDX: ffff8802769a3d98  RSI: 0000000000000018  RDI: 0000000000000001
    RBP: ffffffff815be944   R8: ffffffff815be944   R9: 0000000000000018
    R10: ffff8802769a3d98  R11: 0000000000000002  R12: ffffffffffffffff
    R13: ffff880273f74000  R14: ffff880272d1f1c0  R15: ffff880269cedfd8
    ORIG_RAX: ffff880269cedfd8  CS: 0010  SS: 0018
--- <RT exception stack> ---
#12 [ffff8802769a3d98] preempt_schedule at ffffffff815be944
#13 [ffff8802769a3db0] _raw_spin_trylock at ffffffff815c0d6e
#14 [ffff8802769a3dc0] rt_spin_lock_slowunlock_hirq at ffffffff815c0288
#15 [ffff8802769a3de0] rt_spin_unlock_after_trylock_in_irq at ffffffff815c09e5
#16 [ffff8802769a3df0] run_local_timers at ffffffff81068025
#17 [ffff8802769a3e10] update_process_times at ffffffff810680ac
#18 [ffff8802769a3e40] tick_sched_handle at ffffffff810c3a92
#19 [ffff8802769a3e60] tick_sched_timer at ffffffff810c3d2f
#20 [ffff8802769a3e90] __run_hrtimer at ffffffff8108471d
#21 [ffff8802769a3ed0] hrtimer_interrupt at ffffffff8108497a
#22 [ffff8802769a3f70] local_apic_timer_interrupt at ffffffff810349e6
#23 [ffff8802769a3f90] smp_apic_timer_interrupt at ffffffff810358ee
#24 [ffff8802769a3fb0] apic_timer_interrupt at ffffffff815c955d
--- <IRQ stack> ---
#25 [ffff880269ced848] apic_timer_interrupt at ffffffff815c955d
    [exception RIP: _raw_spin_lock+53]
    RIP: ffffffff815c0c05  RSP: ffff880269ced8f8  RFLAGS: 00000202
    RAX: 0000000000000b7b  RBX: 0000000000000282  RCX: ffff880272d1f1c0
    RDX: 0000000000000b7d  RSI: ffff880269ceda38  RDI: ffff880273f74000
    RBP: ffff880269ced8f8   R8: 0000000000000001   R9: 00000000b54d13a4
    R10: 0000000000000001  R11: 0000000000000001  R12: ffff880269ced910
    R13: ffff880276d32170  R14: ffffffff810c9030  R15: ffff880269ced8b8
    ORIG_RAX: ffffffffffffff10  CS: 0010  SS: 0018
#26 [ffff880269ced900] rt_spin_lock_slowlock at ffffffff815bfb51
#27 [ffff880269ced9e0] rt_spin_lock at ffffffff815c0922
#28 [ffff880269ced9f0] lock_timer_base at ffffffff81067f92
#29 [ffff880269ceda20] mod_timer at ffffffff81069bcb
#30 [ffff880269ceda70] sk_reset_timer at ffffffff814d1e57
#31 [ffff880269ceda90] inet_csk_reset_xmit_timer at ffffffff8152d4a8
#32 [ffff880269cedac0] tcp_rearm_rto at ffffffff8152d583
#33 [ffff880269cedae0] tcp_ack at ffffffff81534085
#34 [ffff880269cedb60] tcp_rcv_established at ffffffff8153443d
#35 [ffff880269cedbb0] tcp_v4_do_rcv at ffffffff8153f56a
#36 [ffff880269cedbe0] __release_sock at ffffffff814d3891
#37 [ffff880269cedc10] release_sock at ffffffff814d3942
#38 [ffff880269cedc30] tcp_sendmsg at ffffffff8152b955
#39 [ffff880269cedd00] inet_sendmsg at ffffffff8155350e
#40 [ffff880269cedd30] sock_sendmsg at ffffffff814cea87
#41 [ffff880269cede40] sys_sendto at ffffffff814cebdf
#42 [ffff880269cedf80] tracesys at ffffffff815c8b09 (via system_call)
    RIP: 00007f0441a1fc35  RSP: 00007fffdea86130  RFLAGS: 00000246
    RAX: ffffffffffffffda  RBX: ffffffff815c8b09  RCX: ffffffffffffffff
    RDX: 000000000000248d  RSI: 0000000000607260  RDI: 0000000000000004
    RBP: 000000000000248d   R8: 0000000000000000   R9: 0000000000000000
    R10: 0000000000000000  R11: 0000000000000246  R12: 00007fffdea86a10
    R13: 00007fffdea86414  R14: 0000000000000004  R15: 0000000000607260
    ORIG_RAX: 000000000000002c  CS: 0033  SS: 002b


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] rcu: Eliminate softirq processing from rcutree
  2014-01-24 19:46         ` Sebastian Andrzej Siewior
@ 2014-01-25  5:20           ` Mike Galbraith
  0 siblings, 0 replies; 21+ messages in thread
From: Mike Galbraith @ 2014-01-25  5:20 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-rt-users, Steven Rostedt, linux-kernel, Thomas Gleixner,
	Paul E. McKenney

On Fri, 2014-01-24 at 20:46 +0100, Sebastian Andrzej Siewior wrote: 
> * Mike Galbraith | 2013-12-23 06:12:39 [+0100]:
> 
> >P.S.
> >
> >virgin -rt7 doing tbench 64 + make -j64
> >
> >[   97.907960] perf samples too long (3138 > 2500), lowering kernel.perf_event_max_sample_rate to 50000
> >[  103.047921] perf samples too long (5544 > 5000), lowering kernel.perf_event_max_sample_rate to 25000
> >[  181.561271] perf samples too long (10318 > 10000), lowering kernel.perf_event_max_sample_rate to 13000
> >[  184.243750] INFO: NMI handler (perf_event_nmi_handler) took too long to run: 1.084 msecs
> >[  248.914422] perf samples too long (19719 > 19230), lowering kernel.perf_event_max_sample_rate to 7000
> >[  382.116674] NOHZ: local_softirq_pending 10
> This is the block softirq
> 
> >[  405.201593] perf samples too long (36824 > 35714), lowering kernel.perf_event_max_sample_rate to 4000
> >[  444.704185] NOHZ: local_softirq_pending 08
> >[  444.704208] NOHZ: local_softirq_pending 08
> >[  444.704579] NOHZ: local_softirq_pending 08
> >[  444.704678] NOHZ: local_softirq_pending 08
> >[  444.705100] NOHZ: local_softirq_pending 08
> >[  444.705980] NOHZ: local_softirq_pending 08
> >[  444.705994] NOHZ: local_softirq_pending 08
> >[  444.708315] NOHZ: local_softirq_pending 08
> >[  444.710348] NOHZ: local_softirq_pending 08
> 
> and this is RX. Is your testcase heavy disk-io or heavy disk-io +
> network?

Yeah.

-Mike


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] rcu: Eliminate softirq processing from rcutree
  2014-01-25  5:12                     ` Mike Galbraith
@ 2014-01-27  5:10                       ` Mike Galbraith
  2014-01-27 16:54                         ` Paul E. McKenney
  0 siblings, 1 reply; 21+ messages in thread
From: Mike Galbraith @ 2014-01-27  5:10 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: paulmck, linux-rt-users, Steven Rostedt, linux-kernel, Thomas Gleixner

On Sat, 2014-01-25 at 06:12 +0100, Mike Galbraith wrote: 
> On Fri, 2014-01-24 at 20:50 +0100, Sebastian Andrzej Siewior wrote: 
> > * Mike Galbraith | 2014-01-18 04:25:14 [+0100]:
> > 
> > >> ># timers-do-not-raise-softirq-unconditionally.patch
> > >> ># rtmutex-use-a-trylock-for-waiter-lock-in-trylock.patch
> > >> >
> > >> >..those two out does seem to have stabilized the thing.
> > >> 
> > >> timers-do-not-raise-softirq-unconditionally.patch is on its way out.
> > >> 
> >> rtmutex-use-a-trylock-for-waiter-lock-in-trylock.patch confuses me.
> > >> Didn't you report once that your box deadlocks without this patch? Now
> > >> your 64way box on the other hand does not work with it?
> > >
> > >If 'do not raise' is applied, 'use a trylock' won't save you.  If 'do
> > is this just an observation or you do know why it won't save me?
> 
> It's an observation from beyond the grave from the 64 core box that it
> repeatedly did NOT save :)  Autopsy photos below.
> 
> I've built 3.12.8-rt9 with Steven's v2 "timer: Raise softirq if there's
> irq_work" to see if it'll survive.

And it did, configured both as nohz_tick, and nohz_full_all.  The irqs
are enabled warning in can_stop_full_tick() fired for nohz_full_all, but
that's it.

For grins, I also applied Paul's v3 timer latency series while testing
nohz_full_all config.   The box was heavily loaded the vast majority of
the time, but it didn't explode or do anything obviously evil.

-Mike


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] rcu: Eliminate softirq processing from rcutree
  2014-01-27  5:10                       ` Mike Galbraith
@ 2014-01-27 16:54                         ` Paul E. McKenney
  2014-01-27 17:03                           ` Mike Galbraith
  0 siblings, 1 reply; 21+ messages in thread
From: Paul E. McKenney @ 2014-01-27 16:54 UTC (permalink / raw)
  To: Mike Galbraith
  Cc: Sebastian Andrzej Siewior, linux-rt-users, Steven Rostedt,
	linux-kernel, Thomas Gleixner

On Mon, Jan 27, 2014 at 06:10:44AM +0100, Mike Galbraith wrote:
> On Sat, 2014-01-25 at 06:12 +0100, Mike Galbraith wrote: 
> > On Fri, 2014-01-24 at 20:50 +0100, Sebastian Andrzej Siewior wrote: 
> > > * Mike Galbraith | 2014-01-18 04:25:14 [+0100]:
> > > 
> > > >> ># timers-do-not-raise-softirq-unconditionally.patch
> > > >> ># rtmutex-use-a-trylock-for-waiter-lock-in-trylock.patch
> > > >> >
> > > >> >..those two out does seem to have stabilized the thing.
> > > >> 
> > > >> timers-do-not-raise-softirq-unconditionally.patch is on its way out.
> > > >> 
> > > >> rtmutex-use-a-trylock-for-waiter-lock-in-trylock.patch confuses me.
> > > >> Didn't you report once that your box deadlocks without this patch? Now
> > > >> your 64way box on the other hand does not work with it?
> > > >
> > > >If 'do not raise' is applied, 'use a trylock' won't save you.  If 'do
> > > is this just an observation or you do know why it won't save me?
> > 
> > It's an observation from beyond the grave from the 64 core box that it
> > repeatedly did NOT save :)  Autopsy photos below.
> > 
> > I've built 3.12.8-rt9 with Steven's v2 "timer: Raise softirq if there's
> > irq_work" to see if it'll survive.
> 
> And it did, configured both as nohz_tick, and nohz_full_all.  The irqs
> are enabled warning in can_stop_full_tick() fired for nohz_full_all, but
> that's it.
> 
> For grins, I also applied Paul's v3 timer latency series while testing
> nohz_full_all config.   The box was heavily loaded the vast majority of
> the time, but it didn't explode or do anything obviously evil.

Cool!  May I add your Tested-by?

							Thanx, Paul


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] rcu: Eliminate softirq processing from rcutree
  2014-01-27 16:54                         ` Paul E. McKenney
@ 2014-01-27 17:03                           ` Mike Galbraith
  0 siblings, 0 replies; 21+ messages in thread
From: Mike Galbraith @ 2014-01-27 17:03 UTC (permalink / raw)
  To: paulmck
  Cc: Sebastian Andrzej Siewior, linux-rt-users, Steven Rostedt,
	linux-kernel, Thomas Gleixner

On Mon, 2014-01-27 at 08:54 -0800, Paul E. McKenney wrote: 
> On Mon, Jan 27, 2014 at 06:10:44AM +0100, Mike Galbraith wrote:
> > On Sat, 2014-01-25 at 06:12 +0100, Mike Galbraith wrote: 
> > > On Fri, 2014-01-24 at 20:50 +0100, Sebastian Andrzej Siewior wrote: 
> > > > * Mike Galbraith | 2014-01-18 04:25:14 [+0100]:
> > > > 
> > > > >> ># timers-do-not-raise-softirq-unconditionally.patch
> > > > >> ># rtmutex-use-a-trylock-for-waiter-lock-in-trylock.patch
> > > > >> >
> > > > >> >..those two out does seem to have stabilized the thing.
> > > > >> 
> > > > >> timers-do-not-raise-softirq-unconditionally.patch is on its way out.
> > > > >> 
> > > > >> rtmutex-use-a-trylock-for-waiter-lock-in-trylock.patch confuses me.
> > > > >> Didn't you report once that your box deadlocks without this patch? Now
> > > > >> your 64way box on the other hand does not work with it?
> > > > >
> > > > >If 'do not raise' is applied, 'use a trylock' won't save you.  If 'do
> > > > is this just an observation or you do know why it won't save me?
> > > 
> > > It's an observation from beyond the grave from the 64 core box that it
> > > repeatedly did NOT save :)  Autopsy photos below.
> > > 
> > > I've built 3.12.8-rt9 with Steven's v2 "timer: Raise softirq if there's
> > > irq_work" to see if it'll survive.
> > 
> > And it did, configured both as nohz_tick, and nohz_full_all.  The irqs
> > are enabled warning in can_stop_full_tick() fired for nohz_full_all, but
> > that's it.
> > 
> > For grins, I also applied Paul's v3 timer latency series while testing
> > nohz_full_all config.   The box was heavily loaded the vast majority of
> > the time, but it didn't explode or do anything obviously evil.
> 
> Cool!  May I add your Tested-by?

Certainly.

-Mike


^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2014-01-27 17:03 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-12-21 19:39 [PATCH] rcu: Eliminate softirq processing from rcutree Sebastian Andrzej Siewior
2013-12-22  3:07 ` Mike Galbraith
2013-12-22  8:57   ` Mike Galbraith
2013-12-23  4:38     ` Mike Galbraith
2013-12-23  5:12       ` Mike Galbraith
2014-01-24 19:46         ` Sebastian Andrzej Siewior
2014-01-25  5:20           ` Mike Galbraith
2013-12-24 19:36       ` Paul E. McKenney
2013-12-25  3:07         ` Mike Galbraith
2013-12-25  7:55           ` Paul E. McKenney
2013-12-25 17:37             ` Mike Galbraith
2014-01-17 17:14               ` Sebastian Andrzej Siewior
2014-01-18  3:25                 ` Mike Galbraith
2014-01-24 19:50                   ` Sebastian Andrzej Siewior
2014-01-25  5:12                     ` Mike Galbraith
2014-01-27  5:10                       ` Mike Galbraith
2014-01-27 16:54                         ` Paul E. McKenney
2014-01-27 17:03                           ` Mike Galbraith
2013-12-26 10:03           ` Mike Galbraith
2014-01-17 17:23             ` Sebastian Andrzej Siewior
2014-01-18  3:30               ` Mike Galbraith

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).