* [PATCH] rcu: Allow to eliminate softirq processing from rcutree
@ 2019-03-15 11:11 Sebastian Andrzej Siewior
  2019-03-15 13:35 ` Steven Rostedt
                   ` (2 more replies)
  0 siblings, 3 replies; 44+ messages in thread
From: Sebastian Andrzej Siewior @ 2019-03-15 11:11 UTC (permalink / raw)
  To: linux-kernel
  Cc: Josh Triplett, Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan,
	Joel Fernandes, tglx, Paul E. McKenney, Mike Galbraith,
	Sebastian Andrzej Siewior

From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>

Running RCU out of softirq is a problem for some workloads that would
like to manage RCU core processing independently of other softirq work,
for example, setting kthread priority.
This commit therefore introduces the `rcunosoftirq' option which moves
the RCU core work from softirq to a per-CPU/per-flavor SCHED_OTHER
kthread named rcuc.
The SCHED_OTHER approach avoids the scalability problems that appeared
with the earlier attempt to move RCU core processing from softirq to
kthreads.
That said, kernels built with RCU_BOOST=y will run the rcuc kthreads at
the RCU-boosting priority.
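
In short, the new dispatch boils down to the following (condensed from the
diff below for readers skimming the thread; the real invoke_rcu_core() also
disables interrupts around the per-CPU accesses and avoids waking the
kthread from within itself):

static bool rcu_softirq_enabled = true;

static int __init rcunosoftirq_setup(char *str)
{
	rcu_softirq_enabled = false;	/* booted with "rcunosoftirq" */
	return 0;
}
__setup("rcunosoftirq", rcunosoftirq_setup);

static void invoke_rcu_core(void)
{
	if (!cpu_online(smp_processor_id()))
		return;
	if (rcu_softirq_enabled) {
		/* default: keep using RCU_SOFTIRQ as before */
		raise_softirq(RCU_SOFTIRQ);
	} else {
		/* hand the core work to this CPU's rcuc kthread instead */
		__this_cpu_write(rcu_cpu_has_work, 1);
		rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task),
			      __this_cpu_read(rcu_cpu_kthread_status));
	}
}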

Reported-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Mike Galbraith <bitbucket@online.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
[bigeasy: add rcunosoftirq option]
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 kernel/rcu/tree.c        | 132 ++++++++++++++++++++++++++++++++---
 kernel/rcu/tree.h        |   4 +-
 kernel/rcu/tree_plugin.h | 145 +++++----------------------------------
 3 files changed, 141 insertions(+), 140 deletions(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 9180158756d2c..498dc5e9287d0 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -62,6 +62,12 @@
 #include <linux/suspend.h>
 #include <linux/ftrace.h>
 #include <linux/tick.h>
+#include <linux/gfp.h>
+#include <linux/oom.h>
+#include <linux/smpboot.h>
+#include <linux/jiffies.h>
+#include <linux/sched/isolation.h>
+#include "../time/tick-internal.h"
 
 #include "tree.h"
 #include "rcu.h"
@@ -2716,7 +2722,7 @@ EXPORT_SYMBOL_GPL(rcu_fwd_progress_check);
  * structures.  This may be called only from the CPU to whom the rdp
  * belongs.
  */
-static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused)
+static __latent_entropy void rcu_process_callbacks(void)
 {
 	unsigned long flags;
 	struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);
@@ -2758,6 +2764,13 @@ static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused
 	trace_rcu_utilization(TPS("End RCU core"));
 }
 
+static void rcu_process_callbacks_si(struct softirq_action *h)
+{
+	rcu_process_callbacks();
+}
+
+static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
+
 /*
  * Schedule RCU callback invocation.  If the running implementation of RCU
  * does not support RCU priority boosting, just do a direct call, otherwise
@@ -2769,19 +2782,121 @@ static void invoke_rcu_callbacks(struct rcu_data *rdp)
 {
 	if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
 		return;
-	if (likely(!rcu_state.boost)) {
-		rcu_do_batch(rdp);
-		return;
-	}
-	invoke_rcu_callbacks_kthread();
+	rcu_do_batch(rdp);
 }
 
+static void rcu_wake_cond(struct task_struct *t, int status)
+{
+	/*
+	 * If the thread is yielding, only wake it when this
+	 * is invoked from idle
+	 */
+	if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current)))
+		wake_up_process(t);
+}
+
+static bool rcu_softirq_enabled = true;
+
+static int __init rcunosoftirq_setup(char *str)
+{
+	rcu_softirq_enabled = false;
+	return 0;
+}
+__setup("rcunosoftirq", rcunosoftirq_setup);
+
+/*
+ * Wake up this CPU's rcuc kthread to do RCU core processing.
+ */
 static void invoke_rcu_core(void)
 {
-	if (cpu_online(smp_processor_id()))
+	unsigned long flags;
+	struct task_struct *t;
+
+	if (!cpu_online(smp_processor_id()))
+		return;
+	if (rcu_softirq_enabled) {
 		raise_softirq(RCU_SOFTIRQ);
+	} else {
+		local_irq_save(flags);
+		__this_cpu_write(rcu_cpu_has_work, 1);
+		t = __this_cpu_read(rcu_cpu_kthread_task);
+		if (t != NULL && current != t)
+			rcu_wake_cond(t, __this_cpu_read(rcu_cpu_kthread_status));
+		local_irq_restore(flags);
+	}
 }
 
+static void rcu_cpu_kthread_park(unsigned int cpu)
+{
+	per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
+}
+
+static int rcu_cpu_kthread_should_run(unsigned int cpu)
+{
+	return __this_cpu_read(rcu_cpu_has_work);
+}
+
+/*
+ * Per-CPU kernel thread that invokes RCU callbacks.  This replaces
+ * the RCU softirq used in configurations of RCU that do not support RCU
+ * priority boosting.
+ */
+static void rcu_cpu_kthread(unsigned int cpu)
+{
+	unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status);
+	char work, *workp = this_cpu_ptr(&rcu_cpu_has_work);
+	int spincnt;
+
+	for (spincnt = 0; spincnt < 10; spincnt++) {
+		trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
+		local_bh_disable();
+		*statusp = RCU_KTHREAD_RUNNING;
+		this_cpu_inc(rcu_cpu_kthread_loops);
+		local_irq_disable();
+		work = *workp;
+		*workp = 0;
+		local_irq_enable();
+		if (work)
+			rcu_process_callbacks();
+		local_bh_enable();
+		if (*workp == 0) {
+			trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
+			*statusp = RCU_KTHREAD_WAITING;
+			return;
+		}
+	}
+	*statusp = RCU_KTHREAD_YIELDING;
+	trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
+	schedule_timeout_interruptible(2);
+	trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
+	*statusp = RCU_KTHREAD_WAITING;
+}
+
+static struct smp_hotplug_thread rcu_cpu_thread_spec = {
+	.store			= &rcu_cpu_kthread_task,
+	.thread_should_run	= rcu_cpu_kthread_should_run,
+	.thread_fn		= rcu_cpu_kthread,
+	.thread_comm		= "rcuc/%u",
+	.setup			= rcu_cpu_kthread_setup,
+	.park			= rcu_cpu_kthread_park,
+};
+
+/*
+ * Spawn per-CPU RCU core processing kthreads.
+ */
+static int __init rcu_spawn_core_kthreads(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		per_cpu(rcu_cpu_has_work, cpu) = 0;
+	if (!IS_ENABLED(CONFIG_RCU_BOOST) && !rcu_softirq_enabled)
+		return 0;
+	WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec), "%s: Could not start rcub kthread, OOM is now expected behavior\n", __func__);
+	return 0;
+}
+early_initcall(rcu_spawn_core_kthreads);
+
 /*
  * Handle any core-RCU processing required by a call_rcu() invocation.
  */
@@ -3777,7 +3892,8 @@ void __init rcu_init(void)
 	rcu_init_one();
 	if (dump_tree)
 		rcu_dump_rcu_node_tree();
-	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
+	if (rcu_softirq_enabled)
+		open_softirq(RCU_SOFTIRQ, rcu_process_callbacks_si);
 
 	/*
 	 * We don't need protection against CPU-hotplug here because
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index d90b02b53c0ec..fb8fc6ecc391b 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -402,12 +402,10 @@ static const char *tp_rcu_varname __used __tracepoint_string = rcu_name;
 
 int rcu_dynticks_snap(struct rcu_data *rdp);
 
-#ifdef CONFIG_RCU_BOOST
 DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
 DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu);
 DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
 DECLARE_PER_CPU(char, rcu_cpu_has_work);
-#endif /* #ifdef CONFIG_RCU_BOOST */
 
 /* Forward declarations for rcutree_plugin.h */
 static void rcu_bootup_announce(void);
@@ -425,8 +423,8 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func);
 static void dump_blkd_tasks(struct rcu_node *rnp, int ncheck);
 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
 static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
-static void invoke_rcu_callbacks_kthread(void);
 static bool rcu_is_callbacks_kthread(void);
+static void rcu_cpu_kthread_setup(unsigned int cpu);
 static void __init rcu_spawn_boost_kthreads(void);
 static void rcu_prepare_kthreads(int cpu);
 static void rcu_cleanup_after_idle(void);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 1b3dd2fc0cd64..b440d6ef45d16 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -24,17 +24,6 @@
  *	   Paul E. McKenney <paulmck@linux.vnet.ibm.com>
  */
 
-#include <linux/delay.h>
-#include <linux/gfp.h>
-#include <linux/oom.h>
-#include <linux/sched/debug.h>
-#include <linux/smpboot.h>
-#include <linux/sched/isolation.h>
-#include <uapi/linux/sched/types.h>
-#include "../time/tick-internal.h"
-
-#ifdef CONFIG_RCU_BOOST
-
 #include "../locking/rtmutex_common.h"
 
 /*
@@ -45,19 +34,6 @@ DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
 DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
 DEFINE_PER_CPU(char, rcu_cpu_has_work);
 
-#else /* #ifdef CONFIG_RCU_BOOST */
-
-/*
- * Some architectures do not define rt_mutexes, but if !CONFIG_RCU_BOOST,
- * all uses are in dead code.  Provide a definition to keep the compiler
- * happy, but add WARN_ON_ONCE() to complain if used in the wrong place.
- * This probably needs to be excluded from -rt builds.
- */
-#define rt_mutex_owner(a) ({ WARN_ON_ONCE(1); NULL; })
-#define rt_mutex_futex_unlock(x) WARN_ON_ONCE(1)
-
-#endif /* #else #ifdef CONFIG_RCU_BOOST */
-
 #ifdef CONFIG_RCU_NOCB_CPU
 static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
 static bool __read_mostly rcu_nocb_poll;    /* Offload kthread are to poll. */
@@ -652,7 +628,10 @@ static void rcu_read_unlock_special(struct task_struct *t)
 		/* Need to defer quiescent state until everything is enabled. */
 		if (irqs_were_disabled) {
 			/* Enabling irqs does not reschedule, so... */
-			raise_softirq_irqoff(RCU_SOFTIRQ);
+			if (rcu_softirq_enabled)
+				raise_softirq_irqoff(RCU_SOFTIRQ);
+			else
+				invoke_rcu_core();
 		} else {
 			/* Enabling BH or preempt does reschedule, so... */
 			set_tsk_need_resched(current);
@@ -1150,18 +1129,21 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
 
 #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
 
-#ifdef CONFIG_RCU_BOOST
-
-static void rcu_wake_cond(struct task_struct *t, int status)
+/*
+ * If boosting, set rcuc kthreads to realtime priority.
+ */
+static void rcu_cpu_kthread_setup(unsigned int cpu)
 {
-	/*
-	 * If the thread is yielding, only wake it when this
-	 * is invoked from idle
-	 */
-	if (status != RCU_KTHREAD_YIELDING || is_idle_task(current))
-		wake_up_process(t);
+#ifdef CONFIG_RCU_BOOST
+	struct sched_param sp;
+
+	sp.sched_priority = kthread_prio;
+	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
+#endif /* #ifdef CONFIG_RCU_BOOST */
 }
 
+#ifdef CONFIG_RCU_BOOST
+
 /*
  * Carry out RCU priority boosting on the task indicated by ->exp_tasks
  * or ->boost_tasks, advancing the pointer to the next task in the
@@ -1299,23 +1281,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
 	}
 }
 
-/*
- * Wake up the per-CPU kthread to invoke RCU callbacks.
- */
-static void invoke_rcu_callbacks_kthread(void)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	__this_cpu_write(rcu_cpu_has_work, 1);
-	if (__this_cpu_read(rcu_cpu_kthread_task) != NULL &&
-	    current != __this_cpu_read(rcu_cpu_kthread_task)) {
-		rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task),
-			      __this_cpu_read(rcu_cpu_kthread_status));
-	}
-	local_irq_restore(flags);
-}
-
 /*
  * Is the current CPU running the RCU-callbacks kthread?
  * Caller must have preemption disabled.
@@ -1369,65 +1334,6 @@ static int rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
 	return 0;
 }
 
-static void rcu_kthread_do_work(void)
-{
-	rcu_do_batch(this_cpu_ptr(&rcu_data));
-}
-
-static void rcu_cpu_kthread_setup(unsigned int cpu)
-{
-	struct sched_param sp;
-
-	sp.sched_priority = kthread_prio;
-	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
-}
-
-static void rcu_cpu_kthread_park(unsigned int cpu)
-{
-	per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
-}
-
-static int rcu_cpu_kthread_should_run(unsigned int cpu)
-{
-	return __this_cpu_read(rcu_cpu_has_work);
-}
-
-/*
- * Per-CPU kernel thread that invokes RCU callbacks.  This replaces
- * the RCU softirq used in configurations of RCU that do not support RCU
- * priority boosting.
- */
-static void rcu_cpu_kthread(unsigned int cpu)
-{
-	unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status);
-	char work, *workp = this_cpu_ptr(&rcu_cpu_has_work);
-	int spincnt;
-
-	for (spincnt = 0; spincnt < 10; spincnt++) {
-		trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
-		local_bh_disable();
-		*statusp = RCU_KTHREAD_RUNNING;
-		this_cpu_inc(rcu_cpu_kthread_loops);
-		local_irq_disable();
-		work = *workp;
-		*workp = 0;
-		local_irq_enable();
-		if (work)
-			rcu_kthread_do_work();
-		local_bh_enable();
-		if (*workp == 0) {
-			trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
-			*statusp = RCU_KTHREAD_WAITING;
-			return;
-		}
-	}
-	*statusp = RCU_KTHREAD_YIELDING;
-	trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
-	schedule_timeout_interruptible(2);
-	trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
-	*statusp = RCU_KTHREAD_WAITING;
-}
-
 /*
  * Set the per-rcu_node kthread's affinity to cover all CPUs that are
  * served by the rcu_node in question.  The CPU hotplug lock is still
@@ -1458,27 +1364,13 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
 	free_cpumask_var(cm);
 }
 
-static struct smp_hotplug_thread rcu_cpu_thread_spec = {
-	.store			= &rcu_cpu_kthread_task,
-	.thread_should_run	= rcu_cpu_kthread_should_run,
-	.thread_fn		= rcu_cpu_kthread,
-	.thread_comm		= "rcuc/%u",
-	.setup			= rcu_cpu_kthread_setup,
-	.park			= rcu_cpu_kthread_park,
-};
-
 /*
  * Spawn boost kthreads -- called as soon as the scheduler is running.
  */
 static void __init rcu_spawn_boost_kthreads(void)
 {
 	struct rcu_node *rnp;
-	int cpu;
 
-	for_each_possible_cpu(cpu)
-		per_cpu(rcu_cpu_has_work, cpu) = 0;
-	if (WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec), "%s: Could not start rcub kthread, OOM is now expected behavior\n", __func__))
-		return;
 	rcu_for_each_leaf_node(rnp)
 		(void)rcu_spawn_one_boost_kthread(rnp);
 }
@@ -1501,11 +1393,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 }
 
-static void invoke_rcu_callbacks_kthread(void)
-{
-	WARN_ON_ONCE(1);
-}
-
 static bool rcu_is_callbacks_kthread(void)
 {
 	return false;
-- 
2.20.1



* Re: [PATCH] rcu: Allow to eliminate softirq processing from rcutree
  2019-03-15 11:11 [PATCH] rcu: Allow to eliminate softirq processing from rcutree Sebastian Andrzej Siewior
@ 2019-03-15 13:35 ` Steven Rostedt
  2019-03-15 13:57   ` Sebastian Andrzej Siewior
  2019-03-18  2:24 ` Paul E. McKenney
  2019-03-20  0:26 ` [PATCH] " Joel Fernandes
  2 siblings, 1 reply; 44+ messages in thread
From: Steven Rostedt @ 2019-03-15 13:35 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-kernel, Josh Triplett, Mathieu Desnoyers, Lai Jiangshan,
	Joel Fernandes, tglx, Paul E. McKenney, Mike Galbraith

On Fri, 15 Mar 2019 12:11:30 +0100
Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:

> +static void rcu_cpu_kthread_park(unsigned int cpu)
> +{

Should we add one of the trace_rcu_.. trace events here?

-- Steve


> +	per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
> +}
> +


* Re: [PATCH] rcu: Allow to eliminate softirq processing from rcutree
  2019-03-15 13:35 ` Steven Rostedt
@ 2019-03-15 13:57   ` Sebastian Andrzej Siewior
  0 siblings, 0 replies; 44+ messages in thread
From: Sebastian Andrzej Siewior @ 2019-03-15 13:57 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: linux-kernel, Josh Triplett, Mathieu Desnoyers, Lai Jiangshan,
	Joel Fernandes, tglx, Paul E. McKenney

On 2019-03-15 09:35:44 [-0400], Steven Rostedt wrote:
> On Fri, 15 Mar 2019 12:11:30 +0100
> Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:
> 
> > +static void rcu_cpu_kthread_park(unsigned int cpu)
> > +{
> 
> Should we add one of the trace_rcu_.. trace events here?

If it is required and I'm told which one it is, then sure, I can add it.
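
Something along these lines would be the obvious minimal variant, reusing
the trace_rcu_utilization() calls already present in the patch (the event
string below is made up purely for illustration):

static void rcu_cpu_kthread_park(unsigned int cpu)
{
	/* illustration only: note in the trace that the kthread goes off-CPU */
	trace_rcu_utilization(TPS("Park CPU kthread@rcu_park"));
	per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
}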

> -- Steve

Sebastian


* Re: [PATCH] rcu: Allow to eliminate softirq processing from rcutree
  2019-03-15 11:11 [PATCH] rcu: Allow to eliminate softirq processing from rcutree Sebastian Andrzej Siewior
  2019-03-15 13:35 ` Steven Rostedt
@ 2019-03-18  2:24 ` Paul E. McKenney
  2019-03-19 11:44   ` [PATCH v2] " Sebastian Andrzej Siewior
  2019-03-20  0:26 ` [PATCH] " Joel Fernandes
  2 siblings, 1 reply; 44+ messages in thread
From: Paul E. McKenney @ 2019-03-18  2:24 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-kernel, Josh Triplett, Steven Rostedt, Mathieu Desnoyers,
	Lai Jiangshan, Joel Fernandes, tglx, Mike Galbraith

On Fri, Mar 15, 2019 at 12:11:30PM +0100, Sebastian Andrzej Siewior wrote:
> From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
> 
> Running RCU out of softirq is a problem for some workloads that would
> like to manage RCU core processing independently of other softirq work,
> for example, setting kthread priority.
> This commit therefore introduces the `rcunosoftirq' option which moves
> the RCU core work from softirq to a per-CPU/per-flavor SCHED_OTHER
> kthread named rcuc.
> The SCHED_OTHER approach avoids the scalability problems that appeared
> with the earlier attempt to move RCU core processing from softirq to
> kthreads.
> That said, kernels built with RCU_BOOST=y will run the rcuc kthreads at
> the RCU-boosting priority.

Is this against -rt, mainline, or some such?  I was going to start up
a test overnight, but it does not apply against -rcu.

Either way, I will take a closer look at the patch early this week.

							Thanx, Paul

> Reported-by: Thomas Gleixner <tglx@linutronix.de>
> Tested-by: Mike Galbraith <bitbucket@online.de>
> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> [bigeasy: add rcunosoftirq option]
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> ---
>  kernel/rcu/tree.c        | 132 ++++++++++++++++++++++++++++++++---
>  kernel/rcu/tree.h        |   4 +-
>  kernel/rcu/tree_plugin.h | 145 +++++----------------------------------
>  3 files changed, 141 insertions(+), 140 deletions(-)
> 
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index 9180158756d2c..498dc5e9287d0 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -62,6 +62,12 @@
>  #include <linux/suspend.h>
>  #include <linux/ftrace.h>
>  #include <linux/tick.h>
> +#include <linux/gfp.h>
> +#include <linux/oom.h>
> +#include <linux/smpboot.h>
> +#include <linux/jiffies.h>
> +#include <linux/sched/isolation.h>
> +#include "../time/tick-internal.h"
> 
>  #include "tree.h"
>  #include "rcu.h"
> @@ -2716,7 +2722,7 @@ EXPORT_SYMBOL_GPL(rcu_fwd_progress_check);
>   * structures.  This may be called only from the CPU to whom the rdp
>   * belongs.
>   */
> -static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused)
> +static __latent_entropy void rcu_process_callbacks(void)
>  {
>  	unsigned long flags;
>  	struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);
> @@ -2758,6 +2764,13 @@ static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused
>  	trace_rcu_utilization(TPS("End RCU core"));
>  }
> 
> +static void rcu_process_callbacks_si(struct softirq_action *h)
> +{
> +	rcu_process_callbacks();
> +}
> +
> +static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
> +
>  /*
>   * Schedule RCU callback invocation.  If the running implementation of RCU
>   * does not support RCU priority boosting, just do a direct call, otherwise
> @@ -2769,19 +2782,121 @@ static void invoke_rcu_callbacks(struct rcu_data *rdp)
>  {
>  	if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
>  		return;
> -	if (likely(!rcu_state.boost)) {
> -		rcu_do_batch(rdp);
> -		return;
> -	}
> -	invoke_rcu_callbacks_kthread();
> +	rcu_do_batch(rdp);
>  }
> 
> +static void rcu_wake_cond(struct task_struct *t, int status)
> +{
> +	/*
> +	 * If the thread is yielding, only wake it when this
> +	 * is invoked from idle
> +	 */
> +	if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current)))
> +		wake_up_process(t);
> +}
> +
> +static bool rcu_softirq_enabled = true;
> +
> +static int __init rcunosoftirq_setup(char *str)
> +{
> +	rcu_softirq_enabled = false;
> +	return 0;
> +}
> +__setup("rcunosoftirq", rcunosoftirq_setup);
> +
> +/*
> + * Wake up this CPU's rcuc kthread to do RCU core processing.
> + */
>  static void invoke_rcu_core(void)
>  {
> -	if (cpu_online(smp_processor_id()))
> +	unsigned long flags;
> +	struct task_struct *t;
> +
> +	if (!cpu_online(smp_processor_id()))
> +		return;
> +	if (rcu_softirq_enabled) {
>  		raise_softirq(RCU_SOFTIRQ);
> +	} else {
> +		local_irq_save(flags);
> +		__this_cpu_write(rcu_cpu_has_work, 1);
> +		t = __this_cpu_read(rcu_cpu_kthread_task);
> +		if (t != NULL && current != t)
> +			rcu_wake_cond(t, __this_cpu_read(rcu_cpu_kthread_status));
> +		local_irq_restore(flags);
> +	}
>  }
> 
> +static void rcu_cpu_kthread_park(unsigned int cpu)
> +{
> +	per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
> +}
> +
> +static int rcu_cpu_kthread_should_run(unsigned int cpu)
> +{
> +	return __this_cpu_read(rcu_cpu_has_work);
> +}
> +
> +/*
> + * Per-CPU kernel thread that invokes RCU callbacks.  This replaces
> + * the RCU softirq used in configurations of RCU that do not support RCU
> + * priority boosting.
> + */
> +static void rcu_cpu_kthread(unsigned int cpu)
> +{
> +	unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status);
> +	char work, *workp = this_cpu_ptr(&rcu_cpu_has_work);
> +	int spincnt;
> +
> +	for (spincnt = 0; spincnt < 10; spincnt++) {
> +		trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
> +		local_bh_disable();
> +		*statusp = RCU_KTHREAD_RUNNING;
> +		this_cpu_inc(rcu_cpu_kthread_loops);
> +		local_irq_disable();
> +		work = *workp;
> +		*workp = 0;
> +		local_irq_enable();
> +		if (work)
> +			rcu_process_callbacks();
> +		local_bh_enable();
> +		if (*workp == 0) {
> +			trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
> +			*statusp = RCU_KTHREAD_WAITING;
> +			return;
> +		}
> +	}
> +	*statusp = RCU_KTHREAD_YIELDING;
> +	trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
> +	schedule_timeout_interruptible(2);
> +	trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
> +	*statusp = RCU_KTHREAD_WAITING;
> +}
> +
> +static struct smp_hotplug_thread rcu_cpu_thread_spec = {
> +	.store			= &rcu_cpu_kthread_task,
> +	.thread_should_run	= rcu_cpu_kthread_should_run,
> +	.thread_fn		= rcu_cpu_kthread,
> +	.thread_comm		= "rcuc/%u",
> +	.setup			= rcu_cpu_kthread_setup,
> +	.park			= rcu_cpu_kthread_park,
> +};
> +
> +/*
> + * Spawn per-CPU RCU core processing kthreads.
> + */
> +static int __init rcu_spawn_core_kthreads(void)
> +{
> +	int cpu;
> +
> +	for_each_possible_cpu(cpu)
> +		per_cpu(rcu_cpu_has_work, cpu) = 0;
> +	if (!IS_ENABLED(CONFIG_RCU_BOOST) && !rcu_softirq_enabled)
> +		return 0;
> +	WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec), "%s: Could not start rcub kthread, OOM is now expected behavior\n", __func__);
> +	return 0;
> +}
> +early_initcall(rcu_spawn_core_kthreads);
> +
>  /*
>   * Handle any core-RCU processing required by a call_rcu() invocation.
>   */
> @@ -3777,7 +3892,8 @@ void __init rcu_init(void)
>  	rcu_init_one();
>  	if (dump_tree)
>  		rcu_dump_rcu_node_tree();
> -	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
> +	if (rcu_softirq_enabled)
> +		open_softirq(RCU_SOFTIRQ, rcu_process_callbacks_si);
> 
>  	/*
>  	 * We don't need protection against CPU-hotplug here because
> diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
> index d90b02b53c0ec..fb8fc6ecc391b 100644
> --- a/kernel/rcu/tree.h
> +++ b/kernel/rcu/tree.h
> @@ -402,12 +402,10 @@ static const char *tp_rcu_varname __used __tracepoint_string = rcu_name;
> 
>  int rcu_dynticks_snap(struct rcu_data *rdp);
> 
> -#ifdef CONFIG_RCU_BOOST
>  DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
>  DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu);
>  DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
>  DECLARE_PER_CPU(char, rcu_cpu_has_work);
> -#endif /* #ifdef CONFIG_RCU_BOOST */
> 
>  /* Forward declarations for rcutree_plugin.h */
>  static void rcu_bootup_announce(void);
> @@ -425,8 +423,8 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func);
>  static void dump_blkd_tasks(struct rcu_node *rnp, int ncheck);
>  static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
>  static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
> -static void invoke_rcu_callbacks_kthread(void);
>  static bool rcu_is_callbacks_kthread(void);
> +static void rcu_cpu_kthread_setup(unsigned int cpu);
>  static void __init rcu_spawn_boost_kthreads(void);
>  static void rcu_prepare_kthreads(int cpu);
>  static void rcu_cleanup_after_idle(void);
> diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
> index 1b3dd2fc0cd64..b440d6ef45d16 100644
> --- a/kernel/rcu/tree_plugin.h
> +++ b/kernel/rcu/tree_plugin.h
> @@ -24,17 +24,6 @@
>   *	   Paul E. McKenney <paulmck@linux.vnet.ibm.com>
>   */
> 
> -#include <linux/delay.h>
> -#include <linux/gfp.h>
> -#include <linux/oom.h>
> -#include <linux/sched/debug.h>
> -#include <linux/smpboot.h>
> -#include <linux/sched/isolation.h>
> -#include <uapi/linux/sched/types.h>
> -#include "../time/tick-internal.h"
> -
> -#ifdef CONFIG_RCU_BOOST
> -
>  #include "../locking/rtmutex_common.h"
> 
>  /*
> @@ -45,19 +34,6 @@ DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
>  DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
>  DEFINE_PER_CPU(char, rcu_cpu_has_work);
> 
> -#else /* #ifdef CONFIG_RCU_BOOST */
> -
> -/*
> - * Some architectures do not define rt_mutexes, but if !CONFIG_RCU_BOOST,
> - * all uses are in dead code.  Provide a definition to keep the compiler
> - * happy, but add WARN_ON_ONCE() to complain if used in the wrong place.
> - * This probably needs to be excluded from -rt builds.
> - */
> -#define rt_mutex_owner(a) ({ WARN_ON_ONCE(1); NULL; })
> -#define rt_mutex_futex_unlock(x) WARN_ON_ONCE(1)
> -
> -#endif /* #else #ifdef CONFIG_RCU_BOOST */
> -
>  #ifdef CONFIG_RCU_NOCB_CPU
>  static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
>  static bool __read_mostly rcu_nocb_poll;    /* Offload kthread are to poll. */
> @@ -652,7 +628,10 @@ static void rcu_read_unlock_special(struct task_struct *t)
>  		/* Need to defer quiescent state until everything is enabled. */
>  		if (irqs_were_disabled) {
>  			/* Enabling irqs does not reschedule, so... */
> -			raise_softirq_irqoff(RCU_SOFTIRQ);
> +			if (rcu_softirq_enabled)
> +				raise_softirq_irqoff(RCU_SOFTIRQ);
> +			else
> +				invoke_rcu_core();
>  		} else {
>  			/* Enabling BH or preempt does reschedule, so... */
>  			set_tsk_need_resched(current);
> @@ -1150,18 +1129,21 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
> 
>  #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
> 
> -#ifdef CONFIG_RCU_BOOST
> -
> -static void rcu_wake_cond(struct task_struct *t, int status)
> +/*
> + * If boosting, set rcuc kthreads to realtime priority.
> + */
> +static void rcu_cpu_kthread_setup(unsigned int cpu)
>  {
> -	/*
> -	 * If the thread is yielding, only wake it when this
> -	 * is invoked from idle
> -	 */
> -	if (status != RCU_KTHREAD_YIELDING || is_idle_task(current))
> -		wake_up_process(t);
> +#ifdef CONFIG_RCU_BOOST
> +	struct sched_param sp;
> +
> +	sp.sched_priority = kthread_prio;
> +	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
> +#endif /* #ifdef CONFIG_RCU_BOOST */
>  }
> 
> +#ifdef CONFIG_RCU_BOOST
> +
>  /*
>   * Carry out RCU priority boosting on the task indicated by ->exp_tasks
>   * or ->boost_tasks, advancing the pointer to the next task in the
> @@ -1299,23 +1281,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
>  	}
>  }
> 
> -/*
> - * Wake up the per-CPU kthread to invoke RCU callbacks.
> - */
> -static void invoke_rcu_callbacks_kthread(void)
> -{
> -	unsigned long flags;
> -
> -	local_irq_save(flags);
> -	__this_cpu_write(rcu_cpu_has_work, 1);
> -	if (__this_cpu_read(rcu_cpu_kthread_task) != NULL &&
> -	    current != __this_cpu_read(rcu_cpu_kthread_task)) {
> -		rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task),
> -			      __this_cpu_read(rcu_cpu_kthread_status));
> -	}
> -	local_irq_restore(flags);
> -}
> -
>  /*
>   * Is the current CPU running the RCU-callbacks kthread?
>   * Caller must have preemption disabled.
> @@ -1369,65 +1334,6 @@ static int rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
>  	return 0;
>  }
> 
> -static void rcu_kthread_do_work(void)
> -{
> -	rcu_do_batch(this_cpu_ptr(&rcu_data));
> -}
> -
> -static void rcu_cpu_kthread_setup(unsigned int cpu)
> -{
> -	struct sched_param sp;
> -
> -	sp.sched_priority = kthread_prio;
> -	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
> -}
> -
> -static void rcu_cpu_kthread_park(unsigned int cpu)
> -{
> -	per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
> -}
> -
> -static int rcu_cpu_kthread_should_run(unsigned int cpu)
> -{
> -	return __this_cpu_read(rcu_cpu_has_work);
> -}
> -
> -/*
> - * Per-CPU kernel thread that invokes RCU callbacks.  This replaces
> - * the RCU softirq used in configurations of RCU that do not support RCU
> - * priority boosting.
> - */
> -static void rcu_cpu_kthread(unsigned int cpu)
> -{
> -	unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status);
> -	char work, *workp = this_cpu_ptr(&rcu_cpu_has_work);
> -	int spincnt;
> -
> -	for (spincnt = 0; spincnt < 10; spincnt++) {
> -		trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
> -		local_bh_disable();
> -		*statusp = RCU_KTHREAD_RUNNING;
> -		this_cpu_inc(rcu_cpu_kthread_loops);
> -		local_irq_disable();
> -		work = *workp;
> -		*workp = 0;
> -		local_irq_enable();
> -		if (work)
> -			rcu_kthread_do_work();
> -		local_bh_enable();
> -		if (*workp == 0) {
> -			trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
> -			*statusp = RCU_KTHREAD_WAITING;
> -			return;
> -		}
> -	}
> -	*statusp = RCU_KTHREAD_YIELDING;
> -	trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
> -	schedule_timeout_interruptible(2);
> -	trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
> -	*statusp = RCU_KTHREAD_WAITING;
> -}
> -
>  /*
>   * Set the per-rcu_node kthread's affinity to cover all CPUs that are
>   * served by the rcu_node in question.  The CPU hotplug lock is still
> @@ -1458,27 +1364,13 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
>  	free_cpumask_var(cm);
>  }
> 
> -static struct smp_hotplug_thread rcu_cpu_thread_spec = {
> -	.store			= &rcu_cpu_kthread_task,
> -	.thread_should_run	= rcu_cpu_kthread_should_run,
> -	.thread_fn		= rcu_cpu_kthread,
> -	.thread_comm		= "rcuc/%u",
> -	.setup			= rcu_cpu_kthread_setup,
> -	.park			= rcu_cpu_kthread_park,
> -};
> -
>  /*
>   * Spawn boost kthreads -- called as soon as the scheduler is running.
>   */
>  static void __init rcu_spawn_boost_kthreads(void)
>  {
>  	struct rcu_node *rnp;
> -	int cpu;
> 
> -	for_each_possible_cpu(cpu)
> -		per_cpu(rcu_cpu_has_work, cpu) = 0;
> -	if (WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec), "%s: Could not start rcub kthread, OOM is now expected behavior\n", __func__))
> -		return;
>  	rcu_for_each_leaf_node(rnp)
>  		(void)rcu_spawn_one_boost_kthread(rnp);
>  }
> @@ -1501,11 +1393,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
>  	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
>  }
> 
> -static void invoke_rcu_callbacks_kthread(void)
> -{
> -	WARN_ON_ONCE(1);
> -}
> -
>  static bool rcu_is_callbacks_kthread(void)
>  {
>  	return false;
> -- 
> 2.20.1
> 



* [PATCH v2] rcu: Allow to eliminate softirq processing from rcutree
  2019-03-18  2:24 ` Paul E. McKenney
@ 2019-03-19 11:44   ` Sebastian Andrzej Siewior
  2019-03-19 15:59     ` Paul E. McKenney
  2019-03-20 11:32     ` Sebastian Andrzej Siewior
  0 siblings, 2 replies; 44+ messages in thread
From: Sebastian Andrzej Siewior @ 2019-03-19 11:44 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: linux-kernel, Josh Triplett, Steven Rostedt, Mathieu Desnoyers,
	Lai Jiangshan, Joel Fernandes, tglx, Mike Galbraith

From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>

Running RCU out of softirq is a problem for some workloads that would
like to manage RCU core processing independently of other softirq work,
for example, setting kthread priority.
This commit therefore introduces the `rcunosoftirq' option which moves
the RCU core work from softirq to a per-CPU/per-flavor SCHED_OTHER
kthread named rcuc.
The SCHED_OTHER approach avoids the scalability problems that appeared
with the earlier attempt to move RCU core processing from softirq to
kthreads.
That said, kernels built with RCU_BOOST=y will run the rcuc kthreads at
the RCU-boosting priority.

Reported-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
[bigeasy: add rcunosoftirq option]
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
v1…v2:
       - rebased to Paul's rcu/dev tree/branch
       - Replaced Mike's email address with @gmx.de since the @online.de
	 address no longer works.

 kernel/rcu/tree.c        | 129 +++++++++++++++++++++++++++++++++---
 kernel/rcu/tree.h        |   2 +-
 kernel/rcu/tree_plugin.h | 137 +++++----------------------------------
 3 files changed, 138 insertions(+), 130 deletions(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 0f31b79eb6761..0a719f726e149 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -51,6 +51,12 @@
 #include <linux/tick.h>
 #include <linux/sysrq.h>
 #include <linux/kprobes.h>
+#include <linux/gfp.h>
+#include <linux/oom.h>
+#include <linux/smpboot.h>
+#include <linux/jiffies.h>
+#include <linux/sched/isolation.h>
+#include "../time/tick-internal.h"
 
 #include "tree.h"
 #include "rcu.h"
@@ -2253,7 +2259,7 @@ void rcu_force_quiescent_state(void)
 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
 
 /* Perform RCU core processing work for the current CPU.  */
-static __latent_entropy void rcu_core(struct softirq_action *unused)
+static __latent_entropy void rcu_core(void)
 {
 	unsigned long flags;
 	struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);
@@ -2295,6 +2301,11 @@ static __latent_entropy void rcu_core(struct softirq_action *unused)
 	trace_rcu_utilization(TPS("End RCU core"));
 }
 
+static void rcu_core_si(struct softirq_action *h)
+{
+	rcu_core();
+}
+
 /*
  * Schedule RCU callback invocation.  If the running implementation of RCU
  * does not support RCU priority boosting, just do a direct call, otherwise
@@ -2306,19 +2317,120 @@ static void invoke_rcu_callbacks(struct rcu_data *rdp)
 {
 	if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
 		return;
-	if (likely(!rcu_state.boost)) {
-		rcu_do_batch(rdp);
-		return;
-	}
-	invoke_rcu_callbacks_kthread();
+	rcu_do_batch(rdp);
 }
 
+static void rcu_wake_cond(struct task_struct *t, int status)
+{
+	/*
+	 * If the thread is yielding, only wake it when this
+	 * is invoked from idle
+	 */
+	if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current)))
+		wake_up_process(t);
+}
+
+static bool rcu_softirq_enabled = true;
+
+static int __init rcunosoftirq_setup(char *str)
+{
+	rcu_softirq_enabled = false;
+	return 0;
+}
+__setup("rcunosoftirq", rcunosoftirq_setup);
+
+/*
+ * Wake up this CPU's rcuc kthread to do RCU core processing.
+ */
 static void invoke_rcu_core(void)
 {
-	if (cpu_online(smp_processor_id()))
+	unsigned long flags;
+	struct task_struct *t;
+
+	if (!cpu_online(smp_processor_id()))
+		return;
+	if (rcu_softirq_enabled) {
 		raise_softirq(RCU_SOFTIRQ);
+	} else {
+		local_irq_save(flags);
+		__this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
+		t = __this_cpu_read(rcu_data.rcu_cpu_kthread_task);
+		if (t != NULL && t != current)
+			rcu_wake_cond(t, __this_cpu_read(rcu_data.rcu_cpu_kthread_status));
+		local_irq_restore(flags);
+	}
 }
 
+static void rcu_cpu_kthread_park(unsigned int cpu)
+{
+	per_cpu(rcu_data.rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
+}
+
+static int rcu_cpu_kthread_should_run(unsigned int cpu)
+{
+	return __this_cpu_read(rcu_data.rcu_cpu_has_work);
+}
+
+/*
+ * Per-CPU kernel thread that invokes RCU callbacks.  This replaces
+ * the RCU softirq used in configurations of RCU that do not support RCU
+ * priority boosting.
+ */
+static void rcu_cpu_kthread(unsigned int cpu)
+{
+	unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
+	char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
+	int spincnt;
+
+	for (spincnt = 0; spincnt < 10; spincnt++) {
+		trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
+		local_bh_disable();
+		*statusp = RCU_KTHREAD_RUNNING;
+		local_irq_disable();
+		work = *workp;
+		*workp = 0;
+		local_irq_enable();
+		if (work)
+			rcu_core();
+		local_bh_enable();
+		if (*workp == 0) {
+			trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
+			*statusp = RCU_KTHREAD_WAITING;
+			return;
+		}
+	}
+	*statusp = RCU_KTHREAD_YIELDING;
+	trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
+	schedule_timeout_interruptible(2);
+	trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
+	*statusp = RCU_KTHREAD_WAITING;
+}
+
+static struct smp_hotplug_thread rcu_cpu_thread_spec = {
+	.store			= &rcu_data.rcu_cpu_kthread_task,
+	.thread_should_run	= rcu_cpu_kthread_should_run,
+	.thread_fn		= rcu_cpu_kthread,
+	.thread_comm		= "rcuc/%u",
+	.setup			= rcu_cpu_kthread_setup,
+	.park			= rcu_cpu_kthread_park,
+};
+
+/*
+ * Spawn per-CPU RCU core processing kthreads.
+ */
+static int __init rcu_spawn_core_kthreads(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0;
+	if (!IS_ENABLED(CONFIG_RCU_BOOST) && !rcu_softirq_enabled)
+		return 0;
+	WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec), "%s: Could not start rcub kthread, OOM is now expected behavior\n", __func__);
+	return 0;
+}
+early_initcall(rcu_spawn_core_kthreads);
+
 /*
  * Handle any core-RCU processing required by a call_rcu() invocation.
  */
@@ -3355,7 +3467,8 @@ void __init rcu_init(void)
 	rcu_init_one();
 	if (dump_tree)
 		rcu_dump_rcu_node_tree();
-	open_softirq(RCU_SOFTIRQ, rcu_core);
+	if (rcu_softirq_enabled)
+		open_softirq(RCU_SOFTIRQ, rcu_core_si);
 
 	/*
 	 * We don't need protection against CPU-hotplug here because
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index e253d11af3c49..a1a72a1ecb026 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -407,8 +407,8 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func);
 static void dump_blkd_tasks(struct rcu_node *rnp, int ncheck);
 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
 static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
-static void invoke_rcu_callbacks_kthread(void);
 static bool rcu_is_callbacks_kthread(void);
+static void rcu_cpu_kthread_setup(unsigned int cpu);
 static void __init rcu_spawn_boost_kthreads(void);
 static void rcu_prepare_kthreads(int cpu);
 static void rcu_cleanup_after_idle(void);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index f46b4af96ab95..eb99e750a9306 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -11,29 +11,7 @@
  *	   Paul E. McKenney <paulmck@linux.ibm.com>
  */
 
-#include <linux/delay.h>
-#include <linux/gfp.h>
-#include <linux/oom.h>
-#include <linux/sched/debug.h>
-#include <linux/smpboot.h>
-#include <linux/sched/isolation.h>
-#include <uapi/linux/sched/types.h>
-#include "../time/tick-internal.h"
-
-#ifdef CONFIG_RCU_BOOST
 #include "../locking/rtmutex_common.h"
-#else /* #ifdef CONFIG_RCU_BOOST */
-
-/*
- * Some architectures do not define rt_mutexes, but if !CONFIG_RCU_BOOST,
- * all uses are in dead code.  Provide a definition to keep the compiler
- * happy, but add WARN_ON_ONCE() to complain if used in the wrong place.
- * This probably needs to be excluded from -rt builds.
- */
-#define rt_mutex_owner(a) ({ WARN_ON_ONCE(1); NULL; })
-#define rt_mutex_futex_unlock(x) WARN_ON_ONCE(1)
-
-#endif /* #else #ifdef CONFIG_RCU_BOOST */
 
 #ifdef CONFIG_RCU_NOCB_CPU
 static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
@@ -629,7 +607,10 @@ static void rcu_read_unlock_special(struct task_struct *t)
 		/* Need to defer quiescent state until everything is enabled. */
 		if (irqs_were_disabled) {
 			/* Enabling irqs does not reschedule, so... */
-			raise_softirq_irqoff(RCU_SOFTIRQ);
+			if (rcu_softirq_enabled)
+				raise_softirq_irqoff(RCU_SOFTIRQ);
+			else
+				invoke_rcu_core();
 		} else {
 			/* Enabling BH or preempt does reschedule, so... */
 			set_tsk_need_resched(current);
@@ -944,18 +925,21 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
 
 #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
 
-#ifdef CONFIG_RCU_BOOST
-
-static void rcu_wake_cond(struct task_struct *t, int status)
+/*
+ * If boosting, set rcuc kthreads to realtime priority.
+ */
+static void rcu_cpu_kthread_setup(unsigned int cpu)
 {
-	/*
-	 * If the thread is yielding, only wake it when this
-	 * is invoked from idle
-	 */
-	if (status != RCU_KTHREAD_YIELDING || is_idle_task(current))
-		wake_up_process(t);
+#ifdef CONFIG_RCU_BOOST
+	struct sched_param sp;
+
+	sp.sched_priority = kthread_prio;
+	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
+#endif /* #ifdef CONFIG_RCU_BOOST */
 }
 
+#ifdef CONFIG_RCU_BOOST
+
 /*
  * Carry out RCU priority boosting on the task indicated by ->exp_tasks
  * or ->boost_tasks, advancing the pointer to the next task in the
@@ -1093,23 +1077,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
 	}
 }
 
-/*
- * Wake up the per-CPU kthread to invoke RCU callbacks.
- */
-static void invoke_rcu_callbacks_kthread(void)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	__this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
-	if (__this_cpu_read(rcu_data.rcu_cpu_kthread_task) != NULL &&
-	    current != __this_cpu_read(rcu_data.rcu_cpu_kthread_task)) {
-		rcu_wake_cond(__this_cpu_read(rcu_data.rcu_cpu_kthread_task),
-			      __this_cpu_read(rcu_data.rcu_cpu_kthread_status));
-	}
-	local_irq_restore(flags);
-}
-
 /*
  * Is the current CPU running the RCU-callbacks kthread?
  * Caller must have preemption disabled.
@@ -1163,59 +1130,6 @@ static int rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
 	return 0;
 }
 
-static void rcu_cpu_kthread_setup(unsigned int cpu)
-{
-	struct sched_param sp;
-
-	sp.sched_priority = kthread_prio;
-	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
-}
-
-static void rcu_cpu_kthread_park(unsigned int cpu)
-{
-	per_cpu(rcu_data.rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
-}
-
-static int rcu_cpu_kthread_should_run(unsigned int cpu)
-{
-	return __this_cpu_read(rcu_data.rcu_cpu_has_work);
-}
-
-/*
- * Per-CPU kernel thread that invokes RCU callbacks.  This replaces
- * the RCU softirq used in configurations of RCU that do not support RCU
- * priority boosting.
- */
-static void rcu_cpu_kthread(unsigned int cpu)
-{
-	unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
-	char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
-	int spincnt;
-
-	for (spincnt = 0; spincnt < 10; spincnt++) {
-		trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
-		local_bh_disable();
-		*statusp = RCU_KTHREAD_RUNNING;
-		local_irq_disable();
-		work = *workp;
-		*workp = 0;
-		local_irq_enable();
-		if (work)
-			rcu_do_batch(this_cpu_ptr(&rcu_data));
-		local_bh_enable();
-		if (*workp == 0) {
-			trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
-			*statusp = RCU_KTHREAD_WAITING;
-			return;
-		}
-	}
-	*statusp = RCU_KTHREAD_YIELDING;
-	trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
-	schedule_timeout_interruptible(2);
-	trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
-	*statusp = RCU_KTHREAD_WAITING;
-}
-
 /*
  * Set the per-rcu_node kthread's affinity to cover all CPUs that are
  * served by the rcu_node in question.  The CPU hotplug lock is still
@@ -1246,27 +1160,13 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
 	free_cpumask_var(cm);
 }
 
-static struct smp_hotplug_thread rcu_cpu_thread_spec = {
-	.store			= &rcu_data.rcu_cpu_kthread_task,
-	.thread_should_run	= rcu_cpu_kthread_should_run,
-	.thread_fn		= rcu_cpu_kthread,
-	.thread_comm		= "rcuc/%u",
-	.setup			= rcu_cpu_kthread_setup,
-	.park			= rcu_cpu_kthread_park,
-};
-
 /*
  * Spawn boost kthreads -- called as soon as the scheduler is running.
  */
 static void __init rcu_spawn_boost_kthreads(void)
 {
 	struct rcu_node *rnp;
-	int cpu;
 
-	for_each_possible_cpu(cpu)
-		per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0;
-	if (WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec), "%s: Could not start rcub kthread, OOM is now expected behavior\n", __func__))
-		return;
 	rcu_for_each_leaf_node(rnp)
 		(void)rcu_spawn_one_boost_kthread(rnp);
 }
@@ -1289,11 +1189,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 }
 
-static void invoke_rcu_callbacks_kthread(void)
-{
-	WARN_ON_ONCE(1);
-}
-
 static bool rcu_is_callbacks_kthread(void)
 {
 	return false;
-- 
2.20.1



* Re: [PATCH v2] rcu: Allow to eliminate softirq processing from rcutree
  2019-03-19 11:44   ` [PATCH v2] " Sebastian Andrzej Siewior
@ 2019-03-19 15:59     ` Paul E. McKenney
  2019-03-19 16:24       ` Sebastian Andrzej Siewior
  2019-03-20 11:32     ` Sebastian Andrzej Siewior
  1 sibling, 1 reply; 44+ messages in thread
From: Paul E. McKenney @ 2019-03-19 15:59 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-kernel, Josh Triplett, Steven Rostedt, Mathieu Desnoyers,
	Lai Jiangshan, Joel Fernandes, tglx, Mike Galbraith

On Tue, Mar 19, 2019 at 12:44:19PM +0100, Sebastian Andrzej Siewior wrote:
> From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
> 
> Running RCU out of softirq is a problem for some workloads that would
> like to manage RCU core processing independently of other softirq work,
> for example, setting kthread priority.
> This commit therefore introduces the `rcunosoftirq' option which moves
> the RCU core work from softirq to a per-CPU/per-flavor SCHED_OTHER
> kthread named rcuc.
> The SCHED_OTHER approach avoids the scalability problems that appeared
> with the earlier attempt to move RCU core processing from softirq to
> kthreads.
> That said, kernels built with RCU_BOOST=y will run the rcuc kthreads at
> the RCU-boosting priority.
> 
> Reported-by: Thomas Gleixner <tglx@linutronix.de>
> Tested-by: Mike Galbraith <efault@gmx.de>
> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> [bigeasy: add rcunosoftirq option]
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> ---
> v1…v2:
>        - rebased to Paul's rcu/dev tree/branch

Which is way better than an answer to my question, so thank you very
much!  ;-)

I doubt that there is any code left from my original, so I set you as
author.  I queued this and am starting tests without setting rcunosoftirq,
and will run more later setting it, courtesy of --bootargs.

Steve Rostedt did raise a good question about adding event tracing to
the park functions.  I haven't really settled on an answer yet.  Thoughts?

							Thanx, Paul

>        - Replaced Mike's email with @gmx.de since the @online.de does
> 	 not work anymore.
> 
>  kernel/rcu/tree.c        | 129 +++++++++++++++++++++++++++++++++---
>  kernel/rcu/tree.h        |   2 +-
>  kernel/rcu/tree_plugin.h | 137 +++++----------------------------------
>  3 files changed, 138 insertions(+), 130 deletions(-)
> 
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index 0f31b79eb6761..0a719f726e149 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -51,6 +51,12 @@
>  #include <linux/tick.h>
>  #include <linux/sysrq.h>
>  #include <linux/kprobes.h>
> +#include <linux/gfp.h>
> +#include <linux/oom.h>
> +#include <linux/smpboot.h>
> +#include <linux/jiffies.h>
> +#include <linux/sched/isolation.h>
> +#include "../time/tick-internal.h"
>  
>  #include "tree.h"
>  #include "rcu.h"
> @@ -2253,7 +2259,7 @@ void rcu_force_quiescent_state(void)
>  EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
>  
>  /* Perform RCU core processing work for the current CPU.  */
> -static __latent_entropy void rcu_core(struct softirq_action *unused)
> +static __latent_entropy void rcu_core(void)
>  {
>  	unsigned long flags;
>  	struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);
> @@ -2295,6 +2301,11 @@ static __latent_entropy void rcu_core(struct softirq_action *unused)
>  	trace_rcu_utilization(TPS("End RCU core"));
>  }
>  
> +static void rcu_core_si(struct softirq_action *h)
> +{
> +	rcu_core();
> +}
> +
>  /*
>   * Schedule RCU callback invocation.  If the running implementation of RCU
>   * does not support RCU priority boosting, just do a direct call, otherwise
> @@ -2306,19 +2317,120 @@ static void invoke_rcu_callbacks(struct rcu_data *rdp)
>  {
>  	if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
>  		return;
> -	if (likely(!rcu_state.boost)) {
> -		rcu_do_batch(rdp);
> -		return;
> -	}
> -	invoke_rcu_callbacks_kthread();
> +	rcu_do_batch(rdp);
>  }
>  
> +static void rcu_wake_cond(struct task_struct *t, int status)
> +{
> +	/*
> +	 * If the thread is yielding, only wake it when this
> +	 * is invoked from idle
> +	 */
> +	if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current)))
> +		wake_up_process(t);
> +}
> +
> +static bool rcu_softirq_enabled = true;
> +
> +static int __init rcunosoftirq_setup(char *str)
> +{
> +	rcu_softirq_enabled = false;
> +	return 0;
> +}
> +__setup("rcunosoftirq", rcunosoftirq_setup);
> +
> +/*
> + * Wake up this CPU's rcuc kthread to do RCU core processing.
> + */
>  static void invoke_rcu_core(void)
>  {
> -	if (cpu_online(smp_processor_id()))
> +	unsigned long flags;
> +	struct task_struct *t;
> +
> +	if (!cpu_online(smp_processor_id()))
> +		return;
> +	if (rcu_softirq_enabled) {
>  		raise_softirq(RCU_SOFTIRQ);
> +	} else {
> +		local_irq_save(flags);
> +		__this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
> +		t = __this_cpu_read(rcu_data.rcu_cpu_kthread_task);
> +		if (t != NULL && t != current)
> +			rcu_wake_cond(t, __this_cpu_read(rcu_data.rcu_cpu_kthread_status));
> +		local_irq_restore(flags);
> +	}
>  }
>  
> +static void rcu_cpu_kthread_park(unsigned int cpu)
> +{
> +	per_cpu(rcu_data.rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
> +}
> +
> +static int rcu_cpu_kthread_should_run(unsigned int cpu)
> +{
> +	return __this_cpu_read(rcu_data.rcu_cpu_has_work);
> +}
> +
> +/*
> + * Per-CPU kernel thread that invokes RCU callbacks.  This replaces
> + * the RCU softirq used in configurations of RCU that do not support RCU
> + * priority boosting.
> + */
> +static void rcu_cpu_kthread(unsigned int cpu)
> +{
> +	unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
> +	char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
> +	int spincnt;
> +
> +	for (spincnt = 0; spincnt < 10; spincnt++) {
> +		trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
> +		local_bh_disable();
> +		*statusp = RCU_KTHREAD_RUNNING;
> +		local_irq_disable();
> +		work = *workp;
> +		*workp = 0;
> +		local_irq_enable();
> +		if (work)
> +			rcu_core();
> +		local_bh_enable();
> +		if (*workp == 0) {
> +			trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
> +			*statusp = RCU_KTHREAD_WAITING;
> +			return;
> +		}
> +	}
> +	*statusp = RCU_KTHREAD_YIELDING;
> +	trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
> +	schedule_timeout_interruptible(2);
> +	trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
> +	*statusp = RCU_KTHREAD_WAITING;
> +}
> +
> +static struct smp_hotplug_thread rcu_cpu_thread_spec = {
> +	.store			= &rcu_data.rcu_cpu_kthread_task,
> +	.thread_should_run	= rcu_cpu_kthread_should_run,
> +	.thread_fn		= rcu_cpu_kthread,
> +	.thread_comm		= "rcuc/%u",
> +	.setup			= rcu_cpu_kthread_setup,
> +	.park			= rcu_cpu_kthread_park,
> +};
> +
> +/*
> + * Spawn per-CPU RCU core processing kthreads.
> + */
> +static int __init rcu_spawn_core_kthreads(void)
> +{
> +	int cpu;
> +
> +	for_each_possible_cpu(cpu)
> +		per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0;
> +	if (!IS_ENABLED(CONFIG_RCU_BOOST) && !rcu_softirq_enabled)
> +		return 0;
> +	WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec), "%s: Could not start rcub kthread, OOM is now expected behavior\n", __func__);
> +	return 0;
> +}
> +early_initcall(rcu_spawn_core_kthreads);
> +
>  /*
>   * Handle any core-RCU processing required by a call_rcu() invocation.
>   */
> @@ -3355,7 +3467,8 @@ void __init rcu_init(void)
>  	rcu_init_one();
>  	if (dump_tree)
>  		rcu_dump_rcu_node_tree();
> -	open_softirq(RCU_SOFTIRQ, rcu_core);
> +	if (rcu_softirq_enabled)
> +		open_softirq(RCU_SOFTIRQ, rcu_core_si);
>  
>  	/*
>  	 * We don't need protection against CPU-hotplug here because
> diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
> index e253d11af3c49..a1a72a1ecb026 100644
> --- a/kernel/rcu/tree.h
> +++ b/kernel/rcu/tree.h
> @@ -407,8 +407,8 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func);
>  static void dump_blkd_tasks(struct rcu_node *rnp, int ncheck);
>  static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
>  static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
> -static void invoke_rcu_callbacks_kthread(void);
>  static bool rcu_is_callbacks_kthread(void);
> +static void rcu_cpu_kthread_setup(unsigned int cpu);
>  static void __init rcu_spawn_boost_kthreads(void);
>  static void rcu_prepare_kthreads(int cpu);
>  static void rcu_cleanup_after_idle(void);
> diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
> index f46b4af96ab95..eb99e750a9306 100644
> --- a/kernel/rcu/tree_plugin.h
> +++ b/kernel/rcu/tree_plugin.h
> @@ -11,29 +11,7 @@
>   *	   Paul E. McKenney <paulmck@linux.ibm.com>
>   */
>  
> -#include <linux/delay.h>
> -#include <linux/gfp.h>
> -#include <linux/oom.h>
> -#include <linux/sched/debug.h>
> -#include <linux/smpboot.h>
> -#include <linux/sched/isolation.h>
> -#include <uapi/linux/sched/types.h>
> -#include "../time/tick-internal.h"
> -
> -#ifdef CONFIG_RCU_BOOST
>  #include "../locking/rtmutex_common.h"
> -#else /* #ifdef CONFIG_RCU_BOOST */
> -
> -/*
> - * Some architectures do not define rt_mutexes, but if !CONFIG_RCU_BOOST,
> - * all uses are in dead code.  Provide a definition to keep the compiler
> - * happy, but add WARN_ON_ONCE() to complain if used in the wrong place.
> - * This probably needs to be excluded from -rt builds.
> - */
> -#define rt_mutex_owner(a) ({ WARN_ON_ONCE(1); NULL; })
> -#define rt_mutex_futex_unlock(x) WARN_ON_ONCE(1)
> -
> -#endif /* #else #ifdef CONFIG_RCU_BOOST */
>  
>  #ifdef CONFIG_RCU_NOCB_CPU
>  static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
> @@ -629,7 +607,10 @@ static void rcu_read_unlock_special(struct task_struct *t)
>  		/* Need to defer quiescent state until everything is enabled. */
>  		if (irqs_were_disabled) {
>  			/* Enabling irqs does not reschedule, so... */
> -			raise_softirq_irqoff(RCU_SOFTIRQ);
> +			if (rcu_softirq_enabled)
> +				raise_softirq_irqoff(RCU_SOFTIRQ);
> +			else
> +				invoke_rcu_core();
>  		} else {
>  			/* Enabling BH or preempt does reschedule, so... */
>  			set_tsk_need_resched(current);
> @@ -944,18 +925,21 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
>  
>  #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
>  
> -#ifdef CONFIG_RCU_BOOST
> -
> -static void rcu_wake_cond(struct task_struct *t, int status)
> +/*
> + * If boosting, set rcuc kthreads to realtime priority.
> + */
> +static void rcu_cpu_kthread_setup(unsigned int cpu)
>  {
> -	/*
> -	 * If the thread is yielding, only wake it when this
> -	 * is invoked from idle
> -	 */
> -	if (status != RCU_KTHREAD_YIELDING || is_idle_task(current))
> -		wake_up_process(t);
> +#ifdef CONFIG_RCU_BOOST
> +	struct sched_param sp;
> +
> +	sp.sched_priority = kthread_prio;
> +	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
> +#endif /* #ifdef CONFIG_RCU_BOOST */
>  }
>  
> +#ifdef CONFIG_RCU_BOOST
> +
>  /*
>   * Carry out RCU priority boosting on the task indicated by ->exp_tasks
>   * or ->boost_tasks, advancing the pointer to the next task in the
> @@ -1093,23 +1077,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
>  	}
>  }
>  
> -/*
> - * Wake up the per-CPU kthread to invoke RCU callbacks.
> - */
> -static void invoke_rcu_callbacks_kthread(void)
> -{
> -	unsigned long flags;
> -
> -	local_irq_save(flags);
> -	__this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
> -	if (__this_cpu_read(rcu_data.rcu_cpu_kthread_task) != NULL &&
> -	    current != __this_cpu_read(rcu_data.rcu_cpu_kthread_task)) {
> -		rcu_wake_cond(__this_cpu_read(rcu_data.rcu_cpu_kthread_task),
> -			      __this_cpu_read(rcu_data.rcu_cpu_kthread_status));
> -	}
> -	local_irq_restore(flags);
> -}
> -
>  /*
>   * Is the current CPU running the RCU-callbacks kthread?
>   * Caller must have preemption disabled.
> @@ -1163,59 +1130,6 @@ static int rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
>  	return 0;
>  }
>  
> -static void rcu_cpu_kthread_setup(unsigned int cpu)
> -{
> -	struct sched_param sp;
> -
> -	sp.sched_priority = kthread_prio;
> -	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
> -}
> -
> -static void rcu_cpu_kthread_park(unsigned int cpu)
> -{
> -	per_cpu(rcu_data.rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
> -}
> -
> -static int rcu_cpu_kthread_should_run(unsigned int cpu)
> -{
> -	return __this_cpu_read(rcu_data.rcu_cpu_has_work);
> -}
> -
> -/*
> - * Per-CPU kernel thread that invokes RCU callbacks.  This replaces
> - * the RCU softirq used in configurations of RCU that do not support RCU
> - * priority boosting.
> - */
> -static void rcu_cpu_kthread(unsigned int cpu)
> -{
> -	unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
> -	char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
> -	int spincnt;
> -
> -	for (spincnt = 0; spincnt < 10; spincnt++) {
> -		trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
> -		local_bh_disable();
> -		*statusp = RCU_KTHREAD_RUNNING;
> -		local_irq_disable();
> -		work = *workp;
> -		*workp = 0;
> -		local_irq_enable();
> -		if (work)
> -			rcu_do_batch(this_cpu_ptr(&rcu_data));
> -		local_bh_enable();
> -		if (*workp == 0) {
> -			trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
> -			*statusp = RCU_KTHREAD_WAITING;
> -			return;
> -		}
> -	}
> -	*statusp = RCU_KTHREAD_YIELDING;
> -	trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
> -	schedule_timeout_interruptible(2);
> -	trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
> -	*statusp = RCU_KTHREAD_WAITING;
> -}
> -
>  /*
>   * Set the per-rcu_node kthread's affinity to cover all CPUs that are
>   * served by the rcu_node in question.  The CPU hotplug lock is still
> @@ -1246,27 +1160,13 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
>  	free_cpumask_var(cm);
>  }
>  
> -static struct smp_hotplug_thread rcu_cpu_thread_spec = {
> -	.store			= &rcu_data.rcu_cpu_kthread_task,
> -	.thread_should_run	= rcu_cpu_kthread_should_run,
> -	.thread_fn		= rcu_cpu_kthread,
> -	.thread_comm		= "rcuc/%u",
> -	.setup			= rcu_cpu_kthread_setup,
> -	.park			= rcu_cpu_kthread_park,
> -};
> -
>  /*
>   * Spawn boost kthreads -- called as soon as the scheduler is running.
>   */
>  static void __init rcu_spawn_boost_kthreads(void)
>  {
>  	struct rcu_node *rnp;
> -	int cpu;
>  
> -	for_each_possible_cpu(cpu)
> -		per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0;
> -	if (WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec), "%s: Could not start rcub kthread, OOM is now expected behavior\n", __func__))
> -		return;
>  	rcu_for_each_leaf_node(rnp)
>  		(void)rcu_spawn_one_boost_kthread(rnp);
>  }
> @@ -1289,11 +1189,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
>  	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
>  }
>  
> -static void invoke_rcu_callbacks_kthread(void)
> -{
> -	WARN_ON_ONCE(1);
> -}
> -
>  static bool rcu_is_callbacks_kthread(void)
>  {
>  	return false;
> -- 
> 2.20.1
> 


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v2] rcu: Allow to eliminate softirq processing from rcutree
  2019-03-19 15:59     ` Paul E. McKenney
@ 2019-03-19 16:24       ` Sebastian Andrzej Siewior
  2019-03-19 16:50         ` Paul E. McKenney
  0 siblings, 1 reply; 44+ messages in thread
From: Sebastian Andrzej Siewior @ 2019-03-19 16:24 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: linux-kernel, Josh Triplett, Steven Rostedt, Mathieu Desnoyers,
	Lai Jiangshan, Joel Fernandes, tglx, Mike Galbraith

On 2019-03-19 08:59:23 [-0700], Paul E. McKenney wrote:
> I doubt that there is any code left from my original, so I set you as
> author.  

I always forward-ported the patch over the years, so if it is no
longer what it once was, so be it.

> I queued this and am starting tests without setting rcunosoftirq,
> and will run more later setting it, courtesy of --bootargs.

oki.

> Steve Rostedt did raise a good question about adding event tracing to
> the park functions.  I haven't really settled on an answer yet.  Thoughts?

It should trigger on CPU hotplug events. If there were something similar
for the softirq processing, then the threaded processing should also have
it. Please tell me which one should be added and I'm happy to add it.

> 							Thanx, Paul
> 

Sebastian

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v2] rcu: Allow to eliminate softirq processing from rcutree
  2019-03-19 16:24       ` Sebastian Andrzej Siewior
@ 2019-03-19 16:50         ` Paul E. McKenney
  2019-03-19 17:02           ` Sebastian Andrzej Siewior
  0 siblings, 1 reply; 44+ messages in thread
From: Paul E. McKenney @ 2019-03-19 16:50 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-kernel, Josh Triplett, Steven Rostedt, Mathieu Desnoyers,
	Lai Jiangshan, Joel Fernandes, tglx, Mike Galbraith

On Tue, Mar 19, 2019 at 05:24:31PM +0100, Sebastian Andrzej Siewior wrote:
> On 2019-03-19 08:59:23 [-0700], Paul E. McKenney wrote:
> > I doubt that there is any code left from my original, so I set you as
> > author.  
> 
> I always forward-ported the patch over the years, so if it is no
> longer what it once was, so be it.

Besides, it looks very weird for me to have two Signed-off-by lines.  ;-)

> > I queued this and am starting tests without setting rcunosoftirq,
> > and will run more later setting it, courtesy of --bootargs.
> 
> oki.
> 
> > Steve Rostedt did raise a good question about adding event tracing to
> > the park functions.  I haven't really settled on an answer yet.  Thoughts?
> 
> It should trigger CPU hotplug events. If there was something similar for
> the softirq processing then the threaded processing should also have it.
> Please tell me which one should be added and I'm happy to add one.

In theory, the trace_rcu_utilization() should be added, just like at
the beginning and end of the function rcu_core(), but as far as I know,
no one uses that, so I actually have it on my list to remove.
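
For reference, a minimal sketch of what such instrumentation might look
like (untested, the trace strings below are made up for illustration, and
it is moot anyway if the tracepoint gets removed):

static void rcu_cpu_kthread_park(unsigned int cpu)
{
	/* Sketch only: bracket parking with the same tracepoint that is
	 * used at the start and end of RCU core processing. */
	trace_rcu_utilization(TPS("Start CPU kthread@rcu_park"));
	per_cpu(rcu_data.rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
	trace_rcu_utilization(TPS("End CPU kthread@rcu_park"));
}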

							Thanx, Paul


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v2] rcu: Allow to eliminate softirq processing from rcutree
  2019-03-19 16:50         ` Paul E. McKenney
@ 2019-03-19 17:02           ` Sebastian Andrzej Siewior
  0 siblings, 0 replies; 44+ messages in thread
From: Sebastian Andrzej Siewior @ 2019-03-19 17:02 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: linux-kernel, Josh Triplett, Steven Rostedt, Mathieu Desnoyers,
	Lai Jiangshan, Joel Fernandes, tglx, Mike Galbraith

On 2019-03-19 09:50:07 [-0700], Paul E. McKenney wrote:
> Besides, it looks very weird for me to have two Signed-off-by lines.  ;-)

See commit 602cae04c4864 ("perf/x86/intel: Delay memory deallocation
until x86_pmu_dead_cpu()")

> In theory, the trace_rcu_utilization() should be added, just like at
> the beginning and end of the function rcu_core(), but as far as I know,
> no one uses that, so I actually have it on my list to remove.

oki. Then nothing should be done :)

> 							Thanx, Paul

Sebastian

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH] rcu: Allow to eliminate softirq processing from rcutree
  2019-03-15 11:11 [PATCH] rcu: Allow to eliminate softirq processing from rcutree Sebastian Andrzej Siewior
  2019-03-15 13:35 ` Steven Rostedt
  2019-03-18  2:24 ` Paul E. McKenney
@ 2019-03-20  0:26 ` Joel Fernandes
  2019-03-20 11:28   ` Sebastian Andrzej Siewior
  2019-03-20 15:24   ` Paul E. McKenney
  2 siblings, 2 replies; 44+ messages in thread
From: Joel Fernandes @ 2019-03-20  0:26 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-kernel, Josh Triplett, Steven Rostedt, Mathieu Desnoyers,
	Lai Jiangshan, tglx, Paul E. McKenney, Mike Galbraith, rcu

Adding the rcu@vger.kernel.org list as well, more comment below:

On Fri, Mar 15, 2019 at 12:11:30PM +0100, Sebastian Andrzej Siewior wrote:
> From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
> 
> Running RCU out of softirq is a problem for some workloads that would
> like to manage RCU core processing independently of other softirq work,
> for example, setting kthread priority.
> This commit therefore introduces the `rcunosoftirq' option which moves
> the RCU core work from softirq to a per-CPU/per-flavor SCHED_OTHER
> kthread named rcuc.
> The SCHED_OTHER approach avoids the scalability problems that appeared
> with the earlier attempt to move RCU core processing to from softirq to
> kthreads.
> That said, kernels built with RCU_BOOST=y will run the rcuc kthreads at
> the RCU-boosting priority.
> 
> Reported-by: Thomas Gleixner <tglx@linutronix.de>
> Tested-by: Mike Galbraith <bitbucket@online.de>
> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> [bigeasy: add rcunosoftirq option]
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> ---
>  kernel/rcu/tree.c        | 132 ++++++++++++++++++++++++++++++++---
>  kernel/rcu/tree.h        |   4 +-
>  kernel/rcu/tree_plugin.h | 145 +++++----------------------------------
>  3 files changed, 141 insertions(+), 140 deletions(-)
> 
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index 9180158756d2c..498dc5e9287d0 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -62,6 +62,12 @@
>  #include <linux/suspend.h>
>  #include <linux/ftrace.h>
>  #include <linux/tick.h>
> +#include <linux/gfp.h>
> +#include <linux/oom.h>
> +#include <linux/smpboot.h>
> +#include <linux/jiffies.h>
> +#include <linux/sched/isolation.h>
> +#include "../time/tick-internal.h"
>  
>  #include "tree.h"
>  #include "rcu.h"
> @@ -2716,7 +2722,7 @@ EXPORT_SYMBOL_GPL(rcu_fwd_progress_check);
>   * structures.  This may be called only from the CPU to whom the rdp
>   * belongs.
>   */
> -static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused)
> +static __latent_entropy void rcu_process_callbacks(void)
>  {
>  	unsigned long flags;
>  	struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);
> @@ -2758,6 +2764,13 @@ static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused
>  	trace_rcu_utilization(TPS("End RCU core"));
>  }
>  
> +static void rcu_process_callbacks_si(struct softirq_action *h)
> +{
> +	rcu_process_callbacks();
> +}
> +
> +static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
> +
>  /*
>   * Schedule RCU callback invocation.  If the running implementation of RCU
>   * does not support RCU priority boosting, just do a direct call, otherwise
> @@ -2769,19 +2782,121 @@ static void invoke_rcu_callbacks(struct rcu_data *rdp)
>  {
>  	if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
>  		return;
> -	if (likely(!rcu_state.boost)) {
> -		rcu_do_batch(rdp);
> -		return;
> -	}
> -	invoke_rcu_callbacks_kthread();
> +	rcu_do_batch(rdp);

Looks like a nice change, but one question...

Consider the case where rcunosoftirq boot option is not passed.

Before, if RCU_BOOST=y, then callbacks would be invoked in rcuc threads if
possible, by those threads being woken up from within the softirq context
(in invoke_rcu_callbacks).

Now, if RCU_BOOST=y, then callbacks would only be invoked in softirq context
and not in the threads at all. Because rcu_softirq_enabled = true, the
path executes:
  rcu_read_unlock_special() ->
        raise_softirq_irqoff() ->
                rcu_process_callbacks_si() ->
                        rcu_process_callbacks() ->
                                invoke_rcu_callbacks() ->
                                        rcu_do_batch()

This seems like a behavioral change to me. This makes the callbacks always
execute from the softirq context and not the threads when boosting is
configured. IMO in the very least, such behavioral change should be
documented in the change.

One way to fix this I think could be, if boosting is enabled, then set
rcu_softirq_enabled to false by default so the callbacks are still executed
in the rcuc threads.
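
For concreteness, a sketch of that idea (not a tested or posted change;
the initializer below is my own illustration):

/* Hypothetical default: when boosting is built in, use the rcuc kthreads
 * instead of RCU_SOFTIRQ; the rcunosoftirq boot option then only changes
 * behavior in the !RCU_BOOST case. */
static bool rcu_softirq_enabled = !IS_ENABLED(CONFIG_RCU_BOOST);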

Did I miss something? Sorry if I did, thanks!

 - Joel


>  }
>  
> +static void rcu_wake_cond(struct task_struct *t, int status)
> +{
> +	/*
> +	 * If the thread is yielding, only wake it when this
> +	 * is invoked from idle
> +	 */
> +	if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current)))
> +		wake_up_process(t);
> +}
> +
> +static bool rcu_softirq_enabled = true;
> +
> +static int __init rcunosoftirq_setup(char *str)
> +{
> +	rcu_softirq_enabled = false;
> +	return 0;
> +}
> +__setup("rcunosoftirq", rcunosoftirq_setup);
> +
> +/*
> + * Wake up this CPU's rcuc kthread to do RCU core processing.
> + */
>  static void invoke_rcu_core(void)
>  {
> -	if (cpu_online(smp_processor_id()))
> +	unsigned long flags;
> +	struct task_struct *t;
> +
> +	if (!cpu_online(smp_processor_id()))
> +		return;
> +	if (rcu_softirq_enabled) {
>  		raise_softirq(RCU_SOFTIRQ);
> +	} else {
> +		local_irq_save(flags);
> +		__this_cpu_write(rcu_cpu_has_work, 1);
> +		t = __this_cpu_read(rcu_cpu_kthread_task);
> +		if (t != NULL && current != t)
> +			rcu_wake_cond(t, __this_cpu_read(rcu_cpu_kthread_status));
> +		local_irq_restore(flags);
> +	}
>  }
>  
> +static void rcu_cpu_kthread_park(unsigned int cpu)
> +{
> +	per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
> +}
> +
> +static int rcu_cpu_kthread_should_run(unsigned int cpu)
> +{
> +	return __this_cpu_read(rcu_cpu_has_work);
> +}
> +
> +/*
> + * Per-CPU kernel thread that invokes RCU callbacks.  This replaces
> + * the RCU softirq used in configurations of RCU that do not support RCU
> + * priority boosting.
> + */
> +static void rcu_cpu_kthread(unsigned int cpu)
> +{
> +	unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status);
> +	char work, *workp = this_cpu_ptr(&rcu_cpu_has_work);
> +	int spincnt;
> +
> +	for (spincnt = 0; spincnt < 10; spincnt++) {
> +		trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
> +		local_bh_disable();
> +		*statusp = RCU_KTHREAD_RUNNING;
> +		this_cpu_inc(rcu_cpu_kthread_loops);
> +		local_irq_disable();
> +		work = *workp;
> +		*workp = 0;
> +		local_irq_enable();
> +		if (work)
> +			rcu_process_callbacks();
> +		local_bh_enable();
> +		if (*workp == 0) {
> +			trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
> +			*statusp = RCU_KTHREAD_WAITING;
> +			return;
> +		}
> +	}
> +	*statusp = RCU_KTHREAD_YIELDING;
> +	trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
> +	schedule_timeout_interruptible(2);
> +	trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
> +	*statusp = RCU_KTHREAD_WAITING;
> +}
> +
> +static struct smp_hotplug_thread rcu_cpu_thread_spec = {
> +	.store			= &rcu_cpu_kthread_task,
> +	.thread_should_run	= rcu_cpu_kthread_should_run,
> +	.thread_fn		= rcu_cpu_kthread,
> +	.thread_comm		= "rcuc/%u",
> +	.setup			= rcu_cpu_kthread_setup,
> +	.park			= rcu_cpu_kthread_park,
> +};
> +
> +/*
> + * Spawn per-CPU RCU core processing kthreads.
> + */
> +static int __init rcu_spawn_core_kthreads(void)
> +{
> +	int cpu;
> +
> +	for_each_possible_cpu(cpu)
> +		per_cpu(rcu_cpu_has_work, cpu) = 0;
> +	if (!IS_ENABLED(CONFIG_RCU_BOOST) && !rcu_softirq_enabled)
> +		return 0;
> +	WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec), "%s: Could not start rcub kthread, OOM is now expected behavior\n", __func__);
> +	return 0;
> +}
> +early_initcall(rcu_spawn_core_kthreads);
> +
>  /*
>   * Handle any core-RCU processing required by a call_rcu() invocation.
>   */
> @@ -3777,7 +3892,8 @@ void __init rcu_init(void)
>  	rcu_init_one();
>  	if (dump_tree)
>  		rcu_dump_rcu_node_tree();
> -	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
> +	if (rcu_softirq_enabled)
> +		open_softirq(RCU_SOFTIRQ, rcu_process_callbacks_si);
>  
>  	/*
>  	 * We don't need protection against CPU-hotplug here because
> diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
> index d90b02b53c0ec..fb8fc6ecc391b 100644
> --- a/kernel/rcu/tree.h
> +++ b/kernel/rcu/tree.h
> @@ -402,12 +402,10 @@ static const char *tp_rcu_varname __used __tracepoint_string = rcu_name;
>  
>  int rcu_dynticks_snap(struct rcu_data *rdp);
>  
> -#ifdef CONFIG_RCU_BOOST
>  DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
>  DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu);
>  DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
>  DECLARE_PER_CPU(char, rcu_cpu_has_work);
> -#endif /* #ifdef CONFIG_RCU_BOOST */
>  
>  /* Forward declarations for rcutree_plugin.h */
>  static void rcu_bootup_announce(void);
> @@ -425,8 +423,8 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func);
>  static void dump_blkd_tasks(struct rcu_node *rnp, int ncheck);
>  static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
>  static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
> -static void invoke_rcu_callbacks_kthread(void);
>  static bool rcu_is_callbacks_kthread(void);
> +static void rcu_cpu_kthread_setup(unsigned int cpu);
>  static void __init rcu_spawn_boost_kthreads(void);
>  static void rcu_prepare_kthreads(int cpu);
>  static void rcu_cleanup_after_idle(void);
> diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
> index 1b3dd2fc0cd64..b440d6ef45d16 100644
> --- a/kernel/rcu/tree_plugin.h
> +++ b/kernel/rcu/tree_plugin.h
> @@ -24,17 +24,6 @@
>   *	   Paul E. McKenney <paulmck@linux.vnet.ibm.com>
>   */
>  
> -#include <linux/delay.h>
> -#include <linux/gfp.h>
> -#include <linux/oom.h>
> -#include <linux/sched/debug.h>
> -#include <linux/smpboot.h>
> -#include <linux/sched/isolation.h>
> -#include <uapi/linux/sched/types.h>
> -#include "../time/tick-internal.h"
> -
> -#ifdef CONFIG_RCU_BOOST
> -
>  #include "../locking/rtmutex_common.h"
>  
>  /*
> @@ -45,19 +34,6 @@ DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
>  DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
>  DEFINE_PER_CPU(char, rcu_cpu_has_work);
>  
> -#else /* #ifdef CONFIG_RCU_BOOST */
> -
> -/*
> - * Some architectures do not define rt_mutexes, but if !CONFIG_RCU_BOOST,
> - * all uses are in dead code.  Provide a definition to keep the compiler
> - * happy, but add WARN_ON_ONCE() to complain if used in the wrong place.
> - * This probably needs to be excluded from -rt builds.
> - */
> -#define rt_mutex_owner(a) ({ WARN_ON_ONCE(1); NULL; })
> -#define rt_mutex_futex_unlock(x) WARN_ON_ONCE(1)
> -
> -#endif /* #else #ifdef CONFIG_RCU_BOOST */
> -
>  #ifdef CONFIG_RCU_NOCB_CPU
>  static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
>  static bool __read_mostly rcu_nocb_poll;    /* Offload kthread are to poll. */
> @@ -652,7 +628,10 @@ static void rcu_read_unlock_special(struct task_struct *t)
>  		/* Need to defer quiescent state until everything is enabled. */
>  		if (irqs_were_disabled) {
>  			/* Enabling irqs does not reschedule, so... */
> -			raise_softirq_irqoff(RCU_SOFTIRQ);
> +			if (rcu_softirq_enabled)
> +				raise_softirq_irqoff(RCU_SOFTIRQ);
> +			else
> +				invoke_rcu_core();
>  		} else {
>  			/* Enabling BH or preempt does reschedule, so... */
>  			set_tsk_need_resched(current);
> @@ -1150,18 +1129,21 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
>  
>  #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
>  
> -#ifdef CONFIG_RCU_BOOST
> -
> -static void rcu_wake_cond(struct task_struct *t, int status)
> +/*
> + * If boosting, set rcuc kthreads to realtime priority.
> + */
> +static void rcu_cpu_kthread_setup(unsigned int cpu)
>  {
> -	/*
> -	 * If the thread is yielding, only wake it when this
> -	 * is invoked from idle
> -	 */
> -	if (status != RCU_KTHREAD_YIELDING || is_idle_task(current))
> -		wake_up_process(t);
> +#ifdef CONFIG_RCU_BOOST
> +	struct sched_param sp;
> +
> +	sp.sched_priority = kthread_prio;
> +	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
> +#endif /* #ifdef CONFIG_RCU_BOOST */
>  }
>  
> +#ifdef CONFIG_RCU_BOOST
> +
>  /*
>   * Carry out RCU priority boosting on the task indicated by ->exp_tasks
>   * or ->boost_tasks, advancing the pointer to the next task in the
> @@ -1299,23 +1281,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
>  	}
>  }
>  
> -/*
> - * Wake up the per-CPU kthread to invoke RCU callbacks.
> - */
> -static void invoke_rcu_callbacks_kthread(void)
> -{
> -	unsigned long flags;
> -
> -	local_irq_save(flags);
> -	__this_cpu_write(rcu_cpu_has_work, 1);
> -	if (__this_cpu_read(rcu_cpu_kthread_task) != NULL &&
> -	    current != __this_cpu_read(rcu_cpu_kthread_task)) {
> -		rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task),
> -			      __this_cpu_read(rcu_cpu_kthread_status));
> -	}
> -	local_irq_restore(flags);
> -}
> -
>  /*
>   * Is the current CPU running the RCU-callbacks kthread?
>   * Caller must have preemption disabled.
> @@ -1369,65 +1334,6 @@ static int rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
>  	return 0;
>  }
>  
> -static void rcu_kthread_do_work(void)
> -{
> -	rcu_do_batch(this_cpu_ptr(&rcu_data));
> -}
> -
> -static void rcu_cpu_kthread_setup(unsigned int cpu)
> -{
> -	struct sched_param sp;
> -
> -	sp.sched_priority = kthread_prio;
> -	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
> -}
> -
> -static void rcu_cpu_kthread_park(unsigned int cpu)
> -{
> -	per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
> -}
> -
> -static int rcu_cpu_kthread_should_run(unsigned int cpu)
> -{
> -	return __this_cpu_read(rcu_cpu_has_work);
> -}
> -
> -/*
> - * Per-CPU kernel thread that invokes RCU callbacks.  This replaces
> - * the RCU softirq used in configurations of RCU that do not support RCU
> - * priority boosting.
> - */
> -static void rcu_cpu_kthread(unsigned int cpu)
> -{
> -	unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status);
> -	char work, *workp = this_cpu_ptr(&rcu_cpu_has_work);
> -	int spincnt;
> -
> -	for (spincnt = 0; spincnt < 10; spincnt++) {
> -		trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
> -		local_bh_disable();
> -		*statusp = RCU_KTHREAD_RUNNING;
> -		this_cpu_inc(rcu_cpu_kthread_loops);
> -		local_irq_disable();
> -		work = *workp;
> -		*workp = 0;
> -		local_irq_enable();
> -		if (work)
> -			rcu_kthread_do_work();
> -		local_bh_enable();
> -		if (*workp == 0) {
> -			trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
> -			*statusp = RCU_KTHREAD_WAITING;
> -			return;
> -		}
> -	}
> -	*statusp = RCU_KTHREAD_YIELDING;
> -	trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
> -	schedule_timeout_interruptible(2);
> -	trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
> -	*statusp = RCU_KTHREAD_WAITING;
> -}
> -
>  /*
>   * Set the per-rcu_node kthread's affinity to cover all CPUs that are
>   * served by the rcu_node in question.  The CPU hotplug lock is still
> @@ -1458,27 +1364,13 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
>  	free_cpumask_var(cm);
>  }
>  
> -static struct smp_hotplug_thread rcu_cpu_thread_spec = {
> -	.store			= &rcu_cpu_kthread_task,
> -	.thread_should_run	= rcu_cpu_kthread_should_run,
> -	.thread_fn		= rcu_cpu_kthread,
> -	.thread_comm		= "rcuc/%u",
> -	.setup			= rcu_cpu_kthread_setup,
> -	.park			= rcu_cpu_kthread_park,
> -};
> -
>  /*
>   * Spawn boost kthreads -- called as soon as the scheduler is running.
>   */
>  static void __init rcu_spawn_boost_kthreads(void)
>  {
>  	struct rcu_node *rnp;
> -	int cpu;
>  
> -	for_each_possible_cpu(cpu)
> -		per_cpu(rcu_cpu_has_work, cpu) = 0;
> -	if (WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec), "%s: Could not start rcub kthread, OOM is now expected behavior\n", __func__))
> -		return;
>  	rcu_for_each_leaf_node(rnp)
>  		(void)rcu_spawn_one_boost_kthread(rnp);
>  }
> @@ -1501,11 +1393,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
>  	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
>  }
>  
> -static void invoke_rcu_callbacks_kthread(void)
> -{
> -	WARN_ON_ONCE(1);
> -}
> -
>  static bool rcu_is_callbacks_kthread(void)
>  {
>  	return false;
> -- 
> 2.20.1
> 

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH] rcu: Allow to eliminate softirq processing from rcutree
  2019-03-20  0:26 ` [PATCH] " Joel Fernandes
@ 2019-03-20 11:28   ` Sebastian Andrzej Siewior
  2019-03-21 12:06     ` Joel Fernandes
  2019-03-20 15:24   ` Paul E. McKenney
  1 sibling, 1 reply; 44+ messages in thread
From: Sebastian Andrzej Siewior @ 2019-03-20 11:28 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: linux-kernel, Josh Triplett, Steven Rostedt, Mathieu Desnoyers,
	Lai Jiangshan, tglx, Paul E. McKenney, Mike Galbraith, rcu

On 2019-03-19 20:26:13 [-0400], Joel Fernandes wrote:
> > @@ -2769,19 +2782,121 @@ static void invoke_rcu_callbacks(struct rcu_data *rdp)
> >  {
> >  	if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
> >  		return;
> > -	if (likely(!rcu_state.boost)) {
> > -		rcu_do_batch(rdp);
> > -		return;
> > -	}
> > -	invoke_rcu_callbacks_kthread();
> > +	rcu_do_batch(rdp);
> 
> Looks like a nice change, but one question...
> 
> Consider the case where rcunosoftirq boot option is not passed.
> 
> Before, if RCU_BOOST=y, then callbacks would be invoked in rcuc threads if
> possible, by those threads being woken up from within the softirq context
> (in invoke_rcu_callbacks).
> 
> Now, if RCU_BOOST=y, then callbacks would only be invoked in softirq context
> and not in the threads at all. Because rcu_softirq_enabled = true, the
> path executes:
>   rcu_read_unlock_special() ->
>         raise_softirq_irqoff() ->
>                 rcu_process_callbacks_si() ->
>                         rcu_process_callbacks() ->
>                                 invoke_rcu_callbacks() ->
>                                         rcu_do_batch()
> 
> This seems like a behavioral change to me. This makes the callbacks always
> execute from the softirq context and not the threads when boosting is
> configured. IMO in the very least, such behavioral change should be
> documented in the change.
> 
> One way to fix this I think could be, if boosting is enabled, then set
> rcu_softirq_enabled to false by default so the callbacks are still executed
> in the rcuc threads.
> 
> Did I miss something? Sorry if I did, thanks!

So with all the swaps and reordering, we are talking about this change:

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 0a719f726e149..82810483bfc6c 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2306,20 +2306,6 @@ static void rcu_core_si(struct softirq_action *h)
 	rcu_core();
 }
 
-/*
- * Schedule RCU callback invocation.  If the running implementation of RCU
- * does not support RCU priority boosting, just do a direct call, otherwise
- * wake up the per-CPU kernel kthread.  Note that because we are running
- * on the current CPU with softirqs disabled, the rcu_cpu_kthread_task
- * cannot disappear out from under us.
- */
-static void invoke_rcu_callbacks(struct rcu_data *rdp)
-{
-	if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
-		return;
-	rcu_do_batch(rdp);
-}
-
 static void rcu_wake_cond(struct task_struct *t, int status)
 {
 	/*
@@ -2330,6 +2316,19 @@ static void rcu_wake_cond(struct task_struct *t, int status)
 		wake_up_process(t);
 }
 
+static void invoke_rcu_core_kthread(void)
+{
+	struct task_struct *t;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	__this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
+	t = __this_cpu_read(rcu_data.rcu_cpu_kthread_task);
+	if (t != NULL && t != current)
+		rcu_wake_cond(t, __this_cpu_read(rcu_data.rcu_cpu_kthread_status));
+	local_irq_restore(flags);
+}
+
 static bool rcu_softirq_enabled = true;
 
 static int __init rcunosoftirq_setup(char *str)
@@ -2339,26 +2338,33 @@ static int __init rcunosoftirq_setup(char *str)
 }
 __setup("rcunosoftirq", rcunosoftirq_setup);
 
+/*
+ * Schedule RCU callback invocation.  If the running implementation of RCU
+ * does not support RCU priority boosting, just do a direct call, otherwise
+ * wake up the per-CPU kernel kthread.  Note that because we are running
+ * on the current CPU with softirqs disabled, the rcu_cpu_kthread_task
+ * cannot disappear out from under us.
+ */
+static void invoke_rcu_callbacks(struct rcu_data *rdp)
+{
+	if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
+		return;
+	if (rcu_state.boost || rcu_softirq_enabled)
+		invoke_rcu_core_kthread();
+	rcu_do_batch(rdp);
+}
+
 /*
  * Wake up this CPU's rcuc kthread to do RCU core processing.
  */
 static void invoke_rcu_core(void)
 {
-	unsigned long flags;
-	struct task_struct *t;
-
 	if (!cpu_online(smp_processor_id()))
 		return;
-	if (rcu_softirq_enabled) {
+	if (rcu_softirq_enabled)
 		raise_softirq(RCU_SOFTIRQ);
-	} else {
-		local_irq_save(flags);
-		__this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
-		t = __this_cpu_read(rcu_data.rcu_cpu_kthread_task);
-		if (t != NULL && t != current)
-			rcu_wake_cond(t, __this_cpu_read(rcu_data.rcu_cpu_kthread_status));
-		local_irq_restore(flags);
-	}
+	else
+		invoke_rcu_core_kthread();
 }
 
 static void rcu_cpu_kthread_park(unsigned int cpu)
@@ -2426,7 +2432,8 @@ static int __init rcu_spawn_core_kthreads(void)
 		per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0;
 	if (!IS_ENABLED(CONFIG_RCU_BOOST) && !rcu_softirq_enabled)
 		return 0;
-	WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec), "%s: Could not start rcub kthread, OOM is now expected behavior\n", __func__);
+	WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec),
+		  "%s: Could not start rcuc kthread, OOM is now expected behavior\n", __func__);
 	return 0;
 }
 early_initcall(rcu_spawn_core_kthreads);
-- 
2.20.1

>  - Joel

Sebastian

^ permalink raw reply related	[flat|nested] 44+ messages in thread

* Re: [PATCH v2] rcu: Allow to eliminate softirq processing from rcutree
  2019-03-19 11:44   ` [PATCH v2] " Sebastian Andrzej Siewior
  2019-03-19 15:59     ` Paul E. McKenney
@ 2019-03-20 11:32     ` Sebastian Andrzej Siewior
  2019-03-20 15:21       ` Paul E. McKenney
  1 sibling, 1 reply; 44+ messages in thread
From: Sebastian Andrzej Siewior @ 2019-03-20 11:32 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: linux-kernel, Josh Triplett, Steven Rostedt, Mathieu Desnoyers,
	Lai Jiangshan, Joel Fernandes, tglx, Mike Galbraith

On 2019-03-19 12:44:19 [+0100], To Paul E. McKenney wrote:
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index 0f31b79eb6761..0a719f726e149 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> +/*
> + * Spawn per-CPU RCU core processing kthreads.
> + */
> +static int __init rcu_spawn_core_kthreads(void)
> +{
> +	int cpu;
> +
> +	for_each_possible_cpu(cpu)
> +		per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0;
> +	if (!IS_ENABLED(CONFIG_RCU_BOOST) && !rcu_softirq_enabled)

and this needs to become
-       if (!IS_ENABLED(CONFIG_RCU_BOOST) && !rcu_softirq_enabled)
+       if (!IS_ENABLED(CONFIG_RCU_BOOST) && rcu_softirq_enabled)

With this change and the hunk that I just sent to Joel, I get the three
RCU modes booted, with and without BOOST.

Unless there is something (and Paul agrees that the Joel hunk is
correct) I would post a v3 with those changes included.
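
In other words (just restating the corrected hunk, not a separate patch):
the rcuc kthreads are needed whenever boosting is built in or the softirq
path is disabled, so the early return may only fire in the no-boost,
softirq-enabled case:

	/* Skip spawning rcuc kthreads only when boosting is off and
	 * RCU_SOFTIRQ still does the core processing. */
	if (!IS_ENABLED(CONFIG_RCU_BOOST) && rcu_softirq_enabled)
		return 0;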

> +		return 0;
> +	WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec), "%s: Could not start rcub kthread, OOM is now expected behavior\n", __func__);
> +	return 0;
> +}
> +early_initcall(rcu_spawn_core_kthreads);

Sebastian

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v2] rcu: Allow to eliminate softirq processing from rcutree
  2019-03-20 11:32     ` Sebastian Andrzej Siewior
@ 2019-03-20 15:21       ` Paul E. McKenney
  2019-03-20 15:44         ` Paul E. McKenney
  0 siblings, 1 reply; 44+ messages in thread
From: Paul E. McKenney @ 2019-03-20 15:21 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-kernel, Josh Triplett, Steven Rostedt, Mathieu Desnoyers,
	Lai Jiangshan, Joel Fernandes, tglx, Mike Galbraith

On Wed, Mar 20, 2019 at 12:32:19PM +0100, Sebastian Andrzej Siewior wrote:
> On 2019-03-19 12:44:19 [+0100], To Paul E. McKenney wrote:
> > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > index 0f31b79eb6761..0a719f726e149 100644
> > --- a/kernel/rcu/tree.c
> > +++ b/kernel/rcu/tree.c
> …
> > +/*
> > + * Spawn per-CPU RCU core processing kthreads.
> > + */
> > +static int __init rcu_spawn_core_kthreads(void)
> > +{
> > +	int cpu;
> > +
> > +	for_each_possible_cpu(cpu)
> > +		per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0;
> > +	if (!IS_ENABLED(CONFIG_RCU_BOOST) && !rcu_softirq_enabled)
> 
> and this needs to become
> -       if (!IS_ENABLED(CONFIG_RCU_BOOST) && !rcu_softirq_enabled)
> +       if (!IS_ENABLED(CONFIG_RCU_BOOST) && rcu_softirq_enabled)
> 
> With this change and the hunk that I just sent to Joel, I get the three
> RCU modes booted, with and without BOOST.
> 
> Unless there is something (and Paul agrees that the Joel hunk is
> correct) I would post a v3 with those changes included.

Well, I did get a bunch of grace-period hangs of various sorts when I
ran rcutorture like this:

	tools/testing/selftests/rcutorture/bin/kvm.sh --memory 1G --bootargs rcunosoftirq

I also got a silent hard hang on TREE03.  And an rcutorture forward-progress
failure, which goes along with the grace-period hangs.

So something does need adjustment.  I will try again with this change.

							Thanx, Paul

> > +		return 0;
> > +	WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec), "%s: Could not start rcub kthread, OOM is now expected behavior\n", __func__);
> > +	return 0;
> > +}
> > +early_initcall(rcu_spawn_core_kthreads);
> 
> Sebastian
> 


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH] rcu: Allow to eliminate softirq processing from rcutree
  2019-03-20  0:26 ` [PATCH] " Joel Fernandes
  2019-03-20 11:28   ` Sebastian Andrzej Siewior
@ 2019-03-20 15:24   ` Paul E. McKenney
  1 sibling, 0 replies; 44+ messages in thread
From: Paul E. McKenney @ 2019-03-20 15:24 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: Sebastian Andrzej Siewior, linux-kernel, Josh Triplett,
	Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan, tglx,
	Mike Galbraith, rcu

On Tue, Mar 19, 2019 at 08:26:13PM -0400, Joel Fernandes wrote:
> Adding the rcu@vger.kernel.org list as well, more comment below:
> 
> On Fri, Mar 15, 2019 at 12:11:30PM +0100, Sebastian Andrzej Siewior wrote:
> > From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
> > 
> > Running RCU out of softirq is a problem for some workloads that would
> > like to manage RCU core processing independently of other softirq work,
> > for example, setting kthread priority.
> > This commit therefore introduces the `rcunosoftirq' option which moves
> > the RCU core work from softirq to a per-CPU/per-flavor SCHED_OTHER
> > kthread named rcuc.
> > The SCHED_OTHER approach avoids the scalability problems that appeared
> > with the earlier attempt to move RCU core processing to from softirq to
> > kthreads.
> > That said, kernels built with RCU_BOOST=y will run the rcuc kthreads at
> > the RCU-boosting priority.
> > 
> > Reported-by: Thomas Gleixner <tglx@linutronix.de>
> > Tested-by: Mike Galbraith <bitbucket@online.de>
> > Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> > [bigeasy: add rcunosoftirq option]
> > Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> > ---
> >  kernel/rcu/tree.c        | 132 ++++++++++++++++++++++++++++++++---
> >  kernel/rcu/tree.h        |   4 +-
> >  kernel/rcu/tree_plugin.h | 145 +++++----------------------------------
> >  3 files changed, 141 insertions(+), 140 deletions(-)
> > 
> > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > index 9180158756d2c..498dc5e9287d0 100644
> > --- a/kernel/rcu/tree.c
> > +++ b/kernel/rcu/tree.c
> > @@ -62,6 +62,12 @@
> >  #include <linux/suspend.h>
> >  #include <linux/ftrace.h>
> >  #include <linux/tick.h>
> > +#include <linux/gfp.h>
> > +#include <linux/oom.h>
> > +#include <linux/smpboot.h>
> > +#include <linux/jiffies.h>
> > +#include <linux/sched/isolation.h>
> > +#include "../time/tick-internal.h"
> >  
> >  #include "tree.h"
> >  #include "rcu.h"
> > @@ -2716,7 +2722,7 @@ EXPORT_SYMBOL_GPL(rcu_fwd_progress_check);
> >   * structures.  This may be called only from the CPU to whom the rdp
> >   * belongs.
> >   */
> > -static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused)
> > +static __latent_entropy void rcu_process_callbacks(void)
> >  {
> >  	unsigned long flags;
> >  	struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);
> > @@ -2758,6 +2764,13 @@ static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused
> >  	trace_rcu_utilization(TPS("End RCU core"));
> >  }
> >  
> > +static void rcu_process_callbacks_si(struct softirq_action *h)
> > +{
> > +	rcu_process_callbacks();
> > +}
> > +
> > +static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
> > +
> >  /*
> >   * Schedule RCU callback invocation.  If the running implementation of RCU
> >   * does not support RCU priority boosting, just do a direct call, otherwise
> > @@ -2769,19 +2782,121 @@ static void invoke_rcu_callbacks(struct rcu_data *rdp)
> >  {
> >  	if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
> >  		return;
> > -	if (likely(!rcu_state.boost)) {
> > -		rcu_do_batch(rdp);
> > -		return;
> > -	}
> > -	invoke_rcu_callbacks_kthread();
> > +	rcu_do_batch(rdp);
> 
> Looks like a nice change, but one question...
> 
> Consider the case where rcunosoftirq boot option is not passed.
> 
> Before, if RCU_BOOST=y, then callbacks would be invoked in rcuc threads if
> possible, by those threads being woken up from within the softirq context
> (in invoke_rcu_callbacks).
> 
> Now, if RCU_BOOST=y, then callbacks would only be invoked in softirq context
> and not in the threads at all. Because rcu_softirq_enabled = true, the
> path executes:
>   rcu_read_unlock_special() ->
>         raise_softirq_irqoff() ->
>                 rcu_process_callbacks_si() ->
>                         rcu_process_callbacks() ->
>                                 invoke_rcu_callbacks() ->
>                                         rcu_do_batch()
> 
> This seems like a behavioral change to me. This makes the callbacks always
> execute from the softirq context and not the threads when boosting is
> configured. IMO in the very least, such behavioral change should be
> documented in the change.
> 
> One way to fix this I think could be, if boosting is enabled, then set
> rcu_softirq_enabled to false by default so the callbacks are still executed
> in the rcuc threads.
> 
> Did I miss something? Sorry if I did, thanks!

This did pass light rcutorture testing without rcunosoftirq set, but it
is quite possible that it was just getting lucky.  Then again, there are
a few different versions of this patch in flight, as Sebastian notes in
his later email.  The one I used is on -rcu.

							Thanx, Paul

>  - Joel
> 
> 
> >  }
> >  
> > +static void rcu_wake_cond(struct task_struct *t, int status)
> > +{
> > +	/*
> > +	 * If the thread is yielding, only wake it when this
> > +	 * is invoked from idle
> > +	 */
> > +	if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current)))
> > +		wake_up_process(t);
> > +}
> > +
> > +static bool rcu_softirq_enabled = true;
> > +
> > +static int __init rcunosoftirq_setup(char *str)
> > +{
> > +	rcu_softirq_enabled = false;
> > +	return 0;
> > +}
> > +__setup("rcunosoftirq", rcunosoftirq_setup);
> > +
> > +/*
> > + * Wake up this CPU's rcuc kthread to do RCU core processing.
> > + */
> >  static void invoke_rcu_core(void)
> >  {
> > -	if (cpu_online(smp_processor_id()))
> > +	unsigned long flags;
> > +	struct task_struct *t;
> > +
> > +	if (!cpu_online(smp_processor_id()))
> > +		return;
> > +	if (rcu_softirq_enabled) {
> >  		raise_softirq(RCU_SOFTIRQ);
> > +	} else {
> > +		local_irq_save(flags);
> > +		__this_cpu_write(rcu_cpu_has_work, 1);
> > +		t = __this_cpu_read(rcu_cpu_kthread_task);
> > +		if (t != NULL && current != t)
> > +			rcu_wake_cond(t, __this_cpu_read(rcu_cpu_kthread_status));
> > +		local_irq_restore(flags);
> > +	}
> >  }
> >  
> > +static void rcu_cpu_kthread_park(unsigned int cpu)
> > +{
> > +	per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
> > +}
> > +
> > +static int rcu_cpu_kthread_should_run(unsigned int cpu)
> > +{
> > +	return __this_cpu_read(rcu_cpu_has_work);
> > +}
> > +
> > +/*
> > + * Per-CPU kernel thread that invokes RCU callbacks.  This replaces
> > + * the RCU softirq used in configurations of RCU that do not support RCU
> > + * priority boosting.
> > + */
> > +static void rcu_cpu_kthread(unsigned int cpu)
> > +{
> > +	unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status);
> > +	char work, *workp = this_cpu_ptr(&rcu_cpu_has_work);
> > +	int spincnt;
> > +
> > +	for (spincnt = 0; spincnt < 10; spincnt++) {
> > +		trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
> > +		local_bh_disable();
> > +		*statusp = RCU_KTHREAD_RUNNING;
> > +		this_cpu_inc(rcu_cpu_kthread_loops);
> > +		local_irq_disable();
> > +		work = *workp;
> > +		*workp = 0;
> > +		local_irq_enable();
> > +		if (work)
> > +			rcu_process_callbacks();
> > +		local_bh_enable();
> > +		if (*workp == 0) {
> > +			trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
> > +			*statusp = RCU_KTHREAD_WAITING;
> > +			return;
> > +		}
> > +	}
> > +	*statusp = RCU_KTHREAD_YIELDING;
> > +	trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
> > +	schedule_timeout_interruptible(2);
> > +	trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
> > +	*statusp = RCU_KTHREAD_WAITING;
> > +}
> > +
> > +static struct smp_hotplug_thread rcu_cpu_thread_spec = {
> > +	.store			= &rcu_cpu_kthread_task,
> > +	.thread_should_run	= rcu_cpu_kthread_should_run,
> > +	.thread_fn		= rcu_cpu_kthread,
> > +	.thread_comm		= "rcuc/%u",
> > +	.setup			= rcu_cpu_kthread_setup,
> > +	.park			= rcu_cpu_kthread_park,
> > +};
> > +
> > +/*
> > + * Spawn per-CPU RCU core processing kthreads.
> > + */
> > +static int __init rcu_spawn_core_kthreads(void)
> > +{
> > +	int cpu;
> > +
> > +	for_each_possible_cpu(cpu)
> > +		per_cpu(rcu_cpu_has_work, cpu) = 0;
> > +	if (!IS_ENABLED(CONFIG_RCU_BOOST) && !rcu_softirq_enabled)
> > +		return 0;
> > +	WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec), "%s: Could not start rcub kthread, OOM is now expected behavior\n", __func__);
> > +	return 0;
> > +}
> > +early_initcall(rcu_spawn_core_kthreads);
> > +
> >  /*
> >   * Handle any core-RCU processing required by a call_rcu() invocation.
> >   */
> > @@ -3777,7 +3892,8 @@ void __init rcu_init(void)
> >  	rcu_init_one();
> >  	if (dump_tree)
> >  		rcu_dump_rcu_node_tree();
> > -	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
> > +	if (rcu_softirq_enabled)
> > +		open_softirq(RCU_SOFTIRQ, rcu_process_callbacks_si);
> >  
> >  	/*
> >  	 * We don't need protection against CPU-hotplug here because
> > diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
> > index d90b02b53c0ec..fb8fc6ecc391b 100644
> > --- a/kernel/rcu/tree.h
> > +++ b/kernel/rcu/tree.h
> > @@ -402,12 +402,10 @@ static const char *tp_rcu_varname __used __tracepoint_string = rcu_name;
> >  
> >  int rcu_dynticks_snap(struct rcu_data *rdp);
> >  
> > -#ifdef CONFIG_RCU_BOOST
> >  DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
> >  DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu);
> >  DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
> >  DECLARE_PER_CPU(char, rcu_cpu_has_work);
> > -#endif /* #ifdef CONFIG_RCU_BOOST */
> >  
> >  /* Forward declarations for rcutree_plugin.h */
> >  static void rcu_bootup_announce(void);
> > @@ -425,8 +423,8 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func);
> >  static void dump_blkd_tasks(struct rcu_node *rnp, int ncheck);
> >  static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
> >  static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
> > -static void invoke_rcu_callbacks_kthread(void);
> >  static bool rcu_is_callbacks_kthread(void);
> > +static void rcu_cpu_kthread_setup(unsigned int cpu);
> >  static void __init rcu_spawn_boost_kthreads(void);
> >  static void rcu_prepare_kthreads(int cpu);
> >  static void rcu_cleanup_after_idle(void);
> > diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
> > index 1b3dd2fc0cd64..b440d6ef45d16 100644
> > --- a/kernel/rcu/tree_plugin.h
> > +++ b/kernel/rcu/tree_plugin.h
> > @@ -24,17 +24,6 @@
> >   *	   Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> >   */
> >  
> > -#include <linux/delay.h>
> > -#include <linux/gfp.h>
> > -#include <linux/oom.h>
> > -#include <linux/sched/debug.h>
> > -#include <linux/smpboot.h>
> > -#include <linux/sched/isolation.h>
> > -#include <uapi/linux/sched/types.h>
> > -#include "../time/tick-internal.h"
> > -
> > -#ifdef CONFIG_RCU_BOOST
> > -
> >  #include "../locking/rtmutex_common.h"
> >  
> >  /*
> > @@ -45,19 +34,6 @@ DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
> >  DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
> >  DEFINE_PER_CPU(char, rcu_cpu_has_work);
> >  
> > -#else /* #ifdef CONFIG_RCU_BOOST */
> > -
> > -/*
> > - * Some architectures do not define rt_mutexes, but if !CONFIG_RCU_BOOST,
> > - * all uses are in dead code.  Provide a definition to keep the compiler
> > - * happy, but add WARN_ON_ONCE() to complain if used in the wrong place.
> > - * This probably needs to be excluded from -rt builds.
> > - */
> > -#define rt_mutex_owner(a) ({ WARN_ON_ONCE(1); NULL; })
> > -#define rt_mutex_futex_unlock(x) WARN_ON_ONCE(1)
> > -
> > -#endif /* #else #ifdef CONFIG_RCU_BOOST */
> > -
> >  #ifdef CONFIG_RCU_NOCB_CPU
> >  static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
> >  static bool __read_mostly rcu_nocb_poll;    /* Offload kthread are to poll. */
> > @@ -652,7 +628,10 @@ static void rcu_read_unlock_special(struct task_struct *t)
> >  		/* Need to defer quiescent state until everything is enabled. */
> >  		if (irqs_were_disabled) {
> >  			/* Enabling irqs does not reschedule, so... */
> > -			raise_softirq_irqoff(RCU_SOFTIRQ);
> > +			if (rcu_softirq_enabled)
> > +				raise_softirq_irqoff(RCU_SOFTIRQ);
> > +			else
> > +				invoke_rcu_core();
> >  		} else {
> >  			/* Enabling BH or preempt does reschedule, so... */
> >  			set_tsk_need_resched(current);
> > @@ -1150,18 +1129,21 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
> >  
> >  #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
> >  
> > -#ifdef CONFIG_RCU_BOOST
> > -
> > -static void rcu_wake_cond(struct task_struct *t, int status)
> > +/*
> > + * If boosting, set rcuc kthreads to realtime priority.
> > + */
> > +static void rcu_cpu_kthread_setup(unsigned int cpu)
> >  {
> > -	/*
> > -	 * If the thread is yielding, only wake it when this
> > -	 * is invoked from idle
> > -	 */
> > -	if (status != RCU_KTHREAD_YIELDING || is_idle_task(current))
> > -		wake_up_process(t);
> > +#ifdef CONFIG_RCU_BOOST
> > +	struct sched_param sp;
> > +
> > +	sp.sched_priority = kthread_prio;
> > +	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
> > +#endif /* #ifdef CONFIG_RCU_BOOST */
> >  }
> >  
> > +#ifdef CONFIG_RCU_BOOST
> > +
> >  /*
> >   * Carry out RCU priority boosting on the task indicated by ->exp_tasks
> >   * or ->boost_tasks, advancing the pointer to the next task in the
> > @@ -1299,23 +1281,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
> >  	}
> >  }
> >  
> > -/*
> > - * Wake up the per-CPU kthread to invoke RCU callbacks.
> > - */
> > -static void invoke_rcu_callbacks_kthread(void)
> > -{
> > -	unsigned long flags;
> > -
> > -	local_irq_save(flags);
> > -	__this_cpu_write(rcu_cpu_has_work, 1);
> > -	if (__this_cpu_read(rcu_cpu_kthread_task) != NULL &&
> > -	    current != __this_cpu_read(rcu_cpu_kthread_task)) {
> > -		rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task),
> > -			      __this_cpu_read(rcu_cpu_kthread_status));
> > -	}
> > -	local_irq_restore(flags);
> > -}
> > -
> >  /*
> >   * Is the current CPU running the RCU-callbacks kthread?
> >   * Caller must have preemption disabled.
> > @@ -1369,65 +1334,6 @@ static int rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
> >  	return 0;
> >  }
> >  
> > -static void rcu_kthread_do_work(void)
> > -{
> > -	rcu_do_batch(this_cpu_ptr(&rcu_data));
> > -}
> > -
> > -static void rcu_cpu_kthread_setup(unsigned int cpu)
> > -{
> > -	struct sched_param sp;
> > -
> > -	sp.sched_priority = kthread_prio;
> > -	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
> > -}
> > -
> > -static void rcu_cpu_kthread_park(unsigned int cpu)
> > -{
> > -	per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
> > -}
> > -
> > -static int rcu_cpu_kthread_should_run(unsigned int cpu)
> > -{
> > -	return __this_cpu_read(rcu_cpu_has_work);
> > -}
> > -
> > -/*
> > - * Per-CPU kernel thread that invokes RCU callbacks.  This replaces
> > - * the RCU softirq used in configurations of RCU that do not support RCU
> > - * priority boosting.
> > - */
> > -static void rcu_cpu_kthread(unsigned int cpu)
> > -{
> > -	unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status);
> > -	char work, *workp = this_cpu_ptr(&rcu_cpu_has_work);
> > -	int spincnt;
> > -
> > -	for (spincnt = 0; spincnt < 10; spincnt++) {
> > -		trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
> > -		local_bh_disable();
> > -		*statusp = RCU_KTHREAD_RUNNING;
> > -		this_cpu_inc(rcu_cpu_kthread_loops);
> > -		local_irq_disable();
> > -		work = *workp;
> > -		*workp = 0;
> > -		local_irq_enable();
> > -		if (work)
> > -			rcu_kthread_do_work();
> > -		local_bh_enable();
> > -		if (*workp == 0) {
> > -			trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
> > -			*statusp = RCU_KTHREAD_WAITING;
> > -			return;
> > -		}
> > -	}
> > -	*statusp = RCU_KTHREAD_YIELDING;
> > -	trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
> > -	schedule_timeout_interruptible(2);
> > -	trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
> > -	*statusp = RCU_KTHREAD_WAITING;
> > -}
> > -
> >  /*
> >   * Set the per-rcu_node kthread's affinity to cover all CPUs that are
> >   * served by the rcu_node in question.  The CPU hotplug lock is still
> > @@ -1458,27 +1364,13 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
> >  	free_cpumask_var(cm);
> >  }
> >  
> > -static struct smp_hotplug_thread rcu_cpu_thread_spec = {
> > -	.store			= &rcu_cpu_kthread_task,
> > -	.thread_should_run	= rcu_cpu_kthread_should_run,
> > -	.thread_fn		= rcu_cpu_kthread,
> > -	.thread_comm		= "rcuc/%u",
> > -	.setup			= rcu_cpu_kthread_setup,
> > -	.park			= rcu_cpu_kthread_park,
> > -};
> > -
> >  /*
> >   * Spawn boost kthreads -- called as soon as the scheduler is running.
> >   */
> >  static void __init rcu_spawn_boost_kthreads(void)
> >  {
> >  	struct rcu_node *rnp;
> > -	int cpu;
> >  
> > -	for_each_possible_cpu(cpu)
> > -		per_cpu(rcu_cpu_has_work, cpu) = 0;
> > -	if (WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec), "%s: Could not start rcub kthread, OOM is now expected behavior\n", __func__))
> > -		return;
> >  	rcu_for_each_leaf_node(rnp)
> >  		(void)rcu_spawn_one_boost_kthread(rnp);
> >  }
> > @@ -1501,11 +1393,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
> >  	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
> >  }
> >  
> > -static void invoke_rcu_callbacks_kthread(void)
> > -{
> > -	WARN_ON_ONCE(1);
> > -}
> > -
> >  static bool rcu_is_callbacks_kthread(void)
> >  {
> >  	return false;
> > -- 
> > 2.20.1
> > 
> 


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v2] rcu: Allow to eliminate softirq processing from rcutree
  2019-03-20 15:21       ` Paul E. McKenney
@ 2019-03-20 15:44         ` Paul E. McKenney
  2019-03-20 16:05           ` Sebastian Andrzej Siewior
  0 siblings, 1 reply; 44+ messages in thread
From: Paul E. McKenney @ 2019-03-20 15:44 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-kernel, Josh Triplett, Steven Rostedt, Mathieu Desnoyers,
	Lai Jiangshan, Joel Fernandes, tglx, Mike Galbraith

On Wed, Mar 20, 2019 at 08:21:46AM -0700, Paul E. McKenney wrote:
> On Wed, Mar 20, 2019 at 12:32:19PM +0100, Sebastian Andrzej Siewior wrote:
> > On 2019-03-19 12:44:19 [+0100], To Paul E. McKenney wrote:
> > > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > > index 0f31b79eb6761..0a719f726e149 100644
> > > --- a/kernel/rcu/tree.c
> > > +++ b/kernel/rcu/tree.c
> > …
> > > +/*
> > > + * Spawn per-CPU RCU core processing kthreads.
> > > + */
> > > +static int __init rcu_spawn_core_kthreads(void)
> > > +{
> > > +	int cpu;
> > > +
> > > +	for_each_possible_cpu(cpu)
> > > +		per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0;
> > > +	if (!IS_ENABLED(CONFIG_RCU_BOOST) && !rcu_softirq_enabled)
> > 
> > and this needs to become
> > -       if (!IS_ENABLED(CONFIG_RCU_BOOST) && !rcu_softirq_enabled)
> > +       if (!IS_ENABLED(CONFIG_RCU_BOOST) && rcu_softirq_enabled)
> > 
> > With this change and the hunk that I just sent to Joel, I get the three
> > RCU modes booted, with and without BOOST.
> > 
> > Unless there is something (and Paul agrees that the Joel hunk is
> > correct) I would post a v3 with those changes included.
> 
> Well, I did get a bunch of grace-period hangs of various sorts when I
> ran rcutorture like this:
> 
> 	tools/testing/selftests/rcutorture/bin/kvm.sh --memory 1G --bootargs rcunosoftirq
> 
> I also got a silent hard hang on TREE03.  And an rcutorture forward-progress
> failure, which goes along with the grace-period hangs.
> 
> So something does need adjustment.  I will try again with this change.

And it does seem to work better.  I will give it more intense testing
later on, but in the meantime I have merged this change into your
earlier patch.

We will see whether or not I am able to summon up the courage to push it
into v5.2, however.  ;-)

							Thanx, Paul

> > > +		return 0;
> > > +	WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec), "%s: Could not start rcub kthread, OOM is now expected behavior\n", __func__);
> > > +	return 0;
> > > +}
> > > +early_initcall(rcu_spawn_core_kthreads);
> > 
> > Sebastian
> > 


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v2] rcu: Allow to eliminate softirq processing from rcutree
  2019-03-20 15:44         ` Paul E. McKenney
@ 2019-03-20 16:05           ` Sebastian Andrzej Siewior
  2019-03-20 16:15             ` Paul E. McKenney
  0 siblings, 1 reply; 44+ messages in thread
From: Sebastian Andrzej Siewior @ 2019-03-20 16:05 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: linux-kernel, Josh Triplett, Steven Rostedt, Mathieu Desnoyers,
	Lai Jiangshan, Joel Fernandes, tglx, Mike Galbraith

On 2019-03-20 08:44:40 [-0700], Paul E. McKenney wrote:
> 
> And it does seem to work better.  I will give it more intense testing
> later on, but in the meantime I have merged this change into your
> earlier patch.

thanks.

> We will see whether or not I am able to summon up the courage to push it
> into v5.2, however.  ;-)

:)

I added a command line option for the switch because it was easier for
testing. It can be renamed if a different name suits it better or
replaced with a Kconfig if it makes more sense.

> 							Thanx, Paul

Sebastian

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v2] rcu: Allow to eliminate softirq processing from rcutree
  2019-03-20 16:05           ` Sebastian Andrzej Siewior
@ 2019-03-20 16:15             ` Paul E. McKenney
  2019-03-20 16:35               ` Sebastian Andrzej Siewior
  0 siblings, 1 reply; 44+ messages in thread
From: Paul E. McKenney @ 2019-03-20 16:15 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-kernel, Josh Triplett, Steven Rostedt, Mathieu Desnoyers,
	Lai Jiangshan, Joel Fernandes, tglx, Mike Galbraith

On Wed, Mar 20, 2019 at 05:05:48PM +0100, Sebastian Andrzej Siewior wrote:
> On 2019-03-20 08:44:40 [-0700], Paul E. McKenney wrote:
> > 
> > And it does seem to work better.  I will give it more intense testing
> > later on, but in the meantime I have merged this change into your
> > earlier patch.
> 
> thanks.
> 
> > We will see whether or not I am able to summon up the courage to push it
> > into v5.2, however.  ;-)
> 
> :)
> 
> I added a command line option for the switch because it was easier for
> testing. It can be renamed if a different name suits it better or
> replaced with a Kconfig if it makes more sense.

I am considering making it a module_param() to avoid namespace pollution,
as it would become something like rcutree.nosoftirq.
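
For reference, the "rcutree." prefix would come from the
MODULE_PARAM_PREFIX already defined in tree.c, so the rough shape (a
sketch only, not the actual patch) is just:

	/* Sketch: in kernel/rcu/tree.c this shows up on the boot line as
	 * rcutree.nosoftirq and, with 0444, is readable under
	 * /sys/module/rcutree/parameters/.
	 */
	static bool nosoftirq;
	module_param(nosoftirq, bool, 0444);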

Thoughts?

							Thanx, Paul


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v2] rcu: Allow to eliminate softirq processing from rcutree
  2019-03-20 16:15             ` Paul E. McKenney
@ 2019-03-20 16:35               ` Sebastian Andrzej Siewior
  2019-03-20 17:30                 ` Paul E. McKenney
  0 siblings, 1 reply; 44+ messages in thread
From: Sebastian Andrzej Siewior @ 2019-03-20 16:35 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: linux-kernel, Josh Triplett, Steven Rostedt, Mathieu Desnoyers,
	Lai Jiangshan, Joel Fernandes, tglx, Mike Galbraith

On 2019-03-20 09:15:00 [-0700], Paul E. McKenney wrote:
> I am considering making it a module_param() to avoid namespace pollution,
> as it would become something like rcutree.nosoftirq.
> 
> Thoughts?

nope, perfect.

> 							Thanx, Paul

Sebastian

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v2] rcu: Allow to eliminate softirq processing from rcutree
  2019-03-20 16:35               ` Sebastian Andrzej Siewior
@ 2019-03-20 17:30                 ` Paul E. McKenney
  2019-03-20 17:59                   ` Sebastian Andrzej Siewior
  0 siblings, 1 reply; 44+ messages in thread
From: Paul E. McKenney @ 2019-03-20 17:30 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-kernel, Josh Triplett, Steven Rostedt, Mathieu Desnoyers,
	Lai Jiangshan, Joel Fernandes, tglx, Mike Galbraith

On Wed, Mar 20, 2019 at 05:35:32PM +0100, Sebastian Andrzej Siewior wrote:
> On 2019-03-20 09:15:00 [-0700], Paul E. McKenney wrote:
> > I am considering making it a module_param() to avoid namespace pollution,
> > as it would become something like rcutree.nosoftirq.
> > 
> > Thoughts?
> 
> nope, perfect.

Please see below for an untested patch.  Thoughts?

							Thanx, Paul

------------------------------------------------------------------------

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index d377a2166b79..767cdea30a1c 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3672,6 +3672,10 @@
 			the propagation of recent CPU-hotplug changes up
 			the rcu_node combining tree.
 
+	rcutree.nosoftirq=	[KNL]
+			If set, move all RCU_SOFTIRQ processing to per-CPU
+			rcuc kthreads.  Defaults to using RCU_SOFTIRQ.
+
 	rcutree.rcu_fanout_exact= [KNL]
 			Disable autobalancing of the rcu_node combining
 			tree.  This is used by rcutorture, and might
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 961dbc7b8949..e4baba8800f3 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -98,6 +98,9 @@ struct rcu_state rcu_state = {
 /* Dump rcu_node combining tree at boot to verify correct setup. */
 static bool dump_tree;
 module_param(dump_tree, bool, 0444);
+/* Move RCU_SOFTIRQ to rcuc kthreads. */
+static bool nosoftirq;
+module_param(nosoftirq, bool, 0444);
 /* Control rcu_node-tree auto-balancing at boot time. */
 static bool rcu_fanout_exact;
 module_param(rcu_fanout_exact, bool, 0444);
@@ -2330,15 +2333,6 @@ static void rcu_wake_cond(struct task_struct *t, int status)
 		wake_up_process(t);
 }
 
-static bool rcu_softirq_enabled = true;
-
-static int __init rcunosoftirq_setup(char *str)
-{
-	rcu_softirq_enabled = false;
-	return 0;
-}
-__setup("rcunosoftirq", rcunosoftirq_setup);
-
 /*
  * Wake up this CPU's rcuc kthread to do RCU core processing.
  */
@@ -2349,7 +2343,7 @@ static void invoke_rcu_core(void)
 
 	if (!cpu_online(smp_processor_id()))
 		return;
-	if (rcu_softirq_enabled) {
+	if (!nosoftirq) {
 		raise_softirq(RCU_SOFTIRQ);
 	} else {
 		local_irq_save(flags);
@@ -2424,7 +2418,7 @@ static int __init rcu_spawn_core_kthreads(void)
 
 	for_each_possible_cpu(cpu)
 		per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0;
-	if (!IS_ENABLED(CONFIG_RCU_BOOST) && rcu_softirq_enabled)
+	if (!IS_ENABLED(CONFIG_RCU_BOOST) && !nosoftirq)
 		return 0;
 	WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec), "%s: Could not start rcub kthread, OOM is now expected behavior\n", __func__);
 	return 0;
@@ -3467,7 +3461,7 @@ void __init rcu_init(void)
 	rcu_init_one();
 	if (dump_tree)
 		rcu_dump_rcu_node_tree();
-	if (rcu_softirq_enabled)
+	if (!nosoftirq)
 		open_softirq(RCU_SOFTIRQ, rcu_core_si);
 
 	/*
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index eb99e750a930..c5a2acb2c7af 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -72,6 +72,8 @@ static void __init rcu_bootup_announce_oddness(void)
 		pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_init_delay);
 	if (gp_cleanup_delay)
 		pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_cleanup_delay);
+	if (nosoftirq)
+		pr_info("\tRCU_SOFTIRQ processing moved to rcuc kthreads.\n");
 	if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG))
 		pr_info("\tRCU debug extended QS entry/exit.\n");
 	rcupdate_announce_bootup_oddness();


^ permalink raw reply related	[flat|nested] 44+ messages in thread

* Re: [PATCH v2] rcu: Allow to eliminate softirq processing from rcutree
  2019-03-20 17:30                 ` Paul E. McKenney
@ 2019-03-20 17:59                   ` Sebastian Andrzej Siewior
  2019-03-20 18:12                     ` Paul E. McKenney
  0 siblings, 1 reply; 44+ messages in thread
From: Sebastian Andrzej Siewior @ 2019-03-20 17:59 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: linux-kernel, Josh Triplett, Steven Rostedt, Mathieu Desnoyers,
	Lai Jiangshan, Joel Fernandes, tglx, Mike Galbraith

On 2019-03-20 10:30:01 [-0700], Paul E. McKenney wrote:
> On Wed, Mar 20, 2019 at 05:35:32PM +0100, Sebastian Andrzej Siewior wrote:
> > On 2019-03-20 09:15:00 [-0700], Paul E. McKenney wrote:
> > > I am considering making it a module_param() to avoid namespace pollution,
> > > as it would become something like rcutree.nosoftirq.
> > > 
> > > Thoughts?
> > 
> > nope, perfect.
> 
> Please see below for an untested patch.  Thoughts?

> -	if (rcu_softirq_enabled) {
> +	if (!nosoftirq) {
>  		raise_softirq(RCU_SOFTIRQ);
>  	} else {

This double negation looks weird. Can we flip the logic somehow?
/me testing if it works…
> 							Thanx, Paul

Sebastian

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v2] rcu: Allow to eliminate softirq processing from rcutree
  2019-03-20 17:59                   ` Sebastian Andrzej Siewior
@ 2019-03-20 18:12                     ` Paul E. McKenney
  2019-03-20 18:14                       ` Sebastian Andrzej Siewior
  0 siblings, 1 reply; 44+ messages in thread
From: Paul E. McKenney @ 2019-03-20 18:12 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-kernel, Josh Triplett, Steven Rostedt, Mathieu Desnoyers,
	Lai Jiangshan, Joel Fernandes, tglx, Mike Galbraith

On Wed, Mar 20, 2019 at 06:59:52PM +0100, Sebastian Andrzej Siewior wrote:
> On 2019-03-20 10:30:01 [-0700], Paul E. McKenney wrote:
> > On Wed, Mar 20, 2019 at 05:35:32PM +0100, Sebastian Andrzej Siewior wrote:
> > > On 2019-03-20 09:15:00 [-0700], Paul E. McKenney wrote:
> > > > I am considering making it a module_param() to avoid namespace pollution,
> > > > as it would become something like rcutree.nosoftirq.
> > > > 
> > > > Thoughts?
> > > 
> > > nope, perfect.
> > 
> > Please see below for an untested patch.  Thoughts?
> 
> > -	if (rcu_softirq_enabled) {
> > +	if (!nosoftirq) {
> >  		raise_softirq(RCU_SOFTIRQ);
> >  	} else {
> 
> This double negation looks weird. Can we flip the logic somehow?
> /me testing if it works…

We could name it something like "use_softirq" and initialize it to true.
I am OK either way.
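
Concretely, the shape would be something like this (sketch only, names
as in the patch in flight):

	static bool use_softirq = true;
	module_param(use_softirq, bool, 0444);

	...
	if (use_softirq)
		raise_softirq(RCU_SOFTIRQ);
	else
		invoke_rcu_core_kthread();	/* wake this CPU's rcuc kthread */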

							Thanx, Paul


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v2] rcu: Allow to eliminate softirq processing from rcutree
  2019-03-20 18:12                     ` Paul E. McKenney
@ 2019-03-20 18:14                       ` Sebastian Andrzej Siewior
  2019-03-20 21:13                         ` [PATCH v3] " Sebastian Andrzej Siewior
  0 siblings, 1 reply; 44+ messages in thread
From: Sebastian Andrzej Siewior @ 2019-03-20 18:14 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: linux-kernel, Josh Triplett, Steven Rostedt, Mathieu Desnoyers,
	Lai Jiangshan, Joel Fernandes, tglx, Mike Galbraith

On 2019-03-20 11:12:10 [-0700], Paul E. McKenney wrote:
> We could name it something like "use_softirq" and initialize it to true.
> I am OK either way.

I had to add one hunk to get it to compile; it worked then. Let me swap
the logic as you suggested and then I'll repost the whole thing. This
will take an hour or two…

> 							Thanx, Paul

Sebastian

^ permalink raw reply	[flat|nested] 44+ messages in thread

* [PATCH v3] rcu: Allow to eliminate softirq processing from rcutree
  2019-03-20 18:14                       ` Sebastian Andrzej Siewior
@ 2019-03-20 21:13                         ` Sebastian Andrzej Siewior
  2019-03-20 23:46                           ` Paul E. McKenney
  2019-03-22 23:48                           ` Joel Fernandes
  0 siblings, 2 replies; 44+ messages in thread
From: Sebastian Andrzej Siewior @ 2019-03-20 21:13 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: linux-kernel, Josh Triplett, Steven Rostedt, Mathieu Desnoyers,
	Lai Jiangshan, Joel Fernandes, tglx, Mike Galbraith

Running RCU out of softirq is a problem for some workloads that would
like to manage RCU core processing independently of other softirq
work, for example, setting kthread priority.  This commit therefore
introduces the `rcunosoftirq' option which moves the RCU core work
from softirq to a per-CPU/per-flavor SCHED_OTHER kthread named rcuc.
The SCHED_OTHER approach avoids the scalability problems that appeared
with the earlier attempt to move RCU core processing from softirq
to kthreads.  That said, kernels built with RCU_BOOST=y will run the
rcuc kthreads at the RCU-boosting priority.

Reported-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
v2…v3:  - Ensure that with RCU_BOOST=y the callbacks are invoked in thread
          context. Pointed out by Joel Fernandes.
	- Swap the init logic so that it initializes the rcuc threads with
	  RCU_BOOST=n.
	- Move the parameter switch to rcutree.use_softirq. The default is 1,
	  which means softirq processing (the old behaviour). Setting it to 0
	  disables softirq processing and the rcuc threads do the work
	  instead.
	- Add a few words to kernel-parameters.txt, supplied by Paul E.
	  McKenney.

v1…v2:
        - rebased to Paul's rcu/dev tree/branch

 .../admin-guide/kernel-parameters.txt         |   5 +
 kernel/rcu/tree.c                             | 130 +++++++++++++++-
 kernel/rcu/tree.h                             |   2 +-
 kernel/rcu/tree_plugin.h                      | 139 +++---------------
 4 files changed, 146 insertions(+), 130 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index d377a2166b79c..7a44624c13219 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3672,6 +3672,11 @@
 			the propagation of recent CPU-hotplug changes up
 			the rcu_node combining tree.
 
+	rcutree.use_softirq=	[KNL]
+			If set to zero, move all RCU_SOFTIRQ processing to
+			per-CPU rcuc kthreads. Defaults to a non-zero
+			value using RCU_SOFTIRQ.
+
 	rcutree.rcu_fanout_exact= [KNL]
 			Disable autobalancing of the rcu_node combining
 			tree.  This is used by rcutorture, and might
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 0f31b79eb6761..05a1e42fdaf10 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -51,6 +51,12 @@
 #include <linux/tick.h>
 #include <linux/sysrq.h>
 #include <linux/kprobes.h>
+#include <linux/gfp.h>
+#include <linux/oom.h>
+#include <linux/smpboot.h>
+#include <linux/jiffies.h>
+#include <linux/sched/isolation.h>
+#include "../time/tick-internal.h"
 
 #include "tree.h"
 #include "rcu.h"
@@ -92,6 +98,9 @@ struct rcu_state rcu_state = {
 /* Dump rcu_node combining tree at boot to verify correct setup. */
 static bool dump_tree;
 module_param(dump_tree, bool, 0444);
+/* Move RCU_SOFTIRQ to rcuc kthreads. */
+static bool use_softirq = 1;
+module_param(use_softirq, bool, 0444);
 /* Control rcu_node-tree auto-balancing at boot time. */
 static bool rcu_fanout_exact;
 module_param(rcu_fanout_exact, bool, 0444);
@@ -2253,7 +2262,7 @@ void rcu_force_quiescent_state(void)
 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
 
 /* Perform RCU core processing work for the current CPU.  */
-static __latent_entropy void rcu_core(struct softirq_action *unused)
+static __latent_entropy void rcu_core(void)
 {
 	unsigned long flags;
 	struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);
@@ -2295,6 +2304,34 @@ static __latent_entropy void rcu_core(struct softirq_action *unused)
 	trace_rcu_utilization(TPS("End RCU core"));
 }
 
+static void rcu_core_si(struct softirq_action *h)
+{
+	rcu_core();
+}
+
+static void rcu_wake_cond(struct task_struct *t, int status)
+{
+	/*
+	 * If the thread is yielding, only wake it when this
+	 * is invoked from idle
+	 */
+	if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current)))
+		wake_up_process(t);
+}
+
+static void invoke_rcu_core_kthread(void)
+{
+	struct task_struct *t;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	__this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
+	t = __this_cpu_read(rcu_data.rcu_cpu_kthread_task);
+	if (t != NULL && t != current)
+		rcu_wake_cond(t, __this_cpu_read(rcu_data.rcu_cpu_kthread_status));
+	local_irq_restore(flags);
+}
+
 /*
  * Schedule RCU callback invocation.  If the running implementation of RCU
  * does not support RCU priority boosting, just do a direct call, otherwise
@@ -2306,19 +2343,95 @@ static void invoke_rcu_callbacks(struct rcu_data *rdp)
 {
 	if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
 		return;
-	if (likely(!rcu_state.boost)) {
-		rcu_do_batch(rdp);
-		return;
-	}
-	invoke_rcu_callbacks_kthread();
+	if (rcu_state.boost || !use_softirq)
+		invoke_rcu_core_kthread();
+	rcu_do_batch(rdp);
 }
 
+/*
+ * Wake up this CPU's rcuc kthread to do RCU core processing.
+ */
 static void invoke_rcu_core(void)
 {
-	if (cpu_online(smp_processor_id()))
+	if (!cpu_online(smp_processor_id()))
+		return;
+	if (use_softirq)
 		raise_softirq(RCU_SOFTIRQ);
+	else
+		invoke_rcu_core_kthread();
 }
 
+static void rcu_cpu_kthread_park(unsigned int cpu)
+{
+	per_cpu(rcu_data.rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
+}
+
+static int rcu_cpu_kthread_should_run(unsigned int cpu)
+{
+	return __this_cpu_read(rcu_data.rcu_cpu_has_work);
+}
+
+/*
+ * Per-CPU kernel thread that invokes RCU callbacks.  This replaces
+ * the RCU softirq used in configurations of RCU that do not support RCU
+ * priority boosting.
+ */
+static void rcu_cpu_kthread(unsigned int cpu)
+{
+	unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
+	char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
+	int spincnt;
+
+	for (spincnt = 0; spincnt < 10; spincnt++) {
+		trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
+		local_bh_disable();
+		*statusp = RCU_KTHREAD_RUNNING;
+		local_irq_disable();
+		work = *workp;
+		*workp = 0;
+		local_irq_enable();
+		if (work)
+			rcu_core();
+		local_bh_enable();
+		if (*workp == 0) {
+			trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
+			*statusp = RCU_KTHREAD_WAITING;
+			return;
+		}
+	}
+	*statusp = RCU_KTHREAD_YIELDING;
+	trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
+	schedule_timeout_interruptible(2);
+	trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
+	*statusp = RCU_KTHREAD_WAITING;
+}
+
+static struct smp_hotplug_thread rcu_cpu_thread_spec = {
+	.store			= &rcu_data.rcu_cpu_kthread_task,
+	.thread_should_run	= rcu_cpu_kthread_should_run,
+	.thread_fn		= rcu_cpu_kthread,
+	.thread_comm		= "rcuc/%u",
+	.setup			= rcu_cpu_kthread_setup,
+	.park			= rcu_cpu_kthread_park,
+};
+
+/*
+ * Spawn per-CPU RCU core processing kthreads.
+ */
+static int __init rcu_spawn_core_kthreads(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0;
+	if (!IS_ENABLED(CONFIG_RCU_BOOST) && use_softirq)
+		return 0;
+	WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec),
+		  "%s: Could not start rcuc kthread, OOM is now expected behavior\n", __func__);
+	return 0;
+}
+early_initcall(rcu_spawn_core_kthreads);
+
 /*
  * Handle any core-RCU processing required by a call_rcu() invocation.
  */
@@ -3355,7 +3468,8 @@ void __init rcu_init(void)
 	rcu_init_one();
 	if (dump_tree)
 		rcu_dump_rcu_node_tree();
-	open_softirq(RCU_SOFTIRQ, rcu_core);
+	if (use_softirq)
+		open_softirq(RCU_SOFTIRQ, rcu_core_si);
 
 	/*
 	 * We don't need protection against CPU-hotplug here because
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index e253d11af3c49..a1a72a1ecb026 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -407,8 +407,8 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func);
 static void dump_blkd_tasks(struct rcu_node *rnp, int ncheck);
 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
 static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
-static void invoke_rcu_callbacks_kthread(void);
 static bool rcu_is_callbacks_kthread(void);
+static void rcu_cpu_kthread_setup(unsigned int cpu);
 static void __init rcu_spawn_boost_kthreads(void);
 static void rcu_prepare_kthreads(int cpu);
 static void rcu_cleanup_after_idle(void);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index f46b4af96ab95..b807204ffd83f 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -11,29 +11,7 @@
  *	   Paul E. McKenney <paulmck@linux.ibm.com>
  */
 
-#include <linux/delay.h>
-#include <linux/gfp.h>
-#include <linux/oom.h>
-#include <linux/sched/debug.h>
-#include <linux/smpboot.h>
-#include <linux/sched/isolation.h>
-#include <uapi/linux/sched/types.h>
-#include "../time/tick-internal.h"
-
-#ifdef CONFIG_RCU_BOOST
 #include "../locking/rtmutex_common.h"
-#else /* #ifdef CONFIG_RCU_BOOST */
-
-/*
- * Some architectures do not define rt_mutexes, but if !CONFIG_RCU_BOOST,
- * all uses are in dead code.  Provide a definition to keep the compiler
- * happy, but add WARN_ON_ONCE() to complain if used in the wrong place.
- * This probably needs to be excluded from -rt builds.
- */
-#define rt_mutex_owner(a) ({ WARN_ON_ONCE(1); NULL; })
-#define rt_mutex_futex_unlock(x) WARN_ON_ONCE(1)
-
-#endif /* #else #ifdef CONFIG_RCU_BOOST */
 
 #ifdef CONFIG_RCU_NOCB_CPU
 static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
@@ -94,6 +72,8 @@ static void __init rcu_bootup_announce_oddness(void)
 		pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_init_delay);
 	if (gp_cleanup_delay)
 		pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_cleanup_delay);
+	if (!use_softirq)
+		pr_info("\tRCU_SOFTIRQ processing moved to rcuc kthreads.\n");
 	if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG))
 		pr_info("\tRCU debug extended QS entry/exit.\n");
 	rcupdate_announce_bootup_oddness();
@@ -629,7 +609,10 @@ static void rcu_read_unlock_special(struct task_struct *t)
 		/* Need to defer quiescent state until everything is enabled. */
 		if (irqs_were_disabled) {
 			/* Enabling irqs does not reschedule, so... */
-			raise_softirq_irqoff(RCU_SOFTIRQ);
+			if (!use_softirq)
+				raise_softirq_irqoff(RCU_SOFTIRQ);
+			else
+				invoke_rcu_core();
 		} else {
 			/* Enabling BH or preempt does reschedule, so... */
 			set_tsk_need_resched(current);
@@ -944,18 +927,21 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
 
 #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
 
-#ifdef CONFIG_RCU_BOOST
-
-static void rcu_wake_cond(struct task_struct *t, int status)
+/*
+ * If boosting, set rcuc kthreads to realtime priority.
+ */
+static void rcu_cpu_kthread_setup(unsigned int cpu)
 {
-	/*
-	 * If the thread is yielding, only wake it when this
-	 * is invoked from idle
-	 */
-	if (status != RCU_KTHREAD_YIELDING || is_idle_task(current))
-		wake_up_process(t);
+#ifdef CONFIG_RCU_BOOST
+	struct sched_param sp;
+
+	sp.sched_priority = kthread_prio;
+	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
+#endif /* #ifdef CONFIG_RCU_BOOST */
 }
 
+#ifdef CONFIG_RCU_BOOST
+
 /*
  * Carry out RCU priority boosting on the task indicated by ->exp_tasks
  * or ->boost_tasks, advancing the pointer to the next task in the
@@ -1093,23 +1079,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
 	}
 }
 
-/*
- * Wake up the per-CPU kthread to invoke RCU callbacks.
- */
-static void invoke_rcu_callbacks_kthread(void)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	__this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
-	if (__this_cpu_read(rcu_data.rcu_cpu_kthread_task) != NULL &&
-	    current != __this_cpu_read(rcu_data.rcu_cpu_kthread_task)) {
-		rcu_wake_cond(__this_cpu_read(rcu_data.rcu_cpu_kthread_task),
-			      __this_cpu_read(rcu_data.rcu_cpu_kthread_status));
-	}
-	local_irq_restore(flags);
-}
-
 /*
  * Is the current CPU running the RCU-callbacks kthread?
  * Caller must have preemption disabled.
@@ -1163,59 +1132,6 @@ static int rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
 	return 0;
 }
 
-static void rcu_cpu_kthread_setup(unsigned int cpu)
-{
-	struct sched_param sp;
-
-	sp.sched_priority = kthread_prio;
-	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
-}
-
-static void rcu_cpu_kthread_park(unsigned int cpu)
-{
-	per_cpu(rcu_data.rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
-}
-
-static int rcu_cpu_kthread_should_run(unsigned int cpu)
-{
-	return __this_cpu_read(rcu_data.rcu_cpu_has_work);
-}
-
-/*
- * Per-CPU kernel thread that invokes RCU callbacks.  This replaces
- * the RCU softirq used in configurations of RCU that do not support RCU
- * priority boosting.
- */
-static void rcu_cpu_kthread(unsigned int cpu)
-{
-	unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
-	char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
-	int spincnt;
-
-	for (spincnt = 0; spincnt < 10; spincnt++) {
-		trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
-		local_bh_disable();
-		*statusp = RCU_KTHREAD_RUNNING;
-		local_irq_disable();
-		work = *workp;
-		*workp = 0;
-		local_irq_enable();
-		if (work)
-			rcu_do_batch(this_cpu_ptr(&rcu_data));
-		local_bh_enable();
-		if (*workp == 0) {
-			trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
-			*statusp = RCU_KTHREAD_WAITING;
-			return;
-		}
-	}
-	*statusp = RCU_KTHREAD_YIELDING;
-	trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
-	schedule_timeout_interruptible(2);
-	trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
-	*statusp = RCU_KTHREAD_WAITING;
-}
-
 /*
  * Set the per-rcu_node kthread's affinity to cover all CPUs that are
  * served by the rcu_node in question.  The CPU hotplug lock is still
@@ -1246,27 +1162,13 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
 	free_cpumask_var(cm);
 }
 
-static struct smp_hotplug_thread rcu_cpu_thread_spec = {
-	.store			= &rcu_data.rcu_cpu_kthread_task,
-	.thread_should_run	= rcu_cpu_kthread_should_run,
-	.thread_fn		= rcu_cpu_kthread,
-	.thread_comm		= "rcuc/%u",
-	.setup			= rcu_cpu_kthread_setup,
-	.park			= rcu_cpu_kthread_park,
-};
-
 /*
  * Spawn boost kthreads -- called as soon as the scheduler is running.
  */
 static void __init rcu_spawn_boost_kthreads(void)
 {
 	struct rcu_node *rnp;
-	int cpu;
 
-	for_each_possible_cpu(cpu)
-		per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0;
-	if (WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec), "%s: Could not start rcub kthread, OOM is now expected behavior\n", __func__))
-		return;
 	rcu_for_each_leaf_node(rnp)
 		(void)rcu_spawn_one_boost_kthread(rnp);
 }
@@ -1289,11 +1191,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 }
 
-static void invoke_rcu_callbacks_kthread(void)
-{
-	WARN_ON_ONCE(1);
-}
-
 static bool rcu_is_callbacks_kthread(void)
 {
 	return false;
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 44+ messages in thread

* Re: [PATCH v3] rcu: Allow to eliminate softirq processing from rcutree
  2019-03-20 21:13                         ` [PATCH v3] " Sebastian Andrzej Siewior
@ 2019-03-20 23:46                           ` Paul E. McKenney
  2019-03-21  8:27                             ` Sebastian Andrzej Siewior
  2019-03-21 23:32                             ` Paul E. McKenney
  2019-03-22 23:48                           ` Joel Fernandes
  1 sibling, 2 replies; 44+ messages in thread
From: Paul E. McKenney @ 2019-03-20 23:46 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-kernel, Josh Triplett, Steven Rostedt, Mathieu Desnoyers,
	Lai Jiangshan, Joel Fernandes, tglx, Mike Galbraith

On Wed, Mar 20, 2019 at 10:13:33PM +0100, Sebastian Andrzej Siewior wrote:
> Running RCU out of softirq is a problem for some workloads that would
> like to manage RCU core processing independently of other softirq
> work, for example, setting kthread priority.  This commit therefore
> introduces the `rcunosoftirq' option which moves the RCU core work
> from softirq to a per-CPU/per-flavor SCHED_OTHER kthread named rcuc.
> The SCHED_OTHER approach avoids the scalability problems that appeared
> with the earlier attempt to move RCU core processing from softirq
> to kthreads.  That said, kernels built with RCU_BOOST=y will run the
> rcuc kthreads at the RCU-boosting priority.
> 
> Reported-by: Thomas Gleixner <tglx@linutronix.de>
> Tested-by: Mike Galbraith <efault@gmx.de>
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>

Thank you!  I reverted v2 and applied this one with the same sort of
update.  Testing is going well thus far aside from my failing to add
the required "=0" after the rcutree.use_softirq.  I will probably not
be the only one who will run afoul of this, so I updated the commit log
and the documentation accordingly, as shown below.
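
For anyone else who trips over this: a bool module parameter given with
no value is taken as "true", so a bare rcutree.use_softirq on the boot
line changes nothing.  Roughly, going from memory of kernel/params.c:

	int param_set_bool(const char *val, const struct kernel_param *kp)
	{
		/* No "=value" means "set it to true"... */
		if (!val)
			val = "1";

		/* ...otherwise expect one of [yYnN01]. */
		return strtobool(val, kp->arg);
	}

Hence the explicit "=0" called out in the documentation below.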

							Thanx, Paul

------------------------------------------------------------------------

commit 5971694b716d34baa86f3f1dd44f8e587a17d8f0
Author: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date:   Wed Mar 20 22:13:33 2019 +0100

    rcu: Enable elimination of Tree-RCU softirq processing
    
    Some workloads need to change kthread priority for RCU core processing
    without affecting other softirq work.  This commit therefore introduces
    the rcutree.use_softirq kernel boot parameter, which moves the RCU core
    work from softirq to a per-CPU SCHED_OTHER kthread named rcuc.  Use of
    the SCHED_OTHER approach avoids the scalability problems that appeared
    with the earlier attempt to move RCU core processing from softirq
    to kthreads.  That said, kernels built with RCU_BOOST=y will run the
    rcuc kthreads at the RCU-boosting priority.
    
    Note that rcutree.use_softirq=0 must be specified to move RCU core
    processing to the rcuc kthreads: rcutree.use_softirq=1 is the default.
    
    Reported-by: Thomas Gleixner <tglx@linutronix.de>
    Tested-by: Mike Galbraith <efault@gmx.de>
    Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
    Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index d377a2166b79..e2ffb1d9de03 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3672,6 +3672,12 @@
 			the propagation of recent CPU-hotplug changes up
 			the rcu_node combining tree.
 
+	rcutree.use_softirq=	[KNL]
+			If set to zero, move all RCU_SOFTIRQ processing to
+			per-CPU rcuc kthreads.  Defaults to a non-zero
+			value, meaning that RCU_SOFTIRQ is used by default.
+			Specify rcutree.use_softirq=0 to use rcuc kthreads.
+
 	rcutree.rcu_fanout_exact= [KNL]
 			Disable autobalancing of the rcu_node combining
 			tree.  This is used by rcutorture, and might
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index ec77ec336f58..6bd05c9918cc 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -51,6 +51,12 @@
 #include <linux/tick.h>
 #include <linux/sysrq.h>
 #include <linux/kprobes.h>
+#include <linux/gfp.h>
+#include <linux/oom.h>
+#include <linux/smpboot.h>
+#include <linux/jiffies.h>
+#include <linux/sched/isolation.h>
+#include "../time/tick-internal.h"
 
 #include "tree.h"
 #include "rcu.h"
@@ -92,6 +98,9 @@ struct rcu_state rcu_state = {
 /* Dump rcu_node combining tree at boot to verify correct setup. */
 static bool dump_tree;
 module_param(dump_tree, bool, 0444);
+/* Move RCU_SOFTIRQ to rcuc kthreads. */
+static bool use_softirq = 1;
+module_param(use_softirq, bool, 0444);
 /* Control rcu_node-tree auto-balancing at boot time. */
 static bool rcu_fanout_exact;
 module_param(rcu_fanout_exact, bool, 0444);
@@ -2253,7 +2262,7 @@ void rcu_force_quiescent_state(void)
 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
 
 /* Perform RCU core processing work for the current CPU.  */
-static __latent_entropy void rcu_core(struct softirq_action *unused)
+static __latent_entropy void rcu_core(void)
 {
 	unsigned long flags;
 	struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);
@@ -2295,6 +2304,34 @@ static __latent_entropy void rcu_core(struct softirq_action *unused)
 	trace_rcu_utilization(TPS("End RCU core"));
 }
 
+static void rcu_core_si(struct softirq_action *h)
+{
+	rcu_core();
+}
+
+static void rcu_wake_cond(struct task_struct *t, int status)
+{
+	/*
+	 * If the thread is yielding, only wake it when this
+	 * is invoked from idle
+	 */
+	if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current)))
+		wake_up_process(t);
+}
+
+static void invoke_rcu_core_kthread(void)
+{
+	struct task_struct *t;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	__this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
+	t = __this_cpu_read(rcu_data.rcu_cpu_kthread_task);
+	if (t != NULL && t != current)
+		rcu_wake_cond(t, __this_cpu_read(rcu_data.rcu_cpu_kthread_status));
+	local_irq_restore(flags);
+}
+
 /*
  * Schedule RCU callback invocation.  If the running implementation of RCU
  * does not support RCU priority boosting, just do a direct call, otherwise
@@ -2306,18 +2343,94 @@ static void invoke_rcu_callbacks(struct rcu_data *rdp)
 {
 	if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
 		return;
-	if (likely(!rcu_state.boost)) {
-		rcu_do_batch(rdp);
-		return;
-	}
-	invoke_rcu_callbacks_kthread();
+	if (rcu_state.boost || !use_softirq)
+		invoke_rcu_core_kthread();
+	rcu_do_batch(rdp);
 }
 
+/*
+ * Wake up this CPU's rcuc kthread to do RCU core processing.
+ */
 static void invoke_rcu_core(void)
 {
-	if (cpu_online(smp_processor_id()))
+	if (!cpu_online(smp_processor_id()))
+		return;
+	if (use_softirq)
 		raise_softirq(RCU_SOFTIRQ);
+	else
+		invoke_rcu_core_kthread();
+}
+
+static void rcu_cpu_kthread_park(unsigned int cpu)
+{
+	per_cpu(rcu_data.rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
+}
+
+static int rcu_cpu_kthread_should_run(unsigned int cpu)
+{
+	return __this_cpu_read(rcu_data.rcu_cpu_has_work);
+}
+
+/*
+ * Per-CPU kernel thread that invokes RCU callbacks.  This replaces
+ * the RCU softirq used in configurations of RCU that do not support RCU
+ * priority boosting.
+ */
+static void rcu_cpu_kthread(unsigned int cpu)
+{
+	unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
+	char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
+	int spincnt;
+
+	for (spincnt = 0; spincnt < 10; spincnt++) {
+		trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
+		local_bh_disable();
+		*statusp = RCU_KTHREAD_RUNNING;
+		local_irq_disable();
+		work = *workp;
+		*workp = 0;
+		local_irq_enable();
+		if (work)
+			rcu_core();
+		local_bh_enable();
+		if (*workp == 0) {
+			trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
+			*statusp = RCU_KTHREAD_WAITING;
+			return;
+		}
+	}
+	*statusp = RCU_KTHREAD_YIELDING;
+	trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
+	schedule_timeout_interruptible(2);
+	trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
+	*statusp = RCU_KTHREAD_WAITING;
+}
+
+static struct smp_hotplug_thread rcu_cpu_thread_spec = {
+	.store			= &rcu_data.rcu_cpu_kthread_task,
+	.thread_should_run	= rcu_cpu_kthread_should_run,
+	.thread_fn		= rcu_cpu_kthread,
+	.thread_comm		= "rcuc/%u",
+	.setup			= rcu_cpu_kthread_setup,
+	.park			= rcu_cpu_kthread_park,
+};
+
+/*
+ * Spawn per-CPU RCU core processing kthreads.
+ */
+static int __init rcu_spawn_core_kthreads(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0;
+	if (!IS_ENABLED(CONFIG_RCU_BOOST) && use_softirq)
+		return 0;
+	WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec),
+		  "%s: Could not start rcuc kthread, OOM is now expected behavior\n", __func__);
+	return 0;
 }
+early_initcall(rcu_spawn_core_kthreads);
 
 /*
  * Handle any core-RCU processing required by a call_rcu() invocation.
@@ -3355,7 +3468,8 @@ void __init rcu_init(void)
 	rcu_init_one();
 	if (dump_tree)
 		rcu_dump_rcu_node_tree();
-	open_softirq(RCU_SOFTIRQ, rcu_core);
+	if (use_softirq)
+		open_softirq(RCU_SOFTIRQ, rcu_core_si);
 
 	/*
 	 * We don't need protection against CPU-hotplug here because
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index e253d11af3c4..a1a72a1ecb02 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -407,8 +407,8 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func);
 static void dump_blkd_tasks(struct rcu_node *rnp, int ncheck);
 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
 static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
-static void invoke_rcu_callbacks_kthread(void);
 static bool rcu_is_callbacks_kthread(void);
+static void rcu_cpu_kthread_setup(unsigned int cpu);
 static void __init rcu_spawn_boost_kthreads(void);
 static void rcu_prepare_kthreads(int cpu);
 static void rcu_cleanup_after_idle(void);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index f46b4af96ab9..b807204ffd83 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -11,29 +11,7 @@
  *	   Paul E. McKenney <paulmck@linux.ibm.com>
  */
 
-#include <linux/delay.h>
-#include <linux/gfp.h>
-#include <linux/oom.h>
-#include <linux/sched/debug.h>
-#include <linux/smpboot.h>
-#include <linux/sched/isolation.h>
-#include <uapi/linux/sched/types.h>
-#include "../time/tick-internal.h"
-
-#ifdef CONFIG_RCU_BOOST
 #include "../locking/rtmutex_common.h"
-#else /* #ifdef CONFIG_RCU_BOOST */
-
-/*
- * Some architectures do not define rt_mutexes, but if !CONFIG_RCU_BOOST,
- * all uses are in dead code.  Provide a definition to keep the compiler
- * happy, but add WARN_ON_ONCE() to complain if used in the wrong place.
- * This probably needs to be excluded from -rt builds.
- */
-#define rt_mutex_owner(a) ({ WARN_ON_ONCE(1); NULL; })
-#define rt_mutex_futex_unlock(x) WARN_ON_ONCE(1)
-
-#endif /* #else #ifdef CONFIG_RCU_BOOST */
 
 #ifdef CONFIG_RCU_NOCB_CPU
 static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
@@ -94,6 +72,8 @@ static void __init rcu_bootup_announce_oddness(void)
 		pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_init_delay);
 	if (gp_cleanup_delay)
 		pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_cleanup_delay);
+	if (!use_softirq)
+		pr_info("\tRCU_SOFTIRQ processing moved to rcuc kthreads.\n");
 	if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG))
 		pr_info("\tRCU debug extended QS entry/exit.\n");
 	rcupdate_announce_bootup_oddness();
@@ -629,7 +609,10 @@ static void rcu_read_unlock_special(struct task_struct *t)
 		/* Need to defer quiescent state until everything is enabled. */
 		if (irqs_were_disabled) {
 			/* Enabling irqs does not reschedule, so... */
-			raise_softirq_irqoff(RCU_SOFTIRQ);
+			if (!use_softirq)
+				raise_softirq_irqoff(RCU_SOFTIRQ);
+			else
+				invoke_rcu_core();
 		} else {
 			/* Enabling BH or preempt does reschedule, so... */
 			set_tsk_need_resched(current);
@@ -944,18 +927,21 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
 
 #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
 
+/*
+ * If boosting, set rcuc kthreads to realtime priority.
+ */
+static void rcu_cpu_kthread_setup(unsigned int cpu)
+{
 #ifdef CONFIG_RCU_BOOST
+	struct sched_param sp;
 
-static void rcu_wake_cond(struct task_struct *t, int status)
-{
-	/*
-	 * If the thread is yielding, only wake it when this
-	 * is invoked from idle
-	 */
-	if (status != RCU_KTHREAD_YIELDING || is_idle_task(current))
-		wake_up_process(t);
+	sp.sched_priority = kthread_prio;
+	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
+#endif /* #ifdef CONFIG_RCU_BOOST */
 }
 
+#ifdef CONFIG_RCU_BOOST
+
 /*
  * Carry out RCU priority boosting on the task indicated by ->exp_tasks
  * or ->boost_tasks, advancing the pointer to the next task in the
@@ -1093,23 +1079,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
 	}
 }
 
-/*
- * Wake up the per-CPU kthread to invoke RCU callbacks.
- */
-static void invoke_rcu_callbacks_kthread(void)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	__this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
-	if (__this_cpu_read(rcu_data.rcu_cpu_kthread_task) != NULL &&
-	    current != __this_cpu_read(rcu_data.rcu_cpu_kthread_task)) {
-		rcu_wake_cond(__this_cpu_read(rcu_data.rcu_cpu_kthread_task),
-			      __this_cpu_read(rcu_data.rcu_cpu_kthread_status));
-	}
-	local_irq_restore(flags);
-}
-
 /*
  * Is the current CPU running the RCU-callbacks kthread?
  * Caller must have preemption disabled.
@@ -1163,59 +1132,6 @@ static int rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
 	return 0;
 }
 
-static void rcu_cpu_kthread_setup(unsigned int cpu)
-{
-	struct sched_param sp;
-
-	sp.sched_priority = kthread_prio;
-	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
-}
-
-static void rcu_cpu_kthread_park(unsigned int cpu)
-{
-	per_cpu(rcu_data.rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
-}
-
-static int rcu_cpu_kthread_should_run(unsigned int cpu)
-{
-	return __this_cpu_read(rcu_data.rcu_cpu_has_work);
-}
-
-/*
- * Per-CPU kernel thread that invokes RCU callbacks.  This replaces
- * the RCU softirq used in configurations of RCU that do not support RCU
- * priority boosting.
- */
-static void rcu_cpu_kthread(unsigned int cpu)
-{
-	unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
-	char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
-	int spincnt;
-
-	for (spincnt = 0; spincnt < 10; spincnt++) {
-		trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
-		local_bh_disable();
-		*statusp = RCU_KTHREAD_RUNNING;
-		local_irq_disable();
-		work = *workp;
-		*workp = 0;
-		local_irq_enable();
-		if (work)
-			rcu_do_batch(this_cpu_ptr(&rcu_data));
-		local_bh_enable();
-		if (*workp == 0) {
-			trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
-			*statusp = RCU_KTHREAD_WAITING;
-			return;
-		}
-	}
-	*statusp = RCU_KTHREAD_YIELDING;
-	trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
-	schedule_timeout_interruptible(2);
-	trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
-	*statusp = RCU_KTHREAD_WAITING;
-}
-
 /*
  * Set the per-rcu_node kthread's affinity to cover all CPUs that are
  * served by the rcu_node in question.  The CPU hotplug lock is still
@@ -1246,27 +1162,13 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
 	free_cpumask_var(cm);
 }
 
-static struct smp_hotplug_thread rcu_cpu_thread_spec = {
-	.store			= &rcu_data.rcu_cpu_kthread_task,
-	.thread_should_run	= rcu_cpu_kthread_should_run,
-	.thread_fn		= rcu_cpu_kthread,
-	.thread_comm		= "rcuc/%u",
-	.setup			= rcu_cpu_kthread_setup,
-	.park			= rcu_cpu_kthread_park,
-};
-
 /*
  * Spawn boost kthreads -- called as soon as the scheduler is running.
  */
 static void __init rcu_spawn_boost_kthreads(void)
 {
 	struct rcu_node *rnp;
-	int cpu;
 
-	for_each_possible_cpu(cpu)
-		per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0;
-	if (WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec), "%s: Could not start rcub kthread, OOM is now expected behavior\n", __func__))
-		return;
 	rcu_for_each_leaf_node(rnp)
 		(void)rcu_spawn_one_boost_kthread(rnp);
 }
@@ -1289,11 +1191,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 }
 
-static void invoke_rcu_callbacks_kthread(void)
-{
-	WARN_ON_ONCE(1);
-}
-
 static bool rcu_is_callbacks_kthread(void)
 {
 	return false;


^ permalink raw reply related	[flat|nested] 44+ messages in thread

* Re: [PATCH v3] rcu: Allow to eliminate softirq processing from rcutree
  2019-03-20 23:46                           ` Paul E. McKenney
@ 2019-03-21  8:27                             ` Sebastian Andrzej Siewior
  2019-03-21 13:26                               ` Paul E. McKenney
  2019-03-21 23:32                             ` Paul E. McKenney
  1 sibling, 1 reply; 44+ messages in thread
From: Sebastian Andrzej Siewior @ 2019-03-21  8:27 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: linux-kernel, Josh Triplett, Steven Rostedt, Mathieu Desnoyers,
	Lai Jiangshan, Joel Fernandes, tglx, Mike Galbraith

On 2019-03-20 16:46:01 [-0700], Paul E. McKenney wrote:
> Thank you!  I reverted v2 and applied this one with the same sort of
> update.  Testing is going well thus far aside from my failing to add
> the required "=0" after the rcutree.use_softirq.  I will probably not
> be the only one who will run afoul of this, so I updated the commit log
> and the documentation accordingly, as shown below.

perfect, thank you.

> 							Thanx, Paul
> 
> diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
> index f46b4af96ab9..b807204ffd83 100644
> --- a/kernel/rcu/tree_plugin.h
> +++ b/kernel/rcu/tree_plugin.h
> @@ -629,7 +609,10 @@ static void rcu_read_unlock_special(struct task_struct *t)
>  		/* Need to defer quiescent state until everything is enabled. */
>  		if (irqs_were_disabled) {
>  			/* Enabling irqs does not reschedule, so... */
> -			raise_softirq_irqoff(RCU_SOFTIRQ);
> +			if (!use_softirq)

that exclamation mark needs to go :/

> +				raise_softirq_irqoff(RCU_SOFTIRQ);
> +			else
> +				invoke_rcu_core();
>  		} else {
>  			/* Enabling BH or preempt does reschedule, so... */
>  			set_tsk_need_resched(current);

Sebastian

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH] rcu: Allow to eliminate softirq processing from rcutree
  2019-03-20 11:28   ` Sebastian Andrzej Siewior
@ 2019-03-21 12:06     ` Joel Fernandes
  2019-03-21 13:52       ` Paul E. McKenney
  0 siblings, 1 reply; 44+ messages in thread
From: Joel Fernandes @ 2019-03-21 12:06 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-kernel, Josh Triplett, Steven Rostedt, Mathieu Desnoyers,
	Lai Jiangshan, tglx, Paul E. McKenney, Mike Galbraith, rcu

On Wed, Mar 20, 2019 at 12:28:35PM +0100, Sebastian Andrzej Siewior wrote:
> On 2019-03-19 20:26:13 [-0400], Joel Fernandes wrote:
> > > @@ -2769,19 +2782,121 @@ static void invoke_rcu_callbacks(struct rcu_data *rdp)
> > >  {
> > >  	if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
> > >  		return;
> > > -	if (likely(!rcu_state.boost)) {
> > > -		rcu_do_batch(rdp);
> > > -		return;
> > > -	}
> > > -	invoke_rcu_callbacks_kthread();
> > > +	rcu_do_batch(rdp);
> > 
> > Looks like a nice change, but one question...
> > 
> > Consider the case where rcunosoftirq boot option is not passed.
> > 
> > Before, if RCU_BOOST=y, then callbacks would be invoked in rcuc threads if
> > possible, by those threads being woken up from within the softirq context
> > (in invoke_rcu_callbacks).
> > 
> > Now, if RCU_BOOST=y, then callbacks would only be invoked in softirq context
> > and not in the threads at all. Because rcu_softirq_enabled = false, so the
> > path executes:
> >   rcu_read_unlock_special() ->
> >         raise_softirq_irqsoff() ->
> >                 rcu_process_callbacks_si() ->
> >                         rcu_process_callbacks() ->
> >                                 invoke_rcu_callbacks() ->
> >                                         rcu_do_batch()
> > 
> > This seems like a behavioral change to me. This makes the callbacks always
> > execute from the softirq context and not the threads when boosting is
> > configured. IMO in the very least, such behavioral change should be
> > documented in the change.
> > 
> > One way to fix this I think could be, if boosting is enabled, then set
> > rcu_softirq_enabled to false by default so the callbacks are still executed
> > in the rcuc threads.
> > 
> > Did I miss something? Sorry if I did, thanks!
> 
> So with all the swaps and reordering, we are talking about this change:
> 
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index 0a719f726e149..82810483bfc6c 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -2306,20 +2306,6 @@ static void rcu_core_si(struct softirq_action *h)
>  	rcu_core();
>  }
>  
> -/*
> - * Schedule RCU callback invocation.  If the running implementation of RCU
> - * does not support RCU priority boosting, just do a direct call, otherwise
> - * wake up the per-CPU kernel kthread.  Note that because we are running
> - * on the current CPU with softirqs disabled, the rcu_cpu_kthread_task
> - * cannot disappear out from under us.
> - */
> -static void invoke_rcu_callbacks(struct rcu_data *rdp)
> -{
> -	if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
> -		return;
> -	rcu_do_batch(rdp);
> -}
> -
>  static void rcu_wake_cond(struct task_struct *t, int status)
>  {
>  	/*
> @@ -2330,6 +2316,19 @@ static void rcu_wake_cond(struct task_struct *t, int status)
>  		wake_up_process(t);
>  }
>  
> +static void invoke_rcu_core_kthread(void)
> +{
> +	struct task_struct *t;
> +	unsigned long flags;
> +
> +	local_irq_save(flags);
> +	__this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
> +	t = __this_cpu_read(rcu_data.rcu_cpu_kthread_task);
> +	if (t != NULL && t != current)
> +		rcu_wake_cond(t, __this_cpu_read(rcu_data.rcu_cpu_kthread_status));
> +	local_irq_restore(flags);
> +}
> +
>  static bool rcu_softirq_enabled = true;
>  
>  static int __init rcunosoftirq_setup(char *str)
> @@ -2339,26 +2338,33 @@ static int __init rcunosoftirq_setup(char *str)
>  }
>  __setup("rcunosoftirq", rcunosoftirq_setup);
>  
> +/*
> + * Schedule RCU callback invocation.  If the running implementation of RCU
> + * does not support RCU priority boosting, just do a direct call, otherwise
> + * wake up the per-CPU kernel kthread.  Note that because we are running
> + * on the current CPU with softirqs disabled, the rcu_cpu_kthread_task
> + * cannot disappear out from under us.
> + */
> +static void invoke_rcu_callbacks(struct rcu_data *rdp)
> +{
> +	if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
> +		return;
> +	if (rcu_state.boost || rcu_softirq_enabled)
> +		invoke_rcu_core_kthread();

Here shouldn't it be this?
	if (rcu_state.boost || !rcu_softirq_enabled)

Also, the rcu/dev branch has the following hunk where we unconditionally
invoke rcu_do_batch() even when boosting, which would still have the issue
I pointed out. I would suggest that Sebastian post the latest v4 or v5 with
all diffs squashed, and then we can do another round of review on the
latest patch, thanks!

	@@ -2306,18 +2320,110 @@ static void invoke_rcu_callbacks(struct rcu_data *rdp)
 {
 	if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
 		return;
-	if (likely(!rcu_state.boost)) {
-		rcu_do_batch(rdp);
-		return;
-	}
-	invoke_rcu_callbacks_kthread();
+	rcu_do_batch(rdp);
+}
+

thanks,

 - Joel


> +	rcu_do_batch(rdp);
> +}
> +
>  /*
>   * Wake up this CPU's rcuc kthread to do RCU core processing.
>   */
>  static void invoke_rcu_core(void)
>  {
> -	unsigned long flags;
> -	struct task_struct *t;
> -
>  	if (!cpu_online(smp_processor_id()))
>  		return;
> -	if (rcu_softirq_enabled) {
> +	if (rcu_softirq_enabled)
>  		raise_softirq(RCU_SOFTIRQ);
> -	} else {
> -		local_irq_save(flags);
> -		__this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
> -		t = __this_cpu_read(rcu_data.rcu_cpu_kthread_task);
> -		if (t != NULL && t != current)
> -			rcu_wake_cond(t, __this_cpu_read(rcu_data.rcu_cpu_kthread_status));
> -		local_irq_restore(flags);
> -	}
> +	else
> +		invoke_rcu_core_kthread();
>  }
>  
>  static void rcu_cpu_kthread_park(unsigned int cpu)
> @@ -2426,7 +2432,8 @@ static int __init rcu_spawn_core_kthreads(void)
>  		per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0;
>  	if (!IS_ENABLED(CONFIG_RCU_BOOST) && !rcu_softirq_enabled)
>  		return 0;
> -	WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec), "%s: Could not start rcub kthread, OOM is now expected behavior\n", __func__);
> +	WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec),
> +		  "%s: Could not start rcuc kthread, OOM is now expected behavior\n", __func__);
>  	return 0;
>  }
>  early_initcall(rcu_spawn_core_kthreads);
> -- 
> 2.20.1
> 
> >  - Joel
> 
> Sebastian

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v3] rcu: Allow to eliminate softirq processing from rcutree
  2019-03-21  8:27                             ` Sebastian Andrzej Siewior
@ 2019-03-21 13:26                               ` Paul E. McKenney
  0 siblings, 0 replies; 44+ messages in thread
From: Paul E. McKenney @ 2019-03-21 13:26 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-kernel, Josh Triplett, Steven Rostedt, Mathieu Desnoyers,
	Lai Jiangshan, Joel Fernandes, tglx, Mike Galbraith

On Thu, Mar 21, 2019 at 09:27:37AM +0100, Sebastian Andrzej Siewior wrote:
> On 2019-03-20 16:46:01 [-0700], Paul E. McKenney wrote:
> > Thank you!  I reverted v2 and applied this one with the same sort of
> > update.  Testing is going well thus far aside from my failing to add
> > the required "=0" after the rcutree.use_softirq.  I will probably not
> > be the only one who will run afoul of this, so I updated the commit log
> > and the documentation accordingly, as shown below.
> 
> perfect, thank you.
> 
> > 							Thanx, Paul
> > 
> > diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
> > index f46b4af96ab9..b807204ffd83 100644
> > --- a/kernel/rcu/tree_plugin.h
> > +++ b/kernel/rcu/tree_plugin.h
> > @@ -629,7 +609,10 @@ static void rcu_read_unlock_special(struct task_struct *t)
> >  		/* Need to defer quiescent state until everything is enabled. */
> >  		if (irqs_were_disabled) {
> >  			/* Enabling irqs does not reschedule, so... */
> > -			raise_softirq_irqoff(RCU_SOFTIRQ);
> > +			if (!use_softirq)
> 
> that exclamation mark needs to go :/

That might explain some of the failures in TREE01, TREE02, TREE03, and
TREE09.  TREE01 got a NULL pointer dereference, but in __do_softirq().
So I suspect that this was related.  Ditto for TREE02, TREE03, and TREE09.
These also all have CONFIG_PREEMPT=y, and are the only ones run by default
that are set up this way.  (Well, so do SRCU-P, TASKS01, and TASKS03, but
they are torturing other forms of RCU.)

Anyway, I applied your fix above and will rerun.  The failures happened
within a few seconds in all cases, so a short run should cover this.

Once I get good rcutorture runs, I will ask you to run a heavy-duty run.
Once that passes, I will look at your changes in more detail.

							Thanx, Paul

> > +				raise_softirq_irqoff(RCU_SOFTIRQ);
> > +			else
> > +				invoke_rcu_core();
> >  		} else {
> >  			/* Enabling BH or preempt does reschedule, so... */
> >  			set_tsk_need_resched(current);
> 
> Sebastian
> 


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH] rcu: Allow to eliminate softirq processing from rcutree
  2019-03-21 12:06     ` Joel Fernandes
@ 2019-03-21 13:52       ` Paul E. McKenney
  0 siblings, 0 replies; 44+ messages in thread
From: Paul E. McKenney @ 2019-03-21 13:52 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: Sebastian Andrzej Siewior, linux-kernel, Josh Triplett,
	Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan, tglx,
	Mike Galbraith, rcu

On Thu, Mar 21, 2019 at 08:06:56AM -0400, Joel Fernandes wrote:
> On Wed, Mar 20, 2019 at 12:28:35PM +0100, Sebastian Andrzej Siewior wrote:
> > On 2019-03-19 20:26:13 [-0400], Joel Fernandes wrote:
> > > > @@ -2769,19 +2782,121 @@ static void invoke_rcu_callbacks(struct rcu_data *rdp)
> > > >  {
> > > >  	if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
> > > >  		return;
> > > > -	if (likely(!rcu_state.boost)) {
> > > > -		rcu_do_batch(rdp);
> > > > -		return;
> > > > -	}
> > > > -	invoke_rcu_callbacks_kthread();
> > > > +	rcu_do_batch(rdp);
> > > 
> > > Looks like a nice change, but one question...
> > > 
> > > Consider the case where rcunosoftirq boot option is not passed.
> > > 
> > > Before, if RCU_BOOST=y, then callbacks would be invoked in rcuc threads if
> > > possible, by those threads being woken up from within the softirq context
> > > (in invoke_rcu_callbacks).
> > > 
> > > Now, if RCU_BOOST=y, then callbacks would only be invoked in softirq context
> > > and not in the threads at all. Because rcu_softirq_enabled = false, so the
> > > path executes:
> > >   rcu_read_unlock_special() ->
> > >         raise_softirq_irqsoff() ->
> > >                 rcu_process_callbacks_si() ->
> > >                         rcu_process_callbacks() ->
> > >                                 invoke_rcu_callbacks() ->
> > >                                         rcu_do_batch()
> > > 
> > > This seems like a behavioral change to me. This makes the callbacks always
> > > execute from the softirq context and not the threads when boosting is
> > > configured. IMO in the very least, such behavioral change should be
> > > documented in the change.
> > > 
> > > One way to fix this I think could be, if boosting is enabled, then set
> > > rcu_softirq_enabled to false by default so the callbacks are still executed
> > > in the rcuc threads.
> > > 
> > > Did I miss something? Sorry if I did, thanks!
> > 
> > So with all the swaps and reordering, we are talking about this change:
> > 
> > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > index 0a719f726e149..82810483bfc6c 100644
> > --- a/kernel/rcu/tree.c
> > +++ b/kernel/rcu/tree.c
> > @@ -2306,20 +2306,6 @@ static void rcu_core_si(struct softirq_action *h)
> >  	rcu_core();
> >  }
> >  
> > -/*
> > - * Schedule RCU callback invocation.  If the running implementation of RCU
> > - * does not support RCU priority boosting, just do a direct call, otherwise
> > - * wake up the per-CPU kernel kthread.  Note that because we are running
> > - * on the current CPU with softirqs disabled, the rcu_cpu_kthread_task
> > - * cannot disappear out from under us.
> > - */
> > -static void invoke_rcu_callbacks(struct rcu_data *rdp)
> > -{
> > -	if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
> > -		return;
> > -	rcu_do_batch(rdp);
> > -}
> > -
> >  static void rcu_wake_cond(struct task_struct *t, int status)
> >  {
> >  	/*
> > @@ -2330,6 +2316,19 @@ static void rcu_wake_cond(struct task_struct *t, int status)
> >  		wake_up_process(t);
> >  }
> >  
> > +static void invoke_rcu_core_kthread(void)
> > +{
> > +	struct task_struct *t;
> > +	unsigned long flags;
> > +
> > +	local_irq_save(flags);
> > +	__this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
> > +	t = __this_cpu_read(rcu_data.rcu_cpu_kthread_task);
> > +	if (t != NULL && t != current)
> > +		rcu_wake_cond(t, __this_cpu_read(rcu_data.rcu_cpu_kthread_status));
> > +	local_irq_restore(flags);
> > +}
> > +
> >  static bool rcu_softirq_enabled = true;
> >  
> >  static int __init rcunosoftirq_setup(char *str)
> > @@ -2339,26 +2338,33 @@ static int __init rcunosoftirq_setup(char *str)
> >  }
> >  __setup("rcunosoftirq", rcunosoftirq_setup);
> >  
> > +/*
> > + * Schedule RCU callback invocation.  If the running implementation of RCU
> > + * does not support RCU priority boosting, just do a direct call, otherwise
> > + * wake up the per-CPU kernel kthread.  Note that because we are running
> > + * on the current CPU with softirqs disabled, the rcu_cpu_kthread_task
> > + * cannot disappear out from under us.
> > + */
> > +static void invoke_rcu_callbacks(struct rcu_data *rdp)
> > +{
> > +	if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
> > +		return;
> > +	if (rcu_state.boost || rcu_softirq_enabled)
> > +		invoke_rcu_core_kthread();
> 
> Here shouldn't it be this?
> 	if (rcu_state.boost || !rcu_softirq_enabled)
> 
> Also, the rcu/dev branch has the following hunk where we unconditionally
> invoke rcu_do_batch() even when boosting, which would still have the issue
> I pointed out. I would suggest that Sebastian post the latest v4 or v5 with
> all diffs squashed, and then we can do another round of review on the
> latest patch, thanks!

I believe that -rcu has this change.  But it looks like there still are
failures, so yes, further review is necessary and deeply appreciated!

							Thanx, Paul

> 	@@ -2306,18 +2320,110 @@ static void invoke_rcu_callbacks(struct rcu_data *rdp)
>  {
>  	if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
>  		return;
> -	if (likely(!rcu_state.boost)) {
> -		rcu_do_batch(rdp);
> -		return;
> -	}
> -	invoke_rcu_callbacks_kthread();
> +	rcu_do_batch(rdp);
> +}
> +
> 
> thanks,
> 
>  - Joel
> 
> 
> > +	rcu_do_batch(rdp);
> > +}
> > +
> >  /*
> >   * Wake up this CPU's rcuc kthread to do RCU core processing.
> >   */
> >  static void invoke_rcu_core(void)
> >  {
> > -	unsigned long flags;
> > -	struct task_struct *t;
> > -
> >  	if (!cpu_online(smp_processor_id()))
> >  		return;
> > -	if (rcu_softirq_enabled) {
> > +	if (rcu_softirq_enabled)
> >  		raise_softirq(RCU_SOFTIRQ);
> > -	} else {
> > -		local_irq_save(flags);
> > -		__this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
> > -		t = __this_cpu_read(rcu_data.rcu_cpu_kthread_task);
> > -		if (t != NULL && t != current)
> > -			rcu_wake_cond(t, __this_cpu_read(rcu_data.rcu_cpu_kthread_status));
> > -		local_irq_restore(flags);
> > -	}
> > +	else
> > +		invoke_rcu_core_kthread();
> >  }
> >  
> >  static void rcu_cpu_kthread_park(unsigned int cpu)
> > @@ -2426,7 +2432,8 @@ static int __init rcu_spawn_core_kthreads(void)
> >  		per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0;
> >  	if (!IS_ENABLED(CONFIG_RCU_BOOST) && !rcu_softirq_enabled)
> >  		return 0;
> > -	WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec), "%s: Could not start rcub kthread, OOM is now expected behavior\n", __func__);
> > +	WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec),
> > +		  "%s: Could not start rcuc kthread, OOM is now expected behavior\n", __func__);
> >  	return 0;
> >  }
> >  early_initcall(rcu_spawn_core_kthreads);
> > -- 
> > 2.20.1
> > 
> > >  - Joel
> > 
> > Sebastian
> 


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v3] rcu: Allow to eliminate softirq processing from rcutree
  2019-03-20 23:46                           ` Paul E. McKenney
  2019-03-21  8:27                             ` Sebastian Andrzej Siewior
@ 2019-03-21 23:32                             ` Paul E. McKenney
  2019-03-22  7:35                               ` Paul E. McKenney
  2019-03-22 13:42                               ` Joel Fernandes
  1 sibling, 2 replies; 44+ messages in thread
From: Paul E. McKenney @ 2019-03-21 23:32 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-kernel, Josh Triplett, Steven Rostedt, Mathieu Desnoyers,
	Lai Jiangshan, Joel Fernandes, tglx, Mike Galbraith

On Wed, Mar 20, 2019 at 04:46:01PM -0700, Paul E. McKenney wrote:
> On Wed, Mar 20, 2019 at 10:13:33PM +0100, Sebastian Andrzej Siewior wrote:
> > Running RCU out of softirq is a problem for some workloads that would
> > like to manage RCU core processing independently of other softirq
> > work, for example, setting kthread priority.  This commit therefore
> > introduces the `rcunosoftirq' option which moves the RCU core work
> > from softirq to a per-CPU/per-flavor SCHED_OTHER kthread named rcuc.
> > The SCHED_OTHER approach avoids the scalability problems that appeared
> > with the earlier attempt to move RCU core processing to from softirq
> > to kthreads.  That said, kernels built with RCU_BOOST=y will run the
> > rcuc kthreads at the RCU-boosting priority.
> > 
> > Reported-by: Thomas Gleixner <tglx@linutronix.de>
> > Tested-by: Mike Galbraith <efault@gmx.de>
> > Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> 
> Thank you!  I reverted v2 and applied this one with the same sort of
> update.  Testing is going well thus far aside from my failing to add
> the required "=0" after the rcutree.use_softirq.  I will probably not
> be the only one who will run afoul of this, so I updated the commit log
> and the documentation accordingly, as shown below.

And I took a look, please see updates/questions interspersed.

I didn't find anything substantive, but still I get hangs.  Which is
the normal situation.  ;-)

Will fire off more testing...

							Thanx, Paul

> ------------------------------------------------------------------------
> 
> commit 5971694b716d34baa86f3f1dd44f8e587a17d8f0
> Author: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> Date:   Wed Mar 20 22:13:33 2019 +0100
> 
>     rcu: Enable elimination of Tree-RCU softirq processing
>     
>     Some workloads need to change kthread priority for RCU core processing
>     without affecting other softirq work.  This commit therefore introduces
>     the rcutree.use_softirq kernel boot parameter, which moves the RCU core
>     work from softirq to a per-CPU SCHED_OTHER kthread named rcuc.  Use of
>     SCHED_OTHER approach avoids the scalability problems that appeared
>     with the earlier attempt to move RCU core processing to from softirq
>     to kthreads.  That said, kernels built with RCU_BOOST=y will run the
>     rcuc kthreads at the RCU-boosting priority.
>     
>     Note that rcutree.use_softirq=0 must be specified to move RCU core
>     processing to the rcuc kthreads: rcutree.use_softirq=1 is the default.
>     
>     Reported-by: Thomas Gleixner <tglx@linutronix.de>
>     Tested-by: Mike Galbraith <efault@gmx.de>
>     Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
>     Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
> 
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index d377a2166b79..e2ffb1d9de03 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -3672,6 +3672,12 @@
>  			the propagation of recent CPU-hotplug changes up
>  			the rcu_node combining tree.
>  
> +	rcutree.use_softirq=	[KNL]
> +			If set to zero, move all RCU_SOFTIRQ processing to
> +			per-CPU rcuc kthreads.  Defaults to a non-zero
> +			value, meaning that RCU_SOFTIRQ is used by default.
> +			Specify rcutree.use_softirq=0 to use rcuc kthreads.
> +
>  	rcutree.rcu_fanout_exact= [KNL]
>  			Disable autobalancing of the rcu_node combining
>  			tree.  This is used by rcutorture, and might
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index ec77ec336f58..6bd05c9918cc 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -51,6 +51,12 @@
>  #include <linux/tick.h>
>  #include <linux/sysrq.h>
>  #include <linux/kprobes.h>
> +#include <linux/gfp.h>
> +#include <linux/oom.h>
> +#include <linux/smpboot.h>
> +#include <linux/jiffies.h>
> +#include <linux/sched/isolation.h>
> +#include "../time/tick-internal.h"
>  
>  #include "tree.h"
>  #include "rcu.h"
> @@ -92,6 +98,9 @@ struct rcu_state rcu_state = {
>  /* Dump rcu_node combining tree at boot to verify correct setup. */
>  static bool dump_tree;
>  module_param(dump_tree, bool, 0444);
> +/* Move RCU_SOFTIRQ to rcuc kthreads. */

I am replacing this with:

+/* By default, use RCU_SOFTIRQ instead of rcuc kthreads. */

> +static bool use_softirq = 1;
> +module_param(use_softirq, bool, 0444);
>  /* Control rcu_node-tree auto-balancing at boot time. */
>  static bool rcu_fanout_exact;
>  module_param(rcu_fanout_exact, bool, 0444);
> @@ -2253,7 +2262,7 @@ void rcu_force_quiescent_state(void)
>  EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
>  
>  /* Perform RCU core processing work for the current CPU.  */
> -static __latent_entropy void rcu_core(struct softirq_action *unused)
> +static __latent_entropy void rcu_core(void)
>  {
>  	unsigned long flags;
>  	struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);
> @@ -2295,6 +2304,34 @@ static __latent_entropy void rcu_core(struct softirq_action *unused)
>  	trace_rcu_utilization(TPS("End RCU core"));
>  }
>  
> +static void rcu_core_si(struct softirq_action *h)
> +{
> +	rcu_core();
> +}
> +
> +static void rcu_wake_cond(struct task_struct *t, int status)
> +{
> +	/*
> +	 * If the thread is yielding, only wake it when this
> +	 * is invoked from idle
> +	 */
> +	if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current)))
> +		wake_up_process(t);
> +}
> +
> +static void invoke_rcu_core_kthread(void)
> +{
> +	struct task_struct *t;
> +	unsigned long flags;
> +
> +	local_irq_save(flags);
> +	__this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
> +	t = __this_cpu_read(rcu_data.rcu_cpu_kthread_task);
> +	if (t != NULL && t != current)
> +		rcu_wake_cond(t, __this_cpu_read(rcu_data.rcu_cpu_kthread_status));
> +	local_irq_restore(flags);
> +}
> +
>  /*
>   * Schedule RCU callback invocation.  If the running implementation of RCU
>   * does not support RCU priority boosting, just do a direct call, otherwise
> @@ -2306,18 +2343,94 @@ static void invoke_rcu_callbacks(struct rcu_data *rdp)
>  {
>  	if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
>  		return;
> -	if (likely(!rcu_state.boost)) {
> -		rcu_do_batch(rdp);
> -		return;
> -	}
> -	invoke_rcu_callbacks_kthread();
> +	if (rcu_state.boost || !use_softirq)
> +		invoke_rcu_core_kthread();
> +	rcu_do_batch(rdp);
>  }
>  
> +/*
> + * Wake up this CPU's rcuc kthread to do RCU core processing.
> + */
>  static void invoke_rcu_core(void)
>  {
> -	if (cpu_online(smp_processor_id()))
> +	if (!cpu_online(smp_processor_id()))
> +		return;
> +	if (use_softirq)
>  		raise_softirq(RCU_SOFTIRQ);
> +	else
> +		invoke_rcu_core_kthread();
> +}
> +
> +static void rcu_cpu_kthread_park(unsigned int cpu)
> +{
> +	per_cpu(rcu_data.rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
> +}
> +
> +static int rcu_cpu_kthread_should_run(unsigned int cpu)
> +{
> +	return __this_cpu_read(rcu_data.rcu_cpu_has_work);
> +}
> +
> +/*
> + * Per-CPU kernel thread that invokes RCU callbacks.  This replaces
> + * the RCU softirq used in configurations of RCU that do not support RCU
> + * priority boosting.
> + */
> +static void rcu_cpu_kthread(unsigned int cpu)
> +{
> +	unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
> +	char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
> +	int spincnt;
> +
> +	for (spincnt = 0; spincnt < 10; spincnt++) {
> +		trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
> +		local_bh_disable();
> +		*statusp = RCU_KTHREAD_RUNNING;
> +		local_irq_disable();
> +		work = *workp;
> +		*workp = 0;
> +		local_irq_enable();
> +		if (work)
> +			rcu_core();
> +		local_bh_enable();
> +		if (*workp == 0) {
> +			trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
> +			*statusp = RCU_KTHREAD_WAITING;
> +			return;
> +		}
> +	}
> +	*statusp = RCU_KTHREAD_YIELDING;
> +	trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
> +	schedule_timeout_interruptible(2);
> +	trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
> +	*statusp = RCU_KTHREAD_WAITING;
> +}
> +
> +static struct smp_hotplug_thread rcu_cpu_thread_spec = {
> +	.store			= &rcu_data.rcu_cpu_kthread_task,
> +	.thread_should_run	= rcu_cpu_kthread_should_run,
> +	.thread_fn		= rcu_cpu_kthread,
> +	.thread_comm		= "rcuc/%u",
> +	.setup			= rcu_cpu_kthread_setup,
> +	.park			= rcu_cpu_kthread_park,
> +};
> +
> +/*
> + * Spawn per-CPU RCU core processing kthreads.
> + */
> +static int __init rcu_spawn_core_kthreads(void)
> +{
> +	int cpu;
> +
> +	for_each_possible_cpu(cpu)
> +		per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0;
> +	if (!IS_ENABLED(CONFIG_RCU_BOOST) && use_softirq)
> +		return 0;
> +	WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec),
> +		  "%s: Could not start rcuc kthread, OOM is now expected behavior\n", __func__);
> +	return 0;
>  }
> +early_initcall(rcu_spawn_core_kthreads);
>  
>  /*
>   * Handle any core-RCU processing required by a call_rcu() invocation.
> @@ -3355,7 +3468,8 @@ void __init rcu_init(void)
>  	rcu_init_one();
>  	if (dump_tree)
>  		rcu_dump_rcu_node_tree();
> -	open_softirq(RCU_SOFTIRQ, rcu_core);
> +	if (use_softirq)
> +		open_softirq(RCU_SOFTIRQ, rcu_core_si);
>  
>  	/*
>  	 * We don't need protection against CPU-hotplug here because
> diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
> index e253d11af3c4..a1a72a1ecb02 100644
> --- a/kernel/rcu/tree.h
> +++ b/kernel/rcu/tree.h
> @@ -407,8 +407,8 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func);
>  static void dump_blkd_tasks(struct rcu_node *rnp, int ncheck);
>  static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
>  static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
> -static void invoke_rcu_callbacks_kthread(void);
>  static bool rcu_is_callbacks_kthread(void);
> +static void rcu_cpu_kthread_setup(unsigned int cpu);
>  static void __init rcu_spawn_boost_kthreads(void);
>  static void rcu_prepare_kthreads(int cpu);
>  static void rcu_cleanup_after_idle(void);
> diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
> index f46b4af96ab9..b807204ffd83 100644
> --- a/kernel/rcu/tree_plugin.h
> +++ b/kernel/rcu/tree_plugin.h
> @@ -11,29 +11,7 @@
>   *	   Paul E. McKenney <paulmck@linux.ibm.com>
>   */
>  
> -#include <linux/delay.h>
> -#include <linux/gfp.h>
> -#include <linux/oom.h>
> -#include <linux/sched/debug.h>
> -#include <linux/smpboot.h>
> -#include <linux/sched/isolation.h>
> -#include <uapi/linux/sched/types.h>
> -#include "../time/tick-internal.h"
> -
> -#ifdef CONFIG_RCU_BOOST
>  #include "../locking/rtmutex_common.h"
> -#else /* #ifdef CONFIG_RCU_BOOST */
> -
> -/*
> - * Some architectures do not define rt_mutexes, but if !CONFIG_RCU_BOOST,
> - * all uses are in dead code.  Provide a definition to keep the compiler
> - * happy, but add WARN_ON_ONCE() to complain if used in the wrong place.
> - * This probably needs to be excluded from -rt builds.
> - */
> -#define rt_mutex_owner(a) ({ WARN_ON_ONCE(1); NULL; })
> -#define rt_mutex_futex_unlock(x) WARN_ON_ONCE(1)
> -
> -#endif /* #else #ifdef CONFIG_RCU_BOOST */
>  
>  #ifdef CONFIG_RCU_NOCB_CPU
>  static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
> @@ -94,6 +72,8 @@ static void __init rcu_bootup_announce_oddness(void)
>  		pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_init_delay);
>  	if (gp_cleanup_delay)
>  		pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_cleanup_delay);
> +	if (!use_softirq)
> +		pr_info("\tRCU_SOFTIRQ processing moved to rcuc kthreads.\n");
>  	if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG))
>  		pr_info("\tRCU debug extended QS entry/exit.\n");
>  	rcupdate_announce_bootup_oddness();
> @@ -629,7 +609,10 @@ static void rcu_read_unlock_special(struct task_struct *t)
>  		/* Need to defer quiescent state until everything is enabled. */
>  		if (irqs_were_disabled) {
>  			/* Enabling irqs does not reschedule, so... */
> -			raise_softirq_irqoff(RCU_SOFTIRQ);
> +			if (!use_softirq)
> +				raise_softirq_irqoff(RCU_SOFTIRQ);
> +			else
> +				invoke_rcu_core();

This can result in deadlock.  This happens when the scheduler invokes
rcu_read_unlock() with one of the rq or pi locks held, which means that
interrupts are disabled.  And it also means that the wakeup done in
invoke_rcu_core() could go after the same rq or pi lock.

What we really need here is some way to make something happen on this
CPU just after interrupts are re-enabled.  Here are the options I see:

1.	Do set_tsk_need_resched() and set_preempt_need_resched(),
	just like in the "else" clause below.  This sort of works, but
	relies on some later interrupt or similar to get things started.
	This is just fine for normal grace periods, but not so much for
	expedited grace periods.

2.	IPI some other CPU and have it IPI us back.  Not such a good plan
	when running an SMP kernel on a single CPU.

3.	Have a "stub" RCU_SOFTIRQ that contains only the following:

	/* Report any deferred quiescent states if preemption enabled. */
	if (!(preempt_count() & PREEMPT_MASK)) {
		rcu_preempt_deferred_qs(current);
	} else if (rcu_preempt_need_deferred_qs(current)) {
		set_tsk_need_resched(current);
		set_preempt_need_resched();
	}

4.	Except that raise_softirq_irqoff() could potentially have this
	same problem if rcu_read_unlock() is invoked at process level
	from the scheduler with either rq or pi locks held.  :-/

	Which raises the question "why aren't I seeing hangs and
	lockdep splats?"

Assuming that this really is a problem, perhaps I need to do something
like the following:

		if (in_interrupt()) {
			/* In interrupt, so catch softirq on the way out. */
			if (use_softirq)
				raise_softirq_irqoff(RCU_SOFTIRQ);
			else
				invoke_rcu_core();
		} else {
			/* Force reschedule, perhaps quite a bit later. */
			set_tsk_need_resched(current);
			set_preempt_need_resched();
		}

This can delay the quiescent state when rcu_read_unlock() is invoked from
process level with interrupts disabled.  I suppose I could post a very
short-timeout hrtimer, but would that be lightweight enough?  I cannot
use self-targeted smp_call_function_single() because it wants interrupts
enabled and because it will just do a direct call, which won't help here.
I could use a timer, though the latency is larger than would be good.
Also, having lots of non-migratable timers might be considered unfriendly,
though they shouldn't be -that- heavily utilized.  Yet, anyway...
I could try adding logic to local_irq_enable() and local_irq_restore(),
but that probably wouldn't go over all that well.  Besides, sometimes
interrupt enabling happens in assembly language.
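
For concreteness, the hrtimer variant I have in mind would be something
like the sketch below.  Just an illustration, not even compile-tested, and
the rcu_kick_timer/rcu_kick_fn/rcu_defer_qs_kick names are invented; the
timer would also need a one-time hrtimer_init() per CPU at boot:

	static DEFINE_PER_CPU(struct hrtimer, rcu_kick_timer);

	static enum hrtimer_restart rcu_kick_fn(struct hrtimer *unused)
	{
		invoke_rcu_core(); /* Interrupts are enabled by the time this runs. */
		return HRTIMER_NORESTART;
	}

	/* Called from rcu_read_unlock_special() with interrupts disabled. */
	static void rcu_defer_qs_kick(void)
	{
		struct hrtimer *t = this_cpu_ptr(&rcu_kick_timer);

		if (!hrtimer_active(t))
			hrtimer_start(t, ns_to_ktime(100 * NSEC_PER_USEC),
				      HRTIMER_MODE_REL);
	}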

It is quite likely that delays to expedited grace periods wouldn't
happen all that often.  First, the grace period has to start while
the CPU itself (not some blocked task) is in an RCU read-side critical
section, second, that critical section cannot be preempted, and third
the rcu_read_unlock() must run with interrupts disabled.

Ah, but that sequence of events is not supposed to happen with the
scheduler lock!

From Documentation/RCU/Design/Requirements/Requirements.html:

	It is forbidden to hold any of scheduler's runqueue or
	priority-inheritance spinlocks across an rcu_read_unlock()
	unless interrupts have been disabled across the entire RCU
	read-side critical section, that is, up to and including the
	matching rcu_read_lock().
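
In other words, the forbidden pattern is roughly the following sketch,
where rq->lock stands in for any runqueue or priority-inheritance lock:

	rcu_read_lock();			/* Interrupts still enabled here... */
	...
	raw_spin_lock_irqsave(&rq->lock, flags);	/* ...so this is too late. */
	rcu_read_unlock();			/* Might need to wake something up. */
	raw_spin_unlock_irqrestore(&rq->lock, flags);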

Here are the reasons we even get to rcu_read_unlock_special():

1.	The just-ended RCU read-side critical section was preempted.
	This clearly cannot happen if interrupts are disabled across
	the entire critical section.

2.	The scheduling-clock interrupt noticed that this critical
	section has been taking a long time.  But scheduling-clock
	interrupts also cannot happen while interrupts are disabled.

3.	An expedited grace periods started during this critical
	section.  But if that happened, the corresponding IPI would
	have waited until this CPU enabled interrupts, so this
	cannot happen either.

So the call to invoke_rcu_core() should be OK after all.

Which is a bit of a disappointment, given that I am still seeing hangs!

I might replace this invoke_rcu_core() with set_tsk_need_resched() and
set_preempt_need_resched() to see if that gets rid of the hangs, but
first...

>  		} else {
>  			/* Enabling BH or preempt does reschedule, so... */
>  			set_tsk_need_resched(current);
> @@ -944,18 +927,21 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
>  
>  #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
>  
> +/*
> + * If boosting, set rcuc kthreads to realtime priority.
> + */
> +static void rcu_cpu_kthread_setup(unsigned int cpu)
> +{
>  #ifdef CONFIG_RCU_BOOST
> +	struct sched_param sp;
>  
> -static void rcu_wake_cond(struct task_struct *t, int status)
> -{
> -	/*
> -	 * If the thread is yielding, only wake it when this
> -	 * is invoked from idle
> -	 */
> -	if (status != RCU_KTHREAD_YIELDING || is_idle_task(current))
> -		wake_up_process(t);
> +	sp.sched_priority = kthread_prio;
> +	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
> +#endif /* #ifdef CONFIG_RCU_BOOST */
>  }
>  
> +#ifdef CONFIG_RCU_BOOST
> +
>  /*
>   * Carry out RCU priority boosting on the task indicated by ->exp_tasks
>   * or ->boost_tasks, advancing the pointer to the next task in the
> @@ -1093,23 +1079,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
>  	}
>  }
>  
> -/*
> - * Wake up the per-CPU kthread to invoke RCU callbacks.
> - */
> -static void invoke_rcu_callbacks_kthread(void)
> -{
> -	unsigned long flags;
> -
> -	local_irq_save(flags);
> -	__this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
> -	if (__this_cpu_read(rcu_data.rcu_cpu_kthread_task) != NULL &&

OK, so this redundant check for NULL was in the original and thus not
your fault.  ;-)

Which leaves me still puzzled about why I am still seeing hangs.
I will continue testing.

> -	    current != __this_cpu_read(rcu_data.rcu_cpu_kthread_task)) {
> -		rcu_wake_cond(__this_cpu_read(rcu_data.rcu_cpu_kthread_task),
> -			      __this_cpu_read(rcu_data.rcu_cpu_kthread_status));
> -	}
> -	local_irq_restore(flags);
> -}
> -
>  /*
>   * Is the current CPU running the RCU-callbacks kthread?
>   * Caller must have preemption disabled.
> @@ -1163,59 +1132,6 @@ static int rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
>  	return 0;
>  }
>  
> -static void rcu_cpu_kthread_setup(unsigned int cpu)
> -{
> -	struct sched_param sp;
> -
> -	sp.sched_priority = kthread_prio;
> -	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
> -}
> -
> -static void rcu_cpu_kthread_park(unsigned int cpu)
> -{
> -	per_cpu(rcu_data.rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
> -}
> -
> -static int rcu_cpu_kthread_should_run(unsigned int cpu)
> -{
> -	return __this_cpu_read(rcu_data.rcu_cpu_has_work);
> -}
> -
> -/*
> - * Per-CPU kernel thread that invokes RCU callbacks.  This replaces
> - * the RCU softirq used in configurations of RCU that do not support RCU
> - * priority boosting.
> - */
> -static void rcu_cpu_kthread(unsigned int cpu)
> -{
> -	unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
> -	char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
> -	int spincnt;
> -
> -	for (spincnt = 0; spincnt < 10; spincnt++) {
> -		trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
> -		local_bh_disable();
> -		*statusp = RCU_KTHREAD_RUNNING;
> -		local_irq_disable();
> -		work = *workp;
> -		*workp = 0;
> -		local_irq_enable();
> -		if (work)
> -			rcu_do_batch(this_cpu_ptr(&rcu_data));
> -		local_bh_enable();
> -		if (*workp == 0) {
> -			trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
> -			*statusp = RCU_KTHREAD_WAITING;
> -			return;
> -		}
> -	}
> -	*statusp = RCU_KTHREAD_YIELDING;
> -	trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
> -	schedule_timeout_interruptible(2);
> -	trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
> -	*statusp = RCU_KTHREAD_WAITING;
> -}
> -
>  /*
>   * Set the per-rcu_node kthread's affinity to cover all CPUs that are
>   * served by the rcu_node in question.  The CPU hotplug lock is still
> @@ -1246,27 +1162,13 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
>  	free_cpumask_var(cm);
>  }
>  
> -static struct smp_hotplug_thread rcu_cpu_thread_spec = {
> -	.store			= &rcu_data.rcu_cpu_kthread_task,
> -	.thread_should_run	= rcu_cpu_kthread_should_run,
> -	.thread_fn		= rcu_cpu_kthread,
> -	.thread_comm		= "rcuc/%u",
> -	.setup			= rcu_cpu_kthread_setup,
> -	.park			= rcu_cpu_kthread_park,
> -};
> -
>  /*
>   * Spawn boost kthreads -- called as soon as the scheduler is running.
>   */
>  static void __init rcu_spawn_boost_kthreads(void)
>  {
>  	struct rcu_node *rnp;
> -	int cpu;
>  
> -	for_each_possible_cpu(cpu)
> -		per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0;
> -	if (WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec), "%s: Could not start rcub kthread, OOM is now expected behavior\n", __func__))
> -		return;
>  	rcu_for_each_leaf_node(rnp)
>  		(void)rcu_spawn_one_boost_kthread(rnp);
>  }
> @@ -1289,11 +1191,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
>  	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
>  }
>  
> -static void invoke_rcu_callbacks_kthread(void)
> -{
> -	WARN_ON_ONCE(1);
> -}
> -
>  static bool rcu_is_callbacks_kthread(void)
>  {
>  	return false;


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v3] rcu: Allow to eliminate softirq processing from rcutree
  2019-03-21 23:32                             ` Paul E. McKenney
@ 2019-03-22  7:35                               ` Paul E. McKenney
  2019-03-22 12:43                                 ` Paul E. McKenney
  2019-03-22 13:42                               ` Joel Fernandes
  1 sibling, 1 reply; 44+ messages in thread
From: Paul E. McKenney @ 2019-03-22  7:35 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-kernel, Josh Triplett, Steven Rostedt, Mathieu Desnoyers,
	Lai Jiangshan, Joel Fernandes, tglx, Mike Galbraith

On Thu, Mar 21, 2019 at 04:32:44PM -0700, Paul E. McKenney wrote:
> On Wed, Mar 20, 2019 at 04:46:01PM -0700, Paul E. McKenney wrote:
> > On Wed, Mar 20, 2019 at 10:13:33PM +0100, Sebastian Andrzej Siewior wrote:
> > > Running RCU out of softirq is a problem for some workloads that would
> > > like to manage RCU core processing independently of other softirq
> > > work, for example, setting kthread priority.  This commit therefore
> > > introduces the `rcunosoftirq' option which moves the RCU core work
> > > from softirq to a per-CPU/per-flavor SCHED_OTHER kthread named rcuc.
> > > The SCHED_OTHER approach avoids the scalability problems that appeared
> > > with the earlier attempt to move RCU core processing to from softirq
> > > to kthreads.  That said, kernels built with RCU_BOOST=y will run the
> > > rcuc kthreads at the RCU-boosting priority.
> > > 
> > > Reported-by: Thomas Gleixner <tglx@linutronix.de>
> > > Tested-by: Mike Galbraith <efault@gmx.de>
> > > Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> > 
> > Thank you!  I reverted v2 and applied this one with the same sort of
> > update.  Testing is going well thus far aside from my failing to add
> > the required "=0" after the rcutree.use_softirq.  I will probably not
> > be the only one who will run afoul of this, so I updated the commit log
> > and the documentation accordingly, as shown below.
> 
> And I took a look, please see updates/questions interspersed.
> 
> I didn't find anything substantive, but still I get hangs.  Which is
> the normal situation.  ;-)
> 
> Will fire off more testing...

And despite my protestations about restrictions involving the scheduler
and rcu_read_unlock(), with the patch below TREE01, TREE02, TREE03, and
TREE09 pass an hour of rcutorture with rcutree.use_softirq=0.  Without
this patch, seven-minute runs get hard hangs and this:

[   18.417315] BUG: spinlock recursion on CPU#5, rcu_torture_rea/763
[   18.418624]  lock: 0xffff9d207eb61940, .magic: dead4ead, .owner: rcu_torture_rea/763, .owner_cpu: 5
[   18.420418] CPU: 5 PID: 763 Comm: rcu_torture_rea Not tainted 5.1.0-rc1+ #1
[   18.421786] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
[   18.423375] Call Trace:
[   18.423880]  <IRQ>
[   18.424284]  dump_stack+0x46/0x5b
[   18.424953]  do_raw_spin_lock+0x8d/0x90
[   18.425699]  try_to_wake_up+0x2cd/0x4f0
[   18.426493]  invoke_rcu_core_kthread+0x63/0x80
[   18.427337]  rcu_read_unlock_special+0x41/0x80
[   18.428212]  __rcu_read_unlock+0x48/0x50
[   18.428984]  cpuacct_charge+0x96/0xd0
[   18.429725]  ? cpuacct_charge+0x2e/0xd0
[   18.430463]  update_curr+0x112/0x240
[   18.431172]  enqueue_task_fair+0xa9/0x1220
[   18.432009]  ttwu_do_activate+0x49/0xa0
[   18.432741]  sched_ttwu_pending+0x75/0xa0
[   18.433583]  scheduler_ipi+0x53/0x150
[   18.434291]  reschedule_interrupt+0xf/0x20
[   18.435137]  </IRQ

I clearly need to audit the setting of ->rcu_read_unlock_special.

Again, the patch below is bad for expedited grace periods, so it is
experimental.

							Thanx, Paul

------------------------------------------------------------------------

diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index ca972b0b2467..d133fa837426 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -607,12 +607,9 @@ static void rcu_read_unlock_special(struct task_struct *t)
 	if (preempt_bh_were_disabled || irqs_were_disabled) {
 		WRITE_ONCE(t->rcu_read_unlock_special.b.exp_hint, false);
 		/* Need to defer quiescent state until everything is enabled. */
-		if (irqs_were_disabled) {
+		if (irqs_were_disabled && use_softirq) {
 			/* Enabling irqs does not reschedule, so... */
-			if (use_softirq)
-				raise_softirq_irqoff(RCU_SOFTIRQ);
-			else
-				invoke_rcu_core();
+			raise_softirq_irqoff(RCU_SOFTIRQ);
 		} else {
 			/* Enabling BH or preempt does reschedule, so... */
 			set_tsk_need_resched(current);


^ permalink raw reply related	[flat|nested] 44+ messages in thread

* Re: [PATCH v3] rcu: Allow to eliminate softirq processing from rcutree
  2019-03-22  7:35                               ` Paul E. McKenney
@ 2019-03-22 12:43                                 ` Paul E. McKenney
  0 siblings, 0 replies; 44+ messages in thread
From: Paul E. McKenney @ 2019-03-22 12:43 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-kernel, Josh Triplett, Steven Rostedt, Mathieu Desnoyers,
	Lai Jiangshan, Joel Fernandes, tglx, Mike Galbraith

On Fri, Mar 22, 2019 at 12:35:53AM -0700, Paul E. McKenney wrote:
> On Thu, Mar 21, 2019 at 04:32:44PM -0700, Paul E. McKenney wrote:
> > On Wed, Mar 20, 2019 at 04:46:01PM -0700, Paul E. McKenney wrote:
> > > On Wed, Mar 20, 2019 at 10:13:33PM +0100, Sebastian Andrzej Siewior wrote:
> > > > Running RCU out of softirq is a problem for some workloads that would
> > > > like to manage RCU core processing independently of other softirq
> > > > work, for example, setting kthread priority.  This commit therefore
> > > > introduces the `rcunosoftirq' option which moves the RCU core work
> > > > from softirq to a per-CPU/per-flavor SCHED_OTHER kthread named rcuc.
> > > > The SCHED_OTHER approach avoids the scalability problems that appeared
> > > > with the earlier attempt to move RCU core processing to from softirq
> > > > to kthreads.  That said, kernels built with RCU_BOOST=y will run the
> > > > rcuc kthreads at the RCU-boosting priority.
> > > > 
> > > > Reported-by: Thomas Gleixner <tglx@linutronix.de>
> > > > Tested-by: Mike Galbraith <efault@gmx.de>
> > > > Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> > > 
> > > Thank you!  I reverted v2 and applied this one with the same sort of
> > > update.  Testing is going well thus far aside from my failing to add
> > > the required "=0" after the rcutree.use_softirq.  I will probably not
> > > be the only one who will run afoul of this, so I updated the commit log
> > > and the documentation accordingly, as shown below.
> > 
> > And I took a look, please see updates/questions interspersed.
> > 
> > I didn't find anything substantive, but still I get hangs.  Which is
> > the normal situation.  ;-)
> > 
> > Will fire off more testing...
> 
> And despite my protestations about restrictions involving the scheduler
> and rcu_read_unlock(), with the patch below TREE01, TREE02, TREE03, and
> TREE09 pass an hour of rcutorture with rcutree.use_softirq=0.  Without
> this patch, seven-minute runs get hard hangs and this:
> 
> [   18.417315] BUG: spinlock recursion on CPU#5, rcu_torture_rea/763
> [   18.418624]  lock: 0xffff9d207eb61940, .magic: dead4ead, .owner: rcu_torture_rea/763, .owner_cpu: 5
> [   18.420418] CPU: 5 PID: 763 Comm: rcu_torture_rea Not tainted 5.1.0-rc1+ #1
> [   18.421786] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
> [   18.423375] Call Trace:
> [   18.423880]  <IRQ>
> [   18.424284]  dump_stack+0x46/0x5b
> [   18.424953]  do_raw_spin_lock+0x8d/0x90
> [   18.425699]  try_to_wake_up+0x2cd/0x4f0
> [   18.426493]  invoke_rcu_core_kthread+0x63/0x80
> [   18.427337]  rcu_read_unlock_special+0x41/0x80
> [   18.428212]  __rcu_read_unlock+0x48/0x50
> [   18.428984]  cpuacct_charge+0x96/0xd0
> [   18.429725]  ? cpuacct_charge+0x2e/0xd0
> [   18.430463]  update_curr+0x112/0x240
> [   18.431172]  enqueue_task_fair+0xa9/0x1220
> [   18.432009]  ttwu_do_activate+0x49/0xa0
> [   18.432741]  sched_ttwu_pending+0x75/0xa0
> [   18.433583]  scheduler_ipi+0x53/0x150
> [   18.434291]  reschedule_interrupt+0xf/0x20
> [   18.435137]  </IRQ
> 
> I clearly need to audit the setting of ->rcu_read_unlock_special.
> 
> Again, the patch below is bad for expedited grace periods, so it is
> experimental.

And this was just me being slow.  A prior RCU read-side critical
section might have been preempted, but have had something (bh, irq,
preempt) disabled at rcu_read_unlock() time.  Then the task remains
queued until the next full-up quiescent state.
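
That is, a sequence along these lines (illustration only):

	rcu_read_lock();
	/* Preempted here, so the task is queued on its rcu_node. */
	local_irq_save(flags);
	rcu_read_unlock();	/* irqs_were_disabled, so the QS report is deferred. */
	local_irq_restore(flags);
	/* The task stays queued until some later quiescent state cleans up. */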

So this hack is what I have for the time being.  I will be looking
into it more...

							Thanx, Paul

> ------------------------------------------------------------------------
> 
> diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
> index ca972b0b2467..d133fa837426 100644
> --- a/kernel/rcu/tree_plugin.h
> +++ b/kernel/rcu/tree_plugin.h
> @@ -607,12 +607,9 @@ static void rcu_read_unlock_special(struct task_struct *t)
>  	if (preempt_bh_were_disabled || irqs_were_disabled) {
>  		WRITE_ONCE(t->rcu_read_unlock_special.b.exp_hint, false);
>  		/* Need to defer quiescent state until everything is enabled. */
> -		if (irqs_were_disabled) {
> +		if (irqs_were_disabled && use_softirq) {
>  			/* Enabling irqs does not reschedule, so... */
> -			if (use_softirq)
> -				raise_softirq_irqoff(RCU_SOFTIRQ);
> -			else
> -				invoke_rcu_core();
> +			raise_softirq_irqoff(RCU_SOFTIRQ);
>  		} else {
>  			/* Enabling BH or preempt does reschedule, so... */
>  			set_tsk_need_resched(current);


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v3] rcu: Allow to eliminate softirq processing from rcutree
  2019-03-21 23:32                             ` Paul E. McKenney
  2019-03-22  7:35                               ` Paul E. McKenney
@ 2019-03-22 13:42                               ` Joel Fernandes
  2019-03-22 14:58                                 ` Paul E. McKenney
  1 sibling, 1 reply; 44+ messages in thread
From: Joel Fernandes @ 2019-03-22 13:42 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: Sebastian Andrzej Siewior, linux-kernel, Josh Triplett,
	Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan, tglx,
	Mike Galbraith

On Thu, Mar 21, 2019 at 04:32:44PM -0700, Paul E. McKenney wrote:
> On Wed, Mar 20, 2019 at 04:46:01PM -0700, Paul E. McKenney wrote:
> > On Wed, Mar 20, 2019 at 10:13:33PM +0100, Sebastian Andrzej Siewior wrote:
> > > Running RCU out of softirq is a problem for some workloads that would
> > > like to manage RCU core processing independently of other softirq
> > > work, for example, setting kthread priority.  This commit therefore
> > > introduces the `rcunosoftirq' option which moves the RCU core work
> > > from softirq to a per-CPU/per-flavor SCHED_OTHER kthread named rcuc.
> > > The SCHED_OTHER approach avoids the scalability problems that appeared
> > > with the earlier attempt to move RCU core processing to from softirq
> > > to kthreads.  That said, kernels built with RCU_BOOST=y will run the
> > > rcuc kthreads at the RCU-boosting priority.
> > > 
> > > Reported-by: Thomas Gleixner <tglx@linutronix.de>
> > > Tested-by: Mike Galbraith <efault@gmx.de>
> > > Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> > 
> > Thank you!  I reverted v2 and applied this one with the same sort of
> > update.  Testing is going well thus far aside from my failing to add
> > the required "=0" after the rcutree.use_softirq.  I will probably not
> > be the only one who will run afoul of this, so I updated the commit log
> > and the documentation accordingly, as shown below.
> 
> And I took a look, please see updates/questions interspersed.
[snip]
> 
> > ------------------------------------------------------------------------
> > 
> > commit 5971694b716d34baa86f3f1dd44f8e587a17d8f0
> > Author: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> > Date:   Wed Mar 20 22:13:33 2019 +0100
> > 
> >     rcu: Enable elimination of Tree-RCU softirq processing
> >     
> >     Some workloads need to change kthread priority for RCU core processing
> >     without affecting other softirq work.  This commit therefore introduces
> >     the rcutree.use_softirq kernel boot parameter, which moves the RCU core
> >     work from softirq to a per-CPU SCHED_OTHER kthread named rcuc.  Use of
> >     SCHED_OTHER approach avoids the scalability problems that appeared
> >     with the earlier attempt to move RCU core processing to from softirq
> >     to kthreads.  That said, kernels built with RCU_BOOST=y will run the
> >     rcuc kthreads at the RCU-boosting priority.
> >     
> >     Note that rcutree.use_softirq=0 must be specified to move RCU core
> >     processing to the rcuc kthreads: rcutree.use_softirq=1 is the default.
> >     
> >     Reported-by: Thomas Gleixner <tglx@linutronix.de>
> >     Tested-by: Mike Galbraith <efault@gmx.de>
> >     Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> >     Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
> > 
> > diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> > index d377a2166b79..e2ffb1d9de03 100644
> > --- a/Documentation/admin-guide/kernel-parameters.txt
> > +++ b/Documentation/admin-guide/kernel-parameters.txt
> > @@ -3672,6 +3672,12 @@
> >  			the propagation of recent CPU-hotplug changes up
> >  			the rcu_node combining tree.
> >  
> > +	rcutree.use_softirq=	[KNL]
> > +			If set to zero, move all RCU_SOFTIRQ processing to
> > +			per-CPU rcuc kthreads.  Defaults to a non-zero
> > +			value, meaning that RCU_SOFTIRQ is used by default.
> > +			Specify rcutree.use_softirq=0 to use rcuc kthreads.
> > +
> >  	rcutree.rcu_fanout_exact= [KNL]
> >  			Disable autobalancing of the rcu_node combining
> >  			tree.  This is used by rcutorture, and might
> > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > index ec77ec336f58..6bd05c9918cc 100644
> > --- a/kernel/rcu/tree.c
> > +++ b/kernel/rcu/tree.c
> > @@ -51,6 +51,12 @@
> >  #include <linux/tick.h>
> >  #include <linux/sysrq.h>
> >  #include <linux/kprobes.h>
> > +#include <linux/gfp.h>
> > +#include <linux/oom.h>
> > +#include <linux/smpboot.h>
> > +#include <linux/jiffies.h>
> > +#include <linux/sched/isolation.h>
> > +#include "../time/tick-internal.h"
> >  
> >  #include "tree.h"
> >  #include "rcu.h"
> > @@ -92,6 +98,9 @@ struct rcu_state rcu_state = {
> >  /* Dump rcu_node combining tree at boot to verify correct setup. */
> >  static bool dump_tree;
> >  module_param(dump_tree, bool, 0444);
> > +/* Move RCU_SOFTIRQ to rcuc kthreads. */
> 
> I am replacing this with:
> 
> +/* By default, use RCU_SOFTIRQ instead of rcuc kthreads. */
> 
> > +static bool use_softirq = 1;
> > +module_param(use_softirq, bool, 0444);
> >  /* Control rcu_node-tree auto-balancing at boot time. */
> >  static bool rcu_fanout_exact;
> >  module_param(rcu_fanout_exact, bool, 0444);
> > @@ -2253,7 +2262,7 @@ void rcu_force_quiescent_state(void)
> >  EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
> >  
> >  /* Perform RCU core processing work for the current CPU.  */
> > -static __latent_entropy void rcu_core(struct softirq_action *unused)
> > +static __latent_entropy void rcu_core(void)
> >  {
> >  	unsigned long flags;
> >  	struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);
> > @@ -2295,6 +2304,34 @@ static __latent_entropy void rcu_core(struct softirq_action *unused)
> >  	trace_rcu_utilization(TPS("End RCU core"));
> >  }
> >  
> > +static void rcu_core_si(struct softirq_action *h)
> > +{
> > +	rcu_core();
> > +}
> > +
> > +static void rcu_wake_cond(struct task_struct *t, int status)
> > +{
> > +	/*
> > +	 * If the thread is yielding, only wake it when this
> > +	 * is invoked from idle
> > +	 */
> > +	if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current)))
> > +		wake_up_process(t);
> > +}
> > +
> > +static void invoke_rcu_core_kthread(void)
> > +{
> > +	struct task_struct *t;
> > +	unsigned long flags;
> > +
> > +	local_irq_save(flags);
> > +	__this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
> > +	t = __this_cpu_read(rcu_data.rcu_cpu_kthread_task);
> > +	if (t != NULL && t != current)
> > +		rcu_wake_cond(t, __this_cpu_read(rcu_data.rcu_cpu_kthread_status));
> > +	local_irq_restore(flags);
> > +}
> > +
> >  /*
> >   * Schedule RCU callback invocation.  If the running implementation of RCU
> >   * does not support RCU priority boosting, just do a direct call, otherwise
> > @@ -2306,18 +2343,94 @@ static void invoke_rcu_callbacks(struct rcu_data *rdp)
> >  {
> >  	if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
> >  		return;
> > -	if (likely(!rcu_state.boost)) {
> > -		rcu_do_batch(rdp);
> > -		return;
> > -	}
> > -	invoke_rcu_callbacks_kthread();
> > +	if (rcu_state.boost || !use_softirq)
> > +		invoke_rcu_core_kthread();
> > +	rcu_do_batch(rdp);
> >  }
> >  
> > +/*
> > + * Wake up this CPU's rcuc kthread to do RCU core processing.
> > + */
> >  static void invoke_rcu_core(void)
> >  {
> > -	if (cpu_online(smp_processor_id()))
> > +	if (!cpu_online(smp_processor_id()))
> > +		return;
> > +	if (use_softirq)
> >  		raise_softirq(RCU_SOFTIRQ);
> > +	else
> > +		invoke_rcu_core_kthread();
> > +}
> > +
> > +static void rcu_cpu_kthread_park(unsigned int cpu)
> > +{
> > +	per_cpu(rcu_data.rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
> > +}
> > +
> > +static int rcu_cpu_kthread_should_run(unsigned int cpu)
> > +{
> > +	return __this_cpu_read(rcu_data.rcu_cpu_has_work);
> > +}
> > +
> > +/*
> > + * Per-CPU kernel thread that invokes RCU callbacks.  This replaces
> > + * the RCU softirq used in configurations of RCU that do not support RCU
> > + * priority boosting.
> > + */
> > +static void rcu_cpu_kthread(unsigned int cpu)
> > +{
> > +	unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
> > +	char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
> > +	int spincnt;
> > +
> > +	for (spincnt = 0; spincnt < 10; spincnt++) {
> > +		trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
> > +		local_bh_disable();
> > +		*statusp = RCU_KTHREAD_RUNNING;
> > +		local_irq_disable();
> > +		work = *workp;
> > +		*workp = 0;
> > +		local_irq_enable();
> > +		if (work)
> > +			rcu_core();
> > +		local_bh_enable();
> > +		if (*workp == 0) {
> > +			trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
> > +			*statusp = RCU_KTHREAD_WAITING;
> > +			return;
> > +		}
> > +	}
> > +	*statusp = RCU_KTHREAD_YIELDING;
> > +	trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
> > +	schedule_timeout_interruptible(2);
> > +	trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
> > +	*statusp = RCU_KTHREAD_WAITING;
> > +}
> > +
> > +static struct smp_hotplug_thread rcu_cpu_thread_spec = {
> > +	.store			= &rcu_data.rcu_cpu_kthread_task,
> > +	.thread_should_run	= rcu_cpu_kthread_should_run,
> > +	.thread_fn		= rcu_cpu_kthread,
> > +	.thread_comm		= "rcuc/%u",
> > +	.setup			= rcu_cpu_kthread_setup,
> > +	.park			= rcu_cpu_kthread_park,
> > +};
> > +
> > +/*
> > + * Spawn per-CPU RCU core processing kthreads.
> > + */
> > +static int __init rcu_spawn_core_kthreads(void)
> > +{
> > +	int cpu;
> > +
> > +	for_each_possible_cpu(cpu)
> > +		per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0;
> > +	if (!IS_ENABLED(CONFIG_RCU_BOOST) && use_softirq)
> > +		return 0;
> > +	WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec),
> > +		  "%s: Could not start rcuc kthread, OOM is now expected behavior\n", __func__);
> > +	return 0;
> >  }
> > +early_initcall(rcu_spawn_core_kthreads);
> >  
> >  /*
> >   * Handle any core-RCU processing required by a call_rcu() invocation.
> > @@ -3355,7 +3468,8 @@ void __init rcu_init(void)
> >  	rcu_init_one();
> >  	if (dump_tree)
> >  		rcu_dump_rcu_node_tree();
> > -	open_softirq(RCU_SOFTIRQ, rcu_core);
> > +	if (use_softirq)
> > +		open_softirq(RCU_SOFTIRQ, rcu_core_si);
> >  
> >  	/*
> >  	 * We don't need protection against CPU-hotplug here because
> > diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
> > index e253d11af3c4..a1a72a1ecb02 100644
> > --- a/kernel/rcu/tree.h
> > +++ b/kernel/rcu/tree.h
> > @@ -407,8 +407,8 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func);
> >  static void dump_blkd_tasks(struct rcu_node *rnp, int ncheck);
> >  static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
> >  static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
> > -static void invoke_rcu_callbacks_kthread(void);
> >  static bool rcu_is_callbacks_kthread(void);
> > +static void rcu_cpu_kthread_setup(unsigned int cpu);
> >  static void __init rcu_spawn_boost_kthreads(void);
> >  static void rcu_prepare_kthreads(int cpu);
> >  static void rcu_cleanup_after_idle(void);
> > diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
> > index f46b4af96ab9..b807204ffd83 100644
> > --- a/kernel/rcu/tree_plugin.h
> > +++ b/kernel/rcu/tree_plugin.h
> > @@ -11,29 +11,7 @@
> >   *	   Paul E. McKenney <paulmck@linux.ibm.com>
> >   */
> >  
> > -#include <linux/delay.h>
> > -#include <linux/gfp.h>
> > -#include <linux/oom.h>
> > -#include <linux/sched/debug.h>
> > -#include <linux/smpboot.h>
> > -#include <linux/sched/isolation.h>
> > -#include <uapi/linux/sched/types.h>
> > -#include "../time/tick-internal.h"
> > -
> > -#ifdef CONFIG_RCU_BOOST
> >  #include "../locking/rtmutex_common.h"
> > -#else /* #ifdef CONFIG_RCU_BOOST */
> > -
> > -/*
> > - * Some architectures do not define rt_mutexes, but if !CONFIG_RCU_BOOST,
> > - * all uses are in dead code.  Provide a definition to keep the compiler
> > - * happy, but add WARN_ON_ONCE() to complain if used in the wrong place.
> > - * This probably needs to be excluded from -rt builds.
> > - */
> > -#define rt_mutex_owner(a) ({ WARN_ON_ONCE(1); NULL; })
> > -#define rt_mutex_futex_unlock(x) WARN_ON_ONCE(1)
> > -
> > -#endif /* #else #ifdef CONFIG_RCU_BOOST */
> >  
> >  #ifdef CONFIG_RCU_NOCB_CPU
> >  static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
> > @@ -94,6 +72,8 @@ static void __init rcu_bootup_announce_oddness(void)
> >  		pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_init_delay);
> >  	if (gp_cleanup_delay)
> >  		pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_cleanup_delay);
> > +	if (!use_softirq)
> > +		pr_info("\tRCU_SOFTIRQ processing moved to rcuc kthreads.\n");
> >  	if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG))
> >  		pr_info("\tRCU debug extended QS entry/exit.\n");
> >  	rcupdate_announce_bootup_oddness();
> > @@ -629,7 +609,10 @@ static void rcu_read_unlock_special(struct task_struct *t)
> >  		/* Need to defer quiescent state until everything is enabled. */
> >  		if (irqs_were_disabled) {
> >  			/* Enabling irqs does not reschedule, so... */
> > -			raise_softirq_irqoff(RCU_SOFTIRQ);
> > +			if (!use_softirq)
> > +				raise_softirq_irqoff(RCU_SOFTIRQ);
> > +			else
> > +				invoke_rcu_core();
> 
> This can result in deadlock.  This happens when the scheduler invokes
> rcu_read_unlock() with one of the rq or pi locks held, which means that
> interrupts are disabled.  And it also means that the wakeup done in
> invoke_rcu_core() could go after the same rq or pi lock.
> 
> What we really need here is some way to make soemthing happen on this
> CPU just after interrupts are re-enabled.  Here are the options I see:
> 
> 1.	Do set_tsk_need_resched() and set_preempt_need_resched(),
> 	just like in the "else" clause below.  This sort of works, but
> 	relies on some later interrupt or similar to get things started.
> 	This is just fine for normal grace periods, but not so much for
> 	expedited grace periods.
> 
> 2.	IPI some other CPU and have it IPI us back.  Not such a good plan
> 	when running an SMP kernel on a single CPU.
> 
> 3.	Have a "stub" RCU_SOFTIRQ that contains only the following:
> 
> 	/* Report any deferred quiescent states if preemption enabled. */
> 	if (!(preempt_count() & PREEMPT_MASK)) {
> 		rcu_preempt_deferred_qs(current);
> 	} else if (rcu_preempt_need_deferred_qs(current)) {
> 		set_tsk_need_resched(current);
> 		set_preempt_need_resched();
> 	}
> 
> 4.	Except that raise_softirq_irqoff() could potentially have this
> 	same problem if rcu_read_unlock() is invoked at process level
> 	from the scheduler with either rq or pi locks held.  :-/
> 
> 	Which raises the question "why aren't I seeing hangs and
> 	lockdep splats?"

Interesting, could it be you're not seeing a hang in the regular case
because enqueuing ksoftirqd on the same CPU where the rcu_read_unlock() is
happening is a rare event? First, ksoftirqd has to even be awakened in the
first place. On the other hand, with the new code the thread is always
awakened and so is more likely to run into the issue you found?

The lockdep splats should be a more common occurrence though, IMO. If you could
let me know which RCU config is hanging, I can try to debug this at my end as
well.

> Assuming that this really is a problem, perhaps I need to do something
> like the following:
> 
> 		if (in_interrupt()) {
> 			/* In interrupt, so catch softirq on the way out. */
> 			if (use_softirq)
> 				raise_softirq_irqoff(RCU_SOFTIRQ);
> 			else
> 				invoke_rcu_core();
> 		} else {
> 			/* Force resschedule, perhaps quite a bit later. */
> 			set_tsk_need_resched(current);
> 			set_preempt_need_resched();
> 		}
> 
> This can delay the quiescent state when rcu_read_unlock() is invoked from
> process level with interrupts disabled.  I suppose I could post a very
> short-timeout hrtimer, but would that be lightweight enough?  I cannot
> use self-targeted smp_call_function_single() because it wants interrupts
> enabled and because it will just do a direct call, which won't help here.
> I could use a timer, though the latency is larger than would be good.

I have been thinking for some time that we should have statistics counters
for this sort of thing. Then we could run rcutorture and sample the stats
counters from /proc or something to see how long all of these things took
(longest grace period, etc.). Would that be something of interest to make
this task easier?
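
To make that concrete, I am thinking of something as small as the sketch
below (entirely made up, and using debugfs rather than /proc, just to
illustrate the idea; needs <linux/debugfs.h>):

	/* Hypothetical counter: longest observed grace period, in jiffies. */
	static u64 rcu_max_gp_jiffies;

	static void rcu_note_gp_end(unsigned long gp_start_jiffies)
	{
		u64 len = jiffies - gp_start_jiffies;

		if (len > rcu_max_gp_jiffies)
			rcu_max_gp_jiffies = len;
	}

	static int __init rcu_gp_stats_init(void)
	{
		debugfs_create_u64("rcu_max_gp_jiffies", 0444, NULL,
				   &rcu_max_gp_jiffies);
		return 0;
	}
	late_initcall(rcu_gp_stats_init);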

> Also, having lots of non-migratable timers might be considered unfriendly,
> though they shouldn't be -that- heavily utilized.  Yet, anyway...
> I could try adding logic to local_irq_enable() and local_irq_restore(),
> but that probably wouldn't go over all that well.  Besides, sometimes
> interrupt enabling happens in assembly language.
> 
> It is quite likely that delays to expedited grace periods wouldn't
> happen all that often.  First, the grace period has to start while
> the CPU itself (not some blocked task) is in an RCU read-side critical
> section, second, that critical section cannot be preempted, and third
> the rcu_read_unlock() must run with interrupts disabled.
> 
> Ah, but that sequence of events is not supposed to happen with the
> scheduler lock!
> 
> From Documentation/RCU/Design/Requirements/Requirements.html:
> 
> 	It is forbidden to hold any of scheduler's runqueue or
> 	priority-inheritance spinlocks across an rcu_read_unlock()
> 	unless interrupts have been disabled across the entire RCU
> 	read-side critical section, that is, up to and including the
> 	matching rcu_read_lock().
> 
> Here are the reasons we even get to rcu_read_unlock_special():
> 
> 1.	The just-ended RCU read-side critical section was preempted.
> 	This clearly cannot happen if interrupts are disabled across
> 	the entire critical section.
> 
> 2.	The scheduling-clock interrupt noticed that this critical
> 	section has been taking a long time.  But scheduling-clock
> 	interrupts also cannot happen while interrupts are disabled.
> 
> 3.	An expedited grace periods started during this critical
> 	section.  But if that happened, the corresponding IPI would
> 	have waited until this CPU enabled interrupts, so this
> 	cannot happen either.
> 
> So the call to invoke_rcu_core() should be OK after all.
> 
> Which is a bit of a disappointment, given that I am still seeing hangs!

Oh ok, discount whatever I just said then ;-) Indeed I remember this
requirement too now. Your neat documentation skills are indeed life saving :D

> I might replace this invoke_rcu_core() with set_tsk_need_resched() and
> set_preempt_need_resched() to see if that gets rid of the hangs, but
> first...

Could we use the NMI watchdog to dump the stacks at the time of the hang?
Maybe a deadlock will show up in the stack traces (I think its config option
is called HARDLOCKUP_DETECTOR or something).

thanks,

 - Joel


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v3] rcu: Allow to eliminate softirq processing from rcutree
  2019-03-22 13:42                               ` Joel Fernandes
@ 2019-03-22 14:58                                 ` Paul E. McKenney
  2019-03-22 15:50                                   ` Joel Fernandes
  0 siblings, 1 reply; 44+ messages in thread
From: Paul E. McKenney @ 2019-03-22 14:58 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: Sebastian Andrzej Siewior, linux-kernel, Josh Triplett,
	Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan, tglx,
	Mike Galbraith

On Fri, Mar 22, 2019 at 09:42:07AM -0400, Joel Fernandes wrote:
> On Thu, Mar 21, 2019 at 04:32:44PM -0700, Paul E. McKenney wrote:
> > On Wed, Mar 20, 2019 at 04:46:01PM -0700, Paul E. McKenney wrote:
> > > On Wed, Mar 20, 2019 at 10:13:33PM +0100, Sebastian Andrzej Siewior wrote:
> > > > Running RCU out of softirq is a problem for some workloads that would
> > > > like to manage RCU core processing independently of other softirq
> > > > work, for example, setting kthread priority.  This commit therefore
> > > > introduces the `rcunosoftirq' option which moves the RCU core work
> > > > from softirq to a per-CPU/per-flavor SCHED_OTHER kthread named rcuc.
> > > > The SCHED_OTHER approach avoids the scalability problems that appeared
> > > > with the earlier attempt to move RCU core processing to from softirq
> > > > to kthreads.  That said, kernels built with RCU_BOOST=y will run the
> > > > rcuc kthreads at the RCU-boosting priority.
> > > > 
> > > > Reported-by: Thomas Gleixner <tglx@linutronix.de>
> > > > Tested-by: Mike Galbraith <efault@gmx.de>
> > > > Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> > > 
> > > Thank you!  I reverted v2 and applied this one with the same sort of
> > > update.  Testing is going well thus far aside from my failing to add
> > > the required "=0" after the rcutree.use_softirq.  I will probably not
> > > be the only one who will run afoul of this, so I updated the commit log
> > > and the documentation accordingly, as shown below.
> > 
> > And I took a look, please see updates/questions interspersed.
> [snip]
> > 
> > > ------------------------------------------------------------------------
> > > 
> > > commit 5971694b716d34baa86f3f1dd44f8e587a17d8f0
> > > Author: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> > > Date:   Wed Mar 20 22:13:33 2019 +0100
> > > 
> > >     rcu: Enable elimination of Tree-RCU softirq processing
> > >     
> > >     Some workloads need to change kthread priority for RCU core processing
> > >     without affecting other softirq work.  This commit therefore introduces
> > >     the rcutree.use_softirq kernel boot parameter, which moves the RCU core
> > >     work from softirq to a per-CPU SCHED_OTHER kthread named rcuc.  Use of
> > >     SCHED_OTHER approach avoids the scalability problems that appeared
> > >     with the earlier attempt to move RCU core processing to from softirq
> > >     to kthreads.  That said, kernels built with RCU_BOOST=y will run the
> > >     rcuc kthreads at the RCU-boosting priority.
> > >     
> > >     Note that rcutree.use_softirq=0 must be specified to move RCU core
> > >     processing to the rcuc kthreads: rcutree.use_softirq=1 is the default.
> > >     
> > >     Reported-by: Thomas Gleixner <tglx@linutronix.de>
> > >     Tested-by: Mike Galbraith <efault@gmx.de>
> > >     Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> > >     Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
> > > 
> > > diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> > > index d377a2166b79..e2ffb1d9de03 100644
> > > --- a/Documentation/admin-guide/kernel-parameters.txt
> > > +++ b/Documentation/admin-guide/kernel-parameters.txt
> > > @@ -3672,6 +3672,12 @@
> > >  			the propagation of recent CPU-hotplug changes up
> > >  			the rcu_node combining tree.
> > >  
> > > +	rcutree.use_softirq=	[KNL]
> > > +			If set to zero, move all RCU_SOFTIRQ processing to
> > > +			per-CPU rcuc kthreads.  Defaults to a non-zero
> > > +			value, meaning that RCU_SOFTIRQ is used by default.
> > > +			Specify rcutree.use_softirq=0 to use rcuc kthreads.
> > > +
> > >  	rcutree.rcu_fanout_exact= [KNL]
> > >  			Disable autobalancing of the rcu_node combining
> > >  			tree.  This is used by rcutorture, and might
> > > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > > index ec77ec336f58..6bd05c9918cc 100644
> > > --- a/kernel/rcu/tree.c
> > > +++ b/kernel/rcu/tree.c
> > > @@ -51,6 +51,12 @@
> > >  #include <linux/tick.h>
> > >  #include <linux/sysrq.h>
> > >  #include <linux/kprobes.h>
> > > +#include <linux/gfp.h>
> > > +#include <linux/oom.h>
> > > +#include <linux/smpboot.h>
> > > +#include <linux/jiffies.h>
> > > +#include <linux/sched/isolation.h>
> > > +#include "../time/tick-internal.h"
> > >  
> > >  #include "tree.h"
> > >  #include "rcu.h"
> > > @@ -92,6 +98,9 @@ struct rcu_state rcu_state = {
> > >  /* Dump rcu_node combining tree at boot to verify correct setup. */
> > >  static bool dump_tree;
> > >  module_param(dump_tree, bool, 0444);
> > > +/* Move RCU_SOFTIRQ to rcuc kthreads. */
> > 
> > I am replacing this with:
> > 
> > +/* By default, use RCU_SOFTIRQ instead of rcuc kthreads. */
> > 
> > > +static bool use_softirq = 1;
> > > +module_param(use_softirq, bool, 0444);
> > >  /* Control rcu_node-tree auto-balancing at boot time. */
> > >  static bool rcu_fanout_exact;
> > >  module_param(rcu_fanout_exact, bool, 0444);
> > > @@ -2253,7 +2262,7 @@ void rcu_force_quiescent_state(void)
> > >  EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
> > >  
> > >  /* Perform RCU core processing work for the current CPU.  */
> > > -static __latent_entropy void rcu_core(struct softirq_action *unused)
> > > +static __latent_entropy void rcu_core(void)
> > >  {
> > >  	unsigned long flags;
> > >  	struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);
> > > @@ -2295,6 +2304,34 @@ static __latent_entropy void rcu_core(struct softirq_action *unused)
> > >  	trace_rcu_utilization(TPS("End RCU core"));
> > >  }
> > >  
> > > +static void rcu_core_si(struct softirq_action *h)
> > > +{
> > > +	rcu_core();
> > > +}
> > > +
> > > +static void rcu_wake_cond(struct task_struct *t, int status)
> > > +{
> > > +	/*
> > > +	 * If the thread is yielding, only wake it when this
> > > +	 * is invoked from idle
> > > +	 */
> > > +	if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current)))
> > > +		wake_up_process(t);
> > > +}
> > > +
> > > +static void invoke_rcu_core_kthread(void)
> > > +{
> > > +	struct task_struct *t;
> > > +	unsigned long flags;
> > > +
> > > +	local_irq_save(flags);
> > > +	__this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
> > > +	t = __this_cpu_read(rcu_data.rcu_cpu_kthread_task);
> > > +	if (t != NULL && t != current)
> > > +		rcu_wake_cond(t, __this_cpu_read(rcu_data.rcu_cpu_kthread_status));
> > > +	local_irq_restore(flags);
> > > +}
> > > +
> > >  /*
> > >   * Schedule RCU callback invocation.  If the running implementation of RCU
> > >   * does not support RCU priority boosting, just do a direct call, otherwise
> > > @@ -2306,18 +2343,94 @@ static void invoke_rcu_callbacks(struct rcu_data *rdp)
> > >  {
> > >  	if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
> > >  		return;
> > > -	if (likely(!rcu_state.boost)) {
> > > -		rcu_do_batch(rdp);
> > > -		return;
> > > -	}
> > > -	invoke_rcu_callbacks_kthread();
> > > +	if (rcu_state.boost || !use_softirq)
> > > +		invoke_rcu_core_kthread();
> > > +	rcu_do_batch(rdp);
> > >  }
> > >  
> > > +/*
> > > + * Wake up this CPU's rcuc kthread to do RCU core processing.
> > > + */
> > >  static void invoke_rcu_core(void)
> > >  {
> > > -	if (cpu_online(smp_processor_id()))
> > > +	if (!cpu_online(smp_processor_id()))
> > > +		return;
> > > +	if (use_softirq)
> > >  		raise_softirq(RCU_SOFTIRQ);
> > > +	else
> > > +		invoke_rcu_core_kthread();
> > > +}
> > > +
> > > +static void rcu_cpu_kthread_park(unsigned int cpu)
> > > +{
> > > +	per_cpu(rcu_data.rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
> > > +}
> > > +
> > > +static int rcu_cpu_kthread_should_run(unsigned int cpu)
> > > +{
> > > +	return __this_cpu_read(rcu_data.rcu_cpu_has_work);
> > > +}
> > > +
> > > +/*
> > > + * Per-CPU kernel thread that invokes RCU callbacks.  This replaces
> > > + * the RCU softirq used in configurations of RCU that do not support RCU
> > > + * priority boosting.
> > > + */
> > > +static void rcu_cpu_kthread(unsigned int cpu)
> > > +{
> > > +	unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
> > > +	char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
> > > +	int spincnt;
> > > +
> > > +	for (spincnt = 0; spincnt < 10; spincnt++) {
> > > +		trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
> > > +		local_bh_disable();
> > > +		*statusp = RCU_KTHREAD_RUNNING;
> > > +		local_irq_disable();
> > > +		work = *workp;
> > > +		*workp = 0;
> > > +		local_irq_enable();
> > > +		if (work)
> > > +			rcu_core();
> > > +		local_bh_enable();
> > > +		if (*workp == 0) {
> > > +			trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
> > > +			*statusp = RCU_KTHREAD_WAITING;
> > > +			return;
> > > +		}
> > > +	}
> > > +	*statusp = RCU_KTHREAD_YIELDING;
> > > +	trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
> > > +	schedule_timeout_interruptible(2);
> > > +	trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
> > > +	*statusp = RCU_KTHREAD_WAITING;
> > > +}
> > > +
> > > +static struct smp_hotplug_thread rcu_cpu_thread_spec = {
> > > +	.store			= &rcu_data.rcu_cpu_kthread_task,
> > > +	.thread_should_run	= rcu_cpu_kthread_should_run,
> > > +	.thread_fn		= rcu_cpu_kthread,
> > > +	.thread_comm		= "rcuc/%u",
> > > +	.setup			= rcu_cpu_kthread_setup,
> > > +	.park			= rcu_cpu_kthread_park,
> > > +};
> > > +
> > > +/*
> > > + * Spawn per-CPU RCU core processing kthreads.
> > > + */
> > > +static int __init rcu_spawn_core_kthreads(void)
> > > +{
> > > +	int cpu;
> > > +
> > > +	for_each_possible_cpu(cpu)
> > > +		per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0;
> > > +	if (!IS_ENABLED(CONFIG_RCU_BOOST) && use_softirq)
> > > +		return 0;
> > > +	WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec),
> > > +		  "%s: Could not start rcuc kthread, OOM is now expected behavior\n", __func__);
> > > +	return 0;
> > >  }
> > > +early_initcall(rcu_spawn_core_kthreads);
> > >  
> > >  /*
> > >   * Handle any core-RCU processing required by a call_rcu() invocation.
> > > @@ -3355,7 +3468,8 @@ void __init rcu_init(void)
> > >  	rcu_init_one();
> > >  	if (dump_tree)
> > >  		rcu_dump_rcu_node_tree();
> > > -	open_softirq(RCU_SOFTIRQ, rcu_core);
> > > +	if (use_softirq)
> > > +		open_softirq(RCU_SOFTIRQ, rcu_core_si);
> > >  
> > >  	/*
> > >  	 * We don't need protection against CPU-hotplug here because
> > > diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
> > > index e253d11af3c4..a1a72a1ecb02 100644
> > > --- a/kernel/rcu/tree.h
> > > +++ b/kernel/rcu/tree.h
> > > @@ -407,8 +407,8 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func);
> > >  static void dump_blkd_tasks(struct rcu_node *rnp, int ncheck);
> > >  static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
> > >  static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
> > > -static void invoke_rcu_callbacks_kthread(void);
> > >  static bool rcu_is_callbacks_kthread(void);
> > > +static void rcu_cpu_kthread_setup(unsigned int cpu);
> > >  static void __init rcu_spawn_boost_kthreads(void);
> > >  static void rcu_prepare_kthreads(int cpu);
> > >  static void rcu_cleanup_after_idle(void);
> > > diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
> > > index f46b4af96ab9..b807204ffd83 100644
> > > --- a/kernel/rcu/tree_plugin.h
> > > +++ b/kernel/rcu/tree_plugin.h
> > > @@ -11,29 +11,7 @@
> > >   *	   Paul E. McKenney <paulmck@linux.ibm.com>
> > >   */
> > >  
> > > -#include <linux/delay.h>
> > > -#include <linux/gfp.h>
> > > -#include <linux/oom.h>
> > > -#include <linux/sched/debug.h>
> > > -#include <linux/smpboot.h>
> > > -#include <linux/sched/isolation.h>
> > > -#include <uapi/linux/sched/types.h>
> > > -#include "../time/tick-internal.h"
> > > -
> > > -#ifdef CONFIG_RCU_BOOST
> > >  #include "../locking/rtmutex_common.h"
> > > -#else /* #ifdef CONFIG_RCU_BOOST */
> > > -
> > > -/*
> > > - * Some architectures do not define rt_mutexes, but if !CONFIG_RCU_BOOST,
> > > - * all uses are in dead code.  Provide a definition to keep the compiler
> > > - * happy, but add WARN_ON_ONCE() to complain if used in the wrong place.
> > > - * This probably needs to be excluded from -rt builds.
> > > - */
> > > -#define rt_mutex_owner(a) ({ WARN_ON_ONCE(1); NULL; })
> > > -#define rt_mutex_futex_unlock(x) WARN_ON_ONCE(1)
> > > -
> > > -#endif /* #else #ifdef CONFIG_RCU_BOOST */
> > >  
> > >  #ifdef CONFIG_RCU_NOCB_CPU
> > >  static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
> > > @@ -94,6 +72,8 @@ static void __init rcu_bootup_announce_oddness(void)
> > >  		pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_init_delay);
> > >  	if (gp_cleanup_delay)
> > >  		pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_cleanup_delay);
> > > +	if (!use_softirq)
> > > +		pr_info("\tRCU_SOFTIRQ processing moved to rcuc kthreads.\n");
> > >  	if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG))
> > >  		pr_info("\tRCU debug extended QS entry/exit.\n");
> > >  	rcupdate_announce_bootup_oddness();
> > > @@ -629,7 +609,10 @@ static void rcu_read_unlock_special(struct task_struct *t)
> > >  		/* Need to defer quiescent state until everything is enabled. */
> > >  		if (irqs_were_disabled) {
> > >  			/* Enabling irqs does not reschedule, so... */
> > > -			raise_softirq_irqoff(RCU_SOFTIRQ);
> > > +			if (!use_softirq)
> > > +				raise_softirq_irqoff(RCU_SOFTIRQ);
> > > +			else
> > > +				invoke_rcu_core();
> > 
> > This can result in deadlock.  This happens when the scheduler invokes
> > rcu_read_unlock() with one of the rq or pi locks held, which means that
> > interrupts are disabled.  And it also means that the wakeup done in
> > invoke_rcu_core() could go after the same rq or pi lock.
> > 
> > What we really need here is some way to make something happen on this
> > CPU just after interrupts are re-enabled.  Here are the options I see:
> > 
> > 1.	Do set_tsk_need_resched() and set_preempt_need_resched(),
> > 	just like in the "else" clause below.  This sort of works, but
> > 	relies on some later interrupt or similar to get things started.
> > 	This is just fine for normal grace periods, but not so much for
> > 	expedited grace periods.
> > 
> > 2.	IPI some other CPU and have it IPI us back.  Not such a good plan
> > 	when running an SMP kernel on a single CPU.
> > 
> > 3.	Have a "stub" RCU_SOFTIRQ that contains only the following:
> > 
> > 	/* Report any deferred quiescent states if preemption enabled. */
> > 	if (!(preempt_count() & PREEMPT_MASK)) {
> > 		rcu_preempt_deferred_qs(current);
> > 	} else if (rcu_preempt_need_deferred_qs(current)) {
> > 		set_tsk_need_resched(current);
> > 		set_preempt_need_resched();
> > 	}
> > 
> > 4.	Except that raise_softirq_irqoff() could potentially have this
> > 	same problem if rcu_read_unlock() is invoked at process level
> > 	from the scheduler with either rq or pi locks held.  :-/
> > 
> > 	Which raises the question "why aren't I seeing hangs and
> > 	lockdep splats?"
> 
> Interesting, could it be you're not seeing a hang in the regular case,
> because enqueuing ksoftirqd on the same CPU as where the rcu_read_unlock is
> happening is a rare event? First, ksoftirqd has to even be awakened in the
> first place. On the other hand, with the new code the thread is always awakened
> and is more likely to run into the issue you found?

No, in many cases, including the self-deadlock that showed up last night,
raise_softirq_irqoff() will simply set a bit in a per-CPU variable.
One case where this happens is when called from an interrupt handler.
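
For reference, the reason there is no wakeup in that case is visible in the
shape of raise_softirq_irqoff() itself, roughly paraphrased below (a sketch
for illustration, not the exact upstream code):

	void raise_softirq_irqoff(unsigned int nr)
	{
		/* Just set the per-CPU pending bit for this softirq. */
		__raise_softirq_irqoff(nr);

		/*
		 * If we are in an interrupt or softirq, the pending bit will
		 * be handled on the way out, so no wakeup is needed.
		 * Otherwise, wake ksoftirqd so that the softirq runs soon.
		 */
		if (!in_interrupt())
			wakeup_softirqd();
	}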

> The lockdep splats should be a more common occurrence though IMO. If you could
> let me know which RCU config is hanging, I can try to debug this at my end as
> well.

TREE01, TREE02, TREE03, and TREE09.  I would guess that TREE08 would also
do the same thing, given that it also sets PREEMPT=y and tests Tree RCU.

Please see the patch I posted and tested overnight.  I suspect that there
is a better fix, but this does at least seem to suppress the error.

> > Assuming that this really is a problem, perhaps I need to do something
> > like the following:
> > 
> > 		if (in_interrupt()) {
> > 			/* In interrupt, so catch softirq on the way out. */
> > 			if (use_softirq)
> > 				raise_softirq_irqoff(RCU_SOFTIRQ);
> > 			else
> > 				invoke_rcu_core();
> > 		} else {
> > 			/* Force reschedule, perhaps quite a bit later. */
> > 			set_tsk_need_resched(current);
> > 			set_preempt_need_resched();
> > 		}
> > 
> > This can delay the quiescent state when rcu_read_unlock() is invoked from
> > process level with interrupts disabled.  I suppose I could post a very
> > short-timeout hrtimer, but would that be lightweight enough?  I cannot
> > use self-targeted smp_call_function_single() because it wants interrupts
> > enabled and because it will just do a direct call, which won't help here.
> > I could use a timer, though the latency is larger than would be good.
> 
> I have been thinking for some time that we should have statistics counters for
> this sort of thing. Then we could run rcutorture and sample the stats counters
> from /proc or something to see how long all of these things took (longest grace
> period, etc.). Would that be something of interest, to make this task easier?
> 
> > Also, having lots of non-migratable timers might be considered unfriendly,
> > though they shouldn't be -that- heavily utilized.  Yet, anyway...
> > I could try adding logic to local_irq_enable() and local_irq_restore(),
> > but that probably wouldn't go over all that well.  Besides, sometimes
> > interrupt enabling happens in assembly language.
> > 
> > It is quite likely that delays to expedited grace periods wouldn't
> > happen all that often.  First, the grace period has to start while
> > the CPU itself (not some blocked task) is in an RCU read-side critical
> > section, second, that critical section cannot be preempted, and third
> > the rcu_read_unlock() must run with interrupts disabled.
> > 
> > Ah, but that sequence of events is not supposed to happen with the
> > scheduler lock!
> > 
> > From Documentation/RCU/Design/Requirements/Requirements.html:
> > 
> > 	It is forbidden to hold any of scheduler's runqueue or
> > 	priority-inheritance spinlocks across an rcu_read_unlock()
> > 	unless interrupts have been disabled across the entire RCU
> > 	read-side critical section, that is, up to and including the
> > 	matching rcu_read_lock().
> > 
> > Here are the reasons we even get to rcu_read_unlock_special():
> > 
> > 1.	The just-ended RCU read-side critical section was preempted.
> > 	This clearly cannot happen if interrupts are disabled across
> > 	the entire critical section.
> > 
> > 2.	The scheduling-clock interrupt noticed that this critical
> > 	section has been taking a long time.  But scheduling-clock
> > 	interrupts also cannot happen while interrupts are disabled.
> > 
> > 3.	An expedited grace period started during this critical
> > 	section.  But if that happened, the corresponding IPI would
> > 	have waited until this CPU enabled interrupts, so this
> > 	cannot happen either.
> > 
> > So the call to invoke_rcu_core() should be OK after all.
> > 
> > Which is a bit of a disappointment, given that I am still seeing hangs!
> 
> Oh ok, discount whatever I just said then ;-) Indeed I remember this
> requirement too now. Your neat documentation skills are indeed life saving :D

No, this did turn out to be the problem area.  Or at least one of the
problem areas.  Again, see my earlier email.

> > I might replace this invoke_rcu_core() with set_tsk_need_resched() and
> > set_preempt_need_resched() to see if that gets rid of the hangs, but
> > first...
> 
> Could we use the NMI watchdog to dump the stack at the time of the hang? Maybe
> a deadlock will be present on the stack (I think its config is called
> HARDLOCKUP_DETECTOR or something).

Another approach would be to instrument the locking code that notices
the recursive acquisition.  Or to run lockdep...  Because none of the
failing scenarios enable lockdep!  ;-)

							Thanx, Paul


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v3] rcu: Allow to eliminate softirq processing from rcutree
  2019-03-22 14:58                                 ` Paul E. McKenney
@ 2019-03-22 15:50                                   ` Joel Fernandes
  2019-03-22 16:26                                     ` Paul E. McKenney
  0 siblings, 1 reply; 44+ messages in thread
From: Joel Fernandes @ 2019-03-22 15:50 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: Sebastian Andrzej Siewior, linux-kernel, Josh Triplett,
	Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan, tglx,
	Mike Galbraith

On Fri, Mar 22, 2019 at 07:58:23AM -0700, Paul E. McKenney wrote:
[snip]
> > > >  #ifdef CONFIG_RCU_NOCB_CPU
> > > >  static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
> > > > @@ -94,6 +72,8 @@ static void __init rcu_bootup_announce_oddness(void)
> > > >  		pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_init_delay);
> > > >  	if (gp_cleanup_delay)
> > > >  		pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_cleanup_delay);
> > > > +	if (!use_softirq)
> > > > +		pr_info("\tRCU_SOFTIRQ processing moved to rcuc kthreads.\n");
> > > >  	if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG))
> > > >  		pr_info("\tRCU debug extended QS entry/exit.\n");
> > > >  	rcupdate_announce_bootup_oddness();
> > > > @@ -629,7 +609,10 @@ static void rcu_read_unlock_special(struct task_struct *t)
> > > >  		/* Need to defer quiescent state until everything is enabled. */
> > > >  		if (irqs_were_disabled) {
> > > >  			/* Enabling irqs does not reschedule, so... */
> > > > -			raise_softirq_irqoff(RCU_SOFTIRQ);
> > > > +			if (!use_softirq)
> > > > +				raise_softirq_irqoff(RCU_SOFTIRQ);
> > > > +			else
> > > > +				invoke_rcu_core();
> > > 
> > > This can result in deadlock.  This happens when the scheduler invokes
> > > rcu_read_unlock() with one of the rq or pi locks held, which means that
> > > interrupts are disabled.  And it also means that the wakeup done in
> > > invoke_rcu_core() could go after the same rq or pi lock.
> > > 
> > > What we really need here is some way to make something happen on this
> > > CPU just after interrupts are re-enabled.  Here are the options I see:
> > > 
> > > 1.	Do set_tsk_need_resched() and set_preempt_need_resched(),
> > > 	just like in the "else" clause below.  This sort of works, but
> > > 	relies on some later interrupt or similar to get things started.
> > > 	This is just fine for normal grace periods, but not so much for
> > > 	expedited grace periods.
> > > 
> > > 2.	IPI some other CPU and have it IPI us back.  Not such a good plan
> > > 	when running an SMP kernel on a single CPU.
> > > 
> > > 3.	Have a "stub" RCU_SOFTIRQ that contains only the following:
> > > 
> > > 	/* Report any deferred quiescent states if preemption enabled. */
> > > 	if (!(preempt_count() & PREEMPT_MASK)) {
> > > 		rcu_preempt_deferred_qs(current);
> > > 	} else if (rcu_preempt_need_deferred_qs(current)) {
> > > 		set_tsk_need_resched(current);
> > > 		set_preempt_need_resched();
> > > 	}
> > > 
> > > 4.	Except that raise_softirq_irqoff() could potentially have this
> > > 	same problem if rcu_read_unlock() is invoked at process level
> > > 	from the scheduler with either rq or pi locks held.  :-/
> > > 
> > > 	Which raises the question "why aren't I seeing hangs and
> > > 	lockdep splats?"
> > 
> > Interesting, could it be you're not seeing a hang in the regular case,
> > because enqueuing ksoftirqd on the same CPU as where the rcu_read_unlock is
> > happening is a rare event? First, ksoftirqd has to even be awakened in the
> > first place. On the other hand, with the new code the thread is always awakened
> > and is more likely to run into the issue you found?
> 
> No, in many cases, including the self-deadlock that showed up last night,
> raise_softirq_irqoff() will simply set a bit in a per-CPU variable.
> One case where this happens is when called from an interrupt handler.

I think we are saying the same thing: in some cases ksoftirqd will be
awakened and in some cases it will not. I will go through all the scenarios to
convince myself it is safe; if I find some issue I will let you know.

> > The lockdep splats should be a more common occurrence though IMO. If you could
> > let me know which RCU config is hanging, I can try to debug this at my end as
> > well.
> 
> TREE01, TREE02, TREE03, and TREE09.  I would guess that TREE08 would also
> do the same thing, given that it also sets PREEMPT=y and tests Tree RCU.
> 
> Please see the patch I posted and tested overnight.  I suspect that there
> is a better fix, but this does at least seem to suppress the error.

Ok, will do.

> > > Also, having lots of non-migratable timers might be considered unfriendly,
> > > though they shouldn't be -that- heavily utilized.  Yet, anyway...
> > > I could try adding logic to local_irq_enable() and local_irq_restore(),
> > > but that probably wouldn't go over all that well.  Besides, sometimes
> > > interrupt enabling happens in assembly language.
> > > 
> > > It is quite likely that delays to expedited grace periods wouldn't
> > > happen all that often.  First, the grace period has to start while
> > > the CPU itself (not some blocked task) is in an RCU read-side critical
> > > section, second, that critical section cannot be preempted, and third
> > > the rcu_read_unlock() must run with interrupts disabled.
> > > 
> > > Ah, but that sequence of events is not supposed to happen with the
> > > scheduler lock!
> > > 
> > > From Documentation/RCU/Design/Requirements/Requirements.html:
> > > 
> > > 	It is forbidden to hold any of scheduler's runqueue or
> > > 	priority-inheritance spinlocks across an rcu_read_unlock()
> > > 	unless interrupts have been disabled across the entire RCU
> > > 	read-side critical section, that is, up to and including the
> > > 	matching rcu_read_lock().
> > > 
> > > Here are the reasons we even get to rcu_read_unlock_special():
> > > 
> > > 1.	The just-ended RCU read-side critical section was preempted.
> > > 	This clearly cannot happen if interrupts are disabled across
> > > 	the entire critical section.
> > > 
> > > 2.	The scheduling-clock interrupt noticed that this critical
> > > 	section has been taking a long time.  But scheduling-clock
> > > 	interrupts also cannot happen while interrupts are disabled.
> > > 
> > > 3.	An expedited grace period started during this critical
> > > 	section.  But if that happened, the corresponding IPI would
> > > 	have waited until this CPU enabled interrupts, so this
> > > 	cannot happen either.
> > > 
> > > So the call to invoke_rcu_core() should be OK after all.
> > > 
> > > Which is a bit of a disappointment, given that I am still seeing hangs!
> > 
> > Oh ok, discount whatever I just said then ;-) Indeed I remember this
> > requirement too now. Your neat documentation skills are indeed life saving :D
> 
> No, this did turn out to be the problem area.  Or at least one of the
> problem areas.  Again, see my earlier email.

Ok. Too many emails so I got confused :-D. I also forgot which version of the
patch we are testing, since I don't think an updated one was posted. But I
will refer to your diff from last night and dig out the base patch from your
git tree, no problem.

> > > I might replace this invoke_rcu_core() with set_tsk_need_resched() and
> > > set_preempt_need_resched() to see if that gets rid of the hangs, but
> > > first...
> > 
> > Could we use the NMI watchdog to dump the stack at the time of the hang? Maybe
> > a deadlock will be present on the stack (I think its config is called
> > HARDLOCKUP_DETECTOR or something).
> 
> Another approach would be to instrument the locking code that notices
> the recursive acquisition.  Or to run lockdep...  Because none of the
> failing scenarios enable lockdep!  ;-)

I was wondering why lockdep is not always turned on in your testing. Is it
due to performance concerns?

thanks,

 - Joel

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v3] rcu: Allow to eliminate softirq processing from rcutree
  2019-03-22 15:50                                   ` Joel Fernandes
@ 2019-03-22 16:26                                     ` Paul E. McKenney
  2019-03-22 18:07                                       ` Paul E. McKenney
  0 siblings, 1 reply; 44+ messages in thread
From: Paul E. McKenney @ 2019-03-22 16:26 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: Sebastian Andrzej Siewior, linux-kernel, Josh Triplett,
	Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan, tglx,
	Mike Galbraith

On Fri, Mar 22, 2019 at 11:50:49AM -0400, Joel Fernandes wrote:
> On Fri, Mar 22, 2019 at 07:58:23AM -0700, Paul E. McKenney wrote:
> [snip]
> > > > >  #ifdef CONFIG_RCU_NOCB_CPU
> > > > >  static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
> > > > > @@ -94,6 +72,8 @@ static void __init rcu_bootup_announce_oddness(void)
> > > > >  		pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_init_delay);
> > > > >  	if (gp_cleanup_delay)
> > > > >  		pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_cleanup_delay);
> > > > > +	if (!use_softirq)
> > > > > +		pr_info("\tRCU_SOFTIRQ processing moved to rcuc kthreads.\n");
> > > > >  	if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG))
> > > > >  		pr_info("\tRCU debug extended QS entry/exit.\n");
> > > > >  	rcupdate_announce_bootup_oddness();
> > > > > @@ -629,7 +609,10 @@ static void rcu_read_unlock_special(struct task_struct *t)
> > > > >  		/* Need to defer quiescent state until everything is enabled. */
> > > > >  		if (irqs_were_disabled) {
> > > > >  			/* Enabling irqs does not reschedule, so... */
> > > > > -			raise_softirq_irqoff(RCU_SOFTIRQ);
> > > > > +			if (!use_softirq)
> > > > > +				raise_softirq_irqoff(RCU_SOFTIRQ);
> > > > > +			else
> > > > > +				invoke_rcu_core();
> > > > 
> > > > This can result in deadlock.  This happens when the scheduler invokes
> > > > rcu_read_unlock() with one of the rq or pi locks held, which means that
> > > > interrupts are disabled.  And it also means that the wakeup done in
> > > > invoke_rcu_core() could go after the same rq or pi lock.
> > > > 
> > > > What we really need here is some way to make something happen on this
> > > > CPU just after interrupts are re-enabled.  Here are the options I see:
> > > > 
> > > > 1.	Do set_tsk_need_resched() and set_preempt_need_resched(),
> > > > 	just like in the "else" clause below.  This sort of works, but
> > > > 	relies on some later interrupt or similar to get things started.
> > > > 	This is just fine for normal grace periods, but not so much for
> > > > 	expedited grace periods.
> > > > 
> > > > 2.	IPI some other CPU and have it IPI us back.  Not such a good plan
> > > > 	when running an SMP kernel on a single CPU.
> > > > 
> > > > 3.	Have a "stub" RCU_SOFTIRQ that contains only the following:
> > > > 
> > > > 	/* Report any deferred quiescent states if preemption enabled. */
> > > > 	if (!(preempt_count() & PREEMPT_MASK)) {
> > > > 		rcu_preempt_deferred_qs(current);
> > > > 	} else if (rcu_preempt_need_deferred_qs(current)) {
> > > > 		set_tsk_need_resched(current);
> > > > 		set_preempt_need_resched();
> > > > 	}
> > > > 
> > > > 4.	Except that raise_softirq_irqoff() could potentially have this
> > > > 	same problem if rcu_read_unlock() is invoked at process level
> > > > 	from the scheduler with either rq or pi locks held.  :-/
> > > > 
> > > > 	Which raises the question "why aren't I seeing hangs and
> > > > 	lockdep splats?"
> > > 
> > > Interesting, could it be you're not seeing a hang in the regular case,
> > > because enqueuing ksoftirqd on the same CPU as where the rcu_read_unlock is
> > > happening is a rare event? First, ksoftirqd has to even be awakened in the
> > > first place. On the other hand, with the new code the thread is always awakened
> > > and is more likely to run into the issue you found?
> > 
> > No, in many cases, including the self-deadlock that showed up last night,
> > raise_softirq_irqoff() will simply set a bit in a per-CPU variable.
> > One case where this happens is when called from an interrupt handler.
> 
> I think we are saying the same thing: in some cases ksoftirqd will be
> awakened and in some cases it will not. I will go through all the scenarios to
> convince myself it is safe; if I find some issue I will let you know.

I am suspecting that raise_softirq_irqoff() is in fact unsafe, just
very rarely unsafe.
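
The rare case in question would be a chain along the following lines (purely
illustrative, not an observed trace):

	/*
	 * Illustrative only -- the sort of rare chain being worried about:
	 *
	 * schedule()                        // acquires an rq lock
	 *   ...
	 *     rcu_read_unlock()             // at process level, irqs disabled
	 *       rcu_read_unlock_special()
	 *         raise_softirq_irqoff(RCU_SOFTIRQ)
	 *           wakeup_softirqd()       // !in_interrupt(), so wakeup happens
	 *             wake_up_process()     // may need that same rq/pi lock
	 *                                   // -> recursive acquisition, deadlock
	 */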

> > > The lockdep splats should be a more common occurrence though IMO. If you could
> > > let me know which RCU config is hanging, I can try to debug this at my end as
> > > well.
> > 
> > TREE01, TREE02, TREE03, and TREE09.  I would guess that TREE08 would also
> > do the same thing, given that it also sets PREEMPT=y and tests Tree RCU.
> > 
> > Please see the patch I posted and tested overnight.  I suspect that there
> > is a better fix, but this does at least seem to suppress the error.
> 
> Ok, will do.
> 
> > > > Also, having lots of non-migratable timers might be considered unfriendly,
> > > > though they shouldn't be -that- heavily utilized.  Yet, anyway...
> > > > I could try adding logic to local_irq_enable() and local_irq_restore(),
> > > > but that probably wouldn't go over all that well.  Besides, sometimes
> > > > interrupt enabling happens in assembly language.
> > > > 
> > > > It is quite likely that delays to expedited grace periods wouldn't
> > > > happen all that often.  First, the grace period has to start while
> > > > the CPU itself (not some blocked task) is in an RCU read-side critical
> > > > section, second, that critical section cannot be preempted, and third
> > > > the rcu_read_unlock() must run with interrupts disabled.
> > > > 
> > > > Ah, but that sequence of events is not supposed to happen with the
> > > > scheduler lock!
> > > > 
> > > > From Documentation/RCU/Design/Requirements/Requirements.html:
> > > > 
> > > > 	It is forbidden to hold any of scheduler's runqueue or
> > > > 	priority-inheritance spinlocks across an rcu_read_unlock()
> > > > 	unless interrupts have been disabled across the entire RCU
> > > > 	read-side critical section, that is, up to and including the
> > > > 	matching rcu_read_lock().
> > > > 
> > > > Here are the reasons we even get to rcu_read_unlock_special():
> > > > 
> > > > 1.	The just-ended RCU read-side critical section was preempted.
> > > > 	This clearly cannot happen if interrupts are disabled across
> > > > 	the entire critical section.
> > > > 
> > > > 2.	The scheduling-clock interrupt noticed that this critical
> > > > 	section has been taking a long time.  But scheduling-clock
> > > > 	interrupts also cannot happen while interrupts are disabled.
> > > > 
> > > > 3.	An expedited grace period started during this critical
> > > > 	section.  But if that happened, the corresponding IPI would
> > > > 	have waited until this CPU enabled interrupts, so this
> > > > 	cannot happen either.
> > > > 
> > > > So the call to invoke_rcu_core() should be OK after all.
> > > > 
> > > > Which is a bit of a disappointment, given that I am still seeing hangs!
> > > 
> > > Oh ok, discount whatever I just said then ;-) Indeed I remember this
> > > requirement too now. Your neat documentation skills are indeed life saving :D
> > 
> > No, this did turn out to be the problem area.  Or at least one of the
> > problem areas.  Again, see my earlier email.
> 
> Ok. Too many emails so I got confused :-D. I also forgot which version of the
> patch we are testing, since I don't think an updated one was posted. But I
> will refer to your diff from last night and dig out the base patch from your
> git tree, no problem.
> 
> > > > I might replace this invoke_rcu_core() with set_tsk_need_resched() and
> > > > set_preempt_need_resched() to see if that gets rid of the hangs, but
> > > > first...
> > > 
> > > Could we use the NMI watchdog to dump the stack at the time of the hang? Maybe
> > > a deadlock will be present on the stack (I think its config is called
> > > HARDLOCKUP_DETECTOR or something).
> > 
> > Another approach would be to instrument the locking code that notices
> > the recursive acquisition.  Or to run lockdep...  Because none of the
> > failing scenarios enable lockdep!  ;-)
> 
> I was wondering why lockdep is not always turned on in your testing. Is it
> due to performance concerns?

Because I also need to test without lockdep.  I sometimes use
"--kconfig CONFIG_PROVE_LOCKING=y" to force lockdep everywhere on
a particular rcutorture run, though.  Like on the run that I just
now started.  ;-)
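
For anyone wanting to reproduce that sort of run, the usual entry point is the
rcutorture wrapper script, along these lines (an illustrative command line;
the CPU count, duration, and scenario list here are made up, not a transcript
of the actual run):

	tools/testing/selftests/rcutorture/bin/kvm.sh --cpus 16 --duration 30 \
		--configs "TREE01 TREE02 TREE03 TREE09" \
		--kconfig "CONFIG_PROVE_LOCKING=y"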

							Thanx, Paul


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v3] rcu: Allow to eliminate softirq processing from rcutree
  2019-03-22 16:26                                     ` Paul E. McKenney
@ 2019-03-22 18:07                                       ` Paul E. McKenney
  0 siblings, 0 replies; 44+ messages in thread
From: Paul E. McKenney @ 2019-03-22 18:07 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: Sebastian Andrzej Siewior, linux-kernel, Josh Triplett,
	Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan, tglx,
	Mike Galbraith

On Fri, Mar 22, 2019 at 09:26:35AM -0700, Paul E. McKenney wrote:
> On Fri, Mar 22, 2019 at 11:50:49AM -0400, Joel Fernandes wrote:
> > On Fri, Mar 22, 2019 at 07:58:23AM -0700, Paul E. McKenney wrote:
> > [snip]
> > > > > >  #ifdef CONFIG_RCU_NOCB_CPU
> > > > > >  static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
> > > > > > @@ -94,6 +72,8 @@ static void __init rcu_bootup_announce_oddness(void)
> > > > > >  		pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_init_delay);
> > > > > >  	if (gp_cleanup_delay)
> > > > > >  		pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_cleanup_delay);
> > > > > > +	if (!use_softirq)
> > > > > > +		pr_info("\tRCU_SOFTIRQ processing moved to rcuc kthreads.\n");
> > > > > >  	if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG))
> > > > > >  		pr_info("\tRCU debug extended QS entry/exit.\n");
> > > > > >  	rcupdate_announce_bootup_oddness();
> > > > > > @@ -629,7 +609,10 @@ static void rcu_read_unlock_special(struct task_struct *t)
> > > > > >  		/* Need to defer quiescent state until everything is enabled. */
> > > > > >  		if (irqs_were_disabled) {
> > > > > >  			/* Enabling irqs does not reschedule, so... */
> > > > > > -			raise_softirq_irqoff(RCU_SOFTIRQ);
> > > > > > +			if (!use_softirq)
> > > > > > +				raise_softirq_irqoff(RCU_SOFTIRQ);
> > > > > > +			else
> > > > > > +				invoke_rcu_core();
> > > > > 
> > > > > This can result in deadlock.  This happens when the scheduler invokes
> > > > > rcu_read_unlock() with one of the rq or pi locks held, which means that
> > > > > interrupts are disabled.  And it also means that the wakeup done in
> > > > > invoke_rcu_core() could go after the same rq or pi lock.
> > > > > 
> > > > > What we really need here is some way to make something happen on this
> > > > > CPU just after interrupts are re-enabled.  Here are the options I see:
> > > > > 
> > > > > 1.	Do set_tsk_need_resched() and set_preempt_need_resched(),
> > > > > 	just like in the "else" clause below.  This sort of works, but
> > > > > 	relies on some later interrupt or similar to get things started.
> > > > > 	This is just fine for normal grace periods, but not so much for
> > > > > 	expedited grace periods.
> > > > > 
> > > > > 2.	IPI some other CPU and have it IPI us back.  Not such a good plan
> > > > > 	when running an SMP kernel on a single CPU.
> > > > > 
> > > > > 3.	Have a "stub" RCU_SOFTIRQ that contains only the following:
> > > > > 
> > > > > 	/* Report any deferred quiescent states if preemption enabled. */
> > > > > 	if (!(preempt_count() & PREEMPT_MASK)) {
> > > > > 		rcu_preempt_deferred_qs(current);
> > > > > 	} else if (rcu_preempt_need_deferred_qs(current)) {
> > > > > 		set_tsk_need_resched(current);
> > > > > 		set_preempt_need_resched();
> > > > > 	}
> > > > > 
> > > > > 4.	Except that raise_softirq_irqoff() could potentially have this
> > > > > 	same problem if rcu_read_unlock() is invoked at process level
> > > > > 	from the scheduler with either rq or pi locks held.  :-/
> > > > > 
> > > > > 	Which raises the question "why aren't I seeing hangs and
> > > > > 	lockdep splats?"
> > > > 
> > > > Interesting, could it be you're not seeing a hang in the regular case,
> > > > because enqueuing ksoftirqd on the same CPU as where the rcu_read_unlock is
> > > > happening is a rare event? First, ksoftirqd has to even be awakened in the
> > > > first place. On the other hand, with the new code the thread is always awakened
> > > > and is more likely to run into the issue you found?
> > > 
> > > No, in many cases, including the self-deadlock that showed up last night,
> > > raise_softirq_irqoff() will simply set a bit in a per-CPU variable.
> > > One case where this happens is when called from an interrupt handler.
> > 
> > I think we are saying the same thing: in some cases ksoftirqd will be
> > awakened and in some cases it will not. I will go through all the scenarios to
> > convince myself it is safe; if I find some issue I will let you know.
> 
> I am suspecting that raise_softirq_irqoff() is in fact unsafe, just
> very rarely unsafe.
> 
> > > > The lockdep splats should be a more common occurrence though IMO. If you could
> > > > let me know which RCU config is hanging, I can try to debug this at my end as
> > > > well.
> > > 
> > > TREE01, TREE02, TREE03, and TREE09.  I would guess that TREE08 would also
> > > do the same thing, given that it also sets PREEMPT=y and tests Tree RCU.
> > > 
> > > Please see the patch I posted and tested overnight.  I suspect that there
> > > is a better fix, but this does at least seem to suppress the error.
> > 
> > Ok, will do.
> > 
> > > > > Also, having lots of non-migratable timers might be considered unfriendly,
> > > > > though they shouldn't be -that- heavily utilized.  Yet, anyway...
> > > > > I could try adding logic to local_irq_enable() and local_irq_restore(),
> > > > > but that probably wouldn't go over all that well.  Besides, sometimes
> > > > > interrupt enabling happens in assembly language.
> > > > > 
> > > > > It is quite likely that delays to expedited grace periods wouldn't
> > > > > happen all that often.  First, the grace period has to start while
> > > > > the CPU itself (not some blocked task) is in an RCU read-side critical
> > > > > section, second, that critical section cannot be preempted, and third
> > > > > the rcu_read_unlock() must run with interrupts disabled.
> > > > > 
> > > > > Ah, but that sequence of events is not supposed to happen with the
> > > > > scheduler lock!
> > > > > 
> > > > > From Documentation/RCU/Design/Requirements/Requirements.html:
> > > > > 
> > > > > 	It is forbidden to hold any of scheduler's runqueue or
> > > > > 	priority-inheritance spinlocks across an rcu_read_unlock()
> > > > > 	unless interrupts have been disabled across the entire RCU
> > > > > 	read-side critical section, that is, up to and including the
> > > > > 	matching rcu_read_lock().
> > > > > 
> > > > > Here are the reasons we even get to rcu_read_unlock_special():
> > > > > 
> > > > > 1.	The just-ended RCU read-side critical section was preempted.
> > > > > 	This clearly cannot happen if interrupts are disabled across
> > > > > 	the entire critical section.
> > > > > 
> > > > > 2.	The scheduling-clock interrupt noticed that this critical
> > > > > 	section has been taking a long time.  But scheduling-clock
> > > > > 	interrupts also cannot happen while interrupts are disabled.
> > > > > 
> > > > > 3.	An expedited grace period started during this critical
> > > > > 	section.  But if that happened, the corresponding IPI would
> > > > > 	have waited until this CPU enabled interrupts, so this
> > > > > 	cannot happen either.
> > > > > 
> > > > > So the call to invoke_rcu_core() should be OK after all.
> > > > > 
> > > > > Which is a bit of a disappointment, given that I am still seeing hangs!
> > > > 
> > > > Oh ok, discount whatever I just said then ;-) Indeed I remember this
> > > > requirement too now. Your neat documentation skills are indeed life saving :D
> > > 
> > > No, this did turn out to be the problem area.  Or at least one of the
> > > problem areas.  Again, see my earlier email.
> > 
> > Ok. Too many emails so I got confused :-D. I also forgot which version of the
> > patch we are testing, since I don't think an updated one was posted. But I
> > will refer to your diff from last night and dig out the base patch from your
> > git tree, no problem.
> > 
> > > > > I might replace this invoke_rcu_core() with set_tsk_need_resched() and
> > > > > set_preempt_need_resched() to see if that gets rid of the hangs, but
> > > > > first...
> > > > 
> > > > Could we use the NMI watchdog to dump the stack at the time of the hang? Maybe
> > > > a deadlock will be present on the stack (I think its config is called
> > > > HARDLOCKUP_DETECTOR or something).
> > > 
> > > Another approach would be to instrument the locking code that notices
> > > the recursive acquisition.  Or to run lockdep...  Because none of the
> > > failing scenarios enable lockdep!  ;-)
> > 
> > I was wondering why lockdep is not always turned on in your testing. Is it
> > due to performance concerns?
> 
> Because I also need to test without lockdep.  I sometimes use
> "--kconfig CONFIG_PROVE_LOCKING=y" to force lockdep everywhere on
> a particular rcutorture run, though.  Like on the run that I just
> now started.  ;-)

But this produced no complaints.  And yes, I did check the console output
to make sure that lockdep was in fact enabled.  Color me confused...

							Thanx, Paul


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v3] rcu: Allow to eliminate softirq processing from rcutree
  2019-03-20 21:13                         ` [PATCH v3] " Sebastian Andrzej Siewior
  2019-03-20 23:46                           ` Paul E. McKenney
@ 2019-03-22 23:48                           ` Joel Fernandes
  2019-03-23  0:25                             ` Paul E. McKenney
  1 sibling, 1 reply; 44+ messages in thread
From: Joel Fernandes @ 2019-03-22 23:48 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: Paul E. McKenney, linux-kernel, Josh Triplett, Steven Rostedt,
	Mathieu Desnoyers, Lai Jiangshan, tglx, Mike Galbraith

On Wed, Mar 20, 2019 at 10:13:33PM +0100, Sebastian Andrzej Siewior wrote:
> Running RCU out of softirq is a problem for some workloads that would
> like to manage RCU core processing independently of other softirq
> work, for example, setting kthread priority.  This commit therefore
> introduces the `rcunosoftirq' option which moves the RCU core work
> from softirq to a per-CPU/per-flavor SCHED_OTHER kthread named rcuc.
> The SCHED_OTHER approach avoids the scalability problems that appeared
> with the earlier attempt to move RCU core processing from softirq
> to kthreads.  That said, kernels built with RCU_BOOST=y will run the
> rcuc kthreads at the RCU-boosting priority.
[snip]
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index 0f31b79eb6761..05a1e42fdaf10 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -51,6 +51,12 @@
>  #include <linux/tick.h>
>  #include <linux/sysrq.h>
>  #include <linux/kprobes.h>
> +#include <linux/gfp.h>
> +#include <linux/oom.h>
> +#include <linux/smpboot.h>
> +#include <linux/jiffies.h>
> +#include <linux/sched/isolation.h>
> +#include "../time/tick-internal.h"
>  
>  #include "tree.h"
>  #include "rcu.h"
> @@ -92,6 +98,9 @@ struct rcu_state rcu_state = {
>  /* Dump rcu_node combining tree at boot to verify correct setup. */
>  static bool dump_tree;
>  module_param(dump_tree, bool, 0444);
> +/* Move RCU_SOFTIRQ to rcuc kthreads. */
> +static bool use_softirq = 1;
> +module_param(use_softirq, bool, 0444);
>  /* Control rcu_node-tree auto-balancing at boot time. */
>  static bool rcu_fanout_exact;
>  module_param(rcu_fanout_exact, bool, 0444);
> @@ -2253,7 +2262,7 @@ void rcu_force_quiescent_state(void)
>  EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
>  
>  /* Perform RCU core processing work for the current CPU.  */
> -static __latent_entropy void rcu_core(struct softirq_action *unused)
> +static __latent_entropy void rcu_core(void)
>  {
>  	unsigned long flags;
>  	struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);
> @@ -2295,6 +2304,34 @@ static __latent_entropy void rcu_core(struct softirq_action *unused)
>  	trace_rcu_utilization(TPS("End RCU core"));
>  }
>  
> +static void rcu_core_si(struct softirq_action *h)
> +{
> +	rcu_core();
> +}
> +
> +static void rcu_wake_cond(struct task_struct *t, int status)
> +{
> +	/*
> +	 * If the thread is yielding, only wake it when this
> +	 * is invoked from idle
> +	 */
> +	if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current)))
> +		wake_up_process(t);
> +}
> +
> +static void invoke_rcu_core_kthread(void)
> +{
> +	struct task_struct *t;
> +	unsigned long flags;
> +
> +	local_irq_save(flags);
> +	__this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
> +	t = __this_cpu_read(rcu_data.rcu_cpu_kthread_task);
> +	if (t != NULL && t != current)
> +		rcu_wake_cond(t, __this_cpu_read(rcu_data.rcu_cpu_kthread_status));
> +	local_irq_restore(flags);
> +}
> +
>  /*
>   * Schedule RCU callback invocation.  If the running implementation of RCU
>   * does not support RCU priority boosting, just do a direct call, otherwise
> @@ -2306,19 +2343,95 @@ static void invoke_rcu_callbacks(struct rcu_data *rdp)
>  {
>  	if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
>  		return;
> -	if (likely(!rcu_state.boost)) {
> -		rcu_do_batch(rdp);
> -		return;
> -	}
> -	invoke_rcu_callbacks_kthread();
> +	if (rcu_state.boost || !use_softirq)
> +		invoke_rcu_core_kthread();
> +	rcu_do_batch(rdp);

Shouldn't there be an else before the rcu_do_batch? If we are waking up the
rcuc thread, then that will do the rcu_do_batch when it runs, right?

Something like:
	if (rcu_state.boost || !use_softirq)
		invoke_rcu_core_kthread();
	else
		rcu_do_batch(rdp);

The previous code similarly had a return; statement here as well.
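
Put together, the function would then read roughly like the following (a
sketch of the suggested shape, reusing the names from the patch above):

	static void invoke_rcu_callbacks(struct rcu_data *rdp)
	{
		if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
			return;
		if (rcu_state.boost || !use_softirq) {
			/* The rcuc kthread will invoke rcu_core() and thus rcu_do_batch(). */
			invoke_rcu_core_kthread();
			return;
		}
		rcu_do_batch(rdp);
	}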

>  }
>  
> +/*
> + * Wake up this CPU's rcuc kthread to do RCU core processing.
> + */
>  static void invoke_rcu_core(void)
>  {
> -	if (cpu_online(smp_processor_id()))
> +	if (!cpu_online(smp_processor_id()))
> +		return;
> +	if (use_softirq)
>  		raise_softirq(RCU_SOFTIRQ);
> +	else
> +		invoke_rcu_core_kthread();
>  }
>  
> +static void rcu_cpu_kthread_park(unsigned int cpu)
> +{
> +	per_cpu(rcu_data.rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
> +}
> +
> +static int rcu_cpu_kthread_should_run(unsigned int cpu)
> +{
> +	return __this_cpu_read(rcu_data.rcu_cpu_has_work);
> +}
> +
> +/*
> + * Per-CPU kernel thread that invokes RCU callbacks.  This replaces
> + * the RCU softirq used in configurations of RCU that do not support RCU
> + * priority boosting.
> + */
> +static void rcu_cpu_kthread(unsigned int cpu)
> +{
> +	unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
> +	char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
> +	int spincnt;
> +
> +	for (spincnt = 0; spincnt < 10; spincnt++) {
> +		trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
> +		local_bh_disable();
> +		*statusp = RCU_KTHREAD_RUNNING;
> +		local_irq_disable();
> +		work = *workp;
> +		*workp = 0;
> +		local_irq_enable();
> +		if (work)
> +			rcu_core();
> +		local_bh_enable();
> +		if (*workp == 0) {
> +			trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
> +			*statusp = RCU_KTHREAD_WAITING;
> +			return;
> +		}
> +	}
> +	*statusp = RCU_KTHREAD_YIELDING;
> +	trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
> +	schedule_timeout_interruptible(2);
> +	trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
> +	*statusp = RCU_KTHREAD_WAITING;
> +}
> +
[snip]
> diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
> index e253d11af3c49..a1a72a1ecb026 100644
> --- a/kernel/rcu/tree.h
> +++ b/kernel/rcu/tree.h
> @@ -407,8 +407,8 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func);
>  static void dump_blkd_tasks(struct rcu_node *rnp, int ncheck);
>  static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
>  static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
> -static void invoke_rcu_callbacks_kthread(void);
>  static bool rcu_is_callbacks_kthread(void);
> +static void rcu_cpu_kthread_setup(unsigned int cpu);
>  static void __init rcu_spawn_boost_kthreads(void);
>  static void rcu_prepare_kthreads(int cpu);
>  static void rcu_cleanup_after_idle(void);
> diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
> index f46b4af96ab95..b807204ffd83f 100644
> --- a/kernel/rcu/tree_plugin.h
> +++ b/kernel/rcu/tree_plugin.h
> @@ -11,29 +11,7 @@
>   *	   Paul E. McKenney <paulmck@linux.ibm.com>
>   */
>  
> -#include <linux/delay.h>
> -#include <linux/gfp.h>
> -#include <linux/oom.h>
> -#include <linux/sched/debug.h>
> -#include <linux/smpboot.h>
> -#include <linux/sched/isolation.h>
> -#include <uapi/linux/sched/types.h>
> -#include "../time/tick-internal.h"
> -
> -#ifdef CONFIG_RCU_BOOST
>  #include "../locking/rtmutex_common.h"
> -#else /* #ifdef CONFIG_RCU_BOOST */
> -
> -/*
> - * Some architectures do not define rt_mutexes, but if !CONFIG_RCU_BOOST,
> - * all uses are in dead code.  Provide a definition to keep the compiler
> - * happy, but add WARN_ON_ONCE() to complain if used in the wrong place.
> - * This probably needs to be excluded from -rt builds.
> - */
> -#define rt_mutex_owner(a) ({ WARN_ON_ONCE(1); NULL; })
> -#define rt_mutex_futex_unlock(x) WARN_ON_ONCE(1)
> -
> -#endif /* #else #ifdef CONFIG_RCU_BOOST */
>  
>  #ifdef CONFIG_RCU_NOCB_CPU
>  static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
> @@ -94,6 +72,8 @@ static void __init rcu_bootup_announce_oddness(void)
>  		pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_init_delay);
>  	if (gp_cleanup_delay)
>  		pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_cleanup_delay);
> +	if (!use_softirq)
> +		pr_info("\tRCU_SOFTIRQ processing moved to rcuc kthreads.\n");
>  	if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG))
>  		pr_info("\tRCU debug extended QS entry/exit.\n");
>  	rcupdate_announce_bootup_oddness();
> @@ -629,7 +609,10 @@ static void rcu_read_unlock_special(struct task_struct *t)
>  		/* Need to defer quiescent state until everything is enabled. */
>  		if (irqs_were_disabled) {
>  			/* Enabling irqs does not reschedule, so... */
> -			raise_softirq_irqoff(RCU_SOFTIRQ);
> +			if (!use_softirq)
> +				raise_softirq_irqoff(RCU_SOFTIRQ);

I believe this inverted condition (the stray "!") has been corrected in Paul's tree so that's Ok.

> +			else
> +				invoke_rcu_core();

But why not just directly call invoke_rcu_core() here? That will do the
appropriate use_softirq check, right?
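
That is, the hunk in rcu_read_unlock_special() could presumably collapse to
something like this minimal sketch (assuming invoke_rcu_core() keeps the
internal use_softirq branch quoted above):

		/* Need to defer quiescent state until everything is enabled. */
		if (irqs_were_disabled) {
			/* Enabling irqs does not reschedule, so... */
			invoke_rcu_core();
		} else {
			/* Enabling BH or preempt does reschedule, so... */
			set_tsk_need_resched(current);
			set_preempt_need_resched();
		}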

thanks,

 - Joel


>  		} else {
>  			/* Enabling BH or preempt does reschedule, so... */
>  			set_tsk_need_resched(current);
> @@ -944,18 +927,21 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
>  
>  #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
>  
> -#ifdef CONFIG_RCU_BOOST
> -
> -static void rcu_wake_cond(struct task_struct *t, int status)
> +/*
> + * If boosting, set rcuc kthreads to realtime priority.
> + */
> +static void rcu_cpu_kthread_setup(unsigned int cpu)
>  {
> -	/*
> -	 * If the thread is yielding, only wake it when this
> -	 * is invoked from idle
> -	 */
> -	if (status != RCU_KTHREAD_YIELDING || is_idle_task(current))
> -		wake_up_process(t);
> +#ifdef CONFIG_RCU_BOOST
> +	struct sched_param sp;
> +
> +	sp.sched_priority = kthread_prio;
> +	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
> +#endif /* #ifdef CONFIG_RCU_BOOST */
>  }
>  
> +#ifdef CONFIG_RCU_BOOST
> +
>  /*
>   * Carry out RCU priority boosting on the task indicated by ->exp_tasks
>   * or ->boost_tasks, advancing the pointer to the next task in the
> @@ -1093,23 +1079,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
>  	}
>  }
>  
> -/*
> - * Wake up the per-CPU kthread to invoke RCU callbacks.
> - */
> -static void invoke_rcu_callbacks_kthread(void)
> -{
> -	unsigned long flags;
> -
> -	local_irq_save(flags);
> -	__this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
> -	if (__this_cpu_read(rcu_data.rcu_cpu_kthread_task) != NULL &&
> -	    current != __this_cpu_read(rcu_data.rcu_cpu_kthread_task)) {
> -		rcu_wake_cond(__this_cpu_read(rcu_data.rcu_cpu_kthread_task),
> -			      __this_cpu_read(rcu_data.rcu_cpu_kthread_status));
> -	}
> -	local_irq_restore(flags);
> -}
> -
>  /*
>   * Is the current CPU running the RCU-callbacks kthread?
>   * Caller must have preemption disabled.
> @@ -1163,59 +1132,6 @@ static int rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
>  	return 0;
>  }
>  
> -static void rcu_cpu_kthread_setup(unsigned int cpu)
> -{
> -	struct sched_param sp;
> -
> -	sp.sched_priority = kthread_prio;
> -	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
> -}
> -
> -static void rcu_cpu_kthread_park(unsigned int cpu)
> -{
> -	per_cpu(rcu_data.rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
> -}
> -
> -static int rcu_cpu_kthread_should_run(unsigned int cpu)
> -{
> -	return __this_cpu_read(rcu_data.rcu_cpu_has_work);
> -}
> -
> -/*
> - * Per-CPU kernel thread that invokes RCU callbacks.  This replaces
> - * the RCU softirq used in configurations of RCU that do not support RCU
> - * priority boosting.
> - */
> -static void rcu_cpu_kthread(unsigned int cpu)
> -{
> -	unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
> -	char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
> -	int spincnt;
> -
> -	for (spincnt = 0; spincnt < 10; spincnt++) {
> -		trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
> -		local_bh_disable();
> -		*statusp = RCU_KTHREAD_RUNNING;
> -		local_irq_disable();
> -		work = *workp;
> -		*workp = 0;
> -		local_irq_enable();
> -		if (work)
> -			rcu_do_batch(this_cpu_ptr(&rcu_data));
> -		local_bh_enable();
> -		if (*workp == 0) {
> -			trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
> -			*statusp = RCU_KTHREAD_WAITING;
> -			return;
> -		}
> -	}
> -	*statusp = RCU_KTHREAD_YIELDING;
> -	trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
> -	schedule_timeout_interruptible(2);
> -	trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
> -	*statusp = RCU_KTHREAD_WAITING;
> -}
> -
>  /*
>   * Set the per-rcu_node kthread's affinity to cover all CPUs that are
>   * served by the rcu_node in question.  The CPU hotplug lock is still
> @@ -1246,27 +1162,13 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
>  	free_cpumask_var(cm);
>  }
>  
> -static struct smp_hotplug_thread rcu_cpu_thread_spec = {
> -	.store			= &rcu_data.rcu_cpu_kthread_task,
> -	.thread_should_run	= rcu_cpu_kthread_should_run,
> -	.thread_fn		= rcu_cpu_kthread,
> -	.thread_comm		= "rcuc/%u",
> -	.setup			= rcu_cpu_kthread_setup,
> -	.park			= rcu_cpu_kthread_park,
> -};
> -
>  /*
>   * Spawn boost kthreads -- called as soon as the scheduler is running.
>   */
>  static void __init rcu_spawn_boost_kthreads(void)
>  {
>  	struct rcu_node *rnp;
> -	int cpu;
>  
> -	for_each_possible_cpu(cpu)
> -		per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0;
> -	if (WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec), "%s: Could not start rcub kthread, OOM is now expected behavior\n", __func__))
> -		return;
>  	rcu_for_each_leaf_node(rnp)
>  		(void)rcu_spawn_one_boost_kthread(rnp);
>  }
> @@ -1289,11 +1191,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
>  	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
>  }
>  
> -static void invoke_rcu_callbacks_kthread(void)
> -{
> -	WARN_ON_ONCE(1);
> -}
> -
>  static bool rcu_is_callbacks_kthread(void)
>  {
>  	return false;
> -- 
> 2.20.1
> 

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v3] rcu: Allow to eliminate softirq processing from rcutree
  2019-03-22 23:48                           ` Joel Fernandes
@ 2019-03-23  0:25                             ` Paul E. McKenney
  2019-03-23  1:04                               ` Joel Fernandes
  2019-03-23 16:10                               ` Paul E. McKenney
  0 siblings, 2 replies; 44+ messages in thread
From: Paul E. McKenney @ 2019-03-23  0:25 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: Sebastian Andrzej Siewior, linux-kernel, Josh Triplett,
	Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan, tglx,
	Mike Galbraith

On Fri, Mar 22, 2019 at 07:48:19PM -0400, Joel Fernandes wrote:
> On Wed, Mar 20, 2019 at 10:13:33PM +0100, Sebastian Andrzej Siewior wrote:
> > Running RCU out of softirq is a problem for some workloads that would
> > like to manage RCU core processing independently of other softirq
> > work, for example, setting kthread priority.  This commit therefore
> > introduces the `rcunosoftirq' option which moves the RCU core work
> > from softirq to a per-CPU/per-flavor SCHED_OTHER kthread named rcuc.
> > The SCHED_OTHER approach avoids the scalability problems that appeared
> > with the earlier attempt to move RCU core processing from softirq
> > to kthreads.  That said, kernels built with RCU_BOOST=y will run the
> > rcuc kthreads at the RCU-boosting priority.
> [snip]
> > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > index 0f31b79eb6761..05a1e42fdaf10 100644
> > --- a/kernel/rcu/tree.c
> > +++ b/kernel/rcu/tree.c
> > @@ -51,6 +51,12 @@
> >  #include <linux/tick.h>
> >  #include <linux/sysrq.h>
> >  #include <linux/kprobes.h>
> > +#include <linux/gfp.h>
> > +#include <linux/oom.h>
> > +#include <linux/smpboot.h>
> > +#include <linux/jiffies.h>
> > +#include <linux/sched/isolation.h>
> > +#include "../time/tick-internal.h"
> >  
> >  #include "tree.h"
> >  #include "rcu.h"
> > @@ -92,6 +98,9 @@ struct rcu_state rcu_state = {
> >  /* Dump rcu_node combining tree at boot to verify correct setup. */
> >  static bool dump_tree;
> >  module_param(dump_tree, bool, 0444);
> > +/* Move RCU_SOFTIRQ to rcuc kthreads. */
> > +static bool use_softirq = 1;
> > +module_param(use_softirq, bool, 0444);
> >  /* Control rcu_node-tree auto-balancing at boot time. */
> >  static bool rcu_fanout_exact;
> >  module_param(rcu_fanout_exact, bool, 0444);
> > @@ -2253,7 +2262,7 @@ void rcu_force_quiescent_state(void)
> >  EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
> >  
> >  /* Perform RCU core processing work for the current CPU.  */
> > -static __latent_entropy void rcu_core(struct softirq_action *unused)
> > +static __latent_entropy void rcu_core(void)
> >  {
> >  	unsigned long flags;
> >  	struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);
> > @@ -2295,6 +2304,34 @@ static __latent_entropy void rcu_core(struct softirq_action *unused)
> >  	trace_rcu_utilization(TPS("End RCU core"));
> >  }
> >  
> > +static void rcu_core_si(struct softirq_action *h)
> > +{
> > +	rcu_core();
> > +}
> > +
> > +static void rcu_wake_cond(struct task_struct *t, int status)
> > +{
> > +	/*
> > +	 * If the thread is yielding, only wake it when this
> > +	 * is invoked from idle
> > +	 */
> > +	if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current)))
> > +		wake_up_process(t);
> > +}
> > +
> > +static void invoke_rcu_core_kthread(void)
> > +{
> > +	struct task_struct *t;
> > +	unsigned long flags;
> > +
> > +	local_irq_save(flags);
> > +	__this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
> > +	t = __this_cpu_read(rcu_data.rcu_cpu_kthread_task);
> > +	if (t != NULL && t != current)
> > +		rcu_wake_cond(t, __this_cpu_read(rcu_data.rcu_cpu_kthread_status));
> > +	local_irq_restore(flags);
> > +}
> > +
> >  /*
> >   * Schedule RCU callback invocation.  If the running implementation of RCU
> >   * does not support RCU priority boosting, just do a direct call, otherwise
> > @@ -2306,19 +2343,95 @@ static void invoke_rcu_callbacks(struct rcu_data *rdp)
> >  {
> >  	if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
> >  		return;
> > -	if (likely(!rcu_state.boost)) {
> > -		rcu_do_batch(rdp);
> > -		return;
> > -	}
> > -	invoke_rcu_callbacks_kthread();
> > +	if (rcu_state.boost || !use_softirq)
> > +		invoke_rcu_core_kthread();
> > +	rcu_do_batch(rdp);
> 
> Shouldn't there be an else before the rcu_do_batch? If we are waking up the
> rcuc thread, then that will do the rcu_do_batch when it runs right?
> 
> Something like:
> 	if (rcu_state.boost || !use_softirq)
> 		invoke_rcu_core_kthread();
> 	else
> 		rcu_do_batch(rdp);
> 
> Previous code similarly had a return; also.

I believe that you are correct, so I will give it a shot.  Good eyes!

> >  }
> >  
> > +/*
> > + * Wake up this CPU's rcuc kthread to do RCU core processing.
> > + */
> >  static void invoke_rcu_core(void)
> >  {
> > -	if (cpu_online(smp_processor_id()))
> > +	if (!cpu_online(smp_processor_id()))
> > +		return;
> > +	if (use_softirq)
> >  		raise_softirq(RCU_SOFTIRQ);
> > +	else
> > +		invoke_rcu_core_kthread();
> >  }
> >  
> > +static void rcu_cpu_kthread_park(unsigned int cpu)
> > +{
> > +	per_cpu(rcu_data.rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
> > +}
> > +
> > +static int rcu_cpu_kthread_should_run(unsigned int cpu)
> > +{
> > +	return __this_cpu_read(rcu_data.rcu_cpu_has_work);
> > +}
> > +
> > +/*
> > + * Per-CPU kernel thread that invokes RCU callbacks.  This replaces
> > + * the RCU softirq used in configurations of RCU that do not support RCU
> > + * priority boosting.
> > + */
> > +static void rcu_cpu_kthread(unsigned int cpu)
> > +{
> > +	unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
> > +	char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
> > +	int spincnt;
> > +
> > +	for (spincnt = 0; spincnt < 10; spincnt++) {
> > +		trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
> > +		local_bh_disable();
> > +		*statusp = RCU_KTHREAD_RUNNING;
> > +		local_irq_disable();
> > +		work = *workp;
> > +		*workp = 0;
> > +		local_irq_enable();
> > +		if (work)
> > +			rcu_core();
> > +		local_bh_enable();
> > +		if (*workp == 0) {
> > +			trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
> > +			*statusp = RCU_KTHREAD_WAITING;
> > +			return;
> > +		}
> > +	}
> > +	*statusp = RCU_KTHREAD_YIELDING;
> > +	trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
> > +	schedule_timeout_interruptible(2);
> > +	trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
> > +	*statusp = RCU_KTHREAD_WAITING;
> > +}
> > +
> [snip]
> > diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
> > index e253d11af3c49..a1a72a1ecb026 100644
> > --- a/kernel/rcu/tree.h
> > +++ b/kernel/rcu/tree.h
> > @@ -407,8 +407,8 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func);
> >  static void dump_blkd_tasks(struct rcu_node *rnp, int ncheck);
> >  static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
> >  static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
> > -static void invoke_rcu_callbacks_kthread(void);
> >  static bool rcu_is_callbacks_kthread(void);
> > +static void rcu_cpu_kthread_setup(unsigned int cpu);
> >  static void __init rcu_spawn_boost_kthreads(void);
> >  static void rcu_prepare_kthreads(int cpu);
> >  static void rcu_cleanup_after_idle(void);
> > diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
> > index f46b4af96ab95..b807204ffd83f 100644
> > --- a/kernel/rcu/tree_plugin.h
> > +++ b/kernel/rcu/tree_plugin.h
> > @@ -11,29 +11,7 @@
> >   *	   Paul E. McKenney <paulmck@linux.ibm.com>
> >   */
> >  
> > -#include <linux/delay.h>
> > -#include <linux/gfp.h>
> > -#include <linux/oom.h>
> > -#include <linux/sched/debug.h>
> > -#include <linux/smpboot.h>
> > -#include <linux/sched/isolation.h>
> > -#include <uapi/linux/sched/types.h>
> > -#include "../time/tick-internal.h"
> > -
> > -#ifdef CONFIG_RCU_BOOST
> >  #include "../locking/rtmutex_common.h"
> > -#else /* #ifdef CONFIG_RCU_BOOST */
> > -
> > -/*
> > - * Some architectures do not define rt_mutexes, but if !CONFIG_RCU_BOOST,
> > - * all uses are in dead code.  Provide a definition to keep the compiler
> > - * happy, but add WARN_ON_ONCE() to complain if used in the wrong place.
> > - * This probably needs to be excluded from -rt builds.
> > - */
> > -#define rt_mutex_owner(a) ({ WARN_ON_ONCE(1); NULL; })
> > -#define rt_mutex_futex_unlock(x) WARN_ON_ONCE(1)
> > -
> > -#endif /* #else #ifdef CONFIG_RCU_BOOST */
> >  
> >  #ifdef CONFIG_RCU_NOCB_CPU
> >  static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
> > @@ -94,6 +72,8 @@ static void __init rcu_bootup_announce_oddness(void)
> >  		pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_init_delay);
> >  	if (gp_cleanup_delay)
> >  		pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_cleanup_delay);
> > +	if (!use_softirq)
> > +		pr_info("\tRCU_SOFTIRQ processing moved to rcuc kthreads.\n");
> >  	if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG))
> >  		pr_info("\tRCU debug extended QS entry/exit.\n");
> >  	rcupdate_announce_bootup_oddness();
> > @@ -629,7 +609,10 @@ static void rcu_read_unlock_special(struct task_struct *t)
> >  		/* Need to defer quiescent state until everything is enabled. */
> >  		if (irqs_were_disabled) {
> >  			/* Enabling irqs does not reschedule, so... */
> > -			raise_softirq_irqoff(RCU_SOFTIRQ);
> > +			if (!use_softirq)
> > +				raise_softirq_irqoff(RCU_SOFTIRQ);
> 
> I believe this inverted condition (the misplaced "!") has been corrected in Paul's tree so that's Ok.
> 
> > +			else
> > +				invoke_rcu_core();
> 
> But why not just directly call invoke_rcu_core() here? That will do the
> appropriate use_softirq check right?

It is -so- close!  But it invokes raise_softirq() instead of the needed
raise_softirq_irqoff().
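
For illustration, a minimal sketch of what the deferral path would need,
assuming the use_softirq flag and the invoke_rcu_core_kthread() helper from
the patch quoted above (a sketch only, not the final code):

	if (irqs_were_disabled) {
		/* Interrupts are off here, so the _irqoff() variant is required. */
		if (use_softirq)
			raise_softirq_irqoff(RCU_SOFTIRQ);
		else
			invoke_rcu_core_kthread();
	}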

Plus I bet that this has a few more changes to go before it is all the
way there.  ;-)

							Thanx, Paul

> thanks,
> 
>  - Joel
> 
> 
> >  		} else {
> >  			/* Enabling BH or preempt does reschedule, so... */
> >  			set_tsk_need_resched(current);
> > @@ -944,18 +927,21 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
> >  
> >  #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
> >  
> > -#ifdef CONFIG_RCU_BOOST
> > -
> > -static void rcu_wake_cond(struct task_struct *t, int status)
> > +/*
> > + * If boosting, set rcuc kthreads to realtime priority.
> > + */
> > +static void rcu_cpu_kthread_setup(unsigned int cpu)
> >  {
> > -	/*
> > -	 * If the thread is yielding, only wake it when this
> > -	 * is invoked from idle
> > -	 */
> > -	if (status != RCU_KTHREAD_YIELDING || is_idle_task(current))
> > -		wake_up_process(t);
> > +#ifdef CONFIG_RCU_BOOST
> > +	struct sched_param sp;
> > +
> > +	sp.sched_priority = kthread_prio;
> > +	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
> > +#endif /* #ifdef CONFIG_RCU_BOOST */
> >  }
> >  
> > +#ifdef CONFIG_RCU_BOOST
> > +
> >  /*
> >   * Carry out RCU priority boosting on the task indicated by ->exp_tasks
> >   * or ->boost_tasks, advancing the pointer to the next task in the
> > @@ -1093,23 +1079,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
> >  	}
> >  }
> >  
> > -/*
> > - * Wake up the per-CPU kthread to invoke RCU callbacks.
> > - */
> > -static void invoke_rcu_callbacks_kthread(void)
> > -{
> > -	unsigned long flags;
> > -
> > -	local_irq_save(flags);
> > -	__this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
> > -	if (__this_cpu_read(rcu_data.rcu_cpu_kthread_task) != NULL &&
> > -	    current != __this_cpu_read(rcu_data.rcu_cpu_kthread_task)) {
> > -		rcu_wake_cond(__this_cpu_read(rcu_data.rcu_cpu_kthread_task),
> > -			      __this_cpu_read(rcu_data.rcu_cpu_kthread_status));
> > -	}
> > -	local_irq_restore(flags);
> > -}
> > -
> >  /*
> >   * Is the current CPU running the RCU-callbacks kthread?
> >   * Caller must have preemption disabled.
> > @@ -1163,59 +1132,6 @@ static int rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
> >  	return 0;
> >  }
> >  
> > -static void rcu_cpu_kthread_setup(unsigned int cpu)
> > -{
> > -	struct sched_param sp;
> > -
> > -	sp.sched_priority = kthread_prio;
> > -	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
> > -}
> > -
> > -static void rcu_cpu_kthread_park(unsigned int cpu)
> > -{
> > -	per_cpu(rcu_data.rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
> > -}
> > -
> > -static int rcu_cpu_kthread_should_run(unsigned int cpu)
> > -{
> > -	return __this_cpu_read(rcu_data.rcu_cpu_has_work);
> > -}
> > -
> > -/*
> > - * Per-CPU kernel thread that invokes RCU callbacks.  This replaces
> > - * the RCU softirq used in configurations of RCU that do not support RCU
> > - * priority boosting.
> > - */
> > -static void rcu_cpu_kthread(unsigned int cpu)
> > -{
> > -	unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
> > -	char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
> > -	int spincnt;
> > -
> > -	for (spincnt = 0; spincnt < 10; spincnt++) {
> > -		trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
> > -		local_bh_disable();
> > -		*statusp = RCU_KTHREAD_RUNNING;
> > -		local_irq_disable();
> > -		work = *workp;
> > -		*workp = 0;
> > -		local_irq_enable();
> > -		if (work)
> > -			rcu_do_batch(this_cpu_ptr(&rcu_data));
> > -		local_bh_enable();
> > -		if (*workp == 0) {
> > -			trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
> > -			*statusp = RCU_KTHREAD_WAITING;
> > -			return;
> > -		}
> > -	}
> > -	*statusp = RCU_KTHREAD_YIELDING;
> > -	trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
> > -	schedule_timeout_interruptible(2);
> > -	trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
> > -	*statusp = RCU_KTHREAD_WAITING;
> > -}
> > -
> >  /*
> >   * Set the per-rcu_node kthread's affinity to cover all CPUs that are
> >   * served by the rcu_node in question.  The CPU hotplug lock is still
> > @@ -1246,27 +1162,13 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
> >  	free_cpumask_var(cm);
> >  }
> >  
> > -static struct smp_hotplug_thread rcu_cpu_thread_spec = {
> > -	.store			= &rcu_data.rcu_cpu_kthread_task,
> > -	.thread_should_run	= rcu_cpu_kthread_should_run,
> > -	.thread_fn		= rcu_cpu_kthread,
> > -	.thread_comm		= "rcuc/%u",
> > -	.setup			= rcu_cpu_kthread_setup,
> > -	.park			= rcu_cpu_kthread_park,
> > -};
> > -
> >  /*
> >   * Spawn boost kthreads -- called as soon as the scheduler is running.
> >   */
> >  static void __init rcu_spawn_boost_kthreads(void)
> >  {
> >  	struct rcu_node *rnp;
> > -	int cpu;
> >  
> > -	for_each_possible_cpu(cpu)
> > -		per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0;
> > -	if (WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec), "%s: Could not start rcub kthread, OOM is now expected behavior\n", __func__))
> > -		return;
> >  	rcu_for_each_leaf_node(rnp)
> >  		(void)rcu_spawn_one_boost_kthread(rnp);
> >  }
> > @@ -1289,11 +1191,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
> >  	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
> >  }
> >  
> > -static void invoke_rcu_callbacks_kthread(void)
> > -{
> > -	WARN_ON_ONCE(1);
> > -}
> > -
> >  static bool rcu_is_callbacks_kthread(void)
> >  {
> >  	return false;
> > -- 
> > 2.20.1
> > 
> 


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v3] rcu: Allow to eliminate softirq processing from rcutree
  2019-03-23  0:25                             ` Paul E. McKenney
@ 2019-03-23  1:04                               ` Joel Fernandes
  2019-03-23 16:10                               ` Paul E. McKenney
  1 sibling, 0 replies; 44+ messages in thread
From: Joel Fernandes @ 2019-03-23  1:04 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: Sebastian Andrzej Siewior, linux-kernel, Josh Triplett,
	Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan, tglx,
	Mike Galbraith

On Fri, Mar 22, 2019 at 05:25:19PM -0700, Paul E. McKenney wrote:
> On Fri, Mar 22, 2019 at 07:48:19PM -0400, Joel Fernandes wrote:
> > On Wed, Mar 20, 2019 at 10:13:33PM +0100, Sebastian Andrzej Siewior wrote:
> > > Running RCU out of softirq is a problem for some workloads that would
> > > like to manage RCU core processing independently of other softirq
> > > work, for example, setting kthread priority.  This commit therefore
> > > introduces the `rcunosoftirq' option which moves the RCU core work
> > > from softirq to a per-CPU/per-flavor SCHED_OTHER kthread named rcuc.
> > > The SCHED_OTHER approach avoids the scalability problems that appeared
> > > with the earlier attempt to move RCU core processing from softirq
> > > to kthreads.  That said, kernels built with RCU_BOOST=y will run the
> > > rcuc kthreads at the RCU-boosting priority.
> > [snip]
> > > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > > index 0f31b79eb6761..05a1e42fdaf10 100644
> > > --- a/kernel/rcu/tree.c
> > > +++ b/kernel/rcu/tree.c
> > > @@ -51,6 +51,12 @@
> > >  #include <linux/tick.h>
> > >  #include <linux/sysrq.h>
> > >  #include <linux/kprobes.h>
> > > +#include <linux/gfp.h>
> > > +#include <linux/oom.h>
> > > +#include <linux/smpboot.h>
> > > +#include <linux/jiffies.h>
> > > +#include <linux/sched/isolation.h>
> > > +#include "../time/tick-internal.h"
> > >  
> > >  #include "tree.h"
> > >  #include "rcu.h"
> > > @@ -92,6 +98,9 @@ struct rcu_state rcu_state = {
> > >  /* Dump rcu_node combining tree at boot to verify correct setup. */
> > >  static bool dump_tree;
> > >  module_param(dump_tree, bool, 0444);
> > > +/* Move RCU_SOFTIRQ to rcuc kthreads. */
> > > +static bool use_softirq = 1;
> > > +module_param(use_softirq, bool, 0444);
> > >  /* Control rcu_node-tree auto-balancing at boot time. */
> > >  static bool rcu_fanout_exact;
> > >  module_param(rcu_fanout_exact, bool, 0444);
> > > @@ -2253,7 +2262,7 @@ void rcu_force_quiescent_state(void)
> > >  EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
> > >  
> > >  /* Perform RCU core processing work for the current CPU.  */
> > > -static __latent_entropy void rcu_core(struct softirq_action *unused)
> > > +static __latent_entropy void rcu_core(void)
> > >  {
> > >  	unsigned long flags;
> > >  	struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);
> > > @@ -2295,6 +2304,34 @@ static __latent_entropy void rcu_core(struct softirq_action *unused)
> > >  	trace_rcu_utilization(TPS("End RCU core"));
> > >  }
> > >  
> > > +static void rcu_core_si(struct softirq_action *h)
> > > +{
> > > +	rcu_core();
> > > +}
> > > +
> > > +static void rcu_wake_cond(struct task_struct *t, int status)
> > > +{
> > > +	/*
> > > +	 * If the thread is yielding, only wake it when this
> > > +	 * is invoked from idle
> > > +	 */
> > > +	if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current)))
> > > +		wake_up_process(t);
> > > +}
> > > +
> > > +static void invoke_rcu_core_kthread(void)
> > > +{
> > > +	struct task_struct *t;
> > > +	unsigned long flags;
> > > +
> > > +	local_irq_save(flags);
> > > +	__this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
> > > +	t = __this_cpu_read(rcu_data.rcu_cpu_kthread_task);
> > > +	if (t != NULL && t != current)
> > > +		rcu_wake_cond(t, __this_cpu_read(rcu_data.rcu_cpu_kthread_status));
> > > +	local_irq_restore(flags);
> > > +}
> > > +
> > >  /*
> > >   * Schedule RCU callback invocation.  If the running implementation of RCU
> > >   * does not support RCU priority boosting, just do a direct call, otherwise
> > > @@ -2306,19 +2343,95 @@ static void invoke_rcu_callbacks(struct rcu_data *rdp)
> > >  {
> > >  	if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
> > >  		return;
> > > -	if (likely(!rcu_state.boost)) {
> > > -		rcu_do_batch(rdp);
> > > -		return;
> > > -	}
> > > -	invoke_rcu_callbacks_kthread();
> > > +	if (rcu_state.boost || !use_softirq)
> > > +		invoke_rcu_core_kthread();
> > > +	rcu_do_batch(rdp);
> > 
> > Shouldn't there be an else before the rcu_do_batch? If we are waking up the
> > rcuc thread, then that will do the rcu_do_batch when it runs right?
> > 
> > Something like:
> > 	if (rcu_state.boost || !use_softirq)
> > 		invoke_rcu_core_kthread();
> > 	else
> > 		rcu_do_batch(rdp);
> > 
> > Previous code similarly had a return; also.
> 
> I believe that you are correct, so I will give it a shot.  Good eyes!

Thanks! Also I am sending some of the lockdep dyntick checking patches shortly :)

> > >  }
> > >  
> > > +/*
> > > + * Wake up this CPU's rcuc kthread to do RCU core processing.
> > > + */
> > >  static void invoke_rcu_core(void)
> > >  {
> > > -	if (cpu_online(smp_processor_id()))
> > > +	if (!cpu_online(smp_processor_id()))
> > > +		return;
> > > +	if (use_softirq)
> > >  		raise_softirq(RCU_SOFTIRQ);
> > > +	else
> > > +		invoke_rcu_core_kthread();
> > >  }
> > >  
> > > +static void rcu_cpu_kthread_park(unsigned int cpu)
> > > +{
> > > +	per_cpu(rcu_data.rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
> > > +}
> > > +
> > > +static int rcu_cpu_kthread_should_run(unsigned int cpu)
> > > +{
> > > +	return __this_cpu_read(rcu_data.rcu_cpu_has_work);
> > > +}
> > > +
> > > +/*
> > > + * Per-CPU kernel thread that invokes RCU callbacks.  This replaces
> > > + * the RCU softirq used in configurations of RCU that do not support RCU
> > > + * priority boosting.
> > > + */
> > > +static void rcu_cpu_kthread(unsigned int cpu)
> > > +{
> > > +	unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
> > > +	char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
> > > +	int spincnt;
> > > +
> > > +	for (spincnt = 0; spincnt < 10; spincnt++) {
> > > +		trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
> > > +		local_bh_disable();
> > > +		*statusp = RCU_KTHREAD_RUNNING;
> > > +		local_irq_disable();
> > > +		work = *workp;
> > > +		*workp = 0;
> > > +		local_irq_enable();
> > > +		if (work)
> > > +			rcu_core();
> > > +		local_bh_enable();
> > > +		if (*workp == 0) {
> > > +			trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
> > > +			*statusp = RCU_KTHREAD_WAITING;
> > > +			return;
> > > +		}
> > > +	}
> > > +	*statusp = RCU_KTHREAD_YIELDING;
> > > +	trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
> > > +	schedule_timeout_interruptible(2);
> > > +	trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
> > > +	*statusp = RCU_KTHREAD_WAITING;
> > > +}
> > > +
> > [snip]
> > > diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
> > > index e253d11af3c49..a1a72a1ecb026 100644
> > > --- a/kernel/rcu/tree.h
> > > +++ b/kernel/rcu/tree.h
> > > @@ -407,8 +407,8 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func);
> > >  static void dump_blkd_tasks(struct rcu_node *rnp, int ncheck);
> > >  static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
> > >  static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
> > > -static void invoke_rcu_callbacks_kthread(void);
> > >  static bool rcu_is_callbacks_kthread(void);
> > > +static void rcu_cpu_kthread_setup(unsigned int cpu);
> > >  static void __init rcu_spawn_boost_kthreads(void);
> > >  static void rcu_prepare_kthreads(int cpu);
> > >  static void rcu_cleanup_after_idle(void);
> > > diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
> > > index f46b4af96ab95..b807204ffd83f 100644
> > > --- a/kernel/rcu/tree_plugin.h
> > > +++ b/kernel/rcu/tree_plugin.h
> > > @@ -11,29 +11,7 @@
> > >   *	   Paul E. McKenney <paulmck@linux.ibm.com>
> > >   */
> > >  
> > > -#include <linux/delay.h>
> > > -#include <linux/gfp.h>
> > > -#include <linux/oom.h>
> > > -#include <linux/sched/debug.h>
> > > -#include <linux/smpboot.h>
> > > -#include <linux/sched/isolation.h>
> > > -#include <uapi/linux/sched/types.h>
> > > -#include "../time/tick-internal.h"
> > > -
> > > -#ifdef CONFIG_RCU_BOOST
> > >  #include "../locking/rtmutex_common.h"
> > > -#else /* #ifdef CONFIG_RCU_BOOST */
> > > -
> > > -/*
> > > - * Some architectures do not define rt_mutexes, but if !CONFIG_RCU_BOOST,
> > > - * all uses are in dead code.  Provide a definition to keep the compiler
> > > - * happy, but add WARN_ON_ONCE() to complain if used in the wrong place.
> > > - * This probably needs to be excluded from -rt builds.
> > > - */
> > > -#define rt_mutex_owner(a) ({ WARN_ON_ONCE(1); NULL; })
> > > -#define rt_mutex_futex_unlock(x) WARN_ON_ONCE(1)
> > > -
> > > -#endif /* #else #ifdef CONFIG_RCU_BOOST */
> > >  
> > >  #ifdef CONFIG_RCU_NOCB_CPU
> > >  static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
> > > @@ -94,6 +72,8 @@ static void __init rcu_bootup_announce_oddness(void)
> > >  		pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_init_delay);
> > >  	if (gp_cleanup_delay)
> > >  		pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_cleanup_delay);
> > > +	if (!use_softirq)
> > > +		pr_info("\tRCU_SOFTIRQ processing moved to rcuc kthreads.\n");
> > >  	if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG))
> > >  		pr_info("\tRCU debug extended QS entry/exit.\n");
> > >  	rcupdate_announce_bootup_oddness();
> > > @@ -629,7 +609,10 @@ static void rcu_read_unlock_special(struct task_struct *t)
> > >  		/* Need to defer quiescent state until everything is enabled. */
> > >  		if (irqs_were_disabled) {
> > >  			/* Enabling irqs does not reschedule, so... */
> > > -			raise_softirq_irqoff(RCU_SOFTIRQ);
> > > +			if (!use_softirq)
> > > +				raise_softirq_irqoff(RCU_SOFTIRQ);
> > 
> > I believe this inverted condition (the misplaced "!") has been corrected in Paul's tree so that's Ok.
> > 
> > > +			else
> > > +				invoke_rcu_core();
> > 
> > But why not just directly call invoke_rcu_core() here? That will do the
> > appropriate use_softirq check right?
> 
> It is -so- close!  But it invokes raise_softirq() instead of the needed
> raise_softirq_irqoff().
> 
> Plus I bet that this has a few more changes to go before it is all the
> way there.  ;-)

Ah yes, you are right :-)

thanks,

 - Joel
 

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v3] rcu: Allow to eliminate softirq processing from rcutree
  2019-03-23  0:25                             ` Paul E. McKenney
  2019-03-23  1:04                               ` Joel Fernandes
@ 2019-03-23 16:10                               ` Paul E. McKenney
  2019-03-24 23:42                                 ` Paul E. McKenney
  1 sibling, 1 reply; 44+ messages in thread
From: Paul E. McKenney @ 2019-03-23 16:10 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: Sebastian Andrzej Siewior, linux-kernel, Josh Triplett,
	Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan, tglx,
	Mike Galbraith

On Fri, Mar 22, 2019 at 05:25:19PM -0700, Paul E. McKenney wrote:
> On Fri, Mar 22, 2019 at 07:48:19PM -0400, Joel Fernandes wrote:
> > On Wed, Mar 20, 2019 at 10:13:33PM +0100, Sebastian Andrzej Siewior wrote:
> > > Running RCU out of softirq is a problem for some workloads that would
> > > like to manage RCU core processing independently of other softirq
> > > work, for example, setting kthread priority.  This commit therefore
> > > introduces the `rcunosoftirq' option which moves the RCU core work
> > > from softirq to a per-CPU/per-flavor SCHED_OTHER kthread named rcuc.
> > > The SCHED_OTHER approach avoids the scalability problems that appeared
> > > with the earlier attempt to move RCU core processing from softirq
> > > to kthreads.  That said, kernels built with RCU_BOOST=y will run the
> > > rcuc kthreads at the RCU-boosting priority.
> > [snip]
> > > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > > index 0f31b79eb6761..05a1e42fdaf10 100644
> > > --- a/kernel/rcu/tree.c
> > > +++ b/kernel/rcu/tree.c
> > > @@ -51,6 +51,12 @@
> > >  #include <linux/tick.h>
> > >  #include <linux/sysrq.h>
> > >  #include <linux/kprobes.h>
> > > +#include <linux/gfp.h>
> > > +#include <linux/oom.h>
> > > +#include <linux/smpboot.h>
> > > +#include <linux/jiffies.h>
> > > +#include <linux/sched/isolation.h>
> > > +#include "../time/tick-internal.h"
> > >  
> > >  #include "tree.h"
> > >  #include "rcu.h"
> > > @@ -92,6 +98,9 @@ struct rcu_state rcu_state = {
> > >  /* Dump rcu_node combining tree at boot to verify correct setup. */
> > >  static bool dump_tree;
> > >  module_param(dump_tree, bool, 0444);
> > > +/* Move RCU_SOFTIRQ to rcuc kthreads. */
> > > +static bool use_softirq = 1;
> > > +module_param(use_softirq, bool, 0444);
> > >  /* Control rcu_node-tree auto-balancing at boot time. */
> > >  static bool rcu_fanout_exact;
> > >  module_param(rcu_fanout_exact, bool, 0444);
> > > @@ -2253,7 +2262,7 @@ void rcu_force_quiescent_state(void)
> > >  EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
> > >  
> > >  /* Perform RCU core processing work for the current CPU.  */
> > > -static __latent_entropy void rcu_core(struct softirq_action *unused)
> > > +static __latent_entropy void rcu_core(void)
> > >  {
> > >  	unsigned long flags;
> > >  	struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);
> > > @@ -2295,6 +2304,34 @@ static __latent_entropy void rcu_core(struct softirq_action *unused)
> > >  	trace_rcu_utilization(TPS("End RCU core"));
> > >  }
> > >  
> > > +static void rcu_core_si(struct softirq_action *h)
> > > +{
> > > +	rcu_core();
> > > +}
> > > +
> > > +static void rcu_wake_cond(struct task_struct *t, int status)
> > > +{
> > > +	/*
> > > +	 * If the thread is yielding, only wake it when this
> > > +	 * is invoked from idle
> > > +	 */
> > > +	if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current)))
> > > +		wake_up_process(t);
> > > +}
> > > +
> > > +static void invoke_rcu_core_kthread(void)
> > > +{
> > > +	struct task_struct *t;
> > > +	unsigned long flags;
> > > +
> > > +	local_irq_save(flags);
> > > +	__this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
> > > +	t = __this_cpu_read(rcu_data.rcu_cpu_kthread_task);
> > > +	if (t != NULL && t != current)
> > > +		rcu_wake_cond(t, __this_cpu_read(rcu_data.rcu_cpu_kthread_status));
> > > +	local_irq_restore(flags);
> > > +}
> > > +
> > >  /*
> > >   * Schedule RCU callback invocation.  If the running implementation of RCU
> > >   * does not support RCU priority boosting, just do a direct call, otherwise
> > > @@ -2306,19 +2343,95 @@ static void invoke_rcu_callbacks(struct rcu_data *rdp)
> > >  {
> > >  	if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
> > >  		return;
> > > -	if (likely(!rcu_state.boost)) {
> > > -		rcu_do_batch(rdp);
> > > -		return;
> > > -	}
> > > -	invoke_rcu_callbacks_kthread();
> > > +	if (rcu_state.boost || !use_softirq)
> > > +		invoke_rcu_core_kthread();
> > > +	rcu_do_batch(rdp);
> > 
> > Shouldn't there be an else before the rcu_do_batch? If we are waking up the
> > rcuc thread, then that will do the rcu_do_batch when it runs right?
> > 
> > Something like:
> > 	if (rcu_state.boost || !use_softirq)
> > 		invoke_rcu_core_kthread();
> > 	else
> > 		rcu_do_batch(rdp);
> > 
> > Previous code similarly had a return; also.
> 
> I believe that you are correct, so I will give it a shot.  Good eyes!

Yet rcutorture disagrees.  Actually, if we are using rcuc kthreads, this
is only ever invoked from within that thread, so the only check we need is
for the scheduler being operational.  I am therefore trying this one out.

Thoughts?

							Thanx, Paul

------------------------------------------------------------------------

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 76d6c0902f66..8d6ebc0944ec 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2333,18 +2333,16 @@ static void invoke_rcu_core_kthread(void)
 }
 
 /*
- * Schedule RCU callback invocation.  If the running implementation of RCU
- * does not support RCU priority boosting, just do a direct call, otherwise
- * wake up the per-CPU kernel kthread.  Note that because we are running
- * on the current CPU with softirqs disabled, the rcu_cpu_kthread_task
- * cannot disappear out from under us.
+ * Do RCU callback invocation.  Note that if we are running !use_softirq,
+ * we are already in the rcuc kthread.  If callbacks are offloaded, then
+ * ->cblist is always empty, so we don't get here.  Therefore, we only
+ * ever need to check for the scheduler being operational (some callbacks
+ * do wakeups, so we do need the scheduler).
  */
 static void invoke_rcu_callbacks(struct rcu_data *rdp)
 {
 	if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
 		return;
-	if (rcu_state.boost || !use_softirq)
-		invoke_rcu_core_kthread();
 	rcu_do_batch(rdp);
 }
 


^ permalink raw reply related	[flat|nested] 44+ messages in thread

* Re: [PATCH v3] rcu: Allow to eliminate softirq processing from rcutree
  2019-03-23 16:10                               ` Paul E. McKenney
@ 2019-03-24 23:42                                 ` Paul E. McKenney
  2019-03-25 13:41                                   ` Joel Fernandes
  0 siblings, 1 reply; 44+ messages in thread
From: Paul E. McKenney @ 2019-03-24 23:42 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: Sebastian Andrzej Siewior, linux-kernel, Josh Triplett,
	Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan, tglx,
	Mike Galbraith

On Sat, Mar 23, 2019 at 09:10:02AM -0700, Paul E. McKenney wrote:
> On Fri, Mar 22, 2019 at 05:25:19PM -0700, Paul E. McKenney wrote:
> > On Fri, Mar 22, 2019 at 07:48:19PM -0400, Joel Fernandes wrote:
> > > On Wed, Mar 20, 2019 at 10:13:33PM +0100, Sebastian Andrzej Siewior wrote:
> > > > Running RCU out of softirq is a problem for some workloads that would
> > > > like to manage RCU core processing independently of other softirq
> > > > work, for example, setting kthread priority.  This commit therefore
> > > > introduces the `rcunosoftirq' option which moves the RCU core work
> > > > from softirq to a per-CPU/per-flavor SCHED_OTHER kthread named rcuc.
> > > > The SCHED_OTHER approach avoids the scalability problems that appeared
> > > > with the earlier attempt to move RCU core processing from softirq
> > > > to kthreads.  That said, kernels built with RCU_BOOST=y will run the
> > > > rcuc kthreads at the RCU-boosting priority.
> > > [snip]
> > > > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > > > index 0f31b79eb6761..05a1e42fdaf10 100644
> > > > --- a/kernel/rcu/tree.c
> > > > +++ b/kernel/rcu/tree.c
> > > > @@ -51,6 +51,12 @@
> > > >  #include <linux/tick.h>
> > > >  #include <linux/sysrq.h>
> > > >  #include <linux/kprobes.h>
> > > > +#include <linux/gfp.h>
> > > > +#include <linux/oom.h>
> > > > +#include <linux/smpboot.h>
> > > > +#include <linux/jiffies.h>
> > > > +#include <linux/sched/isolation.h>
> > > > +#include "../time/tick-internal.h"
> > > >  
> > > >  #include "tree.h"
> > > >  #include "rcu.h"
> > > > @@ -92,6 +98,9 @@ struct rcu_state rcu_state = {
> > > >  /* Dump rcu_node combining tree at boot to verify correct setup. */
> > > >  static bool dump_tree;
> > > >  module_param(dump_tree, bool, 0444);
> > > > +/* Move RCU_SOFTIRQ to rcuc kthreads. */
> > > > +static bool use_softirq = 1;
> > > > +module_param(use_softirq, bool, 0444);
> > > >  /* Control rcu_node-tree auto-balancing at boot time. */
> > > >  static bool rcu_fanout_exact;
> > > >  module_param(rcu_fanout_exact, bool, 0444);
> > > > @@ -2253,7 +2262,7 @@ void rcu_force_quiescent_state(void)
> > > >  EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
> > > >  
> > > >  /* Perform RCU core processing work for the current CPU.  */
> > > > -static __latent_entropy void rcu_core(struct softirq_action *unused)
> > > > +static __latent_entropy void rcu_core(void)
> > > >  {
> > > >  	unsigned long flags;
> > > >  	struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);
> > > > @@ -2295,6 +2304,34 @@ static __latent_entropy void rcu_core(struct softirq_action *unused)
> > > >  	trace_rcu_utilization(TPS("End RCU core"));
> > > >  }
> > > >  
> > > > +static void rcu_core_si(struct softirq_action *h)
> > > > +{
> > > > +	rcu_core();
> > > > +}
> > > > +
> > > > +static void rcu_wake_cond(struct task_struct *t, int status)
> > > > +{
> > > > +	/*
> > > > +	 * If the thread is yielding, only wake it when this
> > > > +	 * is invoked from idle
> > > > +	 */
> > > > +	if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current)))
> > > > +		wake_up_process(t);
> > > > +}
> > > > +
> > > > +static void invoke_rcu_core_kthread(void)
> > > > +{
> > > > +	struct task_struct *t;
> > > > +	unsigned long flags;
> > > > +
> > > > +	local_irq_save(flags);
> > > > +	__this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
> > > > +	t = __this_cpu_read(rcu_data.rcu_cpu_kthread_task);
> > > > +	if (t != NULL && t != current)
> > > > +		rcu_wake_cond(t, __this_cpu_read(rcu_data.rcu_cpu_kthread_status));
> > > > +	local_irq_restore(flags);
> > > > +}
> > > > +
> > > >  /*
> > > >   * Schedule RCU callback invocation.  If the running implementation of RCU
> > > >   * does not support RCU priority boosting, just do a direct call, otherwise
> > > > @@ -2306,19 +2343,95 @@ static void invoke_rcu_callbacks(struct rcu_data *rdp)
> > > >  {
> > > >  	if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
> > > >  		return;
> > > > -	if (likely(!rcu_state.boost)) {
> > > > -		rcu_do_batch(rdp);
> > > > -		return;
> > > > -	}
> > > > -	invoke_rcu_callbacks_kthread();
> > > > +	if (rcu_state.boost || !use_softirq)
> > > > +		invoke_rcu_core_kthread();
> > > > +	rcu_do_batch(rdp);
> > > 
> > > Shouldn't there be an else before the rcu_do_batch? If we are waking up the
> > > rcuc thread, then that will do the rcu_do_batch when it runs right?
> > > 
> > > Something like:
> > > 	if (rcu_state.boost || !use_softirq)
> > > 		invoke_rcu_core_kthread();
> > > 	else
> > > 		rcu_do_batch(rdp);
> > > 
> > > Previous code similarly had a return; also.
> > 
> > I believe that you are correct, so I will give it a shot.  Good eyes!
> 
> Yet rcutorture disagrees.  Actually, if we are using rcuc kthreads, this
> is only ever invoked from within that thread, so the only check we need is
> for the scheduler being operational.  I am therefore trying this one out.
> 
> Thoughts?

And rcutorture likes this one, though at this point this function should
be pulled into its sole callsite.  ;-)
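
Roughly, and assuming the call site in rcu_core() is guarded by a
ready-callbacks check (that guard is not quoted in this thread, so treat
this as a sketch rather than the final code), the fold-in would look
something like:

	/* At the tail of rcu_core(), in place of the invoke_rcu_callbacks() call: */
	if (rcu_segcblist_ready_cbs(&rdp->cblist) &&
	    likely(READ_ONCE(rcu_scheduler_fully_active)))
		rcu_do_batch(rdp);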

							Thanx, Paul

> ------------------------------------------------------------------------
> 
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index 76d6c0902f66..8d6ebc0944ec 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -2333,18 +2333,16 @@ static void invoke_rcu_core_kthread(void)
>  }
>  
>  /*
> - * Schedule RCU callback invocation.  If the running implementation of RCU
> - * does not support RCU priority boosting, just do a direct call, otherwise
> - * wake up the per-CPU kernel kthread.  Note that because we are running
> - * on the current CPU with softirqs disabled, the rcu_cpu_kthread_task
> - * cannot disappear out from under us.
> + * Do RCU callback invocation.  Note that if we are running !use_softirq,
> + * we are already in the rcuc kthread.  If callbacks are offloaded, then
> + * ->cblist is always empty, so we don't get here.  Therefore, we only
> + * ever need to check for the scheduler being operational (some callbacks
> + * do wakeups, so we do need the scheduler).
>   */
>  static void invoke_rcu_callbacks(struct rcu_data *rdp)
>  {
>  	if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
>  		return;
> -	if (rcu_state.boost || !use_softirq)
> -		invoke_rcu_core_kthread();
>  	rcu_do_batch(rdp);
>  }
>  


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v3] rcu: Allow to eliminate softirq processing from rcutree
  2019-03-24 23:42                                 ` Paul E. McKenney
@ 2019-03-25 13:41                                   ` Joel Fernandes
  2019-03-25 15:08                                     ` Paul E. McKenney
  0 siblings, 1 reply; 44+ messages in thread
From: Joel Fernandes @ 2019-03-25 13:41 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: Sebastian Andrzej Siewior, linux-kernel, Josh Triplett,
	Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan, tglx,
	Mike Galbraith

On Sun, Mar 24, 2019 at 04:42:11PM -0700, Paul E. McKenney wrote:
> On Sat, Mar 23, 2019 at 09:10:02AM -0700, Paul E. McKenney wrote:
> > On Fri, Mar 22, 2019 at 05:25:19PM -0700, Paul E. McKenney wrote:
> > > On Fri, Mar 22, 2019 at 07:48:19PM -0400, Joel Fernandes wrote:
> > > > On Wed, Mar 20, 2019 at 10:13:33PM +0100, Sebastian Andrzej Siewior wrote:
> > > > > Running RCU out of softirq is a problem for some workloads that would
> > > > > like to manage RCU core processing independently of other softirq
> > > > > work, for example, setting kthread priority.  This commit therefore
> > > > > introduces the `rcunosoftirq' option which moves the RCU core work
> > > > > from softirq to a per-CPU/per-flavor SCHED_OTHER kthread named rcuc.
> > > > > The SCHED_OTHER approach avoids the scalability problems that appeared
> > > > > with the earlier attempt to move RCU core processing from softirq
> > > > > to kthreads.  That said, kernels built with RCU_BOOST=y will run the
> > > > > rcuc kthreads at the RCU-boosting priority.
> > > > [snip]
> > > > > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > > > > index 0f31b79eb6761..05a1e42fdaf10 100644
> > > > > --- a/kernel/rcu/tree.c
> > > > > +++ b/kernel/rcu/tree.c
> > > > > @@ -51,6 +51,12 @@
> > > > >  #include <linux/tick.h>
> > > > >  #include <linux/sysrq.h>
> > > > >  #include <linux/kprobes.h>
> > > > > +#include <linux/gfp.h>
> > > > > +#include <linux/oom.h>
> > > > > +#include <linux/smpboot.h>
> > > > > +#include <linux/jiffies.h>
> > > > > +#include <linux/sched/isolation.h>
> > > > > +#include "../time/tick-internal.h"
> > > > >  
> > > > >  #include "tree.h"
> > > > >  #include "rcu.h"
> > > > > @@ -92,6 +98,9 @@ struct rcu_state rcu_state = {
> > > > >  /* Dump rcu_node combining tree at boot to verify correct setup. */
> > > > >  static bool dump_tree;
> > > > >  module_param(dump_tree, bool, 0444);
> > > > > +/* Move RCU_SOFTIRQ to rcuc kthreads. */
> > > > > +static bool use_softirq = 1;
> > > > > +module_param(use_softirq, bool, 0444);
> > > > >  /* Control rcu_node-tree auto-balancing at boot time. */
> > > > >  static bool rcu_fanout_exact;
> > > > >  module_param(rcu_fanout_exact, bool, 0444);
> > > > > @@ -2253,7 +2262,7 @@ void rcu_force_quiescent_state(void)
> > > > >  EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
> > > > >  
> > > > >  /* Perform RCU core processing work for the current CPU.  */
> > > > > -static __latent_entropy void rcu_core(struct softirq_action *unused)
> > > > > +static __latent_entropy void rcu_core(void)
> > > > >  {
> > > > >  	unsigned long flags;
> > > > >  	struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);
> > > > > @@ -2295,6 +2304,34 @@ static __latent_entropy void rcu_core(struct softirq_action *unused)
> > > > >  	trace_rcu_utilization(TPS("End RCU core"));
> > > > >  }
> > > > >  
> > > > > +static void rcu_core_si(struct softirq_action *h)
> > > > > +{
> > > > > +	rcu_core();
> > > > > +}
> > > > > +
> > > > > +static void rcu_wake_cond(struct task_struct *t, int status)
> > > > > +{
> > > > > +	/*
> > > > > +	 * If the thread is yielding, only wake it when this
> > > > > +	 * is invoked from idle
> > > > > +	 */
> > > > > +	if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current)))
> > > > > +		wake_up_process(t);
> > > > > +}
> > > > > +
> > > > > +static void invoke_rcu_core_kthread(void)
> > > > > +{
> > > > > +	struct task_struct *t;
> > > > > +	unsigned long flags;
> > > > > +
> > > > > +	local_irq_save(flags);
> > > > > +	__this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
> > > > > +	t = __this_cpu_read(rcu_data.rcu_cpu_kthread_task);
> > > > > +	if (t != NULL && t != current)
> > > > > +		rcu_wake_cond(t, __this_cpu_read(rcu_data.rcu_cpu_kthread_status));
> > > > > +	local_irq_restore(flags);
> > > > > +}
> > > > > +
> > > > >  /*
> > > > >   * Schedule RCU callback invocation.  If the running implementation of RCU
> > > > >   * does not support RCU priority boosting, just do a direct call, otherwise
> > > > > @@ -2306,19 +2343,95 @@ static void invoke_rcu_callbacks(struct rcu_data *rdp)
> > > > >  {
> > > > >  	if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
> > > > >  		return;
> > > > > -	if (likely(!rcu_state.boost)) {
> > > > > -		rcu_do_batch(rdp);
> > > > > -		return;
> > > > > -	}
> > > > > -	invoke_rcu_callbacks_kthread();
> > > > > +	if (rcu_state.boost || !use_softirq)
> > > > > +		invoke_rcu_core_kthread();
> > > > > +	rcu_do_batch(rdp);
> > > > 
> > > > Shouldn't there be an else before the rcu_do_batch? If we are waking up the
> > > > rcuc thread, then that will do the rcu_do_batch when it runs right?
> > > > 
> > > > Something like:
> > > > 	if (rcu_state.boost || !use_softirq)
> > > > 		invoke_rcu_core_kthread();
> > > > 	else
> > > > 		rcu_do_batch(rdp);
> > > > 
> > > > Previous code similarly had a return; also.
> > > 
> > > I believe that you are correct, so I will give it a shot.  Good eyes!
> > 
> > Yet rcutorture disagrees.  Actually, if we are using rcuc kthreads, this
> > is only ever invoked from within that thread, so the only check we need is
> > for the scheduler being operational.  I am therefore trying this one out.
> > 
> > Thoughts?
> 
> And rcutorture likes this one, though at this point this function should
> be pulled into its sole callsite.  ;-)

Great, I'm glad the testing is going well.

By the way I enlightened that jitter.sh script about CPU offline issues as
well (sent patch last week).  Let me know if you agree with it.

thanks!

 - Joel


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v3] rcu: Allow to eliminate softirq processing from rcutree
  2019-03-25 13:41                                   ` Joel Fernandes
@ 2019-03-25 15:08                                     ` Paul E. McKenney
  2019-03-25 15:52                                       ` Paul E. McKenney
  0 siblings, 1 reply; 44+ messages in thread
From: Paul E. McKenney @ 2019-03-25 15:08 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: Sebastian Andrzej Siewior, linux-kernel, Josh Triplett,
	Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan, tglx,
	Mike Galbraith

On Mon, Mar 25, 2019 at 09:41:29AM -0400, Joel Fernandes wrote:
> On Sun, Mar 24, 2019 at 04:42:11PM -0700, Paul E. McKenney wrote:
> > On Sat, Mar 23, 2019 at 09:10:02AM -0700, Paul E. McKenney wrote:
> > > On Fri, Mar 22, 2019 at 05:25:19PM -0700, Paul E. McKenney wrote:
> > > > On Fri, Mar 22, 2019 at 07:48:19PM -0400, Joel Fernandes wrote:
> > > > > On Wed, Mar 20, 2019 at 10:13:33PM +0100, Sebastian Andrzej Siewior wrote:
> > > > > > Running RCU out of softirq is a problem for some workloads that would
> > > > > > like to manage RCU core processing independently of other softirq
> > > > > > work, for example, setting kthread priority.  This commit therefore
> > > > > > introduces the `rcunosoftirq' option which moves the RCU core work
> > > > > > from softirq to a per-CPU/per-flavor SCHED_OTHER kthread named rcuc.
> > > > > > The SCHED_OTHER approach avoids the scalability problems that appeared
> > > > > > with the earlier attempt to move RCU core processing from softirq
> > > > > > to kthreads.  That said, kernels built with RCU_BOOST=y will run the
> > > > > > rcuc kthreads at the RCU-boosting priority.
> > > > > [snip]
> > > > > > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > > > > > index 0f31b79eb6761..05a1e42fdaf10 100644
> > > > > > --- a/kernel/rcu/tree.c
> > > > > > +++ b/kernel/rcu/tree.c
> > > > > > @@ -51,6 +51,12 @@
> > > > > >  #include <linux/tick.h>
> > > > > >  #include <linux/sysrq.h>
> > > > > >  #include <linux/kprobes.h>
> > > > > > +#include <linux/gfp.h>
> > > > > > +#include <linux/oom.h>
> > > > > > +#include <linux/smpboot.h>
> > > > > > +#include <linux/jiffies.h>
> > > > > > +#include <linux/sched/isolation.h>
> > > > > > +#include "../time/tick-internal.h"
> > > > > >  
> > > > > >  #include "tree.h"
> > > > > >  #include "rcu.h"
> > > > > > @@ -92,6 +98,9 @@ struct rcu_state rcu_state = {
> > > > > >  /* Dump rcu_node combining tree at boot to verify correct setup. */
> > > > > >  static bool dump_tree;
> > > > > >  module_param(dump_tree, bool, 0444);
> > > > > > +/* Move RCU_SOFTIRQ to rcuc kthreads. */
> > > > > > +static bool use_softirq = 1;
> > > > > > +module_param(use_softirq, bool, 0444);
> > > > > >  /* Control rcu_node-tree auto-balancing at boot time. */
> > > > > >  static bool rcu_fanout_exact;
> > > > > >  module_param(rcu_fanout_exact, bool, 0444);
> > > > > > @@ -2253,7 +2262,7 @@ void rcu_force_quiescent_state(void)
> > > > > >  EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
> > > > > >  
> > > > > >  /* Perform RCU core processing work for the current CPU.  */
> > > > > > -static __latent_entropy void rcu_core(struct softirq_action *unused)
> > > > > > +static __latent_entropy void rcu_core(void)
> > > > > >  {
> > > > > >  	unsigned long flags;
> > > > > >  	struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);
> > > > > > @@ -2295,6 +2304,34 @@ static __latent_entropy void rcu_core(struct softirq_action *unused)
> > > > > >  	trace_rcu_utilization(TPS("End RCU core"));
> > > > > >  }
> > > > > >  
> > > > > > +static void rcu_core_si(struct softirq_action *h)
> > > > > > +{
> > > > > > +	rcu_core();
> > > > > > +}
> > > > > > +
> > > > > > +static void rcu_wake_cond(struct task_struct *t, int status)
> > > > > > +{
> > > > > > +	/*
> > > > > > +	 * If the thread is yielding, only wake it when this
> > > > > > +	 * is invoked from idle
> > > > > > +	 */
> > > > > > +	if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current)))
> > > > > > +		wake_up_process(t);
> > > > > > +}
> > > > > > +
> > > > > > +static void invoke_rcu_core_kthread(void)
> > > > > > +{
> > > > > > +	struct task_struct *t;
> > > > > > +	unsigned long flags;
> > > > > > +
> > > > > > +	local_irq_save(flags);
> > > > > > +	__this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
> > > > > > +	t = __this_cpu_read(rcu_data.rcu_cpu_kthread_task);
> > > > > > +	if (t != NULL && t != current)
> > > > > > +		rcu_wake_cond(t, __this_cpu_read(rcu_data.rcu_cpu_kthread_status));
> > > > > > +	local_irq_restore(flags);
> > > > > > +}
> > > > > > +
> > > > > >  /*
> > > > > >   * Schedule RCU callback invocation.  If the running implementation of RCU
> > > > > >   * does not support RCU priority boosting, just do a direct call, otherwise
> > > > > > @@ -2306,19 +2343,95 @@ static void invoke_rcu_callbacks(struct rcu_data *rdp)
> > > > > >  {
> > > > > >  	if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
> > > > > >  		return;
> > > > > > -	if (likely(!rcu_state.boost)) {
> > > > > > -		rcu_do_batch(rdp);
> > > > > > -		return;
> > > > > > -	}
> > > > > > -	invoke_rcu_callbacks_kthread();
> > > > > > +	if (rcu_state.boost || !use_softirq)
> > > > > > +		invoke_rcu_core_kthread();
> > > > > > +	rcu_do_batch(rdp);
> > > > > 
> > > > > Shouldn't there be an else before the rcu_do_batch? If we are waking up the
> > > > > rcuc thread, then that will do the rcu_do_batch when it runs right?
> > > > > 
> > > > > Something like:
> > > > > 	if (rcu_state.boost || !use_softirq)
> > > > > 		invoke_rcu_core_kthread();
> > > > > 	else
> > > > > 		rcu_do_batch(rdp);
> > > > > 
> > > > > Previous code similarly had a return; also.
> > > > 
> > > > I believe that you are correct, so I will give it a shot.  Good eyes!
> > > 
> > > Yet rcutorture disagrees.  Actually, if we are using rcuc kthreads, this
> > > is only ever invoked from within that thread, so the only check we need is
> > > for the scheduler being operational.  I am therefore trying this one out.
> > > 
> > > Thoughts?
> > 
> > And rcutorture likes this one, though at this point this function should
> > be pulled into its sole callsite.  ;-)
> 
> Great, I'm glad the testing is going well.

Which reminds me...  I have been assuming that Frederic Weisbecker's
split-softirq patches were stalled for the time being.

http://lkml.kernel.org/r/20190228171242.32144-1-frederic@kernel.org

If those were to show up soonish, perhaps that would allow per-softirq
control of priority.

My thought is not to wait, but I figured I should mention it.

> By the way I enlightened that jitter.sh script about CPU offline issues as
> well (sent patch last week).  Let me know if you agree with it.

I just sent a reply.  Still trying to remember why I excluded CPU 0.  ;-)

Perhaps because of issues with single-CPU rcutorture runs?

							Thanx, Paul


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH v3] rcu: Allow to eliminate softirq processing from rcutree
  2019-03-25 15:08                                     ` Paul E. McKenney
@ 2019-03-25 15:52                                       ` Paul E. McKenney
  0 siblings, 0 replies; 44+ messages in thread
From: Paul E. McKenney @ 2019-03-25 15:52 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: Sebastian Andrzej Siewior, linux-kernel, Josh Triplett,
	Steven Rostedt, Mathieu Desnoyers, Lai Jiangshan, tglx,
	Mike Galbraith

On Mon, Mar 25, 2019 at 08:08:00AM -0700, Paul E. McKenney wrote:
> On Mon, Mar 25, 2019 at 09:41:29AM -0400, Joel Fernandes wrote:
> > On Sun, Mar 24, 2019 at 04:42:11PM -0700, Paul E. McKenney wrote:
> > > On Sat, Mar 23, 2019 at 09:10:02AM -0700, Paul E. McKenney wrote:
> > > > On Fri, Mar 22, 2019 at 05:25:19PM -0700, Paul E. McKenney wrote:
> > > > > On Fri, Mar 22, 2019 at 07:48:19PM -0400, Joel Fernandes wrote:
> > > > > > On Wed, Mar 20, 2019 at 10:13:33PM +0100, Sebastian Andrzej Siewior wrote:
> > > > > > > Running RCU out of softirq is a problem for some workloads that would
> > > > > > > like to manage RCU core processing independently of other softirq
> > > > > > > work, for example, setting kthread priority.  This commit therefore
> > > > > > > introduces the `rcunosoftirq' option which moves the RCU core work
> > > > > > > from softirq to a per-CPU/per-flavor SCHED_OTHER kthread named rcuc.
> > > > > > > The SCHED_OTHER approach avoids the scalability problems that appeared
> > > > > > > with the earlier attempt to move RCU core processing from softirq
> > > > > > > to kthreads.  That said, kernels built with RCU_BOOST=y will run the
> > > > > > > rcuc kthreads at the RCU-boosting priority.
> > > > > > [snip]
> > > > > > > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > > > > > > index 0f31b79eb6761..05a1e42fdaf10 100644
> > > > > > > --- a/kernel/rcu/tree.c
> > > > > > > +++ b/kernel/rcu/tree.c
> > > > > > > @@ -51,6 +51,12 @@
> > > > > > >  #include <linux/tick.h>
> > > > > > >  #include <linux/sysrq.h>
> > > > > > >  #include <linux/kprobes.h>
> > > > > > > +#include <linux/gfp.h>
> > > > > > > +#include <linux/oom.h>
> > > > > > > +#include <linux/smpboot.h>
> > > > > > > +#include <linux/jiffies.h>
> > > > > > > +#include <linux/sched/isolation.h>
> > > > > > > +#include "../time/tick-internal.h"
> > > > > > >  
> > > > > > >  #include "tree.h"
> > > > > > >  #include "rcu.h"
> > > > > > > @@ -92,6 +98,9 @@ struct rcu_state rcu_state = {
> > > > > > >  /* Dump rcu_node combining tree at boot to verify correct setup. */
> > > > > > >  static bool dump_tree;
> > > > > > >  module_param(dump_tree, bool, 0444);
> > > > > > > +/* Move RCU_SOFTIRQ to rcuc kthreads. */
> > > > > > > +static bool use_softirq = 1;
> > > > > > > +module_param(use_softirq, bool, 0444);
> > > > > > >  /* Control rcu_node-tree auto-balancing at boot time. */
> > > > > > >  static bool rcu_fanout_exact;
> > > > > > >  module_param(rcu_fanout_exact, bool, 0444);
> > > > > > > @@ -2253,7 +2262,7 @@ void rcu_force_quiescent_state(void)
> > > > > > >  EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
> > > > > > >  
> > > > > > >  /* Perform RCU core processing work for the current CPU.  */
> > > > > > > -static __latent_entropy void rcu_core(struct softirq_action *unused)
> > > > > > > +static __latent_entropy void rcu_core(void)
> > > > > > >  {
> > > > > > >  	unsigned long flags;
> > > > > > >  	struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);
> > > > > > > @@ -2295,6 +2304,34 @@ static __latent_entropy void rcu_core(struct softirq_action *unused)
> > > > > > >  	trace_rcu_utilization(TPS("End RCU core"));
> > > > > > >  }
> > > > > > >  
> > > > > > > +static void rcu_core_si(struct softirq_action *h)
> > > > > > > +{
> > > > > > > +	rcu_core();
> > > > > > > +}
> > > > > > > +
> > > > > > > +static void rcu_wake_cond(struct task_struct *t, int status)
> > > > > > > +{
> > > > > > > +	/*
> > > > > > > +	 * If the thread is yielding, only wake it when this
> > > > > > > +	 * is invoked from idle
> > > > > > > +	 */
> > > > > > > +	if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current)))
> > > > > > > +		wake_up_process(t);
> > > > > > > +}
> > > > > > > +
> > > > > > > +static void invoke_rcu_core_kthread(void)
> > > > > > > +{
> > > > > > > +	struct task_struct *t;
> > > > > > > +	unsigned long flags;
> > > > > > > +
> > > > > > > +	local_irq_save(flags);
> > > > > > > +	__this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
> > > > > > > +	t = __this_cpu_read(rcu_data.rcu_cpu_kthread_task);
> > > > > > > +	if (t != NULL && t != current)
> > > > > > > +		rcu_wake_cond(t, __this_cpu_read(rcu_data.rcu_cpu_kthread_status));
> > > > > > > +	local_irq_restore(flags);
> > > > > > > +}
> > > > > > > +
> > > > > > >  /*
> > > > > > >   * Schedule RCU callback invocation.  If the running implementation of RCU
> > > > > > >   * does not support RCU priority boosting, just do a direct call, otherwise
> > > > > > > @@ -2306,19 +2343,95 @@ static void invoke_rcu_callbacks(struct rcu_data *rdp)
> > > > > > >  {
> > > > > > >  	if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
> > > > > > >  		return;
> > > > > > > -	if (likely(!rcu_state.boost)) {
> > > > > > > -		rcu_do_batch(rdp);
> > > > > > > -		return;
> > > > > > > -	}
> > > > > > > -	invoke_rcu_callbacks_kthread();
> > > > > > > +	if (rcu_state.boost || !use_softirq)
> > > > > > > +		invoke_rcu_core_kthread();
> > > > > > > +	rcu_do_batch(rdp);
> > > > > > 
> > > > > > Shouldn't there be an else before the rcu_do_batch? If we are waking up the
> > > > > > rcuc thread, then that will do the rcu_do_batch when it runs, right?
> > > > > > 
> > > > > > Something like:
> > > > > > 	if (rcu_state.boost || !use_softirq)
> > > > > > 		invoke_rcu_core_kthread();
> > > > > > 	else
> > > > > > 		rcu_do_batch(rdp);
> > > > > > 
> > > > > > The previous code similarly had a return; at that point.
> > > > > 
> > > > > I believe that you are correct, so I will give it a shot.  Good eyes!
> > > > 
> > > > Yet rcutorture disagrees.  Actually, if we are using rcuc kthreads, this
> > > > is only ever invoked from within that thread, so the only check we need is
> > > > for the scheduler being operational.  I am therefore trying this one out.
> > > > 
> > > > Thoughts?
> > > 
> > > And rcutorture likes this one, though at this point this function should
> > > be pulled into its sole callsite.  ;-)
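
[ A minimal sketch of the simplification being described, for reference.
  The replacement patch itself is not quoted in this thread, so the exact
  form below is an assumption; the point is that only the
  scheduler-operational test remains in front of rcu_do_batch():

	static void invoke_rcu_callbacks(struct rcu_data *rdp)
	{
		/* Nothing to do until the scheduler is fully up. */
		if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
			return;
		/* Otherwise, invoke the ready callbacks directly. */
		rcu_do_batch(rdp);
	}

  As noted above, with a single check remaining, this helper can then be
  folded into rcu_core(), its sole caller. ]
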
> > 
> > Great, I'm glad the testing is going well.
> 
> Which reminds me...  I have been assuming that Frederic Weisbecker's
> split-softirq patches were stalled for the time being.
> 
> http://lkml.kernel.org/r/20190228171242.32144-1-frederic@kernel.org
> 
> If those were to show up soonish, perhaps that would allow per-softirq
> control of priority.
> 
> My thought is not to wait, but I figured I should mention it.
> 
> > By the way, I enlightened that jitter.sh script about CPU offline issues as
> > well (sent patch last week).  Let me know if you agree with it.
> 
> I just sent a reply.  Still trying to remember why I excluded CPU 0.  ;-)
> 
> Perhaps because of issues with single-CPU rcutorture runs?

I also considered and rejected the following patch because it can
actually make sense to build with CONFIG_RCU_BOOST=y but still use
softirq, for example, when SCHED_IDLE tasks might get stuck in RCU
read-side critical sections.  But then I noticed that
rcu_spawn_core_kthreads() unconditionally creates the rcuc kthreads
when CONFIG_RCU_BOOST=y.

So I either need to apply the patch below, or I need to remove
the "!IS_ENABLED(CONFIG_RCU_BOOST)" from the "if" statement in
rcu_spawn_core_kthreads().  The question is "do we allow CONFIG_RCU_BOOST
kernels to use RCU_SOFTIRQ?"  Some plusses and minuses:

+	Supports the SCHED_IDLE use case for CONFIG_RCU_BOOST without
	slowing down other workloads.  This might be important given
	RCU flavor consolidation.

-	Another configuration combination to test and maintain.

So I am leaning towards ditching the patch below in favor of updating
the "if" condition in rcu_spawn_core_kthreads().

Thoughts?

							Thanx, Paul

------------------------------------------------------------------------

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index a17034ee4d3d..5782fe9ac27d 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -99,8 +99,12 @@ struct rcu_state rcu_state = {
 static bool dump_tree;
 module_param(dump_tree, bool, 0444);
 /* By default, use RCU_SOFTIRQ instead of rcuc kthreads. */
+#ifdef CONFIG_RCU_BOOST
+static const bool use_softirq = 0;
+#else /* #ifdef CONFIG_RCU_BOOST */
 static bool use_softirq = 1;
 module_param(use_softirq, bool, 0444);
+#endif /* #else #ifdef CONFIG_RCU_BOOST */
 /* Control rcu_node-tree auto-balancing at boot time. */
 static bool rcu_fanout_exact;
 module_param(rcu_fanout_exact, bool, 0444);


^ permalink raw reply related	[flat|nested] 44+ messages in thread

end of thread, other threads:[~2019-03-25 15:52 UTC | newest]

Thread overview: 44+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-03-15 11:11 [PATCH] rcu: Allow to eliminate softirq processing from rcutree Sebastian Andrzej Siewior
2019-03-15 13:35 ` Steven Rostedt
2019-03-15 13:57   ` Sebastian Andrzej Siewior
2019-03-18  2:24 ` Paul E. McKenney
2019-03-19 11:44   ` [PATCH v2] " Sebastian Andrzej Siewior
2019-03-19 15:59     ` Paul E. McKenney
2019-03-19 16:24       ` Sebastian Andrzej Siewior
2019-03-19 16:50         ` Paul E. McKenney
2019-03-19 17:02           ` Sebastian Andrzej Siewior
2019-03-20 11:32     ` Sebastian Andrzej Siewior
2019-03-20 15:21       ` Paul E. McKenney
2019-03-20 15:44         ` Paul E. McKenney
2019-03-20 16:05           ` Sebastian Andrzej Siewior
2019-03-20 16:15             ` Paul E. McKenney
2019-03-20 16:35               ` Sebastian Andrzej Siewior
2019-03-20 17:30                 ` Paul E. McKenney
2019-03-20 17:59                   ` Sebastian Andrzej Siewior
2019-03-20 18:12                     ` Paul E. McKenney
2019-03-20 18:14                       ` Sebastian Andrzej Siewior
2019-03-20 21:13                         ` [PATCH v3] " Sebastian Andrzej Siewior
2019-03-20 23:46                           ` Paul E. McKenney
2019-03-21  8:27                             ` Sebastian Andrzej Siewior
2019-03-21 13:26                               ` Paul E. McKenney
2019-03-21 23:32                             ` Paul E. McKenney
2019-03-22  7:35                               ` Paul E. McKenney
2019-03-22 12:43                                 ` Paul E. McKenney
2019-03-22 13:42                               ` Joel Fernandes
2019-03-22 14:58                                 ` Paul E. McKenney
2019-03-22 15:50                                   ` Joel Fernandes
2019-03-22 16:26                                     ` Paul E. McKenney
2019-03-22 18:07                                       ` Paul E. McKenney
2019-03-22 23:48                           ` Joel Fernandes
2019-03-23  0:25                             ` Paul E. McKenney
2019-03-23  1:04                               ` Joel Fernandes
2019-03-23 16:10                               ` Paul E. McKenney
2019-03-24 23:42                                 ` Paul E. McKenney
2019-03-25 13:41                                   ` Joel Fernandes
2019-03-25 15:08                                     ` Paul E. McKenney
2019-03-25 15:52                                       ` Paul E. McKenney
2019-03-20  0:26 ` [PATCH] " Joel Fernandes
2019-03-20 11:28   ` Sebastian Andrzej Siewior
2019-03-21 12:06     ` Joel Fernandes
2019-03-21 13:52       ` Paul E. McKenney
2019-03-20 15:24   ` Paul E. McKenney
