linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Paul E. McKenney" <paulmck@linux.ibm.com>
To: Joel Fernandes <joel@joelfernandes.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>,
	linux-kernel@vger.kernel.org,
	Josh Triplett <josh@joshtriplett.org>,
	Steven Rostedt <rostedt@goodmis.org>,
	Mathieu Desnoyers <mathieu.desnoyers@efficios.com>,
	Lai Jiangshan <jiangshanlai@gmail.com>,
	tglx@linutronix.de, Mike Galbraith <efault@gmx.de>
Subject: Re: [PATCH v3] rcu: Allow to eliminate softirq processing from rcutree
Date: Fri, 22 Mar 2019 17:25:19 -0700	[thread overview]
Message-ID: <20190323002519.GV4102@linux.ibm.com> (raw)
In-Reply-To: <20190322234819.GA99360@google.com>

On Fri, Mar 22, 2019 at 07:48:19PM -0400, Joel Fernandes wrote:
> On Wed, Mar 20, 2019 at 10:13:33PM +0100, Sebastian Andrzej Siewior wrote:
> > Running RCU out of softirq is a problem for some workloads that would
> > like to manage RCU core processing independently of other softirq
> > work, for example, setting kthread priority.  This commit therefore
> > introduces the `rcunosoftirq' option which moves the RCU core work
> > from softirq to a per-CPU/per-flavor SCHED_OTHER kthread named rcuc.
> > The SCHED_OTHER approach avoids the scalability problems that appeared
> > with the earlier attempt to move RCU core processing from softirq
> > to kthreads.  That said, kernels built with RCU_BOOST=y will run the
> > rcuc kthreads at the RCU-boosting priority.
> [snip]
> > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > index 0f31b79eb6761..05a1e42fdaf10 100644
> > --- a/kernel/rcu/tree.c
> > +++ b/kernel/rcu/tree.c
> > @@ -51,6 +51,12 @@
> >  #include <linux/tick.h>
> >  #include <linux/sysrq.h>
> >  #include <linux/kprobes.h>
> > +#include <linux/gfp.h>
> > +#include <linux/oom.h>
> > +#include <linux/smpboot.h>
> > +#include <linux/jiffies.h>
> > +#include <linux/sched/isolation.h>
> > +#include "../time/tick-internal.h"
> >  
> >  #include "tree.h"
> >  #include "rcu.h"
> > @@ -92,6 +98,9 @@ struct rcu_state rcu_state = {
> >  /* Dump rcu_node combining tree at boot to verify correct setup. */
> >  static bool dump_tree;
> >  module_param(dump_tree, bool, 0444);
> > +/* Move RCU_SOFTIRQ to rcuc kthreads. */
> > +static bool use_softirq = 1;
> > +module_param(use_softirq, bool, 0444);
> >  /* Control rcu_node-tree auto-balancing at boot time. */
> >  static bool rcu_fanout_exact;
> >  module_param(rcu_fanout_exact, bool, 0444);
> > @@ -2253,7 +2262,7 @@ void rcu_force_quiescent_state(void)
> >  EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
> >  
> >  /* Perform RCU core processing work for the current CPU.  */
> > -static __latent_entropy void rcu_core(struct softirq_action *unused)
> > +static __latent_entropy void rcu_core(void)
> >  {
> >  	unsigned long flags;
> >  	struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);
> > @@ -2295,6 +2304,34 @@ static __latent_entropy void rcu_core(struct softirq_action *unused)
> >  	trace_rcu_utilization(TPS("End RCU core"));
> >  }
> >  
> > +static void rcu_core_si(struct softirq_action *h)
> > +{
> > +	rcu_core();
> > +}
> > +
> > +static void rcu_wake_cond(struct task_struct *t, int status)
> > +{
> > +	/*
> > +	 * If the thread is yielding, only wake it when this
> > +	 * is invoked from idle
> > +	 */
> > +	if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current)))
> > +		wake_up_process(t);
> > +}
> > +
> > +static void invoke_rcu_core_kthread(void)
> > +{
> > +	struct task_struct *t;
> > +	unsigned long flags;
> > +
> > +	local_irq_save(flags);
> > +	__this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
> > +	t = __this_cpu_read(rcu_data.rcu_cpu_kthread_task);
> > +	if (t != NULL && t != current)
> > +		rcu_wake_cond(t, __this_cpu_read(rcu_data.rcu_cpu_kthread_status));
> > +	local_irq_restore(flags);
> > +}
> > +
> >  /*
> >   * Schedule RCU callback invocation.  If the running implementation of RCU
> >   * does not support RCU priority boosting, just do a direct call, otherwise
> > @@ -2306,19 +2343,95 @@ static void invoke_rcu_callbacks(struct rcu_data *rdp)
> >  {
> >  	if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
> >  		return;
> > -	if (likely(!rcu_state.boost)) {
> > -		rcu_do_batch(rdp);
> > -		return;
> > -	}
> > -	invoke_rcu_callbacks_kthread();
> > +	if (rcu_state.boost || !use_softirq)
> > +		invoke_rcu_core_kthread();
> > +	rcu_do_batch(rdp);
> 
> Shouldn't there be an else before the rcu_do_batch()? If we are waking up the
> rcuc thread, then that will do the rcu_do_batch() when it runs, right?
> 
> Something like:
> 	if (rcu_state.boost || !use_softirq)
> 		invoke_rcu_core_kthread();
> 	else
> 		rcu_do_batch(rdp);
> 
> Previous code similarly had a return; also.

I believe that you are correct, so I will give it a shot.  Good eyes!

> >  }
> >  
> > +/*
> > + * Wake up this CPU's rcuc kthread to do RCU core processing.
> > + */
> >  static void invoke_rcu_core(void)
> >  {
> > -	if (cpu_online(smp_processor_id()))
> > +	if (!cpu_online(smp_processor_id()))
> > +		return;
> > +	if (use_softirq)
> >  		raise_softirq(RCU_SOFTIRQ);
> > +	else
> > +		invoke_rcu_core_kthread();
> >  }
> >  
> > +static void rcu_cpu_kthread_park(unsigned int cpu)
> > +{
> > +	per_cpu(rcu_data.rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
> > +}
> > +
> > +static int rcu_cpu_kthread_should_run(unsigned int cpu)
> > +{
> > +	return __this_cpu_read(rcu_data.rcu_cpu_has_work);
> > +}
> > +
> > +/*
> > + * Per-CPU kernel thread that invokes RCU callbacks.  This replaces
> > + * the RCU softirq used in configurations of RCU that do not support RCU
> > + * priority boosting.
> > + */
> > +static void rcu_cpu_kthread(unsigned int cpu)
> > +{
> > +	unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
> > +	char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
> > +	int spincnt;
> > +
> > +	for (spincnt = 0; spincnt < 10; spincnt++) {
> > +		trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
> > +		local_bh_disable();
> > +		*statusp = RCU_KTHREAD_RUNNING;
> > +		local_irq_disable();
> > +		work = *workp;
> > +		*workp = 0;
> > +		local_irq_enable();
> > +		if (work)
> > +			rcu_core();
> > +		local_bh_enable();
> > +		if (*workp == 0) {
> > +			trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
> > +			*statusp = RCU_KTHREAD_WAITING;
> > +			return;
> > +		}
> > +	}
> > +	*statusp = RCU_KTHREAD_YIELDING;
> > +	trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
> > +	schedule_timeout_interruptible(2);
> > +	trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
> > +	*statusp = RCU_KTHREAD_WAITING;
> > +}
> > +
> [snip]
> > diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
> > index e253d11af3c49..a1a72a1ecb026 100644
> > --- a/kernel/rcu/tree.h
> > +++ b/kernel/rcu/tree.h
> > @@ -407,8 +407,8 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func);
> >  static void dump_blkd_tasks(struct rcu_node *rnp, int ncheck);
> >  static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
> >  static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
> > -static void invoke_rcu_callbacks_kthread(void);
> >  static bool rcu_is_callbacks_kthread(void);
> > +static void rcu_cpu_kthread_setup(unsigned int cpu);
> >  static void __init rcu_spawn_boost_kthreads(void);
> >  static void rcu_prepare_kthreads(int cpu);
> >  static void rcu_cleanup_after_idle(void);
> > diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
> > index f46b4af96ab95..b807204ffd83f 100644
> > --- a/kernel/rcu/tree_plugin.h
> > +++ b/kernel/rcu/tree_plugin.h
> > @@ -11,29 +11,7 @@
> >   *	   Paul E. McKenney <paulmck@linux.ibm.com>
> >   */
> >  
> > -#include <linux/delay.h>
> > -#include <linux/gfp.h>
> > -#include <linux/oom.h>
> > -#include <linux/sched/debug.h>
> > -#include <linux/smpboot.h>
> > -#include <linux/sched/isolation.h>
> > -#include <uapi/linux/sched/types.h>
> > -#include "../time/tick-internal.h"
> > -
> > -#ifdef CONFIG_RCU_BOOST
> >  #include "../locking/rtmutex_common.h"
> > -#else /* #ifdef CONFIG_RCU_BOOST */
> > -
> > -/*
> > - * Some architectures do not define rt_mutexes, but if !CONFIG_RCU_BOOST,
> > - * all uses are in dead code.  Provide a definition to keep the compiler
> > - * happy, but add WARN_ON_ONCE() to complain if used in the wrong place.
> > - * This probably needs to be excluded from -rt builds.
> > - */
> > -#define rt_mutex_owner(a) ({ WARN_ON_ONCE(1); NULL; })
> > -#define rt_mutex_futex_unlock(x) WARN_ON_ONCE(1)
> > -
> > -#endif /* #else #ifdef CONFIG_RCU_BOOST */
> >  
> >  #ifdef CONFIG_RCU_NOCB_CPU
> >  static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
> > @@ -94,6 +72,8 @@ static void __init rcu_bootup_announce_oddness(void)
> >  		pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_init_delay);
> >  	if (gp_cleanup_delay)
> >  		pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_cleanup_delay);
> > +	if (!use_softirq)
> > +		pr_info("\tRCU_SOFTIRQ processing moved to rcuc kthreads.\n");
> >  	if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG))
> >  		pr_info("\tRCU debug extended QS entry/exit.\n");
> >  	rcupdate_announce_bootup_oddness();
> > @@ -629,7 +609,10 @@ static void rcu_read_unlock_special(struct task_struct *t)
> >  		/* Need to defer quiescent state until everything is enabled. */
> >  		if (irqs_were_disabled) {
> >  			/* Enabling irqs does not reschedule, so... */
> > -			raise_softirq_irqoff(RCU_SOFTIRQ);
> > +			if (!use_softirq)
> > +				raise_softirq_irqoff(RCU_SOFTIRQ);
> 
> I believe this inverted test (the stray '!') has been corrected in Paul's tree, so that's OK.
> 
> > +			else
> > +				invoke_rcu_core();
> 
> But why not just directly call invoke_rcu_core() here? That will do the
> appropriate use_softirq check, right?

It is -so- close!  But invoke_rcu_core() invokes raise_softirq() instead
of the needed raise_softirq_irqoff().

Plus I bet that this has a few more changes to go before it is all the
way there.  ;-)

							Thanx, Paul

> thanks,
> 
>  - Joel
> 
> 
> >  		} else {
> >  			/* Enabling BH or preempt does reschedule, so... */
> >  			set_tsk_need_resched(current);
> > @@ -944,18 +927,21 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
> >  
> >  #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
> >  
> > -#ifdef CONFIG_RCU_BOOST
> > -
> > -static void rcu_wake_cond(struct task_struct *t, int status)
> > +/*
> > + * If boosting, set rcuc kthreads to realtime priority.
> > + */
> > +static void rcu_cpu_kthread_setup(unsigned int cpu)
> >  {
> > -	/*
> > -	 * If the thread is yielding, only wake it when this
> > -	 * is invoked from idle
> > -	 */
> > -	if (status != RCU_KTHREAD_YIELDING || is_idle_task(current))
> > -		wake_up_process(t);
> > +#ifdef CONFIG_RCU_BOOST
> > +	struct sched_param sp;
> > +
> > +	sp.sched_priority = kthread_prio;
> > +	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
> > +#endif /* #ifdef CONFIG_RCU_BOOST */
> >  }
> >  
> > +#ifdef CONFIG_RCU_BOOST
> > +
> >  /*
> >   * Carry out RCU priority boosting on the task indicated by ->exp_tasks
> >   * or ->boost_tasks, advancing the pointer to the next task in the
> > @@ -1093,23 +1079,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
> >  	}
> >  }
> >  
> > -/*
> > - * Wake up the per-CPU kthread to invoke RCU callbacks.
> > - */
> > -static void invoke_rcu_callbacks_kthread(void)
> > -{
> > -	unsigned long flags;
> > -
> > -	local_irq_save(flags);
> > -	__this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
> > -	if (__this_cpu_read(rcu_data.rcu_cpu_kthread_task) != NULL &&
> > -	    current != __this_cpu_read(rcu_data.rcu_cpu_kthread_task)) {
> > -		rcu_wake_cond(__this_cpu_read(rcu_data.rcu_cpu_kthread_task),
> > -			      __this_cpu_read(rcu_data.rcu_cpu_kthread_status));
> > -	}
> > -	local_irq_restore(flags);
> > -}
> > -
> >  /*
> >   * Is the current CPU running the RCU-callbacks kthread?
> >   * Caller must have preemption disabled.
> > @@ -1163,59 +1132,6 @@ static int rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
> >  	return 0;
> >  }
> >  
> > -static void rcu_cpu_kthread_setup(unsigned int cpu)
> > -{
> > -	struct sched_param sp;
> > -
> > -	sp.sched_priority = kthread_prio;
> > -	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
> > -}
> > -
> > -static void rcu_cpu_kthread_park(unsigned int cpu)
> > -{
> > -	per_cpu(rcu_data.rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
> > -}
> > -
> > -static int rcu_cpu_kthread_should_run(unsigned int cpu)
> > -{
> > -	return __this_cpu_read(rcu_data.rcu_cpu_has_work);
> > -}
> > -
> > -/*
> > - * Per-CPU kernel thread that invokes RCU callbacks.  This replaces
> > - * the RCU softirq used in configurations of RCU that do not support RCU
> > - * priority boosting.
> > - */
> > -static void rcu_cpu_kthread(unsigned int cpu)
> > -{
> > -	unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
> > -	char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
> > -	int spincnt;
> > -
> > -	for (spincnt = 0; spincnt < 10; spincnt++) {
> > -		trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
> > -		local_bh_disable();
> > -		*statusp = RCU_KTHREAD_RUNNING;
> > -		local_irq_disable();
> > -		work = *workp;
> > -		*workp = 0;
> > -		local_irq_enable();
> > -		if (work)
> > -			rcu_do_batch(this_cpu_ptr(&rcu_data));
> > -		local_bh_enable();
> > -		if (*workp == 0) {
> > -			trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
> > -			*statusp = RCU_KTHREAD_WAITING;
> > -			return;
> > -		}
> > -	}
> > -	*statusp = RCU_KTHREAD_YIELDING;
> > -	trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
> > -	schedule_timeout_interruptible(2);
> > -	trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
> > -	*statusp = RCU_KTHREAD_WAITING;
> > -}
> > -
> >  /*
> >   * Set the per-rcu_node kthread's affinity to cover all CPUs that are
> >   * served by the rcu_node in question.  The CPU hotplug lock is still
> > @@ -1246,27 +1162,13 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
> >  	free_cpumask_var(cm);
> >  }
> >  
> > -static struct smp_hotplug_thread rcu_cpu_thread_spec = {
> > -	.store			= &rcu_data.rcu_cpu_kthread_task,
> > -	.thread_should_run	= rcu_cpu_kthread_should_run,
> > -	.thread_fn		= rcu_cpu_kthread,
> > -	.thread_comm		= "rcuc/%u",
> > -	.setup			= rcu_cpu_kthread_setup,
> > -	.park			= rcu_cpu_kthread_park,
> > -};
> > -
> >  /*
> >   * Spawn boost kthreads -- called as soon as the scheduler is running.
> >   */
> >  static void __init rcu_spawn_boost_kthreads(void)
> >  {
> >  	struct rcu_node *rnp;
> > -	int cpu;
> >  
> > -	for_each_possible_cpu(cpu)
> > -		per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0;
> > -	if (WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec), "%s: Could not start rcub kthread, OOM is now expected behavior\n", __func__))
> > -		return;
> >  	rcu_for_each_leaf_node(rnp)
> >  		(void)rcu_spawn_one_boost_kthread(rnp);
> >  }
> > @@ -1289,11 +1191,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
> >  	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
> >  }
> >  
> > -static void invoke_rcu_callbacks_kthread(void)
> > -{
> > -	WARN_ON_ONCE(1);
> > -}
> > -
> >  static bool rcu_is_callbacks_kthread(void)
> >  {
> >  	return false;
> > -- 
> > 2.20.1
> > 
> 


  reply	other threads:[~2019-03-23  0:24 UTC|newest]

Thread overview: 44+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-03-15 11:11 [PATCH] rcu: Allow to eliminate softirq processing from rcutree Sebastian Andrzej Siewior
2019-03-15 13:35 ` Steven Rostedt
2019-03-15 13:57   ` Sebastian Andrzej Siewior
2019-03-18  2:24 ` Paul E. McKenney
2019-03-19 11:44   ` [PATCH v2] " Sebastian Andrzej Siewior
2019-03-19 15:59     ` Paul E. McKenney
2019-03-19 16:24       ` Sebastian Andrzej Siewior
2019-03-19 16:50         ` Paul E. McKenney
2019-03-19 17:02           ` Sebastian Andrzej Siewior
2019-03-20 11:32     ` Sebastian Andrzej Siewior
2019-03-20 15:21       ` Paul E. McKenney
2019-03-20 15:44         ` Paul E. McKenney
2019-03-20 16:05           ` Sebastian Andrzej Siewior
2019-03-20 16:15             ` Paul E. McKenney
2019-03-20 16:35               ` Sebastian Andrzej Siewior
2019-03-20 17:30                 ` Paul E. McKenney
2019-03-20 17:59                   ` Sebastian Andrzej Siewior
2019-03-20 18:12                     ` Paul E. McKenney
2019-03-20 18:14                       ` Sebastian Andrzej Siewior
2019-03-20 21:13                         ` [PATCH v3] " Sebastian Andrzej Siewior
2019-03-20 23:46                           ` Paul E. McKenney
2019-03-21  8:27                             ` Sebastian Andrzej Siewior
2019-03-21 13:26                               ` Paul E. McKenney
2019-03-21 23:32                             ` Paul E. McKenney
2019-03-22  7:35                               ` Paul E. McKenney
2019-03-22 12:43                                 ` Paul E. McKenney
2019-03-22 13:42                               ` Joel Fernandes
2019-03-22 14:58                                 ` Paul E. McKenney
2019-03-22 15:50                                   ` Joel Fernandes
2019-03-22 16:26                                     ` Paul E. McKenney
2019-03-22 18:07                                       ` Paul E. McKenney
2019-03-22 23:48                           ` Joel Fernandes
2019-03-23  0:25                             ` Paul E. McKenney [this message]
2019-03-23  1:04                               ` Joel Fernandes
2019-03-23 16:10                               ` Paul E. McKenney
2019-03-24 23:42                                 ` Paul E. McKenney
2019-03-25 13:41                                   ` Joel Fernandes
2019-03-25 15:08                                     ` Paul E. McKenney
2019-03-25 15:52                                       ` Paul E. McKenney
2019-03-20  0:26 ` [PATCH] " Joel Fernandes
2019-03-20 11:28   ` Sebastian Andrzej Siewior
2019-03-21 12:06     ` Joel Fernandes
2019-03-21 13:52       ` Paul E. McKenney
2019-03-20 15:24   ` Paul E. McKenney

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190323002519.GV4102@linux.ibm.com \
    --to=paulmck@linux.ibm.com \
    --cc=bigeasy@linutronix.de \
    --cc=efault@gmx.de \
    --cc=jiangshanlai@gmail.com \
    --cc=joel@joelfernandes.org \
    --cc=josh@joshtriplett.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mathieu.desnoyers@efficios.com \
    --cc=rostedt@goodmis.org \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).