From: paulmck@kernel.org
To: rcu@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, mingo@kernel.org,
jiangshanlai@gmail.com, dipankar@in.ibm.com,
akpm@linux-foundation.org, mathieu.desnoyers@efficios.com,
josh@joshtriplett.org, tglx@linutronix.de, peterz@infradead.org,
rostedt@goodmis.org, dhowells@redhat.com, edumazet@google.com,
fweisbec@gmail.com, oleg@redhat.com, joel@joelfernandes.org,
"Paul E. McKenney" <paulmck@linux.ibm.com>
Subject: [PATCH tip/core/rcu 08/12] rcu: Force tick on for nohz_full CPUs not reaching quiescent states
Date: Wed, 2 Oct 2019 18:38:59 -0700 [thread overview]
Message-ID: <20191003013903.13079-8-paulmck@kernel.org> (raw)
In-Reply-To: <20191003013834.GA12927@paulmck-ThinkPad-P72>
From: "Paul E. McKenney" <paulmck@linux.ibm.com>
CPUs running for long time periods in the kernel in nohz_full mode
might leave the scheduling-clock interrupt disabled for then full
duration of their in-kernel execution. This can (among other things)
delay grace periods. This commit therefore forces the tick back on
for any nohz_full CPU that is failing to pass through a quiescent state
upon return from interrupt, which the resched_cpu() will induce.
Reported-by: Joel Fernandes <joel@joelfernandes.org>
[ paulmck: Clear ->rcu_forced_tick as reported by Joel Fernandes testing. ]
[ paulmck: Apply Joel Fernandes TICK_DEP_MASK_RCU->TICK_DEP_BIT_RCU fix. ]
Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
---
kernel/rcu/tree.c | 38 +++++++++++++++++++++++++++++++-------
kernel/rcu/tree.h | 1 +
2 files changed, 32 insertions(+), 7 deletions(-)
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 74bf5c65..621cc06 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -650,6 +650,12 @@ static __always_inline void rcu_nmi_exit_common(bool irq)
*/
if (rdp->dynticks_nmi_nesting != 1) {
trace_rcu_dyntick(TPS("--="), rdp->dynticks_nmi_nesting, rdp->dynticks_nmi_nesting - 2, rdp->dynticks);
+ if (tick_nohz_full_cpu(rdp->cpu) &&
+ rdp->dynticks_nmi_nesting == 2 &&
+ rdp->rcu_urgent_qs && !rdp->rcu_forced_tick) {
+ rdp->rcu_forced_tick = true;
+ tick_dep_set_cpu(rdp->cpu, TICK_DEP_MASK_RCU);
+ }
WRITE_ONCE(rdp->dynticks_nmi_nesting, /* No store tearing. */
rdp->dynticks_nmi_nesting - 2);
return;
@@ -885,6 +891,18 @@ void rcu_irq_enter_irqson(void)
local_irq_restore(flags);
}
+/*
+ * If the scheduler-clock interrupt was enabled on a nohz_full CPU
+ * in order to get to a quiescent state, disable it.
+ */
+void rcu_disable_tick_upon_qs(struct rcu_data *rdp)
+{
+ if (tick_nohz_full_cpu(rdp->cpu) && rdp->rcu_forced_tick) {
+ tick_dep_clear_cpu(rdp->cpu, TICK_DEP_BIT_RCU);
+ rdp->rcu_forced_tick = false;
+ }
+}
+
/**
* rcu_is_watching - see if RCU thinks that the current CPU is not idle
*
@@ -1979,6 +1997,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_data *rdp)
if (!offloaded)
needwake = rcu_accelerate_cbs(rnp, rdp);
+ rcu_disable_tick_upon_qs(rdp);
rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
/* ^^^ Released rnp->lock */
if (needwake)
@@ -2268,6 +2287,7 @@ static void force_qs_rnp(int (*f)(struct rcu_data *rdp))
int cpu;
unsigned long flags;
unsigned long mask;
+ struct rcu_data *rdp;
struct rcu_node *rnp;
rcu_for_each_leaf_node(rnp) {
@@ -2292,8 +2312,11 @@ static void force_qs_rnp(int (*f)(struct rcu_data *rdp))
for_each_leaf_node_possible_cpu(rnp, cpu) {
unsigned long bit = leaf_node_cpu_bit(rnp, cpu);
if ((rnp->qsmask & bit) != 0) {
- if (f(per_cpu_ptr(&rcu_data, cpu)))
+ rdp = per_cpu_ptr(&rcu_data, cpu);
+ if (f(rdp)) {
mask |= bit;
+ rcu_disable_tick_upon_qs(rdp);
+ }
}
}
if (mask != 0) {
@@ -2321,7 +2344,7 @@ void rcu_force_quiescent_state(void)
rnp = __this_cpu_read(rcu_data.mynode);
for (; rnp != NULL; rnp = rnp->parent) {
ret = (READ_ONCE(rcu_state.gp_flags) & RCU_GP_FLAG_FQS) ||
- !raw_spin_trylock(&rnp->fqslock);
+ !raw_spin_trylock(&rnp->fqslock);
if (rnp_old != NULL)
raw_spin_unlock(&rnp_old->fqslock);
if (ret)
@@ -2854,7 +2877,7 @@ static void rcu_barrier_callback(struct rcu_head *rhp)
{
if (atomic_dec_and_test(&rcu_state.barrier_cpu_count)) {
rcu_barrier_trace(TPS("LastCB"), -1,
- rcu_state.barrier_sequence);
+ rcu_state.barrier_sequence);
complete(&rcu_state.barrier_completion);
} else {
rcu_barrier_trace(TPS("CB"), -1, rcu_state.barrier_sequence);
@@ -2878,7 +2901,7 @@ static void rcu_barrier_func(void *unused)
} else {
debug_rcu_head_unqueue(&rdp->barrier_head);
rcu_barrier_trace(TPS("IRQNQ"), -1,
- rcu_state.barrier_sequence);
+ rcu_state.barrier_sequence);
}
rcu_nocb_unlock(rdp);
}
@@ -2905,7 +2928,7 @@ void rcu_barrier(void)
/* Did someone else do our work for us? */
if (rcu_seq_done(&rcu_state.barrier_sequence, s)) {
rcu_barrier_trace(TPS("EarlyExit"), -1,
- rcu_state.barrier_sequence);
+ rcu_state.barrier_sequence);
smp_mb(); /* caller's subsequent code after above check. */
mutex_unlock(&rcu_state.barrier_mutex);
return;
@@ -2937,11 +2960,11 @@ void rcu_barrier(void)
continue;
if (rcu_segcblist_n_cbs(&rdp->cblist)) {
rcu_barrier_trace(TPS("OnlineQ"), cpu,
- rcu_state.barrier_sequence);
+ rcu_state.barrier_sequence);
smp_call_function_single(cpu, rcu_barrier_func, NULL, 1);
} else {
rcu_barrier_trace(TPS("OnlineNQ"), cpu,
- rcu_state.barrier_sequence);
+ rcu_state.barrier_sequence);
}
}
put_online_cpus();
@@ -3167,6 +3190,7 @@ void rcu_cpu_starting(unsigned int cpu)
rdp->rcu_onl_gp_seq = READ_ONCE(rcu_state.gp_seq);
rdp->rcu_onl_gp_flags = READ_ONCE(rcu_state.gp_flags);
if (rnp->qsmask & mask) { /* RCU waiting on incoming CPU? */
+ rcu_disable_tick_upon_qs(rdp);
/* Report QS -after- changing ->qsmaskinitnext! */
rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
} else {
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index c612f30..055c317 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -181,6 +181,7 @@ struct rcu_data {
atomic_t dynticks; /* Even value for idle, else odd. */
bool rcu_need_heavy_qs; /* GP old, so heavy quiescent state! */
bool rcu_urgent_qs; /* GP old need light quiescent state. */
+ bool rcu_forced_tick; /* Forced tick to provide QS. */
#ifdef CONFIG_RCU_FAST_NO_HZ
bool all_lazy; /* All CPU's CBs lazy at idle start? */
unsigned long last_accelerate; /* Last jiffy CBs were accelerated. */
--
2.9.5
next prev parent reply other threads:[~2019-10-03 1:39 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-10-03 1:38 [PATCH tip/core/rcu 0/12] NO_HZ fixes for v5.5 Paul E. McKenney
2019-10-03 1:38 ` [PATCH tip/core/rcu 01/12] nohz: Add TICK_DEP_BIT_RCU paulmck
2019-10-03 1:38 ` [PATCH tip/core/rcu 02/12] time: Export tick start/stop functions for rcutorture paulmck
2019-10-03 1:38 ` [PATCH tip/core/rcu 03/12] rcu: Force on tick when invoking lots of callbacks paulmck
2019-10-03 14:10 ` Frederic Weisbecker
2019-10-05 16:42 ` Paul E. McKenney
2019-10-03 1:38 ` [PATCH tip/core/rcu 04/12] rcutorture: Force on tick for readers and callback flooders paulmck
2019-10-03 14:14 ` Frederic Weisbecker
2019-10-05 16:52 ` Paul E. McKenney
2019-10-03 1:38 ` [PATCH tip/core/rcu 05/12] stop_machine: EXP Provide RCU quiescent state in multi_cpu_stop() paulmck
2019-10-03 1:38 ` [PATCH tip/core/rcu 06/12] rcu: Make CPU-hotplug removal operations enable tick paulmck
2019-10-03 14:34 ` Frederic Weisbecker
2019-10-05 17:17 ` Paul E. McKenney
2019-10-03 1:38 ` [PATCH tip/core/rcu 07/12] stop_machine: Use {READ,WRITE)_ONCE() for multi_cpu_stop() ->state paulmck
2019-10-03 1:38 ` paulmck [this message]
2019-10-03 14:50 ` [PATCH tip/core/rcu 08/12] rcu: Force tick on for nohz_full CPUs not reaching quiescent states Frederic Weisbecker
2019-10-05 17:21 ` Paul E. McKenney
2019-10-03 1:39 ` [PATCH tip/core/rcu 09/12] rcu: Force nohz_full tick on upon irq enter instead of exit paulmck
2019-10-03 1:39 ` [PATCH tip/core/rcu 10/12] rcu: Reset CPU hints when reporting a quiescent state paulmck
2019-10-03 1:39 ` [PATCH tip/core/rcu 11/12] rcu: Confine ->core_needs_qs accesses to the corresponding CPU paulmck
2019-10-03 1:39 ` [PATCH tip/core/rcu 12/12] rcu: Make kernel-mode nohz_full CPUs invoke the RCU core processing paulmck
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20191003013903.13079-8-paulmck@kernel.org \
--to=paulmck@kernel.org \
--cc=akpm@linux-foundation.org \
--cc=dhowells@redhat.com \
--cc=dipankar@in.ibm.com \
--cc=edumazet@google.com \
--cc=fweisbec@gmail.com \
--cc=jiangshanlai@gmail.com \
--cc=joel@joelfernandes.org \
--cc=josh@joshtriplett.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mathieu.desnoyers@efficios.com \
--cc=mingo@kernel.org \
--cc=oleg@redhat.com \
--cc=paulmck@linux.ibm.com \
--cc=peterz@infradead.org \
--cc=rcu@vger.kernel.org \
--cc=rostedt@goodmis.org \
--cc=tglx@linutronix.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).