From: David Woodhouse <dwmw2@infradead.org>
To: Paul McKenney <paulmck@linux.vnet.ibm.com>,
Peter Zijlstra <peterz@infradead.org>,
mhillenb@amazon.de
Cc: linux-kernel <linux-kernel@vger.kernel.org>
Subject: [RFC] Make need_resched() return true when rcu_urgent_qs requested
Date: Fri, 06 Jul 2018 15:53:30 +0100 [thread overview]
Message-ID: <bb77e730a5ada3689650b636e1dd03ac5cc87256.camel@infradead.org> (raw)
In 4.15 without CONFIG_PREEMPT we observed the synchronize_sched() call
in expand_fdtable() taking about 10 seconds to complete, when most of the
other threads were running KVM guests.
In vcpu_run() there's a loop with the fairly common construct:
if (need_resched()) {
… local unlocks …
cond_resched();
… local locks …
}
But because need_resched() isn't true (until half the RCU warning time
has elapsed and rcu_implicit_dynticks_qs() calls resched_cpu()), that
branch is never taken and cond_resched() is never called. In cond_resched()
there is an unconditional call to rcu_all_qs() which would DTRT.
Now, there's a simple way to fix it for the specific case of KVM — we
can find a place we can just call rcu_all_qs(), something like this:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 00520711..a304693 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7214,6 +7214,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
*/
smp_mb__after_srcu_read_unlock();
+ /* Force quiescent state (if requested) before entering guest mode */
+ rcu_all_qs();
+
/*
* This handles the case where a posted interrupt was
* notified with kvm_vcpu_kick.
But I wonder if we should attempt to fix the general case by making
need_resched() return true when an RCU quiescent state is needed. Doing
that without an out-of-line call to a function in kernel/rcu/tree.c
would look something like the patch below. Paul, did you say you had
other ideas about how to export/inline it?
Alternatively — or perhaps additionally — shouldn't CPUs which are
currently in guest mode be counted as quiescent anyway? Or is that
something we'll only ever want to do in full NOHZ mode?
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index b3dbf95..2f8a3bd 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -126,6 +126,7 @@ static inline bool rcu_is_watching(void) { return true; }
/* Avoid RCU read-side critical sections leaking across. */
static inline void rcu_all_qs(void) { barrier(); }
+static inline bool rcu_urgent_qs_requested(void) { return false; }
/* RCUtree hotplug events */
#define rcutree_prepare_cpu NULL
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 37d6fd3..d20b987 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -30,6 +30,36 @@
#ifndef __LINUX_RCUTREE_H
#define __LINUX_RCUTREE_H
+/*
+ * Dynticks per-CPU state.
+ */
+struct rcu_dynticks {
+ long long dynticks_nesting; /* Track irq/process nesting level. */
+ /* Process level is worth LLONG_MAX/2. */
+ int dynticks_nmi_nesting; /* Track NMI nesting level. */
+ atomic_t dynticks; /* Even value for idle, else odd. */
+ bool rcu_need_heavy_qs; /* GP old, need heavy quiescent state. */
+ unsigned long rcu_qs_ctr; /* Light universal quiescent state ctr. */
+ bool rcu_urgent_qs; /* GP old need light quiescent state. */
+#ifdef CONFIG_RCU_FAST_NO_HZ
+ bool all_lazy; /* Are all CPU's CBs lazy? */
+ unsigned long nonlazy_posted;
+ /* # times non-lazy CBs posted to CPU. */
+ unsigned long nonlazy_posted_snap;
+ /* idle-period nonlazy_posted snapshot. */
+ unsigned long last_accelerate;
+ /* Last jiffy CBs were accelerated. */
+ unsigned long last_advance_all;
+ /* Last jiffy CBs were all advanced. */
+ int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */
+#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
+};
+DECLARE_PER_CPU(struct rcu_dynticks, rcu_dynticks);
+static __always_inline bool rcu_urgent_qs_requested(void)
+{
+ return unlikely(raw_cpu_read(rcu_dynticks.rcu_urgent_qs));
+}
+
void rcu_note_context_switch(bool preempt);
int rcu_needs_cpu(u64 basem, u64 *nextevt);
void rcu_cpu_stall_reset(void);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e4d4e60..89f5814 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1616,7 +1616,8 @@ static inline int spin_needbreak(spinlock_t *lock)
static __always_inline bool need_resched(void)
{
- return unlikely(tif_need_resched());
+ return unlikely(tif_need_resched()) ||
+ rcu_urgent_qs_requested();
}
/*
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index f9c0ca2..cf1c66c 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -264,10 +264,11 @@ void rcu_bh_qs(void)
#define rcu_eqs_special_exit() do { } while (0)
#endif
-static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
+DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
.dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
.dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR),
};
+EXPORT_SYMBOL(rcu_dynticks); /* for need_resched() */
/*
* There's a few places, currently just in the tracing infrastructure,
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 46a5d19..462b25b 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -34,31 +34,6 @@
#include "rcu_segcblist.h"
-/*
- * Dynticks per-CPU state.
- */
-struct rcu_dynticks {
- long long dynticks_nesting; /* Track irq/process nesting level. */
- /* Process level is worth LLONG_MAX/2. */
- int dynticks_nmi_nesting; /* Track NMI nesting level. */
- atomic_t dynticks; /* Even value for idle, else odd. */
- bool rcu_need_heavy_qs; /* GP old, need heavy quiescent state. */
- unsigned long rcu_qs_ctr; /* Light universal quiescent state ctr. */
- bool rcu_urgent_qs; /* GP old need light quiescent state. */
-#ifdef CONFIG_RCU_FAST_NO_HZ
- bool all_lazy; /* Are all CPU's CBs lazy? */
- unsigned long nonlazy_posted;
- /* # times non-lazy CBs posted to CPU. */
- unsigned long nonlazy_posted_snap;
- /* idle-period nonlazy_posted snapshot. */
- unsigned long last_accelerate;
- /* Last jiffy CBs were accelerated. */
- unsigned long last_advance_all;
- /* Last jiffy CBs were all advanced. */
- int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */
-#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
-};
-
/* RCU's kthread states for tracing. */
#define RCU_KTHREAD_STOPPED 0
#define RCU_KTHREAD_RUNNING 1
--
dwmw2
next reply other threads:[~2018-07-06 14:53 UTC|newest]
Thread overview: 93+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-07-06 14:53 David Woodhouse [this message]
2018-07-06 16:29 ` [RFC] Make need_resched() return true when rcu_urgent_qs requested Peter Zijlstra
2018-07-06 17:11 ` Paul E. McKenney
2018-07-06 17:14 ` David Woodhouse
2018-07-06 21:12 ` Paul E. McKenney
2018-07-09 8:58 ` Peter Zijlstra
2018-07-09 8:53 ` Peter Zijlstra
2018-07-09 9:18 ` David Woodhouse
2018-07-09 10:44 ` Peter Zijlstra
2018-07-09 10:56 ` David Woodhouse
2018-07-09 11:06 ` Peter Zijlstra
2018-07-09 11:12 ` David Woodhouse
2018-07-09 11:31 ` Peter Zijlstra
2018-07-09 12:34 ` Paul E. McKenney
2018-07-09 12:47 ` David Woodhouse
2018-07-09 14:30 ` Paul E. McKenney
2018-07-09 12:55 ` Peter Zijlstra
2018-07-09 12:57 ` David Woodhouse
2018-07-09 13:02 ` Peter Zijlstra
2018-07-09 14:29 ` Paul E. McKenney
2018-07-09 14:43 ` Peter Zijlstra
2018-07-09 14:54 ` Paul E. McKenney
2018-07-09 15:26 ` Peter Zijlstra
2018-07-09 16:34 ` Paul E. McKenney
2018-07-09 16:44 ` Paul E. McKenney
2018-07-09 18:50 ` David Woodhouse
2018-07-09 20:34 ` Paul E. McKenney
2018-07-09 20:35 ` David Woodhouse
2018-07-09 20:42 ` Paul E. McKenney
2018-07-09 20:45 ` David Woodhouse
2018-07-09 21:05 ` Paul E. McKenney
2018-07-09 22:08 ` Paul E. McKenney
2018-07-11 10:57 ` David Woodhouse
2018-07-11 12:51 ` Paul E. McKenney
2018-07-11 12:58 ` David Woodhouse
2018-07-11 14:25 ` Paul E. McKenney
2018-07-11 14:23 ` David Woodhouse
2018-07-11 14:43 ` Paul E. McKenney
2018-07-11 16:49 ` Paul E. McKenney
2018-07-11 17:03 ` David Woodhouse
2018-07-11 17:48 ` Paul E. McKenney
2018-07-11 18:01 ` [PATCH v2] kvm/x86: Inform RCU of quiescent state when entering guest mode David Woodhouse
2018-07-11 18:20 ` Paul E. McKenney
2018-07-11 18:36 ` Paul E. McKenney
2018-07-11 18:39 ` Christian Borntraeger
2018-07-11 20:27 ` Paul E. McKenney
2018-07-11 20:54 ` David Woodhouse
2018-07-11 21:09 ` Paul E. McKenney
2018-07-11 21:11 ` Christian Borntraeger
2018-07-11 21:32 ` Paul E. McKenney
2018-07-11 21:39 ` Christian Borntraeger
2018-07-11 23:47 ` Paul E. McKenney
2018-07-12 8:31 ` David Woodhouse
2018-07-12 11:00 ` Christian Borntraeger
2018-07-12 11:10 ` David Woodhouse
2018-07-12 11:58 ` Christian Borntraeger
2018-07-12 12:04 ` Christian Borntraeger
2018-07-11 23:37 ` Paul E. McKenney
2018-07-12 2:15 ` Paul E. McKenney
2018-07-12 6:21 ` Christian Borntraeger
2018-07-12 9:52 ` David Woodhouse
2018-07-11 18:31 ` [RFC] Make need_resched() return true when rcu_urgent_qs requested Christian Borntraeger
2018-07-11 20:17 ` Paul E. McKenney
2018-07-11 20:19 ` David Woodhouse
2018-07-11 21:08 ` Paul E. McKenney
2018-07-12 12:00 ` David Woodhouse
2018-07-12 12:53 ` Paul E. McKenney
2018-07-12 16:17 ` Paul E. McKenney
2018-07-16 15:40 ` Paul E. McKenney
2018-07-17 8:19 ` David Woodhouse
2018-07-17 12:56 ` Paul E. McKenney
2018-07-18 15:36 ` Paul E. McKenney
2018-07-18 16:01 ` David Woodhouse
2018-07-18 16:37 ` Paul E. McKenney
2018-07-18 19:41 ` David Woodhouse
2018-07-18 20:17 ` Paul E. McKenney
2018-07-19 0:26 ` Frederic Weisbecker
2018-07-19 6:45 ` Christian Borntraeger
2018-07-19 7:20 ` David Woodhouse
2018-07-19 10:23 ` Christian Borntraeger
2018-07-19 12:55 ` Paul E. McKenney
2018-07-19 13:14 ` Frederic Weisbecker
2018-07-19 13:36 ` David Woodhouse
2018-07-19 17:09 ` Paul E. McKenney
2018-07-23 8:08 ` David Woodhouse
2018-07-23 12:22 ` Paul E. McKenney
2018-07-19 0:32 ` Frederic Weisbecker
2018-07-19 3:11 ` Paul E. McKenney
2018-07-19 6:16 ` David Woodhouse
2018-07-19 13:17 ` Frederic Weisbecker
2018-07-19 13:15 ` Frederic Weisbecker
2018-07-10 9:24 ` Peter Zijlstra
2018-07-10 16:26 ` Paul E. McKenney
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=bb77e730a5ada3689650b636e1dd03ac5cc87256.camel@infradead.org \
--to=dwmw2@infradead.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mhillenb@amazon.de \
--cc=paulmck@linux.vnet.ibm.com \
--cc=peterz@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).