linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Frederic Weisbecker <frederic@kernel.org>
To: "Paul E . McKenney" <paulmck@kernel.org>
Cc: LKML <linux-kernel@vger.kernel.org>,
	Frederic Weisbecker <frederic@kernel.org>,
	Boqun Feng <boqun.feng@gmail.com>,
	Lai Jiangshan <jiangshanlai@gmail.com>,
	Neeraj Upadhyay <neeraju@codeaurora.org>,
	Josh Triplett <josh@joshtriplett.org>,
	Stable <stable@vger.kernel.org>,
	Joel Fernandes <joel@joelfernandes.org>
Subject: [PATCH 16/16] rcu/nocb: Unify timers
Date: Thu, 28 Jan 2021 18:12:22 +0100	[thread overview]
Message-ID: <20210128171222.131380-17-frederic@kernel.org> (raw)
In-Reply-To: <20210128171222.131380-1-frederic@kernel.org>

Now that nocb_timer and nocb_bypass_timer have become very similar,
merge them together. A new RCU_NOCB_WAKE_BYPASS wake level is introduced.
As a result, timers perform all kinds of deferred wakeups, whereas other
deferred wakeup callsites only handle non-bypass wakeups in order not
to wake up rcuo too early.

The timer now also performs the full barrier unconditionally, in order
to order timer_pending() against callback enqueue, although the path
performing RCU_NOCB_WAKE_FORCE that relies on this ordering is debatable.
That path should also test against the rdp leader instead of the current
rdp.

The permanent full barrier shouldn't bring visible overhead since the
timers almost never fire.

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Lai Jiangshan <jiangshanlai@gmail.com>
Cc: Joel Fernandes <joel@joelfernandes.org>
Cc: Neeraj Upadhyay <neeraju@codeaurora.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
---
 include/trace/events/rcu.h |  1 +
 kernel/rcu/tree.h          |  6 +--
 kernel/rcu/tree_plugin.h   | 88 ++++++++++++++++----------------------
 3 files changed, 42 insertions(+), 53 deletions(-)

diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index 5fc29400e1a2..c16cb7d78f51 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -278,6 +278,7 @@ TRACE_EVENT_RCU(rcu_exp_funnel_lock,
  * "WakeNot": Don't wake rcuo kthread.
  * "WakeNotPoll": Don't wake rcuo kthread because it is polling.
  * "WakeOvfIsDeferred": Wake rcuo kthread later, CB list is huge.
+ * "WakeBypassIsDeferred": Wake rcuo kthread later, bypass list is contended.
  * "WokeEmpty": rcuo CB kthread woke to find empty list.
  */
 TRACE_EVENT_RCU(rcu_nocb_wake,
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 2510e86265c1..9a16487edfca 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -218,7 +218,6 @@ struct rcu_data {
 
 	/* The following fields are used by GP kthread, hence own cacheline. */
 	raw_spinlock_t nocb_gp_lock ____cacheline_internodealigned_in_smp;
-	struct timer_list nocb_bypass_timer; /* Force nocb_bypass flush. */
 	u8 nocb_gp_sleep;		/* Is the nocb GP thread asleep? */
 	u8 nocb_gp_bypass;		/* Found a bypass on last scan? */
 	u8 nocb_gp_gp;			/* GP to wait for on last scan? */
@@ -258,8 +257,9 @@ struct rcu_data {
 
 /* Values for nocb_defer_wakeup field in struct rcu_data. */
 #define RCU_NOCB_WAKE_NOT	0
-#define RCU_NOCB_WAKE		1
-#define RCU_NOCB_WAKE_FORCE	2
+#define RCU_NOCB_WAKE_BYPASS	1
+#define RCU_NOCB_WAKE		2
+#define RCU_NOCB_WAKE_FORCE	3
 
 #define RCU_JIFFIES_TILL_FORCE_QS (1 + (HZ > 250) + (HZ > 500))
 					/* For jiffies_till_first_fqs and */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 162dda3714f1..516bacbea7b9 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1703,7 +1703,6 @@ static bool __wake_nocb_gp(struct rcu_data *rdp_gp,
 
 	rdp_gp->nocb_defer_wakeup = RCU_NOCB_WAKE_NOT;
 	del_timer(&rdp_gp->nocb_timer);
-	del_timer(&rdp_gp->nocb_bypass_timer);
 
 	if (force || READ_ONCE(rdp_gp->nocb_gp_sleep)) {
 		WRITE_ONCE(rdp_gp->nocb_gp_sleep, false);
@@ -1742,10 +1741,19 @@ static void wake_nocb_gp_defer(struct rcu_data *rdp, int waketype,
 
 	raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
 
-	if (rdp_gp->nocb_defer_wakeup == RCU_NOCB_WAKE_NOT)
-		mod_timer(&rdp_gp->nocb_timer, jiffies + 1);
-	if (rdp_gp->nocb_defer_wakeup < waketype)
+	/*
+	 * Bypass wakeup overrides previous deferments. In case
+	 * of callback storm, no need to wake up too early.
+	 */
+	if (waketype == RCU_NOCB_WAKE_BYPASS) {
+		mod_timer(&rdp_gp->nocb_timer, jiffies + 2);
 		WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype);
+	} else {
+		if (rdp_gp->nocb_defer_wakeup < RCU_NOCB_WAKE)
+			mod_timer(&rdp_gp->nocb_timer, jiffies + 1);
+		if (rdp_gp->nocb_defer_wakeup < waketype)
+			WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype);
+	}
 
 	raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);
 
@@ -1997,7 +2005,7 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
 		smp_mb(); /* Enqueue before timer_pending(). */
 		if ((rdp->nocb_cb_sleep ||
 		     !rcu_segcblist_ready_cbs(&rdp->cblist)) &&
-		    !timer_pending(&rdp->nocb_bypass_timer)) {
+		    !timer_pending(&rdp->nocb_timer)) {
 			rcu_nocb_unlock_irqrestore(rdp, flags);
 			wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_FORCE,
 					   TPS("WakeOvfIsDeferred"));
@@ -2012,19 +2020,6 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
 	return;
 }
 
-/* Wake up the no-CBs GP kthread to flush ->nocb_bypass. */
-static void do_nocb_bypass_wakeup_timer(struct timer_list *t)
-{
-	unsigned long flags;
-	struct rcu_data *rdp = from_timer(rdp, t, nocb_bypass_timer);
-
-	trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("Timer"));
-
-	raw_spin_lock_irqsave(&rdp->nocb_gp_lock, flags);
-	smp_mb__after_spinlock(); /* Timer expire before wakeup. */
-	__wake_nocb_gp(rdp, rdp, false, flags);
-}
-
 /*
  * Check if we ignore this rdp.
  *
@@ -2177,17 +2172,11 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
 	my_rdp->nocb_gp_bypass = bypass;
 	my_rdp->nocb_gp_gp = needwait_gp;
 	my_rdp->nocb_gp_seq = needwait_gp ? wait_gp_seq : 0;
-	if (bypass) {
-		if (!rcu_nocb_poll) {
-			raw_spin_lock_irqsave(&my_rdp->nocb_gp_lock, flags);
-			// Avoid race with first bypass CB.
-			WRITE_ONCE(my_rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
-			del_timer(&my_rdp->nocb_timer);
-			// At least one child with non-empty ->nocb_bypass, so set
-			// timer in order to avoid stranding its callbacks.
-			mod_timer(&my_rdp->nocb_bypass_timer, j + 2);
-			raw_spin_unlock_irqrestore(&my_rdp->nocb_gp_lock, flags);
-		}
+	if (bypass && !rcu_nocb_poll) {
+		// At least one child with non-empty ->nocb_bypass, so set
+		// timer in order to avoid stranding its callbacks.
+		wake_nocb_gp_defer(my_rdp, RCU_NOCB_WAKE_BYPASS,
+				   TPS("WakeBypassIsDeferred"));
 	}
 	if (rcu_nocb_poll) {
 		/* Polling, so trace if first poll in the series. */
@@ -2211,8 +2200,6 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
 	}
 	if (!rcu_nocb_poll) {
 		raw_spin_lock_irqsave(&my_rdp->nocb_gp_lock, flags);
-		if (bypass)
-			del_timer(&my_rdp->nocb_bypass_timer);
 		if (my_rdp->nocb_defer_wakeup > RCU_NOCB_WAKE_NOT) {
 			WRITE_ONCE(my_rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
 			del_timer(&my_rdp->nocb_timer);
@@ -2360,16 +2347,14 @@ static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp, int level)
 }
 
 /* Do a deferred wakeup of rcu_nocb_kthread(). */
-static bool do_nocb_deferred_wakeup_common(struct rcu_data *rdp,
-					   int level)
+static bool do_nocb_deferred_wakeup_common(struct rcu_data *rdp_gp,
+					   struct rcu_data *rdp, int level,
+					   unsigned long flags)
+	__releases(rdp_gp->nocb_gp_lock)
 {
-	unsigned long flags;
 	int ndw;
-	struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
 	int ret;
 
-	raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
-
 	if (!rcu_nocb_need_deferred_wakeup(rdp_gp, level)) {
 		raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);;
 		return false;
@@ -2384,9 +2369,15 @@ static bool do_nocb_deferred_wakeup_common(struct rcu_data *rdp,
 /* Do a deferred wakeup of rcu_nocb_kthread() from a timer handler. */
 static void do_nocb_deferred_wakeup_timer(struct timer_list *t)
 {
+	unsigned long flags;
 	struct rcu_data *rdp = from_timer(rdp, t, nocb_timer);
 
-	do_nocb_deferred_wakeup_common(rdp, RCU_NOCB_WAKE);
+	WARN_ON_ONCE(rdp->nocb_gp_rdp != rdp);
+	trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("Timer"));
+
+	raw_spin_lock_irqsave(&rdp->nocb_gp_lock, flags);
+	smp_mb__after_spinlock(); /* Timer expire before wakeup. */
+	do_nocb_deferred_wakeup_common(rdp, rdp, RCU_NOCB_WAKE_BYPASS, flags);
 }
 
 /*
@@ -2396,12 +2387,14 @@ static void do_nocb_deferred_wakeup_timer(struct timer_list *t)
  */
 static bool do_nocb_deferred_wakeup(struct rcu_data *rdp)
 {
-	if (!rdp->nocb_gp_rdp)
+	unsigned long flags;
+	struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
+
+	if (!rdp_gp || !rcu_nocb_need_deferred_wakeup(rdp_gp, RCU_NOCB_WAKE))
 		return false;
 
-	if (rcu_nocb_need_deferred_wakeup(rdp->nocb_gp_rdp, RCU_NOCB_WAKE))
-		return do_nocb_deferred_wakeup_common(rdp, RCU_NOCB_WAKE);
-	return false;
+	raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
+	return do_nocb_deferred_wakeup_common(rdp_gp, rdp, RCU_NOCB_WAKE, flags);
 }
 
 void rcu_nocb_flush_deferred_wakeup(void)
@@ -2636,7 +2629,6 @@ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
 	raw_spin_lock_init(&rdp->nocb_bypass_lock);
 	raw_spin_lock_init(&rdp->nocb_gp_lock);
 	timer_setup(&rdp->nocb_timer, do_nocb_deferred_wakeup_timer, 0);
-	timer_setup(&rdp->nocb_bypass_timer, do_nocb_bypass_wakeup_timer, 0);
 	rcu_cblist_init(&rdp->nocb_bypass);
 }
 
@@ -2795,13 +2787,12 @@ static void show_rcu_nocb_gp_state(struct rcu_data *rdp)
 {
 	struct rcu_node *rnp = rdp->mynode;
 
-	pr_info("nocb GP %d %c%c%c%c%c%c %c[%c%c] %c%c:%ld rnp %d:%d %lu %c CPU %d%s\n",
+	pr_info("nocb GP %d %c%c%c%c%c %c[%c%c] %c%c:%ld rnp %d:%d %lu %c CPU %d%s\n",
 		rdp->cpu,
 		"kK"[!!rdp->nocb_gp_kthread],
 		"lL"[raw_spin_is_locked(&rdp->nocb_gp_lock)],
 		"dD"[!!rdp->nocb_defer_wakeup],
 		"tT"[timer_pending(&rdp->nocb_timer)],
-		"bB"[timer_pending(&rdp->nocb_bypass_timer)],
 		"sS"[!!rdp->nocb_gp_sleep],
 		".W"[swait_active(&rdp->nocb_gp_wq)],
 		".W"[swait_active(&rnp->nocb_gp_wq[0])],
@@ -2822,7 +2813,6 @@ static void show_rcu_nocb_state(struct rcu_data *rdp)
 	char bufr[20];
 	struct rcu_segcblist *rsclp = &rdp->cblist;
 	bool waslocked;
-	bool wastimer;
 	bool wassleep;
 
 	if (rdp->nocb_gp_rdp == rdp)
@@ -2859,15 +2849,13 @@ static void show_rcu_nocb_state(struct rcu_data *rdp)
 		return;
 
 	waslocked = raw_spin_is_locked(&rdp->nocb_gp_lock);
-	wastimer = timer_pending(&rdp->nocb_bypass_timer);
 	wassleep = swait_active(&rdp->nocb_gp_wq);
-	if (!rdp->nocb_gp_sleep && !waslocked && !wastimer && !wassleep)
+	if (!rdp->nocb_gp_sleep && !waslocked && !wassleep)
 		return;  /* Nothing untowards. */
 
-	pr_info("   nocb GP activity on CB-only CPU!!! %c%c%c%c %c\n",
+	pr_info("   nocb GP activity on CB-only CPU!!! %c%c%c %c\n",
 		"lL"[waslocked],
 		"dD"[!!rdp->nocb_defer_wakeup],
-		"tT"[wastimer],
 		"sS"[!!rdp->nocb_gp_sleep],
 		".W"[wassleep]);
 }
-- 
2.25.1


      parent reply	other threads:[~2021-01-28 17:21 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-01-28 17:12 [PATCH 00/16] rcu/nocb updates Frederic Weisbecker
2021-01-28 17:12 ` [PATCH 01/16] rcu/nocb: Fix potential missed nocb_timer rearm Frederic Weisbecker
     [not found]   ` <20210128184834.GP2743@paulmck-ThinkPad-P72>
2021-01-28 21:23     ` Frederic Weisbecker
2021-01-28 17:12 ` [PATCH 02/16] rcu/nocb: Comment the reason behind BH disablement on batch processing Frederic Weisbecker
2021-01-28 17:12 ` [PATCH 03/16] rcu/nocb: Forbid NOCB toggling on offline CPUs Frederic Weisbecker
2021-01-28 19:52   ` Paul E. McKenney
2021-01-28 17:12 ` [PATCH 04/16] rcu/nocb: Only (re-)initialize segcblist when needed on CPU up Frederic Weisbecker
     [not found]   ` <20210128191228.GQ2743@paulmck-ThinkPad-P72>
2021-01-28 21:34     ` Frederic Weisbecker
2021-01-28 21:45       ` Paul E. McKenney
2021-01-29  0:26         ` Paul E. McKenney
2021-01-28 17:12 ` [PATCH 05/16] rcu/nocb: Disable bypass when CPU isn't completely offloaded Frederic Weisbecker
2021-01-28 21:31   ` Paul E. McKenney
2021-01-28 22:25     ` Frederic Weisbecker
2021-01-29  0:19       ` Paul E. McKenney
2021-01-28 17:12 ` [PATCH 06/16] rcu/nocb: Avoid confusing double write of rdp->nocb_cb_sleep Frederic Weisbecker
2021-01-28 21:42   ` Paul E. McKenney
2021-01-28 17:12 ` [PATCH 07/16] rcu/nocb: Rename nocb_gp_update_state to nocb_gp_update_state_deoffloading Frederic Weisbecker
2021-01-29  0:49   ` Paul E. McKenney
2021-01-28 17:12 ` [PATCH 08/16] rcu/nocb: Move trace_rcu_nocb_wake() calls outside nocb_lock when possible Frederic Weisbecker
2021-01-29  0:51   ` Paul E. McKenney
2021-01-28 17:12 ` [PATCH 09/16] rcu/nocb: Merge nocb_timer to the rdp leader Frederic Weisbecker
2021-01-28 17:12 ` [PATCH 10/16] rcu/nocb: Directly call __wake_nocb_gp() from bypass timer Frederic Weisbecker
2021-01-28 17:12 ` [PATCH 11/16] rcu/nocb: Allow de-offloading rdp leader Frederic Weisbecker
2021-01-28 17:12 ` [PATCH 12/16] rcu/nocb: Cancel nocb_timer upon nocb_gp wakeup Frederic Weisbecker
2021-01-28 17:12 ` [PATCH 13/16] rcu/nocb: Delete bypass_timer " Frederic Weisbecker
2021-01-28 17:12 ` [PATCH 14/16] rcu/nocb: Only cancel nocb timer if not polling Frederic Weisbecker
2021-01-28 17:12 ` [PATCH 15/16] rcu/nocb: Prepare for finegrained deferred wakeup Frederic Weisbecker
2021-01-28 17:12 ` Frederic Weisbecker [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210128171222.131380-17-frederic@kernel.org \
    --to=frederic@kernel.org \
    --cc=boqun.feng@gmail.com \
    --cc=jiangshanlai@gmail.com \
    --cc=joel@joelfernandes.org \
    --cc=josh@joshtriplett.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=neeraju@codeaurora.org \
    --cc=paulmck@kernel.org \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).