rcu.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH -rcu dev 1/2] Revert b8c17e6664c4 ("rcu: Maintain special bits at bottom of ->dynticks counter")
@ 2019-08-30 16:23 Joel Fernandes (Google)
  2019-08-30 16:23 ` [PATCH -rcu dev 2/2] rcu/dyntick-idle: Add better tracing Joel Fernandes (Google)
  2019-09-03 20:02 ` [PATCH -rcu dev 1/2] Revert b8c17e6664c4 ("rcu: Maintain special bits at bottom of ->dynticks counter") Paul E. McKenney
  0 siblings, 2 replies; 19+ messages in thread
From: Joel Fernandes (Google) @ 2019-08-30 16:23 UTC (permalink / raw)
  To: linux-kernel
  Cc: Joel Fernandes (Google),
	Andy Lutomirski, Bjorn Helgaas, Ingo Molnar, Josh Triplett,
	Lai Jiangshan, Mathieu Desnoyers, Paul E. McKenney, Petr Mladek,
	Rafael J. Wysocki, rcu, Steven Rostedt, Yafang Shao

This code is unused and can be removed now. Revert was straightforward.

Tested with light rcutorture.

Link: http://lore.kernel.org/r/CALCETrWNPOOdTrFabTDd=H7+wc6xJ9rJceg6OL1S0rTV5pfSsA@mail.gmail.com
Suggested-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>

---
 include/linux/rcutiny.h |  3 --
 kernel/rcu/tree.c       | 82 ++++++++++-------------------------------
 2 files changed, 19 insertions(+), 66 deletions(-)

diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index b7607e2667ae..b3f689711289 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -14,9 +14,6 @@
 
 #include <asm/param.h> /* for HZ */
 
-/* Never flag non-existent other CPUs! */
-static inline bool rcu_eqs_special_set(int cpu) { return false; }
-
 static inline unsigned long get_state_synchronize_rcu(void)
 {
 	return 0;
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 68ebf0eb64c8..417dd00b9e87 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -69,20 +69,10 @@
 
 /* Data structures. */
 
-/*
- * Steal a bit from the bottom of ->dynticks for idle entry/exit
- * control.  Initially this is for TLB flushing.
- */
-#define RCU_DYNTICK_CTRL_MASK 0x1
-#define RCU_DYNTICK_CTRL_CTR  (RCU_DYNTICK_CTRL_MASK + 1)
-#ifndef rcu_eqs_special_exit
-#define rcu_eqs_special_exit() do { } while (0)
-#endif
-
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = {
 	.dynticks_nesting = 1,
 	.dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE,
-	.dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR),
+	.dynticks = ATOMIC_INIT(1),
 };
 struct rcu_state rcu_state = {
 	.level = { &rcu_state.node[0] },
@@ -229,20 +219,15 @@ void rcu_softirq_qs(void)
 static void rcu_dynticks_eqs_enter(void)
 {
 	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
-	int seq;
+	int special;
 
 	/*
-	 * CPUs seeing atomic_add_return() must see prior RCU read-side
+	 * CPUs seeing atomic_inc_return() must see prior RCU read-side
 	 * critical sections, and we also must force ordering with the
 	 * next idle sojourn.
 	 */
-	seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
-	/* Better be in an extended quiescent state! */
-	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
-		     (seq & RCU_DYNTICK_CTRL_CTR));
-	/* Better not have special action (TLB flush) pending! */
-	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
-		     (seq & RCU_DYNTICK_CTRL_MASK));
+	special = atomic_inc_return(&rdp->dynticks);
+	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && special & 0x1);
 }
 
 /*
@@ -252,22 +237,15 @@ static void rcu_dynticks_eqs_enter(void)
 static void rcu_dynticks_eqs_exit(void)
 {
 	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
-	int seq;
+	int special;
 
 	/*
-	 * CPUs seeing atomic_add_return() must see prior idle sojourns,
+	 * CPUs seeing atomic_inc_return() must see prior idle sojourns,
 	 * and we also must force ordering with the next RCU read-side
 	 * critical section.
 	 */
-	seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
-	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
-		     !(seq & RCU_DYNTICK_CTRL_CTR));
-	if (seq & RCU_DYNTICK_CTRL_MASK) {
-		atomic_andnot(RCU_DYNTICK_CTRL_MASK, &rdp->dynticks);
-		smp_mb__after_atomic(); /* _exit after clearing mask. */
-		/* Prefer duplicate flushes to losing a flush. */
-		rcu_eqs_special_exit();
-	}
+	special = atomic_inc_return(&rdp->dynticks);
+	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !(special & 0x1));
 }
 
 /*
@@ -284,9 +262,9 @@ static void rcu_dynticks_eqs_online(void)
 {
 	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
 
-	if (atomic_read(&rdp->dynticks) & RCU_DYNTICK_CTRL_CTR)
+	if (atomic_read(&rdp->dynticks) & 0x1)
 		return;
-	atomic_add(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
+	atomic_add(0x1, &rdp->dynticks);
 }
 
 /*
@@ -298,7 +276,7 @@ bool rcu_dynticks_curr_cpu_in_eqs(void)
 {
 	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
 
-	return !(atomic_read(&rdp->dynticks) & RCU_DYNTICK_CTRL_CTR);
+	return !(atomic_read(&rdp->dynticks) & 0x1);
 }
 
 /*
@@ -309,7 +287,7 @@ int rcu_dynticks_snap(struct rcu_data *rdp)
 {
 	int snap = atomic_add_return(0, &rdp->dynticks);
 
-	return snap & ~RCU_DYNTICK_CTRL_MASK;
+	return snap;
 }
 
 /*
@@ -318,7 +296,7 @@ int rcu_dynticks_snap(struct rcu_data *rdp)
  */
 static bool rcu_dynticks_in_eqs(int snap)
 {
-	return !(snap & RCU_DYNTICK_CTRL_CTR);
+	return !(snap & 0x1);
 }
 
 /*
@@ -331,28 +309,6 @@ static bool rcu_dynticks_in_eqs_since(struct rcu_data *rdp, int snap)
 	return snap != rcu_dynticks_snap(rdp);
 }
 
-/*
- * Set the special (bottom) bit of the specified CPU so that it
- * will take special action (such as flushing its TLB) on the
- * next exit from an extended quiescent state.  Returns true if
- * the bit was successfully set, or false if the CPU was not in
- * an extended quiescent state.
- */
-bool rcu_eqs_special_set(int cpu)
-{
-	int old;
-	int new;
-	struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
-
-	do {
-		old = atomic_read(&rdp->dynticks);
-		if (old & RCU_DYNTICK_CTRL_CTR)
-			return false;
-		new = old | RCU_DYNTICK_CTRL_MASK;
-	} while (atomic_cmpxchg(&rdp->dynticks, old, new) != old);
-	return true;
-}
-
 /*
  * Let the RCU core know that this CPU has gone through the scheduler,
  * which is a quiescent state.  This is called when the need for a
@@ -366,13 +322,13 @@ bool rcu_eqs_special_set(int cpu)
  */
 void rcu_momentary_dyntick_idle(void)
 {
-	int special;
+	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
+	int special = atomic_add_return(2, &rdp->dynticks);
 
-	raw_cpu_write(rcu_data.rcu_need_heavy_qs, false);
-	special = atomic_add_return(2 * RCU_DYNTICK_CTRL_CTR,
-				    &this_cpu_ptr(&rcu_data)->dynticks);
 	/* It is illegal to call this from idle state. */
-	WARN_ON_ONCE(!(special & RCU_DYNTICK_CTRL_CTR));
+	WARN_ON_ONCE(!(special & 0x1));
+
+	raw_cpu_write(rcu_data.rcu_need_heavy_qs, false);
 	rcu_preempt_deferred_qs(current);
 }
 EXPORT_SYMBOL_GPL(rcu_momentary_dyntick_idle);
-- 
2.23.0.187.g17f5b7556c-goog


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH -rcu dev 2/2] rcu/dyntick-idle: Add better tracing
  2019-08-30 16:23 [PATCH -rcu dev 1/2] Revert b8c17e6664c4 ("rcu: Maintain special bits at bottom of ->dynticks counter") Joel Fernandes (Google)
@ 2019-08-30 16:23 ` Joel Fernandes (Google)
  2019-09-03 20:04   ` Paul E. McKenney
  2019-09-03 20:02 ` [PATCH -rcu dev 1/2] Revert b8c17e6664c4 ("rcu: Maintain special bits at bottom of ->dynticks counter") Paul E. McKenney
  1 sibling, 1 reply; 19+ messages in thread
From: Joel Fernandes (Google) @ 2019-08-30 16:23 UTC (permalink / raw)
  To: linux-kernel
  Cc: Joel Fernandes (Google),
	Bjorn Helgaas, Ingo Molnar, Josh Triplett, Lai Jiangshan,
	Mathieu Desnoyers, Paul E. McKenney, Petr Mladek,
	Rafael J. Wysocki, rcu, Steven Rostedt, Yafang Shao

The dyntick-idle traces are a bit confusing. This patch makes it simpler
and adds some missing cases such as EQS-enter due to user vs idle mode.

Following are the changes:
(1) Add a new context field to trace_rcu_dyntick tracepoint. This
    context field can be "USER", "IDLE" or "IRQ".

(2) Remove the "++=" and "--=" strings and replace them with
   "StillNonIdle". This is much easier on the eyes, and the -- and ++
   are easily apparent in the dynticks_nesting counters we are printing
   anyway.

This patch is based on the previous patches to simplify rcu_dyntick
counters [1] and with these traces, I have verified the counters are
working properly.

[1]
Link: https://lore.kernel.org/patchwork/patch/1120021/
Link: https://lore.kernel.org/patchwork/patch/1120022/

Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 include/trace/events/rcu.h | 13 ++++++++-----
 kernel/rcu/tree.c          | 19 +++++++++++++------
 2 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index 66122602bd08..474c1f7e7104 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -449,12 +449,14 @@ TRACE_EVENT_RCU(rcu_fqs,
  */
 TRACE_EVENT_RCU(rcu_dyntick,
 
-	TP_PROTO(const char *polarity, long oldnesting, long newnesting, atomic_t dynticks),
+	TP_PROTO(const char *polarity, const char *context, long oldnesting,
+		 long newnesting, atomic_t dynticks),
 
-	TP_ARGS(polarity, oldnesting, newnesting, dynticks),
+	TP_ARGS(polarity, context, oldnesting, newnesting, dynticks),
 
 	TP_STRUCT__entry(
 		__field(const char *, polarity)
+		__field(const char *, context)
 		__field(long, oldnesting)
 		__field(long, newnesting)
 		__field(int, dynticks)
@@ -462,14 +464,15 @@ TRACE_EVENT_RCU(rcu_dyntick,
 
 	TP_fast_assign(
 		__entry->polarity = polarity;
+		__entry->context = context;
 		__entry->oldnesting = oldnesting;
 		__entry->newnesting = newnesting;
 		__entry->dynticks = atomic_read(&dynticks);
 	),
 
-	TP_printk("%s %lx %lx %#3x", __entry->polarity,
-		  __entry->oldnesting, __entry->newnesting,
-		  __entry->dynticks & 0xfff)
+	TP_printk("%s %s %lx %lx %#3x", __entry->polarity,
+		__entry->context, __entry->oldnesting, __entry->newnesting,
+		__entry->dynticks & 0xfff)
 );
 
 /*
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 417dd00b9e87..463407762b5a 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -533,7 +533,8 @@ static void rcu_eqs_enter(bool user)
 	}
 
 	lockdep_assert_irqs_disabled();
-	trace_rcu_dyntick(TPS("Start"), rdp->dynticks_nesting, 0, rdp->dynticks);
+	trace_rcu_dyntick(TPS("Start"), (user ? TPS("USER") : TPS("IDLE")),
+			  rdp->dynticks_nesting, 0, rdp->dynticks);
 	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
 	rdp = this_cpu_ptr(&rcu_data);
 	do_nocb_deferred_wakeup(rdp);
@@ -606,14 +607,17 @@ static __always_inline void rcu_nmi_exit_common(bool irq)
 	 * leave it in non-RCU-idle state.
 	 */
 	if (rdp->dynticks_nmi_nesting != 1) {
-		trace_rcu_dyntick(TPS("--="), rdp->dynticks_nmi_nesting, rdp->dynticks_nmi_nesting - 2, rdp->dynticks);
+		trace_rcu_dyntick(TPS("StillNonIdle"), TPS("IRQ"),
+				  rdp->dynticks_nmi_nesting,
+				  rdp->dynticks_nmi_nesting - 2, rdp->dynticks);
 		WRITE_ONCE(rdp->dynticks_nmi_nesting, /* No store tearing. */
 			   rdp->dynticks_nmi_nesting - 2);
 		return;
 	}
 
 	/* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */
-	trace_rcu_dyntick(TPS("Startirq"), rdp->dynticks_nmi_nesting, 0, rdp->dynticks);
+	trace_rcu_dyntick(TPS("Start"), TPS("IRQ"), rdp->dynticks_nmi_nesting,
+			  0, rdp->dynticks);
 	WRITE_ONCE(rdp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */
 
 	if (irq)
@@ -700,7 +704,8 @@ static void rcu_eqs_exit(bool user)
 	rcu_dynticks_task_exit();
 	rcu_dynticks_eqs_exit();
 	rcu_cleanup_after_idle();
-	trace_rcu_dyntick(TPS("End"), rdp->dynticks_nesting, 1, rdp->dynticks);
+	trace_rcu_dyntick(TPS("End"), (user ? TPS("USER") : TPS("IDLE")),
+			  rdp->dynticks_nesting, 1, rdp->dynticks);
 	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
 	WRITE_ONCE(rdp->dynticks_nesting, 1);
 	WARN_ON_ONCE(rdp->dynticks_nmi_nesting);
@@ -787,9 +792,11 @@ static __always_inline void rcu_nmi_enter_common(bool irq)
 		rdp->rcu_forced_tick = true;
 		tick_dep_set_cpu(rdp->cpu, TICK_DEP_BIT_RCU);
 	}
-	trace_rcu_dyntick(incby == 1 ? TPS("Endirq") : TPS("++="),
-			  rdp->dynticks_nmi_nesting,
+
+	trace_rcu_dyntick(incby == 1 ? TPS("End") : TPS("StillNonIdle"),
+			  TPS("IRQ"), rdp->dynticks_nmi_nesting,
 			  rdp->dynticks_nmi_nesting + incby, rdp->dynticks);
+
 	WRITE_ONCE(rdp->dynticks_nmi_nesting, /* Prevent store tearing. */
 		   rdp->dynticks_nmi_nesting + incby);
 	barrier();
-- 
2.23.0.187.g17f5b7556c-goog


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH -rcu dev 1/2] Revert b8c17e6664c4 ("rcu: Maintain special bits at bottom of ->dynticks counter")
  2019-08-30 16:23 [PATCH -rcu dev 1/2] Revert b8c17e6664c4 ("rcu: Maintain special bits at bottom of ->dynticks counter") Joel Fernandes (Google)
  2019-08-30 16:23 ` [PATCH -rcu dev 2/2] rcu/dyntick-idle: Add better tracing Joel Fernandes (Google)
@ 2019-09-03 20:02 ` Paul E. McKenney
  2019-09-04  4:59   ` Joel Fernandes
  1 sibling, 1 reply; 19+ messages in thread
From: Paul E. McKenney @ 2019-09-03 20:02 UTC (permalink / raw)
  To: Joel Fernandes (Google)
  Cc: linux-kernel, Andy Lutomirski, Bjorn Helgaas, Ingo Molnar,
	Josh Triplett, Lai Jiangshan, Mathieu Desnoyers, Petr Mladek,
	Rafael J. Wysocki, rcu, Steven Rostedt, Yafang Shao

On Fri, Aug 30, 2019 at 12:23:47PM -0400, Joel Fernandes (Google) wrote:
> This code is unused and can be removed now. Revert was straightforward.
> 
> Tested with light rcutorture.
> 
> Link: http://lore.kernel.org/r/CALCETrWNPOOdTrFabTDd=H7+wc6xJ9rJceg6OL1S0rTV5pfSsA@mail.gmail.com
> Suggested-by: Andy Lutomirski <luto@kernel.org>
> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>

Some questions below.

> ---
>  include/linux/rcutiny.h |  3 --
>  kernel/rcu/tree.c       | 82 ++++++++++-------------------------------
>  2 files changed, 19 insertions(+), 66 deletions(-)
> 
> diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
> index b7607e2667ae..b3f689711289 100644
> --- a/include/linux/rcutiny.h
> +++ b/include/linux/rcutiny.h
> @@ -14,9 +14,6 @@
>  
>  #include <asm/param.h> /* for HZ */
>  
> -/* Never flag non-existent other CPUs! */
> -static inline bool rcu_eqs_special_set(int cpu) { return false; }
> -
>  static inline unsigned long get_state_synchronize_rcu(void)
>  {
>  	return 0;
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index 68ebf0eb64c8..417dd00b9e87 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -69,20 +69,10 @@
>  
>  /* Data structures. */
>  
> -/*
> - * Steal a bit from the bottom of ->dynticks for idle entry/exit
> - * control.  Initially this is for TLB flushing.
> - */
> -#define RCU_DYNTICK_CTRL_MASK 0x1
> -#define RCU_DYNTICK_CTRL_CTR  (RCU_DYNTICK_CTRL_MASK + 1)
> -#ifndef rcu_eqs_special_exit
> -#define rcu_eqs_special_exit() do { } while (0)
> -#endif
> -
>  static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = {
>  	.dynticks_nesting = 1,
>  	.dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE,
> -	.dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR),
> +	.dynticks = ATOMIC_INIT(1),
>  };
>  struct rcu_state rcu_state = {
>  	.level = { &rcu_state.node[0] },
> @@ -229,20 +219,15 @@ void rcu_softirq_qs(void)
>  static void rcu_dynticks_eqs_enter(void)
>  {
>  	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
> -	int seq;
> +	int special;

Given that we really are now loading a pure sequence number, why
change the name to "special"?  This revert is after all removing
the ability of ->dynticks to be special.

>  	/*
> -	 * CPUs seeing atomic_add_return() must see prior RCU read-side
> +	 * CPUs seeing atomic_inc_return() must see prior RCU read-side
>  	 * critical sections, and we also must force ordering with the
>  	 * next idle sojourn.
>  	 */
> -	seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
> -	/* Better be in an extended quiescent state! */
> -	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
> -		     (seq & RCU_DYNTICK_CTRL_CTR));
> -	/* Better not have special action (TLB flush) pending! */
> -	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
> -		     (seq & RCU_DYNTICK_CTRL_MASK));
> +	special = atomic_inc_return(&rdp->dynticks);
> +	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && special & 0x1);
>  }
>  
>  /*
> @@ -252,22 +237,15 @@ static void rcu_dynticks_eqs_enter(void)
>  static void rcu_dynticks_eqs_exit(void)
>  {
>  	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
> -	int seq;
> +	int special;

Ditto.

>  	/*
> -	 * CPUs seeing atomic_add_return() must see prior idle sojourns,
> +	 * CPUs seeing atomic_inc_return() must see prior idle sojourns,
>  	 * and we also must force ordering with the next RCU read-side
>  	 * critical section.
>  	 */
> -	seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
> -	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
> -		     !(seq & RCU_DYNTICK_CTRL_CTR));
> -	if (seq & RCU_DYNTICK_CTRL_MASK) {
> -		atomic_andnot(RCU_DYNTICK_CTRL_MASK, &rdp->dynticks);
> -		smp_mb__after_atomic(); /* _exit after clearing mask. */
> -		/* Prefer duplicate flushes to losing a flush. */
> -		rcu_eqs_special_exit();
> -	}
> +	special = atomic_inc_return(&rdp->dynticks);
> +	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !(special & 0x1));
>  }
>  
>  /*
> @@ -284,9 +262,9 @@ static void rcu_dynticks_eqs_online(void)
>  {
>  	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
>  
> -	if (atomic_read(&rdp->dynticks) & RCU_DYNTICK_CTRL_CTR)
> +	if (atomic_read(&rdp->dynticks) & 0x1)
>  		return;
> -	atomic_add(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
> +	atomic_add(0x1, &rdp->dynticks);

This could be atomic_inc(), right?

>  }
>  
>  /*
> @@ -298,7 +276,7 @@ bool rcu_dynticks_curr_cpu_in_eqs(void)
>  {
>  	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
>  
> -	return !(atomic_read(&rdp->dynticks) & RCU_DYNTICK_CTRL_CTR);
> +	return !(atomic_read(&rdp->dynticks) & 0x1);
>  }
>  
>  /*
> @@ -309,7 +287,7 @@ int rcu_dynticks_snap(struct rcu_data *rdp)
>  {
>  	int snap = atomic_add_return(0, &rdp->dynticks);
>  
> -	return snap & ~RCU_DYNTICK_CTRL_MASK;
> +	return snap;
>  }
>  
>  /*
> @@ -318,7 +296,7 @@ int rcu_dynticks_snap(struct rcu_data *rdp)
>   */
>  static bool rcu_dynticks_in_eqs(int snap)
>  {
> -	return !(snap & RCU_DYNTICK_CTRL_CTR);
> +	return !(snap & 0x1);
>  }
>  
>  /*
> @@ -331,28 +309,6 @@ static bool rcu_dynticks_in_eqs_since(struct rcu_data *rdp, int snap)
>  	return snap != rcu_dynticks_snap(rdp);
>  }
>  
> -/*
> - * Set the special (bottom) bit of the specified CPU so that it
> - * will take special action (such as flushing its TLB) on the
> - * next exit from an extended quiescent state.  Returns true if
> - * the bit was successfully set, or false if the CPU was not in
> - * an extended quiescent state.
> - */
> -bool rcu_eqs_special_set(int cpu)
> -{
> -	int old;
> -	int new;
> -	struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
> -
> -	do {
> -		old = atomic_read(&rdp->dynticks);
> -		if (old & RCU_DYNTICK_CTRL_CTR)
> -			return false;
> -		new = old | RCU_DYNTICK_CTRL_MASK;
> -	} while (atomic_cmpxchg(&rdp->dynticks, old, new) != old);
> -	return true;
> -}
> -
>  /*
>   * Let the RCU core know that this CPU has gone through the scheduler,
>   * which is a quiescent state.  This is called when the need for a
> @@ -366,13 +322,13 @@ bool rcu_eqs_special_set(int cpu)
>   */
>  void rcu_momentary_dyntick_idle(void)
>  {
> -	int special;
> +	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
> +	int special = atomic_add_return(2, &rdp->dynticks);
>  
> -	raw_cpu_write(rcu_data.rcu_need_heavy_qs, false);
> -	special = atomic_add_return(2 * RCU_DYNTICK_CTRL_CTR,
> -				    &this_cpu_ptr(&rcu_data)->dynticks);
>  	/* It is illegal to call this from idle state. */
> -	WARN_ON_ONCE(!(special & RCU_DYNTICK_CTRL_CTR));
> +	WARN_ON_ONCE(!(special & 0x1));
> +
> +	raw_cpu_write(rcu_data.rcu_need_heavy_qs, false);

Is it really OK to clear .rcu_need_heavy_qs after the atomic_add_return()?

>  	rcu_preempt_deferred_qs(current);
>  }
>  EXPORT_SYMBOL_GPL(rcu_momentary_dyntick_idle);
> -- 
> 2.23.0.187.g17f5b7556c-goog
> 

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH -rcu dev 2/2] rcu/dyntick-idle: Add better tracing
  2019-08-30 16:23 ` [PATCH -rcu dev 2/2] rcu/dyntick-idle: Add better tracing Joel Fernandes (Google)
@ 2019-09-03 20:04   ` Paul E. McKenney
  2019-09-04  0:46     ` Joel Fernandes
  0 siblings, 1 reply; 19+ messages in thread
From: Paul E. McKenney @ 2019-09-03 20:04 UTC (permalink / raw)
  To: Joel Fernandes (Google)
  Cc: linux-kernel, Bjorn Helgaas, Ingo Molnar, Josh Triplett,
	Lai Jiangshan, Mathieu Desnoyers, Petr Mladek, Rafael J. Wysocki,
	rcu, Steven Rostedt, Yafang Shao

On Fri, Aug 30, 2019 at 12:23:48PM -0400, Joel Fernandes (Google) wrote:
> The dyntick-idle traces are a bit confusing. This patch makes it simpler
> and adds some missing cases such as EQS-enter due to user vs idle mode.
> 
> Following are the changes:
> (1) Add a new context field to trace_rcu_dyntick tracepoint. This
>     context field can be "USER", "IDLE" or "IRQ".
> 
> (2) Remove the "++=" and "--=" strings and replace them with
>    "StillNonIdle". This is much easier on the eyes, and the -- and ++
>    are easily apparent in the dynticks_nesting counters we are printing
>    anyway.
> 
> This patch is based on the previous patches to simplify rcu_dyntick
> counters [1] and with these traces, I have verified the counters are
> working properly.
> 
> [1]
> Link: https://lore.kernel.org/patchwork/patch/1120021/
> Link: https://lore.kernel.org/patchwork/patch/1120022/
> 
> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>

This looks fine, but depends on the earlier patch.

							Thanx, Paul

> ---
>  include/trace/events/rcu.h | 13 ++++++++-----
>  kernel/rcu/tree.c          | 19 +++++++++++++------
>  2 files changed, 21 insertions(+), 11 deletions(-)
> 
> diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
> index 66122602bd08..474c1f7e7104 100644
> --- a/include/trace/events/rcu.h
> +++ b/include/trace/events/rcu.h
> @@ -449,12 +449,14 @@ TRACE_EVENT_RCU(rcu_fqs,
>   */
>  TRACE_EVENT_RCU(rcu_dyntick,
>  
> -	TP_PROTO(const char *polarity, long oldnesting, long newnesting, atomic_t dynticks),
> +	TP_PROTO(const char *polarity, const char *context, long oldnesting,
> +		 long newnesting, atomic_t dynticks),
>  
> -	TP_ARGS(polarity, oldnesting, newnesting, dynticks),
> +	TP_ARGS(polarity, context, oldnesting, newnesting, dynticks),
>  
>  	TP_STRUCT__entry(
>  		__field(const char *, polarity)
> +		__field(const char *, context)
>  		__field(long, oldnesting)
>  		__field(long, newnesting)
>  		__field(int, dynticks)
> @@ -462,14 +464,15 @@ TRACE_EVENT_RCU(rcu_dyntick,
>  
>  	TP_fast_assign(
>  		__entry->polarity = polarity;
> +		__entry->context = context;
>  		__entry->oldnesting = oldnesting;
>  		__entry->newnesting = newnesting;
>  		__entry->dynticks = atomic_read(&dynticks);
>  	),
>  
> -	TP_printk("%s %lx %lx %#3x", __entry->polarity,
> -		  __entry->oldnesting, __entry->newnesting,
> -		  __entry->dynticks & 0xfff)
> +	TP_printk("%s %s %lx %lx %#3x", __entry->polarity,
> +		__entry->context, __entry->oldnesting, __entry->newnesting,
> +		__entry->dynticks & 0xfff)
>  );
>  
>  /*
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index 417dd00b9e87..463407762b5a 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -533,7 +533,8 @@ static void rcu_eqs_enter(bool user)
>  	}
>  
>  	lockdep_assert_irqs_disabled();
> -	trace_rcu_dyntick(TPS("Start"), rdp->dynticks_nesting, 0, rdp->dynticks);
> +	trace_rcu_dyntick(TPS("Start"), (user ? TPS("USER") : TPS("IDLE")),
> +			  rdp->dynticks_nesting, 0, rdp->dynticks);
>  	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
>  	rdp = this_cpu_ptr(&rcu_data);
>  	do_nocb_deferred_wakeup(rdp);
> @@ -606,14 +607,17 @@ static __always_inline void rcu_nmi_exit_common(bool irq)
>  	 * leave it in non-RCU-idle state.
>  	 */
>  	if (rdp->dynticks_nmi_nesting != 1) {
> -		trace_rcu_dyntick(TPS("--="), rdp->dynticks_nmi_nesting, rdp->dynticks_nmi_nesting - 2, rdp->dynticks);
> +		trace_rcu_dyntick(TPS("StillNonIdle"), TPS("IRQ"),
> +				  rdp->dynticks_nmi_nesting,
> +				  rdp->dynticks_nmi_nesting - 2, rdp->dynticks);
>  		WRITE_ONCE(rdp->dynticks_nmi_nesting, /* No store tearing. */
>  			   rdp->dynticks_nmi_nesting - 2);
>  		return;
>  	}
>  
>  	/* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */
> -	trace_rcu_dyntick(TPS("Startirq"), rdp->dynticks_nmi_nesting, 0, rdp->dynticks);
> +	trace_rcu_dyntick(TPS("Start"), TPS("IRQ"), rdp->dynticks_nmi_nesting,
> +			  0, rdp->dynticks);
>  	WRITE_ONCE(rdp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */
>  
>  	if (irq)
> @@ -700,7 +704,8 @@ static void rcu_eqs_exit(bool user)
>  	rcu_dynticks_task_exit();
>  	rcu_dynticks_eqs_exit();
>  	rcu_cleanup_after_idle();
> -	trace_rcu_dyntick(TPS("End"), rdp->dynticks_nesting, 1, rdp->dynticks);
> +	trace_rcu_dyntick(TPS("End"), (user ? TPS("USER") : TPS("IDLE")),
> +			  rdp->dynticks_nesting, 1, rdp->dynticks);
>  	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
>  	WRITE_ONCE(rdp->dynticks_nesting, 1);
>  	WARN_ON_ONCE(rdp->dynticks_nmi_nesting);
> @@ -787,9 +792,11 @@ static __always_inline void rcu_nmi_enter_common(bool irq)
>  		rdp->rcu_forced_tick = true;
>  		tick_dep_set_cpu(rdp->cpu, TICK_DEP_BIT_RCU);
>  	}
> -	trace_rcu_dyntick(incby == 1 ? TPS("Endirq") : TPS("++="),
> -			  rdp->dynticks_nmi_nesting,
> +
> +	trace_rcu_dyntick(incby == 1 ? TPS("End") : TPS("StillNonIdle"),
> +			  TPS("IRQ"), rdp->dynticks_nmi_nesting,
>  			  rdp->dynticks_nmi_nesting + incby, rdp->dynticks);
> +
>  	WRITE_ONCE(rdp->dynticks_nmi_nesting, /* Prevent store tearing. */
>  		   rdp->dynticks_nmi_nesting + incby);
>  	barrier();
> -- 
> 2.23.0.187.g17f5b7556c-goog
> 

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH -rcu dev 2/2] rcu/dyntick-idle: Add better tracing
  2019-09-03 20:04   ` Paul E. McKenney
@ 2019-09-04  0:46     ` Joel Fernandes
  0 siblings, 0 replies; 19+ messages in thread
From: Joel Fernandes @ 2019-09-04  0:46 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: linux-kernel, Bjorn Helgaas, Ingo Molnar, Josh Triplett,
	Lai Jiangshan, Mathieu Desnoyers, Petr Mladek, Rafael J. Wysocki,
	rcu, Steven Rostedt, Yafang Shao

On Tue, Sep 03, 2019 at 01:04:46PM -0700, Paul E. McKenney wrote:
> On Fri, Aug 30, 2019 at 12:23:48PM -0400, Joel Fernandes (Google) wrote:
> > The dyntick-idle traces are a bit confusing. This patch makes it simpler
> > and adds some missing cases such as EQS-enter due to user vs idle mode.
> > 
> > Following are the changes:
> > (1) Add a new context field to trace_rcu_dyntick tracepoint. This
> >     context field can be "USER", "IDLE" or "IRQ".
> > 
> > (2) Remove the "++=" and "--=" strings and replace them with
> >    "StillNonIdle". This is much easier on the eyes, and the -- and ++
> >    are easily apparent in the dynticks_nesting counters we are printing
> >    anyway.
> > 
> > This patch is based on the previous patches to simplify rcu_dyntick
> > counters [1] and with these traces, I have verified the counters are
> > working properly.
> > 
> > [1]
> > Link: https://lore.kernel.org/patchwork/patch/1120021/
> > Link: https://lore.kernel.org/patchwork/patch/1120022/
> > 
> > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> 
> This looks fine, but depends on the earlier patch.

The earlier patch looks Ok? Did not see any review comments. That is just a
revert of an old commit.

thanks,

 - Joel

> 							Thanx, Paul
> 
> > ---
> >  include/trace/events/rcu.h | 13 ++++++++-----
> >  kernel/rcu/tree.c          | 19 +++++++++++++------
> >  2 files changed, 21 insertions(+), 11 deletions(-)
> > 
> > diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
> > index 66122602bd08..474c1f7e7104 100644
> > --- a/include/trace/events/rcu.h
> > +++ b/include/trace/events/rcu.h
> > @@ -449,12 +449,14 @@ TRACE_EVENT_RCU(rcu_fqs,
> >   */
> >  TRACE_EVENT_RCU(rcu_dyntick,
> >  
> > -	TP_PROTO(const char *polarity, long oldnesting, long newnesting, atomic_t dynticks),
> > +	TP_PROTO(const char *polarity, const char *context, long oldnesting,
> > +		 long newnesting, atomic_t dynticks),
> >  
> > -	TP_ARGS(polarity, oldnesting, newnesting, dynticks),
> > +	TP_ARGS(polarity, context, oldnesting, newnesting, dynticks),
> >  
> >  	TP_STRUCT__entry(
> >  		__field(const char *, polarity)
> > +		__field(const char *, context)
> >  		__field(long, oldnesting)
> >  		__field(long, newnesting)
> >  		__field(int, dynticks)
> > @@ -462,14 +464,15 @@ TRACE_EVENT_RCU(rcu_dyntick,
> >  
> >  	TP_fast_assign(
> >  		__entry->polarity = polarity;
> > +		__entry->context = context;
> >  		__entry->oldnesting = oldnesting;
> >  		__entry->newnesting = newnesting;
> >  		__entry->dynticks = atomic_read(&dynticks);
> >  	),
> >  
> > -	TP_printk("%s %lx %lx %#3x", __entry->polarity,
> > -		  __entry->oldnesting, __entry->newnesting,
> > -		  __entry->dynticks & 0xfff)
> > +	TP_printk("%s %s %lx %lx %#3x", __entry->polarity,
> > +		__entry->context, __entry->oldnesting, __entry->newnesting,
> > +		__entry->dynticks & 0xfff)
> >  );
> >  
> >  /*
> > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > index 417dd00b9e87..463407762b5a 100644
> > --- a/kernel/rcu/tree.c
> > +++ b/kernel/rcu/tree.c
> > @@ -533,7 +533,8 @@ static void rcu_eqs_enter(bool user)
> >  	}
> >  
> >  	lockdep_assert_irqs_disabled();
> > -	trace_rcu_dyntick(TPS("Start"), rdp->dynticks_nesting, 0, rdp->dynticks);
> > +	trace_rcu_dyntick(TPS("Start"), (user ? TPS("USER") : TPS("IDLE")),
> > +			  rdp->dynticks_nesting, 0, rdp->dynticks);
> >  	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
> >  	rdp = this_cpu_ptr(&rcu_data);
> >  	do_nocb_deferred_wakeup(rdp);
> > @@ -606,14 +607,17 @@ static __always_inline void rcu_nmi_exit_common(bool irq)
> >  	 * leave it in non-RCU-idle state.
> >  	 */
> >  	if (rdp->dynticks_nmi_nesting != 1) {
> > -		trace_rcu_dyntick(TPS("--="), rdp->dynticks_nmi_nesting, rdp->dynticks_nmi_nesting - 2, rdp->dynticks);
> > +		trace_rcu_dyntick(TPS("StillNonIdle"), TPS("IRQ"),
> > +				  rdp->dynticks_nmi_nesting,
> > +				  rdp->dynticks_nmi_nesting - 2, rdp->dynticks);
> >  		WRITE_ONCE(rdp->dynticks_nmi_nesting, /* No store tearing. */
> >  			   rdp->dynticks_nmi_nesting - 2);
> >  		return;
> >  	}
> >  
> >  	/* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */
> > -	trace_rcu_dyntick(TPS("Startirq"), rdp->dynticks_nmi_nesting, 0, rdp->dynticks);
> > +	trace_rcu_dyntick(TPS("Start"), TPS("IRQ"), rdp->dynticks_nmi_nesting,
> > +			  0, rdp->dynticks);
> >  	WRITE_ONCE(rdp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */
> >  
> >  	if (irq)
> > @@ -700,7 +704,8 @@ static void rcu_eqs_exit(bool user)
> >  	rcu_dynticks_task_exit();
> >  	rcu_dynticks_eqs_exit();
> >  	rcu_cleanup_after_idle();
> > -	trace_rcu_dyntick(TPS("End"), rdp->dynticks_nesting, 1, rdp->dynticks);
> > +	trace_rcu_dyntick(TPS("End"), (user ? TPS("USER") : TPS("IDLE")),
> > +			  rdp->dynticks_nesting, 1, rdp->dynticks);
> >  	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
> >  	WRITE_ONCE(rdp->dynticks_nesting, 1);
> >  	WARN_ON_ONCE(rdp->dynticks_nmi_nesting);
> > @@ -787,9 +792,11 @@ static __always_inline void rcu_nmi_enter_common(bool irq)
> >  		rdp->rcu_forced_tick = true;
> >  		tick_dep_set_cpu(rdp->cpu, TICK_DEP_BIT_RCU);
> >  	}
> > -	trace_rcu_dyntick(incby == 1 ? TPS("Endirq") : TPS("++="),
> > -			  rdp->dynticks_nmi_nesting,
> > +
> > +	trace_rcu_dyntick(incby == 1 ? TPS("End") : TPS("StillNonIdle"),
> > +			  TPS("IRQ"), rdp->dynticks_nmi_nesting,
> >  			  rdp->dynticks_nmi_nesting + incby, rdp->dynticks);
> > +
> >  	WRITE_ONCE(rdp->dynticks_nmi_nesting, /* Prevent store tearing. */
> >  		   rdp->dynticks_nmi_nesting + incby);
> >  	barrier();
> > -- 
> > 2.23.0.187.g17f5b7556c-goog
> > 

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH -rcu dev 1/2] Revert b8c17e6664c4 ("rcu: Maintain special bits at bottom of ->dynticks counter")
  2019-09-03 20:02 ` [PATCH -rcu dev 1/2] Revert b8c17e6664c4 ("rcu: Maintain special bits at bottom of ->dynticks counter") Paul E. McKenney
@ 2019-09-04  4:59   ` Joel Fernandes
  2019-09-04 10:12     ` Paul E. McKenney
  0 siblings, 1 reply; 19+ messages in thread
From: Joel Fernandes @ 2019-09-04  4:59 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: linux-kernel, Andy Lutomirski, Bjorn Helgaas, Ingo Molnar,
	Josh Triplett, Lai Jiangshan, Mathieu Desnoyers, Petr Mladek,
	Rafael J. Wysocki, rcu, Steven Rostedt, Yafang Shao

On Tue, Sep 03, 2019 at 01:02:49PM -0700, Paul E. McKenney wrote:
[snip] 
> > ---
> >  include/linux/rcutiny.h |  3 --
> >  kernel/rcu/tree.c       | 82 ++++++++++-------------------------------
> >  2 files changed, 19 insertions(+), 66 deletions(-)
> > 
> > diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
> > index b7607e2667ae..b3f689711289 100644
> > --- a/include/linux/rcutiny.h
> > +++ b/include/linux/rcutiny.h
> > @@ -14,9 +14,6 @@
> >  
> >  #include <asm/param.h> /* for HZ */
> >  
> > -/* Never flag non-existent other CPUs! */
> > -static inline bool rcu_eqs_special_set(int cpu) { return false; }
> > -
> >  static inline unsigned long get_state_synchronize_rcu(void)
> >  {
> >  	return 0;
> > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > index 68ebf0eb64c8..417dd00b9e87 100644
> > --- a/kernel/rcu/tree.c
> > +++ b/kernel/rcu/tree.c
> > @@ -69,20 +69,10 @@
> >  
> >  /* Data structures. */
> >  
> > -/*
> > - * Steal a bit from the bottom of ->dynticks for idle entry/exit
> > - * control.  Initially this is for TLB flushing.
> > - */
> > -#define RCU_DYNTICK_CTRL_MASK 0x1
> > -#define RCU_DYNTICK_CTRL_CTR  (RCU_DYNTICK_CTRL_MASK + 1)
> > -#ifndef rcu_eqs_special_exit
> > -#define rcu_eqs_special_exit() do { } while (0)
> > -#endif
> > -
> >  static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = {
> >  	.dynticks_nesting = 1,
> >  	.dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE,
> > -	.dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR),
> > +	.dynticks = ATOMIC_INIT(1),
> >  };
> >  struct rcu_state rcu_state = {
> >  	.level = { &rcu_state.node[0] },
> > @@ -229,20 +219,15 @@ void rcu_softirq_qs(void)
> >  static void rcu_dynticks_eqs_enter(void)
> >  {
> >  	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
> > -	int seq;
> > +	int special;
> 
> Given that we really are now loading a pure sequence number, why
> change the name to "special"?  This revert is after all removing
> the ability of ->dynticks to be special.

I have no preference for this variable name, I can call it seq. I was going
by staying close to 'git revert' to minimize any issues from reverting. I'll
change it to seq then. But we are also going to rewrite this code anyway as
we were discussing.
 
> >  	/*
> > -	 * CPUs seeing atomic_add_return() must see prior idle sojourns,
> > +	 * CPUs seeing atomic_inc_return() must see prior idle sojourns,
> >  	 * and we also must force ordering with the next RCU read-side
> >  	 * critical section.
> >  	 */
> > -	seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
> > -	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
> > -		     !(seq & RCU_DYNTICK_CTRL_CTR));
> > -	if (seq & RCU_DYNTICK_CTRL_MASK) {
> > -		atomic_andnot(RCU_DYNTICK_CTRL_MASK, &rdp->dynticks);
> > -		smp_mb__after_atomic(); /* _exit after clearing mask. */
> > -		/* Prefer duplicate flushes to losing a flush. */
> > -		rcu_eqs_special_exit();
> > -	}
> > +	special = atomic_inc_return(&rdp->dynticks);
> > +	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !(special & 0x1));
> >  }
> >  
> >  /*
> > @@ -284,9 +262,9 @@ static void rcu_dynticks_eqs_online(void)
> >  {
> >  	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
> >  
> > -	if (atomic_read(&rdp->dynticks) & RCU_DYNTICK_CTRL_CTR)
> > +	if (atomic_read(&rdp->dynticks) & 0x1)
> >  		return;
> > -	atomic_add(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
> > +	atomic_add(0x1, &rdp->dynticks);
> 
> This could be atomic_inc(), right?

True, again I will blame 'git revert' ;-) Will change.

> > -/*
> > - * Set the special (bottom) bit of the specified CPU so that it
> > - * will take special action (such as flushing its TLB) on the
> > - * next exit from an extended quiescent state.  Returns true if
> > - * the bit was successfully set, or false if the CPU was not in
> > - * an extended quiescent state.
> > - */
> > -bool rcu_eqs_special_set(int cpu)
> > -{
> > -	int old;
> > -	int new;
> > -	struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
> > -
> > -	do {
> > -		old = atomic_read(&rdp->dynticks);
> > -		if (old & RCU_DYNTICK_CTRL_CTR)
> > -			return false;
> > -		new = old | RCU_DYNTICK_CTRL_MASK;
> > -	} while (atomic_cmpxchg(&rdp->dynticks, old, new) != old);
> > -	return true;
> > -}
> > -
> >  /*
> >   * Let the RCU core know that this CPU has gone through the scheduler,
> >   * which is a quiescent state.  This is called when the need for a
> > @@ -366,13 +322,13 @@ bool rcu_eqs_special_set(int cpu)
> >   */
> >  void rcu_momentary_dyntick_idle(void)
> >  {
> > -	int special;
> > +	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
> > +	int special = atomic_add_return(2, &rdp->dynticks);
> >  
> > -	raw_cpu_write(rcu_data.rcu_need_heavy_qs, false);
> > -	special = atomic_add_return(2 * RCU_DYNTICK_CTRL_CTR,
> > -				    &this_cpu_ptr(&rcu_data)->dynticks);
> >  	/* It is illegal to call this from idle state. */
> > -	WARN_ON_ONCE(!(special & RCU_DYNTICK_CTRL_CTR));
> > +	WARN_ON_ONCE(!(special & 0x1));
> > +
> > +	raw_cpu_write(rcu_data.rcu_need_heavy_qs, false);
> 
> Is it really OK to clear .rcu_need_heavy_qs after the atomic_add_return()?

I think so. I reviewed other code paths and did not see an issue with it. Did you
see something wrong with it?

thanks,

 - Joel


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH -rcu dev 1/2] Revert b8c17e6664c4 ("rcu: Maintain special bits at bottom of ->dynticks counter")
  2019-09-04  4:59   ` Joel Fernandes
@ 2019-09-04 10:12     ` Paul E. McKenney
  2019-09-04 13:54       ` Joel Fernandes
  0 siblings, 1 reply; 19+ messages in thread
From: Paul E. McKenney @ 2019-09-04 10:12 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: linux-kernel, Andy Lutomirski, Bjorn Helgaas, Ingo Molnar,
	Josh Triplett, Lai Jiangshan, Mathieu Desnoyers, Petr Mladek,
	Rafael J. Wysocki, rcu, Steven Rostedt, Yafang Shao

On Wed, Sep 04, 2019 at 12:59:10AM -0400, Joel Fernandes wrote:
> On Tue, Sep 03, 2019 at 01:02:49PM -0700, Paul E. McKenney wrote:
> [snip] 
> > > ---
> > >  include/linux/rcutiny.h |  3 --
> > >  kernel/rcu/tree.c       | 82 ++++++++++-------------------------------
> > >  2 files changed, 19 insertions(+), 66 deletions(-)
> > > 
> > > diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
> > > index b7607e2667ae..b3f689711289 100644
> > > --- a/include/linux/rcutiny.h
> > > +++ b/include/linux/rcutiny.h
> > > @@ -14,9 +14,6 @@
> > >  
> > >  #include <asm/param.h> /* for HZ */
> > >  
> > > -/* Never flag non-existent other CPUs! */
> > > -static inline bool rcu_eqs_special_set(int cpu) { return false; }
> > > -
> > >  static inline unsigned long get_state_synchronize_rcu(void)
> > >  {
> > >  	return 0;
> > > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > > index 68ebf0eb64c8..417dd00b9e87 100644
> > > --- a/kernel/rcu/tree.c
> > > +++ b/kernel/rcu/tree.c
> > > @@ -69,20 +69,10 @@
> > >  
> > >  /* Data structures. */
> > >  
> > > -/*
> > > - * Steal a bit from the bottom of ->dynticks for idle entry/exit
> > > - * control.  Initially this is for TLB flushing.
> > > - */
> > > -#define RCU_DYNTICK_CTRL_MASK 0x1
> > > -#define RCU_DYNTICK_CTRL_CTR  (RCU_DYNTICK_CTRL_MASK + 1)
> > > -#ifndef rcu_eqs_special_exit
> > > -#define rcu_eqs_special_exit() do { } while (0)
> > > -#endif
> > > -
> > >  static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = {
> > >  	.dynticks_nesting = 1,
> > >  	.dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE,
> > > -	.dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR),
> > > +	.dynticks = ATOMIC_INIT(1),
> > >  };
> > >  struct rcu_state rcu_state = {
> > >  	.level = { &rcu_state.node[0] },
> > > @@ -229,20 +219,15 @@ void rcu_softirq_qs(void)
> > >  static void rcu_dynticks_eqs_enter(void)
> > >  {
> > >  	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
> > > -	int seq;
> > > +	int special;
> > 
> > Given that we really are now loading a pure sequence number, why
> > change the name to "special"?  This revert is after all removing
> > the ability of ->dynticks to be special.
> 
> I have no preference for this variable name, I can call it seq. I was going
> by staying close to 'git revert' to minimize any issues from reverting. I'll
> change it to seq then. But we are also going to rewrite this code anyway as
> we were discussing.
>  
> > >  	/*
> > > -	 * CPUs seeing atomic_add_return() must see prior idle sojourns,
> > > +	 * CPUs seeing atomic_inc_return() must see prior idle sojourns,
> > >  	 * and we also must force ordering with the next RCU read-side
> > >  	 * critical section.
> > >  	 */
> > > -	seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
> > > -	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
> > > -		     !(seq & RCU_DYNTICK_CTRL_CTR));
> > > -	if (seq & RCU_DYNTICK_CTRL_MASK) {
> > > -		atomic_andnot(RCU_DYNTICK_CTRL_MASK, &rdp->dynticks);
> > > -		smp_mb__after_atomic(); /* _exit after clearing mask. */
> > > -		/* Prefer duplicate flushes to losing a flush. */
> > > -		rcu_eqs_special_exit();
> > > -	}
> > > +	special = atomic_inc_return(&rdp->dynticks);
> > > +	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !(special & 0x1));
> > >  }
> > >  
> > >  /*
> > > @@ -284,9 +262,9 @@ static void rcu_dynticks_eqs_online(void)
> > >  {
> > >  	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
> > >  
> > > -	if (atomic_read(&rdp->dynticks) & RCU_DYNTICK_CTRL_CTR)
> > > +	if (atomic_read(&rdp->dynticks) & 0x1)
> > >  		return;
> > > -	atomic_add(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
> > > +	atomic_add(0x1, &rdp->dynticks);
> > 
> > This could be atomic_inc(), right?
> 
> True, again I will blame 'git revert' ;-) Will change.
> 
> > > -/*
> > > - * Set the special (bottom) bit of the specified CPU so that it
> > > - * will take special action (such as flushing its TLB) on the
> > > - * next exit from an extended quiescent state.  Returns true if
> > > - * the bit was successfully set, or false if the CPU was not in
> > > - * an extended quiescent state.
> > > - */
> > > -bool rcu_eqs_special_set(int cpu)
> > > -{
> > > -	int old;
> > > -	int new;
> > > -	struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
> > > -
> > > -	do {
> > > -		old = atomic_read(&rdp->dynticks);
> > > -		if (old & RCU_DYNTICK_CTRL_CTR)
> > > -			return false;
> > > -		new = old | RCU_DYNTICK_CTRL_MASK;
> > > -	} while (atomic_cmpxchg(&rdp->dynticks, old, new) != old);
> > > -	return true;
> > > -}
> > > -
> > >  /*
> > >   * Let the RCU core know that this CPU has gone through the scheduler,
> > >   * which is a quiescent state.  This is called when the need for a
> > > @@ -366,13 +322,13 @@ bool rcu_eqs_special_set(int cpu)
> > >   */
> > >  void rcu_momentary_dyntick_idle(void)
> > >  {
> > > -	int special;
> > > +	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
> > > +	int special = atomic_add_return(2, &rdp->dynticks);
> > >  
> > > -	raw_cpu_write(rcu_data.rcu_need_heavy_qs, false);
> > > -	special = atomic_add_return(2 * RCU_DYNTICK_CTRL_CTR,
> > > -				    &this_cpu_ptr(&rcu_data)->dynticks);
> > >  	/* It is illegal to call this from idle state. */
> > > -	WARN_ON_ONCE(!(special & RCU_DYNTICK_CTRL_CTR));
> > > +	WARN_ON_ONCE(!(special & 0x1));
> > > +
> > > +	raw_cpu_write(rcu_data.rcu_need_heavy_qs, false);
> > 
> > Is it really OK to clear .rcu_need_heavy_qs after the atomic_add_return()?
> 
> I think so. I reviewed other code paths and did not see an issue with it. Did you
> see something wrong with it?

If this task gets delayed betweentimes, rcu_implicit_dynticks_qs() would
fail to set .rcu_need_heavy_qs because it saw it already being set,
even though the corresponding ->dynticks update had already happened.
(It might be a new grace period, given that the old grace period might
have ended courtesy of the atomic_add_return().)

							Thanx, Paul

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH -rcu dev 1/2] Revert b8c17e6664c4 ("rcu: Maintain special bits at bottom of ->dynticks counter")
  2019-09-04 10:12     ` Paul E. McKenney
@ 2019-09-04 13:54       ` Joel Fernandes
  2019-09-04 23:13         ` Paul E. McKenney
  0 siblings, 1 reply; 19+ messages in thread
From: Joel Fernandes @ 2019-09-04 13:54 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: linux-kernel, Andy Lutomirski, Bjorn Helgaas, Ingo Molnar,
	Josh Triplett, Lai Jiangshan, Mathieu Desnoyers, Petr Mladek,
	Rafael J. Wysocki, rcu, Steven Rostedt, Yafang Shao

On Wed, Sep 04, 2019 at 03:12:10AM -0700, Paul E. McKenney wrote:
> On Wed, Sep 04, 2019 at 12:59:10AM -0400, Joel Fernandes wrote:
> > On Tue, Sep 03, 2019 at 01:02:49PM -0700, Paul E. McKenney wrote:
> > [snip] 
> > > > ---
> > > >  include/linux/rcutiny.h |  3 --
> > > >  kernel/rcu/tree.c       | 82 ++++++++++-------------------------------
> > > >  2 files changed, 19 insertions(+), 66 deletions(-)
> > > > 
> > > > diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
> > > > index b7607e2667ae..b3f689711289 100644
> > > > --- a/include/linux/rcutiny.h
> > > > +++ b/include/linux/rcutiny.h
> > > > @@ -14,9 +14,6 @@
> > > >  
> > > >  #include <asm/param.h> /* for HZ */
> > > >  
> > > > -/* Never flag non-existent other CPUs! */
> > > > -static inline bool rcu_eqs_special_set(int cpu) { return false; }
> > > > -
> > > >  static inline unsigned long get_state_synchronize_rcu(void)
> > > >  {
> > > >  	return 0;
> > > > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > > > index 68ebf0eb64c8..417dd00b9e87 100644
> > > > --- a/kernel/rcu/tree.c
> > > > +++ b/kernel/rcu/tree.c
> > > > @@ -69,20 +69,10 @@
> > > >  
> > > >  /* Data structures. */
> > > >  
> > > > -/*
> > > > - * Steal a bit from the bottom of ->dynticks for idle entry/exit
> > > > - * control.  Initially this is for TLB flushing.
> > > > - */
> > > > -#define RCU_DYNTICK_CTRL_MASK 0x1
> > > > -#define RCU_DYNTICK_CTRL_CTR  (RCU_DYNTICK_CTRL_MASK + 1)
> > > > -#ifndef rcu_eqs_special_exit
> > > > -#define rcu_eqs_special_exit() do { } while (0)
> > > > -#endif
> > > > -
> > > >  static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = {
> > > >  	.dynticks_nesting = 1,
> > > >  	.dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE,
> > > > -	.dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR),
> > > > +	.dynticks = ATOMIC_INIT(1),
> > > >  };
> > > >  struct rcu_state rcu_state = {
> > > >  	.level = { &rcu_state.node[0] },
> > > > @@ -229,20 +219,15 @@ void rcu_softirq_qs(void)
> > > >  static void rcu_dynticks_eqs_enter(void)
> > > >  {
> > > >  	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
> > > > -	int seq;
> > > > +	int special;
> > > 
> > > Given that we really are now loading a pure sequence number, why
> > > change the name to "special"?  This revert is after all removing
> > > the ability of ->dynticks to be special.
> > 
> > I have no preference for this variable name, I can call it seq. I was going
> > by staying close to 'git revert' to minimize any issues from reverting. I'll
> > change it to seq then. But we are also going to rewrite this code anyway as
> > we were discussing.
> >  
> > > >  	/*
> > > > -	 * CPUs seeing atomic_add_return() must see prior idle sojourns,
> > > > +	 * CPUs seeing atomic_inc_return() must see prior idle sojourns,
> > > >  	 * and we also must force ordering with the next RCU read-side
> > > >  	 * critical section.
> > > >  	 */
> > > > -	seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
> > > > -	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
> > > > -		     !(seq & RCU_DYNTICK_CTRL_CTR));
> > > > -	if (seq & RCU_DYNTICK_CTRL_MASK) {
> > > > -		atomic_andnot(RCU_DYNTICK_CTRL_MASK, &rdp->dynticks);
> > > > -		smp_mb__after_atomic(); /* _exit after clearing mask. */
> > > > -		/* Prefer duplicate flushes to losing a flush. */
> > > > -		rcu_eqs_special_exit();
> > > > -	}
> > > > +	special = atomic_inc_return(&rdp->dynticks);
> > > > +	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !(special & 0x1));
> > > >  }
> > > >  
> > > >  /*
> > > > @@ -284,9 +262,9 @@ static void rcu_dynticks_eqs_online(void)
> > > >  {
> > > >  	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
> > > >  
> > > > -	if (atomic_read(&rdp->dynticks) & RCU_DYNTICK_CTRL_CTR)
> > > > +	if (atomic_read(&rdp->dynticks) & 0x1)
> > > >  		return;
> > > > -	atomic_add(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
> > > > +	atomic_add(0x1, &rdp->dynticks);
> > > 
> > > This could be atomic_inc(), right?
> > 
> > True, again I will blame 'git revert' ;-) Will change.
> > 
> > > > -/*
> > > > - * Set the special (bottom) bit of the specified CPU so that it
> > > > - * will take special action (such as flushing its TLB) on the
> > > > - * next exit from an extended quiescent state.  Returns true if
> > > > - * the bit was successfully set, or false if the CPU was not in
> > > > - * an extended quiescent state.
> > > > - */
> > > > -bool rcu_eqs_special_set(int cpu)
> > > > -{
> > > > -	int old;
> > > > -	int new;
> > > > -	struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
> > > > -
> > > > -	do {
> > > > -		old = atomic_read(&rdp->dynticks);
> > > > -		if (old & RCU_DYNTICK_CTRL_CTR)
> > > > -			return false;
> > > > -		new = old | RCU_DYNTICK_CTRL_MASK;
> > > > -	} while (atomic_cmpxchg(&rdp->dynticks, old, new) != old);
> > > > -	return true;
> > > > -}
> > > > -
> > > >  /*
> > > >   * Let the RCU core know that this CPU has gone through the scheduler,
> > > >   * which is a quiescent state.  This is called when the need for a
> > > > @@ -366,13 +322,13 @@ bool rcu_eqs_special_set(int cpu)
> > > >   */
> > > >  void rcu_momentary_dyntick_idle(void)
> > > >  {
> > > > -	int special;
> > > > +	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
> > > > +	int special = atomic_add_return(2, &rdp->dynticks);
> > > >  
> > > > -	raw_cpu_write(rcu_data.rcu_need_heavy_qs, false);
> > > > -	special = atomic_add_return(2 * RCU_DYNTICK_CTRL_CTR,
> > > > -				    &this_cpu_ptr(&rcu_data)->dynticks);
> > > >  	/* It is illegal to call this from idle state. */
> > > > -	WARN_ON_ONCE(!(special & RCU_DYNTICK_CTRL_CTR));
> > > > +	WARN_ON_ONCE(!(special & 0x1));
> > > > +
> > > > +	raw_cpu_write(rcu_data.rcu_need_heavy_qs, false);
> > > 
> > > Is it really OK to clear .rcu_need_heavy_qs after the atomic_add_return()?
> > 
> > I think so. I reviewed other code paths and did not see an issue with it. Did you
> > see something wrong with it?
> 
> If this task gets delayed betweentimes, rcu_implicit_dynticks_qs() would
> fail to set .rcu_need_heavy_qs because it saw it already being set,
> even though the corresponding ->dynticks update had already happened.
> (It might be a new grace period, given that the old grace period might
> have ended courtesy of the atomic_add_return().)

Makes sense and I agree.

Also, I would really appreciate if you can correct the nits in the above
patch we're reviewing, and apply them (if you can).
I think, there are only 2 changes left:
- rename special to seq.
- reorder the rcu_need_heavy_qs write.

 On a related point, when I was working on the NOHZ_FULL testing I noticed a
 weird issue where rcu_urgent_qs was reset but rcu_need_heavy_qs was still
 set indefinitely. I am a bit afraid our hints are not being cleared
 appropriately and I believe I fixed a similar issue a few months ago. I
 would rather have them cleared once they are no longer needed.  What do you
 think about the below patch? I did not submit it yet because I was working
 on other patches. 

---8<-----------------------

From: "Joel Fernandes (Google)" <joel@joelfernandes.org>
Subject: [RFC] rcu/tree: Reset CPU hints when reporting a quiescent state

While tracing, I am seeing cases where need_heavy_qs is still set even
though urgent_qs was cleared, after a quiescent state is reported. One
such case is when the softirq reports that a CPU has passed quiescent
state.

Previously in 671a63517cf9 ("rcu: Avoid unnecessary softirq when system
is idle"), I had fixed a bug where core_needs_qs was not being cleared.
I worry we keep running into similar situations. Let us just add a
function to clear hints and call it from all relevant places to make the
code more robust and avoid such stale hints which could in theory at
least, cause false hints after the quiescent state was already reported.

Tested overnight with rcutorture running for 60 minutes on all
configurations of RCU.

Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 kernel/rcu/tree.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 4f7c3096d786..493a526b390e 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1957,6 +1957,18 @@ rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
 	rcu_report_qs_rnp(mask, rnp_p, gps, flags);
 }
 
+/*
+ * Helper function to reset the hints provided by RCU's grace period management
+ * system. Typically used when a quiescent state for the CPU is reported.
+ * Called with the leaf node's lock held.
+ */
+static void rcu_reset_rdp_hints(struct rcu_data *rdp)
+{
+	rdp->core_needs_qs = false;
+	rdp->rcu_urgent_qs = false;
+	rdp->rcu_need_heavy_qs = false;
+}
+
 /*
  * Record a quiescent state for the specified CPU to that CPU's rcu_data
  * structure.  This must be called from the specified CPU.
@@ -1987,7 +1999,8 @@ rcu_report_qs_rdp(int cpu, struct rcu_data *rdp)
 		return;
 	}
 	mask = rdp->grpmask;
-	rdp->core_needs_qs = false;
+	rcu_reset_rdp_hints(rdp);
+
 	if ((rnp->qsmask & mask) == 0) {
 		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 	} else {
@@ -2315,6 +2328,7 @@ static void force_qs_rnp(int (*f)(struct rcu_data *rdp))
 			if ((rnp->qsmask & bit) != 0) {
 				rdp = per_cpu_ptr(&rcu_data, cpu);
 				if (f(rdp)) {
+					rcu_reset_rdp_hints(rdp);
 					mask |= bit;
 					rcu_disable_tick_upon_qs(rdp);
 				}
@@ -3359,6 +3373,7 @@ void rcu_cpu_starting(unsigned int cpu)
 	rdp->rcu_onl_gp_seq = READ_ONCE(rcu_state.gp_seq);
 	rdp->rcu_onl_gp_flags = READ_ONCE(rcu_state.gp_flags);
 	if (rnp->qsmask & mask) { /* RCU waiting on incoming CPU? */
+		rcu_reset_rdp_hints(rdp);
 		rcu_disable_tick_upon_qs(rdp);
 		/* Report QS -after- changing ->qsmaskinitnext! */
 		rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
-- 
2.23.0.187.g17f5b7556c-goog


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH -rcu dev 1/2] Revert b8c17e6664c4 ("rcu: Maintain special bits at bottom of ->dynticks counter")
  2019-09-04 13:54       ` Joel Fernandes
@ 2019-09-04 23:13         ` Paul E. McKenney
  2019-09-05 15:36           ` Joel Fernandes
  0 siblings, 1 reply; 19+ messages in thread
From: Paul E. McKenney @ 2019-09-04 23:13 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: linux-kernel, Andy Lutomirski, Bjorn Helgaas, Ingo Molnar,
	Josh Triplett, Lai Jiangshan, Mathieu Desnoyers, Petr Mladek,
	Rafael J. Wysocki, rcu, Steven Rostedt, Yafang Shao

On Wed, Sep 04, 2019 at 09:54:20AM -0400, Joel Fernandes wrote:
> On Wed, Sep 04, 2019 at 03:12:10AM -0700, Paul E. McKenney wrote:
> > On Wed, Sep 04, 2019 at 12:59:10AM -0400, Joel Fernandes wrote:
> > > On Tue, Sep 03, 2019 at 01:02:49PM -0700, Paul E. McKenney wrote:

[ . . . ]

> > If this task gets delayed betweentimes, rcu_implicit_dynticks_qs() would
> > fail to set .rcu_need_heavy_qs because it saw it already being set,
> > even though the corresponding ->dynticks update had already happened.
> > (It might be a new grace period, given that the old grace period might
> > have ended courtesy of the atomic_add_return().)
> 
> Makes sense and I agree.
> 
> Also, I would really appreciate if you can correct the nits in the above
> patch we're reviewing, and apply them (if you can).
> I think, there are only 2 changes left:
> - rename special to seq.
> - reorder the rcu_need_heavy_qs write.
> 
>  On a related point, when I was working on the NOHZ_FULL testing I noticed a
>  weird issue where rcu_urgent_qs was reset but rcu_need_heavy_qs was still
>  set indefinitely. I am a bit afraid our hints are not being cleared
>  appropriately and I believe I fixed a similar issue a few months ago. I
>  would rather have them cleared once they are no longer needed.  What do you
>  think about the below patch? I did not submit it yet because I was working
>  on other patches. 
> 
> ---8<-----------------------
> 
> From: "Joel Fernandes (Google)" <joel@joelfernandes.org>
> Subject: [RFC] rcu/tree: Reset CPU hints when reporting a quiescent state
> 
> While tracing, I am seeing cases where need_heavy_qs is still set even
> though urgent_qs was cleared, after a quiescent state is reported. One
> such case is when the softirq reports that a CPU has passed quiescent
> state.
> 
> Previously in 671a63517cf9 ("rcu: Avoid unnecessary softirq when system
> is idle"), I had fixed a bug where core_needs_qs was not being cleared.
> I worry we keep running into similar situations. Let us just add a
> function to clear hints and call it from all relevant places to make the
> code more robust and avoid such stale hints which could in theory at
> least, cause false hints after the quiescent state was already reported.
> 
> Tested overnight with rcutorture running for 60 minutes on all
> configurations of RCU.
> 
> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> ---
>  kernel/rcu/tree.c | 17 ++++++++++++++++-
>  1 file changed, 16 insertions(+), 1 deletion(-)

Excellent point!  But how about if we combine it with the existing
disabling of the scheduler tick, perhaps something like the following?

Note that the FQS clearing can come from some other CPU, hence the added
{READ,WRITE}_ONCE() calls.  The call is moved down in rcu_report_qs_rdp()
because something would have had to clear the bit to prevent execution
from getting there, and I believe that the other bit-clearing events
have calls to rcu_disable_urgency_upon_qs().  (But I easily could have
missed something!)

I am OK leaving RCU urgency set on offline CPUs, hence clearing things
at online time.

							Thanx, Paul

------------------------------------------------------------------------

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 68ebf0eb64c8..2b74b6c94086 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -827,7 +827,7 @@ static __always_inline void rcu_nmi_enter_common(bool irq)
 		incby = 1;
 	} else if (tick_nohz_full_cpu(rdp->cpu) &&
 		   rdp->dynticks_nmi_nesting == DYNTICK_IRQ_NONIDLE &&
-		   rdp->rcu_urgent_qs && !rdp->rcu_forced_tick) {
+		   READ_ONCE(rdp->rcu_urgent_qs) && !rdp->rcu_forced_tick) {
 		rdp->rcu_forced_tick = true;
 		tick_dep_set_cpu(rdp->cpu, TICK_DEP_BIT_RCU);
 	}
@@ -892,11 +892,15 @@ void rcu_irq_enter_irqson(void)
 }
 
 /*
- * If the scheduler-clock interrupt was enabled on a nohz_full CPU
- * in order to get to a quiescent state, disable it.
+ * If any sort of urgency was applied to the current CPU (for example,
+ * the scheduler-clock interrupt was enabled on a nohz_full CPU) in order
+ * to get to a quiescent state, disable it.
  */
-void rcu_disable_tick_upon_qs(struct rcu_data *rdp)
+void rcu_disable_urgency_upon_qs(struct rcu_data *rdp)
 {
+	WRITE_ONCE(rdp->core_needs_qs, false);
+	WRITE_ONCE(rdp->rcu_urgent_qs, false);
+	WRITE_ONCE(rdp->rcu_need_heavy_qs, false);
 	if (tick_nohz_full_cpu(rdp->cpu) && rdp->rcu_forced_tick) {
 		tick_dep_clear_cpu(rdp->cpu, TICK_DEP_BIT_RCU);
 		rdp->rcu_forced_tick = false;
@@ -1417,7 +1421,7 @@ static bool __note_gp_changes(struct rcu_node *rnp, struct rcu_data *rdp)
 		trace_rcu_grace_period(rcu_state.name, rnp->gp_seq, TPS("cpustart"));
 		need_gp = !!(rnp->qsmask & rdp->grpmask);
 		rdp->cpu_no_qs.b.norm = need_gp;
-		rdp->core_needs_qs = need_gp;
+		WRITE_ONCE(rdp->core_needs_qs, need_gp);
 		zero_cpu_stall_ticks(rdp);
 	}
 	rdp->gp_seq = rnp->gp_seq;  /* Remember new grace-period state. */
@@ -1987,7 +1991,6 @@ rcu_report_qs_rdp(int cpu, struct rcu_data *rdp)
 		return;
 	}
 	mask = rdp->grpmask;
-	rdp->core_needs_qs = false;
 	if ((rnp->qsmask & mask) == 0) {
 		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 	} else {
@@ -1998,7 +2001,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_data *rdp)
 		if (!offloaded)
 			needwake = rcu_accelerate_cbs(rnp, rdp);
 
-		rcu_disable_tick_upon_qs(rdp);
+		rcu_disable_urgency_upon_qs(rdp);
 		rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
 		/* ^^^ Released rnp->lock */
 		if (needwake)
@@ -2022,7 +2025,7 @@ rcu_check_quiescent_state(struct rcu_data *rdp)
 	 * Does this CPU still need to do its part for current grace period?
 	 * If no, return and let the other CPUs do their part as well.
 	 */
-	if (!rdp->core_needs_qs)
+	if (!READ_ONCE(rdp->core_needs_qs))
 		return;
 
 	/*
@@ -2316,7 +2319,7 @@ static void force_qs_rnp(int (*f)(struct rcu_data *rdp))
 				rdp = per_cpu_ptr(&rcu_data, cpu);
 				if (f(rdp)) {
 					mask |= bit;
-					rcu_disable_tick_upon_qs(rdp);
+					rcu_disable_urgency_upon_qs(rdp);
 				}
 			}
 		}
@@ -3004,7 +3007,7 @@ static int rcu_pending(void)
 		return 0;
 
 	/* Is the RCU core waiting for a quiescent state from this CPU? */
-	if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm)
+	if (READ_ONCE(rdp->core_needs_qs) && !rdp->cpu_no_qs.b.norm)
 		return 1;
 
 	/* Does this CPU have callbacks ready to invoke? */
@@ -3244,7 +3247,6 @@ int rcutree_prepare_cpu(unsigned int cpu)
 	rdp->gp_seq = rnp->gp_seq;
 	rdp->gp_seq_needed = rnp->gp_seq;
 	rdp->cpu_no_qs.b.norm = true;
-	rdp->core_needs_qs = false;
 	rdp->rcu_iw_pending = false;
 	rdp->rcu_iw_gp_seq = rnp->gp_seq - 1;
 	trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("cpuonl"));
@@ -3359,7 +3361,7 @@ void rcu_cpu_starting(unsigned int cpu)
 	rdp->rcu_onl_gp_seq = READ_ONCE(rcu_state.gp_seq);
 	rdp->rcu_onl_gp_flags = READ_ONCE(rcu_state.gp_flags);
 	if (rnp->qsmask & mask) { /* RCU waiting on incoming CPU? */
-		rcu_disable_tick_upon_qs(rdp);
+		rcu_disable_urgency_upon_qs(rdp);
 		/* Report QS -after- changing ->qsmaskinitnext! */
 		rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
 	} else {

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH -rcu dev 1/2] Revert b8c17e6664c4 ("rcu: Maintain special bits at bottom of ->dynticks counter")
  2019-09-04 23:13         ` Paul E. McKenney
@ 2019-09-05 15:36           ` Joel Fernandes
  2019-09-05 16:43             ` Paul E. McKenney
  0 siblings, 1 reply; 19+ messages in thread
From: Joel Fernandes @ 2019-09-05 15:36 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: linux-kernel, Andy Lutomirski, Bjorn Helgaas, Ingo Molnar,
	Josh Triplett, Lai Jiangshan, Mathieu Desnoyers, Petr Mladek,
	Rafael J. Wysocki, rcu, Steven Rostedt, Yafang Shao

On Wed, Sep 04, 2019 at 04:13:08PM -0700, Paul E. McKenney wrote:
> On Wed, Sep 04, 2019 at 09:54:20AM -0400, Joel Fernandes wrote:
> > On Wed, Sep 04, 2019 at 03:12:10AM -0700, Paul E. McKenney wrote:
> > > On Wed, Sep 04, 2019 at 12:59:10AM -0400, Joel Fernandes wrote:
> > > > On Tue, Sep 03, 2019 at 01:02:49PM -0700, Paul E. McKenney wrote:
> 
> [ . . . ]
> 
> > > If this task gets delayed betweentimes, rcu_implicit_dynticks_qs() would
> > > fail to set .rcu_need_heavy_qs because it saw it already being set,
> > > even though the corresponding ->dynticks update had already happened.
> > > (It might be a new grace period, given that the old grace period might
> > > have ended courtesy of the atomic_add_return().)
> > 
> > Makes sense and I agree.
> > 
> > Also, I would really appreciate if you can correct the nits in the above
> > patch we're reviewing, and apply them (if you can).
> > I think, there are only 2 changes left:
> > - rename special to seq.
> > - reorder the rcu_need_heavy_qs write.
> > 
> >  On a related point, when I was working on the NOHZ_FULL testing I noticed a
> >  weird issue where rcu_urgent_qs was reset but rcu_need_heavy_qs was still
> >  set indefinitely. I am a bit afraid our hints are not being cleared
> >  appropriately and I believe I fixed a similar issue a few months ago. I
> >  would rather have them cleared once they are no longer needed.  What do you
> >  think about the below patch? I did not submit it yet because I was working
> >  on other patches. 
> > 
> > ---8<-----------------------
> > 
> > From: "Joel Fernandes (Google)" <joel@joelfernandes.org>
> > Subject: [RFC] rcu/tree: Reset CPU hints when reporting a quiescent state
> > 
> > While tracing, I am seeing cases where need_heavy_qs is still set even
> > though urgent_qs was cleared, after a quiescent state is reported. One
> > such case is when the softirq reports that a CPU has passed quiescent
> > state.
> > 
> > Previously in 671a63517cf9 ("rcu: Avoid unnecessary softirq when system
> > is idle"), I had fixed a bug where core_needs_qs was not being cleared.
> > I worry we keep running into similar situations. Let us just add a
> > function to clear hints and call it from all relevant places to make the
> > code more robust and avoid such stale hints which could in theory at
> > least, cause false hints after the quiescent state was already reported.
> > 
> > Tested overnight with rcutorture running for 60 minutes on all
> > configurations of RCU.
> > 
> > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> > ---
> >  kernel/rcu/tree.c | 17 ++++++++++++++++-
> >  1 file changed, 16 insertions(+), 1 deletion(-)
> 
> Excellent point!  But how about if we combine it with the existing
> disabling of the scheduler tick, perhaps something like the following?
> 
> Note that the FQS clearing can come from some other CPU, hence the added
> {READ,WRITE}_ONCE() calls.  The call is moved down in rcu_report_qs_rdp()
> because something would have had to clear the bit to prevent execution
> from getting there, and I believe that the other bit-clearing events
> have calls to rcu_disable_urgency_upon_qs().  (But I easily could have
> missed something!)

Is there any harm just clearing it earlier in rcu_report_qs_rdp()? If no,
then let us just play it safe and do it that way (clear earlier in
rcu_report_qs_rdp())?

> I am OK leaving RCU urgency set on offline CPUs, hence clearing things
> at online time.

Got it, probably this point can be added to the commit message.

Added more comments below but otherwise it looks good to me:

> ------------------------------------------------------------------------
> 
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index 68ebf0eb64c8..2b74b6c94086 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -827,7 +827,7 @@ static __always_inline void rcu_nmi_enter_common(bool irq)
>  		incby = 1;
>  	} else if (tick_nohz_full_cpu(rdp->cpu) &&
>  		   rdp->dynticks_nmi_nesting == DYNTICK_IRQ_NONIDLE &&
> -		   rdp->rcu_urgent_qs && !rdp->rcu_forced_tick) {
> +		   READ_ONCE(rdp->rcu_urgent_qs) && !rdp->rcu_forced_tick) {
>  		rdp->rcu_forced_tick = true;
>  		tick_dep_set_cpu(rdp->cpu, TICK_DEP_BIT_RCU);
>  	}
> @@ -892,11 +892,15 @@ void rcu_irq_enter_irqson(void)
>  }
>  
>  /*
> - * If the scheduler-clock interrupt was enabled on a nohz_full CPU
> - * in order to get to a quiescent state, disable it.
> + * If any sort of urgency was applied to the current CPU (for example,
> + * the scheduler-clock interrupt was enabled on a nohz_full CPU) in order
> + * to get to a quiescent state, disable it.
>   */
> -void rcu_disable_tick_upon_qs(struct rcu_data *rdp)
> +void rcu_disable_urgency_upon_qs(struct rcu_data *rdp)
>  {
> +	WRITE_ONCE(rdp->core_needs_qs, false);
> +	WRITE_ONCE(rdp->rcu_urgent_qs, false);
> +	WRITE_ONCE(rdp->rcu_need_heavy_qs, false);

Better to put a comment here saying _ONCE is needed to avoid data-races with
the FQS loop? Just so if anyone thinks why we are using _ONCE().

And I am guessing the __this_cpu_read(rcu_data.core_needs_qs) in
rcu_flavor_sched_clock_irq() implies READ_ONCE() so no need READ_ONCE()
there right?

>  	if (tick_nohz_full_cpu(rdp->cpu) && rdp->rcu_forced_tick) {
>  		tick_dep_clear_cpu(rdp->cpu, TICK_DEP_BIT_RCU);
>  		rdp->rcu_forced_tick = false;
> @@ -1417,7 +1421,7 @@ static bool __note_gp_changes(struct rcu_node *rnp, struct rcu_data *rdp)
>  		trace_rcu_grace_period(rcu_state.name, rnp->gp_seq, TPS("cpustart"));
>  		need_gp = !!(rnp->qsmask & rdp->grpmask);
>  		rdp->cpu_no_qs.b.norm = need_gp;
> -		rdp->core_needs_qs = need_gp;
> +		WRITE_ONCE(rdp->core_needs_qs, need_gp);
>  		zero_cpu_stall_ticks(rdp);
>  	}
>  	rdp->gp_seq = rnp->gp_seq;  /* Remember new grace-period state. */
> @@ -1987,7 +1991,6 @@ rcu_report_qs_rdp(int cpu, struct rcu_data *rdp)
>  		return;
>  	}
>  	mask = rdp->grpmask;
> -	rdp->core_needs_qs = false;
>  	if ((rnp->qsmask & mask) == 0) {
>  		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
>  	} else {
> @@ -1998,7 +2001,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_data *rdp)
>  		if (!offloaded)
>  			needwake = rcu_accelerate_cbs(rnp, rdp);
>  
> -		rcu_disable_tick_upon_qs(rdp);
> +		rcu_disable_urgency_upon_qs(rdp);
>  		rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
>  		/* ^^^ Released rnp->lock */
>  		if (needwake)
> @@ -2022,7 +2025,7 @@ rcu_check_quiescent_state(struct rcu_data *rdp)
>  	 * Does this CPU still need to do its part for current grace period?
>  	 * If no, return and let the other CPUs do their part as well.
>  	 */
> -	if (!rdp->core_needs_qs)
> +	if (!READ_ONCE(rdp->core_needs_qs))
>  		return;
>  
>  	/*
> @@ -2316,7 +2319,7 @@ static void force_qs_rnp(int (*f)(struct rcu_data *rdp))
>  				rdp = per_cpu_ptr(&rcu_data, cpu);
>  				if (f(rdp)) {
>  					mask |= bit;
> -					rcu_disable_tick_upon_qs(rdp);
> +					rcu_disable_urgency_upon_qs(rdp);
>  				}
>  			}
>  		}
> @@ -3004,7 +3007,7 @@ static int rcu_pending(void)
>  		return 0;
>  
>  	/* Is the RCU core waiting for a quiescent state from this CPU? */
> -	if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm)
> +	if (READ_ONCE(rdp->core_needs_qs) && !rdp->cpu_no_qs.b.norm)
>  		return 1;
>  
>  	/* Does this CPU have callbacks ready to invoke? */
> @@ -3244,7 +3247,6 @@ int rcutree_prepare_cpu(unsigned int cpu)
>  	rdp->gp_seq = rnp->gp_seq;
>  	rdp->gp_seq_needed = rnp->gp_seq;
>  	rdp->cpu_no_qs.b.norm = true;
> -	rdp->core_needs_qs = false;

How about calling the new hint-clearing function here as well? Just for
robustness and consistency purposes?

thanks,

 - Joel

>  	rdp->rcu_iw_pending = false;
>  	rdp->rcu_iw_gp_seq = rnp->gp_seq - 1;
>  	trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("cpuonl"));
> @@ -3359,7 +3361,7 @@ void rcu_cpu_starting(unsigned int cpu)
>  	rdp->rcu_onl_gp_seq = READ_ONCE(rcu_state.gp_seq);
>  	rdp->rcu_onl_gp_flags = READ_ONCE(rcu_state.gp_flags);
>  	if (rnp->qsmask & mask) { /* RCU waiting on incoming CPU? */
> -		rcu_disable_tick_upon_qs(rdp);
> +		rcu_disable_urgency_upon_qs(rdp);
>  		/* Report QS -after- changing ->qsmaskinitnext! */
>  		rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
>  	} else {

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH -rcu dev 1/2] Revert b8c17e6664c4 ("rcu: Maintain special bits at bottom of ->dynticks counter")
  2019-09-05 15:36           ` Joel Fernandes
@ 2019-09-05 16:43             ` Paul E. McKenney
  2019-09-06  0:01               ` Joel Fernandes
  0 siblings, 1 reply; 19+ messages in thread
From: Paul E. McKenney @ 2019-09-05 16:43 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: linux-kernel, Andy Lutomirski, Bjorn Helgaas, Ingo Molnar,
	Josh Triplett, Lai Jiangshan, Mathieu Desnoyers, Petr Mladek,
	Rafael J. Wysocki, rcu, Steven Rostedt, Yafang Shao

On Thu, Sep 05, 2019 at 11:36:20AM -0400, Joel Fernandes wrote:
> On Wed, Sep 04, 2019 at 04:13:08PM -0700, Paul E. McKenney wrote:
> > On Wed, Sep 04, 2019 at 09:54:20AM -0400, Joel Fernandes wrote:
> > > On Wed, Sep 04, 2019 at 03:12:10AM -0700, Paul E. McKenney wrote:
> > > > On Wed, Sep 04, 2019 at 12:59:10AM -0400, Joel Fernandes wrote:
> > > > > On Tue, Sep 03, 2019 at 01:02:49PM -0700, Paul E. McKenney wrote:
> > 
> > [ . . . ]
> > 
> > > > If this task gets delayed betweentimes, rcu_implicit_dynticks_qs() would
> > > > fail to set .rcu_need_heavy_qs because it saw it already being set,
> > > > even though the corresponding ->dynticks update had already happened.
> > > > (It might be a new grace period, given that the old grace period might
> > > > have ended courtesy of the atomic_add_return().)
> > > 
> > > Makes sense and I agree.
> > > 
> > > Also, I would really appreciate if you can correct the nits in the above
> > > patch we're reviewing, and apply them (if you can).
> > > I think, there are only 2 changes left:
> > > - rename special to seq.
> > > - reorder the rcu_need_heavy_qs write.
> > > 
> > >  On a related point, when I was working on the NOHZ_FULL testing I noticed a
> > >  weird issue where rcu_urgent_qs was reset but rcu_need_heavy_qs was still
> > >  set indefinitely. I am a bit afraid our hints are not being cleared
> > >  appropriately and I believe I fixed a similar issue a few months ago. I
> > >  would rather have them cleared once they are no longer needed.  What do you
> > >  think about the below patch? I did not submit it yet because I was working
> > >  on other patches. 
> > > 
> > > ---8<-----------------------
> > > 
> > > From: "Joel Fernandes (Google)" <joel@joelfernandes.org>
> > > Subject: [RFC] rcu/tree: Reset CPU hints when reporting a quiescent state
> > > 
> > > While tracing, I am seeing cases where need_heavy_qs is still set even
> > > though urgent_qs was cleared, after a quiescent state is reported. One
> > > such case is when the softirq reports that a CPU has passed quiescent
> > > state.
> > > 
> > > Previously in 671a63517cf9 ("rcu: Avoid unnecessary softirq when system
> > > is idle"), I had fixed a bug where core_needs_qs was not being cleared.
> > > I worry we keep running into similar situations. Let us just add a
> > > function to clear hints and call it from all relevant places to make the
> > > code more robust and avoid such stale hints which could in theory at
> > > least, cause false hints after the quiescent state was already reported.
> > > 
> > > Tested overnight with rcutorture running for 60 minutes on all
> > > configurations of RCU.
> > > 
> > > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> > > ---
> > >  kernel/rcu/tree.c | 17 ++++++++++++++++-
> > >  1 file changed, 16 insertions(+), 1 deletion(-)
> > 
> > Excellent point!  But how about if we combine it with the existing
> > disabling of the scheduler tick, perhaps something like the following?
> > 
> > Note that the FQS clearing can come from some other CPU, hence the added
> > {READ,WRITE}_ONCE() calls.  The call is moved down in rcu_report_qs_rdp()
> > because something would have had to clear the bit to prevent execution
> > from getting there, and I believe that the other bit-clearing events
> > have calls to rcu_disable_urgency_upon_qs().  (But I easily could have
> > missed something!)
> 
> Is there any harm just clearing it earlier in rcu_report_qs_rdp()? If no,
> then let us just play it safe and do it that way (clear earlier in
> rcu_report_qs_rdp())?

Maybe...

But given that missing a path doesn't cause a major failure (too-short
grace period, for example), I am more inclined to find the paths and
fix them as needed.  Especially given that my ignorance of any path to
a quiescent state likely hides a serious bug.

> > I am OK leaving RCU urgency set on offline CPUs, hence clearing things
> > at online time.
> 
> Got it, probably this point can be added to the commit message.
> 
> Added more comments below but otherwise it looks good to me:
> 
> > ------------------------------------------------------------------------
> > 
> > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > index 68ebf0eb64c8..2b74b6c94086 100644
> > --- a/kernel/rcu/tree.c
> > +++ b/kernel/rcu/tree.c
> > @@ -827,7 +827,7 @@ static __always_inline void rcu_nmi_enter_common(bool irq)
> >  		incby = 1;
> >  	} else if (tick_nohz_full_cpu(rdp->cpu) &&
> >  		   rdp->dynticks_nmi_nesting == DYNTICK_IRQ_NONIDLE &&
> > -		   rdp->rcu_urgent_qs && !rdp->rcu_forced_tick) {
> > +		   READ_ONCE(rdp->rcu_urgent_qs) && !rdp->rcu_forced_tick) {
> >  		rdp->rcu_forced_tick = true;
> >  		tick_dep_set_cpu(rdp->cpu, TICK_DEP_BIT_RCU);
> >  	}
> > @@ -892,11 +892,15 @@ void rcu_irq_enter_irqson(void)
> >  }
> >  
> >  /*
> > - * If the scheduler-clock interrupt was enabled on a nohz_full CPU
> > - * in order to get to a quiescent state, disable it.
> > + * If any sort of urgency was applied to the current CPU (for example,
> > + * the scheduler-clock interrupt was enabled on a nohz_full CPU) in order
> > + * to get to a quiescent state, disable it.
> >   */
> > -void rcu_disable_tick_upon_qs(struct rcu_data *rdp)
> > +void rcu_disable_urgency_upon_qs(struct rcu_data *rdp)
> >  {
> > +	WRITE_ONCE(rdp->core_needs_qs, false);
> > +	WRITE_ONCE(rdp->rcu_urgent_qs, false);
> > +	WRITE_ONCE(rdp->rcu_need_heavy_qs, false);
> 
> Better to put a comment here saying _ONCE is needed to avoid data-races with
> the FQS loop? Just so if anyone thinks why we are using _ONCE().

Good point.  I added a "// WRITE_ONCE() for FQS".

> And I am guessing the __this_cpu_read(rcu_data.core_needs_qs) in
> rcu_flavor_sched_clock_irq() implies READ_ONCE() so no need READ_ONCE()
> there right?

Assembly in x86.  Not so much on other architectures, though.  ;-)
See raw_cpu_generic_read().

> >  	if (tick_nohz_full_cpu(rdp->cpu) && rdp->rcu_forced_tick) {
> >  		tick_dep_clear_cpu(rdp->cpu, TICK_DEP_BIT_RCU);
> >  		rdp->rcu_forced_tick = false;
> > @@ -1417,7 +1421,7 @@ static bool __note_gp_changes(struct rcu_node *rnp, struct rcu_data *rdp)
> >  		trace_rcu_grace_period(rcu_state.name, rnp->gp_seq, TPS("cpustart"));
> >  		need_gp = !!(rnp->qsmask & rdp->grpmask);
> >  		rdp->cpu_no_qs.b.norm = need_gp;
> > -		rdp->core_needs_qs = need_gp;
> > +		WRITE_ONCE(rdp->core_needs_qs, need_gp);
> >  		zero_cpu_stall_ticks(rdp);
> >  	}
> >  	rdp->gp_seq = rnp->gp_seq;  /* Remember new grace-period state. */
> > @@ -1987,7 +1991,6 @@ rcu_report_qs_rdp(int cpu, struct rcu_data *rdp)
> >  		return;
> >  	}
> >  	mask = rdp->grpmask;
> > -	rdp->core_needs_qs = false;
> >  	if ((rnp->qsmask & mask) == 0) {
> >  		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
> >  	} else {
> > @@ -1998,7 +2001,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_data *rdp)
> >  		if (!offloaded)
> >  			needwake = rcu_accelerate_cbs(rnp, rdp);
> >  
> > -		rcu_disable_tick_upon_qs(rdp);
> > +		rcu_disable_urgency_upon_qs(rdp);
> >  		rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
> >  		/* ^^^ Released rnp->lock */
> >  		if (needwake)
> > @@ -2022,7 +2025,7 @@ rcu_check_quiescent_state(struct rcu_data *rdp)
> >  	 * Does this CPU still need to do its part for current grace period?
> >  	 * If no, return and let the other CPUs do their part as well.
> >  	 */
> > -	if (!rdp->core_needs_qs)
> > +	if (!READ_ONCE(rdp->core_needs_qs))
> >  		return;
> >  
> >  	/*
> > @@ -2316,7 +2319,7 @@ static void force_qs_rnp(int (*f)(struct rcu_data *rdp))
> >  				rdp = per_cpu_ptr(&rcu_data, cpu);
> >  				if (f(rdp)) {
> >  					mask |= bit;
> > -					rcu_disable_tick_upon_qs(rdp);
> > +					rcu_disable_urgency_upon_qs(rdp);
> >  				}
> >  			}
> >  		}
> > @@ -3004,7 +3007,7 @@ static int rcu_pending(void)
> >  		return 0;
> >  
> >  	/* Is the RCU core waiting for a quiescent state from this CPU? */
> > -	if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm)
> > +	if (READ_ONCE(rdp->core_needs_qs) && !rdp->cpu_no_qs.b.norm)
> >  		return 1;
> >  
> >  	/* Does this CPU have callbacks ready to invoke? */
> > @@ -3244,7 +3247,6 @@ int rcutree_prepare_cpu(unsigned int cpu)
> >  	rdp->gp_seq = rnp->gp_seq;
> >  	rdp->gp_seq_needed = rnp->gp_seq;
> >  	rdp->cpu_no_qs.b.norm = true;
> > -	rdp->core_needs_qs = false;
> 
> How about calling the new hint-clearing function here as well? Just for
> robustness and consistency purposes?

This and the next function are both called during a CPU-hotplug online
operation, so there is little robustness or consistency to be had by
doing it twice.

							Thanx, Paul

> thanks,
> 
>  - Joel
> 
> >  	rdp->rcu_iw_pending = false;
> >  	rdp->rcu_iw_gp_seq = rnp->gp_seq - 1;
> >  	trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("cpuonl"));
> > @@ -3359,7 +3361,7 @@ void rcu_cpu_starting(unsigned int cpu)
> >  	rdp->rcu_onl_gp_seq = READ_ONCE(rcu_state.gp_seq);
> >  	rdp->rcu_onl_gp_flags = READ_ONCE(rcu_state.gp_flags);
> >  	if (rnp->qsmask & mask) { /* RCU waiting on incoming CPU? */
> > -		rcu_disable_tick_upon_qs(rdp);
> > +		rcu_disable_urgency_upon_qs(rdp);
> >  		/* Report QS -after- changing ->qsmaskinitnext! */
> >  		rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
> >  	} else {

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH -rcu dev 1/2] Revert b8c17e6664c4 ("rcu: Maintain special bits at bottom of ->dynticks counter")
  2019-09-05 16:43             ` Paul E. McKenney
@ 2019-09-06  0:01               ` Joel Fernandes
  2019-09-06 15:08                 ` Joel Fernandes
  0 siblings, 1 reply; 19+ messages in thread
From: Joel Fernandes @ 2019-09-06  0:01 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: linux-kernel, Andy Lutomirski, Bjorn Helgaas, Ingo Molnar,
	Josh Triplett, Lai Jiangshan, Mathieu Desnoyers, Petr Mladek,
	Rafael J. Wysocki, rcu, Steven Rostedt, Yafang Shao

On Thu, Sep 05, 2019 at 09:43:29AM -0700, Paul E. McKenney wrote:
> On Thu, Sep 05, 2019 at 11:36:20AM -0400, Joel Fernandes wrote:
> > On Wed, Sep 04, 2019 at 04:13:08PM -0700, Paul E. McKenney wrote:
> > > On Wed, Sep 04, 2019 at 09:54:20AM -0400, Joel Fernandes wrote:
> > > > On Wed, Sep 04, 2019 at 03:12:10AM -0700, Paul E. McKenney wrote:
> > > > > On Wed, Sep 04, 2019 at 12:59:10AM -0400, Joel Fernandes wrote:
> > > > > > On Tue, Sep 03, 2019 at 01:02:49PM -0700, Paul E. McKenney wrote:
> > > 
> > > [ . . . ]
> > > 
> > > > > If this task gets delayed betweentimes, rcu_implicit_dynticks_qs() would
> > > > > fail to set .rcu_need_heavy_qs because it saw it already being set,
> > > > > even though the corresponding ->dynticks update had already happened.
> > > > > (It might be a new grace period, given that the old grace period might
> > > > > have ended courtesy of the atomic_add_return().)
> > > > 
> > > > Makes sense and I agree.
> > > > 
> > > > Also, I would really appreciate if you can correct the nits in the above
> > > > patch we're reviewing, and apply them (if you can).
> > > > I think, there are only 2 changes left:
> > > > - rename special to seq.
> > > > - reorder the rcu_need_heavy_qs write.
> > > > 
> > > >  On a related point, when I was working on the NOHZ_FULL testing I noticed a
> > > >  weird issue where rcu_urgent_qs was reset but rcu_need_heavy_qs was still
> > > >  set indefinitely. I am a bit afraid our hints are not being cleared
> > > >  appropriately and I believe I fixed a similar issue a few months ago. I
> > > >  would rather have them cleared once they are no longer needed.  What do you
> > > >  think about the below patch? I did not submit it yet because I was working
> > > >  on other patches. 
> > > > 
> > > > ---8<-----------------------
> > > > 
> > > > From: "Joel Fernandes (Google)" <joel@joelfernandes.org>
> > > > Subject: [RFC] rcu/tree: Reset CPU hints when reporting a quiescent state
> > > > 
> > > > While tracing, I am seeing cases where need_heavy_qs is still set even
> > > > though urgent_qs was cleared, after a quiescent state is reported. One
> > > > such case is when the softirq reports that a CPU has passed quiescent
> > > > state.
> > > > 
> > > > Previously in 671a63517cf9 ("rcu: Avoid unnecessary softirq when system
> > > > is idle"), I had fixed a bug where core_needs_qs was not being cleared.
> > > > I worry we keep running into similar situations. Let us just add a
> > > > function to clear hints and call it from all relevant places to make the
> > > > code more robust and avoid such stale hints which could in theory at
> > > > least, cause false hints after the quiescent state was already reported.
> > > > 
> > > > Tested overnight with rcutorture running for 60 minutes on all
> > > > configurations of RCU.
> > > > 
> > > > Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> > > > ---
> > > >  kernel/rcu/tree.c | 17 ++++++++++++++++-
> > > >  1 file changed, 16 insertions(+), 1 deletion(-)
> > > 
> > > Excellent point!  But how about if we combine it with the existing
> > > disabling of the scheduler tick, perhaps something like the following?
> > > 
> > > Note that the FQS clearing can come from some other CPU, hence the added
> > > {READ,WRITE}_ONCE() calls.  The call is moved down in rcu_report_qs_rdp()
> > > because something would have had to clear the bit to prevent execution
> > > from getting there, and I believe that the other bit-clearing events
> > > have calls to rcu_disable_urgency_upon_qs().  (But I easily could have
> > > missed something!)
> > 
> > Is there any harm just clearing it earlier in rcu_report_qs_rdp()? If no,
> > then let us just play it safe and do it that way (clear earlier in
> > rcu_report_qs_rdp())?
> 
> Maybe...
> 
> But given that missing a path doesn't cause a major failure (too-short
> grace period, for example), I am more inclined to find the paths and
> fix them as needed.  Especially given that my ignorance of any path to
> a quiescent state likely hides a serious bug.

Ok that's fine.

> > And I am guessing the __this_cpu_read(rcu_data.core_needs_qs) in
> > rcu_flavor_sched_clock_irq() implies READ_ONCE() so no need READ_ONCE()
> > there right?
> 
> Assembly in x86.  Not so much on other architectures, though.  ;-)
> See raw_cpu_generic_read().

Interesting. That one seems like a plain access, I wonder why it does not use
READ_ONCE() in there or volatile accesses.

> > > @@ -3004,7 +3007,7 @@ static int rcu_pending(void)
> > >  		return 0;
> > >  
> > >  	/* Is the RCU core waiting for a quiescent state from this CPU? */
> > > -	if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm)
> > > +	if (READ_ONCE(rdp->core_needs_qs) && !rdp->cpu_no_qs.b.norm)
> > >  		return 1;
> > >  
> > >  	/* Does this CPU have callbacks ready to invoke? */
> > > @@ -3244,7 +3247,6 @@ int rcutree_prepare_cpu(unsigned int cpu)
> > >  	rdp->gp_seq = rnp->gp_seq;
> > >  	rdp->gp_seq_needed = rnp->gp_seq;
> > >  	rdp->cpu_no_qs.b.norm = true;
> > > -	rdp->core_needs_qs = false;
> > 
> > How about calling the new hint-clearing function here as well? Just for
> > robustness and consistency purposes?
> 
> This and the next function are both called during a CPU-hotplug online
> operation, so there is little robustness or consistency to be had by
> doing it twice.

Ok, sorry I missed you are clearing it below in the next function. That's
fine with me.

This patch looks good to me and I am Ok with merging of these changes into
the original patch with my authorship as you mentioned. Or if you wanted to
be author, that's fine too :)

Let me know anything else needed with it, thanks!

 - Joel


> > thanks,
> > 
> >  - Joel
> > 
> > >  	rdp->rcu_iw_pending = false;
> > >  	rdp->rcu_iw_gp_seq = rnp->gp_seq - 1;
> > >  	trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("cpuonl"));
> > > @@ -3359,7 +3361,7 @@ void rcu_cpu_starting(unsigned int cpu)
> > >  	rdp->rcu_onl_gp_seq = READ_ONCE(rcu_state.gp_seq);
> > >  	rdp->rcu_onl_gp_flags = READ_ONCE(rcu_state.gp_flags);
> > >  	if (rnp->qsmask & mask) { /* RCU waiting on incoming CPU? */
> > > -		rcu_disable_tick_upon_qs(rdp);
> > > +		rcu_disable_urgency_upon_qs(rdp);
> > >  		/* Report QS -after- changing ->qsmaskinitnext! */
> > >  		rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
> > >  	} else {

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH -rcu dev 1/2] Revert b8c17e6664c4 ("rcu: Maintain special bits at bottom of ->dynticks counter")
  2019-09-06  0:01               ` Joel Fernandes
@ 2019-09-06 15:08                 ` Joel Fernandes
  2019-09-06 15:21                   ` Paul E. McKenney
  0 siblings, 1 reply; 19+ messages in thread
From: Joel Fernandes @ 2019-09-06 15:08 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: linux-kernel, Andy Lutomirski, Bjorn Helgaas, Ingo Molnar,
	Josh Triplett, Lai Jiangshan, Mathieu Desnoyers, Petr Mladek,
	Rafael J. Wysocki, rcu, Steven Rostedt, Yafang Shao

On Thu, Sep 05, 2019 at 08:01:37PM -0400, Joel Fernandes wrote:
[snip] 
> > > > @@ -3004,7 +3007,7 @@ static int rcu_pending(void)
> > > >  		return 0;
> > > >  
> > > >  	/* Is the RCU core waiting for a quiescent state from this CPU? */
> > > > -	if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm)
> > > > +	if (READ_ONCE(rdp->core_needs_qs) && !rdp->cpu_no_qs.b.norm)
> > > >  		return 1;
> > > >  
> > > >  	/* Does this CPU have callbacks ready to invoke? */
> > > > @@ -3244,7 +3247,6 @@ int rcutree_prepare_cpu(unsigned int cpu)
> > > >  	rdp->gp_seq = rnp->gp_seq;
> > > >  	rdp->gp_seq_needed = rnp->gp_seq;
> > > >  	rdp->cpu_no_qs.b.norm = true;
> > > > -	rdp->core_needs_qs = false;
> > > 
> > > How about calling the new hint-clearing function here as well? Just for
> > > robustness and consistency purposes?
> > 
> > This and the next function are both called during a CPU-hotplug online
> > operation, so there is little robustness or consistency to be had by
> > doing it twice.
> 
> Ok, sorry I missed you are clearing it below in the next function. That's
> fine with me.
> 
> This patch looks good to me and I am Ok with merging of these changes into
> the original patch with my authorship as you mentioned. Or if you wanted to
> be author, that's fine too :)

Paul, does it make sense to clear these urgency hints in rcu_qs() as well?
After all, we are clearing at least one urgency hint there: the
rcu_read_unlock_special::need_qs bit.

thanks,

 - Joel


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH -rcu dev 1/2] Revert b8c17e6664c4 ("rcu: Maintain special bits at bottom of ->dynticks counter")
  2019-09-06 15:08                 ` Joel Fernandes
@ 2019-09-06 15:21                   ` Paul E. McKenney
  2019-09-06 15:27                     ` Paul E. McKenney
  0 siblings, 1 reply; 19+ messages in thread
From: Paul E. McKenney @ 2019-09-06 15:21 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: linux-kernel, Andy Lutomirski, Bjorn Helgaas, Ingo Molnar,
	Josh Triplett, Lai Jiangshan, Mathieu Desnoyers, Petr Mladek,
	Rafael J. Wysocki, rcu, Steven Rostedt, Yafang Shao

On Fri, Sep 06, 2019 at 11:08:06AM -0400, Joel Fernandes wrote:
> On Thu, Sep 05, 2019 at 08:01:37PM -0400, Joel Fernandes wrote:
> [snip] 
> > > > > @@ -3004,7 +3007,7 @@ static int rcu_pending(void)
> > > > >  		return 0;
> > > > >  
> > > > >  	/* Is the RCU core waiting for a quiescent state from this CPU? */
> > > > > -	if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm)
> > > > > +	if (READ_ONCE(rdp->core_needs_qs) && !rdp->cpu_no_qs.b.norm)
> > > > >  		return 1;
> > > > >  
> > > > >  	/* Does this CPU have callbacks ready to invoke? */
> > > > > @@ -3244,7 +3247,6 @@ int rcutree_prepare_cpu(unsigned int cpu)
> > > > >  	rdp->gp_seq = rnp->gp_seq;
> > > > >  	rdp->gp_seq_needed = rnp->gp_seq;
> > > > >  	rdp->cpu_no_qs.b.norm = true;
> > > > > -	rdp->core_needs_qs = false;
> > > > 
> > > > How about calling the new hint-clearing function here as well? Just for
> > > > robustness and consistency purposes?
> > > 
> > > This and the next function are both called during a CPU-hotplug online
> > > operation, so there is little robustness or consistency to be had by
> > > doing it twice.
> > 
> > Ok, sorry I missed you are clearing it below in the next function. That's
> > fine with me.
> > 
> > This patch looks good to me and I am Ok with merging of these changes into
> > the original patch with my authorship as you mentioned. Or if you wanted to
> > be author, that's fine too :)
> 
> Paul, does it make sense to clear these urgency hints in rcu_qs() as well?
> After all, we are clearing at least one urgency hint there: the
> rcu_read_unlock_special::need_qs bit.

We certainly don't want to turn off the scheduling-clock interrupt until
after the quiescent state has been reported to the RCU core.  And it might
still be useful to have a heavy quiescent state because the grace-period
kthread can detect that.  Just in case the CPU that just called rcu_qs()
is slow about actually reporting that quiescent state to the RCU core.

							Thanx, Paul

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH -rcu dev 1/2] Revert b8c17e6664c4 ("rcu: Maintain special bits at bottom of ->dynticks counter")
  2019-09-06 15:21                   ` Paul E. McKenney
@ 2019-09-06 15:27                     ` Paul E. McKenney
  2019-09-06 16:57                       ` Joel Fernandes
  0 siblings, 1 reply; 19+ messages in thread
From: Paul E. McKenney @ 2019-09-06 15:27 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: linux-kernel, Andy Lutomirski, Bjorn Helgaas, Ingo Molnar,
	Josh Triplett, Lai Jiangshan, Mathieu Desnoyers, Petr Mladek,
	Rafael J. Wysocki, rcu, Steven Rostedt, Yafang Shao

On Fri, Sep 06, 2019 at 08:21:44AM -0700, Paul E. McKenney wrote:
> On Fri, Sep 06, 2019 at 11:08:06AM -0400, Joel Fernandes wrote:
> > On Thu, Sep 05, 2019 at 08:01:37PM -0400, Joel Fernandes wrote:
> > [snip] 
> > > > > > @@ -3004,7 +3007,7 @@ static int rcu_pending(void)
> > > > > >  		return 0;
> > > > > >  
> > > > > >  	/* Is the RCU core waiting for a quiescent state from this CPU? */
> > > > > > -	if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm)
> > > > > > +	if (READ_ONCE(rdp->core_needs_qs) && !rdp->cpu_no_qs.b.norm)
> > > > > >  		return 1;
> > > > > >  
> > > > > >  	/* Does this CPU have callbacks ready to invoke? */
> > > > > > @@ -3244,7 +3247,6 @@ int rcutree_prepare_cpu(unsigned int cpu)
> > > > > >  	rdp->gp_seq = rnp->gp_seq;
> > > > > >  	rdp->gp_seq_needed = rnp->gp_seq;
> > > > > >  	rdp->cpu_no_qs.b.norm = true;
> > > > > > -	rdp->core_needs_qs = false;
> > > > > 
> > > > > How about calling the new hint-clearing function here as well? Just for
> > > > > robustness and consistency purposes?
> > > > 
> > > > This and the next function are both called during a CPU-hotplug online
> > > > operation, so there is little robustness or consistency to be had by
> > > > doing it twice.
> > > 
> > > Ok, sorry I missed you are clearing it below in the next function. That's
> > > fine with me.
> > > 
> > > This patch looks good to me and I am Ok with merging of these changes into
> > > the original patch with my authorship as you mentioned. Or if you wanted to
> > > be author, that's fine too :)
> > 
> > Paul, does it make sense to clear these urgency hints in rcu_qs() as well?
> > After all, we are clearing at least one urgency hint there: the
> > rcu_read_unlock_special::need_qs bit.
> 
> We certainly don't want to turn off the scheduling-clock interrupt until
> after the quiescent state has been reported to the RCU core.  And it might
> still be useful to have a heavy quiescent state because the grace-period
> kthread can detect that.  Just in case the CPU that just called rcu_qs()
> is slow about actually reporting that quiescent state to the RCU core.

Hmmm...  Should ->need_qs ever be cleared from FQS to begin with?

							Thanx, Paul

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH -rcu dev 1/2] Revert b8c17e6664c4 ("rcu: Maintain special bits at bottom of ->dynticks counter")
  2019-09-06 15:27                     ` Paul E. McKenney
@ 2019-09-06 16:57                       ` Joel Fernandes
  2019-09-06 17:16                         ` Paul E. McKenney
  0 siblings, 1 reply; 19+ messages in thread
From: Joel Fernandes @ 2019-09-06 16:57 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: linux-kernel, Andy Lutomirski, Bjorn Helgaas, Ingo Molnar,
	Josh Triplett, Lai Jiangshan, Mathieu Desnoyers, Petr Mladek,
	Rafael J. Wysocki, rcu, Steven Rostedt, Yafang Shao

On Fri, Sep 06, 2019 at 08:27:53AM -0700, Paul E. McKenney wrote:
> On Fri, Sep 06, 2019 at 08:21:44AM -0700, Paul E. McKenney wrote:
> > On Fri, Sep 06, 2019 at 11:08:06AM -0400, Joel Fernandes wrote:
> > > On Thu, Sep 05, 2019 at 08:01:37PM -0400, Joel Fernandes wrote:
> > > [snip] 
> > > > > > > @@ -3004,7 +3007,7 @@ static int rcu_pending(void)
> > > > > > >  		return 0;
> > > > > > >  
> > > > > > >  	/* Is the RCU core waiting for a quiescent state from this CPU? */
> > > > > > > -	if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm)
> > > > > > > +	if (READ_ONCE(rdp->core_needs_qs) && !rdp->cpu_no_qs.b.norm)
> > > > > > >  		return 1;
> > > > > > >  
> > > > > > >  	/* Does this CPU have callbacks ready to invoke? */
> > > > > > > @@ -3244,7 +3247,6 @@ int rcutree_prepare_cpu(unsigned int cpu)
> > > > > > >  	rdp->gp_seq = rnp->gp_seq;
> > > > > > >  	rdp->gp_seq_needed = rnp->gp_seq;
> > > > > > >  	rdp->cpu_no_qs.b.norm = true;
> > > > > > > -	rdp->core_needs_qs = false;
> > > > > > 
> > > > > > How about calling the new hint-clearing function here as well? Just for
> > > > > > robustness and consistency purposes?
> > > > > 
> > > > > This and the next function are both called during a CPU-hotplug online
> > > > > operation, so there is little robustness or consistency to be had by
> > > > > doing it twice.
> > > > 
> > > > Ok, sorry I missed you are clearing it below in the next function. That's
> > > > fine with me.
> > > > 
> > > > This patch looks good to me and I am Ok with merging of these changes into
> > > > the original patch with my authorship as you mentioned. Or if you wanted to
> > > > be author, that's fine too :)
> > > 
> > > Paul, does it make sense to clear these urgency hints in rcu_qs() as well?
> > > After all, we are clearing atleast one urgency hint there: the
> > > rcu_read_unlock_special::need_qs bit.

Makes sense.

> > We certainly don't want to turn off the scheduling-clock interrupt until
> > after the quiescent state has been reported to the RCU core.  And it might
> > still be useful to have a heavy quiescent state because the grace-period
> > kthread can detect that.  Just in case the CPU that just called rcu_qs()
> > is slow about actually reporting that quiescent state to the RCU core.
> 
> Hmmm...  Should ->need_qs ever be cleared from FQS to begin with?

I did not see the FQS loop clearing ->need_qs in the rcu_read_unlock_special
union after looking for a few minutes. Could you clarify which path this is?

Or do you mean ->core_needs_qs? If so, I feel the FQS loop should clear it as
I think your patch does, since the FQS loop is essentially doing heavy-weight
RCU core processing right?

Also, where does FQS loop clear rdp.cpu_no_qs? Shouldn't it clear that as
well for the dyntick-idle CPUs?

thanks,

 - Joel


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH -rcu dev 1/2] Revert b8c17e6664c4 ("rcu: Maintain special bits at bottom of ->dynticks counter")
  2019-09-06 16:57                       ` Joel Fernandes
@ 2019-09-06 17:16                         ` Paul E. McKenney
  2019-09-06 17:26                           ` Joel Fernandes
  2019-09-07 17:28                           ` Joel Fernandes
  0 siblings, 2 replies; 19+ messages in thread
From: Paul E. McKenney @ 2019-09-06 17:16 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: linux-kernel, Andy Lutomirski, Bjorn Helgaas, Ingo Molnar,
	Josh Triplett, Lai Jiangshan, Mathieu Desnoyers, Petr Mladek,
	Rafael J. Wysocki, rcu, Steven Rostedt, Yafang Shao

On Fri, Sep 06, 2019 at 12:57:51PM -0400, Joel Fernandes wrote:
> On Fri, Sep 06, 2019 at 08:27:53AM -0700, Paul E. McKenney wrote:
> > On Fri, Sep 06, 2019 at 08:21:44AM -0700, Paul E. McKenney wrote:
> > > On Fri, Sep 06, 2019 at 11:08:06AM -0400, Joel Fernandes wrote:
> > > > On Thu, Sep 05, 2019 at 08:01:37PM -0400, Joel Fernandes wrote:
> > > > [snip] 
> > > > > > > > @@ -3004,7 +3007,7 @@ static int rcu_pending(void)
> > > > > > > >  		return 0;
> > > > > > > >  
> > > > > > > >  	/* Is the RCU core waiting for a quiescent state from this CPU? */
> > > > > > > > -	if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm)
> > > > > > > > +	if (READ_ONCE(rdp->core_needs_qs) && !rdp->cpu_no_qs.b.norm)
> > > > > > > >  		return 1;
> > > > > > > >  
> > > > > > > >  	/* Does this CPU have callbacks ready to invoke? */
> > > > > > > > @@ -3244,7 +3247,6 @@ int rcutree_prepare_cpu(unsigned int cpu)
> > > > > > > >  	rdp->gp_seq = rnp->gp_seq;
> > > > > > > >  	rdp->gp_seq_needed = rnp->gp_seq;
> > > > > > > >  	rdp->cpu_no_qs.b.norm = true;
> > > > > > > > -	rdp->core_needs_qs = false;
> > > > > > > 
> > > > > > > How about calling the new hint-clearing function here as well? Just for
> > > > > > > robustness and consistency purposes?
> > > > > > 
> > > > > > This and the next function are both called during a CPU-hotplug online
> > > > > > operation, so there is little robustness or consistency to be had by
> > > > > > doing it twice.
> > > > > 
> > > > > Ok, sorry I missed you are clearing it below in the next function. That's
> > > > > fine with me.
> > > > > 
> > > > > This patch looks good to me and I am Ok with merging of these changes into
> > > > > the original patch with my authorship as you mentioned. Or if you wanted to
> > > > > be author, that's fine too :)
> > > > 
> > > > Paul, does it make sense to clear these urgency hints in rcu_qs() as well?
> > > > After all, we are clearing atleast one urgency hint there: the
> > > > rcu_read_unlock_special::need_qs bit.
> 
> Makes sense.
> 
> > > We certainly don't want to turn off the scheduling-clock interrupt until
> > > after the quiescent state has been reported to the RCU core.  And it might
> > > still be useful to have a heavy quiescent state because the grace-period
> > > kthread can detect that.  Just in case the CPU that just called rcu_qs()
> > > is slow about actually reporting that quiescent state to the RCU core.
> > 
> > Hmmm...  Should ->need_qs ever be cleared from FQS to begin with?
> 
> I did not see the FQS loop clearing ->need_qs in the rcu_read_unlock_special
> union after looking for a few minutes. Could you clarify which path this?
> 
> Or do you mean ->core_needs_qs? If so, I feel the FQS loop should clear it as
> I think your patch does, since the FQS loop is essentially doing heavy-weight
> RCU core processing right?
> 
> Also, where does FQS loop clear rdp.cpu_no_qs? Shouldn't it clear that as
> well for the dyntick-idle CPUs?

Synchronization?

							Thanx, Paul

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH -rcu dev 1/2] Revert b8c17e6664c4 ("rcu: Maintain special bits at bottom of ->dynticks counter")
  2019-09-06 17:16                         ` Paul E. McKenney
@ 2019-09-06 17:26                           ` Joel Fernandes
  2019-09-07 17:28                           ` Joel Fernandes
  1 sibling, 0 replies; 19+ messages in thread
From: Joel Fernandes @ 2019-09-06 17:26 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: linux-kernel, Andy Lutomirski, Bjorn Helgaas, Ingo Molnar,
	Josh Triplett, Lai Jiangshan, Mathieu Desnoyers, Petr Mladek,
	Rafael J. Wysocki, rcu, Steven Rostedt, Yafang Shao

On Fri, Sep 06, 2019 at 10:16:47AM -0700, Paul E. McKenney wrote:
> On Fri, Sep 06, 2019 at 12:57:51PM -0400, Joel Fernandes wrote:
> > On Fri, Sep 06, 2019 at 08:27:53AM -0700, Paul E. McKenney wrote:
> > > On Fri, Sep 06, 2019 at 08:21:44AM -0700, Paul E. McKenney wrote:
> > > > On Fri, Sep 06, 2019 at 11:08:06AM -0400, Joel Fernandes wrote:
> > > > > On Thu, Sep 05, 2019 at 08:01:37PM -0400, Joel Fernandes wrote:
> > > > > [snip] 
> > > > > > > > > @@ -3004,7 +3007,7 @@ static int rcu_pending(void)
> > > > > > > > >  		return 0;
> > > > > > > > >  
> > > > > > > > >  	/* Is the RCU core waiting for a quiescent state from this CPU? */
> > > > > > > > > -	if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm)
> > > > > > > > > +	if (READ_ONCE(rdp->core_needs_qs) && !rdp->cpu_no_qs.b.norm)
> > > > > > > > >  		return 1;
> > > > > > > > >  
> > > > > > > > >  	/* Does this CPU have callbacks ready to invoke? */
> > > > > > > > > @@ -3244,7 +3247,6 @@ int rcutree_prepare_cpu(unsigned int cpu)
> > > > > > > > >  	rdp->gp_seq = rnp->gp_seq;
> > > > > > > > >  	rdp->gp_seq_needed = rnp->gp_seq;
> > > > > > > > >  	rdp->cpu_no_qs.b.norm = true;
> > > > > > > > > -	rdp->core_needs_qs = false;
> > > > > > > > 
> > > > > > > > How about calling the new hint-clearing function here as well? Just for
> > > > > > > > robustness and consistency purposes?
> > > > > > > 
> > > > > > > This and the next function are both called during a CPU-hotplug online
> > > > > > > operation, so there is little robustness or consistency to be had by
> > > > > > > doing it twice.
> > > > > > 
> > > > > > Ok, sorry I missed you are clearing it below in the next function. That's
> > > > > > fine with me.
> > > > > > 
> > > > > > This patch looks good to me and I am Ok with merging of these changes into
> > > > > > the original patch with my authorship as you mentioned. Or if you wanted to
> > > > > > be author, that's fine too :)
> > > > > 
> > > > > Paul, does it make sense to clear these urgency hints in rcu_qs() as well?
> > > > > After all, we are clearing atleast one urgency hint there: the
> > > > > rcu_read_unlock_special::need_qs bit.
> > 
> > Makes sense.
> > 
> > > > We certainly don't want to turn off the scheduling-clock interrupt until
> > > > after the quiescent state has been reported to the RCU core.  And it might
> > > > still be useful to have a heavy quiescent state because the grace-period
> > > > kthread can detect that.  Just in case the CPU that just called rcu_qs()
> > > > is slow about actually reporting that quiescent state to the RCU core.
> > > 
> > > Hmmm...  Should ->need_qs ever be cleared from FQS to begin with?
> > 
> > I did not see the FQS loop clearing ->need_qs in the rcu_read_unlock_special
> > union after looking for a few minutes. Could you clarify which path this?
> > 
> > Or do you mean ->core_needs_qs? If so, I feel the FQS loop should clear it as
> > I think your patch does, since the FQS loop is essentially doing heavy-weight
> > RCU core processing right?
> > 
> > Also, where does FQS loop clear rdp.cpu_no_qs? Shouldn't it clear that as
> > well for the dyntick-idle CPUs?
> 
> Synchronization?

Didn't follow. Probably I am asking silly questions. There are too many hints
so it is confusing. I will trace this out later and study it more.

Doing my best,  ;-)

thanks,

 - Joel


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH -rcu dev 1/2] Revert b8c17e6664c4 ("rcu: Maintain special bits at bottom of ->dynticks counter")
  2019-09-06 17:16                         ` Paul E. McKenney
  2019-09-06 17:26                           ` Joel Fernandes
@ 2019-09-07 17:28                           ` Joel Fernandes
  1 sibling, 0 replies; 19+ messages in thread
From: Joel Fernandes @ 2019-09-07 17:28 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: linux-kernel, Andy Lutomirski, Bjorn Helgaas, Ingo Molnar,
	Josh Triplett, Lai Jiangshan, Mathieu Desnoyers, Petr Mladek,
	Rafael J. Wysocki, rcu, Steven Rostedt, Yafang Shao

On Fri, Sep 06, 2019 at 10:16:47AM -0700, Paul E. McKenney wrote:
> On Fri, Sep 06, 2019 at 12:57:51PM -0400, Joel Fernandes wrote:
> > On Fri, Sep 06, 2019 at 08:27:53AM -0700, Paul E. McKenney wrote:
> > > On Fri, Sep 06, 2019 at 08:21:44AM -0700, Paul E. McKenney wrote:
> > > > On Fri, Sep 06, 2019 at 11:08:06AM -0400, Joel Fernandes wrote:
> > > > > On Thu, Sep 05, 2019 at 08:01:37PM -0400, Joel Fernandes wrote:
> > > > > [snip] 
> > > > > > > > > @@ -3004,7 +3007,7 @@ static int rcu_pending(void)
> > > > > > > > >  		return 0;
> > > > > > > > >  
> > > > > > > > >  	/* Is the RCU core waiting for a quiescent state from this CPU? */
> > > > > > > > > -	if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm)
> > > > > > > > > +	if (READ_ONCE(rdp->core_needs_qs) && !rdp->cpu_no_qs.b.norm)
> > > > > > > > >  		return 1;
> > > > > > > > >  
> > > > > > > > >  	/* Does this CPU have callbacks ready to invoke? */
> > > > > > > > > @@ -3244,7 +3247,6 @@ int rcutree_prepare_cpu(unsigned int cpu)
> > > > > > > > >  	rdp->gp_seq = rnp->gp_seq;
> > > > > > > > >  	rdp->gp_seq_needed = rnp->gp_seq;
> > > > > > > > >  	rdp->cpu_no_qs.b.norm = true;
> > > > > > > > > -	rdp->core_needs_qs = false;
> > > > > > > > 
> > > > > > > > How about calling the new hint-clearing function here as well? Just for
> > > > > > > > robustness and consistency purposes?
> > > > > > > 
> > > > > > > This and the next function are both called during a CPU-hotplug online
> > > > > > > operation, so there is little robustness or consistency to be had by
> > > > > > > doing it twice.
> > > > > > 
> > > > > > Ok, sorry I missed you are clearing it below in the next function. That's
> > > > > > fine with me.
> > > > > > 
> > > > > > This patch looks good to me and I am Ok with merging of these changes into
> > > > > > the original patch with my authorship as you mentioned. Or if you wanted to
> > > > > > be author, that's fine too :)
> > > > > 
> > > > > Paul, does it make sense to clear these urgency hints in rcu_qs() as well?
> > > > > After all, we are clearing atleast one urgency hint there: the
> > > > > rcu_read_unlock_special::need_qs bit.
> > 
> > Makes sense.
> > 
> > > > We certainly don't want to turn off the scheduling-clock interrupt until
> > > > after the quiescent state has been reported to the RCU core.  And it might
> > > > still be useful to have a heavy quiescent state because the grace-period
> > > > kthread can detect that.  Just in case the CPU that just called rcu_qs()
> > > > is slow about actually reporting that quiescent state to the RCU core.
> > > 
> > > Hmmm...  Should ->need_qs ever be cleared from FQS to begin with?
> > 
> > I did not see the FQS loop clearing ->need_qs in the rcu_read_unlock_special
> > union after looking for a few minutes. Could you clarify which path this?
> > 
> > Or do you mean ->core_needs_qs? If so, I feel the FQS loop should clear it as
> > I think your patch does, since the FQS loop is essentially doing heavy-weight
> > RCU core processing right?

Took another look, rcu_read_unlock_special::need_qs is not cleared from FQS
loop. It is only set. So I did not follow what you meant. You probably were
concerned with some other hint being cleared in the FQS loop. No urgency on
this but do let me know what you meant (whenever after LPC etc).

> > Also, where does FQS loop clear rdp.cpu_no_qs? Shouldn't it clear that as
> > well for the dyntick-idle CPUs?
> 
> Synchronization?

I think here you meant that we don't want to acquire rdp's locks so we
can't easily clear cpu_no_qs. But why not use WRITE_ONCE() to clear it, like
we are doing for the other hints?  I just felt idle CPU can't do this
clearing so the FQS loop (GP thread) should do it on its behalf.

Anyway, since you were going to squash the hint clearing with the other patch
as you mentioned in this thread, let me know once you have the squashed patch
for further review/tracing/testing (after LPC, conferences, whenever. No rush!).

Thanks, Paul!!

 - Joel


^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2019-09-07 17:28 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-08-30 16:23 [PATCH -rcu dev 1/2] Revert b8c17e6664c4 ("rcu: Maintain special bits at bottom of ->dynticks counter") Joel Fernandes (Google)
2019-08-30 16:23 ` [PATCH -rcu dev 2/2] rcu/dyntick-idle: Add better tracing Joel Fernandes (Google)
2019-09-03 20:04   ` Paul E. McKenney
2019-09-04  0:46     ` Joel Fernandes
2019-09-03 20:02 ` [PATCH -rcu dev 1/2] Revert b8c17e6664c4 ("rcu: Maintain special bits at bottom of ->dynticks counter") Paul E. McKenney
2019-09-04  4:59   ` Joel Fernandes
2019-09-04 10:12     ` Paul E. McKenney
2019-09-04 13:54       ` Joel Fernandes
2019-09-04 23:13         ` Paul E. McKenney
2019-09-05 15:36           ` Joel Fernandes
2019-09-05 16:43             ` Paul E. McKenney
2019-09-06  0:01               ` Joel Fernandes
2019-09-06 15:08                 ` Joel Fernandes
2019-09-06 15:21                   ` Paul E. McKenney
2019-09-06 15:27                     ` Paul E. McKenney
2019-09-06 16:57                       ` Joel Fernandes
2019-09-06 17:16                         ` Paul E. McKenney
2019-09-06 17:26                           ` Joel Fernandes
2019-09-07 17:28                           ` Joel Fernandes

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).