Linux-rt-users archive on lore.kernel.org
 help / color / Atom feed
* [PATCH RT v2 0/3] RCU fixes
@ 2019-08-21 23:19 Scott Wood
  2019-08-21 23:19 ` [PATCH RT v2 1/3] rcu: Acquire RCU lock when disabling BHs Scott Wood
                   ` (2 more replies)
  0 siblings, 3 replies; 43+ messages in thread
From: Scott Wood @ 2019-08-21 23:19 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-rt-users, linux-kernel, Paul E . McKenney, Joel Fernandes,
	Thomas Gleixner, Peter Zijlstra, Juri Lelli, Clark Williams

This is a respin of the "Address rcutorture issues" patchset,
minus the actual rcutorture changes.

I still plan to implement detection of bad nesting scenarios, but it's
complicated by the need to distinguish (on a non-RT kernel) between
irq/preempt disabling that would and would not happen on an RT kernel
(which would also have the benefit of being able to detect nesting
regular spinlocks inside raw spinlocks on a non-RT debug kernel).  In
the meantime I could send the rcutorture changes as a PREEMPT_RT only
patch, though the extent of the changes depends on whether my migrate
disable patchset is applied since it removes a restriction.

Scott Wood (3):
  rcu: Acquire RCU lock when disabling BHs
  sched: migrate_enable: Use sleeping_lock to indicate involuntary sleep
  rcu: Disable use_softirq on PREEMPT_RT

 include/linux/rcupdate.h |  4 ++++
 include/linux/sched.h    |  4 ++--
 kernel/rcu/tree.c        |  9 ++++++++-
 kernel/rcu/tree_plugin.h |  2 +-
 kernel/rcu/update.c      |  4 ++++
 kernel/sched/core.c      |  8 ++++++++
 kernel/softirq.c         | 12 +++++++++---
 7 files changed, 36 insertions(+), 7 deletions(-)

-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH RT v2 1/3] rcu: Acquire RCU lock when disabling BHs
  2019-08-21 23:19 [PATCH RT v2 0/3] RCU fixes Scott Wood
@ 2019-08-21 23:19 ` Scott Wood
  2019-08-21 23:33   ` Paul E. McKenney
  2019-08-21 23:19 ` [PATCH RT v2 2/3] sched: migrate_enable: Use sleeping_lock to indicate involuntary sleep Scott Wood
  2019-08-21 23:19 ` [PATCH RT v2 3/3] rcu: Disable use_softirq on PREEMPT_RT Scott Wood
  2 siblings, 1 reply; 43+ messages in thread
From: Scott Wood @ 2019-08-21 23:19 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-rt-users, linux-kernel, Paul E . McKenney, Joel Fernandes,
	Thomas Gleixner, Peter Zijlstra, Juri Lelli, Clark Williams,
	Scott Wood

A plain local_bh_disable() is documented as creating an RCU critical
section, and (at least) rcutorture expects this to be the case.  However,
in_softirq() doesn't block a grace period on PREEMPT_RT, since RCU checks
preempt_count() directly.  Even if RCU were changed to check
in_softirq(), that wouldn't allow blocked BH disablers to be boosted.

Fix this by calling rcu_read_lock() from local_bh_disable(), and update
rcu_read_lock_bh_held() accordingly.

Signed-off-by: Scott Wood <swood@redhat.com>
---
Another question is whether non-raw spinlocks are intended to create an
RCU read-side critical section due to implicit preempt disable.  If they
are, then we'd need to add rcu_read_lock() there as well since RT doesn't
disable preemption (and rcutorture should explicitly test with a
spinlock).  If not, the documentation should make that clear.

 include/linux/rcupdate.h |  4 ++++
 kernel/rcu/update.c      |  4 ++++
 kernel/softirq.c         | 12 +++++++++---
 3 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 388ace315f32..d6e357378732 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -615,10 +615,12 @@ static inline void rcu_read_unlock(void)
 static inline void rcu_read_lock_bh(void)
 {
 	local_bh_disable();
+#ifndef CONFIG_PREEMPT_RT_FULL
 	__acquire(RCU_BH);
 	rcu_lock_acquire(&rcu_bh_lock_map);
 	RCU_LOCKDEP_WARN(!rcu_is_watching(),
 			 "rcu_read_lock_bh() used illegally while idle");
+#endif
 }
 
 /*
@@ -628,10 +630,12 @@ static inline void rcu_read_lock_bh(void)
  */
 static inline void rcu_read_unlock_bh(void)
 {
+#ifndef CONFIG_PREEMPT_RT_FULL
 	RCU_LOCKDEP_WARN(!rcu_is_watching(),
 			 "rcu_read_unlock_bh() used illegally while idle");
 	rcu_lock_release(&rcu_bh_lock_map);
 	__release(RCU_BH);
+#endif
 	local_bh_enable();
 }
 
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 016c66a98292..a9cdf3d562bc 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -296,7 +296,11 @@ int rcu_read_lock_bh_held(void)
 		return 0;
 	if (!rcu_lockdep_current_cpu_online())
 		return 0;
+#ifdef CONFIG_PREEMPT_RT_FULL
+	return lock_is_held(&rcu_lock_map) || irqs_disabled();
+#else
 	return in_softirq() || irqs_disabled();
+#endif
 }
 EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
 
diff --git a/kernel/softirq.c b/kernel/softirq.c
index d16d080a74f7..6080c9328df1 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -115,8 +115,10 @@ void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
 	long soft_cnt;
 
 	WARN_ON_ONCE(in_irq());
-	if (!in_atomic())
+	if (!in_atomic()) {
 		local_lock(bh_lock);
+		rcu_read_lock();
+	}
 	soft_cnt = this_cpu_inc_return(softirq_counter);
 	WARN_ON_ONCE(soft_cnt == 0);
 	current->softirq_count += SOFTIRQ_DISABLE_OFFSET;
@@ -151,8 +153,10 @@ void _local_bh_enable(void)
 #endif
 
 	current->softirq_count -= SOFTIRQ_DISABLE_OFFSET;
-	if (!in_atomic())
+	if (!in_atomic()) {
+		rcu_read_unlock();
 		local_unlock(bh_lock);
+	}
 }
 
 void _local_bh_enable_rt(void)
@@ -185,8 +189,10 @@ void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
 	WARN_ON_ONCE(count < 0);
 	local_irq_enable();
 
-	if (!in_atomic())
+	if (!in_atomic()) {
+		rcu_read_unlock();
 		local_unlock(bh_lock);
+	}
 
 	current->softirq_count -= SOFTIRQ_DISABLE_OFFSET;
 	preempt_check_resched();
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH RT v2 2/3] sched: migrate_enable: Use sleeping_lock to indicate involuntary sleep
  2019-08-21 23:19 [PATCH RT v2 0/3] RCU fixes Scott Wood
  2019-08-21 23:19 ` [PATCH RT v2 1/3] rcu: Acquire RCU lock when disabling BHs Scott Wood
@ 2019-08-21 23:19 ` Scott Wood
  2019-08-21 23:35   ` Paul E. McKenney
  2019-08-23 16:20   ` Sebastian Andrzej Siewior
  2019-08-21 23:19 ` [PATCH RT v2 3/3] rcu: Disable use_softirq on PREEMPT_RT Scott Wood
  2 siblings, 2 replies; 43+ messages in thread
From: Scott Wood @ 2019-08-21 23:19 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-rt-users, linux-kernel, Paul E . McKenney, Joel Fernandes,
	Thomas Gleixner, Peter Zijlstra, Juri Lelli, Clark Williams,
	Scott Wood

Without this, rcu_note_context_switch() will complain if an RCU read
lock is held when migrate_enable() calls stop_one_cpu().

Signed-off-by: Scott Wood <swood@redhat.com>
---
v2: Added comment.

If my migrate disable changes aren't taken, then pin_current_cpu()
will also need to use sleeping_lock_inc() because calling
__read_rt_lock() bypasses the usual place it's done.

 include/linux/sched.h    | 4 ++--
 kernel/rcu/tree_plugin.h | 2 +-
 kernel/sched/core.c      | 8 ++++++++
 3 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7e892e727f12..1ebc97f28009 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -673,7 +673,7 @@ struct task_struct {
 	int				migrate_disable_atomic;
 # endif
 #endif
-#ifdef CONFIG_PREEMPT_RT_FULL
+#ifdef CONFIG_PREEMPT_RT_BASE
 	int				sleeping_lock;
 #endif
 
@@ -1881,7 +1881,7 @@ static __always_inline bool need_resched(void)
 	return unlikely(tif_need_resched());
 }
 
-#ifdef CONFIG_PREEMPT_RT_FULL
+#ifdef CONFIG_PREEMPT_RT_BASE
 static inline void sleeping_lock_inc(void)
 {
 	current->sleeping_lock++;
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 23a54e4b649c..7a3aa085ce2c 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -292,7 +292,7 @@ void rcu_note_context_switch(bool preempt)
 	barrier(); /* Avoid RCU read-side critical sections leaking down. */
 	trace_rcu_utilization(TPS("Start context switch"));
 	lockdep_assert_irqs_disabled();
-#if defined(CONFIG_PREEMPT_RT_FULL)
+#if defined(CONFIG_PREEMPT_RT_BASE)
 	sleeping_l = t->sleeping_lock;
 #endif
 	WARN_ON_ONCE(!preempt && t->rcu_read_lock_nesting > 0 && !sleeping_l);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index e1bdd7f9be05..0758ee85634e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7405,7 +7405,15 @@ void migrate_enable(void)
 			unpin_current_cpu();
 			preempt_lazy_enable();
 			preempt_enable();
+
+			/*
+			 * sleeping_lock_inc suppresses a debug check for
+			 * sleeping inside an RCU read side critical section
+			 */
+			sleeping_lock_inc();
 			stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg);
+			sleeping_lock_dec();
+
 			return;
 		}
 	}
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH RT v2 3/3] rcu: Disable use_softirq on PREEMPT_RT
  2019-08-21 23:19 [PATCH RT v2 0/3] RCU fixes Scott Wood
  2019-08-21 23:19 ` [PATCH RT v2 1/3] rcu: Acquire RCU lock when disabling BHs Scott Wood
  2019-08-21 23:19 ` [PATCH RT v2 2/3] sched: migrate_enable: Use sleeping_lock to indicate involuntary sleep Scott Wood
@ 2019-08-21 23:19 ` Scott Wood
  2019-08-21 23:40   ` Paul E. McKenney
  2019-08-22 13:59   ` Joel Fernandes
  2 siblings, 2 replies; 43+ messages in thread
From: Scott Wood @ 2019-08-21 23:19 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-rt-users, linux-kernel, Paul E . McKenney, Joel Fernandes,
	Thomas Gleixner, Peter Zijlstra, Juri Lelli, Clark Williams,
	Scott Wood

Besides restoring behavior that used to be default on RT, this avoids
a deadlock on scheduler locks:

[  136.894657] 039: ============================================
[  136.900401] 039: WARNING: possible recursive locking detected
[  136.906146] 039: 5.2.9-rt3.dbg+ #174 Tainted: G            E
[  136.912152] 039: --------------------------------------------
[  136.917896] 039: rcu_torture_rea/13474 is trying to acquire lock:
[  136.923990] 039: 000000005f25146d
[  136.927310] 039:  (
[  136.929414] 039: &p->pi_lock
[  136.932303] 039: ){-...}
[  136.934840] 039: , at: try_to_wake_up+0x39/0x920
[  136.939461] 039:
but task is already holding lock:
[  136.944425] 039: 000000005f25146d
[  136.947745] 039:  (
[  136.949852] 039: &p->pi_lock
[  136.952738] 039: ){-...}
[  136.955274] 039: , at: try_to_wake_up+0x39/0x920
[  136.959895] 039:
other info that might help us debug this:
[  136.965555] 039:  Possible unsafe locking scenario:

[  136.970608] 039:        CPU0
[  136.973493] 039:        ----
[  136.976378] 039:   lock(
[  136.978918] 039: &p->pi_lock
[  136.981806] 039: );
[  136.983911] 039:   lock(
[  136.986451] 039: &p->pi_lock
[  136.989336] 039: );
[  136.991444] 039:
 *** DEADLOCK ***

[  136.995194] 039:  May be due to missing lock nesting notation

[  137.001115] 039: 3 locks held by rcu_torture_rea/13474:
[  137.006341] 039:  #0:
[  137.008707] 039: 000000005f25146d
[  137.012024] 039:  (
[  137.014131] 039: &p->pi_lock
[  137.017015] 039: ){-...}
[  137.019558] 039: , at: try_to_wake_up+0x39/0x920
[  137.024175] 039:  #1:
[  137.026540] 039: 0000000011c8e51d
[  137.029859] 039:  (
[  137.031966] 039: &rq->lock
[  137.034679] 039: ){-...}
[  137.037217] 039: , at: try_to_wake_up+0x241/0x920
[  137.041924] 039:  #2:
[  137.044291] 039: 00000000098649b9
[  137.047610] 039:  (
[  137.049714] 039: rcu_read_lock
[  137.052774] 039: ){....}
[  137.055314] 039: , at: cpuacct_charge+0x33/0x1e0
[  137.059934] 039:
stack backtrace:
[  137.063425] 039: CPU: 39 PID: 13474 Comm: rcu_torture_rea Kdump: loaded Tainted: G            E     5.2.9-rt3.dbg+ #174
[  137.074197] 039: Hardware name: Intel Corporation S2600BT/S2600BT, BIOS SE5C620.86B.01.00.0763.022420181017 02/24/2018
[  137.084886] 039: Call Trace:
[  137.087773] 039:  <IRQ>
[  137.090226] 039:  dump_stack+0x5e/0x8b
[  137.093997] 039:  __lock_acquire+0x725/0x1100
[  137.098358] 039:  lock_acquire+0xc0/0x240
[  137.102374] 039:  ? try_to_wake_up+0x39/0x920
[  137.106737] 039:  _raw_spin_lock_irqsave+0x47/0x90
[  137.111534] 039:  ? try_to_wake_up+0x39/0x920
[  137.115910] 039:  try_to_wake_up+0x39/0x920
[  137.120098] 039:  rcu_read_unlock_special+0x65/0xb0
[  137.124977] 039:  __rcu_read_unlock+0x5d/0x70
[  137.129337] 039:  cpuacct_charge+0xd9/0x1e0
[  137.133522] 039:  ? cpuacct_charge+0x33/0x1e0
[  137.137880] 039:  update_curr+0x14b/0x420
[  137.141894] 039:  enqueue_entity+0x42/0x370
[  137.146080] 039:  enqueue_task_fair+0xa9/0x490
[  137.150528] 039:  activate_task+0x5a/0xf0
[  137.154539] 039:  ttwu_do_activate+0x4e/0x90
[  137.158813] 039:  try_to_wake_up+0x277/0x920
[  137.163086] 039:  irq_exit+0xb6/0xf0
[  137.166661] 039:  smp_apic_timer_interrupt+0xe3/0x3a0
[  137.171714] 039:  apic_timer_interrupt+0xf/0x20
[  137.176249] 039:  </IRQ>
[  137.178785] 039: RIP: 0010:__schedule+0x0/0x8e0
[  137.183319] 039: Code: 00 02 48 89 43 20 e8 0f 5a 00 00 48 8d 7b 28 e8 86 f2 fd ff 31 c0 5b 5d 41 5c c3 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 <55> 48 89 e5 41 57 41 56 49 c7 c6 c0 ca 1e 00 41 55 41 89 fd 41 54
[  137.202498] 039: RSP: 0018:ffffc9005835fbc0 EFLAGS: 00000246
[  137.208158] 039:  ORIG_RAX: ffffffffffffff13
[  137.212428] 039: RAX: 0000000000000000 RBX: ffff8897c3e1bb00 RCX: 0000000000000001
[  137.219994] 039: RDX: 0000000080004008 RSI: 0000000000000006 RDI: 0000000000000001
[  137.227560] 039: RBP: ffff8897c3e1bb00 R08: 0000000000000000 R09: 0000000000000000
[  137.235126] 039: R10: 0000000000000001 R11: 0000000000000001 R12: ffffffff81001fd1
[  137.242694] 039: R13: 0000000000000044 R14: 0000000000000000 R15: ffffc9005835fcac
[  137.250259] 039:  ? ___preempt_schedule+0x16/0x18
[  137.254969] 039:  preempt_schedule_common+0x32/0x80
[  137.259846] 039:  ___preempt_schedule+0x16/0x18
[  137.264379] 039:  rcutorture_one_extend+0x33a/0x510 [rcutorture]
[  137.270397] 039:  rcu_torture_one_read+0x18c/0x450 [rcutorture]
[  137.276334] 039:  rcu_torture_reader+0xac/0x1f0 [rcutorture]
[  137.281998] 039:  ? rcu_torture_reader+0x1f0/0x1f0 [rcutorture]
[  137.287920] 039:  kthread+0x106/0x140
[  137.291591] 039:  ? rcu_torture_one_read+0x450/0x450 [rcutorture]
[  137.297681] 039:  ? kthread_bind+0x10/0x10
[  137.301783] 039:  ret_from_fork+0x3a/0x50

Signed-off-by: Scott Wood <swood@redhat.com>
---
I think the prohibition on use_softirq can be dropped once RT gets the
latest RCU code, but the question of what use_softirq should default
to on PREEMPT_RT remains.
---
 kernel/rcu/tree.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index fc8b00c61b32..12036d33e2f9 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -98,9 +98,16 @@ struct rcu_state rcu_state = {
 /* Dump rcu_node combining tree at boot to verify correct setup. */
 static bool dump_tree;
 module_param(dump_tree, bool, 0444);
-/* By default, use RCU_SOFTIRQ instead of rcuc kthreads. */
+/*
+ * By default, use RCU_SOFTIRQ instead of rcuc kthreads.
+ * But, avoid RCU_SOFTIRQ on PREEMPT_RT due to pi/rq deadlocks.
+ */
+#ifdef CONFIG_PREEMPT_RT_FULL
+static bool use_softirq;
+#else
 static bool use_softirq = 1;
 module_param(use_softirq, bool, 0444);
+#endif
 /* Control rcu_node-tree auto-balancing at boot time. */
 static bool rcu_fanout_exact;
 module_param(rcu_fanout_exact, bool, 0444);
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH RT v2 1/3] rcu: Acquire RCU lock when disabling BHs
  2019-08-21 23:19 ` [PATCH RT v2 1/3] rcu: Acquire RCU lock when disabling BHs Scott Wood
@ 2019-08-21 23:33   ` Paul E. McKenney
  2019-08-22 13:39     ` Joel Fernandes
  2019-08-23  2:36     ` Scott Wood
  0 siblings, 2 replies; 43+ messages in thread
From: Paul E. McKenney @ 2019-08-21 23:33 UTC (permalink / raw)
  To: Scott Wood
  Cc: Sebastian Andrzej Siewior, linux-rt-users, linux-kernel,
	Joel Fernandes, Thomas Gleixner, Peter Zijlstra, Juri Lelli,
	Clark Williams

On Wed, Aug 21, 2019 at 06:19:04PM -0500, Scott Wood wrote:
> A plain local_bh_disable() is documented as creating an RCU critical
> section, and (at least) rcutorture expects this to be the case.  However,
> in_softirq() doesn't block a grace period on PREEMPT_RT, since RCU checks
> preempt_count() directly.  Even if RCU were changed to check
> in_softirq(), that wouldn't allow blocked BH disablers to be boosted.
> 
> Fix this by calling rcu_read_lock() from local_bh_disable(), and update
> rcu_read_lock_bh_held() accordingly.

Cool!  Some questions and comments below.

							Thanx, Paul

> Signed-off-by: Scott Wood <swood@redhat.com>
> ---
> Another question is whether non-raw spinlocks are intended to create an
> RCU read-side critical section due to implicit preempt disable.

Hmmm...  Did non-raw spinlocks act like rcu_read_lock_sched()
and rcu_read_unlock_sched() pairs in -rt prior to the RCU flavor
consolidation?  If not, I don't see why they should do so after that
consolidation in -rt.

>                                                                  If they
> are, then we'd need to add rcu_read_lock() there as well since RT doesn't
> disable preemption (and rcutorture should explicitly test with a
> spinlock).  If not, the documentation should make that clear.

True enough!

>  include/linux/rcupdate.h |  4 ++++
>  kernel/rcu/update.c      |  4 ++++
>  kernel/softirq.c         | 12 +++++++++---
>  3 files changed, 17 insertions(+), 3 deletions(-)
> 
> diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
> index 388ace315f32..d6e357378732 100644
> --- a/include/linux/rcupdate.h
> +++ b/include/linux/rcupdate.h
> @@ -615,10 +615,12 @@ static inline void rcu_read_unlock(void)
>  static inline void rcu_read_lock_bh(void)
>  {
>  	local_bh_disable();
> +#ifndef CONFIG_PREEMPT_RT_FULL
>  	__acquire(RCU_BH);
>  	rcu_lock_acquire(&rcu_bh_lock_map);
>  	RCU_LOCKDEP_WARN(!rcu_is_watching(),
>  			 "rcu_read_lock_bh() used illegally while idle");
> +#endif

Any chance of this using "if (!IS_ENABLED(CONFIG_PREEMPT_RT_FULL))"?
We should be OK providing a do-nothing __maybe_unused rcu_bh_lock_map
for lockdep-enabled -rt kernels, right?

>  }
>  
>  /*
> @@ -628,10 +630,12 @@ static inline void rcu_read_lock_bh(void)
>   */
>  static inline void rcu_read_unlock_bh(void)
>  {
> +#ifndef CONFIG_PREEMPT_RT_FULL
>  	RCU_LOCKDEP_WARN(!rcu_is_watching(),
>  			 "rcu_read_unlock_bh() used illegally while idle");
>  	rcu_lock_release(&rcu_bh_lock_map);
>  	__release(RCU_BH);
> +#endif

Ditto.

>  	local_bh_enable();
>  }
>  
> diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
> index 016c66a98292..a9cdf3d562bc 100644
> --- a/kernel/rcu/update.c
> +++ b/kernel/rcu/update.c
> @@ -296,7 +296,11 @@ int rcu_read_lock_bh_held(void)
>  		return 0;
>  	if (!rcu_lockdep_current_cpu_online())
>  		return 0;
> +#ifdef CONFIG_PREEMPT_RT_FULL
> +	return lock_is_held(&rcu_lock_map) || irqs_disabled();
> +#else
>  	return in_softirq() || irqs_disabled();
> +#endif

And globally.

>  }
>  EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
>  
> diff --git a/kernel/softirq.c b/kernel/softirq.c
> index d16d080a74f7..6080c9328df1 100644
> --- a/kernel/softirq.c
> +++ b/kernel/softirq.c
> @@ -115,8 +115,10 @@ void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
>  	long soft_cnt;
>  
>  	WARN_ON_ONCE(in_irq());
> -	if (!in_atomic())
> +	if (!in_atomic()) {
>  		local_lock(bh_lock);
> +		rcu_read_lock();
> +	}
>  	soft_cnt = this_cpu_inc_return(softirq_counter);
>  	WARN_ON_ONCE(soft_cnt == 0);
>  	current->softirq_count += SOFTIRQ_DISABLE_OFFSET;
> @@ -151,8 +153,10 @@ void _local_bh_enable(void)
>  #endif
>  
>  	current->softirq_count -= SOFTIRQ_DISABLE_OFFSET;
> -	if (!in_atomic())
> +	if (!in_atomic()) {
> +		rcu_read_unlock();
>  		local_unlock(bh_lock);
> +	}
>  }
>  
>  void _local_bh_enable_rt(void)
> @@ -185,8 +189,10 @@ void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
>  	WARN_ON_ONCE(count < 0);
>  	local_irq_enable();
>  
> -	if (!in_atomic())
> +	if (!in_atomic()) {
> +		rcu_read_unlock();
>  		local_unlock(bh_lock);
> +	}

The return from in_atomic() is guaranteed to be the same at
local_bh_enable() time as was at the call to the corresponding
local_bh_disable()?

I could have sworn that I ran afoul of this last year.  Might these
added rcu_read_lock() and rcu_read_unlock() calls need to check for
CONFIG_PREEMPT_RT_FULL?

>  	current->softirq_count -= SOFTIRQ_DISABLE_OFFSET;
>  	preempt_check_resched();
> -- 
> 1.8.3.1
> 

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH RT v2 2/3] sched: migrate_enable: Use sleeping_lock to indicate involuntary sleep
  2019-08-21 23:19 ` [PATCH RT v2 2/3] sched: migrate_enable: Use sleeping_lock to indicate involuntary sleep Scott Wood
@ 2019-08-21 23:35   ` Paul E. McKenney
  2019-08-23  1:21     ` Scott Wood
  2019-08-23 16:20   ` Sebastian Andrzej Siewior
  1 sibling, 1 reply; 43+ messages in thread
From: Paul E. McKenney @ 2019-08-21 23:35 UTC (permalink / raw)
  To: Scott Wood
  Cc: Sebastian Andrzej Siewior, linux-rt-users, linux-kernel,
	Joel Fernandes, Thomas Gleixner, Peter Zijlstra, Juri Lelli,
	Clark Williams

On Wed, Aug 21, 2019 at 06:19:05PM -0500, Scott Wood wrote:
> Without this, rcu_note_context_switch() will complain if an RCU read
> lock is held when migrate_enable() calls stop_one_cpu().
> 
> Signed-off-by: Scott Wood <swood@redhat.com>

I have to ask...  Both sleeping_lock_inc() and sleeping_lock_dec() are
no-ops if not CONFIG_PREEMPT_RT_BASE?

							Thanx, Paul

> ---
> v2: Added comment.
> 
> If my migrate disable changes aren't taken, then pin_current_cpu()
> will also need to use sleeping_lock_inc() because calling
> __read_rt_lock() bypasses the usual place it's done.
> 
>  include/linux/sched.h    | 4 ++--
>  kernel/rcu/tree_plugin.h | 2 +-
>  kernel/sched/core.c      | 8 ++++++++
>  3 files changed, 11 insertions(+), 3 deletions(-)
> 
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 7e892e727f12..1ebc97f28009 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -673,7 +673,7 @@ struct task_struct {
>  	int				migrate_disable_atomic;
>  # endif
>  #endif
> -#ifdef CONFIG_PREEMPT_RT_FULL
> +#ifdef CONFIG_PREEMPT_RT_BASE
>  	int				sleeping_lock;
>  #endif
>  
> @@ -1881,7 +1881,7 @@ static __always_inline bool need_resched(void)
>  	return unlikely(tif_need_resched());
>  }
>  
> -#ifdef CONFIG_PREEMPT_RT_FULL
> +#ifdef CONFIG_PREEMPT_RT_BASE
>  static inline void sleeping_lock_inc(void)
>  {
>  	current->sleeping_lock++;
> diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
> index 23a54e4b649c..7a3aa085ce2c 100644
> --- a/kernel/rcu/tree_plugin.h
> +++ b/kernel/rcu/tree_plugin.h
> @@ -292,7 +292,7 @@ void rcu_note_context_switch(bool preempt)
>  	barrier(); /* Avoid RCU read-side critical sections leaking down. */
>  	trace_rcu_utilization(TPS("Start context switch"));
>  	lockdep_assert_irqs_disabled();
> -#if defined(CONFIG_PREEMPT_RT_FULL)
> +#if defined(CONFIG_PREEMPT_RT_BASE)
>  	sleeping_l = t->sleeping_lock;
>  #endif
>  	WARN_ON_ONCE(!preempt && t->rcu_read_lock_nesting > 0 && !sleeping_l);
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index e1bdd7f9be05..0758ee85634e 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -7405,7 +7405,15 @@ void migrate_enable(void)
>  			unpin_current_cpu();
>  			preempt_lazy_enable();
>  			preempt_enable();
> +
> +			/*
> +			 * sleeping_lock_inc suppresses a debug check for
> +			 * sleeping inside an RCU read side critical section
> +			 */
> +			sleeping_lock_inc();
>  			stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg);
> +			sleeping_lock_dec();
> +
>  			return;
>  		}
>  	}
> -- 
> 1.8.3.1
> 

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH RT v2 3/3] rcu: Disable use_softirq on PREEMPT_RT
  2019-08-21 23:19 ` [PATCH RT v2 3/3] rcu: Disable use_softirq on PREEMPT_RT Scott Wood
@ 2019-08-21 23:40   ` Paul E. McKenney
  2019-08-23 16:32     ` Sebastian Andrzej Siewior
  2019-08-22 13:59   ` Joel Fernandes
  1 sibling, 1 reply; 43+ messages in thread
From: Paul E. McKenney @ 2019-08-21 23:40 UTC (permalink / raw)
  To: Scott Wood
  Cc: Sebastian Andrzej Siewior, linux-rt-users, linux-kernel,
	Joel Fernandes, Thomas Gleixner, Peter Zijlstra, Juri Lelli,
	Clark Williams

On Wed, Aug 21, 2019 at 06:19:06PM -0500, Scott Wood wrote:
> Besides restoring behavior that used to be default on RT, this avoids
> a deadlock on scheduler locks:
> 
> [  136.894657] 039: ============================================
> [  136.900401] 039: WARNING: possible recursive locking detected
> [  136.906146] 039: 5.2.9-rt3.dbg+ #174 Tainted: G            E
> [  136.912152] 039: --------------------------------------------
> [  136.917896] 039: rcu_torture_rea/13474 is trying to acquire lock:
> [  136.923990] 039: 000000005f25146d
> [  136.927310] 039:  (
> [  136.929414] 039: &p->pi_lock
> [  136.932303] 039: ){-...}
> [  136.934840] 039: , at: try_to_wake_up+0x39/0x920
> [  136.939461] 039:
> but task is already holding lock:
> [  136.944425] 039: 000000005f25146d
> [  136.947745] 039:  (
> [  136.949852] 039: &p->pi_lock
> [  136.952738] 039: ){-...}
> [  136.955274] 039: , at: try_to_wake_up+0x39/0x920
> [  136.959895] 039:
> other info that might help us debug this:
> [  136.965555] 039:  Possible unsafe locking scenario:
> 
> [  136.970608] 039:        CPU0
> [  136.973493] 039:        ----
> [  136.976378] 039:   lock(
> [  136.978918] 039: &p->pi_lock
> [  136.981806] 039: );
> [  136.983911] 039:   lock(
> [  136.986451] 039: &p->pi_lock
> [  136.989336] 039: );
> [  136.991444] 039:
>  *** DEADLOCK ***
> 
> [  136.995194] 039:  May be due to missing lock nesting notation
> 
> [  137.001115] 039: 3 locks held by rcu_torture_rea/13474:
> [  137.006341] 039:  #0:
> [  137.008707] 039: 000000005f25146d
> [  137.012024] 039:  (
> [  137.014131] 039: &p->pi_lock
> [  137.017015] 039: ){-...}
> [  137.019558] 039: , at: try_to_wake_up+0x39/0x920
> [  137.024175] 039:  #1:
> [  137.026540] 039: 0000000011c8e51d
> [  137.029859] 039:  (
> [  137.031966] 039: &rq->lock
> [  137.034679] 039: ){-...}
> [  137.037217] 039: , at: try_to_wake_up+0x241/0x920
> [  137.041924] 039:  #2:
> [  137.044291] 039: 00000000098649b9
> [  137.047610] 039:  (
> [  137.049714] 039: rcu_read_lock
> [  137.052774] 039: ){....}
> [  137.055314] 039: , at: cpuacct_charge+0x33/0x1e0
> [  137.059934] 039:
> stack backtrace:
> [  137.063425] 039: CPU: 39 PID: 13474 Comm: rcu_torture_rea Kdump: loaded Tainted: G            E     5.2.9-rt3.dbg+ #174
> [  137.074197] 039: Hardware name: Intel Corporation S2600BT/S2600BT, BIOS SE5C620.86B.01.00.0763.022420181017 02/24/2018
> [  137.084886] 039: Call Trace:
> [  137.087773] 039:  <IRQ>
> [  137.090226] 039:  dump_stack+0x5e/0x8b
> [  137.093997] 039:  __lock_acquire+0x725/0x1100
> [  137.098358] 039:  lock_acquire+0xc0/0x240
> [  137.102374] 039:  ? try_to_wake_up+0x39/0x920
> [  137.106737] 039:  _raw_spin_lock_irqsave+0x47/0x90
> [  137.111534] 039:  ? try_to_wake_up+0x39/0x920
> [  137.115910] 039:  try_to_wake_up+0x39/0x920
> [  137.120098] 039:  rcu_read_unlock_special+0x65/0xb0
> [  137.124977] 039:  __rcu_read_unlock+0x5d/0x70
> [  137.129337] 039:  cpuacct_charge+0xd9/0x1e0
> [  137.133522] 039:  ? cpuacct_charge+0x33/0x1e0
> [  137.137880] 039:  update_curr+0x14b/0x420
> [  137.141894] 039:  enqueue_entity+0x42/0x370
> [  137.146080] 039:  enqueue_task_fair+0xa9/0x490
> [  137.150528] 039:  activate_task+0x5a/0xf0
> [  137.154539] 039:  ttwu_do_activate+0x4e/0x90
> [  137.158813] 039:  try_to_wake_up+0x277/0x920
> [  137.163086] 039:  irq_exit+0xb6/0xf0
> [  137.166661] 039:  smp_apic_timer_interrupt+0xe3/0x3a0
> [  137.171714] 039:  apic_timer_interrupt+0xf/0x20
> [  137.176249] 039:  </IRQ>
> [  137.178785] 039: RIP: 0010:__schedule+0x0/0x8e0
> [  137.183319] 039: Code: 00 02 48 89 43 20 e8 0f 5a 00 00 48 8d 7b 28 e8 86 f2 fd ff 31 c0 5b 5d 41 5c c3 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 <55> 48 89 e5 41 57 41 56 49 c7 c6 c0 ca 1e 00 41 55 41 89 fd 41 54
> [  137.202498] 039: RSP: 0018:ffffc9005835fbc0 EFLAGS: 00000246
> [  137.208158] 039:  ORIG_RAX: ffffffffffffff13
> [  137.212428] 039: RAX: 0000000000000000 RBX: ffff8897c3e1bb00 RCX: 0000000000000001
> [  137.219994] 039: RDX: 0000000080004008 RSI: 0000000000000006 RDI: 0000000000000001
> [  137.227560] 039: RBP: ffff8897c3e1bb00 R08: 0000000000000000 R09: 0000000000000000
> [  137.235126] 039: R10: 0000000000000001 R11: 0000000000000001 R12: ffffffff81001fd1
> [  137.242694] 039: R13: 0000000000000044 R14: 0000000000000000 R15: ffffc9005835fcac
> [  137.250259] 039:  ? ___preempt_schedule+0x16/0x18
> [  137.254969] 039:  preempt_schedule_common+0x32/0x80
> [  137.259846] 039:  ___preempt_schedule+0x16/0x18
> [  137.264379] 039:  rcutorture_one_extend+0x33a/0x510 [rcutorture]
> [  137.270397] 039:  rcu_torture_one_read+0x18c/0x450 [rcutorture]
> [  137.276334] 039:  rcu_torture_reader+0xac/0x1f0 [rcutorture]
> [  137.281998] 039:  ? rcu_torture_reader+0x1f0/0x1f0 [rcutorture]
> [  137.287920] 039:  kthread+0x106/0x140
> [  137.291591] 039:  ? rcu_torture_one_read+0x450/0x450 [rcutorture]
> [  137.297681] 039:  ? kthread_bind+0x10/0x10
> [  137.301783] 039:  ret_from_fork+0x3a/0x50
> 
> Signed-off-by: Scott Wood <swood@redhat.com>
> ---
> I think the prohibition on use_softirq can be dropped once RT gets the
> latest RCU code, but the question of what use_softirq should default
> to on PREEMPT_RT remains.
> ---
>  kernel/rcu/tree.c | 9 ++++++++-
>  1 file changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index fc8b00c61b32..12036d33e2f9 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -98,9 +98,16 @@ struct rcu_state rcu_state = {
>  /* Dump rcu_node combining tree at boot to verify correct setup. */
>  static bool dump_tree;
>  module_param(dump_tree, bool, 0444);
> -/* By default, use RCU_SOFTIRQ instead of rcuc kthreads. */
> +/*
> + * By default, use RCU_SOFTIRQ instead of rcuc kthreads.
> + * But, avoid RCU_SOFTIRQ on PREEMPT_RT due to pi/rq deadlocks.
> + */
> +#ifdef CONFIG_PREEMPT_RT_FULL
> +static bool use_softirq;
> +#else
>  static bool use_softirq = 1;
>  module_param(use_softirq, bool, 0444);
> +#endif

Save a couple of lines?

static bool use_softirq = !IS_ENABLED(CONFIG_PREEMPT_RT_FULL);

And if I understand your point above, the module_param() might be
able to be the same either way given the new RCU.  But if not,
at least we get rid of the #else.

							Thanx, Paul

>  /* Control rcu_node-tree auto-balancing at boot time. */
>  static bool rcu_fanout_exact;
>  module_param(rcu_fanout_exact, bool, 0444);
> -- 
> 1.8.3.1
> 

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH RT v2 1/3] rcu: Acquire RCU lock when disabling BHs
  2019-08-21 23:33   ` Paul E. McKenney
@ 2019-08-22 13:39     ` Joel Fernandes
  2019-08-22 15:27       ` Paul E. McKenney
  2019-08-23  3:23       ` Scott Wood
  2019-08-23  2:36     ` Scott Wood
  1 sibling, 2 replies; 43+ messages in thread
From: Joel Fernandes @ 2019-08-22 13:39 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: Scott Wood, Sebastian Andrzej Siewior, linux-rt-users,
	linux-kernel, Thomas Gleixner, Peter Zijlstra, Juri Lelli,
	Clark Williams

On Wed, Aug 21, 2019 at 04:33:58PM -0700, Paul E. McKenney wrote:
> On Wed, Aug 21, 2019 at 06:19:04PM -0500, Scott Wood wrote:
> > A plain local_bh_disable() is documented as creating an RCU critical
> > section, and (at least) rcutorture expects this to be the case.  However,
> > in_softirq() doesn't block a grace period on PREEMPT_RT, since RCU checks
> > preempt_count() directly.  Even if RCU were changed to check
> > in_softirq(), that wouldn't allow blocked BH disablers to be boosted.
> > 
> > Fix this by calling rcu_read_lock() from local_bh_disable(), and update
> > rcu_read_lock_bh_held() accordingly.
> 
> Cool!  Some questions and comments below.
> 
> 							Thanx, Paul
> 
> > Signed-off-by: Scott Wood <swood@redhat.com>
> > ---
> > Another question is whether non-raw spinlocks are intended to create an
> > RCU read-side critical section due to implicit preempt disable.
> 
> Hmmm...  Did non-raw spinlocks act like rcu_read_lock_sched()
> and rcu_read_unlock_sched() pairs in -rt prior to the RCU flavor
> consolidation?  If not, I don't see why they should do so after that
> consolidation in -rt.

May be I am missing something, but I didn't see the connection between
consolidation and this patch. AFAICS, this patch is so that
rcu_read_lock_bh_held() works at all on -rt. Did I badly miss something?

> >                                                                  If they
> > are, then we'd need to add rcu_read_lock() there as well since RT doesn't
> > disable preemption (and rcutorture should explicitly test with a
> > spinlock).  If not, the documentation should make that clear.
> 
> True enough!
> 
> >  include/linux/rcupdate.h |  4 ++++
> >  kernel/rcu/update.c      |  4 ++++
> >  kernel/softirq.c         | 12 +++++++++---
> >  3 files changed, 17 insertions(+), 3 deletions(-)
> > 
> > diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
> > index 388ace315f32..d6e357378732 100644
> > --- a/include/linux/rcupdate.h
> > +++ b/include/linux/rcupdate.h
> > @@ -615,10 +615,12 @@ static inline void rcu_read_unlock(void)
> >  static inline void rcu_read_lock_bh(void)
> >  {
> >  	local_bh_disable();
> > +#ifndef CONFIG_PREEMPT_RT_FULL
> >  	__acquire(RCU_BH);
> >  	rcu_lock_acquire(&rcu_bh_lock_map);
> >  	RCU_LOCKDEP_WARN(!rcu_is_watching(),
> >  			 "rcu_read_lock_bh() used illegally while idle");
> > +#endif
> 
> Any chance of this using "if (!IS_ENABLED(CONFIG_PREEMPT_RT_FULL))"?
> We should be OK providing a do-nothing __maybe_unused rcu_bh_lock_map
> for lockdep-enabled -rt kernels, right?

Since this function is small, I prefer if -rt defines their own
rcu_read_lock_bh() which just does the local_bh_disable(). That would be way
cleaner IMO. IIRC, -rt does similar things for spinlocks, but it has been
sometime since I look at the -rt patchset.

> >  }
> >  
> >  /*
> > @@ -628,10 +630,12 @@ static inline void rcu_read_lock_bh(void)
> >   */
> >  static inline void rcu_read_unlock_bh(void)
> >  {
> > +#ifndef CONFIG_PREEMPT_RT_FULL
> >  	RCU_LOCKDEP_WARN(!rcu_is_watching(),
> >  			 "rcu_read_unlock_bh() used illegally while idle");
> >  	rcu_lock_release(&rcu_bh_lock_map);
> >  	__release(RCU_BH);
> > +#endif
> 
> Ditto.
> 
> >  	local_bh_enable();
> >  }
> >  
> > diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
> > index 016c66a98292..a9cdf3d562bc 100644
> > --- a/kernel/rcu/update.c
> > +++ b/kernel/rcu/update.c
> > @@ -296,7 +296,11 @@ int rcu_read_lock_bh_held(void)
> >  		return 0;
> >  	if (!rcu_lockdep_current_cpu_online())
> >  		return 0;
> > +#ifdef CONFIG_PREEMPT_RT_FULL
> > +	return lock_is_held(&rcu_lock_map) || irqs_disabled();
> > +#else
> >  	return in_softirq() || irqs_disabled();
> > +#endif
> 
> And globally.

And could be untangled a bit as well:

if (irqs_disabled())
	return 1;

if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL))
	return lock_is_held(&rcu_lock_map);

return in_softirq();

> >  }
> >  EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
> >  
> > diff --git a/kernel/softirq.c b/kernel/softirq.c
> > index d16d080a74f7..6080c9328df1 100644
> > --- a/kernel/softirq.c
> > +++ b/kernel/softirq.c
> > @@ -115,8 +115,10 @@ void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
> >  	long soft_cnt;
> >  
> >  	WARN_ON_ONCE(in_irq());
> > -	if (!in_atomic())
> > +	if (!in_atomic()) {
> >  		local_lock(bh_lock);
> > +		rcu_read_lock();
> > +	}
> >  	soft_cnt = this_cpu_inc_return(softirq_counter);
> >  	WARN_ON_ONCE(soft_cnt == 0);
> >  	current->softirq_count += SOFTIRQ_DISABLE_OFFSET;
> > @@ -151,8 +153,10 @@ void _local_bh_enable(void)
> >  #endif
> >  
> >  	current->softirq_count -= SOFTIRQ_DISABLE_OFFSET;
> > -	if (!in_atomic())
> > +	if (!in_atomic()) {
> > +		rcu_read_unlock();
> >  		local_unlock(bh_lock);
> > +	}
> >  }
> >  
> >  void _local_bh_enable_rt(void)
> > @@ -185,8 +189,10 @@ void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
> >  	WARN_ON_ONCE(count < 0);
> >  	local_irq_enable();
> >  
> > -	if (!in_atomic())
> > +	if (!in_atomic()) {
> > +		rcu_read_unlock();
> >  		local_unlock(bh_lock);
> > +	}
> 
> The return from in_atomic() is guaranteed to be the same at
> local_bh_enable() time as was at the call to the corresponding
> local_bh_disable()?
> 
> I could have sworn that I ran afoul of this last year.  Might these
> added rcu_read_lock() and rcu_read_unlock() calls need to check for
> CONFIG_PREEMPT_RT_FULL?

Great point! I think they should be guarded but will let Scott answer that
one.

thanks,

 - Joel


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH RT v2 3/3] rcu: Disable use_softirq on PREEMPT_RT
  2019-08-21 23:19 ` [PATCH RT v2 3/3] rcu: Disable use_softirq on PREEMPT_RT Scott Wood
  2019-08-21 23:40   ` Paul E. McKenney
@ 2019-08-22 13:59   ` Joel Fernandes
  2019-08-22 15:29     ` Paul E. McKenney
  2019-08-22 19:31     ` Scott Wood
  1 sibling, 2 replies; 43+ messages in thread
From: Joel Fernandes @ 2019-08-22 13:59 UTC (permalink / raw)
  To: Scott Wood
  Cc: Sebastian Andrzej Siewior, linux-rt-users, linux-kernel,
	Paul E . McKenney, Thomas Gleixner, Peter Zijlstra, Juri Lelli,
	Clark Williams

On Wed, Aug 21, 2019 at 06:19:06PM -0500, Scott Wood wrote:
> Besides restoring behavior that used to be default on RT, this avoids
> a deadlock on scheduler locks:
> 
> [  136.894657] 039: ============================================
> [  136.900401] 039: WARNING: possible recursive locking detected
> [  136.906146] 039: 5.2.9-rt3.dbg+ #174 Tainted: G            E
> [  136.912152] 039: --------------------------------------------
> [  136.917896] 039: rcu_torture_rea/13474 is trying to acquire lock:
> [  136.923990] 039: 000000005f25146d
> [  136.927310] 039:  (
> [  136.929414] 039: &p->pi_lock
> [  136.932303] 039: ){-...}
> [  136.934840] 039: , at: try_to_wake_up+0x39/0x920
> [  136.939461] 039:
> but task is already holding lock:
> [  136.944425] 039: 000000005f25146d
> [  136.947745] 039:  (
> [  136.949852] 039: &p->pi_lock
> [  136.952738] 039: ){-...}
> [  136.955274] 039: , at: try_to_wake_up+0x39/0x920
> [  136.959895] 039:
> other info that might help us debug this:
> [  136.965555] 039:  Possible unsafe locking scenario:
> 
> [  136.970608] 039:        CPU0
> [  136.973493] 039:        ----
> [  136.976378] 039:   lock(
> [  136.978918] 039: &p->pi_lock
> [  136.981806] 039: );
> [  136.983911] 039:   lock(
> [  136.986451] 039: &p->pi_lock
> [  136.989336] 039: );
> [  136.991444] 039:
>  *** DEADLOCK ***
> 
> [  136.995194] 039:  May be due to missing lock nesting notation
> 
> [  137.001115] 039: 3 locks held by rcu_torture_rea/13474:
> [  137.006341] 039:  #0:
> [  137.008707] 039: 000000005f25146d
> [  137.012024] 039:  (
> [  137.014131] 039: &p->pi_lock
> [  137.017015] 039: ){-...}
> [  137.019558] 039: , at: try_to_wake_up+0x39/0x920
> [  137.024175] 039:  #1:
> [  137.026540] 039: 0000000011c8e51d
> [  137.029859] 039:  (
> [  137.031966] 039: &rq->lock
> [  137.034679] 039: ){-...}
> [  137.037217] 039: , at: try_to_wake_up+0x241/0x920
> [  137.041924] 039:  #2:
> [  137.044291] 039: 00000000098649b9
> [  137.047610] 039:  (
> [  137.049714] 039: rcu_read_lock
> [  137.052774] 039: ){....}
> [  137.055314] 039: , at: cpuacct_charge+0x33/0x1e0
> [  137.059934] 039:
> stack backtrace:
> [  137.063425] 039: CPU: 39 PID: 13474 Comm: rcu_torture_rea Kdump: loaded Tainted: G            E     5.2.9-rt3.dbg+ #174
> [  137.074197] 039: Hardware name: Intel Corporation S2600BT/S2600BT, BIOS SE5C620.86B.01.00.0763.022420181017 02/24/2018
> [  137.084886] 039: Call Trace:
> [  137.087773] 039:  <IRQ>
> [  137.090226] 039:  dump_stack+0x5e/0x8b
> [  137.093997] 039:  __lock_acquire+0x725/0x1100
> [  137.098358] 039:  lock_acquire+0xc0/0x240
> [  137.102374] 039:  ? try_to_wake_up+0x39/0x920
> [  137.106737] 039:  _raw_spin_lock_irqsave+0x47/0x90
> [  137.111534] 039:  ? try_to_wake_up+0x39/0x920
> [  137.115910] 039:  try_to_wake_up+0x39/0x920
> [  137.120098] 039:  rcu_read_unlock_special+0x65/0xb0
> [  137.124977] 039:  __rcu_read_unlock+0x5d/0x70
> [  137.129337] 039:  cpuacct_charge+0xd9/0x1e0
> [  137.133522] 039:  ? cpuacct_charge+0x33/0x1e0
> [  137.137880] 039:  update_curr+0x14b/0x420
> [  137.141894] 039:  enqueue_entity+0x42/0x370
> [  137.146080] 039:  enqueue_task_fair+0xa9/0x490
> [  137.150528] 039:  activate_task+0x5a/0xf0
> [  137.154539] 039:  ttwu_do_activate+0x4e/0x90
> [  137.158813] 039:  try_to_wake_up+0x277/0x920
> [  137.163086] 039:  irq_exit+0xb6/0xf0
> [  137.166661] 039:  smp_apic_timer_interrupt+0xe3/0x3a0
> [  137.171714] 039:  apic_timer_interrupt+0xf/0x20
> [  137.176249] 039:  </IRQ>
> [  137.178785] 039: RIP: 0010:__schedule+0x0/0x8e0
> [  137.183319] 039: Code: 00 02 48 89 43 20 e8 0f 5a 00 00 48 8d 7b 28 e8 86 f2 fd ff 31 c0 5b 5d 41 5c c3 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 <55> 48 89 e5 41 57 41 56 49 c7 c6 c0 ca 1e 00 41 55 41 89 fd 41 54
> [  137.202498] 039: RSP: 0018:ffffc9005835fbc0 EFLAGS: 00000246
> [  137.208158] 039:  ORIG_RAX: ffffffffffffff13
> [  137.212428] 039: RAX: 0000000000000000 RBX: ffff8897c3e1bb00 RCX: 0000000000000001
> [  137.219994] 039: RDX: 0000000080004008 RSI: 0000000000000006 RDI: 0000000000000001
> [  137.227560] 039: RBP: ffff8897c3e1bb00 R08: 0000000000000000 R09: 0000000000000000
> [  137.235126] 039: R10: 0000000000000001 R11: 0000000000000001 R12: ffffffff81001fd1
> [  137.242694] 039: R13: 0000000000000044 R14: 0000000000000000 R15: ffffc9005835fcac
> [  137.250259] 039:  ? ___preempt_schedule+0x16/0x18
> [  137.254969] 039:  preempt_schedule_common+0x32/0x80
> [  137.259846] 039:  ___preempt_schedule+0x16/0x18
> [  137.264379] 039:  rcutorture_one_extend+0x33a/0x510 [rcutorture]
> [  137.270397] 039:  rcu_torture_one_read+0x18c/0x450 [rcutorture]
> [  137.276334] 039:  rcu_torture_reader+0xac/0x1f0 [rcutorture]
> [  137.281998] 039:  ? rcu_torture_reader+0x1f0/0x1f0 [rcutorture]
> [  137.287920] 039:  kthread+0x106/0x140
> [  137.291591] 039:  ? rcu_torture_one_read+0x450/0x450 [rcutorture]
> [  137.297681] 039:  ? kthread_bind+0x10/0x10
> [  137.301783] 039:  ret_from_fork+0x3a/0x50
> 
> Signed-off-by: Scott Wood <swood@redhat.com>
> ---
> I think the prohibition on use_softirq can be dropped once RT gets the
> latest RCU code, but the question of what use_softirq should default
> to on PREEMPT_RT remains.

Independent of the question of what use_softirq should default to, could we
test RT with latest RCU code now to check if the deadlock goes away?  That
way, maybe we can find any issues in current RCU that cause scheduler
deadlocks in the situation you pointed. The reason I am asking is because
recently additional commits [1] try to prevent deadlock and it'd be nice to
ensure that other conditions are not lingering (I don't think they are but
it'd be nice to be sure).

I am happy to do such testing myself if you want, however what does it take
to apply the RT patchset to the latest mainline? Is it an achievable feat?

thanks,

 - Joel

[1]
23634ebc1d94 ("rcu: Check for wakeup-safe conditions")
25102de ("rcu: Only do rcu_read_unlock_special() wakeups if expedited")


>  kernel/rcu/tree.c | 9 ++++++++-
>  1 file changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index fc8b00c61b32..12036d33e2f9 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -98,9 +98,16 @@ struct rcu_state rcu_state = {
>  /* Dump rcu_node combining tree at boot to verify correct setup. */
>  static bool dump_tree;
>  module_param(dump_tree, bool, 0444);
> -/* By default, use RCU_SOFTIRQ instead of rcuc kthreads. */
> +/*
> + * By default, use RCU_SOFTIRQ instead of rcuc kthreads.
> + * But, avoid RCU_SOFTIRQ on PREEMPT_RT due to pi/rq deadlocks.
> + */
> +#ifdef CONFIG_PREEMPT_RT_FULL
> +static bool use_softirq;
> +#else
>  static bool use_softirq = 1;
>  module_param(use_softirq, bool, 0444);
> +#endif
>  /* Control rcu_node-tree auto-balancing at boot time. */
>  static bool rcu_fanout_exact;
>  module_param(rcu_fanout_exact, bool, 0444);
> -- 
> 1.8.3.1
> 

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH RT v2 1/3] rcu: Acquire RCU lock when disabling BHs
  2019-08-22 13:39     ` Joel Fernandes
@ 2019-08-22 15:27       ` Paul E. McKenney
  2019-08-23  1:50         ` Joel Fernandes
  2019-08-23  3:23       ` Scott Wood
  1 sibling, 1 reply; 43+ messages in thread
From: Paul E. McKenney @ 2019-08-22 15:27 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: Scott Wood, Sebastian Andrzej Siewior, linux-rt-users,
	linux-kernel, Thomas Gleixner, Peter Zijlstra, Juri Lelli,
	Clark Williams

On Thu, Aug 22, 2019 at 09:39:55AM -0400, Joel Fernandes wrote:
> On Wed, Aug 21, 2019 at 04:33:58PM -0700, Paul E. McKenney wrote:
> > On Wed, Aug 21, 2019 at 06:19:04PM -0500, Scott Wood wrote:
> > > A plain local_bh_disable() is documented as creating an RCU critical
> > > section, and (at least) rcutorture expects this to be the case.  However,
> > > in_softirq() doesn't block a grace period on PREEMPT_RT, since RCU checks
> > > preempt_count() directly.  Even if RCU were changed to check
> > > in_softirq(), that wouldn't allow blocked BH disablers to be boosted.
> > > 
> > > Fix this by calling rcu_read_lock() from local_bh_disable(), and update
> > > rcu_read_lock_bh_held() accordingly.
> > 
> > Cool!  Some questions and comments below.
> > 
> > 							Thanx, Paul
> > 
> > > Signed-off-by: Scott Wood <swood@redhat.com>
> > > ---
> > > Another question is whether non-raw spinlocks are intended to create an
> > > RCU read-side critical section due to implicit preempt disable.
> > 
> > Hmmm...  Did non-raw spinlocks act like rcu_read_lock_sched()
> > and rcu_read_unlock_sched() pairs in -rt prior to the RCU flavor
> > consolidation?  If not, I don't see why they should do so after that
> > consolidation in -rt.
> 
> May be I am missing something, but I didn't see the connection between
> consolidation and this patch. AFAICS, this patch is so that
> rcu_read_lock_bh_held() works at all on -rt. Did I badly miss something?

I was interpreting Scott's question (which would be excluded from the
git commit log) as relating to a possible follow-on patch.

The question is "how special can non-raw spinlocks be in -rt?".  From what
I can see, they have been treated as sleeplocks from an RCU viewpoint,
so maybe that should continue to be the case.  It does deserve some
thought because in mainline a non-raw spinlock really would block a
post-consolidation RCU grace period, even in PREEMPT kernels.

But then again, you cannot preempt a non-raw spinlock in mainline but
you can in -rt, so extending that exception to RCU is not unreasonable.

Either way, we do need to make a definite decision and document it.
If I were forced to make a decision right now, I would follow the old
behavior, so that only raw spinlocks were guaranteed to block RCU grace
periods.  But I am not being forced, so let's actually discuss and make
a conscious decision.  ;-)

							Thanx, Paul

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH RT v2 3/3] rcu: Disable use_softirq on PREEMPT_RT
  2019-08-22 13:59   ` Joel Fernandes
@ 2019-08-22 15:29     ` Paul E. McKenney
  2019-08-22 19:31     ` Scott Wood
  1 sibling, 0 replies; 43+ messages in thread
From: Paul E. McKenney @ 2019-08-22 15:29 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: Scott Wood, Sebastian Andrzej Siewior, linux-rt-users,
	linux-kernel, Thomas Gleixner, Peter Zijlstra, Juri Lelli,
	Clark Williams

On Thu, Aug 22, 2019 at 09:59:53AM -0400, Joel Fernandes wrote:
> On Wed, Aug 21, 2019 at 06:19:06PM -0500, Scott Wood wrote:
> > Besides restoring behavior that used to be default on RT, this avoids
> > a deadlock on scheduler locks:
> > 
> > [  136.894657] 039: ============================================
> > [  136.900401] 039: WARNING: possible recursive locking detected
> > [  136.906146] 039: 5.2.9-rt3.dbg+ #174 Tainted: G            E
> > [  136.912152] 039: --------------------------------------------
> > [  136.917896] 039: rcu_torture_rea/13474 is trying to acquire lock:
> > [  136.923990] 039: 000000005f25146d
> > [  136.927310] 039:  (
> > [  136.929414] 039: &p->pi_lock
> > [  136.932303] 039: ){-...}
> > [  136.934840] 039: , at: try_to_wake_up+0x39/0x920
> > [  136.939461] 039:
> > but task is already holding lock:
> > [  136.944425] 039: 000000005f25146d
> > [  136.947745] 039:  (
> > [  136.949852] 039: &p->pi_lock
> > [  136.952738] 039: ){-...}
> > [  136.955274] 039: , at: try_to_wake_up+0x39/0x920
> > [  136.959895] 039:
> > other info that might help us debug this:
> > [  136.965555] 039:  Possible unsafe locking scenario:
> > 
> > [  136.970608] 039:        CPU0
> > [  136.973493] 039:        ----
> > [  136.976378] 039:   lock(
> > [  136.978918] 039: &p->pi_lock
> > [  136.981806] 039: );
> > [  136.983911] 039:   lock(
> > [  136.986451] 039: &p->pi_lock
> > [  136.989336] 039: );
> > [  136.991444] 039:
> >  *** DEADLOCK ***
> > 
> > [  136.995194] 039:  May be due to missing lock nesting notation
> > 
> > [  137.001115] 039: 3 locks held by rcu_torture_rea/13474:
> > [  137.006341] 039:  #0:
> > [  137.008707] 039: 000000005f25146d
> > [  137.012024] 039:  (
> > [  137.014131] 039: &p->pi_lock
> > [  137.017015] 039: ){-...}
> > [  137.019558] 039: , at: try_to_wake_up+0x39/0x920
> > [  137.024175] 039:  #1:
> > [  137.026540] 039: 0000000011c8e51d
> > [  137.029859] 039:  (
> > [  137.031966] 039: &rq->lock
> > [  137.034679] 039: ){-...}
> > [  137.037217] 039: , at: try_to_wake_up+0x241/0x920
> > [  137.041924] 039:  #2:
> > [  137.044291] 039: 00000000098649b9
> > [  137.047610] 039:  (
> > [  137.049714] 039: rcu_read_lock
> > [  137.052774] 039: ){....}
> > [  137.055314] 039: , at: cpuacct_charge+0x33/0x1e0
> > [  137.059934] 039:
> > stack backtrace:
> > [  137.063425] 039: CPU: 39 PID: 13474 Comm: rcu_torture_rea Kdump: loaded Tainted: G            E     5.2.9-rt3.dbg+ #174
> > [  137.074197] 039: Hardware name: Intel Corporation S2600BT/S2600BT, BIOS SE5C620.86B.01.00.0763.022420181017 02/24/2018
> > [  137.084886] 039: Call Trace:
> > [  137.087773] 039:  <IRQ>
> > [  137.090226] 039:  dump_stack+0x5e/0x8b
> > [  137.093997] 039:  __lock_acquire+0x725/0x1100
> > [  137.098358] 039:  lock_acquire+0xc0/0x240
> > [  137.102374] 039:  ? try_to_wake_up+0x39/0x920
> > [  137.106737] 039:  _raw_spin_lock_irqsave+0x47/0x90
> > [  137.111534] 039:  ? try_to_wake_up+0x39/0x920
> > [  137.115910] 039:  try_to_wake_up+0x39/0x920
> > [  137.120098] 039:  rcu_read_unlock_special+0x65/0xb0
> > [  137.124977] 039:  __rcu_read_unlock+0x5d/0x70
> > [  137.129337] 039:  cpuacct_charge+0xd9/0x1e0
> > [  137.133522] 039:  ? cpuacct_charge+0x33/0x1e0
> > [  137.137880] 039:  update_curr+0x14b/0x420
> > [  137.141894] 039:  enqueue_entity+0x42/0x370
> > [  137.146080] 039:  enqueue_task_fair+0xa9/0x490
> > [  137.150528] 039:  activate_task+0x5a/0xf0
> > [  137.154539] 039:  ttwu_do_activate+0x4e/0x90
> > [  137.158813] 039:  try_to_wake_up+0x277/0x920
> > [  137.163086] 039:  irq_exit+0xb6/0xf0
> > [  137.166661] 039:  smp_apic_timer_interrupt+0xe3/0x3a0
> > [  137.171714] 039:  apic_timer_interrupt+0xf/0x20
> > [  137.176249] 039:  </IRQ>
> > [  137.178785] 039: RIP: 0010:__schedule+0x0/0x8e0
> > [  137.183319] 039: Code: 00 02 48 89 43 20 e8 0f 5a 00 00 48 8d 7b 28 e8 86 f2 fd ff 31 c0 5b 5d 41 5c c3 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 <55> 48 89 e5 41 57 41 56 49 c7 c6 c0 ca 1e 00 41 55 41 89 fd 41 54
> > [  137.202498] 039: RSP: 0018:ffffc9005835fbc0 EFLAGS: 00000246
> > [  137.208158] 039:  ORIG_RAX: ffffffffffffff13
> > [  137.212428] 039: RAX: 0000000000000000 RBX: ffff8897c3e1bb00 RCX: 0000000000000001
> > [  137.219994] 039: RDX: 0000000080004008 RSI: 0000000000000006 RDI: 0000000000000001
> > [  137.227560] 039: RBP: ffff8897c3e1bb00 R08: 0000000000000000 R09: 0000000000000000
> > [  137.235126] 039: R10: 0000000000000001 R11: 0000000000000001 R12: ffffffff81001fd1
> > [  137.242694] 039: R13: 0000000000000044 R14: 0000000000000000 R15: ffffc9005835fcac
> > [  137.250259] 039:  ? ___preempt_schedule+0x16/0x18
> > [  137.254969] 039:  preempt_schedule_common+0x32/0x80
> > [  137.259846] 039:  ___preempt_schedule+0x16/0x18
> > [  137.264379] 039:  rcutorture_one_extend+0x33a/0x510 [rcutorture]
> > [  137.270397] 039:  rcu_torture_one_read+0x18c/0x450 [rcutorture]
> > [  137.276334] 039:  rcu_torture_reader+0xac/0x1f0 [rcutorture]
> > [  137.281998] 039:  ? rcu_torture_reader+0x1f0/0x1f0 [rcutorture]
> > [  137.287920] 039:  kthread+0x106/0x140
> > [  137.291591] 039:  ? rcu_torture_one_read+0x450/0x450 [rcutorture]
> > [  137.297681] 039:  ? kthread_bind+0x10/0x10
> > [  137.301783] 039:  ret_from_fork+0x3a/0x50
> > 
> > Signed-off-by: Scott Wood <swood@redhat.com>
> > ---
> > I think the prohibition on use_softirq can be dropped once RT gets the
> > latest RCU code, but the question of what use_softirq should default
> > to on PREEMPT_RT remains.
> 
> Independent of the question of what use_softirq should default to, could we
> test RT with latest RCU code now to check if the deadlock goes away?  That
> way, maybe we can find any issues in current RCU that cause scheduler
> deadlocks in the situation you pointed. The reason I am asking is because
> recently additional commits [1] try to prevent deadlock and it'd be nice to
> ensure that other conditions are not lingering (I don't think they are but
> it'd be nice to be sure).
> 
> I am happy to do such testing myself if you want, however what does it take
> to apply the RT patchset to the latest mainline? Is it an achievable feat?

I suggest just using the -rt git tree, which I believe lives here:

https://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git

I would guess that branch linux-5.2.y-rt is the one you want, but I
would ask Scott instead of blindly trusting my guess.  ;-)

							Thanx, Paul

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH RT v2 3/3] rcu: Disable use_softirq on PREEMPT_RT
  2019-08-22 13:59   ` Joel Fernandes
  2019-08-22 15:29     ` Paul E. McKenney
@ 2019-08-22 19:31     ` Scott Wood
  2019-08-23  0:52       ` Joel Fernandes
  1 sibling, 1 reply; 43+ messages in thread
From: Scott Wood @ 2019-08-22 19:31 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: Sebastian Andrzej Siewior, linux-rt-users, linux-kernel,
	Paul E . McKenney, Thomas Gleixner, Peter Zijlstra, Juri Lelli,
	Clark Williams

On Thu, 2019-08-22 at 09:59 -0400, Joel Fernandes wrote:
> On Wed, Aug 21, 2019 at 06:19:06PM -0500, Scott Wood wrote:
> > I think the prohibition on use_softirq can be dropped once RT gets the
> > latest RCU code, but the question of what use_softirq should default
> > to on PREEMPT_RT remains.
> 
> Independent of the question of what use_softirq should default to, could
> we
> test RT with latest RCU code now to check if the deadlock goes away?  That
> way, maybe we can find any issues in current RCU that cause scheduler
> deadlocks in the situation you pointed. The reason I am asking is because
> recently additional commits [1] try to prevent deadlock and it'd be nice
> to
> ensure that other conditions are not lingering (I don't think they are but
> it'd be nice to be sure).
> 
> I am happy to do such testing myself if you want, however what does it
> take
> to apply the RT patchset to the latest mainline? Is it an achievable feat?

I did run such a test (cherry picking all RCU patches that aren't already in
RT, plus your RFC patch to rcu_read_unlock_special, rather than applying RT
to current mainline) with rcutorture plus a looping kernel build overnight,
and didn't see any splats with or without use_softirq.

-Scott



^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH RT v2 3/3] rcu: Disable use_softirq on PREEMPT_RT
  2019-08-22 19:31     ` Scott Wood
@ 2019-08-23  0:52       ` Joel Fernandes
  0 siblings, 0 replies; 43+ messages in thread
From: Joel Fernandes @ 2019-08-23  0:52 UTC (permalink / raw)
  To: Scott Wood
  Cc: Sebastian Andrzej Siewior, linux-rt-users, linux-kernel,
	Paul E . McKenney, Thomas Gleixner, Peter Zijlstra, Juri Lelli,
	Clark Williams

On Thu, Aug 22, 2019 at 02:31:17PM -0500, Scott Wood wrote:
> On Thu, 2019-08-22 at 09:59 -0400, Joel Fernandes wrote:
> > On Wed, Aug 21, 2019 at 06:19:06PM -0500, Scott Wood wrote:
> > > I think the prohibition on use_softirq can be dropped once RT gets the
> > > latest RCU code, but the question of what use_softirq should default
> > > to on PREEMPT_RT remains.
> > 
> > Independent of the question of what use_softirq should default to, could
> > we
> > test RT with latest RCU code now to check if the deadlock goes away?  That
> > way, maybe we can find any issues in current RCU that cause scheduler
> > deadlocks in the situation you pointed. The reason I am asking is because
> > recently additional commits [1] try to prevent deadlock and it'd be nice
> > to
> > ensure that other conditions are not lingering (I don't think they are but
> > it'd be nice to be sure).
> > 
> > I am happy to do such testing myself if you want, however what does it
> > take
> > to apply the RT patchset to the latest mainline? Is it an achievable feat?
> 
> I did run such a test (cherry picking all RCU patches that aren't already in
> RT, plus your RFC patch to rcu_read_unlock_special, rather than applying RT
> to current mainline) with rcutorture plus a looping kernel build overnight,
> and didn't see any splats with or without use_softirq.

Cool, that's good to know you didn't see splats!

thanks,

 - Joel


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH RT v2 2/3] sched: migrate_enable: Use sleeping_lock to indicate involuntary sleep
  2019-08-21 23:35   ` Paul E. McKenney
@ 2019-08-23  1:21     ` Scott Wood
  0 siblings, 0 replies; 43+ messages in thread
From: Scott Wood @ 2019-08-23  1:21 UTC (permalink / raw)
  To: paulmck
  Cc: Sebastian Andrzej Siewior, linux-rt-users, linux-kernel,
	Joel Fernandes, Thomas Gleixner, Peter Zijlstra, Juri Lelli,
	Clark Williams

On Wed, 2019-08-21 at 16:35 -0700, Paul E. McKenney wrote:
> On Wed, Aug 21, 2019 at 06:19:05PM -0500, Scott Wood wrote:
> > Without this, rcu_note_context_switch() will complain if an RCU read
> > lock is held when migrate_enable() calls stop_one_cpu().
> > 
> > Signed-off-by: Scott Wood <swood@redhat.com>
> 
> I have to ask...  Both sleeping_lock_inc() and sleeping_lock_dec() are
> no-ops if not CONFIG_PREEMPT_RT_BASE?

Yes.

-Scott



^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH RT v2 1/3] rcu: Acquire RCU lock when disabling BHs
  2019-08-22 15:27       ` Paul E. McKenney
@ 2019-08-23  1:50         ` Joel Fernandes
  2019-08-23  2:11           ` Paul E. McKenney
  0 siblings, 1 reply; 43+ messages in thread
From: Joel Fernandes @ 2019-08-23  1:50 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: Scott Wood, Sebastian Andrzej Siewior, linux-rt-users,
	linux-kernel, Thomas Gleixner, Peter Zijlstra, Juri Lelli,
	Clark Williams

On Thu, Aug 22, 2019 at 08:27:06AM -0700, Paul E. McKenney wrote:
> On Thu, Aug 22, 2019 at 09:39:55AM -0400, Joel Fernandes wrote:
> > On Wed, Aug 21, 2019 at 04:33:58PM -0700, Paul E. McKenney wrote:
> > > On Wed, Aug 21, 2019 at 06:19:04PM -0500, Scott Wood wrote:
> > > > A plain local_bh_disable() is documented as creating an RCU critical
> > > > section, and (at least) rcutorture expects this to be the case.  However,
> > > > in_softirq() doesn't block a grace period on PREEMPT_RT, since RCU checks
> > > > preempt_count() directly.  Even if RCU were changed to check
> > > > in_softirq(), that wouldn't allow blocked BH disablers to be boosted.
> > > > 
> > > > Fix this by calling rcu_read_lock() from local_bh_disable(), and update
> > > > rcu_read_lock_bh_held() accordingly.
> > > 
> > > Cool!  Some questions and comments below.
> > > 
> > > 							Thanx, Paul
> > > 
> > > > Signed-off-by: Scott Wood <swood@redhat.com>
> > > > ---
> > > > Another question is whether non-raw spinlocks are intended to create an
> > > > RCU read-side critical section due to implicit preempt disable.
> > > 
> > > Hmmm...  Did non-raw spinlocks act like rcu_read_lock_sched()
> > > and rcu_read_unlock_sched() pairs in -rt prior to the RCU flavor
> > > consolidation?  If not, I don't see why they should do so after that
> > > consolidation in -rt.
> > 
> > May be I am missing something, but I didn't see the connection between
> > consolidation and this patch. AFAICS, this patch is so that
> > rcu_read_lock_bh_held() works at all on -rt. Did I badly miss something?
> 
> I was interpreting Scott's question (which would be excluded from the
> git commit log) as relating to a possible follow-on patch.
> 
> The question is "how special can non-raw spinlocks be in -rt?".  From what
> I can see, they have been treated as sleeplocks from an RCU viewpoint,
> so maybe that should continue to be the case.  It does deserve some
> thought because in mainline a non-raw spinlock really would block a
> post-consolidation RCU grace period, even in PREEMPT kernels.
> 
> But then again, you cannot preempt a non-raw spinlock in mainline but
> you can in -rt, so extending that exception to RCU is not unreasonable.
> 
> Either way, we do need to make a definite decision and document it.
> If I were forced to make a decision right now, I would follow the old
> behavior, so that only raw spinlocks were guaranteed to block RCU grace
> periods.  But I am not being forced, so let's actually discuss and make
> a conscious decision.  ;-)

I think non-raw spinlocks on -rt should at least do rcu_read_lock() so that
any driver or kernel code that depends on this behavior and works on non-rt
also works on -rt. It also removes the chance a kernel developer may miss
documentation and accidentally forget that their code may break on -rt. I am
curious to see how much this design pattern appears in the kernel
(spin_lock'ed section "intended" as an RCU-reader by code sequences).

Logically speaking, to me anything that disables preemption on non-RT should
do rcu_read_lock() on -rt so that from RCU's perspective, things are working.
But I wonder where we would draw the line and if the bar is to need actual
examples of usage patterns to make a decision..

Any thoughts?

thanks,

 - Joel


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH RT v2 1/3] rcu: Acquire RCU lock when disabling BHs
  2019-08-23  1:50         ` Joel Fernandes
@ 2019-08-23  2:11           ` Paul E. McKenney
  0 siblings, 0 replies; 43+ messages in thread
From: Paul E. McKenney @ 2019-08-23  2:11 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: Scott Wood, Sebastian Andrzej Siewior, linux-rt-users,
	linux-kernel, Thomas Gleixner, Peter Zijlstra, Juri Lelli,
	Clark Williams

On Thu, Aug 22, 2019 at 09:50:09PM -0400, Joel Fernandes wrote:
> On Thu, Aug 22, 2019 at 08:27:06AM -0700, Paul E. McKenney wrote:
> > On Thu, Aug 22, 2019 at 09:39:55AM -0400, Joel Fernandes wrote:
> > > On Wed, Aug 21, 2019 at 04:33:58PM -0700, Paul E. McKenney wrote:
> > > > On Wed, Aug 21, 2019 at 06:19:04PM -0500, Scott Wood wrote:
> > > > > A plain local_bh_disable() is documented as creating an RCU critical
> > > > > section, and (at least) rcutorture expects this to be the case.  However,
> > > > > in_softirq() doesn't block a grace period on PREEMPT_RT, since RCU checks
> > > > > preempt_count() directly.  Even if RCU were changed to check
> > > > > in_softirq(), that wouldn't allow blocked BH disablers to be boosted.
> > > > > 
> > > > > Fix this by calling rcu_read_lock() from local_bh_disable(), and update
> > > > > rcu_read_lock_bh_held() accordingly.
> > > > 
> > > > Cool!  Some questions and comments below.
> > > > 
> > > > 							Thanx, Paul
> > > > 
> > > > > Signed-off-by: Scott Wood <swood@redhat.com>
> > > > > ---
> > > > > Another question is whether non-raw spinlocks are intended to create an
> > > > > RCU read-side critical section due to implicit preempt disable.
> > > > 
> > > > Hmmm...  Did non-raw spinlocks act like rcu_read_lock_sched()
> > > > and rcu_read_unlock_sched() pairs in -rt prior to the RCU flavor
> > > > consolidation?  If not, I don't see why they should do so after that
> > > > consolidation in -rt.
> > > 
> > > May be I am missing something, but I didn't see the connection between
> > > consolidation and this patch. AFAICS, this patch is so that
> > > rcu_read_lock_bh_held() works at all on -rt. Did I badly miss something?
> > 
> > I was interpreting Scott's question (which would be excluded from the
> > git commit log) as relating to a possible follow-on patch.
> > 
> > The question is "how special can non-raw spinlocks be in -rt?".  From what
> > I can see, they have been treated as sleeplocks from an RCU viewpoint,
> > so maybe that should continue to be the case.  It does deserve some
> > thought because in mainline a non-raw spinlock really would block a
> > post-consolidation RCU grace period, even in PREEMPT kernels.
> > 
> > But then again, you cannot preempt a non-raw spinlock in mainline but
> > you can in -rt, so extending that exception to RCU is not unreasonable.
> > 
> > Either way, we do need to make a definite decision and document it.
> > If I were forced to make a decision right now, I would follow the old
> > behavior, so that only raw spinlocks were guaranteed to block RCU grace
> > periods.  But I am not being forced, so let's actually discuss and make
> > a conscious decision.  ;-)
> 
> I think non-raw spinlocks on -rt should at least do rcu_read_lock() so that
> any driver or kernel code that depends on this behavior and works on non-rt
> also works on -rt. It also removes the chance a kernel developer may miss
> documentation and accidentally forget that their code may break on -rt. I am
> curious to see how much this design pattern appears in the kernel
> (spin_lock'ed section "intended" as an RCU-reader by code sequences).
> 
> Logically speaking, to me anything that disables preemption on non-RT should
> do rcu_read_lock() on -rt so that from RCU's perspective, things are working.
> But I wonder where we would draw the line and if the bar is to need actual
> examples of usage patterns to make a decision..
> 
> Any thoughts?

Yes.  Let's listen to what the -rt guys have to say.  After all, they
are the ones who would be dealing with any differences in semantics.

							Thanx, Paul

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH RT v2 1/3] rcu: Acquire RCU lock when disabling BHs
  2019-08-21 23:33   ` Paul E. McKenney
  2019-08-22 13:39     ` Joel Fernandes
@ 2019-08-23  2:36     ` Scott Wood
  2019-08-23  2:54       ` Paul E. McKenney
  1 sibling, 1 reply; 43+ messages in thread
From: Scott Wood @ 2019-08-23  2:36 UTC (permalink / raw)
  To: paulmck
  Cc: Sebastian Andrzej Siewior, linux-rt-users, linux-kernel,
	Joel Fernandes, Thomas Gleixner, Peter Zijlstra, Juri Lelli,
	Clark Williams

On Wed, 2019-08-21 at 16:33 -0700, Paul E. McKenney wrote:
> On Wed, Aug 21, 2019 at 06:19:04PM -0500, Scott Wood wrote:
> > diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
> > index 388ace315f32..d6e357378732 100644
> > --- a/include/linux/rcupdate.h
> > +++ b/include/linux/rcupdate.h
> > @@ -615,10 +615,12 @@ static inline void rcu_read_unlock(void)
> >  static inline void rcu_read_lock_bh(void)
> >  {
> >  	local_bh_disable();
> > +#ifndef CONFIG_PREEMPT_RT_FULL
> >  	__acquire(RCU_BH);
> >  	rcu_lock_acquire(&rcu_bh_lock_map);
> >  	RCU_LOCKDEP_WARN(!rcu_is_watching(),
> >  			 "rcu_read_lock_bh() used illegally while idle");
> > +#endif
> 
> Any chance of this using "if (!IS_ENABLED(CONFIG_PREEMPT_RT_FULL))"?
> We should be OK providing a do-nothing __maybe_unused rcu_bh_lock_map
> for lockdep-enabled -rt kernels, right?

OK.

> > @@ -185,8 +189,10 @@ void __local_bh_enable_ip(unsigned long ip,
> > > > unsigned int cnt)
> >  	WARN_ON_ONCE(count < 0);
> >  	local_irq_enable();
> >  
> > -	if (!in_atomic())
> > +	if (!in_atomic()) {
> > +		rcu_read_unlock();
> >  		local_unlock(bh_lock);
> > +	}
> 
> The return from in_atomic() is guaranteed to be the same at
> local_bh_enable() time as was at the call to the corresponding
> local_bh_disable()?

That's an existing requirement on RT (which rcutorture currently violates)
due to bh_lock.

> I could have sworn that I ran afoul of this last year.  Might these
> added rcu_read_lock() and rcu_read_unlock() calls need to check for
> CONFIG_PREEMPT_RT_FULL?

This code is already under a PREEMPT_RT_FULL ifdef.

-Scott



^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH RT v2 1/3] rcu: Acquire RCU lock when disabling BHs
  2019-08-23  2:36     ` Scott Wood
@ 2019-08-23  2:54       ` Paul E. McKenney
  0 siblings, 0 replies; 43+ messages in thread
From: Paul E. McKenney @ 2019-08-23  2:54 UTC (permalink / raw)
  To: Scott Wood
  Cc: Sebastian Andrzej Siewior, linux-rt-users, linux-kernel,
	Joel Fernandes, Thomas Gleixner, Peter Zijlstra, Juri Lelli,
	Clark Williams

On Thu, Aug 22, 2019 at 09:36:21PM -0500, Scott Wood wrote:
> On Wed, 2019-08-21 at 16:33 -0700, Paul E. McKenney wrote:
> > On Wed, Aug 21, 2019 at 06:19:04PM -0500, Scott Wood wrote:
> > > diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
> > > index 388ace315f32..d6e357378732 100644
> > > --- a/include/linux/rcupdate.h
> > > +++ b/include/linux/rcupdate.h
> > > @@ -615,10 +615,12 @@ static inline void rcu_read_unlock(void)
> > >  static inline void rcu_read_lock_bh(void)
> > >  {
> > >  	local_bh_disable();
> > > +#ifndef CONFIG_PREEMPT_RT_FULL
> > >  	__acquire(RCU_BH);
> > >  	rcu_lock_acquire(&rcu_bh_lock_map);
> > >  	RCU_LOCKDEP_WARN(!rcu_is_watching(),
> > >  			 "rcu_read_lock_bh() used illegally while idle");
> > > +#endif
> > 
> > Any chance of this using "if (!IS_ENABLED(CONFIG_PREEMPT_RT_FULL))"?
> > We should be OK providing a do-nothing __maybe_unused rcu_bh_lock_map
> > for lockdep-enabled -rt kernels, right?
> 
> OK.
> 
> > > @@ -185,8 +189,10 @@ void __local_bh_enable_ip(unsigned long ip,
> > > > > unsigned int cnt)
> > >  	WARN_ON_ONCE(count < 0);
> > >  	local_irq_enable();
> > >  
> > > -	if (!in_atomic())
> > > +	if (!in_atomic()) {
> > > +		rcu_read_unlock();
> > >  		local_unlock(bh_lock);
> > > +	}
> > 
> > The return from in_atomic() is guaranteed to be the same at
> > local_bh_enable() time as was at the call to the corresponding
> > local_bh_disable()?
> 
> That's an existing requirement on RT (which rcutorture currently violates)
> due to bh_lock.
> 
> > I could have sworn that I ran afoul of this last year.  Might these
> > added rcu_read_lock() and rcu_read_unlock() calls need to check for
> > CONFIG_PREEMPT_RT_FULL?
> 
> This code is already under a PREEMPT_RT_FULL ifdef.

Good enough, then!

							Thanx, Paul

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH RT v2 1/3] rcu: Acquire RCU lock when disabling BHs
  2019-08-22 13:39     ` Joel Fernandes
  2019-08-22 15:27       ` Paul E. McKenney
@ 2019-08-23  3:23       ` Scott Wood
  2019-08-23 12:30         ` Paul E. McKenney
  2019-08-23 16:17         ` Sebastian Andrzej Siewior
  1 sibling, 2 replies; 43+ messages in thread
From: Scott Wood @ 2019-08-23  3:23 UTC (permalink / raw)
  To: Joel Fernandes, Paul E. McKenney
  Cc: Sebastian Andrzej Siewior, linux-rt-users, linux-kernel,
	Thomas Gleixner, Peter Zijlstra, Juri Lelli, Clark Williams

On Thu, 2019-08-22 at 09:39 -0400, Joel Fernandes wrote:
> On Wed, Aug 21, 2019 at 04:33:58PM -0700, Paul E. McKenney wrote:
> > On Wed, Aug 21, 2019 at 06:19:04PM -0500, Scott Wood wrote:
> > > Signed-off-by: Scott Wood <swood@redhat.com>
> > > ---
> > > Another question is whether non-raw spinlocks are intended to create
> > > an
> > > RCU read-side critical section due to implicit preempt disable.
> > 
> > Hmmm...  Did non-raw spinlocks act like rcu_read_lock_sched()
> > and rcu_read_unlock_sched() pairs in -rt prior to the RCU flavor
> > consolidation?  If not, I don't see why they should do so after that
> > consolidation in -rt.
> 
> May be I am missing something, but I didn't see the connection between
> consolidation and this patch. AFAICS, this patch is so that
> rcu_read_lock_bh_held() works at all on -rt. Did I badly miss something?

Before consolidation, RT mapped rcu_read_lock_bh_held() to
rcu_read_lock_bh() and called rcu_read_lock() from rcu_read_lock_bh().  This
somehow got lost when rebasing on top of 5.0.

> > >  include/linux/rcupdate.h |  4 ++++
> > >  kernel/rcu/update.c      |  4 ++++
> > >  kernel/softirq.c         | 12 +++++++++---
> > >  3 files changed, 17 insertions(+), 3 deletions(-)
> > > 
> > > diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
> > > index 388ace315f32..d6e357378732 100644
> > > --- a/include/linux/rcupdate.h
> > > +++ b/include/linux/rcupdate.h
> > > @@ -615,10 +615,12 @@ static inline void rcu_read_unlock(void)
> > >  static inline void rcu_read_lock_bh(void)
> > >  {
> > >  	local_bh_disable();
> > > +#ifndef CONFIG_PREEMPT_RT_FULL
> > >  	__acquire(RCU_BH);
> > >  	rcu_lock_acquire(&rcu_bh_lock_map);
> > >  	RCU_LOCKDEP_WARN(!rcu_is_watching(),
> > >  			 "rcu_read_lock_bh() used illegally while idle");
> > > +#endif
> > 
> > Any chance of this using "if (!IS_ENABLED(CONFIG_PREEMPT_RT_FULL))"?
> > We should be OK providing a do-nothing __maybe_unused rcu_bh_lock_map
> > for lockdep-enabled -rt kernels, right?
> 
> Since this function is small, I prefer if -rt defines their own
> rcu_read_lock_bh() which just does the local_bh_disable(). That would be
> way
> cleaner IMO. IIRC, -rt does similar things for spinlocks, but it has been
> sometime since I look at the -rt patchset.

I'll do it whichever way you all decide, though I'm not sure I agree about
it being cleaner (especially while RT is still out-of-tree and a change to
the non-RT version that fails to trigger a merge conflict is a concern).

What about moving everything but the local_bh_disable into a separate
function called from rcu_read_lock_bh, and making that a no-op on RT?

> > >  
> > > diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
> > > index 016c66a98292..a9cdf3d562bc 100644
> > > --- a/kernel/rcu/update.c
> > > +++ b/kernel/rcu/update.c
> > > @@ -296,7 +296,11 @@ int rcu_read_lock_bh_held(void)
> > >  		return 0;
> > >  	if (!rcu_lockdep_current_cpu_online())
> > >  		return 0;
> > > +#ifdef CONFIG_PREEMPT_RT_FULL
> > > +	return lock_is_held(&rcu_lock_map) || irqs_disabled();
> > > +#else
> > >  	return in_softirq() || irqs_disabled();
> > > +#endif
> > 
> > And globally.
> 
> And could be untangled a bit as well:
> 
> if (irqs_disabled())
> 	return 1;
> 
> if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL))
> 	return lock_is_held(&rcu_lock_map);
> 
> return in_softirq();

OK.

-Scott



^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH RT v2 1/3] rcu: Acquire RCU lock when disabling BHs
  2019-08-23  3:23       ` Scott Wood
@ 2019-08-23 12:30         ` Paul E. McKenney
  2019-08-23 16:17         ` Sebastian Andrzej Siewior
  1 sibling, 0 replies; 43+ messages in thread
From: Paul E. McKenney @ 2019-08-23 12:30 UTC (permalink / raw)
  To: Scott Wood
  Cc: Joel Fernandes, Sebastian Andrzej Siewior, linux-rt-users,
	linux-kernel, Thomas Gleixner, Peter Zijlstra, Juri Lelli,
	Clark Williams

On Thu, Aug 22, 2019 at 10:23:23PM -0500, Scott Wood wrote:
> On Thu, 2019-08-22 at 09:39 -0400, Joel Fernandes wrote:
> > On Wed, Aug 21, 2019 at 04:33:58PM -0700, Paul E. McKenney wrote:
> > > On Wed, Aug 21, 2019 at 06:19:04PM -0500, Scott Wood wrote:

[ . . . ]

> > > >  include/linux/rcupdate.h |  4 ++++
> > > >  kernel/rcu/update.c      |  4 ++++
> > > >  kernel/softirq.c         | 12 +++++++++---
> > > >  3 files changed, 17 insertions(+), 3 deletions(-)
> > > > 
> > > > diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
> > > > index 388ace315f32..d6e357378732 100644
> > > > --- a/include/linux/rcupdate.h
> > > > +++ b/include/linux/rcupdate.h
> > > > @@ -615,10 +615,12 @@ static inline void rcu_read_unlock(void)
> > > >  static inline void rcu_read_lock_bh(void)
> > > >  {
> > > >  	local_bh_disable();
> > > > +#ifndef CONFIG_PREEMPT_RT_FULL
> > > >  	__acquire(RCU_BH);
> > > >  	rcu_lock_acquire(&rcu_bh_lock_map);
> > > >  	RCU_LOCKDEP_WARN(!rcu_is_watching(),
> > > >  			 "rcu_read_lock_bh() used illegally while idle");
> > > > +#endif
> > > 
> > > Any chance of this using "if (!IS_ENABLED(CONFIG_PREEMPT_RT_FULL))"?
> > > We should be OK providing a do-nothing __maybe_unused rcu_bh_lock_map
> > > for lockdep-enabled -rt kernels, right?
> > 
> > Since this function is small, I prefer if -rt defines their own
> > rcu_read_lock_bh() which just does the local_bh_disable(). That would be
> > way
> > cleaner IMO. IIRC, -rt does similar things for spinlocks, but it has been
> > sometime since I look at the -rt patchset.
> 
> I'll do it whichever way you all decide, though I'm not sure I agree about
> it being cleaner (especially while RT is still out-of-tree and a change to
> the non-RT version that fails to trigger a merge conflict is a concern).
> 
> What about moving everything but the local_bh_disable into a separate
> function called from rcu_read_lock_bh, and making that a no-op on RT?

That makes a lot of sense to me!

							Thanx, Paul

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH RT v2 1/3] rcu: Acquire RCU lock when disabling BHs
  2019-08-23  3:23       ` Scott Wood
  2019-08-23 12:30         ` Paul E. McKenney
@ 2019-08-23 16:17         ` Sebastian Andrzej Siewior
  2019-08-23 19:46           ` Scott Wood
  1 sibling, 1 reply; 43+ messages in thread
From: Sebastian Andrzej Siewior @ 2019-08-23 16:17 UTC (permalink / raw)
  To: Scott Wood
  Cc: Joel Fernandes, Paul E. McKenney, linux-rt-users, linux-kernel,
	Thomas Gleixner, Peter Zijlstra, Juri Lelli, Clark Williams

On 2019-08-22 22:23:23 [-0500], Scott Wood wrote:
> On Thu, 2019-08-22 at 09:39 -0400, Joel Fernandes wrote:
> > On Wed, Aug 21, 2019 at 04:33:58PM -0700, Paul E. McKenney wrote:
> > > On Wed, Aug 21, 2019 at 06:19:04PM -0500, Scott Wood wrote:
> > > > Signed-off-by: Scott Wood <swood@redhat.com>
> > > > ---
> > > > Another question is whether non-raw spinlocks are intended to create
> > > > an
> > > > RCU read-side critical section due to implicit preempt disable.
> > > 
> > > Hmmm...  Did non-raw spinlocks act like rcu_read_lock_sched()
> > > and rcu_read_unlock_sched() pairs in -rt prior to the RCU flavor
> > > consolidation?  If not, I don't see why they should do so after that
> > > consolidation in -rt.
> > 
> > May be I am missing something, but I didn't see the connection between
> > consolidation and this patch. AFAICS, this patch is so that
> > rcu_read_lock_bh_held() works at all on -rt. Did I badly miss something?
> 
> Before consolidation, RT mapped rcu_read_lock_bh_held() to
> rcu_read_lock_bh() and called rcu_read_lock() from rcu_read_lock_bh().  This
> somehow got lost when rebasing on top of 5.0.

so now rcu_read_lock_bh_held() is untouched and in_softirq() reports 1.
So the problem is that we never hold RCU but report 1 like we do?

Sebastian

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH RT v2 2/3] sched: migrate_enable: Use sleeping_lock to indicate involuntary sleep
  2019-08-21 23:19 ` [PATCH RT v2 2/3] sched: migrate_enable: Use sleeping_lock to indicate involuntary sleep Scott Wood
  2019-08-21 23:35   ` Paul E. McKenney
@ 2019-08-23 16:20   ` Sebastian Andrzej Siewior
  2019-08-23 19:28     ` Scott Wood
  1 sibling, 1 reply; 43+ messages in thread
From: Sebastian Andrzej Siewior @ 2019-08-23 16:20 UTC (permalink / raw)
  To: Scott Wood
  Cc: linux-rt-users, linux-kernel, Paul E . McKenney, Joel Fernandes,
	Thomas Gleixner, Peter Zijlstra, Juri Lelli, Clark Williams

On 2019-08-21 18:19:05 [-0500], Scott Wood wrote:
> Without this, rcu_note_context_switch() will complain if an RCU read
> lock is held when migrate_enable() calls stop_one_cpu().
> 
> Signed-off-by: Scott Wood <swood@redhat.com>
> ---
> v2: Added comment.
> 
> If my migrate disable changes aren't taken, then pin_current_cpu()
> will also need to use sleeping_lock_inc() because calling
> __read_rt_lock() bypasses the usual place it's done.
> 
>  include/linux/sched.h    | 4 ++--
>  kernel/rcu/tree_plugin.h | 2 +-
>  kernel/sched/core.c      | 8 ++++++++
>  3 files changed, 11 insertions(+), 3 deletions(-)
> 
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -7405,7 +7405,15 @@ void migrate_enable(void)
>  			unpin_current_cpu();
>  			preempt_lazy_enable();
>  			preempt_enable();
> +
> +			/*
> +			 * sleeping_lock_inc suppresses a debug check for
> +			 * sleeping inside an RCU read side critical section
> +			 */
> +			sleeping_lock_inc();
>  			stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg);
> +			sleeping_lock_dec();

this looks like an ugly hack. This sleeping_lock_inc() is used where we
actually hold a sleeping lock and schedule() which is okay. But this
would mean we hold a RCU lock and schedule() anyway. Is that okay?

> +
>  			return;
>  		}
>  	}

Sebastian

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH RT v2 3/3] rcu: Disable use_softirq on PREEMPT_RT
  2019-08-21 23:40   ` Paul E. McKenney
@ 2019-08-23 16:32     ` Sebastian Andrzej Siewior
  0 siblings, 0 replies; 43+ messages in thread
From: Sebastian Andrzej Siewior @ 2019-08-23 16:32 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: Scott Wood, linux-rt-users, linux-kernel, Joel Fernandes,
	Thomas Gleixner, Peter Zijlstra, Juri Lelli, Clark Williams

On 2019-08-21 16:40:18 [-0700], Paul E. McKenney wrote:
> Save a couple of lines?
> 
> static bool use_softirq = !IS_ENABLED(CONFIG_PREEMPT_RT_FULL);
> 
> And if I understand your point above, the module_param() might be
> able to be the same either way given the new RCU.  But if not,
> at least we get rid of the #else.

I *think* we wanted this. And while I took the RCU patches for v5.2 I
forgot to enable it by default…

> 							Thanx, Paul

Sebastian

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH RT v2 2/3] sched: migrate_enable: Use sleeping_lock to indicate involuntary sleep
  2019-08-23 16:20   ` Sebastian Andrzej Siewior
@ 2019-08-23 19:28     ` Scott Wood
  2019-08-24  3:10       ` Joel Fernandes
  0 siblings, 1 reply; 43+ messages in thread
From: Scott Wood @ 2019-08-23 19:28 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-rt-users, linux-kernel, Paul E . McKenney, Joel Fernandes,
	Thomas Gleixner, Peter Zijlstra, Juri Lelli, Clark Williams

On Fri, 2019-08-23 at 18:20 +0200, Sebastian Andrzej Siewior wrote:
> On 2019-08-21 18:19:05 [-0500], Scott Wood wrote:
> > Without this, rcu_note_context_switch() will complain if an RCU read
> > lock is held when migrate_enable() calls stop_one_cpu().
> > 
> > Signed-off-by: Scott Wood <swood@redhat.com>
> > ---
> > v2: Added comment.
> > 
> > If my migrate disable changes aren't taken, then pin_current_cpu()
> > will also need to use sleeping_lock_inc() because calling
> > __read_rt_lock() bypasses the usual place it's done.
> > 
> >  include/linux/sched.h    | 4 ++--
> >  kernel/rcu/tree_plugin.h | 2 +-
> >  kernel/sched/core.c      | 8 ++++++++
> >  3 files changed, 11 insertions(+), 3 deletions(-)
> > 
> > --- a/kernel/sched/core.c
> > +++ b/kernel/sched/core.c
> > @@ -7405,7 +7405,15 @@ void migrate_enable(void)
> >  			unpin_current_cpu();
> >  			preempt_lazy_enable();
> >  			preempt_enable();
> > +
> > +			/*
> > +			 * sleeping_lock_inc suppresses a debug check for
> > +			 * sleeping inside an RCU read side critical section
> > +			 */
> > +			sleeping_lock_inc();
> >  			stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg);
> > +			sleeping_lock_dec();
> 
> this looks like an ugly hack. This sleeping_lock_inc() is used where we
> actually hold a sleeping lock and schedule() which is okay. But this
> would mean we hold a RCU lock and schedule() anyway. Is that okay?

Perhaps the name should be changed, but the concept is the same -- RT-
specific sleeping which should be considered involuntary for the purpose of
debug checks.  Voluntary sleeping is not allowed in an RCU critical section
because it will break the critical section on certain flavors of RCU, but
that doesn't apply to the flavor used on RT.  Sleeping for a long time in an
RCU critical section would also be a bad thing, but that also doesn't apply
here.

-Scott



^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH RT v2 1/3] rcu: Acquire RCU lock when disabling BHs
  2019-08-23 16:17         ` Sebastian Andrzej Siewior
@ 2019-08-23 19:46           ` Scott Wood
  2019-08-26 15:59             ` Sebastian Andrzej Siewior
  0 siblings, 1 reply; 43+ messages in thread
From: Scott Wood @ 2019-08-23 19:46 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: Joel Fernandes, Paul E. McKenney, linux-rt-users, linux-kernel,
	Thomas Gleixner, Peter Zijlstra, Juri Lelli, Clark Williams

On Fri, 2019-08-23 at 18:17 +0200, Sebastian Andrzej Siewior wrote:
> On 2019-08-22 22:23:23 [-0500], Scott Wood wrote:
> > On Thu, 2019-08-22 at 09:39 -0400, Joel Fernandes wrote:
> > > On Wed, Aug 21, 2019 at 04:33:58PM -0700, Paul E. McKenney wrote:
> > > > On Wed, Aug 21, 2019 at 06:19:04PM -0500, Scott Wood wrote:
> > > > > Signed-off-by: Scott Wood <swood@redhat.com>
> > > > > ---
> > > > > Another question is whether non-raw spinlocks are intended to
> > > > > create
> > > > > an
> > > > > RCU read-side critical section due to implicit preempt disable.
> > > > 
> > > > Hmmm...  Did non-raw spinlocks act like rcu_read_lock_sched()
> > > > and rcu_read_unlock_sched() pairs in -rt prior to the RCU flavor
> > > > consolidation?  If not, I don't see why they should do so after that
> > > > consolidation in -rt.
> > > 
> > > May be I am missing something, but I didn't see the connection between
> > > consolidation and this patch. AFAICS, this patch is so that
> > > rcu_read_lock_bh_held() works at all on -rt. Did I badly miss
> > > something?
> > 
> > Before consolidation, RT mapped rcu_read_lock_bh_held() to
> > rcu_read_lock_bh() and called rcu_read_lock() from
> > rcu_read_lock_bh().  This
> > somehow got lost when rebasing on top of 5.0.
> 
> so now rcu_read_lock_bh_held() is untouched and in_softirq() reports 1.
> So the problem is that we never hold RCU but report 1 like we do?

Yes.

-Scott



^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH RT v2 2/3] sched: migrate_enable: Use sleeping_lock to indicate involuntary sleep
  2019-08-23 19:28     ` Scott Wood
@ 2019-08-24  3:10       ` Joel Fernandes
  2019-08-26 15:25         ` Sebastian Andrzej Siewior
  0 siblings, 1 reply; 43+ messages in thread
From: Joel Fernandes @ 2019-08-24  3:10 UTC (permalink / raw)
  To: Scott Wood
  Cc: Sebastian Andrzej Siewior, linux-rt-users, linux-kernel,
	Paul E . McKenney, Thomas Gleixner, Peter Zijlstra, Juri Lelli,
	Clark Williams

On Fri, Aug 23, 2019 at 02:28:46PM -0500, Scott Wood wrote:
> On Fri, 2019-08-23 at 18:20 +0200, Sebastian Andrzej Siewior wrote:
> > On 2019-08-21 18:19:05 [-0500], Scott Wood wrote:
> > > Without this, rcu_note_context_switch() will complain if an RCU read
> > > lock is held when migrate_enable() calls stop_one_cpu().
> > > 
> > > Signed-off-by: Scott Wood <swood@redhat.com>
> > > ---
> > > v2: Added comment.
> > > 
> > > If my migrate disable changes aren't taken, then pin_current_cpu()
> > > will also need to use sleeping_lock_inc() because calling
> > > __read_rt_lock() bypasses the usual place it's done.
> > > 
> > >  include/linux/sched.h    | 4 ++--
> > >  kernel/rcu/tree_plugin.h | 2 +-
> > >  kernel/sched/core.c      | 8 ++++++++
> > >  3 files changed, 11 insertions(+), 3 deletions(-)
> > > 
> > > --- a/kernel/sched/core.c
> > > +++ b/kernel/sched/core.c
> > > @@ -7405,7 +7405,15 @@ void migrate_enable(void)
> > >  			unpin_current_cpu();
> > >  			preempt_lazy_enable();
> > >  			preempt_enable();
> > > +
> > > +			/*
> > > +			 * sleeping_lock_inc suppresses a debug check for
> > > +			 * sleeping inside an RCU read side critical section
> > > +			 */
> > > +			sleeping_lock_inc();
> > >  			stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg);
> > > +			sleeping_lock_dec();
> > 
> > this looks like an ugly hack. This sleeping_lock_inc() is used where we
> > actually hold a sleeping lock and schedule() which is okay. But this
> > would mean we hold a RCU lock and schedule() anyway. Is that okay?
> 
> Perhaps the name should be changed, but the concept is the same -- RT-
> specific sleeping which should be considered involuntary for the purpose of
> debug checks.  Voluntary sleeping is not allowed in an RCU critical section
> because it will break the critical section on certain flavors of RCU, but
> that doesn't apply to the flavor used on RT.  Sleeping for a long time in an
> RCU critical section would also be a bad thing, but that also doesn't apply
> here.

I think the name should definitely be changed. At best, it is super confusing to
call it "sleeping_lock" for this scenario. In fact here, you are not even
blocking on a lock.

Maybe "sleeping_allowed" or some such.

thanks,

 - Joel


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH RT v2 2/3] sched: migrate_enable: Use sleeping_lock to indicate involuntary sleep
  2019-08-24  3:10       ` Joel Fernandes
@ 2019-08-26 15:25         ` Sebastian Andrzej Siewior
  2019-08-26 16:29           ` Paul E. McKenney
  0 siblings, 1 reply; 43+ messages in thread
From: Sebastian Andrzej Siewior @ 2019-08-26 15:25 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: Scott Wood, linux-rt-users, linux-kernel, Paul E . McKenney,
	Thomas Gleixner, Peter Zijlstra, Juri Lelli, Clark Williams

On 2019-08-23 23:10:14 [-0400], Joel Fernandes wrote:
> On Fri, Aug 23, 2019 at 02:28:46PM -0500, Scott Wood wrote:
> > On Fri, 2019-08-23 at 18:20 +0200, Sebastian Andrzej Siewior wrote:
> > > 
> > > this looks like an ugly hack. This sleeping_lock_inc() is used where we
> > > actually hold a sleeping lock and schedule() which is okay. But this
> > > would mean we hold a RCU lock and schedule() anyway. Is that okay?
> > 
> > Perhaps the name should be changed, but the concept is the same -- RT-
> > specific sleeping which should be considered involuntary for the purpose of
> > debug checks.  Voluntary sleeping is not allowed in an RCU critical section
> > because it will break the critical section on certain flavors of RCU, but
> > that doesn't apply to the flavor used on RT.  Sleeping for a long time in an
> > RCU critical section would also be a bad thing, but that also doesn't apply
> > here.
> 
> I think the name should definitely be changed. At best, it is super confusing to
> call it "sleeping_lock" for this scenario. In fact here, you are not even
> blocking on a lock.
> 
> Maybe "sleeping_allowed" or some such.

The mechanism that is used here may change in future. I just wanted to
make sure that from RCU's side it is okay to schedule here.

> thanks,
> 
>  - Joel

Sebastian

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH RT v2 1/3] rcu: Acquire RCU lock when disabling BHs
  2019-08-23 19:46           ` Scott Wood
@ 2019-08-26 15:59             ` Sebastian Andrzej Siewior
  2019-08-26 23:21               ` Scott Wood
  0 siblings, 1 reply; 43+ messages in thread
From: Sebastian Andrzej Siewior @ 2019-08-26 15:59 UTC (permalink / raw)
  To: Scott Wood
  Cc: Joel Fernandes, Paul E. McKenney, linux-rt-users, linux-kernel,
	Thomas Gleixner, Peter Zijlstra, Juri Lelli, Clark Williams

On 2019-08-23 14:46:39 [-0500], Scott Wood wrote:
> > > Before consolidation, RT mapped rcu_read_lock_bh_held() to
> > > rcu_read_lock_bh() and called rcu_read_lock() from
> > > rcu_read_lock_bh().  This
> > > somehow got lost when rebasing on top of 5.0.
> > 
> > so now rcu_read_lock_bh_held() is untouched and in_softirq() reports 1.
> > So the problem is that we never hold RCU but report 1 like we do?
> 
> Yes.

I understand the part where "rcu_read_lock() becomes part of
local_bh_disable()". But why do you modify rcu_read_lock_bh_held() and
rcu_read_lock_bh()? Couldn't they remain as-is?

> -Scott

Sebastian

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH RT v2 2/3] sched: migrate_enable: Use sleeping_lock to indicate involuntary sleep
  2019-08-26 15:25         ` Sebastian Andrzej Siewior
@ 2019-08-26 16:29           ` Paul E. McKenney
  2019-08-26 17:49             ` Scott Wood
  2019-08-27  9:23             ` Sebastian Andrzej Siewior
  0 siblings, 2 replies; 43+ messages in thread
From: Paul E. McKenney @ 2019-08-26 16:29 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: Joel Fernandes, Scott Wood, linux-rt-users, linux-kernel,
	Thomas Gleixner, Peter Zijlstra, Juri Lelli, Clark Williams

On Mon, Aug 26, 2019 at 05:25:23PM +0200, Sebastian Andrzej Siewior wrote:
> On 2019-08-23 23:10:14 [-0400], Joel Fernandes wrote:
> > On Fri, Aug 23, 2019 at 02:28:46PM -0500, Scott Wood wrote:
> > > On Fri, 2019-08-23 at 18:20 +0200, Sebastian Andrzej Siewior wrote:
> > > > 
> > > > this looks like an ugly hack. This sleeping_lock_inc() is used where we
> > > > actually hold a sleeping lock and schedule() which is okay. But this
> > > > would mean we hold a RCU lock and schedule() anyway. Is that okay?
> > > 
> > > Perhaps the name should be changed, but the concept is the same -- RT-
> > > specific sleeping which should be considered involuntary for the purpose of
> > > debug checks.  Voluntary sleeping is not allowed in an RCU critical section
> > > because it will break the critical section on certain flavors of RCU, but
> > > that doesn't apply to the flavor used on RT.  Sleeping for a long time in an
> > > RCU critical section would also be a bad thing, but that also doesn't apply
> > > here.
> > 
> > I think the name should definitely be changed. At best, it is super confusing to
> > call it "sleeping_lock" for this scenario. In fact here, you are not even
> > blocking on a lock.
> > 
> > Maybe "sleeping_allowed" or some such.
> 
> The mechanism that is used here may change in future. I just wanted to
> make sure that from RCU's side it is okay to schedule here.

Good point.

The effect from RCU's viewpoint will be to split any non-rcu_read_lock()
RCU read-side critical section at this point.  This alrady happens in a
few places, for example, rcu_note_context_switch() constitutes an RCU
quiescent state despite being invoked with interrupts disabled (as is
required!).  The __schedule() function just needs to understand (and does
understand) that the RCU read-side critical section that would otherwise
span that call to rcu_node_context_switch() is split in two by that call.

However, if this was instead an rcu_read_lock() critical section within
a PREEMPT=y kernel, then if a schedule() occured within stop_one_task(),
RCU would consider that critical section to be preempted.  This means
that any RCU grace period that is blocked by this RCU read-side critical
section would remain blocked until stop_one_cpu() resumed, returned,
and so on until the matching rcu_read_unlock() was reached.  In other
words, RCU would consider that RCU read-side critical section to span
the call to stop_one_cpu() even if stop_one_cpu() invoked schedule().

On the other hand, within a PREEMPT=n kernel, the call to schedule()
would split even an rcu_read_lock() critical section.  Which is why I
asked earlier if sleeping_lock_inc() and sleeping_lock_dec() are no-ops
in !PREEMPT_RT_BASE kernels.  We would after all want the usual lockdep
complaints in that case.

Does that help, or am I missing the point?

							Thanx, Paul

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH RT v2 2/3] sched: migrate_enable: Use sleeping_lock to indicate involuntary sleep
  2019-08-26 16:29           ` Paul E. McKenney
@ 2019-08-26 17:49             ` Scott Wood
  2019-08-26 18:12               ` Paul E. McKenney
  2019-08-27  9:23             ` Sebastian Andrzej Siewior
  1 sibling, 1 reply; 43+ messages in thread
From: Scott Wood @ 2019-08-26 17:49 UTC (permalink / raw)
  To: paulmck, Sebastian Andrzej Siewior
  Cc: Joel Fernandes, linux-rt-users, linux-kernel, Thomas Gleixner,
	Peter Zijlstra, Juri Lelli, Clark Williams

On Mon, 2019-08-26 at 09:29 -0700, Paul E. McKenney wrote:
> On Mon, Aug 26, 2019 at 05:25:23PM +0200, Sebastian Andrzej Siewior wrote:
> > On 2019-08-23 23:10:14 [-0400], Joel Fernandes wrote:
> > > On Fri, Aug 23, 2019 at 02:28:46PM -0500, Scott Wood wrote:
> > > > On Fri, 2019-08-23 at 18:20 +0200, Sebastian Andrzej Siewior wrote:
> > > > > this looks like an ugly hack. This sleeping_lock_inc() is used
> > > > > where we
> > > > > actually hold a sleeping lock and schedule() which is okay. But
> > > > > this
> > > > > would mean we hold a RCU lock and schedule() anyway. Is that okay?
> > > > 
> > > > Perhaps the name should be changed, but the concept is the same --
> > > > RT-
> > > > specific sleeping which should be considered involuntary for the
> > > > purpose of
> > > > debug checks.  Voluntary sleeping is not allowed in an RCU critical
> > > > section
> > > > because it will break the critical section on certain flavors of
> > > > RCU, but
> > > > that doesn't apply to the flavor used on RT.  Sleeping for a long
> > > > time in an
> > > > RCU critical section would also be a bad thing, but that also
> > > > doesn't apply
> > > > here.
> > > 
> > > I think the name should definitely be changed. At best, it is super
> > > confusing to
> > > call it "sleeping_lock" for this scenario. In fact here, you are not
> > > even
> > > blocking on a lock.
> > > 
> > > Maybe "sleeping_allowed" or some such.
> > 
> > The mechanism that is used here may change in future. I just wanted to
> > make sure that from RCU's side it is okay to schedule here.
> 
> Good point.
> 
> The effect from RCU's viewpoint will be to split any non-rcu_read_lock()
> RCU read-side critical section at this point.  This alrady happens in a
> few places, for example, rcu_note_context_switch() constitutes an RCU
> quiescent state despite being invoked with interrupts disabled (as is
> required!).  The __schedule() function just needs to understand (and does
> understand) that the RCU read-side critical section that would otherwise
> span that call to rcu_node_context_switch() is split in two by that call.
> 
> However, if this was instead an rcu_read_lock() critical section within
> a PREEMPT=y kernel, then if a schedule() occured within stop_one_task(),
> RCU would consider that critical section to be preempted.  This means
> that any RCU grace period that is blocked by this RCU read-side critical
> section would remain blocked until stop_one_cpu() resumed, returned,
> and so on until the matching rcu_read_unlock() was reached.  In other
> words, RCU would consider that RCU read-side critical section to span
> the call to stop_one_cpu() even if stop_one_cpu() invoked schedule().
> 
> On the other hand, within a PREEMPT=n kernel, the call to schedule()
> would split even an rcu_read_lock() critical section.  Which is why I
> asked earlier if sleeping_lock_inc() and sleeping_lock_dec() are no-ops
> in !PREEMPT_RT_BASE kernels.  We would after all want the usual lockdep
> complaints in that case.

migrate_enable() is PREEMPT_RT_BASE-specific -- this code won't execute at
all with PREEMPT=n.

-Scott



^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH RT v2 2/3] sched: migrate_enable: Use sleeping_lock to indicate involuntary sleep
  2019-08-26 17:49             ` Scott Wood
@ 2019-08-26 18:12               ` Paul E. McKenney
  0 siblings, 0 replies; 43+ messages in thread
From: Paul E. McKenney @ 2019-08-26 18:12 UTC (permalink / raw)
  To: Scott Wood
  Cc: Sebastian Andrzej Siewior, Joel Fernandes, linux-rt-users,
	linux-kernel, Thomas Gleixner, Peter Zijlstra, Juri Lelli,
	Clark Williams

On Mon, Aug 26, 2019 at 12:49:22PM -0500, Scott Wood wrote:
> On Mon, 2019-08-26 at 09:29 -0700, Paul E. McKenney wrote:
> > On Mon, Aug 26, 2019 at 05:25:23PM +0200, Sebastian Andrzej Siewior wrote:
> > > On 2019-08-23 23:10:14 [-0400], Joel Fernandes wrote:
> > > > On Fri, Aug 23, 2019 at 02:28:46PM -0500, Scott Wood wrote:
> > > > > On Fri, 2019-08-23 at 18:20 +0200, Sebastian Andrzej Siewior wrote:
> > > > > > this looks like an ugly hack. This sleeping_lock_inc() is used
> > > > > > where we
> > > > > > actually hold a sleeping lock and schedule() which is okay. But
> > > > > > this
> > > > > > would mean we hold a RCU lock and schedule() anyway. Is that okay?
> > > > > 
> > > > > Perhaps the name should be changed, but the concept is the same --
> > > > > RT-
> > > > > specific sleeping which should be considered involuntary for the
> > > > > purpose of
> > > > > debug checks.  Voluntary sleeping is not allowed in an RCU critical
> > > > > section
> > > > > because it will break the critical section on certain flavors of
> > > > > RCU, but
> > > > > that doesn't apply to the flavor used on RT.  Sleeping for a long
> > > > > time in an
> > > > > RCU critical section would also be a bad thing, but that also
> > > > > doesn't apply
> > > > > here.
> > > > 
> > > > I think the name should definitely be changed. At best, it is super
> > > > confusing to
> > > > call it "sleeping_lock" for this scenario. In fact here, you are not
> > > > even
> > > > blocking on a lock.
> > > > 
> > > > Maybe "sleeping_allowed" or some such.
> > > 
> > > The mechanism that is used here may change in future. I just wanted to
> > > make sure that from RCU's side it is okay to schedule here.
> > 
> > Good point.
> > 
> > The effect from RCU's viewpoint will be to split any non-rcu_read_lock()
> > RCU read-side critical section at this point.  This alrady happens in a
> > few places, for example, rcu_note_context_switch() constitutes an RCU
> > quiescent state despite being invoked with interrupts disabled (as is
> > required!).  The __schedule() function just needs to understand (and does
> > understand) that the RCU read-side critical section that would otherwise
> > span that call to rcu_node_context_switch() is split in two by that call.
> > 
> > However, if this was instead an rcu_read_lock() critical section within
> > a PREEMPT=y kernel, then if a schedule() occured within stop_one_task(),
> > RCU would consider that critical section to be preempted.  This means
> > that any RCU grace period that is blocked by this RCU read-side critical
> > section would remain blocked until stop_one_cpu() resumed, returned,
> > and so on until the matching rcu_read_unlock() was reached.  In other
> > words, RCU would consider that RCU read-side critical section to span
> > the call to stop_one_cpu() even if stop_one_cpu() invoked schedule().
> > 
> > On the other hand, within a PREEMPT=n kernel, the call to schedule()
> > would split even an rcu_read_lock() critical section.  Which is why I
> > asked earlier if sleeping_lock_inc() and sleeping_lock_dec() are no-ops
> > in !PREEMPT_RT_BASE kernels.  We would after all want the usual lockdep
> > complaints in that case.
> 
> migrate_enable() is PREEMPT_RT_BASE-specific -- this code won't execute at
> all with PREEMPT=n.

Understood!  And yes, that was your answer to my question.  Me, I was
just answering Sebastian's question.  ;-)

							Thanx, Paul

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH RT v2 1/3] rcu: Acquire RCU lock when disabling BHs
  2019-08-26 15:59             ` Sebastian Andrzej Siewior
@ 2019-08-26 23:21               ` Scott Wood
  0 siblings, 0 replies; 43+ messages in thread
From: Scott Wood @ 2019-08-26 23:21 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: Joel Fernandes, Paul E. McKenney, linux-rt-users, linux-kernel,
	Thomas Gleixner, Peter Zijlstra, Juri Lelli, Clark Williams

On Mon, 2019-08-26 at 17:59 +0200, Sebastian Andrzej Siewior wrote:
> On 2019-08-23 14:46:39 [-0500], Scott Wood wrote:
> > > > Before consolidation, RT mapped rcu_read_lock_bh_held() to
> > > > rcu_read_lock_bh() and called rcu_read_lock() from
> > > > rcu_read_lock_bh().  This
> > > > somehow got lost when rebasing on top of 5.0.
> > > 
> > > so now rcu_read_lock_bh_held() is untouched and in_softirq() reports
> > > 1.
> > > So the problem is that we never hold RCU but report 1 like we do?
> > 
> > Yes.
> 
> I understand the part where "rcu_read_lock() becomes part of
> local_bh_disable()". But why do you modify rcu_read_lock_bh_held() and
> rcu_read_lock_bh()? Couldn't they remain as-is?

Yes, it looks like they can.  I recall having problems with
rcu_read_lock_bh_held() in an earlier version which is what prompted the
change, but looking back I don't see what the problem was.

-Scott



^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH RT v2 2/3] sched: migrate_enable: Use sleeping_lock to indicate involuntary sleep
  2019-08-26 16:29           ` Paul E. McKenney
  2019-08-26 17:49             ` Scott Wood
@ 2019-08-27  9:23             ` Sebastian Andrzej Siewior
  2019-08-27 13:08               ` Joel Fernandes
  2019-08-27 15:53               ` Paul E. McKenney
  1 sibling, 2 replies; 43+ messages in thread
From: Sebastian Andrzej Siewior @ 2019-08-27  9:23 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: Joel Fernandes, Scott Wood, linux-rt-users, linux-kernel,
	Thomas Gleixner, Peter Zijlstra, Juri Lelli, Clark Williams

On 2019-08-26 09:29:45 [-0700], Paul E. McKenney wrote:
> > The mechanism that is used here may change in future. I just wanted to
> > make sure that from RCU's side it is okay to schedule here.
> 
> Good point.
> 
> The effect from RCU's viewpoint will be to split any non-rcu_read_lock()
> RCU read-side critical section at this point.  This alrady happens in a
> few places, for example, rcu_note_context_switch() constitutes an RCU
> quiescent state despite being invoked with interrupts disabled (as is
> required!).  The __schedule() function just needs to understand (and does
> understand) that the RCU read-side critical section that would otherwise
> span that call to rcu_node_context_switch() is split in two by that call.

Okay. So I read this as invoking schedule() at this point is okay. 
Looking at this again, this could also happen on a PREEMPT=y kernel if
the kernel decides to preempt a task within a rcu_read_lock() section
and put it back later on another CPU.

> However, if this was instead an rcu_read_lock() critical section within
> a PREEMPT=y kernel, then if a schedule() occured within stop_one_task(),
> RCU would consider that critical section to be preempted.  This means
> that any RCU grace period that is blocked by this RCU read-side critical
> section would remain blocked until stop_one_cpu() resumed, returned,
> and so on until the matching rcu_read_unlock() was reached.  In other
> words, RCU would consider that RCU read-side critical section to span
> the call to stop_one_cpu() even if stop_one_cpu() invoked schedule().

Isn't that my example from above and what we do in RT? My understanding
is that this is the reason why we need BOOST on RT otherwise the RCU
critical section could remain blocked for some time.

> On the other hand, within a PREEMPT=n kernel, the call to schedule()
> would split even an rcu_read_lock() critical section.  Which is why I
> asked earlier if sleeping_lock_inc() and sleeping_lock_dec() are no-ops
> in !PREEMPT_RT_BASE kernels.  We would after all want the usual lockdep
> complaints in that case.

sleeping_lock_inc() +dec() is only RT specific. It is part of RT's
spin_lock() implementation and used by RCU (rcu_note_context_switch())
to not complain if invoked within a critical section.

> Does that help, or am I missing the point?
> 
> 							Thanx, Paul
Sebastian

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH RT v2 2/3] sched: migrate_enable: Use sleeping_lock to indicate involuntary sleep
  2019-08-27  9:23             ` Sebastian Andrzej Siewior
@ 2019-08-27 13:08               ` Joel Fernandes
  2019-08-27 15:58                 ` Paul E. McKenney
  2019-08-27 15:53               ` Paul E. McKenney
  1 sibling, 1 reply; 43+ messages in thread
From: Joel Fernandes @ 2019-08-27 13:08 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: Paul E. McKenney, Scott Wood, linux-rt-users, linux-kernel,
	Thomas Gleixner, Peter Zijlstra, Juri Lelli, Clark Williams

On Tue, Aug 27, 2019 at 11:23:33AM +0200, Sebastian Andrzej Siewior wrote:
[snip]
> > However, if this was instead an rcu_read_lock() critical section within
> > a PREEMPT=y kernel, then if a schedule() occured within stop_one_task(),
> > RCU would consider that critical section to be preempted.  This means
> > that any RCU grace period that is blocked by this RCU read-side critical
> > section would remain blocked until stop_one_cpu() resumed, returned,
> > and so on until the matching rcu_read_unlock() was reached.  In other
> > words, RCU would consider that RCU read-side critical section to span
> > the call to stop_one_cpu() even if stop_one_cpu() invoked schedule().
> 
> Isn't that my example from above and what we do in RT? My understanding
> is that this is the reason why we need BOOST on RT otherwise the RCU
> critical section could remain blocked for some time.

Not just for boost, it is needed to block the grace period itself on
PREEMPT=y. On PREEMPT=y, if rcu_note_context_switch() happens in middle of a
rcu_read_lock() reader section, then the task is added to a blocked list
(rcu_preempt_ctxt_queue). Then just after that, the CPU reports a QS state
(rcu_qs()) as you can see in the PREEMPT=y implementation of
rcu_note_context_switch(). Even though the CPU has reported a QS, the grace
period will not end because the preempted (or block as could be in -rt) task
is still blocking the grace period. This is fundamental to the function of
Preemptible-RCU where there is the concept of tasks blocking a grace period,
not just CPUs.

I think what Paul is trying to explain AIUI (Paul please let me know if I
missed something):

(1) Anyone calling rcu_note_context_switch() and expecting it to respect
RCU-readers that are readers as a result of interrupt disabled regions, have
incorrect expectations. So calling rcu_note_context_switch() has to be done
carefully.

(2) Disabling interrupts is "generally" implied as an RCU-sched flavor
reader. However, invoking rcu_note_context_switch() from a disabled interrupt
region is *required* for rcu_note_context_switch() to work correctly.

(3) On PREEMPT=y kernels, invoking rcu_note_context_switch() from an
interrupt disabled region does not mean that that the task will be added to a
blocked list (unless it is also in an RCU-preempt reader) so
rcu_note_context_switch() may immediately report a quiescent state and
nothing blockings the grace period.
So callers of rcu_note_context_switch() must be aware of this behavior.

(4) On PREEMPT=n, unlike PREEMPT=y, there is no blocked list handling and so
nothing will block the grace period once rcu_note_context_switch() is called.
So any path calling rcu_note_context_switch() on a PREEMPT=n kernel, in the
middle of something that is expected to be an RCU reader would be really bad
from an RCU view point.

Probably, we should add this all to documentation somewhere.

thanks!

 - Joel


> > On the other hand, within a PREEMPT=n kernel, the call to schedule()
> > would split even an rcu_read_lock() critical section.  Which is why I
> > asked earlier if sleeping_lock_inc() and sleeping_lock_dec() are no-ops
> > in !PREEMPT_RT_BASE kernels.  We would after all want the usual lockdep
> > complaints in that case.
> 
> sleeping_lock_inc() +dec() is only RT specific. It is part of RT's
> spin_lock() implementation and used by RCU (rcu_note_context_switch())
> to not complain if invoked within a critical section.
> 
> > Does that help, or am I missing the point?
> > 
> > 							Thanx, Paul
> Sebastian

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH RT v2 2/3] sched: migrate_enable: Use sleeping_lock to indicate involuntary sleep
  2019-08-27  9:23             ` Sebastian Andrzej Siewior
  2019-08-27 13:08               ` Joel Fernandes
@ 2019-08-27 15:53               ` Paul E. McKenney
  2019-08-28  9:27                 ` Sebastian Andrzej Siewior
  1 sibling, 1 reply; 43+ messages in thread
From: Paul E. McKenney @ 2019-08-27 15:53 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: Joel Fernandes, Scott Wood, linux-rt-users, linux-kernel,
	Thomas Gleixner, Peter Zijlstra, Juri Lelli, Clark Williams

On Tue, Aug 27, 2019 at 11:23:33AM +0200, Sebastian Andrzej Siewior wrote:
> On 2019-08-26 09:29:45 [-0700], Paul E. McKenney wrote:
> > > The mechanism that is used here may change in future. I just wanted to
> > > make sure that from RCU's side it is okay to schedule here.
> > 
> > Good point.
> > 
> > The effect from RCU's viewpoint will be to split any non-rcu_read_lock()
> > RCU read-side critical section at this point.  This alrady happens in a
> > few places, for example, rcu_note_context_switch() constitutes an RCU
> > quiescent state despite being invoked with interrupts disabled (as is
> > required!).  The __schedule() function just needs to understand (and does
> > understand) that the RCU read-side critical section that would otherwise
> > span that call to rcu_node_context_switch() is split in two by that call.
> 
> Okay. So I read this as invoking schedule() at this point is okay. 

As long as no one is relying on a non-rcu_read_lock() RCU
read-side critical section (local_bh_disable(), preempt_disable(),
local_irq_disable(), ...) spanning this call.  But that depends on the
calling code and on other code it interacts with it, not on any specific
need on the part of RCU itself.

> Looking at this again, this could also happen on a PREEMPT=y kernel if
> the kernel decides to preempt a task within a rcu_read_lock() section
> and put it back later on another CPU.

This is an rcu_read_lock() critical section, so yes, on a PREEMPT=y
kernel, executing schedule() will cause the corresponding RCU read-side
critical section to persist, following the preempted tasks.  Give or
take lockdep complaints.

On a PREEMPT=n kernel, schedule() within an RCU read-side critical
section instead results in that critical section being split in two.
And this will also results in lockdep complaints.

> > However, if this was instead an rcu_read_lock() critical section within
> > a PREEMPT=y kernel, then if a schedule() occured within stop_one_task(),
> > RCU would consider that critical section to be preempted.  This means
> > that any RCU grace period that is blocked by this RCU read-side critical
> > section would remain blocked until stop_one_cpu() resumed, returned,
> > and so on until the matching rcu_read_unlock() was reached.  In other
> > words, RCU would consider that RCU read-side critical section to span
> > the call to stop_one_cpu() even if stop_one_cpu() invoked schedule().
> 
> Isn't that my example from above and what we do in RT? My understanding
> is that this is the reason why we need BOOST on RT otherwise the RCU
> critical section could remain blocked for some time.

At this point, I must confess that I have lost track of whose example
it is.  It was first reported in 2006, if I remember correctly.  ;-)

But yes, you are correct, the point of RCU priority boosting is to
cause tasks that have been preempted while within RCU read-side critical
sections to be scheduled so that they can reach their rcu_read_unlock()
calls, thus allowing the current grace period to end.

> > On the other hand, within a PREEMPT=n kernel, the call to schedule()
> > would split even an rcu_read_lock() critical section.  Which is why I
> > asked earlier if sleeping_lock_inc() and sleeping_lock_dec() are no-ops
> > in !PREEMPT_RT_BASE kernels.  We would after all want the usual lockdep
> > complaints in that case.
> 
> sleeping_lock_inc() +dec() is only RT specific. It is part of RT's
> spin_lock() implementation and used by RCU (rcu_note_context_switch())
> to not complain if invoked within a critical section.

Then this is being called when we have something like this, correct?

	DEFINE_SPINLOCK(mylock); // As opposed to DEFINE_RAW_SPINLOCK().

	...

	rcu_read_lock();
	do_something();
	spin_lock(&mylock); // Can block in -rt, thus needs sleeping_lock_inc()
	...
	rcu_read_unlock();

Without sleeping_lock_inc(), lockdep would complain about a voluntary
schedule within an RCU read-side critical section.  But in -rt, voluntary
schedules due to sleeping on a "spinlock" are OK.

Am I understanding this correctly?

							Thanx, Paul

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH RT v2 2/3] sched: migrate_enable: Use sleeping_lock to indicate involuntary sleep
  2019-08-27 13:08               ` Joel Fernandes
@ 2019-08-27 15:58                 ` Paul E. McKenney
  2019-08-27 16:06                   ` Joel Fernandes
  0 siblings, 1 reply; 43+ messages in thread
From: Paul E. McKenney @ 2019-08-27 15:58 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: Sebastian Andrzej Siewior, Scott Wood, linux-rt-users,
	linux-kernel, Thomas Gleixner, Peter Zijlstra, Juri Lelli,
	Clark Williams

On Tue, Aug 27, 2019 at 09:08:53AM -0400, Joel Fernandes wrote:
> On Tue, Aug 27, 2019 at 11:23:33AM +0200, Sebastian Andrzej Siewior wrote:
> [snip]
> > > However, if this was instead an rcu_read_lock() critical section within
> > > a PREEMPT=y kernel, then if a schedule() occured within stop_one_task(),
> > > RCU would consider that critical section to be preempted.  This means
> > > that any RCU grace period that is blocked by this RCU read-side critical
> > > section would remain blocked until stop_one_cpu() resumed, returned,
> > > and so on until the matching rcu_read_unlock() was reached.  In other
> > > words, RCU would consider that RCU read-side critical section to span
> > > the call to stop_one_cpu() even if stop_one_cpu() invoked schedule().
> > 
> > Isn't that my example from above and what we do in RT? My understanding
> > is that this is the reason why we need BOOST on RT otherwise the RCU
> > critical section could remain blocked for some time.
> 
> Not just for boost, it is needed to block the grace period itself on
> PREEMPT=y. On PREEMPT=y, if rcu_note_context_switch() happens in middle of a
> rcu_read_lock() reader section, then the task is added to a blocked list
> (rcu_preempt_ctxt_queue). Then just after that, the CPU reports a QS state
> (rcu_qs()) as you can see in the PREEMPT=y implementation of
> rcu_note_context_switch(). Even though the CPU has reported a QS, the grace
> period will not end because the preempted (or block as could be in -rt) task
> is still blocking the grace period. This is fundamental to the function of
> Preemptible-RCU where there is the concept of tasks blocking a grace period,
> not just CPUs.
> 
> I think what Paul is trying to explain AIUI (Paul please let me know if I
> missed something):
> 
> (1) Anyone calling rcu_note_context_switch() and expecting it to respect
> RCU-readers that are readers as a result of interrupt disabled regions, have
> incorrect expectations. So calling rcu_note_context_switch() has to be done
> carefully.
> 
> (2) Disabling interrupts is "generally" implied as an RCU-sched flavor
> reader. However, invoking rcu_note_context_switch() from a disabled interrupt
> region is *required* for rcu_note_context_switch() to work correctly.
> 
> (3) On PREEMPT=y kernels, invoking rcu_note_context_switch() from an
> interrupt disabled region does not mean that that the task will be added to a
> blocked list (unless it is also in an RCU-preempt reader) so
> rcu_note_context_switch() may immediately report a quiescent state and
> nothing blockings the grace period.
> So callers of rcu_note_context_switch() must be aware of this behavior.
> 
> (4) On PREEMPT=n, unlike PREEMPT=y, there is no blocked list handling and so
> nothing will block the grace period once rcu_note_context_switch() is called.
> So any path calling rcu_note_context_switch() on a PREEMPT=n kernel, in the
> middle of something that is expected to be an RCU reader would be really bad
> from an RCU view point.
> 
> Probably, we should add this all to documentation somewhere.

I think that Sebastian understands this and was using the example of RCU
priority boosting to confirm his understanding.  But documentation would
be good.  Extremely difficult to keep current, but good.  I believe that
the requirements document does cover this.

							Thanx, Paul

> thanks!
> 
>  - Joel
> 
> 
> > > On the other hand, within a PREEMPT=n kernel, the call to schedule()
> > > would split even an rcu_read_lock() critical section.  Which is why I
> > > asked earlier if sleeping_lock_inc() and sleeping_lock_dec() are no-ops
> > > in !PREEMPT_RT_BASE kernels.  We would after all want the usual lockdep
> > > complaints in that case.
> > 
> > sleeping_lock_inc() +dec() is only RT specific. It is part of RT's
> > spin_lock() implementation and used by RCU (rcu_note_context_switch())
> > to not complain if invoked within a critical section.
> > 
> > > Does that help, or am I missing the point?
> > > 
> > > 							Thanx, Paul
> > Sebastian

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH RT v2 2/3] sched: migrate_enable: Use sleeping_lock to indicate involuntary sleep
  2019-08-27 15:58                 ` Paul E. McKenney
@ 2019-08-27 16:06                   ` Joel Fernandes
  0 siblings, 0 replies; 43+ messages in thread
From: Joel Fernandes @ 2019-08-27 16:06 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: Sebastian Andrzej Siewior, Scott Wood, linux-rt-users,
	linux-kernel, Thomas Gleixner, Peter Zijlstra, Juri Lelli,
	Clark Williams

On Tue, Aug 27, 2019 at 08:58:13AM -0700, Paul E. McKenney wrote:
> On Tue, Aug 27, 2019 at 09:08:53AM -0400, Joel Fernandes wrote:
> > On Tue, Aug 27, 2019 at 11:23:33AM +0200, Sebastian Andrzej Siewior wrote:
> > [snip]
> > > > However, if this was instead an rcu_read_lock() critical section within
> > > > a PREEMPT=y kernel, then if a schedule() occured within stop_one_task(),
> > > > RCU would consider that critical section to be preempted.  This means
> > > > that any RCU grace period that is blocked by this RCU read-side critical
> > > > section would remain blocked until stop_one_cpu() resumed, returned,
> > > > and so on until the matching rcu_read_unlock() was reached.  In other
> > > > words, RCU would consider that RCU read-side critical section to span
> > > > the call to stop_one_cpu() even if stop_one_cpu() invoked schedule().
> > > 
> > > Isn't that my example from above and what we do in RT? My understanding
> > > is that this is the reason why we need BOOST on RT otherwise the RCU
> > > critical section could remain blocked for some time.
> > 
> > Not just for boost, it is needed to block the grace period itself on
> > PREEMPT=y. On PREEMPT=y, if rcu_note_context_switch() happens in middle of a
> > rcu_read_lock() reader section, then the task is added to a blocked list
> > (rcu_preempt_ctxt_queue). Then just after that, the CPU reports a QS state
> > (rcu_qs()) as you can see in the PREEMPT=y implementation of
> > rcu_note_context_switch(). Even though the CPU has reported a QS, the grace
> > period will not end because the preempted (or block as could be in -rt) task
> > is still blocking the grace period. This is fundamental to the function of
> > Preemptible-RCU where there is the concept of tasks blocking a grace period,
> > not just CPUs.
> > 
> > I think what Paul is trying to explain AIUI (Paul please let me know if I
> > missed something):
> > 
> > (1) Anyone calling rcu_note_context_switch() and expecting it to respect
> > RCU-readers that are readers as a result of interrupt disabled regions, have
> > incorrect expectations. So calling rcu_note_context_switch() has to be done
> > carefully.
> > 
> > (2) Disabling interrupts is "generally" implied as an RCU-sched flavor
> > reader. However, invoking rcu_note_context_switch() from a disabled interrupt
> > region is *required* for rcu_note_context_switch() to work correctly.
> > 
> > (3) On PREEMPT=y kernels, invoking rcu_note_context_switch() from an
> > interrupt disabled region does not mean that that the task will be added to a
> > blocked list (unless it is also in an RCU-preempt reader) so
> > rcu_note_context_switch() may immediately report a quiescent state and
> > nothing blockings the grace period.
> > So callers of rcu_note_context_switch() must be aware of this behavior.
> > 
> > (4) On PREEMPT=n, unlike PREEMPT=y, there is no blocked list handling and so
> > nothing will block the grace period once rcu_note_context_switch() is called.
> > So any path calling rcu_note_context_switch() on a PREEMPT=n kernel, in the
> > middle of something that is expected to be an RCU reader would be really bad
> > from an RCU view point.
> > 
> > Probably, we should add this all to documentation somewhere.
> 
> I think that Sebastian understands this and was using the example of RCU
> priority boosting to confirm his understanding.  But documentation would
> be good.  Extremely difficult to keep current, but good.  I believe that
> the requirements document does cover this.

Oh ok, got it. Sorry about the noise then!

(In a way, I was just thinking out loud since this is a slightly confusing
 topic :-P and an archive link to this discussion serves a great purpose in
 my notes :-D :-)).

thanks!

 - Joel


^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH RT v2 2/3] sched: migrate_enable: Use sleeping_lock to indicate involuntary sleep
  2019-08-27 15:53               ` Paul E. McKenney
@ 2019-08-28  9:27                 ` Sebastian Andrzej Siewior
  2019-08-28 12:54                   ` Paul E. McKenney
  0 siblings, 1 reply; 43+ messages in thread
From: Sebastian Andrzej Siewior @ 2019-08-28  9:27 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: Joel Fernandes, Scott Wood, linux-rt-users, linux-kernel,
	Thomas Gleixner, Peter Zijlstra, Juri Lelli, Clark Williams

On 2019-08-27 08:53:06 [-0700], Paul E. McKenney wrote:
> > > On the other hand, within a PREEMPT=n kernel, the call to schedule()
> > > would split even an rcu_read_lock() critical section.  Which is why I
> > > asked earlier if sleeping_lock_inc() and sleeping_lock_dec() are no-ops
> > > in !PREEMPT_RT_BASE kernels.  We would after all want the usual lockdep
> > > complaints in that case.
> > 
> > sleeping_lock_inc() +dec() is only RT specific. It is part of RT's
> > spin_lock() implementation and used by RCU (rcu_note_context_switch())
> > to not complain if invoked within a critical section.
> 
> Then this is being called when we have something like this, correct?
> 
> 	DEFINE_SPINLOCK(mylock); // As opposed to DEFINE_RAW_SPINLOCK().
> 
> 	...
> 
> 	rcu_read_lock();
> 	do_something();
> 	spin_lock(&mylock); // Can block in -rt, thus needs sleeping_lock_inc()
> 	...
> 	rcu_read_unlock();
> 
> Without sleeping_lock_inc(), lockdep would complain about a voluntary
> schedule within an RCU read-side critical section.  But in -rt, voluntary
> schedules due to sleeping on a "spinlock" are OK.
> 
> Am I understanding this correctly?

Everything perfect except that it is not lockdep complaining but the
WARN_ON_ONCE() in rcu_note_context_switch().

> 
> 							Thanx, Paul

Sebastian

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH RT v2 2/3] sched: migrate_enable: Use sleeping_lock to indicate involuntary sleep
  2019-08-28  9:27                 ` Sebastian Andrzej Siewior
@ 2019-08-28 12:54                   ` Paul E. McKenney
  2019-08-28 13:14                     ` Sebastian Andrzej Siewior
  0 siblings, 1 reply; 43+ messages in thread
From: Paul E. McKenney @ 2019-08-28 12:54 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: Joel Fernandes, Scott Wood, linux-rt-users, linux-kernel,
	Thomas Gleixner, Peter Zijlstra, Juri Lelli, Clark Williams

On Wed, Aug 28, 2019 at 11:27:39AM +0200, Sebastian Andrzej Siewior wrote:
> On 2019-08-27 08:53:06 [-0700], Paul E. McKenney wrote:
> > > > On the other hand, within a PREEMPT=n kernel, the call to schedule()
> > > > would split even an rcu_read_lock() critical section.  Which is why I
> > > > asked earlier if sleeping_lock_inc() and sleeping_lock_dec() are no-ops
> > > > in !PREEMPT_RT_BASE kernels.  We would after all want the usual lockdep
> > > > complaints in that case.
> > > 
> > > sleeping_lock_inc() +dec() is only RT specific. It is part of RT's
> > > spin_lock() implementation and used by RCU (rcu_note_context_switch())
> > > to not complain if invoked within a critical section.
> > 
> > Then this is being called when we have something like this, correct?
> > 
> > 	DEFINE_SPINLOCK(mylock); // As opposed to DEFINE_RAW_SPINLOCK().
> > 
> > 	...
> > 
> > 	rcu_read_lock();
> > 	do_something();
> > 	spin_lock(&mylock); // Can block in -rt, thus needs sleeping_lock_inc()
> > 	...
> > 	rcu_read_unlock();
> > 
> > Without sleeping_lock_inc(), lockdep would complain about a voluntary
> > schedule within an RCU read-side critical section.  But in -rt, voluntary
> > schedules due to sleeping on a "spinlock" are OK.
> > 
> > Am I understanding this correctly?
> 
> Everything perfect except that it is not lockdep complaining but the
> WARN_ON_ONCE() in rcu_note_context_switch().

This one, right?

	WARN_ON_ONCE(!preempt && t->rcu_read_lock_nesting > 0);

Another approach would be to change that WARN_ON_ONCE().  This fix might
be too extreme, as it would suppress other issues:

	WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT_BASE) && !preempt && t->rcu_read_lock_nesting > 0);

But maybe what is happening under the covers is that preempt is being
set when sleeping on a spinlock.  Is that the case?

							Thanx, Paul

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH RT v2 2/3] sched: migrate_enable: Use sleeping_lock to indicate involuntary sleep
  2019-08-28 12:54                   ` Paul E. McKenney
@ 2019-08-28 13:14                     ` Sebastian Andrzej Siewior
  2019-08-28 13:59                       ` Joel Fernandes
  2019-08-28 15:50                       ` Paul E. McKenney
  0 siblings, 2 replies; 43+ messages in thread
From: Sebastian Andrzej Siewior @ 2019-08-28 13:14 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: Joel Fernandes, Scott Wood, linux-rt-users, linux-kernel,
	Thomas Gleixner, Peter Zijlstra, Juri Lelli, Clark Williams

On 2019-08-28 05:54:26 [-0700], Paul E. McKenney wrote:
> On Wed, Aug 28, 2019 at 11:27:39AM +0200, Sebastian Andrzej Siewior wrote:
> > On 2019-08-27 08:53:06 [-0700], Paul E. McKenney wrote:
> > > Am I understanding this correctly?
> > 
> > Everything perfect except that it is not lockdep complaining but the
> > WARN_ON_ONCE() in rcu_note_context_switch().
> 
> This one, right?
> 
> 	WARN_ON_ONCE(!preempt && t->rcu_read_lock_nesting > 0);
> 
> Another approach would be to change that WARN_ON_ONCE().  This fix might
> be too extreme, as it would suppress other issues:
> 
> 	WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT_BASE) && !preempt && t->rcu_read_lock_nesting > 0);
> 
> But maybe what is happening under the covers is that preempt is being
> set when sleeping on a spinlock.  Is that the case?

I would like to keep that check and that is why we have:

|   #if defined(CONFIG_PREEMPT_RT_FULL)
|         sleeping_l = t->sleeping_lock;
|   #endif
|         WARN_ON_ONCE(!preempt && t->rcu_read_lock_nesting > 0 && !sleeping_l);

in -RT and ->sleeping_lock is that counter that is incremented in
spin_lock(). And the only reason why sleeping_lock_inc() was used in the
patch was to disable _this_ warning.

> 							Thanx, Paul

Sebastian

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH RT v2 2/3] sched: migrate_enable: Use sleeping_lock to indicate involuntary sleep
  2019-08-28 13:14                     ` Sebastian Andrzej Siewior
@ 2019-08-28 13:59                       ` Joel Fernandes
  2019-08-28 15:51                         ` Paul E. McKenney
  2019-08-28 15:50                       ` Paul E. McKenney
  1 sibling, 1 reply; 43+ messages in thread
From: Joel Fernandes @ 2019-08-28 13:59 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: Paul E. McKenney, Scott Wood, linux-rt-users, linux-kernel,
	Thomas Gleixner, Peter Zijlstra, Juri Lelli, Clark Williams

On Wed, Aug 28, 2019 at 03:14:33PM +0200, Sebastian Andrzej Siewior wrote:
> On 2019-08-28 05:54:26 [-0700], Paul E. McKenney wrote:
> > On Wed, Aug 28, 2019 at 11:27:39AM +0200, Sebastian Andrzej Siewior wrote:
> > > On 2019-08-27 08:53:06 [-0700], Paul E. McKenney wrote:
> > > > Am I understanding this correctly?
> > > 
> > > Everything perfect except that it is not lockdep complaining but the
> > > WARN_ON_ONCE() in rcu_note_context_switch().
> > 
> > This one, right?
> > 
> > 	WARN_ON_ONCE(!preempt && t->rcu_read_lock_nesting > 0);
> > 
> > Another approach would be to change that WARN_ON_ONCE().  This fix might
> > be too extreme, as it would suppress other issues:
> > 
> > 	WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT_BASE) && !preempt && t->rcu_read_lock_nesting > 0);
> > 
> > But maybe what is happening under the covers is that preempt is being
> > set when sleeping on a spinlock.  Is that the case?
> 
> I would like to keep that check and that is why we have:
> 
> |   #if defined(CONFIG_PREEMPT_RT_FULL)
> |         sleeping_l = t->sleeping_lock;
> |   #endif
> |         WARN_ON_ONCE(!preempt && t->rcu_read_lock_nesting > 0 && !sleeping_l);
> 
> in -RT and ->sleeping_lock is that counter that is incremented in
> spin_lock(). And the only reason why sleeping_lock_inc() was used in the
> patch was to disable _this_ warning.

Makes sense, Sebastian.

Paul, you meant "!" in front of the IS_ENABLED right in your code snippet right?

The other issue with:
WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT_RT_BASE) && !preempt && t->rcu_read_lock_nesting > 0);

.. could be that, the warning will be disabled for -rt entirely, not just for
sleeping locks. And we probably do want to keep this warning for the cases in
-rt where we are blocking but it is not a sleeping lock. Right?

thanks,

 - Joel



^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH RT v2 2/3] sched: migrate_enable: Use sleeping_lock to indicate involuntary sleep
  2019-08-28 13:14                     ` Sebastian Andrzej Siewior
  2019-08-28 13:59                       ` Joel Fernandes
@ 2019-08-28 15:50                       ` Paul E. McKenney
  1 sibling, 0 replies; 43+ messages in thread
From: Paul E. McKenney @ 2019-08-28 15:50 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: Joel Fernandes, Scott Wood, linux-rt-users, linux-kernel,
	Thomas Gleixner, Peter Zijlstra, Juri Lelli, Clark Williams

On Wed, Aug 28, 2019 at 03:14:33PM +0200, Sebastian Andrzej Siewior wrote:
> On 2019-08-28 05:54:26 [-0700], Paul E. McKenney wrote:
> > On Wed, Aug 28, 2019 at 11:27:39AM +0200, Sebastian Andrzej Siewior wrote:
> > > On 2019-08-27 08:53:06 [-0700], Paul E. McKenney wrote:
> > > > Am I understanding this correctly?
> > > 
> > > Everything perfect except that it is not lockdep complaining but the
> > > WARN_ON_ONCE() in rcu_note_context_switch().
> > 
> > This one, right?
> > 
> > 	WARN_ON_ONCE(!preempt && t->rcu_read_lock_nesting > 0);
> > 
> > Another approach would be to change that WARN_ON_ONCE().  This fix might
> > be too extreme, as it would suppress other issues:
> > 
> > 	WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT_BASE) && !preempt && t->rcu_read_lock_nesting > 0);
> > 
> > But maybe what is happening under the covers is that preempt is being
> > set when sleeping on a spinlock.  Is that the case?
> 
> I would like to keep that check and that is why we have:
> 
> |   #if defined(CONFIG_PREEMPT_RT_FULL)
> |         sleeping_l = t->sleeping_lock;
> |   #endif
> |         WARN_ON_ONCE(!preempt && t->rcu_read_lock_nesting > 0 && !sleeping_l);
> 
> in -RT and ->sleeping_lock is that counter that is incremented in
> spin_lock(). And the only reason why sleeping_lock_inc() was used in the
> patch was to disable _this_ warning.

Very good!  I would prefer an (one-line) access function to keep the
number of #if uses down to dull roar, but other than that, looks good!

(Yeah, I know, this is tree_preempt.h, but still...)

							Thanx, Paul

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH RT v2 2/3] sched: migrate_enable: Use sleeping_lock to indicate involuntary sleep
  2019-08-28 13:59                       ` Joel Fernandes
@ 2019-08-28 15:51                         ` Paul E. McKenney
  0 siblings, 0 replies; 43+ messages in thread
From: Paul E. McKenney @ 2019-08-28 15:51 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: Sebastian Andrzej Siewior, Scott Wood, linux-rt-users,
	linux-kernel, Thomas Gleixner, Peter Zijlstra, Juri Lelli,
	Clark Williams

On Wed, Aug 28, 2019 at 09:59:38AM -0400, Joel Fernandes wrote:
> On Wed, Aug 28, 2019 at 03:14:33PM +0200, Sebastian Andrzej Siewior wrote:
> > On 2019-08-28 05:54:26 [-0700], Paul E. McKenney wrote:
> > > On Wed, Aug 28, 2019 at 11:27:39AM +0200, Sebastian Andrzej Siewior wrote:
> > > > On 2019-08-27 08:53:06 [-0700], Paul E. McKenney wrote:
> > > > > Am I understanding this correctly?
> > > > 
> > > > Everything perfect except that it is not lockdep complaining but the
> > > > WARN_ON_ONCE() in rcu_note_context_switch().
> > > 
> > > This one, right?
> > > 
> > > 	WARN_ON_ONCE(!preempt && t->rcu_read_lock_nesting > 0);
> > > 
> > > Another approach would be to change that WARN_ON_ONCE().  This fix might
> > > be too extreme, as it would suppress other issues:
> > > 
> > > 	WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT_BASE) && !preempt && t->rcu_read_lock_nesting > 0);
> > > 
> > > But maybe what is happening under the covers is that preempt is being
> > > set when sleeping on a spinlock.  Is that the case?
> > 
> > I would like to keep that check and that is why we have:
> > 
> > |   #if defined(CONFIG_PREEMPT_RT_FULL)
> > |         sleeping_l = t->sleeping_lock;
> > |   #endif
> > |         WARN_ON_ONCE(!preempt && t->rcu_read_lock_nesting > 0 && !sleeping_l);
> > 
> > in -RT and ->sleeping_lock is that counter that is incremented in
> > spin_lock(). And the only reason why sleeping_lock_inc() was used in the
> > patch was to disable _this_ warning.
> 
> Makes sense, Sebastian.
> 
> Paul, you meant "!" in front of the IS_ENABLED right in your code snippet right?
> 
> The other issue with:
> WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT_RT_BASE) && !preempt && t->rcu_read_lock_nesting > 0);
> 
> .. could be that, the warning will be disabled for -rt entirely, not just for
> sleeping locks. And we probably do want to keep this warning for the cases in
> -rt where we are blocking but it is not a sleeping lock. Right?

Yes, my code was missing a "!", but I prefer Sebastian's and Scott's
approach to mine anyway.  ;-)

							Thanx, Paul

^ permalink raw reply	[flat|nested] 43+ messages in thread

end of thread, back to index

Thread overview: 43+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-08-21 23:19 [PATCH RT v2 0/3] RCU fixes Scott Wood
2019-08-21 23:19 ` [PATCH RT v2 1/3] rcu: Acquire RCU lock when disabling BHs Scott Wood
2019-08-21 23:33   ` Paul E. McKenney
2019-08-22 13:39     ` Joel Fernandes
2019-08-22 15:27       ` Paul E. McKenney
2019-08-23  1:50         ` Joel Fernandes
2019-08-23  2:11           ` Paul E. McKenney
2019-08-23  3:23       ` Scott Wood
2019-08-23 12:30         ` Paul E. McKenney
2019-08-23 16:17         ` Sebastian Andrzej Siewior
2019-08-23 19:46           ` Scott Wood
2019-08-26 15:59             ` Sebastian Andrzej Siewior
2019-08-26 23:21               ` Scott Wood
2019-08-23  2:36     ` Scott Wood
2019-08-23  2:54       ` Paul E. McKenney
2019-08-21 23:19 ` [PATCH RT v2 2/3] sched: migrate_enable: Use sleeping_lock to indicate involuntary sleep Scott Wood
2019-08-21 23:35   ` Paul E. McKenney
2019-08-23  1:21     ` Scott Wood
2019-08-23 16:20   ` Sebastian Andrzej Siewior
2019-08-23 19:28     ` Scott Wood
2019-08-24  3:10       ` Joel Fernandes
2019-08-26 15:25         ` Sebastian Andrzej Siewior
2019-08-26 16:29           ` Paul E. McKenney
2019-08-26 17:49             ` Scott Wood
2019-08-26 18:12               ` Paul E. McKenney
2019-08-27  9:23             ` Sebastian Andrzej Siewior
2019-08-27 13:08               ` Joel Fernandes
2019-08-27 15:58                 ` Paul E. McKenney
2019-08-27 16:06                   ` Joel Fernandes
2019-08-27 15:53               ` Paul E. McKenney
2019-08-28  9:27                 ` Sebastian Andrzej Siewior
2019-08-28 12:54                   ` Paul E. McKenney
2019-08-28 13:14                     ` Sebastian Andrzej Siewior
2019-08-28 13:59                       ` Joel Fernandes
2019-08-28 15:51                         ` Paul E. McKenney
2019-08-28 15:50                       ` Paul E. McKenney
2019-08-21 23:19 ` [PATCH RT v2 3/3] rcu: Disable use_softirq on PREEMPT_RT Scott Wood
2019-08-21 23:40   ` Paul E. McKenney
2019-08-23 16:32     ` Sebastian Andrzej Siewior
2019-08-22 13:59   ` Joel Fernandes
2019-08-22 15:29     ` Paul E. McKenney
2019-08-22 19:31     ` Scott Wood
2019-08-23  0:52       ` Joel Fernandes

Linux-rt-users archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-rt-users/0 linux-rt-users/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-rt-users linux-rt-users/ https://lore.kernel.org/linux-rt-users \
		linux-rt-users@vger.kernel.org
	public-inbox-index linux-rt-users

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-rt-users


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git