rcu.vger.kernel.org archive mirror
* [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in call_rcu_tasks_generic()
@ 2023-02-23  6:30 Zqiang
  2023-02-23  8:36 ` Zhuo, Qiuxu
  0 siblings, 1 reply; 15+ messages in thread
From: Zqiang @ 2023-02-23  6:30 UTC (permalink / raw)
  To: paulmck, frederic, quic_neeraju, joel; +Cc: rcu, linux-kernel

According to commit 3063b33a347c ("Avoid raw-spinlocked wakeups
from call_rcu_tasks_generic()"), the wakeup of the grace-period
kthread is deferred via irq_work_queue() because, if the caller of
call_rcu_tasks_generic() holds a raw spinlock and the kernel is built
with CONFIG_PROVE_RAW_LOCK_NESTING=y, the spinlock acquired within
wake_up() results in lockdep splats. However, the grace-period kthread
is now awakened with rcuwait_wake_up() instead of wake_up(), and
rcuwait_wake_up() acquires no spinlock. This commit therefore removes
the irq_work_queue() indirection and invokes rcuwait_wake_up() directly
from call_rcu_tasks_generic().

Signed-off-by: Zqiang <qiang1.zhang@intel.com>
---
 kernel/rcu/tasks.h | 16 +---------------
 1 file changed, 1 insertion(+), 15 deletions(-)

diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index baf7ec178155..757b8c6da1ad 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -39,7 +39,6 @@ struct rcu_tasks_percpu {
 	unsigned long rtp_jiffies;
 	unsigned long rtp_n_lock_retries;
 	struct work_struct rtp_work;
-	struct irq_work rtp_irq_work;
 	struct rcu_head barrier_q_head;
 	struct list_head rtp_blkd_tasks;
 	int cpu;
@@ -112,12 +111,9 @@ struct rcu_tasks {
 	char *kname;
 };
 
-static void call_rcu_tasks_iw_wakeup(struct irq_work *iwp);
-
 #define DEFINE_RCU_TASKS(rt_name, gp, call, n)						\
 static DEFINE_PER_CPU(struct rcu_tasks_percpu, rt_name ## __percpu) = {			\
 	.lock = __RAW_SPIN_LOCK_UNLOCKED(rt_name ## __percpu.cbs_pcpu_lock),		\
-	.rtp_irq_work = IRQ_WORK_INIT_HARD(call_rcu_tasks_iw_wakeup),			\
 };											\
 static struct rcu_tasks rt_name =							\
 {											\
@@ -273,16 +269,6 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
 	pr_info("%s: Setting shift to %d and lim to %d.\n", __func__, data_race(rtp->percpu_enqueue_shift), data_race(rtp->percpu_enqueue_lim));
 }
 
-// IRQ-work handler that does deferred wakeup for call_rcu_tasks_generic().
-static void call_rcu_tasks_iw_wakeup(struct irq_work *iwp)
-{
-	struct rcu_tasks *rtp;
-	struct rcu_tasks_percpu *rtpcp = container_of(iwp, struct rcu_tasks_percpu, rtp_irq_work);
-
-	rtp = rtpcp->rtpp;
-	rcuwait_wake_up(&rtp->cbs_wait);
-}
-
 // Enqueue a callback for the specified flavor of Tasks RCU.
 static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
 				   struct rcu_tasks *rtp)
@@ -334,7 +320,7 @@ static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
 	rcu_read_unlock();
 	/* We can't create the thread unless interrupts are enabled. */
 	if (needwake && READ_ONCE(rtp->kthread_ptr))
-		irq_work_queue(&rtpcp->rtp_irq_work);
+		rcuwait_wake_up(&rtp->cbs_wait);
 }
 
 // RCU callback function for rcu_barrier_tasks_generic().
-- 
2.25.1



* RE: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in call_rcu_tasks_generic()
  2023-02-23  6:30 [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in call_rcu_tasks_generic() Zqiang
@ 2023-02-23  8:36 ` Zhuo, Qiuxu
  2023-02-23  8:43   ` Zhang, Qiang1
  0 siblings, 1 reply; 15+ messages in thread
From: Zhuo, Qiuxu @ 2023-02-23  8:36 UTC (permalink / raw)
  To: Zhang, Qiang1, paulmck, frederic, quic_neeraju, joel; +Cc: rcu, linux-kernel

> From: Zqiang <qiang1.zhang@intel.com>
> Sent: Thursday, February 23, 2023 2:30 PM
> To: paulmck@kernel.org; frederic@kernel.org; quic_neeraju@quicinc.com;
> joel@joelfernandes.org
> Cc: rcu@vger.kernel.org; linux-kernel@vger.kernel.org
> Subject: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in
> call_rcu_tasks_generic()
> 
> According to commit '3063b33a347c ("Avoid raw-spinlocked wakeups from
> call_rcu_tasks_generic()")', the grace-period kthread is delayed to wakeup
> using irq_work_queue() is because if the caller of
> call_rcu_tasks_generic() holds a raw spinlock, when the kernel is built with
> CONFIG_PROVE_RAW_LOCK_NESTING=y, due to a spinlock will be hold in
> wake_up(), so the lockdep splats will happen. but now using
> rcuwait_wake_up() to wakeup grace-period kthread instead of wake_up(), in
> rcuwait_wake_up() no spinlock will be acquired, so this commit remove using

There are still spinlock-acquisition and spinlock-release invocations within the call path from rcuwait_wake_up().

rcuwait_wake_up() -> wake_up_process() -> try_to_wake_up(), then:

    raw_spin_lock_irqsave()
    ...
    raw_spin_unlock_irqrestore()
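
For illustration, a rough sketch of the locking on that path (condensed
from kernel/sched/core.c; most details elided):

    static int try_to_wake_up(struct task_struct *p, unsigned int state,
                              int wake_flags)
    {
            unsigned long flags;
            int success = 0;

            /* A raw spinlock: it remains a spinning lock even on PREEMPT_RT. */
            raw_spin_lock_irqsave(&p->pi_lock, flags);
            if (READ_ONCE(p->__state) & state) {
                    /* ... make the task runnable, possibly taking the rq lock ... */
                    success = 1;
            }
            raw_spin_unlock_irqrestore(&p->pi_lock, flags);

            return success;
    }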



* RE: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in call_rcu_tasks_generic()
  2023-02-23  8:36 ` Zhuo, Qiuxu
@ 2023-02-23  8:43   ` Zhang, Qiang1
  2023-02-23 14:36     ` Zhuo, Qiuxu
  2023-02-23 16:10     ` Joel Fernandes
  0 siblings, 2 replies; 15+ messages in thread
From: Zhang, Qiang1 @ 2023-02-23  8:43 UTC (permalink / raw)
  To: Zhuo, Qiuxu, paulmck, frederic, quic_neeraju, joel; +Cc: rcu, linux-kernel

> From: Zqiang <qiang1.zhang@intel.com>
> Sent: Thursday, February 23, 2023 2:30 PM
> To: paulmck@kernel.org; frederic@kernel.org; quic_neeraju@quicinc.com;
> joel@joelfernandes.org
> Cc: rcu@vger.kernel.org; linux-kernel@vger.kernel.org
> Subject: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in
> call_rcu_tasks_generic()
> 
> According to commit '3063b33a347c ("Avoid raw-spinlocked wakeups from
> call_rcu_tasks_generic()")', the grace-period kthread is delayed to wakeup
> using irq_work_queue() is because if the caller of
> call_rcu_tasks_generic() holds a raw spinlock, when the kernel is built with
> CONFIG_PROVE_RAW_LOCK_NESTING=y, due to a spinlock will be hold in
> wake_up(), so the lockdep splats will happen. but now using
> rcuwait_wake_up() to wakeup grace-period kthread instead of wake_up(), in
> rcuwait_wake_up() no spinlock will be acquired, so this commit remove using
>
>There are still spinlock-acquisition and spinlock-release invocations within the call path from rcuwait_wake_up().
>
>rcuwait_wake_up() -> wake_up_process() -> try_to_wake_up(), then:
>
>    raw_spin_lock_irqsave()
>    ...
>    raw_spin_unlock_irqrestore

Yes, but these are raw-spinlock acquisitions and releases. (Note: spinlocks
are converted to sleepable locks in PREEMPT_RT kernels, but raw spinlocks
are not.)

acquire raw_spinlock -> acquire spinlock will trigger a lockdep warning.
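
For illustration, a minimal sketch of that nesting rule (hypothetical locks,
not taken from the kernel source):

    static DEFINE_RAW_SPINLOCK(outer_raw);
    static DEFINE_SPINLOCK(inner);          /* sleeping lock on PREEMPT_RT */

    static void bad_nesting(void)
    {
            raw_spin_lock(&outer_raw);
            spin_lock(&inner);      /* splat with CONFIG_PROVE_RAW_LOCK_NESTING=y */
            spin_unlock(&inner);
            raw_spin_unlock(&outer_raw);
    }

    static void ok_nesting(void)
    {
            raw_spin_lock(&outer_raw);
            /* Raw-in-raw nesting is fine, and raw spinlocks are all that
             * try_to_wake_up() acquires. */
            raw_spin_unlock(&outer_raw);
    }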

Thanks
Zqiang


* RE: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in call_rcu_tasks_generic()
  2023-02-23  8:43   ` Zhang, Qiang1
@ 2023-02-23 14:36     ` Zhuo, Qiuxu
  2023-02-23 16:10     ` Joel Fernandes
  1 sibling, 0 replies; 15+ messages in thread
From: Zhuo, Qiuxu @ 2023-02-23 14:36 UTC (permalink / raw)
  To: Zhang, Qiang1, paulmck, frederic, quic_neeraju, joel; +Cc: rcu, linux-kernel

> From: Zhang, Qiang1 <qiang1.zhang@intel.com>
> Sent: Thursday, February 23, 2023 4:43 PM
> To: Zhuo, Qiuxu <qiuxu.zhuo@intel.com>; paulmck@kernel.org;
> frederic@kernel.org; quic_neeraju@quicinc.com; joel@joelfernandes.org
> Cc: rcu@vger.kernel.org; linux-kernel@vger.kernel.org
> Subject: RE: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in
> call_rcu_tasks_generic()
> 
> > From: Zqiang <qiang1.zhang@intel.com>
> > Sent: Thursday, February 23, 2023 2:30 PM
> > To: paulmck@kernel.org; frederic@kernel.org; quic_neeraju@quicinc.com;
> > joel@joelfernandes.org
> > Cc: rcu@vger.kernel.org; linux-kernel@vger.kernel.org
> > Subject: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in
> > call_rcu_tasks_generic()
> >
> > According to commit '3063b33a347c ("Avoid raw-spinlocked wakeups from
> > call_rcu_tasks_generic()")', the grace-period kthread is delayed to
> > wakeup using irq_work_queue() is because if the caller of
> > call_rcu_tasks_generic() holds a raw spinlock, when the kernel is
> > built with CONFIG_PROVE_RAW_LOCK_NESTING=y, due to a spinlock will
> be
> > hold in wake_up(), so the lockdep splats will happen. but now using
> > rcuwait_wake_up() to wakeup grace-period kthread instead of wake_up(),
> > in
> > rcuwait_wake_up() no spinlock will be acquired, so this commit remove
> > using
> >
> >There are still spinlock-acquisition and spinlock-release invocations within
> the call path from rcuwait_wake_up().
> >
> >rcuwait_wake_up() -> wake_up_process() -> try_to_wake_up(), then:
> >
> >    raw_spin_lock_irqsave()
> >    ...
> >    raw_spin_unlock_irqrestore
> 
> Yes, but this is raw_spinlock acquisition and release(note: spinlock will
> convert to sleepable lock in Preempt-RT kernel, but raw spinlock is not
> change).

Hi Qiang,

Yes, you're correct. Thanks for correcting me.

-Qiuxu


* Re: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in call_rcu_tasks_generic()
  2023-02-23  8:43   ` Zhang, Qiang1
  2023-02-23 14:36     ` Zhuo, Qiuxu
@ 2023-02-23 16:10     ` Joel Fernandes
  2023-02-23 16:57       ` Joel Fernandes
  2023-02-24  0:36       ` Zhang, Qiang1
  1 sibling, 2 replies; 15+ messages in thread
From: Joel Fernandes @ 2023-02-23 16:10 UTC (permalink / raw)
  To: Zhang, Qiang1
  Cc: Zhuo, Qiuxu, paulmck, frederic, quic_neeraju, rcu, linux-kernel

On Thu, Feb 23, 2023 at 08:43:05AM +0000, Zhang, Qiang1 wrote:
> > From: Zqiang <qiang1.zhang@intel.com>
> > Sent: Thursday, February 23, 2023 2:30 PM
> > To: paulmck@kernel.org; frederic@kernel.org; quic_neeraju@quicinc.com;
> > joel@joelfernandes.org
> > Cc: rcu@vger.kernel.org; linux-kernel@vger.kernel.org
> > Subject: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in
> > call_rcu_tasks_generic()
> > 
> > According to commit '3063b33a347c ("Avoid raw-spinlocked wakeups from
> > call_rcu_tasks_generic()")', the grace-period kthread is delayed to wakeup
> > using irq_work_queue() is because if the caller of
> > call_rcu_tasks_generic() holds a raw spinlock, when the kernel is built with
> > CONFIG_PROVE_RAW_LOCK_NESTING=y, due to a spinlock will be hold in
> > wake_up(), so the lockdep splats will happen. but now using
> > rcuwait_wake_up() to wakeup grace-period kthread instead of wake_up(), in
> > rcuwait_wake_up() no spinlock will be acquired, so this commit remove using
> >
> >There are still spinlock-acquisition and spinlock-release invocations within the call path from rcuwait_wake_up().
> >
> >rcuwait_wake_up() -> wake_up_process() -> try_to_wake_up(), then:
> >
> >    raw_spin_lock_irqsave()
> >    ...
> >    raw_spin_unlock_irqrestore
> 
> Yes, but this is raw_spinlock acquisition and release(note: spinlock will convert to
> sleepable lock in Preempt-RT kernel, but raw spinlock is not change).
> 
> acquire raw_spinlock -> acquire spinlock  will trigger lockdep warning.

Is this really safe in the long run though? I seem to remember there are
weird locking dependencies if RCU is used from within the scheduler [1].
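
For instance, roughly the self-deadlock described in [1], sketched:

    /*
     * Scheduler path already holding the rq lock:
     *   rcu_read_unlock() -> rcu_read_unlock_special()
     *     -> wakeup -> try_to_wake_up() -> lock(rq->lock)   // deadlock
     */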

I prefer to keep it as irq_work_queue() unless you are seeing some benefit.
Generally, there has to be a 'win' or other justification for adding more
risk.

thanks,

 - Joel
[1] http://www.joelfernandes.org/rcu/scheduler/locking/2019/09/02/rcu-schedlocks.html


* Re: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in call_rcu_tasks_generic()
  2023-02-23 16:10     ` Joel Fernandes
@ 2023-02-23 16:57       ` Joel Fernandes
  2023-02-23 17:13         ` Paul E. McKenney
  2023-02-24  0:36       ` Zhang, Qiang1
  1 sibling, 1 reply; 15+ messages in thread
From: Joel Fernandes @ 2023-02-23 16:57 UTC (permalink / raw)
  To: Zhang, Qiang1
  Cc: Zhuo, Qiuxu, paulmck, frederic, quic_neeraju, rcu, linux-kernel



> On Feb 23, 2023, at 11:10 AM, Joel Fernandes <joel@joelfernandes.org> wrote:
> 
> On Thu, Feb 23, 2023 at 08:43:05AM +0000, Zhang, Qiang1 wrote:
>>> From: Zqiang <qiang1.zhang@intel.com>
>>> Sent: Thursday, February 23, 2023 2:30 PM
>>> To: paulmck@kernel.org; frederic@kernel.org; quic_neeraju@quicinc.com;
>>> joel@joelfernandes.org
>>> Cc: rcu@vger.kernel.org; linux-kernel@vger.kernel.org
>>> Subject: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in
>>> call_rcu_tasks_generic()
>>> 
>>> According to commit '3063b33a347c ("Avoid raw-spinlocked wakeups from
>>> call_rcu_tasks_generic()")', the grace-period kthread is delayed to wakeup
>>> using irq_work_queue() is because if the caller of
>>> call_rcu_tasks_generic() holds a raw spinlock, when the kernel is built with
>>> CONFIG_PROVE_RAW_LOCK_NESTING=y, due to a spinlock will be hold in
>>> wake_up(), so the lockdep splats will happen. but now using
>>> rcuwait_wake_up() to wakeup grace-period kthread instead of wake_up(), in
>>> rcuwait_wake_up() no spinlock will be acquired, so this commit remove using
>>> 
>>> There are still spinlock-acquisition and spinlock-release invocations within the call path from rcuwait_wake_up().
>>> 
>>> rcuwait_wake_up() -> wake_up_process() -> try_to_wake_up(), then:
>>> 
>>>   raw_spin_lock_irqsave()
>>>   ...
>>>   raw_spin_unlock_irqrestore
>> 
>> Yes, but this is raw_spinlock acquisition and release(note: spinlock will convert to
>> sleepable lock in Preempt-RT kernel, but raw spinlock is not change).
>> 
>> acquire raw_spinlock -> acquire spinlock  will trigger lockdep warning.
> 
> Is this really safe in the long run though? I seem to remember there are
> weird locking dependencies if RCU is used from within the scheduler [1].
> 
> I prefer to keep it as irq_work_queue() unless you are seeing some benefit.
> Generally, there has to be a 'win' or other justification for adding more
> risk.

On second thought, you are deleting a decent number of lines.

What do others think?

I will take a closer look later; I am interested in researching the new lock dependency this adds.

 - Joel


* Re: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in call_rcu_tasks_generic()
  2023-02-23 16:57       ` Joel Fernandes
@ 2023-02-23 17:13         ` Paul E. McKenney
  0 siblings, 0 replies; 15+ messages in thread
From: Paul E. McKenney @ 2023-02-23 17:13 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: Zhang, Qiang1, Zhuo, Qiuxu, frederic, quic_neeraju, rcu, linux-kernel

On Thu, Feb 23, 2023 at 11:57:54AM -0500, Joel Fernandes wrote:
> 
> 
> > On Feb 23, 2023, at 11:10 AM, Joel Fernandes <joel@joelfernandes.org> wrote:
> > 
> > On Thu, Feb 23, 2023 at 08:43:05AM +0000, Zhang, Qiang1 wrote:
> >>> From: Zqiang <qiang1.zhang@intel.com>
> >>> Sent: Thursday, February 23, 2023 2:30 PM
> >>> To: paulmck@kernel.org; frederic@kernel.org; quic_neeraju@quicinc.com;
> >>> joel@joelfernandes.org
> >>> Cc: rcu@vger.kernel.org; linux-kernel@vger.kernel.org
> >>> Subject: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in
> >>> call_rcu_tasks_generic()
> >>> 
> >>> According to commit '3063b33a347c ("Avoid raw-spinlocked wakeups from
> >>> call_rcu_tasks_generic()")', the grace-period kthread is delayed to wakeup
> >>> using irq_work_queue() is because if the caller of
> >>> call_rcu_tasks_generic() holds a raw spinlock, when the kernel is built with
> >>> CONFIG_PROVE_RAW_LOCK_NESTING=y, due to a spinlock will be hold in
> >>> wake_up(), so the lockdep splats will happen. but now using
> >>> rcuwait_wake_up() to wakeup grace-period kthread instead of wake_up(), in
> >>> rcuwait_wake_up() no spinlock will be acquired, so this commit remove using
> >>> 
> >>> There are still spinlock-acquisition and spinlock-release invocations within the call path from rcuwait_wake_up().
> >>> 
> >>> rcuwait_wake_up() -> wake_up_process() -> try_to_wake_up(), then:
> >>> 
> >>>   raw_spin_lock_irqsave()
> >>>   ...
> >>>   raw_spin_unlock_irqrestore
> >> 
> >> Yes, but this is raw_spinlock acquisition and release(note: spinlock will convert to
> >> sleepable lock in Preempt-RT kernel, but raw spinlock is not change).
> >> 
> >> acquire raw_spinlock -> acquire spinlock  will trigger lockdep warning.
> > 
> > Is this really safe in the long run though? I seem to remember there are
> > weird locking dependencies if RCU is used from within the scheduler [1].
> > 
> > I prefer to keep it as irq_work_queue() unless you are seeing some benefit.
> > Generally, there has to be a 'win' or other justification for adding more
> > risk.
> 
> On second thought, you are deleting a decent number of lines.
> 
> What do others think?
> 
> I will take a closer look later, I am interested in researching the new lock dependency this adds.

One place to start is rcu_read_unlock_trace_special(), keeping firmly
in mind that rcu_read_unlock_trace() is intended to be invoked from a
great many places.

							Thanx, Paul


* RE: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in call_rcu_tasks_generic()
  2023-02-23 16:10     ` Joel Fernandes
  2023-02-23 16:57       ` Joel Fernandes
@ 2023-02-24  0:36       ` Zhang, Qiang1
  2023-02-24  2:25         ` Joel Fernandes
  1 sibling, 1 reply; 15+ messages in thread
From: Zhang, Qiang1 @ 2023-02-24  0:36 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: Zhuo, Qiuxu, paulmck, frederic, quic_neeraju, rcu, linux-kernel

On Thu, Feb 23, 2023 at 08:43:05AM +0000, Zhang, Qiang1 wrote:
> > From: Zqiang <qiang1.zhang@intel.com>
> > Sent: Thursday, February 23, 2023 2:30 PM
> > To: paulmck@kernel.org; frederic@kernel.org; quic_neeraju@quicinc.com;
> > joel@joelfernandes.org
> > Cc: rcu@vger.kernel.org; linux-kernel@vger.kernel.org
> > Subject: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in
> > call_rcu_tasks_generic()
> > 
> > According to commit '3063b33a347c ("Avoid raw-spinlocked wakeups from
> > call_rcu_tasks_generic()")', the grace-period kthread is delayed to wakeup
> > using irq_work_queue() is because if the caller of
> > call_rcu_tasks_generic() holds a raw spinlock, when the kernel is built with
> > CONFIG_PROVE_RAW_LOCK_NESTING=y, due to a spinlock will be hold in
> > wake_up(), so the lockdep splats will happen. but now using
> > rcuwait_wake_up() to wakeup grace-period kthread instead of wake_up(), in
> > rcuwait_wake_up() no spinlock will be acquired, so this commit remove using
> >
> >There are still spinlock-acquisition and spinlock-release invocations within the call path from rcuwait_wake_up().
> >
> >rcuwait_wake_up() -> wake_up_process() -> try_to_wake_up(), then:
> >
> >    raw_spin_lock_irqsave()
> >    ...
> >    raw_spin_unlock_irqrestore
> 
> Yes, but this is raw_spinlock acquisition and release(note: spinlock will convert to
> sleepable lock in Preempt-RT kernel, but raw spinlock is not change).
> 
> acquire raw_spinlock -> acquire spinlock  will trigger lockdep warning.
>
>Is this really safe in the long run though? I seem to remember there are
>weird locking dependencies if RCU is used from within the scheduler [1].
>


I have been running rcutorture with rcutorture.torture_type=tasks-tracing;
so far no problems have been found.


>I prefer to keep it as irq_work_queue() unless you are seeing some benefit.
>Generally, there has to be a 'win' or other justification for adding more
>risk.
>
>thanks,
>
>- Joel
>[1] http://www.joelfernandes.org/rcu/scheduler/locking/2019/09/02/rcu-schedlocks.html


The problem in that link is that, in an earlier RCU version, having
rcu_read_unlock_special() invoke a wakeup and enter the scheduler could
lead to deadlock. But my modification is to call_rcu_tasks_generic();
even if there is a lock-dependency problem, we should pay more
attention to rcu_read_unlock_trace_special().

Thanks
Zqiang


* Re: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in call_rcu_tasks_generic()
  2023-02-24  0:36       ` Zhang, Qiang1
@ 2023-02-24  2:25         ` Joel Fernandes
  2023-02-24  2:35           ` Joel Fernandes
  0 siblings, 1 reply; 15+ messages in thread
From: Joel Fernandes @ 2023-02-24  2:25 UTC (permalink / raw)
  To: Zhang, Qiang1
  Cc: Zhuo, Qiuxu, paulmck, frederic, quic_neeraju, rcu, linux-kernel

On Fri, Feb 24, 2023 at 12:36:05AM +0000, Zhang, Qiang1 wrote:
> On Thu, Feb 23, 2023 at 08:43:05AM +0000, Zhang, Qiang1 wrote:
> > > From: Zqiang <qiang1.zhang@intel.com>
> > > Sent: Thursday, February 23, 2023 2:30 PM
> > > To: paulmck@kernel.org; frederic@kernel.org; quic_neeraju@quicinc.com;
> > > joel@joelfernandes.org
> > > Cc: rcu@vger.kernel.org; linux-kernel@vger.kernel.org
> > > Subject: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in
> > > call_rcu_tasks_generic()
> > > 
> > > According to commit '3063b33a347c ("Avoid raw-spinlocked wakeups from
> > > call_rcu_tasks_generic()")', the grace-period kthread is delayed to wakeup
> > > using irq_work_queue() is because if the caller of
> > > call_rcu_tasks_generic() holds a raw spinlock, when the kernel is built with
> > > CONFIG_PROVE_RAW_LOCK_NESTING=y, due to a spinlock will be hold in
> > > wake_up(), so the lockdep splats will happen. but now using
> > > rcuwait_wake_up() to wakeup grace-period kthread instead of wake_up(), in
> > > rcuwait_wake_up() no spinlock will be acquired, so this commit remove using
> > >
> > >There are still spinlock-acquisition and spinlock-release invocations within the call path from rcuwait_wake_up().
> > >
> > >rcuwait_wake_up() -> wake_up_process() -> try_to_wake_up(), then:
> > >
> > >    raw_spin_lock_irqsave()
> > >    ...
> > >    raw_spin_unlock_irqrestore
> > 
> > Yes, but this is raw_spinlock acquisition and release(note: spinlock will convert to
> > sleepable lock in Preempt-RT kernel, but raw spinlock is not change).
> > 
> > acquire raw_spinlock -> acquire spinlock  will trigger lockdep warning.
> >
> >Is this really safe in the long run though? I seem to remember there are
> >weird locking dependencies if RCU is used from within the scheduler [1].
> >
> 
> 
> I have  been running rcutorture with rcutorture.type = tasks-tracing,
> so far no problems have been found.
> 
> 
> >I prefer to keep it as irq_work_queue() unless you are seeing some benefit.
> >Generally, there has to be a 'win' or other justification for adding more
> >risk.
> >
> >thanks,
> >
> >- Joel
> >[1] http://www.joelfernandes.org/rcu/scheduler/locking/2019/09/02/rcu-schedlocks.html
> 
> 
> The problem in this link,  in an earlier RCU version, rcu_read_unlock_special()
> Invoke wakeup and enter scheduler can lead to deadlock, but my modification is for
> call_rcu_tasks_generic(), even if there is a lock dependency problem, we should pay
> more attention to rcu_read_unlock_trace_special()

Consider ABBA deadlocks as well, not just self-deadlocks (which IIRC is what
the straight-RCU rcu_read_unlock() issues were about).

What prevents the following scenario?

In the scheduler you have code like this:
                rq = task_rq_lock(p, &rf);
                trace_sched_wait_task(p);

Someone can hook up a BPF program to that tracepoint that then calls
rcu_read_unlock_trace() -> rcu_read_unlock_trace_special(). All of
this while holding the rq and pi scheduler locks.

That's A (rq lock) -> B (rtpcp lock).

In another path, your change adds the following dependency due to doing
wakeup under the rtpcp lock.

That's call_rcu_tasks_generic() -> B (rtpcp lock) -> A (rq lock).

Maybe there is some other state that prevents this case, but it still makes
me queasy, especially since there is perhaps no benefit beyond deleting a
few lines of code.

Either way, nice observation!

Btw, the way irq_work works is quite interesting: it sends a self-IPI
and then runs the callback in hard-IRQ context, without holding any
locks. Another interesting fact is that there is also a "lazy" version
of the irq_work API (IRQ_WORK_INIT_LAZY), which currently seems to be
used by printk. It executes the work from the scheduler tick instead of
an IPI handler, unless the tick is stopped.
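
For context, a minimal sketch of that pattern (hypothetical handler, not
the rcu-tasks code itself):

    #include <linux/irq_work.h>

    static void my_wakeup_handler(struct irq_work *iwp)
    {
            /* Runs from the self-IPI in hard-IRQ context, with no
             * caller-held locks, so it may safely do a wakeup. */
    }

    static struct irq_work my_work = IRQ_WORK_INIT_HARD(my_wakeup_handler);

    static void defer_wakeup(void)
    {
            /* Safe to call even while holding raw spinlocks. */
            irq_work_queue(&my_work);
    }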

thanks,

 - Joel



* Re: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in call_rcu_tasks_generic()
  2023-02-24  2:25         ` Joel Fernandes
@ 2023-02-24  2:35           ` Joel Fernandes
  2023-02-24  2:36             ` Joel Fernandes
  0 siblings, 1 reply; 15+ messages in thread
From: Joel Fernandes @ 2023-02-24  2:35 UTC (permalink / raw)
  To: Zhang, Qiang1
  Cc: Zhuo, Qiuxu, paulmck, frederic, quic_neeraju, rcu, linux-kernel

On Thu, Feb 23, 2023 at 9:25 PM Joel Fernandes <joel@joelfernandes.org> wrote:
>
> On Fri, Feb 24, 2023 at 12:36:05AM +0000, Zhang, Qiang1 wrote:
> > On Thu, Feb 23, 2023 at 08:43:05AM +0000, Zhang, Qiang1 wrote:
> > > > From: Zqiang <qiang1.zhang@intel.com>
> > > > Sent: Thursday, February 23, 2023 2:30 PM
> > > > To: paulmck@kernel.org; frederic@kernel.org; quic_neeraju@quicinc.com;
> > > > joel@joelfernandes.org
> > > > Cc: rcu@vger.kernel.org; linux-kernel@vger.kernel.org
> > > > Subject: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in
> > > > call_rcu_tasks_generic()
> > > >
> > > > According to commit '3063b33a347c ("Avoid raw-spinlocked wakeups from
> > > > call_rcu_tasks_generic()")', the grace-period kthread is delayed to wakeup
> > > > using irq_work_queue() is because if the caller of
> > > > call_rcu_tasks_generic() holds a raw spinlock, when the kernel is built with
> > > > CONFIG_PROVE_RAW_LOCK_NESTING=y, due to a spinlock will be hold in
> > > > wake_up(), so the lockdep splats will happen. but now using
> > > > rcuwait_wake_up() to wakeup grace-period kthread instead of wake_up(), in
> > > > rcuwait_wake_up() no spinlock will be acquired, so this commit remove using
> > > >
> > > >There are still spinlock-acquisition and spinlock-release invocations within the call path from rcuwait_wake_up().
> > > >
> > > >rcuwait_wake_up() -> wake_up_process() -> try_to_wake_up(), then:
> > > >
> > > >    raw_spin_lock_irqsave()
> > > >    ...
> > > >    raw_spin_unlock_irqrestore
> > >
> > > Yes, but this is raw_spinlock acquisition and release(note: spinlock will convert to
> > > sleepable lock in Preempt-RT kernel, but raw spinlock is not change).
> > >
> > > acquire raw_spinlock -> acquire spinlock  will trigger lockdep warning.
> > >
> > >Is this really safe in the long run though? I seem to remember there are
> > >weird locking dependencies if RCU is used from within the scheduler [1].
> > >
> >
> >
> > I have  been running rcutorture with rcutorture.type = tasks-tracing,
> > so far no problems have been found.
> >
> >
> > >I prefer to keep it as irq_work_queue() unless you are seeing some benefit.
> > >Generally, there has to be a 'win' or other justification for adding more
> > >risk.
> > >
> > >thanks,
> > >
> > >- Joel
> > >[1] http://www.joelfernandes.org/rcu/scheduler/locking/2019/09/02/rcu-schedlocks.html
> >
> >
> > The problem in this link,  in an earlier RCU version, rcu_read_unlock_special()
> > Invoke wakeup and enter scheduler can lead to deadlock, but my modification is for
> > call_rcu_tasks_generic(), even if there is a lock dependency problem, we should pay
> > more attention to rcu_read_unlock_trace_special()
>
> Consider ABBA deadlocks as well, not just self-deadlocks (which IIRC is what
> the straight-RCU rcu_read_unlock() issues were about).
>
> What prevents the following scenario?
>
> In the scheduler you have code like this:
>                 rq = task_rq_lock(p, &rf);
>                 trace_sched_wait_task(p);
>
> Someone can hook up a BPF program to that tracepoint that then calls
> rcu_read_unlock_trace() -> rcu_read_unlock_trace_special(). All of
> this while holding the rq and pi scheduler locks.
>
> That's A (rq lock) -> B (rtpcp lock).
>
> In another path, your change adds the following dependency due to doing
> wakeup under the rtpcp lock.
>
> That's call_rcu_tasks_generic() -> B (rtpcp lock) -> A (rq lock).

I would like to correct this last statement. That cannot happen but
the concern I guess is, can the following happen due to the change?

call_rcu_tasks_generic() -> B (some BPF lock) -> A (rq lock)

So by doing a wakeup in this change, you have added that dependency for
callers of call_rcu_tasks_trace(). Now, the BPF program is called
from the tracepoint above and you have an ABBA deadlock possibility.

 - Joel


* Re: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in call_rcu_tasks_generic()
  2023-02-24  2:35           ` Joel Fernandes
@ 2023-02-24  2:36             ` Joel Fernandes
  2023-02-24  3:04               ` Zhang, Qiang1
  0 siblings, 1 reply; 15+ messages in thread
From: Joel Fernandes @ 2023-02-24  2:36 UTC (permalink / raw)
  To: Zhang, Qiang1
  Cc: Zhuo, Qiuxu, paulmck, frederic, quic_neeraju, rcu, linux-kernel

On Thu, Feb 23, 2023 at 9:35 PM Joel Fernandes <joel@joelfernandes.org> wrote:
>
> On Thu, Feb 23, 2023 at 9:25 PM Joel Fernandes <joel@joelfernandes.org> wrote:
> >
> > On Fri, Feb 24, 2023 at 12:36:05AM +0000, Zhang, Qiang1 wrote:
> > > On Thu, Feb 23, 2023 at 08:43:05AM +0000, Zhang, Qiang1 wrote:
> > > > > From: Zqiang <qiang1.zhang@intel.com>
> > > > > Sent: Thursday, February 23, 2023 2:30 PM
> > > > > To: paulmck@kernel.org; frederic@kernel.org; quic_neeraju@quicinc.com;
> > > > > joel@joelfernandes.org
> > > > > Cc: rcu@vger.kernel.org; linux-kernel@vger.kernel.org
> > > > > Subject: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in
> > > > > call_rcu_tasks_generic()
> > > > >
> > > > > According to commit '3063b33a347c ("Avoid raw-spinlocked wakeups from
> > > > > call_rcu_tasks_generic()")', the grace-period kthread is delayed to wakeup
> > > > > using irq_work_queue() is because if the caller of
> > > > > call_rcu_tasks_generic() holds a raw spinlock, when the kernel is built with
> > > > > CONFIG_PROVE_RAW_LOCK_NESTING=y, due to a spinlock will be hold in
> > > > > wake_up(), so the lockdep splats will happen. but now using
> > > > > rcuwait_wake_up() to wakeup grace-period kthread instead of wake_up(), in
> > > > > rcuwait_wake_up() no spinlock will be acquired, so this commit remove using
> > > > >
> > > > >There are still spinlock-acquisition and spinlock-release invocations within the call path from rcuwait_wake_up().
> > > > >
> > > > >rcuwait_wake_up() -> wake_up_process() -> try_to_wake_up(), then:
> > > > >
> > > > >    raw_spin_lock_irqsave()
> > > > >    ...
> > > > >    raw_spin_unlock_irqrestore
> > > >
> > > > Yes, but this is raw_spinlock acquisition and release(note: spinlock will convert to
> > > > sleepable lock in Preempt-RT kernel, but raw spinlock is not change).
> > > >
> > > > acquire raw_spinlock -> acquire spinlock  will trigger lockdep warning.
> > > >
> > > >Is this really safe in the long run though? I seem to remember there are
> > > >weird locking dependencies if RCU is used from within the scheduler [1].
> > > >
> > >
> > >
> > > I have  been running rcutorture with rcutorture.type = tasks-tracing,
> > > so far no problems have been found.
> > >
> > >
> > > >I prefer to keep it as irq_work_queue() unless you are seeing some benefit.
> > > >Generally, there has to be a 'win' or other justification for adding more
> > > >risk.
> > > >
> > > >thanks,
> > > >
> > > >- Joel
> > > >[1] http://www.joelfernandes.org/rcu/scheduler/locking/2019/09/02/rcu-schedlocks.html
> > >
> > >
> > > The problem in this link,  in an earlier RCU version, rcu_read_unlock_special()
> > > Invoke wakeup and enter scheduler can lead to deadlock, but my modification is for
> > > call_rcu_tasks_generic(), even if there is a lock dependency problem, we should pay
> > > more attention to rcu_read_unlock_trace_special()
> >
> > Consider ABBA deadlocks as well, not just self-deadlocks (which IIRC is what
> > the straight-RCU rcu_read_unlock() issues were about).
> >
> > What prevents the following scenario?
> >
> > In the scheduler you have code like this:
> >                 rq = task_rq_lock(p, &rf);
> >                 trace_sched_wait_task(p);
> >
> > Someone can hook up a BPF program to that tracepoint that then calls
> > rcu_read_unlock_trace() -> rcu_read_unlock_trace_special(). All of
> > this while holding the rq and pi scheduler locks.
> >
> > That's A (rq lock) -> B (rtpcp lock).
> >
> > In another path, your change adds the following dependency due to doing
> > wakeup under the rtpcp lock.
> >
> > That's call_rcu_tasks_generic() -> B (rtpcp lock) -> A (rq lock).
>
> I would like to correct this last statement. That cannot happen but
> the concern I guess is, can the following happen due to the change?
>
> call_rcu_tasks_generic() -> B (some BPF lock) -> A (rq lock)
>

Aaargh, one more correction:
B (some BPF lock) -> call_rcu_tasks_generic() -> A (rq lock)
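
Sketching the two chains ("bpf_lock" being a stand-in for whatever lock
the BPF program happens to take):

    /*
     * Chain 1: a BPF program holding its own lock queues a callback:
     *   lock(bpf_lock) -> call_rcu_tasks_generic() -> rcuwait_wake_up()
     *     -> try_to_wake_up() -> lock(rq->lock)
     *
     * Chain 2: a scheduler tracepoint fires with the rq lock held and
     * runs a BPF program that takes the same lock:
     *   lock(rq->lock) -> trace_sched_wait_task() -> BPF prog
     *     -> lock(bpf_lock)
     *
     * Chain 1 orders bpf_lock before rq->lock; chain 2 orders rq->lock
     * before bpf_lock: ABBA.
     */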

 ;-)

-Joel


* RE: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in call_rcu_tasks_generic()
  2023-02-24  2:36             ` Joel Fernandes
@ 2023-02-24  3:04               ` Zhang, Qiang1
  2023-02-24  3:11                 ` Joel Fernandes
  0 siblings, 1 reply; 15+ messages in thread
From: Zhang, Qiang1 @ 2023-02-24  3:04 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: Zhuo, Qiuxu, paulmck, frederic, quic_neeraju, rcu, linux-kernel

On Thu, Feb 23, 2023 at 9:35 PM Joel Fernandes <joel@joelfernandes.org> wrote:
>
> On Thu, Feb 23, 2023 at 9:25 PM Joel Fernandes <joel@joelfernandes.org> wrote:
> >
> > On Fri, Feb 24, 2023 at 12:36:05AM +0000, Zhang, Qiang1 wrote:
> > > On Thu, Feb 23, 2023 at 08:43:05AM +0000, Zhang, Qiang1 wrote:
> > > > > From: Zqiang <qiang1.zhang@intel.com>
> > > > > Sent: Thursday, February 23, 2023 2:30 PM
> > > > > To: paulmck@kernel.org; frederic@kernel.org; quic_neeraju@quicinc.com;
> > > > > joel@joelfernandes.org
> > > > > Cc: rcu@vger.kernel.org; linux-kernel@vger.kernel.org
> > > > > Subject: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in
> > > > > call_rcu_tasks_generic()
> > > > >
> > > > > According to commit '3063b33a347c ("Avoid raw-spinlocked wakeups from
> > > > > call_rcu_tasks_generic()")', the grace-period kthread is delayed to wakeup
> > > > > using irq_work_queue() is because if the caller of
> > > > > call_rcu_tasks_generic() holds a raw spinlock, when the kernel is built with
> > > > > CONFIG_PROVE_RAW_LOCK_NESTING=y, due to a spinlock will be hold in
> > > > > wake_up(), so the lockdep splats will happen. but now using
> > > > > rcuwait_wake_up() to wakeup grace-period kthread instead of wake_up(), in
> > > > > rcuwait_wake_up() no spinlock will be acquired, so this commit remove using
> > > > >
> > > > >There are still spinlock-acquisition and spinlock-release invocations within the call path from rcuwait_wake_up().
> > > > >
> > > > >rcuwait_wake_up() -> wake_up_process() -> try_to_wake_up(), then:
> > > > >
> > > > >    raw_spin_lock_irqsave()
> > > > >    ...
> > > > >    raw_spin_unlock_irqrestore
> > > >
> > > > Yes, but this is raw_spinlock acquisition and release(note: spinlock will convert to
> > > > sleepable lock in Preempt-RT kernel, but raw spinlock is not change).
> > > >
> > > > acquire raw_spinlock -> acquire spinlock  will trigger lockdep warning.
> > > >
> > > >Is this really safe in the long run though? I seem to remember there are
> > > >weird locking dependencies if RCU is used from within the scheduler [1].
> > > >
> > >
> > >
> > > I have  been running rcutorture with rcutorture.type = tasks-tracing,
> > > so far no problems have been found.
> > >
> > >
> > > >I prefer to keep it as irq_work_queue() unless you are seeing some benefit.
> > > >Generally, there has to be a 'win' or other justification for adding more
> > > >risk.
> > > >
> > > >thanks,
> > > >
> > > >- Joel
> > > >[1] http://www.joelfernandes.org/rcu/scheduler/locking/2019/09/02/rcu-schedlocks.html
> > >
> > >
> > > The problem in this link,  in an earlier RCU version, rcu_read_unlock_special()
> > > Invoke wakeup and enter scheduler can lead to deadlock, but my modification is for
> > > call_rcu_tasks_generic(), even if there is a lock dependency problem, we should pay
> > > more attention to rcu_read_unlock_trace_special()
> >
> > Consider ABBA deadlocks as well, not just self-deadlocks (which IIRC is what
> > the straight-RCU rcu_read_unlock() issues were about).
> >
> > What prevents the following scenario?
> >
> > In the scheduler you have code like this:
> >                 rq = task_rq_lock(p, &rf);
> >                 trace_sched_wait_task(p);
> >
> > Someone can hook up a BPF program to that tracepoint that then calls
> > rcu_read_unlock_trace() -> rcu_read_unlock_trace_special(). All of
> > this while holding the rq and pi scheduler locks.
> >
> > That's A (rq lock) -> B (rtpcp lock).

In rcu_read_unlock_trace_special(), the premise for acquiring the rtpcp lock
is that a task switch occurred earlier within the rcu_read_lock_trace()/
rcu_read_unlock_trace() critical section. But once we already hold the rq
lock, no task switch can occur within that critical section.
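
A paraphrased sketch of the branch in question (simplified from
kernel/rcu/tasks.h; details elided):

    /*
     * rcu_read_unlock_trace_special(t):
     *   if (t->trc_reader_special.b.blocked) {
     *           // The reader blocked in its critical section; only this
     *           // path takes the rtpcp lock, to remove the task from
     *           // the blocked-readers list.
     *   }
     */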

Please correct me if my understanding is wrong.

Thanks
Zqiang


* Re: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in call_rcu_tasks_generic()
  2023-02-24  3:04               ` Zhang, Qiang1
@ 2023-02-24  3:11                 ` Joel Fernandes
  2023-02-24  3:22                   ` Zhang, Qiang1
  0 siblings, 1 reply; 15+ messages in thread
From: Joel Fernandes @ 2023-02-24  3:11 UTC (permalink / raw)
  To: Zhang, Qiang1
  Cc: Zhuo, Qiuxu, paulmck, frederic, quic_neeraju, rcu, linux-kernel

On Thu, Feb 23, 2023 at 10:05 PM Zhang, Qiang1 <qiang1.zhang@intel.com> wrote:
>
> On Thu, Feb 23, 2023 at 9:35 PM Joel Fernandes <joel@joelfernandes.org> wrote:
> >
> > On Thu, Feb 23, 2023 at 9:25 PM Joel Fernandes <joel@joelfernandes.org> wrote:
> > >
> > > On Fri, Feb 24, 2023 at 12:36:05AM +0000, Zhang, Qiang1 wrote:
> > > > On Thu, Feb 23, 2023 at 08:43:05AM +0000, Zhang, Qiang1 wrote:
> > > > > > From: Zqiang <qiang1.zhang@intel.com>
> > > > > > Sent: Thursday, February 23, 2023 2:30 PM
> > > > > > To: paulmck@kernel.org; frederic@kernel.org; quic_neeraju@quicinc.com;
> > > > > > joel@joelfernandes.org
> > > > > > Cc: rcu@vger.kernel.org; linux-kernel@vger.kernel.org
> > > > > > Subject: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in
> > > > > > call_rcu_tasks_generic()
> > > > > >
> > > > > > According to commit '3063b33a347c ("Avoid raw-spinlocked wakeups from
> > > > > > call_rcu_tasks_generic()")', the grace-period kthread is delayed to wakeup
> > > > > > using irq_work_queue() is because if the caller of
> > > > > > call_rcu_tasks_generic() holds a raw spinlock, when the kernel is built with
> > > > > > CONFIG_PROVE_RAW_LOCK_NESTING=y, due to a spinlock will be hold in
> > > > > > wake_up(), so the lockdep splats will happen. but now using
> > > > > > rcuwait_wake_up() to wakeup grace-period kthread instead of wake_up(), in
> > > > > > rcuwait_wake_up() no spinlock will be acquired, so this commit remove using
> > > > > >
> > > > > >There are still spinlock-acquisition and spinlock-release invocations within the call path from rcuwait_wake_up().
> > > > > >
> > > > > >rcuwait_wake_up() -> wake_up_process() -> try_to_wake_up(), then:
> > > > > >
> > > > > >    raw_spin_lock_irqsave()
> > > > > >    ...
> > > > > >    raw_spin_unlock_irqrestore()
> > > > >
> > > > > Yes, but those are raw_spinlock acquisitions and releases (note: spinlocks become
> > > > > sleepable locks in a PREEMPT_RT kernel, but raw spinlocks do not change).
> > > > >
> > > > > Acquiring a spinlock while holding a raw_spinlock is what triggers the lockdep warning.
> > > > >
> > > > >Is this really safe in the long run though? I seem to remember there are
> > > > >weird locking dependencies if RCU is used from within the scheduler [1].
> > > > >
> > > >
> > > >
> > > > I have been running rcutorture with rcutorture.type = tasks-tracing;
> > > > so far no problems have been found.
> > > >
> > > >
> > > > >I prefer to keep it as irq_work_queue() unless you are seeing some benefit.
> > > > >Generally, there has to be a 'win' or other justification for adding more
> > > > >risk.
> > > > >
> > > > >thanks,
> > > > >
> > > > >- Joel
> > > > >[1] http://www.joelfernandes.org/rcu/scheduler/locking/2019/09/02/rcu-schedlocks.html
> > > >
> > > >
> > > > The problem in that link is that, in an earlier RCU version, rcu_read_unlock_special()
> > > > invoking a wakeup and entering the scheduler could lead to deadlock. But my modification
> > > > is to call_rcu_tasks_generic(), so even if there is a lock dependency problem, we should
> > > > pay more attention to rcu_read_unlock_trace_special().
> > >
> > > Consider ABBA deadlocks as well, not just self-deadlocks (which IIRC is what
> > > the straight-RCU rcu_read_unlock() issues were about).
> > >
> > > What prevents the following scenario?
> > >
> > > In the scheduler you have code like this:
> > >                 rq = task_rq_lock(p, &rf);
> > >                 trace_sched_wait_task(p);
> > >
> > > Someone can hook up a BPF program to that tracepoint that then calls
> > > rcu_read_unlock_trace() -> rcu_read_unlock_trace_special(). All of
> > > this while holding the rq and pi scheduler locks.
> > >
> > > That's A (rq lock) -> B (rtpcp lock).
>
> In rcu_read_unlock_trace_special(), the rtpcp lock is acquired only if the task
> switched out earlier within its rcu_read_lock_trace()/rcu_read_unlock_trace()
> critical section. But once we already hold the rq lock, no task switch can occur
> within such a critical section.
>
> Please correct me if my understanding is wrong.

Yes, but in the next reply I corrected myself and I am still concerned
about ABBA. There is obviously *some lock* that is held by the callers
of call_rcu_tasks*(). So there is a dependency that gets created
between _that_ lock and the rq lock, if you do a wakeup here.  And I
am not sure whether that lock is also acquired when the BPF program
runs. If it is, then the BPF programs may hang. It is probably worth
checking with the BPF guys.
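
Sketching that concern with a hypothetical lock X (X stands for whatever lock
such a caller might hold; this is an illustration, not code from the tree):

	/* Path 1: a caller of call_rcu_tasks_generic() holding X. */
	spin_lock(&X);
	call_rcu_tasks_generic(rhp, func, rtp); /* direct wakeup takes rq lock */
	spin_unlock(&X);                        /* records X -> rq-lock order  */

	/* Path 2: a BPF program attached to a scheduler tracepoint. */
	rq = task_rq_lock(p, &rf);              /* rq lock held */
	/* ... BPF program runs and takes X ... */
	task_rq_unlock(rq, p, &rf);             /* records rq-lock -> X order  */

If both orders can occur, that is the classic ABBA inversion lockdep would
flag.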

More importantly, do you see a benefit with this change in terms of
anything more than deleting a few lines of code? Paul typically favors
robustness and guard rails (as do I), unless there is significant
benefit in performance, power or both.

Thanks,

 - Joel

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in call_rcu_tasks_generic()
  2023-02-24  3:11                 ` Joel Fernandes
@ 2023-02-24  3:22                   ` Zhang, Qiang1
  2023-02-24  4:30                     ` Joel Fernandes
  0 siblings, 1 reply; 15+ messages in thread
From: Zhang, Qiang1 @ 2023-02-24  3:22 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: Zhuo, Qiuxu, paulmck, frederic, quic_neeraju, rcu, linux-kernel

On Thu, Feb 23, 2023 at 10:05 PM Zhang, Qiang1 <qiang1.zhang@intel.com> wrote:
>
> On Thu, Feb 23, 2023 at 9:35 PM Joel Fernandes <joel@joelfernandes.org> wrote:
> >
> > On Thu, Feb 23, 2023 at 9:25 PM Joel Fernandes <joel@joelfernandes.org> wrote:
> > >
> > > On Fri, Feb 24, 2023 at 12:36:05AM +0000, Zhang, Qiang1 wrote:
> > > > On Thu, Feb 23, 2023 at 08:43:05AM +0000, Zhang, Qiang1 wrote:
> > > > > > From: Zqiang <qiang1.zhang@intel.com>
> > > > > > Sent: Thursday, February 23, 2023 2:30 PM
> > > > > > To: paulmck@kernel.org; frederic@kernel.org; quic_neeraju@quicinc.com;
> > > > > > joel@joelfernandes.org
> > > > > > Cc: rcu@vger.kernel.org; linux-kernel@vger.kernel.org
> > > > > > Subject: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in
> > > > > > call_rcu_tasks_generic()
> > > > > >
> > > > > > According to commit '3063b33a347c ("Avoid raw-spinlocked wakeups from
> > > > > > call_rcu_tasks_generic()")', the grace-period kthread is delayed to wakeup
> > > > > > using irq_work_queue() is because if the caller of
> > > > > > call_rcu_tasks_generic() holds a raw spinlock, when the kernel is built with
> > > > > > CONFIG_PROVE_RAW_LOCK_NESTING=y, due to a spinlock will be hold in
> > > > > > wake_up(), so the lockdep splats will happen. but now using
> > > > > > rcuwait_wake_up() to wakeup grace-period kthread instead of wake_up(), in
> > > > > > rcuwait_wake_up() no spinlock will be acquired, so this commit remove using
> > > > > >
> > > > > >There are still spinlock-acquisition and spinlock-release invocations within the call path from rcuwait_wake_up().
> > > > > >
> > > > > >rcuwait_wake_up() -> wake_up_process() -> try_to_wake_up(), then:
> > > > > >
> > > > > >    raw_spin_lock_irqsave()
> > > > > >    ...
> > > > > > >    raw_spin_unlock_irqrestore()
> > > > >
> > > > > > Yes, but those are raw_spinlock acquisitions and releases (note: spinlocks become
> > > > > > sleepable locks in a PREEMPT_RT kernel, but raw spinlocks do not change).
> > > > > >
> > > > > > Acquiring a spinlock while holding a raw_spinlock is what triggers the lockdep warning.
> > > > >
> > > > >Is this really safe in the long run though? I seem to remember there are
> > > > >weird locking dependencies if RCU is used from within the scheduler [1].
> > > > >
> > > >
> > > >
> > > > I have been running rcutorture with rcutorture.type = tasks-tracing;
> > > > so far no problems have been found.
> > > >
> > > >
> > > > >I prefer to keep it as irq_work_queue() unless you are seeing some benefit.
> > > > >Generally, there has to be a 'win' or other justification for adding more
> > > > >risk.
> > > > >
> > > > >thanks,
> > > > >
> > > > >- Joel
> > > > >[1] http://www.joelfernandes.org/rcu/scheduler/locking/2019/09/02/rcu-schedlocks.html
> > > >
> > > >
> > > > The problem in that link is that, in an earlier RCU version, rcu_read_unlock_special()
> > > > invoking a wakeup and entering the scheduler could lead to deadlock. But my modification
> > > > is to call_rcu_tasks_generic(), so even if there is a lock dependency problem, we should
> > > > pay more attention to rcu_read_unlock_trace_special().
> > >
> > > Consider ABBA deadlocks as well, not just self-deadlocks (which IIRC is what
> > > the straight-RCU rcu_read_unlock() issues were about).
> > >
> > > What prevents the following scenario?
> > >
> > > In the scheduler you have code like this:
> > >                 rq = task_rq_lock(p, &rf);
> > >                 trace_sched_wait_task(p);
> > >
> > > Someone can hook up a BPF program to that tracepoint that then calls
> > > rcu_read_unlock_trace() -> rcu_read_unlock_trace_special(). All of
> > > this while holding the rq and pi scheduler locks.
> > >
> > > That's A (rq lock) -> B (rtpcp lock).
>
> In rcu_read_unlock_trace_special(), the rtpcp lock is acquired only if the task
> switched out earlier within its rcu_read_lock_trace()/rcu_read_unlock_trace()
> critical section. But once we already hold the rq lock, no task switch can occur
> within such a critical section.
>
> Please correct me if my understanding is wrong.
>
>Yes, but in the next reply I corrected myself and I am still concerned
>about ABBA. There is obviously *some lock* that is held by the callers
>of call_rcu_tasks*(). So there is a dependency that gets created
>between _that_ lock and the rq lock, if you do a wakeup here.  And I
>am not sure whether that lock is also acquired when the BPF program
>runs. If it is, then the BPF programs may hang. It is probably worth
>checking with the BPF guys.
>
>More importantly, do you see a benefit with this change in terms of
>anything more than deleting a few lines of code? Paul typically favors
>robustness and guard rails (as do I), unless there is significant
>benefit in performance, power or both.

I found that the purpose of using irq_work_queue() here was to solve the lockdep splat
problem, and the intent of my modification was also to avoid an unnecessary IPI. But like
you said, we are indeed not completely sure whether there is a potential lock dependency
problem, so I agree with your opinion.

Thanks
Zqiang

>
>Thanks,
>
> - Joel

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in call_rcu_tasks_generic()
  2023-02-24  3:22                   ` Zhang, Qiang1
@ 2023-02-24  4:30                     ` Joel Fernandes
  0 siblings, 0 replies; 15+ messages in thread
From: Joel Fernandes @ 2023-02-24  4:30 UTC (permalink / raw)
  To: Zhang, Qiang1
  Cc: Zhuo, Qiuxu, paulmck, frederic, quic_neeraju, rcu, linux-kernel

On Thu, Feb 23, 2023 at 10:22 PM Zhang, Qiang1 <qiang1.zhang@intel.com> wrote:
>
> On Thu, Feb 23, 2023 at 10:05 PM Zhang, Qiang1 <qiang1.zhang@intel.com> wrote:
> >
> > On Thu, Feb 23, 2023 at 9:35 PM Joel Fernandes <joel@joelfernandes.org> wrote:
> > >
> > > On Thu, Feb 23, 2023 at 9:25 PM Joel Fernandes <joel@joelfernandes.org> wrote:
> > > >
> > > > On Fri, Feb 24, 2023 at 12:36:05AM +0000, Zhang, Qiang1 wrote:
> > > > > On Thu, Feb 23, 2023 at 08:43:05AM +0000, Zhang, Qiang1 wrote:
> > > > > > > From: Zqiang <qiang1.zhang@intel.com>
> > > > > > > Sent: Thursday, February 23, 2023 2:30 PM
> > > > > > > To: paulmck@kernel.org; frederic@kernel.org; quic_neeraju@quicinc.com;
> > > > > > > joel@joelfernandes.org
> > > > > > > Cc: rcu@vger.kernel.org; linux-kernel@vger.kernel.org
> > > > > > > Subject: [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in
> > > > > > > call_rcu_tasks_generic()
> > > > > > >
> > > > > > > According to commit '3063b33a347c ("Avoid raw-spinlocked wakeups from
> > > > > > > call_rcu_tasks_generic()")', the grace-period kthread is delayed to wakeup
> > > > > > > using irq_work_queue() is because if the caller of
> > > > > > > call_rcu_tasks_generic() holds a raw spinlock, when the kernel is built with
> > > > > > > CONFIG_PROVE_RAW_LOCK_NESTING=y, due to a spinlock will be hold in
> > > > > > > wake_up(), so the lockdep splats will happen. but now using
> > > > > > > rcuwait_wake_up() to wakeup grace-period kthread instead of wake_up(), in
> > > > > > > rcuwait_wake_up() no spinlock will be acquired, so this commit remove using
> > > > > > >
> > > > > > >There are still spinlock-acquisition and spinlock-release invocations within the call path from rcuwait_wake_up().
> > > > > > >
> > > > > > >rcuwait_wake_up() -> wake_up_process() -> try_to_wake_up(), then:
> > > > > > >
> > > > > > >    raw_spin_lock_irqsave()
> > > > > > >    ...
> > > > > > >    raw_spin_unlock_irqrestore()
> > > > > >
> > > > > > Yes, but those are raw_spinlock acquisitions and releases (note: spinlocks become
> > > > > > sleepable locks in a PREEMPT_RT kernel, but raw spinlocks do not change).
> > > > > >
> > > > > > Acquiring a spinlock while holding a raw_spinlock is what triggers the lockdep warning.
> > > > > >
> > > > > >Is this really safe in the long run though? I seem to remember there are
> > > > > >weird locking dependencies if RCU is used from within the scheduler [1].
> > > > > >
> > > > >
> > > > >
> > > > > I have been running rcutorture with rcutorture.type = tasks-tracing;
> > > > > so far no problems have been found.
> > > > >
> > > > >
> > > > > >I prefer to keep it as irq_work_queue() unless you are seeing some benefit.
> > > > > >Generally, there has to be a 'win' or other justification for adding more
> > > > > >risk.
> > > > > >
> > > > > >thanks,
> > > > > >
> > > > > >- Joel
> > > > > >[1] http://www.joelfernandes.org/rcu/scheduler/locking/2019/09/02/rcu-schedlocks.html
> > > > >
> > > > >
> > > > > The problem in that link is that, in an earlier RCU version, rcu_read_unlock_special()
> > > > > invoking a wakeup and entering the scheduler could lead to deadlock. But my modification
> > > > > is to call_rcu_tasks_generic(), so even if there is a lock dependency problem, we should
> > > > > pay more attention to rcu_read_unlock_trace_special().
> > > >
> > > > Consider ABBA deadlocks as well, not just self-deadlocks (which IIRC is what
> > > > the straight-RCU rcu_read_unlock() issues were about).
> > > >
> > > > What prevents the following scenario?
> > > >
> > > > In the scheduler you have code like this:
> > > >                 rq = task_rq_lock(p, &rf);
> > > >                 trace_sched_wait_task(p);
> > > >
> > > > Someone can hook up a BPF program to that tracepoint that then calls
> > > > rcu_read_unlock_trace() -> rcu_read_unlock_trace_special(). All of
> > > > this while holding the rq and pi scheduler locks.
> > > >
> > > > That's A (rq lock) -> B (rtpcp lock).
> >
> > In rcu_read_unlock_trace_special(), the rtpcp lock is acquired only if the task
> > switched out earlier within its rcu_read_lock_trace()/rcu_read_unlock_trace()
> > critical section. But once we already hold the rq lock, no task switch can occur
> > within such a critical section.
> >
> > Please correct me if my understanding is wrong.
> >
> >Yes, but in the next reply I corrected myself and I am still concerned
> >about ABBA. There is obviously *some lock* that is held by the callers
> >of call_rcu_tasks*(). So there is a dependency that gets created
> >between _that_ lock and the rq lock, if you do a wakeup here.  And I
> >am not sure whether that lock is also acquired when the BPF program
> >runs. If it is, then the BPF programs may hang. It is probably worth
> >checking with the BPF guys.
> >
> >More importantly, do you see a benefit with this change in terms of
> >anything more than deleting a few lines of code? Paul typically favors
> >robustness and guard rails (as do I), unless there is significant
> >benefit in performance, power or both.
>
> I found that the purpose of using irq_work_queue() here was to solve the lockdep splat
> problem, and the intent of my modification was also to avoid an unnecessary IPI.

irq_work_queue(), which this code uses, does a self-IPI, unlike
irq_work_queue_on(), which can target another CPU; AFAIK a self-IPI is
significantly cheaper on ARM64 than a cross-CPU IPI. Not sure about x86 though.

Another way to sometimes avoid the IPI is to make the IRQ work lazy. It
will then self-IPI only if the tick is turned off. But I'm not sure
that is any better than leaving the code as-is.
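
For illustration, the lazy flavor would differ only in how the irq_work is
initialized (a sketch with a hypothetical handler name, not a tested patch):

	#include <linux/irq_work.h>

	static void wakeup_handler(struct irq_work *iwp);       /* hypothetical */

	/* Hard irq_work: queueing raises a self-IPI immediately. */
	static struct irq_work w_hard = IRQ_WORK_INIT_HARD(wakeup_handler);

	/* Lazy irq_work: the handler runs from the next scheduler tick;
	 * a self-IPI is raised only if the tick is stopped. */
	static struct irq_work w_lazy = IRQ_WORK_INIT_LAZY(wakeup_handler);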

> But like you said, we are indeed not completely sure whether there is a potential
> lock dependency problem, so I agree with your opinion.

Ok and thanks for digging into it. This was fun!

 - Joel


>
> Thanks
> Zqiang
>
> >
> >Thanks,
> >
> > - Joel

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2023-02-24  4:31 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-02-23  6:30 [PATCH] rcu-tasks: Directly invoke rcuwait_wake_up() in call_rcu_tasks_generic() Zqiang
2023-02-23  8:36 ` Zhuo, Qiuxu
2023-02-23  8:43   ` Zhang, Qiang1
2023-02-23 14:36     ` Zhuo, Qiuxu
2023-02-23 16:10     ` Joel Fernandes
2023-02-23 16:57       ` Joel Fernandes
2023-02-23 17:13         ` Paul E. McKenney
2023-02-24  0:36       ` Zhang, Qiang1
2023-02-24  2:25         ` Joel Fernandes
2023-02-24  2:35           ` Joel Fernandes
2023-02-24  2:36             ` Joel Fernandes
2023-02-24  3:04               ` Zhang, Qiang1
2023-02-24  3:11                 ` Joel Fernandes
2023-02-24  3:22                   ` Zhang, Qiang1
2023-02-24  4:30                     ` Joel Fernandes
