linux-kernel.vger.kernel.org archive mirror
* [PATCH] srcu: Remove srcu_queue_delayed_work_on()
@ 2018-12-11 11:12 Sebastian Andrzej Siewior
  2018-12-11 17:40 ` Paul E. McKenney
  2018-12-12  1:40 ` Joel Fernandes
  0 siblings, 2 replies; 9+ messages in thread
From: Sebastian Andrzej Siewior @ 2018-12-11 11:12 UTC (permalink / raw)
  To: linux-kernel
  Cc: Lai Jiangshan, Paul E. McKenney, Josh Triplett, Steven Rostedt,
	Mathieu Desnoyers, Joel Fernandes, tglx,
	Sebastian Andrzej Siewior

srcu_queue_delayed_work_on() disables preemption (and therefore CPU
hotplug in RCU's case) and then checks, based on its own accounting,
whether a CPU is online. If the CPU is online it uses
queue_delayed_work_on(), otherwise it falls back to queue_delayed_work().
The problem here is that queue_work() on -RT does not work with
preemption disabled.

queue_work_on() also works on an offlined CPU. queue_delayed_work_on()
has the problem that it is possible to program a timer on an offlined
CPU. That timer will fire once the CPU is online again, but until then
it remains programmed and nothing happens.

Add a local timer which fires (after the requested delay) on the local
CPU and then enqueues the work on the specific CPU.

rcutorture testing with SRCU-P for 24h showed no problems.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 include/linux/srcutree.h |  3 ++-
 kernel/rcu/srcutree.c    | 57 ++++++++++++++++++----------------------
 kernel/rcu/tree.c        |  4 ---
 kernel/rcu/tree.h        |  8 ------
 4 files changed, 27 insertions(+), 45 deletions(-)

diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index 6f292bd3e7db7..0faa978c98807 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -45,7 +45,8 @@ struct srcu_data {
 	unsigned long srcu_gp_seq_needed;	/* Furthest future GP needed. */
 	unsigned long srcu_gp_seq_needed_exp;	/* Furthest future exp GP. */
 	bool srcu_cblist_invoking;		/* Invoking these CBs? */
-	struct delayed_work work;		/* Context for CB invoking. */
+	struct timer_list delay_work;		/* Delay for CB invoking */
+	struct work_struct work;		/* Context for CB invoking. */
 	struct rcu_head srcu_barrier_head;	/* For srcu_barrier() use. */
 	struct srcu_node *mynode;		/* Leaf srcu_node. */
 	unsigned long grpmask;			/* Mask for leaf srcu_node */
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 3600d88d8956b..7f041f2435df9 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -58,6 +58,7 @@ static bool __read_mostly srcu_init_done;
 static void srcu_invoke_callbacks(struct work_struct *work);
 static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay);
 static void process_srcu(struct work_struct *work);
+static void srcu_delay_timer(struct timer_list *t);
 
 /* Wrappers for lock acquisition and release, see raw_spin_lock_rcu_node(). */
 #define spin_lock_rcu_node(p)					\
@@ -156,7 +157,8 @@ static void init_srcu_struct_nodes(struct srcu_struct *ssp, bool is_static)
 			snp->grphi = cpu;
 		}
 		sdp->cpu = cpu;
-		INIT_DELAYED_WORK(&sdp->work, srcu_invoke_callbacks);
+		INIT_WORK(&sdp->work, srcu_invoke_callbacks);
+		timer_setup(&sdp->delay_work, srcu_delay_timer, 0);
 		sdp->ssp = ssp;
 		sdp->grpmask = 1 << (cpu - sdp->mynode->grplo);
 		if (is_static)
@@ -386,13 +388,19 @@ void _cleanup_srcu_struct(struct srcu_struct *ssp, bool quiesced)
 	} else {
 		flush_delayed_work(&ssp->work);
 	}
-	for_each_possible_cpu(cpu)
+	for_each_possible_cpu(cpu) {
+		struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
+
 		if (quiesced) {
-			if (WARN_ON(delayed_work_pending(&per_cpu_ptr(ssp->sda, cpu)->work)))
+			if (WARN_ON(timer_pending(&sdp->delay_work)))
+				return; /* Just leak it! */
+			if (WARN_ON(work_pending(&sdp->work)))
 				return; /* Just leak it! */
 		} else {
-			flush_delayed_work(&per_cpu_ptr(ssp->sda, cpu)->work);
+			del_timer_sync(&sdp->delay_work);
+			flush_work(&sdp->work);
 		}
+	}
 	if (WARN_ON(rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) != SRCU_STATE_IDLE) ||
 	    WARN_ON(srcu_readers_active(ssp))) {
 		pr_info("%s: Active srcu_struct %p state: %d\n",
@@ -463,39 +471,23 @@ static void srcu_gp_start(struct srcu_struct *ssp)
 	WARN_ON_ONCE(state != SRCU_STATE_SCAN1);
 }
 
-/*
- * Track online CPUs to guide callback workqueue placement.
- */
-DEFINE_PER_CPU(bool, srcu_online);
 
-void srcu_online_cpu(unsigned int cpu)
+static void srcu_delay_timer(struct timer_list *t)
 {
-	WRITE_ONCE(per_cpu(srcu_online, cpu), true);
+	struct srcu_data *sdp = container_of(t, struct srcu_data, delay_work);
+
+	queue_work_on(sdp->cpu, rcu_gp_wq, &sdp->work);
 }
 
-void srcu_offline_cpu(unsigned int cpu)
-{
-	WRITE_ONCE(per_cpu(srcu_online, cpu), false);
-}
-
-/*
- * Place the workqueue handler on the specified CPU if online, otherwise
- * just run it whereever.  This is useful for placing workqueue handlers
- * that are to invoke the specified CPU's callbacks.
- */
-static bool srcu_queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
-				       struct delayed_work *dwork,
+static void srcu_queue_delayed_work_on(struct srcu_data *sdp,
 				       unsigned long delay)
 {
-	bool ret;
+	if (!delay) {
+		queue_work_on(sdp->cpu, rcu_gp_wq, &sdp->work);
+		return;
+	}
 
-	preempt_disable();
-	if (READ_ONCE(per_cpu(srcu_online, cpu)))
-		ret = queue_delayed_work_on(cpu, wq, dwork, delay);
-	else
-		ret = queue_delayed_work(wq, dwork, delay);
-	preempt_enable();
-	return ret;
+	timer_reduce(&sdp->delay_work, jiffies + delay);
 }
 
 /*
@@ -504,7 +496,7 @@ static bool srcu_queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
  */
 static void srcu_schedule_cbs_sdp(struct srcu_data *sdp, unsigned long delay)
 {
-	srcu_queue_delayed_work_on(sdp->cpu, rcu_gp_wq, &sdp->work, delay);
+	srcu_queue_delayed_work_on(sdp, delay);
 }
 
 /*
@@ -1186,7 +1178,8 @@ static void srcu_invoke_callbacks(struct work_struct *work)
 	struct srcu_data *sdp;
 	struct srcu_struct *ssp;
 
-	sdp = container_of(work, struct srcu_data, work.work);
+	sdp = container_of(work, struct srcu_data, work);
+
 	ssp = sdp->ssp;
 	rcu_cblist_init(&ready_cbs);
 	spin_lock_irq_rcu_node(sdp);
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index be67a1bcba1da..86538c72cae90 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3361,8 +3361,6 @@ int rcutree_online_cpu(unsigned int cpu)
 	raw_spin_lock_irqsave_rcu_node(rnp, flags);
 	rnp->ffmask |= rdp->grpmask;
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
-	if (IS_ENABLED(CONFIG_TREE_SRCU))
-		srcu_online_cpu(cpu);
 	if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)
 		return 0; /* Too early in boot for scheduler work. */
 	sync_sched_exp_online_cleanup(cpu);
@@ -3387,8 +3385,6 @@ int rcutree_offline_cpu(unsigned int cpu)
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 
 	rcutree_affinity_setting(cpu, cpu);
-	if (IS_ENABLED(CONFIG_TREE_SRCU))
-		srcu_offline_cpu(cpu);
 	return 0;
 }
 
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 0ab060c8e9a71..e27fb27e087df 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -456,11 +456,3 @@ static void rcu_bind_gp_kthread(void);
 static bool rcu_nohz_full_cpu(void);
 static void rcu_dynticks_task_enter(void);
 static void rcu_dynticks_task_exit(void);
-
-#ifdef CONFIG_SRCU
-void srcu_online_cpu(unsigned int cpu);
-void srcu_offline_cpu(unsigned int cpu);
-#else /* #ifdef CONFIG_SRCU */
-void srcu_online_cpu(unsigned int cpu) { }
-void srcu_offline_cpu(unsigned int cpu) { }
-#endif /* #else #ifdef CONFIG_SRCU */
-- 
2.20.0.rc2



* Re: [PATCH] srcu: Remove srcu_queue_delayed_work_on()
  2018-12-11 11:12 [PATCH] srcu: Remove srcu_queue_delayed_work_on() Sebastian Andrzej Siewior
@ 2018-12-11 17:40 ` Paul E. McKenney
  2018-12-12  1:40 ` Joel Fernandes
  1 sibling, 0 replies; 9+ messages in thread
From: Paul E. McKenney @ 2018-12-11 17:40 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-kernel, Lai Jiangshan, Josh Triplett, Steven Rostedt,
	Mathieu Desnoyers, Joel Fernandes, tglx

On Tue, Dec 11, 2018 at 12:12:38PM +0100, Sebastian Andrzej Siewior wrote:
> srcu_queue_delayed_work_on() disables preemption (and therefore CPU
> hotplug in RCU's case) and then checks based on its own accounting if a
> CPU is online. If the CPU is online it uses queue_delayed_work_on()
> otherwise it fallbacks to queue_delayed_work().
> The problem here is that queue_work() on -RT does not work with disabled
> preemption.
> 
> queue_work_on() works also on an offlined CPU. queue_delayed_work_on()
> has the problem that it is possible to program a timer on an offlined
> CPU. This timer will fire once the CPU is online again. But until then,
> the timer remains programmed and nothing will happen.
> 
> Add a local timer which will fire (as requested per delay) on the local
> CPU and then enqueue the work on the specific CPU.
> 
> RCUtorture testing with SRCU-P for 24h showed no problems.
> 
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>

Queued and pushed, thank you!

						Thanx, Paul

> ---
>  include/linux/srcutree.h |  3 ++-
>  kernel/rcu/srcutree.c    | 57 ++++++++++++++++++----------------------
>  kernel/rcu/tree.c        |  4 ---
>  kernel/rcu/tree.h        |  8 ------
>  4 files changed, 27 insertions(+), 45 deletions(-)
> 
> diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
> index 6f292bd3e7db7..0faa978c98807 100644
> --- a/include/linux/srcutree.h
> +++ b/include/linux/srcutree.h
> @@ -45,7 +45,8 @@ struct srcu_data {
>  	unsigned long srcu_gp_seq_needed;	/* Furthest future GP needed. */
>  	unsigned long srcu_gp_seq_needed_exp;	/* Furthest future exp GP. */
>  	bool srcu_cblist_invoking;		/* Invoking these CBs? */
> -	struct delayed_work work;		/* Context for CB invoking. */
> +	struct timer_list delay_work;		/* Delay for CB invoking */
> +	struct work_struct work;		/* Context for CB invoking. */
>  	struct rcu_head srcu_barrier_head;	/* For srcu_barrier() use. */
>  	struct srcu_node *mynode;		/* Leaf srcu_node. */
>  	unsigned long grpmask;			/* Mask for leaf srcu_node */
> diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
> index 3600d88d8956b..7f041f2435df9 100644
> --- a/kernel/rcu/srcutree.c
> +++ b/kernel/rcu/srcutree.c
> @@ -58,6 +58,7 @@ static bool __read_mostly srcu_init_done;
>  static void srcu_invoke_callbacks(struct work_struct *work);
>  static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay);
>  static void process_srcu(struct work_struct *work);
> +static void srcu_delay_timer(struct timer_list *t);
>  
>  /* Wrappers for lock acquisition and release, see raw_spin_lock_rcu_node(). */
>  #define spin_lock_rcu_node(p)					\
> @@ -156,7 +157,8 @@ static void init_srcu_struct_nodes(struct srcu_struct *ssp, bool is_static)
>  			snp->grphi = cpu;
>  		}
>  		sdp->cpu = cpu;
> -		INIT_DELAYED_WORK(&sdp->work, srcu_invoke_callbacks);
> +		INIT_WORK(&sdp->work, srcu_invoke_callbacks);
> +		timer_setup(&sdp->delay_work, srcu_delay_timer, 0);
>  		sdp->ssp = ssp;
>  		sdp->grpmask = 1 << (cpu - sdp->mynode->grplo);
>  		if (is_static)
> @@ -386,13 +388,19 @@ void _cleanup_srcu_struct(struct srcu_struct *ssp, bool quiesced)
>  	} else {
>  		flush_delayed_work(&ssp->work);
>  	}
> -	for_each_possible_cpu(cpu)
> +	for_each_possible_cpu(cpu) {
> +		struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
> +
>  		if (quiesced) {
> -			if (WARN_ON(delayed_work_pending(&per_cpu_ptr(ssp->sda, cpu)->work)))
> +			if (WARN_ON(timer_pending(&sdp->delay_work)))
> +				return; /* Just leak it! */
> +			if (WARN_ON(work_pending(&sdp->work)))
>  				return; /* Just leak it! */
>  		} else {
> -			flush_delayed_work(&per_cpu_ptr(ssp->sda, cpu)->work);
> +			del_timer_sync(&sdp->delay_work);
> +			flush_work(&sdp->work);
>  		}
> +	}
>  	if (WARN_ON(rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) != SRCU_STATE_IDLE) ||
>  	    WARN_ON(srcu_readers_active(ssp))) {
>  		pr_info("%s: Active srcu_struct %p state: %d\n",
> @@ -463,39 +471,23 @@ static void srcu_gp_start(struct srcu_struct *ssp)
>  	WARN_ON_ONCE(state != SRCU_STATE_SCAN1);
>  }
>  
> -/*
> - * Track online CPUs to guide callback workqueue placement.
> - */
> -DEFINE_PER_CPU(bool, srcu_online);
>  
> -void srcu_online_cpu(unsigned int cpu)
> +static void srcu_delay_timer(struct timer_list *t)
>  {
> -	WRITE_ONCE(per_cpu(srcu_online, cpu), true);
> +	struct srcu_data *sdp = container_of(t, struct srcu_data, delay_work);
> +
> +	queue_work_on(sdp->cpu, rcu_gp_wq, &sdp->work);
>  }
>  
> -void srcu_offline_cpu(unsigned int cpu)
> -{
> -	WRITE_ONCE(per_cpu(srcu_online, cpu), false);
> -}
> -
> -/*
> - * Place the workqueue handler on the specified CPU if online, otherwise
> - * just run it whereever.  This is useful for placing workqueue handlers
> - * that are to invoke the specified CPU's callbacks.
> - */
> -static bool srcu_queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
> -				       struct delayed_work *dwork,
> +static void srcu_queue_delayed_work_on(struct srcu_data *sdp,
>  				       unsigned long delay)
>  {
> -	bool ret;
> +	if (!delay) {
> +		queue_work_on(sdp->cpu, rcu_gp_wq, &sdp->work);
> +		return;
> +	}
>  
> -	preempt_disable();
> -	if (READ_ONCE(per_cpu(srcu_online, cpu)))
> -		ret = queue_delayed_work_on(cpu, wq, dwork, delay);
> -	else
> -		ret = queue_delayed_work(wq, dwork, delay);
> -	preempt_enable();
> -	return ret;
> +	timer_reduce(&sdp->delay_work, jiffies + delay);
>  }
>  
>  /*
> @@ -504,7 +496,7 @@ static bool srcu_queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
>   */
>  static void srcu_schedule_cbs_sdp(struct srcu_data *sdp, unsigned long delay)
>  {
> -	srcu_queue_delayed_work_on(sdp->cpu, rcu_gp_wq, &sdp->work, delay);
> +	srcu_queue_delayed_work_on(sdp, delay);
>  }
>  
>  /*
> @@ -1186,7 +1178,8 @@ static void srcu_invoke_callbacks(struct work_struct *work)
>  	struct srcu_data *sdp;
>  	struct srcu_struct *ssp;
>  
> -	sdp = container_of(work, struct srcu_data, work.work);
> +	sdp = container_of(work, struct srcu_data, work);
> +
>  	ssp = sdp->ssp;
>  	rcu_cblist_init(&ready_cbs);
>  	spin_lock_irq_rcu_node(sdp);
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index be67a1bcba1da..86538c72cae90 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -3361,8 +3361,6 @@ int rcutree_online_cpu(unsigned int cpu)
>  	raw_spin_lock_irqsave_rcu_node(rnp, flags);
>  	rnp->ffmask |= rdp->grpmask;
>  	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
> -	if (IS_ENABLED(CONFIG_TREE_SRCU))
> -		srcu_online_cpu(cpu);
>  	if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)
>  		return 0; /* Too early in boot for scheduler work. */
>  	sync_sched_exp_online_cleanup(cpu);
> @@ -3387,8 +3385,6 @@ int rcutree_offline_cpu(unsigned int cpu)
>  	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
>  
>  	rcutree_affinity_setting(cpu, cpu);
> -	if (IS_ENABLED(CONFIG_TREE_SRCU))
> -		srcu_offline_cpu(cpu);
>  	return 0;
>  }
>  
> diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
> index 0ab060c8e9a71..e27fb27e087df 100644
> --- a/kernel/rcu/tree.h
> +++ b/kernel/rcu/tree.h
> @@ -456,11 +456,3 @@ static void rcu_bind_gp_kthread(void);
>  static bool rcu_nohz_full_cpu(void);
>  static void rcu_dynticks_task_enter(void);
>  static void rcu_dynticks_task_exit(void);
> -
> -#ifdef CONFIG_SRCU
> -void srcu_online_cpu(unsigned int cpu);
> -void srcu_offline_cpu(unsigned int cpu);
> -#else /* #ifdef CONFIG_SRCU */
> -void srcu_online_cpu(unsigned int cpu) { }
> -void srcu_offline_cpu(unsigned int cpu) { }
> -#endif /* #else #ifdef CONFIG_SRCU */
> -- 
> 2.20.0.rc2
> 



* Re: [PATCH] srcu: Remove srcu_queue_delayed_work_on()
  2018-12-11 11:12 [PATCH] srcu: Remove srcu_queue_delayed_work_on() Sebastian Andrzej Siewior
  2018-12-11 17:40 ` Paul E. McKenney
@ 2018-12-12  1:40 ` Joel Fernandes
  2018-12-12 14:05   ` Boqun Feng
  1 sibling, 1 reply; 9+ messages in thread
From: Joel Fernandes @ 2018-12-12  1:40 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: linux-kernel, Lai Jiangshan, Paul E. McKenney, Josh Triplett,
	Steven Rostedt, Mathieu Desnoyers, tglx, boqun.feng

On Tue, Dec 11, 2018 at 12:12:38PM +0100, Sebastian Andrzej Siewior wrote:
> srcu_queue_delayed_work_on() disables preemption (and therefore CPU
> hotplug in RCU's case) and then checks based on its own accounting if a
> CPU is online. If the CPU is online it uses queue_delayed_work_on()
> otherwise it fallbacks to queue_delayed_work().
> The problem here is that queue_work() on -RT does not work with disabled
> preemption.
> 
> queue_work_on() works also on an offlined CPU. queue_delayed_work_on()
> has the problem that it is possible to program a timer on an offlined
> CPU. This timer will fire once the CPU is online again. But until then,
> the timer remains programmed and nothing will happen.
> Add a local timer which will fire (as requested per delay) on the local
> CPU and then enqueue the work on the specific CPU.
> 
> RCUtorture testing with SRCU-P for 24h showed no problems.
> 
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> ---
>  include/linux/srcutree.h |  3 ++-
>  kernel/rcu/srcutree.c    | 57 ++++++++++++++++++----------------------
>  kernel/rcu/tree.c        |  4 ---
>  kernel/rcu/tree.h        |  8 ------
>  4 files changed, 27 insertions(+), 45 deletions(-)
> 
> diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
> index 6f292bd3e7db7..0faa978c98807 100644
> --- a/include/linux/srcutree.h
> +++ b/include/linux/srcutree.h
> @@ -45,7 +45,8 @@ struct srcu_data {
>  	unsigned long srcu_gp_seq_needed;	/* Furthest future GP needed. */
>  	unsigned long srcu_gp_seq_needed_exp;	/* Furthest future exp GP. */
>  	bool srcu_cblist_invoking;		/* Invoking these CBs? */
> -	struct delayed_work work;		/* Context for CB invoking. */
> +	struct timer_list delay_work;		/* Delay for CB invoking */
> +	struct work_struct work;		/* Context for CB invoking. */
>  	struct rcu_head srcu_barrier_head;	/* For srcu_barrier() use. */
>  	struct srcu_node *mynode;		/* Leaf srcu_node. */
>  	unsigned long grpmask;			/* Mask for leaf srcu_node */
> diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
> index 3600d88d8956b..7f041f2435df9 100644
> --- a/kernel/rcu/srcutree.c
> +++ b/kernel/rcu/srcutree.c
> @@ -58,6 +58,7 @@ static bool __read_mostly srcu_init_done;
>  static void srcu_invoke_callbacks(struct work_struct *work);
>  static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay);
>  static void process_srcu(struct work_struct *work);
> +static void srcu_delay_timer(struct timer_list *t);
>  
>  /* Wrappers for lock acquisition and release, see raw_spin_lock_rcu_node(). */
>  #define spin_lock_rcu_node(p)					\
> @@ -156,7 +157,8 @@ static void init_srcu_struct_nodes(struct srcu_struct *ssp, bool is_static)
>  			snp->grphi = cpu;
>  		}
>  		sdp->cpu = cpu;
> -		INIT_DELAYED_WORK(&sdp->work, srcu_invoke_callbacks);
> +		INIT_WORK(&sdp->work, srcu_invoke_callbacks);
> +		timer_setup(&sdp->delay_work, srcu_delay_timer, 0);
>  		sdp->ssp = ssp;
>  		sdp->grpmask = 1 << (cpu - sdp->mynode->grplo);
>  		if (is_static)
> @@ -386,13 +388,19 @@ void _cleanup_srcu_struct(struct srcu_struct *ssp, bool quiesced)
>  	} else {
>  		flush_delayed_work(&ssp->work);
>  	}
> -	for_each_possible_cpu(cpu)
> +	for_each_possible_cpu(cpu) {
> +		struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
> +
>  		if (quiesced) {
> -			if (WARN_ON(delayed_work_pending(&per_cpu_ptr(ssp->sda, cpu)->work)))
> +			if (WARN_ON(timer_pending(&sdp->delay_work)))
> +				return; /* Just leak it! */
> +			if (WARN_ON(work_pending(&sdp->work)))
>  				return; /* Just leak it! */
>  		} else {
> -			flush_delayed_work(&per_cpu_ptr(ssp->sda, cpu)->work);
> +			del_timer_sync(&sdp->delay_work);
> +			flush_work(&sdp->work);
>  		}
> +	}
>  	if (WARN_ON(rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) != SRCU_STATE_IDLE) ||
>  	    WARN_ON(srcu_readers_active(ssp))) {
>  		pr_info("%s: Active srcu_struct %p state: %d\n",
> @@ -463,39 +471,23 @@ static void srcu_gp_start(struct srcu_struct *ssp)
>  	WARN_ON_ONCE(state != SRCU_STATE_SCAN1);
>  }
>  
> -/*
> - * Track online CPUs to guide callback workqueue placement.
> - */
> -DEFINE_PER_CPU(bool, srcu_online);
>  
> -void srcu_online_cpu(unsigned int cpu)
> +static void srcu_delay_timer(struct timer_list *t)
>  {
> -	WRITE_ONCE(per_cpu(srcu_online, cpu), true);
> +	struct srcu_data *sdp = container_of(t, struct srcu_data, delay_work);
> +
> +	queue_work_on(sdp->cpu, rcu_gp_wq, &sdp->work);
>  }
>  
> -void srcu_offline_cpu(unsigned int cpu)
> -{
> -	WRITE_ONCE(per_cpu(srcu_online, cpu), false);
> -}
> -
> -/*
> - * Place the workqueue handler on the specified CPU if online, otherwise
> - * just run it whereever.  This is useful for placing workqueue handlers
> - * that are to invoke the specified CPU's callbacks.
> - */
> -static bool srcu_queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
> -				       struct delayed_work *dwork,
> +static void srcu_queue_delayed_work_on(struct srcu_data *sdp,
>  				       unsigned long delay)
>  {
> -	bool ret;
> +	if (!delay) {
> +		queue_work_on(sdp->cpu, rcu_gp_wq, &sdp->work);
> +		return;
> +	}
>  
> -	preempt_disable();
> -	if (READ_ONCE(per_cpu(srcu_online, cpu)))
> -		ret = queue_delayed_work_on(cpu, wq, dwork, delay);
> -	else
> -		ret = queue_delayed_work(wq, dwork, delay);
> -	preempt_enable();

The deleted code looks like 'cpu' could be offlined.

A question for my clarification: according to sync_rcu_exp_select_cpus(), we
have to disable preemption before calling queue_work_on() to ensure the CPU is online.

The same is also said in Boqun's presentation on the topic:
https://linuxplumbersconf.org/event/2/contributions/158/attachments/68/79/workqueue_and_cpu_hotplug.pdf

Calling queue_work_on() on an offline CPU sounds like it could be problematic.
So in your patch, don't you still need to disable preemption around
queue_work_on(), or at the very least check that said CPU is online before
calling queue_work_on()?
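
For reference, a minimal sketch of the alternative the question suggests
(illustrative only; the function name is made up and simply reuses data from
the patch above, and the changelog argues against this pattern on -RT, where
work cannot be queued with preemption disabled):

/* Hypothetical illustration, not part of the patch: re-check that the
 * target CPU is online in a preemption-disabled section before queueing
 * to it. */
static void srcu_queue_work_on_checked(struct srcu_data *sdp)
{
	preempt_disable();		/* pins the result of the online check */
	if (cpu_online(sdp->cpu))
		queue_work_on(sdp->cpu, rcu_gp_wq, &sdp->work);
	else
		queue_work(rcu_gp_wq, &sdp->work);	/* run it anywhere */
	preempt_enable();
}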

thanks,

 - Joel



* Re: [PATCH] srcu: Remove srcu_queue_delayed_work_on()
  2018-12-12  1:40 ` Joel Fernandes
@ 2018-12-12 14:05   ` Boqun Feng
  2018-12-12 15:55     ` Paul E. McKenney
  2018-12-13 15:03     ` Sebastian Andrzej Siewior
  0 siblings, 2 replies; 9+ messages in thread
From: Boqun Feng @ 2018-12-12 14:05 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: Sebastian Andrzej Siewior, linux-kernel, Lai Jiangshan,
	Paul E. McKenney, Josh Triplett, Steven Rostedt,
	Mathieu Desnoyers, tglx, tj


Hi Joel,

On Tue, Dec 11, 2018 at 05:40:16PM -0800, Joel Fernandes wrote:
> On Tue, Dec 11, 2018 at 12:12:38PM +0100, Sebastian Andrzej Siewior wrote:
> > srcu_queue_delayed_work_on() disables preemption (and therefore CPU
> > hotplug in RCU's case) and then checks based on its own accounting if a
> > CPU is online. If the CPU is online it uses queue_delayed_work_on()
> > otherwise it fallbacks to queue_delayed_work().
> > The problem here is that queue_work() on -RT does not work with disabled
> > preemption.
> > 
> > queue_work_on() works also on an offlined CPU. queue_delayed_work_on()
> > has the problem that it is possible to program a timer on an offlined
> > CPU. This timer will fire once the CPU is online again. But until then,
> > the timer remains programmed and nothing will happen.
> > Add a local timer which will fire (as requested per delay) on the local
> > CPU and then enqueue the work on the specific CPU.
> > 
> > RCUtorture testing with SRCU-P for 24h showed no problems.
> > 
> > Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> > ---
> >  include/linux/srcutree.h |  3 ++-
> >  kernel/rcu/srcutree.c    | 57 ++++++++++++++++++----------------------
> >  kernel/rcu/tree.c        |  4 ---
> >  kernel/rcu/tree.h        |  8 ------
> >  4 files changed, 27 insertions(+), 45 deletions(-)
> > 
> > diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
> > index 6f292bd3e7db7..0faa978c98807 100644
> > --- a/include/linux/srcutree.h
> > +++ b/include/linux/srcutree.h
> > @@ -45,7 +45,8 @@ struct srcu_data {
> >  	unsigned long srcu_gp_seq_needed;	/* Furthest future GP needed. */
> >  	unsigned long srcu_gp_seq_needed_exp;	/* Furthest future exp GP. */
> >  	bool srcu_cblist_invoking;		/* Invoking these CBs? */
> > -	struct delayed_work work;		/* Context for CB invoking. */
> > +	struct timer_list delay_work;		/* Delay for CB invoking */
> > +	struct work_struct work;		/* Context for CB invoking. */
> >  	struct rcu_head srcu_barrier_head;	/* For srcu_barrier() use. */
> >  	struct srcu_node *mynode;		/* Leaf srcu_node. */
> >  	unsigned long grpmask;			/* Mask for leaf srcu_node */
> > diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
> > index 3600d88d8956b..7f041f2435df9 100644
> > --- a/kernel/rcu/srcutree.c
> > +++ b/kernel/rcu/srcutree.c
> > @@ -58,6 +58,7 @@ static bool __read_mostly srcu_init_done;
> >  static void srcu_invoke_callbacks(struct work_struct *work);
> >  static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay);
> >  static void process_srcu(struct work_struct *work);
> > +static void srcu_delay_timer(struct timer_list *t);
> >  
> >  /* Wrappers for lock acquisition and release, see raw_spin_lock_rcu_node(). */
> >  #define spin_lock_rcu_node(p)					\
> > @@ -156,7 +157,8 @@ static void init_srcu_struct_nodes(struct srcu_struct *ssp, bool is_static)
> >  			snp->grphi = cpu;
> >  		}
> >  		sdp->cpu = cpu;
> > -		INIT_DELAYED_WORK(&sdp->work, srcu_invoke_callbacks);
> > +		INIT_WORK(&sdp->work, srcu_invoke_callbacks);
> > +		timer_setup(&sdp->delay_work, srcu_delay_timer, 0);
> >  		sdp->ssp = ssp;
> >  		sdp->grpmask = 1 << (cpu - sdp->mynode->grplo);
> >  		if (is_static)
> > @@ -386,13 +388,19 @@ void _cleanup_srcu_struct(struct srcu_struct *ssp, bool quiesced)
> >  	} else {
> >  		flush_delayed_work(&ssp->work);
> >  	}
> > -	for_each_possible_cpu(cpu)
> > +	for_each_possible_cpu(cpu) {
> > +		struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
> > +
> >  		if (quiesced) {
> > -			if (WARN_ON(delayed_work_pending(&per_cpu_ptr(ssp->sda, cpu)->work)))
> > +			if (WARN_ON(timer_pending(&sdp->delay_work)))
> > +				return; /* Just leak it! */
> > +			if (WARN_ON(work_pending(&sdp->work)))
> >  				return; /* Just leak it! */
> >  		} else {
> > -			flush_delayed_work(&per_cpu_ptr(ssp->sda, cpu)->work);
> > +			del_timer_sync(&sdp->delay_work);
> > +			flush_work(&sdp->work);
> >  		}
> > +	}
> >  	if (WARN_ON(rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) != SRCU_STATE_IDLE) ||
> >  	    WARN_ON(srcu_readers_active(ssp))) {
> >  		pr_info("%s: Active srcu_struct %p state: %d\n",
> > @@ -463,39 +471,23 @@ static void srcu_gp_start(struct srcu_struct *ssp)
> >  	WARN_ON_ONCE(state != SRCU_STATE_SCAN1);
> >  }
> >  
> > -/*
> > - * Track online CPUs to guide callback workqueue placement.
> > - */
> > -DEFINE_PER_CPU(bool, srcu_online);
> >  
> > -void srcu_online_cpu(unsigned int cpu)
> > +static void srcu_delay_timer(struct timer_list *t)
> >  {
> > -	WRITE_ONCE(per_cpu(srcu_online, cpu), true);
> > +	struct srcu_data *sdp = container_of(t, struct srcu_data, delay_work);
> > +
> > +	queue_work_on(sdp->cpu, rcu_gp_wq, &sdp->work);
> >  }
> >  
> > -void srcu_offline_cpu(unsigned int cpu)
> > -{
> > -	WRITE_ONCE(per_cpu(srcu_online, cpu), false);
> > -}
> > -
> > -/*
> > - * Place the workqueue handler on the specified CPU if online, otherwise
> > - * just run it whereever.  This is useful for placing workqueue handlers
> > - * that are to invoke the specified CPU's callbacks.
> > - */
> > -static bool srcu_queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
> > -				       struct delayed_work *dwork,
> > +static void srcu_queue_delayed_work_on(struct srcu_data *sdp,
> >  				       unsigned long delay)
> >  {
> > -	bool ret;
> > +	if (!delay) {
> > +		queue_work_on(sdp->cpu, rcu_gp_wq, &sdp->work);
> > +		return;
> > +	}
> >  
> > -	preempt_disable();
> > -	if (READ_ONCE(per_cpu(srcu_online, cpu)))
> > -		ret = queue_delayed_work_on(cpu, wq, dwork, delay);
> > -	else
> > -		ret = queue_delayed_work(wq, dwork, delay);
> > -	preempt_enable();
> 
> The deleted code looks like 'cpu' could be offlined.
> 
> Question for my clarification: According to sync_rcu_exp_select_cpus, we have
> to disable preemption before calling queue_work_on to ensure the CPU is online.
> 
> Also same is said in Boqun's presentation on the topic:
> https://linuxplumbersconf.org/event/2/contributions/158/attachments/68/79/workqueue_and_cpu_hotplug.pdf
> 
> Calling queue_work_on on an offline CPU sounds like could be problematic. So
> in your patch, don't you still need to disable preemption around
> queue_work_on, or the very least check if said CPU is online before calling
> queue_work_on?
> 

I should be the one who answers this ;-)

So I found something after my LPC topic, which is that queue_work_on()
may work fine even when racing with a CPU going offline. The reason is
that the workqueue CPU-offline hook only switches the per-CPU pool into
an unbound one, so as long as the related CPU has been online once, we
are fine here.

Please note this is only my own analysis, and I'm the one who told
Sebastian and Paul about this.

I should have sent an email to check with the workqueue maintainers about
this, but I didn't find the time. Sorry about that. But let's do it right
now, while we're at it.

(Cc TJ)

So Jiangshan and TJ, what's your opinion on this one? If we call
queue_work_on() at a place where the target CPU may be offlined, I think
we have the guarantee that the work will eventually be executed even if
the CPU never comes online again, right? In other words, if a CPU has
been online once, queue_work_on() on it is free from racing with CPU
hotplug.

Am I right about this, or did I miss something subtle?

Regards,
Boqun

> thanks,
> 
>  - Joel
> 



* Re: [PATCH] srcu: Remove srcu_queue_delayed_work_on()
  2018-12-12 14:05   ` Boqun Feng
@ 2018-12-12 15:55     ` Paul E. McKenney
  2018-12-13  1:36       ` Joel Fernandes
  2018-12-13 15:06       ` Sebastian Andrzej Siewior
  2018-12-13 15:03     ` Sebastian Andrzej Siewior
  1 sibling, 2 replies; 9+ messages in thread
From: Paul E. McKenney @ 2018-12-12 15:55 UTC (permalink / raw)
  To: Boqun Feng
  Cc: Joel Fernandes, Sebastian Andrzej Siewior, linux-kernel,
	Lai Jiangshan, Josh Triplett, Steven Rostedt, Mathieu Desnoyers,
	tglx, tj

On Wed, Dec 12, 2018 at 10:05:19PM +0800, Boqun Feng wrote:
> Hi Joel,
> 
> On Tue, Dec 11, 2018 at 05:40:16PM -0800, Joel Fernandes wrote:
> > On Tue, Dec 11, 2018 at 12:12:38PM +0100, Sebastian Andrzej Siewior wrote:
> > > srcu_queue_delayed_work_on() disables preemption (and therefore CPU
> > > hotplug in RCU's case) and then checks based on its own accounting if a
> > > CPU is online. If the CPU is online it uses queue_delayed_work_on()
> > > otherwise it fallbacks to queue_delayed_work().
> > > The problem here is that queue_work() on -RT does not work with disabled
> > > preemption.
> > > 
> > > queue_work_on() works also on an offlined CPU. queue_delayed_work_on()
> > > has the problem that it is possible to program a timer on an offlined
> > > CPU. This timer will fire once the CPU is online again. But until then,
> > > the timer remains programmed and nothing will happen.
> > > Add a local timer which will fire (as requested per delay) on the local
> > > CPU and then enqueue the work on the specific CPU.
> > > 
> > > RCUtorture testing with SRCU-P for 24h showed no problems.
> > > 
> > > Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> > > ---
> > >  include/linux/srcutree.h |  3 ++-
> > >  kernel/rcu/srcutree.c    | 57 ++++++++++++++++++----------------------
> > >  kernel/rcu/tree.c        |  4 ---
> > >  kernel/rcu/tree.h        |  8 ------
> > >  4 files changed, 27 insertions(+), 45 deletions(-)
> > > 
> > > diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
> > > index 6f292bd3e7db7..0faa978c98807 100644
> > > --- a/include/linux/srcutree.h
> > > +++ b/include/linux/srcutree.h
> > > @@ -45,7 +45,8 @@ struct srcu_data {
> > >  	unsigned long srcu_gp_seq_needed;	/* Furthest future GP needed. */
> > >  	unsigned long srcu_gp_seq_needed_exp;	/* Furthest future exp GP. */
> > >  	bool srcu_cblist_invoking;		/* Invoking these CBs? */
> > > -	struct delayed_work work;		/* Context for CB invoking. */
> > > +	struct timer_list delay_work;		/* Delay for CB invoking */
> > > +	struct work_struct work;		/* Context for CB invoking. */
> > >  	struct rcu_head srcu_barrier_head;	/* For srcu_barrier() use. */
> > >  	struct srcu_node *mynode;		/* Leaf srcu_node. */
> > >  	unsigned long grpmask;			/* Mask for leaf srcu_node */
> > > diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
> > > index 3600d88d8956b..7f041f2435df9 100644
> > > --- a/kernel/rcu/srcutree.c
> > > +++ b/kernel/rcu/srcutree.c
> > > @@ -58,6 +58,7 @@ static bool __read_mostly srcu_init_done;
> > >  static void srcu_invoke_callbacks(struct work_struct *work);
> > >  static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay);
> > >  static void process_srcu(struct work_struct *work);
> > > +static void srcu_delay_timer(struct timer_list *t);
> > >  
> > >  /* Wrappers for lock acquisition and release, see raw_spin_lock_rcu_node(). */
> > >  #define spin_lock_rcu_node(p)					\
> > > @@ -156,7 +157,8 @@ static void init_srcu_struct_nodes(struct srcu_struct *ssp, bool is_static)
> > >  			snp->grphi = cpu;
> > >  		}
> > >  		sdp->cpu = cpu;
> > > -		INIT_DELAYED_WORK(&sdp->work, srcu_invoke_callbacks);
> > > +		INIT_WORK(&sdp->work, srcu_invoke_callbacks);
> > > +		timer_setup(&sdp->delay_work, srcu_delay_timer, 0);
> > >  		sdp->ssp = ssp;
> > >  		sdp->grpmask = 1 << (cpu - sdp->mynode->grplo);
> > >  		if (is_static)
> > > @@ -386,13 +388,19 @@ void _cleanup_srcu_struct(struct srcu_struct *ssp, bool quiesced)
> > >  	} else {
> > >  		flush_delayed_work(&ssp->work);
> > >  	}
> > > -	for_each_possible_cpu(cpu)
> > > +	for_each_possible_cpu(cpu) {
> > > +		struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
> > > +
> > >  		if (quiesced) {
> > > -			if (WARN_ON(delayed_work_pending(&per_cpu_ptr(ssp->sda, cpu)->work)))
> > > +			if (WARN_ON(timer_pending(&sdp->delay_work)))
> > > +				return; /* Just leak it! */
> > > +			if (WARN_ON(work_pending(&sdp->work)))
> > >  				return; /* Just leak it! */
> > >  		} else {
> > > -			flush_delayed_work(&per_cpu_ptr(ssp->sda, cpu)->work);
> > > +			del_timer_sync(&sdp->delay_work);
> > > +			flush_work(&sdp->work);
> > >  		}
> > > +	}
> > >  	if (WARN_ON(rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) != SRCU_STATE_IDLE) ||
> > >  	    WARN_ON(srcu_readers_active(ssp))) {
> > >  		pr_info("%s: Active srcu_struct %p state: %d\n",
> > > @@ -463,39 +471,23 @@ static void srcu_gp_start(struct srcu_struct *ssp)
> > >  	WARN_ON_ONCE(state != SRCU_STATE_SCAN1);
> > >  }
> > >  
> > > -/*
> > > - * Track online CPUs to guide callback workqueue placement.
> > > - */
> > > -DEFINE_PER_CPU(bool, srcu_online);
> > >  
> > > -void srcu_online_cpu(unsigned int cpu)
> > > +static void srcu_delay_timer(struct timer_list *t)
> > >  {
> > > -	WRITE_ONCE(per_cpu(srcu_online, cpu), true);
> > > +	struct srcu_data *sdp = container_of(t, struct srcu_data, delay_work);
> > > +
> > > +	queue_work_on(sdp->cpu, rcu_gp_wq, &sdp->work);
> > >  }
> > >  
> > > -void srcu_offline_cpu(unsigned int cpu)
> > > -{
> > > -	WRITE_ONCE(per_cpu(srcu_online, cpu), false);
> > > -}
> > > -
> > > -/*
> > > - * Place the workqueue handler on the specified CPU if online, otherwise
> > > - * just run it whereever.  This is useful for placing workqueue handlers
> > > - * that are to invoke the specified CPU's callbacks.
> > > - */
> > > -static bool srcu_queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
> > > -				       struct delayed_work *dwork,
> > > +static void srcu_queue_delayed_work_on(struct srcu_data *sdp,
> > >  				       unsigned long delay)
> > >  {
> > > -	bool ret;
> > > +	if (!delay) {
> > > +		queue_work_on(sdp->cpu, rcu_gp_wq, &sdp->work);
> > > +		return;
> > > +	}
> > >  
> > > -	preempt_disable();
> > > -	if (READ_ONCE(per_cpu(srcu_online, cpu)))
> > > -		ret = queue_delayed_work_on(cpu, wq, dwork, delay);
> > > -	else
> > > -		ret = queue_delayed_work(wq, dwork, delay);
> > > -	preempt_enable();
> > 
> > The deleted code looks like 'cpu' could be offlined.
> > 
> > Question for my clarification: According to sync_rcu_exp_select_cpus, we have
> > to disable preemption before calling queue_work_on to ensure the CPU is online.
> > 
> > Also same is said in Boqun's presentation on the topic:
> > https://linuxplumbersconf.org/event/2/contributions/158/attachments/68/79/workqueue_and_cpu_hotplug.pdf
> > 
> > Calling queue_work_on on an offline CPU sounds like could be problematic. So
> > in your patch, don't you still need to disable preemption around
> > queue_work_on, or the very least check if said CPU is online before calling
> > queue_work_on?
> > 
> 
> I should be the one who answers this ;-)
> 
> So I found something after my LPC topic, that is queue_work_on() may
> work well even if racing with a cpu offline. The reason is that in the
> cpu offline hook for workqueue only switches the percpu pool into an
> unbound one, so as long as the related cpu has been online once, we are
> fine here. 
> 
> Please note this is only my own analysis, and I'm the one who told
> Sebastian and Paul about this.
> 
> I should send an email to check with workqueue maintainers about this,
> but I didn't find the time. Sorry about this.. But let's do this right
> now, while we at it.
> 
> (Cc TJ)
> 
> So Jiangshan and TJ, what's your opion on this one? If we call a
> queue_work_on() at a place where that target cpu may be offlined, I
> think we have the guarantee that the work will be eventually executed
> even if the cpu is never online again, right? In other words, if a cpu
> has been online once, queue_work_on() on it will be free from racing
> with cpu hotplug.
> 
> Am I right about this, or did I miss something subtle?

Well, what I was relying on was Thomas Gleixner's assertion that it should
work and would be fixed if it didn't.  ;-)

							Thanx, Paul

> Regards,
> Boqun
> 
> > thanks,
> > 
> >  - Joel
> > 




* Re: [PATCH] srcu: Remove srcu_queue_delayed_work_on()
  2018-12-12 15:55     ` Paul E. McKenney
@ 2018-12-13  1:36       ` Joel Fernandes
  2018-12-13 15:06       ` Sebastian Andrzej Siewior
  1 sibling, 0 replies; 9+ messages in thread
From: Joel Fernandes @ 2018-12-13  1:36 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: Boqun Feng, Sebastian Andrzej Siewior, linux-kernel,
	Lai Jiangshan, Josh Triplett, Steven Rostedt, Mathieu Desnoyers,
	tglx, tj

On Wed, Dec 12, 2018 at 07:55:46AM -0800, Paul E. McKenney wrote:
> On Wed, Dec 12, 2018 at 10:05:19PM +0800, Boqun Feng wrote:
> > Hi Joel,
> > 
> > On Tue, Dec 11, 2018 at 05:40:16PM -0800, Joel Fernandes wrote:
> > > On Tue, Dec 11, 2018 at 12:12:38PM +0100, Sebastian Andrzej Siewior wrote:
> > > > srcu_queue_delayed_work_on() disables preemption (and therefore CPU
> > > > hotplug in RCU's case) and then checks based on its own accounting if a
> > > > CPU is online. If the CPU is online it uses queue_delayed_work_on()
> > > > otherwise it fallbacks to queue_delayed_work().
> > > > The problem here is that queue_work() on -RT does not work with disabled
> > > > preemption.
> > > > 
> > > > queue_work_on() works also on an offlined CPU. queue_delayed_work_on()
> > > > has the problem that it is possible to program a timer on an offlined
> > > > CPU. This timer will fire once the CPU is online again. But until then,
> > > > the timer remains programmed and nothing will happen.
> > > > Add a local timer which will fire (as requested per delay) on the local
> > > > CPU and then enqueue the work on the specific CPU.
> > > > 
> > > > RCUtorture testing with SRCU-P for 24h showed no problems.
> > > > 
> > > > Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> > > > ---
> > > >  include/linux/srcutree.h |  3 ++-
> > > >  kernel/rcu/srcutree.c    | 57 ++++++++++++++++++----------------------
> > > >  kernel/rcu/tree.c        |  4 ---
> > > >  kernel/rcu/tree.h        |  8 ------
> > > >  4 files changed, 27 insertions(+), 45 deletions(-)
> > > > 
> > > > diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
> > > > index 6f292bd3e7db7..0faa978c98807 100644
> > > > --- a/include/linux/srcutree.h
> > > > +++ b/include/linux/srcutree.h
> > > > @@ -45,7 +45,8 @@ struct srcu_data {
> > > >  	unsigned long srcu_gp_seq_needed;	/* Furthest future GP needed. */
> > > >  	unsigned long srcu_gp_seq_needed_exp;	/* Furthest future exp GP. */
> > > >  	bool srcu_cblist_invoking;		/* Invoking these CBs? */
> > > > -	struct delayed_work work;		/* Context for CB invoking. */
> > > > +	struct timer_list delay_work;		/* Delay for CB invoking */
> > > > +	struct work_struct work;		/* Context for CB invoking. */
> > > >  	struct rcu_head srcu_barrier_head;	/* For srcu_barrier() use. */
> > > >  	struct srcu_node *mynode;		/* Leaf srcu_node. */
> > > >  	unsigned long grpmask;			/* Mask for leaf srcu_node */
> > > > diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
> > > > index 3600d88d8956b..7f041f2435df9 100644
> > > > --- a/kernel/rcu/srcutree.c
> > > > +++ b/kernel/rcu/srcutree.c
> > > > @@ -58,6 +58,7 @@ static bool __read_mostly srcu_init_done;
> > > >  static void srcu_invoke_callbacks(struct work_struct *work);
> > > >  static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay);
> > > >  static void process_srcu(struct work_struct *work);
> > > > +static void srcu_delay_timer(struct timer_list *t);
> > > >  
> > > >  /* Wrappers for lock acquisition and release, see raw_spin_lock_rcu_node(). */
> > > >  #define spin_lock_rcu_node(p)					\
> > > > @@ -156,7 +157,8 @@ static void init_srcu_struct_nodes(struct srcu_struct *ssp, bool is_static)
> > > >  			snp->grphi = cpu;
> > > >  		}
> > > >  		sdp->cpu = cpu;
> > > > -		INIT_DELAYED_WORK(&sdp->work, srcu_invoke_callbacks);
> > > > +		INIT_WORK(&sdp->work, srcu_invoke_callbacks);
> > > > +		timer_setup(&sdp->delay_work, srcu_delay_timer, 0);
> > > >  		sdp->ssp = ssp;
> > > >  		sdp->grpmask = 1 << (cpu - sdp->mynode->grplo);
> > > >  		if (is_static)
> > > > @@ -386,13 +388,19 @@ void _cleanup_srcu_struct(struct srcu_struct *ssp, bool quiesced)
> > > >  	} else {
> > > >  		flush_delayed_work(&ssp->work);
> > > >  	}
> > > > -	for_each_possible_cpu(cpu)
> > > > +	for_each_possible_cpu(cpu) {
> > > > +		struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
> > > > +
> > > >  		if (quiesced) {
> > > > -			if (WARN_ON(delayed_work_pending(&per_cpu_ptr(ssp->sda, cpu)->work)))
> > > > +			if (WARN_ON(timer_pending(&sdp->delay_work)))
> > > > +				return; /* Just leak it! */
> > > > +			if (WARN_ON(work_pending(&sdp->work)))
> > > >  				return; /* Just leak it! */
> > > >  		} else {
> > > > -			flush_delayed_work(&per_cpu_ptr(ssp->sda, cpu)->work);
> > > > +			del_timer_sync(&sdp->delay_work);
> > > > +			flush_work(&sdp->work);
> > > >  		}
> > > > +	}
> > > >  	if (WARN_ON(rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) != SRCU_STATE_IDLE) ||
> > > >  	    WARN_ON(srcu_readers_active(ssp))) {
> > > >  		pr_info("%s: Active srcu_struct %p state: %d\n",
> > > > @@ -463,39 +471,23 @@ static void srcu_gp_start(struct srcu_struct *ssp)
> > > >  	WARN_ON_ONCE(state != SRCU_STATE_SCAN1);
> > > >  }
> > > >  
> > > > -/*
> > > > - * Track online CPUs to guide callback workqueue placement.
> > > > - */
> > > > -DEFINE_PER_CPU(bool, srcu_online);
> > > >  
> > > > -void srcu_online_cpu(unsigned int cpu)
> > > > +static void srcu_delay_timer(struct timer_list *t)
> > > >  {
> > > > -	WRITE_ONCE(per_cpu(srcu_online, cpu), true);
> > > > +	struct srcu_data *sdp = container_of(t, struct srcu_data, delay_work);
> > > > +
> > > > +	queue_work_on(sdp->cpu, rcu_gp_wq, &sdp->work);
> > > >  }
> > > >  
> > > > -void srcu_offline_cpu(unsigned int cpu)
> > > > -{
> > > > -	WRITE_ONCE(per_cpu(srcu_online, cpu), false);
> > > > -}
> > > > -
> > > > -/*
> > > > - * Place the workqueue handler on the specified CPU if online, otherwise
> > > > - * just run it whereever.  This is useful for placing workqueue handlers
> > > > - * that are to invoke the specified CPU's callbacks.
> > > > - */
> > > > -static bool srcu_queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
> > > > -				       struct delayed_work *dwork,
> > > > +static void srcu_queue_delayed_work_on(struct srcu_data *sdp,
> > > >  				       unsigned long delay)
> > > >  {
> > > > -	bool ret;
> > > > +	if (!delay) {
> > > > +		queue_work_on(sdp->cpu, rcu_gp_wq, &sdp->work);
> > > > +		return;
> > > > +	}
> > > >  
> > > > -	preempt_disable();
> > > > -	if (READ_ONCE(per_cpu(srcu_online, cpu)))
> > > > -		ret = queue_delayed_work_on(cpu, wq, dwork, delay);
> > > > -	else
> > > > -		ret = queue_delayed_work(wq, dwork, delay);
> > > > -	preempt_enable();
> > > 
> > > The deleted code looks like 'cpu' could be offlined.
> > > 
> > > Question for my clarification: According to sync_rcu_exp_select_cpus, we have
> > > to disable preemption before calling queue_work_on to ensure the CPU is online.
> > > 
> > > Also same is said in Boqun's presentation on the topic:
> > > https://linuxplumbersconf.org/event/2/contributions/158/attachments/68/79/workqueue_and_cpu_hotplug.pdf
> > > 
> > > Calling queue_work_on on an offline CPU sounds like could be problematic. So
> > > in your patch, don't you still need to disable preemption around
> > > queue_work_on, or the very least check if said CPU is online before calling
> > > queue_work_on?
> > > 
> > 
> > I should be the one who answers this ;-)
> > So I found something after my LPC topic, that is queue_work_on() may
> > work well even if racing with a cpu offline. The reason is that in the
> > cpu offline hook for workqueue only switches the percpu pool into an
> > unbound one, so as long as the related cpu has been online once, we are
> > fine here. 
> > 
> > Please note this is only my own analysis, and I'm the one who told
> > Sebastian and Paul about this.
> > 
> > I should send an email to check with workqueue maintainers about this,
> > but I didn't find the time. Sorry about this.. But let's do this right
> > now, while we at it.
> > 
> > (Cc TJ)
> > 
> > So Jiangshan and TJ, what's your opion on this one? If we call a
> > queue_work_on() at a place where that target cpu may be offlined, I
> > think we have the guarantee that the work will be eventually executed
> > even if the cpu is never online again, right? In other words, if a cpu
> > has been online once, queue_work_on() on it will be free from racing
> > with cpu hotplug.
> > 
> > Am I right about this, or did I miss something subtle?
> 
> Well, what I was relying on was Thomas Gleixner's assertion that it should
> work and would be fixed if it didn't.  ;-)

FWIW I did a simple manual test calling queue_work_on() on an offline CPU to
see what happens, and it appears to work fine. On a 4-CPU system, I offlined
CPU 3 and the work ended up executing on CPU 0 instead. But I haven't done
any stress testing, so I can't rule out races entirely.
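
A minimal sketch of the kind of test described above (hypothetical module,
all names made up; CPU 3 is assumed to have been offlined beforehand, e.g.
by writing 0 to /sys/devices/system/cpu/cpu3/online):

#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/smp.h>

static void wq_offline_test_fn(struct work_struct *work)
{
	/* Report where the work item actually ran. */
	pr_info("wq_offline_test: work ran on CPU %d\n",
		raw_smp_processor_id());
}

static DECLARE_WORK(wq_offline_test_work, wq_offline_test_fn);

static int __init wq_offline_test_init(void)
{
	/* Target CPU 3, assumed to be offline at this point. */
	queue_work_on(3, system_wq, &wq_offline_test_work);
	return 0;
}

static void __exit wq_offline_test_exit(void)
{
	flush_work(&wq_offline_test_work);
}

module_init(wq_offline_test_init);
module_exit(wq_offline_test_exit);
MODULE_LICENSE("GPL");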

thanks,

- Joel



* Re: [PATCH] srcu: Remove srcu_queue_delayed_work_on()
  2018-12-12 14:05   ` Boqun Feng
  2018-12-12 15:55     ` Paul E. McKenney
@ 2018-12-13 15:03     ` Sebastian Andrzej Siewior
  2018-12-13 15:26       ` Paul E. McKenney
  1 sibling, 1 reply; 9+ messages in thread
From: Sebastian Andrzej Siewior @ 2018-12-13 15:03 UTC (permalink / raw)
  To: Boqun Feng
  Cc: Joel Fernandes, linux-kernel, Lai Jiangshan, Paul E. McKenney,
	Josh Triplett, Steven Rostedt, Mathieu Desnoyers, tglx, tj

On 2018-12-12 22:05:19 [+0800], Boqun Feng wrote:
> So Jiangshan and TJ, what's your opion on this one? If we call a
> queue_work_on() at a place where that target cpu may be offlined, I
> think we have the guarantee that the work will be eventually executed
> even if the cpu is never online again, right? In other words, if a cpu
> has been online once, queue_work_on() on it will be free from racing
> with cpu hotplug.
> 
> Am I right about this, or did I miss something subtle?

tj answered this one:
  https://lkml.kernel.org/r/20180919205521.GE902964@devbig004.ftw2.facebook.com

> Regards,
> Boqun

Sebastian


* Re: [PATCH] srcu: Remove srcu_queue_delayed_work_on()
  2018-12-12 15:55     ` Paul E. McKenney
  2018-12-13  1:36       ` Joel Fernandes
@ 2018-12-13 15:06       ` Sebastian Andrzej Siewior
  1 sibling, 0 replies; 9+ messages in thread
From: Sebastian Andrzej Siewior @ 2018-12-13 15:06 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: Boqun Feng, Joel Fernandes, linux-kernel, Lai Jiangshan,
	Josh Triplett, Steven Rostedt, Mathieu Desnoyers, tglx, tj

On 2018-12-12 07:55:46 [-0800], Paul E. McKenney wrote:
> Well, what I was relying on was Thomas Gleixner's assertion that it should
> work and would be fixed if it didn't.  ;-)

and there is work_on_cpu_safe() in case work needs to be performed on
CPU X and only on CPU X.
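
For reference, a minimal sketch of using that interface (illustrative only;
the callback and caller names are made up):

#include <linux/workqueue.h>
#include <linux/smp.h>
#include <linux/printk.h>

static long cpu_bound_fn(void *arg)
{
	/* Runs in a workqueue worker for the requested CPU. */
	pr_info("work_on_cpu_safe callback on CPU %d\n",
		raw_smp_processor_id());
	return 0;
}

static void run_on_cpu_example(int cpu)
{
	long ret;

	/* Run cpu_bound_fn() on @cpu and wait for it to finish; CPU
	 * hotplug is held off for the duration of the call. */
	ret = work_on_cpu_safe(cpu, cpu_bound_fn, NULL);
	pr_info("work_on_cpu_safe() returned %ld\n", ret);
}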

> 							Thanx, Paul

Sebastian


* Re: [PATCH] srcu: Remove srcu_queue_delayed_work_on()
  2018-12-13 15:03     ` Sebastian Andrzej Siewior
@ 2018-12-13 15:26       ` Paul E. McKenney
  0 siblings, 0 replies; 9+ messages in thread
From: Paul E. McKenney @ 2018-12-13 15:26 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: Boqun Feng, Joel Fernandes, linux-kernel, Lai Jiangshan,
	Josh Triplett, Steven Rostedt, Mathieu Desnoyers, tglx, tj

On Thu, Dec 13, 2018 at 04:03:58PM +0100, Sebastian Andrzej Siewior wrote:
> On 2018-12-12 22:05:19 [+0800], Boqun Feng wrote:
> > So Jiangshan and TJ, what's your opion on this one? If we call a
> > queue_work_on() at a place where that target cpu may be offlined, I
> > think we have the guarantee that the work will be eventually executed
> > even if the cpu is never online again, right? In other words, if a cpu
> > has been online once, queue_work_on() on it will be free from racing
> > with cpu hotplug.
> > 
> > Am I right about this, or did I miss something subtle?
> 
> tj answered this one:
>   https://lkml.kernel.org/r/20180919205521.GE902964@devbig004.ftw2.facebook.com

I must confess that I would have felt better about that email had it
been more definite than "is might just work already".  ;-)

							Thanx, Paul


