rcu.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 5/5] rcu: Remove kfree_call_rcu_nobatch()
@ 2019-08-27 19:01 Joel Fernandes (Google)
  2019-08-28 21:56 ` Paul E. McKenney
  0 siblings, 1 reply; 3+ messages in thread
From: Joel Fernandes (Google) @ 2019-08-27 19:01 UTC (permalink / raw)
  To: linux-kernel
  Cc: Joel Fernandes (Google),
	byungchul.park, Josh Triplett, Lai Jiangshan, linux-doc,
	Mathieu Desnoyers, Paul E. McKenney, rcu, Steven Rostedt,
	kernel-team

Now that kfree_rcu() special casing has been removed from tree RCU,
remove kfree_call_rcu_nobatch() since it is not needed.

Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 .../admin-guide/kernel-parameters.txt         |  4 ---
 include/linux/rcutiny.h                       |  5 ---
 include/linux/rcutree.h                       |  1 -
 kernel/rcu/rcuperf.c                          | 10 +-----
 kernel/rcu/tree.c                             | 33 ++++++++-----------
 5 files changed, 14 insertions(+), 39 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 24fe8aefb12c..56be0e30100b 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3909,10 +3909,6 @@
 			Number of loops doing rcuperf.kfree_alloc_num number
 			of allocations and frees.
 
-	rcuperf.kfree_no_batch= [KNL]
-			Use the non-batching (less efficient) version of kfree_rcu().
-			This is useful for comparing with the batched version.
-
 	rcuperf.nreaders= [KNL]
 			Set number of RCU readers.  The value -1 selects
 			N, where N is the number of CPUs.  A value
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 949841f52ec5..7aa93afa5d8d 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -39,11 +39,6 @@ static inline void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
 	call_rcu(head, func);
 }
 
-static inline void kfree_call_rcu_nobatch(struct rcu_head *head, rcu_callback_t func)
-{
-	call_rcu(head, func);
-}
-
 void rcu_qs(void);
 
 static inline void rcu_softirq_qs(void)
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 961b7e05d141..0b68aa952f8b 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -34,7 +34,6 @@ static inline void rcu_virt_note_context_switch(int cpu)
 
 void synchronize_rcu_expedited(void);
 void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func);
-void kfree_call_rcu_nobatch(struct rcu_head *head, rcu_callback_t func);
 
 void rcu_barrier(void);
 bool rcu_eqs_special_set(int cpu);
diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
index c1e25fd10f2a..da94b89cd531 100644
--- a/kernel/rcu/rcuperf.c
+++ b/kernel/rcu/rcuperf.c
@@ -593,7 +593,6 @@ rcu_perf_shutdown(void *arg)
 torture_param(int, kfree_nthreads, -1, "Number of threads running loops of kfree_rcu().");
 torture_param(int, kfree_alloc_num, 8000, "Number of allocations and frees done in an iteration.");
 torture_param(int, kfree_loops, 10, "Number of loops doing kfree_alloc_num allocations and frees.");
-torture_param(int, kfree_no_batch, 0, "Use the non-batching (slower) version of kfree_rcu().");
 
 static struct task_struct **kfree_reader_tasks;
 static int kfree_nrealthreads;
@@ -632,14 +631,7 @@ kfree_perf_thread(void *arg)
 			if (!alloc_ptr)
 				return -ENOMEM;
 
-			if (!kfree_no_batch) {
-				kfree_rcu(alloc_ptr, rh);
-			} else {
-				rcu_callback_t cb;
-
-				cb = (rcu_callback_t)(unsigned long)offsetof(struct kfree_obj, rh);
-				kfree_call_rcu_nobatch(&(alloc_ptr->rh), cb);
-			}
+			kfree_rcu(alloc_ptr, rh);
 		}
 
 		cond_resched();
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 12c17e10f2b4..c767973d62ac 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2777,8 +2777,10 @@ static void kfree_rcu_work(struct work_struct *work)
 		rcu_lock_acquire(&rcu_callback_map);
 		trace_rcu_invoke_kfree_callback(rcu_state.name, head, offset);
 
-		/* Could be possible to optimize with kfree_bulk in future */
-		kfree((void *)head - offset);
+		if (!WARN_ON_ONCE(!__is_kfree_rcu_offset(offset))) {
+			/* Could be optimized with kfree_bulk() in future. */
+			kfree((void *)head - offset);
+		}
 
 		rcu_lock_release(&rcu_callback_map);
 		cond_resched_tasks_rcu_qs();
@@ -2856,16 +2858,6 @@ static void kfree_rcu_monitor(struct work_struct *work)
 		spin_unlock_irqrestore(&krcp->lock, flags);
 }
 
-/*
- * This version of kfree_call_rcu does not do batching of kfree_rcu() requests.
- * Used only by rcuperf torture test for comparison with kfree_rcu_batch().
- */
-void kfree_call_rcu_nobatch(struct rcu_head *head, rcu_callback_t func)
-{
-	__call_rcu(head, func);
-}
-EXPORT_SYMBOL_GPL(kfree_call_rcu_nobatch);
-
 /*
  * Queue a request for lazy invocation of kfree() after a grace period.
  *
@@ -2885,12 +2877,6 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
 	unsigned long flags;
 	struct kfree_rcu_cpu *krcp;
 
-	/* kfree_call_rcu() batching requires timers to be up. If the scheduler
-	 * is not yet up, just skip batching and do the non-batched version.
-	 */
-	if (rcu_scheduler_active != RCU_SCHEDULER_RUNNING)
-		return kfree_call_rcu_nobatch(head, func);
-
 	if (debug_rcu_head_queue(head)) {
 		/* Probable double kfree_rcu() */
 		WARN_ONCE(1, "kfree_call_rcu(): Double-freed call. rcu_head %p\n",
@@ -2909,8 +2895,15 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
 	krcp->head = head;
 
 	/* Schedule monitor for timely drain after KFREE_DRAIN_JIFFIES. */
-	if (!xchg(&krcp->monitor_todo, true))
-		schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES);
+	if (!xchg(&krcp->monitor_todo, true)) {
+		/* Scheduling the monitor requires scheduler/timers to be up,
+		 * if it is not, just skip it. An eventual kfree_rcu() will
+		 * kick it again.
+		 */
+		if ((rcu_scheduler_active == RCU_SCHEDULER_RUNNING)) {
+			schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES);
+		}
+	}
 
 	spin_unlock(&krcp->lock);
 	local_irq_restore(flags);
-- 
2.23.0.187.g17f5b7556c-goog


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH 5/5] rcu: Remove kfree_call_rcu_nobatch()
  2019-08-27 19:01 [PATCH 5/5] rcu: Remove kfree_call_rcu_nobatch() Joel Fernandes (Google)
@ 2019-08-28 21:56 ` Paul E. McKenney
  2019-08-29 22:23   ` Joel Fernandes
  0 siblings, 1 reply; 3+ messages in thread
From: Paul E. McKenney @ 2019-08-28 21:56 UTC (permalink / raw)
  To: Joel Fernandes (Google)
  Cc: linux-kernel, byungchul.park, Josh Triplett, Lai Jiangshan,
	linux-doc, Mathieu Desnoyers, rcu, Steven Rostedt, kernel-team

On Tue, Aug 27, 2019 at 03:01:59PM -0400, Joel Fernandes (Google) wrote:
> Now that kfree_rcu() special casing have been removed from tree RCU,
> remove kfree_call_rcu_nobatch() since it is not needed.
> 
> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>

Now -this- one qualifies as a nice negative delta!  ;-)

A few things below, please fix in next version.

							Thanx, Paul

> ---
>  .../admin-guide/kernel-parameters.txt         |  4 ---
>  include/linux/rcutiny.h                       |  5 ---
>  include/linux/rcutree.h                       |  1 -
>  kernel/rcu/rcuperf.c                          | 10 +-----
>  kernel/rcu/tree.c                             | 33 ++++++++-----------
>  5 files changed, 14 insertions(+), 39 deletions(-)
> 
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index 24fe8aefb12c..56be0e30100b 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -3909,10 +3909,6 @@
>  			Number of loops doing rcuperf.kfree_alloc_num number
>  			of allocations and frees.
>  
> -	rcuperf.kfree_no_batch= [KNL]
> -			Use the non-batching (less efficient) version of kfree_rcu().
> -			This is useful for comparing with the batched version.
> -
>  	rcuperf.nreaders= [KNL]
>  			Set number of RCU readers.  The value -1 selects
>  			N, where N is the number of CPUs.  A value
> diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
> index 949841f52ec5..7aa93afa5d8d 100644
> --- a/include/linux/rcutiny.h
> +++ b/include/linux/rcutiny.h
> @@ -39,11 +39,6 @@ static inline void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
>  	call_rcu(head, func);
>  }
>  
> -static inline void kfree_call_rcu_nobatch(struct rcu_head *head, rcu_callback_t func)
> -{
> -	call_rcu(head, func);
> -}
> -
>  void rcu_qs(void);
>  
>  static inline void rcu_softirq_qs(void)
> diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
> index 961b7e05d141..0b68aa952f8b 100644
> --- a/include/linux/rcutree.h
> +++ b/include/linux/rcutree.h
> @@ -34,7 +34,6 @@ static inline void rcu_virt_note_context_switch(int cpu)
>  
>  void synchronize_rcu_expedited(void);
>  void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func);
> -void kfree_call_rcu_nobatch(struct rcu_head *head, rcu_callback_t func);
>  
>  void rcu_barrier(void);
>  bool rcu_eqs_special_set(int cpu);
> diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
> index c1e25fd10f2a..da94b89cd531 100644
> --- a/kernel/rcu/rcuperf.c
> +++ b/kernel/rcu/rcuperf.c
> @@ -593,7 +593,6 @@ rcu_perf_shutdown(void *arg)
>  torture_param(int, kfree_nthreads, -1, "Number of threads running loops of kfree_rcu().");
>  torture_param(int, kfree_alloc_num, 8000, "Number of allocations and frees done in an iteration.");
>  torture_param(int, kfree_loops, 10, "Number of loops doing kfree_alloc_num allocations and frees.");
> -torture_param(int, kfree_no_batch, 0, "Use the non-batching (slower) version of kfree_rcu().");
>  
>  static struct task_struct **kfree_reader_tasks;
>  static int kfree_nrealthreads;
> @@ -632,14 +631,7 @@ kfree_perf_thread(void *arg)
>  			if (!alloc_ptr)
>  				return -ENOMEM;
>  
> -			if (!kfree_no_batch) {
> -				kfree_rcu(alloc_ptr, rh);
> -			} else {
> -				rcu_callback_t cb;
> -
> -				cb = (rcu_callback_t)(unsigned long)offsetof(struct kfree_obj, rh);
> -				kfree_call_rcu_nobatch(&(alloc_ptr->rh), cb);
> -			}
> +			kfree_rcu(alloc_ptr, rh);
>  		}
>  
>  		cond_resched();
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index 12c17e10f2b4..c767973d62ac 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -2777,8 +2777,10 @@ static void kfree_rcu_work(struct work_struct *work)
>  		rcu_lock_acquire(&rcu_callback_map);
>  		trace_rcu_invoke_kfree_callback(rcu_state.name, head, offset);
>  
> -		/* Could be possible to optimize with kfree_bulk in future */
> -		kfree((void *)head - offset);
> +		if (!WARN_ON_ONCE(!__is_kfree_rcu_offset(offset))) {
> +			/* Could be optimized with kfree_bulk() in future. */
> +			kfree((void *)head - offset);
> +		}

This really needs to be in the previous patch until such time as Tiny RCU
no longer needs the restriction.

>  		rcu_lock_release(&rcu_callback_map);
>  		cond_resched_tasks_rcu_qs();
> @@ -2856,16 +2858,6 @@ static void kfree_rcu_monitor(struct work_struct *work)
>  		spin_unlock_irqrestore(&krcp->lock, flags);
>  }
>  
> -/*
> - * This version of kfree_call_rcu does not do batching of kfree_rcu() requests.
> - * Used only by rcuperf torture test for comparison with kfree_rcu_batch().
> - */
> -void kfree_call_rcu_nobatch(struct rcu_head *head, rcu_callback_t func)
> -{
> -	__call_rcu(head, func);
> -}
> -EXPORT_SYMBOL_GPL(kfree_call_rcu_nobatch);
> -
>  /*
>   * Queue a request for lazy invocation of kfree() after a grace period.
>   *
> @@ -2885,12 +2877,6 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
>  	unsigned long flags;
>  	struct kfree_rcu_cpu *krcp;
>  
> -	/* kfree_call_rcu() batching requires timers to be up. If the scheduler
> -	 * is not yet up, just skip batching and do the non-batched version.
> -	 */
> -	if (rcu_scheduler_active != RCU_SCHEDULER_RUNNING)
> -		return kfree_call_rcu_nobatch(head, func);
> -
>  	if (debug_rcu_head_queue(head)) {
>  		/* Probable double kfree_rcu() */
>  		WARN_ONCE(1, "kfree_call_rcu(): Double-freed call. rcu_head %p\n",
> @@ -2909,8 +2895,15 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
>  	krcp->head = head;
>  
>  	/* Schedule monitor for timely drain after KFREE_DRAIN_JIFFIES. */
> -	if (!xchg(&krcp->monitor_todo, true))
> -		schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES);
> +	if (!xchg(&krcp->monitor_todo, true)) {
> +		/* Scheduling the monitor requires scheduler/timers to be up,
> +		 * if it is not, just skip it. An eventual kfree_rcu() will
> +		 * kick it again.
> +		 */
> +		if ((rcu_scheduler_active == RCU_SCHEDULER_RUNNING)) {
> +			schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES);
> +		}
> +	}

And this also needs to be in an earlier patch.  Bisectability and all that!

Are we really guaranteed that there will be an eventual kfree_rcu()?
More of a worry for Tiny RCU than for Tree RCU, but still could be
annoying for someone trying to debug a memory leak.

							Thanx, Paul

>  	spin_unlock(&krcp->lock);
>  	local_irq_restore(flags);
> -- 
> 2.23.0.187.g17f5b7556c-goog
> 

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH 5/5] rcu: Remove kfree_call_rcu_nobatch()
  2019-08-28 21:56 ` Paul E. McKenney
@ 2019-08-29 22:23   ` Joel Fernandes
  0 siblings, 0 replies; 3+ messages in thread
From: Joel Fernandes @ 2019-08-29 22:23 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: linux-kernel, byungchul.park, Josh Triplett, Lai Jiangshan,
	linux-doc, Mathieu Desnoyers, rcu, Steven Rostedt, kernel-team

Hi Paul,

I think this is the only contentious patch preventing my resend of the
series. Let me know what you think; I replied below:

On Wed, Aug 28, 2019 at 02:56:36PM -0700, Paul E. McKenney wrote:
> On Tue, Aug 27, 2019 at 03:01:59PM -0400, Joel Fernandes (Google) wrote:
[snip]
> > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > index 12c17e10f2b4..c767973d62ac 100644
> > --- a/kernel/rcu/tree.c
> > +++ b/kernel/rcu/tree.c
> > @@ -2777,8 +2777,10 @@ static void kfree_rcu_work(struct work_struct *work)
> >  		rcu_lock_acquire(&rcu_callback_map);
> >  		trace_rcu_invoke_kfree_callback(rcu_state.name, head, offset);
> >  
> > -		/* Could be possible to optimize with kfree_bulk in future */
> > -		kfree((void *)head - offset);
> > +		if (!WARN_ON_ONCE(!__is_kfree_rcu_offset(offset))) {
> > +			/* Could be optimized with kfree_bulk() in future. */
> > +			kfree((void *)head - offset);
> > +		}
> 
> This really needs to be in the previous patch until such time as Tiny RCU
> no longer needs the restriction.

I was only going by whatever is already committed to the -rcu dev branch. The
series is based on the -dev branch.

The original patch adding the kfree_rcu() batching is already merged into the
-rcu dev branch (that version just had 1 list, this series adds multiple
lists).

In the above diff, I just added the WARN_ON_ONCE() as extra checking for tree
RCU kfree batching. It has nothing to do with tiny RCU per se. Should I
submit the WARN_ON_ONCE() as a separate patch then?

To prevent confusion, could you let me know if I am supposed to be submitting
patches against a branch other than the dev branch?

> >  		rcu_lock_release(&rcu_callback_map);
> >  		cond_resched_tasks_rcu_qs();
> > @@ -2856,16 +2858,6 @@ static void kfree_rcu_monitor(struct work_struct *work)
> >  		spin_unlock_irqrestore(&krcp->lock, flags);
> >  }
> >  
> > -/*
> > - * This version of kfree_call_rcu does not do batching of kfree_rcu() requests.
> > - * Used only by rcuperf torture test for comparison with kfree_rcu_batch().
> > - */
> > -void kfree_call_rcu_nobatch(struct rcu_head *head, rcu_callback_t func)
> > -{
> > -	__call_rcu(head, func);
> > -}
> > -EXPORT_SYMBOL_GPL(kfree_call_rcu_nobatch);
> > -
> >  /*
> >   * Queue a request for lazy invocation of kfree() after a grace period.
> >   *
> > @@ -2885,12 +2877,6 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
> >  	unsigned long flags;
> >  	struct kfree_rcu_cpu *krcp;
> >  
> > -	/* kfree_call_rcu() batching requires timers to be up. If the scheduler
> > -	 * is not yet up, just skip batching and do the non-batched version.
> > -	 */
> > -	if (rcu_scheduler_active != RCU_SCHEDULER_RUNNING)
> > -		return kfree_call_rcu_nobatch(head, func);
> > -
> >  	if (debug_rcu_head_queue(head)) {
> >  		/* Probable double kfree_rcu() */
> >  		WARN_ONCE(1, "kfree_call_rcu(): Double-freed call. rcu_head %p\n",
> > @@ -2909,8 +2895,15 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
> >  	krcp->head = head;
> >  
> >  	/* Schedule monitor for timely drain after KFREE_DRAIN_JIFFIES. */
> > -	if (!xchg(&krcp->monitor_todo, true))
> > -		schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES);
> > +	if (!xchg(&krcp->monitor_todo, true)) {
> > +		/* Scheduling the monitor requires scheduler/timers to be up,
> > +		 * if it is not, just skip it. An eventual kfree_rcu() will
> > +		 * kick it again.
> > +		 */
> > +		if ((rcu_scheduler_active == RCU_SCHEDULER_RUNNING)) {
> > +			schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES);
> > +		}
> > +	}
> 
> And this also needs to be in an earlier patch.  Bisectability and all that!
> 
> Are we really guaranteed that there will be an eventual kfree_rcu()?
> More of a worry for Tiny RCU than for Tree RCU, but still could be
> annoying for someone trying to debug a memory leak.

Same comment as above, the original patch adding the schedule_delayed_work()
is already merged into the -dev branch. This series is based on top of that.
The reason I had to rearrange the &krcp->monitor_todo code above is that we no
longer have kfree_call_rcu_nobatch(), which this patch removes.

thanks,

 - Joel



^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2019-08-29 22:23 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-08-27 19:01 [PATCH 5/5] rcu: Remove kfree_call_rcu_nobatch() Joel Fernandes (Google)
2019-08-28 21:56 ` Paul E. McKenney
2019-08-29 22:23   ` Joel Fernandes

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).