All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] rcu: Only boost rcu reader tasks with lower priority than boost kthreads
@ 2022-03-09  8:15 Zqiang
  2022-03-10 18:27 ` Uladzislau Rezki
  0 siblings, 1 reply; 3+ messages in thread
From: Zqiang @ 2022-03-09  8:15 UTC (permalink / raw)
  To: paulmck, frederic, quic_neeraju, josh, urezki
  Cc: bigeasy, juri.lelli, rcu, linux-kernel

When RCU_BOOST is enabled, the boost kthreads will boosting readers
who are blocking a given grace period, if the current reader tasks
have a higher priority than boost kthreads(the boost kthreads priority
not always 1, if the kthread_prio is set), boosting is useless, skip
current task and select next task to boosting, reduce the time for a
given grace period.

Signed-off-by: Zqiang <qiang1.zhang@intel.com>
---
 v1->v2:
 rename label 'end' to 'skip_boost'.
 add 'boost_exp_tasks' pointer to point 'rnp->exp_tasks'
 do the similar thing as normal grace period.

 kernel/rcu/tree.h        |  2 ++
 kernel/rcu/tree_plugin.h | 31 +++++++++++++++++++++++--------
 2 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index b8d07bf92d29..862ca09b56c7 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -103,6 +103,8 @@ struct rcu_node {
 				/*  queued on this rcu_node structure that */
 				/*  are blocking the current grace period, */
 				/*  there can be no such task. */
+	struct list_head *boost_exp_tasks;
+
 	struct rt_mutex boost_mtx;
 				/* Used only for the priority-boosting */
 				/*  side effect, not as a lock. */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index c3d212bc5338..22bf5a8040f5 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -12,6 +12,7 @@
  */
 
 #include "../locking/rtmutex_common.h"
+#include <linux/sched/deadline.h>
 
 static bool rcu_rdp_is_offloaded(struct rcu_data *rdp)
 {
@@ -535,6 +536,8 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
 			drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx.rtmutex) == t;
 			if (&t->rcu_node_entry == rnp->boost_tasks)
 				WRITE_ONCE(rnp->boost_tasks, np);
+			if (&t->rcu_node_entry == rnp->boost_exp_tasks)
+				WRITE_ONCE(rnp->boost_exp_tasks, np);
 		}
 
 		/*
@@ -1022,7 +1025,7 @@ static int rcu_boost(struct rcu_node *rnp)
 	struct task_struct *t;
 	struct list_head *tb;
 
-	if (READ_ONCE(rnp->exp_tasks) == NULL &&
+	if (READ_ONCE(rnp->boost_exp_tasks) == NULL &&
 	    READ_ONCE(rnp->boost_tasks) == NULL)
 		return 0;  /* Nothing left to boost. */
 
@@ -1032,7 +1035,7 @@ static int rcu_boost(struct rcu_node *rnp)
 	 * Recheck under the lock: all tasks in need of boosting
 	 * might exit their RCU read-side critical sections on their own.
 	 */
-	if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) {
+	if (rnp->boost_exp_tasks == NULL && rnp->boost_tasks == NULL) {
 		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		return 0;
 	}
@@ -1043,8 +1046,8 @@ static int rcu_boost(struct rcu_node *rnp)
 	 * expedited grace period must boost all blocked tasks, including
 	 * those blocking the pre-existing normal grace period.
 	 */
-	if (rnp->exp_tasks != NULL)
-		tb = rnp->exp_tasks;
+	if (rnp->boost_exp_tasks != NULL)
+		tb = rnp->boost_exp_tasks;
 	else
 		tb = rnp->boost_tasks;
 
@@ -1065,14 +1068,24 @@ static int rcu_boost(struct rcu_node *rnp)
 	 * section.
 	 */
 	t = container_of(tb, struct task_struct, rcu_node_entry);
+	if (dl_task(t) || t->prio <= current->prio) {
+		tb = rcu_next_node_entry(t, rnp);
+		if (rnp->boost_exp_tasks)
+			WRITE_ONCE(rnp->boost_exp_tasks, tb);
+		else
+			WRITE_ONCE(rnp->boost_tasks, tb);
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+		goto skip_boost;
+	}
+
 	rt_mutex_init_proxy_locked(&rnp->boost_mtx.rtmutex, t);
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 	/* Lock only for side effect: boosts task t's priority. */
 	rt_mutex_lock(&rnp->boost_mtx);
 	rt_mutex_unlock(&rnp->boost_mtx);  /* Then keep lockdep happy. */
 	rnp->n_boosts++;
-
-	return READ_ONCE(rnp->exp_tasks) != NULL ||
+skip_boost:
+	return READ_ONCE(rnp->boost_exp_tasks) != NULL ||
 	       READ_ONCE(rnp->boost_tasks) != NULL;
 }
 
@@ -1090,7 +1103,7 @@ static int rcu_boost_kthread(void *arg)
 		WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_WAITING);
 		trace_rcu_utilization(TPS("End boost kthread@rcu_wait"));
 		rcu_wait(READ_ONCE(rnp->boost_tasks) ||
-			 READ_ONCE(rnp->exp_tasks));
+			 READ_ONCE(rnp->boost_exp_tasks));
 		trace_rcu_utilization(TPS("Start boost kthread@rcu_wait"));
 		WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_RUNNING);
 		more2boost = rcu_boost(rnp);
@@ -1129,13 +1142,15 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
 		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		return;
 	}
-	if (rnp->exp_tasks != NULL ||
+	if ((rnp->exp_tasks != NULL && rnp->boost_exp_tasks == NULL) ||
 	    (rnp->gp_tasks != NULL &&
 	     rnp->boost_tasks == NULL &&
 	     rnp->qsmask == 0 &&
 	     (!time_after(rnp->boost_time, jiffies) || rcu_state.cbovld))) {
 		if (rnp->exp_tasks == NULL)
 			WRITE_ONCE(rnp->boost_tasks, rnp->gp_tasks);
+		else
+			WRITE_ONCE(rnp->boost_exp_tasks, rnp->exp_tasks);
 		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		rcu_wake_cond(rnp->boost_kthread_task,
 			      READ_ONCE(rnp->boost_kthread_status));
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH v2] rcu: Only boost rcu reader tasks with lower priority than boost kthreads
  2022-03-09  8:15 [PATCH v2] rcu: Only boost rcu reader tasks with lower priority than boost kthreads Zqiang
@ 2022-03-10 18:27 ` Uladzislau Rezki
  2022-03-11  1:53   ` Zhang, Qiang1
  0 siblings, 1 reply; 3+ messages in thread
From: Uladzislau Rezki @ 2022-03-10 18:27 UTC (permalink / raw)
  To: Zqiang
  Cc: paulmck, frederic, quic_neeraju, josh, urezki, bigeasy,
	juri.lelli, rcu, linux-kernel

> When RCU_BOOST is enabled, the boost kthreads will boosting readers
> who are blocking a given grace period, if the current reader tasks
> have a higher priority than boost kthreads(the boost kthreads priority
> not always 1, if the kthread_prio is set), boosting is useless, skip
> current task and select next task to boosting, reduce the time for a
> given grace period.
> 
> Signed-off-by: Zqiang <qiang1.zhang@intel.com>
> ---
>  v1->v2:
>  rename label 'end' to 'skip_boost'.
>  add 'boost_exp_tasks' pointer to point 'rnp->exp_tasks'
>  do the similar thing as normal grace period.
> 
>  kernel/rcu/tree.h        |  2 ++
>  kernel/rcu/tree_plugin.h | 31 +++++++++++++++++++++++--------
>  2 files changed, 25 insertions(+), 8 deletions(-)
> 
> diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
> index b8d07bf92d29..862ca09b56c7 100644
> --- a/kernel/rcu/tree.h
> +++ b/kernel/rcu/tree.h
> @@ -103,6 +103,8 @@ struct rcu_node {
>  				/*  queued on this rcu_node structure that */
>  				/*  are blocking the current grace period, */
>  				/*  there can be no such task. */
> +	struct list_head *boost_exp_tasks;
> +
>  	struct rt_mutex boost_mtx;
>  				/* Used only for the priority-boosting */
>  				/*  side effect, not as a lock. */
> diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
> index c3d212bc5338..22bf5a8040f5 100644
> --- a/kernel/rcu/tree_plugin.h
> +++ b/kernel/rcu/tree_plugin.h
> @@ -12,6 +12,7 @@
>   */
>  
>  #include "../locking/rtmutex_common.h"
> +#include <linux/sched/deadline.h>
>  
>  static bool rcu_rdp_is_offloaded(struct rcu_data *rdp)
>  {
> @@ -535,6 +536,8 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
>  			drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx.rtmutex) == t;
>  			if (&t->rcu_node_entry == rnp->boost_tasks)
>  				WRITE_ONCE(rnp->boost_tasks, np);
> +			if (&t->rcu_node_entry == rnp->boost_exp_tasks)
> +				WRITE_ONCE(rnp->boost_exp_tasks, np);
>  		}
>  
>  		/*
> @@ -1022,7 +1025,7 @@ static int rcu_boost(struct rcu_node *rnp)
>  	struct task_struct *t;
>  	struct list_head *tb;
>  
> -	if (READ_ONCE(rnp->exp_tasks) == NULL &&
> +	if (READ_ONCE(rnp->boost_exp_tasks) == NULL &&
>  	    READ_ONCE(rnp->boost_tasks) == NULL)
>  		return 0;  /* Nothing left to boost. */
>  
> @@ -1032,7 +1035,7 @@ static int rcu_boost(struct rcu_node *rnp)
>  	 * Recheck under the lock: all tasks in need of boosting
>  	 * might exit their RCU read-side critical sections on their own.
>  	 */
> -	if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) {
> +	if (rnp->boost_exp_tasks == NULL && rnp->boost_tasks == NULL) {
>  		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
>  		return 0;
>  	}
> @@ -1043,8 +1046,8 @@ static int rcu_boost(struct rcu_node *rnp)
>  	 * expedited grace period must boost all blocked tasks, including
>  	 * those blocking the pre-existing normal grace period.
>  	 */
> -	if (rnp->exp_tasks != NULL)
> -		tb = rnp->exp_tasks;
> +	if (rnp->boost_exp_tasks != NULL)
> +		tb = rnp->boost_exp_tasks;
>  	else
>  		tb = rnp->boost_tasks;
>  
> @@ -1065,14 +1068,24 @@ static int rcu_boost(struct rcu_node *rnp)
>  	 * section.
>  	 */
>  	t = container_of(tb, struct task_struct, rcu_node_entry);
> +	if (dl_task(t) || t->prio <= current->prio) {
This is a bit confusing to me. We know that "current" has SCHED_FIFO
class. In this scenario if "t->prio" has lower value(higher priority)
the task falls into all real-time categories anyway and is either:
  - SCHED_FIFO
  - SCHED_RR
  - SCHED_DEADLINE

Checking whether the task is dl_task() sounds like unnecessary there.
Am i missing something? Could you please comment on it?

Thanks!

--
Vlad Rezki

^ permalink raw reply	[flat|nested] 3+ messages in thread

* RE: [PATCH v2] rcu: Only boost rcu reader tasks with lower priority than boost kthreads
  2022-03-10 18:27 ` Uladzislau Rezki
@ 2022-03-11  1:53   ` Zhang, Qiang1
  0 siblings, 0 replies; 3+ messages in thread
From: Zhang, Qiang1 @ 2022-03-11  1:53 UTC (permalink / raw)
  To: Uladzislau Rezki
  Cc: paulmck, frederic, quic_neeraju, josh, bigeasy, juri.lelli, rcu,
	linux-kernel


> When RCU_BOOST is enabled, the boost kthreads will boosting readers 
> who are blocking a given grace period, if the current reader tasks 
> have a higher priority than boost kthreads(the boost kthreads priority 
> not always 1, if the kthread_prio is set), boosting is useless, skip 
> current task and select next task to boosting, reduce the time for a 
> given grace period.
> 
> Signed-off-by: Zqiang <qiang1.zhang@intel.com>
> ---
>  v1->v2:
>  rename label 'end' to 'skip_boost'.
>  add 'boost_exp_tasks' pointer to point 'rnp->exp_tasks'
>  do the similar thing as normal grace period.
> 
>  kernel/rcu/tree.h        |  2 ++
>  kernel/rcu/tree_plugin.h | 31 +++++++++++++++++++++++--------
>  2 files changed, 25 insertions(+), 8 deletions(-)
> 
> diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 
> b8d07bf92d29..862ca09b56c7 100644
> --- a/kernel/rcu/tree.h
> +++ b/kernel/rcu/tree.h
> @@ -103,6 +103,8 @@ struct rcu_node {
>  				/*  queued on this rcu_node structure that */
>  				/*  are blocking the current grace period, */
>  				/*  there can be no such task. */
> +	struct list_head *boost_exp_tasks;
> +
>  	struct rt_mutex boost_mtx;
>  				/* Used only for the priority-boosting */
>  				/*  side effect, not as a lock. */ diff --git 
> a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 
> c3d212bc5338..22bf5a8040f5 100644
> --- a/kernel/rcu/tree_plugin.h
> +++ b/kernel/rcu/tree_plugin.h
> @@ -12,6 +12,7 @@
>   */
>  
>  #include "../locking/rtmutex_common.h"
> +#include <linux/sched/deadline.h>
>  
>  static bool rcu_rdp_is_offloaded(struct rcu_data *rdp)  { @@ -535,6 
> +536,8 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
>  			drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx.rtmutex) == t;
>  			if (&t->rcu_node_entry == rnp->boost_tasks)
>  				WRITE_ONCE(rnp->boost_tasks, np);
> +			if (&t->rcu_node_entry == rnp->boost_exp_tasks)
> +				WRITE_ONCE(rnp->boost_exp_tasks, np);
>  		}
>  
>  		/*
> @@ -1022,7 +1025,7 @@ static int rcu_boost(struct rcu_node *rnp)
>  	struct task_struct *t;
>  	struct list_head *tb;
>  
> -	if (READ_ONCE(rnp->exp_tasks) == NULL &&
> +	if (READ_ONCE(rnp->boost_exp_tasks) == NULL &&
>  	    READ_ONCE(rnp->boost_tasks) == NULL)
>  		return 0;  /* Nothing left to boost. */
>  
> @@ -1032,7 +1035,7 @@ static int rcu_boost(struct rcu_node *rnp)
>  	 * Recheck under the lock: all tasks in need of boosting
>  	 * might exit their RCU read-side critical sections on their own.
>  	 */
> -	if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) {
> +	if (rnp->boost_exp_tasks == NULL && rnp->boost_tasks == NULL) {
>  		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
>  		return 0;
>  	}
> @@ -1043,8 +1046,8 @@ static int rcu_boost(struct rcu_node *rnp)
>  	 * expedited grace period must boost all blocked tasks, including
>  	 * those blocking the pre-existing normal grace period.
>  	 */
> -	if (rnp->exp_tasks != NULL)
> -		tb = rnp->exp_tasks;
> +	if (rnp->boost_exp_tasks != NULL)
> +		tb = rnp->boost_exp_tasks;
>  	else
>  		tb = rnp->boost_tasks;
>  
> @@ -1065,14 +1068,24 @@ static int rcu_boost(struct rcu_node *rnp)
>  	 * section.
>  	 */
>  	t = container_of(tb, struct task_struct, rcu_node_entry);
> +	if (dl_task(t) || t->prio <= current->prio) {
>This is a bit confusing to me. We know that "current" has SCHED_FIFO class. In this scenario if "t->prio" has lower value(higher priority) the task falls into all real-time categories anyway and is either:
>  - SCHED_FIFO
>  - SCHED_RR
>  - SCHED_DEADLINE
>
>Checking whether the task is dl_task() sounds like unnecessary there.
>Am i missing something? Could you please comment on it?

Thanks feedback

Right, the deadline task's prio has lower value than rt fifo/rr task.
dl_task() condition detection seems redundant.

>
>Thanks!
>
>--
>Vlad Rezki

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2022-03-11  1:53 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-09  8:15 [PATCH v2] rcu: Only boost rcu reader tasks with lower priority than boost kthreads Zqiang
2022-03-10 18:27 ` Uladzislau Rezki
2022-03-11  1:53   ` Zhang, Qiang1

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.