* [PATCH] rcu: only raise softirq when need
@ 2010-03-30 10:11 Lai Jiangshan
  2010-03-30 16:30 ` Paul E. McKenney
  0 siblings, 1 reply; 6+ messages in thread
From: Lai Jiangshan @ 2010-03-30 10:11 UTC (permalink / raw)
  To: Paul E. McKenney, Ingo Molnar, LKML


I found that RCU_SOFTIRQ is sometimes raised without doing anything.
(I used function_graph tracing to find it:
 1)               |  rcu_process_callbacks() {
 1)               |    __rcu_process_callbacks() {
 1)   0.634 us    |      rcu_process_gp_end();
 1)   0.487 us    |      check_for_new_grace_period();
 1)   2.672 us    |    }
 1)               |    __rcu_process_callbacks() {
 1)   0.633 us    |      rcu_process_gp_end();
 1)   0.491 us    |      check_for_new_grace_period();
 1)   2.672 us    |    }
)

This patch makes RCU_SOFTIRQ be raised only when needed.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
---
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 1947c4e..06a1780 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -133,6 +133,7 @@ module_param(qlowmark, int, 0);
 
 static void force_quiescent_state(struct rcu_state *rsp, int relaxed);
 static int rcu_pending(int cpu);
+static int rcu_qs_pending(int cpu);
 
 /*
  * Return the number of RCU-sched batches processed thus far for debug & stats.
@@ -1125,8 +1126,9 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
  */
 void rcu_check_callbacks(int cpu, int user)
 {
-	if (!rcu_pending(cpu))
-		return; /* if nothing for RCU to do. */
+	if (!rcu_qs_pending(cpu))
+		goto check_rcu_softirq_work;
+
 	if (user ||
 	    (idle_cpu(cpu) && rcu_scheduler_active &&
 	     !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
@@ -1158,7 +1160,10 @@ void rcu_check_callbacks(int cpu, int user)
 		rcu_bh_qs(cpu);
 	}
 	rcu_preempt_check_callbacks(cpu);
-	raise_softirq(RCU_SOFTIRQ);
+
+check_rcu_softirq_work:
+	if (rcu_pending(cpu))
+		raise_softirq(RCU_SOFTIRQ);
 }
 
 #ifdef CONFIG_SMP
@@ -1497,9 +1502,9 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
 	/* Check for CPU stalls, if enabled. */
 	check_cpu_stall(rsp, rdp);
 
-	/* Is the RCU core waiting for a quiescent state from this CPU? */
-	if (rdp->qs_pending) {
-		rdp->n_rp_qs_pending++;
+	/* Does this RCU need to report quiescent state? */
+	if (rdp->qs_pending && rdp->passed_quiesc) {
+		rdp->n_rp_report_qs++;
 		return 1;
 	}
 
@@ -1551,6 +1556,24 @@ static int rcu_pending(int cpu)
 	       rcu_preempt_pending(cpu);
 }
 
+/* Is the RCU core waiting for a quiescent state from this CPU? */
+static int __rcu_qs_pending(struct rcu_data *rdp)
+{
+	if (rdp->qs_pending) {
+		rdp->n_rp_qs_pending++;
+		return 1;
+	}
+
+	return 0;
+}
+
+static int rcu_qs_pending(int cpu)
+{
+	return __rcu_qs_pending(&per_cpu(rcu_sched_data, cpu)) ||
+	       __rcu_qs_pending(&per_cpu(rcu_bh_data, cpu)) ||
+	       rcu_preempt_qs_pending(cpu);
+}
+
 /*
  * Check to see if any future RCU-related work will need to be done
  * by the current CPU, even if none need be done immediately, returning
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 4a525a3..c0f312e 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -223,6 +223,7 @@ struct rcu_data {
 	/* 5) __rcu_pending() statistics. */
 	unsigned long n_rcu_pending;	/* rcu_pending() calls since boot. */
 	unsigned long n_rp_qs_pending;
+	unsigned long n_rp_report_qs;
 	unsigned long n_rp_cb_ready;
 	unsigned long n_rp_cpu_needs_gp;
 	unsigned long n_rp_gp_completed;
@@ -378,6 +379,7 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp);
 #endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */
 static int rcu_preempt_pending(int cpu);
+static int rcu_preempt_qs_pending(int cpu);
 static int rcu_preempt_needs_cpu(int cpu);
 static void __cpuinit rcu_preempt_init_percpu_data(int cpu);
 static void rcu_preempt_send_cbs_to_orphanage(void);
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 79b53bd..4befb64 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -689,6 +689,11 @@ static int rcu_preempt_pending(int cpu)
 			     &per_cpu(rcu_preempt_data, cpu));
 }
 
+static int rcu_preempt_qs_pending(int cpu)
+{
+	return __rcu_qs_pending(&per_cpu(rcu_preempt_data, cpu));
+}
+
 /*
  * Does preemptable RCU need the CPU to stay out of dynticks mode?
  */
@@ -914,6 +919,11 @@ static int rcu_preempt_pending(int cpu)
 	return 0;
 }
 
+static int rcu_preempt_qs_pending(int cpu)
+{
+	return 0;
+}
+
 /*
  * Because preemptable RCU does not exist, it never needs any CPU.
  */





* Re: [PATCH] rcu: only raise softirq when need
  2010-03-30 10:11 [PATCH] rcu: only raise softirq when need Lai Jiangshan
@ 2010-03-30 16:30 ` Paul E. McKenney
  2010-03-31  2:10   ` Lai Jiangshan
  0 siblings, 1 reply; 6+ messages in thread
From: Paul E. McKenney @ 2010-03-30 16:30 UTC (permalink / raw)
  To: Lai Jiangshan; +Cc: Ingo Molnar, LKML

On Tue, Mar 30, 2010 at 06:11:55PM +0800, Lai Jiangshan wrote:
> 
> I found that RCU_SOFTIRQ is sometimes raised without doing anything.
> (I used function_graph tracing to find it:
>  1)               |  rcu_process_callbacks() {
>  1)               |    __rcu_process_callbacks() {
>  1)   0.634 us    |      rcu_process_gp_end();
>  1)   0.487 us    |      check_for_new_grace_period();
>  1)   2.672 us    |    }
>  1)               |    __rcu_process_callbacks() {
>  1)   0.633 us    |      rcu_process_gp_end();
>  1)   0.491 us    |      check_for_new_grace_period();
>  1)   2.672 us    |    }
> )
> 
> This patch makes RCU_SOFTIRQ be raised only when needed.

So this seems to have two effects:

1.	Avoid checking for a quiescent state if RCU doesn't need one
	from this CPU.

2.	Avoid RCU_SOFTIRQ if RCU did need a quiescent state from
	this CPU, and if rcu_check_callbacks() saw a quiescent state.

Except that if rcu_check_callbacks() did see a quiescent state, then we
-need- RCU_SOFTIRQ to propagate this up the tree.  So I don't see how
this patch helps, and unless I am missing something, it can result in
grace-period hangs.  (This CPU is the last one to pass through a
quiescent state, and this call to rcu_check_callbacks() finds one,
and we fail to report it up the tree.)

Please note that there are other possible causes for empty calls to
rcu_process_callbacks():

1.	RCU needs a call to force_quiescent_state(), but some other
	CPU beats us to it.  We raise RCU_SOFTIRQ, but by the time
	we get there, our work is done.

2.	RCU needs to check for CPU stalls, but some other CPU beats
	us to it.

3.	RCU is idle, and this CPU needs another grace period, but
	some other CPU starts up a new grace period before our
	softirq gets started.

So I do not believe that this patch is worthwhile even if it does turn
out to be safe.

Please let me know if I am missing something.

							Thanx, Paul

> Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
> ---
> diff --git a/kernel/rcutree.c b/kernel/rcutree.c
> index 1947c4e..06a1780 100644
> --- a/kernel/rcutree.c
> +++ b/kernel/rcutree.c
> @@ -133,6 +133,7 @@ module_param(qlowmark, int, 0);
> 
>  static void force_quiescent_state(struct rcu_state *rsp, int relaxed);
>  static int rcu_pending(int cpu);
> +static int rcu_qs_pending(int cpu);
> 
>  /*
>   * Return the number of RCU-sched batches processed thus far for debug & stats.
> @@ -1125,8 +1126,9 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
>   */
>  void rcu_check_callbacks(int cpu, int user)
>  {
> -	if (!rcu_pending(cpu))
> -		return; /* if nothing for RCU to do. */
> +	if (!rcu_qs_pending(cpu))
> +		goto check_rcu_softirq_work;
> +
>  	if (user ||
>  	    (idle_cpu(cpu) && rcu_scheduler_active &&
>  	     !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
> @@ -1158,7 +1160,10 @@ void rcu_check_callbacks(int cpu, int user)
>  		rcu_bh_qs(cpu);
>  	}
>  	rcu_preempt_check_callbacks(cpu);
> -	raise_softirq(RCU_SOFTIRQ);
> +
> +check_rcu_softirq_work:
> +	if (rcu_pending(cpu))
> +		raise_softirq(RCU_SOFTIRQ);
>  }
> 
>  #ifdef CONFIG_SMP
> @@ -1497,9 +1502,9 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
>  	/* Check for CPU stalls, if enabled. */
>  	check_cpu_stall(rsp, rdp);
> 
> -	/* Is the RCU core waiting for a quiescent state from this CPU? */
> -	if (rdp->qs_pending) {
> -		rdp->n_rp_qs_pending++;
> +	/* Does this RCU need to report quiescent state? */
> +	if (rdp->qs_pending && rdp->passed_quiesc) {
> +		rdp->n_rp_report_qs++;
>  		return 1;
>  	}
> 
> @@ -1551,6 +1556,24 @@ static int rcu_pending(int cpu)
>  	       rcu_preempt_pending(cpu);
>  }
> 
> +/* Is the RCU core waiting for a quiescent state from this CPU? */
> +static int __rcu_qs_pending(struct rcu_data *rdp)
> +{
> +	if (rdp->qs_pending) {
> +		rdp->n_rp_qs_pending++;
> +		return 1;
> +	}
> +
> +	return 0;
> +}
> +
> +static int rcu_qs_pending(int cpu)
> +{
> +	return __rcu_qs_pending(&per_cpu(rcu_sched_data, cpu)) ||
> +	       __rcu_qs_pending(&per_cpu(rcu_bh_data, cpu)) ||
> +	       rcu_preempt_qs_pending(cpu);
> +}
> +
>  /*
>   * Check to see if any future RCU-related work will need to be done
>   * by the current CPU, even if none need be done immediately, returning
> diff --git a/kernel/rcutree.h b/kernel/rcutree.h
> index 4a525a3..c0f312e 100644
> --- a/kernel/rcutree.h
> +++ b/kernel/rcutree.h
> @@ -223,6 +223,7 @@ struct rcu_data {
>  	/* 5) __rcu_pending() statistics. */
>  	unsigned long n_rcu_pending;	/* rcu_pending() calls since boot. */
>  	unsigned long n_rp_qs_pending;
> +	unsigned long n_rp_report_qs;
>  	unsigned long n_rp_cb_ready;
>  	unsigned long n_rp_cpu_needs_gp;
>  	unsigned long n_rp_gp_completed;
> @@ -378,6 +379,7 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
>  static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp);
>  #endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */
>  static int rcu_preempt_pending(int cpu);
> +static int rcu_preempt_qs_pending(int cpu);
>  static int rcu_preempt_needs_cpu(int cpu);
>  static void __cpuinit rcu_preempt_init_percpu_data(int cpu);
>  static void rcu_preempt_send_cbs_to_orphanage(void);
> diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
> index 79b53bd..4befb64 100644
> --- a/kernel/rcutree_plugin.h
> +++ b/kernel/rcutree_plugin.h
> @@ -689,6 +689,11 @@ static int rcu_preempt_pending(int cpu)
>  			     &per_cpu(rcu_preempt_data, cpu));
>  }
> 
> +static int rcu_preempt_qs_pending(int cpu)
> +{
> +	return __rcu_qs_pending(&per_cpu(rcu_preempt_data, cpu));
> +}
> +
>  /*
>   * Does preemptable RCU need the CPU to stay out of dynticks mode?
>   */
> @@ -914,6 +919,11 @@ static int rcu_preempt_pending(int cpu)
>  	return 0;
>  }
> 
> +static int rcu_preempt_qs_pending(int cpu)
> +{
> +	return 0;
> +}
> +
>  /*
>   * Because preemptable RCU does not exist, it never needs any CPU.
>   */
> 
> 
> 


* Re: [PATCH] rcu: only raise softirq when need
  2010-03-30 16:30 ` Paul E. McKenney
@ 2010-03-31  2:10   ` Lai Jiangshan
  2010-03-31 15:30     ` Paul E. McKenney
  0 siblings, 1 reply; 6+ messages in thread
From: Lai Jiangshan @ 2010-03-31  2:10 UTC (permalink / raw)
  To: paulmck; +Cc: Ingo Molnar, LKML

Paul E. McKenney wrote:
> On Tue, Mar 30, 2010 at 06:11:55PM +0800, Lai Jiangshan wrote:
>> I found that RCU_SOFTIRQ is sometimes raised without doing anything.
>> (I used function_graph tracing to find it:
>>  1)               |  rcu_process_callbacks() {
>>  1)               |    __rcu_process_callbacks() {
>>  1)   0.634 us    |      rcu_process_gp_end();
>>  1)   0.487 us    |      check_for_new_grace_period();
>>  1)   2.672 us    |    }
>>  1)               |    __rcu_process_callbacks() {
>>  1)   0.633 us    |      rcu_process_gp_end();
>>  1)   0.491 us    |      check_for_new_grace_period();
>>  1)   2.672 us    |    }
>> )
>>
>> This patch makes RCU_SOFTIRQ be raised only when needed.
> 
> So this seems to have two effects:
> 
> 1.	Avoid checking for a quiescent state if RCU doesn't need one
> 	from this CPU.
> 
> 2.	Avoid RCU_SOFTIRQ if RCU did need a quiescent state from
> 	this CPU, and if rcu_check_callbacks() saw a quiescent state.

This RCU_SOFTIRQ is not avoided.

+	if (rdp->qs_pending && rdp->passed_quiesc) {
+		rdp->n_rp_report_qs++;
 		return 1;
 	}

Old: raise RCU_SOFTIRQ when rdp->qs_pending is not zero
New: raise RCU_SOFTIRQ when rdp->qs_pending && rdp->passed_quiesc

So the behavior differs only in this state:
rdp->qs_pending == 1 && rdp->passed_quiesc == 0.
But this state is changed by the next rcu_sched_qs() or one of its relatives,
so it will not hang.
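To make the argument concrete, below is a minimal stand-alone C sketch (a toy
model, not kernel code; the struct fields and the note_qs() helper only mirror
the names used in the patch) of why the narrower check cannot cause a hang:
the tick that finds qs_pending set but passed_quiesc clear skips the softirq,
and the next quiescent state re-arms it for the following tick.

#include <stdio.h>

/* Toy per-CPU state mirroring the two fields tested by the patch. */
struct rdp {
	int qs_pending;		/* RCU core wants a quiescent state from this CPU */
	int passed_quiesc;	/* this CPU has already passed one */
};

/* Post-patch rule from __rcu_pending(): report work (and thus raise
 * RCU_SOFTIRQ from the tick) only once the quiescent state has been passed. */
static int report_qs(const struct rdp *rdp)
{
	return rdp->qs_pending && rdp->passed_quiesc;
}

/* Stand-in for rcu_sched_qs() and friends: record the quiescent state. */
static void note_qs(struct rdp *rdp)
{
	rdp->passed_quiesc = 1;
}

int main(void)
{
	struct rdp rdp = { .qs_pending = 1, .passed_quiesc = 0 };

	printf("tick 1: raise softirq? %d\n", report_qs(&rdp));	/* 0: skipped */
	note_qs(&rdp);		/* e.g. a context switch before the next tick */
	printf("tick 2: raise softirq? %d\n", report_qs(&rdp));	/* 1: reported */
	return 0;
}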

> 
> Except that if rcu_check_callbacks() did see a quiescent state, then we
> -need- RCU_SOFTIRQ to propagate this up the tree.  So I don't see how
> this patch helps, and unless I am missing something, it can result in
> grace-period hangs.  (This CPU is the last one to pass through a
> quiescent state, and this call to rcu_check_callbacks() finds one,
> and we fail to report it up the tree.)
> 
> Please note that there are other possible causes for empty calls to
> rcu_process_callbacks():
> 
> 1.	RCU needs a call to force_quiescent_state(), but some other
> 	CPU beats us to it.  We raise RCU_SOFTIRQ, but by the time
> 	we get there, our work is done.
> 
> 2.	RCU needs to check for CPU stalls, but some other CPU beats
> 	us to it.
> 
> 3.	RCU is idle, and this CPU needs another grace period, but
> 	some other CPU starts up a new grace period before our
> 	softirq gets started.

These may happen, but I have not seen any empty calls after applying the patch.

> 
> So I do not believe that this patch is worthwhile even if it does turn
> out to be safe.

I accept that this patch is not worthwhile.

Raising an empty call is harmless, and it is a chance
for RCU to make progress or to detect problems.



* Re: [PATCH] rcu: only raise softirq when need
  2010-03-31  2:10   ` Lai Jiangshan
@ 2010-03-31 15:30     ` Paul E. McKenney
  2010-04-01  7:40       ` Lai Jiangshan
  0 siblings, 1 reply; 6+ messages in thread
From: Paul E. McKenney @ 2010-03-31 15:30 UTC (permalink / raw)
  To: Lai Jiangshan; +Cc: Ingo Molnar, LKML

On Wed, Mar 31, 2010 at 10:10:55AM +0800, Lai Jiangshan wrote:
> Paul E. McKenney wrote:
> > On Tue, Mar 30, 2010 at 06:11:55PM +0800, Lai Jiangshan wrote:
> >> I found that RCU_SOFTIRQ is sometimes raised without doing anything.
> >> (I used function_graph tracing to find it:
> >>  1)               |  rcu_process_callbacks() {
> >>  1)               |    __rcu_process_callbacks() {
> >>  1)   0.634 us    |      rcu_process_gp_end();
> >>  1)   0.487 us    |      check_for_new_grace_period();
> >>  1)   2.672 us    |    }
> >>  1)               |    __rcu_process_callbacks() {
> >>  1)   0.633 us    |      rcu_process_gp_end();
> >>  1)   0.491 us    |      check_for_new_grace_period();
> >>  1)   2.672 us    |    }
> >> )
> >>
> >> This patch makes RCU_SOFTIRQ be raised only when needed.
> > 
> > So this seems to have two effects:
> > 
> > 1.	Avoid checking for a quiescent state if RCU doesn't need one
> > 	from this CPU.
> > 
> > 2.	Avoid RCU_SOFTIRQ if RCU did need a quiescent state from
> > 	this CPU, and if rcu_check_callbacks() saw a quiescent state.
> 
> This RCU_SOFTIRQ is not avoided.
> 
> +	if (rdp->qs_pending && rdp->passed_quiesc) {
> +		rdp->n_rp_report_qs++;
>  		return 1;
>  	}
> 
> Old: raise RCU_SOFTIRQ when rdp->qs_pending is not zero
> New: raise RCU_SOFTIRQ when rdp->qs_pending && rdp->passed_quiesc
> 
> So the behavior differs only in this state:
> rdp->qs_pending == 1 && rdp->passed_quiesc == 0.
> But this state is changed by the next rcu_sched_qs() or one of its relatives,
> so it will not hang.

You are quite correct.  I clearly need to give myself a day to think
about patches to RCU functionality before replying.  On the other hand,
I might have been just as confused after thinking on it, so airing my
confusion immediately might well have been the optimal approach.  ;-)

So this patch looks like it would work, so the next question is how much
it helps and/or hurts.

> > Except that if rcu_check_callbacks() did see a quiescent state, then we
> > -need- RCU_SOFTIRQ to propagate this up the tree.  So I don't see how
> > this patch helps, and unless I am missing something, it can result in
> > grace-period hangs.  (This CPU is the last one to pass through a
> > quiescent state, and this call to rcu_check_callbacks() finds one,
> > and we fail to report it up the tree.)
> > 
> > Please note that there are other possible causes for empty calls to
> > rcu_process_callbacks():
> > 
> > 1.	RCU needs a call to force_quiescent_state(), but some other
> > 	CPU beats us to it.  We raise RCU_SOFTIRQ, but by the time
> > 	we get there, our work is done.
> > 
> > 2.	RCU needs to check for CPU stalls, but some other CPU beats
> > 	us to it.
> > 
> > 3.	RCU is idle, and this CPU needs another grace period, but
> > 	some other CPU starts up a new grace period before our
> > 	softirq gets started.
> 
> These may happen, but I have not seen any empty calls after applying the patch.

Before the patch, what fraction of the calls were empty calls?

> > So I do not believe that this patch is worthwhile even if it does turn
> > out to be safe.
> 
> I accept that this patch is not worthwhile.
> 
> Raising an empty call is harmless, and it is a chance
> for RCU to make progress or to detect problems.

If you say "this patch has not yet been proven to be worthwhile" instead
of "this patch is not worthwhile", I will agree with you.  Here is a
quick rundown of my thoughts on it, which cannot be considered to be
fully formed:

1.	It cannot improve real-time scheduling latency, since there
	can be RCU_SOFTIRQ actions that really do something.  This
	patch therefore does not improve the worst-case code path
	(though it might -- or might not -- help the average path).

2.	It seems to increase path length a bit, but I doubt that this
	is measurable, so not a valid objection.

3.	It -might- improve throughput, but this would need to be
	proven.  This is the motivation for my question about the
	fraction of empty calls above.

	Kernels that force all softirqs to execute in process
	context (e.g., CONFIG_PREEMPT_RT) are the most likely to
	see benefit from this patch, as a pair of context switches
	are saved in that case.  Of course, CONFIG_PREEMPT_RT is
	more about scheduling latency than maximum throughput.

4.	It does complicate the code a bit, but could be refactored
	to reduce the complication.  (I haven't thought this through,
	but my guess is that the initial check should be factored
	across the "if" statement.)
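As a purely illustrative sketch of the refactoring hinted at in point 4 (one
possible reading only, not something actually proposed in this thread; the
rcu_* helpers below are user-space stubs rather than the real kernel
functions), the rcu_qs_pending() check could be folded into the
quiescent-state branch instead of being jumped over with a goto:

#include <stdbool.h>
#include <stdio.h>

/* Stub stand-ins so the control flow below compiles and runs in user space. */
static bool rcu_qs_pending(int cpu)	{ (void)cpu; return true; }
static bool rcu_pending(int cpu)	{ (void)cpu; return true; }
static void rcu_sched_qs(int cpu)	{ printf("sched qs on cpu %d\n", cpu); }
static void rcu_bh_qs(int cpu)		{ printf("bh qs on cpu %d\n", cpu); }
static void rcu_preempt_check_callbacks(int cpu)	{ (void)cpu; }
static void raise_rcu_softirq(void)	{ printf("raise RCU_SOFTIRQ\n"); }

/* Same behavior as the patched rcu_check_callbacks(), with the initial
 * rcu_qs_pending() test factored across the "if" rather than expressed
 * as a goto over it. */
static void rcu_check_callbacks_sketch(int cpu, bool user_or_idle, bool in_si)
{
	if (rcu_qs_pending(cpu)) {
		if (user_or_idle) {
			rcu_sched_qs(cpu);
			rcu_bh_qs(cpu);
		} else if (!in_si) {
			rcu_bh_qs(cpu);
		}
		rcu_preempt_check_callbacks(cpu);
	}

	if (rcu_pending(cpu))
		raise_rcu_softirq();
}

int main(void)
{
	rcu_check_callbacks_sketch(0, true, false);
	return 0;
}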

All that aside, I don't believe the priority of this patch is particularly
high, although it did do a good job of showing that your understanding
of the treercu implementation is increasing.  Which is a very good thing!

Just out of curiosity, what is your test procedure?

							Thanx, Paul


* Re: [PATCH] rcu: only raise softirq when need
  2010-03-31 15:30     ` Paul E. McKenney
@ 2010-04-01  7:40       ` Lai Jiangshan
  2010-04-02  1:07         ` Paul E. McKenney
  0 siblings, 1 reply; 6+ messages in thread
From: Lai Jiangshan @ 2010-04-01  7:40 UTC (permalink / raw)
  To: paulmck; +Cc: Ingo Molnar, LKML

Paul E. McKenney wrote:
> On Wed, Mar 31, 2010 at 10:10:55AM +0800, Lai Jiangshan wrote:
>> Paul E. McKenney wrote:
>>> On Tue, Mar 30, 2010 at 06:11:55PM +0800, Lai Jiangshan wrote:
>>>> I found that RCU_SOFTIRQ is sometimes raised without doing anything.
>>>> (I used function_graph tracing to find it:
>>>>  1)               |  rcu_process_callbacks() {
>>>>  1)               |    __rcu_process_callbacks() {
>>>>  1)   0.634 us    |      rcu_process_gp_end();
>>>>  1)   0.487 us    |      check_for_new_grace_period();
>>>>  1)   2.672 us    |    }
>>>>  1)               |    __rcu_process_callbacks() {
>>>>  1)   0.633 us    |      rcu_process_gp_end();
>>>>  1)   0.491 us    |      check_for_new_grace_period();
>>>>  1)   2.672 us    |    }
>>>> )
>>>>
>>>> This patch makes RCU_SOFTIRQ be raised only when needed.
>>> So this seems to have two effects:
>>>
>>> 1.	Avoid checking for a quiescent state if RCU doesn't need one
>>> 	from this CPU.
>>>
>>> 2.	Avoid RCU_SOFTIRQ if RCU did need a quiescent state from
>>> 	this CPU, and if rcu_check_callbacks() saw a quiescent state.
>> This RCU_SOFTIRQ is not avoided.
>>
>> +	if (rdp->qs_pending && rdp->passed_quiesc) {
>> +		rdp->n_rp_report_qs++;
>>  		return 1;
>>  	}
>>
>> Old: raise RCU_SOFTIRQ when rdp->qs_pending is not zero
>> New: raise RCU_SOFTIRQ when rdp->qs_pending && rdp->passed_quiesc
>>
>> So the behavior differs only in this state:
>> rdp->qs_pending == 1 && rdp->passed_quiesc == 0.
>> But this state is changed by the next rcu_sched_qs() or one of its relatives,
>> so it will not hang.
> 
> You are quite correct.  I clearly need to give myself a day to think
> about patches to RCU functionality before replying.  On the other hand,
> I might have been just as confused after thinking on it, so airing my
> confusion immediately might well have been the optimal approach.  ;-)
> 
> So this patch looks like it would work, so the next question is how much
> it helps and/or hurts.
> 
>>> Except that if rcu_check_callbacks() did see a quiescent state, then we
>>> -need- RCU_SOFTIRQ to propagate this up the tree.  So I don't see how
>>> this patch helps, and unless I am missing something, it can result in
>>> grace-period hangs.  (This CPU is the last one to pass through a
>>> quiescent state, and this call to rcu_check_callbacks() finds one,
>>> and we fail to report it up the tree.)
>>>
>>> Please note that there are other possible causes for empty calls to
>>> rcu_process_callbacks():
>>>
>>> 1.	RCU needs a call to force_quiescent_state(), but some other
>>> 	CPU beats us to it.  We raise RCU_SOFTIRQ, but by the time
>>> 	we get there, our work is done.
>>>
>>> 2.	RCU needs to check for CPU stalls, but some other CPU beats
>>> 	us to it.
>>>
>>> 3.	RCU is idle, and this CPU needs another grace period, but
>>> 	some other CPU starts up a new grace period before our
>>> 	softirq gets started.
>> These may happen, but I have not seen any empty calls after applying the patch.
> 
> Before the patch, what fraction of the calls were empty calls?
> 
>>> So I do not believe that this patch is worthwhile even if it does turn
>>> out to be safe.
>> I accept that this patch is not worthwhile.
>>
>> Raising an empty call is harmless, and it is a chance
>> for RCU to make progress or to detect problems.
> 
> If you say "this patch has not yet been proven to be worthwhile" instead
> of "this patch is not worthwhile", I will agree with you.  Here is a
> quick rundown of my thoughts on it, which cannot be considered to be
> fully formed:
> 
> 1.	It cannot improve real-time scheduling latency, since there
> 	can be RCU_SOFTIRQ actions that really do something.  This
> 	patch therefore does not improve the worst-case code path
> 	(though it might -- or might not -- help the average path).
> 
> 2.	It seems to increase path length a bit, but I doubt that this
> 	is measurable, so not a valid objection.
> 
> 3.	It -might- improve throughput, but this would need to be
> 	proven.  This is the motivation for my question about the
> 	fraction of empty calls above.
> 
> 	Kernels that force all softirqs to execute in process
> 	context (e.g., CONFIG_PREEMPT_RT) are the most likely to
> 	see benefit from this patch, as a pair of context switches
> 	are saved in that case.  Of course, CONFIG_PREEMPT_RT is
> 	more about scheduling latency than maximum throughput.
> 
> 4.	It does complicate the code a bit, but could be refactored
> 	to reduce the complication.  (I haven't thought this through,
> 	but my guess is that the initial check should be factored
> 	across the "if" statement.)
> 
> All that aside, I don't believe the priority of this patch is particularly
> high, although it did do a good job of showing that your understanding
> of the treercu implementation is increasing.  Which is a very good thing!
> 
> Just out of curiosity, what is your test procedure?
> 
> 							Thanx, Paul
> 
>

1------------------result
(CONFIG_HZ=1000 and CONFIG_PREEMPT=y)
Before the patch was applied: 10% empty calls (sometimes 30+ percent)
After the patch was applied: <0.5% empty calls

2------------------test procedure
I think you can add kernel trace code to detect it (I think that is simpler).
The following is my quick test:

ensure CONFIG_FUNCTION_GRAPH_TRACER = y

mount -t debugfs xx /debugfs/
echo rcu_process_callbacks > /debugfs/tracing/set_graph_function
echo function_graph > /debugfs/tracing/current_tracer

# make all CPUs busy, for example: compile the kernel with 'make -j XXX'
cat /debugfs/tracing/trace_pipe > trace.tmp # CTRL-C after 20 seconds
./parser.py trace.tmp

3----------------------test code and ....
(Because the compiler may or may not inline a static function:)
Your 'empty_call_graph' may well differ from mine,
so you should change it.

Q: Why does a plain 'rcu_process_gp_end();' line indicate an empty call?
A: _raw_spin_trylock() is not inlined, so when the call is not empty we
will see a '{' and '_raw_spin_trylock()' in the graph:
 0)               |      rcu_process_gp_end() {
 0)   0.350 us    |        _raw_spin_trylock();
 0)   0.250 us    |        _raw_spin_unlock_irqrestore();
 0)   1.326 us    |      }

Q: Can this test be trusted?
A: No, but I believe it.

------------------parser.py----------
#!/usr/bin/env python

import sys

empty_call_graph = tuple(
''' rcu_process_callbacks() {
 __rcu_process_callbacks() {
 rcu_process_gp_end();
 check_for_new_grace_period();
 }
 __rcu_process_callbacks() {
 rcu_process_gp_end();
 check_for_new_grace_period();
 }
 }
'''.splitlines())

total=0
empty=0
match=0
empty_block=[]

for line in file(sys.argv[1]):
  empty_block.append(line)
  if line.find('force_quiescent_state();') >= 0:
    continue

  if line.find(empty_call_graph[match]) < 0:
    match = 0
    empty_block=[]
    continue

  match = match + 1
  if match == 1:
    total = total + 1
  elif match == len(empty_call_graph):
    empty = empty + 1
    for emptyline in empty_block:
      print emptyline,
    empty_block=[]
    match = 0

print('empty call/total call: %d/%d' % (empty, total))
print('fraction: %f%%' % (100.0 * empty / total))




* Re: [PATCH] rcu: only raise softirq when need
  2010-04-01  7:40       ` Lai Jiangshan
@ 2010-04-02  1:07         ` Paul E. McKenney
  0 siblings, 0 replies; 6+ messages in thread
From: Paul E. McKenney @ 2010-04-02  1:07 UTC (permalink / raw)
  To: Lai Jiangshan; +Cc: Ingo Molnar, LKML

On Thu, Apr 01, 2010 at 03:40:00PM +0800, Lai Jiangshan wrote:
> Paul E. McKenney wrote:
> > On Wed, Mar 31, 2010 at 10:10:55AM +0800, Lai Jiangshan wrote:
> >> Paul E. McKenney wrote:
> >>> On Tue, Mar 30, 2010 at 06:11:55PM +0800, Lai Jiangshan wrote:
> >>>> I found that RCU_SOFTIRQ is sometimes raised without doing anything.
> >>>> (I used function_graph tracing to find it:
> >>>>  1)               |  rcu_process_callbacks() {
> >>>>  1)               |    __rcu_process_callbacks() {
> >>>>  1)   0.634 us    |      rcu_process_gp_end();
> >>>>  1)   0.487 us    |      check_for_new_grace_period();
> >>>>  1)   2.672 us    |    }
> >>>>  1)               |    __rcu_process_callbacks() {
> >>>>  1)   0.633 us    |      rcu_process_gp_end();
> >>>>  1)   0.491 us    |      check_for_new_grace_period();
> >>>>  1)   2.672 us    |    }
> >>>> )
> >>>>
> >>>> This patch makes RCU_SOFTIRQ be raised only when needed.
> >>> So this seems to have two effects:
> >>>
> >>> 1.	Avoid checking for a quiescent state if RCU doesn't need one
> >>> 	from this CPU.
> >>>
> >>> 2.	Avoid RCU_SOFTIRQ if RCU did need a quiescent state from
> >>> 	this CPU, and if rcu_check_callbacks() saw a quiescent state.
> >> This RCU_SOFTIRQ is not avoided.
> >>
> >> +	if (rdp->qs_pending && rdp->passed_quiesc) {
> >> +		rdp->n_rp_report_qs++;
> >>  		return 1;
> >>  	}
> >>
> >> Old: raise RCU_SOFTIRQ when rdp->qs_pending is not zero
> >> New: raise RCU_SOFTIRQ when rdp->qs_pending && rdp->passed_quiesc
> >>
> >> So the behavior differs only in this state:
> >> rdp->qs_pending == 1 && rdp->passed_quiesc == 0.
> >> But this state is changed by the next rcu_sched_qs() or one of its relatives,
> >> so it will not hang.
> > 
> > You are quite correct.  I clearly need to give myself a day to think
> > about patches to RCU functionality before replying.  On the other hand,
> > I might have been just as confused after thinking on it, so airing my
> > confusion immediately might well have been the optimal approach.  ;-)
> > 
> > So this patch looks like it would work, so the next question is how much
> > it helps and/or hurts.
> > 
> >>> Except that if rcu_check_callbacks() did see a quiescent state, then we
> >>> -need- RCU_SOFTIRQ to propagate this up the tree.  So I don't see how
> >>> this patch helps, and unless I am missing something, it can result in
> >>> grace-period hangs.  (This CPU is the last one to pass through a
> >>> quiescent state, and this call to rcu_check_callbacks() finds one,
> >>> and we fail to report it up the tree.)
> >>>
> >>> Please note that there are other possible causes for empty calls to
> >>> rcu_process_callbacks():
> >>>
> >>> 1.	RCU needs a call to force_quiescent_state(), but some other
> >>> 	CPU beats us to it.  We raise RCU_SOFTIRQ, but by the time
> >>> 	we get there, our work is done.
> >>>
> >>> 2.	RCU needs to check for CPU stalls, but some other CPU beats
> >>> 	us to it.
> >>>
> >>> 3.	RCU is idle, and this CPU needs another grace period, but
> >>> 	some other CPU starts up a new grace period before our
> >>> 	softirq gets started.
> >> These may happen, but I have not seen any empty calls after applying the patch.
> > 
> > Before the patch, what fraction of the calls were empty calls?
> > 
> >>> So I do not believe that this patch is worthwhile even if it does turn
> >>> out to be safe.
> >> I accept that this patch is not worthwhile.
> >>
> >> Raising an empty call is harmless, and it is a chance
> >> for RCU to make progress or to detect problems.
> > 
> > If you say "this patch has not yet been proven to be worthwhile" instead
> > of "this patch is not worthwhile", I will agree with you.  Here is a
> > quick rundown of my thoughts on it, which cannot be considered to be
> > fully formed:
> > 
> > 1.	It cannot improve real-time scheduling latency, since there
> > 	can be RCU_SOFTIRQ actions that really do something.  This
> > 	patch therefore does not improve the worst-case code path
> > 	(though it might -- or might not -- help the average path).
> > 
> > 2.	It seems to increase path length a bit, but I doubt that this
> > 	is measurable, so not a valid objection.
> > 
> > 3.	It -might- improve throughput, but this would need to be
> > 	proven.  This is the motivation for my question about the
> > 	fraction of empty calls above.
> > 
> > 	Kernels that force all softirqs to execute in process
> > 	context (e.g., CONFIG_PREEMPT_RT) are the most likely to
> > 	see benefit from this patch, as a pair of context switches
> > 	are saved in that case.  Of course, CONFIG_PREEMPT_RT is
> > 	more about scheduling latency than maximum throughput.
> > 
> > 4.	It does complicate the code a bit, but could be refactored
> > 	to reduce the complication.  (I haven't thought this through,
> > 	but my guess is that the initial check should be factored
> > 	across the "if" statement.)
> > 
> > All that aside, I don't believe the priority of this patch is particularly
> > high, although it did do a good job of showing that your understanding
> > of the treercu implementation is increasing.  Which is a very good thing!
> > 
> > Just out of curiosity, what is your test procedure?

Apologies, my question was unclear.  I was asking how you test patches
to RCU in general, not this patch in particular.

Still, very good info, thank you for providing it.

> 1------------------result
> (CONFIG_HZ=1000 and CONFIG_PREEMPT=y)

CONFIG_TREE_RCU or CONFIG_TREE_PREEMPT_RCU?

> Before the patch was applied: 10% empty calls (sometimes 30+ percent)
> After the patch was applied: <0.5% empty calls

Hmmm...  You weren't running rcutorture in the background, were you?  ;-)

> 2------------------test procedure
> I think you can add kernel trace code to detect it (I think that is simpler).
> The following is my quick test:
> 
> ensure CONFIG_FUNCTION_GRAPH_TRACER = y
> 
> mount -t debugfs xx /debugfs/
> echo rcu_process_callbacks > /debugfs/tracing/set_graph_function
> echo function_graph > /debugfs/tracing/current_tracer
> 
> # make all CPUs busy, for example: compile the kernel with 'make -j XXX'
> cat /debugfs/tracing/trace_pipe > trace.tmp # CTRL-C after 20 seconds
> ./parser.py trace.tmp

OK, so if no rcutorture, then I wonder where the extra-long RCU read-side
critical sections are.

> 3----------------------test code and ....
> (Because the compiler may or may not inline a static function:)
> Your 'empty_call_graph' may well differ from mine,
> so you should change it.
> 
> Q: Why does a plain 'rcu_process_gp_end();' line indicate an empty call?
> A: _raw_spin_trylock() is not inlined, so when the call is not empty we
> will see a '{' and '_raw_spin_trylock()' in the graph:
>  0)               |      rcu_process_gp_end() {
>  0)   0.350 us    |        _raw_spin_trylock();
>  0)   0.250 us    |        _raw_spin_unlock_irqrestore();
>  0)   1.326 us    |      }
> 
> Q: Can this test be trusted?
> A: No, but I believe it.

What you measured is what you measured, don't get me wrong.  I am just
wondering how 10% of the scheduling-clock interrupts managed to hit
RCU read-side critical sections.

OK.  If you were running CONFIG_TREE_RCU, and if 10% of the execution
time was in preemption-disabled regions in the kernel, and if there was
an RCU grace period outstanding at all times (which there might well be
due to file removal during kernel builds), and if each RCU grace period
lasted no more than a millisecond or so, then that might explain your
results.  If the grace periods last longer, then the fraction of time
in preemption-disabled regions in the kernel must increase accordingly.

If you had been running CONFIG_PREEMPT=n, then of course all of the
time spent in the kernel would be preempt-disabled.

Please understand that I am not doubting your measurements.  I instead
want to be very sure that we are not papering over some subtle bug by
making such a change.

							Thanx, Paul

> ------------------parser.py----------
> #!/usr/bin/env python
> 
> import sys
> 
> empty_call_graph = tuple(
> ''' rcu_process_callbacks() {
>  __rcu_process_callbacks() {
>  rcu_process_gp_end();
>  check_for_new_grace_period();
>  }
>  __rcu_process_callbacks() {
>  rcu_process_gp_end();
>  check_for_new_grace_period();
>  }
>  }
> '''.splitlines())
> 
> total=0
> empty=0
> match=0
> empty_block=[]
> 
> for line in file(sys.argv[1]):
>   empty_block.append(line)
>   if line.find('force_quiescent_state();') >= 0:
>     continue
> 
>   if line.find(empty_call_graph[match]) < 0:
>     match = 0
>     empty_block=[]
>     continue
> 
>   match = match + 1
>   if match == 1:
>     total = total + 1
>   elif match == len(empty_call_graph):
>     empty = empty + 1
>     for emptyline in empty_block:
>       print emptyline,
>     empty_block=[]
>     match = 0
> 
> print('empty call/total call: %d/%d' % (empty, total))
> print('fraction: %f%%' % (100.0 * empty / total))
> 
> 

