* [PATCH 4/5] rcu: join rcu_ctrlblk and rcu_state
@ 2006-01-08 19:19 Oleg Nesterov
  2006-01-10  0:28 ` Paul E. McKenney
  0 siblings, 1 reply; 8+ messages in thread
From: Oleg Nesterov @ 2006-01-08 19:19 UTC (permalink / raw)
  To: linux-kernel
  Cc: Dipankar Sarma, Manfred Spraul, Linus Torvalds, Paul E. McKenney,
	Andrew Morton

This patch moves rcu_state into rcu_ctrlblk. I see no reason why
we should have two different variables to control RCU state: every
user of rcu_state also has "rcu_ctrlblk *rcp" in its parameter list.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>

--- 2.6.15/include/linux/rcupdate.h~4_JOIN	2006-01-08 21:36:07.000000000 +0300
+++ 2.6.15/include/linux/rcupdate.h	2006-01-08 23:16:36.000000000 +0300
@@ -65,6 +65,10 @@ struct rcu_ctrlblk {
 	long	cur;		/* Current batch number.                      */
 	long	completed;	/* Number of the last completed batch         */
 	int	next_pending;	/* Is the next batch already waiting?         */
+
+	spinlock_t	lock	____cacheline_maxaligned_in_smp;
+	cpumask_t	cpumask; /* CPUs that need to switch in order    */
+	                         /* for current batch to proceed.        */
 } ____cacheline_maxaligned_in_smp;
 
 /* Is batch a before batch b ? */
--- 2.6.15/kernel/rcupdate.c~4_JOIN	2006-01-08 22:46:13.000000000 +0300
+++ 2.6.15/kernel/rcupdate.c	2006-01-08 23:49:31.000000000 +0300
@@ -49,22 +49,18 @@
 #include <linux/cpu.h>
 
 /* Definition for rcupdate control block. */
-struct rcu_ctrlblk rcu_ctrlblk = 
-	{ .cur = -300, .completed = -300 };
-struct rcu_ctrlblk rcu_bh_ctrlblk =
-	{ .cur = -300, .completed = -300 };
-
-/* Bookkeeping of the progress of the grace period */
-struct rcu_state {
-	spinlock_t	lock; /* Guard this struct and writes to rcu_ctrlblk */
-	cpumask_t	cpumask; /* CPUs that need to switch in order    */
-	                              /* for current batch to proceed.        */
+struct rcu_ctrlblk rcu_ctrlblk = {
+	.cur = -300,
+	.completed = -300,
+	.lock = SPIN_LOCK_UNLOCKED,
+	.cpumask = CPU_MASK_NONE,
+};
+struct rcu_ctrlblk rcu_bh_ctrlblk = {
+	.cur = -300,
+	.completed = -300,
+	.lock = SPIN_LOCK_UNLOCKED,
+	.cpumask = CPU_MASK_NONE,
 };
-
-static struct rcu_state rcu_state ____cacheline_maxaligned_in_smp =
-	  {.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE };
-static struct rcu_state rcu_bh_state ____cacheline_maxaligned_in_smp =
-	  {.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE };
 
 DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L };
 DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };
@@ -233,13 +229,13 @@ static void rcu_do_batch(struct rcu_data
  *   This is done by rcu_start_batch. The start is not broadcasted to
  *   all cpus, they must pick this up by comparing rcp->cur with
  *   rdp->quiescbatch. All cpus are recorded  in the
- *   rcu_state.cpumask bitmap.
+ *   rcu_ctrlblk.cpumask bitmap.
  * - All cpus must go through a quiescent state.
  *   Since the start of the grace period is not broadcasted, at least two
  *   calls to rcu_check_quiescent_state are required:
  *   The first call just notices that a new grace period is running. The
  *   following calls check if there was a quiescent state since the beginning
- *   of the grace period. If so, it updates rcu_state.cpumask. If
+ *   of the grace period. If so, it updates rcu_ctrlblk.cpumask. If
  *   the bitmap is empty, then the grace period is completed.
  *   rcu_check_quiescent_state calls rcu_start_batch(0) to start the next grace
  *   period (if necessary).
@@ -247,9 +243,9 @@ static void rcu_do_batch(struct rcu_data
 /*
  * Register a new batch of callbacks, and start it up if there is currently no
  * active batch and the batch to be registered has not already occurred.
- * Caller must hold rcu_state.lock.
+ * Caller must hold rcu_ctrlblk.lock.
  */
-static void rcu_start_batch(struct rcu_ctrlblk *rcp, struct rcu_state *rsp)
+static void rcu_start_batch(struct rcu_ctrlblk *rcp)
 {
 	if (rcp->next_pending &&
 			rcp->completed == rcp->cur) {
@@ -264,11 +260,11 @@ static void rcu_start_batch(struct rcu_c
 		/*
 		 * Accessing nohz_cpu_mask before incrementing rcp->cur needs a
 		 * Barrier  Otherwise it can cause tickless idle CPUs to be
-		 * included in rsp->cpumask, which will extend graceperiods
+		 * included in rcp->cpumask, which will extend graceperiods
 		 * unnecessarily.
 		 */
 		smp_mb();
-		cpus_andnot(rsp->cpumask, cpu_online_map, nohz_cpu_mask);
+		cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask);
 
 	}
 }
@@ -278,13 +274,13 @@ static void rcu_start_batch(struct rcu_c
  * Clear it from the cpu mask and complete the grace period if it was the last
  * cpu. Start another grace period if someone has further entries pending
  */
-static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp, struct rcu_state *rsp)
+static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp)
 {
-	cpu_clear(cpu, rsp->cpumask);
-	if (cpus_empty(rsp->cpumask)) {
+	cpu_clear(cpu, rcp->cpumask);
+	if (cpus_empty(rcp->cpumask)) {
 		/* batch completed ! */
 		rcp->completed = rcp->cur;
-		rcu_start_batch(rcp, rsp);
+		rcu_start_batch(rcp);
 	}
 }
 
@@ -294,7 +290,7 @@ static void cpu_quiet(int cpu, struct rc
  * quiescent cycle, then indicate that it has done so.
  */
 static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
-			struct rcu_state *rsp, struct rcu_data *rdp)
+					struct rcu_data *rdp)
 {
 	if (rdp->quiescbatch != rcp->cur) {
 		/* start new grace period: */
@@ -319,15 +315,15 @@ static void rcu_check_quiescent_state(st
 		return;
 	rdp->qs_pending = 0;
 
-	spin_lock(&rsp->lock);
+	spin_lock(&rcp->lock);
 	/*
 	 * rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync
 	 * during cpu startup. Ignore the quiescent state.
 	 */
 	if (likely(rdp->quiescbatch == rcp->cur))
-		cpu_quiet(rdp->cpu, rcp, rsp);
+		cpu_quiet(rdp->cpu, rcp);
 
-	spin_unlock(&rsp->lock);
+	spin_unlock(&rcp->lock);
 }
 
 
@@ -348,16 +344,16 @@ static void rcu_move_batch(struct rcu_da
 }
 
 static void __rcu_offline_cpu(struct rcu_data *this_rdp,
-	struct rcu_ctrlblk *rcp, struct rcu_state *rsp, struct rcu_data *rdp)
+				struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
 {
 	/* if the cpu going offline owns the grace period
 	 * we can block indefinitely waiting for it, so flush
 	 * it here
 	 */
-	spin_lock_bh(&rsp->lock);
+	spin_lock_bh(&rcp->lock);
 	if (rcp->cur != rcp->completed)
-		cpu_quiet(rdp->cpu, rcp, rsp);
-	spin_unlock_bh(&rsp->lock);
+		cpu_quiet(rdp->cpu, rcp);
+	spin_unlock_bh(&rcp->lock);
 	rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail);
 	rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail);
 
@@ -367,9 +363,9 @@ static void rcu_offline_cpu(int cpu)
 	struct rcu_data *this_rdp = &get_cpu_var(rcu_data);
 	struct rcu_data *this_bh_rdp = &get_cpu_var(rcu_bh_data);
 
-	__rcu_offline_cpu(this_rdp, &rcu_ctrlblk, &rcu_state,
+	__rcu_offline_cpu(this_rdp, &rcu_ctrlblk,
 					&per_cpu(rcu_data, cpu));
-	__rcu_offline_cpu(this_bh_rdp, &rcu_bh_ctrlblk, &rcu_bh_state,
+	__rcu_offline_cpu(this_bh_rdp, &rcu_bh_ctrlblk,
 					&per_cpu(rcu_bh_data, cpu));
 	put_cpu_var(rcu_data);
 	put_cpu_var(rcu_bh_data);
@@ -388,7 +384,7 @@ static void rcu_offline_cpu(int cpu)
  * This does the RCU processing work from tasklet context. 
  */
 static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
-			struct rcu_state *rsp, struct rcu_data *rdp)
+					struct rcu_data *rdp)
 {
 	if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) {
 		*rdp->donetail = rdp->curlist;
@@ -418,25 +414,23 @@ static void __rcu_process_callbacks(stru
 
 		if (!rcp->next_pending) {
 			/* and start it/schedule start if it's a new batch */
-			spin_lock(&rsp->lock);
+			spin_lock(&rcp->lock);
 			rcp->next_pending = 1;
-			rcu_start_batch(rcp, rsp);
-			spin_unlock(&rsp->lock);
+			rcu_start_batch(rcp);
+			spin_unlock(&rcp->lock);
 		}
 	} else {
 		local_irq_enable();
 	}
-	rcu_check_quiescent_state(rcp, rsp, rdp);
+	rcu_check_quiescent_state(rcp, rdp);
 	if (rdp->donelist)
 		rcu_do_batch(rdp);
 }
 
 static void rcu_process_callbacks(unsigned long unused)
 {
-	__rcu_process_callbacks(&rcu_ctrlblk, &rcu_state,
-				&__get_cpu_var(rcu_data));
-	__rcu_process_callbacks(&rcu_bh_ctrlblk, &rcu_bh_state,
-				&__get_cpu_var(rcu_bh_data));
+	__rcu_process_callbacks(&rcu_ctrlblk, &__get_cpu_var(rcu_data));
+	__rcu_process_callbacks(&rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data));
 }
 
 static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)


* Re: [PATCH 4/5] rcu: join rcu_ctrlblk and rcu_state
  2006-01-08 19:19 [PATCH 4/5] rcu: join rcu_ctrlblk and rcu_state Oleg Nesterov
@ 2006-01-10  0:28 ` Paul E. McKenney
  2006-01-10  0:43   ` Linus Torvalds
  2006-01-10 18:09   ` Dipankar Sarma
  0 siblings, 2 replies; 8+ messages in thread
From: Paul E. McKenney @ 2006-01-10  0:28 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: linux-kernel, Dipankar Sarma, Manfred Spraul, Linus Torvalds,
	Andrew Morton

On Sun, Jan 08, 2006 at 10:19:42PM +0300, Oleg Nesterov wrote:
> This patch moves rcu_state into rcu_ctrlblk. I see no reason why
> we should have two different variables to control RCU state: every
> user of rcu_state also has "rcu_ctrlblk *rcp" in its parameter list.

This patch looks sane to me.  It passes a short one-hour rcutorture run
on ppc64 and x86; I am firing up some overnight runs as well.

Dipankar, Manfred, any other concerns?  Cacheline alignment?  (Seems
to me this code is far enough from the fastpath that this should not
be a problem, but thought I should ask.)

							Thanx, Paul

Acked-by: Paul E. McKenney <paulmck@us.ibm.com>

* Re: [PATCH 4/5] rcu: join rcu_ctrlblk and rcu_state
  2006-01-10  0:28 ` Paul E. McKenney
@ 2006-01-10  0:43   ` Linus Torvalds
  2006-01-10  2:54     ` Paul E. McKenney
  2006-01-10 18:09   ` Dipankar Sarma
  1 sibling, 1 reply; 8+ messages in thread
From: Linus Torvalds @ 2006-01-10  0:43 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: Oleg Nesterov, linux-kernel, Dipankar Sarma, Manfred Spraul,
	Andrew Morton



On Mon, 9 Jan 2006, Paul E. McKenney wrote:
> 
> This patch looks sane to me.  It passes a short one-hour rcutorture run
> on ppc64 and x86; I am firing up some overnight runs as well.
> 
> Dipankar, Manfred, any other concerns?  Cacheline alignment?  (Seems
> to me this code is far enough from the fastpath that this should not
> be a problem, but thought I should ask.)

I'd ask you and Oleg to re-synchronize, and perhaps Oleg to re-send the
(part of the?) series that has no debate. I'm unsure, for example, whether
#2 was just to be dropped.

I already applied #1, and it looks like there's agreement on #3 and #4, 
but basically, just to make sure, can Oleg please re-send to make sure I 
got it right?

Getting a screwed-up RCU thing is going to be too painful to debug, so I'd 
rather get it right the first time it hits my tree..

		Linus


* Re: [PATCH 4/5] rcu: join rcu_ctrlblk and rcu_state
  2006-01-10  0:43   ` Linus Torvalds
@ 2006-01-10  2:54     ` Paul E. McKenney
  2006-01-10 10:02       ` Srivatsa Vaddagiri
  0 siblings, 1 reply; 8+ messages in thread
From: Paul E. McKenney @ 2006-01-10  2:54 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Oleg Nesterov, linux-kernel, Dipankar Sarma, Manfred Spraul,
	Andrew Morton

On Mon, Jan 09, 2006 at 04:43:17PM -0800, Linus Torvalds wrote:
> 
> On Mon, 9 Jan 2006, Paul E. McKenney wrote:
> > 
> > This patch looks sane to me.  It passes a short one-hour rcutorture run
> > on ppc64 and x86; I am firing up some overnight runs as well.
> > 
> > Dipankar, Manfred, any other concerns?  Cacheline alignment?  (Seems
> > to me this code is far enough from the fastpath that this should not
> > be a problem, but thought I should ask.)
> 
> I'd ask you and Oleg to re-synchronize, and perhaps Oleg to re-send the
> (part of the?) series that has no debate. I'm unsure, for example, whether
> #2 was just to be dropped.

I believe that the original #2 is to be dropped, but that the patch Oleg
submitted in:

	http://marc.theaimsgroup.com/?l=linux-kernel&m=113681388600342&w=2

may be needed.  I have added Vatsa to the CC to get his take on this.

However, this patch should be independent of #4, so it should be OK
to apply an updated #4 first while we are working out what to do about #2.

> I already applied #1, and it looks like there's agreement on #3 and #4, 
> but basically, just to make sure, can Oleg please re-send to make sure I 
> got it right?
> 
> Getting a screwed-up RCU thing is going to be too painful to debug, so I'd 
> rather get it right the first time it hits my tree..

Been there more times than I care to admit, and I most definitely agree!!!

							Thanx, Paul


* Re: [PATCH 4/5] rcu: join rcu_ctrlblk and rcu_state
  2006-01-10  2:54     ` Paul E. McKenney
@ 2006-01-10 10:02       ` Srivatsa Vaddagiri
  0 siblings, 0 replies; 8+ messages in thread
From: Srivatsa Vaddagiri @ 2006-01-10 10:02 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: Linus Torvalds, Oleg Nesterov, linux-kernel, Dipankar Sarma,
	Manfred Spraul, Andrew Morton

On Mon, Jan 09, 2006 at 06:54:39PM -0800, Paul E. McKenney wrote:
> I believe that the original #2 is to be dropped, but that the patch Oleg
> submitted in:
> 
> 	http://marc.theaimsgroup.com/?l=linux-kernel&m=113681388600342&w=2
> 
> may be needed.  I have added Vatsa to the CC to get his take on this.

The patch submitted in the above URL seems fine to me and I think we should
take it after Oleg does some basic testing.

- vatsa


* Re: [PATCH 4/5] rcu: join rcu_ctrlblk and rcu_state
  2006-01-10  0:28 ` Paul E. McKenney
  2006-01-10  0:43   ` Linus Torvalds
@ 2006-01-10 18:09   ` Dipankar Sarma
  2006-01-10 20:44     ` Manfred Spraul
  1 sibling, 1 reply; 8+ messages in thread
From: Dipankar Sarma @ 2006-01-10 18:09 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: Oleg Nesterov, linux-kernel, Manfred Spraul, Linus Torvalds,
	Andrew Morton

On Mon, Jan 09, 2006 at 04:28:18PM -0800, Paul E. McKenney wrote:
> On Sun, Jan 08, 2006 at 10:19:42PM +0300, Oleg Nesterov wrote:
> > This patch moves rcu_state into rcu_ctrlblk. I see no reason why
> > we should have two different variables to control RCU state: every
> > user of rcu_state also has "rcu_ctrlblk *rcp" in its parameter list.
> 
> This patch looks sane to me.  It passes a short one-hour rcutorture run
> on ppc64 and x86; I am firing up some overnight runs as well.
> 
> Dipankar, Manfred, any other concerns?  Cacheline alignment?  (Seems
> to me this code is far enough from the fastpath that this should not
> be a problem, but thought I should ask.)
> 

rcu_state came over from Manfred's RCU_HUGE patch IIRC. I don't
think it is necessary to allocate rcu_state separately in the
current mainline RCU code. So, the patch looks OK to me, but
Manfred might see something that I am not seeing.

Thanks
Dipankar


* Re: [PATCH 4/5] rcu: join rcu_ctrlblk and rcu_state
  2006-01-10 18:09   ` Dipankar Sarma
@ 2006-01-10 20:44     ` Manfred Spraul
  2006-01-10 21:51       ` Paul E. McKenney
  0 siblings, 1 reply; 8+ messages in thread
From: Manfred Spraul @ 2006-01-10 20:44 UTC (permalink / raw)
  To: dipankar
  Cc: Paul E. McKenney, Oleg Nesterov, linux-kernel, Linus Torvalds,
	Andrew Morton

[I haven't read the diff, just a short comment]

Dipankar Sarma wrote:

>rcu_state came over from Manfred's RCU_HUGE patch IIRC. I don't
>think it is necessary to allocate rcu_state separately in the
>current mainline RCU code. So, the patch looks OK to me, but
>Manfred might see something that I am not seeing.
>
>  
>
The two-level rcu code was never merged; I still plan to clean it up.

But the idea of splitting the control block and the state is used in the
current code:
- __rcu_pending() is the hot path; it only performs read accesses to
rcu_ctrlblk.
- Write accesses to rcu_ctrlblk are really rare; they only happen when a
new batch is started. In particular, they are independent of the number
of cpus.

Write accesses to rcu_state are common:
- Each cpu must write once in each cycle to update its bit in the cpu mask.
- The last cpu then completes the quiescent cycle.

The idea is that rcu_state is more or less write-only and rcu_ctrlblk is
read-only. Theoretically, rcu_ctrlblk can stay shared in all cpus' caches,
and there will be only one invalidate when a new batch is started; thus
no cacheline thrashing due to rcu_pending() calls.
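
To illustrate, here is roughly what that hot path looks like (a
simplified sketch of the 2.6.15 __rcu_pending(), not the exact code);
every access to the global state is a read:

static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
{
	/* this cpu has callbacks waiting and their grace period ended */
	if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch))
		return 1;

	/* new callbacks are queued, but no batch was assigned yet */
	if (!rdp->curlist && rdp->nxtlist)
		return 1;

	/* callbacks are ready to be invoked */
	if (rdp->donelist)
		return 1;

	/* the rcu core waits for a quiescent state from this cpu */
	if (rdp->quiescbatch != rcp->cur || rdp->qs_pending)
		return 1;

	/* nothing to do: only rcp->cur and rcp->completed were read */
	return 0;
}
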
I think it would be safer to keep the two state counters in a separate
cacheline from the spinlock and the cpu mask, but I don't have any hard
numbers. IIRC the problems on the large SGI systems disappeared and
everyone was happy; no real benchmark comparisons were made.

--
    Manfred


* Re: [PATCH 4/5] rcu: join rcu_ctrlblk and rcu_state
  2006-01-10 20:44     ` Manfred Spraul
@ 2006-01-10 21:51       ` Paul E. McKenney
  0 siblings, 0 replies; 8+ messages in thread
From: Paul E. McKenney @ 2006-01-10 21:51 UTC (permalink / raw)
  To: Manfred Spraul
  Cc: dipankar, Oleg Nesterov, linux-kernel, Linus Torvalds,
	Andrew Morton, vatsa

On Tue, Jan 10, 2006 at 09:44:56PM +0100, Manfred Spraul wrote:
> [I haven't read the diff, just a short comment]
> 
> Dipankar Sarma wrote:
> 
> >rcu_state came over from Manfred's RCU_HUGE patch IIRC. I don't
> >think it is necessary to allocate rcu_state separately in the
> >current mainline RCU code. So, the patch looks OK to me, but
> >Manfred might see something that I am not seeing.
> >
> > 
> >
> The two-level rcu code was never merged; I still plan to clean it up.
> 
> But the idea of splitting the control block and the state is used in the
> current code:
> - __rcu_pending() is the hot path; it only performs read accesses to
> rcu_ctrlblk.
> - Write accesses to rcu_ctrlblk are really rare; they only happen when a
> new batch is started. In particular, they are independent of the number
> of cpus.
> 
> Write accesses to rcu_state are common:
> - Each cpu must write once in each cycle to update its bit in the cpu mask.
> - The last cpu then completes the quiescent cycle.
> 
> The idea is that rcu_state is more or less write-only and rcu_ctrlblk is
> read-only. Theoretically, rcu_ctrlblk can stay shared in all cpus' caches,
> and there will be only one invalidate when a new batch is started; thus
> no cacheline thrashing due to rcu_pending() calls.
> I think it would be safer to keep the two state counters in a separate
> cacheline from the spinlock and the cpu mask, but I don't have any hard
> numbers. IIRC the problems on the large SGI systems disappeared and
> everyone was happy; no real benchmark comparisons were made.

Good point!

But doesn't the ____cacheline_maxaligned_in_smp directive on the "lock"
field take care of this?  Here is the structure:

/* Global control variables for rcupdate callback mechanism. */
struct rcu_ctrlblk {
        long    cur;            /* Current batch number.                      */
        long    completed;      /* Number of the last completed batch         */
        int     next_pending;   /* Is the next batch already waiting?         */

	spinlock_t      lock    ____cacheline_maxaligned_in_smp;
	cpumask_t       cpumask; /* CPUs that need to switch in order    */
				 /* for current batch to proceed.        */
} ____cacheline_maxaligned_in_smp;

If this does not cover this case, what more is needed?
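
For what it's worth, a quick userspace analogue (hypothetical stand-in
types and an assumed 128-byte line size, not the kernel code) shows the
aligned lock starting a fresh cache line, away from cur/completed:

#include <stdio.h>
#include <stddef.h>

#define LINE_SIZE 128			/* assumed internode line size */

struct ctrlblk_like {
	long cur;
	long completed;
	int next_pending;
	_Alignas(LINE_SIZE) int lock;	/* stand-in for spinlock_t */
	unsigned long cpumask;		/* stand-in for cpumask_t */
};

int main(void)
{
	printf("completed at %zu, lock at %zu, sizeof %zu\n",
	       offsetof(struct ctrlblk_like, completed),
	       offsetof(struct ctrlblk_like, lock),
	       sizeof(struct ctrlblk_like));
	/* on LP64 this prints: completed at 8, lock at 128, sizeof 256,
	 * so the read-mostly counters and the write-heavy lock/cpumask
	 * do end up on separate lines */
	return 0;
}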

							Thanx, Paul

