* [PATCH 2/2] cpuhotplug: make get_online_cpus() scalable by using a percpu counter
From: Lai Jiangshan @ 2010-04-05 10:38 UTC
  To: Rusty Russell, Benjamin Herrenschmidt, Hugh Dickins, Ingo Molnar,
	Paul E. McKenney, Nathan Fontenot, Peter Zijlstra, Andrew Morton,
	Thomas Gleixner, Oleg Nesterov, Sachin Sant, H. Peter Anvin,
	Shane Wang, Roland McGrath, linux-kernel, Gautham R Shenoy


The current get_online_cpus() acquires a mutex lock and then
releases it. This does not scale, and it hurts the cache. This patch rewrites it.

1) get_online_cpus() must be allowed to be called recursively, so I added
   a get_online_cpus_nest counter to every task for the new code.

   This patch still allows get_online_cpus() to be called recursively,
   but when the call is not nested, get_online_cpus() waits until
   CPU hotplug has finished, so the potential writer starvation is avoided.

   The livelock of cpu_hotplug_begin() is also avoided, so the comment
   describing it is removed.

2) The new code uses per-CPU counters, protected by RCU. These counters
   act like the reference counter of a module. (Actually, all of this
   code is adapted from module.c: try_refcount_get() comes from
   try_module_get(), put_online_cpus() from module_put()...)

   With this patch applied, get_online_cpus() is very light and scalable
   when CPU hotplug is not running: it just disables preemption, increments
   the per-CPU counter, and enables preemption again.

3) Since we have try_refcount_get(), I added a new API, try_get_online_cpus().
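
   For illustration only, a hypothetical caller of the new API could look
   like this (my sketch; example_flush_work() is an invented name, not
   part of this patch):

	static void example_flush_work(void)
	{
		if (!try_get_online_cpus())
			return;		/* hotplug in progress, try again later */

		/* cpu_online_mask is now stable and safe to iterate */

		put_online_cpus();
	}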

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
---
 include/linux/cpu.h   |    2 +
 include/linux/sched.h |    3 +
 kernel/cpu.c          |  131 ++++++++++++++++++++++++++++++++------------------
 kernel/fork.c         |    3 +
 4 files changed, 94 insertions(+), 45 deletions(-)
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index e287863..a32809c 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -112,6 +112,7 @@ extern struct sysdev_class cpu_sysdev_class;
 
 extern void get_online_cpus(void);
 extern void put_online_cpus(void);
+extern int try_get_online_cpus(void);
 #define hotcpu_notifier(fn, pri)	cpu_notifier(fn, pri)
 #define register_hotcpu_notifier(nb)	register_cpu_notifier(nb)
 #define unregister_hotcpu_notifier(nb)	unregister_cpu_notifier(nb)
@@ -134,6 +135,7 @@ static inline void cpu_hotplug_driver_unlock(void)
 
 #define get_online_cpus()	do { } while (0)
 #define put_online_cpus()	do { } while (0)
+#define try_get_online_cpus()	(1)
 #define hotcpu_notifier(fn, pri)	do { (void)(fn); } while (0)
 /* These aren't inline functions due to a GCC bug. */
 #define register_hotcpu_notifier(nb)	({ (void)(nb); 0; })
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c46b6e5..0422ea3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1501,6 +1501,9 @@ struct task_struct {
 		unsigned long memsw_bytes; /* uncharged mem+swap usage */
 	} memcg_batch;
 #endif
+#ifdef CONFIG_HOTPLUG_CPU
+	int get_online_cpus_nest;
+#endif
 };
 
 /* Future-safe accessor for struct task_struct's cpus_allowed. */
diff --git a/kernel/cpu.c b/kernel/cpu.c
index bc1e3d5..ede02c6 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -14,6 +14,7 @@
 #include <linux/kthread.h>
 #include <linux/stop_machine.h>
 #include <linux/mutex.h>
+#include <linux/percpu.h>
 
 #ifdef CONFIG_SMP
 /* Serializes the updates to cpu_online_mask, cpu_present_mask */
@@ -42,41 +43,82 @@ static int cpu_hotplug_disabled;
 
 #ifdef CONFIG_HOTPLUG_CPU
 
-static struct {
-	struct task_struct *active_writer;
-	struct mutex lock; /* Synchronizes accesses to refcount, */
-	/*
-	 * Also blocks the new readers during
-	 * an ongoing cpu hotplug operation.
-	 */
-	int refcount;
-} cpu_hotplug = {
-	.active_writer = NULL,
-	.lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
-	.refcount = 0,
-};
+DEFINE_MUTEX(cpu_hotplug_lock);
+struct task_struct *cpu_hotplug_task;
+DEFINE_PER_CPU(int, refcount);
+
+static int try_refcount_get(void)
+{
+	preempt_disable();
+
+	if (likely(!cpu_hotplug_task)) {
+		__get_cpu_var(refcount)++;
+		preempt_enable();
+		return 1;
+	}
+
+	preempt_enable();
+	return 0;
+}
+
+int try_get_online_cpus(void)
+{
+	if (cpu_hotplug_task == current)
+		return 1;
+
+	if (current->get_online_cpus_nest || try_refcount_get()) {
+		current->get_online_cpus_nest++;
+		return 1;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(try_get_online_cpus);
 
 void get_online_cpus(void)
 {
 	might_sleep();
-	if (cpu_hotplug.active_writer == current)
+	if (cpu_hotplug_task == current)
+		return;
+
+	if (current->get_online_cpus_nest++)
+		return;
+
+	if (likely(try_refcount_get()))
 		return;
-	mutex_lock(&cpu_hotplug.lock);
-	cpu_hotplug.refcount++;
-	mutex_unlock(&cpu_hotplug.lock);
 
+	mutex_lock(&cpu_hotplug_lock);
+	percpu_add(refcount, 1);
+	mutex_unlock(&cpu_hotplug_lock);
 }
 EXPORT_SYMBOL_GPL(get_online_cpus);
 
+static unsigned int refcount_sum(void)
+{
+	unsigned int total = 0;
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		total += per_cpu(refcount, cpu);
+
+	return total;
+}
+
 void put_online_cpus(void)
 {
-	if (cpu_hotplug.active_writer == current)
+	if (cpu_hotplug_task == current)
+		return;
+
+	if (WARN_ON(!current->get_online_cpus_nest))
 		return;
-	mutex_lock(&cpu_hotplug.lock);
-	if (!--cpu_hotplug.refcount && unlikely(cpu_hotplug.active_writer))
-		wake_up_process(cpu_hotplug.active_writer);
-	mutex_unlock(&cpu_hotplug.lock);
 
+	if (!--current->get_online_cpus_nest) {
+		preempt_disable();
+		__get_cpu_var(refcount)--;
+		if (cpu_hotplug_task)
+			wake_up_process(cpu_hotplug_task);
+		preempt_enable();
+	}
 }
 EXPORT_SYMBOL_GPL(put_online_cpus);
 
@@ -85,41 +127,40 @@ EXPORT_SYMBOL_GPL(put_online_cpus);
  * refcount goes to zero.
  *
  * Note that during a cpu-hotplug operation, the new readers, if any,
- * will be blocked by the cpu_hotplug.lock
- *
- * Since cpu_hotplug_begin() is always called after invoking
- * cpu_maps_update_begin(), we can be sure that only one writer is active.
- *
- * Note that theoretically, there is a possibility of a livelock:
- * - Refcount goes to zero, last reader wakes up the sleeping
- *   writer.
- * - Last reader unlocks the cpu_hotplug.lock.
- * - A new reader arrives at this moment, bumps up the refcount.
- * - The writer acquires the cpu_hotplug.lock finds the refcount
- *   non zero and goes to sleep again.
- *
- * However, this is very difficult to achieve in practice since
- * get_online_cpus() not an api which is called all that often.
- *
+ * will be blocked by the cpu_hotplug_lock
  */
 static void cpu_hotplug_begin(void)
 {
-	cpu_hotplug.active_writer = current;
+	mutex_lock(&cpu_hotplug_lock);
+
+	/*
+	 * Set cpu_hotplug_task, then wait until all in-flight
+	 * try_refcount_get() calls have finished and are visible.
+	 */
+	cpu_hotplug_task = current;
+	synchronize_sched();
 
+	/* Wait for zero refcount */
 	for (;;) {
-		mutex_lock(&cpu_hotplug.lock);
-		if (likely(!cpu_hotplug.refcount))
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		if (!refcount_sum())
 			break;
-		__set_current_state(TASK_UNINTERRUPTIBLE);
-		mutex_unlock(&cpu_hotplug.lock);
 		schedule();
 	}
+
+	__set_current_state(TASK_RUNNING);
 }
 
 static void cpu_hotplug_done(void)
 {
-	cpu_hotplug.active_writer = NULL;
-	mutex_unlock(&cpu_hotplug.lock);
+	/*
+	 * Ensure try_refcount_get() sees all preceding updates
+	 * after it sees cpu_hotplug_task == NULL.
+	 */
+	smp_mb();
+
+	cpu_hotplug_task = NULL;
+	mutex_unlock(&cpu_hotplug_lock);
 }
 
 #else /* #if CONFIG_HOTPLUG_CPU */
diff --git a/kernel/fork.c b/kernel/fork.c
index d67f1db..b162014 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1109,6 +1109,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->memcg_batch.memcg = NULL;
 #endif
 	p->stack_start = stack_start;
+#ifdef CONFIG_HOTPLUG_CPU
+	p->get_online_cpus_nest = 0;
+#endif
 
 	/* Perform scheduler related setup. Assign this task to a CPU. */
 	sched_fork(p, clone_flags);


* Re: [PATCH 2/2] cpuhotplug: make get_online_cpus() scalable by using a percpu counter
From: Oleg Nesterov @ 2010-04-05 16:29 UTC
  To: Lai Jiangshan
  Cc: Rusty Russell, Benjamin Herrenschmidt, Hugh Dickins, Ingo Molnar,
	Paul E. McKenney, Nathan Fontenot, Peter Zijlstra, Andrew Morton,
	Thomas Gleixner, Sachin Sant, H. Peter Anvin, Shane Wang,
	Roland McGrath, linux-kernel, Gautham R Shenoy

On 04/05, Lai Jiangshan wrote:
>
> 1) get_online_cpus() must be allowed to be called recursively, so I added
>    a get_online_cpus_nest counter to every task for the new code.

Well, iirc one of the goals of

	cpu-hotplug: replace lock_cpu_hotplug() with get_online_cpus()
	86ef5c9a8edd78e6bf92879f32329d89b2d55b5a

was avoiding the new members in task_struct. I leave this up to you
and Gautham.


Lai, I didn't read this patch carefully yet (and I can't apply it to
Linus's tree). But at first glance,

>  void put_online_cpus(void)
>  {
> ...
> +	if (!--current->get_online_cpus_nest) {
> +		preempt_disable();
> +		__get_cpu_var(refcount)--;
> +		if (cpu_hotplug_task)
> +			wake_up_process(cpu_hotplug_task);

This looks unsafe. In theory nothing protects cpu_hotplug_task from
exiting if refcount_sum() becomes zero; this means wake_up_process()
can hit a freed/reused/unmapped task_struct. Probably cpu_hotplug_done()
needs another synchronize_sched() before returning.
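
To spell out the window (my sketch of the scenario, not code from the
patch; it assumes the task driving the hotplug operation exits right
after cpu_hotplug_done() returns):

	put_online_cpus()                  hotplug task
	-----------------                  ------------
	__get_cpu_var(refcount)--;
	                                   refcount_sum() == 0, proceeds
	                                   cpu_hotplug_done()
	                                   task exits, task_struct freed
	if (cpu_hotplug_task)              /* stale non-NULL pointer */
		wake_up_process(cpu_hotplug_task);  /* use-after-free */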

OTOH, I do not understand why the result of __get_cpu_var(refcount)
must be visible to refcount_sum() if we race with cpu_hotplug_begin(),
so it seems to me cpu_hotplug_begin() also needs synchronize_sched()
before refcount_sum().

Oleg.


* Re: [PATCH 2/2] cpuhotplug: make get_online_cpus() scalable by using a percpu counter
From: Oleg Nesterov @ 2010-04-06 12:00 UTC
  To: Lai Jiangshan
  Cc: Rusty Russell, Benjamin Herrenschmidt, Hugh Dickins, Ingo Molnar,
	Paul E. McKenney, Nathan Fontenot, Peter Zijlstra, Andrew Morton,
	Thomas Gleixner, Sachin Sant, H. Peter Anvin, Shane Wang,
	Roland McGrath, linux-kernel, Gautham R Shenoy

On 04/05, Oleg Nesterov wrote:
>
> On 04/05, Lai Jiangshan wrote:
> >
> > 1) get_online_cpus() must be allowed to be called recursively, so I added
> >    a get_online_cpus_nest counter to every task for the new code.
>
> Well, iirc one of the goals of
>
> 	cpu-hotplug: replace lock_cpu_hotplug() with get_online_cpus()
> 	86ef5c9a8edd78e6bf92879f32329d89b2d55b5a
>
> was avoiding the new members in task_struct. I leave this up to you
> and Gautham.
>
>
> Lai, I didn't read this patch carefully yet (and I can't apply it to
> Linus's tree). But at first glance,

because I tried to apply it without 1/2 ;)

> >  void put_online_cpus(void)
> >  {
> > ...
> > +	if (!--current->get_online_cpus_nest) {
> > +		preempt_disable();
> > +		__get_cpu_var(refcount)--;
> > +		if (cpu_hotplug_task)
> > +			wake_up_process(cpu_hotplug_task);
>
> This looks unsafe. In theory nothing protects cpu_hotplug_task from
> exiting if refcount_sum() becomes zero; this means wake_up_process()
> can hit a freed/reused/unmapped task_struct. Probably cpu_hotplug_done()
> needs another synchronize_sched() before returning.

Yes, I think this is true, at least in theory.

> OTOH, I do not understand why the result of __get_cpu_var(refcount)
> must be visible to refcount_sum() if we race with cpu_hotplug_begin(),
> so it seems to me cpu_hotplug_begin() also needs synchronize_sched()
> before refcount_sum().

No, I misread the unapplied patch, sorry for noise.

Oleg.


* Re: [PATCH 2/2] cpuhotplug: make get_online_cpus() scalable by using a percpu counter
From: Lai Jiangshan @ 2010-04-07 13:35 UTC
  To: Oleg Nesterov, Gautham R Shenoy
  Cc: Rusty Russell, Benjamin Herrenschmidt, Hugh Dickins, Ingo Molnar,
	Paul E. McKenney, Nathan Fontenot, Peter Zijlstra, Andrew Morton,
	Thomas Gleixner, Sachin Sant, H. Peter Anvin, Shane Wang,
	Roland McGrath, linux-kernel

Oleg Nesterov wrote:
> On 04/05, Oleg Nesterov wrote:
>> On 04/05, Lai Jiangshan wrote:
>>> 1) get_online_cpus() must be allowed to be called recursively, so I added
>>>    a get_online_cpus_nest counter to every task for the new code.
>> Well, iirc one of the goals of
>>
>> 	cpu-hotplug: replace lock_cpu_hotplug() with get_online_cpus()
>> 	86ef5c9a8edd78e6bf92879f32329d89b2d55b5a
>>
>> was avoiding the new members in task_struct. I leave this up to you
>> and Gautham.

The old get_online_cpus() is reader-preference; I think the goal of this
property is to allow get_online_cpus()/put_online_cpus() to be called nested.

But a reader-preference rwlock may cause writer-side starvation, so I
abandoned this property and used a per-task counter to allow
get_online_cpus()/put_online_cpus() to be called nested. I think this
trade-off is absolutely worth it.
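
A sketch of the nesting this preserves (a hypothetical call sequence,
not code from the patch):

	get_online_cpus();	/* outermost: takes the per-CPU reference */
	...
	get_online_cpus();	/* nested: only current->get_online_cpus_nest++ */
	...
	put_online_cpus();	/* nested: only the per-task counter drops */
	...
	put_online_cpus();	/* outermost: drops the per-CPU reference */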

>>
>>
>> Lai, I didn't read this patch carefully yet (and I can't apply it to
>> Linus's tree). But at first glance,
> 
> because I tried to apply it without 1/2 ;)
> 
>>>  void put_online_cpus(void)
>>>  {
>>> ...
>>> +	if (!--current->get_online_cpus_nest) {
>>> +		preempt_disable();
>>> +		__get_cpu_var(refcount)--;
>>> +		if (cpu_hotplug_task)
>>> +			wake_up_process(cpu_hotplug_task);
>> This looks unsafe. In theory nothing protects cpu_hotplug_task from
>> exiting if refcount_sum() becomes zero; this means wake_up_process()
>> can hit a freed/reused/unmapped task_struct. Probably cpu_hotplug_done()
>> needs another synchronize_sched() before returning.
> 
> Yes, I think this is true, at least in theory.

preempt_disable() prevents cpu_hotplug_task from exiting.
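
To show which window I mean, the reader side as posted, with my
annotation added:

	/* in put_online_cpus() */
	preempt_disable();
	__get_cpu_var(refcount)--;
	if (cpu_hotplug_task)
		wake_up_process(cpu_hotplug_task);
	preempt_enable();	/* task_struct must stay valid until here */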


Thanks, Lai

* Re: [PATCH 2/2] cpuhotplug: make get_online_cpus() scalable by using a percpu counter
From: Oleg Nesterov @ 2010-04-07 13:54 UTC
  To: Lai Jiangshan
  Cc: Gautham R Shenoy, Rusty Russell, Benjamin Herrenschmidt,
	Hugh Dickins, Ingo Molnar, Paul E. McKenney, Nathan Fontenot,
	Peter Zijlstra, Andrew Morton, Thomas Gleixner, Sachin Sant,
	H. Peter Anvin, Shane Wang, Roland McGrath, linux-kernel

On 04/07, Lai Jiangshan wrote:
>
> Oleg Nesterov wrote:
> > On 04/05, Oleg Nesterov wrote:
> >> On 04/05, Lai Jiangshan wrote:
> >>> 1) get_online_cpus() must be allowed to be called recursively, so I added
> >>>    a get_online_cpus_nest counter to every task for the new code.
> >> Well, iirc one of the goals of
> >>
> >> 	cpu-hotplug: replace lock_cpu_hotplug() with get_online_cpus()
> >> 	86ef5c9a8edd78e6bf92879f32329d89b2d55b5a
> >>
> >> was avoiding the new members in task_struct. I leave this up to you
> >> and Gautham.
>
> The old get_online_cpus() is reader-preference; I think the goal of this
> property is to allow get_online_cpus()/put_online_cpus() to be called nested.

Sure, I understand why you added task_struct->get_online_cpus_nest.

> and used a per-task counter to allow get_online_cpus()/put_online_cpus()
> to be called nested. I think this trade-off is absolutely worth it.

As I said, I am not going to argue. I can't justify this tradeoff.

> >>>  void put_online_cpus(void)
> >>>  {
> >>> ...
> >>> +	if (!--current->get_online_cpus_nest) {
> >>> +		preempt_disable();
> >>> +		__get_cpu_var(refcount)--;
> >>> +		if (cpu_hotplug_task)
> >>> +			wake_up_process(cpu_hotplug_task);
> >> This looks unsafe. In theory nothing protects cpu_hotplug_task from
> >> exiting if refcount_sum() becomes zero; this means wake_up_process()
> >> can hit a freed/reused/unmapped task_struct. Probably cpu_hotplug_done()
> >> needs another synchronize_sched() before returning.
> >
> > Yes, I think this is true, at least in theory.
>
> preempt_disable() prevents cpu_hotplug_task from exiting.

If cpu_down() is the caller of cpu_hotplug_begin/done, then yes.

But unless I missed something, nothing protects against cpu_up(), which
takes this lock too.

Just in case... I am not saying this is really possible in practice.

Oleg.


* Re: [PATCH 2/2] cpuhotplug: make get_online_cpus() scalable by using a percpu counter
From: Oleg Nesterov @ 2010-04-09 12:12 UTC
  To: Lai Jiangshan
  Cc: Gautham R Shenoy, Rusty Russell, Benjamin Herrenschmidt,
	Hugh Dickins, Ingo Molnar, Paul E. McKenney, Nathan Fontenot,
	Peter Zijlstra, Andrew Morton, Thomas Gleixner, Sachin Sant,
	H. Peter Anvin, Shane Wang, Roland McGrath, linux-kernel

On 04/07, Oleg Nesterov wrote:
>
> On 04/07, Lai Jiangshan wrote:
> >
> > The old get_online_cpus() is reader-preference; I think the goal of this
> > property is to allow get_online_cpus()/put_online_cpus() to be called nested.
>
> Sure, I understand why you added task_struct->get_online_cpus_nest.
>
> > and used a per-task counter to allow get_online_cpus()/put_online_cpus()
> > to be called nested. I think this trade-off is absolutely worth it.
>
> As I said, I am not going to argue. I can't justify this tradeoff.

But, I must admit, I'd like to avoid adding the new member to task_struct.

What do you think about the code below?

I didn't even try to compile it, just to explain what I mean.

In short: we have the per-cpu fast counters, plus the slow counter
which is only used when cpu_hotplug_begin() is in progress.

Oleg.


static DEFINE_PER_CPU(long, cpuhp_fast_ctr);
static struct task_struct *cpuhp_writer;
static DEFINE_MUTEX(cpuhp_slow_lock);
static long cpuhp_slow_ctr;

static bool update_fast_ctr(int inc)
{
	bool success = true;

	preempt_disable();
	if (likely(!cpuhp_writer))
		__get_cpu_var(cpuhp_fast_ctr) += inc;
	else if (cpuhp_writer != current)
		success = false;
	preempt_enable();

	return success;
}

void get_online_cpus(void)
{
	if (likely(update_fast_ctr(+1)))
		return;

	mutex_lock(&cpuhp_slow_lock);
	cpuhp_slow_ctr++;
	mutex_unlock(&cpuhp_slow_lock);
}

void put_online_cpus(void)
{
	if (likely(update_fast_ctr(-1)))
		return;

	mutex_lock(&cpuhp_slow_lock);
	if (!--cpuhp_slow_ctr && cpuhp_writer)
		wake_up_process(cpuhp_writer);
	mutex_unlock(&cpuhp_slow_lock);
}

static long clear_fast_ctr(void)
{
	long total = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		total += per_cpu(cpuhp_fast_ctr, cpu);
		per_cpu(cpuhp_fast_ctr, cpu) = 0;
	}

	return total;
}

static void cpu_hotplug_begin(void)
{
	cpuhp_writer = current;
	synchronize_sched();

	/* Nobody except us can use cpuhp_fast_ctr */

	mutex_lock(&cpuhp_slow_lock);
	cpuhp_slow_ctr += clear_fast_ctr();

	while (cpuhp_slow_ctr) {
		__set_current_state(TASK_UNINTERRUPTIBLE);
		mutex_unlock(&cpuhp_slow_lock);
		schedule();
		mutex_lock(&cpuhp_slow_lock);
	}
}

static void cpu_hotplug_done(void)
{
	cpuhp_writer = NULL;
	mutex_unlock(&cpuhp_slow_lock);
}


* Re: [PATCH 2/2] cpuhotplug: make get_online_cpus() scalable by using a percpu counter
From: Lai Jiangshan @ 2010-04-12  9:24 UTC
  To: Oleg Nesterov
  Cc: Gautham R Shenoy, Rusty Russell, Benjamin Herrenschmidt,
	Hugh Dickins, Ingo Molnar, Paul E. McKenney, Nathan Fontenot,
	Peter Zijlstra, Andrew Morton, Thomas Gleixner, Sachin Sant,
	H. Peter Anvin, Shane Wang, Roland McGrath, linux-kernel

Oleg Nesterov wrote:
> On 04/07, Oleg Nesterov wrote:
>> On 04/07, Lai Jiangshan wrote:
>>> The old get_online_cpus() is reader-preference; I think the goal of this
>>> property is to allow get_online_cpus()/put_online_cpus() to be called nested.
>> Sure, I understand why you added task_struct->get_online_cpus_nest.
>>
>>> and used a per-task counter to allow get_online_cpus()/put_online_cpus()
>>> to be called nested. I think this trade-off is absolutely worth it.
>> As I said, I am not going to argue. I can't justify this tradeoff.
> 
> But, I must admit, I'd like to avoid adding the new member to task_struct.
> 
> What do you think about the code below?
> 
> I didn't even try to compile it, just to explain what I mean.
> 
> In short: we have the per-cpu fast counters, plus the slow counter
> which is only used when cpu_hotplug_begin() is in progress.
> 
> Oleg.
> 

get_online_cpus() in your code is still reader-preference.
I wish we would drop this property of get_online_cpus().

Lai.

* Re: [PATCH 2/2] cpuhotplug: make get_online_cpus() scalable by using a percpu counter
From: Peter Zijlstra @ 2010-04-12  9:28 UTC
  To: Lai Jiangshan
  Cc: Oleg Nesterov, Gautham R Shenoy, Rusty Russell,
	Benjamin Herrenschmidt, Hugh Dickins, Ingo Molnar,
	Paul E. McKenney, Nathan Fontenot, Andrew Morton,
	Thomas Gleixner, Sachin Sant, H. Peter Anvin, Shane Wang,
	Roland McGrath, linux-kernel

On Mon, 2010-04-12 at 17:24 +0800, Lai Jiangshan wrote:
> Oleg Nesterov wrote:
> > On 04/07, Oleg Nesterov wrote:
> >> On 04/07, Lai Jiangshan wrote:
> >>> The old get_online_cpus() is reader-preference; I think the goal of this
> >>> property is to allow get_online_cpus()/put_online_cpus() to be called nested.
> >> Sure, I understand why you added task_struct->get_online_cpus_nest.
> >>
> >>> and used a per-task counter to allow get_online_cpus()/put_online_cpus()
> >>> to be called nested. I think this trade-off is absolutely worth it.
> >> As I said, I am not going to argue. I can't justify this tradeoff.
> > 
> > But, I must admit, I'd like to avoid adding the new member to task_struct.
> > 
> > What do you think about the code below?
> > 
> > I didn't even try to compile it, just to explain what I mean.
> > 
> > In short: we have the per-cpu fast counters, plus the slow counter
> > which is only used when cpu_hotplug_begin() is in progress.
> > 
> > Oleg.
> > 
> 
> get_online_cpus() in your code is still reader-preference.
> I wish we would drop this property of get_online_cpus().

Why?

* Re: [PATCH 2/2] cpuhotplug: make get_online_cpus() scalable by using a percpu counter
From: Lai Jiangshan @ 2010-04-12 12:30 UTC
  To: Peter Zijlstra
  Cc: Oleg Nesterov, Gautham R Shenoy, Rusty Russell,
	Benjamin Herrenschmidt, Hugh Dickins, Ingo Molnar,
	Paul E. McKenney, Nathan Fontenot, Andrew Morton,
	Thomas Gleixner, Sachin Sant, H. Peter Anvin, Shane Wang,
	Roland McGrath, linux-kernel

Peter Zijlstra wrote:
> On Mon, 2010-04-12 at 17:24 +0800, Lai Jiangshan wrote:
>> Oleg Nesterov wrote:
>>> On 04/07, Oleg Nesterov wrote:
>>>> On 04/07, Lai Jiangshan wrote:
>>>>> The old get_online_cpus() is reader-preference; I think the goal of this
>>>>> property is to allow get_online_cpus()/put_online_cpus() to be called nested.
>>>> Sure, I understand why you added task_struct->get_online_cpus_nest.
>>>>
>>>>> and used a per-task counter to allow get_online_cpus()/put_online_cpus()
>>>>> to be called nested. I think this trade-off is absolutely worth it.
>>>> As I said, I am not going to argue. I can't justify this tradeoff.
>>> But, I must admit, I'd like to avoid adding the new member to task_struct.
>>>
>>> What do you think about the code below?
>>>
>>> I didn't even try to compile it, just to explain what I mean.
>>>
>>> In short: we have the per-cpu fast counters, plus the slow counter
>>> which is only used when cpu_hotplug_begin() is in progress.
>>>
>>> Oleg.
>>>
>> get_online_cpus() in your code is still reader-preference.
>> I wish we would drop this property of get_online_cpus().
> 
> Why?

Because a reader-preference rwlock will cause writer-side starvation.

A user running the following code will starve cpuhotplug completely
(100 processes calling sched_setaffinity() in a loop):

Lai

#define _GNU_SOURCE
#include <sched.h>
#include <sys/types.h>
#include <unistd.h>
#include <stdlib.h>

#define NCPU 4
#define NPROCESS 100

cpu_set_t set;
pid_t target;

void stress_test(void)
{
	int cpu;

	srand((int)target);
	for (;;) {
		/*
		 * Bounce the target's affinity between CPUs forever;
		 * each sched_setaffinity() call takes get_online_cpus().
		 */
		cpu = rand() % NCPU;
		CPU_SET(cpu, &set);
		sched_setaffinity(target, sizeof(set), &set);
		CPU_CLR(cpu, &set);
	}
}

int main(int argc, char *argv[])
{
	pid_t ret;
	int i;

	target = getpid();
	for (i = 1; i < NPROCESS; i++) {
		ret = fork();
		if (ret < 0)
			break;
		else if (ret)
			target = ret;
		else
			stress_test();
	}

	stress_test();
}
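
(For reference: built with something like "gcc -O2 -o stress stress.c"
and run on a 4-CPU box, this keeps sched_setaffinity() -- and with it
get_online_cpus() -- running continuously in 100 processes, so a
concurrent "echo 0 > /sys/devices/system/cpu/cpu1/online" never
completes under a reader-preference lock. The file name and CPU count
are illustrative.)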




* Re: [PATCH 2/2] cpuhotplug: make get_online_cpus() scalable by using a percpu counter
From: Peter Zijlstra @ 2010-04-12 12:34 UTC
  To: Lai Jiangshan
  Cc: Oleg Nesterov, Gautham R Shenoy, Rusty Russell,
	Benjamin Herrenschmidt, Hugh Dickins, Ingo Molnar,
	Paul E. McKenney, Nathan Fontenot, Andrew Morton,
	Thomas Gleixner, Sachin Sant, H. Peter Anvin, Shane Wang,
	Roland McGrath, linux-kernel

On Mon, 2010-04-12 at 20:30 +0800, Lai Jiangshan wrote:
> Because a reader-preference rwlock will cause writer-side starvation.

Sure, but why is that a problem? Hotplug should be a rare event; who
cares if it takes a while to come through.



* Re: [PATCH 2/2] cpuhotplug: make get_online_cpus() scalable by using a percpu counter
From: Oleg Nesterov @ 2010-04-12 18:16 UTC
  To: Lai Jiangshan
  Cc: Gautham R Shenoy, Rusty Russell, Benjamin Herrenschmidt,
	Hugh Dickins, Ingo Molnar, Paul E. McKenney, Nathan Fontenot,
	Peter Zijlstra, Andrew Morton, Thomas Gleixner, Sachin Sant,
	H. Peter Anvin, Shane Wang, Roland McGrath, linux-kernel

On 04/12, Lai Jiangshan wrote:
>
> Oleg Nesterov wrote:
> >
> > But, I must admit, I'd like to avoid adding the new member to task_struct.
> >
> > What do you think about the code below?
> >
> > I didn't even try to compile it, just to explain what I mean.
> >
> > In short: we have the per-cpu fast counters, plus the slow counter
> > which is only used when cpu_hotplug_begin() is in progress.
>
> get_online_cpus() in your code is still reader-preference.

Yes,

> I wish we would drop this property of get_online_cpus().

Heh. Since I never read the changelogs, I didn't even notice this was
one of the goals of your patch. I thought this was just a side-effect.

Yes, if we want to block new readers, I don't see how it is possible
to avoid the counter in task_struct. To illustrate the constraint, a
sketch (mine, not code from either patch):
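
	void get_online_cpus(void)
	{
		/*
		 * A nested reader must not block even when a writer is
		 * already waiting, or the outer read section deadlocks
		 * against that writer.  Telling "nested" apart from
		 * "new" requires state attached to the task:
		 */
		if (current->get_online_cpus_nest++)
			return;

		/* only genuinely new readers reach the blocking slow path */
		...
	}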

I can't judge whether this new member is worth the trouble. Once again,
I am not arguing, I just don't know. And I think your patch is correct
(apart from the purely theoretical race with cpu_hotplug_done afaics).

Oleg.


* Re: [PATCH 2/2] cpuhotplug: make get_online_cpus() scalable by using a percpu counter
From: Lai Jiangshan @ 2010-04-13  1:47 UTC
  To: Peter Zijlstra
  Cc: Oleg Nesterov, Gautham R Shenoy, Rusty Russell,
	Benjamin Herrenschmidt, Hugh Dickins, Ingo Molnar,
	Paul E. McKenney, Nathan Fontenot, Andrew Morton,
	Thomas Gleixner, Sachin Sant, H. Peter Anvin, Shane Wang,
	Roland McGrath, linux-kernel

Peter Zijlstra wrote:
> On Mon, 2010-04-12 at 20:30 +0800, Lai Jiangshan wrote:
>> Because a reader-preference rwlock will cause writer-side starvation.
> 
> Sure, but why is that a problem? Hotplug should be a rare event;


> who cares if it takes a while to come through.

It is total starvation; hotplug cannot complete.
I think it is a kind of DoS attack.
