All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mel Gorman <mgorman@suse.de>
To: Peter Zijlstra <a.p.zijlstra@chello.nl>, Rik van Riel <riel@redhat.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>,
	Ingo Molnar <mingo@kernel.org>,
	Andrea Arcangeli <aarcange@redhat.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Linux-MM <linux-mm@kvack.org>,
	LKML <linux-kernel@vger.kernel.org>, Mel Gorman <mgorman@suse.de>
Subject: [PATCH 01/63] hotplug: Optimize {get,put}_online_cpus()
Date: Mon,  7 Oct 2013 11:28:39 +0100	[thread overview]
Message-ID: <1381141781-10992-2-git-send-email-mgorman@suse.de> (raw)
In-Reply-To: <1381141781-10992-1-git-send-email-mgorman@suse.de>

From: Peter Zijlstra <peterz@infradead.org>

NOTE: This is a placeholder only. A more comprehensive series is in
	progress but this patch on its own mitigates most of the
	overhead the migrate_swap patch is concerned with. It's
	expected that CPU hotplug locking series would go in before
	this series.

The current implementation of get_online_cpus() is global of nature
and thus not suited for any kind of common usage.

Re-implement the current recursive r/w cpu hotplug lock such that the
read side locks are as light as possible.

The current cpu hotplug lock is entirely reader biased; but since
readers are expensive there aren't a lot of them about and writer
starvation isn't a particular problem.

However by making the reader side more usable there is a fair chance
it will get used more and thus the starvation issue becomes a real
possibility.

Therefore this new implementation is fair, alternating readers and
writers; this however requires per-task state to allow the reader
recursion -- this new task_struct member is placed in a 4 byte hole on
64bit builds.

Many comments are contributed by Paul McKenney, and many previous
attempts were shown to be inadequate by both Paul and Oleg; many
thanks to them for persisting to poke holes in my attempts.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/cpu.h   |  67 ++++++++++++++-
 include/linux/sched.h |   3 +
 kernel/cpu.c          | 227 +++++++++++++++++++++++++++++++++++++-------------
 kernel/sched/core.c   |   2 +
 4 files changed, 237 insertions(+), 62 deletions(-)

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 801ff9e..e520c76 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -16,6 +16,8 @@
 #include <linux/node.h>
 #include <linux/compiler.h>
 #include <linux/cpumask.h>
+#include <linux/percpu.h>
+#include <linux/sched.h>
 
 struct device;
 
@@ -173,10 +175,69 @@ extern struct bus_type cpu_subsys;
 #ifdef CONFIG_HOTPLUG_CPU
 /* Stop CPUs going up and down. */
 
+extern void cpu_hotplug_init_task(struct task_struct *p);
+
 extern void cpu_hotplug_begin(void);
 extern void cpu_hotplug_done(void);
-extern void get_online_cpus(void);
-extern void put_online_cpus(void);
+
+extern int __cpuhp_state;
+DECLARE_PER_CPU(unsigned int, __cpuhp_refcount);
+
+extern void __get_online_cpus(void);
+
+static inline void get_online_cpus(void)
+{
+	might_sleep();
+
+	/* Support reader recursion */
+	/* The value was >= 1 and remains so, reordering causes no harm. */
+	if (current->cpuhp_ref++)
+		return;
+
+	preempt_disable();
+	/*
+	 * We are in an RCU-sched read-side critical section, so the writer
+	 * cannot both change __cpuhp_state from readers_fast and start
+	 * checking counters while we are here. So if we see !__cpuhp_state,
+	 * we know that the writer won't be checking until we past the
+	 * preempt_enable() and that once the synchronize_sched() is done, the
+	 * writer will see anything we did within this RCU-sched read-side
+	 * critical section.
+	 */
+	if (likely(!__cpuhp_state))
+		__this_cpu_inc(__cpuhp_refcount);
+	else
+		__get_online_cpus(); /* Unconditional memory barrier. */
+	preempt_enable();
+	/*
+	 * The barrier() from preempt_enable() prevents the compiler from
+	 * bleeding the critical section out.
+	 */
+}
+
+extern void __put_online_cpus(void);
+
+static inline void put_online_cpus(void)
+{
+	/* The value was >= 1 and remains so, reordering causes no harm. */
+	if (--current->cpuhp_ref)
+		return;
+
+	/*
+	 * The barrier() in preempt_disable() prevents the compiler from
+	 * bleeding the critical section out.
+	 */
+	preempt_disable();
+	/*
+	 * Same as in get_online_cpus().
+	 */
+	if (likely(!__cpuhp_state))
+		__this_cpu_dec(__cpuhp_refcount);
+	else
+		__put_online_cpus(); /* Unconditional memory barrier. */
+	preempt_enable();
+}
+
 extern void cpu_hotplug_disable(void);
 extern void cpu_hotplug_enable(void);
 #define hotcpu_notifier(fn, pri)	cpu_notifier(fn, pri)
@@ -200,6 +261,8 @@ static inline void cpu_hotplug_driver_unlock(void)
 
 #else		/* CONFIG_HOTPLUG_CPU */
 
+static inline void cpu_hotplug_init_task(struct task_struct *p) {}
+
 static inline void cpu_hotplug_begin(void) {}
 static inline void cpu_hotplug_done(void) {}
 #define get_online_cpus()	do { } while (0)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6682da3..5308d89 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1026,6 +1026,9 @@ struct task_struct {
 #ifdef CONFIG_SMP
 	struct llist_node wake_entry;
 	int on_cpu;
+#ifdef CONFIG_HOTPLUG_CPU
+	int cpuhp_ref;
+#endif
 	struct task_struct *last_wakee;
 	unsigned long wakee_flips;
 	unsigned long wakee_flip_decay_ts;
diff --git a/kernel/cpu.c b/kernel/cpu.c
index d7f07a2..dccf605 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -49,88 +49,195 @@ static int cpu_hotplug_disabled;
 
 #ifdef CONFIG_HOTPLUG_CPU
 
-static struct {
-	struct task_struct *active_writer;
-	struct mutex lock; /* Synchronizes accesses to refcount, */
+enum { readers_fast = 0, readers_slow, readers_block };
+
+int __cpuhp_state;
+EXPORT_SYMBOL_GPL(__cpuhp_state);
+
+DEFINE_PER_CPU(unsigned int, __cpuhp_refcount);
+EXPORT_PER_CPU_SYMBOL_GPL(__cpuhp_refcount);
+
+static atomic_t cpuhp_waitcount;
+static DECLARE_WAIT_QUEUE_HEAD(cpuhp_readers);
+static DECLARE_WAIT_QUEUE_HEAD(cpuhp_writer);
+
+void cpu_hotplug_init_task(struct task_struct *p)
+{
+	p->cpuhp_ref = 0;
+}
+
+void __get_online_cpus(void)
+{
+again:
+	__this_cpu_inc(__cpuhp_refcount);
+
 	/*
-	 * Also blocks the new readers during
-	 * an ongoing cpu hotplug operation.
+	 * Due to having preemption disabled the decrement happens on
+	 * the same CPU as the increment, avoiding the
+	 * increment-on-one-CPU-and-decrement-on-another problem.
+	 *
+	 * And yes, if the reader misses the writer's assignment of
+	 * readers_block to __cpuhp_state, then the writer is
+	 * guaranteed to see the reader's increment.  Conversely, any
+	 * readers that increment their __cpuhp_refcount after the
+	 * writer looks are guaranteed to see the readers_block value,
+	 * which in turn means that they are guaranteed to immediately
+	 * decrement their __cpuhp_refcount, so that it doesn't matter
+	 * that the writer missed them.
 	 */
-	int refcount;
-} cpu_hotplug = {
-	.active_writer = NULL,
-	.lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
-	.refcount = 0,
-};
 
-void get_online_cpus(void)
-{
-	might_sleep();
-	if (cpu_hotplug.active_writer == current)
+	smp_mb(); /* A matches D */
+
+	if (likely(__cpuhp_state != readers_block))
 		return;
-	mutex_lock(&cpu_hotplug.lock);
-	cpu_hotplug.refcount++;
-	mutex_unlock(&cpu_hotplug.lock);
 
+	/*
+	 * Make sure an outgoing writer sees the waitcount to ensure we
+	 * make progress.
+	 */
+	atomic_inc(&cpuhp_waitcount);
+
+	/*
+	 * Per the above comment; we still have preemption disabled and
+	 * will thus decrement on the same CPU as we incremented.
+	 */
+	__put_online_cpus();
+
+	/*
+	 * We either call schedule() in the wait, or we'll fall through
+	 * and reschedule on the preempt_enable() in get_online_cpus().
+	 */
+	preempt_enable_no_resched();
+	__wait_event(cpuhp_readers, __cpuhp_state != readers_block);
+	preempt_disable();
+
+	/*
+	 * Given we've still got preempt_disabled and new cpu_hotplug_begin()
+	 * must do a synchronize_sched() we're guaranteed a successfull
+	 * acquisition this time -- even if we wake the current
+	 * cpu_hotplug_end() now.
+	 */
+	if (atomic_dec_and_test(&cpuhp_waitcount))
+		wake_up(&cpuhp_writer);
+
+	goto again;
 }
-EXPORT_SYMBOL_GPL(get_online_cpus);
+EXPORT_SYMBOL_GPL(__get_online_cpus);
 
-void put_online_cpus(void)
+void __put_online_cpus(void)
 {
-	if (cpu_hotplug.active_writer == current)
-		return;
-	mutex_lock(&cpu_hotplug.lock);
+	smp_mb(); /* B matches C */
+	/*
+	 * In other words, if they see our decrement (presumably to aggregate
+	 * zero, as that is the only time it matters) they will also see our
+	 * critical section.
+	 */
+	this_cpu_dec(__cpuhp_refcount);
+
+	/* Prod writer to recheck readers_active */
+	wake_up(&cpuhp_writer);
+}
+EXPORT_SYMBOL_GPL(__put_online_cpus);
 
-	if (WARN_ON(!cpu_hotplug.refcount))
-		cpu_hotplug.refcount++; /* try to fix things up */
+#define per_cpu_sum(var)						\
+({ 									\
+ 	typeof(var) __sum = 0;						\
+ 	int cpu;							\
+ 	for_each_possible_cpu(cpu)					\
+ 		__sum += per_cpu(var, cpu);				\
+ 	__sum;								\
+})
 
-	if (!--cpu_hotplug.refcount && unlikely(cpu_hotplug.active_writer))
-		wake_up_process(cpu_hotplug.active_writer);
-	mutex_unlock(&cpu_hotplug.lock);
+/*
+ * Return true if the modular sum of the __cpuhp_refcount per-CPU variables
+ * is zero. If this sum is zero, then it is stable due to the fact that if
+ * any newly arriving readers increment a given counter, they will
+ * immediately decrement that same counter.
+ */
+static bool cpuhp_readers_active_check(void)
+{
+	if (per_cpu_sum(__cpuhp_refcount) != 0)
+		return false;
 
+	/*
+	 * If we observed the decrement; ensure we see the entire critical
+	 * section.
+	 */
+
+	smp_mb(); /* C matches B */
+
+	return true;
 }
-EXPORT_SYMBOL_GPL(put_online_cpus);
 
 /*
- * This ensures that the hotplug operation can begin only when the
- * refcount goes to zero.
- *
- * Note that during a cpu-hotplug operation, the new readers, if any,
- * will be blocked by the cpu_hotplug.lock
- *
- * Since cpu_hotplug_begin() is always called after invoking
- * cpu_maps_update_begin(), we can be sure that only one writer is active.
- *
- * Note that theoretically, there is a possibility of a livelock:
- * - Refcount goes to zero, last reader wakes up the sleeping
- *   writer.
- * - Last reader unlocks the cpu_hotplug.lock.
- * - A new reader arrives at this moment, bumps up the refcount.
- * - The writer acquires the cpu_hotplug.lock finds the refcount
- *   non zero and goes to sleep again.
- *
- * However, this is very difficult to achieve in practice since
- * get_online_cpus() not an api which is called all that often.
- *
+ * This will notify new readers to block and wait for all active readers to
+ * complete.
  */
 void cpu_hotplug_begin(void)
 {
-	cpu_hotplug.active_writer = current;
+	/*
+	 * Since cpu_hotplug_begin() is always called after invoking
+	 * cpu_maps_update_begin(), we can be sure that only one writer is
+	 * active.
+	 */
+	lockdep_assert_held(&cpu_add_remove_lock);
 
-	for (;;) {
-		mutex_lock(&cpu_hotplug.lock);
-		if (likely(!cpu_hotplug.refcount))
-			break;
-		__set_current_state(TASK_UNINTERRUPTIBLE);
-		mutex_unlock(&cpu_hotplug.lock);
-		schedule();
-	}
+	/* Allow reader-in-writer recursion. */
+	current->cpuhp_ref++;
+
+	/* Notify readers to take the slow path. */
+	__cpuhp_state = readers_slow;
+
+	/* See percpu_down_write(); guarantees all readers take the slow path */
+	synchronize_sched();
+
+	/*
+	 * Notify new readers to block; up until now, and thus throughout the
+	 * longish synchronize_sched() above, new readers could still come in.
+	 */
+	__cpuhp_state = readers_block;
+
+	smp_mb(); /* D matches A */
+
+	/*
+	 * If they don't see our writer of readers_block to __cpuhp_state,
+	 * then we are guaranteed to see their __cpuhp_refcount increment, and
+	 * therefore will wait for them.
+	 */
+
+	/* Wait for all now active readers to complete. */
+	wait_event(cpuhp_writer, cpuhp_readers_active_check());
 }
 
 void cpu_hotplug_done(void)
 {
-	cpu_hotplug.active_writer = NULL;
-	mutex_unlock(&cpu_hotplug.lock);
+	/*
+	 * Signal the writer is done, no fast path yet.
+	 *
+	 * One reason that we cannot just immediately flip to readers_fast is
+	 * that new readers might fail to see the results of this writer's
+	 * critical section.
+	 */
+	__cpuhp_state = readers_slow;
+	wake_up_all(&cpuhp_readers);
+
+	/*
+	 * The wait_event()/wake_up_all() prevents the race where the readers
+	 * are delayed between fetching __cpuhp_state and blocking.
+	 */
+
+	/* See percpu_up_write(); readers will no longer attempt to block. */
+	synchronize_sched();
+
+	/* Let 'em rip */
+	__cpuhp_state = readers_fast;
+	current->cpuhp_ref--;
+
+	/*
+	 * Wait for any pending readers to be running. This ensures readers
+	 * after writer and avoids writers starving readers.
+	 */
+	wait_event(cpuhp_writer, !atomic_read(&cpuhp_waitcount));
 }
 
 /*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5ac63c9..2f3420c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1630,6 +1630,8 @@ static void __sched_fork(struct task_struct *p)
 	p->numa_scan_period = sysctl_numa_balancing_scan_delay;
 	p->numa_work.next = &p->numa_work;
 #endif /* CONFIG_NUMA_BALANCING */
+
+	cpu_hotplug_init_task(p);
 }
 
 #ifdef CONFIG_NUMA_BALANCING
-- 
1.8.4


WARNING: multiple messages have this Message-ID
From: Mel Gorman <mgorman@suse.de>
To: Peter Zijlstra <a.p.zijlstra@chello.nl>, Rik van Riel <riel@redhat.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>,
	Ingo Molnar <mingo@kernel.org>,
	Andrea Arcangeli <aarcange@redhat.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Linux-MM <linux-mm@kvack.org>,
	LKML <linux-kernel@vger.kernel.org>, Mel Gorman <mgorman@suse.de>
Subject: [PATCH 01/63] hotplug: Optimize {get,put}_online_cpus()
Date: Mon,  7 Oct 2013 11:28:39 +0100	[thread overview]
Message-ID: <1381141781-10992-2-git-send-email-mgorman@suse.de> (raw)
In-Reply-To: <1381141781-10992-1-git-send-email-mgorman@suse.de>

From: Peter Zijlstra <peterz@infradead.org>

NOTE: This is a placeholder only. A more comprehensive series is in
	progress but this patch on its own mitigates most of the
	overhead the migrate_swap patch is concerned with. It's
	expected that CPU hotplug locking series would go in before
	this series.

The current implementation of get_online_cpus() is global of nature
and thus not suited for any kind of common usage.

Re-implement the current recursive r/w cpu hotplug lock such that the
read side locks are as light as possible.

The current cpu hotplug lock is entirely reader biased; but since
readers are expensive there aren't a lot of them about and writer
starvation isn't a particular problem.

However by making the reader side more usable there is a fair chance
it will get used more and thus the starvation issue becomes a real
possibility.

Therefore this new implementation is fair, alternating readers and
writers; this however requires per-task state to allow the reader
recursion -- this new task_struct member is placed in a 4 byte hole on
64bit builds.

Many comments are contributed by Paul McKenney, and many previous
attempts were shown to be inadequate by both Paul and Oleg; many
thanks to them for persisting to poke holes in my attempts.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/cpu.h   |  67 ++++++++++++++-
 include/linux/sched.h |   3 +
 kernel/cpu.c          | 227 +++++++++++++++++++++++++++++++++++++-------------
 kernel/sched/core.c   |   2 +
 4 files changed, 237 insertions(+), 62 deletions(-)

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 801ff9e..e520c76 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -16,6 +16,8 @@
 #include <linux/node.h>
 #include <linux/compiler.h>
 #include <linux/cpumask.h>
+#include <linux/percpu.h>
+#include <linux/sched.h>
 
 struct device;
 
@@ -173,10 +175,69 @@ extern struct bus_type cpu_subsys;
 #ifdef CONFIG_HOTPLUG_CPU
 /* Stop CPUs going up and down. */
 
+extern void cpu_hotplug_init_task(struct task_struct *p);
+
 extern void cpu_hotplug_begin(void);
 extern void cpu_hotplug_done(void);
-extern void get_online_cpus(void);
-extern void put_online_cpus(void);
+
+extern int __cpuhp_state;
+DECLARE_PER_CPU(unsigned int, __cpuhp_refcount);
+
+extern void __get_online_cpus(void);
+
+static inline void get_online_cpus(void)
+{
+	might_sleep();
+
+	/* Support reader recursion */
+	/* The value was >= 1 and remains so, reordering causes no harm. */
+	if (current->cpuhp_ref++)
+		return;
+
+	preempt_disable();
+	/*
+	 * We are in an RCU-sched read-side critical section, so the writer
+	 * cannot both change __cpuhp_state from readers_fast and start
+	 * checking counters while we are here. So if we see !__cpuhp_state,
+	 * we know that the writer won't be checking until we past the
+	 * preempt_enable() and that once the synchronize_sched() is done, the
+	 * writer will see anything we did within this RCU-sched read-side
+	 * critical section.
+	 */
+	if (likely(!__cpuhp_state))
+		__this_cpu_inc(__cpuhp_refcount);
+	else
+		__get_online_cpus(); /* Unconditional memory barrier. */
+	preempt_enable();
+	/*
+	 * The barrier() from preempt_enable() prevents the compiler from
+	 * bleeding the critical section out.
+	 */
+}
+
+extern void __put_online_cpus(void);
+
+static inline void put_online_cpus(void)
+{
+	/* The value was >= 1 and remains so, reordering causes no harm. */
+	if (--current->cpuhp_ref)
+		return;
+
+	/*
+	 * The barrier() in preempt_disable() prevents the compiler from
+	 * bleeding the critical section out.
+	 */
+	preempt_disable();
+	/*
+	 * Same as in get_online_cpus().
+	 */
+	if (likely(!__cpuhp_state))
+		__this_cpu_dec(__cpuhp_refcount);
+	else
+		__put_online_cpus(); /* Unconditional memory barrier. */
+	preempt_enable();
+}
+
 extern void cpu_hotplug_disable(void);
 extern void cpu_hotplug_enable(void);
 #define hotcpu_notifier(fn, pri)	cpu_notifier(fn, pri)
@@ -200,6 +261,8 @@ static inline void cpu_hotplug_driver_unlock(void)
 
 #else		/* CONFIG_HOTPLUG_CPU */
 
+static inline void cpu_hotplug_init_task(struct task_struct *p) {}
+
 static inline void cpu_hotplug_begin(void) {}
 static inline void cpu_hotplug_done(void) {}
 #define get_online_cpus()	do { } while (0)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6682da3..5308d89 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1026,6 +1026,9 @@ struct task_struct {
 #ifdef CONFIG_SMP
 	struct llist_node wake_entry;
 	int on_cpu;
+#ifdef CONFIG_HOTPLUG_CPU
+	int cpuhp_ref;
+#endif
 	struct task_struct *last_wakee;
 	unsigned long wakee_flips;
 	unsigned long wakee_flip_decay_ts;
diff --git a/kernel/cpu.c b/kernel/cpu.c
index d7f07a2..dccf605 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -49,88 +49,195 @@ static int cpu_hotplug_disabled;
 
 #ifdef CONFIG_HOTPLUG_CPU
 
-static struct {
-	struct task_struct *active_writer;
-	struct mutex lock; /* Synchronizes accesses to refcount, */
+enum { readers_fast = 0, readers_slow, readers_block };
+
+int __cpuhp_state;
+EXPORT_SYMBOL_GPL(__cpuhp_state);
+
+DEFINE_PER_CPU(unsigned int, __cpuhp_refcount);
+EXPORT_PER_CPU_SYMBOL_GPL(__cpuhp_refcount);
+
+static atomic_t cpuhp_waitcount;
+static DECLARE_WAIT_QUEUE_HEAD(cpuhp_readers);
+static DECLARE_WAIT_QUEUE_HEAD(cpuhp_writer);
+
+void cpu_hotplug_init_task(struct task_struct *p)
+{
+	p->cpuhp_ref = 0;
+}
+
+void __get_online_cpus(void)
+{
+again:
+	__this_cpu_inc(__cpuhp_refcount);
+
 	/*
-	 * Also blocks the new readers during
-	 * an ongoing cpu hotplug operation.
+	 * Due to having preemption disabled the decrement happens on
+	 * the same CPU as the increment, avoiding the
+	 * increment-on-one-CPU-and-decrement-on-another problem.
+	 *
+	 * And yes, if the reader misses the writer's assignment of
+	 * readers_block to __cpuhp_state, then the writer is
+	 * guaranteed to see the reader's increment.  Conversely, any
+	 * readers that increment their __cpuhp_refcount after the
+	 * writer looks are guaranteed to see the readers_block value,
+	 * which in turn means that they are guaranteed to immediately
+	 * decrement their __cpuhp_refcount, so that it doesn't matter
+	 * that the writer missed them.
 	 */
-	int refcount;
-} cpu_hotplug = {
-	.active_writer = NULL,
-	.lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
-	.refcount = 0,
-};
 
-void get_online_cpus(void)
-{
-	might_sleep();
-	if (cpu_hotplug.active_writer == current)
+	smp_mb(); /* A matches D */
+
+	if (likely(__cpuhp_state != readers_block))
 		return;
-	mutex_lock(&cpu_hotplug.lock);
-	cpu_hotplug.refcount++;
-	mutex_unlock(&cpu_hotplug.lock);
 
+	/*
+	 * Make sure an outgoing writer sees the waitcount to ensure we
+	 * make progress.
+	 */
+	atomic_inc(&cpuhp_waitcount);
+
+	/*
+	 * Per the above comment; we still have preemption disabled and
+	 * will thus decrement on the same CPU as we incremented.
+	 */
+	__put_online_cpus();
+
+	/*
+	 * We either call schedule() in the wait, or we'll fall through
+	 * and reschedule on the preempt_enable() in get_online_cpus().
+	 */
+	preempt_enable_no_resched();
+	__wait_event(cpuhp_readers, __cpuhp_state != readers_block);
+	preempt_disable();
+
+	/*
+	 * Given we've still got preempt_disabled and new cpu_hotplug_begin()
+	 * must do a synchronize_sched() we're guaranteed a successfull
+	 * acquisition this time -- even if we wake the current
+	 * cpu_hotplug_end() now.
+	 */
+	if (atomic_dec_and_test(&cpuhp_waitcount))
+		wake_up(&cpuhp_writer);
+
+	goto again;
 }
-EXPORT_SYMBOL_GPL(get_online_cpus);
+EXPORT_SYMBOL_GPL(__get_online_cpus);
 
-void put_online_cpus(void)
+void __put_online_cpus(void)
 {
-	if (cpu_hotplug.active_writer == current)
-		return;
-	mutex_lock(&cpu_hotplug.lock);
+	smp_mb(); /* B matches C */
+	/*
+	 * In other words, if they see our decrement (presumably to aggregate
+	 * zero, as that is the only time it matters) they will also see our
+	 * critical section.
+	 */
+	this_cpu_dec(__cpuhp_refcount);
+
+	/* Prod writer to recheck readers_active */
+	wake_up(&cpuhp_writer);
+}
+EXPORT_SYMBOL_GPL(__put_online_cpus);
 
-	if (WARN_ON(!cpu_hotplug.refcount))
-		cpu_hotplug.refcount++; /* try to fix things up */
+#define per_cpu_sum(var)						\
+({ 									\
+ 	typeof(var) __sum = 0;						\
+ 	int cpu;							\
+ 	for_each_possible_cpu(cpu)					\
+ 		__sum += per_cpu(var, cpu);				\
+ 	__sum;								\
+})
 
-	if (!--cpu_hotplug.refcount && unlikely(cpu_hotplug.active_writer))
-		wake_up_process(cpu_hotplug.active_writer);
-	mutex_unlock(&cpu_hotplug.lock);
+/*
+ * Return true if the modular sum of the __cpuhp_refcount per-CPU variables
+ * is zero. If this sum is zero, then it is stable due to the fact that if
+ * any newly arriving readers increment a given counter, they will
+ * immediately decrement that same counter.
+ */
+static bool cpuhp_readers_active_check(void)
+{
+	if (per_cpu_sum(__cpuhp_refcount) != 0)
+		return false;
 
+	/*
+	 * If we observed the decrement; ensure we see the entire critical
+	 * section.
+	 */
+
+	smp_mb(); /* C matches B */
+
+	return true;
 }
-EXPORT_SYMBOL_GPL(put_online_cpus);
 
 /*
- * This ensures that the hotplug operation can begin only when the
- * refcount goes to zero.
- *
- * Note that during a cpu-hotplug operation, the new readers, if any,
- * will be blocked by the cpu_hotplug.lock
- *
- * Since cpu_hotplug_begin() is always called after invoking
- * cpu_maps_update_begin(), we can be sure that only one writer is active.
- *
- * Note that theoretically, there is a possibility of a livelock:
- * - Refcount goes to zero, last reader wakes up the sleeping
- *   writer.
- * - Last reader unlocks the cpu_hotplug.lock.
- * - A new reader arrives at this moment, bumps up the refcount.
- * - The writer acquires the cpu_hotplug.lock finds the refcount
- *   non zero and goes to sleep again.
- *
- * However, this is very difficult to achieve in practice since
- * get_online_cpus() not an api which is called all that often.
- *
+ * This will notify new readers to block and wait for all active readers to
+ * complete.
  */
 void cpu_hotplug_begin(void)
 {
-	cpu_hotplug.active_writer = current;
+	/*
+	 * Since cpu_hotplug_begin() is always called after invoking
+	 * cpu_maps_update_begin(), we can be sure that only one writer is
+	 * active.
+	 */
+	lockdep_assert_held(&cpu_add_remove_lock);
 
-	for (;;) {
-		mutex_lock(&cpu_hotplug.lock);
-		if (likely(!cpu_hotplug.refcount))
-			break;
-		__set_current_state(TASK_UNINTERRUPTIBLE);
-		mutex_unlock(&cpu_hotplug.lock);
-		schedule();
-	}
+	/* Allow reader-in-writer recursion. */
+	current->cpuhp_ref++;
+
+	/* Notify readers to take the slow path. */
+	__cpuhp_state = readers_slow;
+
+	/* See percpu_down_write(); guarantees all readers take the slow path */
+	synchronize_sched();
+
+	/*
+	 * Notify new readers to block; up until now, and thus throughout the
+	 * longish synchronize_sched() above, new readers could still come in.
+	 */
+	__cpuhp_state = readers_block;
+
+	smp_mb(); /* D matches A */
+
+	/*
+	 * If they don't see our writer of readers_block to __cpuhp_state,
+	 * then we are guaranteed to see their __cpuhp_refcount increment, and
+	 * therefore will wait for them.
+	 */
+
+	/* Wait for all now active readers to complete. */
+	wait_event(cpuhp_writer, cpuhp_readers_active_check());
 }
 
 void cpu_hotplug_done(void)
 {
-	cpu_hotplug.active_writer = NULL;
-	mutex_unlock(&cpu_hotplug.lock);
+	/*
+	 * Signal the writer is done, no fast path yet.
+	 *
+	 * One reason that we cannot just immediately flip to readers_fast is
+	 * that new readers might fail to see the results of this writer's
+	 * critical section.
+	 */
+	__cpuhp_state = readers_slow;
+	wake_up_all(&cpuhp_readers);
+
+	/*
+	 * The wait_event()/wake_up_all() prevents the race where the readers
+	 * are delayed between fetching __cpuhp_state and blocking.
+	 */
+
+	/* See percpu_up_write(); readers will no longer attempt to block. */
+	synchronize_sched();
+
+	/* Let 'em rip */
+	__cpuhp_state = readers_fast;
+	current->cpuhp_ref--;
+
+	/*
+	 * Wait for any pending readers to be running. This ensures readers
+	 * after writer and avoids writers starving readers.
+	 */
+	wait_event(cpuhp_writer, !atomic_read(&cpuhp_waitcount));
 }
 
 /*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5ac63c9..2f3420c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1630,6 +1630,8 @@ static void __sched_fork(struct task_struct *p)
 	p->numa_scan_period = sysctl_numa_balancing_scan_delay;
 	p->numa_work.next = &p->numa_work;
 #endif /* CONFIG_NUMA_BALANCING */
+
+	cpu_hotplug_init_task(p);
 }
 
 #ifdef CONFIG_NUMA_BALANCING
-- 
1.8.4

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  reply	other threads:[~2013-10-07 10:46 UTC|newest]

Thread overview: 338+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-10-07 10:28 [PATCH 0/63] Basic scheduler support for automatic NUMA balancing V9 Mel Gorman
2013-10-07 10:28 ` Mel Gorman
2013-10-07 10:28 ` Mel Gorman [this message]
2013-10-07 10:28   ` [PATCH 01/63] hotplug: Optimize {get,put}_online_cpus() Mel Gorman
2013-10-07 10:28 ` [PATCH 02/63] mm: numa: Document automatic NUMA balancing sysctls Mel Gorman
2013-10-07 10:28   ` Mel Gorman
2013-10-07 12:46   ` Rik van Riel
2013-10-07 12:46     ` Rik van Riel
2013-10-09 17:24   ` [tip:sched/core] " tip-bot for Mel Gorman
2013-10-07 10:28 ` [PATCH 03/63] sched, numa: Comment fixlets Mel Gorman
2013-10-07 10:28   ` Mel Gorman
2013-10-07 12:46   ` Rik van Riel
2013-10-07 12:46     ` Rik van Riel
2013-10-09 17:24   ` [tip:sched/core] sched/numa: Fix comments tip-bot for Peter Zijlstra
2013-10-07 10:28 ` [PATCH 04/63] mm: numa: Do not account for a hinting fault if we raced Mel Gorman
2013-10-07 10:28   ` Mel Gorman
2013-10-07 12:47   ` Rik van Riel
2013-10-07 12:47     ` Rik van Riel
2013-10-09 17:24   ` [tip:sched/core] " tip-bot for Mel Gorman
2013-10-29 10:42   ` [tip:core/urgent] " tip-bot for Mel Gorman
2013-10-07 10:28 ` [PATCH 05/63] mm: Wait for THP migrations to complete during NUMA hinting faults Mel Gorman
2013-10-07 10:28   ` Mel Gorman
2013-10-07 13:55   ` Rik van Riel
2013-10-07 13:55     ` Rik van Riel
2013-10-09 17:24   ` [tip:sched/core] " tip-bot for Mel Gorman
2013-10-29 10:42   ` [tip:core/urgent] " tip-bot for Mel Gorman
2013-10-07 10:28 ` [PATCH 06/63] mm: Prevent parallel splits during THP migration Mel Gorman
2013-10-07 10:28   ` Mel Gorman
2013-10-07 14:01   ` Rik van Riel
2013-10-07 14:01     ` Rik van Riel
2013-10-09 17:24   ` [tip:sched/core] " tip-bot for Mel Gorman
2013-10-29 10:42   ` [tip:core/urgent] " tip-bot for Mel Gorman
2013-10-07 10:28 ` [PATCH 07/63] mm: numa: Sanitize task_numa_fault() callsites Mel Gorman
2013-10-07 10:28   ` Mel Gorman
2013-10-07 14:02   ` Rik van Riel
2013-10-07 14:02     ` Rik van Riel
2013-10-09 17:25   ` [tip:sched/core] " tip-bot for Mel Gorman
2013-10-29 10:42   ` [tip:core/urgent] " tip-bot for Mel Gorman
2013-10-07 10:28 ` [PATCH 08/63] mm: Close races between THP migration and PMD numa clearing Mel Gorman
2013-10-07 10:28   ` Mel Gorman
2013-10-07 14:02   ` Rik van Riel
2013-10-07 14:02     ` Rik van Riel
2013-10-09 17:25   ` [tip:sched/core] " tip-bot for Mel Gorman
2013-10-29 10:42   ` [tip:core/urgent] " tip-bot for Mel Gorman
2013-10-07 10:28 ` [PATCH 09/63] mm: Account for a THP NUMA hinting update as one PTE update Mel Gorman
2013-10-07 10:28   ` Mel Gorman
2013-10-07 14:02   ` Rik van Riel
2013-10-07 14:02     ` Rik van Riel
2013-10-09 17:25   ` [tip:sched/core] " tip-bot for Mel Gorman
2013-10-29 10:43   ` [tip:core/urgent] " tip-bot for Mel Gorman
2013-10-07 10:28 ` [PATCH 10/63] mm: Do not flush TLB during protection change if !pte_present && !migration_entry Mel Gorman
2013-10-07 10:28   ` Mel Gorman
2013-10-07 15:12   ` Rik van Riel
2013-10-07 15:12     ` Rik van Riel
2013-10-09 17:25   ` [tip:sched/core] " tip-bot for Mel Gorman
2013-10-07 10:28 ` [PATCH 11/63] mm: Only flush TLBs if a transhuge PMD is modified for NUMA pte scanning Mel Gorman
2013-10-07 10:28   ` Mel Gorman
2013-10-09 17:25   ` [tip:sched/core] " tip-bot for Mel Gorman
2013-10-07 10:28 ` [PATCH 12/63] mm: numa: Do not migrate or account for hinting faults on the zero page Mel Gorman
2013-10-07 10:28   ` Mel Gorman
2013-10-07 17:10   ` Rik van Riel
2013-10-07 17:10     ` Rik van Riel
2013-10-09 17:25   ` [tip:sched/core] " tip-bot for Mel Gorman
2013-10-07 10:28 ` [PATCH 13/63] sched: numa: Mitigate chance that same task always updates PTEs Mel Gorman
2013-10-07 10:28   ` Mel Gorman
2013-10-07 17:24   ` Rik van Riel
2013-10-07 17:24     ` Rik van Riel
2013-10-09 17:26   ` [tip:sched/core] sched/numa: " tip-bot for Peter Zijlstra
2013-10-07 10:28 ` [PATCH 14/63] sched: numa: Continue PTE scanning even if migrate rate limited Mel Gorman
2013-10-07 10:28   ` Mel Gorman
2013-10-07 17:24   ` Rik van Riel
2013-10-07 17:24     ` Rik van Riel
2013-10-09 17:26   ` [tip:sched/core] sched/numa: " tip-bot for Peter Zijlstra
2013-10-07 10:28 ` [PATCH 15/63] Revert "mm: sched: numa: Delay PTE scanning until a task is scheduled on a new node" Mel Gorman
2013-10-07 10:28   ` Mel Gorman
2013-10-07 17:42   ` Rik van Riel
2013-10-07 17:42     ` Rik van Riel
2013-10-09 17:26   ` [tip:sched/core] " tip-bot for Mel Gorman
2013-10-07 10:28 ` [PATCH 16/63] sched: numa: Initialise numa_next_scan properly Mel Gorman
2013-10-07 10:28   ` Mel Gorman
2013-10-07 17:44   ` Rik van Riel
2013-10-07 17:44     ` Rik van Riel
2013-10-09 17:26   ` [tip:sched/core] sched/numa: " tip-bot for Mel Gorman
2013-10-07 10:28 ` [PATCH 17/63] sched: Set the scan rate proportional to the memory usage of the task being scanned Mel Gorman
2013-10-07 10:28   ` Mel Gorman
2013-10-07 17:44   ` Rik van Riel
2013-10-07 17:44     ` Rik van Riel
2013-10-09 17:26   ` [tip:sched/core] sched/numa: " tip-bot for Mel Gorman
2013-10-07 10:28 ` [PATCH 18/63] sched: numa: Slow scan rate if no NUMA hinting faults are being recorded Mel Gorman
2013-10-07 10:28   ` Mel Gorman
2013-10-07 18:02   ` Rik van Riel
2013-10-07 18:02     ` Rik van Riel
2013-10-09 17:26   ` [tip:sched/core] sched/numa: " tip-bot for Mel Gorman
2013-10-07 10:28 ` [PATCH 19/63] sched: Track NUMA hinting faults on per-node basis Mel Gorman
2013-10-07 10:28   ` Mel Gorman
2013-10-07 18:02   ` Rik van Riel
2013-10-07 18:02     ` Rik van Riel
2013-10-09 17:27   ` [tip:sched/core] sched/numa: " tip-bot for Mel Gorman
2013-12-04  5:32   ` [PATCH 19/63] sched: " Wanpeng Li
2013-12-04  5:37     ` Wanpeng Li
2013-10-07 10:28 ` [PATCH 20/63] sched: Select a preferred node with the most numa hinting faults Mel Gorman
2013-10-07 10:28   ` Mel Gorman
2013-10-07 18:04   ` Rik van Riel
2013-10-07 18:04     ` Rik van Riel
2013-10-09 17:27   ` [tip:sched/core] sched/numa: " tip-bot for Mel Gorman
2013-10-07 10:28 ` [PATCH 21/63] sched: Update NUMA hinting faults once per scan Mel Gorman
2013-10-07 10:28   ` Mel Gorman
2013-10-07 18:39   ` Rik van Riel
2013-10-07 18:39     ` Rik van Riel
2013-10-09 17:27   ` [tip:sched/core] sched/numa: " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 22/63] sched: Favour moving tasks towards the preferred node Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 18:39   ` Rik van Riel
2013-10-07 18:39     ` Rik van Riel
2013-10-09 17:27   ` [tip:sched/core] sched/numa: " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 23/63] sched: Resist moving tasks towards nodes with fewer hinting faults Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 18:40   ` Rik van Riel
2013-10-07 18:40     ` Rik van Riel
2013-10-09 17:27   ` [tip:sched/core] sched/numa: " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 24/63] sched: Reschedule task on preferred NUMA node once selected Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 18:40   ` Rik van Riel
2013-10-07 18:40     ` Rik van Riel
2013-10-09 17:27   ` [tip:sched/core] sched/numa: " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 25/63] sched: Add infrastructure for split shared/private accounting of NUMA hinting faults Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 18:41   ` Rik van Riel
2013-10-07 18:41     ` Rik van Riel
2013-10-09 17:28   ` [tip:sched/core] sched/numa: Add infrastructure for split shared/ private " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 26/63] sched: Check current->mm before allocating NUMA faults Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 18:41   ` Rik van Riel
2013-10-07 18:41     ` Rik van Riel
2013-10-09 17:28   ` [tip:sched/core] sched/numa: Check current-> mm " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 27/63] mm: numa: Scan pages with elevated page_mapcount Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 18:43   ` Rik van Riel
2013-10-07 18:43     ` Rik van Riel
2013-10-09 17:28   ` [tip:sched/core] " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 28/63] sched: Remove check that skips small VMAs Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 18:44   ` Rik van Riel
2013-10-07 18:44     ` Rik van Riel
2013-10-09 17:28   ` [tip:sched/core] sched/numa: " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 29/63] sched: Set preferred NUMA node based on number of private faults Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 18:45   ` Rik van Riel
2013-10-07 18:45     ` Rik van Riel
2013-10-09 17:28   ` [tip:sched/core] sched/numa: " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 30/63] sched: Do not migrate memory immediately after switching node Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-09 17:28   ` [tip:sched/core] sched/numa: " tip-bot for Rik van Riel
2013-10-07 10:29 ` [PATCH 31/63] mm: numa: only unmap migrate-on-fault VMAs Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-09 17:29   ` [tip:sched/core] mm: numa: Limit NUMA scanning to " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 32/63] sched: Avoid overloading CPUs on a preferred NUMA node Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 18:58   ` Rik van Riel
2013-10-07 18:58     ` Rik van Riel
2013-10-09 17:29   ` [tip:sched/core] sched/numa: " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 33/63] sched: Retry migration of tasks to CPU on a preferred node Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 18:58   ` Rik van Riel
2013-10-07 18:58     ` Rik van Riel
2013-10-09 17:29   ` [tip:sched/core] sched/numa: " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 34/63] sched: numa: increment numa_migrate_seq when task runs in correct location Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-09 17:29   ` [tip:sched/core] sched/numa: Increment " tip-bot for Rik van Riel
2013-10-07 10:29 ` [PATCH 35/63] sched: numa: Do not trap hinting faults for shared libraries Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 19:04   ` Rik van Riel
2013-10-07 19:04     ` Rik van Riel
2013-10-09 17:29   ` [tip:sched/core] sched/numa: " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 36/63] mm: numa: Only trap pmd hinting faults if we would otherwise trap PTE faults Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 19:06   ` Rik van Riel
2013-10-07 19:06     ` Rik van Riel
2013-10-09 17:29   ` [tip:sched/core] mm: numa: Trap pmd hinting faults only " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 37/63] stop_machine: Introduce stop_two_cpus() Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-09 17:30   ` [tip:sched/core] " tip-bot for Peter Zijlstra
2013-10-07 10:29 ` [PATCH 38/63] sched: Introduce migrate_swap() Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 19:06   ` Rik van Riel
2013-10-07 19:06     ` Rik van Riel
2013-10-09 17:30   ` [tip:sched/core] sched/numa: " tip-bot for Peter Zijlstra
2013-10-10 18:17     ` Peter Zijlstra
2013-10-10 19:04       ` Rik van Riel
2013-10-15  9:55       ` Mel Gorman
2013-10-17 16:49       ` [tip:sched/core] sched: Fix race in migrate_swap_stop() tip-bot for Peter Zijlstra
2013-10-07 10:29 ` [PATCH 39/63] sched: numa: Use a system-wide search to find swap/migration candidates Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 19:07   ` Rik van Riel
2013-10-07 19:07     ` Rik van Riel
2013-10-09 17:30   ` [tip:sched/core] sched/numa: " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 40/63] sched: numa: Favor placing a task on the preferred node Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 19:07   ` Rik van Riel
2013-10-07 19:07     ` Rik van Riel
2013-10-09 17:30   ` [tip:sched/core] sched/numa: " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 41/63] sched: numa: fix placement of workloads spread across multiple nodes Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-09 17:30   ` [tip:sched/core] sched/numa: Fix " tip-bot for Rik van Riel
2013-10-07 10:29 ` [PATCH 42/63] mm: numa: Change page last {nid,pid} into {cpu,pid} Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 19:08   ` Rik van Riel
2013-10-07 19:08     ` Rik van Riel
2013-10-09 17:30   ` [tip:sched/core] mm: numa: Change page last {nid,pid} into {cpu, pid} tip-bot for Peter Zijlstra
2013-10-07 10:29 ` [PATCH 43/63] sched: numa: Use {cpu, pid} to create task groups for shared faults Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 19:09   ` Rik van Riel
2013-10-07 19:09     ` Rik van Riel
2013-10-09 17:31   ` [tip:sched/core] sched/numa: " tip-bot for Peter Zijlstra
2013-10-07 10:29 ` [PATCH 44/63] sched: numa: Report a NUMA task group ID Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 19:09   ` Rik van Riel
2013-10-07 19:09     ` Rik van Riel
2013-10-09 17:31   ` [tip:sched/core] sched/numa: " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 45/63] mm: numa: copy cpupid on page migration Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-09 17:31   ` [tip:sched/core] mm: numa: Copy " tip-bot for Rik van Riel
2013-10-07 10:29 ` [PATCH 46/63] mm: numa: Do not group on RO pages Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 19:10   ` Rik van Riel
2013-10-07 19:10     ` Rik van Riel
2013-10-09 17:31   ` [tip:sched/core] " tip-bot for Peter Zijlstra
2013-10-07 10:29 ` [PATCH 47/63] mm: numa: Do not batch handle PMD pages Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 19:11   ` Rik van Riel
2013-10-07 19:11     ` Rik van Riel
2013-10-09 17:31   ` [tip:sched/core] " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 48/63] sched: numa: stay on the same node if CLONE_VM Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-09 17:31   ` [tip:sched/core] sched/numa: Stay " tip-bot for Rik van Riel
2013-10-07 10:29 ` [PATCH 49/63] sched: numa: use group fault statistics in numa placement Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-09 17:32   ` [tip:sched/core] sched/numa: Use " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 50/63] sched: numa: call task_numa_free from do_execve Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-09 17:32   ` [tip:sched/core] sched/numa: Call task_numa_free() from do_execve () tip-bot for Rik van Riel
2013-10-07 10:29 ` [PATCH 51/63] sched: numa: Prevent parallel updates to group stats during placement Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 19:13   ` Rik van Riel
2013-10-07 19:13     ` Rik van Riel
2013-10-09 17:32   ` [tip:sched/core] sched/numa: " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 52/63] sched: numa: add debugging Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 19:13   ` Rik van Riel
2013-10-07 19:13     ` Rik van Riel
2013-10-09 17:32   ` [tip:sched/core] sched/numa: Add debugging tip-bot for Ingo Molnar
2013-10-07 10:29 ` [PATCH 53/63] sched: numa: Decide whether to favour task or group weights based on swap candidate relationships Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-09 17:32   ` [tip:sched/core] sched/numa: " tip-bot for Rik van Riel
2013-10-07 10:29 ` [PATCH 54/63] sched: numa: fix task or group comparison Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-09 17:32   ` [tip:sched/core] sched/numa: Fix " tip-bot for Rik van Riel
2013-10-07 10:29 ` [PATCH 55/63] sched: numa: Avoid migrating tasks that are placed on their preferred node Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 19:14   ` Rik van Riel
2013-10-07 19:14     ` Rik van Riel
2013-10-09 17:33   ` [tip:sched/core] sched/numa: " tip-bot for Peter Zijlstra
2013-10-07 10:29 ` [PATCH 56/63] sched: numa: be more careful about joining numa groups Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-09 17:33   ` [tip:sched/core] sched/numa: Be " tip-bot for Rik van Riel
2013-10-07 10:29 ` [PATCH 57/63] sched: numa: Take false sharing into account when adapting scan rate Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 19:14   ` Rik van Riel
2013-10-07 19:14     ` Rik van Riel
2013-10-09 17:33   ` [tip:sched/core] sched/numa: " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 58/63] sched: numa: adjust scan rate in task_numa_placement Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-09 17:33   ` [tip:sched/core] sched/numa: Adjust " tip-bot for Rik van Riel
2013-10-07 10:29 ` [PATCH 59/63] sched: numa: Remove the numa_balancing_scan_period_reset sysctl Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 19:14   ` Rik van Riel
2013-10-07 19:14     ` Rik van Riel
2013-10-09 17:33   ` [tip:sched/core] sched/numa: " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 60/63] mm: numa: revert temporarily disabling of NUMA migration Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-09 17:33   ` [tip:sched/core] mm: numa: Revert " tip-bot for Rik van Riel
2013-10-07 10:29 ` [PATCH 61/63] sched: numa: skip some page migrations after a shared fault Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-09 17:34   ` [tip:sched/core] sched/numa: Skip " tip-bot for Rik van Riel
2013-10-07 10:29 ` [PATCH 62/63] sched: numa: use unsigned longs for numa group fault stats Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 19:15   ` Rik van Riel
2013-10-07 19:15     ` Rik van Riel
2013-10-09 17:34   ` [tip:sched/core] sched/numa: Use " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 63/63] sched: numa: periodically retry task_numa_migrate Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-09 17:34   ` [tip:sched/core] sched/numa: Retry task_numa_migrate() periodically tip-bot for Rik van Riel
2013-10-09 11:03 ` [PATCH 0/63] Basic scheduler support for automatic NUMA balancing V9 Ingo Molnar
2013-10-09 11:03   ` Ingo Molnar
2013-10-09 11:11   ` Ingo Molnar
2013-10-09 11:11     ` Ingo Molnar
2013-10-09 11:13     ` Ingo Molnar
2013-10-09 11:13       ` Ingo Molnar
2013-10-09 12:05   ` Peter Zijlstra
2013-10-09 12:05     ` Peter Zijlstra
2013-10-09 12:48     ` Ingo Molnar
2013-10-09 12:48       ` Ingo Molnar
2013-10-10  7:05   ` Mel Gorman
2013-10-10  7:05     ` Mel Gorman
2013-10-09 16:28 ` Ingo Molnar
2013-10-09 16:29   ` Ingo Molnar
2013-10-09 16:57     ` Ingo Molnar
2013-10-09 16:57       ` Ingo Molnar
2013-10-09 17:09       ` Ingo Molnar
2013-10-09 17:09         ` Ingo Molnar
2013-10-09 17:11         ` Peter Zijlstra
2013-10-09 17:11           ` Peter Zijlstra
2013-10-09 17:08   ` Peter Zijlstra
2013-10-09 17:08     ` Peter Zijlstra
2013-10-09 17:15     ` Ingo Molnar
2013-10-09 17:15       ` Ingo Molnar
2013-10-09 17:18       ` Peter Zijlstra
2013-10-09 17:18         ` Peter Zijlstra
2013-10-24 12:26 ` Automatic NUMA balancing patches for tip-urgent/stable Mel Gorman
2013-10-24 12:26   ` Mel Gorman
2013-10-26 12:11   ` Ingo Molnar
2013-10-26 12:11     ` Ingo Molnar
2013-10-29  9:42     ` Mel Gorman
2013-10-29  9:42       ` Mel Gorman
2013-10-29  9:48       ` Ingo Molnar
2013-10-29  9:48         ` Ingo Molnar
2013-10-29 10:24         ` Mel Gorman
2013-10-29 10:24           ` Mel Gorman
2013-10-29 10:41           ` Ingo Molnar
2013-10-29 10:41             ` Ingo Molnar
2013-10-29 12:48             ` Mel Gorman
2013-10-29 12:48               ` Mel Gorman
2013-10-31  9:51   ` [RFC GIT PULL] NUMA-balancing memory corruption fixes Ingo Molnar
2013-10-31  9:51     ` Ingo Molnar
2013-10-31 22:25     ` Linus Torvalds
2013-10-31 22:25       ` Linus Torvalds
2013-11-01  7:36       ` Ingo Molnar
2013-11-01  7:36         ` Ingo Molnar

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1381141781-10992-2-git-send-email-mgorman@suse.de \
    --to=mgorman@suse.de \
    --cc=a.p.zijlstra@chello.nl \
    --cc=aarcange@redhat.com \
    --cc=hannes@cmpxchg.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mingo@kernel.org \
    --cc=riel@redhat.com \
    --cc=srikar@linux.vnet.ibm.com \
    --subject='Re: [PATCH 01/63] hotplug: Optimize {get,put}_online_cpus()' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.