All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mel Gorman <mgorman@suse.de>
To: Peter Zijlstra <a.p.zijlstra@chello.nl>, Rik van Riel <riel@redhat.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>,
	Ingo Molnar <mingo@kernel.org>,
	Andrea Arcangeli <aarcange@redhat.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Linux-MM <linux-mm@kvack.org>,
	LKML <linux-kernel@vger.kernel.org>, Mel Gorman <mgorman@suse.de>
Subject: [PATCH 39/63] sched: numa: Use a system-wide search to find swap/migration candidates
Date: Mon,  7 Oct 2013 11:29:17 +0100	[thread overview]
Message-ID: <1381141781-10992-40-git-send-email-mgorman@suse.de> (raw)
In-Reply-To: <1381141781-10992-1-git-send-email-mgorman@suse.de>

This patch implements a system-wide search for swap/migration candidates
based on total NUMA hinting faults. It has a balance limit, however it
doesn't properly consider total node balance.

In the old scheme a task selected a preferred node based on the highest
number of private faults recorded on the node. In this scheme, the preferred
node is based on the total number of faults. If the preferred node for a
task changes then task_numa_migrate will search the whole system looking
for tasks to swap with that would improve both the overall compute
balance and minimise the expected number of remote NUMA hinting faults.

Not there is no guarantee that the node the source task is placed
on by task_numa_migrate() has any relationship to the newly selected
task->numa_preferred_nid due to compute overloading.

[riel@redhat.com: Do not swap with tasks that cannot run on source cpu]
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Mel Gorman <mgorman@suse.de>
---
 kernel/sched/core.c  |   4 +
 kernel/sched/fair.c  | 253 ++++++++++++++++++++++++++++++++++++---------------
 kernel/sched/sched.h |  13 +++
 3 files changed, 199 insertions(+), 71 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0862196..18f9bbe 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5241,6 +5241,7 @@ static void destroy_sched_domains(struct sched_domain *sd, int cpu)
 DEFINE_PER_CPU(struct sched_domain *, sd_llc);
 DEFINE_PER_CPU(int, sd_llc_size);
 DEFINE_PER_CPU(int, sd_llc_id);
+DEFINE_PER_CPU(struct sched_domain *, sd_numa);
 
 static void update_top_cache_domain(int cpu)
 {
@@ -5257,6 +5258,9 @@ static void update_top_cache_domain(int cpu)
 	rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);
 	per_cpu(sd_llc_size, cpu) = size;
 	per_cpu(sd_llc_id, cpu) = id;
+
+	sd = lowest_flag_domain(cpu, SD_NUMA);
+	rcu_assign_pointer(per_cpu(sd_numa, cpu), sd);
 }
 
 /*
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index b19c044..59abe50 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -816,6 +816,8 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
  * Scheduling class queueing methods:
  */
 
+static unsigned long task_h_load(struct task_struct *p);
+
 #ifdef CONFIG_NUMA_BALANCING
 /*
  * Approximate time to scan a full NUMA task in ms. The task scan period is
@@ -906,12 +908,40 @@ static unsigned long target_load(int cpu, int type);
 static unsigned long power_of(int cpu);
 static long effective_load(struct task_group *tg, int cpu, long wl, long wg);
 
+/* Cached statistics for all CPUs within a node */
 struct numa_stats {
+	unsigned long nr_running;
 	unsigned long load;
-	s64 eff_load;
-	unsigned long faults;
+
+	/* Total compute capacity of CPUs on a node */
+	unsigned long power;
+
+	/* Approximate capacity in terms of runnable tasks on a node */
+	unsigned long capacity;
+	int has_capacity;
 };
 
+/*
+ * XXX borrowed from update_sg_lb_stats
+ */
+static void update_numa_stats(struct numa_stats *ns, int nid)
+{
+	int cpu;
+
+	memset(ns, 0, sizeof(*ns));
+	for_each_cpu(cpu, cpumask_of_node(nid)) {
+		struct rq *rq = cpu_rq(cpu);
+
+		ns->nr_running += rq->nr_running;
+		ns->load += weighted_cpuload(cpu);
+		ns->power += power_of(cpu);
+	}
+
+	ns->load = (ns->load * SCHED_POWER_SCALE) / ns->power;
+	ns->capacity = DIV_ROUND_CLOSEST(ns->power, SCHED_POWER_SCALE);
+	ns->has_capacity = (ns->nr_running < ns->capacity);
+}
+
 struct task_numa_env {
 	struct task_struct *p;
 
@@ -920,95 +950,178 @@ struct task_numa_env {
 
 	struct numa_stats src_stats, dst_stats;
 
-	unsigned long best_load;
+	int imbalance_pct, idx;
+
+	struct task_struct *best_task;
+	long best_imp;
 	int best_cpu;
 };
 
+static void task_numa_assign(struct task_numa_env *env,
+			     struct task_struct *p, long imp)
+{
+	if (env->best_task)
+		put_task_struct(env->best_task);
+	if (p)
+		get_task_struct(p);
+
+	env->best_task = p;
+	env->best_imp = imp;
+	env->best_cpu = env->dst_cpu;
+}
+
+/*
+ * This checks if the overall compute and NUMA accesses of the system would
+ * be improved if the source tasks was migrated to the target dst_cpu taking
+ * into account that it might be best if task running on the dst_cpu should
+ * be exchanged with the source task
+ */
+static void task_numa_compare(struct task_numa_env *env, long imp)
+{
+	struct rq *src_rq = cpu_rq(env->src_cpu);
+	struct rq *dst_rq = cpu_rq(env->dst_cpu);
+	struct task_struct *cur;
+	long dst_load, src_load;
+	long load;
+
+	rcu_read_lock();
+	cur = ACCESS_ONCE(dst_rq->curr);
+	if (cur->pid == 0) /* idle */
+		cur = NULL;
+
+	/*
+	 * "imp" is the fault differential for the source task between the
+	 * source and destination node. Calculate the total differential for
+	 * the source task and potential destination task. The more negative
+	 * the value is, the more rmeote accesses that would be expected to
+	 * be incurred if the tasks were swapped.
+	 */
+	if (cur) {
+		/* Skip this swap candidate if cannot move to the source cpu */
+		if (!cpumask_test_cpu(env->src_cpu, tsk_cpus_allowed(cur)))
+			goto unlock;
+
+		imp += task_faults(cur, env->src_nid) -
+		       task_faults(cur, env->dst_nid);
+	}
+
+	if (imp < env->best_imp)
+		goto unlock;
+
+	if (!cur) {
+		/* Is there capacity at our destination? */
+		if (env->src_stats.has_capacity &&
+		    !env->dst_stats.has_capacity)
+			goto unlock;
+
+		goto balance;
+	}
+
+	/* Balance doesn't matter much if we're running a task per cpu */
+	if (src_rq->nr_running == 1 && dst_rq->nr_running == 1)
+		goto assign;
+
+	/*
+	 * In the overloaded case, try and keep the load balanced.
+	 */
+balance:
+	dst_load = env->dst_stats.load;
+	src_load = env->src_stats.load;
+
+	/* XXX missing power terms */
+	load = task_h_load(env->p);
+	dst_load += load;
+	src_load -= load;
+
+	if (cur) {
+		load = task_h_load(cur);
+		dst_load -= load;
+		src_load += load;
+	}
+
+	/* make src_load the smaller */
+	if (dst_load < src_load)
+		swap(dst_load, src_load);
+
+	if (src_load * env->imbalance_pct < dst_load * 100)
+		goto unlock;
+
+assign:
+	task_numa_assign(env, cur, imp);
+unlock:
+	rcu_read_unlock();
+}
+
 static int task_numa_migrate(struct task_struct *p)
 {
-	int node_cpu = cpumask_first(cpumask_of_node(p->numa_preferred_nid));
 	struct task_numa_env env = {
 		.p = p,
+
 		.src_cpu = task_cpu(p),
 		.src_nid = cpu_to_node(task_cpu(p)),
-		.dst_cpu = node_cpu,
-		.dst_nid = p->numa_preferred_nid,
-		.best_load = ULONG_MAX,
-		.best_cpu = task_cpu(p),
+
+		.imbalance_pct = 112,
+
+		.best_task = NULL,
+		.best_imp = 0,
+		.best_cpu = -1
 	};
 	struct sched_domain *sd;
-	int cpu;
-	struct task_group *tg = task_group(p);
-	unsigned long weight;
-	bool balanced;
-	int imbalance_pct, idx = -1;
+	unsigned long faults;
+	int nid, cpu, ret;
 
 	/*
-	 * Find the lowest common scheduling domain covering the nodes of both
-	 * the CPU the task is currently running on and the target NUMA node.
+	 * Pick the lowest SD_NUMA domain, as that would have the smallest
+	 * imbalance and would be the first to start moving tasks about.
+	 *
+	 * And we want to avoid any moving of tasks about, as that would create
+	 * random movement of tasks -- counter the numa conditions we're trying
+	 * to satisfy here.
 	 */
 	rcu_read_lock();
-	for_each_domain(env.src_cpu, sd) {
-		if (cpumask_test_cpu(node_cpu, sched_domain_span(sd))) {
-			/*
-			 * busy_idx is used for the load decision as it is the
-			 * same index used by the regular load balancer for an
-			 * active cpu.
-			 */
-			idx = sd->busy_idx;
-			imbalance_pct = sd->imbalance_pct;
-			break;
-		}
-	}
+	sd = rcu_dereference(per_cpu(sd_numa, env.src_cpu));
+	env.imbalance_pct = 100 + (sd->imbalance_pct - 100) / 2;
 	rcu_read_unlock();
 
-	if (WARN_ON_ONCE(idx == -1))
-		return 0;
+	faults = task_faults(p, env.src_nid);
+	update_numa_stats(&env.src_stats, env.src_nid);
 
-	/*
-	 * XXX the below is mostly nicked from wake_affine(); we should
-	 * see about sharing a bit if at all possible; also it might want
-	 * some per entity weight love.
-	 */
-	weight = p->se.load.weight;
-	env.src_stats.load = source_load(env.src_cpu, idx);
-	env.src_stats.eff_load = 100 + (imbalance_pct - 100) / 2;
-	env.src_stats.eff_load *= power_of(env.src_cpu);
-	env.src_stats.eff_load *= env.src_stats.load + effective_load(tg, env.src_cpu, -weight, -weight);
-
-	for_each_cpu(cpu, cpumask_of_node(env.dst_nid)) {
-		env.dst_cpu = cpu;
-		env.dst_stats.load = target_load(cpu, idx);
-
-		/* If the CPU is idle, use it */
-		if (!env.dst_stats.load) {
-			env.best_cpu = cpu;
-			goto migrate;
-		}
+	/* Find an alternative node with relatively better statistics */
+	for_each_online_node(nid) {
+		long imp;
 
-		/* Otherwise check the target CPU load */
-		env.dst_stats.eff_load = 100;
-		env.dst_stats.eff_load *= power_of(cpu);
-		env.dst_stats.eff_load *= env.dst_stats.load + effective_load(tg, cpu, weight, weight);
+		if (nid == env.src_nid)
+			continue;
 
-		/*
-		 * Destination is considered balanced if the destination CPU is
-		 * less loaded than the source CPU. Unfortunately there is a
-		 * risk that a task running on a lightly loaded CPU will not
-		 * migrate to its preferred node due to load imbalances.
-		 */
-		balanced = (env.dst_stats.eff_load <= env.src_stats.eff_load);
-		if (!balanced)
+		/* Only consider nodes that recorded more faults */
+		imp = task_faults(p, nid) - faults;
+		if (imp < 0)
 			continue;
 
-		if (env.dst_stats.eff_load < env.best_load) {
-			env.best_load = env.dst_stats.eff_load;
-			env.best_cpu = cpu;
+		env.dst_nid = nid;
+		update_numa_stats(&env.dst_stats, env.dst_nid);
+		for_each_cpu(cpu, cpumask_of_node(nid)) {
+			/* Skip this CPU if the source task cannot migrate */
+			if (!cpumask_test_cpu(cpu, tsk_cpus_allowed(p)))
+				continue;
+
+			env.dst_cpu = cpu;
+			task_numa_compare(&env, imp);
 		}
 	}
 
-migrate:
-	return migrate_task_to(p, env.best_cpu);
+	/* No better CPU than the current one was found. */
+	if (env.best_cpu == -1)
+		return -EAGAIN;
+
+	if (env.best_task == NULL) {
+		int ret = migrate_task_to(p, env.best_cpu);
+		return ret;
+	}
+
+	ret = migrate_swap(p, env.best_task);
+	put_task_struct(env.best_task);
+	return ret;
 }
 
 /* Attempt to migrate a task to a CPU on the preferred node. */
@@ -1050,7 +1163,7 @@ static void task_numa_placement(struct task_struct *p)
 
 	/* Find the node with the highest number of faults */
 	for_each_online_node(nid) {
-		unsigned long faults;
+		unsigned long faults = 0;
 		int priv, i;
 
 		for (priv = 0; priv < 2; priv++) {
@@ -1060,10 +1173,10 @@ static void task_numa_placement(struct task_struct *p)
 			p->numa_faults[i] >>= 1;
 			p->numa_faults[i] += p->numa_faults_buffer[i];
 			p->numa_faults_buffer[i] = 0;
+
+			faults += p->numa_faults[i];
 		}
 
-		/* Find maximum private faults */
-		faults = p->numa_faults[task_faults_idx(nid, 1)];
 		if (faults > max_faults) {
 			max_faults = faults;
 			max_nid = nid;
@@ -4452,8 +4565,6 @@ static int move_one_task(struct lb_env *env)
 	return 0;
 }
 
-static unsigned long task_h_load(struct task_struct *p);
-
 static const unsigned int sched_nr_migrate_break = 32;
 
 /*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e9ab96c..fd3f7b6 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -607,9 +607,22 @@ static inline struct sched_domain *highest_flag_domain(int cpu, int flag)
 	return hsd;
 }
 
+static inline struct sched_domain *lowest_flag_domain(int cpu, int flag)
+{
+	struct sched_domain *sd;
+
+	for_each_domain(cpu, sd) {
+		if (sd->flags & flag)
+			break;
+	}
+
+	return sd;
+}
+
 DECLARE_PER_CPU(struct sched_domain *, sd_llc);
 DECLARE_PER_CPU(int, sd_llc_size);
 DECLARE_PER_CPU(int, sd_llc_id);
+DECLARE_PER_CPU(struct sched_domain *, sd_numa);
 
 struct sched_group_power {
 	atomic_t ref;
-- 
1.8.4


WARNING: multiple messages have this Message-ID
From: Mel Gorman <mgorman@suse.de>
To: Peter Zijlstra <a.p.zijlstra@chello.nl>, Rik van Riel <riel@redhat.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>,
	Ingo Molnar <mingo@kernel.org>,
	Andrea Arcangeli <aarcange@redhat.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Linux-MM <linux-mm@kvack.org>,
	LKML <linux-kernel@vger.kernel.org>, Mel Gorman <mgorman@suse.de>
Subject: [PATCH 39/63] sched: numa: Use a system-wide search to find swap/migration candidates
Date: Mon,  7 Oct 2013 11:29:17 +0100	[thread overview]
Message-ID: <1381141781-10992-40-git-send-email-mgorman@suse.de> (raw)
In-Reply-To: <1381141781-10992-1-git-send-email-mgorman@suse.de>

This patch implements a system-wide search for swap/migration candidates
based on total NUMA hinting faults. It has a balance limit, however it
doesn't properly consider total node balance.

In the old scheme a task selected a preferred node based on the highest
number of private faults recorded on the node. In this scheme, the preferred
node is based on the total number of faults. If the preferred node for a
task changes then task_numa_migrate will search the whole system looking
for tasks to swap with that would improve both the overall compute
balance and minimise the expected number of remote NUMA hinting faults.

Not there is no guarantee that the node the source task is placed
on by task_numa_migrate() has any relationship to the newly selected
task->numa_preferred_nid due to compute overloading.

[riel@redhat.com: Do not swap with tasks that cannot run on source cpu]
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Mel Gorman <mgorman@suse.de>
---
 kernel/sched/core.c  |   4 +
 kernel/sched/fair.c  | 253 ++++++++++++++++++++++++++++++++++++---------------
 kernel/sched/sched.h |  13 +++
 3 files changed, 199 insertions(+), 71 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0862196..18f9bbe 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5241,6 +5241,7 @@ static void destroy_sched_domains(struct sched_domain *sd, int cpu)
 DEFINE_PER_CPU(struct sched_domain *, sd_llc);
 DEFINE_PER_CPU(int, sd_llc_size);
 DEFINE_PER_CPU(int, sd_llc_id);
+DEFINE_PER_CPU(struct sched_domain *, sd_numa);
 
 static void update_top_cache_domain(int cpu)
 {
@@ -5257,6 +5258,9 @@ static void update_top_cache_domain(int cpu)
 	rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);
 	per_cpu(sd_llc_size, cpu) = size;
 	per_cpu(sd_llc_id, cpu) = id;
+
+	sd = lowest_flag_domain(cpu, SD_NUMA);
+	rcu_assign_pointer(per_cpu(sd_numa, cpu), sd);
 }
 
 /*
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index b19c044..59abe50 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -816,6 +816,8 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
  * Scheduling class queueing methods:
  */
 
+static unsigned long task_h_load(struct task_struct *p);
+
 #ifdef CONFIG_NUMA_BALANCING
 /*
  * Approximate time to scan a full NUMA task in ms. The task scan period is
@@ -906,12 +908,40 @@ static unsigned long target_load(int cpu, int type);
 static unsigned long power_of(int cpu);
 static long effective_load(struct task_group *tg, int cpu, long wl, long wg);
 
+/* Cached statistics for all CPUs within a node */
 struct numa_stats {
+	unsigned long nr_running;
 	unsigned long load;
-	s64 eff_load;
-	unsigned long faults;
+
+	/* Total compute capacity of CPUs on a node */
+	unsigned long power;
+
+	/* Approximate capacity in terms of runnable tasks on a node */
+	unsigned long capacity;
+	int has_capacity;
 };
 
+/*
+ * XXX borrowed from update_sg_lb_stats
+ */
+static void update_numa_stats(struct numa_stats *ns, int nid)
+{
+	int cpu;
+
+	memset(ns, 0, sizeof(*ns));
+	for_each_cpu(cpu, cpumask_of_node(nid)) {
+		struct rq *rq = cpu_rq(cpu);
+
+		ns->nr_running += rq->nr_running;
+		ns->load += weighted_cpuload(cpu);
+		ns->power += power_of(cpu);
+	}
+
+	ns->load = (ns->load * SCHED_POWER_SCALE) / ns->power;
+	ns->capacity = DIV_ROUND_CLOSEST(ns->power, SCHED_POWER_SCALE);
+	ns->has_capacity = (ns->nr_running < ns->capacity);
+}
+
 struct task_numa_env {
 	struct task_struct *p;
 
@@ -920,95 +950,178 @@ struct task_numa_env {
 
 	struct numa_stats src_stats, dst_stats;
 
-	unsigned long best_load;
+	int imbalance_pct, idx;
+
+	struct task_struct *best_task;
+	long best_imp;
 	int best_cpu;
 };
 
+static void task_numa_assign(struct task_numa_env *env,
+			     struct task_struct *p, long imp)
+{
+	if (env->best_task)
+		put_task_struct(env->best_task);
+	if (p)
+		get_task_struct(p);
+
+	env->best_task = p;
+	env->best_imp = imp;
+	env->best_cpu = env->dst_cpu;
+}
+
+/*
+ * This checks if the overall compute and NUMA accesses of the system would
+ * be improved if the source tasks was migrated to the target dst_cpu taking
+ * into account that it might be best if task running on the dst_cpu should
+ * be exchanged with the source task
+ */
+static void task_numa_compare(struct task_numa_env *env, long imp)
+{
+	struct rq *src_rq = cpu_rq(env->src_cpu);
+	struct rq *dst_rq = cpu_rq(env->dst_cpu);
+	struct task_struct *cur;
+	long dst_load, src_load;
+	long load;
+
+	rcu_read_lock();
+	cur = ACCESS_ONCE(dst_rq->curr);
+	if (cur->pid == 0) /* idle */
+		cur = NULL;
+
+	/*
+	 * "imp" is the fault differential for the source task between the
+	 * source and destination node. Calculate the total differential for
+	 * the source task and potential destination task. The more negative
+	 * the value is, the more rmeote accesses that would be expected to
+	 * be incurred if the tasks were swapped.
+	 */
+	if (cur) {
+		/* Skip this swap candidate if cannot move to the source cpu */
+		if (!cpumask_test_cpu(env->src_cpu, tsk_cpus_allowed(cur)))
+			goto unlock;
+
+		imp += task_faults(cur, env->src_nid) -
+		       task_faults(cur, env->dst_nid);
+	}
+
+	if (imp < env->best_imp)
+		goto unlock;
+
+	if (!cur) {
+		/* Is there capacity at our destination? */
+		if (env->src_stats.has_capacity &&
+		    !env->dst_stats.has_capacity)
+			goto unlock;
+
+		goto balance;
+	}
+
+	/* Balance doesn't matter much if we're running a task per cpu */
+	if (src_rq->nr_running == 1 && dst_rq->nr_running == 1)
+		goto assign;
+
+	/*
+	 * In the overloaded case, try and keep the load balanced.
+	 */
+balance:
+	dst_load = env->dst_stats.load;
+	src_load = env->src_stats.load;
+
+	/* XXX missing power terms */
+	load = task_h_load(env->p);
+	dst_load += load;
+	src_load -= load;
+
+	if (cur) {
+		load = task_h_load(cur);
+		dst_load -= load;
+		src_load += load;
+	}
+
+	/* make src_load the smaller */
+	if (dst_load < src_load)
+		swap(dst_load, src_load);
+
+	if (src_load * env->imbalance_pct < dst_load * 100)
+		goto unlock;
+
+assign:
+	task_numa_assign(env, cur, imp);
+unlock:
+	rcu_read_unlock();
+}
+
 static int task_numa_migrate(struct task_struct *p)
 {
-	int node_cpu = cpumask_first(cpumask_of_node(p->numa_preferred_nid));
 	struct task_numa_env env = {
 		.p = p,
+
 		.src_cpu = task_cpu(p),
 		.src_nid = cpu_to_node(task_cpu(p)),
-		.dst_cpu = node_cpu,
-		.dst_nid = p->numa_preferred_nid,
-		.best_load = ULONG_MAX,
-		.best_cpu = task_cpu(p),
+
+		.imbalance_pct = 112,
+
+		.best_task = NULL,
+		.best_imp = 0,
+		.best_cpu = -1
 	};
 	struct sched_domain *sd;
-	int cpu;
-	struct task_group *tg = task_group(p);
-	unsigned long weight;
-	bool balanced;
-	int imbalance_pct, idx = -1;
+	unsigned long faults;
+	int nid, cpu, ret;
 
 	/*
-	 * Find the lowest common scheduling domain covering the nodes of both
-	 * the CPU the task is currently running on and the target NUMA node.
+	 * Pick the lowest SD_NUMA domain, as that would have the smallest
+	 * imbalance and would be the first to start moving tasks about.
+	 *
+	 * And we want to avoid any moving of tasks about, as that would create
+	 * random movement of tasks -- counter the numa conditions we're trying
+	 * to satisfy here.
 	 */
 	rcu_read_lock();
-	for_each_domain(env.src_cpu, sd) {
-		if (cpumask_test_cpu(node_cpu, sched_domain_span(sd))) {
-			/*
-			 * busy_idx is used for the load decision as it is the
-			 * same index used by the regular load balancer for an
-			 * active cpu.
-			 */
-			idx = sd->busy_idx;
-			imbalance_pct = sd->imbalance_pct;
-			break;
-		}
-	}
+	sd = rcu_dereference(per_cpu(sd_numa, env.src_cpu));
+	env.imbalance_pct = 100 + (sd->imbalance_pct - 100) / 2;
 	rcu_read_unlock();
 
-	if (WARN_ON_ONCE(idx == -1))
-		return 0;
+	faults = task_faults(p, env.src_nid);
+	update_numa_stats(&env.src_stats, env.src_nid);
 
-	/*
-	 * XXX the below is mostly nicked from wake_affine(); we should
-	 * see about sharing a bit if at all possible; also it might want
-	 * some per entity weight love.
-	 */
-	weight = p->se.load.weight;
-	env.src_stats.load = source_load(env.src_cpu, idx);
-	env.src_stats.eff_load = 100 + (imbalance_pct - 100) / 2;
-	env.src_stats.eff_load *= power_of(env.src_cpu);
-	env.src_stats.eff_load *= env.src_stats.load + effective_load(tg, env.src_cpu, -weight, -weight);
-
-	for_each_cpu(cpu, cpumask_of_node(env.dst_nid)) {
-		env.dst_cpu = cpu;
-		env.dst_stats.load = target_load(cpu, idx);
-
-		/* If the CPU is idle, use it */
-		if (!env.dst_stats.load) {
-			env.best_cpu = cpu;
-			goto migrate;
-		}
+	/* Find an alternative node with relatively better statistics */
+	for_each_online_node(nid) {
+		long imp;
 
-		/* Otherwise check the target CPU load */
-		env.dst_stats.eff_load = 100;
-		env.dst_stats.eff_load *= power_of(cpu);
-		env.dst_stats.eff_load *= env.dst_stats.load + effective_load(tg, cpu, weight, weight);
+		if (nid == env.src_nid)
+			continue;
 
-		/*
-		 * Destination is considered balanced if the destination CPU is
-		 * less loaded than the source CPU. Unfortunately there is a
-		 * risk that a task running on a lightly loaded CPU will not
-		 * migrate to its preferred node due to load imbalances.
-		 */
-		balanced = (env.dst_stats.eff_load <= env.src_stats.eff_load);
-		if (!balanced)
+		/* Only consider nodes that recorded more faults */
+		imp = task_faults(p, nid) - faults;
+		if (imp < 0)
 			continue;
 
-		if (env.dst_stats.eff_load < env.best_load) {
-			env.best_load = env.dst_stats.eff_load;
-			env.best_cpu = cpu;
+		env.dst_nid = nid;
+		update_numa_stats(&env.dst_stats, env.dst_nid);
+		for_each_cpu(cpu, cpumask_of_node(nid)) {
+			/* Skip this CPU if the source task cannot migrate */
+			if (!cpumask_test_cpu(cpu, tsk_cpus_allowed(p)))
+				continue;
+
+			env.dst_cpu = cpu;
+			task_numa_compare(&env, imp);
 		}
 	}
 
-migrate:
-	return migrate_task_to(p, env.best_cpu);
+	/* No better CPU than the current one was found. */
+	if (env.best_cpu == -1)
+		return -EAGAIN;
+
+	if (env.best_task == NULL) {
+		int ret = migrate_task_to(p, env.best_cpu);
+		return ret;
+	}
+
+	ret = migrate_swap(p, env.best_task);
+	put_task_struct(env.best_task);
+	return ret;
 }
 
 /* Attempt to migrate a task to a CPU on the preferred node. */
@@ -1050,7 +1163,7 @@ static void task_numa_placement(struct task_struct *p)
 
 	/* Find the node with the highest number of faults */
 	for_each_online_node(nid) {
-		unsigned long faults;
+		unsigned long faults = 0;
 		int priv, i;
 
 		for (priv = 0; priv < 2; priv++) {
@@ -1060,10 +1173,10 @@ static void task_numa_placement(struct task_struct *p)
 			p->numa_faults[i] >>= 1;
 			p->numa_faults[i] += p->numa_faults_buffer[i];
 			p->numa_faults_buffer[i] = 0;
+
+			faults += p->numa_faults[i];
 		}
 
-		/* Find maximum private faults */
-		faults = p->numa_faults[task_faults_idx(nid, 1)];
 		if (faults > max_faults) {
 			max_faults = faults;
 			max_nid = nid;
@@ -4452,8 +4565,6 @@ static int move_one_task(struct lb_env *env)
 	return 0;
 }
 
-static unsigned long task_h_load(struct task_struct *p);
-
 static const unsigned int sched_nr_migrate_break = 32;
 
 /*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e9ab96c..fd3f7b6 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -607,9 +607,22 @@ static inline struct sched_domain *highest_flag_domain(int cpu, int flag)
 	return hsd;
 }
 
+static inline struct sched_domain *lowest_flag_domain(int cpu, int flag)
+{
+	struct sched_domain *sd;
+
+	for_each_domain(cpu, sd) {
+		if (sd->flags & flag)
+			break;
+	}
+
+	return sd;
+}
+
 DECLARE_PER_CPU(struct sched_domain *, sd_llc);
 DECLARE_PER_CPU(int, sd_llc_size);
 DECLARE_PER_CPU(int, sd_llc_id);
+DECLARE_PER_CPU(struct sched_domain *, sd_numa);
 
 struct sched_group_power {
 	atomic_t ref;
-- 
1.8.4

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2013-10-07 10:36 UTC|newest]

Thread overview: 340+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-10-07 10:28 [PATCH 0/63] Basic scheduler support for automatic NUMA balancing V9 Mel Gorman
2013-10-07 10:28 ` Mel Gorman
2013-10-07 10:28 ` [PATCH 01/63] hotplug: Optimize {get,put}_online_cpus() Mel Gorman
2013-10-07 10:28   ` Mel Gorman
2013-10-07 10:28 ` [PATCH 02/63] mm: numa: Document automatic NUMA balancing sysctls Mel Gorman
2013-10-07 10:28   ` Mel Gorman
2013-10-07 12:46   ` Rik van Riel
2013-10-07 12:46     ` Rik van Riel
2013-10-09 17:24   ` [tip:sched/core] " tip-bot for Mel Gorman
2013-10-07 10:28 ` [PATCH 03/63] sched, numa: Comment fixlets Mel Gorman
2013-10-07 10:28   ` Mel Gorman
2013-10-07 12:46   ` Rik van Riel
2013-10-07 12:46     ` Rik van Riel
2013-10-09 17:24   ` [tip:sched/core] sched/numa: Fix comments tip-bot for Peter Zijlstra
2013-10-07 10:28 ` [PATCH 04/63] mm: numa: Do not account for a hinting fault if we raced Mel Gorman
2013-10-07 10:28   ` Mel Gorman
2013-10-07 12:47   ` Rik van Riel
2013-10-07 12:47     ` Rik van Riel
2013-10-09 17:24   ` [tip:sched/core] " tip-bot for Mel Gorman
2013-10-29 10:42   ` [tip:core/urgent] " tip-bot for Mel Gorman
2013-10-07 10:28 ` [PATCH 05/63] mm: Wait for THP migrations to complete during NUMA hinting faults Mel Gorman
2013-10-07 10:28   ` Mel Gorman
2013-10-07 13:55   ` Rik van Riel
2013-10-07 13:55     ` Rik van Riel
2013-10-09 17:24   ` [tip:sched/core] " tip-bot for Mel Gorman
2013-10-29 10:42   ` [tip:core/urgent] " tip-bot for Mel Gorman
2013-10-07 10:28 ` [PATCH 06/63] mm: Prevent parallel splits during THP migration Mel Gorman
2013-10-07 10:28   ` Mel Gorman
2013-10-07 14:01   ` Rik van Riel
2013-10-07 14:01     ` Rik van Riel
2013-10-09 17:24   ` [tip:sched/core] " tip-bot for Mel Gorman
2013-10-29 10:42   ` [tip:core/urgent] " tip-bot for Mel Gorman
2013-10-07 10:28 ` [PATCH 07/63] mm: numa: Sanitize task_numa_fault() callsites Mel Gorman
2013-10-07 10:28   ` Mel Gorman
2013-10-07 14:02   ` Rik van Riel
2013-10-07 14:02     ` Rik van Riel
2013-10-09 17:25   ` [tip:sched/core] " tip-bot for Mel Gorman
2013-10-29 10:42   ` [tip:core/urgent] " tip-bot for Mel Gorman
2013-10-07 10:28 ` [PATCH 08/63] mm: Close races between THP migration and PMD numa clearing Mel Gorman
2013-10-07 10:28   ` Mel Gorman
2013-10-07 14:02   ` Rik van Riel
2013-10-07 14:02     ` Rik van Riel
2013-10-09 17:25   ` [tip:sched/core] " tip-bot for Mel Gorman
2013-10-29 10:42   ` [tip:core/urgent] " tip-bot for Mel Gorman
2013-10-07 10:28 ` [PATCH 09/63] mm: Account for a THP NUMA hinting update as one PTE update Mel Gorman
2013-10-07 10:28   ` Mel Gorman
2013-10-07 14:02   ` Rik van Riel
2013-10-07 14:02     ` Rik van Riel
2013-10-09 17:25   ` [tip:sched/core] " tip-bot for Mel Gorman
2013-10-29 10:43   ` [tip:core/urgent] " tip-bot for Mel Gorman
2013-10-07 10:28 ` [PATCH 10/63] mm: Do not flush TLB during protection change if !pte_present && !migration_entry Mel Gorman
2013-10-07 10:28   ` Mel Gorman
2013-10-07 15:12   ` Rik van Riel
2013-10-07 15:12     ` Rik van Riel
2013-10-09 17:25   ` [tip:sched/core] " tip-bot for Mel Gorman
2013-10-07 10:28 ` [PATCH 11/63] mm: Only flush TLBs if a transhuge PMD is modified for NUMA pte scanning Mel Gorman
2013-10-07 10:28   ` Mel Gorman
2013-10-09 17:25   ` [tip:sched/core] " tip-bot for Mel Gorman
2013-10-07 10:28 ` [PATCH 12/63] mm: numa: Do not migrate or account for hinting faults on the zero page Mel Gorman
2013-10-07 10:28   ` Mel Gorman
2013-10-07 17:10   ` Rik van Riel
2013-10-07 17:10     ` Rik van Riel
2013-10-09 17:25   ` [tip:sched/core] " tip-bot for Mel Gorman
2013-10-07 10:28 ` [PATCH 13/63] sched: numa: Mitigate chance that same task always updates PTEs Mel Gorman
2013-10-07 10:28   ` Mel Gorman
2013-10-07 17:24   ` Rik van Riel
2013-10-07 17:24     ` Rik van Riel
2013-10-09 17:26   ` [tip:sched/core] sched/numa: " tip-bot for Peter Zijlstra
2013-10-07 10:28 ` [PATCH 14/63] sched: numa: Continue PTE scanning even if migrate rate limited Mel Gorman
2013-10-07 10:28   ` Mel Gorman
2013-10-07 17:24   ` Rik van Riel
2013-10-07 17:24     ` Rik van Riel
2013-10-09 17:26   ` [tip:sched/core] sched/numa: " tip-bot for Peter Zijlstra
2013-10-07 10:28 ` [PATCH 15/63] Revert "mm: sched: numa: Delay PTE scanning until a task is scheduled on a new node" Mel Gorman
2013-10-07 10:28   ` Mel Gorman
2013-10-07 17:42   ` Rik van Riel
2013-10-07 17:42     ` Rik van Riel
2013-10-09 17:26   ` [tip:sched/core] " tip-bot for Mel Gorman
2013-10-07 10:28 ` [PATCH 16/63] sched: numa: Initialise numa_next_scan properly Mel Gorman
2013-10-07 10:28   ` Mel Gorman
2013-10-07 17:44   ` Rik van Riel
2013-10-07 17:44     ` Rik van Riel
2013-10-09 17:26   ` [tip:sched/core] sched/numa: " tip-bot for Mel Gorman
2013-10-07 10:28 ` [PATCH 17/63] sched: Set the scan rate proportional to the memory usage of the task being scanned Mel Gorman
2013-10-07 10:28   ` Mel Gorman
2013-10-07 17:44   ` Rik van Riel
2013-10-07 17:44     ` Rik van Riel
2013-10-09 17:26   ` [tip:sched/core] sched/numa: " tip-bot for Mel Gorman
2013-10-07 10:28 ` [PATCH 18/63] sched: numa: Slow scan rate if no NUMA hinting faults are being recorded Mel Gorman
2013-10-07 10:28   ` Mel Gorman
2013-10-07 18:02   ` Rik van Riel
2013-10-07 18:02     ` Rik van Riel
2013-10-09 17:26   ` [tip:sched/core] sched/numa: " tip-bot for Mel Gorman
2013-10-07 10:28 ` [PATCH 19/63] sched: Track NUMA hinting faults on per-node basis Mel Gorman
2013-10-07 10:28   ` Mel Gorman
2013-10-07 18:02   ` Rik van Riel
2013-10-07 18:02     ` Rik van Riel
2013-10-09 17:27   ` [tip:sched/core] sched/numa: " tip-bot for Mel Gorman
2013-12-04  5:32   ` [PATCH 19/63] sched: " Wanpeng Li
2013-12-04  5:37     ` Wanpeng Li
2013-10-07 10:28 ` [PATCH 20/63] sched: Select a preferred node with the most numa hinting faults Mel Gorman
2013-10-07 10:28   ` Mel Gorman
2013-10-07 18:04   ` Rik van Riel
2013-10-07 18:04     ` Rik van Riel
2013-10-09 17:27   ` [tip:sched/core] sched/numa: " tip-bot for Mel Gorman
2013-10-07 10:28 ` [PATCH 21/63] sched: Update NUMA hinting faults once per scan Mel Gorman
2013-10-07 10:28   ` Mel Gorman
2013-10-07 18:39   ` Rik van Riel
2013-10-07 18:39     ` Rik van Riel
2013-10-09 17:27   ` [tip:sched/core] sched/numa: " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 22/63] sched: Favour moving tasks towards the preferred node Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 18:39   ` Rik van Riel
2013-10-07 18:39     ` Rik van Riel
2013-10-09 17:27   ` [tip:sched/core] sched/numa: " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 23/63] sched: Resist moving tasks towards nodes with fewer hinting faults Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 18:40   ` Rik van Riel
2013-10-07 18:40     ` Rik van Riel
2013-10-09 17:27   ` [tip:sched/core] sched/numa: " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 24/63] sched: Reschedule task on preferred NUMA node once selected Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 18:40   ` Rik van Riel
2013-10-07 18:40     ` Rik van Riel
2013-10-09 17:27   ` [tip:sched/core] sched/numa: " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 25/63] sched: Add infrastructure for split shared/private accounting of NUMA hinting faults Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 18:41   ` Rik van Riel
2013-10-07 18:41     ` Rik van Riel
2013-10-09 17:28   ` [tip:sched/core] sched/numa: Add infrastructure for split shared/ private " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 26/63] sched: Check current->mm before allocating NUMA faults Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 18:41   ` Rik van Riel
2013-10-07 18:41     ` Rik van Riel
2013-10-09 17:28   ` [tip:sched/core] sched/numa: Check current-> mm " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 27/63] mm: numa: Scan pages with elevated page_mapcount Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 18:43   ` Rik van Riel
2013-10-07 18:43     ` Rik van Riel
2013-10-09 17:28   ` [tip:sched/core] " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 28/63] sched: Remove check that skips small VMAs Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 18:44   ` Rik van Riel
2013-10-07 18:44     ` Rik van Riel
2013-10-09 17:28   ` [tip:sched/core] sched/numa: " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 29/63] sched: Set preferred NUMA node based on number of private faults Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 18:45   ` Rik van Riel
2013-10-07 18:45     ` Rik van Riel
2013-10-09 17:28   ` [tip:sched/core] sched/numa: " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 30/63] sched: Do not migrate memory immediately after switching node Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-09 17:28   ` [tip:sched/core] sched/numa: " tip-bot for Rik van Riel
2013-10-07 10:29 ` [PATCH 31/63] mm: numa: only unmap migrate-on-fault VMAs Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-09 17:29   ` [tip:sched/core] mm: numa: Limit NUMA scanning to " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 32/63] sched: Avoid overloading CPUs on a preferred NUMA node Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 18:58   ` Rik van Riel
2013-10-07 18:58     ` Rik van Riel
2013-10-09 17:29   ` [tip:sched/core] sched/numa: " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 33/63] sched: Retry migration of tasks to CPU on a preferred node Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 18:58   ` Rik van Riel
2013-10-07 18:58     ` Rik van Riel
2013-10-09 17:29   ` [tip:sched/core] sched/numa: " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 34/63] sched: numa: increment numa_migrate_seq when task runs in correct location Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-09 17:29   ` [tip:sched/core] sched/numa: Increment " tip-bot for Rik van Riel
2013-10-07 10:29 ` [PATCH 35/63] sched: numa: Do not trap hinting faults for shared libraries Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 19:04   ` Rik van Riel
2013-10-07 19:04     ` Rik van Riel
2013-10-09 17:29   ` [tip:sched/core] sched/numa: " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 36/63] mm: numa: Only trap pmd hinting faults if we would otherwise trap PTE faults Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 19:06   ` Rik van Riel
2013-10-07 19:06     ` Rik van Riel
2013-10-09 17:29   ` [tip:sched/core] mm: numa: Trap pmd hinting faults only " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 37/63] stop_machine: Introduce stop_two_cpus() Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-09 17:30   ` [tip:sched/core] " tip-bot for Peter Zijlstra
2013-10-07 10:29 ` [PATCH 38/63] sched: Introduce migrate_swap() Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 19:06   ` Rik van Riel
2013-10-07 19:06     ` Rik van Riel
2013-10-09 17:30   ` [tip:sched/core] sched/numa: " tip-bot for Peter Zijlstra
2013-10-10 18:17     ` Peter Zijlstra
2013-10-10 19:04       ` Rik van Riel
2013-10-15  9:55       ` Mel Gorman
2013-10-17 16:49       ` [tip:sched/core] sched: Fix race in migrate_swap_stop() tip-bot for Peter Zijlstra
2013-10-07 10:29 ` Mel Gorman [this message]
2013-10-07 10:29   ` [PATCH 39/63] sched: numa: Use a system-wide search to find swap/migration candidates Mel Gorman
2013-10-07 19:07   ` Rik van Riel
2013-10-07 19:07     ` Rik van Riel
2013-10-09 17:30   ` [tip:sched/core] sched/numa: " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 40/63] sched: numa: Favor placing a task on the preferred node Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 19:07   ` Rik van Riel
2013-10-07 19:07     ` Rik van Riel
2013-10-09 17:30   ` [tip:sched/core] sched/numa: " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 41/63] sched: numa: fix placement of workloads spread across multiple nodes Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-09 17:30   ` [tip:sched/core] sched/numa: Fix " tip-bot for Rik van Riel
2013-10-07 10:29 ` [PATCH 42/63] mm: numa: Change page last {nid,pid} into {cpu,pid} Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 19:08   ` Rik van Riel
2013-10-07 19:08     ` Rik van Riel
2013-10-09 17:30   ` [tip:sched/core] mm: numa: Change page last {nid,pid} into {cpu, pid} tip-bot for Peter Zijlstra
2013-10-07 10:29 ` [PATCH 43/63] sched: numa: Use {cpu, pid} to create task groups for shared faults Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 19:09   ` Rik van Riel
2013-10-07 19:09     ` Rik van Riel
2013-10-09 17:31   ` [tip:sched/core] sched/numa: " tip-bot for Peter Zijlstra
2013-10-07 10:29 ` [PATCH 44/63] sched: numa: Report a NUMA task group ID Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 19:09   ` Rik van Riel
2013-10-07 19:09     ` Rik van Riel
2013-10-09 17:31   ` [tip:sched/core] sched/numa: " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 45/63] mm: numa: copy cpupid on page migration Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-09 17:31   ` [tip:sched/core] mm: numa: Copy " tip-bot for Rik van Riel
2013-10-07 10:29 ` [PATCH 46/63] mm: numa: Do not group on RO pages Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 19:10   ` Rik van Riel
2013-10-07 19:10     ` Rik van Riel
2013-10-09 17:31   ` [tip:sched/core] " tip-bot for Peter Zijlstra
2013-10-07 10:29 ` [PATCH 47/63] mm: numa: Do not batch handle PMD pages Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 19:11   ` Rik van Riel
2013-10-07 19:11     ` Rik van Riel
2013-10-09 17:31   ` [tip:sched/core] " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 48/63] sched: numa: stay on the same node if CLONE_VM Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-09 17:31   ` [tip:sched/core] sched/numa: Stay " tip-bot for Rik van Riel
2013-10-07 10:29 ` [PATCH 49/63] sched: numa: use group fault statistics in numa placement Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-09 17:32   ` [tip:sched/core] sched/numa: Use " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 50/63] sched: numa: call task_numa_free from do_execve Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-09 17:32   ` [tip:sched/core] sched/numa: Call task_numa_free() from do_execve () tip-bot for Rik van Riel
2013-10-07 10:29 ` [PATCH 51/63] sched: numa: Prevent parallel updates to group stats during placement Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 19:13   ` Rik van Riel
2013-10-07 19:13     ` Rik van Riel
2013-10-09 17:32   ` [tip:sched/core] sched/numa: " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 52/63] sched: numa: add debugging Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 19:13   ` Rik van Riel
2013-10-07 19:13     ` Rik van Riel
2013-10-09 17:32   ` [tip:sched/core] sched/numa: Add debugging tip-bot for Ingo Molnar
2013-10-07 10:29 ` [PATCH 53/63] sched: numa: Decide whether to favour task or group weights based on swap candidate relationships Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-09 17:32   ` [tip:sched/core] sched/numa: " tip-bot for Rik van Riel
2013-10-07 10:29 ` [PATCH 54/63] sched: numa: fix task or group comparison Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-09 17:32   ` [tip:sched/core] sched/numa: Fix " tip-bot for Rik van Riel
2013-10-07 10:29 ` [PATCH 55/63] sched: numa: Avoid migrating tasks that are placed on their preferred node Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 19:14   ` Rik van Riel
2013-10-07 19:14     ` Rik van Riel
2013-10-09 17:33   ` [tip:sched/core] sched/numa: " tip-bot for Peter Zijlstra
2013-10-07 10:29 ` [PATCH 56/63] sched: numa: be more careful about joining numa groups Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-09 17:33   ` [tip:sched/core] sched/numa: Be " tip-bot for Rik van Riel
2013-10-07 10:29 ` [PATCH 57/63] sched: numa: Take false sharing into account when adapting scan rate Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 19:14   ` Rik van Riel
2013-10-07 19:14     ` Rik van Riel
2013-10-09 17:33   ` [tip:sched/core] sched/numa: " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 58/63] sched: numa: adjust scan rate in task_numa_placement Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-09 17:33   ` [tip:sched/core] sched/numa: Adjust " tip-bot for Rik van Riel
2013-10-07 10:29 ` [PATCH 59/63] sched: numa: Remove the numa_balancing_scan_period_reset sysctl Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 19:14   ` Rik van Riel
2013-10-07 19:14     ` Rik van Riel
2013-10-09 17:33   ` [tip:sched/core] sched/numa: " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 60/63] mm: numa: revert temporarily disabling of NUMA migration Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-09 17:33   ` [tip:sched/core] mm: numa: Revert " tip-bot for Rik van Riel
2013-10-07 10:29 ` [PATCH 61/63] sched: numa: skip some page migrations after a shared fault Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-09 17:34   ` [tip:sched/core] sched/numa: Skip " tip-bot for Rik van Riel
2013-10-07 10:29 ` [PATCH 62/63] sched: numa: use unsigned longs for numa group fault stats Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-07 19:15   ` Rik van Riel
2013-10-07 19:15     ` Rik van Riel
2013-10-09 17:34   ` [tip:sched/core] sched/numa: Use " tip-bot for Mel Gorman
2013-10-07 10:29 ` [PATCH 63/63] sched: numa: periodically retry task_numa_migrate Mel Gorman
2013-10-07 10:29   ` Mel Gorman
2013-10-09 17:34   ` [tip:sched/core] sched/numa: Retry task_numa_migrate() periodically tip-bot for Rik van Riel
2013-10-09 11:03 ` [PATCH 0/63] Basic scheduler support for automatic NUMA balancing V9 Ingo Molnar
2013-10-09 11:03   ` Ingo Molnar
2013-10-09 11:11   ` Ingo Molnar
2013-10-09 11:11     ` Ingo Molnar
2013-10-09 11:13     ` Ingo Molnar
2013-10-09 11:13       ` Ingo Molnar
2013-10-09 12:05   ` Peter Zijlstra
2013-10-09 12:05     ` Peter Zijlstra
2013-10-09 12:48     ` Ingo Molnar
2013-10-09 12:48       ` Ingo Molnar
2013-10-10  7:05   ` Mel Gorman
2013-10-10  7:05     ` Mel Gorman
2013-10-09 16:28 ` Ingo Molnar
2013-10-09 16:29   ` Ingo Molnar
2013-10-09 16:57     ` Ingo Molnar
2013-10-09 16:57       ` Ingo Molnar
2013-10-09 17:09       ` Ingo Molnar
2013-10-09 17:09         ` Ingo Molnar
2013-10-09 17:11         ` Peter Zijlstra
2013-10-09 17:11           ` Peter Zijlstra
2013-10-09 17:08   ` Peter Zijlstra
2013-10-09 17:08     ` Peter Zijlstra
2013-10-09 17:15     ` Ingo Molnar
2013-10-09 17:15       ` Ingo Molnar
2013-10-09 17:18       ` Peter Zijlstra
2013-10-09 17:18         ` Peter Zijlstra
2013-10-24 12:26 ` Automatic NUMA balancing patches for tip-urgent/stable Mel Gorman
2013-10-24 12:26   ` Mel Gorman
2013-10-26 12:11   ` Ingo Molnar
2013-10-26 12:11     ` Ingo Molnar
2013-10-29  9:42     ` Mel Gorman
2013-10-29  9:42       ` Mel Gorman
2013-10-29  9:48       ` Ingo Molnar
2013-10-29  9:48         ` Ingo Molnar
2013-10-29 10:24         ` Mel Gorman
2013-10-29 10:24           ` Mel Gorman
2013-10-29 10:41           ` Ingo Molnar
2013-10-29 10:41             ` Ingo Molnar
2013-10-29 12:48             ` Mel Gorman
2013-10-29 12:48               ` Mel Gorman
2013-10-31  9:51   ` [RFC GIT PULL] NUMA-balancing memory corruption fixes Ingo Molnar
2013-10-31  9:51     ` Ingo Molnar
2013-10-31 22:25     ` Linus Torvalds
2013-10-31 22:25       ` Linus Torvalds
2013-11-01  7:36       ` Ingo Molnar
2013-11-01  7:36         ` Ingo Molnar
  -- strict thread matches above, loose matches on Subject: below --
2013-09-27 13:26 [PATCH 0/63] Basic scheduler support for automatic NUMA balancing V8 Mel Gorman
2013-09-27 13:27 ` [PATCH 39/63] sched: numa: Use a system-wide search to find swap/migration candidates Mel Gorman
2013-09-27 13:27   ` Mel Gorman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1381141781-10992-40-git-send-email-mgorman@suse.de \
    --to=mgorman@suse.de \
    --cc=a.p.zijlstra@chello.nl \
    --cc=aarcange@redhat.com \
    --cc=hannes@cmpxchg.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mingo@kernel.org \
    --cc=riel@redhat.com \
    --cc=srikar@linux.vnet.ibm.com \
    --subject='Re: [PATCH 39/63] sched: numa: Use a system-wide search to find swap/migration candidates' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.