linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] node affine NUMA scheduler 2/5
@ 2002-10-14 11:07 Erich Focht
  0 siblings, 0 replies; only message in thread
From: Erich Focht @ 2002-10-14 11:07 UTC (permalink / raw)
  To: linux-kernel

[-- Attachment #1: Type: text/plain, Size: 509 bytes --]

----------  Resent Message  ----------

Subject: [PATCH] node affine NUMA scheduler 2/5
Date: Fri, 11 Oct 2002 19:56:22 +0200

This is the second part of the node affine NUMA scheduler patches.

> 02-numa_sched_ilb-2.5.39-10.patch :
>        This patch provides simple initial load balancing during exec().
>        It is node aware and will select the least loaded node. Also it
>        does a round-robin initial node selection to distribute the load
>        better across the nodes.

Erich

[-- Attachment #2: 02-numa_sched_ilb-2.5.39-10.patch --]
[-- Type: text/x-diff, Size: 2851 bytes --]

diff -urNp a/fs/exec.c b/fs/exec.c
--- a/fs/exec.c	Tue Oct  8 15:03:54 2002
+++ b/fs/exec.c	Fri Oct 11 16:21:02 2002
@@ -993,6 +993,7 @@ int do_execve(char * filename, char ** a
 	int retval;
 	int i;
 
+	sched_balance_exec();
 	file = open_exec(filename);
 
 	retval = PTR_ERR(file);
diff -urNp a/include/linux/sched.h b/include/linux/sched.h
--- a/include/linux/sched.h	Thu Oct 10 13:45:18 2002
+++ b/include/linux/sched.h	Fri Oct 11 16:21:02 2002
@@ -461,6 +461,9 @@ extern void set_cpus_allowed(task_t *p, 
 extern void build_pools(void);
 extern void pooldata_lock(void);
 extern void pooldata_unlock(void);
+extern void sched_balance_exec(void);
+#else
+#define sched_balance_exec() {}
 #endif
 extern void sched_migrate_task(task_t *p, int cpu);
 
diff -urNp a/kernel/sched.c b/kernel/sched.c
--- a/kernel/sched.c	Fri Oct 11 16:18:58 2002
+++ b/kernel/sched.c	Fri Oct 11 16:21:02 2002
@@ -2166,6 +2166,78 @@ out:
 	preempt_enable();
 }
 
+#ifdef CONFIG_NUMA
+/* used as counter for round-robin node-scheduling */
+static atomic_t sched_node=ATOMIC_INIT(0);
+
+/*
+ * Find the least loaded online CPU, within 'node', that 'p' is allowed to run on.
+ */
+static int sched_best_cpu(struct task_struct *p, int node)
+{
+	int n, cpu, load, best_cpu = task_cpu(p);	/* default: stay on the current CPU */
+	
+	load = 1000000;	/* sentinel: larger than any realistic nr_running */
+	loop_over_node(n,cpu,node) {
+		if (!(p->cpus_allowed & (1UL << cpu) & cpu_online_map))
+			continue;	/* CPU offline or excluded by the task's affinity mask */
+		if (cpu_rq(cpu)->nr_running < load) {
+			best_cpu = cpu;
+			load = cpu_rq(cpu)->nr_running;
+		}
+	}
+	return best_cpu;
+}
+
+/*
+ * Find the node with the fewest runnable tasks that 'p' may run on.
+ */
+static int sched_best_node(struct task_struct *p)
+{
+	int i, n, best_node=0, min_load, pool_load, min_pool=numa_node_id();
+	int cpu, pool, load;
+	unsigned long mask = p->cpus_allowed & cpu_online_map;
+
+	do {
+		/* atomic_inc_return is not implemented on all archs [EF] */
+		atomic_inc(&sched_node);
+		best_node = atomic_read(&sched_node) % numpools;
+	} while (!(pool_mask[best_node] & mask));	/* round-robin scan start; node must intersect affinity */
+
+	min_load = 100000000;	/* sentinel: above any possible scaled pool load */
+	for (n = 0; n < numpools; n++) {
+		pool = (best_node + n) % numpools;
+		load = 0;
+		loop_over_node(i, cpu, pool) {
+			if (!cpu_online(cpu)) continue;
+			load += cpu_rq(cpu)->nr_running;
+		}
+		if (pool == numa_node_id()) load--;	/* don't count the current (exec'ing) task itself */
+		pool_load = 100*load/pool_nr_cpus[pool];	/* normalize: load per CPU, scaled by 100 */
+		if ((pool_load < min_load) && (pool_mask[pool] & mask)) {
+			min_load = pool_load;
+			min_pool = pool;
+		}
+	}
+	atomic_set(&sched_node, min_pool);	/* next round-robin pick starts after the chosen node */
+	return min_pool;
+}
+
+void sched_balance_exec(void)	/* exec()-time initial balancing: pick best node, then best CPU on it */
+{
+	int new_cpu, new_node=0;	/* node 0 is the default when there is a single pool */
+
+	while (pooldata_is_locked())	/* wait until pool data is stable before reading it */
+		cpu_relax();
+	if (numpools > 1) {
+		new_node = sched_best_node(current);
+	} 
+	new_cpu = sched_best_cpu(current, new_node);
+	if (new_cpu != smp_processor_id())
+		sched_migrate_task(current, new_cpu);
+}
+#endif
+
 void sched_migrate_task(task_t *p, int dest_cpu)
 {
 	unsigned long old_mask;

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2002-10-14 11:02 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2002-10-14 11:07 [PATCH] node affine NUMA scheduler 2/5 Erich Focht

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).