linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Gregory Haskins <ghaskins@novell.com>
To: mingo@elte.hu
Cc: rostedt@goodmis.org, ghaskins@novell.com,
	linux-rt-users@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH 1/3] Subject: SCHED - Add sched-domain roots
Date: Tue, 04 Dec 2007 21:55:28 -0500	[thread overview]
Message-ID: <20071205025528.4494.37033.stgit@novell1.haskins.net> (raw)
In-Reply-To: <20071205025137.4494.18683.stgit@novell1.haskins.net>

We add the notion of a root-domain which will be used later to rescope
global variables to per-domain variables.  Each exclusive cpuset
essentially defines an island domain by fully partitioning the member cpus
from any other cpuset.  However, we currently still maintain some
policy/state as global variables which transcend all cpusets.  Consider,
for instance, rt-overload state.

Whenever a new exclusive cpuset is created, we also create a new
root-domain object and move each cpu member to the root-domain's span.
By default the system creates a single root-domain with all cpus as
members (mimicking the global state we have today).

We add some plumbing for storing class specific data in our root-domain.
Whenever a RQ is switching root-domains (because of repartitioning) we
give each sched_class the opportunity to remove any state from its old
domain and add state to the new one.  This logic doesn't have any clients
yet but it will later in the series.

Signed-off-by: Gregory Haskins <ghaskins@novell.com>
CC: Christoph Lameter <clameter@sgi.com>
CC: Paul Jackson <pj@sgi.com>
CC: Simon Derr <simon.derr@bull.net>
---

 include/linux/sched.h |    3 +
 kernel/sched.c        |  121 ++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 121 insertions(+), 3 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0bb7033..b6885ee 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -854,6 +854,9 @@ struct sched_class {
 	void (*task_tick) (struct rq *rq, struct task_struct *p);
 	void (*task_new) (struct rq *rq, struct task_struct *p);
 	void (*set_cpus_allowed)(struct task_struct *p, cpumask_t *newmask);
+
+	void (*join_domain)(struct rq *rq);
+	void (*leave_domain)(struct rq *rq);
 };
 
 struct load_weight {
diff --git a/kernel/sched.c b/kernel/sched.c
index 4849c72..dfb939f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -351,6 +351,28 @@ struct rt_rq {
 	int overloaded;
 };
 
+#ifdef CONFIG_SMP
+
+/*
+ * We add the notion of a root-domain which will be used to define per-domain
+ * variables.  Each exclusive cpuset essentially defines an island domain by
+ * fully partitioning the member cpus from any other cpuset.  Whenever a new
+ * exclusive cpuset is created, we also create and attach a new root-domain
+ * object.
+ *
+ * By default the system creates a single root-domain with all cpus as
+ * members (mimicking the global state we have today).
+ */
+struct root_domain {
+	atomic_t refcount;
+	cpumask_t span;
+	cpumask_t online;
+};
+
+static struct root_domain def_root_domain;
+
+#endif
+
 /*
  * This is the main, per-CPU runqueue data structure.
  *
@@ -408,6 +430,7 @@ struct rq {
 	atomic_t nr_iowait;
 
 #ifdef CONFIG_SMP
+	struct root_domain  *rd;
 	struct sched_domain *sd;
 
 	/* For active balancing */
@@ -5540,6 +5563,15 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 	case CPU_ONLINE_FROZEN:
 		/* Strictly unnecessary, as first user will wake it. */
 		wake_up_process(cpu_rq(cpu)->migration_thread);
+
+		/* Update our root-domain */
+		rq = cpu_rq(cpu);
+		spin_lock_irqsave(&rq->lock, flags);
+		if (rq->rd) {
+			BUG_ON(!cpu_isset(cpu, rq->rd->span));
+			cpu_set(cpu, rq->rd->online);
+		}
+		spin_unlock_irqrestore(&rq->lock, flags);
 		break;
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -5588,6 +5620,17 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		}
 		spin_unlock_irq(&rq->lock);
 		break;
+
+	case CPU_DOWN_PREPARE:
+		/* Update our root-domain */
+		rq = cpu_rq(cpu);
+		spin_lock_irqsave(&rq->lock, flags);
+		if (rq->rd) {
+			BUG_ON(!cpu_isset(cpu, rq->rd->span));
+			cpu_clear(cpu, rq->rd->online);
+		}
+		spin_unlock_irqrestore(&rq->lock, flags);
+		break;
 #endif
 	}
 	return NOTIFY_OK;
@@ -5776,11 +5819,69 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
 	return 1;
 }
 
+static void rq_attach_root(struct rq *rq, struct root_domain *rd)
+{
+	unsigned long flags;
+	const struct sched_class *class;
+
+	spin_lock_irqsave(&rq->lock, flags);
+
+	if (rq->rd) {
+		struct root_domain *old_rd = rq->rd;
+
+		for (class = sched_class_highest; class; class = class->next)
+			if (class->leave_domain)
+				class->leave_domain(rq);
+
+		if (atomic_dec_and_test(&old_rd->refcount))
+			kfree(old_rd);
+	}
+
+	atomic_inc(&rd->refcount);
+	rq->rd = rd;
+
+	for (class = sched_class_highest; class; class = class->next)
+		if (class->join_domain)
+			class->join_domain(rq);
+
+	spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+static void init_rootdomain(struct root_domain *rd, const cpumask_t *map)
+{
+	memset(rd, 0, sizeof(*rd));
+
+	rd->span = *map;
+	cpus_and(rd->online, rd->span, cpu_online_map);
+}
+
+static void init_defrootdomain(void)
+{
+	cpumask_t cpus = CPU_MASK_ALL;
+
+	init_rootdomain(&def_root_domain, &cpus);
+	atomic_set(&def_root_domain.refcount, 1);
+}
+
+static struct root_domain *alloc_rootdomain(const cpumask_t *map)
+{
+	struct root_domain *rd;
+
+	rd = kmalloc(sizeof(*rd), GFP_KERNEL);
+	if (!rd)
+		return NULL;
+
+	init_rootdomain(rd, map);
+
+	return rd;
+}
+
 /*
  * Attach the domain 'sd' to 'cpu' as its base domain.  Callers must
  * hold the hotplug lock.
  */
-static void cpu_attach_domain(struct sched_domain *sd, int cpu)
+static void cpu_attach_domain(struct sched_domain *sd,
+			      struct root_domain *rd, int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
 	struct sched_domain *tmp;
@@ -5805,6 +5906,7 @@ static void cpu_attach_domain(struct sched_domain *sd, int cpu)
 
 	sched_domain_debug(sd, cpu);
 
+	rq_attach_root(rq, rd);
 	rcu_assign_pointer(rq->sd, sd);
 }
 
@@ -6173,6 +6275,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
 static int build_sched_domains(const cpumask_t *cpu_map)
 {
 	int i;
+	struct root_domain *rd;
 #ifdef CONFIG_NUMA
 	struct sched_group **sched_group_nodes = NULL;
 	int sd_allnodes = 0;
@@ -6189,6 +6292,12 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 	sched_group_nodes_bycpu[first_cpu(*cpu_map)] = sched_group_nodes;
 #endif
 
+	rd = alloc_rootdomain(cpu_map);
+	if (!rd) {
+		printk(KERN_WARNING "Cannot alloc root domain\n");
+		return -ENOMEM;
+	}
+
 	/*
 	 * Set up domains for cpus specified by the cpu_map.
 	 */
@@ -6405,7 +6514,7 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 #else
 		sd = &per_cpu(phys_domains, i);
 #endif
-		cpu_attach_domain(sd, i);
+		cpu_attach_domain(sd, rd, i);
 	}
 
 	return 0;
@@ -6463,7 +6572,7 @@ static void detach_destroy_domains(const cpumask_t *cpu_map)
 	unregister_sched_domain_sysctl();
 
 	for_each_cpu_mask(i, *cpu_map)
-		cpu_attach_domain(NULL, i);
+		cpu_attach_domain(NULL, &def_root_domain, i);
 	synchronize_sched();
 	arch_destroy_sched_domains(cpu_map);
 }
@@ -6712,6 +6821,10 @@ void __init sched_init(void)
 	int highest_cpu = 0;
 	int i, j;
 
+#ifdef CONFIG_SMP
+	init_defrootdomain();
+#endif
+
 	for_each_possible_cpu(i) {
 		struct rt_prio_array *array;
 		struct rq *rq;
@@ -6750,6 +6863,8 @@ void __init sched_init(void)
 			rq->cpu_load[j] = 0;
 #ifdef CONFIG_SMP
 		rq->sd = NULL;
+		rq->rd = NULL;
+		rq_attach_root(rq, &def_root_domain);
 		rq->active_balance = 0;
 		rq->next_balance = jiffies;
 		rq->push_cpu = 0;


  reply	other threads:[~2007-12-05  3:17 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-12-04 20:44 [PATCH 00/23] RT balance v7 Gregory Haskins
2007-12-04 20:44 ` [PATCH 01/23] Subject: SCHED - Add rt_nr_running accounting Gregory Haskins
2007-12-04 20:44 ` [PATCH 02/23] Subject: SCHED - track highest prio queued on runqueue Gregory Haskins
2007-12-04 20:44 ` [PATCH 03/23] Subject: SCHED - push RT tasks Gregory Haskins
2007-12-04 20:44 ` [PATCH 04/23] Subject: SCHED - RT overloaded runqueues accounting Gregory Haskins
2007-12-04 20:44 ` [PATCH 05/23] Subject: SCHED - pull RT tasks Gregory Haskins
2007-12-04 20:44 ` [PATCH 06/23] Subject: SCHED - wake up balance RT Gregory Haskins
2007-12-04 20:45 ` [PATCH 07/23] Subject: SCHED - disable CFS RT load balancing Gregory Haskins
2007-12-04 20:45 ` [PATCH 08/23] Subject: SCHED - Cache cpus_allowed weight for optimizing migration Gregory Haskins
2007-12-04 20:45 ` [PATCH 09/23] Subject: SCHED - Consistency cleanup for this_rq usage Gregory Haskins
2007-12-04 20:45 ` [PATCH 10/23] Subject: SCHED - Remove some CFS specific code from the wakeup path of RT tasks Gregory Haskins
2007-12-04 20:45 ` [PATCH 11/23] Subject: SCHED - Break out the search function Gregory Haskins
2007-12-04 20:45 ` [PATCH 12/23] Subject: SCHED - Allow current_cpu to be included in search Gregory Haskins
2007-12-04 20:45 ` [PATCH 13/23] Subject: SCHED - Pre-route RT tasks on wakeup Gregory Haskins
2007-12-04 20:45 ` [PATCH 14/23] Subject: SCHED - Optimize our cpu selection based on topology Gregory Haskins
2007-12-04 20:45 ` [PATCH 15/23] Subject: SCHED - Optimize rebalancing Gregory Haskins
2007-12-04 20:45 ` [PATCH 16/23] Subject: SCHED - Avoid overload Gregory Haskins
2007-12-04 20:45 ` [PATCH 17/23] Subject: SCHED - restore the migratable conditional Gregory Haskins
2007-12-04 20:45 ` [PATCH 18/23] Subject: SCHED - Optimize cpu search with hamming weight Gregory Haskins
2007-12-04 20:46 ` [PATCH 19/23] Subject: SCHED - Optimize out cpu_clears Gregory Haskins
2007-12-04 20:46 ` [PATCH 20/23] Subject: SCHED - balance RT tasks no new wake up Gregory Haskins
2007-12-04 20:46 ` [PATCH 21/23] Subject: SCHED - Add sched-domain roots Gregory Haskins
2007-12-04 20:46 ` [PATCH 22/23] Subject: SCHED - Only balance our RT tasks within our root-domain Gregory Haskins
2007-12-04 20:46 ` [PATCH 23/23] Subject: SCHED - Use a 2-d bitmap for searching lowest-pri CPU Gregory Haskins
2007-12-04 21:27 ` [PATCH 00/23] RT balance v7 Ingo Molnar
2007-12-04 21:35   ` Gregory Haskins
2007-12-05  2:55   ` [PATCH 0/3] RT balance v7a Gregory Haskins
2007-12-05  2:55     ` Gregory Haskins [this message]
2007-12-05  2:55     ` [PATCH 2/3] Subject: SCHED - Only balance our RT tasks within our root-domain Gregory Haskins
2007-12-05  2:55     ` [PATCH 3/3] Subject: SCHED - Use a 2-d bitmap for searching lowest-pri CPU Gregory Haskins
2007-12-05  9:34       ` Ingo Molnar
2007-12-05 10:19         ` Gregory Haskins
2007-12-05 11:44           ` Ingo Molnar
2007-12-05 13:41             ` Gregory Haskins

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20071205025528.4494.37033.stgit@novell1.haskins.net \
    --to=ghaskins@novell.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-rt-users@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=rostedt@goodmis.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).