linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Gregory Haskins <ghaskins@novell.com>
To: mingo@elte.hu
Cc: rostedt@goodmis.org, ghaskins@novell.com,
	linux-rt-users@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH 08/23] Subject: SCHED - Cache cpus_allowed weight for optimizing migration
Date: Tue, 04 Dec 2007 15:45:06 -0500	[thread overview]
Message-ID: <20071204204506.3567.35263.stgit@novell1.haskins.net> (raw)
In-Reply-To: <20071204204236.3567.65491.stgit@novell1.haskins.net>

Some RT tasks (particularly kthreads) are bound to one specific CPU.
It is fairly common for two or more bound tasks to get queued up at the
same time.  Consider, for instance, softirq_timer and softirq_sched.  A
timer goes off in an ISR which schedules softirq_thread to run at RT50.
Then the timer handler determines that it's time to smp-rebalance the
system so it schedules softirq_sched to run.  So we are in a situation
where we have two RT50 tasks queued, and the system will go into
rt-overload condition to request other CPUs for help.

This causes two problems in the current code:

1) If a high-priority bound task and a low-priority unbounded task queue
   up behind the running task, we will fail to ever relocate the unbounded
   task because we terminate the search on the first unmovable task.

2) We spend precious futile cycles in the fast-path trying to pull
   overloaded tasks over.  It is therefore optimial to strive to avoid the
   overhead all together if we can cheaply detect the condition before
   overload even occurs.

This patch tries to achieve this optimization by utilizing the hamming
weight of the task->cpus_allowed mask.  A weight of 1 indicates that
the task cannot be migrated.  We will then utilize this information to
skip non-migratable tasks and to eliminate uncessary rebalance attempts.

We introduce a per-rq variable to count the number of migratable tasks
that are currently running.  We only go into overload if we have more
than one rt task, AND at least one of them is migratable.

In addition, we introduce a per-task variable to cache the cpus_allowed
weight, since the hamming calculation is probably relatively expensive.
We only update the cached value when the mask is updated which should be
relatively infrequent, especially compared to scheduling frequency
in the fast path.

Signed-off-by: Gregory Haskins <ghaskins@novell.com>
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---

 include/linux/init_task.h |    1 +
 include/linux/sched.h     |    2 ++
 kernel/fork.c             |    1 +
 kernel/sched.c            |    9 +++++++-
 kernel/sched_rt.c         |   50 +++++++++++++++++++++++++++++++++++++++++----
 5 files changed, 57 insertions(+), 6 deletions(-)

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index cae35b6..572c65b 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -130,6 +130,7 @@ extern struct group_info init_groups;
 	.normal_prio	= MAX_PRIO-20,					\
 	.policy		= SCHED_NORMAL,					\
 	.cpus_allowed	= CPU_MASK_ALL,					\
+	.nr_cpus_allowed = NR_CPUS,					\
 	.mm		= NULL,						\
 	.active_mm	= &init_mm,					\
 	.run_list	= LIST_HEAD_INIT(tsk.run_list),			\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ac3d496..a3dc53b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -847,6 +847,7 @@ struct sched_class {
 	void (*set_curr_task) (struct rq *rq);
 	void (*task_tick) (struct rq *rq, struct task_struct *p);
 	void (*task_new) (struct rq *rq, struct task_struct *p);
+	void (*set_cpus_allowed)(struct task_struct *p, cpumask_t *newmask);
 };
 
 struct load_weight {
@@ -956,6 +957,7 @@ struct task_struct {
 
 	unsigned int policy;
 	cpumask_t cpus_allowed;
+	int nr_cpus_allowed;
 	unsigned int time_slice;
 
 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
diff --git a/kernel/fork.c b/kernel/fork.c
index 8ca1a14..12e3e58 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1237,6 +1237,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	 * parent's CPU). This avoids alot of nasty races.
 	 */
 	p->cpus_allowed = current->cpus_allowed;
+	p->nr_cpus_allowed = current->nr_cpus_allowed;
 	if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) ||
 			!cpu_online(task_cpu(p))))
 		set_task_cpu(p, smp_processor_id());
diff --git a/kernel/sched.c b/kernel/sched.c
index ebd114b..70f08de 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -269,6 +269,7 @@ struct rt_rq {
 	int rt_load_balance_idx;
 	struct list_head *rt_load_balance_head, *rt_load_balance_curr;
 	unsigned long rt_nr_running;
+	unsigned long rt_nr_migratory;
 	/* highest queued rt task prio */
 	int highest_prio;
 };
@@ -5080,7 +5081,13 @@ int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
 		goto out;
 	}
 
-	p->cpus_allowed = new_mask;
+	if (p->sched_class->set_cpus_allowed)
+		p->sched_class->set_cpus_allowed(p, &new_mask);
+	else {
+		p->cpus_allowed    = new_mask;
+		p->nr_cpus_allowed = cpus_weight(new_mask);
+	}
+
 	/* Can the task run on the task's current CPU? If so, we're done */
 	if (cpu_isset(task_cpu(p), new_mask))
 		goto out;
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index ea07ffa..e5bce6a 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -33,6 +33,14 @@ static inline void rt_clear_overload(struct rq *rq)
 	atomic_dec(&rto_count);
 	cpu_clear(rq->cpu, rt_overload_mask);
 }
+
+static void update_rt_migration(struct rq *rq)
+{
+	if (rq->rt.rt_nr_migratory && (rq->rt.rt_nr_running > 1))
+		rt_set_overload(rq);
+	else
+		rt_clear_overload(rq);
+}
 #endif /* CONFIG_SMP */
 
 /*
@@ -65,8 +73,10 @@ static inline void inc_rt_tasks(struct task_struct *p, struct rq *rq)
 #ifdef CONFIG_SMP
 	if (p->prio < rq->rt.highest_prio)
 		rq->rt.highest_prio = p->prio;
-	if (rq->rt.rt_nr_running > 1)
-		rt_set_overload(rq);
+	if (p->nr_cpus_allowed > 1)
+		rq->rt.rt_nr_migratory++;
+
+	update_rt_migration(rq);
 #endif /* CONFIG_SMP */
 }
 
@@ -88,8 +98,10 @@ static inline void dec_rt_tasks(struct task_struct *p, struct rq *rq)
 		} /* otherwise leave rq->highest prio alone */
 	} else
 		rq->rt.highest_prio = MAX_RT_PRIO;
-	if (rq->rt.rt_nr_running < 2)
-		rt_clear_overload(rq);
+	if (p->nr_cpus_allowed > 1)
+		rq->rt.rt_nr_migratory--;
+
+	update_rt_migration(rq);
 #endif /* CONFIG_SMP */
 }
 
@@ -180,7 +192,8 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
 static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
 {
 	if (!task_running(rq, p) &&
-	    (cpu < 0 || cpu_isset(cpu, p->cpus_allowed)))
+	    (cpu < 0 || cpu_isset(cpu, p->cpus_allowed)) &&
+	    (p->nr_cpus_allowed > 1))
 		return 1;
 	return 0;
 }
@@ -582,6 +595,32 @@ move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
 	/* don't touch RT tasks */
 	return 0;
 }
+static void set_cpus_allowed_rt(struct task_struct *p, cpumask_t *new_mask)
+{
+	int weight = cpus_weight(*new_mask);
+
+	BUG_ON(!rt_task(p));
+
+	/*
+	 * Update the migration status of the RQ if we have an RT task
+	 * which is running AND changing its weight value.
+	 */
+	if (p->se.on_rq && (weight != p->nr_cpus_allowed)) {
+		struct rq *rq = task_rq(p);
+
+		if ((p->nr_cpus_allowed <= 1) && (weight > 1))
+			rq->rt.rt_nr_migratory++;
+		else if ((p->nr_cpus_allowed > 1) && (weight <= 1)) {
+			BUG_ON(!rq->rt.rt_nr_migratory);
+			rq->rt.rt_nr_migratory--;
+		}
+
+		update_rt_migration(rq);
+	}
+
+	p->cpus_allowed    = *new_mask;
+	p->nr_cpus_allowed = weight;
+}
 #else /* CONFIG_SMP */
 # define schedule_tail_balance_rt(rq)	do { } while (0)
 # define schedule_balance_rt(rq, prev)	do { } while (0)
@@ -633,6 +672,7 @@ const struct sched_class rt_sched_class = {
 #ifdef CONFIG_SMP
 	.load_balance		= load_balance_rt,
 	.move_one_task		= move_one_task_rt,
+	.set_cpus_allowed       = set_cpus_allowed_rt,
 #endif
 
 	.set_curr_task          = set_curr_task_rt,


  parent reply	other threads:[~2007-12-04 21:10 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-12-04 20:44 [PATCH 00/23] RT balance v7 Gregory Haskins
2007-12-04 20:44 ` [PATCH 01/23] Subject: SCHED - Add rt_nr_running accounting Gregory Haskins
2007-12-04 20:44 ` [PATCH 02/23] Subject: SCHED - track highest prio queued on runqueue Gregory Haskins
2007-12-04 20:44 ` [PATCH 03/23] Subject: SCHED - push RT tasks Gregory Haskins
2007-12-04 20:44 ` [PATCH 04/23] Subject: SCHED - RT overloaded runqueues accounting Gregory Haskins
2007-12-04 20:44 ` [PATCH 05/23] Subject: SCHED - pull RT tasks Gregory Haskins
2007-12-04 20:44 ` [PATCH 06/23] Subject: SCHED - wake up balance RT Gregory Haskins
2007-12-04 20:45 ` [PATCH 07/23] Subject: SCHED - disable CFS RT load balancing Gregory Haskins
2007-12-04 20:45 ` Gregory Haskins [this message]
2007-12-04 20:45 ` [PATCH 09/23] Subject: SCHED - Consistency cleanup for this_rq usage Gregory Haskins
2007-12-04 20:45 ` [PATCH 10/23] Subject: SCHED - Remove some CFS specific code from the wakeup path of RT tasks Gregory Haskins
2007-12-04 20:45 ` [PATCH 11/23] Subject: SCHED - Break out the search function Gregory Haskins
2007-12-04 20:45 ` [PATCH 12/23] Subject: SCHED - Allow current_cpu to be included in search Gregory Haskins
2007-12-04 20:45 ` [PATCH 13/23] Subject: SCHED - Pre-route RT tasks on wakeup Gregory Haskins
2007-12-04 20:45 ` [PATCH 14/23] Subject: SCHED - Optimize our cpu selection based on topology Gregory Haskins
2007-12-04 20:45 ` [PATCH 15/23] Subject: SCHED - Optimize rebalancing Gregory Haskins
2007-12-04 20:45 ` [PATCH 16/23] Subject: SCHED - Avoid overload Gregory Haskins
2007-12-04 20:45 ` [PATCH 17/23] Subject: SCHED - restore the migratable conditional Gregory Haskins
2007-12-04 20:45 ` [PATCH 18/23] Subject: SCHED - Optimize cpu search with hamming weight Gregory Haskins
2007-12-04 20:46 ` [PATCH 19/23] Subject: SCHED - Optimize out cpu_clears Gregory Haskins
2007-12-04 20:46 ` [PATCH 20/23] Subject: SCHED - balance RT tasks no new wake up Gregory Haskins
2007-12-04 20:46 ` [PATCH 21/23] Subject: SCHED - Add sched-domain roots Gregory Haskins
2007-12-04 20:46 ` [PATCH 22/23] Subject: SCHED - Only balance our RT tasks within our root-domain Gregory Haskins
2007-12-04 20:46 ` [PATCH 23/23] Subject: SCHED - Use a 2-d bitmap for searching lowest-pri CPU Gregory Haskins
2007-12-04 21:27 ` [PATCH 00/23] RT balance v7 Ingo Molnar
2007-12-04 21:35   ` Gregory Haskins
2007-12-05  2:55   ` [PATCH 0/3] RT balance v7a Gregory Haskins
2007-12-05  2:55     ` [PATCH 1/3] Subject: SCHED - Add sched-domain roots Gregory Haskins
2007-12-05  2:55     ` [PATCH 2/3] Subject: SCHED - Only balance our RT tasks within our root-domain Gregory Haskins
2007-12-05  2:55     ` [PATCH 3/3] Subject: SCHED - Use a 2-d bitmap for searching lowest-pri CPU Gregory Haskins
2007-12-05  9:34       ` Ingo Molnar
2007-12-05 10:19         ` Gregory Haskins
2007-12-05 11:44           ` Ingo Molnar
2007-12-05 13:41             ` Gregory Haskins

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20071204204506.3567.35263.stgit@novell1.haskins.net \
    --to=ghaskins@novell.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-rt-users@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=rostedt@goodmis.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).