From: Gregory Haskins <ghaskins@novell.com>
To: mingo@elte.hu
Cc: rostedt@goodmis.org, ghaskins@novell.com,
linux-rt-users@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH 05/23] Subject: SCHED - pull RT tasks
Date: Tue, 04 Dec 2007 15:44:51 -0500 [thread overview]
Message-ID: <20071204204451.3567.10693.stgit@novell1.haskins.net> (raw)
In-Reply-To: <20071204204236.3567.65491.stgit@novell1.haskins.net>
From: Steven Rostedt <srostedt@redhat.com>
This patch adds the algorithm to pull tasks from RT overloaded runqueues.
When a pull RT is initiated, all overloaded runqueues are examined for
a RT task that is higher in prio than the highest prio task queued on the
target runqueue. If another runqueue holds a RT task that is of higher
prio than the highest prio task on the target runqueue is found it is pulled
to the target runqueue.
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Gregory Haskins <ghaskins@novell.com>
---
kernel/sched.c | 2 +
kernel/sched_rt.c | 187 ++++++++++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 178 insertions(+), 11 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 7748d14..a30147e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3652,6 +3652,8 @@ need_resched_nonpreemptible:
switch_count = &prev->nvcsw;
}
+ schedule_balance_rt(rq, prev);
+
if (unlikely(!rq->nr_running))
idle_balance(cpu, rq);
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index b8c758a..a2f1057 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -177,8 +177,17 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
static int double_lock_balance(struct rq *this_rq, struct rq *busiest);
static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
+static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
+{
+ if (!task_running(rq, p) &&
+ (cpu < 0 || cpu_isset(cpu, p->cpus_allowed)))
+ return 1;
+ return 0;
+}
+
/* Return the second highest RT task, NULL otherwise */
-static struct task_struct *pick_next_highest_task_rt(struct rq *rq)
+static struct task_struct *pick_next_highest_task_rt(struct rq *rq,
+ int cpu)
{
struct rt_prio_array *array = &rq->rt.active;
struct task_struct *next;
@@ -197,26 +206,36 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq)
}
queue = array->queue + idx;
+ BUG_ON(list_empty(queue));
+
next = list_entry(queue->next, struct task_struct, run_list);
- if (unlikely(next != rq->curr))
- return next;
+ if (unlikely(pick_rt_task(rq, next, cpu)))
+ goto out;
if (queue->next->next != queue) {
/* same prio task */
next = list_entry(queue->next->next, struct task_struct, run_list);
- return next;
+ if (pick_rt_task(rq, next, cpu))
+ goto out;
}
+ retry:
/* slower, but more flexible */
idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1);
- if (unlikely(idx >= MAX_RT_PRIO)) {
- WARN_ON(1); /* rt_nr_running was 2 and above! */
+ if (unlikely(idx >= MAX_RT_PRIO))
return NULL;
- }
queue = array->queue + idx;
- next = list_entry(queue->next, struct task_struct, run_list);
+ BUG_ON(list_empty(queue));
+
+ list_for_each_entry(next, queue, run_list) {
+ if (pick_rt_task(rq, next, cpu))
+ goto out;
+ }
+
+ goto retry;
+ out:
return next;
}
@@ -303,13 +322,15 @@ static int push_rt_task(struct rq *this_rq)
assert_spin_locked(&this_rq->lock);
- next_task = pick_next_highest_task_rt(this_rq);
+ next_task = pick_next_highest_task_rt(this_rq, -1);
if (!next_task)
return 0;
retry:
- if (unlikely(next_task == this_rq->curr))
+ if (unlikely(next_task == this_rq->curr)) {
+ WARN_ON(1);
return 0;
+ }
/*
* It's possible that the next_task slipped in of
@@ -333,7 +354,7 @@ static int push_rt_task(struct rq *this_rq)
* so it is possible that next_task has changed.
* If it has, then try again.
*/
- task = pick_next_highest_task_rt(this_rq);
+ task = pick_next_highest_task_rt(this_rq, -1);
if (unlikely(task != next_task) && task && paranoid--) {
put_task_struct(next_task);
next_task = task;
@@ -376,6 +397,149 @@ static void push_rt_tasks(struct rq *rq)
;
}
+static int pull_rt_task(struct rq *this_rq)
+{
+ struct task_struct *next;
+ struct task_struct *p;
+ struct rq *src_rq;
+ cpumask_t *rto_cpumask;
+ int this_cpu = this_rq->cpu;
+ int cpu;
+ int ret = 0;
+
+ assert_spin_locked(&this_rq->lock);
+
+ /*
+ * If cpusets are used, and we have overlapping
+ * run queue cpusets, then this algorithm may not catch all.
+ * This is just the price you pay on trying to keep
+ * dirtying caches down on large SMP machines.
+ */
+ if (likely(!rt_overloaded()))
+ return 0;
+
+ next = pick_next_task_rt(this_rq);
+
+ rto_cpumask = rt_overload();
+
+ for_each_cpu_mask(cpu, *rto_cpumask) {
+ if (this_cpu == cpu)
+ continue;
+
+ src_rq = cpu_rq(cpu);
+ if (unlikely(src_rq->rt.rt_nr_running <= 1)) {
+ /*
+ * It is possible that overlapping cpusets
+ * will miss clearing a non overloaded runqueue.
+ * Clear it now.
+ */
+ if (double_lock_balance(this_rq, src_rq)) {
+ /* unlocked our runqueue lock */
+ struct task_struct *old_next = next;
+ next = pick_next_task_rt(this_rq);
+ if (next != old_next)
+ ret = 1;
+ }
+ if (likely(src_rq->rt.rt_nr_running <= 1))
+ /*
+ * Small chance that this_rq->curr changed
+ * but it's really harmless here.
+ */
+ rt_clear_overload(this_rq);
+ else
+ /*
+ * Heh, the src_rq is now overloaded, since
+ * we already have the src_rq lock, go straight
+ * to pulling tasks from it.
+ */
+ goto try_pulling;
+ spin_unlock(&src_rq->lock);
+ continue;
+ }
+
+ /*
+ * We can potentially drop this_rq's lock in
+ * double_lock_balance, and another CPU could
+ * steal our next task - hence we must cause
+ * the caller to recalculate the next task
+ * in that case:
+ */
+ if (double_lock_balance(this_rq, src_rq)) {
+ struct task_struct *old_next = next;
+ next = pick_next_task_rt(this_rq);
+ if (next != old_next)
+ ret = 1;
+ }
+
+ /*
+ * Are there still pullable RT tasks?
+ */
+ if (src_rq->rt.rt_nr_running <= 1) {
+ spin_unlock(&src_rq->lock);
+ continue;
+ }
+
+ try_pulling:
+ p = pick_next_highest_task_rt(src_rq, this_cpu);
+
+ /*
+ * Do we have an RT task that preempts
+ * the to-be-scheduled task?
+ */
+ if (p && (!next || (p->prio < next->prio))) {
+ WARN_ON(p == src_rq->curr);
+ WARN_ON(!p->se.on_rq);
+
+ /*
+ * There's a chance that p is higher in priority
+ * than what's currently running on its cpu.
+ * This is just that p is wakeing up and hasn't
+ * had a chance to schedule. We only pull
+ * p if it is lower in priority than the
+ * current task on the run queue or
+ * this_rq next task is lower in prio than
+ * the current task on that rq.
+ */
+ if (p->prio < src_rq->curr->prio ||
+ (next && next->prio < src_rq->curr->prio))
+ goto bail;
+
+ ret = 1;
+
+ deactivate_task(src_rq, p, 0);
+ set_task_cpu(p, this_cpu);
+ activate_task(this_rq, p, 0);
+ /*
+ * We continue with the search, just in
+ * case there's an even higher prio task
+ * in another runqueue. (low likelyhood
+ * but possible)
+ */
+
+ /*
+ * Update next so that we won't pick a task
+ * on another cpu with a priority lower (or equal)
+ * than the one we just picked.
+ */
+ next = p;
+
+ }
+ bail:
+ spin_unlock(&src_rq->lock);
+ }
+
+ return ret;
+}
+
+static void schedule_balance_rt(struct rq *rq,
+ struct task_struct *prev)
+{
+ /* Try to pull RT tasks here if we lower this rq's prio */
+ if (unlikely(rt_task(prev)) &&
+ rq->rt.highest_prio > prev->prio)
+ pull_rt_task(rq);
+}
+
static void schedule_tail_balance_rt(struct rq *rq)
{
/*
@@ -498,6 +662,7 @@ move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
}
#else /* CONFIG_SMP */
# define schedule_tail_balance_rt(rq) do { } while (0)
+# define schedule_balance_rt(rq, prev) do { } while (0)
#endif /* CONFIG_SMP */
static void task_tick_rt(struct rq *rq, struct task_struct *p)
next prev parent reply other threads:[~2007-12-04 21:09 UTC|newest]
Thread overview: 34+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-12-04 20:44 [PATCH 00/23] RT balance v7 Gregory Haskins
2007-12-04 20:44 ` [PATCH 01/23] Subject: SCHED - Add rt_nr_running accounting Gregory Haskins
2007-12-04 20:44 ` [PATCH 02/23] Subject: SCHED - track highest prio queued on runqueue Gregory Haskins
2007-12-04 20:44 ` [PATCH 03/23] Subject: SCHED - push RT tasks Gregory Haskins
2007-12-04 20:44 ` [PATCH 04/23] Subject: SCHED - RT overloaded runqueues accounting Gregory Haskins
2007-12-04 20:44 ` Gregory Haskins [this message]
2007-12-04 20:44 ` [PATCH 06/23] Subject: SCHED - wake up balance RT Gregory Haskins
2007-12-04 20:45 ` [PATCH 07/23] Subject: SCHED - disable CFS RT load balancing Gregory Haskins
2007-12-04 20:45 ` [PATCH 08/23] Subject: SCHED - Cache cpus_allowed weight for optimizing migration Gregory Haskins
2007-12-04 20:45 ` [PATCH 09/23] Subject: SCHED - Consistency cleanup for this_rq usage Gregory Haskins
2007-12-04 20:45 ` [PATCH 10/23] Subject: SCHED - Remove some CFS specific code from the wakeup path of RT tasks Gregory Haskins
2007-12-04 20:45 ` [PATCH 11/23] Subject: SCHED - Break out the search function Gregory Haskins
2007-12-04 20:45 ` [PATCH 12/23] Subject: SCHED - Allow current_cpu to be included in search Gregory Haskins
2007-12-04 20:45 ` [PATCH 13/23] Subject: SCHED - Pre-route RT tasks on wakeup Gregory Haskins
2007-12-04 20:45 ` [PATCH 14/23] Subject: SCHED - Optimize our cpu selection based on topology Gregory Haskins
2007-12-04 20:45 ` [PATCH 15/23] Subject: SCHED - Optimize rebalancing Gregory Haskins
2007-12-04 20:45 ` [PATCH 16/23] Subject: SCHED - Avoid overload Gregory Haskins
2007-12-04 20:45 ` [PATCH 17/23] Subject: SCHED - restore the migratable conditional Gregory Haskins
2007-12-04 20:45 ` [PATCH 18/23] Subject: SCHED - Optimize cpu search with hamming weight Gregory Haskins
2007-12-04 20:46 ` [PATCH 19/23] Subject: SCHED - Optimize out cpu_clears Gregory Haskins
2007-12-04 20:46 ` [PATCH 20/23] Subject: SCHED - balance RT tasks no new wake up Gregory Haskins
2007-12-04 20:46 ` [PATCH 21/23] Subject: SCHED - Add sched-domain roots Gregory Haskins
2007-12-04 20:46 ` [PATCH 22/23] Subject: SCHED - Only balance our RT tasks within our root-domain Gregory Haskins
2007-12-04 20:46 ` [PATCH 23/23] Subject: SCHED - Use a 2-d bitmap for searching lowest-pri CPU Gregory Haskins
2007-12-04 21:27 ` [PATCH 00/23] RT balance v7 Ingo Molnar
2007-12-04 21:35 ` Gregory Haskins
2007-12-05 2:55 ` [PATCH 0/3] RT balance v7a Gregory Haskins
2007-12-05 2:55 ` [PATCH 1/3] Subject: SCHED - Add sched-domain roots Gregory Haskins
2007-12-05 2:55 ` [PATCH 2/3] Subject: SCHED - Only balance our RT tasks within our root-domain Gregory Haskins
2007-12-05 2:55 ` [PATCH 3/3] Subject: SCHED - Use a 2-d bitmap for searching lowest-pri CPU Gregory Haskins
2007-12-05 9:34 ` Ingo Molnar
2007-12-05 10:19 ` Gregory Haskins
2007-12-05 11:44 ` Ingo Molnar
2007-12-05 13:41 ` Gregory Haskins
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20071204204451.3567.10693.stgit@novell1.haskins.net \
--to=ghaskins@novell.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-rt-users@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=rostedt@goodmis.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).