* [PATCH 0/2] [GIT PULL] sched: fixes for rt-migration-test failures
@ 2009-07-29  4:21 Steven Rostedt
  2009-07-29  4:21 ` [PATCH 1/2] sched: check for pushing rt tasks after all scheduling Steven Rostedt
  2009-07-29  4:21 ` [PATCH 2/2] sched: add new prio to cpupri before removing old prio Steven Rostedt
  0 siblings, 2 replies; 10+ messages in thread
From: Steven Rostedt @ 2009-07-29  4:21 UTC (permalink / raw)
  To: linux-kernel
  Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, Peter Zijlstra,
	Gregory Haskins


Ingo,

We've been experiencing rt-migration-test failures. The test is here:

 http://rostedt.homelinux.com/code/rt-migrate-test.c

Gregory Haskins and I have been debugging this for several days.
So far we have found three bugs in the code that cause this test to
fail. The failure is that an RT task may not migrate properly and
instead waits behind a lower RT prio task.

These two patches fix two of the bugs. There is also a bug, triggered
with RT_GROUP scheduling, that we will look at later.

The test can still fail after running for a long time, and we are
still looking into other issues. But for now these patches solve two
of them.
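
Roughly, the failure mode addressed by the first patch looks like this
(an illustrative timeline, not taken from the actual trace; the task
and CPU layout here is hypothetical):

  CPU 0                                 CPU 1
  -----                                 -----
  RT task A (highest prio) is current   RT task C (lowest prio) is current
  RT task B (middle prio) wakes on
  CPU 0 and is queued as pushable;
  it should be pushed to CPU 1 where
  it would preempt C
  schedule() runs and picks A again
    -> no context switch, so the
       post-schedule push never runs
                                        C keeps running while B waits
                                        behind A on CPU 0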

Please pull the latest update tree, which can be found at:

  git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-2.6-trace.git
update


Steven Rostedt (2):
      sched: check for pushing rt tasks after all scheduling
      sched: add new prio to cpupri before removing old prio

----
 kernel/sched.c        |   38 +++++++++++++++++++++++++++-----------
 kernel/sched_cpupri.c |   30 ++++++++++++++++--------------
 2 files changed, 43 insertions(+), 25 deletions(-)
-- 


* [PATCH 1/2] sched: check for pushing rt tasks after all scheduling
  2009-07-29  4:21 [PATCH 0/2] [GIT PULL] sched: fixes for rt-migration-test failures Steven Rostedt
@ 2009-07-29  4:21 ` Steven Rostedt
  2009-07-29  8:41   ` Peter Zijlstra
  2009-08-02 13:12   ` [tip:sched/core] sched: Check for pushing rt tasks after all scheduling tip-bot for Steven Rostedt
  2009-07-29  4:21 ` [PATCH 2/2] sched: add new prio to cpupri before removing old prio Steven Rostedt
  1 sibling, 2 replies; 10+ messages in thread
From: Steven Rostedt @ 2009-07-29  4:21 UTC (permalink / raw)
  To: linux-kernel
  Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, Peter Zijlstra,
	Gregory Haskins, Steven Rostedt

[-- Attachment #1: 0001-sched-check-for-pushing-rt-tasks-after-all-schedulin.patch --]
[-- Type: text/plain, Size: 3886 bytes --]

From: Steven Rostedt <srostedt@redhat.com>

The current method for pushing RT tasks after scheduling only
happens after a context switch. But we found cases where a task
is set up on a run queue to be pushed but the push never happens
because the scheduler chooses the same task.

This bug was found with the help of Gregory Haskins and the use of
ftrace (trace_printk). It took both of us several days of analyzing
the code and the trace output to find this.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
 kernel/sched.c |   38 +++++++++++++++++++++++++++-----------
 1 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/kernel/sched.c b/kernel/sched.c
index 1b59e26..3134ea5 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2791,14 +2791,14 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev,
  * with the lock held can cause deadlocks; see schedule() for
  * details.)
  */
-static void finish_task_switch(struct rq *rq, struct task_struct *prev)
+static int finish_task_switch(struct rq *rq, struct task_struct *prev)
 	__releases(rq->lock)
 {
 	struct mm_struct *mm = rq->prev_mm;
 	long prev_state;
-#ifdef CONFIG_SMP
 	int post_schedule = 0;
 
+#ifdef CONFIG_SMP
 	if (current->sched_class->needs_post_schedule)
 		post_schedule = current->sched_class->needs_post_schedule(rq);
 #endif
@@ -2820,10 +2820,6 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	finish_arch_switch(prev);
 	perf_counter_task_sched_in(current, cpu_of(rq));
 	finish_lock_switch(rq, prev);
-#ifdef CONFIG_SMP
-	if (post_schedule)
-		current->sched_class->post_schedule(rq);
-#endif
 
 	fire_sched_in_preempt_notifiers(current);
 	if (mm)
@@ -2836,6 +2832,8 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 		kprobe_flush_task(prev);
 		put_task_struct(prev);
 	}
+
+	return post_schedule;
 }
 
 /**
@@ -2846,8 +2844,15 @@ asmlinkage void schedule_tail(struct task_struct *prev)
 	__releases(rq->lock)
 {
 	struct rq *rq = this_rq();
+	int post_schedule;
+
+	post_schedule = finish_task_switch(rq, prev);
+
+#ifdef CONFIG_SMP
+	if (post_schedule)
+		current->sched_class->post_schedule(rq);
+#endif
 
-	finish_task_switch(rq, prev);
 #ifdef __ARCH_WANT_UNLOCKED_CTXSW
 	/* In this case, finish_task_switch does not reenable preemption */
 	preempt_enable();
@@ -2860,7 +2865,7 @@ asmlinkage void schedule_tail(struct task_struct *prev)
  * context_switch - switch to the new MM and the new
  * thread's register state.
  */
-static inline void
+static inline int
 context_switch(struct rq *rq, struct task_struct *prev,
 	       struct task_struct *next)
 {
@@ -2907,7 +2912,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
 	 * CPUs since it called schedule(), thus the 'rq' on its stack
 	 * frame will be invalid.
 	 */
-	finish_task_switch(this_rq(), prev);
+	return finish_task_switch(this_rq(), prev);
 }
 
 /*
@@ -5318,6 +5323,7 @@ asmlinkage void __sched schedule(void)
 {
 	struct task_struct *prev, *next;
 	unsigned long *switch_count;
+	int post_schedule = 0;
 	struct rq *rq;
 	int cpu;
 
@@ -5368,15 +5374,25 @@ need_resched_nonpreemptible:
 		rq->curr = next;
 		++*switch_count;
 
-		context_switch(rq, prev, next); /* unlocks the rq */
+		post_schedule = context_switch(rq, prev, next); /* unlocks the rq */
 		/*
 		 * the context switch might have flipped the stack from under
 		 * us, hence refresh the local variables.
 		 */
 		cpu = smp_processor_id();
 		rq = cpu_rq(cpu);
-	} else
+	} else {
+#ifdef CONFIG_SMP
+		if (current->sched_class->needs_post_schedule)
+			post_schedule = current->sched_class->needs_post_schedule(rq);
+#endif
 		spin_unlock_irq(&rq->lock);
+	}
+
+#ifdef CONFIG_SMP
+	if (post_schedule)
+		current->sched_class->post_schedule(rq);
+#endif
 
 	if (unlikely(reacquire_kernel_lock(current) < 0))
 		goto need_resched_nonpreemptible;
-- 
1.6.3.3

-- 


* [PATCH 2/2] sched: add new prio to cpupri before removing old prio
  2009-07-29  4:21 [PATCH 0/2] [GIT PULL] sched: fixes for rt-migration-test failures Steven Rostedt
  2009-07-29  4:21 ` [PATCH 1/2] sched: check for pushing rt tasks after all scheduling Steven Rostedt
@ 2009-07-29  4:21 ` Steven Rostedt
  2009-08-02 13:13   ` [tip:sched/core] sched: Add " tip-bot for Steven Rostedt
  1 sibling, 1 reply; 10+ messages in thread
From: Steven Rostedt @ 2009-07-29  4:21 UTC (permalink / raw)
  To: linux-kernel
  Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, Peter Zijlstra,
	Gregory Haskins, Steven Rostedt

[-- Attachment #1: 0002-sched-add-new-prio-to-cpupri-before-removing-old-pri.patch --]
[-- Type: text/plain, Size: 2087 bytes --]

From: Steven Rostedt <srostedt@redhat.com>

We need to add the new prio to the cpupri accounting before removing the
old prio. This is because removing the old prio first will open a
race window where the cpu will be removed from pri_active. In this
case the cpu will not be visible for RT pushes and pulls. This could cause
an RT task to not migrate appropriately, creating a very large latency.

This bug was found with the use of ftrace sched events and trace_printk.
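
Schematically, the race window opened by the old ordering looks like
this (an illustrative interleaving; the cpu numbers are hypothetical):

  CPU 0: cpupri_set(cp, 0, newpri)      CPU 1: cpupri_find() for a push
  --------------------------------      -------------------------------
  vec[oldpri]->count-- reaches 0
  clear_bit(oldpri, pri_active)
                                        scans pri_active; cpu 0 is not
                                        set at any priority, so it is
                                        skipped as a push/pull target
  set_bit(newpri, pri_active)

With the new ordering the cpu is added at newpri before it is removed
from oldpri, so it stays visible at some priority for the whole update.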

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
 kernel/sched_cpupri.c |   30 ++++++++++++++++--------------
 1 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c
index e6c2517..aef68b6 100644
--- a/kernel/sched_cpupri.c
+++ b/kernel/sched_cpupri.c
@@ -114,21 +114,11 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
 
 	/*
 	 * If the cpu was currently mapped to a different value, we
-	 * first need to unmap the old value
+	 * need to map it to the new value then remove the old value.
+	 * Note, we must add the new value first, otherwise we risk the
+	 * cpu being cleared from pri_active, and this cpu could be
+	 * missed for a push or pull.
 	 */
-	if (likely(oldpri != CPUPRI_INVALID)) {
-		struct cpupri_vec *vec  = &cp->pri_to_cpu[oldpri];
-
-		spin_lock_irqsave(&vec->lock, flags);
-
-		vec->count--;
-		if (!vec->count)
-			clear_bit(oldpri, cp->pri_active);
-		cpumask_clear_cpu(cpu, vec->mask);
-
-		spin_unlock_irqrestore(&vec->lock, flags);
-	}
-
 	if (likely(newpri != CPUPRI_INVALID)) {
 		struct cpupri_vec *vec = &cp->pri_to_cpu[newpri];
 
@@ -141,6 +131,18 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
 
 		spin_unlock_irqrestore(&vec->lock, flags);
 	}
+	if (likely(oldpri != CPUPRI_INVALID)) {
+		struct cpupri_vec *vec  = &cp->pri_to_cpu[oldpri];
+
+		spin_lock_irqsave(&vec->lock, flags);
+
+		vec->count--;
+		if (!vec->count)
+			clear_bit(oldpri, cp->pri_active);
+		cpumask_clear_cpu(cpu, vec->mask);
+
+		spin_unlock_irqrestore(&vec->lock, flags);
+	}
 
 	*currpri = newpri;
 }
-- 
1.6.3.3

-- 


* Re: [PATCH 1/2] sched: check for pushing rt tasks after all scheduling
  2009-07-29  4:21 ` [PATCH 1/2] sched: check for pushing rt tasks after all scheduling Steven Rostedt
@ 2009-07-29  8:41   ` Peter Zijlstra
  2009-07-29 13:14     ` Gregory Haskins
  2009-07-29 15:08     ` [PATCH] sched: enhance the pre/post scheduling logic Gregory Haskins
  2009-08-02 13:12   ` [tip:sched/core] sched: Check for pushing rt tasks after all scheduling tip-bot for Steven Rostedt
  1 sibling, 2 replies; 10+ messages in thread
From: Peter Zijlstra @ 2009-07-29  8:41 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: linux-kernel, Ingo Molnar, Andrew Morton, Thomas Gleixner,
	Gregory Haskins, Steven Rostedt

On Wed, 2009-07-29 at 00:21 -0400, Steven Rostedt wrote:
> plain text document attachment
> (0001-sched-check-for-pushing-rt-tasks-after-all-schedulin.patch)
> From: Steven Rostedt <srostedt@redhat.com>
> 
> The current method for pushing RT tasks after scheduling only
> happens after a context switch. But we found cases where a task
> is set up on a run queue to be pushed but the push never happens
> because the scheduler chooses the same task.
> 
> This bug was found with the help of Gregory Haskins and the use of
> ftrace (trace_printk). It took both of us several days of analyzing
> the code and the trace output to find this.


> +               if (current->sched_class->needs_post_schedule)
> +                       post_schedule = current->sched_class->needs_post_schedule(rq);


> +       if (post_schedule)
> +               current->sched_class->post_schedule(rq);


Why can't we omit that first call, and do the second unconditionally,
using storage in the class rq to save state?


* Re: [PATCH 1/2] sched: check for pushing rt tasks after all scheduling
  2009-07-29  8:41   ` Peter Zijlstra
@ 2009-07-29 13:14     ` Gregory Haskins
  2009-07-29 15:08     ` [PATCH] sched: enhance the pre/post scheduling logic Gregory Haskins
  1 sibling, 0 replies; 10+ messages in thread
From: Gregory Haskins @ 2009-07-29 13:14 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Steven Rostedt, linux-kernel, Ingo Molnar, Andrew Morton,
	Thomas Gleixner, Steven Rostedt

[-- Attachment #1: Type: text/plain, Size: 1249 bytes --]

Peter Zijlstra wrote:
> On Wed, 2009-07-29 at 00:21 -0400, Steven Rostedt wrote:
>> plain text document attachment
>> (0001-sched-check-for-pushing-rt-tasks-after-all-schedulin.patch)
>> From: Steven Rostedt <srostedt@redhat.com>
>>
>> The current method for pushing RT tasks after scheduling only
>> happens after a context switch. But we found cases where a task
>> is set up on a run queue to be pushed but the push never happens
>> because the scheduler chooses the same task.
>>
>> This bug was found with the help of Gregory Haskins and the use of
>> ftrace (trace_printk). It took both of us several days of analyzing
>> the code and the trace output to find this.
> 
> 
>> +               if (current->sched_class->needs_post_schedule)
>> +                       post_schedule = current->sched_class->needs_post_schedule(rq);
> 
> 
>> +       if (post_schedule)
>> +               current->sched_class->post_schedule(rq);
> 
> 
> Why can't we omit that first call, and do the second unconditionally,
> using storage in the class rq to save state?

Yeah, that is a good idea.  Plus I see another bug that Steven and I
overlooked.  Steve is out on holiday today, so I will put together a v2.

Regards,
-Greg


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 267 bytes --]


* [PATCH] sched: enhance the pre/post scheduling logic
  2009-07-29  8:41   ` Peter Zijlstra
  2009-07-29 13:14     ` Gregory Haskins
@ 2009-07-29 15:08     ` Gregory Haskins
  2009-07-30  7:36       ` Peter Zijlstra
  2009-08-02 13:13       ` [tip:sched/core] sched: Enhance " tip-bot for Gregory Haskins
  1 sibling, 2 replies; 10+ messages in thread
From: Gregory Haskins @ 2009-07-29 15:08 UTC (permalink / raw)
  To: linux-kernel; +Cc: rostedt, mingo, peterz, akpm, tglx

(Applies to Steve's linux-2.6-trace.git/update:c8cab1dd)

This patch applies on top of Steven's recent submission. It addresses
the feedback from Peter, and also fixes a different race that was not
previously noticed.

It should be noted that all three of these patches should be considered
for any kernel that includes our push/pull scheduler design, which I believe
includes 24-rt and newer, as well as roughly 2.6.27 mainline.

Kind Regards,
-Greg

---------------------------

From: Gregory Haskins <ghaskins@novell.com>
sched: enhance the pre/post scheduling logic

We currently have an explicit "needs_post" vtable method which returns
a stack variable for whether we should later run post-schedule.  This
leads to an awkward exchange of the variable as it bubbles back up
out of the context switch.  Peter Zijlstra observed that this
information could be stored in the run-queue itself instead of being
handled on the stack.

Therefore, we revert to the method of having context_switch return void,
and update an internal rq->post_schedule variable when we require further
processing.

In addition, we fix a race condition where we try to access
current->sched_class without holding the rq->lock.  This is technically
racy, as the sched-class could change out from under us.  Instead,
we reference the per-rq post_schedule variable with the runqueue unlocked,
but with preemption disabled to see if we need to reacquire the rq->lock.

Finally, we clean the code up slightly by removing the #ifdef CONFIG_SMP
conditionals from the schedule() call, and implement some inline helper
functions instead.

This patch passes checkpatch and rt-migrate.

Signed-off-by: Gregory Haskins <ghaskins@novell.com>
---

 include/linux/sched.h |    1 -
 kernel/sched.c        |   82 ++++++++++++++++++++++++++++++-------------------
 kernel/sched_rt.c     |   31 +++++++------------
 3 files changed, 61 insertions(+), 53 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3ab08e4..df14cae 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1045,7 +1045,6 @@ struct sched_class {
 			      struct rq *busiest, struct sched_domain *sd,
 			      enum cpu_idle_type idle);
 	void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
-	int (*needs_post_schedule) (struct rq *this_rq);
 	void (*post_schedule) (struct rq *this_rq);
 	void (*task_wake_up) (struct rq *this_rq, struct task_struct *task);
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 3134ea5..1a104e6 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -616,6 +616,7 @@ struct rq {
 
 	unsigned char idle_at_tick;
 	/* For active balancing */
+	int post_schedule;
 	int active_balance;
 	int push_cpu;
 	/* cpu of this runqueue: */
@@ -2791,17 +2792,11 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev,
  * with the lock held can cause deadlocks; see schedule() for
  * details.)
  */
-static int finish_task_switch(struct rq *rq, struct task_struct *prev)
+static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	__releases(rq->lock)
 {
 	struct mm_struct *mm = rq->prev_mm;
 	long prev_state;
-	int post_schedule = 0;
-
-#ifdef CONFIG_SMP
-	if (current->sched_class->needs_post_schedule)
-		post_schedule = current->sched_class->needs_post_schedule(rq);
-#endif
 
 	rq->prev_mm = NULL;
 
@@ -2832,10 +2827,44 @@ static int finish_task_switch(struct rq *rq, struct task_struct *prev)
 		kprobe_flush_task(prev);
 		put_task_struct(prev);
 	}
+}
+
+#ifdef CONFIG_SMP
+
+/* assumes rq->lock is held */
+static inline void pre_schedule(struct rq *rq, struct task_struct *prev)
+{
+	if (prev->sched_class->pre_schedule)
+		prev->sched_class->pre_schedule(rq, prev);
+}
+
+/* rq->lock is NOT held, but preemption is disabled */
+static inline void post_schedule(struct rq *rq)
+{
+	if (rq->post_schedule) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&rq->lock, flags);
+		if (rq->curr->sched_class->post_schedule)
+			rq->curr->sched_class->post_schedule(rq);
+		spin_unlock_irqrestore(&rq->lock, flags);
+
+		rq->post_schedule = 0;
+	}
+}
+
+#else
 
-	return post_schedule;
+static inline void pre_schedule(struct rq *rq, struct task_struct *p)
+{
+}
+
+static inline void post_schedule(struct rq *rq)
+{
 }
 
+#endif
+
 /**
  * schedule_tail - first thing a freshly forked thread must call.
  * @prev: the thread we just switched away from.
@@ -2844,14 +2873,14 @@ asmlinkage void schedule_tail(struct task_struct *prev)
 	__releases(rq->lock)
 {
 	struct rq *rq = this_rq();
-	int post_schedule;
 
-	post_schedule = finish_task_switch(rq, prev);
+	finish_task_switch(rq, prev);
 
-#ifdef CONFIG_SMP
-	if (post_schedule)
-		current->sched_class->post_schedule(rq);
-#endif
+	/*
+	 * FIXME: do we need to worry about rq being invalidated by the
+	 * task_switch?
+	 */
+	post_schedule(rq);
 
 #ifdef __ARCH_WANT_UNLOCKED_CTXSW
 	/* In this case, finish_task_switch does not reenable preemption */
@@ -2865,7 +2894,7 @@ asmlinkage void schedule_tail(struct task_struct *prev)
  * context_switch - switch to the new MM and the new
  * thread's register state.
  */
-static inline int
+static inline void
 context_switch(struct rq *rq, struct task_struct *prev,
 	       struct task_struct *next)
 {
@@ -2912,7 +2941,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
 	 * CPUs since it called schedule(), thus the 'rq' on its stack
 	 * frame will be invalid.
 	 */
-	return finish_task_switch(this_rq(), prev);
+	finish_task_switch(this_rq(), prev);
 }
 
 /*
@@ -5323,7 +5352,6 @@ asmlinkage void __sched schedule(void)
 {
 	struct task_struct *prev, *next;
 	unsigned long *switch_count;
-	int post_schedule = 0;
 	struct rq *rq;
 	int cpu;
 
@@ -5355,10 +5383,7 @@ need_resched_nonpreemptible:
 		switch_count = &prev->nvcsw;
 	}
 
-#ifdef CONFIG_SMP
-	if (prev->sched_class->pre_schedule)
-		prev->sched_class->pre_schedule(rq, prev);
-#endif
+	pre_schedule(rq, prev);
 
 	if (unlikely(!rq->nr_running))
 		idle_balance(cpu, rq);
@@ -5374,25 +5399,17 @@ need_resched_nonpreemptible:
 		rq->curr = next;
 		++*switch_count;
 
-		post_schedule = context_switch(rq, prev, next); /* unlocks the rq */
+		context_switch(rq, prev, next); /* unlocks the rq */
 		/*
 		 * the context switch might have flipped the stack from under
 		 * us, hence refresh the local variables.
 		 */
 		cpu = smp_processor_id();
 		rq = cpu_rq(cpu);
-	} else {
-#ifdef CONFIG_SMP
-		if (current->sched_class->needs_post_schedule)
-			post_schedule = current->sched_class->needs_post_schedule(rq);
-#endif
+	} else
 		spin_unlock_irq(&rq->lock);
-	}
 
-#ifdef CONFIG_SMP
-	if (post_schedule)
-		current->sched_class->post_schedule(rq);
-#endif
+	post_schedule(rq);
 
 	if (unlikely(reacquire_kernel_lock(current) < 0))
 		goto need_resched_nonpreemptible;
@@ -9350,6 +9367,7 @@ void __init sched_init(void)
 #ifdef CONFIG_SMP
 		rq->sd = NULL;
 		rq->rd = NULL;
+		rq->post_schedule = 0;
 		rq->active_balance = 0;
 		rq->next_balance = jiffies;
 		rq->push_cpu = 0;
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 3918e01..a8f89bc 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1056,6 +1056,11 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq)
 	return p;
 }
 
+static inline int has_pushable_tasks(struct rq *rq)
+{
+	return !plist_head_empty(&rq->rt.pushable_tasks);
+}
+
 static struct task_struct *pick_next_task_rt(struct rq *rq)
 {
 	struct task_struct *p = _pick_next_task_rt(rq);
@@ -1064,6 +1069,12 @@ static struct task_struct *pick_next_task_rt(struct rq *rq)
 	if (p)
 		dequeue_pushable_task(rq, p);
 
+	/*
+	 * We detect this state here so that we can avoid taking the RQ
+	 * lock again later if there is no need to push
+	 */
+	rq->post_schedule = has_pushable_tasks(rq);
+
 	return p;
 }
 
@@ -1262,11 +1273,6 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
 	return lowest_rq;
 }
 
-static inline int has_pushable_tasks(struct rq *rq)
-{
-	return !plist_head_empty(&rq->rt.pushable_tasks);
-}
-
 static struct task_struct *pick_next_pushable_task(struct rq *rq)
 {
 	struct task_struct *p;
@@ -1466,23 +1472,9 @@ static void pre_schedule_rt(struct rq *rq, struct task_struct *prev)
 		pull_rt_task(rq);
 }
 
-/*
- * assumes rq->lock is held
- */
-static int needs_post_schedule_rt(struct rq *rq)
-{
-	return has_pushable_tasks(rq);
-}
-
 static void post_schedule_rt(struct rq *rq)
 {
-	/*
-	 * This is only called if needs_post_schedule_rt() indicates that
-	 * we need to push tasks away
-	 */
-	spin_lock_irq(&rq->lock);
 	push_rt_tasks(rq);
-	spin_unlock_irq(&rq->lock);
 }
 
 /*
@@ -1758,7 +1750,6 @@ static const struct sched_class rt_sched_class = {
 	.rq_online              = rq_online_rt,
 	.rq_offline             = rq_offline_rt,
 	.pre_schedule		= pre_schedule_rt,
-	.needs_post_schedule	= needs_post_schedule_rt,
 	.post_schedule		= post_schedule_rt,
 	.task_wake_up		= task_wake_up_rt,
 	.switched_from		= switched_from_rt,



* Re: [PATCH] sched: enhance the pre/post scheduling logic
  2009-07-29 15:08     ` [PATCH] sched: enhance the pre/post scheduling logic Gregory Haskins
@ 2009-07-30  7:36       ` Peter Zijlstra
  2009-08-02 13:13       ` [tip:sched/core] sched: Enhance " tip-bot for Gregory Haskins
  1 sibling, 0 replies; 10+ messages in thread
From: Peter Zijlstra @ 2009-07-30  7:36 UTC (permalink / raw)
  To: Gregory Haskins; +Cc: linux-kernel, rostedt, mingo, akpm, tglx

On Wed, 2009-07-29 at 11:08 -0400, Gregory Haskins wrote:

> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 3ab08e4..df14cae 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1045,7 +1045,6 @@ struct sched_class {
>  			      struct rq *busiest, struct sched_domain *sd,
>  			      enum cpu_idle_type idle);
>  	void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
> -	int (*needs_post_schedule) (struct rq *this_rq);
>  	void (*post_schedule) (struct rq *this_rq);
>  	void (*task_wake_up) (struct rq *this_rq, struct task_struct *task);

awesome, one method less ;-)

> +#ifdef CONFIG_SMP
> +
> +/* assumes rq->lock is held */
> +static inline void pre_schedule(struct rq *rq, struct task_struct *prev)
> +{
> +	if (prev->sched_class->pre_schedule)
> +		prev->sched_class->pre_schedule(rq, prev);
> +}
> +
> +/* rq->lock is NOT held, but preemption is disabled */
> +static inline void post_schedule(struct rq *rq)
> +{
> +	if (rq->post_schedule) {
> +		unsigned long flags;
> +
> +		spin_lock_irqsave(&rq->lock, flags);
> +		if (rq->curr->sched_class->post_schedule)
> +			rq->curr->sched_class->post_schedule(rq);
> +		spin_unlock_irqrestore(&rq->lock, flags);
> +
> +		rq->post_schedule = 0;
> +	}
> +}
> +
> +#else
>  
> -	return post_schedule;
> +static inline void pre_schedule(struct rq *rq, struct task_struct *p)
> +{
> +}
> +
> +static inline void post_schedule(struct rq *rq)
> +{
>  }
>  
> +#endif

Wouldn't you sleep much safer at night if both versions were to check
those assumptions under SCHED_DEBUG? :-)
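
For illustration, such checks might look roughly like the sketch below
for the CONFIG_SMP variants (the helper macro names are made up;
assert_spin_locked(), WARN_ON_ONCE() and preemptible() are assumed to
be fair game in sched.c):

#ifdef CONFIG_SCHED_DEBUG
/* pre_schedule() is called with rq->lock held */
# define sched_assert_locked(rq)	assert_spin_locked(&(rq)->lock)
/*
 * post_schedule() runs unlocked but must not be preemptible; note that
 * preemptible() is only meaningful on CONFIG_PREEMPT kernels, so this
 * check is vacuous elsewhere.
 */
# define sched_assert_nonpreempt()	WARN_ON_ONCE(preemptible())
#else
# define sched_assert_locked(rq)	do { } while (0)
# define sched_assert_nonpreempt()	do { } while (0)
#endif

/* assumes rq->lock is held */
static inline void pre_schedule(struct rq *rq, struct task_struct *prev)
{
	sched_assert_locked(rq);

	if (prev->sched_class->pre_schedule)
		prev->sched_class->pre_schedule(rq, prev);
}

/* rq->lock is NOT held, but preemption is disabled */
static inline void post_schedule(struct rq *rq)
{
	sched_assert_nonpreempt();

	if (rq->post_schedule) {
		unsigned long flags;

		spin_lock_irqsave(&rq->lock, flags);
		if (rq->curr->sched_class->post_schedule)
			rq->curr->sched_class->post_schedule(rq);
		spin_unlock_irqrestore(&rq->lock, flags);

		rq->post_schedule = 0;
	}
}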

> @@ -2844,14 +2873,14 @@ asmlinkage void schedule_tail(struct task_struct *prev)
>  	__releases(rq->lock)
>  {
>  	struct rq *rq = this_rq();
> -	int post_schedule;
>  
> -	post_schedule = finish_task_switch(rq, prev);
> +	finish_task_switch(rq, prev);
>  
> -#ifdef CONFIG_SMP
> -	if (post_schedule)
> -		current->sched_class->post_schedule(rq);
> -#endif
> +	/*
> +	 * FIXME: do we need to worry about rq being invalidated by the
> +	 * task_switch?
> +	 */
> +	post_schedule(rq);
>  
>  #ifdef __ARCH_WANT_UNLOCKED_CTXSW
>  	/* In this case, finish_task_switch does not reenable preemption */

You know I really can't take patches with FIXME's in ;-)

I think only switch_to() messes with your stacks, finish_task_switch()
should be safe, but double check me.


OK, so I stuck the patch in anyway.. 


* [tip:sched/core] sched: Check for pushing rt tasks after all scheduling
  2009-07-29  4:21 ` [PATCH 1/2] sched: check for pushing rt tasks after all scheduling Steven Rostedt
  2009-07-29  8:41   ` Peter Zijlstra
@ 2009-08-02 13:12   ` tip-bot for Steven Rostedt
  1 sibling, 0 replies; 10+ messages in thread
From: tip-bot for Steven Rostedt @ 2009-08-02 13:12 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, hpa, mingo, a.p.zijlstra, srostedt, tglx, mingo

Commit-ID:  da19ab510343c6496fe8b8f890091296032025c9
Gitweb:     http://git.kernel.org/tip/da19ab510343c6496fe8b8f890091296032025c9
Author:     Steven Rostedt <srostedt@redhat.com>
AuthorDate: Wed, 29 Jul 2009 00:21:22 -0400
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Sun, 2 Aug 2009 14:26:08 +0200

sched: Check for pushing rt tasks after all scheduling

The current method for pushing RT tasks after scheduling only
happens after a context switch. But we found cases where a task
is set up on a run queue to be pushed but the push never
happens because the scheduler chooses the same task.

This bug was found with the help of Gregory Haskins and the use
of ftrace (trace_printk). It took both of us several days of
analyzing the code and the trace output to find this.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20090729042526.205923666@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>


---
 kernel/sched.c |   38 +++++++++++++++++++++++++++-----------
 1 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/kernel/sched.c b/kernel/sched.c
index ca1f76b..a030d45 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2839,14 +2839,14 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev,
  * with the lock held can cause deadlocks; see schedule() for
  * details.)
  */
-static void finish_task_switch(struct rq *rq, struct task_struct *prev)
+static int finish_task_switch(struct rq *rq, struct task_struct *prev)
 	__releases(rq->lock)
 {
 	struct mm_struct *mm = rq->prev_mm;
 	long prev_state;
-#ifdef CONFIG_SMP
 	int post_schedule = 0;
 
+#ifdef CONFIG_SMP
 	if (current->sched_class->needs_post_schedule)
 		post_schedule = current->sched_class->needs_post_schedule(rq);
 #endif
@@ -2868,10 +2868,6 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	finish_arch_switch(prev);
 	perf_counter_task_sched_in(current, cpu_of(rq));
 	finish_lock_switch(rq, prev);
-#ifdef CONFIG_SMP
-	if (post_schedule)
-		current->sched_class->post_schedule(rq);
-#endif
 
 	fire_sched_in_preempt_notifiers(current);
 	if (mm)
@@ -2884,6 +2880,8 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 		kprobe_flush_task(prev);
 		put_task_struct(prev);
 	}
+
+	return post_schedule;
 }
 
 /**
@@ -2894,8 +2892,15 @@ asmlinkage void schedule_tail(struct task_struct *prev)
 	__releases(rq->lock)
 {
 	struct rq *rq = this_rq();
+	int post_schedule;
+
+	post_schedule = finish_task_switch(rq, prev);
+
+#ifdef CONFIG_SMP
+	if (post_schedule)
+		current->sched_class->post_schedule(rq);
+#endif
 
-	finish_task_switch(rq, prev);
 #ifdef __ARCH_WANT_UNLOCKED_CTXSW
 	/* In this case, finish_task_switch does not reenable preemption */
 	preempt_enable();
@@ -2908,7 +2913,7 @@ asmlinkage void schedule_tail(struct task_struct *prev)
  * context_switch - switch to the new MM and the new
  * thread's register state.
  */
-static inline void
+static inline int
 context_switch(struct rq *rq, struct task_struct *prev,
 	       struct task_struct *next)
 {
@@ -2955,7 +2960,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
 	 * CPUs since it called schedule(), thus the 'rq' on its stack
 	 * frame will be invalid.
 	 */
-	finish_task_switch(this_rq(), prev);
+	return finish_task_switch(this_rq(), prev);
 }
 
 /*
@@ -5366,6 +5371,7 @@ asmlinkage void __sched schedule(void)
 {
 	struct task_struct *prev, *next;
 	unsigned long *switch_count;
+	int post_schedule = 0;
 	struct rq *rq;
 	int cpu;
 
@@ -5416,15 +5422,25 @@ need_resched_nonpreemptible:
 		rq->curr = next;
 		++*switch_count;
 
-		context_switch(rq, prev, next); /* unlocks the rq */
+		post_schedule = context_switch(rq, prev, next); /* unlocks the rq */
 		/*
 		 * the context switch might have flipped the stack from under
 		 * us, hence refresh the local variables.
 		 */
 		cpu = smp_processor_id();
 		rq = cpu_rq(cpu);
-	} else
+	} else {
+#ifdef CONFIG_SMP
+		if (current->sched_class->needs_post_schedule)
+			post_schedule = current->sched_class->needs_post_schedule(rq);
+#endif
 		spin_unlock_irq(&rq->lock);
+	}
+
+#ifdef CONFIG_SMP
+	if (post_schedule)
+		current->sched_class->post_schedule(rq);
+#endif
 
 	if (unlikely(reacquire_kernel_lock(current) < 0))
 		goto need_resched_nonpreemptible;


* [tip:sched/core] sched: Add new prio to cpupri before removing old prio
  2009-07-29  4:21 ` [PATCH 2/2] sched: add new prio to cpupri before removing old prio Steven Rostedt
@ 2009-08-02 13:13   ` tip-bot for Steven Rostedt
  0 siblings, 0 replies; 10+ messages in thread
From: tip-bot for Steven Rostedt @ 2009-08-02 13:13 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, hpa, mingo, a.p.zijlstra, srostedt, tglx, mingo

Commit-ID:  c3a2ae3d93c0f10d29c071f599764d00b8de00cb
Gitweb:     http://git.kernel.org/tip/c3a2ae3d93c0f10d29c071f599764d00b8de00cb
Author:     Steven Rostedt <srostedt@redhat.com>
AuthorDate: Wed, 29 Jul 2009 00:21:23 -0400
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Sun, 2 Aug 2009 14:26:09 +0200

sched: Add new prio to cpupri before removing old prio

We need to add the new prio to the cpupri accounting before
removing the old prio. This is because removing the old prio
first will open a race window where the cpu will be removed
from pri_active. In this case the cpu will not be visible for
RT pushes and pulls. This could cause an RT task to not migrate
appropriately, creating a very large latency.

This bug was found with the use of ftrace sched events and
trace_printk.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20090729042526.438281019@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>


---
 kernel/sched_cpupri.c |   30 ++++++++++++++++--------------
 1 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c
index d014efb..0f052fc 100644
--- a/kernel/sched_cpupri.c
+++ b/kernel/sched_cpupri.c
@@ -127,21 +127,11 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
 
 	/*
 	 * If the cpu was currently mapped to a different value, we
-	 * first need to unmap the old value
+	 * need to map it to the new value then remove the old value.
+	 * Note, we must add the new value first, otherwise we risk the
+	 * cpu being cleared from pri_active, and this cpu could be
+	 * missed for a push or pull.
 	 */
-	if (likely(oldpri != CPUPRI_INVALID)) {
-		struct cpupri_vec *vec  = &cp->pri_to_cpu[oldpri];
-
-		spin_lock_irqsave(&vec->lock, flags);
-
-		vec->count--;
-		if (!vec->count)
-			clear_bit(oldpri, cp->pri_active);
-		cpumask_clear_cpu(cpu, vec->mask);
-
-		spin_unlock_irqrestore(&vec->lock, flags);
-	}
-
 	if (likely(newpri != CPUPRI_INVALID)) {
 		struct cpupri_vec *vec = &cp->pri_to_cpu[newpri];
 
@@ -154,6 +144,18 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
 
 		spin_unlock_irqrestore(&vec->lock, flags);
 	}
+	if (likely(oldpri != CPUPRI_INVALID)) {
+		struct cpupri_vec *vec  = &cp->pri_to_cpu[oldpri];
+
+		spin_lock_irqsave(&vec->lock, flags);
+
+		vec->count--;
+		if (!vec->count)
+			clear_bit(oldpri, cp->pri_active);
+		cpumask_clear_cpu(cpu, vec->mask);
+
+		spin_unlock_irqrestore(&vec->lock, flags);
+	}
 
 	*currpri = newpri;
 }


* [tip:sched/core] sched: Enhance the pre/post scheduling logic
  2009-07-29 15:08     ` [PATCH] sched: enhance the pre/post scheduling logic Gregory Haskins
  2009-07-30  7:36       ` Peter Zijlstra
@ 2009-08-02 13:13       ` tip-bot for Gregory Haskins
  1 sibling, 0 replies; 10+ messages in thread
From: tip-bot for Gregory Haskins @ 2009-08-02 13:13 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, hpa, mingo, a.p.zijlstra, ghaskins, tglx, mingo

Commit-ID:  3f029d3c6d62068d59301d90c18dbde8ee402107
Gitweb:     http://git.kernel.org/tip/3f029d3c6d62068d59301d90c18dbde8ee402107
Author:     Gregory Haskins <ghaskins@novell.com>
AuthorDate: Wed, 29 Jul 2009 11:08:47 -0400
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Sun, 2 Aug 2009 14:26:10 +0200

sched: Enhance the pre/post scheduling logic

We currently have an explicit "needs_post" vtable method which
returns a stack variable for whether we should later run
post-schedule.  This leads to an awkward exchange of the
variable as it bubbles back up out of the context switch. Peter
Zijlstra observed that this information could be stored in the
run-queue itself instead of being handled on the stack.

Therefore, we revert to the method of having context_switch
return void, and update an internal rq->post_schedule variable
when we require further processing.

In addition, we fix a race condition where we try to access
current->sched_class without holding the rq->lock.  This is
technically racy, as the sched-class could change out from
under us.  Instead, we reference the per-rq post_schedule
variable with the runqueue unlocked, but with preemption
disabled to see if we need to reacquire the rq->lock.

Finally, we clean the code up slightly by removing the #ifdef
CONFIG_SMP conditionals from the schedule() call, and implement
some inline helper functions instead.

This patch passes checkpatch and rt-migrate.

Signed-off-by: Gregory Haskins <ghaskins@novell.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20090729150422.17691.55590.stgit@dev.haskins.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>


---
 include/linux/sched.h |    1 -
 kernel/sched.c        |   82 ++++++++++++++++++++++++++++++-------------------
 kernel/sched_rt.c     |   31 ++++++------------
 3 files changed, 61 insertions(+), 53 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2c35bc2..195d72d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1047,7 +1047,6 @@ struct sched_class {
 			      struct rq *busiest, struct sched_domain *sd,
 			      enum cpu_idle_type idle);
 	void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
-	int (*needs_post_schedule) (struct rq *this_rq);
 	void (*post_schedule) (struct rq *this_rq);
 	void (*task_wake_up) (struct rq *this_rq, struct task_struct *task);
 
diff --git a/kernel/sched.c b/kernel/sched.c
index a030d45..613fee5 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -616,6 +616,7 @@ struct rq {
 
 	unsigned char idle_at_tick;
 	/* For active balancing */
+	int post_schedule;
 	int active_balance;
 	int push_cpu;
 	/* cpu of this runqueue: */
@@ -2839,17 +2840,11 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev,
  * with the lock held can cause deadlocks; see schedule() for
  * details.)
  */
-static int finish_task_switch(struct rq *rq, struct task_struct *prev)
+static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	__releases(rq->lock)
 {
 	struct mm_struct *mm = rq->prev_mm;
 	long prev_state;
-	int post_schedule = 0;
-
-#ifdef CONFIG_SMP
-	if (current->sched_class->needs_post_schedule)
-		post_schedule = current->sched_class->needs_post_schedule(rq);
-#endif
 
 	rq->prev_mm = NULL;
 
@@ -2880,10 +2875,44 @@ static int finish_task_switch(struct rq *rq, struct task_struct *prev)
 		kprobe_flush_task(prev);
 		put_task_struct(prev);
 	}
+}
+
+#ifdef CONFIG_SMP
+
+/* assumes rq->lock is held */
+static inline void pre_schedule(struct rq *rq, struct task_struct *prev)
+{
+	if (prev->sched_class->pre_schedule)
+		prev->sched_class->pre_schedule(rq, prev);
+}
+
+/* rq->lock is NOT held, but preemption is disabled */
+static inline void post_schedule(struct rq *rq)
+{
+	if (rq->post_schedule) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&rq->lock, flags);
+		if (rq->curr->sched_class->post_schedule)
+			rq->curr->sched_class->post_schedule(rq);
+		spin_unlock_irqrestore(&rq->lock, flags);
+
+		rq->post_schedule = 0;
+	}
+}
+
+#else
 
-	return post_schedule;
+static inline void pre_schedule(struct rq *rq, struct task_struct *p)
+{
+}
+
+static inline void post_schedule(struct rq *rq)
+{
 }
 
+#endif
+
 /**
  * schedule_tail - first thing a freshly forked thread must call.
  * @prev: the thread we just switched away from.
@@ -2892,14 +2921,14 @@ asmlinkage void schedule_tail(struct task_struct *prev)
 	__releases(rq->lock)
 {
 	struct rq *rq = this_rq();
-	int post_schedule;
 
-	post_schedule = finish_task_switch(rq, prev);
+	finish_task_switch(rq, prev);
 
-#ifdef CONFIG_SMP
-	if (post_schedule)
-		current->sched_class->post_schedule(rq);
-#endif
+	/*
+	 * FIXME: do we need to worry about rq being invalidated by the
+	 * task_switch?
+	 */
+	post_schedule(rq);
 
 #ifdef __ARCH_WANT_UNLOCKED_CTXSW
 	/* In this case, finish_task_switch does not reenable preemption */
@@ -2913,7 +2942,7 @@ asmlinkage void schedule_tail(struct task_struct *prev)
  * context_switch - switch to the new MM and the new
  * thread's register state.
  */
-static inline int
+static inline void
 context_switch(struct rq *rq, struct task_struct *prev,
 	       struct task_struct *next)
 {
@@ -2960,7 +2989,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
 	 * CPUs since it called schedule(), thus the 'rq' on its stack
 	 * frame will be invalid.
 	 */
-	return finish_task_switch(this_rq(), prev);
+	finish_task_switch(this_rq(), prev);
 }
 
 /*
@@ -5371,7 +5400,6 @@ asmlinkage void __sched schedule(void)
 {
 	struct task_struct *prev, *next;
 	unsigned long *switch_count;
-	int post_schedule = 0;
 	struct rq *rq;
 	int cpu;
 
@@ -5403,10 +5431,7 @@ need_resched_nonpreemptible:
 		switch_count = &prev->nvcsw;
 	}
 
-#ifdef CONFIG_SMP
-	if (prev->sched_class->pre_schedule)
-		prev->sched_class->pre_schedule(rq, prev);
-#endif
+	pre_schedule(rq, prev);
 
 	if (unlikely(!rq->nr_running))
 		idle_balance(cpu, rq);
@@ -5422,25 +5447,17 @@ need_resched_nonpreemptible:
 		rq->curr = next;
 		++*switch_count;
 
-		post_schedule = context_switch(rq, prev, next); /* unlocks the rq */
+		context_switch(rq, prev, next); /* unlocks the rq */
 		/*
 		 * the context switch might have flipped the stack from under
 		 * us, hence refresh the local variables.
 		 */
 		cpu = smp_processor_id();
 		rq = cpu_rq(cpu);
-	} else {
-#ifdef CONFIG_SMP
-		if (current->sched_class->needs_post_schedule)
-			post_schedule = current->sched_class->needs_post_schedule(rq);
-#endif
+	} else
 		spin_unlock_irq(&rq->lock);
-	}
 
-#ifdef CONFIG_SMP
-	if (post_schedule)
-		current->sched_class->post_schedule(rq);
-#endif
+	post_schedule(rq);
 
 	if (unlikely(reacquire_kernel_lock(current) < 0))
 		goto need_resched_nonpreemptible;
@@ -9403,6 +9420,7 @@ void __init sched_init(void)
 #ifdef CONFIG_SMP
 		rq->sd = NULL;
 		rq->rd = NULL;
+		rq->post_schedule = 0;
 		rq->active_balance = 0;
 		rq->next_balance = jiffies;
 		rq->push_cpu = 0;
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 3918e01..a8f89bc 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1056,6 +1056,11 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq)
 	return p;
 }
 
+static inline int has_pushable_tasks(struct rq *rq)
+{
+	return !plist_head_empty(&rq->rt.pushable_tasks);
+}
+
 static struct task_struct *pick_next_task_rt(struct rq *rq)
 {
 	struct task_struct *p = _pick_next_task_rt(rq);
@@ -1064,6 +1069,12 @@ static struct task_struct *pick_next_task_rt(struct rq *rq)
 	if (p)
 		dequeue_pushable_task(rq, p);
 
+	/*
+	 * We detect this state here so that we can avoid taking the RQ
+	 * lock again later if there is no need to push
+	 */
+	rq->post_schedule = has_pushable_tasks(rq);
+
 	return p;
 }
 
@@ -1262,11 +1273,6 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
 	return lowest_rq;
 }
 
-static inline int has_pushable_tasks(struct rq *rq)
-{
-	return !plist_head_empty(&rq->rt.pushable_tasks);
-}
-
 static struct task_struct *pick_next_pushable_task(struct rq *rq)
 {
 	struct task_struct *p;
@@ -1466,23 +1472,9 @@ static void pre_schedule_rt(struct rq *rq, struct task_struct *prev)
 		pull_rt_task(rq);
 }
 
-/*
- * assumes rq->lock is held
- */
-static int needs_post_schedule_rt(struct rq *rq)
-{
-	return has_pushable_tasks(rq);
-}
-
 static void post_schedule_rt(struct rq *rq)
 {
-	/*
-	 * This is only called if needs_post_schedule_rt() indicates that
-	 * we need to push tasks away
-	 */
-	spin_lock_irq(&rq->lock);
 	push_rt_tasks(rq);
-	spin_unlock_irq(&rq->lock);
 }
 
 /*
@@ -1758,7 +1750,6 @@ static const struct sched_class rt_sched_class = {
 	.rq_online              = rq_online_rt,
 	.rq_offline             = rq_offline_rt,
 	.pre_schedule		= pre_schedule_rt,
-	.needs_post_schedule	= needs_post_schedule_rt,
 	.post_schedule		= post_schedule_rt,
 	.task_wake_up		= task_wake_up_rt,
 	.switched_from		= switched_from_rt,

