linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* Subject: [PATCH] sched: fixed erroneous all_pinned logic.
@ 2011-04-08  0:24 Ken Chen
  2011-04-08 10:57 ` Vladimir Davydov
  2011-04-08 11:39 ` Peter Zijlstra
  0 siblings, 2 replies; 8+ messages in thread
From: Ken Chen @ 2011-04-08  0:24 UTC (permalink / raw)
  To: a.p.zijlstra, mingo; +Cc: linux-kernel

The scheduler load balancer has specific code to deal with cases of an
unbalanced system due to lots of unmovable tasks (for example because
of hard CPU affinity). In those situations, it excludes the busiest CPU
that has pinned tasks from load balance consideration so that it can
perform a second load balance pass on the rest of the system.  This
all works as designed if there is only one cgroup in the system.

However, when we have multiple cgroups, this logic has false positives
and triggers multiple load balance passes even though there are actually
no pinned tasks at all.

The reason it has false positives is that the all-pinned logic is deep
in the lowest-level function, can_migrate_task(), and is too low level.
load_balance_fair() iterates over each task group and calls balance_tasks()
to migrate the target load.  Along the way, balance_tasks() will also set
an all_pinned variable.  Given that task groups are iterated, this
all_pinned variable is essentially the status of the last group in the
scanning process.  A task group can have a number of reasons that no load
is migrated, none of them due to cpu affinity.  However, this status bit
is being propagated back up to the higher-level load_balance(), which
incorrectly thinks that no tasks were moved.  It kicks off the all-pinned
logic and starts multiple passes attempting to move load onto the puller CPU.

Moved the all_pinned logic up to the iterator level.  This ensures
that the logic is aggregated over all task groups, not just the last
one in the list.  The core change is in the move_tasks() function:

+       if (total_load_moved)
+               *all_pinned = 0;

The rest of the patch is code churn that removes parameter passing
in the lower-level functions.

Signed-off-by: Ken Chen <kenchen@google.com>
---
 kernel/sched_fair.c |   37 +++++++++++++++----------------------
 1 files changed, 15 insertions(+), 22 deletions(-)

diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index c46568a..2e224e0 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -2016,8 +2016,7 @@ static void pull_task(
  */
 static
 int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
-		     struct sched_domain *sd, enum cpu_idle_type idle,
-		     int *all_pinned)
+		     struct sched_domain *sd, enum cpu_idle_type idle)
 {
 	int tsk_cache_hot = 0;
 	/*
@@ -2030,7 +2029,6 @@ int can_migrate_task(
 		schedstat_inc(p, se.statistics.nr_failed_migrations_affine);
 		return 0;
 	}
-	*all_pinned = 0;
 
 	if (task_running(rq, p)) {
 		schedstat_inc(p, se.statistics.nr_failed_migrations_running);
@@ -2075,13 +2073,11 @@ move_one_task(
 {
 	struct task_struct *p, *n;
 	struct cfs_rq *cfs_rq;
-	int pinned = 0;
 
 	for_each_leaf_cfs_rq(busiest, cfs_rq) {
 		list_for_each_entry_safe(p, n, &cfs_rq->tasks, se.group_node) {
 
-			if (!can_migrate_task(p, busiest, this_cpu,
-						sd, idle, &pinned))
+			if (!can_migrate_task(p, busiest, this_cpu, sd, idle))
 				continue;
 
 			pull_task(busiest, p, this_rq, this_cpu);
@@ -2101,24 +2097,22 @@ move_one_task(
 static unsigned long
 balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
 	      unsigned long max_load_move, struct sched_domain *sd,
-	      enum cpu_idle_type idle, int *all_pinned,
-	      int *this_best_prio, struct cfs_rq *busiest_cfs_rq)
+	      enum cpu_idle_type idle, int *this_best_prio,
+	      struct cfs_rq *busiest_cfs_rq)
 {
-	int loops = 0, pulled = 0, pinned = 0;
+	int loops = 0, pulled = 0;
 	long rem_load_move = max_load_move;
 	struct task_struct *p, *n;
 
 	if (max_load_move == 0)
 		goto out;
 
-	pinned = 1;
-
 	list_for_each_entry_safe(p, n, &busiest_cfs_rq->tasks, se.group_node) {
 		if (loops++ > sysctl_sched_nr_migrate)
 			break;
 
 		if ((p->se.load.weight >> 1) > rem_load_move ||
-		    !can_migrate_task(p, busiest, this_cpu, sd, idle, &pinned))
+		    !can_migrate_task(p, busiest, this_cpu, sd, idle))
 			continue;
 
 		pull_task(busiest, p, this_rq, this_cpu);
@@ -2153,9 +2147,6 @@ out:
 	 */
 	schedstat_add(sd, lb_gained[idle], pulled);
 
-	if (all_pinned)
-		*all_pinned = pinned;
-
 	return max_load_move - rem_load_move;
 }
 
@@ -2206,7 +2197,7 @@ static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		  unsigned long max_load_move,
 		  struct sched_domain *sd, enum cpu_idle_type idle,
-		  int *all_pinned, int *this_best_prio)
+		  int *this_best_prio)
 {
 	long rem_load_move = max_load_move;
 	int busiest_cpu = cpu_of(busiest);
@@ -2231,7 +2222,7 @@ load_balance_fair(
 		rem_load = div_u64(rem_load, busiest_h_load + 1);
 
 		moved_load = balance_tasks(this_rq, this_cpu, busiest,
-				rem_load, sd, idle, all_pinned, this_best_prio,
+				rem_load, sd, idle, this_best_prio,
 				busiest_cfs_rq);
 
 		if (!moved_load)
@@ -2257,11 +2248,10 @@ static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		  unsigned long max_load_move,
 		  struct sched_domain *sd, enum cpu_idle_type idle,
-		  int *all_pinned, int *this_best_prio)
+		  int *this_best_prio)
 {
 	return balance_tasks(this_rq, this_cpu, busiest,
-			max_load_move, sd, idle, all_pinned,
-			this_best_prio, &busiest->cfs);
+			max_load_move, sd, idle, this_best_prio, &busiest->cfs);
 }
 #endif
 
@@ -2283,7 +2273,7 @@ static int move_tasks(
 	do {
 		load_moved = load_balance_fair(this_rq, this_cpu, busiest,
 				max_load_move - total_load_moved,
-				sd, idle, all_pinned, &this_best_prio);
+				sd, idle, &this_best_prio);
 
 		total_load_moved += load_moved;
 
@@ -2302,6 +2292,9 @@ static int move_tasks(
 #endif
 	} while (load_moved && max_load_move > total_load_moved);
 
+	if (total_load_moved)
+		*all_pinned = 0;
+
 	return total_load_moved > 0;
 }
 
@@ -3300,7 +3293,7 @@ static int load_balance(
 			struct sched_domain *sd, enum cpu_idle_type idle,
 			int *balance)
 {
-	int ld_moved, all_pinned = 0, active_balance = 0;
+	int ld_moved, all_pinned = 1, active_balance = 0;
 	struct sched_group *group;
 	unsigned long imbalance;
 	struct rq *busiest;
-- 
1.7.3.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: Subject: [PATCH] sched: fixed erroneous all_pinned logic.
  2011-04-08  0:24 Subject: [PATCH] sched: fixed erroneous all_pinned logic Ken Chen
@ 2011-04-08 10:57 ` Vladimir Davydov
  2011-04-08 11:49   ` Peter Zijlstra
  2011-04-08 18:25   ` Subject: [PATCH] sched: fixed " Ken Chen
  2011-04-08 11:39 ` Peter Zijlstra
  1 sibling, 2 replies; 8+ messages in thread
From: Vladimir Davydov @ 2011-04-08 10:57 UTC (permalink / raw)
  To: Ken Chen; +Cc: a.p.zijlstra, mingo, linux-kernel

On Apr 8, 2011, at 4:24 AM, Ken Chen wrote:

> @@ -2302,6 +2292,9 @@ static int move_tasks(
> #endif
> 	} while (load_moved && max_load_move > total_load_moved);
> 
> +	if (total_load_moved)
> +		*all_pinned = 0;
> +
> 	return total_load_moved > 0;
> }
> 
> @@ -3300,7 +3293,7 @@ static int load_balance(
> 			struct sched_domain *sd, enum cpu_idle_type idle,
> 			int *balance)
> {
> -	int ld_moved, all_pinned = 0, active_balance = 0;
> +	int ld_moved, all_pinned = 1, active_balance = 0;
> 	struct sched_group *group;
> 	unsigned long imbalance;
> 	struct rq *busiest;

As far as I understand, this patch sets the all_pinned flag if and only if we fail to move any tasks during the load balance. However, the migration can fail because e.g. all tasks are cache hot on their cpus (can_migrate_task() returns 0 in this case), and in this case we shouldn't treat all tasks as cpu bound, should we?

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: Subject: [PATCH] sched: fixed erroneous all_pinned logic.
  2011-04-08  0:24 Subject: [PATCH] sched: fixed erroneous all_pinned logic Ken Chen
  2011-04-08 10:57 ` Vladimir Davydov
@ 2011-04-08 11:39 ` Peter Zijlstra
  1 sibling, 0 replies; 8+ messages in thread
From: Peter Zijlstra @ 2011-04-08 11:39 UTC (permalink / raw)
  To: Ken Chen; +Cc: mingo, linux-kernel

On Thu, 2011-04-07 at 17:24 -0700, Ken Chen wrote:
> The scheduler load balancer has specific code to deal with cases of
> unbalanced system due to lots of unmovable tasks (for example because
> of hard CPU affinity). In those situation, it exclude the busiest CPU
> that has pinned tasks for load balance consideration such that it can
> perform second 2nd load balance pass on the rest of the system.  This
> all works as designed if there is only one cgroup in the system.
> 
> However, when we have multiple cgroups, this logic has false positive
> and triggers multiple load balance passes despite there are actually
> no pinned tasks at all.
> 
> The reason it has false positive is that the all pinned logic is deep
> in the lowest function of can_migrate_task() and is too low level.
> load_balance_fair() iterate each task group and calls balance_tasks()
> to migrate target load.  Along the way, balance_tasks() will also set
> a all_pinned variable.  Given that task-groups are iterated, this
> all_pinned variable is essentially the status of last group in the
> scanning process.  Task group can have number of reasons that no load
> being migrated, none due to cpu affinity.  However, this status bit
> is being propagated back up to the higher level load_balance(), which
> incorrectly think that no tasks were moved.  It kick off the all pinned
> logic and start multiple passes attempt to move load onto puller CPU.
> 
> Moved the all_pinned logic up at the iterator level.  This ensures
> that the logic is aggregated over all task-groups, not just the last
> one in the list.  The core change is in the move_tasks() function:
> 
> +       if (total_load_moved)
> +               *all_pinned = 0;
> 
> The rest of the patch are code churn that removes parameter passing
> in the lower level functions. 

Very nice!

This looks applicable to earlier kernels as well, so I stuck a stable
tag on it.

Also, your From: field is somewhat weird as it consists of an addr-spec
and a comment instead of the regular name-addr.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: Subject: [PATCH] sched: fixed erroneous all_pinned logic.
  2011-04-08 10:57 ` Vladimir Davydov
@ 2011-04-08 11:49   ` Peter Zijlstra
  2011-04-08 19:20     ` Ken Chen
  2011-04-08 18:25   ` Subject: [PATCH] sched: fixed " Ken Chen
  1 sibling, 1 reply; 8+ messages in thread
From: Peter Zijlstra @ 2011-04-08 11:49 UTC (permalink / raw)
  To: Vladimir Davydov; +Cc: Ken Chen, mingo, linux-kernel

On Fri, 2011-04-08 at 14:57 +0400, Vladimir Davydov wrote:
> 
> On Apr 8, 2011, at 4:24 AM, Ken Chen wrote:
> 
> > @@ -2302,6 +2292,9 @@ static int move_tasks(
> > #endif
> >       } while (load_moved && max_load_move > total_load_moved);
> > 
> > +     if (total_load_moved)
> > +             *all_pinned = 0;
> > +
> >       return total_load_moved > 0;
> > }
> > 
> > @@ -3300,7 +3293,7 @@ static int load_balance(
> >                       struct sched_domain *sd, enum cpu_idle_type idle,
> >                       int *balance)
> > {
> > -     int ld_moved, all_pinned = 0, active_balance = 0;
> > +     int ld_moved, all_pinned = 1, active_balance = 0;
> >       struct sched_group *group;
> >       unsigned long imbalance;
> >       struct rq *busiest;
> 
> As far as I understand, this patch sets the all_pinned flag if and
> only if we fail to move any tasks during the load balance. However,
> the migration can fail because e.g. all tasks are cache hot on their
> cpus (can_migrate_task() returns 0 in this case), and in this case we
> shouldn't treat all tasks as cpu bound, should we?-- 

Hmm, you've got a good point there... (that'll teach me to read email in
date order).

Ken would it work to only push the all_pinned = 1 higher and not also
the all_pinned = 0?

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: Subject: [PATCH] sched: fixed erroneous all_pinned logic.
  2011-04-08 10:57 ` Vladimir Davydov
  2011-04-08 11:49   ` Peter Zijlstra
@ 2011-04-08 18:25   ` Ken Chen
  1 sibling, 0 replies; 8+ messages in thread
From: Ken Chen @ 2011-04-08 18:25 UTC (permalink / raw)
  To: Vladimir Davydov; +Cc: a.p.zijlstra, mingo, linux-kernel

On Fri, Apr 8, 2011 at 3:57 AM, Vladimir Davydov wrote:
>
> As far as I understand, this patch sets the all_pinned flag if
> and only if we fail to move any tasks during the load balance.
> However, the migration can fail because e.g. all tasks are
> cache hot on their cpus (can_migrate_task() returns 0 in this
> case), and in this case we shouldn't treat all tasks as cpu
> bound, should we?

The all_pinned logic is not restricted to tasks that have hard CPU affinity.
In balance_tasks(), e.g. if all tasks have high weight, it would also trip
all_pinned to 1.  See ((p->se.load.weight >> 1) > rem_load_move).  I don't
see much difference whether a task has high weight, is cache hot, or
has hard cpu affinity.

The effect of classifying a cache-hot task as pinned or not is difficult to
measure, I think, because that state is more transient compared to the
other two conditions.

I don't have enough data to indicate which way is better though.

- Ken

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: Subject: [PATCH] sched: fixed erroneous all_pinned logic.
  2011-04-08 11:49   ` Peter Zijlstra
@ 2011-04-08 19:20     ` Ken Chen
  2011-04-09 11:14       ` Peter Zijlstra
  2011-04-11 10:46       ` [tip:sched/urgent] sched: Fix " tip-bot for Ken Chen
  0 siblings, 2 replies; 8+ messages in thread
From: Ken Chen @ 2011-04-08 19:20 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Vladimir Davydov, mingo, linux-kernel

On Fri, Apr 8, 2011 at 4:49 AM, Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
>> As far as I understand, this patch sets the all_pinned flag if and
>> only if we fail to move any tasks during the load balance. However,
>> the migration can fail because e.g. all tasks are cache hot on their
>> cpus (can_migrate_task() returns 0 in this case), and in this case we
>> shouldn't treat all tasks as cpu bound, should we?--
>
> Hmm, you've got a good point there... (that'll teach me to read email in
> date order).
>
> Ken would it work to only push the all_pinned = 1 higher and not also
> the all_pinned = 0?

OK.  That would work as well.  We just need to aggregate all_pinned at a
level higher than the task-group iterator so it collects status across all
tasks, instead of just the last task group.  The following is the
revised patch and changelog.

-----
sched: fixed erroneous all_pinned logic.

The scheduler load balancer has specific code to deal with cases of an
unbalanced system due to lots of unmovable tasks (for example because
of hard CPU affinity). In those situations, it excludes the busiest CPU
that has pinned tasks from load balance consideration so that it can
perform a second load balance pass on the rest of the system.  This
all works as designed if there is only one cgroup in the system.

However, when we have multiple cgroups, this logic has false positives
and triggers multiple load balance passes even though there are actually
no pinned tasks at all.

The reason it has false positives is that the all-pinned logic is deep
in the lowest-level function, can_migrate_task(), and is too low level.
load_balance_fair() iterates over each task group and calls balance_tasks()
to migrate the target load.  Along the way, balance_tasks() will also set
an all_pinned variable.  Given that task groups are iterated, this
all_pinned variable is essentially the status of the last group in the
scanning process.  A task group can have a number of reasons that no load
is migrated, none of them due to cpu affinity.  However, this status bit
is being propagated back up to the higher-level load_balance(), which
incorrectly thinks that no tasks were moved.  It kicks off the all-pinned
logic and starts multiple passes attempting to move load onto the puller CPU.

Moved the all_pinned aggregation up to the iterator level. This ensures
that the status is aggregated over all task groups, not just the last one
in the list.

Signed-off-by: Ken Chen <kenchen@google.com>

diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index c46568a..bb7dba7 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -2104,21 +2104,20 @@ balance_tasks(
 	      enum cpu_idle_type idle, int *all_pinned,
 	      int *this_best_prio, struct cfs_rq *busiest_cfs_rq)
 {
-	int loops = 0, pulled = 0, pinned = 0;
+	int loops = 0, pulled = 0;
 	long rem_load_move = max_load_move;
 	struct task_struct *p, *n;

 	if (max_load_move == 0)
 		goto out;

-	pinned = 1;
-
 	list_for_each_entry_safe(p, n, &busiest_cfs_rq->tasks, se.group_node) {
 		if (loops++ > sysctl_sched_nr_migrate)
 			break;

 		if ((p->se.load.weight >> 1) > rem_load_move ||
-		    !can_migrate_task(p, busiest, this_cpu, sd, idle, &pinned))
+		    !can_migrate_task(p, busiest, this_cpu, sd, idle,
+				      all_pinned))
 			continue;

 		pull_task(busiest, p, this_rq, this_cpu);
@@ -2153,9 +2152,6 @@ out:
 	 */
 	schedstat_add(sd, lb_gained[idle], pulled);

-	if (all_pinned)
-		*all_pinned = pinned;
-
 	return max_load_move - rem_load_move;
 }

@@ -3341,6 +3337,7 @@ redo:
 		 * still unbalanced. ld_moved simply stays zero, so it is
 		 * correctly treated as an imbalance.
 		 */
+		all_pinned = 1;
 		local_irq_save(flags);
 		double_rq_lock(this_rq, busiest);
 		ld_moved = move_tasks(this_rq, this_cpu, busiest,

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: Subject: [PATCH] sched: fixed erroneous all_pinned logic.
  2011-04-08 19:20     ` Ken Chen
@ 2011-04-09 11:14       ` Peter Zijlstra
  2011-04-11 10:46       ` [tip:sched/urgent] sched: Fix " tip-bot for Ken Chen
  1 sibling, 0 replies; 8+ messages in thread
From: Peter Zijlstra @ 2011-04-09 11:14 UTC (permalink / raw)
  To: Ken Chen; +Cc: Vladimir Davydov, mingo, linux-kernel

On Fri, 2011-04-08 at 12:20 -0700, Ken Chen wrote:
> sched: fixed erroneous all_pinned logic.
> 
> The scheduler load balancer has specific code to deal with cases of
> unbalanced system due to lots of unmovable tasks (for example because
> of hard CPU affinity). In those situation, it exclude the busiest CPU
> that has pinned tasks for load balance consideration such that it can
> perform second 2nd load balance pass on the rest of the system.  This
> all works as designed if there is only one cgroup in the system.
> 
> However, when we have multiple cgroups, this logic has false positive
> and triggers multiple load balance passes despite there are actually
> no pinned tasks at all.
> 
> The reason it has false positive is that the all pinned logic is deep
> in the lowest function of can_migrate_task() and is too low level.
> load_balance_fair() iterate each task group and calls balance_tasks()
> to migrate target load.  Along the way, balance_tasks() will also set
> a all_pinned variable.  Given that task-groups are iterated, this
> all_pinned variable is essentially the status of last group in the
> scanning process.  Task group can have number of reasons that no load
> being migrated, none due to cpu affinity.  However, this status bit
> is being propagated back up to the higher level load_balance(), which
> incorrectly think that no tasks were moved.  It kick off the all pinned
> logic and start multiple passes attempt to move load onto puller CPU.
> 
> Moved the all_pinned aggregation up at the iterator level. This ensures
> that the status is aggregated over all task-groups, not just last one
> in the list.
> 
> Signed-off-by: Ken Chen <kenchen@google.com> 

Thanks Ken!

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [tip:sched/urgent] sched: Fix erroneous all_pinned logic
  2011-04-08 19:20     ` Ken Chen
  2011-04-09 11:14       ` Peter Zijlstra
@ 2011-04-11 10:46       ` tip-bot for Ken Chen
  1 sibling, 0 replies; 8+ messages in thread
From: tip-bot for Ken Chen @ 2011-04-11 10:46 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, hpa, mingo, a.p.zijlstra, kenchen, tglx, mingo

Commit-ID:  b30aef17f71cf9e24b10c11cbb5e5f0ebe8a85ab
Gitweb:     http://git.kernel.org/tip/b30aef17f71cf9e24b10c11cbb5e5f0ebe8a85ab
Author:     Ken Chen <kenchen@google.com>
AuthorDate: Fri, 8 Apr 2011 12:20:16 -0700
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Mon, 11 Apr 2011 11:08:54 +0200

sched: Fix erroneous all_pinned logic

The scheduler load balancer has specific code to deal with cases of an
unbalanced system due to lots of unmovable tasks (for example because of
hard CPU affinity). In those situations, it excludes the busiest CPU that
has pinned tasks from load balance consideration so that it can perform
a second load balance pass on the rest of the system.

This all works as designed if there is only one cgroup in the system.

However, when we have multiple cgroups, this logic has false positives and
triggers multiple load balance passes even though there are actually no pinned
tasks at all.

The reason it has false positives is that the all-pinned logic is deep in
the lowest-level function, can_migrate_task(), and is too low level:

load_balance_fair() iterates over each task group and calls balance_tasks() to
migrate the target load. Along the way, balance_tasks() will also set an
all_pinned variable. Given that task groups are iterated, this all_pinned
variable is essentially the status of the last group in the scanning process.
A task group can have a number of reasons that no load is migrated, none of
them due to cpu affinity. However, this status bit is being propagated back up
to the higher-level load_balance(), which incorrectly thinks that no tasks
were moved.  It kicks off the all-pinned logic and starts multiple passes
attempting to move load onto the puller CPU.

To fix this, move the all_pinned aggregation up to the iterator level.
This ensures that the status is aggregated over all task groups, not just
the last one in the list.

Signed-off-by: Ken Chen <kenchen@google.com>
Cc: stable@kernel.org
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/BANLkTi=ernzNawaR5tJZEsV_QVnfxqXmsQ@mail.gmail.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/sched_fair.c |   11 ++++-------
 1 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 60f9d40..6fa833a 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -2104,21 +2104,20 @@ balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
 	      enum cpu_idle_type idle, int *all_pinned,
 	      int *this_best_prio, struct cfs_rq *busiest_cfs_rq)
 {
-	int loops = 0, pulled = 0, pinned = 0;
+	int loops = 0, pulled = 0;
 	long rem_load_move = max_load_move;
 	struct task_struct *p, *n;
 
 	if (max_load_move == 0)
 		goto out;
 
-	pinned = 1;
-
 	list_for_each_entry_safe(p, n, &busiest_cfs_rq->tasks, se.group_node) {
 		if (loops++ > sysctl_sched_nr_migrate)
 			break;
 
 		if ((p->se.load.weight >> 1) > rem_load_move ||
-		    !can_migrate_task(p, busiest, this_cpu, sd, idle, &pinned))
+		    !can_migrate_task(p, busiest, this_cpu, sd, idle,
+				      all_pinned))
 			continue;
 
 		pull_task(busiest, p, this_rq, this_cpu);
@@ -2153,9 +2152,6 @@ out:
 	 */
 	schedstat_add(sd, lb_gained[idle], pulled);
 
-	if (all_pinned)
-		*all_pinned = pinned;
-
 	return max_load_move - rem_load_move;
 }
 
@@ -3341,6 +3337,7 @@ redo:
 		 * still unbalanced. ld_moved simply stays zero, so it is
 		 * correctly treated as an imbalance.
 		 */
+		all_pinned = 1;
 		local_irq_save(flags);
 		double_rq_lock(this_rq, busiest);
 		ld_moved = move_tasks(this_rq, this_cpu, busiest,

^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2011-04-11 10:47 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-04-08  0:24 Subject: [PATCH] sched: fixed erroneous all_pinned logic Ken Chen
2011-04-08 10:57 ` Vladimir Davydov
2011-04-08 11:49   ` Peter Zijlstra
2011-04-08 19:20     ` Ken Chen
2011-04-09 11:14       ` Peter Zijlstra
2011-04-11 10:46       ` [tip:sched/urgent] sched: Fix " tip-bot for Ken Chen
2011-04-08 18:25   ` Subject: [PATCH] sched: fixed " Ken Chen
2011-04-08 11:39 ` Peter Zijlstra

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).