* [PATCH 5/6] _cpu_down: don't play with current->cpus_allowed
@ 2010-03-15 9:10 Oleg Nesterov
2010-03-15 19:45 ` Rafael J. Wysocki
2010-04-02 19:12 ` [tip:sched/core] sched: _cpu_down(): Don't " tip-bot for Oleg Nesterov
0 siblings, 2 replies; 3+ messages in thread
From: Oleg Nesterov @ 2010-03-15 9:10 UTC (permalink / raw)
To: Peter Zijlstra, Ingo Molnar
Cc: Ben Blum, Jiri Slaby, Lai Jiangshan, Li Zefan, Miao Xie,
Paul Menage, Rafael J. Wysocki, Tejun Heo, linux-kernel
_cpu_down() changes the current task's affinity and then recovers it at
the end. The problems are well known: we can't restore old_allowed if it
was bound to the now-dead-cpu, and we can race with the userspace which
can change cpu-affinity during unplug.
_cpu_down() should not play with current->cpus_allowed at all. Instead,
take_cpu_down() can migrate the caller of _cpu_down() after __cpu_disable()
removes the dying cpu from cpu_online_mask.
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
include/linux/sched.h | 1 +
kernel/sched.c | 2 +-
kernel/cpu.c | 18 ++++++------------
3 files changed, 8 insertions(+), 13 deletions(-)
--- 34-rc1/include/linux/sched.h~4_CPU_DOWN_AFFINITY 2010-03-15 09:37:46.000000000 +0100
+++ 34-rc1/include/linux/sched.h 2010-03-15 09:41:51.000000000 +0100
@@ -1843,6 +1843,7 @@ extern void sched_clock_idle_sleep_event
extern void sched_clock_idle_wakeup_event(u64 delta_ns);
#ifdef CONFIG_HOTPLUG_CPU
+extern void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p);
extern void idle_task_exit(void);
#else
static inline void idle_task_exit(void) {}
--- 34-rc1/kernel/sched.c~4_CPU_DOWN_AFFINITY 2010-03-15 09:41:28.000000000 +0100
+++ 34-rc1/kernel/sched.c 2010-03-15 09:41:51.000000000 +0100
@@ -5503,7 +5503,7 @@ static int migration_thread(void *data)
/*
* Figure out where task on dead CPU should go, use force if necessary.
*/
-static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
+void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
{
struct rq *rq = cpu_rq(dead_cpu);
int needs_cpu, dest_cpu;
--- 34-rc1/kernel/cpu.c~4_CPU_DOWN_AFFINITY 2010-03-15 09:37:46.000000000 +0100
+++ 34-rc1/kernel/cpu.c 2010-03-15 09:41:51.000000000 +0100
@@ -163,6 +163,7 @@ static inline void check_for_tasks(int c
}
struct take_cpu_down_param {
+ struct task_struct *caller;
unsigned long mod;
void *hcpu;
};
@@ -171,6 +172,7 @@ struct take_cpu_down_param {
static int __ref take_cpu_down(void *_param)
{
struct take_cpu_down_param *param = _param;
+ unsigned int cpu = (unsigned long)param->hcpu;
int err;
/* Ensure this CPU doesn't handle any more interrupts. */
@@ -181,6 +183,8 @@ static int __ref take_cpu_down(void *_pa
raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
param->hcpu);
+ if (task_cpu(param->caller) == cpu)
+ move_task_off_dead_cpu(cpu, param->caller);
/* Force idle task to run as soon as we yield: it should
immediately notice cpu is offline and die quickly. */
sched_idle_next();
@@ -191,10 +195,10 @@ static int __ref take_cpu_down(void *_pa
static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
{
int err, nr_calls = 0;
- cpumask_var_t old_allowed;
void *hcpu = (void *)(long)cpu;
unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
struct take_cpu_down_param tcd_param = {
+ .caller = current,
.mod = mod,
.hcpu = hcpu,
};
@@ -205,9 +209,6 @@ static int __ref _cpu_down(unsigned int
if (!cpu_online(cpu))
return -EINVAL;
- if (!alloc_cpumask_var(&old_allowed, GFP_KERNEL))
- return -ENOMEM;
-
cpu_hotplug_begin();
set_cpu_active(cpu, false);
err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
@@ -224,10 +225,6 @@ static int __ref _cpu_down(unsigned int
goto out_release;
}
- /* Ensure that we are not runnable on dying cpu */
- cpumask_copy(old_allowed, &current->cpus_allowed);
- set_cpus_allowed_ptr(current, cpu_active_mask);
-
err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
if (err) {
set_cpu_active(cpu, true);
@@ -236,7 +233,7 @@ static int __ref _cpu_down(unsigned int
hcpu) == NOTIFY_BAD)
BUG();
- goto out_allowed;
+ goto out_release;
}
BUG_ON(cpu_online(cpu));
@@ -254,8 +251,6 @@ static int __ref _cpu_down(unsigned int
check_for_tasks(cpu);
-out_allowed:
- set_cpus_allowed_ptr(current, old_allowed);
out_release:
cpu_hotplug_done();
if (!err) {
@@ -263,7 +258,6 @@ out_release:
hcpu) == NOTIFY_BAD)
BUG();
}
- free_cpumask_var(old_allowed);
return err;
}
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH 5/6] _cpu_down: don't play with current->cpus_allowed
2010-03-15 9:10 [PATCH 5/6] _cpu_down: don't play with current->cpus_allowed Oleg Nesterov
@ 2010-03-15 19:45 ` Rafael J. Wysocki
2010-04-02 19:12 ` [tip:sched/core] sched: _cpu_down(): Don't " tip-bot for Oleg Nesterov
1 sibling, 0 replies; 3+ messages in thread
From: Rafael J. Wysocki @ 2010-03-15 19:45 UTC (permalink / raw)
To: Oleg Nesterov
Cc: Peter Zijlstra, Ingo Molnar, Ben Blum, Jiri Slaby, Lai Jiangshan,
Li Zefan, Miao Xie, Paul Menage, Tejun Heo, linux-kernel
On Monday 15 March 2010, Oleg Nesterov wrote:
> _cpu_down() changes the current task's affinity and then recovers it at
> the end. The problems are well known: we can't restore old_allowed if it
> was bound to the now-dead-cpu, and we can race with the userspace which
> can change cpu-affinity during unplug.
>
> _cpu_down() should not play with current->cpus_allowed at all. Instead,
> take_cpu_down() can migrate the caller of _cpu_down() after __cpu_disable()
> removes the dying cpu from cpu_online_mask.
>
> Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Rafael J. Wysocki <rjw@sisk.pl>
> ---
>
> include/linux/sched.h | 1 +
> kernel/sched.c | 2 +-
> kernel/cpu.c | 18 ++++++------------
> 3 files changed, 8 insertions(+), 13 deletions(-)
>
> --- 34-rc1/include/linux/sched.h~4_CPU_DOWN_AFFINITY 2010-03-15 09:37:46.000000000 +0100
> +++ 34-rc1/include/linux/sched.h 2010-03-15 09:41:51.000000000 +0100
> @@ -1843,6 +1843,7 @@ extern void sched_clock_idle_sleep_event
> extern void sched_clock_idle_wakeup_event(u64 delta_ns);
>
> #ifdef CONFIG_HOTPLUG_CPU
> +extern void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p);
> extern void idle_task_exit(void);
> #else
> static inline void idle_task_exit(void) {}
> --- 34-rc1/kernel/sched.c~4_CPU_DOWN_AFFINITY 2010-03-15 09:41:28.000000000 +0100
> +++ 34-rc1/kernel/sched.c 2010-03-15 09:41:51.000000000 +0100
> @@ -5503,7 +5503,7 @@ static int migration_thread(void *data)
> /*
> * Figure out where task on dead CPU should go, use force if necessary.
> */
> -static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
> +void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
> {
> struct rq *rq = cpu_rq(dead_cpu);
> int needs_cpu, dest_cpu;
> --- 34-rc1/kernel/cpu.c~4_CPU_DOWN_AFFINITY 2010-03-15 09:37:46.000000000 +0100
> +++ 34-rc1/kernel/cpu.c 2010-03-15 09:41:51.000000000 +0100
> @@ -163,6 +163,7 @@ static inline void check_for_tasks(int c
> }
>
> struct take_cpu_down_param {
> + struct task_struct *caller;
> unsigned long mod;
> void *hcpu;
> };
> @@ -171,6 +172,7 @@ struct take_cpu_down_param {
> static int __ref take_cpu_down(void *_param)
> {
> struct take_cpu_down_param *param = _param;
> + unsigned int cpu = (unsigned long)param->hcpu;
> int err;
>
> /* Ensure this CPU doesn't handle any more interrupts. */
> @@ -181,6 +183,8 @@ static int __ref take_cpu_down(void *_pa
> raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
> param->hcpu);
>
> + if (task_cpu(param->caller) == cpu)
> + move_task_off_dead_cpu(cpu, param->caller);
> /* Force idle task to run as soon as we yield: it should
> immediately notice cpu is offline and die quickly. */
> sched_idle_next();
> @@ -191,10 +195,10 @@ static int __ref take_cpu_down(void *_pa
> static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
> {
> int err, nr_calls = 0;
> - cpumask_var_t old_allowed;
> void *hcpu = (void *)(long)cpu;
> unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
> struct take_cpu_down_param tcd_param = {
> + .caller = current,
> .mod = mod,
> .hcpu = hcpu,
> };
> @@ -205,9 +209,6 @@ static int __ref _cpu_down(unsigned int
> if (!cpu_online(cpu))
> return -EINVAL;
>
> - if (!alloc_cpumask_var(&old_allowed, GFP_KERNEL))
> - return -ENOMEM;
> -
> cpu_hotplug_begin();
> set_cpu_active(cpu, false);
> err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
> @@ -224,10 +225,6 @@ static int __ref _cpu_down(unsigned int
> goto out_release;
> }
>
> - /* Ensure that we are not runnable on dying cpu */
> - cpumask_copy(old_allowed, &current->cpus_allowed);
> - set_cpus_allowed_ptr(current, cpu_active_mask);
> -
> err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
> if (err) {
> set_cpu_active(cpu, true);
> @@ -236,7 +233,7 @@ static int __ref _cpu_down(unsigned int
> hcpu) == NOTIFY_BAD)
> BUG();
>
> - goto out_allowed;
> + goto out_release;
> }
> BUG_ON(cpu_online(cpu));
>
> @@ -254,8 +251,6 @@ static int __ref _cpu_down(unsigned int
>
> check_for_tasks(cpu);
>
> -out_allowed:
> - set_cpus_allowed_ptr(current, old_allowed);
> out_release:
> cpu_hotplug_done();
> if (!err) {
> @@ -263,7 +258,6 @@ out_release:
> hcpu) == NOTIFY_BAD)
> BUG();
> }
> - free_cpumask_var(old_allowed);
> return err;
> }
^ permalink raw reply [flat|nested] 3+ messages in thread
* [tip:sched/core] sched: _cpu_down(): Don't play with current->cpus_allowed
2010-03-15 9:10 [PATCH 5/6] _cpu_down: don't play with current->cpus_allowed Oleg Nesterov
2010-03-15 19:45 ` Rafael J. Wysocki
@ 2010-04-02 19:12 ` tip-bot for Oleg Nesterov
1 sibling, 0 replies; 3+ messages in thread
From: tip-bot for Oleg Nesterov @ 2010-04-02 19:12 UTC (permalink / raw)
To: linux-tip-commits
Cc: linux-kernel, hpa, mingo, a.p.zijlstra, oleg, tglx, rjw, mingo
Commit-ID: 6a1bdc1b577ebcb65f6603c57f8347309bc4ab13
Gitweb: http://git.kernel.org/tip/6a1bdc1b577ebcb65f6603c57f8347309bc4ab13
Author: Oleg Nesterov <oleg@redhat.com>
AuthorDate: Mon, 15 Mar 2010 10:10:23 +0100
Committer: Ingo Molnar <mingo@elte.hu>
CommitDate: Fri, 2 Apr 2010 20:12:03 +0200
sched: _cpu_down(): Don't play with current->cpus_allowed
_cpu_down() changes the current task's affinity and then recovers it at
the end. The problems are well known: we can't restore old_allowed if it
was bound to the now-dead-cpu, and we can race with the userspace which
can change cpu-affinity during unplug.
_cpu_down() should not play with current->cpus_allowed at all. Instead,
take_cpu_down() can migrate the caller of _cpu_down() after __cpu_disable()
removes the dying cpu from cpu_online_mask.
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20100315091023.GA9148@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
include/linux/sched.h | 1 +
kernel/cpu.c | 18 ++++++------------
kernel/sched.c | 2 +-
3 files changed, 8 insertions(+), 13 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 43c9451..8bea407 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1843,6 +1843,7 @@ extern void sched_clock_idle_sleep_event(void);
extern void sched_clock_idle_wakeup_event(u64 delta_ns);
#ifdef CONFIG_HOTPLUG_CPU
+extern void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p);
extern void idle_task_exit(void);
#else
static inline void idle_task_exit(void) {}
diff --git a/kernel/cpu.c b/kernel/cpu.c
index f8cced2..8d340fa 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -163,6 +163,7 @@ static inline void check_for_tasks(int cpu)
}
struct take_cpu_down_param {
+ struct task_struct *caller;
unsigned long mod;
void *hcpu;
};
@@ -171,6 +172,7 @@ struct take_cpu_down_param {
static int __ref take_cpu_down(void *_param)
{
struct take_cpu_down_param *param = _param;
+ unsigned int cpu = (unsigned long)param->hcpu;
int err;
/* Ensure this CPU doesn't handle any more interrupts. */
@@ -181,6 +183,8 @@ static int __ref take_cpu_down(void *_param)
raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
param->hcpu);
+ if (task_cpu(param->caller) == cpu)
+ move_task_off_dead_cpu(cpu, param->caller);
/* Force idle task to run as soon as we yield: it should
immediately notice cpu is offline and die quickly. */
sched_idle_next();
@@ -191,10 +195,10 @@ static int __ref take_cpu_down(void *_param)
static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
{
int err, nr_calls = 0;
- cpumask_var_t old_allowed;
void *hcpu = (void *)(long)cpu;
unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
struct take_cpu_down_param tcd_param = {
+ .caller = current,
.mod = mod,
.hcpu = hcpu,
};
@@ -205,9 +209,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
if (!cpu_online(cpu))
return -EINVAL;
- if (!alloc_cpumask_var(&old_allowed, GFP_KERNEL))
- return -ENOMEM;
-
cpu_hotplug_begin();
set_cpu_active(cpu, false);
err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
@@ -224,10 +225,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
goto out_release;
}
- /* Ensure that we are not runnable on dying cpu */
- cpumask_copy(old_allowed, &current->cpus_allowed);
- set_cpus_allowed_ptr(current, cpu_active_mask);
-
err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
if (err) {
set_cpu_active(cpu, true);
@@ -236,7 +233,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
hcpu) == NOTIFY_BAD)
BUG();
- goto out_allowed;
+ goto out_release;
}
BUG_ON(cpu_online(cpu));
@@ -254,8 +251,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
check_for_tasks(cpu);
-out_allowed:
- set_cpus_allowed_ptr(current, old_allowed);
out_release:
cpu_hotplug_done();
if (!err) {
@@ -263,7 +258,6 @@ out_release:
hcpu) == NOTIFY_BAD)
BUG();
}
- free_cpumask_var(old_allowed);
return err;
}
diff --git a/kernel/sched.c b/kernel/sched.c
index 165b532..11119de 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5442,7 +5442,7 @@ static int migration_thread(void *data)
/*
* Figure out where task on dead CPU should go, use force if necessary.
*/
-static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
+void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
{
struct rq *rq = cpu_rq(dead_cpu);
int needs_cpu, uninitialized_var(dest_cpu);
^ permalink raw reply related [flat|nested] 3+ messages in thread
end of thread, other threads:[~2010-04-02 19:13 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-03-15 9:10 [PATCH 5/6] _cpu_down: don't play with current->cpus_allowed Oleg Nesterov
2010-03-15 19:45 ` Rafael J. Wysocki
2010-04-02 19:12 ` [tip:sched/core] sched: _cpu_down(): Don't " tip-bot for Oleg Nesterov
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.