* [PATCH 5/6] _cpu_down: don't play with current->cpus_allowed
From: Oleg Nesterov @ 2010-03-15  9:10 UTC
  To: Peter Zijlstra, Ingo Molnar
  Cc: Ben Blum, Jiri Slaby, Lai Jiangshan, Li Zefan, Miao Xie,
	Paul Menage, Rafael J. Wysocki, Tejun Heo, linux-kernel

_cpu_down() changes the current task's affinity and then restores it at
the end. The problems are well known: we can't restore old_allowed if it
was bound to the now-dead CPU, and we can race with userspace, which can
change the CPU affinity during the unplug.

_cpu_down() should not play with current->cpus_allowed at all. Instead,
take_cpu_down() can migrate the caller of _cpu_down() after __cpu_disable()
removes the dying CPU from cpu_online_mask.
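
For illustration, the pattern being removed looks roughly like this (a
simplified sketch pieced together from the hunks below, not a verbatim
listing); both failure modes live in the save/restore pair:

	cpumask_var_t old_allowed;

	if (!alloc_cpumask_var(&old_allowed, GFP_KERNEL))
		return -ENOMEM;

	/* Save the caller's affinity and narrow it to the active CPUs. */
	cpumask_copy(old_allowed, &current->cpus_allowed);
	set_cpus_allowed_ptr(current, cpu_active_mask);

	err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));

	/*
	 * Restore the saved mask. If old_allowed contained only the
	 * now-dead CPU, this restores a mask with no online CPU in it;
	 * and if userspace changed the affinity while the CPU was going
	 * down, that change is silently overwritten here.
	 */
	set_cpus_allowed_ptr(current, old_allowed);
	free_cpumask_var(old_allowed);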

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---

 include/linux/sched.h |    1 +
 kernel/sched.c        |    2 +-
 kernel/cpu.c          |   18 ++++++------------
 3 files changed, 8 insertions(+), 13 deletions(-)

--- 34-rc1/include/linux/sched.h~4_CPU_DOWN_AFFINITY	2010-03-15 09:37:46.000000000 +0100
+++ 34-rc1/include/linux/sched.h	2010-03-15 09:41:51.000000000 +0100
@@ -1843,6 +1843,7 @@ extern void sched_clock_idle_sleep_event
 extern void sched_clock_idle_wakeup_event(u64 delta_ns);
 
 #ifdef CONFIG_HOTPLUG_CPU
+extern void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p);
 extern void idle_task_exit(void);
 #else
 static inline void idle_task_exit(void) {}
--- 34-rc1/kernel/sched.c~4_CPU_DOWN_AFFINITY	2010-03-15 09:41:28.000000000 +0100
+++ 34-rc1/kernel/sched.c	2010-03-15 09:41:51.000000000 +0100
@@ -5503,7 +5503,7 @@ static int migration_thread(void *data)
 /*
  * Figure out where task on dead CPU should go, use force if necessary.
  */
-static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
+void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
 {
 	struct rq *rq = cpu_rq(dead_cpu);
 	int needs_cpu, dest_cpu;
--- 34-rc1/kernel/cpu.c~4_CPU_DOWN_AFFINITY	2010-03-15 09:37:46.000000000 +0100
+++ 34-rc1/kernel/cpu.c	2010-03-15 09:41:51.000000000 +0100
@@ -163,6 +163,7 @@ static inline void check_for_tasks(int c
 }
 
 struct take_cpu_down_param {
+	struct task_struct *caller;
 	unsigned long mod;
 	void *hcpu;
 };
@@ -171,6 +172,7 @@ struct take_cpu_down_param {
 static int __ref take_cpu_down(void *_param)
 {
 	struct take_cpu_down_param *param = _param;
+	unsigned int cpu = (unsigned long)param->hcpu;
 	int err;
 
 	/* Ensure this CPU doesn't handle any more interrupts. */
@@ -181,6 +183,8 @@ static int __ref take_cpu_down(void *_pa
 	raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
 				param->hcpu);
 
+	if (task_cpu(param->caller) == cpu)
+		move_task_off_dead_cpu(cpu, param->caller);
 	/* Force idle task to run as soon as we yield: it should
 	   immediately notice cpu is offline and die quickly. */
 	sched_idle_next();
@@ -191,10 +195,10 @@ static int __ref take_cpu_down(void *_pa
 static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 {
 	int err, nr_calls = 0;
-	cpumask_var_t old_allowed;
 	void *hcpu = (void *)(long)cpu;
 	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
 	struct take_cpu_down_param tcd_param = {
+		.caller = current,
 		.mod = mod,
 		.hcpu = hcpu,
 	};
@@ -205,9 +209,6 @@ static int __ref _cpu_down(unsigned int 
 	if (!cpu_online(cpu))
 		return -EINVAL;
 
-	if (!alloc_cpumask_var(&old_allowed, GFP_KERNEL))
-		return -ENOMEM;
-
 	cpu_hotplug_begin();
 	set_cpu_active(cpu, false);
 	err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
@@ -224,10 +225,6 @@ static int __ref _cpu_down(unsigned int 
 		goto out_release;
 	}
 
-	/* Ensure that we are not runnable on dying cpu */
-	cpumask_copy(old_allowed, &current->cpus_allowed);
-	set_cpus_allowed_ptr(current, cpu_active_mask);
-
 	err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
 	if (err) {
 		set_cpu_active(cpu, true);
@@ -236,7 +233,7 @@ static int __ref _cpu_down(unsigned int 
 					    hcpu) == NOTIFY_BAD)
 			BUG();
 
-		goto out_allowed;
+		goto out_release;
 	}
 	BUG_ON(cpu_online(cpu));
 
@@ -254,8 +251,6 @@ static int __ref _cpu_down(unsigned int 
 
 	check_for_tasks(cpu);
 
-out_allowed:
-	set_cpus_allowed_ptr(current, old_allowed);
 out_release:
 	cpu_hotplug_done();
 	if (!err) {
@@ -263,7 +258,6 @@ out_release:
 					    hcpu) == NOTIFY_BAD)
 			BUG();
 	}
-	free_cpumask_var(old_allowed);
 	return err;
 }
 

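With this patch applied, the migration happens inside take_cpu_down()
itself, which runs on the dying CPU under __stop_machine() with interrupts
disabled, so neither userspace nor the scheduler can race with it.
Reconstructed from the hunks above plus the unchanged 2.6.34 context (a
sketch, not a verbatim listing), the function ends up looking like:

	static int __ref take_cpu_down(void *_param)
	{
		struct take_cpu_down_param *param = _param;
		unsigned int cpu = (unsigned long)param->hcpu;
		int err;

		/* Ensure this CPU doesn't handle any more interrupts. */
		err = __cpu_disable();
		if (err < 0)
			return err;

		raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
					param->hcpu);

		/*
		 * __cpu_disable() has already removed the dying CPU from
		 * cpu_online_mask, so move_task_off_dead_cpu() picks a
		 * valid destination without touching ->cpus_allowed.
		 */
		if (task_cpu(param->caller) == cpu)
			move_task_off_dead_cpu(cpu, param->caller);

		/* Force idle task to run as soon as we yield: it should
		   immediately notice cpu is offline and die quickly. */
		sched_idle_next();

		return 0;
	}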

* Re: [PATCH 5/6] _cpu_down: don't play with current->cpus_allowed
From: Rafael J. Wysocki @ 2010-03-15 19:45 UTC
  To: Oleg Nesterov
  Cc: Peter Zijlstra, Ingo Molnar, Ben Blum, Jiri Slaby, Lai Jiangshan,
	Li Zefan, Miao Xie, Paul Menage, Tejun Heo, linux-kernel

On Monday 15 March 2010, Oleg Nesterov wrote:
> _cpu_down() changes the current task's affinity and then restores it at
> the end. The problems are well known: we can't restore old_allowed if it
> was bound to the now-dead CPU, and we can race with userspace, which can
> change the CPU affinity during the unplug.
> 
> _cpu_down() should not play with current->cpus_allowed at all. Instead,
> take_cpu_down() can migrate the caller of _cpu_down() after __cpu_disable()
> removes the dying CPU from cpu_online_mask.
> 
> Signed-off-by: Oleg Nesterov <oleg@redhat.com>

Acked-by: Rafael J. Wysocki <rjw@sisk.pl>
> ---
> 
>  include/linux/sched.h |    1 +
>  kernel/sched.c        |    2 +-
>  kernel/cpu.c          |   18 ++++++------------
>  3 files changed, 8 insertions(+), 13 deletions(-)
> 
> --- 34-rc1/include/linux/sched.h~4_CPU_DOWN_AFFINITY	2010-03-15 09:37:46.000000000 +0100
> +++ 34-rc1/include/linux/sched.h	2010-03-15 09:41:51.000000000 +0100
> @@ -1843,6 +1843,7 @@ extern void sched_clock_idle_sleep_event
>  extern void sched_clock_idle_wakeup_event(u64 delta_ns);
>  
>  #ifdef CONFIG_HOTPLUG_CPU
> +extern void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p);
>  extern void idle_task_exit(void);
>  #else
>  static inline void idle_task_exit(void) {}
> --- 34-rc1/kernel/sched.c~4_CPU_DOWN_AFFINITY	2010-03-15 09:41:28.000000000 +0100
> +++ 34-rc1/kernel/sched.c	2010-03-15 09:41:51.000000000 +0100
> @@ -5503,7 +5503,7 @@ static int migration_thread(void *data)
>  /*
>   * Figure out where task on dead CPU should go, use force if necessary.
>   */
> -static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
> +void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
>  {
>  	struct rq *rq = cpu_rq(dead_cpu);
>  	int needs_cpu, dest_cpu;
> --- 34-rc1/kernel/cpu.c~4_CPU_DOWN_AFFINITY	2010-03-15 09:37:46.000000000 +0100
> +++ 34-rc1/kernel/cpu.c	2010-03-15 09:41:51.000000000 +0100
> @@ -163,6 +163,7 @@ static inline void check_for_tasks(int c
>  }
>  
>  struct take_cpu_down_param {
> +	struct task_struct *caller;
>  	unsigned long mod;
>  	void *hcpu;
>  };
> @@ -171,6 +172,7 @@ struct take_cpu_down_param {
>  static int __ref take_cpu_down(void *_param)
>  {
>  	struct take_cpu_down_param *param = _param;
> +	unsigned int cpu = (unsigned long)param->hcpu;
>  	int err;
>  
>  	/* Ensure this CPU doesn't handle any more interrupts. */
> @@ -181,6 +183,8 @@ static int __ref take_cpu_down(void *_pa
>  	raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
>  				param->hcpu);
>  
> +	if (task_cpu(param->caller) == cpu)
> +		move_task_off_dead_cpu(cpu, param->caller);
>  	/* Force idle task to run as soon as we yield: it should
>  	   immediately notice cpu is offline and die quickly. */
>  	sched_idle_next();
> @@ -191,10 +195,10 @@ static int __ref take_cpu_down(void *_pa
>  static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
>  {
>  	int err, nr_calls = 0;
> -	cpumask_var_t old_allowed;
>  	void *hcpu = (void *)(long)cpu;
>  	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
>  	struct take_cpu_down_param tcd_param = {
> +		.caller = current,
>  		.mod = mod,
>  		.hcpu = hcpu,
>  	};
> @@ -205,9 +209,6 @@ static int __ref _cpu_down(unsigned int 
>  	if (!cpu_online(cpu))
>  		return -EINVAL;
>  
> -	if (!alloc_cpumask_var(&old_allowed, GFP_KERNEL))
> -		return -ENOMEM;
> -
>  	cpu_hotplug_begin();
>  	set_cpu_active(cpu, false);
>  	err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
> @@ -224,10 +225,6 @@ static int __ref _cpu_down(unsigned int 
>  		goto out_release;
>  	}
>  
> -	/* Ensure that we are not runnable on dying cpu */
> -	cpumask_copy(old_allowed, &current->cpus_allowed);
> -	set_cpus_allowed_ptr(current, cpu_active_mask);
> -
>  	err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
>  	if (err) {
>  		set_cpu_active(cpu, true);
> @@ -236,7 +233,7 @@ static int __ref _cpu_down(unsigned int 
>  					    hcpu) == NOTIFY_BAD)
>  			BUG();
>  
> -		goto out_allowed;
> +		goto out_release;
>  	}
>  	BUG_ON(cpu_online(cpu));
>  
> @@ -254,8 +251,6 @@ static int __ref _cpu_down(unsigned int 
>  
>  	check_for_tasks(cpu);
>  
> -out_allowed:
> -	set_cpus_allowed_ptr(current, old_allowed);
>  out_release:
>  	cpu_hotplug_done();
>  	if (!err) {
> @@ -263,7 +258,6 @@ out_release:
>  					    hcpu) == NOTIFY_BAD)
>  			BUG();
>  	}
> -	free_cpumask_var(old_allowed);
>  	return err;
>  }


* [tip:sched/core] sched: _cpu_down(): Don't play with current->cpus_allowed
From: tip-bot for Oleg Nesterov @ 2010-04-02 19:12 UTC
  To: linux-tip-commits
  Cc: linux-kernel, hpa, mingo, a.p.zijlstra, oleg, tglx, rjw, mingo

Commit-ID:  6a1bdc1b577ebcb65f6603c57f8347309bc4ab13
Gitweb:     http://git.kernel.org/tip/6a1bdc1b577ebcb65f6603c57f8347309bc4ab13
Author:     Oleg Nesterov <oleg@redhat.com>
AuthorDate: Mon, 15 Mar 2010 10:10:23 +0100
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Fri, 2 Apr 2010 20:12:03 +0200

sched: _cpu_down(): Don't play with current->cpus_allowed

_cpu_down() changes the current task's affinity and then restores it at
the end. The problems are well known: we can't restore old_allowed if it
was bound to the now-dead CPU, and we can race with userspace, which can
change the CPU affinity during the unplug.

_cpu_down() should not play with current->cpus_allowed at all. Instead,
take_cpu_down() can migrate the caller of _cpu_down() after __cpu_disable()
removes the dying CPU from cpu_online_mask.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20100315091023.GA9148@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h |    1 +
 kernel/cpu.c          |   18 ++++++------------
 kernel/sched.c        |    2 +-
 3 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 43c9451..8bea407 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1843,6 +1843,7 @@ extern void sched_clock_idle_sleep_event(void);
 extern void sched_clock_idle_wakeup_event(u64 delta_ns);
 
 #ifdef CONFIG_HOTPLUG_CPU
+extern void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p);
 extern void idle_task_exit(void);
 #else
 static inline void idle_task_exit(void) {}
diff --git a/kernel/cpu.c b/kernel/cpu.c
index f8cced2..8d340fa 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -163,6 +163,7 @@ static inline void check_for_tasks(int cpu)
 }
 
 struct take_cpu_down_param {
+	struct task_struct *caller;
 	unsigned long mod;
 	void *hcpu;
 };
@@ -171,6 +172,7 @@ struct take_cpu_down_param {
 static int __ref take_cpu_down(void *_param)
 {
 	struct take_cpu_down_param *param = _param;
+	unsigned int cpu = (unsigned long)param->hcpu;
 	int err;
 
 	/* Ensure this CPU doesn't handle any more interrupts. */
@@ -181,6 +183,8 @@ static int __ref take_cpu_down(void *_param)
 	raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
 				param->hcpu);
 
+	if (task_cpu(param->caller) == cpu)
+		move_task_off_dead_cpu(cpu, param->caller);
 	/* Force idle task to run as soon as we yield: it should
 	   immediately notice cpu is offline and die quickly. */
 	sched_idle_next();
@@ -191,10 +195,10 @@ static int __ref take_cpu_down(void *_param)
 static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 {
 	int err, nr_calls = 0;
-	cpumask_var_t old_allowed;
 	void *hcpu = (void *)(long)cpu;
 	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
 	struct take_cpu_down_param tcd_param = {
+		.caller = current,
 		.mod = mod,
 		.hcpu = hcpu,
 	};
@@ -205,9 +209,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 	if (!cpu_online(cpu))
 		return -EINVAL;
 
-	if (!alloc_cpumask_var(&old_allowed, GFP_KERNEL))
-		return -ENOMEM;
-
 	cpu_hotplug_begin();
 	set_cpu_active(cpu, false);
 	err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
@@ -224,10 +225,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 		goto out_release;
 	}
 
-	/* Ensure that we are not runnable on dying cpu */
-	cpumask_copy(old_allowed, &current->cpus_allowed);
-	set_cpus_allowed_ptr(current, cpu_active_mask);
-
 	err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
 	if (err) {
 		set_cpu_active(cpu, true);
@@ -236,7 +233,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 					    hcpu) == NOTIFY_BAD)
 			BUG();
 
-		goto out_allowed;
+		goto out_release;
 	}
 	BUG_ON(cpu_online(cpu));
 
@@ -254,8 +251,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 
 	check_for_tasks(cpu);
 
-out_allowed:
-	set_cpus_allowed_ptr(current, old_allowed);
 out_release:
 	cpu_hotplug_done();
 	if (!err) {
@@ -263,7 +258,6 @@ out_release:
 					    hcpu) == NOTIFY_BAD)
 			BUG();
 	}
-	free_cpumask_var(old_allowed);
 	return err;
 }
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 165b532..11119de 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5442,7 +5442,7 @@ static int migration_thread(void *data)
 /*
  * Figure out where task on dead CPU should go, use force if necessary.
  */
-static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
+void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
 {
 	struct rq *rq = cpu_rq(dead_cpu);
 	int needs_cpu, uninitialized_var(dest_cpu);
