linux-kernel.vger.kernel.org archive mirror
* [PATCH] sched: Allow per-cpu kernel threads to run on online && !active
@ 2016-03-01 15:23 Peter Zijlstra
  2016-03-03 11:43 ` Thomas Gleixner
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Peter Zijlstra @ 2016-03-01 15:23 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner; +Cc: linux-kernel, laijs, jschoenh, oleg


In order to enable symmetric hotplug, we must mirror the online &&
!active state of cpu-down on the cpu-up side.

However, to retain sanity, limit this state to per-cpu kthreads.

Aside from the change to set_cpus_allowed_ptr(), which allows moving
the per-cpu kthreads onto such a cpu, the other critical piece is the
cpu selection for pinned tasks in select_task_rq(). This avoids
dropping into select_fallback_rq().

select_fallback_rq() cannot be allowed to select !active cpus, because
it's used to migrate user tasks away, and we do not want to move user
tasks onto cpus that are in transition.

Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Jan H. Schönherr <jschoenh@amazon.de>
Cc: Oleg Nesterov <oleg@redhat.com>
Requested-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/powerpc/kernel/smp.c |    2 -
 arch/s390/kernel/smp.c    |    2 -
 include/linux/cpumask.h   |    6 +----
 kernel/sched/core.c       |   49 +++++++++++++++++++++++++++++++++++++++-------
 4 files changed, 46 insertions(+), 13 deletions(-)
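
For illustration only, not part of the patch: a minimal sketch of the kind
of caller this change is aimed at, a hotplug path that binds a strictly
per-cpu kthread to a cpu which is online but not yet active. The thread
function percpu_thread_fn and the thread name are hypothetical.

	struct task_struct *t;

	t = kthread_create(percpu_thread_fn, NULL, "percpu_worker/%u", cpu);
	if (!IS_ERR(t)) {
		/*
		 * Pinning a kthread to an online && !active cpu is what the
		 * cpu_valid_mask change in __set_cpus_allowed_ptr() permits;
		 * select_task_rq() then honours the single-cpu mask on wakeup.
		 */
		set_cpus_allowed_ptr(t, cpumask_of(cpu));
		wake_up_process(t);
	}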

--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -542,7 +542,7 @@ int __cpu_up(unsigned int cpu, struct ta
 		smp_ops->give_timebase();
 
 	/* Wait until cpu puts itself in the online & active maps */
-	while (!cpu_online(cpu) || !cpu_active(cpu))
+	while (!cpu_online(cpu))
 		cpu_relax();
 
 	return 0;
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -832,7 +832,7 @@ int __cpu_up(unsigned int cpu, struct ta
 	pcpu_attach_task(pcpu, tidle);
 	pcpu_start_fn(pcpu, smp_start_secondary, NULL);
 	/* Wait until cpu puts itself in the online & active maps */
-	while (!cpu_online(cpu) || !cpu_active(cpu))
+	while (!cpu_online(cpu))
 		cpu_relax();
 	return 0;
 }
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -745,12 +745,10 @@ set_cpu_present(unsigned int cpu, bool p
 static inline void
 set_cpu_online(unsigned int cpu, bool online)
 {
-	if (online) {
+	if (online)
 		cpumask_set_cpu(cpu, &__cpu_online_mask);
-		cpumask_set_cpu(cpu, &__cpu_active_mask);
-	} else {
+	else
 		cpumask_clear_cpu(cpu, &__cpu_online_mask);
-	}
 }
 
 static inline void
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1205,13 +1205,21 @@ void do_set_cpus_allowed(struct task_str
 static int __set_cpus_allowed_ptr(struct task_struct *p,
 				  const struct cpumask *new_mask, bool check)
 {
+	const struct cpumask *cpu_valid_mask = cpu_active_mask;
+	unsigned int dest_cpu;
 	unsigned long flags;
 	struct rq *rq;
-	unsigned int dest_cpu;
 	int ret = 0;
 
 	rq = task_rq_lock(p, &flags);
 
+	if (p->flags & PF_KTHREAD) {
+		/*
+		 * Kernel threads are allowed on online && !active CPUs
+		 */
+		cpu_valid_mask = cpu_online_mask;
+	}
+
 	/*
 	 * Must re-check here, to close a race against __kthread_bind(),
 	 * sched_setaffinity() is not guaranteed to observe the flag.
@@ -1224,18 +1232,28 @@ static int __set_cpus_allowed_ptr(struct
 	if (cpumask_equal(&p->cpus_allowed, new_mask))
 		goto out;
 
-	if (!cpumask_intersects(new_mask, cpu_active_mask)) {
+	if (!cpumask_intersects(new_mask, cpu_valid_mask)) {
 		ret = -EINVAL;
 		goto out;
 	}
 
 	do_set_cpus_allowed(p, new_mask);
 
+	if (p->flags & PF_KTHREAD) {
+		/*
+		 * For kernel threads that do indeed end up on online &&
+		 * !active we want to ensure they are strict per-cpu threads.
+		 */
+		WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) &&
+			!cpumask_intersects(new_mask, cpu_active_mask) &&
+			p->nr_cpus_allowed != 1);
+	}
+
 	/* Can the task run on the task's current CPU? If so, we're done */
 	if (cpumask_test_cpu(task_cpu(p), new_mask))
 		goto out;
 
-	dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
+	dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask);
 	if (task_running(rq, p) || p->state == TASK_WAKING) {
 		struct migration_arg arg = { p, dest_cpu };
 		/* Need help from migration thread: drop lock and wait. */
@@ -1554,6 +1572,25 @@ EXPORT_SYMBOL_GPL(kick_process);
 
 /*
  * ->cpus_allowed is protected by both rq->lock and p->pi_lock
+ *
+ * A few notes on cpu_active vs cpu_online:
+ *
+ *  - cpu_active must be a subset of cpu_online
+ *
+ *  - on cpu-up we allow per-cpu kthreads on the online && !active cpu,
+ *    see __set_cpus_allowed_ptr(). At this point the newly online
+ *    cpu isn't yet part of the sched domains, and balancing will not
+ *    see it.
+ *
+ *  - on cpu-down we clear cpu_active() to mask the sched domains and
+ *    avoid the load balancer to place new tasks on the to be removed
+ *    cpu. Existing tasks will remain running there and will be taken
+ *    off.
+ *
+ * This means that fallback selection must not select !active CPUs.
+ * And can assume that any active CPU must be online. Conversely
+ * select_task_rq() below may allow selection of !active CPUs in order
+ * to satisfy the above rules.
  */
 static int select_fallback_rq(int cpu, struct task_struct *p)
 {
@@ -1572,8 +1609,6 @@ static int select_fallback_rq(int cpu, s
 
 		/* Look for allowed, online CPU in same node. */
 		for_each_cpu(dest_cpu, nodemask) {
-			if (!cpu_online(dest_cpu))
-				continue;
 			if (!cpu_active(dest_cpu))
 				continue;
 			if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
@@ -1584,8 +1619,6 @@ static int select_fallback_rq(int cpu, s
 	for (;;) {
 		/* Any allowed, online CPU? */
 		for_each_cpu(dest_cpu, tsk_cpus_allowed(p)) {
-			if (!cpu_online(dest_cpu))
-				continue;
 			if (!cpu_active(dest_cpu))
 				continue;
 			goto out;
@@ -1637,6 +1670,8 @@ int select_task_rq(struct task_struct *p
 
 	if (p->nr_cpus_allowed > 1)
 		cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
+	else
+		cpu = cpumask_any(tsk_cpus_allowed(p));
 
 	/*
 	 * In order not to call set_task_cpu() on a blocking task we need

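Not part of the patch, just to make the new comment block above concrete: the
documented invariants could be spot-checked with something like

	/* cpu_active must always be a subset of cpu_online */
	WARN_ON(!cpumask_subset(cpu_active_mask, cpu_online_mask));

	/* fallback selection must only ever pick active (hence online) cpus */
	WARN_ON(!cpu_active(fallback_cpu));

where fallback_cpu stands in for the cpu returned by select_fallback_rq().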

* Re: [PATCH] sched: Allow per-cpu kernel threads to run on online && !active
  2016-03-01 15:23 [PATCH] sched: Allow per-cpu kernel threads to run on online && !active Peter Zijlstra
@ 2016-03-03 11:43 ` Thomas Gleixner
  2016-03-03 11:55   ` Peter Zijlstra
  2016-05-05 11:21 ` [tip:smp/hotplug] " tip-bot for Peter Zijlstra (Intel)
  2016-05-06 13:03 ` tip-bot for Peter Zijlstra (Intel)
  2 siblings, 1 reply; 7+ messages in thread
From: Thomas Gleixner @ 2016-03-03 11:43 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Ingo Molnar, linux-kernel, laijs, jschoenh, oleg

On Tue, 1 Mar 2016, Peter Zijlstra wrote:
> Tested-by: Thomas Gleixner <tglx@linutronix.de>

Works nicely, especially when we move the set_cpu_active() calls to be the last
state in the state machine. See patch below.

Thanks,

	tglx

8<--------------------------

Subject: cpu/hotplug: Handle cpu active as last state
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 03 Mar 2016 12:33:37 +0100

We want to make sure that everything is initialized before we allow scheduling
of arbitrary work on an upcoming CPU, and that no more random work lands on it
when we shut it down.

Now that the scheduler handles this nicely via the cpu_active_mask, we can make
set_cpu_active() the last action when a cpu is brought up and the first action
when it goes down.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
---
 include/linux/cpuhotplug.h |    1 +
 kernel/cpu.c               |   19 +++++++++++++++++--
 kernel/sched/core.c        |   18 ------------------
 3 files changed, 18 insertions(+), 20 deletions(-)
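
Roughly, and assuming the hotplug core runs the cpuhp_ap_states startup
callbacks in increasing state order on bring-up and the teardown callbacks in
decreasing order on tear-down, the resulting ordering is:

	cpu-up:    ... -> notify_online -> dynamic states -> cpu_activate()   -> CPUHP_ONLINE
	cpu-down:  CPUHP_ONLINE -> cpu_deactivate() -> dynamic states -> notify_down_prepare -> ...

so the cpu is online && !active on both edges, and only strict per-cpu
kthreads can be placed on it during that window.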

--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -16,6 +16,7 @@ enum cpuhp_state {
 	CPUHP_AP_NOTIFY_ONLINE,
 	CPUHP_AP_ONLINE_DYN,
 	CPUHP_AP_ONLINE_DYN_END		= CPUHP_AP_ONLINE_DYN + 30,
+	CPUHP_AP_ACTIVE,
 	CPUHP_ONLINE,
 };
 
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -904,8 +904,6 @@ void cpuhp_online_idle(enum cpuhp_state
 
 	st->state = CPUHP_AP_ONLINE_IDLE;
 
-	/* The cpu is marked online, set it active now */
-	set_cpu_active(cpu, true);
 	/* Unpark the stopper thread and the hotplug thread of this cpu */
 	stop_machine_unpark(cpu);
 	kthread_unpark(st->thread);
@@ -917,6 +915,18 @@ void cpuhp_online_idle(enum cpuhp_state
 		complete(&st->done);
 }
 
+static int cpu_activate(unsigned int cpu)
+{
+	set_cpu_active(cpu, true);
+	return 0;
+}
+
+static int cpu_deactivate(unsigned int cpu)
+{
+	set_cpu_active(cpu, false);
+	return 0;
+}
+
 /* Requires cpu_add_remove_lock to be held */
 static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
 {
@@ -1213,6 +1223,11 @@ static struct cpuhp_step cpuhp_ap_states
 		.startup		= notify_online,
 		.teardown		= notify_down_prepare,
 	},
+	[CPUHP_AP_ACTIVE] = {
+		.name			= "active",
+		.startup		= cpu_activate,
+		.teardown		= cpu_deactivate,
+	},
 #endif
 	[CPUHP_ONLINE] = {
 		.name			= "online",
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5726,23 +5726,6 @@ static int sched_cpu_active(struct notif
 	case CPU_STARTING:
 		set_cpu_rq_start_time();
 		return NOTIFY_OK;
-
-	case CPU_DOWN_FAILED:
-		set_cpu_active(cpu, true);
-		return NOTIFY_OK;
-
-	default:
-		return NOTIFY_DONE;
-	}
-}
-
-static int sched_cpu_inactive(struct notifier_block *nfb,
-					unsigned long action, void *hcpu)
-{
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_DOWN_PREPARE:
-		set_cpu_active((long)hcpu, false);
-		return NOTIFY_OK;
 	default:
 		return NOTIFY_DONE;
 	}
@@ -5761,7 +5744,6 @@ static int __init migration_init(void)
 
 	/* Register cpu active notifiers */
 	cpu_notifier(sched_cpu_active, CPU_PRI_SCHED_ACTIVE);
-	cpu_notifier(sched_cpu_inactive, CPU_PRI_SCHED_INACTIVE);
 
 	return 0;
 }


* Re: [PATCH] sched: Allow per-cpu kernel threads to run on online && !active
  2016-03-03 11:43 ` Thomas Gleixner
@ 2016-03-03 11:55   ` Peter Zijlstra
  2016-03-03 11:58     ` Thomas Gleixner
  0 siblings, 1 reply; 7+ messages in thread
From: Peter Zijlstra @ 2016-03-03 11:55 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Ingo Molnar, linux-kernel, jschoenh, oleg

On Thu, Mar 03, 2016 at 12:43:53PM +0100, Thomas Gleixner wrote:

> Subject: cpu/hotplug: Handle cpu active as last state
> From: Thomas Gleixner <tglx@linutronix.de>
> Date: Thu, 03 Mar 2016 12:33:37 +0100
> 
> We want to make sure that everything is initialized before we allow scheduling
> of arbitrary work on an upcoming CPU, and that no more random work lands on it
> when we shut it down.
> 
> Now that the scheduler handles this nicely via the cpu_active_mask, we can make
> set_cpu_active() the last action when a cpu is brought up and the first action
> when it goes down.
> 
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> Cc: Peter Zijlstra <peterz@infradead.org>

Nice!

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>

> ---
>  include/linux/cpuhotplug.h |    1 +
>  kernel/cpu.c               |   19 +++++++++++++++++--
>  kernel/sched/core.c        |   18 ------------------
>  3 files changed, 18 insertions(+), 20 deletions(-)
> 
> +++ b/kernel/sched/core.c
> @@ -5726,23 +5726,6 @@ static int sched_cpu_active(struct notif
>  	case CPU_STARTING:
>  		set_cpu_rq_start_time();

If we stick that in notify_starting() we can kill all of this.

>  		return NOTIFY_OK;
> -
> -	case CPU_DOWN_FAILED:
> -		set_cpu_active(cpu, true);
> -		return NOTIFY_OK;
> -
> -	default:
> -		return NOTIFY_DONE;
> -	}
> -}
> -
> -static int sched_cpu_inactive(struct notifier_block *nfb,
> -					unsigned long action, void *hcpu)
> -{
> -	switch (action & ~CPU_TASKS_FROZEN) {
> -	case CPU_DOWN_PREPARE:
> -		set_cpu_active((long)hcpu, false);
> -		return NOTIFY_OK;
>  	default:
>  		return NOTIFY_DONE;
>  	}
> @@ -5761,7 +5744,6 @@ static int __init migration_init(void)
>  
>  	/* Register cpu active notifiers */
>  	cpu_notifier(sched_cpu_active, CPU_PRI_SCHED_ACTIVE);
> -	cpu_notifier(sched_cpu_inactive, CPU_PRI_SCHED_INACTIVE);
>  
>  	return 0;
>  }


* Re: [PATCH] sched: Allow per-cpu kernel threads to run on online && !active
  2016-03-03 11:55   ` Peter Zijlstra
@ 2016-03-03 11:58     ` Thomas Gleixner
  2016-03-03 17:45       ` Thomas Gleixner
  0 siblings, 1 reply; 7+ messages in thread
From: Thomas Gleixner @ 2016-03-03 11:58 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Ingo Molnar, linux-kernel, jschoenh, oleg

On Thu, 3 Mar 2016, Peter Zijlstra wrote:
> > +++ b/kernel/sched/core.c
> > @@ -5726,23 +5726,6 @@ static int sched_cpu_active(struct notif
> >  	case CPU_STARTING:
> >  		set_cpu_rq_start_time();
> 
> If we stick that in notify_starting() we can kill all of this.

Yep. I'm going to kill that one by one :)


* Re: [PATCH] sched: Allow per-cpu kernel threads to run on online && !active
  2016-03-03 11:58     ` Thomas Gleixner
@ 2016-03-03 17:45       ` Thomas Gleixner
  0 siblings, 0 replies; 7+ messages in thread
From: Thomas Gleixner @ 2016-03-03 17:45 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Ingo Molnar, linux-kernel, jschoenh, oleg

On Thu, 3 Mar 2016, Thomas Gleixner wrote:

> On Thu, 3 Mar 2016, Peter Zijlstra wrote:
> > > +++ b/kernel/sched/core.c
> > > @@ -5726,23 +5726,6 @@ static int sched_cpu_active(struct notif
> > >  	case CPU_STARTING:
> > >  		set_cpu_rq_start_time();
> > 
> > If we stick that in notify_starting() we can kill all of this.
> 
> Yep. I'm going to kill that one by one :)

After I'm done fixing the wreckage which is caused by moving set_active()
alone. The numa and cpuset muck rely on it ....

But now that we can delay random workload on the upcoming/outgoing cpu, we can
move that stuff altogether.

Thanks,

	tglx


* [tip:smp/hotplug] sched: Allow per-cpu kernel threads to run on online && !active
  2016-03-01 15:23 [PATCH] sched: Allow per-cpu kernel threads to run on online && !active Peter Zijlstra
  2016-03-03 11:43 ` Thomas Gleixner
@ 2016-05-05 11:21 ` tip-bot for Peter Zijlstra (Intel)
  2016-05-06 13:03 ` tip-bot for Peter Zijlstra (Intel)
  2 siblings, 0 replies; 7+ messages in thread
From: tip-bot for Peter Zijlstra (Intel) @ 2016-05-05 11:21 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: oleg, jschoenh, peterz, linux-kernel, laijs, mingo, tglx, hpa

Commit-ID:  618d6e31623149c6203b46850e2e76ee0f29e577
Gitweb:     http://git.kernel.org/tip/618d6e31623149c6203b46850e2e76ee0f29e577
Author:     Peter Zijlstra (Intel) <peterz@infradead.org>
AuthorDate: Thu, 10 Mar 2016 12:54:08 +0100
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Thu, 5 May 2016 13:17:52 +0200

sched: Allow per-cpu kernel threads to run on online && !active

In order to enable symmetric hotplug, we must mirror the online &&
!active state of cpu-down on the cpu-up side.

However, to retain sanity, limit this state to per-cpu kthreads.

Aside from the change to set_cpus_allowed_ptr(), which allows moving
the per-cpu kthreads onto such a cpu, the other critical piece is the
cpu selection for pinned tasks in select_task_rq(). This avoids
dropping into select_fallback_rq().

select_fallback_rq() cannot be allowed to select !active cpus, because
it's used to migrate user tasks away, and we do not want to move user
tasks onto cpus that are in transition.

Requested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Jan H. Schönherr <jschoenh@amazon.de>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: rt@linutronix.de
Link: http://lkml.kernel.org/r/20160301152303.GV6356@twins.programming.kicks-ass.net
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

---
 arch/powerpc/kernel/smp.c |  2 +-
 arch/s390/kernel/smp.c    |  2 +-
 include/linux/cpumask.h   |  6 ++----
 kernel/sched/core.c       | 49 ++++++++++++++++++++++++++++++++++++++++-------
 4 files changed, 46 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 8cac1eb..55c924b 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -565,7 +565,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
 		smp_ops->give_timebase();
 
 	/* Wait until cpu puts itself in the online & active maps */
-	while (!cpu_online(cpu) || !cpu_active(cpu))
+	while (!cpu_online(cpu))
 		cpu_relax();
 
 	return 0;
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 40a6b4f..7b89a75 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -832,7 +832,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
 	pcpu_attach_task(pcpu, tidle);
 	pcpu_start_fn(pcpu, smp_start_secondary, NULL);
 	/* Wait until cpu puts itself in the online & active maps */
-	while (!cpu_online(cpu) || !cpu_active(cpu))
+	while (!cpu_online(cpu))
 		cpu_relax();
 	return 0;
 }
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 40cee6b..e828cf6 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -743,12 +743,10 @@ set_cpu_present(unsigned int cpu, bool present)
 static inline void
 set_cpu_online(unsigned int cpu, bool online)
 {
-	if (online) {
+	if (online)
 		cpumask_set_cpu(cpu, &__cpu_online_mask);
-		cpumask_set_cpu(cpu, &__cpu_active_mask);
-	} else {
+	else
 		cpumask_clear_cpu(cpu, &__cpu_online_mask);
-	}
 }
 
 static inline void
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8b489fc..8bfd7d4 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1082,13 +1082,21 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
 static int __set_cpus_allowed_ptr(struct task_struct *p,
 				  const struct cpumask *new_mask, bool check)
 {
+	const struct cpumask *cpu_valid_mask = cpu_active_mask;
+	unsigned int dest_cpu;
 	unsigned long flags;
 	struct rq *rq;
-	unsigned int dest_cpu;
 	int ret = 0;
 
 	rq = task_rq_lock(p, &flags);
 
+	if (p->flags & PF_KTHREAD) {
+		/*
+		 * Kernel threads are allowed on online && !active CPUs
+		 */
+		cpu_valid_mask = cpu_online_mask;
+	}
+
 	/*
 	 * Must re-check here, to close a race against __kthread_bind(),
 	 * sched_setaffinity() is not guaranteed to observe the flag.
@@ -1101,18 +1109,28 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
 	if (cpumask_equal(&p->cpus_allowed, new_mask))
 		goto out;
 
-	if (!cpumask_intersects(new_mask, cpu_active_mask)) {
+	if (!cpumask_intersects(new_mask, cpu_valid_mask)) {
 		ret = -EINVAL;
 		goto out;
 	}
 
 	do_set_cpus_allowed(p, new_mask);
 
+	if (p->flags & PF_KTHREAD) {
+		/*
+		 * For kernel threads that do indeed end up on online &&
+		 * !active we want to ensure they are strict per-cpu threads.
+		 */
+		WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) &&
+			!cpumask_intersects(new_mask, cpu_active_mask) &&
+			p->nr_cpus_allowed != 1);
+	}
+
 	/* Can the task run on the task's current CPU? If so, we're done */
 	if (cpumask_test_cpu(task_cpu(p), new_mask))
 		goto out;
 
-	dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
+	dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask);
 	if (task_running(rq, p) || p->state == TASK_WAKING) {
 		struct migration_arg arg = { p, dest_cpu };
 		/* Need help from migration thread: drop lock and wait. */
@@ -1431,6 +1449,25 @@ EXPORT_SYMBOL_GPL(kick_process);
 
 /*
  * ->cpus_allowed is protected by both rq->lock and p->pi_lock
+ *
+ * A few notes on cpu_active vs cpu_online:
+ *
+ *  - cpu_active must be a subset of cpu_online
+ *
+ *  - on cpu-up we allow per-cpu kthreads on the online && !active cpu,
+ *    see __set_cpus_allowed_ptr(). At this point the newly online
+ *    cpu isn't yet part of the sched domains, and balancing will not
+ *    see it.
+ *
+ *  - on cpu-down we clear cpu_active() to mask the sched domains and
+ *    avoid the load balancer to place new tasks on the to be removed
+ *    cpu. Existing tasks will remain running there and will be taken
+ *    off.
+ *
+ * This means that fallback selection must not select !active CPUs.
+ * And can assume that any active CPU must be online. Conversely
+ * select_task_rq() below may allow selection of !active CPUs in order
+ * to satisfy the above rules.
  */
 static int select_fallback_rq(int cpu, struct task_struct *p)
 {
@@ -1449,8 +1486,6 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
 
 		/* Look for allowed, online CPU in same node. */
 		for_each_cpu(dest_cpu, nodemask) {
-			if (!cpu_online(dest_cpu))
-				continue;
 			if (!cpu_active(dest_cpu))
 				continue;
 			if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
@@ -1461,8 +1496,6 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
 	for (;;) {
 		/* Any allowed, online CPU? */
 		for_each_cpu(dest_cpu, tsk_cpus_allowed(p)) {
-			if (!cpu_online(dest_cpu))
-				continue;
 			if (!cpu_active(dest_cpu))
 				continue;
 			goto out;
@@ -1514,6 +1547,8 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
 
 	if (p->nr_cpus_allowed > 1)
 		cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
+	else
+		cpu = cpumask_any(tsk_cpus_allowed(p));
 
 	/*
 	 * In order not to call set_task_cpu() on a blocking task we need


* [tip:smp/hotplug] sched: Allow per-cpu kernel threads to run on online && !active
  2016-03-01 15:23 [PATCH] sched: Allow per-cpu kernel threads to run on online && !active Peter Zijlstra
  2016-03-03 11:43 ` Thomas Gleixner
  2016-05-05 11:21 ` [tip:smp/hotplug] " tip-bot for Peter Zijlstra (Intel)
@ 2016-05-06 13:03 ` tip-bot for Peter Zijlstra (Intel)
  2 siblings, 0 replies; 7+ messages in thread
From: tip-bot for Peter Zijlstra (Intel) @ 2016-05-06 13:03 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: mingo, laijs, hpa, oleg, linux-kernel, tglx, peterz, jschoenh

Commit-ID:  e9d867a67fd03ccc07248ca4e9c2f74fed494d5b
Gitweb:     http://git.kernel.org/tip/e9d867a67fd03ccc07248ca4e9c2f74fed494d5b
Author:     Peter Zijlstra (Intel) <peterz@infradead.org>
AuthorDate: Thu, 10 Mar 2016 12:54:08 +0100
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Fri, 6 May 2016 14:58:22 +0200

sched: Allow per-cpu kernel threads to run on online && !active

In order to enable symmetric hotplug, we must mirror the online &&
!active state of cpu-down on the cpu-up side.

However, to retain sanity, limit this state to per-cpu kthreads.

Aside from the change to set_cpus_allowed_ptr(), which allows moving
the per-cpu kthreads onto such a cpu, the other critical piece is the
cpu selection for pinned tasks in select_task_rq(). This avoids
dropping into select_fallback_rq().

select_fallback_rq() cannot be allowed to select !active cpus, because
it's used to migrate user tasks away, and we do not want to move user
tasks onto cpus that are in transition.

Requested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Jan H. Schönherr <jschoenh@amazon.de>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: rt@linutronix.de
Link: http://lkml.kernel.org/r/20160301152303.GV6356@twins.programming.kicks-ass.net
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/powerpc/kernel/smp.c |  2 +-
 arch/s390/kernel/smp.c    |  2 +-
 include/linux/cpumask.h   |  6 ++----
 kernel/sched/core.c       | 49 ++++++++++++++++++++++++++++++++++++++++-------
 4 files changed, 46 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 8cac1eb..55c924b 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -565,7 +565,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
 		smp_ops->give_timebase();
 
 	/* Wait until cpu puts itself in the online & active maps */
-	while (!cpu_online(cpu) || !cpu_active(cpu))
+	while (!cpu_online(cpu))
 		cpu_relax();
 
 	return 0;
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 40a6b4f..7b89a75 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -832,7 +832,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
 	pcpu_attach_task(pcpu, tidle);
 	pcpu_start_fn(pcpu, smp_start_secondary, NULL);
 	/* Wait until cpu puts itself in the online & active maps */
-	while (!cpu_online(cpu) || !cpu_active(cpu))
+	while (!cpu_online(cpu))
 		cpu_relax();
 	return 0;
 }
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 40cee6b..e828cf6 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -743,12 +743,10 @@ set_cpu_present(unsigned int cpu, bool present)
 static inline void
 set_cpu_online(unsigned int cpu, bool online)
 {
-	if (online) {
+	if (online)
 		cpumask_set_cpu(cpu, &__cpu_online_mask);
-		cpumask_set_cpu(cpu, &__cpu_active_mask);
-	} else {
+	else
 		cpumask_clear_cpu(cpu, &__cpu_online_mask);
-	}
 }
 
 static inline void
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8b489fc..8bfd7d4 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1082,13 +1082,21 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
 static int __set_cpus_allowed_ptr(struct task_struct *p,
 				  const struct cpumask *new_mask, bool check)
 {
+	const struct cpumask *cpu_valid_mask = cpu_active_mask;
+	unsigned int dest_cpu;
 	unsigned long flags;
 	struct rq *rq;
-	unsigned int dest_cpu;
 	int ret = 0;
 
 	rq = task_rq_lock(p, &flags);
 
+	if (p->flags & PF_KTHREAD) {
+		/*
+		 * Kernel threads are allowed on online && !active CPUs
+		 */
+		cpu_valid_mask = cpu_online_mask;
+	}
+
 	/*
 	 * Must re-check here, to close a race against __kthread_bind(),
 	 * sched_setaffinity() is not guaranteed to observe the flag.
@@ -1101,18 +1109,28 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
 	if (cpumask_equal(&p->cpus_allowed, new_mask))
 		goto out;
 
-	if (!cpumask_intersects(new_mask, cpu_active_mask)) {
+	if (!cpumask_intersects(new_mask, cpu_valid_mask)) {
 		ret = -EINVAL;
 		goto out;
 	}
 
 	do_set_cpus_allowed(p, new_mask);
 
+	if (p->flags & PF_KTHREAD) {
+		/*
+		 * For kernel threads that do indeed end up on online &&
+		 * !active we want to ensure they are strict per-cpu threads.
+		 */
+		WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) &&
+			!cpumask_intersects(new_mask, cpu_active_mask) &&
+			p->nr_cpus_allowed != 1);
+	}
+
 	/* Can the task run on the task's current CPU? If so, we're done */
 	if (cpumask_test_cpu(task_cpu(p), new_mask))
 		goto out;
 
-	dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
+	dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask);
 	if (task_running(rq, p) || p->state == TASK_WAKING) {
 		struct migration_arg arg = { p, dest_cpu };
 		/* Need help from migration thread: drop lock and wait. */
@@ -1431,6 +1449,25 @@ EXPORT_SYMBOL_GPL(kick_process);
 
 /*
  * ->cpus_allowed is protected by both rq->lock and p->pi_lock
+ *
+ * A few notes on cpu_active vs cpu_online:
+ *
+ *  - cpu_active must be a subset of cpu_online
+ *
+ *  - on cpu-up we allow per-cpu kthreads on the online && !active cpu,
+ *    see __set_cpus_allowed_ptr(). At this point the newly online
+ *    cpu isn't yet part of the sched domains, and balancing will not
+ *    see it.
+ *
+ *  - on cpu-down we clear cpu_active() to mask the sched domains and
+ *    avoid the load balancer to place new tasks on the to be removed
+ *    cpu. Existing tasks will remain running there and will be taken
+ *    off.
+ *
+ * This means that fallback selection must not select !active CPUs.
+ * And can assume that any active CPU must be online. Conversely
+ * select_task_rq() below may allow selection of !active CPUs in order
+ * to satisfy the above rules.
  */
 static int select_fallback_rq(int cpu, struct task_struct *p)
 {
@@ -1449,8 +1486,6 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
 
 		/* Look for allowed, online CPU in same node. */
 		for_each_cpu(dest_cpu, nodemask) {
-			if (!cpu_online(dest_cpu))
-				continue;
 			if (!cpu_active(dest_cpu))
 				continue;
 			if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
@@ -1461,8 +1496,6 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
 	for (;;) {
 		/* Any allowed, online CPU? */
 		for_each_cpu(dest_cpu, tsk_cpus_allowed(p)) {
-			if (!cpu_online(dest_cpu))
-				continue;
 			if (!cpu_active(dest_cpu))
 				continue;
 			goto out;
@@ -1514,6 +1547,8 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
 
 	if (p->nr_cpus_allowed > 1)
 		cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
+	else
+		cpu = cpumask_any(tsk_cpus_allowed(p));
 
 	/*
 	 * In order not to call set_task_cpu() on a blocking task we need

