All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH RFC] sched: Allow migrating kthreads into online but inactive CPUs
@ 2017-06-17 12:10 Tejun Heo
  2017-06-17 12:11 ` simple repro case Tejun Heo
  2017-07-25 16:58 ` [PATCH RFC] sched: Allow migrating kthreads into online but inactive CPUs Peter Zijlstra
  0 siblings, 2 replies; 8+ messages in thread
From: Tejun Heo @ 2017-06-17 12:10 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra
  Cc: Paul E. McKenney, Steven Rostedt, linux-kernel, Lai Jiangshan,
	kernel-team

Per-cpu workqueues have been tripping CPU affinity sanity checks while
a CPU is being offlined.  A per-cpu kworker ends up running on a CPU
which isn't its target CPU while the CPU is online but inactive.

While the scheduler allows kthreads to wake up on an online but
inactive CPU, it doesn't allow a running kthread to be migrated to
such a CPU, which leads to an odd situation where setting affinity on
a sleeping and running kthread leads to different results.

Each mem-reclaim workqueue has one rescuer which guarantees forward
progress and the rescuer needs to bind itself to the CPU which needs
help in making forward progress; however, due to the above issue,
while set_cpus_allowed_ptr() succeeds, the rescuer doesn't end up on
the correct CPU if the CPU is in the process of going offline,
tripping the sanity check and executing the work item on the wrong
CPU.

This patch updates __migrate_task() so that kthreads can be migrated
into an inactive but online CPU.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Reported-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/sched/core.c |    9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 803c3bc274c4..1500217ce4b4 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -980,8 +980,13 @@ struct migration_arg {
 static struct rq *__migrate_task(struct rq *rq, struct rq_flags *rf,
 				 struct task_struct *p, int dest_cpu)
 {
-	if (unlikely(!cpu_active(dest_cpu)))
-		return rq;
+	if (p->flags & PF_KTHREAD) {
+		if (unlikely(!cpu_online(dest_cpu)))
+			return rq;
+	} else {
+		if (unlikely(!cpu_active(dest_cpu)))
+			return rq;
+	}
 
 	/* Affinity changed (again). */
 	if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed))

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: simple repro case
  2017-06-17 12:10 [PATCH RFC] sched: Allow migrating kthreads into online but inactive CPUs Tejun Heo
@ 2017-06-17 12:11 ` Tejun Heo
  2017-06-21 14:24   ` Steven Rostedt
  2017-07-25 16:58 ` [PATCH RFC] sched: Allow migrating kthreads into online but inactive CPUs Peter Zijlstra
  1 sibling, 1 reply; 8+ messages in thread
From: Tejun Heo @ 2017-06-17 12:11 UTC (permalink / raw)
  To: Ingo Molnar, Peter Zijlstra
  Cc: Paul E. McKenney, Steven Rostedt, linux-kernel, Lai Jiangshan,
	kernel-team

Here's a simple repro.  The test code runs whenever a CPU goes
off/online.  The test kthread is created on a different CPU and
migrated to the target CPU while running.  Without the previous patch
applied, the kthread ends up running on the wrong CPU.

Thanks.

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index c74bf39ef764..faed30edbb21 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -4648,12 +4648,56 @@ int workqueue_prepare_cpu(unsigned int cpu)
 	return 0;
 }
 
+#include <linux/delay.h>
+
+static int test_last_cpu = -1;
+
+static int test_kthread_migration_threadfn(void *data)
+{
+	while (!kthread_should_stop()) {
+		test_last_cpu = raw_smp_processor_id();
+		cond_resched();
+	}
+	return 0;
+}
+
+static void test_kthread_migration(int inactive_cpu)
+{
+	int start_cpu = cpumask_any(cpu_online_mask);
+	struct task_struct *task;
+
+	printk("TEST: cpu %d inactive, starting on %d and migrating (active/online=%*pbl/%*pbl)\n",
+	       inactive_cpu, start_cpu, cpumask_pr_args(cpu_active_mask),
+	       cpumask_pr_args(cpu_online_mask));
+
+	task = kthread_create(test_kthread_migration_threadfn, NULL, "test");
+	if (IS_ERR(task)) {
+		printk("TEST: kthread_create failed with %ld\n", PTR_ERR(task));
+		return;
+	}
+
+	kthread_bind(task, start_cpu);
+	wake_up_process(task);
+	msleep(100);
+	printk("TEST: test_last_cpu=%d cpus_allowed=%*pbl\n",
+	       test_last_cpu, cpumask_pr_args(&task->cpus_allowed));
+	printk("TEST: migrating to inactve cpu %d\n", inactive_cpu);
+	set_cpus_allowed_ptr(task, cpumask_of(inactive_cpu));
+	msleep(100);
+	printk("TEST: test_last_cpu=%d cpus_allowed=%*pbl\n",
+	       test_last_cpu, cpumask_pr_args(&task->cpus_allowed));
+	kthread_stop(task);
+	return;
+}
+
 int workqueue_online_cpu(unsigned int cpu)
 {
 	struct worker_pool *pool;
 	struct workqueue_struct *wq;
 	int pi;
 
+	test_kthread_migration(cpu);
+
 	mutex_lock(&wq_pool_mutex);
 
 	for_each_pool(pool, pi) {
@@ -4680,6 +4724,8 @@ int workqueue_offline_cpu(unsigned int cpu)
 	struct work_struct unbind_work;
 	struct workqueue_struct *wq;
 
+	test_kthread_migration(cpu);
+
 	/* unbinding per-cpu workers should happen on the local CPU */
 	INIT_WORK_ONSTACK(&unbind_work, wq_unbind_fn);
 	queue_work_on(cpu, system_highpri_wq, &unbind_work);

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: simple repro case
  2017-06-17 12:11 ` simple repro case Tejun Heo
@ 2017-06-21 14:24   ` Steven Rostedt
  2017-06-21 17:59     ` Tejun Heo
  0 siblings, 1 reply; 8+ messages in thread
From: Steven Rostedt @ 2017-06-21 14:24 UTC (permalink / raw)
  To: Tejun Heo
  Cc: Ingo Molnar, Peter Zijlstra, Paul E. McKenney, linux-kernel,
	Lai Jiangshan, kernel-team

On Sat, 17 Jun 2017 08:11:49 -0400
Tejun Heo <tj@kernel.org> wrote:

> Here's a simple repro.  The test code runs whenever a CPU goes
> off/online.  The test kthread is created on a different CPU and
> migrated to the target CPU while running.  Without the previous patch
> applied, the kthread ends up running on the wrong CPU.
> 

Hmm, I'm not able to trigger the warn_on, with this patch applied.

Adding a trace_printk("here!\n") just above the warn_on in
wq_worker_sleeping(), and doing the following:

 # cd /sys/kernel/debug/tracing
 # echo 1 > events/printk/console/enable
 # echo 1 > events/sched/sched_migrate_task/enable
 # echo 0 > /sys/devices/system/cpu/cpu2/online
 # echo 1 > /sys/devices/system/cpu/cpu2/online
 # cat trace
#
#                              _-----=> irqs-off
#                             / _----=> need-resched
#                            | / _---=> hardirq/softirq
#                            || / _--=> preempt-depth
#                            ||| /     delay
#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
#              | |       |   ||||       |         |
          <idle>-0     [000] d.s4   744.572889: sched_migrate_task: comm=rcu_sched pid=8 prio=120 orig_cpu=1 dest_cpu=0
     kworker/0:2-406   [000] d..2   744.750944: wq_worker_sleeping: here
    kworker/u8:3-111   [001] d..3   746.029980: sched_migrate_task: comm=bash pid=2390 prio=120 orig_cpu=1 dest_cpu=2
     kworker/0:2-406   [000] d..2   746.798796: wq_worker_sleeping: here
          <idle>-0     [001] d.s4   747.164692: sched_migrate_task: comm=rcu_sched pid=8 prio=120 orig_cpu=0 dest_cpu=1
          <idle>-0     [000] d.s4   747.632662: sched_migrate_task: comm=rcu_sched pid=8 prio=120 orig_cpu=1 dest_cpu=0
     kworker/0:2-406   [000] d..2   748.846646: wq_worker_sleeping: here
     kworker/0:2-406   [000] d..2   750.894525: wq_worker_sleeping: here
          <idle>-0     [002] d.s4   751.188404: sched_migrate_task: comm=rcu_sched pid=8 prio=120 orig_cpu=0 dest_cpu=2
     kworker/2:3-118   [002] d..2   751.194212: wq_worker_sleeping: here
         cpuhp/2-20    [002] d..1   751.204894: console: [  751.018261] TEST: cpu 2 inactive, starting on 0 and migrating (active/online=0-1,3/0-3)
            test-2633  [000] d.s3   751.216796: sched_migrate_task: comm=systemd-journal pid=1839 prio=120 orig_cpu=0 dest_cpu=1
 systemd-journal-1839  [001] d..4   751.216851: sched_migrate_task: comm=jbd2/dm-0-8 pid=1754 prio=120 orig_cpu=0 dest_cpu=3
         cpuhp/2-20    [002] d..1   751.318375: console: [  751.131745] TEST: test_last_cpu=0 cpus_allowed=0
         cpuhp/2-20    [002] d..1   751.324249: console: [  751.137621] TEST: migrating to inactve cpu 2
         cpuhp/2-20    [002] d..1   751.438368: console: [  751.251738] TEST: test_last_cpu=0 cpus_allowed=2
            bash-2390  [000] d..1   751.445862: console: [  751.259232] smpboot: CPU 2 is now offline
          <idle>-0     [000] d.h5   751.452370: sched_migrate_task: comm=systemd-journal pid=1839 prio=120 orig_cpu=1 dest_cpu=0
          <idle>-0     [000] dNs3   751.452392: sched_migrate_task: comm=rcu_sched pid=8 prio=120 orig_cpu=2 dest_cpu=0
            bash-2390  [000] d.s2   751.464359: sched_migrate_task: comm=rcu_sched pid=8 prio=120 orig_cpu=0 dest_cpu=1
            bash-2390  [000] d..2   751.466028: sched_migrate_task: comm=kworker/2:3 pid=118 prio=120 orig_cpu=2 dest_cpu=0
            bash-2390  [000] d..4   751.466236: sched_migrate_task: comm=libvirtd pid=2197 prio=120 orig_cpu=2 dest_cpu=0
            bash-2390  [000] d..3   751.466479: sched_migrate_task: comm=kworker/u8:5 pid=113 prio=120 orig_cpu=3 dest_cpu=1
    kworker/u8:5-113   [001] d..3   751.466519: sched_migrate_task: comm=sshd pid=2389 prio=120 orig_cpu=0 dest_cpu=1
    kworker/u8:5-113   [001] d..3   751.466593: sched_migrate_task: comm=sshd pid=2389 prio=120 orig_cpu=1 dest_cpu=0
   systemd-udevd-2634  [001] d.s3   751.468358: sched_migrate_task: comm=rcu_sched pid=8 prio=120 orig_cpu=1 dest_cpu=3
          <idle>-0     [000] d.s4   751.472372: sched_migrate_task: comm=rcu_sched pid=8 prio=120 orig_cpu=3 dest_cpu=0
 systemd-journal-1839  [000] d..3   751.530744: sched_migrate_task: comm=in:imjournal pid=2152 prio=120 orig_cpu=2 dest_cpu=0
    in:imjournal-2152  [000] d..2   751.530859: sched_migrate_task: comm=rs:main Q:Reg pid=2360 prio=120 orig_cpu=1 dest_cpu=3
    in:imjournal-2152  [000] d.s3   751.531370: sched_migrate_task: comm=rcu_sched pid=8 prio=120 orig_cpu=0 dest_cpu=1
        sendmail-2334  [001] d.s3   752.169327: sched_migrate_task: comm=rcu_sched pid=8 prio=120 orig_cpu=1 dest_cpu=3
     kworker/0:2-406   [000] d..2   752.942348: wq_worker_sleeping: here
          <idle>-0     [000] d.s3   753.582220: sched_migrate_task: comm=khugepaged pid=48 prio=139 orig_cpu=2 dest_cpu=0
          <idle>-0     [000] d.s4   753.772209: sched_migrate_task: comm=rcu_sched pid=8 prio=120 orig_cpu=3 dest_cpu=0
    kworker/u8:5-113   [001] d..3   754.947065: sched_migrate_task: comm=sshd pid=2389 prio=120 orig_cpu=0 dest_cpu=3
            bash-2390  [000] d.s3   754.956107: sched_migrate_task: comm=rcu_sched pid=8 prio=120 orig_cpu=0 dest_cpu=1
            bash-2390  [000] d..1   754.959938: console: [  754.773306] smpboot: Booting Node 0 Processor 2 APIC 0x4
     migration/1-15    [001] d..4   754.968577: sched_migrate_task: comm=bash pid=2390 prio=120 orig_cpu=0 dest_cpu=1
         cpuhp/2-20    [002] d..1   754.970747: console: [  754.784112] TEST: cpu 2 inactive, starting on 0 and migrating (active/online=0-1,3/0-3)
         cpuhp/2-20    [002] d..2   754.981195: sched_migrate_task: comm=test pid=2635 prio=120 orig_cpu=3 dest_cpu=0
     kworker/0:2-406   [000] d..2   754.990133: wq_worker_sleeping: here
         cpuhp/2-20    [002] d..1   755.086101: console: [  754.899471] TEST: test_last_cpu=0 cpus_allowed=0
         cpuhp/2-20    [002] d..1   755.091957: console: [  754.905328] TEST: migrating to inactve cpu 2
         cpuhp/2-20    [002] d..3   755.097453: sched_migrate_task: comm=test pid=2635 prio=120 orig_cpu=0 dest_cpu=2
         cpuhp/2-20    [002] d..1   755.206090: console: [  755.019460] TEST: test_last_cpu=2 cpus_allowed=2
         cpuhp/2-20    [002] dN.3   755.213683: sched_migrate_task: comm=kworker/2:3 pid=118 prio=120 orig_cpu=0 dest_cpu=2
         cpuhp/2-20    [002] dN.3   755.216126: sched_migrate_task: comm=bash pid=2390 prio=120 orig_cpu=1 dest_cpu=0
     kworker/2:3-118   [002] d..2   755.216143: wq_worker_sleeping: here
            bash-2390  [000] d..3   755.216386: sched_migrate_task: comm=kworker/u8:5 pid=113 prio=120 orig_cpu=1 dest_cpu=2
    kworker/u8:5-113   [002] d..3   755.216425: sched_migrate_task: comm=sshd pid=2389 prio=120 orig_cpu=3 dest_cpu=2
   systemd-udevd-2636  [001] d.s4   755.217088: sched_migrate_task: comm=rcu_sched pid=8 prio=120 orig_cpu=1 dest_cpu=2
   systemd-udevd-2636  [001] d..4   755.217613: sched_migrate_task: comm=libvirtd pid=2197 prio=120 orig_cpu=0 dest_cpu=2
   systemd-udevd-2637  [000] d..5   755.218850: sched_migrate_task: comm=systemd-udevd pid=1877 prio=120 orig_cpu=3 dest_cpu=0
   systemd-udevd-1877  [000] d..7   755.220192: sched_migrate_task: comm=systemd-udevd pid=2637 prio=120 orig_cpu=0 dest_cpu=1
   systemd-udevd-1877  [000] d..7   755.220212: sched_migrate_task: comm=systemd-udevd pid=2636 prio=120 orig_cpu=1 dest_cpu=2
          <idle>-0     [000] d.s4   755.223118: sched_migrate_task: comm=rcu_sched pid=8 prio=120 orig_cpu=2 dest_cpu=0
          <idle>-0     [002] d.s4   755.225101: sched_migrate_task: comm=rcu_sched pid=8 prio=120 orig_cpu=0 dest_cpu=2
 systemd-journal-1839  [000] d..3   755.280473: sched_migrate_task: comm=in:imjournal pid=2152 prio=120 orig_cpu=0 dest_cpu=2
          <idle>-0     [000] d.s8   755.722735: sched_migrate_task: comm=sshd pid=2389 prio=120 orig_cpu=2 dest_cpu=0
          <idle>-0     [000] d.s4   756.334014: sched_migrate_task: comm=kworker/u8:5 pid=113 prio=120 orig_cpu=2 dest_cpu=0
            sshd-2389  [000] d..3   756.350661: sched_migrate_task: comm=kworker/u8:5 pid=113 prio=120 orig_cpu=0 dest_cpu=1
          <idle>-0     [000] d.s4   756.831984: sched_migrate_task: comm=rcu_sched pid=8 prio=120 orig_cpu=2 dest_cpu=0
          <idle>-0     [001] d.s3   756.845975: sched_migrate_task: comm=jbd2/dm-0-8 pid=1754 prio=120 orig_cpu=3 dest_cpu=1
            bash-2390  [000] d.s3   756.846974: sched_migrate_task: comm=rcu_sched pid=8 prio=120 orig_cpu=0 dest_cpu=1
          <idle>-0     [000] d.s5   756.875008: sched_migrate_task: comm=jbd2/dm-0-8 pid=1754 prio=120 orig_cpu=1 dest_cpu=0
     kworker/0:2-406   [000] d..2   757.038044: wq_worker_sleeping: here
    kworker/u8:5-113   [001] d..3   758.142638: sched_migrate_task: comm=sshd pid=2389 prio=120 orig_cpu=0 dest_cpu=2
            bash-2645  [001] dNs4   758.143899: sched_migrate_task: comm=rcu_sched pid=8 prio=120 orig_cpu=1 dest_cpu=2

Only kworker/0:2-406 and kworker/2:3-118 ran the warn_on path, and
both were fine.

-- Steve

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: simple repro case
  2017-06-21 14:24   ` Steven Rostedt
@ 2017-06-21 17:59     ` Tejun Heo
  0 siblings, 0 replies; 8+ messages in thread
From: Tejun Heo @ 2017-06-21 17:59 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Ingo Molnar, Peter Zijlstra, Paul E. McKenney, linux-kernel,
	Lai Jiangshan, kernel-team

Hello, Steven.

On Wed, Jun 21, 2017 at 10:24:57AM -0400, Steven Rostedt wrote:
> On Sat, 17 Jun 2017 08:11:49 -0400
> Tejun Heo <tj@kernel.org> wrote:
> 
> > Here's a simple repro.  The test code runs whenever a CPU goes
> > off/online.  The test kthread is created on a different CPU and
> > migrated to the target CPU while running.  Without the previous patch
> > applied, the kthread ends up running on the wrong CPU.
> > 
> 
> Hmm, I'm not able to trigger the warn_on, with this patch applied.
> 
> Adding a trace_printk("here!\n") just above the warn_on in
> wq_worker_sleeping(), and doing the following:
> 
>          cpuhp/2-20    [002] d..1   751.204894: console: [  751.018261] TEST: cpu 2 inactive, starting on 0 and migrating (active/online=0-1,3/0-3)
>          cpuhp/2-20    [002] d..1   751.318375: console: [  751.131745] TEST: test_last_cpu=0 cpus_allowed=0
>          cpuhp/2-20    [002] d..1   751.324249: console: [  751.137621] TEST: migrating to inactve cpu 2
>          cpuhp/2-20    [002] d..1   751.438368: console: [  751.251738] TEST: test_last_cpu=0 cpus_allowed=2

Ah, sorry about not being clear.  The repro is that test_last_cpu
isn't 2 on the last line.  It created a kthread on CPU 0 and tried to
migrate that to an online but inactive CPU 2 but the kthread couldn't
get on that CPU because the migration code disallowed the kthread from
moving to an inactive CPU.

The same problem affects workqueue rescuer.  It tries to migrate to an
inactive CPU to service the workqueue there but silently fails to and
then ends up running the work item on the wrong CPU.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH RFC] sched: Allow migrating kthreads into online but inactive CPUs
  2017-06-17 12:10 [PATCH RFC] sched: Allow migrating kthreads into online but inactive CPUs Tejun Heo
  2017-06-17 12:11 ` simple repro case Tejun Heo
@ 2017-07-25 16:58 ` Peter Zijlstra
  2017-07-25 17:52   ` Paul E. McKenney
                     ` (2 more replies)
  1 sibling, 3 replies; 8+ messages in thread
From: Peter Zijlstra @ 2017-07-25 16:58 UTC (permalink / raw)
  To: Tejun Heo
  Cc: Ingo Molnar, Paul E. McKenney, Steven Rostedt, linux-kernel,
	Lai Jiangshan, kernel-team

Hi,

On Sat, Jun 17, 2017 at 08:10:08AM -0400, Tejun Heo wrote:
> Per-cpu workqueues have been tripping CPU affinity sanity checks while
> a CPU is being offlined.  A per-cpu kworker ends up running on a CPU
> which isn't its target CPU while the CPU is online but inactive.
> 
> While the scheduler allows kthreads to wake up on an online but
> inactive CPU, it doesn't allow a running kthread to be migrated to
> such a CPU, which leads to an odd situation where setting affinity on
> a sleeping and running kthread leads to different results.
> 
> Each mem-reclaim workqueue has one rescuer which guarantees forward
> progress and the rescuer needs to bind itself to the CPU which needs
> help in making forward progress; however, due to the above issue,
> while set_cpus_allowed_ptr() succeeds, the rescuer doesn't end up on
> the correct CPU if the CPU is in the process of going offline,
> tripping the sanity check and executing the work item on the wrong
> CPU.
> 
> This patch updates __migrate_task() so that kthreads can be migrated
> into an inactive but online CPU.
> 
> Signed-off-by: Tejun Heo <tj@kernel.org>
> Reported-by: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
> Reported-by: Steven Rostedt <rostedt@goodmis.org>

Hmm.. so the rules for running on !active && online are slightly
stricter than just being a kthread, how about the below, does that work
too?

 kernel/sched/core.c | 36 ++++++++++++++++++++++++++++++------
 1 file changed, 30 insertions(+), 6 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d3d39a283beb..59b667c16826 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -894,6 +894,22 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
 }
 
 #ifdef CONFIG_SMP
+
+/*
+ * Per-CPU kthreads are allowed to run on !actie && online CPUs, see
+ * __set_cpus_allowed_ptr() and select_fallback_rq().
+ */
+static inline bool is_per_cpu_kthread(struct task_struct *p)
+{
+	if (!(p->flags & PF_KTHREAD))
+		return false;
+
+	if (p->nr_cpus_allowed != 1)
+		return false;
+
+	return true;
+}
+
 /*
  * This is how migration works:
  *
@@ -951,8 +967,13 @@ struct migration_arg {
 static struct rq *__migrate_task(struct rq *rq, struct rq_flags *rf,
 				 struct task_struct *p, int dest_cpu)
 {
-	if (unlikely(!cpu_active(dest_cpu)))
-		return rq;
+	if (is_per_cpu_kthread(p)) {
+		if (unlikely(!cpu_online(dest_cpu)))
+			return rq;
+	} else {
+		if (unlikely(!cpu_active(dest_cpu)))
+			return rq;
+	}
 
 	/* Affinity changed (again). */
 	if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
@@ -1482,10 +1503,13 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
 	for (;;) {
 		/* Any allowed, online CPU? */
 		for_each_cpu(dest_cpu, &p->cpus_allowed) {
-			if (!(p->flags & PF_KTHREAD) && !cpu_active(dest_cpu))
-				continue;
-			if (!cpu_online(dest_cpu))
-				continue;
+			if (is_per_cpu_kthread(p)) {
+				if (!cpu_online(dest_cpu))
+					continue;
+			} else {
+				if (!cpu_active(dest_cpu))
+					continue;
+			}
 			goto out;
 		}
 

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH RFC] sched: Allow migrating kthreads into online but inactive CPUs
  2017-07-25 16:58 ` [PATCH RFC] sched: Allow migrating kthreads into online but inactive CPUs Peter Zijlstra
@ 2017-07-25 17:52   ` Paul E. McKenney
  2017-07-26 12:57   ` Paul E. McKenney
  2018-05-31 12:28   ` [tip:sched/urgent] sched/core: Fix rules for running on online && !active CPUs tip-bot for Peter Zijlstra
  2 siblings, 0 replies; 8+ messages in thread
From: Paul E. McKenney @ 2017-07-25 17:52 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Tejun Heo, Ingo Molnar, Steven Rostedt, linux-kernel,
	Lai Jiangshan, kernel-team

On Tue, Jul 25, 2017 at 06:58:21PM +0200, Peter Zijlstra wrote:
> Hi,
> 
> On Sat, Jun 17, 2017 at 08:10:08AM -0400, Tejun Heo wrote:
> > Per-cpu workqueues have been tripping CPU affinity sanity checks while
> > a CPU is being offlined.  A per-cpu kworker ends up running on a CPU
> > which isn't its target CPU while the CPU is online but inactive.
> > 
> > While the scheduler allows kthreads to wake up on an online but
> > inactive CPU, it doesn't allow a running kthread to be migrated to
> > such a CPU, which leads to an odd situation where setting affinity on
> > a sleeping and running kthread leads to different results.
> > 
> > Each mem-reclaim workqueue has one rescuer which guarantees forward
> > progress and the rescuer needs to bind itself to the CPU which needs
> > help in making forward progress; however, due to the above issue,
> > while set_cpus_allowed_ptr() succeeds, the rescuer doesn't end up on
> > the correct CPU if the CPU is in the process of going offline,
> > tripping the sanity check and executing the work item on the wrong
> > CPU.
> > 
> > This patch updates __migrate_task() so that kthreads can be migrated
> > into an inactive but online CPU.
> > 
> > Signed-off-by: Tejun Heo <tj@kernel.org>
> > Reported-by: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
> > Reported-by: Steven Rostedt <rostedt@goodmis.org>
> 
> Hmm.. so the rules for running on !active && online are slightly
> stricter than just being a kthread, how about the below, does that work
> too?

I will give this a shot over night, Pacific Time, but the bug occurs
with such low probability that a pass won't mean much.  :-(

							Thanx, Paul

>  kernel/sched/core.c | 36 ++++++++++++++++++++++++++++++------
>  1 file changed, 30 insertions(+), 6 deletions(-)
> 
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index d3d39a283beb..59b667c16826 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -894,6 +894,22 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
>  }
> 
>  #ifdef CONFIG_SMP
> +
> +/*
> + * Per-CPU kthreads are allowed to run on !actie && online CPUs, see
> + * __set_cpus_allowed_ptr() and select_fallback_rq().
> + */
> +static inline bool is_per_cpu_kthread(struct task_struct *p)
> +{
> +	if (!(p->flags & PF_KTHREAD))
> +		return false;
> +
> +	if (p->nr_cpus_allowed != 1)
> +		return false;
> +
> +	return true;
> +}
> +
>  /*
>   * This is how migration works:
>   *
> @@ -951,8 +967,13 @@ struct migration_arg {
>  static struct rq *__migrate_task(struct rq *rq, struct rq_flags *rf,
>  				 struct task_struct *p, int dest_cpu)
>  {
> -	if (unlikely(!cpu_active(dest_cpu)))
> -		return rq;
> +	if (is_per_cpu_kthread(p)) {
> +		if (unlikely(!cpu_online(dest_cpu)))
> +			return rq;
> +	} else {
> +		if (unlikely(!cpu_active(dest_cpu)))
> +			return rq;
> +	}
> 
>  	/* Affinity changed (again). */
>  	if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
> @@ -1482,10 +1503,13 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
>  	for (;;) {
>  		/* Any allowed, online CPU? */
>  		for_each_cpu(dest_cpu, &p->cpus_allowed) {
> -			if (!(p->flags & PF_KTHREAD) && !cpu_active(dest_cpu))
> -				continue;
> -			if (!cpu_online(dest_cpu))
> -				continue;
> +			if (is_per_cpu_kthread(p)) {
> +				if (!cpu_online(dest_cpu))
> +					continue;
> +			} else {
> +				if (!cpu_active(dest_cpu))
> +					continue;
> +			}
>  			goto out;
>  		}
> 
> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH RFC] sched: Allow migrating kthreads into online but inactive CPUs
  2017-07-25 16:58 ` [PATCH RFC] sched: Allow migrating kthreads into online but inactive CPUs Peter Zijlstra
  2017-07-25 17:52   ` Paul E. McKenney
@ 2017-07-26 12:57   ` Paul E. McKenney
  2018-05-31 12:28   ` [tip:sched/urgent] sched/core: Fix rules for running on online && !active CPUs tip-bot for Peter Zijlstra
  2 siblings, 0 replies; 8+ messages in thread
From: Paul E. McKenney @ 2017-07-26 12:57 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Tejun Heo, Ingo Molnar, Steven Rostedt, linux-kernel,
	Lai Jiangshan, kernel-team

On Tue, Jul 25, 2017 at 06:58:21PM +0200, Peter Zijlstra wrote:
> Hi,
> 
> On Sat, Jun 17, 2017 at 08:10:08AM -0400, Tejun Heo wrote:
> > Per-cpu workqueues have been tripping CPU affinity sanity checks while
> > a CPU is being offlined.  A per-cpu kworker ends up running on a CPU
> > which isn't its target CPU while the CPU is online but inactive.
> > 
> > While the scheduler allows kthreads to wake up on an online but
> > inactive CPU, it doesn't allow a running kthread to be migrated to
> > such a CPU, which leads to an odd situation where setting affinity on
> > a sleeping and running kthread leads to different results.
> > 
> > Each mem-reclaim workqueue has one rescuer which guarantees forward
> > progress and the rescuer needs to bind itself to the CPU which needs
> > help in making forward progress; however, due to the above issue,
> > while set_cpus_allowed_ptr() succeeds, the rescuer doesn't end up on
> > the correct CPU if the CPU is in the process of going offline,
> > tripping the sanity check and executing the work item on the wrong
> > CPU.
> > 
> > This patch updates __migrate_task() so that kthreads can be migrated
> > into an inactive but online CPU.
> > 
> > Signed-off-by: Tejun Heo <tj@kernel.org>
> > Reported-by: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
> > Reported-by: Steven Rostedt <rostedt@goodmis.org>
> 
> Hmm.. so the rules for running on !active && online are slightly
> stricter than just being a kthread, how about the below, does that work
> too?

Of 24 one-hour runs of the TREE07 rcutorture scenario, two had stalled
tasks with this patch.  One of them had more than 200 instances, the other
two instances.  In contrast, a 30-hour run a week ago with Tejun's patch
completed cleanly.  Here "stalled task" means that one of rcutorture's
update-side kthreads fails to make any progress for more than 15 seconds.
Grace periods are progressing, but a kthread waiting for a grace period
isn't making progress, and is stuck with its ->state field at 0x402,
that is TASK_NOLOAD|TASK_UNINTERRUPTIBLE.  Which is as if it never got
the wakeup, given that it is sleeping on schedule_timeout_idle().

Now, two of 24 might just be bad luck, but I haven't seen anything like
this out of TREE07 since I queued Tejun's patch, so I am inclined to
view your patch below with considerable suspicion.

I -am- seeing this out of TREE01, even with Tejun's patch, but that
scenario sets maxcpu=8 and nr_cpus=43, which seems to be tickling an issue
that several other people are seeing.  Others' testing seems to indicate
that setting CONFIG_SOFTLOCKUP_DETECTOR=y suppresses this issue, but I
need to do an overnight run to check my test cases, and that is tonight.

So there might be something else going on as well.

							Thanx, Paul

>  kernel/sched/core.c | 36 ++++++++++++++++++++++++++++++------
>  1 file changed, 30 insertions(+), 6 deletions(-)
> 
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index d3d39a283beb..59b667c16826 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -894,6 +894,22 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
>  }
> 
>  #ifdef CONFIG_SMP
> +
> +/*
> + * Per-CPU kthreads are allowed to run on !actie && online CPUs, see
> + * __set_cpus_allowed_ptr() and select_fallback_rq().
> + */
> +static inline bool is_per_cpu_kthread(struct task_struct *p)
> +{
> +	if (!(p->flags & PF_KTHREAD))
> +		return false;
> +
> +	if (p->nr_cpus_allowed != 1)
> +		return false;
> +
> +	return true;
> +}
> +
>  /*
>   * This is how migration works:
>   *
> @@ -951,8 +967,13 @@ struct migration_arg {
>  static struct rq *__migrate_task(struct rq *rq, struct rq_flags *rf,
>  				 struct task_struct *p, int dest_cpu)
>  {
> -	if (unlikely(!cpu_active(dest_cpu)))
> -		return rq;
> +	if (is_per_cpu_kthread(p)) {
> +		if (unlikely(!cpu_online(dest_cpu)))
> +			return rq;
> +	} else {
> +		if (unlikely(!cpu_active(dest_cpu)))
> +			return rq;
> +	}
> 
>  	/* Affinity changed (again). */
>  	if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
> @@ -1482,10 +1503,13 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
>  	for (;;) {
>  		/* Any allowed, online CPU? */
>  		for_each_cpu(dest_cpu, &p->cpus_allowed) {
> -			if (!(p->flags & PF_KTHREAD) && !cpu_active(dest_cpu))
> -				continue;
> -			if (!cpu_online(dest_cpu))
> -				continue;
> +			if (is_per_cpu_kthread(p)) {
> +				if (!cpu_online(dest_cpu))
> +					continue;
> +			} else {
> +				if (!cpu_active(dest_cpu))
> +					continue;
> +			}
>  			goto out;
>  		}
> 
> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [tip:sched/urgent] sched/core: Fix rules for running on online && !active CPUs
  2017-07-25 16:58 ` [PATCH RFC] sched: Allow migrating kthreads into online but inactive CPUs Peter Zijlstra
  2017-07-25 17:52   ` Paul E. McKenney
  2017-07-26 12:57   ` Paul E. McKenney
@ 2018-05-31 12:28   ` tip-bot for Peter Zijlstra
  2 siblings, 0 replies; 8+ messages in thread
From: tip-bot for Peter Zijlstra @ 2018-05-31 12:28 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: tglx, paulmck, tj, linux-kernel, mingo, torvalds, peterz, rostedt, hpa

Commit-ID:  175f0e25abeaa2218d431141ce19cf1de70fa82d
Gitweb:     https://git.kernel.org/tip/175f0e25abeaa2218d431141ce19cf1de70fa82d
Author:     Peter Zijlstra <peterz@infradead.org>
AuthorDate: Tue, 25 Jul 2017 18:58:21 +0200
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Thu, 31 May 2018 12:24:24 +0200

sched/core: Fix rules for running on online && !active CPUs

As already enforced by the WARN() in __set_cpus_allowed_ptr(), the rules
for running on an online && !active CPU are stricter than just being a
kthread, you need to be a per-cpu kthread.

If you're not strictly per-CPU, you have better CPUs to run on and
don't need the partially booted one to get your work done.

The exception is to allow smpboot threads to bootstrap the CPU itself
and get kernel 'services' initialized before we allow userspace on it.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 955dbdf4ce87 ("sched: Allow migrating kthreads into online but inactive CPUs")
Link: http://lkml.kernel.org/r/20170725165821.cejhb7v2s3kecems@hirez.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/core.c | 42 ++++++++++++++++++++++++++++++------------
 1 file changed, 30 insertions(+), 12 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 092f7c4de903..1c58f54b9114 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -881,6 +881,33 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
 }
 
 #ifdef CONFIG_SMP
+
+static inline bool is_per_cpu_kthread(struct task_struct *p)
+{
+	if (!(p->flags & PF_KTHREAD))
+		return false;
+
+	if (p->nr_cpus_allowed != 1)
+		return false;
+
+	return true;
+}
+
+/*
+ * Per-CPU kthreads are allowed to run on !actie && online CPUs, see
+ * __set_cpus_allowed_ptr() and select_fallback_rq().
+ */
+static inline bool is_cpu_allowed(struct task_struct *p, int cpu)
+{
+	if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
+		return false;
+
+	if (is_per_cpu_kthread(p))
+		return cpu_online(cpu);
+
+	return cpu_active(cpu);
+}
+
 /*
  * This is how migration works:
  *
@@ -938,16 +965,8 @@ struct migration_arg {
 static struct rq *__migrate_task(struct rq *rq, struct rq_flags *rf,
 				 struct task_struct *p, int dest_cpu)
 {
-	if (p->flags & PF_KTHREAD) {
-		if (unlikely(!cpu_online(dest_cpu)))
-			return rq;
-	} else {
-		if (unlikely(!cpu_active(dest_cpu)))
-			return rq;
-	}
-
 	/* Affinity changed (again). */
-	if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
+	if (!is_cpu_allowed(p, dest_cpu))
 		return rq;
 
 	update_rq_clock(rq);
@@ -1476,10 +1495,9 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
 	for (;;) {
 		/* Any allowed, online CPU? */
 		for_each_cpu(dest_cpu, &p->cpus_allowed) {
-			if (!(p->flags & PF_KTHREAD) && !cpu_active(dest_cpu))
-				continue;
-			if (!cpu_online(dest_cpu))
+			if (!is_cpu_allowed(p, dest_cpu))
 				continue;
+
 			goto out;
 		}
 

^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2018-05-31 12:28 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-06-17 12:10 [PATCH RFC] sched: Allow migrating kthreads into online but inactive CPUs Tejun Heo
2017-06-17 12:11 ` simple repro case Tejun Heo
2017-06-21 14:24   ` Steven Rostedt
2017-06-21 17:59     ` Tejun Heo
2017-07-25 16:58 ` [PATCH RFC] sched: Allow migrating kthreads into online but inactive CPUs Peter Zijlstra
2017-07-25 17:52   ` Paul E. McKenney
2017-07-26 12:57   ` Paul E. McKenney
2018-05-31 12:28   ` [tip:sched/urgent] sched/core: Fix rules for running on online && !active CPUs tip-bot for Peter Zijlstra

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.