All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] kthread_worker: re-set CPU affinities if CPU come online
@ 2020-10-28  7:30 qiang.zhang
  2020-10-28  8:30 ` Thomas Gleixner
  0 siblings, 1 reply; 8+ messages in thread
From: qiang.zhang @ 2020-10-28  7:30 UTC (permalink / raw)
  To: pmladek, tj; +Cc: akpm, tglx, linux-mm, linux-kernel

From: Zqiang <qiang.zhang@windriver.com>

When someone CPU offlined, the 'kthread_worker' which bind this CPU,
will run anywhere, if this CPU online, recovery of 'kthread_worker'
affinity by cpuhp notifiers.

Signed-off-by: Zqiang <qiang.zhang@windriver.com>
---
 v1->v2:
 rename variable kworker_online to kthread_worker_online.
 add 'cpuhp_node' and 'bind_cpu' init in KTHREAD_WORKER_INIT.
 add a comment explaining for WARN_ON_ONCE.

 include/linux/kthread.h |  4 ++++
 kernel/kthread.c        | 36 +++++++++++++++++++++++++++++++++++-
 2 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 65b81e0c494d..c28963e87b18 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -93,6 +93,8 @@ struct kthread_worker {
 	struct list_head	delayed_work_list;
 	struct task_struct	*task;
 	struct kthread_work	*current_work;
+	struct hlist_node       cpuhp_node;
+	int                     bind_cpu;
 };
 
 struct kthread_work {
@@ -112,6 +114,8 @@ struct kthread_delayed_work {
 	.lock = __RAW_SPIN_LOCK_UNLOCKED((worker).lock),		\
 	.work_list = LIST_HEAD_INIT((worker).work_list),		\
 	.delayed_work_list = LIST_HEAD_INIT((worker).delayed_work_list),\
+	.cpuhp_node = {.next = NULL, .pprev = NULL},			\
+	.bind_cpu = -1,							\
 	}
 
 #define KTHREAD_WORK_INIT(work, fn)	{				\
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 34516b0a6eb7..6c66df585225 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -28,8 +28,10 @@
 #include <linux/uaccess.h>
 #include <linux/numa.h>
 #include <linux/sched/isolation.h>
+#include <linux/cpu.h>
 #include <trace/events/sched.h>
 
+static enum cpuhp_state kthread_worker_online;
 
 static DEFINE_SPINLOCK(kthread_create_lock);
 static LIST_HEAD(kthread_create_list);
@@ -649,6 +651,8 @@ void __kthread_init_worker(struct kthread_worker *worker,
 	lockdep_set_class_and_name(&worker->lock, key, name);
 	INIT_LIST_HEAD(&worker->work_list);
 	INIT_LIST_HEAD(&worker->delayed_work_list);
+	worker->bind_cpu = -1;
+	INIT_HLIST_NODE(&worker->cpuhp_node);
 }
 EXPORT_SYMBOL_GPL(__kthread_init_worker);
 
@@ -744,8 +748,11 @@ __kthread_create_worker(int cpu, unsigned int flags,
 	if (IS_ERR(task))
 		goto fail_task;
 
-	if (cpu >= 0)
+	if (cpu >= 0) {
+		cpuhp_state_add_instance_nocalls(kthread_worker_online, &worker->cpuhp_node);
 		kthread_bind(task, cpu);
+		worker->bind_cpu = cpu;
+	}
 
 	worker->flags = flags;
 	worker->task = task;
@@ -1230,6 +1237,9 @@ void kthread_destroy_worker(struct kthread_worker *worker)
 	if (WARN_ON(!task))
 		return;
 
+	if (worker->bind_cpu >= 0)
+		cpuhp_state_remove_instance_nocalls(kthread_worker_online, &worker->cpuhp_node);
+
 	kthread_flush_worker(worker);
 	kthread_stop(task);
 	WARN_ON(!list_empty(&worker->work_list));
@@ -1237,6 +1247,30 @@ void kthread_destroy_worker(struct kthread_worker *worker)
 }
 EXPORT_SYMBOL(kthread_destroy_worker);
 
+static int kthread_worker_cpu_online(unsigned int cpu, struct hlist_node *node)
+{
+	struct kthread_worker *worker = hlist_entry(node, struct kthread_worker, cpuhp_node);
+	struct task_struct *task = worker->task;
+
+	/* as we're called from CPU_ONLINE, the following shouldn't fail */
+	if (cpu == worker->bind_cpu)
+		WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpumask_of(cpu)) < 0);
+	return 0;
+}
+
+static __init int kthread_worker_hotplug_init(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "kthread-worker/online",
+					kthread_worker_cpu_online, NULL);
+	if (ret < 0)
+		return ret;
+	kthread_worker_online = ret;
+	return 0;
+}
+core_initcall(kthread_worker_hotplug_init);
+
 /**
  * kthread_use_mm - make the calling kthread operate on an address space
  * @mm: address space to operate on
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] kthread_worker: re-set CPU affinities if CPU come online
  2020-10-28  7:30 [PATCH v2] kthread_worker: re-set CPU affinities if CPU come online qiang.zhang
@ 2020-10-28  8:30 ` Thomas Gleixner
  2020-10-28  8:45   ` 回复: " Zhang, Qiang
  0 siblings, 1 reply; 8+ messages in thread
From: Thomas Gleixner @ 2020-10-28  8:30 UTC (permalink / raw)
  To: qiang.zhang, pmladek, tj; +Cc: akpm, linux-mm, linux-kernel

On Wed, Oct 28 2020 at 15:30, qiang zhang wrote:
> From: Zqiang <qiang.zhang@windriver.com>
>
> When someone CPU offlined, the 'kthread_worker' which bind this CPU,
> will run anywhere, if this CPU online, recovery of 'kthread_worker'
> affinity by cpuhp notifiers.
>
> Signed-off-by: Zqiang <qiang.zhang@windriver.com>
> ---
>  v1->v2:
>  rename variable kworker_online to kthread_worker_online.
>  add 'cpuhp_node' and 'bind_cpu' init in KTHREAD_WORKER_INIT.
>  add a comment explaining for WARN_ON_ONCE.

How is that addressing any of the comments I made on V1 of this?

Thanks,

        tglx



^ permalink raw reply	[flat|nested] 8+ messages in thread

* 回复: [PATCH v2] kthread_worker: re-set CPU affinities if CPU come online
  2020-10-28  8:30 ` Thomas Gleixner
@ 2020-10-28  8:45   ` Zhang, Qiang
  2020-10-28  9:23     ` Thomas Gleixner
  0 siblings, 1 reply; 8+ messages in thread
From: Zhang, Qiang @ 2020-10-28  8:45 UTC (permalink / raw)
  To: Thomas Gleixner, pmladek, tj; +Cc: akpm, linux-mm, linux-kernel



________________________________________
发件人: Thomas Gleixner <tglx@linutronix.de>
发送时间: 2020年10月28日 16:30
收件人: Zhang, Qiang; pmladek@suse.com; tj@kernel.org
抄送: akpm@linux-foundation.org; linux-mm@kvack.org; linux-kernel@vger.kernel.org
主题: Re: [PATCH v2] kthread_worker: re-set CPU affinities if CPU come online

[Please note this e-mail is from an EXTERNAL e-mail address]

On Wed, Oct 28 2020 at 15:30, qiang zhang wrote:
> From: Zqiang <qiang.zhang@windriver.com>
>
> When someone CPU offlined, the 'kthread_worker' which bind this CPU,
> will run anywhere, if this CPU online, recovery of 'kthread_worker'
> affinity by cpuhp notifiers.
>
> Signed-off-by: Zqiang <qiang.zhang@windriver.com>
> ---
>  v1->v2:
>  rename variable kworker_online to kthread_worker_online.
>  add 'cpuhp_node' and 'bind_cpu' init in KTHREAD_WORKER_INIT.
>  add a comment explaining for WARN_ON_ONCE.

>How is that addressing any of the comments I made on V1 of this?

Do you mean the following problem:
 
"The dynamic hotplug states run late. What's preventing work to be queued
on such a worker before it is bound to the CPU again?"

Thanks
Qiang
>
>Thanks,
>
>       tglx



^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: 回复: [PATCH v2] kthread_worker: re-set CPU affinities if CPU come online
  2020-10-28  8:45   ` 回复: " Zhang, Qiang
@ 2020-10-28  9:23     ` Thomas Gleixner
  2020-10-29  3:14       ` 回复: " Zhang, Qiang
  0 siblings, 1 reply; 8+ messages in thread
From: Thomas Gleixner @ 2020-10-28  9:23 UTC (permalink / raw)
  To: Zhang, Qiang, pmladek, tj; +Cc: akpm, linux-mm, linux-kernel

Quiang,

On Wed, Oct 28 2020 at 08:45, Qiang Zhang wrote:
> ________________________________________
> 发件人: Thomas Gleixner <tglx@linutronix.de>
> 发送时间: 2020年10月28日 16:30
> 收件人: Zhang, Qiang; pmladek@suse.com; tj@kernel.org
> 抄送: akpm@linux-foundation.org; linux-mm@kvack.org; linux-kernel@vger.kernel.org
> 主题: Re: [PATCH v2] kthread_worker: re-set CPU affinities if CPU come online

Can you please teach your mail client not to copy the mail headers into
the reply?

> [Please note this e-mail is from an EXTERNAL e-mail address]

And delete this non-information please.

> On Wed, Oct 28 2020 at 15:30, qiang zhang wrote:
>
>>How is that addressing any of the comments I made on V1 of this?
>
> Do you mean the following problem:
>  
> "The dynamic hotplug states run late. What's preventing work to be queued
> on such a worker before it is bound to the CPU again?"

This is one problem, but there are more and I explained them in great
length. If there is anything unclear, then please ask.

Thanks,

        tglx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* 回复: 回复: [PATCH v2] kthread_worker: re-set CPU affinities if CPU come online
  2020-10-28  9:23     ` Thomas Gleixner
@ 2020-10-29  3:14       ` Zhang, Qiang
  2020-10-29  8:27         ` Thomas Gleixner
  0 siblings, 1 reply; 8+ messages in thread
From: Zhang, Qiang @ 2020-10-29  3:14 UTC (permalink / raw)
  To: Thomas Gleixner, pmladek, tj; +Cc: akpm, linux-mm, linux-kernel



________________________________________
发件人: Thomas Gleixner <tglx@linutronix.de>
发送时间: 2020年10月28日 17:23
收件人: Zhang, Qiang; pmladek@suse.com; tj@kernel.org
抄送: akpm@linux-foundation.org; linux-mm@kvack.org; linux-kernel@vger.kernel.org
主题: Re: 回复: [PATCH v2] kthread_worker: re-set CPU affinities if CPU come online

> [Please note this e-mail is from an EXTERNAL e-mail address]
>
> On Wed, Oct 28 2020 at 15:30, qiang zhang wrote:
>
>>How is that addressing any of the comments I made on V1 of this?
>
> Do you mean the following problem:
>
> "The dynamic hotplug states run late. What's preventing work to be queued
> on such a worker before it is bound to the CPU again?"
>
>This is one problem, but there are more and I explained them in great
>length. If there is anything unclear, then please ask.

Really, this patch is not considered that work may be put into the queue after the bound CPU is offline.   in addition, when the bound CPU goes online again, before restoring the worker's CPU affinity, work may be put into the queue.

Although  int this (powerclamp) way,that's not a problem, that it is solved by destroying and creating  tasks when the CPU hotplug,  in addition,  when CPU going down , this need call 'cancel_work_sync' func in offline callback,  this may be blocked long time. these operation is expensive.

this patch only just to recover  the worker task's affinity when CPU go to online again that create by "kthread_create_worker_on_cpu" func ,  likely per-CPU worker method when CPU hotplug in "workqueue" and "io-wq".

Thanks

Qiang

>
>Thanks,
>
>        tglx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: 回复: 回复: [PATCH v2] kthread_worker: re-set CPU affinities if CPU come online
  2020-10-29  3:14       ` 回复: " Zhang, Qiang
@ 2020-10-29  8:27         ` Thomas Gleixner
  2020-10-29 13:08           ` Petr Mladek
  0 siblings, 1 reply; 8+ messages in thread
From: Thomas Gleixner @ 2020-10-29  8:27 UTC (permalink / raw)
  To: Zhang, Qiang, pmladek, tj; +Cc: akpm, linux-mm, linux-kernel

On Thu, Oct 29 2020 at 03:14, Qiang Zhang wrote:
> Really, this patch is not considered that work may be put into the
> queue after the bound CPU is offline.  in addition, when the bound CPU
> goes online again, before restoring the worker's CPU affinity, work
> may be put into the queue.

And how is that supposed to be correct?

> Although int this (powerclamp) way,that's not a problem, that it is
> solved by destroying and creating tasks when the CPU hotplug, in
> addition, when CPU going down , this need call 'cancel_work_sync' func
> in offline callback, this may be blocked long time. these operation is
> expensive.

It does not matter whether it's expensive or not. It's correct and
that's what matters most.

> this patch only just to recover the worker task's affinity when CPU go
> to online again that create by "kthread_create_worker_on_cpu" func ,
> likely per-CPU worker method when CPU hotplug in "workqueue" and
> "io-wq".

I know what this patch just does, but that makes it not any more
correct. It creates a semanticaly ill defined illusion of correctness.

We are not "fixing" a problem by making it work for your particular and
even not explained use case.

The expected semantics of a cpu bound kthread_worker are completely
unclear and undocumented. This needs to be fixed first and once this is
established and agreed on then the gaps in the implementation can be
closed.

Thanks,

        tglx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] kthread_worker: re-set CPU affinities if CPU come online
  2020-10-29  8:27         ` Thomas Gleixner
@ 2020-10-29 13:08           ` Petr Mladek
  2020-10-29 15:01             ` Thomas Gleixner
  0 siblings, 1 reply; 8+ messages in thread
From: Petr Mladek @ 2020-10-29 13:08 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Zhang, Qiang, tj, akpm, linux-mm, linux-kernel

On Thu 2020-10-29 09:27:26, Thomas Gleixner wrote:
> The expected semantics of a cpu bound kthread_worker are completely
> unclear and undocumented. This needs to be fixed first and once this is
> established and agreed on then the gaps in the implementation can be
> closed.

I thought about some sane semantic and it goes down to
the following problem:

The per-CPU kthread workers are created by explicitly calling
kthread_create_worker_on_cpu() on each CPU.

The API does _not_ store the information how to start the worker.
As a result, it is not able to start a new one when the CPU
goes online "for the first time". I mean when the CPU was offline
when the API user created the workers.

It means that the API user is responsible for handling CPU hotplug
on its own. We probably should just document it and do nothing else [*]


Alternative solution would be to extend the API and allow to create
kthread_worker on each online CPU. It would require to store
parameters needed to create the kthread only new online CPUs.
Then we might think about some sane semantic for CPU hotplug.

Well, it might be hard to define a sane semantic unless there are
more users of the API. So, I tend to keep it simple and just
document the status quo.

Any ideas?


[*] IMHO, it does not even make sense to manipulate the affinity.
    It would just give a false feeling that it is enough.

Best Regards,
Petr

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] kthread_worker: re-set CPU affinities if CPU come online
  2020-10-29 13:08           ` Petr Mladek
@ 2020-10-29 15:01             ` Thomas Gleixner
  0 siblings, 0 replies; 8+ messages in thread
From: Thomas Gleixner @ 2020-10-29 15:01 UTC (permalink / raw)
  To: Petr Mladek; +Cc: Zhang, Qiang, tj, akpm, linux-mm, linux-kernel

On Thu, Oct 29 2020 at 14:08, Petr Mladek wrote:
> On Thu 2020-10-29 09:27:26, Thomas Gleixner wrote:
>> The expected semantics of a cpu bound kthread_worker are completely
>> unclear and undocumented. This needs to be fixed first and once this is
>> established and agreed on then the gaps in the implementation can be
>> closed.
>
> I thought about some sane semantic and it goes down to
> the following problem:
>
> The per-CPU kthread workers are created by explicitly calling
> kthread_create_worker_on_cpu() on each CPU.
>
> The API does _not_ store the information how to start the worker.
> As a result, it is not able to start a new one when the CPU
> goes online "for the first time". I mean when the CPU was offline
> when the API user created the workers.
>
> It means that the API user is responsible for handling CPU hotplug
> on its own. We probably should just document it and do nothing else [*]

> [*] IMHO, it does not even make sense to manipulate the affinity.
>     It would just give a false feeling that it is enough.

Agreed on both.

> Alternative solution would be to extend the API and allow to create
> kthread_worker on each online CPU. It would require to store
> parameters needed to create the kthread only new online CPUs.
> Then we might think about some sane semantic for CPU hotplug.

That facility already exists: smpboot_register_percpu_thread()

So "all" you'd need to do is to provide a kthread_worker variant which
utilizes that. It's straight forward, but not sure whether it's worth
the trouble.

> Well, it might be hard to define a sane semantic unless there are
> more users of the API. So, I tend to keep it simple and just
> document the status quo.

Ack.

Thanks,

        tglx



^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2020-10-29 15:02 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-10-28  7:30 [PATCH v2] kthread_worker: re-set CPU affinities if CPU come online qiang.zhang
2020-10-28  8:30 ` Thomas Gleixner
2020-10-28  8:45   ` 回复: " Zhang, Qiang
2020-10-28  9:23     ` Thomas Gleixner
2020-10-29  3:14       ` 回复: " Zhang, Qiang
2020-10-29  8:27         ` Thomas Gleixner
2020-10-29 13:08           ` Petr Mladek
2020-10-29 15:01             ` Thomas Gleixner

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.