linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v3 1/1] kthread: allocate kthread structure using kmalloc
@ 2016-10-25 11:05 Roman Pen
  2016-10-25 14:03 ` Oleg Nesterov
  0 siblings, 1 reply; 31+ messages in thread
From: Roman Pen @ 2016-10-25 11:05 UTC (permalink / raw)
  Cc: Roman Pen, Andy Lutomirski, Oleg Nesterov, Peter Zijlstra,
	Thomas Gleixner, Ingo Molnar, Tejun Heo, linux-kernel

This patch avoids allocation of kthread structure on a stack, and simply
uses kmalloc.  Allocation on a stack became a huge problem (with memory
corruption and all other not nice consequences) after the following commit
2deb4be28077 ("x86/dumpstack: When OOPSing, rewind the stack before do_exit()")
by Andy Lutomirski, which rewinds the stack on oops, thus ooopsed kthread
steps on a garbage memory while completion of task->vfork_done structure
on the following path:

       oops_end()
       rewind_stack_do_exit()
       exit_mm()
       mm_release()
       complete_vfork_done()

Also in this patch two structures 'struct kthread_create_info' and
'struct kthread' are merged into one 'struct kthread' and its freeing
is controlled by a reference counter.

The last reference on kthread is put from a task work, the callback,
which is invoked from do_exit().  The major thing is that the last
put is happens *after* completion_vfork_done() is invoked.

Signed-off-by: Roman Pen <roman.penyaev@profitbricks.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: linux-kernel@vger.kernel.org
---
v3:
  o handle to_live_kthread() calls, which should increase a kthread
    ref or return NULL.  Function was renamed to to_live_kthread_and_get().
  o minor comments tweaks.

v2:
  o let x86/kernel/dumpstack.c rewind a stack, but do not use a stack
    for a structure allocation.

 kernel/kthread.c | 198 ++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 117 insertions(+), 81 deletions(-)

diff --git a/kernel/kthread.c b/kernel/kthread.c
index 4ab4c3766a80..e8adc10556e0 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -18,14 +18,19 @@
 #include <linux/freezer.h>
 #include <linux/ptrace.h>
 #include <linux/uaccess.h>
+#include <linux/task_work.h>
 #include <trace/events/sched.h>
 
 static DEFINE_SPINLOCK(kthread_create_lock);
 static LIST_HEAD(kthread_create_list);
 struct task_struct *kthreadd_task;
 
-struct kthread_create_info
-{
+struct kthread {
+	struct list_head list;
+	unsigned long flags;
+	unsigned int cpu;
+	atomic_t refs;
+
 	/* Information passed to kthread() from kthreadd. */
 	int (*threadfn)(void *data);
 	void *data;
@@ -33,15 +38,9 @@ struct kthread_create_info
 
 	/* Result passed back to kthread_create() from kthreadd. */
 	struct task_struct *result;
-	struct completion *done;
 
-	struct list_head list;
-};
-
-struct kthread {
-	unsigned long flags;
-	unsigned int cpu;
-	void *data;
+	struct callback_head put_work;
+	struct completion *started;
 	struct completion parked;
 	struct completion exited;
 };
@@ -56,17 +55,49 @@ enum KTHREAD_BITS {
 #define __to_kthread(vfork)	\
 	container_of(vfork, struct kthread, exited)
 
+static inline void get_kthread(struct kthread *kthread)
+{
+	BUG_ON(atomic_read(&kthread->refs) <= 0);
+	atomic_inc(&kthread->refs);
+}
+
+static inline void put_kthread(struct kthread *kthread)
+{
+	BUG_ON(atomic_read(&kthread->refs) <= 0);
+	if (atomic_dec_and_test(&kthread->refs))
+		kfree(kthread);
+}
+
+/**
+ * put_kthread_cb - is called from do_exit() and does likely
+ *                  the final put.
+ */
+static void put_kthread_cb(struct callback_head *work)
+{
+	struct kthread *kthread;
+
+	kthread = container_of(work, struct kthread, put_work);
+	put_kthread(kthread);
+}
+
 static inline struct kthread *to_kthread(struct task_struct *k)
 {
 	return __to_kthread(k->vfork_done);
 }
 
-static struct kthread *to_live_kthread(struct task_struct *k)
+static struct kthread *to_live_kthread_and_get(struct task_struct *k)
 {
-	struct completion *vfork = ACCESS_ONCE(k->vfork_done);
-	if (likely(vfork) && try_get_task_stack(k))
-		return __to_kthread(vfork);
-	return NULL;
+	struct kthread *kthread = NULL;
+
+	BUG_ON(!(k->flags & PF_KTHREAD));
+	task_lock(k);
+	if (likely(k->vfork_done)) {
+		kthread = __to_kthread(k->vfork_done);
+		get_kthread(kthread);
+	}
+	task_unlock(k);
+
+	return kthread;
 }
 
 /**
@@ -174,41 +205,37 @@ void kthread_parkme(void)
 }
 EXPORT_SYMBOL_GPL(kthread_parkme);
 
-static int kthread(void *_create)
+static int kthreadfn(void *_self)
 {
-	/* Copy data: it's on kthread's stack */
-	struct kthread_create_info *create = _create;
-	int (*threadfn)(void *data) = create->threadfn;
-	void *data = create->data;
-	struct completion *done;
-	struct kthread self;
-	int ret;
-
-	self.flags = 0;
-	self.data = data;
-	init_completion(&self.exited);
-	init_completion(&self.parked);
-	current->vfork_done = &self.exited;
-
-	/* If user was SIGKILLed, I release the structure. */
-	done = xchg(&create->done, NULL);
-	if (!done) {
-		kfree(create);
-		do_exit(-EINTR);
+	struct completion *started;
+	struct kthread *self = _self;
+	int ret = -EINTR;
+
+	/* If user was SIGKILLed, put a ref and exit silently. */
+	started = xchg(&self->started, NULL);
+	if (!started) {
+		put_kthread(self);
+		goto exit;
 	}
+	/*
+	 * Delegate last ref put to a task work, which will happen
+	 * after 'vfork_done' completion.
+	 */
+	init_task_work(&self->put_work, put_kthread_cb);
+	task_work_add(current, &self->put_work, false);
+	current->vfork_done = &self->exited;
+
 	/* OK, tell user we're spawned, wait for stop or wakeup */
 	__set_current_state(TASK_UNINTERRUPTIBLE);
-	create->result = current;
-	complete(done);
+	self->result = current;
+	complete(started);
 	schedule();
 
-	ret = -EINTR;
-
-	if (!test_bit(KTHREAD_SHOULD_STOP, &self.flags)) {
-		__kthread_parkme(&self);
-		ret = threadfn(data);
+	if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) {
+		__kthread_parkme(self);
+		ret = self->threadfn(self->data);
 	}
-	/* we can't just return, we must preserve "self" on stack */
+exit:
 	do_exit(ret);
 }
 
@@ -222,25 +249,25 @@ int tsk_fork_get_node(struct task_struct *tsk)
 	return NUMA_NO_NODE;
 }
 
-static void create_kthread(struct kthread_create_info *create)
+static void create_kthread(struct kthread *kthread)
 {
+	struct completion *started;
 	int pid;
 
 #ifdef CONFIG_NUMA
-	current->pref_node_fork = create->node;
+	current->pref_node_fork = kthread->node;
 #endif
 	/* We want our own signal handler (we take no signals by default). */
-	pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD);
+	pid = kernel_thread(kthreadfn, kthread,
+			    CLONE_FS | CLONE_FILES | SIGCHLD);
 	if (pid < 0) {
-		/* If user was SIGKILLed, I release the structure. */
-		struct completion *done = xchg(&create->done, NULL);
-
-		if (!done) {
-			kfree(create);
-			return;
+		started = xchg(&kthread->started, NULL);
+		if (started) {
+			/* The user was not SIGKILLed and wants the result. */
+			kthread->result = ERR_PTR(pid);
+			complete(started);
 		}
-		create->result = ERR_PTR(pid);
-		complete(done);
+		put_kthread(kthread);
 	}
 }
 
@@ -272,20 +299,26 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
 					   const char namefmt[],
 					   ...)
 {
-	DECLARE_COMPLETION_ONSTACK(done);
+	DECLARE_COMPLETION_ONSTACK(started);
 	struct task_struct *task;
-	struct kthread_create_info *create = kmalloc(sizeof(*create),
-						     GFP_KERNEL);
+	struct kthread *kthread;
 
-	if (!create)
+	kthread = kmalloc(sizeof(*kthread), GFP_KERNEL);
+	if (!kthread)
 		return ERR_PTR(-ENOMEM);
-	create->threadfn = threadfn;
-	create->data = data;
-	create->node = node;
-	create->done = &done;
+	/* One ref for us and one ref for a new kernel thread. */
+	atomic_set(&kthread->refs, 2);
+	kthread->flags = 0;
+	kthread->cpu = 0;
+	kthread->threadfn = threadfn;
+	kthread->data = data;
+	kthread->node = node;
+	kthread->started = &started;
+	init_completion(&kthread->exited);
+	init_completion(&kthread->parked);
 
 	spin_lock(&kthread_create_lock);
-	list_add_tail(&create->list, &kthread_create_list);
+	list_add_tail(&kthread->list, &kthread_create_list);
 	spin_unlock(&kthread_create_lock);
 
 	wake_up_process(kthreadd_task);
@@ -294,21 +327,23 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
 	 * the OOM killer while kthreadd is trying to allocate memory for
 	 * new kernel thread.
 	 */
-	if (unlikely(wait_for_completion_killable(&done))) {
+	if (unlikely(wait_for_completion_killable(&started))) {
 		/*
 		 * If I was SIGKILLed before kthreadd (or new kernel thread)
-		 * calls complete(), leave the cleanup of this structure to
-		 * that thread.
+		 * calls complete(), put a ref and return an error.
 		 */
-		if (xchg(&create->done, NULL))
+		if (xchg(&kthread->started, NULL)) {
+			put_kthread(kthread);
+
 			return ERR_PTR(-EINTR);
+		}
 		/*
 		 * kthreadd (or new kernel thread) will call complete()
 		 * shortly.
 		 */
-		wait_for_completion(&done);
+		wait_for_completion(&started);
 	}
-	task = create->result;
+	task = kthread->result;
 	if (!IS_ERR(task)) {
 		static const struct sched_param param = { .sched_priority = 0 };
 		va_list args;
@@ -323,7 +358,8 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
 		sched_setscheduler_nocheck(task, SCHED_NORMAL, &param);
 		set_cpus_allowed_ptr(task, cpu_all_mask);
 	}
-	kfree(create);
+	put_kthread(kthread);
+
 	return task;
 }
 EXPORT_SYMBOL(kthread_create_on_node);
@@ -423,11 +459,11 @@ static void __kthread_unpark(struct task_struct *k, struct kthread *kthread)
  */
 void kthread_unpark(struct task_struct *k)
 {
-	struct kthread *kthread = to_live_kthread(k);
+	struct kthread *kthread = to_live_kthread_and_get(k);
 
 	if (kthread) {
 		__kthread_unpark(k, kthread);
-		put_task_stack(k);
+		put_kthread(kthread);
 	}
 }
 EXPORT_SYMBOL_GPL(kthread_unpark);
@@ -446,7 +482,7 @@ EXPORT_SYMBOL_GPL(kthread_unpark);
  */
 int kthread_park(struct task_struct *k)
 {
-	struct kthread *kthread = to_live_kthread(k);
+	struct kthread *kthread = to_live_kthread_and_get(k);
 	int ret = -ENOSYS;
 
 	if (kthread) {
@@ -457,7 +493,7 @@ int kthread_park(struct task_struct *k)
 				wait_for_completion(&kthread->parked);
 			}
 		}
-		put_task_stack(k);
+		put_kthread(kthread);
 		ret = 0;
 	}
 	return ret;
@@ -487,13 +523,13 @@ int kthread_stop(struct task_struct *k)
 	trace_sched_kthread_stop(k);
 
 	get_task_struct(k);
-	kthread = to_live_kthread(k);
+	kthread = to_live_kthread_and_get(k);
 	if (kthread) {
 		set_bit(KTHREAD_SHOULD_STOP, &kthread->flags);
 		__kthread_unpark(k, kthread);
 		wake_up_process(k);
 		wait_for_completion(&kthread->exited);
-		put_task_stack(k);
+		put_kthread(kthread);
 	}
 	ret = k->exit_code;
 	put_task_struct(k);
@@ -523,14 +559,14 @@ int kthreadd(void *unused)
 
 		spin_lock(&kthread_create_lock);
 		while (!list_empty(&kthread_create_list)) {
-			struct kthread_create_info *create;
+			struct kthread *kthread;
 
-			create = list_entry(kthread_create_list.next,
-					    struct kthread_create_info, list);
-			list_del_init(&create->list);
+			kthread = list_entry(kthread_create_list.next,
+					     struct kthread, list);
+			list_del_init(&kthread->list);
 			spin_unlock(&kthread_create_lock);
 
-			create_kthread(create);
+			create_kthread(kthread);
 
 			spin_lock(&kthread_create_lock);
 		}
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH v3 1/1] kthread: allocate kthread structure using kmalloc
  2016-10-25 11:05 [PATCH v3 1/1] kthread: allocate kthread structure using kmalloc Roman Pen
@ 2016-10-25 14:03 ` Oleg Nesterov
  2016-10-25 15:43   ` Oleg Nesterov
  2016-10-25 15:46   ` Roman Penyaev
  0 siblings, 2 replies; 31+ messages in thread
From: Oleg Nesterov @ 2016-10-25 14:03 UTC (permalink / raw)
  To: Roman Pen
  Cc: Andy Lutomirski, Peter Zijlstra, Thomas Gleixner, Ingo Molnar,
	Tejun Heo, linux-kernel

On 10/25, Roman Pen wrote:
>
> This patch avoids allocation of kthread structure on a stack, and simply
> uses kmalloc.

Oh. I didn't even read this patch, but I have to admit I personally do not
like it. I can be wrong, but imo this is the step to the wrong direction.

struct kthread is already bloated, we should not bloat it more. Instead
we should kill it. And to_kthread() too, at least in its current form.

For example. parked/exited/cpu should go into smp_hotplug_thread. Yes,
this needs cleanups.

All we need is kthread_data() which returns the pointer to the private
data used by kthread.

As for kthread_stop(), we no longer need to abuse ->vfork_done, we can
use task_works:

	struct xxx {
		struct struct callback_head work;
		struct completion done;
	};

	static void kthread_signal_exit(struct callback_head *arg)
	{
		struct xxx *xxx = container_of(arg);
		complete(xxx->done);
	}

	int kthread_stop(struct task_struct *k)
	{
		struct xxx xxx;
		int ret;

		init_task_work(&xxx.work, kthread_signal_exit);
		init_completion(&xxx.done);

		trace_sched_kthread_stop(k);

		get_task_struct(k);
		if (!task_work_add(k, &xxx.work, false)) {
			set(KTHREAD_SHOULD_STOP);
			wake_up_process(k);
			wait_for_completion(&xxx.done);
		}
		ret = k->exit_code;
		put_task_struct(k);

		trace_sched_kthread_stop_ret(ret);
		return ret;
	}

Oleg.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v3 1/1] kthread: allocate kthread structure using kmalloc
  2016-10-25 14:03 ` Oleg Nesterov
@ 2016-10-25 15:43   ` Oleg Nesterov
  2016-10-25 16:08     ` Roman Penyaev
  2016-10-25 16:52     ` Andy Lutomirski
  2016-10-25 15:46   ` Roman Penyaev
  1 sibling, 2 replies; 31+ messages in thread
From: Oleg Nesterov @ 2016-10-25 15:43 UTC (permalink / raw)
  To: Roman Pen
  Cc: Andy Lutomirski, Peter Zijlstra, Thomas Gleixner, Ingo Molnar,
	Tejun Heo, linux-kernel

On 10/25, Oleg Nesterov wrote:
>
> On 10/25, Roman Pen wrote:
> >
> > This patch avoids allocation of kthread structure on a stack, and simply
> > uses kmalloc.
>
> Oh. I didn't even read this patch, but I have to admit I personally do not
> like it. I can be wrong, but imo this is the step to the wrong direction.

And after I tried to actually read it I dislike it even more, sorry Roman.
Starting from the fact it moves kthread_create_info into struct kthread.

> struct kthread is already bloated, we should not bloat it more. Instead
> we should kill it. And to_kthread() too, at least in its current form.

Yes, but even if we can't or do not want to do this, even if we want to
kmalloc struct kthread, I really think it should not be refcounted
separately from task_struct.

something like the patch in http://marc.info/?l=linux-kernel&m=146715459127804

Either way to_live_kthread() must go away. Currently we can't avoid it
because we abuse vfork_done, but as I already said we no longer need this.

Oleg.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v3 1/1] kthread: allocate kthread structure using kmalloc
  2016-10-25 14:03 ` Oleg Nesterov
  2016-10-25 15:43   ` Oleg Nesterov
@ 2016-10-25 15:46   ` Roman Penyaev
  1 sibling, 0 replies; 31+ messages in thread
From: Roman Penyaev @ 2016-10-25 15:46 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Andy Lutomirski, Peter Zijlstra, Thomas Gleixner, Ingo Molnar,
	Tejun Heo, linux-kernel

On Tue, Oct 25, 2016 at 4:03 PM, Oleg Nesterov <oleg@redhat.com> wrote:
> On 10/25, Roman Pen wrote:
>>
>> This patch avoids allocation of kthread structure on a stack, and simply
>> uses kmalloc.
>
> Oh. I didn't even read this patch, but I have to admit I personally do not
> like it. I can be wrong, but imo this is the step to the wrong direction.

it targets only one thing - we can't use stack anymore for keeping kthread
structure on it.  so basically it is not even a step, just an attempt to
fix memory corruption after oops.

> struct kthread is already bloated, we should not bloat it more.

that is clear, but I wanted to keep code simpler with one allocation and
one structure for all the needs.

> Instead we should kill it. And to_kthread() too, at least in its current
> form.

that is nice and this is a step forward, but not with this patch.

>
> For example. parked/exited/cpu should go into smp_hotplug_thread. Yes,
> this needs cleanups.
>
> All we need is kthread_data() which returns the pointer to the private
> data used by kthread.

that means that we still need to store the private data inside task_struct
and probably again abuse some member.

>
> As for kthread_stop(), we no longer need to abuse ->vfork_done, we can
> use task_works:

yes, that can be done easily. in the current patch I already use the
task_work to free the kthread structure. but still for me is not clear
where to keep the private data if you have only task_struct in your
hands.

--
Roman

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v3 1/1] kthread: allocate kthread structure using kmalloc
  2016-10-25 15:43   ` Oleg Nesterov
@ 2016-10-25 16:08     ` Roman Penyaev
  2016-10-25 16:17       ` Oleg Nesterov
  2016-10-25 16:52     ` Andy Lutomirski
  1 sibling, 1 reply; 31+ messages in thread
From: Roman Penyaev @ 2016-10-25 16:08 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Andy Lutomirski, Peter Zijlstra, Thomas Gleixner, Ingo Molnar,
	Tejun Heo, linux-kernel

On Tue, Oct 25, 2016 at 5:43 PM, Oleg Nesterov <oleg@redhat.com> wrote:
> On 10/25, Oleg Nesterov wrote:
>>
>> On 10/25, Roman Pen wrote:
>> >
>> > This patch avoids allocation of kthread structure on a stack, and simply
>> > uses kmalloc.
>>
>> Oh. I didn't even read this patch, but I have to admit I personally do not
>> like it. I can be wrong, but imo this is the step to the wrong direction.
>
> And after I tried to actually read it I dislike it even more, sorry Roman.
> Starting from the fact it moves kthread_create_info into struct kthread.

that can be changed, of course, as I told, I wanted to keep allocations/
deallocations simpler.

>> struct kthread is already bloated, we should not bloat it more. Instead
>> we should kill it. And to_kthread() too, at least in its current form.
>
> Yes, but even if we can't or do not want to do this, even if we want to
> kmalloc struct kthread, I really think it should not be refcounted
> separately from task_struct.

it is already like that, we have to get/put references on a task stack.

>
> something like the patch in http://marc.info/?l=linux-kernel&m=146715459127804

the key function in that patch is:

free_kthread_struct(tsk);

so if we teach the generic free_task() to deal with kthreads, that of course
solves these kind of problems.  I did not consider that variant.

>
> Either way to_live_kthread() must go away. Currently we can't avoid it
> because we abuse vfork_done, but as I already said we no longer need this.

There is something which I do not understand.  You still need to have a
connection (a pointer) between task_struct and private data (kthread AND
private data, whatever), which is passed by the user of kthread API.
You still need to find a victim in a task_struct and abuse it :)

So in particular I do not understand this comment from the patch above
where you abuse 'current->set_child_tid':

 * This is the ugly but simple hack we will hopefully remove soon.

how you are going to avoid this abuse of set_child_tid? or vfork_done?
because vfork_done is not only for waking up (yes, I totally agree, we
can reuse task_work), it is also for getting a private data (like
workqueue uses it):  task_struct->vfork_done->kthread->data.

--
Roman

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v3 1/1] kthread: allocate kthread structure using kmalloc
  2016-10-25 16:08     ` Roman Penyaev
@ 2016-10-25 16:17       ` Oleg Nesterov
  0 siblings, 0 replies; 31+ messages in thread
From: Oleg Nesterov @ 2016-10-25 16:17 UTC (permalink / raw)
  To: Roman Penyaev
  Cc: Andy Lutomirski, Peter Zijlstra, Thomas Gleixner, Ingo Molnar,
	Tejun Heo, linux-kernel

Roman, I need to run away, just one note.

On 10/25, Roman Penyaev wrote:
>
> On Tue, Oct 25, 2016 at 5:43 PM, Oleg Nesterov <oleg@redhat.com> wrote:
>
> So in particular I do not understand this comment from the patch above
> where you abuse 'current->set_child_tid':
>
>  * This is the ugly but simple hack we will hopefully remove soon.
>
> how you are going to avoid this abuse of set_child_tid?

please ignore this comment. It actually reflects my desire to kill
struct kthread. If we won't do this, we can (ab)use this or another member
in task_struct.

> or vfork_done?
> because vfork_done is not only for waking up (yes, I totally agree, we
> can reuse task_work), it is also for getting a private data (like
> workqueue uses it):  task_struct->vfork_done->kthread->data.

kthreads simply should not use ->vfork_done at all. to_kthread() can
use the same pointer.

Oleg.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v3 1/1] kthread: allocate kthread structure using kmalloc
  2016-10-25 15:43   ` Oleg Nesterov
  2016-10-25 16:08     ` Roman Penyaev
@ 2016-10-25 16:52     ` Andy Lutomirski
  2016-10-26 14:14       ` Oleg Nesterov
  1 sibling, 1 reply; 31+ messages in thread
From: Andy Lutomirski @ 2016-10-25 16:52 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Roman Pen, Andy Lutomirski, Peter Zijlstra, Thomas Gleixner,
	Ingo Molnar, Tejun Heo, linux-kernel

On Tue, Oct 25, 2016 at 8:43 AM, Oleg Nesterov <oleg@redhat.com> wrote:
> On 10/25, Oleg Nesterov wrote:
>>
>> On 10/25, Roman Pen wrote:
>> >
>> > This patch avoids allocation of kthread structure on a stack, and simply
>> > uses kmalloc.
>>
>> Oh. I didn't even read this patch, but I have to admit I personally do not
>> like it. I can be wrong, but imo this is the step to the wrong direction.
>
> And after I tried to actually read it I dislike it even more, sorry Roman.
> Starting from the fact it moves kthread_create_info into struct kthread.

Would it perhaps make sense to do something like Roman's patch for 4.9
and then consider further changes down the road?  Roman's patch
appears to fix a real bug, and I think that, while not really ideal,
the code is an incredible mess right now and Roman's patch (assuming
it's correct) makes it considerably nicer.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v3 1/1] kthread: allocate kthread structure using kmalloc
  2016-10-25 16:52     ` Andy Lutomirski
@ 2016-10-26 14:14       ` Oleg Nesterov
  2016-10-26 14:57         ` Thomas Gleixner
                           ` (2 more replies)
  0 siblings, 3 replies; 31+ messages in thread
From: Oleg Nesterov @ 2016-10-26 14:14 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Roman Pen, Andy Lutomirski, Peter Zijlstra, Thomas Gleixner,
	Ingo Molnar, Tejun Heo, linux-kernel

On 10/25, Andy Lutomirski wrote:
>
> Would it perhaps make sense to do something like Roman's patch for 4.9
> and then consider further changes down the road?

OK, lets make it kmalloc'ed. Please see my old patch below, slightly changed.

I'll send it after I do some testing and write the changelog. No separate
refcounting, no complications.

Some notes right now. Of course, with this patch we are ready to remove
put_task_stack() from kthread.c right now. The next change should kill
to_live_kthread() altogether. And stop using ->vfork_done.

And. With this patch we do not need another "workqueue: ignore dead tasks
in a workqueue sleep hook" fix from Roman.

> Roman's patch
> appears to fix a real bug,

Well, it fixes the additional problems if we already have a bug, but I
agree this is a problem anyway.

> and I think that, while not really ideal,
> the code is an incredible mess right now and Roman's patch (assuming
> it's correct) makes it considerably nicer.

This is where I disagree with you and Roman. Yes, it needs cleanups and
only because of kthread on stack. But _IMO_ Roman's patch makes it much,
much worse and adds a lot of unnecessary complications.

Could you please look at the patch below?

Oleg.
---

diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index a6e82a6..c1c3e63 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -48,6 +48,7 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
 	__k;								   \
 })
 
+void free_kthread_struct(struct task_struct *k);
 void kthread_bind(struct task_struct *k, unsigned int cpu);
 void kthread_bind_mask(struct task_struct *k, const struct cpumask *mask);
 int kthread_stop(struct task_struct *k);
diff --git a/kernel/fork.c b/kernel/fork.c
index 623259f..663c6a7 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -351,6 +351,8 @@ void free_task(struct task_struct *tsk)
 	ftrace_graph_exit_task(tsk);
 	put_seccomp_filter(tsk);
 	arch_release_task_struct(tsk);
+	if (tsk->flags & PF_KTHREAD)
+		free_kthread_struct(tsk);
 	free_task_struct(tsk);
 }
 EXPORT_SYMBOL(free_task);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index be2cc1f..c6adbde 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -53,14 +53,34 @@ enum KTHREAD_BITS {
 	KTHREAD_IS_PARKED,
 };
 
-#define __to_kthread(vfork)	\
-	container_of(vfork, struct kthread, exited)
+static inline void set_kthread_struct(void *kthread)
+{
+	/*
+	 * We abuse ->set_child_tid to avoid the new member and because it
+	 * can't be wrongly copied by copy_process(). We also rely on fact
+	 * that the caller can't exec, so PF_KTHREAD can't be cleared.
+	 */
+	current->set_child_tid = (__force void __user *)kthread;
+}
 
 static inline struct kthread *to_kthread(struct task_struct *k)
 {
-	return __to_kthread(k->vfork_done);
+	WARN_ON(!(k->flags & PF_KTHREAD));
+	return (__force void *)k->set_child_tid;
 }
 
+void free_kthread_struct(struct task_struct *k)
+{
+	kfree(to_kthread(k)); /* can be NULL if kmalloc() failed */
+}
+
+#define __to_kthread(vfork)	\
+	container_of(vfork, struct kthread, exited)
+
+/*
+ * TODO: kill it and use to_kthread(). But we still need the users
+ * like kthread_stop() which has to sync with the exiting kthread.
+ */
 static struct kthread *to_live_kthread(struct task_struct *k)
 {
 	struct completion *vfork = ACCESS_ONCE(k->vfork_done);
@@ -181,14 +201,11 @@ static int kthread(void *_create)
 	int (*threadfn)(void *data) = create->threadfn;
 	void *data = create->data;
 	struct completion *done;
-	struct kthread self;
+	struct kthread *self;
 	int ret;
 
-	self.flags = 0;
-	self.data = data;
-	init_completion(&self.exited);
-	init_completion(&self.parked);
-	current->vfork_done = &self.exited;
+	self = kmalloc(sizeof(*self), GFP_KERNEL);
+	set_kthread_struct(self);
 
 	/* If user was SIGKILLed, I release the structure. */
 	done = xchg(&create->done, NULL);
@@ -196,6 +213,19 @@ static int kthread(void *_create)
 		kfree(create);
 		do_exit(-EINTR);
 	}
+
+	if (!self) {
+		create->result = ERR_PTR(-ENOMEM);
+		complete(done);
+		do_exit(-ENOMEM);
+	}
+
+	self->flags = 0;
+	self->data = data;
+	init_completion(&self->exited);
+	init_completion(&self->parked);
+	current->vfork_done = &self->exited;
+
 	/* OK, tell user we're spawned, wait for stop or wakeup */
 	__set_current_state(TASK_UNINTERRUPTIBLE);
 	create->result = current;
@@ -203,12 +233,10 @@ static int kthread(void *_create)
 	schedule();
 
 	ret = -EINTR;
-
-	if (!test_bit(KTHREAD_SHOULD_STOP, &self.flags)) {
-		__kthread_parkme(&self);
+	if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) {
+		__kthread_parkme(self);
 		ret = threadfn(data);
 	}
-	/* we can't just return, we must preserve "self" on stack */
 	do_exit(ret);
 }
 

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH v3 1/1] kthread: allocate kthread structure using kmalloc
  2016-10-26 14:14       ` Oleg Nesterov
@ 2016-10-26 14:57         ` Thomas Gleixner
  2016-10-26 15:51           ` Oleg Nesterov
  2016-10-26 16:13         ` [PATCH v3 1/1] kthread: allocate kthread structure using kmalloc Oleg Nesterov
  2016-10-27  2:56         ` Josh Poimboeuf
  2 siblings, 1 reply; 31+ messages in thread
From: Thomas Gleixner @ 2016-10-26 14:57 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Andy Lutomirski, Roman Pen, Andy Lutomirski, Peter Zijlstra,
	Ingo Molnar, Tejun Heo, linux-kernel

On Wed, 26 Oct 2016, Oleg Nesterov wrote:
> Some notes right now. Of course, with this patch we are ready to remove
> put_task_stack() from kthread.c right now. The next change should kill
> to_live_kthread() altogether. And stop using ->vfork_done.
> 
> And. With this patch we do not need another "workqueue: ignore dead tasks
> in a workqueue sleep hook" fix from Roman.

Nice !
 
> diff --git a/kernel/kthread.c b/kernel/kthread.c
> index be2cc1f..c6adbde 100644
> --- a/kernel/kthread.c
> +++ b/kernel/kthread.c
> @@ -53,14 +53,34 @@ enum KTHREAD_BITS {
>  	KTHREAD_IS_PARKED,
>  };
>  
> -#define __to_kthread(vfork)	\
> -	container_of(vfork, struct kthread, exited)
> +static inline void set_kthread_struct(void *kthread)
> +{
> +	/*
> +	 * We abuse ->set_child_tid to avoid the new member and because it
> +	 * can't be wrongly copied by copy_process(). We also rely on fact
> +	 * that the caller can't exec, so PF_KTHREAD can't be cleared.
> +	 */
> +	current->set_child_tid = (__force void __user *)kthread;

Can we pretty please avoid this type casting? We only have 5 places using
set_child_tid. So we can really make it a proper union and fix up the 5
usage sites as a preparatory patch for this.

> +}
>  
>  static inline struct kthread *to_kthread(struct task_struct *k)
>  {
> -	return __to_kthread(k->vfork_done);
> +	WARN_ON(!(k->flags & PF_KTHREAD));
> +	return (__force void *)k->set_child_tid;
>  }
>  
> +void free_kthread_struct(struct task_struct *k)
> +{
> +	kfree(to_kthread(k)); /* can be NULL if kmalloc() failed */

Can you please not use tail comments? They really stop the reading flow.

> +#define __to_kthread(vfork)	\
> +	container_of(vfork, struct kthread, exited)
> +
> +/*
> + * TODO: kill it and use to_kthread(). But we still need the users
> + * like kthread_stop() which has to sync with the exiting kthread.
> + */
>  static struct kthread *to_live_kthread(struct task_struct *k)
>  {
>  	struct completion *vfork = ACCESS_ONCE(k->vfork_done);
> @@ -181,14 +201,11 @@ static int kthread(void *_create)
>  	int (*threadfn)(void *data) = create->threadfn;
>  	void *data = create->data;
>  	struct completion *done;
> -	struct kthread self;
> +	struct kthread *self;
>  	int ret;
>  
> -	self.flags = 0;
> -	self.data = data;
> -	init_completion(&self.exited);
> -	init_completion(&self.parked);
> -	current->vfork_done = &self.exited;
> +	self = kmalloc(sizeof(*self), GFP_KERNEL);
> +	set_kthread_struct(self);
>  
>  	/* If user was SIGKILLed, I release the structure. */
>  	done = xchg(&create->done, NULL);
> @@ -196,6 +213,19 @@ static int kthread(void *_create)
>  		kfree(create);
>  		do_exit(-EINTR);
>  	}
> +
> +	if (!self) {
> +		create->result = ERR_PTR(-ENOMEM);
> +		complete(done);
> +		do_exit(-ENOMEM);
> +	}
> +
> +	self->flags = 0;
> +	self->data = data;
> +	init_completion(&self->exited);
> +	init_completion(&self->parked);
> +	current->vfork_done = &self->exited;
> +
>  	/* OK, tell user we're spawned, wait for stop or wakeup */
>  	__set_current_state(TASK_UNINTERRUPTIBLE);
>  	create->result = current;
> @@ -203,12 +233,10 @@ static int kthread(void *_create)
>  	schedule();
>  
>  	ret = -EINTR;
> -
> -	if (!test_bit(KTHREAD_SHOULD_STOP, &self.flags)) {
> -		__kthread_parkme(&self);
> +	if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) {
> +		__kthread_parkme(self);
>  		ret = threadfn(data);
>  	}
> -	/* we can't just return, we must preserve "self" on stack */
>  	do_exit(ret);
>  }

Other than the above nits, this is the right direction to go.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v3 1/1] kthread: allocate kthread structure using kmalloc
  2016-10-26 14:57         ` Thomas Gleixner
@ 2016-10-26 15:51           ` Oleg Nesterov
  2016-10-26 18:31             ` Thomas Gleixner
  0 siblings, 1 reply; 31+ messages in thread
From: Oleg Nesterov @ 2016-10-26 15:51 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Andy Lutomirski, Roman Pen, Andy Lutomirski, Peter Zijlstra,
	Ingo Molnar, Tejun Heo, linux-kernel

On 10/26, Thomas Gleixner wrote:
>
> On Wed, 26 Oct 2016, Oleg Nesterov wrote:
> > +static inline void set_kthread_struct(void *kthread)
> > +{
> > +	/*
> > +	 * We abuse ->set_child_tid to avoid the new member and because it
> > +	 * can't be wrongly copied by copy_process(). We also rely on fact
> > +	 * that the caller can't exec, so PF_KTHREAD can't be cleared.
> > +	 */
> > +	current->set_child_tid = (__force void __user *)kthread;
>
> Can we pretty please avoid this type casting? We only have 5 places using
> set_child_tid. So we can really make it a proper union

Yes, I thought about anonymous union too, the only problem is that
it will need more comments ;)

And I agree with other nits, will redo/resend tomorrow.

Thanks!

Oleg.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v3 1/1] kthread: allocate kthread structure using kmalloc
  2016-10-26 14:14       ` Oleg Nesterov
  2016-10-26 14:57         ` Thomas Gleixner
@ 2016-10-26 16:13         ` Oleg Nesterov
  2016-10-27  2:56         ` Josh Poimboeuf
  2 siblings, 0 replies; 31+ messages in thread
From: Oleg Nesterov @ 2016-10-26 16:13 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Roman Pen, Andy Lutomirski, Peter Zijlstra, Thomas Gleixner,
	Ingo Molnar, Tejun Heo, linux-kernel

Damn. sorry for noise, this doesn't really matter, but ...

On 10/26, Oleg Nesterov wrote:
>
> Yes, it needs cleanups and
> only because of kthread on stack.

What I actually tried to say is "NOT only because of kthread on stack".

Oleg.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v3 1/1] kthread: allocate kthread structure using kmalloc
  2016-10-26 15:51           ` Oleg Nesterov
@ 2016-10-26 18:31             ` Thomas Gleixner
  2016-10-28 16:11               ` [PATCH 0/2] kthread: make struct kthread kmalloc'ed Oleg Nesterov
  0 siblings, 1 reply; 31+ messages in thread
From: Thomas Gleixner @ 2016-10-26 18:31 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Andy Lutomirski, Roman Pen, Andy Lutomirski, Peter Zijlstra,
	Ingo Molnar, Tejun Heo, linux-kernel

On Wed, 26 Oct 2016, Oleg Nesterov wrote:
> On 10/26, Thomas Gleixner wrote:
> >
> > On Wed, 26 Oct 2016, Oleg Nesterov wrote:
> > > +static inline void set_kthread_struct(void *kthread)
> > > +{
> > > +	/*
> > > +	 * We abuse ->set_child_tid to avoid the new member and because it
> > > +	 * can't be wrongly copied by copy_process(). We also rely on fact
> > > +	 * that the caller can't exec, so PF_KTHREAD can't be cleared.
> > > +	 */
> > > +	current->set_child_tid = (__force void __user *)kthread;
> >
> > Can we pretty please avoid this type casting? We only have 5 places using
> > set_child_tid. So we can really make it a proper union
> 
> Yes, I thought about anonymous union too, the only problem is that
> it will need more comments ;)

Be careful with anonymous unions. There are a few pitfalls with older
compilers. That's why I said make it a proper union and fixup the 5 usage
sites. It might work in this case, but you've been warned :)

Thanks,

	tglx

 

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v3 1/1] kthread: allocate kthread structure using kmalloc
  2016-10-26 14:14       ` Oleg Nesterov
  2016-10-26 14:57         ` Thomas Gleixner
  2016-10-26 16:13         ` [PATCH v3 1/1] kthread: allocate kthread structure using kmalloc Oleg Nesterov
@ 2016-10-27  2:56         ` Josh Poimboeuf
  2016-10-27 13:10           ` Oleg Nesterov
  2 siblings, 1 reply; 31+ messages in thread
From: Josh Poimboeuf @ 2016-10-27  2:56 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Andy Lutomirski, Roman Pen, Andy Lutomirski, Peter Zijlstra,
	Thomas Gleixner, Ingo Molnar, Tejun Heo, linux-kernel

On Wed, Oct 26, 2016 at 04:14:00PM +0200, Oleg Nesterov wrote:
> +/*
> + * TODO: kill it and use to_kthread(). But we still need the users
> + * like kthread_stop() which has to sync with the exiting kthread.
> + */
>  static struct kthread *to_live_kthread(struct task_struct *k)

Now that the kthread struct is no longer on the stack, are the
try_get_task_stack() and its corresponding put_task_stack()'s still
needed?

-- 
Josh

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v3 1/1] kthread: allocate kthread structure using kmalloc
  2016-10-27  2:56         ` Josh Poimboeuf
@ 2016-10-27 13:10           ` Oleg Nesterov
  0 siblings, 0 replies; 31+ messages in thread
From: Oleg Nesterov @ 2016-10-27 13:10 UTC (permalink / raw)
  To: Josh Poimboeuf
  Cc: Andy Lutomirski, Roman Pen, Andy Lutomirski, Peter Zijlstra,
	Thomas Gleixner, Ingo Molnar, Tejun Heo, linux-kernel

On 10/26, Josh Poimboeuf wrote:
>
> On Wed, Oct 26, 2016 at 04:14:00PM +0200, Oleg Nesterov wrote:
> > +/*
> > + * TODO: kill it and use to_kthread(). But we still need the users
> > + * like kthread_stop() which has to sync with the exiting kthread.
> > + */
> >  static struct kthread *to_live_kthread(struct task_struct *k)
>
> Now that the kthread struct is no longer on the stack, are the
> try_get_task_stack() and its corresponding put_task_stack()'s still
> needed?

It seems you missed this part

	Of course, with this patch we are ready to remove
	put_task_stack() from kthread.c right now. The next change should kill
	to_live_kthread() altogether.

in the same email ;)

Oleg.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH 0/2] kthread: make struct kthread kmalloc'ed
  2016-10-26 18:31             ` Thomas Gleixner
@ 2016-10-28 16:11               ` Oleg Nesterov
  2016-10-28 16:11                 ` [PATCH 1/2] " Oleg Nesterov
                                   ` (3 more replies)
  0 siblings, 4 replies; 31+ messages in thread
From: Oleg Nesterov @ 2016-10-28 16:11 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Andy Lutomirski, Roman Pen, Andy Lutomirski, Peter Zijlstra,
	Ingo Molnar, Tejun Heo, linux-kernel

Sorry for delay, I was distracted...

On 10/26, Thomas Gleixner wrote:
>
> On Wed, 26 Oct 2016, Oleg Nesterov wrote:
> > On 10/26, Thomas Gleixner wrote:
> > >
> > > On Wed, 26 Oct 2016, Oleg Nesterov wrote:
> > > > +static inline void set_kthread_struct(void *kthread)
> > > > +{
> > > > +	/*
> > > > +	 * We abuse ->set_child_tid to avoid the new member and because it
> > > > +	 * can't be wrongly copied by copy_process(). We also rely on fact
> > > > +	 * that the caller can't exec, so PF_KTHREAD can't be cleared.
> > > > +	 */
> > > > +	current->set_child_tid = (__force void __user *)kthread;
> > >
> > > Can we pretty please avoid this type casting? We only have 5 places using
> > > set_child_tid. So we can really make it a proper union
> >
> > Yes, I thought about anonymous union too, the only problem is that
> > it will need more comments ;)
>
> Be careful with anonymous unions. There are a few pitfalls with older
> compilers. That's why I said make it a proper union and fixup the 5 usage
> sites.

Ah. Then I'd prefer to do this later or in a separate change, unless you
feel strongly. I certainly do not want to update other users at least right
now.

Yes, these 2 type casts do not look nice, but they are hidden in the trivial
helpers. And, for example, if something goes wrong we can trivially change
this code to use, say, sas_ss_sp. Just we need to update the comments to
explain why it is safe too.

Finally. I still hope we will kill struct kthread (I mean, unbloat it and
embed into task_struct), and this means that the proper union should touch
more members. Say, sas_* and/or vfork_done+set/clear_child_tid. I'd like
to do this only once if possible.


I'll try to kill to_live_kthread() tomorrow, didn't have time to do this
today.

Oleg.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH 1/2] kthread: make struct kthread kmalloc'ed
  2016-10-28 16:11               ` [PATCH 0/2] kthread: make struct kthread kmalloc'ed Oleg Nesterov
@ 2016-10-28 16:11                 ` Oleg Nesterov
  2016-10-28 18:48                   ` Thomas Gleixner
  2016-10-28 16:12                 ` [PATCH 2/2] Revert "kthread: Pin the stack via try_get_task_stack()/put_task_stack() in to_live_kthread() function" Oleg Nesterov
                                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 31+ messages in thread
From: Oleg Nesterov @ 2016-10-28 16:11 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Andy Lutomirski, Roman Pen, Andy Lutomirski, Peter Zijlstra,
	Ingo Molnar, Tejun Heo, linux-kernel

I still think we should kill struct kthread in its current form, but this
needs cleanups outside of kthread.c.

So make it kmalloc'ed for now to avoid the problems with stack corruption,
for example the crashed kthread will likely OOPS again because its .exited
was destroyed by rewind_stack_do_exit().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 include/linux/kthread.h |  1 +
 kernel/fork.c           |  2 ++
 kernel/kthread.c        | 58 ++++++++++++++++++++++++++++++++++++++-----------
 3 files changed, 48 insertions(+), 13 deletions(-)

diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index a6e82a6..c1c3e63 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -48,6 +48,7 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
 	__k;								   \
 })
 
+void free_kthread_struct(struct task_struct *k);
 void kthread_bind(struct task_struct *k, unsigned int cpu);
 void kthread_bind_mask(struct task_struct *k, const struct cpumask *mask);
 int kthread_stop(struct task_struct *k);
diff --git a/kernel/fork.c b/kernel/fork.c
index 623259f..663c6a7 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -351,6 +351,8 @@ void free_task(struct task_struct *tsk)
 	ftrace_graph_exit_task(tsk);
 	put_seccomp_filter(tsk);
 	arch_release_task_struct(tsk);
+	if (tsk->flags & PF_KTHREAD)
+		free_kthread_struct(tsk);
 	free_task_struct(tsk);
 }
 EXPORT_SYMBOL(free_task);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index be2cc1f..9d64b65 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -53,14 +53,38 @@ enum KTHREAD_BITS {
 	KTHREAD_IS_PARKED,
 };
 
-#define __to_kthread(vfork)	\
-	container_of(vfork, struct kthread, exited)
+static inline void set_kthread_struct(void *kthread)
+{
+	/*
+	 * We abuse ->set_child_tid to avoid the new member and because it
+	 * can't be wrongly copied by copy_process(). We also rely on fact
+	 * that the caller can't exec, so PF_KTHREAD can't be cleared.
+	 */
+	current->set_child_tid = (__force void __user *)kthread;
+}
 
 static inline struct kthread *to_kthread(struct task_struct *k)
 {
-	return __to_kthread(k->vfork_done);
+	WARN_ON(!(k->flags & PF_KTHREAD));
+	return (__force void *)k->set_child_tid;
+}
+
+void free_kthread_struct(struct task_struct *k)
+{
+	/*
+	 * Can be NULL if this kthread was created by kernel_thread()
+	 * or if kmalloc() in kthread() failed.
+	 */
+	kfree(to_kthread(k));
 }
 
+#define __to_kthread(vfork)	\
+	container_of(vfork, struct kthread, exited)
+
+/*
+ * TODO: kill it and use to_kthread(). But we still need the users
+ * like kthread_stop() which has to sync with the exiting kthread.
+ */
 static struct kthread *to_live_kthread(struct task_struct *k)
 {
 	struct completion *vfork = ACCESS_ONCE(k->vfork_done);
@@ -181,14 +205,11 @@ static int kthread(void *_create)
 	int (*threadfn)(void *data) = create->threadfn;
 	void *data = create->data;
 	struct completion *done;
-	struct kthread self;
+	struct kthread *self;
 	int ret;
 
-	self.flags = 0;
-	self.data = data;
-	init_completion(&self.exited);
-	init_completion(&self.parked);
-	current->vfork_done = &self.exited;
+	self = kmalloc(sizeof(*self), GFP_KERNEL);
+	set_kthread_struct(self);
 
 	/* If user was SIGKILLed, I release the structure. */
 	done = xchg(&create->done, NULL);
@@ -196,6 +217,19 @@ static int kthread(void *_create)
 		kfree(create);
 		do_exit(-EINTR);
 	}
+
+	if (!self) {
+		create->result = ERR_PTR(-ENOMEM);
+		complete(done);
+		do_exit(-ENOMEM);
+	}
+
+	self->flags = 0;
+	self->data = data;
+	init_completion(&self->exited);
+	init_completion(&self->parked);
+	current->vfork_done = &self->exited;
+
 	/* OK, tell user we're spawned, wait for stop or wakeup */
 	__set_current_state(TASK_UNINTERRUPTIBLE);
 	create->result = current;
@@ -203,12 +237,10 @@ static int kthread(void *_create)
 	schedule();
 
 	ret = -EINTR;
-
-	if (!test_bit(KTHREAD_SHOULD_STOP, &self.flags)) {
-		__kthread_parkme(&self);
+	if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) {
+		__kthread_parkme(self);
 		ret = threadfn(data);
 	}
-	/* we can't just return, we must preserve "self" on stack */
 	do_exit(ret);
 }
 
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 2/2] Revert "kthread: Pin the stack via try_get_task_stack()/put_task_stack() in to_live_kthread() function"
  2016-10-28 16:11               ` [PATCH 0/2] kthread: make struct kthread kmalloc'ed Oleg Nesterov
  2016-10-28 16:11                 ` [PATCH 1/2] " Oleg Nesterov
@ 2016-10-28 16:12                 ` Oleg Nesterov
  2016-10-28 18:49                   ` Thomas Gleixner
  2016-10-28 18:44                 ` [PATCH 0/2] kthread: make struct kthread kmalloc'ed Thomas Gleixner
  2016-10-31 20:07                 ` [PATCH 0/2] kthread: kill to_live_kthread() Oleg Nesterov
  3 siblings, 1 reply; 31+ messages in thread
From: Oleg Nesterov @ 2016-10-28 16:12 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Andy Lutomirski, Roman Pen, Andy Lutomirski, Peter Zijlstra,
	Ingo Molnar, Tejun Heo, linux-kernel

This reverts commit 23196f2e5f5d810578a772785807dcdc2b9fdce9.

After the previous change struct kthread can't go away, no need to pin
the stack.

TODO: kill to_live_kthread().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 kernel/kthread.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/kernel/kthread.c b/kernel/kthread.c
index 9d64b65..7891a94 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -88,7 +88,7 @@ void free_kthread_struct(struct task_struct *k)
 static struct kthread *to_live_kthread(struct task_struct *k)
 {
 	struct completion *vfork = ACCESS_ONCE(k->vfork_done);
-	if (likely(vfork) && try_get_task_stack(k))
+	if (likely(vfork))
 		return __to_kthread(vfork);
 	return NULL;
 }
@@ -473,10 +473,8 @@ void kthread_unpark(struct task_struct *k)
 {
 	struct kthread *kthread = to_live_kthread(k);
 
-	if (kthread) {
+	if (kthread)
 		__kthread_unpark(k, kthread);
-		put_task_stack(k);
-	}
 }
 EXPORT_SYMBOL_GPL(kthread_unpark);
 
@@ -505,7 +503,6 @@ int kthread_park(struct task_struct *k)
 				wait_for_completion(&kthread->parked);
 			}
 		}
-		put_task_stack(k);
 		ret = 0;
 	}
 	return ret;
@@ -541,7 +538,6 @@ int kthread_stop(struct task_struct *k)
 		__kthread_unpark(k, kthread);
 		wake_up_process(k);
 		wait_for_completion(&kthread->exited);
-		put_task_stack(k);
 	}
 	ret = k->exit_code;
 	put_task_struct(k);
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH 0/2] kthread: make struct kthread kmalloc'ed
  2016-10-28 16:11               ` [PATCH 0/2] kthread: make struct kthread kmalloc'ed Oleg Nesterov
  2016-10-28 16:11                 ` [PATCH 1/2] " Oleg Nesterov
  2016-10-28 16:12                 ` [PATCH 2/2] Revert "kthread: Pin the stack via try_get_task_stack()/put_task_stack() in to_live_kthread() function" Oleg Nesterov
@ 2016-10-28 18:44                 ` Thomas Gleixner
  2016-10-31 20:07                 ` [PATCH 0/2] kthread: kill to_live_kthread() Oleg Nesterov
  3 siblings, 0 replies; 31+ messages in thread
From: Thomas Gleixner @ 2016-10-28 18:44 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Andy Lutomirski, Roman Pen, Andy Lutomirski, Peter Zijlstra,
	Ingo Molnar, Tejun Heo, linux-kernel

On Fri, 28 Oct 2016, Oleg Nesterov wrote:
> On 10/26, Thomas Gleixner wrote:
> 
> > Be careful with anonymous unions. There are a few pitfalls with older
> > compilers. That's why I said make it a proper union and fixup the 5 usage
> > sites.
> 
> Ah. Then I'd prefer to do this later or in a separate change, unless you
> feel strongly. I certainly do not want to update other users at least right
> now.
> 
> Yes, these 2 type casts do not look nice, but they are hidden in the trivial
> helpers. And, for example, if something goes wrong we can trivially change
> this code to use, say, sas_ss_sp. Just we need to update the comments to
> explain why it is safe too.
> 
> Finally. I still hope we will kill struct kthread (I mean, unbloat it and
> embed into task_struct), and this means that the proper union should touch
> more members. Say, sas_* and/or vfork_done+set/clear_child_tid. I'd like
> to do this only once if possible.

Fair enough.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 1/2] kthread: make struct kthread kmalloc'ed
  2016-10-28 16:11                 ` [PATCH 1/2] " Oleg Nesterov
@ 2016-10-28 18:48                   ` Thomas Gleixner
  0 siblings, 0 replies; 31+ messages in thread
From: Thomas Gleixner @ 2016-10-28 18:48 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Andy Lutomirski, Roman Pen, Andy Lutomirski, Peter Zijlstra,
	Ingo Molnar, Tejun Heo, linux-kernel

On Fri, 28 Oct 2016, Oleg Nesterov wrote:

> I still think we should kill struct kthread in its current form, but this
> needs cleanups outside of kthread.c.
> 
> So make it kmalloc'ed for now to avoid the problems with stack corruption,
> for example the crashed kthread will likely OOPS again because its .exited
> was destroyed by rewind_stack_do_exit().
> 
> Signed-off-by: Oleg Nesterov <oleg@redhat.com>

Reviewed-by: Thomas Gleixner <tglx@linutronix.de>

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/2] Revert "kthread: Pin the stack via try_get_task_stack()/put_task_stack() in to_live_kthread() function"
  2016-10-28 16:12                 ` [PATCH 2/2] Revert "kthread: Pin the stack via try_get_task_stack()/put_task_stack() in to_live_kthread() function" Oleg Nesterov
@ 2016-10-28 18:49                   ` Thomas Gleixner
  0 siblings, 0 replies; 31+ messages in thread
From: Thomas Gleixner @ 2016-10-28 18:49 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Andy Lutomirski, Roman Pen, Andy Lutomirski, Peter Zijlstra,
	Ingo Molnar, Tejun Heo, linux-kernel

On Fri, 28 Oct 2016, Oleg Nesterov wrote:

> This reverts commit 23196f2e5f5d810578a772785807dcdc2b9fdce9.
> 
> After the previous change struct kthread can't go away, no need to pin
> the stack.
> 
> TODO: kill to_live_kthread().

Acked-by: Thomas Gleixner <tglx@linutronix.de>
 

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH 0/2] kthread: kill to_live_kthread()
  2016-10-28 16:11               ` [PATCH 0/2] kthread: make struct kthread kmalloc'ed Oleg Nesterov
                                   ` (2 preceding siblings ...)
  2016-10-28 18:44                 ` [PATCH 0/2] kthread: make struct kthread kmalloc'ed Thomas Gleixner
@ 2016-10-31 20:07                 ` Oleg Nesterov
  2016-10-31 20:07                   ` [PATCH 1/2] kthread: don't use to_live_kthread() in kthread_stop() Oleg Nesterov
                                     ` (2 more replies)
  3 siblings, 3 replies; 31+ messages in thread
From: Oleg Nesterov @ 2016-10-31 20:07 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Andy Lutomirski, Roman Pen, Andy Lutomirski, Peter Zijlstra,
	Ingo Molnar, Tejun Heo, linux-kernel, Chunming Zhou,
	Alex Deucher

On 10/28, Oleg Nesterov wrote:
>
> I'll try to kill to_live_kthread() tomorrow, didn't have time to do this
> today.

Hmm. And this looks even simpler than I thought, or I am totally confused.
Thomas, could you please review this series too?

Chunming, Alex, I _think_ that drm/amdgpu should not use kthread_park(), but
lets discuss this separately. It would be nice if you can confirm that 2/2
can't break this code, even if I think "it obviously can't".

Oleg.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH 1/2] kthread: don't use to_live_kthread() in kthread_stop()
  2016-10-31 20:07                 ` [PATCH 0/2] kthread: kill to_live_kthread() Oleg Nesterov
@ 2016-10-31 20:07                   ` Oleg Nesterov
  2016-11-09  7:58                     ` Thomas Gleixner
  2016-10-31 20:08                   ` [PATCH 2/2] kthread: don't use to_live_kthread() in kthread_park() and kthread_unpark() Oleg Nesterov
  2016-11-07 18:23                   ` [PATCH 0/2] kthread: kill to_live_kthread() Andy Lutomirski
  2 siblings, 1 reply; 31+ messages in thread
From: Oleg Nesterov @ 2016-10-31 20:07 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Andy Lutomirski, Roman Pen, Andy Lutomirski, Peter Zijlstra,
	Ingo Molnar, Tejun Heo, linux-kernel, Chunming Zhou,
	Alex Deucher

kthread_stop() had to use to_live_kthread() simply because it was not
possible to access kthread->exited after the exiting kthread clears
task_struct->vfork_done. Now that to_kthread() is always valid we can
do wake_up_process() + wait_for_completion() unconditionally, we don't
care if it has already passed complete_vfork_done() or even dead.

The exiting kthread can get the spurious wakeup after mm_release() but
this is possible without this change too and this is fine, do_task_dead()
ensures that this can't make any harm.

Note: we can even change this function to use task_work_add() and avoid
->vfork_done altogether, probably we will do this later.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 kernel/kthread.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/kernel/kthread.c b/kernel/kthread.c
index 7891a94..4dcbc8b 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -532,13 +532,11 @@ int kthread_stop(struct task_struct *k)
 	trace_sched_kthread_stop(k);
 
 	get_task_struct(k);
-	kthread = to_live_kthread(k);
-	if (kthread) {
-		set_bit(KTHREAD_SHOULD_STOP, &kthread->flags);
-		__kthread_unpark(k, kthread);
-		wake_up_process(k);
-		wait_for_completion(&kthread->exited);
-	}
+	kthread = to_kthread(k);
+	set_bit(KTHREAD_SHOULD_STOP, &kthread->flags);
+	__kthread_unpark(k, kthread);
+	wake_up_process(k);
+	wait_for_completion(&kthread->exited);
 	ret = k->exit_code;
 	put_task_struct(k);
 
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 2/2] kthread: don't use to_live_kthread() in kthread_park() and kthread_unpark()
  2016-10-31 20:07                 ` [PATCH 0/2] kthread: kill to_live_kthread() Oleg Nesterov
  2016-10-31 20:07                   ` [PATCH 1/2] kthread: don't use to_live_kthread() in kthread_stop() Oleg Nesterov
@ 2016-10-31 20:08                   ` Oleg Nesterov
  2016-11-09  8:45                     ` Thomas Gleixner
  2016-11-07 18:23                   ` [PATCH 0/2] kthread: kill to_live_kthread() Andy Lutomirski
  2 siblings, 1 reply; 31+ messages in thread
From: Oleg Nesterov @ 2016-10-31 20:08 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Andy Lutomirski, Roman Pen, Andy Lutomirski, Peter Zijlstra,
	Ingo Molnar, Tejun Heo, linux-kernel, Chunming Zhou,
	Alex Deucher

Now that to_kthread() is always valid we can change kthread_park() and
kthread_unpark() to use it and kill to_live_kthread().

kthread_unpark() is trivial, if KTHREAD_IS_PARKED is set we know that this
kthread has called complete(&self->parked), we do not care if it exits after
that if we race with kthread_stop().

kthread_park() is more tricky, but only because its semantics is not well
defined. It returns -ENOSYS if the thread exited but this can never happen
and as Roman pointed out kthread_park() can obviously block forever if it
could race with the exiting kthread.

I think we need to unexport kthread_park/unpark, and either make it return
"void" or actually fix the race with kthred_stop/exit. This patch just adds
WARN_ON(PF_EXITING) for now.

The usage of kthread_park() in cpuhp code (cpu.c, smpboot.c, stop_machine.c)
is fine. It can never see an exiting/exited kthread, smpboot_destroy_threads()
clears *ht->store, smpboot_park_thread() checks it is not NULL under the same
smpboot_threads_lock. cpuhp_threads and cpu_stop_threads never exit, so other
callers are fine too.

But it has two more users:

- watchdog_park_threads() and it does not look nice. The code is actually
  correct, get_online_cpus() ensures that kthread_park() can't race with
  itself (note that kthread_park() can't handle this race correctly), but
  imo it should not use kthread_park() directly.

- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c and I think it should not
  use kthread_park() too.

  But this patch should not break this code. kthread_park() must not be
  called after amd_sched_fini() which does kthread_stop(), otherwise even
  to_live_kthread() is not safe because task_struct can be already freed
  and sched->thread can point to nowhere.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 kernel/kthread.c | 69 ++++++++++++++++++++------------------------------------
 1 file changed, 24 insertions(+), 45 deletions(-)

diff --git a/kernel/kthread.c b/kernel/kthread.c
index 4dcbc8b..01d2716 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -78,21 +78,6 @@ void free_kthread_struct(struct task_struct *k)
 	kfree(to_kthread(k));
 }
 
-#define __to_kthread(vfork)	\
-	container_of(vfork, struct kthread, exited)
-
-/*
- * TODO: kill it and use to_kthread(). But we still need the users
- * like kthread_stop() which has to sync with the exiting kthread.
- */
-static struct kthread *to_live_kthread(struct task_struct *k)
-{
-	struct completion *vfork = ACCESS_ONCE(k->vfork_done);
-	if (likely(vfork))
-		return __to_kthread(vfork);
-	return NULL;
-}
-
 /**
  * kthread_should_stop - should this kthread return now?
  *
@@ -441,8 +426,18 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
 	return p;
 }
 
-static void __kthread_unpark(struct task_struct *k, struct kthread *kthread)
+/**
+ * kthread_unpark - unpark a thread created by kthread_create().
+ * @k:		thread created by kthread_create().
+ *
+ * Sets kthread_should_park() for @k to return false, wakes it, and
+ * waits for it to return. If the thread is marked percpu then its
+ * bound to the cpu again.
+ */
+void kthread_unpark(struct task_struct *k)
 {
+	struct kthread *kthread = to_kthread(k);
+
 	clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
 	/*
 	 * We clear the IS_PARKED bit here as we don't wait
@@ -460,22 +455,6 @@ static void __kthread_unpark(struct task_struct *k, struct kthread *kthread)
 		wake_up_state(k, TASK_PARKED);
 	}
 }
-
-/**
- * kthread_unpark - unpark a thread created by kthread_create().
- * @k:		thread created by kthread_create().
- *
- * Sets kthread_should_park() for @k to return false, wakes it, and
- * waits for it to return. If the thread is marked percpu then its
- * bound to the cpu again.
- */
-void kthread_unpark(struct task_struct *k)
-{
-	struct kthread *kthread = to_live_kthread(k);
-
-	if (kthread)
-		__kthread_unpark(k, kthread);
-}
 EXPORT_SYMBOL_GPL(kthread_unpark);
 
 /**
@@ -492,20 +471,20 @@ EXPORT_SYMBOL_GPL(kthread_unpark);
  */
 int kthread_park(struct task_struct *k)
 {
-	struct kthread *kthread = to_live_kthread(k);
-	int ret = -ENOSYS;
-
-	if (kthread) {
-		if (!test_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
-			set_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
-			if (k != current) {
-				wake_up_process(k);
-				wait_for_completion(&kthread->parked);
-			}
+	struct kthread *kthread = to_kthread(k);
+
+	if (WARN_ON(k->flags & PF_EXITING))
+		return -ENOSYS;
+
+	if (!test_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
+		set_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
+		if (k != current) {
+			wake_up_process(k);
+			wait_for_completion(&kthread->parked);
 		}
-		ret = 0;
 	}
-	return ret;
+
+	return 0;
 }
 EXPORT_SYMBOL_GPL(kthread_park);
 
@@ -534,7 +513,7 @@ int kthread_stop(struct task_struct *k)
 	get_task_struct(k);
 	kthread = to_kthread(k);
 	set_bit(KTHREAD_SHOULD_STOP, &kthread->flags);
-	__kthread_unpark(k, kthread);
+	kthread_unpark(k);
 	wake_up_process(k);
 	wait_for_completion(&kthread->exited);
 	ret = k->exit_code;
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH 0/2] kthread: kill to_live_kthread()
  2016-10-31 20:07                 ` [PATCH 0/2] kthread: kill to_live_kthread() Oleg Nesterov
  2016-10-31 20:07                   ` [PATCH 1/2] kthread: don't use to_live_kthread() in kthread_stop() Oleg Nesterov
  2016-10-31 20:08                   ` [PATCH 2/2] kthread: don't use to_live_kthread() in kthread_park() and kthread_unpark() Oleg Nesterov
@ 2016-11-07 18:23                   ` Andy Lutomirski
  2 siblings, 0 replies; 31+ messages in thread
From: Andy Lutomirski @ 2016-11-07 18:23 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Thomas Gleixner, Roman Pen, Andy Lutomirski, Peter Zijlstra,
	Ingo Molnar, Tejun Heo, linux-kernel, Chunming Zhou,
	Alex Deucher

Ping?

What's the current status of this?  Do we need this for 4.9?

On Mon, Oct 31, 2016 at 1:07 PM, Oleg Nesterov <oleg@redhat.com> wrote:
> On 10/28, Oleg Nesterov wrote:
>>
>> I'll try to kill to_live_kthread() tomorrow, didn't have time to do this
>> today.
>
> Hmm. And this looks even simpler than I thought, or I am totally confused.
> Thomas, could you please review this series too?
>
> Chunming, Alex, I _think_ that drm/amdgpu should not use kthread_park(), but
> lets discuss this separately. It would be nice if you can confirm that 2/2
> can't break this code, even if I think "it obviously can't".
>
> Oleg.
>



-- 
Andy Lutomirski
AMA Capital Management, LLC

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 1/2] kthread: don't use to_live_kthread() in kthread_stop()
  2016-10-31 20:07                   ` [PATCH 1/2] kthread: don't use to_live_kthread() in kthread_stop() Oleg Nesterov
@ 2016-11-09  7:58                     ` Thomas Gleixner
  0 siblings, 0 replies; 31+ messages in thread
From: Thomas Gleixner @ 2016-11-09  7:58 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Andy Lutomirski, Roman Pen, Andy Lutomirski, Peter Zijlstra,
	Ingo Molnar, Tejun Heo, linux-kernel, Chunming Zhou,
	Alex Deucher



On Mon, 31 Oct 2016, Oleg Nesterov wrote:

> kthread_stop() had to use to_live_kthread() simply because it was not
> possible to access kthread->exited after the exiting kthread clears
> task_struct->vfork_done. Now that to_kthread() is always valid we can
> do wake_up_process() + wait_for_completion() unconditionally, we don't
> care if it has already passed complete_vfork_done() or even dead.
> 
> The exiting kthread can get the spurious wakeup after mm_release() but
> this is possible without this change too and this is fine, do_task_dead()
> ensures that this can't make any harm.
> 
> Note: we can even change this function to use task_work_add() and avoid
> ->vfork_done altogether, probably we will do this later.
> 
> Signed-off-by: Oleg Nesterov <oleg@redhat.com>

Reviewed-by: Thomas Gleixner <tglx@linutronix.de>

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/2] kthread: don't use to_live_kthread() in kthread_park() and kthread_unpark()
  2016-10-31 20:08                   ` [PATCH 2/2] kthread: don't use to_live_kthread() in kthread_park() and kthread_unpark() Oleg Nesterov
@ 2016-11-09  8:45                     ` Thomas Gleixner
  2016-11-09 17:27                       ` Oleg Nesterov
  0 siblings, 1 reply; 31+ messages in thread
From: Thomas Gleixner @ 2016-11-09  8:45 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Andy Lutomirski, Roman Pen, Andy Lutomirski, Peter Zijlstra,
	Ingo Molnar, Tejun Heo, linux-kernel, Chunming Zhou,
	Alex Deucher

On Mon, 31 Oct 2016, Oleg Nesterov wrote:
> I think we need to unexport kthread_park/unpark, and either make it return
> "void" or actually fix the race with kthred_stop/exit. This patch just adds
> WARN_ON(PF_EXITING) for now.

I'll have a look.
 
> The usage of kthread_park() in cpuhp code (cpu.c, smpboot.c, stop_machine.c)
> is fine. It can never see an exiting/exited kthread, smpboot_destroy_threads()
> clears *ht->store, smpboot_park_thread() checks it is not NULL under the same
> smpboot_threads_lock. cpuhp_threads and cpu_stop_threads never exit, so other
> callers are fine too.
> 
> But it has two more users:
> 
> - watchdog_park_threads() and it does not look nice. The code is actually
>   correct, get_online_cpus() ensures that kthread_park() can't race with
>   itself (note that kthread_park() can't handle this race correctly), but
>   imo it should not use kthread_park() directly.

Should we provide an interface through the smpboot thread infrastructure for
this?

> - drivers/gpu/drm/amd/scheduler/gpu_scheduler.c and I think it should not
>   use kthread_park() too.
> 
>   But this patch should not break this code. kthread_park() must not be
>   called after amd_sched_fini() which does kthread_stop(), otherwise even
>   to_live_kthread() is not safe because task_struct can be already freed
>   and sched->thread can point to nowhere.

Right. That's why the smpboot thread code holds a task ref which is only
released after the thread has been stopped.

I can see why that gpu driver wants to use the park mechanism and I guess
there are other legitimate use cases as well. I prefer to implement a
park/unpark variant which is safe to use on arbitrary kthreads over forcing
driver writers to come up with even more broken open coded implementations
of that.

> Signed-off-by: Oleg Nesterov <oleg@redhat.com>

Reviewed-by: Thomas Gleixner <tglx@linutronix.de>

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/2] kthread: don't use to_live_kthread() in kthread_park() and kthread_unpark()
  2016-11-09  8:45                     ` Thomas Gleixner
@ 2016-11-09 17:27                       ` Oleg Nesterov
  2016-11-10 17:19                         ` [PATCH 0/1] kthread: don't abuse kthread_create_on_cpu() in __kthread_create_worker() Oleg Nesterov
  0 siblings, 1 reply; 31+ messages in thread
From: Oleg Nesterov @ 2016-11-09 17:27 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Andy Lutomirski, Roman Pen, Andy Lutomirski, Peter Zijlstra,
	Ingo Molnar, Tejun Heo, linux-kernel, Chunming Zhou,
	Alex Deucher

On 11/09, Thomas Gleixner wrote:
>
> > - watchdog_park_threads() and it does not look nice. The code is actually
> >   correct, get_online_cpus() ensures that kthread_park() can't race with
> >   itself (note that kthread_park() can't handle this race correctly), but
> >   imo it should not use kthread_park() directly.
>
> Should we provide an interface through the smpboot thread infrastructure for
> this?

IMHO yes, I'll write another email.

> I can see why that gpu driver wants to use the park mechanism and I guess
> there are other legitimate use cases as well. I prefer to implement a
> park/unpark variant which is safe to use on arbitrary kthreads

Yes, agreed. Again, I'll write another email. Perhaps we should even keep
park/unpark exported and change them to avoid the races with exit/itself,
I dunno.

My real point was, imo the KTHREAD_IS_PER_CPU/__kthread_bind(kthread->cpu)
logic in kthread_unpark() should be private to smpboot.c/cpu.c.

I'll send another patch tomorrow. kthread_create_worker_on_cpu() ab-uses
this logic too for no reason, but this is trivial.

> Reviewed-by: Thomas Gleixner <tglx@linutronix.de>

Thanks!

Probably I should re-send these 2 short series to Ingo with your acks applied.

Oleg.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH 0/1] kthread: don't abuse kthread_create_on_cpu() in __kthread_create_worker()
  2016-11-09 17:27                       ` Oleg Nesterov
@ 2016-11-10 17:19                         ` Oleg Nesterov
  2016-11-10 17:20                           ` [PATCH 1/1] " Oleg Nesterov
  2016-11-14 11:09                           ` [PATCH 0/1] " Petr Mladek
  0 siblings, 2 replies; 31+ messages in thread
From: Oleg Nesterov @ 2016-11-10 17:19 UTC (permalink / raw)
  To: Thomas Gleixner, Petr Mladek
  Cc: Andy Lutomirski, Roman Pen, Andy Lutomirski, Peter Zijlstra,
	Ingo Molnar, Tejun Heo, linux-kernel, Chunming Zhou,
	Alex Deucher

On 11/09, Oleg Nesterov wrote:
>
> Yes, agreed. Again, I'll write another email. Perhaps we should even keep
> park/unpark exported and change them to avoid the races with exit/itself,
> I dunno.
>
> My real point was, imo the KTHREAD_IS_PER_CPU/__kthread_bind(kthread->cpu)
> logic in kthread_unpark() should be private to smpboot.c/cpu.c.
>
> I'll send another patch tomorrow. kthread_create_worker_on_cpu() ab-uses
> this logic too for no reason, but this is trivial.

After this change we are almost ready to kill kthread->cpu and KTHREAD_IS_PER_CPU.
(but the change itself doesn't depend on the previous patches).

Petr, why do we need kthread_create_worker_on_cpu() ? It has no users and
I can not imagine any "real" use-case for it. Perhaps it can be removed?

Oleg.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH 1/1] kthread: don't abuse kthread_create_on_cpu() in __kthread_create_worker()
  2016-11-10 17:19                         ` [PATCH 0/1] kthread: don't abuse kthread_create_on_cpu() in __kthread_create_worker() Oleg Nesterov
@ 2016-11-10 17:20                           ` Oleg Nesterov
  2016-11-14 11:12                             ` Petr Mladek
  2016-11-14 11:09                           ` [PATCH 0/1] " Petr Mladek
  1 sibling, 1 reply; 31+ messages in thread
From: Oleg Nesterov @ 2016-11-10 17:20 UTC (permalink / raw)
  To: Thomas Gleixner, Petr Mladek
  Cc: Andy Lutomirski, Roman Pen, Andy Lutomirski, Peter Zijlstra,
	Ingo Molnar, Tejun Heo, linux-kernel, Chunming Zhou,
	Alex Deucher

kthread_create_on_cpu() sets KTHREAD_IS_PER_CPU and kthread->cpu, this
only makes sense if this kthread can be parked/unparked by cpuhp code.
kthread workers never call kthread_parkme() so this has no effect.

Change __kthread_create_worker() to simply call kthread_bind(task, cpu).
The very fact that kthread_create_on_cpu() doesn't accept a generic fmt
shows that it should not be used outside of smpboot.c.

Now, the only reason we can not unexport this helper and move it into
smpboot.c is that it sets kthread->cpu and struct kthread is not exported.
And the only reason we can not kill kthread->cpu is that kthread_unpark()
is used by drivers/gpu/drm/amd/scheduler/gpu_scheduler.c and thus we can
not turn _unpark into kthread_unpark(struct smp_hotplug_thread *, cpu).

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 kernel/kthread.c | 23 ++++++++---------------
 1 file changed, 8 insertions(+), 15 deletions(-)

diff --git a/kernel/kthread.c b/kernel/kthread.c
index 01d2716..956495f 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -641,6 +641,7 @@ __kthread_create_worker(int cpu, unsigned int flags,
 {
 	struct kthread_worker *worker;
 	struct task_struct *task;
+	int node = -1;
 
 	worker = kzalloc(sizeof(*worker), GFP_KERNEL);
 	if (!worker)
@@ -648,25 +649,17 @@ __kthread_create_worker(int cpu, unsigned int flags,
 
 	kthread_init_worker(worker);
 
-	if (cpu >= 0) {
-		char name[TASK_COMM_LEN];
-
-		/*
-		 * kthread_create_worker_on_cpu() allows to pass a generic
-		 * namefmt in compare with kthread_create_on_cpu. We need
-		 * to format it here.
-		 */
-		vsnprintf(name, sizeof(name), namefmt, args);
-		task = kthread_create_on_cpu(kthread_worker_fn, worker,
-					     cpu, name);
-	} else {
-		task = __kthread_create_on_node(kthread_worker_fn, worker,
-						-1, namefmt, args);
-	}
+	if (cpu >= 0)
+		node = cpu_to_node(cpu);
 
+	task = __kthread_create_on_node(kthread_worker_fn, worker,
+						node, namefmt, args);
 	if (IS_ERR(task))
 		goto fail_task;
 
+	if (cpu >= 0)
+		kthread_bind(task, cpu);
+
 	worker->flags = flags;
 	worker->task = task;
 	wake_up_process(task);
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH 0/1] kthread: don't abuse kthread_create_on_cpu() in __kthread_create_worker()
  2016-11-10 17:19                         ` [PATCH 0/1] kthread: don't abuse kthread_create_on_cpu() in __kthread_create_worker() Oleg Nesterov
  2016-11-10 17:20                           ` [PATCH 1/1] " Oleg Nesterov
@ 2016-11-14 11:09                           ` Petr Mladek
  1 sibling, 0 replies; 31+ messages in thread
From: Petr Mladek @ 2016-11-14 11:09 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Thomas Gleixner, Andy Lutomirski, Roman Pen, Andy Lutomirski,
	Peter Zijlstra, Ingo Molnar, Tejun Heo, linux-kernel,
	Chunming Zhou, Alex Deucher

On Thu 2016-11-10 18:19:58, Oleg Nesterov wrote:
> On 11/09, Oleg Nesterov wrote:
> >
> > Yes, agreed. Again, I'll write another email. Perhaps we should even keep
> > park/unpark exported and change them to avoid the races with exit/itself,
> > I dunno.
> >
> > My real point was, imo the KTHREAD_IS_PER_CPU/__kthread_bind(kthread->cpu)
> > logic in kthread_unpark() should be private to smpboot.c/cpu.c.
> >
> > I'll send another patch tomorrow. kthread_create_worker_on_cpu() ab-uses
> > this logic too for no reason, but this is trivial.
> 
> After this change we are almost ready to kill kthread->cpu and KTHREAD_IS_PER_CPU.
> (but the change itself doesn't depend on the previous patches).
> 
> Petr, why do we need kthread_create_worker_on_cpu() ? It has no users and
> I can not imagine any "real" use-case for it. Perhaps it can be removed?

kthread_create_worker_on_cpu() is going to have some users. For
example, patches for intel_powerclamp are already flying around,
see
https://lkml.kernel.org/r/1476707572-32215-3-git-send-email-pmladek@suse.com

Best Regards,
Petr

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 1/1] kthread: don't abuse kthread_create_on_cpu() in __kthread_create_worker()
  2016-11-10 17:20                           ` [PATCH 1/1] " Oleg Nesterov
@ 2016-11-14 11:12                             ` Petr Mladek
  0 siblings, 0 replies; 31+ messages in thread
From: Petr Mladek @ 2016-11-14 11:12 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Thomas Gleixner, Andy Lutomirski, Roman Pen, Andy Lutomirski,
	Peter Zijlstra, Ingo Molnar, Tejun Heo, linux-kernel,
	Chunming Zhou, Alex Deucher

On Thu 2016-11-10 18:20:31, Oleg Nesterov wrote:
> kthread_create_on_cpu() sets KTHREAD_IS_PER_CPU and kthread->cpu, this
> only makes sense if this kthread can be parked/unparked by cpuhp code.
> kthread workers never call kthread_parkme() so this has no effect.

Yes.

> Change __kthread_create_worker() to simply call kthread_bind(task, cpu).
> The very fact that kthread_create_on_cpu() doesn't accept a generic fmt
> shows that it should not be used outside of smpboot.c.
> 
> Now, the only reason we can not unexport this helper and move it into
> smpboot.c is that it sets kthread->cpu and struct kthread is not exported.
> And the only reason we can not kill kthread->cpu is that kthread_unpark()
> is used by drivers/gpu/drm/amd/scheduler/gpu_scheduler.c and thus we can
> not turn _unpark into kthread_unpark(struct smp_hotplug_thread *, cpu).
> 
> Signed-off-by: Oleg Nesterov <oleg@redhat.com>

The change looks fine to me. Feel free to add one or both of these:

Reviewed-by: Petr Mladek <pmladek@suse.com>
Tested-by: Petr Mladek <pmladek@suse.com>

Best Regards,
Petr

^ permalink raw reply	[flat|nested] 31+ messages in thread

end of thread, other threads:[~2016-11-14 11:13 UTC | newest]

Thread overview: 31+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-10-25 11:05 [PATCH v3 1/1] kthread: allocate kthread structure using kmalloc Roman Pen
2016-10-25 14:03 ` Oleg Nesterov
2016-10-25 15:43   ` Oleg Nesterov
2016-10-25 16:08     ` Roman Penyaev
2016-10-25 16:17       ` Oleg Nesterov
2016-10-25 16:52     ` Andy Lutomirski
2016-10-26 14:14       ` Oleg Nesterov
2016-10-26 14:57         ` Thomas Gleixner
2016-10-26 15:51           ` Oleg Nesterov
2016-10-26 18:31             ` Thomas Gleixner
2016-10-28 16:11               ` [PATCH 0/2] kthread: make struct kthread kmalloc'ed Oleg Nesterov
2016-10-28 16:11                 ` [PATCH 1/2] " Oleg Nesterov
2016-10-28 18:48                   ` Thomas Gleixner
2016-10-28 16:12                 ` [PATCH 2/2] Revert "kthread: Pin the stack via try_get_task_stack()/put_task_stack() in to_live_kthread() function" Oleg Nesterov
2016-10-28 18:49                   ` Thomas Gleixner
2016-10-28 18:44                 ` [PATCH 0/2] kthread: make struct kthread kmalloc'ed Thomas Gleixner
2016-10-31 20:07                 ` [PATCH 0/2] kthread: kill to_live_kthread() Oleg Nesterov
2016-10-31 20:07                   ` [PATCH 1/2] kthread: don't use to_live_kthread() in kthread_stop() Oleg Nesterov
2016-11-09  7:58                     ` Thomas Gleixner
2016-10-31 20:08                   ` [PATCH 2/2] kthread: don't use to_live_kthread() in kthread_park() and kthread_unpark() Oleg Nesterov
2016-11-09  8:45                     ` Thomas Gleixner
2016-11-09 17:27                       ` Oleg Nesterov
2016-11-10 17:19                         ` [PATCH 0/1] kthread: don't abuse kthread_create_on_cpu() in __kthread_create_worker() Oleg Nesterov
2016-11-10 17:20                           ` [PATCH 1/1] " Oleg Nesterov
2016-11-14 11:12                             ` Petr Mladek
2016-11-14 11:09                           ` [PATCH 0/1] " Petr Mladek
2016-11-07 18:23                   ` [PATCH 0/2] kthread: kill to_live_kthread() Andy Lutomirski
2016-10-26 16:13         ` [PATCH v3 1/1] kthread: allocate kthread structure using kmalloc Oleg Nesterov
2016-10-27  2:56         ` Josh Poimboeuf
2016-10-27 13:10           ` Oleg Nesterov
2016-10-25 15:46   ` Roman Penyaev

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).