From: Roman Pen <roman.penyaev@profitbricks.com>
To: unlisted-recipients:; (no To-header on input)
Cc: Roman Pen <roman.penyaev@profitbricks.com>,
	Andy Lutomirski <luto@kernel.org>,
	Oleg Nesterov <oleg@redhat.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>, Tejun Heo <tj@kernel.org>,
	linux-kernel@vger.kernel.org
Subject: [PATCH v3 1/1] kthread: allocate kthread structure using kmalloc
Date: Tue, 25 Oct 2016 13:05:08 +0200	[thread overview]
Message-ID: <20161025110508.9052-1-roman.penyaev@profitbricks.com> (raw)

This patch avoids allocating the kthread structure on the stack and simply
uses kmalloc.  Allocation on the stack became a huge problem (with memory
corruption and all the other unpleasant consequences) after commit
2deb4be28077 ("x86/dumpstack: When OOPSing, rewind the stack before do_exit()")
by Andy Lutomirski, which rewinds the stack on oops, so the oopsed kthread
steps on garbage memory while completing task->vfork_done on the
following path:

       oops_end()
       rewind_stack_do_exit()
       exit_mm()
       mm_release()
       complete_vfork_done()
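
For illustration, here is a simplified sketch of the pre-patch pattern (not
the exact kernel code): 'struct kthread' lives on the kthread's own stack and
a pointer into that stack is published through current->vfork_done:

	/* Simplified sketch of the old, on-stack pattern (pre-patch). */
	static int kthread(void *_create)
	{
		struct kthread self;	/* lives on this kthread's stack */
		int ret;

		init_completion(&self.exited);
		current->vfork_done = &self.exited;	/* points into our stack */

		/* ... wait for wakeup and run threadfn() ... */

		do_exit(ret);	/* never returns, so &self stays valid --
				 * unless an oops rewinds the stack first */
	}

Once rewind_stack_do_exit() resets the stack pointer on an oops, the later
complete_vfork_done() dereferences a completion that do_exit() may already
have overwritten with its own stack frames.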

This patch also merges the two structures 'struct kthread_create_info' and
'struct kthread' into a single 'struct kthread', whose freeing is controlled
by a reference counter.

The last reference on the kthread is put from a task work callback, which
is invoked from do_exit().  The key point is that the last put happens
*after* complete_vfork_done() is invoked.
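
As a rough sketch of the intended reference lifetime (all names are the ones
introduced by the patch below):

	/* Creator (kthread_create_on_node): */
	kthread = kmalloc(sizeof(*kthread), GFP_KERNEL);
	atomic_set(&kthread->refs, 2);	/* one ref for the creator,
					 * one for the new kernel thread */

	/* New kernel thread (kthreadfn): delegate its final put to a
	 * task work, which do_exit() runs after complete_vfork_done(). */
	init_task_work(&self->put_work, put_kthread_cb);
	task_work_add(current, &self->put_work, false);

	/* Creator again: drop its ref once kthread->result is read. */
	put_kthread(kthread);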

Signed-off-by: Roman Pen <roman.penyaev@profitbricks.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: linux-kernel@vger.kernel.org
---
v3:
  o handle to_live_kthread() calls, which should now take a kthread
    ref or return NULL.  The function was renamed to to_live_kthread_and_get().
  o minor comment tweaks.

v2:
  o let x86/kernel/dumpstack.c rewind the stack, but do not use the stack
    for the structure allocation.

 kernel/kthread.c | 198 ++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 117 insertions(+), 81 deletions(-)

diff --git a/kernel/kthread.c b/kernel/kthread.c
index 4ab4c3766a80..e8adc10556e0 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -18,14 +18,19 @@
 #include <linux/freezer.h>
 #include <linux/ptrace.h>
 #include <linux/uaccess.h>
+#include <linux/task_work.h>
 #include <trace/events/sched.h>
 
 static DEFINE_SPINLOCK(kthread_create_lock);
 static LIST_HEAD(kthread_create_list);
 struct task_struct *kthreadd_task;
 
-struct kthread_create_info
-{
+struct kthread {
+	struct list_head list;
+	unsigned long flags;
+	unsigned int cpu;
+	atomic_t refs;
+
 	/* Information passed to kthread() from kthreadd. */
 	int (*threadfn)(void *data);
 	void *data;
@@ -33,15 +38,9 @@ struct kthread_create_info
 
 	/* Result passed back to kthread_create() from kthreadd. */
 	struct task_struct *result;
-	struct completion *done;
 
-	struct list_head list;
-};
-
-struct kthread {
-	unsigned long flags;
-	unsigned int cpu;
-	void *data;
+	struct callback_head put_work;
+	struct completion *started;
 	struct completion parked;
 	struct completion exited;
 };
@@ -56,17 +55,49 @@ enum KTHREAD_BITS {
 #define __to_kthread(vfork)	\
 	container_of(vfork, struct kthread, exited)
 
+static inline void get_kthread(struct kthread *kthread)
+{
+	BUG_ON(atomic_read(&kthread->refs) <= 0);
+	atomic_inc(&kthread->refs);
+}
+
+static inline void put_kthread(struct kthread *kthread)
+{
+	BUG_ON(atomic_read(&kthread->refs) <= 0);
+	if (atomic_dec_and_test(&kthread->refs))
+		kfree(kthread);
+}
+
+/**
+ * put_kthread_cb - called from do_exit() and likely does
+ *                  the final put.
+ */
+static void put_kthread_cb(struct callback_head *work)
+{
+	struct kthread *kthread;
+
+	kthread = container_of(work, struct kthread, put_work);
+	put_kthread(kthread);
+}
+
 static inline struct kthread *to_kthread(struct task_struct *k)
 {
 	return __to_kthread(k->vfork_done);
 }
 
-static struct kthread *to_live_kthread(struct task_struct *k)
+static struct kthread *to_live_kthread_and_get(struct task_struct *k)
 {
-	struct completion *vfork = ACCESS_ONCE(k->vfork_done);
-	if (likely(vfork) && try_get_task_stack(k))
-		return __to_kthread(vfork);
-	return NULL;
+	struct kthread *kthread = NULL;
+
+	BUG_ON(!(k->flags & PF_KTHREAD));
+	task_lock(k);
+	if (likely(k->vfork_done)) {
+		kthread = __to_kthread(k->vfork_done);
+		get_kthread(kthread);
+	}
+	task_unlock(k);
+
+	return kthread;
 }
 
 /**
@@ -174,41 +205,37 @@ void kthread_parkme(void)
 }
 EXPORT_SYMBOL_GPL(kthread_parkme);
 
-static int kthread(void *_create)
+static int kthreadfn(void *_self)
 {
-	/* Copy data: it's on kthread's stack */
-	struct kthread_create_info *create = _create;
-	int (*threadfn)(void *data) = create->threadfn;
-	void *data = create->data;
-	struct completion *done;
-	struct kthread self;
-	int ret;
-
-	self.flags = 0;
-	self.data = data;
-	init_completion(&self.exited);
-	init_completion(&self.parked);
-	current->vfork_done = &self.exited;
-
-	/* If user was SIGKILLed, I release the structure. */
-	done = xchg(&create->done, NULL);
-	if (!done) {
-		kfree(create);
-		do_exit(-EINTR);
+	struct completion *started;
+	struct kthread *self = _self;
+	int ret = -EINTR;
+
+	/* If user was SIGKILLed, put a ref and exit silently. */
+	started = xchg(&self->started, NULL);
+	if (!started) {
+		put_kthread(self);
+		goto exit;
 	}
+	/*
+	 * Delegate last ref put to a task work, which will happen
+	 * after 'vfork_done' completion.
+	 */
+	init_task_work(&self->put_work, put_kthread_cb);
+	task_work_add(current, &self->put_work, false);
+	current->vfork_done = &self->exited;
+
 	/* OK, tell user we're spawned, wait for stop or wakeup */
 	__set_current_state(TASK_UNINTERRUPTIBLE);
-	create->result = current;
-	complete(done);
+	self->result = current;
+	complete(started);
 	schedule();
 
-	ret = -EINTR;
-
-	if (!test_bit(KTHREAD_SHOULD_STOP, &self.flags)) {
-		__kthread_parkme(&self);
-		ret = threadfn(data);
+	if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) {
+		__kthread_parkme(self);
+		ret = self->threadfn(self->data);
 	}
-	/* we can't just return, we must preserve "self" on stack */
+exit:
 	do_exit(ret);
 }
 
@@ -222,25 +249,25 @@ int tsk_fork_get_node(struct task_struct *tsk)
 	return NUMA_NO_NODE;
 }
 
-static void create_kthread(struct kthread_create_info *create)
+static void create_kthread(struct kthread *kthread)
 {
+	struct completion *started;
 	int pid;
 
 #ifdef CONFIG_NUMA
-	current->pref_node_fork = create->node;
+	current->pref_node_fork = kthread->node;
 #endif
 	/* We want our own signal handler (we take no signals by default). */
-	pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD);
+	pid = kernel_thread(kthreadfn, kthread,
+			    CLONE_FS | CLONE_FILES | SIGCHLD);
 	if (pid < 0) {
-		/* If user was SIGKILLed, I release the structure. */
-		struct completion *done = xchg(&create->done, NULL);
-
-		if (!done) {
-			kfree(create);
-			return;
+		started = xchg(&kthread->started, NULL);
+		if (started) {
+			/* The user was not SIGKILLed and wants the result. */
+			kthread->result = ERR_PTR(pid);
+			complete(started);
 		}
-		create->result = ERR_PTR(pid);
-		complete(done);
+		put_kthread(kthread);
 	}
 }
 
@@ -272,20 +299,26 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
 					   const char namefmt[],
 					   ...)
 {
-	DECLARE_COMPLETION_ONSTACK(done);
+	DECLARE_COMPLETION_ONSTACK(started);
 	struct task_struct *task;
-	struct kthread_create_info *create = kmalloc(sizeof(*create),
-						     GFP_KERNEL);
+	struct kthread *kthread;
 
-	if (!create)
+	kthread = kmalloc(sizeof(*kthread), GFP_KERNEL);
+	if (!kthread)
 		return ERR_PTR(-ENOMEM);
-	create->threadfn = threadfn;
-	create->data = data;
-	create->node = node;
-	create->done = &done;
+	/* One ref for us and one ref for a new kernel thread. */
+	atomic_set(&kthread->refs, 2);
+	kthread->flags = 0;
+	kthread->cpu = 0;
+	kthread->threadfn = threadfn;
+	kthread->data = data;
+	kthread->node = node;
+	kthread->started = &started;
+	init_completion(&kthread->exited);
+	init_completion(&kthread->parked);
 
 	spin_lock(&kthread_create_lock);
-	list_add_tail(&create->list, &kthread_create_list);
+	list_add_tail(&kthread->list, &kthread_create_list);
 	spin_unlock(&kthread_create_lock);
 
 	wake_up_process(kthreadd_task);
@@ -294,21 +327,23 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
 	 * the OOM killer while kthreadd is trying to allocate memory for
 	 * new kernel thread.
 	 */
-	if (unlikely(wait_for_completion_killable(&done))) {
+	if (unlikely(wait_for_completion_killable(&started))) {
 		/*
 		 * If I was SIGKILLed before kthreadd (or new kernel thread)
-		 * calls complete(), leave the cleanup of this structure to
-		 * that thread.
+		 * calls complete(), put a ref and return an error.
 		 */
-		if (xchg(&create->done, NULL))
+		if (xchg(&kthread->started, NULL)) {
+			put_kthread(kthread);
+
 			return ERR_PTR(-EINTR);
+		}
 		/*
 		 * kthreadd (or new kernel thread) will call complete()
 		 * shortly.
 		 */
-		wait_for_completion(&done);
+		wait_for_completion(&started);
 	}
-	task = create->result;
+	task = kthread->result;
 	if (!IS_ERR(task)) {
 		static const struct sched_param param = { .sched_priority = 0 };
 		va_list args;
@@ -323,7 +358,8 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
 		sched_setscheduler_nocheck(task, SCHED_NORMAL, &param);
 		set_cpus_allowed_ptr(task, cpu_all_mask);
 	}
-	kfree(create);
+	put_kthread(kthread);
+
 	return task;
 }
 EXPORT_SYMBOL(kthread_create_on_node);
@@ -423,11 +459,11 @@ static void __kthread_unpark(struct task_struct *k, struct kthread *kthread)
  */
 void kthread_unpark(struct task_struct *k)
 {
-	struct kthread *kthread = to_live_kthread(k);
+	struct kthread *kthread = to_live_kthread_and_get(k);
 
 	if (kthread) {
 		__kthread_unpark(k, kthread);
-		put_task_stack(k);
+		put_kthread(kthread);
 	}
 }
 EXPORT_SYMBOL_GPL(kthread_unpark);
@@ -446,7 +482,7 @@ EXPORT_SYMBOL_GPL(kthread_unpark);
  */
 int kthread_park(struct task_struct *k)
 {
-	struct kthread *kthread = to_live_kthread(k);
+	struct kthread *kthread = to_live_kthread_and_get(k);
 	int ret = -ENOSYS;
 
 	if (kthread) {
@@ -457,7 +493,7 @@ int kthread_park(struct task_struct *k)
 				wait_for_completion(&kthread->parked);
 			}
 		}
-		put_task_stack(k);
+		put_kthread(kthread);
 		ret = 0;
 	}
 	return ret;
@@ -487,13 +523,13 @@ int kthread_stop(struct task_struct *k)
 	trace_sched_kthread_stop(k);
 
 	get_task_struct(k);
-	kthread = to_live_kthread(k);
+	kthread = to_live_kthread_and_get(k);
 	if (kthread) {
 		set_bit(KTHREAD_SHOULD_STOP, &kthread->flags);
 		__kthread_unpark(k, kthread);
 		wake_up_process(k);
 		wait_for_completion(&kthread->exited);
-		put_task_stack(k);
+		put_kthread(kthread);
 	}
 	ret = k->exit_code;
 	put_task_struct(k);
@@ -523,14 +559,14 @@ int kthreadd(void *unused)
 
 		spin_lock(&kthread_create_lock);
 		while (!list_empty(&kthread_create_list)) {
-			struct kthread_create_info *create;
+			struct kthread *kthread;
 
-			create = list_entry(kthread_create_list.next,
-					    struct kthread_create_info, list);
-			list_del_init(&create->list);
+			kthread = list_entry(kthread_create_list.next,
+					     struct kthread, list);
+			list_del_init(&kthread->list);
 			spin_unlock(&kthread_create_lock);
 
-			create_kthread(create);
+			create_kthread(kthread);
 
 			spin_lock(&kthread_create_lock);
 		}
-- 
2.9.3
