linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 1/2] kthread: allocate kthread structure using kmalloc
@ 2016-10-24 16:08 Roman Pen
  2016-10-24 16:08 ` [PATCH v2 2/2] workqueue: ignore dead tasks in a workqueue sleep hook Roman Pen
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Roman Pen @ 2016-10-24 16:08 UTC (permalink / raw)
  Cc: Roman Pen, Andy Lutomirski, Oleg Nesterov, Peter Zijlstra,
	Thomas Gleixner, Ingo Molnar, Tejun Heo, linux-kernel

This patch avoids allocation of kthread structure on a stack, and simply
uses kmalloc.  Allocation on a stack became a huge problem (with memory
corruption and all other not nice consequences) after the commit 2deb4be28
by Andy Lutomirski, which rewinds the stack on oops, thus ooopsed kthread
steps on a garbage memory while completion of task->vfork_done structure
on the following path:

       oops_end()
       rewind_stack_do_exit()
       exit_mm()
       mm_release()
       complete_vfork_done()

Also in this patch two structures 'struct kthread_create_info' and
'struct kthread' are merged into one 'struct kthread' and its freeing
is controlled by a reference counter.

The last reference on kthread is put from a task work, the callback,
which is invoked from do_exit().  The major thing is that the last
put is happens *after* completion_vfork_done() is invoked.

Signed-off-by: Roman Pen <roman.penyaev@profitbricks.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: linux-kernel@vger.kernel.org
---
v2:
  o let x86/kernel/dumpstack.c rewind a stack, but do not use a stack
    for a structure allocation.

 kernel/kthread.c | 160 +++++++++++++++++++++++++++++++------------------------
 1 file changed, 90 insertions(+), 70 deletions(-)

diff --git a/kernel/kthread.c b/kernel/kthread.c
index 4ab4c37..9ccfe06 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -18,14 +18,19 @@
 #include <linux/freezer.h>
 #include <linux/ptrace.h>
 #include <linux/uaccess.h>
+#include <linux/task_work.h>
 #include <trace/events/sched.h>
 
 static DEFINE_SPINLOCK(kthread_create_lock);
 static LIST_HEAD(kthread_create_list);
 struct task_struct *kthreadd_task;
 
-struct kthread_create_info
-{
+struct kthread {
+	struct list_head list;
+	unsigned long flags;
+	unsigned int cpu;
+	atomic_t refs;
+
 	/* Information passed to kthread() from kthreadd. */
 	int (*threadfn)(void *data);
 	void *data;
@@ -33,15 +38,9 @@ struct kthread_create_info
 
 	/* Result passed back to kthread_create() from kthreadd. */
 	struct task_struct *result;
-	struct completion *done;
-
-	struct list_head list;
-};
 
-struct kthread {
-	unsigned long flags;
-	unsigned int cpu;
-	void *data;
+	struct callback_head put_work;
+	struct completion *started;
 	struct completion parked;
 	struct completion exited;
 };
@@ -69,6 +68,24 @@ static struct kthread *to_live_kthread(struct task_struct *k)
 	return NULL;
 }
 
+static inline void put_kthread(struct kthread *kthread)
+{
+	if (atomic_dec_and_test(&kthread->refs))
+		kfree(kthread);
+}
+
+/**
+ * put_kthread_cb - is called from do_exit() and does likely
+ *                  the final put.
+ */
+static void put_kthread_cb(struct callback_head *work)
+{
+	struct kthread *kthread;
+
+	kthread = container_of(work, struct kthread, put_work);
+	put_kthread(kthread);
+}
+
 /**
  * kthread_should_stop - should this kthread return now?
  *
@@ -174,41 +191,36 @@ void kthread_parkme(void)
 }
 EXPORT_SYMBOL_GPL(kthread_parkme);
 
-static int kthread(void *_create)
+static int kthreadfn(void *_self)
 {
-	/* Copy data: it's on kthread's stack */
-	struct kthread_create_info *create = _create;
-	int (*threadfn)(void *data) = create->threadfn;
-	void *data = create->data;
-	struct completion *done;
-	struct kthread self;
-	int ret;
-
-	self.flags = 0;
-	self.data = data;
-	init_completion(&self.exited);
-	init_completion(&self.parked);
-	current->vfork_done = &self.exited;
-
-	/* If user was SIGKILLed, I release the structure. */
-	done = xchg(&create->done, NULL);
-	if (!done) {
-		kfree(create);
-		do_exit(-EINTR);
+	struct completion *started;
+	struct kthread *self = _self;
+	int ret = -EINTR;
+
+	/* If user was SIGKILLed, put a ref and exit silently. */
+	started = xchg(&self->started, NULL);
+	if (!started) {
+		put_kthread(self);
+		goto exit;
 	}
+	/* Delegate last ref put to a task work, which will happen
+	 * after 'vfork_done' completion.
+	 */
+	init_task_work(&self->put_work, put_kthread_cb);
+	task_work_add(current, &self->put_work, false);
+	current->vfork_done = &self->exited;
+
 	/* OK, tell user we're spawned, wait for stop or wakeup */
 	__set_current_state(TASK_UNINTERRUPTIBLE);
-	create->result = current;
-	complete(done);
+	self->result = current;
+	complete(started);
 	schedule();
 
-	ret = -EINTR;
-
-	if (!test_bit(KTHREAD_SHOULD_STOP, &self.flags)) {
-		__kthread_parkme(&self);
-		ret = threadfn(data);
+	if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) {
+		__kthread_parkme(self);
+		ret = self->threadfn(self->data);
 	}
-	/* we can't just return, we must preserve "self" on stack */
+exit:
 	do_exit(ret);
 }
 
@@ -222,25 +234,24 @@ int tsk_fork_get_node(struct task_struct *tsk)
 	return NUMA_NO_NODE;
 }
 
-static void create_kthread(struct kthread_create_info *create)
+static void create_kthread(struct kthread *kthread)
 {
+	struct completion *started;
 	int pid;
 
 #ifdef CONFIG_NUMA
-	current->pref_node_fork = create->node;
+	current->pref_node_fork = kthread->node;
 #endif
 	/* We want our own signal handler (we take no signals by default). */
-	pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD);
+	pid = kernel_thread(kthreadfn, kthread,
+			    CLONE_FS | CLONE_FILES | SIGCHLD);
 	if (pid < 0) {
-		/* If user was SIGKILLed, I release the structure. */
-		struct completion *done = xchg(&create->done, NULL);
-
-		if (!done) {
-			kfree(create);
-			return;
+		started = xchg(&kthread->started, NULL);
+		if (started) {
+			kthread->result = ERR_PTR(pid);
+			complete(started);
 		}
-		create->result = ERR_PTR(pid);
-		complete(done);
+		put_kthread(kthread);
 	}
 }
 
@@ -272,20 +283,26 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
 					   const char namefmt[],
 					   ...)
 {
-	DECLARE_COMPLETION_ONSTACK(done);
+	DECLARE_COMPLETION_ONSTACK(started);
 	struct task_struct *task;
-	struct kthread_create_info *create = kmalloc(sizeof(*create),
-						     GFP_KERNEL);
+	struct kthread *kthread;
 
-	if (!create)
+	kthread = kmalloc(sizeof(*kthread), GFP_KERNEL);
+	if (!kthread)
 		return ERR_PTR(-ENOMEM);
-	create->threadfn = threadfn;
-	create->data = data;
-	create->node = node;
-	create->done = &done;
+	/* One ref for us and one ref for a new kernel thread. */
+	atomic_set(&kthread->refs, 2);
+	kthread->flags = 0;
+	kthread->cpu = 0;
+	kthread->threadfn = threadfn;
+	kthread->data = data;
+	kthread->node = node;
+	kthread->started = &started;
+	init_completion(&kthread->exited);
+	init_completion(&kthread->parked);
 
 	spin_lock(&kthread_create_lock);
-	list_add_tail(&create->list, &kthread_create_list);
+	list_add_tail(&kthread->list, &kthread_create_list);
 	spin_unlock(&kthread_create_lock);
 
 	wake_up_process(kthreadd_task);
@@ -294,21 +311,23 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
 	 * the OOM killer while kthreadd is trying to allocate memory for
 	 * new kernel thread.
 	 */
-	if (unlikely(wait_for_completion_killable(&done))) {
+	if (unlikely(wait_for_completion_killable(&started))) {
 		/*
 		 * If I was SIGKILLed before kthreadd (or new kernel thread)
-		 * calls complete(), leave the cleanup of this structure to
-		 * that thread.
+		 * calls complete(), put a ref and return an error.
 		 */
-		if (xchg(&create->done, NULL))
+		if (xchg(&kthread->started, NULL)) {
+			put_kthread(kthread);
+
 			return ERR_PTR(-EINTR);
+		}
 		/*
 		 * kthreadd (or new kernel thread) will call complete()
 		 * shortly.
 		 */
-		wait_for_completion(&done);
+		wait_for_completion(&started);
 	}
-	task = create->result;
+	task = kthread->result;
 	if (!IS_ERR(task)) {
 		static const struct sched_param param = { .sched_priority = 0 };
 		va_list args;
@@ -323,7 +342,8 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
 		sched_setscheduler_nocheck(task, SCHED_NORMAL, &param);
 		set_cpus_allowed_ptr(task, cpu_all_mask);
 	}
-	kfree(create);
+	put_kthread(kthread);
+
 	return task;
 }
 EXPORT_SYMBOL(kthread_create_on_node);
@@ -523,14 +543,14 @@ int kthreadd(void *unused)
 
 		spin_lock(&kthread_create_lock);
 		while (!list_empty(&kthread_create_list)) {
-			struct kthread_create_info *create;
+			struct kthread *kthread;
 
-			create = list_entry(kthread_create_list.next,
-					    struct kthread_create_info, list);
-			list_del_init(&create->list);
+			kthread = list_entry(kthread_create_list.next,
+					     struct kthread, list);
+			list_del_init(&kthread->list);
 			spin_unlock(&kthread_create_lock);
 
-			create_kthread(create);
+			create_kthread(kthread);
 
 			spin_lock(&kthread_create_lock);
 		}
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH v2 2/2] workqueue: ignore dead tasks in a workqueue sleep hook
  2016-10-24 16:08 [PATCH v2 1/2] kthread: allocate kthread structure using kmalloc Roman Pen
@ 2016-10-24 16:08 ` Roman Pen
  2016-10-24 16:40   ` Peter Zijlstra
  2016-10-24 20:27   ` Tejun Heo
  2016-10-24 16:42 ` [PATCH v2 1/2] kthread: allocate kthread structure using kmalloc Peter Zijlstra
  2016-10-24 17:08 ` Andy Lutomirski
  2 siblings, 2 replies; 9+ messages in thread
From: Roman Pen @ 2016-10-24 16:08 UTC (permalink / raw)
  Cc: Roman Pen, Andy Lutomirski, Oleg Nesterov, Peter Zijlstra,
	Thomas Gleixner, Ingo Molnar, Tejun Heo, linux-kernel

If panic_on_oops is not set and oops happens inside workqueue kthread,
kernel kills this kthread.  Current patch fixes recursive GPF which
happens when wq_worker_sleeping() function unconditionally accesses
the NULL kthread->vfork_done ptr trhu kthread_data() -> to_kthread().

The stack is the following:

[<ffffffff81397f75>] dump_stack+0x68/0x93
[<ffffffff8106954b>] ? do_exit+0x7ab/0xc10
[<ffffffff8108fd73>] __schedule_bug+0x83/0xe0
[<ffffffff81716d5a>] __schedule+0x7ea/0xba0
[<ffffffff810c864f>] ? vprintk_default+0x1f/0x30
[<ffffffff8116a63c>] ? printk+0x48/0x50
[<ffffffff81717150>] schedule+0x40/0x90
[<ffffffff8106976a>] do_exit+0x9ca/0xc10
[<ffffffff810c8e3d>] ? kmsg_dump+0x11d/0x190
[<ffffffff810c8d37>] ? kmsg_dump+0x17/0x190
[<ffffffff81021ee9>] oops_end+0x99/0xd0
[<ffffffff81052da5>] no_context+0x185/0x3e0
[<ffffffff81053083>] __bad_area_nosemaphore+0x83/0x1c0
[<ffffffff810c820e>] ? vprintk_emit+0x25e/0x530
[<ffffffff810531d4>] bad_area_nosemaphore+0x14/0x20
[<ffffffff8105355c>] __do_page_fault+0xac/0x570
[<ffffffff810c66fe>] ? console_trylock+0x1e/0xe0
[<ffffffff81002036>] ? trace_hardirqs_off_thunk+0x1a/0x1c
[<ffffffff81053a2c>] do_page_fault+0xc/0x10
[<ffffffff8171f812>] page_fault+0x22/0x30
[<ffffffff81089bc3>] ? kthread_data+0x33/0x40
[<ffffffff8108427e>] ? wq_worker_sleeping+0xe/0x80
[<ffffffff817169eb>] __schedule+0x47b/0xba0
[<ffffffff81717150>] schedule+0x40/0x90
[<ffffffff8106957d>] do_exit+0x7dd/0xc10
[<ffffffff81021ee9>] oops_end+0x99/0xd0

kthread->vfork_done is zeroed out on the following path:

    do_exit()
    exit_mm()
    mm_release()
    complete_vfork_done()

In order to fix a bug dead tasks must be ignored.

Signed-off-by: Roman Pen <roman.penyaev@profitbricks.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: linux-kernel@vger.kernel.org
---
v2:
  o put a task->state check directly into a wq_worker_sleeping() function
    instead of changing the __schedule().

 kernel/workqueue.c | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 9dc7ac5..b19dcb6 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -875,9 +875,31 @@ void wq_worker_waking_up(struct task_struct *task, int cpu)
  */
 struct task_struct *wq_worker_sleeping(struct task_struct *task)
 {
-	struct worker *worker = kthread_data(task), *to_wakeup = NULL;
+	struct worker *worker, *to_wakeup = NULL;
 	struct worker_pool *pool;
 
+
+	if (task->state == TASK_DEAD)
+		/* Here we try to catch the following path before
+		 * accessing NULL kthread->vfork_done ptr thru
+		 * kthread_data():
+		 *
+		 *    oops_end()
+		 *    do_exit()
+		 *    schedule()
+		 *
+		 * If panic_on_oops is not set and oops happens on
+		 * a workqueue execution path, thread will be killed.
+		 * That is definitly sad, but not to make the situation
+		 * even worse we have to ignore dead tasks in order not
+		 * to step on zeroed out members (e.g. t->vfork_done is
+		 * already NULL on that path, since we were called by
+		 * do_exit())).
+		 */
+		return NULL;
+
+	worker = kthread_data(task);
+
 	/*
 	 * Rescuers, which may not have all the fields set up like normal
 	 * workers, also reach here, let's not access anything before
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH v2 2/2] workqueue: ignore dead tasks in a workqueue sleep hook
  2016-10-24 16:08 ` [PATCH v2 2/2] workqueue: ignore dead tasks in a workqueue sleep hook Roman Pen
@ 2016-10-24 16:40   ` Peter Zijlstra
  2016-10-24 19:09     ` Roman Penyaev
  2016-10-24 20:27   ` Tejun Heo
  1 sibling, 1 reply; 9+ messages in thread
From: Peter Zijlstra @ 2016-10-24 16:40 UTC (permalink / raw)
  To: Roman Pen
  Cc: Andy Lutomirski, Oleg Nesterov, Thomas Gleixner, Ingo Molnar,
	Tejun Heo, linux-kernel

On Mon, Oct 24, 2016 at 06:08:14PM +0200, Roman Pen wrote:

> --- a/kernel/workqueue.c
> +++ b/kernel/workqueue.c
> @@ -875,9 +875,31 @@ void wq_worker_waking_up(struct task_struct *task, int cpu)
>   */
>  struct task_struct *wq_worker_sleeping(struct task_struct *task)
>  {
> -	struct worker *worker = kthread_data(task), *to_wakeup = NULL;
> +	struct worker *worker, *to_wakeup = NULL;
>  	struct worker_pool *pool;
>  
> +
> +	if (task->state == TASK_DEAD)
> +		/* Here we try to catch the following path before
> +		 * accessing NULL kthread->vfork_done ptr thru
> +		 * kthread_data():
> +		 *
> +		 *    oops_end()
> +		 *    do_exit()
> +		 *    schedule()
> +		 *
> +		 * If panic_on_oops is not set and oops happens on
> +		 * a workqueue execution path, thread will be killed.
> +		 * That is definitly sad, but not to make the situation
> +		 * even worse we have to ignore dead tasks in order not
> +		 * to step on zeroed out members (e.g. t->vfork_done is
> +		 * already NULL on that path, since we were called by
> +		 * do_exit())).
> +		 */
> +		return NULL;

https://lkml.kernel.org/r/CA+55aFyQYJerovMsSoSKS7PessZBr4vNp-3QUUwhqk4A4_jcbg@mail.gmail.com

Also, that misses { }.

> +
> +	worker = kthread_data(task);
> +
>  	/*
>  	 * Rescuers, which may not have all the fields set up like normal
>  	 * workers, also reach here, let's not access anything before
> -- 
> 2.9.3
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2 1/2] kthread: allocate kthread structure using kmalloc
  2016-10-24 16:08 [PATCH v2 1/2] kthread: allocate kthread structure using kmalloc Roman Pen
  2016-10-24 16:08 ` [PATCH v2 2/2] workqueue: ignore dead tasks in a workqueue sleep hook Roman Pen
@ 2016-10-24 16:42 ` Peter Zijlstra
  2016-10-24 19:10   ` Roman Penyaev
  2016-10-24 17:08 ` Andy Lutomirski
  2 siblings, 1 reply; 9+ messages in thread
From: Peter Zijlstra @ 2016-10-24 16:42 UTC (permalink / raw)
  To: Roman Pen
  Cc: Andy Lutomirski, Oleg Nesterov, Thomas Gleixner, Ingo Molnar,
	Tejun Heo, linux-kernel

On Mon, Oct 24, 2016 at 06:08:13PM +0200, Roman Pen wrote:
> This patch avoids allocation of kthread structure on a stack, and simply
> uses kmalloc.  Allocation on a stack became a huge problem (with memory
> corruption and all other not nice consequences) after the commit 2deb4be28

2deb4be28077 ("x86/dumpstack: When OOPSing, rewind the stack before do_exit()")

Is the normal quoting style.

.gitconfig:

[core]
        abbrev = 12
[alias]
        one = show -s --pretty='format:%h (\"%s\")'

> by Andy Lutomirski, which rewinds the stack on oops, thus ooopsed kthread
> steps on a garbage memory while completion of task->vfork_done structure
> on the following path:
> 
>        oops_end()
>        rewind_stack_do_exit()
>        exit_mm()
>        mm_release()
>        complete_vfork_done()
> 
> Also in this patch two structures 'struct kthread_create_info' and
> 'struct kthread' are merged into one 'struct kthread' and its freeing
> is controlled by a reference counter.
> 
> The last reference on kthread is put from a task work, the callback,
> which is invoked from do_exit().  The major thing is that the last
> put is happens *after* completion_vfork_done() is invoked.
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2 1/2] kthread: allocate kthread structure using kmalloc
  2016-10-24 16:08 [PATCH v2 1/2] kthread: allocate kthread structure using kmalloc Roman Pen
  2016-10-24 16:08 ` [PATCH v2 2/2] workqueue: ignore dead tasks in a workqueue sleep hook Roman Pen
  2016-10-24 16:42 ` [PATCH v2 1/2] kthread: allocate kthread structure using kmalloc Peter Zijlstra
@ 2016-10-24 17:08 ` Andy Lutomirski
  2016-10-24 19:37   ` Roman Penyaev
  2 siblings, 1 reply; 9+ messages in thread
From: Andy Lutomirski @ 2016-10-24 17:08 UTC (permalink / raw)
  To: Roman Pen
  Cc: Andy Lutomirski, Oleg Nesterov, Peter Zijlstra, Thomas Gleixner,
	Ingo Molnar, Tejun Heo, linux-kernel

On Mon, Oct 24, 2016 at 9:08 AM, Roman Pen
<roman.penyaev@profitbricks.com> wrote:
> This patch avoids allocation of kthread structure on a stack, and simply
> uses kmalloc.  Allocation on a stack became a huge problem (with memory
> corruption and all other not nice consequences) after the commit 2deb4be28
> by Andy Lutomirski, which rewinds the stack on oops, thus ooopsed kthread
> steps on a garbage memory while completion of task->vfork_done structure
> on the following path:

This is IMO a *huge* improvement.

Shouldn't the patch also remove the try_get_task_stack() /
put_task_stack() hackery in kthread.c, though?

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2 2/2] workqueue: ignore dead tasks in a workqueue sleep hook
  2016-10-24 16:40   ` Peter Zijlstra
@ 2016-10-24 19:09     ` Roman Penyaev
  0 siblings, 0 replies; 9+ messages in thread
From: Roman Penyaev @ 2016-10-24 19:09 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Andy Lutomirski, Oleg Nesterov, Thomas Gleixner, Ingo Molnar,
	Tejun Heo, linux-kernel

On Mon, Oct 24, 2016 at 6:40 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Mon, Oct 24, 2016 at 06:08:14PM +0200, Roman Pen wrote:
>
>> --- a/kernel/workqueue.c
>> +++ b/kernel/workqueue.c
>> @@ -875,9 +875,31 @@ void wq_worker_waking_up(struct task_struct *task, int cpu)
>>   */
>>  struct task_struct *wq_worker_sleeping(struct task_struct *task)
>>  {
>> -     struct worker *worker = kthread_data(task), *to_wakeup = NULL;
>> +     struct worker *worker, *to_wakeup = NULL;
>>       struct worker_pool *pool;
>>
>> +
>> +     if (task->state == TASK_DEAD)
>> +             /* Here we try to catch the following path before
>> +              * accessing NULL kthread->vfork_done ptr thru
>> +              * kthread_data():
>> +              *
>> +              *    oops_end()
>> +              *    do_exit()
>> +              *    schedule()
>> +              *
>> +              * If panic_on_oops is not set and oops happens on
>> +              * a workqueue execution path, thread will be killed.
>> +              * That is definitly sad, but not to make the situation
>> +              * even worse we have to ignore dead tasks in order not
>> +              * to step on zeroed out members (e.g. t->vfork_done is
>> +              * already NULL on that path, since we were called by
>> +              * do_exit())).
>> +              */
>> +             return NULL;
>
> https://lkml.kernel.org/r/CA+55aFyQYJerovMsSoSKS7PessZBr4vNp-3QUUwhqk4A4_jcbg@mail.gmail.com

Ha, explicit comment from Linus :)  Ok.

> Also, that misses { }.

Ok.

--
Roman

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2 1/2] kthread: allocate kthread structure using kmalloc
  2016-10-24 16:42 ` [PATCH v2 1/2] kthread: allocate kthread structure using kmalloc Peter Zijlstra
@ 2016-10-24 19:10   ` Roman Penyaev
  0 siblings, 0 replies; 9+ messages in thread
From: Roman Penyaev @ 2016-10-24 19:10 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Andy Lutomirski, Oleg Nesterov, Thomas Gleixner, Ingo Molnar,
	Tejun Heo, linux-kernel

On Mon, Oct 24, 2016 at 6:42 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Mon, Oct 24, 2016 at 06:08:13PM +0200, Roman Pen wrote:
>> This patch avoids allocation of kthread structure on a stack, and simply
>> uses kmalloc.  Allocation on a stack became a huge problem (with memory
>> corruption and all other not nice consequences) after the commit 2deb4be28
>
> 2deb4be28077 ("x86/dumpstack: When OOPSing, rewind the stack before do_exit()")
>
> Is the normal quoting style.
>
> .gitconfig:
>
> [core]
>         abbrev = 12
> [alias]
>         one = show -s --pretty='format:%h (\"%s\")'

Nice tip. Thanks.

--
Roman

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2 1/2] kthread: allocate kthread structure using kmalloc
  2016-10-24 17:08 ` Andy Lutomirski
@ 2016-10-24 19:37   ` Roman Penyaev
  0 siblings, 0 replies; 9+ messages in thread
From: Roman Penyaev @ 2016-10-24 19:37 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Andy Lutomirski, Oleg Nesterov, Peter Zijlstra, Thomas Gleixner,
	Ingo Molnar, Tejun Heo, linux-kernel

On Mon, Oct 24, 2016 at 7:08 PM, Andy Lutomirski <luto@amacapital.net> wrote:
> On Mon, Oct 24, 2016 at 9:08 AM, Roman Pen
> <roman.penyaev@profitbricks.com> wrote:
>> This patch avoids allocation of kthread structure on a stack, and simply
>> uses kmalloc.  Allocation on a stack became a huge problem (with memory
>> corruption and all other not nice consequences) after the commit 2deb4be28
>> by Andy Lutomirski, which rewinds the stack on oops, thus ooopsed kthread
>> steps on a garbage memory while completion of task->vfork_done structure
>> on the following path:
>
> This is IMO a *huge* improvement.
>
> Shouldn't the patch also remove the try_get_task_stack() /
> put_task_stack() hackery in kthread.c, though?

Do you mean that commit from Oleg https://patchwork.kernel.org/patch/9335295/ ?

Hm, I missed that to_live_kthread() function completely. So, yes, we can remove
put/get_task_stack(),  but only replacing on get/put_kthread.  So still need to
be careful with the refs.

Oleg, could you please take a look on the hunk below, just a quick thought.
(get_kthread is simple atomic kthread->refs++).

--
Roman

--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -64,9 +64,20 @@ static inline struct kthread *to_kthread(struct
task_struct *k)
 static struct kthread *to_live_kthread(struct task_struct *k)
 {
        struct completion *vfork = ACCESS_ONCE(k->vfork_done);
-       if (likely(vfork) && try_get_task_stack(k))
-               return __to_kthread(vfork);
-       return NULL;
+       struct kthread *kthread = NULL;
+
+       BUG_ON(!(k->flags & PF_KTHREAD));
+       if (likely(vfork)) {
+               task_lock(k);
+               vfork = ACCESS_ONCE(k->vfork_done);
+               if (likely(vfork)) {
+                       kthread = __to_kthread(vfork);
+                       get_kthread(kthread);
+               }
+               task_unlock(k);
+       }
+
+       return kthread;
 }

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2 2/2] workqueue: ignore dead tasks in a workqueue sleep hook
  2016-10-24 16:08 ` [PATCH v2 2/2] workqueue: ignore dead tasks in a workqueue sleep hook Roman Pen
  2016-10-24 16:40   ` Peter Zijlstra
@ 2016-10-24 20:27   ` Tejun Heo
  1 sibling, 0 replies; 9+ messages in thread
From: Tejun Heo @ 2016-10-24 20:27 UTC (permalink / raw)
  To: Roman Pen
  Cc: Andy Lutomirski, Oleg Nesterov, Peter Zijlstra, Thomas Gleixner,
	Ingo Molnar, linux-kernel

Hello,

On Mon, Oct 24, 2016 at 06:08:14PM +0200, Roman Pen wrote:
> If panic_on_oops is not set and oops happens inside workqueue kthread,
> kernel kills this kthread.  Current patch fixes recursive GPF which
> happens when wq_worker_sleeping() function unconditionally accesses
> the NULL kthread->vfork_done ptr trhu kthread_data() -> to_kthread().

Other than the pointed out style issues, looks good to me.  If you
send an updated version, I'll route it through wq/for-4.9-fixes.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2016-10-24 20:27 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-10-24 16:08 [PATCH v2 1/2] kthread: allocate kthread structure using kmalloc Roman Pen
2016-10-24 16:08 ` [PATCH v2 2/2] workqueue: ignore dead tasks in a workqueue sleep hook Roman Pen
2016-10-24 16:40   ` Peter Zijlstra
2016-10-24 19:09     ` Roman Penyaev
2016-10-24 20:27   ` Tejun Heo
2016-10-24 16:42 ` [PATCH v2 1/2] kthread: allocate kthread structure using kmalloc Peter Zijlstra
2016-10-24 19:10   ` Roman Penyaev
2016-10-24 17:08 ` Andy Lutomirski
2016-10-24 19:37   ` Roman Penyaev

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).