* [PATCH v2 1/2] kthread: allocate kthread structure using kmalloc
@ 2016-10-24 16:08 Roman Pen
2016-10-24 16:08 ` [PATCH v2 2/2] workqueue: ignore dead tasks in a workqueue sleep hook Roman Pen
` (2 more replies)
0 siblings, 3 replies; 9+ messages in thread
From: Roman Pen @ 2016-10-24 16:08 UTC (permalink / raw)
Cc: Roman Pen, Andy Lutomirski, Oleg Nesterov, Peter Zijlstra,
Thomas Gleixner, Ingo Molnar, Tejun Heo, linux-kernel
This patch avoids allocation of kthread structure on a stack, and simply
uses kmalloc. Allocation on a stack became a huge problem (with memory
corruption and all other not nice consequences) after the commit 2deb4be28
by Andy Lutomirski, which rewinds the stack on oops, thus ooopsed kthread
steps on a garbage memory while completion of task->vfork_done structure
on the following path:
oops_end()
rewind_stack_do_exit()
exit_mm()
mm_release()
complete_vfork_done()
Also in this patch two structures 'struct kthread_create_info' and
'struct kthread' are merged into one 'struct kthread' and its freeing
is controlled by a reference counter.
The last reference on kthread is put from a task work, the callback,
which is invoked from do_exit(). The major thing is that the last
put is happens *after* completion_vfork_done() is invoked.
Signed-off-by: Roman Pen <roman.penyaev@profitbricks.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: linux-kernel@vger.kernel.org
---
v2:
o let x86/kernel/dumpstack.c rewind a stack, but do not use a stack
for a structure allocation.
kernel/kthread.c | 160 +++++++++++++++++++++++++++++++------------------------
1 file changed, 90 insertions(+), 70 deletions(-)
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 4ab4c37..9ccfe06 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -18,14 +18,19 @@
#include <linux/freezer.h>
#include <linux/ptrace.h>
#include <linux/uaccess.h>
+#include <linux/task_work.h>
#include <trace/events/sched.h>
static DEFINE_SPINLOCK(kthread_create_lock);
static LIST_HEAD(kthread_create_list);
struct task_struct *kthreadd_task;
-struct kthread_create_info
-{
+struct kthread {
+ struct list_head list;
+ unsigned long flags;
+ unsigned int cpu;
+ atomic_t refs;
+
/* Information passed to kthread() from kthreadd. */
int (*threadfn)(void *data);
void *data;
@@ -33,15 +38,9 @@ struct kthread_create_info
/* Result passed back to kthread_create() from kthreadd. */
struct task_struct *result;
- struct completion *done;
-
- struct list_head list;
-};
-struct kthread {
- unsigned long flags;
- unsigned int cpu;
- void *data;
+ struct callback_head put_work;
+ struct completion *started;
struct completion parked;
struct completion exited;
};
@@ -69,6 +68,24 @@ static struct kthread *to_live_kthread(struct task_struct *k)
return NULL;
}
+static inline void put_kthread(struct kthread *kthread)
+{
+ if (atomic_dec_and_test(&kthread->refs))
+ kfree(kthread);
+}
+
+/**
+ * put_kthread_cb - is called from do_exit() and does likely
+ * the final put.
+ */
+static void put_kthread_cb(struct callback_head *work)
+{
+ struct kthread *kthread;
+
+ kthread = container_of(work, struct kthread, put_work);
+ put_kthread(kthread);
+}
+
/**
* kthread_should_stop - should this kthread return now?
*
@@ -174,41 +191,36 @@ void kthread_parkme(void)
}
EXPORT_SYMBOL_GPL(kthread_parkme);
-static int kthread(void *_create)
+static int kthreadfn(void *_self)
{
- /* Copy data: it's on kthread's stack */
- struct kthread_create_info *create = _create;
- int (*threadfn)(void *data) = create->threadfn;
- void *data = create->data;
- struct completion *done;
- struct kthread self;
- int ret;
-
- self.flags = 0;
- self.data = data;
- init_completion(&self.exited);
- init_completion(&self.parked);
- current->vfork_done = &self.exited;
-
- /* If user was SIGKILLed, I release the structure. */
- done = xchg(&create->done, NULL);
- if (!done) {
- kfree(create);
- do_exit(-EINTR);
+ struct completion *started;
+ struct kthread *self = _self;
+ int ret = -EINTR;
+
+ /* If user was SIGKILLed, put a ref and exit silently. */
+ started = xchg(&self->started, NULL);
+ if (!started) {
+ put_kthread(self);
+ goto exit;
}
+ /* Delegate last ref put to a task work, which will happen
+ * after 'vfork_done' completion.
+ */
+ init_task_work(&self->put_work, put_kthread_cb);
+ task_work_add(current, &self->put_work, false);
+ current->vfork_done = &self->exited;
+
/* OK, tell user we're spawned, wait for stop or wakeup */
__set_current_state(TASK_UNINTERRUPTIBLE);
- create->result = current;
- complete(done);
+ self->result = current;
+ complete(started);
schedule();
- ret = -EINTR;
-
- if (!test_bit(KTHREAD_SHOULD_STOP, &self.flags)) {
- __kthread_parkme(&self);
- ret = threadfn(data);
+ if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) {
+ __kthread_parkme(self);
+ ret = self->threadfn(self->data);
}
- /* we can't just return, we must preserve "self" on stack */
+exit:
do_exit(ret);
}
@@ -222,25 +234,24 @@ int tsk_fork_get_node(struct task_struct *tsk)
return NUMA_NO_NODE;
}
-static void create_kthread(struct kthread_create_info *create)
+static void create_kthread(struct kthread *kthread)
{
+ struct completion *started;
int pid;
#ifdef CONFIG_NUMA
- current->pref_node_fork = create->node;
+ current->pref_node_fork = kthread->node;
#endif
/* We want our own signal handler (we take no signals by default). */
- pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD);
+ pid = kernel_thread(kthreadfn, kthread,
+ CLONE_FS | CLONE_FILES | SIGCHLD);
if (pid < 0) {
- /* If user was SIGKILLed, I release the structure. */
- struct completion *done = xchg(&create->done, NULL);
-
- if (!done) {
- kfree(create);
- return;
+ started = xchg(&kthread->started, NULL);
+ if (started) {
+ kthread->result = ERR_PTR(pid);
+ complete(started);
}
- create->result = ERR_PTR(pid);
- complete(done);
+ put_kthread(kthread);
}
}
@@ -272,20 +283,26 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
const char namefmt[],
...)
{
- DECLARE_COMPLETION_ONSTACK(done);
+ DECLARE_COMPLETION_ONSTACK(started);
struct task_struct *task;
- struct kthread_create_info *create = kmalloc(sizeof(*create),
- GFP_KERNEL);
+ struct kthread *kthread;
- if (!create)
+ kthread = kmalloc(sizeof(*kthread), GFP_KERNEL);
+ if (!kthread)
return ERR_PTR(-ENOMEM);
- create->threadfn = threadfn;
- create->data = data;
- create->node = node;
- create->done = &done;
+ /* One ref for us and one ref for a new kernel thread. */
+ atomic_set(&kthread->refs, 2);
+ kthread->flags = 0;
+ kthread->cpu = 0;
+ kthread->threadfn = threadfn;
+ kthread->data = data;
+ kthread->node = node;
+ kthread->started = &started;
+ init_completion(&kthread->exited);
+ init_completion(&kthread->parked);
spin_lock(&kthread_create_lock);
- list_add_tail(&create->list, &kthread_create_list);
+ list_add_tail(&kthread->list, &kthread_create_list);
spin_unlock(&kthread_create_lock);
wake_up_process(kthreadd_task);
@@ -294,21 +311,23 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
* the OOM killer while kthreadd is trying to allocate memory for
* new kernel thread.
*/
- if (unlikely(wait_for_completion_killable(&done))) {
+ if (unlikely(wait_for_completion_killable(&started))) {
/*
* If I was SIGKILLed before kthreadd (or new kernel thread)
- * calls complete(), leave the cleanup of this structure to
- * that thread.
+ * calls complete(), put a ref and return an error.
*/
- if (xchg(&create->done, NULL))
+ if (xchg(&kthread->started, NULL)) {
+ put_kthread(kthread);
+
return ERR_PTR(-EINTR);
+ }
/*
* kthreadd (or new kernel thread) will call complete()
* shortly.
*/
- wait_for_completion(&done);
+ wait_for_completion(&started);
}
- task = create->result;
+ task = kthread->result;
if (!IS_ERR(task)) {
static const struct sched_param param = { .sched_priority = 0 };
va_list args;
@@ -323,7 +342,8 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
sched_setscheduler_nocheck(task, SCHED_NORMAL, ¶m);
set_cpus_allowed_ptr(task, cpu_all_mask);
}
- kfree(create);
+ put_kthread(kthread);
+
return task;
}
EXPORT_SYMBOL(kthread_create_on_node);
@@ -523,14 +543,14 @@ int kthreadd(void *unused)
spin_lock(&kthread_create_lock);
while (!list_empty(&kthread_create_list)) {
- struct kthread_create_info *create;
+ struct kthread *kthread;
- create = list_entry(kthread_create_list.next,
- struct kthread_create_info, list);
- list_del_init(&create->list);
+ kthread = list_entry(kthread_create_list.next,
+ struct kthread, list);
+ list_del_init(&kthread->list);
spin_unlock(&kthread_create_lock);
- create_kthread(create);
+ create_kthread(kthread);
spin_lock(&kthread_create_lock);
}
--
2.9.3
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH v2 2/2] workqueue: ignore dead tasks in a workqueue sleep hook
2016-10-24 16:08 [PATCH v2 1/2] kthread: allocate kthread structure using kmalloc Roman Pen
@ 2016-10-24 16:08 ` Roman Pen
2016-10-24 16:40 ` Peter Zijlstra
2016-10-24 20:27 ` Tejun Heo
2016-10-24 16:42 ` [PATCH v2 1/2] kthread: allocate kthread structure using kmalloc Peter Zijlstra
2016-10-24 17:08 ` Andy Lutomirski
2 siblings, 2 replies; 9+ messages in thread
From: Roman Pen @ 2016-10-24 16:08 UTC (permalink / raw)
Cc: Roman Pen, Andy Lutomirski, Oleg Nesterov, Peter Zijlstra,
Thomas Gleixner, Ingo Molnar, Tejun Heo, linux-kernel
If panic_on_oops is not set and oops happens inside workqueue kthread,
kernel kills this kthread. Current patch fixes recursive GPF which
happens when wq_worker_sleeping() function unconditionally accesses
the NULL kthread->vfork_done ptr trhu kthread_data() -> to_kthread().
The stack is the following:
[<ffffffff81397f75>] dump_stack+0x68/0x93
[<ffffffff8106954b>] ? do_exit+0x7ab/0xc10
[<ffffffff8108fd73>] __schedule_bug+0x83/0xe0
[<ffffffff81716d5a>] __schedule+0x7ea/0xba0
[<ffffffff810c864f>] ? vprintk_default+0x1f/0x30
[<ffffffff8116a63c>] ? printk+0x48/0x50
[<ffffffff81717150>] schedule+0x40/0x90
[<ffffffff8106976a>] do_exit+0x9ca/0xc10
[<ffffffff810c8e3d>] ? kmsg_dump+0x11d/0x190
[<ffffffff810c8d37>] ? kmsg_dump+0x17/0x190
[<ffffffff81021ee9>] oops_end+0x99/0xd0
[<ffffffff81052da5>] no_context+0x185/0x3e0
[<ffffffff81053083>] __bad_area_nosemaphore+0x83/0x1c0
[<ffffffff810c820e>] ? vprintk_emit+0x25e/0x530
[<ffffffff810531d4>] bad_area_nosemaphore+0x14/0x20
[<ffffffff8105355c>] __do_page_fault+0xac/0x570
[<ffffffff810c66fe>] ? console_trylock+0x1e/0xe0
[<ffffffff81002036>] ? trace_hardirqs_off_thunk+0x1a/0x1c
[<ffffffff81053a2c>] do_page_fault+0xc/0x10
[<ffffffff8171f812>] page_fault+0x22/0x30
[<ffffffff81089bc3>] ? kthread_data+0x33/0x40
[<ffffffff8108427e>] ? wq_worker_sleeping+0xe/0x80
[<ffffffff817169eb>] __schedule+0x47b/0xba0
[<ffffffff81717150>] schedule+0x40/0x90
[<ffffffff8106957d>] do_exit+0x7dd/0xc10
[<ffffffff81021ee9>] oops_end+0x99/0xd0
kthread->vfork_done is zeroed out on the following path:
do_exit()
exit_mm()
mm_release()
complete_vfork_done()
In order to fix a bug dead tasks must be ignored.
Signed-off-by: Roman Pen <roman.penyaev@profitbricks.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: linux-kernel@vger.kernel.org
---
v2:
o put a task->state check directly into a wq_worker_sleeping() function
instead of changing the __schedule().
kernel/workqueue.c | 24 +++++++++++++++++++++++-
1 file changed, 23 insertions(+), 1 deletion(-)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 9dc7ac5..b19dcb6 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -875,9 +875,31 @@ void wq_worker_waking_up(struct task_struct *task, int cpu)
*/
struct task_struct *wq_worker_sleeping(struct task_struct *task)
{
- struct worker *worker = kthread_data(task), *to_wakeup = NULL;
+ struct worker *worker, *to_wakeup = NULL;
struct worker_pool *pool;
+
+ if (task->state == TASK_DEAD)
+ /* Here we try to catch the following path before
+ * accessing NULL kthread->vfork_done ptr thru
+ * kthread_data():
+ *
+ * oops_end()
+ * do_exit()
+ * schedule()
+ *
+ * If panic_on_oops is not set and oops happens on
+ * a workqueue execution path, thread will be killed.
+ * That is definitly sad, but not to make the situation
+ * even worse we have to ignore dead tasks in order not
+ * to step on zeroed out members (e.g. t->vfork_done is
+ * already NULL on that path, since we were called by
+ * do_exit())).
+ */
+ return NULL;
+
+ worker = kthread_data(task);
+
/*
* Rescuers, which may not have all the fields set up like normal
* workers, also reach here, let's not access anything before
--
2.9.3
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH v2 2/2] workqueue: ignore dead tasks in a workqueue sleep hook
2016-10-24 16:08 ` [PATCH v2 2/2] workqueue: ignore dead tasks in a workqueue sleep hook Roman Pen
@ 2016-10-24 16:40 ` Peter Zijlstra
2016-10-24 19:09 ` Roman Penyaev
2016-10-24 20:27 ` Tejun Heo
1 sibling, 1 reply; 9+ messages in thread
From: Peter Zijlstra @ 2016-10-24 16:40 UTC (permalink / raw)
To: Roman Pen
Cc: Andy Lutomirski, Oleg Nesterov, Thomas Gleixner, Ingo Molnar,
Tejun Heo, linux-kernel
On Mon, Oct 24, 2016 at 06:08:14PM +0200, Roman Pen wrote:
> --- a/kernel/workqueue.c
> +++ b/kernel/workqueue.c
> @@ -875,9 +875,31 @@ void wq_worker_waking_up(struct task_struct *task, int cpu)
> */
> struct task_struct *wq_worker_sleeping(struct task_struct *task)
> {
> - struct worker *worker = kthread_data(task), *to_wakeup = NULL;
> + struct worker *worker, *to_wakeup = NULL;
> struct worker_pool *pool;
>
> +
> + if (task->state == TASK_DEAD)
> + /* Here we try to catch the following path before
> + * accessing NULL kthread->vfork_done ptr thru
> + * kthread_data():
> + *
> + * oops_end()
> + * do_exit()
> + * schedule()
> + *
> + * If panic_on_oops is not set and oops happens on
> + * a workqueue execution path, thread will be killed.
> + * That is definitly sad, but not to make the situation
> + * even worse we have to ignore dead tasks in order not
> + * to step on zeroed out members (e.g. t->vfork_done is
> + * already NULL on that path, since we were called by
> + * do_exit())).
> + */
> + return NULL;
https://lkml.kernel.org/r/CA+55aFyQYJerovMsSoSKS7PessZBr4vNp-3QUUwhqk4A4_jcbg@mail.gmail.com
Also, that misses { }.
> +
> + worker = kthread_data(task);
> +
> /*
> * Rescuers, which may not have all the fields set up like normal
> * workers, also reach here, let's not access anything before
> --
> 2.9.3
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v2 1/2] kthread: allocate kthread structure using kmalloc
2016-10-24 16:08 [PATCH v2 1/2] kthread: allocate kthread structure using kmalloc Roman Pen
2016-10-24 16:08 ` [PATCH v2 2/2] workqueue: ignore dead tasks in a workqueue sleep hook Roman Pen
@ 2016-10-24 16:42 ` Peter Zijlstra
2016-10-24 19:10 ` Roman Penyaev
2016-10-24 17:08 ` Andy Lutomirski
2 siblings, 1 reply; 9+ messages in thread
From: Peter Zijlstra @ 2016-10-24 16:42 UTC (permalink / raw)
To: Roman Pen
Cc: Andy Lutomirski, Oleg Nesterov, Thomas Gleixner, Ingo Molnar,
Tejun Heo, linux-kernel
On Mon, Oct 24, 2016 at 06:08:13PM +0200, Roman Pen wrote:
> This patch avoids allocation of kthread structure on a stack, and simply
> uses kmalloc. Allocation on a stack became a huge problem (with memory
> corruption and all other not nice consequences) after the commit 2deb4be28
2deb4be28077 ("x86/dumpstack: When OOPSing, rewind the stack before do_exit()")
Is the normal quoting style.
.gitconfig:
[core]
abbrev = 12
[alias]
one = show -s --pretty='format:%h (\"%s\")'
> by Andy Lutomirski, which rewinds the stack on oops, thus ooopsed kthread
> steps on a garbage memory while completion of task->vfork_done structure
> on the following path:
>
> oops_end()
> rewind_stack_do_exit()
> exit_mm()
> mm_release()
> complete_vfork_done()
>
> Also in this patch two structures 'struct kthread_create_info' and
> 'struct kthread' are merged into one 'struct kthread' and its freeing
> is controlled by a reference counter.
>
> The last reference on kthread is put from a task work, the callback,
> which is invoked from do_exit(). The major thing is that the last
> put is happens *after* completion_vfork_done() is invoked.
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v2 1/2] kthread: allocate kthread structure using kmalloc
2016-10-24 16:08 [PATCH v2 1/2] kthread: allocate kthread structure using kmalloc Roman Pen
2016-10-24 16:08 ` [PATCH v2 2/2] workqueue: ignore dead tasks in a workqueue sleep hook Roman Pen
2016-10-24 16:42 ` [PATCH v2 1/2] kthread: allocate kthread structure using kmalloc Peter Zijlstra
@ 2016-10-24 17:08 ` Andy Lutomirski
2016-10-24 19:37 ` Roman Penyaev
2 siblings, 1 reply; 9+ messages in thread
From: Andy Lutomirski @ 2016-10-24 17:08 UTC (permalink / raw)
To: Roman Pen
Cc: Andy Lutomirski, Oleg Nesterov, Peter Zijlstra, Thomas Gleixner,
Ingo Molnar, Tejun Heo, linux-kernel
On Mon, Oct 24, 2016 at 9:08 AM, Roman Pen
<roman.penyaev@profitbricks.com> wrote:
> This patch avoids allocation of kthread structure on a stack, and simply
> uses kmalloc. Allocation on a stack became a huge problem (with memory
> corruption and all other not nice consequences) after the commit 2deb4be28
> by Andy Lutomirski, which rewinds the stack on oops, thus ooopsed kthread
> steps on a garbage memory while completion of task->vfork_done structure
> on the following path:
This is IMO a *huge* improvement.
Shouldn't the patch also remove the try_get_task_stack() /
put_task_stack() hackery in kthread.c, though?
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v2 2/2] workqueue: ignore dead tasks in a workqueue sleep hook
2016-10-24 16:40 ` Peter Zijlstra
@ 2016-10-24 19:09 ` Roman Penyaev
0 siblings, 0 replies; 9+ messages in thread
From: Roman Penyaev @ 2016-10-24 19:09 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Andy Lutomirski, Oleg Nesterov, Thomas Gleixner, Ingo Molnar,
Tejun Heo, linux-kernel
On Mon, Oct 24, 2016 at 6:40 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Mon, Oct 24, 2016 at 06:08:14PM +0200, Roman Pen wrote:
>
>> --- a/kernel/workqueue.c
>> +++ b/kernel/workqueue.c
>> @@ -875,9 +875,31 @@ void wq_worker_waking_up(struct task_struct *task, int cpu)
>> */
>> struct task_struct *wq_worker_sleeping(struct task_struct *task)
>> {
>> - struct worker *worker = kthread_data(task), *to_wakeup = NULL;
>> + struct worker *worker, *to_wakeup = NULL;
>> struct worker_pool *pool;
>>
>> +
>> + if (task->state == TASK_DEAD)
>> + /* Here we try to catch the following path before
>> + * accessing NULL kthread->vfork_done ptr thru
>> + * kthread_data():
>> + *
>> + * oops_end()
>> + * do_exit()
>> + * schedule()
>> + *
>> + * If panic_on_oops is not set and oops happens on
>> + * a workqueue execution path, thread will be killed.
>> + * That is definitly sad, but not to make the situation
>> + * even worse we have to ignore dead tasks in order not
>> + * to step on zeroed out members (e.g. t->vfork_done is
>> + * already NULL on that path, since we were called by
>> + * do_exit())).
>> + */
>> + return NULL;
>
> https://lkml.kernel.org/r/CA+55aFyQYJerovMsSoSKS7PessZBr4vNp-3QUUwhqk4A4_jcbg@mail.gmail.com
Ha, explicit comment from Linus :) Ok.
> Also, that misses { }.
Ok.
--
Roman
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v2 1/2] kthread: allocate kthread structure using kmalloc
2016-10-24 16:42 ` [PATCH v2 1/2] kthread: allocate kthread structure using kmalloc Peter Zijlstra
@ 2016-10-24 19:10 ` Roman Penyaev
0 siblings, 0 replies; 9+ messages in thread
From: Roman Penyaev @ 2016-10-24 19:10 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Andy Lutomirski, Oleg Nesterov, Thomas Gleixner, Ingo Molnar,
Tejun Heo, linux-kernel
On Mon, Oct 24, 2016 at 6:42 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Mon, Oct 24, 2016 at 06:08:13PM +0200, Roman Pen wrote:
>> This patch avoids allocation of kthread structure on a stack, and simply
>> uses kmalloc. Allocation on a stack became a huge problem (with memory
>> corruption and all other not nice consequences) after the commit 2deb4be28
>
> 2deb4be28077 ("x86/dumpstack: When OOPSing, rewind the stack before do_exit()")
>
> Is the normal quoting style.
>
> .gitconfig:
>
> [core]
> abbrev = 12
> [alias]
> one = show -s --pretty='format:%h (\"%s\")'
Nice tip. Thanks.
--
Roman
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v2 1/2] kthread: allocate kthread structure using kmalloc
2016-10-24 17:08 ` Andy Lutomirski
@ 2016-10-24 19:37 ` Roman Penyaev
0 siblings, 0 replies; 9+ messages in thread
From: Roman Penyaev @ 2016-10-24 19:37 UTC (permalink / raw)
To: Andy Lutomirski
Cc: Andy Lutomirski, Oleg Nesterov, Peter Zijlstra, Thomas Gleixner,
Ingo Molnar, Tejun Heo, linux-kernel
On Mon, Oct 24, 2016 at 7:08 PM, Andy Lutomirski <luto@amacapital.net> wrote:
> On Mon, Oct 24, 2016 at 9:08 AM, Roman Pen
> <roman.penyaev@profitbricks.com> wrote:
>> This patch avoids allocation of kthread structure on a stack, and simply
>> uses kmalloc. Allocation on a stack became a huge problem (with memory
>> corruption and all other not nice consequences) after the commit 2deb4be28
>> by Andy Lutomirski, which rewinds the stack on oops, thus ooopsed kthread
>> steps on a garbage memory while completion of task->vfork_done structure
>> on the following path:
>
> This is IMO a *huge* improvement.
>
> Shouldn't the patch also remove the try_get_task_stack() /
> put_task_stack() hackery in kthread.c, though?
Do you mean that commit from Oleg https://patchwork.kernel.org/patch/9335295/ ?
Hm, I missed that to_live_kthread() function completely. So, yes, we can remove
put/get_task_stack(), but only replacing on get/put_kthread. So still need to
be careful with the refs.
Oleg, could you please take a look on the hunk below, just a quick thought.
(get_kthread is simple atomic kthread->refs++).
--
Roman
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -64,9 +64,20 @@ static inline struct kthread *to_kthread(struct
task_struct *k)
static struct kthread *to_live_kthread(struct task_struct *k)
{
struct completion *vfork = ACCESS_ONCE(k->vfork_done);
- if (likely(vfork) && try_get_task_stack(k))
- return __to_kthread(vfork);
- return NULL;
+ struct kthread *kthread = NULL;
+
+ BUG_ON(!(k->flags & PF_KTHREAD));
+ if (likely(vfork)) {
+ task_lock(k);
+ vfork = ACCESS_ONCE(k->vfork_done);
+ if (likely(vfork)) {
+ kthread = __to_kthread(vfork);
+ get_kthread(kthread);
+ }
+ task_unlock(k);
+ }
+
+ return kthread;
}
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v2 2/2] workqueue: ignore dead tasks in a workqueue sleep hook
2016-10-24 16:08 ` [PATCH v2 2/2] workqueue: ignore dead tasks in a workqueue sleep hook Roman Pen
2016-10-24 16:40 ` Peter Zijlstra
@ 2016-10-24 20:27 ` Tejun Heo
1 sibling, 0 replies; 9+ messages in thread
From: Tejun Heo @ 2016-10-24 20:27 UTC (permalink / raw)
To: Roman Pen
Cc: Andy Lutomirski, Oleg Nesterov, Peter Zijlstra, Thomas Gleixner,
Ingo Molnar, linux-kernel
Hello,
On Mon, Oct 24, 2016 at 06:08:14PM +0200, Roman Pen wrote:
> If panic_on_oops is not set and oops happens inside workqueue kthread,
> kernel kills this kthread. Current patch fixes recursive GPF which
> happens when wq_worker_sleeping() function unconditionally accesses
> the NULL kthread->vfork_done ptr trhu kthread_data() -> to_kthread().
Other than the pointed out style issues, looks good to me. If you
send an updated version, I'll route it through wq/for-4.9-fixes.
Thanks.
--
tejun
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2016-10-24 20:27 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-10-24 16:08 [PATCH v2 1/2] kthread: allocate kthread structure using kmalloc Roman Pen
2016-10-24 16:08 ` [PATCH v2 2/2] workqueue: ignore dead tasks in a workqueue sleep hook Roman Pen
2016-10-24 16:40 ` Peter Zijlstra
2016-10-24 19:09 ` Roman Penyaev
2016-10-24 20:27 ` Tejun Heo
2016-10-24 16:42 ` [PATCH v2 1/2] kthread: allocate kthread structure using kmalloc Peter Zijlstra
2016-10-24 19:10 ` Roman Penyaev
2016-10-24 17:08 ` Andy Lutomirski
2016-10-24 19:37 ` Roman Penyaev
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).