* [PATCH] sched/pid fix use-after free in task_tgid_vnr
[not found] <CGME20161209093351epcms1p418673c3cdec7d4c3e81b5df131173c57@epcms1p4>
@ 2016-12-09 9:33 ` EunTaik Lee
2016-12-09 17:21 ` Oleg Nesterov
0 siblings, 1 reply; 6+ messages in thread
From: EunTaik Lee @ 2016-12-09 9:33 UTC (permalink / raw)
To: mingo, peterz, linux-kernel
[-- Attachment #1: Type: text/plain, Size: 1413 bytes --]
There is a use-after-free case with below call stack.
pid_nr_ns+0x10/0x38
cgroup_pidlist_start+0x144/0x400
cgroup_seqfile_start+0x1c/0x24
kernfs_seq_start+0x54/0x90
seq_read+0x15c/0x3a8
kernfs_fop_read+0x38/0x160
__vfs_read+0x28/0xc8
vfs_read+0x84/0xfc
A task in the cg_list was dying and the group_leader's
task struct was already freed.
To avoid this task_tgid_vnr needs to take the
rcu_read_lock and check for pid_alive.
Signed-off-by: Eun Taik Lee <eun.taik.lee@samsung.com>
---
include/linux/sched.h | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e9c009d..ed567bc 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2118,14 +2118,21 @@ static inline pid_t task_tgid_nr(struct task_struct *tsk)
}
pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);
+static inline int pid_alive(const struct task_struct *p);
static inline pid_t task_tgid_vnr(struct task_struct *tsk)
{
- return pid_vnr(task_tgid(tsk));
+ pid_t pid = 0;
+
+ rcu_read_lock();
+ if (pid_alive(tsk))
+ pid = pid_vnr(task_tgid(tsk));
+ rcu_read_unlock();
+
+ return pid;
}
-static inline int pid_alive(const struct task_struct *p);
static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns)
{
pid_t pid = 0;
--
1.9.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH] sched/pid fix use-after free in task_tgid_vnr
2016-12-09 9:33 ` [PATCH] sched/pid fix use-after free in task_tgid_vnr EunTaik Lee
@ 2016-12-09 17:21 ` Oleg Nesterov
2016-12-09 22:21 ` Eric W. Biederman
0 siblings, 1 reply; 6+ messages in thread
From: Oleg Nesterov @ 2016-12-09 17:21 UTC (permalink / raw)
To: EunTaik Lee, Eric W. Biederman; +Cc: mingo, peterz, linux-kernel
On 12/09, EunTaik Lee wrote:
>
> There is a use-after-free case with below call stack.
>
> pid_nr_ns+0x10/0x38
> cgroup_pidlist_start+0x144/0x400
> cgroup_seqfile_start+0x1c/0x24
> kernfs_seq_start+0x54/0x90
> seq_read+0x15c/0x3a8
> kernfs_fop_read+0x38/0x160
> __vfs_read+0x28/0xc8
> vfs_read+0x84/0xfc
This reminds about perf_event_pid() which is equally buggy...
> static inline pid_t task_tgid_vnr(struct task_struct *tsk)
> {
> - return pid_vnr(task_tgid(tsk));
> + pid_t pid = 0;
> +
> + rcu_read_lock();
> + if (pid_alive(tsk))
> + pid = pid_vnr(task_tgid(tsk));
> + rcu_read_unlock();
> +
> + return pid;
> }
Eric, EunTaik, what do you think about the patch below?
I can't decide whether it is too ugly or not, but it would be nice
to avoid the code duplication.
Oleg.
--- x/include/linux/pid.h
+++ x/include/linux/pid.h
@@ -8,7 +8,8 @@ enum pid_type
PIDTYPE_PID,
PIDTYPE_PGID,
PIDTYPE_SID,
- PIDTYPE_MAX
+ PIDTYPE_MAX,
+ PIDTYPE_TGID /* do not use */
};
/*
--- x/kernel/pid.c
+++ x/kernel/pid.c
@@ -526,8 +526,11 @@ pid_t __task_pid_nr_ns(struct task_struc
if (!ns)
ns = task_active_pid_ns(current);
if (likely(pid_alive(task))) {
- if (type != PIDTYPE_PID)
+ if (type != PIDTYPE_PID) {
+ if (type == PIDTYPE_TGID)
+ type = PIDTYPE_PID;
task = task->group_leader;
+ }
nr = pid_nr_ns(rcu_dereference(task->pids[type].pid), ns);
}
rcu_read_unlock();
@@ -538,7 +541,7 @@ EXPORT_SYMBOL(__task_pid_nr_ns);
pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
{
- return pid_nr_ns(task_tgid(tsk), ns);
+ return __task_pid_nr_ns(tsk, PIDTYPE_TGID, ns);
}
EXPORT_SYMBOL(task_tgid_nr_ns);
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] sched/pid fix use-after free in task_tgid_vnr
2016-12-09 17:21 ` Oleg Nesterov
@ 2016-12-09 22:21 ` Eric W. Biederman
2016-12-12 13:46 ` Oleg Nesterov
0 siblings, 1 reply; 6+ messages in thread
From: Eric W. Biederman @ 2016-12-09 22:21 UTC (permalink / raw)
To: Oleg Nesterov; +Cc: EunTaik Lee, mingo, peterz, linux-kernel
Oleg Nesterov <oleg@redhat.com> writes:
> On 12/09, EunTaik Lee wrote:
>>
>> There is a use-after-free case with below call stack.
>>
>> pid_nr_ns+0x10/0x38
>> cgroup_pidlist_start+0x144/0x400
>> cgroup_seqfile_start+0x1c/0x24
>> kernfs_seq_start+0x54/0x90
>> seq_read+0x15c/0x3a8
>> kernfs_fop_read+0x38/0x160
>> __vfs_read+0x28/0xc8
>> vfs_read+0x84/0xfc
How is this a use after free. The function pid_nr_ns should take a NULL pointer
as input and return 0?
Certainly if the addtion of pid_alive fixes it pid_vnr(task_tgid(tsk))
is fine. Are we perhaps missing rcu locking?
Or is the problem simply that in task_tgid we are accessing
task->group_leader which may already be dead? If so the fix needs to be
in task_tgid.
> This reminds about perf_event_pid() which is equally buggy...
>
>> static inline pid_t task_tgid_vnr(struct task_struct *tsk)
>> {
>> - return pid_vnr(task_tgid(tsk));
>> + pid_t pid = 0;
>> +
>> + rcu_read_lock();
>> + if (pid_alive(tsk))
>> + pid = pid_vnr(task_tgid(tsk));
>> + rcu_read_unlock();
>> +
>> + return pid;
>> }
>
> Eric, EunTaik, what do you think about the patch below?
>
> I can't decide whether it is too ugly or not, but it would be nice
> to avoid the code duplication.
I think it can be beaten into shape but I am not certain it addresses the
core issue.
>
> Oleg.
>
>
> --- x/include/linux/pid.h
> +++ x/include/linux/pid.h
> @@ -8,7 +8,8 @@ enum pid_type
> PIDTYPE_PID,
> PIDTYPE_PGID,
> PIDTYPE_SID,
> - PIDTYPE_MAX
> + PIDTYPE_MAX,
> + PIDTYPE_TGID /* do not use */
I would do:
/* __PIDTYPE_TGID is only valid to __task_pid_nr_ns */
#define __PIDTYPE_TGID PIDTYPE_MAX
Prefixing __PIDTYPE_TGID with __ should help make it clear
this is a special use define.
I am also curious why pid_alive is the proper check to see if
task->group_leader is valid? That feels like it could get us into
trouble later.
Especially as that is the real problem child here.
> };
>
> /*
> --- x/kernel/pid.c
> +++ x/kernel/pid.c
> @@ -526,8 +526,11 @@ pid_t __task_pid_nr_ns(struct task_struc
> if (!ns)
> ns = task_active_pid_ns(current);
> if (likely(pid_alive(task))) {
> - if (type != PIDTYPE_PID)
> + if (type != PIDTYPE_PID) {
> + if (type == PIDTYPE_TGID)
> + type = PIDTYPE_PID;
> task = task->group_leader;
> + }
> nr = pid_nr_ns(rcu_dereference(task->pids[type].pid), ns);
> }
> rcu_read_unlock();
> @@ -538,7 +541,7 @@ EXPORT_SYMBOL(__task_pid_nr_ns);
>
> pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
> {
> - return pid_nr_ns(task_tgid(tsk), ns);
> + return __task_pid_nr_ns(tsk, PIDTYPE_TGID, ns);
> }
> EXPORT_SYMBOL(task_tgid_nr_ns);
>
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] sched/pid fix use-after free in task_tgid_vnr
2016-12-09 22:21 ` Eric W. Biederman
@ 2016-12-12 13:46 ` Oleg Nesterov
2016-12-12 19:10 ` Eric W. Biederman
0 siblings, 1 reply; 6+ messages in thread
From: Oleg Nesterov @ 2016-12-12 13:46 UTC (permalink / raw)
To: Eric W. Biederman; +Cc: EunTaik Lee, mingo, peterz, linux-kernel
On 12/10, Eric W. Biederman wrote:
>
> Oleg Nesterov <oleg@redhat.com> writes:
>
> > On 12/09, EunTaik Lee wrote:
> >>
> >> There is a use-after-free case with below call stack.
> >>
> >> pid_nr_ns+0x10/0x38
> >> cgroup_pidlist_start+0x144/0x400
> >> cgroup_seqfile_start+0x1c/0x24
> >> kernfs_seq_start+0x54/0x90
> >> seq_read+0x15c/0x3a8
> >> kernfs_fop_read+0x38/0x160
> >> __vfs_read+0x28/0xc8
> >> vfs_read+0x84/0xfc
>
> How is this a use after free. The function pid_nr_ns should take a NULL pointer
> as input and return 0?
No, the task (task_struct) itself can't go away, but task->group_leader
can point to nowhere.
> Certainly if the addtion of pid_alive fixes it pid_vnr(task_tgid(tsk))
> is fine. Are we perhaps missing rcu locking?
rcu_read_lock() is not enough in this case, see below.
> Or is the problem simply that in task_tgid we are accessing
> task->group_leader which may already be dead?
Yes. Lets forget about the callchain above, I didn't even bother to verify
that it can actually hit the problem. Although I think EunTaik is very right,
css_task_iter_next() does get_task_struct() and drops css_set_lock, the task
can exit after that. Forget.
Just suppose that a task simply does
pid = task_tgid_vnr(current);
after it has already called exit_notify(). And this is what perf_event_pid()
does, perhaps we have more buggy users.
In this case current->group_leader or parent/real_parent can point to the
exited/freed tasks. I already said this many times, ee really need to nullify
them in __unhash_process() but this needs a lot of (mostly simple) cleanups.
> If so the fix needs to be
> in task_tgid.
Yes, task_tgid() should probably return NULL in this case, but this connects
to "a lot of cleanups" above.
> > --- x/include/linux/pid.h
> > +++ x/include/linux/pid.h
> > @@ -8,7 +8,8 @@ enum pid_type
> > PIDTYPE_PID,
> > PIDTYPE_PGID,
> > PIDTYPE_SID,
> > - PIDTYPE_MAX
> > + PIDTYPE_MAX,
> > + PIDTYPE_TGID /* do not use */
>
>
> I would do:
>
> /* __PIDTYPE_TGID is only valid to __task_pid_nr_ns */
> #define __PIDTYPE_TGID PIDTYPE_MAX
>
> Prefixing __PIDTYPE_TGID with __ should help make it clear
> this is a special use define.
OK, will do, thanks.
> I am also curious why pid_alive is the proper check to see if
> task->group_leader is valid? That feels like it could get us into
> trouble later.
It is. pid_alive(task) == T means that this task was not removed from
rcu-protected-lists and thus task->group_leader (in particular) can't
go away. As long as you check pid_alive() and use ->group_leader in
the same rcu_read_lock section, of course.
To clarify, this is not because detach_pid(PIDTYPE_PID) is called
before list_del_rcu(), this doesn't matter. What does matter is that
if you see pid_alive() == T (or task->sighand != NULL, or anything
else which means that release_task/__unhash_process was not called yet)
you know that this task, its leader/parent/real_parent/pids/etc can't
go away until rcu_read_unlock().
And this is why __task_pid_nr_ns() checks pid_alive() before it reads
->group_leader. Note that __task_pid_nr_ns(PIDTYPE_PID) does not need
this check, task->pids[PID] is nullified by detach_pid() and pid_nr_ns()
check pid != NULL.
However. I think it should be renamed. Or, better, we should add a new
helper to make this all more clear. Say,
bool task_is_rcu_safe(task)
{
return task->sighand != NULL;
}
then change __task_pid_nr_ns() to use this helper rather than pid_alive().
Because, once again, it is not that we need to ensure that
pids[PIDTYPE_PID].pid != NULL, we need to ensure that rcu_read_lock() can
actually protect this task and its leader/parent/etc. And of course, it
can have more users.
Oleg.
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] sched/pid fix use-after free in task_tgid_vnr
2016-12-12 13:46 ` Oleg Nesterov
@ 2016-12-12 19:10 ` Eric W. Biederman
2016-12-13 16:03 ` Oleg Nesterov
0 siblings, 1 reply; 6+ messages in thread
From: Eric W. Biederman @ 2016-12-12 19:10 UTC (permalink / raw)
To: Oleg Nesterov; +Cc: EunTaik Lee, mingo, peterz, linux-kernel
Oleg Nesterov <oleg@redhat.com> writes:
> On 12/10, Eric W. Biederman wrote:
>>
>> Oleg Nesterov <oleg@redhat.com> writes:
>>
>> > On 12/09, EunTaik Lee wrote:
>> >>
>> >> There is a use-after-free case with below call stack.
>> >>
>> >> pid_nr_ns+0x10/0x38
>> >> cgroup_pidlist_start+0x144/0x400
>> >> cgroup_seqfile_start+0x1c/0x24
>> >> kernfs_seq_start+0x54/0x90
>> >> seq_read+0x15c/0x3a8
>> >> kernfs_fop_read+0x38/0x160
>> >> __vfs_read+0x28/0xc8
>> >> vfs_read+0x84/0xfc
>>
>> How is this a use after free. The function pid_nr_ns should take a NULL pointer
>> as input and return 0?
>
> No, the task (task_struct) itself can't go away, but task->group_leader
> can point to nowhere.
>
>> Certainly if the addtion of pid_alive fixes it pid_vnr(task_tgid(tsk))
>> is fine. Are we perhaps missing rcu locking?
>
> rcu_read_lock() is not enough in this case, see below.
>
>> Or is the problem simply that in task_tgid we are accessing
>> task->group_leader which may already be dead?
>
> Yes. Lets forget about the callchain above, I didn't even bother to verify
> that it can actually hit the problem. Although I think EunTaik is very right,
> css_task_iter_next() does get_task_struct() and drops css_set_lock, the task
> can exit after that. Forget.
>
> Just suppose that a task simply does
>
> pid = task_tgid_vnr(current);
>
> after it has already called exit_notify(). And this is what perf_event_pid()
> does, perhaps we have more buggy users.
>
> In this case current->group_leader or parent/real_parent can point to the
> exited/freed tasks. I already said this many times, ee really need to nullify
> them in __unhash_process() but this needs a lot of (mostly simple)
> cleanups.
Is there anything wrong with starting with the patch below?
diff --git a/kernel/exit.c b/kernel/exit.c
index 9d68c45ebbe3..03daeecc335d 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -200,6 +200,7 @@ void release_task(struct task_struct *p)
if (zap_leader)
leader->exit_state = EXIT_DEAD;
}
+ p->group_leader = NULL;
write_unlock_irq(&tasklist_lock);
release_thread(p);
That seems to cut to the heart of the matter. Failures will be clearer,
as will be code that is introduced to handle the situation. Then we
don't need pid_alive or any other magic just a simple:
rcu_read_lock();
leader = READ_ONCE(task->group_leader);
if (leader) {
/* Do stuff */
}
rcu_read_unlock();
>> If so the fix needs to be
>> in task_tgid.
>
> Yes, task_tgid() should probably return NULL in this case, but this connects
> to "a lot of cleanups" above.
But that is important because that is where things go wrong in the
specific case under discussion. pid_nr_ns handles all of the other
cases, it is task_tgid that went wrong.
Eric
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH] sched/pid fix use-after free in task_tgid_vnr
2016-12-12 19:10 ` Eric W. Biederman
@ 2016-12-13 16:03 ` Oleg Nesterov
0 siblings, 0 replies; 6+ messages in thread
From: Oleg Nesterov @ 2016-12-13 16:03 UTC (permalink / raw)
To: Eric W. Biederman; +Cc: EunTaik Lee, mingo, peterz, linux-kernel
On 12/13, Eric W. Biederman wrote:
>
> Oleg Nesterov <oleg@redhat.com> writes:
>
> > In this case current->group_leader or parent/real_parent can point to the
> > exited/freed tasks. I already said this many times, ee really need to nullify
> > them in __unhash_process() but this needs a lot of (mostly simple)
> > cleanups.
>
> Is there anything wrong with starting with the patch below?
>
> diff --git a/kernel/exit.c b/kernel/exit.c
> index 9d68c45ebbe3..03daeecc335d 100644
> --- a/kernel/exit.c
> +++ b/kernel/exit.c
> @@ -200,6 +200,7 @@ void release_task(struct task_struct *p)
> if (zap_leader)
> leader->exit_state = EXIT_DEAD;
> }
> + p->group_leader = NULL;
Yes, see above, this is what we should do. Except I think we should
change __unhash_process() to do this. The same for parent/real_parent.
> That seems to cut to the heart of the matter.
Exactly.
But we can not start with this change. Just for example,
task_session/task_pgrp/tgid can obviously crash, even if used properly,
they will need to check group_leader != NULL. __task_pid_nr_ns() should
be changed too. And more.
That is why I'd prefer to start with the simple fix I suggested before,
then do the cleanups.
And imo that change makes sense in any case to avoid the code duplication,
even if we change __unhash_process() to nullify group_leader/etc. Note that
we can also add __PIDTYPE_PPID and turn task_ppid_nr_ns() into another
trivial users of __task_pid_nr_ns(). Just it will need to check task != NULL
instead if pid_alive(p).
So, will you agree with v2 below? s/PIDTYPE_TGID/__PIDTYPE_TGID/ as you
suggested, and move task_tgid_nr_ns() into sched.h close to other similar
helpers.
Oleg.
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -8,7 +8,9 @@ enum pid_type
PIDTYPE_PID,
PIDTYPE_PGID,
PIDTYPE_SID,
- PIDTYPE_MAX
+ PIDTYPE_MAX,
+ /* only valid to __task_pid_nr_ns() */
+ __PIDTYPE_TGID
};
/*
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2117,14 +2117,6 @@ static inline pid_t task_tgid_nr(struct task_struct *tsk)
return tsk->tgid;
}
-pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);
-
-static inline pid_t task_tgid_vnr(struct task_struct *tsk)
-{
- return pid_vnr(task_tgid(tsk));
-}
-
-
static inline int pid_alive(const struct task_struct *p);
static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns)
{
@@ -2166,6 +2158,16 @@ static inline pid_t task_session_vnr(struct task_struct *tsk)
return __task_pid_nr_ns(tsk, PIDTYPE_SID, NULL);
}
+static inline pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
+{
+ return __task_pid_nr_ns(tsk, __PIDTYPE_TGID, ns);
+}
+
+static inline pid_t task_tgid_vnr(struct task_struct *tsk)
+{
+ return __task_pid_nr_ns(tsk, __PIDTYPE_TGID, NULL);
+}
+
/* obsolete, do not use */
static inline pid_t task_pgrp_nr(struct task_struct *tsk)
{
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -526,8 +526,11 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
if (!ns)
ns = task_active_pid_ns(current);
if (likely(pid_alive(task))) {
- if (type != PIDTYPE_PID)
+ if (type != PIDTYPE_PID) {
+ if (type == __PIDTYPE_TGID)
+ type = PIDTYPE_PID;
task = task->group_leader;
+ }
nr = pid_nr_ns(rcu_dereference(task->pids[type].pid), ns);
}
rcu_read_unlock();
@@ -536,12 +539,6 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
}
EXPORT_SYMBOL(__task_pid_nr_ns);
-pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
-{
- return pid_nr_ns(task_tgid(tsk), ns);
-}
-EXPORT_SYMBOL(task_tgid_nr_ns);
-
struct pid_namespace *task_active_pid_ns(struct task_struct *tsk)
{
return ns_of_pid(task_pid(tsk));
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2016-12-13 16:05 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
[not found] <CGME20161209093351epcms1p418673c3cdec7d4c3e81b5df131173c57@epcms1p4>
2016-12-09 9:33 ` [PATCH] sched/pid fix use-after free in task_tgid_vnr EunTaik Lee
2016-12-09 17:21 ` Oleg Nesterov
2016-12-09 22:21 ` Eric W. Biederman
2016-12-12 13:46 ` Oleg Nesterov
2016-12-12 19:10 ` Eric W. Biederman
2016-12-13 16:03 ` Oleg Nesterov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).