linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] sched/pid fix use-after free in task_tgid_vnr
       [not found] <CGME20161209093351epcms1p418673c3cdec7d4c3e81b5df131173c57@epcms1p4>
@ 2016-12-09  9:33 ` EunTaik Lee
  2016-12-09 17:21   ` Oleg Nesterov
  0 siblings, 1 reply; 6+ messages in thread
From: EunTaik Lee @ 2016-12-09  9:33 UTC (permalink / raw)
  To: mingo, peterz, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 1413 bytes --]

There is a use-after-free case with below call stack.

pid_nr_ns+0x10/0x38
cgroup_pidlist_start+0x144/0x400
cgroup_seqfile_start+0x1c/0x24
kernfs_seq_start+0x54/0x90
seq_read+0x15c/0x3a8
kernfs_fop_read+0x38/0x160
__vfs_read+0x28/0xc8
vfs_read+0x84/0xfc

A task in the cg_list was dying and the group_leader's
task struct was already freed.

To avoid this task_tgid_vnr needs to take the
rcu_read_lock and check for pid_alive.

Signed-off-by: Eun Taik Lee <eun.taik.lee@samsung.com>
---
 include/linux/sched.h | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index e9c009d..ed567bc 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2118,14 +2118,21 @@ static inline pid_t task_tgid_nr(struct task_struct *tsk)
 }
 
 pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);
+static inline int pid_alive(const struct task_struct *p);
 
 static inline pid_t task_tgid_vnr(struct task_struct *tsk)
 {
-	return pid_vnr(task_tgid(tsk));
+	pid_t pid = 0;
+
+	rcu_read_lock();
+	if (pid_alive(tsk))
+		pid = pid_vnr(task_tgid(tsk));
+	rcu_read_unlock();
+
+	return pid;
 }
 
 
-static inline int pid_alive(const struct task_struct *p);
 static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns)
 {
 	pid_t pid = 0;
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] sched/pid fix use-after free in task_tgid_vnr
  2016-12-09  9:33 ` [PATCH] sched/pid fix use-after free in task_tgid_vnr EunTaik Lee
@ 2016-12-09 17:21   ` Oleg Nesterov
  2016-12-09 22:21     ` Eric W. Biederman
  0 siblings, 1 reply; 6+ messages in thread
From: Oleg Nesterov @ 2016-12-09 17:21 UTC (permalink / raw)
  To: EunTaik Lee, Eric W. Biederman; +Cc: mingo, peterz, linux-kernel

On 12/09, EunTaik Lee wrote:
>
> There is a use-after-free case with below call stack.
>
> pid_nr_ns+0x10/0x38
> cgroup_pidlist_start+0x144/0x400
> cgroup_seqfile_start+0x1c/0x24
> kernfs_seq_start+0x54/0x90
> seq_read+0x15c/0x3a8
> kernfs_fop_read+0x38/0x160
> __vfs_read+0x28/0xc8
> vfs_read+0x84/0xfc

This reminds about perf_event_pid() which is equally buggy...

>  static inline pid_t task_tgid_vnr(struct task_struct *tsk)
>  {
> -	return pid_vnr(task_tgid(tsk));
> +	pid_t pid = 0;
> +
> +	rcu_read_lock();
> +	if (pid_alive(tsk))
> +		pid = pid_vnr(task_tgid(tsk));
> +	rcu_read_unlock();
> +
> +	return pid;
>  }

Eric, EunTaik, what do you think about the patch below?

I can't decide whether it is too ugly or not, but it would be nice
to avoid the code duplication.

Oleg.


--- x/include/linux/pid.h
+++ x/include/linux/pid.h
@@ -8,7 +8,8 @@ enum pid_type
 	PIDTYPE_PID,
 	PIDTYPE_PGID,
 	PIDTYPE_SID,
-	PIDTYPE_MAX
+	PIDTYPE_MAX,
+	PIDTYPE_TGID	/* do not use */
 };
 
 /*
--- x/kernel/pid.c
+++ x/kernel/pid.c
@@ -526,8 +526,11 @@ pid_t __task_pid_nr_ns(struct task_struc
 	if (!ns)
 		ns = task_active_pid_ns(current);
 	if (likely(pid_alive(task))) {
-		if (type != PIDTYPE_PID)
+		if (type != PIDTYPE_PID) {
+			if (type == PIDTYPE_TGID)
+				type = PIDTYPE_PID;
 			task = task->group_leader;
+		}
 		nr = pid_nr_ns(rcu_dereference(task->pids[type].pid), ns);
 	}
 	rcu_read_unlock();
@@ -538,7 +541,7 @@ EXPORT_SYMBOL(__task_pid_nr_ns);
 
 pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
 {
-	return pid_nr_ns(task_tgid(tsk), ns);
+	return __task_pid_nr_ns(tsk, PIDTYPE_TGID, ns);
 }
 EXPORT_SYMBOL(task_tgid_nr_ns);
 

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] sched/pid fix use-after free in task_tgid_vnr
  2016-12-09 17:21   ` Oleg Nesterov
@ 2016-12-09 22:21     ` Eric W. Biederman
  2016-12-12 13:46       ` Oleg Nesterov
  0 siblings, 1 reply; 6+ messages in thread
From: Eric W. Biederman @ 2016-12-09 22:21 UTC (permalink / raw)
  To: Oleg Nesterov; +Cc: EunTaik Lee, mingo, peterz, linux-kernel

Oleg Nesterov <oleg@redhat.com> writes:

> On 12/09, EunTaik Lee wrote:
>>
>> There is a use-after-free case with below call stack.
>>
>> pid_nr_ns+0x10/0x38
>> cgroup_pidlist_start+0x144/0x400
>> cgroup_seqfile_start+0x1c/0x24
>> kernfs_seq_start+0x54/0x90
>> seq_read+0x15c/0x3a8
>> kernfs_fop_read+0x38/0x160
>> __vfs_read+0x28/0xc8
>> vfs_read+0x84/0xfc

How is this a use after free.  The function pid_nr_ns should take a NULL pointer
as input and return 0?

Certainly if the addtion of pid_alive fixes it pid_vnr(task_tgid(tsk))
is fine.  Are we perhaps missing rcu locking?

Or is the problem simply that in task_tgid we are accessing
task->group_leader which may already be dead?  If so the fix needs to be
in task_tgid.

> This reminds about perf_event_pid() which is equally buggy...
>
>>  static inline pid_t task_tgid_vnr(struct task_struct *tsk)
>>  {
>> -	return pid_vnr(task_tgid(tsk));
>> +	pid_t pid = 0;
>> +
>> +	rcu_read_lock();
>> +	if (pid_alive(tsk))
>> +		pid = pid_vnr(task_tgid(tsk));
>> +	rcu_read_unlock();
>> +
>> +	return pid;
>>  }
>
> Eric, EunTaik, what do you think about the patch below?
>
> I can't decide whether it is too ugly or not, but it would be nice
> to avoid the code duplication.

I think it can be beaten into shape but I am not certain it addresses the
core issue.

>
> Oleg.
>
>
> --- x/include/linux/pid.h
> +++ x/include/linux/pid.h
> @@ -8,7 +8,8 @@ enum pid_type
>  	PIDTYPE_PID,
>  	PIDTYPE_PGID,
>  	PIDTYPE_SID,
> -	PIDTYPE_MAX
> +	PIDTYPE_MAX,
> +	PIDTYPE_TGID	/* do not use */


I would do:

/* __PIDTYPE_TGID is only valid to __task_pid_nr_ns */
#define __PIDTYPE_TGID PIDTYPE_MAX

Prefixing __PIDTYPE_TGID  with __ should help make it clear
this is a special use define.

I am also curious why pid_alive is the proper check to see if
task->group_leader is valid?  That feels like it could get us into
trouble later.

Especially as that is the real problem child here.

>  };
>  
>  /*
> --- x/kernel/pid.c
> +++ x/kernel/pid.c
> @@ -526,8 +526,11 @@ pid_t __task_pid_nr_ns(struct task_struc
>  	if (!ns)
>  		ns = task_active_pid_ns(current);
>  	if (likely(pid_alive(task))) {
> -		if (type != PIDTYPE_PID)
> +		if (type != PIDTYPE_PID) {
> +			if (type == PIDTYPE_TGID)
> +				type = PIDTYPE_PID;
>  			task = task->group_leader;
> +		}
>  		nr = pid_nr_ns(rcu_dereference(task->pids[type].pid), ns);
>  	}
>  	rcu_read_unlock();
> @@ -538,7 +541,7 @@ EXPORT_SYMBOL(__task_pid_nr_ns);
>  
>  pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
>  {
> -	return pid_nr_ns(task_tgid(tsk), ns);
> +	return __task_pid_nr_ns(tsk, PIDTYPE_TGID, ns);
>  }
>  EXPORT_SYMBOL(task_tgid_nr_ns);
>  

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] sched/pid fix use-after free in task_tgid_vnr
  2016-12-09 22:21     ` Eric W. Biederman
@ 2016-12-12 13:46       ` Oleg Nesterov
  2016-12-12 19:10         ` Eric W. Biederman
  0 siblings, 1 reply; 6+ messages in thread
From: Oleg Nesterov @ 2016-12-12 13:46 UTC (permalink / raw)
  To: Eric W. Biederman; +Cc: EunTaik Lee, mingo, peterz, linux-kernel

On 12/10, Eric W. Biederman wrote:
>
> Oleg Nesterov <oleg@redhat.com> writes:
>
> > On 12/09, EunTaik Lee wrote:
> >>
> >> There is a use-after-free case with below call stack.
> >>
> >> pid_nr_ns+0x10/0x38
> >> cgroup_pidlist_start+0x144/0x400
> >> cgroup_seqfile_start+0x1c/0x24
> >> kernfs_seq_start+0x54/0x90
> >> seq_read+0x15c/0x3a8
> >> kernfs_fop_read+0x38/0x160
> >> __vfs_read+0x28/0xc8
> >> vfs_read+0x84/0xfc
>
> How is this a use after free.  The function pid_nr_ns should take a NULL pointer
> as input and return 0?

No, the task (task_struct) itself can't go away, but task->group_leader
can point to nowhere.

> Certainly if the addtion of pid_alive fixes it pid_vnr(task_tgid(tsk))
> is fine.  Are we perhaps missing rcu locking?

rcu_read_lock() is not enough in this case, see below.

> Or is the problem simply that in task_tgid we are accessing
> task->group_leader which may already be dead?

Yes. Lets forget about the callchain above, I didn't even bother to verify
that it can actually hit the problem. Although I think EunTaik is very right,
css_task_iter_next() does get_task_struct() and drops css_set_lock, the task
can exit after that. Forget.

Just suppose that a task simply does

	pid = task_tgid_vnr(current);

after it has already called exit_notify(). And this is what perf_event_pid()
does, perhaps we have more buggy users.

In this case current->group_leader or parent/real_parent can point to the
exited/freed tasks. I already said this many times, ee really need to nullify
them in __unhash_process() but this needs a lot of (mostly simple) cleanups.

> If so the fix needs to be
> in task_tgid.

Yes, task_tgid() should probably return NULL in this case, but this connects
to "a lot of cleanups" above.

> > --- x/include/linux/pid.h
> > +++ x/include/linux/pid.h
> > @@ -8,7 +8,8 @@ enum pid_type
> >  	PIDTYPE_PID,
> >  	PIDTYPE_PGID,
> >  	PIDTYPE_SID,
> > -	PIDTYPE_MAX
> > +	PIDTYPE_MAX,
> > +	PIDTYPE_TGID	/* do not use */
>
>
> I would do:
>
> /* __PIDTYPE_TGID is only valid to __task_pid_nr_ns */
> #define __PIDTYPE_TGID PIDTYPE_MAX
>
> Prefixing __PIDTYPE_TGID  with __ should help make it clear
> this is a special use define.

OK, will do, thanks.

> I am also curious why pid_alive is the proper check to see if
> task->group_leader is valid?  That feels like it could get us into
> trouble later.

It is. pid_alive(task) == T means that this task was not removed from
rcu-protected-lists and thus task->group_leader (in particular) can't
go away. As long as you check pid_alive() and use ->group_leader in
the same rcu_read_lock section, of course.

To clarify, this is not because detach_pid(PIDTYPE_PID) is called
before list_del_rcu(), this doesn't matter. What does matter is that
if you see pid_alive() == T  (or task->sighand != NULL, or anything
else which means that release_task/__unhash_process was not called yet)
you know that this task, its leader/parent/real_parent/pids/etc can't
go away until rcu_read_unlock().

And this is why __task_pid_nr_ns() checks pid_alive() before it reads
->group_leader. Note that __task_pid_nr_ns(PIDTYPE_PID) does not need
this check, task->pids[PID] is nullified by detach_pid() and pid_nr_ns()
check pid != NULL.

However. I think it should be renamed. Or, better, we should add a new
helper to make this all more clear. Say,

	bool task_is_rcu_safe(task)
	{
		return task->sighand != NULL;
	}

then change __task_pid_nr_ns() to use this helper rather than pid_alive().
Because, once again, it is not that we need to ensure that
pids[PIDTYPE_PID].pid != NULL, we need to ensure that rcu_read_lock() can
actually protect this task and its leader/parent/etc. And of course, it
can have more users.

Oleg.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] sched/pid fix use-after free in task_tgid_vnr
  2016-12-12 13:46       ` Oleg Nesterov
@ 2016-12-12 19:10         ` Eric W. Biederman
  2016-12-13 16:03           ` Oleg Nesterov
  0 siblings, 1 reply; 6+ messages in thread
From: Eric W. Biederman @ 2016-12-12 19:10 UTC (permalink / raw)
  To: Oleg Nesterov; +Cc: EunTaik Lee, mingo, peterz, linux-kernel

Oleg Nesterov <oleg@redhat.com> writes:

> On 12/10, Eric W. Biederman wrote:
>>
>> Oleg Nesterov <oleg@redhat.com> writes:
>>
>> > On 12/09, EunTaik Lee wrote:
>> >>
>> >> There is a use-after-free case with below call stack.
>> >>
>> >> pid_nr_ns+0x10/0x38
>> >> cgroup_pidlist_start+0x144/0x400
>> >> cgroup_seqfile_start+0x1c/0x24
>> >> kernfs_seq_start+0x54/0x90
>> >> seq_read+0x15c/0x3a8
>> >> kernfs_fop_read+0x38/0x160
>> >> __vfs_read+0x28/0xc8
>> >> vfs_read+0x84/0xfc
>>
>> How is this a use after free.  The function pid_nr_ns should take a NULL pointer
>> as input and return 0?
>
> No, the task (task_struct) itself can't go away, but task->group_leader
> can point to nowhere.
>
>> Certainly if the addtion of pid_alive fixes it pid_vnr(task_tgid(tsk))
>> is fine.  Are we perhaps missing rcu locking?
>
> rcu_read_lock() is not enough in this case, see below.
>
>> Or is the problem simply that in task_tgid we are accessing
>> task->group_leader which may already be dead?
>
> Yes. Lets forget about the callchain above, I didn't even bother to verify
> that it can actually hit the problem. Although I think EunTaik is very right,
> css_task_iter_next() does get_task_struct() and drops css_set_lock, the task
> can exit after that. Forget.
>
> Just suppose that a task simply does
>
> 	pid = task_tgid_vnr(current);
>
> after it has already called exit_notify(). And this is what perf_event_pid()
> does, perhaps we have more buggy users.
>
> In this case current->group_leader or parent/real_parent can point to the
> exited/freed tasks. I already said this many times, ee really need to nullify
> them in __unhash_process() but this needs a lot of (mostly simple)
> cleanups.

Is there anything wrong with starting with the patch below?

diff --git a/kernel/exit.c b/kernel/exit.c
index 9d68c45ebbe3..03daeecc335d 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -200,6 +200,7 @@ void release_task(struct task_struct *p)
                if (zap_leader)
                        leader->exit_state = EXIT_DEAD;
        }
+       p->group_leader = NULL;
 
        write_unlock_irq(&tasklist_lock);
        release_thread(p);


That seems to cut to the heart of the matter.  Failures will be clearer,
as will be code that is introduced to handle the situation.   Then we
don't need pid_alive or any other magic just a simple:
	rcu_read_lock();
	leader = READ_ONCE(task->group_leader);
	if (leader) {
		/* Do stuff */
	}
	rcu_read_unlock();

>> If so the fix needs to be
>> in task_tgid.
>
> Yes, task_tgid() should probably return NULL in this case, but this connects
> to "a lot of cleanups" above.

But that is important because that is where things go wrong in the
specific case under discussion.  pid_nr_ns handles all of the other
cases, it is task_tgid that went wrong.

Eric

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] sched/pid fix use-after free in task_tgid_vnr
  2016-12-12 19:10         ` Eric W. Biederman
@ 2016-12-13 16:03           ` Oleg Nesterov
  0 siblings, 0 replies; 6+ messages in thread
From: Oleg Nesterov @ 2016-12-13 16:03 UTC (permalink / raw)
  To: Eric W. Biederman; +Cc: EunTaik Lee, mingo, peterz, linux-kernel

On 12/13, Eric W. Biederman wrote:
>
> Oleg Nesterov <oleg@redhat.com> writes:
>
> > In this case current->group_leader or parent/real_parent can point to the
> > exited/freed tasks. I already said this many times, ee really need to nullify
> > them in __unhash_process() but this needs a lot of (mostly simple)
> > cleanups.
>
> Is there anything wrong with starting with the patch below?
>
> diff --git a/kernel/exit.c b/kernel/exit.c
> index 9d68c45ebbe3..03daeecc335d 100644
> --- a/kernel/exit.c
> +++ b/kernel/exit.c
> @@ -200,6 +200,7 @@ void release_task(struct task_struct *p)
>                 if (zap_leader)
>                         leader->exit_state = EXIT_DEAD;
>         }
> +       p->group_leader = NULL;

Yes, see above, this is what we should do. Except I think we should
change __unhash_process() to do this. The same for parent/real_parent.

> That seems to cut to the heart of the matter.

Exactly.

But we can not start with this change. Just for example,
task_session/task_pgrp/tgid can obviously crash, even if used properly,
they will need to check group_leader != NULL. __task_pid_nr_ns() should
be changed too. And more.

That is why I'd prefer to start with the simple fix I suggested before,
then do the cleanups.

And imo that change makes sense in any case to avoid the code duplication,
even if we change __unhash_process() to nullify group_leader/etc. Note that
we can also add __PIDTYPE_PPID and turn task_ppid_nr_ns() into another
trivial users of  __task_pid_nr_ns(). Just it will need to check task != NULL
instead if pid_alive(p).

So, will you agree with v2 below? s/PIDTYPE_TGID/__PIDTYPE_TGID/ as you
suggested, and move task_tgid_nr_ns() into sched.h close to other similar
helpers.

Oleg.


--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -8,7 +8,9 @@ enum pid_type
 	PIDTYPE_PID,
 	PIDTYPE_PGID,
 	PIDTYPE_SID,
-	PIDTYPE_MAX
+	PIDTYPE_MAX,
+	/* only valid to __task_pid_nr_ns() */
+	__PIDTYPE_TGID
 };
 
 /*
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2117,14 +2117,6 @@ static inline pid_t task_tgid_nr(struct task_struct *tsk)
 	return tsk->tgid;
 }
 
-pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);
-
-static inline pid_t task_tgid_vnr(struct task_struct *tsk)
-{
-	return pid_vnr(task_tgid(tsk));
-}
-
-
 static inline int pid_alive(const struct task_struct *p);
 static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns)
 {
@@ -2166,6 +2158,16 @@ static inline pid_t task_session_vnr(struct task_struct *tsk)
 	return __task_pid_nr_ns(tsk, PIDTYPE_SID, NULL);
 }
 
+static inline pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
+{
+	return __task_pid_nr_ns(tsk, __PIDTYPE_TGID, ns);
+}
+
+static inline pid_t task_tgid_vnr(struct task_struct *tsk)
+{
+	return __task_pid_nr_ns(tsk, __PIDTYPE_TGID, NULL);
+}
+
 /* obsolete, do not use */
 static inline pid_t task_pgrp_nr(struct task_struct *tsk)
 {
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -526,8 +526,11 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
 	if (!ns)
 		ns = task_active_pid_ns(current);
 	if (likely(pid_alive(task))) {
-		if (type != PIDTYPE_PID)
+		if (type != PIDTYPE_PID) {
+			if (type == __PIDTYPE_TGID)
+				type = PIDTYPE_PID;
 			task = task->group_leader;
+		}
 		nr = pid_nr_ns(rcu_dereference(task->pids[type].pid), ns);
 	}
 	rcu_read_unlock();
@@ -536,12 +539,6 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
 }
 EXPORT_SYMBOL(__task_pid_nr_ns);
 
-pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
-{
-	return pid_nr_ns(task_tgid(tsk), ns);
-}
-EXPORT_SYMBOL(task_tgid_nr_ns);
-
 struct pid_namespace *task_active_pid_ns(struct task_struct *tsk)
 {
 	return ns_of_pid(task_pid(tsk));

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2016-12-13 16:05 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <CGME20161209093351epcms1p418673c3cdec7d4c3e81b5df131173c57@epcms1p4>
2016-12-09  9:33 ` [PATCH] sched/pid fix use-after free in task_tgid_vnr EunTaik Lee
2016-12-09 17:21   ` Oleg Nesterov
2016-12-09 22:21     ` Eric W. Biederman
2016-12-12 13:46       ` Oleg Nesterov
2016-12-12 19:10         ` Eric W. Biederman
2016-12-13 16:03           ` Oleg Nesterov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).