linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] proc: fix inode uid/gid writeback race
@ 2019-10-20 17:30 Alexey Dobriyan
  2019-10-21  9:24 ` Marco Elver
  0 siblings, 1 reply; 5+ messages in thread
From: Alexey Dobriyan @ 2019-10-20 17:30 UTC (permalink / raw)
  To: akpm; +Cc: linux-kernel, linux-fsdevel, elver, viro

(euid, egid) pair is snapshotted correctly from task under RCU,
but writeback to inode can be done in any order.

Fix by doing writeback under inode->i_lock where necessary
(/proc/* , /proc/*/fd/* , /proc/*/map_files/* revalidate).

Reported-by: syzbot+e392f8008a294fdf8891@syzkaller.appspotmail.com
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---

 fs/proc/base.c     |   25 +++++++++++++++++++++++--
 fs/proc/fd.c       |    2 +-
 fs/proc/internal.h |    2 ++
 3 files changed, 26 insertions(+), 3 deletions(-)

--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1743,6 +1743,25 @@ void task_dump_owner(struct task_struct *task, umode_t mode,
 	*rgid = gid;
 }
 
+/* use if inode is live */
+void task_dump_owner_to_inode(struct task_struct *task, umode_t mode,
+			      struct inode *inode)
+{
+	kuid_t uid;
+	kgid_t gid;
+
+	task_dump_owner(task, mode, &uid, &gid);
+	/*
+	 * There is no atomic "change all credentials at once" system call,
+	 * guaranteeing more than _some_ snapshot from "struct cred" ends up
+	 * in inode is not possible.
+	 */
+	spin_lock(&inode->i_lock);
+	inode->i_uid = uid;
+	inode->i_gid = gid;
+	spin_unlock(&inode->i_lock);
+}
+
 struct inode *proc_pid_make_inode(struct super_block * sb,
 				  struct task_struct *task, umode_t mode)
 {
@@ -1769,6 +1788,7 @@ struct inode *proc_pid_make_inode(struct super_block * sb,
 	if (!ei->pid)
 		goto out_unlock;
 
+	/* fresh inode -- no races */
 	task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
 	security_task_to_inode(task, inode);
 
@@ -1802,6 +1822,7 @@ int pid_getattr(const struct path *path, struct kstat *stat,
 			 */
 			return -ENOENT;
 		}
+		/* "struct kstat" is thread local, atomic snapshot is enough */
 		task_dump_owner(task, inode->i_mode, &stat->uid, &stat->gid);
 	}
 	rcu_read_unlock();
@@ -1815,7 +1836,7 @@ int pid_getattr(const struct path *path, struct kstat *stat,
  */
 void pid_update_inode(struct task_struct *task, struct inode *inode)
 {
-	task_dump_owner(task, inode->i_mode, &inode->i_uid, &inode->i_gid);
+	task_dump_owner_to_inode(task, inode->i_mode, inode);
 
 	inode->i_mode &= ~(S_ISUID | S_ISGID);
 	security_task_to_inode(task, inode);
@@ -1990,7 +2011,7 @@ static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags)
 	mmput(mm);
 
 	if (exact_vma_exists) {
-		task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
+		task_dump_owner_to_inode(task, 0, inode);
 
 		security_task_to_inode(task, inode);
 		status = 1;
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -101,7 +101,7 @@ static bool tid_fd_mode(struct task_struct *task, unsigned fd, fmode_t *mode)
 static void tid_fd_update_inode(struct task_struct *task, struct inode *inode,
 				fmode_t f_mode)
 {
-	task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
+	task_dump_owner_to_inode(task, 0, inode);
 
 	if (S_ISLNK(inode->i_mode)) {
 		unsigned i_mode = S_IFLNK;
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -123,6 +123,8 @@ static inline struct task_struct *get_proc_task(const struct inode *inode)
 
 void task_dump_owner(struct task_struct *task, umode_t mode,
 		     kuid_t *ruid, kgid_t *rgid);
+void task_dump_owner_to_inode(struct task_struct *task, umode_t mode,
+			      struct inode *inode);
 
 unsigned name_to_int(const struct qstr *qstr);
 /*

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] proc: fix inode uid/gid writeback race
  2019-10-20 17:30 [PATCH] proc: fix inode uid/gid writeback race Alexey Dobriyan
@ 2019-10-21  9:24 ` Marco Elver
  2019-10-21  9:59   ` Marco Elver
  2019-10-21 18:32   ` Alexey Dobriyan
  0 siblings, 2 replies; 5+ messages in thread
From: Marco Elver @ 2019-10-21  9:24 UTC (permalink / raw)
  To: Alexey Dobriyan; +Cc: Andrew Morton, LKML, linux-fsdevel, viro

On Sun, 20 Oct 2019 at 19:30, Alexey Dobriyan <adobriyan@gmail.com> wrote:
>
> (euid, egid) pair is snapshotted correctly from task under RCU,
> but writeback to inode can be done in any order.
>
> Fix by doing writeback under inode->i_lock where necessary
> (/proc/* , /proc/*/fd/* , /proc/*/map_files/* revalidate).
>
> Reported-by: syzbot+e392f8008a294fdf8891@syzkaller.appspotmail.com
> Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
> ---

Thanks!

This certainly fixes the problem of inconsistent uid/gid pair due to
racing writebacks, as well as the data-race. If that is the only
purpose of this patch, then from what I see this is fine:

Acked-by: Marco Elver <elver@google.com>

However, there is probably still a more fundamental problem as outlined below.

>  fs/proc/base.c     |   25 +++++++++++++++++++++++--
>  fs/proc/fd.c       |    2 +-
>  fs/proc/internal.h |    2 ++
>  3 files changed, 26 insertions(+), 3 deletions(-)
>
> --- a/fs/proc/base.c
> +++ b/fs/proc/base.c
> @@ -1743,6 +1743,25 @@ void task_dump_owner(struct task_struct *task, umode_t mode,
>         *rgid = gid;
>  }
>
> +/* use if inode is live */
> +void task_dump_owner_to_inode(struct task_struct *task, umode_t mode,
> +                             struct inode *inode)
> +{
> +       kuid_t uid;
> +       kgid_t gid;
> +
> +       task_dump_owner(task, mode, &uid, &gid);
> +       /*
> +        * There is no atomic "change all credentials at once" system call,
> +        * guaranteeing more than _some_ snapshot from "struct cred" ends up
> +        * in inode is not possible.
> +        */
> +       spin_lock(&inode->i_lock);
> +       inode->i_uid = uid;
> +       inode->i_gid = gid;
> +       spin_unlock(&inode->i_lock);

2 tasks can still race here, and the inconsistent scenario I outlined in
https://lore.kernel.org/linux-fsdevel/000000000000328b2905951a7667@google.com/
could still happen I think (although extremely unlikely). Mainly,
causality may still be violated -- but I may be wrong as I don't know
the rest of the code (so please be critical of my suggestion).

The problem is that if 2 threads race here, one has snapshotted old
uid/gid, and the other the new uid/gid. Then it is still possible for
the old uid/gid to be written back after new uid/gid, which would
result in this bad scenario:

=== TASK 1 ===
| seteuid(1000);
| seteuid(0);
| stat("/proc/<pid-of-task-1>", &fstat);
| assert(fstat.st_uid == 0);  // fails
=== TASK 2 ===
| stat("/proc/<pid-of-task-1>", ...);

AFAIK it's not something that can easily be fixed without some
timestamp on the uid/gid pair (timestamp updated after setuid/seteuid
etc) obtained in the RCU reader critical section. Then in this
critical section, uid/gid should only be written if the current pair
in inode is older according to snapshot timestamp.

> +}
> +
>  struct inode *proc_pid_make_inode(struct super_block * sb,
>                                   struct task_struct *task, umode_t mode)
>  {
> @@ -1769,6 +1788,7 @@ struct inode *proc_pid_make_inode(struct super_block * sb,
>         if (!ei->pid)
>                 goto out_unlock;
>
> +       /* fresh inode -- no races */
>         task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
>         security_task_to_inode(task, inode);
>
> @@ -1802,6 +1822,7 @@ int pid_getattr(const struct path *path, struct kstat *stat,
>                          */
>                         return -ENOENT;
>                 }
> +               /* "struct kstat" is thread local, atomic snapshot is enough */
>                 task_dump_owner(task, inode->i_mode, &stat->uid, &stat->gid);
>         }
>         rcu_read_unlock();
> @@ -1815,7 +1836,7 @@ int pid_getattr(const struct path *path, struct kstat *stat,
>   */
>  void pid_update_inode(struct task_struct *task, struct inode *inode)
>  {
> -       task_dump_owner(task, inode->i_mode, &inode->i_uid, &inode->i_gid);
> +       task_dump_owner_to_inode(task, inode->i_mode, inode);
>
>         inode->i_mode &= ~(S_ISUID | S_ISGID);
>         security_task_to_inode(task, inode);
> @@ -1990,7 +2011,7 @@ static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags)
>         mmput(mm);
>
>         if (exact_vma_exists) {
> -               task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
> +               task_dump_owner_to_inode(task, 0, inode);
>
>                 security_task_to_inode(task, inode);
>                 status = 1;
> --- a/fs/proc/fd.c
> +++ b/fs/proc/fd.c
> @@ -101,7 +101,7 @@ static bool tid_fd_mode(struct task_struct *task, unsigned fd, fmode_t *mode)
>  static void tid_fd_update_inode(struct task_struct *task, struct inode *inode,
>                                 fmode_t f_mode)
>  {
> -       task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
> +       task_dump_owner_to_inode(task, 0, inode);
>
>         if (S_ISLNK(inode->i_mode)) {
>                 unsigned i_mode = S_IFLNK;
> --- a/fs/proc/internal.h
> +++ b/fs/proc/internal.h
> @@ -123,6 +123,8 @@ static inline struct task_struct *get_proc_task(const struct inode *inode)
>
>  void task_dump_owner(struct task_struct *task, umode_t mode,
>                      kuid_t *ruid, kgid_t *rgid);
> +void task_dump_owner_to_inode(struct task_struct *task, umode_t mode,
> +                             struct inode *inode);
>
>  unsigned name_to_int(const struct qstr *qstr);
>  /*

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] proc: fix inode uid/gid writeback race
  2019-10-21  9:24 ` Marco Elver
@ 2019-10-21  9:59   ` Marco Elver
  2019-10-21 18:32   ` Alexey Dobriyan
  1 sibling, 0 replies; 5+ messages in thread
From: Marco Elver @ 2019-10-21  9:59 UTC (permalink / raw)
  To: Alexey Dobriyan; +Cc: Andrew Morton, LKML, linux-fsdevel, viro

On Mon, 21 Oct 2019 at 11:24, Marco Elver <elver@google.com> wrote:
>
> On Sun, 20 Oct 2019 at 19:30, Alexey Dobriyan <adobriyan@gmail.com> wrote:
> >
> > (euid, egid) pair is snapshotted correctly from task under RCU,
> > but writeback to inode can be done in any order.
> >
> > Fix by doing writeback under inode->i_lock where necessary
> > (/proc/* , /proc/*/fd/* , /proc/*/map_files/* revalidate).
> >
> > Reported-by: syzbot+e392f8008a294fdf8891@syzkaller.appspotmail.com
> > Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
> > ---
>
> Thanks!
>
> This certainly fixes the problem of inconsistent uid/gid pair due to
> racing writebacks, as well as the data-race. If that is the only
> purpose of this patch, then from what I see this is fine:
>
> Acked-by: Marco Elver <elver@google.com>
>
> However, there is probably still a more fundamental problem as outlined below.
>
> >  fs/proc/base.c     |   25 +++++++++++++++++++++++--
> >  fs/proc/fd.c       |    2 +-
> >  fs/proc/internal.h |    2 ++
> >  3 files changed, 26 insertions(+), 3 deletions(-)
> >
> > --- a/fs/proc/base.c
> > +++ b/fs/proc/base.c
> > @@ -1743,6 +1743,25 @@ void task_dump_owner(struct task_struct *task, umode_t mode,
> >         *rgid = gid;
> >  }
> >
> > +/* use if inode is live */
> > +void task_dump_owner_to_inode(struct task_struct *task, umode_t mode,
> > +                             struct inode *inode)
> > +{
> > +       kuid_t uid;
> > +       kgid_t gid;
> > +
> > +       task_dump_owner(task, mode, &uid, &gid);
> > +       /*
> > +        * There is no atomic "change all credentials at once" system call,
> > +        * guaranteeing more than _some_ snapshot from "struct cred" ends up
> > +        * in inode is not possible.
> > +        */
> > +       spin_lock(&inode->i_lock);
> > +       inode->i_uid = uid;
> > +       inode->i_gid = gid;
> > +       spin_unlock(&inode->i_lock);
>
> 2 tasks can still race here, and the inconsistent scenario I outlined in
> https://lore.kernel.org/linux-fsdevel/000000000000328b2905951a7667@google.com/
> could still happen I think (although extremely unlikely). Mainly,
> causality may still be violated -- but I may be wrong as I don't know
> the rest of the code (so please be critical of my suggestion).
>
> The problem is that if 2 threads race here, one has snapshotted old
> uid/gid, and the other the new uid/gid. Then it is still possible for
> the old uid/gid to be written back after new uid/gid, which would
> result in this bad scenario:
>
> === TASK 1 ===
> | seteuid(1000);
> | seteuid(0);
> | stat("/proc/<pid-of-task-1>", &fstat);
> | assert(fstat.st_uid == 0);  // fails
> === TASK 2 ===
> | stat("/proc/<pid-of-task-1>", ...);
>
> AFAIK it's not something that can easily be fixed without some
> timestamp on the uid/gid pair (timestamp updated after setuid/seteuid
> etc) obtained in the RCU reader critical section. Then in this
> critical section, uid/gid should only be written if the current pair
> in inode is older according to snapshot timestamp.

Note that timestamp is probably wrong here, but rather some kind of
sequence number would be needed.

> > +}
> > +
> >  struct inode *proc_pid_make_inode(struct super_block * sb,
> >                                   struct task_struct *task, umode_t mode)
> >  {
> > @@ -1769,6 +1788,7 @@ struct inode *proc_pid_make_inode(struct super_block * sb,
> >         if (!ei->pid)
> >                 goto out_unlock;
> >
> > +       /* fresh inode -- no races */
> >         task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
> >         security_task_to_inode(task, inode);
> >
> > @@ -1802,6 +1822,7 @@ int pid_getattr(const struct path *path, struct kstat *stat,
> >                          */
> >                         return -ENOENT;
> >                 }
> > +               /* "struct kstat" is thread local, atomic snapshot is enough */
> >                 task_dump_owner(task, inode->i_mode, &stat->uid, &stat->gid);
> >         }
> >         rcu_read_unlock();
> > @@ -1815,7 +1836,7 @@ int pid_getattr(const struct path *path, struct kstat *stat,
> >   */
> >  void pid_update_inode(struct task_struct *task, struct inode *inode)
> >  {
> > -       task_dump_owner(task, inode->i_mode, &inode->i_uid, &inode->i_gid);
> > +       task_dump_owner_to_inode(task, inode->i_mode, inode);
> >
> >         inode->i_mode &= ~(S_ISUID | S_ISGID);
> >         security_task_to_inode(task, inode);
> > @@ -1990,7 +2011,7 @@ static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags)
> >         mmput(mm);
> >
> >         if (exact_vma_exists) {
> > -               task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
> > +               task_dump_owner_to_inode(task, 0, inode);
> >
> >                 security_task_to_inode(task, inode);
> >                 status = 1;
> > --- a/fs/proc/fd.c
> > +++ b/fs/proc/fd.c
> > @@ -101,7 +101,7 @@ static bool tid_fd_mode(struct task_struct *task, unsigned fd, fmode_t *mode)
> >  static void tid_fd_update_inode(struct task_struct *task, struct inode *inode,
> >                                 fmode_t f_mode)
> >  {
> > -       task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
> > +       task_dump_owner_to_inode(task, 0, inode);
> >
> >         if (S_ISLNK(inode->i_mode)) {
> >                 unsigned i_mode = S_IFLNK;
> > --- a/fs/proc/internal.h
> > +++ b/fs/proc/internal.h
> > @@ -123,6 +123,8 @@ static inline struct task_struct *get_proc_task(const struct inode *inode)
> >
> >  void task_dump_owner(struct task_struct *task, umode_t mode,
> >                      kuid_t *ruid, kgid_t *rgid);
> > +void task_dump_owner_to_inode(struct task_struct *task, umode_t mode,
> > +                             struct inode *inode);
> >
> >  unsigned name_to_int(const struct qstr *qstr);
> >  /*

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] proc: fix inode uid/gid writeback race
  2019-10-21  9:24 ` Marco Elver
  2019-10-21  9:59   ` Marco Elver
@ 2019-10-21 18:32   ` Alexey Dobriyan
  2019-11-03 13:20     ` Dmitry Vyukov
  1 sibling, 1 reply; 5+ messages in thread
From: Alexey Dobriyan @ 2019-10-21 18:32 UTC (permalink / raw)
  To: Marco Elver, dhowells; +Cc: Andrew Morton, LKML, linux-fsdevel, viro

On Mon, Oct 21, 2019 at 11:24:27AM +0200, Marco Elver wrote:
> On Sun, 20 Oct 2019 at 19:30, Alexey Dobriyan <adobriyan@gmail.com> wrote:
> >
> > (euid, egid) pair is snapshotted correctly from task under RCU,
> > but writeback to inode can be done in any order.
> >
> > Fix by doing writeback under inode->i_lock where necessary
> > (/proc/* , /proc/*/fd/* , /proc/*/map_files/* revalidate).
> >
> > Reported-by: syzbot+e392f8008a294fdf8891@syzkaller.appspotmail.com
> > Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
> > ---
> 
> Thanks!
> 
> This certainly fixes the problem of inconsistent uid/gid pair due to
> racing writebacks, as well as the data-race. If that is the only
> purpose of this patch, then from what I see this is fine:
> 
> Acked-by: Marco Elver <elver@google.com>
> 
> However, there is probably still a more fundamental problem as outlined below.
> 
> >  fs/proc/base.c     |   25 +++++++++++++++++++++++--
> >  fs/proc/fd.c       |    2 +-
> >  fs/proc/internal.h |    2 ++
> >  3 files changed, 26 insertions(+), 3 deletions(-)
> >
> > --- a/fs/proc/base.c
> > +++ b/fs/proc/base.c
> > @@ -1743,6 +1743,25 @@ void task_dump_owner(struct task_struct *task, umode_t mode,
> >         *rgid = gid;
> >  }
> >
> > +/* use if inode is live */
> > +void task_dump_owner_to_inode(struct task_struct *task, umode_t mode,
> > +                             struct inode *inode)
> > +{
> > +       kuid_t uid;
> > +       kgid_t gid;
> > +
> > +       task_dump_owner(task, mode, &uid, &gid);
> > +       /*
> > +        * There is no atomic "change all credentials at once" system call,
> > +        * guaranteeing more than _some_ snapshot from "struct cred" ends up
> > +        * in inode is not possible.
> > +        */
> > +       spin_lock(&inode->i_lock);
> > +       inode->i_uid = uid;
> > +       inode->i_gid = gid;
> > +       spin_unlock(&inode->i_lock);
> 
> 2 tasks can still race here, and the inconsistent scenario I outlined in
> https://lore.kernel.org/linux-fsdevel/000000000000328b2905951a7667@google.com/
> could still happen I think (although extremely unlikely). Mainly,
> causality may still be violated -- but I may be wrong as I don't know
> the rest of the code (so please be critical of my suggestion).
> 
> The problem is that if 2 threads race here, one has snapshotted old
> uid/gid, and the other the new uid/gid. Then it is still possible for
> the old uid/gid to be written back after new uid/gid, which would
> result in this bad scenario:
> 
> === TASK 1 ===
> | seteuid(1000);
> | seteuid(0);
> | stat("/proc/<pid-of-task-1>", &fstat);
> | assert(fstat.st_uid == 0);  // fails
> === TASK 2 ===
> | stat("/proc/<pid-of-task-1>", ...);
> 
> AFAIK it's not something that can easily be fixed without some
> timestamp on the uid/gid pair (timestamp updated after setuid/seteuid
> etc) obtained in the RCU reader critical section. Then in this
> critical section, uid/gid should only be written if the current pair
> in inode is older according to snapshot timestamp.

This probably requires bloating "struct cred" with generation number.
I'm not sure what to do other than cc our credential overlords.

> > +}
> > +
> >  struct inode *proc_pid_make_inode(struct super_block * sb,
> >                                   struct task_struct *task, umode_t mode)
> >  {
> > @@ -1769,6 +1788,7 @@ struct inode *proc_pid_make_inode(struct super_block * sb,
> >         if (!ei->pid)
> >                 goto out_unlock;
> >
> > +       /* fresh inode -- no races */
> >         task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
> >         security_task_to_inode(task, inode);
> >
> > @@ -1802,6 +1822,7 @@ int pid_getattr(const struct path *path, struct kstat *stat,
> >                          */
> >                         return -ENOENT;
> >                 }
> > +               /* "struct kstat" is thread local, atomic snapshot is enough */
> >                 task_dump_owner(task, inode->i_mode, &stat->uid, &stat->gid);
> >         }
> >         rcu_read_unlock();
> > @@ -1815,7 +1836,7 @@ int pid_getattr(const struct path *path, struct kstat *stat,
> >   */
> >  void pid_update_inode(struct task_struct *task, struct inode *inode)
> >  {
> > -       task_dump_owner(task, inode->i_mode, &inode->i_uid, &inode->i_gid);
> > +       task_dump_owner_to_inode(task, inode->i_mode, inode);
> >
> >         inode->i_mode &= ~(S_ISUID | S_ISGID);
> >         security_task_to_inode(task, inode);
> > @@ -1990,7 +2011,7 @@ static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags)
> >         mmput(mm);
> >
> >         if (exact_vma_exists) {
> > -               task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
> > +               task_dump_owner_to_inode(task, 0, inode);
> >
> >                 security_task_to_inode(task, inode);
> >                 status = 1;
> > --- a/fs/proc/fd.c
> > +++ b/fs/proc/fd.c
> > @@ -101,7 +101,7 @@ static bool tid_fd_mode(struct task_struct *task, unsigned fd, fmode_t *mode)
> >  static void tid_fd_update_inode(struct task_struct *task, struct inode *inode,
> >                                 fmode_t f_mode)
> >  {
> > -       task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
> > +       task_dump_owner_to_inode(task, 0, inode);
> >
> >         if (S_ISLNK(inode->i_mode)) {
> >                 unsigned i_mode = S_IFLNK;
> > --- a/fs/proc/internal.h
> > +++ b/fs/proc/internal.h
> > @@ -123,6 +123,8 @@ static inline struct task_struct *get_proc_task(const struct inode *inode)
> >
> >  void task_dump_owner(struct task_struct *task, umode_t mode,
> >                      kuid_t *ruid, kgid_t *rgid);
> > +void task_dump_owner_to_inode(struct task_struct *task, umode_t mode,
> > +                             struct inode *inode);
> >
> >  unsigned name_to_int(const struct qstr *qstr);
> >  /*

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] proc: fix inode uid/gid writeback race
  2019-10-21 18:32   ` Alexey Dobriyan
@ 2019-11-03 13:20     ` Dmitry Vyukov
  0 siblings, 0 replies; 5+ messages in thread
From: Dmitry Vyukov @ 2019-11-03 13:20 UTC (permalink / raw)
  To: Alexey Dobriyan
  Cc: Marco Elver, David Howells, Andrew Morton, LKML, linux-fsdevel, Al Viro

On Mon, Oct 21, 2019 at 8:32 PM Alexey Dobriyan <adobriyan@gmail.com> wrote:
>
> On Mon, Oct 21, 2019 at 11:24:27AM +0200, Marco Elver wrote:
> > On Sun, 20 Oct 2019 at 19:30, Alexey Dobriyan <adobriyan@gmail.com> wrote:
> > >
> > > (euid, egid) pair is snapshotted correctly from task under RCU,
> > > but writeback to inode can be done in any order.
> > >
> > > Fix by doing writeback under inode->i_lock where necessary
> > > (/proc/* , /proc/*/fd/* , /proc/*/map_files/* revalidate).
> > >
> > > Reported-by: syzbot+e392f8008a294fdf8891@syzkaller.appspotmail.com
> > > Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
> > > ---
> >
> > Thanks!
> >
> > This certainly fixes the problem of inconsistent uid/gid pair due to
> > racing writebacks, as well as the data-race. If that is the only
> > purpose of this patch, then from what I see this is fine:
> >
> > Acked-by: Marco Elver <elver@google.com>
> >
> > However, there is probably still a more fundamental problem as outlined below.
> >
> > >  fs/proc/base.c     |   25 +++++++++++++++++++++++--
> > >  fs/proc/fd.c       |    2 +-
> > >  fs/proc/internal.h |    2 ++
> > >  3 files changed, 26 insertions(+), 3 deletions(-)
> > >
> > > --- a/fs/proc/base.c
> > > +++ b/fs/proc/base.c
> > > @@ -1743,6 +1743,25 @@ void task_dump_owner(struct task_struct *task, umode_t mode,
> > >         *rgid = gid;
> > >  }
> > >
> > > +/* use if inode is live */
> > > +void task_dump_owner_to_inode(struct task_struct *task, umode_t mode,
> > > +                             struct inode *inode)
> > > +{
> > > +       kuid_t uid;
> > > +       kgid_t gid;
> > > +
> > > +       task_dump_owner(task, mode, &uid, &gid);
> > > +       /*
> > > +        * There is no atomic "change all credentials at once" system call,
> > > +        * guaranteeing more than _some_ snapshot from "struct cred" ends up
> > > +        * in inode is not possible.
> > > +        */
> > > +       spin_lock(&inode->i_lock);
> > > +       inode->i_uid = uid;
> > > +       inode->i_gid = gid;
> > > +       spin_unlock(&inode->i_lock);
> >
> > 2 tasks can still race here, and the inconsistent scenario I outlined in
> > https://lore.kernel.org/linux-fsdevel/000000000000328b2905951a7667@google.com/
> > could still happen I think (although extremely unlikely). Mainly,
> > causality may still be violated -- but I may be wrong as I don't know
> > the rest of the code (so please be critical of my suggestion).
> >
> > The problem is that if 2 threads race here, one has snapshotted old
> > uid/gid, and the other the new uid/gid. Then it is still possible for
> > the old uid/gid to be written back after new uid/gid, which would
> > result in this bad scenario:
> >
> > === TASK 1 ===
> > | seteuid(1000);
> > | seteuid(0);
> > | stat("/proc/<pid-of-task-1>", &fstat);
> > | assert(fstat.st_uid == 0);  // fails
> > === TASK 2 ===
> > | stat("/proc/<pid-of-task-1>", ...);
> >
> > AFAIK it's not something that can easily be fixed without some
> > timestamp on the uid/gid pair (timestamp updated after setuid/seteuid
> > etc) obtained in the RCU reader critical section. Then in this
> > critical section, uid/gid should only be written if the current pair
> > in inode is older according to snapshot timestamp.
>
> This probably requires bloating "struct cred" with generation number.
> I'm not sure what to do other than cc our credential overlords.

Just in case, this bug can lead to bad observable behavior, e.g.
bypassing LSM checks (apparmor) and this patch does not fix the
underlying problem. For details see:
https://groups.google.com/d/msg/syzkaller-bugs/mzwiXt4ml68/GuAUQrWtBQAJ


> > > +}
> > > +
> > >  struct inode *proc_pid_make_inode(struct super_block * sb,
> > >                                   struct task_struct *task, umode_t mode)
> > >  {
> > > @@ -1769,6 +1788,7 @@ struct inode *proc_pid_make_inode(struct super_block * sb,
> > >         if (!ei->pid)
> > >                 goto out_unlock;
> > >
> > > +       /* fresh inode -- no races */
> > >         task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
> > >         security_task_to_inode(task, inode);
> > >
> > > @@ -1802,6 +1822,7 @@ int pid_getattr(const struct path *path, struct kstat *stat,
> > >                          */
> > >                         return -ENOENT;
> > >                 }
> > > +               /* "struct kstat" is thread local, atomic snapshot is enough */
> > >                 task_dump_owner(task, inode->i_mode, &stat->uid, &stat->gid);
> > >         }
> > >         rcu_read_unlock();
> > > @@ -1815,7 +1836,7 @@ int pid_getattr(const struct path *path, struct kstat *stat,
> > >   */
> > >  void pid_update_inode(struct task_struct *task, struct inode *inode)
> > >  {
> > > -       task_dump_owner(task, inode->i_mode, &inode->i_uid, &inode->i_gid);
> > > +       task_dump_owner_to_inode(task, inode->i_mode, inode);
> > >
> > >         inode->i_mode &= ~(S_ISUID | S_ISGID);
> > >         security_task_to_inode(task, inode);
> > > @@ -1990,7 +2011,7 @@ static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags)
> > >         mmput(mm);
> > >
> > >         if (exact_vma_exists) {
> > > -               task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
> > > +               task_dump_owner_to_inode(task, 0, inode);
> > >
> > >                 security_task_to_inode(task, inode);
> > >                 status = 1;
> > > --- a/fs/proc/fd.c
> > > +++ b/fs/proc/fd.c
> > > @@ -101,7 +101,7 @@ static bool tid_fd_mode(struct task_struct *task, unsigned fd, fmode_t *mode)
> > >  static void tid_fd_update_inode(struct task_struct *task, struct inode *inode,
> > >                                 fmode_t f_mode)
> > >  {
> > > -       task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
> > > +       task_dump_owner_to_inode(task, 0, inode);
> > >
> > >         if (S_ISLNK(inode->i_mode)) {
> > >                 unsigned i_mode = S_IFLNK;
> > > --- a/fs/proc/internal.h
> > > +++ b/fs/proc/internal.h
> > > @@ -123,6 +123,8 @@ static inline struct task_struct *get_proc_task(const struct inode *inode)
> > >
> > >  void task_dump_owner(struct task_struct *task, umode_t mode,
> > >                      kuid_t *ruid, kgid_t *rgid);
> > > +void task_dump_owner_to_inode(struct task_struct *task, umode_t mode,
> > > +                             struct inode *inode);
> > >
> > >  unsigned name_to_int(const struct qstr *qstr);
> > >  /*

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2019-11-03 13:20 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-10-20 17:30 [PATCH] proc: fix inode uid/gid writeback race Alexey Dobriyan
2019-10-21  9:24 ` Marco Elver
2019-10-21  9:59   ` Marco Elver
2019-10-21 18:32   ` Alexey Dobriyan
2019-11-03 13:20     ` Dmitry Vyukov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).