* [PATCH v2] unix: properly account for FDs passed over unix sockets @ 2016-01-10 6:58 ` Willy Tarreau 0 siblings, 0 replies; 23+ messages in thread From: Willy Tarreau @ 2016-01-10 6:54 UTC (permalink / raw) To: David S. Miller, netdev, linux-kernel Cc: Linus Torvalds, Eric Dumazet, Hannes Frederic Sowa, socketpair, Tetsuo Handa It is possible for a process to allocate and accumulate far more FDs than the process' limit by sending them over a unix socket then closing them to keep the process' fd count low. This change addresses this problem by keeping track of the number of FDs in flight per user and preventing non-privileged processes from having more FDs in flight than their configured FD limit. Reported-by: socketpair@gmail.com Reported-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> Mitigates: CVE-2013-4312 (Linux 2.0+) Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org> Signed-off-by: Willy Tarreau <w@1wt.eu> --- v2: add reported-by, mitigates and acked-by. It would be nice if (if accepted) it would be backported to -stable as the issue is currently exploitable. --- include/linux/sched.h | 1 + net/unix/af_unix.c | 24 ++++++++++++++++++++---- net/unix/garbage.c | 13 ++++++++----- 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index edad7a4..fbf25f1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -830,6 +830,7 @@ struct user_struct { unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */ #endif unsigned long locked_shm; /* How many pages of mlocked shm ? */ + unsigned long unix_inflight; /* How many files in flight in unix sockets */ #ifdef CONFIG_KEYS struct key *uid_keyring; /* UID specific keyring */ diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 45aebd9..d6d7b43 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1499,6 +1499,21 @@ static void unix_destruct_scm(struct sk_buff *skb) sock_wfree(skb); } +/* + * The "user->unix_inflight" variable is protected by the garbage + * collection lock, and we just read it locklessly here. If you go + * over the limit, there might be a tiny race in actually noticing + * it across threads. Tough. + */ +static inline bool too_many_unix_fds(struct task_struct *p) +{ + struct user_struct *user = current_user(); + + if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE))) + return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN); + return false; +} + #define MAX_RECURSION_LEVEL 4 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) @@ -1507,6 +1522,9 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) unsigned char max_level = 0; int unix_sock_count = 0; + if (too_many_unix_fds(current)) + return -ETOOMANYREFS; + for (i = scm->fp->count - 1; i >= 0; i--) { struct sock *sk = unix_get_socket(scm->fp->fp[i]); @@ -1528,10 +1546,8 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) if (!UNIXCB(skb).fp) return -ENOMEM; - if (unix_sock_count) { - for (i = scm->fp->count - 1; i >= 0; i--) - unix_inflight(scm->fp->fp[i]); - } + for (i = scm->fp->count - 1; i >= 0; i--) + unix_inflight(scm->fp->fp[i]); return max_level; } diff --git a/net/unix/garbage.c b/net/unix/garbage.c index a73a226..8fcdc22 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -120,11 +120,11 @@ void unix_inflight(struct file *fp) { struct sock *s = unix_get_socket(fp); + spin_lock(&unix_gc_lock); + if (s) { struct unix_sock *u = unix_sk(s); - spin_lock(&unix_gc_lock); - if (atomic_long_inc_return(&u->inflight) == 1) { BUG_ON(!list_empty(&u->link)); list_add_tail(&u->link, &gc_inflight_list); @@ -132,25 +132,28 @@ void unix_inflight(struct file *fp) BUG_ON(list_empty(&u->link)); } unix_tot_inflight++; - spin_unlock(&unix_gc_lock); } + fp->f_cred->user->unix_inflight++; + spin_unlock(&unix_gc_lock); } void unix_notinflight(struct file *fp) { struct sock *s = unix_get_socket(fp); + spin_lock(&unix_gc_lock); + if (s) { struct unix_sock *u = unix_sk(s); - spin_lock(&unix_gc_lock); BUG_ON(list_empty(&u->link)); if (atomic_long_dec_and_test(&u->inflight)) list_del_init(&u->link); unix_tot_inflight--; - spin_unlock(&unix_gc_lock); } + fp->f_cred->user->unix_inflight--; + spin_unlock(&unix_gc_lock); } static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), -- 1.7.12.1 ^ permalink raw reply related [flat|nested] 23+ messages in thread
* [PATCH v2] unix: properly account for FDs passed over unix sockets @ 2016-01-10 6:58 ` Willy Tarreau 0 siblings, 0 replies; 23+ messages in thread From: Willy Tarreau @ 2016-01-10 6:58 UTC (permalink / raw) To: David S. Miller, netdev, linux-kernel Cc: Linus Torvalds, Eric Dumazet, Hannes Frederic Sowa, socketpair, Tetsuo Handa It is possible for a process to allocate and accumulate far more FDs than the process' limit by sending them over a unix socket then closing them to keep the process' fd count low. This change addresses this problem by keeping track of the number of FDs in flight per user and preventing non-privileged processes from having more FDs in flight than their configured FD limit. Reported-by: socketpair@gmail.com Reported-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> Mitigates: CVE-2013-4312 (Linux 2.0+) Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org> Signed-off-by: Willy Tarreau <w@1wt.eu> --- v2: add reported-by, mitigates and acked-by. It would be nice if (if accepted) it would be backported to -stable as the issue is currently exploitable. --- include/linux/sched.h | 1 + net/unix/af_unix.c | 24 ++++++++++++++++++++---- net/unix/garbage.c | 13 ++++++++----- 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index edad7a4..fbf25f1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -830,6 +830,7 @@ struct user_struct { unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */ #endif unsigned long locked_shm; /* How many pages of mlocked shm ? */ + unsigned long unix_inflight; /* How many files in flight in unix sockets */ #ifdef CONFIG_KEYS struct key *uid_keyring; /* UID specific keyring */ diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 45aebd9..d6d7b43 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1499,6 +1499,21 @@ static void unix_destruct_scm(struct sk_buff *skb) sock_wfree(skb); } +/* + * The "user->unix_inflight" variable is protected by the garbage + * collection lock, and we just read it locklessly here. If you go + * over the limit, there might be a tiny race in actually noticing + * it across threads. Tough. + */ +static inline bool too_many_unix_fds(struct task_struct *p) +{ + struct user_struct *user = current_user(); + + if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE))) + return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN); + return false; +} + #define MAX_RECURSION_LEVEL 4 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) @@ -1507,6 +1522,9 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) unsigned char max_level = 0; int unix_sock_count = 0; + if (too_many_unix_fds(current)) + return -ETOOMANYREFS; + for (i = scm->fp->count - 1; i >= 0; i--) { struct sock *sk = unix_get_socket(scm->fp->fp[i]); @@ -1528,10 +1546,8 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) if (!UNIXCB(skb).fp) return -ENOMEM; - if (unix_sock_count) { - for (i = scm->fp->count - 1; i >= 0; i--) - unix_inflight(scm->fp->fp[i]); - } + for (i = scm->fp->count - 1; i >= 0; i--) + unix_inflight(scm->fp->fp[i]); return max_level; } diff --git a/net/unix/garbage.c b/net/unix/garbage.c index a73a226..8fcdc22 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -120,11 +120,11 @@ void unix_inflight(struct file *fp) { struct sock *s = unix_get_socket(fp); + spin_lock(&unix_gc_lock); + if (s) { struct unix_sock *u = unix_sk(s); - spin_lock(&unix_gc_lock); - if (atomic_long_inc_return(&u->inflight) == 1) { BUG_ON(!list_empty(&u->link)); list_add_tail(&u->link, &gc_inflight_list); @@ -132,25 +132,28 @@ void unix_inflight(struct file *fp) BUG_ON(list_empty(&u->link)); } unix_tot_inflight++; - spin_unlock(&unix_gc_lock); } + fp->f_cred->user->unix_inflight++; + spin_unlock(&unix_gc_lock); } void unix_notinflight(struct file *fp) { struct sock *s = unix_get_socket(fp); + spin_lock(&unix_gc_lock); + if (s) { struct unix_sock *u = unix_sk(s); - spin_lock(&unix_gc_lock); BUG_ON(list_empty(&u->link)); if (atomic_long_dec_and_test(&u->inflight)) list_del_init(&u->link); unix_tot_inflight--; - spin_unlock(&unix_gc_lock); } + fp->f_cred->user->unix_inflight--; + spin_unlock(&unix_gc_lock); } static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), -- 1.7.12.1 ^ permalink raw reply related [flat|nested] 23+ messages in thread
* Re: [PATCH v2] unix: properly account for FDs passed over unix sockets 2016-01-10 6:58 ` Willy Tarreau (?) @ 2016-01-11 5:05 ` David Miller -1 siblings, 0 replies; 23+ messages in thread From: David Miller @ 2016-01-11 5:05 UTC (permalink / raw) To: w Cc: netdev, linux-kernel, torvalds, edumazet, hannes, socketpair, penguin-kernel From: Willy Tarreau <w@1wt.eu> Date: Sun, Jan 10 07:54:56 CET 2016 > It is possible for a process to allocate and accumulate far more FDs than > the process' limit by sending them over a unix socket then closing them > to keep the process' fd count low. > > This change addresses this problem by keeping track of the number of FDs > in flight per user and preventing non-privileged processes from having > more FDs in flight than their configured FD limit. > > Reported-by: socketpair@gmail.com > Reported-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> > Mitigates: CVE-2013-4312 (Linux 2.0+) > Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> > Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org> > Signed-off-by: Willy Tarreau <w@1wt.eu> Applied and queued up for -stable, thanks! ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v2] unix: properly account for FDs passed over unix sockets 2016-01-10 6:58 ` Willy Tarreau (?) (?) @ 2016-02-02 17:34 ` David Herrmann 2016-02-02 18:29 ` Hannes Frederic Sowa 2016-02-03 11:36 ` Simon McVittie -1 siblings, 2 replies; 23+ messages in thread From: David Herrmann @ 2016-02-02 17:34 UTC (permalink / raw) To: Willy Tarreau Cc: David S. Miller, netdev, linux-kernel, Linus Torvalds, Eric Dumazet, Hannes Frederic Sowa, socketpair, Tetsuo Handa, Simon McVittie Hi On Sun, Jan 10, 2016 at 7:54 AM, Willy Tarreau <w@1wt.eu> wrote: > It is possible for a process to allocate and accumulate far more FDs than > the process' limit by sending them over a unix socket then closing them > to keep the process' fd count low. > > This change addresses this problem by keeping track of the number of FDs > in flight per user and preventing non-privileged processes from having > more FDs in flight than their configured FD limit. This is not really what this patch does. This patch rather prevents processes from having more of their owned *files* in flight than their configured FD limit. See below for details.. > Reported-by: socketpair@gmail.com > Reported-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> > Mitigates: CVE-2013-4312 (Linux 2.0+) > Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> > Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org> > Signed-off-by: Willy Tarreau <w@1wt.eu> > --- > v2: add reported-by, mitigates and acked-by. > > It would be nice if (if accepted) it would be backported to -stable as the > issue is currently exploitable. > --- > include/linux/sched.h | 1 + > net/unix/af_unix.c | 24 ++++++++++++++++++++---- > net/unix/garbage.c | 13 ++++++++----- > 3 files changed, 29 insertions(+), 9 deletions(-) > > diff --git a/include/linux/sched.h b/include/linux/sched.h > index edad7a4..fbf25f1 100644 > --- a/include/linux/sched.h > +++ b/include/linux/sched.h > @@ -830,6 +830,7 @@ struct user_struct { > unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */ > #endif > unsigned long locked_shm; /* How many pages of mlocked shm ? */ > + unsigned long unix_inflight; /* How many files in flight in unix sockets */ > > #ifdef CONFIG_KEYS > struct key *uid_keyring; /* UID specific keyring */ > diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c > index 45aebd9..d6d7b43 100644 > --- a/net/unix/af_unix.c > +++ b/net/unix/af_unix.c > @@ -1499,6 +1499,21 @@ static void unix_destruct_scm(struct sk_buff *skb) > sock_wfree(skb); > } > > +/* > + * The "user->unix_inflight" variable is protected by the garbage > + * collection lock, and we just read it locklessly here. If you go > + * over the limit, there might be a tiny race in actually noticing > + * it across threads. Tough. This limit is checked once per transaction. IIRC, a transaction can carry 255 files. Thus, it is easy to exceed this limit without any race involved. > + */ > +static inline bool too_many_unix_fds(struct task_struct *p) > +{ > + struct user_struct *user = current_user(); > + > + if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE))) > + return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN); > + return false; > +} > + > #define MAX_RECURSION_LEVEL 4 > > static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) > @@ -1507,6 +1522,9 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) > unsigned char max_level = 0; > int unix_sock_count = 0; > > + if (too_many_unix_fds(current)) > + return -ETOOMANYREFS; > + Ignoring the capabilities, this effectively resolves to: if (current_cred()->user->unix_inflight > rlimit(RLIMIT_NOFILE)) return -ETOOMANYREFS; It limits the number of inflight FDs of the *current* user to its *own* limit. But.. > for (i = scm->fp->count - 1; i >= 0; i--) { > struct sock *sk = unix_get_socket(scm->fp->fp[i]); > > @@ -1528,10 +1546,8 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) > if (!UNIXCB(skb).fp) > return -ENOMEM; > > - if (unix_sock_count) { > - for (i = scm->fp->count - 1; i >= 0; i--) > - unix_inflight(scm->fp->fp[i]); > - } > + for (i = scm->fp->count - 1; i >= 0; i--) > + unix_inflight(scm->fp->fp[i]); > return max_level; > } > > diff --git a/net/unix/garbage.c b/net/unix/garbage.c > index a73a226..8fcdc22 100644 > --- a/net/unix/garbage.c > +++ b/net/unix/garbage.c > @@ -120,11 +120,11 @@ void unix_inflight(struct file *fp) > { > struct sock *s = unix_get_socket(fp); > > + spin_lock(&unix_gc_lock); > + > if (s) { > struct unix_sock *u = unix_sk(s); > > - spin_lock(&unix_gc_lock); > - > if (atomic_long_inc_return(&u->inflight) == 1) { > BUG_ON(!list_empty(&u->link)); > list_add_tail(&u->link, &gc_inflight_list); > @@ -132,25 +132,28 @@ void unix_inflight(struct file *fp) > BUG_ON(list_empty(&u->link)); > } > unix_tot_inflight++; > - spin_unlock(&unix_gc_lock); > } > + fp->f_cred->user->unix_inflight++; ..this modifies the limit of the owner of the file you send. As such, if you only send files that you don't own, you will continuously bump the limit of the file-owner, but never your own. As such, the protection above will never fire. Is this really what this patch is supposed to do? Shouldn't the loop in unix_attach_fds() rather check for this: if (unlikely(fp->f_cred->user->unix_inflight > nofile && !file_ns_capable(fp, &init_user_ns, CAP_SYS_RESOURCE) && !file_ns_capable(fp, &init_user_ns, CAP_SYS_ADMIN))) return -ETOOMANYREFS; (or keeping capable() rather than file_ns_capable(), depending on the wanted semantics) Furthermore, with this patch in place, a process better not pass any file-descriptors to an untrusted process. This might have been a stupid idea in the first place, but I would have expected the patch to mention this. Because with this patch in place, a receiver of a file-descriptor can bump the unix_inflight limit of the sender arbitrarily. Did anyone notify the dbus maintainers of this? They might wanna document this, if not already done (CC: smcv). Why doesn't this patch modify "unix_inflight" of the sender rather than the passed descriptors? It would require pinning the affected user in 'scm' contexts, but that is probably already the case, anyway. This way, the original ETOOMANYREFS check would be perfectly fine. Anyway, can someone provide a high-level description of what exactly this patch is supposed to do? Which operation should be limited, who should inflight FDs be accounted on, and which rlimit should be used on each operation? I'm having a hard time auditing existing user-space, given just the scarce description of this commit. Thanks David > + spin_unlock(&unix_gc_lock); > } > > void unix_notinflight(struct file *fp) > { > struct sock *s = unix_get_socket(fp); > > + spin_lock(&unix_gc_lock); > + > if (s) { > struct unix_sock *u = unix_sk(s); > > - spin_lock(&unix_gc_lock); > BUG_ON(list_empty(&u->link)); > > if (atomic_long_dec_and_test(&u->inflight)) > list_del_init(&u->link); > unix_tot_inflight--; > - spin_unlock(&unix_gc_lock); > } > + fp->f_cred->user->unix_inflight--; > + spin_unlock(&unix_gc_lock); > } > > static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), > -- > 1.7.12.1 > > > ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v2] unix: properly account for FDs passed over unix sockets 2016-02-02 17:34 ` David Herrmann @ 2016-02-02 18:29 ` Hannes Frederic Sowa 2016-02-02 19:29 ` Linus Torvalds 2016-02-03 11:36 ` Simon McVittie 1 sibling, 1 reply; 23+ messages in thread From: Hannes Frederic Sowa @ 2016-02-02 18:29 UTC (permalink / raw) To: David Herrmann, Willy Tarreau Cc: David S. Miller, netdev, linux-kernel, Linus Torvalds, Eric Dumazet, socketpair, Tetsuo Handa, Simon McVittie On 02.02.2016 18:34, David Herrmann wrote: > Hi > > On Sun, Jan 10, 2016 at 7:54 AM, Willy Tarreau <w@1wt.eu> wrote: >> It is possible for a process to allocate and accumulate far more FDs than >> the process' limit by sending them over a unix socket then closing them >> to keep the process' fd count low. >> >> This change addresses this problem by keeping track of the number of FDs >> in flight per user and preventing non-privileged processes from having >> more FDs in flight than their configured FD limit. > > This is not really what this patch does. This patch rather prevents > processes from having more of their owned *files* in flight than their > configured FD limit. See below for details.. > >> Reported-by: socketpair@gmail.com >> Reported-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> >> Mitigates: CVE-2013-4312 (Linux 2.0+) >> Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> >> Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org> >> Signed-off-by: Willy Tarreau <w@1wt.eu> >> --- >> v2: add reported-by, mitigates and acked-by. >> >> It would be nice if (if accepted) it would be backported to -stable as the >> issue is currently exploitable. >> --- >> include/linux/sched.h | 1 + >> net/unix/af_unix.c | 24 ++++++++++++++++++++---- >> net/unix/garbage.c | 13 ++++++++----- >> 3 files changed, 29 insertions(+), 9 deletions(-) >> >> diff --git a/include/linux/sched.h b/include/linux/sched.h >> index edad7a4..fbf25f1 100644 >> --- a/include/linux/sched.h >> +++ b/include/linux/sched.h >> @@ -830,6 +830,7 @@ struct user_struct { >> unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */ >> #endif >> unsigned long locked_shm; /* How many pages of mlocked shm ? */ >> + unsigned long unix_inflight; /* How many files in flight in unix sockets */ >> >> #ifdef CONFIG_KEYS >> struct key *uid_keyring; /* UID specific keyring */ >> diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c >> index 45aebd9..d6d7b43 100644 >> --- a/net/unix/af_unix.c >> +++ b/net/unix/af_unix.c >> @@ -1499,6 +1499,21 @@ static void unix_destruct_scm(struct sk_buff *skb) >> sock_wfree(skb); >> } >> >> +/* >> + * The "user->unix_inflight" variable is protected by the garbage >> + * collection lock, and we just read it locklessly here. If you go >> + * over the limit, there might be a tiny race in actually noticing >> + * it across threads. Tough. > > This limit is checked once per transaction. IIRC, a transaction can > carry 255 files. Thus, it is easy to exceed this limit without any > race involved. > >> + */ >> +static inline bool too_many_unix_fds(struct task_struct *p) >> +{ >> + struct user_struct *user = current_user(); >> + >> + if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE))) >> + return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN); >> + return false; >> +} >> + >> #define MAX_RECURSION_LEVEL 4 >> >> static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) >> @@ -1507,6 +1522,9 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) >> unsigned char max_level = 0; >> int unix_sock_count = 0; >> >> + if (too_many_unix_fds(current)) >> + return -ETOOMANYREFS; >> + > > Ignoring the capabilities, this effectively resolves to: > > if (current_cred()->user->unix_inflight > rlimit(RLIMIT_NOFILE)) > return -ETOOMANYREFS; > > It limits the number of inflight FDs of the *current* user to its *own* limit. > > But.. > >> for (i = scm->fp->count - 1; i >= 0; i--) { >> struct sock *sk = unix_get_socket(scm->fp->fp[i]); >> >> @@ -1528,10 +1546,8 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) >> if (!UNIXCB(skb).fp) >> return -ENOMEM; >> >> - if (unix_sock_count) { >> - for (i = scm->fp->count - 1; i >= 0; i--) >> - unix_inflight(scm->fp->fp[i]); >> - } >> + for (i = scm->fp->count - 1; i >= 0; i--) >> + unix_inflight(scm->fp->fp[i]); >> return max_level; >> } >> >> diff --git a/net/unix/garbage.c b/net/unix/garbage.c >> index a73a226..8fcdc22 100644 >> --- a/net/unix/garbage.c >> +++ b/net/unix/garbage.c >> @@ -120,11 +120,11 @@ void unix_inflight(struct file *fp) >> { >> struct sock *s = unix_get_socket(fp); >> >> + spin_lock(&unix_gc_lock); >> + >> if (s) { >> struct unix_sock *u = unix_sk(s); >> >> - spin_lock(&unix_gc_lock); >> - >> if (atomic_long_inc_return(&u->inflight) == 1) { >> BUG_ON(!list_empty(&u->link)); >> list_add_tail(&u->link, &gc_inflight_list); >> @@ -132,25 +132,28 @@ void unix_inflight(struct file *fp) >> BUG_ON(list_empty(&u->link)); >> } >> unix_tot_inflight++; >> - spin_unlock(&unix_gc_lock); >> } >> + fp->f_cred->user->unix_inflight++; > > ..this modifies the limit of the owner of the file you send. As such, > if you only send files that you don't own, you will continuously bump > the limit of the file-owner, but never your own. As such, the > protection above will never fire. > > Is this really what this patch is supposed to do? Shouldn't the loop > in unix_attach_fds() rather check for this: > > if (unlikely(fp->f_cred->user->unix_inflight > nofile && > !file_ns_capable(fp, &init_user_ns, CAP_SYS_RESOURCE) && > !file_ns_capable(fp, &init_user_ns, CAP_SYS_ADMIN))) > return -ETOOMANYREFS; > > (or keeping capable() rather than file_ns_capable(), depending on the > wanted semantics) > > Furthermore, with this patch in place, a process better not pass any > file-descriptors to an untrusted process. This might have been a > stupid idea in the first place, but I would have expected the patch to > mention this. Because with this patch in place, a receiver of a > file-descriptor can bump the unix_inflight limit of the sender > arbitrarily. Did anyone notify the dbus maintainers of this? They > might wanna document this, if not already done (CC: smcv). > > Why doesn't this patch modify "unix_inflight" of the sender rather > than the passed descriptors? It would require pinning the affected > user in 'scm' contexts, but that is probably already the case, anyway. > This way, the original ETOOMANYREFS check would be perfectly fine. > > Anyway, can someone provide a high-level description of what exactly > this patch is supposed to do? Which operation should be limited, who > should inflight FDs be accounted on, and which rlimit should be used > on each operation? I'm having a hard time auditing existing > user-space, given just the scarce description of this commit. Yes, all your observations are true. I think we need to explicitly need to refer to the sending socket while attaching the fds. Good thing is that we don't switch skb->sk while appending the skb to the receive queue. First quick prototype which is completely untested (only compile-tested): diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 2a91a0561a4783..d7148ae04b02dd 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -6,8 +6,8 @@ #include <linux/mutex.h> #include <net/sock.h> -void unix_inflight(struct file *fp); -void unix_notinflight(struct file *fp); +void unix_inflight(struct sock *sk, struct file *fp); +void unix_notinflight(struct sock *sk, struct file *fp); void unix_gc(void); void wait_for_unix_gc(void); struct sock *unix_get_socket(struct file *filp); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 49d5093eb0553a..7bbb4762899924 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1496,7 +1496,7 @@ static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb) UNIXCB(skb).fp = NULL; for (i = scm->fp->count-1; i >= 0; i--) - unix_notinflight(scm->fp->fp[i]); + unix_notinflight(skb->sk, scm->fp->fp[i]); } static void unix_destruct_scm(struct sk_buff *skb) @@ -1561,7 +1561,7 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) return -ENOMEM; for (i = scm->fp->count - 1; i >= 0; i--) - unix_inflight(scm->fp->fp[i]); + unix_inflight(skb->sk, scm->fp->fp[i]); return max_level; } diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 8fcdc2283af50c..874c42161717f0 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -116,7 +116,7 @@ struct sock *unix_get_socket(struct file *filp) * descriptor if it is for an AF_UNIX socket. */ -void unix_inflight(struct file *fp) +void unix_inflight(struct sock *sk, struct file *fp) { struct sock *s = unix_get_socket(fp); @@ -133,11 +133,11 @@ void unix_inflight(struct file *fp) } unix_tot_inflight++; } - fp->f_cred->user->unix_inflight++; + sk->sk_socket->file->f_cred->user->unix_inflight++; spin_unlock(&unix_gc_lock); } -void unix_notinflight(struct file *fp) +void unix_notinflight(struct sock *sk, struct file *fp) { struct sock *s = unix_get_socket(fp); @@ -152,7 +152,7 @@ void unix_notinflight(struct file *fp) list_del_init(&u->link); unix_tot_inflight--; } - fp->f_cred->user->unix_inflight--; + sk->sk_socket->file->f_cred->user->unix_inflight++; spin_unlock(&unix_gc_lock); } ^ permalink raw reply related [flat|nested] 23+ messages in thread
* Re: [PATCH v2] unix: properly account for FDs passed over unix sockets 2016-02-02 18:29 ` Hannes Frederic Sowa @ 2016-02-02 19:29 ` Linus Torvalds 2016-02-02 20:32 ` Hannes Frederic Sowa 0 siblings, 1 reply; 23+ messages in thread From: Linus Torvalds @ 2016-02-02 19:29 UTC (permalink / raw) To: Hannes Frederic Sowa Cc: David Herrmann, Willy Tarreau, David S. Miller, netdev, linux-kernel, Eric Dumazet, Марк Коренберг, Tetsuo Handa, Simon McVittie On Tue, Feb 2, 2016 at 10:29 AM, Hannes Frederic Sowa <hannes@stressinduktion.org> wrote: >> >> Anyway, can someone provide a high-level description of what exactly >> this patch is supposed to do? Which operation should be limited, who >> should inflight FDs be accounted on, and which rlimit should be used >> on each operation? I'm having a hard time auditing existing >> user-space, given just the scarce description of this commit. > > Yes, all your observations are true. I think we need to explicitly > need to refer to the sending socket while attaching the fds. I don't think that really helps. Maybe somebody passed a unix domain socket around, and now we're crediting the wrong socket again. So how about we actually add a "struct cred *" to the scm_cookie itself, and we initialize it to "get_current_cred()". And then always use that. That way it's always the person who actually does the send (rather than the opener of the socket _or_ the opener of the file that gets passed around) that gets credited, and thanks to the cred pointer we can then de-credit them properly. Hmm? Linus ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v2] unix: properly account for FDs passed over unix sockets 2016-02-02 19:29 ` Linus Torvalds @ 2016-02-02 20:32 ` Hannes Frederic Sowa 2016-02-02 20:39 ` Willy Tarreau 2016-02-02 20:44 ` Linus Torvalds 0 siblings, 2 replies; 23+ messages in thread From: Hannes Frederic Sowa @ 2016-02-02 20:32 UTC (permalink / raw) To: Linus Torvalds Cc: David Herrmann, Willy Tarreau, David S. Miller, netdev, linux-kernel, Eric Dumazet, Марк Коренберг, Tetsuo Handa, Simon McVittie On 02.02.2016 20:29, Linus Torvalds wrote: > On Tue, Feb 2, 2016 at 10:29 AM, Hannes Frederic Sowa > <hannes@stressinduktion.org> wrote: >>> >>> Anyway, can someone provide a high-level description of what exactly >>> this patch is supposed to do? Which operation should be limited, who >>> should inflight FDs be accounted on, and which rlimit should be used >>> on each operation? I'm having a hard time auditing existing >>> user-space, given just the scarce description of this commit. >> >> Yes, all your observations are true. I think we need to explicitly >> need to refer to the sending socket while attaching the fds. > > I don't think that really helps. Maybe somebody passed a unix domain > socket around, and now we're crediting the wrong socket again. I was struggling a bit what you meant but I think you are referring to the following scenario: process-1 opens up a unix socket and passes it to process-2 (this process has different credentials) via af-unix. Process-2 then sends multiple fds to another destination over this transferred unix-fd. True, in this case we again account the fds to the wrong process, which is bad. > So how about we actually add a "struct cred *" to the scm_cookie > itself, and we initialize it to "get_current_cred()". And then always > use that. Unfortunately we never transfer a scm_cookie via the skbs but merely use it to initialize unix_skb_parms structure in skb->cb and destroy it afterwards. But "struct pid *" in unix_skb_parms should be enough to get us to corresponding "struct cred *" so we can decrement the correct counter during skb destruction. So: We increment current task's unix_inflight and also check the current task's limit during attaching fds to skbs and decrement the inflight counter via "struct pid *". This looks like it should work. > That way it's always the person who actually does the send (rather > than the opener of the socket _or_ the opener of the file that gets > passed around) that gets credited, and thanks to the cred pointer we > can then de-credit them properly. Exactly, I try to implement that. Thanks a lot! Hannes ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v2] unix: properly account for FDs passed over unix sockets 2016-02-02 20:32 ` Hannes Frederic Sowa @ 2016-02-02 20:39 ` Willy Tarreau 2016-02-02 21:55 ` Hannes Frederic Sowa 2016-02-02 20:44 ` Linus Torvalds 1 sibling, 1 reply; 23+ messages in thread From: Willy Tarreau @ 2016-02-02 20:39 UTC (permalink / raw) To: Hannes Frederic Sowa Cc: Linus Torvalds, David Herrmann, David S. Miller, netdev, linux-kernel, Eric Dumazet, ???????? ??????????????????, Tetsuo Handa, Simon McVittie On Tue, Feb 02, 2016 at 09:32:56PM +0100, Hannes Frederic Sowa wrote: > But "struct pid *" in unix_skb_parms should be enough to get us to > corresponding "struct cred *" so we can decrement the correct counter > during skb destruction. > > So: > > We increment current task's unix_inflight and also check the current > task's limit during attaching fds to skbs and decrement the inflight > counter via "struct pid *". This looks like it should work. I like it as well, the principle sounds sane. > >That way it's always the person who actually does the send (rather > >than the opener of the socket _or_ the opener of the file that gets > >passed around) that gets credited, and thanks to the cred pointer we > >can then de-credit them properly. > > Exactly, I try to implement that. Thanks a lot! Thanks to you Hannes, I appreciate that you work on it, it would take much more time to me to dig into this. Willy ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v2] unix: properly account for FDs passed over unix sockets 2016-02-02 20:39 ` Willy Tarreau @ 2016-02-02 21:55 ` Hannes Frederic Sowa [not found] ` <CA+55aFzqdR80MKupCs+va8vtbTU67Jobax1QAbfWNktQCXFxpA@mail.gmail.com> 0 siblings, 1 reply; 23+ messages in thread From: Hannes Frederic Sowa @ 2016-02-02 21:55 UTC (permalink / raw) To: Willy Tarreau Cc: Linus Torvalds, David Herrmann, David S. Miller, netdev, linux-kernel, Eric Dumazet, ???????? ??????????????????, Tetsuo Handa, Simon McVittie Hi Willy, On 02.02.2016 21:39, Willy Tarreau wrote: > On Tue, Feb 02, 2016 at 09:32:56PM +0100, Hannes Frederic Sowa wrote: >> But "struct pid *" in unix_skb_parms should be enough to get us to >> corresponding "struct cred *" so we can decrement the correct counter >> during skb destruction. >> >> So: >> >> We increment current task's unix_inflight and also check the current >> task's limit during attaching fds to skbs and decrement the inflight >> counter via "struct pid *". This looks like it should work. > > I like it as well, the principle sounds sane. > >>> That way it's always the person who actually does the send (rather >>> than the opener of the socket _or_ the opener of the file that gets >>> passed around) that gets credited, and thanks to the cred pointer we >>> can then de-credit them properly. >> >> Exactly, I try to implement that. Thanks a lot! > > Thanks to you Hannes, I appreciate that you work on it, it would take > much more time to me to dig into this. I slightly tested the attached patch. If you have the original reproducer available could you also give it a try? Unfortunately I currently don't find it and am limited in time this evening. Thanks a lot, Hannes diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 2a91a0561a4783..4567dbe04f274d 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -6,8 +6,8 @@ #include <linux/mutex.h> #include <net/sock.h> -void unix_inflight(struct file *fp); -void unix_notinflight(struct file *fp); +void unix_inflight(const struct cred *cred, struct file *fp); +void unix_notinflight(const struct cred *cred, struct file *fp); void unix_gc(void); void wait_for_unix_gc(void); struct sock *unix_get_socket(struct file *filp); diff --git a/include/net/scm.h b/include/net/scm.h index 262532d111f51e..8bf7d496545bf8 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -21,6 +21,7 @@ struct scm_creds { struct scm_fp_list { short count; short max; + const struct cred *cred; struct file *fp[SCM_MAX_FD]; }; diff --git a/net/core/scm.c b/net/core/scm.c index 14596fb3717270..6b02b574e283f6 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -87,6 +87,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) *fplp = fpl; fpl->count = 0; fpl->max = SCM_MAX_FD; + fpl->cred = NULL; } fpp = &fpl->fp[fpl->count]; @@ -107,6 +108,10 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) *fpp++ = file; fpl->count++; } + + if (fpl->cred) + put_cred(fpl->cred); + fpl->cred = get_current_cred(); return num; } @@ -119,6 +124,7 @@ void __scm_destroy(struct scm_cookie *scm) scm->fp = NULL; for (i=fpl->count-1; i>=0; i--) fput(fpl->fp[i]); + put_cred(fpl->cred); kfree(fpl); } } @@ -336,6 +342,7 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl) for (i = 0; i < fpl->count; i++) get_file(fpl->fp[i]); new_fpl->max = new_fpl->count; + new_fpl->cred = get_cred(fpl->cred); } return new_fpl; } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 49d5093eb0553a..ba5058682419ba 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1496,7 +1496,7 @@ static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb) UNIXCB(skb).fp = NULL; for (i = scm->fp->count-1; i >= 0; i--) - unix_notinflight(scm->fp->fp[i]); + unix_notinflight(scm->fp->cred, scm->fp->fp[i]); } static void unix_destruct_scm(struct sk_buff *skb) @@ -1561,7 +1561,7 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) return -ENOMEM; for (i = scm->fp->count - 1; i >= 0; i--) - unix_inflight(scm->fp->fp[i]); + unix_inflight(scm->fp->cred, scm->fp->fp[i]); return max_level; } diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 8fcdc2283af50c..30b03e7dddd547 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -116,7 +116,7 @@ struct sock *unix_get_socket(struct file *filp) * descriptor if it is for an AF_UNIX socket. */ -void unix_inflight(struct file *fp) +void unix_inflight(const struct cred *cred, struct file *fp) { struct sock *s = unix_get_socket(fp); @@ -133,11 +133,11 @@ void unix_inflight(struct file *fp) } unix_tot_inflight++; } - fp->f_cred->user->unix_inflight++; + cred->user->unix_inflight++; spin_unlock(&unix_gc_lock); } -void unix_notinflight(struct file *fp) +void unix_notinflight(const struct cred *cred, struct file *fp) { struct sock *s = unix_get_socket(fp); @@ -152,7 +152,7 @@ void unix_notinflight(struct file *fp) list_del_init(&u->link); unix_tot_inflight--; } - fp->f_cred->user->unix_inflight--; + cred->user->unix_inflight--; spin_unlock(&unix_gc_lock); } ^ permalink raw reply related [flat|nested] 23+ messages in thread
[parent not found: <CA+55aFzqdR80MKupCs+va8vtbTU67Jobax1QAbfWNktQCXFxpA@mail.gmail.com>]
* Re: [PATCH v2] unix: properly account for FDs passed over unix sockets [not found] ` <CA+55aFzqdR80MKupCs+va8vtbTU67Jobax1QAbfWNktQCXFxpA@mail.gmail.com> @ 2016-02-03 0:57 ` Hannes Frederic Sowa 2016-02-03 1:12 ` Hannes Frederic Sowa 0 siblings, 1 reply; 23+ messages in thread From: Hannes Frederic Sowa @ 2016-02-03 0:57 UTC (permalink / raw) To: Linus Torvalds Cc: Network Development, Tetsuo Handa, Simon McVittie, Willy Tarreau, Eric Dumazet, linux-kernel, David S. Miller, ???????? ??????????????????, David Herrmann On 02.02.2016 23:11, Linus Torvalds wrote: > [ sorry for the html mail, I'm out grocery shopping ] > > On Feb 2, 2016 13:55, "Hannes Frederic Sowa" <hannes@stressinduktion.org> > wrote: >> >> I slightly tested the attached patch. > > Looks fine. I do wonder: if the only thing we use that "struct cred" for is > to do that ->user lookup, maybe we should just use "struct user_struct" > directly, and skip the cred entirely. > > Something like > > fp->user = get_uid(current_user()); > > and then > > put_uid(fp->user); > > But I'm OK with that patch as is if you prefer it that way (maybe you want > to use the cred to then test for root separately etc, out maybe there > already was done use of cred as cred that I just missed when reading the > patch on my phone..) I don't see any reason to switch over to struct user_struct. I tested a patch and will send it out soon. Bye, Hannes ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v2] unix: properly account for FDs passed over unix sockets 2016-02-03 0:57 ` Hannes Frederic Sowa @ 2016-02-03 1:12 ` Hannes Frederic Sowa 0 siblings, 0 replies; 23+ messages in thread From: Hannes Frederic Sowa @ 2016-02-03 1:12 UTC (permalink / raw) To: Linus Torvalds Cc: Network Development, Tetsuo Handa, Simon McVittie, Willy Tarreau, Eric Dumazet, linux-kernel, David S. Miller, ???????? ??????????????????, David Herrmann On 03.02.2016 01:57, Hannes Frederic Sowa wrote: > On 02.02.2016 23:11, Linus Torvalds wrote: >> But I'm OK with that patch as is if you prefer it that way (maybe you >> want >> to use the cred to then test for root separately etc, out maybe there >> already was done use of cred as cred that I just missed when reading the >> patch on my phone..) > > I don't see any reason to switch over to struct user_struct. I tested a > patch and will send it out soon. I meant that there is no reason not to switch over to struct user_struct. Patch is send out. Thanks for looking into this! Hannes ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v2] unix: properly account for FDs passed over unix sockets 2016-02-02 20:32 ` Hannes Frederic Sowa 2016-02-02 20:39 ` Willy Tarreau @ 2016-02-02 20:44 ` Linus Torvalds 2016-02-02 20:49 ` Willy Tarreau ` (2 more replies) 1 sibling, 3 replies; 23+ messages in thread From: Linus Torvalds @ 2016-02-02 20:44 UTC (permalink / raw) To: Hannes Frederic Sowa Cc: David Herrmann, Willy Tarreau, David S. Miller, netdev, linux-kernel, Eric Dumazet, Марк Коренберг, Tetsuo Handa, Simon McVittie On Tue, Feb 2, 2016 at 12:32 PM, Hannes Frederic Sowa <hannes@stressinduktion.org> wrote: > > Unfortunately we never transfer a scm_cookie via the skbs but merely use it > to initialize unix_skb_parms structure in skb->cb and destroy it afterwards. Ok, I obviously didn't check very closely. > But "struct pid *" in unix_skb_parms should be enough to get us to > corresponding "struct cred *" so we can decrement the correct counter during > skb destruction. Umm. I think the "struct cred" may change in between, can't it? So I don't think you can later look up the cred based on the pid. Could we add the cred pointer (or just the user pointer) to the unix_skb_parms? Or maybe just add it to the "struct scm_fp_list"? Linus ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v2] unix: properly account for FDs passed over unix sockets 2016-02-02 20:44 ` Linus Torvalds @ 2016-02-02 20:49 ` Willy Tarreau 2016-02-02 20:53 ` Linus Torvalds 2016-02-02 20:56 ` Hannes Frederic Sowa 2016-02-03 12:19 ` David Laight 2 siblings, 1 reply; 23+ messages in thread From: Willy Tarreau @ 2016-02-02 20:49 UTC (permalink / raw) To: Linus Torvalds Cc: Hannes Frederic Sowa, David Herrmann, David S. Miller, netdev, linux-kernel, Eric Dumazet, ???????? ??????????????????, Tetsuo Handa, Simon McVittie On Tue, Feb 02, 2016 at 12:44:54PM -0800, Linus Torvalds wrote: > On Tue, Feb 2, 2016 at 12:32 PM, Hannes Frederic Sowa > <hannes@stressinduktion.org> wrote: > > But "struct pid *" in unix_skb_parms should be enough to get us to > > corresponding "struct cred *" so we can decrement the correct counter during > > skb destruction. > > Umm. I think the "struct cred" may change in between, can't it? You mean for example in case of setuid() or something like this ? willy ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v2] unix: properly account for FDs passed over unix sockets 2016-02-02 20:49 ` Willy Tarreau @ 2016-02-02 20:53 ` Linus Torvalds 2016-02-02 20:58 ` Willy Tarreau 0 siblings, 1 reply; 23+ messages in thread From: Linus Torvalds @ 2016-02-02 20:53 UTC (permalink / raw) To: Willy Tarreau Cc: Hannes Frederic Sowa, David Herrmann, David S. Miller, netdev, linux-kernel, Eric Dumazet, ???????? ??????????????????, Tetsuo Handa, Simon McVittie On Tue, Feb 2, 2016 at 12:49 PM, Willy Tarreau <w@1wt.eu> wrote: > On Tue, Feb 02, 2016 at 12:44:54PM -0800, Linus Torvalds wrote: >> >> Umm. I think the "struct cred" may change in between, can't it? > > You mean for example in case of setuid() or something like this ? Yeah. I'd be worried about looking up the creds or user structure later, and possibly getting a different one. I'd much rather look it up at attach time, and just carry an extra pointer around. That seems to be an inherently safer model where there's no worry about "what happens if the user does X in the meantime". Linus ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v2] unix: properly account for FDs passed over unix sockets 2016-02-02 20:53 ` Linus Torvalds @ 2016-02-02 20:58 ` Willy Tarreau 0 siblings, 0 replies; 23+ messages in thread From: Willy Tarreau @ 2016-02-02 20:58 UTC (permalink / raw) To: Linus Torvalds Cc: Hannes Frederic Sowa, David Herrmann, David S. Miller, netdev, linux-kernel, Eric Dumazet, ???????? ??????????????????, Tetsuo Handa, Simon McVittie On Tue, Feb 02, 2016 at 12:53:20PM -0800, Linus Torvalds wrote: > On Tue, Feb 2, 2016 at 12:49 PM, Willy Tarreau <w@1wt.eu> wrote: > > On Tue, Feb 02, 2016 at 12:44:54PM -0800, Linus Torvalds wrote: > >> > >> Umm. I think the "struct cred" may change in between, can't it? > > > > You mean for example in case of setuid() or something like this ? > > Yeah. I'd be worried about looking up the creds or user structure > later, and possibly getting a different one. > > I'd much rather look it up at attach time, and just carry an extra > pointer around. That seems to be an inherently safer model where > there's no worry about "what happens if the user does X in the > meantime". Normally we can only change from root to non-root, and we don't apply the limits to root, so if we have the ability to only store one bit indicating "not tracked" or to simply nullify one pointer to avoid counting in flight FDs for root, we don't take the risk to recredit them to the target user after a change. I just don't know if we can do that though :-/ Willy ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v2] unix: properly account for FDs passed over unix sockets 2016-02-02 20:44 ` Linus Torvalds 2016-02-02 20:49 ` Willy Tarreau @ 2016-02-02 20:56 ` Hannes Frederic Sowa 2016-02-03 12:19 ` David Laight 2 siblings, 0 replies; 23+ messages in thread From: Hannes Frederic Sowa @ 2016-02-02 20:56 UTC (permalink / raw) To: Linus Torvalds Cc: David Herrmann, Willy Tarreau, David S. Miller, netdev, linux-kernel, Eric Dumazet, Марк Коренберг, Tetsuo Handa, Simon McVittie On 02.02.2016 21:44, Linus Torvalds wrote: > On Tue, Feb 2, 2016 at 12:32 PM, Hannes Frederic Sowa > <hannes@stressinduktion.org> wrote: >> >> Unfortunately we never transfer a scm_cookie via the skbs but merely use it >> to initialize unix_skb_parms structure in skb->cb and destroy it afterwards. > > Ok, I obviously didn't check very closely. > >> But "struct pid *" in unix_skb_parms should be enough to get us to >> corresponding "struct cred *" so we can decrement the correct counter during >> skb destruction. > > Umm. I think the "struct cred" may change in between, can't it? While reviewing the task_struct->cred/real_cred assignments, I noticed that, too. I already went the same way and added a "struct cred *" to unix_skb_parms. > So I don't think you can later look up the cred based on the pid. Yep, it also looked to dangerous to me. > Could we add the cred pointer (or just the user pointer) to the unix_skb_parms? > > Or maybe just add it to the "struct scm_fp_list"? scm_fp_list seems to be an even better place. I have a look, thanks! Hannes ^ permalink raw reply [flat|nested] 23+ messages in thread
* RE: [PATCH v2] unix: properly account for FDs passed over unix sockets 2016-02-02 20:44 ` Linus Torvalds 2016-02-02 20:49 ` Willy Tarreau 2016-02-02 20:56 ` Hannes Frederic Sowa @ 2016-02-03 12:19 ` David Laight 2 siblings, 0 replies; 23+ messages in thread From: David Laight @ 2016-02-03 12:19 UTC (permalink / raw) To: 'Linus Torvalds', Hannes Frederic Sowa Cc: David Herrmann, Willy Tarreau, David S. Miller, netdev, linux-kernel, Eric Dumazet, ???? ?????????, Tetsuo Handa, Simon McVittie From: Linus Torvalds > Sent: 02 February 2016 20:45 > On Tue, Feb 2, 2016 at 12:32 PM, Hannes Frederic Sowa > <hannes@stressinduktion.org> wrote: > > > > Unfortunately we never transfer a scm_cookie via the skbs but merely use it > > to initialize unix_skb_parms structure in skb->cb and destroy it afterwards. > > Ok, I obviously didn't check very closely. > > > But "struct pid *" in unix_skb_parms should be enough to get us to > > corresponding "struct cred *" so we can decrement the correct counter during > > skb destruction. > > Umm. I think the "struct cred" may change in between, can't it? > > So I don't think you can later look up the cred based on the pid. > > Could we add the cred pointer (or just the user pointer) to the unix_skb_parms? > > Or maybe just add it to the "struct scm_fp_list"? Is that going to work if the sending process exits before the message is read? You need a reference count against the structure than contains the count. I think this is 'struct user' not 'struct cred' David ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v2] unix: properly account for FDs passed over unix sockets 2016-02-02 17:34 ` David Herrmann 2016-02-02 18:29 ` Hannes Frederic Sowa @ 2016-02-03 11:36 ` Simon McVittie 2016-02-03 11:56 ` Hannes Frederic Sowa 2016-02-03 11:56 ` David Herrmann 1 sibling, 2 replies; 23+ messages in thread From: Simon McVittie @ 2016-02-03 11:36 UTC (permalink / raw) To: David Herrmann, Willy Tarreau Cc: David S. Miller, netdev, linux-kernel, Linus Torvalds, Eric Dumazet, Hannes Frederic Sowa, socketpair, Tetsuo Handa On 02/02/16 17:34, David Herrmann wrote: > Furthermore, with this patch in place, a process better not pass any > file-descriptors to an untrusted process. ... > Did anyone notify the dbus maintainers of this? They > might wanna document this, if not already done (CC: smcv). Sorry, I'm not clear from this message on what patterns I should be documenting as bad, and what the effect of non-compliance would be. dbus-daemon has a fd-passing feature, which uses AF_UNIX sockets' existing ability to pass fds to let users of D-Bus attach fds to their messages. The message is passed from the sending client to dbus-daemon, then from dbus-daemon to the recipient: AF_UNIX AF_UNIX | | sender -------> dbus-daemon -------> recipient | | This has been API since before I was a D-Bus maintainer, so I have no influence over its existence; just like the kernel doesn't want to break user-space, dbus-daemon doesn't want to break its users. The system dbus-daemon (dbus-daemon --system) is a privilege boundary, and accepts senders and recipients with differing privileges. Without configuration, messages are denied by default. Recipients can open this up (by installing system-wide configuration) to allow arbitrary processes to send messages to them, so that they can carry out their own discretionary access control. Since 2014, the system dbus-daemon accepts up to 16 file descriptors per message by default. There is also a session or user dbus-daemon (dbus-daemon --session) per uid, but that isn't normally a privilege boundary, so any user trying to carry out a denial of service there is only hurting themselves. Am I right in saying that the advice I give to D-Bus users should be something like this? * system services should not send fds at all, unless they trust the dbus-daemon * system services should not send fds via D-Bus that will be delivered to recipients that they do not trust * sending fds to an untrusted recipient would enable that recipient to carry out a denial-of-service attack (on what? the sender? the dbus-daemon?) -- Simon McVittie Collabora Ltd. <http://www.collabora.com/> ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v2] unix: properly account for FDs passed over unix sockets 2016-02-03 11:36 ` Simon McVittie @ 2016-02-03 11:56 ` Hannes Frederic Sowa 2016-02-03 11:56 ` David Herrmann 1 sibling, 0 replies; 23+ messages in thread From: Hannes Frederic Sowa @ 2016-02-03 11:56 UTC (permalink / raw) To: Simon McVittie, David Herrmann, Willy Tarreau Cc: David S. Miller, netdev, linux-kernel, Linus Torvalds, Eric Dumazet, socketpair, Tetsuo Handa On 03.02.2016 12:36, Simon McVittie wrote: > On 02/02/16 17:34, David Herrmann wrote: >> Furthermore, with this patch in place, a process better not pass any >> file-descriptors to an untrusted process. > ... >> Did anyone notify the dbus maintainers of this? They >> might wanna document this, if not already done (CC: smcv). > > Sorry, I'm not clear from this message on what patterns I should be > documenting as bad, and what the effect of non-compliance would be. > > dbus-daemon has a fd-passing feature, which uses AF_UNIX sockets' > existing ability to pass fds to let users of D-Bus attach fds to their > messages. The message is passed from the sending client to dbus-daemon, > then from dbus-daemon to the recipient: > > AF_UNIX AF_UNIX > | | > sender -------> dbus-daemon -------> recipient > | | > > This has been API since before I was a D-Bus maintainer, so I have no > influence over its existence; just like the kernel doesn't want to break > user-space, dbus-daemon doesn't want to break its users. > > The system dbus-daemon (dbus-daemon --system) is a privilege boundary, > and accepts senders and recipients with differing privileges. Without > configuration, messages are denied by default. Recipients can open this > up (by installing system-wide configuration) to allow arbitrary > processes to send messages to them, so that they can carry out their own > discretionary access control. Since 2014, the system dbus-daemon accepts > up to 16 file descriptors per message by default. > > There is also a session or user dbus-daemon (dbus-daemon --session) per > uid, but that isn't normally a privilege boundary, so any user trying to > carry out a denial of service there is only hurting themselves. > > Am I right in saying that the advice I give to D-Bus users should be > something like this? > > * system services should not send fds at all, unless they trust the > dbus-daemon > * system services should not send fds via D-Bus that will be delivered > to recipients that they do not trust > * sending fds to an untrusted recipient would enable that recipient to > carry out a denial-of-service attack (on what? the sender? the > dbus-daemon?) > The described behavior was simply a bug in the referenced patch. I already posted a follow-up to change this behavior so that only the current sending process is credited with the number of fds in flight: <https://patchwork.ozlabs.org/patch/577653/> Other processes (in this case the original opener of the file) isn't credited anymore if it does not send the fd itself. That said, I don't think you need to change anything or give different advice because of this thread. Thanks, Hannes ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v2] unix: properly account for FDs passed over unix sockets 2016-02-03 11:36 ` Simon McVittie 2016-02-03 11:56 ` Hannes Frederic Sowa @ 2016-02-03 11:56 ` David Herrmann 2016-02-03 12:49 ` Simon McVittie 2016-02-03 14:07 ` Hannes Frederic Sowa 1 sibling, 2 replies; 23+ messages in thread From: David Herrmann @ 2016-02-03 11:56 UTC (permalink / raw) To: Simon McVittie Cc: Willy Tarreau, David S. Miller, netdev, linux-kernel, Linus Torvalds, Eric Dumazet, Hannes Frederic Sowa, socketpair, Tetsuo Handa Hi On Wed, Feb 3, 2016 at 12:36 PM, Simon McVittie <simon.mcvittie@collabora.co.uk> wrote: > Am I right in saying that the advice I give to D-Bus users should be > something like this? > > * system services should not send fds at all, unless they trust the > dbus-daemon > * system services should not send fds via D-Bus that will be delivered > to recipients that they do not trust > * sending fds to an untrusted recipient would enable that recipient to > carry out a denial-of-service attack (on what? the sender? the > dbus-daemon?) With the revised patch from Hannes, this should no longer be needed. My original concern was only about accounting inflight-fds on the file-owner, rather than the sender. However, with Hannes' revised patch, a different DoS attack against dbus-daemon is possible. Imagine a peer that receives batches of FDs, but never dequeues them. They will be accounted on the inflight-limit of dbus-daemon, as such causing messages of independent peers to be rejected in case they carry FDs. Preferably, dbus-daemon would avoid queuing more than 16 FDs on a single destination (total). But that would require POLLOUT to be capped by the number of queued fds. A possible workaround is to add CAP_SYS_RESOURCE to dbus-daemon. Thanks David ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v2] unix: properly account for FDs passed over unix sockets 2016-02-03 11:56 ` David Herrmann @ 2016-02-03 12:49 ` Simon McVittie 2016-02-03 14:07 ` Hannes Frederic Sowa 1 sibling, 0 replies; 23+ messages in thread From: Simon McVittie @ 2016-02-03 12:49 UTC (permalink / raw) To: David Herrmann Cc: Willy Tarreau, David S. Miller, netdev, linux-kernel, Linus Torvalds, Eric Dumazet, Hannes Frederic Sowa, socketpair, Tetsuo Handa On 03/02/16 11:56, David Herrmann wrote: > However, with Hannes' revised patch, a different DoS attack against > dbus-daemon is possible. Imagine a peer that receives batches of FDs, > but never dequeues them. They will be accounted on the inflight-limit > of dbus-daemon, as such causing messages of independent peers to be > rejected in case they carry FDs. > Preferably, dbus-daemon would avoid queuing more than 16 FDs on a > single destination (total). But that would require POLLOUT to be > capped by the number of queued fds. A possible workaround is to add > CAP_SYS_RESOURCE to dbus-daemon. We have several mitigations for this: * there's a limit on outgoing fds queued inside dbus-daemon for a particular recipient connection, currently 64, and if that's exceeded, we stop accepting messages for that recipient, feeding back a send failure to the sender for messages to that recipient; * there's a time limit for how long any given fd can stay queued up inside dbus-daemon, currently 2.5 minutes, and if that's exceeded, we drop the message; * if started as root[1], we increase our fd limit to 64k before dropping privileges to the intended uid, which in combination with the maximum of 256 connections per uid and 64 fds per connection means it takes multiple uids working together to carry out a DoS; * all of those limits can be adjusted by a local sysadmin if necessary, if their users are particularly hostile (for instance 16 fds per message is a generous limit intended to avoid unforeseen breakage, and 4 would probably be enough in practice) The queueing logic is fairly involved, so I'd appreciate suggested patches on freedesktop.org Bugzilla or to dbus-security@lists.freedesktop.org if you have an idea for better limits. If you believe you have found a non-public vulnerability, please mark any Bugzilla bugs as restricted to the D-Bus security group. Thanks, S [1] The system dbus-daemon is started as root (and hence able to increase its own fd limit) on all systemd systems, anything that uses the Red Hat or Slackware sysvinit scripts bundled with dbus, and any Debian derivatives that use sysvinit and have taken security updates from at least Debian 7. Other distro-specific init system glue is up to the relevant distro. -- Simon McVittie Collabora Ltd. <http://www.collabora.com/> ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH v2] unix: properly account for FDs passed over unix sockets 2016-02-03 11:56 ` David Herrmann 2016-02-03 12:49 ` Simon McVittie @ 2016-02-03 14:07 ` Hannes Frederic Sowa 1 sibling, 0 replies; 23+ messages in thread From: Hannes Frederic Sowa @ 2016-02-03 14:07 UTC (permalink / raw) To: David Herrmann, Simon McVittie Cc: Willy Tarreau, David S. Miller, netdev, linux-kernel, Linus Torvalds, Eric Dumazet, socketpair, Tetsuo Handa On 03.02.2016 12:56, David Herrmann wrote: > However, with Hannes' revised patch, a different DoS attack against > dbus-daemon is possible. Imagine a peer that receives batches of FDs, > but never dequeues them. They will be accounted on the inflight-limit > of dbus-daemon, as such causing messages of independent peers to be > rejected in case they carry FDs. Yes, that is true. We also kind of have the problem with unconnected af-unix dgram sockets: if the receiver does not read the skbs on the receive queue we don't free up the sending socket's wmem, thus stop the socket from being destructed and can block the process during sendmsg on this socket. This is harder to DoS but pretty much the same schema. Bye, Hannes ^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH v2] unix: properly account for FDs passed over unix sockets @ 2016-01-10 6:58 Willy Tarreau 0 siblings, 0 replies; 23+ messages in thread From: Willy Tarreau @ 2016-01-10 6:58 UTC (permalink / raw) To: David S. Miller, netdev, linux-kernel Cc: Linus Torvalds, Eric Dumazet, Hannes Frederic Sowa, socketpair, Tetsuo Handa It is possible for a process to allocate and accumulate far more FDs than the process' limit by sending them over a unix socket then closing them to keep the process' fd count low. This change addresses this problem by keeping track of the number of FDs in flight per user and preventing non-privileged processes from having more FDs in flight than their configured FD limit. Reported-by: socketpair@gmail.com Reported-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> Mitigates: CVE-2013-4312 (Linux 2.0+) Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org> Signed-off-by: Willy Tarreau <w@1wt.eu> --- v2: add reported-by, mitigates and acked-by. It would be nice if (if accepted) it would be backported to -stable as the issue is currently exploitable. --- include/linux/sched.h | 1 + net/unix/af_unix.c | 24 ++++++++++++++++++++---- net/unix/garbage.c | 13 ++++++++----- 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index edad7a4..fbf25f1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -830,6 +830,7 @@ struct user_struct { unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */ #endif unsigned long locked_shm; /* How many pages of mlocked shm ? */ + unsigned long unix_inflight; /* How many files in flight in unix sockets */ #ifdef CONFIG_KEYS struct key *uid_keyring; /* UID specific keyring */ diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 45aebd9..d6d7b43 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1499,6 +1499,21 @@ static void unix_destruct_scm(struct sk_buff *skb) sock_wfree(skb); } +/* + * The "user->unix_inflight" variable is protected by the garbage + * collection lock, and we just read it locklessly here. If you go + * over the limit, there might be a tiny race in actually noticing + * it across threads. Tough. + */ +static inline bool too_many_unix_fds(struct task_struct *p) +{ + struct user_struct *user = current_user(); + + if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE))) + return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN); + return false; +} + #define MAX_RECURSION_LEVEL 4 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) @@ -1507,6 +1522,9 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) unsigned char max_level = 0; int unix_sock_count = 0; + if (too_many_unix_fds(current)) + return -ETOOMANYREFS; + for (i = scm->fp->count - 1; i >= 0; i--) { struct sock *sk = unix_get_socket(scm->fp->fp[i]); @@ -1528,10 +1546,8 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) if (!UNIXCB(skb).fp) return -ENOMEM; - if (unix_sock_count) { - for (i = scm->fp->count - 1; i >= 0; i--) - unix_inflight(scm->fp->fp[i]); - } + for (i = scm->fp->count - 1; i >= 0; i--) + unix_inflight(scm->fp->fp[i]); return max_level; } diff --git a/net/unix/garbage.c b/net/unix/garbage.c index a73a226..8fcdc22 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -120,11 +120,11 @@ void unix_inflight(struct file *fp) { struct sock *s = unix_get_socket(fp); + spin_lock(&unix_gc_lock); + if (s) { struct unix_sock *u = unix_sk(s); - spin_lock(&unix_gc_lock); - if (atomic_long_inc_return(&u->inflight) == 1) { BUG_ON(!list_empty(&u->link)); list_add_tail(&u->link, &gc_inflight_list); @@ -132,25 +132,28 @@ void unix_inflight(struct file *fp) BUG_ON(list_empty(&u->link)); } unix_tot_inflight++; - spin_unlock(&unix_gc_lock); } + fp->f_cred->user->unix_inflight++; + spin_unlock(&unix_gc_lock); } void unix_notinflight(struct file *fp) { struct sock *s = unix_get_socket(fp); + spin_lock(&unix_gc_lock); + if (s) { struct unix_sock *u = unix_sk(s); - spin_lock(&unix_gc_lock); BUG_ON(list_empty(&u->link)); if (atomic_long_dec_and_test(&u->inflight)) list_del_init(&u->link); unix_tot_inflight--; - spin_unlock(&unix_gc_lock); } + fp->f_cred->user->unix_inflight--; + spin_unlock(&unix_gc_lock); } static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), -- 1.7.12.1 ^ permalink raw reply related [flat|nested] 23+ messages in thread
end of thread, other threads:[~2016-02-03 14:07 UTC | newest] Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2016-01-10 6:54 [PATCH v2] unix: properly account for FDs passed over unix sockets Willy Tarreau 2016-01-10 6:58 ` Willy Tarreau 2016-01-11 5:05 ` David Miller 2016-02-02 17:34 ` David Herrmann 2016-02-02 18:29 ` Hannes Frederic Sowa 2016-02-02 19:29 ` Linus Torvalds 2016-02-02 20:32 ` Hannes Frederic Sowa 2016-02-02 20:39 ` Willy Tarreau 2016-02-02 21:55 ` Hannes Frederic Sowa [not found] ` <CA+55aFzqdR80MKupCs+va8vtbTU67Jobax1QAbfWNktQCXFxpA@mail.gmail.com> 2016-02-03 0:57 ` Hannes Frederic Sowa 2016-02-03 1:12 ` Hannes Frederic Sowa 2016-02-02 20:44 ` Linus Torvalds 2016-02-02 20:49 ` Willy Tarreau 2016-02-02 20:53 ` Linus Torvalds 2016-02-02 20:58 ` Willy Tarreau 2016-02-02 20:56 ` Hannes Frederic Sowa 2016-02-03 12:19 ` David Laight 2016-02-03 11:36 ` Simon McVittie 2016-02-03 11:56 ` Hannes Frederic Sowa 2016-02-03 11:56 ` David Herrmann 2016-02-03 12:49 ` Simon McVittie 2016-02-03 14:07 ` Hannes Frederic Sowa 2016-01-10 6:58 Willy Tarreau
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.