All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] unix: properly account for FDs passed over unix sockets
@ 2016-01-10  6:58 ` Willy Tarreau
  0 siblings, 0 replies; 23+ messages in thread
From: Willy Tarreau @ 2016-01-10  6:54 UTC (permalink / raw)
  To: David S. Miller, netdev, linux-kernel
  Cc: Linus Torvalds, Eric Dumazet, Hannes Frederic Sowa, socketpair,
	Tetsuo Handa

It is possible for a process to allocate and accumulate far more FDs than
the process' limit by sending them over a unix socket then closing them
to keep the process' fd count low.

This change addresses this problem by keeping track of the number of FDs
in flight per user and preventing non-privileged processes from having
more FDs in flight than their configured FD limit.

Reported-by: socketpair@gmail.com
Reported-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Mitigates: CVE-2013-4312 (Linux 2.0+)
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
v2: add reported-by, mitigates and acked-by.

It would be nice if (if accepted) it would be backported to -stable as the
issue is currently exploitable.
---
 include/linux/sched.h |  1 +
 net/unix/af_unix.c    | 24 ++++++++++++++++++++----
 net/unix/garbage.c    | 13 ++++++++-----
 3 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index edad7a4..fbf25f1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -830,6 +830,7 @@ struct user_struct {
 	unsigned long mq_bytes;	/* How many bytes can be allocated to mqueue? */
 #endif
 	unsigned long locked_shm; /* How many pages of mlocked shm ? */
+	unsigned long unix_inflight;	/* How many files in flight in unix sockets */
 
 #ifdef CONFIG_KEYS
 	struct key *uid_keyring;	/* UID specific keyring */
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 45aebd9..d6d7b43 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1499,6 +1499,21 @@ static void unix_destruct_scm(struct sk_buff *skb)
 	sock_wfree(skb);
 }
 
+/*
+ * The "user->unix_inflight" variable is protected by the garbage
+ * collection lock, and we just read it locklessly here. If you go
+ * over the limit, there might be a tiny race in actually noticing
+ * it across threads. Tough.
+ */
+static inline bool too_many_unix_fds(struct task_struct *p)
+{
+	struct user_struct *user = current_user();
+
+	if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
+		return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
+	return false;
+}
+
 #define MAX_RECURSION_LEVEL 4
 
 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
@@ -1507,6 +1522,9 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
 	unsigned char max_level = 0;
 	int unix_sock_count = 0;
 
+	if (too_many_unix_fds(current))
+		return -ETOOMANYREFS;
+
 	for (i = scm->fp->count - 1; i >= 0; i--) {
 		struct sock *sk = unix_get_socket(scm->fp->fp[i]);
 
@@ -1528,10 +1546,8 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
 	if (!UNIXCB(skb).fp)
 		return -ENOMEM;
 
-	if (unix_sock_count) {
-		for (i = scm->fp->count - 1; i >= 0; i--)
-			unix_inflight(scm->fp->fp[i]);
-	}
+	for (i = scm->fp->count - 1; i >= 0; i--)
+		unix_inflight(scm->fp->fp[i]);
 	return max_level;
 }
 
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index a73a226..8fcdc22 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -120,11 +120,11 @@ void unix_inflight(struct file *fp)
 {
 	struct sock *s = unix_get_socket(fp);
 
+	spin_lock(&unix_gc_lock);
+
 	if (s) {
 		struct unix_sock *u = unix_sk(s);
 
-		spin_lock(&unix_gc_lock);
-
 		if (atomic_long_inc_return(&u->inflight) == 1) {
 			BUG_ON(!list_empty(&u->link));
 			list_add_tail(&u->link, &gc_inflight_list);
@@ -132,25 +132,28 @@ void unix_inflight(struct file *fp)
 			BUG_ON(list_empty(&u->link));
 		}
 		unix_tot_inflight++;
-		spin_unlock(&unix_gc_lock);
 	}
+	fp->f_cred->user->unix_inflight++;
+	spin_unlock(&unix_gc_lock);
 }
 
 void unix_notinflight(struct file *fp)
 {
 	struct sock *s = unix_get_socket(fp);
 
+	spin_lock(&unix_gc_lock);
+
 	if (s) {
 		struct unix_sock *u = unix_sk(s);
 
-		spin_lock(&unix_gc_lock);
 		BUG_ON(list_empty(&u->link));
 
 		if (atomic_long_dec_and_test(&u->inflight))
 			list_del_init(&u->link);
 		unix_tot_inflight--;
-		spin_unlock(&unix_gc_lock);
 	}
+	fp->f_cred->user->unix_inflight--;
+	spin_unlock(&unix_gc_lock);
 }
 
 static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
-- 
1.7.12.1

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH v2] unix: properly account for FDs passed over unix sockets
@ 2016-01-10  6:58 ` Willy Tarreau
  0 siblings, 0 replies; 23+ messages in thread
From: Willy Tarreau @ 2016-01-10  6:58 UTC (permalink / raw)
  To: David S. Miller, netdev, linux-kernel
  Cc: Linus Torvalds, Eric Dumazet, Hannes Frederic Sowa, socketpair,
	Tetsuo Handa

It is possible for a process to allocate and accumulate far more FDs than
the process' limit by sending them over a unix socket then closing them
to keep the process' fd count low.

This change addresses this problem by keeping track of the number of FDs
in flight per user and preventing non-privileged processes from having
more FDs in flight than their configured FD limit.

Reported-by: socketpair@gmail.com
Reported-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Mitigates: CVE-2013-4312 (Linux 2.0+)
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
v2: add reported-by, mitigates and acked-by.

It would be nice if (if accepted) it would be backported to -stable as the
issue is currently exploitable.
---
 include/linux/sched.h |  1 +
 net/unix/af_unix.c    | 24 ++++++++++++++++++++----
 net/unix/garbage.c    | 13 ++++++++-----
 3 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index edad7a4..fbf25f1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -830,6 +830,7 @@ struct user_struct {
 	unsigned long mq_bytes;	/* How many bytes can be allocated to mqueue? */
 #endif
 	unsigned long locked_shm; /* How many pages of mlocked shm ? */
+	unsigned long unix_inflight;	/* How many files in flight in unix sockets */
 
 #ifdef CONFIG_KEYS
 	struct key *uid_keyring;	/* UID specific keyring */
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 45aebd9..d6d7b43 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1499,6 +1499,21 @@ static void unix_destruct_scm(struct sk_buff *skb)
 	sock_wfree(skb);
 }
 
+/*
+ * The "user->unix_inflight" variable is protected by the garbage
+ * collection lock, and we just read it locklessly here. If you go
+ * over the limit, there might be a tiny race in actually noticing
+ * it across threads. Tough.
+ */
+static inline bool too_many_unix_fds(struct task_struct *p)
+{
+	struct user_struct *user = current_user();
+
+	if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
+		return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
+	return false;
+}
+
 #define MAX_RECURSION_LEVEL 4
 
 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
@@ -1507,6 +1522,9 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
 	unsigned char max_level = 0;
 	int unix_sock_count = 0;
 
+	if (too_many_unix_fds(current))
+		return -ETOOMANYREFS;
+
 	for (i = scm->fp->count - 1; i >= 0; i--) {
 		struct sock *sk = unix_get_socket(scm->fp->fp[i]);
 
@@ -1528,10 +1546,8 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
 	if (!UNIXCB(skb).fp)
 		return -ENOMEM;
 
-	if (unix_sock_count) {
-		for (i = scm->fp->count - 1; i >= 0; i--)
-			unix_inflight(scm->fp->fp[i]);
-	}
+	for (i = scm->fp->count - 1; i >= 0; i--)
+		unix_inflight(scm->fp->fp[i]);
 	return max_level;
 }
 
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index a73a226..8fcdc22 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -120,11 +120,11 @@ void unix_inflight(struct file *fp)
 {
 	struct sock *s = unix_get_socket(fp);
 
+	spin_lock(&unix_gc_lock);
+
 	if (s) {
 		struct unix_sock *u = unix_sk(s);
 
-		spin_lock(&unix_gc_lock);
-
 		if (atomic_long_inc_return(&u->inflight) == 1) {
 			BUG_ON(!list_empty(&u->link));
 			list_add_tail(&u->link, &gc_inflight_list);
@@ -132,25 +132,28 @@ void unix_inflight(struct file *fp)
 			BUG_ON(list_empty(&u->link));
 		}
 		unix_tot_inflight++;
-		spin_unlock(&unix_gc_lock);
 	}
+	fp->f_cred->user->unix_inflight++;
+	spin_unlock(&unix_gc_lock);
 }
 
 void unix_notinflight(struct file *fp)
 {
 	struct sock *s = unix_get_socket(fp);
 
+	spin_lock(&unix_gc_lock);
+
 	if (s) {
 		struct unix_sock *u = unix_sk(s);
 
-		spin_lock(&unix_gc_lock);
 		BUG_ON(list_empty(&u->link));
 
 		if (atomic_long_dec_and_test(&u->inflight))
 			list_del_init(&u->link);
 		unix_tot_inflight--;
-		spin_unlock(&unix_gc_lock);
 	}
+	fp->f_cred->user->unix_inflight--;
+	spin_unlock(&unix_gc_lock);
 }
 
 static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
-- 
1.7.12.1

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2] unix: properly account for FDs passed over unix sockets
  2016-01-10  6:58 ` Willy Tarreau
  (?)
@ 2016-01-11  5:05 ` David Miller
  -1 siblings, 0 replies; 23+ messages in thread
From: David Miller @ 2016-01-11  5:05 UTC (permalink / raw)
  To: w
  Cc: netdev, linux-kernel, torvalds, edumazet, hannes, socketpair,
	penguin-kernel

From: Willy Tarreau <w@1wt.eu>
Date: Sun, Jan 10 07:54:56 CET 2016

> It is possible for a process to allocate and accumulate far more FDs than
> the process' limit by sending them over a unix socket then closing them
> to keep the process' fd count low.
> 
> This change addresses this problem by keeping track of the number of FDs
> in flight per user and preventing non-privileged processes from having
> more FDs in flight than their configured FD limit.
> 
> Reported-by: socketpair@gmail.com
> Reported-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
> Mitigates: CVE-2013-4312 (Linux 2.0+)
> Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
> Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
> Signed-off-by: Willy Tarreau <w@1wt.eu>

Applied and queued up for -stable, thanks!

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2] unix: properly account for FDs passed over unix sockets
  2016-01-10  6:58 ` Willy Tarreau
  (?)
  (?)
@ 2016-02-02 17:34 ` David Herrmann
  2016-02-02 18:29   ` Hannes Frederic Sowa
  2016-02-03 11:36   ` Simon McVittie
  -1 siblings, 2 replies; 23+ messages in thread
From: David Herrmann @ 2016-02-02 17:34 UTC (permalink / raw)
  To: Willy Tarreau
  Cc: David S. Miller, netdev, linux-kernel, Linus Torvalds,
	Eric Dumazet, Hannes Frederic Sowa, socketpair, Tetsuo Handa,
	Simon McVittie

Hi

On Sun, Jan 10, 2016 at 7:54 AM, Willy Tarreau <w@1wt.eu> wrote:
> It is possible for a process to allocate and accumulate far more FDs than
> the process' limit by sending them over a unix socket then closing them
> to keep the process' fd count low.
>
> This change addresses this problem by keeping track of the number of FDs
> in flight per user and preventing non-privileged processes from having
> more FDs in flight than their configured FD limit.

This is not really what this patch does. This patch rather prevents
processes from having more of their owned *files* in flight than their
configured FD limit. See below for details..

> Reported-by: socketpair@gmail.com
> Reported-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
> Mitigates: CVE-2013-4312 (Linux 2.0+)
> Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
> Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
> Signed-off-by: Willy Tarreau <w@1wt.eu>
> ---
> v2: add reported-by, mitigates and acked-by.
>
> It would be nice if (if accepted) it would be backported to -stable as the
> issue is currently exploitable.
> ---
>  include/linux/sched.h |  1 +
>  net/unix/af_unix.c    | 24 ++++++++++++++++++++----
>  net/unix/garbage.c    | 13 ++++++++-----
>  3 files changed, 29 insertions(+), 9 deletions(-)
>
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index edad7a4..fbf25f1 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -830,6 +830,7 @@ struct user_struct {
>         unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */
>  #endif
>         unsigned long locked_shm; /* How many pages of mlocked shm ? */
> +       unsigned long unix_inflight;    /* How many files in flight in unix sockets */
>
>  #ifdef CONFIG_KEYS
>         struct key *uid_keyring;        /* UID specific keyring */
> diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
> index 45aebd9..d6d7b43 100644
> --- a/net/unix/af_unix.c
> +++ b/net/unix/af_unix.c
> @@ -1499,6 +1499,21 @@ static void unix_destruct_scm(struct sk_buff *skb)
>         sock_wfree(skb);
>  }
>
> +/*
> + * The "user->unix_inflight" variable is protected by the garbage
> + * collection lock, and we just read it locklessly here. If you go
> + * over the limit, there might be a tiny race in actually noticing
> + * it across threads. Tough.

This limit is checked once per transaction. IIRC, a transaction can
carry 255 files. Thus, it is easy to exceed this limit without any
race involved.

> + */
> +static inline bool too_many_unix_fds(struct task_struct *p)
> +{
> +       struct user_struct *user = current_user();
> +
> +       if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
> +               return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
> +       return false;
> +}
> +
>  #define MAX_RECURSION_LEVEL 4
>
>  static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
> @@ -1507,6 +1522,9 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
>         unsigned char max_level = 0;
>         int unix_sock_count = 0;
>
> +       if (too_many_unix_fds(current))
> +               return -ETOOMANYREFS;
> +

Ignoring the capabilities, this effectively resolves to:

    if (current_cred()->user->unix_inflight > rlimit(RLIMIT_NOFILE))
            return -ETOOMANYREFS;

It limits the number of inflight FDs of the *current* user to its *own* limit.

But..

>         for (i = scm->fp->count - 1; i >= 0; i--) {
>                 struct sock *sk = unix_get_socket(scm->fp->fp[i]);
>
> @@ -1528,10 +1546,8 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
>         if (!UNIXCB(skb).fp)
>                 return -ENOMEM;
>
> -       if (unix_sock_count) {
> -               for (i = scm->fp->count - 1; i >= 0; i--)
> -                       unix_inflight(scm->fp->fp[i]);
> -       }
> +       for (i = scm->fp->count - 1; i >= 0; i--)
> +               unix_inflight(scm->fp->fp[i]);
>         return max_level;
>  }
>
> diff --git a/net/unix/garbage.c b/net/unix/garbage.c
> index a73a226..8fcdc22 100644
> --- a/net/unix/garbage.c
> +++ b/net/unix/garbage.c
> @@ -120,11 +120,11 @@ void unix_inflight(struct file *fp)
>  {
>         struct sock *s = unix_get_socket(fp);
>
> +       spin_lock(&unix_gc_lock);
> +
>         if (s) {
>                 struct unix_sock *u = unix_sk(s);
>
> -               spin_lock(&unix_gc_lock);
> -
>                 if (atomic_long_inc_return(&u->inflight) == 1) {
>                         BUG_ON(!list_empty(&u->link));
>                         list_add_tail(&u->link, &gc_inflight_list);
> @@ -132,25 +132,28 @@ void unix_inflight(struct file *fp)
>                         BUG_ON(list_empty(&u->link));
>                 }
>                 unix_tot_inflight++;
> -               spin_unlock(&unix_gc_lock);
>         }
> +       fp->f_cred->user->unix_inflight++;

..this modifies the limit of the owner of the file you send. As such,
if you only send files that you don't own, you will continuously bump
the limit of the file-owner, but never your own. As such, the
protection above will never fire.

Is this really what this patch is supposed to do? Shouldn't the loop
in unix_attach_fds() rather check for this:

    if (unlikely(fp->f_cred->user->unix_inflight > nofile &&
            !file_ns_capable(fp, &init_user_ns, CAP_SYS_RESOURCE) &&
            !file_ns_capable(fp, &init_user_ns, CAP_SYS_ADMIN)))
            return -ETOOMANYREFS;

(or keeping capable() rather than file_ns_capable(), depending on the
wanted semantics)

Furthermore, with this patch in place, a process better not pass any
file-descriptors to an untrusted process. This might have been a
stupid idea in the first place, but I would have expected the patch to
mention this. Because with this patch in place, a receiver of a
file-descriptor can bump the unix_inflight limit of the sender
arbitrarily. Did anyone notify the dbus maintainers of this? They
might wanna document this, if not already done (CC: smcv).

Why doesn't this patch modify "unix_inflight" of the sender rather
than the passed descriptors? It would require pinning the affected
user in 'scm' contexts, but that is probably already the case, anyway.
This way, the original ETOOMANYREFS check would be perfectly fine.

Anyway, can someone provide a high-level description of what exactly
this patch is supposed to do? Which operation should be limited, who
should inflight FDs be accounted on, and which rlimit should be used
on each operation? I'm having a hard time auditing existing
user-space, given just the scarce description of this commit.

Thanks
David

> +       spin_unlock(&unix_gc_lock);
>  }
>
>  void unix_notinflight(struct file *fp)
>  {
>         struct sock *s = unix_get_socket(fp);
>
> +       spin_lock(&unix_gc_lock);
> +
>         if (s) {
>                 struct unix_sock *u = unix_sk(s);
>
> -               spin_lock(&unix_gc_lock);
>                 BUG_ON(list_empty(&u->link));
>
>                 if (atomic_long_dec_and_test(&u->inflight))
>                         list_del_init(&u->link);
>                 unix_tot_inflight--;
> -               spin_unlock(&unix_gc_lock);
>         }
> +       fp->f_cred->user->unix_inflight--;
> +       spin_unlock(&unix_gc_lock);
>  }
>
>  static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
> --
> 1.7.12.1
>
>
>

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2] unix: properly account for FDs passed over unix sockets
  2016-02-02 17:34 ` David Herrmann
@ 2016-02-02 18:29   ` Hannes Frederic Sowa
  2016-02-02 19:29     ` Linus Torvalds
  2016-02-03 11:36   ` Simon McVittie
  1 sibling, 1 reply; 23+ messages in thread
From: Hannes Frederic Sowa @ 2016-02-02 18:29 UTC (permalink / raw)
  To: David Herrmann, Willy Tarreau
  Cc: David S. Miller, netdev, linux-kernel, Linus Torvalds,
	Eric Dumazet, socketpair, Tetsuo Handa, Simon McVittie

On 02.02.2016 18:34, David Herrmann wrote:
> Hi
>
> On Sun, Jan 10, 2016 at 7:54 AM, Willy Tarreau <w@1wt.eu> wrote:
>> It is possible for a process to allocate and accumulate far more FDs than
>> the process' limit by sending them over a unix socket then closing them
>> to keep the process' fd count low.
>>
>> This change addresses this problem by keeping track of the number of FDs
>> in flight per user and preventing non-privileged processes from having
>> more FDs in flight than their configured FD limit.
>
> This is not really what this patch does. This patch rather prevents
> processes from having more of their owned *files* in flight than their
> configured FD limit. See below for details..
>
>> Reported-by: socketpair@gmail.com
>> Reported-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
>> Mitigates: CVE-2013-4312 (Linux 2.0+)
>> Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
>> Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
>> Signed-off-by: Willy Tarreau <w@1wt.eu>
>> ---
>> v2: add reported-by, mitigates and acked-by.
>>
>> It would be nice if (if accepted) it would be backported to -stable as the
>> issue is currently exploitable.
>> ---
>>   include/linux/sched.h |  1 +
>>   net/unix/af_unix.c    | 24 ++++++++++++++++++++----
>>   net/unix/garbage.c    | 13 ++++++++-----
>>   3 files changed, 29 insertions(+), 9 deletions(-)
>>
>> diff --git a/include/linux/sched.h b/include/linux/sched.h
>> index edad7a4..fbf25f1 100644
>> --- a/include/linux/sched.h
>> +++ b/include/linux/sched.h
>> @@ -830,6 +830,7 @@ struct user_struct {
>>          unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */
>>   #endif
>>          unsigned long locked_shm; /* How many pages of mlocked shm ? */
>> +       unsigned long unix_inflight;    /* How many files in flight in unix sockets */
>>
>>   #ifdef CONFIG_KEYS
>>          struct key *uid_keyring;        /* UID specific keyring */
>> diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
>> index 45aebd9..d6d7b43 100644
>> --- a/net/unix/af_unix.c
>> +++ b/net/unix/af_unix.c
>> @@ -1499,6 +1499,21 @@ static void unix_destruct_scm(struct sk_buff *skb)
>>          sock_wfree(skb);
>>   }
>>
>> +/*
>> + * The "user->unix_inflight" variable is protected by the garbage
>> + * collection lock, and we just read it locklessly here. If you go
>> + * over the limit, there might be a tiny race in actually noticing
>> + * it across threads. Tough.
>
> This limit is checked once per transaction. IIRC, a transaction can
> carry 255 files. Thus, it is easy to exceed this limit without any
> race involved.
>
>> + */
>> +static inline bool too_many_unix_fds(struct task_struct *p)
>> +{
>> +       struct user_struct *user = current_user();
>> +
>> +       if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
>> +               return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
>> +       return false;
>> +}
>> +
>>   #define MAX_RECURSION_LEVEL 4
>>
>>   static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
>> @@ -1507,6 +1522,9 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
>>          unsigned char max_level = 0;
>>          int unix_sock_count = 0;
>>
>> +       if (too_many_unix_fds(current))
>> +               return -ETOOMANYREFS;
>> +
>
> Ignoring the capabilities, this effectively resolves to:
>
>      if (current_cred()->user->unix_inflight > rlimit(RLIMIT_NOFILE))
>              return -ETOOMANYREFS;
>
> It limits the number of inflight FDs of the *current* user to its *own* limit.
>
> But..
>
>>          for (i = scm->fp->count - 1; i >= 0; i--) {
>>                  struct sock *sk = unix_get_socket(scm->fp->fp[i]);
>>
>> @@ -1528,10 +1546,8 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
>>          if (!UNIXCB(skb).fp)
>>                  return -ENOMEM;
>>
>> -       if (unix_sock_count) {
>> -               for (i = scm->fp->count - 1; i >= 0; i--)
>> -                       unix_inflight(scm->fp->fp[i]);
>> -       }
>> +       for (i = scm->fp->count - 1; i >= 0; i--)
>> +               unix_inflight(scm->fp->fp[i]);
>>          return max_level;
>>   }
>>
>> diff --git a/net/unix/garbage.c b/net/unix/garbage.c
>> index a73a226..8fcdc22 100644
>> --- a/net/unix/garbage.c
>> +++ b/net/unix/garbage.c
>> @@ -120,11 +120,11 @@ void unix_inflight(struct file *fp)
>>   {
>>          struct sock *s = unix_get_socket(fp);
>>
>> +       spin_lock(&unix_gc_lock);
>> +
>>          if (s) {
>>                  struct unix_sock *u = unix_sk(s);
>>
>> -               spin_lock(&unix_gc_lock);
>> -
>>                  if (atomic_long_inc_return(&u->inflight) == 1) {
>>                          BUG_ON(!list_empty(&u->link));
>>                          list_add_tail(&u->link, &gc_inflight_list);
>> @@ -132,25 +132,28 @@ void unix_inflight(struct file *fp)
>>                          BUG_ON(list_empty(&u->link));
>>                  }
>>                  unix_tot_inflight++;
>> -               spin_unlock(&unix_gc_lock);
>>          }
>> +       fp->f_cred->user->unix_inflight++;
>
> ..this modifies the limit of the owner of the file you send. As such,
> if you only send files that you don't own, you will continuously bump
> the limit of the file-owner, but never your own. As such, the
> protection above will never fire.
>
> Is this really what this patch is supposed to do? Shouldn't the loop
> in unix_attach_fds() rather check for this:
>
>      if (unlikely(fp->f_cred->user->unix_inflight > nofile &&
>              !file_ns_capable(fp, &init_user_ns, CAP_SYS_RESOURCE) &&
>              !file_ns_capable(fp, &init_user_ns, CAP_SYS_ADMIN)))
>              return -ETOOMANYREFS;
>
> (or keeping capable() rather than file_ns_capable(), depending on the
> wanted semantics)
>
> Furthermore, with this patch in place, a process better not pass any
> file-descriptors to an untrusted process. This might have been a
> stupid idea in the first place, but I would have expected the patch to
> mention this. Because with this patch in place, a receiver of a
> file-descriptor can bump the unix_inflight limit of the sender
> arbitrarily. Did anyone notify the dbus maintainers of this? They
> might wanna document this, if not already done (CC: smcv).
>
> Why doesn't this patch modify "unix_inflight" of the sender rather
> than the passed descriptors? It would require pinning the affected
> user in 'scm' contexts, but that is probably already the case, anyway.
> This way, the original ETOOMANYREFS check would be perfectly fine.
>
> Anyway, can someone provide a high-level description of what exactly
> this patch is supposed to do? Which operation should be limited, who
> should inflight FDs be accounted on, and which rlimit should be used
> on each operation? I'm having a hard time auditing existing
> user-space, given just the scarce description of this commit.

Yes, all your observations are true. I think we need to explicitly
need to refer to the sending socket while attaching the fds.

Good thing is that we don't switch skb->sk while appending the skb to
the receive queue. First quick prototype which is completely untested
(only compile-tested):

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 2a91a0561a4783..d7148ae04b02dd 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -6,8 +6,8 @@
  #include <linux/mutex.h>
  #include <net/sock.h>

-void unix_inflight(struct file *fp);
-void unix_notinflight(struct file *fp);
+void unix_inflight(struct sock *sk, struct file *fp);
+void unix_notinflight(struct sock *sk, struct file *fp);
  void unix_gc(void);
  void wait_for_unix_gc(void);
  struct sock *unix_get_socket(struct file *filp);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 49d5093eb0553a..7bbb4762899924 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1496,7 +1496,7 @@ static void unix_detach_fds(struct scm_cookie 
*scm, struct sk_buff *skb)
  	UNIXCB(skb).fp = NULL;

  	for (i = scm->fp->count-1; i >= 0; i--)
-		unix_notinflight(scm->fp->fp[i]);
+		unix_notinflight(skb->sk, scm->fp->fp[i]);
  }

  static void unix_destruct_scm(struct sk_buff *skb)
@@ -1561,7 +1561,7 @@ static int unix_attach_fds(struct scm_cookie *scm, 
struct sk_buff *skb)
  		return -ENOMEM;

  	for (i = scm->fp->count - 1; i >= 0; i--)
-		unix_inflight(scm->fp->fp[i]);
+		unix_inflight(skb->sk, scm->fp->fp[i]);
  	return max_level;
  }

diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 8fcdc2283af50c..874c42161717f0 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -116,7 +116,7 @@ struct sock *unix_get_socket(struct file *filp)
   * descriptor if it is for an AF_UNIX socket.
   */

-void unix_inflight(struct file *fp)
+void unix_inflight(struct sock *sk, struct file *fp)
  {
  	struct sock *s = unix_get_socket(fp);

@@ -133,11 +133,11 @@ void unix_inflight(struct file *fp)
  		}
  		unix_tot_inflight++;
  	}
-	fp->f_cred->user->unix_inflight++;
+	sk->sk_socket->file->f_cred->user->unix_inflight++;
  	spin_unlock(&unix_gc_lock);
  }

-void unix_notinflight(struct file *fp)
+void unix_notinflight(struct sock *sk, struct file *fp)
  {
  	struct sock *s = unix_get_socket(fp);

@@ -152,7 +152,7 @@ void unix_notinflight(struct file *fp)
  			list_del_init(&u->link);
  		unix_tot_inflight--;
  	}
-	fp->f_cred->user->unix_inflight--;
+	sk->sk_socket->file->f_cred->user->unix_inflight++;
  	spin_unlock(&unix_gc_lock);
  }

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2] unix: properly account for FDs passed over unix sockets
  2016-02-02 18:29   ` Hannes Frederic Sowa
@ 2016-02-02 19:29     ` Linus Torvalds
  2016-02-02 20:32       ` Hannes Frederic Sowa
  0 siblings, 1 reply; 23+ messages in thread
From: Linus Torvalds @ 2016-02-02 19:29 UTC (permalink / raw)
  To: Hannes Frederic Sowa
  Cc: David Herrmann, Willy Tarreau, David S. Miller, netdev,
	linux-kernel, Eric Dumazet,
	Марк
	Коренберг,
	Tetsuo Handa, Simon McVittie

On Tue, Feb 2, 2016 at 10:29 AM, Hannes Frederic Sowa
<hannes@stressinduktion.org> wrote:
>>
>> Anyway, can someone provide a high-level description of what exactly
>> this patch is supposed to do? Which operation should be limited, who
>> should inflight FDs be accounted on, and which rlimit should be used
>> on each operation? I'm having a hard time auditing existing
>> user-space, given just the scarce description of this commit.
>
> Yes, all your observations are true. I think we need to explicitly
> need to refer to the sending socket while attaching the fds.

I don't think that really helps. Maybe somebody passed a unix domain
socket around, and now we're crediting the wrong socket again.

So how about we actually add a "struct cred *" to the scm_cookie
itself, and we initialize it to "get_current_cred()". And then always
use that.

That way it's always the person who actually does the send (rather
than the opener of the socket _or_ the opener of the file that gets
passed around) that gets credited, and thanks to the cred pointer we
can then de-credit them properly.

Hmm?

                Linus

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2] unix: properly account for FDs passed over unix sockets
  2016-02-02 19:29     ` Linus Torvalds
@ 2016-02-02 20:32       ` Hannes Frederic Sowa
  2016-02-02 20:39         ` Willy Tarreau
  2016-02-02 20:44         ` Linus Torvalds
  0 siblings, 2 replies; 23+ messages in thread
From: Hannes Frederic Sowa @ 2016-02-02 20:32 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: David Herrmann, Willy Tarreau, David S. Miller, netdev,
	linux-kernel, Eric Dumazet,
	Марк
	Коренберг,
	Tetsuo Handa, Simon McVittie

On 02.02.2016 20:29, Linus Torvalds wrote:
> On Tue, Feb 2, 2016 at 10:29 AM, Hannes Frederic Sowa
> <hannes@stressinduktion.org> wrote:
>>>
>>> Anyway, can someone provide a high-level description of what exactly
>>> this patch is supposed to do? Which operation should be limited, who
>>> should inflight FDs be accounted on, and which rlimit should be used
>>> on each operation? I'm having a hard time auditing existing
>>> user-space, given just the scarce description of this commit.
>>
>> Yes, all your observations are true. I think we need to explicitly
>> need to refer to the sending socket while attaching the fds.
>
> I don't think that really helps. Maybe somebody passed a unix domain
> socket around, and now we're crediting the wrong socket again.

I was struggling a bit what you meant but I think you are referring to 
the following scenario:

process-1 opens up a unix socket and passes it to process-2 (this 
process has different credentials) via af-unix. Process-2 then sends 
multiple fds to another destination over this transferred unix-fd.

True, in this case we again account the fds to the wrong process, which 
is bad.

> So how about we actually add a "struct cred *" to the scm_cookie
> itself, and we initialize it to "get_current_cred()". And then always
> use that.

Unfortunately we never transfer a scm_cookie via the skbs but merely use 
it to initialize unix_skb_parms structure in skb->cb and destroy it 
afterwards.

But "struct pid *" in unix_skb_parms should be enough to get us to 
corresponding "struct cred *" so we can decrement the correct counter 
during skb destruction.

So:

We increment current task's unix_inflight and also check the current 
task's limit during attaching fds to skbs and decrement the inflight 
counter via "struct pid *". This looks like it should work.

> That way it's always the person who actually does the send (rather
> than the opener of the socket _or_ the opener of the file that gets
> passed around) that gets credited, and thanks to the cred pointer we
> can then de-credit them properly.

Exactly, I try to implement that. Thanks a lot!

Hannes

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2] unix: properly account for FDs passed over unix sockets
  2016-02-02 20:32       ` Hannes Frederic Sowa
@ 2016-02-02 20:39         ` Willy Tarreau
  2016-02-02 21:55           ` Hannes Frederic Sowa
  2016-02-02 20:44         ` Linus Torvalds
  1 sibling, 1 reply; 23+ messages in thread
From: Willy Tarreau @ 2016-02-02 20:39 UTC (permalink / raw)
  To: Hannes Frederic Sowa
  Cc: Linus Torvalds, David Herrmann, David S. Miller, netdev,
	linux-kernel, Eric Dumazet, ???????? ??????????????????,
	Tetsuo Handa, Simon McVittie

On Tue, Feb 02, 2016 at 09:32:56PM +0100, Hannes Frederic Sowa wrote:
> But "struct pid *" in unix_skb_parms should be enough to get us to 
> corresponding "struct cred *" so we can decrement the correct counter 
> during skb destruction.
> 
> So:
> 
> We increment current task's unix_inflight and also check the current 
> task's limit during attaching fds to skbs and decrement the inflight 
> counter via "struct pid *". This looks like it should work.

I like it as well, the principle sounds sane.

> >That way it's always the person who actually does the send (rather
> >than the opener of the socket _or_ the opener of the file that gets
> >passed around) that gets credited, and thanks to the cred pointer we
> >can then de-credit them properly.
> 
> Exactly, I try to implement that. Thanks a lot!

Thanks to you Hannes, I appreciate that you work on it, it would take
much more time to me to dig into this.

Willy

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2] unix: properly account for FDs passed over unix sockets
  2016-02-02 20:32       ` Hannes Frederic Sowa
  2016-02-02 20:39         ` Willy Tarreau
@ 2016-02-02 20:44         ` Linus Torvalds
  2016-02-02 20:49           ` Willy Tarreau
                             ` (2 more replies)
  1 sibling, 3 replies; 23+ messages in thread
From: Linus Torvalds @ 2016-02-02 20:44 UTC (permalink / raw)
  To: Hannes Frederic Sowa
  Cc: David Herrmann, Willy Tarreau, David S. Miller, netdev,
	linux-kernel, Eric Dumazet,
	Марк
	Коренберг,
	Tetsuo Handa, Simon McVittie

On Tue, Feb 2, 2016 at 12:32 PM, Hannes Frederic Sowa
<hannes@stressinduktion.org> wrote:
>
> Unfortunately we never transfer a scm_cookie via the skbs but merely use it
> to initialize unix_skb_parms structure in skb->cb and destroy it afterwards.

Ok, I obviously didn't check very closely.

> But "struct pid *" in unix_skb_parms should be enough to get us to
> corresponding "struct cred *" so we can decrement the correct counter during
> skb destruction.

Umm. I think the "struct cred" may change in between, can't it?

So I don't think you can later look up the cred based on the pid.

Could we add the cred pointer (or just the user pointer) to the unix_skb_parms?

Or maybe just add it to the "struct scm_fp_list"?

               Linus

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2] unix: properly account for FDs passed over unix sockets
  2016-02-02 20:44         ` Linus Torvalds
@ 2016-02-02 20:49           ` Willy Tarreau
  2016-02-02 20:53             ` Linus Torvalds
  2016-02-02 20:56           ` Hannes Frederic Sowa
  2016-02-03 12:19           ` David Laight
  2 siblings, 1 reply; 23+ messages in thread
From: Willy Tarreau @ 2016-02-02 20:49 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Hannes Frederic Sowa, David Herrmann, David S. Miller, netdev,
	linux-kernel, Eric Dumazet, ???????? ??????????????????,
	Tetsuo Handa, Simon McVittie

On Tue, Feb 02, 2016 at 12:44:54PM -0800, Linus Torvalds wrote:
> On Tue, Feb 2, 2016 at 12:32 PM, Hannes Frederic Sowa
> <hannes@stressinduktion.org> wrote:
> > But "struct pid *" in unix_skb_parms should be enough to get us to
> > corresponding "struct cred *" so we can decrement the correct counter during
> > skb destruction.
> 
> Umm. I think the "struct cred" may change in between, can't it?

You mean for example in case of setuid() or something like this ?

willy

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2] unix: properly account for FDs passed over unix sockets
  2016-02-02 20:49           ` Willy Tarreau
@ 2016-02-02 20:53             ` Linus Torvalds
  2016-02-02 20:58               ` Willy Tarreau
  0 siblings, 1 reply; 23+ messages in thread
From: Linus Torvalds @ 2016-02-02 20:53 UTC (permalink / raw)
  To: Willy Tarreau
  Cc: Hannes Frederic Sowa, David Herrmann, David S. Miller, netdev,
	linux-kernel, Eric Dumazet, ???????? ??????????????????,
	Tetsuo Handa, Simon McVittie

On Tue, Feb 2, 2016 at 12:49 PM, Willy Tarreau <w@1wt.eu> wrote:
> On Tue, Feb 02, 2016 at 12:44:54PM -0800, Linus Torvalds wrote:
>>
>> Umm. I think the "struct cred" may change in between, can't it?
>
> You mean for example in case of setuid() or something like this ?

Yeah. I'd be worried about looking up the creds or user structure
later, and possibly getting a different one.

I'd much rather look it up at attach time, and just carry an extra
pointer around. That seems to be an inherently safer model where
there's no worry about "what happens if the user does X in the
meantime".

                  Linus

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2] unix: properly account for FDs passed over unix sockets
  2016-02-02 20:44         ` Linus Torvalds
  2016-02-02 20:49           ` Willy Tarreau
@ 2016-02-02 20:56           ` Hannes Frederic Sowa
  2016-02-03 12:19           ` David Laight
  2 siblings, 0 replies; 23+ messages in thread
From: Hannes Frederic Sowa @ 2016-02-02 20:56 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: David Herrmann, Willy Tarreau, David S. Miller, netdev,
	linux-kernel, Eric Dumazet,
	Марк
	Коренберг,
	Tetsuo Handa, Simon McVittie

On 02.02.2016 21:44, Linus Torvalds wrote:
> On Tue, Feb 2, 2016 at 12:32 PM, Hannes Frederic Sowa
> <hannes@stressinduktion.org> wrote:
>>
>> Unfortunately we never transfer a scm_cookie via the skbs but merely use it
>> to initialize unix_skb_parms structure in skb->cb and destroy it afterwards.
>
> Ok, I obviously didn't check very closely.
>
>> But "struct pid *" in unix_skb_parms should be enough to get us to
>> corresponding "struct cred *" so we can decrement the correct counter during
>> skb destruction.
>
> Umm. I think the "struct cred" may change in between, can't it?

While reviewing the task_struct->cred/real_cred assignments, I noticed 
that, too. I already went the same way and added a "struct cred *" to 
unix_skb_parms.

> So I don't think you can later look up the cred based on the pid.

Yep, it also looked to dangerous to me.

> Could we add the cred pointer (or just the user pointer) to the unix_skb_parms?
>
> Or maybe just add it to the "struct scm_fp_list"?

scm_fp_list seems to be an even better place. I have a look, thanks!

Hannes

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2] unix: properly account for FDs passed over unix sockets
  2016-02-02 20:53             ` Linus Torvalds
@ 2016-02-02 20:58               ` Willy Tarreau
  0 siblings, 0 replies; 23+ messages in thread
From: Willy Tarreau @ 2016-02-02 20:58 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Hannes Frederic Sowa, David Herrmann, David S. Miller, netdev,
	linux-kernel, Eric Dumazet, ???????? ??????????????????,
	Tetsuo Handa, Simon McVittie

On Tue, Feb 02, 2016 at 12:53:20PM -0800, Linus Torvalds wrote:
> On Tue, Feb 2, 2016 at 12:49 PM, Willy Tarreau <w@1wt.eu> wrote:
> > On Tue, Feb 02, 2016 at 12:44:54PM -0800, Linus Torvalds wrote:
> >>
> >> Umm. I think the "struct cred" may change in between, can't it?
> >
> > You mean for example in case of setuid() or something like this ?
> 
> Yeah. I'd be worried about looking up the creds or user structure
> later, and possibly getting a different one.
> 
> I'd much rather look it up at attach time, and just carry an extra
> pointer around. That seems to be an inherently safer model where
> there's no worry about "what happens if the user does X in the
> meantime".

Normally we can only change from root to non-root, and we don't apply
the limits to root, so if we have the ability to only store one bit
indicating "not tracked" or to simply nullify one pointer to avoid
counting in flight FDs for root, we don't take the risk to recredit
them to the target user after a change.

I just don't know if we can do that though :-/

Willy

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2] unix: properly account for FDs passed over unix sockets
  2016-02-02 20:39         ` Willy Tarreau
@ 2016-02-02 21:55           ` Hannes Frederic Sowa
       [not found]             ` <CA+55aFzqdR80MKupCs+va8vtbTU67Jobax1QAbfWNktQCXFxpA@mail.gmail.com>
  0 siblings, 1 reply; 23+ messages in thread
From: Hannes Frederic Sowa @ 2016-02-02 21:55 UTC (permalink / raw)
  To: Willy Tarreau
  Cc: Linus Torvalds, David Herrmann, David S. Miller, netdev,
	linux-kernel, Eric Dumazet, ???????? ??????????????????,
	Tetsuo Handa, Simon McVittie

Hi Willy,

On 02.02.2016 21:39, Willy Tarreau wrote:
> On Tue, Feb 02, 2016 at 09:32:56PM +0100, Hannes Frederic Sowa wrote:
>> But "struct pid *" in unix_skb_parms should be enough to get us to
>> corresponding "struct cred *" so we can decrement the correct counter
>> during skb destruction.
>>
>> So:
>>
>> We increment current task's unix_inflight and also check the current
>> task's limit during attaching fds to skbs and decrement the inflight
>> counter via "struct pid *". This looks like it should work.
>
> I like it as well, the principle sounds sane.
>
>>> That way it's always the person who actually does the send (rather
>>> than the opener of the socket _or_ the opener of the file that gets
>>> passed around) that gets credited, and thanks to the cred pointer we
>>> can then de-credit them properly.
>>
>> Exactly, I try to implement that. Thanks a lot!
>
> Thanks to you Hannes, I appreciate that you work on it, it would take
> much more time to me to dig into this.

I slightly tested the attached patch. If you have the original 
reproducer available could you also give it a try? Unfortunately I 
currently don't find it and am limited in time this evening.

Thanks a lot,
Hannes

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 2a91a0561a4783..4567dbe04f274d 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -6,8 +6,8 @@
  #include <linux/mutex.h>
  #include <net/sock.h>

-void unix_inflight(struct file *fp);
-void unix_notinflight(struct file *fp);
+void unix_inflight(const struct cred *cred, struct file *fp);
+void unix_notinflight(const struct cred *cred, struct file *fp);
  void unix_gc(void);
  void wait_for_unix_gc(void);
  struct sock *unix_get_socket(struct file *filp);
diff --git a/include/net/scm.h b/include/net/scm.h
index 262532d111f51e..8bf7d496545bf8 100644
--- a/include/net/scm.h
+++ b/include/net/scm.h
@@ -21,6 +21,7 @@ struct scm_creds {
  struct scm_fp_list {
  	short			count;
  	short			max;
+	const struct cred	*cred;
  	struct file		*fp[SCM_MAX_FD];
  };

diff --git a/net/core/scm.c b/net/core/scm.c
index 14596fb3717270..6b02b574e283f6 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -87,6 +87,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct 
scm_fp_list **fplp)
  		*fplp = fpl;
  		fpl->count = 0;
  		fpl->max = SCM_MAX_FD;
+		fpl->cred = NULL;
  	}
  	fpp = &fpl->fp[fpl->count];

@@ -107,6 +108,10 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct 
scm_fp_list **fplp)
  		*fpp++ = file;
  		fpl->count++;
  	}
+
+	if (fpl->cred)
+		put_cred(fpl->cred);
+	fpl->cred = get_current_cred();
  	return num;
  }

@@ -119,6 +124,7 @@ void __scm_destroy(struct scm_cookie *scm)
  		scm->fp = NULL;
  		for (i=fpl->count-1; i>=0; i--)
  			fput(fpl->fp[i]);
+		put_cred(fpl->cred);
  		kfree(fpl);
  	}
  }
@@ -336,6 +342,7 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
  		for (i = 0; i < fpl->count; i++)
  			get_file(fpl->fp[i]);
  		new_fpl->max = new_fpl->count;
+		new_fpl->cred = get_cred(fpl->cred);
  	}
  	return new_fpl;
  }
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 49d5093eb0553a..ba5058682419ba 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1496,7 +1496,7 @@ static void unix_detach_fds(struct scm_cookie 
*scm, struct sk_buff *skb)
  	UNIXCB(skb).fp = NULL;

  	for (i = scm->fp->count-1; i >= 0; i--)
-		unix_notinflight(scm->fp->fp[i]);
+		unix_notinflight(scm->fp->cred, scm->fp->fp[i]);
  }

  static void unix_destruct_scm(struct sk_buff *skb)
@@ -1561,7 +1561,7 @@ static int unix_attach_fds(struct scm_cookie *scm, 
struct sk_buff *skb)
  		return -ENOMEM;

  	for (i = scm->fp->count - 1; i >= 0; i--)
-		unix_inflight(scm->fp->fp[i]);
+		unix_inflight(scm->fp->cred, scm->fp->fp[i]);
  	return max_level;
  }

diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 8fcdc2283af50c..30b03e7dddd547 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -116,7 +116,7 @@ struct sock *unix_get_socket(struct file *filp)
   * descriptor if it is for an AF_UNIX socket.
   */

-void unix_inflight(struct file *fp)
+void unix_inflight(const struct cred *cred, struct file *fp)
  {
  	struct sock *s = unix_get_socket(fp);

@@ -133,11 +133,11 @@ void unix_inflight(struct file *fp)
  		}
  		unix_tot_inflight++;
  	}
-	fp->f_cred->user->unix_inflight++;
+	cred->user->unix_inflight++;
  	spin_unlock(&unix_gc_lock);
  }

-void unix_notinflight(struct file *fp)
+void unix_notinflight(const struct cred *cred, struct file *fp)
  {
  	struct sock *s = unix_get_socket(fp);

@@ -152,7 +152,7 @@ void unix_notinflight(struct file *fp)
  			list_del_init(&u->link);
  		unix_tot_inflight--;
  	}
-	fp->f_cred->user->unix_inflight--;
+	cred->user->unix_inflight--;
  	spin_unlock(&unix_gc_lock);
  }

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2] unix: properly account for FDs passed over unix sockets
       [not found]             ` <CA+55aFzqdR80MKupCs+va8vtbTU67Jobax1QAbfWNktQCXFxpA@mail.gmail.com>
@ 2016-02-03  0:57               ` Hannes Frederic Sowa
  2016-02-03  1:12                 ` Hannes Frederic Sowa
  0 siblings, 1 reply; 23+ messages in thread
From: Hannes Frederic Sowa @ 2016-02-03  0:57 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Network Development, Tetsuo Handa, Simon McVittie, Willy Tarreau,
	Eric Dumazet, linux-kernel, David S. Miller,
	???????? ??????????????????,
	David Herrmann

On 02.02.2016 23:11, Linus Torvalds wrote:
> [ sorry for the html mail, I'm out grocery shopping ]
>
> On Feb 2, 2016 13:55, "Hannes Frederic Sowa" <hannes@stressinduktion.org>
> wrote:
>>
>> I slightly tested the attached patch.
>
> Looks fine. I do wonder: if the only thing we use that "struct cred" for is
> to do that ->user lookup, maybe we should just use "struct user_struct"
> directly, and skip the cred entirely.
>
> Something like
>
>      fp->user = get_uid(current_user());
>
> and then
>
>      put_uid(fp->user);
>
> But I'm OK with that patch as is if you prefer it that way (maybe you want
> to use the cred to then test for root separately etc, out maybe there
> already was done use of cred as cred that I just missed when reading the
> patch on my phone..)

I don't see any reason to switch over to struct user_struct. I tested a 
patch and will send it out soon.

Bye,
Hannes

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2] unix: properly account for FDs passed over unix sockets
  2016-02-03  0:57               ` Hannes Frederic Sowa
@ 2016-02-03  1:12                 ` Hannes Frederic Sowa
  0 siblings, 0 replies; 23+ messages in thread
From: Hannes Frederic Sowa @ 2016-02-03  1:12 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Network Development, Tetsuo Handa, Simon McVittie, Willy Tarreau,
	Eric Dumazet, linux-kernel, David S. Miller,
	???????? ??????????????????,
	David Herrmann

On 03.02.2016 01:57, Hannes Frederic Sowa wrote:
> On 02.02.2016 23:11, Linus Torvalds wrote:
>> But I'm OK with that patch as is if you prefer it that way (maybe you
>> want
>> to use the cred to then test for root separately etc, out maybe there
>> already was done use of cred as cred that I just missed when reading the
>> patch on my phone..)
>
> I don't see any reason to switch over to struct user_struct. I tested a
> patch and will send it out soon.

I meant that there is no reason not to switch over to struct 
user_struct. Patch is send out.

Thanks for looking into this!

Hannes

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2] unix: properly account for FDs passed over unix sockets
  2016-02-02 17:34 ` David Herrmann
  2016-02-02 18:29   ` Hannes Frederic Sowa
@ 2016-02-03 11:36   ` Simon McVittie
  2016-02-03 11:56     ` Hannes Frederic Sowa
  2016-02-03 11:56     ` David Herrmann
  1 sibling, 2 replies; 23+ messages in thread
From: Simon McVittie @ 2016-02-03 11:36 UTC (permalink / raw)
  To: David Herrmann, Willy Tarreau
  Cc: David S. Miller, netdev, linux-kernel, Linus Torvalds,
	Eric Dumazet, Hannes Frederic Sowa, socketpair, Tetsuo Handa

On 02/02/16 17:34, David Herrmann wrote:
> Furthermore, with this patch in place, a process better not pass any
> file-descriptors to an untrusted process.
...
> Did anyone notify the dbus maintainers of this? They
> might wanna document this, if not already done (CC: smcv).

Sorry, I'm not clear from this message on what patterns I should be
documenting as bad, and what the effect of non-compliance would be.

dbus-daemon has a fd-passing feature, which uses AF_UNIX sockets'
existing ability to pass fds to let users of D-Bus attach fds to their
messages. The message is passed from the sending client to dbus-daemon,
then from dbus-daemon to the recipient:

             AF_UNIX             AF_UNIX
               |                    |
    sender -------> dbus-daemon -------> recipient
               |                    |

This has been API since before I was a D-Bus maintainer, so I have no
influence over its existence; just like the kernel doesn't want to break
user-space, dbus-daemon doesn't want to break its users.

The system dbus-daemon (dbus-daemon --system) is a privilege boundary,
and accepts senders and recipients with differing privileges. Without
configuration, messages are denied by default. Recipients can open this
up (by installing system-wide configuration) to allow arbitrary
processes to send messages to them, so that they can carry out their own
discretionary access control. Since 2014, the system dbus-daemon accepts
up to 16 file descriptors per message by default.

There is also a session or user dbus-daemon (dbus-daemon --session) per
uid, but that isn't normally a privilege boundary, so any user trying to
carry out a denial of service there is only hurting themselves.

Am I right in saying that the advice I give to D-Bus users should be
something like this?

* system services should not send fds at all, unless they trust the
  dbus-daemon
* system services should not send fds via D-Bus that will be delivered
  to recipients that they do not trust
* sending fds to an untrusted recipient would enable that recipient to
  carry out a denial-of-service attack (on what? the sender? the
  dbus-daemon?)

-- 
Simon McVittie
Collabora Ltd. <http://www.collabora.com/>

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2] unix: properly account for FDs passed over unix sockets
  2016-02-03 11:36   ` Simon McVittie
@ 2016-02-03 11:56     ` Hannes Frederic Sowa
  2016-02-03 11:56     ` David Herrmann
  1 sibling, 0 replies; 23+ messages in thread
From: Hannes Frederic Sowa @ 2016-02-03 11:56 UTC (permalink / raw)
  To: Simon McVittie, David Herrmann, Willy Tarreau
  Cc: David S. Miller, netdev, linux-kernel, Linus Torvalds,
	Eric Dumazet, socketpair, Tetsuo Handa

On 03.02.2016 12:36, Simon McVittie wrote:
> On 02/02/16 17:34, David Herrmann wrote:
>> Furthermore, with this patch in place, a process better not pass any
>> file-descriptors to an untrusted process.
> ...
>> Did anyone notify the dbus maintainers of this? They
>> might wanna document this, if not already done (CC: smcv).
>
> Sorry, I'm not clear from this message on what patterns I should be
> documenting as bad, and what the effect of non-compliance would be.
>
> dbus-daemon has a fd-passing feature, which uses AF_UNIX sockets'
> existing ability to pass fds to let users of D-Bus attach fds to their
> messages. The message is passed from the sending client to dbus-daemon,
> then from dbus-daemon to the recipient:
>
>               AF_UNIX             AF_UNIX
>                 |                    |
>      sender -------> dbus-daemon -------> recipient
>                 |                    |
>
> This has been API since before I was a D-Bus maintainer, so I have no
> influence over its existence; just like the kernel doesn't want to break
> user-space, dbus-daemon doesn't want to break its users.
>
> The system dbus-daemon (dbus-daemon --system) is a privilege boundary,
> and accepts senders and recipients with differing privileges. Without
> configuration, messages are denied by default. Recipients can open this
> up (by installing system-wide configuration) to allow arbitrary
> processes to send messages to them, so that they can carry out their own
> discretionary access control. Since 2014, the system dbus-daemon accepts
> up to 16 file descriptors per message by default.
>
> There is also a session or user dbus-daemon (dbus-daemon --session) per
> uid, but that isn't normally a privilege boundary, so any user trying to
> carry out a denial of service there is only hurting themselves.
>
> Am I right in saying that the advice I give to D-Bus users should be
> something like this?
>
> * system services should not send fds at all, unless they trust the
>    dbus-daemon
> * system services should not send fds via D-Bus that will be delivered
>    to recipients that they do not trust
> * sending fds to an untrusted recipient would enable that recipient to
>    carry out a denial-of-service attack (on what? the sender? the
>    dbus-daemon?)
>

The described behavior was simply a bug in the referenced patch. I 
already posted a follow-up to change this behavior so that only the 
current sending process is credited with the number of fds in flight:

<https://patchwork.ozlabs.org/patch/577653/>

Other processes (in this case the original opener of the file) isn't 
credited anymore if it does not send the fd itself.

That said, I don't think you need to change anything or give different 
advice because of this thread.

Thanks,
Hannes

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2] unix: properly account for FDs passed over unix sockets
  2016-02-03 11:36   ` Simon McVittie
  2016-02-03 11:56     ` Hannes Frederic Sowa
@ 2016-02-03 11:56     ` David Herrmann
  2016-02-03 12:49       ` Simon McVittie
  2016-02-03 14:07       ` Hannes Frederic Sowa
  1 sibling, 2 replies; 23+ messages in thread
From: David Herrmann @ 2016-02-03 11:56 UTC (permalink / raw)
  To: Simon McVittie
  Cc: Willy Tarreau, David S. Miller, netdev, linux-kernel,
	Linus Torvalds, Eric Dumazet, Hannes Frederic Sowa, socketpair,
	Tetsuo Handa

Hi

On Wed, Feb 3, 2016 at 12:36 PM, Simon McVittie
<simon.mcvittie@collabora.co.uk> wrote:
> Am I right in saying that the advice I give to D-Bus users should be
> something like this?
>
> * system services should not send fds at all, unless they trust the
>   dbus-daemon
> * system services should not send fds via D-Bus that will be delivered
>   to recipients that they do not trust
> * sending fds to an untrusted recipient would enable that recipient to
>   carry out a denial-of-service attack (on what? the sender? the
>   dbus-daemon?)

With the revised patch from Hannes, this should no longer be needed.
My original concern was only about accounting inflight-fds on the
file-owner, rather than the sender.

However, with Hannes' revised patch, a different DoS attack against
dbus-daemon is possible. Imagine a peer that receives batches of FDs,
but never dequeues them. They will be accounted on the inflight-limit
of dbus-daemon, as such causing messages of independent peers to be
rejected in case they carry FDs.
Preferably, dbus-daemon would avoid queuing more than 16 FDs on a
single destination (total). But that would require POLLOUT to be
capped by the number of queued fds. A possible workaround is to add
CAP_SYS_RESOURCE to dbus-daemon.

Thanks
David

^ permalink raw reply	[flat|nested] 23+ messages in thread

* RE: [PATCH v2] unix: properly account for FDs passed over unix sockets
  2016-02-02 20:44         ` Linus Torvalds
  2016-02-02 20:49           ` Willy Tarreau
  2016-02-02 20:56           ` Hannes Frederic Sowa
@ 2016-02-03 12:19           ` David Laight
  2 siblings, 0 replies; 23+ messages in thread
From: David Laight @ 2016-02-03 12:19 UTC (permalink / raw)
  To: 'Linus Torvalds', Hannes Frederic Sowa
  Cc: David Herrmann, Willy Tarreau, David S. Miller, netdev,
	linux-kernel, Eric Dumazet, ???? ?????????,
	Tetsuo Handa, Simon McVittie

From: Linus Torvalds
> Sent: 02 February 2016 20:45
> On Tue, Feb 2, 2016 at 12:32 PM, Hannes Frederic Sowa
> <hannes@stressinduktion.org> wrote:
> >
> > Unfortunately we never transfer a scm_cookie via the skbs but merely use it
> > to initialize unix_skb_parms structure in skb->cb and destroy it afterwards.
> 
> Ok, I obviously didn't check very closely.
> 
> > But "struct pid *" in unix_skb_parms should be enough to get us to
> > corresponding "struct cred *" so we can decrement the correct counter during
> > skb destruction.
> 
> Umm. I think the "struct cred" may change in between, can't it?
> 
> So I don't think you can later look up the cred based on the pid.
> 
> Could we add the cred pointer (or just the user pointer) to the unix_skb_parms?
> 
> Or maybe just add it to the "struct scm_fp_list"?

Is that going to work if the sending process exits before the message is read?
You need a reference count against the structure than contains the count.
I think this is 'struct user' not 'struct cred'

	David

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2] unix: properly account for FDs passed over unix sockets
  2016-02-03 11:56     ` David Herrmann
@ 2016-02-03 12:49       ` Simon McVittie
  2016-02-03 14:07       ` Hannes Frederic Sowa
  1 sibling, 0 replies; 23+ messages in thread
From: Simon McVittie @ 2016-02-03 12:49 UTC (permalink / raw)
  To: David Herrmann
  Cc: Willy Tarreau, David S. Miller, netdev, linux-kernel,
	Linus Torvalds, Eric Dumazet, Hannes Frederic Sowa, socketpair,
	Tetsuo Handa

On 03/02/16 11:56, David Herrmann wrote:
> However, with Hannes' revised patch, a different DoS attack against
> dbus-daemon is possible. Imagine a peer that receives batches of FDs,
> but never dequeues them. They will be accounted on the inflight-limit
> of dbus-daemon, as such causing messages of independent peers to be
> rejected in case they carry FDs.
> Preferably, dbus-daemon would avoid queuing more than 16 FDs on a
> single destination (total). But that would require POLLOUT to be
> capped by the number of queued fds. A possible workaround is to add
> CAP_SYS_RESOURCE to dbus-daemon.

We have several mitigations for this:

* there's a limit on outgoing fds queued inside dbus-daemon for a
  particular recipient connection, currently 64, and if that's
  exceeded, we stop accepting messages for that recipient, feeding back
  a send failure to the sender for messages to that recipient;
* there's a time limit for how long any given fd can stay queued up
  inside dbus-daemon, currently 2.5 minutes, and if that's exceeded, we
  drop the message;
* if started as root[1], we increase our fd limit to 64k before
  dropping privileges to the intended uid, which in combination with
  the maximum of 256 connections per uid and 64 fds per connection means
  it takes multiple uids working together to carry out a DoS;
* all of those limits can be adjusted by a local sysadmin if necessary,
  if their users are particularly hostile (for instance 16 fds per
  message is a generous limit intended to avoid unforeseen breakage,
  and 4 would probably be enough in practice)

The queueing logic is fairly involved, so I'd appreciate suggested
patches on freedesktop.org Bugzilla or to
dbus-security@lists.freedesktop.org if you have an idea for better
limits. If you believe you have found a non-public vulnerability, please
mark any Bugzilla bugs as restricted to the D-Bus security group.

Thanks,
    S

[1] The system dbus-daemon is started as root (and hence able to
increase its own fd limit) on all systemd systems, anything that uses
the Red Hat or Slackware sysvinit scripts bundled with dbus, and any
Debian derivatives that use sysvinit and have taken security updates
from at least Debian 7. Other distro-specific init system glue is up to
the relevant distro.

-- 
Simon McVittie
Collabora Ltd. <http://www.collabora.com/>

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v2] unix: properly account for FDs passed over unix sockets
  2016-02-03 11:56     ` David Herrmann
  2016-02-03 12:49       ` Simon McVittie
@ 2016-02-03 14:07       ` Hannes Frederic Sowa
  1 sibling, 0 replies; 23+ messages in thread
From: Hannes Frederic Sowa @ 2016-02-03 14:07 UTC (permalink / raw)
  To: David Herrmann, Simon McVittie
  Cc: Willy Tarreau, David S. Miller, netdev, linux-kernel,
	Linus Torvalds, Eric Dumazet, socketpair, Tetsuo Handa

On 03.02.2016 12:56, David Herrmann wrote:
> However, with Hannes' revised patch, a different DoS attack against
> dbus-daemon is possible. Imagine a peer that receives batches of FDs,
> but never dequeues them. They will be accounted on the inflight-limit
> of dbus-daemon, as such causing messages of independent peers to be
> rejected in case they carry FDs.

Yes, that is true.

We also kind of have the problem with unconnected af-unix dgram sockets: 
if the receiver does not read the skbs on the receive queue we don't 
free up the sending socket's wmem, thus stop the socket from being 
destructed and can block the process during sendmsg on this socket. This 
is harder to DoS but pretty much the same schema.

Bye,
Hannes

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH v2] unix: properly account for FDs passed over unix sockets
@ 2016-01-10  6:58 Willy Tarreau
  0 siblings, 0 replies; 23+ messages in thread
From: Willy Tarreau @ 2016-01-10  6:58 UTC (permalink / raw)
  To: David S. Miller, netdev, linux-kernel
  Cc: Linus Torvalds, Eric Dumazet, Hannes Frederic Sowa, socketpair,
	Tetsuo Handa

It is possible for a process to allocate and accumulate far more FDs than
the process' limit by sending them over a unix socket then closing them
to keep the process' fd count low.

This change addresses this problem by keeping track of the number of FDs
in flight per user and preventing non-privileged processes from having
more FDs in flight than their configured FD limit.

Reported-by: socketpair@gmail.com
Reported-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Mitigates: CVE-2013-4312 (Linux 2.0+)
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
v2: add reported-by, mitigates and acked-by.

It would be nice if (if accepted) it would be backported to -stable as the
issue is currently exploitable.
---
 include/linux/sched.h |  1 +
 net/unix/af_unix.c    | 24 ++++++++++++++++++++----
 net/unix/garbage.c    | 13 ++++++++-----
 3 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index edad7a4..fbf25f1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -830,6 +830,7 @@ struct user_struct {
 	unsigned long mq_bytes;	/* How many bytes can be allocated to mqueue? */
 #endif
 	unsigned long locked_shm; /* How many pages of mlocked shm ? */
+	unsigned long unix_inflight;	/* How many files in flight in unix sockets */
 
 #ifdef CONFIG_KEYS
 	struct key *uid_keyring;	/* UID specific keyring */
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 45aebd9..d6d7b43 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1499,6 +1499,21 @@ static void unix_destruct_scm(struct sk_buff *skb)
 	sock_wfree(skb);
 }
 
+/*
+ * The "user->unix_inflight" variable is protected by the garbage
+ * collection lock, and we just read it locklessly here. If you go
+ * over the limit, there might be a tiny race in actually noticing
+ * it across threads. Tough.
+ */
+static inline bool too_many_unix_fds(struct task_struct *p)
+{
+	struct user_struct *user = current_user();
+
+	if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
+		return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
+	return false;
+}
+
 #define MAX_RECURSION_LEVEL 4
 
 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
@@ -1507,6 +1522,9 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
 	unsigned char max_level = 0;
 	int unix_sock_count = 0;
 
+	if (too_many_unix_fds(current))
+		return -ETOOMANYREFS;
+
 	for (i = scm->fp->count - 1; i >= 0; i--) {
 		struct sock *sk = unix_get_socket(scm->fp->fp[i]);
 
@@ -1528,10 +1546,8 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
 	if (!UNIXCB(skb).fp)
 		return -ENOMEM;
 
-	if (unix_sock_count) {
-		for (i = scm->fp->count - 1; i >= 0; i--)
-			unix_inflight(scm->fp->fp[i]);
-	}
+	for (i = scm->fp->count - 1; i >= 0; i--)
+		unix_inflight(scm->fp->fp[i]);
 	return max_level;
 }
 
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index a73a226..8fcdc22 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -120,11 +120,11 @@ void unix_inflight(struct file *fp)
 {
 	struct sock *s = unix_get_socket(fp);
 
+	spin_lock(&unix_gc_lock);
+
 	if (s) {
 		struct unix_sock *u = unix_sk(s);
 
-		spin_lock(&unix_gc_lock);
-
 		if (atomic_long_inc_return(&u->inflight) == 1) {
 			BUG_ON(!list_empty(&u->link));
 			list_add_tail(&u->link, &gc_inflight_list);
@@ -132,25 +132,28 @@ void unix_inflight(struct file *fp)
 			BUG_ON(list_empty(&u->link));
 		}
 		unix_tot_inflight++;
-		spin_unlock(&unix_gc_lock);
 	}
+	fp->f_cred->user->unix_inflight++;
+	spin_unlock(&unix_gc_lock);
 }
 
 void unix_notinflight(struct file *fp)
 {
 	struct sock *s = unix_get_socket(fp);
 
+	spin_lock(&unix_gc_lock);
+
 	if (s) {
 		struct unix_sock *u = unix_sk(s);
 
-		spin_lock(&unix_gc_lock);
 		BUG_ON(list_empty(&u->link));
 
 		if (atomic_long_dec_and_test(&u->inflight))
 			list_del_init(&u->link);
 		unix_tot_inflight--;
-		spin_unlock(&unix_gc_lock);
 	}
+	fp->f_cred->user->unix_inflight--;
+	spin_unlock(&unix_gc_lock);
 }
 
 static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
-- 
1.7.12.1

^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2016-02-03 14:07 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-01-10  6:54 [PATCH v2] unix: properly account for FDs passed over unix sockets Willy Tarreau
2016-01-10  6:58 ` Willy Tarreau
2016-01-11  5:05 ` David Miller
2016-02-02 17:34 ` David Herrmann
2016-02-02 18:29   ` Hannes Frederic Sowa
2016-02-02 19:29     ` Linus Torvalds
2016-02-02 20:32       ` Hannes Frederic Sowa
2016-02-02 20:39         ` Willy Tarreau
2016-02-02 21:55           ` Hannes Frederic Sowa
     [not found]             ` <CA+55aFzqdR80MKupCs+va8vtbTU67Jobax1QAbfWNktQCXFxpA@mail.gmail.com>
2016-02-03  0:57               ` Hannes Frederic Sowa
2016-02-03  1:12                 ` Hannes Frederic Sowa
2016-02-02 20:44         ` Linus Torvalds
2016-02-02 20:49           ` Willy Tarreau
2016-02-02 20:53             ` Linus Torvalds
2016-02-02 20:58               ` Willy Tarreau
2016-02-02 20:56           ` Hannes Frederic Sowa
2016-02-03 12:19           ` David Laight
2016-02-03 11:36   ` Simon McVittie
2016-02-03 11:56     ` Hannes Frederic Sowa
2016-02-03 11:56     ` David Herrmann
2016-02-03 12:49       ` Simon McVittie
2016-02-03 14:07       ` Hannes Frederic Sowa
2016-01-10  6:58 Willy Tarreau

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.