All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net] af_unix: fix races in sk_peer_pid and sk_peer_cred accesses
@ 2021-09-29 22:57 Eric Dumazet
  2021-09-30 13:30 ` patchwork-bot+netdevbpf
  2022-03-24  8:03 ` wanghai (M)
  0 siblings, 2 replies; 5+ messages in thread
From: Eric Dumazet @ 2021-09-29 22:57 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski
  Cc: netdev, Eric Dumazet, Eric Dumazet, Jann Horn,
	Eric W . Biederman, Luiz Augusto von Dentz, Marcel Holtmann

From: Eric Dumazet <edumazet@google.com>

Jann Horn reported that SO_PEERCRED and SO_PEERGROUPS implementations
are racy, as af_unix can concurrently change sk_peer_pid and sk_peer_cred.

In order to fix this issue, this patch adds a new spinlock that needs
to be used whenever these fields are read or written.

Jann also pointed out that l2cap_sock_get_peer_pid_cb() is currently
reading sk->sk_peer_pid which makes no sense, as this field
is only possibly set by AF_UNIX sockets.
We will have to clean this in a separate patch.
This could be done by reverting b48596d1dc25 "Bluetooth: L2CAP: Add get_peer_pid callback"
or implementing what was truly expected.

Fixes: 109f6e39fa07 ("af_unix: Allow SO_PEERCRED to work across namespaces.")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Jann Horn <jannh@google.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Cc: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/sock.h |  2 ++
 net/core/sock.c    | 32 ++++++++++++++++++++++++++------
 net/unix/af_unix.c | 34 ++++++++++++++++++++++++++++------
 3 files changed, 56 insertions(+), 12 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index c005c3c750e89b6d4d36d36ccfad392a7e733603..f471cd0a6f98cb5b2b961f01c2de62870d61521e 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -488,8 +488,10 @@ struct sock {
 	u8			sk_prefer_busy_poll;
 	u16			sk_busy_poll_budget;
 #endif
+	spinlock_t		sk_peer_lock;
 	struct pid		*sk_peer_pid;
 	const struct cred	*sk_peer_cred;
+
 	long			sk_rcvtimeo;
 	ktime_t			sk_stamp;
 #if BITS_PER_LONG==32
diff --git a/net/core/sock.c b/net/core/sock.c
index 512e629f97803f3216db8f054e97fd1b7a6e6c63..c70cbce69a66181c3688862ea51aa781b3ce4f97 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1376,6 +1376,16 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 }
 EXPORT_SYMBOL(sock_setsockopt);
 
+static const struct cred *sk_get_peer_cred(struct sock *sk)
+{
+	const struct cred *cred;
+
+	spin_lock(&sk->sk_peer_lock);
+	cred = get_cred(sk->sk_peer_cred);
+	spin_unlock(&sk->sk_peer_lock);
+
+	return cred;
+}
 
 static void cred_to_ucred(struct pid *pid, const struct cred *cred,
 			  struct ucred *ucred)
@@ -1552,7 +1562,11 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		struct ucred peercred;
 		if (len > sizeof(peercred))
 			len = sizeof(peercred);
+
+		spin_lock(&sk->sk_peer_lock);
 		cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
+		spin_unlock(&sk->sk_peer_lock);
+
 		if (copy_to_user(optval, &peercred, len))
 			return -EFAULT;
 		goto lenout;
@@ -1560,20 +1574,23 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 
 	case SO_PEERGROUPS:
 	{
+		const struct cred *cred;
 		int ret, n;
 
-		if (!sk->sk_peer_cred)
+		cred = sk_get_peer_cred(sk);
+		if (!cred)
 			return -ENODATA;
 
-		n = sk->sk_peer_cred->group_info->ngroups;
+		n = cred->group_info->ngroups;
 		if (len < n * sizeof(gid_t)) {
 			len = n * sizeof(gid_t);
+			put_cred(cred);
 			return put_user(len, optlen) ? -EFAULT : -ERANGE;
 		}
 		len = n * sizeof(gid_t);
 
-		ret = groups_to_user((gid_t __user *)optval,
-				     sk->sk_peer_cred->group_info);
+		ret = groups_to_user((gid_t __user *)optval, cred->group_info);
+		put_cred(cred);
 		if (ret)
 			return ret;
 		goto lenout;
@@ -1935,9 +1952,10 @@ static void __sk_destruct(struct rcu_head *head)
 		sk->sk_frag.page = NULL;
 	}
 
-	if (sk->sk_peer_cred)
-		put_cred(sk->sk_peer_cred);
+	/* We do not need to acquire sk->sk_peer_lock, we are the last user. */
+	put_cred(sk->sk_peer_cred);
 	put_pid(sk->sk_peer_pid);
+
 	if (likely(sk->sk_net_refcnt))
 		put_net(sock_net(sk));
 	sk_prot_free(sk->sk_prot_creator, sk);
@@ -3145,6 +3163,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 
 	sk->sk_peer_pid 	=	NULL;
 	sk->sk_peer_cred	=	NULL;
+	spin_lock_init(&sk->sk_peer_lock);
+
 	sk->sk_write_pending	=	0;
 	sk->sk_rcvlowat		=	1;
 	sk->sk_rcvtimeo		=	MAX_SCHEDULE_TIMEOUT;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index f505b89bda6adefe973806f842001d428fc6c5ca..efac5989edb5d37881139bb1ad2fb9cf63bd7342 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -608,20 +608,42 @@ static void unix_release_sock(struct sock *sk, int embrion)
 
 static void init_peercred(struct sock *sk)
 {
-	put_pid(sk->sk_peer_pid);
-	if (sk->sk_peer_cred)
-		put_cred(sk->sk_peer_cred);
+	const struct cred *old_cred;
+	struct pid *old_pid;
+
+	spin_lock(&sk->sk_peer_lock);
+	old_pid = sk->sk_peer_pid;
+	old_cred = sk->sk_peer_cred;
 	sk->sk_peer_pid  = get_pid(task_tgid(current));
 	sk->sk_peer_cred = get_current_cred();
+	spin_unlock(&sk->sk_peer_lock);
+
+	put_pid(old_pid);
+	put_cred(old_cred);
 }
 
 static void copy_peercred(struct sock *sk, struct sock *peersk)
 {
-	put_pid(sk->sk_peer_pid);
-	if (sk->sk_peer_cred)
-		put_cred(sk->sk_peer_cred);
+	const struct cred *old_cred;
+	struct pid *old_pid;
+
+	if (sk < peersk) {
+		spin_lock(&sk->sk_peer_lock);
+		spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
+	} else {
+		spin_lock(&peersk->sk_peer_lock);
+		spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
+	}
+	old_pid = sk->sk_peer_pid;
+	old_cred = sk->sk_peer_cred;
 	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
 	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
+
+	spin_unlock(&sk->sk_peer_lock);
+	spin_unlock(&peersk->sk_peer_lock);
+
+	put_pid(old_pid);
+	put_cred(old_cred);
 }
 
 static int unix_listen(struct socket *sock, int backlog)
-- 
2.33.0.800.g4c38ced690-goog


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH net] af_unix: fix races in sk_peer_pid and sk_peer_cred accesses
  2021-09-29 22:57 [PATCH net] af_unix: fix races in sk_peer_pid and sk_peer_cred accesses Eric Dumazet
@ 2021-09-30 13:30 ` patchwork-bot+netdevbpf
  2022-03-24  8:03 ` wanghai (M)
  1 sibling, 0 replies; 5+ messages in thread
From: patchwork-bot+netdevbpf @ 2021-09-30 13:30 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: davem, kuba, netdev, edumazet, jannh, ebiederm, luiz.von.dentz, marcel

Hello:

This patch was applied to netdev/net.git (refs/heads/master):

On Wed, 29 Sep 2021 15:57:50 -0700 you wrote:
> From: Eric Dumazet <edumazet@google.com>
> 
> Jann Horn reported that SO_PEERCRED and SO_PEERGROUPS implementations
> are racy, as af_unix can concurrently change sk_peer_pid and sk_peer_cred.
> 
> In order to fix this issue, this patch adds a new spinlock that needs
> to be used whenever these fields are read or written.
> 
> [...]

Here is the summary with links:
  - [net] af_unix: fix races in sk_peer_pid and sk_peer_cred accesses
    https://git.kernel.org/netdev/net/c/35306eb23814

You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH net] af_unix: fix races in sk_peer_pid and sk_peer_cred accesses
  2021-09-29 22:57 [PATCH net] af_unix: fix races in sk_peer_pid and sk_peer_cred accesses Eric Dumazet
  2021-09-30 13:30 ` patchwork-bot+netdevbpf
@ 2022-03-24  8:03 ` wanghai (M)
  2022-03-24  9:04   ` Kuniyuki Iwashima
  1 sibling, 1 reply; 5+ messages in thread
From: wanghai (M) @ 2022-03-24  8:03 UTC (permalink / raw)
  To: Eric Dumazet, David S . Miller, Jakub Kicinski
  Cc: netdev, Eric Dumazet, Jann Horn, Eric W . Biederman,
	Luiz Augusto von Dentz, Marcel Holtmann


在 2021/9/30 6:57, Eric Dumazet 写道:
> From: Eric Dumazet <edumazet@google.com>
>
> Jann Horn reported that SO_PEERCRED and SO_PEERGROUPS implementations
> are racy, as af_unix can concurrently change sk_peer_pid and sk_peer_cred.
>
> In order to fix this issue, this patch adds a new spinlock that needs
> to be used whenever these fields are read or written.
>
> Jann also pointed out that l2cap_sock_get_peer_pid_cb() is currently
> reading sk->sk_peer_pid which makes no sense, as this field
> is only possibly set by AF_UNIX sockets.
> We will have to clean this in a separate patch.
> This could be done by reverting b48596d1dc25 "Bluetooth: L2CAP: Add get_peer_pid callback"
> or implementing what was truly expected.
>
> Fixes: 109f6e39fa07 ("af_unix: Allow SO_PEERCRED to work across namespaces.")
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Reported-by: Jann Horn <jannh@google.com>
> Cc: Eric W. Biederman <ebiederm@xmission.com>
> Cc: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
> Cc: Marcel Holtmann <marcel@holtmann.org>
> ---
...
>   static void copy_peercred(struct sock *sk, struct sock *peersk)
>   {
> -	put_pid(sk->sk_peer_pid);
> -	if (sk->sk_peer_cred)
> -		put_cred(sk->sk_peer_cred);
> +	const struct cred *old_cred;
> +	struct pid *old_pid;
> +
> +	if (sk < peersk) {
> +		spin_lock(&sk->sk_peer_lock);
> +		spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
> +	} else {
> +		spin_lock(&peersk->sk_peer_lock);
> +		spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
> +	}
Hi, ALL.
I'm sorry to bother you.

This patch adds sk_peer_lock to solve the problem that af_unix may
concurrently change sk_peer_pid and sk_peer_cred.

I am confused as to why the order of locks is needed here based on
the address size of sk and peersk.

Any feedback would be appreciated, thanks.
> +	old_pid = sk->sk_peer_pid;
> +	old_cred = sk->sk_peer_cred;
>   	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
>   	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
> +
> +	spin_unlock(&sk->sk_peer_lock);
> +	spin_unlock(&peersk->sk_peer_lock);
> +
> +	put_pid(old_pid);
> +	put_cred(old_cred);
>   }
>   
>   static int unix_listen(struct socket *sock, int backlog)

-- 
Wang Hai


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH net] af_unix: fix races in sk_peer_pid and sk_peer_cred accesses
  2022-03-24  8:03 ` wanghai (M)
@ 2022-03-24  9:04   ` Kuniyuki Iwashima
  2022-03-24 11:15     ` wanghai (M)
  0 siblings, 1 reply; 5+ messages in thread
From: Kuniyuki Iwashima @ 2022-03-24  9:04 UTC (permalink / raw)
  To: wanghai38
  Cc: davem, ebiederm, edumazet, eric.dumazet, jannh, kuba,
	luiz.von.dentz, marcel, netdev

From:   "wanghai (M)" <wanghai38@huawei.com>
Date:   Thu, 24 Mar 2022 16:03:31 +0800
> 在 2021/9/30 6:57, Eric Dumazet 写道:
>> From: Eric Dumazet <edumazet@google.com>
>>
>> Jann Horn reported that SO_PEERCRED and SO_PEERGROUPS implementations
>> are racy, as af_unix can concurrently change sk_peer_pid and sk_peer_cred.
>>
>> In order to fix this issue, this patch adds a new spinlock that needs
>> to be used whenever these fields are read or written.
>>
>> Jann also pointed out that l2cap_sock_get_peer_pid_cb() is currently
>> reading sk->sk_peer_pid which makes no sense, as this field
>> is only possibly set by AF_UNIX sockets.
>> We will have to clean this in a separate patch.
>> This could be done by reverting b48596d1dc25 "Bluetooth: L2CAP: Add get_peer_pid callback"
>> or implementing what was truly expected.
>>
>> Fixes: 109f6e39fa07 ("af_unix: Allow SO_PEERCRED to work across namespaces.")
>> Signed-off-by: Eric Dumazet <edumazet@google.com>
>> Reported-by: Jann Horn <jannh@google.com>
>> Cc: Eric W. Biederman <ebiederm@xmission.com>
>> Cc: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
>> Cc: Marcel Holtmann <marcel@holtmann.org>
>> ---
> ...
>>   static void copy_peercred(struct sock *sk, struct sock *peersk)
>>   {
>> -	put_pid(sk->sk_peer_pid);
>> -	if (sk->sk_peer_cred)
>> -		put_cred(sk->sk_peer_cred);
>> +	const struct cred *old_cred;
>> +	struct pid *old_pid;
>> +
>> +	if (sk < peersk) {
>> +		spin_lock(&sk->sk_peer_lock);
>> +		spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
>> +	} else {
>> +		spin_lock(&peersk->sk_peer_lock);
>> +		spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
>> +	}
> Hi, ALL.
> I'm sorry to bother you.
> 
> This patch adds sk_peer_lock to solve the problem that af_unix may
> concurrently change sk_peer_pid and sk_peer_cred.
> 
> I am confused as to why the order of locks is needed here based on
> the address size of sk and peersk.

To simply avoid dead lock.  These locks must be acquired in the same
order.  The smaller address lock is acquired first, then larger one.

  e.g.) CPU-A calls copy_peercred(sk-A, sk-B), and
        CPU-B calls copy_peercred(sk-B, sk-A).

There are some implementations like this:

  $ grep -rn double_lock


> 
> Any feedback would be appreciated, thanks.
>> +	old_pid = sk->sk_peer_pid;
>> +	old_cred = sk->sk_peer_cred;
>>   	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
>>   	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
>> +
>> +	spin_unlock(&sk->sk_peer_lock);
>> +	spin_unlock(&peersk->sk_peer_lock);
>> +
>> +	put_pid(old_pid);
>> +	put_cred(old_cred);
>>   }
>>   
>>   static int unix_listen(struct socket *sock, int backlog)
> 
> -- 
> Wang Hai

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH net] af_unix: fix races in sk_peer_pid and sk_peer_cred accesses
  2022-03-24  9:04   ` Kuniyuki Iwashima
@ 2022-03-24 11:15     ` wanghai (M)
  0 siblings, 0 replies; 5+ messages in thread
From: wanghai (M) @ 2022-03-24 11:15 UTC (permalink / raw)
  To: Kuniyuki Iwashima
  Cc: davem, ebiederm, edumazet, eric.dumazet, jannh, kuba,
	luiz.von.dentz, marcel, netdev


在 2022/3/24 17:04, Kuniyuki Iwashima 写道:
> From:   "wanghai (M)" <wanghai38@huawei.com>
> Date:   Thu, 24 Mar 2022 16:03:31 +0800
>> 在 2021/9/30 6:57, Eric Dumazet 写道:
>>> From: Eric Dumazet <edumazet@google.com>
>>>
>>> Jann Horn reported that SO_PEERCRED and SO_PEERGROUPS implementations
>>> are racy, as af_unix can concurrently change sk_peer_pid and sk_peer_cred.
>>>
>>> In order to fix this issue, this patch adds a new spinlock that needs
>>> to be used whenever these fields are read or written.
>>>
>>> Jann also pointed out that l2cap_sock_get_peer_pid_cb() is currently
>>> reading sk->sk_peer_pid which makes no sense, as this field
>>> is only possibly set by AF_UNIX sockets.
>>> We will have to clean this in a separate patch.
>>> This could be done by reverting b48596d1dc25 "Bluetooth: L2CAP: Add get_peer_pid callback"
>>> or implementing what was truly expected.
>>>
>>> Fixes: 109f6e39fa07 ("af_unix: Allow SO_PEERCRED to work across namespaces.")
>>> Signed-off-by: Eric Dumazet <edumazet@google.com>
>>> Reported-by: Jann Horn <jannh@google.com>
>>> Cc: Eric W. Biederman <ebiederm@xmission.com>
>>> Cc: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
>>> Cc: Marcel Holtmann <marcel@holtmann.org>
>>> ---
>> ...
>>>    static void copy_peercred(struct sock *sk, struct sock *peersk)
>>>    {
>>> -	put_pid(sk->sk_peer_pid);
>>> -	if (sk->sk_peer_cred)
>>> -		put_cred(sk->sk_peer_cred);
>>> +	const struct cred *old_cred;
>>> +	struct pid *old_pid;
>>> +
>>> +	if (sk < peersk) {
>>> +		spin_lock(&sk->sk_peer_lock);
>>> +		spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
>>> +	} else {
>>> +		spin_lock(&peersk->sk_peer_lock);
>>> +		spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
>>> +	}
>> Hi, ALL.
>> I'm sorry to bother you.
>>
>> This patch adds sk_peer_lock to solve the problem that af_unix may
>> concurrently change sk_peer_pid and sk_peer_cred.
>>
>> I am confused as to why the order of locks is needed here based on
>> the address size of sk and peersk.
> To simply avoid dead lock.  These locks must be acquired in the same
> order.  The smaller address lock is acquired first, then larger one.
>
>    e.g.) CPU-A calls copy_peercred(sk-A, sk-B), and
>          CPU-B calls copy_peercred(sk-B, sk-A).
>
> There are some implementations like this:
>
>    $ grep -rn double_lock
I got it, thank you for your patient explanation.
>
>
>> Any feedback would be appreciated, thanks.
>>> +	old_pid = sk->sk_peer_pid;
>>> +	old_cred = sk->sk_peer_cred;
>>>    	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
>>>    	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
>>> +
>>> +	spin_unlock(&sk->sk_peer_lock);
>>> +	spin_unlock(&peersk->sk_peer_lock);
>>> +
>>> +	put_pid(old_pid);
>>> +	put_cred(old_cred);
>>>    }
>>>    
>>>    static int unix_listen(struct socket *sock, int backlog)
>> -- 
>> Wang Hai
> .
>
-- 
Wang Hai


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2022-03-24 11:15 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-29 22:57 [PATCH net] af_unix: fix races in sk_peer_pid and sk_peer_cred accesses Eric Dumazet
2021-09-30 13:30 ` patchwork-bot+netdevbpf
2022-03-24  8:03 ` wanghai (M)
2022-03-24  9:04   ` Kuniyuki Iwashima
2022-03-24 11:15     ` wanghai (M)

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.