* [PATCH v1 net-next] af_unix: Put a named socket in the global hash table.
@ 2022-07-01 7:25 Kuniyuki Iwashima
2022-07-01 16:07 ` Sachin Sant
` (2 more replies)
0 siblings, 3 replies; 5+ messages in thread
From: Kuniyuki Iwashima @ 2022-07-01 7:25 UTC (permalink / raw)
To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
Cc: Sachin Sant, Leonard Crestez, Kuniyuki Iwashima,
Kuniyuki Iwashima, netdev
Commit cf2f225e2653 ("af_unix: Put a socket into a per-netns hash
table.") accidentally broke user API for named sockets. A named
socket was able to connect() to a peer in the same mount namespace
even if they were in different network namespaces.
The commit put all sockets into each per-netns hash table. As a
result, connect() to a socket in a different netns failed to find
the peer and returned -ECONNREFUSED even when they had the same
mount namespace.
We can reproduce this issue by
Console A:
# python3
>>> from socket import *
>>> s = socket(AF_UNIX, SOCK_STREAM, 0)
>>> s.bind('test')
>>> s.listen(32)
Console B:
# ip netns add test
# ip netns exec test sh
# python3
>>> from socket import *
>>> s = socket(AF_UNIX, SOCK_STREAM, 0)
>>> s.connect('test')
Note when dumping sockets by sock_diag, procfs, and bpf_iter, they are
filtered only by netns. In other words, sockets with different netns
and the same mount ns are skipped while iterating sockets. Thus, we
need a fix only for finding a peer socket.
This patch adds a global hash table for named sockets, links them with
sk_bind_node, and uses it in unix_find_socket_byinode(). By doing so,
we can keep all sockets in per-netns hash tables and dump them easily.
Thank Sachin Sant and Leonard Crestez for reports, logs and a reproducer.
Fixes: cf2f225e2653 ("af_unix: Put a socket into a per-netns hash table.")
Reported-by: Sachin Sant <sachinp@linux.ibm.com>
Reported-by: Leonard Crestez <cdleonard@gmail.com>
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
net/unix/af_unix.c | 47 ++++++++++++++++++++++++++++++++++++----------
1 file changed, 37 insertions(+), 10 deletions(-)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 49f6626330c3..526b872cc710 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -119,6 +119,8 @@
#include "scm.h"
static atomic_long_t unix_nr_socks;
+static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
+static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];
/* SMP locking strategy:
* hash table is protected with spinlock.
@@ -328,6 +330,24 @@ static void unix_insert_unbound_socket(struct net *net, struct sock *sk)
spin_unlock(&net->unx.table.locks[sk->sk_hash]);
}
+static void unix_insert_bsd_socket(struct sock *sk)
+{
+ spin_lock(&bsd_socket_locks[sk->sk_hash]);
+ sk_add_bind_node(sk, &bsd_socket_buckets[sk->sk_hash]);
+ spin_unlock(&bsd_socket_locks[sk->sk_hash]);
+}
+
+static void unix_remove_bsd_socket(struct sock *sk)
+{
+ if (!hlist_unhashed(&sk->sk_bind_node)) {
+ spin_lock(&bsd_socket_locks[sk->sk_hash]);
+ __sk_del_bind_node(sk);
+ spin_unlock(&bsd_socket_locks[sk->sk_hash]);
+
+ sk_node_init(&sk->sk_bind_node);
+ }
+}
+
static struct sock *__unix_find_socket_byname(struct net *net,
struct sockaddr_un *sunname,
int len, unsigned int hash)
@@ -358,22 +378,22 @@ static inline struct sock *unix_find_socket_byname(struct net *net,
return s;
}
-static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
+static struct sock *unix_find_socket_byinode(struct inode *i)
{
unsigned int hash = unix_bsd_hash(i);
struct sock *s;
- spin_lock(&net->unx.table.locks[hash]);
- sk_for_each(s, &net->unx.table.buckets[hash]) {
+ spin_lock(&bsd_socket_locks[hash]);
+ sk_for_each_bound(s, &bsd_socket_buckets[hash]) {
struct dentry *dentry = unix_sk(s)->path.dentry;
if (dentry && d_backing_inode(dentry) == i) {
sock_hold(s);
- spin_unlock(&net->unx.table.locks[hash]);
+ spin_unlock(&bsd_socket_locks[hash]);
return s;
}
}
- spin_unlock(&net->unx.table.locks[hash]);
+ spin_unlock(&bsd_socket_locks[hash]);
return NULL;
}
@@ -577,6 +597,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
int state;
unix_remove_socket(sock_net(sk), sk);
+ unix_remove_bsd_socket(sk);
/* Clear state */
unix_state_lock(sk);
@@ -988,8 +1009,8 @@ static int unix_release(struct socket *sock)
return 0;
}
-static struct sock *unix_find_bsd(struct net *net, struct sockaddr_un *sunaddr,
- int addr_len, int type)
+static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len,
+ int type)
{
struct inode *inode;
struct path path;
@@ -1010,7 +1031,7 @@ static struct sock *unix_find_bsd(struct net *net, struct sockaddr_un *sunaddr,
if (!S_ISSOCK(inode->i_mode))
goto path_put;
- sk = unix_find_socket_byinode(net, inode);
+ sk = unix_find_socket_byinode(inode);
if (!sk)
goto path_put;
@@ -1058,7 +1079,7 @@ static struct sock *unix_find_other(struct net *net,
struct sock *sk;
if (sunaddr->sun_path[0])
- sk = unix_find_bsd(net, sunaddr, addr_len, type);
+ sk = unix_find_bsd(sunaddr, addr_len, type);
else
sk = unix_find_abstract(net, sunaddr, addr_len, type);
@@ -1179,6 +1200,7 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
u->path.dentry = dget(dentry);
__unix_set_addr_hash(net, sk, addr, new_hash);
unix_table_double_unlock(net, old_hash, new_hash);
+ unix_insert_bsd_socket(sk);
mutex_unlock(&u->bindlock);
done_path_create(&parent, dentry);
return 0;
@@ -3682,10 +3704,15 @@ static void __init bpf_iter_register(void)
static int __init af_unix_init(void)
{
- int rc = -1;
+ int i, rc = -1;
BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
+ for (i = 0; i < UNIX_HASH_SIZE / 2; i++) {
+ spin_lock_init(&bsd_socket_locks[i]);
+ INIT_HLIST_HEAD(&bsd_socket_buckets[i]);
+ }
+
rc = proto_register(&unix_dgram_proto, 1);
if (rc != 0) {
pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
--
2.30.2
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH v1 net-next] af_unix: Put a named socket in the global hash table.
2022-07-01 7:25 [PATCH v1 net-next] af_unix: Put a named socket in the global hash table Kuniyuki Iwashima
@ 2022-07-01 16:07 ` Sachin Sant
2022-07-01 16:36 ` Eric Dumazet
2022-07-01 21:38 ` Nathan Chancellor
2 siblings, 0 replies; 5+ messages in thread
From: Sachin Sant @ 2022-07-01 16:07 UTC (permalink / raw)
To: Kuniyuki Iwashima
Cc: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Leonard Crestez, Kuniyuki Iwashima, netdev
> Note when dumping sockets by sock_diag, procfs, and bpf_iter, they are
> filtered only by netns. In other words, sockets with different netns
> and the same mount ns are skipped while iterating sockets. Thus, we
> need a fix only for finding a peer socket.
>
> This patch adds a global hash table for named sockets, links them with
> sk_bind_node, and uses it in unix_find_socket_byinode(). By doing so,
> we can keep all sockets in per-netns hash tables and dump them easily.
>
> Thank Sachin Sant and Leonard Crestez for reports, logs and a reproducer.
>
> Fixes: cf2f225e2653 ("af_unix: Put a socket into a per-netns hash table.")
> Reported-by: Sachin Sant <sachinp@linux.ibm.com>
> Reported-by: Leonard Crestez <cdleonard@gmail.com>
> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
> ---
> net/unix/af_unix.c | 47 ++++++++++++++++++++++++++++++++++++----------
> 1 file changed, 37 insertions(+), 10 deletions(-)
>
Thanks for the fix.
The patch fixes the reported problem.
Tested-by: Sachin Sant <sachinp@linux.ibm.com>
- Sachin
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH v1 net-next] af_unix: Put a named socket in the global hash table.
2022-07-01 7:25 [PATCH v1 net-next] af_unix: Put a named socket in the global hash table Kuniyuki Iwashima
2022-07-01 16:07 ` Sachin Sant
@ 2022-07-01 16:36 ` Eric Dumazet
2022-07-01 16:50 ` Kuniyuki Iwashima
2022-07-01 21:38 ` Nathan Chancellor
2 siblings, 1 reply; 5+ messages in thread
From: Eric Dumazet @ 2022-07-01 16:36 UTC (permalink / raw)
To: Kuniyuki Iwashima
Cc: David S. Miller, Jakub Kicinski, Paolo Abeni, Sachin Sant,
Leonard Crestez, Kuniyuki Iwashima, netdev
On Fri, Jul 1, 2022 at 9:25 AM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
>
> Commit cf2f225e2653 ("af_unix: Put a socket into a per-netns hash
> table.") accidentally broke user API for named sockets. A named
> socket was able to connect() to a peer in the same mount namespace
> even if they were in different network namespaces.
>
> The commit put all sockets into each per-netns hash table. As a
> result, connect() to a socket in a different netns failed to find
> the peer and returned -ECONNREFUSED even when they had the same
> mount namespace.
>
> We can reproduce this issue by
>
> Console A:
>
> # python3
> >>> from socket import *
> >>> s = socket(AF_UNIX, SOCK_STREAM, 0)
> >>> s.bind('test')
> >>> s.listen(32)
>
> Console B:
>
> # ip netns add test
> # ip netns exec test sh
> # python3
> >>> from socket import *
> >>> s = socket(AF_UNIX, SOCK_STREAM, 0)
> >>> s.connect('test')
>
I think this deserves a new test perhaps...
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH v1 net-next] af_unix: Put a named socket in the global hash table.
2022-07-01 16:36 ` Eric Dumazet
@ 2022-07-01 16:50 ` Kuniyuki Iwashima
0 siblings, 0 replies; 5+ messages in thread
From: Kuniyuki Iwashima @ 2022-07-01 16:50 UTC (permalink / raw)
To: edumazet
Cc: cdleonard, davem, kuba, kuni1840, kuniyu, netdev, pabeni, sachinp
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 1 Jul 2022 18:36:21 +0200
> On Fri, Jul 1, 2022 at 9:25 AM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
> >
> > Commit cf2f225e2653 ("af_unix: Put a socket into a per-netns hash
> > table.") accidentally broke user API for named sockets. A named
> > socket was able to connect() to a peer in the same mount namespace
> > even if they were in different network namespaces.
> >
> > The commit put all sockets into each per-netns hash table. As a
> > result, connect() to a socket in a different netns failed to find
> > the peer and returned -ECONNREFUSED even when they had the same
> > mount namespace.
> >
> > We can reproduce this issue by
> >
> > Console A:
> >
> > # python3
> > >>> from socket import *
> > >>> s = socket(AF_UNIX, SOCK_STREAM, 0)
> > >>> s.bind('test')
> > >>> s.listen(32)
> >
> > Console B:
> >
> > # ip netns add test
> > # ip netns exec test sh
> > # python3
> > >>> from socket import *
> > >>> s = socket(AF_UNIX, SOCK_STREAM, 0)
> > >>> s.connect('test')
> >
>
> I think this deserves a new test perhaps...
Exactly. I will add a selftest in v2.
Thank you.
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH v1 net-next] af_unix: Put a named socket in the global hash table.
2022-07-01 7:25 [PATCH v1 net-next] af_unix: Put a named socket in the global hash table Kuniyuki Iwashima
2022-07-01 16:07 ` Sachin Sant
2022-07-01 16:36 ` Eric Dumazet
@ 2022-07-01 21:38 ` Nathan Chancellor
2 siblings, 0 replies; 5+ messages in thread
From: Nathan Chancellor @ 2022-07-01 21:38 UTC (permalink / raw)
To: Kuniyuki Iwashima
Cc: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Sachin Sant, Leonard Crestez, Kuniyuki Iwashima, netdev
On Fri, Jul 01, 2022 at 12:25:19AM -0700, Kuniyuki Iwashima wrote:
> Commit cf2f225e2653 ("af_unix: Put a socket into a per-netns hash
> table.") accidentally broke user API for named sockets. A named
> socket was able to connect() to a peer in the same mount namespace
> even if they were in different network namespaces.
>
> The commit put all sockets into each per-netns hash table. As a
> result, connect() to a socket in a different netns failed to find
> the peer and returned -ECONNREFUSED even when they had the same
> mount namespace.
>
> We can reproduce this issue by
>
> Console A:
>
> # python3
> >>> from socket import *
> >>> s = socket(AF_UNIX, SOCK_STREAM, 0)
> >>> s.bind('test')
> >>> s.listen(32)
>
> Console B:
>
> # ip netns add test
> # ip netns exec test sh
> # python3
> >>> from socket import *
> >>> s = socket(AF_UNIX, SOCK_STREAM, 0)
> >>> s.connect('test')
>
> Note when dumping sockets by sock_diag, procfs, and bpf_iter, they are
> filtered only by netns. In other words, sockets with different netns
> and the same mount ns are skipped while iterating sockets. Thus, we
> need a fix only for finding a peer socket.
>
> This patch adds a global hash table for named sockets, links them with
> sk_bind_node, and uses it in unix_find_socket_byinode(). By doing so,
> we can keep all sockets in per-netns hash tables and dump them easily.
>
> Thank Sachin Sant and Leonard Crestez for reports, logs and a reproducer.
>
> Fixes: cf2f225e2653 ("af_unix: Put a socket into a per-netns hash table.")
> Reported-by: Sachin Sant <sachinp@linux.ibm.com>
> Reported-by: Leonard Crestez <cdleonard@gmail.com>
> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
I noticed that all my test systems failed to start systemd-hostnamed
during boot, which I bisected to cf2f225e2653. This patch resolves the
problem for me as well.
Tested-by: Nathan Chancellor <nathan@kernel.org>
> ---
> net/unix/af_unix.c | 47 ++++++++++++++++++++++++++++++++++++----------
> 1 file changed, 37 insertions(+), 10 deletions(-)
>
> diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
> index 49f6626330c3..526b872cc710 100644
> --- a/net/unix/af_unix.c
> +++ b/net/unix/af_unix.c
> @@ -119,6 +119,8 @@
> #include "scm.h"
>
> static atomic_long_t unix_nr_socks;
> +static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
> +static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];
>
> /* SMP locking strategy:
> * hash table is protected with spinlock.
> @@ -328,6 +330,24 @@ static void unix_insert_unbound_socket(struct net *net, struct sock *sk)
> spin_unlock(&net->unx.table.locks[sk->sk_hash]);
> }
>
> +static void unix_insert_bsd_socket(struct sock *sk)
> +{
> + spin_lock(&bsd_socket_locks[sk->sk_hash]);
> + sk_add_bind_node(sk, &bsd_socket_buckets[sk->sk_hash]);
> + spin_unlock(&bsd_socket_locks[sk->sk_hash]);
> +}
> +
> +static void unix_remove_bsd_socket(struct sock *sk)
> +{
> + if (!hlist_unhashed(&sk->sk_bind_node)) {
> + spin_lock(&bsd_socket_locks[sk->sk_hash]);
> + __sk_del_bind_node(sk);
> + spin_unlock(&bsd_socket_locks[sk->sk_hash]);
> +
> + sk_node_init(&sk->sk_bind_node);
> + }
> +}
> +
> static struct sock *__unix_find_socket_byname(struct net *net,
> struct sockaddr_un *sunname,
> int len, unsigned int hash)
> @@ -358,22 +378,22 @@ static inline struct sock *unix_find_socket_byname(struct net *net,
> return s;
> }
>
> -static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
> +static struct sock *unix_find_socket_byinode(struct inode *i)
> {
> unsigned int hash = unix_bsd_hash(i);
> struct sock *s;
>
> - spin_lock(&net->unx.table.locks[hash]);
> - sk_for_each(s, &net->unx.table.buckets[hash]) {
> + spin_lock(&bsd_socket_locks[hash]);
> + sk_for_each_bound(s, &bsd_socket_buckets[hash]) {
> struct dentry *dentry = unix_sk(s)->path.dentry;
>
> if (dentry && d_backing_inode(dentry) == i) {
> sock_hold(s);
> - spin_unlock(&net->unx.table.locks[hash]);
> + spin_unlock(&bsd_socket_locks[hash]);
> return s;
> }
> }
> - spin_unlock(&net->unx.table.locks[hash]);
> + spin_unlock(&bsd_socket_locks[hash]);
> return NULL;
> }
>
> @@ -577,6 +597,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
> int state;
>
> unix_remove_socket(sock_net(sk), sk);
> + unix_remove_bsd_socket(sk);
>
> /* Clear state */
> unix_state_lock(sk);
> @@ -988,8 +1009,8 @@ static int unix_release(struct socket *sock)
> return 0;
> }
>
> -static struct sock *unix_find_bsd(struct net *net, struct sockaddr_un *sunaddr,
> - int addr_len, int type)
> +static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len,
> + int type)
> {
> struct inode *inode;
> struct path path;
> @@ -1010,7 +1031,7 @@ static struct sock *unix_find_bsd(struct net *net, struct sockaddr_un *sunaddr,
> if (!S_ISSOCK(inode->i_mode))
> goto path_put;
>
> - sk = unix_find_socket_byinode(net, inode);
> + sk = unix_find_socket_byinode(inode);
> if (!sk)
> goto path_put;
>
> @@ -1058,7 +1079,7 @@ static struct sock *unix_find_other(struct net *net,
> struct sock *sk;
>
> if (sunaddr->sun_path[0])
> - sk = unix_find_bsd(net, sunaddr, addr_len, type);
> + sk = unix_find_bsd(sunaddr, addr_len, type);
> else
> sk = unix_find_abstract(net, sunaddr, addr_len, type);
>
> @@ -1179,6 +1200,7 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
> u->path.dentry = dget(dentry);
> __unix_set_addr_hash(net, sk, addr, new_hash);
> unix_table_double_unlock(net, old_hash, new_hash);
> + unix_insert_bsd_socket(sk);
> mutex_unlock(&u->bindlock);
> done_path_create(&parent, dentry);
> return 0;
> @@ -3682,10 +3704,15 @@ static void __init bpf_iter_register(void)
>
> static int __init af_unix_init(void)
> {
> - int rc = -1;
> + int i, rc = -1;
>
> BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
>
> + for (i = 0; i < UNIX_HASH_SIZE / 2; i++) {
> + spin_lock_init(&bsd_socket_locks[i]);
> + INIT_HLIST_HEAD(&bsd_socket_buckets[i]);
> + }
> +
> rc = proto_register(&unix_dgram_proto, 1);
> if (rc != 0) {
> pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
> --
> 2.30.2
>
>
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2022-07-01 21:38 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-07-01 7:25 [PATCH v1 net-next] af_unix: Put a named socket in the global hash table Kuniyuki Iwashima
2022-07-01 16:07 ` Sachin Sant
2022-07-01 16:36 ` Eric Dumazet
2022-07-01 16:50 ` Kuniyuki Iwashima
2022-07-01 21:38 ` Nathan Chancellor
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.