All of lore.kernel.org
 help / color / mirror / Atom feed
From: Nathan Chancellor <nathan@kernel.org>
To: Kuniyuki Iwashima <kuniyu@amazon.com>
Cc: "David S. Miller" <davem@davemloft.net>,
	Eric Dumazet <edumazet@google.com>,
	Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
	Sachin Sant <sachinp@linux.ibm.com>,
	Leonard Crestez <cdleonard@gmail.com>,
	Kuniyuki Iwashima <kuni1840@gmail.com>,
	netdev@vger.kernel.org
Subject: Re: [PATCH v1 net-next] af_unix: Put a named socket in the global hash table.
Date: Fri, 1 Jul 2022 14:38:04 -0700	[thread overview]
Message-ID: <Yr9pPJGoCDYOB3Tm@dev-arch.thelio-3990X> (raw)
In-Reply-To: <20220701072519.96097-1-kuniyu@amazon.com>

On Fri, Jul 01, 2022 at 12:25:19AM -0700, Kuniyuki Iwashima wrote:
> Commit cf2f225e2653 ("af_unix: Put a socket into a per-netns hash
> table.") accidentally broke user API for named sockets.  A named
> socket was able to connect() to a peer in the same mount namespace
> even if they were in different network namespaces.
> 
> The commit put all sockets into each per-netns hash table.  As a
> result, connect() to a socket in a different netns failed to find
> the peer and returned -ECONNREFUSED even when they had the same
> mount namespace.
> 
> We can reproduce this issue by
> 
>   Console A:
> 
>     # python3
>     >>> from socket import *
>     >>> s = socket(AF_UNIX, SOCK_STREAM, 0)
>     >>> s.bind('test')
>     >>> s.listen(32)
> 
>   Console B:
> 
>     # ip netns add test
>     # ip netns exec test sh
>     # python3
>     >>> from socket import *
>     >>> s = socket(AF_UNIX, SOCK_STREAM, 0)
>     >>> s.connect('test')
> 
> Note when dumping sockets by sock_diag, procfs, and bpf_iter, they are
> filtered only by netns.  In other words, sockets with different netns
> and the same mount ns are skipped while iterating sockets.  Thus, we
> need a fix only for finding a peer socket.
> 
> This patch adds a global hash table for named sockets, links them with
> sk_bind_node, and uses it in unix_find_socket_byinode().  By doing so,
> we can keep all sockets in per-netns hash tables and dump them easily.
> 
> Thank Sachin Sant and Leonard Crestez for reports, logs and a reproducer.
> 
> Fixes: cf2f225e2653 ("af_unix: Put a socket into a per-netns hash table.")
> Reported-by: Sachin Sant <sachinp@linux.ibm.com>
> Reported-by: Leonard Crestez <cdleonard@gmail.com>
> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>

I noticed that all my test systems failed to start systemd-hostnamed
during boot, which I bisected to cf2f225e2653. This patch resolves the
problem for me as well.

Tested-by: Nathan Chancellor <nathan@kernel.org>

> ---
>  net/unix/af_unix.c | 47 ++++++++++++++++++++++++++++++++++++----------
>  1 file changed, 37 insertions(+), 10 deletions(-)
> 
> diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
> index 49f6626330c3..526b872cc710 100644
> --- a/net/unix/af_unix.c
> +++ b/net/unix/af_unix.c
> @@ -119,6 +119,8 @@
>  #include "scm.h"
>  
>  static atomic_long_t unix_nr_socks;
> +static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
> +static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];
>  
>  /* SMP locking strategy:
>   *    hash table is protected with spinlock.
> @@ -328,6 +330,24 @@ static void unix_insert_unbound_socket(struct net *net, struct sock *sk)
>  	spin_unlock(&net->unx.table.locks[sk->sk_hash]);
>  }
>  
> +static void unix_insert_bsd_socket(struct sock *sk)
> +{
> +	spin_lock(&bsd_socket_locks[sk->sk_hash]);
> +	sk_add_bind_node(sk, &bsd_socket_buckets[sk->sk_hash]);
> +	spin_unlock(&bsd_socket_locks[sk->sk_hash]);
> +}
> +
> +static void unix_remove_bsd_socket(struct sock *sk)
> +{
> +	if (!hlist_unhashed(&sk->sk_bind_node)) {
> +		spin_lock(&bsd_socket_locks[sk->sk_hash]);
> +		__sk_del_bind_node(sk);
> +		spin_unlock(&bsd_socket_locks[sk->sk_hash]);
> +
> +		sk_node_init(&sk->sk_bind_node);
> +	}
> +}
> +
>  static struct sock *__unix_find_socket_byname(struct net *net,
>  					      struct sockaddr_un *sunname,
>  					      int len, unsigned int hash)
> @@ -358,22 +378,22 @@ static inline struct sock *unix_find_socket_byname(struct net *net,
>  	return s;
>  }
>  
> -static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
> +static struct sock *unix_find_socket_byinode(struct inode *i)
>  {
>  	unsigned int hash = unix_bsd_hash(i);
>  	struct sock *s;
>  
> -	spin_lock(&net->unx.table.locks[hash]);
> -	sk_for_each(s, &net->unx.table.buckets[hash]) {
> +	spin_lock(&bsd_socket_locks[hash]);
> +	sk_for_each_bound(s, &bsd_socket_buckets[hash]) {
>  		struct dentry *dentry = unix_sk(s)->path.dentry;
>  
>  		if (dentry && d_backing_inode(dentry) == i) {
>  			sock_hold(s);
> -			spin_unlock(&net->unx.table.locks[hash]);
> +			spin_unlock(&bsd_socket_locks[hash]);
>  			return s;
>  		}
>  	}
> -	spin_unlock(&net->unx.table.locks[hash]);
> +	spin_unlock(&bsd_socket_locks[hash]);
>  	return NULL;
>  }
>  
> @@ -577,6 +597,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
>  	int state;
>  
>  	unix_remove_socket(sock_net(sk), sk);
> +	unix_remove_bsd_socket(sk);
>  
>  	/* Clear state */
>  	unix_state_lock(sk);
> @@ -988,8 +1009,8 @@ static int unix_release(struct socket *sock)
>  	return 0;
>  }
>  
> -static struct sock *unix_find_bsd(struct net *net, struct sockaddr_un *sunaddr,
> -				  int addr_len, int type)
> +static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len,
> +				  int type)
>  {
>  	struct inode *inode;
>  	struct path path;
> @@ -1010,7 +1031,7 @@ static struct sock *unix_find_bsd(struct net *net, struct sockaddr_un *sunaddr,
>  	if (!S_ISSOCK(inode->i_mode))
>  		goto path_put;
>  
> -	sk = unix_find_socket_byinode(net, inode);
> +	sk = unix_find_socket_byinode(inode);
>  	if (!sk)
>  		goto path_put;
>  
> @@ -1058,7 +1079,7 @@ static struct sock *unix_find_other(struct net *net,
>  	struct sock *sk;
>  
>  	if (sunaddr->sun_path[0])
> -		sk = unix_find_bsd(net, sunaddr, addr_len, type);
> +		sk = unix_find_bsd(sunaddr, addr_len, type);
>  	else
>  		sk = unix_find_abstract(net, sunaddr, addr_len, type);
>  
> @@ -1179,6 +1200,7 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
>  	u->path.dentry = dget(dentry);
>  	__unix_set_addr_hash(net, sk, addr, new_hash);
>  	unix_table_double_unlock(net, old_hash, new_hash);
> +	unix_insert_bsd_socket(sk);
>  	mutex_unlock(&u->bindlock);
>  	done_path_create(&parent, dentry);
>  	return 0;
> @@ -3682,10 +3704,15 @@ static void __init bpf_iter_register(void)
>  
>  static int __init af_unix_init(void)
>  {
> -	int rc = -1;
> +	int i, rc = -1;
>  
>  	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
>  
> +	for (i = 0; i < UNIX_HASH_SIZE / 2; i++) {
> +		spin_lock_init(&bsd_socket_locks[i]);
> +		INIT_HLIST_HEAD(&bsd_socket_buckets[i]);
> +	}
> +
>  	rc = proto_register(&unix_dgram_proto, 1);
>  	if (rc != 0) {
>  		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
> -- 
> 2.30.2
> 
> 

      parent reply	other threads:[~2022-07-01 21:38 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-07-01  7:25 [PATCH v1 net-next] af_unix: Put a named socket in the global hash table Kuniyuki Iwashima
2022-07-01 16:07 ` Sachin Sant
2022-07-01 16:36 ` Eric Dumazet
2022-07-01 16:50   ` Kuniyuki Iwashima
2022-07-01 21:38 ` Nathan Chancellor [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Yr9pPJGoCDYOB3Tm@dev-arch.thelio-3990X \
    --to=nathan@kernel.org \
    --cc=cdleonard@gmail.com \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=kuba@kernel.org \
    --cc=kuni1840@gmail.com \
    --cc=kuniyu@amazon.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=sachinp@linux.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.