netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 net] udp: Update reuse->has_conns under reuseport_lock.
@ 2022-10-12 18:52 Kuniyuki Iwashima
  2022-10-12 18:59 ` Eric Dumazet
  0 siblings, 1 reply; 7+ messages in thread
From: Kuniyuki Iwashima @ 2022-10-12 18:52 UTC (permalink / raw)
  To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	Hideaki YOSHIFUJI, David Ahern, Martin KaFai Lau
  Cc: Craig Gallek, Willem de Bruijn, Kuniyuki Iwashima,
	Kuniyuki Iwashima, netdev

When we call connect() for a UDP socket in a reuseport group, we have
to set sk->sk_reuseport_cb->has_conns to 1.  Otherwise, the kernel
could wrongly select an unconnected socket for packets sent to the
connected socket.

However, the current way of setting has_conns is invalid and can
trigger that problem.  reuseport_has_conns() changes has_conns under
rcu_read_lock(), which upgrades the RCU reader to an updater.  It
must then do the update under the updater's lock, reuseport_lock, but
it currently does not.

For this reason, there is a race, shown below, where we fail to set
has_conns, resulting in the wrong socket selection.  To avoid the race,
let's split the reader and updater with proper locking.

 cpu1                               cpu2
+----+                             +----+

__ip[46]_datagram_connect()        reuseport_grow()
.                                  .
|- reuseport_has_conns(sk, true)   |- more_reuse = __reuseport_alloc(more_socks_size)
|  .                               |
|  |- rcu_read_lock()
|  |- reuse = rcu_dereference(sk->sk_reuseport_cb)
|  |
|  |                               |  /* reuse->has_conns == 0 here */
|  |                               |- more_reuse->has_conns = reuse->has_conns
|  |- reuse->has_conns = 1         |  /* more_reuse->has_conns SHOULD BE 1 HERE */
|  |                               |
|  |                               |- rcu_assign_pointer(reuse->socks[i]->sk_reuseport_cb,
|  |                               |                     more_reuse)
|  `- rcu_read_unlock()            `- kfree_rcu(reuse, rcu)
|
|- sk->sk_state = TCP_ESTABLISHED

Fixes: acdcecc61285 ("udp: correct reuseport selection with connected sockets")
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
v2:
  * Fix build failure for CONFIG_IPV6=m
  * Drop SO_INCOMING_CPU fix, which will be sent for net-next
    after the v6.1 merge window

v1: https://lore.kernel.org/netdev/20221010174351.11024-1-kuniyu@amazon.com/
---
 include/net/sock_reuseport.h | 11 +++++------
 net/core/sock_reuseport.c    | 15 +++++++++++++++
 net/ipv4/datagram.c          |  2 +-
 net/ipv4/udp.c               |  2 +-
 net/ipv6/datagram.c          |  2 +-
 net/ipv6/udp.c               |  2 +-
 6 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
index 473b0b0fa4ab..efc9085c6892 100644
--- a/include/net/sock_reuseport.h
+++ b/include/net/sock_reuseport.h
@@ -43,21 +43,20 @@ struct sock *reuseport_migrate_sock(struct sock *sk,
 extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog);
 extern int reuseport_detach_prog(struct sock *sk);
 
-static inline bool reuseport_has_conns(struct sock *sk, bool set)
+static inline bool reuseport_has_conns(struct sock *sk)
 {
 	struct sock_reuseport *reuse;
 	bool ret = false;
 
 	rcu_read_lock();
 	reuse = rcu_dereference(sk->sk_reuseport_cb);
-	if (reuse) {
-		if (set)
-			reuse->has_conns = 1;
-		ret = reuse->has_conns;
-	}
+	if (reuse && reuse->has_conns)
+		ret = true;
 	rcu_read_unlock();
 
 	return ret;
 }
 
+void reuseport_has_conns_set(struct sock *sk);
+
 #endif  /* _SOCK_REUSEPORT_H */
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index 5daa1fa54249..abb414ed4aa7 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -21,6 +21,21 @@ static DEFINE_IDA(reuseport_ida);
 static int reuseport_resurrect(struct sock *sk, struct sock_reuseport *old_reuse,
 			       struct sock_reuseport *reuse, bool bind_inany);
 
+void reuseport_has_conns_set(struct sock *sk)
+{
+	struct sock_reuseport *reuse;
+
+	if (!rcu_access_pointer(sk->sk_reuseport_cb))
+		return;
+
+	spin_lock(&reuseport_lock);
+	reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
+					  lockdep_is_held(&reuseport_lock));
+	reuse->has_conns = 1;
+	spin_unlock(&reuseport_lock);
+}
+EXPORT_SYMBOL(reuseport_has_conns_set);
+
 static int reuseport_sock_index(struct sock *sk,
 				const struct sock_reuseport *reuse,
 				bool closed)
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 405a8c2aea64..5e66add7befa 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -70,7 +70,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
 	}
 	inet->inet_daddr = fl4->daddr;
 	inet->inet_dport = usin->sin_port;
-	reuseport_has_conns(sk, true);
+	reuseport_has_conns_set(sk);
 	sk->sk_state = TCP_ESTABLISHED;
 	sk_set_txhash(sk);
 	inet->inet_id = prandom_u32();
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index d63118ce5900..29228231b058 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -448,7 +448,7 @@ static struct sock *udp4_lib_lookup2(struct net *net,
 			result = lookup_reuseport(net, sk, skb,
 						  saddr, sport, daddr, hnum);
 			/* Fall back to scoring if group has connections */
-			if (result && !reuseport_has_conns(sk, false))
+			if (result && !reuseport_has_conns(sk))
 				return result;
 
 			result = result ? : sk;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index df665d4e8f0f..5ecb56522f9d 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -256,7 +256,7 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr,
 		goto out;
 	}
 
-	reuseport_has_conns(sk, true);
+	reuseport_has_conns_set(sk);
 	sk->sk_state = TCP_ESTABLISHED;
 	sk_set_txhash(sk);
 out:
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 91e795bb9ade..56e4523a3004 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -182,7 +182,7 @@ static struct sock *udp6_lib_lookup2(struct net *net,
 			result = lookup_reuseport(net, sk, skb,
 						  saddr, sport, daddr, hnum);
 			/* Fall back to scoring if group has connections */
-			if (result && !reuseport_has_conns(sk, false))
+			if (result && !reuseport_has_conns(sk))
 				return result;
 
 			result = result ? : sk;
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH v2 net] udp: Update reuse->has_conns under reuseport_lock.
  2022-10-12 18:52 [PATCH v2 net] udp: Update reuse->has_conns under reuseport_lock Kuniyuki Iwashima
@ 2022-10-12 18:59 ` Eric Dumazet
  2022-10-12 19:27   ` Kuniyuki Iwashima
  0 siblings, 1 reply; 7+ messages in thread
From: Eric Dumazet @ 2022-10-12 18:59 UTC (permalink / raw)
  To: Kuniyuki Iwashima
  Cc: David S. Miller, Jakub Kicinski, Paolo Abeni, Hideaki YOSHIFUJI,
	David Ahern, Martin KaFai Lau, Craig Gallek, Willem de Bruijn,
	Kuniyuki Iwashima, netdev

On Wed, Oct 12, 2022 at 11:53 AM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
>
> When we call connect() for a UDP socket in a reuseport group, we have
> to update sk->sk_reuseport_cb->has_conns to 1.  Otherwise, the kernel
> could select a unconnected socket wrongly for packets sent to the
> connected socket.
>
> However, the current way to set has_conns is illegal and possible to
> trigger that problem.  reuseport_has_conns() changes has_conns under
> rcu_read_lock(), which upgrades the RCU reader to the updater.  Then,
> it must do the update under the updater's lock, reuseport_lock, but
> it doesn't for now.
>
> For this reason, there is a race below where we fail to set has_conns
> resulting in the wrong socket selection.  To avoid the race, let's split
> the reader and updater with proper locking.
>
>  cpu1                               cpu2
> +----+                             +----+
>
> __ip[46]_datagram_connect()        reuseport_grow()
> .                                  .
> |- reuseport_has_conns(sk, true)   |- more_reuse = __reuseport_alloc(more_socks_size)
> |  .                               |
> |  |- rcu_read_lock()
> |  |- reuse = rcu_dereference(sk->sk_reuseport_cb)
> |  |
> |  |                               |  /* reuse->has_conns == 0 here */
> |  |                               |- more_reuse->has_conns = reuse->has_conns
> |  |- reuse->has_conns = 1         |  /* more_reuse->has_conns SHOULD BE 1 HERE */
> |  |                               |
> |  |                               |- rcu_assign_pointer(reuse->socks[i]->sk_reuseport_cb,
> |  |                               |                     more_reuse)
> |  `- rcu_read_unlock()            `- kfree_rcu(reuse, rcu)
> |
> |- sk->sk_state = TCP_ESTABLISHED
>
> Fixes: acdcecc61285 ("udp: correct reuseport selection with connected sockets")
> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
> ---
> v2:
>   * Fix build failure for CONFIG_IPV6=m
>   * Drop SO_INCOMING_CPU fix, which will be sent for net-next
>     after the v6.1 merge window
>
> v1: https://lore.kernel.org/netdev/20221010174351.11024-1-kuniyu@amazon.com/
> ---
>  include/net/sock_reuseport.h | 11 +++++------
>  net/core/sock_reuseport.c    | 15 +++++++++++++++
>  net/ipv4/datagram.c          |  2 +-
>  net/ipv4/udp.c               |  2 +-
>  net/ipv6/datagram.c          |  2 +-
>  net/ipv6/udp.c               |  2 +-
>  6 files changed, 24 insertions(+), 10 deletions(-)
>
> diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
> index 473b0b0fa4ab..efc9085c6892 100644
> --- a/include/net/sock_reuseport.h
> +++ b/include/net/sock_reuseport.h
> @@ -43,21 +43,20 @@ struct sock *reuseport_migrate_sock(struct sock *sk,
>  extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog);
>  extern int reuseport_detach_prog(struct sock *sk);
>
> -static inline bool reuseport_has_conns(struct sock *sk, bool set)
> +static inline bool reuseport_has_conns(struct sock *sk)
>  {
>         struct sock_reuseport *reuse;
>         bool ret = false;
>
>         rcu_read_lock();
>         reuse = rcu_dereference(sk->sk_reuseport_cb);
> -       if (reuse) {
> -               if (set)
> -                       reuse->has_conns = 1;
> -               ret = reuse->has_conns;
> -       }
> +       if (reuse && reuse->has_conns)
> +               ret = true;
>         rcu_read_unlock();
>
>         return ret;
>  }
>
> +void reuseport_has_conns_set(struct sock *sk);
> +
>  #endif  /* _SOCK_REUSEPORT_H */
> diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
> index 5daa1fa54249..abb414ed4aa7 100644
> --- a/net/core/sock_reuseport.c
> +++ b/net/core/sock_reuseport.c
> @@ -21,6 +21,21 @@ static DEFINE_IDA(reuseport_ida);
>  static int reuseport_resurrect(struct sock *sk, struct sock_reuseport *old_reuse,
>                                struct sock_reuseport *reuse, bool bind_inany);
>
> +void reuseport_has_conns_set(struct sock *sk)
> +{
> +       struct sock_reuseport *reuse;
> +
> +       if (!rcu_access_pointer(sk->sk_reuseport_cb))
> +               return;
> +
> +       spin_lock(&reuseport_lock);
> +       reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
> +                                         lockdep_is_held(&reuseport_lock));

Could @reuse be NULL at this point?

The previous test was performed without reuseport_lock being held.

> +       reuse->has_conns = 1;
> +       spin_unlock(&reuseport_lock);
> +}
> +EXPORT_SYMBOL(reuseport_has_conns_set);
> +
>  static int reuseport_sock_index(struct sock *sk,
>                                 const struct sock_reuseport *reuse,
>                                 bool closed)
> diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
> index 405a8c2aea64..5e66add7befa 100644
> --- a/net/ipv4/datagram.c
> +++ b/net/ipv4/datagram.c
> @@ -70,7 +70,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
>         }
>         inet->inet_daddr = fl4->daddr;
>         inet->inet_dport = usin->sin_port;
> -       reuseport_has_conns(sk, true);
> +       reuseport_has_conns_set(sk);
>         sk->sk_state = TCP_ESTABLISHED;
>         sk_set_txhash(sk);
>         inet->inet_id = prandom_u32();
> diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> index d63118ce5900..29228231b058 100644
> --- a/net/ipv4/udp.c
> +++ b/net/ipv4/udp.c
> @@ -448,7 +448,7 @@ static struct sock *udp4_lib_lookup2(struct net *net,
>                         result = lookup_reuseport(net, sk, skb,
>                                                   saddr, sport, daddr, hnum);
>                         /* Fall back to scoring if group has connections */
> -                       if (result && !reuseport_has_conns(sk, false))
> +                       if (result && !reuseport_has_conns(sk))
>                                 return result;
>
>                         result = result ? : sk;
> diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
> index df665d4e8f0f..5ecb56522f9d 100644
> --- a/net/ipv6/datagram.c
> +++ b/net/ipv6/datagram.c
> @@ -256,7 +256,7 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr,
>                 goto out;
>         }
>
> -       reuseport_has_conns(sk, true);
> +       reuseport_has_conns_set(sk);
>         sk->sk_state = TCP_ESTABLISHED;
>         sk_set_txhash(sk);
>  out:
> diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
> index 91e795bb9ade..56e4523a3004 100644
> --- a/net/ipv6/udp.c
> +++ b/net/ipv6/udp.c
> @@ -182,7 +182,7 @@ static struct sock *udp6_lib_lookup2(struct net *net,
>                         result = lookup_reuseport(net, sk, skb,
>                                                   saddr, sport, daddr, hnum);
>                         /* Fall back to scoring if group has connections */
> -                       if (result && !reuseport_has_conns(sk, false))
> +                       if (result && !reuseport_has_conns(sk))
>                                 return result;
>
>                         result = result ? : sk;
> --
> 2.30.2
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2 net] udp: Update reuse->has_conns under reuseport_lock.
  2022-10-12 18:59 ` Eric Dumazet
@ 2022-10-12 19:27   ` Kuniyuki Iwashima
  2022-10-13 16:09     ` Eric Dumazet
  0 siblings, 1 reply; 7+ messages in thread
From: Kuniyuki Iwashima @ 2022-10-12 19:27 UTC (permalink / raw)
  To: edumazet
  Cc: davem, dsahern, kraig, kuba, kuni1840, kuniyu, martin.lau,
	netdev, pabeni, willemb, yoshfuji

From:   Eric Dumazet <edumazet@google.com>
Date:   Wed, 12 Oct 2022 11:59:43 -0700
> On Wed, Oct 12, 2022 at 11:53 AM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
> >
> > When we call connect() for a UDP socket in a reuseport group, we have
> > to update sk->sk_reuseport_cb->has_conns to 1.  Otherwise, the kernel
> > could select a unconnected socket wrongly for packets sent to the
> > connected socket.
> >
> > However, the current way to set has_conns is illegal and possible to
> > trigger that problem.  reuseport_has_conns() changes has_conns under
> > rcu_read_lock(), which upgrades the RCU reader to the updater.  Then,
> > it must do the update under the updater's lock, reuseport_lock, but
> > it doesn't for now.
> >
> > For this reason, there is a race below where we fail to set has_conns
> > resulting in the wrong socket selection.  To avoid the race, let's split
> > the reader and updater with proper locking.
> >
> >  cpu1                               cpu2
> > +----+                             +----+
> >
> > __ip[46]_datagram_connect()        reuseport_grow()
> > .                                  .
> > |- reuseport_has_conns(sk, true)   |- more_reuse = __reuseport_alloc(more_socks_size)
> > |  .                               |
> > |  |- rcu_read_lock()
> > |  |- reuse = rcu_dereference(sk->sk_reuseport_cb)
> > |  |
> > |  |                               |  /* reuse->has_conns == 0 here */
> > |  |                               |- more_reuse->has_conns = reuse->has_conns
> > |  |- reuse->has_conns = 1         |  /* more_reuse->has_conns SHOULD BE 1 HERE */
> > |  |                               |
> > |  |                               |- rcu_assign_pointer(reuse->socks[i]->sk_reuseport_cb,
> > |  |                               |                     more_reuse)
> > |  `- rcu_read_unlock()            `- kfree_rcu(reuse, rcu)
> > |
> > |- sk->sk_state = TCP_ESTABLISHED
> >
> > Fixes: acdcecc61285 ("udp: correct reuseport selection with connected sockets")
> > Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
> > ---
> > v2:
> >   * Fix build failure for CONFIG_IPV6=m
> >   * Drop SO_INCOMING_CPU fix, which will be sent for net-next
> >     after the v6.1 merge window
> >
> > v1: https://lore.kernel.org/netdev/20221010174351.11024-1-kuniyu@amazon.com/
> > ---
> >  include/net/sock_reuseport.h | 11 +++++------
> >  net/core/sock_reuseport.c    | 15 +++++++++++++++
> >  net/ipv4/datagram.c          |  2 +-
> >  net/ipv4/udp.c               |  2 +-
> >  net/ipv6/datagram.c          |  2 +-
> >  net/ipv6/udp.c               |  2 +-
> >  6 files changed, 24 insertions(+), 10 deletions(-)
> >
> > diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
> > index 473b0b0fa4ab..efc9085c6892 100644
> > --- a/include/net/sock_reuseport.h
> > +++ b/include/net/sock_reuseport.h
> > @@ -43,21 +43,20 @@ struct sock *reuseport_migrate_sock(struct sock *sk,
> >  extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog);
> >  extern int reuseport_detach_prog(struct sock *sk);
> >
> > -static inline bool reuseport_has_conns(struct sock *sk, bool set)
> > +static inline bool reuseport_has_conns(struct sock *sk)
> >  {
> >         struct sock_reuseport *reuse;
> >         bool ret = false;
> >
> >         rcu_read_lock();
> >         reuse = rcu_dereference(sk->sk_reuseport_cb);
> > -       if (reuse) {
> > -               if (set)
> > -                       reuse->has_conns = 1;
> > -               ret = reuse->has_conns;
> > -       }
> > +       if (reuse && reuse->has_conns)
> > +               ret = true;
> >         rcu_read_unlock();
> >
> >         return ret;
> >  }
> >
> > +void reuseport_has_conns_set(struct sock *sk);
> > +
> >  #endif  /* _SOCK_REUSEPORT_H */
> > diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
> > index 5daa1fa54249..abb414ed4aa7 100644
> > --- a/net/core/sock_reuseport.c
> > +++ b/net/core/sock_reuseport.c
> > @@ -21,6 +21,21 @@ static DEFINE_IDA(reuseport_ida);
> >  static int reuseport_resurrect(struct sock *sk, struct sock_reuseport *old_reuse,
> >                                struct sock_reuseport *reuse, bool bind_inany);
> >
> > +void reuseport_has_conns_set(struct sock *sk)
> > +{
> > +       struct sock_reuseport *reuse;
> > +
> > +       if (!rcu_access_pointer(sk->sk_reuseport_cb))
> > +               return;
> > +
> > +       spin_lock(&reuseport_lock);
> > +       reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
> > +                                         lockdep_is_held(&reuseport_lock));
> 
> Could @reuse be NULL at this point ?
> 
> Previous  test was performed without reuseport_lock being held.

Usually, sk_reuseport_cb is changed under lock_sock().

The only exception is the reuseport_grow() & TCP reqsk migration case.

1) shutdown() TCP listener, which is moved into the latter part of
   reuse->socks[] to migrate reqsk.

2) A new listen() overflows reuse->socks[] and calls reuseport_grow().

3) reuse->max_socks overflows u16 with the new listener.

4) reuseport_grow() pops the old shutdown()ed listener from the array
   and sets its sk->sk_reuseport_cb to NULL without lock_sock().

So a shutdown()ed socket's sk->sk_reuseport_cb can be changed without
lock_sock().

But reuseport_has_conns_set() is called only for UDP and under
lock_sock(), so @reuse can never be NULL in this case.


> > +       reuse->has_conns = 1;
> > +       spin_unlock(&reuseport_lock);
> > +}
> > +EXPORT_SYMBOL(reuseport_has_conns_set);
> > +
> >  static int reuseport_sock_index(struct sock *sk,
> >                                 const struct sock_reuseport *reuse,
> >                                 bool closed)
> > diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
> > index 405a8c2aea64..5e66add7befa 100644
> > --- a/net/ipv4/datagram.c
> > +++ b/net/ipv4/datagram.c
> > @@ -70,7 +70,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
> >         }
> >         inet->inet_daddr = fl4->daddr;
> >         inet->inet_dport = usin->sin_port;
> > -       reuseport_has_conns(sk, true);
> > +       reuseport_has_conns_set(sk);
> >         sk->sk_state = TCP_ESTABLISHED;
> >         sk_set_txhash(sk);
> >         inet->inet_id = prandom_u32();
> > diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> > index d63118ce5900..29228231b058 100644
> > --- a/net/ipv4/udp.c
> > +++ b/net/ipv4/udp.c
> > @@ -448,7 +448,7 @@ static struct sock *udp4_lib_lookup2(struct net *net,
> >                         result = lookup_reuseport(net, sk, skb,
> >                                                   saddr, sport, daddr, hnum);
> >                         /* Fall back to scoring if group has connections */
> > -                       if (result && !reuseport_has_conns(sk, false))
> > +                       if (result && !reuseport_has_conns(sk))
> >                                 return result;
> >
> >                         result = result ? : sk;
> > diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
> > index df665d4e8f0f..5ecb56522f9d 100644
> > --- a/net/ipv6/datagram.c
> > +++ b/net/ipv6/datagram.c
> > @@ -256,7 +256,7 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr,
> >                 goto out;
> >         }
> >
> > -       reuseport_has_conns(sk, true);
> > +       reuseport_has_conns_set(sk);
> >         sk->sk_state = TCP_ESTABLISHED;
> >         sk_set_txhash(sk);
> >  out:
> > diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
> > index 91e795bb9ade..56e4523a3004 100644
> > --- a/net/ipv6/udp.c
> > +++ b/net/ipv6/udp.c
> > @@ -182,7 +182,7 @@ static struct sock *udp6_lib_lookup2(struct net *net,
> >                         result = lookup_reuseport(net, sk, skb,
> >                                                   saddr, sport, daddr, hnum);
> >                         /* Fall back to scoring if group has connections */
> > -                       if (result && !reuseport_has_conns(sk, false))
> > +                       if (result && !reuseport_has_conns(sk))
> >                                 return result;
> >
> >                         result = result ? : sk;
> > --
> > 2.30.2

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2 net] udp: Update reuse->has_conns under reuseport_lock.
  2022-10-12 19:27   ` Kuniyuki Iwashima
@ 2022-10-13 16:09     ` Eric Dumazet
  2022-10-13 16:51       ` Kuniyuki Iwashima
  2022-10-13 17:41       ` Martin KaFai Lau
  0 siblings, 2 replies; 7+ messages in thread
From: Eric Dumazet @ 2022-10-13 16:09 UTC (permalink / raw)
  To: Kuniyuki Iwashima
  Cc: davem, dsahern, kraig, kuba, kuni1840, martin.lau, netdev,
	pabeni, willemb, yoshfuji

On Wed, Oct 12, 2022 at 12:28 PM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
>
> From:   Eric Dumazet <edumazet@google.com>
> Date:   Wed, 12 Oct 2022 11:59:43 -0700
> > On Wed, Oct 12, 2022 at 11:53 AM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
> > >
> > > When we call connect() for a UDP socket in a reuseport group, we have
> > > to update sk->sk_reuseport_cb->has_conns to 1.  Otherwise, the kernel
> > > could select a unconnected socket wrongly for packets sent to the
> > > connected socket.
> > >
> > > However, the current way to set has_conns is illegal and possible to
> > > trigger that problem.  reuseport_has_conns() changes has_conns under
> > > rcu_read_lock(), which upgrades the RCU reader to the updater.  Then,
> > > it must do the update under the updater's lock, reuseport_lock, but
> > > it doesn't for now.
> > >
> > > For this reason, there is a race below where we fail to set has_conns
> > > resulting in the wrong socket selection.  To avoid the race, let's split
> > > the reader and updater with proper locking.
> > >
> > >  cpu1                               cpu2
> > > +----+                             +----+
> > >
> > > __ip[46]_datagram_connect()        reuseport_grow()
> > > .                                  .
> > > |- reuseport_has_conns(sk, true)   |- more_reuse = __reuseport_alloc(more_socks_size)
> > > |  .                               |
> > > |  |- rcu_read_lock()
> > > |  |- reuse = rcu_dereference(sk->sk_reuseport_cb)
> > > |  |
> > > |  |                               |  /* reuse->has_conns == 0 here */
> > > |  |                               |- more_reuse->has_conns = reuse->has_conns
> > > |  |- reuse->has_conns = 1         |  /* more_reuse->has_conns SHOULD BE 1 HERE */
> > > |  |                               |
> > > |  |                               |- rcu_assign_pointer(reuse->socks[i]->sk_reuseport_cb,
> > > |  |                               |                     more_reuse)
> > > |  `- rcu_read_unlock()            `- kfree_rcu(reuse, rcu)
> > > |
> > > |- sk->sk_state = TCP_ESTABLISHED
> > >
> > > Fixes: acdcecc61285 ("udp: correct reuseport selection with connected sockets")
> > > Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
> > > ---
> > > v2:
> > >   * Fix build failure for CONFIG_IPV6=m
> > >   * Drop SO_INCOMING_CPU fix, which will be sent for net-next
> > >     after the v6.1 merge window
> > >
> > > v1: https://lore.kernel.org/netdev/20221010174351.11024-1-kuniyu@amazon.com/
> > > ---
> > >  include/net/sock_reuseport.h | 11 +++++------
> > >  net/core/sock_reuseport.c    | 15 +++++++++++++++
> > >  net/ipv4/datagram.c          |  2 +-
> > >  net/ipv4/udp.c               |  2 +-
> > >  net/ipv6/datagram.c          |  2 +-
> > >  net/ipv6/udp.c               |  2 +-
> > >  6 files changed, 24 insertions(+), 10 deletions(-)
> > >
> > > diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
> > > index 473b0b0fa4ab..efc9085c6892 100644
> > > --- a/include/net/sock_reuseport.h
> > > +++ b/include/net/sock_reuseport.h
> > > @@ -43,21 +43,20 @@ struct sock *reuseport_migrate_sock(struct sock *sk,
> > >  extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog);
> > >  extern int reuseport_detach_prog(struct sock *sk);
> > >
> > > -static inline bool reuseport_has_conns(struct sock *sk, bool set)
> > > +static inline bool reuseport_has_conns(struct sock *sk)
> > >  {
> > >         struct sock_reuseport *reuse;
> > >         bool ret = false;
> > >
> > >         rcu_read_lock();
> > >         reuse = rcu_dereference(sk->sk_reuseport_cb);
> > > -       if (reuse) {
> > > -               if (set)
> > > -                       reuse->has_conns = 1;
> > > -               ret = reuse->has_conns;
> > > -       }
> > > +       if (reuse && reuse->has_conns)
> > > +               ret = true;
> > >         rcu_read_unlock();
> > >
> > >         return ret;
> > >  }
> > >
> > > +void reuseport_has_conns_set(struct sock *sk);
> > > +
> > >  #endif  /* _SOCK_REUSEPORT_H */
> > > diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
> > > index 5daa1fa54249..abb414ed4aa7 100644
> > > --- a/net/core/sock_reuseport.c
> > > +++ b/net/core/sock_reuseport.c
> > > @@ -21,6 +21,21 @@ static DEFINE_IDA(reuseport_ida);
> > >  static int reuseport_resurrect(struct sock *sk, struct sock_reuseport *old_reuse,
> > >                                struct sock_reuseport *reuse, bool bind_inany);
> > >
> > > +void reuseport_has_conns_set(struct sock *sk)
> > > +{
> > > +       struct sock_reuseport *reuse;
> > > +
> > > +       if (!rcu_access_pointer(sk->sk_reuseport_cb))
> > > +               return;
> > > +
> > > +       spin_lock(&reuseport_lock);
> > > +       reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
> > > +                                         lockdep_is_held(&reuseport_lock));
> >
> > Could @reuse be NULL at this point ?
> >
> > Previous  test was performed without reuseport_lock being held.
>
> Usually, sk_reuseport_cb is changed under lock_sock().
>
> The only exception is reuseport_grow() & TCP reqsk migration case.
>
> 1) shutdown() TCP listener, which is moved into the latter part of
>    reuse->socks[] to migrate reqsk.
>
> 2) New listen() overflows reuse->socks[] and call reuseport_grow().
>
> 3) reuse->max_socks overflows u16 with the new listener.
>
> 4) reuseport_grow() pops the old shutdown()ed listener from the array
>    and update its sk->sk_reuseport_cb as NULL without lock_sock().
>
> shutdown()ed sk->sk_reuseport_cb can be changed without lock_sock().
>
> But, reuseport_has_conns_set() is called only for UDP and under
> lock_sock(), so @reuse never be NULL in this case.

Given the complexity of this code and how much time is needed to
review all possibilities, please add an additional

if (reuse)
   reuse->has_conns = 1;

I doubt anyone will object to such safety measures.

Thanks.

>
>
> > > +       reuse->has_conns = 1;
> > > +       spin_unlock(&reuseport_lock);
> > > +}
> > > +EXPORT_SYMBOL(reuseport_has_conns_set);
> > > +
> > >  static int reuseport_sock_index(struct sock *sk,
> > >                                 const struct sock_reuseport *reuse,
> > >                                 bool closed)
> > > diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
> > > index 405a8c2aea64..5e66add7befa 100644
> > > --- a/net/ipv4/datagram.c
> > > +++ b/net/ipv4/datagram.c
> > > @@ -70,7 +70,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
> > >         }
> > >         inet->inet_daddr = fl4->daddr;
> > >         inet->inet_dport = usin->sin_port;
> > > -       reuseport_has_conns(sk, true);
> > > +       reuseport_has_conns_set(sk);
> > >         sk->sk_state = TCP_ESTABLISHED;
> > >         sk_set_txhash(sk);
> > >         inet->inet_id = prandom_u32();
> > > diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> > > index d63118ce5900..29228231b058 100644
> > > --- a/net/ipv4/udp.c
> > > +++ b/net/ipv4/udp.c
> > > @@ -448,7 +448,7 @@ static struct sock *udp4_lib_lookup2(struct net *net,
> > >                         result = lookup_reuseport(net, sk, skb,
> > >                                                   saddr, sport, daddr, hnum);
> > >                         /* Fall back to scoring if group has connections */
> > > -                       if (result && !reuseport_has_conns(sk, false))
> > > +                       if (result && !reuseport_has_conns(sk))
> > >                                 return result;
> > >
> > >                         result = result ? : sk;
> > > diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
> > > index df665d4e8f0f..5ecb56522f9d 100644
> > > --- a/net/ipv6/datagram.c
> > > +++ b/net/ipv6/datagram.c
> > > @@ -256,7 +256,7 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr,
> > >                 goto out;
> > >         }
> > >
> > > -       reuseport_has_conns(sk, true);
> > > +       reuseport_has_conns_set(sk);
> > >         sk->sk_state = TCP_ESTABLISHED;
> > >         sk_set_txhash(sk);
> > >  out:
> > > diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
> > > index 91e795bb9ade..56e4523a3004 100644
> > > --- a/net/ipv6/udp.c
> > > +++ b/net/ipv6/udp.c
> > > @@ -182,7 +182,7 @@ static struct sock *udp6_lib_lookup2(struct net *net,
> > >                         result = lookup_reuseport(net, sk, skb,
> > >                                                   saddr, sport, daddr, hnum);
> > >                         /* Fall back to scoring if group has connections */
> > > -                       if (result && !reuseport_has_conns(sk, false))
> > > +                       if (result && !reuseport_has_conns(sk))
> > >                                 return result;
> > >
> > >                         result = result ? : sk;
> > > --
> > > 2.30.2

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2 net] udp: Update reuse->has_conns under reuseport_lock.
  2022-10-13 16:09     ` Eric Dumazet
@ 2022-10-13 16:51       ` Kuniyuki Iwashima
  2022-10-13 17:41       ` Martin KaFai Lau
  1 sibling, 0 replies; 7+ messages in thread
From: Kuniyuki Iwashima @ 2022-10-13 16:51 UTC (permalink / raw)
  To: edumazet
  Cc: davem, dsahern, kraig, kuba, kuni1840, kuniyu, martin.lau,
	netdev, pabeni, willemb, yoshfuji

From:   Eric Dumazet <edumazet@google.com>
Date:   Thu, 13 Oct 2022 09:09:31 -0700
> On Wed, Oct 12, 2022 at 12:28 PM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
> >
> > From:   Eric Dumazet <edumazet@google.com>
> > Date:   Wed, 12 Oct 2022 11:59:43 -0700
> > > On Wed, Oct 12, 2022 at 11:53 AM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
> > > >
> > > > When we call connect() for a UDP socket in a reuseport group, we have
> > > > to update sk->sk_reuseport_cb->has_conns to 1.  Otherwise, the kernel
> > > > could wrongly select an unconnected socket for packets sent to the
> > > > connected socket.
> > > >
> > > > However, the current way to set has_conns is illegal and can
> > > > trigger that problem.  reuseport_has_conns() changes has_conns under
> > > > rcu_read_lock(), which upgrades the RCU reader to the updater.  Then,
> > > > it must do the update under the updater's lock, reuseport_lock, but
> > > > it doesn't for now.
> > > >
> > > > For this reason, there is a race below where we fail to set has_conns
> > > > resulting in the wrong socket selection.  To avoid the race, let's split
> > > > the reader and updater with proper locking.
> > > >
> > > >  cpu1                               cpu2
> > > > +----+                             +----+
> > > >
> > > > __ip[46]_datagram_connect()        reuseport_grow()
> > > > .                                  .
> > > > |- reuseport_has_conns(sk, true)   |- more_reuse = __reuseport_alloc(more_socks_size)
> > > > |  .                               |
> > > > |  |- rcu_read_lock()
> > > > |  |- reuse = rcu_dereference(sk->sk_reuseport_cb)
> > > > |  |
> > > > |  |                               |  /* reuse->has_conns == 0 here */
> > > > |  |                               |- more_reuse->has_conns = reuse->has_conns
> > > > |  |- reuse->has_conns = 1         |  /* more_reuse->has_conns SHOULD BE 1 HERE */
> > > > |  |                               |
> > > > |  |                               |- rcu_assign_pointer(reuse->socks[i]->sk_reuseport_cb,
> > > > |  |                               |                     more_reuse)
> > > > |  `- rcu_read_unlock()            `- kfree_rcu(reuse, rcu)
> > > > |
> > > > |- sk->sk_state = TCP_ESTABLISHED
> > > >
> > > > Fixes: acdcecc61285 ("udp: correct reuseport selection with connected sockets")
> > > > Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
> > > > ---
> > > > v2:
> > > >   * Fix build failure for CONFIG_IPV6=m
> > > >   * Drop SO_INCOMING_CPU fix, which will be sent for net-next
> > > >     after the v6.1 merge window
> > > >
> > > > v1: https://lore.kernel.org/netdev/20221010174351.11024-1-kuniyu@amazon.com/
> > > > ---
> > > >  include/net/sock_reuseport.h | 11 +++++------
> > > >  net/core/sock_reuseport.c    | 15 +++++++++++++++
> > > >  net/ipv4/datagram.c          |  2 +-
> > > >  net/ipv4/udp.c               |  2 +-
> > > >  net/ipv6/datagram.c          |  2 +-
> > > >  net/ipv6/udp.c               |  2 +-
> > > >  6 files changed, 24 insertions(+), 10 deletions(-)
> > > >
> > > > diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
> > > > index 473b0b0fa4ab..efc9085c6892 100644
> > > > --- a/include/net/sock_reuseport.h
> > > > +++ b/include/net/sock_reuseport.h
> > > > @@ -43,21 +43,20 @@ struct sock *reuseport_migrate_sock(struct sock *sk,
> > > >  extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog);
> > > >  extern int reuseport_detach_prog(struct sock *sk);
> > > >
> > > > -static inline bool reuseport_has_conns(struct sock *sk, bool set)
> > > > +static inline bool reuseport_has_conns(struct sock *sk)
> > > >  {
> > > >         struct sock_reuseport *reuse;
> > > >         bool ret = false;
> > > >
> > > >         rcu_read_lock();
> > > >         reuse = rcu_dereference(sk->sk_reuseport_cb);
> > > > -       if (reuse) {
> > > > -               if (set)
> > > > -                       reuse->has_conns = 1;
> > > > -               ret = reuse->has_conns;
> > > > -       }
> > > > +       if (reuse && reuse->has_conns)
> > > > +               ret = true;
> > > >         rcu_read_unlock();
> > > >
> > > >         return ret;
> > > >  }
> > > >
> > > > +void reuseport_has_conns_set(struct sock *sk);
> > > > +
> > > >  #endif  /* _SOCK_REUSEPORT_H */
> > > > diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
> > > > index 5daa1fa54249..abb414ed4aa7 100644
> > > > --- a/net/core/sock_reuseport.c
> > > > +++ b/net/core/sock_reuseport.c
> > > > @@ -21,6 +21,21 @@ static DEFINE_IDA(reuseport_ida);
> > > >  static int reuseport_resurrect(struct sock *sk, struct sock_reuseport *old_reuse,
> > > >                                struct sock_reuseport *reuse, bool bind_inany);
> > > >
> > > > +void reuseport_has_conns_set(struct sock *sk)
> > > > +{
> > > > +       struct sock_reuseport *reuse;
> > > > +
> > > > +       if (!rcu_access_pointer(sk->sk_reuseport_cb))
> > > > +               return;
> > > > +
> > > > +       spin_lock(&reuseport_lock);
> > > > +       reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
> > > > +                                         lockdep_is_held(&reuseport_lock));
> > >
> > > Could @reuse be NULL at this point ?
> > >
> > > Previous  test was performed without reuseport_lock being held.
> >
> > Usually, sk_reuseport_cb is changed under lock_sock().
> >
> > The only exception is reuseport_grow() & TCP reqsk migration case.
> >
> > 1) shutdown() TCP listener, which is moved into the latter part of
> >    reuse->socks[] to migrate reqsk.
> >
> > 2) New listen() overflows reuse->socks[] and call reuseport_grow().
> >
> > 3) reuse->max_socks overflows u16 with the new listener.
> >
> > 4) reuseport_grow() pops the old shutdown()ed listener from the array
> >    and update its sk->sk_reuseport_cb as NULL without lock_sock().
> >
> > shutdown()ed sk->sk_reuseport_cb can be changed without lock_sock().
> >
> > But reuseport_has_conns_set() is called only for UDP and under
> > lock_sock(), so @reuse can never be NULL in this case.
> 
> Given the complexity of this code and how much time is needed to
> review all possibilities, please add an additional
> 
> if (reuse)
>    reuse->has_conns = 1;
> 
> I doubt anyone will object to such safety measures.

I see.
If no one has any objections, I'll respin tomorrow with likely().

Thank you.


> 
> Thanks.
> 
> >
> >
> > > > +       reuse->has_conns = 1;
> > > > +       spin_unlock(&reuseport_lock);
> > > > +}
> > > > +EXPORT_SYMBOL(reuseport_has_conns_set);
> > > > +
> > > >  static int reuseport_sock_index(struct sock *sk,
> > > >                                 const struct sock_reuseport *reuse,
> > > >                                 bool closed)
> > > > diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
> > > > index 405a8c2aea64..5e66add7befa 100644
> > > > --- a/net/ipv4/datagram.c
> > > > +++ b/net/ipv4/datagram.c
> > > > @@ -70,7 +70,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
> > > >         }
> > > >         inet->inet_daddr = fl4->daddr;
> > > >         inet->inet_dport = usin->sin_port;
> > > > -       reuseport_has_conns(sk, true);
> > > > +       reuseport_has_conns_set(sk);
> > > >         sk->sk_state = TCP_ESTABLISHED;
> > > >         sk_set_txhash(sk);
> > > >         inet->inet_id = prandom_u32();
> > > > diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> > > > index d63118ce5900..29228231b058 100644
> > > > --- a/net/ipv4/udp.c
> > > > +++ b/net/ipv4/udp.c
> > > > @@ -448,7 +448,7 @@ static struct sock *udp4_lib_lookup2(struct net *net,
> > > >                         result = lookup_reuseport(net, sk, skb,
> > > >                                                   saddr, sport, daddr, hnum);
> > > >                         /* Fall back to scoring if group has connections */
> > > > -                       if (result && !reuseport_has_conns(sk, false))
> > > > +                       if (result && !reuseport_has_conns(sk))
> > > >                                 return result;
> > > >
> > > >                         result = result ? : sk;
> > > > diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
> > > > index df665d4e8f0f..5ecb56522f9d 100644
> > > > --- a/net/ipv6/datagram.c
> > > > +++ b/net/ipv6/datagram.c
> > > > @@ -256,7 +256,7 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr,
> > > >                 goto out;
> > > >         }
> > > >
> > > > -       reuseport_has_conns(sk, true);
> > > > +       reuseport_has_conns_set(sk);
> > > >         sk->sk_state = TCP_ESTABLISHED;
> > > >         sk_set_txhash(sk);
> > > >  out:
> > > > diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
> > > > index 91e795bb9ade..56e4523a3004 100644
> > > > --- a/net/ipv6/udp.c
> > > > +++ b/net/ipv6/udp.c
> > > > @@ -182,7 +182,7 @@ static struct sock *udp6_lib_lookup2(struct net *net,
> > > >                         result = lookup_reuseport(net, sk, skb,
> > > >                                                   saddr, sport, daddr, hnum);
> > > >                         /* Fall back to scoring if group has connections */
> > > > -                       if (result && !reuseport_has_conns(sk, false))
> > > > +                       if (result && !reuseport_has_conns(sk))
> > > >                                 return result;
> > > >
> > > >                         result = result ? : sk;
> > > > --
> > > > 2.30.2

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2 net] udp: Update reuse->has_conns under reuseport_lock.
  2022-10-13 16:09     ` Eric Dumazet
  2022-10-13 16:51       ` Kuniyuki Iwashima
@ 2022-10-13 17:41       ` Martin KaFai Lau
  2022-10-13 18:46         ` Kuniyuki Iwashima
  1 sibling, 1 reply; 7+ messages in thread
From: Martin KaFai Lau @ 2022-10-13 17:41 UTC (permalink / raw)
  To: Kuniyuki Iwashima
  Cc: davem, dsahern, Eric Dumazet, kraig, kuba, kuni1840, martin.lau,
	netdev, pabeni, willemb, yoshfuji

On 10/13/22 9:09 AM, Eric Dumazet wrote:
>>>> diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
>>>> index 5daa1fa54249..abb414ed4aa7 100644
>>>> --- a/net/core/sock_reuseport.c
>>>> +++ b/net/core/sock_reuseport.c
>>>> @@ -21,6 +21,21 @@ static DEFINE_IDA(reuseport_ida);
>>>>   static int reuseport_resurrect(struct sock *sk, struct sock_reuseport *old_reuse,
>>>>                                 struct sock_reuseport *reuse, bool bind_inany);
>>>>
>>>> +void reuseport_has_conns_set(struct sock *sk)
>>>> +{
>>>> +       struct sock_reuseport *reuse;
>>>> +
>>>> +       if (!rcu_access_pointer(sk->sk_reuseport_cb))
>>>> +               return;
>>>> +
>>>> +       spin_lock(&reuseport_lock);

It seems other paths are still using spin_lock_bh().  It would be useful to
have a few words here explaining why _bh() is not needed.

>>>> +       reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
>>>> +                                         lockdep_is_held(&reuseport_lock));
>>>
>>> Could @reuse be NULL at this point ?
>>>
>>> Previous  test was performed without reuseport_lock being held.
>>
>> Usually, sk_reuseport_cb is changed under lock_sock().
>>
>> The only exception is reuseport_grow() & TCP reqsk migration case.
>>
>> 1) shutdown() TCP listener, which is moved into the latter part of
>>     reuse->socks[] to migrate reqsk.
>>
>> 2) New listen() overflows reuse->socks[] and call reuseport_grow().
>>
>> 3) reuse->max_socks overflows u16 with the new listener.
>>
>> 4) reuseport_grow() pops the old shutdown()ed listener from the array
>>     and update its sk->sk_reuseport_cb as NULL without lock_sock().
>>
>> shutdown()ed sk->sk_reuseport_cb can be changed without lock_sock().
>>
>> But reuseport_has_conns_set() is called only for UDP and under
>> lock_sock(), so @reuse can never be NULL in this case.
> 
> Given the complexity of this code and how much time is needed to
> review all possibilities, please add an additional
> 
> if (reuse)
>     reuse->has_conns = 1;

+1


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2 net] udp: Update reuse->has_conns under reuseport_lock.
  2022-10-13 17:41       ` Martin KaFai Lau
@ 2022-10-13 18:46         ` Kuniyuki Iwashima
  0 siblings, 0 replies; 7+ messages in thread
From: Kuniyuki Iwashima @ 2022-10-13 18:46 UTC (permalink / raw)
  To: martin.lau
  Cc: davem, dsahern, edumazet, kraig, kuba, kuni1840, kuniyu,
	martin.lau, netdev, pabeni, willemb

From:   Martin KaFai Lau <martin.lau@linux.dev>
Date:   Thu, 13 Oct 2022 10:41:53 -0700
> On 10/13/22 9:09 AM, Eric Dumazet wrote:
> >>>> diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
> >>>> index 5daa1fa54249..abb414ed4aa7 100644
> >>>> --- a/net/core/sock_reuseport.c
> >>>> +++ b/net/core/sock_reuseport.c
> >>>> @@ -21,6 +21,21 @@ static DEFINE_IDA(reuseport_ida);
> >>>>   static int reuseport_resurrect(struct sock *sk, struct sock_reuseport *old_reuse,
> >>>>                                 struct sock_reuseport *reuse, bool bind_inany);
> >>>>
> >>>> +void reuseport_has_conns_set(struct sock *sk)
> >>>> +{
> >>>> +       struct sock_reuseport *reuse;
> >>>> +
> >>>> +       if (!rcu_access_pointer(sk->sk_reuseport_cb))
> >>>> +               return;
> >>>> +
> >>>> +       spin_lock(&reuseport_lock);
> 
> It seems other paths are still using the spin_lock_bh().  It will be useful to 
> have a few words here why _bh() is not needed.

I think I forgot to add _bh(), but I'm now wondering what the hlist
lock mentioned in reuseport_alloc() is...


> >>>> +       reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
> >>>> +                                         lockdep_is_held(&reuseport_lock));
> >>>
> >>> Could @reuse be NULL at this point ?
> >>>
> >>> Previous  test was performed without reuseport_lock being held.
> >>
> >> Usually, sk_reuseport_cb is changed under lock_sock().
> >>
> >> The only exception is reuseport_grow() & TCP reqsk migration case.
> >>
> >> 1) shutdown() TCP listener, which is moved into the latter part of
> >>     reuse->socks[] to migrate reqsk.
> >>
> >> 2) New listen() overflows reuse->socks[] and call reuseport_grow().
> >>
> >> 3) reuse->max_socks overflows u16 with the new listener.
> >>
> >> 4) reuseport_grow() pops the old shutdown()ed listener from the array
> >>     and update its sk->sk_reuseport_cb as NULL without lock_sock().
> >>
> >> shutdown()ed sk->sk_reuseport_cb can be changed without lock_sock().
> >>
> >> But reuseport_has_conns_set() is called only for UDP and under
> >> lock_sock(), so @reuse can never be NULL in this case.
> > 
> > Given the complexity of this code and how much time is needed to
> > review all possibilities, please add an additional
> > 
> > if (reuse)
> >     reuse->has_conns = 1;
> 
> +1

Acked.

Thank you.

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2022-10-13 18:56 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-10-12 18:52 [PATCH v2 net] udp: Update reuse->has_conns under reuseport_lock Kuniyuki Iwashima
2022-10-12 18:59 ` Eric Dumazet
2022-10-12 19:27   ` Kuniyuki Iwashima
2022-10-13 16:09     ` Eric Dumazet
2022-10-13 16:51       ` Kuniyuki Iwashima
2022-10-13 17:41       ` Martin KaFai Lau
2022-10-13 18:46         ` Kuniyuki Iwashima

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).