All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net] udp: correct reuseport selection with connected sockets
@ 2019-09-13  1:16 Willem de Bruijn
  2019-09-13 14:47 ` Paolo Abeni
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: Willem de Bruijn @ 2019-09-13  1:16 UTC (permalink / raw)
  To: netdev
  Cc: davem, edumazet, kraig, zabele, pabeni, mark.keaton, Willem de Bruijn

From: Willem de Bruijn <willemb@google.com>

UDP reuseport groups can hold a mix of unconnected and connected sockets.
Ensure that a connected socket receives all traffic to its 4-tuple, and
only that traffic.

Fast reuseport returns on the first reuseport match on the assumption
that all matches are equal. Only if connections are present, return to
the previous behavior of scoring all sockets.

Record if connections are present and if so (1) treat such connected
sockets as an independent match from the group, (2) only return
2-tuple matches from reuseport and (3) do not return on the first
2-tuple reuseport match to allow for a higher scoring match later.

New field has_conns is set without locks. No other fields in the
bitmap are modified at runtime and the field is only ever set
unconditionally, so an RMW cannot miss a change.

Fixes: e32ea7e74727 ("soreuseport: fast reuseport UDP socket selection")
Link: http://lkml.kernel.org/r/CA+FuTSfRP09aJNYRt04SS6qj22ViiOEWaWmLAwX0psk8-PGNxw@mail.gmail.com
Signed-off-by: Willem de Bruijn <willemb@google.com>

---

I was unable to compile some older kernels, so the Fixes tag is based
on code analysis rather than on bisection with the regression test.
---
 include/net/sock_reuseport.h | 20 +++++++++++++++++++-
 net/core/sock_reuseport.c    | 15 +++++++++++++--
 net/ipv4/datagram.c          |  2 ++
 net/ipv4/udp.c               |  5 +++--
 net/ipv6/datagram.c          |  2 ++
 net/ipv6/udp.c               |  5 +++--
 6 files changed, 42 insertions(+), 7 deletions(-)

diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
index d9112de85261..43f4a818d88f 100644
--- a/include/net/sock_reuseport.h
+++ b/include/net/sock_reuseport.h
@@ -21,7 +21,8 @@ struct sock_reuseport {
 	unsigned int		synq_overflow_ts;
 	/* ID stays the same even after the size of socks[] grows. */
 	unsigned int		reuseport_id;
-	bool			bind_inany;
+	unsigned int		bind_inany:1;
+	unsigned int		has_conns:1;
 	struct bpf_prog __rcu	*prog;		/* optional BPF sock selector */
 	struct sock		*socks[0];	/* array of sock pointers */
 };
@@ -37,6 +38,23 @@ extern struct sock *reuseport_select_sock(struct sock *sk,
 extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog);
 extern int reuseport_detach_prog(struct sock *sk);
 
+static inline bool reuseport_has_conns(struct sock *sk, bool set)
+{
+	struct sock_reuseport *reuse;
+	bool ret = false;
+
+	rcu_read_lock();
+	reuse = rcu_dereference(sk->sk_reuseport_cb);
+	if (reuse) {
+		if (set)
+			reuse->has_conns = 1;
+		ret = reuse->has_conns;
+	}
+	rcu_read_unlock();
+
+	return ret;
+}
+
 int reuseport_get_id(struct sock_reuseport *reuse);
 
 #endif  /* _SOCK_REUSEPORT_H */
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index 9408f9264d05..f3ceec93f392 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -295,8 +295,19 @@ struct sock *reuseport_select_sock(struct sock *sk,
 
 select_by_hash:
 		/* no bpf or invalid bpf result: fall back to hash usage */
-		if (!sk2)
-			sk2 = reuse->socks[reciprocal_scale(hash, socks)];
+		if (!sk2) {
+			int i, j;
+
+			i = j = reciprocal_scale(hash, socks);
+			while (reuse->socks[i]->sk_state == TCP_ESTABLISHED) {
+				i++;
+				if (i >= reuse->num_socks)
+					i = 0;
+				if (i == j)
+					goto out;
+			}
+			sk2 = reuse->socks[i];
+		}
 	}
 
 out:
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 7bd29e694603..9a0fe0c2fa02 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -15,6 +15,7 @@
 #include <net/sock.h>
 #include <net/route.h>
 #include <net/tcp_states.h>
+#include <net/sock_reuseport.h>
 
 int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 {
@@ -69,6 +70,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
 	}
 	inet->inet_daddr = fl4->daddr;
 	inet->inet_dport = usin->sin_port;
+	reuseport_has_conns(sk, true);
 	sk->sk_state = TCP_ESTABLISHED;
 	sk_set_txhash(sk);
 	inet->inet_id = jiffies;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index d88821c794fb..16486c8b708b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -423,12 +423,13 @@ static struct sock *udp4_lib_lookup2(struct net *net,
 		score = compute_score(sk, net, saddr, sport,
 				      daddr, hnum, dif, sdif);
 		if (score > badness) {
-			if (sk->sk_reuseport) {
+			if (sk->sk_reuseport &&
+			    sk->sk_state != TCP_ESTABLISHED) {
 				hash = udp_ehashfn(net, daddr, hnum,
 						   saddr, sport);
 				result = reuseport_select_sock(sk, hash, skb,
 							sizeof(struct udphdr));
-				if (result)
+				if (result && !reuseport_has_conns(sk, false))
 					return result;
 			}
 			badness = score;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 9ab897ded4df..96f939248d2f 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -27,6 +27,7 @@
 #include <net/ip6_route.h>
 #include <net/tcp_states.h>
 #include <net/dsfield.h>
+#include <net/sock_reuseport.h>
 
 #include <linux/errqueue.h>
 #include <linux/uaccess.h>
@@ -254,6 +255,7 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr,
 		goto out;
 	}
 
+	reuseport_has_conns(sk, true);
 	sk->sk_state = TCP_ESTABLISHED;
 	sk_set_txhash(sk);
 out:
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 827fe7385078..5995fdc99d3f 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -158,13 +158,14 @@ static struct sock *udp6_lib_lookup2(struct net *net,
 		score = compute_score(sk, net, saddr, sport,
 				      daddr, hnum, dif, sdif);
 		if (score > badness) {
-			if (sk->sk_reuseport) {
+			if (sk->sk_reuseport &&
+			    sk->sk_state != TCP_ESTABLISHED) {
 				hash = udp6_ehashfn(net, daddr, hnum,
 						    saddr, sport);
 
 				result = reuseport_select_sock(sk, hash, skb,
 							sizeof(struct udphdr));
-				if (result)
+				if (result && !reuseport_has_conns(sk, false))
 					return result;
 			}
 			result = sk;
-- 
2.23.0.237.gc6a4ce50a0-goog


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH net] udp: correct reuseport selection with connected sockets
  2019-09-13  1:16 [PATCH net] udp: correct reuseport selection with connected sockets Willem de Bruijn
@ 2019-09-13 14:47 ` Paolo Abeni
  2019-09-13 18:40 ` Craig Gallek
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: Paolo Abeni @ 2019-09-13 14:47 UTC (permalink / raw)
  To: Willem de Bruijn, netdev
  Cc: davem, edumazet, kraig, zabele, mark.keaton, Willem de Bruijn

On Thu, 2019-09-12 at 21:16 -0400, Willem de Bruijn wrote:
> From: Willem de Bruijn <willemb@google.com>
> 
> UDP reuseport groups can hold a mix unconnected and connected sockets.
> Ensure that connections only receive all traffic to their 4-tuple.
> 
> Fast reuseport returns on the first reuseport match on the assumption
> that all matches are equal. Only if connections are present, return to
> the previous behavior of scoring all sockets.
> 
> Record if connections are present and if so (1) treat such connected
> sockets as an independent match from the group, (2) only return
> 2-tuple matches from reuseport and (3) do not return on the first
> 2-tuple reuseport match to allow for a higher scoring match later.
> 
> New field has_conns is set without locks. No other fields in the
> bitmap are modified at runtime and the field is only ever set
> unconditionally, so an RMW cannot miss a change.
> 
> Fixes: e32ea7e74727 ("soreuseport: fast reuseport UDP socket selection")
> Link: http://lkml.kernel.org/r/CA+FuTSfRP09aJNYRt04SS6qj22ViiOEWaWmLAwX0psk8-PGNxw@mail.gmail.com
> Signed-off-by: Willem de Bruijn <willemb@google.com>
> 
> ---
> 
> I was unable to compile some older kernels, so the Fixes tag is based
> on basic analysis, not bisected to by the regression test.
> ---
>  include/net/sock_reuseport.h | 20 +++++++++++++++++++-
>  net/core/sock_reuseport.c    | 15 +++++++++++++--
>  net/ipv4/datagram.c          |  2 ++
>  net/ipv4/udp.c               |  5 +++--
>  net/ipv6/datagram.c          |  2 ++
>  net/ipv6/udp.c               |  5 +++--
>  6 files changed, 42 insertions(+), 7 deletions(-)
> 
> diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
> index d9112de85261..43f4a818d88f 100644
> --- a/include/net/sock_reuseport.h
> +++ b/include/net/sock_reuseport.h
> @@ -21,7 +21,8 @@ struct sock_reuseport {
>  	unsigned int		synq_overflow_ts;
>  	/* ID stays the same even after the size of socks[] grows. */
>  	unsigned int		reuseport_id;
> -	bool			bind_inany;
> +	unsigned int		bind_inany:1;
> +	unsigned int		has_conns:1;
>  	struct bpf_prog __rcu	*prog;		/* optional BPF sock selector */
>  	struct sock		*socks[0];	/* array of sock pointers */
>  };
> @@ -37,6 +38,23 @@ extern struct sock *reuseport_select_sock(struct sock *sk,
>  extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog);
>  extern int reuseport_detach_prog(struct sock *sk);
>  
> +static inline bool reuseport_has_conns(struct sock *sk, bool set)
> +{
> +	struct sock_reuseport *reuse;
> +	bool ret = false;
> +
> +	rcu_read_lock();
> +	reuse = rcu_dereference(sk->sk_reuseport_cb);
> +	if (reuse) {
> +		if (set)
> +			reuse->has_conns = 1;
> +		ret = reuse->has_conns;
> +	}
> +	rcu_read_unlock();
> +
> +	return ret;
> +}
> +
>  int reuseport_get_id(struct sock_reuseport *reuse);
>  
>  #endif  /* _SOCK_REUSEPORT_H */
> diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
> index 9408f9264d05..f3ceec93f392 100644
> --- a/net/core/sock_reuseport.c
> +++ b/net/core/sock_reuseport.c
> @@ -295,8 +295,19 @@ struct sock *reuseport_select_sock(struct sock *sk,
>  
>  select_by_hash:
>  		/* no bpf or invalid bpf result: fall back to hash usage */
> -		if (!sk2)
> -			sk2 = reuse->socks[reciprocal_scale(hash, socks)];
> +		if (!sk2) {
> +			int i, j;
> +
> +			i = j = reciprocal_scale(hash, socks);
> +			while (reuse->socks[i]->sk_state == TCP_ESTABLISHED) {
> +				i++;
> +				if (i >= reuse->num_socks)
> +					i = 0;
> +				if (i == j)
> +					goto out;
> +			}
> +			sk2 = reuse->socks[i];
> +		}
>  	}
>  
>  out:
> diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
> index 7bd29e694603..9a0fe0c2fa02 100644
> --- a/net/ipv4/datagram.c
> +++ b/net/ipv4/datagram.c
> @@ -15,6 +15,7 @@
>  #include <net/sock.h>
>  #include <net/route.h>
>  #include <net/tcp_states.h>
> +#include <net/sock_reuseport.h>
>  
>  int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
>  {
> @@ -69,6 +70,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
>  	}
>  	inet->inet_daddr = fl4->daddr;
>  	inet->inet_dport = usin->sin_port;
> +	reuseport_has_conns(sk, true);
>  	sk->sk_state = TCP_ESTABLISHED;
>  	sk_set_txhash(sk);
>  	inet->inet_id = jiffies;
> diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> index d88821c794fb..16486c8b708b 100644
> --- a/net/ipv4/udp.c
> +++ b/net/ipv4/udp.c
> @@ -423,12 +423,13 @@ static struct sock *udp4_lib_lookup2(struct net *net,
>  		score = compute_score(sk, net, saddr, sport,
>  				      daddr, hnum, dif, sdif);
>  		if (score > badness) {
> -			if (sk->sk_reuseport) {
> +			if (sk->sk_reuseport &&
> +			    sk->sk_state != TCP_ESTABLISHED) {
>  				hash = udp_ehashfn(net, daddr, hnum,
>  						   saddr, sport);
>  				result = reuseport_select_sock(sk, hash, skb,
>  							sizeof(struct udphdr));
> -				if (result)
> +				if (result && !reuseport_has_conns(sk, false))
>  					return result;
>  			}
>  			badness = score;
> diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
> index 9ab897ded4df..96f939248d2f 100644
> --- a/net/ipv6/datagram.c
> +++ b/net/ipv6/datagram.c
> @@ -27,6 +27,7 @@
>  #include <net/ip6_route.h>
>  #include <net/tcp_states.h>
>  #include <net/dsfield.h>
> +#include <net/sock_reuseport.h>
>  
>  #include <linux/errqueue.h>
>  #include <linux/uaccess.h>
> @@ -254,6 +255,7 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr,
>  		goto out;
>  	}
>  
> +	reuseport_has_conns(sk, true);
>  	sk->sk_state = TCP_ESTABLISHED;
>  	sk_set_txhash(sk);
>  out:
> diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
> index 827fe7385078..5995fdc99d3f 100644
> --- a/net/ipv6/udp.c
> +++ b/net/ipv6/udp.c
> @@ -158,13 +158,14 @@ static struct sock *udp6_lib_lookup2(struct net *net,
>  		score = compute_score(sk, net, saddr, sport,
>  				      daddr, hnum, dif, sdif);
>  		if (score > badness) {
> -			if (sk->sk_reuseport) {
> +			if (sk->sk_reuseport &&
> +			    sk->sk_state != TCP_ESTABLISHED) {
>  				hash = udp6_ehashfn(net, daddr, hnum,
>  						    saddr, sport);
>  
>  				result = reuseport_select_sock(sk, hash, skb,
>  							sizeof(struct udphdr));
> -				if (result)
> +				if (result && !reuseport_has_conns(sk, false))
>  					return result;
>  			}
>  			result = sk;

The patch LGTM,

Acked-by: Paolo Abeni <pabeni@redhat.com>


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH net] udp: correct reuseport selection with connected sockets
  2019-09-13  1:16 [PATCH net] udp: correct reuseport selection with connected sockets Willem de Bruijn
  2019-09-13 14:47 ` Paolo Abeni
@ 2019-09-13 18:40 ` Craig Gallek
  2019-09-15 13:36 ` Steve Zabele
  2019-09-16  7:03 ` David Miller
  3 siblings, 0 replies; 5+ messages in thread
From: Craig Gallek @ 2019-09-13 18:40 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: netdev, David Miller, Eric Dumazet, zabele, Paolo Abeni,
	mark.keaton, Willem de Bruijn

On Thu, Sep 12, 2019 at 9:16 PM Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
>
> From: Willem de Bruijn <willemb@google.com>
>
> UDP reuseport groups can hold a mix unconnected and connected sockets.
> Ensure that connections only receive all traffic to their 4-tuple.
>
> Fast reuseport returns on the first reuseport match on the assumption
> that all matches are equal. Only if connections are present, return to
> the previous behavior of scoring all sockets.
>
> Record if connections are present and if so (1) treat such connected
> sockets as an independent match from the group, (2) only return
> 2-tuple matches from reuseport and (3) do not return on the first
> 2-tuple reuseport match to allow for a higher scoring match later.
>
> New field has_conns is set without locks. No other fields in the
> bitmap are modified at runtime and the field is only ever set
> unconditionally, so an RMW cannot miss a change.
>
> Fixes: e32ea7e74727 ("soreuseport: fast reuseport UDP socket selection")
> Link: http://lkml.kernel.org/r/CA+FuTSfRP09aJNYRt04SS6qj22ViiOEWaWmLAwX0psk8-PGNxw@mail.gmail.com
> Signed-off-by: Willem de Bruijn <willemb@google.com>

Slick, no additional cost for the BPF case and just a single branch
for the unconnected udp, tcp listener case!

Acked-by: Craig Gallek <kraig@google.com>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* RE: [PATCH net] udp: correct reuseport selection with connected sockets
  2019-09-13  1:16 [PATCH net] udp: correct reuseport selection with connected sockets Willem de Bruijn
  2019-09-13 14:47 ` Paolo Abeni
  2019-09-13 18:40 ` Craig Gallek
@ 2019-09-15 13:36 ` Steve Zabele
  2019-09-16  7:03 ` David Miller
  3 siblings, 0 replies; 5+ messages in thread
From: Steve Zabele @ 2019-09-15 13:36 UTC (permalink / raw)
  To: 'Willem de Bruijn', netdev
  Cc: davem, edumazet, kraig, pabeni, mark.keaton, 'Willem de Bruijn'

Hey Willem,

Thanks a bunch for getting this resolved, *very* much appreciated. This is a
really big help for us.

Do you know if this will be backported to 4.19 stable, and if so, when it
might be available?

Thanks again

Steve

-----Original Message-----
From: Willem de Bruijn [mailto:willemdebruijn.kernel@gmail.com] 
Sent: Thursday, September 12, 2019 9:17 PM
To: netdev@vger.kernel.org
Cc: davem@davemloft.net; edumazet@google.com; kraig@google.com;
zabele@comcast.net; pabeni@redhat.com; mark.keaton@raytheon.com; Willem de
Bruijn
Subject: [PATCH net] udp: correct reuseport selection with connected sockets

From: Willem de Bruijn <willemb@google.com>

UDP reuseport groups can hold a mix unconnected and connected sockets.
Ensure that connections only receive all traffic to their 4-tuple.

Fast reuseport returns on the first reuseport match on the assumption
that all matches are equal. Only if connections are present, return to
the previous behavior of scoring all sockets.

Record if connections are present and if so (1) treat such connected
sockets as an independent match from the group, (2) only return
2-tuple matches from reuseport and (3) do not return on the first
2-tuple reuseport match to allow for a higher scoring match later.

New field has_conns is set without locks. No other fields in the
bitmap are modified at runtime and the field is only ever set
unconditionally, so an RMW cannot miss a change.

Fixes: e32ea7e74727 ("soreuseport: fast reuseport UDP socket selection")
Link: http://lkml.kernel.org/r/CA+FuTSfRP09aJNYRt04SS6qj22ViiOEWaWmLAwX0psk8-PGNxw@mail.gmail.com
Signed-off-by: Willem de Bruijn <willemb@google.com>

---

I was unable to compile some older kernels, so the Fixes tag is based
on basic analysis, not bisected to by the regression test.
---
 include/net/sock_reuseport.h | 20 +++++++++++++++++++-
 net/core/sock_reuseport.c    | 15 +++++++++++++--
 net/ipv4/datagram.c          |  2 ++
 net/ipv4/udp.c               |  5 +++--
 net/ipv6/datagram.c          |  2 ++
 net/ipv6/udp.c               |  5 +++--
 6 files changed, 42 insertions(+), 7 deletions(-)

diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
index d9112de85261..43f4a818d88f 100644
--- a/include/net/sock_reuseport.h
+++ b/include/net/sock_reuseport.h
@@ -21,7 +21,8 @@ struct sock_reuseport {
 	unsigned int		synq_overflow_ts;
 	/* ID stays the same even after the size of socks[] grows. */
 	unsigned int		reuseport_id;
-	bool			bind_inany;
+	unsigned int		bind_inany:1;
+	unsigned int		has_conns:1;
 	struct bpf_prog __rcu	*prog;		/* optional BPF sock
selector */
 	struct sock		*socks[0];	/* array of sock pointers */
 };
@@ -37,6 +38,23 @@ extern struct sock *reuseport_select_sock(struct sock
*sk,
 extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog);
 extern int reuseport_detach_prog(struct sock *sk);
 
+static inline bool reuseport_has_conns(struct sock *sk, bool set)
+{
+	struct sock_reuseport *reuse;
+	bool ret = false;
+
+	rcu_read_lock();
+	reuse = rcu_dereference(sk->sk_reuseport_cb);
+	if (reuse) {
+		if (set)
+			reuse->has_conns = 1;
+		ret = reuse->has_conns;
+	}
+	rcu_read_unlock();
+
+	return ret;
+}
+
 int reuseport_get_id(struct sock_reuseport *reuse);
 
 #endif  /* _SOCK_REUSEPORT_H */
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index 9408f9264d05..f3ceec93f392 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -295,8 +295,19 @@ struct sock *reuseport_select_sock(struct sock *sk,
 
 select_by_hash:
 		/* no bpf or invalid bpf result: fall back to hash usage */
-		if (!sk2)
-			sk2 = reuse->socks[reciprocal_scale(hash, socks)];
+		if (!sk2) {
+			int i, j;
+
+			i = j = reciprocal_scale(hash, socks);
+			while (reuse->socks[i]->sk_state == TCP_ESTABLISHED)
{
+				i++;
+				if (i >= reuse->num_socks)
+					i = 0;
+				if (i == j)
+					goto out;
+			}
+			sk2 = reuse->socks[i];
+		}
 	}
 
 out:
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 7bd29e694603..9a0fe0c2fa02 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -15,6 +15,7 @@
 #include <net/sock.h>
 #include <net/route.h>
 #include <net/tcp_states.h>
+#include <net/sock_reuseport.h>
 
 int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int
addr_len)
 {
@@ -69,6 +70,7 @@ int __ip4_datagram_connect(struct sock *sk, struct
sockaddr *uaddr, int addr_len
 	}
 	inet->inet_daddr = fl4->daddr;
 	inet->inet_dport = usin->sin_port;
+	reuseport_has_conns(sk, true);
 	sk->sk_state = TCP_ESTABLISHED;
 	sk_set_txhash(sk);
 	inet->inet_id = jiffies;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index d88821c794fb..16486c8b708b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -423,12 +423,13 @@ static struct sock *udp4_lib_lookup2(struct net *net,
 		score = compute_score(sk, net, saddr, sport,
 				      daddr, hnum, dif, sdif);
 		if (score > badness) {
-			if (sk->sk_reuseport) {
+			if (sk->sk_reuseport &&
+			    sk->sk_state != TCP_ESTABLISHED) {
 				hash = udp_ehashfn(net, daddr, hnum,
 						   saddr, sport);
 				result = reuseport_select_sock(sk, hash,
skb,
 							sizeof(struct
udphdr));
-				if (result)
+				if (result && !reuseport_has_conns(sk,
false))
 					return result;
 			}
 			badness = score;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 9ab897ded4df..96f939248d2f 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -27,6 +27,7 @@
 #include <net/ip6_route.h>
 #include <net/tcp_states.h>
 #include <net/dsfield.h>
+#include <net/sock_reuseport.h>
 
 #include <linux/errqueue.h>
 #include <linux/uaccess.h>
@@ -254,6 +255,7 @@ int __ip6_datagram_connect(struct sock *sk, struct
sockaddr *uaddr,
 		goto out;
 	}
 
+	reuseport_has_conns(sk, true);
 	sk->sk_state = TCP_ESTABLISHED;
 	sk_set_txhash(sk);
 out:
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 827fe7385078..5995fdc99d3f 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -158,13 +158,14 @@ static struct sock *udp6_lib_lookup2(struct net *net,
 		score = compute_score(sk, net, saddr, sport,
 				      daddr, hnum, dif, sdif);
 		if (score > badness) {
-			if (sk->sk_reuseport) {
+			if (sk->sk_reuseport &&
+			    sk->sk_state != TCP_ESTABLISHED) {
 				hash = udp6_ehashfn(net, daddr, hnum,
 						    saddr, sport);
 
 				result = reuseport_select_sock(sk, hash,
skb,
 							sizeof(struct
udphdr));
-				if (result)
+				if (result && !reuseport_has_conns(sk,
false))
 					return result;
 			}
 			result = sk;
-- 
2.23.0.237.gc6a4ce50a0-goog


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH net] udp: correct reuseport selection with connected sockets
  2019-09-13  1:16 [PATCH net] udp: correct reuseport selection with connected sockets Willem de Bruijn
                   ` (2 preceding siblings ...)
  2019-09-15 13:36 ` Steve Zabele
@ 2019-09-16  7:03 ` David Miller
  3 siblings, 0 replies; 5+ messages in thread
From: David Miller @ 2019-09-16  7:03 UTC (permalink / raw)
  To: willemdebruijn.kernel
  Cc: netdev, edumazet, kraig, zabele, pabeni, mark.keaton, willemb

From: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
Date: Thu, 12 Sep 2019 21:16:39 -0400

> From: Willem de Bruijn <willemb@google.com>
> 
> UDP reuseport groups can hold a mix unconnected and connected sockets.
> Ensure that connections only receive all traffic to their 4-tuple.
> 
> Fast reuseport returns on the first reuseport match on the assumption
> that all matches are equal. Only if connections are present, return to
> the previous behavior of scoring all sockets.
> 
> Record if connections are present and if so (1) treat such connected
> sockets as an independent match from the group, (2) only return
> 2-tuple matches from reuseport and (3) do not return on the first
> 2-tuple reuseport match to allow for a higher scoring match later.
> 
> New field has_conns is set without locks. No other fields in the
> bitmap are modified at runtime and the field is only ever set
> unconditionally, so an RMW cannot miss a change.
> 
> Fixes: e32ea7e74727 ("soreuseport: fast reuseport UDP socket selection")
> Link: http://lkml.kernel.org/r/CA+FuTSfRP09aJNYRt04SS6qj22ViiOEWaWmLAwX0psk8-PGNxw@mail.gmail.com
> Signed-off-by: Willem de Bruijn <willemb@google.com>

Applied and queued up for -stable, thanks.

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2019-09-16  7:03 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-09-13  1:16 [PATCH net] udp: correct reuseport selection with connected sockets Willem de Bruijn
2019-09-13 14:47 ` Paolo Abeni
2019-09-13 18:40 ` Craig Gallek
2019-09-15 13:36 ` Steve Zabele
2019-09-16  7:03 ` David Miller

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.