linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org, Willem de Bruijn <willemb@google.com>,
	Paolo Abeni <pabeni@redhat.com>, Craig Gallek <kraig@google.com>,
	"David S. Miller" <davem@davemloft.net>
Subject: [PATCH 5.3 07/21] udp: correct reuseport selection with connected sockets
Date: Fri, 20 Sep 2019 00:03:08 +0200	[thread overview]
Message-ID: <20190919214702.093524364@linuxfoundation.org> (raw)
In-Reply-To: <20190919214657.842130855@linuxfoundation.org>

From: Willem de Bruijn <willemb@google.com>

[ Upstream commit acdcecc61285faed359f1a3568c32089cc3a8329 ]

UDP reuseport groups can hold a mix unconnected and connected sockets.
Ensure that connections only receive all traffic to their 4-tuple.

Fast reuseport returns on the first reuseport match on the assumption
that all matches are equal. Only if connections are present, return to
the previous behavior of scoring all sockets.

Record if connections are present and if so (1) treat such connected
sockets as an independent match from the group, (2) only return
2-tuple matches from reuseport and (3) do not return on the first
2-tuple reuseport match to allow for a higher scoring match later.

New field has_conns is set without locks. No other fields in the
bitmap are modified at runtime and the field is only ever set
unconditionally, so an RMW cannot miss a change.

Fixes: e32ea7e74727 ("soreuseport: fast reuseport UDP socket selection")
Link: http://lkml.kernel.org/r/CA+FuTSfRP09aJNYRt04SS6qj22ViiOEWaWmLAwX0psk8-PGNxw@mail.gmail.com
Signed-off-by: Willem de Bruijn <willemb@google.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Craig Gallek <kraig@google.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/sock_reuseport.h |   20 +++++++++++++++++++-
 net/core/sock_reuseport.c    |   15 +++++++++++++--
 net/ipv4/datagram.c          |    2 ++
 net/ipv4/udp.c               |    5 +++--
 net/ipv6/datagram.c          |    2 ++
 net/ipv6/udp.c               |    5 +++--
 6 files changed, 42 insertions(+), 7 deletions(-)

--- a/include/net/sock_reuseport.h
+++ b/include/net/sock_reuseport.h
@@ -21,7 +21,8 @@ struct sock_reuseport {
 	unsigned int		synq_overflow_ts;
 	/* ID stays the same even after the size of socks[] grows. */
 	unsigned int		reuseport_id;
-	bool			bind_inany;
+	unsigned int		bind_inany:1;
+	unsigned int		has_conns:1;
 	struct bpf_prog __rcu	*prog;		/* optional BPF sock selector */
 	struct sock		*socks[0];	/* array of sock pointers */
 };
@@ -37,6 +38,23 @@ extern struct sock *reuseport_select_soc
 extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog);
 extern int reuseport_detach_prog(struct sock *sk);
 
+static inline bool reuseport_has_conns(struct sock *sk, bool set)
+{
+	struct sock_reuseport *reuse;
+	bool ret = false;
+
+	rcu_read_lock();
+	reuse = rcu_dereference(sk->sk_reuseport_cb);
+	if (reuse) {
+		if (set)
+			reuse->has_conns = 1;
+		ret = reuse->has_conns;
+	}
+	rcu_read_unlock();
+
+	return ret;
+}
+
 int reuseport_get_id(struct sock_reuseport *reuse);
 
 #endif  /* _SOCK_REUSEPORT_H */
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -295,8 +295,19 @@ struct sock *reuseport_select_sock(struc
 
 select_by_hash:
 		/* no bpf or invalid bpf result: fall back to hash usage */
-		if (!sk2)
-			sk2 = reuse->socks[reciprocal_scale(hash, socks)];
+		if (!sk2) {
+			int i, j;
+
+			i = j = reciprocal_scale(hash, socks);
+			while (reuse->socks[i]->sk_state == TCP_ESTABLISHED) {
+				i++;
+				if (i >= reuse->num_socks)
+					i = 0;
+				if (i == j)
+					goto out;
+			}
+			sk2 = reuse->socks[i];
+		}
 	}
 
 out:
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -15,6 +15,7 @@
 #include <net/sock.h>
 #include <net/route.h>
 #include <net/tcp_states.h>
+#include <net/sock_reuseport.h>
 
 int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 {
@@ -69,6 +70,7 @@ int __ip4_datagram_connect(struct sock *
 	}
 	inet->inet_daddr = fl4->daddr;
 	inet->inet_dport = usin->sin_port;
+	reuseport_has_conns(sk, true);
 	sk->sk_state = TCP_ESTABLISHED;
 	sk_set_txhash(sk);
 	inet->inet_id = jiffies;
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -423,12 +423,13 @@ static struct sock *udp4_lib_lookup2(str
 		score = compute_score(sk, net, saddr, sport,
 				      daddr, hnum, dif, sdif);
 		if (score > badness) {
-			if (sk->sk_reuseport) {
+			if (sk->sk_reuseport &&
+			    sk->sk_state != TCP_ESTABLISHED) {
 				hash = udp_ehashfn(net, daddr, hnum,
 						   saddr, sport);
 				result = reuseport_select_sock(sk, hash, skb,
 							sizeof(struct udphdr));
-				if (result)
+				if (result && !reuseport_has_conns(sk, false))
 					return result;
 			}
 			badness = score;
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -27,6 +27,7 @@
 #include <net/ip6_route.h>
 #include <net/tcp_states.h>
 #include <net/dsfield.h>
+#include <net/sock_reuseport.h>
 
 #include <linux/errqueue.h>
 #include <linux/uaccess.h>
@@ -254,6 +255,7 @@ ipv4_connected:
 		goto out;
 	}
 
+	reuseport_has_conns(sk, true);
 	sk->sk_state = TCP_ESTABLISHED;
 	sk_set_txhash(sk);
 out:
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -158,13 +158,14 @@ static struct sock *udp6_lib_lookup2(str
 		score = compute_score(sk, net, saddr, sport,
 				      daddr, hnum, dif, sdif);
 		if (score > badness) {
-			if (sk->sk_reuseport) {
+			if (sk->sk_reuseport &&
+			    sk->sk_state != TCP_ESTABLISHED) {
 				hash = udp6_ehashfn(net, daddr, hnum,
 						    saddr, sport);
 
 				result = reuseport_select_sock(sk, hash, skb,
 							sizeof(struct udphdr));
-				if (result)
+				if (result && !reuseport_has_conns(sk, false))
 					return result;
 			}
 			result = sk;



  parent reply	other threads:[~2019-09-19 22:06 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-09-19 22:03 [PATCH 5.3 00/21] 5.3.1-stable review Greg Kroah-Hartman
2019-09-19 22:03 ` [PATCH 5.3 01/21] USB: usbcore: Fix slab-out-of-bounds bug during device reset Greg Kroah-Hartman
2019-09-19 22:03 ` [PATCH 5.3 02/21] media: tm6000: double free if usb disconnect while streaming Greg Kroah-Hartman
2019-09-19 22:03 ` [PATCH 5.3 03/21] phy: renesas: rcar-gen3-usb2: Disable clearing VBUS in over-current Greg Kroah-Hartman
2019-09-19 22:03 ` [PATCH 5.3 04/21] ip6_gre: fix a dst leak in ip6erspan_tunnel_xmit Greg Kroah-Hartman
2019-09-19 22:03 ` [PATCH 5.3 05/21] net/sched: fix race between deactivation and dequeue for NOLOCK qdisc Greg Kroah-Hartman
2019-09-19 22:03 ` [PATCH 5.3 06/21] net_sched: let qdisc_put() accept NULL pointer Greg Kroah-Hartman
2019-09-19 22:03 ` Greg Kroah-Hartman [this message]
2019-09-19 22:03 ` [PATCH 5.3 08/21] xen-netfront: do not assume sk_buff_head list is empty in error handling Greg Kroah-Hartman
2019-09-19 22:03 ` [PATCH 5.3 09/21] net: dsa: Fix load order between DSA drivers and taggers Greg Kroah-Hartman
2019-09-19 22:03 ` [PATCH 5.3 10/21] net: stmmac: Hold rtnl lock in suspend/resume callbacks Greg Kroah-Hartman
2019-09-19 22:03 ` [PATCH 5.3 11/21] KVM: coalesced_mmio: add bounds checking Greg Kroah-Hartman
2019-09-19 22:03 ` [PATCH 5.3 12/21] Documentation: sphinx: Add missing comma to list of strings Greg Kroah-Hartman
2019-09-19 22:03 ` [PATCH 5.3 13/21] firmware: google: check if size is valid when decoding VPD data Greg Kroah-Hartman
2019-09-19 22:03 ` [PATCH 5.3 14/21] serial: sprd: correct the wrong sequence of arguments Greg Kroah-Hartman
2019-09-19 22:03 ` [PATCH 5.3 15/21] tty/serial: atmel: reschedule TX after RX was started Greg Kroah-Hartman
2019-09-19 22:03 ` [PATCH 5.3 16/21] nl80211: Fix possible Spectre-v1 for CQM RSSI thresholds Greg Kroah-Hartman
2019-09-19 22:03 ` [PATCH 5.3 17/21] Revert "arm64: Remove unnecessary ISBs from set_{pte,pmd,pud}" Greg Kroah-Hartman
2019-09-19 22:03 ` [PATCH 5.3 18/21] ovl: fix regression caused by overlapping layers detection Greg Kroah-Hartman
2019-09-19 22:03 ` [PATCH 5.3 19/21] phy: qcom-qmp: Correct ready status, again Greg Kroah-Hartman
2019-09-19 22:03 ` [PATCH 5.3 20/21] floppy: fix usercopy direction Greg Kroah-Hartman
2019-09-19 22:03 ` [PATCH 5.3 21/21] media: technisat-usb2: break out of loop at end of buffer Greg Kroah-Hartman
2019-09-20 13:45 ` [PATCH 5.3 00/21] 5.3.1-stable review Guenter Roeck
2019-09-20 13:54 ` Jon Hunter
2019-09-20 14:24   ` Greg Kroah-Hartman
2019-09-20 16:01     ` Jon Hunter
2019-09-22  8:13       ` Greg Kroah-Hartman
2019-09-20 14:41 ` Naresh Kamboju
2019-09-21  5:06   ` Greg Kroah-Hartman
2019-09-20 21:17 ` shuah
2019-09-21  5:04   ` Greg Kroah-Hartman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190919214702.093524364@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=davem@davemloft.net \
    --cc=kraig@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=stable@vger.kernel.org \
    --cc=willemb@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).