All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 4.14.y 0/2] netfilter: nat: fix soft lockup when most ports are used
@ 2022-02-03 12:41 Florian Westphal
  2022-02-03 12:41 ` [PATCH 4.14.y 1/2] netfilter: nat: remove l4 protocol port rovers Florian Westphal
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Florian Westphal @ 2022-02-03 12:41 UTC (permalink / raw)
  To: stable; +Cc: netfilter-devel, Florian Westphal

First patch removes old rovers, they are inherently racy
and cause packet drops/delays.

Second patch avoids iterating entire range of ports: It takes way
too long when most tuples are in use.

First patch is slightly mangled; nf_nat_proto_common.c needs minor adjustment
in context.

Florian Westphal (2):
  netfilter: nat: remove l4 protocol port rovers
  netfilter: nat: limit port clash resolution attempts

 include/net/netfilter/nf_nat_l4proto.h |  2 +-
 net/netfilter/nf_nat_proto_common.c    | 36 ++++++++++++++++++--------
 net/netfilter/nf_nat_proto_dccp.c      |  5 +---
 net/netfilter/nf_nat_proto_sctp.c      |  5 +---
 net/netfilter/nf_nat_proto_tcp.c       |  5 +---
 net/netfilter/nf_nat_proto_udp.c       | 10 ++-----
 6 files changed, 31 insertions(+), 32 deletions(-)

-- 
2.34.1


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 4.14.y 1/2] netfilter: nat: remove l4 protocol port rovers
  2022-02-03 12:41 [PATCH 4.14.y 0/2] netfilter: nat: fix soft lockup when most ports are used Florian Westphal
@ 2022-02-03 12:41 ` Florian Westphal
  2022-02-03 12:41 ` [PATCH 4.14.y 2/2] netfilter: nat: limit port clash resolution attempts Florian Westphal
  2022-02-03 17:44 ` [PATCH 4.14.y 0/2] netfilter: nat: fix soft lockup when most ports are used Greg KH
  2 siblings, 0 replies; 4+ messages in thread
From: Florian Westphal @ 2022-02-03 12:41 UTC (permalink / raw)
  To: stable; +Cc: netfilter-devel, Florian Westphal, Pablo Neira Ayuso

commit 6ed5943f8735e2b778d92ea4d9805c0a1d89bc2b upstream.

This is a leftover from days where single-cpu systems were common:
Store last port used to resolve a clash to use it as a starting point when
the next conflict needs to be resolved.

When we have parallel attempt to connect to same address:port pair,
its likely that both cores end up computing the same "available" port,
as both use same starting port, and newly used ports won't become
visible to other cores until the conntrack gets confirmed later.

One of the cores then has to drop the packet at insertion time because
the chosen new tuple turns out to be in use after all.

Lets simplify this: remove port rover and use a pseudo-random starting
point.

Note that this doesn't make netfilter default to 'fully random' mode;
the 'rover' was only used if NAT could not reuse source port as-is.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_nat_l4proto.h |  2 +-
 net/netfilter/nf_nat_proto_common.c    |  7 ++-----
 net/netfilter/nf_nat_proto_dccp.c      |  5 +----
 net/netfilter/nf_nat_proto_sctp.c      |  5 +----
 net/netfilter/nf_nat_proto_tcp.c       |  5 +----
 net/netfilter/nf_nat_proto_udp.c       | 10 ++--------
 6 files changed, 8 insertions(+), 26 deletions(-)

diff --git a/include/net/netfilter/nf_nat_l4proto.h b/include/net/netfilter/nf_nat_l4proto.h
index 67835ff8a2d9..103ecea6afdb 100644
--- a/include/net/netfilter/nf_nat_l4proto.h
+++ b/include/net/netfilter/nf_nat_l4proto.h
@@ -74,7 +74,7 @@ void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto,
 				 struct nf_conntrack_tuple *tuple,
 				 const struct nf_nat_range *range,
 				 enum nf_nat_manip_type maniptype,
-				 const struct nf_conn *ct, u16 *rover);
+				 const struct nf_conn *ct);
 
 int nf_nat_l4proto_nlattr_to_range(struct nlattr *tb[],
 				   struct nf_nat_range *range);
diff --git a/net/netfilter/nf_nat_proto_common.c b/net/netfilter/nf_nat_proto_common.c
index 7d7466dbf663..ac57e47aded2 100644
--- a/net/netfilter/nf_nat_proto_common.c
+++ b/net/netfilter/nf_nat_proto_common.c
@@ -38,8 +38,7 @@ void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto,
 				 struct nf_conntrack_tuple *tuple,
 				 const struct nf_nat_range *range,
 				 enum nf_nat_manip_type maniptype,
-				 const struct nf_conn *ct,
-				 u16 *rover)
+				 const struct nf_conn *ct)
 {
 	unsigned int range_size, min, max, i;
 	__be16 *portptr;
@@ -84,15 +83,13 @@ void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto,
 	} else if (range->flags & NF_NAT_RANGE_PROTO_RANDOM_FULLY) {
 		off = prandom_u32();
 	} else {
-		off = *rover;
+		off = prandom_u32();
 	}
 
 	for (i = 0; ; ++off) {
 		*portptr = htons(min + off % range_size);
 		if (++i != range_size && nf_nat_used_tuple(tuple, ct))
 			continue;
-		if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL))
-			*rover = off;
 		return;
 	}
 }
diff --git a/net/netfilter/nf_nat_proto_dccp.c b/net/netfilter/nf_nat_proto_dccp.c
index 269fcd5dc34c..04c671300a14 100644
--- a/net/netfilter/nf_nat_proto_dccp.c
+++ b/net/netfilter/nf_nat_proto_dccp.c
@@ -18,8 +18,6 @@
 #include <net/netfilter/nf_nat_l3proto.h>
 #include <net/netfilter/nf_nat_l4proto.h>
 
-static u_int16_t dccp_port_rover;
-
 static void
 dccp_unique_tuple(const struct nf_nat_l3proto *l3proto,
 		  struct nf_conntrack_tuple *tuple,
@@ -27,8 +25,7 @@ dccp_unique_tuple(const struct nf_nat_l3proto *l3proto,
 		  enum nf_nat_manip_type maniptype,
 		  const struct nf_conn *ct)
 {
-	nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct,
-				    &dccp_port_rover);
+	nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct);
 }
 
 static bool
diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c
index c57ee3240b1d..7329c9b1dc1e 100644
--- a/net/netfilter/nf_nat_proto_sctp.c
+++ b/net/netfilter/nf_nat_proto_sctp.c
@@ -12,8 +12,6 @@
 
 #include <net/netfilter/nf_nat_l4proto.h>
 
-static u_int16_t nf_sctp_port_rover;
-
 static void
 sctp_unique_tuple(const struct nf_nat_l3proto *l3proto,
 		  struct nf_conntrack_tuple *tuple,
@@ -21,8 +19,7 @@ sctp_unique_tuple(const struct nf_nat_l3proto *l3proto,
 		  enum nf_nat_manip_type maniptype,
 		  const struct nf_conn *ct)
 {
-	nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct,
-				    &nf_sctp_port_rover);
+	nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct);
 }
 
 static bool
diff --git a/net/netfilter/nf_nat_proto_tcp.c b/net/netfilter/nf_nat_proto_tcp.c
index 4f8820fc5148..882e79c6df73 100644
--- a/net/netfilter/nf_nat_proto_tcp.c
+++ b/net/netfilter/nf_nat_proto_tcp.c
@@ -18,8 +18,6 @@
 #include <net/netfilter/nf_nat_l4proto.h>
 #include <net/netfilter/nf_nat_core.h>
 
-static u16 tcp_port_rover;
-
 static void
 tcp_unique_tuple(const struct nf_nat_l3proto *l3proto,
 		 struct nf_conntrack_tuple *tuple,
@@ -27,8 +25,7 @@ tcp_unique_tuple(const struct nf_nat_l3proto *l3proto,
 		 enum nf_nat_manip_type maniptype,
 		 const struct nf_conn *ct)
 {
-	nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct,
-				    &tcp_port_rover);
+	nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct);
 }
 
 static bool
diff --git a/net/netfilter/nf_nat_proto_udp.c b/net/netfilter/nf_nat_proto_udp.c
index 167ad0dd269c..f48bacd38d9d 100644
--- a/net/netfilter/nf_nat_proto_udp.c
+++ b/net/netfilter/nf_nat_proto_udp.c
@@ -17,8 +17,6 @@
 #include <net/netfilter/nf_nat_l3proto.h>
 #include <net/netfilter/nf_nat_l4proto.h>
 
-static u16 udp_port_rover;
-
 static void
 udp_unique_tuple(const struct nf_nat_l3proto *l3proto,
 		 struct nf_conntrack_tuple *tuple,
@@ -26,8 +24,7 @@ udp_unique_tuple(const struct nf_nat_l3proto *l3proto,
 		 enum nf_nat_manip_type maniptype,
 		 const struct nf_conn *ct)
 {
-	nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct,
-				    &udp_port_rover);
+	nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct);
 }
 
 static void
@@ -78,8 +75,6 @@ static bool udp_manip_pkt(struct sk_buff *skb,
 }
 
 #ifdef CONFIG_NF_NAT_PROTO_UDPLITE
-static u16 udplite_port_rover;
-
 static bool udplite_manip_pkt(struct sk_buff *skb,
 			      const struct nf_nat_l3proto *l3proto,
 			      unsigned int iphdroff, unsigned int hdroff,
@@ -103,8 +98,7 @@ udplite_unique_tuple(const struct nf_nat_l3proto *l3proto,
 		     enum nf_nat_manip_type maniptype,
 		     const struct nf_conn *ct)
 {
-	nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct,
-				    &udplite_port_rover);
+	nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct);
 }
 
 const struct nf_nat_l4proto nf_nat_l4proto_udplite = {
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 4.14.y 2/2] netfilter: nat: limit port clash resolution attempts
  2022-02-03 12:41 [PATCH 4.14.y 0/2] netfilter: nat: fix soft lockup when most ports are used Florian Westphal
  2022-02-03 12:41 ` [PATCH 4.14.y 1/2] netfilter: nat: remove l4 protocol port rovers Florian Westphal
@ 2022-02-03 12:41 ` Florian Westphal
  2022-02-03 17:44 ` [PATCH 4.14.y 0/2] netfilter: nat: fix soft lockup when most ports are used Greg KH
  2 siblings, 0 replies; 4+ messages in thread
From: Florian Westphal @ 2022-02-03 12:41 UTC (permalink / raw)
  To: stable
  Cc: netfilter-devel, Florian Westphal, Pablo Neira Ayuso, Vimal Agrawal

commit a504b703bb1da526a01593da0e4be2af9d9f5fa8 upstream.

In case almost or all available ports are taken, clash resolution can
take a very long time, resulting in soft lockup.

This can happen when many to-be-natted hosts connect to same
destination:port (e.g. a proxy) and all connections pass the same SNAT.

Pick a random offset in the acceptable range, then try ever smaller
number of adjacent port numbers, until either the limit is reached or a
useable port was found.  This results in at most 248 attempts
(128 + 64 + 32 + 16 + 8, i.e. 4 restarts with new search offset)
instead of 64000+,

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Vimal Agrawal <vimal.agrawal@sophos.com>
---
 net/netfilter/nf_nat_proto_common.c | 29 +++++++++++++++++++++++------
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/net/netfilter/nf_nat_proto_common.c b/net/netfilter/nf_nat_proto_common.c
index ac57e47aded2..a4f709a3cbac 100644
--- a/net/netfilter/nf_nat_proto_common.c
+++ b/net/netfilter/nf_nat_proto_common.c
@@ -40,9 +40,10 @@ void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto,
 				 enum nf_nat_manip_type maniptype,
 				 const struct nf_conn *ct)
 {
-	unsigned int range_size, min, max, i;
+	unsigned int range_size, min, max, i, attempts;
 	__be16 *portptr;
-	u_int16_t off;
+	u16 off;
+	static const unsigned int max_attempts = 128;
 
 	if (maniptype == NF_NAT_MANIP_SRC)
 		portptr = &tuple->src.u.all;
@@ -86,12 +87,28 @@ void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto,
 		off = prandom_u32();
 	}
 
-	for (i = 0; ; ++off) {
+	attempts = range_size;
+	if (attempts > max_attempts)
+		attempts = max_attempts;
+
+	/* We are in softirq; doing a search of the entire range risks
+	 * soft lockup when all tuples are already used.
+	 *
+	 * If we can't find any free port from first offset, pick a new
+	 * one and try again, with ever smaller search window.
+	 */
+another_round:
+	for (i = 0; i < attempts; i++, off++) {
 		*portptr = htons(min + off % range_size);
-		if (++i != range_size && nf_nat_used_tuple(tuple, ct))
-			continue;
-		return;
+		if (!nf_nat_used_tuple(tuple, ct))
+			return;
 	}
+
+	if (attempts >= range_size || attempts < 16)
+		return;
+	attempts /= 2;
+	off = prandom_u32();
+	goto another_round;
 }
 EXPORT_SYMBOL_GPL(nf_nat_l4proto_unique_tuple);
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH 4.14.y 0/2] netfilter: nat: fix soft lockup when most ports are used
  2022-02-03 12:41 [PATCH 4.14.y 0/2] netfilter: nat: fix soft lockup when most ports are used Florian Westphal
  2022-02-03 12:41 ` [PATCH 4.14.y 1/2] netfilter: nat: remove l4 protocol port rovers Florian Westphal
  2022-02-03 12:41 ` [PATCH 4.14.y 2/2] netfilter: nat: limit port clash resolution attempts Florian Westphal
@ 2022-02-03 17:44 ` Greg KH
  2 siblings, 0 replies; 4+ messages in thread
From: Greg KH @ 2022-02-03 17:44 UTC (permalink / raw)
  To: Florian Westphal; +Cc: stable, netfilter-devel

On Thu, Feb 03, 2022 at 01:41:53PM +0100, Florian Westphal wrote:
> First patch removes old rovers, they are inherently racy
> and cause packet drops/delays.
> 
> Second patch avoids iterating entire range of ports: It takes way
> too long when most tuples are in use.
> 
> First patch is slightly mangled; nf_nat_proto_common.c needs minor adjustment
> in context.

Both sets of backports now queued up, thanks.

greg k-h

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2022-02-03 17:44 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-02-03 12:41 [PATCH 4.14.y 0/2] netfilter: nat: fix soft lockup when most ports are used Florian Westphal
2022-02-03 12:41 ` [PATCH 4.14.y 1/2] netfilter: nat: remove l4 protocol port rovers Florian Westphal
2022-02-03 12:41 ` [PATCH 4.14.y 2/2] netfilter: nat: limit port clash resolution attempts Florian Westphal
2022-02-03 17:44 ` [PATCH 4.14.y 0/2] netfilter: nat: fix soft lockup when most ports are used Greg KH

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.