All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jon Maxwell <jmaxwell37@gmail.com>
To: davem@davemloft.net
Cc: edumazet@google.com, kuba@kernel.org, pabeni@redhat.com,
	yoshfuji@linux-ipv6.org, dsahern@kernel.org,
	martin.lau@kernel.org, joel@joelfernandes.org,
	paulmck@kernel.org, eyal.birger@gmail.com,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	Jon Maxwell <jmaxwell37@gmail.com>,
	Andrea Mayer <andrea.mayer@uniroma2.it>
Subject: [net-next v2] ipv6: remove max_size check inline with ipv4
Date: Thu, 12 Jan 2023 12:25:32 +1100	[thread overview]
Message-ID: <20230112012532.311021-1-jmaxwell37@gmail.com> (raw)

v2: Correct syntax error in net/ipv6/route.c

In ip6_dst_gc() replace: 

if (entries > gc_thresh)

With:

if (entries > ops->gc_thresh)

Sending IPv6 packets in a loop via a raw socket triggers an issue where a
route is cloned by ip6_rt_cache_alloc() for each packet sent. This quickly
consumes the IPv6 max_size threshold, which defaults to 4096, resulting in
these warnings:

[1]   99.187805] dst_alloc: 7728 callbacks suppressed
[2] Route cache is full: consider increasing sysctl net.ipv6.route.max_size.
.
.
[300] Route cache is full: consider increasing sysctl net.ipv6.route.max_size.

When this happens the packet is dropped and sendto() fails with a
"Network is unreachable" error (errno 101, ENETUNREACH):

# ./a.out -s 

remaining pkt 200557 errno 101
remaining pkt 196462 errno 101
.
.
remaining pkt 126821 errno 101

Implement David Ahern's suggestion to remove the max_size check, seeing that
IPv6 has a GC to manage memory usage. IPv4 already does not check max_size.

Here are some memory comparisons for IPv4 vs IPv6 with the patch:

Test by running 5 instances of a program that sends UDP packets to a raw
socket 5000000 times. Compare IPv4 and IPv6 performance with a similar
program.

IPv4:

Before test:

# grep -e Slab -e Free /proc/meminfo
MemFree:        29427108 kB
Slab:             237612 kB

# grep dst_cache /proc/slabinfo
ip6_dst_cache       1912   2528    256   32    2 : tunables    0    0    0 
xfrm_dst_cache         0      0    320   25    2 : tunables    0    0    0 
ip_dst_cache        2881   3990    192   42    2 : tunables    0    0    0 

During test:

# grep -e Slab -e Free /proc/meminfo
MemFree:        29417608 kB
Slab:             247712 kB

# grep dst_cache /proc/slabinfo
ip6_dst_cache       1912   2528    256   32    2 : tunables    0    0    0 
xfrm_dst_cache         0      0    320   25    2 : tunables    0    0    0 
ip_dst_cache       44394  44394    192   42    2 : tunables    0    0    0 

After test:

# grep -e Slab -e Free /proc/meminfo
MemFree:        29422308 kB
Slab:             238104 kB

# grep dst_cache /proc/slabinfo
ip6_dst_cache       1912   2528    256   32    2 : tunables    0    0    0 
xfrm_dst_cache         0      0    320   25    2 : tunables    0    0    0 
ip_dst_cache        3048   4116    192   42    2 : tunables    0    0    0 

IPv6 with patch:

Errno 101 errors are not observed anymore with the patch.

Before test:

# grep -e Slab -e Free /proc/meminfo
MemFree:        29422308 kB
Slab:             238104 kB

# grep dst_cache /proc/slabinfo
ip6_dst_cache       1912   2528    256   32    2 : tunables    0    0    0 
xfrm_dst_cache         0      0    320   25    2 : tunables    0    0    0 
ip_dst_cache        3048   4116    192   42    2 : tunables    0    0    0 

During Test:

# grep -e Slab -e Free /proc/meminfo
MemFree:        29431516 kB
Slab:             240940 kB

# grep dst_cache /proc/slabinfo
ip6_dst_cache      11980  12064    256   32    2 : tunables    0    0    0
xfrm_dst_cache         0      0    320   25    2 : tunables    0    0    0
ip_dst_cache        3048   4116    192   42    2 : tunables    0    0    0

After Test:

# grep -e Slab -e Free /proc/meminfo
MemFree:        29441816 kB
Slab:             238132 kB

# grep dst_cache /proc/slabinfo
ip6_dst_cache       1902   2432    256   32    2 : tunables    0    0    0
xfrm_dst_cache         0      0    320   25    2 : tunables    0    0    0
ip_dst_cache        3048   4116    192   42    2 : tunables    0    0    0

Tested-by: Andrea Mayer <andrea.mayer@uniroma2.it>
Signed-off-by: Jon Maxwell <jmaxwell37@gmail.com>
---
 include/net/dst_ops.h |  2 +-
 net/core/dst.c        |  8 ++------
 net/ipv6/route.c      | 13 +++++--------
 3 files changed, 8 insertions(+), 15 deletions(-)

diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h
index 88ff7bb2bb9b..632086b2f644 100644
--- a/include/net/dst_ops.h
+++ b/include/net/dst_ops.h
@@ -16,7 +16,7 @@ struct dst_ops {
 	unsigned short		family;
 	unsigned int		gc_thresh;
 
-	int			(*gc)(struct dst_ops *ops);
+	void			(*gc)(struct dst_ops *ops);
 	struct dst_entry *	(*check)(struct dst_entry *, __u32 cookie);
 	unsigned int		(*default_advmss)(const struct dst_entry *);
 	unsigned int		(*mtu)(const struct dst_entry *);
diff --git a/net/core/dst.c b/net/core/dst.c
index 6d2dd03dafa8..31c08a3386d3 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -82,12 +82,8 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
 
 	if (ops->gc &&
 	    !(flags & DST_NOCOUNT) &&
-	    dst_entries_get_fast(ops) > ops->gc_thresh) {
-		if (ops->gc(ops)) {
-			pr_notice_ratelimited("Route cache is full: consider increasing sysctl net.ipv6.route.max_size.\n");
-			return NULL;
-		}
-	}
+	    dst_entries_get_fast(ops) > ops->gc_thresh)
+		ops->gc(ops);
 
 	dst = kmem_cache_alloc(ops->kmem_cachep, GFP_ATOMIC);
 	if (!dst)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index e74e0361fd92..b643dda68d31 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -91,7 +91,7 @@ static struct dst_entry *ip6_negative_advice(struct dst_entry *);
 static void		ip6_dst_destroy(struct dst_entry *);
 static void		ip6_dst_ifdown(struct dst_entry *,
 				       struct net_device *dev, int how);
-static int		 ip6_dst_gc(struct dst_ops *ops);
+static void		 ip6_dst_gc(struct dst_ops *ops);
 
 static int		ip6_pkt_discard(struct sk_buff *skb);
 static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
@@ -3284,11 +3284,10 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
 	return dst;
 }
 
-static int ip6_dst_gc(struct dst_ops *ops)
+static void ip6_dst_gc(struct dst_ops *ops)
 {
 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
-	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
@@ -3296,11 +3295,10 @@ static int ip6_dst_gc(struct dst_ops *ops)
 	int entries;
 
 	entries = dst_entries_get_fast(ops);
-	if (entries > rt_max_size)
+	if (entries > ops->gc_thresh)
 		entries = dst_entries_get_slow(ops);
 
-	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
-	    entries <= rt_max_size)
+	if (time_after(rt_last_gc + rt_min_interval, jiffies))
 		goto out;
 
 	fib6_run_gc(atomic_inc_return(&net->ipv6.ip6_rt_gc_expire), net, true);
@@ -3310,7 +3308,6 @@ static int ip6_dst_gc(struct dst_ops *ops)
 out:
 	val = atomic_read(&net->ipv6.ip6_rt_gc_expire);
 	atomic_set(&net->ipv6.ip6_rt_gc_expire, val - (val >> rt_elasticity));
-	return entries > rt_max_size;
 }
 
 static int ip6_nh_lookup_table(struct net *net, struct fib6_config *cfg,
@@ -6512,7 +6509,7 @@ static int __net_init ip6_route_net_init(struct net *net)
 #endif
 
 	net->ipv6.sysctl.flush_delay = 0;
-	net->ipv6.sysctl.ip6_rt_max_size = 4096;
+	net->ipv6.sysctl.ip6_rt_max_size = INT_MAX;
 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
-- 
2.31.1


             reply	other threads:[~2023-01-12  1:26 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-01-12  1:25 Jon Maxwell [this message]
2023-01-12 20:41 ` [net-next v2] ipv6: remove max_size check inline with ipv4 Andrea Mayer
2023-01-12 21:48   ` Jonathan Maxwell
2023-01-12 22:26     ` Andrea Mayer
2023-01-13  4:03 ` David Ahern
2023-01-14  5:40 ` patchwork-bot+netdevbpf
2023-01-14 14:05 kernel test robot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230112012532.311021-1-jmaxwell37@gmail.com \
    --to=jmaxwell37@gmail.com \
    --cc=andrea.mayer@uniroma2.it \
    --cc=davem@davemloft.net \
    --cc=dsahern@kernel.org \
    --cc=edumazet@google.com \
    --cc=eyal.birger@gmail.com \
    --cc=joel@joelfernandes.org \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=martin.lau@kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=paulmck@kernel.org \
    --cc=yoshfuji@linux-ipv6.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.