All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net 0/2] ipv4: avoid pathological hash tables
@ 2022-01-18 20:46 Eric Dumazet
  2022-01-18 20:46 ` [PATCH net 1/2] ipv4: avoid quadratic behavior in netns dismantle Eric Dumazet
  2022-01-18 20:46 ` [PATCH net 2/2] ipv4: add net_hash_mix() dispersion to fib_info_laddrhash keys Eric Dumazet
  0 siblings, 2 replies; 6+ messages in thread
From: Eric Dumazet @ 2022-01-18 20:46 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski
  Cc: David Ahern, netdev, Eric Dumazet, Eric Dumazet

From: Eric Dumazet <edumazet@google.com>

This series speeds up netns dismantles on hosts
having many active netns, by making sure two hash tables
used for IPV4 fib contain uniformly spread items.

Eric Dumazet (2):
  ipv4: avoid quadratic behavior in netns dismantle
  ipv4: add net_hash_mix() dispersion to fib_info_laddrhash keys

 net/ipv4/fib_semantics.c | 56 +++++++++++++++++++---------------------
 1 file changed, 26 insertions(+), 30 deletions(-)

-- 
2.34.1.703.g22d0c6ccf7-goog


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH net 1/2] ipv4: avoid quadratic behavior in netns dismantle
  2022-01-18 20:46 [PATCH net 0/2] ipv4: avoid pathological hash tables Eric Dumazet
@ 2022-01-18 20:46 ` Eric Dumazet
  2022-01-19  2:40   ` David Ahern
  2022-01-18 20:46 ` [PATCH net 2/2] ipv4: add net_hash_mix() dispersion to fib_info_laddrhash keys Eric Dumazet
  1 sibling, 1 reply; 6+ messages in thread
From: Eric Dumazet @ 2022-01-18 20:46 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski
  Cc: David Ahern, netdev, Eric Dumazet, Eric Dumazet

From: Eric Dumazet <edumazet@google.com>

net/ipv4/fib_semantics.c uses a hash table of 256 slots,
keyed by device ifindexes: fib_info_devhash[DEVINDEX_HASHSIZE]

Problem is that with network namespaces, devices tend
to use the same ifindex.

lo device for instance has a fixed ifindex of one,
for all network namespaces.

This means that hosts with thousands of netns spend
a lot of time looking at some hash buckets with thousands
of elements, notably at netns dismantle.

Simply add a per netns perturbation (net_hash_mix())
to spread elements more uniformly.

Also change fib_devindex_hashfn() to use more entropy.

Fixes: aa79e66eee5d ("net: Make ifindex generation per-net namespace")
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/ipv4/fib_semantics.c | 36 +++++++++++++++++-------------------
 1 file changed, 17 insertions(+), 19 deletions(-)

diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 45619c005b8dddd7ccd5c7029efa4ed69b6ce1de..9813949da10493de36b9db797b6a5d94fd9bd3b1 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -29,6 +29,7 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/netlink.h>
+#include <linux/hash.h>
 
 #include <net/arp.h>
 #include <net/ip.h>
@@ -319,11 +320,15 @@ static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi)
 
 static inline unsigned int fib_devindex_hashfn(unsigned int val)
 {
-	unsigned int mask = DEVINDEX_HASHSIZE - 1;
+	return hash_32(val, DEVINDEX_HASHBITS);
+}
 
-	return (val ^
-		(val >> DEVINDEX_HASHBITS) ^
-		(val >> (DEVINDEX_HASHBITS * 2))) & mask;
+static struct hlist_head *
+fib_info_devhash_bucket(const struct net_device *dev)
+{
+	u32 val = net_hash_mix(dev_net(dev)) ^ dev->ifindex;
+
+	return &fib_info_devhash[fib_devindex_hashfn(val)];
 }
 
 static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope,
@@ -433,12 +438,11 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev)
 {
 	struct hlist_head *head;
 	struct fib_nh *nh;
-	unsigned int hash;
 
 	spin_lock(&fib_info_lock);
 
-	hash = fib_devindex_hashfn(dev->ifindex);
-	head = &fib_info_devhash[hash];
+	head = fib_info_devhash_bucket(dev);
+
 	hlist_for_each_entry(nh, head, nh_hash) {
 		if (nh->fib_nh_dev == dev &&
 		    nh->fib_nh_gw4 == gw &&
@@ -1609,12 +1613,10 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
 	} else {
 		change_nexthops(fi) {
 			struct hlist_head *head;
-			unsigned int hash;
 
 			if (!nexthop_nh->fib_nh_dev)
 				continue;
-			hash = fib_devindex_hashfn(nexthop_nh->fib_nh_dev->ifindex);
-			head = &fib_info_devhash[hash];
+			head = fib_info_devhash_bucket(nexthop_nh->fib_nh_dev);
 			hlist_add_head(&nexthop_nh->nh_hash, head);
 		} endfor_nexthops(fi)
 	}
@@ -1966,8 +1968,7 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig)
 
 void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
 {
-	unsigned int hash = fib_devindex_hashfn(dev->ifindex);
-	struct hlist_head *head = &fib_info_devhash[hash];
+	struct hlist_head *head = fib_info_devhash_bucket(dev);
 	struct fib_nh *nh;
 
 	hlist_for_each_entry(nh, head, nh_hash) {
@@ -1986,12 +1987,11 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
  */
 int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
 {
-	int ret = 0;
-	int scope = RT_SCOPE_NOWHERE;
+	struct hlist_head *head = fib_info_devhash_bucket(dev);
 	struct fib_info *prev_fi = NULL;
-	unsigned int hash = fib_devindex_hashfn(dev->ifindex);
-	struct hlist_head *head = &fib_info_devhash[hash];
+	int scope = RT_SCOPE_NOWHERE;
 	struct fib_nh *nh;
+	int ret = 0;
 
 	if (force)
 		scope = -1;
@@ -2136,7 +2136,6 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
 int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
 {
 	struct fib_info *prev_fi;
-	unsigned int hash;
 	struct hlist_head *head;
 	struct fib_nh *nh;
 	int ret;
@@ -2152,8 +2151,7 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
 	}
 
 	prev_fi = NULL;
-	hash = fib_devindex_hashfn(dev->ifindex);
-	head = &fib_info_devhash[hash];
+	head = fib_info_devhash_bucket(dev);
 	ret = 0;
 
 	hlist_for_each_entry(nh, head, nh_hash) {
-- 
2.34.1.703.g22d0c6ccf7-goog


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH net 2/2] ipv4: add net_hash_mix() dispersion to fib_info_laddrhash keys
  2022-01-18 20:46 [PATCH net 0/2] ipv4: avoid pathological hash tables Eric Dumazet
  2022-01-18 20:46 ` [PATCH net 1/2] ipv4: avoid quadratic behavior in netns dismantle Eric Dumazet
@ 2022-01-18 20:46 ` Eric Dumazet
  2022-01-19  2:42   ` David Ahern
  1 sibling, 1 reply; 6+ messages in thread
From: Eric Dumazet @ 2022-01-18 20:46 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski
  Cc: David Ahern, netdev, Eric Dumazet, Eric Dumazet

From: Eric Dumazet <edumazet@google.com>

net/ipv4/fib_semantics.c uses a hash table (fib_info_laddrhash)
in which fib_sync_down_addr() can locate fib_info
based on IPv4 local address.

This hash table is resized based on total number of
hashed fib_info, but the hash function is only
using the local address.

For hosts having many active network namespaces,
all fib_info for loopback devices (IPv4 address 127.0.0.1)
are hashed into a single bucket, making netns dismantles
very slow.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/ipv4/fib_semantics.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 9813949da10493de36b9db797b6a5d94fd9bd3b1..7971889fc0fe3690e47931c39e6a8f8e0fb1d31f 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -52,6 +52,7 @@ static DEFINE_SPINLOCK(fib_info_lock);
 static struct hlist_head *fib_info_hash;
 static struct hlist_head *fib_info_laddrhash;
 static unsigned int fib_info_hash_size;
+static unsigned int fib_info_hash_bits;
 static unsigned int fib_info_cnt;
 
 #define DEVINDEX_HASHBITS 8
@@ -1247,13 +1248,9 @@ int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope,
 	return err;
 }
 
-static inline unsigned int fib_laddr_hashfn(__be32 val)
+static inline unsigned int fib_laddr_hashfn(const struct net *net, __be32 val)
 {
-	unsigned int mask = (fib_info_hash_size - 1);
-
-	return ((__force u32)val ^
-		((__force u32)val >> 7) ^
-		((__force u32)val >> 14)) & mask;
+	return hash_32(net_hash_mix(net) ^ (__force u32)val, fib_info_hash_bits);
 }
 
 static struct hlist_head *fib_info_hash_alloc(int bytes)
@@ -1289,6 +1286,7 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
 	old_info_hash = fib_info_hash;
 	old_laddrhash = fib_info_laddrhash;
 	fib_info_hash_size = new_size;
+	fib_info_hash_bits = ilog2(new_size);
 
 	for (i = 0; i < old_size; i++) {
 		struct hlist_head *head = &fib_info_hash[i];
@@ -1315,7 +1313,7 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
 			struct hlist_head *ldest;
 			unsigned int new_hash;
 
-			new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
+			new_hash = fib_laddr_hashfn(fi->fib_net, fi->fib_prefsrc);
 			ldest = &new_laddrhash[new_hash];
 			hlist_add_head(&fi->fib_lhash, ldest);
 		}
@@ -1605,7 +1603,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
 	if (fi->fib_prefsrc) {
 		struct hlist_head *head;
 
-		head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
+		head = &fib_info_laddrhash[fib_laddr_hashfn(net, fi->fib_prefsrc)];
 		hlist_add_head(&fi->fib_lhash, head);
 	}
 	if (fi->nh) {
@@ -1877,16 +1875,16 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
  */
 int fib_sync_down_addr(struct net_device *dev, __be32 local)
 {
-	int ret = 0;
-	unsigned int hash = fib_laddr_hashfn(local);
-	struct hlist_head *head = &fib_info_laddrhash[hash];
 	int tb_id = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
 	struct net *net = dev_net(dev);
+	struct hlist_head *head;
 	struct fib_info *fi;
+	int ret = 0;
 
 	if (!fib_info_laddrhash || local == 0)
 		return 0;
 
+	head = &fib_info_laddrhash[fib_laddr_hashfn(net, local)];
 	hlist_for_each_entry(fi, head, fib_lhash) {
 		if (!net_eq(fi->fib_net, net) ||
 		    fi->fib_tb_id != tb_id)
-- 
2.34.1.703.g22d0c6ccf7-goog


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH net 1/2] ipv4: avoid quadratic behavior in netns dismantle
  2022-01-18 20:46 ` [PATCH net 1/2] ipv4: avoid quadratic behavior in netns dismantle Eric Dumazet
@ 2022-01-19  2:40   ` David Ahern
  0 siblings, 0 replies; 6+ messages in thread
From: David Ahern @ 2022-01-19  2:40 UTC (permalink / raw)
  To: Eric Dumazet, David S . Miller, Jakub Kicinski
  Cc: David Ahern, netdev, Eric Dumazet

On 1/18/22 1:46 PM, Eric Dumazet wrote:
> From: Eric Dumazet <edumazet@google.com>
> 
> net/ipv4/fib_semantics.c uses an hash table of 256 slots,
> keyed by device ifindexes: fib_info_devhash[DEVINDEX_HASHSIZE]
> 
> Problem is that with network namespaces, devices tend
> to use the same ifindex.
> 
> lo device for instance has a fixed ifindex of one,
> for all network namespaces.
> 
> This means that hosts with thousands of netns spend
> a lot of time looking at some hash buckets with thousands
> of elements, notably at netns dismantle.
> 
> Simply add a per netns perturbation (net_hash_mix())
> to spread elements more uniformely.
> 
> Also change fib_devindex_hashfn() to use more entropy.
> 
> Fixes: aa79e66eee5d ("net: Make ifindex generation per-net namespace")
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> ---
>  net/ipv4/fib_semantics.c | 36 +++++++++++++++++-------------------
>  1 file changed, 17 insertions(+), 19 deletions(-)
> 

Reviewed-by: David Ahern <dsahern@kernel.org>



^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net 2/2] ipv4: add net_hash_mix() dispersion to fib_info_laddrhash keys
  2022-01-18 20:46 ` [PATCH net 2/2] ipv4: add net_hash_mix() dispersion to fib_info_laddrhash keys Eric Dumazet
@ 2022-01-19  2:42   ` David Ahern
  2022-01-19  8:54     ` Eric Dumazet
  0 siblings, 1 reply; 6+ messages in thread
From: David Ahern @ 2022-01-19  2:42 UTC (permalink / raw)
  To: Eric Dumazet, David S . Miller, Jakub Kicinski
  Cc: David Ahern, netdev, Eric Dumazet

On 1/18/22 1:46 PM, Eric Dumazet wrote:
> diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
> index 9813949da10493de36b9db797b6a5d94fd9bd3b1..7971889fc0fe3690e47931c39e6a8f8e0fb1d31f 100644
> --- a/net/ipv4/fib_semantics.c
> +++ b/net/ipv4/fib_semantics.c
> @@ -52,6 +52,7 @@ static DEFINE_SPINLOCK(fib_info_lock);
>  static struct hlist_head *fib_info_hash;
>  static struct hlist_head *fib_info_laddrhash;
>  static unsigned int fib_info_hash_size;
> +static unsigned int fib_info_hash_bits;
>  static unsigned int fib_info_cnt;
>  
>  #define DEVINDEX_HASHBITS 8
> @@ -1247,13 +1248,9 @@ int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope,
>  	return err;
>  }
>  
> -static inline unsigned int fib_laddr_hashfn(__be32 val)
> +static inline unsigned int fib_laddr_hashfn(const struct net *net, __be32 val)
>  {
> -	unsigned int mask = (fib_info_hash_size - 1);
> -
> -	return ((__force u32)val ^
> -		((__force u32)val >> 7) ^
> -		((__force u32)val >> 14)) & mask;
> +	return hash_32(net_hash_mix(net) ^ (__force u32)val, fib_info_hash_bits);
>  }
>  
>  static struct hlist_head *fib_info_hash_alloc(int bytes)

for consistency, make this hashfn and bucket lookup similar to
fib_devindex_hashfn and fib_info_devhash_bucket.



^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net 2/2] ipv4: add net_hash_mix() dispersion to fib_info_laddrhash keys
  2022-01-19  2:42   ` David Ahern
@ 2022-01-19  8:54     ` Eric Dumazet
  0 siblings, 0 replies; 6+ messages in thread
From: Eric Dumazet @ 2022-01-19  8:54 UTC (permalink / raw)
  To: David Ahern
  Cc: Eric Dumazet, David S . Miller, Jakub Kicinski, David Ahern, netdev

On Tue, Jan 18, 2022 at 6:42 PM David Ahern <dsahern@gmail.com> wrote:
>
>
> for consistency, make this hashfn and bucket lookup similar to
> fib_devindex_hashfn and fib_info_devhash_bucket.
>
>

Ack, thanks for the suggestion.

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2022-01-19  8:55 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-01-18 20:46 [PATCH net 0/2] ipv4: avoid pathological hash tables Eric Dumazet
2022-01-18 20:46 ` [PATCH net 1/2] ipv4: avoid quadratic behavior in netns dismantle Eric Dumazet
2022-01-19  2:40   ` David Ahern
2022-01-18 20:46 ` [PATCH net 2/2] ipv4: add net_hash_mix() dispersion to fib_info_laddrhash keys Eric Dumazet
2022-01-19  2:42   ` David Ahern
2022-01-19  8:54     ` Eric Dumazet

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.