All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net] net/ipv6: Add anycast addresses to a global hashtable
@ 2018-10-23  2:12 Jeff Barnhill
  2018-10-23  2:26 ` Eric Dumazet
  0 siblings, 1 reply; 23+ messages in thread
From: Jeff Barnhill @ 2018-10-23  2:12 UTC (permalink / raw)
  To: netdev; +Cc: davem, kuznet, yoshfuji, Jeff Barnhill

icmp6_send() function is expensive on systems with a large number of
interfaces. Every time it’s called, it has to verify that the source
address does not correspond to an existing anycast address by looping
through every device and every anycast address on the device.  This can
result in significant delays for a CPU when there are a large number of
neighbors and ND timers are frequently timing out and calling
neigh_invalidate().

Add anycast addresses to a global hashtable to allow quick searching for
matching anycast addresses.  This is based on inet6_addr_lst in addrconf.c.

Signed-off-by: Jeff Barnhill <0xeffeff@gmail.com>
---
 include/net/addrconf.h |   2 +
 include/net/if_inet6.h |   8 ++++
 net/ipv6/af_inet6.c    |   5 ++
 net/ipv6/anycast.c     | 122 ++++++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 135 insertions(+), 2 deletions(-)

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 6def0351bcc3..0cee3f99c41d 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -312,6 +312,8 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
 			 const struct in6_addr *addr);
 bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev,
 			     const struct in6_addr *addr);
+int anycast_init(void);
+void anycast_cleanup(void);
 
 /* Device notifier */
 int register_inet6addr_notifier(struct notifier_block *nb);
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index d7578cf49c3a..ac02b2cf2ba1 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -142,6 +142,14 @@ struct ipv6_ac_socklist {
 	struct ipv6_ac_socklist *acl_next;
 };
 
+struct ipv6_ac_addrlist {
+	struct in6_addr		acal_addr;
+	int			acal_ifindex; /* net */
+	int			acal_users;
+	struct hlist_node	acal_lst; /* inet6_acaddr_lst */
+	struct rcu_head		rcu;
+};
+
 struct ifacaddr6 {
 	struct in6_addr		aca_addr;
 	struct fib6_info	*aca_rt;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 9a4261e50272..971a05fdd3bd 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -1001,6 +1001,9 @@ static int __init inet6_init(void)
 	err = ip6_flowlabel_init();
 	if (err)
 		goto ip6_flowlabel_fail;
+	err = anycast_init();
+	if (err)
+		goto anycast_fail;
 	err = addrconf_init();
 	if (err)
 		goto addrconf_fail;
@@ -1091,6 +1094,8 @@ static int __init inet6_init(void)
 ipv6_exthdrs_fail:
 	addrconf_cleanup();
 addrconf_fail:
+	anycast_cleanup();
+anycast_fail:
 	ip6_flowlabel_cleanup();
 ip6_flowlabel_fail:
 	ndisc_late_cleanup();
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 4e0ff7031edd..58d31e0980aa 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -44,8 +44,22 @@
 
 #include <net/checksum.h>
 
+#define IN6_ADDR_HSIZE_SHIFT	8
+#define IN6_ADDR_HSIZE		BIT(IN6_ADDR_HSIZE_SHIFT)
+/*	anycast address hash table
+ */
+static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE];
+static DEFINE_SPINLOCK(acaddr_hash_lock);
+
 static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr);
 
+static u32 inet6_acaddr_hash(struct net *net, const struct in6_addr *addr)
+{
+	u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net);
+
+	return hash_32(val, IN6_ADDR_HSIZE_SHIFT);
+}
+
 /*
  *	socket join an anycast group
  */
@@ -204,6 +218,73 @@ void ipv6_sock_ac_close(struct sock *sk)
 	rtnl_unlock();
 }
 
+static struct ipv6_ac_addrlist *acal_alloc(int ifindex,
+					   const struct in6_addr *addr)
+{
+	struct ipv6_ac_addrlist *acal;
+
+	acal = kzalloc(sizeof(*acal), GFP_ATOMIC);
+	if (!acal)
+		return NULL;
+
+	acal->acal_addr = *addr;
+	acal->acal_ifindex = ifindex;
+	acal->acal_users = 1;
+	INIT_HLIST_NODE(&acal->acal_lst);
+
+	return acal;
+}
+
+static int ipv6_add_acaddr_hash(struct net *net, const struct in6_addr *addr)
+{
+	unsigned int hash = inet6_acaddr_hash(net, addr);
+	struct ipv6_ac_addrlist *acal;
+	int err = 0;
+
+	spin_lock(&acaddr_hash_lock);
+	hlist_for_each_entry(acal, &inet6_acaddr_lst[hash], acal_lst) {
+		if (acal->acal_ifindex != net->ifindex)
+			continue;
+		if (ipv6_addr_equal(&acal->acal_addr, addr)) {
+			acal->acal_users++;
+			goto out;
+		}
+	}
+
+	acal = acal_alloc(net->ifindex, addr);
+	if (!acal) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	hlist_add_head_rcu(&acal->acal_lst, &inet6_acaddr_lst[hash]);
+
+out:
+	spin_unlock(&acaddr_hash_lock);
+	return err;
+}
+
+static void ipv6_del_acaddr_hash(struct net *net, const struct in6_addr *addr)
+{
+	unsigned int hash = inet6_acaddr_hash(net, addr);
+	struct ipv6_ac_addrlist *acal;
+
+	spin_lock(&acaddr_hash_lock);
+	hlist_for_each_entry(acal, &inet6_acaddr_lst[hash], acal_lst) {
+		if (acal->acal_ifindex != net->ifindex)
+			continue;
+		if (ipv6_addr_equal(&acal->acal_addr, addr)) {
+			if (--acal->acal_users < 1) {
+				hlist_del_init_rcu(&acal->acal_lst);
+				kfree_rcu(acal, rcu);
+			}
+			spin_unlock(&acaddr_hash_lock);
+			return;
+		}
+	}
+	spin_unlock(&acaddr_hash_lock);
+}
+
 static void aca_get(struct ifacaddr6 *aca)
 {
 	refcount_inc(&aca->aca_refcnt);
@@ -275,6 +356,13 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
 		err = -ENOMEM;
 		goto out;
 	}
+	err = ipv6_add_acaddr_hash(dev_net(idev->dev), addr);
+	if (err) {
+		fib6_info_release(f6i);
+		fib6_info_release(f6i);
+		kfree(aca);
+		goto out;
+	}
 
 	aca->aca_next = idev->ac_list;
 	idev->ac_list = aca;
@@ -324,6 +412,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
 		prev_aca->aca_next = aca->aca_next;
 	else
 		idev->ac_list = aca->aca_next;
+	ipv6_del_acaddr_hash(dev_net(idev->dev), &aca->aca_addr);
 	write_unlock_bh(&idev->lock);
 	addrconf_leave_solict(idev, &aca->aca_addr);
 
@@ -350,6 +439,8 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev)
 	write_lock_bh(&idev->lock);
 	while ((aca = idev->ac_list) != NULL) {
 		idev->ac_list = aca->aca_next;
+		ipv6_del_acaddr_hash(dev_net(idev->dev), &aca->aca_addr);
+
 		write_unlock_bh(&idev->lock);
 
 		addrconf_leave_solict(idev, &aca->aca_addr);
@@ -391,16 +482,22 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
 			 const struct in6_addr *addr)
 {
 	bool found = false;
+	unsigned int hash = inet6_acaddr_hash(net, addr);
+	struct ipv6_ac_addrlist *acal;
 
 	rcu_read_lock();
 	if (dev)
 		found = ipv6_chk_acast_dev(dev, addr);
 	else
-		for_each_netdev_rcu(net, dev)
-			if (ipv6_chk_acast_dev(dev, addr)) {
+		hlist_for_each_entry_rcu(acal, &inet6_acaddr_lst[hash],
+					 acal_lst) {
+			if (acal->acal_ifindex != net->ifindex)
+				continue;
+			if (ipv6_addr_equal(&acal->acal_addr, addr)) {
 				found = true;
 				break;
 			}
+		}
 	rcu_read_unlock();
 	return found;
 }
@@ -539,4 +636,25 @@ void ac6_proc_exit(struct net *net)
 {
 	remove_proc_entry("anycast6", net->proc_net);
 }
+
+/*	Init / cleanup code
+ */
+int __init anycast_init(void)
+{
+	int i;
+
+	for (i = 0; i < IN6_ADDR_HSIZE; i++)
+		INIT_HLIST_HEAD(&inet6_acaddr_lst[i]);
+	return 0;
+}
+
+void anycast_cleanup(void)
+{
+	int i;
+
+	spin_lock(&acaddr_hash_lock);
+	for (i = 0; i < IN6_ADDR_HSIZE; i++)
+		WARN_ON(!hlist_empty(&inet6_acaddr_lst[i]));
+	spin_unlock(&acaddr_hash_lock);
+}
 #endif
-- 
2.14.1

^ permalink raw reply related	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2018-11-03 16:05 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-10-23  2:12 [PATCH net] net/ipv6: Add anycast addresses to a global hashtable Jeff Barnhill
2018-10-23  2:26 ` Eric Dumazet
2018-10-23 18:21   ` Jeff Barnhill
2018-10-24  1:58     ` [PATCH net v2] " Jeff Barnhill
2018-10-24  3:12       ` Eric Dumazet
2018-10-24  5:06         ` Jeff Barnhill
2018-10-26 21:22           ` [PATCH net v3] " Jeff Barnhill
2018-10-26 21:44             ` David Ahern
2018-10-27 18:02               ` [PATCH net v4] " Jeff Barnhill
2018-10-27 23:39                 ` David Ahern
2018-10-28  1:27                   ` Jeff Barnhill
2018-10-28  1:51                     ` [PATCH net v5] " Jeff Barnhill
2018-10-30  3:32                       ` David Miller
2018-10-30 11:10                         ` Jeff Barnhill
2018-10-30 18:31                           ` David Miller
2018-10-30 22:06                             ` David Ahern
2018-10-30 23:19                               ` David Miller
2018-11-01  0:02                                 ` Jeff Barnhill
2018-11-01  0:14                                   ` [PATCH net v6] " Jeff Barnhill
2018-11-01  5:34                                     ` Stephen Hemminger
2018-11-02 20:23                                       ` [PATCH net v7] " Jeff Barnhill
2018-11-03  6:55                                         ` David Miller
2018-11-01  2:53                                   ` [PATCH net v5] " David Ahern

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.