All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net] net/ipv6: Add anycast addresses to a global hashtable
@ 2018-10-23  2:12 Jeff Barnhill
  2018-10-23  2:26 ` Eric Dumazet
  0 siblings, 1 reply; 23+ messages in thread
From: Jeff Barnhill @ 2018-10-23  2:12 UTC (permalink / raw)
  To: netdev; +Cc: davem, kuznet, yoshfuji, Jeff Barnhill

icmp6_send() function is expensive on systems with a large number of
interfaces. Every time it’s called, it has to verify that the source
address does not correspond to an existing anycast address by looping
through every device and every anycast address on the device.  This can
result in significant delays for a CPU when there are a large number of
neighbors and ND timers are frequently timing out and calling
neigh_invalidate().

Add anycast addresses to a global hashtable to allow quick searching for
matching anycast addresses.  This is based on inet6_addr_lst in addrconf.c.

Signed-off-by: Jeff Barnhill <0xeffeff@gmail.com>
---
 include/net/addrconf.h |   2 +
 include/net/if_inet6.h |   8 ++++
 net/ipv6/af_inet6.c    |   5 ++
 net/ipv6/anycast.c     | 122 ++++++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 135 insertions(+), 2 deletions(-)

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 6def0351bcc3..0cee3f99c41d 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -312,6 +312,8 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
 			 const struct in6_addr *addr);
 bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev,
 			     const struct in6_addr *addr);
+int anycast_init(void);
+void anycast_cleanup(void);
 
 /* Device notifier */
 int register_inet6addr_notifier(struct notifier_block *nb);
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index d7578cf49c3a..ac02b2cf2ba1 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -142,6 +142,14 @@ struct ipv6_ac_socklist {
 	struct ipv6_ac_socklist *acl_next;
 };
 
+struct ipv6_ac_addrlist {
+	struct in6_addr		acal_addr;
+	int			acal_ifindex; /* net */
+	int			acal_users;
+	struct hlist_node	acal_lst; /* inet6_acaddr_lst */
+	struct rcu_head		rcu;
+};
+
 struct ifacaddr6 {
 	struct in6_addr		aca_addr;
 	struct fib6_info	*aca_rt;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 9a4261e50272..971a05fdd3bd 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -1001,6 +1001,9 @@ static int __init inet6_init(void)
 	err = ip6_flowlabel_init();
 	if (err)
 		goto ip6_flowlabel_fail;
+	err = anycast_init();
+	if (err)
+		goto anycast_fail;
 	err = addrconf_init();
 	if (err)
 		goto addrconf_fail;
@@ -1091,6 +1094,8 @@ static int __init inet6_init(void)
 ipv6_exthdrs_fail:
 	addrconf_cleanup();
 addrconf_fail:
+	anycast_cleanup();
+anycast_fail:
 	ip6_flowlabel_cleanup();
 ip6_flowlabel_fail:
 	ndisc_late_cleanup();
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 4e0ff7031edd..58d31e0980aa 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -44,8 +44,22 @@
 
 #include <net/checksum.h>
 
+#define IN6_ADDR_HSIZE_SHIFT	8
+#define IN6_ADDR_HSIZE		BIT(IN6_ADDR_HSIZE_SHIFT)
+/*	anycast address hash table
+ */
+static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE];
+static DEFINE_SPINLOCK(acaddr_hash_lock);
+
 static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr);
 
+static u32 inet6_acaddr_hash(struct net *net, const struct in6_addr *addr)
+{
+	u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net);
+
+	return hash_32(val, IN6_ADDR_HSIZE_SHIFT);
+}
+
 /*
  *	socket join an anycast group
  */
@@ -204,6 +218,73 @@ void ipv6_sock_ac_close(struct sock *sk)
 	rtnl_unlock();
 }
 
+static struct ipv6_ac_addrlist *acal_alloc(int ifindex,
+					   const struct in6_addr *addr)
+{
+	struct ipv6_ac_addrlist *acal;
+
+	acal = kzalloc(sizeof(*acal), GFP_ATOMIC);
+	if (!acal)
+		return NULL;
+
+	acal->acal_addr = *addr;
+	acal->acal_ifindex = ifindex;
+	acal->acal_users = 1;
+	INIT_HLIST_NODE(&acal->acal_lst);
+
+	return acal;
+}
+
+static int ipv6_add_acaddr_hash(struct net *net, const struct in6_addr *addr)
+{
+	unsigned int hash = inet6_acaddr_hash(net, addr);
+	struct ipv6_ac_addrlist *acal;
+	int err = 0;
+
+	spin_lock(&acaddr_hash_lock);
+	hlist_for_each_entry(acal, &inet6_acaddr_lst[hash], acal_lst) {
+		if (acal->acal_ifindex != net->ifindex)
+			continue;
+		if (ipv6_addr_equal(&acal->acal_addr, addr)) {
+			acal->acal_users++;
+			goto out;
+		}
+	}
+
+	acal = acal_alloc(net->ifindex, addr);
+	if (!acal) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	hlist_add_head_rcu(&acal->acal_lst, &inet6_acaddr_lst[hash]);
+
+out:
+	spin_unlock(&acaddr_hash_lock);
+	return err;
+}
+
+static void ipv6_del_acaddr_hash(struct net *net, const struct in6_addr *addr)
+{
+	unsigned int hash = inet6_acaddr_hash(net, addr);
+	struct ipv6_ac_addrlist *acal;
+
+	spin_lock(&acaddr_hash_lock);
+	hlist_for_each_entry(acal, &inet6_acaddr_lst[hash], acal_lst) {
+		if (acal->acal_ifindex != net->ifindex)
+			continue;
+		if (ipv6_addr_equal(&acal->acal_addr, addr)) {
+			if (--acal->acal_users < 1) {
+				hlist_del_init_rcu(&acal->acal_lst);
+				kfree_rcu(acal, rcu);
+			}
+			spin_unlock(&acaddr_hash_lock);
+			return;
+		}
+	}
+	spin_unlock(&acaddr_hash_lock);
+}
+
 static void aca_get(struct ifacaddr6 *aca)
 {
 	refcount_inc(&aca->aca_refcnt);
@@ -275,6 +356,13 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
 		err = -ENOMEM;
 		goto out;
 	}
+	err = ipv6_add_acaddr_hash(dev_net(idev->dev), addr);
+	if (err) {
+		fib6_info_release(f6i);
+		fib6_info_release(f6i);
+		kfree(aca);
+		goto out;
+	}
 
 	aca->aca_next = idev->ac_list;
 	idev->ac_list = aca;
@@ -324,6 +412,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
 		prev_aca->aca_next = aca->aca_next;
 	else
 		idev->ac_list = aca->aca_next;
+	ipv6_del_acaddr_hash(dev_net(idev->dev), &aca->aca_addr);
 	write_unlock_bh(&idev->lock);
 	addrconf_leave_solict(idev, &aca->aca_addr);
 
@@ -350,6 +439,8 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev)
 	write_lock_bh(&idev->lock);
 	while ((aca = idev->ac_list) != NULL) {
 		idev->ac_list = aca->aca_next;
+		ipv6_del_acaddr_hash(dev_net(idev->dev), &aca->aca_addr);
+
 		write_unlock_bh(&idev->lock);
 
 		addrconf_leave_solict(idev, &aca->aca_addr);
@@ -391,16 +482,22 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
 			 const struct in6_addr *addr)
 {
 	bool found = false;
+	unsigned int hash = inet6_acaddr_hash(net, addr);
+	struct ipv6_ac_addrlist *acal;
 
 	rcu_read_lock();
 	if (dev)
 		found = ipv6_chk_acast_dev(dev, addr);
 	else
-		for_each_netdev_rcu(net, dev)
-			if (ipv6_chk_acast_dev(dev, addr)) {
+		hlist_for_each_entry_rcu(acal, &inet6_acaddr_lst[hash],
+					 acal_lst) {
+			if (acal->acal_ifindex != net->ifindex)
+				continue;
+			if (ipv6_addr_equal(&acal->acal_addr, addr)) {
 				found = true;
 				break;
 			}
+		}
 	rcu_read_unlock();
 	return found;
 }
@@ -539,4 +636,25 @@ void ac6_proc_exit(struct net *net)
 {
 	remove_proc_entry("anycast6", net->proc_net);
 }
+
+/*	Init / cleanup code
+ */
+int __init anycast_init(void)
+{
+	int i;
+
+	for (i = 0; i < IN6_ADDR_HSIZE; i++)
+		INIT_HLIST_HEAD(&inet6_acaddr_lst[i]);
+	return 0;
+}
+
+void anycast_cleanup(void)
+{
+	int i;
+
+	spin_lock(&acaddr_hash_lock);
+	for (i = 0; i < IN6_ADDR_HSIZE; i++)
+		WARN_ON(!hlist_empty(&inet6_acaddr_lst[i]));
+	spin_unlock(&acaddr_hash_lock);
+}
 #endif
-- 
2.14.1

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH net] net/ipv6: Add anycast addresses to a global hashtable
  2018-10-23  2:12 [PATCH net] net/ipv6: Add anycast addresses to a global hashtable Jeff Barnhill
@ 2018-10-23  2:26 ` Eric Dumazet
  2018-10-23 18:21   ` Jeff Barnhill
  0 siblings, 1 reply; 23+ messages in thread
From: Eric Dumazet @ 2018-10-23  2:26 UTC (permalink / raw)
  To: Jeff Barnhill, netdev; +Cc: davem, kuznet, yoshfuji



On 10/22/2018 07:12 PM, Jeff Barnhill wrote:
> icmp6_send() function is expensive on systems with a large number of
> interfaces. Every time it’s called, it has to verify that the source
> address does not correspond to an existing anycast address by looping
> through every device and every anycast address on the device.  This can
> result in significant delays for a CPU when there are a large number of
> neighbors and ND timers are frequently timing out and calling
> neigh_invalidate().
> 
> Add anycast addresses to a global hashtable to allow quick searching for
> matching anycast addresses.  This is based on inet6_addr_lst in addrconf.c.
>

I do not see this patch being netns aware ?

Also, I believe you misunderstood what is stored in net->ifindex.
You can look at dev_new_index() for what I mean.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH net] net/ipv6: Add anycast addresses to a global hashtable
  2018-10-23  2:26 ` Eric Dumazet
@ 2018-10-23 18:21   ` Jeff Barnhill
  2018-10-24  1:58     ` [PATCH net v2] " Jeff Barnhill
  0 siblings, 1 reply; 23+ messages in thread
From: Jeff Barnhill @ 2018-10-23 18:21 UTC (permalink / raw)
  To: eric.dumazet; +Cc: netdev, davem, Alexey Kuznetsov, yoshfuji

Thanks! You are right. I misunderstood net->ifindex.  I think I need
to instead hold the net pointer in the new ipv6_ac_addrlist structure.
Do you see a problem with that?
On Mon, Oct 22, 2018 at 10:26 PM Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
>
>
> On 10/22/2018 07:12 PM, Jeff Barnhill wrote:
> > icmp6_send() function is expensive on systems with a large number of
> > interfaces. Every time it’s called, it has to verify that the source
> > address does not correspond to an existing anycast address by looping
> > through every device and every anycast address on the device.  This can
> > result in significant delays for a CPU when there are a large number of
> > neighbors and ND timers are frequently timing out and calling
> > neigh_invalidate().
> >
> > Add anycast addresses to a global hashtable to allow quick searching for
> > matching anycast addresses.  This is based on inet6_addr_lst in addrconf.c.
> >
>
> I do not see this patch being netns aware ?
>
> Also I believe you misunderstood what was stored in net->ifindex
> You can look at dev_new_index() for what I mean.
>

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH net v2] net/ipv6: Add anycast addresses to a global hashtable
  2018-10-23 18:21   ` Jeff Barnhill
@ 2018-10-24  1:58     ` Jeff Barnhill
  2018-10-24  3:12       ` Eric Dumazet
  0 siblings, 1 reply; 23+ messages in thread
From: Jeff Barnhill @ 2018-10-24  1:58 UTC (permalink / raw)
  To: netdev; +Cc: davem, kuznet, yoshfuji, Jeff Barnhill

icmp6_send() function is expensive on systems with a large number of
interfaces. Every time it’s called, it has to verify that the source
address does not correspond to an existing anycast address by looping
through every device and every anycast address on the device.  This can
result in significant delays for a CPU when there are a large number of
neighbors and ND timers are frequently timing out and calling
neigh_invalidate().

Add anycast addresses to a global hashtable to allow quick searching for
matching anycast addresses.  This is based on inet6_addr_lst in addrconf.c.

Signed-off-by: Jeff Barnhill <0xeffeff@gmail.com>
---
 include/net/addrconf.h |   2 +
 include/net/if_inet6.h |   8 +++
 net/ipv6/af_inet6.c    |   5 ++
 net/ipv6/anycast.c     | 132 ++++++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 145 insertions(+), 2 deletions(-)

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 6def0351bcc3..0cee3f99c41d 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -312,6 +312,8 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
 			 const struct in6_addr *addr);
 bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev,
 			     const struct in6_addr *addr);
+int anycast_init(void);
+void anycast_cleanup(void);
 
 /* Device notifier */
 int register_inet6addr_notifier(struct notifier_block *nb);
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index d7578cf49c3a..55a4a1d8cebc 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -142,6 +142,14 @@ struct ipv6_ac_socklist {
 	struct ipv6_ac_socklist *acl_next;
 };
 
+struct ipv6_ac_addrlist {
+	struct in6_addr		acal_addr;
+	possible_net_t		acal_pnet;
+	int			acal_users;
+	struct hlist_node	acal_lst; /* inet6_acaddr_lst */
+	struct rcu_head		rcu;
+};
+
 struct ifacaddr6 {
 	struct in6_addr		aca_addr;
 	struct fib6_info	*aca_rt;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 9a4261e50272..971a05fdd3bd 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -1001,6 +1001,9 @@ static int __init inet6_init(void)
 	err = ip6_flowlabel_init();
 	if (err)
 		goto ip6_flowlabel_fail;
+	err = anycast_init();
+	if (err)
+		goto anycast_fail;
 	err = addrconf_init();
 	if (err)
 		goto addrconf_fail;
@@ -1091,6 +1094,8 @@ static int __init inet6_init(void)
 ipv6_exthdrs_fail:
 	addrconf_cleanup();
 addrconf_fail:
+	anycast_cleanup();
+anycast_fail:
 	ip6_flowlabel_cleanup();
 ip6_flowlabel_fail:
 	ndisc_late_cleanup();
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 4e0ff7031edd..def1e156d857 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -44,8 +44,22 @@
 
 #include <net/checksum.h>
 
+#define IN6_ADDR_HSIZE_SHIFT	8
+#define IN6_ADDR_HSIZE		BIT(IN6_ADDR_HSIZE_SHIFT)
+/*	anycast address hash table
+ */
+static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE];
+static DEFINE_SPINLOCK(acaddr_hash_lock);
+
 static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr);
 
+static u32 inet6_acaddr_hash(struct net *net, const struct in6_addr *addr)
+{
+	u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net);
+
+	return hash_32(val, IN6_ADDR_HSIZE_SHIFT);
+}
+
 /*
  *	socket join an anycast group
  */
@@ -204,6 +218,83 @@ void ipv6_sock_ac_close(struct sock *sk)
 	rtnl_unlock();
 }
 
+static struct ipv6_ac_addrlist *acal_alloc(struct net *net,
+					   const struct in6_addr *addr)
+{
+	struct ipv6_ac_addrlist *acal;
+
+	acal = kzalloc(sizeof(*acal), GFP_ATOMIC);
+	if (!acal)
+		return NULL;
+
+	acal->acal_addr = *addr;
+	write_pnet(&acal->acal_pnet, get_net(net));
+	acal->acal_users = 1;
+	INIT_HLIST_NODE(&acal->acal_lst);
+
+	return acal;
+}
+
+static void acal_free_rcu(struct rcu_head *h)
+{
+	struct ipv6_ac_addrlist *acal;
+
+	acal = container_of(h, struct ipv6_ac_addrlist, rcu);
+	WARN_ON(acal->acal_users);
+	put_net(read_pnet(&acal->acal_pnet));
+	kfree(acal);
+}
+
+static int ipv6_add_acaddr_hash(struct net *net, const struct in6_addr *addr)
+{
+	unsigned int hash = inet6_acaddr_hash(net, addr);
+	struct ipv6_ac_addrlist *acal;
+	int err = 0;
+
+	spin_lock(&acaddr_hash_lock);
+	hlist_for_each_entry(acal, &inet6_acaddr_lst[hash], acal_lst) {
+		if (!net_eq(read_pnet(&acal->acal_pnet), net))
+			continue;
+		if (ipv6_addr_equal(&acal->acal_addr, addr)) {
+			acal->acal_users++;
+			goto out;
+		}
+	}
+
+	acal = acal_alloc(net, addr);
+	if (!acal) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	hlist_add_head_rcu(&acal->acal_lst, &inet6_acaddr_lst[hash]);
+
+out:
+	spin_unlock(&acaddr_hash_lock);
+	return err;
+}
+
+static void ipv6_del_acaddr_hash(struct net *net, const struct in6_addr *addr)
+{
+	unsigned int hash = inet6_acaddr_hash(net, addr);
+	struct ipv6_ac_addrlist *acal;
+
+	spin_lock(&acaddr_hash_lock);
+	hlist_for_each_entry(acal, &inet6_acaddr_lst[hash], acal_lst) {
+		if (!net_eq(read_pnet(&acal->acal_pnet), net))
+			continue;
+		if (ipv6_addr_equal(&acal->acal_addr, addr)) {
+			if (--acal->acal_users < 1) {
+				hlist_del_init_rcu(&acal->acal_lst);
+				call_rcu(&acal->rcu, acal_free_rcu);
+			}
+			spin_unlock(&acaddr_hash_lock);
+			return;
+		}
+	}
+	spin_unlock(&acaddr_hash_lock);
+}
+
 static void aca_get(struct ifacaddr6 *aca)
 {
 	refcount_inc(&aca->aca_refcnt);
@@ -275,6 +366,13 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
 		err = -ENOMEM;
 		goto out;
 	}
+	err = ipv6_add_acaddr_hash(dev_net(idev->dev), addr);
+	if (err) {
+		fib6_info_release(f6i);
+		fib6_info_release(f6i);
+		kfree(aca);
+		goto out;
+	}
 
 	aca->aca_next = idev->ac_list;
 	idev->ac_list = aca;
@@ -324,6 +422,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
 		prev_aca->aca_next = aca->aca_next;
 	else
 		idev->ac_list = aca->aca_next;
+	ipv6_del_acaddr_hash(dev_net(idev->dev), &aca->aca_addr);
 	write_unlock_bh(&idev->lock);
 	addrconf_leave_solict(idev, &aca->aca_addr);
 
@@ -350,6 +449,8 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev)
 	write_lock_bh(&idev->lock);
 	while ((aca = idev->ac_list) != NULL) {
 		idev->ac_list = aca->aca_next;
+		ipv6_del_acaddr_hash(dev_net(idev->dev), &aca->aca_addr);
+
 		write_unlock_bh(&idev->lock);
 
 		addrconf_leave_solict(idev, &aca->aca_addr);
@@ -391,16 +492,22 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
 			 const struct in6_addr *addr)
 {
 	bool found = false;
+	unsigned int hash = inet6_acaddr_hash(net, addr);
+	struct ipv6_ac_addrlist *acal;
 
 	rcu_read_lock();
 	if (dev)
 		found = ipv6_chk_acast_dev(dev, addr);
 	else
-		for_each_netdev_rcu(net, dev)
-			if (ipv6_chk_acast_dev(dev, addr)) {
+		hlist_for_each_entry_rcu(acal, &inet6_acaddr_lst[hash],
+					 acal_lst) {
+			if (!net_eq(read_pnet(&acal->acal_pnet), net))
+				continue;
+			if (ipv6_addr_equal(&acal->acal_addr, addr)) {
 				found = true;
 				break;
 			}
+		}
 	rcu_read_unlock();
 	return found;
 }
@@ -539,4 +646,25 @@ void ac6_proc_exit(struct net *net)
 {
 	remove_proc_entry("anycast6", net->proc_net);
 }
+
+/*	Init / cleanup code
+ */
+int __init anycast_init(void)
+{
+	int i;
+
+	for (i = 0; i < IN6_ADDR_HSIZE; i++)
+		INIT_HLIST_HEAD(&inet6_acaddr_lst[i]);
+	return 0;
+}
+
+void anycast_cleanup(void)
+{
+	int i;
+
+	spin_lock(&acaddr_hash_lock);
+	for (i = 0; i < IN6_ADDR_HSIZE; i++)
+		WARN_ON(!hlist_empty(&inet6_acaddr_lst[i]));
+	spin_unlock(&acaddr_hash_lock);
+}
 #endif
-- 
2.14.1

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH net v2] net/ipv6: Add anycast addresses to a global hashtable
  2018-10-24  1:58     ` [PATCH net v2] " Jeff Barnhill
@ 2018-10-24  3:12       ` Eric Dumazet
  2018-10-24  5:06         ` Jeff Barnhill
  0 siblings, 1 reply; 23+ messages in thread
From: Eric Dumazet @ 2018-10-24  3:12 UTC (permalink / raw)
  To: Jeff Barnhill, netdev; +Cc: davem, kuznet, yoshfuji



On 10/23/2018 06:58 PM, Jeff Barnhill wrote:
> icmp6_send() function is expensive on systems with a large number of
> interfaces. Every time it’s called, it has to verify that the source
> address does not correspond to an existing anycast address by looping
> through every device and every anycast address on the device.  This can
> result in significant delays for a CPU when there are a large number of
> neighbors and ND timers are frequently timing out and calling
> neigh_invalidate().
> 
> Add anycast addresses to a global hashtable to allow quick searching for
> matching anycast addresses.  This is based on inet6_addr_lst in addrconf.c.
> 
> Signed-off-by: Jeff Barnhill <0xeffeff@gmail.com>
> ---
>  include/net/addrconf.h |   2 +
>  include/net/if_inet6.h |   8 +++
>  net/ipv6/af_inet6.c    |   5 ++
>  net/ipv6/anycast.c     | 132 ++++++++++++++++++++++++++++++++++++++++++++++++-
>  4 files changed, 145 insertions(+), 2 deletions(-)
> 
> diff --git a/include/net/addrconf.h b/include/net/addrconf.h
> index 6def0351bcc3..0cee3f99c41d 100644
> --- a/include/net/addrconf.h
> +++ b/include/net/addrconf.h
> @@ -312,6 +312,8 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
>  			 const struct in6_addr *addr);
>  bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev,
>  			     const struct in6_addr *addr);
> +int anycast_init(void);
> +void anycast_cleanup(void);
>  
>  /* Device notifier */
>  int register_inet6addr_notifier(struct notifier_block *nb);
> diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
> index d7578cf49c3a..55a4a1d8cebc 100644
> --- a/include/net/if_inet6.h
> +++ b/include/net/if_inet6.h
> @@ -142,6 +142,14 @@ struct ipv6_ac_socklist {
>  	struct ipv6_ac_socklist *acl_next;
>  };
>  
> +struct ipv6_ac_addrlist {
> +	struct in6_addr		acal_addr;
> +	possible_net_t		acal_pnet;
> +	int			acal_users;

That would be a refcount_t acal_users; so that CONFIG_REFCOUNT_FULL brings debugging for free.

> +	struct hlist_node	acal_lst; /* inet6_acaddr_lst */
> +	struct rcu_head		rcu;
> +};
> +
>  struct ifacaddr6 {
>  	struct in6_addr		aca_addr;
>  	struct fib6_info	*aca_rt;
> diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
> index 9a4261e50272..971a05fdd3bd 100644
> --- a/net/ipv6/af_inet6.c
> +++ b/net/ipv6/af_inet6.c
> @@ -1001,6 +1001,9 @@ static int __init inet6_init(void)
>  	err = ip6_flowlabel_init();
>  	if (err)
>  		goto ip6_flowlabel_fail;
> +	err = anycast_init();
> +	if (err)
> +		goto anycast_fail;
>  	err = addrconf_init();
>  	if (err)
>  		goto addrconf_fail;
> @@ -1091,6 +1094,8 @@ static int __init inet6_init(void)
>  ipv6_exthdrs_fail:
>  	addrconf_cleanup();
>  addrconf_fail:
> +	anycast_cleanup();
> +anycast_fail:
>  	ip6_flowlabel_cleanup();
>  ip6_flowlabel_fail:
>  	ndisc_late_cleanup();
> diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
> index 4e0ff7031edd..def1e156d857 100644
> --- a/net/ipv6/anycast.c
> +++ b/net/ipv6/anycast.c
> @@ -44,8 +44,22 @@
>  
>  #include <net/checksum.h>
>  
> +#define IN6_ADDR_HSIZE_SHIFT	8
> +#define IN6_ADDR_HSIZE		BIT(IN6_ADDR_HSIZE_SHIFT)
> +/*	anycast address hash table
> + */
> +static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE];
> +static DEFINE_SPINLOCK(acaddr_hash_lock);
> +
>  static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr);
>  
> +static u32 inet6_acaddr_hash(struct net *net, const struct in6_addr *addr)
> +{
> +	u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net);
> +
> +	return hash_32(val, IN6_ADDR_HSIZE_SHIFT);
> +}
> +
>  /*
>   *	socket join an anycast group
>   */
> @@ -204,6 +218,83 @@ void ipv6_sock_ac_close(struct sock *sk)
>  	rtnl_unlock();
>  }
>  
> +static struct ipv6_ac_addrlist *acal_alloc(struct net *net,
> +					   const struct in6_addr *addr)
> +{
> +	struct ipv6_ac_addrlist *acal;
> +
> +	acal = kzalloc(sizeof(*acal), GFP_ATOMIC);
> +	if (!acal)
> +		return NULL;
> +
> +	acal->acal_addr = *addr;
> +	write_pnet(&acal->acal_pnet, get_net(net));

I am not sure why you grab a reference on the netns.

The ipv6 address will be freed at some point before the netns disappears.
It would automatically remove the associated struct ipv6_ac_addrlist.

> +	acal->acal_users = 1;
> +	INIT_HLIST_NODE(&acal->acal_lst);
> +
> +	return acal;
> +}
> +
> +static void acal_free_rcu(struct rcu_head *h)
> +{
> +	struct ipv6_ac_addrlist *acal;
> +
> +	acal = container_of(h, struct ipv6_ac_addrlist, rcu);
> +	WARN_ON(acal->acal_users);

Not needed with refcount_t debugging infra.

> +	put_net(read_pnet(&acal->acal_pnet));
> +	kfree(acal);

So this could use kfree_rcu() in the caller, and get rid of acal_free_rcu() completely.

> +}
> +
> +static int ipv6_add_acaddr_hash(struct net *net, const struct in6_addr *addr)
> +{
> +	unsigned int hash = inet6_acaddr_hash(net, addr);
> +	struct ipv6_ac_addrlist *acal;
> +	int err = 0;
> +
> +	spin_lock(&acaddr_hash_lock);
> +	hlist_for_each_entry(acal, &inet6_acaddr_lst[hash], acal_lst) {
> +		if (!net_eq(read_pnet(&acal->acal_pnet), net))
> +			continue;
> +		if (ipv6_addr_equal(&acal->acal_addr, addr)) {
> +			acal->acal_users++;
> +			goto out;
> +		}
> +	}
> +
> +	acal = acal_alloc(net, addr);
> +	if (!acal) {
> +		err = -ENOMEM;
> +		goto out;
> +	}
> +
> +	hlist_add_head_rcu(&acal->acal_lst, &inet6_acaddr_lst[hash]);
> +
> +out:
> +	spin_unlock(&acaddr_hash_lock);
> +	return err;
> +}
> +
> +static void ipv6_del_acaddr_hash(struct net *net, const struct in6_addr *addr)
> +{
> +	unsigned int hash = inet6_acaddr_hash(net, addr);
> +	struct ipv6_ac_addrlist *acal;
> +
> +	spin_lock(&acaddr_hash_lock);
> +	hlist_for_each_entry(acal, &inet6_acaddr_lst[hash], acal_lst) {
> +		if (!net_eq(read_pnet(&acal->acal_pnet), net))
> +			continue;
> +		if (ipv6_addr_equal(&acal->acal_addr, addr)) {
> +			if (--acal->acal_users < 1) {
> +				hlist_del_init_rcu(&acal->acal_lst);
> +				call_rcu(&acal->rcu, acal_free_rcu);
> +			}
> +			spin_unlock(&acaddr_hash_lock);
> +			return;
> +		}
> +	}
> +	spin_unlock(&acaddr_hash_lock);
> +}
> +
>  static void aca_get(struct ifacaddr6 *aca)
>  {
>  	refcount_inc(&aca->aca_refcnt);
> @@ -275,6 +366,13 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
>  		err = -ENOMEM;
>  		goto out;
>  	}
> +	err = ipv6_add_acaddr_hash(dev_net(idev->dev), addr);
> +	if (err) {
> +		fib6_info_release(f6i);
> +		fib6_info_release(f6i);

Double call to fib6_info_release() ? Why ?

> +		kfree(aca);
> +		goto out;
> +	}
>  
>  	aca->aca_next = idev->ac_list;
>  	idev->ac_list = aca;
> @@ -324,6 +422,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
>  		prev_aca->aca_next = aca->aca_next;
>  	else
>  		idev->ac_list = aca->aca_next;
> +	ipv6_del_acaddr_hash(dev_net(idev->dev), &aca->aca_addr);
>  	write_unlock_bh(&idev->lock);
>  	addrconf_leave_solict(idev, &aca->aca_addr);
>  
> @@ -350,6 +449,8 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev)
>  	write_lock_bh(&idev->lock);
>  	while ((aca = idev->ac_list) != NULL) {
>  		idev->ac_list = aca->aca_next;
> +		ipv6_del_acaddr_hash(dev_net(idev->dev), &aca->aca_addr);
> +
>  		write_unlock_bh(&idev->lock);
>  
>  		addrconf_leave_solict(idev, &aca->aca_addr);
> @@ -391,16 +492,22 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
>  			 const struct in6_addr *addr)
>  {
>  	bool found = false;
> +	unsigned int hash = inet6_acaddr_hash(net, addr);
> +	struct ipv6_ac_addrlist *acal;

Reorder the variable declarations from longest to shortest (reverse xmas tree),
per David Miller's request :)

>  
>  	rcu_read_lock();
>  	if (dev)
>  		found = ipv6_chk_acast_dev(dev, addr);
>  	else
> -		for_each_netdev_rcu(net, dev)
> -			if (ipv6_chk_acast_dev(dev, addr)) {
> +		hlist_for_each_entry_rcu(acal, &inet6_acaddr_lst[hash],
> +					 acal_lst) {
> +			if (!net_eq(read_pnet(&acal->acal_pnet), net))
> +				continue;
> +			if (ipv6_addr_equal(&acal->acal_addr, addr)) {
>  				found = true;
>  				break;
>  			}
> +		}
>  	rcu_read_unlock();
>  	return found;
>  }
> @@ -539,4 +646,25 @@ void ac6_proc_exit(struct net *net)
>  {
>  	remove_proc_entry("anycast6", net->proc_net);
>  }
> +
> +/*	Init / cleanup code
> + */
> +int __init anycast_init(void)
> +{
> +	int i;
> +
> +	for (i = 0; i < IN6_ADDR_HSIZE; i++)
> +		INIT_HLIST_HEAD(&inet6_acaddr_lst[i]);
> +	return 0;
> +}
> +
> +void anycast_cleanup(void)
> +{
> +	int i;
> +
> +	spin_lock(&acaddr_hash_lock);
> +	for (i = 0; i < IN6_ADDR_HSIZE; i++)
> +		WARN_ON(!hlist_empty(&inet6_acaddr_lst[i]));
> +	spin_unlock(&acaddr_hash_lock);
> +}
>  #endif
> 

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH net v2] net/ipv6: Add anycast addresses to a global hashtable
  2018-10-24  3:12       ` Eric Dumazet
@ 2018-10-24  5:06         ` Jeff Barnhill
  2018-10-26 21:22           ` [PATCH net v3] " Jeff Barnhill
  0 siblings, 1 reply; 23+ messages in thread
From: Jeff Barnhill @ 2018-10-24  5:06 UTC (permalink / raw)
  To: eric.dumazet; +Cc: netdev, davem, Alexey Kuznetsov, yoshfuji

Thanks for the feedback.

As suggested, I did these things:
 - switched to refcount_t
 - stopped grabbing a reference on the netns (now able to use kfree_rcu)
 - re-ordered ipv6_chk_acast_addr variable definitions to reverse xmas tree

With regards to your question in __ipv6_dev_ac_inc():

> +     err = ipv6_add_acaddr_hash(dev_net(idev->dev), addr);
> +     if (err) {
> +             fib6_info_release(f6i);
> +             fib6_info_release(f6i);
Double call to fib6_info_release() ? Why ?

Unless I misunderstand, both addrconf_f6i_alloc() (indirectly through
fib6_info_alloc()) and aca_alloc() increment fib6_ref, so it seemed that
to fully clean up/back out, we needed to decrement twice.  Please let me know
if I'm wrong here.

I'll re-submit the patch after agreement on the double call and
testing with the new changes.

Thanks!
Jeff
On Tue, Oct 23, 2018 at 11:12 PM Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
>
>
> On 10/23/2018 06:58 PM, Jeff Barnhill wrote:
> > icmp6_send() function is expensive on systems with a large number of
> > interfaces. Every time it’s called, it has to verify that the source
> > address does not correspond to an existing anycast address by looping
> > through every device and every anycast address on the device.  This can
> > result in significant delays for a CPU when there are a large number of
> > neighbors and ND timers are frequently timing out and calling
> > neigh_invalidate().
> >
> > Add anycast addresses to a global hashtable to allow quick searching for
> > matching anycast addresses.  This is based on inet6_addr_lst in addrconf.c.
> >
> > Signed-off-by: Jeff Barnhill <0xeffeff@gmail.com>
> > ---
> >  include/net/addrconf.h |   2 +
> >  include/net/if_inet6.h |   8 +++
> >  net/ipv6/af_inet6.c    |   5 ++
> >  net/ipv6/anycast.c     | 132 ++++++++++++++++++++++++++++++++++++++++++++++++-
> >  4 files changed, 145 insertions(+), 2 deletions(-)
> >
> > diff --git a/include/net/addrconf.h b/include/net/addrconf.h
> > index 6def0351bcc3..0cee3f99c41d 100644
> > --- a/include/net/addrconf.h
> > +++ b/include/net/addrconf.h
> > @@ -312,6 +312,8 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
> >                        const struct in6_addr *addr);
> >  bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev,
> >                            const struct in6_addr *addr);
> > +int anycast_init(void);
> > +void anycast_cleanup(void);
> >
> >  /* Device notifier */
> >  int register_inet6addr_notifier(struct notifier_block *nb);
> > diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
> > index d7578cf49c3a..55a4a1d8cebc 100644
> > --- a/include/net/if_inet6.h
> > +++ b/include/net/if_inet6.h
> > @@ -142,6 +142,14 @@ struct ipv6_ac_socklist {
> >       struct ipv6_ac_socklist *acl_next;
> >  };
> >
> > +struct ipv6_ac_addrlist {
> > +     struct in6_addr         acal_addr;
> > +     possible_net_t          acal_pnet;
> > +     int                     acal_users;
>
> That would be a refcount_t acal_users; so that CONFIG_REFCOUNT_FULL brings debugging for free.
>
> > +     struct hlist_node       acal_lst; /* inet6_acaddr_lst */
> > +     struct rcu_head         rcu;
> > +};
> > +
> >  struct ifacaddr6 {
> >       struct in6_addr         aca_addr;
> >       struct fib6_info        *aca_rt;
> > diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
> > index 9a4261e50272..971a05fdd3bd 100644
> > --- a/net/ipv6/af_inet6.c
> > +++ b/net/ipv6/af_inet6.c
> > @@ -1001,6 +1001,9 @@ static int __init inet6_init(void)
> >       err = ip6_flowlabel_init();
> >       if (err)
> >               goto ip6_flowlabel_fail;
> > +     err = anycast_init();
> > +     if (err)
> > +             goto anycast_fail;
> >       err = addrconf_init();
> >       if (err)
> >               goto addrconf_fail;
> > @@ -1091,6 +1094,8 @@ static int __init inet6_init(void)
> >  ipv6_exthdrs_fail:
> >       addrconf_cleanup();
> >  addrconf_fail:
> > +     anycast_cleanup();
> > +anycast_fail:
> >       ip6_flowlabel_cleanup();
> >  ip6_flowlabel_fail:
> >       ndisc_late_cleanup();
> > diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
> > index 4e0ff7031edd..def1e156d857 100644
> > --- a/net/ipv6/anycast.c
> > +++ b/net/ipv6/anycast.c
> > @@ -44,8 +44,22 @@
> >
> >  #include <net/checksum.h>
> >
> > +#define IN6_ADDR_HSIZE_SHIFT 8
> > +#define IN6_ADDR_HSIZE               BIT(IN6_ADDR_HSIZE_SHIFT)
> > +/*   anycast address hash table
> > + */
> > +static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE];
> > +static DEFINE_SPINLOCK(acaddr_hash_lock);
> > +
> >  static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr);
> >
> > +static u32 inet6_acaddr_hash(struct net *net, const struct in6_addr *addr)
> > +{
> > +     u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net);
> > +
> > +     return hash_32(val, IN6_ADDR_HSIZE_SHIFT);
> > +}
> > +
> >  /*
> >   *   socket join an anycast group
> >   */
> > @@ -204,6 +218,83 @@ void ipv6_sock_ac_close(struct sock *sk)
> >       rtnl_unlock();
> >  }
> >
> > +static struct ipv6_ac_addrlist *acal_alloc(struct net *net,
> > +                                        const struct in6_addr *addr)
> > +{
> > +     struct ipv6_ac_addrlist *acal;
> > +
> > +     acal = kzalloc(sizeof(*acal), GFP_ATOMIC);
> > +     if (!acal)
> > +             return NULL;
> > +
> > +     acal->acal_addr = *addr;
> > +     write_pnet(&acal->acal_pnet, get_net(net));
>
> I am not sure why you grab a reference on the netns.
>
> The ipv6 address will be freed at some point before the netns disappears.
> It would automatically remove the associated struct ipv6_ac_addrlist.
>
> > +     acal->acal_users = 1;
> > +     INIT_HLIST_NODE(&acal->acal_lst);
> > +
> > +     return acal;
> > +}
> > +
> > +static void acal_free_rcu(struct rcu_head *h)
> > +{
> > +     struct ipv6_ac_addrlist *acal;
> > +
> > +     acal = container_of(h, struct ipv6_ac_addrlist, rcu);
> > +     WARN_ON(acal->acal_users);
>
> Not needed with refcount_t debugging infra.
>
> > +     put_net(read_pnet(&acal->acal_pnet));
> > +     kfree(acal);
>
> So this could use kfree_rcu() in the caller, and get rid of acal_free_rcu() completely.
>
> > +}
> > +
> > +static int ipv6_add_acaddr_hash(struct net *net, const struct in6_addr *addr)
> > +{
> > +     unsigned int hash = inet6_acaddr_hash(net, addr);
> > +     struct ipv6_ac_addrlist *acal;
> > +     int err = 0;
> > +
> > +     spin_lock(&acaddr_hash_lock);
> > +     hlist_for_each_entry(acal, &inet6_acaddr_lst[hash], acal_lst) {
> > +             if (!net_eq(read_pnet(&acal->acal_pnet), net))
> > +                     continue;
> > +             if (ipv6_addr_equal(&acal->acal_addr, addr)) {
> > +                     acal->acal_users++;
> > +                     goto out;
> > +             }
> > +     }
> > +
> > +     acal = acal_alloc(net, addr);
> > +     if (!acal) {
> > +             err = -ENOMEM;
> > +             goto out;
> > +     }
> > +
> > +     hlist_add_head_rcu(&acal->acal_lst, &inet6_acaddr_lst[hash]);
> > +
> > +out:
> > +     spin_unlock(&acaddr_hash_lock);
> > +     return err;
> > +}
> > +
> > +static void ipv6_del_acaddr_hash(struct net *net, const struct in6_addr *addr)
> > +{
> > +     unsigned int hash = inet6_acaddr_hash(net, addr);
> > +     struct ipv6_ac_addrlist *acal;
> > +
> > +     spin_lock(&acaddr_hash_lock);
> > +     hlist_for_each_entry(acal, &inet6_acaddr_lst[hash], acal_lst) {
> > +             if (!net_eq(read_pnet(&acal->acal_pnet), net))
> > +                     continue;
> > +             if (ipv6_addr_equal(&acal->acal_addr, addr)) {
> > +                     if (--acal->acal_users < 1) {
> > +                             hlist_del_init_rcu(&acal->acal_lst);
> > +                             call_rcu(&acal->rcu, acal_free_rcu);
> > +                     }
> > +                     spin_unlock(&acaddr_hash_lock);
> > +                     return;
> > +             }
> > +     }
> > +     spin_unlock(&acaddr_hash_lock);
> > +}
> > +
> >  static void aca_get(struct ifacaddr6 *aca)
> >  {
> >       refcount_inc(&aca->aca_refcnt);
> > @@ -275,6 +366,13 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
> >               err = -ENOMEM;
> >               goto out;
> >       }
> > +     err = ipv6_add_acaddr_hash(dev_net(idev->dev), addr);
> > +     if (err) {
> > +             fib6_info_release(f6i);
> > +             fib6_info_release(f6i);
>
> Double call to fib6_info_release() ? Why ?
>
> > +             kfree(aca);
> > +             goto out;
> > +     }
> >
> >       aca->aca_next = idev->ac_list;
> >       idev->ac_list = aca;
> > @@ -324,6 +422,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
> >               prev_aca->aca_next = aca->aca_next;
> >       else
> >               idev->ac_list = aca->aca_next;
> > +     ipv6_del_acaddr_hash(dev_net(idev->dev), &aca->aca_addr);
> >       write_unlock_bh(&idev->lock);
> >       addrconf_leave_solict(idev, &aca->aca_addr);
> >
> > @@ -350,6 +449,8 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev)
> >       write_lock_bh(&idev->lock);
> >       while ((aca = idev->ac_list) != NULL) {
> >               idev->ac_list = aca->aca_next;
> > +             ipv6_del_acaddr_hash(dev_net(idev->dev), &aca->aca_addr);
> > +
> >               write_unlock_bh(&idev->lock);
> >
> >               addrconf_leave_solict(idev, &aca->aca_addr);
> > @@ -391,16 +492,22 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
> >                        const struct in6_addr *addr)
> >  {
> >       bool found = false;
> > +     unsigned int hash = inet6_acaddr_hash(net, addr);
> > +     struct ipv6_ac_addrlist *acal;
>
> Reorder variable declaration in longest to shortest (reverse xmas tree),
> per David Miller request :)
>
> >
> >       rcu_read_lock();
> >       if (dev)
> >               found = ipv6_chk_acast_dev(dev, addr);
> >       else
> > -             for_each_netdev_rcu(net, dev)
> > -                     if (ipv6_chk_acast_dev(dev, addr)) {
> > +             hlist_for_each_entry_rcu(acal, &inet6_acaddr_lst[hash],
> > +                                      acal_lst) {
> > +                     if (!net_eq(read_pnet(&acal->acal_pnet), net))
> > +                             continue;
> > +                     if (ipv6_addr_equal(&acal->acal_addr, addr)) {
> >                               found = true;
> >                               break;
> >                       }
> > +             }
> >       rcu_read_unlock();
> >       return found;
> >  }
> > @@ -539,4 +646,25 @@ void ac6_proc_exit(struct net *net)
> >  {
> >       remove_proc_entry("anycast6", net->proc_net);
> >  }
> > +
> > +/*   Init / cleanup code
> > + */
> > +int __init anycast_init(void)
> > +{
> > +     int i;
> > +
> > +     for (i = 0; i < IN6_ADDR_HSIZE; i++)
> > +             INIT_HLIST_HEAD(&inet6_acaddr_lst[i]);
> > +     return 0;
> > +}
> > +
> > +void anycast_cleanup(void)
> > +{
> > +     int i;
> > +
> > +     spin_lock(&acaddr_hash_lock);
> > +     for (i = 0; i < IN6_ADDR_HSIZE; i++)
> > +             WARN_ON(!hlist_empty(&inet6_acaddr_lst[i]));
> > +     spin_unlock(&acaddr_hash_lock);
> > +}
> >  #endif
> >

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH net v3] net/ipv6: Add anycast addresses to a global hashtable
  2018-10-24  5:06         ` Jeff Barnhill
@ 2018-10-26 21:22           ` Jeff Barnhill
  2018-10-26 21:44             ` David Ahern
  0 siblings, 1 reply; 23+ messages in thread
From: Jeff Barnhill @ 2018-10-26 21:22 UTC (permalink / raw)
  To: netdev; +Cc: davem, kuznet, yoshfuji, Jeff Barnhill

icmp6_send() function is expensive on systems with a large number of
interfaces. Every time it’s called, it has to verify that the source
address does not correspond to an existing anycast address by looping
through every device and every anycast address on the device.  This can
result in significant delays for a CPU when there are a large number of
neighbors and ND timers are frequently timing out and calling
neigh_invalidate().

Add anycast addresses to a global hashtable to allow quick searching for
matching anycast addresses.  This is based on inet6_addr_lst in addrconf.c.

Signed-off-by: Jeff Barnhill <0xeffeff@gmail.com>
---
 include/net/addrconf.h |   2 +
 include/net/if_inet6.h |   8 ++++
 net/ipv6/af_inet6.c    |   5 ++
 net/ipv6/anycast.c     | 122 ++++++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 135 insertions(+), 2 deletions(-)

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 14b789a123e7..799af1a037d1 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -317,6 +317,8 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
 			 const struct in6_addr *addr);
 bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev,
 			     const struct in6_addr *addr);
+int anycast_init(void);
+void anycast_cleanup(void);
 
 /* Device notifier */
 int register_inet6addr_notifier(struct notifier_block *nb);
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index d7578cf49c3a..a445014b981d 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -142,6 +142,14 @@ struct ipv6_ac_socklist {
 	struct ipv6_ac_socklist *acl_next;
 };
 
+struct ipv6_ac_addrlist {
+	struct in6_addr		acal_addr;
+	possible_net_t		acal_pnet;
+	refcount_t		acal_users;
+	struct hlist_node	acal_lst; /* inet6_acaddr_lst */
+	struct rcu_head		rcu;
+};
+
 struct ifacaddr6 {
 	struct in6_addr		aca_addr;
 	struct fib6_info	*aca_rt;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 3f4d61017a69..ddc8a6dbfba2 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -1001,6 +1001,9 @@ static int __init inet6_init(void)
 	err = ip6_flowlabel_init();
 	if (err)
 		goto ip6_flowlabel_fail;
+	err = anycast_init();
+	if (err)
+		goto anycast_fail;
 	err = addrconf_init();
 	if (err)
 		goto addrconf_fail;
@@ -1091,6 +1094,8 @@ static int __init inet6_init(void)
 ipv6_exthdrs_fail:
 	addrconf_cleanup();
 addrconf_fail:
+	anycast_cleanup();
+anycast_fail:
 	ip6_flowlabel_cleanup();
 ip6_flowlabel_fail:
 	ndisc_late_cleanup();
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 4e0ff7031edd..1040d08867ab 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -44,8 +44,22 @@
 
 #include <net/checksum.h>
 
+#define IN6_ADDR_HSIZE_SHIFT	8
+#define IN6_ADDR_HSIZE		BIT(IN6_ADDR_HSIZE_SHIFT)
+/*	anycast address hash table
+ */
+static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE];
+static DEFINE_SPINLOCK(acaddr_hash_lock);
+
 static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr);
 
+static u32 inet6_acaddr_hash(struct net *net, const struct in6_addr *addr)
+{
+	u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net);
+
+	return hash_32(val, IN6_ADDR_HSIZE_SHIFT);
+}
+
 /*
  *	socket join an anycast group
  */
@@ -204,6 +218,73 @@ void ipv6_sock_ac_close(struct sock *sk)
 	rtnl_unlock();
 }
 
+static struct ipv6_ac_addrlist *acal_alloc(struct net *net,
+					   const struct in6_addr *addr)
+{
+	struct ipv6_ac_addrlist *acal;
+
+	acal = kzalloc(sizeof(*acal), GFP_ATOMIC);
+	if (!acal)
+		return NULL;
+
+	acal->acal_addr = *addr;
+	write_pnet(&acal->acal_pnet, net);
+	refcount_set(&acal->acal_users, 1);
+	INIT_HLIST_NODE(&acal->acal_lst);
+
+	return acal;
+}
+
+static int ipv6_add_acaddr_hash(struct net *net, const struct in6_addr *addr)
+{
+	unsigned int hash = inet6_acaddr_hash(net, addr);
+	struct ipv6_ac_addrlist *acal;
+	int err = 0;
+
+	spin_lock(&acaddr_hash_lock);
+	hlist_for_each_entry(acal, &inet6_acaddr_lst[hash], acal_lst) {
+		if (!net_eq(read_pnet(&acal->acal_pnet), net))
+			continue;
+		if (ipv6_addr_equal(&acal->acal_addr, addr)) {
+			refcount_inc(&acal->acal_users);
+			goto out;
+		}
+	}
+
+	acal = acal_alloc(net, addr);
+	if (!acal) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	hlist_add_head_rcu(&acal->acal_lst, &inet6_acaddr_lst[hash]);
+
+out:
+	spin_unlock(&acaddr_hash_lock);
+	return err;
+}
+
+static void ipv6_del_acaddr_hash(struct net *net, const struct in6_addr *addr)
+{
+	unsigned int hash = inet6_acaddr_hash(net, addr);
+	struct ipv6_ac_addrlist *acal;
+
+	spin_lock(&acaddr_hash_lock);
+	hlist_for_each_entry(acal, &inet6_acaddr_lst[hash], acal_lst) {
+		if (!net_eq(read_pnet(&acal->acal_pnet), net))
+			continue;
+		if (ipv6_addr_equal(&acal->acal_addr, addr)) {
+			if (refcount_dec_and_test(&acal->acal_users)) {
+				hlist_del_init_rcu(&acal->acal_lst);
+				kfree_rcu(acal, rcu);
+			}
+			spin_unlock(&acaddr_hash_lock);
+			return;
+		}
+	}
+	spin_unlock(&acaddr_hash_lock);
+}
+
 static void aca_get(struct ifacaddr6 *aca)
 {
 	refcount_inc(&aca->aca_refcnt);
@@ -275,6 +356,13 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
 		err = -ENOMEM;
 		goto out;
 	}
+	err = ipv6_add_acaddr_hash(dev_net(idev->dev), addr);
+	if (err) {
+		fib6_info_release(f6i);
+		fib6_info_release(f6i);
+		kfree(aca);
+		goto out;
+	}
 
 	aca->aca_next = idev->ac_list;
 	idev->ac_list = aca;
@@ -324,6 +412,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
 		prev_aca->aca_next = aca->aca_next;
 	else
 		idev->ac_list = aca->aca_next;
+	ipv6_del_acaddr_hash(dev_net(idev->dev), &aca->aca_addr);
 	write_unlock_bh(&idev->lock);
 	addrconf_leave_solict(idev, &aca->aca_addr);
 
@@ -350,6 +439,8 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev)
 	write_lock_bh(&idev->lock);
 	while ((aca = idev->ac_list) != NULL) {
 		idev->ac_list = aca->aca_next;
+		ipv6_del_acaddr_hash(dev_net(idev->dev), &aca->aca_addr);
+
 		write_unlock_bh(&idev->lock);
 
 		addrconf_leave_solict(idev, &aca->aca_addr);
@@ -390,17 +481,23 @@ static bool ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *ad
 bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
 			 const struct in6_addr *addr)
 {
+	unsigned int hash = inet6_acaddr_hash(net, addr);
+	struct ipv6_ac_addrlist *acal;
 	bool found = false;
 
 	rcu_read_lock();
 	if (dev)
 		found = ipv6_chk_acast_dev(dev, addr);
 	else
-		for_each_netdev_rcu(net, dev)
-			if (ipv6_chk_acast_dev(dev, addr)) {
+		hlist_for_each_entry_rcu(acal, &inet6_acaddr_lst[hash],
+					 acal_lst) {
+			if (!net_eq(read_pnet(&acal->acal_pnet), net))
+				continue;
+			if (ipv6_addr_equal(&acal->acal_addr, addr)) {
 				found = true;
 				break;
 			}
+		}
 	rcu_read_unlock();
 	return found;
 }
@@ -539,4 +636,25 @@ void ac6_proc_exit(struct net *net)
 {
 	remove_proc_entry("anycast6", net->proc_net);
 }
+
+/*	Init / cleanup code
+ */
+int __init anycast_init(void)
+{
+	int i;
+
+	for (i = 0; i < IN6_ADDR_HSIZE; i++)
+		INIT_HLIST_HEAD(&inet6_acaddr_lst[i]);
+	return 0;
+}
+
+void anycast_cleanup(void)
+{
+	int i;
+
+	spin_lock(&acaddr_hash_lock);
+	for (i = 0; i < IN6_ADDR_HSIZE; i++)
+		WARN_ON(!hlist_empty(&inet6_acaddr_lst[i]));
+	spin_unlock(&acaddr_hash_lock);
+}
 #endif
-- 
2.14.1

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH net v3] net/ipv6: Add anycast addresses to a global hashtable
  2018-10-26 21:22           ` [PATCH net v3] " Jeff Barnhill
@ 2018-10-26 21:44             ` David Ahern
  2018-10-27 18:02               ` [PATCH net v4] " Jeff Barnhill
  0 siblings, 1 reply; 23+ messages in thread
From: David Ahern @ 2018-10-26 21:44 UTC (permalink / raw)
  To: Jeff Barnhill, netdev; +Cc: davem, kuznet, yoshfuji

On 10/26/18 3:22 PM, Jeff Barnhill wrote:
> @@ -275,6 +356,13 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
>  		err = -ENOMEM;
>  		goto out;
>  	}
> +	err = ipv6_add_acaddr_hash(dev_net(idev->dev), addr);
> +	if (err) {
> +		fib6_info_release(f6i);
> +		fib6_info_release(f6i);
> +		kfree(aca);
> +		goto out;
> +	}

I think aca_put() makes this less confusing as it will do the
fib6_info_release(f6i) and kfree(aca);

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH net v4] net/ipv6: Add anycast addresses to a global hashtable
  2018-10-26 21:44             ` David Ahern
@ 2018-10-27 18:02               ` Jeff Barnhill
  2018-10-27 23:39                 ` David Ahern
  0 siblings, 1 reply; 23+ messages in thread
From: Jeff Barnhill @ 2018-10-27 18:02 UTC (permalink / raw)
  To: netdev; +Cc: davem, kuznet, yoshfuji, Jeff Barnhill

icmp6_send() function is expensive on systems with a large number of
interfaces. Every time it’s called, it has to verify that the source
address does not correspond to an existing anycast address by looping
through every device and every anycast address on the device.  This can
result in significant delays for a CPU when there are a large number of
neighbors and ND timers are frequently timing out and calling
neigh_invalidate().

Add anycast addresses to a global hashtable to allow quick searching for
matching anycast addresses.  This is based on inet6_addr_lst in addrconf.c.

Signed-off-by: Jeff Barnhill <0xeffeff@gmail.com>
---
 include/net/addrconf.h |   2 +
 include/net/if_inet6.h |   8 ++++
 net/ipv6/af_inet6.c    |   5 +++
 net/ipv6/anycast.c     | 120 ++++++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 133 insertions(+), 2 deletions(-)

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 14b789a123e7..799af1a037d1 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -317,6 +317,8 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
 			 const struct in6_addr *addr);
 bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev,
 			     const struct in6_addr *addr);
+int anycast_init(void);
+void anycast_cleanup(void);
 
 /* Device notifier */
 int register_inet6addr_notifier(struct notifier_block *nb);
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index d7578cf49c3a..a445014b981d 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -142,6 +142,14 @@ struct ipv6_ac_socklist {
 	struct ipv6_ac_socklist *acl_next;
 };
 
+struct ipv6_ac_addrlist {
+	struct in6_addr		acal_addr;
+	possible_net_t		acal_pnet;
+	refcount_t		acal_users;
+	struct hlist_node	acal_lst; /* inet6_acaddr_lst */
+	struct rcu_head		rcu;
+};
+
 struct ifacaddr6 {
 	struct in6_addr		aca_addr;
 	struct fib6_info	*aca_rt;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 3f4d61017a69..ddc8a6dbfba2 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -1001,6 +1001,9 @@ static int __init inet6_init(void)
 	err = ip6_flowlabel_init();
 	if (err)
 		goto ip6_flowlabel_fail;
+	err = anycast_init();
+	if (err)
+		goto anycast_fail;
 	err = addrconf_init();
 	if (err)
 		goto addrconf_fail;
@@ -1091,6 +1094,8 @@ static int __init inet6_init(void)
 ipv6_exthdrs_fail:
 	addrconf_cleanup();
 addrconf_fail:
+	anycast_cleanup();
+anycast_fail:
 	ip6_flowlabel_cleanup();
 ip6_flowlabel_fail:
 	ndisc_late_cleanup();
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 4e0ff7031edd..45585010908a 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -44,8 +44,22 @@
 
 #include <net/checksum.h>
 
+#define IN6_ADDR_HSIZE_SHIFT	8
+#define IN6_ADDR_HSIZE		BIT(IN6_ADDR_HSIZE_SHIFT)
+/*	anycast address hash table
+ */
+static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE];
+static DEFINE_SPINLOCK(acaddr_hash_lock);
+
 static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr);
 
+static u32 inet6_acaddr_hash(struct net *net, const struct in6_addr *addr)
+{
+	u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net);
+
+	return hash_32(val, IN6_ADDR_HSIZE_SHIFT);
+}
+
 /*
  *	socket join an anycast group
  */
@@ -204,6 +218,73 @@ void ipv6_sock_ac_close(struct sock *sk)
 	rtnl_unlock();
 }
 
+static struct ipv6_ac_addrlist *acal_alloc(struct net *net,
+					   const struct in6_addr *addr)
+{
+	struct ipv6_ac_addrlist *acal;
+
+	acal = kzalloc(sizeof(*acal), GFP_ATOMIC);
+	if (!acal)
+		return NULL;
+
+	acal->acal_addr = *addr;
+	write_pnet(&acal->acal_pnet, net);
+	refcount_set(&acal->acal_users, 1);
+	INIT_HLIST_NODE(&acal->acal_lst);
+
+	return acal;
+}
+
+static int ipv6_add_acaddr_hash(struct net *net, const struct in6_addr *addr)
+{
+	unsigned int hash = inet6_acaddr_hash(net, addr);
+	struct ipv6_ac_addrlist *acal;
+	int err = 0;
+
+	spin_lock(&acaddr_hash_lock);
+	hlist_for_each_entry(acal, &inet6_acaddr_lst[hash], acal_lst) {
+		if (!net_eq(read_pnet(&acal->acal_pnet), net))
+			continue;
+		if (ipv6_addr_equal(&acal->acal_addr, addr)) {
+			refcount_inc(&acal->acal_users);
+			goto out;
+		}
+	}
+
+	acal = acal_alloc(net, addr);
+	if (!acal) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	hlist_add_head_rcu(&acal->acal_lst, &inet6_acaddr_lst[hash]);
+
+out:
+	spin_unlock(&acaddr_hash_lock);
+	return err;
+}
+
+static void ipv6_del_acaddr_hash(struct net *net, const struct in6_addr *addr)
+{
+	unsigned int hash = inet6_acaddr_hash(net, addr);
+	struct ipv6_ac_addrlist *acal;
+
+	spin_lock(&acaddr_hash_lock);
+	hlist_for_each_entry(acal, &inet6_acaddr_lst[hash], acal_lst) {
+		if (!net_eq(read_pnet(&acal->acal_pnet), net))
+			continue;
+		if (ipv6_addr_equal(&acal->acal_addr, addr)) {
+			if (refcount_dec_and_test(&acal->acal_users)) {
+				hlist_del_init_rcu(&acal->acal_lst);
+				kfree_rcu(acal, rcu);
+			}
+			spin_unlock(&acaddr_hash_lock);
+			return;
+		}
+	}
+	spin_unlock(&acaddr_hash_lock);
+}
+
 static void aca_get(struct ifacaddr6 *aca)
 {
 	refcount_inc(&aca->aca_refcnt);
@@ -275,6 +356,11 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
 		err = -ENOMEM;
 		goto out;
 	}
+	err = ipv6_add_acaddr_hash(dev_net(idev->dev), addr);
+	if (err) {
+		aca_put(aca);
+		goto out;
+	}
 
 	aca->aca_next = idev->ac_list;
 	idev->ac_list = aca;
@@ -324,6 +410,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
 		prev_aca->aca_next = aca->aca_next;
 	else
 		idev->ac_list = aca->aca_next;
+	ipv6_del_acaddr_hash(dev_net(idev->dev), &aca->aca_addr);
 	write_unlock_bh(&idev->lock);
 	addrconf_leave_solict(idev, &aca->aca_addr);
 
@@ -350,6 +437,8 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev)
 	write_lock_bh(&idev->lock);
 	while ((aca = idev->ac_list) != NULL) {
 		idev->ac_list = aca->aca_next;
+		ipv6_del_acaddr_hash(dev_net(idev->dev), &aca->aca_addr);
+
 		write_unlock_bh(&idev->lock);
 
 		addrconf_leave_solict(idev, &aca->aca_addr);
@@ -390,17 +479,23 @@ static bool ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *ad
 bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
 			 const struct in6_addr *addr)
 {
+	unsigned int hash = inet6_acaddr_hash(net, addr);
+	struct ipv6_ac_addrlist *acal;
 	bool found = false;
 
 	rcu_read_lock();
 	if (dev)
 		found = ipv6_chk_acast_dev(dev, addr);
 	else
-		for_each_netdev_rcu(net, dev)
-			if (ipv6_chk_acast_dev(dev, addr)) {
+		hlist_for_each_entry_rcu(acal, &inet6_acaddr_lst[hash],
+					 acal_lst) {
+			if (!net_eq(read_pnet(&acal->acal_pnet), net))
+				continue;
+			if (ipv6_addr_equal(&acal->acal_addr, addr)) {
 				found = true;
 				break;
 			}
+		}
 	rcu_read_unlock();
 	return found;
 }
@@ -539,4 +634,25 @@ void ac6_proc_exit(struct net *net)
 {
 	remove_proc_entry("anycast6", net->proc_net);
 }
+
+/*	Init / cleanup code
+ */
+int __init anycast_init(void)
+{
+	int i;
+
+	for (i = 0; i < IN6_ADDR_HSIZE; i++)
+		INIT_HLIST_HEAD(&inet6_acaddr_lst[i]);
+	return 0;
+}
+
+void anycast_cleanup(void)
+{
+	int i;
+
+	spin_lock(&acaddr_hash_lock);
+	for (i = 0; i < IN6_ADDR_HSIZE; i++)
+		WARN_ON(!hlist_empty(&inet6_acaddr_lst[i]));
+	spin_unlock(&acaddr_hash_lock);
+}
 #endif
-- 
2.14.1

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH net v4] net/ipv6: Add anycast addresses to a global hashtable
  2018-10-27 18:02               ` [PATCH net v4] " Jeff Barnhill
@ 2018-10-27 23:39                 ` David Ahern
  2018-10-28  1:27                   ` Jeff Barnhill
  0 siblings, 1 reply; 23+ messages in thread
From: David Ahern @ 2018-10-27 23:39 UTC (permalink / raw)
  To: Jeff Barnhill, netdev; +Cc: davem, kuznet, yoshfuji

On 10/27/18 12:02 PM, Jeff Barnhill wrote:
> @@ -275,6 +356,11 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
>  		err = -ENOMEM;
>  		goto out;
>  	}
> +	err = ipv6_add_acaddr_hash(dev_net(idev->dev), addr);
> +	if (err) {
> +		aca_put(aca);
> +		goto out;
> +	}
>  
>  	aca->aca_next = idev->ac_list;
>  	idev->ac_list = aca;

You misunderstood my comment. aca_put() is meant to be used instead of the
double call to fib6_info_release(f6i). You still need one call to
fib6_info_release(f6i) for the addrconf_f6i_alloc().

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH net v4] net/ipv6: Add anycast addresses to a global hashtable
  2018-10-27 23:39                 ` David Ahern
@ 2018-10-28  1:27                   ` Jeff Barnhill
  2018-10-28  1:51                     ` [PATCH net v5] " Jeff Barnhill
  0 siblings, 1 reply; 23+ messages in thread
From: Jeff Barnhill @ 2018-10-28  1:27 UTC (permalink / raw)
  To: David Ahern; +Cc: netdev, davem, Alexey Kuznetsov, yoshfuji

You are right, David...I mistook the refcount_dec_and_test() in
aca_put() as being for the fib6_info, but it's for the aca_refcnt.
Thanks!  I'll submit a corrected patch.
On Sat, Oct 27, 2018 at 7:39 PM David Ahern <dsahern@gmail.com> wrote:
>
> On 10/27/18 12:02 PM, Jeff Barnhill wrote:
> > @@ -275,6 +356,11 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
> >               err = -ENOMEM;
> >               goto out;
> >       }
> > +     err = ipv6_add_acaddr_hash(dev_net(idev->dev), addr);
> > +     if (err) {
> > +             aca_put(aca);
> > +             goto out;
> > +     }
> >
> >       aca->aca_next = idev->ac_list;
> >       idev->ac_list = aca;
>
> you misunderstood my comment. aca_put is instead of a double call to
> fib6_info_release(f6i). You still need one call to
> fib6_info_release(f6i) for the addrconf_f6i_alloc.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH net v5] net/ipv6: Add anycast addresses to a global hashtable
  2018-10-28  1:27                   ` Jeff Barnhill
@ 2018-10-28  1:51                     ` Jeff Barnhill
  2018-10-30  3:32                       ` David Miller
  0 siblings, 1 reply; 23+ messages in thread
From: Jeff Barnhill @ 2018-10-28  1:51 UTC (permalink / raw)
  To: netdev; +Cc: davem, kuznet, yoshfuji, Jeff Barnhill

icmp6_send() function is expensive on systems with a large number of
interfaces. Every time it’s called, it has to verify that the source
address does not correspond to an existing anycast address by looping
through every device and every anycast address on the device.  This can
result in significant delays for a CPU when there are a large number of
neighbors and ND timers are frequently timing out and calling
neigh_invalidate().

Add anycast addresses to a global hashtable to allow quick searching for
matching anycast addresses.  This is based on inet6_addr_lst in addrconf.c.

Signed-off-by: Jeff Barnhill <0xeffeff@gmail.com>
---
 include/net/addrconf.h |   2 +
 include/net/if_inet6.h |   8 ++++
 net/ipv6/af_inet6.c    |   5 ++
 net/ipv6/anycast.c     | 121 ++++++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 134 insertions(+), 2 deletions(-)

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 14b789a123e7..799af1a037d1 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -317,6 +317,8 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
 			 const struct in6_addr *addr);
 bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev,
 			     const struct in6_addr *addr);
+int anycast_init(void);
+void anycast_cleanup(void);
 
 /* Device notifier */
 int register_inet6addr_notifier(struct notifier_block *nb);
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index d7578cf49c3a..a445014b981d 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -142,6 +142,14 @@ struct ipv6_ac_socklist {
 	struct ipv6_ac_socklist *acl_next;
 };
 
+struct ipv6_ac_addrlist {
+	struct in6_addr		acal_addr;
+	possible_net_t		acal_pnet;
+	refcount_t		acal_users;
+	struct hlist_node	acal_lst; /* inet6_acaddr_lst */
+	struct rcu_head		rcu;
+};
+
 struct ifacaddr6 {
 	struct in6_addr		aca_addr;
 	struct fib6_info	*aca_rt;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 3f4d61017a69..ddc8a6dbfba2 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -1001,6 +1001,9 @@ static int __init inet6_init(void)
 	err = ip6_flowlabel_init();
 	if (err)
 		goto ip6_flowlabel_fail;
+	err = anycast_init();
+	if (err)
+		goto anycast_fail;
 	err = addrconf_init();
 	if (err)
 		goto addrconf_fail;
@@ -1091,6 +1094,8 @@ static int __init inet6_init(void)
 ipv6_exthdrs_fail:
 	addrconf_cleanup();
 addrconf_fail:
+	anycast_cleanup();
+anycast_fail:
 	ip6_flowlabel_cleanup();
 ip6_flowlabel_fail:
 	ndisc_late_cleanup();
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 4e0ff7031edd..ca51c9d57ce5 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -44,8 +44,22 @@
 
 #include <net/checksum.h>
 
+#define IN6_ADDR_HSIZE_SHIFT	8
+#define IN6_ADDR_HSIZE		BIT(IN6_ADDR_HSIZE_SHIFT)
+/*	anycast address hash table
+ */
+static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE];
+static DEFINE_SPINLOCK(acaddr_hash_lock);
+
 static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr);
 
+static u32 inet6_acaddr_hash(struct net *net, const struct in6_addr *addr)
+{
+	u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net);
+
+	return hash_32(val, IN6_ADDR_HSIZE_SHIFT);
+}
+
 /*
  *	socket join an anycast group
  */
@@ -204,6 +218,73 @@ void ipv6_sock_ac_close(struct sock *sk)
 	rtnl_unlock();
 }
 
+static struct ipv6_ac_addrlist *acal_alloc(struct net *net,
+					   const struct in6_addr *addr)
+{
+	struct ipv6_ac_addrlist *acal;
+
+	acal = kzalloc(sizeof(*acal), GFP_ATOMIC);
+	if (!acal)
+		return NULL;
+
+	acal->acal_addr = *addr;
+	write_pnet(&acal->acal_pnet, net);
+	refcount_set(&acal->acal_users, 1);
+	INIT_HLIST_NODE(&acal->acal_lst);
+
+	return acal;
+}
+
+static int ipv6_add_acaddr_hash(struct net *net, const struct in6_addr *addr)
+{
+	unsigned int hash = inet6_acaddr_hash(net, addr);
+	struct ipv6_ac_addrlist *acal;
+	int err = 0;
+
+	spin_lock(&acaddr_hash_lock);
+	hlist_for_each_entry(acal, &inet6_acaddr_lst[hash], acal_lst) {
+		if (!net_eq(read_pnet(&acal->acal_pnet), net))
+			continue;
+		if (ipv6_addr_equal(&acal->acal_addr, addr)) {
+			refcount_inc(&acal->acal_users);
+			goto out;
+		}
+	}
+
+	acal = acal_alloc(net, addr);
+	if (!acal) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	hlist_add_head_rcu(&acal->acal_lst, &inet6_acaddr_lst[hash]);
+
+out:
+	spin_unlock(&acaddr_hash_lock);
+	return err;
+}
+
+static void ipv6_del_acaddr_hash(struct net *net, const struct in6_addr *addr)
+{
+	unsigned int hash = inet6_acaddr_hash(net, addr);
+	struct ipv6_ac_addrlist *acal;
+
+	spin_lock(&acaddr_hash_lock);
+	hlist_for_each_entry(acal, &inet6_acaddr_lst[hash], acal_lst) {
+		if (!net_eq(read_pnet(&acal->acal_pnet), net))
+			continue;
+		if (ipv6_addr_equal(&acal->acal_addr, addr)) {
+			if (refcount_dec_and_test(&acal->acal_users)) {
+				hlist_del_init_rcu(&acal->acal_lst);
+				kfree_rcu(acal, rcu);
+			}
+			spin_unlock(&acaddr_hash_lock);
+			return;
+		}
+	}
+	spin_unlock(&acaddr_hash_lock);
+}
+
 static void aca_get(struct ifacaddr6 *aca)
 {
 	refcount_inc(&aca->aca_refcnt);
@@ -275,6 +356,12 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
 		err = -ENOMEM;
 		goto out;
 	}
+	err = ipv6_add_acaddr_hash(dev_net(idev->dev), addr);
+	if (err) {
+		aca_put(aca);
+		fib6_info_release(f6i);
+		goto out;
+	}
 
 	aca->aca_next = idev->ac_list;
 	idev->ac_list = aca;
@@ -324,6 +411,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
 		prev_aca->aca_next = aca->aca_next;
 	else
 		idev->ac_list = aca->aca_next;
+	ipv6_del_acaddr_hash(dev_net(idev->dev), &aca->aca_addr);
 	write_unlock_bh(&idev->lock);
 	addrconf_leave_solict(idev, &aca->aca_addr);
 
@@ -350,6 +438,8 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev)
 	write_lock_bh(&idev->lock);
 	while ((aca = idev->ac_list) != NULL) {
 		idev->ac_list = aca->aca_next;
+		ipv6_del_acaddr_hash(dev_net(idev->dev), &aca->aca_addr);
+
 		write_unlock_bh(&idev->lock);
 
 		addrconf_leave_solict(idev, &aca->aca_addr);
@@ -390,17 +480,23 @@ static bool ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *ad
 bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
 			 const struct in6_addr *addr)
 {
+	unsigned int hash = inet6_acaddr_hash(net, addr);
+	struct ipv6_ac_addrlist *acal;
 	bool found = false;
 
 	rcu_read_lock();
 	if (dev)
 		found = ipv6_chk_acast_dev(dev, addr);
 	else
-		for_each_netdev_rcu(net, dev)
-			if (ipv6_chk_acast_dev(dev, addr)) {
+		hlist_for_each_entry_rcu(acal, &inet6_acaddr_lst[hash],
+					 acal_lst) {
+			if (!net_eq(read_pnet(&acal->acal_pnet), net))
+				continue;
+			if (ipv6_addr_equal(&acal->acal_addr, addr)) {
 				found = true;
 				break;
 			}
+		}
 	rcu_read_unlock();
 	return found;
 }
@@ -539,4 +635,25 @@ void ac6_proc_exit(struct net *net)
 {
 	remove_proc_entry("anycast6", net->proc_net);
 }
+
+/*	Init / cleanup code
+ */
+int __init anycast_init(void)
+{
+	int i;
+
+	for (i = 0; i < IN6_ADDR_HSIZE; i++)
+		INIT_HLIST_HEAD(&inet6_acaddr_lst[i]);
+	return 0;
+}
+
+void anycast_cleanup(void)
+{
+	int i;
+
+	spin_lock(&acaddr_hash_lock);
+	for (i = 0; i < IN6_ADDR_HSIZE; i++)
+		WARN_ON(!hlist_empty(&inet6_acaddr_lst[i]));
+	spin_unlock(&acaddr_hash_lock);
+}
 #endif
-- 
2.14.1

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH net v5] net/ipv6: Add anycast addresses to a global hashtable
  2018-10-28  1:51                     ` [PATCH net v5] " Jeff Barnhill
@ 2018-10-30  3:32                       ` David Miller
  2018-10-30 11:10                         ` Jeff Barnhill
  0 siblings, 1 reply; 23+ messages in thread
From: David Miller @ 2018-10-30  3:32 UTC (permalink / raw)
  To: 0xeffeff; +Cc: netdev, kuznet, yoshfuji

From: Jeff Barnhill <0xeffeff@gmail.com>
Date: Sun, 28 Oct 2018 01:51:59 +0000

> +struct ipv6_ac_addrlist {
> +	struct in6_addr		acal_addr;
> +	possible_net_t		acal_pnet;
> +	refcount_t		acal_users;
> +	struct hlist_node	acal_lst; /* inet6_acaddr_lst */
> +	struct rcu_head		rcu;
> +};

Please just add the hlist to ifacaddr6 instead of duplicating so much
information and reference counters here.

This seems to waste a lot of memory unnecessarily and add lots of
unnecessary object allocate/setup/destroy logic.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH net v5] net/ipv6: Add anycast addresses to a global hashtable
  2018-10-30  3:32                       ` David Miller
@ 2018-10-30 11:10                         ` Jeff Barnhill
  2018-10-30 18:31                           ` David Miller
  0 siblings, 1 reply; 23+ messages in thread
From: Jeff Barnhill @ 2018-10-30 11:10 UTC (permalink / raw)
  To: davem; +Cc: netdev, Alexey Kuznetsov, yoshfuji

I originally started implementing it the way you suggested; however,
it seemed to complicate management of that structure because it isn't
currently using RCU.  Also, assuming that can be worked out, where
would I get the net from?  Would I need to store a copy in ifacaddr6,
or is there some way to access it during ipv6_chk_acast_addr()?  It
seems that if I don't add a copy of net, but instead access it through
aca_rt(?), then freeing the ifacaddr6 memory becomes problematic
(detaching it from idev while an RCU reader may still be accessing it).
On Mon, Oct 29, 2018 at 11:32 PM David Miller <davem@davemloft.net> wrote:
>
> From: Jeff Barnhill <0xeffeff@gmail.com>
> Date: Sun, 28 Oct 2018 01:51:59 +0000
>
> > +struct ipv6_ac_addrlist {
> > +     struct in6_addr         acal_addr;
> > +     possible_net_t          acal_pnet;
> > +     refcount_t              acal_users;
> > +     struct hlist_node       acal_lst; /* inet6_acaddr_lst */
> > +     struct rcu_head         rcu;
> > +};
>
> Please just add the hlist to ifcaddr6 instead of duplicating so much
> information and reference counters here.
>
> This seems to waste a lot of memory unnecessary and add lots of
> unnecessary object allocate/setup/destroy logic.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH net v5] net/ipv6: Add anycast addresses to a global hashtable
  2018-10-30 11:10                         ` Jeff Barnhill
@ 2018-10-30 18:31                           ` David Miller
  2018-10-30 22:06                             ` David Ahern
  0 siblings, 1 reply; 23+ messages in thread
From: David Miller @ 2018-10-30 18:31 UTC (permalink / raw)
  To: 0xeffeff; +Cc: netdev, kuznet, yoshfuji

From: Jeff Barnhill <0xeffeff@gmail.com>
Date: Tue, 30 Oct 2018 07:10:58 -0400

> I originally started implementing it the way you suggested; however,
> it seemed to complicate management of that structure because it isn't
> currently using rcu.  Also, assuming that can be worked out, where
> would I get the net from?  Would I need to store a copy in ifcaddr6,
> or is there some way to access it during ipv6_chk_acast_addr()?  It
> seems that if I don't add a copy of net, but instead access it through
> aca_rt(?), then freeing the ifcaddr6 memory becomes problematic
> (detaching it from idev, while read_rcu may still be accessing it).
> On Mon, Oct 29, 2018 at 11:32 PM David Miller <davem@davemloft.net> wrote:

I don't think converting the structure over to RCU is difficult,
especially because all of the read paths (everything leading to
ipv6_chk_acast_dev()) are taking RCU locks already.

And I cannot understand how having _two_ structures to manage a piece
of information can be less complicated than just one.

You can add a backpointer to the 'idev' in ifacaddr6 to get at the
network namespace.  You don't even need to do additional reference
counting because the idev->ac_list is always purged before an idev
is destroyed.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH net v5] net/ipv6: Add anycast addresses to a global hashtable
  2018-10-30 18:31                           ` David Miller
@ 2018-10-30 22:06                             ` David Ahern
  2018-10-30 23:19                               ` David Miller
  0 siblings, 1 reply; 23+ messages in thread
From: David Ahern @ 2018-10-30 22:06 UTC (permalink / raw)
  To: David Miller, 0xeffeff; +Cc: netdev, kuznet, yoshfuji

On 10/30/18 12:31 PM, David Miller wrote:
> From: Jeff Barnhill <0xeffeff@gmail.com>
> Date: Tue, 30 Oct 2018 07:10:58 -0400
> 
>> I originally started implementing it the way you suggested; however,
>> it seemed to complicate management of that structure because it isn't
>> currently using rcu.  Also, assuming that can be worked out, where
>> would I get the net from?  Would I need to store a copy in ifcaddr6,
>> or is there some way to access it during ipv6_chk_acast_addr()?  It
>> seems that if I don't add a copy of net, but instead access it through
>> aca_rt(?), then freeing the ifcaddr6 memory becomes problematic
>> (detaching it from idev, while read_rcu may still be accessing it).
>> On Mon, Oct 29, 2018 at 11:32 PM David Miller <davem@davemloft.net> wrote:
> 
> I don't think converting the structure over to RCU, especially because
> all of the read paths (everything leading to ipv6_chk_acast_dev()) are
> taking RCU locks already.
> 
> And I cannot understand how having _two_ structures to manage a piece
> of information can be less complicated than just one.
> 
> You can add a backpointer to the 'idev' in ifacaddr6 to get at the
> network namespace.  You don't even need to do additional reference
> counting because the idev->ac_list is always purged before an idev
> is destroyed.
> 

or make the table per namespace.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH net v5] net/ipv6: Add anycast addresses to a global hashtable
  2018-10-30 22:06                             ` David Ahern
@ 2018-10-30 23:19                               ` David Miller
  2018-11-01  0:02                                 ` Jeff Barnhill
  0 siblings, 1 reply; 23+ messages in thread
From: David Miller @ 2018-10-30 23:19 UTC (permalink / raw)
  To: dsahern; +Cc: 0xeffeff, netdev, kuznet, yoshfuji

From: David Ahern <dsahern@gmail.com>
Date: Tue, 30 Oct 2018 16:06:46 -0600

> or make the table per namespace.

This will increase namespace create/destroy cost, so I'd rather not
for something like this.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH net v5] net/ipv6: Add anycast addresses to a global hashtable
  2018-10-30 23:19                               ` David Miller
@ 2018-11-01  0:02                                 ` Jeff Barnhill
  2018-11-01  0:14                                   ` [PATCH net v6] " Jeff Barnhill
  2018-11-01  2:53                                   ` [PATCH net v5] " David Ahern
  0 siblings, 2 replies; 23+ messages in thread
From: Jeff Barnhill @ 2018-11-01  0:02 UTC (permalink / raw)
  To: davem; +Cc: David Ahern, netdev, Alexey Kuznetsov, yoshfuji

I'll follow this email with a new patch using ifacaddr6 instead of
creating a new struct. I ended up using fib6_nh.nh_dev to get the net,
instead of adding a back pointer to idev.  It seems that idev was
recently removed in favor of this, so if this is incorrect, please let
me know. Hopefully, I got the locking correct.
Thanks,
Jeff
On Tue, Oct 30, 2018 at 7:19 PM David Miller <davem@davemloft.net> wrote:
>
> From: David Ahern <dsahern@gmail.com>
> Date: Tue, 30 Oct 2018 16:06:46 -0600
>
> > or make the table per namespace.
>
> This will increase namespace create/destroy cost, so I'd rather not
> for something like this.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH net v6] net/ipv6: Add anycast addresses to a global hashtable
  2018-11-01  0:02                                 ` Jeff Barnhill
@ 2018-11-01  0:14                                   ` Jeff Barnhill
  2018-11-01  5:34                                     ` Stephen Hemminger
  2018-11-01  2:53                                   ` [PATCH net v5] " David Ahern
  1 sibling, 1 reply; 23+ messages in thread
From: Jeff Barnhill @ 2018-11-01  0:14 UTC (permalink / raw)
  To: netdev; +Cc: davem, kuznet, yoshfuji, Jeff Barnhill

icmp6_send() function is expensive on systems with a large number of
interfaces. Every time it’s called, it has to verify that the source
address does not correspond to an existing anycast address by looping
through every device and every anycast address on the device.  This can
result in significant delays for a CPU when there are a large number of
neighbors and ND timers are frequently timing out and calling
neigh_invalidate().

Add anycast addresses to a global hashtable to allow quick searching for
matching anycast addresses.  This is based on inet6_addr_lst in addrconf.c.

Signed-off-by: Jeff Barnhill <0xeffeff@gmail.com>
---
 include/net/addrconf.h |  2 ++
 include/net/if_inet6.h |  2 ++
 net/ipv6/af_inet6.c    |  5 ++++
 net/ipv6/anycast.c     | 80 +++++++++++++++++++++++++++++++++++++++++++++++---
 4 files changed, 85 insertions(+), 4 deletions(-)

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 14b789a123e7..799af1a037d1 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -317,6 +317,8 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
 			 const struct in6_addr *addr);
 bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev,
 			     const struct in6_addr *addr);
+int anycast_init(void);
+void anycast_cleanup(void);
 
 /* Device notifier */
 int register_inet6addr_notifier(struct notifier_block *nb);
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index d7578cf49c3a..c9c78c15bce0 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -146,10 +146,12 @@ struct ifacaddr6 {
 	struct in6_addr		aca_addr;
 	struct fib6_info	*aca_rt;
 	struct ifacaddr6	*aca_next;
+	struct hlist_node	aca_addr_lst;
 	int			aca_users;
 	refcount_t		aca_refcnt;
 	unsigned long		aca_cstamp;
 	unsigned long		aca_tstamp;
+	struct rcu_head		rcu;
 };
 
 #define	IFA_HOST	IPV6_ADDR_LOOPBACK
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 3f4d61017a69..ddc8a6dbfba2 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -1001,6 +1001,9 @@ static int __init inet6_init(void)
 	err = ip6_flowlabel_init();
 	if (err)
 		goto ip6_flowlabel_fail;
+	err = anycast_init();
+	if (err)
+		goto anycast_fail;
 	err = addrconf_init();
 	if (err)
 		goto addrconf_fail;
@@ -1091,6 +1094,8 @@ static int __init inet6_init(void)
 ipv6_exthdrs_fail:
 	addrconf_cleanup();
 addrconf_fail:
+	anycast_cleanup();
+anycast_fail:
 	ip6_flowlabel_cleanup();
 ip6_flowlabel_fail:
 	ndisc_late_cleanup();
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 4e0ff7031edd..f6c4c8ac184c 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -44,8 +44,22 @@
 
 #include <net/checksum.h>
 
+#define IN6_ADDR_HSIZE_SHIFT	8
+#define IN6_ADDR_HSIZE		BIT(IN6_ADDR_HSIZE_SHIFT)
+/*	anycast address hash table
+ */
+static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE];
+static DEFINE_SPINLOCK(acaddr_hash_lock);
+
 static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr);
 
+static u32 inet6_acaddr_hash(struct net *net, const struct in6_addr *addr)
+{
+	u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net);
+
+	return hash_32(val, IN6_ADDR_HSIZE_SHIFT);
+}
+
 /*
  *	socket join an anycast group
  */
@@ -204,16 +218,39 @@ void ipv6_sock_ac_close(struct sock *sk)
 	rtnl_unlock();
 }
 
+static void ipv6_add_acaddr_hash(struct net *net, struct ifacaddr6 *aca)
+{
+	unsigned int hash = inet6_acaddr_hash(net, &aca->aca_addr);
+
+	spin_lock(&acaddr_hash_lock);
+	hlist_add_head_rcu(&aca->aca_addr_lst, &inet6_acaddr_lst[hash]);
+	spin_unlock(&acaddr_hash_lock);
+}
+
+static void ipv6_del_acaddr_hash(struct ifacaddr6 *aca)
+{
+	spin_lock(&acaddr_hash_lock);
+	hlist_del_init_rcu(&aca->aca_addr_lst);
+	spin_unlock(&acaddr_hash_lock);
+}
+
 static void aca_get(struct ifacaddr6 *aca)
 {
 	refcount_inc(&aca->aca_refcnt);
 }
 
+static void aca_free_rcu(struct rcu_head *h)
+{
+	struct ifacaddr6 *aca = container_of(h, struct ifacaddr6, rcu);
+
+	fib6_info_release(aca->aca_rt);
+	kfree(aca);
+}
+
 static void aca_put(struct ifacaddr6 *ac)
 {
 	if (refcount_dec_and_test(&ac->aca_refcnt)) {
-		fib6_info_release(ac->aca_rt);
-		kfree(ac);
+		call_rcu(&ac->rcu, aca_free_rcu);
 	}
 }
 
@@ -229,6 +266,7 @@ static struct ifacaddr6 *aca_alloc(struct fib6_info *f6i,
 	aca->aca_addr = *addr;
 	fib6_info_hold(f6i);
 	aca->aca_rt = f6i;
+	INIT_HLIST_NODE(&aca->aca_addr_lst);
 	aca->aca_users = 1;
 	/* aca_tstamp should be updated upon changes */
 	aca->aca_cstamp = aca->aca_tstamp = jiffies;
@@ -285,6 +323,8 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
 	aca_get(aca);
 	write_unlock_bh(&idev->lock);
 
+	ipv6_add_acaddr_hash(net, aca);
+
 	ip6_ins_rt(net, f6i);
 
 	addrconf_join_solict(idev->dev, &aca->aca_addr);
@@ -325,6 +365,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
 	else
 		idev->ac_list = aca->aca_next;
 	write_unlock_bh(&idev->lock);
+	ipv6_del_acaddr_hash(aca);
 	addrconf_leave_solict(idev, &aca->aca_addr);
 
 	ip6_del_rt(dev_net(idev->dev), aca->aca_rt);
@@ -352,6 +393,8 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev)
 		idev->ac_list = aca->aca_next;
 		write_unlock_bh(&idev->lock);
 
+		ipv6_del_acaddr_hash(aca);
+
 		addrconf_leave_solict(idev, &aca->aca_addr);
 
 		ip6_del_rt(dev_net(idev->dev), aca->aca_rt);
@@ -390,17 +433,25 @@ static bool ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *ad
 bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
 			 const struct in6_addr *addr)
 {
+	unsigned int hash = inet6_acaddr_hash(net, addr);
+	struct net_device *nh_dev;
+	struct ifacaddr6 *aca;
 	bool found = false;
 
 	rcu_read_lock();
 	if (dev)
 		found = ipv6_chk_acast_dev(dev, addr);
 	else
-		for_each_netdev_rcu(net, dev)
-			if (ipv6_chk_acast_dev(dev, addr)) {
+		hlist_for_each_entry_rcu(aca, &inet6_acaddr_lst[hash],
+					 aca_addr_lst) {
+			nh_dev = fib6_info_nh_dev(aca->aca_rt);
+			if (!nh_dev || !net_eq(dev_net(nh_dev), net))
+				continue;
+			if (ipv6_addr_equal(&aca->aca_addr, addr)) {
 				found = true;
 				break;
 			}
+		}
 	rcu_read_unlock();
 	return found;
 }
@@ -539,4 +590,25 @@ void ac6_proc_exit(struct net *net)
 {
 	remove_proc_entry("anycast6", net->proc_net);
 }
+
+/*	Init / cleanup code
+ */
+int __init anycast_init(void)
+{
+	int i;
+
+	for (i = 0; i < IN6_ADDR_HSIZE; i++)
+		INIT_HLIST_HEAD(&inet6_acaddr_lst[i]);
+	return 0;
+}
+
+void anycast_cleanup(void)
+{
+	int i;
+
+	spin_lock(&acaddr_hash_lock);
+	for (i = 0; i < IN6_ADDR_HSIZE; i++)
+		WARN_ON(!hlist_empty(&inet6_acaddr_lst[i]));
+	spin_unlock(&acaddr_hash_lock);
+}
 #endif
-- 
2.14.1

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH net v5] net/ipv6: Add anycast addresses to a global hashtable
  2018-11-01  0:02                                 ` Jeff Barnhill
  2018-11-01  0:14                                   ` [PATCH net v6] " Jeff Barnhill
@ 2018-11-01  2:53                                   ` David Ahern
  1 sibling, 0 replies; 23+ messages in thread
From: David Ahern @ 2018-11-01  2:53 UTC (permalink / raw)
  To: Jeff Barnhill, davem; +Cc: netdev, Alexey Kuznetsov, yoshfuji

On 10/31/18 6:02 PM, Jeff Barnhill wrote:
> I'll follow this email with a new patch using ifacaddr6 instead of
> creating a new struct. I ended up using fib6_nh.nh_dev to get the net,
> instead of adding a back pointer to idev.  It seems that idev was
> recently removed in lieu of this, so if this is incorrect, please let
> me know. Hopefully, I got the locking correct.

That's correct. Make sure that the anycast code can not be accessed for
reject routes which will not have a device set. Should be ok, but double
check.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH net v6] net/ipv6: Add anycast addresses to a global hashtable
  2018-11-01  0:14                                   ` [PATCH net v6] " Jeff Barnhill
@ 2018-11-01  5:34                                     ` Stephen Hemminger
  2018-11-02 20:23                                       ` [PATCH net v7] " Jeff Barnhill
  0 siblings, 1 reply; 23+ messages in thread
From: Stephen Hemminger @ 2018-11-01  5:34 UTC (permalink / raw)
  To: Jeff Barnhill; +Cc: netdev, davem, kuznet, yoshfuji

On Thu,  1 Nov 2018 00:14:38 +0000
Jeff Barnhill <0xeffeff@gmail.com> wrote:

> diff --git a/include/net/addrconf.h b/include/net/addrconf.h
> index 14b789a123e7..799af1a037d1 100644
> --- a/include/net/addrconf.h
> +++ b/include/net/addrconf.h
> @@ -317,6 +317,8 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
>  			 const struct in6_addr *addr);
>  bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev,
>  			     const struct in6_addr *addr);
> +int anycast_init(void);
> +void anycast_cleanup(void);

One minor nit that should be fixed.


To avoid any potential naming conflicts, please prefix all ipv6 global symbols
with ipv6_

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH net v7] net/ipv6: Add anycast addresses to a global hashtable
  2018-11-01  5:34                                     ` Stephen Hemminger
@ 2018-11-02 20:23                                       ` Jeff Barnhill
  2018-11-03  6:55                                         ` David Miller
  0 siblings, 1 reply; 23+ messages in thread
From: Jeff Barnhill @ 2018-11-02 20:23 UTC (permalink / raw)
  To: netdev; +Cc: davem, kuznet, yoshfuji, Jeff Barnhill

icmp6_send() function is expensive on systems with a large number of
interfaces. Every time it’s called, it has to verify that the source
address does not correspond to an existing anycast address by looping
through every device and every anycast address on the device.  This can
result in significant delays for a CPU when there are a large number of
neighbors and ND timers are frequently timing out and calling
neigh_invalidate().

Add anycast addresses to a global hashtable to allow quick searching for
matching anycast addresses.  This is based on inet6_addr_lst in addrconf.c.

Signed-off-by: Jeff Barnhill <0xeffeff@gmail.com>
---
 include/net/addrconf.h |  2 ++
 include/net/if_inet6.h |  2 ++
 net/ipv6/af_inet6.c    |  5 ++++
 net/ipv6/anycast.c     | 80 +++++++++++++++++++++++++++++++++++++++++++++++---
 4 files changed, 85 insertions(+), 4 deletions(-)

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 14b789a123e7..1656c5978498 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -317,6 +317,8 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
 			 const struct in6_addr *addr);
 bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev,
 			     const struct in6_addr *addr);
+int ipv6_anycast_init(void);
+void ipv6_anycast_cleanup(void);
 
 /* Device notifier */
 int register_inet6addr_notifier(struct notifier_block *nb);
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index d7578cf49c3a..c9c78c15bce0 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -146,10 +146,12 @@ struct ifacaddr6 {
 	struct in6_addr		aca_addr;
 	struct fib6_info	*aca_rt;
 	struct ifacaddr6	*aca_next;
+	struct hlist_node	aca_addr_lst;
 	int			aca_users;
 	refcount_t		aca_refcnt;
 	unsigned long		aca_cstamp;
 	unsigned long		aca_tstamp;
+	struct rcu_head		rcu;
 };
 
 #define	IFA_HOST	IPV6_ADDR_LOOPBACK
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 3f4d61017a69..f0cd291034f0 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -1001,6 +1001,9 @@ static int __init inet6_init(void)
 	err = ip6_flowlabel_init();
 	if (err)
 		goto ip6_flowlabel_fail;
+	err = ipv6_anycast_init();
+	if (err)
+		goto ipv6_anycast_fail;
 	err = addrconf_init();
 	if (err)
 		goto addrconf_fail;
@@ -1091,6 +1094,8 @@ static int __init inet6_init(void)
 ipv6_exthdrs_fail:
 	addrconf_cleanup();
 addrconf_fail:
+	ipv6_anycast_cleanup();
+ipv6_anycast_fail:
 	ip6_flowlabel_cleanup();
 ip6_flowlabel_fail:
 	ndisc_late_cleanup();
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 4e0ff7031edd..7698637cf827 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -44,8 +44,22 @@
 
 #include <net/checksum.h>
 
+#define IN6_ADDR_HSIZE_SHIFT	8
+#define IN6_ADDR_HSIZE		BIT(IN6_ADDR_HSIZE_SHIFT)
+/*	anycast address hash table
+ */
+static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE];
+static DEFINE_SPINLOCK(acaddr_hash_lock);
+
 static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr);
 
+static u32 inet6_acaddr_hash(struct net *net, const struct in6_addr *addr)
+{
+	u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net);
+
+	return hash_32(val, IN6_ADDR_HSIZE_SHIFT);
+}
+
 /*
  *	socket join an anycast group
  */
@@ -204,16 +218,39 @@ void ipv6_sock_ac_close(struct sock *sk)
 	rtnl_unlock();
 }
 
+static void ipv6_add_acaddr_hash(struct net *net, struct ifacaddr6 *aca)
+{
+	unsigned int hash = inet6_acaddr_hash(net, &aca->aca_addr);
+
+	spin_lock(&acaddr_hash_lock);
+	hlist_add_head_rcu(&aca->aca_addr_lst, &inet6_acaddr_lst[hash]);
+	spin_unlock(&acaddr_hash_lock);
+}
+
+static void ipv6_del_acaddr_hash(struct ifacaddr6 *aca)
+{
+	spin_lock(&acaddr_hash_lock);
+	hlist_del_init_rcu(&aca->aca_addr_lst);
+	spin_unlock(&acaddr_hash_lock);
+}
+
 static void aca_get(struct ifacaddr6 *aca)
 {
 	refcount_inc(&aca->aca_refcnt);
 }
 
+static void aca_free_rcu(struct rcu_head *h)
+{
+	struct ifacaddr6 *aca = container_of(h, struct ifacaddr6, rcu);
+
+	fib6_info_release(aca->aca_rt);
+	kfree(aca);
+}
+
 static void aca_put(struct ifacaddr6 *ac)
 {
 	if (refcount_dec_and_test(&ac->aca_refcnt)) {
-		fib6_info_release(ac->aca_rt);
-		kfree(ac);
+		call_rcu(&ac->rcu, aca_free_rcu);
 	}
 }
 
@@ -229,6 +266,7 @@ static struct ifacaddr6 *aca_alloc(struct fib6_info *f6i,
 	aca->aca_addr = *addr;
 	fib6_info_hold(f6i);
 	aca->aca_rt = f6i;
+	INIT_HLIST_NODE(&aca->aca_addr_lst);
 	aca->aca_users = 1;
 	/* aca_tstamp should be updated upon changes */
 	aca->aca_cstamp = aca->aca_tstamp = jiffies;
@@ -285,6 +323,8 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
 	aca_get(aca);
 	write_unlock_bh(&idev->lock);
 
+	ipv6_add_acaddr_hash(net, aca);
+
 	ip6_ins_rt(net, f6i);
 
 	addrconf_join_solict(idev->dev, &aca->aca_addr);
@@ -325,6 +365,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
 	else
 		idev->ac_list = aca->aca_next;
 	write_unlock_bh(&idev->lock);
+	ipv6_del_acaddr_hash(aca);
 	addrconf_leave_solict(idev, &aca->aca_addr);
 
 	ip6_del_rt(dev_net(idev->dev), aca->aca_rt);
@@ -352,6 +393,8 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev)
 		idev->ac_list = aca->aca_next;
 		write_unlock_bh(&idev->lock);
 
+		ipv6_del_acaddr_hash(aca);
+
 		addrconf_leave_solict(idev, &aca->aca_addr);
 
 		ip6_del_rt(dev_net(idev->dev), aca->aca_rt);
@@ -390,17 +433,25 @@ static bool ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *ad
 bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
 			 const struct in6_addr *addr)
 {
+	unsigned int hash = inet6_acaddr_hash(net, addr);
+	struct net_device *nh_dev;
+	struct ifacaddr6 *aca;
 	bool found = false;
 
 	rcu_read_lock();
 	if (dev)
 		found = ipv6_chk_acast_dev(dev, addr);
 	else
-		for_each_netdev_rcu(net, dev)
-			if (ipv6_chk_acast_dev(dev, addr)) {
+		hlist_for_each_entry_rcu(aca, &inet6_acaddr_lst[hash],
+					 aca_addr_lst) {
+			nh_dev = fib6_info_nh_dev(aca->aca_rt);
+			if (!nh_dev || !net_eq(dev_net(nh_dev), net))
+				continue;
+			if (ipv6_addr_equal(&aca->aca_addr, addr)) {
 				found = true;
 				break;
 			}
+		}
 	rcu_read_unlock();
 	return found;
 }
@@ -539,4 +590,25 @@ void ac6_proc_exit(struct net *net)
 {
 	remove_proc_entry("anycast6", net->proc_net);
 }
+
+/*	Init / cleanup code
+ */
+int __init ipv6_anycast_init(void)
+{
+	int i;
+
+	for (i = 0; i < IN6_ADDR_HSIZE; i++)
+		INIT_HLIST_HEAD(&inet6_acaddr_lst[i]);
+	return 0;
+}
+
+void ipv6_anycast_cleanup(void)
+{
+	int i;
+
+	spin_lock(&acaddr_hash_lock);
+	for (i = 0; i < IN6_ADDR_HSIZE; i++)
+		WARN_ON(!hlist_empty(&inet6_acaddr_lst[i]));
+	spin_unlock(&acaddr_hash_lock);
+}
 #endif
-- 
2.14.1

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH net v7] net/ipv6: Add anycast addresses to a global hashtable
  2018-11-02 20:23                                       ` [PATCH net v7] " Jeff Barnhill
@ 2018-11-03  6:55                                         ` David Miller
  0 siblings, 0 replies; 23+ messages in thread
From: David Miller @ 2018-11-03  6:55 UTC (permalink / raw)
  To: 0xeffeff; +Cc: netdev, kuznet, yoshfuji

From: Jeff Barnhill <0xeffeff@gmail.com>
Date: Fri,  2 Nov 2018 20:23:57 +0000

> icmp6_send() function is expensive on systems with a large number of
> interfaces. Every time it’s called, it has to verify that the source
> address does not correspond to an existing anycast address by looping
> through every device and every anycast address on the device.  This can
> result in significant delays for a CPU when there are a large number of
> neighbors and ND timers are frequently timing out and calling
> neigh_invalidate().
> 
> Add anycast addresses to a global hashtable to allow quick searching for
> matching anycast addresses.  This is based on inet6_addr_lst in addrconf.c.
> 
> Signed-off-by: Jeff Barnhill <0xeffeff@gmail.com>

Applied, thank you.

^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2018-11-03 16:05 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-10-23  2:12 [PATCH net] net/ipv6: Add anycast addresses to a global hashtable Jeff Barnhill
2018-10-23  2:26 ` Eric Dumazet
2018-10-23 18:21   ` Jeff Barnhill
2018-10-24  1:58     ` [PATCH net v2] " Jeff Barnhill
2018-10-24  3:12       ` Eric Dumazet
2018-10-24  5:06         ` Jeff Barnhill
2018-10-26 21:22           ` [PATCH net v3] " Jeff Barnhill
2018-10-26 21:44             ` David Ahern
2018-10-27 18:02               ` [PATCH net v4] " Jeff Barnhill
2018-10-27 23:39                 ` David Ahern
2018-10-28  1:27                   ` Jeff Barnhill
2018-10-28  1:51                     ` [PATCH net v5] " Jeff Barnhill
2018-10-30  3:32                       ` David Miller
2018-10-30 11:10                         ` Jeff Barnhill
2018-10-30 18:31                           ` David Miller
2018-10-30 22:06                             ` David Ahern
2018-10-30 23:19                               ` David Miller
2018-11-01  0:02                                 ` Jeff Barnhill
2018-11-01  0:14                                   ` [PATCH net v6] " Jeff Barnhill
2018-11-01  5:34                                     ` Stephen Hemminger
2018-11-02 20:23                                       ` [PATCH net v7] " Jeff Barnhill
2018-11-03  6:55                                         ` David Miller
2018-11-01  2:53                                   ` [PATCH net v5] " David Ahern

This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.