* [PATCH net] net/ipv6: Add anycast addresses to a global hashtable @ 2018-10-23 2:12 Jeff Barnhill 2018-10-23 2:26 ` Eric Dumazet 0 siblings, 1 reply; 23+ messages in thread From: Jeff Barnhill @ 2018-10-23 2:12 UTC (permalink / raw) To: netdev; +Cc: davem, kuznet, yoshfuji, Jeff Barnhill icmp6_send() function is expensive on systems with a large number of interfaces. Every time it’s called, it has to verify that the source address does not correspond to an existing anycast address by looping through every device and every anycast address on the device. This can result in significant delays for a CPU when there are a large number of neighbors and ND timers are frequently timing out and calling neigh_invalidate(). Add anycast addresses to a global hashtable to allow quick searching for matching anycast addresses. This is based on inet6_addr_lst in addrconf.c. Signed-off-by: Jeff Barnhill <0xeffeff@gmail.com> --- include/net/addrconf.h | 2 + include/net/if_inet6.h | 8 ++++ net/ipv6/af_inet6.c | 5 ++ net/ipv6/anycast.c | 122 ++++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 135 insertions(+), 2 deletions(-) diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 6def0351bcc3..0cee3f99c41d 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -312,6 +312,8 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, const struct in6_addr *addr); bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev, const struct in6_addr *addr); +int anycast_init(void); +void anycast_cleanup(void); /* Device notifier */ int register_inet6addr_notifier(struct notifier_block *nb); diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index d7578cf49c3a..ac02b2cf2ba1 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -142,6 +142,14 @@ struct ipv6_ac_socklist { struct ipv6_ac_socklist *acl_next; }; +struct ipv6_ac_addrlist { + struct in6_addr acal_addr; + int acal_ifindex; /* net */ + int acal_users; 
+ struct hlist_node acal_lst; /* inet6_acaddr_lst */ + struct rcu_head rcu; +}; + struct ifacaddr6 { struct in6_addr aca_addr; struct fib6_info *aca_rt; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 9a4261e50272..971a05fdd3bd 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -1001,6 +1001,9 @@ static int __init inet6_init(void) err = ip6_flowlabel_init(); if (err) goto ip6_flowlabel_fail; + err = anycast_init(); + if (err) + goto anycast_fail; err = addrconf_init(); if (err) goto addrconf_fail; @@ -1091,6 +1094,8 @@ static int __init inet6_init(void) ipv6_exthdrs_fail: addrconf_cleanup(); addrconf_fail: + anycast_cleanup(); +anycast_fail: ip6_flowlabel_cleanup(); ip6_flowlabel_fail: ndisc_late_cleanup(); diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 4e0ff7031edd..58d31e0980aa 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -44,8 +44,22 @@ #include <net/checksum.h> +#define IN6_ADDR_HSIZE_SHIFT 8 +#define IN6_ADDR_HSIZE BIT(IN6_ADDR_HSIZE_SHIFT) +/* anycast address hash table + */ +static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE]; +static DEFINE_SPINLOCK(acaddr_hash_lock); + static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr); +static u32 inet6_acaddr_hash(struct net *net, const struct in6_addr *addr) +{ + u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net); + + return hash_32(val, IN6_ADDR_HSIZE_SHIFT); +} + /* * socket join an anycast group */ @@ -204,6 +218,73 @@ void ipv6_sock_ac_close(struct sock *sk) rtnl_unlock(); } +static struct ipv6_ac_addrlist *acal_alloc(int ifindex, + const struct in6_addr *addr) +{ + struct ipv6_ac_addrlist *acal; + + acal = kzalloc(sizeof(*acal), GFP_ATOMIC); + if (!acal) + return NULL; + + acal->acal_addr = *addr; + acal->acal_ifindex = ifindex; + acal->acal_users = 1; + INIT_HLIST_NODE(&acal->acal_lst); + + return acal; +} + +static int ipv6_add_acaddr_hash(struct net *net, const struct in6_addr *addr) +{ + unsigned int hash = 
inet6_acaddr_hash(net, addr); + struct ipv6_ac_addrlist *acal; + int err = 0; + + spin_lock(&acaddr_hash_lock); + hlist_for_each_entry(acal, &inet6_acaddr_lst[hash], acal_lst) { + if (acal->acal_ifindex != net->ifindex) + continue; + if (ipv6_addr_equal(&acal->acal_addr, addr)) { + acal->acal_users++; + goto out; + } + } + + acal = acal_alloc(net->ifindex, addr); + if (!acal) { + err = -ENOMEM; + goto out; + } + + hlist_add_head_rcu(&acal->acal_lst, &inet6_acaddr_lst[hash]); + +out: + spin_unlock(&acaddr_hash_lock); + return err; +} + +static void ipv6_del_acaddr_hash(struct net *net, const struct in6_addr *addr) +{ + unsigned int hash = inet6_acaddr_hash(net, addr); + struct ipv6_ac_addrlist *acal; + + spin_lock(&acaddr_hash_lock); + hlist_for_each_entry(acal, &inet6_acaddr_lst[hash], acal_lst) { + if (acal->acal_ifindex != net->ifindex) + continue; + if (ipv6_addr_equal(&acal->acal_addr, addr)) { + if (--acal->acal_users < 1) { + hlist_del_init_rcu(&acal->acal_lst); + kfree_rcu(acal, rcu); + } + spin_unlock(&acaddr_hash_lock); + return; + } + } + spin_unlock(&acaddr_hash_lock); +} + static void aca_get(struct ifacaddr6 *aca) { refcount_inc(&aca->aca_refcnt); @@ -275,6 +356,13 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr) err = -ENOMEM; goto out; } + err = ipv6_add_acaddr_hash(dev_net(idev->dev), addr); + if (err) { + fib6_info_release(f6i); + fib6_info_release(f6i); + kfree(aca); + goto out; + } aca->aca_next = idev->ac_list; idev->ac_list = aca; @@ -324,6 +412,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr) prev_aca->aca_next = aca->aca_next; else idev->ac_list = aca->aca_next; + ipv6_del_acaddr_hash(dev_net(idev->dev), &aca->aca_addr); write_unlock_bh(&idev->lock); addrconf_leave_solict(idev, &aca->aca_addr); @@ -350,6 +439,8 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev) write_lock_bh(&idev->lock); while ((aca = idev->ac_list) != NULL) { idev->ac_list = aca->aca_next; + 
ipv6_del_acaddr_hash(dev_net(idev->dev), &aca->aca_addr); + write_unlock_bh(&idev->lock); addrconf_leave_solict(idev, &aca->aca_addr); @@ -391,16 +482,22 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, const struct in6_addr *addr) { bool found = false; + unsigned int hash = inet6_acaddr_hash(net, addr); + struct ipv6_ac_addrlist *acal; rcu_read_lock(); if (dev) found = ipv6_chk_acast_dev(dev, addr); else - for_each_netdev_rcu(net, dev) - if (ipv6_chk_acast_dev(dev, addr)) { + hlist_for_each_entry_rcu(acal, &inet6_acaddr_lst[hash], + acal_lst) { + if (acal->acal_ifindex != net->ifindex) + continue; + if (ipv6_addr_equal(&acal->acal_addr, addr)) { found = true; break; } + } rcu_read_unlock(); return found; } @@ -539,4 +636,25 @@ void ac6_proc_exit(struct net *net) { remove_proc_entry("anycast6", net->proc_net); } + +/* Init / cleanup code + */ +int __init anycast_init(void) +{ + int i; + + for (i = 0; i < IN6_ADDR_HSIZE; i++) + INIT_HLIST_HEAD(&inet6_acaddr_lst[i]); + return 0; +} + +void anycast_cleanup(void) +{ + int i; + + spin_lock(&acaddr_hash_lock); + for (i = 0; i < IN6_ADDR_HSIZE; i++) + WARN_ON(!hlist_empty(&inet6_acaddr_lst[i])); + spin_unlock(&acaddr_hash_lock); +} #endif -- 2.14.1 ^ permalink raw reply related [flat|nested] 23+ messages in thread
* Re: [PATCH net] net/ipv6: Add anycast addresses to a global hashtable 2018-10-23 2:12 [PATCH net] net/ipv6: Add anycast addresses to a global hashtable Jeff Barnhill @ 2018-10-23 2:26 ` Eric Dumazet 2018-10-23 18:21 ` Jeff Barnhill 0 siblings, 1 reply; 23+ messages in thread From: Eric Dumazet @ 2018-10-23 2:26 UTC (permalink / raw) To: Jeff Barnhill, netdev; +Cc: davem, kuznet, yoshfuji On 10/22/2018 07:12 PM, Jeff Barnhill wrote: > icmp6_send() function is expensive on systems with a large number of > interfaces. Every time it’s called, it has to verify that the source > address does not correspond to an existing anycast address by looping > through every device and every anycast address on the device. This can > result in significant delays for a CPU when there are a large number of > neighbors and ND timers are frequently timing out and calling > neigh_invalidate(). > > Add anycast addresses to a global hashtable to allow quick searching for > matching anycast addresses. This is based on inet6_addr_lst in addrconf.c. > I do not see this patch being netns aware? Also, I believe you misunderstood what was stored in net->ifindex. You can look at dev_new_index() for what I mean. ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH net] net/ipv6: Add anycast addresses to a global hashtable 2018-10-23 2:26 ` Eric Dumazet @ 2018-10-23 18:21 ` Jeff Barnhill 2018-10-24 1:58 ` [PATCH net v2] " Jeff Barnhill 0 siblings, 1 reply; 23+ messages in thread From: Jeff Barnhill @ 2018-10-23 18:21 UTC (permalink / raw) To: eric.dumazet; +Cc: netdev, davem, Alexey Kuznetsov, yoshfuji Thanks! You are right. I misunderstood net->ifindex. I think I need to instead hold the net pointer in the new ipv6_ac_addrlist structure. Do you see a problem with that? On Mon, Oct 22, 2018 at 10:26 PM Eric Dumazet <eric.dumazet@gmail.com> wrote: > > > > On 10/22/2018 07:12 PM, Jeff Barnhill wrote: > > icmp6_send() function is expensive on systems with a large number of > > interfaces. Every time it’s called, it has to verify that the source > > address does not correspond to an existing anycast address by looping > > through every device and every anycast address on the device. This can > > result in significant delays for a CPU when there are a large number of > > neighbors and ND timers are frequently timing out and calling > > neigh_invalidate(). > > > > Add anycast addresses to a global hashtable to allow quick searching for > > matching anycast addresses. This is based on inet6_addr_lst in addrconf.c. > > > > I do not see this patch being netns aware ? > > Also I believe you misunderstood what was stored in net->ifindex > You can look at dev_new_index() for what I mean. > ^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH net v2] net/ipv6: Add anycast addresses to a global hashtable 2018-10-23 18:21 ` Jeff Barnhill @ 2018-10-24 1:58 ` Jeff Barnhill 2018-10-24 3:12 ` Eric Dumazet 0 siblings, 1 reply; 23+ messages in thread From: Jeff Barnhill @ 2018-10-24 1:58 UTC (permalink / raw) To: netdev; +Cc: davem, kuznet, yoshfuji, Jeff Barnhill icmp6_send() function is expensive on systems with a large number of interfaces. Every time it’s called, it has to verify that the source address does not correspond to an existing anycast address by looping through every device and every anycast address on the device. This can result in significant delays for a CPU when there are a large number of neighbors and ND timers are frequently timing out and calling neigh_invalidate(). Add anycast addresses to a global hashtable to allow quick searching for matching anycast addresses. This is based on inet6_addr_lst in addrconf.c. Signed-off-by: Jeff Barnhill <0xeffeff@gmail.com> --- include/net/addrconf.h | 2 + include/net/if_inet6.h | 8 +++ net/ipv6/af_inet6.c | 5 ++ net/ipv6/anycast.c | 132 ++++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 145 insertions(+), 2 deletions(-) diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 6def0351bcc3..0cee3f99c41d 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -312,6 +312,8 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, const struct in6_addr *addr); bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev, const struct in6_addr *addr); +int anycast_init(void); +void anycast_cleanup(void); /* Device notifier */ int register_inet6addr_notifier(struct notifier_block *nb); diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index d7578cf49c3a..55a4a1d8cebc 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -142,6 +142,14 @@ struct ipv6_ac_socklist { struct ipv6_ac_socklist *acl_next; }; +struct ipv6_ac_addrlist { + struct in6_addr acal_addr; + 
possible_net_t acal_pnet; + int acal_users; + struct hlist_node acal_lst; /* inet6_acaddr_lst */ + struct rcu_head rcu; +}; + struct ifacaddr6 { struct in6_addr aca_addr; struct fib6_info *aca_rt; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 9a4261e50272..971a05fdd3bd 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -1001,6 +1001,9 @@ static int __init inet6_init(void) err = ip6_flowlabel_init(); if (err) goto ip6_flowlabel_fail; + err = anycast_init(); + if (err) + goto anycast_fail; err = addrconf_init(); if (err) goto addrconf_fail; @@ -1091,6 +1094,8 @@ static int __init inet6_init(void) ipv6_exthdrs_fail: addrconf_cleanup(); addrconf_fail: + anycast_cleanup(); +anycast_fail: ip6_flowlabel_cleanup(); ip6_flowlabel_fail: ndisc_late_cleanup(); diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 4e0ff7031edd..def1e156d857 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -44,8 +44,22 @@ #include <net/checksum.h> +#define IN6_ADDR_HSIZE_SHIFT 8 +#define IN6_ADDR_HSIZE BIT(IN6_ADDR_HSIZE_SHIFT) +/* anycast address hash table + */ +static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE]; +static DEFINE_SPINLOCK(acaddr_hash_lock); + static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr); +static u32 inet6_acaddr_hash(struct net *net, const struct in6_addr *addr) +{ + u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net); + + return hash_32(val, IN6_ADDR_HSIZE_SHIFT); +} + /* * socket join an anycast group */ @@ -204,6 +218,83 @@ void ipv6_sock_ac_close(struct sock *sk) rtnl_unlock(); } +static struct ipv6_ac_addrlist *acal_alloc(struct net *net, + const struct in6_addr *addr) +{ + struct ipv6_ac_addrlist *acal; + + acal = kzalloc(sizeof(*acal), GFP_ATOMIC); + if (!acal) + return NULL; + + acal->acal_addr = *addr; + write_pnet(&acal->acal_pnet, get_net(net)); + acal->acal_users = 1; + INIT_HLIST_NODE(&acal->acal_lst); + + return acal; +} + +static void acal_free_rcu(struct rcu_head *h) +{ + struct 
ipv6_ac_addrlist *acal; + + acal = container_of(h, struct ipv6_ac_addrlist, rcu); + WARN_ON(acal->acal_users); + put_net(read_pnet(&acal->acal_pnet)); + kfree(acal); +} + +static int ipv6_add_acaddr_hash(struct net *net, const struct in6_addr *addr) +{ + unsigned int hash = inet6_acaddr_hash(net, addr); + struct ipv6_ac_addrlist *acal; + int err = 0; + + spin_lock(&acaddr_hash_lock); + hlist_for_each_entry(acal, &inet6_acaddr_lst[hash], acal_lst) { + if (!net_eq(read_pnet(&acal->acal_pnet), net)) + continue; + if (ipv6_addr_equal(&acal->acal_addr, addr)) { + acal->acal_users++; + goto out; + } + } + + acal = acal_alloc(net, addr); + if (!acal) { + err = -ENOMEM; + goto out; + } + + hlist_add_head_rcu(&acal->acal_lst, &inet6_acaddr_lst[hash]); + +out: + spin_unlock(&acaddr_hash_lock); + return err; +} + +static void ipv6_del_acaddr_hash(struct net *net, const struct in6_addr *addr) +{ + unsigned int hash = inet6_acaddr_hash(net, addr); + struct ipv6_ac_addrlist *acal; + + spin_lock(&acaddr_hash_lock); + hlist_for_each_entry(acal, &inet6_acaddr_lst[hash], acal_lst) { + if (!net_eq(read_pnet(&acal->acal_pnet), net)) + continue; + if (ipv6_addr_equal(&acal->acal_addr, addr)) { + if (--acal->acal_users < 1) { + hlist_del_init_rcu(&acal->acal_lst); + call_rcu(&acal->rcu, acal_free_rcu); + } + spin_unlock(&acaddr_hash_lock); + return; + } + } + spin_unlock(&acaddr_hash_lock); +} + static void aca_get(struct ifacaddr6 *aca) { refcount_inc(&aca->aca_refcnt); @@ -275,6 +366,13 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr) err = -ENOMEM; goto out; } + err = ipv6_add_acaddr_hash(dev_net(idev->dev), addr); + if (err) { + fib6_info_release(f6i); + fib6_info_release(f6i); + kfree(aca); + goto out; + } aca->aca_next = idev->ac_list; idev->ac_list = aca; @@ -324,6 +422,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr) prev_aca->aca_next = aca->aca_next; else idev->ac_list = aca->aca_next; + 
ipv6_del_acaddr_hash(dev_net(idev->dev), &aca->aca_addr); write_unlock_bh(&idev->lock); addrconf_leave_solict(idev, &aca->aca_addr); @@ -350,6 +449,8 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev) write_lock_bh(&idev->lock); while ((aca = idev->ac_list) != NULL) { idev->ac_list = aca->aca_next; + ipv6_del_acaddr_hash(dev_net(idev->dev), &aca->aca_addr); + write_unlock_bh(&idev->lock); addrconf_leave_solict(idev, &aca->aca_addr); @@ -391,16 +492,22 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, const struct in6_addr *addr) { bool found = false; + unsigned int hash = inet6_acaddr_hash(net, addr); + struct ipv6_ac_addrlist *acal; rcu_read_lock(); if (dev) found = ipv6_chk_acast_dev(dev, addr); else - for_each_netdev_rcu(net, dev) - if (ipv6_chk_acast_dev(dev, addr)) { + hlist_for_each_entry_rcu(acal, &inet6_acaddr_lst[hash], + acal_lst) { + if (!net_eq(read_pnet(&acal->acal_pnet), net)) + continue; + if (ipv6_addr_equal(&acal->acal_addr, addr)) { found = true; break; } + } rcu_read_unlock(); return found; } @@ -539,4 +646,25 @@ void ac6_proc_exit(struct net *net) { remove_proc_entry("anycast6", net->proc_net); } + +/* Init / cleanup code + */ +int __init anycast_init(void) +{ + int i; + + for (i = 0; i < IN6_ADDR_HSIZE; i++) + INIT_HLIST_HEAD(&inet6_acaddr_lst[i]); + return 0; +} + +void anycast_cleanup(void) +{ + int i; + + spin_lock(&acaddr_hash_lock); + for (i = 0; i < IN6_ADDR_HSIZE; i++) + WARN_ON(!hlist_empty(&inet6_acaddr_lst[i])); + spin_unlock(&acaddr_hash_lock); +} #endif -- 2.14.1 ^ permalink raw reply related [flat|nested] 23+ messages in thread
* Re: [PATCH net v2] net/ipv6: Add anycast addresses to a global hashtable 2018-10-24 1:58 ` [PATCH net v2] " Jeff Barnhill @ 2018-10-24 3:12 ` Eric Dumazet 2018-10-24 5:06 ` Jeff Barnhill 0 siblings, 1 reply; 23+ messages in thread From: Eric Dumazet @ 2018-10-24 3:12 UTC (permalink / raw) To: Jeff Barnhill, netdev; +Cc: davem, kuznet, yoshfuji On 10/23/2018 06:58 PM, Jeff Barnhill wrote: > icmp6_send() function is expensive on systems with a large number of > interfaces. Every time it’s called, it has to verify that the source > address does not correspond to an existing anycast address by looping > through every device and every anycast address on the device. This can > result in significant delays for a CPU when there are a large number of > neighbors and ND timers are frequently timing out and calling > neigh_invalidate(). > > Add anycast addresses to a global hashtable to allow quick searching for > matching anycast addresses. This is based on inet6_addr_lst in addrconf.c. > > Signed-off-by: Jeff Barnhill <0xeffeff@gmail.com> > --- > include/net/addrconf.h | 2 + > include/net/if_inet6.h | 8 +++ > net/ipv6/af_inet6.c | 5 ++ > net/ipv6/anycast.c | 132 ++++++++++++++++++++++++++++++++++++++++++++++++- > 4 files changed, 145 insertions(+), 2 deletions(-) > > diff --git a/include/net/addrconf.h b/include/net/addrconf.h > index 6def0351bcc3..0cee3f99c41d 100644 > --- a/include/net/addrconf.h > +++ b/include/net/addrconf.h > @@ -312,6 +312,8 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, > const struct in6_addr *addr); > bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev, > const struct in6_addr *addr); > +int anycast_init(void); > +void anycast_cleanup(void); > > /* Device notifier */ > int register_inet6addr_notifier(struct notifier_block *nb); > diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h > index d7578cf49c3a..55a4a1d8cebc 100644 > --- a/include/net/if_inet6.h > +++ b/include/net/if_inet6.h > @@ -142,6 
+142,14 @@ struct ipv6_ac_socklist { > struct ipv6_ac_socklist *acl_next; > }; > > +struct ipv6_ac_addrlist { > + struct in6_addr acal_addr; > + possible_net_t acal_pnet; > + int acal_users; That would be a refcount_t acal_users; so that CONFIG_REFCOUNT_FULL brings debugging for free. > + struct hlist_node acal_lst; /* inet6_acaddr_lst */ > + struct rcu_head rcu; > +}; > + > struct ifacaddr6 { > struct in6_addr aca_addr; > struct fib6_info *aca_rt; > diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c > index 9a4261e50272..971a05fdd3bd 100644 > --- a/net/ipv6/af_inet6.c > +++ b/net/ipv6/af_inet6.c > @@ -1001,6 +1001,9 @@ static int __init inet6_init(void) > err = ip6_flowlabel_init(); > if (err) > goto ip6_flowlabel_fail; > + err = anycast_init(); > + if (err) > + goto anycast_fail; > err = addrconf_init(); > if (err) > goto addrconf_fail; > @@ -1091,6 +1094,8 @@ static int __init inet6_init(void) > ipv6_exthdrs_fail: > addrconf_cleanup(); > addrconf_fail: > + anycast_cleanup(); > +anycast_fail: > ip6_flowlabel_cleanup(); > ip6_flowlabel_fail: > ndisc_late_cleanup(); > diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c > index 4e0ff7031edd..def1e156d857 100644 > --- a/net/ipv6/anycast.c > +++ b/net/ipv6/anycast.c > @@ -44,8 +44,22 @@ > > #include <net/checksum.h> > > +#define IN6_ADDR_HSIZE_SHIFT 8 > +#define IN6_ADDR_HSIZE BIT(IN6_ADDR_HSIZE_SHIFT) > +/* anycast address hash table > + */ > +static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE]; > +static DEFINE_SPINLOCK(acaddr_hash_lock); > + > static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr); > > +static u32 inet6_acaddr_hash(struct net *net, const struct in6_addr *addr) > +{ > + u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net); > + > + return hash_32(val, IN6_ADDR_HSIZE_SHIFT); > +} > + > /* > * socket join an anycast group > */ > @@ -204,6 +218,83 @@ void ipv6_sock_ac_close(struct sock *sk) > rtnl_unlock(); > } > > +static struct ipv6_ac_addrlist *acal_alloc(struct net 
*net, > + const struct in6_addr *addr) > +{ > + struct ipv6_ac_addrlist *acal; > + > + acal = kzalloc(sizeof(*acal), GFP_ATOMIC); > + if (!acal) > + return NULL; > + > + acal->acal_addr = *addr; > + write_pnet(&acal->acal_pnet, get_net(net)); I am not sure why you grab a reference on the netns. The ipv6 address will be freed at some point before the netns disappears. It would automatically remove the associated struct ipv6_ac_addrlist. > + acal->acal_users = 1; > + INIT_HLIST_NODE(&acal->acal_lst); > + > + return acal; > +} > + > +static void acal_free_rcu(struct rcu_head *h) > +{ > + struct ipv6_ac_addrlist *acal; > + > + acal = container_of(h, struct ipv6_ac_addrlist, rcu); > + WARN_ON(acal->acal_users); Not needed with refcount_t debugging infra. > + put_net(read_pnet(&acal->acal_pnet)); > + kfree(acal); So this could use kfree_rcu() in the caller, and get rid of acal_free_rcu() completely. > +} > + > +static int ipv6_add_acaddr_hash(struct net *net, const struct in6_addr *addr) > +{ > + unsigned int hash = inet6_acaddr_hash(net, addr); > + struct ipv6_ac_addrlist *acal; > + int err = 0; > + > + spin_lock(&acaddr_hash_lock); > + hlist_for_each_entry(acal, &inet6_acaddr_lst[hash], acal_lst) { > + if (!net_eq(read_pnet(&acal->acal_pnet), net)) > + continue; > + if (ipv6_addr_equal(&acal->acal_addr, addr)) { > + acal->acal_users++; > + goto out; > + } > + } > + > + acal = acal_alloc(net, addr); > + if (!acal) { > + err = -ENOMEM; > + goto out; > + } > + > + hlist_add_head_rcu(&acal->acal_lst, &inet6_acaddr_lst[hash]); > + > +out: > + spin_unlock(&acaddr_hash_lock); > + return err; > +} > + > +static void ipv6_del_acaddr_hash(struct net *net, const struct in6_addr *addr) > +{ > + unsigned int hash = inet6_acaddr_hash(net, addr); > + struct ipv6_ac_addrlist *acal; > + > + spin_lock(&acaddr_hash_lock); > + hlist_for_each_entry(acal, &inet6_acaddr_lst[hash], acal_lst) { > + if (!net_eq(read_pnet(&acal->acal_pnet), net)) > + continue; > + if 
(ipv6_addr_equal(&acal->acal_addr, addr)) { > + if (--acal->acal_users < 1) { > + hlist_del_init_rcu(&acal->acal_lst); > + call_rcu(&acal->rcu, acal_free_rcu); > + } > + spin_unlock(&acaddr_hash_lock); > + return; > + } > + } > + spin_unlock(&acaddr_hash_lock); > +} > + > static void aca_get(struct ifacaddr6 *aca) > { > refcount_inc(&aca->aca_refcnt); > @@ -275,6 +366,13 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr) > err = -ENOMEM; > goto out; > } > + err = ipv6_add_acaddr_hash(dev_net(idev->dev), addr); > + if (err) { > + fib6_info_release(f6i); > + fib6_info_release(f6i); Double call to fib6_info_release() ? Why ? > + kfree(aca); > + goto out; > + } > > aca->aca_next = idev->ac_list; > idev->ac_list = aca; > @@ -324,6 +422,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr) > prev_aca->aca_next = aca->aca_next; > else > idev->ac_list = aca->aca_next; > + ipv6_del_acaddr_hash(dev_net(idev->dev), &aca->aca_addr); > write_unlock_bh(&idev->lock); > addrconf_leave_solict(idev, &aca->aca_addr); > > @@ -350,6 +449,8 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev) > write_lock_bh(&idev->lock); > while ((aca = idev->ac_list) != NULL) { > idev->ac_list = aca->aca_next; > + ipv6_del_acaddr_hash(dev_net(idev->dev), &aca->aca_addr); > + > write_unlock_bh(&idev->lock); > > addrconf_leave_solict(idev, &aca->aca_addr); > @@ -391,16 +492,22 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, > const struct in6_addr *addr) > { > bool found = false; > + unsigned int hash = inet6_acaddr_hash(net, addr); > + struct ipv6_ac_addrlist *acal; Reorder variable declaration in longest to shortest (reverse xmas tree), per David Miller request :) > > rcu_read_lock(); > if (dev) > found = ipv6_chk_acast_dev(dev, addr); > else > - for_each_netdev_rcu(net, dev) > - if (ipv6_chk_acast_dev(dev, addr)) { > + hlist_for_each_entry_rcu(acal, &inet6_acaddr_lst[hash], > + acal_lst) { > + if 
(!net_eq(read_pnet(&acal->acal_pnet), net)) > + continue; > + if (ipv6_addr_equal(&acal->acal_addr, addr)) { > found = true; > break; > } > + } > rcu_read_unlock(); > return found; > } > @@ -539,4 +646,25 @@ void ac6_proc_exit(struct net *net) > { > remove_proc_entry("anycast6", net->proc_net); > } > + > +/* Init / cleanup code > + */ > +int __init anycast_init(void) > +{ > + int i; > + > + for (i = 0; i < IN6_ADDR_HSIZE; i++) > + INIT_HLIST_HEAD(&inet6_acaddr_lst[i]); > + return 0; > +} > + > +void anycast_cleanup(void) > +{ > + int i; > + > + spin_lock(&acaddr_hash_lock); > + for (i = 0; i < IN6_ADDR_HSIZE; i++) > + WARN_ON(!hlist_empty(&inet6_acaddr_lst[i])); > + spin_unlock(&acaddr_hash_lock); > +} > #endif > ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH net v2] net/ipv6: Add anycast addresses to a global hashtable 2018-10-24 3:12 ` Eric Dumazet @ 2018-10-24 5:06 ` Jeff Barnhill 2018-10-26 21:22 ` [PATCH net v3] " Jeff Barnhill 0 siblings, 1 reply; 23+ messages in thread From: Jeff Barnhill @ 2018-10-24 5:06 UTC (permalink / raw) To: eric.dumazet; +Cc: netdev, davem, Alexey Kuznetsov, yoshfuji Thanks for the feedback. As suggested, I did these things: - switched to refcount_t - stopped grabbing a reference on the netns (now able to use kfree_rcu) - re-ordered ipv6_chk_acast_addr variable definitions to reverse xmas tree With regards to your question in __ipv6_dev_ac_inc(): > + err = ipv6_add_acaddr_hash(dev_net(idev->dev), addr); > + if (err) { > + fib6_info_release(f6i); > + fib6_info_release(f6i); Double call to fib6_info_release() ? Why ? Unless I mis-understand, both addrconf_f6i_alloc() (indirectly through fib6_info_alloc()) and aca_alloc() increment fib6_ref, so it seemed like to fully cleanup/backout, we needed to to decrement twice. Please let me know if I'm wrong here. I'll re-submit the patch after agreement on the double call and testing with the new changes. Thanks! Jeff On Tue, Oct 23, 2018 at 11:12 PM Eric Dumazet <eric.dumazet@gmail.com> wrote: > > > > On 10/23/2018 06:58 PM, Jeff Barnhill wrote: > > icmp6_send() function is expensive on systems with a large number of > > interfaces. Every time it’s called, it has to verify that the source > > address does not correspond to an existing anycast address by looping > > through every device and every anycast address on the device. This can > > result in significant delays for a CPU when there are a large number of > > neighbors and ND timers are frequently timing out and calling > > neigh_invalidate(). > > > > Add anycast addresses to a global hashtable to allow quick searching for > > matching anycast addresses. This is based on inet6_addr_lst in addrconf.c. 
> > > > Signed-off-by: Jeff Barnhill <0xeffeff@gmail.com> > > --- > > include/net/addrconf.h | 2 + > > include/net/if_inet6.h | 8 +++ > > net/ipv6/af_inet6.c | 5 ++ > > net/ipv6/anycast.c | 132 ++++++++++++++++++++++++++++++++++++++++++++++++- > > 4 files changed, 145 insertions(+), 2 deletions(-) > > > > diff --git a/include/net/addrconf.h b/include/net/addrconf.h > > index 6def0351bcc3..0cee3f99c41d 100644 > > --- a/include/net/addrconf.h > > +++ b/include/net/addrconf.h > > @@ -312,6 +312,8 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, > > const struct in6_addr *addr); > > bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev, > > const struct in6_addr *addr); > > +int anycast_init(void); > > +void anycast_cleanup(void); > > > > /* Device notifier */ > > int register_inet6addr_notifier(struct notifier_block *nb); > > diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h > > index d7578cf49c3a..55a4a1d8cebc 100644 > > --- a/include/net/if_inet6.h > > +++ b/include/net/if_inet6.h > > @@ -142,6 +142,14 @@ struct ipv6_ac_socklist { > > struct ipv6_ac_socklist *acl_next; > > }; > > > > +struct ipv6_ac_addrlist { > > + struct in6_addr acal_addr; > > + possible_net_t acal_pnet; > > + int acal_users; > > That would be a refcount_t acal_users; so that CONFIG_REFCOUNT_FULL brings debugging for free. 
> > > + struct hlist_node acal_lst; /* inet6_acaddr_lst */ > > + struct rcu_head rcu; > > +}; > > + > > struct ifacaddr6 { > > struct in6_addr aca_addr; > > struct fib6_info *aca_rt; > > diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c > > index 9a4261e50272..971a05fdd3bd 100644 > > --- a/net/ipv6/af_inet6.c > > +++ b/net/ipv6/af_inet6.c > > @@ -1001,6 +1001,9 @@ static int __init inet6_init(void) > > err = ip6_flowlabel_init(); > > if (err) > > goto ip6_flowlabel_fail; > > + err = anycast_init(); > > + if (err) > > + goto anycast_fail; > > err = addrconf_init(); > > if (err) > > goto addrconf_fail; > > @@ -1091,6 +1094,8 @@ static int __init inet6_init(void) > > ipv6_exthdrs_fail: > > addrconf_cleanup(); > > addrconf_fail: > > + anycast_cleanup(); > > +anycast_fail: > > ip6_flowlabel_cleanup(); > > ip6_flowlabel_fail: > > ndisc_late_cleanup(); > > diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c > > index 4e0ff7031edd..def1e156d857 100644 > > --- a/net/ipv6/anycast.c > > +++ b/net/ipv6/anycast.c > > @@ -44,8 +44,22 @@ > > > > #include <net/checksum.h> > > > > +#define IN6_ADDR_HSIZE_SHIFT 8 > > +#define IN6_ADDR_HSIZE BIT(IN6_ADDR_HSIZE_SHIFT) > > +/* anycast address hash table > > + */ > > +static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE]; > > +static DEFINE_SPINLOCK(acaddr_hash_lock); > > + > > static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr); > > > > +static u32 inet6_acaddr_hash(struct net *net, const struct in6_addr *addr) > > +{ > > + u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net); > > + > > + return hash_32(val, IN6_ADDR_HSIZE_SHIFT); > > +} > > + > > /* > > * socket join an anycast group > > */ > > @@ -204,6 +218,83 @@ void ipv6_sock_ac_close(struct sock *sk) > > rtnl_unlock(); > > } > > > > +static struct ipv6_ac_addrlist *acal_alloc(struct net *net, > > + const struct in6_addr *addr) > > +{ > > + struct ipv6_ac_addrlist *acal; > > + > > + acal = kzalloc(sizeof(*acal), GFP_ATOMIC); > > + if (!acal) > > 
+ return NULL; > > + > > + acal->acal_addr = *addr; > > + write_pnet(&acal->acal_pnet, get_net(net)); > > I am not sure why you grab a reference on the netns. > > The ipv6 address will be freed at some point before the netns disappears. > It would automatically remove the associated struct ipv6_ac_addrlist. > > > + acal->acal_users = 1; > > + INIT_HLIST_NODE(&acal->acal_lst); > > + > > + return acal; > > +} > > + > > +static void acal_free_rcu(struct rcu_head *h) > > +{ > > + struct ipv6_ac_addrlist *acal; > > + > > + acal = container_of(h, struct ipv6_ac_addrlist, rcu); > > + WARN_ON(acal->acal_users); > > Not needed with refcount_t debugging infra. > > > + put_net(read_pnet(&acal->acal_pnet)); > > + kfree(acal); > > So this could use kfree_rcu() in the caller, and get rid of acal_free_rcu() completely. > > > +} > > + > > +static int ipv6_add_acaddr_hash(struct net *net, const struct in6_addr *addr) > > +{ > > + unsigned int hash = inet6_acaddr_hash(net, addr); > > + struct ipv6_ac_addrlist *acal; > > + int err = 0; > > + > > + spin_lock(&acaddr_hash_lock); > > + hlist_for_each_entry(acal, &inet6_acaddr_lst[hash], acal_lst) { > > + if (!net_eq(read_pnet(&acal->acal_pnet), net)) > > + continue; > > + if (ipv6_addr_equal(&acal->acal_addr, addr)) { > > + acal->acal_users++; > > + goto out; > > + } > > + } > > + > > + acal = acal_alloc(net, addr); > > + if (!acal) { > > + err = -ENOMEM; > > + goto out; > > + } > > + > > + hlist_add_head_rcu(&acal->acal_lst, &inet6_acaddr_lst[hash]); > > + > > +out: > > + spin_unlock(&acaddr_hash_lock); > > + return err; > > +} > > + > > +static void ipv6_del_acaddr_hash(struct net *net, const struct in6_addr *addr) > > +{ > > + unsigned int hash = inet6_acaddr_hash(net, addr); > > + struct ipv6_ac_addrlist *acal; > > + > > + spin_lock(&acaddr_hash_lock); > > + hlist_for_each_entry(acal, &inet6_acaddr_lst[hash], acal_lst) { > > + if (!net_eq(read_pnet(&acal->acal_pnet), net)) > > + continue; > > + if (ipv6_addr_equal(&acal->acal_addr, 
addr)) { > > + if (--acal->acal_users < 1) { > > + hlist_del_init_rcu(&acal->acal_lst); > > + call_rcu(&acal->rcu, acal_free_rcu); > > + } > > + spin_unlock(&acaddr_hash_lock); > > + return; > > + } > > + } > > + spin_unlock(&acaddr_hash_lock); > > +} > > + > > static void aca_get(struct ifacaddr6 *aca) > > { > > refcount_inc(&aca->aca_refcnt); > > @@ -275,6 +366,13 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr) > > err = -ENOMEM; > > goto out; > > } > > + err = ipv6_add_acaddr_hash(dev_net(idev->dev), addr); > > + if (err) { > > + fib6_info_release(f6i); > > + fib6_info_release(f6i); > > Double call to fib6_info_release() ? Why ? > > > + kfree(aca); > > + goto out; > > + } > > > > aca->aca_next = idev->ac_list; > > idev->ac_list = aca; > > @@ -324,6 +422,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr) > > prev_aca->aca_next = aca->aca_next; > > else > > idev->ac_list = aca->aca_next; > > + ipv6_del_acaddr_hash(dev_net(idev->dev), &aca->aca_addr); > > write_unlock_bh(&idev->lock); > > addrconf_leave_solict(idev, &aca->aca_addr); > > > > @@ -350,6 +449,8 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev) > > write_lock_bh(&idev->lock); > > while ((aca = idev->ac_list) != NULL) { > > idev->ac_list = aca->aca_next; > > + ipv6_del_acaddr_hash(dev_net(idev->dev), &aca->aca_addr); > > + > > write_unlock_bh(&idev->lock); > > > > addrconf_leave_solict(idev, &aca->aca_addr); > > @@ -391,16 +492,22 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, > > const struct in6_addr *addr) > > { > > bool found = false; > > + unsigned int hash = inet6_acaddr_hash(net, addr); > > + struct ipv6_ac_addrlist *acal; > > Reorder variable declaration in longest to shortest (reverse xmas tree), > per David Miller request :) > > > > > rcu_read_lock(); > > if (dev) > > found = ipv6_chk_acast_dev(dev, addr); > > else > > - for_each_netdev_rcu(net, dev) > > - if (ipv6_chk_acast_dev(dev, addr)) { > > + 
hlist_for_each_entry_rcu(acal, &inet6_acaddr_lst[hash], > > + acal_lst) { > > + if (!net_eq(read_pnet(&acal->acal_pnet), net)) > > + continue; > > + if (ipv6_addr_equal(&acal->acal_addr, addr)) { > > found = true; > > break; > > } > > + } > > rcu_read_unlock(); > > return found; > > } > > @@ -539,4 +646,25 @@ void ac6_proc_exit(struct net *net) > > { > > remove_proc_entry("anycast6", net->proc_net); > > } > > + > > +/* Init / cleanup code > > + */ > > +int __init anycast_init(void) > > +{ > > + int i; > > + > > + for (i = 0; i < IN6_ADDR_HSIZE; i++) > > + INIT_HLIST_HEAD(&inet6_acaddr_lst[i]); > > + return 0; > > +} > > + > > +void anycast_cleanup(void) > > +{ > > + int i; > > + > > + spin_lock(&acaddr_hash_lock); > > + for (i = 0; i < IN6_ADDR_HSIZE; i++) > > + WARN_ON(!hlist_empty(&inet6_acaddr_lst[i])); > > + spin_unlock(&acaddr_hash_lock); > > +} > > #endif > > ^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH net v3] net/ipv6: Add anycast addresses to a global hashtable 2018-10-24 5:06 ` Jeff Barnhill @ 2018-10-26 21:22 ` Jeff Barnhill 2018-10-26 21:44 ` David Ahern 0 siblings, 1 reply; 23+ messages in thread From: Jeff Barnhill @ 2018-10-26 21:22 UTC (permalink / raw) To: netdev; +Cc: davem, kuznet, yoshfuji, Jeff Barnhill icmp6_send() function is expensive on systems with a large number of interfaces. Every time it’s called, it has to verify that the source address does not correspond to an existing anycast address by looping through every device and every anycast address on the device. This can result in significant delays for a CPU when there are a large number of neighbors and ND timers are frequently timing out and calling neigh_invalidate(). Add anycast addresses to a global hashtable to allow quick searching for matching anycast addresses. This is based on inet6_addr_lst in addrconf.c. Signed-off-by: Jeff Barnhill <0xeffeff@gmail.com> --- include/net/addrconf.h | 2 + include/net/if_inet6.h | 8 ++++ net/ipv6/af_inet6.c | 5 ++ net/ipv6/anycast.c | 122 ++++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 135 insertions(+), 2 deletions(-) diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 14b789a123e7..799af1a037d1 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -317,6 +317,8 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, const struct in6_addr *addr); bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev, const struct in6_addr *addr); +int anycast_init(void); +void anycast_cleanup(void); /* Device notifier */ int register_inet6addr_notifier(struct notifier_block *nb); diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index d7578cf49c3a..a445014b981d 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -142,6 +142,14 @@ struct ipv6_ac_socklist { struct ipv6_ac_socklist *acl_next; }; +struct ipv6_ac_addrlist { + struct in6_addr acal_addr; + 
possible_net_t acal_pnet; + refcount_t acal_users; + struct hlist_node acal_lst; /* inet6_acaddr_lst */ + struct rcu_head rcu; +}; + struct ifacaddr6 { struct in6_addr aca_addr; struct fib6_info *aca_rt; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 3f4d61017a69..ddc8a6dbfba2 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -1001,6 +1001,9 @@ static int __init inet6_init(void) err = ip6_flowlabel_init(); if (err) goto ip6_flowlabel_fail; + err = anycast_init(); + if (err) + goto anycast_fail; err = addrconf_init(); if (err) goto addrconf_fail; @@ -1091,6 +1094,8 @@ static int __init inet6_init(void) ipv6_exthdrs_fail: addrconf_cleanup(); addrconf_fail: + anycast_cleanup(); +anycast_fail: ip6_flowlabel_cleanup(); ip6_flowlabel_fail: ndisc_late_cleanup(); diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 4e0ff7031edd..1040d08867ab 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -44,8 +44,22 @@ #include <net/checksum.h> +#define IN6_ADDR_HSIZE_SHIFT 8 +#define IN6_ADDR_HSIZE BIT(IN6_ADDR_HSIZE_SHIFT) +/* anycast address hash table + */ +static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE]; +static DEFINE_SPINLOCK(acaddr_hash_lock); + static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr); +static u32 inet6_acaddr_hash(struct net *net, const struct in6_addr *addr) +{ + u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net); + + return hash_32(val, IN6_ADDR_HSIZE_SHIFT); +} + /* * socket join an anycast group */ @@ -204,6 +218,73 @@ void ipv6_sock_ac_close(struct sock *sk) rtnl_unlock(); } +static struct ipv6_ac_addrlist *acal_alloc(struct net *net, + const struct in6_addr *addr) +{ + struct ipv6_ac_addrlist *acal; + + acal = kzalloc(sizeof(*acal), GFP_ATOMIC); + if (!acal) + return NULL; + + acal->acal_addr = *addr; + write_pnet(&acal->acal_pnet, net); + refcount_set(&acal->acal_users, 1); + INIT_HLIST_NODE(&acal->acal_lst); + + return acal; +} + +static int ipv6_add_acaddr_hash(struct net *net, 
const struct in6_addr *addr) +{ + unsigned int hash = inet6_acaddr_hash(net, addr); + struct ipv6_ac_addrlist *acal; + int err = 0; + + spin_lock(&acaddr_hash_lock); + hlist_for_each_entry(acal, &inet6_acaddr_lst[hash], acal_lst) { + if (!net_eq(read_pnet(&acal->acal_pnet), net)) + continue; + if (ipv6_addr_equal(&acal->acal_addr, addr)) { + refcount_inc(&acal->acal_users); + goto out; + } + } + + acal = acal_alloc(net, addr); + if (!acal) { + err = -ENOMEM; + goto out; + } + + hlist_add_head_rcu(&acal->acal_lst, &inet6_acaddr_lst[hash]); + +out: + spin_unlock(&acaddr_hash_lock); + return err; +} + +static void ipv6_del_acaddr_hash(struct net *net, const struct in6_addr *addr) +{ + unsigned int hash = inet6_acaddr_hash(net, addr); + struct ipv6_ac_addrlist *acal; + + spin_lock(&acaddr_hash_lock); + hlist_for_each_entry(acal, &inet6_acaddr_lst[hash], acal_lst) { + if (!net_eq(read_pnet(&acal->acal_pnet), net)) + continue; + if (ipv6_addr_equal(&acal->acal_addr, addr)) { + if (refcount_dec_and_test(&acal->acal_users)) { + hlist_del_init_rcu(&acal->acal_lst); + kfree_rcu(acal, rcu); + } + spin_unlock(&acaddr_hash_lock); + return; + } + } + spin_unlock(&acaddr_hash_lock); +} + static void aca_get(struct ifacaddr6 *aca) { refcount_inc(&aca->aca_refcnt); @@ -275,6 +356,13 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr) err = -ENOMEM; goto out; } + err = ipv6_add_acaddr_hash(dev_net(idev->dev), addr); + if (err) { + fib6_info_release(f6i); + fib6_info_release(f6i); + kfree(aca); + goto out; + } aca->aca_next = idev->ac_list; idev->ac_list = aca; @@ -324,6 +412,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr) prev_aca->aca_next = aca->aca_next; else idev->ac_list = aca->aca_next; + ipv6_del_acaddr_hash(dev_net(idev->dev), &aca->aca_addr); write_unlock_bh(&idev->lock); addrconf_leave_solict(idev, &aca->aca_addr); @@ -350,6 +439,8 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev) write_lock_bh(&idev->lock); while 
((aca = idev->ac_list) != NULL) { idev->ac_list = aca->aca_next; + ipv6_del_acaddr_hash(dev_net(idev->dev), &aca->aca_addr); + write_unlock_bh(&idev->lock); addrconf_leave_solict(idev, &aca->aca_addr); @@ -390,17 +481,23 @@ static bool ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *ad bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, const struct in6_addr *addr) { + unsigned int hash = inet6_acaddr_hash(net, addr); + struct ipv6_ac_addrlist *acal; bool found = false; rcu_read_lock(); if (dev) found = ipv6_chk_acast_dev(dev, addr); else - for_each_netdev_rcu(net, dev) - if (ipv6_chk_acast_dev(dev, addr)) { + hlist_for_each_entry_rcu(acal, &inet6_acaddr_lst[hash], + acal_lst) { + if (!net_eq(read_pnet(&acal->acal_pnet), net)) + continue; + if (ipv6_addr_equal(&acal->acal_addr, addr)) { found = true; break; } + } rcu_read_unlock(); return found; } @@ -539,4 +636,25 @@ void ac6_proc_exit(struct net *net) { remove_proc_entry("anycast6", net->proc_net); } + +/* Init / cleanup code + */ +int __init anycast_init(void) +{ + int i; + + for (i = 0; i < IN6_ADDR_HSIZE; i++) + INIT_HLIST_HEAD(&inet6_acaddr_lst[i]); + return 0; +} + +void anycast_cleanup(void) +{ + int i; + + spin_lock(&acaddr_hash_lock); + for (i = 0; i < IN6_ADDR_HSIZE; i++) + WARN_ON(!hlist_empty(&inet6_acaddr_lst[i])); + spin_unlock(&acaddr_hash_lock); +} #endif -- 2.14.1 ^ permalink raw reply related [flat|nested] 23+ messages in thread
* Re: [PATCH net v3] net/ipv6: Add anycast addresses to a global hashtable 2018-10-26 21:22 ` [PATCH net v3] " Jeff Barnhill @ 2018-10-26 21:44 ` David Ahern 2018-10-27 18:02 ` [PATCH net v4] " Jeff Barnhill 0 siblings, 1 reply; 23+ messages in thread From: David Ahern @ 2018-10-26 21:44 UTC (permalink / raw) To: Jeff Barnhill, netdev; +Cc: davem, kuznet, yoshfuji On 10/26/18 3:22 PM, Jeff Barnhill wrote: > @@ -275,6 +356,13 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr) > err = -ENOMEM; > goto out; > } > + err = ipv6_add_acaddr_hash(dev_net(idev->dev), addr); > + if (err) { > + fib6_info_release(f6i); > + fib6_info_release(f6i); > + kfree(aca); > + goto out; > + } I think aca_put() makes this less confusing as it will do the fib6_info_release(f6i) and kfree(aca); ^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH net v4] net/ipv6: Add anycast addresses to a global hashtable 2018-10-26 21:44 ` David Ahern @ 2018-10-27 18:02 ` Jeff Barnhill 2018-10-27 23:39 ` David Ahern 0 siblings, 1 reply; 23+ messages in thread From: Jeff Barnhill @ 2018-10-27 18:02 UTC (permalink / raw) To: netdev; +Cc: davem, kuznet, yoshfuji, Jeff Barnhill icmp6_send() function is expensive on systems with a large number of interfaces. Every time it’s called, it has to verify that the source address does not correspond to an existing anycast address by looping through every device and every anycast address on the device. This can result in significant delays for a CPU when there are a large number of neighbors and ND timers are frequently timing out and calling neigh_invalidate(). Add anycast addresses to a global hashtable to allow quick searching for matching anycast addresses. This is based on inet6_addr_lst in addrconf.c. Signed-off-by: Jeff Barnhill <0xeffeff@gmail.com> --- include/net/addrconf.h | 2 + include/net/if_inet6.h | 8 ++++ net/ipv6/af_inet6.c | 5 +++ net/ipv6/anycast.c | 120 ++++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 133 insertions(+), 2 deletions(-) diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 14b789a123e7..799af1a037d1 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -317,6 +317,8 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, const struct in6_addr *addr); bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev, const struct in6_addr *addr); +int anycast_init(void); +void anycast_cleanup(void); /* Device notifier */ int register_inet6addr_notifier(struct notifier_block *nb); diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index d7578cf49c3a..a445014b981d 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -142,6 +142,14 @@ struct ipv6_ac_socklist { struct ipv6_ac_socklist *acl_next; }; +struct ipv6_ac_addrlist { + struct in6_addr acal_addr; + 
possible_net_t acal_pnet; + refcount_t acal_users; + struct hlist_node acal_lst; /* inet6_acaddr_lst */ + struct rcu_head rcu; +}; + struct ifacaddr6 { struct in6_addr aca_addr; struct fib6_info *aca_rt; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 3f4d61017a69..ddc8a6dbfba2 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -1001,6 +1001,9 @@ static int __init inet6_init(void) err = ip6_flowlabel_init(); if (err) goto ip6_flowlabel_fail; + err = anycast_init(); + if (err) + goto anycast_fail; err = addrconf_init(); if (err) goto addrconf_fail; @@ -1091,6 +1094,8 @@ static int __init inet6_init(void) ipv6_exthdrs_fail: addrconf_cleanup(); addrconf_fail: + anycast_cleanup(); +anycast_fail: ip6_flowlabel_cleanup(); ip6_flowlabel_fail: ndisc_late_cleanup(); diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 4e0ff7031edd..45585010908a 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -44,8 +44,22 @@ #include <net/checksum.h> +#define IN6_ADDR_HSIZE_SHIFT 8 +#define IN6_ADDR_HSIZE BIT(IN6_ADDR_HSIZE_SHIFT) +/* anycast address hash table + */ +static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE]; +static DEFINE_SPINLOCK(acaddr_hash_lock); + static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr); +static u32 inet6_acaddr_hash(struct net *net, const struct in6_addr *addr) +{ + u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net); + + return hash_32(val, IN6_ADDR_HSIZE_SHIFT); +} + /* * socket join an anycast group */ @@ -204,6 +218,73 @@ void ipv6_sock_ac_close(struct sock *sk) rtnl_unlock(); } +static struct ipv6_ac_addrlist *acal_alloc(struct net *net, + const struct in6_addr *addr) +{ + struct ipv6_ac_addrlist *acal; + + acal = kzalloc(sizeof(*acal), GFP_ATOMIC); + if (!acal) + return NULL; + + acal->acal_addr = *addr; + write_pnet(&acal->acal_pnet, net); + refcount_set(&acal->acal_users, 1); + INIT_HLIST_NODE(&acal->acal_lst); + + return acal; +} + +static int ipv6_add_acaddr_hash(struct net *net, 
const struct in6_addr *addr) +{ + unsigned int hash = inet6_acaddr_hash(net, addr); + struct ipv6_ac_addrlist *acal; + int err = 0; + + spin_lock(&acaddr_hash_lock); + hlist_for_each_entry(acal, &inet6_acaddr_lst[hash], acal_lst) { + if (!net_eq(read_pnet(&acal->acal_pnet), net)) + continue; + if (ipv6_addr_equal(&acal->acal_addr, addr)) { + refcount_inc(&acal->acal_users); + goto out; + } + } + + acal = acal_alloc(net, addr); + if (!acal) { + err = -ENOMEM; + goto out; + } + + hlist_add_head_rcu(&acal->acal_lst, &inet6_acaddr_lst[hash]); + +out: + spin_unlock(&acaddr_hash_lock); + return err; +} + +static void ipv6_del_acaddr_hash(struct net *net, const struct in6_addr *addr) +{ + unsigned int hash = inet6_acaddr_hash(net, addr); + struct ipv6_ac_addrlist *acal; + + spin_lock(&acaddr_hash_lock); + hlist_for_each_entry(acal, &inet6_acaddr_lst[hash], acal_lst) { + if (!net_eq(read_pnet(&acal->acal_pnet), net)) + continue; + if (ipv6_addr_equal(&acal->acal_addr, addr)) { + if (refcount_dec_and_test(&acal->acal_users)) { + hlist_del_init_rcu(&acal->acal_lst); + kfree_rcu(acal, rcu); + } + spin_unlock(&acaddr_hash_lock); + return; + } + } + spin_unlock(&acaddr_hash_lock); +} + static void aca_get(struct ifacaddr6 *aca) { refcount_inc(&aca->aca_refcnt); @@ -275,6 +356,11 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr) err = -ENOMEM; goto out; } + err = ipv6_add_acaddr_hash(dev_net(idev->dev), addr); + if (err) { + aca_put(aca); + goto out; + } aca->aca_next = idev->ac_list; idev->ac_list = aca; @@ -324,6 +410,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr) prev_aca->aca_next = aca->aca_next; else idev->ac_list = aca->aca_next; + ipv6_del_acaddr_hash(dev_net(idev->dev), &aca->aca_addr); write_unlock_bh(&idev->lock); addrconf_leave_solict(idev, &aca->aca_addr); @@ -350,6 +437,8 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev) write_lock_bh(&idev->lock); while ((aca = idev->ac_list) != NULL) { idev->ac_list = 
aca->aca_next; + ipv6_del_acaddr_hash(dev_net(idev->dev), &aca->aca_addr); + write_unlock_bh(&idev->lock); addrconf_leave_solict(idev, &aca->aca_addr); @@ -390,17 +479,23 @@ static bool ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *ad bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, const struct in6_addr *addr) { + unsigned int hash = inet6_acaddr_hash(net, addr); + struct ipv6_ac_addrlist *acal; bool found = false; rcu_read_lock(); if (dev) found = ipv6_chk_acast_dev(dev, addr); else - for_each_netdev_rcu(net, dev) - if (ipv6_chk_acast_dev(dev, addr)) { + hlist_for_each_entry_rcu(acal, &inet6_acaddr_lst[hash], + acal_lst) { + if (!net_eq(read_pnet(&acal->acal_pnet), net)) + continue; + if (ipv6_addr_equal(&acal->acal_addr, addr)) { found = true; break; } + } rcu_read_unlock(); return found; } @@ -539,4 +634,25 @@ void ac6_proc_exit(struct net *net) { remove_proc_entry("anycast6", net->proc_net); } + +/* Init / cleanup code + */ +int __init anycast_init(void) +{ + int i; + + for (i = 0; i < IN6_ADDR_HSIZE; i++) + INIT_HLIST_HEAD(&inet6_acaddr_lst[i]); + return 0; +} + +void anycast_cleanup(void) +{ + int i; + + spin_lock(&acaddr_hash_lock); + for (i = 0; i < IN6_ADDR_HSIZE; i++) + WARN_ON(!hlist_empty(&inet6_acaddr_lst[i])); + spin_unlock(&acaddr_hash_lock); +} #endif -- 2.14.1 ^ permalink raw reply related [flat|nested] 23+ messages in thread
* Re: [PATCH net v4] net/ipv6: Add anycast addresses to a global hashtable 2018-10-27 18:02 ` [PATCH net v4] " Jeff Barnhill @ 2018-10-27 23:39 ` David Ahern 2018-10-28 1:27 ` Jeff Barnhill 0 siblings, 1 reply; 23+ messages in thread From: David Ahern @ 2018-10-27 23:39 UTC (permalink / raw) To: Jeff Barnhill, netdev; +Cc: davem, kuznet, yoshfuji On 10/27/18 12:02 PM, Jeff Barnhill wrote: > @@ -275,6 +356,11 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr) > err = -ENOMEM; > goto out; > } > + err = ipv6_add_acaddr_hash(dev_net(idev->dev), addr); > + if (err) { > + aca_put(aca); > + goto out; > + } > > aca->aca_next = idev->ac_list; > idev->ac_list = aca; you misunderstood my comment. aca_put is instead of a double call to fib6_info_release(f6i). You still need one call to fib6_info_release(f6i) for the addrconf_f6i_alloc. ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH net v4] net/ipv6: Add anycast addresses to a global hashtable 2018-10-27 23:39 ` David Ahern @ 2018-10-28 1:27 ` Jeff Barnhill 2018-10-28 1:51 ` [PATCH net v5] " Jeff Barnhill 0 siblings, 1 reply; 23+ messages in thread From: Jeff Barnhill @ 2018-10-28 1:27 UTC (permalink / raw) To: David Ahern; +Cc: netdev, davem, Alexey Kuznetsov, yoshfuji You are right, David...I mistook the refcount_dec_and_test() in aca_put() as being for the fib6_info, but it's for the aca_refcnt. Thanks! I'll submit a corrected patch. On Sat, Oct 27, 2018 at 7:39 PM David Ahern <dsahern@gmail.com> wrote: > > On 10/27/18 12:02 PM, Jeff Barnhill wrote: > > @@ -275,6 +356,11 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr) > > err = -ENOMEM; > > goto out; > > } > > + err = ipv6_add_acaddr_hash(dev_net(idev->dev), addr); > > + if (err) { > > + aca_put(aca); > > + goto out; > > + } > > > > aca->aca_next = idev->ac_list; > > idev->ac_list = aca; > > you misunderstood my comment. aca_put is instead of a double call to > fib6_info_release(f6i). You still need one call to > fib6_info_release(f6i) for the addrconf_f6i_alloc. ^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH net v5] net/ipv6: Add anycast addresses to a global hashtable 2018-10-28 1:27 ` Jeff Barnhill @ 2018-10-28 1:51 ` Jeff Barnhill 2018-10-30 3:32 ` David Miller 0 siblings, 1 reply; 23+ messages in thread From: Jeff Barnhill @ 2018-10-28 1:51 UTC (permalink / raw) To: netdev; +Cc: davem, kuznet, yoshfuji, Jeff Barnhill icmp6_send() function is expensive on systems with a large number of interfaces. Every time it’s called, it has to verify that the source address does not correspond to an existing anycast address by looping through every device and every anycast address on the device. This can result in significant delays for a CPU when there are a large number of neighbors and ND timers are frequently timing out and calling neigh_invalidate(). Add anycast addresses to a global hashtable to allow quick searching for matching anycast addresses. This is based on inet6_addr_lst in addrconf.c. Signed-off-by: Jeff Barnhill <0xeffeff@gmail.com> --- include/net/addrconf.h | 2 + include/net/if_inet6.h | 8 ++++ net/ipv6/af_inet6.c | 5 ++ net/ipv6/anycast.c | 121 ++++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 134 insertions(+), 2 deletions(-) diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 14b789a123e7..799af1a037d1 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -317,6 +317,8 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, const struct in6_addr *addr); bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev, const struct in6_addr *addr); +int anycast_init(void); +void anycast_cleanup(void); /* Device notifier */ int register_inet6addr_notifier(struct notifier_block *nb); diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index d7578cf49c3a..a445014b981d 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -142,6 +142,14 @@ struct ipv6_ac_socklist { struct ipv6_ac_socklist *acl_next; }; +struct ipv6_ac_addrlist { + struct in6_addr acal_addr; + 
possible_net_t acal_pnet; + refcount_t acal_users; + struct hlist_node acal_lst; /* inet6_acaddr_lst */ + struct rcu_head rcu; +}; + struct ifacaddr6 { struct in6_addr aca_addr; struct fib6_info *aca_rt; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 3f4d61017a69..ddc8a6dbfba2 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -1001,6 +1001,9 @@ static int __init inet6_init(void) err = ip6_flowlabel_init(); if (err) goto ip6_flowlabel_fail; + err = anycast_init(); + if (err) + goto anycast_fail; err = addrconf_init(); if (err) goto addrconf_fail; @@ -1091,6 +1094,8 @@ static int __init inet6_init(void) ipv6_exthdrs_fail: addrconf_cleanup(); addrconf_fail: + anycast_cleanup(); +anycast_fail: ip6_flowlabel_cleanup(); ip6_flowlabel_fail: ndisc_late_cleanup(); diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 4e0ff7031edd..ca51c9d57ce5 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -44,8 +44,22 @@ #include <net/checksum.h> +#define IN6_ADDR_HSIZE_SHIFT 8 +#define IN6_ADDR_HSIZE BIT(IN6_ADDR_HSIZE_SHIFT) +/* anycast address hash table + */ +static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE]; +static DEFINE_SPINLOCK(acaddr_hash_lock); + static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr); +static u32 inet6_acaddr_hash(struct net *net, const struct in6_addr *addr) +{ + u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net); + + return hash_32(val, IN6_ADDR_HSIZE_SHIFT); +} + /* * socket join an anycast group */ @@ -204,6 +218,73 @@ void ipv6_sock_ac_close(struct sock *sk) rtnl_unlock(); } +static struct ipv6_ac_addrlist *acal_alloc(struct net *net, + const struct in6_addr *addr) +{ + struct ipv6_ac_addrlist *acal; + + acal = kzalloc(sizeof(*acal), GFP_ATOMIC); + if (!acal) + return NULL; + + acal->acal_addr = *addr; + write_pnet(&acal->acal_pnet, net); + refcount_set(&acal->acal_users, 1); + INIT_HLIST_NODE(&acal->acal_lst); + + return acal; +} + +static int ipv6_add_acaddr_hash(struct net *net, 
const struct in6_addr *addr) +{ + unsigned int hash = inet6_acaddr_hash(net, addr); + struct ipv6_ac_addrlist *acal; + int err = 0; + + spin_lock(&acaddr_hash_lock); + hlist_for_each_entry(acal, &inet6_acaddr_lst[hash], acal_lst) { + if (!net_eq(read_pnet(&acal->acal_pnet), net)) + continue; + if (ipv6_addr_equal(&acal->acal_addr, addr)) { + refcount_inc(&acal->acal_users); + goto out; + } + } + + acal = acal_alloc(net, addr); + if (!acal) { + err = -ENOMEM; + goto out; + } + + hlist_add_head_rcu(&acal->acal_lst, &inet6_acaddr_lst[hash]); + +out: + spin_unlock(&acaddr_hash_lock); + return err; +} + +static void ipv6_del_acaddr_hash(struct net *net, const struct in6_addr *addr) +{ + unsigned int hash = inet6_acaddr_hash(net, addr); + struct ipv6_ac_addrlist *acal; + + spin_lock(&acaddr_hash_lock); + hlist_for_each_entry(acal, &inet6_acaddr_lst[hash], acal_lst) { + if (!net_eq(read_pnet(&acal->acal_pnet), net)) + continue; + if (ipv6_addr_equal(&acal->acal_addr, addr)) { + if (refcount_dec_and_test(&acal->acal_users)) { + hlist_del_init_rcu(&acal->acal_lst); + kfree_rcu(acal, rcu); + } + spin_unlock(&acaddr_hash_lock); + return; + } + } + spin_unlock(&acaddr_hash_lock); +} + static void aca_get(struct ifacaddr6 *aca) { refcount_inc(&aca->aca_refcnt); @@ -275,6 +356,12 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr) err = -ENOMEM; goto out; } + err = ipv6_add_acaddr_hash(dev_net(idev->dev), addr); + if (err) { + aca_put(aca); + fib6_info_release(f6i); + goto out; + } aca->aca_next = idev->ac_list; idev->ac_list = aca; @@ -324,6 +411,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr) prev_aca->aca_next = aca->aca_next; else idev->ac_list = aca->aca_next; + ipv6_del_acaddr_hash(dev_net(idev->dev), &aca->aca_addr); write_unlock_bh(&idev->lock); addrconf_leave_solict(idev, &aca->aca_addr); @@ -350,6 +438,8 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev) write_lock_bh(&idev->lock); while ((aca = idev->ac_list) 
!= NULL) { idev->ac_list = aca->aca_next; + ipv6_del_acaddr_hash(dev_net(idev->dev), &aca->aca_addr); + write_unlock_bh(&idev->lock); addrconf_leave_solict(idev, &aca->aca_addr); @@ -390,17 +480,23 @@ static bool ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *ad bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, const struct in6_addr *addr) { + unsigned int hash = inet6_acaddr_hash(net, addr); + struct ipv6_ac_addrlist *acal; bool found = false; rcu_read_lock(); if (dev) found = ipv6_chk_acast_dev(dev, addr); else - for_each_netdev_rcu(net, dev) - if (ipv6_chk_acast_dev(dev, addr)) { + hlist_for_each_entry_rcu(acal, &inet6_acaddr_lst[hash], + acal_lst) { + if (!net_eq(read_pnet(&acal->acal_pnet), net)) + continue; + if (ipv6_addr_equal(&acal->acal_addr, addr)) { found = true; break; } + } rcu_read_unlock(); return found; } @@ -539,4 +635,25 @@ void ac6_proc_exit(struct net *net) { remove_proc_entry("anycast6", net->proc_net); } + +/* Init / cleanup code + */ +int __init anycast_init(void) +{ + int i; + + for (i = 0; i < IN6_ADDR_HSIZE; i++) + INIT_HLIST_HEAD(&inet6_acaddr_lst[i]); + return 0; +} + +void anycast_cleanup(void) +{ + int i; + + spin_lock(&acaddr_hash_lock); + for (i = 0; i < IN6_ADDR_HSIZE; i++) + WARN_ON(!hlist_empty(&inet6_acaddr_lst[i])); + spin_unlock(&acaddr_hash_lock); +} #endif -- 2.14.1 ^ permalink raw reply related [flat|nested] 23+ messages in thread
* Re: [PATCH net v5] net/ipv6: Add anycast addresses to a global hashtable 2018-10-28 1:51 ` [PATCH net v5] " Jeff Barnhill @ 2018-10-30 3:32 ` David Miller 2018-10-30 11:10 ` Jeff Barnhill 0 siblings, 1 reply; 23+ messages in thread From: David Miller @ 2018-10-30 3:32 UTC (permalink / raw) To: 0xeffeff; +Cc: netdev, kuznet, yoshfuji From: Jeff Barnhill <0xeffeff@gmail.com> Date: Sun, 28 Oct 2018 01:51:59 +0000 > +struct ipv6_ac_addrlist { > + struct in6_addr acal_addr; > + possible_net_t acal_pnet; > + refcount_t acal_users; > + struct hlist_node acal_lst; /* inet6_acaddr_lst */ > + struct rcu_head rcu; > +}; Please just add the hlist to ifacaddr6 instead of duplicating so much information and reference counters here. This seems to waste a lot of memory unnecessarily and adds lots of unnecessary object allocate/setup/destroy logic. ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH net v5] net/ipv6: Add anycast addresses to a global hashtable 2018-10-30 3:32 ` David Miller @ 2018-10-30 11:10 ` Jeff Barnhill 2018-10-30 18:31 ` David Miller 0 siblings, 1 reply; 23+ messages in thread From: Jeff Barnhill @ 2018-10-30 11:10 UTC (permalink / raw) To: davem; +Cc: netdev, Alexey Kuznetsov, yoshfuji I originally started implementing it the way you suggested; however, it seemed to complicate management of that structure because it isn't currently using rcu. Also, assuming that can be worked out, where would I get the net from? Would I need to store a copy in ifcaddr6, or is there some way to access it during ipv6_chk_acast_addr()? It seems that if I don't add a copy of net, but instead access it through aca_rt(?), then freeing the ifcaddr6 memory becomes problematic (detaching it from idev, while read_rcu may still be accessing it). On Mon, Oct 29, 2018 at 11:32 PM David Miller <davem@davemloft.net> wrote: > > From: Jeff Barnhill <0xeffeff@gmail.com> > Date: Sun, 28 Oct 2018 01:51:59 +0000 > > > +struct ipv6_ac_addrlist { > > + struct in6_addr acal_addr; > > + possible_net_t acal_pnet; > > + refcount_t acal_users; > > + struct hlist_node acal_lst; /* inet6_acaddr_lst */ > > + struct rcu_head rcu; > > +}; > > Please just add the hlist to ifcaddr6 instead of duplicating so much > information and reference counters here. > > This seems to waste a lot of memory unnecessary and add lots of > unnecessary object allocate/setup/destroy logic. ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH net v5] net/ipv6: Add anycast addresses to a global hashtable 2018-10-30 11:10 ` Jeff Barnhill @ 2018-10-30 18:31 ` David Miller 2018-10-30 22:06 ` David Ahern 0 siblings, 1 reply; 23+ messages in thread From: David Miller @ 2018-10-30 18:31 UTC (permalink / raw) To: 0xeffeff; +Cc: netdev, kuznet, yoshfuji From: Jeff Barnhill <0xeffeff@gmail.com> Date: Tue, 30 Oct 2018 07:10:58 -0400 > I originally started implementing it the way you suggested; however, > it seemed to complicate management of that structure because it isn't > currently using rcu. Also, assuming that can be worked out, where > would I get the net from? Would I need to store a copy in ifcaddr6, > or is there some way to access it during ipv6_chk_acast_addr()? It > seems that if I don't add a copy of net, but instead access it through > aca_rt(?), then freeing the ifcaddr6 memory becomes problematic > (detaching it from idev, while read_rcu may still be accessing it). > On Mon, Oct 29, 2018 at 11:32 PM David Miller <davem@davemloft.net> wrote: I don't think converting the structure over to RCU is hard, especially because all of the read paths (everything leading to ipv6_chk_acast_dev()) are taking RCU locks already. And I cannot understand how having _two_ structures to manage a piece of information can be less complicated than just one. You can add a backpointer to the 'idev' in ifacaddr6 to get at the network namespace. You don't even need to do additional reference counting because the idev->ac_list is always purged before an idev is destroyed. ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH net v5] net/ipv6: Add anycast addresses to a global hashtable 2018-10-30 18:31 ` David Miller @ 2018-10-30 22:06 ` David Ahern 2018-10-30 23:19 ` David Miller 0 siblings, 1 reply; 23+ messages in thread From: David Ahern @ 2018-10-30 22:06 UTC (permalink / raw) To: David Miller, 0xeffeff; +Cc: netdev, kuznet, yoshfuji On 10/30/18 12:31 PM, David Miller wrote: > From: Jeff Barnhill <0xeffeff@gmail.com> > Date: Tue, 30 Oct 2018 07:10:58 -0400 > >> I originally started implementing it the way you suggested; however, >> it seemed to complicate management of that structure because it isn't >> currently using rcu. Also, assuming that can be worked out, where >> would I get the net from? Would I need to store a copy in ifcaddr6, >> or is there some way to access it during ipv6_chk_acast_addr()? It >> seems that if I don't add a copy of net, but instead access it through >> aca_rt(?), then freeing the ifcaddr6 memory becomes problematic >> (detaching it from idev, while read_rcu may still be accessing it). >> On Mon, Oct 29, 2018 at 11:32 PM David Miller <davem@davemloft.net> wrote: > > I don't think converting the structure over to RCU, especially because > all of the read paths (everything leading to ipv6_chk_acast_dev()) are > taking RCU locks already. > > And I cannot understand how having _two_ structures to manage a piece > of information can be less complicated than just one. > > You can add a backpointer to the 'idev' in ifacaddr6 to get at the > network namespace. You don't even need to do additional reference > counting because the idev->ac_list is always purged before an idev > is destroyed. > or make the table per namespace. ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH net v5] net/ipv6: Add anycast addresses to a global hashtable 2018-10-30 22:06 ` David Ahern @ 2018-10-30 23:19 ` David Miller 2018-11-01 0:02 ` Jeff Barnhill 0 siblings, 1 reply; 23+ messages in thread From: David Miller @ 2018-10-30 23:19 UTC (permalink / raw) To: dsahern; +Cc: 0xeffeff, netdev, kuznet, yoshfuji From: David Ahern <dsahern@gmail.com> Date: Tue, 30 Oct 2018 16:06:46 -0600 > or make the table per namespace. This will increase namespace create/destroy cost, so I'd rather not for something like this. ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH net v5] net/ipv6: Add anycast addresses to a global hashtable 2018-10-30 23:19 ` David Miller @ 2018-11-01 0:02 ` Jeff Barnhill 2018-11-01 0:14 ` [PATCH net v6] " Jeff Barnhill 2018-11-01 2:53 ` [PATCH net v5] " David Ahern 0 siblings, 2 replies; 23+ messages in thread From: Jeff Barnhill @ 2018-11-01 0:02 UTC (permalink / raw) To: davem; +Cc: David Ahern, netdev, Alexey Kuznetsov, yoshfuji I'll follow this email with a new patch using ifacaddr6 instead of creating a new struct. I ended up using fib6_nh.nh_dev to get the net, instead of adding a back pointer to idev. It seems that idev was recently removed in lieu of this, so if this is incorrect, please let me know. Hopefully, I got the locking correct. Thanks, Jeff On Tue, Oct 30, 2018 at 7:19 PM David Miller <davem@davemloft.net> wrote: > > From: David Ahern <dsahern@gmail.com> > Date: Tue, 30 Oct 2018 16:06:46 -0600 > > > or make the table per namespace. > > This will increase namespace create/destroy cost, so I'd rather not > for something like this. ^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH net v6] net/ipv6: Add anycast addresses to a global hashtable 2018-11-01 0:02 ` Jeff Barnhill @ 2018-11-01 0:14 ` Jeff Barnhill 2018-11-01 5:34 ` Stephen Hemminger 2018-11-01 2:53 ` [PATCH net v5] " David Ahern 1 sibling, 1 reply; 23+ messages in thread From: Jeff Barnhill @ 2018-11-01 0:14 UTC (permalink / raw) To: netdev; +Cc: davem, kuznet, yoshfuji, Jeff Barnhill icmp6_send() function is expensive on systems with a large number of interfaces. Every time it’s called, it has to verify that the source address does not correspond to an existing anycast address by looping through every device and every anycast address on the device. This can result in significant delays for a CPU when there are a large number of neighbors and ND timers are frequently timing out and calling neigh_invalidate(). Add anycast addresses to a global hashtable to allow quick searching for matching anycast addresses. This is based on inet6_addr_lst in addrconf.c. Signed-off-by: Jeff Barnhill <0xeffeff@gmail.com> --- include/net/addrconf.h | 2 ++ include/net/if_inet6.h | 2 ++ net/ipv6/af_inet6.c | 5 ++++ net/ipv6/anycast.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++--- 4 files changed, 85 insertions(+), 4 deletions(-) diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 14b789a123e7..799af1a037d1 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -317,6 +317,8 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, const struct in6_addr *addr); bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev, const struct in6_addr *addr); +int anycast_init(void); +void anycast_cleanup(void); /* Device notifier */ int register_inet6addr_notifier(struct notifier_block *nb); diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index d7578cf49c3a..c9c78c15bce0 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -146,10 +146,12 @@ struct ifacaddr6 { struct in6_addr aca_addr; struct fib6_info *aca_rt; struct 
ifacaddr6 *aca_next; + struct hlist_node aca_addr_lst; int aca_users; refcount_t aca_refcnt; unsigned long aca_cstamp; unsigned long aca_tstamp; + struct rcu_head rcu; }; #define IFA_HOST IPV6_ADDR_LOOPBACK diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 3f4d61017a69..ddc8a6dbfba2 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -1001,6 +1001,9 @@ static int __init inet6_init(void) err = ip6_flowlabel_init(); if (err) goto ip6_flowlabel_fail; + err = anycast_init(); + if (err) + goto anycast_fail; err = addrconf_init(); if (err) goto addrconf_fail; @@ -1091,6 +1094,8 @@ static int __init inet6_init(void) ipv6_exthdrs_fail: addrconf_cleanup(); addrconf_fail: + anycast_cleanup(); +anycast_fail: ip6_flowlabel_cleanup(); ip6_flowlabel_fail: ndisc_late_cleanup(); diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 4e0ff7031edd..f6c4c8ac184c 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -44,8 +44,22 @@ #include <net/checksum.h> +#define IN6_ADDR_HSIZE_SHIFT 8 +#define IN6_ADDR_HSIZE BIT(IN6_ADDR_HSIZE_SHIFT) +/* anycast address hash table + */ +static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE]; +static DEFINE_SPINLOCK(acaddr_hash_lock); + static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr); +static u32 inet6_acaddr_hash(struct net *net, const struct in6_addr *addr) +{ + u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net); + + return hash_32(val, IN6_ADDR_HSIZE_SHIFT); +} + /* * socket join an anycast group */ @@ -204,16 +218,39 @@ void ipv6_sock_ac_close(struct sock *sk) rtnl_unlock(); } +static void ipv6_add_acaddr_hash(struct net *net, struct ifacaddr6 *aca) +{ + unsigned int hash = inet6_acaddr_hash(net, &aca->aca_addr); + + spin_lock(&acaddr_hash_lock); + hlist_add_head_rcu(&aca->aca_addr_lst, &inet6_acaddr_lst[hash]); + spin_unlock(&acaddr_hash_lock); +} + +static void ipv6_del_acaddr_hash(struct ifacaddr6 *aca) +{ + spin_lock(&acaddr_hash_lock); + 
hlist_del_init_rcu(&aca->aca_addr_lst); + spin_unlock(&acaddr_hash_lock); +} + static void aca_get(struct ifacaddr6 *aca) { refcount_inc(&aca->aca_refcnt); } +static void aca_free_rcu(struct rcu_head *h) +{ + struct ifacaddr6 *aca = container_of(h, struct ifacaddr6, rcu); + + fib6_info_release(aca->aca_rt); + kfree(aca); +} + static void aca_put(struct ifacaddr6 *ac) { if (refcount_dec_and_test(&ac->aca_refcnt)) { - fib6_info_release(ac->aca_rt); - kfree(ac); + call_rcu(&ac->rcu, aca_free_rcu); } } @@ -229,6 +266,7 @@ static struct ifacaddr6 *aca_alloc(struct fib6_info *f6i, aca->aca_addr = *addr; fib6_info_hold(f6i); aca->aca_rt = f6i; + INIT_HLIST_NODE(&aca->aca_addr_lst); aca->aca_users = 1; /* aca_tstamp should be updated upon changes */ aca->aca_cstamp = aca->aca_tstamp = jiffies; @@ -285,6 +323,8 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr) aca_get(aca); write_unlock_bh(&idev->lock); + ipv6_add_acaddr_hash(net, aca); + ip6_ins_rt(net, f6i); addrconf_join_solict(idev->dev, &aca->aca_addr); @@ -325,6 +365,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr) else idev->ac_list = aca->aca_next; write_unlock_bh(&idev->lock); + ipv6_del_acaddr_hash(aca); addrconf_leave_solict(idev, &aca->aca_addr); ip6_del_rt(dev_net(idev->dev), aca->aca_rt); @@ -352,6 +393,8 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev) idev->ac_list = aca->aca_next; write_unlock_bh(&idev->lock); + ipv6_del_acaddr_hash(aca); + addrconf_leave_solict(idev, &aca->aca_addr); ip6_del_rt(dev_net(idev->dev), aca->aca_rt); @@ -390,17 +433,25 @@ static bool ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *ad bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, const struct in6_addr *addr) { + unsigned int hash = inet6_acaddr_hash(net, addr); + struct net_device *nh_dev; + struct ifacaddr6 *aca; bool found = false; rcu_read_lock(); if (dev) found = ipv6_chk_acast_dev(dev, addr); else - for_each_netdev_rcu(net, 
dev) - if (ipv6_chk_acast_dev(dev, addr)) { + hlist_for_each_entry_rcu(aca, &inet6_acaddr_lst[hash], + aca_addr_lst) { + nh_dev = fib6_info_nh_dev(aca->aca_rt); + if (!nh_dev || !net_eq(dev_net(nh_dev), net)) + continue; + if (ipv6_addr_equal(&aca->aca_addr, addr)) { found = true; break; } + } rcu_read_unlock(); return found; } @@ -539,4 +590,25 @@ void ac6_proc_exit(struct net *net) { remove_proc_entry("anycast6", net->proc_net); } + +/* Init / cleanup code + */ +int __init anycast_init(void) +{ + int i; + + for (i = 0; i < IN6_ADDR_HSIZE; i++) + INIT_HLIST_HEAD(&inet6_acaddr_lst[i]); + return 0; +} + +void anycast_cleanup(void) +{ + int i; + + spin_lock(&acaddr_hash_lock); + for (i = 0; i < IN6_ADDR_HSIZE; i++) + WARN_ON(!hlist_empty(&inet6_acaddr_lst[i])); + spin_unlock(&acaddr_hash_lock); +} #endif -- 2.14.1 ^ permalink raw reply related [flat|nested] 23+ messages in thread
* Re: [PATCH net v6] net/ipv6: Add anycast addresses to a global hashtable 2018-11-01 0:14 ` [PATCH net v6] " Jeff Barnhill @ 2018-11-01 5:34 ` Stephen Hemminger 2018-11-02 20:23 ` [PATCH net v7] " Jeff Barnhill 0 siblings, 1 reply; 23+ messages in thread From: Stephen Hemminger @ 2018-11-01 5:34 UTC (permalink / raw) To: Jeff Barnhill; +Cc: netdev, davem, kuznet, yoshfuji On Thu, 1 Nov 2018 00:14:38 +0000 Jeff Barnhill <0xeffeff@gmail.com> wrote: > diff --git a/include/net/addrconf.h b/include/net/addrconf.h > index 14b789a123e7..799af1a037d1 100644 > --- a/include/net/addrconf.h > +++ b/include/net/addrconf.h > @@ -317,6 +317,8 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, > const struct in6_addr *addr); > bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev, > const struct in6_addr *addr); > +int anycast_init(void); > +void anycast_cleanup(void); One minor nit that should be fixed. To avoid any potential naming conflicts, please prefix all ipv6 global symbols with ipv6_ ^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH net v7] net/ipv6: Add anycast addresses to a global hashtable 2018-11-01 5:34 ` Stephen Hemminger @ 2018-11-02 20:23 ` Jeff Barnhill 2018-11-03 6:55 ` David Miller 0 siblings, 1 reply; 23+ messages in thread From: Jeff Barnhill @ 2018-11-02 20:23 UTC (permalink / raw) To: netdev; +Cc: davem, kuznet, yoshfuji, Jeff Barnhill icmp6_send() function is expensive on systems with a large number of interfaces. Every time it’s called, it has to verify that the source address does not correspond to an existing anycast address by looping through every device and every anycast address on the device. This can result in significant delays for a CPU when there are a large number of neighbors and ND timers are frequently timing out and calling neigh_invalidate(). Add anycast addresses to a global hashtable to allow quick searching for matching anycast addresses. This is based on inet6_addr_lst in addrconf.c. Signed-off-by: Jeff Barnhill <0xeffeff@gmail.com> --- include/net/addrconf.h | 2 ++ include/net/if_inet6.h | 2 ++ net/ipv6/af_inet6.c | 5 ++++ net/ipv6/anycast.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++--- 4 files changed, 85 insertions(+), 4 deletions(-) diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 14b789a123e7..1656c5978498 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -317,6 +317,8 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, const struct in6_addr *addr); bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev, const struct in6_addr *addr); +int ipv6_anycast_init(void); +void ipv6_anycast_cleanup(void); /* Device notifier */ int register_inet6addr_notifier(struct notifier_block *nb); diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index d7578cf49c3a..c9c78c15bce0 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -146,10 +146,12 @@ struct ifacaddr6 { struct in6_addr aca_addr; struct fib6_info *aca_rt; struct ifacaddr6 *aca_next; + struct 
hlist_node aca_addr_lst; int aca_users; refcount_t aca_refcnt; unsigned long aca_cstamp; unsigned long aca_tstamp; + struct rcu_head rcu; }; #define IFA_HOST IPV6_ADDR_LOOPBACK diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 3f4d61017a69..f0cd291034f0 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -1001,6 +1001,9 @@ static int __init inet6_init(void) err = ip6_flowlabel_init(); if (err) goto ip6_flowlabel_fail; + err = ipv6_anycast_init(); + if (err) + goto ipv6_anycast_fail; err = addrconf_init(); if (err) goto addrconf_fail; @@ -1091,6 +1094,8 @@ static int __init inet6_init(void) ipv6_exthdrs_fail: addrconf_cleanup(); addrconf_fail: + ipv6_anycast_cleanup(); +ipv6_anycast_fail: ip6_flowlabel_cleanup(); ip6_flowlabel_fail: ndisc_late_cleanup(); diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 4e0ff7031edd..7698637cf827 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -44,8 +44,22 @@ #include <net/checksum.h> +#define IN6_ADDR_HSIZE_SHIFT 8 +#define IN6_ADDR_HSIZE BIT(IN6_ADDR_HSIZE_SHIFT) +/* anycast address hash table + */ +static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE]; +static DEFINE_SPINLOCK(acaddr_hash_lock); + static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr); +static u32 inet6_acaddr_hash(struct net *net, const struct in6_addr *addr) +{ + u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net); + + return hash_32(val, IN6_ADDR_HSIZE_SHIFT); +} + /* * socket join an anycast group */ @@ -204,16 +218,39 @@ void ipv6_sock_ac_close(struct sock *sk) rtnl_unlock(); } +static void ipv6_add_acaddr_hash(struct net *net, struct ifacaddr6 *aca) +{ + unsigned int hash = inet6_acaddr_hash(net, &aca->aca_addr); + + spin_lock(&acaddr_hash_lock); + hlist_add_head_rcu(&aca->aca_addr_lst, &inet6_acaddr_lst[hash]); + spin_unlock(&acaddr_hash_lock); +} + +static void ipv6_del_acaddr_hash(struct ifacaddr6 *aca) +{ + spin_lock(&acaddr_hash_lock); + hlist_del_init_rcu(&aca->aca_addr_lst); + 
spin_unlock(&acaddr_hash_lock); +} + static void aca_get(struct ifacaddr6 *aca) { refcount_inc(&aca->aca_refcnt); } +static void aca_free_rcu(struct rcu_head *h) +{ + struct ifacaddr6 *aca = container_of(h, struct ifacaddr6, rcu); + + fib6_info_release(aca->aca_rt); + kfree(aca); +} + static void aca_put(struct ifacaddr6 *ac) { if (refcount_dec_and_test(&ac->aca_refcnt)) { - fib6_info_release(ac->aca_rt); - kfree(ac); + call_rcu(&ac->rcu, aca_free_rcu); } } @@ -229,6 +266,7 @@ static struct ifacaddr6 *aca_alloc(struct fib6_info *f6i, aca->aca_addr = *addr; fib6_info_hold(f6i); aca->aca_rt = f6i; + INIT_HLIST_NODE(&aca->aca_addr_lst); aca->aca_users = 1; /* aca_tstamp should be updated upon changes */ aca->aca_cstamp = aca->aca_tstamp = jiffies; @@ -285,6 +323,8 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr) aca_get(aca); write_unlock_bh(&idev->lock); + ipv6_add_acaddr_hash(net, aca); + ip6_ins_rt(net, f6i); addrconf_join_solict(idev->dev, &aca->aca_addr); @@ -325,6 +365,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr) else idev->ac_list = aca->aca_next; write_unlock_bh(&idev->lock); + ipv6_del_acaddr_hash(aca); addrconf_leave_solict(idev, &aca->aca_addr); ip6_del_rt(dev_net(idev->dev), aca->aca_rt); @@ -352,6 +393,8 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev) idev->ac_list = aca->aca_next; write_unlock_bh(&idev->lock); + ipv6_del_acaddr_hash(aca); + addrconf_leave_solict(idev, &aca->aca_addr); ip6_del_rt(dev_net(idev->dev), aca->aca_rt); @@ -390,17 +433,25 @@ static bool ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *ad bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, const struct in6_addr *addr) { + unsigned int hash = inet6_acaddr_hash(net, addr); + struct net_device *nh_dev; + struct ifacaddr6 *aca; bool found = false; rcu_read_lock(); if (dev) found = ipv6_chk_acast_dev(dev, addr); else - for_each_netdev_rcu(net, dev) - if (ipv6_chk_acast_dev(dev, addr)) { 
+ hlist_for_each_entry_rcu(aca, &inet6_acaddr_lst[hash], + aca_addr_lst) { + nh_dev = fib6_info_nh_dev(aca->aca_rt); + if (!nh_dev || !net_eq(dev_net(nh_dev), net)) + continue; + if (ipv6_addr_equal(&aca->aca_addr, addr)) { found = true; break; } + } rcu_read_unlock(); return found; } @@ -539,4 +590,25 @@ void ac6_proc_exit(struct net *net) { remove_proc_entry("anycast6", net->proc_net); } + +/* Init / cleanup code + */ +int __init ipv6_anycast_init(void) +{ + int i; + + for (i = 0; i < IN6_ADDR_HSIZE; i++) + INIT_HLIST_HEAD(&inet6_acaddr_lst[i]); + return 0; +} + +void ipv6_anycast_cleanup(void) +{ + int i; + + spin_lock(&acaddr_hash_lock); + for (i = 0; i < IN6_ADDR_HSIZE; i++) + WARN_ON(!hlist_empty(&inet6_acaddr_lst[i])); + spin_unlock(&acaddr_hash_lock); +} #endif -- 2.14.1 ^ permalink raw reply related [flat|nested] 23+ messages in thread
* Re: [PATCH net v7] net/ipv6: Add anycast addresses to a global hashtable 2018-11-02 20:23 ` [PATCH net v7] " Jeff Barnhill @ 2018-11-03 6:55 ` David Miller 0 siblings, 0 replies; 23+ messages in thread From: David Miller @ 2018-11-03 6:55 UTC (permalink / raw) To: 0xeffeff; +Cc: netdev, kuznet, yoshfuji From: Jeff Barnhill <0xeffeff@gmail.com> Date: Fri, 2 Nov 2018 20:23:57 +0000 > icmp6_send() function is expensive on systems with a large number of > interfaces. Every time it’s called, it has to verify that the source > address does not correspond to an existing anycast address by looping > through every device and every anycast address on the device. This can > result in significant delays for a CPU when there are a large number of > neighbors and ND timers are frequently timing out and calling > neigh_invalidate(). > > Add anycast addresses to a global hashtable to allow quick searching for > matching anycast addresses. This is based on inet6_addr_lst in addrconf.c. > > Signed-off-by: Jeff Barnhill <0xeffeff@gmail.com> Applied, thank you. ^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH net v5] net/ipv6: Add anycast addresses to a global hashtable 2018-11-01 0:02 ` Jeff Barnhill 2018-11-01 0:14 ` [PATCH net v6] " Jeff Barnhill @ 2018-11-01 2:53 ` David Ahern 1 sibling, 0 replies; 23+ messages in thread From: David Ahern @ 2018-11-01 2:53 UTC (permalink / raw) To: Jeff Barnhill, davem; +Cc: netdev, Alexey Kuznetsov, yoshfuji On 10/31/18 6:02 PM, Jeff Barnhill wrote: > I'll follow this email with a new patch using ifacaddr6 instead of > creating a new struct. I ended up using fib6_nh.nh_dev to get the net, > instead of adding a back pointer to idev. It seems that idev was > recently removed in lieu of this, so if this is incorrect, please let > me know. Hopefully, I got the locking correct. That's correct. Make sure that the anycast code can not be accessed for reject routes which will not have a device set. Should be ok, but double check. ^ permalink raw reply [flat|nested] 23+ messages in thread
end of thread, other threads:[~2018-11-03 16:05 UTC | newest] Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2018-10-23 2:12 [PATCH net] net/ipv6: Add anycast addresses to a global hashtable Jeff Barnhill 2018-10-23 2:26 ` Eric Dumazet 2018-10-23 18:21 ` Jeff Barnhill 2018-10-24 1:58 ` [PATCH net v2] " Jeff Barnhill 2018-10-24 3:12 ` Eric Dumazet 2018-10-24 5:06 ` Jeff Barnhill 2018-10-26 21:22 ` [PATCH net v3] " Jeff Barnhill 2018-10-26 21:44 ` David Ahern 2018-10-27 18:02 ` [PATCH net v4] " Jeff Barnhill 2018-10-27 23:39 ` David Ahern 2018-10-28 1:27 ` Jeff Barnhill 2018-10-28 1:51 ` [PATCH net v5] " Jeff Barnhill 2018-10-30 3:32 ` David Miller 2018-10-30 11:10 ` Jeff Barnhill 2018-10-30 18:31 ` David Miller 2018-10-30 22:06 ` David Ahern 2018-10-30 23:19 ` David Miller 2018-11-01 0:02 ` Jeff Barnhill 2018-11-01 0:14 ` [PATCH net v6] " Jeff Barnhill 2018-11-01 5:34 ` Stephen Hemminger 2018-11-02 20:23 ` [PATCH net v7] " Jeff Barnhill 2018-11-03 6:55 ` David Miller 2018-11-01 2:53 ` [PATCH net v5] " David Ahern
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).