From mboxrd@z Thu Jan 1 00:00:00 1970 From: Jeff Barnhill <0xeffeff@gmail.com> Subject: [PATCH net v2] net/ipv6: Add anycast addresses to a global hashtable Date: Wed, 24 Oct 2018 01:58:32 +0000 Message-ID: <20181024015832.16607-1-0xeffeff@gmail.com> References: Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: davem@davemloft.net, kuznet@ms2.inr.ac.ru, yoshfuji@linux-ipv6.org, Jeff Barnhill <0xeffeff@gmail.com> To: netdev@vger.kernel.org Return-path: Received: from mail-yb1-f196.google.com ([209.85.219.196]:42186 "EHLO mail-yb1-f196.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1725896AbeJXLQ5 (ORCPT ); Wed, 24 Oct 2018 07:16:57 -0400 Received: by mail-yb1-f196.google.com with SMTP id o204-v6so1483018yba.9 for ; Tue, 23 Oct 2018 19:50:56 -0700 (PDT) In-Reply-To: Sender: netdev-owner@vger.kernel.org List-ID: icmp6_send() function is expensive on systems with a large number of interfaces. Every time it’s called, it has to verify that the source address does not correspond to an existing anycast address by looping through every device and every anycast address on the device. This can result in significant delays for a CPU when there are a large number of neighbors and ND timers are frequently timing out and calling neigh_invalidate(). Add anycast addresses to a global hashtable to allow quick searching for matching anycast addresses. This is based on inet6_addr_lst in addrconf.c. Signed-off-by: Jeff Barnhill <0xeffeff@gmail.com> --- include/net/addrconf.h | 2 + include/net/if_inet6.h | 8 +++ net/ipv6/af_inet6.c | 5 ++ net/ipv6/anycast.c | 132 ++++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 145 insertions(+), 2 deletions(-) diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 6def0351bcc3..0cee3f99c41d 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -312,6 +312,8 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, const struct in6_addr *addr); bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev, const struct in6_addr *addr); +int anycast_init(void); +void anycast_cleanup(void); /* Device notifier */ int register_inet6addr_notifier(struct notifier_block *nb); diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index d7578cf49c3a..55a4a1d8cebc 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -142,6 +142,14 @@ struct ipv6_ac_socklist { struct ipv6_ac_socklist *acl_next; }; +struct ipv6_ac_addrlist { + struct in6_addr acal_addr; + possible_net_t acal_pnet; + int acal_users; + struct hlist_node acal_lst; /* inet6_acaddr_lst */ + struct rcu_head rcu; +}; + struct ifacaddr6 { struct in6_addr aca_addr; struct fib6_info *aca_rt; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 9a4261e50272..971a05fdd3bd 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -1001,6 +1001,9 @@ static int __init inet6_init(void) err = ip6_flowlabel_init(); if (err) goto ip6_flowlabel_fail; + err = anycast_init(); + if (err) + goto anycast_fail; err = addrconf_init(); if (err) goto addrconf_fail; @@ -1091,6 +1094,8 @@ static int __init inet6_init(void) ipv6_exthdrs_fail: addrconf_cleanup(); addrconf_fail: + anycast_cleanup(); +anycast_fail: ip6_flowlabel_cleanup(); ip6_flowlabel_fail: ndisc_late_cleanup(); diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 4e0ff7031edd..def1e156d857 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -44,8 +44,22 @@ #include +#define IN6_ADDR_HSIZE_SHIFT 8 +#define IN6_ADDR_HSIZE BIT(IN6_ADDR_HSIZE_SHIFT) +/* anycast address hash table + */ +static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE]; +static DEFINE_SPINLOCK(acaddr_hash_lock); + static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr); +static u32 inet6_acaddr_hash(struct net *net, const struct in6_addr *addr) +{ + u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net); + + return hash_32(val, IN6_ADDR_HSIZE_SHIFT); +} + /* * socket join an anycast group */ @@ -204,6 +218,83 @@ void ipv6_sock_ac_close(struct sock *sk) rtnl_unlock(); } +static struct ipv6_ac_addrlist *acal_alloc(struct net *net, + const struct in6_addr *addr) +{ + struct ipv6_ac_addrlist *acal; + + acal = kzalloc(sizeof(*acal), GFP_ATOMIC); + if (!acal) + return NULL; + + acal->acal_addr = *addr; + write_pnet(&acal->acal_pnet, get_net(net)); + acal->acal_users = 1; + INIT_HLIST_NODE(&acal->acal_lst); + + return acal; +} + +static void acal_free_rcu(struct rcu_head *h) +{ + struct ipv6_ac_addrlist *acal; + + acal = container_of(h, struct ipv6_ac_addrlist, rcu); + WARN_ON(acal->acal_users); + put_net(read_pnet(&acal->acal_pnet)); + kfree(acal); +} + +static int ipv6_add_acaddr_hash(struct net *net, const struct in6_addr *addr) +{ + unsigned int hash = inet6_acaddr_hash(net, addr); + struct ipv6_ac_addrlist *acal; + int err = 0; + + spin_lock(&acaddr_hash_lock); + hlist_for_each_entry(acal, &inet6_acaddr_lst[hash], acal_lst) { + if (!net_eq(read_pnet(&acal->acal_pnet), net)) + continue; + if (ipv6_addr_equal(&acal->acal_addr, addr)) { + acal->acal_users++; + goto out; + } + } + + acal = acal_alloc(net, addr); + if (!acal) { + err = -ENOMEM; + goto out; + } + + hlist_add_head_rcu(&acal->acal_lst, &inet6_acaddr_lst[hash]); + +out: + spin_unlock(&acaddr_hash_lock); + return err; +} + +static void ipv6_del_acaddr_hash(struct net *net, const struct in6_addr *addr) +{ + unsigned int hash = inet6_acaddr_hash(net, addr); + struct ipv6_ac_addrlist *acal; + + spin_lock(&acaddr_hash_lock); + hlist_for_each_entry(acal, &inet6_acaddr_lst[hash], acal_lst) { + if (!net_eq(read_pnet(&acal->acal_pnet), net)) + continue; + if (ipv6_addr_equal(&acal->acal_addr, addr)) { + if (--acal->acal_users < 1) { + hlist_del_init_rcu(&acal->acal_lst); + call_rcu(&acal->rcu, acal_free_rcu); + } + spin_unlock(&acaddr_hash_lock); + return; + } + } + spin_unlock(&acaddr_hash_lock); +} + static void aca_get(struct ifacaddr6 *aca) { refcount_inc(&aca->aca_refcnt); @@ -275,6 +366,13 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr) err = -ENOMEM; goto out; } + err = ipv6_add_acaddr_hash(dev_net(idev->dev), addr); + if (err) { + fib6_info_release(f6i); + fib6_info_release(f6i); + kfree(aca); + goto out; + } aca->aca_next = idev->ac_list; idev->ac_list = aca; @@ -324,6 +422,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr) prev_aca->aca_next = aca->aca_next; else idev->ac_list = aca->aca_next; + ipv6_del_acaddr_hash(dev_net(idev->dev), &aca->aca_addr); write_unlock_bh(&idev->lock); addrconf_leave_solict(idev, &aca->aca_addr); @@ -350,6 +449,8 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev) write_lock_bh(&idev->lock); while ((aca = idev->ac_list) != NULL) { idev->ac_list = aca->aca_next; + ipv6_del_acaddr_hash(dev_net(idev->dev), &aca->aca_addr); + write_unlock_bh(&idev->lock); addrconf_leave_solict(idev, &aca->aca_addr); @@ -391,16 +492,22 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, const struct in6_addr *addr) { bool found = false; + unsigned int hash = inet6_acaddr_hash(net, addr); + struct ipv6_ac_addrlist *acal; rcu_read_lock(); if (dev) found = ipv6_chk_acast_dev(dev, addr); else - for_each_netdev_rcu(net, dev) - if (ipv6_chk_acast_dev(dev, addr)) { + hlist_for_each_entry_rcu(acal, &inet6_acaddr_lst[hash], + acal_lst) { + if (!net_eq(read_pnet(&acal->acal_pnet), net)) + continue; + if (ipv6_addr_equal(&acal->acal_addr, addr)) { found = true; break; } + } rcu_read_unlock(); return found; } @@ -539,4 +646,25 @@ void ac6_proc_exit(struct net *net) { remove_proc_entry("anycast6", net->proc_net); } + +/* Init / cleanup code + */ +int __init anycast_init(void) +{ + int i; + + for (i = 0; i < IN6_ADDR_HSIZE; i++) + INIT_HLIST_HEAD(&inet6_acaddr_lst[i]); + return 0; +} + +void anycast_cleanup(void) +{ + int i; + + spin_lock(&acaddr_hash_lock); + for (i = 0; i < IN6_ADDR_HSIZE; i++) + WARN_ON(!hlist_empty(&inet6_acaddr_lst[i])); + spin_unlock(&acaddr_hash_lock); +} #endif -- 2.14.1