From mboxrd@z Thu Jan 1 00:00:00 1970 From: David Ahern Subject: [PATCH net-next v2 17/21] net/ipv6: Cleanup exception and cache route handling Date: Tue, 17 Apr 2018 17:33:23 -0700 Message-ID: <20180418003327.19992-18-dsahern@gmail.com> References: <20180418003327.19992-1-dsahern@gmail.com> Cc: davem@davemloft.net, idosch@idosch.org, roopa@cumulusnetworks.com, eric.dumazet@gmail.com, weiwan@google.com, kafai@fb.com, yoshfuji@linux-ipv6.org, David Ahern To: netdev@vger.kernel.org Return-path: Received: from mail-pf0-f196.google.com ([209.85.192.196]:41753 "EHLO mail-pf0-f196.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753186AbeDRAeB (ORCPT ); Tue, 17 Apr 2018 20:34:01 -0400 Received: by mail-pf0-f196.google.com with SMTP id a2so14136pff.8 for ; Tue, 17 Apr 2018 17:34:01 -0700 (PDT) In-Reply-To: <20180418003327.19992-1-dsahern@gmail.com> Sender: netdev-owner@vger.kernel.org List-ID: IPv6 FIB will only contain FIB entries with exception routes added to the FIB entry. Once this transformation is complete, FIB lookups will return a fib6_info with the lookup functions still returning a dst based rt6_info. The current code uses rt6_info for both paths and overloads the rt6_info variable usually called 'rt'. This patch introduces a new 'f6i' variable name for the result of the FIB lookup and keeps 'rt' as the dst based return variable. 'f6i' becomes a fib6_info in a later patch which is why it is introduced as f6i now; avoids the additional churn in the later patch. In addition, remove RTF_CACHE and dst checks from fib6 add and delete since they can not happen now and will never happen after the data type flip. Signed-off-by: David Ahern --- include/net/ip6_route.h | 1 - net/ipv6/ip6_fib.c | 16 +----- net/ipv6/route.c | 142 +++++++++++++++++++++++++++--------------------- 3 files changed, 81 insertions(+), 78 deletions(-) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index ff70266e30d7..686cdc7f356a 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -106,7 +106,6 @@ int ip6_ins_rt(struct net *net, struct rt6_info *rt); int ip6_del_rt(struct net *net, struct rt6_info *rt); void rt6_flush_exceptions(struct rt6_info *rt); -int rt6_remove_exception_rt(struct rt6_info *rt); void rt6_age_exceptions(struct rt6_info *rt, struct fib6_gc_args *gc_args, unsigned long now); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 280b69497ad0..53df4a98a7f7 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1074,7 +1074,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, static void fib6_start_gc(struct net *net, struct rt6_info *rt) { if (!timer_pending(&net->ipv6.ip6_fib_timer) && - (rt->rt6i_flags & (RTF_EXPIRES | RTF_CACHE))) + (rt->rt6i_flags & RTF_EXPIRES)) mod_timer(&net->ipv6.ip6_fib_timer, jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); } @@ -1125,8 +1125,6 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, if (WARN_ON_ONCE(!atomic_read(&rt->dst.__refcnt))) return -EINVAL; - if (WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE)) - return -EINVAL; if (info->nlh) { if (!(info->nlh->nlmsg_flags & NLM_F_CREATE)) @@ -1650,8 +1648,6 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, RT6_TRACE("fib6_del_route\n"); - WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE); - /* Unlink it */ *rtp = rt->rt6_next; rt->rt6i_node = NULL; @@ -1720,21 +1716,11 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info) struct rt6_info __rcu **rtp; struct rt6_info __rcu **rtp_next; -#if RT6_DEBUG >= 2 - if (rt->dst.obsolete > 0) { - WARN_ON(fn); - return -ENOENT; - } -#endif if (!fn || rt == net->ipv6.fib6_null_entry) return -ENOENT; WARN_ON(!(fn->fn_flags & RTN_RTINFO)); - /* remove cached dst from exception table */ - if (rt->rt6i_flags & RTF_CACHE) - return rt6_remove_exception_rt(rt); - /* * Walk the leaf entries looking for ourself */ diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 56a854b426a6..ad9eaecf539c 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1013,8 +1013,8 @@ static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from) BUG_ON(from->from); rt->rt6i_flags &= ~RTF_EXPIRES; - dst_hold(&from->dst); - rt->from = from; + if (dst_hold_safe(&from->dst)) + rt->from = from; dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true); if (from->fib6_metrics != &dst_default_metrics) { rt->dst._metrics |= DST_METRICS_REFCOUNTED; @@ -1097,8 +1097,9 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net, const struct sk_buff *skb, int flags) { - struct rt6_info *rt, *rt_cache; + struct rt6_info *f6i; struct fib6_node *fn; + struct rt6_info *rt; if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) flags &= ~RT6_LOOKUP_F_IFACE; @@ -1106,36 +1107,36 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net, rcu_read_lock(); fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); restart: - rt = rcu_dereference(fn->leaf); - if (!rt) { - rt = net->ipv6.fib6_null_entry; + f6i = rcu_dereference(fn->leaf); + if (!f6i) { + f6i = net->ipv6.fib6_null_entry; } else { - rt = rt6_device_match(net, rt, &fl6->saddr, + f6i = rt6_device_match(net, f6i, &fl6->saddr, fl6->flowi6_oif, flags); - if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0) - rt = rt6_multipath_select(net, rt, fl6, fl6->flowi6_oif, - skb, flags); + if (f6i->rt6i_nsiblings && fl6->flowi6_oif == 0) + f6i = rt6_multipath_select(net, f6i, fl6, + fl6->flowi6_oif, skb, flags); } - if (rt == net->ipv6.fib6_null_entry) { + if (f6i == net->ipv6.fib6_null_entry) { fn = fib6_backtrack(fn, &fl6->saddr); if (fn) goto restart; } + /* Search through exception table */ - rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr); - if (rt_cache) { - rt = rt_cache; + rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr); + if (rt) { if (ip6_hold_safe(net, &rt, true)) dst_use_noref(&rt->dst, jiffies); - } else if (dst_hold_safe(&rt->dst)) { - struct rt6_info *nrt; - - nrt = ip6_create_rt_rcu(rt); - dst_release(&rt->dst); - rt = nrt; - } else { + } else if (f6i == net->ipv6.fib6_null_entry) { rt = net->ipv6.ip6_null_entry; dst_hold(&rt->dst); + } else { + rt = ip6_create_rt_rcu(f6i); + if (!rt) { + rt = net->ipv6.ip6_null_entry; + dst_hold(&rt->dst); + } } rcu_read_unlock(); @@ -1218,9 +1219,6 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort, * Clone the route. */ - if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)) - ort = ort->from; - rcu_read_lock(); dev = ip6_rt_get_dev_rcu(ort); rt = __ip6_dst_alloc(dev_net(dev), dev, 0); @@ -1446,11 +1444,6 @@ static int rt6_insert_exception(struct rt6_info *nrt, struct rt6_exception *rt6_ex; int err = 0; - /* ort can't be a cache or pcpu route */ - if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)) - ort = ort->from; - WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)); - spin_lock_bh(&rt6_exception_lock); if (ort->exception_bucket_flushed) { @@ -1589,7 +1582,7 @@ static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt, } /* Remove the passed in cached rt from the hash table that contains it */ -int rt6_remove_exception_rt(struct rt6_info *rt) +static int rt6_remove_exception_rt(struct rt6_info *rt) { struct rt6_exception_bucket *bucket; struct rt6_info *from = rt->from; @@ -1854,7 +1847,8 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, const struct sk_buff *skb, int flags) { struct fib6_node *fn, *saved_fn; - struct rt6_info *rt, *rt_cache; + struct rt6_info *f6i; + struct rt6_info *rt; int strict = 0; strict |= flags & RT6_LOOKUP_F_IFACE; @@ -1871,10 +1865,10 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, oif = 0; redo_rt6_select: - rt = rt6_select(net, fn, oif, strict); - if (rt->rt6i_nsiblings) - rt = rt6_multipath_select(net, rt, fl6, oif, skb, strict); - if (rt == net->ipv6.fib6_null_entry) { + f6i = rt6_select(net, fn, oif, strict); + if (f6i->rt6i_nsiblings) + f6i = rt6_multipath_select(net, f6i, fl6, oif, skb, strict); + if (f6i == net->ipv6.fib6_null_entry) { fn = fib6_backtrack(fn, &fl6->saddr); if (fn) goto redo_rt6_select; @@ -1886,18 +1880,17 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, } } - /*Search through exception table */ - rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr); - if (rt_cache) - rt = rt_cache; - - if (rt == net->ipv6.fib6_null_entry) { + if (f6i == net->ipv6.fib6_null_entry) { rt = net->ipv6.ip6_null_entry; rcu_read_unlock(); dst_hold(&rt->dst); trace_fib6_table_lookup(net, rt, table, fl6); return rt; - } else if (rt->rt6i_flags & RTF_CACHE) { + } + + /*Search through exception table */ + rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr); + if (rt) { if (ip6_hold_safe(net, &rt, true)) dst_use_noref(&rt->dst, jiffies); @@ -1905,7 +1898,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, trace_fib6_table_lookup(net, rt, table, fl6); return rt; } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) && - !(rt->rt6i_flags & RTF_GATEWAY))) { + !(f6i->rt6i_flags & RTF_GATEWAY))) { /* Create a RTF_CACHE clone which will not be * owned by the fib6 tree. It is for the special case where * the daddr in the skb during the neighbor look-up is different @@ -1914,16 +1907,16 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, struct rt6_info *uncached_rt; - if (ip6_hold_safe(net, &rt, true)) { - dst_use_noref(&rt->dst, jiffies); + if (ip6_hold_safe(net, &f6i, true)) { + dst_use_noref(&f6i->dst, jiffies); } else { rcu_read_unlock(); - uncached_rt = rt; + uncached_rt = f6i; goto uncached_rt_out; } rcu_read_unlock(); - uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL); + uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL); dst_release(&rt->dst); if (uncached_rt) { @@ -1946,18 +1939,18 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, struct rt6_info *pcpu_rt; - dst_use_noref(&rt->dst, jiffies); + dst_use_noref(&f6i->dst, jiffies); local_bh_disable(); - pcpu_rt = rt6_get_pcpu_route(rt); + pcpu_rt = rt6_get_pcpu_route(f6i); if (!pcpu_rt) { /* atomic_inc_not_zero() is needed when using rcu */ - if (atomic_inc_not_zero(&rt->rt6i_ref)) { + if (atomic_inc_not_zero(&f6i->rt6i_ref)) { /* No dst_hold() on rt is needed because grabbing * rt->rt6i_ref makes sure rt can't be released. */ - pcpu_rt = rt6_make_pcpu_route(net, rt); - rt6_release(rt); + pcpu_rt = rt6_make_pcpu_route(net, f6i); + rt6_release(f6i); } else { /* rt is already removed from tree */ pcpu_rt = net->ipv6.ip6_null_entry; @@ -2419,7 +2412,8 @@ static struct rt6_info *__ip6_route_redirect(struct net *net, int flags) { struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6; - struct rt6_info *rt, *rt_cache; + struct rt6_info *ret = NULL, *rt_cache; + struct rt6_info *rt; struct fib6_node *fn; /* Get the "current" route for this destination and @@ -2458,7 +2452,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net, if (rt_cache && ipv6_addr_equal(&rdfl->gateway, &rt_cache->rt6i_gateway)) { - rt = rt_cache; + ret = rt_cache; break; } continue; @@ -2469,7 +2463,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net, if (!rt) rt = net->ipv6.fib6_null_entry; else if (rt->rt6i_flags & RTF_REJECT) { - rt = net->ipv6.ip6_null_entry; + ret = net->ipv6.ip6_null_entry; goto out; } @@ -2480,12 +2474,15 @@ static struct rt6_info *__ip6_route_redirect(struct net *net, } out: - ip6_hold_safe(net, &rt, true); + if (ret) + dst_hold(&ret->dst); + else + ret = ip6_create_rt_rcu(rt); rcu_read_unlock(); - trace_fib6_table_lookup(net, rt, table, fl6); - return rt; + trace_fib6_table_lookup(net, ret, table, fl6); + return ret; }; static struct dst_entry *ip6_route_redirect(struct net *net, @@ -3182,6 +3179,22 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg) return err; } +static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg) +{ + int rc = -ESRCH; + + if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex) + goto out; + + if (cfg->fc_flags & RTF_GATEWAY && + !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway)) + goto out; + if (dst_hold_safe(&rt->dst)) + rc = rt6_remove_exception_rt(rt); +out: + return rc; +} + static int ip6_route_del(struct fib6_config *cfg, struct netlink_ext_ack *extack) { @@ -3206,11 +3219,16 @@ static int ip6_route_del(struct fib6_config *cfg, if (fn) { for_each_fib6_node_rt_rcu(fn) { if (cfg->fc_flags & RTF_CACHE) { + int rc; + rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst, &cfg->fc_src); - if (!rt_cache) - continue; - rt = rt_cache; + if (rt_cache) { + rc = ip6_del_cached_rt(rt_cache, cfg); + if (rc != -ESRCH) + return rc; + } + continue; } if (cfg->fc_ifindex && (!rt->fib6_nh.nh_dev || @@ -3327,7 +3345,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu NEIGH_UPDATE_F_ISROUTER)), NDISC_REDIRECT, &ndopts); - nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL); + nrt = ip6_rt_cache_alloc(rt->from, &msg->dest, NULL); if (!nrt) goto out; -- 2.11.0