All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 net-next 0/7] ipv4 ipv6: Move location of pcpu route cache and exceptions
@ 2019-04-28  2:27 David Ahern
  2019-04-28  2:27 ` [PATCH v2 net-next 1/7] ipv4: Move cached routes to fib_nh_common David Ahern
                   ` (7 more replies)
  0 siblings, 8 replies; 12+ messages in thread
From: David Ahern @ 2019-04-28  2:27 UTC (permalink / raw)
  To: davem, netdev; +Cc: idosch, David Ahern

From: David Ahern <dsahern@gmail.com>

This series moves IPv4 pcpu cached routes from fib_nh to fib_nh_common
to make the caches avaialable for IPv6 nexthops (fib6_nh) with IPv4
routes. This allows a fib6_nh struct to be used with both IPv4 and
and IPv6 routes.

In addition pcpu caches and exception entries for IPv6 routes are
moved from fib6_info to fib6_nh since they are really a function of
the device and gateway. During the move of each, functions are
refactored such that the core logic is in new helpers that take
a fib6_nh versus a fib6_info.

v2
- reverted patch 2 to use ifdef CONFIG_IP_ROUTE_CLASSID instead
  of IS_ENABLED(CONFIG_IP_ROUTE_CLASSID) to fix compile issues
  reported by kbuild test robot

David Ahern (7):
  ipv4: Move cached routes to fib_nh_common
  ipv4: Pass fib_nh_common to rt_cache_route
  ipv4: Move exception bucket to nh_common
  ipv6: Move pcpu cached routes to fib6_nh
  ipv6: Refactor fib6_drop_pcpu_from
  ipv6: Refactor exception functions
  ipv6: Move exception bucket to fib6_nh

 include/net/ip6_fib.h    |  11 +-
 include/net/ip_fib.h     |   8 +-
 net/ipv4/fib_semantics.c |  45 +++----
 net/ipv4/route.c         |  75 ++++++------
 net/ipv6/addrconf.c      |   6 +-
 net/ipv6/ip6_fib.c       |  62 ++++------
 net/ipv6/route.c         | 302 +++++++++++++++++++++++++++++++++--------------
 7 files changed, 305 insertions(+), 204 deletions(-)

-- 
2.11.0


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v2 net-next 1/7] ipv4: Move cached routes to fib_nh_common
  2019-04-28  2:27 [PATCH v2 net-next 0/7] ipv4 ipv6: Move location of pcpu route cache and exceptions David Ahern
@ 2019-04-28  2:27 ` David Ahern
  2019-04-28  2:27 ` [PATCH v2 net-next 2/7] ipv4: Pass fib_nh_common to rt_cache_route David Ahern
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 12+ messages in thread
From: David Ahern @ 2019-04-28  2:27 UTC (permalink / raw)
  To: davem, netdev; +Cc: idosch, David Ahern

From: David Ahern <dsahern@gmail.com>

While the cached routes, nh_pcpu_rth_output and nh_rth_input, are IPv4
specific, a later patch wants to make them accessible for IPv6 nexthops
with IPv4 routes using a fib6_nh. Move the cached routes from fib_nh to
fib_nh_common and update references.

Initialization of the cached entries is moved to fib_nh_common_init,
and free is moved to fib_nh_common_release.

Change in location only, from fib_nh up to fib_nh_common; no functional
change intended:

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/net/ip_fib.h     |  6 ++++--
 net/ipv4/fib_semantics.c | 33 +++++++++++++++++----------------
 net/ipv4/route.c         | 18 +++++++++---------
 3 files changed, 30 insertions(+), 27 deletions(-)

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 772a9e61bd84..659c5081c40b 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -96,6 +96,10 @@ struct fib_nh_common {
 
 	int			nhc_weight;
 	atomic_t		nhc_upper_bound;
+
+	/* v4 specific, but allows fib6_nh with v4 routes */
+	struct rtable __rcu * __percpu *nhc_pcpu_rth_output;
+	struct rtable __rcu     *nhc_rth_input;
 };
 
 struct fib_nh {
@@ -107,8 +111,6 @@ struct fib_nh {
 #endif
 	__be32			nh_saddr;
 	int			nh_saddr_genid;
-	struct rtable __rcu * __percpu *nh_pcpu_rth_output;
-	struct rtable __rcu	*nh_rth_input;
 	struct fnhe_hash_bucket	__rcu *nh_exceptions;
 #define fib_nh_family		nh_common.nhc_family
 #define fib_nh_dev		nh_common.nhc_dev
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 4336f1ec8ab0..9ef4529db659 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -212,6 +212,8 @@ void fib_nh_common_release(struct fib_nh_common *nhc)
 		dev_put(nhc->nhc_dev);
 
 	lwtstate_put(nhc->nhc_lwtstate);
+	rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output);
+	rt_fibinfo_free(&nhc->nhc_rth_input);
 }
 EXPORT_SYMBOL_GPL(fib_nh_common_release);
 
@@ -223,8 +225,6 @@ void fib_nh_release(struct net *net, struct fib_nh *fib_nh)
 #endif
 	fib_nh_common_release(&fib_nh->nh_common);
 	free_nh_exceptions(fib_nh);
-	rt_fibinfo_free_cpus(fib_nh->nh_pcpu_rth_output);
-	rt_fibinfo_free(&fib_nh->nh_rth_input);
 }
 
 /* Release a nexthop info record */
@@ -491,9 +491,15 @@ int fib_nh_common_init(struct fib_nh_common *nhc, struct nlattr *encap,
 		       u16 encap_type, void *cfg, gfp_t gfp_flags,
 		       struct netlink_ext_ack *extack)
 {
+	int err;
+
+	nhc->nhc_pcpu_rth_output = alloc_percpu_gfp(struct rtable __rcu *,
+						    gfp_flags);
+	if (!nhc->nhc_pcpu_rth_output)
+		return -ENOMEM;
+
 	if (encap) {
 		struct lwtunnel_state *lwtstate;
-		int err;
 
 		if (encap_type == LWTUNNEL_ENCAP_NONE) {
 			NL_SET_ERR_MSG(extack, "LWT encap type not specified");
@@ -502,12 +508,17 @@ int fib_nh_common_init(struct fib_nh_common *nhc, struct nlattr *encap,
 		err = lwtunnel_build_state(encap_type, encap, nhc->nhc_family,
 					   cfg, &lwtstate, extack);
 		if (err)
-			return err;
+			goto lwt_failure;
 
 		nhc->nhc_lwtstate = lwtstate_get(lwtstate);
 	}
 
 	return 0;
+
+lwt_failure:
+	rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output);
+	nhc->nhc_pcpu_rth_output = NULL;
+	return err;
 }
 EXPORT_SYMBOL_GPL(fib_nh_common_init);
 
@@ -515,18 +526,14 @@ int fib_nh_init(struct net *net, struct fib_nh *nh,
 		struct fib_config *cfg, int nh_weight,
 		struct netlink_ext_ack *extack)
 {
-	int err = -ENOMEM;
+	int err;
 
 	nh->fib_nh_family = AF_INET;
 
-	nh->nh_pcpu_rth_output = alloc_percpu(struct rtable __rcu *);
-	if (!nh->nh_pcpu_rth_output)
-		goto err_out;
-
 	err = fib_nh_common_init(&nh->nh_common, cfg->fc_encap,
 				 cfg->fc_encap_type, cfg, GFP_KERNEL, extack);
 	if (err)
-		goto init_failure;
+		return err;
 
 	nh->fib_nh_oif = cfg->fc_oif;
 	nh->fib_nh_gw_family = cfg->fc_gw_family;
@@ -546,12 +553,6 @@ int fib_nh_init(struct net *net, struct fib_nh *nh,
 	nh->fib_nh_weight = nh_weight;
 #endif
 	return 0;
-
-init_failure:
-	rt_fibinfo_free_cpus(nh->nh_pcpu_rth_output);
-	nh->nh_pcpu_rth_output = NULL;
-err_out:
-	return err;
 }
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 4950adeb05c0..15776e98dfa3 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -646,6 +646,7 @@ static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnh
 static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
 				  u32 pmtu, bool lock, unsigned long expires)
 {
+	struct fib_nh_common *nhc = &nh->nh_common;
 	struct fnhe_hash_bucket *hash;
 	struct fib_nh_exception *fnhe;
 	struct rtable *rt;
@@ -715,13 +716,13 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
 		 * stale, so anyone caching it rechecks if this exception
 		 * applies to them.
 		 */
-		rt = rcu_dereference(nh->nh_rth_input);
+		rt = rcu_dereference(nhc->nhc_rth_input);
 		if (rt)
 			rt->dst.obsolete = DST_OBSOLETE_KILL;
 
 		for_each_possible_cpu(i) {
 			struct rtable __rcu **prt;
-			prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i);
+			prt = per_cpu_ptr(nhc->nhc_pcpu_rth_output, i);
 			rt = rcu_dereference(*prt);
 			if (rt)
 				rt->dst.obsolete = DST_OBSOLETE_KILL;
@@ -1471,13 +1472,14 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
 
 static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
 {
+	struct fib_nh_common *nhc = &nh->nh_common;
 	struct rtable *orig, *prev, **p;
 	bool ret = true;
 
 	if (rt_is_input_route(rt)) {
-		p = (struct rtable **)&nh->nh_rth_input;
+		p = (struct rtable **)&nhc->nhc_rth_input;
 	} else {
-		p = (struct rtable **)raw_cpu_ptr(nh->nh_pcpu_rth_output);
+		p = (struct rtable **)raw_cpu_ptr(nhc->nhc_pcpu_rth_output);
 	}
 	orig = *p;
 
@@ -1810,7 +1812,7 @@ static int __mkroute_input(struct sk_buff *skb,
 		if (fnhe)
 			rth = rcu_dereference(fnhe->fnhe_rth_input);
 		else
-			rth = rcu_dereference(nh->nh_rth_input);
+			rth = rcu_dereference(nhc->nhc_rth_input);
 		if (rt_cache_valid(rth)) {
 			skb_dst_set_noref(skb, &rth->dst);
 			goto out;
@@ -2105,10 +2107,8 @@ out:	return err;
 	if (res->fi) {
 		if (!itag) {
 			struct fib_nh_common *nhc = FIB_RES_NHC(*res);
-			struct fib_nh *nh;
 
-			nh = container_of(nhc, struct fib_nh, nh_common);
-			rth = rcu_dereference(nh->nh_rth_input);
+			rth = rcu_dereference(nhc->nhc_rth_input);
 			if (rt_cache_valid(rth)) {
 				skb_dst_set_noref(skb, &rth->dst);
 				err = 0;
@@ -2337,7 +2337,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
 				do_cache = false;
 				goto add;
 			}
-			prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
+			prth = raw_cpu_ptr(nhc->nhc_pcpu_rth_output);
 		}
 		rth = rcu_dereference(*prth);
 		if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst))
-- 
2.11.0


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v2 net-next 2/7] ipv4: Pass fib_nh_common to rt_cache_route
  2019-04-28  2:27 [PATCH v2 net-next 0/7] ipv4 ipv6: Move location of pcpu route cache and exceptions David Ahern
  2019-04-28  2:27 ` [PATCH v2 net-next 1/7] ipv4: Move cached routes to fib_nh_common David Ahern
@ 2019-04-28  2:27 ` David Ahern
  2019-04-28  2:27 ` [PATCH v2 net-next 3/7] ipv4: Move exception bucket to nh_common David Ahern
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 12+ messages in thread
From: David Ahern @ 2019-04-28  2:27 UTC (permalink / raw)
  To: davem, netdev; +Cc: idosch, David Ahern

From: David Ahern <dsahern@gmail.com>

Now that the cached routes are in fib_nh_common, pass it to
rt_cache_route and simplify its callers. For rt_set_nexthop,
the tclassid becomes the last user of fib_nh so move the
container of under the #ifdef CONFIG_IP_ROUTE_CLASSID.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 net/ipv4/route.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 15776e98dfa3..712d22c0d37a 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1470,9 +1470,8 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
 	return ret;
 }
 
-static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
+static bool rt_cache_route(struct fib_nh_common *nhc, struct rtable *rt)
 {
-	struct fib_nh_common *nhc = &nh->nh_common;
 	struct rtable *orig, *prev, **p;
 	bool ret = true;
 
@@ -1576,7 +1575,6 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
 
 	if (fi) {
 		struct fib_nh_common *nhc = FIB_RES_NHC(*res);
-		struct fib_nh *nh;
 
 		if (nhc->nhc_gw_family && nhc->nhc_scope == RT_SCOPE_LINK) {
 			rt->rt_gw_family = nhc->nhc_gw_family;
@@ -1589,15 +1587,19 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
 
 		ip_dst_init_metrics(&rt->dst, fi->fib_metrics);
 
-		nh = container_of(nhc, struct fib_nh, nh_common);
 #ifdef CONFIG_IP_ROUTE_CLASSID
-		rt->dst.tclassid = nh->nh_tclassid;
+		{
+			struct fib_nh *nh;
+
+			nh = container_of(nhc, struct fib_nh, nh_common);
+			rt->dst.tclassid = nh->nh_tclassid;
+		}
 #endif
-		rt->dst.lwtstate = lwtstate_get(nh->fib_nh_lws);
+		rt->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate);
 		if (unlikely(fnhe))
 			cached = rt_bind_exception(rt, fnhe, daddr, do_cache);
 		else if (do_cache)
-			cached = rt_cache_route(nh, rt);
+			cached = rt_cache_route(nhc, rt);
 		if (unlikely(!cached)) {
 			/* Routes we intend to cache in nexthop exception or
 			 * FIB nexthop have the DST_NOCACHE bit clear.
@@ -2139,7 +2141,6 @@ out:	return err;
 
 	if (do_cache) {
 		struct fib_nh_common *nhc = FIB_RES_NHC(*res);
-		struct fib_nh *nh;
 
 		rth->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate);
 		if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
@@ -2148,8 +2149,7 @@ out:	return err;
 			rth->dst.input = lwtunnel_input;
 		}
 
-		nh = container_of(nhc, struct fib_nh, nh_common);
-		if (unlikely(!rt_cache_route(nh, rth)))
+		if (unlikely(!rt_cache_route(nhc, rth)))
 			rt_add_uncached_list(rth);
 	}
 	skb_dst_set(skb, &rth->dst);
-- 
2.11.0


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v2 net-next 3/7] ipv4: Move exception bucket to nh_common
  2019-04-28  2:27 [PATCH v2 net-next 0/7] ipv4 ipv6: Move location of pcpu route cache and exceptions David Ahern
  2019-04-28  2:27 ` [PATCH v2 net-next 1/7] ipv4: Move cached routes to fib_nh_common David Ahern
  2019-04-28  2:27 ` [PATCH v2 net-next 2/7] ipv4: Pass fib_nh_common to rt_cache_route David Ahern
@ 2019-04-28  2:27 ` David Ahern
  2019-04-28  2:27 ` [PATCH v2 net-next 4/7] ipv6: Move pcpu cached routes to fib6_nh David Ahern
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 12+ messages in thread
From: David Ahern @ 2019-04-28  2:27 UTC (permalink / raw)
  To: davem, netdev; +Cc: idosch, David Ahern

From: David Ahern <dsahern@gmail.com>

Similar to the cached routes, make IPv4 exceptions accessible when
using an IPv6 nexthop struct with IPv4 routes. Simplify the exception
functions by passing in fib_nh_common since that is all it needs,
and then cleanup the call sites that have extraneous fib_nh conversions.

As with the cached routes this is a change in location only, from fib_nh
up to fib_nh_common; no functional change intended:

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/net/ip_fib.h     |  2 +-
 net/ipv4/fib_semantics.c | 12 ++++++------
 net/ipv4/route.c         | 41 +++++++++++++++++------------------------
 3 files changed, 24 insertions(+), 31 deletions(-)

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 659c5081c40b..d0e28f4ab099 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -100,6 +100,7 @@ struct fib_nh_common {
 	/* v4 specific, but allows fib6_nh with v4 routes */
 	struct rtable __rcu * __percpu *nhc_pcpu_rth_output;
 	struct rtable __rcu     *nhc_rth_input;
+	struct fnhe_hash_bucket	__rcu *nhc_exceptions;
 };
 
 struct fib_nh {
@@ -111,7 +112,6 @@ struct fib_nh {
 #endif
 	__be32			nh_saddr;
 	int			nh_saddr_genid;
-	struct fnhe_hash_bucket	__rcu *nh_exceptions;
 #define fib_nh_family		nh_common.nhc_family
 #define fib_nh_dev		nh_common.nhc_dev
 #define fib_nh_oif		nh_common.nhc_oif
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 9ef4529db659..107ac9183eb2 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -159,12 +159,12 @@ static void rt_fibinfo_free(struct rtable __rcu **rtp)
 	dst_release_immediate(&rt->dst);
 }
 
-static void free_nh_exceptions(struct fib_nh *nh)
+static void free_nh_exceptions(struct fib_nh_common *nhc)
 {
 	struct fnhe_hash_bucket *hash;
 	int i;
 
-	hash = rcu_dereference_protected(nh->nh_exceptions, 1);
+	hash = rcu_dereference_protected(nhc->nhc_exceptions, 1);
 	if (!hash)
 		return;
 	for (i = 0; i < FNHE_HASH_SIZE; i++) {
@@ -214,6 +214,7 @@ void fib_nh_common_release(struct fib_nh_common *nhc)
 	lwtstate_put(nhc->nhc_lwtstate);
 	rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output);
 	rt_fibinfo_free(&nhc->nhc_rth_input);
+	free_nh_exceptions(nhc);
 }
 EXPORT_SYMBOL_GPL(fib_nh_common_release);
 
@@ -224,7 +225,6 @@ void fib_nh_release(struct net *net, struct fib_nh *fib_nh)
 		net->ipv4.fib_num_tclassid_users--;
 #endif
 	fib_nh_common_release(&fib_nh->nh_common);
-	free_nh_exceptions(fib_nh);
 }
 
 /* Release a nexthop info record */
@@ -1712,12 +1712,12 @@ static int call_fib_nh_notifiers(struct fib_nh *nh,
  * - if the new MTU is greater than the PMTU, don't make any change
  * - otherwise, unlock and set PMTU
  */
-static void nh_update_mtu(struct fib_nh *nh, u32 new, u32 orig)
+static void nh_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig)
 {
 	struct fnhe_hash_bucket *bucket;
 	int i;
 
-	bucket = rcu_dereference_protected(nh->nh_exceptions, 1);
+	bucket = rcu_dereference_protected(nhc->nhc_exceptions, 1);
 	if (!bucket)
 		return;
 
@@ -1748,7 +1748,7 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
 
 	hlist_for_each_entry(nh, head, nh_hash) {
 		if (nh->fib_nh_dev == dev)
-			nh_update_mtu(nh, dev->mtu, orig_mtu);
+			nh_update_mtu(&nh->nh_common, dev->mtu, orig_mtu);
 	}
 }
 
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 712d22c0d37a..fa53a1d8292e 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -643,10 +643,10 @@ static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnh
 	}
 }
 
-static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
-				  u32 pmtu, bool lock, unsigned long expires)
+static void update_or_create_fnhe(struct fib_nh_common *nhc, __be32 daddr,
+				  __be32 gw, u32 pmtu, bool lock,
+				  unsigned long expires)
 {
-	struct fib_nh_common *nhc = &nh->nh_common;
 	struct fnhe_hash_bucket *hash;
 	struct fib_nh_exception *fnhe;
 	struct rtable *rt;
@@ -654,17 +654,17 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
 	unsigned int i;
 	int depth;
 
-	genid = fnhe_genid(dev_net(nh->fib_nh_dev));
+	genid = fnhe_genid(dev_net(nhc->nhc_dev));
 	hval = fnhe_hashfun(daddr);
 
 	spin_lock_bh(&fnhe_lock);
 
-	hash = rcu_dereference(nh->nh_exceptions);
+	hash = rcu_dereference(nhc->nhc_exceptions);
 	if (!hash) {
 		hash = kcalloc(FNHE_HASH_SIZE, sizeof(*hash), GFP_ATOMIC);
 		if (!hash)
 			goto out_unlock;
-		rcu_assign_pointer(nh->nh_exceptions, hash);
+		rcu_assign_pointer(nhc->nhc_exceptions, hash);
 	}
 
 	hash += hval;
@@ -789,10 +789,8 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
 		} else {
 			if (fib_lookup(net, fl4, &res, 0) == 0) {
 				struct fib_nh_common *nhc = FIB_RES_NHC(res);
-				struct fib_nh *nh;
 
-				nh = container_of(nhc, struct fib_nh, nh_common);
-				update_or_create_fnhe(nh, fl4->daddr, new_gw,
+				update_or_create_fnhe(nhc, fl4->daddr, new_gw,
 						0, false,
 						jiffies + ip_rt_gc_timeout);
 			}
@@ -1040,10 +1038,8 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
 	rcu_read_lock();
 	if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) {
 		struct fib_nh_common *nhc = FIB_RES_NHC(res);
-		struct fib_nh *nh;
 
-		nh = container_of(nhc, struct fib_nh, nh_common);
-		update_or_create_fnhe(nh, fl4->daddr, 0, mtu, lock,
+		update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock,
 				      jiffies + ip_rt_mtu_expires);
 	}
 	rcu_read_unlock();
@@ -1329,7 +1325,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
 	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
 }
 
-static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
+static void ip_del_fnhe(struct fib_nh_common *nhc, __be32 daddr)
 {
 	struct fnhe_hash_bucket *hash;
 	struct fib_nh_exception *fnhe, __rcu **fnhe_p;
@@ -1337,7 +1333,7 @@ static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
 
 	spin_lock_bh(&fnhe_lock);
 
-	hash = rcu_dereference_protected(nh->nh_exceptions,
+	hash = rcu_dereference_protected(nhc->nhc_exceptions,
 					 lockdep_is_held(&fnhe_lock));
 	hash += hval;
 
@@ -1363,9 +1359,10 @@ static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
 	spin_unlock_bh(&fnhe_lock);
 }
 
-static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
+static struct fib_nh_exception *find_exception(struct fib_nh_common *nhc,
+					       __be32 daddr)
 {
-	struct fnhe_hash_bucket *hash = rcu_dereference(nh->nh_exceptions);
+	struct fnhe_hash_bucket *hash = rcu_dereference(nhc->nhc_exceptions);
 	struct fib_nh_exception *fnhe;
 	u32 hval;
 
@@ -1379,7 +1376,7 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
 		if (fnhe->fnhe_daddr == daddr) {
 			if (fnhe->fnhe_expires &&
 			    time_after(jiffies, fnhe->fnhe_expires)) {
-				ip_del_fnhe(nh, daddr);
+				ip_del_fnhe(nhc, daddr);
 				break;
 			}
 			return fnhe;
@@ -1406,10 +1403,9 @@ u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr)
 		mtu = fi->fib_mtu;
 
 	if (likely(!mtu)) {
-		struct fib_nh *nh = container_of(nhc, struct fib_nh, nh_common);
 		struct fib_nh_exception *fnhe;
 
-		fnhe = find_exception(nh, daddr);
+		fnhe = find_exception(nhc, daddr);
 		if (fnhe && !time_after_eq(jiffies, fnhe->fnhe_expires))
 			mtu = fnhe->fnhe_pmtu;
 	}
@@ -1760,7 +1756,6 @@ static int __mkroute_input(struct sk_buff *skb,
 	struct net_device *dev = nhc->nhc_dev;
 	struct fib_nh_exception *fnhe;
 	struct rtable *rth;
-	struct fib_nh *nh;
 	int err;
 	struct in_device *out_dev;
 	bool do_cache;
@@ -1808,8 +1803,7 @@ static int __mkroute_input(struct sk_buff *skb,
 		}
 	}
 
-	nh = container_of(nhc, struct fib_nh, nh_common);
-	fnhe = find_exception(nh, daddr);
+	fnhe = find_exception(nhc, daddr);
 	if (do_cache) {
 		if (fnhe)
 			rth = rcu_dereference(fnhe->fnhe_rth_input);
@@ -2321,10 +2315,9 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
 	do_cache &= fi != NULL;
 	if (fi) {
 		struct fib_nh_common *nhc = FIB_RES_NHC(*res);
-		struct fib_nh *nh = container_of(nhc, struct fib_nh, nh_common);
 		struct rtable __rcu **prth;
 
-		fnhe = find_exception(nh, fl4->daddr);
+		fnhe = find_exception(nhc, fl4->daddr);
 		if (!do_cache)
 			goto add;
 		if (fnhe) {
-- 
2.11.0


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v2 net-next 4/7] ipv6: Move pcpu cached routes to fib6_nh
  2019-04-28  2:27 [PATCH v2 net-next 0/7] ipv4 ipv6: Move location of pcpu route cache and exceptions David Ahern
                   ` (2 preceding siblings ...)
  2019-04-28  2:27 ` [PATCH v2 net-next 3/7] ipv4: Move exception bucket to nh_common David Ahern
@ 2019-04-28  2:27 ` David Ahern
  2019-04-28  2:27 ` [PATCH v2 net-next 5/7] ipv6: Refactor fib6_drop_pcpu_from David Ahern
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 12+ messages in thread
From: David Ahern @ 2019-04-28  2:27 UTC (permalink / raw)
  To: davem, netdev; +Cc: idosch, David Ahern

From: David Ahern <dsahern@gmail.com>

rt6_info are specific instances of a fib entry and are tied to a
device and gateway - ie., a nexthop. Before nexthop objects, IPv6 fib
entries have separate fib6_info for each nexthop in a multipath route,
so the location of the pcpu cache in the fib6_info struct worked.
However, with nexthop objects a fib6_info can point to a set of nexthops
(yet another alignment of ipv6 with ipv4). Accordingly, the pcpu
cache needs to be moved to the fib6_nh struct so the cached entries
are local to the nexthop specification used to create the rt6_info.

Initialization and free of the pcpu entries moved to fib6_nh_init and
fib6_nh_release.

Change in location only, from fib6_info down to fib6_nh; no other
functional change intended.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/net/ip6_fib.h |  3 ++-
 net/ipv6/addrconf.c   |  6 +++---
 net/ipv6/ip6_fib.c    | 33 ++++++---------------------------
 net/ipv6/route.c      | 29 +++++++++++++++++++++++++++--
 4 files changed, 38 insertions(+), 33 deletions(-)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 40105738e2f6..cf715835a330 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -131,6 +131,8 @@ struct fib6_nh {
 #ifdef CONFIG_IPV6_ROUTER_PREF
 	unsigned long		last_probe;
 #endif
+
+	struct rt6_info * __percpu *rt6i_pcpu;
 };
 
 struct fib6_info {
@@ -156,7 +158,6 @@ struct fib6_info {
 	struct rt6key			fib6_src;
 	struct rt6key			fib6_prefsrc;
 
-	struct rt6_info * __percpu	*rt6i_pcpu;
 	struct rt6_exception_bucket __rcu *rt6i_exception_bucket;
 
 	u32				fib6_metric;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 340a0f06f974..25754175097b 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -6339,16 +6339,16 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val)
 	list_for_each_entry(ifa, &idev->addr_list, if_list) {
 		spin_lock(&ifa->lock);
 		if (ifa->rt) {
-			struct fib6_info *rt = ifa->rt;
+			struct fib6_nh *nh = &ifa->rt->fib6_nh;
 			int cpu;
 
 			rcu_read_lock();
 			ifa->rt->dst_nopolicy = val ? true : false;
-			if (rt->rt6i_pcpu) {
+			if (nh->rt6i_pcpu) {
 				for_each_possible_cpu(cpu) {
 					struct rt6_info **rtp;
 
-					rtp = per_cpu_ptr(rt->rt6i_pcpu, cpu);
+					rtp = per_cpu_ptr(nh->rt6i_pcpu, cpu);
 					addrconf_set_nopolicy(*rtp, val);
 				}
 			}
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index a8919c217cc2..793830b2965d 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -155,12 +155,6 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags)
 	if (!f6i)
 		return NULL;
 
-	f6i->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, gfp_flags);
-	if (!f6i->rt6i_pcpu) {
-		kfree(f6i);
-		return NULL;
-	}
-
 	INIT_LIST_HEAD(&f6i->fib6_siblings);
 	refcount_set(&f6i->fib6_ref, 1);
 
@@ -177,25 +171,6 @@ void fib6_info_destroy_rcu(struct rcu_head *head)
 	bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket, 1);
 	kfree(bucket);
 
-	if (f6i->rt6i_pcpu) {
-		int cpu;
-
-		for_each_possible_cpu(cpu) {
-			struct rt6_info **ppcpu_rt;
-			struct rt6_info *pcpu_rt;
-
-			ppcpu_rt = per_cpu_ptr(f6i->rt6i_pcpu, cpu);
-			pcpu_rt = *ppcpu_rt;
-			if (pcpu_rt) {
-				dst_dev_put(&pcpu_rt->dst);
-				dst_release(&pcpu_rt->dst);
-				*ppcpu_rt = NULL;
-			}
-		}
-
-		free_percpu(f6i->rt6i_pcpu);
-	}
-
 	fib6_nh_release(&f6i->fib6_nh);
 
 	ip_fib_metrics_put(f6i->fib6_metrics);
@@ -902,8 +877,12 @@ static struct fib6_node *fib6_add_1(struct net *net,
 static void fib6_drop_pcpu_from(struct fib6_info *f6i,
 				const struct fib6_table *table)
 {
+	struct fib6_nh *fib6_nh = &f6i->fib6_nh;
 	int cpu;
 
+	if (!rcu_access_pointer(fib6_nh->rt6i_pcpu))
+		return;
+
 	/* release the reference to this fib entry from
 	 * all of its cached pcpu routes
 	 */
@@ -911,7 +890,7 @@ static void fib6_drop_pcpu_from(struct fib6_info *f6i,
 		struct rt6_info **ppcpu_rt;
 		struct rt6_info *pcpu_rt;
 
-		ppcpu_rt = per_cpu_ptr(f6i->rt6i_pcpu, cpu);
+		ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu);
 		pcpu_rt = *ppcpu_rt;
 		if (pcpu_rt) {
 			struct fib6_info *from;
@@ -951,7 +930,7 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
 				    lockdep_is_held(&table->tb6_lock));
 		}
 
-		if (rt->rt6i_pcpu)
+		if (rt->fib6_nh.rt6i_pcpu)
 			fib6_drop_pcpu_from(rt, table);
 	}
 }
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 9c0127a44f9f..32c526d008e2 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1273,7 +1273,7 @@ static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
 {
 	struct rt6_info *pcpu_rt, **p;
 
-	p = this_cpu_ptr(res->f6i->rt6i_pcpu);
+	p = this_cpu_ptr(res->nh->rt6i_pcpu);
 	pcpu_rt = *p;
 
 	if (pcpu_rt)
@@ -1294,7 +1294,7 @@ static struct rt6_info *rt6_make_pcpu_route(struct net *net,
 	}
 
 	dst_hold(&pcpu_rt->dst);
-	p = this_cpu_ptr(res->f6i->rt6i_pcpu);
+	p = this_cpu_ptr(res->nh->rt6i_pcpu);
 	prev = cmpxchg(p, NULL, pcpu_rt);
 	BUG_ON(prev);
 
@@ -3063,6 +3063,12 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
 	    !netif_carrier_ok(dev))
 		fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
 
+	fib6_nh->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, gfp_flags);
+	if (!fib6_nh->rt6i_pcpu) {
+		err = -ENOMEM;
+		goto out;
+	}
+
 	err = fib_nh_common_init(&fib6_nh->nh_common, cfg->fc_encap,
 				 cfg->fc_encap_type, cfg, gfp_flags, extack);
 	if (err)
@@ -3087,6 +3093,25 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
 
 void fib6_nh_release(struct fib6_nh *fib6_nh)
 {
+	if (fib6_nh->rt6i_pcpu) {
+		int cpu;
+
+		for_each_possible_cpu(cpu) {
+			struct rt6_info **ppcpu_rt;
+			struct rt6_info *pcpu_rt;
+
+			ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu);
+			pcpu_rt = *ppcpu_rt;
+			if (pcpu_rt) {
+				dst_dev_put(&pcpu_rt->dst);
+				dst_release(&pcpu_rt->dst);
+				*ppcpu_rt = NULL;
+			}
+		}
+
+		free_percpu(fib6_nh->rt6i_pcpu);
+	}
+
 	fib_nh_common_release(&fib6_nh->nh_common);
 }
 
-- 
2.11.0


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v2 net-next 5/7] ipv6: Refactor fib6_drop_pcpu_from
  2019-04-28  2:27 [PATCH v2 net-next 0/7] ipv4 ipv6: Move location of pcpu route cache and exceptions David Ahern
                   ` (3 preceding siblings ...)
  2019-04-28  2:27 ` [PATCH v2 net-next 4/7] ipv6: Move pcpu cached routes to fib6_nh David Ahern
@ 2019-04-28  2:27 ` David Ahern
  2019-04-28  2:27 ` [PATCH v2 net-next 6/7] ipv6: Refactor exception functions David Ahern
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 12+ messages in thread
From: David Ahern @ 2019-04-28  2:27 UTC (permalink / raw)
  To: davem, netdev; +Cc: idosch, David Ahern

From: David Ahern <dsahern@gmail.com>

Move the existing pcpu walk in fib6_drop_pcpu_from to a new
helper, __fib6_drop_pcpu_from, that can be invoked per fib6_nh
with a reference to the from entries that need to be evicted. If
the passed in from is non-NULL then only entries associated with
that fib6_info are removed (fib entry is deleted); if the from
is NULL are entries are flushed (nexthop is deleted).

For fib6_info entries with builtin fib6_nh (ie., current code)
there is no change in behavior.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 net/ipv6/ip6_fib.c | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 793830b2965d..aaffe4a653d8 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -874,10 +874,10 @@ static struct fib6_node *fib6_add_1(struct net *net,
 	return ln;
 }
 
-static void fib6_drop_pcpu_from(struct fib6_info *f6i,
-				const struct fib6_table *table)
+static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh,
+				  const struct fib6_info *from,
+				  const struct fib6_table *table)
 {
-	struct fib6_nh *fib6_nh = &f6i->fib6_nh;
 	int cpu;
 
 	if (!rcu_access_pointer(fib6_nh->rt6i_pcpu))
@@ -892,17 +892,27 @@ static void fib6_drop_pcpu_from(struct fib6_info *f6i,
 
 		ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu);
 		pcpu_rt = *ppcpu_rt;
-		if (pcpu_rt) {
-			struct fib6_info *from;
+		if (pcpu_rt &&
+		    (!from || rcu_access_pointer(pcpu_rt->from) == from)) {
+			struct fib6_info *pfrom;
 
-			from = rcu_dereference_protected(pcpu_rt->from,
+			pfrom = rcu_dereference_protected(pcpu_rt->from,
 					     lockdep_is_held(&table->tb6_lock));
 			rcu_assign_pointer(pcpu_rt->from, NULL);
-			fib6_info_release(from);
+			fib6_info_release(pfrom);
 		}
 	}
 }
 
+static void fib6_drop_pcpu_from(struct fib6_info *f6i,
+				const struct fib6_table *table)
+{
+	struct fib6_nh *fib6_nh;
+
+	fib6_nh = &f6i->fib6_nh;
+	__fib6_drop_pcpu_from(fib6_nh, f6i, table);
+}
+
 static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
 			  struct net *net)
 {
@@ -930,8 +940,7 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
 				    lockdep_is_held(&table->tb6_lock));
 		}
 
-		if (rt->fib6_nh.rt6i_pcpu)
-			fib6_drop_pcpu_from(rt, table);
+		fib6_drop_pcpu_from(rt, table);
 	}
 }
 
-- 
2.11.0


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v2 net-next 6/7] ipv6: Refactor exception functions
  2019-04-28  2:27 [PATCH v2 net-next 0/7] ipv4 ipv6: Move location of pcpu route cache and exceptions David Ahern
                   ` (4 preceding siblings ...)
  2019-04-28  2:27 ` [PATCH v2 net-next 5/7] ipv6: Refactor fib6_drop_pcpu_from David Ahern
@ 2019-04-28  2:27 ` David Ahern
  2019-04-28  2:27 ` [PATCH v2 net-next 7/7] ipv6: Move exception bucket to fib6_nh David Ahern
  2019-04-28 15:06 ` [PATCH v2 net-next 0/7] ipv4 ipv6: Move location of pcpu route cache and exceptions Eric Dumazet
  7 siblings, 0 replies; 12+ messages in thread
From: David Ahern @ 2019-04-28  2:27 UTC (permalink / raw)
  To: davem, netdev; +Cc: idosch, David Ahern

From: David Ahern <dsahern@gmail.com>

Before moving exception bucket from fib6_info to fib6_nh, refactor
rt6_flush_exceptions, rt6_remove_exception_rt, rt6_mtu_change_route,
and rt6_update_exception_stamp_rt. In all 3 cases, move the primary
logic into a new helper that starts with fib6_nh_. The latter 3
functions still take a fib6_info; this will be changed to fib6_nh
in the next patch.

In the case of rt6_mtu_change_route, move the fib6_metric_locked
out as a standalone check - no need to call the new function if
the fib entry has the mtu locked. Also, add fib6_info to
rt6_mtu_change_arg as a way of passing the fib entry to the new
helper.

No functional change intended. The goal here is to make the next
patch easier to review by moving existing lookup logic for each to
new helpers.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 net/ipv6/route.c | 134 +++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 86 insertions(+), 48 deletions(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 32c526d008e2..a1eb7e526ccd 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1540,7 +1540,7 @@ static int rt6_insert_exception(struct rt6_info *nrt,
 	return err;
 }
 
-void rt6_flush_exceptions(struct fib6_info *rt)
+static void fib6_nh_flush_exceptions(struct fib6_nh *nh, struct fib6_info *from)
 {
 	struct rt6_exception_bucket *bucket;
 	struct rt6_exception *rt6_ex;
@@ -1549,9 +1549,9 @@ void rt6_flush_exceptions(struct fib6_info *rt)
 
 	spin_lock_bh(&rt6_exception_lock);
 	/* Prevent rt6_insert_exception() to recreate the bucket list */
-	rt->exception_bucket_flushed = 1;
+	from->exception_bucket_flushed = 1;
 
-	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
+	bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
 				    lockdep_is_held(&rt6_exception_lock));
 	if (!bucket)
 		goto out;
@@ -1567,6 +1567,11 @@ void rt6_flush_exceptions(struct fib6_info *rt)
 	spin_unlock_bh(&rt6_exception_lock);
 }
 
+void rt6_flush_exceptions(struct fib6_info *f6i)
+{
+	fib6_nh_flush_exceptions(&f6i->fib6_nh, f6i);
+}
+
 /* Find cached rt in the hash table inside passed in rt
  * Caller has to hold rcu_read_lock()
  */
@@ -1600,19 +1605,14 @@ static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
 }
 
 /* Remove the passed in cached rt from the hash table that contains it */
-static int rt6_remove_exception_rt(struct rt6_info *rt)
+static int fib6_nh_remove_exception(const struct fib6_info *from, int plen,
+				    const struct rt6_info *rt)
 {
+	const struct in6_addr *src_key = NULL;
 	struct rt6_exception_bucket *bucket;
-	struct in6_addr *src_key = NULL;
 	struct rt6_exception *rt6_ex;
-	struct fib6_info *from;
 	int err;
 
-	from = rcu_dereference(rt->from);
-	if (!from ||
-	    !(rt->rt6i_flags & RTF_CACHE))
-		return -EINVAL;
-
 	if (!rcu_access_pointer(from->rt6i_exception_bucket))
 		return -ENOENT;
 
@@ -1620,13 +1620,12 @@ static int rt6_remove_exception_rt(struct rt6_info *rt)
 	bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
 				    lockdep_is_held(&rt6_exception_lock));
 #ifdef CONFIG_IPV6_SUBTREES
-	/* rt6i_src.plen != 0 indicates 'from' is in subtree
-	 * and exception table is indexed by a hash of
-	 * both rt6i_dst and rt6i_src.
+	/* plen != 0 indicates 'from' is in subtree and exception
+	 * table is indexed by a hash of both rt6i_dst and rt6i_src.
 	 * Otherwise, the exception table is indexed by
 	 * a hash of only rt6i_dst.
 	 */
-	if (from->fib6_src.plen)
+	if (plen)
 		src_key = &rt->rt6i_src.addr;
 #endif
 	rt6_ex = __rt6_find_exception_spinlock(&bucket,
@@ -1643,31 +1642,37 @@ static int rt6_remove_exception_rt(struct rt6_info *rt)
 	return err;
 }
 
+static int rt6_remove_exception_rt(struct rt6_info *rt)
+{
+	struct fib6_info *from;
+
+	from = rcu_dereference(rt->from);
+	if (!from ||
+	    !(rt->rt6i_flags & RTF_CACHE))
+		return -EINVAL;
+
+	return fib6_nh_remove_exception(from, from->fib6_src.plen, rt);
+}
+
 /* Find rt6_ex which contains the passed in rt cache and
  * refresh its stamp
  */
-static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
+static void fib6_nh_update_exception(const struct fib6_info *from, int plen,
+				     const struct rt6_info *rt)
 {
+	const struct in6_addr *src_key = NULL;
 	struct rt6_exception_bucket *bucket;
-	struct in6_addr *src_key = NULL;
 	struct rt6_exception *rt6_ex;
-	struct fib6_info *from;
-
-	rcu_read_lock();
-	from = rcu_dereference(rt->from);
-	if (!from || !(rt->rt6i_flags & RTF_CACHE))
-		goto unlock;
 
 	bucket = rcu_dereference(from->rt6i_exception_bucket);
 
 #ifdef CONFIG_IPV6_SUBTREES
-	/* rt6i_src.plen != 0 indicates 'from' is in subtree
-	 * and exception table is indexed by a hash of
-	 * both rt6i_dst and rt6i_src.
+	/* plen != 0 indicates 'from' is in subtree and exception
+	 * table is indexed by a hash of both rt6i_dst and rt6i_src.
 	 * Otherwise, the exception table is indexed by
 	 * a hash of only rt6i_dst.
 	 */
-	if (from->fib6_src.plen)
+	if (plen)
 		src_key = &rt->rt6i_src.addr;
 #endif
 	rt6_ex = __rt6_find_exception_rcu(&bucket,
@@ -1675,7 +1680,19 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
 					  src_key);
 	if (rt6_ex)
 		rt6_ex->stamp = jiffies;
+}
+
+static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
+{
+	struct fib6_info *from;
+
+	rcu_read_lock();
+
+	from = rcu_dereference(rt->from);
+	if (!from || !(rt->rt6i_flags & RTF_CACHE))
+		goto unlock;
 
+	fib6_nh_update_exception(from, from->fib6_src.plen, rt);
 unlock:
 	rcu_read_unlock();
 }
@@ -1812,9 +1829,9 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
 	gc_args->more++;
 }
 
-void rt6_age_exceptions(struct fib6_info *rt,
-			struct fib6_gc_args *gc_args,
-			unsigned long now)
+static void fib6_nh_age_exceptions(struct fib6_info *rt,
+				   struct fib6_gc_args *gc_args,
+				   unsigned long now)
 {
 	struct rt6_exception_bucket *bucket;
 	struct rt6_exception *rt6_ex;
@@ -1843,6 +1860,13 @@ void rt6_age_exceptions(struct fib6_info *rt,
 	rcu_read_unlock_bh();
 }
 
+void rt6_age_exceptions(struct fib6_info *rt,
+			struct fib6_gc_args *gc_args,
+			unsigned long now)
+{
+	fib6_nh_age_exceptions(rt, gc_args, now);
+}
+
 /* must be called with rcu lock held */
 int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif,
 		      struct flowi6 *fl6, struct fib6_result *res, int strict)
@@ -4188,9 +4212,36 @@ void rt6_disable_ip(struct net_device *dev, unsigned long event)
 struct rt6_mtu_change_arg {
 	struct net_device *dev;
 	unsigned int mtu;
+	struct fib6_info *f6i;
 };
 
-static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
+static int fib6_nh_mtu_change(struct fib6_info *f6i, void *_arg)
+{
+	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *)_arg;
+	struct fib6_nh *nh = &f6i->fib6_nh;
+
+	/* For administrative MTU increase, there is no way to discover
+	 * IPv6 PMTU increase, so PMTU increase should be updated here.
+	 * Since RFC 1981 doesn't include administrative MTU increase
+	 * update PMTU increase is a MUST. (i.e. jumbo frame)
+	 */
+	if (nh->fib_nh_dev == arg->dev) {
+		struct inet6_dev *idev = __in6_dev_get(arg->dev);
+		u32 mtu = f6i->fib6_pmtu;
+
+		if (mtu >= arg->mtu ||
+		    (mtu < arg->mtu && mtu == idev->cnf.mtu6))
+			fib6_metric_set(f6i, RTAX_MTU, arg->mtu);
+
+		spin_lock_bh(&rt6_exception_lock);
+		rt6_exceptions_update_pmtu(idev, f6i, arg->mtu);
+		spin_unlock_bh(&rt6_exception_lock);
+	}
+
+	return 0;
+}
+
+static int rt6_mtu_change_route(struct fib6_info *f6i, void *p_arg)
 {
 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
 	struct inet6_dev *idev;
@@ -4205,24 +4256,11 @@ static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
 	if (!idev)
 		return 0;
 
-	/* For administrative MTU increase, there is no way to discover
-	   IPv6 PMTU increase, so PMTU increase should be updated here.
-	   Since RFC 1981 doesn't include administrative MTU increase
-	   update PMTU increase is a MUST. (i.e. jumbo frame)
-	 */
-	if (rt->fib6_nh.fib_nh_dev == arg->dev &&
-	    !fib6_metric_locked(rt, RTAX_MTU)) {
-		u32 mtu = rt->fib6_pmtu;
-
-		if (mtu >= arg->mtu ||
-		    (mtu < arg->mtu && mtu == idev->cnf.mtu6))
-			fib6_metric_set(rt, RTAX_MTU, arg->mtu);
+	if (fib6_metric_locked(f6i, RTAX_MTU))
+		return 0;
 
-		spin_lock_bh(&rt6_exception_lock);
-		rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
-		spin_unlock_bh(&rt6_exception_lock);
-	}
-	return 0;
+	arg->f6i = f6i;
+	return fib6_nh_mtu_change(f6i, arg);
 }
 
 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
-- 
2.11.0


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v2 net-next 7/7] ipv6: Move exception bucket to fib6_nh
  2019-04-28  2:27 [PATCH v2 net-next 0/7] ipv4 ipv6: Move location of pcpu route cache and exceptions David Ahern
                   ` (5 preceding siblings ...)
  2019-04-28  2:27 ` [PATCH v2 net-next 6/7] ipv6: Refactor exception functions David Ahern
@ 2019-04-28  2:27 ` David Ahern
  2019-04-28 15:06 ` [PATCH v2 net-next 0/7] ipv4 ipv6: Move location of pcpu route cache and exceptions Eric Dumazet
  7 siblings, 0 replies; 12+ messages in thread
From: David Ahern @ 2019-04-28  2:27 UTC (permalink / raw)
  To: davem, netdev; +Cc: idosch, David Ahern

From: David Ahern <dsahern@gmail.com>

Similar to the pcpu routes exceptions are really per nexthop, so move
rt6i_exception_bucket from fib6_info to fib6_nh.

To avoid additional increases to the size of fib6_nh for a 1-bit flag,
use the lowest bit in the allocated memory pointer for the flushed flag.
Add helpers for retrieving the bucket pointer to mask off the flag.

The cleanup of the exception bucket is moved to fib6_nh_release.

fib6_nh_flush_exceptions can now be called from 2 contexts:
1. deleting a fib entry
2. deleting a fib6_nh

For 1., fib6_nh_flush_exceptions is called for a specific fib6_info that
is getting deleted. All exceptions in the cache using the entry are
deleted. For 2, the fib6_nh itself is getting destroyed so
fib6_nh_flush_exceptions is called for a NULL fib6_info which means
flush all entries.

The pmtu.sh selftest exercises the affected code paths - from creating
exceptions to cleaning them up on device delete. All tests pass without
any rcu locking or memleak warnings.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/net/ip6_fib.h |   8 +--
 net/ipv6/ip6_fib.c    |   6 --
 net/ipv6/route.c      | 189 +++++++++++++++++++++++++++++++++-----------------
 3 files changed, 128 insertions(+), 75 deletions(-)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index cf715835a330..b5a3824554eb 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -133,6 +133,7 @@ struct fib6_nh {
 #endif
 
 	struct rt6_info * __percpu *rt6i_pcpu;
+	struct rt6_exception_bucket __rcu *rt6i_exception_bucket;
 };
 
 struct fib6_info {
@@ -158,17 +159,14 @@ struct fib6_info {
 	struct rt6key			fib6_src;
 	struct rt6key			fib6_prefsrc;
 
-	struct rt6_exception_bucket __rcu *rt6i_exception_bucket;
-
 	u32				fib6_metric;
 	u8				fib6_protocol;
 	u8				fib6_type;
-	u8				exception_bucket_flushed:1,
-					should_flush:1,
+	u8				should_flush:1,
 					dst_nocount:1,
 					dst_nopolicy:1,
 					dst_host:1,
-					unused:3;
+					unused:4;
 
 	struct fib6_nh			fib6_nh;
 	struct rcu_head			rcu;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index aaffe4a653d8..b091657b99c2 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -164,17 +164,11 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags)
 void fib6_info_destroy_rcu(struct rcu_head *head)
 {
 	struct fib6_info *f6i = container_of(head, struct fib6_info, rcu);
-	struct rt6_exception_bucket *bucket;
 
 	WARN_ON(f6i->fib6_node);
 
-	bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket, 1);
-	kfree(bucket);
-
 	fib6_nh_release(&f6i->fib6_nh);
-
 	ip_fib_metrics_put(f6i->fib6_metrics);
-
 	kfree(f6i);
 }
 EXPORT_SYMBOL_GPL(fib6_info_destroy_rcu);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a1eb7e526ccd..1364affa0380 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1459,25 +1459,74 @@ static unsigned int fib6_mtu(const struct fib6_result *res)
 	return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
 }
 
+#define FIB6_EXCEPTION_BUCKET_FLUSHED  0x1UL
+
+/* used when the flushed bit is not relevant, only access to the bucket
+ * (ie., all bucket users except rt6_insert_exception);
+ *
+ * called under rcu lock; sometimes called with rt6_exception_lock held
+ */
+static
+struct rt6_exception_bucket *fib6_nh_get_excptn_bucket(const struct fib6_nh *nh,
+						       spinlock_t *lock)
+{
+	struct rt6_exception_bucket *bucket;
+
+	if (lock)
+		bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
+						   lockdep_is_held(lock));
+	else
+		bucket = rcu_dereference(nh->rt6i_exception_bucket);
+
+	/* remove bucket flushed bit if set */
+	if (bucket) {
+		unsigned long p = (unsigned long)bucket;
+
+		p &= ~FIB6_EXCEPTION_BUCKET_FLUSHED;
+		bucket = (struct rt6_exception_bucket *)p;
+	}
+
+	return bucket;
+}
+
+static bool fib6_nh_excptn_bucket_flushed(struct rt6_exception_bucket *bucket)
+{
+	unsigned long p = (unsigned long)bucket;
+
+	return !!(p & FIB6_EXCEPTION_BUCKET_FLUSHED);
+}
+
+/* called with rt6_exception_lock held */
+static void fib6_nh_excptn_bucket_set_flushed(struct fib6_nh *nh,
+					      spinlock_t *lock)
+{
+	struct rt6_exception_bucket *bucket;
+	unsigned long p;
+
+	bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
+					   lockdep_is_held(lock));
+
+	p = (unsigned long)bucket;
+	p |= FIB6_EXCEPTION_BUCKET_FLUSHED;
+	bucket = (struct rt6_exception_bucket *)p;
+	rcu_assign_pointer(nh->rt6i_exception_bucket, bucket);
+}
+
 static int rt6_insert_exception(struct rt6_info *nrt,
 				const struct fib6_result *res)
 {
 	struct net *net = dev_net(nrt->dst.dev);
 	struct rt6_exception_bucket *bucket;
+	struct fib6_info *f6i = res->f6i;
 	struct in6_addr *src_key = NULL;
 	struct rt6_exception *rt6_ex;
-	struct fib6_info *f6i = res->f6i;
+	struct fib6_nh *nh = res->nh;
 	int err = 0;
 
 	spin_lock_bh(&rt6_exception_lock);
 
-	if (f6i->exception_bucket_flushed) {
-		err = -EINVAL;
-		goto out;
-	}
-
-	bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket,
-					lockdep_is_held(&rt6_exception_lock));
+	bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
+					  lockdep_is_held(&rt6_exception_lock));
 	if (!bucket) {
 		bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
 				 GFP_ATOMIC);
@@ -1485,7 +1534,10 @@ static int rt6_insert_exception(struct rt6_info *nrt,
 			err = -ENOMEM;
 			goto out;
 		}
-		rcu_assign_pointer(f6i->rt6i_exception_bucket, bucket);
+		rcu_assign_pointer(nh->rt6i_exception_bucket, bucket);
+	} else if (fib6_nh_excptn_bucket_flushed(bucket)) {
+		err = -EINVAL;
+		goto out;
 	}
 
 #ifdef CONFIG_IPV6_SUBTREES
@@ -1548,21 +1600,24 @@ static void fib6_nh_flush_exceptions(struct fib6_nh *nh, struct fib6_info *from)
 	int i;
 
 	spin_lock_bh(&rt6_exception_lock);
-	/* Prevent rt6_insert_exception() to recreate the bucket list */
-	from->exception_bucket_flushed = 1;
 
-	bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
-				    lockdep_is_held(&rt6_exception_lock));
+	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
 	if (!bucket)
 		goto out;
 
+	/* Prevent rt6_insert_exception() to recreate the bucket list */
+	if (!from)
+		fib6_nh_excptn_bucket_set_flushed(nh, &rt6_exception_lock);
+
 	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
-		hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
-			rt6_remove_exception(bucket, rt6_ex);
-		WARN_ON_ONCE(bucket->depth);
+		hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist) {
+			if (!from ||
+			    rcu_access_pointer(rt6_ex->rt6i->from) == from)
+				rt6_remove_exception(bucket, rt6_ex);
+		}
+		WARN_ON_ONCE(!from && bucket->depth);
 		bucket++;
 	}
-
 out:
 	spin_unlock_bh(&rt6_exception_lock);
 }
@@ -1584,7 +1639,7 @@ static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
 	struct rt6_exception *rt6_ex;
 	struct rt6_info *ret = NULL;
 
-	bucket = rcu_dereference(res->f6i->rt6i_exception_bucket);
+	bucket = fib6_nh_get_excptn_bucket(res->nh, NULL);
 
 #ifdef CONFIG_IPV6_SUBTREES
 	/* fib6i_src.plen != 0 indicates f6i is in subtree
@@ -1605,7 +1660,7 @@ static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
 }
 
 /* Remove the passed in cached rt from the hash table that contains it */
-static int fib6_nh_remove_exception(const struct fib6_info *from, int plen,
+static int fib6_nh_remove_exception(const struct fib6_nh *nh, int plen,
 				    const struct rt6_info *rt)
 {
 	const struct in6_addr *src_key = NULL;
@@ -1613,15 +1668,16 @@ static int fib6_nh_remove_exception(const struct fib6_info *from, int plen,
 	struct rt6_exception *rt6_ex;
 	int err;
 
-	if (!rcu_access_pointer(from->rt6i_exception_bucket))
+	if (!rcu_access_pointer(nh->rt6i_exception_bucket))
 		return -ENOENT;
 
 	spin_lock_bh(&rt6_exception_lock);
-	bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
-				    lockdep_is_held(&rt6_exception_lock));
+	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
+
 #ifdef CONFIG_IPV6_SUBTREES
-	/* plen != 0 indicates 'from' is in subtree and exception
-	 * table is indexed by a hash of both rt6i_dst and rt6i_src.
+	/* rt6i_src.plen != 0 indicates 'from' is in subtree
+	 * and exception table is indexed by a hash of
+	 * both rt6i_dst and rt6i_src.
 	 * Otherwise, the exception table is indexed by
 	 * a hash of only rt6i_dst.
 	 */
@@ -1647,37 +1703,35 @@ static int rt6_remove_exception_rt(struct rt6_info *rt)
 	struct fib6_info *from;
 
 	from = rcu_dereference(rt->from);
-	if (!from ||
-	    !(rt->rt6i_flags & RTF_CACHE))
+	if (!from || !(rt->rt6i_flags & RTF_CACHE))
 		return -EINVAL;
 
-	return fib6_nh_remove_exception(from, from->fib6_src.plen, rt);
+	return fib6_nh_remove_exception(&from->fib6_nh,
+					from->fib6_src.plen, rt);
 }
 
 /* Find rt6_ex which contains the passed in rt cache and
  * refresh its stamp
  */
-static void fib6_nh_update_exception(const struct fib6_info *from, int plen,
+static void fib6_nh_update_exception(const struct fib6_nh *nh, int plen,
 				     const struct rt6_info *rt)
 {
 	const struct in6_addr *src_key = NULL;
 	struct rt6_exception_bucket *bucket;
 	struct rt6_exception *rt6_ex;
 
-	bucket = rcu_dereference(from->rt6i_exception_bucket);
-
+	bucket = fib6_nh_get_excptn_bucket(nh, NULL);
 #ifdef CONFIG_IPV6_SUBTREES
-	/* plen != 0 indicates 'from' is in subtree and exception
-	 * table is indexed by a hash of both rt6i_dst and rt6i_src.
+	/* rt6i_src.plen != 0 indicates 'from' is in subtree
+	 * and exception table is indexed by a hash of
+	 * both rt6i_dst and rt6i_src.
 	 * Otherwise, the exception table is indexed by
 	 * a hash of only rt6i_dst.
 	 */
 	if (plen)
 		src_key = &rt->rt6i_src.addr;
 #endif
-	rt6_ex = __rt6_find_exception_rcu(&bucket,
-					  &rt->rt6i_dst.addr,
-					  src_key);
+	rt6_ex = __rt6_find_exception_rcu(&bucket, &rt->rt6i_dst.addr, src_key);
 	if (rt6_ex)
 		rt6_ex->stamp = jiffies;
 }
@@ -1692,7 +1746,7 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
 	if (!from || !(rt->rt6i_flags & RTF_CACHE))
 		goto unlock;
 
-	fib6_nh_update_exception(from, from->fib6_src.plen, rt);
+	fib6_nh_update_exception(&from->fib6_nh, from->fib6_src.plen, rt);
 unlock:
 	rcu_read_unlock();
 }
@@ -1720,15 +1774,13 @@ static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
 }
 
 static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
-				       struct fib6_info *rt, int mtu)
+				       const struct fib6_nh *nh, int mtu)
 {
 	struct rt6_exception_bucket *bucket;
 	struct rt6_exception *rt6_ex;
 	int i;
 
-	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
-					lockdep_is_held(&rt6_exception_lock));
-
+	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
 	if (!bucket)
 		return;
 
@@ -1750,21 +1802,19 @@ static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
 
 #define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
 
-static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
-					struct in6_addr *gateway)
+static void fib6_nh_exceptions_clean_tohost(const struct fib6_nh *nh,
+					    const struct in6_addr *gateway)
 {
 	struct rt6_exception_bucket *bucket;
 	struct rt6_exception *rt6_ex;
 	struct hlist_node *tmp;
 	int i;
 
-	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
+	if (!rcu_access_pointer(nh->rt6i_exception_bucket))
 		return;
 
 	spin_lock_bh(&rt6_exception_lock);
-	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
-				     lockdep_is_held(&rt6_exception_lock));
-
+	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
 	if (bucket) {
 		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
 			hlist_for_each_entry_safe(rt6_ex, tmp,
@@ -1829,7 +1879,7 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
 	gc_args->more++;
 }
 
-static void fib6_nh_age_exceptions(struct fib6_info *rt,
+static void fib6_nh_age_exceptions(const struct fib6_nh *nh,
 				   struct fib6_gc_args *gc_args,
 				   unsigned long now)
 {
@@ -1838,14 +1888,12 @@ static void fib6_nh_age_exceptions(struct fib6_info *rt,
 	struct hlist_node *tmp;
 	int i;
 
-	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
+	if (!rcu_access_pointer(nh->rt6i_exception_bucket))
 		return;
 
 	rcu_read_lock_bh();
 	spin_lock(&rt6_exception_lock);
-	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
-				    lockdep_is_held(&rt6_exception_lock));
-
+	bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
 	if (bucket) {
 		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
 			hlist_for_each_entry_safe(rt6_ex, tmp,
@@ -1860,11 +1908,11 @@ static void fib6_nh_age_exceptions(struct fib6_info *rt,
 	rcu_read_unlock_bh();
 }
 
-void rt6_age_exceptions(struct fib6_info *rt,
+void rt6_age_exceptions(struct fib6_info *f6i,
 			struct fib6_gc_args *gc_args,
 			unsigned long now)
 {
-	fib6_nh_age_exceptions(rt, gc_args, now);
+	fib6_nh_age_exceptions(&f6i->fib6_nh, gc_args, now);
 }
 
 /* must be called with rcu lock held */
@@ -2695,9 +2743,9 @@ u32 ip6_mtu_from_fib6(const struct fib6_result *res,
 		      const struct in6_addr *saddr)
 {
 	struct rt6_exception_bucket *bucket;
-	const struct fib6_nh *nh = res->nh;
 	struct fib6_info *f6i = res->f6i;
 	const struct in6_addr *src_key;
+	struct fib6_nh *nh = res->nh;
 	struct rt6_exception *rt6_ex;
 	struct inet6_dev *idev;
 	u32 mtu = 0;
@@ -2714,7 +2762,7 @@ u32 ip6_mtu_from_fib6(const struct fib6_result *res,
 		src_key = saddr;
 #endif
 
-	bucket = rcu_dereference(f6i->rt6i_exception_bucket);
+	bucket = fib6_nh_get_excptn_bucket(nh, NULL);
 	rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
 	if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
 		mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
@@ -3117,6 +3165,19 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
 
 void fib6_nh_release(struct fib6_nh *fib6_nh)
 {
+	struct rt6_exception_bucket *bucket;
+
+	rcu_read_lock();
+
+	fib6_nh_flush_exceptions(fib6_nh, NULL);
+	bucket = fib6_nh_get_excptn_bucket(fib6_nh, NULL);
+	if (bucket) {
+		rcu_assign_pointer(fib6_nh->rt6i_exception_bucket, NULL);
+		kfree(bucket);
+	}
+
+	rcu_read_unlock();
+
 	if (fib6_nh->rt6i_pcpu) {
 		int cpu;
 
@@ -3406,9 +3467,11 @@ static int ip6_route_del(struct fib6_config *cfg,
 		for_each_fib6_node_rt_rcu(fn) {
 			struct fib6_nh *nh;
 
+			nh = &rt->fib6_nh;
 			if (cfg->fc_flags & RTF_CACHE) {
 				struct fib6_result res = {
 					.f6i = rt,
+					.nh = nh,
 				};
 				int rc;
 
@@ -3425,7 +3488,6 @@ static int ip6_route_del(struct fib6_config *cfg,
 				continue;
 			}
 
-			nh = &rt->fib6_nh;
 			if (cfg->fc_ifindex &&
 			    (!nh->fib_nh_dev ||
 			     nh->fib_nh_dev->ifindex != cfg->fc_ifindex))
@@ -3937,18 +3999,17 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
 static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
 {
 	struct in6_addr *gateway = (struct in6_addr *)arg;
+	struct fib6_nh *nh = &rt->fib6_nh;
 
 	if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
-	    rt->fib6_nh.fib_nh_gw_family &&
-	    ipv6_addr_equal(gateway, &rt->fib6_nh.fib_nh_gw6)) {
+	    nh->fib_nh_gw_family && ipv6_addr_equal(gateway, &nh->fib_nh_gw6))
 		return -1;
-	}
 
 	/* Further clean up cached routes in exception table.
 	 * This is needed because cached route may have a different
 	 * gateway than its 'parent' in the case of an ip redirect.
 	 */
-	rt6_exceptions_clean_tohost(rt, gateway);
+	fib6_nh_exceptions_clean_tohost(nh, gateway);
 
 	return 0;
 }
@@ -4215,10 +4276,10 @@ struct rt6_mtu_change_arg {
 	struct fib6_info *f6i;
 };
 
-static int fib6_nh_mtu_change(struct fib6_info *f6i, void *_arg)
+static int fib6_nh_mtu_change(struct fib6_nh *nh, void *_arg)
 {
 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *)_arg;
-	struct fib6_nh *nh = &f6i->fib6_nh;
+	struct fib6_info *f6i = arg->f6i;
 
 	/* For administrative MTU increase, there is no way to discover
 	 * IPv6 PMTU increase, so PMTU increase should be updated here.
@@ -4234,7 +4295,7 @@ static int fib6_nh_mtu_change(struct fib6_info *f6i, void *_arg)
 			fib6_metric_set(f6i, RTAX_MTU, arg->mtu);
 
 		spin_lock_bh(&rt6_exception_lock);
-		rt6_exceptions_update_pmtu(idev, f6i, arg->mtu);
+		rt6_exceptions_update_pmtu(idev, nh, arg->mtu);
 		spin_unlock_bh(&rt6_exception_lock);
 	}
 
@@ -4260,7 +4321,7 @@ static int rt6_mtu_change_route(struct fib6_info *f6i, void *p_arg)
 		return 0;
 
 	arg->f6i = f6i;
-	return fib6_nh_mtu_change(f6i, arg);
+	return fib6_nh_mtu_change(&f6i->fib6_nh, arg);
 }
 
 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
-- 
2.11.0


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH v2 net-next 0/7] ipv4 ipv6: Move location of pcpu route cache and exceptions
  2019-04-28  2:27 [PATCH v2 net-next 0/7] ipv4 ipv6: Move location of pcpu route cache and exceptions David Ahern
                   ` (6 preceding siblings ...)
  2019-04-28  2:27 ` [PATCH v2 net-next 7/7] ipv6: Move exception bucket to fib6_nh David Ahern
@ 2019-04-28 15:06 ` Eric Dumazet
  2019-04-28 17:53   ` Alexei Starovoitov
  2019-04-28 20:41   ` David Ahern
  7 siblings, 2 replies; 12+ messages in thread
From: Eric Dumazet @ 2019-04-28 15:06 UTC (permalink / raw)
  To: David Ahern, davem, netdev; +Cc: idosch, David Ahern



On 4/27/19 7:27 PM, David Ahern wrote:
> From: David Ahern <dsahern@gmail.com>
> 
> This series moves IPv4 pcpu cached routes from fib_nh to fib_nh_common
> to make the caches avaialable for IPv6 nexthops (fib6_nh) with IPv4
> routes. This allows a fib6_nh struct to be used with both IPv4 and
> and IPv6 routes.
> 
> In addition pcpu caches and exception entries for IPv6 routes are
> moved from fib6_info to fib6_nh since they are really a function of
> the device and gateway. During the move of each, functions are
> refactored such that the core logic is in new helpers that take
> a fib6_nh versus a fib6_info.
>

I would prefer we fix the existing bugs before moving the code around,
otherwise stable teams work is going to be tough.

We have dozens of syzbot reports involving all this stuff.


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2 net-next 0/7] ipv4 ipv6: Move location of pcpu route cache and exceptions
  2019-04-28 15:06 ` [PATCH v2 net-next 0/7] ipv4 ipv6: Move location of pcpu route cache and exceptions Eric Dumazet
@ 2019-04-28 17:53   ` Alexei Starovoitov
  2019-04-28 20:54     ` David Ahern
  2019-04-28 20:41   ` David Ahern
  1 sibling, 1 reply; 12+ messages in thread
From: Alexei Starovoitov @ 2019-04-28 17:53 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David Ahern, David S. Miller, Network Development, Ido Schimmel,
	David Ahern

On Sun, Apr 28, 2019 at 8:10 AM Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
>
>
> On 4/27/19 7:27 PM, David Ahern wrote:
> > From: David Ahern <dsahern@gmail.com>
> >
> > This series moves IPv4 pcpu cached routes from fib_nh to fib_nh_common
> > to make the caches avaialable for IPv6 nexthops (fib6_nh) with IPv4
> > routes. This allows a fib6_nh struct to be used with both IPv4 and
> > and IPv6 routes.
> >
> > In addition pcpu caches and exception entries for IPv6 routes are
> > moved from fib6_info to fib6_nh since they are really a function of
> > the device and gateway. During the move of each, functions are
> > refactored such that the core logic is in new helpers that take
> > a fib6_nh versus a fib6_info.
> >
>
> I would prefer we fix the existing bugs before moving the code around,
> otherwise stable teams work is going to be tough.
>
> We have dozens of syzbot reports involving all this stuff.

+1
Adding tests as we go along would immensely help too.
Unfortunately afaik there is no test infra to use for it.
Anyone has ideas?

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2 net-next 0/7] ipv4 ipv6: Move location of pcpu route cache and exceptions
  2019-04-28 15:06 ` [PATCH v2 net-next 0/7] ipv4 ipv6: Move location of pcpu route cache and exceptions Eric Dumazet
  2019-04-28 17:53   ` Alexei Starovoitov
@ 2019-04-28 20:41   ` David Ahern
  1 sibling, 0 replies; 12+ messages in thread
From: David Ahern @ 2019-04-28 20:41 UTC (permalink / raw)
  To: Eric Dumazet, David Ahern, davem, netdev; +Cc: idosch

On 4/28/19 9:06 AM, Eric Dumazet wrote:
> 
> I would prefer we fix the existing bugs before moving the code around,
> otherwise stable teams work is going to be tough.
> 

fair enough. I'll drop the ipv6 patches and re-send just the v4 ones.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2 net-next 0/7] ipv4 ipv6: Move location of pcpu route cache and exceptions
  2019-04-28 17:53   ` Alexei Starovoitov
@ 2019-04-28 20:54     ` David Ahern
  0 siblings, 0 replies; 12+ messages in thread
From: David Ahern @ 2019-04-28 20:54 UTC (permalink / raw)
  To: Alexei Starovoitov, Eric Dumazet
  Cc: David Ahern, David S. Miller, Network Development, Ido Schimmel

On 4/28/19 11:53 AM, Alexei Starovoitov wrote:
> Adding tests as we go along would immensely help too.
> Unfortunately afaik there is no test infra to use for it.


pcpu routes are the predominant ones used for traffic. We have a number
of existing tests in selftests - fib_tests.sh - that exercise the code
paths touched by this set. That includes their entire life cycle - from
FIB entry create, to running traffic or 'ip route get' to cleanup.

For the exception code paths selftests has pmtu.sh which covers creation
of entries, updates and deletes.

Both of those cover IPv4 and IPv6.


I have a vrf test suite which covers functional tests under a lot of
permutations (ie., sysctls, APIs, remote traffic, local traffic,
negative tests, ...) as well as some runtime tests (e.g., active and
passive socket with device kills). All of these are run using namespaces
which are created and destroyed 100's of times during a run which covers
cleanup of resources (eg, netdev refcnts). While the tests were started
for VRF they cover non-VRF tests too as 1 namespace uses VRF and other
does not. I run this suite with a performance focused kernel config and
one with rculock debugging and kmemleak enabled. I am currently
re-working the script into something suitable for selftests. I might get
it out this dev cycle, if not the beginning of the next.

I am also reworking a lot of the above to handle nexthop objects in
addition to its own standalone set of functional tests.

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2019-04-28 20:54 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-04-28  2:27 [PATCH v2 net-next 0/7] ipv4 ipv6: Move location of pcpu route cache and exceptions David Ahern
2019-04-28  2:27 ` [PATCH v2 net-next 1/7] ipv4: Move cached routes to fib_nh_common David Ahern
2019-04-28  2:27 ` [PATCH v2 net-next 2/7] ipv4: Pass fib_nh_common to rt_cache_route David Ahern
2019-04-28  2:27 ` [PATCH v2 net-next 3/7] ipv4: Move exception bucket to nh_common David Ahern
2019-04-28  2:27 ` [PATCH v2 net-next 4/7] ipv6: Move pcpu cached routes to fib6_nh David Ahern
2019-04-28  2:27 ` [PATCH v2 net-next 5/7] ipv6: Refactor fib6_drop_pcpu_from David Ahern
2019-04-28  2:27 ` [PATCH v2 net-next 6/7] ipv6: Refactor exception functions David Ahern
2019-04-28  2:27 ` [PATCH v2 net-next 7/7] ipv6: Move exception bucket to fib6_nh David Ahern
2019-04-28 15:06 ` [PATCH v2 net-next 0/7] ipv4 ipv6: Move location of pcpu route cache and exceptions Eric Dumazet
2019-04-28 17:53   ` Alexei Starovoitov
2019-04-28 20:54     ` David Ahern
2019-04-28 20:41   ` David Ahern

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.