All of lore.kernel.org
 help / color / mirror / Atom feed
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
To: David Miller <davem@davemloft.net>
Cc: netdev@vger.kernel.org, eric.dumazet@gmail.com,
	nicolas.dichtel@6wind.com
Subject: Re: [PATCH net-next] ipv6: implement rt_genid_bump_ipv6 with fn_sernum and remove rt6i_genid
Date: Thu, 11 Sep 2014 14:05:46 +0200	[thread overview]
Message-ID: <1410437146.18873.2.camel@localhost> (raw)
In-Reply-To: <20140910.130929.247064282043941043.davem@davemloft.net>

On Wed, 2014-09-10 at 13:09 -0700, David Miller wrote:
> From: Hannes Frederic Sowa <hannes@stressinduktion.org>
> Date: Wed, 10 Sep 2014 11:31:28 +0200
> 
> > In case we need to force the sockets to relookup the routes we now
> > increase the fn_sernum on all fibnodes in the routing tree. This is a
> > costly operation but should only happen if we have major routing/policy
> > changes in the kernel (e.g. manual route adding/removal, xfrm policy
> > changes).
> 
> Core routers can update thousands of route updates per second, and they
> do this via what you refer to as "manual route adding/removal".
> 
> I don't think we want to put such a scalability problem into the tree.
> 
> There has to be a lightweight way to address this.

An alternative approach that avoids traversing the routing table. The
trade-off is that each newly inserted route (even a cached-only one) might
bump all other routes out of the per-socket caches:

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 9bcb220..a7e45b9 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -119,8 +119,6 @@ struct rt6_info {
 	struct inet6_dev		*rt6i_idev;
 	unsigned long			_rt6i_peer;
 
-	u32				rt6i_genid;
-
 	/* more non-fragment space at head required */
 	unsigned short			rt6i_nfheader_len;
 
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 361d260..428fdcb 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -358,18 +358,28 @@ static inline int rt_genid_ipv6(struct net *net)
 	return atomic_read(&net->ipv6.rt_genid);
 }
 
-static inline void rt_genid_bump_ipv6(struct net *net)
+static inline int rt_genid_bump_ipv6(struct net *net)
 {
-	atomic_inc(&net->ipv6.rt_genid);
+	int new, old;
+
+	do {
+		old = atomic_read(&net->ipv6.rt_genid);
+		new = old + 1;
+		if (new <= 0)
+			new = 1;
+	} while (atomic_cmpxchg(&net->ipv6.rt_genid, old, new) != old);
+	return new;
+
 }
 #else
 static inline int rt_genid_ipv6(struct net *net)
 {
-	return 0;
+	return 1;
 }
 
-static inline void rt_genid_bump_ipv6(struct net *net)
+static inline int rt_genid_bump_ipv6(struct net *net)
 {
+	return 1;
 }
 #endif
 
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 76b7f5e..4a2f130 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -84,7 +84,10 @@ static int fib6_walk_continue(struct fib6_walker_t *w);
  *	result of redirects, path MTU changes, etc.
  */
 
-static __u32 rt_sernum;
+static int fib6_new_sernum(struct net *net)
+{
+	return rt_genid_bump_ipv6(net);
+}
 
 static void fib6_gc_timer_cb(unsigned long arg);
 
@@ -104,13 +107,6 @@ static inline void fib6_walker_unlink(struct fib6_walker_t *w)
 	list_del(&w->lh);
 	write_unlock_bh(&fib6_walker_lock);
 }
-static __inline__ u32 fib6_new_sernum(void)
-{
-	u32 n = ++rt_sernum;
-	if ((__s32)n <= 0)
-		rt_sernum = n = 1;
-	return n;
-}
 
 /*
  *	Auxiliary address test functions for the radix tree.
@@ -421,16 +417,15 @@ out:
  */
 
 static struct fib6_node *fib6_add_1(struct fib6_node *root,
-				     struct in6_addr *addr, int plen,
-				     int offset, int allow_create,
-				     int replace_required)
+				    struct in6_addr *addr, int plen,
+				    int offset, int allow_create,
+				    int replace_required, int sernum)
 {
 	struct fib6_node *fn, *in, *ln;
 	struct fib6_node *pn = NULL;
 	struct rt6key *key;
 	int	bit;
 	__be32	dir = 0;
-	__u32	sernum = fib6_new_sernum();
 
 	RT6_TRACE("fib6_add_1\n");
 
@@ -844,6 +839,7 @@ void fib6_force_start_gc(struct net *net)
 int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info,
 	     struct nlattr *mx, int mx_len)
 {
+	struct net *net = dev_net(rt->dst.dev);
 	struct fib6_node *fn, *pn = NULL;
 	int err = -ENOMEM;
 	int allow_create = 1;
@@ -860,7 +856,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info,
 
 	fn = fib6_add_1(root, &rt->rt6i_dst.addr, rt->rt6i_dst.plen,
 			offsetof(struct rt6_info, rt6i_dst), allow_create,
-			replace_required);
+			replace_required, fib6_new_sernum(net));
 	if (IS_ERR(fn)) {
 		err = PTR_ERR(fn);
 		fn = NULL;
@@ -894,14 +890,15 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info,
 			sfn->leaf = info->nl_net->ipv6.ip6_null_entry;
 			atomic_inc(&info->nl_net->ipv6.ip6_null_entry->rt6i_ref);
 			sfn->fn_flags = RTN_ROOT;
-			sfn->fn_sernum = fib6_new_sernum();
+			sfn->fn_sernum = fib6_new_sernum(net);
 
 			/* Now add the first leaf node to new subtree */
 
 			sn = fib6_add_1(sfn, &rt->rt6i_src.addr,
 					rt->rt6i_src.plen,
 					offsetof(struct rt6_info, rt6i_src),
-					allow_create, replace_required);
+					allow_create, replace_required,
+					fib6_new_sernum(net));
 
 			if (IS_ERR(sn)) {
 				/* If it is failed, discard just allocated
@@ -920,7 +917,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info,
 			sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr,
 					rt->rt6i_src.plen,
 					offsetof(struct rt6_info, rt6i_src),
-					allow_create, replace_required);
+					allow_create, replace_required,
+					fib6_new_sernum(net));
 
 			if (IS_ERR(sn)) {
 				err = PTR_ERR(sn);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index f74b041..54b7d81 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -314,7 +314,6 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net,
 
 		memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
 		rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
-		rt->rt6i_genid = rt_genid_ipv6(net);
 		INIT_LIST_HEAD(&rt->rt6i_siblings);
 	}
 	return rt;
@@ -1096,10 +1095,7 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
 	 * into this function always.
 	 */
-	if (rt->rt6i_genid != rt_genid_ipv6(dev_net(rt->dst.dev)))
-		return NULL;
-
-	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
+	if (!rt->rt6i_node || rt_genid_ipv6(dev_net(rt->dst.dev)) != cookie)
 		return NULL;
 
 	if (rt6_check_expired(rt))

  parent reply	other threads:[~2014-09-11 12:05 UTC|newest]

Thread overview: 57+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-08-14 18:19 Performance regression on kernels 3.10 and newer Alexander Duyck
2014-08-14 18:46 ` Eric Dumazet
2014-08-14 19:50   ` Eric Dumazet
2014-08-14 19:59   ` Rick Jones
2014-08-14 20:31     ` Alexander Duyck
2014-08-14 20:51       ` Eric Dumazet
2014-08-14 20:46     ` Eric Dumazet
2014-08-14 23:16   ` Alexander Duyck
2014-08-14 23:20     ` David Miller
2014-08-14 23:25       ` Tom Herbert
2014-08-21 23:24         ` David Miller
2014-09-06 14:45           ` Eric Dumazet
2014-09-06 15:27             ` Eric Dumazet
2014-09-06 15:46               ` Eric Dumazet
2014-09-06 16:38                 ` Eric Dumazet
2014-09-06 18:21                   ` Eric Dumazet
2014-09-07 19:05                     ` [PATCH net] ipv6: refresh rt6i_genid in ip6_pol_route() Eric Dumazet
2014-09-07 22:54                       ` David Miller
2014-09-08  4:18                         ` Eric Dumazet
2014-09-08  4:27                           ` David Miller
2014-09-08  4:43                             ` Eric Dumazet
2014-09-08  4:59                               ` David Miller
2014-09-08  5:07                                 ` Eric Dumazet
2014-09-08  8:11                                   ` Nicolas Dichtel
2014-09-08 10:28                                     ` Eric Dumazet
2014-09-08 12:16                                       ` Nicolas Dichtel
2014-09-08 18:48                                   ` Vlad Yasevich
2014-09-09 12:58                                   ` Hannes Frederic Sowa
2014-09-10  9:31                                     ` [PATCH net-next] ipv6: implement rt_genid_bump_ipv6 with fn_sernum and remove rt6i_genid Hannes Frederic Sowa
2014-09-10 13:26                                       ` Vlad Yasevich
2014-09-10 13:42                                         ` Hannes Frederic Sowa
2014-09-10 20:09                                       ` David Miller
2014-09-11  8:30                                         ` Hannes Frederic Sowa
2014-09-11 12:22                                           ` Vlad Yasevich
2014-09-11 12:40                                             ` Hannes Frederic Sowa
2014-09-11 12:05                                         ` Hannes Frederic Sowa [this message]
2014-09-11 14:19                                           ` Vlad Yasevich
2014-09-11 14:32                                             ` Hannes Frederic Sowa
2014-09-11 14:44                                               ` Vlad Yasevich
2014-09-11 14:47                                                 ` Hannes Frederic Sowa
2014-09-08 15:06               ` [PATCH v2 net-next] tcp: remove dst refcount false sharing for prequeue mode Eric Dumazet
2014-09-08 21:21                 ` David Miller
2014-09-08 21:30                   ` Eric Dumazet
2014-09-08 22:41                     ` David Miller
2014-09-09 23:56                     ` David Miller
2014-08-15 17:15       ` Performance regression on kernels 3.10 and newer Alexander Duyck
2014-08-15 17:59         ` Eric Dumazet
2014-08-15 18:49         ` Tom Herbert
2014-08-15 19:10           ` Alexander Duyck
2014-08-15 22:16             ` Tom Herbert
2014-08-15 23:23               ` Alexander Duyck
2014-08-18  9:03                 ` David Laight
2014-08-18 15:22                   ` Alexander Duyck
2014-08-18 15:29                     ` Rick Jones
2014-08-21 23:51         ` David Miller
2014-08-14 23:48     ` Eric Dumazet
2014-08-15  0:33       ` Rick Jones

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1410437146.18873.2.camel@localhost \
    --to=hannes@stressinduktion.org \
    --cc=davem@davemloft.net \
    --cc=eric.dumazet@gmail.com \
    --cc=netdev@vger.kernel.org \
    --cc=nicolas.dichtel@6wind.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.