netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Fan Du <fan.du@windriver.com>
To: David Miller <davem@davemloft.net>, <nicolas.dichtel@6wind.com>,
	Steffen Klassert <steffen.klassert@secunet.com>,
	<kuznet@ms2.inr.ac.ru>, <yoshfuji@linux-ipv6.org>,
	<jmorris@namei.org>
Cc: <netdev@vger.kernel.org>
Subject: [RFC PATCH net-next] net: split rt_genid for ipv4 and ipv6
Date: Mon, 22 Jul 2013 13:43:30 +0800	[thread overview]
Message-ID: <51ECC682.1030409@windriver.com> (raw)
In-Reply-To: <20130719.023318.2201887768706444746.davem@davemloft.net>

Adding IPsec and other IPv4/IPv6 maintainers in the list.

On 2013年07月19日 17:33, David Miller wrote:
> From: Fan Du<fan.du@windriver.com>
> Date: Fri, 19 Jul 2013 15:50:20 +0800
>
>> The original commit is targeted for XFRM policy inserting/removing,
>> but it uses net genid shared by both IPv4 and IPv6, the side effect is
>> add/delete IPv4 address will invalidate IPv6 dst in all.
>>
>> We *do* need to bump genid when add/delete IPv6 address in scenario I
>> described here: http://www.spinics.net/lists/netdev/msg243398.html,
>> but definitely not from add/delete IPv4 address. Moreover test shows
>> that DCCP still push thousands of packets on wire after delete its
>> IPv6
>> address in the same scenario I describe before.
>>
>> The impulse to bump genid for IPv6 is much more stronger after this
>> commit even do it unintentionally.
>
> If you really think it will help, and it will still handle the IPSEC
> case, you can make a seperate genid for ipv4 and ipv6 but that might not
> work out so cleanly.
>

At least let me give it a try. Any comments would be truly welcome.


 From c79215d64038d62340d77c6ac070d8bb479b2f89 Mon Sep 17 00:00:00 2001
From: Fan Du <fan.du@windriver.com>
Date: Mon, 22 Jul 2013 11:31:56 +0800
Subject: [RFC PATCH net-next] net: split rt_genid for ipv4 and ipv6

Current net name space has only one genid for both IPv4 and IPv6, it has below drawbacks:

- Add/delete an IPv4 address will invalidate all IPv6 routing table entries.
- Insert/remove XFRM policy will also invalidate both IPv4/IPv6 routing table entries
   even when the policy is only applied for one address family.

Thus, this patch attempt to split one genid for two to cater for IPv4 and IPv6 separately
in a fine granularity.

Signed-off-by: Fan Du <fan.du@windriver.com>
---
  include/net/net_namespace.h |   33 ++++++++++++++++++++++++++++-----
  net/ipv4/route.c            |   16 ++++++++--------
  net/ipv6/af_inet6.c         |    1 +
  net/ipv6/route.c            |    4 ++--
  net/xfrm/xfrm_policy.c      |    8 +++++++-
  5 files changed, 46 insertions(+), 16 deletions(-)

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 84e37b1..a08e312 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -119,8 +119,11 @@ struct net {
  	struct netns_ipvs	*ipvs;
  #endif
  	struct sock		*diag_nlsk;
-	atomic_t		rt_genid;
  	atomic_t		fnhe_genid;
+	atomic_t                rt_genid_ipv4;
+#if IS_ENABLED(CONFIG_IPV6)
+	atomic_t                rt_genid_ipv6;
+#endif
  };

  /*
@@ -333,14 +336,34 @@ static inline void unregister_net_sysctl_table(struct ctl_table_header *header)
  }
  #endif

-static inline int rt_genid(struct net *net)
+static inline int rt_genid_ipv4(struct net *net)
+{
+	return atomic_read(&net->rt_genid_ipv4);
+}
+
+static inline void rt_genid_bump_ipv4(struct net *net)
  {
-	return atomic_read(&net->rt_genid);
+	atomic_inc(&net->rt_genid_ipv4);
  }

-static inline void rt_genid_bump(struct net *net)
+#if IS_ENABLED(CONFIG_IPV6)
+static inline int rt_genid_ipv6(struct net *net)
  {
-	atomic_inc(&net->rt_genid);
+	return atomic_read(&net->rt_genid_ipv6);
+}
+
+static inline void rt_genid_bump_ipv6(struct net *net)
+{
+	atomic_inc(&net->rt_genid_ipv6);
+}
+#endif
+
+static inline void rt_genid_bump_all(struct net *net)
+{
+	atomic_inc(&net->rt_genid_ipv4);
+#if IS_ENABLED(CONFIG_IPV6)
+	atomic_inc(&net->rt_genid_ipv6);
+#endif
  }

  static inline int fnhe_genid(struct net *net)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a9a54a2..df6095d 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -435,12 +435,12 @@ static inline int ip_rt_proc_init(void)

  static inline bool rt_is_expired(const struct rtable *rth)
  {
-	return rth->rt_genid != rt_genid(dev_net(rth->dst.dev));
+	return rth->rt_genid != rt_genid_ipv4(dev_net(rth->dst.dev));
  }

  void rt_cache_flush(struct net *net)
  {
-	rt_genid_bump(net);
+	rt_genid_bump_ipv4(net);
  }

  static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
@@ -1458,7 +1458,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
  #endif
  	rth->dst.output = ip_rt_bug;

-	rth->rt_genid	= rt_genid(dev_net(dev));
+	rth->rt_genid	= rt_genid_ipv4(dev_net(dev));
  	rth->rt_flags	= RTCF_MULTICAST;
  	rth->rt_type	= RTN_MULTICAST;
  	rth->rt_is_input= 1;
@@ -1589,7 +1589,7 @@ static int __mkroute_input(struct sk_buff *skb,
  		goto cleanup;
  	}

-	rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
+	rth->rt_genid = rt_genid_ipv4(dev_net(rth->dst.dev));
  	rth->rt_flags = flags;
  	rth->rt_type = res->type;
  	rth->rt_is_input = 1;
@@ -1760,7 +1760,7 @@ local_input:
  	rth->dst.tclassid = itag;
  #endif

-	rth->rt_genid = rt_genid(net);
+	rth->rt_genid = rt_genid_ipv4(net);
  	rth->rt_flags 	= flags|RTCF_LOCAL;
  	rth->rt_type	= res.type;
  	rth->rt_is_input = 1;
@@ -1945,7 +1945,7 @@ add:

  	rth->dst.output = ip_output;

-	rth->rt_genid = rt_genid(dev_net(dev_out));
+	rth->rt_genid = rt_genid_ipv4(dev_net(dev_out));
  	rth->rt_flags	= flags;
  	rth->rt_type	= type;
  	rth->rt_is_input = 0;
@@ -2227,7 +2227,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
  		rt->rt_iif = ort->rt_iif;
  		rt->rt_pmtu = ort->rt_pmtu;

-		rt->rt_genid = rt_genid(net);
+		rt->rt_genid = rt_genid_ipv4(net);
  		rt->rt_flags = ort->rt_flags;
  		rt->rt_type = ort->rt_type;
  		rt->rt_gateway = ort->rt_gateway;
@@ -2665,7 +2665,7 @@ static __net_initdata struct pernet_operations sysctl_route_ops = {

  static __net_init int rt_genid_init(struct net *net)
  {
-	atomic_set(&net->rt_genid, 0);
+	atomic_set(&net->rt_genid_ipv4, 0);
  	atomic_set(&net->fnhe_genid, 0);
  	get_random_bytes(&net->ipv4.dev_addr_genid,
  			 sizeof(net->ipv4.dev_addr_genid));
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index a5ac969..af6855c 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -766,6 +766,7 @@ static int __net_init inet6_net_init(struct net *net)

  	net->ipv6.sysctl.bindv6only = 0;
  	net->ipv6.sysctl.icmpv6_time = 1*HZ;
+	atomic_set(&net->rt_genid_ipv6, 0);

  	err = ipv6_init_mibs(net);
  	if (err)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a8c891a..45ca9af 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -283,7 +283,7 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net,

  		memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
  		rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
-		rt->rt6i_genid = rt_genid(net);
+		rt->rt6i_genid = rt_genid_ipv6(net);
  		INIT_LIST_HEAD(&rt->rt6i_siblings);
  		rt->rt6i_nsiblings = 0;
  	}
@@ -1062,7 +1062,7 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
  	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
  	 * into this function always.
  	 */
-	if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
+	if (rt->rt6i_genid != rt_genid_ipv6(dev_net(rt->dst.dev)))
  		return NULL;

  	if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index e52cab3..dbd36e0 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -660,7 +660,13 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
  	xfrm_pol_hold(policy);
  	net->xfrm.policy_count[dir]++;
  	atomic_inc(&flow_cache_genid);
-	rt_genid_bump(net);
+
+	/* After previous checking, p->family can either be AF_INET or AF_INET6 */
+	if (policy->family == AF_INET)	
+		rt_genid_bump_ipv4(net);
+	else
+		rt_genid_bump_ipv6(net);
+
  	if (delpol) {
  		xfrm_policy_requeue(delpol, policy);
  		__xfrm_policy_unlink(delpol, dir);
-- 
1.7.9.5






-- 
浮沉随浪只记今朝笑

--fan

  reply	other threads:[~2013-07-22  5:43 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-07-18  3:22 [DISCUSSION] rt6i_genid Fan Du
2013-07-18  9:13 ` Nicolas Dichtel
2013-07-18  9:28   ` Fan Du
2013-07-18 15:12     ` Nicolas Dichtel
2013-07-19  0:01       ` Fan Du
2013-07-19  3:18         ` David Miller
2013-07-19  3:28           ` Fan Du
2013-07-19  3:31             ` David Miller
2013-07-19  7:50               ` Fan Du
2013-07-19  9:33                 ` David Miller
2013-07-22  5:43                   ` Fan Du [this message]
2013-07-22 10:53                     ` [RFC PATCH net-next] net: split rt_genid for ipv4 and ipv6 Steffen Klassert
2013-07-22 20:40                     ` Nicolas Dichtel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=51ECC682.1030409@windriver.com \
    --to=fan.du@windriver.com \
    --cc=davem@davemloft.net \
    --cc=jmorris@namei.org \
    --cc=kuznet@ms2.inr.ac.ru \
    --cc=netdev@vger.kernel.org \
    --cc=nicolas.dichtel@6wind.com \
    --cc=steffen.klassert@secunet.com \
    --cc=yoshfuji@linux-ipv6.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).