All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next 0/3] l3mdev: Improve use with main table
@ 2017-04-10 14:21 Robert Shearman
  2017-04-10 14:22 ` [PATCH net-next 1/3] ipv6: Fix route handling when using l3mdev set to " Robert Shearman
                   ` (4 more replies)
  0 siblings, 5 replies; 12+ messages in thread
From: Robert Shearman @ 2017-04-10 14:21 UTC (permalink / raw)
  To: davem; +Cc: netdev, David Ahern, Robert Shearman

Attempting to create a TCP socket not bound to a VRF device when a TCP
socket bound to a VRF device with the same port exists (and vice
versa) fails with EADDRINUSE. This limits the ability to use programs
in selected mixed VRF/non-VRF contexts.

This patch series solves the problem by extending the l3mdev be aware
of the special semantics of the main table and fixing issues arising
from the split local/main tables. A VRF master device created linking
to the main table and used for these programs in the same way as those
created for VRF tables can.

Robert Shearman (3):
  ipv6: Fix route handling when using l3mdev set to main table
  ipv4: Fix route handling when using l3mdev set to main table
  l3mdev: Fix lookup in local table when using main table

 net/ipv4/af_inet.c      |  4 +++-
 net/ipv4/fib_frontend.c | 14 +++++++++-----
 net/ipv4/raw.c          |  5 ++++-
 net/ipv6/addrconf.c     | 12 +++++++++---
 net/ipv6/route.c        | 23 ++++++++++++++++++-----
 net/l3mdev/l3mdev.c     | 26 ++++++++++++++++++++------
 6 files changed, 63 insertions(+), 21 deletions(-)

-- 
2.1.4

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH net-next 1/3] ipv6: Fix route handling when using l3mdev set to main table
  2017-04-10 14:21 [PATCH net-next 0/3] l3mdev: Improve use with main table Robert Shearman
@ 2017-04-10 14:22 ` Robert Shearman
  2017-04-10 14:22 ` [PATCH net-next 2/3] ipv4: " Robert Shearman
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 12+ messages in thread
From: Robert Shearman @ 2017-04-10 14:22 UTC (permalink / raw)
  To: davem; +Cc: netdev, David Ahern, Robert Shearman

If an l3mdev is set to use the main table then the use of the local
table is overridden. This means that when split local/main table is in
effect then local routes aren't added to the local table and so don't
respect the order of ip rules.

Fix this by assuming that no if no l3mdev is present then defaulting
to RT6_TABLE_MAIN and then subsequently doing a translation from
RT6_TABLE_MAIN to RT6_TABLE_LOCAL.

Do the same translations for RT6_TABLE_INFO, RT6_TABLE_DFLT and
RT6_TABLE_PREFIX even though they are just defined to RT6_TABLE_MAIN
in case someone decides to change that in the future.

Signed-off-by: Robert Shearman <rshearma@brocade.com>
---
 net/ipv6/addrconf.c | 12 +++++++++---
 net/ipv6/route.c    | 23 ++++++++++++++++++-----
 2 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 67ec87ea5fb6..937c35581a28 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2244,7 +2244,6 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
 		      unsigned long expires, u32 flags)
 {
 	struct fib6_config cfg = {
-		.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX,
 		.fc_metric = IP6_RT_PRIO_ADDRCONF,
 		.fc_ifindex = dev->ifindex,
 		.fc_expires = expires,
@@ -2254,6 +2253,9 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
 		.fc_protocol = RTPROT_KERNEL,
 	};
 
+	cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_MAIN;
+	if (cfg.fc_table == RT6_TABLE_MAIN)
+		cfg.fc_table = RT6_TABLE_PREFIX;
 	cfg.fc_dst = *pfx;
 
 	/* Prevent useless cloning on PtP SIT.
@@ -2277,8 +2279,10 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
 	struct fib6_node *fn;
 	struct rt6_info *rt = NULL;
 	struct fib6_table *table;
-	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX;
+	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_MAIN;
 
+	if (tb_id == RT6_TABLE_MAIN)
+		tb_id = RT6_TABLE_PREFIX;
 	table = fib6_get_table(dev_net(dev), tb_id);
 	if (!table)
 		return NULL;
@@ -2310,7 +2314,6 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
 static void addrconf_add_mroute(struct net_device *dev)
 {
 	struct fib6_config cfg = {
-		.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_LOCAL,
 		.fc_metric = IP6_RT_PRIO_ADDRCONF,
 		.fc_ifindex = dev->ifindex,
 		.fc_dst_len = 8,
@@ -2318,6 +2321,9 @@ static void addrconf_add_mroute(struct net_device *dev)
 		.fc_nlinfo.nl_net = dev_net(dev),
 	};
 
+	cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_MAIN;
+	if (cfg.fc_table == RT6_TABLE_MAIN)
+		cfg.fc_table = RT6_TABLE_LOCAL;
 	ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0);
 
 	ip6_route_add(&cfg);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 9db1418993f2..490c74ed6a78 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2424,12 +2424,15 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
 					   const struct in6_addr *gwaddr,
 					   struct net_device *dev)
 {
-	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
+	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_MAIN;
 	int ifindex = dev->ifindex;
 	struct fib6_node *fn;
 	struct rt6_info *rt = NULL;
 	struct fib6_table *table;
 
+	if (tb_id == RT6_TABLE_MAIN)
+		tb_id = RT6_TABLE_INFO;
+
 	table = fib6_get_table(net, tb_id);
 	if (!table)
 		return NULL;
@@ -2471,7 +2474,9 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
 		.fc_nlinfo.nl_net = net,
 	};
 
-	cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
+	cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_MAIN;
+	if (cfg.fc_table == RT6_TABLE_MAIN)
+		cfg.fc_table = RT6_TABLE_INFO;
 	cfg.fc_dst = *prefix;
 	cfg.fc_gateway = *gwaddr;
 
@@ -2487,10 +2492,13 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
 
 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
 {
-	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
+	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_MAIN;
 	struct rt6_info *rt;
 	struct fib6_table *table;
 
+	if (tb_id == RT6_TABLE_MAIN)
+		tb_id = RT6_TABLE_DFLT;
+
 	table = fib6_get_table(dev_net(dev), tb_id);
 	if (!table)
 		return NULL;
@@ -2513,7 +2521,6 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
 				     unsigned int pref)
 {
 	struct fib6_config cfg = {
-		.fc_table	= l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
 		.fc_metric	= IP6_RT_PRIO_USER,
 		.fc_ifindex	= dev->ifindex,
 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
@@ -2523,6 +2530,10 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
 		.fc_nlinfo.nl_net = dev_net(dev),
 	};
 
+	cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_MAIN;
+	if (cfg.fc_table == RT6_TABLE_MAIN)
+		cfg.fc_table = RT6_TABLE_DFLT;
+
 	cfg.fc_gateway = *gwaddr;
 
 	if (!ip6_route_add(&cfg)) {
@@ -2723,7 +2734,9 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 	rt->rt6i_gateway  = *addr;
 	rt->rt6i_dst.addr = *addr;
 	rt->rt6i_dst.plen = 128;
-	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
+	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_MAIN;
+	if (tb_id == RT6_TABLE_MAIN)
+		tb_id = RT6_TABLE_LOCAL;
 	rt->rt6i_table = fib6_get_table(net, tb_id);
 	rt->dst.flags |= DST_NOCACHE;
 
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH net-next 2/3] ipv4: Fix route handling when using l3mdev set to main table
  2017-04-10 14:21 [PATCH net-next 0/3] l3mdev: Improve use with main table Robert Shearman
  2017-04-10 14:22 ` [PATCH net-next 1/3] ipv6: Fix route handling when using l3mdev set to " Robert Shearman
@ 2017-04-10 14:22 ` Robert Shearman
  2017-04-10 14:22 ` [PATCH net-next 3/3] l3mdev: Fix lookup in local table when using " Robert Shearman
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 12+ messages in thread
From: Robert Shearman @ 2017-04-10 14:22 UTC (permalink / raw)
  To: davem; +Cc: netdev, David Ahern, Robert Shearman

If an l3mdev is set to use the main table then the use of the local
table is overridden. This means that when split local/main table is in
effect then local routes aren't added to the local table and so don't
respect the order of ip rules.

Fix this by assuming that no if no l3mdev is present then defaulting
to RT_TABLE_MAIN and then subsequently doing a translation from
RT_TABLE_MAIN to RT_TABLE_LOCAL.

Signed-off-by: Robert Shearman <rshearma@brocade.com>
---
 net/ipv4/af_inet.c      |  4 +++-
 net/ipv4/fib_frontend.c | 14 +++++++++-----
 net/ipv4/raw.c          |  5 ++++-
 3 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index d1a11707a126..83d54fab03f0 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -436,7 +436,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	struct net *net = sock_net(sk);
 	unsigned short snum;
 	int chk_addr_ret;
-	u32 tb_id = RT_TABLE_LOCAL;
+	u32 tb_id = RT_TABLE_MAIN;
 	int err;
 
 	/* If the socket has its own bind function then use it. (RAW) */
@@ -459,6 +459,8 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	}
 
 	tb_id = l3mdev_fib_table_by_index(net, sk->sk_bound_dev_if) ? : tb_id;
+	if (tb_id == RT_TABLE_MAIN)
+		tb_id = RT_TABLE_LOCAL;
 	chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id);
 
 	/* Not specified by any standard per-se, however it breaks too
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 8f2133ffc2ff..1782c35dbac0 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -248,8 +248,10 @@ EXPORT_SYMBOL(inet_addr_type);
 unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
 				__be32 addr)
 {
-	u32 rt_table = l3mdev_fib_table(dev) ? : RT_TABLE_LOCAL;
+	u32 rt_table = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
 
+	if (rt_table == RT_TABLE_MAIN)
+		rt_table = RT_TABLE_LOCAL;
 	return __inet_dev_addr_type(net, dev, addr, rt_table);
 }
 EXPORT_SYMBOL(inet_dev_addr_type);
@@ -261,8 +263,10 @@ unsigned int inet_addr_type_dev_table(struct net *net,
 				      const struct net_device *dev,
 				      __be32 addr)
 {
-	u32 rt_table = l3mdev_fib_table(dev) ? : RT_TABLE_LOCAL;
+	u32 rt_table = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
 
+	if (rt_table == RT_TABLE_MAIN)
+		rt_table = RT_TABLE_LOCAL;
 	return __inet_dev_addr_type(net, NULL, addr, rt_table);
 }
 EXPORT_SYMBOL(inet_addr_type_dev_table);
@@ -805,7 +809,7 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
 {
 	struct net *net = dev_net(ifa->ifa_dev->dev);
-	u32 tb_id = l3mdev_fib_table(ifa->ifa_dev->dev);
+	u32 tb_id = l3mdev_fib_table(ifa->ifa_dev->dev) ? : RT_TABLE_MAIN;
 	struct fib_table *tb;
 	struct fib_config cfg = {
 		.fc_protocol = RTPROT_KERNEL,
@@ -820,8 +824,8 @@ static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifad
 		},
 	};
 
-	if (!tb_id)
-		tb_id = (type == RTN_UNICAST) ? RT_TABLE_MAIN : RT_TABLE_LOCAL;
+	if (tb_id == RT_TABLE_MAIN && type != RTN_UNICAST)
+		tb_id = RT_TABLE_LOCAL;
 
 	tb = fib_new_table(net, tb_id);
 	if (!tb)
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 8119e1f66e03..2dd7022681e6 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -699,7 +699,7 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	struct sockaddr_in *addr = (struct sockaddr_in *) uaddr;
-	u32 tb_id = RT_TABLE_LOCAL;
+	u32 tb_id = RT_TABLE_MAIN;
 	int ret = -EINVAL;
 	int chk_addr_ret;
 
@@ -710,6 +710,9 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 		tb_id = l3mdev_fib_table_by_index(sock_net(sk),
 						 sk->sk_bound_dev_if) ? : tb_id;
 
+	if (tb_id == RT_TABLE_MAIN)
+		tb_id = RT_TABLE_LOCAL;
+
 	chk_addr_ret = inet_addr_type_table(sock_net(sk), addr->sin_addr.s_addr,
 					    tb_id);
 
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH net-next 3/3] l3mdev: Fix lookup in local table when using main table
  2017-04-10 14:21 [PATCH net-next 0/3] l3mdev: Improve use with main table Robert Shearman
  2017-04-10 14:22 ` [PATCH net-next 1/3] ipv6: Fix route handling when using l3mdev set to " Robert Shearman
  2017-04-10 14:22 ` [PATCH net-next 2/3] ipv4: " Robert Shearman
@ 2017-04-10 14:22 ` Robert Shearman
  2017-04-12 16:51 ` [PATCH net-next 0/3] l3mdev: Improve use with " David Ahern
  2017-04-20 22:36 ` David Ahern
  4 siblings, 0 replies; 12+ messages in thread
From: Robert Shearman @ 2017-04-10 14:22 UTC (permalink / raw)
  To: davem; +Cc: netdev, David Ahern, Robert Shearman

If an l3mdev is set to use the main table then the l3mdev rules will
return this. This means that no lookup will be done in the local table
if split local/main table is in effect and the local table lookup rule
has been reordered to after the l3mdev rule. Related to this is that
the order of the rule for looking up in the main table isn't
respected.

Fix these two aspects by not returning a match if the l3mdev's table
id is the main table. Processing will then proceed normally to the
default rules and do the appropriate lookups.

Signed-off-by: Robert Shearman <rshearma@brocade.com>
---
 net/l3mdev/l3mdev.c | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
index 8da86ceca33d..5147959243c5 100644
--- a/net/l3mdev/l3mdev.c
+++ b/net/l3mdev/l3mdev.c
@@ -12,6 +12,7 @@
 #include <linux/netdevice.h>
 #include <net/fib_rules.h>
 #include <net/l3mdev.h>
+#include <net/ip6_fib.h>
 
 /**
  *	l3mdev_master_ifindex - get index of L3 master device
@@ -142,23 +143,36 @@ int l3mdev_fib_rule_match(struct net *net, struct flowi *fl,
 {
 	struct net_device *dev;
 	int rc = 0;
+	u32 tb_id;
 
 	rcu_read_lock();
 
 	dev = dev_get_by_index_rcu(net, fl->flowi_oif);
 	if (dev && netif_is_l3_master(dev) &&
 	    dev->l3mdev_ops->l3mdev_fib_table) {
-		arg->table = dev->l3mdev_ops->l3mdev_fib_table(dev);
-		rc = 1;
-		goto out;
+		tb_id = dev->l3mdev_ops->l3mdev_fib_table(dev);
+		/* This requires the main table id to be consistent
+		 * between IPv4 and IPv6.
+		 */
+		BUILD_BUG_ON(RT_TABLE_MAIN != RT6_TABLE_MAIN);
+		/* main table is handled by default rules */
+		if (tb_id != RT_TABLE_MAIN) {
+			arg->table = tb_id;
+			rc = 1;
+			goto out;
+		}
 	}
 
 	dev = dev_get_by_index_rcu(net, fl->flowi_iif);
 	if (dev && netif_is_l3_master(dev) &&
 	    dev->l3mdev_ops->l3mdev_fib_table) {
-		arg->table = dev->l3mdev_ops->l3mdev_fib_table(dev);
-		rc = 1;
-		goto out;
+		tb_id = dev->l3mdev_ops->l3mdev_fib_table(dev);
+		/* main table is handled by default rules */
+		if (tb_id != RT_TABLE_MAIN) {
+			arg->table = tb_id;
+			rc = 1;
+			goto out;
+		}
 	}
 
 out:
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH net-next 0/3] l3mdev: Improve use with main table
  2017-04-10 14:21 [PATCH net-next 0/3] l3mdev: Improve use with main table Robert Shearman
                   ` (2 preceding siblings ...)
  2017-04-10 14:22 ` [PATCH net-next 3/3] l3mdev: Fix lookup in local table when using " Robert Shearman
@ 2017-04-12 16:51 ` David Ahern
  2017-04-13 12:48   ` Robert Shearman
  2017-04-20 22:36 ` David Ahern
  4 siblings, 1 reply; 12+ messages in thread
From: David Ahern @ 2017-04-12 16:51 UTC (permalink / raw)
  To: Robert Shearman, davem; +Cc: netdev

On 4/10/17 8:21 AM, Robert Shearman wrote:
> Attempting to create a TCP socket not bound to a VRF device when a TCP
> socket bound to a VRF device with the same port exists (and vice
> versa) fails with EADDRINUSE. This limits the ability to use programs
> in selected mixed VRF/non-VRF contexts.
> 
> This patch series solves the problem by extending the l3mdev be aware
> of the special semantics of the main table and fixing issues arising
> from the split local/main tables. A VRF master device created linking
> to the main table and used for these programs in the same way as those
> created for VRF tables can.
> 
> Robert Shearman (3):
>   ipv6: Fix route handling when using l3mdev set to main table
>   ipv4: Fix route handling when using l3mdev set to main table
>   l3mdev: Fix lookup in local table when using main table
> 
>  net/ipv4/af_inet.c      |  4 +++-
>  net/ipv4/fib_frontend.c | 14 +++++++++-----
>  net/ipv4/raw.c          |  5 ++++-
>  net/ipv6/addrconf.c     | 12 +++++++++---
>  net/ipv6/route.c        | 23 ++++++++++++++++++-----
>  net/l3mdev/l3mdev.c     | 26 ++++++++++++++++++++------
>  6 files changed, 63 insertions(+), 21 deletions(-)
> 

the patches look ok to me, but in testing them I see I refcnt problem.
simple reproducer:

ip li add red type vrf table 254
ip link set dev eth1 vrf red
ip addr add 127.0.0.1/8 dev red
ip link set dev eth1 up
ip li set red up
ping -c1 -w1 -I red 127.0.0.1
ip li del red
--> hangs with 'uregister_netdevice: waiting for red to become free.'

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH net-next 0/3] l3mdev: Improve use with main table
  2017-04-12 16:51 ` [PATCH net-next 0/3] l3mdev: Improve use with " David Ahern
@ 2017-04-13 12:48   ` Robert Shearman
  2017-04-13 14:36     ` David Ahern
  0 siblings, 1 reply; 12+ messages in thread
From: Robert Shearman @ 2017-04-13 12:48 UTC (permalink / raw)
  To: David Ahern, davem; +Cc: netdev

On 12/04/17 17:51, David Ahern wrote:
> On 4/10/17 8:21 AM, Robert Shearman wrote:
>> Attempting to create a TCP socket not bound to a VRF device when a TCP
>> socket bound to a VRF device with the same port exists (and vice
>> versa) fails with EADDRINUSE. This limits the ability to use programs
>> in selected mixed VRF/non-VRF contexts.
>>
>> This patch series solves the problem by extending the l3mdev be aware
>> of the special semantics of the main table and fixing issues arising
>> from the split local/main tables. A VRF master device created linking
>> to the main table and used for these programs in the same way as those
>> created for VRF tables can.
>>
>> Robert Shearman (3):
>>   ipv6: Fix route handling when using l3mdev set to main table
>>   ipv4: Fix route handling when using l3mdev set to main table
>>   l3mdev: Fix lookup in local table when using main table
>>
>>  net/ipv4/af_inet.c      |  4 +++-
>>  net/ipv4/fib_frontend.c | 14 +++++++++-----
>>  net/ipv4/raw.c          |  5 ++++-
>>  net/ipv6/addrconf.c     | 12 +++++++++---
>>  net/ipv6/route.c        | 23 ++++++++++++++++++-----
>>  net/l3mdev/l3mdev.c     | 26 ++++++++++++++++++++------
>>  6 files changed, 63 insertions(+), 21 deletions(-)
>>
>
> the patches look ok to me, but in testing them I see I refcnt problem.
> simple reproducer:
>
> ip li add red type vrf table 254
> ip link set dev eth1 vrf red
> ip addr add 127.0.0.1/8 dev red
> ip link set dev eth1 up
> ip li set red up
> ping -c1 -w1 -I red 127.0.0.1
> ip li del red
> --> hangs with 'uregister_netdevice: waiting for red to become free.'

Right, I've reproduced the same and it occurs even without using the 
main table and I believe it has been introduced within the last week.

I'll bisect to find out the cause.

Thanks for reviewing and testing,
Rob

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH net-next 0/3] l3mdev: Improve use with main table
  2017-04-13 12:48   ` Robert Shearman
@ 2017-04-13 14:36     ` David Ahern
  2017-04-20 13:01       ` Robert Shearman
  0 siblings, 1 reply; 12+ messages in thread
From: David Ahern @ 2017-04-13 14:36 UTC (permalink / raw)
  To: Robert Shearman, davem; +Cc: netdev

On 4/13/17 6:48 AM, Robert Shearman wrote:
>> the patches look ok to me, but in testing them I see I refcnt problem.
>> simple reproducer:
>>
>> ip li add red type vrf table 254
>> ip link set dev eth1 vrf red
>> ip addr add 127.0.0.1/8 dev red
>> ip link set dev eth1 up
>> ip li set red up
>> ping -c1 -w1 -I red 127.0.0.1
>> ip li del red
>> --> hangs with 'uregister_netdevice: waiting for red to become free.'
> 
> Right, I've reproduced the same and it occurs even without using the
> main table and I believe it has been introduced within the last week.

The cached dst on sockets is one known place that causes a hang on a
delete. Basically the delete stalls until the sockets are closed. I have
a patch for sk_rx_dst which is the one I chased down last week.
sk_dst_cache is another possibility.

Neither of those should be at play with the above example because the
ping command runs and then exits.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH net-next 0/3] l3mdev: Improve use with main table
  2017-04-13 14:36     ` David Ahern
@ 2017-04-20 13:01       ` Robert Shearman
  2017-04-20 14:25         ` David Ahern
  0 siblings, 1 reply; 12+ messages in thread
From: Robert Shearman @ 2017-04-20 13:01 UTC (permalink / raw)
  To: David Ahern, davem; +Cc: netdev

On 13/04/17 15:36, David Ahern wrote:
> On 4/13/17 6:48 AM, Robert Shearman wrote:
>>> the patches look ok to me, but in testing them I see I refcnt problem.
>>> simple reproducer:
>>>
>>> ip li add red type vrf table 254
>>> ip link set dev eth1 vrf red
>>> ip addr add 127.0.0.1/8 dev red
>>> ip link set dev eth1 up
>>> ip li set red up
>>> ping -c1 -w1 -I red 127.0.0.1
>>> ip li del red
>>> --> hangs with 'uregister_netdevice: waiting for red to become free.'
>>
>> Right, I've reproduced the same and it occurs even without using the
>> main table and I believe it has been introduced within the last week.
>
> The cached dst on sockets is one known place that causes a hang on a
> delete. Basically the delete stalls until the sockets are closed. I have
> a patch for sk_rx_dst which is the one I chased down last week.
> sk_dst_cache is another possibility.
>
> Neither of those should be at play with the above example because the
> ping command runs and then exits.

Thanks for the pointers. My earlier assessment that this was something 
recent turned out to be wrong. I've sent a patch against net that fixes 
the issue.

Thanks,
Rob

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH net-next 0/3] l3mdev: Improve use with main table
  2017-04-20 13:01       ` Robert Shearman
@ 2017-04-20 14:25         ` David Ahern
  0 siblings, 0 replies; 12+ messages in thread
From: David Ahern @ 2017-04-20 14:25 UTC (permalink / raw)
  To: Robert Shearman, davem; +Cc: netdev

On 4/20/17 7:01 AM, Robert Shearman wrote:
>> The cached dst on sockets is one known place that causes a hang on a
>> delete. Basically the delete stalls until the sockets are closed. I have
>> a patch for sk_rx_dst which is the one I chased down last week.

I got to the bottom on the sk_rx_dst caching -- it is only a problem on
older kernels (e.g., our 4.1 kernels) and only with multicast udp
sockets. The early demux code in 4.1 is matching a listen socket when it
should not. The early demux code for mcast had some changes unrelated to
false matching that fixed the problem.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH net-next 0/3] l3mdev: Improve use with main table
  2017-04-10 14:21 [PATCH net-next 0/3] l3mdev: Improve use with main table Robert Shearman
                   ` (3 preceding siblings ...)
  2017-04-12 16:51 ` [PATCH net-next 0/3] l3mdev: Improve use with " David Ahern
@ 2017-04-20 22:36 ` David Ahern
  2017-04-21 17:44   ` Robert Shearman
  4 siblings, 1 reply; 12+ messages in thread
From: David Ahern @ 2017-04-20 22:36 UTC (permalink / raw)
  To: Robert Shearman, davem; +Cc: netdev

On 4/10/17 8:21 AM, Robert Shearman wrote:
> Attempting to create a TCP socket not bound to a VRF device when a TCP
> socket bound to a VRF device with the same port exists (and vice
> versa) fails with EADDRINUSE. This limits the ability to use programs
> in selected mixed VRF/non-VRF contexts.
> 
> This patch series solves the problem by extending the l3mdev be aware
> of the special semantics of the main table and fixing issues arising
> from the split local/main tables. A VRF master device created linking
> to the main table and used for these programs in the same way as those
> created for VRF tables can.
> 
> Robert Shearman (3):
>   ipv6: Fix route handling when using l3mdev set to main table
>   ipv4: Fix route handling when using l3mdev set to main table
>   l3mdev: Fix lookup in local table when using main table
> 
>  net/ipv4/af_inet.c      |  4 +++-
>  net/ipv4/fib_frontend.c | 14 +++++++++-----
>  net/ipv4/raw.c          |  5 ++++-
>  net/ipv6/addrconf.c     | 12 +++++++++---
>  net/ipv6/route.c        | 23 ++++++++++++++++++-----
>  net/l3mdev/l3mdev.c     | 26 ++++++++++++++++++++------
>  6 files changed, 63 insertions(+), 21 deletions(-)
> 

With the change I mentioned earlier to fix the refcnt issue on top of
this patch set I see a number of failures:
- local IPv4 with 127.0.0.1 address - ping, tcp, udp tests fail
- all of the IPv4 multicast tests fail
- IPv6 linklocal and mcast addresses generally fail
- IPv6 global address on vrf device

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH net-next 0/3] l3mdev: Improve use with main table
  2017-04-20 22:36 ` David Ahern
@ 2017-04-21 17:44   ` Robert Shearman
  2017-04-21 20:47     ` David Ahern
  0 siblings, 1 reply; 12+ messages in thread
From: Robert Shearman @ 2017-04-21 17:44 UTC (permalink / raw)
  To: David Ahern, davem; +Cc: netdev

On 20/04/17 23:36, David Ahern wrote:
> On 4/10/17 8:21 AM, Robert Shearman wrote:
>> Attempting to create a TCP socket not bound to a VRF device when a TCP
>> socket bound to a VRF device with the same port exists (and vice
>> versa) fails with EADDRINUSE. This limits the ability to use programs
>> in selected mixed VRF/non-VRF contexts.
>>
>> This patch series solves the problem by extending the l3mdev be aware
>> of the special semantics of the main table and fixing issues arising
>> from the split local/main tables. A VRF master device created linking
>> to the main table and used for these programs in the same way as those
>> created for VRF tables can.
>>
>> Robert Shearman (3):
>>   ipv6: Fix route handling when using l3mdev set to main table
>>   ipv4: Fix route handling when using l3mdev set to main table
>>   l3mdev: Fix lookup in local table when using main table
>>
>>  net/ipv4/af_inet.c      |  4 +++-
>>  net/ipv4/fib_frontend.c | 14 +++++++++-----
>>  net/ipv4/raw.c          |  5 ++++-
>>  net/ipv6/addrconf.c     | 12 +++++++++---
>>  net/ipv6/route.c        | 23 ++++++++++++++++++-----
>>  net/l3mdev/l3mdev.c     | 26 ++++++++++++++++++++------
>>  6 files changed, 63 insertions(+), 21 deletions(-)
>>
>
> With the change I mentioned earlier to fix the refcnt issue on top of
> this patch set I see a number of failures:
> - local IPv4 with 127.0.0.1 address - ping, tcp, udp tests fail
> - all of the IPv4 multicast tests fail
> - IPv6 linklocal and mcast addresses generally fail
> - IPv6 global address on vrf device

Can you send me some more details of your testing?

These work for me:

$ ping -c1 -I vrf-default 127.0.0.1
PING 127.0.0.1 (127.0.0.1) from 127.0.0.1 vrf-default: 56(84) bytes of data.
64 bytes from 127.0.0.1: icmp_seq=1 ttl=64 time=0.141 ms

--- 127.0.0.1 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.141/0.141/0.141/0.000 ms
$ ping6 -c1 -I vrf-default 2001::1
PING 2001::1(2001::1) from 2001::1 vrf-default: 56 data bytes
64 bytes from 2001::1: icmp_seq=1 ttl=64 time=0.069 ms

--- 2001::1 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.069/0.069/0.069/0.000 ms
$ sudo ip vrf exec vrf-default nc -l -p 4013
TEST
$ sudo ip vrf exec vrf-default nc -l -u -p 4013
TEST
^C

(with a neighbouring host using nc to send the TEST string for the udp 
and tcp cases)

Thanks,
Rob

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH net-next 0/3] l3mdev: Improve use with main table
  2017-04-21 17:44   ` Robert Shearman
@ 2017-04-21 20:47     ` David Ahern
  0 siblings, 0 replies; 12+ messages in thread
From: David Ahern @ 2017-04-21 20:47 UTC (permalink / raw)
  To: Robert Shearman, davem; +Cc: netdev

On 4/21/17 11:44 AM, Robert Shearman wrote:
> 
> Can you send me some more details of your testing?

It's a shell script that runs a long list of combinations of client,
server and local traffic for ipv4 and ipv6 with addresses on the
external interface, the vrf device and 127.0.0.1 on the VRF device and
link local and mcast addresses for IPv6. Tests are run for icmp, tcp and
udp - and includes negative testing to verify tcp resets or icmp
unreachables are properly sent and received. There are so many
combinations. And then you have to check tcp_l3mdev_accept = {0,1} and
udp_l3mdev_accept = {0,1}.

For your 'main table' vrf run through the different combinations and
check IPv6 linklocal addressing to.

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2017-04-21 20:47 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-04-10 14:21 [PATCH net-next 0/3] l3mdev: Improve use with main table Robert Shearman
2017-04-10 14:22 ` [PATCH net-next 1/3] ipv6: Fix route handling when using l3mdev set to " Robert Shearman
2017-04-10 14:22 ` [PATCH net-next 2/3] ipv4: " Robert Shearman
2017-04-10 14:22 ` [PATCH net-next 3/3] l3mdev: Fix lookup in local table when using " Robert Shearman
2017-04-12 16:51 ` [PATCH net-next 0/3] l3mdev: Improve use with " David Ahern
2017-04-13 12:48   ` Robert Shearman
2017-04-13 14:36     ` David Ahern
2017-04-20 13:01       ` Robert Shearman
2017-04-20 14:25         ` David Ahern
2017-04-20 22:36 ` David Ahern
2017-04-21 17:44   ` Robert Shearman
2017-04-21 20:47     ` David Ahern

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.