All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next 0/4 v2] net: VRF support in IPv6 stack
@ 2015-10-07 22:02 David Ahern
  2015-10-07 22:02 ` [PATCH net-next 1/4] net: Add IPv6 support to l3mdev David Ahern
                   ` (4 more replies)
  0 siblings, 5 replies; 6+ messages in thread
From: David Ahern @ 2015-10-07 22:02 UTC (permalink / raw)
  To: netdev; +Cc: dsahern, David Ahern

Initial support for VRF in IPv6 stack. Makes IPv6 functionality
on par with IPv4 -- ping, tcp client/server and udp client/server
all work fine. tcpdump on vrf device and external tap (e.g., host
side tap device) shows all packets with proper addresses. IPv6
does not need the source address operation like IPv4. Verified
vti6 works properly in my setup as does use of an IPv6 address
on the VRF device.

v2
- fixed CONFIG_IPV6 dependency as questioned by Cong
  - if IPV6 is a module, kbuild ensures VRF is a module
  - if IPV6 is disabled IPV6 functionality is compiled out of VRF module

- addressed comments from Nik over IRC
  - removed duplicate call to netif_is_l3_master in l3mdev_rt6_dst_by_oif
  - changed allocation flag from GFP_ATOMIC to GFP_KERNEL since it is init time
  - added free of rt6i_pcpu
  - check_ipv6_frame returns false only if packet is NDISC type

David Ahern (4):
  net: Add IPv6 support to l3mdev
  net: Export fib6_get_table and nd_tbl
  net: Add IPv6 support to VRF device
  net: Add VRF support to IPv6 stack

 drivers/net/Kconfig   |   4 +-
 drivers/net/vrf.c     | 279 +++++++++++++++++++++++++++++++++++++++++++++++++-
 include/net/l3mdev.h  |  46 +++++++++
 net/ipv6/addrconf.c   |  12 ++-
 net/ipv6/icmp.c       |   6 +-
 net/ipv6/ip6_fib.c    |   1 +
 net/ipv6/ip6_output.c |   6 +-
 net/ipv6/ndisc.c      |  27 ++++-
 net/ipv6/route.c      |  22 +++-
 9 files changed, 387 insertions(+), 16 deletions(-)

-- 
1.9.1

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH net-next 1/4] net: Add IPv6 support to l3mdev
  2015-10-07 22:02 [PATCH net-next 0/4 v2] net: VRF support in IPv6 stack David Ahern
@ 2015-10-07 22:02 ` David Ahern
  2015-10-07 22:02 ` [PATCH net-next 2/4] net: Export fib6_get_table and nd_tbl David Ahern
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: David Ahern @ 2015-10-07 22:02 UTC (permalink / raw)
  To: netdev; +Cc: dsahern, David Ahern

Add operations to retrieve cached IPv6 dst entry from l3mdev device
and lookup IPv6 source address.

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
---
 include/net/l3mdev.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h
index 44a19a171104..774d85b2d5d9 100644
--- a/include/net/l3mdev.h
+++ b/include/net/l3mdev.h
@@ -19,14 +19,22 @@
  * @l3mdev_get_rtable: Get cached IPv4 rtable (dst_entry) for device
  *
  * @l3mdev_get_saddr: Get source address for a flow
+ *
+ * @l3mdev_get_rt6_dst: Get cached IPv6 rt6_info (dst_entry) for device
  */
 
 struct l3mdev_ops {
 	u32		(*l3mdev_fib_table)(const struct net_device *dev);
+
+	/* IPv4 ops */
 	struct rtable *	(*l3mdev_get_rtable)(const struct net_device *dev,
 					     const struct flowi4 *fl4);
 	void		(*l3mdev_get_saddr)(struct net_device *dev,
 					    struct flowi4 *fl4);
+
+	/* IPv6 ops */
+	struct dst_entry * (*l3mdev_get_rt6_dst)(const struct net_device *dev,
+						 const struct flowi6 *fl6);
 };
 
 #ifdef CONFIG_NET_L3_MASTER_DEV
@@ -123,6 +131,31 @@ static inline void l3mdev_get_saddr(struct net *net, int ifindex,
 	}
 }
 
+static inline struct dst_entry *l3mdev_get_rt6_dst(const struct net_device *dev,
+						   const struct flowi6 *fl6)
+{
+	if (netif_is_l3_master(dev) && dev->l3mdev_ops->l3mdev_get_rt6_dst)
+		return dev->l3mdev_ops->l3mdev_get_rt6_dst(dev, fl6);
+
+	return NULL;
+}
+
+static inline
+struct dst_entry *l3mdev_rt6_dst_by_oif(struct net *net,
+					const struct flowi6 *fl6)
+{
+	struct dst_entry *dst = NULL;
+	struct net_device *dev;
+
+	dev = dev_get_by_index(net, fl6->flowi6_oif);
+	if (dev) {
+		dst = l3mdev_get_rt6_dst(dev, fl6);
+		dev_put(dev);
+	}
+
+	return dst;
+}
+
 #else
 
 static inline int l3mdev_master_ifindex_rcu(struct net_device *dev)
@@ -171,6 +204,19 @@ static inline void l3mdev_get_saddr(struct net *net, int ifindex,
 				    struct flowi4 *fl4)
 {
 }
+
+static inline
+struct dst_entry *l3mdev_get_rt6_dst(const struct net_device *dev,
+				     const struct flowi6 *fl6)
+{
+	return NULL;
+}
+static inline
+struct dst_entry *l3mdev_rt6_dst_by_oif(struct net *net,
+					const struct flowi6 *fl6)
+{
+	return NULL;
+}
 #endif
 
 #endif /* _NET_L3MDEV_H_ */
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH net-next 2/4] net: Export fib6_get_table and nd_tbl
  2015-10-07 22:02 [PATCH net-next 0/4 v2] net: VRF support in IPv6 stack David Ahern
  2015-10-07 22:02 ` [PATCH net-next 1/4] net: Add IPv6 support to l3mdev David Ahern
@ 2015-10-07 22:02 ` David Ahern
  2015-10-07 22:02 ` [PATCH net-next 3/4] net: Add IPv6 support to VRF device David Ahern
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: David Ahern @ 2015-10-07 22:02 UTC (permalink / raw)
  To: netdev; +Cc: dsahern, David Ahern

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
---
 net/ipv6/ip6_fib.c | 1 +
 net/ipv6/ndisc.c   | 1 +
 2 files changed, 2 insertions(+)

diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 7d2e0023c72d..09fddf70cca4 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -264,6 +264,7 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id)
 
 	return NULL;
 }
+EXPORT_SYMBOL_GPL(fib6_get_table);
 
 static void __net_init fib6_tables_init(struct net *net)
 {
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 7089c305245c..b3292db05198 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -147,6 +147,7 @@ struct neigh_table nd_tbl = {
 	.gc_thresh2 =	 512,
 	.gc_thresh3 =	1024,
 };
+EXPORT_SYMBOL_GPL(nd_tbl);
 
 static void ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data)
 {
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH net-next 3/4] net: Add IPv6 support to VRF device
  2015-10-07 22:02 [PATCH net-next 0/4 v2] net: VRF support in IPv6 stack David Ahern
  2015-10-07 22:02 ` [PATCH net-next 1/4] net: Add IPv6 support to l3mdev David Ahern
  2015-10-07 22:02 ` [PATCH net-next 2/4] net: Export fib6_get_table and nd_tbl David Ahern
@ 2015-10-07 22:02 ` David Ahern
  2015-10-07 22:02 ` [PATCH net-next 4/4] net: Add VRF support to IPv6 stack David Ahern
  2015-10-11 11:48 ` [PATCH net-next 0/4 v2] net: VRF support in " David Miller
  4 siblings, 0 replies; 6+ messages in thread
From: David Ahern @ 2015-10-07 22:02 UTC (permalink / raw)
  To: netdev; +Cc: dsahern, David Ahern

Add support for IPv6 to VRF device driver. Implemenation parallels what
has been done for IPv4.

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
---
 drivers/net/Kconfig |   4 +-
 drivers/net/vrf.c   | 279 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 281 insertions(+), 2 deletions(-)

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index b9ebd0d18a52..f184fb5bd110 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -298,8 +298,10 @@ config NLMON
 
 config NET_VRF
 	tristate "Virtual Routing and Forwarding (Lite)"
-	depends on IP_MULTIPLE_TABLES && IPV6_MULTIPLE_TABLES
+	depends on IP_MULTIPLE_TABLES
 	depends on NET_L3_MASTER_DEV
+	depends on IPV6 || IPV6=n
+	depends on IPV6_MULTIPLE_TABLES || IPV6=n
 	---help---
 	  This option enables the support for mapping interfaces into VRF's. The
 	  support enables VRF devices.
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 64499766e00f..c6f9266c4032 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -30,6 +30,7 @@
 #include <net/arp.h>
 #include <net/ip.h>
 #include <net/ip_fib.h>
+#include <net/ip6_fib.h>
 #include <net/ip6_route.h>
 #include <net/rtnetlink.h>
 #include <net/route.h>
@@ -57,6 +58,7 @@ struct slave_queue {
 struct net_vrf {
 	struct slave_queue      queue;
 	struct rtable           *rth;
+	struct rt6_info		*rt6;
 	u32                     tb_id;
 };
 
@@ -104,12 +106,56 @@ static struct dst_ops vrf_dst_ops = {
 	.default_advmss	= vrf_default_advmss,
 };
 
+/* neighbor handling is done with actual device; do not want
+ * to flip skb->dev for those ndisc packets. This really fails
+ * for multiple next protocols (e.g., NEXTHDR_HOP). But it is
+ * a start.
+ */
+#if IS_ENABLED(CONFIG_IPV6)
+static bool check_ipv6_frame(const struct sk_buff *skb)
+{
+	const struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb->data;
+	size_t hlen = sizeof(*ipv6h);
+	bool rc = true;
+
+	if (skb->len < hlen)
+		goto out;
+
+	if (ipv6h->nexthdr == NEXTHDR_ICMP) {
+		const struct icmp6hdr *icmph;
+
+		if (skb->len < hlen + sizeof(*icmph))
+			goto out;
+
+		icmph = (struct icmp6hdr *)(skb->data + sizeof(*ipv6h));
+		switch (icmph->icmp6_type) {
+		case NDISC_ROUTER_SOLICITATION:
+		case NDISC_ROUTER_ADVERTISEMENT:
+		case NDISC_NEIGHBOUR_SOLICITATION:
+		case NDISC_NEIGHBOUR_ADVERTISEMENT:
+		case NDISC_REDIRECT:
+			rc = false;
+			break;
+		}
+	}
+
+out:
+	return rc;
+}
+#else
+static bool check_ipv6_frame(const struct sk_buff *skb)
+{
+	return false;
+}
+#endif
+
 static bool is_ip_rx_frame(struct sk_buff *skb)
 {
 	switch (skb->protocol) {
 	case htons(ETH_P_IP):
-	case htons(ETH_P_IPV6):
 		return true;
+	case htons(ETH_P_IPV6):
+		return check_ipv6_frame(skb);
 	}
 	return false;
 }
@@ -169,12 +215,52 @@ static struct rtnl_link_stats64 *vrf_get_stats64(struct net_device *dev,
 	return stats;
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
+static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
+					   struct net_device *dev)
+{
+	const struct ipv6hdr *iph = ipv6_hdr(skb);
+	struct net *net = dev_net(skb->dev);
+	struct flowi6 fl6 = {
+		/* needed to match OIF rule */
+		.flowi6_oif = dev->ifindex,
+		.flowi6_iif = LOOPBACK_IFINDEX,
+		.daddr = iph->daddr,
+		.saddr = iph->saddr,
+		.flowlabel = ip6_flowinfo(iph),
+		.flowi6_mark = skb->mark,
+		.flowi6_proto = iph->nexthdr,
+		.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC | FLOWI_FLAG_SKIP_NH_OIF,
+	};
+	int ret = NET_XMIT_DROP;
+	struct dst_entry *dst;
+
+	dst = ip6_route_output(net, NULL, &fl6);
+	if (dst == (struct dst_entry *)net->ipv6.ip6_null_entry)
+		goto err;
+
+	skb_dst_drop(skb);
+	skb_dst_set(skb, dst);
+
+	ret = ip6_local_out(skb);
+	if (unlikely(net_xmit_eval(ret)))
+		dev->stats.tx_errors++;
+	else
+		ret = NET_XMIT_SUCCESS;
+
+	return ret;
+err:
+	vrf_tx_error(dev, skb);
+	return NET_XMIT_DROP;
+}
+#else
 static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
 					   struct net_device *dev)
 {
 	vrf_tx_error(dev, skb);
 	return NET_XMIT_DROP;
 }
+#endif
 
 static int vrf_send_v4_prep(struct sk_buff *skb, struct flowi4 *fl4,
 			    struct net_device *vrf_dev)
@@ -269,6 +355,164 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev)
 	return ret;
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
+static struct dst_entry *vrf_ip6_check(struct dst_entry *dst, u32 cookie)
+{
+	return dst;
+}
+
+static int vrf_ip6_local_out(struct sk_buff *skb)
+{
+	return ip6_local_out(skb);
+}
+
+static struct dst_ops vrf_dst_ops6 = {
+	.family		= AF_INET6,
+	.local_out	= vrf_ip6_local_out,
+	.check		= vrf_ip6_check,
+	.mtu		= vrf_v4_mtu,
+	.destroy	= vrf_dst_destroy,
+	.default_advmss	= vrf_default_advmss,
+};
+
+static int init_dst_ops6_kmem_cachep(void)
+{
+	vrf_dst_ops6.kmem_cachep = kmem_cache_create("vrf_ip6_dst_cache",
+						     sizeof(struct rt6_info),
+						     0,
+						     SLAB_HWCACHE_ALIGN,
+						     NULL);
+
+	if (!vrf_dst_ops6.kmem_cachep)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void free_dst_ops6_kmem_cachep(void)
+{
+	kmem_cache_destroy(vrf_dst_ops6.kmem_cachep);
+}
+
+static int vrf_input6(struct sk_buff *skb)
+{
+	skb->dev->stats.rx_errors++;
+	kfree_skb(skb);
+	return 0;
+}
+
+/* modelled after ip6_finish_output2 */
+static int vrf_finish_output6(struct net *net, struct sock *sk,
+			      struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb_dst(skb);
+	struct net_device *dev = dst->dev;
+	struct neighbour *neigh;
+	struct in6_addr *nexthop;
+	int ret;
+
+	skb->protocol = htons(ETH_P_IPV6);
+	skb->dev = dev;
+
+	rcu_read_lock_bh();
+	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
+	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
+	if (unlikely(!neigh))
+		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
+	if (!IS_ERR(neigh)) {
+		ret = dst_neigh_output(dst, neigh, skb);
+		rcu_read_unlock_bh();
+		return ret;
+	}
+	rcu_read_unlock_bh();
+
+	IP6_INC_STATS(dev_net(dst->dev),
+		      ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+	kfree_skb(skb);
+	return -EINVAL;
+}
+
+/* modelled after ip6_output */
+static int vrf_output6(struct sock *sk, struct sk_buff *skb)
+{
+	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
+			    dev_net(skb_dst(skb)->dev), sk, skb,
+			    NULL, skb_dst(skb)->dev,
+			    vrf_finish_output6,
+			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
+}
+
+static void vrf_rt6_destroy(struct net_vrf *vrf)
+{
+	struct dst_entry *dst = (struct dst_entry *)vrf->rt6;
+
+	dst_destroy(dst);
+	free_percpu(vrf->rt6->rt6i_pcpu);
+	vrf->rt6 = NULL;
+}
+
+static int vrf_rt6_create(struct net_device *dev)
+{
+	struct net_vrf *vrf = netdev_priv(dev);
+	struct rt6_info *rt6;
+	struct dst_entry *dst;
+	int cpu;
+	int rc = -ENOMEM;
+
+	rt6 = dst_alloc(&vrf_dst_ops6, dev, 0,
+			DST_OBSOLETE_NONE,
+			(DST_HOST | DST_NOPOLICY | DST_NOXFRM));
+	if (!rt6)
+		goto out;
+
+	rt6->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_KERNEL);
+	if (!rt6->rt6i_pcpu) {
+		dst_destroy((struct dst_entry *)rt6);
+		goto out;
+	}
+	for_each_possible_cpu(cpu) {
+		struct rt6_info **p = per_cpu_ptr(rt6->rt6i_pcpu, cpu);
+		*p =  NULL;
+	}
+
+	dst = &rt6->dst;
+	memset(dst + 1, 0, sizeof(*rt6) - sizeof(*dst));
+
+	INIT_LIST_HEAD(&rt6->rt6i_siblings);
+	INIT_LIST_HEAD(&rt6->rt6i_uncached);
+
+	rt6->dst.input	= vrf_input6;
+	rt6->dst.output	= vrf_output6;
+
+	rt6->rt6i_table = fib6_get_table(dev_net(dev), vrf->tb_id);
+
+	atomic_set(&rt6->dst.__refcnt, 2);
+
+	vrf->rt6 = rt6;
+	rc = 0;
+out:
+	return rc;
+}
+#else
+static int init_dst_ops6_kmem_cachep(void)
+{
+	return 0;
+}
+
+static void free_dst_ops6_kmem_cachep(void)
+{
+}
+
+static void vrf_rt6_destroy(struct net_vrf *vrf)
+{
+}
+
+static int vrf_rt6_create(struct net_device *dev)
+{
+	return 0;
+}
+#endif
+
 /* modelled after ip_finish_output2 */
 static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
@@ -491,6 +735,7 @@ static void vrf_dev_uninit(struct net_device *dev)
 	struct slave *slave, *next;
 
 	vrf_rtable_destroy(vrf);
+	vrf_rt6_destroy(vrf);
 
 	list_for_each_entry_safe(slave, next, head, list)
 		vrf_del_slave(dev, slave->dev);
@@ -514,10 +759,15 @@ static int vrf_dev_init(struct net_device *dev)
 	if (!vrf->rth)
 		goto out_stats;
 
+	if (vrf_rt6_create(dev) != 0)
+		goto out_rth;
+
 	dev->flags = IFF_MASTER | IFF_NOARP;
 
 	return 0;
 
+out_rth:
+	vrf_rtable_destroy(vrf);
 out_stats:
 	free_percpu(dev->dstats);
 	dev->dstats = NULL;
@@ -587,10 +837,30 @@ static void vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4)
 	fl4->flowi4_scope = scope;
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
+static struct dst_entry *vrf_get_rt6_dst(const struct net_device *dev,
+					 const struct flowi6 *fl6)
+{
+	struct rt6_info *rt = NULL;
+
+	if (!(fl6->flowi6_flags & FLOWI_FLAG_L3MDEV_SRC)) {
+		struct net_vrf *vrf = netdev_priv(dev);
+
+		rt = vrf->rt6;
+		atomic_inc(&rt->dst.__refcnt);
+	}
+
+	return (struct dst_entry *)rt;
+}
+#endif
+
 static const struct l3mdev_ops vrf_l3mdev_ops = {
 	.l3mdev_fib_table	= vrf_fib_table,
 	.l3mdev_get_rtable	= vrf_get_rtable,
 	.l3mdev_get_saddr	= vrf_get_saddr,
+#if IS_ENABLED(CONFIG_IPV6)
+	.l3mdev_get_rt6_dst	= vrf_get_rt6_dst,
+#endif
 };
 
 static void vrf_get_drvinfo(struct net_device *dev,
@@ -732,6 +1002,10 @@ static int __init vrf_init_module(void)
 	if (!vrf_dst_ops.kmem_cachep)
 		return -ENOMEM;
 
+	rc = init_dst_ops6_kmem_cachep();
+	if (rc != 0)
+		goto error2;
+
 	register_netdevice_notifier(&vrf_notifier_block);
 
 	rc = rtnl_link_register(&vrf_link_ops);
@@ -742,6 +1016,8 @@ static int __init vrf_init_module(void)
 
 error:
 	unregister_netdevice_notifier(&vrf_notifier_block);
+	free_dst_ops6_kmem_cachep();
+error2:
 	kmem_cache_destroy(vrf_dst_ops.kmem_cachep);
 	return rc;
 }
@@ -751,6 +1027,7 @@ static void __exit vrf_cleanup_module(void)
 	rtnl_link_unregister(&vrf_link_ops);
 	unregister_netdevice_notifier(&vrf_notifier_block);
 	kmem_cache_destroy(vrf_dst_ops.kmem_cachep);
+	free_dst_ops6_kmem_cachep();
 }
 
 module_init(vrf_init_module);
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH net-next 4/4] net: Add VRF support to IPv6 stack
  2015-10-07 22:02 [PATCH net-next 0/4 v2] net: VRF support in IPv6 stack David Ahern
                   ` (2 preceding siblings ...)
  2015-10-07 22:02 ` [PATCH net-next 3/4] net: Add IPv6 support to VRF device David Ahern
@ 2015-10-07 22:02 ` David Ahern
  2015-10-11 11:48 ` [PATCH net-next 0/4 v2] net: VRF support in " David Miller
  4 siblings, 0 replies; 6+ messages in thread
From: David Ahern @ 2015-10-07 22:02 UTC (permalink / raw)
  To: netdev; +Cc: dsahern, David Ahern

As with IPv4 support for VRFs added to IPv6 stack by replacing hardcoded
table ids with possibly device specific ones and manipulating the oif in
the flowi6. The flow flags are used to skip oif compare in nexthop lookups
if the device is enslaved to a VRF via the L3 master device.

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
---
 net/ipv6/addrconf.c   | 12 +++++++++---
 net/ipv6/icmp.c       |  6 +++++-
 net/ipv6/ip6_output.c |  6 ++++--
 net/ipv6/ndisc.c      | 26 +++++++++++++++++++++++---
 net/ipv6/route.c      | 22 +++++++++++++++++-----
 5 files changed, 58 insertions(+), 14 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index b2d6f2fc0fbd..e07b1fb52131 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -81,6 +81,7 @@
 #include <net/ip.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
+#include <net/l3mdev.h>
 #include <linux/if_tunnel.h>
 #include <linux/rtnetlink.h>
 #include <linux/netconf.h>
@@ -2149,7 +2150,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
 		      unsigned long expires, u32 flags)
 {
 	struct fib6_config cfg = {
-		.fc_table = RT6_TABLE_PREFIX,
+		.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX,
 		.fc_metric = IP6_RT_PRIO_ADDRCONF,
 		.fc_ifindex = dev->ifindex,
 		.fc_expires = expires,
@@ -2182,8 +2183,9 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
 	struct fib6_node *fn;
 	struct rt6_info *rt = NULL;
 	struct fib6_table *table;
+	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX;
 
-	table = fib6_get_table(dev_net(dev), RT6_TABLE_PREFIX);
+	table = fib6_get_table(dev_net(dev), tb_id);
 	if (!table)
 		return NULL;
 
@@ -2214,7 +2216,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
 static void addrconf_add_mroute(struct net_device *dev)
 {
 	struct fib6_config cfg = {
-		.fc_table = RT6_TABLE_LOCAL,
+		.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_LOCAL,
 		.fc_metric = IP6_RT_PRIO_ADDRCONF,
 		.fc_ifindex = dev->ifindex,
 		.fc_dst_len = 8,
@@ -3035,6 +3037,10 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route)
 {
 	struct in6_addr addr;
 
+	/* no link local addresses on L3 master devices */
+	if (netif_is_l3_master(idev->dev))
+		return;
+
 	ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0);
 
 	if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY) {
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 6c2b2132c8d3..efb1c00f2270 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -68,6 +68,7 @@
 #include <net/xfrm.h>
 #include <net/inet_common.h>
 #include <net/dsfield.h>
+#include <net/l3mdev.h>
 
 #include <asm/uaccess.h>
 
@@ -496,6 +497,9 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
 	else if (!fl6.flowi6_oif)
 		fl6.flowi6_oif = np->ucast_oif;
 
+	if (!fl6.flowi6_oif)
+		fl6.flowi6_oif = l3mdev_master_ifindex(skb->dev);
+
 	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
 	if (IS_ERR(dst))
 		goto out;
@@ -575,7 +579,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 	fl6.daddr = ipv6_hdr(skb)->saddr;
 	if (saddr)
 		fl6.saddr = *saddr;
-	fl6.flowi6_oif = skb->dev->ifindex;
+	fl6.flowi6_oif = l3mdev_fib_oif(skb->dev);
 	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
 	fl6.flowi6_mark = mark;
 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index caf7d14a1bdd..527870ccf5c1 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -55,6 +55,7 @@
 #include <net/xfrm.h>
 #include <net/checksum.h>
 #include <linux/mroute6.h>
+#include <net/l3mdev.h>
 
 static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
@@ -886,7 +887,8 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
 #ifdef CONFIG_IPV6_SUBTREES
 	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
 #endif
-	    (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
+	   (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
+	      (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
 		dst_release(dst);
 		dst = NULL;
 	}
@@ -1038,7 +1040,7 @@ struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
 	if (final_dst)
 		fl6->daddr = *final_dst;
 	if (!fl6->flowi6_oif)
-		fl6->flowi6_oif = dst->dev->ifindex;
+		fl6->flowi6_oif = l3mdev_fib_oif(dst->dev);
 
 	return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
 }
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index b3292db05198..0b85242d8469 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -67,6 +67,7 @@
 #include <net/flow.h>
 #include <net/ip6_checksum.h>
 #include <net/inet_common.h>
+#include <net/l3mdev.h>
 #include <linux/proc_fs.h>
 
 #include <linux/netfilter.h>
@@ -442,8 +443,11 @@ static void ndisc_send_skb(struct sk_buff *skb,
 
 	if (!dst) {
 		struct flowi6 fl6;
+		int oif = l3mdev_fib_oif(skb->dev);
 
-		icmpv6_flow_init(sk, &fl6, type, saddr, daddr, skb->dev->ifindex);
+		icmpv6_flow_init(sk, &fl6, type, saddr, daddr, oif);
+		if (oif != skb->dev->ifindex)
+			fl6.flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC;
 		dst = icmp6_dst_alloc(skb->dev, &fl6);
 		if (IS_ERR(dst)) {
 			kfree_skb(skb);
@@ -767,7 +771,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
 
 	ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1);
 	if (ifp) {
-
+have_ifp:
 		if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) {
 			if (dad) {
 				/*
@@ -793,6 +797,18 @@ static void ndisc_recv_ns(struct sk_buff *skb)
 	} else {
 		struct net *net = dev_net(dev);
 
+		/* perhaps an address on the master device */
+		if (netif_is_l3_slave(dev)) {
+			struct net_device *mdev;
+
+			mdev = netdev_master_upper_dev_get_rcu(dev);
+			if (mdev) {
+				ifp = ipv6_get_ifaddr(net, &msg->target, mdev, 1);
+				if (ifp)
+					goto have_ifp;
+			}
+		}
+
 		idev = in6_dev_get(dev);
 		if (!idev) {
 			/* XXX: count this drop? */
@@ -1484,6 +1500,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
 	struct flowi6 fl6;
 	int rd_len;
 	u8 ha_buf[MAX_ADDR_LEN], *ha = NULL;
+	int oif = l3mdev_fib_oif(dev);
 	bool ret;
 
 	if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) {
@@ -1500,7 +1517,10 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
 	}
 
 	icmpv6_flow_init(sk, &fl6, NDISC_REDIRECT,
-			 &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex);
+			 &saddr_buf, &ipv6_hdr(skb)->saddr, oif);
+
+	if (oif != skb->dev->ifindex)
+		fl6.flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC;
 
 	dst = ip6_route_output(net, NULL, &fl6);
 	if (dst->error) {
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d3d946773a3e..63446bae7f5f 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -61,6 +61,7 @@
 #include <net/nexthop.h>
 #include <net/lwtunnel.h>
 #include <net/ip_tunnels.h>
+#include <net/l3mdev.h>
 
 #include <asm/uaccess.h>
 
@@ -1044,6 +1045,9 @@ static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 	saved_fn = fn;
 
+	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
+		oif = 0;
+
 redo_rt6_select:
 	rt = rt6_select(fn, oif, strict);
 	if (rt->rt6i_nsiblings)
@@ -1141,7 +1145,7 @@ void ip6_route_input(struct sk_buff *skb)
 	int flags = RT6_LOOKUP_F_HAS_SADDR;
 	struct ip_tunnel_info *tun_info;
 	struct flowi6 fl6 = {
-		.flowi6_iif = skb->dev->ifindex,
+		.flowi6_iif = l3mdev_fib_oif(skb->dev),
 		.daddr = iph->daddr,
 		.saddr = iph->saddr,
 		.flowlabel = ip6_flowinfo(iph),
@@ -1165,8 +1169,13 @@ static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table
 struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
 				    struct flowi6 *fl6)
 {
+	struct dst_entry *dst;
 	int flags = 0;
 
+	dst = l3mdev_rt6_dst_by_oif(net, fl6);
+	if (dst)
+		return dst;
+
 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
 
 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
@@ -2264,7 +2273,6 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
 					   unsigned int pref)
 {
 	struct fib6_config cfg = {
-		.fc_table	= RT6_TABLE_INFO,
 		.fc_metric	= IP6_RT_PRIO_USER,
 		.fc_ifindex	= ifindex,
 		.fc_dst_len	= prefixlen,
@@ -2275,6 +2283,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
 		.fc_nlinfo.nl_net = net,
 	};
 
+	cfg.fc_table = l3mdev_fib_table_by_index(net, ifindex) ? : RT6_TABLE_INFO;
 	cfg.fc_dst = *prefix;
 	cfg.fc_gateway = *gwaddr;
 
@@ -2315,7 +2324,7 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
 				     unsigned int pref)
 {
 	struct fib6_config cfg = {
-		.fc_table	= RT6_TABLE_DFLT,
+		.fc_table	= l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
 		.fc_metric	= IP6_RT_PRIO_USER,
 		.fc_ifindex	= dev->ifindex,
 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
@@ -2362,7 +2371,8 @@ static void rtmsg_to_fib6_config(struct net *net,
 {
 	memset(cfg, 0, sizeof(*cfg));
 
-	cfg->fc_table = RT6_TABLE_MAIN;
+	cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
+			 : RT6_TABLE_MAIN;
 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
 	cfg->fc_metric = rtmsg->rtmsg_metric;
 	cfg->fc_expires = rtmsg->rtmsg_info;
@@ -2471,6 +2481,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 				    const struct in6_addr *addr,
 				    bool anycast)
 {
+	u32 tb_id;
 	struct net *net = dev_net(idev->dev);
 	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
 					    DST_NOCOUNT);
@@ -2493,7 +2504,8 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 	rt->rt6i_gateway  = *addr;
 	rt->rt6i_dst.addr = *addr;
 	rt->rt6i_dst.plen = 128;
-	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
+	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
+	rt->rt6i_table = fib6_get_table(net, tb_id);
 	rt->dst.flags |= DST_NOCACHE;
 
 	atomic_set(&rt->dst.__refcnt, 1);
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH net-next 0/4 v2] net: VRF support in IPv6 stack
  2015-10-07 22:02 [PATCH net-next 0/4 v2] net: VRF support in IPv6 stack David Ahern
                   ` (3 preceding siblings ...)
  2015-10-07 22:02 ` [PATCH net-next 4/4] net: Add VRF support to IPv6 stack David Ahern
@ 2015-10-11 11:48 ` David Miller
  4 siblings, 0 replies; 6+ messages in thread
From: David Miller @ 2015-10-11 11:48 UTC (permalink / raw)
  To: dsa; +Cc: netdev, dsahern


Please minimize the crazy pointer casting in this series.

All of these casts between ipv4/ipv6 route objects and dsts are
completely unnecessary.

Prefer to maintain an rt/rt6 in local variables, and then when you
need the dst say &rt6->dst, or to get the device say rt6->dst.dev or
similar.

Thanks.

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2015-10-11 11:32 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-10-07 22:02 [PATCH net-next 0/4 v2] net: VRF support in IPv6 stack David Ahern
2015-10-07 22:02 ` [PATCH net-next 1/4] net: Add IPv6 support to l3mdev David Ahern
2015-10-07 22:02 ` [PATCH net-next 2/4] net: Export fib6_get_table and nd_tbl David Ahern
2015-10-07 22:02 ` [PATCH net-next 3/4] net: Add IPv6 support to VRF device David Ahern
2015-10-07 22:02 ` [PATCH net-next 4/4] net: Add VRF support to IPv6 stack David Ahern
2015-10-11 11:48 ` [PATCH net-next 0/4 v2] net: VRF support in " David Miller

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.