[PATCH net-next] vrf: local route leaking

* [PATCH net-next] vrf: local route leaking
@ 2019-05-24  8:05 George Wilkie
  2019-05-24 20:19 ` David Ahern
  0 siblings, 1 reply; 10+ messages in thread
From: George Wilkie @ 2019-05-24  8:05 UTC (permalink / raw)
  To: David Ahern, Shrijeet Mukherjee, David S. Miller,
	Alexey Kuznetsov, Hideaki YOSHIFUJI
  Cc: netdev

If you have an interface in vrf A:

  10.10.2.0/24 dev ens9 proto kernel scope link src 10.10.2.2
  local 10.10.2.2 dev ens9 proto kernel scope host src 10.10.2.2

and want to leak it into vrf B, it is not sufficient to leak just
the interface route:

  ip route add 10.10.2.0/24 vrf B dev ens9

as traffic arriving in vrf B that is destined for 10.10.2.2 will
not be delivered - it will be sent out the ens9 interface and nobody
will respond to the ARP.

In order to handle the traffic locally, the local route must also
be leaked to vrf B:

  ip route add local 10.10.2.2 vrf B dev ens9

However, that still doesn't work as the traffic is processed in
the context of the input vrf B and does not find a socket that is
bound to the destination vrf A.

Add a new vector to l3mdev_ops for receiving a local packet.
This checks if the local interface is enslaved to a different vrf
than the input interface, and if so, updates the skb so that it
will be handled by a socket in the vrf associated with the local
interface.
For ipv4, the local interface is obtained from the fib result in
the RTN_LOCAL route lookup path.
For ipv6, the local interface is obtained from the skb_dst in
ip6_input.

Signed-off-by: George Wilkie <gwilkie@vyatta.att-mail.com>
---
 drivers/net/vrf.c    | 43 +++++++++++++++++++++++++++++++++++++++++++
 include/net/l3mdev.h | 41 +++++++++++++++++++++++++++++++++++++++++
 net/ipv4/route.c     |  2 ++
 net/ipv6/ip6_input.c |  1 +
 4 files changed, 87 insertions(+)

diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index cf7e6a92e73c..719e10f7761b 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -1106,6 +1106,48 @@ static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev,
 }
 #endif
 
+/* Move an IPv6 skb to @vrf_dev; with no VRF (VRF to global) clear L3SLAVE */
+static void vrf_ip6_local_rcv(struct net_device *vrf_dev, struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+	if (!vrf_dev) {
+		IP6CB(skb)->flags &= ~IP6SKB_L3SLAVE;
+		return;
+	}
+	skb->dev = vrf_dev;
+	skb->skb_iif = vrf_dev->ifindex;
+	IP6CB(skb)->flags |= IP6SKB_L3SLAVE;
+	vrf_rx_stats(vrf_dev, skb->len);
+#endif
+}
+
+/* Move an IPv4 skb to @vrf_dev; with no VRF (VRF to global) clear L3SLAVE */
+static void vrf_ip_local_rcv(struct net_device *vrf_dev, struct sk_buff *skb)
+{
+	if (!vrf_dev) {
+		IPCB(skb)->flags &= ~IPSKB_L3SLAVE;
+		return;
+	}
+	skb->dev = vrf_dev;
+	skb->skb_iif = vrf_dev->ifindex;
+	IPCB(skb)->flags |= IPSKB_L3SLAVE;
+	vrf_rx_stats(vrf_dev, skb->len);
+}
+
+/* Per-family dispatch for the l3mdev_local_rcv hook; any proto other
+ * than AF_INET or AF_INET6 leaves the skb untouched.
+ *
+ * called with rcu lock held
+ */
+static void vrf_local_rcv(struct net_device *vrf_dev, struct sk_buff *skb,
+			  u16 proto)
+{
+	if (proto == AF_INET)
+		vrf_ip_local_rcv(vrf_dev, skb);
+	else if (proto == AF_INET6)
+		vrf_ip6_local_rcv(vrf_dev, skb);
+}
+
 static const struct l3mdev_ops vrf_l3mdev_ops = {
 	.l3mdev_fib_table	= vrf_fib_table,
 	.l3mdev_l3_rcv		= vrf_l3_rcv,
@@ -1113,6 +1155,7 @@ static const struct l3mdev_ops vrf_l3mdev_ops = {
 #if IS_ENABLED(CONFIG_IPV6)
 	.l3mdev_link_scope_lookup = vrf_link_scope_lookup,
 #endif
+	.l3mdev_local_rcv	= vrf_local_rcv,
 };
 
 static void vrf_get_drvinfo(struct net_device *dev,
diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h
index 5175fd63cd82..d1008437a769 100644
--- a/include/net/l3mdev.h
+++ b/include/net/l3mdev.h
@@ -24,6 +24,8 @@
  * @l3mdev_l3_out:    Hook in L3 output path
  *
  * @l3mdev_link_scope_lookup: IPv6 lookup for linklocal and mcast destinations
+ *
+ * @l3mdev_local_rcv: Hook in local receive path
  */
 
 struct l3mdev_ops {
@@ -37,6 +39,8 @@ struct l3mdev_ops {
 	/* IPv6 ops */
 	struct dst_entry * (*l3mdev_link_scope_lookup)(const struct net_device *dev,
 						 struct flowi6 *fl6);
+	void (*l3mdev_local_rcv)(struct net_device *dev, struct sk_buff *skb,
+				 u16 proto);
 };
 
 #ifdef CONFIG_NET_L3_MASTER_DEV
@@ -203,6 +207,35 @@ struct sk_buff *l3mdev_ip6_out(struct sock *sk, struct sk_buff *skb)
 {
 	return l3mdev_l3_out(sk, skb, AF_INET6);
 }
+
+static inline
+void l3mdev_local_rcv(struct net_device *dev, struct sk_buff *skb, u16 proto)
+{
+	struct net_device *in_master = l3mdev_master_dev_rcu(skb->dev);
+	struct net_device *local_master = l3mdev_master_dev_rcu(dev);
+	struct net_device *owner = local_master;
+
+	/* act only when input and local device have different L3 masters */
+	if (in_master == local_master)
+		return;
+	/* the input master's hook, when it has one, takes precedence */
+	if (in_master && in_master->l3mdev_ops->l3mdev_local_rcv)
+		owner = in_master;
+	if (owner && owner->l3mdev_ops->l3mdev_local_rcv)
+		owner->l3mdev_ops->l3mdev_local_rcv(local_master, skb, proto);
+}
+
+static inline
+void l3mdev_ip_local_rcv(struct net_device *dev, struct sk_buff *skb)
+{
+	l3mdev_local_rcv(dev, skb, AF_INET);	/* IPv4 local receive hook */
+}
+
+static inline
+void l3mdev_ip6_local_rcv(struct net_device *dev, struct sk_buff *skb)
+{
+	l3mdev_local_rcv(dev, skb, AF_INET6);	/* IPv6 local receive hook */
+}
 #else
 
 static inline int l3mdev_master_ifindex_rcu(const struct net_device *dev)
@@ -294,6 +327,14 @@ static inline
 void l3mdev_update_flow(struct net *net, struct flowi *fl)
 {
 }
+static inline
+void l3mdev_ip_local_rcv(struct net_device *dev, struct sk_buff *skb)
+{	/* stub: intentionally does nothing */
+}
+static inline
+void l3mdev_ip6_local_rcv(struct net_device *dev, struct sk_buff *skb)
+{	/* stub: intentionally does nothing */
+}
 #endif
 
 #endif /* _NET_L3MDEV_H_ */
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 11ddc276776e..c91b8ab06b86 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2070,6 +2070,8 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 					  0, dev, in_dev, &itag);
 		if (err < 0)
 			goto martian_source;
+
+		l3mdev_ip_local_rcv(res->fi->fib_dev, skb);
 		goto local_input;
 	}
 
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index b50b1af1f530..4def37f73363 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -448,6 +448,7 @@ static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *sk
 
 int ip6_input(struct sk_buff *skb)
 {
+	l3mdev_ip6_local_rcv(skb_dst(skb)->dev, skb);
 	return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN,
 		       dev_net(skb->dev), NULL, skb, skb->dev, NULL,
 		       ip6_input_finish);
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread