All of lore.kernel.org
 help / color / mirror / Atom feed
From: David Lamparter <equinox@diac24.net>
To: netdev@vger.kernel.org
Cc: amine.kherbouche@6wind.com, roopa@cumulusnetworks.com,
	David Lamparter <equinox@diac24.net>
Subject: [PATCH 4/6] mpls: VPLS support
Date: Wed, 16 Aug 2017 19:02:00 +0200	[thread overview]
Message-ID: <20170816170202.456851-5-equinox@diac24.net> (raw)
In-Reply-To: <20170816170202.456851-1-equinox@diac24.net>

[work-in-progress, works but needs changes]
[v2: refactored lots of things, e.g. dst_metadata, no more genetlink]

Signed-off-by: David Lamparter <equinox@diac24.net>
---
 include/net/dst_metadata.h |  21 ++
 include/net/vpls.h         |   8 +
 net/mpls/Kconfig           |  11 ++
 net/mpls/Makefile          |   1 +
 net/mpls/vpls.c            | 469 +++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 510 insertions(+)
 create mode 100644 include/net/vpls.h
 create mode 100644 net/mpls/vpls.c

diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h
index 8858dc441458..aeee4ce3b654 100644
--- a/include/net/dst_metadata.h
+++ b/include/net/dst_metadata.h
@@ -3,11 +3,13 @@
 
 #include <linux/skbuff.h>
 #include <net/ip_tunnels.h>
+#include <net/vpls.h>
 #include <net/dst.h>
 
 enum metadata_type {
 	METADATA_IP_TUNNEL,
 	METADATA_HW_PORT_MUX,
+	METADATA_VPLS,
 };
 
 struct hw_port_info {
@@ -21,6 +23,7 @@ struct metadata_dst {
 	union {
 		struct ip_tunnel_info	tun_info;
 		struct hw_port_info	port_info;
+		struct vpls_info	vpls_info;
 	} u;
 };
 
@@ -49,6 +52,15 @@ static inline struct ip_tunnel_info *skb_tunnel_info(struct sk_buff *skb)
 	return NULL;
 }
 
+static inline struct vpls_info *skb_vpls_info(struct sk_buff *skb)
+{
+	struct metadata_dst *md_dst = skb_metadata_dst(skb);
+	if (md_dst && md_dst->type == METADATA_VPLS)
+		return &md_dst->u.vpls_info;
+	return NULL;
+}
+
+
 static inline bool skb_valid_dst(const struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb_dst(skb);
@@ -73,6 +85,9 @@ static inline int dst_metadata_cmp(const struct dst_entry *dst_a,
 	case METADATA_HW_PORT_MUX:
 		return memcmp(&a->u.port_info, &b->u.port_info,
 			      sizeof(a->u.port_info));
+	case METADATA_VPLS:
+		return memcmp(&a->u.vpls_info, &b->u.vpls_info,
+			      sizeof(a->u.vpls_info));
 	case METADATA_IP_TUNNEL:
 		return memcmp(&a->u.tun_info, &b->u.tun_info,
 			      sizeof(a->u.tun_info) +
@@ -218,4 +233,10 @@ static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb,
 				  0, ip6_flowlabel(ip6h), flags, tunnel_id,
 				  md_size);
 }
+
+static inline struct metadata_dst *vpls_rx_dst(void)
+{
+	return metadata_dst_alloc(0, METADATA_VPLS, GFP_ATOMIC);
+}
+
 #endif /* __NET_DST_METADATA_H */
diff --git a/include/net/vpls.h b/include/net/vpls.h
new file mode 100644
index 000000000000..b261e2d97734
--- /dev/null
+++ b/include/net/vpls.h
@@ -0,0 +1,8 @@
+#ifndef __NET_VPLS_H
+#define __NET_VPLS_H 1
+
+struct vpls_info {
+	u32		pw_label;
+};
+
+#endif /* __NET_VPLS_H */
diff --git a/net/mpls/Kconfig b/net/mpls/Kconfig
index 5c467ef97311..c15ba73efb34 100644
--- a/net/mpls/Kconfig
+++ b/net/mpls/Kconfig
@@ -27,6 +27,17 @@ config MPLS_ROUTING
 	---help---
 	 Add support for forwarding of mpls packets.
 
+config MPLS_VPLS
+	bool "VPLS support"
+	default y
+	depends on MPLS_ROUTING && BRIDGE_NETFILTER=n
+	---help---
+	 Add support for de-&encapsulating VPLS.  Not compatible with
+	 bridge netfilter due to the latter stomping over VPLS' dst metadata.
+
+comment "disable 'Bridged IP/ARP packets filtering' for VPLS support"
+	depends on BRIDGE_NETFILTER
+
 config MPLS_IPTUNNEL
 	tristate "MPLS: IP over MPLS tunnel support"
 	depends on LWTUNNEL && MPLS_ROUTING
diff --git a/net/mpls/Makefile b/net/mpls/Makefile
index 9ca923625016..3c028600a980 100644
--- a/net/mpls/Makefile
+++ b/net/mpls/Makefile
@@ -6,3 +6,4 @@ obj-$(CONFIG_MPLS_ROUTING) += mpls_router.o
 obj-$(CONFIG_MPLS_IPTUNNEL) += mpls_iptunnel.o
 
 mpls_router-y := af_mpls.o
+mpls_router-$(CONFIG_MPLS_VPLS) += vpls.o
diff --git a/net/mpls/vpls.c b/net/mpls/vpls.c
new file mode 100644
index 000000000000..28ac810da6e9
--- /dev/null
+++ b/net/mpls/vpls.c
@@ -0,0 +1,469 @@
+/*
+ *  net/mpls/vpls.c
+ *
+ *  Copyright (C) 2016 David Lamparter
+ *
+ */
+
+#include <linux/netdevice.h>
+#include <linux/slab.h>
+#include <linux/ethtool.h>
+#include <linux/etherdevice.h>
+#include <linux/u64_stats_sync.h>
+#include <linux/mpls.h>
+
+#include <net/rtnetlink.h>
+#include <net/dst.h>
+#include <net/xfrm.h>
+#include <net/mpls.h>
+#include <linux/module.h>
+#include <net/dst_metadata.h>
+#include <net/ip_tunnels.h>
+
+#include "internal.h"
+
+#define DRV_NAME	"vpls"
+
+#define MIN_MTU 68		/* Min L3 MTU */
+#define MAX_MTU 65535		/* Max L3 MTU (arbitrary) */
+
+struct vpls_wirelist {
+	struct rcu_head rcu;
+	size_t count;
+	unsigned wires[0];
+};
+
+struct vpls_priv {
+	struct net *encap_net;
+	struct vpls_wirelist __rcu *wires;
+};
+
+static int vpls_xmit_wire(struct sk_buff *skb, struct net_device *dev,
+			  struct vpls_priv *vpls, u32 wire)
+{
+	struct mpls_route *rt;
+	struct mpls_entry_decoded dec;
+
+	dec.bos = 1;
+	dec.ttl = 255;
+
+	rt = mpls_route_input_rcu(vpls->encap_net, wire);
+	if (!rt)
+		return -ENOENT;
+	if (rt->rt_vpls_dev != dev)
+		return -EINVAL;
+
+	return mpls_rt_xmit(skb, rt, dec);
+}
+
+static netdev_tx_t vpls_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	int err = -EINVAL, ok_count = 0;
+	struct vpls_priv *priv = netdev_priv(dev);
+	struct vpls_info *vi;
+	struct pcpu_sw_netstats *stats;
+	size_t len = skb->len;
+
+	rcu_read_lock();
+	vi = skb_vpls_info(skb);
+
+	skb_orphan(skb);
+	skb_forward_csum(skb);
+
+	if (vi) {
+		err = vpls_xmit_wire(skb, dev, priv, vi->pw_label);
+		if (err)
+			goto out_err;
+	} else {
+		struct sk_buff *cloned;
+		struct vpls_wirelist *wl;
+		size_t i;
+
+		wl = rcu_dereference(priv->wires);
+		if (wl->count == 0) {
+			dev->stats.tx_carrier_errors++;
+			goto out_err;
+		}
+
+		for (i = 0; i < wl->count; i++) {
+			cloned = skb_clone(skb, GFP_KERNEL);
+			if (vpls_xmit_wire(cloned, dev, priv, wl->wires[i]))
+				consume_skb(cloned);
+			else
+				ok_count++;
+		}
+		if (!ok_count)
+			goto out_err;
+
+		consume_skb(skb);
+	}
+
+	stats = this_cpu_ptr(dev->tstats);
+	u64_stats_update_begin(&stats->syncp);
+	stats->tx_packets++;
+	stats->tx_bytes += len;
+	u64_stats_update_end(&stats->syncp);
+
+	rcu_read_unlock();
+	return 0;
+
+out_err:
+	dev->stats.tx_errors++;
+
+	consume_skb(skb);
+	rcu_read_unlock();
+	return err;
+}
+
+int vpls_rcv(struct sk_buff *skb, struct net_device *in_dev,
+	     struct packet_type *pt, struct mpls_route *rt,
+	     struct mpls_shim_hdr *hdr, struct net_device *orig_dev)
+{
+	struct net_device *dev = rt->rt_vpls_dev;
+	struct mpls_entry_decoded dec;
+	struct metadata_dst *md_dst;
+	struct pcpu_sw_netstats *stats;
+
+	if (!dev)
+		goto drop_nodev;
+
+	dec = mpls_entry_decode(hdr);
+	if (!dec.bos) {
+		dev->stats.rx_frame_errors++;
+		goto drop;
+	}
+
+	skb_pull(skb, sizeof(*hdr));
+
+	if (unlikely(!pskb_may_pull(skb, ETH_HLEN))) {
+		dev->stats.rx_length_errors++;
+		goto drop;
+	}
+
+	md_dst = vpls_rx_dst();
+	if (unlikely(!md_dst)) {
+		netdev_err(dev, "failed to allocate dst metadata\n");
+		goto drop;
+	}
+	md_dst->u.vpls_info.pw_label = dec.label;
+
+	skb->dev = dev;
+
+	skb_reset_mac_header(skb);
+	skb->protocol = eth_type_trans(skb, dev);
+	skb->ip_summed = CHECKSUM_NONE;
+	skb->pkt_type = PACKET_HOST;
+
+	skb_clear_hash(skb);
+	skb->vlan_tci = 0;
+	skb_set_queue_mapping(skb, 0);
+	skb_scrub_packet(skb, !net_eq(dev_net(in_dev), dev_net(dev)));
+
+	skb_reset_network_header(skb);
+	skb_probe_transport_header(skb, 0);
+
+	skb_dst_drop(skb);
+	skb_dst_set(skb, &md_dst->dst);
+
+	stats = this_cpu_ptr(dev->tstats);
+	u64_stats_update_begin(&stats->syncp);
+	stats->rx_packets++;
+	stats->rx_bytes += skb->len;
+	u64_stats_update_end(&stats->syncp);
+
+	netif_rx(skb);
+	return 0;
+
+drop:
+	dev->stats.rx_errors++;
+drop_nodev:
+	kfree_skb(skb);
+	return NET_RX_DROP;
+}
+
+void vpls_label_update(unsigned label, struct mpls_route *rt_old,
+		       struct mpls_route *rt_new)
+{
+	struct vpls_priv *priv;
+	struct vpls_wirelist *wl, *wl_new;
+	size_t i;
+
+	ASSERT_RTNL();
+
+	if (rt_old && rt_new && rt_old->rt_vpls_dev == rt_new->rt_vpls_dev)
+		return;
+
+	if (rt_old && rt_old->rt_vpls_dev) {
+		priv = netdev_priv(rt_old->rt_vpls_dev);
+		wl = rcu_dereference(priv->wires);
+
+		for (i = 0; i < wl->count; i++)
+			if (wl->wires[i] == label)
+				break;
+
+		if (i == wl->count) {
+			netdev_err(rt_old->rt_vpls_dev,
+				   "can't find pseudowire to remove!\n");
+			goto update_new;
+		}
+
+		wl_new = kmalloc(sizeof(*wl) +
+				 (wl->count - 1) * sizeof(wl->wires[0]),
+				 GFP_ATOMIC);
+		if (!wl_new) {
+			netdev_err(rt_old->rt_vpls_dev,
+				   "out of memory for pseudowire delete!\n");
+			goto update_new;
+		}
+
+		wl_new->count = wl->count - 1;
+		memcpy(wl_new->wires, wl->wires, i * sizeof(wl->wires[0]));
+		memcpy(wl_new->wires + i, wl->wires + i + 1,
+			(wl->count - i - 1) * sizeof(wl->wires[0]));
+
+		rcu_assign_pointer(priv->wires, wl_new);
+		kfree_rcu(wl, rcu);
+
+		if (wl_new->count == 0)
+			netif_carrier_off(rt_old->rt_vpls_dev);
+	}
+
+update_new:
+	if (rt_new && rt_new->rt_vpls_dev) {
+		priv = netdev_priv(rt_new->rt_vpls_dev);
+		wl = rcu_dereference(priv->wires);
+
+		wl_new = kmalloc(sizeof(*wl) +
+				 (wl->count + 1) * sizeof(wl->wires[0]),
+				 GFP_ATOMIC);
+		if (!wl_new) {
+			netdev_err(rt_new->rt_vpls_dev,
+				   "out of memory for pseudowire add!\n");
+			return;
+		}
+		wl_new->count = wl->count + 1;
+		memcpy(wl_new->wires, wl->wires,
+			wl->count * sizeof(wl->wires[0]));
+		wl_new->wires[wl->count] = label;
+
+		rcu_assign_pointer(priv->wires, wl_new);
+		kfree_rcu(wl, rcu);
+
+		if (wl_new->count == 1)
+			netif_carrier_on(rt_new->rt_vpls_dev);
+	}
+}
+
+/* fake multicast ability */
+static void vpls_set_multicast_list(struct net_device *dev)
+{
+}
+
+static int vpls_open(struct net_device *dev)
+{
+	struct vpls_priv *priv = netdev_priv(dev);
+	struct vpls_wirelist *wl;
+
+	wl = rcu_dereference(priv->wires);
+	if (wl->count > 0)
+		netif_carrier_on(dev);
+
+	return 0;
+}
+
+static int vpls_close(struct net_device *dev)
+{
+	netif_carrier_off(dev);
+	return 0;
+}
+
+static int is_valid_vpls_mtu(int new_mtu)
+{
+	return new_mtu >= MIN_MTU && new_mtu <= MAX_MTU;
+}
+
+static int vpls_change_mtu(struct net_device *dev, int new_mtu)
+{
+	if (!is_valid_vpls_mtu(new_mtu))
+		return -EINVAL;
+	dev->mtu = new_mtu;
+	return 0;
+}
+
+static int vpls_dev_init(struct net_device *dev)
+{
+	struct vpls_priv *priv = netdev_priv(dev);
+	priv->wires = kzalloc(sizeof(struct vpls_wirelist), GFP_KERNEL);
+	if (!priv->wires)
+		return -ENOMEM;
+
+	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+	if (!dev->tstats) {
+		kfree(priv->wires);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void vpls_dev_free(struct net_device *dev)
+{
+	struct vpls_priv *priv = netdev_priv(dev);
+
+	free_percpu(dev->tstats);
+
+	if (priv->wires)
+		kfree(priv->wires);
+
+	if (priv->encap_net)
+		put_net(priv->encap_net);
+
+	free_netdev(dev);
+}
+
+static const struct net_device_ops vpls_netdev_ops = {
+	.ndo_init		= vpls_dev_init,
+	.ndo_open		= vpls_open,
+	.ndo_stop		= vpls_close,
+	.ndo_start_xmit		= vpls_xmit,
+	.ndo_change_mtu		= vpls_change_mtu,
+	.ndo_get_stats64	= ip_tunnel_get_stats64,
+	.ndo_set_rx_mode	= vpls_set_multicast_list,
+	.ndo_set_mac_address	= eth_mac_addr,
+	.ndo_features_check	= passthru_features_check,
+};
+
+int is_vpls_dev(struct net_device *dev)
+{
+	return dev->netdev_ops == &vpls_netdev_ops;
+}
+
+#define VPLS_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | \
+		       NETIF_F_HW_CSUM | NETIF_F_RXCSUM | NETIF_F_HIGHDMA)
+
+static void vpls_setup(struct net_device *dev)
+{
+	ether_setup(dev);
+
+	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+	dev->priv_flags |= IFF_NO_QUEUE;
+
+	dev->netdev_ops = &vpls_netdev_ops;
+	dev->features |= NETIF_F_LLTX;
+	dev->features |= VPLS_FEATURES;
+	dev->vlan_features = dev->features;
+	dev->priv_destructor = vpls_dev_free;
+
+	dev->hw_features = VPLS_FEATURES;
+	dev->hw_enc_features = VPLS_FEATURES;
+
+	netif_keep_dst(dev);
+}
+
+/*
+ * netlink interface
+ */
+
+static int vpls_validate(struct nlattr *tb[], struct nlattr *data[],
+			 struct netlink_ext_ack *extack)
+{
+	if (tb[IFLA_ADDRESS]) {
+		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) {
+			NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS],
+				    "Invalid Ethernet address length");
+			return -EINVAL;
+		}
+		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) {
+			NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS],
+				    "Invalid Ethernet address");
+			return -EADDRNOTAVAIL;
+		}
+	}
+	if (tb[IFLA_MTU]) {
+		if (!is_valid_vpls_mtu(nla_get_u32(tb[IFLA_MTU]))) {
+			NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_MTU],
+				    "Invalid MTU");
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
+static struct rtnl_link_ops vpls_link_ops;
+
+static int vpls_newlink(struct net *src_net, struct net_device *dev,
+			struct nlattr *tb[], struct nlattr *data[],
+			struct netlink_ext_ack *extack)
+{
+	int err;
+	struct vpls_priv *priv = netdev_priv(dev);
+
+	if (tb[IFLA_ADDRESS] == NULL)
+		eth_hw_addr_random(dev);
+
+	if (tb[IFLA_IFNAME])
+		nla_strlcpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ);
+	else
+		snprintf(dev->name, IFNAMSIZ, DRV_NAME "%%d");
+
+	err = register_netdevice(dev);
+	if (err < 0)
+		goto err;
+	priv->encap_net = get_net(src_net);
+
+	netif_carrier_off(dev);
+	return 0;
+
+err:
+	return err;
+}
+
+static void vpls_dellink(struct net_device *dev, struct list_head *head)
+{
+	unregister_netdevice_queue(dev, head);
+}
+
+
+static struct rtnl_link_ops vpls_link_ops = {
+	.kind		= DRV_NAME,
+	.priv_size	= sizeof(struct vpls_priv),
+	.setup		= vpls_setup,
+	.validate	= vpls_validate,
+	.newlink	= vpls_newlink,
+	.dellink	= vpls_dellink,
+};
+
+/*
+ * init/fini
+ */
+
+__init int vpls_init(void)
+{
+	int ret;
+
+	ret = rtnl_link_register(&vpls_link_ops);
+	if (ret)
+		goto out;
+
+	return 0;
+
+out:
+	return ret;
+}
+
+__exit void vpls_exit(void)
+{
+	rtnl_link_unregister(&vpls_link_ops);
+}
+
+#if 0
+/* not currently available as a separate module... */
+
+module_init(vpls_init);
+module_exit(vpls_exit);
+
+MODULE_DESCRIPTION("Virtual Private LAN Service");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_RTNL_LINK(DRV_NAME);
+#endif
-- 
2.13.0

  parent reply	other threads:[~2017-08-16 17:02 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-08-16 17:01 [RFC net-next] VPLS support David Lamparter
2017-08-16 17:01 ` [PATCH 1/6] bridge: learn dst metadata in FDB David Lamparter
2017-08-16 20:38   ` Nikolay Aleksandrov
2017-08-16 20:38     ` [Bridge] " Nikolay Aleksandrov
2017-08-17 11:03     ` David Lamparter
2017-08-17 11:03       ` [Bridge] " David Lamparter
2017-08-17 11:39       ` Nikolay Aleksandrov
2017-08-17 11:39         ` [Bridge] " Nikolay Aleksandrov
2017-08-17 11:51         ` Nikolay Aleksandrov
2017-08-17 11:51           ` [Bridge] " Nikolay Aleksandrov
2017-08-17 12:10           ` David Lamparter
2017-08-17 12:10             ` [Bridge] " David Lamparter
2017-08-17 12:19             ` Nikolay Aleksandrov
2017-08-17 12:19               ` [Bridge] " Nikolay Aleksandrov
2017-08-17 12:20             ` David Lamparter
2017-08-17 12:20               ` [Bridge] " David Lamparter
2017-08-17 12:45         ` David Lamparter
2017-08-17 12:45           ` [Bridge] " David Lamparter
2017-08-17 13:04           ` Nikolay Aleksandrov
2017-08-17 13:04             ` [Bridge] " Nikolay Aleksandrov
2017-08-17 16:16     ` David Lamparter
2017-08-17 16:16       ` [Bridge] " David Lamparter
2017-08-16 17:01 ` [PATCH 2/6] mpls: split forwarding path on rx/tx boundary David Lamparter
2017-08-19 17:10   ` kbuild test robot
2017-08-19 17:42   ` kbuild test robot
2017-08-16 17:01 ` [PATCH 3/6] mpls: add VPLS entry points David Lamparter
2017-08-19 18:27   ` kbuild test robot
2017-08-21 14:01   ` Amine Kherbouche
2017-08-21 15:55     ` David Lamparter
2017-08-21 16:13       ` Amine Kherbouche
2017-08-16 17:02 ` David Lamparter [this message]
2017-08-21 15:14   ` [PATCH 4/6] mpls: VPLS support Amine Kherbouche
2017-08-21 16:18     ` David Lamparter
2017-08-21 16:11   ` Amine Kherbouche
2017-08-16 17:02 ` [PATCH 5/6] bridge: add VPLS pseudowire info in fdb dump David Lamparter
2017-08-16 17:02 ` [PATCH 6/6] mpls: pseudowire control word support David Lamparter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170816170202.456851-5-equinox@diac24.net \
    --to=equinox@diac24.net \
    --cc=amine.kherbouche@6wind.com \
    --cc=netdev@vger.kernel.org \
    --cc=roopa@cumulusnetworks.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.