[PATCH net-next 0/5] vxlan: implement Generic Protocol Extension (GPE)

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH net-next 0/5] vxlan: implement Generic Protocol Extension (GPE)
@ 2016-02-26  7:48 Jiri Benc
  2016-02-26  7:48 ` [PATCH net-next 1/5] vxlan: implement GPE in L2 mode Jiri Benc
                   ` (4 more replies)
  0 siblings, 5 replies; 19+ messages in thread
From: Jiri Benc @ 2016-02-26  7:48 UTC (permalink / raw)
  To: netdev

VXLAN-GPE can operate in two modes: with encapsulated Ethernet header
(L2 mode) or with L3 header (e.g. IP header) directly following VXLAN-GPE
header (L3 mode).

Add support for both modes. The L2 mode is simple, as it's basically the
same as plain VXLAN, only with added bits in the header. The L3 mode is more
complicated. The patches adding it follow the same model as the tun/tap
driver: depending on the chosen mode, the vxlan interface is created either
as ARPHRD_ETHER (L2 mode) or ARPHRD_NONE (L3 mode).

Note that the internal fdb control plane cannot be used together with
VXLAN-GPE, and any attempt to configure it will be rejected by the driver.
In theory, this could be relaxed for L2 mode in the future if the need arises.

Jiri Benc (5):
  vxlan: implement GPE in L2 mode
  vxlan: move L2 mode initialization to a separate function
  vxlan: move fdb code to common location in vxlan_xmit
  vxlan: fix too large pskb_may_pull with remote checksum
  vxlan: implement GPE in L3 mode

 drivers/net/vxlan.c          | 223 ++++++++++++++++++++++++++++++++++++-------
 include/net/vxlan.h          |  63 +++++++++++-
 include/uapi/linux/if_link.h |   9 ++
 3 files changed, 258 insertions(+), 37 deletions(-)

-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH net-next 1/5] vxlan: implement GPE in L2 mode
  2016-02-26  7:48 [PATCH net-next 0/5] vxlan: implement Generic Protocol Extension (GPE) Jiri Benc
@ 2016-02-26  7:48 ` Jiri Benc
  2016-02-26 23:51   ` Tom Herbert
  2016-02-26  7:48 ` [PATCH net-next 2/5] vxlan: move L2 mode initialization to a separate function Jiri Benc
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 19+ messages in thread
From: Jiri Benc @ 2016-02-26  7:48 UTC (permalink / raw)
  To: netdev

Implement VXLAN-GPE. Only L2 mode (i.e. encapsulated Ethernet frame) is
supported by this patch.

L3 mode will be added by subsequent patches.

Signed-off-by: Jiri Benc <jbenc@redhat.com>
---
 drivers/net/vxlan.c          | 68 ++++++++++++++++++++++++++++++++++++++++++--
 include/net/vxlan.h          | 62 +++++++++++++++++++++++++++++++++++++++-
 include/uapi/linux/if_link.h |  8 ++++++
 3 files changed, 135 insertions(+), 3 deletions(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 775ddb48388d..c7844bae339d 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1192,6 +1192,33 @@ out:
 	unparsed->vx_flags &= ~VXLAN_GBP_USED_BITS;
 }
 
+static bool vxlan_parse_gpe_hdr(struct vxlanhdr *unparsed,
+				struct sk_buff *skb, u32 vxflags)
+{
+	struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)unparsed;
+
+	/* Need to have Next Protocol set for interfaces in GPE mode. */
+	if (!gpe->np_applied)
+		return false;
+	/* "The initial version is 0. If a receiver does not support the
+	 * version indicated it MUST drop the packet.
+	 */
+	if (gpe->version != 0)
+		return false;
+	/* "When the O bit is set to 1, the packet is an OAM packet and OAM
+	 * processing MUST occur." However, we don't implement OAM
+	 * processing, thus drop the packet.
+	 */
+	if (gpe->oam_flag)
+		return false;
+
+	if (gpe->next_protocol != VXLAN_GPE_NP_ETHERNET)
+		return false;
+
+	unparsed->vx_flags &= ~VXLAN_GPE_USED_BITS;
+	return true;
+}
+
 static bool vxlan_set_mac(struct vxlan_dev *vxlan,
 			  struct vxlan_sock *vs,
 			  struct sk_buff *skb)
@@ -1307,6 +1334,9 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
 	/* For backwards compatibility, only allow reserved fields to be
 	 * used by VXLAN extensions if explicitly requested.
 	 */
+	if (vs->flags & VXLAN_F_GPE)
+		if (!vxlan_parse_gpe_hdr(&unparsed, skb, vs->flags))
+			goto drop;
 	if (vs->flags & VXLAN_F_REMCSUM_RX)
 		if (!vxlan_remcsum(&unparsed, skb, vs->flags))
 			goto drop;
@@ -1685,6 +1715,14 @@ static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, u32 vxflags,
 	gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK);
 }
 
+static void vxlan_build_gpe_hdr(struct vxlanhdr *vxh, u32 vxflags)
+{
+	struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)vxh;
+
+	gpe->np_applied = 1;
+	gpe->next_protocol = VXLAN_GPE_NP_ETHERNET;
+}
+
 static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
 			   int iphdr_len, __be32 vni,
 			   struct vxlan_metadata *md, u32 vxflags,
@@ -1744,6 +1782,8 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
 
 	if (vxflags & VXLAN_F_GBP)
 		vxlan_build_gbp_hdr(vxh, vxflags, md);
+	if (vxflags & VXLAN_F_GPE)
+		vxlan_build_gpe_hdr(vxh, vxflags);
 
 	skb_set_inner_protocol(skb, htons(ETH_P_TEB));
 	return 0;
@@ -2515,6 +2555,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
 	[IFLA_VXLAN_REMCSUM_RX]	= { .type = NLA_U8 },
 	[IFLA_VXLAN_GBP]	= { .type = NLA_FLAG, },
 	[IFLA_VXLAN_REMCSUM_NOPARTIAL]	= { .type = NLA_FLAG },
+	[IFLA_VXLAN_GPE_MODE]	= { .type = NLA_U8, },
 };
 
 static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[])
@@ -2714,6 +2755,10 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
 	__be16 default_port = vxlan->cfg.dst_port;
 	struct net_device *lowerdev = NULL;
 
+	if (((conf->flags & VXLAN_F_LEARN) && (conf->flags & VXLAN_F_GPE)) ||
+	    ((conf->flags & VXLAN_F_GBP) && (conf->flags & VXLAN_F_GPE)))
+		return -EINVAL;
+
 	vxlan->net = src_net;
 
 	dst->remote_vni = conf->vni;
@@ -2770,8 +2815,12 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
 	dev->needed_headroom = needed_headroom;
 
 	memcpy(&vxlan->cfg, conf, sizeof(*conf));
-	if (!vxlan->cfg.dst_port)
-		vxlan->cfg.dst_port = default_port;
+	if (!vxlan->cfg.dst_port) {
+		if (conf->flags & VXLAN_F_GPE)
+			vxlan->cfg.dst_port = 4790; /* IANA assigned VXLAN-GPE port */
+		else
+			vxlan->cfg.dst_port = default_port;
+	}
 	vxlan->flags |= conf->flags;
 
 	if (!vxlan->cfg.age_interval)
@@ -2941,6 +2990,13 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
 	if (data[IFLA_VXLAN_REMCSUM_NOPARTIAL])
 		conf.flags |= VXLAN_F_REMCSUM_NOPARTIAL;
 
+	if (data[IFLA_VXLAN_GPE_MODE]) {
+		u8 mode = nla_get_u8(data[IFLA_VXLAN_GPE_MODE]);
+
+		if (mode > 0 && mode <= VXLAN_GPE_MODE_MAX)
+			conf.flags |= VXLAN_F_GPE;
+	}
+
 	err = vxlan_dev_configure(src_net, dev, &conf);
 	switch (err) {
 	case -ENODEV:
@@ -2954,6 +3010,10 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
 	case -EEXIST:
 		pr_info("duplicate VNI %u\n", be32_to_cpu(conf.vni));
 		break;
+
+	case -EINVAL:
+		pr_info("unsupported combination of extensions\n");
+		break;
 	}
 
 	return err;
@@ -3083,6 +3143,10 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
 	    nla_put_flag(skb, IFLA_VXLAN_REMCSUM_NOPARTIAL))
 		goto nla_put_failure;
 
+	if (vxlan->flags & VXLAN_F_GPE &&
+	    nla_put_u8(skb, IFLA_VXLAN_GPE_MODE, VXLAN_GPE_MODE_L2))
+		goto nla_put_failure;
+
 	return 0;
 
 nla_put_failure:
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 6eda4ed4d78b..7c5f1385bdfd 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -119,6 +119,64 @@ struct vxlanhdr_gbp {
 #define VXLAN_GBP_POLICY_APPLIED	(BIT(3) << 16)
 #define VXLAN_GBP_ID_MASK		(0xFFFF)
 
+/*
+ * VXLAN Generic Protocol Extension (VXLAN_F_GPE):
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |R|R|Ver|I|P|R|O|       Reserved                |Next Protocol  |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                VXLAN Network Identifier (VNI) |   Reserved    |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Ver = Version. Indicates VXLAN GPE protocol version.
+ *
+ * P = Next Protocol Bit. The P bit is set to indicate that the
+ *     Next Protocol field is present.
+ *
+ * O = OAM Flag Bit. The O bit is set to indicate that the packet
+ *     is an OAM packet.
+ *
+ * Next Protocol = This 8 bit field indicates the protocol header
+ * immediately following the VXLAN GPE header.
+ *
+ * https://tools.ietf.org/html/draft-ietf-nvo3-vxlan-gpe-01
+ */
+
+struct vxlanhdr_gpe {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+	u8	oam_flag:1,
+		reserved_flags1:1,
+		np_applied:1,
+		instance_applied:1,
+		version:2,
+		reserved_flags2:2;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+	u8	reserved_flags2:2,
+		version:2,
+		instance_applied:1,
+		np_applied:1,
+		reserved_flags1:1,
+		oam_flag:1;
+#endif
+	u8	reserved_flags3;
+	u8	reserved_flags4;
+	u8	next_protocol;
+	__be32	vx_vni;
+};
+
+/* VXLAN-GPE header flags. */
+#define VXLAN_HF_VER	cpu_to_be32(BIT(29) | BIT(28))
+#define VXLAN_HF_NP	cpu_to_be32(BIT(26))
+#define VXLAN_HF_OAM	cpu_to_be32(BIT(24))
+
+#define VXLAN_GPE_USED_BITS (VXLAN_HF_VER | VXLAN_HF_NP | VXLAN_HF_OAM | \
+			     cpu_to_be32(0xff))
+
+/* VXLAN-GPE header Next Protocol. */
+#define VXLAN_GPE_NP_IPV4      0x01
+#define VXLAN_GPE_NP_IPV6      0x02
+#define VXLAN_GPE_NP_ETHERNET  0x03
+#define VXLAN_GPE_NP_NSH       0x04
+
 struct vxlan_metadata {
 	u32		gbp;
 };
@@ -205,6 +263,7 @@ struct vxlan_dev {
 #define VXLAN_F_GBP			0x800
 #define VXLAN_F_REMCSUM_NOPARTIAL	0x1000
 #define VXLAN_F_COLLECT_METADATA	0x2000
+#define VXLAN_F_GPE			0x4000
 
 /* Flags that are used in the receive path. These flags must match in
  * order for a socket to be shareable
@@ -213,7 +272,8 @@ struct vxlan_dev {
 					 VXLAN_F_UDP_ZERO_CSUM6_RX |	\
 					 VXLAN_F_REMCSUM_RX |		\
 					 VXLAN_F_REMCSUM_NOPARTIAL |	\
-					 VXLAN_F_COLLECT_METADATA)
+					 VXLAN_F_COLLECT_METADATA |	\
+					 VXLAN_F_GPE)
 
 struct net_device *vxlan_dev_create(struct net *net, const char *name,
 				    u8 name_assign_type, struct vxlan_config *conf);
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index d452cea59020..c2b2b7462731 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -456,10 +456,18 @@ enum {
 	IFLA_VXLAN_GBP,
 	IFLA_VXLAN_REMCSUM_NOPARTIAL,
 	IFLA_VXLAN_COLLECT_METADATA,
+	IFLA_VXLAN_GPE_MODE,
 	__IFLA_VXLAN_MAX
 };
 #define IFLA_VXLAN_MAX	(__IFLA_VXLAN_MAX - 1)
 
+enum vxlan_gpe_mode {
+	VXLAN_GPE_MODE_DISABLED = 0,
+	VXLAN_GPE_MODE_L2,
+	__VXLAN_GPE_MODE_MAX
+};
+#define VXLAN_GPE_MODE_MAX (__VXLAN_GPE_MODE_MAX - 1)
+
 struct ifla_vxlan_port_range {
 	__be16	low;
 	__be16	high;
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH net-next 2/5] vxlan: move L2 mode initialization to a separate function
  2016-02-26  7:48 [PATCH net-next 0/5] vxlan: implement Generic Protocol Extension (GPE) Jiri Benc
  2016-02-26  7:48 ` [PATCH net-next 1/5] vxlan: implement GPE in L2 mode Jiri Benc
@ 2016-02-26  7:48 ` Jiri Benc
  2016-02-26  7:48 ` [PATCH net-next 3/5] vxlan: move fdb code to common location in vxlan_xmit Jiri Benc
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 19+ messages in thread
From: Jiri Benc @ 2016-02-26  7:48 UTC (permalink / raw)
  To: netdev

This will allow vxlan to be initialized in L3 mode based on the passed rtnl
attributes.

Signed-off-by: Jiri Benc <jbenc@redhat.com>
---
 drivers/net/vxlan.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index c7844bae339d..49fcca8d8a8e 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2437,7 +2437,7 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
 	return 0;
 }
 
-static const struct net_device_ops vxlan_netdev_ops = {
+static const struct net_device_ops vxlan_netdev_l2mode_ops = {
 	.ndo_init		= vxlan_init,
 	.ndo_uninit		= vxlan_uninit,
 	.ndo_open		= vxlan_open,
@@ -2491,10 +2491,6 @@ static void vxlan_setup(struct net_device *dev)
 	struct vxlan_dev *vxlan = netdev_priv(dev);
 	unsigned int h;
 
-	eth_hw_addr_random(dev);
-	ether_setup(dev);
-
-	dev->netdev_ops = &vxlan_netdev_ops;
 	dev->destructor = free_netdev;
 	SET_NETDEV_DEVTYPE(dev, &vxlan_type);
 
@@ -2509,8 +2505,7 @@ static void vxlan_setup(struct net_device *dev)
 	dev->hw_features |= NETIF_F_GSO_SOFTWARE;
 	dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
 	netif_keep_dst(dev);
-	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
-	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
+	dev->priv_flags |= IFF_NO_QUEUE;
 
 	INIT_LIST_HEAD(&vxlan->next);
 	spin_lock_init(&vxlan->hash_lock);
@@ -2529,6 +2524,15 @@ static void vxlan_setup(struct net_device *dev)
 		INIT_HLIST_HEAD(&vxlan->fdb_head[h]);
 }
 
+static void vxlan_l2mode_setup(struct net_device *dev)
+{
+	eth_hw_addr_random(dev);
+	ether_setup(dev);
+	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+	dev->netdev_ops = &vxlan_netdev_l2mode_ops;
+}
+
 static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
 	[IFLA_VXLAN_ID]		= { .type = NLA_U32 },
 	[IFLA_VXLAN_GROUP]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
@@ -2759,6 +2763,8 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
 	    ((conf->flags & VXLAN_F_GBP) && (conf->flags & VXLAN_F_GPE)))
 		return -EINVAL;
 
+	vxlan_l2mode_setup(dev);
+
 	vxlan->net = src_net;
 
 	dst->remote_vni = conf->vni;
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH net-next 3/5] vxlan: move fdb code to common location in vxlan_xmit
  2016-02-26  7:48 [PATCH net-next 0/5] vxlan: implement Generic Protocol Extension (GPE) Jiri Benc
  2016-02-26  7:48 ` [PATCH net-next 1/5] vxlan: implement GPE in L2 mode Jiri Benc
  2016-02-26  7:48 ` [PATCH net-next 2/5] vxlan: move L2 mode initialization to a separate function Jiri Benc
@ 2016-02-26  7:48 ` Jiri Benc
  2016-02-26  7:48 ` [PATCH net-next 4/5] vxlan: fix too large pskb_may_pull with remote checksum Jiri Benc
  2016-02-26  7:48 ` [PATCH net-next 5/5] vxlan: implement GPE in L3 mode Jiri Benc
  4 siblings, 0 replies; 19+ messages in thread
From: Jiri Benc @ 2016-02-26  7:48 UTC (permalink / raw)
  To: netdev

Handle VXLAN_F_COLLECT_METADATA before VXLAN_F_PROXY. The latter does not
make sense with the former, as it needs a populated fdb, which does not
happen in metadata mode.

After this cleanup, the fdb code in vxlan_xmit is moved to a common location
and can be later skipped for L3 mode vxlan tunnels.

Signed-off-by: Jiri Benc <jbenc@redhat.com>
---
 drivers/net/vxlan.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 49fcca8d8a8e..029e0e5b0b2b 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2139,9 +2139,17 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
 	info = skb_tunnel_info(skb);
 
 	skb_reset_mac_header(skb);
-	eth = eth_hdr(skb);
 
-	if ((vxlan->flags & VXLAN_F_PROXY)) {
+	if (vxlan->flags & VXLAN_F_COLLECT_METADATA) {
+		if (info && info->mode & IP_TUNNEL_INFO_TX)
+			vxlan_xmit_one(skb, dev, NULL, false);
+		else
+			kfree_skb(skb);
+		return NETDEV_TX_OK;
+	}
+
+	if (vxlan->flags & VXLAN_F_PROXY) {
+		eth = eth_hdr(skb);
 		if (ntohs(eth->h_proto) == ETH_P_ARP)
 			return arp_reduce(dev, skb);
 #if IS_ENABLED(CONFIG_IPV6)
@@ -2156,18 +2164,10 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
 				    msg->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION)
 					return neigh_reduce(dev, skb);
 		}
-		eth = eth_hdr(skb);
 #endif
 	}
 
-	if (vxlan->flags & VXLAN_F_COLLECT_METADATA) {
-		if (info && info->mode & IP_TUNNEL_INFO_TX)
-			vxlan_xmit_one(skb, dev, NULL, false);
-		else
-			kfree_skb(skb);
-		return NETDEV_TX_OK;
-	}
-
+	eth = eth_hdr(skb);
 	f = vxlan_find_mac(vxlan, eth->h_dest);
 	did_rsc = false;
 
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH net-next 4/5] vxlan: fix too large pskb_may_pull with remote checksum
  2016-02-26  7:48 [PATCH net-next 0/5] vxlan: implement Generic Protocol Extension (GPE) Jiri Benc
                   ` (2 preceding siblings ...)
  2016-02-26  7:48 ` [PATCH net-next 3/5] vxlan: move fdb code to common location in vxlan_xmit Jiri Benc
@ 2016-02-26  7:48 ` Jiri Benc
  2016-02-26  7:48 ` [PATCH net-next 5/5] vxlan: implement GPE in L3 mode Jiri Benc
  4 siblings, 0 replies; 19+ messages in thread
From: Jiri Benc @ 2016-02-26  7:48 UTC (permalink / raw)
  To: netdev

The vxlan header has already been pulled at this point, so don't include it
again in the calculation.

Signed-off-by: Jiri Benc <jbenc@redhat.com>
---
 drivers/net/vxlan.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 029e0e5b0b2b..81a7c1a829b9 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1142,7 +1142,7 @@ static int vxlan_igmp_leave(struct vxlan_dev *vxlan)
 static bool vxlan_remcsum(struct vxlanhdr *unparsed,
 			  struct sk_buff *skb, u32 vxflags)
 {
-	size_t start, offset, plen;
+	size_t start, offset;
 
 	if (!(unparsed->vx_flags & VXLAN_HF_RCO) || skb->remcsum_offload)
 		goto out;
@@ -1150,9 +1150,7 @@ static bool vxlan_remcsum(struct vxlanhdr *unparsed,
 	start = vxlan_rco_start(unparsed->vx_vni);
 	offset = start + vxlan_rco_offset(unparsed->vx_vni);
 
-	plen = sizeof(struct vxlanhdr) + offset + sizeof(u16);
-
-	if (!pskb_may_pull(skb, plen))
+	if (!pskb_may_pull(skb, offset + sizeof(u16)))
 		return false;
 
 	skb_remcsum_process(skb, (void *)(vxlan_hdr(skb) + 1), start, offset,
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH net-next 5/5] vxlan: implement GPE in L3 mode
  2016-02-26  7:48 [PATCH net-next 0/5] vxlan: implement Generic Protocol Extension (GPE) Jiri Benc
                   ` (3 preceding siblings ...)
  2016-02-26  7:48 ` [PATCH net-next 4/5] vxlan: fix too large pskb_may_pull with remote checksum Jiri Benc
@ 2016-02-26  7:48 ` Jiri Benc
  2016-02-26 22:22   ` Jesse Gross
  4 siblings, 1 reply; 19+ messages in thread
From: Jiri Benc @ 2016-02-26  7:48 UTC (permalink / raw)
  To: netdev

Implement L3 mode for VXLAN-GPE (i.e. IPv4/IPv6 payload directly after the
VXLAN header).

The GPE header parsing has to be moved before iptunnel_pull_header, as we
need to know the protocol.

Signed-off-by: Jiri Benc <jbenc@redhat.com>
---
 drivers/net/vxlan.c          | 127 +++++++++++++++++++++++++++++++++++--------
 include/net/vxlan.h          |   3 +-
 include/uapi/linux/if_link.h |   1 +
 3 files changed, 108 insertions(+), 23 deletions(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 81a7c1a829b9..b79472d3fd36 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1191,6 +1191,7 @@ out:
 }
 
 static bool vxlan_parse_gpe_hdr(struct vxlanhdr *unparsed,
+				__be32 *protocol,
 				struct sk_buff *skb, u32 vxflags)
 {
 	struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)unparsed;
@@ -1210,9 +1211,30 @@ static bool vxlan_parse_gpe_hdr(struct vxlanhdr *unparsed,
 	if (gpe->oam_flag)
 		return false;
 
-	if (gpe->next_protocol != VXLAN_GPE_NP_ETHERNET)
+	/* L2 mode */
+	if (gpe->next_protocol == VXLAN_GPE_NP_ETHERNET) {
+		if (vxflags & VXLAN_F_GPE_L3)
+			return false;
+		*protocol = htons(ETH_P_TEB);
+		goto out;
+	}
+
+	/* L3 mode */
+	if (!(vxflags & VXLAN_F_GPE_L3))
+		return false;
+
+	switch (gpe->next_protocol) {
+	case VXLAN_GPE_NP_IPV4:
+		*protocol = htons(ETH_P_IP);
+		break;
+	case VXLAN_GPE_NP_IPV6:
+		*protocol = htons(ETH_P_IPV6);
+		break;
+	default:
 		return false;
+	}
 
+out:
 	unparsed->vx_flags &= ~VXLAN_GPE_USED_BITS;
 	return true;
 }
@@ -1282,9 +1304,10 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
 	struct vxlanhdr unparsed;
 	struct vxlan_metadata _md;
 	struct vxlan_metadata *md = &_md;
+	__be32 protocol = htons(ETH_P_TEB);
 	void *oiph;
 
-	/* Need Vxlan and inner Ethernet header to be present */
+	/* Need UDP and VXLAN header to be present */
 	if (!pskb_may_pull(skb, VXLAN_HLEN))
 		return 1;
 
@@ -1308,7 +1331,14 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
 	if (!vxlan)
 		goto drop;
 
-	if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB),
+	/* For backwards compatibility, only allow reserved fields to be
+	 * used by VXLAN extensions if explicitly requested.
+	 */
+	if (vs->flags & VXLAN_F_GPE)
+		if (!vxlan_parse_gpe_hdr(&unparsed, &protocol, skb, vs->flags))
+			goto drop;
+
+	if (iptunnel_pull_header(skb, VXLAN_HLEN, protocol,
 				 !net_eq(vxlan->net, dev_net(vxlan->dev))))
 		goto drop;
 
@@ -1329,12 +1359,6 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
 		memset(md, 0, sizeof(*md));
 	}
 
-	/* For backwards compatibility, only allow reserved fields to be
-	 * used by VXLAN extensions if explicitly requested.
-	 */
-	if (vs->flags & VXLAN_F_GPE)
-		if (!vxlan_parse_gpe_hdr(&unparsed, skb, vs->flags))
-			goto drop;
 	if (vs->flags & VXLAN_F_REMCSUM_RX)
 		if (!vxlan_remcsum(&unparsed, skb, vs->flags))
 			goto drop;
@@ -1353,8 +1377,13 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
 		goto drop;
 	}
 
-	if (!vxlan_set_mac(vxlan, vs, skb))
-		goto drop;
+	if (protocol == htons(ETH_P_TEB)) {
+		if (!vxlan_set_mac(vxlan, vs, skb))
+			goto drop;
+	} else {
+		skb->dev = vxlan->dev;
+		skb->pkt_type = PACKET_HOST;
+	}
 
 	oiph = skb_network_header(skb);
 	skb_reset_network_header(skb);
@@ -1713,12 +1742,29 @@ static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, u32 vxflags,
 	gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK);
 }
 
-static void vxlan_build_gpe_hdr(struct vxlanhdr *vxh, u32 vxflags)
+static int vxlan_build_gpe_hdr(struct vxlanhdr *vxh, u32 vxflags,
+			       __be16 protocol)
 {
 	struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)vxh;
 
 	gpe->np_applied = 1;
-	gpe->next_protocol = VXLAN_GPE_NP_ETHERNET;
+
+	/* L2 mode */
+	if (!(vxflags & VXLAN_F_GPE_L3)) {
+		gpe->next_protocol = VXLAN_GPE_NP_ETHERNET;
+		return 0;
+	}
+
+	/* L3 mode */
+	switch (protocol) {
+	case htons(ETH_P_IP):
+		gpe->next_protocol = VXLAN_GPE_NP_IPV4;
+		return 0;
+	case htons(ETH_P_IPV6):
+		gpe->next_protocol = VXLAN_GPE_NP_IPV6;
+		return 0;
+	}
+	return -EPFNOSUPPORT;
 }
 
 static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
@@ -1730,6 +1776,7 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
 	int min_headroom;
 	int err;
 	int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
+	__be16 inner_protocol = htons(ETH_P_TEB);
 
 	if ((vxflags & VXLAN_F_REMCSUM_TX) &&
 	    skb->ip_summed == CHECKSUM_PARTIAL) {
@@ -1748,10 +1795,8 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
 
 	/* Need space for new headers (invalidates iph ptr) */
 	err = skb_cow_head(skb, min_headroom);
-	if (unlikely(err)) {
-		kfree_skb(skb);
-		return err;
-	}
+	if (unlikely(err))
+		goto out_free;
 
 	skb = vlan_hwaccel_push_inside(skb);
 	if (WARN_ON(!skb))
@@ -1780,11 +1825,20 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
 
 	if (vxflags & VXLAN_F_GBP)
 		vxlan_build_gbp_hdr(vxh, vxflags, md);
-	if (vxflags & VXLAN_F_GPE)
-		vxlan_build_gpe_hdr(vxh, vxflags);
+	if (vxflags & VXLAN_F_GPE) {
+		err = vxlan_build_gpe_hdr(vxh, vxflags, skb->protocol);
+		if (err < 0)
+			goto out_free;
+		if (vxflags & VXLAN_F_GPE_L3)
+			inner_protocol = skb->protocol;
+	}
 
-	skb_set_inner_protocol(skb, htons(ETH_P_TEB));
+	skb_set_inner_protocol(skb, inner_protocol);
 	return 0;
+
+out_free:
+	kfree_skb(skb);
+	return err;
 }
 
 static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
@@ -2452,6 +2506,17 @@ static const struct net_device_ops vxlan_netdev_l2mode_ops = {
 	.ndo_fill_metadata_dst	= vxlan_fill_metadata_dst,
 };
 
+static const struct net_device_ops vxlan_netdev_l3mode_ops = {
+	.ndo_init		= vxlan_init,
+	.ndo_uninit		= vxlan_uninit,
+	.ndo_open		= vxlan_open,
+	.ndo_stop		= vxlan_stop,
+	.ndo_start_xmit		= vxlan_xmit,
+	.ndo_get_stats64	= ip_tunnel_get_stats64,
+	.ndo_change_mtu		= vxlan_change_mtu,
+	.ndo_fill_metadata_dst	= vxlan_fill_metadata_dst,
+};
+
 /* Info for udev, that this is a virtual tunnel endpoint */
 static struct device_type vxlan_type = {
 	.name = "vxlan",
@@ -2531,6 +2596,17 @@ static void vxlan_l2mode_setup(struct net_device *dev)
 	dev->netdev_ops = &vxlan_netdev_l2mode_ops;
 }
 
+static void vxlan_l3mode_setup(struct net_device *dev)
+{
+	dev->type = ARPHRD_NONE;
+	dev->hard_header_len = 0;
+	dev->addr_len = 0;
+	dev->mtu = ETH_DATA_LEN;
+	dev->tx_queue_len = 1000;
+	dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
+	dev->netdev_ops = &vxlan_netdev_l3mode_ops;
+}
+
 static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
 	[IFLA_VXLAN_ID]		= { .type = NLA_U32 },
 	[IFLA_VXLAN_GROUP]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
@@ -2761,7 +2837,10 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
 	    ((conf->flags & VXLAN_F_GBP) && (conf->flags & VXLAN_F_GPE)))
 		return -EINVAL;
 
-	vxlan_l2mode_setup(dev);
+	if (conf->flags & VXLAN_F_GPE_L3)
+		vxlan_l3mode_setup(dev);
+	else
+		vxlan_l2mode_setup(dev);
 
 	vxlan->net = src_net;
 
@@ -2999,6 +3078,8 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
 
 		if (mode > 0 && mode <= VXLAN_GPE_MODE_MAX)
 			conf.flags |= VXLAN_F_GPE;
+		if (mode == VXLAN_GPE_MODE_L3)
+			conf.flags |= VXLAN_F_GPE_L3;
 	}
 
 	err = vxlan_dev_configure(src_net, dev, &conf);
@@ -3148,7 +3229,9 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
 		goto nla_put_failure;
 
 	if (vxlan->flags & VXLAN_F_GPE &&
-	    nla_put_u8(skb, IFLA_VXLAN_GPE_MODE, VXLAN_GPE_MODE_L2))
+	    nla_put_u8(skb, IFLA_VXLAN_GPE_MODE,
+		       vxlan->flags & VXLAN_F_GPE_L3 ? VXLAN_GPE_MODE_L3 :
+						       VXLAN_GPE_MODE_L2))
 		goto nla_put_failure;
 
 	return 0;
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 7c5f1385bdfd..25b3753f6f67 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -264,6 +264,7 @@ struct vxlan_dev {
 #define VXLAN_F_REMCSUM_NOPARTIAL	0x1000
 #define VXLAN_F_COLLECT_METADATA	0x2000
 #define VXLAN_F_GPE			0x4000
+#define VXLAN_F_GPE_L3			0x8000
 
 /* Flags that are used in the receive path. These flags must match in
  * order for a socket to be shareable
@@ -273,7 +274,7 @@ struct vxlan_dev {
 					 VXLAN_F_REMCSUM_RX |		\
 					 VXLAN_F_REMCSUM_NOPARTIAL |	\
 					 VXLAN_F_COLLECT_METADATA |	\
-					 VXLAN_F_GPE)
+					 VXLAN_F_GPE | VXLAN_F_GPE_L3)
 
 struct net_device *vxlan_dev_create(struct net *net, const char *name,
 				    u8 name_assign_type, struct vxlan_config *conf);
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index c2b2b7462731..ee4f7198aa21 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -464,6 +464,7 @@ enum {
 enum vxlan_gpe_mode {
 	VXLAN_GPE_MODE_DISABLED = 0,
 	VXLAN_GPE_MODE_L2,
+	VXLAN_GPE_MODE_L3,
 	__VXLAN_GPE_MODE_MAX
 };
 #define VXLAN_GPE_MODE_MAX (__VXLAN_GPE_MODE_MAX - 1)
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 5/5] vxlan: implement GPE in L3 mode
  2016-02-26  7:48 ` [PATCH net-next 5/5] vxlan: implement GPE in L3 mode Jiri Benc
@ 2016-02-26 22:22   ` Jesse Gross
  2016-02-26 23:42     ` Tom Herbert
  2016-02-27 19:21     ` Jiri Benc
  0 siblings, 2 replies; 19+ messages in thread
From: Jesse Gross @ 2016-02-26 22:22 UTC (permalink / raw)
  To: Jiri Benc; +Cc: Linux Kernel Network Developers

On Thu, Feb 25, 2016 at 11:48 PM, Jiri Benc <jbenc@redhat.com> wrote:
> diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
> index c2b2b7462731..ee4f7198aa21 100644
> --- a/include/uapi/linux/if_link.h
> +++ b/include/uapi/linux/if_link.h
> @@ -464,6 +464,7 @@ enum {
>  enum vxlan_gpe_mode {
>         VXLAN_GPE_MODE_DISABLED = 0,
>         VXLAN_GPE_MODE_L2,
> +       VXLAN_GPE_MODE_L3,

Given that VXLAN_GPE_MODE_L3 will eventually come to be used by NSH,
MPLS, etc. in addition to IPv4/v6, most of which are not really L3, it
seems like something along the lines of NO_ARP might be better since
that's what it really indicates. Once that is in, I don't really see
the need to explicitly block Ethernet packets from being handled in
this mode. If they are received, then they can just be handed off to
the stack - at that point it would look like an extra header, the same
as if an NSH packet is received.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 5/5] vxlan: implement GPE in L3 mode
  2016-02-26 22:22   ` Jesse Gross
@ 2016-02-26 23:42     ` Tom Herbert
  2016-02-27 19:26       ` Jiri Benc
  2016-02-27 19:21     ` Jiri Benc
  1 sibling, 1 reply; 19+ messages in thread
From: Tom Herbert @ 2016-02-26 23:42 UTC (permalink / raw)
  To: Jesse Gross; +Cc: Jiri Benc, Linux Kernel Network Developers

On Fri, Feb 26, 2016 at 2:22 PM, Jesse Gross <jesse@kernel.org> wrote:
> On Thu, Feb 25, 2016 at 11:48 PM, Jiri Benc <jbenc@redhat.com> wrote:
>> diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
>> index c2b2b7462731..ee4f7198aa21 100644
>> --- a/include/uapi/linux/if_link.h
>> +++ b/include/uapi/linux/if_link.h
>> @@ -464,6 +464,7 @@ enum {
>>  enum vxlan_gpe_mode {
>>         VXLAN_GPE_MODE_DISABLED = 0,
>>         VXLAN_GPE_MODE_L2,
>> +       VXLAN_GPE_MODE_L3,
>
> Given that VXLAN_GPE_MODE_L3 will eventually come to be used by NSH,
> MPLS, etc. in addition to IPv4/v6, most of which are not really L3, it
> seems like something along the lines of NO_ARP might be better since
> that's what it really indicates. Once that is in, I don't really see
> the need to explicitly block Ethernet packets from being handled in
> this mode. If they are received, then they can just be handed off to
> the stack - at that point it would look like an extra header, the same
> as if an NSH packet is received.

Agreed, and I don't see why there even needs to be modes. VXLAN-GPE
can carry arbitrary protocols with a next-header field. For Ethernet,
MPLS, IPv4, and IPv6 it should just be a simple mapping of the next
header to Ethertype for purposes of processing the payload.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 1/5] vxlan: implement GPE in L2 mode
  2016-02-26  7:48 ` [PATCH net-next 1/5] vxlan: implement GPE in L2 mode Jiri Benc
@ 2016-02-26 23:51   ` Tom Herbert
  2016-02-27 19:31     ` Jiri Benc
  0 siblings, 1 reply; 19+ messages in thread
From: Tom Herbert @ 2016-02-26 23:51 UTC (permalink / raw)
  To: Jiri Benc; +Cc: Linux Kernel Network Developers

On Thu, Feb 25, 2016 at 11:48 PM, Jiri Benc <jbenc@redhat.com> wrote:
> Implement VXLAN-GPE. Only L2 mode (i.e. encapsulated Ethernet frame) is
> supported by this patch.
>
> L3 mode will be added by subsequent patches.
>
> Signed-off-by: Jiri Benc <jbenc@redhat.com>
> ---
>  drivers/net/vxlan.c          | 68 ++++++++++++++++++++++++++++++++++++++++++--
>  include/net/vxlan.h          | 62 +++++++++++++++++++++++++++++++++++++++-
>  include/uapi/linux/if_link.h |  8 ++++++
>  3 files changed, 135 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
> index 775ddb48388d..c7844bae339d 100644
> --- a/drivers/net/vxlan.c
> +++ b/drivers/net/vxlan.c
> @@ -1192,6 +1192,33 @@ out:
>         unparsed->vx_flags &= ~VXLAN_GBP_USED_BITS;
>  }
>
> +static bool vxlan_parse_gpe_hdr(struct vxlanhdr *unparsed,
> +                               struct sk_buff *skb, u32 vxflags)
> +{
> +       struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)unparsed;
> +
> +       /* Need to have Next Protocol set for interfaces in GPE mode. */
> +       if (!gpe->np_applied)
> +               return false;
> +       /* "The initial version is 0. If a receiver does not support the
> +        * version indicated it MUST drop the packet.
> +        */
> +       if (gpe->version != 0)
> +               return false;
> +       /* "When the O bit is set to 1, the packet is an OAM packet and OAM
> +        * processing MUST occur." However, we don't implement OAM
> +        * processing, thus drop the packet.
> +        */
> +       if (gpe->oam_flag)
> +               return false;
> +
> +       if (gpe->next_protocol != VXLAN_GPE_NP_ETHERNET)
> +               return false;
> +
> +       unparsed->vx_flags &= ~VXLAN_GPE_USED_BITS;
> +       return true;
> +}
> +
>  static bool vxlan_set_mac(struct vxlan_dev *vxlan,
>                           struct vxlan_sock *vs,
>                           struct sk_buff *skb)
> @@ -1307,6 +1334,9 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
>         /* For backwards compatibility, only allow reserved fields to be
>          * used by VXLAN extensions if explicitly requested.
>          */
> +       if (vs->flags & VXLAN_F_GPE)
> +               if (!vxlan_parse_gpe_hdr(&unparsed, skb, vs->flags))
> +                       goto drop;

I don't think this is right. VXLAN-GPE is a separate protocol from
VXLAN; they are not compatible on the wire and don't share flags or
fields (for instance, GBP uses bits in VXLAN that hold the next
protocol in VXLAN-GPE). Neither is there a VXLAN_F_GPE flag defined in
VXLAN to differentiate the two. So VXLAN-GPE would be used on a
different port and probably needs its own rcv functions.

Tom

>         if (vs->flags & VXLAN_F_REMCSUM_RX)
>                 if (!vxlan_remcsum(&unparsed, skb, vs->flags))
>                         goto drop;
> @@ -1685,6 +1715,14 @@ static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, u32 vxflags,
>         gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK);
>  }
>
> +static void vxlan_build_gpe_hdr(struct vxlanhdr *vxh, u32 vxflags)
> +{
> +       struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)vxh;
> +
> +       gpe->np_applied = 1;
> +       gpe->next_protocol = VXLAN_GPE_NP_ETHERNET;
> +}
> +
>  static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
>                            int iphdr_len, __be32 vni,
>                            struct vxlan_metadata *md, u32 vxflags,
> @@ -1744,6 +1782,8 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
>
>         if (vxflags & VXLAN_F_GBP)
>                 vxlan_build_gbp_hdr(vxh, vxflags, md);
> +       if (vxflags & VXLAN_F_GPE)
> +               vxlan_build_gpe_hdr(vxh, vxflags);
>
>         skb_set_inner_protocol(skb, htons(ETH_P_TEB));
>         return 0;
> @@ -2515,6 +2555,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
>         [IFLA_VXLAN_REMCSUM_RX] = { .type = NLA_U8 },
>         [IFLA_VXLAN_GBP]        = { .type = NLA_FLAG, },
>         [IFLA_VXLAN_REMCSUM_NOPARTIAL]  = { .type = NLA_FLAG },
> +       [IFLA_VXLAN_GPE_MODE]   = { .type = NLA_U8, },
>  };
>
>  static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[])
> @@ -2714,6 +2755,10 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
>         __be16 default_port = vxlan->cfg.dst_port;
>         struct net_device *lowerdev = NULL;
>
> +       if (((conf->flags & VXLAN_F_LEARN) && (conf->flags & VXLAN_F_GPE)) ||
> +           ((conf->flags & VXLAN_F_GBP) && (conf->flags & VXLAN_F_GPE)))
> +               return -EINVAL;
> +
>         vxlan->net = src_net;
>
>         dst->remote_vni = conf->vni;
> @@ -2770,8 +2815,12 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
>         dev->needed_headroom = needed_headroom;
>
>         memcpy(&vxlan->cfg, conf, sizeof(*conf));
> -       if (!vxlan->cfg.dst_port)
> -               vxlan->cfg.dst_port = default_port;
> +       if (!vxlan->cfg.dst_port) {
> +               if (conf->flags & VXLAN_F_GPE)
> +                       vxlan->cfg.dst_port = 4790; /* IANA assigned VXLAN-GPE port */
> +               else
> +                       vxlan->cfg.dst_port = default_port;
> +       }
>         vxlan->flags |= conf->flags;
>
>         if (!vxlan->cfg.age_interval)
> @@ -2941,6 +2990,13 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
>         if (data[IFLA_VXLAN_REMCSUM_NOPARTIAL])
>                 conf.flags |= VXLAN_F_REMCSUM_NOPARTIAL;
>
> +       if (data[IFLA_VXLAN_GPE_MODE]) {
> +               u8 mode = nla_get_u8(data[IFLA_VXLAN_GPE_MODE]);
> +
> +               if (mode > 0 && mode <= VXLAN_GPE_MODE_MAX)
> +                       conf.flags |= VXLAN_F_GPE;
> +       }
> +
>         err = vxlan_dev_configure(src_net, dev, &conf);
>         switch (err) {
>         case -ENODEV:
> @@ -2954,6 +3010,10 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
>         case -EEXIST:
>                 pr_info("duplicate VNI %u\n", be32_to_cpu(conf.vni));
>                 break;
> +
> +       case -EINVAL:
> +               pr_info("unsupported combination of extensions\n");
> +               break;
>         }
>
>         return err;
> @@ -3083,6 +3143,10 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
>             nla_put_flag(skb, IFLA_VXLAN_REMCSUM_NOPARTIAL))
>                 goto nla_put_failure;
>
> +       if (vxlan->flags & VXLAN_F_GPE &&
> +           nla_put_u8(skb, IFLA_VXLAN_GPE_MODE, VXLAN_GPE_MODE_L2))
> +               goto nla_put_failure;
> +
>         return 0;
>
>  nla_put_failure:
> diff --git a/include/net/vxlan.h b/include/net/vxlan.h
> index 6eda4ed4d78b..7c5f1385bdfd 100644
> --- a/include/net/vxlan.h
> +++ b/include/net/vxlan.h
> @@ -119,6 +119,64 @@ struct vxlanhdr_gbp {
>  #define VXLAN_GBP_POLICY_APPLIED       (BIT(3) << 16)
>  #define VXLAN_GBP_ID_MASK              (0xFFFF)
>
> +/*
> + * VXLAN Generic Protocol Extension (VXLAN_F_GPE):
> + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
> + * |R|R|Ver|I|P|R|O|       Reserved                |Next Protocol  |
> + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
> + * |                VXLAN Network Identifier (VNI) |   Reserved    |
> + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
> + *
> + * Ver = Version. Indicates VXLAN GPE protocol version.
> + *
> + * P = Next Protocol Bit. The P bit is set to indicate that the
> + *     Next Protocol field is present.
> + *
> + * O = OAM Flag Bit. The O bit is set to indicate that the packet
> + *     is an OAM packet.
> + *
> + * Next Protocol = This 8 bit field indicates the protocol header
> + * immediately following the VXLAN GPE header.
> + *
> + * https://tools.ietf.org/html/draft-ietf-nvo3-vxlan-gpe-01
> + */
> +
> +struct vxlanhdr_gpe {
> +#if defined(__LITTLE_ENDIAN_BITFIELD)
> +       u8      oam_flag:1,
> +               reserved_flags1:1,
> +               np_applied:1,
> +               instance_applied:1,
> +               version:2,
> +               reserved_flags2:2;
> +#elif defined(__BIG_ENDIAN_BITFIELD)
> +       u8      reserved_flags2:2,
> +               version:2,
> +               instance_applied:1,
> +               np_applied:1,
> +               reserved_flags1:1,
> +               oam_flag:1;
> +#endif
> +       u8      reserved_flags3;
> +       u8      reserved_flags4;
> +       u8      next_protocol;
> +       __be32  vx_vni;
> +};
> +
> +/* VXLAN-GPE header flags. */
> +#define VXLAN_HF_VER   cpu_to_be32(BIT(29) | BIT(28))
> +#define VXLAN_HF_NP    cpu_to_be32(BIT(26))
> +#define VXLAN_HF_OAM   cpu_to_be32(BIT(24))
> +
> +#define VXLAN_GPE_USED_BITS (VXLAN_HF_VER | VXLAN_HF_NP | VXLAN_HF_OAM | \
> +                            cpu_to_be32(0xff))
> +
> +/* VXLAN-GPE header Next Protocol. */
> +#define VXLAN_GPE_NP_IPV4      0x01
> +#define VXLAN_GPE_NP_IPV6      0x02
> +#define VXLAN_GPE_NP_ETHERNET  0x03
> +#define VXLAN_GPE_NP_NSH       0x04
> +
>  struct vxlan_metadata {
>         u32             gbp;
>  };
> @@ -205,6 +263,7 @@ struct vxlan_dev {
>  #define VXLAN_F_GBP                    0x800
>  #define VXLAN_F_REMCSUM_NOPARTIAL      0x1000
>  #define VXLAN_F_COLLECT_METADATA       0x2000
> +#define VXLAN_F_GPE                    0x4000
>
>  /* Flags that are used in the receive path. These flags must match in
>   * order for a socket to be shareable
> @@ -213,7 +272,8 @@ struct vxlan_dev {
>                                          VXLAN_F_UDP_ZERO_CSUM6_RX |    \
>                                          VXLAN_F_REMCSUM_RX |           \
>                                          VXLAN_F_REMCSUM_NOPARTIAL |    \
> -                                        VXLAN_F_COLLECT_METADATA)
> +                                        VXLAN_F_COLLECT_METADATA |     \
> +                                        VXLAN_F_GPE)
>
>  struct net_device *vxlan_dev_create(struct net *net, const char *name,
>                                     u8 name_assign_type, struct vxlan_config *conf);
> diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
> index d452cea59020..c2b2b7462731 100644
> --- a/include/uapi/linux/if_link.h
> +++ b/include/uapi/linux/if_link.h
> @@ -456,10 +456,18 @@ enum {
>         IFLA_VXLAN_GBP,
>         IFLA_VXLAN_REMCSUM_NOPARTIAL,
>         IFLA_VXLAN_COLLECT_METADATA,
> +       IFLA_VXLAN_GPE_MODE,
>         __IFLA_VXLAN_MAX
>  };
>  #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)
>
> +enum vxlan_gpe_mode {
> +       VXLAN_GPE_MODE_DISABLED = 0,
> +       VXLAN_GPE_MODE_L2,
> +       __VXLAN_GPE_MODE_MAX
> +};
> +#define VXLAN_GPE_MODE_MAX (__VXLAN_GPE_MODE_MAX - 1)
> +
>  struct ifla_vxlan_port_range {
>         __be16  low;
>         __be16  high;
> --
> 1.8.3.1
>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 5/5] vxlan: implement GPE in L3 mode
  2016-02-26 22:22   ` Jesse Gross
  2016-02-26 23:42     ` Tom Herbert
@ 2016-02-27 19:21     ` Jiri Benc
  2016-02-27 19:44       ` Jiri Benc
  1 sibling, 1 reply; 19+ messages in thread
From: Jiri Benc @ 2016-02-27 19:21 UTC (permalink / raw)
  To: Jesse Gross; +Cc: Linux Kernel Network Developers

On Fri, 26 Feb 2016 14:22:03 -0800, Jesse Gross wrote:
> Given that VXLAN_GPE_MODE_L3 will eventually come to be used by NSH,
> MPLS, etc. in addition to IPv4/v6, most of which are not really L3, it
> seems like something along the lines of NO_ARP might be better since
> that's what it really indicates.

I have no problem naming this differently. Not sure NO_ARP is the best
name, though - this is more about absence of the L2 header in received
packets than about ARP.

> Once that is in, I don't really see
> the need to explicitly block Ethernet packets from being handled in
> this mode. If they are received, then they can just be handed off to
> the stack - at that point it would look like an extra header, the same
> as if an NSH packet is received.

You mean returning ETH_P_TEB in skb->protocol? That's not very useful,
unfortunately. You won't get such a packet processed by the kernel IP
stack, rendering the VXLAN-GPE device unusable outside of ovs. It would
effectively become a packet sink when used standalone, as it cannot be
added to a bridge and received packets are not processed by anything -
there's no protocol handler for ETH_P_TEB.

With this patchset, you can create a VXLAN-GPE interface and use it as
any other point to point interface, and it works as expected with
routing etc.

The distinction between Ethernet and no Ethernet is needed, the
interface won't work otherwise.

 Jiri

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 5/5] vxlan: implement GPE in L3 mode
  2016-02-26 23:42     ` Tom Herbert
@ 2016-02-27 19:26       ` Jiri Benc
  0 siblings, 0 replies; 19+ messages in thread
From: Jiri Benc @ 2016-02-27 19:26 UTC (permalink / raw)
  To: Tom Herbert; +Cc: Jesse Gross, Linux Kernel Network Developers

On Fri, 26 Feb 2016 15:42:29 -0800, Tom Herbert wrote:
> Agreed, and I don't see why there even needs to be modes. VXLAN-GPE
> can carry arbitrary protocols with a next-header field. For Ethernet,
> MPLS, IPv4, and IPv6 it should just be a simple mapping of the next
> header to Ethertype for purposes of processing the payload.

That's exactly what this patchset does, Tom. The mapping is done in
vxlan_parse_gpe_hdr and vxlan_build_gpe_hdr.

Ethernet is special, though. It needs to be a standalone mode,
otherwise frames encapsulated including an Ethernet header wouldn't be
processed and there would be no way to send such packets - the only
distinction the driver can use is skb->protocol and that won't become
ETH_P_TEB magically.

 Jiri

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 1/5] vxlan: implement GPE in L2 mode
  2016-02-26 23:51   ` Tom Herbert
@ 2016-02-27 19:31     ` Jiri Benc
  2016-02-27 20:54       ` Tom Herbert
  0 siblings, 1 reply; 19+ messages in thread
From: Jiri Benc @ 2016-02-27 19:31 UTC (permalink / raw)
  To: Tom Herbert; +Cc: Linux Kernel Network Developers

On Fri, 26 Feb 2016 15:51:29 -0800, Tom Herbert wrote:
> I don't think this is right. VXLAN-GPE is a separate protocol than
> VXLAN, they are not compatible on the wire and don't share flags or
> fields (for instance GPB uses bits in VXLAN that hold the next
> protocol in VXLAN-GPE). Neither is there a VXLAN_F_GPE flag defined in
> VXLAN to differentiate the two. So VXLAN-GPE would be used on a
> different port

Yes, and that's exactly what this patchset does. If the VXLAN_F_GPE
flag is set when creating the interface, the UDP port used defaults to
the VXLAN-GPE UDP port (but can be overridden) and the driver expects
that all packets received are VXLAN-GPE.

Note also that you can't define both GPE and GBP together, because as
you noted, they're not compatible. The driver correctly refuses such
combination.

> and probably needs its own rcv functions.

I don't see the need for code duplication. This patchset does exactly
what you described and reuses the code, as most of it is really the
same for all VXLAN modes. I also made sure this is as clean as possible
in the driver which was the reason for the previous 4 cleanup patchsets.

 Jiri

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 5/5] vxlan: implement GPE in L3 mode
  2016-02-27 19:21     ` Jiri Benc
@ 2016-02-27 19:44       ` Jiri Benc
  2016-03-08 22:18         ` Jesse Gross
  0 siblings, 1 reply; 19+ messages in thread
From: Jiri Benc @ 2016-02-27 19:44 UTC (permalink / raw)
  To: Jesse Gross; +Cc: Linux Kernel Network Developers

On Sat, 27 Feb 2016 20:21:59 +0100, Jiri Benc wrote:
> You mean returning ETH_P_TEB in skb->protocol? That's not much useful,
> unfortunately. You won't get such packet processed by the kernel IP
> stack, rendering the VXLAN-GPE device unusable outside of ovs. It would
> effectively became a packet sink when used standalone, as it cannot be
> added to bridge and received packets are not processed by anything -
> there's no protocol handler for ETH_P_TEB.

Actually, I can do that in the L3 mode (or whatever we'll call it). It
won't hurt anything and may be useful for openvswitch. Ovs will have to
special case VXLAN-GPE vport (or perhaps any ARPHRD_NONE port) and set
skb->protocol to ETH_P_TEB on xmit and dissect correctly the ETH_P_TEB
packet on rcv.

The L2 mode (or whatever we'll call it) will need to stay, though, for
non-ovs use cases.

 Jiri

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 1/5] vxlan: implement GPE in L2 mode
  2016-02-27 19:31     ` Jiri Benc
@ 2016-02-27 20:54       ` Tom Herbert
  2016-02-27 21:02         ` Tom Herbert
  2016-02-29 10:23         ` Jiri Benc
  0 siblings, 2 replies; 19+ messages in thread
From: Tom Herbert @ 2016-02-27 20:54 UTC (permalink / raw)
  To: Jiri Benc; +Cc: Linux Kernel Network Developers

On Sat, Feb 27, 2016 at 11:31 AM, Jiri Benc <jbenc@redhat.com> wrote:
> On Fri, 26 Feb 2016 15:51:29 -0800, Tom Herbert wrote:
>> I don't think this is right. VXLAN-GPE is a separate protocol than
>> VXLAN, they are not compatible on the wire and don't share flags or
>> fields (for instance GPB uses bits in VXLAN that hold the next
>> protocol in VXLAN-GPE). Neither is there a VXLAN_F_GPE flag defined in
>> VXLAN to differentiate the two. So VXLAN-GPE would be used on a
>> different port
>
> Yes, and that's exactly what this patchset does. If there's the
> VXLAN_F_GPE flag defined while creating the interface, the used UDP
> port defaults to the VXLAN-GPE UDP port (but can be overriden) and the
> driver expects that all packets received are VXLAN-GPE.
>
> Note also that you can't define both GPE and GBP together, because as
> you noted, they're not compatible. The driver correctly refuses such
> combination.
>
Yes, but RCO has not been specified for VXLAN-GPE either, so the patch
does not correctly refuse setting those two together. Inevitably,
though, those and other extensions will be defined for VXLAN-GPE and new
ones for VXLAN. Again, the protocols are fundamentally incompatible,
so instead of trying to enforce each valid combination at
configuration time or performing multiple checks for flavor each time we
look at a packet, it seems easier to split the parsing with at most
one check for the protocol variant. For instance in
vxlan_udp_encap_recv just do:

if (vs->flags & VXLAN_F_GPE)
               if (!vxlan_parse_gpe_hdr(&unparsed, skb, vs->flags))
                       goto drop;
else
               if (!vxlan_parse_gpe(&unparsed, skb, vs->flags))
                       goto drop;

And then move REMCSUM and GBP and other protocol specific checks to
the right function.

Tom

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 1/5] vxlan: implement GPE in L2 mode
  2016-02-27 20:54       ` Tom Herbert
@ 2016-02-27 21:02         ` Tom Herbert
  2016-02-29 10:23         ` Jiri Benc
  1 sibling, 0 replies; 19+ messages in thread
From: Tom Herbert @ 2016-02-27 21:02 UTC (permalink / raw)
  To: Jiri Benc; +Cc: Linux Kernel Network Developers

On Sat, Feb 27, 2016 at 12:54 PM, Tom Herbert <tom@herbertland.com> wrote:
> On Sat, Feb 27, 2016 at 11:31 AM, Jiri Benc <jbenc@redhat.com> wrote:
>> On Fri, 26 Feb 2016 15:51:29 -0800, Tom Herbert wrote:
>>> I don't think this is right. VXLAN-GPE is a separate protocol than
>>> VXLAN, they are not compatible on the wire and don't share flags or
>>> fields (for instance GPB uses bits in VXLAN that hold the next
>>> protocol in VXLAN-GPE). Neither is there a VXLAN_F_GPE flag defined in
>>> VXLAN to differentiate the two. So VXLAN-GPE would be used on a
>>> different port
>>
>> Yes, and that's exactly what this patchset does. If there's the
>> VXLAN_F_GPE flag defined while creating the interface, the used UDP
>> port defaults to the VXLAN-GPE UDP port (but can be overriden) and the
>> driver expects that all packets received are VXLAN-GPE.
>>
>> Note also that you can't define both GPE and GBP together, because as
>> you noted, they're not compatible. The driver correctly refuses such
>> combination.
>>
> Yes, but RCO has not been specified for VXLAN-GPE either so the patch
> does not correctly refuse setting those two together. Inevitably
> though, those and other extensions will defined for VXLAN-GPE and new
> ones for VXLAN. Again, the protocols are fundamentally incompatible,
> so instead of trying to enforce each valid combination at
> configuration or performing multiple checks for flavor each time we
> look at a packet, it seems easier to split the parsing with at most
> one check for the protocol variant. For instance in
> vxlan_udp_encap_recv just do:
>
> if (vs->flags & VXLAN_F_GPE)
>                if (!vxlan_parse_gpe_hdr(&unparsed, skb, vs->flags))
>                        goto drop;
> else
>                if (!vxlan_parse_gpe(&unparsed, skb, vs->flags))
>                        goto drop;
>

I meant

if (vs->flags & VXLAN_F_GPE)
               if (!vxlan_parse_gpe_hdr(&unparsed, skb, vs->flags))
                       goto drop;
else
               if (!vxlan_parse_hdr(&unparsed, skb, vs->flags))
                       goto drop;

>
> And then move REMCSUM and GPB and other protocol specific checks to
> the right function.
>
> Tom

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 1/5] vxlan: implement GPE in L2 mode
  2016-02-27 20:54       ` Tom Herbert
  2016-02-27 21:02         ` Tom Herbert
@ 2016-02-29 10:23         ` Jiri Benc
  2016-02-29 17:13           ` Tom Herbert
  1 sibling, 1 reply; 19+ messages in thread
From: Jiri Benc @ 2016-02-29 10:23 UTC (permalink / raw)
  To: Tom Herbert; +Cc: Linux Kernel Network Developers

On Sat, 27 Feb 2016 12:54:52 -0800, Tom Herbert wrote:
> Yes, but RCO has not been specified for VXLAN-GPE either

As far as I can see, RCO will just work with VXLAN-GPE. But I have no
problem disallowing them to be set together, if you prefer that.

> so the patch
> does not correctly refuse setting those two together. Inevitably
> though, those and other extensions will defined for VXLAN-GPE and new
> ones for VXLAN. Again, the protocols are fundamentally incompatible,
> so instead of trying to enforce each valid combination at
> configuration

We need to do the checking in either case. If we accepted unsupported
combinations and then just silently ignored them, we'd be in trouble
later when such a combination becomes defined/supported. There would be
no way for the userspace tools to detect whether a particular kernel
supports the combination or not.

So, we need to check for supported combination of options during
configuration anyway.

And when we have that, I don't really see the reason for doing that
kind of code duplication that you suggest.

> or performing multiple checks for flavor each time we
> look at a packet, it seems easier to split the parsing with at most
> one check for the protocol variant. For instance in
> vxlan_udp_encap_recv just do:
> 
> if (vs->flags & VXLAN_F_GPE)
>                if (!vxlan_parse_gpe_hdr(&unparsed, skb, vs->flags))
>                        goto drop;
> else
>                if (!vxlan_parse_gpe(&unparsed, skb, vs->flags))
>                        goto drop;

Most of the code of these two functions will be identical. To
consolidate that as much as possible, you'll end up with what I have or
something very similar.

> And then move REMCSUM and GPB and other protocol specific checks to
> the right function.

And when RCO is defined for GPE, we copy the code? Doesn't make sense,
sorry.

If you look at the code in the current net-next (and the code after
this patchset), the extension handling has been made generic and each
extension gets its own handler function, leading to clean separation in
the code. There's no reason to split the vxlan_rcv into two functions
doing the same things but with slightly different calls to extensions.

 Jiri

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 1/5] vxlan: implement GPE in L2 mode
  2016-02-29 10:23         ` Jiri Benc
@ 2016-02-29 17:13           ` Tom Herbert
  2016-03-01 18:16             ` Jiri Benc
  0 siblings, 1 reply; 19+ messages in thread
From: Tom Herbert @ 2016-02-29 17:13 UTC (permalink / raw)
  To: Jiri Benc; +Cc: Linux Kernel Network Developers

On Mon, Feb 29, 2016 at 2:23 AM, Jiri Benc <jbenc@redhat.com> wrote:
> On Sat, 27 Feb 2016 12:54:52 -0800, Tom Herbert wrote:
>> Yes, but RCO has not been specified for VXLAN-GPE either
>
> As far as I can see, RCO will just work with VXLAN-GPE. But I have no
> problem disallowing them to be set together, if you prefer that.
>
>> so the patch
>> does not correctly refuse setting those two together. Inevitably
>> though, those and other extensions will defined for VXLAN-GPE and new
>> ones for VXLAN. Again, the protocols are fundamentally incompatible,
>> so instead of trying to enforce each valid combination at
>> configuration
>
> We need to do the checking in either case. If we accepted unsupported
> combinations and then just silently ignored them, we'd be in troubles
> later when such combination becomes defined/supported. There would be
> no way for the userspace tools to detect whether a particular kernel
> supports the combination or not.
>
> So, we need to check for supported combination of options during
> configuration anyway.
>
> And when we have that, I don't really see the reason for doing that
> kind of code duplication that you suggest.
>
>> or performing multiple checks for flavor each time we
>> look at a packet, it seems easier to split the parsing with at most
>> one check for the protocol variant. For instance in
>> vxlan_udp_encap_recv just do:
>>
>> if (vs->flags & VXLAN_F_GPE)
>>                if (!vxlan_parse_gpe_hdr(&unparsed, skb, vs->flags))
>>                        goto drop;
>> else
>>                if (!vxlan_parse_gpe(&unparsed, skb, vs->flags))
>>                        goto drop;
>
> Most of the code of these two functions will be identical. To
> consolidate that as much as possible, you'll end up with what I have or
> something very similar.
>
>> And then move REMCSUM and GPB and other protocol specific checks to
>> the right function.
>
> And when RCO is defined for GPE, we copy the code? Doesn't make sense,
> sorry.
>
> If you look at the code in the current net-next (and the code after
> this patchset), the extension handling has been made generic and each
> extension gets its own handler function, leading to clean separation in
> the code. There's no reason to split the vxlan_rcv into two functions
> doing the same things but with slightly different calls to extensions.
>
They may or may not be "slightly different"; if they are the same
(like RCO for VXLAN-GPE uses the low order bits in VNI) then a common
backend function can be called.

As defined now, GBP can't be used with VXLAN-GPE at all, but when I
read your patch it looks very much like GBP is being checked and
allowed in the VXLAN-GPE path. The fact that "if (vs->flags &
VXLAN_F_GBP)" always fails for VXLAN-GPE packets because of
configuration constraints is not at all obvious, and really this just
results in an unnecessary conditional that gives the same answer for
every single VXLAN-GPE packet which we've already checked for just a
few lines above. At least the check for GBP could be moved to an else
block of "if (vs->flags & VXLAN_F_GPE)", this alone improves clarity
and eliminates an unnecessary conditional in the VXLAN-GPE path.

>  Jiri

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 1/5] vxlan: implement GPE in L2 mode
  2016-02-29 17:13           ` Tom Herbert
@ 2016-03-01 18:16             ` Jiri Benc
  0 siblings, 0 replies; 19+ messages in thread
From: Jiri Benc @ 2016-03-01 18:16 UTC (permalink / raw)
  To: Tom Herbert; +Cc: Linux Kernel Network Developers

On Mon, 29 Feb 2016 09:13:50 -0800, Tom Herbert wrote:
> As defined now, GPB can't be used with VXLAN-GPE at all, but when I
> read your patch it looks very much like GPB is being checked and
> allowed in the VXLAN-GPE path. The fact that "if (vs->flags &
> VXLAN_F_GBP)" always fails for VXLAN-GPE packets because of
> configuration constraints is not at all obvious, and really this just
> results in an unnecessary conditional that gives the same answer for
> every single VXLAN-GPE packet which we've already checked for just a
> few lines above. At least the check for GPB could be moved to an else
> block of " if (vs->flags & VXLAN_F_GPE)", this alone improves clarity
> and eliminates an unnecessary conditional in the VXLAN-GPE path.

The problem here is ordering. GPE needs to be called before
iptunnel_pull_header, while GBP needs to be called after udp_tun_rx_dst
(and hence after iptunnel_pull_header).

I agree that it's a check that's done for every packet and would be
nice to get rid of. On the other hand, the amount of processing in the
rx path of vxlan is so huge that it hardly matters. Yes, we should work
on overall vxlan performance and it's something I'm actually looking
into.

As for not being obvious that the GBP processing can't happen, I see
your point. I'll add a comment that explains this to the code in v2.

Thanks,

 Jiri

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH net-next 5/5] vxlan: implement GPE in L3 mode
  2016-02-27 19:44       ` Jiri Benc
@ 2016-03-08 22:18         ` Jesse Gross
  0 siblings, 0 replies; 19+ messages in thread
From: Jesse Gross @ 2016-03-08 22:18 UTC (permalink / raw)
  To: Jiri Benc; +Cc: Linux Kernel Network Developers

On Sat, Feb 27, 2016 at 11:44 AM, Jiri Benc <jbenc@redhat.com> wrote:
> On Sat, 27 Feb 2016 20:21:59 +0100, Jiri Benc wrote:
>> You mean returning ETH_P_TEB in skb->protocol? That's not very useful,
>> unfortunately. You won't get such a packet processed by the kernel IP
>> stack, rendering the VXLAN-GPE device unusable outside of ovs. It would
>> effectively become a packet sink when used standalone, as it cannot be
>> added to a bridge and received packets are not processed by anything -
>> there's no protocol handler for ETH_P_TEB.
>
> Actually, I can do that in the L3 mode (or whatever we'll call it). It
> won't hurt anything and may be useful for openvswitch. Ovs will have to
> special-case the VXLAN-GPE vport (or perhaps any ARPHRD_NONE port) and set
> skb->protocol to ETH_P_TEB on xmit and correctly dissect the ETH_P_TEB
> packet on rcv.
>
> The L2 mode (or whatever we'll call it) will need to stay, though, for
> non-ovs use cases.

Sorry for the delay in responding but this plan sounds good to me.

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2016-03-08 22:18 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-02-26  7:48 [PATCH net-next 0/5] vxlan: implement Generic Protocol Extension (GPE) Jiri Benc
2016-02-26  7:48 ` [PATCH net-next 1/5] vxlan: implement GPE in L2 mode Jiri Benc
2016-02-26 23:51   ` Tom Herbert
2016-02-27 19:31     ` Jiri Benc
2016-02-27 20:54       ` Tom Herbert
2016-02-27 21:02         ` Tom Herbert
2016-02-29 10:23         ` Jiri Benc
2016-02-29 17:13           ` Tom Herbert
2016-03-01 18:16             ` Jiri Benc
2016-02-26  7:48 ` [PATCH net-next 2/5] vxlan: move L2 mode initialization to a separate function Jiri Benc
2016-02-26  7:48 ` [PATCH net-next 3/5] vxlan: move fdb code to common location in vxlan_xmit Jiri Benc
2016-02-26  7:48 ` [PATCH net-next 4/5] vxlan: fix too large pskb_may_pull with remote checksum Jiri Benc
2016-02-26  7:48 ` [PATCH net-next 5/5] vxlan: implement GPE in L3 mode Jiri Benc
2016-02-26 22:22   ` Jesse Gross
2016-02-26 23:42     ` Tom Herbert
2016-02-27 19:26       ` Jiri Benc
2016-02-27 19:21     ` Jiri Benc
2016-02-27 19:44       ` Jiri Benc
2016-03-08 22:18         ` Jesse Gross

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.