All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next v5] GTP: add support for flow based tunneling API
@ 2021-01-10  7:00 Pravin B Shelar
  2021-01-13 15:25 ` Pravin Shelar
                   ` (2 more replies)
  0 siblings, 3 replies; 13+ messages in thread
From: Pravin B Shelar @ 2021-01-10  7:00 UTC (permalink / raw)
  To: netdev, pablo, laforge, jonas, kuba; +Cc: Pravin B Shelar

This patch adds support for the flow-based tunneling API
to send and receive GTP tunnel packets over the tunnel metadata API.
This allows integration of this device with OVS or eBPF using
flow-based tunneling APIs.

Signed-off-by: Pravin B Shelar <pbshelar@fb.com>
---
v4-v5:
- coding style changes
v3-v4:
- add check for non-zero dst port
v2-v3:
-  Fixed coding style
-  changed IFLA_GTP_FD1 to optional param for LWT dev.
v1-v2:
-  Fixed according to comments from Jonas Bonn

 drivers/net/gtp.c                  | 527 +++++++++++++++++++++--------
 include/uapi/linux/gtp.h           |  12 +
 include/uapi/linux/if_link.h       |   1 +
 include/uapi/linux/if_tunnel.h     |   1 +
 tools/include/uapi/linux/if_link.h |   1 +
 5 files changed, 398 insertions(+), 144 deletions(-)

diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index 4c04e271f184..851364314ecc 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -21,6 +21,7 @@
 #include <linux/file.h>
 #include <linux/gtp.h>
 
+#include <net/dst_metadata.h>
 #include <net/net_namespace.h>
 #include <net/protocol.h>
 #include <net/ip.h>
@@ -73,6 +74,9 @@ struct gtp_dev {
 	unsigned int		hash_size;
 	struct hlist_head	*tid_hash;
 	struct hlist_head	*addr_hash;
+	/* Used by LWT tunnel. */
+	bool			collect_md;
+	struct socket		*collect_md_sock;
 };
 
 static unsigned int gtp_net_id __read_mostly;
@@ -179,33 +183,121 @@ static bool gtp_check_ms(struct sk_buff *skb, struct pdp_ctx *pctx,
 	return false;
 }
 
-static int gtp_rx(struct pdp_ctx *pctx, struct sk_buff *skb,
-			unsigned int hdrlen, unsigned int role)
+static int gtp_set_tun_dst(struct gtp_dev *gtp, struct sk_buff *skb,
+			   unsigned int hdrlen, u8 gtp_version,
+			   __be64 tid, u8 flags)
 {
-	if (!gtp_check_ms(skb, pctx, hdrlen, role)) {
-		netdev_dbg(pctx->dev, "No PDP ctx for this MS\n");
-		return 1;
+	struct metadata_dst *tun_dst;
+	int opts_len = 0;
+
+	if (unlikely(flags & GTP1_F_MASK))
+		opts_len = sizeof(struct gtpu_metadata);
+
+	tun_dst = udp_tun_rx_dst(skb, gtp->sk1u->sk_family, TUNNEL_KEY, tid, opts_len);
+	if (!tun_dst) {
+		netdev_dbg(gtp->dev, "Failed to allocate tun_dst");
+		goto err;
 	}
 
+	netdev_dbg(gtp->dev, "attaching metadata_dst to skb, gtp ver %d hdrlen %d\n",
+		   gtp_version, hdrlen);
+	if (unlikely(opts_len)) {
+		struct gtpu_metadata *opts;
+		struct gtp1_header *gtp1;
+
+		opts = ip_tunnel_info_opts(&tun_dst->u.tun_info);
+		gtp1 = (struct gtp1_header *)(skb->data + sizeof(struct udphdr));
+		opts->ver = GTP_METADATA_V1;
+		opts->flags = gtp1->flags;
+		opts->type = gtp1->type;
+		netdev_dbg(gtp->dev, "recved control pkt: flag %x type: %d\n",
+			   opts->flags, opts->type);
+		tun_dst->u.tun_info.key.tun_flags |= TUNNEL_GTPU_OPT;
+		tun_dst->u.tun_info.options_len = opts_len;
+		skb->protocol = htons(0xffff);         /* Unknown */
+	}
 	/* Get rid of the GTP + UDP headers. */
 	if (iptunnel_pull_header(skb, hdrlen, skb->protocol,
-				 !net_eq(sock_net(pctx->sk), dev_net(pctx->dev))))
-		return -1;
+				 !net_eq(sock_net(gtp->sk1u), dev_net(gtp->dev)))) {
+		gtp->dev->stats.rx_length_errors++;
+		goto err;
+	}
+
+	skb_dst_set(skb, &tun_dst->dst);
+	return 0;
+err:
+	return -1;
+}
+
+static int gtp_rx(struct gtp_dev *gtp, struct sk_buff *skb,
+		  unsigned int hdrlen, u8 gtp_version, unsigned int role,
+		  __be64 tid, u8 flags, u8 type)
+{
+	if (ip_tunnel_collect_metadata() || gtp->collect_md) {
+		int err;
+
+		err = gtp_set_tun_dst(gtp, skb, hdrlen, gtp_version, tid, flags);
+		if (err)
+			goto err;
+	} else {
+		struct pdp_ctx *pctx;
+
+		if (flags & GTP1_F_MASK)
+			hdrlen += 4;
 
-	netdev_dbg(pctx->dev, "forwarding packet from GGSN to uplink\n");
+		if (type != GTP_TPDU)
+			return 1;
+
+		if (gtp_version == GTP_V0)
+			pctx = gtp0_pdp_find(gtp, be64_to_cpu(tid));
+		else
+			pctx = gtp1_pdp_find(gtp, be64_to_cpu(tid));
+		if (!pctx) {
+			netdev_dbg(gtp->dev, "No PDP ctx to decap skb=%p\n", skb);
+			return 1;
+		}
+
+		if (!gtp_check_ms(skb, pctx, hdrlen, role)) {
+			netdev_dbg(pctx->dev, "No PDP ctx for this MS\n");
+			return 1;
+		}
+		/* Get rid of the GTP + UDP headers. */
+		if (iptunnel_pull_header(skb, hdrlen, skb->protocol,
+					 !net_eq(sock_net(pctx->sk), dev_net(gtp->dev)))) {
+			gtp->dev->stats.rx_length_errors++;
+			goto err;
+		}
+	}
+	netdev_dbg(gtp->dev, "forwarding packet from GGSN to uplink\n");
 
 	/* Now that the UDP and the GTP header have been removed, set up the
 	 * new network header. This is required by the upper layer to
 	 * calculate the transport header.
 	 */
 	skb_reset_network_header(skb);
+	if (pskb_may_pull(skb, sizeof(struct iphdr))) {
+		struct iphdr *iph;
+
+		iph = ip_hdr(skb);
+		if (iph->version == 4) {
+			netdev_dbg(gtp->dev, "inner pkt: ipv4");
+			skb->protocol = htons(ETH_P_IP);
+		} else if (iph->version == 6) {
+			netdev_dbg(gtp->dev, "inner pkt: ipv6");
+			skb->protocol = htons(ETH_P_IPV6);
+		} else {
+			netdev_dbg(gtp->dev, "inner pkt error: Unknown type");
+		}
+	}
 
-	skb->dev = pctx->dev;
-
-	dev_sw_netstats_rx_add(pctx->dev, skb->len);
-
+	skb->dev = gtp->dev;
+	dev_sw_netstats_rx_add(gtp->dev, skb->len);
 	netif_rx(skb);
 	return 0;
+
+err:
+	gtp->dev->stats.rx_dropped++;
+	return -1;
 }
 
 /* 1 means pass up to the stack, -1 means drop and 0 means decapsulated. */
@@ -214,7 +306,6 @@ static int gtp0_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb)
 	unsigned int hdrlen = sizeof(struct udphdr) +
 			      sizeof(struct gtp0_header);
 	struct gtp0_header *gtp0;
-	struct pdp_ctx *pctx;
 
 	if (!pskb_may_pull(skb, hdrlen))
 		return -1;
@@ -224,16 +315,7 @@ static int gtp0_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb)
 	if ((gtp0->flags >> 5) != GTP_V0)
 		return 1;
 
-	if (gtp0->type != GTP_TPDU)
-		return 1;
-
-	pctx = gtp0_pdp_find(gtp, be64_to_cpu(gtp0->tid));
-	if (!pctx) {
-		netdev_dbg(gtp->dev, "No PDP ctx to decap skb=%p\n", skb);
-		return 1;
-	}
-
-	return gtp_rx(pctx, skb, hdrlen, gtp->role);
+	return gtp_rx(gtp, skb, hdrlen, GTP_V0, gtp->role, gtp0->tid, gtp0->flags, gtp0->type);
 }
 
 static int gtp1u_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb)
@@ -241,41 +323,30 @@ static int gtp1u_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb)
 	unsigned int hdrlen = sizeof(struct udphdr) +
 			      sizeof(struct gtp1_header);
 	struct gtp1_header *gtp1;
-	struct pdp_ctx *pctx;
 
 	if (!pskb_may_pull(skb, hdrlen))
 		return -1;
 
 	gtp1 = (struct gtp1_header *)(skb->data + sizeof(struct udphdr));
 
+	netdev_dbg(gtp->dev, "GTPv1 recv: flags %x\n", gtp1->flags);
 	if ((gtp1->flags >> 5) != GTP_V1)
 		return 1;
 
-	if (gtp1->type != GTP_TPDU)
-		return 1;
-
 	/* From 29.060: "This field shall be present if and only if any one or
 	 * more of the S, PN and E flags are set.".
 	 *
 	 * If any of the bit is set, then the remaining ones also have to be
 	 * set.
 	 */
-	if (gtp1->flags & GTP1_F_MASK)
-		hdrlen += 4;
-
 	/* Make sure the header is larger enough, including extensions. */
 	if (!pskb_may_pull(skb, hdrlen))
 		return -1;
 
 	gtp1 = (struct gtp1_header *)(skb->data + sizeof(struct udphdr));
 
-	pctx = gtp1_pdp_find(gtp, ntohl(gtp1->tid));
-	if (!pctx) {
-		netdev_dbg(gtp->dev, "No PDP ctx to decap skb=%p\n", skb);
-		return 1;
-	}
-
-	return gtp_rx(pctx, skb, hdrlen, gtp->role);
+	return gtp_rx(gtp, skb, hdrlen, GTP_V1, gtp->role,
+		      key32_to_tunnel_id(gtp1->tid), gtp1->flags, gtp1->type);
 }
 
 static void __gtp_encap_destroy(struct sock *sk)
@@ -315,6 +386,11 @@ static void gtp_encap_disable(struct gtp_dev *gtp)
 {
 	gtp_encap_disable_sock(gtp->sk0);
 	gtp_encap_disable_sock(gtp->sk1u);
+	if (gtp->collect_md_sock) {
+		udp_tunnel_sock_release(gtp->collect_md_sock);
+		gtp->collect_md_sock = NULL;
+		netdev_dbg(gtp->dev, "GTP socket released.\n");
+	}
 }
 
 /* UDP encapsulation receive handler. See net/ipv4/udp.c.
@@ -329,7 +405,8 @@ static int gtp_encap_recv(struct sock *sk, struct sk_buff *skb)
 	if (!gtp)
 		return 1;
 
-	netdev_dbg(gtp->dev, "encap_recv sk=%p\n", sk);
+	netdev_dbg(gtp->dev, "encap_recv sk=%p type %d\n",
+		   sk, udp_sk(sk)->encap_type);
 
 	switch (udp_sk(sk)->encap_type) {
 	case UDP_ENCAP_GTP0:
@@ -383,12 +460,13 @@ static void gtp_dev_uninit(struct net_device *dev)
 
 static struct rtable *ip4_route_output_gtp(struct flowi4 *fl4,
 					   const struct sock *sk,
-					   __be32 daddr)
+					   __be32 daddr,
+					   __be32 saddr)
 {
 	memset(fl4, 0, sizeof(*fl4));
 	fl4->flowi4_oif		= sk->sk_bound_dev_if;
 	fl4->daddr		= daddr;
-	fl4->saddr		= inet_sk(sk)->inet_saddr;
+	fl4->saddr		= saddr;
 	fl4->flowi4_tos		= RT_CONN_FLAGS(sk);
 	fl4->flowi4_proto	= sk->sk_protocol;
 
@@ -412,7 +490,7 @@ static inline void gtp0_push_header(struct sk_buff *skb, struct pdp_ctx *pctx)
 	gtp0->tid	= cpu_to_be64(pctx->u.v0.tid);
 }
 
-static inline void gtp1_push_header(struct sk_buff *skb, struct pdp_ctx *pctx)
+static inline void gtp1_push_header(struct sk_buff *skb, __be32 tid)
 {
 	int payload_len = skb->len;
 	struct gtp1_header *gtp1;
@@ -428,46 +506,63 @@ static inline void gtp1_push_header(struct sk_buff *skb, struct pdp_ctx *pctx)
 	gtp1->flags	= 0x30; /* v1, GTP-non-prime. */
 	gtp1->type	= GTP_TPDU;
 	gtp1->length	= htons(payload_len);
-	gtp1->tid	= htonl(pctx->u.v1.o_tei);
+	gtp1->tid	= tid;
 
 	/* TODO: Suppport for extension header, sequence number and N-PDU.
 	 *	 Update the length field if any of them is available.
 	 */
 }
 
-struct gtp_pktinfo {
-	struct sock		*sk;
-	struct iphdr		*iph;
-	struct flowi4		fl4;
-	struct rtable		*rt;
-	struct pdp_ctx		*pctx;
-	struct net_device	*dev;
-	__be16			gtph_port;
-};
-
-static void gtp_push_header(struct sk_buff *skb, struct gtp_pktinfo *pktinfo)
+static inline int gtp1_push_control_header(struct sk_buff *skb,
+					   __be32 tid,
+					   struct gtpu_metadata *opts,
+					   struct net_device *dev)
 {
-	switch (pktinfo->pctx->gtp_version) {
-	case GTP_V0:
-		pktinfo->gtph_port = htons(GTP0_PORT);
-		gtp0_push_header(skb, pktinfo->pctx);
-		break;
-	case GTP_V1:
-		pktinfo->gtph_port = htons(GTP1U_PORT);
-		gtp1_push_header(skb, pktinfo->pctx);
-		break;
+	struct gtp1_header *gtp1c;
+	int payload_len;
+
+	if (opts->ver != GTP_METADATA_V1)
+		return -ENOENT;
+
+	if (opts->type == 0xFE) {
+		/* for end marker ignore skb data. */
+		netdev_dbg(dev, "xmit pkt with null data");
+		pskb_trim(skb, 0);
 	}
+	if (skb_cow_head(skb, sizeof(*gtp1c)) < 0)
+		return -ENOMEM;
+
+	payload_len = skb->len;
+
+	gtp1c = skb_push(skb, sizeof(*gtp1c));
+
+	gtp1c->flags	= opts->flags;
+	gtp1c->type	= opts->type;
+	gtp1c->length	= htons(payload_len);
+	gtp1c->tid	= tid;
+	netdev_dbg(dev, "xmit control pkt: ver %d flags %x type %x pkt len %d tid %x",
+		   opts->ver, opts->flags, opts->type, skb->len, tid);
+	return 0;
 }
 
+struct gtp_pktinfo {
+	struct sock             *sk;
+	__u8                    tos;
+	struct flowi4           fl4;
+	struct rtable           *rt;
+	struct net_device       *dev;
+	__be16                  gtph_port;
+};
+
 static inline void gtp_set_pktinfo_ipv4(struct gtp_pktinfo *pktinfo,
-					struct sock *sk, struct iphdr *iph,
-					struct pdp_ctx *pctx, struct rtable *rt,
+					struct sock *sk,
+					__u8 tos,
+					struct rtable *rt,
 					struct flowi4 *fl4,
 					struct net_device *dev)
 {
 	pktinfo->sk	= sk;
-	pktinfo->iph	= iph;
-	pktinfo->pctx	= pctx;
+	pktinfo->tos    = tos;
 	pktinfo->rt	= rt;
 	pktinfo->fl4	= *fl4;
 	pktinfo->dev	= dev;
@@ -477,40 +572,99 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev,
 			     struct gtp_pktinfo *pktinfo)
 {
 	struct gtp_dev *gtp = netdev_priv(dev);
+	struct gtpu_metadata *opts = NULL;
+	struct sock *sk = NULL;
 	struct pdp_ctx *pctx;
 	struct rtable *rt;
 	struct flowi4 fl4;
-	struct iphdr *iph;
-	__be16 df;
+	u8 gtp_version;
+	__be16 df = 0;
+	__be32 tun_id;
+	__be32 daddr;
+	__be32 saddr;
+	__u8 tos;
 	int mtu;
 
-	/* Read the IP destination address and resolve the PDP context.
-	 * Prepend PDP header with TEI/TID from PDP ctx.
-	 */
-	iph = ip_hdr(skb);
-	if (gtp->role == GTP_ROLE_SGSN)
-		pctx = ipv4_pdp_find(gtp, iph->saddr);
-	else
-		pctx = ipv4_pdp_find(gtp, iph->daddr);
+	if (gtp->collect_md) {
+		/* LWT GTP1U encap */
+		struct ip_tunnel_info *info = NULL;
 
-	if (!pctx) {
-		netdev_dbg(dev, "no PDP ctx found for %pI4, skip\n",
-			   &iph->daddr);
-		return -ENOENT;
+		info = skb_tunnel_info(skb);
+		if (!info) {
+			netdev_dbg(dev, "missing tunnel info");
+			return -ENOENT;
+		}
+		if (info->key.tp_dst && ntohs(info->key.tp_dst) != GTP1U_PORT) {
+			netdev_dbg(dev, "unexpected GTP dst port: %d", ntohs(info->key.tp_dst));
+			return -EOPNOTSUPP;
+		}
+		pctx = NULL;
+		gtp_version = GTP_V1;
+		tun_id = tunnel_id_to_key32(info->key.tun_id);
+		daddr = info->key.u.ipv4.dst;
+		saddr = info->key.u.ipv4.src;
+		sk = gtp->sk1u;
+		if (!sk) {
+			netdev_dbg(dev, "missing tunnel sock");
+			return -EOPNOTSUPP;
+		}
+		tos = info->key.tos;
+		if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT)
+			df = htons(IP_DF);
+
+		if (info->options_len != 0) {
+			if (info->key.tun_flags & TUNNEL_GTPU_OPT) {
+				opts = ip_tunnel_info_opts(info);
+			} else {
+				netdev_dbg(dev, "missing tunnel metadata for control pkt");
+				return -EOPNOTSUPP;
+			}
+		}
+		netdev_dbg(dev, "flow-based GTP1U encap: tunnel id %d\n",
+			   be32_to_cpu(tun_id));
+	} else {
+		struct iphdr *iph;
+
+		if (ntohs(skb->protocol) != ETH_P_IP)
+			return -EOPNOTSUPP;
+
+		iph = ip_hdr(skb);
+
+		/* Read the IP destination address and resolve the PDP context.
+		 * Prepend PDP header with TEI/TID from PDP ctx.
+		 */
+		if (gtp->role == GTP_ROLE_SGSN)
+			pctx = ipv4_pdp_find(gtp, iph->saddr);
+		else
+			pctx = ipv4_pdp_find(gtp, iph->daddr);
+
+		if (!pctx) {
+			netdev_dbg(dev, "no PDP ctx found for %pI4, skip\n",
+				   &iph->daddr);
+			return -ENOENT;
+		}
+		sk = pctx->sk;
+		netdev_dbg(dev, "found PDP context %p\n", pctx);
+
+		gtp_version = pctx->gtp_version;
+		tun_id  = htonl(pctx->u.v1.o_tei);
+		daddr = pctx->peer_addr_ip4.s_addr;
+		saddr = inet_sk(sk)->inet_saddr;
+		tos = iph->tos;
+		df = iph->frag_off;
+		netdev_dbg(dev, "gtp -> IP src: %pI4 dst: %pI4\n",
+			   &iph->saddr, &iph->daddr);
 	}
-	netdev_dbg(dev, "found PDP context %p\n", pctx);
 
-	rt = ip4_route_output_gtp(&fl4, pctx->sk, pctx->peer_addr_ip4.s_addr);
+	rt = ip4_route_output_gtp(&fl4, sk, daddr, saddr);
 	if (IS_ERR(rt)) {
-		netdev_dbg(dev, "no route to SSGN %pI4\n",
-			   &pctx->peer_addr_ip4.s_addr);
+		netdev_dbg(dev, "no route to SSGN %pI4\n", &daddr);
 		dev->stats.tx_carrier_errors++;
 		goto err;
 	}
 
 	if (rt->dst.dev == dev) {
-		netdev_dbg(dev, "circular route to SSGN %pI4\n",
-			   &pctx->peer_addr_ip4.s_addr);
+		netdev_dbg(dev, "circular route to SSGN %pI4\n", &daddr);
 		dev->stats.collisions++;
 		goto err_rt;
 	}
@@ -518,11 +672,10 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev,
 	skb_dst_drop(skb);
 
 	/* This is similar to tnl_update_pmtu(). */
-	df = iph->frag_off;
 	if (df) {
 		mtu = dst_mtu(&rt->dst) - dev->hard_header_len -
 			sizeof(struct iphdr) - sizeof(struct udphdr);
-		switch (pctx->gtp_version) {
+		switch (gtp_version) {
 		case GTP_V0:
 			mtu -= sizeof(struct gtp0_header);
 			break;
@@ -536,17 +689,38 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev,
 
 	rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu, false);
 
-	if (!skb_is_gso(skb) && (iph->frag_off & htons(IP_DF)) &&
-	    mtu < ntohs(iph->tot_len)) {
-		netdev_dbg(dev, "packet too big, fragmentation needed\n");
+	if (!skb_is_gso(skb) && (df & htons(IP_DF)) && mtu < skb->len) {
+		netdev_dbg(dev, "packet too big, fragmentation needed");
 		memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
 		icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
 			      htonl(mtu));
 		goto err_rt;
 	}
 
-	gtp_set_pktinfo_ipv4(pktinfo, pctx->sk, iph, pctx, rt, &fl4, dev);
-	gtp_push_header(skb, pktinfo);
+	gtp_set_pktinfo_ipv4(pktinfo, sk, tos, rt, &fl4, dev);
+
+	if (unlikely(opts)) {
+		int err;
+
+		pktinfo->gtph_port = htons(GTP1U_PORT);
+		err = gtp1_push_control_header(skb, tun_id, opts, dev);
+		if (err) {
+			netdev_info(dev, "cntr pkt error %d", err);
+			goto err_rt;
+		}
+		return 0;
+	}
+
+	switch (gtp_version) {
+	case GTP_V0:
+		pktinfo->gtph_port = htons(GTP0_PORT);
+		gtp0_push_header(skb, pctx);
+		break;
+	case GTP_V1:
+		pktinfo->gtph_port = htons(GTP1U_PORT);
+		gtp1_push_header(skb, tun_id);
+		break;
+	}
 
 	return 0;
 err_rt:
@@ -557,7 +731,6 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev,
 
 static netdev_tx_t gtp_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	unsigned int proto = ntohs(skb->protocol);
 	struct gtp_pktinfo pktinfo;
 	int err;
 
@@ -569,32 +742,22 @@ static netdev_tx_t gtp_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	/* PDP context lookups in gtp_build_skb_*() need rcu read-side lock. */
 	rcu_read_lock();
-	switch (proto) {
-	case ETH_P_IP:
-		err = gtp_build_skb_ip4(skb, dev, &pktinfo);
-		break;
-	default:
-		err = -EOPNOTSUPP;
-		break;
-	}
+	err = gtp_build_skb_ip4(skb, dev, &pktinfo);
 	rcu_read_unlock();
 
 	if (err < 0)
 		goto tx_err;
 
-	switch (proto) {
-	case ETH_P_IP:
-		netdev_dbg(pktinfo.dev, "gtp -> IP src: %pI4 dst: %pI4\n",
-			   &pktinfo.iph->saddr, &pktinfo.iph->daddr);
-		udp_tunnel_xmit_skb(pktinfo.rt, pktinfo.sk, skb,
-				    pktinfo.fl4.saddr, pktinfo.fl4.daddr,
-				    pktinfo.iph->tos,
-				    ip4_dst_hoplimit(&pktinfo.rt->dst),
-				    0,
-				    pktinfo.gtph_port, pktinfo.gtph_port,
-				    true, false);
-		break;
-	}
+	udp_tunnel_xmit_skb(pktinfo.rt, pktinfo.sk, skb,
+			    pktinfo.fl4.saddr,
+			    pktinfo.fl4.daddr,
+			    pktinfo.tos,
+			    ip4_dst_hoplimit(&pktinfo.rt->dst),
+			    0,
+			    pktinfo.gtph_port,
+			    pktinfo.gtph_port,
+			    true,
+			    false);
 
 	return NETDEV_TX_OK;
 tx_err:
@@ -610,6 +773,19 @@ static const struct net_device_ops gtp_netdev_ops = {
 	.ndo_get_stats64	= dev_get_tstats64,
 };
 
+static struct gtp_dev *gtp_find_flow_based_dev(struct net *net)
+{
+	struct gtp_net *gn = net_generic(net, gtp_net_id);
+	struct gtp_dev *gtp;
+
+	list_for_each_entry(gtp, &gn->gtp_dev_list, list) {
+		if (gtp->collect_md)
+			return gtp;
+	}
+
+	return NULL;
+}
+
 static void gtp_link_setup(struct net_device *dev)
 {
 	dev->netdev_ops		= &gtp_netdev_ops;
@@ -634,7 +810,7 @@ static void gtp_link_setup(struct net_device *dev)
 }
 
 static int gtp_hashtable_new(struct gtp_dev *gtp, int hsize);
-static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[]);
+static int gtp_encap_enable(struct gtp_dev *gtp, struct net_device *dev, struct nlattr *data[]);
 
 static void gtp_destructor(struct net_device *dev)
 {
@@ -652,11 +828,24 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev,
 	struct gtp_net *gn;
 	int hashsize, err;
 
-	if (!data[IFLA_GTP_FD0] && !data[IFLA_GTP_FD1])
+	if (!data[IFLA_GTP_FD0] && !data[IFLA_GTP_FD1] &&
+	    !data[IFLA_GTP_COLLECT_METADATA])
 		return -EINVAL;
 
 	gtp = netdev_priv(dev);
 
+	if (data[IFLA_GTP_COLLECT_METADATA]) {
+		if (data[IFLA_GTP_FD0]) {
+			netdev_dbg(dev, "LWT device does not support setting v0 socket");
+			return -EINVAL;
+		}
+		if (gtp_find_flow_based_dev(src_net)) {
+			netdev_dbg(dev, "LWT device already exist");
+			return -EBUSY;
+		}
+		gtp->collect_md = true;
+	}
+
 	if (!data[IFLA_GTP_PDP_HASHSIZE]) {
 		hashsize = 1024;
 	} else {
@@ -669,7 +858,7 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev,
 	if (err < 0)
 		return err;
 
-	err = gtp_encap_enable(gtp, data);
+	err = gtp_encap_enable(gtp, dev, data);
 	if (err < 0)
 		goto out_hashtable;
 
@@ -683,7 +872,7 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev,
 	list_add_rcu(&gtp->list, &gn->gtp_dev_list);
 	dev->priv_destructor = gtp_destructor;
 
-	netdev_dbg(dev, "registered new GTP interface\n");
+	netdev_dbg(dev, "registered new GTP interface %s\n", dev->name);
 
 	return 0;
 
@@ -714,6 +903,7 @@ static const struct nla_policy gtp_policy[IFLA_GTP_MAX + 1] = {
 	[IFLA_GTP_FD1]			= { .type = NLA_U32 },
 	[IFLA_GTP_PDP_HASHSIZE]		= { .type = NLA_U32 },
 	[IFLA_GTP_ROLE]			= { .type = NLA_U32 },
+	[IFLA_GTP_COLLECT_METADATA]	= { .type = NLA_FLAG },
 };
 
 static int gtp_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -737,6 +927,9 @@ static int gtp_fill_info(struct sk_buff *skb, const struct net_device *dev)
 	if (nla_put_u32(skb, IFLA_GTP_PDP_HASHSIZE, gtp->hash_size))
 		goto nla_put_failure;
 
+	if (gtp->collect_md && nla_put_flag(skb, IFLA_GTP_COLLECT_METADATA))
+		goto nla_put_failure;
+
 	return 0;
 
 nla_put_failure:
@@ -782,35 +975,24 @@ static int gtp_hashtable_new(struct gtp_dev *gtp, int hsize)
 	return -ENOMEM;
 }
 
-static struct sock *gtp_encap_enable_socket(int fd, int type,
-					    struct gtp_dev *gtp)
+static int __gtp_encap_enable_socket(struct socket *sock, int type,
+				     struct gtp_dev *gtp)
 {
 	struct udp_tunnel_sock_cfg tuncfg = {NULL};
-	struct socket *sock;
 	struct sock *sk;
-	int err;
-
-	pr_debug("enable gtp on %d, %d\n", fd, type);
-
-	sock = sockfd_lookup(fd, &err);
-	if (!sock) {
-		pr_debug("gtp socket fd=%d not found\n", fd);
-		return NULL;
-	}
 
 	sk = sock->sk;
 	if (sk->sk_protocol != IPPROTO_UDP ||
 	    sk->sk_type != SOCK_DGRAM ||
 	    (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)) {
-		pr_debug("socket fd=%d not UDP\n", fd);
-		sk = ERR_PTR(-EINVAL);
-		goto out_sock;
+		pr_debug("socket not UDP\n");
+		return -EINVAL;
 	}
 
 	lock_sock(sk);
 	if (sk->sk_user_data) {
-		sk = ERR_PTR(-EBUSY);
-		goto out_rel_sock;
+		release_sock(sock->sk);
+		return -EBUSY;
 	}
 
 	sock_hold(sk);
@@ -821,15 +1003,58 @@ static struct sock *gtp_encap_enable_socket(int fd, int type,
 	tuncfg.encap_destroy = gtp_encap_destroy;
 
 	setup_udp_tunnel_sock(sock_net(sock->sk), sock, &tuncfg);
-
-out_rel_sock:
 	release_sock(sock->sk);
-out_sock:
+	return 0;
+}
+
+static struct sock *gtp_encap_enable_socket(int fd, int type,
+					    struct gtp_dev *gtp)
+{
+	struct socket *sock;
+	int err;
+
+	pr_debug("enable gtp on %d, %d\n", fd, type);
+
+	sock = sockfd_lookup(fd, &err);
+	if (!sock) {
+		pr_debug("gtp socket fd=%d not found\n", fd);
+		return NULL;
+	}
+	err =  __gtp_encap_enable_socket(sock, type, gtp);
 	sockfd_put(sock);
-	return sk;
+	if (err)
+		return ERR_PTR(err);
+
+	return sock->sk;
 }
 
-static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[])
+static struct socket *gtp_create_gtp_socket(struct gtp_dev *gtp, struct net_device *dev)
+{
+	struct udp_port_cfg udp_conf;
+	struct socket *sock;
+	int err;
+
+	memset(&udp_conf, 0, sizeof(udp_conf));
+	udp_conf.family = AF_INET;
+	udp_conf.local_ip.s_addr = htonl(INADDR_ANY);
+	udp_conf.local_udp_port = htons(GTP1U_PORT);
+
+	err = udp_sock_create(dev_net(dev), &udp_conf, &sock);
+	if (err < 0) {
+		pr_debug("create gtp sock failed: %d\n", err);
+		return ERR_PTR(err);
+	}
+	err = __gtp_encap_enable_socket(sock, UDP_ENCAP_GTP1U, gtp);
+	if (err) {
+		pr_debug("enable gtp sock encap failed: %d\n", err);
+		udp_tunnel_sock_release(sock);
+		return ERR_PTR(err);
+	}
+	pr_debug("create gtp sock done\n");
+	return sock;
+}
+
+static int gtp_encap_enable(struct gtp_dev *gtp, struct net_device *dev, struct nlattr *data[])
 {
 	struct sock *sk1u = NULL;
 	struct sock *sk0 = NULL;
@@ -853,11 +1078,25 @@ static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[])
 		}
 	}
 
+	if (data[IFLA_GTP_COLLECT_METADATA]) {
+		struct socket *sock;
+
+		if (!sk1u) {
+			sock = gtp_create_gtp_socket(gtp, dev);
+			if (IS_ERR(sock))
+				return PTR_ERR(sock);
+
+			gtp->collect_md_sock = sock;
+			sk1u = sock->sk;
+		} else {
+			gtp->collect_md_sock = NULL;
+		}
+	}
+
 	if (data[IFLA_GTP_ROLE]) {
 		role = nla_get_u32(data[IFLA_GTP_ROLE]);
 		if (role > GTP_ROLE_SGSN) {
-			gtp_encap_disable_sock(sk0);
-			gtp_encap_disable_sock(sk1u);
+			gtp_encap_disable(gtp);
 			return -EINVAL;
 		}
 	}
@@ -1416,7 +1655,7 @@ static int __init gtp_init(void)
 	if (err < 0)
 		goto unreg_genl_family;
 
-	pr_info("GTP module loaded (pdp ctx size %zd bytes)\n",
+	pr_info("GTP module loaded (pdp ctx size %zd bytes) with tnl-md support\n",
 		sizeof(struct pdp_ctx));
 	return 0;
 
diff --git a/include/uapi/linux/gtp.h b/include/uapi/linux/gtp.h
index 79f9191bbb24..62aff78b7c56 100644
--- a/include/uapi/linux/gtp.h
+++ b/include/uapi/linux/gtp.h
@@ -2,6 +2,8 @@
 #ifndef _UAPI_LINUX_GTP_H_
 #define _UAPI_LINUX_GTP_H_
 
+#include <linux/types.h>
+
 #define GTP_GENL_MCGRP_NAME	"gtp"
 
 enum gtp_genl_cmds {
@@ -34,4 +36,14 @@ enum gtp_attrs {
 };
 #define GTPA_MAX (__GTPA_MAX + 1)
 
+enum {
+	GTP_METADATA_V1
+};
+
+struct gtpu_metadata {
+	__u8    ver;
+	__u8    flags;
+	__u8    type;
+};
+
 #endif /* _UAPI_LINUX_GTP_H_ */
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 82708c6db432..2bd0d8bbcdb2 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -809,6 +809,7 @@ enum {
 	IFLA_GTP_FD1,
 	IFLA_GTP_PDP_HASHSIZE,
 	IFLA_GTP_ROLE,
+	IFLA_GTP_COLLECT_METADATA,
 	__IFLA_GTP_MAX,
 };
 #define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1)
diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index 7d9105533c7b..802da679fab1 100644
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
@@ -176,6 +176,7 @@ enum {
 #define TUNNEL_VXLAN_OPT	__cpu_to_be16(0x1000)
 #define TUNNEL_NOCACHE		__cpu_to_be16(0x2000)
 #define TUNNEL_ERSPAN_OPT	__cpu_to_be16(0x4000)
+#define TUNNEL_GTPU_OPT		__cpu_to_be16(0x8000)
 
 #define TUNNEL_OPTIONS_PRESENT \
 		(TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT | TUNNEL_ERSPAN_OPT)
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index d208b2af697f..28d649bda686 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -617,6 +617,7 @@ enum {
 	IFLA_GTP_FD1,
 	IFLA_GTP_PDP_HASHSIZE,
 	IFLA_GTP_ROLE,
+	IFLA_GTP_COLLECT_METADATA,
 	__IFLA_GTP_MAX,
 };
 #define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1)
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH net-next v5] GTP: add support for flow based tunneling API
  2021-01-10  7:00 [PATCH net-next v5] GTP: add support for flow based tunneling API Pravin B Shelar
@ 2021-01-13 15:25 ` Pravin Shelar
  2021-01-17  0:46 ` Jakub Kicinski
  2021-01-17 13:40 ` Jonas Bonn
  2 siblings, 0 replies; 13+ messages in thread
From: Pravin Shelar @ 2021-01-13 15:25 UTC (permalink / raw)
  To: Pravin B Shelar, Harald Welte
  Cc: Linux Kernel Network Developers, Pablo Neira Ayuso, Jonas Bonn,
	Jakub Kicinski

Hi Harald,

On Sat, Jan 9, 2021 at 11:02 PM Pravin B Shelar <pbshelar@fb.com> wrote:
>
> Following patch add support for flow based tunneling API
> to send and recv GTP tunnel packet over tunnel metadata API.
> This would allow this device integration with OVS or eBPF using
> flow based tunneling APIs.
>
> Signed-off-by: Pravin B Shelar <pbshelar@fb.com>
> ---
> v4-v5:
> - coding style changes
> v3-v4:
> - add check for non-zero dst port
> v2-v3:
> -  Fixed coding style
> -  changed IFLA_GTP_FD1 to optional param for LWT dev.
> v1-v2:
> -  Fixed according to comments from Jonas Bonn
>
This is the latest revision.

I have started with OVS integration, there are unit tests that
validate the GTP support. This is datapath related test, that has the
setup commands:
https://github.com/pshelar/ovs/blob/6ec6a2a86adc56c7c9dcab7b3a7b70bb6dad35c9/tests/system-layer3-tunnels.at#L158

Once OVS patches are upstream I can post patches for ip-route command.

Patch for iproute to add support for LWT GTP devices.
https://github.com/pshelar/iproute2/commit/d6e99f8342672e6e9ce0b71e153296f8e2b41cfc

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH net-next v5] GTP: add support for flow based tunneling API
  2021-01-10  7:00 [PATCH net-next v5] GTP: add support for flow based tunneling API Pravin B Shelar
  2021-01-13 15:25 ` Pravin Shelar
@ 2021-01-17  0:46 ` Jakub Kicinski
  2021-01-17 13:23   ` Jonas Bonn
  2021-01-17 13:40 ` Jonas Bonn
  2 siblings, 1 reply; 13+ messages in thread
From: Jakub Kicinski @ 2021-01-17  0:46 UTC (permalink / raw)
  To: Pravin B Shelar; +Cc: netdev, pablo, laforge, jonas

On Sat,  9 Jan 2021 23:00:21 -0800 Pravin B Shelar wrote:
> Following patch add support for flow based tunneling API
> to send and recv GTP tunnel packet over tunnel metadata API.
> This would allow this device integration with OVS or eBPF using
> flow based tunneling APIs.
> 
> Signed-off-by: Pravin B Shelar <pbshelar@fb.com>

Applied, thanks!

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH net-next v5] GTP: add support for flow based tunneling API
  2021-01-17  0:46 ` Jakub Kicinski
@ 2021-01-17 13:23   ` Jonas Bonn
  2021-01-17 15:25     ` Harald Welte
                       ` (2 more replies)
  0 siblings, 3 replies; 13+ messages in thread
From: Jonas Bonn @ 2021-01-17 13:23 UTC (permalink / raw)
  To: Jakub Kicinski, Pravin B Shelar; +Cc: netdev, pablo, laforge

Hi Jakub,

On 17/01/2021 01:46, Jakub Kicinski wrote:
> On Sat,  9 Jan 2021 23:00:21 -0800 Pravin B Shelar wrote:
>> Following patch add support for flow based tunneling API
>> to send and recv GTP tunnel packet over tunnel metadata API.
>> This would allow this device integration with OVS or eBPF using
>> flow based tunneling APIs.
>>
>> Signed-off-by: Pravin B Shelar <pbshelar@fb.com>
> 
> Applied, thanks!
> 

This patch hasn't received any ACKs from either the maintainers or 
anyone else providing review.  The following issues remain unaddressed 
after review:

i)  the patch contains several logically separate changes that would be 
better served as smaller patches
ii) functionality like the handling of end markers has been introduced 
without further explanation
iii) symmetry between the handling of GTPv0 and GTPv1 has been 
unnecessarily broken
iv) there are no available userspace tools to allow for testing this 
functionality

I have requested that this patch be reworked into a series of smaller 
changes.  That would allow:

i) reasonable review
ii) the possibility to explain _why_ things are being done in the patch 
comment where this isn't obvious (like the handling of end markers)
iii) the chance to do a reasonable rebase of other ongoing work onto 
this patch (series):  this one patch is invasive and difficult to rebase 
onto

I'm not sure what the hurry is to get this patch into mainline.  Large 
and complicated patches like this take time to review; please revert 
this and allow that process to happen.

Thanks,
Jonas

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH net-next v5] GTP: add support for flow based tunneling API
  2021-01-10  7:00 [PATCH net-next v5] GTP: add support for flow based tunneling API Pravin B Shelar
  2021-01-13 15:25 ` Pravin Shelar
  2021-01-17  0:46 ` Jakub Kicinski
@ 2021-01-17 13:40 ` Jonas Bonn
  2021-01-17 20:42   ` Pravin Shelar
  2 siblings, 1 reply; 13+ messages in thread
From: Jonas Bonn @ 2021-01-17 13:40 UTC (permalink / raw)
  To: Pravin B Shelar, netdev, pablo, laforge, kuba

Hi Pravin,

Again, this patch is too big and contains too many disparate changes to 
allow for easy review.  Please break this up into a series of logical 
changes for easier review.

On 10/01/2021 08:00, Pravin B Shelar wrote:
> Following patch add support for flow based tunneling API
> to send and recv GTP tunnel packet over tunnel metadata API.
> This would allow this device integration with OVS or eBPF using
> flow based tunneling APIs.
> 
> Signed-off-by: Pravin B Shelar <pbshelar@fb.com>
> ---
> v4-v5:
> - coding style changes
> v3-v4:
> - add check for non-zero dst port
> v2-v3:
> -  Fixed coding style
> -  changed IFLA_GTP_FD1 to optional param for LWT dev.
> v1-v2:
> -  Fixed according to comments from Jonas Bonn
> 
>   drivers/net/gtp.c                  | 527 +++++++++++++++++++++--------
>   include/uapi/linux/gtp.h           |  12 +
>   include/uapi/linux/if_link.h       |   1 +
>   include/uapi/linux/if_tunnel.h     |   1 +
>   tools/include/uapi/linux/if_link.h |   1 +
>   5 files changed, 398 insertions(+), 144 deletions(-)
> 
> diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
> index 4c04e271f184..851364314ecc 100644
> --- a/drivers/net/gtp.c
> +++ b/drivers/net/gtp.c
> @@ -21,6 +21,7 @@
>   #include <linux/file.h>
>   #include <linux/gtp.h>
>   
> +#include <net/dst_metadata.h>
>   #include <net/net_namespace.h>
>   #include <net/protocol.h>
>   #include <net/ip.h>
> @@ -73,6 +74,9 @@ struct gtp_dev {
>   	unsigned int		hash_size;
>   	struct hlist_head	*tid_hash;
>   	struct hlist_head	*addr_hash;
> +	/* Used by LWT tunnel. */
> +	bool			collect_md;
> +	struct socket		*collect_md_sock;
>   };

Can't you use the 'sock' member of sk1u as the reference to this socket?

>   
>   static unsigned int gtp_net_id __read_mostly;
> @@ -179,33 +183,121 @@ static bool gtp_check_ms(struct sk_buff *skb, struct pdp_ctx *pctx,
>   	return false;
>   }
>   
> -static int gtp_rx(struct pdp_ctx *pctx, struct sk_buff *skb,
> -			unsigned int hdrlen, unsigned int role)
> +static int gtp_set_tun_dst(struct gtp_dev *gtp, struct sk_buff *skb,
> +			   unsigned int hdrlen, u8 gtp_version,
> +			   __be64 tid, u8 flags)
>   {
> -	if (!gtp_check_ms(skb, pctx, hdrlen, role)) {
> -		netdev_dbg(pctx->dev, "No PDP ctx for this MS\n");
> -		return 1;
> +	struct metadata_dst *tun_dst;
> +	int opts_len = 0;
> +
> +	if (unlikely(flags & GTP1_F_MASK))
> +		opts_len = sizeof(struct gtpu_metadata);
> +
> +	tun_dst = udp_tun_rx_dst(skb, gtp->sk1u->sk_family, TUNNEL_KEY, tid, opts_len);
> +	if (!tun_dst) {
> +		netdev_dbg(gtp->dev, "Failed to allocate tun_dst");
> +		goto err;
>   	}
>   
> +	netdev_dbg(gtp->dev, "attaching metadata_dst to skb, gtp ver %d hdrlen %d\n",
> +		   gtp_version, hdrlen);
> +	if (unlikely(opts_len)) {
> +		struct gtpu_metadata *opts;
> +		struct gtp1_header *gtp1;
> +
> +		opts = ip_tunnel_info_opts(&tun_dst->u.tun_info);
> +		gtp1 = (struct gtp1_header *)(skb->data + sizeof(struct udphdr));
> +		opts->ver = GTP_METADATA_V1;
> +		opts->flags = gtp1->flags;
> +		opts->type = gtp1->type;
> +		netdev_dbg(gtp->dev, "recved control pkt: flag %x type: %d\n",
> +			   opts->flags, opts->type);
> +		tun_dst->u.tun_info.key.tun_flags |= TUNNEL_GTPU_OPT;
> +		tun_dst->u.tun_info.options_len = opts_len;
> +		skb->protocol = htons(0xffff);         /* Unknown */
> +	}
>   	/* Get rid of the GTP + UDP headers. */
>   	if (iptunnel_pull_header(skb, hdrlen, skb->protocol,
> -				 !net_eq(sock_net(pctx->sk), dev_net(pctx->dev))))
> -		return -1;
> +				 !net_eq(sock_net(gtp->sk1u), dev_net(gtp->dev)))) {
> +		gtp->dev->stats.rx_length_errors++;
> +		goto err;
> +	}
> +
> +	skb_dst_set(skb, &tun_dst->dst);
> +	return 0;
> +err:
> +	return -1;
> +}
> +
> +static int gtp_rx(struct gtp_dev *gtp, struct sk_buff *skb,
> +		  unsigned int hdrlen, u8 gtp_version, unsigned int role,
> +		  __be64 tid, u8 flags, u8 type)
> +{
> +	if (ip_tunnel_collect_metadata() || gtp->collect_md) {
> +		int err;
> +
> +		err = gtp_set_tun_dst(gtp, skb, hdrlen, gtp_version, tid, flags);
> +		if (err)
> +			goto err;
> +	} else {
> +		struct pdp_ctx *pctx;
> +
> +		if (flags & GTP1_F_MASK)
> +			hdrlen += 4;
>   
> -	netdev_dbg(pctx->dev, "forwarding packet from GGSN to uplink\n");
> +		if (type != GTP_TPDU)
> +			return 1;
> +
> +		if (gtp_version == GTP_V0)
> +			pctx = gtp0_pdp_find(gtp, be64_to_cpu(tid));
> +		else
> +			pctx = gtp1_pdp_find(gtp, be64_to_cpu(tid));
> +		if (!pctx) {
> +			netdev_dbg(gtp->dev, "No PDP ctx to decap skb=%p\n", skb);
> +			return 1;
> +		}
> +
> +		if (!gtp_check_ms(skb, pctx, hdrlen, role)) {
> +			netdev_dbg(pctx->dev, "No PDP ctx for this MS\n");
> +			return 1;
> +		}
> +		/* Get rid of the GTP + UDP headers. */
> +		if (iptunnel_pull_header(skb, hdrlen, skb->protocol,
> +					 !net_eq(sock_net(pctx->sk), dev_net(gtp->dev)))) {
> +			gtp->dev->stats.rx_length_errors++;
> +			goto err;
> +		}
> +	}
> +	netdev_dbg(gtp->dev, "forwarding packet from GGSN to uplink\n");
>   
>   	/* Now that the UDP and the GTP header have been removed, set up the
>   	 * new network header. This is required by the upper layer to
>   	 * calculate the transport header.
>   	 */
>   	skb_reset_network_header(skb);
> +	if (pskb_may_pull(skb, sizeof(struct iphdr))) {
> +		struct iphdr *iph;
> +
> +		iph = ip_hdr(skb);
> +		if (iph->version == 4) {
> +			netdev_dbg(gtp->dev, "inner pkt: ipv4");
> +			skb->protocol = htons(ETH_P_IP);
> +		} else if (iph->version == 6) {
> +			netdev_dbg(gtp->dev, "inner pkt: ipv6");
> +			skb->protocol = htons(ETH_P_IPV6);
> +		} else {
> +			netdev_dbg(gtp->dev, "inner pkt error: Unknown type");
> +		}
> +	}
>   
> -	skb->dev = pctx->dev;
> -
> -	dev_sw_netstats_rx_add(pctx->dev, skb->len);
> -
> +	skb->dev = gtp->dev;
> +	dev_sw_netstats_rx_add(gtp->dev, skb->len);
>   	netif_rx(skb);
>   	return 0;
> +
> +err:
> +	gtp->dev->stats.rx_dropped++;
> +	return -1;
>   }
>   
>   /* 1 means pass up to the stack, -1 means drop and 0 means decapsulated. */
> @@ -214,7 +306,6 @@ static int gtp0_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb)
>   	unsigned int hdrlen = sizeof(struct udphdr) +
>   			      sizeof(struct gtp0_header);
>   	struct gtp0_header *gtp0;
> -	struct pdp_ctx *pctx;
>   
>   	if (!pskb_may_pull(skb, hdrlen))
>   		return -1;
> @@ -224,16 +315,7 @@ static int gtp0_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb)
>   	if ((gtp0->flags >> 5) != GTP_V0)
>   		return 1;
>   
> -	if (gtp0->type != GTP_TPDU)
> -		return 1;
> -
> -	pctx = gtp0_pdp_find(gtp, be64_to_cpu(gtp0->tid));
> -	if (!pctx) {
> -		netdev_dbg(gtp->dev, "No PDP ctx to decap skb=%p\n", skb);
> -		return 1;
> -	}
> -
> -	return gtp_rx(pctx, skb, hdrlen, gtp->role);
> +	return gtp_rx(gtp, skb, hdrlen, GTP_V0, gtp->role, gtp0->tid, gtp0->flags, gtp0->type);
>   }
>   
>   static int gtp1u_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb)
> @@ -241,41 +323,30 @@ static int gtp1u_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb)
>   	unsigned int hdrlen = sizeof(struct udphdr) +
>   			      sizeof(struct gtp1_header);
>   	struct gtp1_header *gtp1;
> -	struct pdp_ctx *pctx;
>   
>   	if (!pskb_may_pull(skb, hdrlen))
>   		return -1;
>   
>   	gtp1 = (struct gtp1_header *)(skb->data + sizeof(struct udphdr));
>   
> +	netdev_dbg(gtp->dev, "GTPv1 recv: flags %x\n", gtp1->flags);
>   	if ((gtp1->flags >> 5) != GTP_V1)
>   		return 1;
>   
> -	if (gtp1->type != GTP_TPDU)
> -		return 1;
> -
>   	/* From 29.060: "This field shall be present if and only if any one or
>   	 * more of the S, PN and E flags are set.".
>   	 *
>   	 * If any of the bit is set, then the remaining ones also have to be
>   	 * set.
>   	 */
> -	if (gtp1->flags & GTP1_F_MASK)
> -		hdrlen += 4;
> -
>   	/* Make sure the header is larger enough, including extensions. */
>   	if (!pskb_may_pull(skb, hdrlen))
>   		return -1;
>   
>   	gtp1 = (struct gtp1_header *)(skb->data + sizeof(struct udphdr));
>   
> -	pctx = gtp1_pdp_find(gtp, ntohl(gtp1->tid));
> -	if (!pctx) {
> -		netdev_dbg(gtp->dev, "No PDP ctx to decap skb=%p\n", skb);
> -		return 1;
> -	}
> -
> -	return gtp_rx(pctx, skb, hdrlen, gtp->role);
> +	return gtp_rx(gtp, skb, hdrlen, GTP_V1, gtp->role,
> +		      key32_to_tunnel_id(gtp1->tid), gtp1->flags, gtp1->type);
>   }
>   
>   static void __gtp_encap_destroy(struct sock *sk)
> @@ -315,6 +386,11 @@ static void gtp_encap_disable(struct gtp_dev *gtp)
>   {
>   	gtp_encap_disable_sock(gtp->sk0);
>   	gtp_encap_disable_sock(gtp->sk1u);
> +	if (gtp->collect_md_sock) {
> +		udp_tunnel_sock_release(gtp->collect_md_sock);
> +		gtp->collect_md_sock = NULL;
> +		netdev_dbg(gtp->dev, "GTP socket released.\n");
> +	}
>   }
>   
>   /* UDP encapsulation receive handler. See net/ipv4/udp.c.
> @@ -329,7 +405,8 @@ static int gtp_encap_recv(struct sock *sk, struct sk_buff *skb)
>   	if (!gtp)
>   		return 1;
>   
> -	netdev_dbg(gtp->dev, "encap_recv sk=%p\n", sk);
> +	netdev_dbg(gtp->dev, "encap_recv sk=%p type %d\n",
> +		   sk, udp_sk(sk)->encap_type);
>   
>   	switch (udp_sk(sk)->encap_type) {
>   	case UDP_ENCAP_GTP0:
> @@ -383,12 +460,13 @@ static void gtp_dev_uninit(struct net_device *dev)
>   
>   static struct rtable *ip4_route_output_gtp(struct flowi4 *fl4,
>   					   const struct sock *sk,
> -					   __be32 daddr)
> +					   __be32 daddr,
> +					   __be32 saddr)
>   {
>   	memset(fl4, 0, sizeof(*fl4));
>   	fl4->flowi4_oif		= sk->sk_bound_dev_if;
>   	fl4->daddr		= daddr;
> -	fl4->saddr		= inet_sk(sk)->inet_saddr;
> +	fl4->saddr		= saddr;
>   	fl4->flowi4_tos		= RT_CONN_FLAGS(sk);
>   	fl4->flowi4_proto	= sk->sk_protocol;
>   
> @@ -412,7 +490,7 @@ static inline void gtp0_push_header(struct sk_buff *skb, struct pdp_ctx *pctx)
>   	gtp0->tid	= cpu_to_be64(pctx->u.v0.tid);
>   }
>   
> -static inline void gtp1_push_header(struct sk_buff *skb, struct pdp_ctx *pctx)
> +static inline void gtp1_push_header(struct sk_buff *skb, __be32 tid)
>   {
>   	int payload_len = skb->len;
>   	struct gtp1_header *gtp1;
> @@ -428,46 +506,63 @@ static inline void gtp1_push_header(struct sk_buff *skb, struct pdp_ctx *pctx)
>   	gtp1->flags	= 0x30; /* v1, GTP-non-prime. */
>   	gtp1->type	= GTP_TPDU;
>   	gtp1->length	= htons(payload_len);
> -	gtp1->tid	= htonl(pctx->u.v1.o_tei);
> +	gtp1->tid	= tid;
>   
>   	/* TODO: Suppport for extension header, sequence number and N-PDU.
>   	 *	 Update the length field if any of them is available.
>   	 */
>   }
>   
> -struct gtp_pktinfo {
> -	struct sock		*sk;
> -	struct iphdr		*iph;
> -	struct flowi4		fl4;
> -	struct rtable		*rt;
> -	struct pdp_ctx		*pctx;
> -	struct net_device	*dev;
> -	__be16			gtph_port;
> -};
> -
> -static void gtp_push_header(struct sk_buff *skb, struct gtp_pktinfo *pktinfo)
> +static inline int gtp1_push_control_header(struct sk_buff *skb,
> +					   __be32 tid,
> +					   struct gtpu_metadata *opts,
> +					   struct net_device *dev)
>   {
> -	switch (pktinfo->pctx->gtp_version) {
> -	case GTP_V0:
> -		pktinfo->gtph_port = htons(GTP0_PORT);
> -		gtp0_push_header(skb, pktinfo->pctx);
> -		break;
> -	case GTP_V1:
> -		pktinfo->gtph_port = htons(GTP1U_PORT);
> -		gtp1_push_header(skb, pktinfo->pctx);
> -		break;
> +	struct gtp1_header *gtp1c;
> +	int payload_len;
> +
> +	if (opts->ver != GTP_METADATA_V1)
> +		return -ENOENT;
> +
> +	if (opts->type == 0xFE) {
> +		/* for end marker ignore skb data. */
> +		netdev_dbg(dev, "xmit pkt with null data");
> +		pskb_trim(skb, 0);
>   	}

This is new functionality...???  Separate patch, please.

> +	if (skb_cow_head(skb, sizeof(*gtp1c)) < 0)
> +		return -ENOMEM;
> +
> +	payload_len = skb->len;
> +
> +	gtp1c = skb_push(skb, sizeof(*gtp1c));
> +
> +	gtp1c->flags	= opts->flags;
> +	gtp1c->type	= opts->type;
> +	gtp1c->length	= htons(payload_len);
> +	gtp1c->tid	= tid;
> +	netdev_dbg(dev, "xmit control pkt: ver %d flags %x type %x pkt len %d tid %x",
> +		   opts->ver, opts->flags, opts->type, skb->len, tid);
> +	return 0;
>   }
>   
> +struct gtp_pktinfo {
> +	struct sock             *sk;
> +	__u8                    tos;
> +	struct flowi4           fl4;
> +	struct rtable           *rt;
> +	struct net_device       *dev;
> +	__be16                  gtph_port;
> +};
> +

There are significant changes to this structure... can't these be made 
separately from the main patch adding flow tunneling?

>   static inline void gtp_set_pktinfo_ipv4(struct gtp_pktinfo *pktinfo,
> -					struct sock *sk, struct iphdr *iph,
> -					struct pdp_ctx *pctx, struct rtable *rt,
> +					struct sock *sk,
> +					__u8 tos,
> +					struct rtable *rt,
>   					struct flowi4 *fl4,
>   					struct net_device *dev)
>   {
>   	pktinfo->sk	= sk;
> -	pktinfo->iph	= iph;
> -	pktinfo->pctx	= pctx;
> +	pktinfo->tos    = tos;
>   	pktinfo->rt	= rt;
>   	pktinfo->fl4	= *fl4;
>   	pktinfo->dev	= dev;
> @@ -477,40 +572,99 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev,
>   			     struct gtp_pktinfo *pktinfo)
>   {
>   	struct gtp_dev *gtp = netdev_priv(dev);
> +	struct gtpu_metadata *opts = NULL;
> +	struct sock *sk = NULL;
>   	struct pdp_ctx *pctx;
>   	struct rtable *rt;
>   	struct flowi4 fl4;
> -	struct iphdr *iph;
> -	__be16 df;
> +	u8 gtp_version;
> +	__be16 df = 0;
> +	__be32 tun_id;
> +	__be32 daddr;
> +	__be32 saddr;
> +	__u8 tos;
>   	int mtu;
>   
> -	/* Read the IP destination address and resolve the PDP context.
> -	 * Prepend PDP header with TEI/TID from PDP ctx.
> -	 */
> -	iph = ip_hdr(skb);
> -	if (gtp->role == GTP_ROLE_SGSN)
> -		pctx = ipv4_pdp_find(gtp, iph->saddr);
> -	else
> -		pctx = ipv4_pdp_find(gtp, iph->daddr);
> +	if (gtp->collect_md) {
> +		/* LWT GTP1U encap */
> +		struct ip_tunnel_info *info = NULL;
>   
> -	if (!pctx) {
> -		netdev_dbg(dev, "no PDP ctx found for %pI4, skip\n",
> -			   &iph->daddr);
> -		return -ENOENT;
> +		info = skb_tunnel_info(skb);
> +		if (!info) {
> +			netdev_dbg(dev, "missing tunnel info");
> +			return -ENOENT;
> +		}
> +		if (info->key.tp_dst && ntohs(info->key.tp_dst) != GTP1U_PORT) {
> +			netdev_dbg(dev, "unexpected GTP dst port: %d", ntohs(info->key.tp_dst));
> +			return -EOPNOTSUPP;
> +		}
> +		pctx = NULL;
> +		gtp_version = GTP_V1;
> +		tun_id = tunnel_id_to_key32(info->key.tun_id);
> +		daddr = info->key.u.ipv4.dst;
> +		saddr = info->key.u.ipv4.src;
> +		sk = gtp->sk1u;
> +		if (!sk) {
> +			netdev_dbg(dev, "missing tunnel sock");
> +			return -EOPNOTSUPP;
> +		}
> +		tos = info->key.tos;
> +		if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT)
> +			df = htons(IP_DF);
> +
> +		if (info->options_len != 0) {
> +			if (info->key.tun_flags & TUNNEL_GTPU_OPT) {
> +				opts = ip_tunnel_info_opts(info);
> +			} else {
> +				netdev_dbg(dev, "missing tunnel metadata for control pkt");
> +				return -EOPNOTSUPP;
> +			}
> +		}
> +		netdev_dbg(dev, "flow-based GTP1U encap: tunnel id %d\n",
> +			   be32_to_cpu(tun_id));
> +	} else {
> +		struct iphdr *iph;
> +
> +		if (ntohs(skb->protocol) != ETH_P_IP)
> +			return -EOPNOTSUPP;
> +
> +		iph = ip_hdr(skb);
> +
> +		/* Read the IP destination address and resolve the PDP context.
> +		 * Prepend PDP header with TEI/TID from PDP ctx.
> +		 */
> +		if (gtp->role == GTP_ROLE_SGSN)
> +			pctx = ipv4_pdp_find(gtp, iph->saddr);
> +		else
> +			pctx = ipv4_pdp_find(gtp, iph->daddr);
> +
> +		if (!pctx) {
> +			netdev_dbg(dev, "no PDP ctx found for %pI4, skip\n",
> +				   &iph->daddr);
> +			return -ENOENT;
> +		}
> +		sk = pctx->sk;
> +		netdev_dbg(dev, "found PDP context %p\n", pctx);
> +
> +		gtp_version = pctx->gtp_version;
> +		tun_id  = htonl(pctx->u.v1.o_tei);
> +		daddr = pctx->peer_addr_ip4.s_addr;
> +		saddr = inet_sk(sk)->inet_saddr;
> +		tos = iph->tos;
> +		df = iph->frag_off;
> +		netdev_dbg(dev, "gtp -> IP src: %pI4 dst: %pI4\n",
> +			   &iph->saddr, &iph->daddr);
>   	}
> -	netdev_dbg(dev, "found PDP context %p\n", pctx);

Please refactor the above so that the resulting patch set becomes less 
invasive.  There's a lot of unnecessary code movement happening here 
that will result in unnecessary pain when rebasing on top of this.

>   
> -	rt = ip4_route_output_gtp(&fl4, pctx->sk, pctx->peer_addr_ip4.s_addr);
> +	rt = ip4_route_output_gtp(&fl4, sk, daddr, saddr);
>   	if (IS_ERR(rt)) {
> -		netdev_dbg(dev, "no route to SSGN %pI4\n",
> -			   &pctx->peer_addr_ip4.s_addr);
> +		netdev_dbg(dev, "no route to SSGN %pI4\n", &daddr);
>   		dev->stats.tx_carrier_errors++;
>   		goto err;
>   	}
>   
>   	if (rt->dst.dev == dev) {
> -		netdev_dbg(dev, "circular route to SSGN %pI4\n",
> -			   &pctx->peer_addr_ip4.s_addr);
> +		netdev_dbg(dev, "circular route to SSGN %pI4\n", &daddr);
>   		dev->stats.collisions++;
>   		goto err_rt;
>   	}
> @@ -518,11 +672,10 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev,
>   	skb_dst_drop(skb);
>   
>   	/* This is similar to tnl_update_pmtu(). */
> -	df = iph->frag_off;
>   	if (df) {
>   		mtu = dst_mtu(&rt->dst) - dev->hard_header_len -
>   			sizeof(struct iphdr) - sizeof(struct udphdr);
> -		switch (pctx->gtp_version) {
> +		switch (gtp_version) {
>   		case GTP_V0:
>   			mtu -= sizeof(struct gtp0_header);
>   			break;
> @@ -536,17 +689,38 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev,
>   
>   	rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu, false);
>   
> -	if (!skb_is_gso(skb) && (iph->frag_off & htons(IP_DF)) &&
> -	    mtu < ntohs(iph->tot_len)) {
> -		netdev_dbg(dev, "packet too big, fragmentation needed\n");
> +	if (!skb_is_gso(skb) && (df & htons(IP_DF)) && mtu < skb->len) {
> +		netdev_dbg(dev, "packet too big, fragmentation needed");
>   		memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
>   		icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
>   			      htonl(mtu));
>   		goto err_rt;
>   	}
>   
> -	gtp_set_pktinfo_ipv4(pktinfo, pctx->sk, iph, pctx, rt, &fl4, dev);
> -	gtp_push_header(skb, pktinfo);
> +	gtp_set_pktinfo_ipv4(pktinfo, sk, tos, rt, &fl4, dev);
> +
> +	if (unlikely(opts)) {
> +		int err;
> +
> +		pktinfo->gtph_port = htons(GTP1U_PORT);
> +		err = gtp1_push_control_header(skb, tun_id, opts, dev);
> +		if (err) {
> +			netdev_info(dev, "cntr pkt error %d", err);
> +			goto err_rt;
> +		}
> +		return 0;
> +	}
> +
> +	switch (gtp_version) {
> +	case GTP_V0:
> +		pktinfo->gtph_port = htons(GTP0_PORT);
> +		gtp0_push_header(skb, pctx);
> +		break;
> +	case GTP_V1:
> +		pktinfo->gtph_port = htons(GTP1U_PORT);
> +		gtp1_push_header(skb, tun_id);
> +		break;
> +	}
>   
>   	return 0;
>   err_rt:
> @@ -557,7 +731,6 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev,
>   
>   static netdev_tx_t gtp_dev_xmit(struct sk_buff *skb, struct net_device *dev)
>   {
> -	unsigned int proto = ntohs(skb->protocol);
>   	struct gtp_pktinfo pktinfo;
>   	int err;
>   
> @@ -569,32 +742,22 @@ static netdev_tx_t gtp_dev_xmit(struct sk_buff *skb, struct net_device *dev)
>   
>   	/* PDP context lookups in gtp_build_skb_*() need rcu read-side lock. */
>   	rcu_read_lock();
> -	switch (proto) {
> -	case ETH_P_IP:
> -		err = gtp_build_skb_ip4(skb, dev, &pktinfo);
> -		break;
> -	default:
> -		err = -EOPNOTSUPP;
> -		break;
> -	}
> +	err = gtp_build_skb_ip4(skb, dev, &pktinfo);
>   	rcu_read_unlock();
>   
>   	if (err < 0)
>   		goto tx_err;
>   
> -	switch (proto) {
> -	case ETH_P_IP:
> -		netdev_dbg(pktinfo.dev, "gtp -> IP src: %pI4 dst: %pI4\n",
> -			   &pktinfo.iph->saddr, &pktinfo.iph->daddr);
> -		udp_tunnel_xmit_skb(pktinfo.rt, pktinfo.sk, skb,
> -				    pktinfo.fl4.saddr, pktinfo.fl4.daddr,
> -				    pktinfo.iph->tos,
> -				    ip4_dst_hoplimit(&pktinfo.rt->dst),
> -				    0,
> -				    pktinfo.gtph_port, pktinfo.gtph_port,
> -				    true, false);
> -		break;
> -	}
> +	udp_tunnel_xmit_skb(pktinfo.rt, pktinfo.sk, skb,
> +			    pktinfo.fl4.saddr,
> +			    pktinfo.fl4.daddr,
> +			    pktinfo.tos,
> +			    ip4_dst_hoplimit(&pktinfo.rt->dst),
> +			    0,
> +			    pktinfo.gtph_port,
> +			    pktinfo.gtph_port,
> +			    true,
> +			    false);
>   
>   	return NETDEV_TX_OK;
>   tx_err:
> @@ -610,6 +773,19 @@ static const struct net_device_ops gtp_netdev_ops = {
>   	.ndo_get_stats64	= dev_get_tstats64,
>   };
>   
> +static struct gtp_dev *gtp_find_flow_based_dev(struct net *net)
> +{
> +	struct gtp_net *gn = net_generic(net, gtp_net_id);
> +	struct gtp_dev *gtp;
> +
> +	list_for_each_entry(gtp, &gn->gtp_dev_list, list) {
> +		if (gtp->collect_md)
> +			return gtp;
> +	}
> +
> +	return NULL;
> +}
> +
>   static void gtp_link_setup(struct net_device *dev)
>   {
>   	dev->netdev_ops		= &gtp_netdev_ops;
> @@ -634,7 +810,7 @@ static void gtp_link_setup(struct net_device *dev)
>   }
>   
>   static int gtp_hashtable_new(struct gtp_dev *gtp, int hsize);
> -static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[]);
> +static int gtp_encap_enable(struct gtp_dev *gtp, struct net_device *dev, struct nlattr *data[]);
>   
>   static void gtp_destructor(struct net_device *dev)
>   {
> @@ -652,11 +828,24 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev,
>   	struct gtp_net *gn;
>   	int hashsize, err;
>   
> -	if (!data[IFLA_GTP_FD0] && !data[IFLA_GTP_FD1])
> +	if (!data[IFLA_GTP_FD0] && !data[IFLA_GTP_FD1] &&
> +	    !data[IFLA_GTP_COLLECT_METADATA])
>   		return -EINVAL;

You're allowing the fd=NULL case for the COLLECT_METADATA variant, but 
you might as well allow it unconditionally since you have the 
create_gtp_socket() function now to handle this for you.

>   
>   	gtp = netdev_priv(dev);
>   
> +	if (data[IFLA_GTP_COLLECT_METADATA]) {
> +		if (data[IFLA_GTP_FD0]) {
> +			netdev_dbg(dev, "LWT device does not support setting v0 socket");
> +			return -EINVAL;
> +		}
> +		if (gtp_find_flow_based_dev(src_net)) {
> +			netdev_dbg(dev, "LWT device already exist");
> +			return -EBUSY;
> +		}
> +		gtp->collect_md = true;
> +	}
> +
>   	if (!data[IFLA_GTP_PDP_HASHSIZE]) {
>   		hashsize = 1024;
>   	} else {
> @@ -669,7 +858,7 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev,
>   	if (err < 0)
>   		return err;
>   
> -	err = gtp_encap_enable(gtp, data);
> +	err = gtp_encap_enable(gtp, dev, data);
>   	if (err < 0)
>   		goto out_hashtable;
>   
> @@ -683,7 +872,7 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev,
>   	list_add_rcu(&gtp->list, &gn->gtp_dev_list);
>   	dev->priv_destructor = gtp_destructor;
>   
> -	netdev_dbg(dev, "registered new GTP interface\n");
> +	netdev_dbg(dev, "registered new GTP interface %s\n", dev->name);
>   
>   	return 0;
>   
> @@ -714,6 +903,7 @@ static const struct nla_policy gtp_policy[IFLA_GTP_MAX + 1] = {
>   	[IFLA_GTP_FD1]			= { .type = NLA_U32 },
>   	[IFLA_GTP_PDP_HASHSIZE]		= { .type = NLA_U32 },
>   	[IFLA_GTP_ROLE]			= { .type = NLA_U32 },
> +	[IFLA_GTP_COLLECT_METADATA]	= { .type = NLA_FLAG },
>   };
>   
>   static int gtp_validate(struct nlattr *tb[], struct nlattr *data[],
> @@ -737,6 +927,9 @@ static int gtp_fill_info(struct sk_buff *skb, const struct net_device *dev)
>   	if (nla_put_u32(skb, IFLA_GTP_PDP_HASHSIZE, gtp->hash_size))
>   		goto nla_put_failure;
>   
> +	if (gtp->collect_md && nla_put_flag(skb, IFLA_GTP_COLLECT_METADATA))
> +		goto nla_put_failure;
> +
>   	return 0;
>   
>   nla_put_failure:
> @@ -782,35 +975,24 @@ static int gtp_hashtable_new(struct gtp_dev *gtp, int hsize)
>   	return -ENOMEM;
>   }
>   
> -static struct sock *gtp_encap_enable_socket(int fd, int type,
> -					    struct gtp_dev *gtp)
> +static int __gtp_encap_enable_socket(struct socket *sock, int type,
> +				     struct gtp_dev *gtp)
>   {
>   	struct udp_tunnel_sock_cfg tuncfg = {NULL};
> -	struct socket *sock;
>   	struct sock *sk;
> -	int err;
> -
> -	pr_debug("enable gtp on %d, %d\n", fd, type);
> -
> -	sock = sockfd_lookup(fd, &err);
> -	if (!sock) {
> -		pr_debug("gtp socket fd=%d not found\n", fd);
> -		return NULL;
> -	}
>   
>   	sk = sock->sk;
>   	if (sk->sk_protocol != IPPROTO_UDP ||
>   	    sk->sk_type != SOCK_DGRAM ||
>   	    (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)) {
> -		pr_debug("socket fd=%d not UDP\n", fd);
> -		sk = ERR_PTR(-EINVAL);
> -		goto out_sock;
> +		pr_debug("socket not UDP\n");
> +		return -EINVAL;
>   	}
>   
>   	lock_sock(sk);
>   	if (sk->sk_user_data) {
> -		sk = ERR_PTR(-EBUSY);
> -		goto out_rel_sock;
> +		release_sock(sock->sk);
> +		return -EBUSY;
>   	}
>   
>   	sock_hold(sk);
> @@ -821,15 +1003,58 @@ static struct sock *gtp_encap_enable_socket(int fd, int type,
>   	tuncfg.encap_destroy = gtp_encap_destroy;
>   
>   	setup_udp_tunnel_sock(sock_net(sock->sk), sock, &tuncfg);
> -
> -out_rel_sock:
>   	release_sock(sock->sk);
> -out_sock:
> +	return 0;
> +}
> +
> +static struct sock *gtp_encap_enable_socket(int fd, int type,
> +					    struct gtp_dev *gtp)
> +{
> +	struct socket *sock;
> +	int err;
> +
> +	pr_debug("enable gtp on %d, %d\n", fd, type);
> +
> +	sock = sockfd_lookup(fd, &err);
> +	if (!sock) {
> +		pr_debug("gtp socket fd=%d not found\n", fd);
> +		return NULL;
> +	}
> +	err =  __gtp_encap_enable_socket(sock, type, gtp);
>   	sockfd_put(sock);
> -	return sk;
> +	if (err)
> +		return ERR_PTR(err);
> +
> +	return sock->sk;
>   }
>   
> -static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[])
> +static struct socket *gtp_create_gtp_socket(struct gtp_dev *gtp, struct net_device *dev)
> +{
> +	struct udp_port_cfg udp_conf;
> +	struct socket *sock;
> +	int err;
> +
> +	memset(&udp_conf, 0, sizeof(udp_conf));
> +	udp_conf.family = AF_INET;
> +	udp_conf.local_ip.s_addr = htonl(INADDR_ANY);
> +	udp_conf.local_udp_port = htons(GTP1U_PORT);
> +
> +	err = udp_sock_create(dev_net(dev), &udp_conf, &sock);
> +	if (err < 0) {
> +		pr_debug("create gtp sock failed: %d\n", err);
> +		return ERR_PTR(err);
> +	}
> +	err = __gtp_encap_enable_socket(sock, UDP_ENCAP_GTP1U, gtp);
> +	if (err) {
> +		pr_debug("enable gtp sock encap failed: %d\n", err);
> +		udp_tunnel_sock_release(sock);
> +		return ERR_PTR(err);
> +	}
> +	pr_debug("create gtp sock done\n");
> +	return sock;
> +}
> +
> +static int gtp_encap_enable(struct gtp_dev *gtp, struct net_device *dev, struct nlattr *data[])
>   {
>   	struct sock *sk1u = NULL;
>   	struct sock *sk0 = NULL;
> @@ -853,11 +1078,25 @@ static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[])
>   		}
>   	}
>   
> +	if (data[IFLA_GTP_COLLECT_METADATA]) {
> +		struct socket *sock;
> +
> +		if (!sk1u) {
> +			sock = gtp_create_gtp_socket(gtp, dev);
> +			if (IS_ERR(sock))
> +				return PTR_ERR(sock);
> +
> +			gtp->collect_md_sock = sock;
> +			sk1u = sock->sk;
> +		} else {
> +			gtp->collect_md_sock = NULL;
> +		}
> +	}
> +

If you're adding a gtp_create_gtp_socket() function, then that s

>   	if (data[IFLA_GTP_ROLE]) {
>   		role = nla_get_u32(data[IFLA_GTP_ROLE]);
>   		if (role > GTP_ROLE_SGSN) {
> -			gtp_encap_disable_sock(sk0);
> -			gtp_encap_disable_sock(sk1u);
> +			gtp_encap_disable(gtp);
>   			return -EINVAL;
>   		}
>   	}
> @@ -1416,7 +1655,7 @@ static int __init gtp_init(void)
>   	if (err < 0)
>   		goto unreg_genl_family;
>   
> -	pr_info("GTP module loaded (pdp ctx size %zd bytes)\n",
> +	pr_info("GTP module loaded (pdp ctx size %zd bytes) with tnl-md support\n",
>   		sizeof(struct pdp_ctx));
>   	return 0;
>   
> diff --git a/include/uapi/linux/gtp.h b/include/uapi/linux/gtp.h
> index 79f9191bbb24..62aff78b7c56 100644
> --- a/include/uapi/linux/gtp.h
> +++ b/include/uapi/linux/gtp.h
> @@ -2,6 +2,8 @@
>   #ifndef _UAPI_LINUX_GTP_H_
>   #define _UAPI_LINUX_GTP_H_
>   
> +#include <linux/types.h>
> +
>   #define GTP_GENL_MCGRP_NAME	"gtp"
>   
>   enum gtp_genl_cmds {
> @@ -34,4 +36,14 @@ enum gtp_attrs {
>   };
>   #define GTPA_MAX (__GTPA_MAX + 1)
>   
> +enum {
> +	GTP_METADATA_V1
> +};
> +
> +struct gtpu_metadata {
> +	__u8    ver;
> +	__u8    flags;
> +	__u8    type;
> +};
> +
>   #endif /* _UAPI_LINUX_GTP_H_ */
> diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
> index 82708c6db432..2bd0d8bbcdb2 100644
> --- a/include/uapi/linux/if_link.h
> +++ b/include/uapi/linux/if_link.h
> @@ -809,6 +809,7 @@ enum {
>   	IFLA_GTP_FD1,
>   	IFLA_GTP_PDP_HASHSIZE,
>   	IFLA_GTP_ROLE,
> +	IFLA_GTP_COLLECT_METADATA,
>   	__IFLA_GTP_MAX,
>   };
>   #define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1)
> diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
> index 7d9105533c7b..802da679fab1 100644
> --- a/include/uapi/linux/if_tunnel.h
> +++ b/include/uapi/linux/if_tunnel.h
> @@ -176,6 +176,7 @@ enum {
>   #define TUNNEL_VXLAN_OPT	__cpu_to_be16(0x1000)
>   #define TUNNEL_NOCACHE		__cpu_to_be16(0x2000)
>   #define TUNNEL_ERSPAN_OPT	__cpu_to_be16(0x4000)
> +#define TUNNEL_GTPU_OPT		__cpu_to_be16(0x8000)
>   
>   #define TUNNEL_OPTIONS_PRESENT \
>   		(TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT | TUNNEL_ERSPAN_OPT)
> diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
> index d208b2af697f..28d649bda686 100644
> --- a/tools/include/uapi/linux/if_link.h
> +++ b/tools/include/uapi/linux/if_link.h
> @@ -617,6 +617,7 @@ enum {
>   	IFLA_GTP_FD1,
>   	IFLA_GTP_PDP_HASHSIZE,
>   	IFLA_GTP_ROLE,
> +	IFLA_GTP_COLLECT_METADATA,
>   	__IFLA_GTP_MAX,
>   };
>   #define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1)
> 

/Jonas

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH net-next v5] GTP: add support for flow based tunneling API
  2021-01-17 13:23   ` Jonas Bonn
@ 2021-01-17 15:25     ` Harald Welte
  2021-01-17 20:55       ` Pravin Shelar
  2021-01-17 20:47     ` Pravin Shelar
  2021-01-18 17:27     ` Jakub Kicinski
  2 siblings, 1 reply; 13+ messages in thread
From: Harald Welte @ 2021-01-17 15:25 UTC (permalink / raw)
  To: Jonas Bonn; +Cc: Jakub Kicinski, Pravin B Shelar, netdev, pablo

Hi Jonas, Jakub and others,

On Sun, Jan 17, 2021 at 02:23:52PM +0100, Jonas Bonn wrote:
> This patch hasn't received any ACK's from either the maintainers or anyone
> else providing review.  The following issues remain unaddressed after
> review:

[...]

Full ACK from my point of view.  The patch is so massive that I
as the original co-author and co-maintainer of the GTP kernel module
have problems understanding what it is doing at all.  Furthermore,
I am actually wondering if there is any commonality between the existing
use cases and whatever the modified gtp.ko is trying to achieve -- up to
the point of whether or not it makes sense to have both functionalities
in the same driver/module at all.

> I'm not sure what the hurry is to get this patch into mainline.  Large and
> complicated patches like this take time to review; please revert this and
> allow that process to happen.

Also acknowledged and supported from my side.

-- 
- Harald Welte <laforge@gnumonks.org>           http://laforge.gnumonks.org/
============================================================================
"Privacy in residential applications is a desirable marketing option."
                                                  (ETSI EN 300 175-7 Ch. A6)

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH net-next v5] GTP: add support for flow based tunneling API
  2021-01-17 13:40 ` Jonas Bonn
@ 2021-01-17 20:42   ` Pravin Shelar
  0 siblings, 0 replies; 13+ messages in thread
From: Pravin Shelar @ 2021-01-17 20:42 UTC (permalink / raw)
  To: Jonas Bonn
  Cc: Pravin B Shelar, Linux Kernel Network Developers,
	Pablo Neira Ayuso, Harald Welte, Jakub Kicinski

On Sun, Jan 17, 2021 at 5:46 AM Jonas Bonn <jonas@norrbonn.se> wrote:
>
> Hi Pravin,
>
> Again, this patch is too big and contains too many disparate changes to
> allow for easy review.  Please break this up into a series of logical
> changes for easier review.
>
> On 10/01/2021 08:00, Pravin B Shelar wrote:
> > Following patch add support for flow based tunneling API
> > to send and recv GTP tunnel packet over tunnel metadata API.
> > This would allow this device integration with OVS or eBPF using
> > flow based tunneling APIs.
> >
> > Signed-off-by: Pravin B Shelar <pbshelar@fb.com>
> > ---
> > v4-v5:
> > - coding style changes
> > v3-v4:
> > - add check for non-zero dst port
> > v2-v3:
> > -  Fixed coding style
> > -  changed IFLA_GTP_FD1 to optional param for LWT dev.
> > v1-v2:
> > -  Fixed according to comments from Jonas Bonn
> >
> >   drivers/net/gtp.c                  | 527 +++++++++++++++++++++--------
> >   include/uapi/linux/gtp.h           |  12 +
> >   include/uapi/linux/if_link.h       |   1 +
> >   include/uapi/linux/if_tunnel.h     |   1 +
> >   tools/include/uapi/linux/if_link.h |   1 +
> >   5 files changed, 398 insertions(+), 144 deletions(-)
> >
> > diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
> > index 4c04e271f184..851364314ecc 100644
> > --- a/drivers/net/gtp.c
> > +++ b/drivers/net/gtp.c
> > @@ -21,6 +21,7 @@
> >   #include <linux/file.h>
> >   #include <linux/gtp.h>
> >
> > +#include <net/dst_metadata.h>
> >   #include <net/net_namespace.h>
> >   #include <net/protocol.h>
> >   #include <net/ip.h>
> > @@ -73,6 +74,9 @@ struct gtp_dev {
> >       unsigned int            hash_size;
> >       struct hlist_head       *tid_hash;
> >       struct hlist_head       *addr_hash;
> > +     /* Used by LWT tunnel. */
> > +     bool                    collect_md;
> > +     struct socket           *collect_md_sock;
> >   };
>
> Can't you use the 'sock' member of sk1u as the reference this socket?
As discussed earlier, I use it to destroy the LWT socket in the cleanup
code path. I need to test a solution that avoids this reference; I can
send an incremental patch if that works.

>
> >
> >   static unsigned int gtp_net_id __read_mostly;
> > @@ -179,33 +183,121 @@ static bool gtp_check_ms(struct sk_buff *skb, struct pdp_ctx *pctx,
> >       return false;
> >   }
> >
> > -static int gtp_rx(struct pdp_ctx *pctx, struct sk_buff *skb,
> > -                     unsigned int hdrlen, unsigned int role)
> > +static int gtp_set_tun_dst(struct gtp_dev *gtp, struct sk_buff *skb,
> > +                        unsigned int hdrlen, u8 gtp_version,
> > +                        __be64 tid, u8 flags)
> >   {
> > -     if (!gtp_check_ms(skb, pctx, hdrlen, role)) {
> > -             netdev_dbg(pctx->dev, "No PDP ctx for this MS\n");
> > -             return 1;
> > +     struct metadata_dst *tun_dst;
> > +     int opts_len = 0;
> > +
> > +     if (unlikely(flags & GTP1_F_MASK))
> > +             opts_len = sizeof(struct gtpu_metadata);
> > +
> > +     tun_dst = udp_tun_rx_dst(skb, gtp->sk1u->sk_family, TUNNEL_KEY, tid, opts_len);
> > +     if (!tun_dst) {
> > +             netdev_dbg(gtp->dev, "Failed to allocate tun_dst");
> > +             goto err;
> >       }
> >
> > +     netdev_dbg(gtp->dev, "attaching metadata_dst to skb, gtp ver %d hdrlen %d\n",
> > +                gtp_version, hdrlen);
> > +     if (unlikely(opts_len)) {
> > +             struct gtpu_metadata *opts;
> > +             struct gtp1_header *gtp1;
> > +
> > +             opts = ip_tunnel_info_opts(&tun_dst->u.tun_info);
> > +             gtp1 = (struct gtp1_header *)(skb->data + sizeof(struct udphdr));
> > +             opts->ver = GTP_METADATA_V1;
> > +             opts->flags = gtp1->flags;
> > +             opts->type = gtp1->type;
> > +             netdev_dbg(gtp->dev, "recved control pkt: flag %x type: %d\n",
> > +                        opts->flags, opts->type);
> > +             tun_dst->u.tun_info.key.tun_flags |= TUNNEL_GTPU_OPT;
> > +             tun_dst->u.tun_info.options_len = opts_len;
> > +             skb->protocol = htons(0xffff);         /* Unknown */
> > +     }
> >       /* Get rid of the GTP + UDP headers. */
> >       if (iptunnel_pull_header(skb, hdrlen, skb->protocol,
> > -                              !net_eq(sock_net(pctx->sk), dev_net(pctx->dev))))
> > -             return -1;
> > +                              !net_eq(sock_net(gtp->sk1u), dev_net(gtp->dev)))) {
> > +             gtp->dev->stats.rx_length_errors++;
> > +             goto err;
> > +     }
> > +
> > +     skb_dst_set(skb, &tun_dst->dst);
> > +     return 0;
> > +err:
> > +     return -1;
> > +}
> > +
> > +static int gtp_rx(struct gtp_dev *gtp, struct sk_buff *skb,
> > +               unsigned int hdrlen, u8 gtp_version, unsigned int role,
> > +               __be64 tid, u8 flags, u8 type)
> > +{
> > +     if (ip_tunnel_collect_metadata() || gtp->collect_md) {
> > +             int err;
> > +
> > +             err = gtp_set_tun_dst(gtp, skb, hdrlen, gtp_version, tid, flags);
> > +             if (err)
> > +                     goto err;
> > +     } else {
> > +             struct pdp_ctx *pctx;
> > +
> > +             if (flags & GTP1_F_MASK)
> > +                     hdrlen += 4;
> >
> > -     netdev_dbg(pctx->dev, "forwarding packet from GGSN to uplink\n");
> > +             if (type != GTP_TPDU)
> > +                     return 1;
> > +
> > +             if (gtp_version == GTP_V0)
> > +                     pctx = gtp0_pdp_find(gtp, be64_to_cpu(tid));
> > +             else
> > +                     pctx = gtp1_pdp_find(gtp, be64_to_cpu(tid));
> > +             if (!pctx) {
> > +                     netdev_dbg(gtp->dev, "No PDP ctx to decap skb=%p\n", skb);
> > +                     return 1;
> > +             }
> > +
> > +             if (!gtp_check_ms(skb, pctx, hdrlen, role)) {
> > +                     netdev_dbg(pctx->dev, "No PDP ctx for this MS\n");
> > +                     return 1;
> > +             }
> > +             /* Get rid of the GTP + UDP headers. */
> > +             if (iptunnel_pull_header(skb, hdrlen, skb->protocol,
> > +                                      !net_eq(sock_net(pctx->sk), dev_net(gtp->dev)))) {
> > +                     gtp->dev->stats.rx_length_errors++;
> > +                     goto err;
> > +             }
> > +     }
> > +     netdev_dbg(gtp->dev, "forwarding packet from GGSN to uplink\n");
> >
> >       /* Now that the UDP and the GTP header have been removed, set up the
> >        * new network header. This is required by the upper layer to
> >        * calculate the transport header.
> >        */
> >       skb_reset_network_header(skb);
> > +     if (pskb_may_pull(skb, sizeof(struct iphdr))) {
> > +             struct iphdr *iph;
> > +
> > +             iph = ip_hdr(skb);
> > +             if (iph->version == 4) {
> > +                     netdev_dbg(gtp->dev, "inner pkt: ipv4");
> > +                     skb->protocol = htons(ETH_P_IP);
> > +             } else if (iph->version == 6) {
> > +                     netdev_dbg(gtp->dev, "inner pkt: ipv6");
> > +                     skb->protocol = htons(ETH_P_IPV6);
> > +             } else {
> > +                     netdev_dbg(gtp->dev, "inner pkt error: Unknown type");
> > +             }
> > +     }
> >
> > -     skb->dev = pctx->dev;
> > -
> > -     dev_sw_netstats_rx_add(pctx->dev, skb->len);
> > -
> > +     skb->dev = gtp->dev;
> > +     dev_sw_netstats_rx_add(gtp->dev, skb->len);
> >       netif_rx(skb);
> >       return 0;
> > +
> > +err:
> > +     gtp->dev->stats.rx_dropped++;
> > +     return -1;
> >   }
> >
> >   /* 1 means pass up to the stack, -1 means drop and 0 means decapsulated. */
> > @@ -214,7 +306,6 @@ static int gtp0_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb)
> >       unsigned int hdrlen = sizeof(struct udphdr) +
> >                             sizeof(struct gtp0_header);
> >       struct gtp0_header *gtp0;
> > -     struct pdp_ctx *pctx;
> >
> >       if (!pskb_may_pull(skb, hdrlen))
> >               return -1;
> > @@ -224,16 +315,7 @@ static int gtp0_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb)
> >       if ((gtp0->flags >> 5) != GTP_V0)
> >               return 1;
> >
> > -     if (gtp0->type != GTP_TPDU)
> > -             return 1;
> > -
> > -     pctx = gtp0_pdp_find(gtp, be64_to_cpu(gtp0->tid));
> > -     if (!pctx) {
> > -             netdev_dbg(gtp->dev, "No PDP ctx to decap skb=%p\n", skb);
> > -             return 1;
> > -     }
> > -
> > -     return gtp_rx(pctx, skb, hdrlen, gtp->role);
> > +     return gtp_rx(gtp, skb, hdrlen, GTP_V0, gtp->role, gtp0->tid, gtp0->flags, gtp0->type);
> >   }
> >
> >   static int gtp1u_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb)
> > @@ -241,41 +323,30 @@ static int gtp1u_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb)
> >       unsigned int hdrlen = sizeof(struct udphdr) +
> >                             sizeof(struct gtp1_header);
> >       struct gtp1_header *gtp1;
> > -     struct pdp_ctx *pctx;
> >
> >       if (!pskb_may_pull(skb, hdrlen))
> >               return -1;
> >
> >       gtp1 = (struct gtp1_header *)(skb->data + sizeof(struct udphdr));
> >
> > +     netdev_dbg(gtp->dev, "GTPv1 recv: flags %x\n", gtp1->flags);
> >       if ((gtp1->flags >> 5) != GTP_V1)
> >               return 1;
> >
> > -     if (gtp1->type != GTP_TPDU)
> > -             return 1;
> > -
> >       /* From 29.060: "This field shall be present if and only if any one or
> >        * more of the S, PN and E flags are set.".
> >        *
> >        * If any of the bit is set, then the remaining ones also have to be
> >        * set.
> >        */
> > -     if (gtp1->flags & GTP1_F_MASK)
> > -             hdrlen += 4;
> > -
> >       /* Make sure the header is larger enough, including extensions. */
> >       if (!pskb_may_pull(skb, hdrlen))
> >               return -1;
> >
> >       gtp1 = (struct gtp1_header *)(skb->data + sizeof(struct udphdr));
> >
> > -     pctx = gtp1_pdp_find(gtp, ntohl(gtp1->tid));
> > -     if (!pctx) {
> > -             netdev_dbg(gtp->dev, "No PDP ctx to decap skb=%p\n", skb);
> > -             return 1;
> > -     }
> > -
> > -     return gtp_rx(pctx, skb, hdrlen, gtp->role);
> > +     return gtp_rx(gtp, skb, hdrlen, GTP_V1, gtp->role,
> > +                   key32_to_tunnel_id(gtp1->tid), gtp1->flags, gtp1->type);
> >   }
> >
> >   static void __gtp_encap_destroy(struct sock *sk)
> > @@ -315,6 +386,11 @@ static void gtp_encap_disable(struct gtp_dev *gtp)
> >   {
> >       gtp_encap_disable_sock(gtp->sk0);
> >       gtp_encap_disable_sock(gtp->sk1u);
> > +     if (gtp->collect_md_sock) {
> > +             udp_tunnel_sock_release(gtp->collect_md_sock);
> > +             gtp->collect_md_sock = NULL;
> > +             netdev_dbg(gtp->dev, "GTP socket released.\n");
> > +     }
> >   }
> >
> >   /* UDP encapsulation receive handler. See net/ipv4/udp.c.
> > @@ -329,7 +405,8 @@ static int gtp_encap_recv(struct sock *sk, struct sk_buff *skb)
> >       if (!gtp)
> >               return 1;
> >
> > -     netdev_dbg(gtp->dev, "encap_recv sk=%p\n", sk);
> > +     netdev_dbg(gtp->dev, "encap_recv sk=%p type %d\n",
> > +                sk, udp_sk(sk)->encap_type);
> >
> >       switch (udp_sk(sk)->encap_type) {
> >       case UDP_ENCAP_GTP0:
> > @@ -383,12 +460,13 @@ static void gtp_dev_uninit(struct net_device *dev)
> >
> >   static struct rtable *ip4_route_output_gtp(struct flowi4 *fl4,
> >                                          const struct sock *sk,
> > -                                        __be32 daddr)
> > +                                        __be32 daddr,
> > +                                        __be32 saddr)
> >   {
> >       memset(fl4, 0, sizeof(*fl4));
> >       fl4->flowi4_oif         = sk->sk_bound_dev_if;
> >       fl4->daddr              = daddr;
> > -     fl4->saddr              = inet_sk(sk)->inet_saddr;
> > +     fl4->saddr              = saddr;
> >       fl4->flowi4_tos         = RT_CONN_FLAGS(sk);
> >       fl4->flowi4_proto       = sk->sk_protocol;
> >
> > @@ -412,7 +490,7 @@ static inline void gtp0_push_header(struct sk_buff *skb, struct pdp_ctx *pctx)
> >       gtp0->tid       = cpu_to_be64(pctx->u.v0.tid);
> >   }
> >
> > -static inline void gtp1_push_header(struct sk_buff *skb, struct pdp_ctx *pctx)
> > +static inline void gtp1_push_header(struct sk_buff *skb, __be32 tid)
> >   {
> >       int payload_len = skb->len;
> >       struct gtp1_header *gtp1;
> > @@ -428,46 +506,63 @@ static inline void gtp1_push_header(struct sk_buff *skb, struct pdp_ctx *pctx)
> >       gtp1->flags     = 0x30; /* v1, GTP-non-prime. */
> >       gtp1->type      = GTP_TPDU;
> >       gtp1->length    = htons(payload_len);
> > -     gtp1->tid       = htonl(pctx->u.v1.o_tei);
> > +     gtp1->tid       = tid;
> >
> >       /* TODO: Suppport for extension header, sequence number and N-PDU.
> >        *       Update the length field if any of them is available.
> >        */
> >   }
> >
> > -struct gtp_pktinfo {
> > -     struct sock             *sk;
> > -     struct iphdr            *iph;
> > -     struct flowi4           fl4;
> > -     struct rtable           *rt;
> > -     struct pdp_ctx          *pctx;
> > -     struct net_device       *dev;
> > -     __be16                  gtph_port;
> > -};
> > -
> > -static void gtp_push_header(struct sk_buff *skb, struct gtp_pktinfo *pktinfo)
> > +static inline int gtp1_push_control_header(struct sk_buff *skb,
> > +                                        __be32 tid,
> > +                                        struct gtpu_metadata *opts,
> > +                                        struct net_device *dev)
> >   {
> > -     switch (pktinfo->pctx->gtp_version) {
> > -     case GTP_V0:
> > -             pktinfo->gtph_port = htons(GTP0_PORT);
> > -             gtp0_push_header(skb, pktinfo->pctx);
> > -             break;
> > -     case GTP_V1:
> > -             pktinfo->gtph_port = htons(GTP1U_PORT);
> > -             gtp1_push_header(skb, pktinfo->pctx);
> > -             break;
> > +     struct gtp1_header *gtp1c;
> > +     int payload_len;
> > +
> > +     if (opts->ver != GTP_METADATA_V1)
> > +             return -ENOENT;
> > +
> > +     if (opts->type == 0xFE) {
> > +             /* for end marker ignore skb data. */
> > +             netdev_dbg(dev, "xmit pkt with null data");
> > +             pskb_trim(skb, 0);
> >       }
>
> This is new functionality...???  Separate patch, please.
>
> > +     if (skb_cow_head(skb, sizeof(*gtp1c)) < 0)
> > +             return -ENOMEM;
> > +
> > +     payload_len = skb->len;
> > +
> > +     gtp1c = skb_push(skb, sizeof(*gtp1c));
> > +
> > +     gtp1c->flags    = opts->flags;
> > +     gtp1c->type     = opts->type;
> > +     gtp1c->length   = htons(payload_len);
> > +     gtp1c->tid      = tid;
> > +     netdev_dbg(dev, "xmit control pkt: ver %d flags %x type %x pkt len %d tid %x",
> > +                opts->ver, opts->flags, opts->type, skb->len, tid);
> > +     return 0;
> >   }
> >
> > +struct gtp_pktinfo {
> > +     struct sock             *sk;
> > +     __u8                    tos;
> > +     struct flowi4           fl4;
> > +     struct rtable           *rt;
> > +     struct net_device       *dev;
> > +     __be16                  gtph_port;
> > +};
> > +
>
> There are significant changes to this structure... can't these be made
> seaparately from the main patch adding flow tunneling?
>
This is a statically scoped structure passed between functions. Without this
change, GTP LWT cannot reuse the existing functions.

> >   static inline void gtp_set_pktinfo_ipv4(struct gtp_pktinfo *pktinfo,
> > -                                     struct sock *sk, struct iphdr *iph,
> > -                                     struct pdp_ctx *pctx, struct rtable *rt,
> > +                                     struct sock *sk,
> > +                                     __u8 tos,
> > +                                     struct rtable *rt,
> >                                       struct flowi4 *fl4,
> >                                       struct net_device *dev)
> >   {
> >       pktinfo->sk     = sk;
> > -     pktinfo->iph    = iph;
> > -     pktinfo->pctx   = pctx;
> > +     pktinfo->tos    = tos;
> >       pktinfo->rt     = rt;
> >       pktinfo->fl4    = *fl4;
> >       pktinfo->dev    = dev;
> > @@ -477,40 +572,99 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev,
> >                            struct gtp_pktinfo *pktinfo)
> >   {
> >       struct gtp_dev *gtp = netdev_priv(dev);
> > +     struct gtpu_metadata *opts = NULL;
> > +     struct sock *sk = NULL;
> >       struct pdp_ctx *pctx;
> >       struct rtable *rt;
> >       struct flowi4 fl4;
> > -     struct iphdr *iph;
> > -     __be16 df;
> > +     u8 gtp_version;
> > +     __be16 df = 0;
> > +     __be32 tun_id;
> > +     __be32 daddr;
> > +     __be32 saddr;
> > +     __u8 tos;
> >       int mtu;
> >
> > -     /* Read the IP destination address and resolve the PDP context.
> > -      * Prepend PDP header with TEI/TID from PDP ctx.
> > -      */
> > -     iph = ip_hdr(skb);
> > -     if (gtp->role == GTP_ROLE_SGSN)
> > -             pctx = ipv4_pdp_find(gtp, iph->saddr);
> > -     else
> > -             pctx = ipv4_pdp_find(gtp, iph->daddr);
> > +     if (gtp->collect_md) {
> > +             /* LWT GTP1U encap */
> > +             struct ip_tunnel_info *info = NULL;
> >
> > -     if (!pctx) {
> > -             netdev_dbg(dev, "no PDP ctx found for %pI4, skip\n",
> > -                        &iph->daddr);
> > -             return -ENOENT;
> > +             info = skb_tunnel_info(skb);
> > +             if (!info) {
> > +                     netdev_dbg(dev, "missing tunnel info");
> > +                     return -ENOENT;
> > +             }
> > +             if (info->key.tp_dst && ntohs(info->key.tp_dst) != GTP1U_PORT) {
> > +                     netdev_dbg(dev, "unexpected GTP dst port: %d", ntohs(info->key.tp_dst));
> > +                     return -EOPNOTSUPP;
> > +             }
> > +             pctx = NULL;
> > +             gtp_version = GTP_V1;
> > +             tun_id = tunnel_id_to_key32(info->key.tun_id);
> > +             daddr = info->key.u.ipv4.dst;
> > +             saddr = info->key.u.ipv4.src;
> > +             sk = gtp->sk1u;
> > +             if (!sk) {
> > +                     netdev_dbg(dev, "missing tunnel sock");
> > +                     return -EOPNOTSUPP;
> > +             }
> > +             tos = info->key.tos;
> > +             if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT)
> > +                     df = htons(IP_DF);
> > +
> > +             if (info->options_len != 0) {
> > +                     if (info->key.tun_flags & TUNNEL_GTPU_OPT) {
> > +                             opts = ip_tunnel_info_opts(info);
> > +                     } else {
> > +                             netdev_dbg(dev, "missing tunnel metadata for control pkt");
> > +                             return -EOPNOTSUPP;
> > +                     }
> > +             }
> > +             netdev_dbg(dev, "flow-based GTP1U encap: tunnel id %d\n",
> > +                        be32_to_cpu(tun_id));
> > +     } else {
> > +             struct iphdr *iph;
> > +
> > +             if (ntohs(skb->protocol) != ETH_P_IP)
> > +                     return -EOPNOTSUPP;
> > +
> > +             iph = ip_hdr(skb);
> > +
> > +             /* Read the IP destination address and resolve the PDP context.
> > +              * Prepend PDP header with TEI/TID from PDP ctx.
> > +              */
> > +             if (gtp->role == GTP_ROLE_SGSN)
> > +                     pctx = ipv4_pdp_find(gtp, iph->saddr);
> > +             else
> > +                     pctx = ipv4_pdp_find(gtp, iph->daddr);
> > +
> > +             if (!pctx) {
> > +                     netdev_dbg(dev, "no PDP ctx found for %pI4, skip\n",
> > +                                &iph->daddr);
> > +                     return -ENOENT;
> > +             }
> > +             sk = pctx->sk;
> > +             netdev_dbg(dev, "found PDP context %p\n", pctx);
> > +
> > +             gtp_version = pctx->gtp_version;
> > +             tun_id  = htonl(pctx->u.v1.o_tei);
> > +             daddr = pctx->peer_addr_ip4.s_addr;
> > +             saddr = inet_sk(sk)->inet_saddr;
> > +             tos = iph->tos;
> > +             df = iph->frag_off;
> > +             netdev_dbg(dev, "gtp -> IP src: %pI4 dst: %pI4\n",
> > +                        &iph->saddr, &iph->daddr);
> >       }
> > -     netdev_dbg(dev, "found PDP context %p\n", pctx);
>
> Please refactor the above so that the resulting patch set becomes less
> invasive.  There's a lot of unnecessary code movement happening here
> that will result in unnecessary pain when rebasing on top of this.
>
I am trying to reuse the existing xmit and recv functionality of the GTP
driver. Are you suggesting introducing duplicate code for the LWT
datapath?


> >
> > -     rt = ip4_route_output_gtp(&fl4, pctx->sk, pctx->peer_addr_ip4.s_addr);
> > +     rt = ip4_route_output_gtp(&fl4, sk, daddr, saddr);
> >       if (IS_ERR(rt)) {
> > -             netdev_dbg(dev, "no route to SSGN %pI4\n",
> > -                        &pctx->peer_addr_ip4.s_addr);
> > +             netdev_dbg(dev, "no route to SSGN %pI4\n", &daddr);
> >               dev->stats.tx_carrier_errors++;
> >               goto err;
> >       }
> >
> >       if (rt->dst.dev == dev) {
> > -             netdev_dbg(dev, "circular route to SSGN %pI4\n",
> > -                        &pctx->peer_addr_ip4.s_addr);
> > +             netdev_dbg(dev, "circular route to SSGN %pI4\n", &daddr);
> >               dev->stats.collisions++;
> >               goto err_rt;
> >       }
> > @@ -518,11 +672,10 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev,
> >       skb_dst_drop(skb);
> >
> >       /* This is similar to tnl_update_pmtu(). */
> > -     df = iph->frag_off;
> >       if (df) {
> >               mtu = dst_mtu(&rt->dst) - dev->hard_header_len -
> >                       sizeof(struct iphdr) - sizeof(struct udphdr);
> > -             switch (pctx->gtp_version) {
> > +             switch (gtp_version) {
> >               case GTP_V0:
> >                       mtu -= sizeof(struct gtp0_header);
> >                       break;
> > @@ -536,17 +689,38 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev,
> >
> >       rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu, false);
> >
> > -     if (!skb_is_gso(skb) && (iph->frag_off & htons(IP_DF)) &&
> > -         mtu < ntohs(iph->tot_len)) {
> > -             netdev_dbg(dev, "packet too big, fragmentation needed\n");
> > +     if (!skb_is_gso(skb) && (df & htons(IP_DF)) && mtu < skb->len) {
> > +             netdev_dbg(dev, "packet too big, fragmentation needed");
> >               memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
> >               icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
> >                             htonl(mtu));
> >               goto err_rt;
> >       }
> >
> > -     gtp_set_pktinfo_ipv4(pktinfo, pctx->sk, iph, pctx, rt, &fl4, dev);
> > -     gtp_push_header(skb, pktinfo);
> > +     gtp_set_pktinfo_ipv4(pktinfo, sk, tos, rt, &fl4, dev);
> > +
> > +     if (unlikely(opts)) {
> > +             int err;
> > +
> > +             pktinfo->gtph_port = htons(GTP1U_PORT);
> > +             err = gtp1_push_control_header(skb, tun_id, opts, dev);
> > +             if (err) {
> > +                     netdev_info(dev, "cntr pkt error %d", err);
> > +                     goto err_rt;
> > +             }
> > +             return 0;
> > +     }
> > +
> > +     switch (gtp_version) {
> > +     case GTP_V0:
> > +             pktinfo->gtph_port = htons(GTP0_PORT);
> > +             gtp0_push_header(skb, pctx);
> > +             break;
> > +     case GTP_V1:
> > +             pktinfo->gtph_port = htons(GTP1U_PORT);
> > +             gtp1_push_header(skb, tun_id);
> > +             break;
> > +     }
> >
> >       return 0;
> >   err_rt:
> > @@ -557,7 +731,6 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev,
> >
> >   static netdev_tx_t gtp_dev_xmit(struct sk_buff *skb, struct net_device *dev)
> >   {
> > -     unsigned int proto = ntohs(skb->protocol);
> >       struct gtp_pktinfo pktinfo;
> >       int err;
> >
> > @@ -569,32 +742,22 @@ static netdev_tx_t gtp_dev_xmit(struct sk_buff *skb, struct net_device *dev)
> >
> >       /* PDP context lookups in gtp_build_skb_*() need rcu read-side lock. */
> >       rcu_read_lock();
> > -     switch (proto) {
> > -     case ETH_P_IP:
> > -             err = gtp_build_skb_ip4(skb, dev, &pktinfo);
> > -             break;
> > -     default:
> > -             err = -EOPNOTSUPP;
> > -             break;
> > -     }
> > +     err = gtp_build_skb_ip4(skb, dev, &pktinfo);
> >       rcu_read_unlock();
> >
> >       if (err < 0)
> >               goto tx_err;
> >
> > -     switch (proto) {
> > -     case ETH_P_IP:
> > -             netdev_dbg(pktinfo.dev, "gtp -> IP src: %pI4 dst: %pI4\n",
> > -                        &pktinfo.iph->saddr, &pktinfo.iph->daddr);
> > -             udp_tunnel_xmit_skb(pktinfo.rt, pktinfo.sk, skb,
> > -                                 pktinfo.fl4.saddr, pktinfo.fl4.daddr,
> > -                                 pktinfo.iph->tos,
> > -                                 ip4_dst_hoplimit(&pktinfo.rt->dst),
> > -                                 0,
> > -                                 pktinfo.gtph_port, pktinfo.gtph_port,
> > -                                 true, false);
> > -             break;
> > -     }
> > +     udp_tunnel_xmit_skb(pktinfo.rt, pktinfo.sk, skb,
> > +                         pktinfo.fl4.saddr,
> > +                         pktinfo.fl4.daddr,
> > +                         pktinfo.tos,
> > +                         ip4_dst_hoplimit(&pktinfo.rt->dst),
> > +                         0,
> > +                         pktinfo.gtph_port,
> > +                         pktinfo.gtph_port,
> > +                         true,
> > +                         false);
> >
> >       return NETDEV_TX_OK;
> >   tx_err:
> > @@ -610,6 +773,19 @@ static const struct net_device_ops gtp_netdev_ops = {
> >       .ndo_get_stats64        = dev_get_tstats64,
> >   };
> >
> > +static struct gtp_dev *gtp_find_flow_based_dev(struct net *net)
> > +{
> > +     struct gtp_net *gn = net_generic(net, gtp_net_id);
> > +     struct gtp_dev *gtp;
> > +
> > +     list_for_each_entry(gtp, &gn->gtp_dev_list, list) {
> > +             if (gtp->collect_md)
> > +                     return gtp;
> > +     }
> > +
> > +     return NULL;
> > +}
> > +
> >   static void gtp_link_setup(struct net_device *dev)
> >   {
> >       dev->netdev_ops         = &gtp_netdev_ops;
> > @@ -634,7 +810,7 @@ static void gtp_link_setup(struct net_device *dev)
> >   }
> >
> >   static int gtp_hashtable_new(struct gtp_dev *gtp, int hsize);
> > -static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[]);
> > +static int gtp_encap_enable(struct gtp_dev *gtp, struct net_device *dev, struct nlattr *data[]);
> >
> >   static void gtp_destructor(struct net_device *dev)
> >   {
> > @@ -652,11 +828,24 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev,
> >       struct gtp_net *gn;
> >       int hashsize, err;
> >
> > -     if (!data[IFLA_GTP_FD0] && !data[IFLA_GTP_FD1])
> > +     if (!data[IFLA_GTP_FD0] && !data[IFLA_GTP_FD1] &&
> > +         !data[IFLA_GTP_COLLECT_METADATA])
> >               return -EINVAL;
>
> You're allowing the fd=NULL case for the COLLECT_METADATA variant, but
> you might as well allow it unconditionally since you have the
> create_gtp_socket() function now to handle this for you.
>
I can send an incremental patch for this.

> >
> >       gtp = netdev_priv(dev);
> >
> > +     if (data[IFLA_GTP_COLLECT_METADATA]) {
> > +             if (data[IFLA_GTP_FD0]) {
> > +                     netdev_dbg(dev, "LWT device does not support setting v0 socket");
> > +                     return -EINVAL;
> > +             }
> > +             if (gtp_find_flow_based_dev(src_net)) {
> > +                     netdev_dbg(dev, "LWT device already exist");
> > +                     return -EBUSY;
> > +             }
> > +             gtp->collect_md = true;
> > +     }
> > +
> >       if (!data[IFLA_GTP_PDP_HASHSIZE]) {
> >               hashsize = 1024;
> >       } else {
> > @@ -669,7 +858,7 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev,
> >       if (err < 0)
> >               return err;
> >
> > -     err = gtp_encap_enable(gtp, data);
> > +     err = gtp_encap_enable(gtp, dev, data);
> >       if (err < 0)
> >               goto out_hashtable;
> >
> > @@ -683,7 +872,7 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev,
> >       list_add_rcu(&gtp->list, &gn->gtp_dev_list);
> >       dev->priv_destructor = gtp_destructor;
> >
> > -     netdev_dbg(dev, "registered new GTP interface\n");
> > +     netdev_dbg(dev, "registered new GTP interface %s\n", dev->name);
> >
> >       return 0;
> >
> > @@ -714,6 +903,7 @@ static const struct nla_policy gtp_policy[IFLA_GTP_MAX + 1] = {
> >       [IFLA_GTP_FD1]                  = { .type = NLA_U32 },
> >       [IFLA_GTP_PDP_HASHSIZE]         = { .type = NLA_U32 },
> >       [IFLA_GTP_ROLE]                 = { .type = NLA_U32 },
> > +     [IFLA_GTP_COLLECT_METADATA]     = { .type = NLA_FLAG },
> >   };
> >
> >   static int gtp_validate(struct nlattr *tb[], struct nlattr *data[],
> > @@ -737,6 +927,9 @@ static int gtp_fill_info(struct sk_buff *skb, const struct net_device *dev)
> >       if (nla_put_u32(skb, IFLA_GTP_PDP_HASHSIZE, gtp->hash_size))
> >               goto nla_put_failure;
> >
> > +     if (gtp->collect_md && nla_put_flag(skb, IFLA_GTP_COLLECT_METADATA))
> > +             goto nla_put_failure;
> > +
> >       return 0;
> >
> >   nla_put_failure:
> > @@ -782,35 +975,24 @@ static int gtp_hashtable_new(struct gtp_dev *gtp, int hsize)
> >       return -ENOMEM;
> >   }
> >
> > -static struct sock *gtp_encap_enable_socket(int fd, int type,
> > -                                         struct gtp_dev *gtp)
> > +static int __gtp_encap_enable_socket(struct socket *sock, int type,
> > +                                  struct gtp_dev *gtp)
> >   {
> >       struct udp_tunnel_sock_cfg tuncfg = {NULL};
> > -     struct socket *sock;
> >       struct sock *sk;
> > -     int err;
> > -
> > -     pr_debug("enable gtp on %d, %d\n", fd, type);
> > -
> > -     sock = sockfd_lookup(fd, &err);
> > -     if (!sock) {
> > -             pr_debug("gtp socket fd=%d not found\n", fd);
> > -             return NULL;
> > -     }
> >
> >       sk = sock->sk;
> >       if (sk->sk_protocol != IPPROTO_UDP ||
> >           sk->sk_type != SOCK_DGRAM ||
> >           (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)) {
> > -             pr_debug("socket fd=%d not UDP\n", fd);
> > -             sk = ERR_PTR(-EINVAL);
> > -             goto out_sock;
> > +             pr_debug("socket not UDP\n");
> > +             return -EINVAL;
> >       }
> >
> >       lock_sock(sk);
> >       if (sk->sk_user_data) {
> > -             sk = ERR_PTR(-EBUSY);
> > -             goto out_rel_sock;
> > +             release_sock(sock->sk);
> > +             return -EBUSY;
> >       }
> >
> >       sock_hold(sk);
> > @@ -821,15 +1003,58 @@ static struct sock *gtp_encap_enable_socket(int fd, int type,
> >       tuncfg.encap_destroy = gtp_encap_destroy;
> >
> >       setup_udp_tunnel_sock(sock_net(sock->sk), sock, &tuncfg);
> > -
> > -out_rel_sock:
> >       release_sock(sock->sk);
> > -out_sock:
> > +     return 0;
> > +}
> > +
> > +static struct sock *gtp_encap_enable_socket(int fd, int type,
> > +                                         struct gtp_dev *gtp)
> > +{
> > +     struct socket *sock;
> > +     int err;
> > +
> > +     pr_debug("enable gtp on %d, %d\n", fd, type);
> > +
> > +     sock = sockfd_lookup(fd, &err);
> > +     if (!sock) {
> > +             pr_debug("gtp socket fd=%d not found\n", fd);
> > +             return NULL;
> > +     }
> > +     err =  __gtp_encap_enable_socket(sock, type, gtp);
> >       sockfd_put(sock);
> > -     return sk;
> > +     if (err)
> > +             return ERR_PTR(err);
> > +
> > +     return sock->sk;
> >   }
> >
> > -static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[])
> > +static struct socket *gtp_create_gtp_socket(struct gtp_dev *gtp, struct net_device *dev)
> > +{
> > +     struct udp_port_cfg udp_conf;
> > +     struct socket *sock;
> > +     int err;
> > +
> > +     memset(&udp_conf, 0, sizeof(udp_conf));
> > +     udp_conf.family = AF_INET;
> > +     udp_conf.local_ip.s_addr = htonl(INADDR_ANY);
> > +     udp_conf.local_udp_port = htons(GTP1U_PORT);
> > +
> > +     err = udp_sock_create(dev_net(dev), &udp_conf, &sock);
> > +     if (err < 0) {
> > +             pr_debug("create gtp sock failed: %d\n", err);
> > +             return ERR_PTR(err);
> > +     }
> > +     err = __gtp_encap_enable_socket(sock, UDP_ENCAP_GTP1U, gtp);
> > +     if (err) {
> > +             pr_debug("enable gtp sock encap failed: %d\n", err);
> > +             udp_tunnel_sock_release(sock);
> > +             return ERR_PTR(err);
> > +     }
> > +     pr_debug("create gtp sock done\n");
> > +     return sock;
> > +}
> > +
> > +static int gtp_encap_enable(struct gtp_dev *gtp, struct net_device *dev, struct nlattr *data[])
> >   {
> >       struct sock *sk1u = NULL;
> >       struct sock *sk0 = NULL;
> > @@ -853,11 +1078,25 @@ static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[])
> >               }
> >       }
> >
> > +     if (data[IFLA_GTP_COLLECT_METADATA]) {
> > +             struct socket *sock;
> > +
> > +             if (!sk1u) {
> > +                     sock = gtp_create_gtp_socket(gtp, dev);
> > +                     if (IS_ERR(sock))
> > +                             return PTR_ERR(sock);
> > +
> > +                     gtp->collect_md_sock = sock;
> > +                     sk1u = sock->sk;
> > +             } else {
> > +                     gtp->collect_md_sock = NULL;
> > +             }
> > +     }
> > +
>
> If you're adding a gtp_create_gtp_socket() function, then that s
>
This looks like an incomplete comment. Not sure what you mean here.


> >       if (data[IFLA_GTP_ROLE]) {
> >               role = nla_get_u32(data[IFLA_GTP_ROLE]);
> >               if (role > GTP_ROLE_SGSN) {
> > -                     gtp_encap_disable_sock(sk0);
> > -                     gtp_encap_disable_sock(sk1u);
> > +                     gtp_encap_disable(gtp);
> >                       return -EINVAL;
> >               }
> >       }
> > @@ -1416,7 +1655,7 @@ static int __init gtp_init(void)
> >       if (err < 0)
> >               goto unreg_genl_family;
> >
> > -     pr_info("GTP module loaded (pdp ctx size %zd bytes)\n",
> > +     pr_info("GTP module loaded (pdp ctx size %zd bytes) with tnl-md support\n",
> >               sizeof(struct pdp_ctx));
> >       return 0;
> >
> > diff --git a/include/uapi/linux/gtp.h b/include/uapi/linux/gtp.h
> > index 79f9191bbb24..62aff78b7c56 100644
> > --- a/include/uapi/linux/gtp.h
> > +++ b/include/uapi/linux/gtp.h
> > @@ -2,6 +2,8 @@
> >   #ifndef _UAPI_LINUX_GTP_H_
> >   #define _UAPI_LINUX_GTP_H_
> >
> > +#include <linux/types.h>
> > +
> >   #define GTP_GENL_MCGRP_NAME "gtp"
> >
> >   enum gtp_genl_cmds {
> > @@ -34,4 +36,14 @@ enum gtp_attrs {
> >   };
> >   #define GTPA_MAX (__GTPA_MAX + 1)
> >
> > +enum {
> > +     GTP_METADATA_V1
> > +};
> > +
> > +struct gtpu_metadata {
> > +     __u8    ver;
> > +     __u8    flags;
> > +     __u8    type;
> > +};
> > +
> >   #endif /* _UAPI_LINUX_GTP_H_ */
> > diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
> > index 82708c6db432..2bd0d8bbcdb2 100644
> > --- a/include/uapi/linux/if_link.h
> > +++ b/include/uapi/linux/if_link.h
> > @@ -809,6 +809,7 @@ enum {
> >       IFLA_GTP_FD1,
> >       IFLA_GTP_PDP_HASHSIZE,
> >       IFLA_GTP_ROLE,
> > +     IFLA_GTP_COLLECT_METADATA,
> >       __IFLA_GTP_MAX,
> >   };
> >   #define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1)
> > diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
> > index 7d9105533c7b..802da679fab1 100644
> > --- a/include/uapi/linux/if_tunnel.h
> > +++ b/include/uapi/linux/if_tunnel.h
> > @@ -176,6 +176,7 @@ enum {
> >   #define TUNNEL_VXLAN_OPT    __cpu_to_be16(0x1000)
> >   #define TUNNEL_NOCACHE              __cpu_to_be16(0x2000)
> >   #define TUNNEL_ERSPAN_OPT   __cpu_to_be16(0x4000)
> > +#define TUNNEL_GTPU_OPT              __cpu_to_be16(0x8000)
> >
> >   #define TUNNEL_OPTIONS_PRESENT \
> >               (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT | TUNNEL_ERSPAN_OPT)
> > diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
> > index d208b2af697f..28d649bda686 100644
> > --- a/tools/include/uapi/linux/if_link.h
> > +++ b/tools/include/uapi/linux/if_link.h
> > @@ -617,6 +617,7 @@ enum {
> >       IFLA_GTP_FD1,
> >       IFLA_GTP_PDP_HASHSIZE,
> >       IFLA_GTP_ROLE,
> > +     IFLA_GTP_COLLECT_METADATA,
> >       __IFLA_GTP_MAX,
> >   };
> >   #define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1)
> >
>
> /Jonas

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH net-next v5] GTP: add support for flow based tunneling API
  2021-01-17 13:23   ` Jonas Bonn
  2021-01-17 15:25     ` Harald Welte
@ 2021-01-17 20:47     ` Pravin Shelar
  2021-01-18 17:27     ` Jakub Kicinski
  2 siblings, 0 replies; 13+ messages in thread
From: Pravin Shelar @ 2021-01-17 20:47 UTC (permalink / raw)
  To: Jonas Bonn
  Cc: Jakub Kicinski, Pravin B Shelar, Linux Kernel Network Developers,
	Pablo Neira Ayuso, Harald Welte

On Sun, Jan 17, 2021 at 5:25 AM Jonas Bonn <jonas@norrbonn.se> wrote:
>
> Hi Jakub,
>
> On 17/01/2021 01:46, Jakub Kicinski wrote:
> > On Sat,  9 Jan 2021 23:00:21 -0800 Pravin B Shelar wrote:
> >> Following patch add support for flow based tunneling API
> >> to send and recv GTP tunnel packet over tunnel metadata API.
> >> This would allow this device integration with OVS or eBPF using
> >> flow based tunneling APIs.
> >>
> >> Signed-off-by: Pravin B Shelar <pbshelar@fb.com>
> >
> > Applied, thanks!
> >
>
> This patch hasn't received any ACK's from either the maintainers or
> anyone else providing review.  The following issues remain unaddressed
> after review:
>
This patch was first sent out on Dec 10 and you responded on Dec 11. I
think there was a reasonable amount of time given for reviews.

> i)  the patch contains several logically separate changes that would be
> better served as smaller patches
Given this patch is adding a single feature, to add support for LWT, I
sent a single patch. Not sure how much benefit it would be to divide
it in smaller patches. In future I will be more careful with dividing
the patch, since that is THE objection you have presented here.

> ii) functionality like the handling of end markers has been introduced
> without further explanation
This is the first time you are raising this question. End marker is
introduced to handle these packets in a single LWT device. This way
the control plane can use a single device to program all GTP
user-plane functionality.

> iii) symmetry between the handling of GTPv0 and GTPv1 has been
> unnecessarily broken
This is already discussed in previous review: Once we add support for
LWT for v0, we can get symmetry between V1 and V0. At this point there
is no use case to use V0 in LWT, so I do not see a point introducing
the support.

> iv) there are no available userspace tools to allow for testing this
> functionality
>
This is not true. I have mentioned and provided open source tools
multiple times on the review thread:

Patch for iproute to add support for LWT GTP devices.
https://github.com/pshelar/iproute2/commit/d6e99f8342672e6e9ce0b71e153296f8e2b41cfc

OVS support with integration test:
https://github.com/pshelar/ovs/blob/6ec6a2a86adc56c7c9dcab7b3a7b70bb6dad35c9/tests/system-layer3-tunnels.at#L158


> I have requested that this patch be reworked into a series of smaller
> changes.  That would allow:
>
> i) reasonable review
> ii) the possibility to explain _why_ things are being done in the patch
> comment where this isn't obvious (like the handling of end markers)
> iii) the chance to do a reasonable rebase of other ongoing work onto
> this patch (series):  this one patch is invasive and difficult to rebase
> onto
>
> I'm not sure what the hurry is to get this patch into mainline.  Large
> and complicated patches like this take time to review; please revert
> this and allow that process to happen.
>

Rather than reverting this patch, I can handle comments you have
posted. Those can be fixed by minor incremental patches.

Let me know if you find any regression introduced by this patch set, I
can quickly fix it.

Thanks,
Pravin.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH net-next v5] GTP: add support for flow based tunneling API
  2021-01-17 15:25     ` Harald Welte
@ 2021-01-17 20:55       ` Pravin Shelar
  0 siblings, 0 replies; 13+ messages in thread
From: Pravin Shelar @ 2021-01-17 20:55 UTC (permalink / raw)
  To: Harald Welte
  Cc: Jonas Bonn, Jakub Kicinski, Pravin B Shelar,
	Linux Kernel Network Developers, Pablo Neira Ayuso

On Sun, Jan 17, 2021 at 7:31 AM Harald Welte <laforge@gnumonks.org> wrote:
>
> Hi Jonas, Jakub and others,
>
> On Sun, Jan 17, 2021 at 02:23:52PM +0100, Jonas Bonn wrote:
> > This patch hasn't received any ACK's from either the maintainers or anyone
> > else providing review.  The following issues remain unaddressed after
> > review:
>
> [...]
>
> Full ACK from my point of view.  The patch is so massive that I
> as the original co-author and co-maintainer of the GTP kernel module
> have problems understanding what it is doing at all.  Furthermore,
> I am actually wondering if there is any commonality between the existing
> use cases and whatever the modified gtp.ko is trying to achieve.  Up to
> the point on whether or not it makes sense to have both functionalities
> in the same driver/module at all
>

This is not modifying existing functionality. This patch is adding LWT
tunneling API. Existing functionality remains the same. Let me know if
you find any regression. I can fix it.
LWT is a well known method to implement scalable tunneling which most
of the tunneling modules (GENEVE, GRE, VxLAN etc..) in linux kernel
already supports.

If we separate out gtp.ko. from its LWT implementation, we will need
to duplicate a bunch of existing code as well as code that Jonas is
adding to improve performance using UDP tunnel offloading APIs. I
don't think that is the right approach. Existing tunneling modules
also use the unified module approach to implement traditional and LWT
based tunnel devices.


> > I'm not sure what the hurry is to get this patch into mainline.  Large and
> > complicated patches like this take time to review; please revert this and
> > allow that process to happen.
>
> Also acknowledged and supported from my side.
>
> --
> - Harald Welte <laforge@gnumonks.org>           http://laforge.gnumonks.org/
> ============================================================================
> "Privacy in residential applications is a desirable marketing option."
>                                                   (ETSI EN 300 175-7 Ch. A6)

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH net-next v5] GTP: add support for flow based tunneling API
  2021-01-17 13:23   ` Jonas Bonn
  2021-01-17 15:25     ` Harald Welte
  2021-01-17 20:47     ` Pravin Shelar
@ 2021-01-18 17:27     ` Jakub Kicinski
  2021-01-18 18:27       ` Jonas Bonn
  2 siblings, 1 reply; 13+ messages in thread
From: Jakub Kicinski @ 2021-01-18 17:27 UTC (permalink / raw)
  To: Jonas Bonn; +Cc: Pravin B Shelar, netdev, pablo, laforge

On Sun, 17 Jan 2021 14:23:52 +0100 Jonas Bonn wrote:
> On 17/01/2021 01:46, Jakub Kicinski wrote:
> > On Sat,  9 Jan 2021 23:00:21 -0800 Pravin B Shelar wrote:  
> >> Following patch add support for flow based tunneling API
> >> to send and recv GTP tunnel packet over tunnel metadata API.
> >> This would allow this device integration with OVS or eBPF using
> >> flow based tunneling APIs.
> >>
> >> Signed-off-by: Pravin B Shelar <pbshelar@fb.com>  
> > 
> > Applied, thanks!
> 
> This patch hasn't received any ACK's from either the maintainers or 
> anyone else providing review.  

I made Pravin wait _over_ _a_ _month_ to merge this. He did not receive
any feedback since v3, which was posted Dec 13th. That's very long.

v5 itself was laying around on patchwork for almost a week, marked as 
"Needs Review/Ack".

Normally we try to merge patches within two days. If anything my
lesson from this whole ordeal is in fact waiting longer makes
absolutely no sense. The review didn't come in anyway, and we're 
just delaying whatever project Pravin needs this for :/

Do I disagree with you that the patch is "far from pretty"? Not at all, 
but I couldn't find any actual bug, and the experience of contributors 
matters to us, so we can't wait forever.

> The following issues remain unaddressed after review:
> 
> i)  the patch contains several logically separate changes that would be 
> better served as smaller patches
> ii) functionality like the handling of end markers has been introduced 
> without further explanation
> iii) symmetry between the handling of GTPv0 and GTPv1 has been 
> unnecessarily broken
> iv) there are no available userspace tools to allow for testing this 
> functionality

I don't understand why these points couldn't be stated on any of the last 
3 versions / in the last month.

> I have requested that this patch be reworked into a series of smaller 
> changes.  That would allow:
> 
> i) reasonable review
> ii) the possibility to explain _why_ things are being done in the patch 
> comment where this isn't obvious (like the handling of end markers)
> iii) the chance to do a reasonable rebase of other ongoing work onto 
> this patch (series):  this one patch is invasive and difficult to rebase 
> onto
> 
> I'm not sure what the hurry is to get this patch into mainline.  Large 
> and complicated patches like this take time to review; please revert 
> this and allow that process to happen.

You'd need to post a revert with the justification to the ML, so it can
be reviewed on its merits. That said I think incremental changes may be
a better direction.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH net-next v5] GTP: add support for flow based tunneling API
  2021-01-18 17:27     ` Jakub Kicinski
@ 2021-01-18 18:27       ` Jonas Bonn
  2021-01-18 18:56         ` Jakub Kicinski
  0 siblings, 1 reply; 13+ messages in thread
From: Jonas Bonn @ 2021-01-18 18:27 UTC (permalink / raw)
  To: Jakub Kicinski; +Cc: Pravin B Shelar, netdev, pablo, laforge

Hi Jakub,

On 18/01/2021 18:27, Jakub Kicinski wrote:
> On Sun, 17 Jan 2021 14:23:52 +0100 Jonas Bonn wrote:
>> On 17/01/2021 01:46, Jakub Kicinski wrote:
>>> On Sat,  9 Jan 2021 23:00:21 -0800 Pravin B Shelar wrote:
>>>> Following patch add support for flow based tunneling API
>>>> to send and recv GTP tunnel packet over tunnel metadata API.
>>>> This would allow this device integration with OVS or eBPF using
>>>> flow based tunneling APIs.
>>>>
>>>> Signed-off-by: Pravin B Shelar <pbshelar@fb.com>
>>>
>>> Applied, thanks!
>>
>> This patch hasn't received any ACK's from either the maintainers or
>> anyone else providing review.
> 
> I made Pravin wait _over_ _a_ _month_ to merge this. He did not receive
> any feedback since v3, which was posted Dec 13th. That's very long.

Merge window, Christmas, New Year, 3 kings, kids out of school, holiday 
hangover... certain times of the year four weeks are not four weeks.

> 
> v5 itself was laying around on patchwork for almost a week, marked as
> "Needs Review/Ack".

When new series show up just hours after review, it's hard to take them 
seriously.  It takes a fair amount of time to go through an elephant 
like this and to make sense of it; the time spent in response to review 
commentary shouldn't be less.

> 
> Normally we try to merge patches within two days. If anything my
> lesson from this whole ordeal is in fact waiting longer makes
> absolutely no sense. The review didn't come in anyway, and we're
> just delaying whatever project Pravin needs this for :/

I think the expectation that everything gets review within two days is 
unrealistic.  Worse though, is the insinuation that anything unreviewed 
gets blindly merged...  No, the two day target should be for the merging 
of ACK:ed patches.

> 
> Do I disagree with you that the patch is "far from pretty"? Not at all,
> but I couldn't find any actual bug, and the experience of contributors
> matters to us, so we can't wait forever.
> 
>> The following issues remain unaddressed after review:
>>
>> i)  the patch contains several logically separate changes that would be
>> better served as smaller patches
>> ii) functionality like the handling of end markers has been introduced
>> without further explanation
>> iii) symmetry between the handling of GTPv0 and GTPv1 has been
>> unnecessarily broken
>> iv) there are no available userspace tools to allow for testing this
>> functionality
> 
> I don't understand these points couldn't be stated on any of the last
> 3 versions / in the last month.

I believe all of the above was stated in review of series v1 and v2.  v3 
was posted during the merge window so wasn't really relevant for review. 
  v4 didn't address the comments from v1 and v2.  v5 was posted 3 hours 
after receiving reverse christmas tree comments and addressed only 
those.  v5 received commentary within a week... hardly excessive for a 
lightly maintained module like this one.

> 
>> I have requested that this patch be reworked into a series of smaller
>> changes.  That would allow:
>>
>> i) reasonable review
>> ii) the possibility to explain _why_ things are being done in the patch
>> comment where this isn't obvious (like the handling of end markers)
>> iii) the chance to do a reasonable rebase of other ongoing work onto
>> this patch (series):  this one patch is invasive and difficult to rebase
>> onto
>>
>> I'm not sure what the hurry is to get this patch into mainline.  Large
>> and complicated patches like this take time to review; please revert
>> this and allow that process to happen.
> 
> You'd need to post a revert with the justification to the ML, so it can
> be reviewed on its merits. That said I think incremental changes may be
> a better direction.
> 

I guess I'll have to do so, but that seems like setting the bar higher 
than for even getting the patch in in the first place.

I don't think it's tenable for patches to sneak in because they are so 
convoluted that the maintainers just can't find the energy to review 
them.  I'd say that the maintainers silence on this particular patch 
speaks volumes in itself.

Sincerely frustrated because rebasing my IPv6 series on top of this mess 
will take days,
/Jonas

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH net-next v5] GTP: add support for flow based tunneling API
  2021-01-18 18:27       ` Jonas Bonn
@ 2021-01-18 18:56         ` Jakub Kicinski
  2021-01-18 20:01           ` Harald Welte
  0 siblings, 1 reply; 13+ messages in thread
From: Jakub Kicinski @ 2021-01-18 18:56 UTC (permalink / raw)
  To: Jonas Bonn; +Cc: Pravin B Shelar, netdev, pablo, laforge

On Mon, 18 Jan 2021 19:27:53 +0100 Jonas Bonn wrote:
> On 18/01/2021 18:27, Jakub Kicinski wrote:
> > v5 itself was laying around on patchwork for almost a week, marked as
> > "Needs Review/Ack".  
> 
> When new series show up just hours after review, it's hard to take them 
> seriously.  It takes a fair amount of time to go through an elephant 
> like this and to make sense of it; the time spent in response to review 
> commentary shouldn't be less.

Agreed.

> > Normally we try to merge patches within two days. If anything my
> > lesson from this whole ordeal is in fact waiting longer makes
> > absolutely no sense. The review didn't come in anyway, and we're
> > just delaying whatever project Pravin needs this for :/  
> 
> I think the expectation that everything gets review within two days is 
> unrealistic. 

Right, it's perfectly fine to send an email saying "please wait, I'll
review it on $date".

> Worse though, is the insinuation that anything unreviewed 
> gets blindly merged...  No, the two day target should be for the merging 
> of ACK:ed patches.

Well, certainly, the code has to be acceptable to the person merging it.

Let's also remember that Pravin is quite a seasoned contributor.

> > Do I disagree with you that the patch is "far from pretty"? Not at all,
> > but I couldn't find any actual bug, and the experience of contributors
> > matters to us, so we can't wait forever.
> >   
> >> The following issues remain unaddressed after review:
> >>
> >> i)  the patch contains several logically separate changes that would be
> >> better served as smaller patches
> >> ii) functionality like the handling of end markers has been introduced
> >> without further explanation
> >> iii) symmetry between the handling of GTPv0 and GTPv1 has been
> >> unnecessarily broken
> >> iv) there are no available userspace tools to allow for testing this
> >> functionality  
> > 
> > I don't understand these points couldn't be stated on any of the last
> > 3 versions / in the last month.  
> 
> I believe all of the above was stated in review of series v1 and v2.  v3 
> was posted during the merge window so wasn't really relevant for review. 
>   v4 didn't address the comments from v1 and v2.  v5 was posted 3 hours 
> after receiving reverse christmas tree comments and addressed only 
> those.  v5 received commentary within a week... hardly excessive for a 
> lightly maintained module like this one.

Sorry, a week is far too long for netdev. If we were to wait that long
we'd have a queue of 300+ patches always hanging around.

> >> I have requested that this patch be reworked into a series of smaller
> >> changes.  That would allow:
> >>
> >> i) reasonable review
> >> ii) the possibility to explain _why_ things are being done in the patch
> >> comment where this isn't obvious (like the handling of end markers)
> >> iii) the chance to do a reasonable rebase of other ongoing work onto
> >> this patch (series):  this one patch is invasive and difficult to rebase
> >> onto
> >>
> >> I'm not sure what the hurry is to get this patch into mainline.  Large
> >> and complicated patches like this take time to review; please revert
> >> this and allow that process to happen.  
> > 
> > You'd need to post a revert with the justification to the ML, so it can
> > be reviewed on its merits. That said I think incremental changes may be
> > a better direction.
> 
> I guess I'll have to do so, but that seems like setting the bar higher 
> than for even getting the patch in in the first place.
> 
> I don't think it's tenable for patches to sneak in because they are so 
> convoluted that the maintainers just can't find the energy to review 
> them.  I'd say that the maintainers silence on this particular patch 
> speaks volumes in itself.

Sadly most maintainers are not particularly dependable, so we can't
afford to make that the criteria.

I have also pinged for reviews on v4 and nobody replied.

> Sincerely frustrated because rebasing my IPv6 series on top of this mess 
> will take days,

I sympathize, perhaps we should document the expectations we have so
less involved maintainers know the expectations :(

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH net-next v5] GTP: add support for flow based tunneling API
  2021-01-18 18:56         ` Jakub Kicinski
@ 2021-01-18 20:01           ` Harald Welte
  0 siblings, 0 replies; 13+ messages in thread
From: Harald Welte @ 2021-01-18 20:01 UTC (permalink / raw)
  To: Jakub Kicinski; +Cc: Jonas Bonn, Pravin B Shelar, netdev, pablo

Dear Jakub and list,

I also have my fair share of concerns about the "if the maintainers
don't ACK it, merge it in case of doubt" approach.  What is the point of
having a maintainer in the first place?

I don't really want to imagine the state of a codebase where everything
gets merged by default, unless somebody clearly and repeatedly actively
NACKs it.

Furthermore, for anyone who is maintaining a small portion of the code,
like a single driver, the suggested "two days" review cycle is simply
not possible.

Such people, like myself, are not full-time kernel developers, but
people who may only have a few hours per month to spend on maintenance
related duties of a rather small and exotic driver with few users.

We are not talking about a security related fix, or a bugfix, but the
introduction of massive changes (compared to the size of the gtp driver)
and new features.

I did see your ping for review, but the end of the year brings
unfortunately an incredible amount of work that needs to be done. I work
60+ hours each week as it is, and end of the year + financial year with
closing of accounts, changing of VAT rates, brexit related changes,
inventory counting, ... as a small business owner, I simply could not
provide feedback as quickly as I wanted.

I would have to build a kernel with that patch, then set up some
related test VMs, test with at least one existing userspace software
like OpenGGSN before I could ACK any change.  Given the low amount of
changes, and the lack of any commercial interest in funding, there is
no automatic test setup that involves the kernel GTP driver yet.  In
Osmocom we do have extensive automatic test suites for OpenGGSN, but
only with the userspace data plane, not with the gtp.ko kernel data
plane :(  So this means manual testing, likely taking half a day to
get an idea about potential regressions.

> > Worse though, is the insinuation that anything unreviewed 
> > gets blindly merged...  No, the two day target should be for the merging 
> > of ACK:ed patches.
> 
> Well, certainly, the code has to be acceptable to the person merging it.

Indeed.  But given the highly exotic use case of the GTP driver, I would
say it needs at least an ACK from somebody who is actively using it,
to have some indication that there don't seem to be regressions at least
at first sight.

Compliance with kernel coding style and the general network architecture
alone is one part, but I would expect that virtually nobody except 2-3
people on this list have ever used the GTP kernel driver...

> Sorry, a week is far too long for netdev. If we were to wait that long
> we'd have a queue of 300+ patches always hanging around.

I would actually consider 300 in-flight patches a relatively small
number for such a huge project, but I don't want to get off-topic.

> > Sincerely frustrated because rebasing my IPv6 series on top of this mess 
> > will take days,
> 
> I sympathize, perhaps we should document the expectations we have so
> less involved maintainers know the expectations :(

As far as I remember, the IPv6 patches have appeared before, and I had
also hoped/wished for them to be merged before any large changes
(percentage of numbers of lines touched vs. size of the driver) like
this flow-based tunneling change.

Yes, I should have communicated better, that clearly was my fault.  But
I was operating under the assumption that code only gets merged if the
maintainers actually ACK it.  At least that's how I remember it from
my more active kernel hacking days ~20 years ago.

Regards,
	Harald

-- 
- Harald Welte <laforge@gnumonks.org>           http://laforge.gnumonks.org/
============================================================================
"Privacy in residential applications is a desirable marketing option."
                                                  (ETSI EN 300 175-7 Ch. A6)

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2021-01-18 20:11 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-01-10  7:00 [PATCH net-next v5] GTP: add support for flow based tunneling API Pravin B Shelar
2021-01-13 15:25 ` Pravin Shelar
2021-01-17  0:46 ` Jakub Kicinski
2021-01-17 13:23   ` Jonas Bonn
2021-01-17 15:25     ` Harald Welte
2021-01-17 20:55       ` Pravin Shelar
2021-01-17 20:47     ` Pravin Shelar
2021-01-18 17:27     ` Jakub Kicinski
2021-01-18 18:27       ` Jonas Bonn
2021-01-18 18:56         ` Jakub Kicinski
2021-01-18 20:01           ` Harald Welte
2021-01-17 13:40 ` Jonas Bonn
2021-01-17 20:42   ` Pravin Shelar

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.