[PATCH net-next 0/4] ip_tunnel: add collect_md mode to IPv4/IPv6 tunnels

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH net-next 0/4] ip_tunnel: add collect_md mode to IPv4/IPv6 tunnels
@ 2016-09-15 20:00 Alexei Starovoitov
  2016-09-15 20:00 ` [PATCH net-next 1/4] ip_tunnel: add collect_md mode to IPIP tunnel Alexei Starovoitov
                   ` (4 more replies)
  0 siblings, 5 replies; 8+ messages in thread
From: Alexei Starovoitov @ 2016-09-15 20:00 UTC (permalink / raw)
  To: David S . Miller; +Cc: Daniel Borkmann, Thomas Graf, netdev, kernel-team

Similar to geneve, vxlan and gre tunnels, implement 'collect metadata' mode
in ipip, ipip6 and ip6ip6 tunnels.

Alexei Starovoitov (4):
  ip_tunnel: add collect_md mode to IPIP tunnel
  ip6_tunnel: add collect_md mode to IPv6 tunnels
  samples/bpf: extend test_tunnel_bpf.sh with IPIP test
  samples/bpf: add comprehensive ipip, ipip6, ip6ip6 test

 include/net/ip6_tunnel.h       |   1 +
 include/net/ip_tunnels.h       |   2 +
 include/uapi/linux/if_tunnel.h |   1 +
 net/ipv4/ip_tunnel.c           |  76 +++++++++++++++++
 net/ipv4/ipip.c                |  35 ++++++--
 net/ipv6/ip6_tunnel.c          | 178 ++++++++++++++++++++++++++++----------
 samples/bpf/tcbpf2_kern.c      | 190 +++++++++++++++++++++++++++++++++++++++++
 samples/bpf/test_ipip.sh       | 178 ++++++++++++++++++++++++++++++++++++++
 samples/bpf/test_tunnel_bpf.sh |  56 ++++++++++--
 9 files changed, 658 insertions(+), 59 deletions(-)
 create mode 100755 samples/bpf/test_ipip.sh

-- 
2.8.0

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH net-next 1/4] ip_tunnel: add collect_md mode to IPIP tunnel
  2016-09-15 20:00 [PATCH net-next 0/4] ip_tunnel: add collect_md mode to IPv4/IPv6 tunnels Alexei Starovoitov
@ 2016-09-15 20:00 ` Alexei Starovoitov
  2016-09-15 20:00 ` [PATCH net-next 2/4] ip6_tunnel: add collect_md mode to IPv6 tunnels Alexei Starovoitov
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 8+ messages in thread
From: Alexei Starovoitov @ 2016-09-15 20:00 UTC (permalink / raw)
  To: David S . Miller; +Cc: Daniel Borkmann, Thomas Graf, netdev, kernel-team

Similar to gre, vxlan and geneve tunnels, allow IPIP tunnels to
operate in 'collect metadata' mode.
bpf_skb_[gs]et_tunnel_key() helpers can make use of it right away.
ovs can use it as well in the future (once appropriate ovs-vport
abstractions and user apis are added).
Note that just like in other tunnels we cannot cache the dst,
since tunnel_info metadata can be different for every packet.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Thomas Graf <tgraf@suug.ch>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/net/ip_tunnels.h       |  2 ++
 include/uapi/linux/if_tunnel.h |  1 +
 net/ipv4/ip_tunnel.c           | 76 ++++++++++++++++++++++++++++++++++++++++++
 net/ipv4/ipip.c                | 35 +++++++++++++++----
 4 files changed, 108 insertions(+), 6 deletions(-)

diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index e598c639aa6f..59557c07904b 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -255,6 +255,8 @@ void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops);
 
 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 		    const struct iphdr *tnl_params, const u8 protocol);
+void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
+		       const u8 proto);
 int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd);
 int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict);
 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu);
diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index 9865c8caedde..18d5dc13985d 100644
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
@@ -73,6 +73,7 @@ enum {
 	IFLA_IPTUN_ENCAP_FLAGS,
 	IFLA_IPTUN_ENCAP_SPORT,
 	IFLA_IPTUN_ENCAP_DPORT,
+	IFLA_IPTUN_COLLECT_METADATA,
 	__IFLA_IPTUN_MAX,
 };
 #define IFLA_IPTUN_MAX	(__IFLA_IPTUN_MAX - 1)
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 95649ebd2874..5719d6ba0824 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -55,6 +55,7 @@
 #include <net/netns/generic.h>
 #include <net/rtnetlink.h>
 #include <net/udp.h>
+#include <net/dst_metadata.h>
 
 #if IS_ENABLED(CONFIG_IPV6)
 #include <net/ipv6.h>
@@ -546,6 +547,81 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
 	return 0;
 }
 
+void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
+{
+	struct ip_tunnel *tunnel = netdev_priv(dev);
+	u32 headroom = sizeof(struct iphdr);
+	struct ip_tunnel_info *tun_info;
+	const struct ip_tunnel_key *key;
+	const struct iphdr *inner_iph;
+	struct rtable *rt;
+	struct flowi4 fl4;
+	__be16 df = 0;
+	u8 tos, ttl;
+
+	tun_info = skb_tunnel_info(skb);
+	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
+		     ip_tunnel_info_af(tun_info) != AF_INET))
+		goto tx_error;
+	key = &tun_info->key;
+	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
+	tos = key->tos;
+	if (tos == 1) {		/* key tos of 1: take tos from the inner header */
+		if (skb->protocol == htons(ETH_P_IP))
+			tos = inner_iph->tos;
+		else if (skb->protocol == htons(ETH_P_IPV6))
+			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
+	}
+	init_tunnel_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
+			 RT_TOS(tos), tunnel->parms.link);
+	if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
+		goto tx_error;
+	rt = ip_route_output_key(tunnel->net, &fl4);
+	if (IS_ERR(rt)) {
+		dev->stats.tx_carrier_errors++;
+		goto tx_error;
+	}
+	if (rt->dst.dev == dev) {	/* route points back at this tunnel */
+		ip_rt_put(rt);
+		dev->stats.collisions++;
+		goto tx_error;
+	}
+	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
+	ttl = key->ttl;
+	if (ttl == 0) {		/* key ttl of 0: inner header, else route default */
+		if (skb->protocol == htons(ETH_P_IP))
+			ttl = inner_iph->ttl;
+		else if (skb->protocol == htons(ETH_P_IPV6))
+			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
+		else
+			ttl = ip4_dst_hoplimit(&rt->dst);
+	}
+	if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
+		df = htons(IP_DF);
+	else if (skb->protocol == htons(ETH_P_IP))
+		df = inner_iph->frag_off & htons(IP_DF);
+	headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
+	if (headroom > dev->needed_headroom)
+		dev->needed_headroom = headroom;
+	if (skb_cow_head(skb, dev->needed_headroom)) {
+		ip_rt_put(rt);
+		goto tx_dropped;
+	}
+	/* send the tos/ttl resolved above, not the raw key->tos/key->ttl */
+	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
+		      df, !net_eq(tunnel->net, dev_net(dev)));
+	return;
+tx_error:
+	dev->stats.tx_errors++;
+	goto kfree;
+tx_dropped:
+	dev->stats.tx_dropped++;
+kfree:
+	kfree_skb(skb);
+}
+EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);
+
 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 		    const struct iphdr *tnl_params, u8 protocol)
 {
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 4ae3f8e6c6cc..c9392589c415 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -115,6 +115,7 @@
 #include <net/xfrm.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
+#include <net/dst_metadata.h>
 
 static bool log_ecn_error = true;
 module_param(log_ecn_error, bool, 0644);
@@ -193,6 +194,7 @@ static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
 {
 	struct net *net = dev_net(skb->dev);
 	struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
+	struct metadata_dst *tun_dst = NULL;
 	struct ip_tunnel *tunnel;
 	const struct iphdr *iph;
 
@@ -216,7 +218,12 @@ static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
 			tpi = &ipip_tpi;
 		if (iptunnel_pull_header(skb, 0, tpi->proto, false))
 			goto drop;
-		return ip_tunnel_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
+		if (tunnel->collect_md) {
+			tun_dst = ip_tun_rx_dst(skb, 0, 0, 0);
+			if (!tun_dst)
+				return 0;
+		}
+		return ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
 	}
 
 	return -1;
@@ -270,7 +277,10 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb,
 
 	skb_set_inner_ipproto(skb, ipproto);
 
-	ip_tunnel_xmit(skb, dev, tiph, ipproto);
+	if (tunnel->collect_md)
+		ip_md_tunnel_xmit(skb, dev, ipproto);
+	else
+		ip_tunnel_xmit(skb, dev, tiph, ipproto);
 	return NETDEV_TX_OK;
 
 tx_error:
@@ -380,13 +390,14 @@ static int ipip_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
 }
 
 static void ipip_netlink_parms(struct nlattr *data[],
-			       struct ip_tunnel_parm *parms)
+			       struct ip_tunnel_parm *parms, bool *collect_md)
 {
 	memset(parms, 0, sizeof(*parms));
 
 	parms->iph.version = 4;
 	parms->iph.protocol = IPPROTO_IPIP;
 	parms->iph.ihl = 5;
+	*collect_md = false;
 
 	if (!data)
 		return;
@@ -414,6 +425,9 @@ static void ipip_netlink_parms(struct nlattr *data[],
 
 	if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
 		parms->iph.frag_off = htons(IP_DF);
+
+	if (data[IFLA_IPTUN_COLLECT_METADATA])
+		*collect_md = true;
 }
 
 /* This function returns true when ENCAP attributes are present in the nl msg */
@@ -453,18 +467,18 @@ static bool ipip_netlink_encap_parms(struct nlattr *data[],
 static int ipip_newlink(struct net *src_net, struct net_device *dev,
 			struct nlattr *tb[], struct nlattr *data[])
 {
+	struct ip_tunnel *t = netdev_priv(dev);
 	struct ip_tunnel_parm p;
 	struct ip_tunnel_encap ipencap;
 
 	if (ipip_netlink_encap_parms(data, &ipencap)) {
-		struct ip_tunnel *t = netdev_priv(dev);
 		int err = ip_tunnel_encap_setup(t, &ipencap);
 
 		if (err < 0)
 			return err;
 	}
 
-	ipip_netlink_parms(data, &p);
+	ipip_netlink_parms(data, &p, &t->collect_md);
 	return ip_tunnel_newlink(dev, tb, &p);
 }
 
@@ -473,6 +487,7 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
 {
 	struct ip_tunnel_parm p;
 	struct ip_tunnel_encap ipencap;
+	bool collect_md;
 
 	if (ipip_netlink_encap_parms(data, &ipencap)) {
 		struct ip_tunnel *t = netdev_priv(dev);
@@ -482,7 +497,9 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
 			return err;
 	}
 
-	ipip_netlink_parms(data, &p);
+	ipip_netlink_parms(data, &p, &collect_md);
+	if (collect_md)
+		return -EINVAL;
 
 	if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
 	    (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
@@ -516,6 +533,8 @@ static size_t ipip_get_size(const struct net_device *dev)
 		nla_total_size(2) +
 		/* IFLA_IPTUN_ENCAP_DPORT */
 		nla_total_size(2) +
+		/* IFLA_IPTUN_COLLECT_METADATA */
+		nla_total_size(0) +
 		0;
 }
 
@@ -544,6 +563,9 @@ static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
 			tunnel->encap.flags))
 		goto nla_put_failure;
 
+	if (tunnel->collect_md)
+		if (nla_put_flag(skb, IFLA_IPTUN_COLLECT_METADATA))
+			goto nla_put_failure;
 	return 0;
 
 nla_put_failure:
@@ -562,6 +584,7 @@ static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = {
 	[IFLA_IPTUN_ENCAP_FLAGS]	= { .type = NLA_U16 },
 	[IFLA_IPTUN_ENCAP_SPORT]	= { .type = NLA_U16 },
 	[IFLA_IPTUN_ENCAP_DPORT]	= { .type = NLA_U16 },
+	[IFLA_IPTUN_COLLECT_METADATA]	= { .type = NLA_FLAG },
 };
 
 static struct rtnl_link_ops ipip_link_ops __read_mostly = {
-- 
2.8.0

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH net-next 2/4] ip6_tunnel: add collect_md mode to IPv6 tunnels
  2016-09-15 20:00 [PATCH net-next 0/4] ip_tunnel: add collect_md mode to IPv4/IPv6 tunnels Alexei Starovoitov
  2016-09-15 20:00 ` [PATCH net-next 1/4] ip_tunnel: add collect_md mode to IPIP tunnel Alexei Starovoitov
@ 2016-09-15 20:00 ` Alexei Starovoitov
  2016-09-15 20:00 ` [PATCH net-next 3/4] samples/bpf: extend test_tunnel_bpf.sh with IPIP test Alexei Starovoitov
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 8+ messages in thread
From: Alexei Starovoitov @ 2016-09-15 20:00 UTC (permalink / raw)
  To: David S . Miller; +Cc: Daniel Borkmann, Thomas Graf, netdev, kernel-team

Similar to gre, vxlan and geneve tunnels, allow IPIP6 and IP6IP6 tunnels
to operate in 'collect metadata' mode.
Unlike the ipv4 code, here it is possible to reuse the ip6_tnl_xmit() function
for both collect_md and traditional tunnels.
bpf_skb_[gs]et_tunnel_key() helpers and ovs (in the future) are the users.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Thomas Graf <tgraf@suug.ch>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/net/ip6_tunnel.h |   1 +
 net/ipv6/ip6_tunnel.c    | 178 +++++++++++++++++++++++++++++++++++------------
 2 files changed, 134 insertions(+), 45 deletions(-)

diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
index 43a5a0e4524c..20ed9699fcd4 100644
--- a/include/net/ip6_tunnel.h
+++ b/include/net/ip6_tunnel.h
@@ -23,6 +23,7 @@ struct __ip6_tnl_parm {
 	__u8 proto;		/* tunnel protocol */
 	__u8 encap_limit;	/* encapsulation limit for tunnel */
 	__u8 hop_limit;		/* hop limit for tunnel */
+	bool collect_md;
 	__be32 flowinfo;	/* traffic class and flowlabel for tunnel */
 	__u32 flags;		/* tunnel flags */
 	struct in6_addr laddr;	/* local tunnel end-point address */
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 5c5779720ef1..6a66adba0c22 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -57,6 +57,7 @@
 #include <net/inet_ecn.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
+#include <net/dst_metadata.h>
 
 MODULE_AUTHOR("Ville Nuorvala");
 MODULE_DESCRIPTION("IPv6 tunneling device");
@@ -90,6 +91,7 @@ struct ip6_tnl_net {
 	struct ip6_tnl __rcu *tnls_r_l[IP6_TUNNEL_HASH_SIZE];
 	struct ip6_tnl __rcu *tnls_wc[1];
 	struct ip6_tnl __rcu **tnls[2];
+	struct ip6_tnl __rcu *collect_md_tun;
 };
 
 static struct net_device_stats *ip6_get_stats(struct net_device *dev)
@@ -166,6 +168,10 @@ ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_
 			return t;
 	}
 
+	t = rcu_dereference(ip6n->collect_md_tun);
+	if (t)
+		return t;
+
 	t = rcu_dereference(ip6n->tnls_wc[0]);
 	if (t && (t->dev->flags & IFF_UP))
 		return t;
@@ -209,6 +215,8 @@ ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
 {
 	struct ip6_tnl __rcu **tp = ip6_tnl_bucket(ip6n, &t->parms);
 
+	if (t->parms.collect_md)
+		rcu_assign_pointer(ip6n->collect_md_tun, t);
 	rcu_assign_pointer(t->next , rtnl_dereference(*tp));
 	rcu_assign_pointer(*tp, t);
 }
@@ -224,6 +232,9 @@ ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
 	struct ip6_tnl __rcu **tp;
 	struct ip6_tnl *iter;
 
+	if (t->parms.collect_md)
+		rcu_assign_pointer(ip6n->collect_md_tun, NULL);
+
 	for (tp = ip6_tnl_bucket(ip6n, &t->parms);
 	     (iter = rtnl_dereference(*tp)) != NULL;
 	     tp = &iter->next) {
@@ -829,6 +840,9 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
 
 	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
 
+	if (tun_dst)
+		skb_dst_set(skb, (struct dst_entry *)tun_dst);
+
 	gro_cells_receive(&tunnel->gro_cells, skb);
 	return 0;
 
@@ -865,6 +879,7 @@ static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto,
 {
 	struct ip6_tnl *t;
 	const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+	struct metadata_dst *tun_dst = NULL;
 	int ret = -1;
 
 	rcu_read_lock();
@@ -881,7 +896,12 @@ static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto,
 			goto drop;
 		if (iptunnel_pull_header(skb, 0, tpi->proto, false))
 			goto drop;
-		ret = __ip6_tnl_rcv(t, skb, tpi, NULL, dscp_ecn_decapsulate,
+		if (t->parms.collect_md) {
+			tun_dst = ipv6_tun_rx_dst(skb, 0, 0, 0);
+			if (!tun_dst)
+				return 0;
+		}
+		ret = __ip6_tnl_rcv(t, skb, tpi, tun_dst, dscp_ecn_decapsulate,
 				    log_ecn_error);
 	}
 
@@ -1012,8 +1032,16 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
 	int mtu;
 	unsigned int psh_hlen = sizeof(struct ipv6hdr) + t->encap_hlen;
 	unsigned int max_headroom = psh_hlen;
+	u8 hop_limit;
 	int err = -1;
 
+	if (t->parms.collect_md) {
+		hop_limit = skb_tunnel_info(skb)->key.ttl;
+		goto route_lookup;
+	} else {
+		hop_limit = t->parms.hop_limit;
+	}
+
 	/* NBMA tunnel */
 	if (ipv6_addr_any(&t->parms.raddr)) {
 		struct in6_addr *addr6;
@@ -1043,6 +1071,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
 		goto tx_err_link_failure;
 
 	if (!dst) {
+route_lookup:
 		dst = ip6_route_output(net, NULL, fl6);
 
 		if (dst->error)
@@ -1053,6 +1082,10 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
 			dst = NULL;
 			goto tx_err_link_failure;
 		}
+		if (t->parms.collect_md &&
+		    ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
+				       &fl6->daddr, 0, &fl6->saddr))
+			goto tx_err_link_failure;
 		ndst = dst;
 	}
 
@@ -1071,7 +1104,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
 	}
 	if (mtu < IPV6_MIN_MTU)
 		mtu = IPV6_MIN_MTU;
-	if (skb_dst(skb))
+	if (skb_dst(skb) && !t->parms.collect_md)
 		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
 	if (skb->len > mtu && !skb_is_gso(skb)) {
 		*pmtu = mtu;
@@ -1111,8 +1144,13 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
 		skb = new_skb;
 	}
 
-	if (!fl6->flowi6_mark && ndst)
-		dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr);
+	if (t->parms.collect_md) {
+		if (t->encap.type != TUNNEL_ENCAP_NONE)
+			goto tx_err_dst_release;
+	} else {
+		if (!fl6->flowi6_mark && ndst)
+			dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr);
+	}
 	skb_dst_set(skb, dst);
 
 	if (encap_limit >= 0) {
@@ -1137,7 +1175,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
 	ipv6h = ipv6_hdr(skb);
 	ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield),
 		     ip6_make_flowlabel(net, skb, fl6->flowlabel, true, fl6));
-	ipv6h->hop_limit = t->parms.hop_limit;
+	ipv6h->hop_limit = hop_limit;
 	ipv6h->nexthdr = proto;
 	ipv6h->saddr = fl6->saddr;
 	ipv6h->daddr = fl6->daddr;
@@ -1170,19 +1208,34 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (tproto != IPPROTO_IPIP && tproto != 0)
 		return -1;
 
-	if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
-		encap_limit = t->parms.encap_limit;
+	dsfield = ipv4_get_dsfield(iph);
 
-	memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
-	fl6.flowi6_proto = IPPROTO_IPIP;
+	if (t->parms.collect_md) {
+		struct ip_tunnel_info *tun_info;
+		const struct ip_tunnel_key *key;
 
-	dsfield = ipv4_get_dsfield(iph);
+		tun_info = skb_tunnel_info(skb);
+		if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
+			     ip_tunnel_info_af(tun_info) != AF_INET6))
+			return -1;
+		key = &tun_info->key;
+		memset(&fl6, 0, sizeof(fl6));
+		fl6.flowi6_proto = IPPROTO_IPIP;
+		fl6.daddr = key->u.ipv6.dst;
+		fl6.flowlabel = key->label;
+	} else {
+		if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+			encap_limit = t->parms.encap_limit;
 
-	if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
-		fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
-					  & IPV6_TCLASS_MASK;
-	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
-		fl6.flowi6_mark = skb->mark;
+		memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+		fl6.flowi6_proto = IPPROTO_IPIP;
+
+		if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
+			fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
+					 & IPV6_TCLASS_MASK;
+		if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
+			fl6.flowi6_mark = skb->mark;
+	}
 
 	if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
 		return -1;
@@ -1220,29 +1273,47 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 	    ip6_tnl_addr_conflict(t, ipv6h))
 		return -1;
 
-	offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
-	if (offset > 0) {
-		struct ipv6_tlv_tnl_enc_lim *tel;
-		tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
-		if (tel->encap_limit == 0) {
-			icmpv6_send(skb, ICMPV6_PARAMPROB,
-				    ICMPV6_HDR_FIELD, offset + 2);
+	dsfield = ipv6_get_dsfield(ipv6h);
+
+	if (t->parms.collect_md) {
+		struct ip_tunnel_info *tun_info;
+		const struct ip_tunnel_key *key;
+
+		tun_info = skb_tunnel_info(skb);
+		if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
+			     ip_tunnel_info_af(tun_info) != AF_INET6))
 			return -1;
+		key = &tun_info->key;
+		memset(&fl6, 0, sizeof(fl6));
+		fl6.flowi6_proto = IPPROTO_IPV6;
+		fl6.daddr = key->u.ipv6.dst;
+		fl6.flowlabel = key->label;
+	} else {
+		offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
+		if (offset > 0) {
+			struct ipv6_tlv_tnl_enc_lim *tel;
+
+			tel = (void *)&skb_network_header(skb)[offset];
+			if (tel->encap_limit == 0) {
+				icmpv6_send(skb, ICMPV6_PARAMPROB,
+					    ICMPV6_HDR_FIELD, offset + 2);
+				return -1;
+			}
+			encap_limit = tel->encap_limit - 1;
+		} else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) {
+			encap_limit = t->parms.encap_limit;
 		}
-		encap_limit = tel->encap_limit - 1;
-	} else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
-		encap_limit = t->parms.encap_limit;
 
-	memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
-	fl6.flowi6_proto = IPPROTO_IPV6;
+		memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+		fl6.flowi6_proto = IPPROTO_IPV6;
 
-	dsfield = ipv6_get_dsfield(ipv6h);
-	if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
-		fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
-	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
-		fl6.flowlabel |= ip6_flowlabel(ipv6h);
-	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
-		fl6.flowi6_mark = skb->mark;
+		if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
+			fl6.flowlabel |= (*(__be32 *)ipv6h & IPV6_TCLASS_MASK);
+		if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
+			fl6.flowlabel |= ip6_flowlabel(ipv6h);
+		if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
+			fl6.flowi6_mark = skb->mark;
+	}
 
 	if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
 		return -1;
@@ -1741,6 +1812,10 @@ static int ip6_tnl_dev_init(struct net_device *dev)
 	if (err)
 		return err;
 	ip6_tnl_link_config(t);
+	if (t->parms.collect_md) {
+		dev->features |= NETIF_F_NETNS_LOCAL;
+		netif_keep_dst(dev);
+	}
 	return 0;
 }
 
@@ -1811,6 +1886,9 @@ static void ip6_tnl_netlink_parms(struct nlattr *data[],
 
 	if (data[IFLA_IPTUN_PROTO])
 		parms->proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
+
+	if (data[IFLA_IPTUN_COLLECT_METADATA])
+		parms->collect_md = true;
 }
 
 static bool ip6_tnl_netlink_encap_parms(struct nlattr *data[],
@@ -1850,6 +1928,7 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
 			   struct nlattr *tb[], struct nlattr *data[])
 {
 	struct net *net = dev_net(dev);
+	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
 	struct ip6_tnl *nt, *t;
 	struct ip_tunnel_encap ipencap;
 
@@ -1864,9 +1943,14 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
 
 	ip6_tnl_netlink_parms(data, &nt->parms);
 
-	t = ip6_tnl_locate(net, &nt->parms, 0);
-	if (!IS_ERR(t))
-		return -EEXIST;
+	if (nt->parms.collect_md) {
+		if (rtnl_dereference(ip6n->collect_md_tun))
+			return -EEXIST;
+	} else {
+		t = ip6_tnl_locate(net, &nt->parms, 0);
+		if (!IS_ERR(t))
+			return -EEXIST;
+	}
 
 	return ip6_tnl_create2(dev);
 }
@@ -1890,6 +1974,8 @@ static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[],
 			return err;
 	}
 	ip6_tnl_netlink_parms(data, &p);
+	if (p.collect_md)
+		return -EINVAL;
 
 	t = ip6_tnl_locate(net, &p, 0);
 	if (!IS_ERR(t)) {
@@ -1937,6 +2023,8 @@ static size_t ip6_tnl_get_size(const struct net_device *dev)
 		nla_total_size(2) +
 		/* IFLA_IPTUN_ENCAP_DPORT */
 		nla_total_size(2) +
+		/* IFLA_IPTUN_COLLECT_METADATA */
+		nla_total_size(0) +
 		0;
 }
 
@@ -1955,16 +2043,15 @@ static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev)
 	    nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto))
 		goto nla_put_failure;
 
-	if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE,
-			tunnel->encap.type) ||
-	nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT,
-		     tunnel->encap.sport) ||
-	nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT,
-		     tunnel->encap.dport) ||
-	nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS,
-		    tunnel->encap.flags))
+	if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE, tunnel->encap.type) ||
+	    nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT, tunnel->encap.sport) ||
+	    nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT, tunnel->encap.dport) ||
+	    nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS, tunnel->encap.flags))
 		goto nla_put_failure;
 
+	if (parm->collect_md)
+		if (nla_put_flag(skb, IFLA_IPTUN_COLLECT_METADATA))
+			goto nla_put_failure;
 	return 0;
 
 nla_put_failure:
@@ -1992,6 +2079,7 @@ static const struct nla_policy ip6_tnl_policy[IFLA_IPTUN_MAX + 1] = {
 	[IFLA_IPTUN_ENCAP_FLAGS]	= { .type = NLA_U16 },
 	[IFLA_IPTUN_ENCAP_SPORT]	= { .type = NLA_U16 },
 	[IFLA_IPTUN_ENCAP_DPORT]	= { .type = NLA_U16 },
+	[IFLA_IPTUN_COLLECT_METADATA]	= { .type = NLA_FLAG },
 };
 
 static struct rtnl_link_ops ip6_link_ops __read_mostly = {
-- 
2.8.0

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH net-next 3/4] samples/bpf: extend test_tunnel_bpf.sh with IPIP test
  2016-09-15 20:00 [PATCH net-next 0/4] ip_tunnel: add collect_md mode to IPv4/IPv6 tunnels Alexei Starovoitov
  2016-09-15 20:00 ` [PATCH net-next 1/4] ip_tunnel: add collect_md mode to IPIP tunnel Alexei Starovoitov
  2016-09-15 20:00 ` [PATCH net-next 2/4] ip6_tunnel: add collect_md mode to IPv6 tunnels Alexei Starovoitov
@ 2016-09-15 20:00 ` Alexei Starovoitov
  2016-09-16  5:16   ` William Tu
  2016-09-15 20:00 ` [PATCH net-next 4/4] samples/bpf: add comprehensive ipip, ipip6, ip6ip6 test Alexei Starovoitov
  2016-09-17 14:13 ` [PATCH net-next 0/4] ip_tunnel: add collect_md mode to IPv4/IPv6 tunnels David Miller
  4 siblings, 1 reply; 8+ messages in thread
From: Alexei Starovoitov @ 2016-09-15 20:00 UTC (permalink / raw)
  To: David S . Miller; +Cc: Daniel Borkmann, Thomas Graf, netdev, kernel-team

Extend the existing tests for vxlan, geneve and gre to include an IPIP tunnel.
It tests both the traditional tunnel configuration and
dynamic configuration via bpf helpers.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 samples/bpf/tcbpf2_kern.c      | 58 ++++++++++++++++++++++++++++++++++++++++++
 samples/bpf/test_tunnel_bpf.sh | 56 ++++++++++++++++++++++++++++++++++------
 2 files changed, 106 insertions(+), 8 deletions(-)

diff --git a/samples/bpf/tcbpf2_kern.c b/samples/bpf/tcbpf2_kern.c
index 7a15289da6cc..c1917d968fb4 100644
--- a/samples/bpf/tcbpf2_kern.c
+++ b/samples/bpf/tcbpf2_kern.c
@@ -1,4 +1,5 @@
 /* Copyright (c) 2016 VMware
+ * Copyright (c) 2016 Facebook
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of version 2 of the GNU General Public
@@ -188,4 +189,61 @@ int _geneve_get_tunnel(struct __sk_buff *skb)
 	return TC_ACT_OK;
 }
 
+SEC("ipip_set_tunnel")
+int _ipip_set_tunnel(struct __sk_buff *skb)
+{
+	struct bpf_tunnel_key key = {};
+	void *data = (void *)(long)skb->data;
+	struct iphdr *iph = data;
+	struct tcphdr *tcp = data + sizeof(*iph);
+	void *data_end = (void *)(long)skb->data_end;
+	int ret;
+
+	/* single length check */
+	if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
+		ERROR(1);
+		return TC_ACT_SHOT;
+	}
+
+	key.tunnel_ttl = 64;
+	if (iph->protocol == IPPROTO_ICMP) {
+		key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+	} else {
+		if (iph->protocol != IPPROTO_TCP || iph->ihl != 5)
+			return TC_ACT_SHOT;
+
+		if (tcp->dest == htons(5200))
+			key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+		else if (tcp->dest == htons(5201))
+			key.remote_ipv4 = 0xac100165; /* 172.16.1.101 */
+		else
+			return TC_ACT_SHOT;
+	}
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("ipip_get_tunnel")
+int _ipip_get_tunnel(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+	char fmt[] = "remote ip 0x%x\n";
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt), key.remote_ipv4);
+	return TC_ACT_OK;
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/test_tunnel_bpf.sh b/samples/bpf/test_tunnel_bpf.sh
index 4956589a83ae..1ff634f187b7 100755
--- a/samples/bpf/test_tunnel_bpf.sh
+++ b/samples/bpf/test_tunnel_bpf.sh
@@ -9,15 +9,13 @@
 # local 172.16.1.200 remote 172.16.1.100
 # veth1 IP: 172.16.1.200, tunnel dev <type>11
 
-set -e
-
 function config_device {
 	ip netns add at_ns0
 	ip link add veth0 type veth peer name veth1
 	ip link set veth0 netns at_ns0
 	ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0
 	ip netns exec at_ns0 ip link set dev veth0 up
-	ip link set dev veth1 up
+	ip link set dev veth1 up mtu 1500
 	ip addr add dev veth1 172.16.1.200/24
 }
 
@@ -67,6 +65,19 @@ function add_geneve_tunnel {
 	ip addr add dev $DEV 10.1.1.200/24
 }
 
+function add_ipip_tunnel {
+	# in namespace
+	ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE local 172.16.1.100 remote 172.16.1.200
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+
+	# out of namespace
+	ip link add dev $DEV type $TYPE external
+	ip link set dev $DEV up
+	ip addr add dev $DEV 10.1.1.200/24
+}
+
 function attach_bpf {
 	DEV=$1
 	SET_TUNNEL=$2
@@ -85,6 +96,7 @@ function test_gre {
 	attach_bpf $DEV gre_set_tunnel gre_get_tunnel
 	ping -c 1 10.1.1.100
 	ip netns exec at_ns0 ping -c 1 10.1.1.200
+	cleanup
 }
 
 function test_vxlan {
@@ -96,6 +108,7 @@ function test_vxlan {
 	attach_bpf $DEV vxlan_set_tunnel vxlan_get_tunnel
 	ping -c 1 10.1.1.100
 	ip netns exec at_ns0 ping -c 1 10.1.1.200
+	cleanup
 }
 
 function test_geneve {
@@ -107,21 +120,48 @@ function test_geneve {
 	attach_bpf $DEV geneve_set_tunnel geneve_get_tunnel
 	ping -c 1 10.1.1.100
 	ip netns exec at_ns0 ping -c 1 10.1.1.200
+	cleanup
+}
+
+function test_ipip {
+	TYPE=ipip
+	DEV_NS=ipip00
+	DEV=ipip11
+	config_device
+	tcpdump -nei veth1 &
+	cat /sys/kernel/debug/tracing/trace_pipe &
+	add_ipip_tunnel
+	ethtool -K veth1 gso off gro off rx off tx off
+	ip link set dev veth1 mtu 1500
+	attach_bpf $DEV ipip_set_tunnel ipip_get_tunnel
+	ping -c 1 10.1.1.100
+	ip netns exec at_ns0 ping -c 1 10.1.1.200
+	ip netns exec at_ns0 iperf -sD -p 5200 > /dev/null
+	sleep 0.2
+	iperf -c 10.1.1.100 -n 5k -p 5200
+	cleanup
 }
 
 function cleanup {
+	set +ex
+	pkill iperf
 	ip netns delete at_ns0
 	ip link del veth1
-	ip link del $DEV
+	ip link del ipip11
+	ip link del gretap11
+	ip link del geneve11
+	pkill tcpdump
+	pkill cat
+	set -ex
 }
 
+cleanup
 echo "Testing GRE tunnel..."
 test_gre
-cleanup
 echo "Testing VXLAN tunnel..."
 test_vxlan
-cleanup
 echo "Testing GENEVE tunnel..."
 test_geneve
-cleanup
-echo "Success"
+echo "Testing IPIP tunnel..."
+test_ipip
+echo "*** PASS ***"
-- 
2.8.0

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH net-next 4/4] samples/bpf: add comprehensive ipip, ipip6, ip6ip6 test
  2016-09-15 20:00 [PATCH net-next 0/4] ip_tunnel: add collect_md mode to IPv4/IPv6 tunnels Alexei Starovoitov
                   ` (2 preceding siblings ...)
  2016-09-15 20:00 ` [PATCH net-next 3/4] samples/bpf: extend test_tunnel_bpf.sh with IPIP test Alexei Starovoitov
@ 2016-09-15 20:00 ` Alexei Starovoitov
  2016-09-17 14:13 ` [PATCH net-next 0/4] ip_tunnel: add collect_md mode to IPv4/IPv6 tunnels David Miller
  4 siblings, 0 replies; 8+ messages in thread
From: Alexei Starovoitov @ 2016-09-15 20:00 UTC (permalink / raw)
  To: David S . Miller; +Cc: Daniel Borkmann, Thomas Graf, netdev, kernel-team

The test creates 3 namespaces with veth devices connected via a bridge.
The first two namespaces simulate two different hosts with the same
IPv4 and IPv6 addresses configured on the tunnel interface, and they
communicate with the outside world via standard tunnels.
The third namespace creates a collect_md tunnel that is driven by a BPF
program which selects a different remote host (either the first or
second namespace) based on the TCP destination port number, while the
TCP destination IP is the same.
This scenario is a rough approximation of a load balancer use case.
The tests check both traditional tunnel configuration and collect_md mode.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 samples/bpf/tcbpf2_kern.c | 132 ++++++++++++++++++++++++++++++++++
 samples/bpf/test_ipip.sh  | 178 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 310 insertions(+)
 create mode 100755 samples/bpf/test_ipip.sh

diff --git a/samples/bpf/tcbpf2_kern.c b/samples/bpf/tcbpf2_kern.c
index c1917d968fb4..3303bb85593b 100644
--- a/samples/bpf/tcbpf2_kern.c
+++ b/samples/bpf/tcbpf2_kern.c
@@ -9,12 +9,15 @@
 #include <uapi/linux/if_ether.h>
 #include <uapi/linux/if_packet.h>
 #include <uapi/linux/ip.h>
+#include <uapi/linux/ipv6.h>
 #include <uapi/linux/in.h>
 #include <uapi/linux/tcp.h>
 #include <uapi/linux/filter.h>
 #include <uapi/linux/pkt_cls.h>
+#include <net/ipv6.h>
 #include "bpf_helpers.h"
 
+#define _htonl __builtin_bswap32
 #define ERROR(ret) do {\
 		char fmt[] = "ERROR line:%d ret:%d\n";\
 		bpf_trace_printk(fmt, sizeof(fmt), __LINE__, ret); \
@@ -246,4 +249,133 @@ int _ipip_get_tunnel(struct __sk_buff *skb)
 	return TC_ACT_OK;
 }
 
+SEC("ipip6_set_tunnel")
+int _ipip6_set_tunnel(struct __sk_buff *skb)
+{
+	struct bpf_tunnel_key key = {};
+	void *data = (void *)(long)skb->data;
+	struct iphdr *iph = data;
+	struct tcphdr *tcp = data + sizeof(*iph);
+	void *data_end = (void *)(long)skb->data_end;
+	int ret;
+
+	/* single length check */
+	if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
+		ERROR(1);
+		return TC_ACT_SHOT;
+	}
+
+	key.remote_ipv6[0] = _htonl(0x2401db00);
+	key.tunnel_ttl = 64;
+
+	if (iph->protocol == IPPROTO_ICMP) {
+		key.remote_ipv6[3] = _htonl(1);
+	} else {
+		if (iph->protocol != IPPROTO_TCP || iph->ihl != 5) {
+			ERROR(iph->protocol);
+			return TC_ACT_SHOT;
+		}
+
+		if (tcp->dest == htons(5200)) {
+			key.remote_ipv6[3] = _htonl(1);
+		} else if (tcp->dest == htons(5201)) {
+			key.remote_ipv6[3] = _htonl(2);
+		} else {
+			ERROR(tcp->dest);
+			return TC_ACT_SHOT;
+		}
+	}
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("ipip6_get_tunnel")
+int _ipip6_get_tunnel(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+	char fmt[] = "remote ip6 %x::%x\n";
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt), _htonl(key.remote_ipv6[0]),
+			 _htonl(key.remote_ipv6[3]));
+	return TC_ACT_OK;
+}
+
+SEC("ip6ip6_set_tunnel")
+int _ip6ip6_set_tunnel(struct __sk_buff *skb)
+{
+	struct bpf_tunnel_key key = {};
+	void *data = (void *)(long)skb->data;
+	struct ipv6hdr *iph = data;
+	struct tcphdr *tcp = data + sizeof(*iph);
+	void *data_end = (void *)(long)skb->data_end;
+	int ret;
+
+	/* single length check */
+	if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
+		ERROR(1);
+		return TC_ACT_SHOT;
+	}
+
+	key.remote_ipv6[0] = _htonl(0x2401db00);
+	key.tunnel_ttl = 64;
+
+	if (iph->nexthdr == NEXTHDR_ICMP) {
+		key.remote_ipv6[3] = _htonl(1);
+	} else {
+		if (iph->nexthdr != NEXTHDR_TCP) {
+			ERROR(iph->nexthdr);
+			return TC_ACT_SHOT;
+		}
+
+		if (tcp->dest == htons(5200)) {
+			key.remote_ipv6[3] = _htonl(1);
+		} else if (tcp->dest == htons(5201)) {
+			key.remote_ipv6[3] = _htonl(2);
+		} else {
+			ERROR(tcp->dest);
+			return TC_ACT_SHOT;
+		}
+	}
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("ip6ip6_get_tunnel")
+int _ip6ip6_get_tunnel(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+	char fmt[] = "remote ip6 %x::%x\n";
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt), _htonl(key.remote_ipv6[0]),
+			 _htonl(key.remote_ipv6[3]));
+	return TC_ACT_OK;
+}
+
+
 char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/test_ipip.sh b/samples/bpf/test_ipip.sh
new file mode 100755
index 000000000000..196925403ab4
--- /dev/null
+++ b/samples/bpf/test_ipip.sh
@@ -0,0 +1,178 @@
+#!/bin/bash
+
+function config_device {
+	ip netns add at_ns0
+	ip netns add at_ns1
+	ip netns add at_ns2
+	ip link add veth0 type veth peer name veth0b
+	ip link add veth1 type veth peer name veth1b
+	ip link add veth2 type veth peer name veth2b
+	ip link set veth0b up
+	ip link set veth1b up
+	ip link set veth2b up
+	ip link set dev veth0b mtu 1500
+	ip link set dev veth1b mtu 1500
+	ip link set dev veth2b mtu 1500
+	ip link set veth0 netns at_ns0
+	ip link set veth1 netns at_ns1
+	ip link set veth2 netns at_ns2
+	ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0
+	ip netns exec at_ns0 ip addr add 2401:db00::1/64 dev veth0 nodad
+	ip netns exec at_ns0 ip link set dev veth0 up
+	ip netns exec at_ns1 ip addr add 172.16.1.101/24 dev veth1
+	ip netns exec at_ns1 ip addr add 2401:db00::2/64 dev veth1 nodad
+	ip netns exec at_ns1 ip link set dev veth1 up
+	ip netns exec at_ns2 ip addr add 172.16.1.200/24 dev veth2
+	ip netns exec at_ns2 ip addr add 2401:db00::3/64 dev veth2 nodad
+	ip netns exec at_ns2 ip link set dev veth2 up
+	ip link add br0 type bridge
+	ip link set br0 up
+	ip link set dev br0 mtu 1500
+	ip link set veth0b master br0
+	ip link set veth1b master br0
+	ip link set veth2b master br0
+}
+
+function add_ipip_tunnel {
+	ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type ipip local 172.16.1.100 remote 172.16.1.200
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+	ip netns exec at_ns1 \
+		ip link add dev $DEV_NS type ipip local 172.16.1.101 remote 172.16.1.200
+	ip netns exec at_ns1 ip link set dev $DEV_NS up
+	# same inner IP address in at_ns0 and at_ns1
+	ip netns exec at_ns1 ip addr add dev $DEV_NS 10.1.1.100/24
+
+	ip netns exec at_ns2 ip link add dev $DEV type ipip external
+	ip netns exec at_ns2 ip link set dev $DEV up
+	ip netns exec at_ns2 ip addr add dev $DEV 10.1.1.200/24
+}
+
+function add_ipip6_tunnel {
+	ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type ip6tnl mode ipip6 local 2401:db00::1/64 remote 2401:db00::3/64
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+	ip netns exec at_ns1 \
+		ip link add dev $DEV_NS type ip6tnl mode ipip6 local 2401:db00::2/64 remote 2401:db00::3/64
+	ip netns exec at_ns1 ip link set dev $DEV_NS up
+	# same inner IP address in at_ns0 and at_ns1
+	ip netns exec at_ns1 ip addr add dev $DEV_NS 10.1.1.100/24
+
+	ip netns exec at_ns2 ip link add dev $DEV type ip6tnl mode ipip6 external
+	ip netns exec at_ns2 ip link set dev $DEV up
+	ip netns exec at_ns2 ip addr add dev $DEV 10.1.1.200/24
+}
+
+function add_ip6ip6_tunnel {
+	ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type ip6tnl mode ip6ip6 local 2401:db00::1/64 remote 2401:db00::3/64
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 2601:646::1/64
+	ip netns exec at_ns1 \
+		ip link add dev $DEV_NS type ip6tnl mode ip6ip6 local 2401:db00::2/64 remote 2401:db00::3/64
+	ip netns exec at_ns1 ip link set dev $DEV_NS up
+	# same inner IP address in at_ns0 and at_ns1
+	ip netns exec at_ns1 ip addr add dev $DEV_NS 2601:646::1/64
+
+	ip netns exec at_ns2 ip link add dev $DEV type ip6tnl mode ip6ip6 external
+	ip netns exec at_ns2 ip link set dev $DEV up
+	ip netns exec at_ns2 ip addr add dev $DEV 2601:646::2/64
+}
+
+function attach_bpf {
+	DEV=$1
+	SET_TUNNEL=$2
+	GET_TUNNEL=$3
+	ip netns exec at_ns2 tc qdisc add dev $DEV clsact
+	ip netns exec at_ns2 tc filter add dev $DEV egress bpf da obj tcbpf2_kern.o sec $SET_TUNNEL
+	ip netns exec at_ns2 tc filter add dev $DEV ingress bpf da obj tcbpf2_kern.o sec $GET_TUNNEL
+}
+
+function test_ipip {
+	DEV_NS=ipip_std
+	DEV=ipip_bpf
+	config_device
+#	tcpdump -nei br0 &
+	cat /sys/kernel/debug/tracing/trace_pipe &
+
+	add_ipip_tunnel
+	attach_bpf $DEV ipip_set_tunnel ipip_get_tunnel
+
+	ip netns exec at_ns0 ping -c 1 10.1.1.200
+	ip netns exec at_ns2 ping -c 1 10.1.1.100
+	ip netns exec at_ns0 iperf -sD -p 5200 > /dev/null
+	ip netns exec at_ns1 iperf -sD -p 5201 > /dev/null
+	sleep 0.2
+	# tcp check _same_ IP over different tunnels
+	ip netns exec at_ns2 iperf -c 10.1.1.100 -n 5k -p 5200
+	ip netns exec at_ns2 iperf -c 10.1.1.100 -n 5k -p 5201
+	cleanup
+}
+
+# IPv4 over IPv6 tunnel
+function test_ipip6 {
+	DEV_NS=ipip_std
+	DEV=ipip_bpf
+	config_device
+#	tcpdump -nei br0 &
+	cat /sys/kernel/debug/tracing/trace_pipe &
+
+	add_ipip6_tunnel
+	attach_bpf $DEV ipip6_set_tunnel ipip6_get_tunnel
+
+	ip netns exec at_ns0 ping -c 1 10.1.1.200
+	ip netns exec at_ns2 ping -c 1 10.1.1.100
+	ip netns exec at_ns0 iperf -sD -p 5200 > /dev/null
+	ip netns exec at_ns1 iperf -sD -p 5201 > /dev/null
+	sleep 0.2
+	# tcp check _same_ IP over different tunnels
+	ip netns exec at_ns2 iperf -c 10.1.1.100 -n 5k -p 5200
+	ip netns exec at_ns2 iperf -c 10.1.1.100 -n 5k -p 5201
+	cleanup
+}
+
+# IPv6 over IPv6 tunnel
+function test_ip6ip6 {
+	DEV_NS=ipip_std
+	DEV=ipip_bpf
+	config_device
+#	tcpdump -nei br0 &
+	cat /sys/kernel/debug/tracing/trace_pipe &
+
+	add_ip6ip6_tunnel
+	attach_bpf $DEV ip6ip6_set_tunnel ip6ip6_get_tunnel
+
+	ip netns exec at_ns0 ping -6 -c 1 2601:646::2
+	ip netns exec at_ns2 ping -6 -c 1 2601:646::1
+	ip netns exec at_ns0 iperf -6sD -p 5200 > /dev/null
+	ip netns exec at_ns1 iperf -6sD -p 5201 > /dev/null
+	sleep 0.2
+	# tcp check _same_ IP over different tunnels
+	ip netns exec at_ns2 iperf -6c 2601:646::1 -n 5k -p 5200
+	ip netns exec at_ns2 iperf -6c 2601:646::1 -n 5k -p 5201
+	cleanup
+}
+
+function cleanup {
+	set +ex
+	pkill iperf
+	ip netns delete at_ns0
+	ip netns delete at_ns1
+	ip netns delete at_ns2
+	ip link del veth0
+	ip link del veth1
+	ip link del veth2
+	ip link del br0
+	pkill tcpdump
+	pkill cat
+	set -ex
+}
+
+cleanup
+echo "Testing IP tunnels..."
+test_ipip
+test_ipip6
+test_ip6ip6
+echo "*** PASS ***"
-- 
2.8.0

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH net-next 3/4] samples/bpf: extend test_tunnel_bpf.sh with IPIP test
  2016-09-15 20:00 ` [PATCH net-next 3/4] samples/bpf: extend test_tunnel_bpf.sh with IPIP test Alexei Starovoitov
@ 2016-09-16  5:16   ` William Tu
  2016-09-16  7:22     ` Daniel Borkmann
  0 siblings, 1 reply; 8+ messages in thread
From: William Tu @ 2016-09-16  5:16 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: David S . Miller, Daniel Borkmann, Thomas Graf,
	Linux Kernel Network Developers, kernel-team

Hi Alexei,

Is there a corresponding patch for iproute2? I tested this patch but it fails at:
+ ip link add dev ipip11 type ipip external
because my ip command does not support "external".

Thanks
William


On Thu, Sep 15, 2016 at 1:00 PM, Alexei Starovoitov <ast@fb.com> wrote:
> extend existing tests for vxlan, geneve, gre to include IPIP tunnel.
> It tests both traditional tunnel configuration and
> dynamic via bpf helpers.
>
> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
> ---
>  samples/bpf/tcbpf2_kern.c      | 58 ++++++++++++++++++++++++++++++++++++++++++
>  samples/bpf/test_tunnel_bpf.sh | 56 ++++++++++++++++++++++++++++++++++------
>  2 files changed, 106 insertions(+), 8 deletions(-)
>
> diff --git a/samples/bpf/tcbpf2_kern.c b/samples/bpf/tcbpf2_kern.c
> index 7a15289da6cc..c1917d968fb4 100644
> --- a/samples/bpf/tcbpf2_kern.c
> +++ b/samples/bpf/tcbpf2_kern.c
> @@ -1,4 +1,5 @@
>  /* Copyright (c) 2016 VMware
> + * Copyright (c) 2016 Facebook
>   *
>   * This program is free software; you can redistribute it and/or
>   * modify it under the terms of version 2 of the GNU General Public
> @@ -188,4 +189,61 @@ int _geneve_get_tunnel(struct __sk_buff *skb)
>         return TC_ACT_OK;
>  }
>
> +SEC("ipip_set_tunnel")
> +int _ipip_set_tunnel(struct __sk_buff *skb)
> +{
> +       struct bpf_tunnel_key key = {};
> +       void *data = (void *)(long)skb->data;
> +       struct iphdr *iph = data;
> +       struct tcphdr *tcp = data + sizeof(*iph);
> +       void *data_end = (void *)(long)skb->data_end;
> +       int ret;
> +
> +       /* single length check */
> +       if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
> +               ERROR(1);
> +               return TC_ACT_SHOT;
> +       }
> +
> +       key.tunnel_ttl = 64;
> +       if (iph->protocol == IPPROTO_ICMP) {
> +               key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
> +       } else {
> +               if (iph->protocol != IPPROTO_TCP || iph->ihl != 5)
> +                       return TC_ACT_SHOT;
> +
> +               if (tcp->dest == htons(5200))
> +                       key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
> +               else if (tcp->dest == htons(5201))
> +                       key.remote_ipv4 = 0xac100165; /* 172.16.1.101 */
> +               else
> +                       return TC_ACT_SHOT;
> +       }
> +
> +       ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
> +       if (ret < 0) {
> +               ERROR(ret);
> +               return TC_ACT_SHOT;
> +       }
> +
> +       return TC_ACT_OK;
> +}
> +
> +SEC("ipip_get_tunnel")
> +int _ipip_get_tunnel(struct __sk_buff *skb)
> +{
> +       int ret;
> +       struct bpf_tunnel_key key;
> +       char fmt[] = "remote ip 0x%x\n";
> +
> +       ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
> +       if (ret < 0) {
> +               ERROR(ret);
> +               return TC_ACT_SHOT;
> +       }
> +
> +       bpf_trace_printk(fmt, sizeof(fmt), key.remote_ipv4);
> +       return TC_ACT_OK;
> +}
> +
>  char _license[] SEC("license") = "GPL";
> diff --git a/samples/bpf/test_tunnel_bpf.sh b/samples/bpf/test_tunnel_bpf.sh
> index 4956589a83ae..1ff634f187b7 100755
> --- a/samples/bpf/test_tunnel_bpf.sh
> +++ b/samples/bpf/test_tunnel_bpf.sh
> @@ -9,15 +9,13 @@
>  # local 172.16.1.200 remote 172.16.1.100
>  # veth1 IP: 172.16.1.200, tunnel dev <type>11
>
> -set -e
> -
>  function config_device {
>         ip netns add at_ns0
>         ip link add veth0 type veth peer name veth1
>         ip link set veth0 netns at_ns0
>         ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0
>         ip netns exec at_ns0 ip link set dev veth0 up
> -       ip link set dev veth1 up
> +       ip link set dev veth1 up mtu 1500
>         ip addr add dev veth1 172.16.1.200/24
>  }
>
> @@ -67,6 +65,19 @@ function add_geneve_tunnel {
>         ip addr add dev $DEV 10.1.1.200/24
>  }
>
> +function add_ipip_tunnel {
> +       # in namespace
> +       ip netns exec at_ns0 \
> +               ip link add dev $DEV_NS type $TYPE local 172.16.1.100 remote 172.16.1.200
> +       ip netns exec at_ns0 ip link set dev $DEV_NS up
> +       ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
> +
> +       # out of namespace
> +       ip link add dev $DEV type $TYPE external
> +       ip link set dev $DEV up
> +       ip addr add dev $DEV 10.1.1.200/24
> +}
> +
>  function attach_bpf {
>         DEV=$1
>         SET_TUNNEL=$2
> @@ -85,6 +96,7 @@ function test_gre {
>         attach_bpf $DEV gre_set_tunnel gre_get_tunnel
>         ping -c 1 10.1.1.100
>         ip netns exec at_ns0 ping -c 1 10.1.1.200
> +       cleanup
>  }
>
>  function test_vxlan {
> @@ -96,6 +108,7 @@ function test_vxlan {
>         attach_bpf $DEV vxlan_set_tunnel vxlan_get_tunnel
>         ping -c 1 10.1.1.100
>         ip netns exec at_ns0 ping -c 1 10.1.1.200
> +       cleanup
>  }
>
>  function test_geneve {
> @@ -107,21 +120,48 @@ function test_geneve {
>         attach_bpf $DEV geneve_set_tunnel geneve_get_tunnel
>         ping -c 1 10.1.1.100
>         ip netns exec at_ns0 ping -c 1 10.1.1.200
> +       cleanup
> +}
> +
> +function test_ipip {
> +       TYPE=ipip
> +       DEV_NS=ipip00
> +       DEV=ipip11
> +       config_device
> +       tcpdump -nei veth1 &
> +       cat /sys/kernel/debug/tracing/trace_pipe &
> +       add_ipip_tunnel
> +       ethtool -K veth1 gso off gro off rx off tx off
> +       ip link set dev veth1 mtu 1500
> +       attach_bpf $DEV ipip_set_tunnel ipip_get_tunnel
> +       ping -c 1 10.1.1.100
> +       ip netns exec at_ns0 ping -c 1 10.1.1.200
> +       ip netns exec at_ns0 iperf -sD -p 5200 > /dev/null
> +       sleep 0.2
> +       iperf -c 10.1.1.100 -n 5k -p 5200
> +       cleanup
>  }
>
>  function cleanup {
> +       set +ex
> +       pkill iperf
>         ip netns delete at_ns0
>         ip link del veth1
> -       ip link del $DEV
> +       ip link del ipip11
> +       ip link del gretap11
> +       ip link del geneve11
> +       pkill tcpdump
> +       pkill cat
> +       set -ex
>  }
>
> +cleanup
>  echo "Testing GRE tunnel..."
>  test_gre
> -cleanup
>  echo "Testing VXLAN tunnel..."
>  test_vxlan
> -cleanup
>  echo "Testing GENEVE tunnel..."
>  test_geneve
> -cleanup
> -echo "Success"
> +echo "Testing IPIP tunnel..."
> +test_ipip
> +echo "*** PASS ***"
> --
> 2.8.0
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH net-next 3/4] samples/bpf: extend test_tunnel_bpf.sh with IPIP test
  2016-09-16  5:16   ` William Tu
@ 2016-09-16  7:22     ` Daniel Borkmann
  0 siblings, 0 replies; 8+ messages in thread
From: Daniel Borkmann @ 2016-09-16  7:22 UTC (permalink / raw)
  To: William Tu, Alexei Starovoitov
  Cc: David S . Miller, Thomas Graf, Linux Kernel Network Developers,
	kernel-team

Hi William,

On 09/16/2016 07:16 AM, William Tu wrote:
> Hi Alexei,
>
> Is there a corresponding patch for iproute2? I tested this patch but fails at:
> + ip link add dev ipip11 type ipip external
> because my ip command does not support "external".

Yes, as with any other collect metadata backend, you need a small patch
to iproute2 that, in this case, sets the IFLA_IPTUN_COLLECT_METADATA flag
via the conventional "external" keyword. It will be posted on Monday at the
latest (Alexei mentioned he's on PTO today).

Cheers,
Daniel

> Thanks
> William
>
>
> On Thu, Sep 15, 2016 at 1:00 PM, Alexei Starovoitov <ast@fb.com> wrote:
>> extend existing tests for vxlan, geneve, gre to include IPIP tunnel.
>> It tests both traditional tunnel configuration and
>> dynamic via bpf helpers.
>>
>> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
>> ---
>>   samples/bpf/tcbpf2_kern.c      | 58 ++++++++++++++++++++++++++++++++++++++++++
>>   samples/bpf/test_tunnel_bpf.sh | 56 ++++++++++++++++++++++++++++++++++------
>>   2 files changed, 106 insertions(+), 8 deletions(-)
>>
>> diff --git a/samples/bpf/tcbpf2_kern.c b/samples/bpf/tcbpf2_kern.c
>> index 7a15289da6cc..c1917d968fb4 100644
>> --- a/samples/bpf/tcbpf2_kern.c
>> +++ b/samples/bpf/tcbpf2_kern.c
>> @@ -1,4 +1,5 @@
>>   /* Copyright (c) 2016 VMware
>> + * Copyright (c) 2016 Facebook
>>    *
>>    * This program is free software; you can redistribute it and/or
>>    * modify it under the terms of version 2 of the GNU General Public
>> @@ -188,4 +189,61 @@ int _geneve_get_tunnel(struct __sk_buff *skb)
>>          return TC_ACT_OK;
>>   }
>>
>> +SEC("ipip_set_tunnel")
>> +int _ipip_set_tunnel(struct __sk_buff *skb)
>> +{
>> +       struct bpf_tunnel_key key = {};
>> +       void *data = (void *)(long)skb->data;
>> +       struct iphdr *iph = data;
>> +       struct tcphdr *tcp = data + sizeof(*iph);
>> +       void *data_end = (void *)(long)skb->data_end;
>> +       int ret;
>> +
>> +       /* single length check */
>> +       if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
>> +               ERROR(1);
>> +               return TC_ACT_SHOT;
>> +       }
>> +
>> +       key.tunnel_ttl = 64;
>> +       if (iph->protocol == IPPROTO_ICMP) {
>> +               key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
>> +       } else {
>> +               if (iph->protocol != IPPROTO_TCP || iph->ihl != 5)
>> +                       return TC_ACT_SHOT;
>> +
>> +               if (tcp->dest == htons(5200))
>> +                       key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
>> +               else if (tcp->dest == htons(5201))
>> +                       key.remote_ipv4 = 0xac100165; /* 172.16.1.101 */
>> +               else
>> +                       return TC_ACT_SHOT;
>> +       }
>> +
>> +       ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
>> +       if (ret < 0) {
>> +               ERROR(ret);
>> +               return TC_ACT_SHOT;
>> +       }
>> +
>> +       return TC_ACT_OK;
>> +}
>> +
>> +SEC("ipip_get_tunnel")
>> +int _ipip_get_tunnel(struct __sk_buff *skb)
>> +{
>> +       int ret;
>> +       struct bpf_tunnel_key key;
>> +       char fmt[] = "remote ip 0x%x\n";
>> +
>> +       ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
>> +       if (ret < 0) {
>> +               ERROR(ret);
>> +               return TC_ACT_SHOT;
>> +       }
>> +
>> +       bpf_trace_printk(fmt, sizeof(fmt), key.remote_ipv4);
>> +       return TC_ACT_OK;
>> +}
>> +
>>   char _license[] SEC("license") = "GPL";
>> diff --git a/samples/bpf/test_tunnel_bpf.sh b/samples/bpf/test_tunnel_bpf.sh
>> index 4956589a83ae..1ff634f187b7 100755
>> --- a/samples/bpf/test_tunnel_bpf.sh
>> +++ b/samples/bpf/test_tunnel_bpf.sh
>> @@ -9,15 +9,13 @@
>>   # local 172.16.1.200 remote 172.16.1.100
>>   # veth1 IP: 172.16.1.200, tunnel dev <type>11
>>
>> -set -e
>> -
>>   function config_device {
>>          ip netns add at_ns0
>>          ip link add veth0 type veth peer name veth1
>>          ip link set veth0 netns at_ns0
>>          ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0
>>          ip netns exec at_ns0 ip link set dev veth0 up
>> -       ip link set dev veth1 up
>> +       ip link set dev veth1 up mtu 1500
>>          ip addr add dev veth1 172.16.1.200/24
>>   }
>>
>> @@ -67,6 +65,19 @@ function add_geneve_tunnel {
>>          ip addr add dev $DEV 10.1.1.200/24
>>   }
>>
>> +function add_ipip_tunnel {
>> +       # in namespace
>> +       ip netns exec at_ns0 \
>> +               ip link add dev $DEV_NS type $TYPE local 172.16.1.100 remote 172.16.1.200
>> +       ip netns exec at_ns0 ip link set dev $DEV_NS up
>> +       ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
>> +
>> +       # out of namespace
>> +       ip link add dev $DEV type $TYPE external
>> +       ip link set dev $DEV up
>> +       ip addr add dev $DEV 10.1.1.200/24
>> +}
>> +
>>   function attach_bpf {
>>          DEV=$1
>>          SET_TUNNEL=$2
>> @@ -85,6 +96,7 @@ function test_gre {
>>          attach_bpf $DEV gre_set_tunnel gre_get_tunnel
>>          ping -c 1 10.1.1.100
>>          ip netns exec at_ns0 ping -c 1 10.1.1.200
>> +       cleanup
>>   }
>>
>>   function test_vxlan {
>> @@ -96,6 +108,7 @@ function test_vxlan {
>>          attach_bpf $DEV vxlan_set_tunnel vxlan_get_tunnel
>>          ping -c 1 10.1.1.100
>>          ip netns exec at_ns0 ping -c 1 10.1.1.200
>> +       cleanup
>>   }
>>
>>   function test_geneve {
>> @@ -107,21 +120,48 @@ function test_geneve {
>>          attach_bpf $DEV geneve_set_tunnel geneve_get_tunnel
>>          ping -c 1 10.1.1.100
>>          ip netns exec at_ns0 ping -c 1 10.1.1.200
>> +       cleanup
>> +}
>> +
>> +function test_ipip {
>> +       TYPE=ipip
>> +       DEV_NS=ipip00
>> +       DEV=ipip11
>> +       config_device
>> +       tcpdump -nei veth1 &
>> +       cat /sys/kernel/debug/tracing/trace_pipe &
>> +       add_ipip_tunnel
>> +       ethtool -K veth1 gso off gro off rx off tx off
>> +       ip link set dev veth1 mtu 1500
>> +       attach_bpf $DEV ipip_set_tunnel ipip_get_tunnel
>> +       ping -c 1 10.1.1.100
>> +       ip netns exec at_ns0 ping -c 1 10.1.1.200
>> +       ip netns exec at_ns0 iperf -sD -p 5200 > /dev/null
>> +       sleep 0.2
>> +       iperf -c 10.1.1.100 -n 5k -p 5200
>> +       cleanup
>>   }
>>
>>   function cleanup {
>> +       set +ex
>> +       pkill iperf
>>          ip netns delete at_ns0
>>          ip link del veth1
>> -       ip link del $DEV
>> +       ip link del ipip11
>> +       ip link del gretap11
>> +       ip link del geneve11
>> +       pkill tcpdump
>> +       pkill cat
>> +       set -ex
>>   }
>>
>> +cleanup
>>   echo "Testing GRE tunnel..."
>>   test_gre
>> -cleanup
>>   echo "Testing VXLAN tunnel..."
>>   test_vxlan
>> -cleanup
>>   echo "Testing GENEVE tunnel..."
>>   test_geneve
>> -cleanup
>> -echo "Success"
>> +echo "Testing IPIP tunnel..."
>> +test_ipip
>> +echo "*** PASS ***"
>> --
>> 2.8.0
>>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH net-next 0/4] ip_tunnel: add collect_md mode to IPv4/IPv6 tunnels
  2016-09-15 20:00 [PATCH net-next 0/4] ip_tunnel: add collect_md mode to IPv4/IPv6 tunnels Alexei Starovoitov
                   ` (3 preceding siblings ...)
  2016-09-15 20:00 ` [PATCH net-next 4/4] samples/bpf: add comprehensive ipip, ipip6, ip6ip6 test Alexei Starovoitov
@ 2016-09-17 14:13 ` David Miller
  4 siblings, 0 replies; 8+ messages in thread
From: David Miller @ 2016-09-17 14:13 UTC (permalink / raw)
  To: ast; +Cc: daniel, tgraf, netdev, kernel-team

From: Alexei Starovoitov <ast@fb.com>
Date: Thu, 15 Sep 2016 13:00:28 -0700

> Similar to geneve, vxlan, gre tunnels implement 'collect metadata' mode
> in ipip, ipip6, ip6ip6 tunnels.

Series applied, thanks.

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2016-09-17 14:13 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-09-15 20:00 [PATCH net-next 0/4] ip_tunnel: add collect_md mode to IPv4/IPv6 tunnels Alexei Starovoitov
2016-09-15 20:00 ` [PATCH net-next 1/4] ip_tunnel: add collect_md mode to IPIP tunnel Alexei Starovoitov
2016-09-15 20:00 ` [PATCH net-next 2/4] ip6_tunnel: add collect_md mode to IPv6 tunnels Alexei Starovoitov
2016-09-15 20:00 ` [PATCH net-next 3/4] samples/bpf: extend test_tunnel_bpf.sh with IPIP test Alexei Starovoitov
2016-09-16  5:16   ` William Tu
2016-09-16  7:22     ` Daniel Borkmann
2016-09-15 20:00 ` [PATCH net-next 4/4] samples/bpf: add comprehensive ipip, ipip6, ip6ip6 test Alexei Starovoitov
2016-09-17 14:13 ` [PATCH net-next 0/4] ip_tunnel: add collect_md mode to IPv4/IPv6 tunnels David Miller

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.