All of lore.kernel.org
 help / color / mirror / Atom feed
* [net-next v5 0/3] Refactor vxlan and l2tp to use common UDP tunnel APIs
@ 2014-09-11  3:29 Andy Zhou
  2014-09-11  3:29 ` [net-next v5 1/3] udp-tunnel: Expand " Andy Zhou
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Andy Zhou @ 2014-09-11  3:29 UTC (permalink / raw)
  To: davem; +Cc: netdev, Andy Zhou

This patch series expands the current UDP tunnel APIs and refactors the
current UDP tunnel based protocols to make use of the new APIs. The main
motivation is to reduce code duplication.

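Patch 1 expands the common UDP tunnel APIs; patches 2 and 3 refactor the
vxlan and l2tp drivers to make use of the common functions.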

Andy Zhou (3):
  udp-tunnel: Expand UDP tunnel APIs
  vxlan: Refactor vxlan driver to make use of the common UDP tunnel
    functions.
  l2tp: Refactor l2tp core driver to make use of the common UDP tunnel 
       functions

 drivers/net/vxlan.c           |  174 ++++++++++++++--------------------
 include/net/udp_tunnel.h      |   73 +++++++++++++++
 include/net/vxlan.h           |   17 ++--
 net/ipv4/Kconfig              |    1 +
 net/ipv4/udp_tunnel.c         |  108 ++++++++++++++-------
 net/ipv6/Makefile             |    1 +
 net/ipv6/ip6_udp_tunnel.c     |  121 ++++++++++++++++++++++++
 net/l2tp/l2tp_core.c          |  208 +++++++++++++++++++++++++----------------
 net/openvswitch/vport-vxlan.c |    6 +-
 9 files changed, 480 insertions(+), 229 deletions(-)
 create mode 100644 net/ipv6/ip6_udp_tunnel.c

-- 
1.7.9.5

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [net-next v5 1/3] udp-tunnel: Expand UDP tunnel APIs
  2014-09-11  3:29 [net-next v5 0/3] Refactor vxlan and l2tp to use common UDP tunnel APIs Andy Zhou
@ 2014-09-11  3:29 ` Andy Zhou
  2014-09-11 21:04   ` Tom Herbert
  2014-09-12 21:35   ` Tom Herbert
  2014-09-11  3:29 ` [net-next v5 2/3] vxlan: Refactor vxlan driver to make use of the common UDP tunnel functions Andy Zhou
  2014-09-11  3:29 ` [net-next v5 3/3] l2tp: Refactor l2tp core " Andy Zhou
  2 siblings, 2 replies; 8+ messages in thread
From: Andy Zhou @ 2014-09-11  3:29 UTC (permalink / raw)
  To: davem; +Cc: netdev, Andy Zhou

Add common UDP tunnel socket creation and packet transmission APIs
that can be used by other UDP-based tunneling protocol
implementations.

Signed-off-by: Andy Zhou <azhou@nicira.com>
---
 include/net/udp_tunnel.h  |   73 +++++++++++++++++++++++++++
 net/ipv4/Kconfig          |    1 +
 net/ipv4/udp_tunnel.c     |  108 ++++++++++++++++++++++++++--------------
 net/ipv6/Makefile         |    1 +
 net/ipv6/ip6_udp_tunnel.c |  121 +++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 268 insertions(+), 36 deletions(-)
 create mode 100644 net/ipv6/ip6_udp_tunnel.c

diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
index ffd69cb..e9dcf83 100644
--- a/include/net/udp_tunnel.h
+++ b/include/net/udp_tunnel.h
@@ -1,6 +1,14 @@
 #ifndef __NET_UDP_TUNNEL_H
 #define __NET_UDP_TUNNEL_H
 
+#include <net/ip_tunnels.h>
+#include <net/udp.h>
+
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ipv6.h>
+#include <net/addrconf.h>
+#endif
+
 struct udp_port_cfg {
 	u8			family;
 
@@ -29,4 +37,69 @@ struct udp_port_cfg {
 int udp_sock_create(struct net *net, struct udp_port_cfg *cfg,
 		    struct socket **sockp);
 
+#if IS_ENABLED(CONFIG_IPV6)
+int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
+		     struct socket **sockp);
+#else
+static inline int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
+				   struct socket **sockp)
+{
+	return -EPFNOSUPPORT;	/* IPv6 is not built in; no socket is created */
+}
+#endif
+
+struct udp_tunnel_sock;
+
+typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb);
+typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk);
+
+struct udp_tunnel_sock_cfg {
+	struct socket *sock;	/* The socket UDP tunnel will attach to */
+	/* Used for setting up udp_sock fields, see udp.h for details */
+	__u8  encap_type;
+	udp_tunnel_encap_rcv_t encap_rcv;
+	udp_tunnel_encap_destroy_t encap_destroy;
+};
+
+struct udp_tunnel_sock {
+	struct socket *sock;
+};
+
+struct udp_tunnel_sock *create_udp_tunnel_sock(struct net *net, size_t size,
+					       struct udp_tunnel_sock_cfg
+							*sock_cfg);
+
+int udp_tunnel_xmit_skb(struct udp_tunnel_sock *uts, struct rtable *rt,
+			struct sk_buff *skb, __be32 src, __be32 dst,
+			__u8 tos, __u8 ttl, __be16 df, __be16 src_port,
+			__be16 dst_port, bool xnet);
+
+#if IS_ENABLED(CONFIG_IPV6)
+int udp_tunnel6_xmit_skb(struct udp_tunnel_sock *uts, struct dst_entry *dst,
+			 struct sk_buff *skb, struct net_device *dev,
+			 struct in6_addr *saddr, struct in6_addr *daddr,
+			 __u8 prio, __u8 ttl, __be16 src_port,
+			 __be16 dst_port);
+#endif
+
+void udp_tunnel_sock_release(struct udp_tunnel_sock *uts);
+void udp_tunnel_sock_free(struct udp_tunnel_sock *uts);
+
+static inline struct sk_buff *udp_tunnel_handle_offloads(struct sk_buff *skb,
+							 bool udp_csum)
+{
+	int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
+
+	return iptunnel_handle_offloads(skb, udp_csum, type);
+}
+
+static inline void udp_tunnel_encap_enable(struct socket *sock)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+	if (sock->sk->sk_family == PF_INET6)
+		ipv6_stub->udpv6_encap_enable();
+	else
+#endif
+		udp_encap_enable();
+}
 #endif
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index dbc10d8..cccb95f 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -308,6 +308,7 @@ config NET_IPVTI
 	  on top.
 
 config NET_UDP_TUNNEL
+	depends on (IPV6 || IPV6=n)
 	tristate
 	default n
 
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
index 61ec1a6..d60c1a0 100644
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel.c
@@ -14,42 +14,9 @@ int udp_sock_create(struct net *net, struct udp_port_cfg *cfg,
 	int err = -EINVAL;
 	struct socket *sock = NULL;
 
-#if IS_ENABLED(CONFIG_IPV6)
 	if (cfg->family == AF_INET6) {
-		struct sockaddr_in6 udp6_addr;
-
-		err = sock_create_kern(AF_INET6, SOCK_DGRAM, 0, &sock);
-		if (err < 0)
-			goto error;
-
-		sk_change_net(sock->sk, net);
-
-		udp6_addr.sin6_family = AF_INET6;
-		memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6,
-		       sizeof(udp6_addr.sin6_addr));
-		udp6_addr.sin6_port = cfg->local_udp_port;
-		err = kernel_bind(sock, (struct sockaddr *)&udp6_addr,
-				  sizeof(udp6_addr));
-		if (err < 0)
-			goto error;
-
-		if (cfg->peer_udp_port) {
-			udp6_addr.sin6_family = AF_INET6;
-			memcpy(&udp6_addr.sin6_addr, &cfg->peer_ip6,
-			       sizeof(udp6_addr.sin6_addr));
-			udp6_addr.sin6_port = cfg->peer_udp_port;
-			err = kernel_connect(sock,
-					     (struct sockaddr *)&udp6_addr,
-					     sizeof(udp6_addr), 0);
-		}
-		if (err < 0)
-			goto error;
-
-		udp_set_no_check6_tx(sock->sk, !cfg->use_udp6_tx_checksums);
-		udp_set_no_check6_rx(sock->sk, !cfg->use_udp6_rx_checksums);
-	} else
-#endif
-	if (cfg->family == AF_INET) {
+		return udp_sock_create6(net, cfg, sockp);
+	} else if (cfg->family == AF_INET) {
 		struct sockaddr_in udp_addr;
 
 		err = sock_create_kern(AF_INET, SOCK_DGRAM, 0, &sock);
@@ -82,7 +49,6 @@ int udp_sock_create(struct net *net, struct udp_port_cfg *cfg,
 		return -EPFNOSUPPORT;
 	}
 
-
 	*sockp = sock;
 
 	return 0;
@@ -97,4 +63,74 @@ error:
 }
 EXPORT_SYMBOL(udp_sock_create);
 
+struct udp_tunnel_sock *
+create_udp_tunnel_sock(struct net *net, size_t size,
+		       struct udp_tunnel_sock_cfg *cfg)
+{
+	struct udp_tunnel_sock *uts;
+	struct sock *sk;
+	struct socket *sock = cfg->sock;
+
+	uts = kzalloc(size, GFP_KERNEL);
+	if (!uts)
+		return ERR_PTR(-ENOMEM);
+
+	sk = sock->sk;
+
+	/* Disable multicast loopback */
+	inet_sk(sk)->mc_loop = 0;
+
+	rcu_assign_sk_user_data(sk, uts);
+
+	udp_sk(sk)->encap_type = cfg->encap_type;
+	udp_sk(sk)->encap_rcv = cfg->encap_rcv;
+	udp_sk(sk)->encap_destroy = cfg->encap_destroy;
+
+	uts->sock = sock;
+
+	udp_tunnel_encap_enable(sock);
+
+	return uts;
+}
+EXPORT_SYMBOL_GPL(create_udp_tunnel_sock);
+
+int udp_tunnel_xmit_skb(struct udp_tunnel_sock *uts, struct rtable *rt,
+			struct sk_buff *skb, __be32 src, __be32 dst,
+			__u8 tos, __u8 ttl, __be16 df, __be16 src_port,
+			__be16 dst_port, bool xnet)
+{
+	struct udphdr *uh;
+	struct socket *sock = uts->sock;
+
+	__skb_push(skb, sizeof(*uh));
+	skb_reset_transport_header(skb);
+	uh = udp_hdr(skb);
+
+	uh->dest = dst_port;
+	uh->source = src_port;
+	uh->len = htons(skb->len);
+
+	udp_set_csum(sock->sk->sk_no_check_tx, skb, src, dst, skb->len);
+
+	return iptunnel_xmit(sock->sk, rt, skb, src, dst, IPPROTO_UDP,
+			     tos, ttl, df, xnet);
+}
+EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb);
+
+void udp_tunnel_sock_release(struct udp_tunnel_sock *uts)
+{
+	struct sock *sk = uts->sock->sk;
+
+	rcu_assign_sk_user_data(uts->sock->sk, NULL);
+	kernel_sock_shutdown(uts->sock, SHUT_RDWR);
+	sk_release_kernel(sk);
+}
+EXPORT_SYMBOL_GPL(udp_tunnel_sock_release);
+
+void udp_tunnel_sock_free(struct udp_tunnel_sock *uts)
+{
+	kfree(uts);
+}
+EXPORT_SYMBOL_GPL(udp_tunnel_sock_free);
+
 MODULE_LICENSE("GPL");
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 2fe6836..45f830e 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -35,6 +35,7 @@ obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o
 obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o
 obj-$(CONFIG_IPV6_MIP6) += mip6.o
 obj-$(CONFIG_NETFILTER)	+= netfilter/
+obj-$(CONFIG_NET_UDP_TUNNEL) += ip6_udp_tunnel.o
 
 obj-$(CONFIG_IPV6_VTI) += ip6_vti.o
 obj-$(CONFIG_IPV6_SIT) += sit.o
diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
new file mode 100644
index 0000000..5109f46
--- /dev/null
+++ b/net/ipv6/ip6_udp_tunnel.c
@@ -0,0 +1,121 @@
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/socket.h>
+#include <linux/udp.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/in6.h>
+#include <net/udp.h>
+#include <net/udp_tunnel.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <net/ip6_tunnel.h>
+#include <net/ip6_checksum.h>
+
+int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
+		     struct socket **sockp)
+{
+	struct sockaddr_in6 udp6_addr;
+	int err = -EINVAL;
+	struct socket *sock = NULL;
+
+	err = sock_create_kern(AF_INET6, SOCK_DGRAM, 0, &sock);
+	if (err < 0)
+		goto error;
+
+	sk_change_net(sock->sk, net);
+
+	udp6_addr.sin6_family = AF_INET6;
+	memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6,
+	       sizeof(udp6_addr.sin6_addr));
+	udp6_addr.sin6_port = cfg->local_udp_port;
+	err = kernel_bind(sock, (struct sockaddr *)&udp6_addr,
+			  sizeof(udp6_addr));
+	if (err < 0)
+		goto error;
+
+	if (cfg->peer_udp_port) {
+		udp6_addr.sin6_family = AF_INET6;
+		memcpy(&udp6_addr.sin6_addr, &cfg->peer_ip6,
+		       sizeof(udp6_addr.sin6_addr));
+		udp6_addr.sin6_port = cfg->peer_udp_port;
+		err = kernel_connect(sock,
+				     (struct sockaddr *)&udp6_addr,
+				     sizeof(udp6_addr), 0);
+	}
+	if (err < 0)
+		goto error;
+
+	udp_set_no_check6_tx(sock->sk, !cfg->use_udp6_tx_checksums);
+	udp_set_no_check6_rx(sock->sk, !cfg->use_udp6_rx_checksums);
+
+	*sockp = sock;
+	return 0;
+
+error:
+	if (sock) {
+		kernel_sock_shutdown(sock, SHUT_RDWR);
+		sk_release_kernel(sock->sk);
+	}
+	*sockp = NULL;
+	return err;
+}
+EXPORT_SYMBOL_GPL(udp_sock_create6);
+
+/* Prepend a UDP header and an IPv6 header to @skb and hand it to
+ * ip6tunnel_xmit() for transmission on @dev.
+ *
+ * @uts:  tunnel socket; its UDP6 no-check-tx flag is passed on to
+ *        udp6_set_csum() when the UDP checksum field is filled in
+ * @dst:  route attached to the skb with skb_dst_set()
+ * @saddr, @daddr, @prio, @ttl: values written into the IPv6 header
+ * @src_port, @dst_port: values written into the UDP header
+ * Always returns 0.
+ */
+int udp_tunnel6_xmit_skb(struct udp_tunnel_sock *uts, struct dst_entry *dst,
+			 struct sk_buff *skb, struct net_device *dev,
+			 struct in6_addr *saddr, struct in6_addr *daddr,
+			 __u8 prio, __u8 ttl, __be16 src_port, __be16 dst_port)
+{
+	struct udphdr *uh;
+	struct ipv6hdr *ip6h;
+
+	__skb_push(skb, sizeof(*uh));
+	skb_reset_transport_header(skb);
+	uh = udp_hdr(skb);
+
+	uh->dest = dst_port;
+	uh->source = src_port;
+
+	uh->len = htons(skb->len);
+	uh->check = 0;
+
+	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED
+			    | IPSKB_REROUTED);
+	skb_dst_set(skb, dst);
+
+	/* Same checksum call the vxlan transmit path made before it was
+	 * moved here; kept after skb_dst_set(), as it was there.
+	 */
+	udp6_set_csum(udp_get_no_check6_tx(uts->sock->sk), skb,
+		      saddr, daddr, skb->len);
+
+	__skb_push(skb, sizeof(*ip6h));
+	skb_reset_network_header(skb);
+	ip6h		  = ipv6_hdr(skb);
+	ip6h->version	  = 6;
+	ip6h->priority	  = prio;
+	ip6h->flow_lbl[0] = 0;
+	ip6h->flow_lbl[1] = 0;
+	ip6h->flow_lbl[2] = 0;
+	ip6h->payload_len = htons(skb->len);
+	ip6h->nexthdr     = IPPROTO_UDP;
+	ip6h->hop_limit   = ttl;
+	ip6h->daddr	  = *daddr;
+	ip6h->saddr	  = *saddr;
+
+	ip6tunnel_xmit(skb, dev);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(udp_tunnel6_xmit_skb);
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [net-next v5 2/3] vxlan: Refactor vxlan driver to make use of the common UDP tunnel functions.
  2014-09-11  3:29 [net-next v5 0/3] Refactor vxlan and l2tp to use common UDP tunnel APIs Andy Zhou
  2014-09-11  3:29 ` [net-next v5 1/3] udp-tunnel: Expand " Andy Zhou
@ 2014-09-11  3:29 ` Andy Zhou
  2014-09-11  3:29 ` [net-next v5 3/3] l2tp: Refactor l2tp core " Andy Zhou
  2 siblings, 0 replies; 8+ messages in thread
From: Andy Zhou @ 2014-09-11  3:29 UTC (permalink / raw)
  To: davem; +Cc: netdev, Andy Zhou

Signed-off-by: Andy Zhou <azhou@nicira.com>
---
 drivers/net/vxlan.c           |  174 +++++++++++++++++------------------------
 include/net/vxlan.h           |   17 ++--
 net/openvswitch/vport-vxlan.c |    6 +-
 3 files changed, 84 insertions(+), 113 deletions(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 53c3ec1..d915669 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -42,6 +42,7 @@
 #include <net/netns/generic.h>
 #include <net/vxlan.h>
 #include <net/protocol.h>
+#include <net/udp_tunnel.h>
 #if IS_ENABLED(CONFIG_IPV6)
 #include <net/ipv6.h>
 #include <net/addrconf.h>
@@ -280,7 +281,7 @@ static struct vxlan_sock *vxlan_find_sock(struct net *net, __be16 port)
 	struct vxlan_sock *vs;
 
 	hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
-		if (inet_sk(vs->sock->sk)->inet_sport == port)
+		if (inet_sk(vs->uts.sock->sk)->inet_sport == port)
 			return vs;
 	}
 	return NULL;
@@ -636,7 +637,7 @@ static int vxlan_gro_complete(struct sk_buff *skb, int nhoff)
 static void vxlan_notify_add_rx_port(struct vxlan_sock *vs)
 {
 	struct net_device *dev;
-	struct sock *sk = vs->sock->sk;
+	struct sock *sk = vs->uts.sock->sk;
 	struct net *net = sock_net(sk);
 	sa_family_t sa_family = sk->sk_family;
 	__be16 port = inet_sk(sk)->inet_sport;
@@ -661,7 +662,7 @@ static void vxlan_notify_add_rx_port(struct vxlan_sock *vs)
 static void vxlan_notify_del_rx_port(struct vxlan_sock *vs)
 {
 	struct net_device *dev;
-	struct sock *sk = vs->sock->sk;
+	struct sock *sk = vs->uts.sock->sk;
 	struct net *net = sock_net(sk);
 	sa_family_t sa_family = sk->sk_family;
 	__be16 port = inet_sk(sk)->inet_sport;
@@ -1053,7 +1054,7 @@ static void vxlan_sock_hold(struct vxlan_sock *vs)
 
 void vxlan_sock_release(struct vxlan_sock *vs)
 {
-	struct sock *sk = vs->sock->sk;
+	struct sock *sk = vs->uts.sock->sk;
 	struct net *net = sock_net(sk);
 	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
 
@@ -1062,7 +1063,6 @@ void vxlan_sock_release(struct vxlan_sock *vs)
 
 	spin_lock(&vn->sock_lock);
 	hlist_del_rcu(&vs->hlist);
-	rcu_assign_sk_user_data(vs->sock->sk, NULL);
 	vxlan_notify_del_rx_port(vs);
 	spin_unlock(&vn->sock_lock);
 
@@ -1078,7 +1078,7 @@ static void vxlan_igmp_join(struct work_struct *work)
 {
 	struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, igmp_join);
 	struct vxlan_sock *vs = vxlan->vn_sock;
-	struct sock *sk = vs->sock->sk;
+	struct sock *sk = vs->uts.sock->sk;
 	union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
 	int ifindex = vxlan->default_dst.remote_ifindex;
 
@@ -1107,7 +1107,7 @@ static void vxlan_igmp_leave(struct work_struct *work)
 {
 	struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, igmp_leave);
 	struct vxlan_sock *vs = vxlan->vn_sock;
-	struct sock *sk = vs->sock->sk;
+	struct sock *sk = vs->uts.sock->sk;
 	union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
 	int ifindex = vxlan->default_dst.remote_ifindex;
 
@@ -1336,7 +1336,6 @@ out:
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
-
 static struct sk_buff *vxlan_na_create(struct sk_buff *request,
 	struct neighbour *n, bool isrouter)
 {
@@ -1570,13 +1569,6 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)
 	return false;
 }
 
-static inline struct sk_buff *vxlan_handle_offloads(struct sk_buff *skb,
-						    bool udp_csum)
-{
-	int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
-	return iptunnel_handle_offloads(skb, udp_csum, type);
-}
-
 #if IS_ENABLED(CONFIG_IPV6)
 static int vxlan6_xmit_skb(struct vxlan_sock *vs,
 			   struct dst_entry *dst, struct sk_buff *skb,
@@ -1585,13 +1577,12 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
 			   __be16 src_port, __be16 dst_port, __be32 vni,
 			   bool xnet)
 {
-	struct ipv6hdr *ip6h;
 	struct vxlanhdr *vxh;
-	struct udphdr *uh;
 	int min_headroom;
 	int err;
+	bool udp_sum = !udp_get_no_check6_tx(vs->uts.sock->sk);
 
-	skb = vxlan_handle_offloads(skb, !udp_get_no_check6_tx(vs->sock->sk));
+	skb = udp_tunnel_handle_offloads(skb, udp_sum);
 	if (IS_ERR(skb))
 		return -EINVAL;
 
@@ -1619,38 +1610,8 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
 	vxh->vx_flags = htonl(VXLAN_FLAGS);
 	vxh->vx_vni = vni;
 
-	__skb_push(skb, sizeof(*uh));
-	skb_reset_transport_header(skb);
-	uh = udp_hdr(skb);
-
-	uh->dest = dst_port;
-	uh->source = src_port;
-
-	uh->len = htons(skb->len);
-
-	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
-	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
-			      IPSKB_REROUTED);
-	skb_dst_set(skb, dst);
-
-	udp6_set_csum(udp_get_no_check6_tx(vs->sock->sk), skb,
-		      saddr, daddr, skb->len);
-
-	__skb_push(skb, sizeof(*ip6h));
-	skb_reset_network_header(skb);
-	ip6h		  = ipv6_hdr(skb);
-	ip6h->version	  = 6;
-	ip6h->priority	  = prio;
-	ip6h->flow_lbl[0] = 0;
-	ip6h->flow_lbl[1] = 0;
-	ip6h->flow_lbl[2] = 0;
-	ip6h->payload_len = htons(skb->len);
-	ip6h->nexthdr     = IPPROTO_UDP;
-	ip6h->hop_limit   = ttl;
-	ip6h->daddr	  = *daddr;
-	ip6h->saddr	  = *saddr;
-
-	ip6tunnel_xmit(skb, dev);
+	udp_tunnel6_xmit_skb(&vs->uts, dst, skb, dev, saddr, daddr, prio,
+			     ttl, src_port, dst_port);
 	return 0;
 }
 #endif
@@ -1661,11 +1622,11 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
 		   __be16 src_port, __be16 dst_port, __be32 vni, bool xnet)
 {
 	struct vxlanhdr *vxh;
-	struct udphdr *uh;
 	int min_headroom;
 	int err;
+	bool udp_sum = !vs->uts.sock->sk->sk_no_check_tx;
 
-	skb = vxlan_handle_offloads(skb, !vs->sock->sk->sk_no_check_tx);
+	skb = udp_tunnel_handle_offloads(skb, udp_sum);
 	if (IS_ERR(skb))
 		return -EINVAL;
 
@@ -1691,20 +1652,8 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
 	vxh->vx_flags = htonl(VXLAN_FLAGS);
 	vxh->vx_vni = vni;
 
-	__skb_push(skb, sizeof(*uh));
-	skb_reset_transport_header(skb);
-	uh = udp_hdr(skb);
-
-	uh->dest = dst_port;
-	uh->source = src_port;
-
-	uh->len = htons(skb->len);
-
-	udp_set_csum(vs->sock->sk->sk_no_check_tx, skb,
-		     src, dst, skb->len);
-
-	return iptunnel_xmit(vs->sock->sk, rt, skb, src, dst, IPPROTO_UDP,
-			     tos, ttl, df, xnet);
+	return udp_tunnel_xmit_skb(&vs->uts, rt, skb, src, dst, tos,
+				   ttl, df, src_port, dst_port, xnet);
 }
 EXPORT_SYMBOL_GPL(vxlan_xmit_skb);
 
@@ -1829,18 +1778,18 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 		tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
 		ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
 
-		err = vxlan_xmit_skb(vxlan->vn_sock, rt, skb,
-				     fl4.saddr, dst->sin.sin_addr.s_addr,
-				     tos, ttl, df, src_port, dst_port,
-				     htonl(vni << 8),
-				     !net_eq(vxlan->net, dev_net(vxlan->dev)));
+		err = udp_tunnel_xmit_skb(&vxlan->vn_sock->uts, rt, skb,
+					  fl4.saddr, dst->sin.sin_addr.s_addr,
+					  tos, ttl, df, src_port, dst_port,
+					  !net_eq(vxlan->net,
+						  dev_net(vxlan->dev)));
 
 		if (err < 0)
 			goto rt_tx_error;
 		iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
 #if IS_ENABLED(CONFIG_IPV6)
 	} else {
-		struct sock *sk = vxlan->vn_sock->sock->sk;
+		struct sock *sk = vxlan->vn_sock->uts.sock->sk;
 		struct dst_entry *ndst;
 		struct flowi6 fl6;
 		u32 flags;
@@ -2202,8 +2151,8 @@ void vxlan_get_rx_port(struct net_device *dev)
 	spin_lock(&vn->sock_lock);
 	for (i = 0; i < PORT_HASH_SIZE; ++i) {
 		hlist_for_each_entry_rcu(vs, &vn->sock_list[i], hlist) {
-			port = inet_sk(vs->sock->sk)->inet_sport;
-			sa_family = vs->sock->sk->sk_family;
+			port = inet_sk(vs->uts.sock->sk)->inet_sport;
+			sa_family = vs->uts.sock->sk->sk_family;
 			dev->netdev_ops->ndo_add_vxlan_port(dev, sa_family,
 							    port);
 		}
@@ -2330,12 +2279,18 @@ static const struct ethtool_ops vxlan_ethtool_ops = {
 	.get_link	= ethtool_op_get_link,
 };
 
+static void free_vxlan_sock_rcu(struct rcu_head *rcu)
+{
+	struct vxlan_sock *vs = container_of(rcu, struct vxlan_sock, rcu);
+
+	udp_tunnel_sock_free(&vs->uts);
+}
+
 static void vxlan_del_work(struct work_struct *work)
 {
 	struct vxlan_sock *vs = container_of(work, struct vxlan_sock, del_work);
-
-	sk_release_kernel(vs->sock->sk);
-	kfree_rcu(vs, rcu);
+	udp_tunnel_sock_release(&vs->uts);
+	call_rcu(&vs->rcu, free_vxlan_sock_rcu);
 }
 
 static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
@@ -2375,39 +2330,60 @@ static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
 	return sock;
 }
 
+static struct vxlan_sock *vxlan_create_tunnel_sock(struct net *net,
+						   struct socket *sock)
+{
+	struct udp_tunnel_sock_cfg cfg;
+	struct udp_tunnel_sock *uts;
+	struct vxlan_sock *vs;
+
+	cfg.sock = sock;
+	cfg.encap_type = 1;
+	cfg.encap_rcv = vxlan_udp_encap_recv;
+	cfg.encap_destroy = NULL;
+
+	uts = create_udp_tunnel_sock(net, sizeof(*vs), &cfg);
+
+	if (IS_ERR(uts))
+		vs = ERR_CAST(uts);
+	else
+		vs = container_of(uts, struct vxlan_sock, uts);
+
+	return vs;
+}
+
 /* Create new listen socket if needed */
 static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
-					      vxlan_rcv_t *rcv, void *data,
+					      vxlan_rcv_t rcv, void *data,
 					      u32 flags)
 {
 	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
 	struct vxlan_sock *vs;
 	struct socket *sock;
-	struct sock *sk;
-	unsigned int h;
 	bool ipv6 = !!(flags & VXLAN_F_IPV6);
+	unsigned int h;
 
-	vs = kzalloc(sizeof(*vs), GFP_KERNEL);
-	if (!vs)
-		return ERR_PTR(-ENOMEM);
+	sock = vxlan_create_sock(net, ipv6, port, flags);
+	if (IS_ERR(sock))
+		return ERR_CAST(sock);
+
+	vs = vxlan_create_tunnel_sock(net, sock);
+	if (IS_ERR(vs))
+		return vs;
 
 	for (h = 0; h < VNI_HASH_SIZE; ++h)
 		INIT_HLIST_HEAD(&vs->vni_list[h]);
 
-	INIT_WORK(&vs->del_work, vxlan_del_work);
+	spin_lock(&vn->sock_lock);
+	list_add(&vs->next, &vn->vxlan_list);
+	spin_unlock(&vn->sock_lock);
 
-	sock = vxlan_create_sock(net, ipv6, port, flags);
-	if (IS_ERR(sock)) {
-		kfree(vs);
-		return ERR_CAST(sock);
-	}
+	INIT_WORK(&vs->del_work, vxlan_del_work);
 
-	vs->sock = sock;
-	sk = sock->sk;
 	atomic_set(&vs->refcnt, 1);
+
 	vs->rcv = rcv;
-	vs->data = data;
-	rcu_assign_sk_user_data(vs->sock->sk, vs);
+	vs->rcv_data = data;
 
 	/* Initialize the vxlan udp offloads structure */
 	vs->udp_offloads.port = port;
@@ -2419,21 +2395,11 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
 	vxlan_notify_add_rx_port(vs);
 	spin_unlock(&vn->sock_lock);
 
-	/* Mark socket as an encapsulation socket. */
-	udp_sk(sk)->encap_type = 1;
-	udp_sk(sk)->encap_rcv = vxlan_udp_encap_recv;
-#if IS_ENABLED(CONFIG_IPV6)
-	if (ipv6)
-		ipv6_stub->udpv6_encap_enable();
-	else
-#endif
-		udp_encap_enable();
-
 	return vs;
 }
 
 struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
-				  vxlan_rcv_t *rcv, void *data,
+				  vxlan_rcv_t rcv, void *data,
 				  bool no_share, u32 flags)
 {
 	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index d5f59f3..b1dd547 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -4,23 +4,27 @@
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
 #include <linux/udp.h>
+#include <net/udp_tunnel.h>
 
 #define VNI_HASH_BITS	10
 #define VNI_HASH_SIZE	(1<<VNI_HASH_BITS)
 
 struct vxlan_sock;
-typedef void (vxlan_rcv_t)(struct vxlan_sock *vh, struct sk_buff *skb, __be32 key);
 
-/* per UDP socket information */
+typedef void (*vxlan_rcv_t)(struct vxlan_sock *vs, struct sk_buff *skb,
+			    __be32 key);
+
+/* per vxlan socket information */
 struct vxlan_sock {
+	struct udp_tunnel_sock uts;  /* Must be the first member */
 	struct hlist_node hlist;
-	vxlan_rcv_t	 *rcv;
-	void		 *data;
+	struct list_head next;
 	struct work_struct del_work;
-	struct socket	 *sock;
 	struct rcu_head	  rcu;
 	struct hlist_head vni_list[VNI_HASH_SIZE];
 	atomic_t	  refcnt;
+	vxlan_rcv_t	  rcv;
+	void		  *rcv_data;
 	struct udp_offload udp_offloads;
 };
 
@@ -35,7 +39,7 @@ struct vxlan_sock {
 #define VXLAN_F_UDP_ZERO_CSUM6_RX	0x100
 
 struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
-				  vxlan_rcv_t *rcv, void *data,
+				  vxlan_rcv_t rcv, void *data,
 				  bool no_share, u32 flags);
 
 void vxlan_sock_release(struct vxlan_sock *vs);
@@ -57,4 +61,5 @@ static inline void vxlan_get_rx_port(struct net_device *netdev)
 {
 }
 #endif
+
 #endif
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index d8b7e24..7599efd 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -59,7 +59,7 @@ static inline struct vxlan_port *vxlan_vport(const struct vport *vport)
 static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
 {
 	struct ovs_key_ipv4_tunnel tun_key;
-	struct vport *vport = vs->data;
+	struct vport *vport = vs->rcv_data;
 	struct iphdr *iph;
 	__be64 key;
 
@@ -74,7 +74,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
 static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb)
 {
 	struct vxlan_port *vxlan_port = vxlan_vport(vport);
-	__be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
+	__be16 dst_port = inet_sk(vxlan_port->vs->uts.sock->sk)->inet_sport;
 
 	if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port)))
 		return -EMSGSIZE;
@@ -139,7 +139,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
 {
 	struct net *net = ovs_dp_get_net(vport->dp);
 	struct vxlan_port *vxlan_port = vxlan_vport(vport);
-	__be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
+	__be16 dst_port = inet_sk(vxlan_port->vs->uts.sock->sk)->inet_sport;
 	struct rtable *rt;
 	struct flowi4 fl;
 	__be16 src_port;
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [net-next v5 3/3] l2tp: Refactor l2tp core driver to make use of the common UDP tunnel functions
  2014-09-11  3:29 [net-next v5 0/3] Refactor vxlan and l2tp to use common UDP tunnel APIs Andy Zhou
  2014-09-11  3:29 ` [net-next v5 1/3] udp-tunnel: Expand " Andy Zhou
  2014-09-11  3:29 ` [net-next v5 2/3] vxlan: Refactor vxlan driver to make use of the common UDP tunnel functions Andy Zhou
@ 2014-09-11  3:29 ` Andy Zhou
  2 siblings, 0 replies; 8+ messages in thread
From: Andy Zhou @ 2014-09-11  3:29 UTC (permalink / raw)
  To: davem; +Cc: netdev, Andy Zhou

Signed-off-by: Andy Zhou <azhou@nicira.com>
---
 net/l2tp/l2tp_core.c |  208 +++++++++++++++++++++++++++++++-------------------
 1 file changed, 128 insertions(+), 80 deletions(-)

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 2aa2b6c..1f2bb8e 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -113,6 +113,11 @@ struct l2tp_net {
 	spinlock_t l2tp_session_hlist_lock;
 };
 
+struct l2tp_udp_tunnel {
+	struct udp_tunnel_sock uts;
+	struct l2tp_tunnel tunnel;
+};
+
 static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel);
 
 static inline struct l2tp_tunnel *l2tp_tunnel(struct sock *sk)
@@ -1198,7 +1203,6 @@ static void l2tp_tunnel_destruct(struct sock *sk)
 
 	l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: closing...\n", tunnel->name);
 
-
 	/* Disable udp encapsulation */
 	switch (tunnel->encap) {
 	case L2TP_ENCAPTYPE_UDP:
@@ -1298,6 +1302,18 @@ static void l2tp_udp_encap_destroy(struct sock *sk)
 	}
 }
 
+static void free_l2tp_udp_sock_rcu(struct rcu_head *rcu)
+{
+	struct l2tp_tunnel *tunnel;
+	struct l2tp_udp_tunnel *udp_tunnel;
+
+	tunnel = container_of(rcu, struct l2tp_tunnel, rcu);
+
+	udp_tunnel = container_of(tunnel, struct l2tp_udp_tunnel, tunnel);
+
+	udp_tunnel_sock_free(&udp_tunnel->uts);
+}
+
 /* Really kill the tunnel.
  * Come here only when all sessions have been cleared from the tunnel.
  */
@@ -1306,7 +1322,19 @@ static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel)
 	BUG_ON(atomic_read(&tunnel->ref_count) != 0);
 	BUG_ON(tunnel->sock != NULL);
 	l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: free...\n", tunnel->name);
-	kfree_rcu(tunnel, rcu);
+
+	switch (tunnel->encap) {
+	case L2TP_ENCAPTYPE_UDP:
+		call_rcu(&tunnel->rcu, free_l2tp_udp_sock_rcu);
+		break;
+
+	case L2TP_ENCAPTYPE_IP:
+		kfree_rcu(tunnel, rcu);
+		break;
+
+	default:
+		BUG();
+	}
 }
 
 /* Workqueue tunnel deletion function */
@@ -1342,6 +1370,35 @@ static void l2tp_tunnel_del_work(struct work_struct *work)
 	l2tp_tunnel_sock_put(sk);
 }
 
+static void l2tp_tunnel_udp_port_conf(struct l2tp_tunnel_cfg *cfg,
+				      struct udp_port_cfg *udp_conf)
+{
+	memset(udp_conf, 0, sizeof(*udp_conf));
+
+#if IS_ENABLED(CONFIG_IPV6)
+	if (cfg->local_ip6 && cfg->peer_ip6) {
+		udp_conf->family = AF_INET6;
+		memcpy(&udp_conf->local_ip6, cfg->local_ip6,
+		       sizeof(udp_conf->local_ip6));
+		memcpy(&udp_conf->peer_ip6, cfg->peer_ip6,
+		       sizeof(udp_conf->peer_ip6));
+		/* cfg holds "zero checksum" requests while udp_conf holds
+		 * "use checksum" flags, so the sense has to be inverted. */
+		udp_conf->use_udp6_tx_checksums = !cfg->udp6_zero_tx_checksums;
+		udp_conf->use_udp6_rx_checksums = !cfg->udp6_zero_rx_checksums;
+	} else
+#endif
+	{
+		udp_conf->family = AF_INET;
+		udp_conf->local_ip = cfg->local_ip;
+		udp_conf->peer_ip = cfg->peer_ip;
+		udp_conf->use_udp_checksums = cfg->use_udp_checksums;
+	}
+	/* Both families: the UDP port config takes network byte order. */
+	udp_conf->local_udp_port = htons(cfg->local_udp_port);
+	udp_conf->peer_udp_port = htons(cfg->peer_udp_port);
+}
+
 /* Create a socket for the tunnel, if one isn't set up by
  * userspace. This is used for static tunnels where there is no
  * managing L2TP daemon.
@@ -1363,31 +1420,7 @@ static int l2tp_tunnel_sock_create(struct net *net,
 
 	switch (cfg->encap) {
 	case L2TP_ENCAPTYPE_UDP:
-		memset(&udp_conf, 0, sizeof(udp_conf));
-
-#if IS_ENABLED(CONFIG_IPV6)
-		if (cfg->local_ip6 && cfg->peer_ip6) {
-			udp_conf.family = AF_INET6;
-			memcpy(&udp_conf.local_ip6, cfg->local_ip6,
-			       sizeof(udp_conf.local_ip6));
-			memcpy(&udp_conf.peer_ip6, cfg->peer_ip6,
-			       sizeof(udp_conf.peer_ip6));
-			udp_conf.use_udp6_tx_checksums =
-			    cfg->udp6_zero_tx_checksums;
-			udp_conf.use_udp6_rx_checksums =
-			    cfg->udp6_zero_rx_checksums;
-		} else
-#endif
-		{
-			udp_conf.family = AF_INET;
-			udp_conf.local_ip = cfg->local_ip;
-			udp_conf.peer_ip = cfg->peer_ip;
-			udp_conf.use_udp_checksums = cfg->use_udp_checksums;
-		}
-
-		udp_conf.local_udp_port = htons(cfg->local_udp_port);
-		udp_conf.peer_udp_port = htons(cfg->peer_udp_port);
-
+		l2tp_tunnel_udp_port_conf(cfg, &udp_conf);
 		err = udp_sock_create(net, &udp_conf, &sock);
 		if (err < 0)
 			goto out;
@@ -1473,6 +1506,31 @@ out:
 
 static struct lock_class_key l2tp_socket_class;
 
+static int l2tp_sk_sanity_check(struct sock *sk, enum l2tp_encap_type encap,
+				u32 tunnel_id, int fd)
+{
+	unsigned int expected_protocol;
+
+	switch (encap) {
+	case L2TP_ENCAPTYPE_UDP:
+		expected_protocol = IPPROTO_UDP;
+		break;
+	case L2TP_ENCAPTYPE_IP:
+		expected_protocol = IPPROTO_L2TP;
+		break;
+	default:
+		return -EPROTONOSUPPORT;
+	}
+
+	if (sk->sk_protocol != expected_protocol) {
+		pr_err("tunl %hu: fd %d wrong protocol, got %d, expected %d\n",
+		       tunnel_id, fd, sk->sk_protocol, expected_protocol);
+		return -EPROTONOSUPPORT;
+	}
+
+	return 0;
+}
+
 int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp)
 {
 	struct l2tp_tunnel *tunnel = NULL;
@@ -1480,7 +1538,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
 	struct socket *sock = NULL;
 	struct sock *sk = NULL;
 	struct l2tp_net *pn;
-	enum l2tp_encap_type encap = L2TP_ENCAPTYPE_UDP;
+	enum l2tp_encap_type encap = cfg ? cfg->encap : L2TP_ENCAPTYPE_UDP;
 
 	/* Get the tunnel socket from the fd, which was opened by
 	 * the userspace L2TP daemon. If not specified, create a
@@ -1488,9 +1546,11 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
 	 */
 	if (fd < 0) {
 		err = l2tp_tunnel_sock_create(net, tunnel_id, peer_tunnel_id,
-				cfg, &sock);
+					      cfg, &sock);
 		if (err < 0)
 			goto err;
+
+		sk = sock->sk;
 	} else {
 		sock = sockfd_lookup(fd, &err);
 		if (!sock) {
@@ -1500,58 +1560,66 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
 			goto err;
 		}
 
+		sk = sock->sk;
+
 		/* Reject namespace mismatches */
-		if (!net_eq(sock_net(sock->sk), net)) {
+		if (!net_eq(sock_net(sk), net)) {
 			pr_err("tunl %u: netns mismatch\n", tunnel_id);
 			err = -EINVAL;
 			goto err;
 		}
-	}
 
-	sk = sock->sk;
+		/* Quick sanity checks */
+		err = l2tp_sk_sanity_check(sk, encap, tunnel_id, fd);
+		if (err)
+			goto err;
 
-	if (cfg != NULL)
-		encap = cfg->encap;
+		/* Check if this socket has already been prepped */
+		tunnel = l2tp_tunnel(sk);
+		if (tunnel != NULL) {
+			/* This socket has already been prepped */
+			err = -EBUSY;
+			goto err;
+		}
+	}
 
-	/* Quick sanity checks */
 	switch (encap) {
-	case L2TP_ENCAPTYPE_UDP:
-		err = -EPROTONOSUPPORT;
-		if (sk->sk_protocol != IPPROTO_UDP) {
-			pr_err("tunl %hu: fd %d wrong protocol, got %d, expected %d\n",
-			       tunnel_id, fd, sk->sk_protocol, IPPROTO_UDP);
+	case L2TP_ENCAPTYPE_UDP: {
+		struct udp_tunnel_sock_cfg udp_cfg;
+		struct l2tp_udp_tunnel *udp_tunnel;
+		struct udp_tunnel_sock *uts;
+
+		udp_cfg.sock = sock;
+		udp_cfg.encap_type = UDP_ENCAP_L2TPINUDP;
+		udp_cfg.encap_rcv = l2tp_udp_encap_recv;
+		udp_cfg.encap_destroy = l2tp_udp_encap_destroy;
+
+		uts = create_udp_tunnel_sock(net, sizeof(*udp_tunnel),
+					     &udp_cfg);
+		if (!uts) {
+			err = -ENOMEM;
 			goto err;
 		}
+
+		udp_tunnel = container_of(uts, struct l2tp_udp_tunnel, uts);
+		tunnel = &udp_tunnel->tunnel;
 		break;
+	}
 	case L2TP_ENCAPTYPE_IP:
-		err = -EPROTONOSUPPORT;
-		if (sk->sk_protocol != IPPROTO_L2TP) {
-			pr_err("tunl %hu: fd %d wrong protocol, got %d, expected %d\n",
-			       tunnel_id, fd, sk->sk_protocol, IPPROTO_L2TP);
+		tunnel = kzalloc(sizeof(*tunnel), GFP_KERNEL);
+		if (tunnel == NULL) {
+			err = -ENOMEM;
 			goto err;
 		}
-		break;
 	}
 
-	/* Check if this socket has already been prepped */
-	tunnel = l2tp_tunnel(sk);
-	if (tunnel != NULL) {
-		/* This socket has already been prepped */
-		err = -EBUSY;
-		goto err;
-	}
-
-	tunnel = kzalloc(sizeof(struct l2tp_tunnel), GFP_KERNEL);
-	if (tunnel == NULL) {
-		err = -ENOMEM;
-		goto err;
-	}
+	rcu_assign_sk_user_data(sk, tunnel);
 
+	tunnel->encap = encap;
 	tunnel->version = version;
 	tunnel->tunnel_id = tunnel_id;
 	tunnel->peer_tunnel_id = peer_tunnel_id;
-	tunnel->debug = L2TP_DEFAULT_DEBUG_FLAGS;
-
+	tunnel->debug = cfg ? cfg->debug : L2TP_DEFAULT_DEBUG_FLAGS;
 	tunnel->magic = L2TP_TUNNEL_MAGIC;
 	sprintf(&tunnel->name[0], "tunl %u", tunnel_id);
 	rwlock_init(&tunnel->hlist_lock);
@@ -1560,9 +1628,6 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
 	tunnel->l2tp_net = net;
 	pn = l2tp_pernet(net);
 
-	if (cfg != NULL)
-		tunnel->debug = cfg->debug;
-
 #if IS_ENABLED(CONFIG_IPV6)
 	if (sk->sk_family == PF_INET6) {
 		struct ipv6_pinfo *np = inet6_sk(sk);
@@ -1581,23 +1646,6 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
 	}
 #endif
 
-	/* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
-	tunnel->encap = encap;
-	if (encap == L2TP_ENCAPTYPE_UDP) {
-		/* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
-		udp_sk(sk)->encap_type = UDP_ENCAP_L2TPINUDP;
-		udp_sk(sk)->encap_rcv = l2tp_udp_encap_recv;
-		udp_sk(sk)->encap_destroy = l2tp_udp_encap_destroy;
-#if IS_ENABLED(CONFIG_IPV6)
-		if (sk->sk_family == PF_INET6 && !tunnel->v4mapped)
-			udpv6_encap_enable();
-		else
-#endif
-		udp_encap_enable();
-	}
-
-	sk->sk_user_data = tunnel;
-
 	/* Hook on the tunnel socket destructor so that we can cleanup
 	 * if the tunnel socket goes away.
 	 */
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [net-next v5 1/3] udp-tunnel: Expand UDP tunnel APIs
  2014-09-11  3:29 ` [net-next v5 1/3] udp-tunnel: Expand " Andy Zhou
@ 2014-09-11 21:04   ` Tom Herbert
  2014-09-12 20:42     ` Andy Zhou
  2014-09-12 21:35   ` Tom Herbert
  1 sibling, 1 reply; 8+ messages in thread
From: Tom Herbert @ 2014-09-11 21:04 UTC (permalink / raw)
  To: Andy Zhou; +Cc: David Miller, Linux Netdev List

On Wed, Sep 10, 2014 at 8:29 PM, Andy Zhou <azhou@nicira.com> wrote:
> Added common udp tunnel socket creation, and packet transmission APIs
> API that can be used by other UDP based tunneling protocol
> implementation.
>
> Signed-off-by: Andy Zhou <azhou@nicira.com>
> ---
>  include/net/udp_tunnel.h  |   73 +++++++++++++++++++++++++++
>  net/ipv4/Kconfig          |    1 +
>  net/ipv4/udp_tunnel.c     |  108 ++++++++++++++++++++++++++--------------
>  net/ipv6/Makefile         |    1 +
>  net/ipv6/ip6_udp_tunnel.c |  121 +++++++++++++++++++++++++++++++++++++++++++++
>  5 files changed, 268 insertions(+), 36 deletions(-)
>  create mode 100644 net/ipv6/ip6_udp_tunnel.c
>
> diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
> index ffd69cb..e9dcf83 100644
> --- a/include/net/udp_tunnel.h
> +++ b/include/net/udp_tunnel.h
> @@ -1,6 +1,14 @@
>  #ifndef __NET_UDP_TUNNEL_H
>  #define __NET_UDP_TUNNEL_H
>
> +#include <net/ip_tunnels.h>
> +#include <net/udp.h>
> +
> +#if IS_ENABLED(CONFIG_IPV6)
> +#include <net/ipv6.h>
> +#include <net/addrconf.h>
> +#endif
> +
>  struct udp_port_cfg {
>         u8                      family;
>
> @@ -29,4 +37,69 @@ struct udp_port_cfg {
>  int udp_sock_create(struct net *net, struct udp_port_cfg *cfg,
>                     struct socket **sockp);
>
> +#if IS_ENABLED(CONFIG_IPV6)
> +int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
> +                    struct socket **sockp);
> +#else
> +static inline int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
> +                                  struct socket **sockp)
> +{
> +       return 0;
> +}
> +#endif
> +
> +struct udp_tunnel_sock;
> +
> +typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb);
> +typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk);
> +
> +struct udp_tunnel_sock_cfg {
> +       struct socket *sock;    /* The socket UDP tunnel will attach to */
> +       /* Used for setting up udp_sock fields, see udp.h for details */
> +       __u8  encap_type;
> +       udp_tunnel_encap_rcv_t encap_rcv;
> +       udp_tunnel_encap_destroy_t encap_destroy;
> +};
> +
> +struct udp_tunnel_sock {
> +       struct socket *sock;
> +};
> +
> +struct udp_tunnel_sock *create_udp_tunnel_sock(struct net *net, size_t size,
> +                                              struct udp_tunnel_sock_cfg
> +                                                       *sock_cfg);
> +
> +int udp_tunnel_xmit_skb(struct udp_tunnel_sock *uts, struct rtable *rt,
> +                       struct sk_buff *skb, __be32 src, __be32 dst,
> +                       __u8 tos, __u8 ttl, __be16 df, __be16 src_port,
> +                       __be16 dst_port, bool xnet);
> +
> +#if IS_ENABLED(CONFIG_IPV6)
> +int udp_tunnel6_xmit_skb(struct udp_tunnel_sock *uts, struct dst_entry *dst,
> +                        struct sk_buff *skb, struct net_device *dev,
> +                        struct in6_addr *saddr, struct in6_addr *daddr,
> +                        __u8 prio, __u8 ttl, __be16 src_port,
> +                        __be16 dst_port);
> +#endif
> +
> +void udp_tunnel_sock_release(struct udp_tunnel_sock *uts);
> +void udp_tunnel_sock_free(struct udp_tunnel_sock *uts);
> +
> +static inline struct sk_buff *udp_tunnel_handle_offloads(struct sk_buff *skb,
> +                                                        bool udp_csum)
> +{
> +       int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
> +
> +       return iptunnel_handle_offloads(skb, udp_csum, type);
> +}
> +
> +static inline void udp_tunnel_encap_enable(struct socket *sock)
> +{
> +#if IS_ENABLED(CONFIG_IPV6)
> +       if (sock->sk->sk_family == PF_INET6)
> +               ipv6_stub->udpv6_encap_enable();
> +       else
> +#endif
> +               udp_encap_enable();
> +}
>  #endif
> diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
> index dbc10d8..cccb95f 100644
> --- a/net/ipv4/Kconfig
> +++ b/net/ipv4/Kconfig
> @@ -308,6 +308,7 @@ config NET_IPVTI
>           on top.
>
>  config NET_UDP_TUNNEL
> +       depends on (IPV6 || IPV6=n)
>         tristate
>         default n
>
> diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
> index 61ec1a6..d60c1a0 100644
> --- a/net/ipv4/udp_tunnel.c
> +++ b/net/ipv4/udp_tunnel.c
> @@ -14,42 +14,9 @@ int udp_sock_create(struct net *net, struct udp_port_cfg *cfg,
>         int err = -EINVAL;
>         struct socket *sock = NULL;
>
> -#if IS_ENABLED(CONFIG_IPV6)
>         if (cfg->family == AF_INET6) {
> -               struct sockaddr_in6 udp6_addr;
> -
> -               err = sock_create_kern(AF_INET6, SOCK_DGRAM, 0, &sock);
> -               if (err < 0)
> -                       goto error;
> -
> -               sk_change_net(sock->sk, net);
> -
> -               udp6_addr.sin6_family = AF_INET6;
> -               memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6,
> -                      sizeof(udp6_addr.sin6_addr));
> -               udp6_addr.sin6_port = cfg->local_udp_port;
> -               err = kernel_bind(sock, (struct sockaddr *)&udp6_addr,
> -                                 sizeof(udp6_addr));
> -               if (err < 0)
> -                       goto error;
> -
> -               if (cfg->peer_udp_port) {
> -                       udp6_addr.sin6_family = AF_INET6;
> -                       memcpy(&udp6_addr.sin6_addr, &cfg->peer_ip6,
> -                              sizeof(udp6_addr.sin6_addr));
> -                       udp6_addr.sin6_port = cfg->peer_udp_port;
> -                       err = kernel_connect(sock,
> -                                            (struct sockaddr *)&udp6_addr,
> -                                            sizeof(udp6_addr), 0);
> -               }
> -               if (err < 0)
> -                       goto error;
> -
> -               udp_set_no_check6_tx(sock->sk, !cfg->use_udp6_tx_checksums);
> -               udp_set_no_check6_rx(sock->sk, !cfg->use_udp6_rx_checksums);
> -       } else
> -#endif
> -       if (cfg->family == AF_INET) {
> +               return udp_sock_create6(net, cfg, sockp);
> +       } else if (cfg->family == AF_INET) {
>                 struct sockaddr_in udp_addr;
>
>                 err = sock_create_kern(AF_INET, SOCK_DGRAM, 0, &sock);
> @@ -82,7 +49,6 @@ int udp_sock_create(struct net *net, struct udp_port_cfg *cfg,
>                 return -EPFNOSUPPORT;
>         }
>
> -
>         *sockp = sock;
>
>         return 0;
> @@ -97,4 +63,74 @@ error:
>  }
>  EXPORT_SYMBOL(udp_sock_create);
>
> +struct udp_tunnel_sock *
> +create_udp_tunnel_sock(struct net *net, size_t size,
> +                      struct udp_tunnel_sock_cfg *cfg)
> +{
> +       struct udp_tunnel_sock *uts;
> +       struct sock *sk;
> +       struct socket *sock = cfg->sock;
> +
> +       uts = kzalloc(size, GFP_KERNEL);
> +       if (!uts)
> +               return ERR_PTR(-ENOMEM);
> +
Allocating memory for the caller seems like overkill to me, and there's
no guarantee that is what the caller wants anyway (maybe they are using
an array of static structures, for instance). It seems like it would be
just as easy to return the sock and let the caller allocate whatever else
it needs on its own (the only field in udp_tunnel_sock is the sock anyway).

> +       sk = sock->sk;
> +
> +       /* Disable multicast loopback */
> +       inet_sk(sk)->mc_loop = 0;
> +
We probably also want to enable checksum-unnecessary conversions here.

> +       rcu_assign_sk_user_data(sk, uts);
> +
> +       udp_sk(sk)->encap_type = cfg->encap_type;
> +       udp_sk(sk)->encap_rcv = cfg->encap_rcv;
> +       udp_sk(sk)->encap_destroy = cfg->encap_destroy;
> +
> +       uts->sock = sock;
> +
> +       udp_tunnel_encap_enable(sock);
> +
> +       return uts;
> +}
> +EXPORT_SYMBOL_GPL(create_udp_tunnel_sock);
> +
> +int udp_tunnel_xmit_skb(struct udp_tunnel_sock *uts, struct rtable *rt,
> +                       struct sk_buff *skb, __be32 src, __be32 dst,
> +                       __u8 tos, __u8 ttl, __be16 df, __be16 src_port,
> +                       __be16 dst_port, bool xnet)
> +{
> +       struct udphdr *uh;
> +       struct socket *sock = uts->sock;
> +
> +       __skb_push(skb, sizeof(*uh));
> +       skb_reset_transport_header(skb);
> +       uh = udp_hdr(skb);
> +
> +       uh->dest = dst_port;
> +       uh->source = src_port;
> +       uh->len = htons(skb->len);
> +
> +       udp_set_csum(sock->sk->sk_no_check_tx, skb, src, dst, skb->len);
> +
> +       return iptunnel_xmit(sock->sk, rt, skb, src, dst, IPPROTO_UDP,
> +                            tos, ttl, df, xnet);
> +}
> +EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb);
> +
> +void udp_tunnel_sock_release(struct udp_tunnel_sock *uts)
> +{
> +       struct sock *sk = uts->sock->sk;
> +
> +       rcu_assign_sk_user_data(uts->sock->sk, NULL);
> +       kernel_sock_shutdown(uts->sock, SHUT_RDWR);
> +       sk_release_kernel(sk);
> +}
> +EXPORT_SYMBOL_GPL(udp_tunnel_sock_release);
> +
> +void udp_tunnel_sock_free(struct udp_tunnel_sock *uts)
> +{
> +       kfree(uts);
> +}
> +EXPORT_SYMBOL_GPL(udp_tunnel_sock_free);
> +
>  MODULE_LICENSE("GPL");
> diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
> index 2fe6836..45f830e 100644
> --- a/net/ipv6/Makefile
> +++ b/net/ipv6/Makefile

It would probably be good to split the IPv4 and IPv6 changes into separate patches.

> @@ -35,6 +35,7 @@ obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o
>  obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o
>  obj-$(CONFIG_IPV6_MIP6) += mip6.o
>  obj-$(CONFIG_NETFILTER)        += netfilter/
> +obj-$(CONFIG_NET_UDP_TUNNEL) += ip6_udp_tunnel.o
>
>  obj-$(CONFIG_IPV6_VTI) += ip6_vti.o
>  obj-$(CONFIG_IPV6_SIT) += sit.o
> diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
> new file mode 100644
> index 0000000..5109f46
> --- /dev/null
> +++ b/net/ipv6/ip6_udp_tunnel.c
> @@ -0,0 +1,121 @@
> +#include <linux/module.h>
> +#include <linux/errno.h>
> +#include <linux/socket.h>
> +#include <linux/udp.h>
> +#include <linux/types.h>
> +#include <linux/kernel.h>
> +#include <linux/in6.h>
> +#include <net/udp.h>
> +#include <net/udp_tunnel.h>
> +#include <net/net_namespace.h>
> +#include <net/netns/generic.h>
> +#include <net/ip6_tunnel.h>
> +#include <net/ip6_checksum.h>
> +
> +int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
> +                    struct socket **sockp)
> +{
> +       struct sockaddr_in6 udp6_addr;
> +       int err = -EINVAL;
> +       struct socket *sock = NULL;
> +
> +       err = sock_create_kern(AF_INET6, SOCK_DGRAM, 0, &sock);
> +       if (err < 0)
> +               goto error;
> +
> +       sk_change_net(sock->sk, net);
> +
> +       udp6_addr.sin6_family = AF_INET6;
> +       memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6,
> +              sizeof(udp6_addr.sin6_addr));
> +       udp6_addr.sin6_port = cfg->local_udp_port;
> +       err = kernel_bind(sock, (struct sockaddr *)&udp6_addr,
> +                         sizeof(udp6_addr));
> +       if (err < 0)
> +               goto error;
> +
> +       if (cfg->peer_udp_port) {
> +               udp6_addr.sin6_family = AF_INET6;
> +               memcpy(&udp6_addr.sin6_addr, &cfg->peer_ip6,
> +                      sizeof(udp6_addr.sin6_addr));
> +               udp6_addr.sin6_port = cfg->peer_udp_port;
> +               err = kernel_connect(sock,
> +                                    (struct sockaddr *)&udp6_addr,
> +                                    sizeof(udp6_addr), 0);
> +       }
> +       if (err < 0)
> +               goto error;
> +
> +       udp_set_no_check6_tx(sock->sk, !cfg->use_udp6_tx_checksums);
> +       udp_set_no_check6_rx(sock->sk, !cfg->use_udp6_rx_checksums);
> +
> +       *sockp = sock;
> +       return 0;
> +
> +error:
> +       if (sock) {
> +               kernel_sock_shutdown(sock, SHUT_RDWR);
> +               sk_release_kernel(sock->sk);
> +       }
> +       *sockp = NULL;
> +       return err;
> +}
> +EXPORT_SYMBOL_GPL(udp_sock_create6);
> +
> +int udp_tunnel6_xmit_skb(struct udp_tunnel_sock *uts, struct dst_entry *dst,
> +                        struct sk_buff *skb, struct net_device *dev,
> +                        struct in6_addr *saddr, struct in6_addr *daddr,
> +                        __u8 prio, __u8 ttl, __be16 src_port, __be16 dst_port)
> +{
> +       struct udphdr *uh;
> +       struct ipv6hdr *ip6h;
> +
> +       __skb_push(skb, sizeof(*uh));
> +       skb_reset_transport_header(skb);
> +       uh = udp_hdr(skb);
> +
> +       uh->dest = dst_port;
> +       uh->source = src_port;
> +
> +       uh->len = htons(skb->len);
> +       uh->check = 0;
> +
> +       memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
> +       IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED
> +                           | IPSKB_REROUTED);
> +       skb_dst_set(skb, dst);
> +
> +       if (!skb_is_gso(skb) && !(dst->dev->features & NETIF_F_IPV6_CSUM)) {
> +               __wsum csum = skb_checksum(skb, 0, skb->len, 0);
> +
> +               skb->ip_summed = CHECKSUM_UNNECESSARY;
> +               uh->check = csum_ipv6_magic(saddr, daddr, skb->len,
> +                               IPPROTO_UDP, csum);
> +               if (uh->check == 0)
> +                       uh->check = CSUM_MANGLED_0;
> +       } else {
> +               skb->ip_summed = CHECKSUM_PARTIAL;
> +               skb->csum_start = skb_transport_header(skb) - skb->head;
> +               skb->csum_offset = offsetof(struct udphdr, check);
> +               uh->check = ~csum_ipv6_magic(saddr, daddr,
> +                               skb->len, IPPROTO_UDP, 0);
> +       }

Can't we call udp6_set_csum for this?

> +
> +       __skb_push(skb, sizeof(*ip6h));
> +       skb_reset_network_header(skb);
> +       ip6h              = ipv6_hdr(skb);
> +       ip6h->version     = 6;
> +       ip6h->priority    = prio;
> +       ip6h->flow_lbl[0] = 0;
> +       ip6h->flow_lbl[1] = 0;
> +       ip6h->flow_lbl[2] = 0;
> +       ip6h->payload_len = htons(skb->len);
> +       ip6h->nexthdr     = IPPROTO_UDP;
> +       ip6h->hop_limit   = ttl;
> +       ip6h->daddr       = *daddr;
> +       ip6h->saddr       = *saddr;
> +
> +       ip6tunnel_xmit(skb, dev);

So iptunnel_xmit creates the IP header, but ip6tunnel_xmit doesn't. It
should be on the TODO list to make this consistent!

> +       return 0;
> +}
> +EXPORT_SYMBOL_GPL(udp_tunnel6_xmit_skb);
> --
> 1.7.9.5
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [net-next v5 1/3] udp-tunnel: Expand UDP tunnel APIs
  2014-09-11 21:04   ` Tom Herbert
@ 2014-09-12 20:42     ` Andy Zhou
  2014-09-12 21:33       ` Tom Herbert
  0 siblings, 1 reply; 8+ messages in thread
From: Andy Zhou @ 2014-09-12 20:42 UTC (permalink / raw)
  To: Tom Herbert; +Cc: David Miller, Linux Netdev List

On Thu, Sep 11, 2014 at 2:04 PM, Tom Herbert <therbert@google.com> wrote:
> On Wed, Sep 10, 2014 at 8:29 PM, Andy Zhou <azhou@nicira.com> wrote:
>> Added common udp tunnel socket creation, and packet transmission APIs
>> API that can be used by other UDP based tunneling protocol
>> implementation.
>>
>> Signed-off-by: Andy Zhou <azhou@nicira.com>
>> ---
>>  include/net/udp_tunnel.h  |   73 +++++++++++++++++++++++++++
>>  net/ipv4/Kconfig          |    1 +
>>  net/ipv4/udp_tunnel.c     |  108 ++++++++++++++++++++++++++--------------
>>  net/ipv6/Makefile         |    1 +
>>  net/ipv6/ip6_udp_tunnel.c |  121 +++++++++++++++++++++++++++++++++++++++++++++
>>  5 files changed, 268 insertions(+), 36 deletions(-)
>>  create mode 100644 net/ipv6/ip6_udp_tunnel.c
>>
>> diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
>> index ffd69cb..e9dcf83 100644
>> --- a/include/net/udp_tunnel.h
>> +++ b/include/net/udp_tunnel.h
>> @@ -1,6 +1,14 @@
>>  #ifndef __NET_UDP_TUNNEL_H
>>  #define __NET_UDP_TUNNEL_H
>>
>> +#include <net/ip_tunnels.h>
>> +#include <net/udp.h>
>> +
>> +#if IS_ENABLED(CONFIG_IPV6)
>> +#include <net/ipv6.h>
>> +#include <net/addrconf.h>
>> +#endif
>> +
>>  struct udp_port_cfg {
>>         u8                      family;
>>
>> @@ -29,4 +37,69 @@ struct udp_port_cfg {
>>  int udp_sock_create(struct net *net, struct udp_port_cfg *cfg,
>>                     struct socket **sockp);
>>
>> +#if IS_ENABLED(CONFIG_IPV6)
>> +int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
>> +                    struct socket **sockp);
>> +#else
>> +static inline int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
>> +                                  struct socket **sockp)
>> +{
>> +       return 0;
>> +}
>> +#endif
>> +
>> +struct udp_tunnel_sock;
>> +
>> +typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb);
>> +typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk);
>> +
>> +struct udp_tunnel_sock_cfg {
>> +       struct socket *sock;    /* The socket UDP tunnel will attach to */
>> +       /* Used for setting up udp_sock fields, see udp.h for details */
>> +       __u8  encap_type;
>> +       udp_tunnel_encap_rcv_t encap_rcv;
>> +       udp_tunnel_encap_destroy_t encap_destroy;
>> +};
>> +
>> +struct udp_tunnel_sock {
>> +       struct socket *sock;
>> +};
>> +
>> +struct udp_tunnel_sock *create_udp_tunnel_sock(struct net *net, size_t size,
>> +                                              struct udp_tunnel_sock_cfg
>> +                                                       *sock_cfg);
>> +
>> +int udp_tunnel_xmit_skb(struct udp_tunnel_sock *uts, struct rtable *rt,
>> +                       struct sk_buff *skb, __be32 src, __be32 dst,
>> +                       __u8 tos, __u8 ttl, __be16 df, __be16 src_port,
>> +                       __be16 dst_port, bool xnet);
>> +
>> +#if IS_ENABLED(CONFIG_IPV6)
>> +int udp_tunnel6_xmit_skb(struct udp_tunnel_sock *uts, struct dst_entry *dst,
>> +                        struct sk_buff *skb, struct net_device *dev,
>> +                        struct in6_addr *saddr, struct in6_addr *daddr,
>> +                        __u8 prio, __u8 ttl, __be16 src_port,
>> +                        __be16 dst_port);
>> +#endif
>> +
>> +void udp_tunnel_sock_release(struct udp_tunnel_sock *uts);
>> +void udp_tunnel_sock_free(struct udp_tunnel_sock *uts);
>> +
>> +static inline struct sk_buff *udp_tunnel_handle_offloads(struct sk_buff *skb,
>> +                                                        bool udp_csum)
>> +{
>> +       int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
>> +
>> +       return iptunnel_handle_offloads(skb, udp_csum, type);
>> +}
>> +
>> +static inline void udp_tunnel_encap_enable(struct socket *sock)
>> +{
>> +#if IS_ENABLED(CONFIG_IPV6)
>> +       if (sock->sk->sk_family == PF_INET6)
>> +               ipv6_stub->udpv6_encap_enable();
>> +       else
>> +#endif
>> +               udp_encap_enable();
>> +}
>>  #endif
>> diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
>> index dbc10d8..cccb95f 100644
>> --- a/net/ipv4/Kconfig
>> +++ b/net/ipv4/Kconfig
>> @@ -308,6 +308,7 @@ config NET_IPVTI
>>           on top.
>>
>>  config NET_UDP_TUNNEL
>> +       depends on (IPV6 || IPV6=n)
>>         tristate
>>         default n
>>
>> diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
>> index 61ec1a6..d60c1a0 100644
>> --- a/net/ipv4/udp_tunnel.c
>> +++ b/net/ipv4/udp_tunnel.c
>> @@ -14,42 +14,9 @@ int udp_sock_create(struct net *net, struct udp_port_cfg *cfg,
>>         int err = -EINVAL;
>>         struct socket *sock = NULL;
>>
>> -#if IS_ENABLED(CONFIG_IPV6)
>>         if (cfg->family == AF_INET6) {
>> -               struct sockaddr_in6 udp6_addr;
>> -
>> -               err = sock_create_kern(AF_INET6, SOCK_DGRAM, 0, &sock);
>> -               if (err < 0)
>> -                       goto error;
>> -
>> -               sk_change_net(sock->sk, net);
>> -
>> -               udp6_addr.sin6_family = AF_INET6;
>> -               memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6,
>> -                      sizeof(udp6_addr.sin6_addr));
>> -               udp6_addr.sin6_port = cfg->local_udp_port;
>> -               err = kernel_bind(sock, (struct sockaddr *)&udp6_addr,
>> -                                 sizeof(udp6_addr));
>> -               if (err < 0)
>> -                       goto error;
>> -
>> -               if (cfg->peer_udp_port) {
>> -                       udp6_addr.sin6_family = AF_INET6;
>> -                       memcpy(&udp6_addr.sin6_addr, &cfg->peer_ip6,
>> -                              sizeof(udp6_addr.sin6_addr));
>> -                       udp6_addr.sin6_port = cfg->peer_udp_port;
>> -                       err = kernel_connect(sock,
>> -                                            (struct sockaddr *)&udp6_addr,
>> -                                            sizeof(udp6_addr), 0);
>> -               }
>> -               if (err < 0)
>> -                       goto error;
>> -
>> -               udp_set_no_check6_tx(sock->sk, !cfg->use_udp6_tx_checksums);
>> -               udp_set_no_check6_rx(sock->sk, !cfg->use_udp6_rx_checksums);
>> -       } else
>> -#endif
>> -       if (cfg->family == AF_INET) {
>> +               return udp_sock_create6(net, cfg, sockp);
>> +       } else if (cfg->family == AF_INET) {
>>                 struct sockaddr_in udp_addr;
>>
>>                 err = sock_create_kern(AF_INET, SOCK_DGRAM, 0, &sock);
>> @@ -82,7 +49,6 @@ int udp_sock_create(struct net *net, struct udp_port_cfg *cfg,
>>                 return -EPFNOSUPPORT;
>>         }
>>
>> -
>>         *sockp = sock;
>>
>>         return 0;
>> @@ -97,4 +63,74 @@ error:
>>  }
>>  EXPORT_SYMBOL(udp_sock_create);
>>
>> +struct udp_tunnel_sock *
>> +create_udp_tunnel_sock(struct net *net, size_t size,
>> +                      struct udp_tunnel_sock_cfg *cfg)
>> +{
>> +       struct udp_tunnel_sock *uts;
>> +       struct sock *sk;
>> +       struct socket *sock = cfg->sock;
>> +
>> +       uts = kzalloc(size, GFP_KERNEL);
>> +       if (!uts)
>> +               return ERR_PTR(-ENOMEM);
>> +
> Allocating memory for the caller seems like overkill to me and there's
> no guarantee that is what caller wants anyway (maybe they are using
> array of static structures for instance). Seems like it would be just
> as easy to return the sock and let caller allocate whatever else it
> needs on its own (only field in udp_tunnel_sock is the sock anyway).
O.K. The udp_tunnel layer does not need to allocate or maintain memory.
That will further simplify this layer.
>
>> +       sk = sock->sk;
>> +
>> +       /* Disable multicast loopback */
>> +       inet_sk(sk)->mc_loop = 0;
>> +
> Probably want to enable checksum unnecessary conversions also.
Right. Will do.
>
>> +       rcu_assign_sk_user_data(sk, uts);
>> +
>> +       udp_sk(sk)->encap_type = cfg->encap_type;
>> +       udp_sk(sk)->encap_rcv = cfg->encap_rcv;
>> +       udp_sk(sk)->encap_destroy = cfg->encap_destroy;
>> +
>> +       uts->sock = sock;
>> +
>> +       udp_tunnel_encap_enable(sock);
>> +
>> +       return uts;
>> +}
>> +EXPORT_SYMBOL_GPL(create_udp_tunnel_sock);
>> +
>> +int udp_tunnel_xmit_skb(struct udp_tunnel_sock *uts, struct rtable *rt,
>> +                       struct sk_buff *skb, __be32 src, __be32 dst,
>> +                       __u8 tos, __u8 ttl, __be16 df, __be16 src_port,
>> +                       __be16 dst_port, bool xnet)
>> +{
>> +       struct udphdr *uh;
>> +       struct socket *sock = uts->sock;
>> +
>> +       __skb_push(skb, sizeof(*uh));
>> +       skb_reset_transport_header(skb);
>> +       uh = udp_hdr(skb);
>> +
>> +       uh->dest = dst_port;
>> +       uh->source = src_port;
>> +       uh->len = htons(skb->len);
>> +
>> +       udp_set_csum(sock->sk->sk_no_check_tx, skb, src, dst, skb->len);
>> +
>> +       return iptunnel_xmit(sock->sk, rt, skb, src, dst, IPPROTO_UDP,
>> +                            tos, ttl, df, xnet);
>> +}
>> +EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb);
>> +
>> +void udp_tunnel_sock_release(struct udp_tunnel_sock *uts)
>> +{
>> +       struct sock *sk = uts->sock->sk;
>> +
>> +       rcu_assign_sk_user_data(uts->sock->sk, NULL);
>> +       kernel_sock_shutdown(uts->sock, SHUT_RDWR);
>> +       sk_release_kernel(sk);
>> +}
>> +EXPORT_SYMBOL_GPL(udp_tunnel_sock_release);
>> +
>> +void udp_tunnel_sock_free(struct udp_tunnel_sock *uts)
>> +{
>> +       kfree(uts);
>> +}
>> +EXPORT_SYMBOL_GPL(udp_tunnel_sock_free);
>> +
>>  MODULE_LICENSE("GPL");
>> diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
>> index 2fe6836..45f830e 100644
>> --- a/net/ipv6/Makefile
>> +++ b/net/ipv6/Makefile
>
> Probably good to make IPv4 and v6 stuff in separate patches.
Is this really necessary? Removing the dependency may be tricky.
>
>> @@ -35,6 +35,7 @@ obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o
>>  obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o
>>  obj-$(CONFIG_IPV6_MIP6) += mip6.o
>>  obj-$(CONFIG_NETFILTER)        += netfilter/
>> +obj-$(CONFIG_NET_UDP_TUNNEL) += ip6_udp_tunnel.o
>>
>>  obj-$(CONFIG_IPV6_VTI) += ip6_vti.o
>>  obj-$(CONFIG_IPV6_SIT) += sit.o
>> diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
>> new file mode 100644
>> index 0000000..5109f46
>> --- /dev/null
>> +++ b/net/ipv6/ip6_udp_tunnel.c
>> @@ -0,0 +1,121 @@
>> +#include <linux/module.h>
>> +#include <linux/errno.h>
>> +#include <linux/socket.h>
>> +#include <linux/udp.h>
>> +#include <linux/types.h>
>> +#include <linux/kernel.h>
>> +#include <linux/in6.h>
>> +#include <net/udp.h>
>> +#include <net/udp_tunnel.h>
>> +#include <net/net_namespace.h>
>> +#include <net/netns/generic.h>
>> +#include <net/ip6_tunnel.h>
>> +#include <net/ip6_checksum.h>
>> +
>> +int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
>> +                    struct socket **sockp)
>> +{
>> +       struct sockaddr_in6 udp6_addr;
>> +       int err = -EINVAL;
>> +       struct socket *sock = NULL;
>> +
>> +       err = sock_create_kern(AF_INET6, SOCK_DGRAM, 0, &sock);
>> +       if (err < 0)
>> +               goto error;
>> +
>> +       sk_change_net(sock->sk, net);
>> +
>> +       udp6_addr.sin6_family = AF_INET6;
>> +       memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6,
>> +              sizeof(udp6_addr.sin6_addr));
>> +       udp6_addr.sin6_port = cfg->local_udp_port;
>> +       err = kernel_bind(sock, (struct sockaddr *)&udp6_addr,
>> +                         sizeof(udp6_addr));
>> +       if (err < 0)
>> +               goto error;
>> +
>> +       if (cfg->peer_udp_port) {
>> +               udp6_addr.sin6_family = AF_INET6;
>> +               memcpy(&udp6_addr.sin6_addr, &cfg->peer_ip6,
>> +                      sizeof(udp6_addr.sin6_addr));
>> +               udp6_addr.sin6_port = cfg->peer_udp_port;
>> +               err = kernel_connect(sock,
>> +                                    (struct sockaddr *)&udp6_addr,
>> +                                    sizeof(udp6_addr), 0);
>> +       }
>> +       if (err < 0)
>> +               goto error;
>> +
>> +       udp_set_no_check6_tx(sock->sk, !cfg->use_udp6_tx_checksums);
>> +       udp_set_no_check6_rx(sock->sk, !cfg->use_udp6_rx_checksums);
>> +
>> +       *sockp = sock;
>> +       return 0;
>> +
>> +error:
>> +       if (sock) {
>> +               kernel_sock_shutdown(sock, SHUT_RDWR);
>> +               sk_release_kernel(sock->sk);
>> +       }
>> +       *sockp = NULL;
>> +       return err;
>> +}
>> +EXPORT_SYMBOL_GPL(udp_sock_create6);
>> +
>> +int udp_tunnel6_xmit_skb(struct udp_tunnel_sock *uts, struct dst_entry *dst,
>> +                        struct sk_buff *skb, struct net_device *dev,
>> +                        struct in6_addr *saddr, struct in6_addr *daddr,
>> +                        __u8 prio, __u8 ttl, __be16 src_port, __be16 dst_port)
>> +{
>> +       struct udphdr *uh;
>> +       struct ipv6hdr *ip6h;
>> +
>> +       __skb_push(skb, sizeof(*uh));
>> +       skb_reset_transport_header(skb);
>> +       uh = udp_hdr(skb);
>> +
>> +       uh->dest = dst_port;
>> +       uh->source = src_port;
>> +
>> +       uh->len = htons(skb->len);
>> +       uh->check = 0;
>> +
>> +       memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
>> +       IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED
>> +                           | IPSKB_REROUTED);
>> +       skb_dst_set(skb, dst);
>> +
>> +       if (!skb_is_gso(skb) && !(dst->dev->features & NETIF_F_IPV6_CSUM)) {
>> +               __wsum csum = skb_checksum(skb, 0, skb->len, 0);
>> +
>> +               skb->ip_summed = CHECKSUM_UNNECESSARY;
>> +               uh->check = csum_ipv6_magic(saddr, daddr, skb->len,
>> +                               IPPROTO_UDP, csum);
>> +               if (uh->check == 0)
>> +                       uh->check = CSUM_MANGLED_0;
>> +       } else {
>> +               skb->ip_summed = CHECKSUM_PARTIAL;
>> +               skb->csum_start = skb_transport_header(skb) - skb->head;
>> +               skb->csum_offset = offsetof(struct udphdr, check);
>> +               uh->check = ~csum_ipv6_magic(saddr, daddr,
>> +                               skb->len, IPPROTO_UDP, 0);
>> +       }
>
> Can't we call udp6_set_csum for this?
Thanks for pointing it out.  Will do.
>
>> +
>> +       __skb_push(skb, sizeof(*ip6h));
>> +       skb_reset_network_header(skb);
>> +       ip6h              = ipv6_hdr(skb);
>> +       ip6h->version     = 6;
>> +       ip6h->priority    = prio;
>> +       ip6h->flow_lbl[0] = 0;
>> +       ip6h->flow_lbl[1] = 0;
>> +       ip6h->flow_lbl[2] = 0;
>> +       ip6h->payload_len = htons(skb->len);
>> +       ip6h->nexthdr     = IPPROTO_UDP;
>> +       ip6h->hop_limit   = ttl;
>> +       ip6h->daddr       = *daddr;
>> +       ip6h->saddr       = *saddr;
>> +
>> +       ip6tunnel_xmit(skb, dev);
>
> So iptunnel_xmit creates the IP header, but ip6tunnel_xmit doesn't. It
> should be on the TODO list to make this consistent!
Agreed.
>
>> +       return 0;
>> +}
>> +EXPORT_SYMBOL_GPL(udp_tunnel6_xmit_skb);
>> --
>> 1.7.9.5
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe netdev" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [net-next v5 1/3] udp-tunnel: Expand UDP tunnel APIs
  2014-09-12 20:42     ` Andy Zhou
@ 2014-09-12 21:33       ` Tom Herbert
  0 siblings, 0 replies; 8+ messages in thread
From: Tom Herbert @ 2014-09-12 21:33 UTC (permalink / raw)
  To: Andy Zhou; +Cc: David Miller, Linux Netdev List

On Fri, Sep 12, 2014 at 1:42 PM, Andy Zhou <azhou@nicira.com> wrote:
> On Thu, Sep 11, 2014 at 2:04 PM, Tom Herbert <therbert@google.com> wrote:
>> On Wed, Sep 10, 2014 at 8:29 PM, Andy Zhou <azhou@nicira.com> wrote:
>>> Added common UDP tunnel socket creation and packet transmission APIs
>>> that can be used by other UDP-based tunneling protocol
>>> implementations.
>>>
>>> Signed-off-by: Andy Zhou <azhou@nicira.com>
>>> ---
>>>  include/net/udp_tunnel.h  |   73 +++++++++++++++++++++++++++
>>>  net/ipv4/Kconfig          |    1 +
>>>  net/ipv4/udp_tunnel.c     |  108 ++++++++++++++++++++++++++--------------
>>>  net/ipv6/Makefile         |    1 +
>>>  net/ipv6/ip6_udp_tunnel.c |  121 +++++++++++++++++++++++++++++++++++++++++++++
>>>  5 files changed, 268 insertions(+), 36 deletions(-)
>>>  create mode 100644 net/ipv6/ip6_udp_tunnel.c
>>>
>>> diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
>>> index ffd69cb..e9dcf83 100644
>>> --- a/include/net/udp_tunnel.h
>>> +++ b/include/net/udp_tunnel.h
>>> @@ -1,6 +1,14 @@
>>>  #ifndef __NET_UDP_TUNNEL_H
>>>  #define __NET_UDP_TUNNEL_H
>>>
>>> +#include <net/ip_tunnels.h>
>>> +#include <net/udp.h>
>>> +
>>> +#if IS_ENABLED(CONFIG_IPV6)
>>> +#include <net/ipv6.h>
>>> +#include <net/addrconf.h>
>>> +#endif
>>> +
>>>  struct udp_port_cfg {
>>>         u8                      family;
>>>
>>> @@ -29,4 +37,69 @@ struct udp_port_cfg {
>>>  int udp_sock_create(struct net *net, struct udp_port_cfg *cfg,
>>>                     struct socket **sockp);
>>>
>>> +#if IS_ENABLED(CONFIG_IPV6)
>>> +int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
>>> +                    struct socket **sockp);
>>> +#else
>>> +static inline int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
>>> +                                  struct socket **sockp)
>>> +{
>>> +       return 0;
>>> +}
>>> +#endif
>>> +
>>> +struct udp_tunnel_sock;
>>> +
>>> +typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb);
>>> +typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk);
>>> +
>>> +struct udp_tunnel_sock_cfg {
>>> +       struct socket *sock;    /* The socket UDP tunnel will attach to */
>>> +       /* Used for setting up udp_sock fields, see udp.h for details */
>>> +       __u8  encap_type;
>>> +       udp_tunnel_encap_rcv_t encap_rcv;
>>> +       udp_tunnel_encap_destroy_t encap_destroy;
>>> +};
>>> +
>>> +struct udp_tunnel_sock {
>>> +       struct socket *sock;
>>> +};
>>> +
>>> +struct udp_tunnel_sock *create_udp_tunnel_sock(struct net *net, size_t size,
>>> +                                              struct udp_tunnel_sock_cfg
>>> +                                                       *sock_cfg);
>>> +
>>> +int udp_tunnel_xmit_skb(struct udp_tunnel_sock *uts, struct rtable *rt,
>>> +                       struct sk_buff *skb, __be32 src, __be32 dst,
>>> +                       __u8 tos, __u8 ttl, __be16 df, __be16 src_port,
>>> +                       __be16 dst_port, bool xnet);
>>> +
>>> +#if IS_ENABLED(CONFIG_IPV6)
>>> +int udp_tunnel6_xmit_skb(struct udp_tunnel_sock *uts, struct dst_entry *dst,
>>> +                        struct sk_buff *skb, struct net_device *dev,
>>> +                        struct in6_addr *saddr, struct in6_addr *daddr,
>>> +                        __u8 prio, __u8 ttl, __be16 src_port,
>>> +                        __be16 dst_port);
>>> +#endif
>>> +
>>> +void udp_tunnel_sock_release(struct udp_tunnel_sock *uts);
>>> +void udp_tunnel_sock_free(struct udp_tunnel_sock *uts);
>>> +
>>> +static inline struct sk_buff *udp_tunnel_handle_offloads(struct sk_buff *skb,
>>> +                                                        bool udp_csum)
>>> +{
>>> +       int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
>>> +
>>> +       return iptunnel_handle_offloads(skb, udp_csum, type);
>>> +}
>>> +
>>> +static inline void udp_tunnel_encap_enable(struct socket *sock)
>>> +{
>>> +#if IS_ENABLED(CONFIG_IPV6)
>>> +       if (sock->sk->sk_family == PF_INET6)
>>> +               ipv6_stub->udpv6_encap_enable();
>>> +       else
>>> +#endif
>>> +               udp_encap_enable();
>>> +}
>>>  #endif
>>> diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
>>> index dbc10d8..cccb95f 100644
>>> --- a/net/ipv4/Kconfig
>>> +++ b/net/ipv4/Kconfig
>>> @@ -308,6 +308,7 @@ config NET_IPVTI
>>>           on top.
>>>
>>>  config NET_UDP_TUNNEL
>>> +       depends on (IPV6 || IPV6=n)
>>>         tristate
>>>         default n
>>>
>>> diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
>>> index 61ec1a6..d60c1a0 100644
>>> --- a/net/ipv4/udp_tunnel.c
>>> +++ b/net/ipv4/udp_tunnel.c
>>> @@ -14,42 +14,9 @@ int udp_sock_create(struct net *net, struct udp_port_cfg *cfg,
>>>         int err = -EINVAL;
>>>         struct socket *sock = NULL;
>>>
>>> -#if IS_ENABLED(CONFIG_IPV6)
>>>         if (cfg->family == AF_INET6) {
>>> -               struct sockaddr_in6 udp6_addr;
>>> -
>>> -               err = sock_create_kern(AF_INET6, SOCK_DGRAM, 0, &sock);
>>> -               if (err < 0)
>>> -                       goto error;
>>> -
>>> -               sk_change_net(sock->sk, net);
>>> -
>>> -               udp6_addr.sin6_family = AF_INET6;
>>> -               memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6,
>>> -                      sizeof(udp6_addr.sin6_addr));
>>> -               udp6_addr.sin6_port = cfg->local_udp_port;
>>> -               err = kernel_bind(sock, (struct sockaddr *)&udp6_addr,
>>> -                                 sizeof(udp6_addr));
>>> -               if (err < 0)
>>> -                       goto error;
>>> -
>>> -               if (cfg->peer_udp_port) {
>>> -                       udp6_addr.sin6_family = AF_INET6;
>>> -                       memcpy(&udp6_addr.sin6_addr, &cfg->peer_ip6,
>>> -                              sizeof(udp6_addr.sin6_addr));
>>> -                       udp6_addr.sin6_port = cfg->peer_udp_port;
>>> -                       err = kernel_connect(sock,
>>> -                                            (struct sockaddr *)&udp6_addr,
>>> -                                            sizeof(udp6_addr), 0);
>>> -               }
>>> -               if (err < 0)
>>> -                       goto error;
>>> -
>>> -               udp_set_no_check6_tx(sock->sk, !cfg->use_udp6_tx_checksums);
>>> -               udp_set_no_check6_rx(sock->sk, !cfg->use_udp6_rx_checksums);
>>> -       } else
>>> -#endif
>>> -       if (cfg->family == AF_INET) {
>>> +               return udp_sock_create6(net, cfg, sockp);
>>> +       } else if (cfg->family == AF_INET) {
>>>                 struct sockaddr_in udp_addr;
>>>
>>>                 err = sock_create_kern(AF_INET, SOCK_DGRAM, 0, &sock);
>>> @@ -82,7 +49,6 @@ int udp_sock_create(struct net *net, struct udp_port_cfg *cfg,
>>>                 return -EPFNOSUPPORT;
>>>         }
>>>
>>> -
>>>         *sockp = sock;
>>>
>>>         return 0;
>>> @@ -97,4 +63,74 @@ error:
>>>  }
>>>  EXPORT_SYMBOL(udp_sock_create);
>>>
>>> +struct udp_tunnel_sock *
>>> +create_udp_tunnel_sock(struct net *net, size_t size,
>>> +                      struct udp_tunnel_sock_cfg *cfg)
>>> +{
>>> +       struct udp_tunnel_sock *uts;
>>> +       struct sock *sk;
>>> +       struct socket *sock = cfg->sock;
>>> +
>>> +       uts = kzalloc(size, GFP_KERNEL);
>>> +       if (!uts)
>>> +               return ERR_PTR(-ENOMEM);
>>> +
>> Allocating memory for the caller seems like overkill to me, and there's
>> no guarantee that is what the caller wants anyway (maybe they are using
>> an array of static structures, for instance). It seems it would be just
>> as easy to return the sock and let the caller allocate whatever else it
>> needs on its own (the only field in udp_tunnel_sock is the sock anyway).
> OK. The udp_tunnel layer does not need to allocate or maintain memory.
> That will further simplify this layer.
>>
>>> +       sk = sock->sk;
>>> +
>>> +       /* Disable multicast loopback */
>>> +       inet_sk(sk)->mc_loop = 0;
>>> +
>> Probably want to enable checksum unnecessary conversions also.
> Right. Will do.
>>
>>> +       rcu_assign_sk_user_data(sk, uts);
>>> +
>>> +       udp_sk(sk)->encap_type = cfg->encap_type;
>>> +       udp_sk(sk)->encap_rcv = cfg->encap_rcv;
>>> +       udp_sk(sk)->encap_destroy = cfg->encap_destroy;
>>> +
>>> +       uts->sock = sock;
>>> +
>>> +       udp_tunnel_encap_enable(sock);
>>> +
>>> +       return uts;
>>> +}
>>> +EXPORT_SYMBOL_GPL(create_udp_tunnel_sock);
>>> +
>>> +int udp_tunnel_xmit_skb(struct udp_tunnel_sock *uts, struct rtable *rt,
>>> +                       struct sk_buff *skb, __be32 src, __be32 dst,
>>> +                       __u8 tos, __u8 ttl, __be16 df, __be16 src_port,
>>> +                       __be16 dst_port, bool xnet)
>>> +{
>>> +       struct udphdr *uh;
>>> +       struct socket *sock = uts->sock;
>>> +
>>> +       __skb_push(skb, sizeof(*uh));
>>> +       skb_reset_transport_header(skb);
>>> +       uh = udp_hdr(skb);
>>> +
>>> +       uh->dest = dst_port;
>>> +       uh->source = src_port;
>>> +       uh->len = htons(skb->len);
>>> +
>>> +       udp_set_csum(sock->sk->sk_no_check_tx, skb, src, dst, skb->len);
>>> +
>>> +       return iptunnel_xmit(sock->sk, rt, skb, src, dst, IPPROTO_UDP,
>>> +                            tos, ttl, df, xnet);
>>> +}
>>> +EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb);
>>> +
>>> +void udp_tunnel_sock_release(struct udp_tunnel_sock *uts)
>>> +{
>>> +       struct sock *sk = uts->sock->sk;
>>> +
>>> +       rcu_assign_sk_user_data(uts->sock->sk, NULL);
>>> +       kernel_sock_shutdown(uts->sock, SHUT_RDWR);
>>> +       sk_release_kernel(sk);
>>> +}
>>> +EXPORT_SYMBOL_GPL(udp_tunnel_sock_release);
>>> +
>>> +void udp_tunnel_sock_free(struct udp_tunnel_sock *uts)
>>> +{
>>> +       kfree(uts);
>>> +}
>>> +EXPORT_SYMBOL_GPL(udp_tunnel_sock_free);
>>> +
>>>  MODULE_LICENSE("GPL");
>>> diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
>>> index 2fe6836..45f830e 100644
>>> --- a/net/ipv6/Makefile
>>> +++ b/net/ipv6/Makefile
>>
>> Probably good to make IPv4 and v6 stuff in separate patches.
> Is this really necessary? Removing the dependency may be tricky.

I suppose not, but if there are dependencies they should be from IPv6
to IPv4 files, not the other way around.

>>
>>> @@ -35,6 +35,7 @@ obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o
>>>  obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o
>>>  obj-$(CONFIG_IPV6_MIP6) += mip6.o
>>>  obj-$(CONFIG_NETFILTER)        += netfilter/
>>> +obj-$(CONFIG_NET_UDP_TUNNEL) += ip6_udp_tunnel.o
>>>
>>>  obj-$(CONFIG_IPV6_VTI) += ip6_vti.o
>>>  obj-$(CONFIG_IPV6_SIT) += sit.o
>>> diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
>>> new file mode 100644
>>> index 0000000..5109f46
>>> --- /dev/null
>>> +++ b/net/ipv6/ip6_udp_tunnel.c
>>> @@ -0,0 +1,121 @@
>>> +#include <linux/module.h>
>>> +#include <linux/errno.h>
>>> +#include <linux/socket.h>
>>> +#include <linux/udp.h>
>>> +#include <linux/types.h>
>>> +#include <linux/kernel.h>
>>> +#include <linux/in6.h>
>>> +#include <net/udp.h>
>>> +#include <net/udp_tunnel.h>
>>> +#include <net/net_namespace.h>
>>> +#include <net/netns/generic.h>
>>> +#include <net/ip6_tunnel.h>
>>> +#include <net/ip6_checksum.h>
>>> +
>>> +int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
>>> +                    struct socket **sockp)
>>> +{
>>> +       struct sockaddr_in6 udp6_addr;
>>> +       int err = -EINVAL;
>>> +       struct socket *sock = NULL;
>>> +
>>> +       err = sock_create_kern(AF_INET6, SOCK_DGRAM, 0, &sock);
>>> +       if (err < 0)
>>> +               goto error;
>>> +
>>> +       sk_change_net(sock->sk, net);
>>> +
>>> +       udp6_addr.sin6_family = AF_INET6;
>>> +       memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6,
>>> +              sizeof(udp6_addr.sin6_addr));
>>> +       udp6_addr.sin6_port = cfg->local_udp_port;
>>> +       err = kernel_bind(sock, (struct sockaddr *)&udp6_addr,
>>> +                         sizeof(udp6_addr));
>>> +       if (err < 0)
>>> +               goto error;
>>> +
>>> +       if (cfg->peer_udp_port) {
>>> +               udp6_addr.sin6_family = AF_INET6;
>>> +               memcpy(&udp6_addr.sin6_addr, &cfg->peer_ip6,
>>> +                      sizeof(udp6_addr.sin6_addr));
>>> +               udp6_addr.sin6_port = cfg->peer_udp_port;
>>> +               err = kernel_connect(sock,
>>> +                                    (struct sockaddr *)&udp6_addr,
>>> +                                    sizeof(udp6_addr), 0);
>>> +       }
>>> +       if (err < 0)
>>> +               goto error;
>>> +
>>> +       udp_set_no_check6_tx(sock->sk, !cfg->use_udp6_tx_checksums);
>>> +       udp_set_no_check6_rx(sock->sk, !cfg->use_udp6_rx_checksums);
>>> +
>>> +       *sockp = sock;
>>> +       return 0;
>>> +
>>> +error:
>>> +       if (sock) {
>>> +               kernel_sock_shutdown(sock, SHUT_RDWR);
>>> +               sk_release_kernel(sock->sk);
>>> +       }
>>> +       *sockp = NULL;
>>> +       return err;
>>> +}
>>> +EXPORT_SYMBOL_GPL(udp_sock_create6);
>>> +
>>> +int udp_tunnel6_xmit_skb(struct udp_tunnel_sock *uts, struct dst_entry *dst,
>>> +                        struct sk_buff *skb, struct net_device *dev,
>>> +                        struct in6_addr *saddr, struct in6_addr *daddr,
>>> +                        __u8 prio, __u8 ttl, __be16 src_port, __be16 dst_port)
>>> +{
>>> +       struct udphdr *uh;
>>> +       struct ipv6hdr *ip6h;
>>> +
>>> +       __skb_push(skb, sizeof(*uh));
>>> +       skb_reset_transport_header(skb);
>>> +       uh = udp_hdr(skb);
>>> +
>>> +       uh->dest = dst_port;
>>> +       uh->source = src_port;
>>> +
>>> +       uh->len = htons(skb->len);
>>> +       uh->check = 0;
>>> +
>>> +       memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
>>> +       IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED
>>> +                           | IPSKB_REROUTED);
>>> +       skb_dst_set(skb, dst);
>>> +
>>> +       if (!skb_is_gso(skb) && !(dst->dev->features & NETIF_F_IPV6_CSUM)) {
>>> +               __wsum csum = skb_checksum(skb, 0, skb->len, 0);
>>> +
>>> +               skb->ip_summed = CHECKSUM_UNNECESSARY;
>>> +               uh->check = csum_ipv6_magic(saddr, daddr, skb->len,
>>> +                               IPPROTO_UDP, csum);
>>> +               if (uh->check == 0)
>>> +                       uh->check = CSUM_MANGLED_0;
>>> +       } else {
>>> +               skb->ip_summed = CHECKSUM_PARTIAL;
>>> +               skb->csum_start = skb_transport_header(skb) - skb->head;
>>> +               skb->csum_offset = offsetof(struct udphdr, check);
>>> +               uh->check = ~csum_ipv6_magic(saddr, daddr,
>>> +                               skb->len, IPPROTO_UDP, 0);
>>> +       }
>>
>> Can't we call udp6_set_csum for this?
> Thanks for pointing it out.  Will do.
>>
>>> +
>>> +       __skb_push(skb, sizeof(*ip6h));
>>> +       skb_reset_network_header(skb);
>>> +       ip6h              = ipv6_hdr(skb);
>>> +       ip6h->version     = 6;
>>> +       ip6h->priority    = prio;
>>> +       ip6h->flow_lbl[0] = 0;
>>> +       ip6h->flow_lbl[1] = 0;
>>> +       ip6h->flow_lbl[2] = 0;
>>> +       ip6h->payload_len = htons(skb->len);
>>> +       ip6h->nexthdr     = IPPROTO_UDP;
>>> +       ip6h->hop_limit   = ttl;
>>> +       ip6h->daddr       = *daddr;
>>> +       ip6h->saddr       = *saddr;
>>> +
>>> +       ip6tunnel_xmit(skb, dev);
>>
>> So iptunnel_xmit creates the IP header, but ip6tunnel_xmit doesn't. It
>> should be on the TODO list to make this consistent!
> Agreed.
>>
>>> +       return 0;
>>> +}
>>> +EXPORT_SYMBOL_GPL(udp_tunnel6_xmit_skb);
>>> --
>>> 1.7.9.5
>>>
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe netdev" in
>>> the body of a message to majordomo@vger.kernel.org
>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [net-next v5 1/3] udp-tunnel: Expand UDP tunnel APIs
  2014-09-11  3:29 ` [net-next v5 1/3] udp-tunnel: Expand " Andy Zhou
  2014-09-11 21:04   ` Tom Herbert
@ 2014-09-12 21:35   ` Tom Herbert
  1 sibling, 0 replies; 8+ messages in thread
From: Tom Herbert @ 2014-09-12 21:35 UTC (permalink / raw)
  To: Andy Zhou; +Cc: David Miller, Linux Netdev List

On Wed, Sep 10, 2014 at 8:29 PM, Andy Zhou <azhou@nicira.com> wrote:
> Added common UDP tunnel socket creation and packet transmission APIs
> that can be used by other UDP-based tunneling protocol
> implementations.
>
> Signed-off-by: Andy Zhou <azhou@nicira.com>
> ---
>  include/net/udp_tunnel.h  |   73 +++++++++++++++++++++++++++
>  net/ipv4/Kconfig          |    1 +
>  net/ipv4/udp_tunnel.c     |  108 ++++++++++++++++++++++++++--------------
>  net/ipv6/Makefile         |    1 +
>  net/ipv6/ip6_udp_tunnel.c |  121 +++++++++++++++++++++++++++++++++++++++++++++
>  5 files changed, 268 insertions(+), 36 deletions(-)
>  create mode 100644 net/ipv6/ip6_udp_tunnel.c
>
> diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
> index ffd69cb..e9dcf83 100644
> --- a/include/net/udp_tunnel.h
> +++ b/include/net/udp_tunnel.h
> @@ -1,6 +1,14 @@
>  #ifndef __NET_UDP_TUNNEL_H
>  #define __NET_UDP_TUNNEL_H
>
> +#include <net/ip_tunnels.h>
> +#include <net/udp.h>
> +
> +#if IS_ENABLED(CONFIG_IPV6)
> +#include <net/ipv6.h>
> +#include <net/addrconf.h>
> +#endif
> +
>  struct udp_port_cfg {
>         u8                      family;
>
> @@ -29,4 +37,69 @@ struct udp_port_cfg {
>  int udp_sock_create(struct net *net, struct udp_port_cfg *cfg,
>                     struct socket **sockp);
>
> +#if IS_ENABLED(CONFIG_IPV6)
> +int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
> +                    struct socket **sockp);
> +#else
> +static inline int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
> +                                  struct socket **sockp)
> +{
> +       return 0;
> +}
> +#endif
> +
> +struct udp_tunnel_sock;
> +
> +typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb);
> +typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk);
> +
> +struct udp_tunnel_sock_cfg {
> +       struct socket *sock;    /* The socket UDP tunnel will attach to */
> +       /* Used for setting up udp_sock fields, see udp.h for details */
> +       __u8  encap_type;
> +       udp_tunnel_encap_rcv_t encap_rcv;
> +       udp_tunnel_encap_destroy_t encap_destroy;
> +};
> +
> +struct udp_tunnel_sock {
> +       struct socket *sock;
> +};
> +
> +struct udp_tunnel_sock *create_udp_tunnel_sock(struct net *net, size_t size,
> +                                              struct udp_tunnel_sock_cfg
> +                                                       *sock_cfg);
> +
> +int udp_tunnel_xmit_skb(struct udp_tunnel_sock *uts, struct rtable *rt,
> +                       struct sk_buff *skb, __be32 src, __be32 dst,
> +                       __u8 tos, __u8 ttl, __be16 df, __be16 src_port,
> +                       __be16 dst_port, bool xnet);
> +
> +#if IS_ENABLED(CONFIG_IPV6)
> +int udp_tunnel6_xmit_skb(struct udp_tunnel_sock *uts, struct dst_entry *dst,
> +                        struct sk_buff *skb, struct net_device *dev,
> +                        struct in6_addr *saddr, struct in6_addr *daddr,
> +                        __u8 prio, __u8 ttl, __be16 src_port,
> +                        __be16 dst_port);
> +#endif
> +
> +void udp_tunnel_sock_release(struct udp_tunnel_sock *uts);
> +void udp_tunnel_sock_free(struct udp_tunnel_sock *uts);
> +
> +static inline struct sk_buff *udp_tunnel_handle_offloads(struct sk_buff *skb,
> +                                                        bool udp_csum)
> +{
> +       int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
> +
> +       return iptunnel_handle_offloads(skb, udp_csum, type);
> +}
> +
> +static inline void udp_tunnel_encap_enable(struct socket *sock)
> +{
> +#if IS_ENABLED(CONFIG_IPV6)
> +       if (sock->sk->sk_family == PF_INET6)
> +               ipv6_stub->udpv6_encap_enable();
> +       else
> +#endif
> +               udp_encap_enable();
> +}
>  #endif
> diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
> index dbc10d8..cccb95f 100644
> --- a/net/ipv4/Kconfig
> +++ b/net/ipv4/Kconfig
> @@ -308,6 +308,7 @@ config NET_IPVTI
>           on top.
>
>  config NET_UDP_TUNNEL
> +       depends on (IPV6 || IPV6=n)
>         tristate
>         default n
>
> diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
> index 61ec1a6..d60c1a0 100644
> --- a/net/ipv4/udp_tunnel.c
> +++ b/net/ipv4/udp_tunnel.c
> @@ -14,42 +14,9 @@ int udp_sock_create(struct net *net, struct udp_port_cfg *cfg,
>         int err = -EINVAL;
>         struct socket *sock = NULL;
>
> -#if IS_ENABLED(CONFIG_IPV6)
>         if (cfg->family == AF_INET6) {
> -               struct sockaddr_in6 udp6_addr;
> -
> -               err = sock_create_kern(AF_INET6, SOCK_DGRAM, 0, &sock);
> -               if (err < 0)
> -                       goto error;
> -
> -               sk_change_net(sock->sk, net);
> -
> -               udp6_addr.sin6_family = AF_INET6;
> -               memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6,
> -                      sizeof(udp6_addr.sin6_addr));
> -               udp6_addr.sin6_port = cfg->local_udp_port;
> -               err = kernel_bind(sock, (struct sockaddr *)&udp6_addr,
> -                                 sizeof(udp6_addr));
> -               if (err < 0)
> -                       goto error;
> -
> -               if (cfg->peer_udp_port) {
> -                       udp6_addr.sin6_family = AF_INET6;
> -                       memcpy(&udp6_addr.sin6_addr, &cfg->peer_ip6,
> -                              sizeof(udp6_addr.sin6_addr));
> -                       udp6_addr.sin6_port = cfg->peer_udp_port;
> -                       err = kernel_connect(sock,
> -                                            (struct sockaddr *)&udp6_addr,
> -                                            sizeof(udp6_addr), 0);
> -               }
> -               if (err < 0)
> -                       goto error;
> -
> -               udp_set_no_check6_tx(sock->sk, !cfg->use_udp6_tx_checksums);
> -               udp_set_no_check6_rx(sock->sk, !cfg->use_udp6_rx_checksums);
> -       } else
> -#endif
> -       if (cfg->family == AF_INET) {
> +               return udp_sock_create6(net, cfg, sockp);
> +       } else if (cfg->family == AF_INET) {
>                 struct sockaddr_in udp_addr;
>
>                 err = sock_create_kern(AF_INET, SOCK_DGRAM, 0, &sock);
> @@ -82,7 +49,6 @@ int udp_sock_create(struct net *net, struct udp_port_cfg *cfg,
>                 return -EPFNOSUPPORT;
>         }
>
> -
>         *sockp = sock;
>
>         return 0;
> @@ -97,4 +63,74 @@ error:
>  }
>  EXPORT_SYMBOL(udp_sock_create);
>
> +struct udp_tunnel_sock *
> +create_udp_tunnel_sock(struct net *net, size_t size,
> +                      struct udp_tunnel_sock_cfg *cfg)
> +{
> +       struct udp_tunnel_sock *uts;
> +       struct sock *sk;
> +       struct socket *sock = cfg->sock;
> +
> +       uts = kzalloc(size, GFP_KERNEL);
> +       if (!uts)
> +               return ERR_PTR(-ENOMEM);
> +
> +       sk = sock->sk;
> +
> +       /* Disable multicast loopback */
> +       inet_sk(sk)->mc_loop = 0;
> +
> +       rcu_assign_sk_user_data(sk, uts);
> +
> +       udp_sk(sk)->encap_type = cfg->encap_type;
> +       udp_sk(sk)->encap_rcv = cfg->encap_rcv;
> +       udp_sk(sk)->encap_destroy = cfg->encap_destroy;
> +
> +       uts->sock = sock;
> +
> +       udp_tunnel_encap_enable(sock);
> +
> +       return uts;
> +}
> +EXPORT_SYMBOL_GPL(create_udp_tunnel_sock);
> +
> +int udp_tunnel_xmit_skb(struct udp_tunnel_sock *uts, struct rtable *rt,
> +                       struct sk_buff *skb, __be32 src, __be32 dst,
> +                       __u8 tos, __u8 ttl, __be16 df, __be16 src_port,
> +                       __be16 dst_port, bool xnet)
> +{
> +       struct udphdr *uh;
> +       struct socket *sock = uts->sock;
> +
> +       __skb_push(skb, sizeof(*uh));
> +       skb_reset_transport_header(skb);
> +       uh = udp_hdr(skb);
> +
> +       uh->dest = dst_port;
> +       uh->source = src_port;
> +       uh->len = htons(skb->len);
> +
> +       udp_set_csum(sock->sk->sk_no_check_tx, skb, src, dst, skb->len);
> +
> +       return iptunnel_xmit(sock->sk, rt, skb, src, dst, IPPROTO_UDP,
> +                            tos, ttl, df, xnet);
> +}
> +EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb);
> +
> +void udp_tunnel_sock_release(struct udp_tunnel_sock *uts)
> +{
> +       struct sock *sk = uts->sock->sk;
> +
> +       rcu_assign_sk_user_data(uts->sock->sk, NULL);
> +       kernel_sock_shutdown(uts->sock, SHUT_RDWR);
> +       sk_release_kernel(sk);
> +}
> +EXPORT_SYMBOL_GPL(udp_tunnel_sock_release);
> +
> +void udp_tunnel_sock_free(struct udp_tunnel_sock *uts)
> +{
> +       kfree(uts);
> +}
> +EXPORT_SYMBOL_GPL(udp_tunnel_sock_free);
> +
>  MODULE_LICENSE("GPL");
> diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
> index 2fe6836..45f830e 100644
> --- a/net/ipv6/Makefile
> +++ b/net/ipv6/Makefile
> @@ -35,6 +35,7 @@ obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o
>  obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o
>  obj-$(CONFIG_IPV6_MIP6) += mip6.o
>  obj-$(CONFIG_NETFILTER)        += netfilter/
> +obj-$(CONFIG_NET_UDP_TUNNEL) += ip6_udp_tunnel.o
>
>  obj-$(CONFIG_IPV6_VTI) += ip6_vti.o
>  obj-$(CONFIG_IPV6_SIT) += sit.o
> diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
> new file mode 100644
> index 0000000..5109f46
> --- /dev/null
> +++ b/net/ipv6/ip6_udp_tunnel.c
> @@ -0,0 +1,121 @@
> +#include <linux/module.h>
> +#include <linux/errno.h>
> +#include <linux/socket.h>
> +#include <linux/udp.h>
> +#include <linux/types.h>
> +#include <linux/kernel.h>
> +#include <linux/in6.h>
> +#include <net/udp.h>
> +#include <net/udp_tunnel.h>
> +#include <net/net_namespace.h>
> +#include <net/netns/generic.h>
> +#include <net/ip6_tunnel.h>
> +#include <net/ip6_checksum.h>
> +
> +int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
> +                    struct socket **sockp)
> +{
> +       struct sockaddr_in6 udp6_addr;
> +       int err = -EINVAL;
> +       struct socket *sock = NULL;
> +
> +       err = sock_create_kern(AF_INET6, SOCK_DGRAM, 0, &sock);
> +       if (err < 0)
> +               goto error;
> +
> +       sk_change_net(sock->sk, net);
> +
> +       udp6_addr.sin6_family = AF_INET6;
> +       memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6,
> +              sizeof(udp6_addr.sin6_addr));
> +       udp6_addr.sin6_port = cfg->local_udp_port;
> +       err = kernel_bind(sock, (struct sockaddr *)&udp6_addr,
> +                         sizeof(udp6_addr));
> +       if (err < 0)
> +               goto error;
> +
> +       if (cfg->peer_udp_port) {
> +               udp6_addr.sin6_family = AF_INET6;
> +               memcpy(&udp6_addr.sin6_addr, &cfg->peer_ip6,
> +                      sizeof(udp6_addr.sin6_addr));
> +               udp6_addr.sin6_port = cfg->peer_udp_port;
> +               err = kernel_connect(sock,
> +                                    (struct sockaddr *)&udp6_addr,
> +                                    sizeof(udp6_addr), 0);
> +       }
> +       if (err < 0)
> +               goto error;
> +
> +       udp_set_no_check6_tx(sock->sk, !cfg->use_udp6_tx_checksums);
> +       udp_set_no_check6_rx(sock->sk, !cfg->use_udp6_rx_checksums);
> +
> +       *sockp = sock;
> +       return 0;
> +
> +error:
> +       if (sock) {
> +               kernel_sock_shutdown(sock, SHUT_RDWR);
> +               sk_release_kernel(sock->sk);
> +       }
> +       *sockp = NULL;
> +       return err;
> +}
> +EXPORT_SYMBOL_GPL(udp_sock_create6);
> +
> +int udp_tunnel6_xmit_skb(struct udp_tunnel_sock *uts, struct dst_entry *dst,
> +                        struct sk_buff *skb, struct net_device *dev,
> +                        struct in6_addr *saddr, struct in6_addr *daddr,
> +                        __u8 prio, __u8 ttl, __be16 src_port, __be16 dst_port)
> +{
> +       struct udphdr *uh;
> +       struct ipv6hdr *ip6h;
> +
> +       __skb_push(skb, sizeof(*uh));
> +       skb_reset_transport_header(skb);
> +       uh = udp_hdr(skb);
> +
> +       uh->dest = dst_port;
> +       uh->source = src_port;
> +
> +       uh->len = htons(skb->len);
> +       uh->check = 0;
> +
> +       memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
> +       IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED
> +                           | IPSKB_REROUTED);
> +       skb_dst_set(skb, dst);
> +
> +       if (!skb_is_gso(skb) && !(dst->dev->features & NETIF_F_IPV6_CSUM)) {
> +               __wsum csum = skb_checksum(skb, 0, skb->len, 0);
> +
> +               skb->ip_summed = CHECKSUM_UNNECESSARY;
> +               uh->check = csum_ipv6_magic(saddr, daddr, skb->len,
> +                               IPPROTO_UDP, csum);
> +               if (uh->check == 0)
> +                       uh->check = CSUM_MANGLED_0;
> +       } else {
> +               skb->ip_summed = CHECKSUM_PARTIAL;
> +               skb->csum_start = skb_transport_header(skb) - skb->head;
> +               skb->csum_offset = offsetof(struct udphdr, check);
> +               uh->check = ~csum_ipv6_magic(saddr, daddr,
> +                               skb->len, IPPROTO_UDP, 0);
> +       }
> +
> +       __skb_push(skb, sizeof(*ip6h));
> +       skb_reset_network_header(skb);
> +       ip6h              = ipv6_hdr(skb);
> +       ip6h->version     = 6;
> +       ip6h->priority    = prio;
> +       ip6h->flow_lbl[0] = 0;
> +       ip6h->flow_lbl[1] = 0;
> +       ip6h->flow_lbl[2] = 0;

Please call ip6_flow_hdr() to set up the flow label (see ip6_tnl_xmit2() for an example).

> +       ip6h->payload_len = htons(skb->len);
> +       ip6h->nexthdr     = IPPROTO_UDP;
> +       ip6h->hop_limit   = ttl;
> +       ip6h->daddr       = *daddr;
> +       ip6h->saddr       = *saddr;
> +
> +       ip6tunnel_xmit(skb, dev);
> +       return 0;
> +}
> +EXPORT_SYMBOL_GPL(udp_tunnel6_xmit_skb);
> --
> 1.7.9.5
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2014-09-12 21:35 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-09-11  3:29 [net-next v5 0/3] Refactor vxlan and l2tp to use common UDP tunnel APIs Andy Zhou
2014-09-11  3:29 ` [net-next v5 1/3] udp-tunnel: Expand " Andy Zhou
2014-09-11 21:04   ` Tom Herbert
2014-09-12 20:42     ` Andy Zhou
2014-09-12 21:33       ` Tom Herbert
2014-09-12 21:35   ` Tom Herbert
2014-09-11  3:29 ` [net-next v5 2/3] vxlan: Refactor vxlan driver to make use of the common UDP tunnel functions Andy Zhou
2014-09-11  3:29 ` [net-next v5 3/3] l2tp: Refactor l2tp core " Andy Zhou

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.