* [net-next v3 1/3] udp: Expand UDP tunnel common APIs
@ 2014-08-27 3:35 Andy Zhou
2014-08-27 3:35 ` [net-next v3 2/3] vxlan: Refactor vxlan driver to make use of the common UDP tunnel functions Andy Zhou
` (3 more replies)
0 siblings, 4 replies; 10+ messages in thread
From: Andy Zhou @ 2014-08-27 3:35 UTC (permalink / raw)
To: davem; +Cc: netdev, Andy Zhou
Added create_udp_tunnel_socket(), packet receive and transmit, and
other related common functions for UDP tunnels.
Per net open UDP tunnel ports are tracked in this common layer to
prevent sharing of a single port with more than one UDP tunnel.
Signed-off-by: Andy Zhou <azhou@nicira.com>
---
include/net/udp_tunnel.h | 50 +++++++++++
net/ipv4/udp_tunnel.c | 216 +++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 265 insertions(+), 1 deletion(-)
diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
index ffd69cb..5ff0746 100644
--- a/include/net/udp_tunnel.h
+++ b/include/net/udp_tunnel.h
@@ -1,6 +1,8 @@
#ifndef __NET_UDP_TUNNEL_H
#define __NET_UDP_TUNNEL_H
+#include <net/ip_tunnels.h>
+
struct udp_port_cfg {
u8 family;
@@ -26,7 +28,55 @@ struct udp_port_cfg {
use_udp6_rx_checksums:1;
};
+struct udp_tunnel_sock;
+
+typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb);
+typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk);
+
+struct udp_tunnel_socket_cfg {
+ struct udp_port_cfg port;
+
+ /* Used for setting up udp_sock fields, see udp.h for details */
+ __u8 encap_type;
+ udp_tunnel_encap_rcv_t encap_rcv;
+ udp_tunnel_encap_destroy_t encap_destroy;
+};
+
+struct udp_tunnel_sock {
+ struct hlist_node hlist;
+ struct socket *sock;
+};
+
int udp_sock_create(struct net *net, struct udp_port_cfg *cfg,
struct socket **sockp);
+struct udp_tunnel_sock *create_udp_tunnel_sock(struct net *net, size_t size,
+ struct socket *sock,
+ struct udp_tunnel_socket_cfg
+ *socket_cfg);
+
+struct udp_tunnel_sock *udp_tunnel_find_sock(struct net *net, __be16 port);
+
+int udp_tunnel_xmit_skb(struct socket *sock, struct rtable *rt,
+ struct sk_buff *skb, __be32 src, __be32 dst,
+ __u8 tos, __u8 ttl, __be16 df, __be16 src_port,
+ __be16 dst_port, bool xnet);
+
+#if IS_ENABLED(CONFIG_IPV6)
+int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst,
+ struct sk_buff *skb, struct net_device *dev,
+ struct in6_addr *saddr, struct in6_addr *daddr,
+ __u8 prio, __u8 ttl, __be16 src_port, __be16 dst_port);
+
+#endif
+
+void udp_tunnel_sock_release(struct udp_tunnel_sock *uts);
+
+static inline struct sk_buff *udp_tunnel_handle_offloads(struct sk_buff *skb,
+ bool udp_csum)
+{
+ int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
+
+ return iptunnel_handle_offloads(skb, udp_csum, type);
+}
#endif
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
index 61ec1a6..3ed66bc 100644
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel.c
@@ -7,6 +7,23 @@
#include <net/udp.h>
#include <net/udp_tunnel.h>
#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ipv6.h>
+#include <net/addrconf.h>
+#include <net/ip6_tunnel.h>
+#include <net/ip6_checksum.h>
+#endif
+
+#define PORT_HASH_BITS 8
+#define PORT_HASH_SIZE (1 << PORT_HASH_BITS)
+
+static int udp_tunnel_net_id;
+
+struct udp_tunnel_net {
+ struct hlist_head sock_list[PORT_HASH_SIZE];
+ spinlock_t sock_lock; /* Protecting the sock_list */
+};
int udp_sock_create(struct net *net, struct udp_port_cfg *cfg,
struct socket **sockp)
@@ -82,7 +99,6 @@ int udp_sock_create(struct net *net, struct udp_port_cfg *cfg,
return -EPFNOSUPPORT;
}
-
*sockp = sock;
return 0;
@@ -97,4 +113,202 @@ error:
}
EXPORT_SYMBOL(udp_sock_create);
+
+/* Socket hash table head */
+static inline struct hlist_head *uts_head(struct net *net, const __be16 port)
+{
+ struct udp_tunnel_net *utn = net_generic(net, udp_tunnel_net_id);
+
+ return &utn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)];
+}
+
+struct udp_tunnel_sock *create_udp_tunnel_sock(struct net *net, size_t size,
+ struct socket *sock,
+ struct udp_tunnel_socket_cfg
+ *cfg)
+{
+ struct udp_tunnel_net *utn = net_generic(net, udp_tunnel_net_id);
+ struct udp_tunnel_sock *uts;
+ struct sock *sk;
+ const __be16 port = cfg->port.local_udp_port;
+ const int ipv6 = (cfg->port.family == AF_INET6);
+ int err;
+
+ if (!sock)
+ err = udp_sock_create(net, &cfg->port, &sock);
+ else
+ err = (sock->sk->sk_protocol == IPPROTO_UDP) ?
+ 0 : -EPROTONOSUPPORT;
+
+ if (err)
+ return NULL;
+
+ uts = kzalloc(size, GFP_KERNEL);
+ if (!uts)
+ return ERR_PTR(-ENOMEM);
+
+ sk = sock->sk;
+
+ /* Disable multicast loopback */
+ inet_sk(sk)->mc_loop = 0;
+
+ rcu_assign_sk_user_data(sk, uts);
+
+ udp_sk(sk)->encap_type = cfg->encap_type;
+ udp_sk(sk)->encap_rcv = cfg->encap_rcv;
+ udp_sk(sk)->encap_destroy = cfg->encap_destroy;
+
+ uts->sock = sock;
+
+ spin_lock(&utn->sock_lock);
+ hlist_add_head_rcu(&uts->hlist, uts_head(net, port));
+ spin_unlock(&utn->sock_lock);
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (ipv6)
+ ipv6_stub->udpv6_encap_enable();
+ else
+#endif
+ udp_encap_enable();
+
+ return uts;
+}
+EXPORT_SYMBOL_GPL(create_udp_tunnel_sock);
+
+int udp_tunnel_xmit_skb(struct socket *sock, struct rtable *rt,
+ struct sk_buff *skb, __be32 src, __be32 dst,
+ __u8 tos, __u8 ttl, __be16 df, __be16 src_port,
+ __be16 dst_port, bool xnet)
+{
+ struct udphdr *uh;
+
+ __skb_push(skb, sizeof(*uh));
+ skb_reset_transport_header(skb);
+ uh = udp_hdr(skb);
+
+ uh->dest = dst_port;
+ uh->source = src_port;
+ uh->len = htons(skb->len);
+
+ udp_set_csum(sock->sk->sk_no_check_tx, skb, src, dst, skb->len);
+
+ return iptunnel_xmit(sock->sk, rt, skb, src, dst, IPPROTO_UDP,
+ tos, ttl, df, xnet);
+}
+EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb);
+
+#if IS_ENABLED(CONFIG_IPV6)
+int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst,
+ struct sk_buff *skb, struct net_device *dev,
+ struct in6_addr *saddr, struct in6_addr *daddr,
+ __u8 prio, __u8 ttl, __be16 src_port, __be16 dst_port)
+{
+ struct udphdr *uh;
+ struct ipv6hdr *ip6h;
+
+ __skb_push(skb, sizeof(*uh));
+ skb_reset_transport_header(skb);
+ uh = udp_hdr(skb);
+
+ uh->dest = dst_port;
+ uh->source = src_port;
+
+ uh->len = htons(skb->len);
+ uh->check = 0;
+
+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+ IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED
+ | IPSKB_REROUTED);
+ skb_dst_set(skb, dst);
+
+ if (!skb_is_gso(skb) && !(dst->dev->features & NETIF_F_IPV6_CSUM)) {
+ __wsum csum = skb_checksum(skb, 0, skb->len, 0);
+
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ uh->check = csum_ipv6_magic(saddr, daddr, skb->len,
+ IPPROTO_UDP, csum);
+ if (uh->check == 0)
+ uh->check = CSUM_MANGLED_0;
+ } else {
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->csum_start = skb_transport_header(skb) - skb->head;
+ skb->csum_offset = offsetof(struct udphdr, check);
+ uh->check = ~csum_ipv6_magic(saddr, daddr,
+ skb->len, IPPROTO_UDP, 0);
+ }
+
+ __skb_push(skb, sizeof(*ip6h));
+ skb_reset_network_header(skb);
+ ip6h = ipv6_hdr(skb);
+ ip6h->version = 6;
+ ip6h->priority = prio;
+ ip6h->flow_lbl[0] = 0;
+ ip6h->flow_lbl[1] = 0;
+ ip6h->flow_lbl[2] = 0;
+ ip6h->payload_len = htons(skb->len);
+ ip6h->nexthdr = IPPROTO_UDP;
+ ip6h->hop_limit = ttl;
+ ip6h->daddr = *daddr;
+ ip6h->saddr = *saddr;
+
+ ip6tunnel_xmit(skb, dev);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(udp_tunnel6_xmit_skb);
+#endif
+
+struct udp_tunnel_sock *udp_tunnel_find_sock(struct net *net, __be16 port)
+{
+ struct udp_tunnel_sock *uts;
+
+ hlist_for_each_entry_rcu(uts, uts_head(net, port), hlist) {
+ if (inet_sk(uts->sock->sk)->inet_sport == port)
+ return uts;
+ }
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(udp_tunnel_find_sock);
+
+void udp_tunnel_sock_release(struct udp_tunnel_sock *uts)
+{
+ struct sock *sk = uts->sock->sk;
+ struct net *net = sock_net(sk);
+ struct udp_tunnel_net *utn = net_generic(net, udp_tunnel_net_id);
+
+ spin_lock(&utn->sock_lock);
+ hlist_del_rcu(&uts->hlist);
+ rcu_assign_sk_user_data(uts->sock->sk, NULL);
+ kernel_sock_shutdown(uts->sock, SHUT_RDWR);
+ sk_release_kernel(sk);
+ spin_unlock(&utn->sock_lock);
+}
+EXPORT_SYMBOL_GPL(udp_tunnel_sock_release);
+
+static int __net_init udp_tunnel_init_net(struct net *net)
+{
+ struct udp_tunnel_net *utn = net_generic(net, udp_tunnel_net_id);
+ unsigned int h;
+
+ spin_lock_init(&utn->sock_lock);
+
+ for (h = 0; h < PORT_HASH_SIZE; h++)
+ INIT_HLIST_HEAD(&utn->sock_list[h]);
+
+ return 0;
+}
+
+static struct pernet_operations udp_tunnel_net_ops = {
+ .init = udp_tunnel_init_net,
+ .exit = NULL,
+ .id = &udp_tunnel_net_id,
+ .size = sizeof(struct udp_tunnel_net),
+};
+
+static int __init udp_tunnel_init(void)
+{
+ return register_pernet_subsys(&udp_tunnel_net_ops);
+}
+late_initcall(udp_tunnel_init);
+
MODULE_LICENSE("GPL");
--
1.7.9.5
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [net-next v3 2/3] vxlan: Refactor vxlan driver to make use of the common UDP tunnel functions.
2014-08-27 3:35 [net-next v3 1/3] udp: Expand UDP tunnel common APIs Andy Zhou
@ 2014-08-27 3:35 ` Andy Zhou
2014-08-27 4:15 ` Tom Herbert
[not found] ` <CA+mtBx8R2YyAMbPuw=iYDbLpSwRo_robnxOYPUFV2368-RzdLg@mail.gmail.com>
2014-08-27 3:35 ` [net-next v3 3/3] l2tp: Refactor l2tp core " Andy Zhou
` (2 subsequent siblings)
3 siblings, 2 replies; 10+ messages in thread
From: Andy Zhou @ 2014-08-27 3:35 UTC (permalink / raw)
To: davem; +Cc: netdev, Andy Zhou
Signed-off-by: Andy Zhou <azhou@nicira.com>
---
drivers/net/vxlan.c | 199 ++++++++++++-----------------------------
include/net/vxlan.h | 16 ++--
net/openvswitch/vport-vxlan.c | 6 +-
3 files changed, 68 insertions(+), 153 deletions(-)
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index beb377b..f1f1c48 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -42,6 +42,7 @@
#include <net/netns/generic.h>
#include <net/vxlan.h>
#include <net/protocol.h>
+#include <net/udp_tunnel.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/addrconf.h>
@@ -277,13 +278,7 @@ static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb)
/* Find VXLAN socket based on network namespace and UDP port */
static struct vxlan_sock *vxlan_find_sock(struct net *net, __be16 port)
{
- struct vxlan_sock *vs;
-
- hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
- if (inet_sk(vs->sock->sk)->inet_sport == port)
- return vs;
- }
- return NULL;
+ return (struct vxlan_sock *)udp_tunnel_find_sock(net, port);
}
static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id)
@@ -636,7 +631,7 @@ static int vxlan_gro_complete(struct sk_buff *skb, int nhoff)
static void vxlan_notify_add_rx_port(struct vxlan_sock *vs)
{
struct net_device *dev;
- struct sock *sk = vs->sock->sk;
+ struct sock *sk = vs->uts.sock->sk;
struct net *net = sock_net(sk);
sa_family_t sa_family = sk->sk_family;
__be16 port = inet_sk(sk)->inet_sport;
@@ -661,7 +656,7 @@ static void vxlan_notify_add_rx_port(struct vxlan_sock *vs)
static void vxlan_notify_del_rx_port(struct vxlan_sock *vs)
{
struct net_device *dev;
- struct sock *sk = vs->sock->sk;
+ struct sock *sk = vs->uts.sock->sk;
struct net *net = sock_net(sk);
sa_family_t sa_family = sk->sk_family;
__be16 port = inet_sk(sk)->inet_sport;
@@ -1053,7 +1048,7 @@ static void vxlan_sock_hold(struct vxlan_sock *vs)
void vxlan_sock_release(struct vxlan_sock *vs)
{
- struct sock *sk = vs->sock->sk;
+ struct sock *sk = vs->uts.sock->sk;
struct net *net = sock_net(sk);
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
@@ -1062,7 +1057,6 @@ void vxlan_sock_release(struct vxlan_sock *vs)
spin_lock(&vn->sock_lock);
hlist_del_rcu(&vs->hlist);
- rcu_assign_sk_user_data(vs->sock->sk, NULL);
vxlan_notify_del_rx_port(vs);
spin_unlock(&vn->sock_lock);
@@ -1078,7 +1072,7 @@ static void vxlan_igmp_join(struct work_struct *work)
{
struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, igmp_join);
struct vxlan_sock *vs = vxlan->vn_sock;
- struct sock *sk = vs->sock->sk;
+ struct sock *sk = vs->uts.sock->sk;
union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
int ifindex = vxlan->default_dst.remote_ifindex;
@@ -1107,7 +1101,7 @@ static void vxlan_igmp_leave(struct work_struct *work)
{
struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, igmp_leave);
struct vxlan_sock *vs = vxlan->vn_sock;
- struct sock *sk = vs->sock->sk;
+ struct sock *sk = vs->uts.sock->sk;
union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
int ifindex = vxlan->default_dst.remote_ifindex;
@@ -1338,7 +1332,6 @@ out:
}
#if IS_ENABLED(CONFIG_IPV6)
-
static struct sk_buff *vxlan_na_create(struct sk_buff *request,
struct neighbour *n, bool isrouter)
{
@@ -1572,13 +1565,6 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)
return false;
}
-static inline struct sk_buff *vxlan_handle_offloads(struct sk_buff *skb,
- bool udp_csum)
-{
- int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
- return iptunnel_handle_offloads(skb, udp_csum, type);
-}
-
#if IS_ENABLED(CONFIG_IPV6)
static int vxlan6_xmit_skb(struct vxlan_sock *vs,
struct dst_entry *dst, struct sk_buff *skb,
@@ -1587,13 +1573,13 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
__be16 src_port, __be16 dst_port, __be32 vni,
bool xnet)
{
- struct ipv6hdr *ip6h;
struct vxlanhdr *vxh;
- struct udphdr *uh;
int min_headroom;
int err;
- skb = vxlan_handle_offloads(skb, !udp_get_no_check6_tx(vs->sock->sk));
+ skb = udp_tunnel_handle_offloads(skb,
+ !udp_get_no_check6_tx(
+ vs->uts.sock->sk));
if (IS_ERR(skb))
return -EINVAL;
@@ -1621,38 +1607,8 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
vxh->vx_flags = htonl(VXLAN_FLAGS);
vxh->vx_vni = vni;
- __skb_push(skb, sizeof(*uh));
- skb_reset_transport_header(skb);
- uh = udp_hdr(skb);
-
- uh->dest = dst_port;
- uh->source = src_port;
-
- uh->len = htons(skb->len);
-
- memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
- IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
- IPSKB_REROUTED);
- skb_dst_set(skb, dst);
-
- udp6_set_csum(udp_get_no_check6_tx(vs->sock->sk), skb,
- saddr, daddr, skb->len);
-
- __skb_push(skb, sizeof(*ip6h));
- skb_reset_network_header(skb);
- ip6h = ipv6_hdr(skb);
- ip6h->version = 6;
- ip6h->priority = prio;
- ip6h->flow_lbl[0] = 0;
- ip6h->flow_lbl[1] = 0;
- ip6h->flow_lbl[2] = 0;
- ip6h->payload_len = htons(skb->len);
- ip6h->nexthdr = IPPROTO_UDP;
- ip6h->hop_limit = ttl;
- ip6h->daddr = *daddr;
- ip6h->saddr = *saddr;
-
- ip6tunnel_xmit(skb, dev);
+ udp_tunnel6_xmit_skb(vs->uts.sock, dst, skb, dev, saddr, daddr, prio,
+ ttl, src_port, dst_port);
return 0;
}
#endif
@@ -1663,11 +1619,11 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
__be16 src_port, __be16 dst_port, __be32 vni, bool xnet)
{
struct vxlanhdr *vxh;
- struct udphdr *uh;
int min_headroom;
int err;
- skb = vxlan_handle_offloads(skb, !vs->sock->sk->sk_no_check_tx);
+ skb = udp_tunnel_handle_offloads(skb,
+ !vs->uts.sock->sk->sk_no_check_tx);
if (IS_ERR(skb))
return -EINVAL;
@@ -1693,20 +1649,8 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
vxh->vx_flags = htonl(VXLAN_FLAGS);
vxh->vx_vni = vni;
- __skb_push(skb, sizeof(*uh));
- skb_reset_transport_header(skb);
- uh = udp_hdr(skb);
-
- uh->dest = dst_port;
- uh->source = src_port;
-
- uh->len = htons(skb->len);
-
- udp_set_csum(vs->sock->sk->sk_no_check_tx, skb,
- src, dst, skb->len);
-
- return iptunnel_xmit(vs->sock->sk, rt, skb, src, dst, IPPROTO_UDP,
- tos, ttl, df, xnet);
+ return udp_tunnel_xmit_skb(vs->uts.sock, rt, skb, src, dst, tos,
+ ttl, df, src_port, dst_port, xnet);
}
EXPORT_SYMBOL_GPL(vxlan_xmit_skb);
@@ -1831,18 +1775,18 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
- err = vxlan_xmit_skb(vxlan->vn_sock, rt, skb,
- fl4.saddr, dst->sin.sin_addr.s_addr,
- tos, ttl, df, src_port, dst_port,
- htonl(vni << 8),
- !net_eq(vxlan->net, dev_net(vxlan->dev)));
+ err = udp_tunnel_xmit_skb(vxlan->vn_sock->uts.sock, rt, skb,
+ fl4.saddr, dst->sin.sin_addr.s_addr,
+ tos, ttl, df, src_port, dst_port,
+ !net_eq(vxlan->net,
+ dev_net(vxlan->dev)));
if (err < 0)
goto rt_tx_error;
iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
#if IS_ENABLED(CONFIG_IPV6)
} else {
- struct sock *sk = vxlan->vn_sock->sock->sk;
+ struct sock *sk = vxlan->vn_sock->uts.sock->sk;
struct dst_entry *ndst;
struct flowi6 fl6;
u32 flags;
@@ -2204,8 +2148,8 @@ void vxlan_get_rx_port(struct net_device *dev)
spin_lock(&vn->sock_lock);
for (i = 0; i < PORT_HASH_SIZE; ++i) {
hlist_for_each_entry_rcu(vs, &vn->sock_list[i], hlist) {
- port = inet_sk(vs->sock->sk)->inet_sport;
- sa_family = vs->sock->sk->sk_family;
+ port = inet_sk(vs->uts.sock->sk)->inet_sport;
+ sa_family = vs->uts.sock->sk->sk_family;
dev->netdev_ops->ndo_add_vxlan_port(dev, sa_family,
port);
}
@@ -2335,79 +2279,60 @@ static const struct ethtool_ops vxlan_ethtool_ops = {
static void vxlan_del_work(struct work_struct *work)
{
struct vxlan_sock *vs = container_of(work, struct vxlan_sock, del_work);
-
- sk_release_kernel(vs->sock->sk);
+ udp_tunnel_sock_release(&vs->uts);
kfree_rcu(vs, rcu);
}
-static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
- __be16 port, u32 flags)
+/* Create new listen socket if needed */
+static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
+ vxlan_rcv_t rcv, void *data,
+ u32 flags)
{
- struct socket *sock;
- struct udp_port_cfg udp_conf;
- int err;
+ struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+ struct vxlan_sock *vs;
+ struct udp_tunnel_socket_cfg vxlan_ts_cfg;
+ bool ipv6 = !!(flags & VXLAN_F_IPV6);
+ unsigned int h;
- memset(&udp_conf, 0, sizeof(udp_conf));
+ memset(&vxlan_ts_cfg, 0, sizeof(struct udp_tunnel_socket_cfg));
if (ipv6) {
- udp_conf.family = AF_INET6;
- udp_conf.use_udp6_tx_checksums =
+ vxlan_ts_cfg.port.family = AF_INET6;
+ vxlan_ts_cfg.port.use_udp6_tx_checksums =
!!(flags & VXLAN_F_UDP_ZERO_CSUM6_TX);
- udp_conf.use_udp6_rx_checksums =
+ vxlan_ts_cfg.port.use_udp6_rx_checksums =
!!(flags & VXLAN_F_UDP_ZERO_CSUM6_RX);
} else {
- udp_conf.family = AF_INET;
- udp_conf.local_ip.s_addr = INADDR_ANY;
- udp_conf.use_udp_checksums =
+ vxlan_ts_cfg.port.family = AF_INET;
+ vxlan_ts_cfg.port.local_ip.s_addr = INADDR_ANY;
+ vxlan_ts_cfg.port.use_udp_checksums =
!!(flags & VXLAN_F_UDP_CSUM);
}
- udp_conf.local_udp_port = port;
+ vxlan_ts_cfg.port.local_udp_port = port;
+ vxlan_ts_cfg.encap_type = 1;
+ vxlan_ts_cfg.encap_rcv = vxlan_udp_encap_recv;
+ vxlan_ts_cfg.encap_destroy = NULL;
- /* Open UDP socket */
- err = udp_sock_create(net, &udp_conf, &sock);
- if (err < 0)
- return ERR_PTR(err);
-
- /* Disable multicast loopback */
- inet_sk(sock->sk)->mc_loop = 0;
-
- return sock;
-}
-
-/* Create new listen socket if needed */
-static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
- vxlan_rcv_t *rcv, void *data,
- u32 flags)
-{
- struct vxlan_net *vn = net_generic(net, vxlan_net_id);
- struct vxlan_sock *vs;
- struct socket *sock;
- struct sock *sk;
- unsigned int h;
- bool ipv6 = !!(flags & VXLAN_F_IPV6);
-
- vs = kzalloc(sizeof(*vs), GFP_KERNEL);
+ vs = (struct vxlan_sock *)create_udp_tunnel_sock(net, sizeof(*vs),
+ NULL,
+ &vxlan_ts_cfg);
if (!vs)
return ERR_PTR(-ENOMEM);
for (h = 0; h < VNI_HASH_SIZE; ++h)
INIT_HLIST_HEAD(&vs->vni_list[h]);
- INIT_WORK(&vs->del_work, vxlan_del_work);
+ spin_lock(&vn->sock_lock);
+ list_add(&vs->next, &vn->vxlan_list);
+ spin_unlock(&vn->sock_lock);
- sock = vxlan_create_sock(net, ipv6, port, flags);
- if (IS_ERR(sock)) {
- kfree(vs);
- return ERR_CAST(sock);
- }
+ INIT_WORK(&vs->del_work, vxlan_del_work);
- vs->sock = sock;
- sk = sock->sk;
atomic_set(&vs->refcnt, 1);
+
vs->rcv = rcv;
- vs->data = data;
- rcu_assign_sk_user_data(vs->sock->sk, vs);
+ vs->rcv_data = data;
/* Initialize the vxlan udp offloads structure */
vs->udp_offloads.port = port;
@@ -2419,24 +2344,13 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
vxlan_notify_add_rx_port(vs);
spin_unlock(&vn->sock_lock);
- /* Mark socket as an encapsulation socket. */
- udp_sk(sk)->encap_type = 1;
- udp_sk(sk)->encap_rcv = vxlan_udp_encap_recv;
-#if IS_ENABLED(CONFIG_IPV6)
- if (ipv6)
- ipv6_stub->udpv6_encap_enable();
- else
-#endif
- udp_encap_enable();
-
return vs;
}
struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
- vxlan_rcv_t *rcv, void *data,
+ vxlan_rcv_t rcv, void *data,
bool no_share, u32 flags)
{
- struct vxlan_net *vn = net_generic(net, vxlan_net_id);
struct vxlan_sock *vs;
vs = vxlan_socket_create(net, port, rcv, data, flags);
@@ -2446,7 +2360,6 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
if (no_share) /* Return error if sharing is not allowed. */
return vs;
- spin_lock(&vn->sock_lock);
vs = vxlan_find_sock(net, port);
if (vs) {
if (vs->rcv == rcv)
@@ -2454,7 +2367,6 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
else
vs = ERR_PTR(-EBUSY);
}
- spin_unlock(&vn->sock_lock);
if (!vs)
vs = ERR_PTR(-EINVAL);
@@ -2634,7 +2546,6 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
}
list_add(&vxlan->next, &vn->vxlan_list);
-
return 0;
}
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index d5f59f3..10bfc13 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -4,23 +4,27 @@
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/udp.h>
+#include <net/udp_tunnel.h>
#define VNI_HASH_BITS 10
#define VNI_HASH_SIZE (1<<VNI_HASH_BITS)
struct vxlan_sock;
-typedef void (vxlan_rcv_t)(struct vxlan_sock *vh, struct sk_buff *skb, __be32 key);
-/* per UDP socket information */
+typedef void (*vxlan_rcv_t)(struct vxlan_sock *vs, struct sk_buff *skb,
+ __be32 key);
+
+/* per vxlan socket information */
struct vxlan_sock {
+ struct udp_tunnel_sock uts; /* Must be the first member */
struct hlist_node hlist;
- vxlan_rcv_t *rcv;
- void *data;
+ struct list_head next;
struct work_struct del_work;
- struct socket *sock;
struct rcu_head rcu;
struct hlist_head vni_list[VNI_HASH_SIZE];
atomic_t refcnt;
+ vxlan_rcv_t rcv;
+ void *rcv_data;
struct udp_offload udp_offloads;
};
@@ -35,7 +39,7 @@ struct vxlan_sock {
#define VXLAN_F_UDP_ZERO_CSUM6_RX 0x100
struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
- vxlan_rcv_t *rcv, void *data,
+ vxlan_rcv_t rcv, void *data,
bool no_share, u32 flags);
void vxlan_sock_release(struct vxlan_sock *vs);
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index d8b7e24..7599efd 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -59,7 +59,7 @@ static inline struct vxlan_port *vxlan_vport(const struct vport *vport)
static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
{
struct ovs_key_ipv4_tunnel tun_key;
- struct vport *vport = vs->data;
+ struct vport *vport = vs->rcv_data;
struct iphdr *iph;
__be64 key;
@@ -74,7 +74,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb)
{
struct vxlan_port *vxlan_port = vxlan_vport(vport);
- __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
+ __be16 dst_port = inet_sk(vxlan_port->vs->uts.sock->sk)->inet_sport;
if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port)))
return -EMSGSIZE;
@@ -139,7 +139,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
{
struct net *net = ovs_dp_get_net(vport->dp);
struct vxlan_port *vxlan_port = vxlan_vport(vport);
- __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
+ __be16 dst_port = inet_sk(vxlan_port->vs->uts.sock->sk)->inet_sport;
struct rtable *rt;
struct flowi4 fl;
__be16 src_port;
--
1.7.9.5
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [net-next v3 3/3] l2tp: Refactor l2tp core driver to make use of the common UDP tunnel functions
2014-08-27 3:35 [net-next v3 1/3] udp: Expand UDP tunnel common APIs Andy Zhou
2014-08-27 3:35 ` [net-next v3 2/3] vxlan: Refactor vxlan driver to make use of the common UDP tunnel functions Andy Zhou
@ 2014-08-27 3:35 ` Andy Zhou
2014-08-27 3:54 ` [net-next v3 1/3] udp: Expand UDP tunnel common APIs Eric Dumazet
2014-08-27 4:12 ` Tom Herbert
3 siblings, 0 replies; 10+ messages in thread
From: Andy Zhou @ 2014-08-27 3:35 UTC (permalink / raw)
To: davem; +Cc: netdev, Andy Zhou
Signed-off-by: Andy Zhou <azhou@nicira.com>
---
net/l2tp/l2tp_core.c | 189 +++++++++++++++++++++++++++++---------------------
1 file changed, 109 insertions(+), 80 deletions(-)
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 1109d3b..b4b9474 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -113,6 +113,11 @@ struct l2tp_net {
spinlock_t l2tp_session_hlist_lock;
};
+struct l2tp_udp_tunnel {
+ struct udp_tunnel_sock uts;
+ struct l2tp_tunnel tunnel;
+};
+
static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel);
static inline struct l2tp_tunnel *l2tp_tunnel(struct sock *sk)
@@ -1334,14 +1339,47 @@ static void l2tp_tunnel_del_work(struct work_struct *work)
if (sock)
inet_shutdown(sock, 2);
} else {
+ struct l2tp_udp_tunnel *udp_tunnel;
+
+ udp_tunnel = container_of(tunnel,
+ struct l2tp_udp_tunnel, tunnel);
+
if (sock)
- kernel_sock_shutdown(sock, SHUT_RDWR);
- sk_release_kernel(sk);
+ udp_tunnel_sock_release(&udp_tunnel->uts);
}
l2tp_tunnel_sock_put(sk);
}
+static void l2tp_tunnel_udp_port_conf(struct l2tp_tunnel_cfg *cfg,
+ struct udp_port_cfg *udp_conf)
+{
+ memset(udp_conf, 0, sizeof(*udp_conf));
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (cfg->local_ip6 && cfg->peer_ip6) {
+ udp_conf->family = AF_INET6;
+ memcpy(&udp_conf->local_ip6, cfg->local_ip6,
+ sizeof(udp_conf->local_ip6));
+ memcpy(&udp_conf->peer_ip6, cfg->peer_ip6,
+ sizeof(udp_conf->peer_ip6));
+ udp_conf->use_udp6_tx_checksums =
+ cfg->udp6_zero_tx_checksums;
+ udp_conf->use_udp6_rx_checksums =
+ cfg->udp6_zero_rx_checksums;
+ } else
+#endif
+ {
+ udp_conf->family = AF_INET;
+ udp_conf->local_ip = cfg->local_ip;
+ udp_conf->peer_ip = cfg->peer_ip;
+ udp_conf->use_udp_checksums = cfg->use_udp_checksums;
+ }
+
+ udp_conf->local_udp_port = htons(cfg->local_udp_port);
+ udp_conf->peer_udp_port = htons(cfg->peer_udp_port);
+}
+
/* Create a socket for the tunnel, if one isn't set up by
* userspace. This is used for static tunnels where there is no
* managing L2TP daemon.
@@ -1363,31 +1401,7 @@ static int l2tp_tunnel_sock_create(struct net *net,
switch (cfg->encap) {
case L2TP_ENCAPTYPE_UDP:
- memset(&udp_conf, 0, sizeof(udp_conf));
-
-#if IS_ENABLED(CONFIG_IPV6)
- if (cfg->local_ip6 && cfg->peer_ip6) {
- udp_conf.family = AF_INET6;
- memcpy(&udp_conf.local_ip6, cfg->local_ip6,
- sizeof(udp_conf.local_ip6));
- memcpy(&udp_conf.peer_ip6, cfg->peer_ip6,
- sizeof(udp_conf.peer_ip6));
- udp_conf.use_udp6_tx_checksums =
- cfg->udp6_zero_tx_checksums;
- udp_conf.use_udp6_rx_checksums =
- cfg->udp6_zero_rx_checksums;
- } else
-#endif
- {
- udp_conf.family = AF_INET;
- udp_conf.local_ip = cfg->local_ip;
- udp_conf.peer_ip = cfg->peer_ip;
- udp_conf.use_udp_checksums = cfg->use_udp_checksums;
- }
-
- udp_conf.local_udp_port = htons(cfg->local_udp_port);
- udp_conf.peer_udp_port = htons(cfg->peer_udp_port);
-
+ l2tp_tunnel_udp_port_conf(cfg, &udp_conf);
err = udp_sock_create(net, &udp_conf, &sock);
if (err < 0)
goto out;
@@ -1471,6 +1485,31 @@ out:
static struct lock_class_key l2tp_socket_class;
+static int l2tp_sk_sanity_check(struct sock *sk, enum l2tp_encap_type encap,
+ u32 tunnel_id, int fd)
+{
+ unsigned int expected_protocol;
+
+ switch (encap) {
+ case L2TP_ENCAPTYPE_UDP:
+ expected_protocol = IPPROTO_UDP;
+ break;
+ case L2TP_ENCAPTYPE_IP:
+ expected_protocol = IPPROTO_L2TP;
+ break;
+ default:
+ return -EPROTONOSUPPORT;
+ }
+
+ if (sk->sk_protocol != expected_protocol) {
+ pr_err("tunl %hu: fd %d wrong protocol, got %d, expected %d\n",
+ tunnel_id, fd, sk->sk_protocol, expected_protocol);
+ return -EPROTONOSUPPORT;
+ }
+
+ return 0;
+}
+
int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp)
{
struct l2tp_tunnel *tunnel = NULL;
@@ -1478,7 +1517,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
struct socket *sock = NULL;
struct sock *sk = NULL;
struct l2tp_net *pn;
- enum l2tp_encap_type encap = L2TP_ENCAPTYPE_UDP;
+ enum l2tp_encap_type encap = cfg ? cfg->encap : L2TP_ENCAPTYPE_UDP;
/* Get the tunnel socket from the fd, which was opened by
* the userspace L2TP daemon. If not specified, create a
@@ -1486,9 +1525,11 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
*/
if (fd < 0) {
err = l2tp_tunnel_sock_create(net, tunnel_id, peer_tunnel_id,
- cfg, &sock);
+ cfg, &sock);
if (err < 0)
goto err;
+
+ sk = sock->sk;
} else {
sock = sockfd_lookup(fd, &err);
if (!sock) {
@@ -1498,58 +1539,66 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
goto err;
}
+ sk = sock->sk;
+
/* Reject namespace mismatches */
- if (!net_eq(sock_net(sock->sk), net)) {
+ if (!net_eq(sock_net(sk), net)) {
pr_err("tunl %u: netns mismatch\n", tunnel_id);
err = -EINVAL;
goto err;
}
+
+ /* Quick sanity checks */
+ err = l2tp_sk_sanity_check(sk, encap, tunnel_id, fd);
+ if (err)
+ goto err;
+
+ /* Check if this socket has already been prepped */
+ tunnel = l2tp_tunnel(sk);
+ if (tunnel != NULL) {
+ /* This socket has already been prepped */
+ err = -EBUSY;
+ goto err;
+ }
}
- sk = sock->sk;
+ switch(encap) {
+ case L2TP_ENCAPTYPE_UDP: {
+ struct udp_tunnel_socket_cfg l2tp_udp_cfg;
+ struct l2tp_udp_tunnel *udp_tunnel;
- if (cfg != NULL)
- encap = cfg->encap;
+ l2tp_tunnel_udp_port_conf(cfg, &l2tp_udp_cfg.port);
+ l2tp_udp_cfg.encap_type = UDP_ENCAP_L2TPINUDP;
+ l2tp_udp_cfg.encap_rcv = l2tp_udp_encap_recv;
+ l2tp_udp_cfg.encap_destroy = l2tp_udp_encap_destroy;
- /* Quick sanity checks */
- switch (encap) {
- case L2TP_ENCAPTYPE_UDP:
- err = -EPROTONOSUPPORT;
- if (sk->sk_protocol != IPPROTO_UDP) {
- pr_err("tunl %hu: fd %d wrong protocol, got %d, expected %d\n",
- tunnel_id, fd, sk->sk_protocol, IPPROTO_UDP);
+ udp_tunnel = (struct l2tp_udp_tunnel *)
+ create_udp_tunnel_sock(net, sizeof(*udp_tunnel),
+ sock, &l2tp_udp_cfg);
+
+ if (!udp_tunnel) {
+ err = -ENOMEM;
goto err;
}
+
+ tunnel = &udp_tunnel->tunnel;
break;
+ }
case L2TP_ENCAPTYPE_IP:
- err = -EPROTONOSUPPORT;
- if (sk->sk_protocol != IPPROTO_L2TP) {
- pr_err("tunl %hu: fd %d wrong protocol, got %d, expected %d\n",
- tunnel_id, fd, sk->sk_protocol, IPPROTO_L2TP);
+ tunnel = kzalloc(sizeof(*tunnel), GFP_KERNEL);
+ if (tunnel == NULL) {
+ err = -ENOMEM;
goto err;
}
- break;
- }
-
- /* Check if this socket has already been prepped */
- tunnel = l2tp_tunnel(sk);
- if (tunnel != NULL) {
- /* This socket has already been prepped */
- err = -EBUSY;
- goto err;
}
- tunnel = kzalloc(sizeof(struct l2tp_tunnel), GFP_KERNEL);
- if (tunnel == NULL) {
- err = -ENOMEM;
- goto err;
- }
+ rcu_assign_sk_user_data(sk, tunnel);
+ tunnel->encap = encap;
tunnel->version = version;
tunnel->tunnel_id = tunnel_id;
tunnel->peer_tunnel_id = peer_tunnel_id;
- tunnel->debug = L2TP_DEFAULT_DEBUG_FLAGS;
-
+ tunnel->debug = cfg ? cfg->debug : L2TP_DEFAULT_DEBUG_FLAGS;
tunnel->magic = L2TP_TUNNEL_MAGIC;
sprintf(&tunnel->name[0], "tunl %u", tunnel_id);
rwlock_init(&tunnel->hlist_lock);
@@ -1558,9 +1607,6 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
tunnel->l2tp_net = net;
pn = l2tp_pernet(net);
- if (cfg != NULL)
- tunnel->debug = cfg->debug;
-
#if IS_ENABLED(CONFIG_IPV6)
if (sk->sk_family == PF_INET6) {
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -1579,23 +1625,6 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
}
#endif
- /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
- tunnel->encap = encap;
- if (encap == L2TP_ENCAPTYPE_UDP) {
- /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
- udp_sk(sk)->encap_type = UDP_ENCAP_L2TPINUDP;
- udp_sk(sk)->encap_rcv = l2tp_udp_encap_recv;
- udp_sk(sk)->encap_destroy = l2tp_udp_encap_destroy;
-#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == PF_INET6 && !tunnel->v4mapped)
- udpv6_encap_enable();
- else
-#endif
- udp_encap_enable();
- }
-
- sk->sk_user_data = tunnel;
-
/* Hook on the tunnel socket destructor so that we can cleanup
* if the tunnel socket goes away.
*/
--
1.7.9.5
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [net-next v3 1/3] udp: Expand UDP tunnel common APIs
2014-08-27 3:35 [net-next v3 1/3] udp: Expand UDP tunnel common APIs Andy Zhou
2014-08-27 3:35 ` [net-next v3 2/3] vxlan: Refactor vxlan driver to make use of the common UDP tunnel functions Andy Zhou
2014-08-27 3:35 ` [net-next v3 3/3] l2tp: Refactor l2tp core " Andy Zhou
@ 2014-08-27 3:54 ` Eric Dumazet
2014-08-27 5:01 ` Andy Zhou
2014-08-27 4:12 ` Tom Herbert
3 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2014-08-27 3:54 UTC (permalink / raw)
To: Andy Zhou; +Cc: davem, netdev
On Tue, 2014-08-26 at 20:35 -0700, Andy Zhou wrote:
> Added create_udp_tunnel_socket(), packet receive and transmit, and
> other related common functions for UDP tunnels.
>
> Per net open UDP tunnel ports are tracked in this common layer to
> prevent sharing of a single port with more than one UDP tunnel.
>
> Signed-off-by: Andy Zhou <azhou@nicira.com>
> ---
I see some RCU code, but I do not see elementary rules of RCU being
respected in this patch.
Changelogs are a bit terse, and there is not a single comment in this
code.
What is going on exactly ?
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [net-next v3 1/3] udp: Expand UDP tunnel common APIs
2014-08-27 3:35 [net-next v3 1/3] udp: Expand UDP tunnel common APIs Andy Zhou
` (2 preceding siblings ...)
2014-08-27 3:54 ` [net-next v3 1/3] udp: Expand UDP tunnel common APIs Eric Dumazet
@ 2014-08-27 4:12 ` Tom Herbert
2014-08-27 5:17 ` Andy Zhou
3 siblings, 1 reply; 10+ messages in thread
From: Tom Herbert @ 2014-08-27 4:12 UTC (permalink / raw)
To: Andy Zhou; +Cc: David Miller, Linux Netdev List
On Tue, Aug 26, 2014 at 8:35 PM, Andy Zhou <azhou@nicira.com> wrote:
> Added create_udp_tunnel_socket(), packet receive and transmit, and
> other related common functions for UDP tunnels.
>
> Per net open UDP tunnel ports are tracked in this common layer to
> prevent sharing of a single port with more than one UDP tunnel.
>
This is not needed! If a UDP port is already bound (whether by another
tunnel or not), then bind during tunnel initialization will fail. All
this logic to store tunnel sockets in a separate list seems like
unnecessary complexity. If a driver needs to track multiple ports it
opens, it can do that on its own like VXLAN is already doing.
> Signed-off-by: Andy Zhou <azhou@nicira.com>
> ---
> include/net/udp_tunnel.h | 50 +++++++++++
> net/ipv4/udp_tunnel.c | 216 +++++++++++++++++++++++++++++++++++++++++++++-
> 2 files changed, 265 insertions(+), 1 deletion(-)
>
> diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
> index ffd69cb..5ff0746 100644
> --- a/include/net/udp_tunnel.h
> +++ b/include/net/udp_tunnel.h
> @@ -1,6 +1,8 @@
> #ifndef __NET_UDP_TUNNEL_H
> #define __NET_UDP_TUNNEL_H
>
> +#include <net/ip_tunnels.h>
> +
> struct udp_port_cfg {
> u8 family;
>
> @@ -26,7 +28,55 @@ struct udp_port_cfg {
> use_udp6_rx_checksums:1;
> };
>
> +struct udp_tunnel_sock;
> +
> +typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb);
> +typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk);
> +
> +struct udp_tunnel_socket_cfg {
> + struct udp_port_cfg port;
> +
> + /* Used for setting up udp_sock fields, see udp.h for details */
> + __u8 encap_type;
> + udp_tunnel_encap_rcv_t encap_rcv;
> + udp_tunnel_encap_destroy_t encap_destroy;
> +};
> +
> +struct udp_tunnel_sock {
> + struct hlist_node hlist;
> + struct socket *sock;
> +};
> +
> int udp_sock_create(struct net *net, struct udp_port_cfg *cfg,
> struct socket **sockp);
>
> +struct udp_tunnel_sock *create_udp_tunnel_sock(struct net *net, size_t size,
> + struct socket *sock,
> + struct udp_tunnel_socket_cfg
> + *socket_cfg);
> +
> +struct udp_tunnel_sock *udp_tunnel_find_sock(struct net *net, __be16 port);
> +
> +int udp_tunnel_xmit_skb(struct socket *sock, struct rtable *rt,
> + struct sk_buff *skb, __be32 src, __be32 dst,
> + __u8 tos, __u8 ttl, __be16 df, __be16 src_port,
> + __be16 dst_port, bool xnet);
> +
> +#if IS_ENABLED(CONFIG_IPV6)
> +int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst,
> + struct sk_buff *skb, struct net_device *dev,
> + struct in6_addr *saddr, struct in6_addr *daddr,
> + __u8 prio, __u8 ttl, __be16 src_port, __be16 dst_port);
> +
> +#endif
> +
> +void udp_tunnel_sock_release(struct udp_tunnel_sock *uts);
> +
> +static inline struct sk_buff *udp_tunnel_handle_offloads(struct sk_buff *skb,
> + bool udp_csum)
> +{
> + int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
> +
> + return iptunnel_handle_offloads(skb, udp_csum, type);
> +}
> #endif
> diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
> index 61ec1a6..3ed66bc 100644
> --- a/net/ipv4/udp_tunnel.c
> +++ b/net/ipv4/udp_tunnel.c
> @@ -7,6 +7,23 @@
> #include <net/udp.h>
> #include <net/udp_tunnel.h>
> #include <net/net_namespace.h>
> +#include <net/netns/generic.h>
> +#if IS_ENABLED(CONFIG_IPV6)
> +#include <net/ipv6.h>
> +#include <net/addrconf.h>
> +#include <net/ip6_tunnel.h>
> +#include <net/ip6_checksum.h>
> +#endif
> +
> +#define PORT_HASH_BITS 8
> +#define PORT_HASH_SIZE (1 << PORT_HASH_BITS)
> +
> +static int udp_tunnel_net_id;
> +
> +struct udp_tunnel_net {
> + struct hlist_head sock_list[PORT_HASH_SIZE];
> + spinlock_t sock_lock; /* Protecting the sock_list */
> +};
>
> int udp_sock_create(struct net *net, struct udp_port_cfg *cfg,
> struct socket **sockp)
> @@ -82,7 +99,6 @@ int udp_sock_create(struct net *net, struct udp_port_cfg *cfg,
> return -EPFNOSUPPORT;
> }
>
> -
> *sockp = sock;
>
> return 0;
> @@ -97,4 +113,202 @@ error:
> }
> EXPORT_SYMBOL(udp_sock_create);
>
> +
> +/* Socket hash table head */
> +static inline struct hlist_head *uts_head(struct net *net, const __be16 port)
> +{
> + struct udp_tunnel_net *utn = net_generic(net, udp_tunnel_net_id);
> +
> + return &utn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)];
> +}
> +
> +struct udp_tunnel_sock *create_udp_tunnel_sock(struct net *net, size_t size,
> + struct socket *sock,
> + struct udp_tunnel_socket_cfg
> + *cfg)
> +{
> + struct udp_tunnel_net *utn = net_generic(net, udp_tunnel_net_id);
> + struct udp_tunnel_sock *uts;
> + struct sock *sk;
> + const __be16 port = cfg->port.local_udp_port;
> + const int ipv6 = (cfg->port.family == AF_INET6);
> + int err;
> +
> + if (!sock)
> + err = udp_sock_create(net, &cfg->port, &sock);
> + else
> + err = (sock->sk->sk_protocol == IPPROTO_UDP) ?
> + 0 : -EPROTONOSUPPORT;
> +
> + if (err)
> + return NULL;
> +
> + uts = kzalloc(size, GFP_KERNEL);
> + if (!uts)
> + return ERR_PTR(-ENOMEM);
> +
> + sk = sock->sk;
> +
> + /* Disable multicast loopback */
> + inet_sk(sk)->mc_loop = 0;
> +
> + rcu_assign_sk_user_data(sk, uts);
> +
> + udp_sk(sk)->encap_type = cfg->encap_type;
> + udp_sk(sk)->encap_rcv = cfg->encap_rcv;
> + udp_sk(sk)->encap_destroy = cfg->encap_destroy;
> +
> + uts->sock = sock;
> +
> + spin_lock(&utn->sock_lock);
> + hlist_add_head_rcu(&uts->hlist, uts_head(net, port));
> + spin_unlock(&utn->sock_lock);
> +
> +#if IS_ENABLED(CONFIG_IPV6)
> + if (ipv6)
> + ipv6_stub->udpv6_encap_enable();
> + else
> +#endif
> + udp_encap_enable();
> +
> + return uts;
> +}
> +EXPORT_SYMBOL_GPL(create_udp_tunnel_sock);
> +
> +int udp_tunnel_xmit_skb(struct socket *sock, struct rtable *rt,
> + struct sk_buff *skb, __be32 src, __be32 dst,
> + __u8 tos, __u8 ttl, __be16 df, __be16 src_port,
> + __be16 dst_port, bool xnet)
> +{
> + struct udphdr *uh;
> +
> + __skb_push(skb, sizeof(*uh));
> + skb_reset_transport_header(skb);
> + uh = udp_hdr(skb);
> +
> + uh->dest = dst_port;
> + uh->source = src_port;
> + uh->len = htons(skb->len);
> +
> + udp_set_csum(sock->sk->sk_no_check_tx, skb, src, dst, skb->len);
> +
> + return iptunnel_xmit(sock->sk, rt, skb, src, dst, IPPROTO_UDP,
> + tos, ttl, df, xnet);
> +}
> +EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb);
> +
> +#if IS_ENABLED(CONFIG_IPV6)
> +int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst,
> + struct sk_buff *skb, struct net_device *dev,
> + struct in6_addr *saddr, struct in6_addr *daddr,
> + __u8 prio, __u8 ttl, __be16 src_port, __be16 dst_port)
> +{
> + struct udphdr *uh;
> + struct ipv6hdr *ip6h;
> +
> + __skb_push(skb, sizeof(*uh));
> + skb_reset_transport_header(skb);
> + uh = udp_hdr(skb);
> +
> + uh->dest = dst_port;
> + uh->source = src_port;
> +
> + uh->len = htons(skb->len);
> + uh->check = 0;
> +
> + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
> + IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED
> + | IPSKB_REROUTED);
> + skb_dst_set(skb, dst);
> +
> + if (!skb_is_gso(skb) && !(dst->dev->features & NETIF_F_IPV6_CSUM)) {
> + __wsum csum = skb_checksum(skb, 0, skb->len, 0);
> +
> + skb->ip_summed = CHECKSUM_UNNECESSARY;
> + uh->check = csum_ipv6_magic(saddr, daddr, skb->len,
> + IPPROTO_UDP, csum);
> + if (uh->check == 0)
> + uh->check = CSUM_MANGLED_0;
> + } else {
> + skb->ip_summed = CHECKSUM_PARTIAL;
> + skb->csum_start = skb_transport_header(skb) - skb->head;
> + skb->csum_offset = offsetof(struct udphdr, check);
> + uh->check = ~csum_ipv6_magic(saddr, daddr,
> + skb->len, IPPROTO_UDP, 0);
> + }
> +
> + __skb_push(skb, sizeof(*ip6h));
> + skb_reset_network_header(skb);
> + ip6h = ipv6_hdr(skb);
> + ip6h->version = 6;
> + ip6h->priority = prio;
> + ip6h->flow_lbl[0] = 0;
> + ip6h->flow_lbl[1] = 0;
> + ip6h->flow_lbl[2] = 0;
> + ip6h->payload_len = htons(skb->len);
> + ip6h->nexthdr = IPPROTO_UDP;
> + ip6h->hop_limit = ttl;
> + ip6h->daddr = *daddr;
> + ip6h->saddr = *saddr;
> +
> + ip6tunnel_xmit(skb, dev);
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(udp_tunnel6_xmit_skb);
> +#endif
> +
> +struct udp_tunnel_sock *udp_tunnel_find_sock(struct net *net, __be16 port)
> +{
> + struct udp_tunnel_sock *uts;
> +
> + hlist_for_each_entry_rcu(uts, uts_head(net, port), hlist) {
> + if (inet_sk(uts->sock->sk)->inet_sport == port)
> + return uts;
> + }
> +
> + return NULL;
> +}
> +EXPORT_SYMBOL_GPL(udp_tunnel_find_sock);
> +
> +void udp_tunnel_sock_release(struct udp_tunnel_sock *uts)
> +{
> + struct sock *sk = uts->sock->sk;
> + struct net *net = sock_net(sk);
> + struct udp_tunnel_net *utn = net_generic(net, udp_tunnel_net_id);
> +
> + spin_lock(&utn->sock_lock);
> + hlist_del_rcu(&uts->hlist);
> + rcu_assign_sk_user_data(uts->sock->sk, NULL);
> + kernel_sock_shutdown(uts->sock, SHUT_RDWR);
> + sk_release_kernel(sk);
> + spin_unlock(&utn->sock_lock);
> +}
> +EXPORT_SYMBOL_GPL(udp_tunnel_sock_release);
> +
> +static int __net_init udp_tunnel_init_net(struct net *net)
> +{
> + struct udp_tunnel_net *utn = net_generic(net, udp_tunnel_net_id);
> + unsigned int h;
> +
> + spin_lock_init(&utn->sock_lock);
> +
> + for (h = 0; h < PORT_HASH_SIZE; h++)
> + INIT_HLIST_HEAD(&utn->sock_list[h]);
> +
> + return 0;
> +}
> +
> +static struct pernet_operations udp_tunnel_net_ops = {
> + .init = udp_tunnel_init_net,
> + .exit = NULL,
> + .id = &udp_tunnel_net_id,
> + .size = sizeof(struct udp_tunnel_net),
> +};
> +
> +static int __init udp_tunnel_init(void)
> +{
> + return register_pernet_subsys(&udp_tunnel_net_ops);
> +}
> +late_initcall(udp_tunnel_init);
> +
> MODULE_LICENSE("GPL");
> --
> 1.7.9.5
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [net-next v3 2/3] vxlan: Refactor vxlan driver to make use of the common UDP tunnel functions.
2014-08-27 3:35 ` [net-next v3 2/3] vxlan: Refactor vxlan driver to make use of the common UDP tunnel functions Andy Zhou
@ 2014-08-27 4:15 ` Tom Herbert
[not found] ` <CA+mtBx8R2YyAMbPuw=iYDbLpSwRo_robnxOYPUFV2368-RzdLg@mail.gmail.com>
1 sibling, 0 replies; 10+ messages in thread
From: Tom Herbert @ 2014-08-27 4:15 UTC (permalink / raw)
To: Andy Zhou; +Cc: David Miller, Linux Netdev List
On Tue, Aug 26, 2014 at 8:35 PM, Andy Zhou <azhou@nicira.com> wrote:
> Signed-off-by: Andy Zhou <azhou@nicira.com>
> ---
> drivers/net/vxlan.c | 199 ++++++++++++-----------------------------
> include/net/vxlan.h | 16 ++--
> net/openvswitch/vport-vxlan.c | 6 +-
> 3 files changed, 68 insertions(+), 153 deletions(-)
>
> diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
> index beb377b..f1f1c48 100644
> --- a/drivers/net/vxlan.c
> +++ b/drivers/net/vxlan.c
> @@ -42,6 +42,7 @@
> #include <net/netns/generic.h>
> #include <net/vxlan.h>
> #include <net/protocol.h>
> +#include <net/udp_tunnel.h>
> #if IS_ENABLED(CONFIG_IPV6)
> #include <net/ipv6.h>
> #include <net/addrconf.h>
> @@ -277,13 +278,7 @@ static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb)
> /* Find VXLAN socket based on network namespace and UDP port */
> static struct vxlan_sock *vxlan_find_sock(struct net *net, __be16 port)
> {
> - struct vxlan_sock *vs;
> -
> - hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
> - if (inet_sk(vs->sock->sk)->inet_sport == port)
> - return vs;
> - }
> - return NULL;
> + return (struct vxlan_sock *)udp_tunnel_find_sock(net, port);
It seems incorrect to assume that the socket returned is vxlan,
presumably this could be some other type of tunnel socket.
> }
>
> static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id)
> @@ -636,7 +631,7 @@ static int vxlan_gro_complete(struct sk_buff *skb, int nhoff)
> static void vxlan_notify_add_rx_port(struct vxlan_sock *vs)
> {
> struct net_device *dev;
> - struct sock *sk = vs->sock->sk;
> + struct sock *sk = vs->uts.sock->sk;
> struct net *net = sock_net(sk);
> sa_family_t sa_family = sk->sk_family;
> __be16 port = inet_sk(sk)->inet_sport;
> @@ -661,7 +656,7 @@ static void vxlan_notify_add_rx_port(struct vxlan_sock *vs)
> static void vxlan_notify_del_rx_port(struct vxlan_sock *vs)
> {
> struct net_device *dev;
> - struct sock *sk = vs->sock->sk;
> + struct sock *sk = vs->uts.sock->sk;
> struct net *net = sock_net(sk);
> sa_family_t sa_family = sk->sk_family;
> __be16 port = inet_sk(sk)->inet_sport;
> @@ -1053,7 +1048,7 @@ static void vxlan_sock_hold(struct vxlan_sock *vs)
>
> void vxlan_sock_release(struct vxlan_sock *vs)
> {
> - struct sock *sk = vs->sock->sk;
> + struct sock *sk = vs->uts.sock->sk;
> struct net *net = sock_net(sk);
> struct vxlan_net *vn = net_generic(net, vxlan_net_id);
>
> @@ -1062,7 +1057,6 @@ void vxlan_sock_release(struct vxlan_sock *vs)
>
> spin_lock(&vn->sock_lock);
> hlist_del_rcu(&vs->hlist);
> - rcu_assign_sk_user_data(vs->sock->sk, NULL);
> vxlan_notify_del_rx_port(vs);
> spin_unlock(&vn->sock_lock);
>
> @@ -1078,7 +1072,7 @@ static void vxlan_igmp_join(struct work_struct *work)
> {
> struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, igmp_join);
> struct vxlan_sock *vs = vxlan->vn_sock;
> - struct sock *sk = vs->sock->sk;
> + struct sock *sk = vs->uts.sock->sk;
> union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
> int ifindex = vxlan->default_dst.remote_ifindex;
>
> @@ -1107,7 +1101,7 @@ static void vxlan_igmp_leave(struct work_struct *work)
> {
> struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, igmp_leave);
> struct vxlan_sock *vs = vxlan->vn_sock;
> - struct sock *sk = vs->sock->sk;
> + struct sock *sk = vs->uts.sock->sk;
> union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
> int ifindex = vxlan->default_dst.remote_ifindex;
>
> @@ -1338,7 +1332,6 @@ out:
> }
>
> #if IS_ENABLED(CONFIG_IPV6)
> -
> static struct sk_buff *vxlan_na_create(struct sk_buff *request,
> struct neighbour *n, bool isrouter)
> {
> @@ -1572,13 +1565,6 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)
> return false;
> }
>
> -static inline struct sk_buff *vxlan_handle_offloads(struct sk_buff *skb,
> - bool udp_csum)
> -{
> - int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
> - return iptunnel_handle_offloads(skb, udp_csum, type);
> -}
> -
> #if IS_ENABLED(CONFIG_IPV6)
> static int vxlan6_xmit_skb(struct vxlan_sock *vs,
> struct dst_entry *dst, struct sk_buff *skb,
> @@ -1587,13 +1573,13 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
> __be16 src_port, __be16 dst_port, __be32 vni,
> bool xnet)
> {
> - struct ipv6hdr *ip6h;
> struct vxlanhdr *vxh;
> - struct udphdr *uh;
> int min_headroom;
> int err;
>
> - skb = vxlan_handle_offloads(skb, !udp_get_no_check6_tx(vs->sock->sk));
> + skb = udp_tunnel_handle_offloads(skb,
> + !udp_get_no_check6_tx(
> + vs->uts.sock->sk));
> if (IS_ERR(skb))
> return -EINVAL;
>
> @@ -1621,38 +1607,8 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
> vxh->vx_flags = htonl(VXLAN_FLAGS);
> vxh->vx_vni = vni;
>
> - __skb_push(skb, sizeof(*uh));
> - skb_reset_transport_header(skb);
> - uh = udp_hdr(skb);
> -
> - uh->dest = dst_port;
> - uh->source = src_port;
> -
> - uh->len = htons(skb->len);
> -
> - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
> - IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
> - IPSKB_REROUTED);
> - skb_dst_set(skb, dst);
> -
> - udp6_set_csum(udp_get_no_check6_tx(vs->sock->sk), skb,
> - saddr, daddr, skb->len);
> -
> - __skb_push(skb, sizeof(*ip6h));
> - skb_reset_network_header(skb);
> - ip6h = ipv6_hdr(skb);
> - ip6h->version = 6;
> - ip6h->priority = prio;
> - ip6h->flow_lbl[0] = 0;
> - ip6h->flow_lbl[1] = 0;
> - ip6h->flow_lbl[2] = 0;
> - ip6h->payload_len = htons(skb->len);
> - ip6h->nexthdr = IPPROTO_UDP;
> - ip6h->hop_limit = ttl;
> - ip6h->daddr = *daddr;
> - ip6h->saddr = *saddr;
> -
> - ip6tunnel_xmit(skb, dev);
> + udp_tunnel6_xmit_skb(vs->uts.sock, dst, skb, dev, saddr, daddr, prio,
> + ttl, src_port, dst_port);
> return 0;
> }
> #endif
> @@ -1663,11 +1619,11 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
> __be16 src_port, __be16 dst_port, __be32 vni, bool xnet)
> {
> struct vxlanhdr *vxh;
> - struct udphdr *uh;
> int min_headroom;
> int err;
>
> - skb = vxlan_handle_offloads(skb, !vs->sock->sk->sk_no_check_tx);
> + skb = udp_tunnel_handle_offloads(skb,
> + !vs->uts.sock->sk->sk_no_check_tx);
> if (IS_ERR(skb))
> return -EINVAL;
>
> @@ -1693,20 +1649,8 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
> vxh->vx_flags = htonl(VXLAN_FLAGS);
> vxh->vx_vni = vni;
>
> - __skb_push(skb, sizeof(*uh));
> - skb_reset_transport_header(skb);
> - uh = udp_hdr(skb);
> -
> - uh->dest = dst_port;
> - uh->source = src_port;
> -
> - uh->len = htons(skb->len);
> -
> - udp_set_csum(vs->sock->sk->sk_no_check_tx, skb,
> - src, dst, skb->len);
> -
> - return iptunnel_xmit(vs->sock->sk, rt, skb, src, dst, IPPROTO_UDP,
> - tos, ttl, df, xnet);
> + return udp_tunnel_xmit_skb(vs->uts.sock, rt, skb, src, dst, tos,
> + ttl, df, src_port, dst_port, xnet);
> }
> EXPORT_SYMBOL_GPL(vxlan_xmit_skb);
>
> @@ -1831,18 +1775,18 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
> tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
> ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
>
> - err = vxlan_xmit_skb(vxlan->vn_sock, rt, skb,
> - fl4.saddr, dst->sin.sin_addr.s_addr,
> - tos, ttl, df, src_port, dst_port,
> - htonl(vni << 8),
> - !net_eq(vxlan->net, dev_net(vxlan->dev)));
> + err = udp_tunnel_xmit_skb(vxlan->vn_sock->uts.sock, rt, skb,
> + fl4.saddr, dst->sin.sin_addr.s_addr,
> + tos, ttl, df, src_port, dst_port,
> + !net_eq(vxlan->net,
> + dev_net(vxlan->dev)));
>
> if (err < 0)
> goto rt_tx_error;
> iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
> #if IS_ENABLED(CONFIG_IPV6)
> } else {
> - struct sock *sk = vxlan->vn_sock->sock->sk;
> + struct sock *sk = vxlan->vn_sock->uts.sock->sk;
> struct dst_entry *ndst;
> struct flowi6 fl6;
> u32 flags;
> @@ -2204,8 +2148,8 @@ void vxlan_get_rx_port(struct net_device *dev)
> spin_lock(&vn->sock_lock);
> for (i = 0; i < PORT_HASH_SIZE; ++i) {
> hlist_for_each_entry_rcu(vs, &vn->sock_list[i], hlist) {
> - port = inet_sk(vs->sock->sk)->inet_sport;
> - sa_family = vs->sock->sk->sk_family;
> + port = inet_sk(vs->uts.sock->sk)->inet_sport;
> + sa_family = vs->uts.sock->sk->sk_family;
> dev->netdev_ops->ndo_add_vxlan_port(dev, sa_family,
> port);
> }
> @@ -2335,79 +2279,60 @@ static const struct ethtool_ops vxlan_ethtool_ops = {
> static void vxlan_del_work(struct work_struct *work)
> {
> struct vxlan_sock *vs = container_of(work, struct vxlan_sock, del_work);
> -
> - sk_release_kernel(vs->sock->sk);
> + udp_tunnel_sock_release(&vs->uts);
> kfree_rcu(vs, rcu);
> }
>
> -static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
> - __be16 port, u32 flags)
> +/* Create new listen socket if needed */
> +static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
> + vxlan_rcv_t rcv, void *data,
> + u32 flags)
> {
> - struct socket *sock;
> - struct udp_port_cfg udp_conf;
> - int err;
> + struct vxlan_net *vn = net_generic(net, vxlan_net_id);
> + struct vxlan_sock *vs;
> + struct udp_tunnel_socket_cfg vxlan_ts_cfg;
> + bool ipv6 = !!(flags & VXLAN_F_IPV6);
> + unsigned int h;
>
> - memset(&udp_conf, 0, sizeof(udp_conf));
> + memset(&vxlan_ts_cfg, 0, sizeof(struct udp_tunnel_socket_cfg));
>
> if (ipv6) {
> - udp_conf.family = AF_INET6;
> - udp_conf.use_udp6_tx_checksums =
> + vxlan_ts_cfg.port.family = AF_INET6;
> + vxlan_ts_cfg.port.use_udp6_tx_checksums =
> !!(flags & VXLAN_F_UDP_ZERO_CSUM6_TX);
> - udp_conf.use_udp6_rx_checksums =
> + vxlan_ts_cfg.port.use_udp6_rx_checksums =
> !!(flags & VXLAN_F_UDP_ZERO_CSUM6_RX);
> } else {
> - udp_conf.family = AF_INET;
> - udp_conf.local_ip.s_addr = INADDR_ANY;
> - udp_conf.use_udp_checksums =
> + vxlan_ts_cfg.port.family = AF_INET;
> + vxlan_ts_cfg.port.local_ip.s_addr = INADDR_ANY;
> + vxlan_ts_cfg.port.use_udp_checksums =
> !!(flags & VXLAN_F_UDP_CSUM);
> }
>
> - udp_conf.local_udp_port = port;
> + vxlan_ts_cfg.port.local_udp_port = port;
> + vxlan_ts_cfg.encap_type = 1;
> + vxlan_ts_cfg.encap_rcv = vxlan_udp_encap_recv;
> + vxlan_ts_cfg.encap_destroy = NULL;
>
> - /* Open UDP socket */
> - err = udp_sock_create(net, &udp_conf, &sock);
> - if (err < 0)
> - return ERR_PTR(err);
> -
> - /* Disable multicast loopback */
> - inet_sk(sock->sk)->mc_loop = 0;
> -
> - return sock;
> -}
> -
> -/* Create new listen socket if needed */
> -static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
> - vxlan_rcv_t *rcv, void *data,
> - u32 flags)
> -{
> - struct vxlan_net *vn = net_generic(net, vxlan_net_id);
> - struct vxlan_sock *vs;
> - struct socket *sock;
> - struct sock *sk;
> - unsigned int h;
> - bool ipv6 = !!(flags & VXLAN_F_IPV6);
> -
> - vs = kzalloc(sizeof(*vs), GFP_KERNEL);
> + vs = (struct vxlan_sock *)create_udp_tunnel_sock(net, sizeof(*vs),
> + NULL,
> + &vxlan_ts_cfg);
> if (!vs)
> return ERR_PTR(-ENOMEM);
>
> for (h = 0; h < VNI_HASH_SIZE; ++h)
> INIT_HLIST_HEAD(&vs->vni_list[h]);
>
> - INIT_WORK(&vs->del_work, vxlan_del_work);
> + spin_lock(&vn->sock_lock);
> + list_add(&vs->next, &vn->vxlan_list);
> + spin_unlock(&vn->sock_lock);
>
> - sock = vxlan_create_sock(net, ipv6, port, flags);
> - if (IS_ERR(sock)) {
> - kfree(vs);
> - return ERR_CAST(sock);
> - }
> + INIT_WORK(&vs->del_work, vxlan_del_work);
>
> - vs->sock = sock;
> - sk = sock->sk;
> atomic_set(&vs->refcnt, 1);
> +
> vs->rcv = rcv;
> - vs->data = data;
> - rcu_assign_sk_user_data(vs->sock->sk, vs);
> + vs->rcv_data = data;
>
> /* Initialize the vxlan udp offloads structure */
> vs->udp_offloads.port = port;
> @@ -2419,24 +2344,13 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
> vxlan_notify_add_rx_port(vs);
> spin_unlock(&vn->sock_lock);
>
> - /* Mark socket as an encapsulation socket. */
> - udp_sk(sk)->encap_type = 1;
> - udp_sk(sk)->encap_rcv = vxlan_udp_encap_recv;
> -#if IS_ENABLED(CONFIG_IPV6)
> - if (ipv6)
> - ipv6_stub->udpv6_encap_enable();
> - else
> -#endif
> - udp_encap_enable();
> -
> return vs;
> }
>
> struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
> - vxlan_rcv_t *rcv, void *data,
> + vxlan_rcv_t rcv, void *data,
> bool no_share, u32 flags)
> {
> - struct vxlan_net *vn = net_generic(net, vxlan_net_id);
> struct vxlan_sock *vs;
>
> vs = vxlan_socket_create(net, port, rcv, data, flags);
> @@ -2446,7 +2360,6 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
> if (no_share) /* Return error if sharing is not allowed. */
> return vs;
>
> - spin_lock(&vn->sock_lock);
> vs = vxlan_find_sock(net, port);
> if (vs) {
> if (vs->rcv == rcv)
> @@ -2454,7 +2367,6 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
> else
> vs = ERR_PTR(-EBUSY);
> }
> - spin_unlock(&vn->sock_lock);
>
> if (!vs)
> vs = ERR_PTR(-EINVAL);
> @@ -2634,7 +2546,6 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
> }
>
> list_add(&vxlan->next, &vn->vxlan_list);
> -
> return 0;
> }
>
> diff --git a/include/net/vxlan.h b/include/net/vxlan.h
> index d5f59f3..10bfc13 100644
> --- a/include/net/vxlan.h
> +++ b/include/net/vxlan.h
> @@ -4,23 +4,27 @@
> #include <linux/skbuff.h>
> #include <linux/netdevice.h>
> #include <linux/udp.h>
> +#include <net/udp_tunnel.h>
>
> #define VNI_HASH_BITS 10
> #define VNI_HASH_SIZE (1<<VNI_HASH_BITS)
>
> struct vxlan_sock;
> -typedef void (vxlan_rcv_t)(struct vxlan_sock *vh, struct sk_buff *skb, __be32 key);
>
> -/* per UDP socket information */
> +typedef void (*vxlan_rcv_t)(struct vxlan_sock *vs, struct sk_buff *skb,
> + __be32 key);
> +
> +/* per vxlan socket information */
> struct vxlan_sock {
> + struct udp_tunnel_sock uts; /* Must be the first member */
> struct hlist_node hlist;
> - vxlan_rcv_t *rcv;
> - void *data;
> + struct list_head next;
> struct work_struct del_work;
> - struct socket *sock;
> struct rcu_head rcu;
> struct hlist_head vni_list[VNI_HASH_SIZE];
> atomic_t refcnt;
> + vxlan_rcv_t rcv;
> + void *rcv_data;
> struct udp_offload udp_offloads;
> };
>
> @@ -35,7 +39,7 @@ struct vxlan_sock {
> #define VXLAN_F_UDP_ZERO_CSUM6_RX 0x100
>
> struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
> - vxlan_rcv_t *rcv, void *data,
> + vxlan_rcv_t rcv, void *data,
> bool no_share, u32 flags);
>
> void vxlan_sock_release(struct vxlan_sock *vs);
> diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
> index d8b7e24..7599efd 100644
> --- a/net/openvswitch/vport-vxlan.c
> +++ b/net/openvswitch/vport-vxlan.c
> @@ -59,7 +59,7 @@ static inline struct vxlan_port *vxlan_vport(const struct vport *vport)
> static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
> {
> struct ovs_key_ipv4_tunnel tun_key;
> - struct vport *vport = vs->data;
> + struct vport *vport = vs->rcv_data;
> struct iphdr *iph;
> __be64 key;
>
> @@ -74,7 +74,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
> static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb)
> {
> struct vxlan_port *vxlan_port = vxlan_vport(vport);
> - __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
> + __be16 dst_port = inet_sk(vxlan_port->vs->uts.sock->sk)->inet_sport;
>
> if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port)))
> return -EMSGSIZE;
> @@ -139,7 +139,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
> {
> struct net *net = ovs_dp_get_net(vport->dp);
> struct vxlan_port *vxlan_port = vxlan_vport(vport);
> - __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
> + __be16 dst_port = inet_sk(vxlan_port->vs->uts.sock->sk)->inet_sport;
> struct rtable *rt;
> struct flowi4 fl;
> __be16 src_port;
> --
> 1.7.9.5
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [net-next v3 1/3] udp: Expand UDP tunnel common APIs
2014-08-27 3:54 ` [net-next v3 1/3] udp: Expand UDP tunnel common APIs Eric Dumazet
@ 2014-08-27 5:01 ` Andy Zhou
2014-08-27 11:45 ` Eric Dumazet
0 siblings, 1 reply; 10+ messages in thread
From: Andy Zhou @ 2014-08-27 5:01 UTC (permalink / raw)
To: Eric Dumazet; +Cc: David Miller, netdev
On Tue, Aug 26, 2014 at 8:54 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Tue, 2014-08-26 at 20:35 -0700, Andy Zhou wrote:
>> Added create_udp_tunnel_socket(), packet receive and transmit, and
>> other related common functions for UDP tunnels.
>>
>> Per net open UDP tunnel ports are tracked in this common layer to
>> prevent sharing of a single port with more than one UDP tunnel.
>>
>> Signed-off-by: Andy Zhou <azhou@nicira.com>
>> ---
>
> I see some RCU code, but I do not see elementary rules of RCU being
> respected in this patch.
Which RCU rule are you referring to?
>
> Changelogs are a bit terse, and there is not a single comment in this
> code.
I can add more content and comments in the next patch.
> What is going on exactly ?
>
I am trying to refactor some UDP tunnel code so that we don't end up
duplicate a lot of code when adding new UDP based tunnel protocols.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [net-next v3 1/3] udp: Expand UDP tunnel common APIs
2014-08-27 4:12 ` Tom Herbert
@ 2014-08-27 5:17 ` Andy Zhou
0 siblings, 0 replies; 10+ messages in thread
From: Andy Zhou @ 2014-08-27 5:17 UTC (permalink / raw)
To: Tom Herbert; +Cc: David Miller, Linux Netdev List
On Tue, Aug 26, 2014 at 9:12 PM, Tom Herbert <therbert@google.com> wrote:
> On Tue, Aug 26, 2014 at 8:35 PM, Andy Zhou <azhou@nicira.com> wrote:
>> Added create_udp_tunnel_socket(), packet receive and transmit, and
>> other related common functions for UDP tunnels.
>>
>> Per net open UDP tunnel ports are tracked in this common layer to
>> prevent sharing of a single port with more than one UDP tunnel.
>>
> This is not needed! If a UDP port is already bound (whether by another
> tunnel or not), then bind during tunnel initialization will fail. All
> this logic to store tunnel sockets in a separate list seems like
> unnecessary complexity. If a driver needs to track multiple ports it
> opens, it can do that on its own like VXLAN is already doing.
I was hoping to retain the udp_tunnel_find_sock() function which seems
to be common.
But it is not doing much at the moment, so I can just drop it in the
next version.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [net-next v3 2/3] vxlan: Refactor vxlan driver to make use of the common UDP tunnel functions.
[not found] ` <CA+mtBx8R2YyAMbPuw=iYDbLpSwRo_robnxOYPUFV2368-RzdLg@mail.gmail.com>
@ 2014-08-27 5:19 ` Andy Zhou
0 siblings, 0 replies; 10+ messages in thread
From: Andy Zhou @ 2014-08-27 5:19 UTC (permalink / raw)
To: Tom Herbert; +Cc: David Miller, Linux Netdev List
On Tue, Aug 26, 2014 at 9:14 PM, Tom Herbert <therbert@google.com> wrote:
>
>
> On Tue, Aug 26, 2014 at 8:35 PM, Andy Zhou <azhou@nicira.com> wrote:
>> Signed-off-by: Andy Zhou <azhou@nicira.com>
>> ---
>> drivers/net/vxlan.c | 199
>> ++++++++++++-----------------------------
>> include/net/vxlan.h | 16 ++--
>> net/openvswitch/vport-vxlan.c | 6 +-
>> 3 files changed, 68 insertions(+), 153 deletions(-)
>>
>> diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
>> index beb377b..f1f1c48 100644
>> --- a/drivers/net/vxlan.c
>> +++ b/drivers/net/vxlan.c
>> @@ -42,6 +42,7 @@
>> #include <net/netns/generic.h>
>> #include <net/vxlan.h>
>> #include <net/protocol.h>
>> +#include <net/udp_tunnel.h>
>> #if IS_ENABLED(CONFIG_IPV6)
>> #include <net/ipv6.h>
>> #include <net/addrconf.h>
>> @@ -277,13 +278,7 @@ static inline struct vxlan_rdst
>> *first_remote_rtnl(struct vxlan_fdb *fdb)
>> /* Find VXLAN socket based on network namespace and UDP port */
>> static struct vxlan_sock *vxlan_find_sock(struct net *net, __be16 port)
>> {
>> - struct vxlan_sock *vs;
>> -
>> - hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
>> - if (inet_sk(vs->sock->sk)->inet_sport == port)
>> - return vs;
>> - }
>> - return NULL;
>> + return (struct vxlan_sock *)udp_tunnel_find_sock(net, port);
>
> It seems incorrect to assume that the socket returned is vxlan, presumably
> this could be some other type of tunnel socket.
>
If I drop udp_tunnel_find_sock API from last patch, then this should
not be an issue any more right?
vxlan driver will just keep track of its own open sock.
>> }
>>
>> static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id)
>> @@ -636,7 +631,7 @@ static int vxlan_gro_complete(struct sk_buff *skb, int
>> nhoff)
>> static void vxlan_notify_add_rx_port(struct vxlan_sock *vs)
>> {
>> struct net_device *dev;
>> - struct sock *sk = vs->sock->sk;
>> + struct sock *sk = vs->uts.sock->sk;
>> struct net *net = sock_net(sk);
>> sa_family_t sa_family = sk->sk_family;
>> __be16 port = inet_sk(sk)->inet_sport;
>> @@ -661,7 +656,7 @@ static void vxlan_notify_add_rx_port(struct vxlan_sock
>> *vs)
>> static void vxlan_notify_del_rx_port(struct vxlan_sock *vs)
>> {
>> struct net_device *dev;
>> - struct sock *sk = vs->sock->sk;
>> + struct sock *sk = vs->uts.sock->sk;
>> struct net *net = sock_net(sk);
>> sa_family_t sa_family = sk->sk_family;
>> __be16 port = inet_sk(sk)->inet_sport;
>> @@ -1053,7 +1048,7 @@ static void vxlan_sock_hold(struct vxlan_sock *vs)
>>
>> void vxlan_sock_release(struct vxlan_sock *vs)
>> {
>> - struct sock *sk = vs->sock->sk;
>> + struct sock *sk = vs->uts.sock->sk;
>> struct net *net = sock_net(sk);
>> struct vxlan_net *vn = net_generic(net, vxlan_net_id);
>>
>> @@ -1062,7 +1057,6 @@ void vxlan_sock_release(struct vxlan_sock *vs)
>>
>> spin_lock(&vn->sock_lock);
>> hlist_del_rcu(&vs->hlist);
>> - rcu_assign_sk_user_data(vs->sock->sk, NULL);
>> vxlan_notify_del_rx_port(vs);
>> spin_unlock(&vn->sock_lock);
>>
>> @@ -1078,7 +1072,7 @@ static void vxlan_igmp_join(struct work_struct
>> *work)
>> {
>> struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev,
>> igmp_join);
>> struct vxlan_sock *vs = vxlan->vn_sock;
>> - struct sock *sk = vs->sock->sk;
>> + struct sock *sk = vs->uts.sock->sk;
>> union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
>> int ifindex = vxlan->default_dst.remote_ifindex;
>>
>> @@ -1107,7 +1101,7 @@ static void vxlan_igmp_leave(struct work_struct
>> *work)
>> {
>> struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev,
>> igmp_leave);
>> struct vxlan_sock *vs = vxlan->vn_sock;
>> - struct sock *sk = vs->sock->sk;
>> + struct sock *sk = vs->uts.sock->sk;
>> union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
>> int ifindex = vxlan->default_dst.remote_ifindex;
>>
>> @@ -1338,7 +1332,6 @@ out:
>> }
>>
>> #if IS_ENABLED(CONFIG_IPV6)
>> -
>> static struct sk_buff *vxlan_na_create(struct sk_buff *request,
>> struct neighbour *n, bool isrouter)
>> {
>> @@ -1572,13 +1565,6 @@ static bool route_shortcircuit(struct net_device
>> *dev, struct sk_buff *skb)
>> return false;
>> }
>>
>> -static inline struct sk_buff *vxlan_handle_offloads(struct sk_buff *skb,
>> - bool udp_csum)
>> -{
>> - int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM :
>> SKB_GSO_UDP_TUNNEL;
>> - return iptunnel_handle_offloads(skb, udp_csum, type);
>> -}
>> -
>> #if IS_ENABLED(CONFIG_IPV6)
>> static int vxlan6_xmit_skb(struct vxlan_sock *vs,
>> struct dst_entry *dst, struct sk_buff *skb,
>> @@ -1587,13 +1573,13 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
>> __be16 src_port, __be16 dst_port, __be32 vni,
>> bool xnet)
>> {
>> - struct ipv6hdr *ip6h;
>> struct vxlanhdr *vxh;
>> - struct udphdr *uh;
>> int min_headroom;
>> int err;
>>
>> - skb = vxlan_handle_offloads(skb,
>> !udp_get_no_check6_tx(vs->sock->sk));
>> + skb = udp_tunnel_handle_offloads(skb,
>> + !udp_get_no_check6_tx(
>> + vs->uts.sock->sk));
>> if (IS_ERR(skb))
>> return -EINVAL;
>>
>> @@ -1621,38 +1607,8 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
>> vxh->vx_flags = htonl(VXLAN_FLAGS);
>> vxh->vx_vni = vni;
>>
>> - __skb_push(skb, sizeof(*uh));
>> - skb_reset_transport_header(skb);
>> - uh = udp_hdr(skb);
>> -
>> - uh->dest = dst_port;
>> - uh->source = src_port;
>> -
>> - uh->len = htons(skb->len);
>> -
>> - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
>> - IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE |
>> IPSKB_XFRM_TRANSFORMED |
>> - IPSKB_REROUTED);
>> - skb_dst_set(skb, dst);
>> -
>> - udp6_set_csum(udp_get_no_check6_tx(vs->sock->sk), skb,
>> - saddr, daddr, skb->len);
>> -
>> - __skb_push(skb, sizeof(*ip6h));
>> - skb_reset_network_header(skb);
>> - ip6h = ipv6_hdr(skb);
>> - ip6h->version = 6;
>> - ip6h->priority = prio;
>> - ip6h->flow_lbl[0] = 0;
>> - ip6h->flow_lbl[1] = 0;
>> - ip6h->flow_lbl[2] = 0;
>> - ip6h->payload_len = htons(skb->len);
>> - ip6h->nexthdr = IPPROTO_UDP;
>> - ip6h->hop_limit = ttl;
>> - ip6h->daddr = *daddr;
>> - ip6h->saddr = *saddr;
>> -
>> - ip6tunnel_xmit(skb, dev);
>> + udp_tunnel6_xmit_skb(vs->uts.sock, dst, skb, dev, saddr, daddr,
>> prio,
>> + ttl, src_port, dst_port);
>> return 0;
>> }
>> #endif
>> @@ -1663,11 +1619,11 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
>> __be16 src_port, __be16 dst_port, __be32 vni, bool
>> xnet)
>> {
>> struct vxlanhdr *vxh;
>> - struct udphdr *uh;
>> int min_headroom;
>> int err;
>>
>> - skb = vxlan_handle_offloads(skb, !vs->sock->sk->sk_no_check_tx);
>> + skb = udp_tunnel_handle_offloads(skb,
>> +
>> !vs->uts.sock->sk->sk_no_check_tx);
>> if (IS_ERR(skb))
>> return -EINVAL;
>>
>> @@ -1693,20 +1649,8 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
>> vxh->vx_flags = htonl(VXLAN_FLAGS);
>> vxh->vx_vni = vni;
>>
>> - __skb_push(skb, sizeof(*uh));
>> - skb_reset_transport_header(skb);
>> - uh = udp_hdr(skb);
>> -
>> - uh->dest = dst_port;
>> - uh->source = src_port;
>> -
>> - uh->len = htons(skb->len);
>> -
>> - udp_set_csum(vs->sock->sk->sk_no_check_tx, skb,
>> - src, dst, skb->len);
>> -
>> - return iptunnel_xmit(vs->sock->sk, rt, skb, src, dst, IPPROTO_UDP,
>> - tos, ttl, df, xnet);
>> + return udp_tunnel_xmit_skb(vs->uts.sock, rt, skb, src, dst, tos,
>> + ttl, df, src_port, dst_port, xnet);
>> }
>> EXPORT_SYMBOL_GPL(vxlan_xmit_skb);
>>
>> @@ -1831,18 +1775,18 @@ static void vxlan_xmit_one(struct sk_buff *skb,
>> struct net_device *dev,
>> tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
>> ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
>>
>> - err = vxlan_xmit_skb(vxlan->vn_sock, rt, skb,
>> - fl4.saddr, dst->sin.sin_addr.s_addr,
>> - tos, ttl, df, src_port, dst_port,
>> - htonl(vni << 8),
>> - !net_eq(vxlan->net,
>> dev_net(vxlan->dev)));
>> + err = udp_tunnel_xmit_skb(vxlan->vn_sock->uts.sock, rt,
>> skb,
>> + fl4.saddr,
>> dst->sin.sin_addr.s_addr,
>> + tos, ttl, df, src_port,
>> dst_port,
>> + !net_eq(vxlan->net,
>> + dev_net(vxlan->dev)));
>>
>> if (err < 0)
>> goto rt_tx_error;
>> iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
>> #if IS_ENABLED(CONFIG_IPV6)
>> } else {
>> - struct sock *sk = vxlan->vn_sock->sock->sk;
>> + struct sock *sk = vxlan->vn_sock->uts.sock->sk;
>> struct dst_entry *ndst;
>> struct flowi6 fl6;
>> u32 flags;
>> @@ -2204,8 +2148,8 @@ void vxlan_get_rx_port(struct net_device *dev)
>> spin_lock(&vn->sock_lock);
>> for (i = 0; i < PORT_HASH_SIZE; ++i) {
>> hlist_for_each_entry_rcu(vs, &vn->sock_list[i], hlist) {
>> - port = inet_sk(vs->sock->sk)->inet_sport;
>> - sa_family = vs->sock->sk->sk_family;
>> + port = inet_sk(vs->uts.sock->sk)->inet_sport;
>> + sa_family = vs->uts.sock->sk->sk_family;
>> dev->netdev_ops->ndo_add_vxlan_port(dev,
>> sa_family,
>> port);
>> }
>> @@ -2335,79 +2279,60 @@ static const struct ethtool_ops vxlan_ethtool_ops
>> = {
>> static void vxlan_del_work(struct work_struct *work)
>> {
>> struct vxlan_sock *vs = container_of(work, struct vxlan_sock,
>> del_work);
>> -
>> - sk_release_kernel(vs->sock->sk);
>> + udp_tunnel_sock_release(&vs->uts);
>> kfree_rcu(vs, rcu);
>> }
>>
>> -static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
>> - __be16 port, u32 flags)
>> +/* Create new listen socket if needed */
>> +static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16
>> port,
>> + vxlan_rcv_t rcv, void *data,
>> + u32 flags)
>> {
>> - struct socket *sock;
>> - struct udp_port_cfg udp_conf;
>> - int err;
>> + struct vxlan_net *vn = net_generic(net, vxlan_net_id);
>> + struct vxlan_sock *vs;
>> + struct udp_tunnel_socket_cfg vxlan_ts_cfg;
>> + bool ipv6 = !!(flags & VXLAN_F_IPV6);
>> + unsigned int h;
>>
>> - memset(&udp_conf, 0, sizeof(udp_conf));
>> + memset(&vxlan_ts_cfg, 0, sizeof(struct udp_tunnel_socket_cfg));
>>
>> if (ipv6) {
>> - udp_conf.family = AF_INET6;
>> - udp_conf.use_udp6_tx_checksums =
>> + vxlan_ts_cfg.port.family = AF_INET6;
>> + vxlan_ts_cfg.port.use_udp6_tx_checksums =
>> !!(flags & VXLAN_F_UDP_ZERO_CSUM6_TX);
>> - udp_conf.use_udp6_rx_checksums =
>> + vxlan_ts_cfg.port.use_udp6_rx_checksums =
>> !!(flags & VXLAN_F_UDP_ZERO_CSUM6_RX);
>> } else {
>> - udp_conf.family = AF_INET;
>> - udp_conf.local_ip.s_addr = INADDR_ANY;
>> - udp_conf.use_udp_checksums =
>> + vxlan_ts_cfg.port.family = AF_INET;
>> + vxlan_ts_cfg.port.local_ip.s_addr = INADDR_ANY;
>> + vxlan_ts_cfg.port.use_udp_checksums =
>> !!(flags & VXLAN_F_UDP_CSUM);
>> }
>>
>> - udp_conf.local_udp_port = port;
>> + vxlan_ts_cfg.port.local_udp_port = port;
>> + vxlan_ts_cfg.encap_type = 1;
>> + vxlan_ts_cfg.encap_rcv = vxlan_udp_encap_recv;
>> + vxlan_ts_cfg.encap_destroy = NULL;
>>
>> - /* Open UDP socket */
>> - err = udp_sock_create(net, &udp_conf, &sock);
>> - if (err < 0)
>> - return ERR_PTR(err);
>> -
>> - /* Disable multicast loopback */
>> - inet_sk(sock->sk)->mc_loop = 0;
>> -
>> - return sock;
>> -}
>> -
>> -/* Create new listen socket if needed */
>> -static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16
>> port,
>> - vxlan_rcv_t *rcv, void
>> *data,
>> - u32 flags)
>> -{
>> - struct vxlan_net *vn = net_generic(net, vxlan_net_id);
>> - struct vxlan_sock *vs;
>> - struct socket *sock;
>> - struct sock *sk;
>> - unsigned int h;
>> - bool ipv6 = !!(flags & VXLAN_F_IPV6);
>> -
>> - vs = kzalloc(sizeof(*vs), GFP_KERNEL);
>> + vs = (struct vxlan_sock *)create_udp_tunnel_sock(net, sizeof(*vs),
>> + NULL,
>> + &vxlan_ts_cfg);
>> if (!vs)
>> return ERR_PTR(-ENOMEM);
>>
>> for (h = 0; h < VNI_HASH_SIZE; ++h)
>> INIT_HLIST_HEAD(&vs->vni_list[h]);
>>
>> - INIT_WORK(&vs->del_work, vxlan_del_work);
>> + spin_lock(&vn->sock_lock);
>> + list_add(&vs->next, &vn->vxlan_list);
>> + spin_unlock(&vn->sock_lock);
>>
>> - sock = vxlan_create_sock(net, ipv6, port, flags);
>> - if (IS_ERR(sock)) {
>> - kfree(vs);
>> - return ERR_CAST(sock);
>> - }
>> + INIT_WORK(&vs->del_work, vxlan_del_work);
>>
>> - vs->sock = sock;
>> - sk = sock->sk;
>> atomic_set(&vs->refcnt, 1);
>> +
>> vs->rcv = rcv;
>> - vs->data = data;
>> - rcu_assign_sk_user_data(vs->sock->sk, vs);
>> + vs->rcv_data = data;
>>
>> /* Initialize the vxlan udp offloads structure */
>> vs->udp_offloads.port = port;
>> @@ -2419,24 +2344,13 @@ static struct vxlan_sock
>> *vxlan_socket_create(struct net *net, __be16 port,
>> vxlan_notify_add_rx_port(vs);
>> spin_unlock(&vn->sock_lock);
>>
>> - /* Mark socket as an encapsulation socket. */
>> - udp_sk(sk)->encap_type = 1;
>> - udp_sk(sk)->encap_rcv = vxlan_udp_encap_recv;
>> -#if IS_ENABLED(CONFIG_IPV6)
>> - if (ipv6)
>> - ipv6_stub->udpv6_encap_enable();
>> - else
>> -#endif
>> - udp_encap_enable();
>> -
>> return vs;
>> }
>>
>> struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
>> - vxlan_rcv_t *rcv, void *data,
>> + vxlan_rcv_t rcv, void *data,
>> bool no_share, u32 flags)
>> {
>> - struct vxlan_net *vn = net_generic(net, vxlan_net_id);
>> struct vxlan_sock *vs;
>>
>> vs = vxlan_socket_create(net, port, rcv, data, flags);
>> @@ -2446,7 +2360,6 @@ struct vxlan_sock *vxlan_sock_add(struct net *net,
>> __be16 port,
>> if (no_share) /* Return error if sharing is not allowed. */
>> return vs;
>>
>> - spin_lock(&vn->sock_lock);
>> vs = vxlan_find_sock(net, port);
>> if (vs) {
>> if (vs->rcv == rcv)
>> @@ -2454,7 +2367,6 @@ struct vxlan_sock *vxlan_sock_add(struct net *net,
>> __be16 port,
>> else
>> vs = ERR_PTR(-EBUSY);
>> }
>> - spin_unlock(&vn->sock_lock);
>>
>> if (!vs)
>> vs = ERR_PTR(-EINVAL);
>> @@ -2634,7 +2546,6 @@ static int vxlan_newlink(struct net *net, struct
>> net_device *dev,
>> }
>>
>> list_add(&vxlan->next, &vn->vxlan_list);
>> -
>> return 0;
>> }
>>
>> diff --git a/include/net/vxlan.h b/include/net/vxlan.h
>> index d5f59f3..10bfc13 100644
>> --- a/include/net/vxlan.h
>> +++ b/include/net/vxlan.h
>> @@ -4,23 +4,27 @@
>> #include <linux/skbuff.h>
>> #include <linux/netdevice.h>
>> #include <linux/udp.h>
>> +#include <net/udp_tunnel.h>
>>
>> #define VNI_HASH_BITS 10
>> #define VNI_HASH_SIZE (1<<VNI_HASH_BITS)
>>
>> struct vxlan_sock;
>> -typedef void (vxlan_rcv_t)(struct vxlan_sock *vh, struct sk_buff *skb,
>> __be32 key);
>>
>> -/* per UDP socket information */
>> +typedef void (*vxlan_rcv_t)(struct vxlan_sock *vs, struct sk_buff *skb,
>> + __be32 key);
>> +
>> +/* per vxlan socket information */
>> struct vxlan_sock {
>> + struct udp_tunnel_sock uts; /* Must be the first member */
>> struct hlist_node hlist;
>> - vxlan_rcv_t *rcv;
>> - void *data;
>> + struct list_head next;
>> struct work_struct del_work;
>> - struct socket *sock;
>> struct rcu_head rcu;
>> struct hlist_head vni_list[VNI_HASH_SIZE];
>> atomic_t refcnt;
>> + vxlan_rcv_t rcv;
>> + void *rcv_data;
>> struct udp_offload udp_offloads;
>> };
>>
>> @@ -35,7 +39,7 @@ struct vxlan_sock {
>> #define VXLAN_F_UDP_ZERO_CSUM6_RX 0x100
>>
>> struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
>> - vxlan_rcv_t *rcv, void *data,
>> + vxlan_rcv_t rcv, void *data,
>> bool no_share, u32 flags);
>>
>> void vxlan_sock_release(struct vxlan_sock *vs);
>> diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
>> index d8b7e24..7599efd 100644
>> --- a/net/openvswitch/vport-vxlan.c
>> +++ b/net/openvswitch/vport-vxlan.c
>> @@ -59,7 +59,7 @@ static inline struct vxlan_port *vxlan_vport(const
>> struct vport *vport)
>> static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32
>> vx_vni)
>> {
>> struct ovs_key_ipv4_tunnel tun_key;
>> - struct vport *vport = vs->data;
>> + struct vport *vport = vs->rcv_data;
>> struct iphdr *iph;
>> __be64 key;
>>
>> @@ -74,7 +74,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct
>> sk_buff *skb, __be32 vx_vni)
>> static int vxlan_get_options(const struct vport *vport, struct sk_buff
>> *skb)
>> {
>> struct vxlan_port *vxlan_port = vxlan_vport(vport);
>> - __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
>> + __be16 dst_port =
>> inet_sk(vxlan_port->vs->uts.sock->sk)->inet_sport;
>>
>> if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port)))
>> return -EMSGSIZE;
>> @@ -139,7 +139,7 @@ static int vxlan_tnl_send(struct vport *vport, struct
>> sk_buff *skb)
>> {
>> struct net *net = ovs_dp_get_net(vport->dp);
>> struct vxlan_port *vxlan_port = vxlan_vport(vport);
>> - __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
>> + __be16 dst_port =
>> inet_sk(vxlan_port->vs->uts.sock->sk)->inet_sport;
>> struct rtable *rt;
>> struct flowi4 fl;
>> __be16 src_port;
>> --
>> 1.7.9.5
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe netdev" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [net-next v3 1/3] udp: Expand UDP tunnel common APIs
2014-08-27 5:01 ` Andy Zhou
@ 2014-08-27 11:45 ` Eric Dumazet
0 siblings, 0 replies; 10+ messages in thread
From: Eric Dumazet @ 2014-08-27 11:45 UTC (permalink / raw)
To: Andy Zhou; +Cc: David Miller, netdev
On Tue, 2014-08-26 at 22:01 -0700, Andy Zhou wrote:
> Which RCU rule are you referring to?
The most elementary ones, like observing rcu grace period before
freeing ?
udp_tunnel_sock_release() is obviously wrong, or needs an appropriate
documentation.
Please carefully read Documentation/RCU/checklist.txt and tell us why
you believe your code is correct, either in the changelog or using
comments.
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2014-08-27 11:45 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-08-27 3:35 [net-next v3 1/3] udp: Expand UDP tunnel common APIs Andy Zhou
2014-08-27 3:35 ` [net-next v3 2/3] vxlan: Refactor vxlan driver to make use of the common UDP tunnel functions Andy Zhou
2014-08-27 4:15 ` Tom Herbert
[not found] ` <CA+mtBx8R2YyAMbPuw=iYDbLpSwRo_robnxOYPUFV2368-RzdLg@mail.gmail.com>
2014-08-27 5:19 ` Andy Zhou
2014-08-27 3:35 ` [net-next v3 3/3] l2tp: Refactor l2tp core " Andy Zhou
2014-08-27 3:54 ` [net-next v3 1/3] udp: Expand UDP tunnel common APIs Eric Dumazet
2014-08-27 5:01 ` Andy Zhou
2014-08-27 11:45 ` Eric Dumazet
2014-08-27 4:12 ` Tom Herbert
2014-08-27 5:17 ` Andy Zhou
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.