All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andy Zhou <azhou@nicira.com>
To: Tom Herbert <therbert@google.com>
Cc: David Miller <davem@davemloft.net>,
	Linux Netdev List <netdev@vger.kernel.org>
Subject: Re: [net-next 02/10] udp: Expand UDP tunnel common APIs
Date: Thu, 24 Jul 2014 13:23:59 -0700	[thread overview]
Message-ID: <CACzMAJLbqVrbXjUzLmVq+vvJ40Q5KZczjn57Cw5Gu9TuCwdQqw@mail.gmail.com> (raw)
In-Reply-To: <CA+mtBx8uKepVh4zF+u6TCmArKb5rnmNAcZ7v3VqQVN4BH7C_oA@mail.gmail.com>

The general layering I see is tunnel_user (e.g. OVS) -> tunnel_driver
(e.g. vxlan) -> udp_tunnel.

The two receive functions belong to two separate layers above
udp_tunnel. I can restructure the APIs to make this cleaner.

On Wed, Jul 23, 2014 at 12:57 PM, Tom Herbert <therbert@google.com> wrote:
> On Tue, Jul 22, 2014 at 3:19 AM, Andy Zhou <azhou@nicira.com> wrote:
>> Added create_udp_tunnel_socket(), packet receive and transmit,  and
>> other related common functions for UDP tunnels.
>>
>> Per net open UDP tunnel ports are tracked in this common layer to
>> prevent sharing of a single port with more than one UDP tunnel.
>>
>> Signed-off-by: Andy Zhou <azhou@nicira.com>
>> ---
>>  include/net/udp_tunnel.h |   57 +++++++++-
>>  net/ipv4/udp_tunnel.c    |  257 +++++++++++++++++++++++++++++++++++++++++++++-
>>  2 files changed, 312 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
>> index 3f34c65..b5e815a 100644
>> --- a/include/net/udp_tunnel.h
>> +++ b/include/net/udp_tunnel.h
>> @@ -1,7 +1,10 @@
>>  #ifndef __NET_UDP_TUNNEL_H
>>  #define __NET_UDP_TUNNEL_H
>>
>> -#define UDP_TUNNEL_TYPE_VXLAN 0x01
>> +#include <net/ip_tunnels.h>
>> +
>> +#define UDP_TUNNEL_TYPE_VXLAN  0x01
>> +#define UDP_TUNNEL_TYPE_GENEVE 0x02
>>
>>  struct udp_port_cfg {
>>         u8                      family;
>> @@ -28,7 +31,59 @@ struct udp_port_cfg {
>>                                 use_udp6_rx_checksums:1;
>>  };
>>
>> +struct udp_tunnel_sock;
>> +
>> +typedef void (udp_tunnel_rcv_t)(struct udp_tunnel_sock *uts,
>> +                               struct sk_buff *skb, ...);
>> +
>> +typedef int (udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb);
>> +
>> +struct udp_tunnel_socket_cfg {
>> +       u8 tunnel_type;
>> +       struct udp_port_cfg port;
>> +       udp_tunnel_rcv_t *rcv;
>> +       udp_tunnel_encap_rcv_t *encap_rcv;
>
> Why do you need two receive functions or udp_tunnel_rcv_t?
>
>> +       void *data;
>
> Similarly, why is this needed when we already have sk_user_data?
>
>> +};
>> +
>> +struct udp_tunnel_sock {
>> +       u8 tunnel_type;
>> +       struct hlist_node hlist;
>> +       udp_tunnel_rcv_t *rcv;
>> +       void *data;
>> +       struct socket *sock;
>> +};
>> +
>>  int udp_sock_create(struct net *net, struct udp_port_cfg *cfg,
>>                     struct socket **sockp);
>>
>> +struct udp_tunnel_sock *create_udp_tunnel_socket(struct net *net, size_t size,
>> +                                                struct udp_tunnel_socket_cfg
>> +                                                       *socket_cfg);
>> +
>> +struct udp_tunnel_sock *udp_tunnel_find_sock(struct net *net, __be16 port);
>> +
>> +int udp_tunnel_xmit_skb(struct socket *sock, struct rtable *rt,
>> +                       struct sk_buff *skb, __be32 src, __be32 dst,
>> +                       __u8 tos, __u8 ttl, __be16 df, __be16 src_port,
>> +                       __be16 dst_port, bool xnet);
>> +
>> +#if IS_ENABLED(CONFIG_IPV6)
>> +int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst,
>> +               struct sk_buff *skb, struct net_device *dev,
>> +               struct in6_addr *saddr, struct in6_addr *daddr,
>> +               __u8 prio, __u8 ttl, __be16 src_port, __be16 dst_port);
>> +
>> +#endif
>> +
>> +void udp_tunnel_sock_release(struct udp_tunnel_sock *uts);
>> +void udp_tunnel_get_rx_port(struct net_device *dev);
>> +
>> +static inline struct sk_buff *udp_tunnel_handle_offloads(struct sk_buff *skb,
>> +                                                        bool udp_csum)
>> +{
>> +       int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
>> +
>> +       return iptunnel_handle_offloads(skb, udp_csum, type);
>> +}
>>  #endif
>> diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
>> index 61ec1a6..3c14b16 100644
>> --- a/net/ipv4/udp_tunnel.c
>> +++ b/net/ipv4/udp_tunnel.c
>> @@ -7,6 +7,23 @@
>>  #include <net/udp.h>
>>  #include <net/udp_tunnel.h>
>>  #include <net/net_namespace.h>
>> +#include <net/netns/generic.h>
>> +#if IS_ENABLED(CONFIG_IPV6)
>> +#include <net/ipv6.h>
>> +#include <net/addrconf.h>
>> +#include <net/ip6_tunnel.h>
>> +#include <net/ip6_checksum.h>
>> +#endif
>> +
>> +#define PORT_HASH_BITS 8
>> +#define PORT_HASH_SIZE (1 << PORT_HASH_BITS)
>> +
>> +static int udp_tunnel_net_id;
>> +
>> +struct udp_tunnel_net {
>> +       struct hlist_head sock_list[PORT_HASH_SIZE];
>> +       spinlock_t  sock_lock;   /* Protecting the sock_list */
>> +};
>>
>>  int udp_sock_create(struct net *net, struct udp_port_cfg *cfg,
>>                     struct socket **sockp)
>> @@ -82,7 +99,6 @@ int udp_sock_create(struct net *net, struct udp_port_cfg *cfg,
>>                 return -EPFNOSUPPORT;
>>         }
>>
>> -
>>         *sockp = sock;
>>
>>         return 0;
>> @@ -97,4 +113,243 @@ error:
>>  }
>>  EXPORT_SYMBOL(udp_sock_create);
>>
>> +
>> +/* Socket hash table head */
>> +static inline struct hlist_head *uts_head(struct net *net, const __be16 port)
>> +{
>> +       struct udp_tunnel_net *utn = net_generic(net, udp_tunnel_net_id);
>> +
>> +       return &utn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)];
>> +}
>> +
>> +static int handle_offloads(struct sk_buff *skb)
>> +{
>> +       if (skb_is_gso(skb)) {
>> +               int err = skb_unclone(skb, GFP_ATOMIC);
>> +
>> +               if (unlikely(err))
>> +                       return err;
>> +               skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
>> +       } else {
>> +               if (skb->ip_summed != CHECKSUM_PARTIAL)
>> +                       skb->ip_summed = CHECKSUM_NONE;
>> +       }
>> +
>> +       return 0;
>> +}
>> +
>> +struct udp_tunnel_sock *create_udp_tunnel_socket(struct net *net, size_t size,
>> +                                                struct udp_tunnel_socket_cfg
>> +                                                       *cfg)
>> +{
>> +       struct udp_tunnel_net *utn = net_generic(net, udp_tunnel_net_id);
>> +       struct udp_tunnel_sock *uts;
>> +       struct socket *sock;
>> +       struct sock *sk;
>> +       const __be16 port = cfg->port.local_udp_port;
>> +       const int ipv6 = (cfg->port.family == AF_INET6);
>> +       int err;
>> +
>> +       uts = kzalloc(size, GFP_KERNEL);
>> +       if (!uts)
>> +               return ERR_PTR(-ENOMEM);
>> +
>> +       err = udp_sock_create(net, &cfg->port, &sock);
>> +       if (err < 0) {
>> +               kfree(uts);
>> +               return NULL;
>> +       }
>> +
>> +       /* Disable multicast loopback */
>> +       inet_sk(sock->sk)->mc_loop = 0;
>> +
>> +       uts->sock = sock;
>> +       sk = sock->sk;
>> +       uts->rcv = cfg->rcv;
>> +       uts->data = cfg->data;
>> +       rcu_assign_sk_user_data(sock->sk, uts);
>> +
>> +       spin_lock(&utn->sock_lock);
>> +       hlist_add_head_rcu(&uts->hlist, uts_head(net, port));
>> +       spin_unlock(&utn->sock_lock);
>> +
>> +       udp_sk(sk)->encap_type = 1;
>> +       udp_sk(sk)->encap_rcv = cfg->encap_rcv;
>> +
>> +#if IS_ENABLED(CONFIG_IPV6)
>> +       if (ipv6)
>> +               ipv6_stub->udpv6_encap_enable();
>> +       else
>> +#endif
>> +               udp_encap_enable();
>> +
>> +       return uts;
>> +}
>> +EXPORT_SYMBOL_GPL(create_udp_tunnel_socket);
>> +
>> +int udp_tunnel_xmit_skb(struct socket *sock, struct rtable *rt,
>> +                       struct sk_buff *skb, __be32 src, __be32 dst,
>> +                       __u8 tos, __u8 ttl, __be16 df, __be16 src_port,
>> +                       __be16 dst_port, bool xnet)
>> +{
>> +       struct udphdr *uh;
>> +
>> +       __skb_push(skb, sizeof(*uh));
>> +       skb_reset_transport_header(skb);
>> +       uh = udp_hdr(skb);
>> +
>> +       uh->dest = dst_port;
>> +       uh->source = src_port;
>> +       uh->len = htons(skb->len);
>> +
>> +       udp_set_csum(sock->sk->sk_no_check_tx, skb, src, dst, skb->len);
>> +
>> +       return iptunnel_xmit(sock->sk, rt, skb, src, dst, IPPROTO_UDP,
>> +                            tos, ttl, df, xnet);
>> +}
>> +EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb);
>> +
>> +#if IS_ENABLED(CONFIG_IPV6)
>> +int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst,
>> +                        struct sk_buff *skb, struct net_device *dev,
>> +                        struct in6_addr *saddr, struct in6_addr *daddr,
>> +                        __u8 prio, __u8 ttl, __be16 src_port, __be16 dst_port)
>> +{
>> +       struct udphdr *uh;
>> +       struct ipv6hdr *ip6h;
>> +       int err;
>> +
>> +       __skb_push(skb, sizeof(*uh));
>> +       skb_reset_transport_header(skb);
>> +       uh = udp_hdr(skb);
>> +
>> +       uh->dest = dst_port;
>> +       uh->source = src_port;
>> +
>> +       uh->len = htons(skb->len);
>> +       uh->check = 0;
>> +
>> +       memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
>> +       IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED
>> +                           | IPSKB_REROUTED);
>> +       skb_dst_set(skb, dst);
>> +
>> +       if (!skb_is_gso(skb) && !(dst->dev->features & NETIF_F_IPV6_CSUM)) {
>> +               __wsum csum = skb_checksum(skb, 0, skb->len, 0);
>> +
>> +               skb->ip_summed = CHECKSUM_UNNECESSARY;
>> +               uh->check = csum_ipv6_magic(saddr, daddr, skb->len,
>> +                               IPPROTO_UDP, csum);
>> +               if (uh->check == 0)
>> +                       uh->check = CSUM_MANGLED_0;
>> +       } else {
>> +               skb->ip_summed = CHECKSUM_PARTIAL;
>> +               skb->csum_start = skb_transport_header(skb) - skb->head;
>> +               skb->csum_offset = offsetof(struct udphdr, check);
>> +               uh->check = ~csum_ipv6_magic(saddr, daddr,
>> +                               skb->len, IPPROTO_UDP, 0);
>> +       }
>> +
>> +       __skb_push(skb, sizeof(*ip6h));
>> +       skb_reset_network_header(skb);
>> +       ip6h              = ipv6_hdr(skb);
>> +       ip6h->version     = 6;
>> +       ip6h->priority    = prio;
>> +       ip6h->flow_lbl[0] = 0;
>> +       ip6h->flow_lbl[1] = 0;
>> +       ip6h->flow_lbl[2] = 0;
>> +       ip6h->payload_len = htons(skb->len);
>> +       ip6h->nexthdr     = IPPROTO_UDP;
>> +       ip6h->hop_limit   = ttl;
>> +       ip6h->daddr       = *daddr;
>> +       ip6h->saddr       = *saddr;
>> +
>> +       err = handle_offloads(skb);
>> +       if (err)
>> +               return err;
>> +
>> +       ip6tunnel_xmit(skb, dev);
>> +       return 0;
>> +}
>> +EXPORT_SYMBOL_GPL(udp_tunnel6_xmit_skb);
>> +#endif
>> +
>> +struct udp_tunnel_sock *udp_tunnel_find_sock(struct net *net, __be16 port)
>> +{
>> +       struct udp_tunnel_sock *uts;
>> +
>> +       hlist_for_each_entry_rcu(uts, uts_head(net, port), hlist) {
>> +               if (inet_sk(uts->sock->sk)->inet_sport == port)
>> +                       return uts;
>> +       }
>> +
>> +       return NULL;
>> +}
>> +EXPORT_SYMBOL_GPL(udp_tunnel_find_sock);
>> +
>> +void udp_tunnel_sock_release(struct udp_tunnel_sock *uts)
>> +{
>> +       struct sock *sk = uts->sock->sk;
>> +       struct net *net = sock_net(sk);
>> +       struct udp_tunnel_net *utn = net_generic(net, udp_tunnel_net_id);
>> +
>> +       spin_lock(&utn->sock_lock);
>> +       hlist_del_rcu(&uts->hlist);
>> +       rcu_assign_sk_user_data(uts->sock->sk, NULL);
>> +       spin_unlock(&utn->sock_lock);
>> +}
>> +EXPORT_SYMBOL_GPL(udp_tunnel_sock_release);
>> +
>> +/* Calls the ndo_add_udp_tunnel_port of the caller in order to
>> + * supply the listening UDP tunnel ports. Callers are expected
>> + * to implement ndo_add_udp_tunnel_port.
>> + */
>> +void udp_tunnel_get_rx_port(struct net_device *dev)
>> +{
>> +       struct udp_tunnel_sock *uts;
>> +       struct net *net = dev_net(dev);
>> +       struct udp_tunnel_net *utn = net_generic(net, udp_tunnel_net_id);
>> +       sa_family_t sa_family;
>> +       __be16 port;
>> +       unsigned int i;
>> +
>> +       spin_lock(&utn->sock_lock);
>> +       for (i = 0; i < PORT_HASH_SIZE; ++i) {
>> +               hlist_for_each_entry_rcu(uts, &utn->sock_list[i], hlist) {
>> +                       port = inet_sk(uts->sock->sk)->inet_sport;
>> +                       sa_family = uts->sock->sk->sk_family;
>> +                       dev->netdev_ops->ndo_add_udp_tunnel_port(dev,
>> +                                       sa_family, port, uts->tunnel_type);
>> +               }
>> +       }
>> +       spin_unlock(&utn->sock_lock);
>> +}
>> +EXPORT_SYMBOL_GPL(udp_tunnel_get_rx_port);
>> +
>> +static int __net_init udp_tunnel_init_net(struct net *net)
>> +{
>> +       struct udp_tunnel_net *utn = net_generic(net, udp_tunnel_net_id);
>> +       unsigned int h;
>> +
>> +       spin_lock_init(&utn->sock_lock);
>> +
>> +       for (h = 0; h < PORT_HASH_SIZE; h++)
>> +               INIT_HLIST_HEAD(&utn->sock_list[h]);
>> +
>> +       return 0;
>> +}
>> +
>> +static struct pernet_operations udp_tunnel_net_ops = {
>> +       .init = udp_tunnel_init_net,
>> +       .exit = NULL,
>> +       .id = &udp_tunnel_net_id,
>> +       .size = sizeof(struct udp_tunnel_net),
>> +};
>> +
>> +static int __init udp_tunnel_init(void)
>> +{
>> +       return register_pernet_subsys(&udp_tunnel_net_ops);
>> +}
>> +late_initcall(udp_tunnel_init);
>> +
>>  MODULE_LICENSE("GPL");
>> --
>> 1.7.9.5
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe netdev" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html

  reply	other threads:[~2014-07-24 20:24 UTC|newest]

Thread overview: 46+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-07-22 10:19 [net-next 00/10] Add Geneve Andy Zhou
2014-07-22 10:19 ` [net-next 01/10] net: Rename ndo_add_vxlan_port to ndo_add_udp_tunnel_port Andy Zhou
2014-07-22 10:49   ` Varka Bhadram
2014-07-24  6:40   ` Or Gerlitz
2014-07-24 20:28     ` Andy Zhou
2014-07-22 10:19 ` [net-next 02/10] udp: Expand UDP tunnel common APIs Andy Zhou
     [not found]   ` <CA+mtBx9M_BpjT-_Egng+jFxmqJzdC2Npg0ufE2ZSAb9Lhw8hxg@mail.gmail.com>
2014-07-22 21:02     ` Andy Zhou
2014-07-22 21:16       ` Tom Herbert
2014-07-22 21:56         ` Jesse Gross
2014-07-22 22:38           ` Tom Herbert
2014-07-22 22:55             ` Alexander Duyck
2014-07-22 23:24               ` Tom Herbert
2014-07-23  2:16                 ` Alexander Duyck
2014-07-23  3:53                   ` Tom Herbert
2014-07-23  4:35                     ` Jesse Gross
2014-07-23 15:45                       ` Tom Herbert
2014-07-24  3:24                         ` Jesse Gross
2014-07-22 23:12             ` Jesse Gross
2014-07-23 19:57   ` Tom Herbert
2014-07-24 20:23     ` Andy Zhou [this message]
2014-07-24 20:47       ` Tom Herbert
2014-07-24 20:54         ` Andy Zhou
2014-07-22 10:19 ` [net-next 03/10] vxlan: Remove vxlan_get_rx_port() Andy Zhou
     [not found]   ` <CAKgT0UeRSc3MaZrLmXyx4jPZO+F1hS5imR1TjFkvKp4S8nQmeg@mail.gmail.com>
2014-07-23  3:57     ` Andy Zhou
2014-07-22 10:19 ` [net-next 04/10] net: Refactor vxlan driver to make use of common UDP tunnel functions Andy Zhou
2014-07-24  6:46   ` Or Gerlitz
2014-07-22 10:19 ` [net-next 05/10] net: Add Geneve tunneling protocol driver Andy Zhou
2014-07-22 23:12   ` Alexander Duyck
2014-07-22 23:24     ` Jesse Gross
2014-07-23 14:11       ` John W. Linville
2014-07-23 18:20   ` Stephen Hemminger
2014-07-22 10:19 ` [net-next 06/10] openvswitch: Eliminate memset() from flow_extract Andy Zhou
2014-07-22 10:19 ` [net-next 07/10] openvswitch: Add support for matching on OAM packets Andy Zhou
2014-07-22 10:19 ` [net-next 08/10] openvswitch: Wrap struct ovs_key_ipv4_tunnel in a new structure Andy Zhou
2014-07-22 10:19 ` [net-next 09/10] openvswitch: Factor out allocation and verification of actions Andy Zhou
2014-07-22 10:19 ` [net-next 10/10] openvswitch: Add support for Geneve tunneling Andy Zhou
2014-07-23 20:29   ` Tom Herbert
2014-07-24  4:10     ` Jesse Gross
     [not found]       ` <CA+mtBx9umxiFYtnG1kzFkK+Ev=b=4f3q2OOow2QcfCB5rUTUyA@mail.gmail.com>
2014-07-24 22:59         ` Jesse Gross
2014-07-24 23:45           ` Tom Herbert
2014-07-25  1:04             ` Jesse Gross
2014-07-22 10:54 ` [net-next 00/10] Add Geneve Varka Bhadram
2014-07-24  6:58 ` Or Gerlitz
2014-07-24 17:40   ` Tom Herbert
2014-07-24 21:03     ` Andy Zhou
2014-07-24 22:03       ` Tom Herbert

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CACzMAJLbqVrbXjUzLmVq+vvJ40Q5KZczjn57Cw5Gu9TuCwdQqw@mail.gmail.com \
    --to=azhou@nicira.com \
    --cc=davem@davemloft.net \
    --cc=netdev@vger.kernel.org \
    --cc=therbert@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.