All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next RFC v2 1/3] lwt: infrastructure to support light weight tunnels
@ 2015-06-19  4:49 Roopa Prabhu
  2015-06-19 14:43 ` Robert Shearman
                   ` (3 more replies)
  0 siblings, 4 replies; 14+ messages in thread
From: Roopa Prabhu @ 2015-06-19  4:49 UTC (permalink / raw)
  To: ebiederm, rshearma, tgraf; +Cc: davem, netdev

From: Roopa Prabhu <roopa@cumulusnetworks.com>

provides ops to parse, build and output encaped
packets for drivers that want to attach tunnel encap
information to routes.

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
---
 include/linux/lwtunnel.h      |    6 ++
 include/net/lwtunnel.h        |   84 +++++++++++++++++++++
 include/uapi/linux/lwtunnel.h |   11 +++
 net/Kconfig                   |    5 ++
 net/core/Makefile             |    1 +
 net/core/lwtunnel.c           |  162 +++++++++++++++++++++++++++++++++++++++++
 6 files changed, 269 insertions(+)
 create mode 100644 include/linux/lwtunnel.h
 create mode 100644 include/net/lwtunnel.h
 create mode 100644 include/uapi/linux/lwtunnel.h
 create mode 100644 net/core/lwtunnel.c

diff --git a/include/linux/lwtunnel.h b/include/linux/lwtunnel.h
new file mode 100644
index 0000000..97f32f8
--- /dev/null
+++ b/include/linux/lwtunnel.h
@@ -0,0 +1,6 @@
+#ifndef _LINUX_LWTUNNEL_H_
+#define _LINUX_LWTUNNEL_H_
+
+#include <uapi/linux/lwtunnel.h>
+
+#endif /* _LINUX_LWTUNNEL_H_ */
diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h
new file mode 100644
index 0000000..649da3c
--- /dev/null
+++ b/include/net/lwtunnel.h
@@ -0,0 +1,84 @@
+#ifndef __NET_LWTUNNEL_H
+#define __NET_LWTUNNEL_H 1
+
+#include <linux/lwtunnel.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/types.h>
+#include <net/dsfield.h>
+#include <net/ip.h>
+#include <net/rtnetlink.h>
+
+#define LWTUNNEL_HASH_BITS   7
+#define LWTUNNEL_HASH_SIZE   (1 << LWTUNNEL_HASH_BITS)
+
+struct lwtunnel_hdr {
+	int             len;
+	__u8            data[0];
+};
+
+/* lw tunnel state flags */
+#define LWTUNNEL_STATE_OUTPUT_REDIRECT 0x1
+
+#define lwtunnel_output_redirect(lwtstate) (lwtstate && \
+			(lwtstate->flags & LWTUNNEL_STATE_OUTPUT_REDIRECT))
+
+struct lwtunnel_state {
+	__u16		type;
+	__u16		flags;
+	atomic_t	refcnt;
+	struct lwtunnel_hdr tunnel;
+};
+
+struct lwtunnel_net {
+	struct hlist_head tunnels[LWTUNNEL_HASH_SIZE];
+};
+
+struct lwtunnel_encap_ops {
+	int (*build_state)(struct net_device *dev, struct nlattr *encap,
+			   struct lwtunnel_state **ts);
+	int (*output)(struct sock *sk, struct sk_buff *skb);
+	int (*fill_encap)(struct sk_buff *skb,
+			  struct lwtunnel_state *lwtstate);
+	int (*get_encap_size)(struct lwtunnel_state *lwtstate);
+};
+
+#define MAX_LWTUNNEL_ENCAP_OPS 8
+extern const struct lwtunnel_encap_ops __rcu *
+		lwtun_encaps[MAX_LWTUNNEL_ENCAP_OPS];
+
+static inline void lwtunnel_state_get(struct lwtunnel_state *lws)
+{
+	atomic_inc(&lws->refcnt);
+}
+
+static inline void lwtunnel_state_put(struct lwtunnel_state *lws)
+{
+	if (!lws)
+		return;
+
+	if (atomic_dec_and_test(&lws->refcnt))
+		kfree(lws);
+}
+
+static inline struct lwtunnel_state *lwtunnel_skb_lwstate(struct sk_buff *skb)
+{
+	struct rtable *rt = (struct rtable *)skb_dst(skb);
+
+	return rt->rt_lwtstate;
+}
+
+int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op,
+			   unsigned int num);
+int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op,
+			   unsigned int num);
+int lwtunnel_build_state(struct net_device *dev, u16 encap_type,
+			 struct nlattr *encap,
+			 struct lwtunnel_state **lws);
+int lwtunnel_fill_encap(struct sk_buff *skb,
+			struct lwtunnel_state *lwtstate);
+int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate);
+struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len);
+int lwtunnel_output(struct sock *sk, struct sk_buff *skb);
+
+#endif /* __NET_LWTUNNEL_H */
diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h
new file mode 100644
index 0000000..11150c0
--- /dev/null
+++ b/include/uapi/linux/lwtunnel.h
@@ -0,0 +1,11 @@
+#ifndef _UAPI_LWTUNNEL_H_
+#define _UAPI_LWTUNNEL_H_
+
+#include <linux/types.h>
+
+enum tunnel_encap_types {
+	LWTUNNEL_ENCAP_NONE,
+	LWTUNNEL_ENCAP_MPLS,
+};
+
+#endif /* _UAPI_LWTUNNEL_H_ */
diff --git a/net/Kconfig b/net/Kconfig
index 57a7c5a..e296d6f 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -374,9 +374,14 @@ source "net/caif/Kconfig"
 source "net/ceph/Kconfig"
 source "net/nfc/Kconfig"
 
+config LWTUNNEL
+	bool "Network light weight tunnels"
+	---help---
+	  light weight tunnels
 
 endif   # if NET
 
 # Used by archs to tell that they support BPF_JIT
 config HAVE_BPF_JIT
 	bool
+
diff --git a/net/core/Makefile b/net/core/Makefile
index fec0856..086b01f 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -23,3 +23,4 @@ obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o
 obj-$(CONFIG_NET_PTP_CLASSIFY) += ptp_classifier.o
 obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o
 obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o
+obj-$(CONFIG_LWTUNNEL) += lwtunnel.o
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
new file mode 100644
index 0000000..29c7802
--- /dev/null
+++ b/net/core/lwtunnel.c
@@ -0,0 +1,162 @@
+/*
+ * lwtunnel	Infrastructure for light weight tunnels like mpls
+ *
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/capability.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/in.h>
+#include <linux/init.h>
+#include <linux/err.h>
+
+#include <net/lwtunnel.h>
+#include <net/rtnetlink.h>
+
+struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len)
+{
+	struct lwtunnel_state *lws;
+
+	return kzalloc(sizeof(*lws) + hdr_len, GFP_KERNEL);
+}
+EXPORT_SYMBOL(lwtunnel_state_alloc);
+
+const struct lwtunnel_encap_ops __rcu *
+		lwtun_encaps[MAX_LWTUNNEL_ENCAP_OPS] __read_mostly;
+
+int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *ops,
+			   unsigned int num)
+{
+	if (num >= MAX_LWTUNNEL_ENCAP_OPS)
+		return -ERANGE;
+
+	return !cmpxchg((const struct lwtunnel_encap_ops **)
+			&lwtun_encaps[num],
+			NULL, ops) ? 0 : -1;
+}
+EXPORT_SYMBOL(lwtunnel_encap_add_ops);
+
+int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops,
+			   unsigned int num)
+{
+	int ret;
+
+	if (num >= MAX_LWTUNNEL_ENCAP_OPS)
+		return -ERANGE;
+
+	ret = (cmpxchg((const struct lwtunnel_encap_ops **)
+		       &lwtun_encaps[num],
+		       ops, NULL) == ops) ? 0 : -1;
+
+	synchronize_net();
+
+	return ret;
+}
+EXPORT_SYMBOL(lwtunnel_encap_del_ops);
+
+int lwtunnel_build_state(struct net_device *dev, u16 encap_type,
+			 struct nlattr *encap, struct lwtunnel_state **lws)
+{
+	const struct lwtunnel_encap_ops *ops;
+	int ret = -EINVAL;
+
+	if (encap_type == LWTUNNEL_ENCAP_NONE ||
+	    encap_type >= MAX_LWTUNNEL_ENCAP_OPS)
+		return ret;
+
+	ret = -EOPNOTSUPP;
+	rcu_read_lock();
+	ops = rcu_dereference(lwtun_encaps[encap_type]);
+	if (likely(ops && ops->build_state))
+		ret = ops->build_state(dev, encap, lws);
+	rcu_read_unlock();
+
+	return ret;
+}
+EXPORT_SYMBOL(lwtunnel_build_state);
+
+int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate)
+{
+	const struct lwtunnel_encap_ops *ops;
+	struct nlattr *nest;
+	int ret = -EINVAL;
+
+	if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
+	    lwtstate->type >= MAX_LWTUNNEL_ENCAP_OPS)
+		return 0;
+
+	ret = -EOPNOTSUPP;
+	nest = nla_nest_start(skb, RTA_ENCAP);
+	rcu_read_lock();
+	ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
+	if (likely(ops && ops->fill_encap))
+		ret = ops->fill_encap(skb, lwtstate);
+	rcu_read_unlock();
+
+	if (ret)
+		goto errout;
+
+	nla_nest_end(skb, nest);
+
+	return 0;
+
+errout:
+	nla_nest_cancel(skb, nest);
+
+	return ret;
+}
+EXPORT_SYMBOL(lwtunnel_fill_encap);
+
+int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate)
+{
+	const struct lwtunnel_encap_ops *ops;
+	int ret = 0;
+
+	if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
+	    lwtstate->type >= MAX_LWTUNNEL_ENCAP_OPS)
+		return 0;
+
+	rcu_read_lock();
+	ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
+	if (likely(ops && ops->get_encap_size))
+		ret = nla_total_size(ops->get_encap_size(lwtstate));
+	rcu_read_unlock();
+
+	return ret;
+}
+EXPORT_SYMBOL(lwtunnel_get_encap_size);
+
+int lwtunnel_output(struct sock *sk, struct sk_buff *skb)
+{
+	const struct lwtunnel_encap_ops *ops;
+	struct lwtunnel_state *lwtstate = lwtunnel_skb_lwstate(skb);
+	int ret = 0;
+
+	if (!lwtstate)
+		return -EINVAL;
+
+	if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
+	    lwtstate->type >= MAX_LWTUNNEL_ENCAP_OPS)
+		return 0;
+
+	rcu_read_lock();
+	ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
+	if (likely(ops && ops->output))
+		ret = ops->output(sk, skb);
+	rcu_read_unlock();
+
+	return ret;
+}
+EXPORT_SYMBOL(lwtunnel_output);
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next RFC v2 1/3] lwt: infrastructure to support light weight tunnels
  2015-06-19  4:49 [PATCH net-next RFC v2 1/3] lwt: infrastructure to support light weight tunnels Roopa Prabhu
@ 2015-06-19 14:43 ` Robert Shearman
  2015-06-19 15:14   ` roopa
  2015-06-20 16:38 ` Nikolay Aleksandrov
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 14+ messages in thread
From: Robert Shearman @ 2015-06-19 14:43 UTC (permalink / raw)
  To: Roopa Prabhu, ebiederm, tgraf; +Cc: davem, netdev

On 19/06/15 05:49, Roopa Prabhu wrote:
> From: Roopa Prabhu <roopa@cumulusnetworks.com>
>
> provides ops to parse, build and output encaped
> packets for drivers that want to attach tunnel encap
> information to routes.
>
> Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
> ---
>   include/linux/lwtunnel.h      |    6 ++
>   include/net/lwtunnel.h        |   84 +++++++++++++++++++++
>   include/uapi/linux/lwtunnel.h |   11 +++
>   net/Kconfig                   |    5 ++
>   net/core/Makefile             |    1 +
>   net/core/lwtunnel.c           |  162 +++++++++++++++++++++++++++++++++++++++++
>   6 files changed, 269 insertions(+)
>   create mode 100644 include/linux/lwtunnel.h
>   create mode 100644 include/net/lwtunnel.h
>   create mode 100644 include/uapi/linux/lwtunnel.h
>   create mode 100644 net/core/lwtunnel.c
>
> diff --git a/include/linux/lwtunnel.h b/include/linux/lwtunnel.h
> new file mode 100644
> index 0000000..97f32f8
> --- /dev/null
> +++ b/include/linux/lwtunnel.h
> @@ -0,0 +1,6 @@
> +#ifndef _LINUX_LWTUNNEL_H_
> +#define _LINUX_LWTUNNEL_H_
> +
> +#include <uapi/linux/lwtunnel.h>
> +
> +#endif /* _LINUX_LWTUNNEL_H_ */
> diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h
> new file mode 100644
> index 0000000..649da3c
> --- /dev/null
> +++ b/include/net/lwtunnel.h
> @@ -0,0 +1,84 @@
> +#ifndef __NET_LWTUNNEL_H
> +#define __NET_LWTUNNEL_H 1
> +
> +#include <linux/lwtunnel.h>
> +#include <linux/netdevice.h>
> +#include <linux/skbuff.h>
> +#include <linux/types.h>
> +#include <net/dsfield.h>
> +#include <net/ip.h>
> +#include <net/rtnetlink.h>
> +
> +#define LWTUNNEL_HASH_BITS   7
> +#define LWTUNNEL_HASH_SIZE   (1 << LWTUNNEL_HASH_BITS)
> +
> +struct lwtunnel_hdr {
> +	int             len;
> +	__u8            data[0];
> +};
> +
> +/* lw tunnel state flags */
> +#define LWTUNNEL_STATE_OUTPUT_REDIRECT 0x1
> +
> +#define lwtunnel_output_redirect(lwtstate) (lwtstate && \
> +			(lwtstate->flags & LWTUNNEL_STATE_OUTPUT_REDIRECT))

This could be made an inline function for type-safety.

> +
> +struct lwtunnel_state {
> +	__u16		type;
> +	__u16		flags;
> +	atomic_t	refcnt;
> +	struct lwtunnel_hdr tunnel;
> +};
> +
> +struct lwtunnel_net {
> +	struct hlist_head tunnels[LWTUNNEL_HASH_SIZE];
> +};

This type doesn't appear to be used in this patch series. Do you intend 
to use it in a future version?

> +
> +struct lwtunnel_encap_ops {
> +	int (*build_state)(struct net_device *dev, struct nlattr *encap,
> +			   struct lwtunnel_state **ts);
> +	int (*output)(struct sock *sk, struct sk_buff *skb);
> +	int (*fill_encap)(struct sk_buff *skb,
> +			  struct lwtunnel_state *lwtstate);
> +	int (*get_encap_size)(struct lwtunnel_state *lwtstate);
> +};
> +
> +#define MAX_LWTUNNEL_ENCAP_OPS 8
> +extern const struct lwtunnel_encap_ops __rcu *
> +		lwtun_encaps[MAX_LWTUNNEL_ENCAP_OPS];
> +
> +static inline void lwtunnel_state_get(struct lwtunnel_state *lws)
> +{
> +	atomic_inc(&lws->refcnt);
> +}
> +
> +static inline void lwtunnel_state_put(struct lwtunnel_state *lws)
> +{
> +	if (!lws)
> +		return;
> +
> +	if (atomic_dec_and_test(&lws->refcnt))
> +		kfree(lws);
> +}
> +
> +static inline struct lwtunnel_state *lwtunnel_skb_lwstate(struct sk_buff *skb)
> +{
> +	struct rtable *rt = (struct rtable *)skb_dst(skb);
> +
> +	return rt->rt_lwtstate;
> +}

It doesn't look like this patch will build on its own because 
rt_lwtstate isn't added to struct rtable until patch 2.

More importantly, is it safe to assume that skb_dst will always return 
an IPv4 dst? How will this look when IPv6 support is added?

> +
> +int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op,
> +			   unsigned int num);
> +int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op,
> +			   unsigned int num);
> +int lwtunnel_build_state(struct net_device *dev, u16 encap_type,
> +			 struct nlattr *encap,
> +			 struct lwtunnel_state **lws);
> +int lwtunnel_fill_encap(struct sk_buff *skb,
> +			struct lwtunnel_state *lwtstate);
> +int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate);
> +struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len);
> +int lwtunnel_output(struct sock *sk, struct sk_buff *skb);
> +
> +#endif /* __NET_LWTUNNEL_H */
...
> diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
> new file mode 100644
> index 0000000..29c7802
> --- /dev/null
> +++ b/net/core/lwtunnel.c
> @@ -0,0 +1,162 @@
> +/*
> + * lwtunnel	Infrastructure for light weight tunnels like mpls
> + *
> + *
> + *		This program is free software; you can redistribute it and/or
> + *		modify it under the terms of the GNU General Public License
> + *		as published by the Free Software Foundation; either version
> + *		2 of the License, or (at your option) any later version.
> + *
> + */
> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> +
> +#include <linux/capability.h>
> +#include <linux/module.h>
> +#include <linux/types.h>
> +#include <linux/kernel.h>
> +#include <linux/slab.h>
> +#include <linux/uaccess.h>
> +#include <linux/skbuff.h>
> +#include <linux/netdevice.h>
> +#include <linux/in.h>
> +#include <linux/init.h>
> +#include <linux/err.h>
> +
> +#include <net/lwtunnel.h>
> +#include <net/rtnetlink.h>
> +
> +struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len)
> +{
> +	struct lwtunnel_state *lws;
> +
> +	return kzalloc(sizeof(*lws) + hdr_len, GFP_KERNEL);
> +}
> +EXPORT_SYMBOL(lwtunnel_state_alloc);
> +
> +const struct lwtunnel_encap_ops __rcu *
> +		lwtun_encaps[MAX_LWTUNNEL_ENCAP_OPS] __read_mostly;
> +
> +int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *ops,
> +			   unsigned int num)
> +{
> +	if (num >= MAX_LWTUNNEL_ENCAP_OPS)
> +		return -ERANGE;
> +
> +	return !cmpxchg((const struct lwtunnel_encap_ops **)
> +			&lwtun_encaps[num],
> +			NULL, ops) ? 0 : -1;
> +}
> +EXPORT_SYMBOL(lwtunnel_encap_add_ops);
> +
> +int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops,
> +			   unsigned int num)
> +{
> +	int ret;
> +
> +	if (num >= MAX_LWTUNNEL_ENCAP_OPS)
> +		return -ERANGE;
> +
> +	ret = (cmpxchg((const struct lwtunnel_encap_ops **)
> +		       &lwtun_encaps[num],
> +		       ops, NULL) == ops) ? 0 : -1;
> +
> +	synchronize_net();
> +
> +	return ret;
> +}
> +EXPORT_SYMBOL(lwtunnel_encap_del_ops);
> +
> +int lwtunnel_build_state(struct net_device *dev, u16 encap_type,
> +			 struct nlattr *encap, struct lwtunnel_state **lws)
> +{
> +	const struct lwtunnel_encap_ops *ops;
> +	int ret = -EINVAL;
> +
> +	if (encap_type == LWTUNNEL_ENCAP_NONE ||
> +	    encap_type >= MAX_LWTUNNEL_ENCAP_OPS)
> +		return ret;
> +
> +	ret = -EOPNOTSUPP;
> +	rcu_read_lock();
> +	ops = rcu_dereference(lwtun_encaps[encap_type]);
> +	if (likely(ops && ops->build_state))
> +		ret = ops->build_state(dev, encap, lws);
> +	rcu_read_unlock();
> +
> +	return ret;
> +}
> +EXPORT_SYMBOL(lwtunnel_build_state);
> +
> +int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate)
> +{
> +	const struct lwtunnel_encap_ops *ops;
> +	struct nlattr *nest;
> +	int ret = -EINVAL;
> +
> +	if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
> +	    lwtstate->type >= MAX_LWTUNNEL_ENCAP_OPS)
> +		return 0;
> +
> +	ret = -EOPNOTSUPP;
> +	nest = nla_nest_start(skb, RTA_ENCAP);

Again, it doesn't look like this will build since RTA_ENCAP isn't added 
until patch 2.

Thanks,
Rob

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next RFC v2 1/3] lwt: infrastructure to support light weight tunnels
  2015-06-19 14:43 ` Robert Shearman
@ 2015-06-19 15:14   ` roopa
  2015-06-19 17:25     ` Robert Shearman
  0 siblings, 1 reply; 14+ messages in thread
From: roopa @ 2015-06-19 15:14 UTC (permalink / raw)
  To: Robert Shearman; +Cc: ebiederm, tgraf, davem, netdev

On 6/19/15, 7:43 AM, Robert Shearman wrote:
>> diff --git a/include/linux/lwtunnel.h b/include/linux/lwtunnel.h
>> new file mode 100644 
<snip>
>> +/* lw tunnel state flags */
>> +#define LWTUNNEL_STATE_OUTPUT_REDIRECT 0x1
>> +
>> +#define lwtunnel_output_redirect(lwtstate) (lwtstate && \
>> +            (lwtstate->flags & LWTUNNEL_STATE_OUTPUT_REDIRECT))
>
> This could be made an inline function for type-safety.
ack
>
>> +
>> +struct lwtunnel_state {
>> +    __u16        type;
>> +    __u16        flags;
>> +    atomic_t    refcnt;
>> +    struct lwtunnel_hdr tunnel;
>> +};
>> +
>> +struct lwtunnel_net {
>> +    struct hlist_head tunnels[LWTUNNEL_HASH_SIZE];
>> +};
>
> This type doesn't appear to be used in this patch series. Do you 
> intend to use it in a future version?
ack, will get rid of it
>
>>
>> +
>> +static inline struct lwtunnel_state *lwtunnel_skb_lwstate(struct 
>> sk_buff *skb)
>> +{
>> +    struct rtable *rt = (struct rtable *)skb_dst(skb);
>> +
>> +    return rt->rt_lwtstate;
>> +}
>
> It doesn't look like this patch will build on its own because 
> rt_lwtstate isn't added to struct rtable until patch 2.
looks like i messed up the patch creation. I will fix that with the next 
series.
>
> More importantly, is it safe to assume that skb_dst will always return 
> an IPv4 dst? How will this look when IPv6 support is added?

Today lwtunnel_skb_lwstate is called from lwtunnel_output which is only 
called from ipv4 code.
And my ipv6 variant code was supposed to have a 6 suffix. something like 
lwtunnel_output6.
Or to be more explicit i will probably have variants of the output and 
skb handling functions like,
lwtunnel_output_ipv4 and lwtunnel_output_ipv6.
>> +
>> +    ret = -EOPNOTSUPP;
>> +    nest = nla_nest_start(skb, RTA_ENCAP);
>
> Again, it doesn't look like this will build since RTA_ENCAP isn't 
> added until patch 2.
>
ack, sorry abt the patch ordering. will fix it.

Thanks for the review.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next RFC v2 1/3] lwt: infrastructure to support light weight tunnels
  2015-06-19 15:14   ` roopa
@ 2015-06-19 17:25     ` Robert Shearman
  2015-06-19 18:34       ` roopa
  0 siblings, 1 reply; 14+ messages in thread
From: Robert Shearman @ 2015-06-19 17:25 UTC (permalink / raw)
  To: roopa; +Cc: ebiederm, tgraf, davem, netdev

n 19/06/15 16:14, roopa wrote:
> On 6/19/15, 7:43 AM, Robert Shearman wrote:
>>>
>>> +
>>> +static inline struct lwtunnel_state *lwtunnel_skb_lwstate(struct
>>> sk_buff *skb)
>>> +{
>>> +    struct rtable *rt = (struct rtable *)skb_dst(skb);
>>> +
>>> +    return rt->rt_lwtstate;
>>> +}
>>
>> It doesn't look like this patch will build on its own because
>> rt_lwtstate isn't added to struct rtable until patch 2.
> looks like i messed up the patch creation. I will fix that with the next
> series.
>>
>> More importantly, is it safe to assume that skb_dst will always return
>> an IPv4 dst? How will this look when IPv6 support is added?
>
> Today lwtunnel_skb_lwstate is called from lwtunnel_output which is only
> called from ipv4 code.
> And my ipv6 variant code was supposed to have a 6 suffix. something like
> lwtunnel_output6.
> Or to be more explicit i will probably have variants of the output and
> skb handling functions like,
> lwtunnel_output_ipv4 and lwtunnel_output_ipv6.

Do you intend for these functions to be used by netdevices to support 
the vxlan use case?

If so, then how will the netdevice know which one of the two to call? 
Will there have to be a netdevice for ipv4 and a netdevice for ipv6?

If not, could you outline how you intend for it to be implemented?

Thanks,
Rob

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next RFC v2 1/3] lwt: infrastructure to support light weight tunnels
  2015-06-19 17:25     ` Robert Shearman
@ 2015-06-19 18:34       ` roopa
  2015-06-19 18:39         ` Robert Shearman
  0 siblings, 1 reply; 14+ messages in thread
From: roopa @ 2015-06-19 18:34 UTC (permalink / raw)
  To: Robert Shearman; +Cc: ebiederm, tgraf, davem, netdev

On 6/19/15, 10:25 AM, Robert Shearman wrote:
> n 19/06/15 16:14, roopa wrote:
>> Today lwtunnel_skb_lwstate is called from lwtunnel_output which is only
>> called from ipv4 code.
>> And my ipv6 variant code was supposed to have a 6 suffix. something like
>> lwtunnel_output6.
>> Or to be more explicit i will probably have variants of the output and
>> skb handling functions like,
>> lwtunnel_output_ipv4 and lwtunnel_output_ipv6.
>
> Do you intend for these functions to be used by netdevices to support 
> the vxlan use case?
>
> If so, then how will the netdevice know which one of the two to call? 
> Will there have to be a netdevice for ipv4 and a netdevice for ipv6?
>
> If not, could you outline how you intend for it to be implemented?

In the netdevice case, this output function is not called atall. It 
should just follow the existing netdevice the route is pointing to.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next RFC v2 1/3] lwt: infrastructure to support light weight tunnels
  2015-06-19 18:34       ` roopa
@ 2015-06-19 18:39         ` Robert Shearman
  2015-06-20 14:27           ` roopa
  0 siblings, 1 reply; 14+ messages in thread
From: Robert Shearman @ 2015-06-19 18:39 UTC (permalink / raw)
  To: roopa; +Cc: ebiederm, tgraf, davem, netdev

On 19/06/15 19:34, roopa wrote:
> On 6/19/15, 10:25 AM, Robert Shearman wrote:
>> n 19/06/15 16:14, roopa wrote:
>>> Today lwtunnel_skb_lwstate is called from lwtunnel_output which is only
>>> called from ipv4 code.
>>> And my ipv6 variant code was supposed to have a 6 suffix. something like
>>> lwtunnel_output6.
>>> Or to be more explicit i will probably have variants of the output and
>>> skb handling functions like,
>>> lwtunnel_output_ipv4 and lwtunnel_output_ipv6.
>>
>> Do you intend for these functions to be used by netdevices to support
>> the vxlan use case?
>>
>> If so, then how will the netdevice know which one of the two to call?
>> Will there have to be a netdevice for ipv4 and a netdevice for ipv6?
>>
>> If not, could you outline how you intend for it to be implemented?
>
> In the netdevice case, this output function is not called atall. It
> should just follow the existing netdevice the route is pointing to.

Sorry for not being clear, but I meant that there would have to be 
lwtunnel_skb_lwstate functions for ipv4 and ipv6 to match the output 
functions. So in the vxlan use case where it's using a netdevice, how 
would it determine which one to call?

Thanks,
Rob

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next RFC v2 1/3] lwt: infrastructure to support light weight tunnels
  2015-06-19 18:39         ` Robert Shearman
@ 2015-06-20 14:27           ` roopa
  2015-06-21 20:40             ` Thomas Graf
  0 siblings, 1 reply; 14+ messages in thread
From: roopa @ 2015-06-20 14:27 UTC (permalink / raw)
  To: Robert Shearman; +Cc: ebiederm, tgraf, davem, netdev

On 6/19/15, 11:39 AM, Robert Shearman wrote:
> On 19/06/15 19:34, roopa wrote:
>> On 6/19/15, 10:25 AM, Robert Shearman wrote:
>>> n 19/06/15 16:14, roopa wrote:
>>>
>> In the netdevice case, this output function is not called atall. It
>> should just follow the existing netdevice the route is pointing to.
>
> Sorry for not being clear, but I meant that there would have to be 
> lwtunnel_skb_lwstate functions for ipv4 and ipv6 to match the output 
> functions. So in the vxlan use case where it's using a netdevice, how 
> would it determine which one to call?

thanks for that clarification, and good point. I see some areas of the 
kernel checking for skb->protocol to do the conversion (something like 
below). I am guessing that is acceptable.
if (skb->protocol == htons(ETH_P_IPV6))
                 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next RFC v2 1/3] lwt: infrastructure to support light weight tunnels
  2015-06-19  4:49 [PATCH net-next RFC v2 1/3] lwt: infrastructure to support light weight tunnels Roopa Prabhu
  2015-06-19 14:43 ` Robert Shearman
@ 2015-06-20 16:38 ` Nikolay Aleksandrov
  2015-06-22  2:05   ` roopa
  2015-06-21 20:32 ` Thomas Graf
  2015-07-03  9:49 ` Thomas Graf
  3 siblings, 1 reply; 14+ messages in thread
From: Nikolay Aleksandrov @ 2015-06-20 16:38 UTC (permalink / raw)
  To: Roopa Prabhu; +Cc: ebiederm, rshearma, tgraf, davem, netdev


<<<snip>>>
> diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
> new file mode 100644
> index 0000000..29c7802
> --- /dev/null
> +++ b/net/core/lwtunnel.c
> @@ -0,0 +1,162 @@
> +/*
> + * lwtunnel	Infrastructure for light weight tunnels like mpls
> + *
> + *
> + *		This program is free software; you can redistribute it and/or
> + *		modify it under the terms of the GNU General Public License
> + *		as published by the Free Software Foundation; either version
> + *		2 of the License, or (at your option) any later version.
> + *
> + */
> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> +
> +#include <linux/capability.h>
> +#include <linux/module.h>
> +#include <linux/types.h>
> +#include <linux/kernel.h>
> +#include <linux/slab.h>
> +#include <linux/uaccess.h>
> +#include <linux/skbuff.h>
> +#include <linux/netdevice.h>
> +#include <linux/in.h>
> +#include <linux/init.h>
> +#include <linux/err.h>
> +
> +#include <net/lwtunnel.h>
> +#include <net/rtnetlink.h>
> +
> +struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len)
> +{
> +	struct lwtunnel_state *lws;
> +
> +	return kzalloc(sizeof(*lws) + hdr_len, GFP_KERNEL);

This seems to be called with rcu_read_lock so GFP_ATOMIC  would have to
be used. (Judging by patch 3/3’s mpls_build_state and lwtunnel_build_state)

> +}
> +EXPORT_SYMBOL(lwtunnel_state_alloc);
> +
> +const struct lwtunnel_encap_ops __rcu *
> +		lwtun_encaps[MAX_LWTUNNEL_ENCAP_OPS] __read_mostly;
> +
> +int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *ops,
> +			   unsigned int num)
> +{
> +	if (num >= MAX_LWTUNNEL_ENCAP_OPS)
> +		return -ERANGE;
> +
> +	return !cmpxchg((const struct lwtunnel_encap_ops **)
> +			&lwtun_encaps[num],
> +			NULL, ops) ? 0 : -1;
> +}
> +EXPORT_SYMBOL(lwtunnel_encap_add_ops);
> +
> +int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops,
> +			   unsigned int num)
> +{
> +	int ret;
> +
> +	if (num >= MAX_LWTUNNEL_ENCAP_OPS)
> +		return -ERANGE;
> +
> +	ret = (cmpxchg((const struct lwtunnel_encap_ops **)
> +		       &lwtun_encaps[num],
> +		       ops, NULL) == ops) ? 0 : -1;
> +
> +	synchronize_net();
> +
> +	return ret;
> +}
> +EXPORT_SYMBOL(lwtunnel_encap_del_ops);
> +
> +int lwtunnel_build_state(struct net_device *dev, u16 encap_type,
> +			 struct nlattr *encap, struct lwtunnel_state **lws)
> +{
> +	const struct lwtunnel_encap_ops *ops;
> +	int ret = -EINVAL;
> +
> +	if (encap_type == LWTUNNEL_ENCAP_NONE ||
> +	    encap_type >= MAX_LWTUNNEL_ENCAP_OPS)
> +		return ret;
> +
> +	ret = -EOPNOTSUPP;
> +	rcu_read_lock();
> +	ops = rcu_dereference(lwtun_encaps[encap_type]);
> +	if (likely(ops && ops->build_state))
> +		ret = ops->build_state(dev, encap, lws);
> +	rcu_read_unlock();
> +
> +	return ret;
> +}
> +EXPORT_SYMBOL(lwtunnel_build_state);
> +
> +int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate)
> +{
> +	const struct lwtunnel_encap_ops *ops;
> +	struct nlattr *nest;
> +	int ret = -EINVAL;
> +
> +	if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
> +	    lwtstate->type >= MAX_LWTUNNEL_ENCAP_OPS)
> +		return 0;
> +
> +	ret = -EOPNOTSUPP;
> +	nest = nla_nest_start(skb, RTA_ENCAP);
> +	rcu_read_lock();
> +	ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
> +	if (likely(ops && ops->fill_encap))
> +		ret = ops->fill_encap(skb, lwtstate);
> +	rcu_read_unlock();
> +
> +	if (ret)
> +		goto errout;
> +
> +	nla_nest_end(skb, nest);
> +
> +	return 0;
> +
> +errout:
> +	nla_nest_cancel(skb, nest);
> +
> +	return ret;
> +}
> +EXPORT_SYMBOL(lwtunnel_fill_encap);
> +
> +int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate)
> +{
> +	const struct lwtunnel_encap_ops *ops;
> +	int ret = 0;
> +
> +	if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
> +	    lwtstate->type >= MAX_LWTUNNEL_ENCAP_OPS)
> +		return 0;
> +
> +	rcu_read_lock();
> +	ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
> +	if (likely(ops && ops->get_encap_size))
> +		ret = nla_total_size(ops->get_encap_size(lwtstate));
> +	rcu_read_unlock();
> +
> +	return ret;
> +}
> +EXPORT_SYMBOL(lwtunnel_get_encap_size);
> +
> +int lwtunnel_output(struct sock *sk, struct sk_buff *skb)
> +{
> +	const struct lwtunnel_encap_ops *ops;
> +	struct lwtunnel_state *lwtstate = lwtunnel_skb_lwstate(skb);
> +	int ret = 0;
> +
> +	if (!lwtstate)
> +		return -EINVAL;
> +
> +	if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
> +	    lwtstate->type >= MAX_LWTUNNEL_ENCAP_OPS)
> +		return 0;
> +
> +	rcu_read_lock();
> +	ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
> +	if (likely(ops && ops->output))
> +		ret = ops->output(sk, skb);
> +	rcu_read_unlock();
> +
> +	return ret;
> +}
> +EXPORT_SYMBOL(lwtunnel_output);
> -- 
> 1.7.10.4
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next RFC v2 1/3] lwt: infrastructure to support light weight tunnels
  2015-06-19  4:49 [PATCH net-next RFC v2 1/3] lwt: infrastructure to support light weight tunnels Roopa Prabhu
  2015-06-19 14:43 ` Robert Shearman
  2015-06-20 16:38 ` Nikolay Aleksandrov
@ 2015-06-21 20:32 ` Thomas Graf
  2015-06-22  2:47   ` roopa
  2015-07-03  9:49 ` Thomas Graf
  3 siblings, 1 reply; 14+ messages in thread
From: Thomas Graf @ 2015-06-21 20:32 UTC (permalink / raw)
  To: Roopa Prabhu; +Cc: ebiederm, rshearma, davem, netdev

On 06/18/15 at 09:49pm, Roopa Prabhu wrote:
> +#include <net/ip.h>
> +#include <net/rtnetlink.h>
> +
> +#define LWTUNNEL_HASH_BITS   7
> +#define LWTUNNEL_HASH_SIZE   (1 << LWTUNNEL_HASH_BITS)
> +
> +struct lwtunnel_hdr {
> +	int             len;
> +	__u8            data[0];
> +};

The name header is a bit misleading here. Certain encaps won't
preallocate the header. How we just add a len to lwt_state and
allow the user have private data? Not sure we need to split this
into a separate struct anyway.

> +/* lw tunnel state flags */
> +#define LWTUNNEL_STATE_OUTPUT_REDIRECT 0x1
> +
> +#define lwtunnel_output_redirect(lwtstate) (lwtstate && \
> +			(lwtstate->flags & LWTUNNEL_STATE_OUTPUT_REDIRECT))

Converting this to a static inline function would add type checks
by the compiler and it shouldn't result in any different code.

> +#define MAX_LWTUNNEL_ENCAP_OPS 8
> +extern const struct lwtunnel_encap_ops __rcu *
> +		lwtun_encaps[MAX_LWTUNNEL_ENCAP_OPS];

I guess we require everybody to add themselves to the enum so
we might as well just derive the MAX from the enum MAX. Unless you
want out of tree modules to register themselves.

> +
> +struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len)
> +{
> +	struct lwtunnel_state *lws;
> +
> +	return kzalloc(sizeof(*lws) + hdr_len, GFP_KERNEL);

Should this set refcnt to 1?

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next RFC v2 1/3] lwt: infrastructure to support light weight tunnels
  2015-06-20 14:27           ` roopa
@ 2015-06-21 20:40             ` Thomas Graf
  2015-06-22  2:48               ` roopa
  0 siblings, 1 reply; 14+ messages in thread
From: Thomas Graf @ 2015-06-21 20:40 UTC (permalink / raw)
  To: roopa; +Cc: Robert Shearman, ebiederm, davem, netdev

On 06/20/15 at 07:27am, roopa wrote:
> On 6/19/15, 11:39 AM, Robert Shearman wrote:
> >On 19/06/15 19:34, roopa wrote:
> >>On 6/19/15, 10:25 AM, Robert Shearman wrote:
> >>>n 19/06/15 16:14, roopa wrote:
> >>>
> >>In the netdevice case, this output function is not called atall. It
> >>should just follow the existing netdevice the route is pointing to.
> >
> >Sorry for not being clear, but I meant that there would have to be
> >lwtunnel_skb_lwstate functions for ipv4 and ipv6 to match the output
> >functions. So in the vxlan use case where it's using a netdevice, how
> >would it determine which one to call?
> 
> thanks for that clarification, and good point. I see some areas of the
> kernel checking for skb->protocol to do the conversion (something like
> below). I am guessing that is acceptable.
> if (skb->protocol == htons(ETH_P_IPV6))
>                 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

I'm not yet convinced that it makes sense to offer the no-netdevice
shortcut for VXLAN. I'm not convinced we need yet another VXLAN data
path. In fact, I'm trying to get rid of the OVS one for this specific
reason.

I have no objection though if somebody comes up with an architecture
that can't just pass the required metadata between the namespaces and
do the actual encapsulation in a single net_device in the root/host
namespace.

Either way, I thin it's fair to defer to this to a later point. We
don't need to solve this for the first iteration of MPLS and VXLAN
implementation.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next RFC v2 1/3] lwt: infrastructure to support light weight tunnels
  2015-06-20 16:38 ` Nikolay Aleksandrov
@ 2015-06-22  2:05   ` roopa
  0 siblings, 0 replies; 14+ messages in thread
From: roopa @ 2015-06-22  2:05 UTC (permalink / raw)
  To: Nikolay Aleksandrov; +Cc: ebiederm, rshearma, tgraf, davem, netdev

On 6/20/15, 9:38 AM, Nikolay Aleksandrov wrote:
> <<<snip>>>
>> diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
>> new file mode 100644
>> index 0000000..29c7802
>> --- /dev/null
>> +++ b/net/core/lwtunnel.c
>> @@ -0,0 +1,162 @@
>> +/*
>> + * lwtunnel	Infrastructure for light weight tunnels like mpls
>> + *
>> + *
>> + *		This program is free software; you can redistribute it and/or
>> + *		modify it under the terms of the GNU General Public License
>> + *		as published by the Free Software Foundation; either version
>> + *		2 of the License, or (at your option) any later version.
>> + *
>> + */
>> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
>> +
>> +#include <linux/capability.h>
>> +#include <linux/module.h>
>> +#include <linux/types.h>
>> +#include <linux/kernel.h>
>> +#include <linux/slab.h>
>> +#include <linux/uaccess.h>
>> +#include <linux/skbuff.h>
>> +#include <linux/netdevice.h>
>> +#include <linux/in.h>
>> +#include <linux/init.h>
>> +#include <linux/err.h>
>> +
>> +#include <net/lwtunnel.h>
>> +#include <net/rtnetlink.h>
>> +
>> +struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len)
>> +{
>> +	struct lwtunnel_state *lws;
>> +
>> +	return kzalloc(sizeof(*lws) + hdr_len, GFP_KERNEL);
> This seems to be called with rcu_read_lock so GFP_ATOMIC  would have to
> be used. (Judging by patch 3/3’s mpls_build_state and lwtunnel_build_state)
>
yep, correct. thanks

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next RFC v2 1/3] lwt: infrastructure to support light weight tunnels
  2015-06-21 20:32 ` Thomas Graf
@ 2015-06-22  2:47   ` roopa
  0 siblings, 0 replies; 14+ messages in thread
From: roopa @ 2015-06-22  2:47 UTC (permalink / raw)
  To: Thomas Graf; +Cc: ebiederm, rshearma, davem, netdev

On 6/21/15, 1:32 PM, Thomas Graf wrote:
> On 06/18/15 at 09:49pm, Roopa Prabhu wrote:
>> +#include <net/ip.h>
>> +#include <net/rtnetlink.h>
>> +
>> +#define LWTUNNEL_HASH_BITS   7
>> +#define LWTUNNEL_HASH_SIZE   (1 << LWTUNNEL_HASH_BITS)
>> +
>> +struct lwtunnel_hdr {
>> +	int             len;
>> +	__u8            data[0];
>> +};
> The name header is a bit misleading here. Certain encaps won't
> preallocate the header. How we just add a len to lwt_state and
> allow the user have private data? Not sure we need to split this
> into a separate struct anyway.
sure, I have been debating about that as well.
>
>> +/* lw tunnel state flags */
>> +#define LWTUNNEL_STATE_OUTPUT_REDIRECT 0x1
>> +
>> +#define lwtunnel_output_redirect(lwtstate) (lwtstate && \
>> +			(lwtstate->flags & LWTUNNEL_STATE_OUTPUT_REDIRECT))
> Converting this to a static inline function would add type checks
> by the compiler and it shouldn't result in any different code.
will do,
>
>> +#define MAX_LWTUNNEL_ENCAP_OPS 8
>> +extern const struct lwtunnel_encap_ops __rcu *
>> +		lwtun_encaps[MAX_LWTUNNEL_ENCAP_OPS];
> I guess we require everybody to add themselves to the enum so
> we might as well just derive the MAX from the enum MAX. Unless you
> want out of tree modules to register themselves.

I am ok with deriving the MAX from enum Max.
>
>> +
>> +struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len)
>> +{
>> +	struct lwtunnel_state *lws;
>> +
>> +	return kzalloc(sizeof(*lws) + hdr_len, GFP_KERNEL);
> Should this set refcnt to 1?
My alloc does not bump the refcnt but its done right before it is 
assigned to a nexthop.
I was planning on checking the convention followed for this. will check 
and change if needed.


Thanks!

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next RFC v2 1/3] lwt: infrastructure to support light weight tunnels
  2015-06-21 20:40             ` Thomas Graf
@ 2015-06-22  2:48               ` roopa
  0 siblings, 0 replies; 14+ messages in thread
From: roopa @ 2015-06-22  2:48 UTC (permalink / raw)
  To: Thomas Graf; +Cc: Robert Shearman, ebiederm, davem, netdev

On 6/21/15, 1:40 PM, Thomas Graf wrote:
> On 06/20/15 at 07:27am, roopa wrote:
>> On 6/19/15, 11:39 AM, Robert Shearman wrote:
>>>
>>> Sorry for not being clear, but I meant that there would have to be
>>> lwtunnel_skb_lwstate functions for ipv4 and ipv6 to match the output
>>> functions. So in the vxlan use case where it's using a netdevice, how
>>> would it determine which one to call?
>> thanks for that clarification, and good point. I see some areas of the
>> kernel checking for skb->protocol to do the conversion (something like
>> below). I am guessing that is acceptable.
>> if (skb->protocol == htons(ETH_P_IPV6))
>>                  struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
> I'm not yet convinced that it makes sense to offer the no-netdevice
> shortcut for VXLAN. I'm not convinced we need yet another VXLAN data
> path. In fact, I'm trying to get rid of the OVS one for this specific
> reason.
>
> I have no objection though if somebody comes up with an architecture
> that can't just pass the required metadata between the namespaces and
> do the actual encapsulation in a single net_device in the root/host
> namespace.
>
> Either way, I thin it's fair to defer to this to a later point. We
> don't need to solve this for the first iteration of MPLS and VXLAN
> implementation.
ack, thanks for your thoughts on this.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH net-next RFC v2 1/3] lwt: infrastructure to support light weight tunnels
  2015-06-19  4:49 [PATCH net-next RFC v2 1/3] lwt: infrastructure to support light weight tunnels Roopa Prabhu
                   ` (2 preceding siblings ...)
  2015-06-21 20:32 ` Thomas Graf
@ 2015-07-03  9:49 ` Thomas Graf
  3 siblings, 0 replies; 14+ messages in thread
From: Thomas Graf @ 2015-07-03  9:49 UTC (permalink / raw)
  To: Roopa Prabhu; +Cc: ebiederm, rshearma, davem, netdev

On 06/18/15 at 09:49pm, Roopa Prabhu wrote:
> +static inline struct lwtunnel_state *lwtunnel_skb_lwstate(struct sk_buff *skb)
> +{
> +	struct rtable *rt = (struct rtable *)skb_dst(skb);
> +
> +	return rt->rt_lwtstate;
> +}

Noticed while rebasing onto your patches. This needs an
ifdef CONFIG_LWTUNNEL.

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2015-07-03 10:11 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-06-19  4:49 [PATCH net-next RFC v2 1/3] lwt: infrastructure to support light weight tunnels Roopa Prabhu
2015-06-19 14:43 ` Robert Shearman
2015-06-19 15:14   ` roopa
2015-06-19 17:25     ` Robert Shearman
2015-06-19 18:34       ` roopa
2015-06-19 18:39         ` Robert Shearman
2015-06-20 14:27           ` roopa
2015-06-21 20:40             ` Thomas Graf
2015-06-22  2:48               ` roopa
2015-06-20 16:38 ` Nikolay Aleksandrov
2015-06-22  2:05   ` roopa
2015-06-21 20:32 ` Thomas Graf
2015-06-22  2:47   ` roopa
2015-07-03  9:49 ` Thomas Graf

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.