[PATCH net-next RFC v2 1/3] lwt: infrastructure to support light weight tunnels

* [PATCH net-next RFC v2 1/3] lwt: infrastructure to support light weight tunnels
@ 2015-06-19  4:49 Roopa Prabhu
  2015-06-19 14:43 ` Robert Shearman
                   ` (3 more replies)
  0 siblings, 4 replies; 14+ messages in thread
From: Roopa Prabhu @ 2015-06-19  4:49 UTC (permalink / raw)
  To: ebiederm, rshearma, tgraf; +Cc: davem, netdev

From: Roopa Prabhu <roopa@cumulusnetworks.com>

Provide ops to parse, build and output encapsulated
packets for drivers that want to attach tunnel encap
information to routes.

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
---
 include/linux/lwtunnel.h      |    6 ++
 include/net/lwtunnel.h        |   84 +++++++++++++++++++++
 include/uapi/linux/lwtunnel.h |   11 +++
 net/Kconfig                   |    5 ++
 net/core/Makefile             |    1 +
 net/core/lwtunnel.c           |  162 +++++++++++++++++++++++++++++++++++++++++
 6 files changed, 269 insertions(+)
 create mode 100644 include/linux/lwtunnel.h
 create mode 100644 include/net/lwtunnel.h
 create mode 100644 include/uapi/linux/lwtunnel.h
 create mode 100644 net/core/lwtunnel.c

diff --git a/include/linux/lwtunnel.h b/include/linux/lwtunnel.h
new file mode 100644
index 0000000..97f32f8
--- /dev/null
+++ b/include/linux/lwtunnel.h
@@ -0,0 +1,6 @@
+#ifndef _LINUX_LWTUNNEL_H_
+#define _LINUX_LWTUNNEL_H_
+
+#include <uapi/linux/lwtunnel.h>
+
+#endif /* _LINUX_LWTUNNEL_H_ */
diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h
new file mode 100644
index 0000000..649da3c
--- /dev/null
+++ b/include/net/lwtunnel.h
@@ -0,0 +1,84 @@
+#ifndef __NET_LWTUNNEL_H
+#define __NET_LWTUNNEL_H 1
+
+#include <linux/lwtunnel.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/types.h>
+#include <net/dsfield.h>
+#include <net/ip.h>
+#include <net/rtnetlink.h>
+
+#define LWTUNNEL_HASH_BITS   7
+#define LWTUNNEL_HASH_SIZE   (1 << LWTUNNEL_HASH_BITS)
+
+struct lwtunnel_hdr {
+	int             len;	/* presumably the byte length of data[] - TODO confirm */
+	__u8            data[0];	/* trailing storage sized at allocation time */
+};
+
+/* lw tunnel state flags */
+#define LWTUNNEL_STATE_OUTPUT_REDIRECT 0x1
+/* NULL-safe test for OUTPUT_REDIRECT; evaluates lwtstate twice. */
+#define lwtunnel_output_redirect(lwtstate) ((lwtstate) && \
+			((lwtstate)->flags & LWTUNNEL_STATE_OUTPUT_REDIRECT))
+
+struct lwtunnel_state {
+	__u16		type;	/* used as the index into lwtun_encaps[] */
+	__u16		flags;	/* LWTUNNEL_STATE_* bits */
+	atomic_t	refcnt;	/* see lwtunnel_state_get()/lwtunnel_state_put() */
+	struct lwtunnel_hdr tunnel;	/* keep last: its data[] trails the struct */
+};
+
+struct lwtunnel_net {	/* NOTE(review): nothing in this patch uses this struct */
+	struct hlist_head tunnels[LWTUNNEL_HASH_SIZE];
+};
+
+struct lwtunnel_encap_ops {	/* per-encap-type callbacks, all optional */
+	int (*build_state)(struct net_device *dev, struct nlattr *encap,
+			   struct lwtunnel_state **ts); /* build *ts from encap attr */
+	int (*output)(struct sock *sk, struct sk_buff *skb); /* transmit hook */
+	int (*fill_encap)(struct sk_buff *skb,
+			  struct lwtunnel_state *lwtstate); /* dump state into skb */
+	int (*get_encap_size)(struct lwtunnel_state *lwtstate); /* fed to nla_total_size() */
+};
+
+#define MAX_LWTUNNEL_ENCAP_OPS 8
+extern const struct lwtunnel_encap_ops __rcu *
+		lwtun_encaps[MAX_LWTUNNEL_ENCAP_OPS];
+
+static inline void lwtunnel_state_get(struct lwtunnel_state *lws)
+{
+	atomic_inc(&lws->refcnt);	/* take a reference; lws must not be NULL */
+}
+
+/*
+ * Drop one reference on lws. A NULL lws is ignored; the final put frees it.
+ */
+static inline void lwtunnel_state_put(struct lwtunnel_state *lws)
+{
+	if (lws && atomic_dec_and_test(&lws->refcnt))
+		kfree(lws);
+}
+
+static inline struct lwtunnel_state *lwtunnel_skb_lwstate(struct sk_buff *skb)
+{
+	struct rtable *rt = (struct rtable *)skb_dst(skb); /* NOTE(review): unchecked cast */
+
+	return rt->rt_lwtstate;	/* NOTE(review): rt is not NULL-checked - confirm callers */
+}
+
+int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op,
+			   unsigned int num);
+int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op,
+			   unsigned int num);
+int lwtunnel_build_state(struct net_device *dev, u16 encap_type,
+			 struct nlattr *encap,
+			 struct lwtunnel_state **lws);
+int lwtunnel_fill_encap(struct sk_buff *skb,
+			struct lwtunnel_state *lwtstate);
+int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate);
+struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len);
+int lwtunnel_output(struct sock *sk, struct sk_buff *skb);
+
+#endif /* __NET_LWTUNNEL_H */
diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h
new file mode 100644
index 0000000..11150c0
--- /dev/null
+++ b/include/uapi/linux/lwtunnel.h
@@ -0,0 +1,11 @@
+#ifndef _UAPI_LWTUNNEL_H_
+#define _UAPI_LWTUNNEL_H_
+
+#include <linux/types.h>
+
+enum tunnel_encap_types {
+	LWTUNNEL_ENCAP_NONE,	/* 0: no encapsulation */
+	LWTUNNEL_ENCAP_MPLS,
+};
+
+#endif /* _UAPI_LWTUNNEL_H_ */
diff --git a/net/Kconfig b/net/Kconfig
index 57a7c5a..e296d6f 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -374,9 +374,14 @@ source "net/caif/Kconfig"
 source "net/ceph/Kconfig"
 source "net/nfc/Kconfig"
 
+config LWTUNNEL
+	bool "Network light weight tunnels"
+	---help---
+	  Infrastructure for attaching tunnel encap information (e.g. MPLS) to routes.
 
 endif   # if NET
 
 # Used by archs to tell that they support BPF_JIT
 config HAVE_BPF_JIT
 	bool
+
diff --git a/net/core/Makefile b/net/core/Makefile
index fec0856..086b01f 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -23,3 +23,4 @@ obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o
 obj-$(CONFIG_NET_PTP_CLASSIFY) += ptp_classifier.o
 obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o
 obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o
+obj-$(CONFIG_LWTUNNEL) += lwtunnel.o
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
new file mode 100644
index 0000000..29c7802
--- /dev/null
+++ b/net/core/lwtunnel.c
@@ -0,0 +1,162 @@
+/*
+ * lwtunnel	Infrastructure for light weight tunnels like mpls
+ *
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/capability.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/in.h>
+#include <linux/init.h>
+#include <linux/err.h>
+
+#include <net/lwtunnel.h>
+#include <net/rtnetlink.h>
+
+/* Allocate a zeroed state with hdr_len bytes of trailing header space. */
+struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len)
+{
+	/* must not sleep: ops->build_state runs under rcu_read_lock() */
+	return kzalloc(sizeof(struct lwtunnel_state) + hdr_len, GFP_ATOMIC);
+}
+EXPORT_SYMBOL(lwtunnel_state_alloc);
+
+const struct lwtunnel_encap_ops __rcu *
+		lwtun_encaps[MAX_LWTUNNEL_ENCAP_OPS] __read_mostly; /* indexed by encap type */
+
+int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *ops,
+			   unsigned int num)
+{	/* Register ops in slot num of lwtun_encaps[]. */
+	if (num >= MAX_LWTUNNEL_ENCAP_OPS)
+		return -ERANGE;
+
+	return !cmpxchg((const struct lwtunnel_encap_ops **)
+			&lwtun_encaps[num],
+			NULL, ops) ? 0 : -1; /* claim only an empty slot; bare -1 if taken */
+}
+EXPORT_SYMBOL(lwtunnel_encap_add_ops);
+
+int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops,
+			   unsigned int num)
+{	/* Unregister ops from slot num of lwtun_encaps[]. */
+	int ret;
+
+	if (num >= MAX_LWTUNNEL_ENCAP_OPS)
+		return -ERANGE;
+
+	ret = (cmpxchg((const struct lwtunnel_encap_ops **)
+		       &lwtun_encaps[num],
+		       ops, NULL) == ops) ? 0 : -1; /* clear only if the slot holds ops */
+
+	synchronize_net();	/* executed whether or not the slot matched */
+
+	return ret;
+}
+EXPORT_SYMBOL(lwtunnel_encap_del_ops);
+
+/* Build *lws from the encap attribute via the ops registered for encap_type. */
+int lwtunnel_build_state(struct net_device *dev, u16 encap_type,
+			 struct nlattr *encap, struct lwtunnel_state **lws)
+{
+	const struct lwtunnel_encap_ops *handler;
+	int err = -EOPNOTSUPP;	/* kept when no ops/build_state is registered */
+
+	if (encap_type == LWTUNNEL_ENCAP_NONE ||
+	    encap_type >= MAX_LWTUNNEL_ENCAP_OPS)
+		return -EINVAL;
+
+	rcu_read_lock();
+	handler = rcu_dereference(lwtun_encaps[encap_type]);
+	if (likely(handler && handler->build_state))
+		err = handler->build_state(dev, encap, lws);
+	rcu_read_unlock();
+
+	return err;
+}
+EXPORT_SYMBOL(lwtunnel_build_state);
+
+/* Dump lwtstate as a nested RTA_ENCAP attribute; 0 if the type has none. */
+int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate)
+{
+	const struct lwtunnel_encap_ops *ops;
+	struct nlattr *nest;
+	int ret = -EOPNOTSUPP;	/* kept when no ops/fill_encap is registered */
+
+	if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
+	    lwtstate->type >= MAX_LWTUNNEL_ENCAP_OPS)
+		return 0;
+
+	nest = nla_nest_start(skb, RTA_ENCAP);
+	if (!nest)	/* skb is full; NULL must not reach nla_nest_end() */
+		return -EMSGSIZE;
+
+	rcu_read_lock();
+	ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
+	if (likely(ops && ops->fill_encap))
+		ret = ops->fill_encap(skb, lwtstate);
+	rcu_read_unlock();
+
+	if (ret) {
+		nla_nest_cancel(skb, nest);	/* drop the partial attribute */
+		return ret;
+	}
+
+	nla_nest_end(skb, nest);
+
+	return 0;
+}
+EXPORT_SYMBOL(lwtunnel_fill_encap);
+
+/* Netlink attribute space needed to dump lwtstate; 0 if nothing to dump. */
+int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate)
+{
+	const struct lwtunnel_encap_ops *handler;
+	int size = 0;
+
+	if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
+	    lwtstate->type >= MAX_LWTUNNEL_ENCAP_OPS)
+		return 0;
+	rcu_read_lock();
+	handler = rcu_dereference(lwtun_encaps[lwtstate->type]);
+	if (likely(handler && handler->get_encap_size))
+		size = nla_total_size(handler->get_encap_size(lwtstate));
+	rcu_read_unlock();
+
+	return size;
+}
+EXPORT_SYMBOL(lwtunnel_get_encap_size);
+
+/* Pass skb to the output hook of the encap state found on its dst. */
+int lwtunnel_output(struct sock *sk, struct sk_buff *skb)
+{
+	struct lwtunnel_state *state = lwtunnel_skb_lwstate(skb);
+	const struct lwtunnel_encap_ops *handler;
+	int err = 0;	/* also the result when no output hook is registered */
+
+	if (!state)
+		return -EINVAL;
+	if (state->type == LWTUNNEL_ENCAP_NONE ||
+	    state->type >= MAX_LWTUNNEL_ENCAP_OPS)
+		return 0;
+
+	rcu_read_lock();
+	handler = rcu_dereference(lwtun_encaps[state->type]);
+	if (likely(handler && handler->output))
+		err = handler->output(sk, skb);
+	rcu_read_unlock();
+
+	return err;
+}
+EXPORT_SYMBOL(lwtunnel_output);
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 14+ messages in thread