linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 2/2] Traffic control cgroups subsystem
@ 2008-08-22  0:57 Ranjit Manomohan
  0 siblings, 0 replies; 8+ messages in thread
From: Ranjit Manomohan @ 2008-08-22  0:57 UTC (permalink / raw)
  To: linux-kernel, netdev; +Cc: lizf, menage, kaber, akpm

This patch implements a filter that classifies packets based upon
the cgroup_classid of the packet.

Signed-off-by: Ranjit Manomohan <ranjitm@google.com>

---

diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h
index 99efbed..53348a3 100644
--- a/include/linux/pkt_cls.h
+++ b/include/linux/pkt_cls.h
@@ -379,6 +379,21 @@ enum

 #define TCA_FLOW_MAX	(__TCA_FLOW_MAX - 1)

+/* Cgroups filter */
+
+enum {
+	TCA_CGROUP_UNSPEC,
+	TCA_CGROUP_CLASSID,
+	TCA_CGROUP_MASK,
+	TCA_CGROUP_VALUE,
+	TCA_CGROUP_ACT,
+	TCA_CGROUP_POLICE,
+	TCA_CGROUP_EMATCHES,
+	__TCA_CGROUP_MAX
+};
+
+#define TCA_CGROUP_MAX	(__TCA_CGROUP_MAX - 1)
+
 /* Basic filter */

 enum
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 82adfe6..844837f 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -318,6 +318,16 @@ config NET_CLS_FLOW
 	  To compile this code as a module, choose M here: the
 	  module will be called cls_flow.

+config NET_CLS_CGROUP
+	tristate "Cgroups tc classifier"
+	select NET_CLS
+	---help---
+	  If you say Y here, you will be able to classify packets based on
+	  cgroup membership of the task originating the packet.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called cls_cgroup.
+
 config NET_EMATCH
 	bool "Extended Matches"
 	select NET_CLS
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 1d2b0f7..91e9ee0 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -36,6 +36,7 @@ obj-$(CONFIG_NET_CLS_TCINDEX)	+= cls_tcindex.o
 obj-$(CONFIG_NET_CLS_RSVP6)	+= cls_rsvp6.o
 obj-$(CONFIG_NET_CLS_BASIC)	+= cls_basic.o
 obj-$(CONFIG_NET_CLS_FLOW)	+= cls_flow.o
+obj-$(CONFIG_NET_CLS_CGROUP)	+= cls_cgroup.o
 obj-$(CONFIG_NET_EMATCH)	+= ematch.o
 obj-$(CONFIG_NET_EMATCH_CMP)	+= em_cmp.o
 obj-$(CONFIG_NET_EMATCH_NBYTE)	+= em_nbyte.o
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
new file mode 100644
index 0000000..70a363f
--- /dev/null
+++ b/net/sched/cls_cgroup.c
@@ -0,0 +1,330 @@
+/*
+ * net/sched/cls_cgroup.c	Simple packet classifier which can filter
+ * 				packets based on the cgroups they belong to.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <net/pkt_cls.h>
+#include <net/netlink.h>
+#include <net/sock.h>
+
+struct cgroup_head {
+	struct list_head	flist;		/* Head of filter list */
+};
+
+struct cgroup_filter {
+	u32			handle;		/* Unique filter handle */
+	struct tcf_exts		exts;
+	struct tcf_ematch_tree	ematches;
+	struct tcf_result	res;
+	struct list_head	link;
+	u32			mask;
+	u32			value;
+};
+
+static const struct tcf_ext_map cgroup_ext_map = {
+	.action	= TCA_CGROUP_ACT,
+	.police	= TCA_CGROUP_POLICE,
+};
+
+/* This function is called from the qdisc to classify a particular packet
+ * contained in the skb to the appropriate sub-classes. It returns the
+ * classid of the target class. This filter will match if the cgroup_classid
+ * in the skb matches the value in the filter.
+ */
+static int cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp,
+			  struct tcf_result *res)
+{
+	struct cgroup_head *head = (struct cgroup_head *)tp->root;
+	struct cgroup_filter *f;
+	uint32_t cgroup_classid = 0;
+	int r;
+
+#ifdef CONFIG_CGROUP_TC
+	if (skb->sk)
+		cgroup_classid =  skb->sk->sk_cgroup_classid;
+#endif
+
+	list_for_each_entry(f, &head->flist, link) {
+
+		if (!tcf_em_tree_match(skb, &f->ematches, NULL))
+			continue;
+
+		if ((cgroup_classid & f->mask) == f->value) {
+			*res = f->res;
+			r = tcf_exts_exec(skb, &f->exts, res);
+			if (r < 0)
+				continue;
+			return r;
+		}
+	}
+	return -1;
+}
+
+/* Returns pointer to filter matching the handle passed into the function.*/
+static unsigned long cgroup_get(struct tcf_proto *tp, u32 handle)
+{
+	unsigned long l = 0UL;
+	struct cgroup_head *head = (struct cgroup_head *) tp->root;
+	struct cgroup_filter *f;
+
+	if (head == NULL)
+		return 0UL;
+
+	list_for_each_entry(f, &head->flist, link)
+		if (f->handle == handle)
+			l = (unsigned long) f;
+
+	return l;
+}
+
+/* Does not seem to be used for classifiers. */
+static void cgroup_put(struct tcf_proto *tp, unsigned long f)
+{
+}
+
+/* Initializer function called when tp is created. */
+static int cgroup_init(struct tcf_proto *tp)
+{
+	struct cgroup_head *head = kzalloc(sizeof(*head), GFP_KERNEL);
+	if (head == NULL)
+		return -ENOBUFS;
+
+	INIT_LIST_HEAD(&head->flist);
+	tp->root = head;
+	return 0;
+}
+
+/* Simple delete function called when filter is deleted */
+static inline void cgroup_delete_filter(struct tcf_proto *tp,
+				       struct cgroup_filter *f)
+{
+	tcf_unbind_filter(tp, &f->res);
+	tcf_exts_destroy(tp, &f->exts);
+	tcf_em_tree_destroy(tp, &f->ematches);
+	kfree(f);
+}
+
+/* Destroy the entire tp structure.*/
+static void cgroup_destroy(struct tcf_proto *tp)
+{
+	struct cgroup_head *head = (struct cgroup_head *) xchg(&tp->root, NULL);
+	struct cgroup_filter *f, *n;
+
+	list_for_each_entry_safe(f, n, &head->flist, link) {
+		list_del(&f->link);
+		cgroup_delete_filter(tp, f);
+	}
+	kfree(head);
+}
+
+/* Delete one filter entry */
+static int cgroup_delete(struct tcf_proto *tp, unsigned long arg)
+{
+	struct cgroup_head *head = (struct cgroup_head *) tp->root;
+	struct cgroup_filter *t, *f = (struct cgroup_filter *) arg;
+
+	list_for_each_entry(t, &head->flist, link)
+		if (t == f) {
+			tcf_tree_lock(tp);
+			list_del(&t->link);
+			tcf_tree_unlock(tp);
+			cgroup_delete_filter(tp, t);
+			return 0;
+		}
+
+	return -ENOENT;
+}
+
+/* Set the mask and value parameters in the tp structure. */
+static inline int cgroup_set_parms(struct tcf_proto *tp,
+			unsigned long base,
+			struct cgroup_filter *f, struct nlattr **tb)
+{
+	int err = -EINVAL;
+
+	if (tb[TCA_CGROUP_MASK]) {
+		if (nla_len(tb[TCA_CGROUP_MASK]) < sizeof(u32))
+			return err;
+		f->mask =  nla_get_u32(tb[TCA_CGROUP_MASK]);
+	} else
+		f->mask = UINT_MAX;
+
+	if (tb[TCA_CGROUP_VALUE]) {
+		if (nla_len(tb[TCA_CGROUP_VALUE]) < sizeof(u32))
+			return err;
+		f->value = nla_get_u32(tb[TCA_CGROUP_VALUE]);
+	} else
+		return err;
+
+	if (tb[TCA_CGROUP_CLASSID]) {
+		if (nla_len(tb[TCA_CGROUP_CLASSID]) < sizeof(u32))
+			return err;
+		f->res.classid = nla_get_u32(tb[TCA_CGROUP_CLASSID]);
+		tcf_bind_filter(tp, &f->res, base);
+	} else
+		return err;
+
+	return 0;
+}
+
+/* Change the mask and value parameters in the current settings. */
+static int cgroup_change(struct tcf_proto *tp, unsigned long base, u32 handle,
+				struct nlattr **tca, unsigned long *arg)
+{
+	int err = -EINVAL;
+	struct cgroup_head *head = (struct cgroup_head *) tp->root;
+	struct nlattr *tb[TCA_CGROUP_MAX];
+	struct cgroup_filter *f = (struct cgroup_filter *) *arg;
+	struct tcf_exts e;
+	struct tcf_ematch_tree t;
+
+	if (tca[TCA_OPTIONS] == NULL)
+		return -EINVAL;
+
+	if (nla_parse_nested(tb, TCA_CGROUP_MAX, tca[TCA_OPTIONS], NULL) < 0)
+		return -EINVAL;
+
+	err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &cgroup_ext_map);
+	if (err < 0)
+		return err;
+
+	err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &t);
+	if (err < 0)
+		goto error1;
+
+	if (f != NULL) {
+		if (handle && f->handle != handle)
+			goto error2;
+	} else {
+		if (!handle)
+			goto error2;
+		f = kzalloc(sizeof(*f), GFP_KERNEL);
+		if (f == NULL)
+			goto error2;
+		f->handle = handle;
+	}
+
+	err = cgroup_set_parms(tp, base, f, tb);
+	if (err < 0)
+		goto error3;
+
+	tcf_exts_change(tp, &f->exts, &e);
+	tcf_em_tree_change(tp, &f->ematches, &t);
+
+	if (*arg == 0) {
+		tcf_tree_lock(tp);
+		list_add(&f->link, &head->flist);
+		tcf_tree_unlock(tp);
+	}
+
+	*arg = (unsigned long)f;
+	return 0;
+
+error3:
+	if (*arg == 0)
+		kfree(f);
+error2:
+	tcf_em_tree_destroy(tp, &t);
+error1:
+	tcf_exts_destroy(tp, &e);
+
+	return err;
+}
+
+/* Walk the filter list for things like displaying contents.*/
+static void cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+{
+	struct cgroup_head *head = (struct cgroup_head *) tp->root;
+	struct cgroup_filter *f;
+
+	list_for_each_entry(f, &head->flist, link) {
+		if (arg->count < arg->skip)
+			goto skip;
+
+		if (arg->fn(tp, (unsigned long) f, arg) < 0) {
+			arg->stop = 1;
+			break;
+		}
+skip:
+		arg->count++;
+	}
+}
+
+/* Retrieve current settings in the filter */
+static int cgroup_dump(struct tcf_proto *tp, unsigned long fh,
+		      struct sk_buff *skb, struct tcmsg *t)
+{
+	struct cgroup_filter *f = (struct cgroup_filter *) fh;
+	struct nlattr *nest;
+
+	if (f == NULL)
+		return skb->len;
+
+	t->tcm_handle = f->handle;
+
+	nest = nla_nest_start(skb, TCA_OPTIONS);
+	if (nest == NULL)
+		goto nla_put_failure;
+
+	NLA_PUT_U32(skb, TCA_CGROUP_CLASSID, f->res.classid);
+	NLA_PUT_U32(skb, TCA_CGROUP_MASK, f->mask);
+	NLA_PUT_U32(skb, TCA_CGROUP_VALUE, f->value);
+
+	if (tcf_exts_dump(skb, &f->exts, &cgroup_ext_map) < 0)
+		goto nla_put_failure;
+
+#ifdef CONFIG_NET_EMATCH
+	if (f->ematches.hdr.nmatches &&
+	  tcf_em_tree_dump(skb, &f->ematches, TCA_CGROUP_EMATCHES) < 0)
+		goto nla_put_failure;
+#endif
+
+	if (tcf_exts_dump_stats(skb, &f->exts, &cgroup_ext_map) < 0)
+		goto nla_put_failure;
+
+	nla_nest_end(skb, nest);
+	return skb->len;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nest);
+	return -1;
+}
+
+static struct tcf_proto_ops cls_cgroup_ops = {
+	.kind		=	"cgroup",
+	.classify	=	cgroup_classify,
+	.init		=	cgroup_init,
+	.destroy	=	cgroup_destroy,
+	.get		=	cgroup_get,
+	.put		=	cgroup_put,
+	.change		=	cgroup_change,
+	.delete		=	cgroup_delete,
+	.walk		=	cgroup_walk,
+	.dump		=	cgroup_dump,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init init_cgroup(void)
+{
+	return register_tcf_proto_ops(&cls_cgroup_ops);
+}
+
+static void __exit exit_cgroup(void)
+{
+	unregister_tcf_proto_ops(&cls_cgroup_ops);
+}
+
+module_init(init_cgroup)
+module_exit(exit_cgroup)
+MODULE_LICENSE("GPL");
+

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/2] Traffic control cgroups subsystem
@ 2008-09-10 17:43 Ranjit Manomohan
  0 siblings, 0 replies; 8+ messages in thread
From: Ranjit Manomohan @ 2008-09-10 17:43 UTC (permalink / raw)
  To: davem, akpm, tgraf, kaber, menage, lizf; +Cc: netdev, linux-kernel

This patch implements a filter that classifies packets based upon
the cgroup_classid of the packet.

Signed-off-by: Ranjit Manomohan <ranjitm@google.com>

---

diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h
index 99efbed..53348a3 100644
--- a/include/linux/pkt_cls.h
+++ b/include/linux/pkt_cls.h
@@ -379,6 +379,21 @@ enum

 #define TCA_FLOW_MAX	(__TCA_FLOW_MAX - 1)

+/* Cgroups filter */
+
+enum {
+	TCA_CGROUP_UNSPEC,
+	TCA_CGROUP_CLASSID,
+	TCA_CGROUP_MASK,
+	TCA_CGROUP_VALUE,
+	TCA_CGROUP_ACT,
+	TCA_CGROUP_POLICE,
+	TCA_CGROUP_EMATCHES,
+	__TCA_CGROUP_MAX
+};
+
+#define TCA_CGROUP_MAX	(__TCA_CGROUP_MAX - 1)
+
 /* Basic filter */

 enum
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 82adfe6..844837f 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -318,6 +318,16 @@ config NET_CLS_FLOW
 	  To compile this code as a module, choose M here: the
 	  module will be called cls_flow.

+config NET_CLS_CGROUP
+	tristate "Cgroups tc classifier"
+	select NET_CLS
+	---help---
+	  If you say Y here, you will be able to classify packets based on
+	  cgroup membership of the task originating the packet.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called cls_cgroup.
+
 config NET_EMATCH
 	bool "Extended Matches"
 	select NET_CLS
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 1d2b0f7..91e9ee0 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -36,6 +36,7 @@ obj-$(CONFIG_NET_CLS_TCINDEX)	+= cls_tcindex.o
 obj-$(CONFIG_NET_CLS_RSVP6)	+= cls_rsvp6.o
 obj-$(CONFIG_NET_CLS_BASIC)	+= cls_basic.o
 obj-$(CONFIG_NET_CLS_FLOW)	+= cls_flow.o
+obj-$(CONFIG_NET_CLS_CGROUP)	+= cls_cgroup.o
 obj-$(CONFIG_NET_EMATCH)	+= ematch.o
 obj-$(CONFIG_NET_EMATCH_CMP)	+= em_cmp.o
 obj-$(CONFIG_NET_EMATCH_NBYTE)	+= em_nbyte.o
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
new file mode 100644
index 0000000..70a363f
--- /dev/null
+++ b/net/sched/cls_cgroup.c
@@ -0,0 +1,330 @@
+/*
+ * net/sched/cls_cgroup.c	Simple packet classifier which can filter
+ * 				packets based on the cgroups they belong to.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <net/pkt_cls.h>
+#include <net/netlink.h>
+#include <net/sock.h>
+
+struct cgroup_head {
+	struct list_head	flist;		/* Head of filter list */
+};
+
+struct cgroup_filter {
+	u32			handle;		/* Unique filter handle */
+	struct tcf_exts		exts;
+	struct tcf_ematch_tree	ematches;
+	struct tcf_result	res;
+	struct list_head	link;
+	u32			mask;
+	u32			value;
+};
+
+static const struct tcf_ext_map cgroup_ext_map = {
+	.action	= TCA_CGROUP_ACT,
+	.police	= TCA_CGROUP_POLICE,
+};
+
+/* This function is called from the qdisc to classify a particular packet
+ * contained in the skb to the appropriate sub-classes. It returns the
+ * classid of the target class. This filter will match if the cgroup_classid
+ * in the skb matches the value in the filter.
+ */
+static int cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp,
+			  struct tcf_result *res)
+{
+	struct cgroup_head *head = (struct cgroup_head *)tp->root;
+	struct cgroup_filter *f;
+	uint32_t cgroup_classid = 0;
+	int r;
+
+#ifdef CONFIG_CGROUP_TC
+	if (skb->sk)
+		cgroup_classid =  skb->sk->sk_cgroup_classid;
+#endif
+
+	list_for_each_entry(f, &head->flist, link) {
+
+		if (!tcf_em_tree_match(skb, &f->ematches, NULL))
+			continue;
+
+		if ((cgroup_classid & f->mask) == f->value) {
+			*res = f->res;
+			r = tcf_exts_exec(skb, &f->exts, res);
+			if (r < 0)
+				continue;
+			return r;
+		}
+	}
+	return -1;
+}
+
+/* Returns pointer to filter matching the handle passed into the function.*/
+static unsigned long cgroup_get(struct tcf_proto *tp, u32 handle)
+{
+	unsigned long l = 0UL;
+	struct cgroup_head *head = (struct cgroup_head *) tp->root;
+	struct cgroup_filter *f;
+
+	if (head == NULL)
+		return 0UL;
+
+	list_for_each_entry(f, &head->flist, link)
+		if (f->handle == handle)
+			l = (unsigned long) f;
+
+	return l;
+}
+
+/* Does not seem to be used for classifiers. */
+static void cgroup_put(struct tcf_proto *tp, unsigned long f)
+{
+}
+
+/* Initializer function called when tp is created. */
+static int cgroup_init(struct tcf_proto *tp)
+{
+	struct cgroup_head *head = kzalloc(sizeof(*head), GFP_KERNEL);
+	if (head == NULL)
+		return -ENOBUFS;
+
+	INIT_LIST_HEAD(&head->flist);
+	tp->root = head;
+	return 0;
+}
+
+/* Simple delete function called when filter is deleted */
+static inline void cgroup_delete_filter(struct tcf_proto *tp,
+				       struct cgroup_filter *f)
+{
+	tcf_unbind_filter(tp, &f->res);
+	tcf_exts_destroy(tp, &f->exts);
+	tcf_em_tree_destroy(tp, &f->ematches);
+	kfree(f);
+}
+
+/* Destroy the entire tp structure.*/
+static void cgroup_destroy(struct tcf_proto *tp)
+{
+	struct cgroup_head *head = (struct cgroup_head *) xchg(&tp->root, NULL);
+	struct cgroup_filter *f, *n;
+
+	list_for_each_entry_safe(f, n, &head->flist, link) {
+		list_del(&f->link);
+		cgroup_delete_filter(tp, f);
+	}
+	kfree(head);
+}
+
+/* Delete one filter entry */
+static int cgroup_delete(struct tcf_proto *tp, unsigned long arg)
+{
+	struct cgroup_head *head = (struct cgroup_head *) tp->root;
+	struct cgroup_filter *t, *f = (struct cgroup_filter *) arg;
+
+	list_for_each_entry(t, &head->flist, link)
+		if (t == f) {
+			tcf_tree_lock(tp);
+			list_del(&t->link);
+			tcf_tree_unlock(tp);
+			cgroup_delete_filter(tp, t);
+			return 0;
+		}
+
+	return -ENOENT;
+}
+
+/* Set the mask and value parameters in the tp structure. */
+static inline int cgroup_set_parms(struct tcf_proto *tp,
+			unsigned long base,
+			struct cgroup_filter *f, struct nlattr **tb)
+{
+	int err = -EINVAL;
+
+	if (tb[TCA_CGROUP_MASK]) {
+		if (nla_len(tb[TCA_CGROUP_MASK]) < sizeof(u32))
+			return err;
+		f->mask =  nla_get_u32(tb[TCA_CGROUP_MASK]);
+	} else
+		f->mask = UINT_MAX;
+
+	if (tb[TCA_CGROUP_VALUE]) {
+		if (nla_len(tb[TCA_CGROUP_VALUE]) < sizeof(u32))
+			return err;
+		f->value = nla_get_u32(tb[TCA_CGROUP_VALUE]);
+	} else
+		return err;
+
+	if (tb[TCA_CGROUP_CLASSID]) {
+		if (nla_len(tb[TCA_CGROUP_CLASSID]) < sizeof(u32))
+			return err;
+		f->res.classid = nla_get_u32(tb[TCA_CGROUP_CLASSID]);
+		tcf_bind_filter(tp, &f->res, base);
+	} else
+		return err;
+
+	return 0;
+}
+
+/* Change the mask and value parameters in the current settings. */
+static int cgroup_change(struct tcf_proto *tp, unsigned long base, u32 handle,
+				struct nlattr **tca, unsigned long *arg)
+{
+	int err = -EINVAL;
+	struct cgroup_head *head = (struct cgroup_head *) tp->root;
+	struct nlattr *tb[TCA_CGROUP_MAX];
+	struct cgroup_filter *f = (struct cgroup_filter *) *arg;
+	struct tcf_exts e;
+	struct tcf_ematch_tree t;
+
+	if (tca[TCA_OPTIONS] == NULL)
+		return -EINVAL;
+
+	if (nla_parse_nested(tb, TCA_CGROUP_MAX, tca[TCA_OPTIONS], NULL) < 0)
+		return -EINVAL;
+
+	err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &cgroup_ext_map);
+	if (err < 0)
+		return err;
+
+	err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &t);
+	if (err < 0)
+		goto error1;
+
+	if (f != NULL) {
+		if (handle && f->handle != handle)
+			goto error2;
+	} else {
+		if (!handle)
+			goto error2;
+		f = kzalloc(sizeof(*f), GFP_KERNEL);
+		if (f == NULL)
+			goto error2;
+		f->handle = handle;
+	}
+
+	err = cgroup_set_parms(tp, base, f, tb);
+	if (err < 0)
+		goto error3;
+
+	tcf_exts_change(tp, &f->exts, &e);
+	tcf_em_tree_change(tp, &f->ematches, &t);
+
+	if (*arg == 0) {
+		tcf_tree_lock(tp);
+		list_add(&f->link, &head->flist);
+		tcf_tree_unlock(tp);
+	}
+
+	*arg = (unsigned long)f;
+	return 0;
+
+error3:
+	if (*arg == 0)
+		kfree(f);
+error2:
+	tcf_em_tree_destroy(tp, &t);
+error1:
+	tcf_exts_destroy(tp, &e);
+
+	return err;
+}
+
+/* Walk the filter list for things like displaying contents.*/
+static void cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+{
+	struct cgroup_head *head = (struct cgroup_head *) tp->root;
+	struct cgroup_filter *f;
+
+	list_for_each_entry(f, &head->flist, link) {
+		if (arg->count < arg->skip)
+			goto skip;
+
+		if (arg->fn(tp, (unsigned long) f, arg) < 0) {
+			arg->stop = 1;
+			break;
+		}
+skip:
+		arg->count++;
+	}
+}
+
+/* Retrieve current settings in the filter */
+static int cgroup_dump(struct tcf_proto *tp, unsigned long fh,
+		      struct sk_buff *skb, struct tcmsg *t)
+{
+	struct cgroup_filter *f = (struct cgroup_filter *) fh;
+	struct nlattr *nest;
+
+	if (f == NULL)
+		return skb->len;
+
+	t->tcm_handle = f->handle;
+
+	nest = nla_nest_start(skb, TCA_OPTIONS);
+	if (nest == NULL)
+		goto nla_put_failure;
+
+	NLA_PUT_U32(skb, TCA_CGROUP_CLASSID, f->res.classid);
+	NLA_PUT_U32(skb, TCA_CGROUP_MASK, f->mask);
+	NLA_PUT_U32(skb, TCA_CGROUP_VALUE, f->value);
+
+	if (tcf_exts_dump(skb, &f->exts, &cgroup_ext_map) < 0)
+		goto nla_put_failure;
+
+#ifdef CONFIG_NET_EMATCH
+	if (f->ematches.hdr.nmatches &&
+	  tcf_em_tree_dump(skb, &f->ematches, TCA_CGROUP_EMATCHES) < 0)
+		goto nla_put_failure;
+#endif
+
+	if (tcf_exts_dump_stats(skb, &f->exts, &cgroup_ext_map) < 0)
+		goto nla_put_failure;
+
+	nla_nest_end(skb, nest);
+	return skb->len;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nest);
+	return -1;
+}
+
+static struct tcf_proto_ops cls_cgroup_ops = {
+	.kind		=	"cgroup",
+	.classify	=	cgroup_classify,
+	.init		=	cgroup_init,
+	.destroy	=	cgroup_destroy,
+	.get		=	cgroup_get,
+	.put		=	cgroup_put,
+	.change		=	cgroup_change,
+	.delete		=	cgroup_delete,
+	.walk		=	cgroup_walk,
+	.dump		=	cgroup_dump,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init init_cgroup(void)
+{
+	return register_tcf_proto_ops(&cls_cgroup_ops);
+}
+
+static void __exit exit_cgroup(void)
+{
+	unregister_tcf_proto_ops(&cls_cgroup_ops);
+}
+
+module_init(init_cgroup)
+module_exit(exit_cgroup)
+MODULE_LICENSE("GPL");
+

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2[ Traffic control cgroups subsystem
  2008-07-25  3:02 ` Daniel Walker
@ 2008-07-25  3:29   ` Li Zefan
  0 siblings, 0 replies; 8+ messages in thread
From: Li Zefan @ 2008-07-25  3:29 UTC (permalink / raw)
  To: Daniel Walker; +Cc: Ranjit Manomohan, linux-kernel, netdev, menage, kaber, akpm

Daniel Walker wrote:
> On Thu, 2008-07-24 at 16:37 -0700, Ranjit Manomohan wrote:
> 
> 
>> +/* Delete one filter entry */
>> +static int cgroup_delete(struct tcf_proto *tp, unsigned long arg)
>> +{
>> +	struct cgroup_head *head = (struct cgroup_head *) tp->root;
>> +	struct cgroup_filter *t, *f = (struct cgroup_filter *) arg;
>> +
>> +	list_for_each_entry(t, &head->flist, link)
>> +		if (t == f) {
>> +			tcf_tree_lock(tp);
>> +			list_del(&t->link);
> 
> Doesn't the above need to be a "_safe" list operation since you're
> deleting the element?
> 

No. You don't need "_safe" if you break out of the loop immediately
after list_del().


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2[ Traffic control cgroups subsystem
  2008-07-24 23:37 [PATCH 2/2[ " Ranjit Manomohan
@ 2008-07-25  3:02 ` Daniel Walker
  2008-07-25  3:29   ` Li Zefan
  0 siblings, 1 reply; 8+ messages in thread
From: Daniel Walker @ 2008-07-25  3:02 UTC (permalink / raw)
  To: Ranjit Manomohan; +Cc: linux-kernel, netdev, lizf, menage, kaber, akpm

On Thu, 2008-07-24 at 16:37 -0700, Ranjit Manomohan wrote:


> +/* Delete one filter entry */
> +static int cgroup_delete(struct tcf_proto *tp, unsigned long arg)
> +{
> +	struct cgroup_head *head = (struct cgroup_head *) tp->root;
> +	struct cgroup_filter *t, *f = (struct cgroup_filter *) arg;
> +
> +	list_for_each_entry(t, &head->flist, link)
> +		if (t == f) {
> +			tcf_tree_lock(tp);
> +			list_del(&t->link);

Doesn't the above need to be a "_safe" list operation since you're
deleting the element?

Daniel


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 2/2[ Traffic control cgroups subsystem
@ 2008-07-24 23:37 Ranjit Manomohan
  2008-07-25  3:02 ` Daniel Walker
  0 siblings, 1 reply; 8+ messages in thread
From: Ranjit Manomohan @ 2008-07-24 23:37 UTC (permalink / raw)
  To: linux-kernel, netdev; +Cc: lizf, menage, kaber, akpm

[Take 4] incorporated additional comments from Patrick McHardy

This patch implements a filter that classifies packets based upon
the cgroup_classid of the packet.

Signed-off-by: Ranjit Manomohan <ranjitm@google.com>

---

diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h
index 99efbed..53348a3 100644
--- a/include/linux/pkt_cls.h
+++ b/include/linux/pkt_cls.h
@@ -379,6 +379,21 @@ enum

 #define TCA_FLOW_MAX	(__TCA_FLOW_MAX - 1)

+/* Cgroups filter */
+
+enum {
+	TCA_CGROUP_UNSPEC,
+	TCA_CGROUP_CLASSID,
+	TCA_CGROUP_MASK,
+	TCA_CGROUP_VALUE,
+	TCA_CGROUP_ACT,
+	TCA_CGROUP_POLICE,
+	TCA_CGROUP_EMATCHES,
+	__TCA_CGROUP_MAX
+};
+
+#define TCA_CGROUP_MAX	(__TCA_CGROUP_MAX - 1)
+
 /* Basic filter */

 enum
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 82adfe6..844837f 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -318,6 +318,16 @@ config NET_CLS_FLOW
 	  To compile this code as a module, choose M here: the
 	  module will be called cls_flow.

+config NET_CLS_CGROUP
+	tristate "Cgroups tc classifier"
+	select NET_CLS
+	---help---
+	  If you say Y here, you will be able to classify packets based on
+	  cgroup membership of the task originating the packet.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called cls_cgroup.
+
 config NET_EMATCH
 	bool "Extended Matches"
 	select NET_CLS
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 1d2b0f7..91e9ee0 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -36,6 +36,7 @@ obj-$(CONFIG_NET_CLS_TCINDEX)	+= cls_tcindex.o
 obj-$(CONFIG_NET_CLS_RSVP6)	+= cls_rsvp6.o
 obj-$(CONFIG_NET_CLS_BASIC)	+= cls_basic.o
 obj-$(CONFIG_NET_CLS_FLOW)	+= cls_flow.o
+obj-$(CONFIG_NET_CLS_CGROUP)	+= cls_cgroup.o
 obj-$(CONFIG_NET_EMATCH)	+= ematch.o
 obj-$(CONFIG_NET_EMATCH_CMP)	+= em_cmp.o
 obj-$(CONFIG_NET_EMATCH_NBYTE)	+= em_nbyte.o
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
new file mode 100644
index 0000000..70a363f
--- /dev/null
+++ b/net/sched/cls_cgroup.c
@@ -0,0 +1,330 @@
+/*
+ * net/sched/cls_cgroup.c	Simple packet classifier which can filter
+ * 				packets based on the cgroups they belong to.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <net/pkt_cls.h>
+#include <net/netlink.h>
+#include <net/sock.h>
+
+struct cgroup_head {
+	struct list_head	flist;		/* Head of filter list */
+};
+
+struct cgroup_filter {
+	u32			handle;		/* Unique filter handle */
+	struct tcf_exts		exts;
+	struct tcf_ematch_tree	ematches;
+	struct tcf_result	res;
+	struct list_head	link;
+	u32			mask;
+	u32			value;
+};
+
+static const struct tcf_ext_map cgroup_ext_map = {
+	.action	= TCA_CGROUP_ACT,
+	.police	= TCA_CGROUP_POLICE,
+};
+
+/* This function is called from the qdisc to classify a particular packet
+ * contained in the skb to the appropriate sub-classes. It returns the
+ * classid of the target class. This filter will match if the cgroup_classid
+ * in the skb matches the value in the filter.
+ */
+static int cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp,
+			  struct tcf_result *res)
+{
+	struct cgroup_head *head = (struct cgroup_head *)tp->root;
+	struct cgroup_filter *f;
+	uint32_t cgroup_classid = 0;
+	int r;
+
+#ifdef CONFIG_CGROUP_TC
+	if (skb->sk)
+		cgroup_classid =  skb->sk->sk_cgroup_classid;
+#endif
+
+	list_for_each_entry(f, &head->flist, link) {
+
+		if (!tcf_em_tree_match(skb, &f->ematches, NULL))
+			continue;
+
+		if ((cgroup_classid & f->mask) == f->value) {
+			*res = f->res;
+			r = tcf_exts_exec(skb, &f->exts, res);
+			if (r < 0)
+				continue;
+			return r;
+		}
+	}
+	return -1;
+}
+
+/* Returns pointer to filter matching the handle passed into the function.*/
+static unsigned long cgroup_get(struct tcf_proto *tp, u32 handle)
+{
+	unsigned long l = 0UL;
+	struct cgroup_head *head = (struct cgroup_head *) tp->root;
+	struct cgroup_filter *f;
+
+	if (head == NULL)
+		return 0UL;
+
+	list_for_each_entry(f, &head->flist, link)
+		if (f->handle == handle)
+			l = (unsigned long) f;
+
+	return l;
+}
+
+/* Does not seem to be used for classifiers. */
+static void cgroup_put(struct tcf_proto *tp, unsigned long f)
+{
+}
+
+/* Initializer function called when tp is created. */
+static int cgroup_init(struct tcf_proto *tp)
+{
+	struct cgroup_head *head = kzalloc(sizeof(*head), GFP_KERNEL);
+	if (head == NULL)
+		return -ENOBUFS;
+
+	INIT_LIST_HEAD(&head->flist);
+	tp->root = head;
+	return 0;
+}
+
+/* Simple delete function called when filter is deleted */
+static inline void cgroup_delete_filter(struct tcf_proto *tp,
+				       struct cgroup_filter *f)
+{
+	tcf_unbind_filter(tp, &f->res);
+	tcf_exts_destroy(tp, &f->exts);
+	tcf_em_tree_destroy(tp, &f->ematches);
+	kfree(f);
+}
+
+/* Destroy the entire tp structure.*/
+static void cgroup_destroy(struct tcf_proto *tp)
+{
+	struct cgroup_head *head = (struct cgroup_head *) xchg(&tp->root, NULL);
+	struct cgroup_filter *f, *n;
+
+	list_for_each_entry_safe(f, n, &head->flist, link) {
+		list_del(&f->link);
+		cgroup_delete_filter(tp, f);
+	}
+	kfree(head);
+}
+
+/* Delete one filter entry */
+static int cgroup_delete(struct tcf_proto *tp, unsigned long arg)
+{
+	struct cgroup_head *head = (struct cgroup_head *) tp->root;
+	struct cgroup_filter *t, *f = (struct cgroup_filter *) arg;
+
+	list_for_each_entry(t, &head->flist, link)
+		if (t == f) {
+			tcf_tree_lock(tp);
+			list_del(&t->link);
+			tcf_tree_unlock(tp);
+			cgroup_delete_filter(tp, t);
+			return 0;
+		}
+
+	return -ENOENT;
+}
+
+/* Set the mask and value parameters in the tp structure. */
+static inline int cgroup_set_parms(struct tcf_proto *tp,
+			unsigned long base,
+			struct cgroup_filter *f, struct nlattr **tb)
+{
+	int err = -EINVAL;
+
+	if (tb[TCA_CGROUP_MASK]) {
+		if (nla_len(tb[TCA_CGROUP_MASK]) < sizeof(u32))
+			return err;
+		f->mask =  nla_get_u32(tb[TCA_CGROUP_MASK]);
+	} else
+		f->mask = UINT_MAX;
+
+	if (tb[TCA_CGROUP_VALUE]) {
+		if (nla_len(tb[TCA_CGROUP_VALUE]) < sizeof(u32))
+			return err;
+		f->value = nla_get_u32(tb[TCA_CGROUP_VALUE]);
+	} else
+		return err;
+
+	if (tb[TCA_CGROUP_CLASSID]) {
+		if (nla_len(tb[TCA_CGROUP_CLASSID]) < sizeof(u32))
+			return err;
+		f->res.classid = nla_get_u32(tb[TCA_CGROUP_CLASSID]);
+		tcf_bind_filter(tp, &f->res, base);
+	} else
+		return err;
+
+	return 0;
+}
+
+/* Change the mask and value parameters in the current settings. */
+static int cgroup_change(struct tcf_proto *tp, unsigned long base, u32 handle,
+				struct nlattr **tca, unsigned long *arg)
+{
+	int err = -EINVAL;
+	struct cgroup_head *head = (struct cgroup_head *) tp->root;
+	struct nlattr *tb[TCA_CGROUP_MAX];
+	struct cgroup_filter *f = (struct cgroup_filter *) *arg;
+	struct tcf_exts e;
+	struct tcf_ematch_tree t;
+
+	if (tca[TCA_OPTIONS] == NULL)
+		return -EINVAL;
+
+	if (nla_parse_nested(tb, TCA_CGROUP_MAX, tca[TCA_OPTIONS], NULL) < 0)
+		return -EINVAL;
+
+	err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &cgroup_ext_map);
+	if (err < 0)
+		return err;
+
+	err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &t);
+	if (err < 0)
+		goto error1;
+
+	if (f != NULL) {
+		if (handle && f->handle != handle)
+			goto error2;
+	} else {
+		if (!handle)
+			goto error2;
+		f = kzalloc(sizeof(*f), GFP_KERNEL);
+		if (f == NULL)
+			goto error2;
+		f->handle = handle;
+	}
+
+	err = cgroup_set_parms(tp, base, f, tb);
+	if (err < 0)
+		goto error3;
+
+	tcf_exts_change(tp, &f->exts, &e);
+	tcf_em_tree_change(tp, &f->ematches, &t);
+
+	if (*arg == 0) {
+		tcf_tree_lock(tp);
+		list_add(&f->link, &head->flist);
+		tcf_tree_unlock(tp);
+	}
+
+	*arg = (unsigned long)f;
+	return 0;
+
+error3:
+	if (*arg == 0)
+		kfree(f);
+error2:
+	tcf_em_tree_destroy(tp, &t);
+error1:
+	tcf_exts_destroy(tp, &e);
+
+	return err;
+}
+
+/* Walk the filter list for things like displaying contents.*/
+static void cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+{
+	struct cgroup_head *head = (struct cgroup_head *) tp->root;
+	struct cgroup_filter *f;
+
+	list_for_each_entry(f, &head->flist, link) {
+		if (arg->count < arg->skip)
+			goto skip;
+
+		if (arg->fn(tp, (unsigned long) f, arg) < 0) {
+			arg->stop = 1;
+			break;
+		}
+skip:
+		arg->count++;
+	}
+}
+
+/* Retrieve current settings in the filter */
+static int cgroup_dump(struct tcf_proto *tp, unsigned long fh,
+		      struct sk_buff *skb, struct tcmsg *t)
+{
+	struct cgroup_filter *f = (struct cgroup_filter *) fh;
+	struct nlattr *nest;
+
+	if (f == NULL)
+		return skb->len;
+
+	t->tcm_handle = f->handle;
+
+	nest = nla_nest_start(skb, TCA_OPTIONS);
+	if (nest == NULL)
+		goto nla_put_failure;
+
+	NLA_PUT_U32(skb, TCA_CGROUP_CLASSID, f->res.classid);
+	NLA_PUT_U32(skb, TCA_CGROUP_MASK, f->mask);
+	NLA_PUT_U32(skb, TCA_CGROUP_VALUE, f->value);
+
+	if (tcf_exts_dump(skb, &f->exts, &cgroup_ext_map) < 0)
+		goto nla_put_failure;
+
+#ifdef CONFIG_NET_EMATCH
+	if (f->ematches.hdr.nmatches &&
+	  tcf_em_tree_dump(skb, &f->ematches, TCA_CGROUP_EMATCHES) < 0)
+		goto nla_put_failure;
+#endif
+
+	if (tcf_exts_dump_stats(skb, &f->exts, &cgroup_ext_map) < 0)
+		goto nla_put_failure;
+
+	nla_nest_end(skb, nest);
+	return skb->len;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nest);
+	return -1;
+}
+
+static struct tcf_proto_ops cls_cgroup_ops = {
+	.kind		=	"cgroup",
+	.classify	=	cgroup_classify,
+	.init		=	cgroup_init,
+	.destroy	=	cgroup_destroy,
+	.get		=	cgroup_get,
+	.put		=	cgroup_put,
+	.change		=	cgroup_change,
+	.delete		=	cgroup_delete,
+	.walk		=	cgroup_walk,
+	.dump		=	cgroup_dump,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init init_cgroup(void)
+{
+	return register_tcf_proto_ops(&cls_cgroup_ops);
+}
+
+static void __exit exit_cgroup(void)
+{
+	unregister_tcf_proto_ops(&cls_cgroup_ops);
+}
+
+module_init(init_cgroup)
+module_exit(exit_cgroup)
+MODULE_LICENSE("GPL");
+

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] Traffic control cgroups subsystem
  2008-07-18 21:29 [PATCH 2/2] " Ranjit Manomohan
  2008-07-21  9:30 ` Li Zefan
@ 2008-07-21 14:05 ` Patrick McHardy
  1 sibling, 0 replies; 8+ messages in thread
From: Patrick McHardy @ 2008-07-21 14:05 UTC (permalink / raw)
  To: Ranjit Manomohan; +Cc: linux-kernel, netdev, menage

Ranjit Manomohan wrote:
> This patch implements a filter that classifies packets based upon
> the cgroup_classid of the packet.

I'd suggest to add this to the flow classifier (cls_flow.c),
adding a full classifier is overkill just for this one field.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] Traffic control cgroups subsystem
  2008-07-18 21:29 [PATCH 2/2] " Ranjit Manomohan
@ 2008-07-21  9:30 ` Li Zefan
  2008-07-21 14:05 ` Patrick McHardy
  1 sibling, 0 replies; 8+ messages in thread
From: Li Zefan @ 2008-07-21  9:30 UTC (permalink / raw)
  To: Ranjit Manomohan; +Cc: linux-kernel, netdev, menage

> +config NET_CLS_CGROUP
> +    tristate "Cgroups tc classifier"
> +    select NET_CLS
> +    ---help---
> +      If you say Y here, you will be able to classify packets based on
> +      cgroup membership of the task originating the packet.
> +
> +      To compile this code as a module, choose M here: the
> +      module will be called cls_cgroup.
> +

If I enable NET_CLS_CGROUP but disable CGROUP_TC, won't I get a compile error?
And if I enable CGROUP_TC but disable NET_CLS_CGROUP, won't cgroup_tc be
just useless?


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 2/2] Traffic control cgroups subsystem
@ 2008-07-18 21:29 Ranjit Manomohan
  2008-07-21  9:30 ` Li Zefan
  2008-07-21 14:05 ` Patrick McHardy
  0 siblings, 2 replies; 8+ messages in thread
From: Ranjit Manomohan @ 2008-07-18 21:29 UTC (permalink / raw)
  To: linux-kernel, netdev; +Cc: menage

This patch implements a filter that classifies packets based upon
the cgroup_classid of the packet.

Signed-off-by: Ranjit Manomohan <ranjitm@google.com>

---

diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h
index 99efbed..290da04 100644
--- a/include/linux/pkt_cls.h
+++ b/include/linux/pkt_cls.h
@@ -379,6 +379,18 @@ enum

  #define TCA_FLOW_MAX	(__TCA_FLOW_MAX - 1)

+/* Cgroups filter */
+
+/*
+ * Netlink attributes of the cgroup classifier, nested inside TCA_OPTIONS.
+ * The classifier reads CLASSID, MASK and VALUE as u32 values.
+ */
+enum {
+	TCA_CGROUP_UNSPEC,
+	TCA_CGROUP_CLASSID,	/* class that matching packets are mapped to */
+	TCA_CGROUP_MASK,	/* bits of the packet's cgroup classid to compare */
+	TCA_CGROUP_VALUE,	/* expected value of the masked cgroup classid */
+	__TCA_CGROUP_MAX
+};
+
+/* Highest valid attribute type; attribute tables need one more slot. */
+#define TCA_CGROUP_MAX	(__TCA_CGROUP_MAX - 1)
+
  /* Basic filter */

  enum
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 82adfe6..844837f 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -318,6 +318,16 @@ config NET_CLS_FLOW
  	  To compile this code as a module, choose M here: the
  	  module will be called cls_flow.

+config NET_CLS_CGROUP
+	tristate "Cgroups tc classifier"
+	# cls_cgroup.c reads skb->cgroup_classid, which is only provided by the
+	# traffic control cgroup subsystem from patch 1/2.
+	# NOTE(review): symbol name taken from the review thread - confirm it
+	# against patch 1/2.
+	depends on CGROUP_TC
+	select NET_CLS
+	---help---
+	  If you say Y here, you will be able to classify packets based on
+	  cgroup membership of the task originating the packet.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called cls_cgroup.
+
  config NET_EMATCH
  	bool "Extended Matches"
  	select NET_CLS
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 1d2b0f7..91e9ee0 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -36,6 +36,7 @@ obj-$(CONFIG_NET_CLS_TCINDEX)	+= cls_tcindex.o
  obj-$(CONFIG_NET_CLS_RSVP6)	+= cls_rsvp6.o
  obj-$(CONFIG_NET_CLS_BASIC)	+= cls_basic.o
  obj-$(CONFIG_NET_CLS_FLOW)	+= cls_flow.o
+obj-$(CONFIG_NET_CLS_CGROUP)	+= cls_cgroup.o
  obj-$(CONFIG_NET_EMATCH)	+= ematch.o
  obj-$(CONFIG_NET_EMATCH_CMP)	+= em_cmp.o
  obj-$(CONFIG_NET_EMATCH_NBYTE)	+= em_nbyte.o
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
new file mode 100644
index 0000000..78d494f
--- /dev/null
+++ b/net/sched/cls_cgroup.c
@@ -0,0 +1,277 @@
+/*
+ * net/sched/cls_cgroup.c	Simple packet classifier which can filter
+ * 				packets based on the cgroups they belong to.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <net/pkt_cls.h>
+#include <net/netlink.h>
+
+/* Per-tp root object (stored in tp->root) anchoring the filter list. */
+struct cgroup_head {
+	struct list_head	flist;		/* Head of filter list */
+};
+
+/*
+ * One filter rule. A packet matches when
+ * (skb->cgroup_classid & mask) == value; the classification result is
+ * then copied from res.
+ */
+struct cgroup_filter {
+	u32			handle;		/* Unique filter handle */
+	struct tcf_result	res;		/* Result handed out on a match */
+	struct list_head	link;		/* Entry in cgroup_head.flist */
+	u32			mask;		/* Bits of the classid to compare */
+	u32			value;		/* Expected masked classid */
+};
+
+/*
+ * Classify a packet by the cgroup_classid carried in its skb.
+ *
+ * The filters are tried in list order; the first one for which
+ * (skb->cgroup_classid & mask) == value wins. Its result is copied to *res
+ * and 0 is returned. When no filter matches, *res is left untouched and -1
+ * is returned.
+ */
+static int cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp,
+			  struct tcf_result *res)
+{
+	struct cgroup_head *root = (struct cgroup_head *)tp->root;
+	u32 classid = skb->cgroup_classid;
+	struct cgroup_filter *filter;
+
+	list_for_each_entry(filter, &root->flist, link) {
+		if ((classid & filter->mask) != filter->value)
+			continue;
+
+		*res = filter->res;
+		return 0;
+	}
+
+	return -1;
+}
+
+/*
+ * Look up a filter by its handle.
+ *
+ * Returns the filter pointer cast to unsigned long, or 0 when the tp has no
+ * root or no filter carries that handle. No reference is taken on the
+ * filter.
+ */
+static unsigned long cgroup_get(struct tcf_proto *tp, u32 handle)
+{
+	struct cgroup_head *head = (struct cgroup_head *) tp->root;
+	struct cgroup_filter *f;
+
+	if (head == NULL)
+		return 0UL;
+
+	/*
+	 * Handles are expected to be unique, so stop at the first hit rather
+	 * than walking the remainder of the list.
+	 */
+	list_for_each_entry(f, &head->flist, link)
+		if (f->handle == handle)
+			return (unsigned long) f;
+
+	return 0UL;
+}
+
+/*
+ * Counterpart of cgroup_get(). Does not seem to be used for classifiers:
+ * cgroup_get() takes no reference, so there is nothing to release here.
+ */
+static void cgroup_put(struct tcf_proto *tp, unsigned long f)
+{
+}
+
+/*
+ * Initializer called when a tp is created: allocates the zeroed list head,
+ * initialises its (empty) filter list and stores it in tp->root.
+ *
+ * Returns 0 on success or -ENOBUFS when the allocation fails.
+ */
+static int cgroup_init(struct tcf_proto *tp)
+{
+	struct cgroup_head *root;
+
+	root = kzalloc(sizeof(struct cgroup_head), GFP_KERNEL);
+	if (!root)
+		return -ENOBUFS;
+
+	INIT_LIST_HEAD(&root->flist);
+	tp->root = root;
+
+	return 0;
+}
+
+/*
+ * Release one filter that has already been unlinked from the filter list.
+ *
+ * The class bound through tcf_bind_filter() in cgroup_set_parms() is
+ * unbound first; without this the binding on the class would never be
+ * dropped. The filter memory is then freed.
+ */
+static inline void cgroup_delete_filter(struct tcf_proto *tp,
+				       struct cgroup_filter *f)
+{
+	tcf_unbind_filter(tp, &f->res);
+	kfree(f);
+}
+
+/*
+ * Tear down everything hanging off the tp: the root is detached from
+ * tp->root, every filter is unlinked and released, and finally the list
+ * head itself is freed.
+ */
+static void cgroup_destroy(struct tcf_proto *tp)
+{
+	struct cgroup_filter *cur, *next;
+	struct cgroup_head *root;
+
+	/* Clear tp->root while taking ownership of the old value. */
+	root = (struct cgroup_head *) xchg(&tp->root, NULL);
+
+	list_for_each_entry_safe(cur, next, &root->flist, link) {
+		list_del(&cur->link);
+		cgroup_delete_filter(tp, cur);
+	}
+
+	kfree(root);
+}
+
+/*
+ * Delete a single filter.
+ *
+ * arg is the filter pointer as returned by cgroup_get(). It is only acted
+ * upon if it is actually found on this tp's list: it is then unlinked under
+ * the tree lock and released, and 0 is returned. Otherwise -ENOENT is
+ * returned.
+ */
+static int cgroup_delete(struct tcf_proto *tp, unsigned long arg)
+{
+	struct cgroup_filter *victim = (struct cgroup_filter *) arg;
+	struct cgroup_head *root = (struct cgroup_head *) tp->root;
+	struct cgroup_filter *pos;
+
+	list_for_each_entry(pos, &root->flist, link) {
+		if (pos != victim)
+			continue;
+
+		tcf_tree_lock(tp);
+		list_del(&pos->link);
+		tcf_tree_unlock(tp);
+
+		cgroup_delete_filter(tp, pos);
+		return 0;
+	}
+
+	return -ENOENT;
+}
+
+/*
+ * Load mask, value and classid from parsed attributes into a filter and
+ * bind the filter to its class.
+ *
+ * @tp:   classifier instance the filter belongs to
+ * @base: passed through to tcf_bind_filter()
+ * @f:    filter to fill in (new, or an existing one being changed)
+ * @tb:   parsed TCA_CGROUP_* attribute table
+ *
+ * TCA_CGROUP_VALUE and TCA_CGROUP_CLASSID are mandatory; TCA_CGROUP_MASK
+ * is optional and defaults to UINT_MAX (compare all bits). Each attribute
+ * must carry at least a u32.
+ *
+ * All attributes are validated before the filter is touched, so a rejected
+ * request leaves an existing filter exactly as it was.
+ *
+ * Returns 0 on success or -EINVAL on a missing or short attribute.
+ */
+static inline int cgroup_set_parms(struct tcf_proto *tp,
+			unsigned long base,
+			struct cgroup_filter *f, struct nlattr **tb)
+{
+	u32 mask = UINT_MAX;
+
+	if (tb[TCA_CGROUP_MASK]) {
+		if (nla_len(tb[TCA_CGROUP_MASK]) < sizeof(u32))
+			return -EINVAL;
+		mask = nla_get_u32(tb[TCA_CGROUP_MASK]);
+	}
+
+	if (tb[TCA_CGROUP_VALUE] == NULL ||
+	    nla_len(tb[TCA_CGROUP_VALUE]) < sizeof(u32))
+		return -EINVAL;
+
+	if (tb[TCA_CGROUP_CLASSID] == NULL ||
+	    nla_len(tb[TCA_CGROUP_CLASSID]) < sizeof(u32))
+		return -EINVAL;
+
+	/* Everything checked out: commit to the filter. */
+	f->mask = mask;
+	f->value = nla_get_u32(tb[TCA_CGROUP_VALUE]);
+	f->res.classid = nla_get_u32(tb[TCA_CGROUP_CLASSID]);
+	tcf_bind_filter(tp, &f->res, base);
+
+	return 0;
+}
+
+/*
+ * Create a new filter or change an existing one from netlink attributes.
+ *
+ * @tp:     classifier instance
+ * @base:   passed through to tcf_bind_filter() when the class is bound
+ * @handle: filter handle; must be non-zero when creating, and must either
+ *          be 0 or equal the existing filter's handle when changing
+ * @tca:    top-level attributes; TCA_OPTIONS carries the nested
+ *          TCA_CGROUP_* attributes
+ * @arg:    in: existing filter, or 0 to create one; out: the filter that
+ *          was created or changed
+ *
+ * A newly created filter is added to the filter list under the tree lock.
+ *
+ * Returns 0 on success, -EINVAL on missing/malformed attributes or a handle
+ * mismatch, or -ENOBUFS when a new filter cannot be allocated.
+ */
+static int cgroup_change(struct tcf_proto *tp, unsigned long base, u32 handle,
+				struct nlattr **tca, unsigned long *arg)
+{
+	int err;
+	struct cgroup_head *head = (struct cgroup_head *) tp->root;
+	/*
+	 * nla_parse_nested() fills slots 0..TCA_CGROUP_MAX inclusive, so the
+	 * table needs TCA_CGROUP_MAX + 1 entries.
+	 */
+	struct nlattr *tb[TCA_CGROUP_MAX + 1];
+	struct cgroup_filter *f = (struct cgroup_filter *) *arg;
+
+	if (tca[TCA_OPTIONS] == NULL)
+		return -EINVAL;
+
+	if (nla_parse_nested(tb, TCA_CGROUP_MAX, tca[TCA_OPTIONS], NULL) < 0)
+		return -EINVAL;
+
+	if (f != NULL) {
+		if (handle && f->handle != handle)
+			return -EINVAL;
+	} else {
+		if (!handle)
+			return -EINVAL;
+		f = kzalloc(sizeof(*f), GFP_KERNEL);
+		if (f == NULL)
+			return -ENOBUFS;
+		f->handle = handle;
+	}
+
+	err = cgroup_set_parms(tp, base, f, tb);
+	if (err < 0)
+		goto errout;
+
+	if (*arg == 0) {
+		tcf_tree_lock(tp);
+		list_add(&f->link, &head->flist);
+		tcf_tree_unlock(tp);
+	}
+
+	*arg = (unsigned long)f;
+	return 0;
+
+errout:
+	/* Only free what was allocated here; never an existing filter. */
+	if (*arg == 0)
+		kfree(f);
+	return err;
+}
+
+/*
+ * Walk the filter list, e.g. for displaying its contents.
+ *
+ * The first arg->skip entries are counted but not reported. For every later
+ * entry arg->fn() is invoked; a negative return sets arg->stop and ends the
+ * walk without counting that entry.
+ */
+static void cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+{
+	struct cgroup_head *root = (struct cgroup_head *) tp->root;
+	struct cgroup_filter *pos;
+
+	list_for_each_entry(pos, &root->flist, link) {
+		if (arg->count >= arg->skip &&
+		    arg->fn(tp, (unsigned long) pos, arg) < 0) {
+			arg->stop = 1;
+			break;
+		}
+
+		arg->count++;
+	}
+}
+
+/*
+ * Retrieve the current settings of one filter into a netlink message.
+ *
+ * With no filter (fh == 0) nothing is added and the current message length
+ * is returned. Otherwise the filter handle is stored in the tcmsg header and
+ * a TCA_OPTIONS nest holding the classid, mask and value is emitted.
+ *
+ * Returns skb->len on success, or -1 after cancelling the nest on failure.
+ */
+static int cgroup_dump(struct tcf_proto *tp, unsigned long fh,
+		      struct sk_buff *skb, struct tcmsg *t)
+{
+	struct cgroup_filter *filter = (struct cgroup_filter *) fh;
+	struct nlattr *opts;
+
+	if (!filter)
+		return skb->len;
+
+	t->tcm_handle = filter->handle;
+
+	opts = nla_nest_start(skb, TCA_OPTIONS);
+	if (!opts)
+		goto nla_put_failure;
+
+	/* NLA_PUT_U32() is expected to jump to nla_put_failure on error. */
+	NLA_PUT_U32(skb, TCA_CGROUP_CLASSID, filter->res.classid);
+	NLA_PUT_U32(skb, TCA_CGROUP_MASK, filter->mask);
+	NLA_PUT_U32(skb, TCA_CGROUP_VALUE, filter->value);
+
+	nla_nest_end(skb, opts);
+	return skb->len;
+
+nla_put_failure:
+	nla_nest_cancel(skb, opts);
+	return -1;
+}
+
+/* Operations table that exposes this classifier under the kind "cgroup". */
+static struct tcf_proto_ops cls_cgroup_ops = {
+	.kind = "cgroup",
+	.owner = THIS_MODULE,
+
+	/* tp lifetime */
+	.init = cgroup_init,
+	.destroy = cgroup_destroy,
+
+	/* packet path */
+	.classify = cgroup_classify,
+
+	/* filter management */
+	.get = cgroup_get,
+	.put = cgroup_put,
+	.change = cgroup_change,
+	.delete = cgroup_delete,
+
+	/* enumeration and dumping */
+	.walk = cgroup_walk,
+	.dump = cgroup_dump,
+};
+
+/*
+ * Module entry point: registers cls_cgroup_ops through
+ * register_tcf_proto_ops() and returns that call's result.
+ */
+static int __init init_cgroup(void)
+{
+	return register_tcf_proto_ops(&cls_cgroup_ops);
+}
+
+/* Module exit point: unregisters cls_cgroup_ops again. */
+static void __exit exit_cgroup(void)
+{
+	unregister_tcf_proto_ops(&cls_cgroup_ops);
+}
+
+/* Wire the init/exit routines into the module and declare its license. */
+module_init(init_cgroup)
+module_exit(exit_cgroup)
+MODULE_LICENSE("GPL");
+

^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2008-09-10 17:45 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-08-22  0:57 [PATCH 2/2] Traffic control cgroups subsystem Ranjit Manomohan
  -- strict thread matches above, loose matches on Subject: below --
2008-09-10 17:43 Ranjit Manomohan
2008-07-24 23:37 [PATCH 2/2[ " Ranjit Manomohan
2008-07-25  3:02 ` Daniel Walker
2008-07-25  3:29   ` Li Zefan
2008-07-18 21:29 [PATCH 2/2] " Ranjit Manomohan
2008-07-21  9:30 ` Li Zefan
2008-07-21 14:05 ` Patrick McHardy

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).