All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCHSET] Extended matches and basic classifier
@ 2005-01-23 23:00 Thomas Graf
  2005-01-23 23:01 ` [PATCH 1/6] PKT_SCHED: Extended Matches API Thomas Graf
                   ` (8 more replies)
  0 siblings, 9 replies; 21+ messages in thread
From: Thomas Graf @ 2005-01-23 23:00 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev


This patchset adds the ematch API, the ematches cmp, nbyte, u32, meta,
and the basic classifier. It doesn't touch any existing code.

The following changes were made since the last review:
 - various typo and bug fixes
 - macro for meta data collectors (beautification)
 - some more comments

The required changes for iproute2 can be pulled from:
  bk://tgr.bkbits.net/iproute2-tgr

The iproute2 part still has quite some potential for improvements to
ease usability though.

Cheers, Thomas

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 1/6] PKT_SCHED: Extended Matches API
  2005-01-23 23:00 [PATCHSET] Extended matches and basic classifier Thomas Graf
@ 2005-01-23 23:01 ` Thomas Graf
  2005-01-24  0:12   ` Patrick McHardy
  2005-01-25 23:22   ` [RESEND " Thomas Graf
  2005-01-23 23:02 ` [PATCH 2/6] PKT_SCHED: Simple comparison ematch (cmp) Thomas Graf
                   ` (7 subsequent siblings)
  8 siblings, 2 replies; 21+ messages in thread
From: Thomas Graf @ 2005-01-23 23:01 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev

An extended match (ematch) is a small classifiction tool not worth
writing a full classifier for. Ematches can be interconnected to form
a logic expression and get attached to classifiers to extend their
functionatlity.

The userspace part transforms the logic expressions into an array
consisting of multiple sequences of interconnected ematches separated
by markers. Precedence is implemented by a special ematch kind
referencing a sequence beyond the marker of the current sequence
causing the current position in the sequence to be pushed onto a stack
to allow the current position to be overwritten by the position
referenced in the special ematch. Matching continues in the new sequence
until a marker is reached causing the position to be restored from the
stack.

Signed-off-by: Thomas Graf <tgraf@suug.ch>

diff -Nru linux-2.6.11-rc2-bk1.orig/include/linux/pkt_cls.h linux-2.6.11-rc2-bk1/include/linux/pkt_cls.h
--- linux-2.6.11-rc2-bk1.orig/include/linux/pkt_cls.h	2005-01-23 19:08:31.000000000 +0100
+++ linux-2.6.11-rc2-bk1/include/linux/pkt_cls.h	2005-01-23 19:08:13.000000000 +0100
@@ -319,4 +319,76 @@
 
 #define TCA_TCINDEX_MAX     (__TCA_TCINDEX_MAX - 1)
 
+/* Extended Matches */
+
+struct tcf_ematch_tree_hdr
+{
+	__u16		nmatches;
+	__u16		progid;
+};
+
+enum
+{
+	TCA_EMATCH_TREE_UNSPEC,
+	TCA_EMATCH_TREE_HDR,
+	TCA_EMATCH_TREE_LIST,
+	__TCA_EMATCH_TREE_MAX
+};
+#define TCA_EMATCH_TREE_MAX (__TCA_EMATCH_TREE_MAX - 1)
+
+struct tcf_ematch_hdr
+{
+	__u16		matchid;
+	__u16		kind;
+	__u16		flags;
+	__u16		pad; /* currently unused */
+};
+
+/*  0                   1
+ *  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 
+ * +-----------------------+-+-+---+
+ * |         Unused        |S|I| R |
+ * +-----------------------+-+-+---+
+ *
+ * R(2) ::= relation to next ematch
+ *          where: 0 0 END (last ematch)
+ *                 0 1 AND
+ *                 1 0 OR
+ *                 1 1 Unused (invalid)
+ * I(1) ::= invert result
+ * S(1) ::= simple payload
+ */
+#define TCF_EM_REL_END	0
+#define TCF_EM_REL_AND	(1<<0)
+#define TCF_EM_REL_OR	(1<<1)
+#define TCF_EM_INVERT	(1<<2)
+#define TCF_EM_SIMPLE	(1<<3)
+
+#define TCF_EM_REL_MASK	3
+#define TCF_EM_REL_VALID(v) (((v) & TCF_EM_REL_MASK) != TCF_EM_REL_MASK)
+
+enum
+{
+	TCF_LAYER_LINK,
+	TCF_LAYER_NETWORK,
+	TCF_LAYER_TRANSPORT,
+	__TCF_LAYER_MAX
+};
+#define TCF_LAYER_MAX (__TCF_LAYER_MAX - 1)
+
+/* Ematch type assignments
+ *   1..32767		Reserved for ematches inside kernel tree
+ *   32768..65535	Free to use, not reliable
+ */
+enum
+{
+	TCF_EM_CONTAINER,
+	__TCF_EM_MAX
+};
+
+enum
+{
+	TCF_EM_PROG_TC
+};
+
 #endif
diff -Nru linux-2.6.11-rc2-bk1.orig/include/linux/rtnetlink.h linux-2.6.11-rc2-bk1/include/linux/rtnetlink.h
--- linux-2.6.11-rc2-bk1.orig/include/linux/rtnetlink.h	2005-01-23 19:08:31.000000000 +0100
+++ linux-2.6.11-rc2-bk1/include/linux/rtnetlink.h	2005-01-23 16:31:57.000000000 +0100
@@ -779,6 +779,11 @@
 		 goto rtattr_failure; \
    	__rta_fill(skb, attrtype, attrlen, data); }) 
 
+#define RTA_PUT_NOHDR(skb, attrlen, data) \
+({	if (unlikely(skb_tailroom(skb) < (int)(attrlen))) \
+		goto rtattr_failure; \
+	memcpy(skb_put(skb, RTA_ALIGN(attrlen)), data, attrlen); })
+		
 static inline struct rtattr *
 __rta_reserve(struct sk_buff *skb, int attrtype, int attrlen)
 {
diff -Nru linux-2.6.11-rc2-bk1.orig/include/net/pkt_cls.h linux-2.6.11-rc2-bk1/include/net/pkt_cls.h
--- linux-2.6.11-rc2-bk1.orig/include/net/pkt_cls.h	2005-01-23 19:08:31.000000000 +0100
+++ linux-2.6.11-rc2-bk1/include/net/pkt_cls.h	2005-01-23 19:08:44.000000000 +0100
@@ -148,6 +148,176 @@
 extern int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts,
 	                       struct tcf_ext_map *map);
 
+/**
+ * struct tcf_pkt_info - packet information
+ */
+struct tcf_pkt_info
+{
+};
+
+#ifdef CONFIG_NET_EMATCH
+
+struct tcf_ematch_ops;
+
+/**
+ * struct tcf_ematch - extended match (ematch)
+ * 
+ * @matchid: identifier to allow userspace to reidentify a match
+ * @flags: flags specifying attributes and the relation to other matches
+ * @ops: the operations lookup table of the corresponding ematch module
+ * @datalen: length of the ematch specific configuration data
+ * @data: ematch specific data
+ */
+struct tcf_ematch
+{
+	u16			matchid;
+	u16			flags;
+	struct tcf_ematch_ops * ops;
+	unsigned int		datalen;
+	unsigned long		data;
+};
+
+static inline int tcf_em_is_container(struct tcf_ematch *em)
+{
+	return !em->ops;
+}
+
+static inline int tcf_em_is_simple(struct tcf_ematch *em)
+{
+	return em->flags & TCF_EM_SIMPLE;
+}
+
+static inline int tcf_em_is_inverted(struct tcf_ematch *em)
+{
+	return em->flags & TCF_EM_INVERT;
+}
+
+static inline int tcf_em_last_match(struct tcf_ematch *em)
+{
+	return (em->flags & TCF_EM_REL_MASK) == TCF_EM_REL_END;
+}
+
+static inline int tcf_em_early_end(struct tcf_ematch *em, int result)
+{
+	if (tcf_em_last_match(em))
+		return 1;
+
+	if (result == 0 && em->flags & TCF_EM_REL_AND)
+		return 1;
+
+	if (result != 0 && em->flags & TCF_EM_REL_OR)
+		return 1;
+
+	return 0;
+}
+	
+/**
+ * struct tcf_ematch_tree - ematch tree handle
+ *
+ * @hdr: ematch tree header supplied by userspace
+ * @matches: array of ematches
+ */
+struct tcf_ematch_tree
+{
+	struct tcf_ematch_tree_hdr hdr;
+	struct tcf_ematch *	matches;
+	
+};
+
+/**
+ * struct tcf_ematch_ops - ematch module operations
+ * 
+ * @kind: identifier (kind) of this ematch module
+ * @datalen: length of expected configuration data (optional)
+ * @change: called during validation (optional)
+ * @match: called during ematch tree evaluation, must return 1/0
+ * @destroy: called during destroyage (optional)
+ * @dump: called during dumping process (optional)
+ * @owner: owner, must be set to THIS_MODULE
+ * @link: link to previous/next ematch module (internal use)
+ */
+struct tcf_ematch_ops
+{
+	int			kind;
+	int			datalen;
+	int			(*change)(struct tcf_proto *, void *,
+					  int, struct tcf_ematch *);
+	int			(*match)(struct sk_buff *, struct tcf_ematch *,
+					 struct tcf_pkt_info *);
+	void			(*destroy)(struct tcf_proto *,
+					   struct tcf_ematch *);
+	int			(*dump)(struct sk_buff *, struct tcf_ematch *);
+	struct module		*owner;
+	struct list_head	link;
+};
+
+extern int tcf_em_register(struct tcf_ematch_ops *);
+extern int tcf_em_unregister(struct tcf_ematch_ops *);
+extern int tcf_em_tree_validate(struct tcf_proto *, struct rtattr *,
+				struct tcf_ematch_tree *);
+extern void tcf_em_tree_destroy(struct tcf_proto *, struct tcf_ematch_tree *);
+extern int tcf_em_tree_dump(struct sk_buff *, struct tcf_ematch_tree *, int);
+extern int __tcf_em_tree_match(struct sk_buff *, struct tcf_ematch_tree *,
+			       struct tcf_pkt_info *);
+
+/**
+ * tcf_em_tree_change - replace ematch tree of a running classifier
+ *
+ * @tp: classifier kind handle
+ * @dst: destination ematch tree variable
+ * @src: source ematch tree (temporary tree from tcf_em_tree_validate)
+ *
+ * This functions replaces the ematch tree in @dst with the ematch
+ * tree in @src. The classifier in charge of the ematch tree may be
+ * running.
+ */
+static inline void tcf_em_tree_change(struct tcf_proto *tp,
+				      struct tcf_ematch_tree *dst,
+				      struct tcf_ematch_tree *src)
+{
+	tcf_tree_lock(tp);
+	memcpy(dst, src, sizeof(*dst));
+	tcf_tree_unlock(tp);
+}
+
+/**
+ * tcf_em_tree_match - evaulate an ematch tree
+ *
+ * @skb: socket buffer of the packet in question
+ * @tree: ematch tree to be used for evaluation
+ * @info: packet information examined by classifier
+ *
+ * This function matches @skb against the ematch tree in @tree by going
+ * through all ematches respecting their logic relations returning
+ * as soon as the result is obvious.
+ *
+ * Returns 1 if the ematch tree as-one matches, no ematches are configured
+ * or ematch is not enabled in the kernel, otherwise 0 is returned.
+ */
+static inline int tcf_em_tree_match(struct sk_buff *skb,
+				    struct tcf_ematch_tree *tree,
+				    struct tcf_pkt_info *info)
+{
+	if (tree->hdr.nmatches)
+		return __tcf_em_tree_match(skb, tree, info);
+	else
+		return 1;
+}
+
+#else /* CONFIG_NET_EMATCH */
+
+struct tcf_ematch_tree
+{
+};
+
+#define tcf_em_tree_validate(tp, tb, t) ((void)(t), 0)
+#define tcf_em_tree_destroy(tp, t) do { (void)(t); } while(0)
+#define tcf_em_tree_dump(skb, t, tlv) (0)
+#define tcf_em_tree_change(tp, dst, src) do { } while(0)
+#define tcf_em_tree_match(skb, t, info) ((void)(info), 1)
+
+#endif /* CONFIG_NET_EMATCH */
+
 #ifdef CONFIG_NET_CLS_IND
 static inline int
 tcf_change_indev(struct tcf_proto *tp, char *indev, struct rtattr *indev_tlv)
diff -Nru linux-2.6.11-rc2-bk1.orig/net/sched/Kconfig linux-2.6.11-rc2-bk1/net/sched/Kconfig
--- linux-2.6.11-rc2-bk1.orig/net/sched/Kconfig	2005-01-23 19:08:31.000000000 +0100
+++ linux-2.6.11-rc2-bk1/net/sched/Kconfig	2005-01-23 19:08:13.000000000 +0100
@@ -375,6 +375,29 @@
 	  To compile this code as a module, choose M here: the
 	  module will be called cls_rsvp6.
 
+config NET_EMATCH
+	bool "Extended Matches"
+	depends on NET_CLS
+	---help---
+	  Say Y here if you want to use extended matches on top of classifiers
+	  and select the extended matches below.
+
+	  Extended matches are small classification helpers not worth writing
+	  a separate classifier.
+
+	  You must have a recent version of the iproute2 tools in order to use
+	  extended matches.
+
+config NET_EMATCH_STACK
+	int "Stack size"
+	depends on NET_EMATCH
+	default "32"
+	---help---
+	  Size of the local stack variable used while evaluating the tree of
+	  ematches. Limits the depth of the tree, i.e. the number of
+	  encapsulated precedences. Every level requires 4 bytes of addtional
+	  stack space.
+
 config NET_CLS_ACT
 	bool "Packet ACTION"
 	depends on EXPERIMENTAL && NET_CLS && NET_QOS
diff -Nru linux-2.6.11-rc2-bk1.orig/net/sched/Makefile linux-2.6.11-rc2-bk1/net/sched/Makefile
--- linux-2.6.11-rc2-bk1.orig/net/sched/Makefile	2005-01-23 19:08:31.000000000 +0100
+++ linux-2.6.11-rc2-bk1/net/sched/Makefile	2005-01-23 19:08:13.000000000 +0100
@@ -33,3 +33,4 @@
 obj-$(CONFIG_NET_CLS_RSVP)	+= cls_rsvp.o
 obj-$(CONFIG_NET_CLS_TCINDEX)	+= cls_tcindex.o
 obj-$(CONFIG_NET_CLS_RSVP6)	+= cls_rsvp6.o
+obj-$(CONFIG_NET_EMATCH)	+= ematch.o
diff -Nru linux-2.6.11-rc2-bk1.orig/net/sched/ematch.c linux-2.6.11-rc2-bk1/net/sched/ematch.c
--- linux-2.6.11-rc2-bk1.orig/net/sched/ematch.c	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.11-rc2-bk1/net/sched/ematch.c	2005-01-23 16:31:57.000000000 +0100
@@ -0,0 +1,526 @@
+/*
+ * net/sched/ematch.c		Extended Match API
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Thomas Graf <tgraf@suug.ch>
+ *
+ * ==========================================================================
+ *
+ * An extended match (ematch) is a small classification tool not worth
+ * writing a full classifier for. Ematches can be interconnected to form
+ * a logic expression and get attached to classifiers to extend their
+ * functionatlity.
+ *
+ * The userspace part transforms the logic expressions into an array
+ * consisting of multiple sequences of interconnected ematches separated
+ * by markers. Precedence is implemented by a special ematch kind
+ * referencing a sequence beyond the marker of the current sequence
+ * causing the current position in the sequence to be pushed onto a stack
+ * to allow the current position to be overwritten by the position referenced
+ * in the special ematch. Matching continues in the new sequence until a
+ * marker is reached causing the position to be restored from the stack.
+ *
+ * Example:
+ *          A AND (B1 OR B2) AND C AND D
+ *
+ *              ------->-PUSH-------
+ *    -->--    /         -->--      \   -->--
+ *   /     \  /         /     \      \ /     \
+ * +-------+-------+-------+-------+-------+--------+
+ * | A AND | B AND | C AND | D END | B1 OR | B2 END |
+ * +-------+-------+-------+-------+-------+--------+
+ *                    \                      /
+ *                     --------<-POP---------
+ *
+ * where B is a virtual ematch referencing to sequence starting with B1.
+ * 
+ * ==========================================================================
+ *
+ * How to write an ematch in 60 seconds
+ * ------------------------------------
+ * 
+ *   1) Provide a matcher function:
+ *      static int my_match(struct sk_buff *skb, struct tcf_ematch *m,
+ *                          struct tcf_pkt_info *info)
+ *      {
+ *      	struct mydata *d = (struct mydata *) m->data;
+ *
+ *      	if (...matching goes here...)
+ *      		return 1;
+ *      	else
+ *      		return 0;
+ *      }
+ *
+ *   2) Fill out a struct tcf_ematch_ops:
+ *      static struct tcf_ematch_ops my_ops = {
+ *      	.kind = unique id,
+ *      	.datalen = sizeof(struct mydata),
+ *      	.match = my_match,
+ *      	.owner = THIS_MODULE,
+ *      };
+ *
+ *   3) Register/Unregister your ematch:
+ *      static int __init init_my_ematch(void)
+ *      {
+ *      	return tcf_em_register(&my_ops);
+ *      }
+ *
+ *      static void __exit exit_my_ematch(void)
+ *      {
+ *      	return tcf_em_unregister(&my_ops);
+ *      }
+ *
+ *      module_init(init_my_ematch);
+ *      module_exit(exit_my_ematch);
+ *
+ *   4) By now you should have two more seconds left, barely enough to
+ *      open up a beer to watch the compilation going.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/rtnetlink.h>
+#include <linux/skbuff.h>
+#include <net/pkt_cls.h>
+#include <config/net/ematch/stack.h>
+
+static LIST_HEAD(ematch_ops);
+static DEFINE_RWLOCK(ematch_mod_lock);
+
+static inline struct tcf_ematch_ops * tcf_em_lookup(u16 kind)
+{
+	struct tcf_ematch_ops *e = NULL;
+
+	read_lock(&ematch_mod_lock);
+	list_for_each_entry(e, &ematch_ops, link) {
+		if (kind == e->kind) {
+			if (!try_module_get(e->owner))
+				e = NULL;
+			break;
+		}
+	}
+	read_unlock(&ematch_mod_lock);
+
+	return e;
+}
+
+/**
+ * tcf_em_register - register an extended match
+ * 
+ * @ops: ematch operations lookup table
+ *
+ * This function must be called by ematches to announce their presence.
+ * The given @ops must have kind set to a unique identifier and the
+ * callback match() must be implemented. All other callbacks are optional
+ * and a fallback implementation is used instead.
+ *
+ * Returns -EEXISTS if an ematch of the same kind has already registered.
+ */
+int tcf_em_register(struct tcf_ematch_ops *ops)
+{
+	int err = -EEXIST;
+	struct tcf_ematch_ops *e;
+
+	write_lock(&ematch_mod_lock);
+	list_for_each_entry(e, &ematch_ops, link)
+		if (ops->kind == e->kind)
+			goto errout;
+
+	list_add_tail(&ops->link, &ematch_ops);
+	err = 0;
+errout:
+	write_unlock(&ematch_mod_lock);
+	return err;
+}
+
+/**
+ * tcf_em_unregister - unregster and extended match
+ *
+ * @ops: ematch operations lookup table
+ *
+ * This function must be called by ematches to announce their disappearance
+ * for examples when the module gets unloaded. The @ops parameter must be
+ * the same as the one used for registration.
+ *
+ * Returns -ENOENT if no matching ematch was found.
+ */
+int tcf_em_unregister(struct tcf_ematch_ops *ops)
+{
+	int err = 0;
+	struct tcf_ematch_ops *e;
+
+	write_lock(&ematch_mod_lock);
+	list_for_each_entry(e, &ematch_ops, link) {
+		if (e == ops) {
+			list_del(&e->link);
+			goto out;
+		}
+	}
+
+	err = -ENOENT;
+out:
+	write_unlock(&ematch_mod_lock);
+	return err;
+}
+
+static inline struct tcf_ematch * tcf_em_get_match(struct tcf_ematch_tree *tree,
+						   int index)
+{
+	return &tree->matches[index];
+}
+
+
+static int tcf_em_validate(struct tcf_proto *tp,
+			   struct tcf_ematch_tree_hdr *tree_hdr,
+			   struct tcf_ematch *em, struct rtattr *rta, int idx)
+{
+	int err = -EINVAL;
+	struct tcf_ematch_hdr *em_hdr = RTA_DATA(rta);
+	int data_len = RTA_PAYLOAD(rta) - sizeof(*em_hdr);
+	void *data = (void *) em_hdr + sizeof(*em_hdr);
+
+	if (!TCF_EM_REL_VALID(em_hdr->flags))
+		goto errout;
+
+	if (em_hdr->kind == TCF_EM_CONTAINER) {
+		/* Special ematch called "container", carries an index
+		 * referencing an external ematch sequence. */
+		u32 ref;
+
+		if (data_len < sizeof(ref))
+			goto errout;
+		ref = *(u32 *) data;
+
+		if (ref >= tree_hdr->nmatches)
+			goto errout;
+
+		/* We do not allow backward jumps to avoid loops and jumps
+		 * to our own position are of course illegal. */
+		if (ref <= idx)
+			goto errout;
+
+		
+		em->data = ref;
+	} else {
+		/* Note: This lookup will increase the module refcnt
+		 * of the ematch module referenced. In case of a failure,
+		 * a destroy function is called by the underlying layer
+		 * which automatically releases the reference again, therefore
+		 * the module MUST not be given back under any circumstances
+		 * here. Be aware, the destroy function assumes that the
+		 * module is held if the ops field is non zero. */
+		em->ops = tcf_em_lookup(em_hdr->kind);
+
+		if (em->ops == NULL) {
+			err = -ENOENT;
+			goto errout;
+		}
+
+		/* ematch module provides expected length of data, so we
+		 * can do a basic sanity check. */
+		if (em->ops->datalen && data_len < em->ops->datalen)
+			goto errout;
+
+		if (em->ops->change) {
+			err = em->ops->change(tp, data, data_len, em);
+			if (err < 0)
+				goto errout;
+		} else if (data_len > 0) {
+			/* ematch module doesn't provide an own change
+			 * procedure and expects us to allocate and copy
+			 * the ematch data.
+			 *
+			 * TCF_EM_SIMPLE may be specified stating that the
+			 * data only consists of a u32 integer and the module
+			 * does not expected a memory reference but rather
+			 * the value carried. */
+			if (em_hdr->flags & TCF_EM_SIMPLE) {
+				if (data_len < sizeof(u32))
+					goto errout;
+				em->data = *(u32 *) data;
+			} else {
+				void *v = kmalloc(data_len, GFP_KERNEL);
+				if (v == NULL) {
+					err = -ENOBUFS;
+					goto errout;
+				}
+				memcpy(v, data, data_len);
+				em->data = (unsigned long) v;
+			}
+		}
+	}
+
+	em->matchid = em_hdr->matchid;
+	em->flags = em_hdr->flags;
+	em->datalen = data_len;
+
+	err = 0;
+errout:
+	return err;
+}
+
+/**
+ * tcf_em_tree_validate - validate ematch config TLV and build ematch tree
+ *
+ * @tp: classifier kind handle
+ * @rta: ematch tree configuration TLV
+ * @tree: destination ematch tree variable to store the resulting
+ *        ematch tree.
+ *
+ * This function validates the given configuration TLV @rta and builds an
+ * ematch tree in @tree. The resulting tree must later be copied into
+ * the private classifier data using tcf_em_tree_change(). You MUST NOT
+ * provide the ematch tree variable of the private classifier data directly,
+ * the changes would not be locked properly.
+ *
+ * Returns a negative error code if the configuration TLV contains errors.
+ */
+int tcf_em_tree_validate(struct tcf_proto *tp, struct rtattr *rta,
+			 struct tcf_ematch_tree *tree)
+{
+	int idx, list_len, matches_len, err = -EINVAL;
+	struct rtattr *tb[TCA_EMATCH_TREE_MAX];
+	struct rtattr *rt_match, *rt_hdr, *rt_list;
+	struct tcf_ematch_tree_hdr *tree_hdr;
+	struct tcf_ematch *em;
+
+	if (rtattr_parse_nested(tb, TCA_EMATCH_TREE_MAX, rta) < 0)
+		goto errout;
+
+	rt_hdr = tb[TCA_EMATCH_TREE_HDR-1];
+	rt_list = tb[TCA_EMATCH_TREE_LIST-1];
+
+	if (rt_hdr == NULL || rt_list == NULL)
+		goto errout;
+
+	if (RTA_PAYLOAD(rt_hdr) < sizeof(*tree_hdr) ||
+	    RTA_PAYLOAD(rt_list) < sizeof(*rt_match))
+		goto errout;
+
+	tree_hdr = RTA_DATA(rt_hdr);
+	memcpy(&tree->hdr, tree_hdr, sizeof(*tree_hdr));
+
+	rt_match = RTA_DATA(rt_list);
+	list_len = RTA_PAYLOAD(rt_list);
+	matches_len = tree_hdr->nmatches * sizeof(*em);
+
+	tree->matches = kmalloc(matches_len, GFP_KERNEL);
+	if (tree->matches == NULL)
+		goto errout;
+	memset(tree->matches, 0, matches_len);
+
+	/* We do not use rtattr_parse_nested here because the maximum
+	 * number of attributes is unknown. This saves us the allocation
+	 * for a tb buffer which would serve no purpose at all.
+	 * 
+	 * The array of rt attributes is parsed in the order as they are
+	 * provided, their type must be incremental from 1 to n. Even
+	 * if it does not serve any real purpose, a failure of sticking
+	 * to this policy will result in parsing failure. */
+	for (idx = 0; RTA_OK(rt_match, list_len); idx++) {
+		err = -EINVAL;
+
+		if (rt_match->rta_type != (idx + 1))
+			goto errout_abort;
+
+		if (idx >= tree_hdr->nmatches)
+			goto errout_abort;
+
+		if (RTA_PAYLOAD(rt_match) < sizeof(struct tcf_ematch_hdr))
+			goto errout_abort;
+
+		em = tcf_em_get_match(tree, idx);
+
+		err = tcf_em_validate(tp, tree_hdr, em, rt_match, idx);
+		if (err < 0)
+			goto errout_abort;
+
+		rt_match = RTA_NEXT(rt_match, list_len);
+	}
+
+	/* Check if the number of matches provided by userspace actually
+	 * complies with the array of matches. The number was used for
+	 * the validation of references and a mismatch could lead to
+	 * undefined references during the matching process. */
+	if (idx != tree_hdr->nmatches) {
+		err = -EINVAL;
+		goto errout_abort;
+	}
+
+	err = 0;
+errout:
+	return err;
+
+errout_abort:
+	tcf_em_tree_destroy(tp, tree);
+	return err;
+}
+
+/**
+ * tcf_em_tree_destroy - destroy an ematch tree
+ *
+ * @tp: classifier kind handle
+ * @tree: ematch tree to be deleted
+ *
+ * This functions destroys an ematch tree previously created by
+ * tcf_em_tree_validate()/tcf_em_tree_change(). You must ensure that
+ * the ematch tree is not in use before calling this function.
+ */
+void tcf_em_tree_destroy(struct tcf_proto *tp, struct tcf_ematch_tree *tree)
+{
+	int i;
+
+	if (tree->matches == NULL)
+		return;
+
+	for (i = 0; i < tree->hdr.nmatches; i++) {
+		struct tcf_ematch *em = tcf_em_get_match(tree, i);
+
+		if (em->ops) {
+			if (em->ops->destroy)
+				em->ops->destroy(tp, em);
+			else if (!tcf_em_is_simple(em) && em->data)
+				kfree((void *) em->data);
+			module_put(em->ops->owner);
+		}
+	}
+	
+	tree->hdr.nmatches = 0;
+	kfree(xchg(&tree->matches, NULL));
+}
+
+/**
+ * tcf_em_tree_dump - dump ematch tree into a rtnl message
+ *
+ * @skb: skb holding the rtnl message
+ * @t: ematch tree to be dumped
+ * @tlv: TLV type to be used to encapsulate the tree
+ *
+ * This function dumps a ematch tree into a rtnl message. It is valid to
+ * call this function while the ematch tree is in use.
+ *
+ * Returns -1 if the skb tailroom is insufficient.
+ */
+int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv)
+{
+	int i;
+	struct rtattr * top_start = (struct rtattr*) skb->tail;
+	struct rtattr * list_start;
+
+	RTA_PUT(skb, tlv, 0, NULL);
+	RTA_PUT(skb, TCA_EMATCH_TREE_HDR, sizeof(tree->hdr), &tree->hdr);
+
+	list_start = (struct rtattr *) skb->tail;
+	RTA_PUT(skb, TCA_EMATCH_TREE_LIST, 0, NULL);
+
+	for (i = 0; i < tree->hdr.nmatches; i++) {
+		struct rtattr *match_start = (struct rtattr*) skb->tail;
+		struct tcf_ematch *em = tcf_em_get_match(tree, i);
+		struct tcf_ematch_hdr em_hdr = {
+			.kind = em->ops ? em->ops->kind : TCF_EM_CONTAINER,
+			.matchid = em->matchid,
+			.flags = em->flags
+		};
+
+		RTA_PUT(skb, i+1, sizeof(em_hdr), &em_hdr);
+
+		if (em->ops && em->ops->dump) {
+			if (em->ops->dump(skb, em) < 0)
+				goto rtattr_failure;
+		} else if (tcf_em_is_container(em) || tcf_em_is_simple(em)) {
+			u32 u = em->data;
+			RTA_PUT_NOHDR(skb, sizeof(u), &u);
+		} else if (em->datalen > 0)
+			RTA_PUT_NOHDR(skb, em->datalen, (void *) em->data);
+
+		match_start->rta_len = skb->tail - (u8*) match_start;
+	}
+
+	list_start->rta_len = skb->tail - (u8 *) list_start;
+	top_start->rta_len = skb->tail - (u8 *) top_start;
+
+	return 0;
+
+rtattr_failure:
+	return -1;
+}
+
+static inline int tcf_em_match(struct sk_buff *skb, struct tcf_ematch *em,
+			       struct tcf_pkt_info *info)
+{
+	int r;
+
+	if (likely(em->ops->match))
+		r = em->ops->match(skb, em, info);
+	else
+		r = 0;
+
+	return tcf_em_is_inverted(em) ? !r : r;
+}
+
+/* Do not use this function directly, use tcf_em_tree_match instead */
+int __tcf_em_tree_match(struct sk_buff *skb, struct tcf_ematch_tree *tree,
+			struct tcf_pkt_info *info)
+{
+	int stackp = 0, match_idx = 0, res = 0;
+	struct tcf_ematch *cur_match;
+	int stack[CONFIG_NET_EMATCH_STACK];
+
+proceed:
+	while (match_idx < tree->hdr.nmatches) {
+		cur_match = tcf_em_get_match(tree, match_idx);
+
+		if (tcf_em_is_container(cur_match)) {
+			if (unlikely(stackp >= CONFIG_NET_EMATCH_STACK))
+				goto stack_overflow;
+
+			stack[stackp++] = match_idx;
+			match_idx = cur_match->data;
+			goto proceed;
+		}
+
+		res = tcf_em_match(skb, cur_match, info);
+
+		if (tcf_em_early_end(cur_match, res))
+			break;
+
+		match_idx++;
+	}
+
+pop_stack:
+	if (stackp > 0) {
+		match_idx = stack[--stackp];
+		cur_match = tcf_em_get_match(tree, match_idx);
+
+		if (tcf_em_early_end(cur_match, res))
+			goto pop_stack;
+		else {
+			match_idx++;
+			goto proceed;
+		}
+	}
+
+	return res;
+
+stack_overflow:
+	if (net_ratelimit())
+		printk("Local stack overflow, increase NET_EMATCH_STACK\n");
+	return -1;
+}
+
+EXPORT_SYMBOL(tcf_em_register);
+EXPORT_SYMBOL(tcf_em_unregister);
+EXPORT_SYMBOL(tcf_em_tree_validate);
+EXPORT_SYMBOL(tcf_em_tree_destroy);
+EXPORT_SYMBOL(tcf_em_tree_dump);
+EXPORT_SYMBOL(__tcf_em_tree_match);

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 2/6] PKT_SCHED: Simple comparison ematch (cmp)
  2005-01-23 23:00 [PATCHSET] Extended matches and basic classifier Thomas Graf
  2005-01-23 23:01 ` [PATCH 1/6] PKT_SCHED: Extended Matches API Thomas Graf
@ 2005-01-23 23:02 ` Thomas Graf
  2005-01-24  0:14   ` Patrick McHardy
  2005-01-23 23:03 ` [PATCH 3/6] PKT_SCHED: Multi byte comparison ematch (nbyte) Thomas Graf
                   ` (6 subsequent siblings)
  8 siblings, 1 reply; 21+ messages in thread
From: Thomas Graf @ 2005-01-23 23:02 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev

The cmp ematch compares a static value provided by userspace against
a 8, 16, or 32bit chunk read from the packet. The reading offset is
provided by userspace and based on one of the skb layers (mac|nh|h).
The ematch provides functionality to transform the byte order of
the chunk and/or apply a mask and understands the operands eq, lt,
and gt. Basically, it is very similiar to the u32 (e)match but tries
filling the gaps left behind.

Signed-off-by: Thomas Graf <tgraf@suug.ch>

diff -Nru linux-2.6.11-rc2-bk1.orig/include/linux/pkt_cls.h linux-2.6.11-rc2-bk1/include/linux/pkt_cls.h
--- linux-2.6.11-rc2-bk1.orig/include/linux/pkt_cls.h	2005-01-23 17:29:35.000000000 +0100
+++ linux-2.6.11-rc2-bk1/include/linux/pkt_cls.h	2005-01-23 17:29:40.000000000 +0100
@@ -383,6 +383,7 @@
 enum
 {
 	TCF_EM_CONTAINER,
+	TCF_EM_CMP,
 	__TCF_EM_MAX
 };
 
@@ -391,4 +392,11 @@
 	TCF_EM_PROG_TC
 };
 
+enum
+{
+	TCF_EM_OPND_EQ,
+	TCF_EM_OPND_GT,
+	TCF_EM_OPND_LT
+};
+
 #endif
diff -Nru linux-2.6.11-rc2-bk1.orig/include/linux/tc_ematch/tc_em_cmp.h linux-2.6.11-rc2-bk1/include/linux/tc_ematch/tc_em_cmp.h
--- linux-2.6.11-rc2-bk1.orig/include/linux/tc_ematch/tc_em_cmp.h	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.11-rc2-bk1/include/linux/tc_ematch/tc_em_cmp.h	2005-01-23 17:29:40.000000000 +0100
@@ -0,0 +1,26 @@
+#ifndef __LINUX_TC_EM_CMP_H
+#define __LINUX_TC_EM_CMP_H
+
+#include <linux/pkt_cls.h>
+
+struct tcf_em_cmp
+{
+	__u32		val;
+	__u32		mask;
+	__u16		off;
+	__u8		align:4;
+	__u8		flags:4;
+	__u8		layer:4;
+	__u8		opnd:4;
+};
+
+enum
+{
+	TCF_EM_ALIGN_U8  = 1,
+	TCF_EM_ALIGN_U16 = 2,
+	TCF_EM_ALIGN_U32 = 4
+};
+
+#define TCF_EM_CMP_TRANS	1
+
+#endif
diff -Nru linux-2.6.11-rc2-bk1.orig/include/net/pkt_cls.h linux-2.6.11-rc2-bk1/include/net/pkt_cls.h
--- linux-2.6.11-rc2-bk1.orig/include/net/pkt_cls.h	2005-01-23 17:29:35.000000000 +0100
+++ linux-2.6.11-rc2-bk1/include/net/pkt_cls.h	2005-01-23 17:30:55.000000000 +0100
@@ -318,6 +318,26 @@
 
 #endif /* CONFIG_NET_EMATCH */
 
+static inline unsigned char * tcf_get_base_ptr(struct sk_buff *skb, int layer)
+{
+	switch (layer) {
+		case TCF_LAYER_LINK:
+			return skb->data;
+		case TCF_LAYER_NETWORK:
+			return skb->nh.raw;
+		case TCF_LAYER_TRANSPORT:
+			return skb->h.raw;
+	}
+
+	return NULL;
+}
+
+static inline int tcf_valid_offset(struct sk_buff *skb, unsigned char *ptr,
+				   int len)
+{
+	return unlikely((ptr + len) < skb->tail && ptr > skb->head);
+}
+
 #ifdef CONFIG_NET_CLS_IND
 static inline int
 tcf_change_indev(struct tcf_proto *tp, char *indev, struct rtattr *indev_tlv)
diff -Nru linux-2.6.11-rc2-bk1.orig/net/sched/Kconfig linux-2.6.11-rc2-bk1/net/sched/Kconfig
--- linux-2.6.11-rc2-bk1.orig/net/sched/Kconfig	2005-01-23 17:29:35.000000000 +0100
+++ linux-2.6.11-rc2-bk1/net/sched/Kconfig	2005-01-23 17:29:40.000000000 +0100
@@ -398,6 +398,16 @@
 	  encapsulated precedences. Every level requires 4 bytes of addtional
 	  stack space.
 
+config NET_EMATCH_CMP
+	tristate "Simple packet data comparison"
+	depends on NET_EMATCH
+	---help---
+	  Say Y here if you want to be able to classify packets based on
+	  simple packet data comparisons for 8, 16, and 32bit values.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called em_cmp.
+
 config NET_CLS_ACT
 	bool "Packet ACTION"
 	depends on EXPERIMENTAL && NET_CLS && NET_QOS
diff -Nru linux-2.6.11-rc2-bk1.orig/net/sched/Makefile linux-2.6.11-rc2-bk1/net/sched/Makefile
--- linux-2.6.11-rc2-bk1.orig/net/sched/Makefile	2005-01-23 17:29:35.000000000 +0100
+++ linux-2.6.11-rc2-bk1/net/sched/Makefile	2005-01-23 17:29:40.000000000 +0100
@@ -34,3 +34,4 @@
 obj-$(CONFIG_NET_CLS_TCINDEX)	+= cls_tcindex.o
 obj-$(CONFIG_NET_CLS_RSVP6)	+= cls_rsvp6.o
 obj-$(CONFIG_NET_EMATCH)	+= ematch.o
+obj-$(CONFIG_NET_EMATCH_CMP)	+= em_cmp.o
diff -Nru linux-2.6.11-rc2-bk1.orig/net/sched/em_cmp.c linux-2.6.11-rc2-bk1/net/sched/em_cmp.c
--- linux-2.6.11-rc2-bk1.orig/net/sched/em_cmp.c	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.11-rc2-bk1/net/sched/em_cmp.c	2005-01-23 17:31:03.000000000 +0100
@@ -0,0 +1,101 @@
+/*
+ * net/sched/em_cmp.c	Simple packet data comparison ematch
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Thomas Graf <tgraf@suug.ch>
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/tc_ematch/tc_em_cmp.h>
+#include <net/pkt_cls.h>
+
+static inline int cmp_needs_transformation(struct tcf_em_cmp *cmp)
+{
+	return unlikely(cmp->flags & TCF_EM_CMP_TRANS);
+}
+
+static int em_cmp_match(struct sk_buff *skb, struct tcf_ematch *em,
+			struct tcf_pkt_info *info)
+{
+	struct tcf_em_cmp *cmp = (struct tcf_em_cmp *) em->data;
+	unsigned char *ptr = tcf_get_base_ptr(skb, cmp->layer) + cmp->off;
+	u32 val = 0;
+
+	if (!tcf_valid_offset(skb, ptr, cmp->align))
+		return 0;
+
+	switch (cmp->align) {
+		case TCF_EM_ALIGN_U8:
+			val = *ptr;
+			break;
+
+		case TCF_EM_ALIGN_U16:
+			val = *ptr << 8;
+			val |= *(ptr+1);
+
+			if (cmp_needs_transformation(cmp))
+				val = be16_to_cpu(val);
+			break;
+
+		case TCF_EM_ALIGN_U32:
+			/* Worth checking boundries? The branching seems
+			 * to get worse. Visit again. */
+			val = *ptr << 24;
+			val |= *(ptr+1) << 16;
+			val |= *(ptr+2) << 8;
+			val |= *(ptr+3);
+
+			if (cmp_needs_transformation(cmp))
+				val = be32_to_cpu(val);
+			break;
+
+		default:
+			return 0;
+	}
+
+	if (cmp->mask)
+		val &= cmp->mask;
+
+	switch (cmp->opnd) {
+		case TCF_EM_OPND_EQ:
+			return val == cmp->val;
+		case TCF_EM_OPND_LT:
+			return val < cmp->val;
+		case TCF_EM_OPND_GT:
+			return val > cmp->val;
+	}
+
+	return 0;
+}
+
+static struct tcf_ematch_ops em_cmp_ops = {
+	.kind	  = TCF_EM_CMP,
+	.datalen  = sizeof(struct tcf_em_cmp),
+	.match	  = em_cmp_match,
+	.owner	  = THIS_MODULE,
+	.link	  = LIST_HEAD_INIT(em_cmp_ops.link)
+};
+
+static int __init init_em_cmp(void)
+{
+	return tcf_em_register(&em_cmp_ops);
+}
+
+static void __exit exit_em_cmp(void) 
+{
+	tcf_em_unregister(&em_cmp_ops);
+}
+
+MODULE_LICENSE("GPL");
+
+module_init(init_em_cmp);
+module_exit(exit_em_cmp);
+

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 3/6] PKT_SCHED: Multi byte comparison ematch (nbyte)
  2005-01-23 23:00 [PATCHSET] Extended matches and basic classifier Thomas Graf
  2005-01-23 23:01 ` [PATCH 1/6] PKT_SCHED: Extended Matches API Thomas Graf
  2005-01-23 23:02 ` [PATCH 2/6] PKT_SCHED: Simple comparison ematch (cmp) Thomas Graf
@ 2005-01-23 23:03 ` Thomas Graf
  2005-01-23 23:03 ` [PATCH 4/6] PKT_SCHED: u32 ematch Thomas Graf
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 21+ messages in thread
From: Thomas Graf @ 2005-01-23 23:03 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev

The nbyte ematch allows comparing any number of bytes at an arbitary
offset based on one of the skb layers. Its main usage is intended
for IPv6 addresses but may be used for any kind of pattern.

Signed-off-by: Thomas Graf <tgraf@suug.ch>

diff -Nru linux-2.6.11-rc2-bk1.orig/include/linux/pkt_cls.h linux-2.6.11-rc2-bk1/include/linux/pkt_cls.h
--- linux-2.6.11-rc2-bk1.orig/include/linux/pkt_cls.h	2005-01-23 17:33:14.000000000 +0100
+++ linux-2.6.11-rc2-bk1/include/linux/pkt_cls.h	2005-01-23 17:33:26.000000000 +0100
@@ -384,6 +384,7 @@
 {
 	TCF_EM_CONTAINER,
 	TCF_EM_CMP,
+	TCF_EM_NBYTE,
 	__TCF_EM_MAX
 };
 
diff -Nru linux-2.6.11-rc2-bk1.orig/include/linux/tc_ematch/tc_em_nbyte.h linux-2.6.11-rc2-bk1/include/linux/tc_ematch/tc_em_nbyte.h
--- linux-2.6.11-rc2-bk1.orig/include/linux/tc_ematch/tc_em_nbyte.h	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.11-rc2-bk1/include/linux/tc_ematch/tc_em_nbyte.h	2005-01-23 17:33:26.000000000 +0100
@@ -0,0 +1,13 @@
+#ifndef __LINUX_TC_EM_NBYTE_H
+#define __LINUX_TC_EM_NBYTE_H
+
+#include <linux/pkt_cls.h>
+
+struct tcf_em_nbyte
+{
+	__u16		off;
+	__u16		len:12;
+	__u8		layer:4;
+};
+
+#endif
diff -Nru linux-2.6.11-rc2-bk1.orig/net/sched/Kconfig linux-2.6.11-rc2-bk1/net/sched/Kconfig
--- linux-2.6.11-rc2-bk1.orig/net/sched/Kconfig	2005-01-23 17:33:14.000000000 +0100
+++ linux-2.6.11-rc2-bk1/net/sched/Kconfig	2005-01-23 17:33:26.000000000 +0100
@@ -408,6 +408,16 @@
 	  To compile this code as a module, choose M here: the
 	  module will be called em_cmp.
 
+config NET_EMATCH_NBYTE
+	tristate "Multi byte comparison"
+	depends on NET_EMATCH
+	---help---
+	  Say Y here if you want to be able to classify packets based on
+	  multiple byte comparisons mainly useful for IPv6 address comparisons.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called em_nbyte.
+
 config NET_CLS_ACT
 	bool "Packet ACTION"
 	depends on EXPERIMENTAL && NET_CLS && NET_QOS
diff -Nru linux-2.6.11-rc2-bk1.orig/net/sched/Makefile linux-2.6.11-rc2-bk1/net/sched/Makefile
--- linux-2.6.11-rc2-bk1.orig/net/sched/Makefile	2005-01-23 17:33:14.000000000 +0100
+++ linux-2.6.11-rc2-bk1/net/sched/Makefile	2005-01-23 17:33:26.000000000 +0100
@@ -35,3 +35,4 @@
 obj-$(CONFIG_NET_CLS_RSVP6)	+= cls_rsvp6.o
 obj-$(CONFIG_NET_EMATCH)	+= ematch.o
 obj-$(CONFIG_NET_EMATCH_CMP)	+= em_cmp.o
+obj-$(CONFIG_NET_EMATCH_NBYTE)	+= em_nbyte.o
diff -Nru linux-2.6.11-rc2-bk1.orig/net/sched/em_nbyte.c linux-2.6.11-rc2-bk1/net/sched/em_nbyte.c
--- linux-2.6.11-rc2-bk1.orig/net/sched/em_nbyte.c	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.11-rc2-bk1/net/sched/em_nbyte.c	2005-01-23 17:36:08.000000000 +0100
@@ -0,0 +1,82 @@
+/*
+ * net/sched/em_nbyte.c	N-Byte ematch
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Thomas Graf <tgraf@suug.ch>
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <linux/tc_ematch/tc_em_nbyte.h>
+#include <net/pkt_cls.h>
+
+struct nbyte_data
+{
+	struct tcf_em_nbyte	hdr;
+	char			pattern[0];
+};
+	
+static int em_nbyte_change(struct tcf_proto *tp, void *data, int data_len,
+			   struct tcf_ematch *em)
+{
+	struct tcf_em_nbyte *nbyte = data;
+
+	if (data_len < sizeof(*nbyte) ||
+	    data_len < (sizeof(*nbyte) + nbyte->len))
+		return -EINVAL;
+
+	em->datalen = sizeof(*nbyte) + nbyte->len;
+	em->data = (unsigned long) kmalloc(em->datalen, GFP_KERNEL);
+	if (em->data == 0UL)
+		return -ENOBUFS;
+
+	memcpy((void *) em->data, data, em->datalen);
+
+	return 0;
+}
+
+static int em_nbyte_match(struct sk_buff *skb, struct tcf_ematch *em,
+			  struct tcf_pkt_info *info)
+{
+	struct nbyte_data *nbyte = (struct nbyte_data *) em->data;
+	unsigned char *ptr = tcf_get_base_ptr(skb, nbyte->hdr.layer);
+
+	ptr += nbyte->hdr.off;
+
+	if (!tcf_valid_offset(skb, ptr, nbyte->hdr.len))
+		return 0;
+
+	return !memcmp(ptr + nbyte->hdr.off, nbyte->pattern, nbyte->hdr.len);
+}
+
+static struct tcf_ematch_ops em_nbyte_ops = {
+	.kind	  = TCF_EM_NBYTE,
+	.change	  = em_nbyte_change,
+	.match	  = em_nbyte_match,
+	.owner	  = THIS_MODULE,
+	.link	  = LIST_HEAD_INIT(em_nbyte_ops.link)
+};
+
+static int __init init_em_nbyte(void)
+{
+	return tcf_em_register(&em_nbyte_ops);
+}
+
+static void __exit exit_em_nbyte(void) 
+{
+	tcf_em_unregister(&em_nbyte_ops);
+}
+
+MODULE_LICENSE("GPL");
+
+module_init(init_em_nbyte);
+module_exit(exit_em_nbyte);

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 4/6] PKT_SCHED: u32 ematch
  2005-01-23 23:00 [PATCHSET] Extended matches and basic classifier Thomas Graf
                   ` (2 preceding siblings ...)
  2005-01-23 23:03 ` [PATCH 3/6] PKT_SCHED: Multi byte comparison ematch (nbyte) Thomas Graf
@ 2005-01-23 23:03 ` Thomas Graf
  2005-01-24  0:24   ` Patrick McHardy
  2005-01-25 23:24   ` [RESEND " Thomas Graf
  2005-01-23 23:04 ` [PATCH 5/6]: PKT_SCHED: Metadata ematch (meta) Thomas Graf
                   ` (4 subsequent siblings)
  8 siblings, 2 replies; 21+ messages in thread
From: Thomas Graf @ 2005-01-23 23:03 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev

The u32 ematch behaves exactly the same as a u32 match and will replace
it in the long term. It allows the underlying classifiers to give hints
about the position of the next protocol header (i.e. nexthdr+).

Signed-off-by: Thomas Graf <tgraf@suug.ch>

diff -Nru linux-2.6.11-rc1-bk9.orig/include/linux/pkt_cls.h linux-2.6.11-rc1-bk9/include/linux/pkt_cls.h
--- linux-2.6.11-rc1-bk9.orig/include/linux/pkt_cls.h	2005-01-22 03:39:23.000000000 +0100
+++ linux-2.6.11-rc1-bk9/include/linux/pkt_cls.h	2005-01-22 12:19:56.000000000 +0100
@@ -385,6 +385,7 @@
 	TCF_EM_CONTAINER,
 	TCF_EM_CMP,
 	TCF_EM_NBYTE,
+	TCF_EM_U32,
 	__TCF_EM_MAX
 };
 
diff -Nru linux-2.6.11-rc1-bk9.orig/include/net/pkt_cls.h linux-2.6.11-rc1-bk9/include/net/pkt_cls.h
--- linux-2.6.11-rc1-bk9.orig/include/net/pkt_cls.h	2005-01-22 02:56:33.000000000 +0100
+++ linux-2.6.11-rc1-bk9/include/net/pkt_cls.h	2005-01-22 12:21:33.000000000 +0100
@@ -153,6 +153,8 @@
  */
 struct tcf_pkt_info
 {
+	unsigned char *		ptr;
+	int			nexthdr;
 };
 
 #ifdef CONFIG_NET_EMATCH
diff -Nru linux-2.6.11-rc1-bk9.orig/net/sched/Kconfig linux-2.6.11-rc1-bk9/net/sched/Kconfig
--- linux-2.6.11-rc1-bk9.orig/net/sched/Kconfig	2005-01-22 03:39:23.000000000 +0100
+++ linux-2.6.11-rc1-bk9/net/sched/Kconfig	2005-01-22 12:19:56.000000000 +0100
@@ -418,6 +418,16 @@
 	  To compile this code as a module, choose M here: the
 	  module will be called em_nbyte.
 
+config NET_EMATCH_U32
+	tristate "U32 hashing key"
+	depends on NET_EMATCH
+	---help---
+	  Say Y here if you want to be able to classify packets using
+	  the famous u32 key in combination with logic relations.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called em_u32.
+
 config NET_CLS_ACT
 	bool "Packet ACTION"
 	depends on EXPERIMENTAL && NET_CLS && NET_QOS
diff -Nru linux-2.6.11-rc1-bk9.orig/net/sched/Makefile linux-2.6.11-rc1-bk9/net/sched/Makefile
--- linux-2.6.11-rc1-bk9.orig/net/sched/Makefile	2005-01-22 03:39:23.000000000 +0100
+++ linux-2.6.11-rc1-bk9/net/sched/Makefile	2005-01-22 12:19:56.000000000 +0100
@@ -36,3 +36,4 @@
 obj-$(CONFIG_NET_EMATCH)	+= ematch.o
 obj-$(CONFIG_NET_EMATCH_CMP)	+= em_cmp.o
 obj-$(CONFIG_NET_EMATCH_NBYTE)	+= em_nbyte.o
+obj-$(CONFIG_NET_EMATCH_U32)	+= em_u32.o
diff -Nru linux-2.6.11-rc1-bk9.orig/net/sched/em_u32.c linux-2.6.11-rc1-bk9/net/sched/em_u32.c
--- linux-2.6.11-rc1-bk9.orig/net/sched/em_u32.c	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.11-rc1-bk9/net/sched/em_u32.c	2005-01-22 12:37:28.000000000 +0100
@@ -0,0 +1,58 @@
+/*
+ * net/sched/em_u32.c	U32 Ematch
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Thomas Graf <tgraf@suug.ch>
+ *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * Based on net/sched/cls_u32.c
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <net/pkt_cls.h>
+
+static int em_u32_match(struct sk_buff *skb, struct tcf_ematch *em,
+			struct tcf_pkt_info *info)
+{
+	struct tc_u32_key *key = (struct tc_u32_key *) em->data;
+	unsigned char *ptr = skb->nh.raw;
+	
+	if (info) {
+		if (info->ptr)
+			ptr = info->ptr;
+		ptr += (info->nexthdr & key->offmask);
+	}
+	
+	return !((*(u32*) (ptr + key->off) ^ key->val) & key->mask);
+}
+
+static struct tcf_ematch_ops em_u32_ops = {
+	.kind	  = TCF_EM_U32,
+	.datalen  = sizeof(struct tc_u32_key),
+	.match	  = em_u32_match,
+	.owner	  = THIS_MODULE,
+	.link	  = LIST_HEAD_INIT(em_u32_ops.link)
+};
+
+static int __init init_em_u32(void)
+{
+	return tcf_em_register(&em_u32_ops);
+}
+
+static void __exit exit_em_u32(void) 
+{
+	tcf_em_unregister(&em_u32_ops);
+}
+
+MODULE_LICENSE("GPL");
+
+module_init(init_em_u32);
+module_exit(exit_em_u32);

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 5/6]: PKT_SCHED: Metadata ematch (meta)
  2005-01-23 23:00 [PATCHSET] Extended matches and basic classifier Thomas Graf
                   ` (3 preceding siblings ...)
  2005-01-23 23:03 ` [PATCH 4/6] PKT_SCHED: u32 ematch Thomas Graf
@ 2005-01-23 23:04 ` Thomas Graf
  2005-01-26 20:05   ` [RESEND " Thomas Graf
  2005-01-23 23:05 ` [PATCH 6/6] PKT_SCHED: Basic classifier Thomas Graf
                   ` (3 subsequent siblings)
  8 siblings, 1 reply; 21+ messages in thread
From: Thomas Graf @ 2005-01-23 23:04 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev

The meta ematch allows comparing various metadata values against
static values from usersapce or other metadata values. It currently
supports various numeric meta values such as netfilter mark, packet
length, security level, interface indices, tc classid, load average,
a random value but also variable length values such as interface
names. Adding support for additional meta values is as easy as
writing a data collector (usually 1-5 lines of code) and assign it
to a id and type by putting it into the meta operations table.

Signed-off-by: Thomas Graf <tgraf@suug.ch>

diff -Nru linux-2.6.11-rc2-bk1.orig/include/linux/pkt_cls.h linux-2.6.11-rc2-bk1/include/linux/pkt_cls.h
--- linux-2.6.11-rc2-bk1.orig/include/linux/pkt_cls.h	2005-01-23 17:38:12.000000000 +0100
+++ linux-2.6.11-rc2-bk1/include/linux/pkt_cls.h	2005-01-23 17:38:21.000000000 +0100
@@ -386,6 +386,7 @@
 	TCF_EM_CMP,
 	TCF_EM_NBYTE,
 	TCF_EM_U32,
+	TCF_EM_META,
 	__TCF_EM_MAX
 };
 
diff -Nru linux-2.6.11-rc2-bk1.orig/include/linux/tc_ematch/tc_em_meta.h linux-2.6.11-rc2-bk1/include/linux/tc_ematch/tc_em_meta.h
--- linux-2.6.11-rc2-bk1.orig/include/linux/tc_ematch/tc_em_meta.h	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.11-rc2-bk1/include/linux/tc_ematch/tc_em_meta.h	2005-01-23 17:38:21.000000000 +0100
@@ -0,0 +1,69 @@
+#ifndef __LINUX_TC_EM_META_H
+#define __LINUX_TC_EM_META_H
+
+#include <linux/pkt_cls.h>
+
+enum
+{
+	TCA_EM_META_UNSPEC,
+	TCA_EM_META_HDR,
+	TCA_EM_META_LVALUE,
+	TCA_EM_META_RVALUE,
+	__TCA_EM_META_MAX
+};
+#define TCA_EM_META_MAX (__TCA_EM_META_MAX - 1)
+
+struct tcf_meta_val
+{
+	__u16			kind;
+	__u8			shift;
+	__u8			op;
+};
+
+#define TCF_META_TYPE_MASK	(0xf << 12)
+#define TCF_META_TYPE(kind)	(((kind) & TCF_META_TYPE_MASK) >> 12)
+#define TCF_META_ID_MASK	0x7ff
+#define TCF_META_ID(kind)	((kind) & TCF_META_ID_MASK)
+
+enum
+{
+	TCF_META_TYPE_VAR,
+	TCF_META_TYPE_INT,
+	__TCF_META_TYPE_MAX
+};
+#define TCF_META_TYPE_MAX (__TCF_META_TYPE_MAX - 1)
+
+enum
+{
+	TCF_META_ID_VALUE,
+	TCF_META_ID_RANDOM,
+	TCF_META_ID_LOADAVG_0,
+	TCF_META_ID_LOADAVG_1,
+	TCF_META_ID_LOADAVG_2,
+	TCF_META_ID_DEV,
+	TCF_META_ID_INDEV,
+	TCF_META_ID_REALDEV,
+	TCF_META_ID_PRIORITY,
+	TCF_META_ID_PROTOCOL,
+	TCF_META_ID_SECURITY,
+	TCF_META_ID_PKTTYPE,
+	TCF_META_ID_PKTLEN,
+	TCF_META_ID_DATALEN,
+	TCF_META_ID_MACLEN,
+	TCF_META_ID_NFMARK,
+	TCF_META_ID_TCINDEX,
+	TCF_META_ID_TCVERDICT,
+	TCF_META_ID_TCCLASSID,
+	TCF_META_ID_RTCLASSID,
+	TCF_META_ID_RTIIF,
+	__TCF_META_ID_MAX
+};
+#define TCF_META_ID_MAX (__TCF_META_ID_MAX - 1)
+
+struct tcf_meta_hdr
+{
+	struct tcf_meta_val	left;
+	struct tcf_meta_val	right;
+};
+
+#endif
diff -Nru linux-2.6.11-rc2-bk1.orig/net/sched/Kconfig linux-2.6.11-rc2-bk1/net/sched/Kconfig
--- linux-2.6.11-rc2-bk1.orig/net/sched/Kconfig	2005-01-23 17:38:12.000000000 +0100
+++ linux-2.6.11-rc2-bk1/net/sched/Kconfig	2005-01-23 17:38:21.000000000 +0100
@@ -428,6 +428,17 @@
 	  To compile this code as a module, choose M here: the
 	  module will be called em_u32.
 
+config NET_EMATCH_META
+	tristate "Metadata"
+	depends on NET_EMATCH
+	---help---
+	  Say Y here if you want to be ablt to classify packets based on
+	  metadata such as load average, netfilter attributes, socket
+	  attributes and routing decisions.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called em_meta.
+
 config NET_CLS_ACT
 	bool "Packet ACTION"
 	depends on EXPERIMENTAL && NET_CLS && NET_QOS
diff -Nru linux-2.6.11-rc2-bk1.orig/net/sched/Makefile linux-2.6.11-rc2-bk1/net/sched/Makefile
--- linux-2.6.11-rc2-bk1.orig/net/sched/Makefile	2005-01-23 17:38:12.000000000 +0100
+++ linux-2.6.11-rc2-bk1/net/sched/Makefile	2005-01-23 17:38:21.000000000 +0100
@@ -37,3 +37,4 @@
 obj-$(CONFIG_NET_EMATCH_CMP)	+= em_cmp.o
 obj-$(CONFIG_NET_EMATCH_NBYTE)	+= em_nbyte.o
 obj-$(CONFIG_NET_EMATCH_U32)	+= em_u32.o
+obj-$(CONFIG_NET_EMATCH_META)	+= em_meta.o
diff -Nru linux-2.6.11-rc2-bk1.orig/net/sched/em_meta.c linux-2.6.11-rc2-bk1/net/sched/em_meta.c
--- linux-2.6.11-rc2-bk1.orig/net/sched/em_meta.c	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.11-rc2-bk1/net/sched/em_meta.c	2005-01-23 17:56:01.000000000 +0100
@@ -0,0 +1,609 @@
+/*
+ * net/sched/em_meta.c	Metadata ematch
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Thomas Graf <tgraf@suug.ch>
+ *
+ * ==========================================================================
+ * 
+ * 	The metadata ematch compares two meta objects where each object
+ * 	represents either a meta value stored in the kernel or a static
+ * 	value provided by userspace. The objects are not provided by
+ * 	userspace itself but rather a definition providing the information
+ * 	to build them. Every object is of a certain type which must be
+ * 	equal to the object it is being compared to.
+ *
+ * 	The definition of a objects conists of the type (meta type), a
+ * 	identifier (meta id) and additional type specific information.
+ * 	The meta id is either TCF_META_TYPE_VALUE for values provided by
+ * 	userspace or a index to the meta operations table consisting of
+ * 	function pointers to type specific meta data collectors returning
+ * 	the value of the requested meta value.
+ *
+ * 	         lvalue                                   rvalue
+ * 	      +-----------+                           +-----------+
+ * 	      | type: INT |                           | type: INT |
+ * 	 def  | id: INDEV |                           | id: VALUE |
+ * 	      | data:     |                           | data: 3   |
+ * 	      +-----------+                           +-----------+
+ * 	            |                                       |
+ * 	            ---> meta_ops[INT][INDEV](...)          |
+ *                            |                             |
+ * 	            -----------                             |
+ * 	            V                                       V
+ * 	      +-----------+                           +-----------+
+ * 	      | type: INT |                           | type: INT |
+ * 	 obj  | id: INDEV |                           | id: VALUE |
+ * 	      | data: 2   |<--data got filled out     | data: 3   |
+ * 	      +-----------+                           +-----------+
+ * 	            |                                         |
+ * 	            --------------> 2  equals 3 <--------------
+ *
+ * 	This is a simplified schema, the complexity varies depending
+ * 	on the meta type. Obviously, the length of the data must also
+ * 	be provided for non-numeric types.
+ *
+ * 	Additionaly, type dependant modifiers such as shift operators
+ * 	or mask may be applied to extend the functionaliy. As of now,
+ * 	the variable length type supports shifting the byte string to
+ * 	the right, eating up any number of octets and thus supporting
+ * 	wildcard interface name comparisons such as "ppp%" matching
+ * 	ppp0..9.
+ *
+ * 	NOTE: Certain meta values depend on other subsystems and are
+ * 	      only available if that subsytem is enabled in the kernel.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <linux/random.h>
+#include <linux/tc_ematch/tc_em_meta.h>
+#include <net/dst.h>
+#include <net/route.h>
+#include <net/pkt_cls.h>
+
+struct meta_obj
+{
+	unsigned long		value;
+	unsigned int		len;
+};
+
+struct meta_value
+{
+	struct tcf_meta_val	hdr;
+	unsigned long		val;
+	unsigned int		len;
+};
+
+struct meta_match
+{
+	struct meta_value	lvalue;
+	struct meta_value	rvalue;
+};
+
+static inline int meta_id(struct meta_value *v)
+{
+	return TCF_META_ID(v->hdr.kind);
+}
+
+static inline int meta_type(struct meta_value *v)
+{
+	return TCF_META_TYPE(v->hdr.kind);
+}
+
+#define META_COLLECTOR(FUNC) static void meta_##FUNC(struct sk_buff *skb, \
+	struct tcf_pkt_info *info, struct meta_value *v, \
+	struct meta_obj *dst, int *err)
+
+/**************************************************************************
+ * System status & misc
+ **************************************************************************/
+
+META_COLLECTOR(int_random)
+{
+	get_random_bytes(&dst->value, sizeof(dst->value));
+}
+
+static inline unsigned long fixed_loadavg(int load)
+{
+	int rnd_load = load + (FIXED_1/200);
+	int rnd_frac = ((rnd_load & (FIXED_1-1)) * 100) >> FSHIFT;
+
+	return ((rnd_load >> FSHIFT) * 100) + rnd_frac;
+}
+
+META_COLLECTOR(int_loadavg_0)
+{
+	dst->value = fixed_loadavg(avenrun[0]);
+}
+
+META_COLLECTOR(int_loadavg_1)
+{
+	dst->value = fixed_loadavg(avenrun[1]);
+}
+
+META_COLLECTOR(int_loadavg_2)
+{
+	dst->value = fixed_loadavg(avenrun[2]);
+}
+
+/**************************************************************************
+ * Device names & indices
+ **************************************************************************/
+
+static inline int int_dev(struct net_device *dev, struct meta_obj *dst)
+{
+	if (unlikely(dev == NULL))
+		return -1;
+
+	dst->value = dev->ifindex;
+	return 0;
+}
+
+static inline int var_dev(struct net_device *dev, struct meta_obj *dst)
+{
+	if (unlikely(dev == NULL))
+		return -1;
+
+	dst->value = (unsigned long) dev->name;
+	dst->len = strlen(dev->name);
+	return 0;
+}
+
+META_COLLECTOR(int_dev)
+{
+	*err = int_dev(skb->dev, dst);
+}
+
+META_COLLECTOR(var_dev)
+{
+	*err = var_dev(skb->dev, dst);
+}
+
+META_COLLECTOR(int_indev)
+{
+	*err = int_dev(skb->input_dev, dst);
+}
+
+META_COLLECTOR(var_indev)
+{
+	*err = var_dev(skb->input_dev, dst);
+}
+
+META_COLLECTOR(int_realdev)
+{
+	*err = int_dev(skb->real_dev, dst);
+}
+
+META_COLLECTOR(var_realdev)
+{
+	*err = var_dev(skb->real_dev, dst);
+}
+
+/**************************************************************************
+ * skb attributes
+ **************************************************************************/
+
+META_COLLECTOR(int_priority)
+{
+	dst->value = skb->priority;
+}
+
+META_COLLECTOR(int_protocol)
+{
+	/* Let userspace take care of the byte ordering */
+	dst->value = skb->protocol;
+}
+
+META_COLLECTOR(int_security)
+{
+	dst->value = skb->security;
+}
+
+META_COLLECTOR(int_pkttype)
+{
+	dst->value = skb->pkt_type;
+}
+
+META_COLLECTOR(int_pktlen)
+{
+	dst->value = skb->len;
+}
+
+META_COLLECTOR(int_datalen)
+{
+	dst->value = skb->data_len;
+}
+
+META_COLLECTOR(int_maclen)
+{
+	dst->value = skb->mac_len;
+}
+
+/**************************************************************************
+ * Netfilter
+ **************************************************************************/
+
+#ifdef CONFIG_NETFILTER
+META_COLLECTOR(int_nfmark)
+{
+	dst->value = skb->nfmark;
+}
+#endif
+
+/**************************************************************************
+ * Traffic Control
+ **************************************************************************/
+
+META_COLLECTOR(int_tcindex)
+{
+	dst->value = skb->tc_index;
+}
+
+#ifdef CONFIG_NET_CLS_ACT
+META_COLLECTOR(int_tcverd)
+{
+	dst->value = skb->tc_verd;
+}
+
+META_COLLECTOR(int_tcclassid)
+{
+	dst->value = skb->tc_classid;
+}
+#endif
+
+/**************************************************************************
+ * Routing
+ **************************************************************************/
+
+#ifdef CONFIG_NET_CLS_ROUTE
+META_COLLECTOR(int_rtclassid)
+{
+	if (unlikely(skb->dst == NULL))
+		*err = -1;
+	else
+		dst->value = skb->dst->tclassid;
+}
+#endif
+
+META_COLLECTOR(int_rtiif)
+{
+	if (unlikely(skb->dst == NULL))
+		*err = -1;
+	else
+		dst->value = ((struct rtable*) skb->dst)->fl.iif;
+}
+
+/**************************************************************************
+ * Meta value collectors assignment table
+ **************************************************************************/
+
+struct meta_ops
+{
+	void		(*get)(struct sk_buff *, struct tcf_pkt_info *,
+			       struct meta_value *, struct meta_obj *, int *);
+};
+
+/* Meta value operations table listing all meta value collectors and
+ * assigns them to a type and meta id. */
+static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = {
+	[TCF_META_TYPE_VAR] = {
+		[TCF_META_ID_DEV]	= { .get = meta_var_dev },
+		[TCF_META_ID_INDEV]	= { .get = meta_var_indev },
+		[TCF_META_ID_REALDEV]	= { .get = meta_var_realdev }
+	},
+	[TCF_META_TYPE_INT] = {
+		[TCF_META_ID_RANDOM]	= { .get = meta_int_random },
+		[TCF_META_ID_LOADAVG_0]	= { .get = meta_int_loadavg_0 },
+		[TCF_META_ID_LOADAVG_1]	= { .get = meta_int_loadavg_1 },
+		[TCF_META_ID_LOADAVG_2]	= { .get = meta_int_loadavg_2 },
+		[TCF_META_ID_DEV]	= { .get = meta_int_dev },
+		[TCF_META_ID_INDEV]	= { .get = meta_int_indev },
+		[TCF_META_ID_REALDEV]	= { .get = meta_int_realdev },
+		[TCF_META_ID_PRIORITY]	= { .get = meta_int_priority },
+		[TCF_META_ID_PROTOCOL]	= { .get = meta_int_protocol },
+		[TCF_META_ID_SECURITY]	= { .get = meta_int_security },
+		[TCF_META_ID_PKTTYPE]	= { .get = meta_int_pkttype },
+		[TCF_META_ID_PKTLEN]	= { .get = meta_int_pktlen },
+		[TCF_META_ID_DATALEN]	= { .get = meta_int_datalen },
+		[TCF_META_ID_MACLEN]	= { .get = meta_int_maclen },
+#ifdef CONFIG_NETFILTER
+		[TCF_META_ID_NFMARK]	= { .get = meta_int_nfmark },
+#endif
+		[TCF_META_ID_TCINDEX]	= { .get = meta_int_tcindex },
+#ifdef CONFIG_NET_CLS_ACT
+		[TCF_META_ID_TCVERDICT]	= { .get = meta_int_tcverd },
+		[TCF_META_ID_TCCLASSID]	= { .get = meta_int_tcclassid },
+#endif
+#ifdef CONFIG_NET_CLS_ROUTE
+		[TCF_META_ID_RTCLASSID]	= { .get = meta_int_rtclassid },
+#endif
+		[TCF_META_ID_RTIIF]	= { .get = meta_int_rtiif }
+	}
+};
+
+static inline struct meta_ops * meta_ops(struct meta_value *val)
+{
+	return &__meta_ops[meta_type(val)][meta_id(val)];
+}
+
+/**************************************************************************
+ * Type specific operations for TCF_META_TYPE_VAR
+ **************************************************************************/
+
+static int meta_var_compare(struct meta_obj *a, struct meta_obj *b)
+{
+	int r = a->len - b->len;
+
+	if (r == 0)
+		r = memcmp((void *) a->value, (void *) b->value, a->len);
+
+	return r;
+}
+
+static int meta_var_change(struct meta_value *dst, struct rtattr *rta)
+{
+	int len = RTA_PAYLOAD(rta);
+
+	dst->val = (unsigned long) kmalloc(len, GFP_KERNEL);
+	if (dst->val == 0UL)
+		return -ENOMEM;
+	memcpy((void *) dst->val, RTA_DATA(rta), len);
+	dst->len = len;
+	return 0;
+}
+
+static void meta_var_destroy(struct meta_value *v)
+{
+	if (v->val)
+		kfree((void *) v->val);
+}
+
+static void meta_var_apply_extras(struct meta_value *v,
+				  struct meta_obj *dst)
+{
+	int shift = v->hdr.shift;
+
+	if (shift && shift < dst->len)
+		dst->len -= shift;
+}
+
+/**************************************************************************
+ * Type specific operations for TCF_META_TYPE_INT
+ **************************************************************************/
+
+static int meta_int_compare(struct meta_obj *a, struct meta_obj *b)
+{
+	/* Let gcc optimize it, the unlikely is not really based on
+	 * some numbers but jump free code for mismatches seems
+	 * more logical. */
+	if (unlikely(a == b))
+		return 0;
+	else if (a < b)
+		return -1;
+	else
+		return 1;
+}
+
+static int meta_int_change(struct meta_value *dst, struct rtattr *rta)
+{
+	if (RTA_PAYLOAD(rta) >= sizeof(unsigned long)) {
+		dst->val = *(unsigned long *) RTA_DATA(rta);
+		dst->len = sizeof(unsigned long);
+	} else if (RTA_PAYLOAD(rta) == sizeof(u32)) {
+		dst->val = *(u32 *) RTA_DATA(rta);
+		dst->len = sizeof(u32);
+	} else
+		return -EINVAL;
+
+	return 0;
+}
+
+static void meta_int_apply_extras(struct meta_value *v,
+				  struct meta_obj *dst)
+{
+	if (v->hdr.shift)
+		dst->value >>= v->hdr.shift;
+
+	if (v->val)
+		dst->value &= v->val;
+}
+
+/**************************************************************************
+ * Type specific operations table
+ **************************************************************************/
+
+struct meta_type_ops
+{
+	void	(*destroy)(struct meta_value *);
+	int	(*compare)(struct meta_obj *, struct meta_obj *);
+	int	(*change)(struct meta_value *, struct rtattr *);
+	void	(*apply_extras)(struct meta_value *, struct meta_obj *);
+};
+
+static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX+1] = {
+	[TCF_META_TYPE_VAR] = {
+		.destroy = meta_var_destroy,
+		.compare = meta_var_compare,
+		.change = meta_var_change,
+		.apply_extras = meta_var_apply_extras
+	},
+	[TCF_META_TYPE_INT] = {
+		.compare = meta_int_compare,
+		.change = meta_int_change,
+		.apply_extras = meta_int_apply_extras
+	}
+};
+
+static inline struct meta_type_ops * meta_type_ops(struct meta_value *v)
+{
+	return &__meta_type_ops[meta_type(v)];
+}
+
+/**************************************************************************
+ * Core
+ **************************************************************************/
+
+static inline int meta_get(struct sk_buff *skb, struct tcf_pkt_info *info, 
+			   struct meta_value *v, struct meta_obj *dst)
+{
+	int err = 0;
+
+	if (meta_id(v) == TCF_META_ID_VALUE) {
+		dst->value = v->val;
+		dst->len = v->len;
+		return 0;
+	}
+
+	meta_ops(v)->get(skb, info, v, dst, &err);
+	if (err < 0)
+		return err;
+
+	if (meta_type_ops(v)->apply_extras)
+	    meta_type_ops(v)->apply_extras(v, dst);
+
+	return 0;
+}
+
+static int em_meta_match(struct sk_buff *skb, struct tcf_ematch *m,
+			 struct tcf_pkt_info *info)
+{
+	int r;
+	struct meta_match *meta = (struct meta_match *) m->data;
+	struct meta_obj l_value, r_value;
+
+	if (meta_get(skb, info, &meta->lvalue, &l_value) < 0 ||
+	    meta_get(skb, info, &meta->rvalue, &r_value) < 0)
+		return 0;
+
+	r = meta_type_ops(&meta->lvalue)->compare(&l_value, &r_value);
+
+	switch (meta->lvalue.hdr.op) {
+		case TCF_EM_OPND_EQ:
+			return !r;
+		case TCF_EM_OPND_LT:
+			return r < 0;
+		case TCF_EM_OPND_GT:
+			return r > 0;
+	}
+
+	return 0;
+}
+
+static inline void meta_delete(struct meta_match *meta)
+{
+	struct meta_type_ops *ops = meta_type_ops(&meta->lvalue);
+
+	if (ops && ops->destroy) {
+		ops->destroy(&meta->lvalue);
+		ops->destroy(&meta->rvalue);
+	}
+
+	kfree(meta);
+}
+
+static inline int meta_change_data(struct meta_value *dst, struct rtattr *rta)
+{
+	if (rta) {
+		if (RTA_PAYLOAD(rta) == 0)
+			return -EINVAL;
+
+		return meta_type_ops(dst)->change(dst, rta);
+	}
+
+	return 0;
+}
+
+static inline int meta_is_supported(struct meta_value *val)
+{
+	return (!meta_id(val) || meta_ops(val)->get);
+}
+
+static int em_meta_change(struct tcf_proto *tp, void *data, int len,
+			  struct tcf_ematch *m)
+{
+	int err = -EINVAL;
+	struct rtattr *tb[TCA_EM_META_MAX];
+	struct tcf_meta_hdr *hdr;
+	struct meta_match *meta = NULL;
+	
+	if (rtattr_parse(tb, TCA_EM_META_MAX, data, len) < 0)
+		goto errout;
+
+	if (tb[TCA_EM_META_HDR-1] == NULL ||
+	    RTA_PAYLOAD(tb[TCA_EM_META_HDR-1]) < sizeof(*hdr))
+		goto errout;
+	hdr = RTA_DATA(tb[TCA_EM_META_HDR-1]);
+
+	if (TCF_META_TYPE(hdr->left.kind) != TCF_META_TYPE(hdr->right.kind) ||
+	    TCF_META_TYPE(hdr->left.kind) > TCF_META_TYPE_MAX ||
+	    TCF_META_ID(hdr->left.kind) > TCF_META_ID_MAX ||
+	    TCF_META_ID(hdr->right.kind) > TCF_META_ID_MAX)
+		goto errout;
+
+	meta = kmalloc(sizeof(*meta), GFP_KERNEL);
+	if (meta == NULL)
+		goto errout;
+	memset(meta, 0, sizeof(*meta));
+
+	memcpy(&meta->lvalue.hdr, &hdr->left, sizeof(hdr->left));
+	memcpy(&meta->rvalue.hdr, &hdr->right, sizeof(hdr->right));
+
+	if (!meta_is_supported(&meta->lvalue) ||
+	    !meta_is_supported(&meta->rvalue)) {
+		err = -EOPNOTSUPP;
+		goto errout;
+	}
+
+	if (meta_change_data(&meta->lvalue, tb[TCA_EM_META_LVALUE-1]) < 0 ||
+	    meta_change_data(&meta->rvalue, tb[TCA_EM_META_RVALUE-1]) < 0)
+		goto errout;
+
+	m->datalen = sizeof(*meta);
+	m->data = (unsigned long) meta;
+
+	err = 0;
+errout:
+	if (err && meta)
+		meta_delete(meta);
+	return err;
+}
+
+static void em_meta_destroy(struct tcf_proto *tp, struct tcf_ematch *m)
+{
+	if (m)
+		meta_delete((struct meta_match *) m->data);
+}
+
+static struct tcf_ematch_ops em_meta_ops = {
+	.kind	  = TCF_EM_META,
+	.change	  = em_meta_change,
+	.match	  = em_meta_match,
+	.destroy  = em_meta_destroy,
+	.owner	  = THIS_MODULE,
+	.link	  = LIST_HEAD_INIT(em_meta_ops.link)
+};
+
+static int __init init_em_meta(void)
+{
+	return tcf_em_register(&em_meta_ops);
+}
+
+static void __exit exit_em_meta(void) 
+{
+	tcf_em_unregister(&em_meta_ops);
+}
+
+MODULE_LICENSE("GPL");
+
+module_init(init_em_meta);
+module_exit(exit_em_meta);

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 6/6] PKT_SCHED: Basic classifier
  2005-01-23 23:00 [PATCHSET] Extended matches and basic classifier Thomas Graf
                   ` (4 preceding siblings ...)
  2005-01-23 23:04 ` [PATCH 5/6]: PKT_SCHED: Metadata ematch (meta) Thomas Graf
@ 2005-01-23 23:05 ` Thomas Graf
  2005-01-23 23:21 ` [PATCHSET] Extended matches and basic classifier Thomas Graf
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 21+ messages in thread
From: Thomas Graf @ 2005-01-23 23:05 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev

The basic classifier is the most simple classifier one can think of,
it doesn't do anything on its own but to support extended matches and
actions. A basic classifier returns true if no ematches or actions
are configured and thus can also be used as a catch-all classifier.

Signed-off-by: Thomas Graf <tgraf@suug.ch>

diff -Nru linux-2.6.11-rc1-bk9.orig/include/linux/pkt_cls.h linux-2.6.11-rc1-bk9/include/linux/pkt_cls.h
--- linux-2.6.11-rc1-bk9.orig/include/linux/pkt_cls.h	2005-01-22 15:29:08.000000000 +0100
+++ linux-2.6.11-rc1-bk9/include/linux/pkt_cls.h	2005-01-22 15:55:17.000000000 +0100
@@ -319,6 +319,20 @@
 
 #define TCA_TCINDEX_MAX     (__TCA_TCINDEX_MAX - 1)
 
+/* Basic filter */
+
+enum
+{
+	TCA_BASIC_UNSPEC,
+	TCA_BASIC_CLASSID,
+	TCA_BASIC_EMATCHES,
+	TCA_BASIC_ACT,
+	TCA_BASIC_POLICE,
+	__TCA_BASIC_MAX
+};
+
+#define TCA_BASIC_MAX (__TCA_BASIC_MAX - 1)
+
 /* Extended Matches */
 
 struct tcf_ematch_tree_hdr
diff -Nru linux-2.6.11-rc1-bk9.orig/net/sched/Kconfig linux-2.6.11-rc1-bk9/net/sched/Kconfig
--- linux-2.6.11-rc1-bk9.orig/net/sched/Kconfig	2005-01-22 15:29:08.000000000 +0100
+++ linux-2.6.11-rc1-bk9/net/sched/Kconfig	2005-01-22 15:54:59.000000000 +0100
@@ -269,6 +269,16 @@
 	  Documentation and software is at
 	  <http://diffserv.sourceforge.net/>.
 
+config NET_CLS_BASIC
+	tristate "Basic classifier"
+	depends on NET_CLS
+	---help---
+	  Say Y here if you want to be able to classify packets using
+	  only extended matches and actions.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called cls_basic.
+
 config NET_CLS_TCINDEX
 	tristate "TC index classifier"
 	depends on NET_CLS
diff -Nru linux-2.6.11-rc1-bk9.orig/net/sched/Makefile linux-2.6.11-rc1-bk9/net/sched/Makefile
--- linux-2.6.11-rc1-bk9.orig/net/sched/Makefile	2005-01-22 15:29:08.000000000 +0100
+++ linux-2.6.11-rc1-bk9/net/sched/Makefile	2005-01-22 15:54:59.000000000 +0100
@@ -33,6 +33,7 @@
 obj-$(CONFIG_NET_CLS_RSVP)	+= cls_rsvp.o
 obj-$(CONFIG_NET_CLS_TCINDEX)	+= cls_tcindex.o
 obj-$(CONFIG_NET_CLS_RSVP6)	+= cls_rsvp6.o
+obj-$(CONFIG_NET_CLS_BASIC)	+= cls_basic.o
 obj-$(CONFIG_NET_EMATCH)	+= ematch.o
 obj-$(CONFIG_NET_EMATCH_CMP)	+= em_cmp.o
 obj-$(CONFIG_NET_EMATCH_NBYTE)	+= em_nbyte.o
diff -Nru linux-2.6.11-rc1-bk9.orig/net/sched/cls_basic.c linux-2.6.11-rc1-bk9/net/sched/cls_basic.c
--- linux-2.6.11-rc1-bk9.orig/net/sched/cls_basic.c	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.11-rc1-bk9/net/sched/cls_basic.c	2005-01-22 16:12:49.000000000 +0100
@@ -0,0 +1,303 @@
+/*
+ * net/sched/cls_basic.c	Basic Packet Classifier.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Thomas Graf <tgraf@suug.ch>
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/rtnetlink.h>
+#include <linux/skbuff.h>
+#include <net/act_api.h>
+#include <net/pkt_cls.h>
+
+struct basic_head
+{
+	u32			hgenerator;
+	struct list_head	flist;
+};
+
+struct basic_filter
+{
+	u32			handle;
+	struct tcf_exts		exts;
+	struct tcf_ematch_tree	ematches;
+	struct tcf_result	res;
+	struct list_head	link;
+};
+
+static struct tcf_ext_map basic_ext_map = {
+	.action = TCA_BASIC_ACT,
+	.police = TCA_BASIC_POLICE
+};
+
+static int basic_classify(struct sk_buff *skb, struct tcf_proto *tp,
+			  struct tcf_result *res)
+{
+	int r;
+	struct basic_head *head = (struct basic_head *) tp->root;
+	struct basic_filter *f;
+
+	list_for_each_entry(f, &head->flist, link) {
+		if (!tcf_em_tree_match(skb, &f->ematches, NULL))
+			continue;
+		*res = f->res;
+		r = tcf_exts_exec(skb, &f->exts, res);
+		if (r < 0)
+			continue;
+		return r;
+	}
+	return -1;
+}
+
+static unsigned long basic_get(struct tcf_proto *tp, u32 handle)
+{
+	unsigned long l = 0UL;
+	struct basic_head *head = (struct basic_head *) tp->root;
+	struct basic_filter *f;
+
+	if (head == NULL)
+		return 0UL;
+
+	list_for_each_entry(f, &head->flist, link)
+		if (f->handle == handle)
+			l = (unsigned long) f;
+
+	return l;
+}
+
+static void basic_put(struct tcf_proto *tp, unsigned long f)
+{
+}
+
+static int basic_init(struct tcf_proto *tp)
+{
+	return 0;
+}
+
+static inline void basic_delete_filter(struct tcf_proto *tp,
+				       struct basic_filter *f)
+{
+	tcf_unbind_filter(tp, &f->res);
+	tcf_exts_destroy(tp, &f->exts);
+	tcf_em_tree_destroy(tp, &f->ematches);
+	kfree(f);
+}
+
+static void basic_destroy(struct tcf_proto *tp)
+{
+	struct basic_head *head = (struct basic_head *) xchg(&tp->root, NULL);
+	struct basic_filter *f, *n;
+	
+	list_for_each_entry_safe(f, n, &head->flist, link) {
+		list_del(&f->link);
+		basic_delete_filter(tp, f);
+	}
+}
+
+static int basic_delete(struct tcf_proto *tp, unsigned long arg)
+{
+	struct basic_head *head = (struct basic_head *) tp->root;
+	struct basic_filter *t, *f = (struct basic_filter *) arg;
+
+	list_for_each_entry(t, &head->flist, link)
+		if (t == f) {
+			tcf_tree_lock(tp);
+			list_del(&t->link);
+			tcf_tree_unlock(tp);
+			basic_delete_filter(tp, t);
+			return 0;
+		}
+
+	return -ENOENT;
+}
+
+static inline int basic_set_parms(struct tcf_proto *tp, struct basic_filter *f,
+				  unsigned long base, struct rtattr **tb,
+				  struct rtattr *est)
+{
+	int err = -EINVAL;
+	struct tcf_exts e;
+	struct tcf_ematch_tree t;
+
+	if (tb[TCA_BASIC_CLASSID-1])
+		if (RTA_PAYLOAD(tb[TCA_BASIC_CLASSID-1]) < sizeof(u32))
+			return err;
+
+	err = tcf_exts_validate(tp, tb, est, &e, &basic_ext_map);
+	if (err < 0)
+		return err;
+
+	err = tcf_em_tree_validate(tp, tb[TCA_BASIC_EMATCHES-1], &t);
+	if (err < 0)
+		goto errout;
+
+	if (tb[TCA_BASIC_CLASSID-1]) {
+		f->res.classid = *(u32*)RTA_DATA(tb[TCA_BASIC_CLASSID-1]);
+		tcf_bind_filter(tp, &f->res, base);
+	}
+
+	tcf_exts_change(tp, &f->exts, &e);
+	tcf_em_tree_change(tp, &f->ematches, &t);
+
+	return 0;
+errout:
+	tcf_exts_destroy(tp, &e);
+	return err;
+}
+
+static int basic_change(struct tcf_proto *tp, unsigned long base, u32 handle,
+		        struct rtattr **tca, unsigned long *arg)
+{
+	int err = -EINVAL;
+	struct basic_head *head = (struct basic_head *) tp->root;
+	struct rtattr *tb[TCA_BASIC_MAX];
+	struct basic_filter *f = (struct basic_filter *) *arg;
+
+	if (tca[TCA_OPTIONS-1] == NULL)
+		return -EINVAL;
+
+	if (rtattr_parse_nested(tb, TCA_BASIC_MAX, tca[TCA_OPTIONS-1]) < 0)
+		return -EINVAL;
+
+	if (f != NULL) {
+		if (handle && f->handle != handle)
+			return -EINVAL;
+		return basic_set_parms(tp, f, base, tb, tca[TCA_RATE-1]);
+	}
+
+	err = -ENOBUFS;
+	if (head == NULL) {
+		head = kmalloc(sizeof(*head), GFP_KERNEL);
+		if (head == NULL)
+			goto errout;
+
+		memset(head, 0, sizeof(*head));
+		INIT_LIST_HEAD(&head->flist);
+		tp->root = head;
+	}
+
+	f = kmalloc(sizeof(*f), GFP_KERNEL);
+	if (f == NULL)
+		goto errout;
+	memset(f, 0, sizeof(*f));
+
+	err = -EINVAL;
+	if (handle)
+		f->handle = handle;
+	else {
+		int i = 0x80000000;
+		do {
+			if (++head->hgenerator == 0x7FFFFFFF)
+				head->hgenerator = 1;
+		} while (--i > 0 && basic_get(tp, head->hgenerator));
+
+		if (i <= 0) {
+			printk(KERN_ERR "Insufficient number of handles\n");
+			goto errout;
+		}
+
+		f->handle = head->hgenerator;
+	}
+
+	err = basic_set_parms(tp, f, base, tb, tca[TCA_RATE-1]);
+	if (err < 0)
+		goto errout;
+
+	tcf_tree_lock(tp);
+	list_add(&f->link, &head->flist);
+	tcf_tree_unlock(tp);
+	*arg = (unsigned long) f;
+
+	return 0;
+errout:
+	if (*arg == 0UL && f)
+		kfree(f);
+
+	return err;
+}
+
+static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+{
+	struct basic_head *head = (struct basic_head *) tp->root;
+	struct basic_filter *f;
+
+	list_for_each_entry(f, &head->flist, link) {
+		if (arg->count < arg->skip)
+			goto skip;
+
+		if (arg->fn(tp, (unsigned long) f, arg) < 0) {
+			arg->stop = 1;
+			break;
+		}
+skip:
+		arg->count++;
+	}
+}
+
+static int basic_dump(struct tcf_proto *tp, unsigned long fh,
+		      struct sk_buff *skb, struct tcmsg *t)
+{
+	struct basic_filter *f = (struct basic_filter *) fh;
+	unsigned char *b = skb->tail;
+	struct rtattr *rta;
+
+	if (f == NULL)
+		return skb->len;
+
+	t->tcm_handle = f->handle;
+
+	rta = (struct rtattr *) b;
+	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+
+	if (tcf_exts_dump(skb, &f->exts, &basic_ext_map) < 0 ||
+	    tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0)
+		goto rtattr_failure;
+
+	rta->rta_len = (skb->tail - b);
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+static struct tcf_proto_ops cls_basic_ops = {
+	.kind		=	"basic",
+	.classify	=	basic_classify,
+	.init		=	basic_init,
+	.destroy	=	basic_destroy,
+	.get		=	basic_get,
+	.put		=	basic_put,
+	.change		=	basic_change,
+	.delete		=	basic_delete,
+	.walk		=	basic_walk,
+	.dump		=	basic_dump,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init init_basic(void)
+{
+	return register_tcf_proto_ops(&cls_basic_ops);
+}
+
+static void __exit exit_basic(void) 
+{
+	unregister_tcf_proto_ops(&cls_basic_ops);
+}
+
+module_init(init_basic)
+module_exit(exit_basic)
+MODULE_LICENSE("GPL");
+

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCHSET] Extended matches and basic classifier
  2005-01-23 23:00 [PATCHSET] Extended matches and basic classifier Thomas Graf
                   ` (5 preceding siblings ...)
  2005-01-23 23:05 ` [PATCH 6/6] PKT_SCHED: Basic classifier Thomas Graf
@ 2005-01-23 23:21 ` Thomas Graf
  2005-01-26  5:52 ` David S. Miller
  2005-02-15 21:38 ` David S. Miller
  8 siblings, 0 replies; 21+ messages in thread
From: Thomas Graf @ 2005-01-23 23:21 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev

* Thomas Graf <20050123230012.GB23931@postel.suug.ch> 2005-01-24 00:00
> 
> This patchset adds the ematch API, the ematches cmp, nbyte, u32, meta,
> and the basic classifier. It doesn't touch any existing code.

Summary about what is planned next regarding ematches:

  The u32 classifier will get transformed to use the u32 ematch instead of
  its own. It will extend the u32 classifer to support logic
  expressions and combines hashing with the classifiction algorithms in
  the ematches. This has been dicussed already about 1-2 weeks ago. It
  will also make the u32 nfmark obsolete.

  The indev match in cls_u32 and cls_fw is going to be removed, it is
  obsoleted by the meta ematch.

  cls_fw is likely to be obsoleted by the meta ematch as well, it still
  has the advantage of the hashing over the ematch though.

  A _very_ simple regular expression ematch.

  Some kind of text search ematch, it is yet unclear wheter to make it
  stateful and maybe use the recent text search code in netfilter or
  make it stateless and simpler.

  The meta ematch will be extended by the addition of more netfilter,
  socket, and routing attributes.

  A "state" ematch allowing to limit classification on the first few
  packets of a connection and mark all further packets the same.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/6] PKT_SCHED: Extended Matches API
  2005-01-23 23:01 ` [PATCH 1/6] PKT_SCHED: Extended Matches API Thomas Graf
@ 2005-01-24  0:12   ` Patrick McHardy
  2005-01-24  0:49     ` Thomas Graf
  2005-01-25 23:22   ` [RESEND " Thomas Graf
  1 sibling, 1 reply; 21+ messages in thread
From: Patrick McHardy @ 2005-01-24  0:12 UTC (permalink / raw)
  To: Thomas Graf; +Cc: David S. Miller, netdev

Thomas Graf wrote:

>diff -Nru linux-2.6.11-rc2-bk1.orig/include/net/pkt_cls.h linux-2.6.11-rc2-bk1/include/net/pkt_cls.h
>--- linux-2.6.11-rc2-bk1.orig/include/net/pkt_cls.h	2005-01-23 19:08:31.000000000 +0100
>+++ linux-2.6.11-rc2-bk1/include/net/pkt_cls.h	2005-01-23 19:08:44.000000000 +0100
>@@ -148,6 +148,176 @@
> extern int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts,
> 	                       struct tcf_ext_map *map);
> 
>+/**
>+ * struct tcf_pkt_info - packet information
>+ */
>+struct tcf_pkt_info
>+{
>+};
>+
>+#ifdef CONFIG_NET_EMATCH
>+
>+struct tcf_ematch_ops;
>+
>+/**
>+ * struct tcf_ematch - extended match (ematch)
>+ * 
>+ * @matchid: identifier to allow userspace to reidentify a match
>+ * @flags: flags specifying attributes and the relation to other matches
>+ * @ops: the operations lookup table of the corresponding ematch module
>+ * @datalen: length of the ematch specific configuration data
>+ * @data: ematch specific data
>+ */
>+struct tcf_ematch
>+{
>+	u16			matchid;
>+	u16			flags;
>+	struct tcf_ematch_ops * ops;
>+	unsigned int		datalen;
>+	unsigned long		data;
>+};
>
This layout leaves two holes on 64 bit, how about:

{
    struct tcf_ematch_ops *ops;
    unsigned long data;
    unsigned int datalen;
    u16 matchid;
    u16 flags;
};

>diff -Nru linux-2.6.11-rc2-bk1.orig/net/sched/ematch.c linux-2.6.11-rc2-bk1/net/sched/ematch.c
>--- linux-2.6.11-rc2-bk1.orig/net/sched/ematch.c	1970-01-01 01:00:00.000000000 +0100
>+++ linux-2.6.11-rc2-bk1/net/sched/ematch.c	2005-01-23 16:31:57.000000000 +0100
>@@ -0,0 +1,526 @@
>+/*
>+ * net/sched/ematch.c		Extended Match API
>+ *
>+ *		This program is free software; you can redistribute it and/or
>+ *		modify it under the terms of the GNU General Public License
>+ *		as published by the Free Software Foundation; either version
>+ *		2 of the License, or (at your option) any later version.
>+ *
>+ * Authors:	Thomas Graf <tgraf@suug.ch>
>+ *
>+ * ==========================================================================
>+ *
>+ * An extended match (ematch) is a small classification tool not worth
>+ * writing a full classifier for. Ematches can be interconnected to form
>+ * a logic expression and get attached to classifiers to extend their
>+ * functionatlity.
>+ *
>+ * The userspace part transforms the logic expressions into an array
>+ * consisting of multiple sequences of interconnected ematches separated
>+ * by markers. Precedence is implemented by a special ematch kind
>+ * referencing a sequence beyond the marker of the current sequence
>+ * causing the current position in the sequence to be pushed onto a stack
>+ * to allow the current position to be overwritten by the position referenced
>+ * in the special ematch. Matching continues in the new sequence until a
>+ * marker is reached causing the position to be restored from the stack.
>+ *
>+ * Example:
>+ *          A AND (B1 OR B2) AND C AND D
>+ *
>+ *              ------->-PUSH-------
>+ *    -->--    /         -->--      \   -->--
>+ *   /     \  /         /     \      \ /     \
>+ * +-------+-------+-------+-------+-------+--------+
>+ * | A AND | B AND | C AND | D END | B1 OR | B2 END |
>+ * +-------+-------+-------+-------+-------+--------+
>+ *                    \                      /
>+ *                     --------<-POP---------
>+ *
>+ * where B is a virtual ematch referencing to sequence starting with B1.
>+ * 
>+ * ==========================================================================
>+ *
>+ * How to write an ematch in 60 seconds
>+ * ------------------------------------
>+ * 
>+ *   1) Provide a matcher function:
>+ *      static int my_match(struct sk_buff *skb, struct tcf_ematch *m,
>+ *                          struct tcf_pkt_info *info)
>+ *      {
>+ *      	struct mydata *d = (struct mydata *) m->data;
>+ *
>+ *      	if (...matching goes here...)
>+ *      		return 1;
>+ *      	else
>+ *      		return 0;
>+ *      }
>+ *
>+ *   2) Fill out a struct tcf_ematch_ops:
>+ *      static struct tcf_ematch_ops my_ops = {
>+ *      	.kind = unique id,
>+ *      	.datalen = sizeof(struct mydata),
>+ *      	.match = my_match,
>+ *      	.owner = THIS_MODULE,
>+ *      };
>+ *
>+ *   3) Register/Unregister your ematch:
>+ *      static int __init init_my_ematch(void)
>+ *      {
>+ *      	return tcf_em_register(&my_ops);
>+ *      }
>+ *
>+ *      static void __exit exit_my_ematch(void)
>+ *      {
>+ *      	return tcf_em_unregister(&my_ops);
>+ *      }
>+ *
>+ *      module_init(init_my_ematch);
>+ *      module_exit(exit_my_ematch);
>+ *
>+ *   4) By now you should have two more seconds left, barely enough to
>+ *      open up a beer to watch the compilation going.
>+ */
>+
>+#include <linux/config.h>
>+#include <linux/module.h>
>+#include <linux/types.h>
>+#include <linux/kernel.h>
>+#include <linux/sched.h>
>+#include <linux/mm.h>
>+#include <linux/errno.h>
>+#include <linux/interrupt.h>
>+#include <linux/rtnetlink.h>
>+#include <linux/skbuff.h>
>+#include <net/pkt_cls.h>
>+#include <config/net/ematch/stack.h>
>+
>+static LIST_HEAD(ematch_ops);
>+static DEFINE_RWLOCK(ematch_mod_lock);
>+
>+static inline struct tcf_ematch_ops * tcf_em_lookup(u16 kind)
>
>+{
>+	struct tcf_ematch_ops *e = NULL;
>+
>+	read_lock(&ematch_mod_lock);
>+	list_for_each_entry(e, &ematch_ops, link) {
>+		if (kind == e->kind) {
>+			if (!try_module_get(e->owner))
>+				e = NULL;
>+			break;
>+		}
>+	}
>
e is the iterator, if nothing matched it will contain the last element now

>+	read_unlock(&ematch_mod_lock);
>+
>+	return e;
>+}
>+
>+/**
>+ * tcf_em_register - register an extended match
>+ * 
>+ * @ops: ematch operations lookup table
>+ *
>+ * This function must be called by ematches to announce their presence.
>+ * The given @ops must have kind set to a unique identifier and the
>+ * callback match() must be implemented. All other callbacks are optional
>+ * and a fallback implementation is used instead.
>+ *
>+ * Returns -EEXISTS if an ematch of the same kind has already registered.
>+ */
>+int tcf_em_register(struct tcf_ematch_ops *ops)
>+{
>+	int err = -EEXIST;
>+	struct tcf_ematch_ops *e;
>+
>+	write_lock(&ematch_mod_lock);
>+	list_for_each_entry(e, &ematch_ops, link)
>+		if (ops->kind == e->kind)
>+			goto errout;
>+
>+	list_add_tail(&ops->link, &ematch_ops);
>+	err = 0;
>+errout:
>+	write_unlock(&ematch_mod_lock);
>+	return err;
>+}
>+
>+/**
>+ * tcf_em_unregister - unregster and extended match
>+ *
>+ * @ops: ematch operations lookup table
>+ *
>+ * This function must be called by ematches to announce their disappearance
>+ * for examples when the module gets unloaded. The @ops parameter must be
>+ * the same as the one used for registration.
>+ *
>+ * Returns -ENOENT if no matching ematch was found.
>+ */
>+int tcf_em_unregister(struct tcf_ematch_ops *ops)
>+{
>+	int err = 0;
>+	struct tcf_ematch_ops *e;
>+
>+	write_lock(&ematch_mod_lock);
>+	list_for_each_entry(e, &ematch_ops, link) {
>+		if (e == ops) {
>+			list_del(&e->link);
>+			goto out;
>+		}
>+	}
>+
>+	err = -ENOENT;
>+out:
>+	write_unlock(&ematch_mod_lock);
>+	return err;
>+}
>+
>+static inline struct tcf_ematch * tcf_em_get_match(struct tcf_ematch_tree *tree,
>+						   int index)
>+{
>+	return &tree->matches[index];
>+}
>+
>+
>+static int tcf_em_validate(struct tcf_proto *tp,
>+			   struct tcf_ematch_tree_hdr *tree_hdr,
>+			   struct tcf_ematch *em, struct rtattr *rta, int idx)
>+{
>+	int err = -EINVAL;
>+	struct tcf_ematch_hdr *em_hdr = RTA_DATA(rta);
>+	int data_len = RTA_PAYLOAD(rta) - sizeof(*em_hdr);
>+	void *data = (void *) em_hdr + sizeof(*em_hdr);
>+
>+	if (!TCF_EM_REL_VALID(em_hdr->flags))
>+		goto errout;
>+
>+	if (em_hdr->kind == TCF_EM_CONTAINER) {
>+		/* Special ematch called "container", carries an index
>+		 * referencing an external ematch sequence. */
>+		u32 ref;
>+
>+		if (data_len < sizeof(ref))
>+			goto errout;
>+		ref = *(u32 *) data;
>+
>+		if (ref >= tree_hdr->nmatches)
>+			goto errout;
>+
>+		/* We do not allow backward jumps to avoid loops and jumps
>+		 * to our own position are of course illegal. */
>+		if (ref <= idx)
>+			goto errout;
>+
>+		
>+		em->data = ref;
>+	} else {
>+		/* Note: This lookup will increase the module refcnt
>+		 * of the ematch module referenced. In case of a failure,
>+		 * a destroy function is called by the underlying layer
>+		 * which automatically releases the reference again, therefore
>+		 * the module MUST not be given back under any circumstances
>+		 * here. Be aware, the destroy function assumes that the
>+		 * module is held if the ops field is non zero. */
>+		em->ops = tcf_em_lookup(em_hdr->kind);
>+
>+		if (em->ops == NULL) {
>+			err = -ENOENT;
>+			goto errout;
>+		}
>+
>+		/* ematch module provides expected length of data, so we
>+		 * can do a basic sanity check. */
>+		if (em->ops->datalen && data_len < em->ops->datalen)
>+			goto errout;
>+
>+		if (em->ops->change) {
>+			err = em->ops->change(tp, data, data_len, em);
>+			if (err < 0)
>+				goto errout;
>+		} else if (data_len > 0) {
>+			/* ematch module doesn't provide an own change
>+			 * procedure and expects us to allocate and copy
>+			 * the ematch data.
>+			 *
>+			 * TCF_EM_SIMPLE may be specified stating that the
>+			 * data only consists of a u32 integer and the module
>+			 * does not expected a memory reference but rather
>+			 * the value carried. */
>+			if (em_hdr->flags & TCF_EM_SIMPLE) {
>+				if (data_len < sizeof(u32))
>+					goto errout;
>+				em->data = *(u32 *) data;
>+			} else {
>+				void *v = kmalloc(data_len, GFP_KERNEL);
>+				if (v == NULL) {
>+					err = -ENOBUFS;
>+					goto errout;
>+				}
>+				memcpy(v, data, data_len);
>+				em->data = (unsigned long) v;
>+			}
>+		}
>+	}
>+
>+	em->matchid = em_hdr->matchid;
>+	em->flags = em_hdr->flags;
>+	em->datalen = data_len;
>+
>+	err = 0;
>+errout:
>+	return err;
>+}
>+
>+/**
>+ * tcf_em_tree_validate - validate ematch config TLV and build ematch tree
>+ *
>+ * @tp: classifier kind handle
>+ * @rta: ematch tree configuration TLV
>+ * @tree: destination ematch tree variable to store the resulting
>+ *        ematch tree.
>+ *
>+ * This function validates the given configuration TLV @rta and builds an
>+ * ematch tree in @tree. The resulting tree must later be copied into
>+ * the private classifier data using tcf_em_tree_change(). You MUST NOT
>+ * provide the ematch tree variable of the private classifier data directly,
>+ * the changes would not be locked properly.
>+ *
>+ * Returns a negative error code if the configuration TLV contains errors.
>+ */
>+int tcf_em_tree_validate(struct tcf_proto *tp, struct rtattr *rta,
>+			 struct tcf_ematch_tree *tree)
>+{
>+	int idx, list_len, matches_len, err = -EINVAL;
>+	struct rtattr *tb[TCA_EMATCH_TREE_MAX];
>+	struct rtattr *rt_match, *rt_hdr, *rt_list;
>+	struct tcf_ematch_tree_hdr *tree_hdr;
>+	struct tcf_ematch *em;
>+
>+	if (rtattr_parse_nested(tb, TCA_EMATCH_TREE_MAX, rta) < 0)
>+		goto errout;
>+
>+	rt_hdr = tb[TCA_EMATCH_TREE_HDR-1];
>+	rt_list = tb[TCA_EMATCH_TREE_LIST-1];
>+
>+	if (rt_hdr == NULL || rt_list == NULL)
>+		goto errout;
>+
>+	if (RTA_PAYLOAD(rt_hdr) < sizeof(*tree_hdr) ||
>+	    RTA_PAYLOAD(rt_list) < sizeof(*rt_match))
>+		goto errout;
>+
>+	tree_hdr = RTA_DATA(rt_hdr);
>+	memcpy(&tree->hdr, tree_hdr, sizeof(*tree_hdr));
>+
>+	rt_match = RTA_DATA(rt_list);
>+	list_len = RTA_PAYLOAD(rt_list);
>+	matches_len = tree_hdr->nmatches * sizeof(*em);
>+
>+	tree->matches = kmalloc(matches_len, GFP_KERNEL);
>+	if (tree->matches == NULL)
>+		goto errout;
>+	memset(tree->matches, 0, matches_len);
>+
>+	/* We do not use rtattr_parse_nested here because the maximum
>+	 * number of attributes is unknown. This saves us the allocation
>+	 * for a tb buffer which would serve no purpose at all.
>+	 * 
>+	 * The array of rt attributes is parsed in the order as they are
>+	 * provided, their type must be incremental from 1 to n. Even
>+	 * if it does not serve any real purpose, a failure of sticking
>+	 * to this policy will result in parsing failure. */
>+	for (idx = 0; RTA_OK(rt_match, list_len); idx++) {
>+		err = -EINVAL;
>+
>+		if (rt_match->rta_type != (idx + 1))
>+			goto errout_abort;
>+
>+		if (idx >= tree_hdr->nmatches)
>+			goto errout_abort;
>+
>+		if (RTA_PAYLOAD(rt_match) < sizeof(struct tcf_ematch_hdr))
>+			goto errout_abort;
>+
>+		em = tcf_em_get_match(tree, idx);
>+
>+		err = tcf_em_validate(tp, tree_hdr, em, rt_match, idx);
>+		if (err < 0)
>+			goto errout_abort;
>+
>+		rt_match = RTA_NEXT(rt_match, list_len);
>+	}
>+
>+	/* Check if the number of matches provided by userspace actually
>+	 * complies with the array of matches. The number was used for
>+	 * the validation of references and a mismatch could lead to
>+	 * undefined references during the matching process. */
>+	if (idx != tree_hdr->nmatches) {
>+		err = -EINVAL;
>+		goto errout_abort;
>+	}
>+
>+	err = 0;
>+errout:
>+	return err;
>+
>+errout_abort:
>+	tcf_em_tree_destroy(tp, tree);
>+	return err;
>+}
>+
>+/**
>+ * tcf_em_tree_destroy - destroy an ematch tree
>+ *
>+ * @tp: classifier kind handle
>+ * @tree: ematch tree to be deleted
>+ *
>+ * This functions destroys an ematch tree previously created by
>+ * tcf_em_tree_validate()/tcf_em_tree_change(). You must ensure that
>+ * the ematch tree is not in use before calling this function.
>+ */
>+void tcf_em_tree_destroy(struct tcf_proto *tp, struct tcf_ematch_tree *tree)
>+{
>+	int i;
>+
>+	if (tree->matches == NULL)
>+		return;
>+
>+	for (i = 0; i < tree->hdr.nmatches; i++) {
>+		struct tcf_ematch *em = tcf_em_get_match(tree, i);
>+
>+		if (em->ops) {
>+			if (em->ops->destroy)
>+				em->ops->destroy(tp, em);
>+			else if (!tcf_em_is_simple(em) && em->data)
>+				kfree((void *) em->data);
>+			module_put(em->ops->owner);
>+		}
>+	}
>+	
>+	tree->hdr.nmatches = 0;
>+	kfree(xchg(&tree->matches, NULL));
>  
>
xchg is not necessary here. Setting tree->matches to NULL also doesn't look
necessary. As the comment above indicates, the caller needs to ensure the
tree is unsused, so it should be easy for him to ensure he won't destroy the
same tree twice.

>+}
>+
>+/**
>+ * tcf_em_tree_dump - dump ematch tree into a rtnl message
>+ *
>+ * @skb: skb holding the rtnl message
>+ * @t: ematch tree to be dumped
>+ * @tlv: TLV type to be used to encapsulate the tree
>+ *
>+ * This function dumps a ematch tree into a rtnl message. It is valid to
>+ * call this function while the ematch tree is in use.
>+ *
>+ * Returns -1 if the skb tailroom is insufficient.
>+ */
>+int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv)
>+{
>+	int i;
>+	struct rtattr * top_start = (struct rtattr*) skb->tail;
>+	struct rtattr * list_start;
>+
>+	RTA_PUT(skb, tlv, 0, NULL);
>+	RTA_PUT(skb, TCA_EMATCH_TREE_HDR, sizeof(tree->hdr), &tree->hdr);
>+
>+	list_start = (struct rtattr *) skb->tail;
>+	RTA_PUT(skb, TCA_EMATCH_TREE_LIST, 0, NULL);
>+
>+	for (i = 0; i < tree->hdr.nmatches; i++) {
>+		struct rtattr *match_start = (struct rtattr*) skb->tail;
>+		struct tcf_ematch *em = tcf_em_get_match(tree, i);
>+		struct tcf_ematch_hdr em_hdr = {
>+			.kind = em->ops ? em->ops->kind : TCF_EM_CONTAINER,
>+			.matchid = em->matchid,
>+			.flags = em->flags
>+		};
>+
>+		RTA_PUT(skb, i+1, sizeof(em_hdr), &em_hdr);
>+
>+		if (em->ops && em->ops->dump) {
>+			if (em->ops->dump(skb, em) < 0)
>+				goto rtattr_failure;
>+		} else if (tcf_em_is_container(em) || tcf_em_is_simple(em)) {
>+			u32 u = em->data;
>+			RTA_PUT_NOHDR(skb, sizeof(u), &u);
>+		} else if (em->datalen > 0)
>+			RTA_PUT_NOHDR(skb, em->datalen, (void *) em->data);
>+
>+		match_start->rta_len = skb->tail - (u8*) match_start;
>+	}
>+
>+	list_start->rta_len = skb->tail - (u8 *) list_start;
>+	top_start->rta_len = skb->tail - (u8 *) top_start;
>+
>+	return 0;
>+
>+rtattr_failure:
>+	return -1;
>+}
>+
>+static inline int tcf_em_match(struct sk_buff *skb, struct tcf_ematch *em,
>+			       struct tcf_pkt_info *info)
>+{
>+	int r;
>+
>+	if (likely(em->ops->match))
>  
>
gcc assumes likely for ptr != NULL by default. Is there a reason why a match
wouldn't have a match function ?

>+		r = em->ops->match(skb, em, info);
>+	else
>+		r = 0;
>+
>+	return tcf_em_is_inverted(em) ? !r : r;
>+}
>+
>+/* Do not use this function directly, use tcf_em_tree_match instead */
>+int __tcf_em_tree_match(struct sk_buff *skb, struct tcf_ematch_tree *tree,
>+			struct tcf_pkt_info *info)
>+{
>+	int stackp = 0, match_idx = 0, res = 0;
>+	struct tcf_ematch *cur_match;
>+	int stack[CONFIG_NET_EMATCH_STACK];
>  
>
>+
>+proceed:
>+	while (match_idx < tree->hdr.nmatches) {
>+		cur_match = tcf_em_get_match(tree, match_idx);
>+
>+		if (tcf_em_is_container(cur_match)) {
>+			if (unlikely(stackp >= CONFIG_NET_EMATCH_STACK))
>+				goto stack_overflow;
>+
>+			stack[stackp++] = match_idx;
>+			match_idx = cur_match->data;
>+			goto proceed;
>+		}
>+
>+		res = tcf_em_match(skb, cur_match, info);
>+
>+		if (tcf_em_early_end(cur_match, res))
>+			break;
>+
>+		match_idx++;
>+	}
>+
>+pop_stack:
>+	if (stackp > 0) {
>+		match_idx = stack[--stackp];
>+		cur_match = tcf_em_get_match(tree, match_idx);
>+
>+		if (tcf_em_early_end(cur_match, res))
>+			goto pop_stack;
>+		else {
>+			match_idx++;
>+			goto proceed;
>+		}
>+	}
>+
>+	return res;
>+
>+stack_overflow:
>+	if (net_ratelimit())
>+		printk("Local stack overflow, increase NET_EMATCH_STACK\n");
>+	return -1;
>+}
>+
>+EXPORT_SYMBOL(tcf_em_register);
>+EXPORT_SYMBOL(tcf_em_unregister);
>+EXPORT_SYMBOL(tcf_em_tree_validate);
>+EXPORT_SYMBOL(tcf_em_tree_destroy);
>+EXPORT_SYMBOL(tcf_em_tree_dump);
>+EXPORT_SYMBOL(__tcf_em_tree_match);
>
>  
>

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 2/6] PKT_SCHED: Simple comparison ematch (cmp)
  2005-01-23 23:02 ` [PATCH 2/6] PKT_SCHED: Simple comparison ematch (cmp) Thomas Graf
@ 2005-01-24  0:14   ` Patrick McHardy
  2005-01-24  0:55     ` Thomas Graf
  0 siblings, 1 reply; 21+ messages in thread
From: Patrick McHardy @ 2005-01-24  0:14 UTC (permalink / raw)
  To: Thomas Graf; +Cc: David S. Miller, netdev

Thomas Graf wrote:

>diff -Nru linux-2.6.11-rc2-bk1.orig/net/sched/em_cmp.c linux-2.6.11-rc2-bk1/net/sched/em_cmp.c
>--- linux-2.6.11-rc2-bk1.orig/net/sched/em_cmp.c	1970-01-01 01:00:00.000000000 +0100
>+++ linux-2.6.11-rc2-bk1/net/sched/em_cmp.c	2005-01-23 17:31:03.000000000 +0100
>@@ -0,0 +1,101 @@
>+/*
>+ * net/sched/em_cmp.c	Simple packet data comparison ematch
>+ *
>+ *		This program is free software; you can redistribute it and/or
>+ *		modify it under the terms of the GNU General Public License
>+ *		as published by the Free Software Foundation; either version
>+ *		2 of the License, or (at your option) any later version.
>+ *
>+ * Authors:	Thomas Graf <tgraf@suug.ch>
>+ */
>+
>+#include <linux/config.h>
>+#include <linux/module.h>
>+#include <linux/types.h>
>+#include <linux/kernel.h>
>+#include <linux/skbuff.h>
>+#include <linux/tc_ematch/tc_em_cmp.h>
>+#include <net/pkt_cls.h>
>+
>+static inline int cmp_needs_transformation(struct tcf_em_cmp *cmp)
>+{
>+	return unlikely(cmp->flags & TCF_EM_CMP_TRANS);
>+}
>+
>+static int em_cmp_match(struct sk_buff *skb, struct tcf_ematch *em,
>+			struct tcf_pkt_info *info)
>+{
>+	struct tcf_em_cmp *cmp = (struct tcf_em_cmp *) em->data;
>+	unsigned char *ptr = tcf_get_base_ptr(skb, cmp->layer) + cmp->off;
>+	u32 val = 0;
>+
>+	if (!tcf_valid_offset(skb, ptr, cmp->align))
>+		return 0;
>+
>+	switch (cmp->align) {
>+		case TCF_EM_ALIGN_U8:
>+			val = *ptr;
>+			break;
>+
>+		case TCF_EM_ALIGN_U16:
>+			val = *ptr << 8;
>+			val |= *(ptr+1);
>+
>+			if (cmp_needs_transformation(cmp))
>+				val = be16_to_cpu(val);
>  
>
Why not simply convert cmp->val in userspace ?

>+			break;
>+
>+		case TCF_EM_ALIGN_U32:
>+			/* Worth checking boundries? The branching seems
>+			 * to get worse. Visit again. */
>+			val = *ptr << 24;
>+			val |= *(ptr+1) << 16;
>+			val |= *(ptr+2) << 8;
>+			val |= *(ptr+3);
>+
>+			if (cmp_needs_transformation(cmp))
>+				val = be32_to_cpu(val);
>+			break;
>+
>+		default:
>+			return 0;
>+	}
>+
>+	if (cmp->mask)
>+		val &= cmp->mask;
>+
>+	switch (cmp->opnd) {
>+		case TCF_EM_OPND_EQ:
>+			return val == cmp->val;
>+		case TCF_EM_OPND_LT:
>+			return val < cmp->val;
>+		case TCF_EM_OPND_GT:
>+			return val > cmp->val;
>+	}
>+
>+	return 0;
>+}
>+
>+static struct tcf_ematch_ops em_cmp_ops = {
>+	.kind	  = TCF_EM_CMP,
>+	.datalen  = sizeof(struct tcf_em_cmp),
>+	.match	  = em_cmp_match,
>+	.owner	  = THIS_MODULE,
>+	.link	  = LIST_HEAD_INIT(em_cmp_ops.link)
>+};
>+
>+static int __init init_em_cmp(void)
>+{
>+	return tcf_em_register(&em_cmp_ops);
>+}
>+
>+static void __exit exit_em_cmp(void) 
>+{
>+	tcf_em_unregister(&em_cmp_ops);
>+}
>+
>+MODULE_LICENSE("GPL");
>+
>+module_init(init_em_cmp);
>+module_exit(exit_em_cmp);
>+
>
>  
>

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 4/6] PKT_SCHED: u32 ematch
  2005-01-23 23:03 ` [PATCH 4/6] PKT_SCHED: u32 ematch Thomas Graf
@ 2005-01-24  0:24   ` Patrick McHardy
  2005-01-24  0:58     ` Thomas Graf
  2005-01-25 23:24   ` [RESEND " Thomas Graf
  1 sibling, 1 reply; 21+ messages in thread
From: Patrick McHardy @ 2005-01-24  0:24 UTC (permalink / raw)
  To: Thomas Graf; +Cc: David S. Miller, netdev

Thomas Graf wrote:

>diff -Nru linux-2.6.11-rc1-bk9.orig/net/sched/em_u32.c linux-2.6.11-rc1-bk9/net/sched/em_u32.c
>--- linux-2.6.11-rc1-bk9.orig/net/sched/em_u32.c	1970-01-01 01:00:00.000000000 +0100
>+++ linux-2.6.11-rc1-bk9/net/sched/em_u32.c	2005-01-22 12:37:28.000000000 +0100
>@@ -0,0 +1,58 @@
>+/*
>+ * net/sched/em_u32.c	U32 Ematch
>+ *
>+ *		This program is free software; you can redistribute it and/or
>+ *		modify it under the terms of the GNU General Public License
>+ *		as published by the Free Software Foundation; either version
>+ *		2 of the License, or (at your option) any later version.
>+ *
>+ * Authors:	Thomas Graf <tgraf@suug.ch>
>+ *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
>+ *
>+ * Based on net/sched/cls_u32.c
>+ */
>+
>+#include <linux/config.h>
>+#include <linux/module.h>
>+#include <linux/types.h>
>+#include <linux/kernel.h>
>+#include <linux/skbuff.h>
>+#include <net/pkt_cls.h>
>+
>+static int em_u32_match(struct sk_buff *skb, struct tcf_ematch *em,
>+			struct tcf_pkt_info *info)
>+{
>+	struct tc_u32_key *key = (struct tc_u32_key *) em->data;
>+	unsigned char *ptr = skb->nh.raw;
>+	
>+	if (info) {
>+		if (info->ptr)
>+			ptr = info->ptr;
>+		ptr += (info->nexthdr & key->offmask);
>+	}
>+	
>+	return !((*(u32*) (ptr + key->off) ^ key->val) & key->mask);
>  
>
This needs to make sure it stays inside the skb. I know cls_u32's checks
are pretty weak, but having nothing doesn't seem right.

>+}
>+
>+static struct tcf_ematch_ops em_u32_ops = {
>+	.kind	  = TCF_EM_U32,
>+	.datalen  = sizeof(struct tc_u32_key),
>+	.match	  = em_u32_match,
>+	.owner	  = THIS_MODULE,
>+	.link	  = LIST_HEAD_INIT(em_u32_ops.link)
>+};
>+
>+static int __init init_em_u32(void)
>+{
>+	return tcf_em_register(&em_u32_ops);
>+}
>+
>+static void __exit exit_em_u32(void) 
>+{
>+	tcf_em_unregister(&em_u32_ops);
>+}
>+
>+MODULE_LICENSE("GPL");
>+
>+module_init(init_em_u32);
>+module_exit(exit_em_u32);
>
>  
>

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/6] PKT_SCHED: Extended Matches API
  2005-01-24  0:12   ` Patrick McHardy
@ 2005-01-24  0:49     ` Thomas Graf
  2005-01-24  0:56       ` Patrick McHardy
  0 siblings, 1 reply; 21+ messages in thread
From: Thomas Graf @ 2005-01-24  0:49 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: David S. Miller, netdev

* Patrick McHardy <41F43D6D.30502@trash.net> 2005-01-24 01:12
> Thomas Graf wrote:
> >+struct tcf_ematch
> >+{
> >+	u16			matchid;
> >+	u16			flags;
> >+	struct tcf_ematch_ops * ops;
> >+	unsigned int		datalen;
> >+	unsigned long		data;
> >+};
> >
> This layout leaves two holes on 64 bit, how about:
> 
> {
>    struct tcf_ematch_ops *ops;
>    unsigned long data;
>    unsigned int datalen;
>    u16 matchid;
>    u16 flags;
> };

Good point.

> >+	read_lock(&ematch_mod_lock);
> >+	list_for_each_entry(e, &ematch_ops, link) {
> >+		if (kind == e->kind) {
> >+			if (!try_module_get(e->owner))
> >+				e = NULL;
> >+			break;
> >+		}
> >+	}
> >
> e is the iterator, if nothing matched it will contain the last element now

Damn, yes. Still used to have the iterator being NULL in loops. Probably
made this mistake in other spots too, will check this.

> >+	tree->hdr.nmatches = 0;
> >+	kfree(xchg(&tree->matches, NULL));
> > 
> >
> xchg is not necessary here. Setting tree->matches to NULL also doesn't look
> necessary. As the comment above indicates, the caller needs to ensure the
> tree is unsused, so it should be easy for him to ensure he won't destroy the
> same tree twice.

It's not necessary but I do call tcf_em_tree_destroy internally and I
want to provide a consistent interface to the outside world. It's
argueable for sure. The xchg doesn't have any locking purposes and I can
understand if it confuses readers so I'll remove it.


> >+static inline int tcf_em_match(struct sk_buff *skb, struct tcf_ematch *em,
> >+			       struct tcf_pkt_info *info)
> >+{
> >+	int r;
> >+
> >+	if (likely(em->ops->match))
> > 
> >
> gcc assumes likely for ptr != NULL by default. Is there a reason why a match
> wouldn't have a match function ?

There is no reason but ematches might get written by unexperienced people
forgeting to register it. I know, the if partly hides the failure, it's
one of theses case where I have the same arguments for both ways.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 2/6] PKT_SCHED: Simple comparison ematch (cmp)
  2005-01-24  0:14   ` Patrick McHardy
@ 2005-01-24  0:55     ` Thomas Graf
  0 siblings, 0 replies; 21+ messages in thread
From: Thomas Graf @ 2005-01-24  0:55 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: David S. Miller, netdev

* Patrick McHardy <41F43DD4.8070306@trash.net> 2005-01-24 01:14
> Thomas Graf wrote:
> >+static int em_cmp_match(struct sk_buff *skb, struct tcf_ematch *em,
> >+			struct tcf_pkt_info *info)
> >+{
> >+	struct tcf_em_cmp *cmp = (struct tcf_em_cmp *) em->data;
> >+	unsigned char *ptr = tcf_get_base_ptr(skb, cmp->layer) + cmp->off;
> >+	u32 val = 0;
> >+
> >+	if (!tcf_valid_offset(skb, ptr, cmp->align))
> >+		return 0;
> >+
> >+	switch (cmp->align) {
> >+		case TCF_EM_ALIGN_U8:
> >+			val = *ptr;
> >+			break;
> >+
> >+		case TCF_EM_ALIGN_U16:
> >+			val = *ptr << 8;
> >+			val |= *(ptr+1);
> >+
> >+			if (cmp_needs_transformation(cmp))
> >+				val = be16_to_cpu(val);
> > 
> >
> Why not simply convert cmp->val in userspace ?

To allow comparing with lt and gt. I favoured transforming over byte
order specific comparison routines.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/6] PKT_SCHED: Extended Matches API
  2005-01-24  0:49     ` Thomas Graf
@ 2005-01-24  0:56       ` Patrick McHardy
  2005-01-24  0:59         ` Thomas Graf
  0 siblings, 1 reply; 21+ messages in thread
From: Patrick McHardy @ 2005-01-24  0:56 UTC (permalink / raw)
  To: Thomas Graf; +Cc: David S. Miller, netdev

Thomas Graf wrote:

>* Patrick McHardy <41F43D6D.30502@trash.net> 2005-01-24 01:12
>  
>
>>gcc assumes likely for ptr != NULL by default. Is there a reason why a match
>>wouldn't have a match function ?
>>
>
>There is no reason but ematches might get written by unexperienced people
>forgeting to register it. I know, the if partly hides the failure, it's
>one of theses case where I have the same arguments for both ways.
>
I don't care much, but I guess people forgetting to add a match
function to an ematch will find other ways to do stupid things :)
How about catching it in tcf_em_register ?

Regards
Patrick

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 4/6] PKT_SCHED: u32 ematch
  2005-01-24  0:24   ` Patrick McHardy
@ 2005-01-24  0:58     ` Thomas Graf
  0 siblings, 0 replies; 21+ messages in thread
From: Thomas Graf @ 2005-01-24  0:58 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: David S. Miller, netdev

* Patrick McHardy <41F4402D.6040200@trash.net> 2005-01-24 01:24
> Thomas Graf wrote:
> >+static int em_u32_match(struct sk_buff *skb, struct tcf_ematch *em,
> >+			struct tcf_pkt_info *info)
> >+{
> >+	struct tc_u32_key *key = (struct tc_u32_key *) em->data;
> >+	unsigned char *ptr = skb->nh.raw;
> >+	
> >+	if (info) {
> >+		if (info->ptr)
> >+			ptr = info->ptr;
> >+		ptr += (info->nexthdr & key->offmask);
> >+	}
> >+	
> >+	return !((*(u32*) (ptr + key->off) ^ key->val) & key->mask);
> > 
> >
> This needs to make sure it stays inside the skb. I know cls_u32's checks
> are pretty weak, but having nothing doesn't seem right.

It's a exact copy with all the weaknesses inherited. I'll add a
call to tcf_valid_offset, we'll lose the _feature_ to match areas
in the page beyond the skb data though. ;->

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/6] PKT_SCHED: Extended Matches API
  2005-01-24  0:56       ` Patrick McHardy
@ 2005-01-24  0:59         ` Thomas Graf
  0 siblings, 0 replies; 21+ messages in thread
From: Thomas Graf @ 2005-01-24  0:59 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: David S. Miller, netdev

* Patrick McHardy <41F447CE.6030007@trash.net> 2005-01-24 01:56
> Thomas Graf wrote:
> 
> >* Patrick McHardy <41F43D6D.30502@trash.net> 2005-01-24 01:12
> > 
> >
> >>gcc assumes likely for ptr != NULL by default. Is there a reason why a 
> >>match
> >>wouldn't have a match function ?
> >>
> >
> >There is no reason but ematches might get written by unexperienced people
> >forgeting to register it. I know, the if partly hides the failure, it's
> >one of theses case where I have the same arguments for both ways.
> >
> I don't care much, but I guess people forgetting to add a match
> function to an ematch will find other ways to do stupid things :)
> How about catching it in tcf_em_register ?

Sounds like a good plan, will do so. Thanks.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [RESEND 1/6] PKT_SCHED: Extended Matches API
  2005-01-23 23:01 ` [PATCH 1/6] PKT_SCHED: Extended Matches API Thomas Graf
  2005-01-24  0:12   ` Patrick McHardy
@ 2005-01-25 23:22   ` Thomas Graf
  1 sibling, 0 replies; 21+ messages in thread
From: Thomas Graf @ 2005-01-25 23:22 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev


Resend with Patrick's comments addressed.

An extended match (ematch) is a small classifiction tool not worth
writing a full classifier for. Ematches can be interconnected to form
a logic expression and get attached to classifiers to extend their
functionatlity.

The userspace part transforms the logic expressions into an array
consisting of multiple sequences of interconnected ematches separated
by markers. Precedence is implemented by a special ematch kind
referencing a sequence beyond the marker of the current sequence
causing the current position in the sequence to be pushed onto a stack
to allow the current position to be overwritten by the position
referenced in the special ematch. Matching continues in the new sequence
until a marker is reached causing the position to be restored from the
stack.

Signed-off-by: Thomas Graf <tgraf@suug.ch>

diff -Nru linux-2.6.11-rc2-bk3.orig/include/linux/pkt_cls.h linux-2.6.11-rc2-bk3/include/linux/pkt_cls.h
--- linux-2.6.11-rc2-bk3.orig/include/linux/pkt_cls.h	2005-01-25 23:02:54.000000000 +0100
+++ linux-2.6.11-rc2-bk3/include/linux/pkt_cls.h	2005-01-25 23:19:02.000000000 +0100
@@ -319,4 +319,76 @@
 
 #define TCA_TCINDEX_MAX     (__TCA_TCINDEX_MAX - 1)
 
+/* Extended Matches */
+
+struct tcf_ematch_tree_hdr
+{
+	__u16		nmatches;
+	__u16		progid;
+};
+
+enum
+{
+	TCA_EMATCH_TREE_UNSPEC,
+	TCA_EMATCH_TREE_HDR,
+	TCA_EMATCH_TREE_LIST,
+	__TCA_EMATCH_TREE_MAX
+};
+#define TCA_EMATCH_TREE_MAX (__TCA_EMATCH_TREE_MAX - 1)
+
+struct tcf_ematch_hdr
+{
+	__u16		matchid;
+	__u16		kind;
+	__u16		flags;
+	__u16		pad; /* currently unused */
+};
+
+/*  0                   1
+ *  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 
+ * +-----------------------+-+-+---+
+ * |         Unused        |S|I| R |
+ * +-----------------------+-+-+---+
+ *
+ * R(2) ::= relation to next ematch
+ *          where: 0 0 END (last ematch)
+ *                 0 1 AND
+ *                 1 0 OR
+ *                 1 1 Unused (invalid)
+ * I(1) ::= invert result
+ * S(1) ::= simple payload
+ */
+#define TCF_EM_REL_END	0
+#define TCF_EM_REL_AND	(1<<0)
+#define TCF_EM_REL_OR	(1<<1)
+#define TCF_EM_INVERT	(1<<2)
+#define TCF_EM_SIMPLE	(1<<3)
+
+#define TCF_EM_REL_MASK	3
+#define TCF_EM_REL_VALID(v) (((v) & TCF_EM_REL_MASK) != TCF_EM_REL_MASK)
+
+enum
+{
+	TCF_LAYER_LINK,
+	TCF_LAYER_NETWORK,
+	TCF_LAYER_TRANSPORT,
+	__TCF_LAYER_MAX
+};
+#define TCF_LAYER_MAX (__TCF_LAYER_MAX - 1)
+
+/* Ematch type assignments
+ *   1..32767		Reserved for ematches inside kernel tree
+ *   32768..65535	Free to use, not reliable
+ */
+enum
+{
+	TCF_EM_CONTAINER,
+	__TCF_EM_MAX
+};
+
+enum
+{
+	TCF_EM_PROG_TC
+};
+
 #endif
diff -Nru linux-2.6.11-rc2-bk3.orig/include/linux/rtnetlink.h linux-2.6.11-rc2-bk3/include/linux/rtnetlink.h
--- linux-2.6.11-rc2-bk3.orig/include/linux/rtnetlink.h	2005-01-25 23:02:54.000000000 +0100
+++ linux-2.6.11-rc2-bk3/include/linux/rtnetlink.h	2005-01-25 23:19:02.000000000 +0100
@@ -779,6 +779,11 @@
 		 goto rtattr_failure; \
    	__rta_fill(skb, attrtype, attrlen, data); }) 
 
+#define RTA_PUT_NOHDR(skb, attrlen, data) \
+({	if (unlikely(skb_tailroom(skb) < (int)(attrlen))) \
+		goto rtattr_failure; \
+	memcpy(skb_put(skb, RTA_ALIGN(attrlen)), data, attrlen); })
+		
 static inline struct rtattr *
 __rta_reserve(struct sk_buff *skb, int attrtype, int attrlen)
 {
diff -Nru linux-2.6.11-rc2-bk3.orig/include/net/pkt_cls.h linux-2.6.11-rc2-bk3/include/net/pkt_cls.h
--- linux-2.6.11-rc2-bk3.orig/include/net/pkt_cls.h	2005-01-25 23:02:54.000000000 +0100
+++ linux-2.6.11-rc2-bk3/include/net/pkt_cls.h	2005-01-25 23:23:24.000000000 +0100
@@ -148,6 +148,176 @@
 extern int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts,
 	                       struct tcf_ext_map *map);
 
+/**
+ * struct tcf_pkt_info - packet information
+ */
+struct tcf_pkt_info
+{
+};
+
+#ifdef CONFIG_NET_EMATCH
+
+struct tcf_ematch_ops;
+
+/**
+ * struct tcf_ematch - extended match (ematch)
+ * 
+ * @matchid: identifier to allow userspace to reidentify a match
+ * @flags: flags specifying attributes and the relation to other matches
+ * @ops: the operations lookup table of the corresponding ematch module
+ * @datalen: length of the ematch specific configuration data
+ * @data: ematch specific data
+ */
+struct tcf_ematch
+{
+	struct tcf_ematch_ops * ops;
+	unsigned long		data;
+	unsigned int		datalen;
+	u16			matchid;
+	u16			flags;
+};
+
+static inline int tcf_em_is_container(struct tcf_ematch *em)
+{
+	return !em->ops;
+}
+
+static inline int tcf_em_is_simple(struct tcf_ematch *em)
+{
+	return em->flags & TCF_EM_SIMPLE;
+}
+
+static inline int tcf_em_is_inverted(struct tcf_ematch *em)
+{
+	return em->flags & TCF_EM_INVERT;
+}
+
+static inline int tcf_em_last_match(struct tcf_ematch *em)
+{
+	return (em->flags & TCF_EM_REL_MASK) == TCF_EM_REL_END;
+}
+
+static inline int tcf_em_early_end(struct tcf_ematch *em, int result)
+{
+	if (tcf_em_last_match(em))
+		return 1;
+
+	if (result == 0 && em->flags & TCF_EM_REL_AND)
+		return 1;
+
+	if (result != 0 && em->flags & TCF_EM_REL_OR)
+		return 1;
+
+	return 0;
+}
+	
+/**
+ * struct tcf_ematch_tree - ematch tree handle
+ *
+ * @hdr: ematch tree header supplied by userspace
+ * @matches: array of ematches
+ */
+struct tcf_ematch_tree
+{
+	struct tcf_ematch_tree_hdr hdr;
+	struct tcf_ematch *	matches;
+	
+};
+
+/**
+ * struct tcf_ematch_ops - ematch module operations
+ * 
+ * @kind: identifier (kind) of this ematch module
+ * @datalen: length of expected configuration data (optional)
+ * @change: called during validation (optional)
+ * @match: called during ematch tree evaluation, must return 1/0
+ * @destroy: called during destroyage (optional)
+ * @dump: called during dumping process (optional)
+ * @owner: owner, must be set to THIS_MODULE
+ * @link: link to previous/next ematch module (internal use)
+ */
+struct tcf_ematch_ops
+{
+	int			kind;
+	int			datalen;
+	int			(*change)(struct tcf_proto *, void *,
+					  int, struct tcf_ematch *);
+	int			(*match)(struct sk_buff *, struct tcf_ematch *,
+					 struct tcf_pkt_info *);
+	void			(*destroy)(struct tcf_proto *,
+					   struct tcf_ematch *);
+	int			(*dump)(struct sk_buff *, struct tcf_ematch *);
+	struct module		*owner;
+	struct list_head	link;
+};
+
+extern int tcf_em_register(struct tcf_ematch_ops *);
+extern int tcf_em_unregister(struct tcf_ematch_ops *);
+extern int tcf_em_tree_validate(struct tcf_proto *, struct rtattr *,
+				struct tcf_ematch_tree *);
+extern void tcf_em_tree_destroy(struct tcf_proto *, struct tcf_ematch_tree *);
+extern int tcf_em_tree_dump(struct sk_buff *, struct tcf_ematch_tree *, int);
+extern int __tcf_em_tree_match(struct sk_buff *, struct tcf_ematch_tree *,
+			       struct tcf_pkt_info *);
+
+/**
+ * tcf_em_tree_change - replace ematch tree of a running classifier
+ *
+ * @tp: classifier kind handle
+ * @dst: destination ematch tree variable
+ * @src: source ematch tree (temporary tree from tcf_em_tree_validate)
+ *
+ * This functions replaces the ematch tree in @dst with the ematch
+ * tree in @src. The classifier in charge of the ematch tree may be
+ * running.
+ */
+static inline void tcf_em_tree_change(struct tcf_proto *tp,
+				      struct tcf_ematch_tree *dst,
+				      struct tcf_ematch_tree *src)
+{
+	tcf_tree_lock(tp);
+	memcpy(dst, src, sizeof(*dst));
+	tcf_tree_unlock(tp);
+}
+
+/**
+ * tcf_em_tree_match - evaulate an ematch tree
+ *
+ * @skb: socket buffer of the packet in question
+ * @tree: ematch tree to be used for evaluation
+ * @info: packet information examined by classifier
+ *
+ * This function matches @skb against the ematch tree in @tree by going
+ * through all ematches respecting their logic relations returning
+ * as soon as the result is obvious.
+ *
+ * Returns 1 if the ematch tree as-one matches, no ematches are configured
+ * or ematch is not enabled in the kernel, otherwise 0 is returned.
+ */
+static inline int tcf_em_tree_match(struct sk_buff *skb,
+				    struct tcf_ematch_tree *tree,
+				    struct tcf_pkt_info *info)
+{
+	if (tree->hdr.nmatches)
+		return __tcf_em_tree_match(skb, tree, info);
+	else
+		return 1;
+}
+
+#else /* CONFIG_NET_EMATCH */
+
+struct tcf_ematch_tree
+{
+};
+
+#define tcf_em_tree_validate(tp, tb, t) ((void)(t), 0)
+#define tcf_em_tree_destroy(tp, t) do { (void)(t); } while(0)
+#define tcf_em_tree_dump(skb, t, tlv) (0)
+#define tcf_em_tree_change(tp, dst, src) do { } while(0)
+#define tcf_em_tree_match(skb, t, info) ((void)(info), 1)
+
+#endif /* CONFIG_NET_EMATCH */
+
 #ifdef CONFIG_NET_CLS_IND
 static inline int
 tcf_change_indev(struct tcf_proto *tp, char *indev, struct rtattr *indev_tlv)
diff -Nru linux-2.6.11-rc2-bk3.orig/net/sched/Kconfig linux-2.6.11-rc2-bk3/net/sched/Kconfig
--- linux-2.6.11-rc2-bk3.orig/net/sched/Kconfig	2005-01-25 23:03:36.000000000 +0100
+++ linux-2.6.11-rc2-bk3/net/sched/Kconfig	2005-01-25 23:19:02.000000000 +0100
@@ -375,6 +375,29 @@
 	  To compile this code as a module, choose M here: the
 	  module will be called cls_rsvp6.
 
+config NET_EMATCH
+	bool "Extended Matches"
+	depends on NET_CLS
+	---help---
+	  Say Y here if you want to use extended matches on top of classifiers
+	  and select the extended matches below.
+
+	  Extended matches are small classification helpers not worth writing
+	  a separate classifier.
+
+	  You must have a recent version of the iproute2 tools in order to use
+	  extended matches.
+
+config NET_EMATCH_STACK
+	int "Stack size"
+	depends on NET_EMATCH
+	default "32"
+	---help---
+	  Size of the local stack variable used while evaluating the tree of
+	  ematches. Limits the depth of the tree, i.e. the number of
+	  encapsulated precedences. Every level requires 4 bytes of addtional
+	  stack space.
+
 config NET_CLS_ACT
 	bool "Packet ACTION"
 	depends on EXPERIMENTAL && NET_CLS && NET_QOS
diff -Nru linux-2.6.11-rc2-bk3.orig/net/sched/Makefile linux-2.6.11-rc2-bk3/net/sched/Makefile
--- linux-2.6.11-rc2-bk3.orig/net/sched/Makefile	2005-01-25 23:03:36.000000000 +0100
+++ linux-2.6.11-rc2-bk3/net/sched/Makefile	2005-01-25 23:19:02.000000000 +0100
@@ -33,3 +33,4 @@
 obj-$(CONFIG_NET_CLS_RSVP)	+= cls_rsvp.o
 obj-$(CONFIG_NET_CLS_TCINDEX)	+= cls_tcindex.o
 obj-$(CONFIG_NET_CLS_RSVP6)	+= cls_rsvp6.o
+obj-$(CONFIG_NET_EMATCH)	+= ematch.o
diff -Nru linux-2.6.11-rc2-bk3.orig/net/sched/ematch.c linux-2.6.11-rc2-bk3/net/sched/ematch.c
--- linux-2.6.11-rc2-bk3.orig/net/sched/ematch.c	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.11-rc2-bk3/net/sched/ematch.c	2005-01-25 23:40:54.000000000 +0100
@@ -0,0 +1,524 @@
+/*
+ * net/sched/ematch.c		Extended Match API
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Thomas Graf <tgraf@suug.ch>
+ *
+ * ==========================================================================
+ *
+ * An extended match (ematch) is a small classification tool not worth
+ * writing a full classifier for. Ematches can be interconnected to form
+ * a logic expression and get attached to classifiers to extend their
+ * functionatlity.
+ *
+ * The userspace part transforms the logic expressions into an array
+ * consisting of multiple sequences of interconnected ematches separated
+ * by markers. Precedence is implemented by a special ematch kind
+ * referencing a sequence beyond the marker of the current sequence
+ * causing the current position in the sequence to be pushed onto a stack
+ * to allow the current position to be overwritten by the position referenced
+ * in the special ematch. Matching continues in the new sequence until a
+ * marker is reached causing the position to be restored from the stack.
+ *
+ * Example:
+ *          A AND (B1 OR B2) AND C AND D
+ *
+ *              ------->-PUSH-------
+ *    -->--    /         -->--      \   -->--
+ *   /     \  /         /     \      \ /     \
+ * +-------+-------+-------+-------+-------+--------+
+ * | A AND | B AND | C AND | D END | B1 OR | B2 END |
+ * +-------+-------+-------+-------+-------+--------+
+ *                    \                      /
+ *                     --------<-POP---------
+ *
+ * where B is a virtual ematch referencing to sequence starting with B1.
+ * 
+ * ==========================================================================
+ *
+ * How to write an ematch in 60 seconds
+ * ------------------------------------
+ * 
+ *   1) Provide a matcher function:
+ *      static int my_match(struct sk_buff *skb, struct tcf_ematch *m,
+ *                          struct tcf_pkt_info *info)
+ *      {
+ *      	struct mydata *d = (struct mydata *) m->data;
+ *
+ *      	if (...matching goes here...)
+ *      		return 1;
+ *      	else
+ *      		return 0;
+ *      }
+ *
+ *   2) Fill out a struct tcf_ematch_ops:
+ *      static struct tcf_ematch_ops my_ops = {
+ *      	.kind = unique id,
+ *      	.datalen = sizeof(struct mydata),
+ *      	.match = my_match,
+ *      	.owner = THIS_MODULE,
+ *      };
+ *
+ *   3) Register/Unregister your ematch:
+ *      static int __init init_my_ematch(void)
+ *      {
+ *      	return tcf_em_register(&my_ops);
+ *      }
+ *
+ *      static void __exit exit_my_ematch(void)
+ *      {
+ *      	return tcf_em_unregister(&my_ops);
+ *      }
+ *
+ *      module_init(init_my_ematch);
+ *      module_exit(exit_my_ematch);
+ *
+ *   4) By now you should have two more seconds left, barely enough to
+ *      open up a beer to watch the compilation going.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/rtnetlink.h>
+#include <linux/skbuff.h>
+#include <net/pkt_cls.h>
+#include <config/net/ematch/stack.h>
+
+static LIST_HEAD(ematch_ops);
+static DEFINE_RWLOCK(ematch_mod_lock);
+
+static inline struct tcf_ematch_ops * tcf_em_lookup(u16 kind)
+{
+	struct tcf_ematch_ops *e = NULL;
+
+	read_lock(&ematch_mod_lock);
+	list_for_each_entry(e, &ematch_ops, link) {
+		if (kind == e->kind) {
+			if (!try_module_get(e->owner))
+				e = NULL;
+			read_unlock(&ematch_mod_lock);
+			return e;
+		}
+	}
+	read_unlock(&ematch_mod_lock);
+
+	return NULL;
+}
+
+/**
+ * tcf_em_register - register an extended match
+ * 
+ * @ops: ematch operations lookup table
+ *
+ * This function must be called by ematches to announce their presence.
+ * The given @ops must have kind set to a unique identifier and the
+ * callback match() must be implemented. All other callbacks are optional
+ * and a fallback implementation is used instead.
+ *
+ * Returns -EEXISTS if an ematch of the same kind has already registered.
+ */
+int tcf_em_register(struct tcf_ematch_ops *ops)
+{
+	int err = -EEXIST;
+	struct tcf_ematch_ops *e;
+
+	if (ops->match == NULL)
+		return -EINVAL;
+
+	write_lock(&ematch_mod_lock);
+	list_for_each_entry(e, &ematch_ops, link)
+		if (ops->kind == e->kind)
+			goto errout;
+
+	list_add_tail(&ops->link, &ematch_ops);
+	err = 0;
+errout:
+	write_unlock(&ematch_mod_lock);
+	return err;
+}
+
+/**
+ * tcf_em_unregister - unregster and extended match
+ *
+ * @ops: ematch operations lookup table
+ *
+ * This function must be called by ematches to announce their disappearance
+ * for examples when the module gets unloaded. The @ops parameter must be
+ * the same as the one used for registration.
+ *
+ * Returns -ENOENT if no matching ematch was found.
+ */
+int tcf_em_unregister(struct tcf_ematch_ops *ops)
+{
+	int err = 0;
+	struct tcf_ematch_ops *e;
+
+	write_lock(&ematch_mod_lock);
+	list_for_each_entry(e, &ematch_ops, link) {
+		if (e == ops) {
+			list_del(&e->link);
+			goto out;
+		}
+	}
+
+	err = -ENOENT;
+out:
+	write_unlock(&ematch_mod_lock);
+	return err;
+}
+
+static inline struct tcf_ematch * tcf_em_get_match(struct tcf_ematch_tree *tree,
+						   int index)
+{
+	return &tree->matches[index];
+}
+
+
+static int tcf_em_validate(struct tcf_proto *tp,
+			   struct tcf_ematch_tree_hdr *tree_hdr,
+			   struct tcf_ematch *em, struct rtattr *rta, int idx)
+{
+	int err = -EINVAL;
+	struct tcf_ematch_hdr *em_hdr = RTA_DATA(rta);
+	int data_len = RTA_PAYLOAD(rta) - sizeof(*em_hdr);
+	void *data = (void *) em_hdr + sizeof(*em_hdr);
+
+	if (!TCF_EM_REL_VALID(em_hdr->flags))
+		goto errout;
+
+	if (em_hdr->kind == TCF_EM_CONTAINER) {
+		/* Special ematch called "container", carries an index
+		 * referencing an external ematch sequence. */
+		u32 ref;
+
+		if (data_len < sizeof(ref))
+			goto errout;
+		ref = *(u32 *) data;
+
+		if (ref >= tree_hdr->nmatches)
+			goto errout;
+
+		/* We do not allow backward jumps to avoid loops and jumps
+		 * to our own position are of course illegal. */
+		if (ref <= idx)
+			goto errout;
+
+		
+		em->data = ref;
+	} else {
+		/* Note: This lookup will increase the module refcnt
+		 * of the ematch module referenced. In case of a failure,
+		 * a destroy function is called by the underlying layer
+		 * which automatically releases the reference again, therefore
+		 * the module MUST not be given back under any circumstances
+		 * here. Be aware, the destroy function assumes that the
+		 * module is held if the ops field is non zero. */
+		em->ops = tcf_em_lookup(em_hdr->kind);
+
+		if (em->ops == NULL) {
+			err = -ENOENT;
+			goto errout;
+		}
+
+		/* ematch module provides expected length of data, so we
+		 * can do a basic sanity check. */
+		if (em->ops->datalen && data_len < em->ops->datalen)
+			goto errout;
+
+		if (em->ops->change) {
+			err = em->ops->change(tp, data, data_len, em);
+			if (err < 0)
+				goto errout;
+		} else if (data_len > 0) {
+			/* ematch module doesn't provide an own change
+			 * procedure and expects us to allocate and copy
+			 * the ematch data.
+			 *
+			 * TCF_EM_SIMPLE may be specified stating that the
+			 * data only consists of a u32 integer and the module
+			 * does not expected a memory reference but rather
+			 * the value carried. */
+			if (em_hdr->flags & TCF_EM_SIMPLE) {
+				if (data_len < sizeof(u32))
+					goto errout;
+				em->data = *(u32 *) data;
+			} else {
+				void *v = kmalloc(data_len, GFP_KERNEL);
+				if (v == NULL) {
+					err = -ENOBUFS;
+					goto errout;
+				}
+				memcpy(v, data, data_len);
+				em->data = (unsigned long) v;
+			}
+		}
+	}
+
+	em->matchid = em_hdr->matchid;
+	em->flags = em_hdr->flags;
+	em->datalen = data_len;
+
+	err = 0;
+errout:
+	return err;
+}
+
+/**
+ * tcf_em_tree_validate - validate ematch config TLV and build ematch tree
+ *
+ * @tp: classifier kind handle
+ * @rta: ematch tree configuration TLV
+ * @tree: destination ematch tree variable to store the resulting
+ *        ematch tree.
+ *
+ * This function validates the given configuration TLV @rta and builds an
+ * ematch tree in @tree. The resulting tree must later be copied into
+ * the private classifier data using tcf_em_tree_change(). You MUST NOT
+ * provide the ematch tree variable of the private classifier data directly,
+ * the changes would not be locked properly.
+ *
+ * Returns a negative error code if the configuration TLV contains errors.
+ */
+int tcf_em_tree_validate(struct tcf_proto *tp, struct rtattr *rta,
+			 struct tcf_ematch_tree *tree)
+{
+	int idx, list_len, matches_len, err = -EINVAL;
+	struct rtattr *tb[TCA_EMATCH_TREE_MAX];
+	struct rtattr *rt_match, *rt_hdr, *rt_list;
+	struct tcf_ematch_tree_hdr *tree_hdr;
+	struct tcf_ematch *em;
+
+	if (rtattr_parse_nested(tb, TCA_EMATCH_TREE_MAX, rta) < 0)
+		goto errout;
+
+	rt_hdr = tb[TCA_EMATCH_TREE_HDR-1];
+	rt_list = tb[TCA_EMATCH_TREE_LIST-1];
+
+	if (rt_hdr == NULL || rt_list == NULL)
+		goto errout;
+
+	if (RTA_PAYLOAD(rt_hdr) < sizeof(*tree_hdr) ||
+	    RTA_PAYLOAD(rt_list) < sizeof(*rt_match))
+		goto errout;
+
+	tree_hdr = RTA_DATA(rt_hdr);
+	memcpy(&tree->hdr, tree_hdr, sizeof(*tree_hdr));
+
+	rt_match = RTA_DATA(rt_list);
+	list_len = RTA_PAYLOAD(rt_list);
+	matches_len = tree_hdr->nmatches * sizeof(*em);
+
+	tree->matches = kmalloc(matches_len, GFP_KERNEL);
+	if (tree->matches == NULL)
+		goto errout;
+	memset(tree->matches, 0, matches_len);
+
+	/* We do not use rtattr_parse_nested here because the maximum
+	 * number of attributes is unknown. This saves us the allocation
+	 * for a tb buffer which would serve no purpose at all.
+	 * 
+	 * The array of rt attributes is parsed in the order as they are
+	 * provided, their type must be incremental from 1 to n. Even
+	 * if it does not serve any real purpose, a failure of sticking
+	 * to this policy will result in parsing failure. */
+	for (idx = 0; RTA_OK(rt_match, list_len); idx++) {
+		err = -EINVAL;
+
+		if (rt_match->rta_type != (idx + 1))
+			goto errout_abort;
+
+		if (idx >= tree_hdr->nmatches)
+			goto errout_abort;
+
+		if (RTA_PAYLOAD(rt_match) < sizeof(struct tcf_ematch_hdr))
+			goto errout_abort;
+
+		em = tcf_em_get_match(tree, idx);
+
+		err = tcf_em_validate(tp, tree_hdr, em, rt_match, idx);
+		if (err < 0)
+			goto errout_abort;
+
+		rt_match = RTA_NEXT(rt_match, list_len);
+	}
+
+	/* Check if the number of matches provided by userspace actually
+	 * complies with the array of matches. The number was used for
+	 * the validation of references and a mismatch could lead to
+	 * undefined references during the matching process. */
+	if (idx != tree_hdr->nmatches) {
+		err = -EINVAL;
+		goto errout_abort;
+	}
+
+	err = 0;
+errout:
+	return err;
+
+errout_abort:
+	tcf_em_tree_destroy(tp, tree);
+	return err;
+}
+
+/**
+ * tcf_em_tree_destroy - destroy an ematch tree
+ *
+ * @tp: classifier kind handle
+ * @tree: ematch tree to be deleted
+ *
+ * This functions destroys an ematch tree previously created by
+ * tcf_em_tree_validate()/tcf_em_tree_change(). You must ensure that
+ * the ematch tree is not in use before calling this function.
+ */
+void tcf_em_tree_destroy(struct tcf_proto *tp, struct tcf_ematch_tree *tree)
+{
+	int i;
+
+	if (tree->matches == NULL)
+		return;
+
+	for (i = 0; i < tree->hdr.nmatches; i++) {
+		struct tcf_ematch *em = tcf_em_get_match(tree, i);
+
+		if (em->ops) {
+			if (em->ops->destroy)
+				em->ops->destroy(tp, em);
+			else if (!tcf_em_is_simple(em) && em->data)
+				kfree((void *) em->data);
+			module_put(em->ops->owner);
+		}
+	}
+	
+	tree->hdr.nmatches = 0;
+	kfree(tree->matches);
+}
+
+/**
+ * tcf_em_tree_dump - dump ematch tree into a rtnl message
+ *
+ * @skb: skb holding the rtnl message
+ * @t: ematch tree to be dumped
+ * @tlv: TLV type to be used to encapsulate the tree
+ *
+ * This function dumps a ematch tree into a rtnl message. It is valid to
+ * call this function while the ematch tree is in use.
+ *
+ * Returns -1 if the skb tailroom is insufficient.
+ */
+int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv)
+{
+	int i;
+	struct rtattr * top_start = (struct rtattr*) skb->tail;
+	struct rtattr * list_start;
+
+	RTA_PUT(skb, tlv, 0, NULL);
+	RTA_PUT(skb, TCA_EMATCH_TREE_HDR, sizeof(tree->hdr), &tree->hdr);
+
+	list_start = (struct rtattr *) skb->tail;
+	RTA_PUT(skb, TCA_EMATCH_TREE_LIST, 0, NULL);
+
+	for (i = 0; i < tree->hdr.nmatches; i++) {
+		struct rtattr *match_start = (struct rtattr*) skb->tail;
+		struct tcf_ematch *em = tcf_em_get_match(tree, i);
+		struct tcf_ematch_hdr em_hdr = {
+			.kind = em->ops ? em->ops->kind : TCF_EM_CONTAINER,
+			.matchid = em->matchid,
+			.flags = em->flags
+		};
+
+		RTA_PUT(skb, i+1, sizeof(em_hdr), &em_hdr);
+
+		if (em->ops && em->ops->dump) {
+			if (em->ops->dump(skb, em) < 0)
+				goto rtattr_failure;
+		} else if (tcf_em_is_container(em) || tcf_em_is_simple(em)) {
+			u32 u = em->data;
+			RTA_PUT_NOHDR(skb, sizeof(u), &u);
+		} else if (em->datalen > 0)
+			RTA_PUT_NOHDR(skb, em->datalen, (void *) em->data);
+
+		match_start->rta_len = skb->tail - (u8*) match_start;
+	}
+
+	list_start->rta_len = skb->tail - (u8 *) list_start;
+	top_start->rta_len = skb->tail - (u8 *) top_start;
+
+	return 0;
+
+rtattr_failure:
+	return -1;
+}
+
+static inline int tcf_em_match(struct sk_buff *skb, struct tcf_ematch *em,
+			       struct tcf_pkt_info *info)
+{
+	int r = em->ops->match(skb, em, info);
+	return tcf_em_is_inverted(em) ? !r : r;
+}
+
+/* Do not use this function directly, use tcf_em_tree_match instead */
+int __tcf_em_tree_match(struct sk_buff *skb, struct tcf_ematch_tree *tree,
+			struct tcf_pkt_info *info)
+{
+	int stackp = 0, match_idx = 0, res = 0;
+	struct tcf_ematch *cur_match;
+	int stack[CONFIG_NET_EMATCH_STACK];
+
+proceed:
+	while (match_idx < tree->hdr.nmatches) {
+		cur_match = tcf_em_get_match(tree, match_idx);
+
+		if (tcf_em_is_container(cur_match)) {
+			if (unlikely(stackp >= CONFIG_NET_EMATCH_STACK))
+				goto stack_overflow;
+
+			stack[stackp++] = match_idx;
+			match_idx = cur_match->data;
+			goto proceed;
+		}
+
+		res = tcf_em_match(skb, cur_match, info);
+
+		if (tcf_em_early_end(cur_match, res))
+			break;
+
+		match_idx++;
+	}
+
+pop_stack:
+	if (stackp > 0) {
+		match_idx = stack[--stackp];
+		cur_match = tcf_em_get_match(tree, match_idx);
+
+		if (tcf_em_early_end(cur_match, res))
+			goto pop_stack;
+		else {
+			match_idx++;
+			goto proceed;
+		}
+	}
+
+	return res;
+
+stack_overflow:
+	if (net_ratelimit())
+		printk("Local stack overflow, increase NET_EMATCH_STACK\n");
+	return -1;
+}
+
+EXPORT_SYMBOL(tcf_em_register);
+EXPORT_SYMBOL(tcf_em_unregister);
+EXPORT_SYMBOL(tcf_em_tree_validate);
+EXPORT_SYMBOL(tcf_em_tree_destroy);
+EXPORT_SYMBOL(tcf_em_tree_dump);
+EXPORT_SYMBOL(__tcf_em_tree_match);

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [RESEND 4/6] PKT_SCHED: u32 ematch
  2005-01-23 23:03 ` [PATCH 4/6] PKT_SCHED: u32 ematch Thomas Graf
  2005-01-24  0:24   ` Patrick McHardy
@ 2005-01-25 23:24   ` Thomas Graf
  1 sibling, 0 replies; 21+ messages in thread
From: Thomas Graf @ 2005-01-25 23:24 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev

Resend with offset validation as Patrick suggested.

The u32 ematch behaves exactly the same as a u32 match and will replace
it in the long term. It allows the underlying classifiers to give hints
about the position of the next protocol header (i.e. nexthdr+).

Signed-off-by: Thomas Graf <tgraf@suug.ch>

diff -Nru linux-2.6.11-rc2-bk3.orig/include/linux/pkt_cls.h linux-2.6.11-rc2-bk3/include/linux/pkt_cls.h
--- linux-2.6.11-rc2-bk3.orig/include/linux/pkt_cls.h	2005-01-25 23:58:04.000000000 +0100
+++ linux-2.6.11-rc2-bk3/include/linux/pkt_cls.h	2005-01-25 23:58:20.000000000 +0100
@@ -385,6 +385,7 @@
 	TCF_EM_CONTAINER,
 	TCF_EM_CMP,
 	TCF_EM_NBYTE,
+	TCF_EM_U32,
 	__TCF_EM_MAX
 };
 
diff -Nru linux-2.6.11-rc2-bk3.orig/include/net/pkt_cls.h linux-2.6.11-rc2-bk3/include/net/pkt_cls.h
--- linux-2.6.11-rc2-bk3.orig/include/net/pkt_cls.h	2005-01-25 23:57:30.000000000 +0100
+++ linux-2.6.11-rc2-bk3/include/net/pkt_cls.h	2005-01-25 23:58:20.000000000 +0100
@@ -153,6 +153,8 @@
  */
 struct tcf_pkt_info
 {
+	unsigned char *		ptr;
+	int			nexthdr;
 };
 
 #ifdef CONFIG_NET_EMATCH
diff -Nru linux-2.6.11-rc2-bk3.orig/net/sched/Kconfig linux-2.6.11-rc2-bk3/net/sched/Kconfig
--- linux-2.6.11-rc2-bk3.orig/net/sched/Kconfig	2005-01-25 23:58:04.000000000 +0100
+++ linux-2.6.11-rc2-bk3/net/sched/Kconfig	2005-01-25 23:58:20.000000000 +0100
@@ -418,6 +418,16 @@
 	  To compile this code as a module, choose M here: the
 	  module will be called em_nbyte.
 
+config NET_EMATCH_U32
+	tristate "U32 hashing key"
+	depends on NET_EMATCH
+	---help---
+	  Say Y here if you want to be able to classify packets using
+	  the famous u32 key in combination with logic relations.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called em_u32.
+
 config NET_CLS_ACT
 	bool "Packet ACTION"
 	depends on EXPERIMENTAL && NET_CLS && NET_QOS
diff -Nru linux-2.6.11-rc2-bk3.orig/net/sched/Makefile linux-2.6.11-rc2-bk3/net/sched/Makefile
--- linux-2.6.11-rc2-bk3.orig/net/sched/Makefile	2005-01-25 23:58:04.000000000 +0100
+++ linux-2.6.11-rc2-bk3/net/sched/Makefile	2005-01-25 23:58:20.000000000 +0100
@@ -36,3 +36,4 @@
 obj-$(CONFIG_NET_EMATCH)	+= ematch.o
 obj-$(CONFIG_NET_EMATCH_CMP)	+= em_cmp.o
 obj-$(CONFIG_NET_EMATCH_NBYTE)	+= em_nbyte.o
+obj-$(CONFIG_NET_EMATCH_U32)	+= em_u32.o
diff -Nru linux-2.6.11-rc2-bk3.orig/net/sched/em_u32.c linux-2.6.11-rc2-bk3/net/sched/em_u32.c
--- linux-2.6.11-rc2-bk3.orig/net/sched/em_u32.c	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.11-rc2-bk3/net/sched/em_u32.c	2005-01-26 00:06:03.000000000 +0100
@@ -0,0 +1,63 @@
+/*
+ * net/sched/em_u32.c	U32 Ematch
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Thomas Graf <tgraf@suug.ch>
+ *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * Based on net/sched/cls_u32.c
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <net/pkt_cls.h>
+
+static int em_u32_match(struct sk_buff *skb, struct tcf_ematch *em,
+			struct tcf_pkt_info *info)
+{
+	struct tc_u32_key *key = (struct tc_u32_key *) em->data;
+	unsigned char *ptr = skb->nh.raw;
+	
+	if (info) {
+		if (info->ptr)
+			ptr = info->ptr;
+		ptr += (info->nexthdr & key->offmask);
+	}
+
+	ptr += key->off;
+
+	if (!tcf_valid_offset(skb, ptr, sizeof(u32)))
+		return 0;
+	
+	return !(((*(u32*) ptr)  ^ key->val) & key->mask);
+}
+
+static struct tcf_ematch_ops em_u32_ops = {
+	.kind	  = TCF_EM_U32,
+	.datalen  = sizeof(struct tc_u32_key),
+	.match	  = em_u32_match,
+	.owner	  = THIS_MODULE,
+	.link	  = LIST_HEAD_INIT(em_u32_ops.link)
+};
+
+static int __init init_em_u32(void)
+{
+	return tcf_em_register(&em_u32_ops);
+}
+
+static void __exit exit_em_u32(void) 
+{
+	tcf_em_unregister(&em_u32_ops);
+}
+
+MODULE_LICENSE("GPL");
+
+module_init(init_em_u32);
+module_exit(exit_em_u32);

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCHSET] Extended matches and basic classifier
  2005-01-23 23:00 [PATCHSET] Extended matches and basic classifier Thomas Graf
                   ` (6 preceding siblings ...)
  2005-01-23 23:21 ` [PATCHSET] Extended matches and basic classifier Thomas Graf
@ 2005-01-26  5:52 ` David S. Miller
  2005-02-15 21:38 ` David S. Miller
  8 siblings, 0 replies; 21+ messages in thread
From: David S. Miller @ 2005-01-26  5:52 UTC (permalink / raw)
  To: Thomas Graf; +Cc: netdev

On Mon, 24 Jan 2005 00:00:12 +0100
Thomas Graf <tgraf@suug.ch> wrote:

> This patchset adds the ematch API, the ematches cmp, nbyte, u32, meta,
> and the basic classifier. It doesn't touch any existing code.

I'm busy merging pure bug fixes, so I'll try to review this
one soon.  Likely it will go into 2.6.12, although I may try
to sneak it into the pending 2.6.11 tree.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [RESEND 5/6]: PKT_SCHED: Metadata ematch (meta)
  2005-01-23 23:04 ` [PATCH 5/6]: PKT_SCHED: Metadata ematch (meta) Thomas Graf
@ 2005-01-26 20:05   ` Thomas Graf
  0 siblings, 0 replies; 21+ messages in thread
From: Thomas Graf @ 2005-01-26 20:05 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev

RESEND: I gave up on trying to dump the meta configuration without TLVs, it gets
        too complicated and error prone. Added dumping via TLVs.

The meta ematch allows comparing various metadata values against
static values from usersapce or other metadata values. It currently
supports various numeric meta values such as netfilter mark, packet
length, security level, interface indices, tc classid, load average,
a random value but also variable length values such as interface
names. Adding support for additional meta values is as easy as
writing a data collector (usually 1-5 lines of code) and assign it
to a id and type by putting it into the meta operations table.

Signed-off-by: Thomas Graf <tgraf@suug.ch>

diff -Nru linux-2.6.11-rc2-bk4.orig/include/linux/pkt_cls.h linux-2.6.11-rc2-bk4/include/linux/pkt_cls.h
--- linux-2.6.11-rc2-bk4.orig/include/linux/pkt_cls.h	2005-01-26 18:52:30.000000000 +0100
+++ linux-2.6.11-rc2-bk4/include/linux/pkt_cls.h	2005-01-26 18:52:40.000000000 +0100
@@ -386,6 +386,7 @@
 	TCF_EM_CMP,
 	TCF_EM_NBYTE,
 	TCF_EM_U32,
+	TCF_EM_META,
 	__TCF_EM_MAX
 };
 
diff -Nru linux-2.6.11-rc2-bk4.orig/include/linux/tc_ematch/tc_em_meta.h linux-2.6.11-rc2-bk4/include/linux/tc_ematch/tc_em_meta.h
--- linux-2.6.11-rc2-bk4.orig/include/linux/tc_ematch/tc_em_meta.h	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.11-rc2-bk4/include/linux/tc_ematch/tc_em_meta.h	2005-01-26 18:21:12.000000000 +0100
@@ -0,0 +1,69 @@
+#ifndef __LINUX_TC_EM_META_H
+#define __LINUX_TC_EM_META_H
+
+#include <linux/pkt_cls.h>
+
+enum
+{
+	TCA_EM_META_UNSPEC,
+	TCA_EM_META_HDR,
+	TCA_EM_META_LVALUE,
+	TCA_EM_META_RVALUE,
+	__TCA_EM_META_MAX
+};
+#define TCA_EM_META_MAX (__TCA_EM_META_MAX - 1)
+
+struct tcf_meta_val
+{
+	__u16			kind;
+	__u8			shift;
+	__u8			op;
+};
+
+#define TCF_META_TYPE_MASK	(0xf << 12)
+#define TCF_META_TYPE(kind)	(((kind) & TCF_META_TYPE_MASK) >> 12)
+#define TCF_META_ID_MASK	0x7ff
+#define TCF_META_ID(kind)	((kind) & TCF_META_ID_MASK)
+
+enum
+{
+	TCF_META_TYPE_VAR,
+	TCF_META_TYPE_INT,
+	__TCF_META_TYPE_MAX
+};
+#define TCF_META_TYPE_MAX (__TCF_META_TYPE_MAX - 1)
+
+enum
+{
+	TCF_META_ID_VALUE,
+	TCF_META_ID_RANDOM,
+	TCF_META_ID_LOADAVG_0,
+	TCF_META_ID_LOADAVG_1,
+	TCF_META_ID_LOADAVG_2,
+	TCF_META_ID_DEV,
+	TCF_META_ID_INDEV,
+	TCF_META_ID_REALDEV,
+	TCF_META_ID_PRIORITY,
+	TCF_META_ID_PROTOCOL,
+	TCF_META_ID_SECURITY,
+	TCF_META_ID_PKTTYPE,
+	TCF_META_ID_PKTLEN,
+	TCF_META_ID_DATALEN,
+	TCF_META_ID_MACLEN,
+	TCF_META_ID_NFMARK,
+	TCF_META_ID_TCINDEX,
+	TCF_META_ID_TCVERDICT,
+	TCF_META_ID_TCCLASSID,
+	TCF_META_ID_RTCLASSID,
+	TCF_META_ID_RTIIF,
+	__TCF_META_ID_MAX
+};
+#define TCF_META_ID_MAX (__TCF_META_ID_MAX - 1)
+
+struct tcf_meta_hdr
+{
+	struct tcf_meta_val	left;
+	struct tcf_meta_val	right;
+};
+
+#endif
diff -Nru linux-2.6.11-rc2-bk4.orig/net/sched/Kconfig linux-2.6.11-rc2-bk4/net/sched/Kconfig
--- linux-2.6.11-rc2-bk4.orig/net/sched/Kconfig	2005-01-26 18:52:30.000000000 +0100
+++ linux-2.6.11-rc2-bk4/net/sched/Kconfig	2005-01-26 18:52:40.000000000 +0100
@@ -428,6 +428,17 @@
 	  To compile this code as a module, choose M here: the
 	  module will be called em_u32.
 
+config NET_EMATCH_META
+	tristate "Metadata"
+	depends on NET_EMATCH
+	---help---
+	  Say Y here if you want to be ablt to classify packets based on
+	  metadata such as load average, netfilter attributes, socket
+	  attributes and routing decisions.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called em_meta.
+
 config NET_CLS_ACT
 	bool "Packet ACTION"
 	depends on EXPERIMENTAL && NET_CLS && NET_QOS
diff -Nru linux-2.6.11-rc2-bk4.orig/net/sched/Makefile linux-2.6.11-rc2-bk4/net/sched/Makefile
--- linux-2.6.11-rc2-bk4.orig/net/sched/Makefile	2005-01-26 18:52:30.000000000 +0100
+++ linux-2.6.11-rc2-bk4/net/sched/Makefile	2005-01-26 18:52:40.000000000 +0100
@@ -37,3 +37,4 @@
 obj-$(CONFIG_NET_EMATCH_CMP)	+= em_cmp.o
 obj-$(CONFIG_NET_EMATCH_NBYTE)	+= em_nbyte.o
 obj-$(CONFIG_NET_EMATCH_U32)	+= em_u32.o
+obj-$(CONFIG_NET_EMATCH_META)	+= em_meta.o
diff -Nru linux-2.6.11-rc2-bk4.orig/net/sched/em_meta.c linux-2.6.11-rc2-bk4/net/sched/em_meta.c
--- linux-2.6.11-rc2-bk4.orig/net/sched/em_meta.c	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.11-rc2-bk4/net/sched/em_meta.c	2005-01-26 20:20:06.000000000 +0100
@@ -0,0 +1,661 @@
+/*
+ * net/sched/em_meta.c	Metadata ematch
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Thomas Graf <tgraf@suug.ch>
+ *
+ * ==========================================================================
+ * 
+ * 	The metadata ematch compares two meta objects where each object
+ * 	represents either a meta value stored in the kernel or a static
+ * 	value provided by userspace. The objects are not provided by
+ * 	userspace itself but rather a definition providing the information
+ * 	to build them. Every object is of a certain type which must be
+ * 	equal to the object it is being compared to.
+ *
+ * 	The definition of a objects conists of the type (meta type), a
+ * 	identifier (meta id) and additional type specific information.
+ * 	The meta id is either TCF_META_TYPE_VALUE for values provided by
+ * 	userspace or a index to the meta operations table consisting of
+ * 	function pointers to type specific meta data collectors returning
+ * 	the value of the requested meta value.
+ *
+ * 	         lvalue                                   rvalue
+ * 	      +-----------+                           +-----------+
+ * 	      | type: INT |                           | type: INT |
+ * 	 def  | id: INDEV |                           | id: VALUE |
+ * 	      | data:     |                           | data: 3   |
+ * 	      +-----------+                           +-----------+
+ * 	            |                                       |
+ * 	            ---> meta_ops[INT][INDEV](...)          |
+ *                            |                            |
+ * 	            -----------                             |
+ * 	            V                                       V
+ * 	      +-----------+                           +-----------+
+ * 	      | type: INT |                           | type: INT |
+ * 	 obj  | id: INDEV |                           | id: VALUE |
+ * 	      | data: 2   |<--data got filled out     | data: 3   |
+ * 	      +-----------+                           +-----------+
+ * 	            |                                         |
+ * 	            --------------> 2  equals 3 <--------------
+ *
+ * 	This is a simplified schema, the complexity varies depending
+ * 	on the meta type. Obviously, the length of the data must also
+ * 	be provided for non-numeric types.
+ *
+ * 	Additionaly, type dependant modifiers such as shift operators
+ * 	or mask may be applied to extend the functionaliy. As of now,
+ * 	the variable length type supports shifting the byte string to
+ * 	the right, eating up any number of octets and thus supporting
+ * 	wildcard interface name comparisons such as "ppp%" matching
+ * 	ppp0..9.
+ *
+ * 	NOTE: Certain meta values depend on other subsystems and are
+ * 	      only available if that subsytem is enabled in the kernel.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <linux/random.h>
+#include <linux/tc_ematch/tc_em_meta.h>
+#include <net/dst.h>
+#include <net/route.h>
+#include <net/pkt_cls.h>
+
+struct meta_obj
+{
+	unsigned long		value;
+	unsigned int		len;
+};
+
+struct meta_value
+{
+	struct tcf_meta_val	hdr;
+	unsigned long		val;
+	unsigned int		len;
+};
+
+struct meta_match
+{
+	struct meta_value	lvalue;
+	struct meta_value	rvalue;
+};
+
+static inline int meta_id(struct meta_value *v)
+{
+	return TCF_META_ID(v->hdr.kind);
+}
+
+static inline int meta_type(struct meta_value *v)
+{
+	return TCF_META_TYPE(v->hdr.kind);
+}
+
+#define META_COLLECTOR(FUNC) static void meta_##FUNC(struct sk_buff *skb, \
+	struct tcf_pkt_info *info, struct meta_value *v, \
+	struct meta_obj *dst, int *err)
+
+/**************************************************************************
+ * System status & misc
+ **************************************************************************/
+
+META_COLLECTOR(int_random)
+{
+	get_random_bytes(&dst->value, sizeof(dst->value));
+}
+
+static inline unsigned long fixed_loadavg(int load)
+{
+	int rnd_load = load + (FIXED_1/200);
+	int rnd_frac = ((rnd_load & (FIXED_1-1)) * 100) >> FSHIFT;
+
+	return ((rnd_load >> FSHIFT) * 100) + rnd_frac;
+}
+
+META_COLLECTOR(int_loadavg_0)
+{
+	dst->value = fixed_loadavg(avenrun[0]);
+}
+
+META_COLLECTOR(int_loadavg_1)
+{
+	dst->value = fixed_loadavg(avenrun[1]);
+}
+
+META_COLLECTOR(int_loadavg_2)
+{
+	dst->value = fixed_loadavg(avenrun[2]);
+}
+
+/**************************************************************************
+ * Device names & indices
+ **************************************************************************/
+
+static inline int int_dev(struct net_device *dev, struct meta_obj *dst)
+{
+	if (unlikely(dev == NULL))
+		return -1;
+
+	dst->value = dev->ifindex;
+	return 0;
+}
+
+static inline int var_dev(struct net_device *dev, struct meta_obj *dst)
+{
+	if (unlikely(dev == NULL))
+		return -1;
+
+	dst->value = (unsigned long) dev->name;
+	dst->len = strlen(dev->name);
+	return 0;
+}
+
+META_COLLECTOR(int_dev)
+{
+	*err = int_dev(skb->dev, dst);
+}
+
+META_COLLECTOR(var_dev)
+{
+	*err = var_dev(skb->dev, dst);
+}
+
+META_COLLECTOR(int_indev)
+{
+	*err = int_dev(skb->input_dev, dst);
+}
+
+META_COLLECTOR(var_indev)
+{
+	*err = var_dev(skb->input_dev, dst);
+}
+
+META_COLLECTOR(int_realdev)
+{
+	*err = int_dev(skb->real_dev, dst);
+}
+
+META_COLLECTOR(var_realdev)
+{
+	*err = var_dev(skb->real_dev, dst);
+}
+
+/**************************************************************************
+ * skb attributes
+ **************************************************************************/
+
+META_COLLECTOR(int_priority)
+{
+	dst->value = skb->priority;
+}
+
+META_COLLECTOR(int_protocol)
+{
+	/* Let userspace take care of the byte ordering */
+	dst->value = skb->protocol;
+}
+
+META_COLLECTOR(int_security)
+{
+	dst->value = skb->security;
+}
+
+META_COLLECTOR(int_pkttype)
+{
+	dst->value = skb->pkt_type;
+}
+
+META_COLLECTOR(int_pktlen)
+{
+	dst->value = skb->len;
+}
+
+META_COLLECTOR(int_datalen)
+{
+	dst->value = skb->data_len;
+}
+
+META_COLLECTOR(int_maclen)
+{
+	dst->value = skb->mac_len;
+}
+
+/**************************************************************************
+ * Netfilter
+ **************************************************************************/
+
+#ifdef CONFIG_NETFILTER
+META_COLLECTOR(int_nfmark)
+{
+	dst->value = skb->nfmark;
+}
+#endif
+
+/**************************************************************************
+ * Traffic Control
+ **************************************************************************/
+
+META_COLLECTOR(int_tcindex)
+{
+	dst->value = skb->tc_index;
+}
+
+#ifdef CONFIG_NET_CLS_ACT
+META_COLLECTOR(int_tcverd)
+{
+	dst->value = skb->tc_verd;
+}
+
+META_COLLECTOR(int_tcclassid)
+{
+	dst->value = skb->tc_classid;
+}
+#endif
+
+/**************************************************************************
+ * Routing
+ **************************************************************************/
+
+#ifdef CONFIG_NET_CLS_ROUTE
+META_COLLECTOR(int_rtclassid)
+{
+	if (unlikely(skb->dst == NULL))
+		*err = -1;
+	else
+		dst->value = skb->dst->tclassid;
+}
+#endif
+
+META_COLLECTOR(int_rtiif)
+{
+	if (unlikely(skb->dst == NULL))
+		*err = -1;
+	else
+		dst->value = ((struct rtable*) skb->dst)->fl.iif;
+}
+
+/**************************************************************************
+ * Meta value collectors assignment table
+ **************************************************************************/
+
+struct meta_ops
+{
+	void		(*get)(struct sk_buff *, struct tcf_pkt_info *,
+			       struct meta_value *, struct meta_obj *, int *);
+};
+
+/* Meta value operations table listing all meta value collectors and
+ * assigns them to a type and meta id. */
+static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = {
+	[TCF_META_TYPE_VAR] = {
+		[TCF_META_ID_DEV]	= { .get = meta_var_dev },
+		[TCF_META_ID_INDEV]	= { .get = meta_var_indev },
+		[TCF_META_ID_REALDEV]	= { .get = meta_var_realdev }
+	},
+	[TCF_META_TYPE_INT] = {
+		[TCF_META_ID_RANDOM]	= { .get = meta_int_random },
+		[TCF_META_ID_LOADAVG_0]	= { .get = meta_int_loadavg_0 },
+		[TCF_META_ID_LOADAVG_1]	= { .get = meta_int_loadavg_1 },
+		[TCF_META_ID_LOADAVG_2]	= { .get = meta_int_loadavg_2 },
+		[TCF_META_ID_DEV]	= { .get = meta_int_dev },
+		[TCF_META_ID_INDEV]	= { .get = meta_int_indev },
+		[TCF_META_ID_REALDEV]	= { .get = meta_int_realdev },
+		[TCF_META_ID_PRIORITY]	= { .get = meta_int_priority },
+		[TCF_META_ID_PROTOCOL]	= { .get = meta_int_protocol },
+		[TCF_META_ID_SECURITY]	= { .get = meta_int_security },
+		[TCF_META_ID_PKTTYPE]	= { .get = meta_int_pkttype },
+		[TCF_META_ID_PKTLEN]	= { .get = meta_int_pktlen },
+		[TCF_META_ID_DATALEN]	= { .get = meta_int_datalen },
+		[TCF_META_ID_MACLEN]	= { .get = meta_int_maclen },
+#ifdef CONFIG_NETFILTER
+		[TCF_META_ID_NFMARK]	= { .get = meta_int_nfmark },
+#endif
+		[TCF_META_ID_TCINDEX]	= { .get = meta_int_tcindex },
+#ifdef CONFIG_NET_CLS_ACT
+		[TCF_META_ID_TCVERDICT]	= { .get = meta_int_tcverd },
+		[TCF_META_ID_TCCLASSID]	= { .get = meta_int_tcclassid },
+#endif
+#ifdef CONFIG_NET_CLS_ROUTE
+		[TCF_META_ID_RTCLASSID]	= { .get = meta_int_rtclassid },
+#endif
+		[TCF_META_ID_RTIIF]	= { .get = meta_int_rtiif }
+	}
+};
+
+static inline struct meta_ops * meta_ops(struct meta_value *val)
+{
+	return &__meta_ops[meta_type(val)][meta_id(val)];
+}
+
+/**************************************************************************
+ * Type specific operations for TCF_META_TYPE_VAR
+ **************************************************************************/
+
+static int meta_var_compare(struct meta_obj *a, struct meta_obj *b)
+{
+	int r = a->len - b->len;
+
+	if (r == 0)
+		r = memcmp((void *) a->value, (void *) b->value, a->len);
+
+	return r;
+}
+
+static int meta_var_change(struct meta_value *dst, struct rtattr *rta)
+{
+	int len = RTA_PAYLOAD(rta);
+
+	dst->val = (unsigned long) kmalloc(len, GFP_KERNEL);
+	if (dst->val == 0UL)
+		return -ENOMEM;
+	memcpy((void *) dst->val, RTA_DATA(rta), len);
+	dst->len = len;
+	return 0;
+}
+
+static void meta_var_destroy(struct meta_value *v)
+{
+	if (v->val)
+		kfree((void *) v->val);
+}
+
+static void meta_var_apply_extras(struct meta_value *v,
+				  struct meta_obj *dst)
+{
+	int shift = v->hdr.shift;
+
+	if (shift && shift < dst->len)
+		dst->len -= shift;
+}
+
+static int meta_var_dump(struct sk_buff *skb, struct meta_value *v, int tlv)
+{
+	if (v->val && v->len)
+		RTA_PUT(skb, tlv, v->len, (void *) v->val);
+	return 0;
+
+rtattr_failure:
+	return -1;
+}
+
+/**************************************************************************
+ * Type specific operations for TCF_META_TYPE_INT
+ **************************************************************************/
+
+static int meta_int_compare(struct meta_obj *a, struct meta_obj *b)
+{
+	/* Let gcc optimize it, the unlikely is not really based on
+	 * some numbers but jump free code for mismatches seems
+	 * more logical. */
+	if (unlikely(a == b))
+		return 0;
+	else if (a < b)
+		return -1;
+	else
+		return 1;
+}
+
+static int meta_int_change(struct meta_value *dst, struct rtattr *rta)
+{
+	if (RTA_PAYLOAD(rta) >= sizeof(unsigned long)) {
+		dst->val = *(unsigned long *) RTA_DATA(rta);
+		dst->len = sizeof(unsigned long);
+	} else if (RTA_PAYLOAD(rta) == sizeof(u32)) {
+		dst->val = *(u32 *) RTA_DATA(rta);
+		dst->len = sizeof(u32);
+	} else
+		return -EINVAL;
+
+	return 0;
+}
+
+static void meta_int_apply_extras(struct meta_value *v,
+				  struct meta_obj *dst)
+{
+	if (v->hdr.shift)
+		dst->value >>= v->hdr.shift;
+
+	if (v->val)
+		dst->value &= v->val;
+}
+
+static int meta_int_dump(struct sk_buff *skb, struct meta_value *v, int tlv)
+{
+	if (v->len == sizeof(unsigned long))
+		RTA_PUT(skb, tlv, sizeof(unsigned long), &v->val);
+	else if (v->len == sizeof(u32)) {
+		u32 d = v->val;
+		RTA_PUT(skb, tlv, sizeof(d), &d);
+	}
+
+	return 0;
+
+rtattr_failure:
+	return -1;
+}
+
+/**************************************************************************
+ * Type specific operations table
+ **************************************************************************/
+
+struct meta_type_ops
+{
+	void	(*destroy)(struct meta_value *);
+	int	(*compare)(struct meta_obj *, struct meta_obj *);
+	int	(*change)(struct meta_value *, struct rtattr *);
+	void	(*apply_extras)(struct meta_value *, struct meta_obj *);
+	int	(*dump)(struct sk_buff *, struct meta_value *, int);
+};
+
+static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX+1] = {
+	[TCF_META_TYPE_VAR] = {
+		.destroy = meta_var_destroy,
+		.compare = meta_var_compare,
+		.change = meta_var_change,
+		.apply_extras = meta_var_apply_extras,
+		.dump = meta_var_dump
+	},
+	[TCF_META_TYPE_INT] = {
+		.compare = meta_int_compare,
+		.change = meta_int_change,
+		.apply_extras = meta_int_apply_extras,
+		.dump = meta_int_dump
+	}
+};
+
+static inline struct meta_type_ops * meta_type_ops(struct meta_value *v)
+{
+	return &__meta_type_ops[meta_type(v)];
+}
+
+/**************************************************************************
+ * Core
+ **************************************************************************/
+
+static inline int meta_get(struct sk_buff *skb, struct tcf_pkt_info *info, 
+			   struct meta_value *v, struct meta_obj *dst)
+{
+	int err = 0;
+
+	if (meta_id(v) == TCF_META_ID_VALUE) {
+		dst->value = v->val;
+		dst->len = v->len;
+		return 0;
+	}
+
+	meta_ops(v)->get(skb, info, v, dst, &err);
+	if (err < 0)
+		return err;
+
+	if (meta_type_ops(v)->apply_extras)
+	    meta_type_ops(v)->apply_extras(v, dst);
+
+	return 0;
+}
+
+static int em_meta_match(struct sk_buff *skb, struct tcf_ematch *m,
+			 struct tcf_pkt_info *info)
+{
+	int r;
+	struct meta_match *meta = (struct meta_match *) m->data;
+	struct meta_obj l_value, r_value;
+
+	if (meta_get(skb, info, &meta->lvalue, &l_value) < 0 ||
+	    meta_get(skb, info, &meta->rvalue, &r_value) < 0)
+		return 0;
+
+	r = meta_type_ops(&meta->lvalue)->compare(&l_value, &r_value);
+
+	switch (meta->lvalue.hdr.op) {
+		case TCF_EM_OPND_EQ:
+			return !r;
+		case TCF_EM_OPND_LT:
+			return r < 0;
+		case TCF_EM_OPND_GT:
+			return r > 0;
+	}
+
+	return 0;
+}
+
+static inline void meta_delete(struct meta_match *meta)
+{
+	struct meta_type_ops *ops = meta_type_ops(&meta->lvalue);
+
+	if (ops && ops->destroy) {
+		ops->destroy(&meta->lvalue);
+		ops->destroy(&meta->rvalue);
+	}
+
+	kfree(meta);
+}
+
+static inline int meta_change_data(struct meta_value *dst, struct rtattr *rta)
+{
+	if (rta) {
+		if (RTA_PAYLOAD(rta) == 0)
+			return -EINVAL;
+
+		return meta_type_ops(dst)->change(dst, rta);
+	}
+
+	return 0;
+}
+
+static inline int meta_is_supported(struct meta_value *val)
+{
+	return (!meta_id(val) || meta_ops(val)->get);
+}
+
+static int em_meta_change(struct tcf_proto *tp, void *data, int len,
+			  struct tcf_ematch *m)
+{
+	int err = -EINVAL;
+	struct rtattr *tb[TCA_EM_META_MAX];
+	struct tcf_meta_hdr *hdr;
+	struct meta_match *meta = NULL;
+	
+	if (rtattr_parse(tb, TCA_EM_META_MAX, data, len) < 0)
+		goto errout;
+
+	if (tb[TCA_EM_META_HDR-1] == NULL ||
+	    RTA_PAYLOAD(tb[TCA_EM_META_HDR-1]) < sizeof(*hdr))
+		goto errout;
+	hdr = RTA_DATA(tb[TCA_EM_META_HDR-1]);
+
+	if (TCF_META_TYPE(hdr->left.kind) != TCF_META_TYPE(hdr->right.kind) ||
+	    TCF_META_TYPE(hdr->left.kind) > TCF_META_TYPE_MAX ||
+	    TCF_META_ID(hdr->left.kind) > TCF_META_ID_MAX ||
+	    TCF_META_ID(hdr->right.kind) > TCF_META_ID_MAX)
+		goto errout;
+
+	meta = kmalloc(sizeof(*meta), GFP_KERNEL);
+	if (meta == NULL)
+		goto errout;
+	memset(meta, 0, sizeof(*meta));
+
+	memcpy(&meta->lvalue.hdr, &hdr->left, sizeof(hdr->left));
+	memcpy(&meta->rvalue.hdr, &hdr->right, sizeof(hdr->right));
+
+	if (!meta_is_supported(&meta->lvalue) ||
+	    !meta_is_supported(&meta->rvalue)) {
+		err = -EOPNOTSUPP;
+		goto errout;
+	}
+
+	if (meta_change_data(&meta->lvalue, tb[TCA_EM_META_LVALUE-1]) < 0 ||
+	    meta_change_data(&meta->rvalue, tb[TCA_EM_META_RVALUE-1]) < 0)
+		goto errout;
+
+	m->datalen = sizeof(*meta);
+	m->data = (unsigned long) meta;
+
+	err = 0;
+errout:
+	if (err && meta)
+		meta_delete(meta);
+	return err;
+}
+
+static void em_meta_destroy(struct tcf_proto *tp, struct tcf_ematch *m)
+{
+	if (m)
+		meta_delete((struct meta_match *) m->data);
+}
+
+static int em_meta_dump(struct sk_buff *skb, struct tcf_ematch *em)
+{
+	struct meta_match *meta = (struct meta_match *) em->data;
+	struct tcf_meta_hdr hdr;
+	struct meta_type_ops *ops;
+
+	memset(&hdr, 0, sizeof(hdr));
+	memcpy(&hdr.left, &meta->lvalue.hdr, sizeof(hdr.left));
+	memcpy(&hdr.right, &meta->rvalue.hdr, sizeof(hdr.right));
+
+	RTA_PUT(skb, TCA_EM_META_HDR, sizeof(hdr), &hdr);
+
+	ops = meta_type_ops(&meta->lvalue);
+	if (ops->dump(skb, &meta->lvalue, TCA_EM_META_LVALUE) < 0 ||
+	    ops->dump(skb, &meta->rvalue, TCA_EM_META_RVALUE) < 0)
+		goto rtattr_failure;
+
+	return 0;
+
+rtattr_failure:
+	return -1;
+}		
+
+static struct tcf_ematch_ops em_meta_ops = {
+	.kind	  = TCF_EM_META,
+	.change	  = em_meta_change,
+	.match	  = em_meta_match,
+	.destroy  = em_meta_destroy,
+	.dump	  = em_meta_dump,
+	.owner	  = THIS_MODULE,
+	.link	  = LIST_HEAD_INIT(em_meta_ops.link)
+};
+
+static int __init init_em_meta(void)
+{
+	return tcf_em_register(&em_meta_ops);
+}
+
+static void __exit exit_em_meta(void) 
+{
+	tcf_em_unregister(&em_meta_ops);
+}
+
+MODULE_LICENSE("GPL");
+
+module_init(init_em_meta);
+module_exit(exit_em_meta);

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCHSET] Extended matches and basic classifier
  2005-01-23 23:00 [PATCHSET] Extended matches and basic classifier Thomas Graf
                   ` (7 preceding siblings ...)
  2005-01-26  5:52 ` David S. Miller
@ 2005-02-15 21:38 ` David S. Miller
  8 siblings, 0 replies; 21+ messages in thread
From: David S. Miller @ 2005-02-15 21:38 UTC (permalink / raw)
  To: Thomas Graf; +Cc: netdev

On Mon, 24 Jan 2005 00:00:12 +0100
Thomas Graf <tgraf@suug.ch> wrote:

> This patchset adds the ematch API, the ematches cmp, nbyte, u32, meta,
> and the basic classifier. It doesn't touch any existing code.

All added to my net-2.6.12 pending tree.  Thanks Thomas.

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2005-02-15 21:38 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-01-23 23:00 [PATCHSET] Extended matches and basic classifier Thomas Graf
2005-01-23 23:01 ` [PATCH 1/6] PKT_SCHED: Extended Matches API Thomas Graf
2005-01-24  0:12   ` Patrick McHardy
2005-01-24  0:49     ` Thomas Graf
2005-01-24  0:56       ` Patrick McHardy
2005-01-24  0:59         ` Thomas Graf
2005-01-25 23:22   ` [RESEND " Thomas Graf
2005-01-23 23:02 ` [PATCH 2/6] PKT_SCHED: Simple comparison ematch (cmp) Thomas Graf
2005-01-24  0:14   ` Patrick McHardy
2005-01-24  0:55     ` Thomas Graf
2005-01-23 23:03 ` [PATCH 3/6] PKT_SCHED: Multi byte comparison ematch (nbyte) Thomas Graf
2005-01-23 23:03 ` [PATCH 4/6] PKT_SCHED: u32 ematch Thomas Graf
2005-01-24  0:24   ` Patrick McHardy
2005-01-24  0:58     ` Thomas Graf
2005-01-25 23:24   ` [RESEND " Thomas Graf
2005-01-23 23:04 ` [PATCH 5/6]: PKT_SCHED: Metadata ematch (meta) Thomas Graf
2005-01-26 20:05   ` [RESEND " Thomas Graf
2005-01-23 23:05 ` [PATCH 6/6] PKT_SCHED: Basic classifier Thomas Graf
2005-01-23 23:21 ` [PATCHSET] Extended matches and basic classifier Thomas Graf
2005-01-26  5:52 ` David S. Miller
2005-02-15 21:38 ` David S. Miller

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.