All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC] string matching ematch
@ 2005-01-26 15:07 Thomas Graf
  2005-01-26 21:03 ` David S. Miller
  2005-01-27 20:17 ` Pablo Neira
  0 siblings, 2 replies; 9+ messages in thread
From: Thomas Graf @ 2005-01-26 15:07 UTC (permalink / raw)
  To: Jamal Hadi Salim, Patrick McHardy; +Cc: netdev

I'd like to discuss the string matching ematch, I don't care about the
algorithm used but rather whether to make it stateful, match over
fragments, etc. I attached a simple stateless string matching ematch
using the Knuth-Morris-Pratt algorithm as a starting point.

KMP   ::= pattern [ BEGIN ] [ END ]
BEGIN ::= from OFFSET [ layer LAYER ]
END   ::= to OFFSET [ layer LAYER ]

where:
 default value for BEGIN: skb->h.raw
 default value for END: skb->tail

The prefix table used by the KMP algorithm is calculated in userspace.

The layering schema used is the one I introduced in the ematch patchset
which is likely to be extended by another layer dynamically calculated
from the packet payload.

The limitation is pretty obvious, it relies on linear skbs and is not
quite limited in matching the payload of a stream based protocol.

Making it aware of non-linear skbs isn't that hard, the matcher gets
a bit more complicated but shouldn't slow down too much so this is
probably worth doing.

Making it aware of a state gets much more complicated and if we don't
queue packets we can only classify on the packet holding the last
part of the pattern. So we need to either tell the underlying qdisc
to hold the packet for some time or accept this limitation. The
state of the matcher could be stored in the conntrack entry so the
match can be resumed when the relevent packet arrives. We're moving
more and more towards netfilter and application level filtering and
I think this should be covered by netfilter itself. If we do this,
we should at least share the code with netfilter.

Thoughts?

diff -Nru linux-2.6.11-rc2-bk3.orig/include/linux/pkt_cls.h linux-2.6.11-rc2-bk3/include/linux/pkt_cls.h
--- linux-2.6.11-rc2-bk3.orig/include/linux/pkt_cls.h	2005-01-26 13:28:25.000000000 +0100
+++ linux-2.6.11-rc2-bk3/include/linux/pkt_cls.h	2005-01-26 14:03:42.000000000 +0100
@@ -401,6 +401,7 @@
 	TCF_EM_NBYTE,
 	TCF_EM_U32,
 	TCF_EM_META,
+	TCF_EM_KMP,
 	__TCF_EM_MAX
 };
 
diff -Nru linux-2.6.11-rc2-bk3.orig/include/linux/tc_ematch/tc_em_kmp.h linux-2.6.11-rc2-bk3/include/linux/tc_ematch/tc_em_kmp.h
--- linux-2.6.11-rc2-bk3.orig/include/linux/tc_ematch/tc_em_kmp.h	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.11-rc2-bk3/include/linux/tc_ematch/tc_em_kmp.h	2005-01-26 13:42:49.000000000 +0100
@@ -0,0 +1,24 @@
+#ifndef __LINUX_TC_EM_KMP_H
+#define __LINUX_TC_EM_KMP_H
+
+#include <linux/pkt_cls.h>
+
+enum
+{
+	TCA_EM_KMP_UNSPEC,
+	TCA_EM_KMP_FROM,	/* struct tcf_kmp_offset */
+	TCA_EM_KMP_TO,		/* struct tcf_kmp_offset */
+	TCA_EM_KMP_LENGTH,	/* u32 */
+	TCA_EM_KMP_PATTERN,	/* unsigned char[] */
+	TCA_EM_KMP_PREFIX_TBL,	/* u32[] */
+	__TCA_EM_KMP_MAX
+};
+#define TCA_EM_KMP_MAX (__TCA_EM_KMP_MAX - 1)
+
+struct tcf_kmp_offset
+{
+	__u16		off;
+	__u16		layer;
+};
+
+#endif
diff -Nru linux-2.6.11-rc2-bk3.orig/net/sched/Kconfig linux-2.6.11-rc2-bk3/net/sched/Kconfig
--- linux-2.6.11-rc2-bk3.orig/net/sched/Kconfig	2005-01-26 13:28:25.000000000 +0100
+++ linux-2.6.11-rc2-bk3/net/sched/Kconfig	2005-01-26 14:19:59.000000000 +0100
@@ -449,6 +449,16 @@
 	  To compile this code as a module, choose M here: the
 	  module will be called em_meta.
 
+config NET_EMATCH_KMP
+	tristate "Knuth-Morris-Pratt string matching"
+	depends on NET_EMATCH
+	---help---
+	  Say Y here if you want to be able to classify packets by
+	  matching strings using the KMP string matching algorithm.
+	  
+	  To compile this code as a module, choose M here: the
+	  module will be called em_kmp.
+
 config NET_CLS_ACT
 	bool "Packet ACTION"
 	depends on EXPERIMENTAL && NET_CLS && NET_QOS
diff -Nru linux-2.6.11-rc2-bk3.orig/net/sched/Makefile linux-2.6.11-rc2-bk3/net/sched/Makefile
--- linux-2.6.11-rc2-bk3.orig/net/sched/Makefile	2005-01-26 13:28:25.000000000 +0100
+++ linux-2.6.11-rc2-bk3/net/sched/Makefile	2005-01-26 14:22:54.000000000 +0100
@@ -39,3 +39,4 @@
 obj-$(CONFIG_NET_EMATCH_NBYTE)	+= em_nbyte.o
 obj-$(CONFIG_NET_EMATCH_U32)	+= em_u32.o
 obj-$(CONFIG_NET_EMATCH_META)	+= em_meta.o
+obj-$(CONFIG_NET_EMATCH_KMP)	+= em_kmp.o
diff -Nru linux-2.6.11-rc2-bk3.orig/net/sched/em_kmp.c linux-2.6.11-rc2-bk3/net/sched/em_kmp.c
--- linux-2.6.11-rc2-bk3.orig/net/sched/em_kmp.c	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.11-rc2-bk3/net/sched/em_kmp.c	2005-01-26 14:25:18.000000000 +0100
@@ -0,0 +1,175 @@
+/*
+ * net/sched/em_kmp.c	Knuth-Morris-Pratt string matching
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Thomas Graf <tgraf@suug.ch>
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <linux/tc_ematch/tc_em_kmp.h>
+#include <net/pkt_cls.h>
+
+struct kmp_data
+{
+	struct tcf_kmp_offset	from;
+	struct tcf_kmp_offset	to;
+	u32			len;
+	u32			prefix_tbl[0];
+};
+
+#define KMP_PATTERN(kmp) \
+	((void*) (kmp) + sizeof(struct kmp_data) + ((kmp)->len * sizeof(u32)))
+
+static int em_kmp_change(struct tcf_proto *tp, void *data, int data_len,
+			 struct tcf_ematch *em)
+{
+	int kmp_size, err = -EINVAL;
+	struct rtattr *tb[TCA_EM_KMP_MAX];
+	struct tcf_kmp_offset *koff;
+	struct kmp_data *kmp;
+	u32 len;
+	
+	if (rtattr_parse(tb, TCA_EM_KMP_MAX, data, data_len) < 0)
+		goto errout;
+
+	if (tb[TCA_EM_KMP_LENGTH-1] == NULL ||
+	    tb[TCA_EM_KMP_PATTERN-1] == NULL ||
+	    tb[TCA_EM_KMP_PREFIX_TBL-1] == NULL)
+		goto errout;
+
+	if (RTA_PAYLOAD(tb[TCA_EM_KMP_LENGTH-1]) < sizeof(len))
+		goto errout;
+	len = *(u32 *) RTA_DATA(tb[TCA_EM_KMP_LENGTH-1]);
+
+	if (RTA_PAYLOAD(tb[TCA_EM_KMP_PATTERN-1]) < len ||
+	    RTA_PAYLOAD(tb[TCA_EM_KMP_PREFIX_TBL-1]) < (len * sizeof(u32)))
+		goto errout;
+
+	if (tb[TCA_EM_KMP_FROM-1])
+		if (RTA_PAYLOAD(tb[TCA_EM_KMP_FROM-1]) < sizeof(*koff))
+			goto errout;
+	
+	if (tb[TCA_EM_KMP_TO-1])
+		if (RTA_PAYLOAD(tb[TCA_EM_KMP_TO-1]) < sizeof(*koff))
+			goto errout;
+
+	kmp_size = sizeof(*kmp) + (len * sizeof(u32)) + len;
+	kmp = kmalloc(kmp_size, GFP_KERNEL);
+	if (kmp == NULL) {
+		err = -ENOMEM;
+		goto errout;
+	}
+	memset(kmp, 0, kmp_size);
+
+	kmp->len = len;
+
+	if (tb[TCA_EM_KMP_FROM-1]) {
+		koff = RTA_DATA(tb[TCA_EM_KMP_FROM-1]);
+		memcpy(&kmp->from, koff, sizeof(*koff));
+	} else
+		kmp->from.layer = TCF_LAYER_TRANSPORT;
+
+	if (tb[TCA_EM_KMP_TO-1]) {
+		koff = RTA_DATA(tb[TCA_EM_KMP_TO-1]);
+		memcpy(&kmp->to, koff, sizeof(*koff));
+	}
+
+	memcpy(kmp->prefix_tbl, RTA_DATA(tb[TCA_EM_KMP_PREFIX_TBL-1]),
+		len * sizeof(u32));
+
+	memcpy(KMP_PATTERN(kmp), RTA_DATA(tb[TCA_EM_KMP_PATTERN-1]), len);
+	
+	em->datalen = kmp_size;
+	em->data = (unsigned long) kmp;
+
+	err = 0;
+errout:
+	return err;
+}
+
+static int em_kmp_match(struct sk_buff *skb, struct tcf_ematch *em,
+			struct tcf_pkt_info *info)
+{
+	struct kmp_data *kmp = (struct kmp_data *) em->data;
+	unsigned char *begin, *end;
+	unsigned char *pattern = KMP_PATTERN(kmp);
+	int q;
+
+	begin = tcf_get_base_ptr(skb, kmp->from.layer);
+	begin += kmp->from.off;
+
+	if (kmp->to.off || kmp->to.layer) {
+		end = tcf_get_base_ptr(skb, kmp->to.layer);
+		end += kmp->to.off;
+	} else
+		end = skb->tail;
+
+	if (!tcf_valid_offset(skb, begin, kmp->len) ||
+	    !tcf_valid_offset(skb, end, 0) ||
+	    (end - kmp->len) < begin)
+		return 0;
+
+	for (q = 0; begin < end; begin++) {
+		while (q > 0 && pattern[q] != *begin)
+			q = kmp->prefix_tbl[q - 1];
+		if (pattern[q] == *begin)
+			q++;
+		if (q == kmp->len)
+			return 1;
+	}
+
+	return 0;
+}
+
+static int em_kmp_dump(struct sk_buff *skb, struct tcf_ematch *em)
+{
+	struct kmp_data *kmp = (struct kmp_data *) em->data;
+
+	RTA_PUT(skb, TCA_EM_KMP_FROM, sizeof(kmp->from), &kmp->from);
+
+	if (kmp->to.off || kmp->to.layer)
+		RTA_PUT(skb, TCA_EM_KMP_TO, sizeof(kmp->to), &kmp->to);
+
+	RTA_PUT(skb, TCA_EM_KMP_LENGTH, sizeof(kmp->len), &kmp->len);
+	RTA_PUT(skb, TCA_EM_KMP_PATTERN, kmp->len, KMP_PATTERN(kmp));
+	RTA_PUT(skb, TCA_EM_KMP_PREFIX_TBL, kmp->len * sizeof(u32),
+		kmp->prefix_tbl);
+
+	return 0;
+rtattr_failure:
+	return -1;
+}
+
+static struct tcf_ematch_ops em_kmp_ops = {
+	.kind	  = TCF_EM_KMP,
+	.change	  = em_kmp_change,
+	.match	  = em_kmp_match,
+	.dump	  = em_kmp_dump,
+	.owner	  = THIS_MODULE,
+	.link	  = LIST_HEAD_INIT(em_kmp_ops.link)
+};
+
+static int __init init_em_kmp(void)
+{
+	return tcf_em_register(&em_kmp_ops);
+}
+
+static void __exit exit_em_kmp(void) 
+{
+	tcf_em_unregister(&em_kmp_ops);
+}
+
+MODULE_LICENSE("GPL");
+
+module_init(init_em_kmp);
+module_exit(exit_em_kmp);

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [RFC] string matching ematch
  2005-01-26 15:07 [RFC] string matching ematch Thomas Graf
@ 2005-01-26 21:03 ` David S. Miller
  2005-01-26 21:41   ` Thomas Graf
  2005-01-27 20:17 ` Pablo Neira
  1 sibling, 1 reply; 9+ messages in thread
From: David S. Miller @ 2005-01-26 21:03 UTC (permalink / raw)
  To: Thomas Graf; +Cc: hadi, kaber, netdev

On Wed, 26 Jan 2005 16:07:14 +0100
Thomas Graf <tgraf@suug.ch> wrote:

> I'd like to discuss the string matching ematch, I don't care about the
> algorithm used but rather whether to make it stateful, match over
> fragments, etc.

I think you'll need to make it stateful.

I assume this is meant to be used for things like catching references
to "Falun Gong" in SMTP sessions and stuff like that.  Not that I know
any entity interested in such applications :-)

Anyways, if the string goes across the TCP data portion of multiple
packets, statefulness becomes necessary to catch it.  Right?

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [RFC] string matching ematch
  2005-01-26 21:03 ` David S. Miller
@ 2005-01-26 21:41   ` Thomas Graf
  2005-01-26 23:26     ` David S. Miller
  0 siblings, 1 reply; 9+ messages in thread
From: Thomas Graf @ 2005-01-26 21:41 UTC (permalink / raw)
  To: David S. Miller; +Cc: hadi, kaber, netdev

* David S. Miller <20050126130323.2dc10187.davem@davemloft.net> 2005-01-26 13:03
> On Wed, 26 Jan 2005 16:07:14 +0100
> Thomas Graf <tgraf@suug.ch> wrote:
> 
> > I'd like to discuss the string matching ematch, I don't care about the
> > algorithm used but rather whether to make it stateful, match over
> > fragments, etc.
> 
> I think you'll need to make it stateful.
> 
> I assume this is meant to be used for things like catching references
> to "Falun Gong" in SMTP sessions and stuff like that.  Not that I know
> any entity interested in such applications :-)

Hehe, it's main purpose is to catch mail from your sweetie and redirect
them through a low latency link but of course you can also use it to
match on text based protocols without strict header ordering. ;->

> Anyways, if the string goes across the TCP data portion of multiple
> packets, statefulness becomes necessary to catch it.  Right?

Yes and no, it is of course necessary if one wants to match any string
at any position without limitation. OTOH, it gets quite complex. We'd
have to store the state of every configured kmp ematch to just be
able to tell the result. On top of that, the whole classification
process is stateless and should be kept like this. Assuming one
configures three ematches like this:

u32(ip dport 333 0xff)
and (
  kmp("Falun Gong" from 20 layer transport)
  and nbyte("SMTP" at 0 layer application)
)

assuming the u32 and nbyte ematch matches in the first packet, the
string matches only partially. We can't regard regard the ematch
tree as matched so we must return false. The next packet in the flow
completes the string but the nbyte match doesn't match anymore
so no match either. In fact a stateless filter can't do any better
but it doesn't consume as much resources.

There are cases where a statefull string matching would be of use, one
of them is when it doesn't matter which packet you actually classify,
e.g. dropping connections such as to protect your web server from stilly
requests.

I'm not sure if mixing stateful with stateless stuff is such of a good
idea. I think it should be separated and have stateful filters only be
executed when the flow matters, not packets.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [RFC] string matching ematch
  2005-01-26 21:41   ` Thomas Graf
@ 2005-01-26 23:26     ` David S. Miller
  2005-01-27 14:20       ` Thomas Graf
  0 siblings, 1 reply; 9+ messages in thread
From: David S. Miller @ 2005-01-26 23:26 UTC (permalink / raw)
  To: Thomas Graf; +Cc: hadi, kaber, netdev

On Wed, 26 Jan 2005 22:41:19 +0100
Thomas Graf <tgraf@suug.ch> wrote:

> I'm not sure if mixing stateful with stateless stuff is such of a good
> idea. I think it should be separated and have stateful filters only be
> executed when the flow matters, not packets.

I think keeping things totally stateless would be the best
idea.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [RFC] string matching ematch
  2005-01-26 23:26     ` David S. Miller
@ 2005-01-27 14:20       ` Thomas Graf
  0 siblings, 0 replies; 9+ messages in thread
From: Thomas Graf @ 2005-01-27 14:20 UTC (permalink / raw)
  To: David S. Miller; +Cc: hadi, kaber, netdev

* David S. Miller <20050126152609.59e1a15e.davem@davemloft.net> 2005-01-26 15:26
> On Wed, 26 Jan 2005 22:41:19 +0100
> Thomas Graf <tgraf@suug.ch> wrote:
> 
> > I'm not sure if mixing stateful with stateless stuff is such of a good
> > idea. I think it should be separated and have stateful filters only be
> > executed when the flow matters, not packets.
> 
> I think keeping things totally stateless would be the best
> idea.

Yes, I think so too.

I'm still thinking about pskbs in the background and the problem gets
more serious with this. So far, even with the multi byte ematch, all
filters were focusing on header content even if they were able to read
from any offset. 

Our goal must of course be to to avoid lineraization for as many cases
as possible, therefore I think it would be worth making all matchers
aware of pskbs without linearization. In case the packet goes though
the pedit or ipt action later on then we definitely need to take further
actions but these should be special cases.

For the string matching it is fairly easy, we can check for partial
matches at the end and begin of every fragment. The multi byte
ematch could be splitted into two paths, a fast path using memcmp
directly on the skb data for linear skbs and a "slow" path for
non-linear skbs fetching the needed byte sequence from the relevant
fragments first. The cmp ematch and u32 ematch and classifier could
fetch the needed bucket via a function tcf_read_at_offset taking
an offset and the length of the bucket {8|16|32} which iterates
over the fragments as needed. I know, for most cases it is not needed
but it shouldn't cost us too much.

Thoughts?

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [RFC] string matching ematch
  2005-01-26 15:07 [RFC] string matching ematch Thomas Graf
  2005-01-26 21:03 ` David S. Miller
@ 2005-01-27 20:17 ` Pablo Neira
  2005-01-27 20:51   ` Thomas Graf
  1 sibling, 1 reply; 9+ messages in thread
From: Pablo Neira @ 2005-01-27 20:17 UTC (permalink / raw)
  To: Thomas Graf; +Cc: Jamal Hadi Salim, Patrick McHardy, netdev

Hi Thomas,

Thomas Graf wrote:

>I'd like to discuss the string matching ematch, I don't care about the
>algorithm used but rather whether to make it stateful, match over
>fragments, etc. I attached a simple stateless string matching ematch
>using the Knuth-Morris-Pratt algorithm as a starting point.
>  
>

I've posted something similar after christmas in netfilter-devel[1]. 
It's fragment aware, actually my implementation uses boyer-moore to look 
for matches in the payload, and it uses brute force together with 
Rusty's skb_iter stuff to look for matches on the edges. The worst case 
is not that bad for small patterns. Anyway I'm still looking for 
alternatives solutions. BTW, Harald Welte is also interested in this stuff.

I'll give it more spins these days since I've got some spare time. I'll 
also have a look at your work. I think that we could join efforts and 
push something good, thoughts?

References:

https://lists.netfilter.org/pipermail/netfilter-devel/2005-January/018034.html

--
Pablo

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [RFC] string matching ematch
  2005-01-27 20:17 ` Pablo Neira
@ 2005-01-27 20:51   ` Thomas Graf
  2005-01-31 13:59     ` jamal
  0 siblings, 1 reply; 9+ messages in thread
From: Thomas Graf @ 2005-01-27 20:51 UTC (permalink / raw)
  To: Pablo Neira; +Cc: Jamal Hadi Salim, Patrick McHardy, netdev

* Pablo Neira <41F94C63.7010800@eurodev.net> 2005-01-27 21:17
> Thomas Graf wrote:
> 
> >I'd like to discuss the string matching ematch, I don't care about the
> >algorithm used but rather whether to make it stateful, match over
> >fragments, etc. I attached a simple stateless string matching ematch
> >using the Knuth-Morris-Pratt algorithm as a starting point.
> > 
> 
> I've posted something similar after christmas in netfilter-devel[1]. 
> It's fragment aware, actually my implementation uses boyer-moore to look 
> for matches in the payload, and it uses brute force together with 
> Rusty's skb_iter stuff to look for matches on the edges.

I've seen it but sticked to KMP because it uses less memory. Their
searching phase time complexity is nearly equal around O(nm) for
n being the length of T[] and m being the length of P[]. BM
definitely has a better performance for highly periodic P[]'s in a
periodic T[] though.

I'm missing a few things in your string matching API, namely
the ability to define a upper limit of the searching range which
can give much better performance gains than the best optimization
can do. A naive searching method around the borders of fragments
is definiltey easier but even there you could benefit from ruling
out invalid shifts.

> The worst case is not that bad for small patterns.

I don't think that any of the algorithms really make a difference,
theoretically yes but what we're basically should be looking for
is one with good average performance by detecting unnecessary
shifts. Our T[] is limited by the skb as long as we're not going
into statefull searches and thus other resources matter more to me
than a few more cycles.

> I'll give it more spins these days since I've got some spare time. I'll 
> also have a look at your work. I think that we could join efforts and 
> push something good, thoughts?

Definitely, being able to specify the upper limit is a must for me
though. Another difference is that I compute the prefix table in
userspace.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [RFC] string matching ematch
  2005-01-27 20:51   ` Thomas Graf
@ 2005-01-31 13:59     ` jamal
  2005-02-01  1:26       ` Pablo Neira
  0 siblings, 1 reply; 9+ messages in thread
From: jamal @ 2005-01-31 13:59 UTC (permalink / raw)
  To: Thomas Graf; +Cc: Pablo Neira, Patrick McHardy, netdev


Reading email backward ..

I think we should allow for all sorts of algrithms KMP, Boyer-Moore etc
to be plugged in (in tc this is already there).
The stuff that Harald was looking at (at least around July when i last
talked to him on this) is an infrastructure level thing. Its derived
from someone who seems to have well thought of the callbacks etc for a
good stateful solution. I cant remember the person from whom Harald was
deriving his stuff (email was somewhere in .fr) - but it did seem pretty
sensible. If we can have infrastructure that is also usable by tc, that
would be great so we dont go cutnpasting unnecessarily.

Having said all that:
Thomas, I think you should leave what you have as totaly stateless
unless we dont have a shareable solution. I have tons of ideas i could
share when we get to that level.

cheers,
jamal

On Thu, 2005-01-27 at 15:51, Thomas Graf wrote:
> * Pablo Neira <41F94C63.7010800@eurodev.net> 2005-01-27 21:17
> > Thomas Graf wrote:
> > 
> > >I'd like to discuss the string matching ematch, I don't care about the
> > >algorithm used but rather whether to make it stateful, match over
> > >fragments, etc. I attached a simple stateless string matching ematch
> > >using the Knuth-Morris-Pratt algorithm as a starting point.
> > > 
> > 
> > I've posted something similar after christmas in netfilter-devel[1]. 
> > It's fragment aware, actually my implementation uses boyer-moore to look 
> > for matches in the payload, and it uses brute force together with 
> > Rusty's skb_iter stuff to look for matches on the edges.
> 
> I've seen it but sticked to KMP because it uses less memory. Their
> searching phase time complexity is nearly equal around O(nm) for
> n being the length of T[] and m being the length of P[]. BM
> definitely has a better performance for highly periodic P[]'s in a
> periodic T[] though.
> 
> I'm missing a few things in your string matching API, namely
> the ability to define a upper limit of the searching range which
> can give much better performance gains than the best optimization
> can do. A naive searching method around the borders of fragments
> is definiltey easier but even there you could benefit from ruling
> out invalid shifts.
> 
> > The worst case is not that bad for small patterns.
> 
> I don't think that any of the algorithms really make a difference,
> theoretically yes but what we're basically should be looking for
> is one with good average performance by detecting unnecessary
> shifts. Our T[] is limited by the skb as long as we're not going
> into statefull searches and thus other resources matter more to me
> than a few more cycles.
> 
> > I'll give it more spins these days since I've got some spare time. I'll 
> > also have a look at your work. I think that we could join efforts and 
> > push something good, thoughts?
> 
> Definitely, being able to specify the upper limit is a must for me
> though. Another difference is that I compute the prefix table in
> userspace.
> 
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [RFC] string matching ematch
  2005-01-31 13:59     ` jamal
@ 2005-02-01  1:26       ` Pablo Neira
  0 siblings, 0 replies; 9+ messages in thread
From: Pablo Neira @ 2005-02-01  1:26 UTC (permalink / raw)
  To: hadi; +Cc: Thomas Graf, Patrick McHardy, netdev, Harald Welte

Hi jamal,

Harald, I've cc'ed you since I think that this could be interesting.

jamal wrote:

>I think we should allow for all sorts of algrithms KMP, Boyer-Moore etc
>to be plugged in (in tc this is already there).
>The stuff that Harald was looking at (at least around July when i last
>talked to him on this) is an infrastructure level thing. Its derived
>from someone who seems to have well thought of the callbacks etc for a
>good stateful solution. I cant remember the person from whom Harald was
>deriving his stuff (email was somewhere in .fr) - but it did seem pretty
>sensible.
>

yes, Phil Biondi's libqsearch. I started working with it but then I 
thought that it was a bit bloated because I didn't see the point of 
using several algorithms since boyer-moore is the best AFAIK. But Thomas 
thoughts about memory usage made me see that maybe this doesn't fit well 
all possible scenarios. So I've changed my mind and I think that such 
infrastructure level thing is required.

Harald told me that he's going to finish soon his hacks based on 
libqsearch, then we could merge ideas based in that thing I've posted 
after christmas and his libqsearch mutant.

> If we can have infrastructure that is also usable by tc, that
>would be great so we dont go cutnpasting unnecessarily.
>  
>

definitely.

>Having said all that:
>Thomas, I think you should leave what you have as totaly stateless
>unless we dont have a shareable solution. I have tons of ideas i could
>share when we get to that level.
>  
>
I've got also some ideas, let's see if I can get settled one of them at 
least after the infrastructure have been cooked :).

--
Pablo

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2005-02-01  1:26 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-01-26 15:07 [RFC] string matching ematch Thomas Graf
2005-01-26 21:03 ` David S. Miller
2005-01-26 21:41   ` Thomas Graf
2005-01-26 23:26     ` David S. Miller
2005-01-27 14:20       ` Thomas Graf
2005-01-27 20:17 ` Pablo Neira
2005-01-27 20:51   ` Thomas Graf
2005-01-31 13:59     ` jamal
2005-02-01  1:26       ` Pablo Neira

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.