All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH nft v3 2/2] datatype: Implement binary search in symbolic_constant_print()
@ 2016-11-28 20:23 Elise Lennion
  2016-11-28 22:12 ` Florian Westphal
  0 siblings, 1 reply; 4+ messages in thread
From: Elise Lennion @ 2016-11-28 20:23 UTC (permalink / raw)
  To: pablo; +Cc: netfilter-devel

Because a linear search is used, which is slower.

This approach demands that the symbol_table have a variable with its
size, also, it must be sorted by value.

Signed-off-by: Elise Lennion <elise.lennion@gmail.com>
---

 v2: This patch has no v1.

 v3: Removed gcc_workaround in struct symbol_table and fixed identation
 to use tabs.

 include/datatype.h |  3 ++-
 src/datatype.c     | 35 ++++++++++++++++++++++++++++-------
 src/proto.c        |  3 ++-
 src/services.c     |  3 ++-
 4 files changed, 34 insertions(+), 10 deletions(-)

diff --git a/include/datatype.h b/include/datatype.h
index e53797d..c6013b6 100644
--- a/include/datatype.h
+++ b/include/datatype.h
@@ -178,10 +178,11 @@ struct symbolic_constant {
 /**
  * struct symbol_table - type construction from symbolic values
  *
+ * @size: 	number of symbols, without SYMBOL_LIST_END
  * @symbols:	the symbols
  */
 struct symbol_table {
-	int				gcc_workaround;
+	unsigned int 			size;
 	struct symbolic_constant	symbols[];
 };
 
diff --git a/src/datatype.c b/src/datatype.c
index 1ae7db4..39c9678 100644
--- a/src/datatype.c
+++ b/src/datatype.c
@@ -116,6 +116,18 @@ struct error_record *symbol_parse(const struct expr *sym,
 		     sym->dtype->desc);
 }
 
+static int symbolic_constant_cmp(const void *p1, const void *p2)
+{
+	const struct symbolic_constant f = *(struct symbolic_constant *)p1;
+	const struct symbolic_constant s = *(struct symbolic_constant *)p2;
+
+	if (f.value > s.value)
+		return  1;
+	if (f.value < s.value)
+		return -1;
+	return 0;
+}
+
 struct error_record *symbolic_constant_parse(const struct expr *sym,
 					     const struct symbol_table *tbl,
 					     struct expr **res)
@@ -157,8 +169,10 @@ out:
 void symbolic_constant_print(const struct symbol_table *tbl,
 			     const struct expr *expr, bool quotes)
 {
+	unsigned int l = 0;
+	unsigned int m;
+	unsigned int r = tbl->size - 1;
 	unsigned int len = div_round_up(expr->len, BITS_PER_BYTE);
-	const struct symbolic_constant *s;
 	uint64_t val = 0;
 
 	/* Export the data in the correct byteorder for comparison */
@@ -166,18 +180,22 @@ void symbolic_constant_print(const struct symbol_table *tbl,
 	mpz_export_data(constant_data_ptr(val, expr->len), expr->value,
 			expr->byteorder, len);
 
-	for (s = tbl->symbols; s->identifier != NULL; s++) {
-		if (val == s->value)
-			break;
+	while (l < r) {
+		m = l + (r - l)/2;
+
+		if (tbl->symbols[m].value < val)
+			l = m +1;
+		else
+			r = m;
 	}
 
-	if (s->identifier == NULL)
+	if (tbl->symbols[l].value != val)
 		return expr_basetype(expr)->print(expr);
 
 	if (quotes)
-		printf("\"%s\"", s->identifier);
+		printf("\"%s\"", tbl->symbols[l].identifier);
 	else
-		printf("%s", s->identifier);
+		printf("%s", tbl->symbols[l].identifier);
 }
 
 void symbol_table_print(const struct symbol_table *tbl,
@@ -652,6 +670,9 @@ struct symbol_table *rt_symbol_table_init(const char *filename)
 
 	fclose(f);
 out:
+	tbl->size = nelems;
+	qsort(tbl->symbols, nelems, sizeof(tbl->symbols[0]),
+	      symbolic_constant_cmp);
 	tbl->symbols[nelems] = SYMBOL_LIST_END;
 	return tbl;
 }
diff --git a/src/proto.c b/src/proto.c
index df5439c..15d6278 100644
--- a/src/proto.c
+++ b/src/proto.c
@@ -860,11 +860,12 @@ const struct datatype etheraddr_type = {
 };
 
 static const struct symbol_table ethertype_tbl = {
+	.size 		= 4,
 	.symbols	= {
 		SYMBOL("ip",		__constant_htons(ETH_P_IP)),
+		SYMBOL("vlan",		__constant_htons(ETH_P_8021Q)),
 		SYMBOL("arp",		__constant_htons(ETH_P_ARP)),
 		SYMBOL("ip6",		__constant_htons(ETH_P_IPV6)),
-		SYMBOL("vlan",		__constant_htons(ETH_P_8021Q)),
 		SYMBOL_LIST_END
 	},
 };
diff --git a/src/services.c b/src/services.c
index 8cb1cdf..7a3e96a 100644
--- a/src/services.c
+++ b/src/services.c
@@ -2,7 +2,8 @@
 #include <datatype.h>
 
 const struct symbol_table serv_tbl = {
-	.symbols =	{
+	.size 		= 335,
+	.symbols 	= {
 		SYMBOL("exec",	2),
 		SYMBOL("tcpmux",	256),
 		SYMBOL("login",	258),
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH nft v3 2/2] datatype: Implement binary search in symbolic_constant_print()
  2016-11-28 20:23 [PATCH nft v3 2/2] datatype: Implement binary search in symbolic_constant_print() Elise Lennion
@ 2016-11-28 22:12 ` Florian Westphal
  2016-11-29  8:35   ` Pablo Neira Ayuso
  0 siblings, 1 reply; 4+ messages in thread
From: Florian Westphal @ 2016-11-28 22:12 UTC (permalink / raw)
  To: Elise Lennion; +Cc: pablo, netfilter-devel

Elise Lennion <elise.lennion@gmail.com> wrote:
> Because a linear search is used, which is slower.
> 
> This approach demands that the symbol_table have a variable with its
> size, also, it must be sorted by value.

Did Pablo put you up to this?  Bad Pablo, bad!  :-P

because:

>  static const struct symbol_table ethertype_tbl = {
> +	.size 		= 4,
>  	.symbols	= {
>  		SYMBOL("ip",		__constant_htons(ETH_P_IP)),
> +		SYMBOL("vlan",		__constant_htons(ETH_P_8021Q)),
>  		SYMBOL("arp",		__constant_htons(ETH_P_ARP)),
>  		SYMBOL("ip6",		__constant_htons(ETH_P_IPV6)),
> -		SYMBOL("vlan",		__constant_htons(ETH_P_8021Q)),
>  		SYMBOL_LIST_END
>  	},

This is unmaintanable.  I have no clue what value ETH_P_8021Q is, and that
this has to be placed at spot #2 to not break things.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH nft v3 2/2] datatype: Implement binary search in symbolic_constant_print()
  2016-11-28 22:12 ` Florian Westphal
@ 2016-11-29  8:35   ` Pablo Neira Ayuso
  2016-11-29 10:04     ` Florian Westphal
  0 siblings, 1 reply; 4+ messages in thread
From: Pablo Neira Ayuso @ 2016-11-29  8:35 UTC (permalink / raw)
  To: Florian Westphal; +Cc: Elise Lennion, netfilter-devel

On Mon, Nov 28, 2016 at 11:12:23PM +0100, Florian Westphal wrote:
> Elise Lennion <elise.lennion@gmail.com> wrote:
> > Because a linear search is used, which is slower.
> > 
> > This approach demands that the symbol_table have a variable with its
> > size, also, it must be sorted by value.
> 
> Did Pablo put you up to this?  Bad Pablo, bad!  :-P

Yep, Elise don't feel bad, that's my fault ;-)

> because:
> 
> >  static const struct symbol_table ethertype_tbl = {
> > +	.size 		= 4,
> >  	.symbols	= {
> >  		SYMBOL("ip",		__constant_htons(ETH_P_IP)),
> > +		SYMBOL("vlan",		__constant_htons(ETH_P_8021Q)),
> >  		SYMBOL("arp",		__constant_htons(ETH_P_ARP)),
> >  		SYMBOL("ip6",		__constant_htons(ETH_P_IPV6)),
> > -		SYMBOL("vlan",		__constant_htons(ETH_P_8021Q)),
> >  		SYMBOL_LIST_END
> >  	},
> 
> This is unmaintanable.  I have no clue what value ETH_P_8021Q is, and that
> this has to be placed at spot #2 to not break things.

OK, let's try to make this a bit more maintainable, another proposal
in steps:

1) Update symbol_table definition to:

struct symbol_table {
        unsignd int                     size;
        const struct symbolic_constant  *symbols; <---
};

so we don't have a flexible array anymore. Then, split this array of
symbols from struct symbol_table so this looks like:

static cont struct symbolic_constant ethertype_symbols[] = {
 		SYMBOL("ip",		__constant_htons(ETH_P_IP)),
		SYMBOL("vlan",		__constant_htons(ETH_P_8021Q)),
 		SYMBOL("arp",		__constant_htons(ETH_P_ARP)),
  		SYMBOL("ip6",		__constant_htons(ETH_P_IPV6)),
  		SYMBOL_LIST_END
};

static const struct symbol_table ethertype_tbl = {
        .size                   = SYMTBL_SIZE(ethertype_symbols),
        .symbolic_constant      = ethertype_symbols,
};

I can see 19 symbol_tables from here at quick glace, so this would be
an initial patch to update this. SYMTBL_SIZE needs to be define too, yes.

2) Then, second patch would look like:

struct symbol_table {
        unsignd int                     size;
        bool                            qsort; <---
        const struct symbolic_constant  *symbols;
};

If qsort field if true, then we can just validate when calling
datatype_register() that the array is not sorted and spot a BUG().
Moreover, the new qsort field would restrict this qsort to the large
inet_service symbol table.

Florian, are you OK if Elise follows this approach? If this is overly
complicated and not worth, rise your hand.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH nft v3 2/2] datatype: Implement binary search in symbolic_constant_print()
  2016-11-29  8:35   ` Pablo Neira Ayuso
@ 2016-11-29 10:04     ` Florian Westphal
  0 siblings, 0 replies; 4+ messages in thread
From: Florian Westphal @ 2016-11-29 10:04 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: Florian Westphal, Elise Lennion, netfilter-devel

Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> On Mon, Nov 28, 2016 at 11:12:23PM +0100, Florian Westphal wrote:
> > This is unmaintanable.  I have no clue what value ETH_P_8021Q is, and that
> > this has to be placed at spot #2 to not break things.
> 
> OK, let's try to make this a bit more maintainable, another proposal
> in steps:
> 
> 1) Update symbol_table definition to:
> 
> struct symbol_table {
>         unsignd int                     size;
>         const struct symbolic_constant  *symbols; <---
> };

[..]

> Florian, are you OK if Elise follows this approach? If this is overly
> complicated and not worth, rise your hand.

Is a linear search of ~300-element symtable list a problem?

If it is, then your proposal seems like a sane approach.
But I'd probably benchmark this first.

(Since Elise already did a patch for bsearchable symtable
 it should not be too much work to verify this).

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2016-11-29 10:07 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-11-28 20:23 [PATCH nft v3 2/2] datatype: Implement binary search in symbolic_constant_print() Elise Lennion
2016-11-28 22:12 ` Florian Westphal
2016-11-29  8:35   ` Pablo Neira Ayuso
2016-11-29 10:04     ` Florian Westphal

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.