Netfilter-Devel Archive on lore.kernel.org
 help / color / Atom feed
* [iptables PATCH v3 00/11] Improve iptables-nft performance with large rulesets
@ 2019-10-08 16:14 Phil Sutter
  2019-10-08 16:14 ` [iptables PATCH v3 01/11] nft: Pass nft_handle to flush_cache() Phil Sutter
                   ` (10 more replies)
  0 siblings, 11 replies; 22+ messages in thread
From: Phil Sutter @ 2019-10-08 16:14 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: netfilter-devel

Third approach at caching optimizations implementation.

The goal of reducing scope of cached data remains the same: First,
optimize cache depth (i.e., omit caching rules or chains if not needed).
Second, optimize cache width (i.e., cache only required chains).

Changes since v2:

* Move all cache-related code into a dedicated source file.
* Replace have_cache boolean by a cache level, indicating cache
  completeness from a depth view.
* Keep a central function to populate the cache, perform consistency
  checking based on generation ID and update cache level.

The first two patches contain preparational work for the changes
described above. Patch 3 performs the code relocation, patches 4 to 8
extend functionality of the separated caching code and the last three
patches optimize core code in nft.c to put optimized caching into full
effect.

A follow-up series will deal with xtables-restore performance.

Phil Sutter (11):
  nft: Pass nft_handle to flush_cache()
  nft: Avoid nested cache fetching
  nft: Extract cache routines into nft-cache.c
  nft-cache: Introduce cache levels
  nft-cache: Fetch only chains in nft_chain_list_get()
  nft-cache: Cover for multiple fetcher invocation
  nft-cache: Support partial cache per table
  nft-cache: Support partial rule cache per chain
  nft: Reduce cache overhead of nft_chain_builtin_init()
  nft: Support nft_is_table_compatible() per chain
  nft: Optimize flushing all chains of a table

 iptables/Makefile.am       |   2 +-
 iptables/nft-cache.c       | 498 +++++++++++++++++++++++++++++++++++++
 iptables/nft-cache.h       |  18 ++
 iptables/nft.c             | 487 +++++++-----------------------------
 iptables/nft.h             |  22 +-
 iptables/xtables-restore.c |   5 +-
 iptables/xtables-save.c    |   5 +-
 7 files changed, 623 insertions(+), 414 deletions(-)
 create mode 100644 iptables/nft-cache.c
 create mode 100644 iptables/nft-cache.h

-- 
2.23.0


^ permalink raw reply	[flat|nested] 22+ messages in thread

* [iptables PATCH v3 01/11] nft: Pass nft_handle to flush_cache()
  2019-10-08 16:14 [iptables PATCH v3 00/11] Improve iptables-nft performance with large rulesets Phil Sutter
@ 2019-10-08 16:14 ` Phil Sutter
  2019-10-09  9:30   ` Pablo Neira Ayuso
  2019-10-08 16:14 ` [iptables PATCH v3 02/11] nft: Avoid nested cache fetching Phil Sutter
                   ` (9 subsequent siblings)
  10 siblings, 1 reply; 22+ messages in thread
From: Phil Sutter @ 2019-10-08 16:14 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: netfilter-devel

This allows to call nft_table_builtin_find() and hence removes the only
real user of __nft_table_builtin_find(). Consequently remove the latter
by integrating it into its sole caller.

Signed-off-by: Phil Sutter <phil@nwl.cc>
---
 iptables/nft.c | 28 +++++++++++-----------------
 1 file changed, 11 insertions(+), 17 deletions(-)

diff --git a/iptables/nft.c b/iptables/nft.c
index a2f36b7ee90d2..bdc9fbc37f110 100644
--- a/iptables/nft.c
+++ b/iptables/nft.c
@@ -703,31 +703,25 @@ static void nft_chain_builtin_add(struct nft_handle *h,
 	nftnl_chain_list_add_tail(c, h->cache->table[table->type].chains);
 }
 
-static const struct builtin_table *
-__nft_table_builtin_find(const struct builtin_table *tables, const char *table)
+/* find if built-in table already exists */
+const struct builtin_table *
+nft_table_builtin_find(struct nft_handle *h, const char *table)
 {
 	int i;
 	bool found = false;
 
 	for (i = 0; i < NFT_TABLE_MAX; i++) {
-		if (tables[i].name == NULL)
+		if (h->tables[i].name == NULL)
 			continue;
 
-		if (strcmp(tables[i].name, table) != 0)
+		if (strcmp(h->tables[i].name, table) != 0)
 			continue;
 
 		found = true;
 		break;
 	}
 
-	return found ? &tables[i] : NULL;
-}
-
-/* find if built-in table already exists */
-const struct builtin_table *
-nft_table_builtin_find(struct nft_handle *h, const char *table)
-{
-	return __nft_table_builtin_find(h->tables, table);
+	return found ? &h->tables[i] : NULL;
 }
 
 /* find if built-in chain already exists */
@@ -857,14 +851,14 @@ static int __flush_chain_cache(struct nftnl_chain *c, void *data)
 	return 0;
 }
 
-static int flush_cache(struct nft_cache *c, const struct builtin_table *tables,
+static int flush_cache(struct nft_handle *h, struct nft_cache *c,
 		       const char *tablename)
 {
 	const struct builtin_table *table;
 	int i;
 
 	if (tablename) {
-		table = __nft_table_builtin_find(tables, tablename);
+		table = nft_table_builtin_find(h, tablename);
 		if (!table || !c->table[table->type].chains)
 			return 0;
 		nftnl_chain_list_foreach(c->table[table->type].chains,
@@ -873,7 +867,7 @@ static int flush_cache(struct nft_cache *c, const struct builtin_table *tables,
 	}
 
 	for (i = 0; i < NFT_TABLE_MAX; i++) {
-		if (tables[i].name == NULL)
+		if (h->tables[i].name == NULL)
 			continue;
 
 		if (!c->table[i].chains)
@@ -893,7 +887,7 @@ static void flush_chain_cache(struct nft_handle *h, const char *tablename)
 	if (!h->have_cache)
 		return;
 
-	if (flush_cache(h->cache, h->tables, tablename))
+	if (flush_cache(h, h->cache, tablename))
 		h->have_cache = false;
 }
 
@@ -1655,7 +1649,7 @@ static void nft_rebuild_cache(struct nft_handle *h)
 static void nft_release_cache(struct nft_handle *h)
 {
 	if (h->cache_index)
-		flush_cache(&h->__cache[0], h->tables, NULL);
+		flush_cache(h, &h->__cache[0], NULL);
 }
 
 struct nftnl_chain_list *nft_chain_list_get(struct nft_handle *h,
-- 
2.23.0


^ permalink raw reply	[flat|nested] 22+ messages in thread

* [iptables PATCH v3 02/11] nft: Avoid nested cache fetching
  2019-10-08 16:14 [iptables PATCH v3 00/11] Improve iptables-nft performance with large rulesets Phil Sutter
  2019-10-08 16:14 ` [iptables PATCH v3 01/11] nft: Pass nft_handle to flush_cache() Phil Sutter
@ 2019-10-08 16:14 ` Phil Sutter
  2019-10-09  9:30   ` Pablo Neira Ayuso
  2019-10-08 16:14 ` [iptables PATCH v3 03/11] nft: Extract cache routines into nft-cache.c Phil Sutter
                   ` (8 subsequent siblings)
  10 siblings, 1 reply; 22+ messages in thread
From: Phil Sutter @ 2019-10-08 16:14 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: netfilter-devel

Don't call fetch_table_cache() from within fetch_chain_cache() but
instead from __nft_build_cache(). Since that is the only caller of
fetch_chain_cache(), this change should not have any effect in practice.

Signed-off-by: Phil Sutter <phil@nwl.cc>
---
 iptables/nft.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/iptables/nft.c b/iptables/nft.c
index bdc9fbc37f110..3228842cd3c8b 100644
--- a/iptables/nft.c
+++ b/iptables/nft.c
@@ -1414,8 +1414,6 @@ static int fetch_chain_cache(struct nft_handle *h)
 	struct nlmsghdr *nlh;
 	int i, ret;
 
-	fetch_table_cache(h);
-
 	for (i = 0; i < NFT_TABLE_MAX; i++) {
 		enum nft_table_type type = h->tables[i].type;
 
@@ -1592,6 +1590,7 @@ static void __nft_build_cache(struct nft_handle *h)
 
 retry:
 	mnl_genid_get(h, &genid_start);
+	fetch_table_cache(h);
 	fetch_chain_cache(h);
 	fetch_rule_cache(h);
 	h->have_cache = true;
-- 
2.23.0


^ permalink raw reply	[flat|nested] 22+ messages in thread

* [iptables PATCH v3 03/11] nft: Extract cache routines into nft-cache.c
  2019-10-08 16:14 [iptables PATCH v3 00/11] Improve iptables-nft performance with large rulesets Phil Sutter
  2019-10-08 16:14 ` [iptables PATCH v3 01/11] nft: Pass nft_handle to flush_cache() Phil Sutter
  2019-10-08 16:14 ` [iptables PATCH v3 02/11] nft: Avoid nested cache fetching Phil Sutter
@ 2019-10-08 16:14 ` Phil Sutter
  2019-10-09  9:32   ` Pablo Neira Ayuso
  2019-10-08 16:14 ` [iptables PATCH v3 04/11] nft-cache: Introduce cache levels Phil Sutter
                   ` (7 subsequent siblings)
  10 siblings, 1 reply; 22+ messages in thread
From: Phil Sutter @ 2019-10-08 16:14 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: netfilter-devel

The amount of code dealing with caching only is considerable and hence
deserves an own source file.

Signed-off-by: Phil Sutter <phil@nwl.cc>
---
 iptables/Makefile.am       |   2 +-
 iptables/nft-cache.c       | 376 +++++++++++++++++++++++++++++++++++++
 iptables/nft-cache.h       |  17 ++
 iptables/nft.c             | 360 +----------------------------------
 iptables/nft.h             |   8 +-
 iptables/xtables-restore.c |   1 +
 iptables/xtables-save.c    |   1 +
 7 files changed, 404 insertions(+), 361 deletions(-)
 create mode 100644 iptables/nft-cache.c
 create mode 100644 iptables/nft-cache.h

diff --git a/iptables/Makefile.am b/iptables/Makefile.am
index 21ac7f08b7c1f..fc834e0f7b89e 100644
--- a/iptables/Makefile.am
+++ b/iptables/Makefile.am
@@ -36,7 +36,7 @@ xtables_nft_multi_CFLAGS  += -DENABLE_NFTABLES -DENABLE_IPV4 -DENABLE_IPV6
 xtables_nft_multi_SOURCES += xtables-save.c xtables-restore.c \
 				xtables-standalone.c xtables.c nft.c \
 				nft-shared.c nft-ipv4.c nft-ipv6.c nft-arp.c \
-				xtables-monitor.c \
+				xtables-monitor.c nft-cache.c \
 				xtables-arp-standalone.c xtables-arp.c \
 				nft-bridge.c \
 				xtables-eb-standalone.c xtables-eb.c \
diff --git a/iptables/nft-cache.c b/iptables/nft-cache.c
new file mode 100644
index 0000000000000..5444419a5cc3b
--- /dev/null
+++ b/iptables/nft-cache.c
@@ -0,0 +1,376 @@
+/*
+ * (C) 2012 by Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This code has been sponsored by Sophos Astaro <http://www.sophos.com>
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <xtables.h>
+
+#include <linux/netfilter/nf_tables.h>
+
+#include <libmnl/libmnl.h>
+#include <libnftnl/gen.h>
+#include <libnftnl/table.h>
+
+#include "nft.h"
+#include "nft-cache.h"
+
+static int genid_cb(const struct nlmsghdr *nlh, void *data)
+{
+	uint32_t *genid = data;
+	struct nftnl_gen *gen;
+
+	gen = nftnl_gen_alloc();
+	if (!gen)
+		return MNL_CB_ERROR;
+
+	if (nftnl_gen_nlmsg_parse(nlh, gen) < 0)
+		goto out;
+
+	*genid = nftnl_gen_get_u32(gen, NFTNL_GEN_ID);
+
+	nftnl_gen_free(gen);
+	return MNL_CB_STOP;
+out:
+	nftnl_gen_free(gen);
+	return MNL_CB_ERROR;
+}
+
+static void mnl_genid_get(struct nft_handle *h, uint32_t *genid)
+{
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+	struct nlmsghdr *nlh;
+	int ret;
+
+	nlh = nftnl_nlmsg_build_hdr(buf, NFT_MSG_GETGEN, 0, 0, h->seq);
+	ret = mnl_talk(h, nlh, genid_cb, genid);
+	if (ret == 0)
+		return;
+
+	xtables_error(RESOURCE_PROBLEM,
+		      "Could not fetch rule set generation id: %s\n", nft_strerror(errno));
+}
+
+static int nftnl_table_list_cb(const struct nlmsghdr *nlh, void *data)
+{
+	struct nftnl_table *t;
+	struct nftnl_table_list *list = data;
+
+	t = nftnl_table_alloc();
+	if (t == NULL)
+		goto err;
+
+	if (nftnl_table_nlmsg_parse(nlh, t) < 0)
+		goto out;
+
+	nftnl_table_list_add_tail(t, list);
+
+	return MNL_CB_OK;
+out:
+	nftnl_table_free(t);
+err:
+	return MNL_CB_OK;
+}
+
+static int fetch_table_cache(struct nft_handle *h)
+{
+	char buf[16536];
+	struct nlmsghdr *nlh;
+	struct nftnl_table_list *list;
+	int ret;
+
+	list = nftnl_table_list_alloc();
+	if (list == NULL)
+		return 0;
+
+	nlh = nftnl_rule_nlmsg_build_hdr(buf, NFT_MSG_GETTABLE, h->family,
+					NLM_F_DUMP, h->seq);
+
+	ret = mnl_talk(h, nlh, nftnl_table_list_cb, list);
+	if (ret < 0 && errno == EINTR)
+		assert(nft_restart(h) >= 0);
+
+	h->cache->tables = list;
+
+	return 1;
+}
+
+static int nftnl_chain_list_cb(const struct nlmsghdr *nlh, void *data)
+{
+	struct nft_handle *h = data;
+	const struct builtin_table *t;
+	struct nftnl_chain *c;
+
+	c = nftnl_chain_alloc();
+	if (c == NULL)
+		goto err;
+
+	if (nftnl_chain_nlmsg_parse(nlh, c) < 0)
+		goto out;
+
+	t = nft_table_builtin_find(h,
+			nftnl_chain_get_str(c, NFTNL_CHAIN_TABLE));
+	if (!t)
+		goto out;
+
+	nftnl_chain_list_add_tail(c, h->cache->table[t->type].chains);
+
+	return MNL_CB_OK;
+out:
+	nftnl_chain_free(c);
+err:
+	return MNL_CB_OK;
+}
+
+static int fetch_chain_cache(struct nft_handle *h)
+{
+	char buf[16536];
+	struct nlmsghdr *nlh;
+	int i, ret;
+
+	for (i = 0; i < NFT_TABLE_MAX; i++) {
+		enum nft_table_type type = h->tables[i].type;
+
+		if (!h->tables[i].name)
+			continue;
+
+		h->cache->table[type].chains = nftnl_chain_list_alloc();
+		if (!h->cache->table[type].chains)
+			return -1;
+	}
+
+	nlh = nftnl_chain_nlmsg_build_hdr(buf, NFT_MSG_GETCHAIN, h->family,
+					NLM_F_DUMP, h->seq);
+
+	ret = mnl_talk(h, nlh, nftnl_chain_list_cb, h);
+	if (ret < 0 && errno == EINTR)
+		assert(nft_restart(h) >= 0);
+
+	return ret;
+}
+
+static int nftnl_rule_list_cb(const struct nlmsghdr *nlh, void *data)
+{
+	struct nftnl_chain *c = data;
+	struct nftnl_rule *r;
+
+	r = nftnl_rule_alloc();
+	if (r == NULL)
+		return MNL_CB_OK;
+
+	if (nftnl_rule_nlmsg_parse(nlh, r) < 0) {
+		nftnl_rule_free(r);
+		return MNL_CB_OK;
+	}
+
+	nftnl_chain_rule_add_tail(r, c);
+	return MNL_CB_OK;
+}
+
+static int nft_rule_list_update(struct nftnl_chain *c, void *data)
+{
+	struct nft_handle *h = data;
+	char buf[16536];
+	struct nlmsghdr *nlh;
+	struct nftnl_rule *rule;
+	int ret;
+
+	rule = nftnl_rule_alloc();
+	if (!rule)
+		return -1;
+
+	nftnl_rule_set_str(rule, NFTNL_RULE_TABLE,
+			   nftnl_chain_get_str(c, NFTNL_CHAIN_TABLE));
+	nftnl_rule_set_str(rule, NFTNL_RULE_CHAIN,
+			   nftnl_chain_get_str(c, NFTNL_CHAIN_NAME));
+
+	nlh = nftnl_rule_nlmsg_build_hdr(buf, NFT_MSG_GETRULE, h->family,
+					NLM_F_DUMP, h->seq);
+	nftnl_rule_nlmsg_build_payload(nlh, rule);
+
+	ret = mnl_talk(h, nlh, nftnl_rule_list_cb, c);
+	if (ret < 0 && errno == EINTR)
+		assert(nft_restart(h) >= 0);
+
+	nftnl_rule_free(rule);
+
+	if (h->family == NFPROTO_BRIDGE)
+		nft_bridge_chain_postprocess(h, c);
+
+	return 0;
+}
+
+static int fetch_rule_cache(struct nft_handle *h)
+{
+	int i;
+
+	for (i = 0; i < NFT_TABLE_MAX; i++) {
+		enum nft_table_type type = h->tables[i].type;
+
+		if (!h->tables[i].name)
+			continue;
+
+		if (nftnl_chain_list_foreach(h->cache->table[type].chains,
+					     nft_rule_list_update, h))
+			return -1;
+	}
+	return 0;
+}
+
+static void __nft_build_cache(struct nft_handle *h)
+{
+	uint32_t genid_start, genid_stop;
+
+retry:
+	mnl_genid_get(h, &genid_start);
+	fetch_table_cache(h);
+	fetch_chain_cache(h);
+	fetch_rule_cache(h);
+	h->have_cache = true;
+	mnl_genid_get(h, &genid_stop);
+
+	if (genid_start != genid_stop) {
+		flush_chain_cache(h, NULL);
+		goto retry;
+	}
+
+	h->nft_genid = genid_start;
+}
+
+void nft_build_cache(struct nft_handle *h)
+{
+	if (!h->have_cache)
+		__nft_build_cache(h);
+}
+
+void nft_fake_cache(struct nft_handle *h)
+{
+	int i;
+
+	fetch_table_cache(h);
+	for (i = 0; i < NFT_TABLE_MAX; i++) {
+		enum nft_table_type type = h->tables[i].type;
+
+		if (!h->tables[i].name)
+			continue;
+
+		h->cache->table[type].chains = nftnl_chain_list_alloc();
+	}
+	h->have_cache = true;
+	mnl_genid_get(h, &h->nft_genid);
+}
+
+static void __nft_flush_cache(struct nft_handle *h)
+{
+	if (!h->cache_index) {
+		h->cache_index++;
+		h->cache = &h->__cache[h->cache_index];
+	} else {
+		flush_chain_cache(h, NULL);
+	}
+}
+
+static int __flush_rule_cache(struct nftnl_rule *r, void *data)
+{
+	nftnl_rule_list_del(r);
+	nftnl_rule_free(r);
+
+	return 0;
+}
+
+void flush_rule_cache(struct nftnl_chain *c)
+{
+	nftnl_rule_foreach(c, __flush_rule_cache, NULL);
+}
+
+static int __flush_chain_cache(struct nftnl_chain *c, void *data)
+{
+	nftnl_chain_list_del(c);
+	nftnl_chain_free(c);
+
+	return 0;
+}
+
+static int flush_cache(struct nft_handle *h, struct nft_cache *c,
+		       const char *tablename)
+{
+	const struct builtin_table *table;
+	int i;
+
+	if (tablename) {
+		table = nft_table_builtin_find(h, tablename);
+		if (!table || !c->table[table->type].chains)
+			return 0;
+		nftnl_chain_list_foreach(c->table[table->type].chains,
+					 __flush_chain_cache, NULL);
+		return 0;
+	}
+
+	for (i = 0; i < NFT_TABLE_MAX; i++) {
+		if (h->tables[i].name == NULL)
+			continue;
+
+		if (!c->table[i].chains)
+			continue;
+
+		nftnl_chain_list_free(c->table[i].chains);
+		c->table[i].chains = NULL;
+	}
+	nftnl_table_list_free(c->tables);
+	c->tables = NULL;
+
+	return 1;
+}
+
+void flush_chain_cache(struct nft_handle *h, const char *tablename)
+{
+	if (!h->have_cache)
+		return;
+
+	if (flush_cache(h, h->cache, tablename))
+		h->have_cache = false;
+}
+
+void nft_rebuild_cache(struct nft_handle *h)
+{
+	if (h->have_cache)
+		__nft_flush_cache(h);
+
+	__nft_build_cache(h);
+}
+
+void nft_release_cache(struct nft_handle *h)
+{
+	if (h->cache_index)
+		flush_cache(h, &h->__cache[0], NULL);
+}
+
+struct nftnl_table_list *nftnl_table_list_get(struct nft_handle *h)
+{
+	if (!h->cache->tables)
+		fetch_table_cache(h);
+
+	return h->cache->tables;
+}
+
+struct nftnl_chain_list *nft_chain_list_get(struct nft_handle *h,
+					    const char *table)
+{
+	const struct builtin_table *t;
+
+	t = nft_table_builtin_find(h, table);
+	if (!t)
+		return NULL;
+
+	nft_build_cache(h);
+
+	return h->cache->table[t->type].chains;
+}
+
diff --git a/iptables/nft-cache.h b/iptables/nft-cache.h
new file mode 100644
index 0000000000000..423c6516de9bb
--- /dev/null
+++ b/iptables/nft-cache.h
@@ -0,0 +1,17 @@
+#ifndef _NFT_CACHE_H_
+#define _NFT_CACHE_H_
+
+struct nft_handle;
+
+void nft_fake_cache(struct nft_handle *h);
+void nft_build_cache(struct nft_handle *h);
+void nft_rebuild_cache(struct nft_handle *h);
+void nft_release_cache(struct nft_handle *h);
+void flush_chain_cache(struct nft_handle *h, const char *tablename);
+void flush_rule_cache(struct nftnl_chain *c);
+
+struct nftnl_chain_list *nft_chain_list_get(struct nft_handle *h,
+					    const char *table);
+struct nftnl_table_list *nftnl_table_list_get(struct nft_handle *h);
+
+#endif /* _NFT_CACHE_H_ */
diff --git a/iptables/nft.c b/iptables/nft.c
index 3228842cd3c8b..81de10d8a0892 100644
--- a/iptables/nft.c
+++ b/iptables/nft.c
@@ -55,47 +55,12 @@
 
 #include "nft.h"
 #include "xshared.h" /* proto_to_name */
+#include "nft-cache.h"
 #include "nft-shared.h"
 #include "nft-bridge.h" /* EBT_NOPROTO */
 
 static void *nft_fn;
 
-static int genid_cb(const struct nlmsghdr *nlh, void *data)
-{
-	uint32_t *genid = data;
-	struct nftnl_gen *gen;
-
-	gen = nftnl_gen_alloc();
-	if (!gen)
-		return MNL_CB_ERROR;
-
-	if (nftnl_gen_nlmsg_parse(nlh, gen) < 0)
-		goto out;
-
-	*genid = nftnl_gen_get_u32(gen, NFTNL_GEN_ID);
-
-	nftnl_gen_free(gen);
-	return MNL_CB_STOP;
-out:
-	nftnl_gen_free(gen);
-	return MNL_CB_ERROR;
-}
-
-static void mnl_genid_get(struct nft_handle *h, uint32_t *genid)
-{
-	char buf[MNL_SOCKET_BUFFER_SIZE];
-	struct nlmsghdr *nlh;
-	int ret;
-
-	nlh = nftnl_nlmsg_build_hdr(buf, NFT_MSG_GETGEN, 0, 0, h->seq);
-	ret = mnl_talk(h, nlh, genid_cb, genid);
-	if (ret == 0)
-		return;
-
-	xtables_error(RESOURCE_PROBLEM,
-		      "Could not fetch rule set generation id: %s\n", nft_strerror(errno));
-}
-
 int mnl_talk(struct nft_handle *h, struct nlmsghdr *nlh,
 	     int (*cb)(const struct nlmsghdr *nlh, void *data),
 	     void *data)
@@ -791,7 +756,7 @@ static bool nft_chain_builtin(struct nftnl_chain *c)
 	return nftnl_chain_get(c, NFTNL_CHAIN_HOOKNUM) != NULL;
 }
 
-static int nft_restart(struct nft_handle *h)
+int nft_restart(struct nft_handle *h)
 {
 	mnl_socket_close(h->nl);
 
@@ -830,67 +795,6 @@ int nft_init(struct nft_handle *h, const struct builtin_table *t)
 	return 0;
 }
 
-static int __flush_rule_cache(struct nftnl_rule *r, void *data)
-{
-	nftnl_rule_list_del(r);
-	nftnl_rule_free(r);
-
-	return 0;
-}
-
-static void flush_rule_cache(struct nftnl_chain *c)
-{
-	nftnl_rule_foreach(c, __flush_rule_cache, NULL);
-}
-
-static int __flush_chain_cache(struct nftnl_chain *c, void *data)
-{
-	nftnl_chain_list_del(c);
-	nftnl_chain_free(c);
-
-	return 0;
-}
-
-static int flush_cache(struct nft_handle *h, struct nft_cache *c,
-		       const char *tablename)
-{
-	const struct builtin_table *table;
-	int i;
-
-	if (tablename) {
-		table = nft_table_builtin_find(h, tablename);
-		if (!table || !c->table[table->type].chains)
-			return 0;
-		nftnl_chain_list_foreach(c->table[table->type].chains,
-					 __flush_chain_cache, NULL);
-		return 0;
-	}
-
-	for (i = 0; i < NFT_TABLE_MAX; i++) {
-		if (h->tables[i].name == NULL)
-			continue;
-
-		if (!c->table[i].chains)
-			continue;
-
-		nftnl_chain_list_free(c->table[i].chains);
-		c->table[i].chains = NULL;
-	}
-	nftnl_table_list_free(c->tables);
-	c->tables = NULL;
-
-	return 1;
-}
-
-static void flush_chain_cache(struct nft_handle *h, const char *tablename)
-{
-	if (!h->have_cache)
-		return;
-
-	if (flush_cache(h, h->cache, tablename))
-		h->have_cache = false;
-}
-
 void nft_fini(struct nft_handle *h)
 {
 	flush_chain_cache(h, NULL);
@@ -1337,104 +1241,6 @@ nft_rule_print_save(const struct nftnl_rule *r, enum nft_rule_print type,
 		ops->clear_cs(&cs);
 }
 
-static int nftnl_chain_list_cb(const struct nlmsghdr *nlh, void *data)
-{
-	struct nft_handle *h = data;
-	const struct builtin_table *t;
-	struct nftnl_chain *c;
-
-	c = nftnl_chain_alloc();
-	if (c == NULL)
-		goto err;
-
-	if (nftnl_chain_nlmsg_parse(nlh, c) < 0)
-		goto out;
-
-	t = nft_table_builtin_find(h,
-			nftnl_chain_get_str(c, NFTNL_CHAIN_TABLE));
-	if (!t)
-		goto out;
-
-	nftnl_chain_list_add_tail(c, h->cache->table[t->type].chains);
-
-	return MNL_CB_OK;
-out:
-	nftnl_chain_free(c);
-err:
-	return MNL_CB_OK;
-}
-
-static int nftnl_table_list_cb(const struct nlmsghdr *nlh, void *data)
-{
-	struct nftnl_table *t;
-	struct nftnl_table_list *list = data;
-
-	t = nftnl_table_alloc();
-	if (t == NULL)
-		goto err;
-
-	if (nftnl_table_nlmsg_parse(nlh, t) < 0)
-		goto out;
-
-	nftnl_table_list_add_tail(t, list);
-
-	return MNL_CB_OK;
-out:
-	nftnl_table_free(t);
-err:
-	return MNL_CB_OK;
-}
-
-static int fetch_table_cache(struct nft_handle *h)
-{
-	char buf[16536];
-	struct nlmsghdr *nlh;
-	struct nftnl_table_list *list;
-	int ret;
-
-	list = nftnl_table_list_alloc();
-	if (list == NULL)
-		return 0;
-
-	nlh = nftnl_rule_nlmsg_build_hdr(buf, NFT_MSG_GETTABLE, h->family,
-					NLM_F_DUMP, h->seq);
-
-	ret = mnl_talk(h, nlh, nftnl_table_list_cb, list);
-	if (ret < 0 && errno == EINTR)
-		assert(nft_restart(h) >= 0);
-
-	h->cache->tables = list;
-
-	return 1;
-}
-
-static int fetch_chain_cache(struct nft_handle *h)
-{
-	char buf[16536];
-	struct nlmsghdr *nlh;
-	int i, ret;
-
-	for (i = 0; i < NFT_TABLE_MAX; i++) {
-		enum nft_table_type type = h->tables[i].type;
-
-		if (!h->tables[i].name)
-			continue;
-
-		h->cache->table[type].chains = nftnl_chain_list_alloc();
-		if (!h->cache->table[type].chains)
-			return -1;
-	}
-
-	nlh = nftnl_chain_nlmsg_build_hdr(buf, NFT_MSG_GETCHAIN, h->family,
-					NLM_F_DUMP, h->seq);
-
-	ret = mnl_talk(h, nlh, nftnl_chain_list_cb, h);
-	if (ret < 0 && errno == EINTR)
-		assert(nft_restart(h) >= 0);
-
-	return ret;
-}
-
 static bool nft_rule_is_policy_rule(struct nftnl_rule *r)
 {
 	const struct nftnl_udata *tb[UDATA_TYPE_MAX + 1] = {};
@@ -1473,8 +1279,8 @@ static struct nftnl_rule *nft_chain_last_rule(struct nftnl_chain *c)
 	return last;
 }
 
-static void nft_bridge_chain_postprocess(struct nft_handle *h,
-					 struct nftnl_chain *c)
+void nft_bridge_chain_postprocess(struct nft_handle *h,
+				  struct nftnl_chain *c)
 {
 	struct nftnl_rule *last = nft_chain_last_rule(c);
 	struct nftnl_expr_iter *iter;
@@ -1515,156 +1321,6 @@ static void nft_bridge_chain_postprocess(struct nft_handle *h,
 out_iter:
 	nftnl_expr_iter_destroy(iter);
 }
-
-static int nftnl_rule_list_cb(const struct nlmsghdr *nlh, void *data)
-{
-	struct nftnl_chain *c = data;
-	struct nftnl_rule *r;
-
-	r = nftnl_rule_alloc();
-	if (r == NULL)
-		return MNL_CB_OK;
-
-	if (nftnl_rule_nlmsg_parse(nlh, r) < 0) {
-		nftnl_rule_free(r);
-		return MNL_CB_OK;
-	}
-
-	nftnl_chain_rule_add_tail(r, c);
-	return MNL_CB_OK;
-}
-
-static int nft_rule_list_update(struct nftnl_chain *c, void *data)
-{
-	struct nft_handle *h = data;
-	char buf[16536];
-	struct nlmsghdr *nlh;
-	struct nftnl_rule *rule;
-	int ret;
-
-	rule = nftnl_rule_alloc();
-	if (!rule)
-		return -1;
-
-	nftnl_rule_set_str(rule, NFTNL_RULE_TABLE,
-			   nftnl_chain_get_str(c, NFTNL_CHAIN_TABLE));
-	nftnl_rule_set_str(rule, NFTNL_RULE_CHAIN,
-			   nftnl_chain_get_str(c, NFTNL_CHAIN_NAME));
-
-	nlh = nftnl_rule_nlmsg_build_hdr(buf, NFT_MSG_GETRULE, h->family,
-					NLM_F_DUMP, h->seq);
-	nftnl_rule_nlmsg_build_payload(nlh, rule);
-
-	ret = mnl_talk(h, nlh, nftnl_rule_list_cb, c);
-	if (ret < 0 && errno == EINTR)
-		assert(nft_restart(h) >= 0);
-
-	nftnl_rule_free(rule);
-
-	if (h->family == NFPROTO_BRIDGE)
-		nft_bridge_chain_postprocess(h, c);
-
-	return 0;
-}
-
-static int fetch_rule_cache(struct nft_handle *h)
-{
-	int i;
-
-	for (i = 0; i < NFT_TABLE_MAX; i++) {
-		enum nft_table_type type = h->tables[i].type;
-
-		if (!h->tables[i].name)
-			continue;
-
-		if (nftnl_chain_list_foreach(h->cache->table[type].chains,
-					     nft_rule_list_update, h))
-			return -1;
-	}
-	return 0;
-}
-
-static void __nft_build_cache(struct nft_handle *h)
-{
-	uint32_t genid_start, genid_stop;
-
-retry:
-	mnl_genid_get(h, &genid_start);
-	fetch_table_cache(h);
-	fetch_chain_cache(h);
-	fetch_rule_cache(h);
-	h->have_cache = true;
-	mnl_genid_get(h, &genid_stop);
-
-	if (genid_start != genid_stop) {
-		flush_chain_cache(h, NULL);
-		goto retry;
-	}
-
-	h->nft_genid = genid_start;
-}
-
-void nft_fake_cache(struct nft_handle *h)
-{
-	int i;
-
-	fetch_table_cache(h);
-	for (i = 0; i < NFT_TABLE_MAX; i++) {
-		enum nft_table_type type = h->tables[i].type;
-
-		if (!h->tables[i].name)
-			continue;
-
-		h->cache->table[type].chains = nftnl_chain_list_alloc();
-	}
-	h->have_cache = true;
-	mnl_genid_get(h, &h->nft_genid);
-}
-
-void nft_build_cache(struct nft_handle *h)
-{
-	if (!h->have_cache)
-		__nft_build_cache(h);
-}
-
-static void __nft_flush_cache(struct nft_handle *h)
-{
-	if (!h->cache_index) {
-		h->cache_index++;
-		h->cache = &h->__cache[h->cache_index];
-	} else {
-		flush_chain_cache(h, NULL);
-	}
-}
-
-static void nft_rebuild_cache(struct nft_handle *h)
-{
-	if (h->have_cache)
-		__nft_flush_cache(h);
-
-	__nft_build_cache(h);
-}
-
-static void nft_release_cache(struct nft_handle *h)
-{
-	if (h->cache_index)
-		flush_cache(h, &h->__cache[0], NULL);
-}
-
-struct nftnl_chain_list *nft_chain_list_get(struct nft_handle *h,
-					    const char *table)
-{
-	const struct builtin_table *t;
-
-	t = nft_table_builtin_find(h, table);
-	if (!t)
-		return NULL;
-
-	nft_build_cache(h);
-
-	return h->cache->table[t->type].chains;
-}
-
 static const char *policy_name[NF_ACCEPT+1] = {
 	[NF_DROP] = "DROP",
 	[NF_ACCEPT] = "ACCEPT",
@@ -2054,14 +1710,6 @@ int nft_chain_user_rename(struct nft_handle *h,const char *chain,
 	return ret == 0 ? 1 : 0;
 }
 
-static struct nftnl_table_list *nftnl_table_list_get(struct nft_handle *h)
-{
-	if (!h->cache->tables)
-		fetch_table_cache(h);
-
-	return h->cache->tables;
-}
-
 bool nft_table_find(struct nft_handle *h, const char *tablename)
 {
 	struct nftnl_table_list_iter *iter;
diff --git a/iptables/nft.h b/iptables/nft.h
index bcac8e228c787..451c266016d8d 100644
--- a/iptables/nft.h
+++ b/iptables/nft.h
@@ -73,8 +73,7 @@ int mnl_talk(struct nft_handle *h, struct nlmsghdr *nlh,
 	     void *data);
 int nft_init(struct nft_handle *h, const struct builtin_table *t);
 void nft_fini(struct nft_handle *h);
-void nft_fake_cache(struct nft_handle *h);
-void nft_build_cache(struct nft_handle *h);
+int nft_restart(struct nft_handle *h);
 
 /*
  * Operations with tables.
@@ -95,8 +94,6 @@ const struct builtin_table *nft_table_builtin_find(struct nft_handle *h, const c
 struct nftnl_chain;
 
 int nft_chain_set(struct nft_handle *h, const char *table, const char *chain, const char *policy, const struct xt_counters *counters);
-struct nftnl_chain_list *nft_chain_list_get(struct nft_handle *h,
-					    const char *table);
 int nft_chain_save(struct nft_handle *h, struct nftnl_chain_list *list);
 int nft_chain_user_add(struct nft_handle *h, const char *chain, const char *table);
 int nft_chain_user_del(struct nft_handle *h, const char *chain, const char *table, bool verbose);
@@ -105,6 +102,9 @@ int nft_chain_user_rename(struct nft_handle *h, const char *chain, const char *t
 int nft_chain_zero_counters(struct nft_handle *h, const char *chain, const char *table, bool verbose);
 const struct builtin_chain *nft_chain_builtin_find(const struct builtin_table *t, const char *chain);
 bool nft_chain_exists(struct nft_handle *h, const char *table, const char *chain);
+void nft_bridge_chain_postprocess(struct nft_handle *h,
+				  struct nftnl_chain *c);
+
 
 /*
  * Operations with rule-set.
diff --git a/iptables/xtables-restore.c b/iptables/xtables-restore.c
index 7f28a05c68619..7641955d1ce53 100644
--- a/iptables/xtables-restore.c
+++ b/iptables/xtables-restore.c
@@ -18,6 +18,7 @@
 #include "xtables-multi.h"
 #include "nft.h"
 #include "nft-bridge.h"
+#include "nft-cache.h"
 #include <libnftnl/chain.h>
 
 static int counters, verbose;
diff --git a/iptables/xtables-save.c b/iptables/xtables-save.c
index 77b13f7ffbcdd..3741888f9af44 100644
--- a/iptables/xtables-save.c
+++ b/iptables/xtables-save.c
@@ -21,6 +21,7 @@
 #include "iptables.h"
 #include "xtables-multi.h"
 #include "nft.h"
+#include "nft-cache.h"
 
 #include <libnftnl/chain.h>
 
-- 
2.23.0


^ permalink raw reply	[flat|nested] 22+ messages in thread

* [iptables PATCH v3 04/11] nft-cache: Introduce cache levels
  2019-10-08 16:14 [iptables PATCH v3 00/11] Improve iptables-nft performance with large rulesets Phil Sutter
                   ` (2 preceding siblings ...)
  2019-10-08 16:14 ` [iptables PATCH v3 03/11] nft: Extract cache routines into nft-cache.c Phil Sutter
@ 2019-10-08 16:14 ` Phil Sutter
  2019-10-09  9:37   ` Pablo Neira Ayuso
  2019-10-08 16:14 ` [iptables PATCH v3 05/11] nft-cache: Fetch only chains in nft_chain_list_get() Phil Sutter
                   ` (6 subsequent siblings)
  10 siblings, 1 reply; 22+ messages in thread
From: Phil Sutter @ 2019-10-08 16:14 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: netfilter-devel

Replace the simple have_cache boolean by a cache level indicator
defining how complete the cache is. Since have_cache indicated full
cache (including rules), make code depending on it check for cache level
NFT_CL_RULES.

Core cache fetching routine __nft_build_cache() accepts a new level via
parameter and raises cache completeness to that level.

Signed-off-by: Phil Sutter <phil@nwl.cc>
---
 iptables/nft-cache.c | 51 +++++++++++++++++++++++++++++++-------------
 iptables/nft.h       |  9 +++++++-
 2 files changed, 44 insertions(+), 16 deletions(-)

diff --git a/iptables/nft-cache.c b/iptables/nft-cache.c
index 5444419a5cc3b..22a87e94efd76 100644
--- a/iptables/nft-cache.c
+++ b/iptables/nft-cache.c
@@ -224,30 +224,49 @@ static int fetch_rule_cache(struct nft_handle *h)
 	return 0;
 }
 
-static void __nft_build_cache(struct nft_handle *h)
+static void __nft_build_cache(struct nft_handle *h, enum nft_cache_level level)
 {
 	uint32_t genid_start, genid_stop;
 
+	if (level <= h->cache_level)
+		return;
 retry:
 	mnl_genid_get(h, &genid_start);
-	fetch_table_cache(h);
-	fetch_chain_cache(h);
-	fetch_rule_cache(h);
-	h->have_cache = true;
-	mnl_genid_get(h, &genid_stop);
 
+	switch (h->cache_level) {
+	case NFT_CL_NONE:
+		fetch_table_cache(h);
+		if (level == NFT_CL_TABLES)
+			break;
+		/* fall through */
+	case NFT_CL_TABLES:
+		fetch_chain_cache(h);
+		if (level == NFT_CL_CHAINS)
+			break;
+		/* fall through */
+	case NFT_CL_CHAINS:
+		fetch_rule_cache(h);
+		if (level == NFT_CL_RULES)
+			break;
+		/* fall through */
+	case NFT_CL_RULES:
+		break;
+	}
+
+	mnl_genid_get(h, &genid_stop);
 	if (genid_start != genid_stop) {
 		flush_chain_cache(h, NULL);
 		goto retry;
 	}
 
+	h->cache_level = level;
 	h->nft_genid = genid_start;
 }
 
 void nft_build_cache(struct nft_handle *h)
 {
-	if (!h->have_cache)
-		__nft_build_cache(h);
+	if (h->cache_level < NFT_CL_RULES)
+		__nft_build_cache(h, NFT_CL_RULES);
 }
 
 void nft_fake_cache(struct nft_handle *h)
@@ -263,7 +282,7 @@ void nft_fake_cache(struct nft_handle *h)
 
 		h->cache->table[type].chains = nftnl_chain_list_alloc();
 	}
-	h->have_cache = true;
+	h->cache_level = NFT_CL_RULES;
 	mnl_genid_get(h, &h->nft_genid);
 }
 
@@ -331,19 +350,22 @@ static int flush_cache(struct nft_handle *h, struct nft_cache *c,
 
 void flush_chain_cache(struct nft_handle *h, const char *tablename)
 {
-	if (!h->have_cache)
+	if (!h->cache_level)
 		return;
 
 	if (flush_cache(h, h->cache, tablename))
-		h->have_cache = false;
+		h->cache_level = NFT_CL_NONE;
 }
 
 void nft_rebuild_cache(struct nft_handle *h)
 {
-	if (h->have_cache)
+	enum nft_cache_level level = h->cache_level;
+
+	if (h->cache_level)
 		__nft_flush_cache(h);
 
-	__nft_build_cache(h);
+	h->cache_level = NFT_CL_NONE;
+	__nft_build_cache(h, level);
 }
 
 void nft_release_cache(struct nft_handle *h)
@@ -354,8 +376,7 @@ void nft_release_cache(struct nft_handle *h)
 
 struct nftnl_table_list *nftnl_table_list_get(struct nft_handle *h)
 {
-	if (!h->cache->tables)
-		fetch_table_cache(h);
+	__nft_build_cache(h, NFT_CL_TABLES);
 
 	return h->cache->tables;
 }
diff --git a/iptables/nft.h b/iptables/nft.h
index 451c266016d8d..9ae3122a1c515 100644
--- a/iptables/nft.h
+++ b/iptables/nft.h
@@ -27,6 +27,13 @@ struct builtin_table {
 	struct builtin_chain chains[NF_INET_NUMHOOKS];
 };
 
+enum nft_cache_level {
+	NFT_CL_NONE,
+	NFT_CL_TABLES,
+	NFT_CL_CHAINS,
+	NFT_CL_RULES
+};
+
 struct nft_cache {
 	struct nftnl_table_list		*tables;
 	struct {
@@ -53,7 +60,7 @@ struct nft_handle {
 	unsigned int		cache_index;
 	struct nft_cache	__cache[2];
 	struct nft_cache	*cache;
-	bool			have_cache;
+	enum nft_cache_level	cache_level;
 	bool			restore;
 	bool			noflush;
 	int8_t			config_done;
-- 
2.23.0


^ permalink raw reply	[flat|nested] 22+ messages in thread

* [iptables PATCH v3 05/11] nft-cache: Fetch only chains in nft_chain_list_get()
  2019-10-08 16:14 [iptables PATCH v3 00/11] Improve iptables-nft performance with large rulesets Phil Sutter
                   ` (3 preceding siblings ...)
  2019-10-08 16:14 ` [iptables PATCH v3 04/11] nft-cache: Introduce cache levels Phil Sutter
@ 2019-10-08 16:14 ` Phil Sutter
  2019-10-08 16:14 ` [iptables PATCH v3 06/11] nft-cache: Cover for multiple fetcher invocation Phil Sutter
                   ` (5 subsequent siblings)
  10 siblings, 0 replies; 22+ messages in thread
From: Phil Sutter @ 2019-10-08 16:14 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: netfilter-devel

The function is used to return the given table's chains, so fetching
chain cache is enough.

Add calls to nft_build_cache() in places where a rule cache is required.

Signed-off-by: Phil Sutter <phil@nwl.cc>
---
 iptables/nft-cache.c |  2 +-
 iptables/nft.c       | 20 ++++++++++++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/iptables/nft-cache.c b/iptables/nft-cache.c
index 22a87e94efd76..5f27aec6d9c30 100644
--- a/iptables/nft-cache.c
+++ b/iptables/nft-cache.c
@@ -390,7 +390,7 @@ struct nftnl_chain_list *nft_chain_list_get(struct nft_handle *h,
 	if (!t)
 		return NULL;
 
-	nft_build_cache(h);
+	__nft_build_cache(h, NFT_CL_CHAINS);
 
 	return h->cache->table[t->type].chains;
 }
diff --git a/iptables/nft.c b/iptables/nft.c
index 81de10d8a0892..94fabd78e527e 100644
--- a/iptables/nft.c
+++ b/iptables/nft.c
@@ -1173,6 +1173,14 @@ nft_rule_append(struct nft_handle *h, const char *chain, const char *table,
 
 	nft_xt_builtin_init(h, table);
 
+	/* Since ebtables user-defined chain policies are implemented as last
+	 * rule in nftables, rule cache is required here to treat them right. */
+	if (h->family == NFPROTO_BRIDGE) {
+		c = nft_chain_find(h, table, chain);
+		if (c && !nft_chain_builtin(c))
+			nft_build_cache(h);
+	}
+
 	nft_fn = nft_rule_append;
 
 	r = nft_rule_new(h, chain, table, data);
@@ -1397,6 +1405,8 @@ int nft_rule_save(struct nft_handle *h, const char *table, unsigned int format)
 	struct nftnl_chain *c;
 	int ret = 0;
 
+	nft_build_cache(h);
+
 	list = nft_chain_list_get(h, table);
 	if (!list)
 		return 0;
@@ -1595,6 +1605,10 @@ static int __nft_chain_user_del(struct nftnl_chain *c, void *data)
 		fprintf(stdout, "Deleting chain `%s'\n",
 			nftnl_chain_get_str(c, NFTNL_CHAIN_NAME));
 
+	/* This triggers required policy rule deletion. */
+	if (h->family == NFPROTO_BRIDGE)
+		nft_build_cache(h);
+
 	/* XXX This triggers a fast lookup from the kernel. */
 	nftnl_chain_unset(c, NFTNL_CHAIN_HANDLE);
 	ret = batch_chain_add(h, NFT_COMPAT_CHAIN_USER_DEL, c);
@@ -1876,6 +1890,8 @@ nft_rule_find(struct nft_handle *h, struct nftnl_chain *c, void *data, int rulen
 	struct nftnl_rule_iter *iter;
 	bool found = false;
 
+	nft_build_cache(h);
+
 	if (rulenum >= 0)
 		/* Delete by rule number case */
 		return nftnl_rule_lookup_byindex(c, rulenum);
@@ -2701,6 +2717,8 @@ int ebt_set_user_chain_policy(struct nft_handle *h, const char *table,
 	else
 		return 0;
 
+	nft_build_cache(h);
+
 	nftnl_chain_set_u32(c, NFTNL_CHAIN_POLICY, pval);
 	return 1;
 }
@@ -3038,6 +3056,8 @@ static int nft_is_chain_compatible(struct nftnl_chain *c, void *data)
 	enum nf_inet_hooks hook;
 	int prio;
 
+	nft_build_cache(h);
+
 	if (nftnl_rule_foreach(c, nft_is_rule_compatible, NULL))
 		return -1;
 
-- 
2.23.0


^ permalink raw reply	[flat|nested] 22+ messages in thread

* [iptables PATCH v3 06/11] nft-cache: Cover for multiple fetcher invocation
  2019-10-08 16:14 [iptables PATCH v3 00/11] Improve iptables-nft performance with large rulesets Phil Sutter
                   ` (4 preceding siblings ...)
  2019-10-08 16:14 ` [iptables PATCH v3 05/11] nft-cache: Fetch only chains in nft_chain_list_get() Phil Sutter
@ 2019-10-08 16:14 ` Phil Sutter
  2019-10-08 16:14 ` [iptables PATCH v3 07/11] nft-cache: Support partial cache per table Phil Sutter
                   ` (4 subsequent siblings)
  10 siblings, 0 replies; 22+ messages in thread
From: Phil Sutter @ 2019-10-08 16:14 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: netfilter-devel

Preparing for partial caches, it is necessary to make sure these
functions don't cause harm if called repeatedly.

* Use h->cache->tables pointer as indicator for existing table cache,
  return immediately from fetch_table_cache() if non-NULL.

* Initialize table's chain list only if non-NULL.

* Search for chain in table's chain list before adding it.

* Don't fetch rules for a chain if it has any rules already. With rule
  list being embedded in struct nftnl_chain, this is the best way left
  to check if rules have been fetched already or not. It will fail for
  empty chains, but causes no harm in that case, either.

Signed-off-by: Phil Sutter <phil@nwl.cc>
---
 iptables/nft-cache.c | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/iptables/nft-cache.c b/iptables/nft-cache.c
index 5f27aec6d9c30..a2cfb6ef6dbcf 100644
--- a/iptables/nft-cache.c
+++ b/iptables/nft-cache.c
@@ -86,6 +86,9 @@ static int fetch_table_cache(struct nft_handle *h)
 	struct nftnl_table_list *list;
 	int ret;
 
+	if (h->cache->tables)
+		return 0;
+
 	list = nftnl_table_list_alloc();
 	if (list == NULL)
 		return 0;
@@ -106,7 +109,9 @@ static int nftnl_chain_list_cb(const struct nlmsghdr *nlh, void *data)
 {
 	struct nft_handle *h = data;
 	const struct builtin_table *t;
+	struct nftnl_chain_list *list;
 	struct nftnl_chain *c;
+	const char *cname;
 
 	c = nftnl_chain_alloc();
 	if (c == NULL)
@@ -120,7 +125,13 @@ static int nftnl_chain_list_cb(const struct nlmsghdr *nlh, void *data)
 	if (!t)
 		goto out;
 
-	nftnl_chain_list_add_tail(c, h->cache->table[t->type].chains);
+	list = h->cache->table[t->type].chains;
+	cname = nftnl_chain_get_str(c, NFTNL_CHAIN_NAME);
+
+	if (nftnl_chain_list_lookup_byname(list, cname))
+		goto out;
+
+	nftnl_chain_list_add_tail(c, list);
 
 	return MNL_CB_OK;
 out:
@@ -141,6 +152,9 @@ static int fetch_chain_cache(struct nft_handle *h)
 		if (!h->tables[i].name)
 			continue;
 
+		if (h->cache->table[type].chains)
+			continue;
+
 		h->cache->table[type].chains = nftnl_chain_list_alloc();
 		if (!h->cache->table[type].chains)
 			return -1;
@@ -182,6 +196,9 @@ static int nft_rule_list_update(struct nftnl_chain *c, void *data)
 	struct nftnl_rule *rule;
 	int ret;
 
+	if (nftnl_rule_lookup_byindex(c, 0))
+		return 0;
+
 	rule = nftnl_rule_alloc();
 	if (!rule)
 		return -1;
-- 
2.23.0


^ permalink raw reply	[flat|nested] 22+ messages in thread

* [iptables PATCH v3 07/11] nft-cache: Support partial cache per table
  2019-10-08 16:14 [iptables PATCH v3 00/11] Improve iptables-nft performance with large rulesets Phil Sutter
                   ` (5 preceding siblings ...)
  2019-10-08 16:14 ` [iptables PATCH v3 06/11] nft-cache: Cover for multiple fetcher invocation Phil Sutter
@ 2019-10-08 16:14 ` Phil Sutter
  2019-10-08 16:14 ` [iptables PATCH v3 08/11] nft-cache: Support partial rule cache per chain Phil Sutter
                   ` (3 subsequent siblings)
  10 siblings, 0 replies; 22+ messages in thread
From: Phil Sutter @ 2019-10-08 16:14 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: netfilter-devel

Accept a builtin_table pointer in __nft_build_cache() and pass it along
when fetching chains and rules to operate on that table only (unless the
pointer is NULL).

Make use of it in nft_chain_list_get() since that accepts a table name
and performs a builtin table lookup internally already.

Signed-off-by: Phil Sutter <phil@nwl.cc>
---
 iptables/nft-cache.c | 82 ++++++++++++++++++++++++++++++--------------
 1 file changed, 57 insertions(+), 25 deletions(-)

diff --git a/iptables/nft-cache.c b/iptables/nft-cache.c
index a2cfb6ef6dbcf..3cb397c805a9a 100644
--- a/iptables/nft-cache.c
+++ b/iptables/nft-cache.c
@@ -11,6 +11,7 @@
 
 #include <assert.h>
 #include <errno.h>
+#include <string.h>
 #include <xtables.h>
 
 #include <linux/netfilter/nf_tables.h>
@@ -105,13 +106,19 @@ static int fetch_table_cache(struct nft_handle *h)
 	return 1;
 }
 
+struct nftnl_chain_list_cb_data {
+	struct nft_handle *h;
+	const struct builtin_table *t;
+};
+
 static int nftnl_chain_list_cb(const struct nlmsghdr *nlh, void *data)
 {
-	struct nft_handle *h = data;
-	const struct builtin_table *t;
+	struct nftnl_chain_list_cb_data *d = data;
+	const struct builtin_table *t = d->t;
 	struct nftnl_chain_list *list;
+	struct nft_handle *h = d->h;
+	const char *tname, *cname;
 	struct nftnl_chain *c;
-	const char *cname;
 
 	c = nftnl_chain_alloc();
 	if (c == NULL)
@@ -120,10 +127,15 @@ static int nftnl_chain_list_cb(const struct nlmsghdr *nlh, void *data)
 	if (nftnl_chain_nlmsg_parse(nlh, c) < 0)
 		goto out;
 
-	t = nft_table_builtin_find(h,
-			nftnl_chain_get_str(c, NFTNL_CHAIN_TABLE));
-	if (!t)
+	tname = nftnl_chain_get_str(c, NFTNL_CHAIN_TABLE);
+
+	if (!t) {
+		t = nft_table_builtin_find(h, tname);
+		if (!t)
+			goto out;
+	} else if (strcmp(t->name, tname)) {
 		goto out;
+	}
 
 	list = h->cache->table[t->type].chains;
 	cname = nftnl_chain_get_str(c, NFTNL_CHAIN_NAME);
@@ -140,30 +152,41 @@ err:
 	return MNL_CB_OK;
 }
 
-static int fetch_chain_cache(struct nft_handle *h)
+static int fetch_chain_cache(struct nft_handle *h,
+			     const struct builtin_table *t)
 {
+	struct nftnl_chain_list_cb_data d = {
+		.h = h,
+		.t = t,
+	};
 	char buf[16536];
 	struct nlmsghdr *nlh;
 	int i, ret;
 
-	for (i = 0; i < NFT_TABLE_MAX; i++) {
-		enum nft_table_type type = h->tables[i].type;
+	if (!t) {
+		for (i = 0; i < NFT_TABLE_MAX; i++) {
+			enum nft_table_type type = h->tables[i].type;
 
-		if (!h->tables[i].name)
-			continue;
+			if (!h->tables[i].name)
+				continue;
 
-		if (h->cache->table[type].chains)
-			continue;
+			if (h->cache->table[type].chains)
+				continue;
 
-		h->cache->table[type].chains = nftnl_chain_list_alloc();
-		if (!h->cache->table[type].chains)
+			h->cache->table[type].chains = nftnl_chain_list_alloc();
+			if (!h->cache->table[type].chains)
+				return -1;
+		}
+	} else if (!h->cache->table[t->type].chains) {
+		h->cache->table[t->type].chains = nftnl_chain_list_alloc();
+		if (!h->cache->table[t->type].chains)
 			return -1;
 	}
 
 	nlh = nftnl_chain_nlmsg_build_hdr(buf, NFT_MSG_GETCHAIN, h->family,
 					NLM_F_DUMP, h->seq);
 
-	ret = mnl_talk(h, nlh, nftnl_chain_list_cb, h);
+	ret = mnl_talk(h, nlh, nftnl_chain_list_cb, &d);
 	if (ret < 0 && errno == EINTR)
 		assert(nft_restart(h) >= 0);
 
@@ -224,10 +247,14 @@ static int nft_rule_list_update(struct nftnl_chain *c, void *data)
 	return 0;
 }
 
-static int fetch_rule_cache(struct nft_handle *h)
+static int fetch_rule_cache(struct nft_handle *h, const struct builtin_table *t)
 {
 	int i;
 
+	if (t)
+		return nftnl_chain_list_foreach(h->cache->table[t->type].chains,
+						nft_rule_list_update, h);
+
 	for (i = 0; i < NFT_TABLE_MAX; i++) {
 		enum nft_table_type type = h->tables[i].type;
 
@@ -241,7 +268,8 @@ static int fetch_rule_cache(struct nft_handle *h)
 	return 0;
 }
 
-static void __nft_build_cache(struct nft_handle *h, enum nft_cache_level level)
+static void __nft_build_cache(struct nft_handle *h, enum nft_cache_level level,
+			      const struct builtin_table *t)
 {
 	uint32_t genid_start, genid_stop;
 
@@ -257,12 +285,12 @@ retry:
 			break;
 		/* fall through */
 	case NFT_CL_TABLES:
-		fetch_chain_cache(h);
+		fetch_chain_cache(h, t);
 		if (level == NFT_CL_CHAINS)
 			break;
 		/* fall through */
 	case NFT_CL_CHAINS:
-		fetch_rule_cache(h);
+		fetch_rule_cache(h, t);
 		if (level == NFT_CL_RULES)
 			break;
 		/* fall through */
@@ -276,14 +304,18 @@ retry:
 		goto retry;
 	}
 
-	h->cache_level = level;
+	if (!t)
+		h->cache_level = level;
+	else if (h->cache_level < NFT_CL_TABLES)
+		h->cache_level = NFT_CL_TABLES;
+
 	h->nft_genid = genid_start;
 }
 
 void nft_build_cache(struct nft_handle *h)
 {
 	if (h->cache_level < NFT_CL_RULES)
-		__nft_build_cache(h, NFT_CL_RULES);
+		__nft_build_cache(h, NFT_CL_RULES, NULL);
 }
 
 void nft_fake_cache(struct nft_handle *h)
@@ -382,7 +414,7 @@ void nft_rebuild_cache(struct nft_handle *h)
 		__nft_flush_cache(h);
 
 	h->cache_level = NFT_CL_NONE;
-	__nft_build_cache(h, level);
+	__nft_build_cache(h, level, NULL);
 }
 
 void nft_release_cache(struct nft_handle *h)
@@ -393,7 +425,7 @@ void nft_release_cache(struct nft_handle *h)
 
 struct nftnl_table_list *nftnl_table_list_get(struct nft_handle *h)
 {
-	__nft_build_cache(h, NFT_CL_TABLES);
+	__nft_build_cache(h, NFT_CL_TABLES, NULL);
 
 	return h->cache->tables;
 }
@@ -407,7 +439,7 @@ struct nftnl_chain_list *nft_chain_list_get(struct nft_handle *h,
 	if (!t)
 		return NULL;
 
-	__nft_build_cache(h, NFT_CL_CHAINS);
+	__nft_build_cache(h, NFT_CL_CHAINS, t);
 
 	return h->cache->table[t->type].chains;
 }
-- 
2.23.0


^ permalink raw reply	[flat|nested] 22+ messages in thread

* [iptables PATCH v3 08/11] nft-cache: Support partial rule cache per chain
  2019-10-08 16:14 [iptables PATCH v3 00/11] Improve iptables-nft performance with large rulesets Phil Sutter
                   ` (6 preceding siblings ...)
  2019-10-08 16:14 ` [iptables PATCH v3 07/11] nft-cache: Support partial cache per table Phil Sutter
@ 2019-10-08 16:14 ` Phil Sutter
  2019-10-08 16:14 ` [iptables PATCH v3 09/11] nft: Reduce cache overhead of nft_chain_builtin_init() Phil Sutter
                   ` (2 subsequent siblings)
  10 siblings, 0 replies; 22+ messages in thread
From: Phil Sutter @ 2019-10-08 16:14 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: netfilter-devel

Accept an additional chain name pointer in __nft_build_cache() and pass
it along to fetch only that specific chain and its rules.

Enhance nft_build_cache() to take an optional nftnl_chain pointer to
fetch rules for.

Enhance nft_chain_list_get() to take an optional chain name. If cache
level doesn't include chains already, it will fetch only the specified
chain from kernel (if existing) and add that to table's chain list which
is returned. This keeps operations for all chains of a table or a
specific one within the same code path in nft.c.

Signed-off-by: Phil Sutter <phil@nwl.cc>
---
 iptables/nft-cache.c       | 76 ++++++++++++++++++++++++++++----------
 iptables/nft-cache.h       |  6 +--
 iptables/nft.c             | 35 +++++++++---------
 iptables/xtables-restore.c |  4 +-
 iptables/xtables-save.c    |  2 +-
 5 files changed, 79 insertions(+), 44 deletions(-)

diff --git a/iptables/nft-cache.c b/iptables/nft-cache.c
index 3cb397c805a9a..07406960030cf 100644
--- a/iptables/nft-cache.c
+++ b/iptables/nft-cache.c
@@ -153,7 +153,8 @@ err:
 }
 
 static int fetch_chain_cache(struct nft_handle *h,
-			     const struct builtin_table *t)
+			     const struct builtin_table *t,
+			     const char *chain)
 {
 	struct nftnl_chain_list_cb_data d = {
 		.h = h,
@@ -183,8 +184,24 @@ static int fetch_chain_cache(struct nft_handle *h,
 			return -1;
 	}
 
-	nlh = nftnl_chain_nlmsg_build_hdr(buf, NFT_MSG_GETCHAIN, h->family,
-					NLM_F_DUMP, h->seq);
+	if (t && chain) {
+		struct nftnl_chain *c = nftnl_chain_alloc();
+
+		if (!c)
+			return -1;
+
+		nlh = nftnl_chain_nlmsg_build_hdr(buf, NFT_MSG_GETCHAIN,
+						  h->family, NLM_F_ACK,
+						  h->seq);
+		nftnl_chain_set_str(c, NFTNL_CHAIN_TABLE, t->name);
+		nftnl_chain_set_str(c, NFTNL_CHAIN_NAME, chain);
+		nftnl_chain_nlmsg_build_payload(nlh, c);
+		nftnl_chain_free(c);
+	} else {
+		nlh = nftnl_chain_nlmsg_build_hdr(buf, NFT_MSG_GETCHAIN,
+						  h->family, NLM_F_DUMP,
+						  h->seq);
+	}
 
 	ret = mnl_talk(h, nlh, nftnl_chain_list_cb, &d);
 	if (ret < 0 && errno == EINTR)
@@ -247,13 +264,23 @@ static int nft_rule_list_update(struct nftnl_chain *c, void *data)
 	return 0;
 }
 
-static int fetch_rule_cache(struct nft_handle *h, const struct builtin_table *t)
+static int fetch_rule_cache(struct nft_handle *h,
+			    const struct builtin_table *t, const char *chain)
 {
 	int i;
 
-	if (t)
-		return nftnl_chain_list_foreach(h->cache->table[t->type].chains,
-						nft_rule_list_update, h);
+	if (t) {
+		struct nftnl_chain_list *list;
+		struct nftnl_chain *c;
+
+		list = h->cache->table[t->type].chains;
+
+		if (chain) {
+			c = nftnl_chain_list_lookup_byname(list, chain);
+			return nft_rule_list_update(c, h);
+		}
+		return nftnl_chain_list_foreach(list, nft_rule_list_update, h);
+	}
 
 	for (i = 0; i < NFT_TABLE_MAX; i++) {
 		enum nft_table_type type = h->tables[i].type;
@@ -268,8 +295,9 @@ static int fetch_rule_cache(struct nft_handle *h, const struct builtin_table *t)
 	return 0;
 }
 
-static void __nft_build_cache(struct nft_handle *h, enum nft_cache_level level,
-			      const struct builtin_table *t)
+static void
+__nft_build_cache(struct nft_handle *h, enum nft_cache_level level,
+		  const struct builtin_table *t, const char *chain)
 {
 	uint32_t genid_start, genid_stop;
 
@@ -285,12 +313,12 @@ retry:
 			break;
 		/* fall through */
 	case NFT_CL_TABLES:
-		fetch_chain_cache(h, t);
+		fetch_chain_cache(h, t, chain);
 		if (level == NFT_CL_CHAINS)
 			break;
 		/* fall through */
 	case NFT_CL_CHAINS:
-		fetch_rule_cache(h, t);
+		fetch_rule_cache(h, t, chain);
 		if (level == NFT_CL_RULES)
 			break;
 		/* fall through */
@@ -304,7 +332,7 @@ retry:
 		goto retry;
 	}
 
-	if (!t)
+	if (!t && !chain)
 		h->cache_level = level;
 	else if (h->cache_level < NFT_CL_TABLES)
 		h->cache_level = NFT_CL_TABLES;
@@ -312,10 +340,18 @@ retry:
 	h->nft_genid = genid_start;
 }
 
-void nft_build_cache(struct nft_handle *h)
+void nft_build_cache(struct nft_handle *h, struct nftnl_chain *c)
 {
-	if (h->cache_level < NFT_CL_RULES)
-		__nft_build_cache(h, NFT_CL_RULES, NULL);
+	const struct builtin_table *t;
+	const char *table, *chain;
+
+	if (!c)
+		return __nft_build_cache(h, NFT_CL_RULES, NULL, NULL);
+
+	table = nftnl_chain_get_str(c, NFTNL_CHAIN_TABLE);
+	chain = nftnl_chain_get_str(c, NFTNL_CHAIN_NAME);
+	t = nft_table_builtin_find(h, table);
+	__nft_build_cache(h, NFT_CL_RULES, t, chain);
 }
 
 void nft_fake_cache(struct nft_handle *h)
@@ -414,7 +450,7 @@ void nft_rebuild_cache(struct nft_handle *h)
 		__nft_flush_cache(h);
 
 	h->cache_level = NFT_CL_NONE;
-	__nft_build_cache(h, level, NULL);
+	__nft_build_cache(h, level, NULL, NULL);
 }
 
 void nft_release_cache(struct nft_handle *h)
@@ -425,13 +461,13 @@ void nft_release_cache(struct nft_handle *h)
 
 struct nftnl_table_list *nftnl_table_list_get(struct nft_handle *h)
 {
-	__nft_build_cache(h, NFT_CL_TABLES, NULL);
+	__nft_build_cache(h, NFT_CL_TABLES, NULL, NULL);
 
 	return h->cache->tables;
 }
 
-struct nftnl_chain_list *nft_chain_list_get(struct nft_handle *h,
-					    const char *table)
+struct nftnl_chain_list *
+nft_chain_list_get(struct nft_handle *h, const char *table, const char *chain)
 {
 	const struct builtin_table *t;
 
@@ -439,7 +475,7 @@ struct nftnl_chain_list *nft_chain_list_get(struct nft_handle *h,
 	if (!t)
 		return NULL;
 
-	__nft_build_cache(h, NFT_CL_CHAINS, t);
+	__nft_build_cache(h, NFT_CL_CHAINS, t, chain);
 
 	return h->cache->table[t->type].chains;
 }
diff --git a/iptables/nft-cache.h b/iptables/nft-cache.h
index 423c6516de9bb..793a85f453ffc 100644
--- a/iptables/nft-cache.h
+++ b/iptables/nft-cache.h
@@ -4,14 +4,14 @@
 struct nft_handle;
 
 void nft_fake_cache(struct nft_handle *h);
-void nft_build_cache(struct nft_handle *h);
+void nft_build_cache(struct nft_handle *h, struct nftnl_chain *c);
 void nft_rebuild_cache(struct nft_handle *h);
 void nft_release_cache(struct nft_handle *h);
 void flush_chain_cache(struct nft_handle *h, const char *tablename);
 void flush_rule_cache(struct nftnl_chain *c);
 
-struct nftnl_chain_list *nft_chain_list_get(struct nft_handle *h,
-					    const char *table);
+struct nftnl_chain_list *
+nft_chain_list_get(struct nft_handle *h, const char *table, const char *chain);
 struct nftnl_table_list *nftnl_table_list_get(struct nft_handle *h);
 
 #endif /* _NFT_CACHE_H_ */
diff --git a/iptables/nft.c b/iptables/nft.c
index 94fabd78e527e..775582aab7955 100644
--- a/iptables/nft.c
+++ b/iptables/nft.c
@@ -709,7 +709,7 @@ nft_chain_builtin_find(const struct builtin_table *t, const char *chain)
 static void nft_chain_builtin_init(struct nft_handle *h,
 				   const struct builtin_table *table)
 {
-	struct nftnl_chain_list *list = nft_chain_list_get(h, table->name);
+	struct nftnl_chain_list *list = nft_chain_list_get(h, table->name, NULL);
 	struct nftnl_chain *c;
 	int i;
 
@@ -1178,7 +1178,7 @@ nft_rule_append(struct nft_handle *h, const char *chain, const char *table,
 	if (h->family == NFPROTO_BRIDGE) {
 		c = nft_chain_find(h, table, chain);
 		if (c && !nft_chain_builtin(c))
-			nft_build_cache(h);
+			nft_build_cache(h, c);
 	}
 
 	nft_fn = nft_rule_append;
@@ -1405,9 +1405,7 @@ int nft_rule_save(struct nft_handle *h, const char *table, unsigned int format)
 	struct nftnl_chain *c;
 	int ret = 0;
 
-	nft_build_cache(h);
-
-	list = nft_chain_list_get(h, table);
+	list = nft_chain_list_get(h, table, NULL);
 	if (!list)
 		return 0;
 
@@ -1417,6 +1415,7 @@ int nft_rule_save(struct nft_handle *h, const char *table, unsigned int format)
 
 	c = nftnl_chain_list_iter_next(iter);
 	while (c) {
+		nft_build_cache(h, c);
 		ret = nft_chain_save_rules(h, c, format);
 		if (ret != 0)
 			break;
@@ -1468,7 +1467,7 @@ int nft_rule_flush(struct nft_handle *h, const char *chain, const char *table,
 
 	nft_fn = nft_rule_flush;
 
-	list = nft_chain_list_get(h, table);
+	list = nft_chain_list_get(h, table, chain);
 	if (list == NULL) {
 		ret = 1;
 		goto err;
@@ -1533,7 +1532,7 @@ int nft_chain_user_add(struct nft_handle *h, const char *chain, const char *tabl
 
 	ret = batch_chain_add(h, NFT_COMPAT_CHAIN_USER_ADD, c);
 
-	list = nft_chain_list_get(h, table);
+	list = nft_chain_list_get(h, table, chain);
 	if (list)
 		nftnl_chain_list_add(c, list);
 
@@ -1573,7 +1572,7 @@ int nft_chain_restore(struct nft_handle *h, const char *chain, const char *table
 
 	ret = batch_chain_add(h, NFT_COMPAT_CHAIN_USER_ADD, c);
 
-	list = nft_chain_list_get(h, table);
+	list = nft_chain_list_get(h, table, chain);
 	if (list)
 		nftnl_chain_list_add(c, list);
 
@@ -1607,7 +1606,7 @@ static int __nft_chain_user_del(struct nftnl_chain *c, void *data)
 
 	/* This triggers required policy rule deletion. */
 	if (h->family == NFPROTO_BRIDGE)
-		nft_build_cache(h);
+		nft_build_cache(h, c);
 
 	/* XXX This triggers a fast lookup from the kernel. */
 	nftnl_chain_unset(c, NFTNL_CHAIN_HANDLE);
@@ -1632,7 +1631,7 @@ int nft_chain_user_del(struct nft_handle *h, const char *chain,
 
 	nft_fn = nft_chain_user_del;
 
-	list = nft_chain_list_get(h, table);
+	list = nft_chain_list_get(h, table, chain);
 	if (list == NULL)
 		return 0;
 
@@ -1660,7 +1659,7 @@ nft_chain_find(struct nft_handle *h, const char *table, const char *chain)
 {
 	struct nftnl_chain_list *list;
 
-	list = nft_chain_list_get(h, table);
+	list = nft_chain_list_get(h, table, chain);
 	if (list == NULL)
 		return NULL;
 
@@ -1890,7 +1889,7 @@ nft_rule_find(struct nft_handle *h, struct nftnl_chain *c, void *data, int rulen
 	struct nftnl_rule_iter *iter;
 	bool found = false;
 
-	nft_build_cache(h);
+	nft_build_cache(h, c);
 
 	if (rulenum >= 0)
 		/* Delete by rule number case */
@@ -2198,7 +2197,7 @@ int nft_rule_list(struct nft_handle *h, const char *chain, const char *table,
 	if (!nft_is_table_compatible(h, table))
 		xtables_error(OTHER_PROBLEM, "table `%s' is incompatible, use 'nft' tool.\n", table);
 
-	list = nft_chain_list_get(h, table);
+	list = nft_chain_list_get(h, table, chain);
 	if (!list)
 		return 0;
 
@@ -2299,7 +2298,7 @@ int nft_rule_list_save(struct nft_handle *h, const char *chain,
 	if (!nft_is_table_compatible(h, table))
 		xtables_error(OTHER_PROBLEM, "table `%s' is incompatible, use 'nft' tool.\n", table);
 
-	list = nft_chain_list_get(h, table);
+	list = nft_chain_list_get(h, table, chain);
 	if (!list)
 		return 0;
 
@@ -2717,7 +2716,7 @@ int ebt_set_user_chain_policy(struct nft_handle *h, const char *table,
 	else
 		return 0;
 
-	nft_build_cache(h);
+	nft_build_cache(h, c);
 
 	nftnl_chain_set_u32(c, NFTNL_CHAIN_POLICY, pval);
 	return 1;
@@ -2983,7 +2982,7 @@ int nft_chain_zero_counters(struct nft_handle *h, const char *chain,
 	struct nftnl_chain *c;
 	int ret = 0;
 
-	list = nft_chain_list_get(h, table);
+	list = nft_chain_list_get(h, table, chain);
 	if (list == NULL)
 		goto err;
 
@@ -3056,7 +3055,7 @@ static int nft_is_chain_compatible(struct nftnl_chain *c, void *data)
 	enum nf_inet_hooks hook;
 	int prio;
 
-	nft_build_cache(h);
+	nft_build_cache(h, c);
 
 	if (nftnl_rule_foreach(c, nft_is_rule_compatible, NULL))
 		return -1;
@@ -3089,7 +3088,7 @@ bool nft_is_table_compatible(struct nft_handle *h, const char *tablename)
 {
 	struct nftnl_chain_list *clist;
 
-	clist = nft_chain_list_get(h, tablename);
+	clist = nft_chain_list_get(h, tablename, NULL);
 	if (clist == NULL)
 		return false;
 
diff --git a/iptables/xtables-restore.c b/iptables/xtables-restore.c
index 7641955d1ce53..4f6d324bdafe9 100644
--- a/iptables/xtables-restore.c
+++ b/iptables/xtables-restore.c
@@ -63,7 +63,7 @@ static struct nftnl_chain_list *get_chain_list(struct nft_handle *h,
 {
 	struct nftnl_chain_list *chain_list;
 
-	chain_list = nft_chain_list_get(h, table);
+	chain_list = nft_chain_list_get(h, table, NULL);
 	if (chain_list == NULL)
 		xtables_error(OTHER_PROBLEM, "cannot retrieve chain list\n");
 
@@ -100,7 +100,7 @@ void xtables_restore_parse(struct nft_handle *h,
 	if (!h->noflush)
 		nft_fake_cache(h);
 	else
-		nft_build_cache(h);
+		nft_build_cache(h, NULL);
 
 	/* Grab standard input. */
 	while (fgets(buffer, sizeof(buffer), p->in)) {
diff --git a/iptables/xtables-save.c b/iptables/xtables-save.c
index 3741888f9af44..e234425ded293 100644
--- a/iptables/xtables-save.c
+++ b/iptables/xtables-save.c
@@ -83,7 +83,7 @@ __do_output(struct nft_handle *h, const char *tablename, void *data)
 		return 0;
 	}
 
-	chain_list = nft_chain_list_get(h, tablename);
+	chain_list = nft_chain_list_get(h, tablename, NULL);
 	if (!chain_list)
 		return 0;
 
-- 
2.23.0


^ permalink raw reply	[flat|nested] 22+ messages in thread

* [iptables PATCH v3 09/11] nft: Reduce cache overhead of nft_chain_builtin_init()
  2019-10-08 16:14 [iptables PATCH v3 00/11] Improve iptables-nft performance with large rulesets Phil Sutter
                   ` (7 preceding siblings ...)
  2019-10-08 16:14 ` [iptables PATCH v3 08/11] nft-cache: Support partial rule cache per chain Phil Sutter
@ 2019-10-08 16:14 ` Phil Sutter
  2019-10-08 16:14 ` [iptables PATCH v3 10/11] nft: Support nft_is_table_compatible() per chain Phil Sutter
  2019-10-08 16:14 ` [iptables PATCH v3 11/11] nft: Optimize flushing all chains of a table Phil Sutter
  10 siblings, 0 replies; 22+ messages in thread
From: Phil Sutter @ 2019-10-08 16:14 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: netfilter-devel

There is no need for a full chain cache, fetch only the few builtin
chains that might need to be created.

Signed-off-by: Phil Sutter <phil@nwl.cc>
---
 iptables/nft.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/iptables/nft.c b/iptables/nft.c
index 775582aab7955..7e019d54ee475 100644
--- a/iptables/nft.c
+++ b/iptables/nft.c
@@ -709,15 +709,16 @@ nft_chain_builtin_find(const struct builtin_table *t, const char *chain)
 static void nft_chain_builtin_init(struct nft_handle *h,
 				   const struct builtin_table *table)
 {
-	struct nftnl_chain_list *list = nft_chain_list_get(h, table->name, NULL);
+	struct nftnl_chain_list *list;
 	struct nftnl_chain *c;
 	int i;
 
-	if (!list)
-		return;
-
 	/* Initialize built-in chains if they don't exist yet */
 	for (i=0; i < NF_INET_NUMHOOKS && table->chains[i].name != NULL; i++) {
+		list = nft_chain_list_get(h, table->name,
+					  table->chains[i].name);
+		if (!list)
+			continue;
 
 		c = nftnl_chain_list_lookup_byname(list, table->chains[i].name);
 		if (c != NULL)
-- 
2.23.0


^ permalink raw reply	[flat|nested] 22+ messages in thread

* [iptables PATCH v3 10/11] nft: Support nft_is_table_compatible() per chain
  2019-10-08 16:14 [iptables PATCH v3 00/11] Improve iptables-nft performance with large rulesets Phil Sutter
                   ` (8 preceding siblings ...)
  2019-10-08 16:14 ` [iptables PATCH v3 09/11] nft: Reduce cache overhead of nft_chain_builtin_init() Phil Sutter
@ 2019-10-08 16:14 ` Phil Sutter
  2019-10-08 16:14 ` [iptables PATCH v3 11/11] nft: Optimize flushing all chains of a table Phil Sutter
  10 siblings, 0 replies; 22+ messages in thread
From: Phil Sutter @ 2019-10-08 16:14 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: netfilter-devel

When operating on a single chain only, compatibility checking causes
unwanted overhead by checking all chains of the current table. Avoid
this by accepting the current chain name as parameter and pass it along
to nft_chain_list_get().

While being at it, introduce nft_assert_table_compatible() which
calls xtables_error() in case compatibility check fails. If a chain name
was given, include that in error message.

Signed-off-by: Phil Sutter <phil@nwl.cc>
---
 iptables/nft.c          | 32 ++++++++++++++++++++++++--------
 iptables/nft.h          |  5 ++++-
 iptables/xtables-save.c |  2 +-
 3 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/iptables/nft.c b/iptables/nft.c
index 7e019d54ee475..12cc423c87bbb 100644
--- a/iptables/nft.c
+++ b/iptables/nft.c
@@ -2192,12 +2192,10 @@ int nft_rule_list(struct nft_handle *h, const char *chain, const char *table,
 	bool found = false;
 
 	nft_xt_builtin_init(h, table);
+	nft_assert_table_compatible(h, table, chain);
 
 	ops = nft_family_ops_lookup(h->family);
 
-	if (!nft_is_table_compatible(h, table))
-		xtables_error(OTHER_PROBLEM, "table `%s' is incompatible, use 'nft' tool.\n", table);
-
 	list = nft_chain_list_get(h, table, chain);
 	if (!list)
 		return 0;
@@ -2295,9 +2293,7 @@ int nft_rule_list_save(struct nft_handle *h, const char *chain,
 	int ret = 0;
 
 	nft_xt_builtin_init(h, table);
-
-	if (!nft_is_table_compatible(h, table))
-		xtables_error(OTHER_PROBLEM, "table `%s' is incompatible, use 'nft' tool.\n", table);
+	nft_assert_table_compatible(h, table, chain);
 
 	list = nft_chain_list_get(h, table, chain);
 	if (!list)
@@ -3085,11 +3081,12 @@ static int nft_is_chain_compatible(struct nftnl_chain *c, void *data)
 	return 0;
 }
 
-bool nft_is_table_compatible(struct nft_handle *h, const char *tablename)
+bool nft_is_table_compatible(struct nft_handle *h,
+			     const char *table, const char *chain)
 {
 	struct nftnl_chain_list *clist;
 
-	clist = nft_chain_list_get(h, tablename, NULL);
+	clist = nft_chain_list_get(h, table, chain);
 	if (clist == NULL)
 		return false;
 
@@ -3098,3 +3095,22 @@ bool nft_is_table_compatible(struct nft_handle *h, const char *tablename)
 
 	return true;
 }
+
+void nft_assert_table_compatible(struct nft_handle *h,
+				 const char *table, const char *chain)
+{
+	const char *pfx = "", *sfx = "";
+
+	if (nft_is_table_compatible(h, table, chain))
+		return;
+
+	if (chain) {
+		pfx = "chain `";
+		sfx = "' in ";
+	} else {
+		chain = "";
+	}
+	xtables_error(OTHER_PROBLEM,
+		      "%s%s%stable `%s' is incompatible, use 'nft' tool.\n",
+		      pfx, chain, sfx, table);
+}
diff --git a/iptables/nft.h b/iptables/nft.h
index 9ae3122a1c515..4b8b3033a56c0 100644
--- a/iptables/nft.h
+++ b/iptables/nft.h
@@ -206,7 +206,10 @@ int nft_arp_rule_insert(struct nft_handle *h, const char *chain,
 
 void nft_rule_to_arpt_entry(struct nftnl_rule *r, struct arpt_entry *fw);
 
-bool nft_is_table_compatible(struct nft_handle *h, const char *name);
+bool nft_is_table_compatible(struct nft_handle *h,
+			     const char *table, const char *chain);
+void nft_assert_table_compatible(struct nft_handle *h,
+				 const char *table, const char *chain);
 
 int ebt_set_user_chain_policy(struct nft_handle *h, const char *table,
 			      const char *chain, const char *policy);
diff --git a/iptables/xtables-save.c b/iptables/xtables-save.c
index e234425ded293..44687f998c91a 100644
--- a/iptables/xtables-save.c
+++ b/iptables/xtables-save.c
@@ -77,7 +77,7 @@ __do_output(struct nft_handle *h, const char *tablename, void *data)
 	if (!nft_table_builtin_find(h, tablename))
 		return 0;
 
-	if (!nft_is_table_compatible(h, tablename)) {
+	if (!nft_is_table_compatible(h, tablename, NULL)) {
 		printf("# Table `%s' is incompatible, use 'nft' tool.\n",
 		       tablename);
 		return 0;
-- 
2.23.0


^ permalink raw reply	[flat|nested] 22+ messages in thread

* [iptables PATCH v3 11/11] nft: Optimize flushing all chains of a table
  2019-10-08 16:14 [iptables PATCH v3 00/11] Improve iptables-nft performance with large rulesets Phil Sutter
                   ` (9 preceding siblings ...)
  2019-10-08 16:14 ` [iptables PATCH v3 10/11] nft: Support nft_is_table_compatible() per chain Phil Sutter
@ 2019-10-08 16:14 ` Phil Sutter
  10 siblings, 0 replies; 22+ messages in thread
From: Phil Sutter @ 2019-10-08 16:14 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: netfilter-devel

Leverage nftables' support for flushing all chains of a table by
omitting NFTNL_RULE_CHAIN attribute in NFT_MSG_DELRULE payload.

The only caveat is with verbose output, as that still requires to have a
list of (existing) chains to iterate over. Apart from that, implementing
this shortcut is pretty straightforward: Don't retrieve a chain list and
just call __nft_rule_flush() directly which doesn't set above attribute
if chain name pointer is NULL.

A bigger deal is keeping rule cache consistent: Instead of just clearing
rule list for each flushed chain, flush_rule_cache() is updated to
iterate over all cached chains of the given table, clearing their rule
lists if not called for a specific chain.

While being at it, sort local variable declarations in nft_rule_flush()
from longest to shortest and drop the loop-local 'chain_name' variable
(but instead use 'chain' function parameter which is not used at that
point).

Signed-off-by: Phil Sutter <phil@nwl.cc>
---
 iptables/nft-cache.c | 22 +++++++++++++++++++---
 iptables/nft-cache.h |  3 ++-
 iptables/nft.c       | 32 ++++++++++++++++++--------------
 3 files changed, 39 insertions(+), 18 deletions(-)

diff --git a/iptables/nft-cache.c b/iptables/nft-cache.c
index 07406960030cf..8a33326a5bc24 100644
--- a/iptables/nft-cache.c
+++ b/iptables/nft-cache.c
@@ -381,7 +381,7 @@ static void __nft_flush_cache(struct nft_handle *h)
 	}
 }
 
-static int __flush_rule_cache(struct nftnl_rule *r, void *data)
+static int ____flush_rule_cache(struct nftnl_rule *r, void *data)
 {
 	nftnl_rule_list_del(r);
 	nftnl_rule_free(r);
@@ -389,9 +389,25 @@ static int __flush_rule_cache(struct nftnl_rule *r, void *data)
 	return 0;
 }
 
-void flush_rule_cache(struct nftnl_chain *c)
+static int __flush_rule_cache(struct nftnl_chain *c, void *data)
 {
-	nftnl_rule_foreach(c, __flush_rule_cache, NULL);
+	return nftnl_rule_foreach(c, ____flush_rule_cache, NULL);
+}
+
+int flush_rule_cache(struct nft_handle *h, const char *table,
+		     struct nftnl_chain *c)
+{
+	const struct builtin_table *t;
+
+	if (c)
+		return __flush_rule_cache(c, NULL);
+
+	t = nft_table_builtin_find(h, table);
+	if (!t || !h->cache->table[t->type].chains)
+		return 0;
+
+	return nftnl_chain_list_foreach(h->cache->table[t->type].chains,
+					__flush_rule_cache, NULL);
 }
 
 static int __flush_chain_cache(struct nftnl_chain *c, void *data)
diff --git a/iptables/nft-cache.h b/iptables/nft-cache.h
index 793a85f453ffc..cb7a7688be37a 100644
--- a/iptables/nft-cache.h
+++ b/iptables/nft-cache.h
@@ -8,7 +8,8 @@ void nft_build_cache(struct nft_handle *h, struct nftnl_chain *c);
 void nft_rebuild_cache(struct nft_handle *h);
 void nft_release_cache(struct nft_handle *h);
 void flush_chain_cache(struct nft_handle *h, const char *tablename);
-void flush_rule_cache(struct nftnl_chain *c);
+int flush_rule_cache(struct nft_handle *h, const char *table,
+		     struct nftnl_chain *c);
 
 struct nftnl_chain_list *
 nft_chain_list_get(struct nft_handle *h, const char *table, const char *chain);
diff --git a/iptables/nft.c b/iptables/nft.c
index 12cc423c87bbb..89b1c7a808f57 100644
--- a/iptables/nft.c
+++ b/iptables/nft.c
@@ -1437,7 +1437,7 @@ __nft_rule_flush(struct nft_handle *h, const char *table,
 	struct obj_update *obj;
 	struct nftnl_rule *r;
 
-	if (verbose)
+	if (verbose && chain)
 		fprintf(stdout, "Flushing chain `%s'\n", chain);
 
 	r = nftnl_rule_alloc();
@@ -1445,7 +1445,8 @@ __nft_rule_flush(struct nft_handle *h, const char *table,
 		return;
 
 	nftnl_rule_set_str(r, NFTNL_RULE_TABLE, table);
-	nftnl_rule_set_str(r, NFTNL_RULE_CHAIN, chain);
+	if (chain)
+		nftnl_rule_set_str(r, NFTNL_RULE_CHAIN, chain);
 
 	obj = batch_rule_add(h, NFT_COMPAT_RULE_FLUSH, r);
 	if (!obj) {
@@ -1459,19 +1460,21 @@ __nft_rule_flush(struct nft_handle *h, const char *table,
 int nft_rule_flush(struct nft_handle *h, const char *chain, const char *table,
 		   bool verbose)
 {
-	int ret = 0;
-	struct nftnl_chain_list *list;
 	struct nftnl_chain_list_iter *iter;
-	struct nftnl_chain *c;
+	struct nftnl_chain_list *list;
+	struct nftnl_chain *c = NULL;
+	int ret = 0;
 
 	nft_xt_builtin_init(h, table);
 
 	nft_fn = nft_rule_flush;
 
-	list = nft_chain_list_get(h, table, chain);
-	if (list == NULL) {
-		ret = 1;
-		goto err;
+	if (chain || verbose) {
+		list = nft_chain_list_get(h, table, chain);
+		if (list == NULL) {
+			ret = 1;
+			goto err;
+		}
 	}
 
 	if (chain) {
@@ -1480,9 +1483,11 @@ int nft_rule_flush(struct nft_handle *h, const char *chain, const char *table,
 			errno = ENOENT;
 			return 0;
 		}
+	}
 
+	if (chain || !verbose) {
 		__nft_rule_flush(h, table, chain, verbose, false);
-		flush_rule_cache(c);
+		flush_rule_cache(h, table, c);
 		return 1;
 	}
 
@@ -1494,11 +1499,10 @@ int nft_rule_flush(struct nft_handle *h, const char *chain, const char *table,
 
 	c = nftnl_chain_list_iter_next(iter);
 	while (c != NULL) {
-		const char *chain_name =
-			nftnl_chain_get_str(c, NFTNL_CHAIN_NAME);
+		chain = nftnl_chain_get_str(c, NFTNL_CHAIN_NAME);
 
-		__nft_rule_flush(h, table, chain_name, verbose, false);
-		flush_rule_cache(c);
+		__nft_rule_flush(h, table, chain, verbose, false);
+		flush_rule_cache(h, table, c);
 		c = nftnl_chain_list_iter_next(iter);
 	}
 	nftnl_chain_list_iter_destroy(iter);
-- 
2.23.0


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [iptables PATCH v3 01/11] nft: Pass nft_handle to flush_cache()
  2019-10-08 16:14 ` [iptables PATCH v3 01/11] nft: Pass nft_handle to flush_cache() Phil Sutter
@ 2019-10-09  9:30   ` Pablo Neira Ayuso
  0 siblings, 0 replies; 22+ messages in thread
From: Pablo Neira Ayuso @ 2019-10-09  9:30 UTC (permalink / raw)
  To: Phil Sutter; +Cc: netfilter-devel

On Tue, Oct 08, 2019 at 06:14:37PM +0200, Phil Sutter wrote:
> This allows to call nft_table_builtin_find() and hence removes the only
> real user of __nft_table_builtin_find(). Consequently remove the latter
> by integrating it into its sole caller.
> 
> Signed-off-by: Phil Sutter <phil@nwl.cc>

Acked-by: Pablo Neira Ayuso <pablo@netfilter.org>

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [iptables PATCH v3 02/11] nft: Avoid nested cache fetching
  2019-10-08 16:14 ` [iptables PATCH v3 02/11] nft: Avoid nested cache fetching Phil Sutter
@ 2019-10-09  9:30   ` Pablo Neira Ayuso
  0 siblings, 0 replies; 22+ messages in thread
From: Pablo Neira Ayuso @ 2019-10-09  9:30 UTC (permalink / raw)
  To: Phil Sutter; +Cc: netfilter-devel

On Tue, Oct 08, 2019 at 06:14:38PM +0200, Phil Sutter wrote:
> Don't call fetch_table_cache() from within fetch_chain_cache() but
> instead from __nft_build_cache(). Since that is the only caller of
> fetch_chain_cache(), this change should not have any effect in practice.
> 
> Signed-off-by: Phil Sutter <phil@nwl.cc>

Acked-by: Pablo Neira Ayuso <pablo@netfilter.org>

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [iptables PATCH v3 03/11] nft: Extract cache routines into nft-cache.c
  2019-10-08 16:14 ` [iptables PATCH v3 03/11] nft: Extract cache routines into nft-cache.c Phil Sutter
@ 2019-10-09  9:32   ` Pablo Neira Ayuso
  0 siblings, 0 replies; 22+ messages in thread
From: Pablo Neira Ayuso @ 2019-10-09  9:32 UTC (permalink / raw)
  To: Phil Sutter; +Cc: netfilter-devel

On Tue, Oct 08, 2019 at 06:14:39PM +0200, Phil Sutter wrote:
> The amount of code dealing with caching only is considerable and hence
> deserves an own source file.
> 
> Signed-off-by: Phil Sutter <phil@nwl.cc>

Acked-by: Pablo Neira Ayuso <pablo@netfilter.org>

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [iptables PATCH v3 04/11] nft-cache: Introduce cache levels
  2019-10-08 16:14 ` [iptables PATCH v3 04/11] nft-cache: Introduce cache levels Phil Sutter
@ 2019-10-09  9:37   ` Pablo Neira Ayuso
  2019-10-09 10:29     ` Pablo Neira Ayuso
  0 siblings, 1 reply; 22+ messages in thread
From: Pablo Neira Ayuso @ 2019-10-09  9:37 UTC (permalink / raw)
  To: Phil Sutter; +Cc: netfilter-devel

Hi Phil,

On Tue, Oct 08, 2019 at 06:14:40PM +0200, Phil Sutter wrote:
> Replace the simple have_cache boolean by a cache level indicator
> defining how complete the cache is. Since have_cache indicated full
> cache (including rules), make code depending on it check for cache level
> NFT_CL_RULES.
> 
> Core cache fetching routine __nft_build_cache() accepts a new level via
> parameter and raises cache completeness to that level.
> 
> Signed-off-by: Phil Sutter <phil@nwl.cc>
> ---
>  iptables/nft-cache.c | 51 +++++++++++++++++++++++++++++++-------------
>  iptables/nft.h       |  9 +++++++-
>  2 files changed, 44 insertions(+), 16 deletions(-)
> 
> diff --git a/iptables/nft-cache.c b/iptables/nft-cache.c
> index 5444419a5cc3b..22a87e94efd76 100644
> --- a/iptables/nft-cache.c
> +++ b/iptables/nft-cache.c
> @@ -224,30 +224,49 @@ static int fetch_rule_cache(struct nft_handle *h)
>  	return 0;
>  }
>  
> -static void __nft_build_cache(struct nft_handle *h)
> +static void __nft_build_cache(struct nft_handle *h, enum nft_cache_level level)
>  {
>  	uint32_t genid_start, genid_stop;
>  
> +	if (level <= h->cache_level)
> +		return;
>  retry:
>  	mnl_genid_get(h, &genid_start);
> -	fetch_table_cache(h);
> -	fetch_chain_cache(h);
> -	fetch_rule_cache(h);
> -	h->have_cache = true;
> -	mnl_genid_get(h, &genid_stop);
>  
> +	switch (h->cache_level) {
> +	case NFT_CL_NONE:
> +		fetch_table_cache(h);
> +		if (level == NFT_CL_TABLES)
> +			break;
> +		/* fall through */
> +	case NFT_CL_TABLES:

If the existing level is TABLES and use wants chains, then you have to
invalidate the existing table cache, then fetch the tables and chains
to make sure cache is consistent. I mean, extending an existing cache
might lead to inconsistencies.

Am I missing anything?

> +		fetch_chain_cache(h);
> +		if (level == NFT_CL_CHAINS)
> +			break;
> +		/* fall through */
> +	case NFT_CL_CHAINS:
> +		fetch_rule_cache(h);
> +		if (level == NFT_CL_RULES)
> +			break;
> +		/* fall through */
> +	case NFT_CL_RULES:
> +		break;
> +	}
> +
> +	mnl_genid_get(h, &genid_stop);
>  	if (genid_start != genid_stop) {
>  		flush_chain_cache(h, NULL);
>  		goto retry;
>  	}
>  
> +	h->cache_level = level;
>  	h->nft_genid = genid_start;
>  }

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [iptables PATCH v3 04/11] nft-cache: Introduce cache levels
  2019-10-09  9:37   ` Pablo Neira Ayuso
@ 2019-10-09 10:29     ` Pablo Neira Ayuso
  2019-10-10 22:09       ` Phil Sutter
  0 siblings, 1 reply; 22+ messages in thread
From: Pablo Neira Ayuso @ 2019-10-09 10:29 UTC (permalink / raw)
  To: Phil Sutter; +Cc: netfilter-devel

On Wed, Oct 09, 2019 at 11:37:23AM +0200, Pablo Neira Ayuso wrote:
> Hi Phil,
> 
> On Tue, Oct 08, 2019 at 06:14:40PM +0200, Phil Sutter wrote:
> > Replace the simple have_cache boolean by a cache level indicator
> > defining how complete the cache is. Since have_cache indicated full
> > cache (including rules), make code depending on it check for cache level
> > NFT_CL_RULES.
> > 
> > Core cache fetching routine __nft_build_cache() accepts a new level via
> > parameter and raises cache completeness to that level.
> > 
> > Signed-off-by: Phil Sutter <phil@nwl.cc>
> > ---
> >  iptables/nft-cache.c | 51 +++++++++++++++++++++++++++++++-------------
> >  iptables/nft.h       |  9 +++++++-
> >  2 files changed, 44 insertions(+), 16 deletions(-)
> > 
> > diff --git a/iptables/nft-cache.c b/iptables/nft-cache.c
> > index 5444419a5cc3b..22a87e94efd76 100644
> > --- a/iptables/nft-cache.c
> > +++ b/iptables/nft-cache.c
> > @@ -224,30 +224,49 @@ static int fetch_rule_cache(struct nft_handle *h)
> >  	return 0;
> >  }
> >  
> > -static void __nft_build_cache(struct nft_handle *h)
> > +static void __nft_build_cache(struct nft_handle *h, enum nft_cache_level level)
> >  {
> >  	uint32_t genid_start, genid_stop;
> >  
> > +	if (level <= h->cache_level)
> > +		return;
> >  retry:
> >  	mnl_genid_get(h, &genid_start);
> > -	fetch_table_cache(h);
> > -	fetch_chain_cache(h);
> > -	fetch_rule_cache(h);
> > -	h->have_cache = true;
> > -	mnl_genid_get(h, &genid_stop);
> >  
> > +	switch (h->cache_level) {
> > +	case NFT_CL_NONE:
> > +		fetch_table_cache(h);
> > +		if (level == NFT_CL_TABLES)
> > +			break;
> > +		/* fall through */
> > +	case NFT_CL_TABLES:
> 
> If the existing level is TABLES and use wants chains, then you have to
> invalidate the existing table cache, then fetch the tables and chains
> to make sure cache is consistent. I mean, extending an existing cache
> might lead to inconsistencies.
> 
> Am I missing anything?

If I'm correct, I wonder if we should go for splitting the parsing
from the evaluation phase here. Probably generate the rule by pointing
to the table and chain as string, then evaluate the ruleset update
batch to obtain the cache level in one go. This is the approach that
I had in mind with nftables, so you might avoid dumping the ruleset
over and over in an environment where dynamic updates are frequent.

The idea is to avoid fetching a cache, then canceling it by the rule
coming afterwards just because the cache is incomplete. So the cache
that is required is calculated once, then you go to the kernel and
fetch it (making sure generation number tells you that your cache is
consistent).

Makes sense to you?

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [iptables PATCH v3 04/11] nft-cache: Introduce cache levels
  2019-10-09 10:29     ` Pablo Neira Ayuso
@ 2019-10-10 22:09       ` Phil Sutter
  2019-10-11  9:28         ` Pablo Neira Ayuso
  2019-10-11 10:20         ` Pablo Neira Ayuso
  0 siblings, 2 replies; 22+ messages in thread
From: Phil Sutter @ 2019-10-10 22:09 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: netfilter-devel

Hi Pablo,

On Wed, Oct 09, 2019 at 12:29:01PM +0200, Pablo Neira Ayuso wrote:
> On Wed, Oct 09, 2019 at 11:37:23AM +0200, Pablo Neira Ayuso wrote:
> > Hi Phil,
> > 
> > On Tue, Oct 08, 2019 at 06:14:40PM +0200, Phil Sutter wrote:
> > > Replace the simple have_cache boolean by a cache level indicator
> > > defining how complete the cache is. Since have_cache indicated full
> > > cache (including rules), make code depending on it check for cache level
> > > NFT_CL_RULES.
> > > 
> > > Core cache fetching routine __nft_build_cache() accepts a new level via
> > > parameter and raises cache completeness to that level.
> > > 
> > > Signed-off-by: Phil Sutter <phil@nwl.cc>
> > > ---
> > >  iptables/nft-cache.c | 51 +++++++++++++++++++++++++++++++-------------
> > >  iptables/nft.h       |  9 +++++++-
> > >  2 files changed, 44 insertions(+), 16 deletions(-)
> > > 
> > > diff --git a/iptables/nft-cache.c b/iptables/nft-cache.c
> > > index 5444419a5cc3b..22a87e94efd76 100644
> > > --- a/iptables/nft-cache.c
> > > +++ b/iptables/nft-cache.c
> > > @@ -224,30 +224,49 @@ static int fetch_rule_cache(struct nft_handle *h)
> > >  	return 0;
> > >  }
> > >  
> > > -static void __nft_build_cache(struct nft_handle *h)
> > > +static void __nft_build_cache(struct nft_handle *h, enum nft_cache_level level)
> > >  {
> > >  	uint32_t genid_start, genid_stop;
> > >  
> > > +	if (level <= h->cache_level)
> > > +		return;
> > >  retry:
> > >  	mnl_genid_get(h, &genid_start);
> > > -	fetch_table_cache(h);
> > > -	fetch_chain_cache(h);
> > > -	fetch_rule_cache(h);
> > > -	h->have_cache = true;
> > > -	mnl_genid_get(h, &genid_stop);
> > >  
> > > +	switch (h->cache_level) {
> > > +	case NFT_CL_NONE:
> > > +		fetch_table_cache(h);
> > > +		if (level == NFT_CL_TABLES)
> > > +			break;
> > > +		/* fall through */
> > > +	case NFT_CL_TABLES:
> > 
> > If the existing level is TABLES and use wants chains, then you have to
> > invalidate the existing table cache, then fetch the tables and chains
> > to make sure cache is consistent. I mean, extending an existing cache
> > might lead to inconsistencies.
> > 
> > Am I missing anything?

Hmm, this is a valid point indeed. At least one can't depend on stored
genid to match the local cache's state which defeats its purpose.

> If I'm correct, I wonder if we should go for splitting the parsing
> from the evaluation phase here. Probably generate the rule by pointing
> to the table and chain as string, then evaluate the ruleset update
> batch to obtain the cache level in one go. This is the approach that
> I had in mind with nftables, so you might avoid dumping the ruleset
> over and over in an environment where dynamic updates are frequent.
> 
> The idea is to avoid fetching a cache, then canceling it by the rule
> coming afterwards just because the cache is incomplete. So the cache
> that is required is calculated once, then you go to the kernel and
> fetch it (making sure generation number tells you that your cache is
> consistent).
> 
> Makes sense to you?

Well, I understand your approach and it's a proper way to deal with the
situation, but I fear this means quite some effort. I imagine we either
extend the xtables-restore table delete logic to add and disable more
dependency commands based on kernel ruleset contents or add these
dependency commands when iterating over the command set and populating
the cache.

The thing is, we do this mostly just for xtables-restore --noflush
logic, which in turn is probably not used often but popular among "power
users" trying to speed up the bunch of iptables commands they have to
enter at once.

Maybe we could go with a simpler solution for now, which is to check
kernel genid again and drop the local cache if it differs from what's
stored. If it doesn't, the current cache is still up to date and we may
just fetch what's missing. Or does that leave room for a race condition?

Thanks, Phil

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [iptables PATCH v3 04/11] nft-cache: Introduce cache levels
  2019-10-10 22:09       ` Phil Sutter
@ 2019-10-11  9:28         ` Pablo Neira Ayuso
  2019-10-11 11:24           ` Phil Sutter
  2019-10-11 10:20         ` Pablo Neira Ayuso
  1 sibling, 1 reply; 22+ messages in thread
From: Pablo Neira Ayuso @ 2019-10-11  9:28 UTC (permalink / raw)
  To: Phil Sutter, netfilter-devel

On Fri, Oct 11, 2019 at 12:09:11AM +0200, Phil Sutter wrote:
> Hi Pablo,
> 
> On Wed, Oct 09, 2019 at 12:29:01PM +0200, Pablo Neira Ayuso wrote:
> > On Wed, Oct 09, 2019 at 11:37:23AM +0200, Pablo Neira Ayuso wrote:
> > > Hi Phil,
> > > 
> > > On Tue, Oct 08, 2019 at 06:14:40PM +0200, Phil Sutter wrote:
> > > > Replace the simple have_cache boolean by a cache level indicator
> > > > defining how complete the cache is. Since have_cache indicated full
> > > > cache (including rules), make code depending on it check for cache level
> > > > NFT_CL_RULES.
> > > > 
> > > > Core cache fetching routine __nft_build_cache() accepts a new level via
> > > > parameter and raises cache completeness to that level.
> > > > 
> > > > Signed-off-by: Phil Sutter <phil@nwl.cc>
> > > > ---
> > > >  iptables/nft-cache.c | 51 +++++++++++++++++++++++++++++++-------------
> > > >  iptables/nft.h       |  9 +++++++-
> > > >  2 files changed, 44 insertions(+), 16 deletions(-)
> > > > 
> > > > diff --git a/iptables/nft-cache.c b/iptables/nft-cache.c
> > > > index 5444419a5cc3b..22a87e94efd76 100644
> > > > --- a/iptables/nft-cache.c
> > > > +++ b/iptables/nft-cache.c
> > > > @@ -224,30 +224,49 @@ static int fetch_rule_cache(struct nft_handle *h)
> > > >  	return 0;
> > > >  }
> > > >  
> > > > -static void __nft_build_cache(struct nft_handle *h)
> > > > +static void __nft_build_cache(struct nft_handle *h, enum nft_cache_level level)
> > > >  {
> > > >  	uint32_t genid_start, genid_stop;
> > > >  
> > > > +	if (level <= h->cache_level)
> > > > +		return;
> > > >  retry:
> > > >  	mnl_genid_get(h, &genid_start);
> > > > -	fetch_table_cache(h);
> > > > -	fetch_chain_cache(h);
> > > > -	fetch_rule_cache(h);
> > > > -	h->have_cache = true;
> > > > -	mnl_genid_get(h, &genid_stop);
> > > >  
> > > > +	switch (h->cache_level) {
> > > > +	case NFT_CL_NONE:
> > > > +		fetch_table_cache(h);
> > > > +		if (level == NFT_CL_TABLES)
> > > > +			break;
> > > > +		/* fall through */
> > > > +	case NFT_CL_TABLES:
> > > 
> > > If the existing level is TABLES and use wants chains, then you have to
> > > invalidate the existing table cache, then fetch the tables and chains
> > > to make sure cache is consistent. I mean, extending an existing cache
> > > might lead to inconsistencies.
> > > 
> > > Am I missing anything?
> 
> Hmm, this is a valid point indeed. At least one can't depend on stored
> genid to match the local cache's state which defeats its purpose.

Exactly.

> > If I'm correct, I wonder if we should go for splitting the parsing
> > from the evaluation phase here. Probably generate the rule by pointing
> > to the table and chain as string, then evaluate the ruleset update
> > batch to obtain the cache level in one go. This is the approach that
> > I had in mind with nftables, so you might avoid dumping the ruleset
> > over and over in an environment where dynamic updates are frequent.
> > 
> > The idea is to avoid fetching a cache, then canceling it by the rule
> > coming afterwards just because the cache is incomplete. So the cache
> > that is required is calculated once, then you go to the kernel and
> > fetch it (making sure generation number tells you that your cache is
> > consistent).
> > 
> > Makes sense to you?
> 
> Well, I understand your approach and it's a proper way to deal with the
> situation, but I fear this means quite some effort. I imagine we either
> extend the xtables-restore table delete logic to add and disable more
> dependency commands based on kernel ruleset contents or add these
> dependency commands when iterating over the command set and populating
> the cache.

It's always more work to make a bit of architectural changes, yes.

> The thing is, we do this mostly just for xtables-restore --noflush
> logic, which in turn is probably not used often but popular among "power
> users" trying to speed up the bunch of iptables commands they have to
> enter at once.

Yes, this is the kind of user that deals with frequent dynamic
updates.

> Maybe we could go with a simpler solution for now, which is to check
> kernel genid again and drop the local cache if it differs from what's
> stored. If it doesn't, the current cache is still up to date and we may
> just fetch what's missing. Or does that leave room for a race condition?

You could also just parse the ruleset twice in userspace, once to
calculate the cache you need and another to actually create the
transaction batch and push it into the kernel. That's a bit poor man
approach, but it might work. You would need to invoke
xtables_restore_parse() twice.

This more simpler approach probably requires less changes to the
existing codebase, that can be "reverted" later on if someone needs to
speed up this by removing this parsing happening twice.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [iptables PATCH v3 04/11] nft-cache: Introduce cache levels
  2019-10-10 22:09       ` Phil Sutter
  2019-10-11  9:28         ` Pablo Neira Ayuso
@ 2019-10-11 10:20         ` Pablo Neira Ayuso
  1 sibling, 0 replies; 22+ messages in thread
From: Pablo Neira Ayuso @ 2019-10-11 10:20 UTC (permalink / raw)
  To: Phil Sutter, netfilter-devel

On Fri, Oct 11, 2019 at 12:09:11AM +0200, Phil Sutter wrote:
[...]
> Maybe we could go with a simpler solution for now, which is to check
> kernel genid again and drop the local cache if it differs from what's
> stored. If it doesn't, the current cache is still up to date and we may
> just fetch what's missing. Or does that leave room for a race condition?

My concern with this approach is that, in the dynamic ruleset update
scenarios, assuming very frequent updates, you might lose race when
building the cache in stages. Hence, forcing you to restart from
scratch in the middle of the transaction handling.

I prefer to calculate the cache that is needed in one go by analyzing
the batch, it's simpler. Note that we might lose race still since
kernel might tell us we're working on an obsolete generation number ID
cache, forcing us to restart.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [iptables PATCH v3 04/11] nft-cache: Introduce cache levels
  2019-10-11  9:28         ` Pablo Neira Ayuso
@ 2019-10-11 11:24           ` Phil Sutter
  2019-10-14 10:00             ` Pablo Neira Ayuso
  0 siblings, 1 reply; 22+ messages in thread
From: Phil Sutter @ 2019-10-11 11:24 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: netfilter-devel

Hi,

On Fri, Oct 11, 2019 at 11:28:23AM +0200, Pablo Neira Ayuso wrote:
[...]
> You could also just parse the ruleset twice in userspace, once to
> calculate the cache you need and another to actually create the
> transaction batch and push it into the kernel. That's a bit poor man
> approach, but it might work. You would need to invoke
> xtables_restore_parse() twice.

The problem with parsing twice is having to cache input which may be
huge for xtables-restore.

On Fri, Oct 11, 2019 at 12:20:52PM +0200, Pablo Neira Ayuso wrote:
> On Fri, Oct 11, 2019 at 12:09:11AM +0200, Phil Sutter wrote:
> [...]
> > Maybe we could go with a simpler solution for now, which is to check
> > kernel genid again and drop the local cache if it differs from what's
> > stored. If it doesn't, the current cache is still up to date and we may
> > just fetch what's missing. Or does that leave room for a race condition?
> 
> My concern with this approach is that, in the dynamic ruleset update
> scenarios, assuming very frequent updates, you might lose race when
> building the cache in stages. Hence, forcing you to restart from
> scratch in the middle of the transaction handling.

In a very busy environment there's always trouble, simply because we
can't atomically fetch ruleset from kernel and adjust and submit our
batch. Dealing with that means we're back at xtables-lock.

> I prefer to calculate the cache that is needed in one go by analyzing
> the batch, it's simpler. Note that we might lose race still since
> kernel might tell us we're working on an obsolete generation number ID
> cache, forcing us to restart.

My idea for conditional cache reset is based on the assumption that
conflicts are rare and we want to optimize for non-conflict case. So
core logic would be:

1) fetch kernel genid into genid_start
2) if cache level > NFT_CL_NONE and cache genid != genid_start:
   2a) drop local caches
   2b) set cache level to NFT_CL_NONE
3) call cache fetchers based on cache level and desired level
4) fetch kernel genid into genid_end
5) if genid_start != genid_end goto 1

So this is basically the old algorithm but with (2) added. What do you
think?

Thanks, Phil

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [iptables PATCH v3 04/11] nft-cache: Introduce cache levels
  2019-10-11 11:24           ` Phil Sutter
@ 2019-10-14 10:00             ` Pablo Neira Ayuso
  0 siblings, 0 replies; 22+ messages in thread
From: Pablo Neira Ayuso @ 2019-10-14 10:00 UTC (permalink / raw)
  To: Phil Sutter, netfilter-devel

On Fri, Oct 11, 2019 at 01:24:52PM +0200, Phil Sutter wrote:
> Hi,
> 
> On Fri, Oct 11, 2019 at 11:28:23AM +0200, Pablo Neira Ayuso wrote:
> [...]
> > You could also just parse the ruleset twice in userspace, once to
> > calculate the cache you need and another to actually create the
> > transaction batch and push it into the kernel. That's a bit poor man
> > approach, but it might work. You would need to invoke
> > xtables_restore_parse() twice.
> 
> The problem with parsing twice is having to cache input which may be
> huge for xtables-restore.
> 
> On Fri, Oct 11, 2019 at 12:20:52PM +0200, Pablo Neira Ayuso wrote:
> > On Fri, Oct 11, 2019 at 12:09:11AM +0200, Phil Sutter wrote:
> > [...]
> > > Maybe we could go with a simpler solution for now, which is to check
> > > kernel genid again and drop the local cache if it differs from what's
> > > stored. If it doesn't, the current cache is still up to date and we may
> > > just fetch what's missing. Or does that leave room for a race condition?
> > 
> > My concern with this approach is that, in the dynamic ruleset update
> > scenarios, assuming very frequent updates, you might lose race when
> > building the cache in stages. Hence, forcing you to restart from
> > scratch in the middle of the transaction handling.
> 
> In a very busy environment there's always trouble, simply because we
> can't atomically fetch ruleset from kernel and adjust and submit our
> batch. Dealing with that means we're back at xtables-lock.
> 
> > I prefer to calculate the cache that is needed in one go by analyzing
> > the batch, it's simpler. Note that we might lose race still since
> > kernel might tell us we're working on an obsolete generation number ID
> > cache, forcing us to restart.
> 
> My idea for conditional cache reset is based on the assumption that
> conflicts are rare and we want to optimize for non-conflict case. So
> core logic would be:
> 
> 1) fetch kernel genid into genid_start
> 2) if cache level > NFT_CL_NONE and cache genid != genid_start:
>    2a) drop local caches
>    2b) set cache level to NFT_CL_NONE
> 3) call cache fetchers based on cache level and desired level
> 4) fetch kernel genid into genid_end
> 5) if genid_start != genid_end goto 1
> 
> So this is basically the old algorithm but with (2) added. What do you
> think?

Please, make testcases to validate that races don't happen. Debugging
cache inconsistencies is not easy, that's why I like the idea of
calculating the cache first, then build it in one go. I'm fine with
starting with a more simple approach in the short term. Note that
reports from users on these cache inconsistency problems are usually
sparse, which is usually a bit frustrating. I understand a larger
rework might to accomodate the more simple approach will take more
time.

^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, back to index

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-10-08 16:14 [iptables PATCH v3 00/11] Improve iptables-nft performance with large rulesets Phil Sutter
2019-10-08 16:14 ` [iptables PATCH v3 01/11] nft: Pass nft_handle to flush_cache() Phil Sutter
2019-10-09  9:30   ` Pablo Neira Ayuso
2019-10-08 16:14 ` [iptables PATCH v3 02/11] nft: Avoid nested cache fetching Phil Sutter
2019-10-09  9:30   ` Pablo Neira Ayuso
2019-10-08 16:14 ` [iptables PATCH v3 03/11] nft: Extract cache routines into nft-cache.c Phil Sutter
2019-10-09  9:32   ` Pablo Neira Ayuso
2019-10-08 16:14 ` [iptables PATCH v3 04/11] nft-cache: Introduce cache levels Phil Sutter
2019-10-09  9:37   ` Pablo Neira Ayuso
2019-10-09 10:29     ` Pablo Neira Ayuso
2019-10-10 22:09       ` Phil Sutter
2019-10-11  9:28         ` Pablo Neira Ayuso
2019-10-11 11:24           ` Phil Sutter
2019-10-14 10:00             ` Pablo Neira Ayuso
2019-10-11 10:20         ` Pablo Neira Ayuso
2019-10-08 16:14 ` [iptables PATCH v3 05/11] nft-cache: Fetch only chains in nft_chain_list_get() Phil Sutter
2019-10-08 16:14 ` [iptables PATCH v3 06/11] nft-cache: Cover for multiple fetcher invocation Phil Sutter
2019-10-08 16:14 ` [iptables PATCH v3 07/11] nft-cache: Support partial cache per table Phil Sutter
2019-10-08 16:14 ` [iptables PATCH v3 08/11] nft-cache: Support partial rule cache per chain Phil Sutter
2019-10-08 16:14 ` [iptables PATCH v3 09/11] nft: Reduce cache overhead of nft_chain_builtin_init() Phil Sutter
2019-10-08 16:14 ` [iptables PATCH v3 10/11] nft: Support nft_is_table_compatible() per chain Phil Sutter
2019-10-08 16:14 ` [iptables PATCH v3 11/11] nft: Optimize flushing all chains of a table Phil Sutter

Netfilter-Devel Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/netfilter-devel/0 netfilter-devel/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 netfilter-devel netfilter-devel/ https://lore.kernel.org/netfilter-devel \
		netfilter-devel@vger.kernel.org
	public-inbox-index netfilter-devel

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.netfilter-devel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git