All of lore.kernel.org
 help / color / mirror / Atom feed
* [nf PATCH 0/5] Introduce locking for reset requests
@ 2023-09-23  1:38 Phil Sutter
  2023-09-23  1:38 ` [nf PATCH 1/5] netfilter: nf_tables: Don't allocate nft_rule_dump_ctx Phil Sutter
                   ` (4 more replies)
  0 siblings, 5 replies; 25+ messages in thread
From: Phil Sutter @ 2023-09-23  1:38 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: netfilter-devel, Florian Westphal

Introduce a spin lock to serialize expression reset operations as
concurrent resetting of the same expression may lead to unexpected
results.

Original approach coined by Pablo and Florian, a remaining puzzle to
solve was the claim to avoid conditional spinlock calls. To achieve
this, follow Florian's suggested way of introducing dedicated nfnetlink
callbacks for *_RESET requests.

Avoiding the check for whether reset operation being requested in
callbacks is a close call, but the info must be carried into the dump
callback as well. While doing this, refactor the touched dump start
routines to embed the context into struct netlink_callback::ctx instead
of allocating it.

Phil Sutter (5):
  netfilter: nf_tables: Don't allocate nft_rule_dump_ctx
  netfilter: nf_tables: Add locking for NFT_MSG_GETRULE_RESET requests
  netfilter: nf_tables: Introduce struct nft_obj_dump_ctx
  netfilter: nf_tables: Add locking for NFT_MSG_GETOBJ_RESET requests
  netfilter: nf_tables: Add locking for NFT_MSG_GETSETELEM_RESET
    requests

 include/net/netfilter/nf_tables.h |   1 +
 net/netfilter/nf_tables_api.c     | 528 ++++++++++++++++++++----------
 2 files changed, 353 insertions(+), 176 deletions(-)

-- 
2.41.0


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [nf PATCH 1/5] netfilter: nf_tables: Don't allocate nft_rule_dump_ctx
  2023-09-23  1:38 [nf PATCH 0/5] Introduce locking for reset requests Phil Sutter
@ 2023-09-23  1:38 ` Phil Sutter
  2023-09-23  1:38 ` [nf PATCH 2/5] netfilter: nf_tables: Add locking for NFT_MSG_GETRULE_RESET requests Phil Sutter
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 25+ messages in thread
From: Phil Sutter @ 2023-09-23  1:38 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: netfilter-devel, Florian Westphal

Eliminate the direct use of netlink_callback::args when dumping rules by
casting nft_rule_dump_ctx over netlink_callback::ctx as suggested in
the struct's comment.

The value for 's_idx' has to be stored inside nft_rule_dump_ctx now, so
move the local 'idx' variable into it, too. Finally, make it hold the
'reset' boolean as well.

Note how this patch removes the zeroing of netlink_callback::args[1-5] -
none of the rule dump callbacks seem to make use of them.

Signed-off-by: Phil Sutter <phil@nwl.cc>
---
 net/netfilter/nf_tables_api.c | 81 ++++++++++++++---------------------
 1 file changed, 32 insertions(+), 49 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 4356189360fb8..511508407867d 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3441,20 +3441,21 @@ static void audit_log_rule_reset(const struct nft_table *table,
 }
 
 struct nft_rule_dump_ctx {
+	unsigned int s_idx;
 	char *table;
 	char *chain;
+	bool reset;
 };
 
 static int __nf_tables_dump_rules(struct sk_buff *skb,
 				  unsigned int *idx,
 				  struct netlink_callback *cb,
 				  const struct nft_table *table,
-				  const struct nft_chain *chain,
-				  bool reset)
+				  const struct nft_chain *chain)
 {
+	struct nft_rule_dump_ctx *ctx = (void *)cb->ctx;
 	struct net *net = sock_net(skb->sk);
 	const struct nft_rule *rule, *prule;
-	unsigned int s_idx = cb->args[0];
 	unsigned int entries = 0;
 	int ret = 0;
 	u64 handle;
@@ -3463,12 +3464,9 @@ static int __nf_tables_dump_rules(struct sk_buff *skb,
 	list_for_each_entry_rcu(rule, &chain->rules, list) {
 		if (!nft_is_active(net, rule))
 			goto cont_skip;
-		if (*idx < s_idx)
+		if (*idx < ctx->s_idx)
 			goto cont;
-		if (*idx > s_idx) {
-			memset(&cb->args[1], 0,
-					sizeof(cb->args) - sizeof(cb->args[0]));
-		}
+
 		if (prule)
 			handle = prule->handle;
 		else
@@ -3479,7 +3477,7 @@ static int __nf_tables_dump_rules(struct sk_buff *skb,
 					NFT_MSG_NEWRULE,
 					NLM_F_MULTI | NLM_F_APPEND,
 					table->family,
-					table, chain, rule, handle, reset) < 0) {
+					table, chain, rule, handle, ctx->reset) < 0) {
 			ret = 1;
 			break;
 		}
@@ -3491,7 +3489,7 @@ static int __nf_tables_dump_rules(struct sk_buff *skb,
 		(*idx)++;
 	}
 
-	if (reset && entries)
+	if (ctx->reset && entries)
 		audit_log_rule_reset(table, cb->seq, entries);
 
 	return ret;
@@ -3501,17 +3499,13 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
 				struct netlink_callback *cb)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
-	const struct nft_rule_dump_ctx *ctx = cb->data;
+	struct nft_rule_dump_ctx *ctx = (void *)cb->ctx;
 	struct nft_table *table;
 	const struct nft_chain *chain;
 	unsigned int idx = 0;
 	struct net *net = sock_net(skb->sk);
 	int family = nfmsg->nfgen_family;
 	struct nftables_pernet *nft_net;
-	bool reset = false;
-
-	if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETRULE_RESET)
-		reset = true;
 
 	rcu_read_lock();
 	nft_net = nft_pernet(net);
@@ -3521,10 +3515,10 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
 		if (family != NFPROTO_UNSPEC && family != table->family)
 			continue;
 
-		if (ctx && ctx->table && strcmp(ctx->table, table->name) != 0)
+		if (ctx->table && strcmp(ctx->table, table->name) != 0)
 			continue;
 
-		if (ctx && ctx->table && ctx->chain) {
+		if (ctx->table && ctx->chain) {
 			struct rhlist_head *list, *tmp;
 
 			list = rhltable_lookup(&table->chains_ht, ctx->chain,
@@ -3536,7 +3530,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
 				if (!nft_is_active(net, chain))
 					continue;
 				__nf_tables_dump_rules(skb, &idx,
-						       cb, table, chain, reset);
+						       cb, table, chain);
 				break;
 			}
 			goto done;
@@ -3544,62 +3538,51 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
 
 		list_for_each_entry_rcu(chain, &table->chains, list) {
 			if (__nf_tables_dump_rules(skb, &idx,
-						   cb, table, chain, reset))
+						   cb, table, chain))
 				goto done;
 		}
 
-		if (ctx && ctx->table)
+		if (ctx->table)
 			break;
 	}
 done:
 	rcu_read_unlock();
 
-	cb->args[0] = idx;
+	ctx->s_idx = idx;
 	return skb->len;
 }
 
 static int nf_tables_dump_rules_start(struct netlink_callback *cb)
 {
+	struct nft_rule_dump_ctx *ctx = (void *)cb->ctx;
 	const struct nlattr * const *nla = cb->data;
-	struct nft_rule_dump_ctx *ctx = NULL;
 
-	if (nla[NFTA_RULE_TABLE] || nla[NFTA_RULE_CHAIN]) {
-		ctx = kzalloc(sizeof(*ctx), GFP_ATOMIC);
-		if (!ctx)
-			return -ENOMEM;
+	BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx));
 
-		if (nla[NFTA_RULE_TABLE]) {
-			ctx->table = nla_strdup(nla[NFTA_RULE_TABLE],
-							GFP_ATOMIC);
-			if (!ctx->table) {
-				kfree(ctx);
-				return -ENOMEM;
-			}
-		}
-		if (nla[NFTA_RULE_CHAIN]) {
-			ctx->chain = nla_strdup(nla[NFTA_RULE_CHAIN],
-						GFP_ATOMIC);
-			if (!ctx->chain) {
-				kfree(ctx->table);
-				kfree(ctx);
-				return -ENOMEM;
-			}
+	if (nla[NFTA_RULE_TABLE]) {
+		ctx->table = nla_strdup(nla[NFTA_RULE_TABLE], GFP_ATOMIC);
+		if (!ctx->table)
+			return -ENOMEM;
+	}
+	if (nla[NFTA_RULE_CHAIN]) {
+		ctx->chain = nla_strdup(nla[NFTA_RULE_CHAIN], GFP_ATOMIC);
+		if (!ctx->chain) {
+			kfree(ctx->table);
+			return -ENOMEM;
 		}
 	}
+	if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETRULE_RESET)
+		ctx->reset = true;
 
-	cb->data = ctx;
 	return 0;
 }
 
 static int nf_tables_dump_rules_done(struct netlink_callback *cb)
 {
-	struct nft_rule_dump_ctx *ctx = cb->data;
+	struct nft_rule_dump_ctx *ctx = (void *)cb->ctx;
 
-	if (ctx) {
-		kfree(ctx->table);
-		kfree(ctx->chain);
-		kfree(ctx);
-	}
+	kfree(ctx->table);
+	kfree(ctx->chain);
 	return 0;
 }
 
-- 
2.41.0


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [nf PATCH 2/5] netfilter: nf_tables: Add locking for NFT_MSG_GETRULE_RESET requests
  2023-09-23  1:38 [nf PATCH 0/5] Introduce locking for reset requests Phil Sutter
  2023-09-23  1:38 ` [nf PATCH 1/5] netfilter: nf_tables: Don't allocate nft_rule_dump_ctx Phil Sutter
@ 2023-09-23  1:38 ` Phil Sutter
  2023-09-23 11:04   ` Florian Westphal
                     ` (2 more replies)
  2023-09-23  1:38 ` [nf PATCH 3/5] netfilter: nf_tables: Introduce struct nft_obj_dump_ctx Phil Sutter
                   ` (2 subsequent siblings)
  4 siblings, 3 replies; 25+ messages in thread
From: Phil Sutter @ 2023-09-23  1:38 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: netfilter-devel, Florian Westphal

Rule reset is not protected by the commit mutex, so multiple reset
requests could run for the same data. Expressions' dump'n'reset routines
are not concurrency-safe, though. With nft_counter for instance, if
nft_counter_do_dump() runs twice at the same time, the old value may be
subtracted twice and thus the value underruns.

Solve this via introduction of a reset spinlock which ensures exclusive
access. To avoid conditional lock/unlock calls, introduce a dedicated
callback for nfnetlink, and for the asynchronous netlink dump (which by
itself must run unlocked).

Signed-off-by: Phil Sutter <phil@nwl.cc>
---
 include/net/netfilter/nf_tables.h |   1 +
 net/netfilter/nf_tables_api.c     | 138 ++++++++++++++++++++++--------
 2 files changed, 104 insertions(+), 35 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 7c816359d5a98..bd6849f4c46e3 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1732,6 +1732,7 @@ struct nftables_pernet {
 	struct list_head	module_list;
 	struct list_head	notify_list;
 	struct mutex		commit_mutex;
+	spinlock_t		reset_lock;
 	u64			table_handle;
 	unsigned int		base_seq;
 	unsigned int		gc_seq;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 511508407867d..4bccd15a67105 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3552,6 +3552,19 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
 	return skb->len;
 }
 
+static int nf_tables_dumpreset_rules(struct sk_buff *skb,
+				     struct netlink_callback *cb)
+{
+	struct nftables_pernet *nft_net = nft_pernet(sock_net(skb->sk));
+	int ret;
+
+	spin_lock(&nft_net->reset_lock);
+	ret = nf_tables_dump_rules(skb, cb);
+	spin_unlock(&nft_net->reset_lock);
+
+	return ret;
+}
+
 static int nf_tables_dump_rules_start(struct netlink_callback *cb)
 {
 	struct nft_rule_dump_ctx *ctx = (void *)cb->ctx;
@@ -3571,12 +3584,18 @@ static int nf_tables_dump_rules_start(struct netlink_callback *cb)
 			return -ENOMEM;
 		}
 	}
-	if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETRULE_RESET)
-		ctx->reset = true;
-
 	return 0;
 }
 
+static int nf_tables_dumpreset_rules_start(struct netlink_callback *cb)
+{
+	struct nft_rule_dump_ctx *ctx = (void *)cb->ctx;
+
+	ctx->reset = true;
+
+	return nf_tables_dump_rules_start(cb);
+}
+
 static int nf_tables_dump_rules_done(struct netlink_callback *cb)
 {
 	struct nft_rule_dump_ctx *ctx = (void *)cb->ctx;
@@ -3587,8 +3606,9 @@ static int nf_tables_dump_rules_done(struct netlink_callback *cb)
 }
 
 /* called with rcu_read_lock held */
-static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info,
-			     const struct nlattr * const nla[])
+static struct sk_buff *
+nf_tables_getrule_single(u32 portid, const struct nfnl_info *info,
+			 const struct nlattr * const nla[], bool reset)
 {
 	struct netlink_ext_ack *extack = info->extack;
 	u8 genmask = nft_genmask_cur(info->net);
@@ -3598,60 +3618,107 @@ static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info,
 	struct net *net = info->net;
 	struct nft_table *table;
 	struct sk_buff *skb2;
-	bool reset = false;
 	int err;
 
-	if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
-		struct netlink_dump_control c = {
-			.start= nf_tables_dump_rules_start,
-			.dump = nf_tables_dump_rules,
-			.done = nf_tables_dump_rules_done,
-			.module = THIS_MODULE,
-			.data = (void *)nla,
-		};
-
-		return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
-	}
-
 	table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask, 0);
 	if (IS_ERR(table)) {
 		NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_TABLE]);
-		return PTR_ERR(table);
+		return ERR_CAST(table);
 	}
 
 	chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN], genmask);
 	if (IS_ERR(chain)) {
 		NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
-		return PTR_ERR(chain);
+		return ERR_CAST(chain);
 	}
 
 	rule = nft_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
 	if (IS_ERR(rule)) {
 		NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]);
-		return PTR_ERR(rule);
+		return ERR_CAST(rule);
 	}
 
 	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
 	if (!skb2)
-		return -ENOMEM;
-
-	if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETRULE_RESET)
-		reset = true;
+		return ERR_PTR(-ENOMEM);
 
-	err = nf_tables_fill_rule_info(skb2, net, NETLINK_CB(skb).portid,
+	err = nf_tables_fill_rule_info(skb2, net, portid,
 				       info->nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0,
 				       family, table, chain, rule, 0, reset);
-	if (err < 0)
-		goto err_fill_rule_info;
+	if (err < 0) {
+		kfree_skb(skb2);
+		return ERR_PTR(err);
+	}
 
-	if (reset)
-		audit_log_rule_reset(table, nft_pernet(net)->base_seq, 1);
+	return skb2;
+}
 
-	return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid);
+static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info,
+			     const struct nlattr * const nla[])
+{
+	u32 portid = NETLINK_CB(skb).portid;
+	struct sk_buff *skb2;
 
-err_fill_rule_info:
-	kfree_skb(skb2);
-	return err;
+	if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
+		struct netlink_dump_control c = {
+			.start= nf_tables_dump_rules_start,
+			.dump = nf_tables_dump_rules,
+			.done = nf_tables_dump_rules_done,
+			.module = THIS_MODULE,
+			.data = (void *)nla,
+		};
+
+		return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
+	}
+
+	skb2 = nf_tables_getrule_single(portid, info, nla, false);
+	if (IS_ERR(skb2))
+		return PTR_ERR(skb2);
+
+	return nfnetlink_unicast(skb2, info->net, portid);
+}
+
+static int nf_tables_getrule_reset(struct sk_buff *skb,
+				   const struct nfnl_info *info,
+				   const struct nlattr * const nla[])
+{
+	struct nftables_pernet *nft_net = nft_pernet(info->net);
+	u8 family = info->nfmsg->nfgen_family;
+	u32 portid = NETLINK_CB(skb).portid;
+	char *tablename, *buf;
+	struct sk_buff *skb2;
+
+	if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
+		struct netlink_dump_control c = {
+			.start= nf_tables_dumpreset_rules_start,
+			.dump = nf_tables_dumpreset_rules,
+			.done = nf_tables_dump_rules_done,
+			.module = THIS_MODULE,
+			.data = (void *)nla,
+		};
+
+		return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
+	}
+
+	if (!nla[NFTA_RULE_TABLE])
+		return -EINVAL;
+
+	tablename = nla_strdup(nla[NFTA_RULE_TABLE], GFP_ATOMIC);
+	if (!tablename)
+		return -ENOMEM;
+
+	spin_lock(&nft_net->reset_lock);
+	skb2 = nf_tables_getrule_single(portid, info, nla, true);
+	spin_unlock(&nft_net->reset_lock);
+	if (IS_ERR(skb2))
+		return PTR_ERR(skb2);
+
+	buf = kasprintf(GFP_ATOMIC, "%s:%u", tablename, nft_net->base_seq);
+	audit_log_nfcfg(buf, family, 1, AUDIT_NFT_OP_RULE_RESET, GFP_ATOMIC);
+	kfree(buf);
+	kfree(tablename);
+
+	return nfnetlink_unicast(skb2, info->net, portid);
 }
 
 void nf_tables_rule_destroy(const struct nft_ctx *ctx, struct nft_rule *rule)
@@ -8950,7 +9017,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
 		.policy		= nft_rule_policy,
 	},
 	[NFT_MSG_GETRULE_RESET] = {
-		.call		= nf_tables_getrule,
+		.call		= nf_tables_getrule_reset,
 		.type		= NFNL_CB_RCU,
 		.attr_count	= NFTA_RULE_MAX,
 		.policy		= nft_rule_policy,
@@ -11191,6 +11258,7 @@ static int __net_init nf_tables_init_net(struct net *net)
 	INIT_LIST_HEAD(&nft_net->module_list);
 	INIT_LIST_HEAD(&nft_net->notify_list);
 	mutex_init(&nft_net->commit_mutex);
+	spin_lock_init(&nft_net->reset_lock);
 	nft_net->base_seq = 1;
 	nft_net->gc_seq = 0;
 	nft_net->validate_state = NFT_VALIDATE_SKIP;
-- 
2.41.0


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [nf PATCH 3/5] netfilter: nf_tables: Introduce struct nft_obj_dump_ctx
  2023-09-23  1:38 [nf PATCH 0/5] Introduce locking for reset requests Phil Sutter
  2023-09-23  1:38 ` [nf PATCH 1/5] netfilter: nf_tables: Don't allocate nft_rule_dump_ctx Phil Sutter
  2023-09-23  1:38 ` [nf PATCH 2/5] netfilter: nf_tables: Add locking for NFT_MSG_GETRULE_RESET requests Phil Sutter
@ 2023-09-23  1:38 ` Phil Sutter
  2023-09-23  1:38 ` [nf PATCH 4/5] netfilter: nf_tables: Add locking for NFT_MSG_GETOBJ_RESET requests Phil Sutter
  2023-09-23  1:38 ` [nf PATCH 5/5] netfilter: nf_tables: Add locking for NFT_MSG_GETSETELEM_RESET requests Phil Sutter
  4 siblings, 0 replies; 25+ messages in thread
From: Phil Sutter @ 2023-09-23  1:38 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: netfilter-devel, Florian Westphal

Convert struct nft_obj_filter into a dump context data structure
equivalent to nft_rule_dump_ctx. Make it reside inside struct
netlink_callback::ctx scratch area so there's no need to allocate and
free it. Also carry the 'reset' boolean instead of determining this upon
each call to nf_tables_dump_obj() by inspecting the nlmsg_type.

Signed-off-by: Phil Sutter <phil@nwl.cc>
---
 net/netfilter/nf_tables_api.c | 66 ++++++++++++++---------------------
 1 file changed, 26 insertions(+), 40 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 4bccd15a67105..f6d8edcf0b9d5 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -7665,25 +7665,23 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net,
 	return -1;
 }
 
-struct nft_obj_filter {
+struct nft_obj_dump_ctx {
+	unsigned int	s_idx;
 	char		*table;
 	u32		type;
+	bool		reset;
 };
 
 static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
-	const struct nft_table *table;
-	unsigned int idx = 0, s_idx = cb->args[0];
-	struct nft_obj_filter *filter = cb->data;
+	struct nft_obj_dump_ctx *ctx = (void *)cb->ctx;
 	struct net *net = sock_net(skb->sk);
 	int family = nfmsg->nfgen_family;
 	struct nftables_pernet *nft_net;
+	const struct nft_table *table;
 	struct nft_object *obj;
-	bool reset = false;
-
-	if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
-		reset = true;
+	unsigned int idx = 0;
 
 	rcu_read_lock();
 	nft_net = nft_pernet(net);
@@ -7696,19 +7694,14 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
 		list_for_each_entry_rcu(obj, &table->objects, list) {
 			if (!nft_is_active(net, obj))
 				goto cont;
-			if (idx < s_idx)
+			if (idx < ctx->s_idx)
 				goto cont;
-			if (idx > s_idx)
-				memset(&cb->args[1], 0,
-				       sizeof(cb->args) - sizeof(cb->args[0]));
-			if (filter && filter->table &&
-			    strcmp(filter->table, table->name))
+			if (ctx->table && strcmp(ctx->table, table->name))
 				goto cont;
-			if (filter &&
-			    filter->type != NFT_OBJECT_UNSPEC &&
-			    obj->ops->type->type != filter->type)
+			if (ctx->type != NFT_OBJECT_UNSPEC &&
+			    obj->ops->type->type != ctx->type)
 				goto cont;
-			if (reset) {
+			if (ctx->reset) {
 				char *buf = kasprintf(GFP_ATOMIC,
 						      "%s:%u",
 						      table->name,
@@ -7727,7 +7720,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
 						    NFT_MSG_NEWOBJ,
 						    NLM_F_MULTI | NLM_F_APPEND,
 						    table->family, table,
-						    obj, reset) < 0)
+						    obj, ctx->reset) < 0)
 				goto done;
 
 			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
@@ -7738,44 +7731,37 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
 done:
 	rcu_read_unlock();
 
-	cb->args[0] = idx;
+	ctx->s_idx = idx;
 	return skb->len;
 }
 
 static int nf_tables_dump_obj_start(struct netlink_callback *cb)
 {
+	struct nft_obj_dump_ctx *ctx = (void *)cb->ctx;
 	const struct nlattr * const *nla = cb->data;
-	struct nft_obj_filter *filter = NULL;
 
-	if (nla[NFTA_OBJ_TABLE] || nla[NFTA_OBJ_TYPE]) {
-		filter = kzalloc(sizeof(*filter), GFP_ATOMIC);
-		if (!filter)
+	BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx));
+
+	if (nla[NFTA_OBJ_TABLE]) {
+		ctx->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_ATOMIC);
+		if (!ctx->table)
 			return -ENOMEM;
+	}
 
-		if (nla[NFTA_OBJ_TABLE]) {
-			filter->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_ATOMIC);
-			if (!filter->table) {
-				kfree(filter);
-				return -ENOMEM;
-			}
-		}
+	if (nla[NFTA_OBJ_TYPE])
+		ctx->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
 
-		if (nla[NFTA_OBJ_TYPE])
-			filter->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
-	}
+	if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
+		ctx->reset = true;
 
-	cb->data = filter;
 	return 0;
 }
 
 static int nf_tables_dump_obj_done(struct netlink_callback *cb)
 {
-	struct nft_obj_filter *filter = cb->data;
+	struct nft_obj_dump_ctx *ctx = (void *)cb->ctx;
 
-	if (filter) {
-		kfree(filter->table);
-		kfree(filter);
-	}
+	kfree(ctx->table);
 
 	return 0;
 }
-- 
2.41.0


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [nf PATCH 4/5] netfilter: nf_tables: Add locking for NFT_MSG_GETOBJ_RESET requests
  2023-09-23  1:38 [nf PATCH 0/5] Introduce locking for reset requests Phil Sutter
                   ` (2 preceding siblings ...)
  2023-09-23  1:38 ` [nf PATCH 3/5] netfilter: nf_tables: Introduce struct nft_obj_dump_ctx Phil Sutter
@ 2023-09-23  1:38 ` Phil Sutter
  2023-09-23  1:38 ` [nf PATCH 5/5] netfilter: nf_tables: Add locking for NFT_MSG_GETSETELEM_RESET requests Phil Sutter
  4 siblings, 0 replies; 25+ messages in thread
From: Phil Sutter @ 2023-09-23  1:38 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: netfilter-devel, Florian Westphal

Analogous to the same change for NFT_MSG_GETRULE_RESET.

Signed-off-by: Phil Sutter <phil@nwl.cc>
---
 net/netfilter/nf_tables_api.c | 147 +++++++++++++++++++++++++---------
 1 file changed, 109 insertions(+), 38 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index f6d8edcf0b9d5..8812d5282ca8f 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -7735,6 +7735,19 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
 	return skb->len;
 }
 
+static int nf_tables_dumpreset_obj(struct sk_buff *skb,
+				   struct netlink_callback *cb)
+{
+	struct nftables_pernet *nft_net = nft_pernet(sock_net(skb->sk));
+	int ret;
+
+	spin_lock(&nft_net->reset_lock);
+	ret = nf_tables_dump_obj(skb, cb);
+	spin_unlock(&nft_net->reset_lock);
+
+	return ret;
+}
+
 static int nf_tables_dump_obj_start(struct netlink_callback *cb)
 {
 	struct nft_obj_dump_ctx *ctx = (void *)cb->ctx;
@@ -7751,12 +7764,18 @@ static int nf_tables_dump_obj_start(struct netlink_callback *cb)
 	if (nla[NFTA_OBJ_TYPE])
 		ctx->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
 
-	if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
-		ctx->reset = true;
-
 	return 0;
 }
 
+static int nf_tables_dumpreset_obj_start(struct netlink_callback *cb)
+{
+	struct nft_obj_dump_ctx *ctx = (void *)cb->ctx;
+
+	ctx->reset = true;
+
+	return nf_tables_dump_obj_start(cb);
+}
+
 static int nf_tables_dump_obj_done(struct netlink_callback *cb)
 {
 	struct nft_obj_dump_ctx *ctx = (void *)cb->ctx;
@@ -7767,8 +7786,9 @@ static int nf_tables_dump_obj_done(struct netlink_callback *cb)
 }
 
 /* called with rcu_read_lock held */
-static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
-			    const struct nlattr * const nla[])
+static struct sk_buff *
+nf_tables_getobj_single(u32 portid, const struct nfnl_info *info,
+			const struct nlattr * const nla[], bool reset)
 {
 	struct netlink_ext_ack *extack = info->extack;
 	u8 genmask = nft_genmask_cur(info->net);
@@ -7777,10 +7797,47 @@ static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
 	struct net *net = info->net;
 	struct nft_object *obj;
 	struct sk_buff *skb2;
-	bool reset = false;
 	u32 objtype;
 	int err;
 
+	if (!nla[NFTA_OBJ_NAME] ||
+	    !nla[NFTA_OBJ_TYPE])
+		return ERR_PTR(-EINVAL);
+
+	table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask, 0);
+	if (IS_ERR(table)) {
+		NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_TABLE]);
+		return ERR_CAST(table);
+	}
+
+	objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
+	obj = nft_obj_lookup(net, table, nla[NFTA_OBJ_NAME], objtype, genmask);
+	if (IS_ERR(obj)) {
+		NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]);
+		return ERR_CAST(obj);
+	}
+
+	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
+	if (!skb2)
+		return ERR_PTR(-ENOMEM);
+
+	err = nf_tables_fill_obj_info(skb2, net, portid,
+				      info->nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0,
+				      family, table, obj, reset);
+	if (err < 0) {
+		kfree_skb(skb2);
+		return ERR_PTR(err);
+	}
+
+	return skb2;
+}
+
+static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
+			    const struct nlattr * const nla[])
+{
+	u32 portid = NETLINK_CB(skb).portid;
+	struct sk_buff *skb2;
+
 	if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
 		struct netlink_dump_control c = {
 			.start = nf_tables_dump_obj_start,
@@ -7793,6 +7850,41 @@ static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
 		return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
 	}
 
+	skb2 = nf_tables_getobj_single(portid, info, nla, false);
+	if (IS_ERR(skb2))
+		return PTR_ERR(skb2);
+
+	return nfnetlink_unicast(skb2, info->net, portid);
+}
+
+static int nf_tables_getobj_reset(struct sk_buff *skb,
+				  const struct nfnl_info *info,
+				  const struct nlattr * const nla[])
+{
+	const struct nftables_pernet *nft_net = nft_pernet(info->net);
+	struct netlink_ext_ack *extack = info->extack;
+	u8 genmask = nft_genmask_cur(info->net);
+	u8 family = info->nfmsg->nfgen_family;
+	u32 portid = NETLINK_CB(skb).portid;
+	const struct nft_table *table;
+	struct net *net = info->net;
+	struct nft_object *obj;
+	struct sk_buff *skb2;
+	u32 objtype;
+	char *buf;
+
+	if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
+		struct netlink_dump_control c = {
+			.start = nf_tables_dumpreset_obj_start,
+			.dump = nf_tables_dumpreset_obj,
+			.done = nf_tables_dump_obj_done,
+			.module = THIS_MODULE,
+			.data = (void *)nla,
+		};
+
+		return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
+	}
+
 	if (!nla[NFTA_OBJ_NAME] ||
 	    !nla[NFTA_OBJ_TYPE])
 		return -EINVAL;
@@ -7810,39 +7902,18 @@ static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
 		return PTR_ERR(obj);
 	}
 
-	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
-	if (!skb2)
-		return -ENOMEM;
-
-	if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
-		reset = true;
-
-	if (reset) {
-		const struct nftables_pernet *nft_net;
-		char *buf;
-
-		nft_net = nft_pernet(net);
-		buf = kasprintf(GFP_ATOMIC, "%s:%u", table->name, nft_net->base_seq);
-
-		audit_log_nfcfg(buf,
-				family,
-				obj->handle,
-				AUDIT_NFT_OP_OBJ_RESET,
-				GFP_ATOMIC);
-		kfree(buf);
-	}
-
-	err = nf_tables_fill_obj_info(skb2, net, NETLINK_CB(skb).portid,
-				      info->nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0,
-				      family, table, obj, reset);
-	if (err < 0)
-		goto err_fill_obj_info;
+	spin_lock(&nft_net->reset_lock);
+	skb2 = nf_tables_getobj_single(portid, info, nla, true);
+	spin_unlock(&nft_net->reset_lock);
+	if (IS_ERR(skb2))
+		return PTR_ERR(skb2);
 
-	return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid);
+	buf = kasprintf(GFP_ATOMIC, "%s:%u", table->name, nft_net->base_seq);
+	audit_log_nfcfg(buf, family, obj->handle,
+			AUDIT_NFT_OP_OBJ_RESET, GFP_ATOMIC);
+	kfree(buf);
 
-err_fill_obj_info:
-	kfree_skb(skb2);
-	return err;
+	return nfnetlink_unicast(skb2, net, portid);
 }
 
 static void nft_obj_destroy(const struct nft_ctx *ctx, struct nft_object *obj)
@@ -9103,7 +9174,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
 		.policy		= nft_obj_policy,
 	},
 	[NFT_MSG_GETOBJ_RESET] = {
-		.call		= nf_tables_getobj,
+		.call		= nf_tables_getobj_reset,
 		.type		= NFNL_CB_RCU,
 		.attr_count	= NFTA_OBJ_MAX,
 		.policy		= nft_obj_policy,
-- 
2.41.0


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [nf PATCH 5/5] netfilter: nf_tables: Add locking for NFT_MSG_GETSETELEM_RESET requests
  2023-09-23  1:38 [nf PATCH 0/5] Introduce locking for reset requests Phil Sutter
                   ` (3 preceding siblings ...)
  2023-09-23  1:38 ` [nf PATCH 4/5] netfilter: nf_tables: Add locking for NFT_MSG_GETOBJ_RESET requests Phil Sutter
@ 2023-09-23  1:38 ` Phil Sutter
  2023-09-25 10:53   ` Pablo Neira Ayuso
  4 siblings, 1 reply; 25+ messages in thread
From: Phil Sutter @ 2023-09-23  1:38 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: netfilter-devel, Florian Westphal

Introduce dedicated callbacks for nfnetlink and the asynchronous dump
handling which perform the necessary locking.

Extending struct nft_set_dump_ctx by a 'reset' field relieves
nf_tables_dump_set() from having to check the nfnl msg type upon each
call.

Signed-off-by: Phil Sutter <phil@nwl.cc>
---
 net/netfilter/nf_tables_api.c | 106 ++++++++++++++++++++++++++++------
 1 file changed, 87 insertions(+), 19 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 8812d5282ca8f..0e5d9bdba82b8 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -5723,6 +5723,7 @@ static void audit_log_nft_set_reset(const struct nft_table *table,
 struct nft_set_dump_ctx {
 	const struct nft_set	*set;
 	struct nft_ctx		ctx;
+	bool			reset;
 };
 
 static int nft_set_catchall_dump(struct net *net, struct sk_buff *skb,
@@ -5762,7 +5763,6 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
 	bool set_found = false;
 	struct nlmsghdr *nlh;
 	struct nlattr *nest;
-	bool reset = false;
 	u32 portid, seq;
 	int event;
 
@@ -5810,12 +5810,9 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
 	if (nest == NULL)
 		goto nla_put_failure;
 
-	if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETSETELEM_RESET)
-		reset = true;
-
 	args.cb			= cb;
 	args.skb		= skb;
-	args.reset		= reset;
+	args.reset		= dump_ctx->reset;
 	args.iter.genmask	= nft_genmask_cur(net);
 	args.iter.skip		= cb->args[0];
 	args.iter.count		= 0;
@@ -5825,14 +5822,10 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
 
 	if (!args.iter.err && args.iter.count == cb->args[0])
 		args.iter.err = nft_set_catchall_dump(net, skb, set,
-						      reset, cb->seq);
+						      dump_ctx->reset, cb->seq);
 	nla_nest_end(skb, nest);
 	nlmsg_end(skb, nlh);
 
-	if (reset && args.iter.count > args.iter.skip)
-		audit_log_nft_set_reset(table, cb->seq,
-					args.iter.count - args.iter.skip);
-
 	rcu_read_unlock();
 
 	if (args.iter.err && args.iter.err != -EMSGSIZE)
@@ -5848,6 +5841,24 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
 	return -ENOSPC;
 }
 
+static int nf_tables_dumpreset_set(struct sk_buff *skb,
+				   struct netlink_callback *cb)
+{
+	struct nftables_pernet *nft_net = nft_pernet(sock_net(skb->sk));
+	struct nft_set_dump_ctx *dump_ctx = cb->data;
+	int ret, skip = cb->args[0];
+
+	spin_lock(&nft_net->reset_lock);
+	ret = nf_tables_dump_set(skb, cb);
+	spin_unlock(&nft_net->reset_lock);
+
+	if (cb->args[0] > skip)
+		audit_log_nft_set_reset(dump_ctx->ctx.table, cb->seq,
+					cb->args[0] - skip);
+
+	return ret;
+}
+
 static int nf_tables_dump_set_start(struct netlink_callback *cb)
 {
 	struct nft_set_dump_ctx *dump_ctx = cb->data;
@@ -6065,7 +6076,6 @@ static int nf_tables_getsetelem(struct sk_buff *skb,
 	struct nft_set *set;
 	struct nlattr *attr;
 	struct nft_ctx ctx;
-	bool reset = false;
 
 	table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family,
 				 genmask, 0);
@@ -6090,6 +6100,7 @@ static int nf_tables_getsetelem(struct sk_buff *skb,
 		struct nft_set_dump_ctx dump_ctx = {
 			.set = set,
 			.ctx = ctx,
+			.reset = false,
 		};
 
 		c.data = &dump_ctx;
@@ -6099,21 +6110,78 @@ static int nf_tables_getsetelem(struct sk_buff *skb,
 	if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS])
 		return -EINVAL;
 
-	if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETSETELEM_RESET)
-		reset = true;
+	nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
+		err = nft_get_set_elem(&ctx, set, attr, false);
+		if (err < 0) {
+			NL_SET_BAD_ATTR(extack, attr);
+			break;
+		}
+		nelems++;
+	}
+
+	return err;
+}
+
+static int nf_tables_getsetelem_reset(struct sk_buff *skb,
+				      const struct nfnl_info *info,
+				      const struct nlattr * const nla[])
+{
+	struct nftables_pernet *nft_net = nft_pernet(info->net);
+	struct netlink_ext_ack *extack = info->extack;
+	u8 genmask = nft_genmask_cur(info->net);
+	u8 family = info->nfmsg->nfgen_family;
+	int rem, err = 0, nelems = 0;
+	struct net *net = info->net;
+	struct nft_table *table;
+	struct nft_set *set;
+	struct nlattr *attr;
+	struct nft_ctx ctx;
 
+	table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family,
+				 genmask, 0);
+	if (IS_ERR(table)) {
+		NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_TABLE]);
+		return PTR_ERR(table);
+	}
+
+	set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
+	if (IS_ERR(set))
+		return PTR_ERR(set);
+
+	nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+
+	if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
+		struct netlink_dump_control c = {
+			.start = nf_tables_dump_set_start,
+			.dump = nf_tables_dumpreset_set,
+			.done = nf_tables_dump_set_done,
+			.module = THIS_MODULE,
+		};
+		struct nft_set_dump_ctx dump_ctx = {
+			.set = set,
+			.ctx = ctx,
+			.reset = true,
+		};
+
+		c.data = &dump_ctx;
+		return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
+	}
+
+	if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS])
+		return -EINVAL;
+
+	spin_lock(&nft_net->reset_lock);
 	nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
-		err = nft_get_set_elem(&ctx, set, attr, reset);
+		err = nft_get_set_elem(&ctx, set, attr, true);
 		if (err < 0) {
 			NL_SET_BAD_ATTR(extack, attr);
 			break;
 		}
 		nelems++;
 	}
+	spin_unlock(&nft_net->reset_lock);
 
-	if (reset)
-		audit_log_nft_set_reset(table, nft_pernet(net)->base_seq,
-					nelems);
+	audit_log_nft_set_reset(table, nft_net->base_seq, nelems);
 
 	return err;
 }
@@ -7861,7 +7929,7 @@ static int nf_tables_getobj_reset(struct sk_buff *skb,
 				  const struct nfnl_info *info,
 				  const struct nlattr * const nla[])
 {
-	const struct nftables_pernet *nft_net = nft_pernet(info->net);
+	struct nftables_pernet *nft_net = nft_pernet(info->net);
 	struct netlink_ext_ack *extack = info->extack;
 	u8 genmask = nft_genmask_cur(info->net);
 	u8 family = info->nfmsg->nfgen_family;
@@ -9128,7 +9196,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
 		.policy		= nft_set_elem_list_policy,
 	},
 	[NFT_MSG_GETSETELEM_RESET] = {
-		.call		= nf_tables_getsetelem,
+		.call		= nf_tables_getsetelem_reset,
 		.type		= NFNL_CB_RCU,
 		.attr_count	= NFTA_SET_ELEM_LIST_MAX,
 		.policy		= nft_set_elem_list_policy,
-- 
2.41.0


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Re: [nf PATCH 2/5] netfilter: nf_tables: Add locking for NFT_MSG_GETRULE_RESET requests
  2023-09-23  1:38 ` [nf PATCH 2/5] netfilter: nf_tables: Add locking for NFT_MSG_GETRULE_RESET requests Phil Sutter
@ 2023-09-23 11:04   ` Florian Westphal
  2023-09-23 15:03     ` Phil Sutter
  2023-09-25 11:02     ` Pablo Neira Ayuso
  2023-09-25 10:47   ` Pablo Neira Ayuso
  2023-09-25 10:48   ` Pablo Neira Ayuso
  2 siblings, 2 replies; 25+ messages in thread
From: Florian Westphal @ 2023-09-23 11:04 UTC (permalink / raw)
  To: Phil Sutter; +Cc: Pablo Neira Ayuso, netfilter-devel, Florian Westphal

Phil Sutter <phil@nwl.cc> wrote:
>  	table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask, 0);
>  	if (IS_ERR(table)) {
>  		NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_TABLE]);
> -		return PTR_ERR(table);
> +		return ERR_CAST(table);
>  	}

Can you split that into another patch?

> +	if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
> +		struct netlink_dump_control c = {
> +			.start= nf_tables_dumpreset_rules_start,
> +			.dump = nf_tables_dumpreset_rules,
> +			.done = nf_tables_dump_rules_done,
> +			.module = THIS_MODULE,
> +			.data = (void *)nla,
> +		};
> +
> +		return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
> +	}
> +
> +	if (!nla[NFTA_RULE_TABLE])
> +		return -EINVAL;
> +
> +	tablename = nla_strdup(nla[NFTA_RULE_TABLE], GFP_ATOMIC);
> +	if (!tablename)
> +		return -ENOMEM;
> +	spin_lock(&nft_net->reset_lock);

Hmm. Stupid question.  Why do we need a spinlock to serialize?
This is now a distinct function, so:

> +	spin_unlock(&nft_net->reset_lock);
> +	if (IS_ERR(skb2))
> +		return PTR_ERR(skb2);

MIssing kfree(tablename)

> +	buf = kasprintf(GFP_ATOMIC, "%s:%u", tablename, nft_net->base_seq);
> +	audit_log_nfcfg(buf, family, 1, AUDIT_NFT_OP_RULE_RESET, GFP_ATOMIC);
> +	kfree(buf);
> +	kfree(tablename);
> +
> +	return nfnetlink_unicast(skb2, info->net, portid);
>  }
>  
>  void nf_tables_rule_destroy(const struct nft_ctx *ctx, struct nft_rule *rule)
> @@ -8950,7 +9017,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
>  		.policy		= nft_rule_policy,
>  	},
>  	[NFT_MSG_GETRULE_RESET] = {
> -		.call		= nf_tables_getrule,
> +		.call		= nf_tables_getrule_reset,
>  		.type		= NFNL_CB_RCU,

This can be changed to CB_MUTEX, we can serialize by commit_mutex then.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [nf PATCH 2/5] netfilter: nf_tables: Add locking for NFT_MSG_GETRULE_RESET requests
  2023-09-23 11:04   ` Florian Westphal
@ 2023-09-23 15:03     ` Phil Sutter
  2023-09-23 16:18       ` Florian Westphal
  2023-09-25 11:02     ` Pablo Neira Ayuso
  1 sibling, 1 reply; 25+ messages in thread
From: Phil Sutter @ 2023-09-23 15:03 UTC (permalink / raw)
  To: Florian Westphal; +Cc: Pablo Neira Ayuso, netfilter-devel

On Sat, Sep 23, 2023 at 01:04:37PM +0200, Florian Westphal wrote:
> Phil Sutter <phil@nwl.cc> wrote:
> >  	table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask, 0);
> >  	if (IS_ERR(table)) {
> >  		NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_TABLE]);
> > -		return PTR_ERR(table);
> > +		return ERR_CAST(table);
> >  	}
> 
> Can you split that into another patch?

You mean the whole creation of nf_tables_getrule_single()? Because the
above change is only required due to the changed return type.

> > +	if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
> > +		struct netlink_dump_control c = {
> > +			.start= nf_tables_dumpreset_rules_start,
> > +			.dump = nf_tables_dumpreset_rules,
> > +			.done = nf_tables_dump_rules_done,
> > +			.module = THIS_MODULE,
> > +			.data = (void *)nla,
> > +		};
> > +
> > +		return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
> > +	}
> > +
> > +	if (!nla[NFTA_RULE_TABLE])
> > +		return -EINVAL;
> > +
> > +	tablename = nla_strdup(nla[NFTA_RULE_TABLE], GFP_ATOMIC);
> > +	if (!tablename)
> > +		return -ENOMEM;
> > +	spin_lock(&nft_net->reset_lock);
> 
> Hmm. Stupid question.  Why do we need a spinlock to serialize?
> This is now a distinct function, so:

On Tue, Sep 05, 2023 at 11:11:07PM +0200, Phil Sutter wrote:
[...]
> I guess NFNL_CB_MUTEX is a no go because it locks down the whole
> subsystem, right?

But he didn't get a reply. :(

What is the relation to this being a distinct function? Can't one have
the same callback function once with type CB_RCU and once as CB_MUTEX?
nfnetlink doesn't seem to care.

> > +	spin_unlock(&nft_net->reset_lock);
> > +	if (IS_ERR(skb2))
> > +		return PTR_ERR(skb2);
> 
> MIssing kfree(tablename)

Thanks! 

Cheers, Phil

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [nf PATCH 2/5] netfilter: nf_tables: Add locking for NFT_MSG_GETRULE_RESET requests
  2023-09-23 15:03     ` Phil Sutter
@ 2023-09-23 16:18       ` Florian Westphal
  2023-09-25  9:32         ` Pablo Neira Ayuso
  0 siblings, 1 reply; 25+ messages in thread
From: Florian Westphal @ 2023-09-23 16:18 UTC (permalink / raw)
  To: Phil Sutter, Florian Westphal, Pablo Neira Ayuso, netfilter-devel

Phil Sutter <phil@nwl.cc> wrote:
> > Can you split that into another patch?
> 
> You mean the whole creation of nf_tables_getrule_single()? Because the
> above change is only required due to the changed return type.

Yes, I was wondering if there is a way to convert the return type
in a different patch.

If its too costly, don't bother.

> > Hmm. Stupid question.  Why do we need a spinlock to serialize?
> > This is now a distinct function, so:
> 
> On Tue, Sep 05, 2023 at 11:11:07PM +0200, Phil Sutter wrote:
> [...]
> > I guess NFNL_CB_MUTEX is a no go because it locks down the whole
> > subsystem, right?
> But he didn't get a reply. :(

Sorry, missed that :-(

If thats really a concern. alernative would be to do same thing as
nft_netlink_dump_start_rcu(), i.e. use _RCU as-is and then switch
from rcu to module reference held, plus, in your case, the transaction
mutex.

Actually I like that better because we already use this pattern and
afaics all dumpers call rcu_read_lock for us; i.e.:

callback_that_might_reset()
{
	try_module_get ...
	rcu_read_unlock()
	mutex_lock(net->commit_mutex)
	  dumper();
	mutex_unlock(net->commit_mutex)
	rcu_read_lock();
	module_put()
}

should do the trick.

> What is the relation to this being a distinct function? Can't one have
> the same callback function once with type CB_RCU and once as CB_MUTEX?
> nfnetlink doesn't seem to care.

You can but you need conditional locking in that case.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [nf PATCH 2/5] netfilter: nf_tables: Add locking for NFT_MSG_GETRULE_RESET requests
  2023-09-23 16:18       ` Florian Westphal
@ 2023-09-25  9:32         ` Pablo Neira Ayuso
  2023-09-25 19:53           ` Florian Westphal
  0 siblings, 1 reply; 25+ messages in thread
From: Pablo Neira Ayuso @ 2023-09-25  9:32 UTC (permalink / raw)
  To: Florian Westphal; +Cc: Phil Sutter, netfilter-devel

On Sat, Sep 23, 2023 at 06:18:13PM +0200, Florian Westphal wrote:
> Phil Sutter <phil@nwl.cc> wrote:
> > > Can you split that into another patch?
> > 
> > You mean the whole creation of nf_tables_getrule_single()? Because the
> > above change is only required due to the changed return type.
> 
> Yes, I was wondering if there is a way to convert the return type
> in a different patch.
> 
> If its too costly, don't bother.
> 
> > > Hmm. Stupid question.  Why do we need a spinlock to serialize?
> > > This is now a distinct function, so:
> > 
> > On Tue, Sep 05, 2023 at 11:11:07PM +0200, Phil Sutter wrote:
> > [...]
> > > I guess NFNL_CB_MUTEX is a no go because it locks down the whole
> > > subsystem, right?

NFNL_CB_MUTEX takes the global subsystem mutex, see
net/netfilter/nfnetlink.c

                case NFNL_CB_MUTEX:
                        rcu_read_unlock();
                        nfnl_lock(subsys_id);
                        ...

This does not help either for netlink dumps, because NFNL_CB_MUTEX
only guarantees that the first netlink dump chunk holds the mutex
while follow up calls to netlink_recvmsg() would be lockless.

Note, Florian updated nf_tables to use a per-netns mutex only.

> If thats really a concern. alernative would be to do same thing as
> nft_netlink_dump_start_rcu(), i.e. use _RCU as-is and then switch
> from rcu to module reference held, plus, in your case, the transaction
> mutex.
> 
> Actually I like that better because we already use this pattern and
> afaics all dumpers call rcu_read_lock for us; i.e.:
> 
> callback_that_might_reset()
> {
> 	try_module_get ...
> 	rcu_read_unlock()
> 	mutex_lock(net->commit_mutex)
> 	  dumper();
> 	mutex_unlock(net->commit_mutex)
> 	rcu_read_lock();
> 	module_put()
> }
>
> should do the trick.

Idiom above LGTM, *except for net->commit_mutex*. Please do not use
->commit_mutex: This will stall ruleset updates for no reason, netlink
dump would grab and release such mutex for each netlink_recvmsg() call
and netlink dump side will always retry because of NLM_F_EINTR.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [nf PATCH 2/5] netfilter: nf_tables: Add locking for NFT_MSG_GETRULE_RESET requests
  2023-09-23  1:38 ` [nf PATCH 2/5] netfilter: nf_tables: Add locking for NFT_MSG_GETRULE_RESET requests Phil Sutter
  2023-09-23 11:04   ` Florian Westphal
@ 2023-09-25 10:47   ` Pablo Neira Ayuso
  2023-09-26  9:14     ` Phil Sutter
  2023-09-25 10:48   ` Pablo Neira Ayuso
  2 siblings, 1 reply; 25+ messages in thread
From: Pablo Neira Ayuso @ 2023-09-25 10:47 UTC (permalink / raw)
  To: Phil Sutter; +Cc: netfilter-devel, Florian Westphal

On Sat, Sep 23, 2023 at 03:38:04AM +0200, Phil Sutter wrote:
[...]
> @@ -3598,60 +3618,107 @@ static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info,
>  	struct net *net = info->net;
>  	struct nft_table *table;
>  	struct sk_buff *skb2;
> -	bool reset = false;
>  	int err;
>  
> -	if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
> -		struct netlink_dump_control c = {
> -			.start= nf_tables_dump_rules_start,
> -			.dump = nf_tables_dump_rules,
> -			.done = nf_tables_dump_rules_done,
> -			.module = THIS_MODULE,
> -			.data = (void *)nla,
> -		};
> -
> -		return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
> -	}
> -
>  	table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask, 0);
>  	if (IS_ERR(table)) {
>  		NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_TABLE]);
> -		return PTR_ERR(table);
> +		return ERR_CAST(table);

                return ERR_PTR(table);

for consistency and to make this batch slightly smaller?

>  	}

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [nf PATCH 2/5] netfilter: nf_tables: Add locking for NFT_MSG_GETRULE_RESET requests
  2023-09-23  1:38 ` [nf PATCH 2/5] netfilter: nf_tables: Add locking for NFT_MSG_GETRULE_RESET requests Phil Sutter
  2023-09-23 11:04   ` Florian Westphal
  2023-09-25 10:47   ` Pablo Neira Ayuso
@ 2023-09-25 10:48   ` Pablo Neira Ayuso
  2023-09-25 11:01     ` Pablo Neira Ayuso
  2 siblings, 1 reply; 25+ messages in thread
From: Pablo Neira Ayuso @ 2023-09-25 10:48 UTC (permalink / raw)
  To: Phil Sutter; +Cc: netfilter-devel, Florian Westphal

On Sat, Sep 23, 2023 at 03:38:04AM +0200, Phil Sutter wrote:
[...]
> +static int nf_tables_getrule_reset(struct sk_buff *skb,
> +				   const struct nfnl_info *info,
> +				   const struct nlattr * const nla[])
> +{
> +	struct nftables_pernet *nft_net = nft_pernet(info->net);
> +	u8 family = info->nfmsg->nfgen_family;
> +	u32 portid = NETLINK_CB(skb).portid;
> +	char *tablename, *buf;
> +	struct sk_buff *skb2;
> +
> +	if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
> +		struct netlink_dump_control c = {
> +			.start= nf_tables_dumpreset_rules_start,
> +			.dump = nf_tables_dumpreset_rules,
> +			.done = nf_tables_dump_rules_done,
> +			.module = THIS_MODULE,
> +			.data = (void *)nla,
> +		};
> +
> +		return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
> +	}
> +
> +	if (!nla[NFTA_RULE_TABLE])
> +		return -EINVAL;
> +
> +	tablename = nla_strdup(nla[NFTA_RULE_TABLE], GFP_ATOMIC);
> +	if (!tablename)
> +		return -ENOMEM;
> +
> +	spin_lock(&nft_net->reset_lock);
> +	skb2 = nf_tables_getrule_single(portid, info, nla, true);
> +	spin_unlock(&nft_net->reset_lock);
> +	if (IS_ERR(skb2))

This leaks tablename?

> +		return PTR_ERR(skb2);
> +
> +	buf = kasprintf(GFP_ATOMIC, "%s:%u", tablename, nft_net->base_seq);
> +	audit_log_nfcfg(buf, family, 1, AUDIT_NFT_OP_RULE_RESET, GFP_ATOMIC);
> +	kfree(buf);
> +	kfree(tablename);
> +
> +	return nfnetlink_unicast(skb2, info->net, portid);
>  }
>  
>  void nf_tables_rule_destroy(const struct nft_ctx *ctx, struct nft_rule *rule)

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [nf PATCH 5/5] netfilter: nf_tables: Add locking for NFT_MSG_GETSETELEM_RESET requests
  2023-09-23  1:38 ` [nf PATCH 5/5] netfilter: nf_tables: Add locking for NFT_MSG_GETSETELEM_RESET requests Phil Sutter
@ 2023-09-25 10:53   ` Pablo Neira Ayuso
  0 siblings, 0 replies; 25+ messages in thread
From: Pablo Neira Ayuso @ 2023-09-25 10:53 UTC (permalink / raw)
  To: Phil Sutter; +Cc: netfilter-devel, Florian Westphal

On Sat, Sep 23, 2023 at 03:38:07AM +0200, Phil Sutter wrote:
> Introduce dedicated callbacks for nfnetlink and the asynchronous dump
> handling which perform the necessary locking.

I would describe here what this patchset fixes, basically, two
different threads that dump the set content with the _RESET command is
problematic since both threads are racing. You might find a better
wording for this.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [nf PATCH 2/5] netfilter: nf_tables: Add locking for NFT_MSG_GETRULE_RESET requests
  2023-09-25 10:48   ` Pablo Neira Ayuso
@ 2023-09-25 11:01     ` Pablo Neira Ayuso
  0 siblings, 0 replies; 25+ messages in thread
From: Pablo Neira Ayuso @ 2023-09-25 11:01 UTC (permalink / raw)
  To: Phil Sutter; +Cc: netfilter-devel, Florian Westphal

On Mon, Sep 25, 2023 at 12:48:35PM +0200, Pablo Neira Ayuso wrote:
> On Sat, Sep 23, 2023 at 03:38:04AM +0200, Phil Sutter wrote:
> [...]
> > +static int nf_tables_getrule_reset(struct sk_buff *skb,
> > +				   const struct nfnl_info *info,
> > +				   const struct nlattr * const nla[])
> > +{
> > +	struct nftables_pernet *nft_net = nft_pernet(info->net);
> > +	u8 family = info->nfmsg->nfgen_family;
> > +	u32 portid = NETLINK_CB(skb).portid;
> > +	char *tablename, *buf;
> > +	struct sk_buff *skb2;
> > +
> > +	if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
> > +		struct netlink_dump_control c = {
> > +			.start= nf_tables_dumpreset_rules_start,
> > +			.dump = nf_tables_dumpreset_rules,
> > +			.done = nf_tables_dump_rules_done,
> > +			.module = THIS_MODULE,
> > +			.data = (void *)nla,
> > +		};
> > +
> > +		return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
> > +	}
> > +
> > +	if (!nla[NFTA_RULE_TABLE])
> > +		return -EINVAL;
> > +
> > +	tablename = nla_strdup(nla[NFTA_RULE_TABLE], GFP_ATOMIC);
> > +	if (!tablename)
> > +		return -ENOMEM;
> > +
> > +	spin_lock(&nft_net->reset_lock);
> > +	skb2 = nf_tables_getrule_single(portid, info, nla, true);
> > +	spin_unlock(&nft_net->reset_lock);
> > +	if (IS_ERR(skb2))
> 
> This leaks tablename?

Florian already reported this one, apologies.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [nf PATCH 2/5] netfilter: nf_tables: Add locking for NFT_MSG_GETRULE_RESET requests
  2023-09-23 11:04   ` Florian Westphal
  2023-09-23 15:03     ` Phil Sutter
@ 2023-09-25 11:02     ` Pablo Neira Ayuso
  1 sibling, 0 replies; 25+ messages in thread
From: Pablo Neira Ayuso @ 2023-09-25 11:02 UTC (permalink / raw)
  To: Florian Westphal; +Cc: Phil Sutter, netfilter-devel

On Sat, Sep 23, 2023 at 01:04:37PM +0200, Florian Westphal wrote:
> Phil Sutter <phil@nwl.cc> wrote:
> >  	table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask, 0);
> >  	if (IS_ERR(table)) {
> >  		NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_TABLE]);
> > -		return PTR_ERR(table);
> > +		return ERR_CAST(table);
> >  	}
> 
> Can you split that into another patch?

Agreed. Any preparation update would be good to be introduced in first
place. If you think ERR_CAST is better, maybe justify and target this
later at nf-next?

Thanks!

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [nf PATCH 2/5] netfilter: nf_tables: Add locking for NFT_MSG_GETRULE_RESET requests
  2023-09-25  9:32         ` Pablo Neira Ayuso
@ 2023-09-25 19:53           ` Florian Westphal
  2023-09-26  9:34             ` Phil Sutter
  0 siblings, 1 reply; 25+ messages in thread
From: Florian Westphal @ 2023-09-25 19:53 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: Florian Westphal, Phil Sutter, netfilter-devel

Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> On Sat, Sep 23, 2023 at 06:18:13PM +0200, Florian Westphal wrote:
> > callback_that_might_reset()
> > {
> > 	try_module_get ...
> > 	rcu_read_unlock()
> > 	mutex_lock(net->commit_mutex)
> > 	  dumper();
> > 	mutex_unlock(net->commit_mutex)
> > 	rcu_read_lock();
> > 	module_put()
> > }
> >
> > should do the trick.
> 
> Idiom above LGTM, *except for net->commit_mutex*. Please do not use
> ->commit_mutex: This will stall ruleset updates for no reason, netlink
> dump would grab and release such mutex for each netlink_recvmsg() call
> and netlink dump side will always retry because of NLM_F_EINTR.

It will stall updates, but for good reason: we are making changes to the
expressions state.

We even emit AUDIT messages about this.
So, I think the commit mutex is appropirate here.

That said, if you totally disagree, then I suppose a new "reset" mutex
could be used instead.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [nf PATCH 2/5] netfilter: nf_tables: Add locking for NFT_MSG_GETRULE_RESET requests
  2023-09-25 10:47   ` Pablo Neira Ayuso
@ 2023-09-26  9:14     ` Phil Sutter
  2023-09-26 10:00       ` Pablo Neira Ayuso
  0 siblings, 1 reply; 25+ messages in thread
From: Phil Sutter @ 2023-09-26  9:14 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: netfilter-devel, Florian Westphal

On Mon, Sep 25, 2023 at 12:47:14PM +0200, Pablo Neira Ayuso wrote:
> On Sat, Sep 23, 2023 at 03:38:04AM +0200, Phil Sutter wrote:
> [...]
> > @@ -3598,60 +3618,107 @@ static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info,
> >  	struct net *net = info->net;
> >  	struct nft_table *table;
> >  	struct sk_buff *skb2;
> > -	bool reset = false;
> >  	int err;
> >  
> > -	if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
> > -		struct netlink_dump_control c = {
> > -			.start= nf_tables_dump_rules_start,
> > -			.dump = nf_tables_dump_rules,
> > -			.done = nf_tables_dump_rules_done,
> > -			.module = THIS_MODULE,
> > -			.data = (void *)nla,
> > -		};
> > -
> > -		return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
> > -	}
> > -
> >  	table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask, 0);
> >  	if (IS_ERR(table)) {
> >  		NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_TABLE]);
> > -		return PTR_ERR(table);
> > +		return ERR_CAST(table);
> 
>                 return ERR_PTR(table);
> 
> for consistency and to make this batch slightly smaller?

ERR_PTR() expects a long arg, while 'table' is a pointer. The only valid
alternatives are '(struct sk_buff *)table' and 'ERR_PTR(PTR_ERR(table)).

I don't quite understand what's the problem with ERR_CAST(), it seems to
exist exactly for this purpose.

Cheers, Phil

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [nf PATCH 2/5] netfilter: nf_tables: Add locking for NFT_MSG_GETRULE_RESET requests
  2023-09-25 19:53           ` Florian Westphal
@ 2023-09-26  9:34             ` Phil Sutter
  2023-09-26 10:09               ` Pablo Neira Ayuso
  0 siblings, 1 reply; 25+ messages in thread
From: Phil Sutter @ 2023-09-26  9:34 UTC (permalink / raw)
  To: Florian Westphal; +Cc: Pablo Neira Ayuso, netfilter-devel

On Mon, Sep 25, 2023 at 09:53:17PM +0200, Florian Westphal wrote:
> Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> > On Sat, Sep 23, 2023 at 06:18:13PM +0200, Florian Westphal wrote:
> > > callback_that_might_reset()
> > > {
> > > 	try_module_get ...
> > > 	rcu_read_unlock()
> > > 	mutex_lock(net->commit_mutex)
> > > 	  dumper();
> > > 	mutex_unlock(net->commit_mutex)
> > > 	rcu_read_lock();
> > > 	module_put()
> > > }
> > >
> > > should do the trick.
> > 
> > Idiom above LGTM, *except for net->commit_mutex*. Please do not use
> > ->commit_mutex: This will stall ruleset updates for no reason, netlink
> > dump would grab and release such mutex for each netlink_recvmsg() call
> > and netlink dump side will always retry because of NLM_F_EINTR.
> 
> It will stall updates, but for good reason: we are making changes to the
> expressions state.

This also disqualifies the use of Pablo's suggested reset_lock, right?

Cheers, Phil

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [nf PATCH 2/5] netfilter: nf_tables: Add locking for NFT_MSG_GETRULE_RESET requests
  2023-09-26  9:14     ` Phil Sutter
@ 2023-09-26 10:00       ` Pablo Neira Ayuso
  0 siblings, 0 replies; 25+ messages in thread
From: Pablo Neira Ayuso @ 2023-09-26 10:00 UTC (permalink / raw)
  To: Phil Sutter, netfilter-devel, Florian Westphal

On Tue, Sep 26, 2023 at 11:14:31AM +0200, Phil Sutter wrote:
> On Mon, Sep 25, 2023 at 12:47:14PM +0200, Pablo Neira Ayuso wrote:
> > On Sat, Sep 23, 2023 at 03:38:04AM +0200, Phil Sutter wrote:
> > [...]
> > > @@ -3598,60 +3618,107 @@ static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info,
> > >  	struct net *net = info->net;
> > >  	struct nft_table *table;
> > >  	struct sk_buff *skb2;
> > > -	bool reset = false;
> > >  	int err;
> > >  
> > > -	if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
> > > -		struct netlink_dump_control c = {
> > > -			.start= nf_tables_dump_rules_start,
> > > -			.dump = nf_tables_dump_rules,
> > > -			.done = nf_tables_dump_rules_done,
> > > -			.module = THIS_MODULE,
> > > -			.data = (void *)nla,
> > > -		};
> > > -
> > > -		return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
> > > -	}
> > > -
> > >  	table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask, 0);
> > >  	if (IS_ERR(table)) {
> > >  		NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_TABLE]);
> > > -		return PTR_ERR(table);
> > > +		return ERR_CAST(table);
> > 
> >                 return ERR_PTR(table);
> > 
> > for consistency and to make this batch slightly smaller?
> 
> ERR_PTR() expects a long arg, while 'table' is a pointer. The only valid
> alternatives are '(struct sk_buff *)table' and 'ERR_PTR(PTR_ERR(table)).

Then, you only need PTR_ERR(table) if you want to return int as it was
before?

> I don't quite understand what's the problem with ERR_CAST(), it seems to
> exist exactly for this purpose.

No problem, we have been using a different macro, patch description
does not describe why this is required.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [nf PATCH 2/5] netfilter: nf_tables: Add locking for NFT_MSG_GETRULE_RESET requests
  2023-09-26  9:34             ` Phil Sutter
@ 2023-09-26 10:09               ` Pablo Neira Ayuso
  2023-09-26 12:14                 ` Phil Sutter
  0 siblings, 1 reply; 25+ messages in thread
From: Pablo Neira Ayuso @ 2023-09-26 10:09 UTC (permalink / raw)
  To: Phil Sutter, Florian Westphal, netfilter-devel

Hi Phil,

On Tue, Sep 26, 2023 at 11:34:43AM +0200, Phil Sutter wrote:
> On Mon, Sep 25, 2023 at 09:53:17PM +0200, Florian Westphal wrote:
> > Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> > > On Sat, Sep 23, 2023 at 06:18:13PM +0200, Florian Westphal wrote:
> > > > callback_that_might_reset()
> > > > {
> > > > 	try_module_get ...
> > > > 	rcu_read_unlock()
> > > > 	mutex_lock(net->commit_mutex)
> > > > 	  dumper();
> > > > 	mutex_unlock(net->commit_mutex)
> > > > 	rcu_read_lock();
> > > > 	module_put()
> > > > }
> > > >
> > > > should do the trick.
> > > 
> > > Idiom above LGTM, *except for net->commit_mutex*. Please do not use
> > > ->commit_mutex: This will stall ruleset updates for no reason, netlink
> > > dump would grab and release such mutex for each netlink_recvmsg() call
> > > and netlink dump side will always retry because of NLM_F_EINTR.
> > 
> > It will stall updates, but for good reason: we are making changes to the
> > expressions state.
> 
> This also disqualifies the use of Pablo's suggested reset_lock, right?

Quick summary:

We are currently discussing if it makes sense to add a new lock or
not. The commit_mutex stalls updates, but netlink dumps retrieves
listings in chunks, that is, one recvmsg() call from userspace (to
retrieve one list chunk) will grab the mutex then release it until the
next recvmsg() call is done. Between these two calls an update is
still possible. The question is if it is worth to stall an ongoing
listing or updates.

There is the NLM_F_EINTR mechanism in place that tells that an
interference has occured while keeping the listing lockless.

Unless I am missing anything, the goal is to fix two different
processes that are listing at the same time, that is, two processes
running a netlink dump at the same time that are resetting the
stateful expressions in the ruleset.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [nf PATCH 2/5] netfilter: nf_tables: Add locking for NFT_MSG_GETRULE_RESET requests
  2023-09-26 10:09               ` Pablo Neira Ayuso
@ 2023-09-26 12:14                 ` Phil Sutter
  2023-09-26 13:34                   ` Pablo Neira Ayuso
  0 siblings, 1 reply; 25+ messages in thread
From: Phil Sutter @ 2023-09-26 12:14 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: Florian Westphal, netfilter-devel

On Tue, Sep 26, 2023 at 12:09:35PM +0200, Pablo Neira Ayuso wrote:
> Hi Phil,
> 
> On Tue, Sep 26, 2023 at 11:34:43AM +0200, Phil Sutter wrote:
> > On Mon, Sep 25, 2023 at 09:53:17PM +0200, Florian Westphal wrote:
> > > Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> > > > On Sat, Sep 23, 2023 at 06:18:13PM +0200, Florian Westphal wrote:
> > > > > callback_that_might_reset()
> > > > > {
> > > > > 	try_module_get ...
> > > > > 	rcu_read_unlock()
> > > > > 	mutex_lock(net->commit_mutex)
> > > > > 	  dumper();
> > > > > 	mutex_unlock(net->commit_mutex)
> > > > > 	rcu_read_lock();
> > > > > 	module_put()
> > > > > }
> > > > >
> > > > > should do the trick.
> > > > 
> > > > Idiom above LGTM, *except for net->commit_mutex*. Please do not use
> > > > ->commit_mutex: This will stall ruleset updates for no reason, netlink
> > > > dump would grab and release such mutex for each netlink_recvmsg() call
> > > > and netlink dump side will always retry because of NLM_F_EINTR.
> > > 
> > > It will stall updates, but for good reason: we are making changes to the
> > > expressions state.
> > 
> > This also disqualifies the use of Pablo's suggested reset_lock, right?
> 
> Quick summary:
> 
> We are currently discussing if it makes sense to add a new lock or
> not. The commit_mutex stalls updates, but netlink dumps retrieves
> listings in chunks, that is, one recvmsg() call from userspace (to
> retrieve one list chunk) will grab the mutex then release it until the
> next recvmsg() call is done. Between these two calls an update is
> still possible. The question is if it is worth to stall an ongoing
> listing or updates.

Thanks for the summary. Assuming that a blocked commit will only be
postponed until after the current chunk was filled and is being
submitted to user space, I don't see how it would make a practical
difference for reset command if commit_mutex is used instead of
reset_lock (or a dedicated reset_mutex).

> There is the NLM_F_EINTR mechanism in place that tells that an
> interference has occured while keeping the listing lockless.
> 
> Unless I am missing anything, the goal is to fix two different
> processes that are listing at the same time, that is, two processes
> running a netlink dump at the same time that are resetting the
> stateful expressions in the ruleset.

Here's a simple repro I use to verify the locking approach (only rule
reset for now):

| set -e
| 
| RULESET='flush ruleset
| table t {
|       chain c {
|               counter packets 23 bytes 42
|       }
| }'
| 
| trap "$NFT list ruleset" EXIT
| for ((i = 0; i < 10000; i++)); do
|       echo "iter $i"
|       $NFT -f - <<< "$RULESET"
|       $NFT list ruleset | grep -q 'packets 23 bytes 42' >/dev/null
|       $NFT reset rules >/dev/null &
|       pid=$!
|       $NFT reset rules >/dev/null
|       wait $!
|       #$NFT list ruleset | grep 'packets'
|       $NFT list ruleset | grep -q 'packets 0 bytes 0' >/dev/null
| done

If the two calls clash, the rule will have huge counter values due to
underflow.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [nf PATCH 2/5] netfilter: nf_tables: Add locking for NFT_MSG_GETRULE_RESET requests
  2023-09-26 12:14                 ` Phil Sutter
@ 2023-09-26 13:34                   ` Pablo Neira Ayuso
  2023-09-26 13:59                     ` Phil Sutter
  0 siblings, 1 reply; 25+ messages in thread
From: Pablo Neira Ayuso @ 2023-09-26 13:34 UTC (permalink / raw)
  To: Phil Sutter, Florian Westphal, netfilter-devel

On Tue, Sep 26, 2023 at 02:14:05PM +0200, Phil Sutter wrote:
> On Tue, Sep 26, 2023 at 12:09:35PM +0200, Pablo Neira Ayuso wrote:
> > Hi Phil,
> > 
> > On Tue, Sep 26, 2023 at 11:34:43AM +0200, Phil Sutter wrote:
> > > On Mon, Sep 25, 2023 at 09:53:17PM +0200, Florian Westphal wrote:
> > > > Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> > > > > On Sat, Sep 23, 2023 at 06:18:13PM +0200, Florian Westphal wrote:
> > > > > > callback_that_might_reset()
> > > > > > {
> > > > > > 	try_module_get ...
> > > > > > 	rcu_read_unlock()
> > > > > > 	mutex_lock(net->commit_mutex)
> > > > > > 	  dumper();
> > > > > > 	mutex_unlock(net->commit_mutex)
> > > > > > 	rcu_read_lock();
> > > > > > 	module_put()
> > > > > > }
> > > > > >
> > > > > > should do the trick.
> > > > > 
> > > > > Idiom above LGTM, *except for net->commit_mutex*. Please do not use
> > > > > ->commit_mutex: This will stall ruleset updates for no reason, netlink
> > > > > dump would grab and release such mutex for each netlink_recvmsg() call
> > > > > and netlink dump side will always retry because of NLM_F_EINTR.
> > > > 
> > > > It will stall updates, but for good reason: we are making changes to the
> > > > expressions state.
> > > 
> > > This also disqualifies the use of Pablo's suggested reset_lock, right?
> > 
> > Quick summary:
> > 
> > We are currently discussing if it makes sense to add a new lock or
> > not. The commit_mutex stalls updates, but netlink dumps retrieves
> > listings in chunks, that is, one recvmsg() call from userspace (to
> > retrieve one list chunk) will grab the mutex then release it until the
> > next recvmsg() call is done. Between these two calls an update is
> > still possible. The question is if it is worth to stall an ongoing
> > listing or updates.
> 
> Thanks for the summary. Assuming that a blocked commit will only be
> postponed until after the current chunk was filled and is being
> submitted to user space, I don't see how it would make a practical
> difference for reset command if commit_mutex is used instead of
> reset_lock (or a dedicated reset_mutex).

If the problem we are addressing is two processes listing the ruleset
that concur to reset stateful expressions, then there is no difference.
However, this is stalling writers and I don't think we need this
according to the problem description.

Another point to consider is how likely this list-and-reset happens.
If it is unlikely, then commit_mutex should be fine. But if we expect
a process polling to fetch counters very often, this will introduce an
unnecessary interference with writers. In a very dynamic deployment,
with frequent transaction updates, that might stall transactions for
no reason.

Please, note that using commit_mutex will *not* fix either that
userspace has to properly deal with NLM_F_EINTR, and I am 100% sure
you told me last time when you submitted this that you would prefer to
fix that once you get a ticket^H^H^H^H^H^H complain from someone else.
Oh, and I accepted that deal.

Honestly, we already have things to improve in other fronts, such
speeding up set updates and reducing userspace memory consumption,
much of this is userspace work.

> > There is the NLM_F_EINTR mechanism in place that tells that an
> > interference has occured while keeping the listing lockless.
> > 
> > Unless I am missing anything, the goal is to fix two different
> > processes that are listing at the same time, that is, two processes
> > running a netlink dump at the same time that are resetting the
> > stateful expressions in the ruleset.
> 
> Here's a simple repro I use to verify the locking approach (only rule
> reset for now):
> 
> | set -e
> | 
> | RULESET='flush ruleset
> | table t {
> |       chain c {
> |               counter packets 23 bytes 42
> |       }
> | }'
> | 
> | trap "$NFT list ruleset" EXIT
> | for ((i = 0; i < 10000; i++)); do
> |       echo "iter $i"
> |       $NFT -f - <<< "$RULESET"
> |       $NFT list ruleset | grep -q 'packets 23 bytes 42' >/dev/null
> |       $NFT reset rules >/dev/null &
> |       pid=$!
> |       $NFT reset rules >/dev/null
> |       wait $!
> |       #$NFT list ruleset | grep 'packets'
> |       $NFT list ruleset | grep -q 'packets 0 bytes 0' >/dev/null
> | done
> 
> If the two calls clash, the rule will have huge counter values due to
> underflow.

Can you give a try with the reset_lock spinlock approach with this
script that exercises worst case?

Thanks.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [nf PATCH 2/5] netfilter: nf_tables: Add locking for NFT_MSG_GETRULE_RESET requests
  2023-09-26 13:34                   ` Pablo Neira Ayuso
@ 2023-09-26 13:59                     ` Phil Sutter
  2023-09-27 11:41                       ` Florian Westphal
  0 siblings, 1 reply; 25+ messages in thread
From: Phil Sutter @ 2023-09-26 13:59 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: Florian Westphal, netfilter-devel

On Tue, Sep 26, 2023 at 03:34:40PM +0200, Pablo Neira Ayuso wrote:
> On Tue, Sep 26, 2023 at 02:14:05PM +0200, Phil Sutter wrote:
> > On Tue, Sep 26, 2023 at 12:09:35PM +0200, Pablo Neira Ayuso wrote:
> > > Hi Phil,
> > > 
> > > On Tue, Sep 26, 2023 at 11:34:43AM +0200, Phil Sutter wrote:
> > > > On Mon, Sep 25, 2023 at 09:53:17PM +0200, Florian Westphal wrote:
> > > > > Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> > > > > > On Sat, Sep 23, 2023 at 06:18:13PM +0200, Florian Westphal wrote:
> > > > > > > callback_that_might_reset()
> > > > > > > {
> > > > > > > 	try_module_get ...
> > > > > > > 	rcu_read_unlock()
> > > > > > > 	mutex_lock(net->commit_mutex)
> > > > > > > 	  dumper();
> > > > > > > 	mutex_unlock(net->commit_mutex)
> > > > > > > 	rcu_read_lock();
> > > > > > > 	module_put()
> > > > > > > }
> > > > > > >
> > > > > > > should do the trick.
> > > > > > 
> > > > > > Idiom above LGTM, *except for net->commit_mutex*. Please do not use
> > > > > > ->commit_mutex: This will stall ruleset updates for no reason, netlink
> > > > > > dump would grab and release such mutex for each netlink_recvmsg() call
> > > > > > and netlink dump side will always retry because of NLM_F_EINTR.
> > > > > 
> > > > > It will stall updates, but for good reason: we are making changes to the
> > > > > expressions state.
> > > > 
> > > > This also disqualifies the use of Pablo's suggested reset_lock, right?
> > > 
> > > Quick summary:
> > > 
> > > We are currently discussing if it makes sense to add a new lock or
> > > not. The commit_mutex stalls updates, but netlink dumps retrieves
> > > listings in chunks, that is, one recvmsg() call from userspace (to
> > > retrieve one list chunk) will grab the mutex then release it until the
> > > next recvmsg() call is done. Between these two calls an update is
> > > still possible. The question is if it is worth to stall an ongoing
> > > listing or updates.
> > 
> > Thanks for the summary. Assuming that a blocked commit will only be
> > postponed until after the current chunk was filled and is being
> > submitted to user space, I don't see how it would make a practical
> > difference for reset command if commit_mutex is used instead of
> > reset_lock (or a dedicated reset_mutex).
> 
> If the problem we are addressing is two processes listing the ruleset
> that concur to reset stateful expressions, then there is no difference.
> However, this is stalling writers and I don't think we need this
> according to the problem description.

ACK. Maybe Florian has a case in mind which requires to serialize reset
and commit?

> Another point to consider is how likely this list-and-reset happens.
> If it is unlikely, then commit_mutex should be fine. But if we expect
> a process polling to fetch counters very often, this will introduce an
> unnecessary interference with writers. In a very dynamic deployment,
> with frequent transaction updates, that might stall transactions for
> no reason.

I wonder if hardcore 'reset the rules fetching counters' users care
about performance of ruleset changes at the same time. Maybe we
shouldn't care until someone complains?

> Please, note that using commit_mutex will *not* fix either that
> userspace has to properly deal with NLM_F_EINTR, and I am 100% sure
> you told me last time when you submitted this that you would prefer to
> fix that once you get a ticket^H^H^H^H^H^H complain from someone else.
> Oh, and I accepted that deal.

Sorry, I don't recall. What did I promise to fix once someone complains?
NLM_F_EINTR handling in user space for reset commands? Is it broken??

> Honestly, we already have things to improve in other fronts, such
> speeding up set updates and reducing userspace memory consumption,
> much of this is userspace work.

I totally agree. And I didn't make the call for locking reset requests,
I'm just trying to answer it since it's my code that's broken in that
regard.

> > > There is the NLM_F_EINTR mechanism in place that tells that an
> > > interference has occured while keeping the listing lockless.
> > > 
> > > Unless I am missing anything, the goal is to fix two different
> > > processes that are listing at the same time, that is, two processes
> > > running a netlink dump at the same time that are resetting the
> > > stateful expressions in the ruleset.
> > 
> > Here's a simple repro I use to verify the locking approach (only rule
> > reset for now):
> > 
> > | set -e
> > | 
> > | RULESET='flush ruleset
> > | table t {
> > |       chain c {
> > |               counter packets 23 bytes 42
> > |       }
> > | }'
> > | 
> > | trap "$NFT list ruleset" EXIT
> > | for ((i = 0; i < 10000; i++)); do
> > |       echo "iter $i"
> > |       $NFT -f - <<< "$RULESET"
> > |       $NFT list ruleset | grep -q 'packets 23 bytes 42' >/dev/null
> > |       $NFT reset rules >/dev/null &
> > |       pid=$!
> > |       $NFT reset rules >/dev/null
> > |       wait $!
> > |       #$NFT list ruleset | grep 'packets'
> > |       $NFT list ruleset | grep -q 'packets 0 bytes 0' >/dev/null
> > | done
> > 
> > If the two calls clash, the rule will have huge counter values due to
> > underflow.
> 
> Can you give a try with the reset_lock spinlock approach with this
> script that exercises worst case?

It passes with this series applied. It just takes long to finish (due to
10k retries). If it triggers, it usually does within ~100 tries. But it
depends, and I don't know how to increase the chances. Otherwise I would
have put this in a kselftest.

Cheers, Phil

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [nf PATCH 2/5] netfilter: nf_tables: Add locking for NFT_MSG_GETRULE_RESET requests
  2023-09-26 13:59                     ` Phil Sutter
@ 2023-09-27 11:41                       ` Florian Westphal
  2023-09-27 12:54                         ` Phil Sutter
  0 siblings, 1 reply; 25+ messages in thread
From: Florian Westphal @ 2023-09-27 11:41 UTC (permalink / raw)
  To: Phil Sutter, Pablo Neira Ayuso, Florian Westphal, netfilter-devel

Phil Sutter <phil@nwl.cc> wrote:
> > However, this is stalling writers and I don't think we need this
> > according to the problem description.
> 
> ACK. Maybe Florian has a case in mind which requires to serialize reset
> and commit?

No, spinlock is fine too, concurrent resets will just burn more cycles.
I did not think we'd have frequent resets which is why I suggegested
reuse of the existing lock, thats all.

No objections to new mutex or spinlock.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [nf PATCH 2/5] netfilter: nf_tables: Add locking for NFT_MSG_GETRULE_RESET requests
  2023-09-27 11:41                       ` Florian Westphal
@ 2023-09-27 12:54                         ` Phil Sutter
  0 siblings, 0 replies; 25+ messages in thread
From: Phil Sutter @ 2023-09-27 12:54 UTC (permalink / raw)
  To: Florian Westphal; +Cc: Pablo Neira Ayuso, netfilter-devel

On Wed, Sep 27, 2023 at 01:41:33PM +0200, Florian Westphal wrote:
> Phil Sutter <phil@nwl.cc> wrote:
> > > However, this is stalling writers and I don't think we need this
> > > according to the problem description.
> > 
> > ACK. Maybe Florian has a case in mind which requires to serialize reset
> > and commit?
> 
> No, spinlock is fine too, concurrent resets will just burn more cycles.
> I did not think we'd have frequent resets which is why I suggegested
> reuse of the existing lock, thats all.
> 
> No objections to new mutex or spinlock.

OK, so we may just reuse commit_mutex until we find a practical use-case
for which this is a bottleneck? Or should one prefer a dedicated lock in
order to reduce complexity when it comes to lock debugging?

Cheers, Phil

^ permalink raw reply	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2023-09-27 12:55 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-09-23  1:38 [nf PATCH 0/5] Introduce locking for reset requests Phil Sutter
2023-09-23  1:38 ` [nf PATCH 1/5] netfilter: nf_tables: Don't allocate nft_rule_dump_ctx Phil Sutter
2023-09-23  1:38 ` [nf PATCH 2/5] netfilter: nf_tables: Add locking for NFT_MSG_GETRULE_RESET requests Phil Sutter
2023-09-23 11:04   ` Florian Westphal
2023-09-23 15:03     ` Phil Sutter
2023-09-23 16:18       ` Florian Westphal
2023-09-25  9:32         ` Pablo Neira Ayuso
2023-09-25 19:53           ` Florian Westphal
2023-09-26  9:34             ` Phil Sutter
2023-09-26 10:09               ` Pablo Neira Ayuso
2023-09-26 12:14                 ` Phil Sutter
2023-09-26 13:34                   ` Pablo Neira Ayuso
2023-09-26 13:59                     ` Phil Sutter
2023-09-27 11:41                       ` Florian Westphal
2023-09-27 12:54                         ` Phil Sutter
2023-09-25 11:02     ` Pablo Neira Ayuso
2023-09-25 10:47   ` Pablo Neira Ayuso
2023-09-26  9:14     ` Phil Sutter
2023-09-26 10:00       ` Pablo Neira Ayuso
2023-09-25 10:48   ` Pablo Neira Ayuso
2023-09-25 11:01     ` Pablo Neira Ayuso
2023-09-23  1:38 ` [nf PATCH 3/5] netfilter: nf_tables: Introduce struct nft_obj_dump_ctx Phil Sutter
2023-09-23  1:38 ` [nf PATCH 4/5] netfilter: nf_tables: Add locking for NFT_MSG_GETOBJ_RESET requests Phil Sutter
2023-09-23  1:38 ` [nf PATCH 5/5] netfilter: nf_tables: Add locking for NFT_MSG_GETSETELEM_RESET requests Phil Sutter
2023-09-25 10:53   ` Pablo Neira Ayuso

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.