* [PATCH 01/13] netfilter: nf_tables: delay chain policy update until transaction is complete
2019-05-13 9:56 [PATCH 00/13] Netfilter fixes for net Pablo Neira Ayuso
@ 2019-05-13 9:56 ` Pablo Neira Ayuso
2019-05-13 9:56 ` [PATCH 02/13] netfilter: nft_flow_offload: add entry to flowtable after confirmation Pablo Neira Ayuso
` (12 subsequent siblings)
13 siblings, 0 replies; 30+ messages in thread
From: Pablo Neira Ayuso @ 2019-05-13 9:56 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev
From: Florian Westphal <fw@strlen.de>
When we process a long ruleset of the form
chain input {
type filter hook input priority filter; policy drop;
...
}
Then the base chain gets registered early on, we then continue to
process/validate the next messages coming in the same transaction.
Problem is that if the base chain policy is 'drop', it will take effect
immediately, which causes all traffic to get blocked until the
transaction completes or is aborted.
Fix this by deferring the policy until the transaction has been
processed and all of the rules have been flagged as active.
Reported-by: Jann Haber <jann.haber@selfnet.de>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
net/netfilter/nf_tables_api.c | 50 +++++++++++++++++++++++++++++++++----------
1 file changed, 39 insertions(+), 11 deletions(-)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 1606eaa5ae0d..f2e12ae50544 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -214,33 +214,33 @@ static int nft_deltable(struct nft_ctx *ctx)
return err;
}
-static int nft_trans_chain_add(struct nft_ctx *ctx, int msg_type)
+static struct nft_trans *nft_trans_chain_add(struct nft_ctx *ctx, int msg_type)
{
struct nft_trans *trans;
trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_chain));
if (trans == NULL)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
if (msg_type == NFT_MSG_NEWCHAIN)
nft_activate_next(ctx->net, ctx->chain);
list_add_tail(&trans->list, &ctx->net->nft.commit_list);
- return 0;
+ return trans;
}
static int nft_delchain(struct nft_ctx *ctx)
{
- int err;
+ struct nft_trans *trans;
- err = nft_trans_chain_add(ctx, NFT_MSG_DELCHAIN);
- if (err < 0)
- return err;
+ trans = nft_trans_chain_add(ctx, NFT_MSG_DELCHAIN);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
ctx->table->use--;
nft_deactivate_next(ctx->net, ctx->chain);
- return err;
+ return 0;
}
static void nft_rule_expr_activate(const struct nft_ctx *ctx,
@@ -1615,6 +1615,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
struct nft_base_chain *basechain;
struct nft_stats __percpu *stats;
struct net *net = ctx->net;
+ struct nft_trans *trans;
struct nft_chain *chain;
struct nft_rule **rules;
int err;
@@ -1662,7 +1663,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
ops->dev = hook.dev;
chain->flags |= NFT_BASE_CHAIN;
- basechain->policy = policy;
+ basechain->policy = NF_ACCEPT;
} else {
chain = kzalloc(sizeof(*chain), GFP_KERNEL);
if (chain == NULL)
@@ -1698,13 +1699,18 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
if (err)
goto err2;
- err = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN);
- if (err < 0) {
+ trans = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN);
+ if (IS_ERR(trans)) {
+ err = PTR_ERR(trans);
rhltable_remove(&table->chains_ht, &chain->rhlhead,
nft_chain_ht_params);
goto err2;
}
+ nft_trans_chain_policy(trans) = -1;
+ if (nft_is_base_chain(chain))
+ nft_trans_chain_policy(trans) = policy;
+
table->use++;
list_add_tail_rcu(&chain->list, &table->chains);
@@ -6311,6 +6317,27 @@ static int nf_tables_validate(struct net *net)
return 0;
}
+/* a drop policy has to be deferred until all rules have been activated,
+ * otherwise a large ruleset that contains a drop-policy base chain will
+ * cause all packets to get dropped until the full transaction has been
+ * processed.
+ *
+ * We defer the drop policy until the transaction has been finalized.
+ */
+static void nft_chain_commit_drop_policy(struct nft_trans *trans)
+{
+ struct nft_base_chain *basechain;
+
+ if (nft_trans_chain_policy(trans) != NF_DROP)
+ return;
+
+ if (!nft_is_base_chain(trans->ctx.chain))
+ return;
+
+ basechain = nft_base_chain(trans->ctx.chain);
+ basechain->policy = NF_DROP;
+}
+
static void nft_chain_commit_update(struct nft_trans *trans)
{
struct nft_base_chain *basechain;
@@ -6632,6 +6659,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN);
/* trans destroyed after rcu grace period */
} else {
+ nft_chain_commit_drop_policy(trans);
nft_clear(net, trans->ctx.chain);
nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN);
nft_trans_destroy(trans);
--
2.11.0
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 02/13] netfilter: nft_flow_offload: add entry to flowtable after confirmation
2019-05-13 9:56 [PATCH 00/13] Netfilter fixes for net Pablo Neira Ayuso
2019-05-13 9:56 ` [PATCH 01/13] netfilter: nf_tables: delay chain policy update until transaction is complete Pablo Neira Ayuso
@ 2019-05-13 9:56 ` Pablo Neira Ayuso
2019-05-13 9:56 ` [PATCH 03/13] netfilter: nf_flow_table: fix netdev refcnt leak Pablo Neira Ayuso
` (11 subsequent siblings)
13 siblings, 0 replies; 30+ messages in thread
From: Pablo Neira Ayuso @ 2019-05-13 9:56 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev
This is fixing flow offload for UDP traffic where packets only follow
one single direction.
The flow_offload_fixup_tcp() mechanism works fine in case that the
offloaded entry remains in SYN_RECV state, given sequence tracking is
reset and that conntrack handles syn+ack packets as a retransmission, ie.
sES + synack => sIG
for reply traffic.
Fixes: a3c90f7a2323 ("netfilter: nf_tables: flow offload expression")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
net/netfilter/nft_flow_offload.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index 6e6b9adf7d38..8968c7f5a72e 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -94,8 +94,7 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
if (help)
goto out;
- if (ctinfo == IP_CT_NEW ||
- ctinfo == IP_CT_RELATED)
+ if (!nf_ct_is_confirmed(ct))
goto out;
if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status))
--
2.11.0
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 03/13] netfilter: nf_flow_table: fix netdev refcnt leak
2019-05-13 9:56 [PATCH 00/13] Netfilter fixes for net Pablo Neira Ayuso
2019-05-13 9:56 ` [PATCH 01/13] netfilter: nf_tables: delay chain policy update until transaction is complete Pablo Neira Ayuso
2019-05-13 9:56 ` [PATCH 02/13] netfilter: nft_flow_offload: add entry to flowtable after confirmation Pablo Neira Ayuso
@ 2019-05-13 9:56 ` Pablo Neira Ayuso
2019-05-13 9:56 ` [PATCH 04/13] netfilter: nf_flow_table: check ttl value in flow offload data path Pablo Neira Ayuso
` (10 subsequent siblings)
13 siblings, 0 replies; 30+ messages in thread
From: Pablo Neira Ayuso @ 2019-05-13 9:56 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev
From: Taehee Yoo <ap420073@gmail.com>
flow_offload_alloc() calls nf_route() to get a dst_entry. Internally,
nf_route() calls ip_route_output_key() that allocates a dst_entry and
holds it. So, a dst_entry should be released by dst_release() if
nf_route() is successful.
Otherwise, netns exit routine cannot be finished and the following
message is printed:
[ 257.490952] unregister_netdevice: waiting for lo to become free. Usage count = 1
Fixes: ac2a66665e23 ("netfilter: add generic flow table infrastructure")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
net/netfilter/nft_flow_offload.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index 8968c7f5a72e..69d7a8439c7a 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -112,6 +112,7 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
if (ret < 0)
goto err_flow_add;
+ dst_release(route.tuple[!dir].dst);
return;
err_flow_add:
--
2.11.0
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 04/13] netfilter: nf_flow_table: check ttl value in flow offload data path
2019-05-13 9:56 [PATCH 00/13] Netfilter fixes for net Pablo Neira Ayuso
` (2 preceding siblings ...)
2019-05-13 9:56 ` [PATCH 03/13] netfilter: nf_flow_table: fix netdev refcnt leak Pablo Neira Ayuso
@ 2019-05-13 9:56 ` Pablo Neira Ayuso
2019-05-13 9:56 ` [PATCH 05/13] netfilter: nf_conntrack_h323: restore boundary check correctness Pablo Neira Ayuso
` (9 subsequent siblings)
13 siblings, 0 replies; 30+ messages in thread
From: Pablo Neira Ayuso @ 2019-05-13 9:56 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev
From: Taehee Yoo <ap420073@gmail.com>
nf_flow_offload_ip_hook() and nf_flow_offload_ipv6_hook() do not check
ttl value. So, ttl value overflow may occur.
Fixes: 97add9f0d66d ("netfilter: flow table support for IPv4")
Fixes: 0995210753a2 ("netfilter: flow table support for IPv6")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
net/netfilter/nf_flow_table_ip.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 1d291a51cd45..46022a2867d7 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -181,6 +181,9 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
iph->protocol != IPPROTO_UDP)
return -1;
+ if (iph->ttl <= 1)
+ return -1;
+
thoff = iph->ihl * 4;
if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
return -1;
@@ -411,6 +414,9 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
ip6h->nexthdr != IPPROTO_UDP)
return -1;
+ if (ip6h->hop_limit <= 1)
+ return -1;
+
thoff = sizeof(*ip6h);
if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
return -1;
--
2.11.0
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 05/13] netfilter: nf_conntrack_h323: restore boundary check correctness
2019-05-13 9:56 [PATCH 00/13] Netfilter fixes for net Pablo Neira Ayuso
` (3 preceding siblings ...)
2019-05-13 9:56 ` [PATCH 04/13] netfilter: nf_flow_table: check ttl value in flow offload data path Pablo Neira Ayuso
@ 2019-05-13 9:56 ` Pablo Neira Ayuso
2019-05-13 9:56 ` [PATCH 06/13] netfilter: nf_tables: fix base chain stat rcu_dereference usage Pablo Neira Ayuso
` (8 subsequent siblings)
13 siblings, 0 replies; 30+ messages in thread
From: Pablo Neira Ayuso @ 2019-05-13 9:56 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev
From: Jakub Jankowski <shasta@toxcorp.com>
Since commit bc7d811ace4a ("netfilter: nf_ct_h323: Convert
CHECK_BOUND macro to function"), NAT traversal for H.323
doesn't work, failing to parse H323-UserInformation.
nf_h323_error_boundary() compares contents of the bitstring,
not the addresses, preventing valid H.323 packets from being
conntrack'd.
This looks like an oversight from when CHECK_BOUND macro was
converted to a function.
To fix it, stop dereferencing bs->cur and bs->end.
Fixes: bc7d811ace4a ("netfilter: nf_ct_h323: Convert CHECK_BOUND macro to function")
Signed-off-by: Jakub Jankowski <shasta@toxcorp.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
net/netfilter/nf_conntrack_h323_asn1.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c
index 1601275efe2d..4c2ef42e189c 100644
--- a/net/netfilter/nf_conntrack_h323_asn1.c
+++ b/net/netfilter/nf_conntrack_h323_asn1.c
@@ -172,7 +172,7 @@ static int nf_h323_error_boundary(struct bitstr *bs, size_t bytes, size_t bits)
if (bits % BITS_PER_BYTE > 0)
bytes++;
- if (*bs->cur + bytes > *bs->end)
+ if (bs->cur + bytes > bs->end)
return 1;
return 0;
--
2.11.0
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 06/13] netfilter: nf_tables: fix base chain stat rcu_dereference usage
2019-05-13 9:56 [PATCH 00/13] Netfilter fixes for net Pablo Neira Ayuso
` (4 preceding siblings ...)
2019-05-13 9:56 ` [PATCH 05/13] netfilter: nf_conntrack_h323: restore boundary check correctness Pablo Neira Ayuso
@ 2019-05-13 9:56 ` Pablo Neira Ayuso
2019-05-13 9:56 ` [PATCH 07/13] netfilter: nf_flow_table: fix missing error check for rhashtable_insert_fast Pablo Neira Ayuso
` (7 subsequent siblings)
13 siblings, 0 replies; 30+ messages in thread
From: Pablo Neira Ayuso @ 2019-05-13 9:56 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev
From: Florian Westphal <fw@strlen.de>
Following splat gets triggered when nfnetlink monitor is running while
xtables-nft selftests are running:
net/netfilter/nf_tables_api.c:1272 suspicious rcu_dereference_check() usage!
other info that might help us debug this:
1 lock held by xtables-nft-mul/27006:
#0: 00000000e0f85be9 (&net->nft.commit_mutex){+.+.}, at: nf_tables_valid_genid+0x1a/0x50
Call Trace:
nf_tables_fill_chain_info.isra.45+0x6cc/0x6e0
nf_tables_chain_notify+0xf8/0x1a0
nf_tables_commit+0x165c/0x1740
nf_tables_fill_chain_info() can be called both from dumps (rcu read locked)
or from the transaction path if a userspace process subscribed to nftables
notifications.
In the 'table dump' case, rcu_access_pointer() cannot be used: We do not
hold transaction mutex so the pointer can be NULLed right after the check.
Just unconditionally fetch the value, then have the helper return
immediately if its NULL.
In the notification case we don't hold the rcu read lock, but updates are
prevented due to transaction mutex. Use rcu_dereference_check() to make lockdep
aware of this.
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
net/netfilter/nf_tables_api.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index f2e12ae50544..e4f6ecac48c3 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1190,6 +1190,9 @@ static int nft_dump_stats(struct sk_buff *skb, struct nft_stats __percpu *stats)
u64 pkts, bytes;
int cpu;
+ if (!stats)
+ return 0;
+
memset(&total, 0, sizeof(total));
for_each_possible_cpu(cpu) {
cpu_stats = per_cpu_ptr(stats, cpu);
@@ -1247,6 +1250,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
if (nft_is_base_chain(chain)) {
const struct nft_base_chain *basechain = nft_base_chain(chain);
const struct nf_hook_ops *ops = &basechain->ops;
+ struct nft_stats __percpu *stats;
struct nlattr *nest;
nest = nla_nest_start(skb, NFTA_CHAIN_HOOK);
@@ -1268,8 +1272,9 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
if (nla_put_string(skb, NFTA_CHAIN_TYPE, basechain->type->name))
goto nla_put_failure;
- if (rcu_access_pointer(basechain->stats) &&
- nft_dump_stats(skb, rcu_dereference(basechain->stats)))
+ stats = rcu_dereference_check(basechain->stats,
+ lockdep_commit_lock_is_held(net));
+ if (nft_dump_stats(skb, stats))
goto nla_put_failure;
}
--
2.11.0
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 07/13] netfilter: nf_flow_table: fix missing error check for rhashtable_insert_fast
2019-05-13 9:56 [PATCH 00/13] Netfilter fixes for net Pablo Neira Ayuso
` (5 preceding siblings ...)
2019-05-13 9:56 ` [PATCH 06/13] netfilter: nf_tables: fix base chain stat rcu_dereference usage Pablo Neira Ayuso
@ 2019-05-13 9:56 ` Pablo Neira Ayuso
2019-05-13 9:56 ` [PATCH 08/13] netfilter: ctnetlink: Resolve conntrack L3-protocol flush regression Pablo Neira Ayuso
` (6 subsequent siblings)
13 siblings, 0 replies; 30+ messages in thread
From: Pablo Neira Ayuso @ 2019-05-13 9:56 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev
From: Taehee Yoo <ap420073@gmail.com>
rhashtable_insert_fast() may return an error value when memory
allocation fails, but flow_offload_add() does not check for errors.
This patch just adds missing error checking.
Fixes: ac2a66665e23 ("netfilter: add generic flow table infrastructure")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
net/netfilter/nf_flow_table_core.c | 25 ++++++++++++++++++-------
1 file changed, 18 insertions(+), 7 deletions(-)
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 7aabfd4b1e50..a9e4f74b1ff6 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -185,14 +185,25 @@ static const struct rhashtable_params nf_flow_offload_rhash_params = {
int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
{
- flow->timeout = (u32)jiffies;
+ int err;
- rhashtable_insert_fast(&flow_table->rhashtable,
- &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
- nf_flow_offload_rhash_params);
- rhashtable_insert_fast(&flow_table->rhashtable,
- &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
- nf_flow_offload_rhash_params);
+ err = rhashtable_insert_fast(&flow_table->rhashtable,
+ &flow->tuplehash[0].node,
+ nf_flow_offload_rhash_params);
+ if (err < 0)
+ return err;
+
+ err = rhashtable_insert_fast(&flow_table->rhashtable,
+ &flow->tuplehash[1].node,
+ nf_flow_offload_rhash_params);
+ if (err < 0) {
+ rhashtable_remove_fast(&flow_table->rhashtable,
+ &flow->tuplehash[0].node,
+ nf_flow_offload_rhash_params);
+ return err;
+ }
+
+ flow->timeout = (u32)jiffies;
return 0;
}
EXPORT_SYMBOL_GPL(flow_offload_add);
--
2.11.0
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 08/13] netfilter: ctnetlink: Resolve conntrack L3-protocol flush regression
2019-05-13 9:56 [PATCH 00/13] Netfilter fixes for net Pablo Neira Ayuso
` (6 preceding siblings ...)
2019-05-13 9:56 ` [PATCH 07/13] netfilter: nf_flow_table: fix missing error check for rhashtable_insert_fast Pablo Neira Ayuso
@ 2019-05-13 9:56 ` Pablo Neira Ayuso
2019-06-24 3:44 ` Felix Kaechele
2019-05-13 9:56 ` [PATCH 09/13] netfilter: nf_conntrack_h323: Remove deprecated config check Pablo Neira Ayuso
` (5 subsequent siblings)
13 siblings, 1 reply; 30+ messages in thread
From: Pablo Neira Ayuso @ 2019-05-13 9:56 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev
From: Kristian Evensen <kristian.evensen@gmail.com>
Commit 59c08c69c278 ("netfilter: ctnetlink: Support L3 protocol-filter
on flush") introduced a user-space regression when flushing connection
track entries. Before this commit, the nfgen_family field was not used
by the kernel and all entries were removed. Since this commit,
nfgen_family is used to filter out entries that should not be removed.
One example a broken tool is conntrack. conntrack always sets
nfgen_family to AF_INET, so after 59c08c69c278 only IPv4 entries were
removed with the -F parameter.
Pablo Neira Ayuso suggested using nfgenmsg->version to resolve the
regression, and this commit implements his suggestion. nfgenmsg->version
is so far set to zero, so it is well-suited to be used as a flag for
selecting old or new flush behavior. If version is 0, nfgen_family is
ignored and all entries are used. If user-space sets the version to one
(or any other value than 0), then the new behavior is used. As version
only can have two valid values, I chose not to add a new
NFNETLINK_VERSION-constant.
Fixes: 59c08c69c278 ("netfilter: ctnetlink: Support L3 protocol-filter on flush")
Reported-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Suggested-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Kristian Evensen <kristian.evensen@gmail.com>
Tested-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
net/netfilter/nf_conntrack_netlink.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index d7f61b0547c6..d2715b4d2e72 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1254,7 +1254,7 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
struct nf_conntrack_tuple tuple;
struct nf_conn *ct;
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- u_int8_t u3 = nfmsg->nfgen_family;
+ u_int8_t u3 = nfmsg->version ? nfmsg->nfgen_family : AF_UNSPEC;
struct nf_conntrack_zone zone;
int err;
--
2.11.0
^ permalink raw reply related [flat|nested] 30+ messages in thread
* Re: [PATCH 08/13] netfilter: ctnetlink: Resolve conntrack L3-protocol flush regression
2019-05-13 9:56 ` [PATCH 08/13] netfilter: ctnetlink: Resolve conntrack L3-protocol flush regression Pablo Neira Ayuso
@ 2019-06-24 3:44 ` Felix Kaechele
2019-06-24 23:58 ` Pablo Neira Ayuso
0 siblings, 1 reply; 30+ messages in thread
From: Felix Kaechele @ 2019-06-24 3:44 UTC (permalink / raw)
To: Pablo Neira Ayuso, netfilter-devel
Hi there,
this patch is giving me some trouble as it breaks deletion of conntrack
entries in software that doesn't set the version flag to anything else
but 0.
I'm not entirely sure what is going on here but a piece of software I am
using is now unable to delete conntrack entries and is therefor not
functioning.
Specifically this piece of code seems to fail:
https://github.com/wlanslovenija/tunneldigger/blob/master/broker/src/tunneldigger_broker/conntrack.py#L112
That software relies heavily on libnetfilter_conntrack, which itself,
with this patch, seems to be broken as well:
[felix@x1 utils]$ sudo ./conntrack_create
TEST: create conntrack (OK)
[felix@x1 utils]$ sudo ./conntrack_delete
TEST: delete conntrack (-1)(No such file or directory)
If in libnetfilter_conntrack I edit utils/conntrack_delete.c and change
the line
nfct_set_attr_u8(ct, ATTR_L3PROTO, AF_INET);
to read
nfct_set_attr_u8(ct, ATTR_L3PROTO, AF_UNSPEC);
it starts working again.
As I said, I haven't entirely figured out why this patch breaks
previously working software and what I need to do on my end to unbreak
my software that is using libnetfilter_conntrack. I haven't found a way
to make libnetfilter_conntrack set any other version than NFNETLINK_V0
for the messages it sends, which I presume would fix my problem.
Any hints would be greatly appreciated.
Regards,
Felix
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 08/13] netfilter: ctnetlink: Resolve conntrack L3-protocol flush regression
2019-06-24 3:44 ` Felix Kaechele
@ 2019-06-24 23:58 ` Pablo Neira Ayuso
2019-06-25 3:02 ` Felix Kaechele
2019-06-25 15:01 ` Nicolas Dichtel
0 siblings, 2 replies; 30+ messages in thread
From: Pablo Neira Ayuso @ 2019-06-24 23:58 UTC (permalink / raw)
To: Felix Kaechele; +Cc: netfilter-devel
[-- Attachment #1: Type: text/plain, Size: 215 bytes --]
On Sun, Jun 23, 2019 at 11:44:09PM -0400, Felix Kaechele wrote:
[...]
> [felix@x1 utils]$ sudo ./conntrack_delete
>
> TEST: delete conntrack (-1)(No such file or directory)
Could you give a try to this patch?
[-- Attachment #2: x.patch --]
[-- Type: text/x-diff, Size: 1099 bytes --]
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 7db79c1b8084..4886b1599014 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1256,7 +1256,6 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
struct nf_conntrack_tuple tuple;
struct nf_conn *ct;
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- u_int8_t u3 = nfmsg->version ? nfmsg->nfgen_family : AF_UNSPEC;
struct nf_conntrack_zone zone;
int err;
@@ -1266,11 +1265,13 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
if (cda[CTA_TUPLE_ORIG])
err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG,
- u3, &zone);
+ nfmsg->version, &zone);
else if (cda[CTA_TUPLE_REPLY])
err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY,
- u3, &zone);
+ nfmsg->version, &zone);
else {
+ u_int8_t u3 = nfmsg->version ? nfmsg->nfgen_family : AF_UNSPEC;
+
return ctnetlink_flush_conntrack(net, cda,
NETLINK_CB(skb).portid,
nlmsg_report(nlh), u3);
^ permalink raw reply related [flat|nested] 30+ messages in thread
* Re: [PATCH 08/13] netfilter: ctnetlink: Resolve conntrack L3-protocol flush regression
2019-06-24 23:58 ` Pablo Neira Ayuso
@ 2019-06-25 3:02 ` Felix Kaechele
2019-06-25 8:08 ` Pablo Neira Ayuso
2019-06-25 15:01 ` Nicolas Dichtel
1 sibling, 1 reply; 30+ messages in thread
From: Felix Kaechele @ 2019-06-25 3:02 UTC (permalink / raw)
To: Pablo Neira Ayuso; +Cc: netfilter-devel
On 2019-06-24 7:58 p.m., Pablo Neira Ayuso wrote:
> Could you give a try to this patch?
Hi there,
unfortunately the patch didn't work for me.
I did some deeper digging and it seems that nf_conntrack_find_get within
ctnetlink_del_conntrack will not find the entry if the address family
for the delete query is AF_UNSPEC (due to nfmsg->version being 0) but
the conntrack entry was initially created with AF_INET as the address
family. I believe the tuples will have different hashes in this case and
my guess is that this is not accounted for in the code, i.e. that
AF_UNSPEC should match both AF_INET and AF_INET6. At the moment it seems
to match none instead.
I could be wrong though, I'm not that familiar with the netfilter code.
Regards,
Felix
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 08/13] netfilter: ctnetlink: Resolve conntrack L3-protocol flush regression
2019-06-25 3:02 ` Felix Kaechele
@ 2019-06-25 8:08 ` Pablo Neira Ayuso
2019-06-25 11:33 ` Felix Kaechele
0 siblings, 1 reply; 30+ messages in thread
From: Pablo Neira Ayuso @ 2019-06-25 8:08 UTC (permalink / raw)
To: Felix Kaechele; +Cc: netfilter-devel
On Mon, Jun 24, 2019 at 11:02:40PM -0400, Felix Kaechele wrote:
> On 2019-06-24 7:58 p.m., Pablo Neira Ayuso wrote:
> > Could you give a try to this patch?
>
> Hi there,
>
> unfortunately the patch didn't work for me.
>
> I did some deeper digging and it seems that nf_conntrack_find_get within
> ctnetlink_del_conntrack will not find the entry if the address family for
> the delete query is AF_UNSPEC (due to nfmsg->version being 0) but the
> conntrack entry was initially created with AF_INET as the address family. I
> believe the tuples will have different hashes in this case and my guess is
> that this is not accounted for in the code, i.e. that AF_UNSPEC should match
> both AF_INET and AF_INET6. At the moment it seems to match none instead.
As you describe, conntrack is a hashtable and the layer 3 protocol is
part of the hash:
https://elixir.bootlin.com/linux/latest/source/net/netfilter/nf_conntrack_core.c#L188
so AF_UNSPEC cannot work.
There is no support for layer 3 wildcard deletion.
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 08/13] netfilter: ctnetlink: Resolve conntrack L3-protocol flush regression
2019-06-25 8:08 ` Pablo Neira Ayuso
@ 2019-06-25 11:33 ` Felix Kaechele
2019-06-25 11:52 ` Kristian Evensen
2019-06-25 19:41 ` Pablo Neira Ayuso
0 siblings, 2 replies; 30+ messages in thread
From: Felix Kaechele @ 2019-06-25 11:33 UTC (permalink / raw)
To: Pablo Neira Ayuso; +Cc: netfilter-devel, kristian.evensen
On 2019-06-25 4:08 a.m., Pablo Neira Ayuso wrote:
> As you describe, conntrack is a hashtable and the layer 3 protocol is
> part of the hash:
>
> https://elixir.bootlin.com/linux/latest/source/net/netfilter/nf_conntrack_core.c#L188
>
> so AF_UNSPEC cannot work.
>
> There is no support for layer 3 wildcard deletion.
So in this case I'd like to propose two options:
1. the patch should be reverted and userspace fixed to properly request
flushing of both AF_INET and AF_INET6 entries in the table when doing a
full flush
2. both this patch as well as the initial patch "netfilter: ctnetlink:
Support L3 protocol-filter on flush" should be reverted and a new
approach should be made to implement that feature.
As it stands right now current kernel versions that are being released
break userspace, which is unfortunate, because it forces me to run
older, vulnerable kernels.
Regards,
Felix
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 08/13] netfilter: ctnetlink: Resolve conntrack L3-protocol flush regression
2019-06-25 11:33 ` Felix Kaechele
@ 2019-06-25 11:52 ` Kristian Evensen
2019-06-25 14:45 ` Felix Kaechele
2019-06-25 19:40 ` Pablo Neira Ayuso
2019-06-25 19:41 ` Pablo Neira Ayuso
1 sibling, 2 replies; 30+ messages in thread
From: Kristian Evensen @ 2019-06-25 11:52 UTC (permalink / raw)
To: Felix Kaechele; +Cc: Pablo Neira Ayuso, Netfilter Development Mailing list
Hi,
I am sorry for the trouble that my change has caused. I do not know
what the correct action would be, but I am just trying to figure out
what is going on. In your first email, you wrote:
> this patch is giving me some trouble as it breaks deletion of conntrack
> entries in software that doesn't set the version flag to anything else
> but 0.
I might be a bit slow, but I have some trouble understanding this
sentence. Is what you are saying that software that sets version to
anything but 0 breaks? According to the discussion triggered by the
patch adding the feature that we fix here (see the thread [PATCH
07/31] netfilter: ctnetlink: Support L3 protocol-filter on flush),
using the version field is the correct solution. Pablo wrote:
> The version field was meant to deal with this case.
>
> It has been not unused so far because we had no good reason.
So I guess Nicholas worry was correct, that there are applications
that leave version uninitialized and they trigger the regression. I
will let others decide if not setting version counts as a regression
or incorrect API usage. If an uninitialized version counts as a
regression, I am fine with reverting and will try to come up with
another approach. However, I guess we now might have users that depend
on the new behavior of flush as well.
BR,
Kristian
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 08/13] netfilter: ctnetlink: Resolve conntrack L3-protocol flush regression
2019-06-25 11:52 ` Kristian Evensen
@ 2019-06-25 14:45 ` Felix Kaechele
2019-06-25 15:08 ` Kristian Evensen
2019-06-25 15:45 ` Kristian Evensen
2019-06-25 19:40 ` Pablo Neira Ayuso
1 sibling, 2 replies; 30+ messages in thread
From: Felix Kaechele @ 2019-06-25 14:45 UTC (permalink / raw)
To: Kristian Evensen; +Cc: Pablo Neira Ayuso, Netfilter Development Mailing list
On 2019-06-25 7:52 a.m., Kristian Evensen wrote:
> I am sorry for the trouble that my change has caused.
No worries. I appreciate you taking the time helping me out.
>> this patch is giving me some trouble as it breaks deletion of conntrack
>> entries in software that doesn't set the version flag to anything else
>> but 0.
>
> I might be a bit slow, but I have some trouble understanding this
> sentence. Is what you are saying that software that sets version to
> anything but 0 breaks?
Yeah, definitely not my best work of prose ;-)
What I was trying to say is: Any software that remains with the version
set to 0 will not work. By association, since libnetfilter_conntrack
explicitly sets the version to 0, anything that uses
libnetfilter_conntrack will be unable to delete a specific entry in the
conntrack table.
> According to the discussion triggered by the
> patch adding the feature that we fix here (see the thread [PATCH
> 07/31] netfilter: ctnetlink: Support L3 protocol-filter on flush),
> using the version field is the correct solution. Pablo wrote:
>
>> The version field was meant to deal with this case.
>>
>> It has been not unused so far because we had no good reason.
>
> So I guess Nicholas worry was correct, that there are applications
> that leave version uninitialized and they trigger the regression. I
> will let others decide if not setting version counts as a regression
> or incorrect API usage. If an uninitialized version counts as a
> regression, I am fine with reverting and will try to come up with
> another approach. However, I guess we now might have users that depend
> on the new behavior of flush as well.
I believe that's not the issue here.
So here's what my understanding is of what is happening:
Let's go back to that line of code:
u_int8_t u3 = nfmsg->version ? nfmsg->nfgen_family : AF_UNSPEC;
Just to make sure I understand this correctly: If the version is set to
0 the address family (l3proto) will be set to AF_UNSPEC regardless of
what the actual l3proto was set to by the user of the API.
It is only set to the value chosen by the if the version is set to a
non-null value.
We assume that all clients that require the old behaviour set their
version to 0, since that's the only valid value to set it to at this
point anyway.
The problem that arises from this:
When creating a conntrack entry with libnetfilter_conntrack it will
happly accept the address family you supply. Let's assume we create an
entry where AF_INET is supplied.
The entry will be created with AF_INET as it's l3proto. Hence, the tuple
and its hash are only going to be matched upon search if the l3proto
matches as well.
Now, when you try to delete an entry using libnetfilter_conntrack it
will explicitly set it's version to 0 in the process of creating the
message to the API, as it usually does.
Therefor the kernel, under the new behaviour, will disregard the l3proto
and set AF_UNSPEC instead. By doing this we have created a new tuple
with a hash that is different from the hash of the tuple that was used
when creating the conntrack entry, since it was initially created with
AF_INET, not AF_UNSPEC.
Therefor the search will turn up with no results as the tuples we
actually want to match are now different, after AF_INET was yanked and
AF_UNSPEC was put in.
Because the search doesn't find the entry we're trying to delete it will
return ENOENT and our entry will not be deleted.
I hope my description is somewhat comprehensible. I'm basically thinking
out loud here.
I have to admit that I also don't know how to approach this in a way
that will not break userspace but also satisfy the requirements of users
that would like L3 proto filtering.
I believe the changes required to make it work would also need to
permeate into how searches are performed. Specifically it would need to
include tuples with all possible l3protos when AF_UNSPEC is given, so we
actually gather all entries regardless of l3proto. I believe that this
is the intended behaviour, right?
Regards,
Felix
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 08/13] netfilter: ctnetlink: Resolve conntrack L3-protocol flush regression
2019-06-25 14:45 ` Felix Kaechele
@ 2019-06-25 15:08 ` Kristian Evensen
2019-06-25 16:16 ` Felix Kaechele
2019-06-25 15:45 ` Kristian Evensen
1 sibling, 1 reply; 30+ messages in thread
From: Kristian Evensen @ 2019-06-25 15:08 UTC (permalink / raw)
To: Felix Kaechele; +Cc: Pablo Neira Ayuso, Netfilter Development Mailing list
Hi,
On Tue, Jun 25, 2019 at 4:45 PM Felix Kaechele <felix@kaechele.ca> wrote:
> No worries. I appreciate you taking the time helping me out.
>
> >> this patch is giving me some trouble as it breaks deletion of conntrack
> >> entries in software that doesn't set the version flag to anything else
> >> but 0.
> >
> > I might be a bit slow, but I have some trouble understanding this
> > sentence. Is what you are saying that software that sets version to
> > anything but 0 breaks?
>
> Yeah, definitely not my best work of prose ;-)
> What I was trying to say is: Any software that remains with the version
> set to 0 will not work. By association, since libnetfilter_conntrack
> explicitly sets the version to 0, anything that uses
> libnetfilter_conntrack will be unable to delete a specific entry in the
> conntrack table.
Thanks, now I follow. I now see that you are talking about the
deleting and not flushing. Unless anyone beats me to it, I will try to
take a closer look at the problem later today. Pablos patch implements
the first thing that I wanted to try (only read and use version/family
when flushing), and I see that Nicolas has made some suggestions. If
you could first try Pablo's patch with Nicolas' changes, that would be
great as the change should revert behavior of delete back to how it
was before my change.
BR,
Kristian
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 08/13] netfilter: ctnetlink: Resolve conntrack L3-protocol flush regression
2019-06-25 15:08 ` Kristian Evensen
@ 2019-06-25 16:16 ` Felix Kaechele
2019-06-25 19:45 ` Pablo Neira Ayuso
0 siblings, 1 reply; 30+ messages in thread
From: Felix Kaechele @ 2019-06-25 16:16 UTC (permalink / raw)
To: Kristian Evensen; +Cc: Pablo Neira Ayuso, Netfilter Development Mailing list
[-- Attachment #1: Type: text/plain, Size: 542 bytes --]
On 2019-06-25 11:08 a.m., Kristian Evensen wrote:
> Pablos patch implements
> the first thing that I wanted to try (only read and use version/family
> when flushing), and I see that Nicolas has made some suggestions. If
> you could first try Pablo's patch with Nicolas' changes, that would be
> great as the change should revert behavior of delete back to how it
> was before my change.
Yes, these changes fix the issue for me.
I have attached the patch I used, which is probably the change that
Pablo initially intended.
Thanks!
Felix
[-- Attachment #2: fix-conntrack-delete.patch --]
[-- Type: text/x-patch, Size: 1109 bytes --]
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index d2715b4d2e72..061bdab37b1a 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1254,7 +1254,6 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
struct nf_conntrack_tuple tuple;
struct nf_conn *ct;
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- u_int8_t u3 = nfmsg->version ? nfmsg->nfgen_family : AF_UNSPEC;
struct nf_conntrack_zone zone;
int err;
@@ -1264,11 +1263,13 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
if (cda[CTA_TUPLE_ORIG])
err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG,
- u3, &zone);
+ nfmsg->nfgen_family, &zone);
else if (cda[CTA_TUPLE_REPLY])
err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY,
- u3, &zone);
+ nfmsg->nfgen_family, &zone);
else {
+ u_int8_t u3 = nfmsg->version ? nfmsg->nfgen_family : AF_UNSPEC;
+
return ctnetlink_flush_conntrack(net, cda,
NETLINK_CB(skb).portid,
nlmsg_report(nlh), u3);
^ permalink raw reply related [flat|nested] 30+ messages in thread
* Re: [PATCH 08/13] netfilter: ctnetlink: Resolve conntrack L3-protocol flush regression
2019-06-25 16:16 ` Felix Kaechele
@ 2019-06-25 19:45 ` Pablo Neira Ayuso
0 siblings, 0 replies; 30+ messages in thread
From: Pablo Neira Ayuso @ 2019-06-25 19:45 UTC (permalink / raw)
To: Felix Kaechele; +Cc: Kristian Evensen, Netfilter Development Mailing list
On Tue, Jun 25, 2019 at 12:16:30PM -0400, Felix Kaechele wrote:
> On 2019-06-25 11:08 a.m., Kristian Evensen wrote:
>
> > Pablos patch implements
> > the first thing that I wanted to try (only read and use version/family
> > when flushing), and I see that Nicolas has made some suggestions. If
> > you could first try Pablo's patch with Nicolas' changes, that would be
> > great as the change should revert behavior of delete back to how it
> > was before my change.
>
> Yes, these changes fix the issue for me.
>
> I have attached the patch I used, which is probably the change that Pablo
> initially intended.
That's the right fix indeed, would you mind to submit it including a
patch description and Signed-off-by: tag?
This should apply via git-am.
Thanks.
> diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
> index d2715b4d2e72..061bdab37b1a 100644
> --- a/net/netfilter/nf_conntrack_netlink.c
> +++ b/net/netfilter/nf_conntrack_netlink.c
> @@ -1254,7 +1254,6 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
> struct nf_conntrack_tuple tuple;
> struct nf_conn *ct;
> struct nfgenmsg *nfmsg = nlmsg_data(nlh);
> - u_int8_t u3 = nfmsg->version ? nfmsg->nfgen_family : AF_UNSPEC;
> struct nf_conntrack_zone zone;
> int err;
>
> @@ -1264,11 +1263,13 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
>
> if (cda[CTA_TUPLE_ORIG])
> err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG,
> - u3, &zone);
> + nfmsg->nfgen_family, &zone);
> else if (cda[CTA_TUPLE_REPLY])
> err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY,
> - u3, &zone);
> + nfmsg->nfgen_family, &zone);
> else {
> + u_int8_t u3 = nfmsg->version ? nfmsg->nfgen_family : AF_UNSPEC;
> +
> return ctnetlink_flush_conntrack(net, cda,
> NETLINK_CB(skb).portid,
> nlmsg_report(nlh), u3);
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 08/13] netfilter: ctnetlink: Resolve conntrack L3-protocol flush regression
2019-06-25 14:45 ` Felix Kaechele
2019-06-25 15:08 ` Kristian Evensen
@ 2019-06-25 15:45 ` Kristian Evensen
1 sibling, 0 replies; 30+ messages in thread
From: Kristian Evensen @ 2019-06-25 15:45 UTC (permalink / raw)
To: Felix Kaechele; +Cc: Pablo Neira Ayuso, Netfilter Development Mailing list
Hi Felix,
On Tue, Jun 25, 2019 at 4:45 PM Felix Kaechele <felix@kaechele.ca> wrote:
> So here's what my understanding is of what is happening:
>
> Let's go back to that line of code:
>
> u_int8_t u3 = nfmsg->version ? nfmsg->nfgen_family : AF_UNSPEC;
>
> Just to make sure I understand this correctly: If the version is set to
> 0 the address family (l3proto) will be set to AF_UNSPEC regardless of
> what the actual l3proto was set to by the user of the API.
> It is only set to the value chosen by the if the version is set to a
> non-null value.
> We assume that all clients that require the old behaviour set their
> version to 0, since that's the only valid value to set it to at this
> point anyway.
Yes, your understanding is correct and I think I now see what has gone
wrong. The change in the patch we are discussing here should only be
applied to the flush-path. What happened was that when fixing up the
support for flushing, we (well, I) forgot about delete. Until my
original patch got merged, u3 was never used when flushing. However,
the value was used when deleting. By changing the value assigned to
u3, we unfortunately broke delete. By moving the "u3 = nfmsg->version
...." line to the else-clause (like Pablo did in his patch) and
passing nfmsg->nfgen_family (like Nicolas suggests) to the
parse_tuple-calls, the old behavior for delete should be restored and
filter still support flushing by L3-protocol.
BR,
Kristian
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 08/13] netfilter: ctnetlink: Resolve conntrack L3-protocol flush regression
2019-06-25 11:52 ` Kristian Evensen
2019-06-25 14:45 ` Felix Kaechele
@ 2019-06-25 19:40 ` Pablo Neira Ayuso
1 sibling, 0 replies; 30+ messages in thread
From: Pablo Neira Ayuso @ 2019-06-25 19:40 UTC (permalink / raw)
To: Kristian Evensen; +Cc: Felix Kaechele, Netfilter Development Mailing list
On Tue, Jun 25, 2019 at 01:52:32PM +0200, Kristian Evensen wrote:
> Hi,
>
> I am sorry for the trouble that my change has caused. I do not know
> what the correct action would be, but I am just trying to figure out
> what is going on. In your first email, you wrote:
>
> > this patch is giving me some trouble as it breaks deletion of conntrack
> > entries in software that doesn't set the version flag to anything else
> > but 0.
>
> I might be a bit slow, but I have some trouble understanding this
> sentence. Is what you are saying that software that sets version to
> anything but 0 breaks? According to the discussion triggered by the
> patch adding the feature that we fix here (see the thread [PATCH
> 07/31] netfilter: ctnetlink: Support L3 protocol-filter on flush),
> using the version field is the correct solution. Pablo wrote:
>
> > The version field was meant to deal with this case.
> >
> > It has been not unused so far because we had no good reason.
>
> So I guess Nicholas worry was correct, that there are applications
> that leave version uninitialized and they trigger the regression. I
> will let others decide if not setting version counts as a regression
> or incorrect API usage. If an uninitialized version counts as a
> regression, I am fine with reverting and will try to come up with
> another approach. However, I guess we now might have users that depend
> on the new behavior of flush as well.
What kind of application is leaving this uninitialized?
The software you're pointing to is using libnetfilter_conntrack.
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 08/13] netfilter: ctnetlink: Resolve conntrack L3-protocol flush regression
2019-06-25 11:33 ` Felix Kaechele
2019-06-25 11:52 ` Kristian Evensen
@ 2019-06-25 19:41 ` Pablo Neira Ayuso
1 sibling, 0 replies; 30+ messages in thread
From: Pablo Neira Ayuso @ 2019-06-25 19:41 UTC (permalink / raw)
To: Felix Kaechele; +Cc: netfilter-devel, kristian.evensen
On Tue, Jun 25, 2019 at 07:33:05AM -0400, Felix Kaechele wrote:
> On 2019-06-25 4:08 a.m., Pablo Neira Ayuso wrote:
> > As you describe, conntrack is a hashtable and the layer 3 protocol is
> > part of the hash:
> >
> > https://elixir.bootlin.com/linux/latest/source/net/netfilter/nf_conntrack_core.c#L188
> >
> > so AF_UNSPEC cannot work.
> >
> > There is no support for layer 3 wildcard deletion.
>
> So in this case I'd like to propose two options:
>
> 1. the patch should be reverted and userspace fixed to properly request
> flushing of both AF_INET and AF_INET6 entries in the table when doing a full
> flush
>
> 2. both this patch as well as the initial patch "netfilter: ctnetlink:
> Support L3 protocol-filter on flush" should be reverted and a new approach
> should be made to implement that feature.
>
> As it stands right now current kernel versions that are being released break
> userspace, which is unfortunate, because it forces me to run older,
> vulnerable kernels.
Your usecase has never ever worked. You cannot delete entries via
AF_UNSPEC, you're just mixing things up.
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 08/13] netfilter: ctnetlink: Resolve conntrack L3-protocol flush regression
2019-06-24 23:58 ` Pablo Neira Ayuso
2019-06-25 3:02 ` Felix Kaechele
@ 2019-06-25 15:01 ` Nicolas Dichtel
2019-06-25 19:44 ` Pablo Neira Ayuso
1 sibling, 1 reply; 30+ messages in thread
From: Nicolas Dichtel @ 2019-06-25 15:01 UTC (permalink / raw)
To: Pablo Neira Ayuso, Felix Kaechele; +Cc: netfilter-devel, Kristian Evensen
Le 25/06/2019 à 01:58, Pablo Neira Ayuso a écrit :
> On Sun, Jun 23, 2019 at 11:44:09PM -0400, Felix Kaechele wrote:
> [...]
>> [felix@x1 utils]$ sudo ./conntrack_delete
>>
>> TEST: delete conntrack (-1)(No such file or directory)
>
> Could you give a try to this patch?
>
>
> x.patch
>
> diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
> index 7db79c1b8084..4886b1599014 100644
> --- a/net/netfilter/nf_conntrack_netlink.c
> +++ b/net/netfilter/nf_conntrack_netlink.c
> @@ -1256,7 +1256,6 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
> struct nf_conntrack_tuple tuple;
> struct nf_conn *ct;
> struct nfgenmsg *nfmsg = nlmsg_data(nlh);
> - u_int8_t u3 = nfmsg->version ? nfmsg->nfgen_family : AF_UNSPEC;
> struct nf_conntrack_zone zone;
> int err;
>
> @@ -1266,11 +1265,13 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
>
> if (cda[CTA_TUPLE_ORIG])
> err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG,
> - u3, &zone);
> + nfmsg->version, &zone);
nfmsg->nfgen_family?
> else if (cda[CTA_TUPLE_REPLY])
> err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY,
> - u3, &zone);
> + nfmsg->version, &zone);
Same here?
> else {
> + u_int8_t u3 = nfmsg->version ? nfmsg->nfgen_family : AF_UNSPEC;
> +
> return ctnetlink_flush_conntrack(net, cda,
> NETLINK_CB(skb).portid,
> nlmsg_report(nlh), u3);
>
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 08/13] netfilter: ctnetlink: Resolve conntrack L3-protocol flush regression
2019-06-25 15:01 ` Nicolas Dichtel
@ 2019-06-25 19:44 ` Pablo Neira Ayuso
0 siblings, 0 replies; 30+ messages in thread
From: Pablo Neira Ayuso @ 2019-06-25 19:44 UTC (permalink / raw)
To: Nicolas Dichtel; +Cc: Felix Kaechele, netfilter-devel, Kristian Evensen
On Tue, Jun 25, 2019 at 05:01:36PM +0200, Nicolas Dichtel wrote:
> Le 25/06/2019 à 01:58, Pablo Neira Ayuso a écrit :
> > On Sun, Jun 23, 2019 at 11:44:09PM -0400, Felix Kaechele wrote:
> > [...]
> >> [felix@x1 utils]$ sudo ./conntrack_delete
> >>
> >> TEST: delete conntrack (-1)(No such file or directory)
> >
> > Could you give a try to this patch?
> >
> >
> > x.patch
> >
> > diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
> > index 7db79c1b8084..4886b1599014 100644
> > --- a/net/netfilter/nf_conntrack_netlink.c
> > +++ b/net/netfilter/nf_conntrack_netlink.c
> > @@ -1256,7 +1256,6 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
> > struct nf_conntrack_tuple tuple;
> > struct nf_conn *ct;
> > struct nfgenmsg *nfmsg = nlmsg_data(nlh);
> > - u_int8_t u3 = nfmsg->version ? nfmsg->nfgen_family : AF_UNSPEC;
> > struct nf_conntrack_zone zone;
> > int err;
> >
> > @@ -1266,11 +1265,13 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
> >
> > if (cda[CTA_TUPLE_ORIG])
> > err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG,
> > - u3, &zone);
> > + nfmsg->version, &zone);
> nfmsg->nfgen_family?
>
> > else if (cda[CTA_TUPLE_REPLY])
> > err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY,
> > - u3, &zone);
> > + nfmsg->version, &zone);
> Same here?
Right, will send v2. Thanks Nicolas.
^ permalink raw reply [flat|nested] 30+ messages in thread
* [PATCH 09/13] netfilter: nf_conntrack_h323: Remove deprecated config check
2019-05-13 9:56 [PATCH 00/13] Netfilter fixes for net Pablo Neira Ayuso
` (7 preceding siblings ...)
2019-05-13 9:56 ` [PATCH 08/13] netfilter: ctnetlink: Resolve conntrack L3-protocol flush regression Pablo Neira Ayuso
@ 2019-05-13 9:56 ` Pablo Neira Ayuso
2019-05-13 9:56 ` [PATCH 10/13] netfilter: nf_flow_table: do not flow offload deleted conntrack entries Pablo Neira Ayuso
` (4 subsequent siblings)
13 siblings, 0 replies; 30+ messages in thread
From: Pablo Neira Ayuso @ 2019-05-13 9:56 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev
From: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
CONFIG_NF_CONNTRACK_IPV6 has been deprecated so replace it with a check
for IPV6 instead.
Use nf_ip6_route6() instead of v6ops->route() and keep the IS_MODULE()
in nf_ipv6_ops as mentioned by Florian so that direct calls are used
when IPV6 is builtin and indirect calls are used only when IPV6 is a
module.
Fixes: a0ae2562c6c4b2 ("netfilter: conntrack: remove l3proto abstraction")
Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Reviewed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
net/netfilter/nf_conntrack_h323_main.c | 11 +++--------
1 file changed, 3 insertions(+), 8 deletions(-)
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 005589c6d0f6..12de40390e97 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -748,24 +748,19 @@ static int callforward_do_filter(struct net *net,
}
break;
}
-#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
+#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6: {
- const struct nf_ipv6_ops *v6ops;
struct rt6_info *rt1, *rt2;
struct flowi6 fl1, fl2;
- v6ops = nf_get_ipv6_ops();
- if (!v6ops)
- return 0;
-
memset(&fl1, 0, sizeof(fl1));
fl1.daddr = src->in6;
memset(&fl2, 0, sizeof(fl2));
fl2.daddr = dst->in6;
- if (!v6ops->route(net, (struct dst_entry **)&rt1,
+ if (!nf_ip6_route(net, (struct dst_entry **)&rt1,
flowi6_to_flowi(&fl1), false)) {
- if (!v6ops->route(net, (struct dst_entry **)&rt2,
+ if (!nf_ip6_route(net, (struct dst_entry **)&rt2,
flowi6_to_flowi(&fl2), false)) {
if (ipv6_addr_equal(rt6_nexthop(rt1, &fl1.daddr),
rt6_nexthop(rt2, &fl2.daddr)) &&
--
2.11.0
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 10/13] netfilter: nf_flow_table: do not flow offload deleted conntrack entries
2019-05-13 9:56 [PATCH 00/13] Netfilter fixes for net Pablo Neira Ayuso
` (8 preceding siblings ...)
2019-05-13 9:56 ` [PATCH 09/13] netfilter: nf_conntrack_h323: Remove deprecated config check Pablo Neira Ayuso
@ 2019-05-13 9:56 ` Pablo Neira Ayuso
2019-05-13 9:56 ` [PATCH 11/13] netfilter: ebtables: CONFIG_COMPAT: reject trailing data after last rule Pablo Neira Ayuso
` (3 subsequent siblings)
13 siblings, 0 replies; 30+ messages in thread
From: Pablo Neira Ayuso @ 2019-05-13 9:56 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev
From: Taehee Yoo <ap420073@gmail.com>
Conntrack entries can be deleted by the masquerade module. In that case,
flow offload should be deleted too, but GC and data-path of flow offload
do not check for conntrack status bits, hence flow offload entries will
be removed only by the timeout.
Update garbage collector and data-path to check for ct->status. If
IPS_DYING_BIT is set, garbage collector removes flow offload entries and
data-path routine ignores them.
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
net/netfilter/nf_flow_table_core.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index a9e4f74b1ff6..4469519a4879 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -243,6 +243,7 @@ flow_offload_lookup(struct nf_flowtable *flow_table,
{
struct flow_offload_tuple_rhash *tuplehash;
struct flow_offload *flow;
+ struct flow_offload_entry *e;
int dir;
tuplehash = rhashtable_lookup(&flow_table->rhashtable, tuple,
@@ -255,6 +256,10 @@ flow_offload_lookup(struct nf_flowtable *flow_table,
if (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN))
return NULL;
+ e = container_of(flow, struct flow_offload_entry, flow);
+ if (unlikely(nf_ct_is_dying(e->ct)))
+ return NULL;
+
return tuplehash;
}
EXPORT_SYMBOL_GPL(flow_offload_lookup);
@@ -301,8 +306,10 @@ static inline bool nf_flow_has_expired(const struct flow_offload *flow)
static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
{
struct nf_flowtable *flow_table = data;
+ struct flow_offload_entry *e;
- if (nf_flow_has_expired(flow) ||
+ e = container_of(flow, struct flow_offload_entry, flow);
+ if (nf_flow_has_expired(flow) || nf_ct_is_dying(e->ct) ||
(flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN)))
flow_offload_del(flow_table, flow);
}
--
2.11.0
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 11/13] netfilter: ebtables: CONFIG_COMPAT: reject trailing data after last rule
2019-05-13 9:56 [PATCH 00/13] Netfilter fixes for net Pablo Neira Ayuso
` (9 preceding siblings ...)
2019-05-13 9:56 ` [PATCH 10/13] netfilter: nf_flow_table: do not flow offload deleted conntrack entries Pablo Neira Ayuso
@ 2019-05-13 9:56 ` Pablo Neira Ayuso
2019-05-13 9:56 ` [PATCH 12/13] netfilter: nf_tables: remove NFT_CT_TIMEOUT Pablo Neira Ayuso
` (2 subsequent siblings)
13 siblings, 0 replies; 30+ messages in thread
From: Pablo Neira Ayuso @ 2019-05-13 9:56 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev
From: Florian Westphal <fw@strlen.de>
If userspace provides a rule blob with trailing data after last target,
we trigger a splat, then convert ruleset to 64bit format (with trailing
data), then pass that to do_replace_finish() which then returns -EINVAL.
Erroring out right away avoids the splat plus unneeded translation and
error unwind.
Fixes: 81e675c227ec ("netfilter: ebtables: add CONFIG_COMPAT support")
Reported-by: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
net/bridge/netfilter/ebtables.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 3cad01ac64e4..3a1b94b5c0e5 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -2158,7 +2158,9 @@ static int compat_copy_entries(unsigned char *data, unsigned int size_user,
if (ret < 0)
return ret;
- WARN_ON(size_remaining);
+ if (size_remaining)
+ return -EINVAL;
+
return state->buf_kern_offset;
}
--
2.11.0
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 12/13] netfilter: nf_tables: remove NFT_CT_TIMEOUT
2019-05-13 9:56 [PATCH 00/13] Netfilter fixes for net Pablo Neira Ayuso
` (10 preceding siblings ...)
2019-05-13 9:56 ` [PATCH 11/13] netfilter: ebtables: CONFIG_COMPAT: reject trailing data after last rule Pablo Neira Ayuso
@ 2019-05-13 9:56 ` Pablo Neira Ayuso
2019-05-13 9:56 ` [PATCH 13/13] netfilter: nf_tables: correct NFT_LOGLEVEL_MAX value Pablo Neira Ayuso
2019-05-13 16:02 ` [PATCH 00/13] Netfilter fixes for net David Miller
13 siblings, 0 replies; 30+ messages in thread
From: Pablo Neira Ayuso @ 2019-05-13 9:56 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev
Never used anywhere in the code.
Fixes: 7e0b2b57f01d ("netfilter: nft_ct: add ct timeout support")
Reported-by: Stéphane Veyret <sveyret@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
include/uapi/linux/netfilter/nf_tables.h | 2 --
1 file changed, 2 deletions(-)
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index a66c8de006cc..92bb1e2b2425 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -966,7 +966,6 @@ enum nft_socket_keys {
* @NFT_CT_DST_IP: conntrack layer 3 protocol destination (IPv4 address)
* @NFT_CT_SRC_IP6: conntrack layer 3 protocol source (IPv6 address)
* @NFT_CT_DST_IP6: conntrack layer 3 protocol destination (IPv6 address)
- * @NFT_CT_TIMEOUT: connection tracking timeout policy assigned to conntrack
*/
enum nft_ct_keys {
NFT_CT_STATE,
@@ -992,7 +991,6 @@ enum nft_ct_keys {
NFT_CT_DST_IP,
NFT_CT_SRC_IP6,
NFT_CT_DST_IP6,
- NFT_CT_TIMEOUT,
__NFT_CT_MAX
};
#define NFT_CT_MAX (__NFT_CT_MAX - 1)
--
2.11.0
^ permalink raw reply related [flat|nested] 30+ messages in thread
* [PATCH 13/13] netfilter: nf_tables: correct NFT_LOGLEVEL_MAX value
2019-05-13 9:56 [PATCH 00/13] Netfilter fixes for net Pablo Neira Ayuso
` (11 preceding siblings ...)
2019-05-13 9:56 ` [PATCH 12/13] netfilter: nf_tables: remove NFT_CT_TIMEOUT Pablo Neira Ayuso
@ 2019-05-13 9:56 ` Pablo Neira Ayuso
2019-05-13 16:02 ` [PATCH 00/13] Netfilter fixes for net David Miller
13 siblings, 0 replies; 30+ messages in thread
From: Pablo Neira Ayuso @ 2019-05-13 9:56 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev
From: Florian Westphal <fw@strlen.de>
should be same as NFT_LOGLEVEL_AUDIT, so use -, not +.
Fixes: 7eced5ab5a73 ("netfilter: nf_tables: add NFT_LOGLEVEL_* enumeration and use it")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
include/uapi/linux/netfilter/nf_tables.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 92bb1e2b2425..7bdb234f3d8c 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1134,7 +1134,7 @@ enum nft_log_level {
NFT_LOGLEVEL_AUDIT,
__NFT_LOGLEVEL_MAX
};
-#define NFT_LOGLEVEL_MAX (__NFT_LOGLEVEL_MAX + 1)
+#define NFT_LOGLEVEL_MAX (__NFT_LOGLEVEL_MAX - 1)
/**
* enum nft_queue_attributes - nf_tables queue expression netlink attributes
--
2.11.0
^ permalink raw reply related [flat|nested] 30+ messages in thread
* Re: [PATCH 00/13] Netfilter fixes for net
2019-05-13 9:56 [PATCH 00/13] Netfilter fixes for net Pablo Neira Ayuso
` (12 preceding siblings ...)
2019-05-13 9:56 ` [PATCH 13/13] netfilter: nf_tables: correct NFT_LOGLEVEL_MAX value Pablo Neira Ayuso
@ 2019-05-13 16:02 ` David Miller
13 siblings, 0 replies; 30+ messages in thread
From: David Miller @ 2019-05-13 16:02 UTC (permalink / raw)
To: pablo; +Cc: netfilter-devel, netdev
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 13 May 2019 11:56:17 +0200
> The following patchset contains Netfilter fixes for net:
...
> This batch comes with a conflict that can be fixed with this patch:
Thanks for this.
> You can pull these changes from:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf.git
Pulled, thanks again.
^ permalink raw reply [flat|nested] 30+ messages in thread