netfilter-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH AUTOSEL 4.19 20/97] netfilter: nf_tables: fix suspicious RCU usage in nft_chain_stats_replace()
       [not found] <20181226223557.149329-1-sashal@kernel.org>
@ 2018-12-26 22:34 ` Sasha Levin
  2018-12-26 22:34 ` [PATCH AUTOSEL 4.19 21/97] netfilter: seqadj: re-load tcp header pointer after possible head reallocation Sasha Levin
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 5+ messages in thread
From: Sasha Levin @ 2018-12-26 22:34 UTC (permalink / raw)
  To: stable, linux-kernel
  Cc: Taehee Yoo, Pablo Neira Ayuso, Sasha Levin, netfilter-devel,
	coreteam, netdev

From: Taehee Yoo <ap420073@gmail.com>

[ Upstream commit 4c05ec47384ab3627b62814e8f886e90cc38ce15 ]

basechain->stats is rcu protected data which is updated from
nft_chain_stats_replace(). This function is executed from the commit
phase which holds the pernet nf_tables commit mutex - not the global
nfnetlink subsystem mutex.

Test commands to reproduce the problem are:
   %iptables-nft -I INPUT
   %iptables-nft -Z
   %iptables-nft -Z

This patch uses RCU calls to handle basechain->stats updates to fix a
splat that looks like:

[89279.358755] =============================
[89279.363656] WARNING: suspicious RCU usage
[89279.368458] 4.20.0-rc2+ #44 Tainted: G        W    L
[89279.374661] -----------------------------
[89279.379542] net/netfilter/nf_tables_api.c:1404 suspicious rcu_dereference_protected() usage!
[...]
[89279.406556] 1 lock held by iptables-nft/5225:
[89279.411728]  #0: 00000000bf45a000 (&net->nft.commit_mutex){+.+.}, at: nf_tables_valid_genid+0x1f/0x70 [nf_tables]
[89279.424022] stack backtrace:
[89279.429236] CPU: 0 PID: 5225 Comm: iptables-nft Tainted: G        W    L    4.20.0-rc2+ #44
[89279.430135] Call Trace:
[89279.430135]  dump_stack+0xc9/0x16b
[89279.430135]  ? show_regs_print_info+0x5/0x5
[89279.430135]  ? lockdep_rcu_suspicious+0x117/0x160
[89279.430135]  nft_chain_commit_update+0x4ea/0x640 [nf_tables]
[89279.430135]  ? sched_clock_local+0xd4/0x140
[89279.430135]  ? check_flags.part.35+0x440/0x440
[89279.430135]  ? __rhashtable_remove_fast.constprop.67+0xec0/0xec0 [nf_tables]
[89279.430135]  ? sched_clock_cpu+0x126/0x170
[89279.430135]  ? find_held_lock+0x39/0x1c0
[89279.430135]  ? hlock_class+0x140/0x140
[89279.430135]  ? is_bpf_text_address+0x5/0xf0
[89279.430135]  ? check_flags.part.35+0x440/0x440
[89279.430135]  ? __lock_is_held+0xb4/0x140
[89279.430135]  nf_tables_commit+0x2555/0x39c0 [nf_tables]

Fixes: f102d66b335a4 ("netfilter: nf_tables: use dedicated mutex to guard transactions")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 include/linux/netfilter/nfnetlink.h | 12 ------------
 net/netfilter/nf_tables_api.c       | 21 +++++++++++++--------
 net/netfilter/nf_tables_core.c      |  2 +-
 3 files changed, 14 insertions(+), 21 deletions(-)

diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index 4a520d3304a2..cf09ab37b45b 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -62,18 +62,6 @@ static inline bool lockdep_nfnl_is_held(__u8 subsys_id)
 }
 #endif /* CONFIG_PROVE_LOCKING */
 
-/*
- * nfnl_dereference - fetch RCU pointer when updates are prevented by subsys mutex
- *
- * @p: The pointer to read, prior to dereferencing
- * @ss: The nfnetlink subsystem ID
- *
- * Return the value of the specified RCU-protected pointer, but omit
- * the READ_ONCE(), because caller holds the NFNL subsystem mutex.
- */
-#define nfnl_dereference(p, ss)					\
-	rcu_dereference_protected(p, lockdep_nfnl_is_held(ss))
-
 #define MODULE_ALIAS_NFNL_SUBSYS(subsys) \
 	MODULE_ALIAS("nfnetlink-subsys-" __stringify(subsys))
 
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index fe0558b15fd3..ed9af46720e1 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1199,7 +1199,8 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
 		if (nla_put_string(skb, NFTA_CHAIN_TYPE, basechain->type->name))
 			goto nla_put_failure;
 
-		if (basechain->stats && nft_dump_stats(skb, basechain->stats))
+		if (rcu_access_pointer(basechain->stats) &&
+		    nft_dump_stats(skb, rcu_dereference(basechain->stats)))
 			goto nla_put_failure;
 	}
 
@@ -1375,7 +1376,8 @@ static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr)
 	return newstats;
 }
 
-static void nft_chain_stats_replace(struct nft_base_chain *chain,
+static void nft_chain_stats_replace(struct net *net,
+				    struct nft_base_chain *chain,
 				    struct nft_stats __percpu *newstats)
 {
 	struct nft_stats __percpu *oldstats;
@@ -1383,8 +1385,9 @@ static void nft_chain_stats_replace(struct nft_base_chain *chain,
 	if (newstats == NULL)
 		return;
 
-	if (chain->stats) {
-		oldstats = nfnl_dereference(chain->stats, NFNL_SUBSYS_NFTABLES);
+	if (rcu_access_pointer(chain->stats)) {
+		oldstats = rcu_dereference_protected(chain->stats,
+					lockdep_commit_lock_is_held(net));
 		rcu_assign_pointer(chain->stats, newstats);
 		synchronize_rcu();
 		free_percpu(oldstats);
@@ -1421,9 +1424,10 @@ static void nf_tables_chain_destroy(struct nft_ctx *ctx)
 		struct nft_base_chain *basechain = nft_base_chain(chain);
 
 		module_put(basechain->type->owner);
-		free_percpu(basechain->stats);
-		if (basechain->stats)
+		if (rcu_access_pointer(basechain->stats)) {
 			static_branch_dec(&nft_counters_enabled);
+			free_percpu(rcu_dereference_raw(basechain->stats));
+		}
 		kfree(chain->name);
 		kfree(basechain);
 	} else {
@@ -1572,7 +1576,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
 				kfree(basechain);
 				return PTR_ERR(stats);
 			}
-			basechain->stats = stats;
+			rcu_assign_pointer(basechain->stats, stats);
 			static_branch_inc(&nft_counters_enabled);
 		}
 
@@ -6145,7 +6149,8 @@ static void nft_chain_commit_update(struct nft_trans *trans)
 		return;
 
 	basechain = nft_base_chain(trans->ctx.chain);
-	nft_chain_stats_replace(basechain, nft_trans_chain_stats(trans));
+	nft_chain_stats_replace(trans->ctx.net, basechain,
+				nft_trans_chain_stats(trans));
 
 	switch (nft_trans_chain_policy(trans)) {
 	case NF_DROP:
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index ffd5c0f9412b..60f258f2c707 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -101,7 +101,7 @@ static noinline void nft_update_chain_stats(const struct nft_chain *chain,
 	struct nft_stats *stats;
 
 	base_chain = nft_base_chain(chain);
-	if (!base_chain->stats)
+	if (!rcu_access_pointer(base_chain->stats))
 		return;
 
 	local_bh_disable();
-- 
2.19.1

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH AUTOSEL 4.19 21/97] netfilter: seqadj: re-load tcp header pointer after possible head reallocation
       [not found] <20181226223557.149329-1-sashal@kernel.org>
  2018-12-26 22:34 ` [PATCH AUTOSEL 4.19 20/97] netfilter: nf_tables: fix suspicious RCU usage in nft_chain_stats_replace() Sasha Levin
@ 2018-12-26 22:34 ` Sasha Levin
  2018-12-26 22:34 ` [PATCH AUTOSEL 4.19 35/97] netfilter: ipset: do not call ipset_nest_end after nla_nest_cancel Sasha Levin
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 5+ messages in thread
From: Sasha Levin @ 2018-12-26 22:34 UTC (permalink / raw)
  To: stable, linux-kernel
  Cc: Florian Westphal, Pablo Neira Ayuso, Sasha Levin,
	netfilter-devel, coreteam, netdev

From: Florian Westphal <fw@strlen.de>

[ Upstream commit 530aad77010b81526586dfc09130ec875cd084e4 ]

When adjusting sack block sequence numbers, skb_make_writable() gets
called to make sure tcp options are all in the linear area, and buffer
is not shared.

This can cause tcp header pointer to get reallocated, so we must
reaload it to avoid memory corruption.

This bug pre-dates git history.

Reported-by: Neel Mehta <nmehta@google.com>
Reported-by: Shane Huntley <shuntley@google.com>
Reported-by: Heather Adkins <argv@google.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 net/netfilter/nf_conntrack_seqadj.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c
index a975efd6b8c3..9da303461069 100644
--- a/net/netfilter/nf_conntrack_seqadj.c
+++ b/net/netfilter/nf_conntrack_seqadj.c
@@ -115,12 +115,12 @@ static void nf_ct_sack_block_adjust(struct sk_buff *skb,
 /* TCP SACK sequence number adjustment */
 static unsigned int nf_ct_sack_adjust(struct sk_buff *skb,
 				      unsigned int protoff,
-				      struct tcphdr *tcph,
 				      struct nf_conn *ct,
 				      enum ip_conntrack_info ctinfo)
 {
-	unsigned int dir, optoff, optend;
+	struct tcphdr *tcph = (void *)skb->data + protoff;
 	struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
+	unsigned int dir, optoff, optend;
 
 	optoff = protoff + sizeof(struct tcphdr);
 	optend = protoff + tcph->doff * 4;
@@ -128,6 +128,7 @@ static unsigned int nf_ct_sack_adjust(struct sk_buff *skb,
 	if (!skb_make_writable(skb, optend))
 		return 0;
 
+	tcph = (void *)skb->data + protoff;
 	dir = CTINFO2DIR(ctinfo);
 
 	while (optoff < optend) {
@@ -207,7 +208,7 @@ int nf_ct_seq_adjust(struct sk_buff *skb,
 		 ntohl(newack));
 	tcph->ack_seq = newack;
 
-	res = nf_ct_sack_adjust(skb, protoff, tcph, ct, ctinfo);
+	res = nf_ct_sack_adjust(skb, protoff, ct, ctinfo);
 out:
 	spin_unlock_bh(&ct->lock);
 
-- 
2.19.1

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH AUTOSEL 4.19 35/97] netfilter: ipset: do not call ipset_nest_end after nla_nest_cancel
       [not found] <20181226223557.149329-1-sashal@kernel.org>
  2018-12-26 22:34 ` [PATCH AUTOSEL 4.19 20/97] netfilter: nf_tables: fix suspicious RCU usage in nft_chain_stats_replace() Sasha Levin
  2018-12-26 22:34 ` [PATCH AUTOSEL 4.19 21/97] netfilter: seqadj: re-load tcp header pointer after possible head reallocation Sasha Levin
@ 2018-12-26 22:34 ` Sasha Levin
  2018-12-26 22:34 ` [PATCH AUTOSEL 4.19 36/97] netfilter: nat: can't use dst_hold on noref dst Sasha Levin
  2018-12-26 22:34 ` [PATCH AUTOSEL 4.19 37/97] netfilter: nf_conncount: use rb_link_node_rcu() instead of rb_link_node() Sasha Levin
  4 siblings, 0 replies; 5+ messages in thread
From: Sasha Levin @ 2018-12-26 22:34 UTC (permalink / raw)
  To: stable, linux-kernel
  Cc: Pan Bian, Jozsef Kadlecsik, Pablo Neira Ayuso, Sasha Levin,
	netfilter-devel, coreteam, netdev

From: Pan Bian <bianpan2016@163.com>

[ Upstream commit 708abf74dd87f8640871b814faa195fb5970b0e3 ]

In the error handling block, nla_nest_cancel(skb, atd) is called to
cancel the nest operation. But then, ipset_nest_end(skb, atd) is
unexpected called to end the nest operation. This patch calls the
ipset_nest_end only on the branch that nla_nest_cancel is not called.

Fixes: 45040978c899 ("netfilter: ipset: Fix set:list type crash when flush/dump set in parallel")
Signed-off-by: Pan Bian <bianpan2016@163.com>
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 net/netfilter/ipset/ip_set_list_set.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 4eef55da0878..8da228da53ae 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -531,8 +531,8 @@ list_set_list(const struct ip_set *set,
 		ret = -EMSGSIZE;
 	} else {
 		cb->args[IPSET_CB_ARG0] = i;
+		ipset_nest_end(skb, atd);
 	}
-	ipset_nest_end(skb, atd);
 out:
 	rcu_read_unlock();
 	return ret;
-- 
2.19.1

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH AUTOSEL 4.19 36/97] netfilter: nat: can't use dst_hold on noref dst
       [not found] <20181226223557.149329-1-sashal@kernel.org>
                   ` (2 preceding siblings ...)
  2018-12-26 22:34 ` [PATCH AUTOSEL 4.19 35/97] netfilter: ipset: do not call ipset_nest_end after nla_nest_cancel Sasha Levin
@ 2018-12-26 22:34 ` Sasha Levin
  2018-12-26 22:34 ` [PATCH AUTOSEL 4.19 37/97] netfilter: nf_conncount: use rb_link_node_rcu() instead of rb_link_node() Sasha Levin
  4 siblings, 0 replies; 5+ messages in thread
From: Sasha Levin @ 2018-12-26 22:34 UTC (permalink / raw)
  To: stable, linux-kernel
  Cc: Florian Westphal, Pablo Neira Ayuso, Sasha Levin,
	netfilter-devel, coreteam, netdev

From: Florian Westphal <fw@strlen.de>

[ Upstream commit 542fbda0f08f1cbbc250f9e59f7537649651d0c8 ]

The dst entry might already have a zero refcount, waiting on rcu list
to be free'd.  Using dst_hold() transitions its reference count to 1, and
next dst release will try to free it again -- resulting in a double free:

  WARNING: CPU: 1 PID: 0 at include/net/dst.h:239 nf_xfrm_me_harder+0xe7/0x130 [nf_nat]
  RIP: 0010:nf_xfrm_me_harder+0xe7/0x130 [nf_nat]
  Code: 48 8b 5c 24 60 65 48 33 1c 25 28 00 00 00 75 53 48 83 c4 68 5b 5d 41 5c c3 85 c0 74 0d 8d 48 01 f0 0f b1 0a 74 86 85 c0 75 f3 <0f> 0b e9 7b ff ff ff 29 c6 31 d2 b9 20 00 48 00 4c 89 e7 e8 31 27
  Call Trace:
  nf_nat_ipv4_out+0x78/0x90 [nf_nat_ipv4]
  nf_hook_slow+0x36/0xd0
  ip_output+0x9f/0xd0
  ip_forward+0x328/0x440
  ip_rcv+0x8a/0xb0

Use dst_hold_safe instead and bail out if we cannot take a reference.

Fixes: a4c2fd7f7891 ("net: remove DST_NOCACHE flag")
Reported-by: Martin Zaharinov <micron10@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 net/netfilter/nf_nat_core.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index e2b196054dfc..2268b10a9dcf 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -117,7 +117,8 @@ int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family)
 	dst = skb_dst(skb);
 	if (dst->xfrm)
 		dst = ((struct xfrm_dst *)dst)->route;
-	dst_hold(dst);
+	if (!dst_hold_safe(dst))
+		return -EHOSTUNREACH;
 
 	if (sk && !net_eq(net, sock_net(sk)))
 		sk = NULL;
-- 
2.19.1

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH AUTOSEL 4.19 37/97] netfilter: nf_conncount: use rb_link_node_rcu() instead of rb_link_node()
       [not found] <20181226223557.149329-1-sashal@kernel.org>
                   ` (3 preceding siblings ...)
  2018-12-26 22:34 ` [PATCH AUTOSEL 4.19 36/97] netfilter: nat: can't use dst_hold on noref dst Sasha Levin
@ 2018-12-26 22:34 ` Sasha Levin
  4 siblings, 0 replies; 5+ messages in thread
From: Sasha Levin @ 2018-12-26 22:34 UTC (permalink / raw)
  To: stable, linux-kernel
  Cc: Taehee Yoo, Pablo Neira Ayuso, Sasha Levin, netfilter-devel,
	coreteam, netdev

From: Taehee Yoo <ap420073@gmail.com>

[ Upstream commit d4e7df16567b80836a78d31b42f1a9355a636d67 ]

rbnode in insert_tree() is rcu protected pointer.
So, in order to handle this pointer, _rcu function should be used.
rb_link_node_rcu() is a rcu version of rb_link_node().

Fixes: 34848d5c896e ("netfilter: nf_conncount: Split insert and traversal")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 net/netfilter/nf_conncount.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index b6d0f6deea86..9cd180bda092 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -427,7 +427,7 @@ insert_tree(struct net *net,
 	count = 1;
 	rbconn->list.count = count;
 
-	rb_link_node(&rbconn->node, parent, rbnode);
+	rb_link_node_rcu(&rbconn->node, parent, rbnode);
 	rb_insert_color(&rbconn->node, root);
 out_unlock:
 	spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
-- 
2.19.1

^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2018-12-26 22:34 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20181226223557.149329-1-sashal@kernel.org>
2018-12-26 22:34 ` [PATCH AUTOSEL 4.19 20/97] netfilter: nf_tables: fix suspicious RCU usage in nft_chain_stats_replace() Sasha Levin
2018-12-26 22:34 ` [PATCH AUTOSEL 4.19 21/97] netfilter: seqadj: re-load tcp header pointer after possible head reallocation Sasha Levin
2018-12-26 22:34 ` [PATCH AUTOSEL 4.19 35/97] netfilter: ipset: do not call ipset_nest_end after nla_nest_cancel Sasha Levin
2018-12-26 22:34 ` [PATCH AUTOSEL 4.19 36/97] netfilter: nat: can't use dst_hold on noref dst Sasha Levin
2018-12-26 22:34 ` [PATCH AUTOSEL 4.19 37/97] netfilter: nf_conncount: use rb_link_node_rcu() instead of rb_link_node() Sasha Levin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).