Netfilter-Devel Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH nf-next 0/4] netfilter: nf_queue: rework refcount handling
@ 2020-03-27  2:24 Florian Westphal
  2020-03-27  2:24 ` [PATCH nf-next 1/4] netfilter: nf_queue: make nf_queue_entry_release_refs static Florian Westphal
                   ` (4 more replies)
  0 siblings, 5 replies; 6+ messages in thread
From: Florian Westphal @ 2020-03-27  2:24 UTC (permalink / raw)
  To: netfilter-devel

running nft_queue.sh selftest with refcount debugging
enabled triggers following splat:

------------[ cut here ]------------
refcount_t: addition on 0; use-after-free.
WARNING: CPU: 0 PID: 2441 at lib/refcount.c:25 refcount_warn_saturate+0xbc/0x110
RIP: 0010:refcount_warn_saturate+0xbc/0x110
[..]
Call Trace:
 nf_queue_entry_get_refs+0x194/0x1b0
 nf_queue+0x38b/0x640
 nf_reinject+0x264/0x280
 nfqnl_recv_verdict+0x5d5/0x920
 nfnetlink_rcv_msg+0x27a/0x460

This is because nf_queue uses following pattern:
nf_queue_entry_get_refs()
  nf_queue() // leave rcu protection
  // nfnetlink, wait for verdict
  // sk might be closed now
nf_reinject // reenter rcu protection
  nf_queue_entry_release_refs // refcount can drop to 0
  // iterate/call remaining hooks and okfn

If the hook iteration results in another nf_queue() call, above splat
might be triggered.

This series fixes this by deferring the call to
nf_queue_entry_release_refs() until after the hook iteration/okfn
returns; i.e. another nf_queue invocation from nf_reinject path will
not observe a zero refcount.

This series also applies to nf, but given we're a bit further along in
release cycle nf-next might be better; this fix isn't simple,
unfortunately.


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH nf-next 1/4] netfilter: nf_queue: make nf_queue_entry_release_refs static
  2020-03-27  2:24 [PATCH nf-next 0/4] netfilter: nf_queue: rework refcount handling Florian Westphal
@ 2020-03-27  2:24 ` Florian Westphal
  2020-03-27  2:24 ` [PATCH nf-next 2/4] netfilter: nf_queue: place bridge physports into queue_entry struct Florian Westphal
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Florian Westphal @ 2020-03-27  2:24 UTC (permalink / raw)
  To: netfilter-devel; +Cc: Florian Westphal

This is a preparation patch, no logical changes.
Move free_entry into core and rename it to something more sensible.

Will ease followup patches which will complicate the refcount handling.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 include/net/netfilter/nf_queue.h |  2 +-
 net/netfilter/nf_queue.c         | 10 ++++++++--
 net/netfilter/nfnetlink_queue.c  | 10 ++--------
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
index 47088083667b..cdbd98730852 100644
--- a/include/net/netfilter/nf_queue.h
+++ b/include/net/netfilter/nf_queue.h
@@ -35,7 +35,7 @@ void nf_unregister_queue_handler(struct net *net);
 void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict);
 
 void nf_queue_entry_get_refs(struct nf_queue_entry *entry);
-void nf_queue_entry_release_refs(struct nf_queue_entry *entry);
+void nf_queue_entry_free(struct nf_queue_entry *entry);
 
 static inline void init_hashrandom(u32 *jhash_initval)
 {
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index f8f52ff99cfb..4da5776a9904 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -64,7 +64,7 @@ static void nf_queue_entry_release_br_nf_refs(struct sk_buff *skb)
 #endif
 }
 
-void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
+static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
 {
 	struct nf_hook_state *state = &entry->state;
 
@@ -78,7 +78,13 @@ void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
 
 	nf_queue_entry_release_br_nf_refs(entry->skb);
 }
-EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs);
+
+void nf_queue_entry_free(struct nf_queue_entry *entry)
+{
+	nf_queue_entry_release_refs(entry);
+	kfree(entry);
+}
+EXPORT_SYMBOL_GPL(nf_queue_entry_free);
 
 static void nf_queue_entry_get_br_nf_refs(struct sk_buff *skb)
 {
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 76535fd9278c..3243a31f6e82 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -737,12 +737,6 @@ static void nf_bridge_adjust_segmented_data(struct sk_buff *skb)
 #define nf_bridge_adjust_segmented_data(s) do {} while (0)
 #endif
 
-static void free_entry(struct nf_queue_entry *entry)
-{
-	nf_queue_entry_release_refs(entry);
-	kfree(entry);
-}
-
 static int
 __nfqnl_enqueue_packet_gso(struct net *net, struct nfqnl_instance *queue,
 			   struct sk_buff *skb, struct nf_queue_entry *entry)
@@ -768,7 +762,7 @@ __nfqnl_enqueue_packet_gso(struct net *net, struct nfqnl_instance *queue,
 		entry_seg->skb = skb;
 		ret = __nfqnl_enqueue_packet(net, queue, entry_seg);
 		if (ret)
-			free_entry(entry_seg);
+			nf_queue_entry_free(entry_seg);
 	}
 	return ret;
 }
@@ -827,7 +821,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
 
 	if (queued) {
 		if (err) /* some segments are already queued */
-			free_entry(entry);
+			nf_queue_entry_free(entry);
 		kfree_skb(skb);
 		return 0;
 	}
-- 
2.24.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH nf-next 2/4] netfilter: nf_queue: place bridge physports into queue_entry struct
  2020-03-27  2:24 [PATCH nf-next 0/4] netfilter: nf_queue: rework refcount handling Florian Westphal
  2020-03-27  2:24 ` [PATCH nf-next 1/4] netfilter: nf_queue: make nf_queue_entry_release_refs static Florian Westphal
@ 2020-03-27  2:24 ` Florian Westphal
  2020-03-27  2:24 ` [PATCH nf-next 3/4] netfilter: nf_queue: do not release refcouts until nf_reinject is done Florian Westphal
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Florian Westphal @ 2020-03-27  2:24 UTC (permalink / raw)
  To: netfilter-devel; +Cc: Florian Westphal

The refcount is done via entry->skb, which does work fine.
Major problem: When putting the refcount of the bridge ports, we
must always put the references while the skb is still around.

However, we will need to put the references after okfn() to avoid
a possible 1 -> 0 -> 1 refcount transition, so we cannot use the
skb pointer anymore.

Place the physports in the queue entry structure instead to allow
for refcounting changes in the next patch.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 include/net/netfilter/nf_queue.h |  5 ++-
 net/netfilter/nf_queue.c         | 53 ++++++++++++++------------------
 2 files changed, 27 insertions(+), 31 deletions(-)

diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
index cdbd98730852..e770bba00066 100644
--- a/include/net/netfilter/nf_queue.h
+++ b/include/net/netfilter/nf_queue.h
@@ -14,7 +14,10 @@ struct nf_queue_entry {
 	struct sk_buff		*skb;
 	unsigned int		id;
 	unsigned int		hook_index;	/* index in hook_entries->hook[] */
-
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+	struct net_device	*physin;
+	struct net_device	*physout;
+#endif
 	struct nf_hook_state	state;
 	u16			size; /* sizeof(entry) + saved route keys */
 
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 4da5776a9904..96eb72908467 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -46,24 +46,6 @@ void nf_unregister_queue_handler(struct net *net)
 }
 EXPORT_SYMBOL(nf_unregister_queue_handler);
 
-static void nf_queue_entry_release_br_nf_refs(struct sk_buff *skb)
-{
-#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
-	struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
-
-	if (nf_bridge) {
-		struct net_device *physdev;
-
-		physdev = nf_bridge_get_physindev(skb);
-		if (physdev)
-			dev_put(physdev);
-		physdev = nf_bridge_get_physoutdev(skb);
-		if (physdev)
-			dev_put(physdev);
-	}
-#endif
-}
-
 static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
 {
 	struct nf_hook_state *state = &entry->state;
@@ -76,7 +58,12 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
 	if (state->sk)
 		sock_put(state->sk);
 
-	nf_queue_entry_release_br_nf_refs(entry->skb);
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+	if (entry->physin)
+		dev_put(entry->physin);
+	if (entry->physout)
+		dev_put(entry->physout);
+#endif
 }
 
 void nf_queue_entry_free(struct nf_queue_entry *entry)
@@ -86,20 +73,19 @@ void nf_queue_entry_free(struct nf_queue_entry *entry)
 }
 EXPORT_SYMBOL_GPL(nf_queue_entry_free);
 
-static void nf_queue_entry_get_br_nf_refs(struct sk_buff *skb)
+static void __nf_queue_entry_init_physdevs(struct nf_queue_entry *entry)
 {
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
-	struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
+	const struct sk_buff *skb = entry->skb;
+	struct nf_bridge_info *nf_bridge;
 
+	nf_bridge = nf_bridge_info_get(skb);
 	if (nf_bridge) {
-		struct net_device *physdev;
-
-		physdev = nf_bridge_get_physindev(skb);
-		if (physdev)
-			dev_hold(physdev);
-		physdev = nf_bridge_get_physoutdev(skb);
-		if (physdev)
-			dev_hold(physdev);
+		entry->physin = nf_bridge_get_physindev(skb);
+		entry->physout = nf_bridge_get_physoutdev(skb);
+	} else {
+		entry->physin = NULL;
+		entry->physout = NULL;
 	}
 #endif
 }
@@ -116,7 +102,12 @@ void nf_queue_entry_get_refs(struct nf_queue_entry *entry)
 	if (state->sk)
 		sock_hold(state->sk);
 
-	nf_queue_entry_get_br_nf_refs(entry->skb);
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+	if (entry->physin)
+		dev_hold(entry->physin);
+	if (entry->physout)
+		dev_hold(entry->physout);
+#endif
 }
 EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
 
@@ -207,6 +198,8 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
 		.size	= sizeof(*entry) + route_key_size,
 	};
 
+	__nf_queue_entry_init_physdevs(entry);
+
 	nf_queue_entry_get_refs(entry);
 
 	switch (entry->state.pf) {
-- 
2.24.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH nf-next 3/4] netfilter: nf_queue: do not release refcouts until nf_reinject is done
  2020-03-27  2:24 [PATCH nf-next 0/4] netfilter: nf_queue: rework refcount handling Florian Westphal
  2020-03-27  2:24 ` [PATCH nf-next 1/4] netfilter: nf_queue: make nf_queue_entry_release_refs static Florian Westphal
  2020-03-27  2:24 ` [PATCH nf-next 2/4] netfilter: nf_queue: place bridge physports into queue_entry struct Florian Westphal
@ 2020-03-27  2:24 ` Florian Westphal
  2020-03-27  2:24 ` [PATCH nf-next 4/4] netfilter: nf_queue: prefer nf_queue_entry_free Florian Westphal
  2020-03-29 15:07 ` [PATCH nf-next 0/4] netfilter: nf_queue: rework refcount handling Pablo Neira Ayuso
  4 siblings, 0 replies; 6+ messages in thread
From: Florian Westphal @ 2020-03-27  2:24 UTC (permalink / raw)
  To: netfilter-devel; +Cc: Florian Westphal

nf_queue is problematic when another NF_QUEUE invocation happens
from nf_reinject().

1. nf_queue is invoked, increments state->sk refcount.
2. skb is queued, waiting for verdict.
3. sk is closed/released.
3. verdict comes back, nf_reinject is called.
4. nf_reinject drops the reference -- refcount can now drop to 0

Instead of get_ref/release_ref pattern, we need to nest the get_ref calls:
    get_ref
       get_ref
       release_ref
     release_ref

So that when we invoke the next processing stage (another netfilter
or the okfn()), we hold at least one reference count on the
devices/socket.

After previous patch, it is now safe to put the entry even after okfn()
has potentially free'd the skb.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/netfilter/nf_queue.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 96eb72908467..aadccdd117f0 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -303,12 +303,10 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 
 	hooks = nf_hook_entries_head(net, pf, entry->state.hook);
 
-	nf_queue_entry_release_refs(entry);
-
 	i = entry->hook_index;
 	if (WARN_ON_ONCE(!hooks || i >= hooks->num_hook_entries)) {
 		kfree_skb(skb);
-		kfree(entry);
+		nf_queue_entry_free(entry);
 		return;
 	}
 
@@ -347,6 +345,6 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 		kfree_skb(skb);
 	}
 
-	kfree(entry);
+	nf_queue_entry_free(entry);
 }
 EXPORT_SYMBOL(nf_reinject);
-- 
2.24.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH nf-next 4/4] netfilter: nf_queue: prefer nf_queue_entry_free
  2020-03-27  2:24 [PATCH nf-next 0/4] netfilter: nf_queue: rework refcount handling Florian Westphal
                   ` (2 preceding siblings ...)
  2020-03-27  2:24 ` [PATCH nf-next 3/4] netfilter: nf_queue: do not release refcouts until nf_reinject is done Florian Westphal
@ 2020-03-27  2:24 ` Florian Westphal
  2020-03-29 15:07 ` [PATCH nf-next 0/4] netfilter: nf_queue: rework refcount handling Pablo Neira Ayuso
  4 siblings, 0 replies; 6+ messages in thread
From: Florian Westphal @ 2020-03-27  2:24 UTC (permalink / raw)
  To: netfilter-devel; +Cc: Florian Westphal

Instead of dropping refs+kfree, use the helper added in previous patch.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/netfilter/nf_queue.c | 27 +++++++++------------------
 1 file changed, 9 insertions(+), 18 deletions(-)

diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index aadccdd117f0..bbd1209694b8 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -155,18 +155,16 @@ static void nf_ip6_saveroute(const struct sk_buff *skb,
 static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
 		      unsigned int index, unsigned int queuenum)
 {
-	int status = -ENOENT;
 	struct nf_queue_entry *entry = NULL;
 	const struct nf_queue_handler *qh;
 	struct net *net = state->net;
 	unsigned int route_key_size;
+	int status;
 
 	/* QUEUE == DROP if no one is waiting, to be safe. */
 	qh = rcu_dereference(net->nf.queue_handler);
-	if (!qh) {
-		status = -ESRCH;
-		goto err;
-	}
+	if (!qh)
+		return -ESRCH;
 
 	switch (state->pf) {
 	case AF_INET:
@@ -181,14 +179,12 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
 	}
 
 	entry = kmalloc(sizeof(*entry) + route_key_size, GFP_ATOMIC);
-	if (!entry) {
-		status = -ENOMEM;
-		goto err;
-	}
+	if (!entry)
+		return -ENOMEM;
 
 	if (skb_dst(skb) && !skb_dst_force(skb)) {
-		status = -ENETDOWN;
-		goto err;
+		kfree(entry);
+		return -ENETDOWN;
 	}
 
 	*entry = (struct nf_queue_entry) {
@@ -212,17 +208,12 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
 	}
 
 	status = qh->outfn(entry, queuenum);
-
 	if (status < 0) {
-		nf_queue_entry_release_refs(entry);
-		goto err;
+		nf_queue_entry_free(entry);
+		return status;
 	}
 
 	return 0;
-
-err:
-	kfree(entry);
-	return status;
 }
 
 /* Packets leaving via this function must come back through nf_reinject(). */
-- 
2.24.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH nf-next 0/4] netfilter: nf_queue: rework refcount handling
  2020-03-27  2:24 [PATCH nf-next 0/4] netfilter: nf_queue: rework refcount handling Florian Westphal
                   ` (3 preceding siblings ...)
  2020-03-27  2:24 ` [PATCH nf-next 4/4] netfilter: nf_queue: prefer nf_queue_entry_free Florian Westphal
@ 2020-03-29 15:07 ` Pablo Neira Ayuso
  4 siblings, 0 replies; 6+ messages in thread
From: Pablo Neira Ayuso @ 2020-03-29 15:07 UTC (permalink / raw)
  To: Florian Westphal; +Cc: netfilter-devel

On Fri, Mar 27, 2020 at 03:24:45AM +0100, Florian Westphal wrote:
[...]
> This series fixes this by deferring the call to
> nf_queue_entry_release_refs() until after the hook iteration/okfn
> returns; i.e. another nf_queue invocation from nf_reinject path will
> not observe a zero refcount.

Applied.

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, back to index

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-03-27  2:24 [PATCH nf-next 0/4] netfilter: nf_queue: rework refcount handling Florian Westphal
2020-03-27  2:24 ` [PATCH nf-next 1/4] netfilter: nf_queue: make nf_queue_entry_release_refs static Florian Westphal
2020-03-27  2:24 ` [PATCH nf-next 2/4] netfilter: nf_queue: place bridge physports into queue_entry struct Florian Westphal
2020-03-27  2:24 ` [PATCH nf-next 3/4] netfilter: nf_queue: do not release refcouts until nf_reinject is done Florian Westphal
2020-03-27  2:24 ` [PATCH nf-next 4/4] netfilter: nf_queue: prefer nf_queue_entry_free Florian Westphal
2020-03-29 15:07 ` [PATCH nf-next 0/4] netfilter: nf_queue: rework refcount handling Pablo Neira Ayuso

Netfilter-Devel Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/netfilter-devel/0 netfilter-devel/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 netfilter-devel netfilter-devel/ https://lore.kernel.org/netfilter-devel \
		netfilter-devel@vger.kernel.org
	public-inbox-index netfilter-devel

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.netfilter-devel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git