Netfilter-Devel Archive on lore.kernel.org
 help / color / Atom feed
From: Pablo Neira Ayuso <pablo@netfilter.org>
To: netfilter-devel@vger.kernel.org
Cc: davem@davemloft.net, netdev@vger.kernel.org
Subject: [PATCH 7/9] netfilter: nf_conncount: move all list iterations under spinlock
Date: Sat, 29 Dec 2018 13:58:01 +0100
Message-ID: <20181229125803.7415-8-pablo@netfilter.org> (raw)
In-Reply-To: <20181229125803.7415-1-pablo@netfilter.org>

Two CPUs may race to remove a connection from the list, the existing
conn->dead will result in a use-after-free. Use the per-list spinlock to
protect list iterations.

As all accesses to the list now happen while holding the per-list lock,
we no longer need to delay free operations with rcu.

Joint work with Florian.

Fixes: 5c789e131cbb9 ("netfilter: nf_conncount: Add list lock and gc worker, and RCU for init tree search")
Reviewed-by: Shawn Bohrer <sbohrer@cloudflare.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conncount.c | 46 +++++++++++++++++++-------------------------
 1 file changed, 20 insertions(+), 26 deletions(-)

diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index ce7f7d1212a6..d0fd195b19a8 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -43,8 +43,6 @@ struct nf_conncount_tuple {
 	struct nf_conntrack_zone	zone;
 	int				cpu;
 	u32				jiffies32;
-	bool				dead;
-	struct rcu_head			rcu_head;
 };
 
 struct nf_conncount_rb {
@@ -83,36 +81,21 @@ static int key_diff(const u32 *a, const u32 *b, unsigned int klen)
 	return memcmp(a, b, klen * sizeof(u32));
 }
 
-static void __conn_free(struct rcu_head *h)
-{
-	struct nf_conncount_tuple *conn;
-
-	conn = container_of(h, struct nf_conncount_tuple, rcu_head);
-	kmem_cache_free(conncount_conn_cachep, conn);
-}
-
 static bool conn_free(struct nf_conncount_list *list,
 		      struct nf_conncount_tuple *conn)
 {
 	bool free_entry = false;
 
-	spin_lock_bh(&list->list_lock);
-
-	if (conn->dead) {
-		spin_unlock_bh(&list->list_lock);
-		return free_entry;
-	}
+	lockdep_assert_held(&list->list_lock);
 
 	list->count--;
-	conn->dead = true;
-	list_del_rcu(&conn->node);
+	list_del(&conn->node);
 	if (list->count == 0) {
 		list->dead = true;
 		free_entry = true;
 	}
 
-	spin_unlock_bh(&list->list_lock);
-	call_rcu(&conn->rcu_head, __conn_free);
+	kmem_cache_free(conncount_conn_cachep, conn);
 	return free_entry;
 }
 
@@ -242,7 +225,7 @@ void nf_conncount_list_init(struct nf_conncount_list *list)
 }
 EXPORT_SYMBOL_GPL(nf_conncount_list_init);
 
-/* Return true if the list is empty */
+/* Return true if the list is empty. Must be called with BH disabled. */
 bool nf_conncount_gc_list(struct net *net,
 			  struct nf_conncount_list *list)
 {
@@ -253,12 +236,18 @@ bool nf_conncount_gc_list(struct net *net,
 	bool free_entry = false;
 	bool ret = false;
 
+	/* don't bother if other cpu is already doing GC */
+	if (!spin_trylock(&list->list_lock))
+		return false;
+
 	list_for_each_entry_safe(conn, conn_n, &list->head, node) {
 		found = find_or_evict(net, list, conn, &free_entry);
 		if (IS_ERR(found)) {
 			if (PTR_ERR(found) == -ENOENT)  {
-				if (free_entry)
+				if (free_entry) {
+					spin_unlock(&list->list_lock);
 					return true;
+				}
 				collected++;
 			}
 			continue;
@@ -271,23 +260,24 @@ bool nf_conncount_gc_list(struct net *net,
 			 * closed already -> ditch it
 			 */
 			nf_ct_put(found_ct);
-			if (conn_free(list, conn))
+			if (conn_free(list, conn)) {
+				spin_unlock(&list->list_lock);
 				return true;
+			}
 			collected++;
 			continue;
 		}
 
 		nf_ct_put(found_ct);
 		if (collected > CONNCOUNT_GC_MAX_NODES)
-			return false;
+			break;
 	}
 
-	spin_lock_bh(&list->list_lock);
 	if (!list->count) {
 		list->dead = true;
 		ret = true;
 	}
-	spin_unlock_bh(&list->list_lock);
+	spin_unlock(&list->list_lock);
 
 	return ret;
 }
@@ -478,6 +468,7 @@ static void tree_gc_worker(struct work_struct *work)
 	tree = data->gc_tree % CONNCOUNT_SLOTS;
 	root = &data->root[tree];
 
+	local_bh_disable();
 	rcu_read_lock();
 	for (node = rb_first(root); node != NULL; node = rb_next(node)) {
 		rbconn = rb_entry(node, struct nf_conncount_rb, node);
@@ -485,6 +476,9 @@ static void tree_gc_worker(struct work_struct *work)
 			gc_count++;
 	}
 	rcu_read_unlock();
+	local_bh_enable();
+
+	cond_resched();
 
 	spin_lock_bh(&nf_conncount_locks[tree]);
 	if (gc_count < ARRAY_SIZE(gc_nodes))
-- 
2.11.0

  parent reply index

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-12-29 12:57 [PATCH 0/9] Netfilter fixes for net Pablo Neira Ayuso
2018-12-29 12:57 ` [PATCH 1/9] netfilter: nf_tables: fix a missing check of nla_put_failure Pablo Neira Ayuso
2018-12-29 12:57 ` [PATCH 2/9] netfilter: nf_conncount: replace CONNCOUNT_LOCK_SLOTS with CONNCOUNT_SLOTS Pablo Neira Ayuso
2018-12-29 12:57 ` [PATCH 3/9] netfilter: nf_conncount: don't skip eviction when age is negative Pablo Neira Ayuso
2018-12-29 12:57 ` [PATCH 4/9] netfilter: nf_conncount: split gc in two phases Pablo Neira Ayuso
2018-12-29 12:57 ` [PATCH 5/9] netfilter: nf_conncount: restart search when nodes have been erased Pablo Neira Ayuso
2018-12-29 12:58 ` [PATCH 6/9] netfilter: nf_conncount: merge lookup and add functions Pablo Neira Ayuso
2018-12-29 12:58 ` Pablo Neira Ayuso [this message]
2018-12-29 12:58 ` [PATCH 8/9] netfilter: nf_conncount: speculative garbage collection on empty lists Pablo Neira Ayuso
2018-12-29 12:58 ` [PATCH 9/9] netfilter: nf_conncount: fix argument order to find_next_bit Pablo Neira Ayuso
2018-12-29 22:33 ` [PATCH 0/9] Netfilter fixes for net David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181229125803.7415-8-pablo@netfilter.org \
    --to=pablo@netfilter.org \
    --cc=davem@davemloft.net \
    --cc=netdev@vger.kernel.org \
    --cc=netfilter-devel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Netfilter-Devel Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/netfilter-devel/0 netfilter-devel/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 netfilter-devel netfilter-devel/ https://lore.kernel.org/netfilter-devel \
		netfilter-devel@vger.kernel.org
	public-inbox-index netfilter-devel

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.netfilter-devel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git