From: Pablo Neira Ayuso <pablo@netfilter.org>
To: netfilter-devel@vger.kernel.org
Cc: davem@davemloft.net, netdev@vger.kernel.org
Subject: [PATCH 7/9] netfilter: nf_conncount: move all list iterations under spinlock
Date: Sat, 29 Dec 2018 13:58:01 +0100 [thread overview]
Message-ID: <20181229125803.7415-8-pablo@netfilter.org> (raw)
In-Reply-To: <20181229125803.7415-1-pablo@netfilter.org>
Two CPUs may race to remove a connection from the list; the existing
conn->dead check will result in a use-after-free. Use the per-list
spinlock to protect list iterations.
As all accesses to the list now happen while holding the per-list lock,
we no longer need to delay free operations with RCU.
Joint work with Florian.
Fixes: 5c789e131cbb9 ("netfilter: nf_conncount: Add list lock and gc worker, and RCU for init tree search")
Reviewed-by: Shawn Bohrer <sbohrer@cloudflare.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
net/netfilter/nf_conncount.c | 46 +++++++++++++++++++-------------------------
1 file changed, 20 insertions(+), 26 deletions(-)
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index ce7f7d1212a6..d0fd195b19a8 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -43,8 +43,6 @@ struct nf_conncount_tuple {
struct nf_conntrack_zone zone;
int cpu;
u32 jiffies32;
- bool dead;
- struct rcu_head rcu_head;
};
struct nf_conncount_rb {
@@ -83,36 +81,21 @@ static int key_diff(const u32 *a, const u32 *b, unsigned int klen)
return memcmp(a, b, klen * sizeof(u32));
}
-static void __conn_free(struct rcu_head *h)
-{
- struct nf_conncount_tuple *conn;
-
- conn = container_of(h, struct nf_conncount_tuple, rcu_head);
- kmem_cache_free(conncount_conn_cachep, conn);
-}
-
static bool conn_free(struct nf_conncount_list *list,
struct nf_conncount_tuple *conn)
{
bool free_entry = false;
- spin_lock_bh(&list->list_lock);
-
- if (conn->dead) {
- spin_unlock_bh(&list->list_lock);
- return free_entry;
- }
+ lockdep_assert_held(&list->list_lock);
list->count--;
- conn->dead = true;
- list_del_rcu(&conn->node);
+ list_del(&conn->node);
if (list->count == 0) {
list->dead = true;
free_entry = true;
}
- spin_unlock_bh(&list->list_lock);
- call_rcu(&conn->rcu_head, __conn_free);
+ kmem_cache_free(conncount_conn_cachep, conn);
return free_entry;
}
@@ -242,7 +225,7 @@ void nf_conncount_list_init(struct nf_conncount_list *list)
}
EXPORT_SYMBOL_GPL(nf_conncount_list_init);
-/* Return true if the list is empty */
+/* Return true if the list is empty. Must be called with BH disabled. */
bool nf_conncount_gc_list(struct net *net,
struct nf_conncount_list *list)
{
@@ -253,12 +236,18 @@ bool nf_conncount_gc_list(struct net *net,
bool free_entry = false;
bool ret = false;
+ /* don't bother if other cpu is already doing GC */
+ if (!spin_trylock(&list->list_lock))
+ return false;
+
list_for_each_entry_safe(conn, conn_n, &list->head, node) {
found = find_or_evict(net, list, conn, &free_entry);
if (IS_ERR(found)) {
if (PTR_ERR(found) == -ENOENT) {
- if (free_entry)
+ if (free_entry) {
+ spin_unlock(&list->list_lock);
return true;
+ }
collected++;
}
continue;
@@ -271,23 +260,24 @@ bool nf_conncount_gc_list(struct net *net,
* closed already -> ditch it
*/
nf_ct_put(found_ct);
- if (conn_free(list, conn))
+ if (conn_free(list, conn)) {
+ spin_unlock(&list->list_lock);
return true;
+ }
collected++;
continue;
}
nf_ct_put(found_ct);
if (collected > CONNCOUNT_GC_MAX_NODES)
- return false;
+ break;
}
- spin_lock_bh(&list->list_lock);
if (!list->count) {
list->dead = true;
ret = true;
}
- spin_unlock_bh(&list->list_lock);
+ spin_unlock(&list->list_lock);
return ret;
}
@@ -478,6 +468,7 @@ static void tree_gc_worker(struct work_struct *work)
tree = data->gc_tree % CONNCOUNT_SLOTS;
root = &data->root[tree];
+ local_bh_disable();
rcu_read_lock();
for (node = rb_first(root); node != NULL; node = rb_next(node)) {
rbconn = rb_entry(node, struct nf_conncount_rb, node);
@@ -485,6 +476,9 @@ static void tree_gc_worker(struct work_struct *work)
gc_count++;
}
rcu_read_unlock();
+ local_bh_enable();
+
+ cond_resched();
spin_lock_bh(&nf_conncount_locks[tree]);
if (gc_count < ARRAY_SIZE(gc_nodes))
--
2.11.0
next prev parent reply other threads:[~2018-12-29 12:58 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-12-29 12:57 [PATCH 0/9] Netfilter fixes for net Pablo Neira Ayuso
2018-12-29 12:57 ` [PATCH 1/9] netfilter: nf_tables: fix a missing check of nla_put_failure Pablo Neira Ayuso
2018-12-29 12:57 ` [PATCH 2/9] netfilter: nf_conncount: replace CONNCOUNT_LOCK_SLOTS with CONNCOUNT_SLOTS Pablo Neira Ayuso
2018-12-29 12:57 ` [PATCH 3/9] netfilter: nf_conncount: don't skip eviction when age is negative Pablo Neira Ayuso
2018-12-29 12:57 ` [PATCH 4/9] netfilter: nf_conncount: split gc in two phases Pablo Neira Ayuso
2018-12-29 12:57 ` [PATCH 5/9] netfilter: nf_conncount: restart search when nodes have been erased Pablo Neira Ayuso
2018-12-29 12:58 ` [PATCH 6/9] netfilter: nf_conncount: merge lookup and add functions Pablo Neira Ayuso
2018-12-29 12:58 ` Pablo Neira Ayuso [this message]
2018-12-29 12:58 ` [PATCH 8/9] netfilter: nf_conncount: speculative garbage collection on empty lists Pablo Neira Ayuso
2018-12-29 12:58 ` [PATCH 9/9] netfilter: nf_conncount: fix argument order to find_next_bit Pablo Neira Ayuso
2018-12-29 22:33 ` [PATCH 0/9] Netfilter fixes for net David Miller
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20181229125803.7415-8-pablo@netfilter.org \
--to=pablo@netfilter.org \
--cc=davem@davemloft.net \
--cc=netdev@vger.kernel.org \
--cc=netfilter-devel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).