Netfilter-Devel Archive on lore.kernel.org
 help / color / Atom feed
From: Pablo Neira Ayuso <pablo@netfilter.org>
To: netfilter-devel@vger.kernel.org
Cc: davem@davemloft.net, netdev@vger.kernel.org
Subject: [PATCH 6/9] netfilter: nf_conncount: merge lookup and add functions
Date: Sat, 29 Dec 2018 13:58:00 +0100
Message-ID: <20181229125803.7415-7-pablo@netfilter.org> (raw)
In-Reply-To: <20181229125803.7415-1-pablo@netfilter.org>

From: Florian Westphal <fw@strlen.de>

'lookup' is always followed by 'add'.
Merge both and make the list-walk part of nf_conncount_add().

This also avoids one unneeded unlock/re-lock pair.

Extra care needs to be taken in count_tree, as we only hold rcu
read lock, i.e. we can only insert to an existing tree node after
acquiring its lock and making sure it has a nonzero count.

As a zero count should be rare, just fall back to insert_tree()
(which acquires tree lock).

This issue and its solution were pointed out by Shawn Bohrer
during patch review.

Reviewed-by: Shawn Bohrer <sbohrer@cloudflare.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_count.h |  18 +---
 net/netfilter/nf_conncount.c               | 146 +++++++++++++----------------
 net/netfilter/nft_connlimit.c              |  14 +--
 3 files changed, 72 insertions(+), 106 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h
index 4b2b2baf8ab4..aa66775c15f4 100644
--- a/include/net/netfilter/nf_conntrack_count.h
+++ b/include/net/netfilter/nf_conntrack_count.h
@@ -5,12 +5,6 @@
 
 struct nf_conncount_data;
 
-enum nf_conncount_list_add {
-	NF_CONNCOUNT_ADDED, 	/* list add was ok */
-	NF_CONNCOUNT_ERR,	/* -ENOMEM, must drop skb */
-	NF_CONNCOUNT_SKIP,	/* list is already reclaimed by gc */
-};
-
 struct nf_conncount_list {
 	spinlock_t list_lock;
 	struct list_head head;	/* connections with the same filtering key */
@@ -29,18 +23,12 @@ unsigned int nf_conncount_count(struct net *net,
 				const struct nf_conntrack_tuple *tuple,
 				const struct nf_conntrack_zone *zone);
 
-void nf_conncount_lookup(struct net *net, struct nf_conncount_list *list,
-			 const struct nf_conntrack_tuple *tuple,
-			 const struct nf_conntrack_zone *zone,
-			 bool *addit);
+int nf_conncount_add(struct net *net, struct nf_conncount_list *list,
+		     const struct nf_conntrack_tuple *tuple,
+		     const struct nf_conntrack_zone *zone);
 
 void nf_conncount_list_init(struct nf_conncount_list *list);
 
-enum nf_conncount_list_add
-nf_conncount_add(struct nf_conncount_list *list,
-		 const struct nf_conntrack_tuple *tuple,
-		 const struct nf_conntrack_zone *zone);
-
 bool nf_conncount_gc_list(struct net *net,
 			  struct nf_conncount_list *list);
 
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index 0a83c694a8f1..ce7f7d1212a6 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -83,38 +83,6 @@ static int key_diff(const u32 *a, const u32 *b, unsigned int klen)
 	return memcmp(a, b, klen * sizeof(u32));
 }
 
-enum nf_conncount_list_add
-nf_conncount_add(struct nf_conncount_list *list,
-		 const struct nf_conntrack_tuple *tuple,
-		 const struct nf_conntrack_zone *zone)
-{
-	struct nf_conncount_tuple *conn;
-
-	if (WARN_ON_ONCE(list->count > INT_MAX))
-		return NF_CONNCOUNT_ERR;
-
-	conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
-	if (conn == NULL)
-		return NF_CONNCOUNT_ERR;
-
-	conn->tuple = *tuple;
-	conn->zone = *zone;
-	conn->cpu = raw_smp_processor_id();
-	conn->jiffies32 = (u32)jiffies;
-	conn->dead = false;
-	spin_lock_bh(&list->list_lock);
-	if (list->dead == true) {
-		kmem_cache_free(conncount_conn_cachep, conn);
-		spin_unlock_bh(&list->list_lock);
-		return NF_CONNCOUNT_SKIP;
-	}
-	list_add_tail(&conn->node, &list->head);
-	list->count++;
-	spin_unlock_bh(&list->list_lock);
-	return NF_CONNCOUNT_ADDED;
-}
-EXPORT_SYMBOL_GPL(nf_conncount_add);
-
 static void __conn_free(struct rcu_head *h)
 {
 	struct nf_conncount_tuple *conn;
@@ -177,11 +145,10 @@ find_or_evict(struct net *net, struct nf_conncount_list *list,
 	return ERR_PTR(-EAGAIN);
 }
 
-void nf_conncount_lookup(struct net *net,
-			 struct nf_conncount_list *list,
-			 const struct nf_conntrack_tuple *tuple,
-			 const struct nf_conntrack_zone *zone,
-			 bool *addit)
+static int __nf_conncount_add(struct net *net,
+			      struct nf_conncount_list *list,
+			      const struct nf_conntrack_tuple *tuple,
+			      const struct nf_conntrack_zone *zone)
 {
 	const struct nf_conntrack_tuple_hash *found;
 	struct nf_conncount_tuple *conn, *conn_n;
@@ -189,9 +156,6 @@ void nf_conncount_lookup(struct net *net,
 	unsigned int collect = 0;
 	bool free_entry = false;
 
-	/* best effort only */
-	*addit = tuple ? true : false;
-
 	/* check the saved connections */
 	list_for_each_entry_safe(conn, conn_n, &list->head, node) {
 		if (collect > CONNCOUNT_GC_MAX_NODES)
@@ -201,21 +165,19 @@ void nf_conncount_lookup(struct net *net,
 		if (IS_ERR(found)) {
 			/* Not found, but might be about to be confirmed */
 			if (PTR_ERR(found) == -EAGAIN) {
-				if (!tuple)
-					continue;
-
 				if (nf_ct_tuple_equal(&conn->tuple, tuple) &&
 				    nf_ct_zone_id(&conn->zone, conn->zone.dir) ==
 				    nf_ct_zone_id(zone, zone->dir))
-					*addit = false;
-			} else if (PTR_ERR(found) == -ENOENT)
+					return 0; /* already exists */
+			} else {
 				collect++;
+			}
 			continue;
 		}
 
 		found_ct = nf_ct_tuplehash_to_ctrack(found);
 
-		if (tuple && nf_ct_tuple_equal(&conn->tuple, tuple) &&
+		if (nf_ct_tuple_equal(&conn->tuple, tuple) &&
 		    nf_ct_zone_equal(found_ct, zone, zone->dir)) {
 			/*
 			 * We should not see tuples twice unless someone hooks
@@ -223,7 +185,8 @@ void nf_conncount_lookup(struct net *net,
 			 *
 			 * Attempt to avoid a re-add in this case.
 			 */
-			*addit = false;
+			nf_ct_put(found_ct);
+			return 0;
 		} else if (already_closed(found_ct)) {
 			/*
 			 * we do not care about connections which are
@@ -237,8 +200,38 @@ void nf_conncount_lookup(struct net *net,
 
 		nf_ct_put(found_ct);
 	}
+
+	if (WARN_ON_ONCE(list->count > INT_MAX))
+		return -EOVERFLOW;
+
+	conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
+	if (conn == NULL)
+		return -ENOMEM;
+
+	conn->tuple = *tuple;
+	conn->zone = *zone;
+	conn->cpu = raw_smp_processor_id();
+	conn->jiffies32 = (u32)jiffies;
+	list_add_tail(&conn->node, &list->head);
+	list->count++;
+	return 0;
 }
-EXPORT_SYMBOL_GPL(nf_conncount_lookup);
+
+int nf_conncount_add(struct net *net,
+		     struct nf_conncount_list *list,
+		     const struct nf_conntrack_tuple *tuple,
+		     const struct nf_conntrack_zone *zone)
+{
+	int ret;
+
+	/* check the saved connections */
+	spin_lock_bh(&list->list_lock);
+	ret = __nf_conncount_add(net, list, tuple, zone);
+	spin_unlock_bh(&list->list_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(nf_conncount_add);
 
 void nf_conncount_list_init(struct nf_conncount_list *list)
 {
@@ -339,13 +332,11 @@ insert_tree(struct net *net,
 	    const struct nf_conntrack_tuple *tuple,
 	    const struct nf_conntrack_zone *zone)
 {
-	enum nf_conncount_list_add ret;
 	struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES];
 	struct rb_node **rbnode, *parent;
 	struct nf_conncount_rb *rbconn;
 	struct nf_conncount_tuple *conn;
 	unsigned int count = 0, gc_count = 0;
-	bool node_found = false;
 	bool do_gc = true;
 
 	spin_lock_bh(&nf_conncount_locks[hash]);
@@ -363,20 +354,15 @@ insert_tree(struct net *net,
 		} else if (diff > 0) {
 			rbnode = &((*rbnode)->rb_right);
 		} else {
-			/* unlikely: other cpu added node already */
-			node_found = true;
-			ret = nf_conncount_add(&rbconn->list, tuple, zone);
-			if (ret == NF_CONNCOUNT_ERR) {
+			int ret;
+
+			ret = nf_conncount_add(net, &rbconn->list, tuple, zone);
+			if (ret)
 				count = 0; /* hotdrop */
-			} else if (ret == NF_CONNCOUNT_ADDED) {
+			else
 				count = rbconn->list.count;
-			} else {
-				/* NF_CONNCOUNT_SKIP, rbconn is already
-				 * reclaimed by gc, insert a new tree node
-				 */
-				node_found = false;
-			}
-			break;
+			tree_nodes_free(root, gc_nodes, gc_count);
+			goto out_unlock;
 		}
 
 		if (gc_count >= ARRAY_SIZE(gc_nodes))
@@ -394,9 +380,6 @@ insert_tree(struct net *net,
 		goto restart;
 	}
 
-	if (node_found)
-		goto out_unlock;
-
 	/* expected case: match, insert new node */
 	rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC);
 	if (rbconn == NULL)
@@ -431,7 +414,6 @@ count_tree(struct net *net,
 	   const struct nf_conntrack_tuple *tuple,
 	   const struct nf_conntrack_zone *zone)
 {
-	enum nf_conncount_list_add ret;
 	struct rb_root *root;
 	struct rb_node *parent;
 	struct nf_conncount_rb *rbconn;
@@ -444,7 +426,6 @@ count_tree(struct net *net,
 	parent = rcu_dereference_raw(root->rb_node);
 	while (parent) {
 		int diff;
-		bool addit;
 
 		rbconn = rb_entry(parent, struct nf_conncount_rb, node);
 
@@ -454,24 +435,29 @@ count_tree(struct net *net,
 		} else if (diff > 0) {
 			parent = rcu_dereference_raw(parent->rb_right);
 		} else {
-			/* same source network -> be counted! */
-			nf_conncount_lookup(net, &rbconn->list, tuple, zone,
-					    &addit);
+			int ret;
 
-			if (!addit)
+			if (!tuple) {
+				nf_conncount_gc_list(net, &rbconn->list);
 				return rbconn->list.count;
+			}
 
-			ret = nf_conncount_add(&rbconn->list, tuple, zone);
-			if (ret == NF_CONNCOUNT_ERR) {
-				return 0; /* hotdrop */
-			} else if (ret == NF_CONNCOUNT_ADDED) {
-				return rbconn->list.count;
-			} else {
-				/* NF_CONNCOUNT_SKIP, rbconn is already
-				 * reclaimed by gc, insert a new tree node
-				 */
+			spin_lock_bh(&rbconn->list.list_lock);
+			/* Node might be about to be free'd.
+			 * We need to defer to insert_tree() in this case.
+			 */
+			if (rbconn->list.count == 0) {
+				spin_unlock_bh(&rbconn->list.list_lock);
 				break;
 			}
+
+			/* same source network -> be counted! */
+			ret = __nf_conncount_add(net, &rbconn->list, tuple, zone);
+			spin_unlock_bh(&rbconn->list.list_lock);
+			if (ret)
+				return 0; /* hotdrop */
+			else
+				return rbconn->list.count;
 		}
 	}
 
diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c
index b90d96ba4a12..af1497ab9464 100644
--- a/net/netfilter/nft_connlimit.c
+++ b/net/netfilter/nft_connlimit.c
@@ -30,7 +30,6 @@ static inline void nft_connlimit_do_eval(struct nft_connlimit *priv,
 	enum ip_conntrack_info ctinfo;
 	const struct nf_conn *ct;
 	unsigned int count;
-	bool addit;
 
 	tuple_ptr = &tuple;
 
@@ -44,19 +43,12 @@ static inline void nft_connlimit_do_eval(struct nft_connlimit *priv,
 		return;
 	}
 
-	nf_conncount_lookup(nft_net(pkt), &priv->list, tuple_ptr, zone,
-			    &addit);
-	count = priv->list.count;
-
-	if (!addit)
-		goto out;
-
-	if (nf_conncount_add(&priv->list, tuple_ptr, zone) == NF_CONNCOUNT_ERR) {
+	if (nf_conncount_add(nft_net(pkt), &priv->list, tuple_ptr, zone)) {
 		regs->verdict.code = NF_DROP;
 		return;
 	}
-	count++;
-out:
+
+	count = priv->list.count;
 
 	if ((count > priv->limit) ^ priv->invert) {
 		regs->verdict.code = NFT_BREAK;
-- 
2.11.0

  parent reply index

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-12-29 12:57 [PATCH 0/9] Netfilter fixes for net Pablo Neira Ayuso
2018-12-29 12:57 ` [PATCH 1/9] netfilter: nf_tables: fix a missing check of nla_put_failure Pablo Neira Ayuso
2018-12-29 12:57 ` [PATCH 2/9] netfilter: nf_conncount: replace CONNCOUNT_LOCK_SLOTS with CONNCOUNT_SLOTS Pablo Neira Ayuso
2018-12-29 12:57 ` [PATCH 3/9] netfilter: nf_conncount: don't skip eviction when age is negative Pablo Neira Ayuso
2018-12-29 12:57 ` [PATCH 4/9] netfilter: nf_conncount: split gc in two phases Pablo Neira Ayuso
2018-12-29 12:57 ` [PATCH 5/9] netfilter: nf_conncount: restart search when nodes have been erased Pablo Neira Ayuso
2018-12-29 12:58 ` Pablo Neira Ayuso [this message]
2018-12-29 12:58 ` [PATCH 7/9] netfilter: nf_conncount: move all list iterations under spinlock Pablo Neira Ayuso
2018-12-29 12:58 ` [PATCH 8/9] netfilter: nf_conncount: speculative garbage collection on empty lists Pablo Neira Ayuso
2018-12-29 12:58 ` [PATCH 9/9] netfilter: nf_conncount: fix argument order to find_next_bit Pablo Neira Ayuso
2018-12-29 22:33 ` [PATCH 0/9] Netfilter fixes for net David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181229125803.7415-7-pablo@netfilter.org \
    --to=pablo@netfilter.org \
    --cc=davem@davemloft.net \
    --cc=netdev@vger.kernel.org \
    --cc=netfilter-devel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Netfilter-Devel Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/netfilter-devel/0 netfilter-devel/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 netfilter-devel netfilter-devel/ https://lore.kernel.org/netfilter-devel \
		netfilter-devel@vger.kernel.org
	public-inbox-index netfilter-devel

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.netfilter-devel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git