netfilter-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Pablo Neira Ayuso <pablo@netfilter.org>
To: netfilter-devel@vger.kernel.org
Cc: davem@davemloft.net
Subject: [PATCH 05/25] netfilter: conntrack: simplify early_drop
Date: Sat, 23 Jul 2016 13:02:05 +0200	[thread overview]
Message-ID: <1469271745-14523-6-git-send-email-pablo@netfilter.org> (raw)
In-Reply-To: <1469271745-14523-1-git-send-email-pablo@netfilter.org>

From: Florian Westphal <fw@strlen.de>

We don't need to acquire the bucket lock during early drop, we can
use lockless traveral just like ____nf_conntrack_find.

The timer deletion serves as synchronization point, if another cpu
attempts to evict same entry, only one will succeed with timer deletion.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack.h |  1 +
 net/netfilter/nf_conntrack_core.c    | 95 ++++++++++++++++++------------------
 2 files changed, 48 insertions(+), 48 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 5d3397f..2a5133e 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -301,6 +301,7 @@ void nf_ct_tmpl_free(struct nf_conn *tmpl);
 
 #define NF_CT_STAT_INC(net, count)	  __this_cpu_inc((net)->ct.stat->count)
 #define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count)
+#define NF_CT_STAT_ADD_ATOMIC(net, count, v) this_cpu_add((net)->ct.stat->count, (v))
 
 #define MODULE_ALIAS_NFCT_HELPER(helper) \
         MODULE_ALIAS("nfct-helper-" helper)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 1289e7e..e0e9c9a 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -834,67 +834,66 @@ EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken);
 
 /* There's a small race here where we may free a just-assured
    connection.  Too bad: we're in trouble anyway. */
-static noinline int early_drop(struct net *net, unsigned int _hash)
+static unsigned int early_drop_list(struct net *net,
+				    struct hlist_nulls_head *head)
 {
-	/* Use oldest entry, which is roughly LRU */
 	struct nf_conntrack_tuple_hash *h;
-	struct nf_conn *tmp;
 	struct hlist_nulls_node *n;
-	unsigned int i, hash, sequence;
-	struct nf_conn *ct = NULL;
-	spinlock_t *lockp;
-	bool ret = false;
+	unsigned int drops = 0;
+	struct nf_conn *tmp;
 
-	i = 0;
+	hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) {
+		tmp = nf_ct_tuplehash_to_ctrack(h);
 
-	local_bh_disable();
-restart:
-	sequence = read_seqcount_begin(&nf_conntrack_generation);
-	for (; i < NF_CT_EVICTION_RANGE; i++) {
-		hash = scale_hash(_hash++);
-		lockp = &nf_conntrack_locks[hash % CONNTRACK_LOCKS];
-		nf_conntrack_lock(lockp);
-		if (read_seqcount_retry(&nf_conntrack_generation, sequence)) {
-			spin_unlock(lockp);
-			goto restart;
-		}
-		hlist_nulls_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash],
-					       hnnode) {
-			tmp = nf_ct_tuplehash_to_ctrack(h);
-
-			if (test_bit(IPS_ASSURED_BIT, &tmp->status) ||
-			    !net_eq(nf_ct_net(tmp), net) ||
-			    nf_ct_is_dying(tmp))
-				continue;
-
-			if (atomic_inc_not_zero(&tmp->ct_general.use)) {
-				ct = tmp;
-				break;
-			}
-		}
+		if (test_bit(IPS_ASSURED_BIT, &tmp->status) ||
+		    !net_eq(nf_ct_net(tmp), net) ||
+		    nf_ct_is_dying(tmp))
+			continue;
 
-		spin_unlock(lockp);
-		if (ct)
-			break;
+		if (!atomic_inc_not_zero(&tmp->ct_general.use))
+			continue;
+
+		/* kill only if still in same netns -- might have moved due to
+		 * SLAB_DESTROY_BY_RCU rules.
+		 *
+		 * We steal the timer reference.  If that fails timer has
+		 * already fired or someone else deleted it. Just drop ref
+		 * and move to next entry.
+		 */
+		if (net_eq(nf_ct_net(tmp), net) &&
+		    nf_ct_is_confirmed(tmp) &&
+		    del_timer(&tmp->timeout) &&
+		    nf_ct_delete(tmp, 0, 0))
+			drops++;
+
+		nf_ct_put(tmp);
 	}
 
-	local_bh_enable();
+	return drops;
+}
 
-	if (!ct)
-		return false;
+static noinline int early_drop(struct net *net, unsigned int _hash)
+{
+	unsigned int i;
 
-	/* kill only if in same netns -- might have moved due to
-	 * SLAB_DESTROY_BY_RCU rules
-	 */
-	if (net_eq(nf_ct_net(ct), net) && del_timer(&ct->timeout)) {
-		if (nf_ct_delete(ct, 0, 0)) {
-			NF_CT_STAT_INC_ATOMIC(net, early_drop);
-			ret = true;
+	for (i = 0; i < NF_CT_EVICTION_RANGE; i++) {
+		struct hlist_nulls_head *ct_hash;
+		unsigned hash, sequence, drops;
+
+		do {
+			sequence = read_seqcount_begin(&nf_conntrack_generation);
+			hash = scale_hash(_hash++);
+			ct_hash = nf_conntrack_hash;
+		} while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+
+		drops = early_drop_list(net, &ct_hash[hash]);
+		if (drops) {
+			NF_CT_STAT_ADD_ATOMIC(net, early_drop, drops);
+			return true;
 		}
 	}
 
-	nf_ct_put(ct);
-	return ret;
+	return false;
 }
 
 static struct nf_conn *
-- 
2.1.4


  parent reply	other threads:[~2016-07-23 11:03 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-07-23 11:02 [PATCH 00/25] Netfilter/IPVS updates for net-next Pablo Neira Ayuso
2016-07-23 11:02 ` [PATCH 01/25] ipvs: count pre-established TCP states as active Pablo Neira Ayuso
2016-07-23 11:02 ` [PATCH 02/25] netfilter: conntrack: fix race between nf_conntrack proc read and hash resize Pablo Neira Ayuso
2016-07-23 11:02 ` [PATCH 03/25] netfilter: cttimeout: unlink timeout obj again when hash resize happen Pablo Neira Ayuso
2016-07-23 11:02 ` [PATCH 04/25] netfilter: nf_ct_helper: unlink helper " Pablo Neira Ayuso
2016-07-23 11:02 ` Pablo Neira Ayuso [this message]
2016-07-23 11:02 ` [PATCH 06/25] netfilter: move nat hlist_head to nf_conn Pablo Neira Ayuso
2016-07-23 11:02 ` [PATCH 07/25] netfilter: nat: convert nat bysrc hash to rhashtable Pablo Neira Ayuso
2016-07-23 11:02 ` [PATCH 08/25] netfilter: physdev: physdev-is-out should not work with OUTPUT chain Pablo Neira Ayuso
2016-07-23 11:02 ` [PATCH 09/25] netfilter: nft_ct: make byte/packet expr more friendly Pablo Neira Ayuso
2016-07-23 11:02 ` [PATCH 10/25] netfilter: constify arg to is_dying/confirmed Pablo Neira Ayuso
2016-07-23 11:02 ` [PATCH 11/25] netfilter: nf_tables: get rid of possible_net_t from set and basechain Pablo Neira Ayuso
2016-07-23 11:02 ` [PATCH 12/25] netfilter: nf_conntrack_h323: fix off-by-one in DecodeQ931 Pablo Neira Ayuso
2016-07-23 11:02 ` [PATCH 13/25] netfilter: conntrack: protect early_drop by rcu read lock Pablo Neira Ayuso
2016-07-23 11:02 ` [PATCH 14/25] netfilter: x_tables: speed up jump target validation Pablo Neira Ayuso
2016-07-23 11:02 ` [PATCH 15/25] netfilter: nft_ct: fix unpaired nf_connlabels_get/put call Pablo Neira Ayuso
2016-07-23 11:02 ` [PATCH 16/25] netfilter: Add helper array register/unregister functions Pablo Neira Ayuso
2016-07-23 11:02 ` [PATCH 17/25] netfilter: nft_log: fix possible memory leak if log expr init fail Pablo Neira Ayuso
2016-07-23 11:02 ` [PATCH 18/25] netfilter: nft_log: check the validity of log level Pablo Neira Ayuso
2016-07-23 11:02 ` [PATCH 19/25] netfilter: nft_log: fix snaplen does not truncate packets Pablo Neira Ayuso
2016-07-23 11:02 ` [PATCH 20/25] netfilter: nf_tables: allow to filter out rules by table and chain Pablo Neira Ayuso
2016-07-23 11:02 ` [PATCH 21/25] netfilter: conntrack: support a fixed size of 128 distinct labels Pablo Neira Ayuso
2016-07-23 11:02 ` [PATCH 22/25] netfilter: connlabels: move set helper to xt_connlabel Pablo Neira Ayuso
2016-07-23 11:02 ` [PATCH 23/25] netfilter: h323: Use mod_timer instead of set_expect_timeout Pablo Neira Ayuso
2016-07-23 11:02 ` [PATCH 24/25] netfilter: nft_compat: put back match/target module if init fail Pablo Neira Ayuso
2016-07-23 11:02 ` [PATCH 25/25] netfilter: nft_compat: fix crash when related match/target module is removed Pablo Neira Ayuso
2016-07-25  5:03 ` [PATCH 00/25] Netfilter/IPVS updates for net-next David Miller
2016-07-23 11:08 Pablo Neira Ayuso
2016-07-23 11:08 ` [PATCH 05/25] netfilter: conntrack: simplify early_drop Pablo Neira Ayuso

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1469271745-14523-6-git-send-email-pablo@netfilter.org \
    --to=pablo@netfilter.org \
    --cc=davem@davemloft.net \
    --cc=netfilter-devel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).