netfilter-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Pablo Neira Ayuso <pablo@netfilter.org>
To: netfilter-devel@vger.kernel.org
Cc: davem@davemloft.net, netdev@vger.kernel.org
Subject: [PATCH 02/25] netfilter: conntrack: fix race between nf_conntrack proc read and hash resize
Date: Sat, 23 Jul 2016 13:08:16 +0200	[thread overview]
Message-ID: <1469272119-29942-3-git-send-email-pablo@netfilter.org> (raw)
In-Reply-To: <1469272119-29942-1-git-send-email-pablo@netfilter.org>

From: Liping Zhang <liping.zhang@spreadtrum.com>

When we do "cat /proc/net/nf_conntrack" while concurrently resizing the
conntrack hash table via /sys/module/nf_conntrack/parameters/hashsize, a
race can occur, because the reader can observe a newly allocated hash
together with the old size (or vice versa). This results in an oops like
the following:

  BUG: unable to handle kernel NULL pointer dereference at 0000000000000017
  IP: [<ffffffffa0418e21>] seq_print_acct+0x11/0x50 [nf_conntrack]
  Call Trace:
  [<ffffffffa0412f4e>] ? ct_seq_show+0x14e/0x340 [nf_conntrack]
  [<ffffffff81261a1c>] seq_read+0x2cc/0x390
  [<ffffffff812a8d62>] proc_reg_read+0x42/0x70
  [<ffffffff8123bee7>] __vfs_read+0x37/0x130
  [<ffffffff81347980>] ? security_file_permission+0xa0/0xc0
  [<ffffffff8123cf75>] vfs_read+0x95/0x140
  [<ffffffff8123e475>] SyS_read+0x55/0xc0
  [<ffffffff817c2572>] entry_SYSCALL_64_fastpath+0x1a/0xa4

It is very easy to reproduce this kernel crash.
1. open one shell and input the following cmds:
  while : ; do
    echo $RANDOM > /sys/module/nf_conntrack/parameters/hashsize
  done
2. open more shells and input the following cmds:
  while : ; do
    cat /proc/net/nf_conntrack
  done
3. just wait a moment, an oops will happen soon.

The solution in this patch is based on Florian's commit 5e3c61f98175
("netfilter: conntrack: fix lookup race during hash resize"). It also
adds a wrapper function, nf_conntrack_get_ht, to get the hash and hsize,
as suggested by Florian Westphal.

Signed-off-by: Liping Zhang <liping.zhang@spreadtrum.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_core.h             |  2 ++
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | 14 ++++++++++----
 net/netfilter/nf_conntrack_core.c                     | 17 +++++++++++++++++
 net/netfilter/nf_conntrack_standalone.c               | 14 +++++++++-----
 4 files changed, 38 insertions(+), 9 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 3e2f332..79d7ac5 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -51,6 +51,8 @@ bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
 			const struct nf_conntrack_l3proto *l3proto,
 			const struct nf_conntrack_l4proto *l4proto);
 
+void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize);
+
 /* Find a connection corresponding to a tuple. */
 struct nf_conntrack_tuple_hash *
 nf_conntrack_find_get(struct net *net,
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index c6f3c40..6392371 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -26,6 +26,8 @@
 
 struct ct_iter_state {
 	struct seq_net_private p;
+	struct hlist_nulls_head *hash;
+	unsigned int htable_size;
 	unsigned int bucket;
 };
 
@@ -35,10 +37,10 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
 	struct hlist_nulls_node *n;
 
 	for (st->bucket = 0;
-	     st->bucket < nf_conntrack_htable_size;
+	     st->bucket < st->htable_size;
 	     st->bucket++) {
 		n = rcu_dereference(
-			hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket]));
+			hlist_nulls_first_rcu(&st->hash[st->bucket]));
 		if (!is_a_nulls(n))
 			return n;
 	}
@@ -53,11 +55,11 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
 	head = rcu_dereference(hlist_nulls_next_rcu(head));
 	while (is_a_nulls(head)) {
 		if (likely(get_nulls_value(head) == st->bucket)) {
-			if (++st->bucket >= nf_conntrack_htable_size)
+			if (++st->bucket >= st->htable_size)
 				return NULL;
 		}
 		head = rcu_dereference(
-			hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket]));
+			hlist_nulls_first_rcu(&st->hash[st->bucket]));
 	}
 	return head;
 }
@@ -75,7 +77,11 @@ static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos)
 static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
 	__acquires(RCU)
 {
+	struct ct_iter_state *st = seq->private;
+
 	rcu_read_lock();
+
+	nf_conntrack_get_ht(&st->hash, &st->htable_size);
 	return ct_get_idx(seq, *pos);
 }
 
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 153e33f..1289e7e 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -460,6 +460,23 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
 	       net_eq(net, nf_ct_net(ct));
 }
 
+/* must be called with rcu read lock held */
+void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize)
+{
+	struct hlist_nulls_head *hptr;
+	unsigned int sequence, hsz;
+
+	do {
+		sequence = read_seqcount_begin(&nf_conntrack_generation);
+		hsz = nf_conntrack_htable_size;
+		hptr = nf_conntrack_hash;
+	} while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+
+	*hash = hptr;
+	*hsize = hsz;
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_get_ht);
+
 /*
  * Warning :
  * - Caller must take a reference on returned object
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 2aaa188..958a145 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -48,6 +48,8 @@ EXPORT_SYMBOL_GPL(print_tuple);
 
 struct ct_iter_state {
 	struct seq_net_private p;
+	struct hlist_nulls_head *hash;
+	unsigned int htable_size;
 	unsigned int bucket;
 	u_int64_t time_now;
 };
@@ -58,9 +60,10 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
 	struct hlist_nulls_node *n;
 
 	for (st->bucket = 0;
-	     st->bucket < nf_conntrack_htable_size;
+	     st->bucket < st->htable_size;
 	     st->bucket++) {
-		n = rcu_dereference(hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket]));
+		n = rcu_dereference(
+			hlist_nulls_first_rcu(&st->hash[st->bucket]));
 		if (!is_a_nulls(n))
 			return n;
 	}
@@ -75,12 +78,11 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
 	head = rcu_dereference(hlist_nulls_next_rcu(head));
 	while (is_a_nulls(head)) {
 		if (likely(get_nulls_value(head) == st->bucket)) {
-			if (++st->bucket >= nf_conntrack_htable_size)
+			if (++st->bucket >= st->htable_size)
 				return NULL;
 		}
 		head = rcu_dereference(
-				hlist_nulls_first_rcu(
-					&nf_conntrack_hash[st->bucket]));
+			hlist_nulls_first_rcu(&st->hash[st->bucket]));
 	}
 	return head;
 }
@@ -102,6 +104,8 @@ static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
 
 	st->time_now = ktime_get_real_ns();
 	rcu_read_lock();
+
+	nf_conntrack_get_ht(&st->hash, &st->htable_size);
 	return ct_get_idx(seq, *pos);
 }
 
-- 
2.1.4

  parent reply	other threads:[~2016-07-23 11:08 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-07-23 11:08 [PATCH 00/25] Netfilter/IPVS updates for net-next Pablo Neira Ayuso
2016-07-23 11:08 ` [PATCH 01/25] ipvs: count pre-established TCP states as active Pablo Neira Ayuso
2016-07-23 11:08 ` Pablo Neira Ayuso [this message]
2016-07-23 11:08 ` [PATCH 03/25] netfilter: cttimeout: unlink timeout obj again when hash resize happen Pablo Neira Ayuso
2016-07-23 11:08 ` [PATCH 04/25] netfilter: nf_ct_helper: unlink helper " Pablo Neira Ayuso
2016-07-23 11:08 ` [PATCH 05/25] netfilter: conntrack: simplify early_drop Pablo Neira Ayuso
2016-07-23 11:08 ` [PATCH 06/25] netfilter: move nat hlist_head to nf_conn Pablo Neira Ayuso
2016-07-23 11:08 ` [PATCH 07/25] netfilter: nat: convert nat bysrc hash to rhashtable Pablo Neira Ayuso
2016-07-23 11:08 ` [PATCH 08/25] netfilter: physdev: physdev-is-out should not work with OUTPUT chain Pablo Neira Ayuso
2016-07-23 11:08 ` [PATCH 09/25] netfilter: nft_ct: make byte/packet expr more friendly Pablo Neira Ayuso
2016-07-23 11:08 ` [PATCH 10/25] netfilter: constify arg to is_dying/confirmed Pablo Neira Ayuso
2016-07-23 11:08 ` [PATCH 11/25] netfilter: nf_tables: get rid of possible_net_t from set and basechain Pablo Neira Ayuso
2016-07-23 11:08 ` [PATCH 12/25] netfilter: nf_conntrack_h323: fix off-by-one in DecodeQ931 Pablo Neira Ayuso
2016-07-23 11:08 ` [PATCH 13/25] netfilter: conntrack: protect early_drop by rcu read lock Pablo Neira Ayuso
2016-07-23 11:08 ` [PATCH 14/25] netfilter: x_tables: speed up jump target validation Pablo Neira Ayuso
2016-07-23 11:08 ` [PATCH 15/25] netfilter: nft_ct: fix unpaired nf_connlabels_get/put call Pablo Neira Ayuso
2016-07-23 11:08 ` [PATCH 16/25] netfilter: Add helper array register/unregister functions Pablo Neira Ayuso
2016-07-23 11:08 ` [PATCH 17/25] netfilter: nft_log: fix possible memory leak if log expr init fail Pablo Neira Ayuso
2016-07-23 11:08 ` [PATCH 18/25] netfilter: nft_log: check the validity of log level Pablo Neira Ayuso
2016-07-23 11:08 ` [PATCH 19/25] netfilter: nft_log: fix snaplen does not truncate packets Pablo Neira Ayuso
2016-07-23 11:08 ` [PATCH 20/25] netfilter: nf_tables: allow to filter out rules by table and chain Pablo Neira Ayuso
2016-07-23 11:08 ` [PATCH 21/25] netfilter: conntrack: support a fixed size of 128 distinct labels Pablo Neira Ayuso
2016-07-23 11:08 ` [PATCH 22/25] netfilter: connlabels: move set helper to xt_connlabel Pablo Neira Ayuso
2016-07-23 11:08 ` [PATCH 23/25] netfilter: h323: Use mod_timer instead of set_expect_timeout Pablo Neira Ayuso
2016-07-23 11:08 ` [PATCH 24/25] netfilter: nft_compat: put back match/target module if init fail Pablo Neira Ayuso
2016-07-23 11:08 ` [PATCH 25/25] netfilter: nft_compat: fix crash when related match/target module is removed Pablo Neira Ayuso
  -- strict thread matches above, loose matches on Subject: below --
2016-07-23 11:02 [PATCH 00/25] Netfilter/IPVS updates for net-next Pablo Neira Ayuso
2016-07-23 11:02 ` [PATCH 02/25] netfilter: conntrack: fix race between nf_conntrack proc read and hash resize Pablo Neira Ayuso

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1469272119-29942-3-git-send-email-pablo@netfilter.org \
    --to=pablo@netfilter.org \
    --cc=davem@davemloft.net \
    --cc=netdev@vger.kernel.org \
    --cc=netfilter-devel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).