All of lore.kernel.org
 help / color / mirror / Atom feed
From: Sasha Levin <sashal@kernel.org>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: Florian Westphal <fw@strlen.de>,
	Pablo Neira Ayuso <pablo@netfilter.org>,
	Sasha Levin <sashal@kernel.org>,
	netfilter-devel@vger.kernel.org, coreteam@netfilter.org,
	netdev@vger.kernel.org
Subject: [PATCH AUTOSEL 4.20 06/72] netfilter: nft_compat: make lists per netns
Date: Sat, 23 Feb 2019 16:03:16 -0500	[thread overview]
Message-ID: <20190223210422.199966-6-sashal@kernel.org> (raw)
In-Reply-To: <20190223210422.199966-1-sashal@kernel.org>

From: Florian Westphal <fw@strlen.de>

[ Upstream commit cf52572ebbd7189a1966c2b5fc34b97078cd1dce ]

There are two problems with nft_compat since the netlink config
plane uses a per-netns mutex:

1. Concurrent add/del accesses to the same list
2. accesses to a list element after it has been free'd already.

This patch fixes the first problem.

Freeing occurs from a work queue, after transaction mutexes have been
released, i.e., it still possible for a new transaction (even from
same net ns) to find the to-be-deleted expression in the list.

The ->destroy functions are not allowed to have any such side effects,
i.e. the list_del() in the destroy function is not allowed.

This part of the problem is solved in the next patch.
I tried to make this work by serializing list access via mutex
and by moving list_del() to a deactivate callback, but
Taehee spotted following race on this approach:

  NET #0                          NET #1
   >select_ops()
   ->init()
                                   ->select_ops()
   ->deactivate()
   ->destroy()
      nft_xt_put()
       kfree_rcu(xt, rcu_head);
                                   ->init() <-- use-after-free occurred.

Unfortunately, we can't increment reference count in
select_ops(), because we can't undo the refcount increase in
case a different expression fails in the same batch.

(The destroy hook will only be called in case the expression
 was initialized successfully).

Fixes: f102d66b335a ("netfilter: nf_tables: use dedicated mutex to guard transactions")
Reported-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 net/netfilter/nft_compat.c | 129 +++++++++++++++++++++++++------------
 1 file changed, 89 insertions(+), 40 deletions(-)

diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index acc85acad31bd..abed3490a8f8a 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -22,6 +22,7 @@
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_arp/arp_tables.h>
 #include <net/netfilter/nf_tables.h>
+#include <net/netns/generic.h>
 
 struct nft_xt {
 	struct list_head	head;
@@ -43,6 +44,20 @@ struct nft_xt_match_priv {
 	void *info;
 };
 
+struct nft_compat_net {
+	struct list_head nft_target_list;
+	struct list_head nft_match_list;
+};
+
+static unsigned int nft_compat_net_id __read_mostly;
+static struct nft_expr_type nft_match_type;
+static struct nft_expr_type nft_target_type;
+
+static struct nft_compat_net *nft_compat_pernet(struct net *net)
+{
+	return net_generic(net, nft_compat_net_id);
+}
+
 static bool nft_xt_put(struct nft_xt *xt)
 {
 	if (refcount_dec_and_test(&xt->refcnt)) {
@@ -734,10 +749,6 @@ static const struct nfnetlink_subsystem nfnl_compat_subsys = {
 	.cb		= nfnl_nft_compat_cb,
 };
 
-static LIST_HEAD(nft_match_list);
-
-static struct nft_expr_type nft_match_type;
-
 static bool nft_match_cmp(const struct xt_match *match,
 			  const char *name, u32 rev, u32 family)
 {
@@ -749,6 +760,7 @@ static const struct nft_expr_ops *
 nft_match_select_ops(const struct nft_ctx *ctx,
 		     const struct nlattr * const tb[])
 {
+	struct nft_compat_net *cn;
 	struct nft_xt *nft_match;
 	struct xt_match *match;
 	unsigned int matchsize;
@@ -765,8 +777,10 @@ nft_match_select_ops(const struct nft_ctx *ctx,
 	rev = ntohl(nla_get_be32(tb[NFTA_MATCH_REV]));
 	family = ctx->family;
 
+	cn = nft_compat_pernet(ctx->net);
+
 	/* Re-use the existing match if it's already loaded. */
-	list_for_each_entry(nft_match, &nft_match_list, head) {
+	list_for_each_entry(nft_match, &cn->nft_match_list, head) {
 		struct xt_match *match = nft_match->ops.data;
 
 		if (nft_match_cmp(match, mt_name, rev, family))
@@ -810,7 +824,7 @@ nft_match_select_ops(const struct nft_ctx *ctx,
 
 	nft_match->ops.size = matchsize;
 
-	list_add(&nft_match->head, &nft_match_list);
+	list_add(&nft_match->head, &cn->nft_match_list);
 
 	return &nft_match->ops;
 err:
@@ -826,10 +840,6 @@ static struct nft_expr_type nft_match_type __read_mostly = {
 	.owner		= THIS_MODULE,
 };
 
-static LIST_HEAD(nft_target_list);
-
-static struct nft_expr_type nft_target_type;
-
 static bool nft_target_cmp(const struct xt_target *tg,
 			   const char *name, u32 rev, u32 family)
 {
@@ -841,6 +851,7 @@ static const struct nft_expr_ops *
 nft_target_select_ops(const struct nft_ctx *ctx,
 		      const struct nlattr * const tb[])
 {
+	struct nft_compat_net *cn;
 	struct nft_xt *nft_target;
 	struct xt_target *target;
 	char *tg_name;
@@ -861,8 +872,9 @@ nft_target_select_ops(const struct nft_ctx *ctx,
 	    strcmp(tg_name, "standard") == 0)
 		return ERR_PTR(-EINVAL);
 
+	cn = nft_compat_pernet(ctx->net);
 	/* Re-use the existing target if it's already loaded. */
-	list_for_each_entry(nft_target, &nft_target_list, head) {
+	list_for_each_entry(nft_target, &cn->nft_target_list, head) {
 		struct xt_target *target = nft_target->ops.data;
 
 		if (!target->target)
@@ -907,7 +919,7 @@ nft_target_select_ops(const struct nft_ctx *ctx,
 	else
 		nft_target->ops.eval = nft_target_eval_xt;
 
-	list_add(&nft_target->head, &nft_target_list);
+	list_add(&nft_target->head, &cn->nft_target_list);
 
 	return &nft_target->ops;
 err:
@@ -923,13 +935,74 @@ static struct nft_expr_type nft_target_type __read_mostly = {
 	.owner		= THIS_MODULE,
 };
 
+static int __net_init nft_compat_init_net(struct net *net)
+{
+	struct nft_compat_net *cn = nft_compat_pernet(net);
+
+	INIT_LIST_HEAD(&cn->nft_target_list);
+	INIT_LIST_HEAD(&cn->nft_match_list);
+
+	return 0;
+}
+
+static void __net_exit nft_compat_exit_net(struct net *net)
+{
+	struct nft_compat_net *cn = nft_compat_pernet(net);
+	struct nft_xt *xt, *next;
+
+	if (list_empty(&cn->nft_match_list) &&
+	    list_empty(&cn->nft_target_list))
+		return;
+
+	/* If there was an error that caused nft_xt expr to not be initialized
+	 * fully and noone else requested the same expression later, the lists
+	 * contain 0-refcount entries that still hold module reference.
+	 *
+	 * Clean them here.
+	 */
+	mutex_lock(&net->nft.commit_mutex);
+	list_for_each_entry_safe(xt, next, &cn->nft_target_list, head) {
+		struct xt_target *target = xt->ops.data;
+
+		list_del_init(&xt->head);
+
+		if (refcount_read(&xt->refcnt))
+			continue;
+		module_put(target->me);
+		kfree(xt);
+	}
+
+	list_for_each_entry_safe(xt, next, &cn->nft_match_list, head) {
+		struct xt_match *match = xt->ops.data;
+
+		list_del_init(&xt->head);
+
+		if (refcount_read(&xt->refcnt))
+			continue;
+		module_put(match->me);
+		kfree(xt);
+	}
+	mutex_unlock(&net->nft.commit_mutex);
+}
+
+static struct pernet_operations nft_compat_net_ops = {
+	.init	= nft_compat_init_net,
+	.exit	= nft_compat_exit_net,
+	.id	= &nft_compat_net_id,
+	.size	= sizeof(struct nft_compat_net),
+};
+
 static int __init nft_compat_module_init(void)
 {
 	int ret;
 
+	ret = register_pernet_subsys(&nft_compat_net_ops);
+	if (ret < 0)
+		goto err_target;
+
 	ret = nft_register_expr(&nft_match_type);
 	if (ret < 0)
-		return ret;
+		goto err_pernet;
 
 	ret = nft_register_expr(&nft_target_type);
 	if (ret < 0)
@@ -942,45 +1015,21 @@ static int __init nft_compat_module_init(void)
 	}
 
 	return ret;
-
 err_target:
 	nft_unregister_expr(&nft_target_type);
 err_match:
 	nft_unregister_expr(&nft_match_type);
+err_pernet:
+	unregister_pernet_subsys(&nft_compat_net_ops);
 	return ret;
 }
 
 static void __exit nft_compat_module_exit(void)
 {
-	struct nft_xt *xt, *next;
-
-	/* list should be empty here, it can be non-empty only in case there
-	 * was an error that caused nft_xt expr to not be initialized fully
-	 * and noone else requested the same expression later.
-	 *
-	 * In this case, the lists contain 0-refcount entries that still
-	 * hold module reference.
-	 */
-	list_for_each_entry_safe(xt, next, &nft_target_list, head) {
-		struct xt_target *target = xt->ops.data;
-
-		if (WARN_ON_ONCE(refcount_read(&xt->refcnt)))
-			continue;
-		module_put(target->me);
-		kfree(xt);
-	}
-
-	list_for_each_entry_safe(xt, next, &nft_match_list, head) {
-		struct xt_match *match = xt->ops.data;
-
-		if (WARN_ON_ONCE(refcount_read(&xt->refcnt)))
-			continue;
-		module_put(match->me);
-		kfree(xt);
-	}
 	nfnetlink_subsys_unregister(&nfnl_compat_subsys);
 	nft_unregister_expr(&nft_target_type);
 	nft_unregister_expr(&nft_match_type);
+	unregister_pernet_subsys(&nft_compat_net_ops);
 }
 
 MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFT_COMPAT);
-- 
2.19.1


  parent reply	other threads:[~2019-02-23 21:04 UTC|newest]

Thread overview: 86+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-02-23 21:03 [PATCH AUTOSEL 4.20 01/72] vti4: Fix a ipip packet processing bug in 'IPCOMP' virtual tunnel Sasha Levin
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 02/72] xfrm: refine validation of template and selector families Sasha Levin
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 03/72] xfrm: Make set-mark default behavior backward compatible Sasha Levin
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 04/72] perf ordered_events: Fix crash in ordered_events__free Sasha Levin
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 05/72] netfilter: nft_compat: use refcnt_t type for nft_xt reference count Sasha Levin
2019-02-23 21:03 ` Sasha Levin [this message]
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 07/72] netfilter: nft_compat: destroy function must not have side effects Sasha Levin
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 08/72] perf script: Fix crash with printing mixed trace point and other events Sasha Levin
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 09/72] perf core: Fix perf_proc_update_handler() bug Sasha Levin
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 10/72] perf python: Remove -fstack-clash-protection when building with some clang versions Sasha Levin
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 11/72] perf tools: Handle TOPOLOGY headers with no CPU Sasha Levin
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 12/72] perf script: Fix crash when processing recorded stat data Sasha Levin
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 13/72] IB/{hfi1, qib}: Fix WC.byte_len calculation for UD_SEND_WITH_IMM Sasha Levin
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 14/72] iommu/amd: Call free_iova_fast with pfn in map_sg Sasha Levin
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 15/72] iommu/amd: Unmap all mapped pages in error path of map_sg Sasha Levin
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 16/72] riscv: fixup max_low_pfn with PFN_DOWN Sasha Levin
2019-02-23 21:03   ` Sasha Levin
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 17/72] ipvs: Fix signed integer overflow when setsockopt timeout Sasha Levin
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 18/72] iommu/amd: Fix IOMMU page flush when detach device from a domain Sasha Levin
2019-02-23 21:03   ` Sasha Levin
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 19/72] clk: ti: Fix error handling in ti_clk_parse_divider_data() Sasha Levin
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 20/72] clk: qcom: gcc: Use active only source for CPUSS clocks Sasha Levin
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 21/72] xtensa: SMP: fix ccount_timer_shutdown Sasha Levin
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 22/72] RDMA/umem: Add missing initialization of owning_mm Sasha Levin
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 23/72] riscv: Adjust mmap base address at a third of task size Sasha Levin
2019-02-23 21:03   ` Sasha Levin
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 24/72] IB/ipoib: Fix for use-after-free in ipoib_cm_tx_start Sasha Levin
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 25/72] selftests: cpu-hotplug: fix case where CPUs offline > CPUs present Sasha Levin
2019-02-23 21:03   ` Sasha Levin
2019-02-23 21:03   ` sashal
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 26/72] xtensa: SMP: fix secondary CPU initialization Sasha Levin
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 27/72] xtensa: smp_lx200_defconfig: fix vectors clash Sasha Levin
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 28/72] xtensa: SMP: mark each possible CPU as present Sasha Levin
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 29/72] iomap: get/put the page in iomap_page_create/release() Sasha Levin
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 30/72] iomap: fix a use after free in iomap_dio_rw Sasha Levin
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 31/72] xtensa: SMP: limit number of possible CPUs by NR_CPUS Sasha Levin
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 32/72] net: altera_tse: fix msgdma_tx_completion on non-zero fill_level case Sasha Levin
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 33/72] net: hns: Fix for missing of_node_put() after of_parse_phandle() Sasha Levin
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 34/72] net: hns: Restart autoneg need return failed when autoneg off Sasha Levin
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 35/72] net: hns: Fix wrong read accesses via Clause 45 MDIO protocol Sasha Levin
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 36/72] net: stmmac: dwmac-rk: fix error handling in rk_gmac_powerup() Sasha Levin
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 37/72] netfilter: ebtables: compat: un-break 32bit setsockopt when no rules are present Sasha Levin
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 38/72] netfilter: nfnetlink_osf: add missing fmatch check Sasha Levin
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 39/72] gpio: vf610: Mask all GPIO interrupts Sasha Levin
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 40/72] selftests: net: use LDLIBS instead of LDFLAGS Sasha Levin
2019-02-23 21:03   ` Sasha Levin
2019-02-23 21:03   ` sashal
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 41/72] selftests: timers: " Sasha Levin
2019-02-23 21:03   ` Sasha Levin
2019-02-23 21:03   ` sashal
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 42/72] nfs: Fix NULL pointer dereference of dev_name Sasha Levin
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 43/72] qed: Fix bug in tx promiscuous mode settings Sasha Levin
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 44/72] qed: Fix LACP pdu drops for VFs Sasha Levin
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 45/72] qed: Fix VF probe failure while FLR Sasha Levin
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 46/72] qed: Fix system crash in ll2 xmit Sasha Levin
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 47/72] qed: Fix stack out of bounds bug Sasha Levin
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 48/72] scsi: libfc: free skb when receiving invalid flogi resp Sasha Levin
2019-02-23 21:03 ` [PATCH AUTOSEL 4.20 49/72] scsi: scsi_debug: fix write_same with virtual_gb problem Sasha Levin
2019-02-23 21:04 ` [PATCH AUTOSEL 4.20 50/72] scsi: bnx2fc: Fix error handling in probe() Sasha Levin
2019-02-23 21:04 ` [PATCH AUTOSEL 4.20 51/72] scsi: 53c700: pass correct "dev" to dma_alloc_attrs() Sasha Levin
2019-02-23 21:04 ` [PATCH AUTOSEL 4.20 52/72] platform/x86: Fix unmet dependency warning for ACPI_CMPC Sasha Levin
2019-02-23 21:04 ` [PATCH AUTOSEL 4.20 53/72] platform/x86: Fix unmet dependency warning for SAMSUNG_Q10 Sasha Levin
2019-02-23 21:04 ` [PATCH AUTOSEL 4.20 54/72] x86/cpu: Add Atom Tremont (Jacobsville) Sasha Levin
2019-02-23 21:04 ` [PATCH AUTOSEL 4.20 55/72] net: macb: Apply RXUBR workaround only to versions with errata Sasha Levin
2019-02-23 21:04 ` [PATCH AUTOSEL 4.20 56/72] x86/boot/compressed/64: Set EFER.LME=1 in 32-bit trampoline before returning to long mode Sasha Levin
2019-02-23 21:04 ` [PATCH AUTOSEL 4.20 57/72] cifs: fix computation for MAX_SMB2_HDR_SIZE Sasha Levin
2019-02-23 21:04 ` [PATCH AUTOSEL 4.20 58/72] blk-mq: fix a hung issue when fsync Sasha Levin
2019-02-23 21:04 ` [PATCH AUTOSEL 4.20 59/72] x86/microcode/amd: Don't falsely trick the late loading mechanism Sasha Levin
2019-02-23 21:04 ` [PATCH AUTOSEL 4.20 60/72] apparmor: Fix warning about unused function apparmor_ipv6_postroute Sasha Levin
2019-02-23 21:04 ` [PATCH AUTOSEL 4.20 61/72] arm64: kprobe: Always blacklist the KVM world-switch code Sasha Levin
2019-02-23 21:04 ` [PATCH AUTOSEL 4.20 62/72] apparmor: Fix aa_label_build() error handling for failed merges Sasha Levin
2019-02-23 21:04 ` [PATCH AUTOSEL 4.20 63/72] x86/kexec: Don't setup EFI info if EFI runtime is not enabled Sasha Levin
2019-02-23 21:04   ` Sasha Levin
2019-02-23 21:04 ` [PATCH AUTOSEL 4.20 64/72] proc: fix /proc/net/* after setns(2) Sasha Levin
2019-02-23 21:04   ` Sasha Levin
2019-02-23 21:04   ` sashal
2019-02-23 21:04 ` [PATCH AUTOSEL 4.20 65/72] x86_64: increase stack size for KASAN_EXTRA Sasha Levin
2019-02-23 21:04 ` [PATCH AUTOSEL 4.20 66/72] mm, memory_hotplug: is_mem_section_removable do not pass the end of a zone Sasha Levin
2019-02-26 12:46   ` Mike Rapoport
2019-03-11 15:21     ` Sasha Levin
2019-02-23 21:04 ` [PATCH AUTOSEL 4.20 67/72] mm, memory_hotplug: test_pages_in_a_zone do not pass the end of zone Sasha Levin
2019-02-23 21:04 ` [PATCH AUTOSEL 4.20 68/72] psi: fix aggregation idle shut-off Sasha Levin
2019-02-23 21:04 ` [PATCH AUTOSEL 4.20 69/72] lib/test_kmod.c: potential double free in error handling Sasha Levin
2019-02-23 21:04 ` [PATCH AUTOSEL 4.20 70/72] fs/drop_caches.c: avoid softlockups in drop_pagecache_sb() Sasha Levin
2019-02-23 21:04 ` [PATCH AUTOSEL 4.20 71/72] autofs: drop dentry reference only when it is never used Sasha Levin
2019-02-23 21:04 ` [PATCH AUTOSEL 4.20 72/72] autofs: fix error return in autofs_fill_super() Sasha Levin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190223210422.199966-6-sashal@kernel.org \
    --to=sashal@kernel.org \
    --cc=coreteam@netfilter.org \
    --cc=fw@strlen.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=netfilter-devel@vger.kernel.org \
    --cc=pablo@netfilter.org \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.