From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
stable@vger.kernel.org, Shawn Bohrer <sbohrer@cloudflare.com>,
Florian Westphal <fw@strlen.de>,
Pablo Neira Ayuso <pablo@netfilter.org>
Subject: [PATCH 4.19 12/99] netfilter: nf_conncount: speculative garbage collection on empty lists
Date: Mon, 21 Jan 2019 14:48:04 +0100 [thread overview]
Message-ID: <20190121134914.386905593@linuxfoundation.org> (raw)
In-Reply-To: <20190121134913.924726465@linuxfoundation.org>
4.19-stable review patch. If anyone has any objections, please let me know.
------------------
From: Pablo Neira Ayuso <pablo@netfilter.org>
commit c80f10bc973af2ace6b1414724eeff61eaa71837 upstream.
Instead of removing an empty list node that might be reintroduced soon
thereafter, tentatively place the empty list node on the list passed to
tree_nodes_free(), then re-check if the list is empty again before erasing
it from the tree.
[ Florian: rebase on top of pending nf_conncount fixes ]
Fixes: 5c789e131cbb9 ("netfilter: nf_conncount: Add list lock and gc worker, and RCU for init tree search")
Reviewed-by: Shawn Bohrer <sbohrer@cloudflare.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
include/net/netfilter/nf_conntrack_count.h | 1
net/netfilter/nf_conncount.c | 47 +++++++++--------------------
2 files changed, 15 insertions(+), 33 deletions(-)
--- a/include/net/netfilter/nf_conntrack_count.h
+++ b/include/net/netfilter/nf_conntrack_count.h
@@ -9,7 +9,6 @@ struct nf_conncount_list {
spinlock_t list_lock;
struct list_head head; /* connections with the same filtering key */
unsigned int count; /* length of list */
- bool dead;
};
struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family,
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -81,27 +81,20 @@ static int key_diff(const u32 *a, const
return memcmp(a, b, klen * sizeof(u32));
}
-static bool conn_free(struct nf_conncount_list *list,
+static void conn_free(struct nf_conncount_list *list,
struct nf_conncount_tuple *conn)
{
- bool free_entry = false;
-
lockdep_assert_held(&list->list_lock);
list->count--;
list_del(&conn->node);
- if (list->count == 0) {
- list->dead = true;
- free_entry = true;
- }
kmem_cache_free(conncount_conn_cachep, conn);
- return free_entry;
}
static const struct nf_conntrack_tuple_hash *
find_or_evict(struct net *net, struct nf_conncount_list *list,
- struct nf_conncount_tuple *conn, bool *free_entry)
+ struct nf_conncount_tuple *conn)
{
const struct nf_conntrack_tuple_hash *found;
unsigned long a, b;
@@ -121,7 +114,7 @@ find_or_evict(struct net *net, struct nf
*/
age = a - b;
if (conn->cpu == cpu || age >= 2) {
- *free_entry = conn_free(list, conn);
+ conn_free(list, conn);
return ERR_PTR(-ENOENT);
}
@@ -137,14 +130,13 @@ static int __nf_conncount_add(struct net
struct nf_conncount_tuple *conn, *conn_n;
struct nf_conn *found_ct;
unsigned int collect = 0;
- bool free_entry = false;
/* check the saved connections */
list_for_each_entry_safe(conn, conn_n, &list->head, node) {
if (collect > CONNCOUNT_GC_MAX_NODES)
break;
- found = find_or_evict(net, list, conn, &free_entry);
+ found = find_or_evict(net, list, conn);
if (IS_ERR(found)) {
/* Not found, but might be about to be confirmed */
if (PTR_ERR(found) == -EAGAIN) {
@@ -221,7 +213,6 @@ void nf_conncount_list_init(struct nf_co
spin_lock_init(&list->list_lock);
INIT_LIST_HEAD(&list->head);
list->count = 0;
- list->dead = false;
}
EXPORT_SYMBOL_GPL(nf_conncount_list_init);
@@ -233,7 +224,6 @@ bool nf_conncount_gc_list(struct net *ne
struct nf_conncount_tuple *conn, *conn_n;
struct nf_conn *found_ct;
unsigned int collected = 0;
- bool free_entry = false;
bool ret = false;
/* don't bother if other cpu is already doing GC */
@@ -241,15 +231,10 @@ bool nf_conncount_gc_list(struct net *ne
return false;
list_for_each_entry_safe(conn, conn_n, &list->head, node) {
- found = find_or_evict(net, list, conn, &free_entry);
+ found = find_or_evict(net, list, conn);
if (IS_ERR(found)) {
- if (PTR_ERR(found) == -ENOENT) {
- if (free_entry) {
- spin_unlock(&list->list_lock);
- return true;
- }
+ if (PTR_ERR(found) == -ENOENT)
collected++;
- }
continue;
}
@@ -260,10 +245,7 @@ bool nf_conncount_gc_list(struct net *ne
* closed already -> ditch it
*/
nf_ct_put(found_ct);
- if (conn_free(list, conn)) {
- spin_unlock(&list->list_lock);
- return true;
- }
+ conn_free(list, conn);
collected++;
continue;
}
@@ -273,10 +255,8 @@ bool nf_conncount_gc_list(struct net *ne
break;
}
- if (!list->count) {
- list->dead = true;
+ if (!list->count)
ret = true;
- }
spin_unlock(&list->list_lock);
return ret;
@@ -291,6 +271,7 @@ static void __tree_nodes_free(struct rcu
kmem_cache_free(conncount_rb_cachep, rbconn);
}
+/* caller must hold tree nf_conncount_locks[] lock */
static void tree_nodes_free(struct rb_root *root,
struct nf_conncount_rb *gc_nodes[],
unsigned int gc_count)
@@ -300,8 +281,10 @@ static void tree_nodes_free(struct rb_ro
while (gc_count) {
rbconn = gc_nodes[--gc_count];
spin_lock(&rbconn->list.list_lock);
- rb_erase(&rbconn->node, root);
- call_rcu(&rbconn->rcu_head, __tree_nodes_free);
+ if (!rbconn->list.count) {
+ rb_erase(&rbconn->node, root);
+ call_rcu(&rbconn->rcu_head, __tree_nodes_free);
+ }
spin_unlock(&rbconn->list.list_lock);
}
}
@@ -318,7 +301,6 @@ insert_tree(struct net *net,
struct rb_root *root,
unsigned int hash,
const u32 *key,
- u8 keylen,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone)
{
@@ -327,6 +309,7 @@ insert_tree(struct net *net,
struct nf_conncount_rb *rbconn;
struct nf_conncount_tuple *conn;
unsigned int count = 0, gc_count = 0;
+ u8 keylen = data->keylen;
bool do_gc = true;
spin_lock_bh(&nf_conncount_locks[hash]);
@@ -454,7 +437,7 @@ count_tree(struct net *net,
if (!tuple)
return 0;
- return insert_tree(net, data, root, hash, key, keylen, tuple, zone);
+ return insert_tree(net, data, root, hash, key, tuple, zone);
}
static void tree_gc_worker(struct work_struct *work)
next prev parent reply other threads:[~2019-01-21 14:08 UTC|newest]
Thread overview: 109+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-01-21 13:47 [PATCH 4.19 00/99] 4.19.17-stable review Greg Kroah-Hartman
2019-01-21 13:47 ` [PATCH 4.19 01/99] tty/ldsem: Wake up readers after timed out down_write() Greg Kroah-Hartman
2019-01-21 13:47 ` [PATCH 4.19 02/99] tty: Hold tty_ldisc_lock() during tty_reopen() Greg Kroah-Hartman
2019-01-21 13:47 ` [PATCH 4.19 03/99] tty: Simplify tty->count math in tty_reopen() Greg Kroah-Hartman
2019-01-21 13:47 ` [PATCH 4.19 04/99] tty: Dont hold ldisc lock in tty_reopen() if ldisc present Greg Kroah-Hartman
2019-01-21 13:47 ` [PATCH 4.19 05/99] can: gw: ensure DLC boundaries after CAN frame modification Greg Kroah-Hartman
2019-01-21 13:47 ` [PATCH 4.19 06/99] netfilter: nf_conncount: replace CONNCOUNT_LOCK_SLOTS with CONNCOUNT_SLOTS Greg Kroah-Hartman
2019-01-21 13:47 ` [PATCH 4.19 07/99] netfilter: nf_conncount: dont skip eviction when age is negative Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 08/99] netfilter: nf_conncount: split gc in two phases Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 09/99] netfilter: nf_conncount: restart search when nodes have been erased Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 10/99] netfilter: nf_conncount: merge lookup and add functions Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 11/99] netfilter: nf_conncount: move all list iterations under spinlock Greg Kroah-Hartman
2019-01-21 13:48 ` Greg Kroah-Hartman [this message]
2019-01-21 13:48 ` [PATCH 4.19 13/99] netfilter: nf_conncount: fix argument order to find_next_bit Greg Kroah-Hartman
2019-04-22 14:41 ` Andreas Hartmann
2019-04-22 17:27 ` Florian Westphal
2019-04-22 18:49 ` Andreas Hartmann
2019-04-22 18:57 ` Florian Westphal
2019-04-22 19:26 ` Andreas Hartmann
2019-04-22 19:40 ` Florian Westphal
2019-01-21 13:48 ` [PATCH 4.19 14/99] mmc: sdhci-msm: Disable CDR function on TX Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 15/99] Revert "scsi: target: iscsi: cxgbit: fix csk leak" Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 16/99] scsi: target: iscsi: cxgbit: fix csk leak Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 17/99] scsi: target: iscsi: cxgbit: fix csk leak - 2 Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 18/99] arm64/kvm: consistently handle host HCR_EL2 flags Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 19/99] arm64: Dont trap host pointer auth use to EL2 Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 20/99] ipv6: fix kernel-infoleak in ipv6_local_error() Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 21/99] net: bridge: fix a bug on using a neighbour cache entry without checking its state Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 22/99] packet: Do not leak dev refcounts on error exit Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 23/99] tcp: change txhash on SYN-data timeout Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 24/99] tun: publish tfile after its fully initialized Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 25/99] lan743x: Remove phy_read from link status change function Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 26/99] smc: move unhash as early as possible in smc_release() Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 27/99] r8169: dont try to read counters if chip is in a PCI power-save state Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 28/99] bonding: update nest level on unlink Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 29/99] ip: on queued skb use skb_header_pointer instead of pskb_may_pull Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 30/99] r8169: load Realtek PHY driver module before r8169 Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 31/99] crypto: sm3 - fix undefined shift by >= width of value Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 32/99] crypto: caam - fix zero-length buffer DMA mapping Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 33/99] crypto: authencesn - Avoid twice completion call in decrypt path Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 34/99] crypto: ccree - convert to use crypto_authenc_extractkeys() Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 35/99] crypto: bcm " Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 36/99] crypto: authenc - fix parsing key with misaligned rta_len Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 37/99] crypto: talitos - reorder code in talitos_edesc_alloc() Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 38/99] crypto: talitos - fix ablkcipher for CONFIG_VMAP_STACK Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 39/99] xen: Fix x86 sched_clock() interface for xen Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 40/99] Revert "btrfs: balance dirty metadata pages in btrfs_finish_ordered_io" Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 41/99] btrfs: wait on ordered extents on abort cleanup Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 42/99] Yama: Check for pid death before checking ancestry Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 43/99] scsi: core: Synchronize request queue PM status only on successful resume Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 44/99] scsi: sd: Fix cache_type_store() Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 45/99] mips: fix n32 compat_ipc_parse_version Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 46/99] MIPS: BCM47XX: Setup struct device for the SoC Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 47/99] MIPS: lantiq: Fix IPI interrupt handling Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 48/99] drm/i915/gvt: Fix mmap range check Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 49/99] OF: properties: add missing of_node_put Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 50/99] mfd: tps6586x: Handle interrupts on suspend Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 51/99] media: v4l: ioctl: Validate num_planes for debug messages Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 52/99] RDMA/nldev: Dont expose unsafe global rkey to regular user Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 53/99] RDMA/vmw_pvrdma: Return the correct opcode when creating WR Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 54/99] kbuild: Disable LD_DEAD_CODE_DATA_ELIMINATION with ftrace & GCC <= 4.7 Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 55/99] net: dsa: realtek-smi: fix OF child-node lookup Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 56/99] pstore/ram: Avoid allocation and leak of platform data Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 57/99] arm64: kaslr: ensure randomized quantities are clean to the PoC Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 58/99] arm64: dts: marvell: armada-ap806: reserve PSCI area Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 59/99] Disable MSI also when pcie-octeon.pcie_disable on Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 60/99] fix int_sqrt64() for very large numbers Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 61/99] omap2fb: Fix stack memory disclosure Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 62/99] media: vivid: fix error handling of kthread_run Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 63/99] media: vivid: set min width/height to a value > 0 Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 64/99] bpf: in __bpf_redirect_no_mac pull mac only if present Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 65/99] ipv6: make icmp6_send() robust against null skb->dev Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 66/99] LSM: Check for NULL cred-security on free Greg Kroah-Hartman
2019-01-21 13:48 ` [PATCH 4.19 67/99] media: vb2: vb2_mmap: move lock up Greg Kroah-Hartman
2019-01-21 13:49 ` [PATCH 4.19 68/99] sunrpc: handle ENOMEM in rpcb_getport_async Greg Kroah-Hartman
2019-01-21 13:49 ` [PATCH 4.19 69/99] netfilter: ebtables: account ebt_table_info to kmemcg Greg Kroah-Hartman
2019-01-21 13:49 ` [PATCH 4.19 70/99] block: use rcu_work instead of call_rcu to avoid sleep in softirq Greg Kroah-Hartman
2019-01-21 13:49 ` [PATCH 4.19 71/99] selinux: fix GPF on invalid policy Greg Kroah-Hartman
2019-01-21 13:49 ` [PATCH 4.19 72/99] blockdev: Fix livelocks on loop device Greg Kroah-Hartman
2019-01-21 13:49 ` [PATCH 4.19 73/99] sctp: allocate sctp_sockaddr_entry with kzalloc Greg Kroah-Hartman
2019-01-21 13:49 ` [PATCH 4.19 74/99] tipc: fix uninit-value in in tipc_conn_rcv_sub Greg Kroah-Hartman
2019-01-21 13:49 ` [PATCH 4.19 75/99] tipc: fix uninit-value in tipc_nl_compat_link_reset_stats Greg Kroah-Hartman
2019-01-21 13:49 ` [PATCH 4.19 76/99] tipc: fix uninit-value in tipc_nl_compat_bearer_enable Greg Kroah-Hartman
2019-01-21 13:49 ` [PATCH 4.19 77/99] tipc: fix uninit-value in tipc_nl_compat_link_set Greg Kroah-Hartman
2019-01-21 13:49 ` [PATCH 4.19 78/99] tipc: fix uninit-value in tipc_nl_compat_name_table_dump Greg Kroah-Hartman
2019-01-21 13:49 ` [PATCH 4.19 79/99] tipc: fix uninit-value in tipc_nl_compat_doit Greg Kroah-Hartman
2019-01-21 13:49 ` [PATCH 4.19 80/99] block/loop: Dont grab "struct file" for vfs_getattr() operation Greg Kroah-Hartman
2019-01-21 13:49 ` [PATCH 4.19 81/99] block/loop: Use global lock for ioctl() operation Greg Kroah-Hartman
2019-01-21 13:49 ` [PATCH 4.19 82/99] loop: Fold __loop_release into loop_release Greg Kroah-Hartman
2019-01-21 13:49 ` [PATCH 4.19 83/99] loop: Get rid of loop_index_mutex Greg Kroah-Hartman
2019-01-21 13:49 ` [PATCH 4.19 84/99] loop: Push lo_ctl_mutex down into individual ioctls Greg Kroah-Hartman
2019-01-21 13:49 ` [PATCH 4.19 85/99] loop: Split setting of lo_state from loop_clr_fd Greg Kroah-Hartman
2019-01-21 13:49 ` [PATCH 4.19 86/99] loop: Push loop_ctl_mutex down into loop_clr_fd() Greg Kroah-Hartman
2019-01-21 13:49 ` [PATCH 4.19 87/99] loop: Push loop_ctl_mutex down to loop_get_status() Greg Kroah-Hartman
2019-01-21 13:49 ` [PATCH 4.19 88/99] loop: Push loop_ctl_mutex down to loop_set_status() Greg Kroah-Hartman
2019-01-21 13:49 ` [PATCH 4.19 89/99] loop: Push loop_ctl_mutex down to loop_set_fd() Greg Kroah-Hartman
2019-01-21 13:49 ` [PATCH 4.19 90/99] loop: Push loop_ctl_mutex down to loop_change_fd() Greg Kroah-Hartman
2019-01-21 13:49 ` [PATCH 4.19 91/99] loop: Move special partition reread handling in loop_clr_fd() Greg Kroah-Hartman
2019-01-21 13:49 ` [PATCH 4.19 92/99] loop: Move loop_reread_partitions() out of loop_ctl_mutex Greg Kroah-Hartman
2019-01-21 13:49 ` [PATCH 4.19 93/99] loop: Fix deadlock when calling blkdev_reread_part() Greg Kroah-Hartman
2019-01-21 13:49 ` [PATCH 4.19 94/99] loop: Avoid circular locking dependency between loop_ctl_mutex and bd_mutex Greg Kroah-Hartman
2019-01-21 13:49 ` [PATCH 4.19 95/99] loop: Get rid of nested acquisition of loop_ctl_mutex Greg Kroah-Hartman
2019-01-21 13:49 ` [PATCH 4.19 96/99] loop: Fix double mutex_unlock(&loop_ctl_mutex) in loop_control_ioctl() Greg Kroah-Hartman
2019-01-21 13:49 ` [PATCH 4.19 97/99] loop: drop caches if offset or block_size are changed Greg Kroah-Hartman
2019-01-21 13:49 ` [PATCH 4.19 98/99] drm/fb-helper: Ignore the value of fb_var_screeninfo.pixclock Greg Kroah-Hartman
2019-01-21 13:49 ` [PATCH 4.19 99/99] selftests: Fix test errors related to lib.mk khdr target Greg Kroah-Hartman
2019-01-22 16:08 ` [PATCH 4.19 00/99] 4.19.17-stable review Naresh Kamboju
2019-01-22 19:23 ` Guenter Roeck
2019-01-22 22:27 ` shuah
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190121134914.386905593@linuxfoundation.org \
--to=gregkh@linuxfoundation.org \
--cc=fw@strlen.de \
--cc=linux-kernel@vger.kernel.org \
--cc=pablo@netfilter.org \
--cc=sbohrer@cloudflare.com \
--cc=stable@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).