All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH nf-next] netfilter: conntrack: configurable conntrack gc scan interval
@ 2021-11-20 18:23 Eyal Birger
  2021-11-20 21:38   ` kernel test robot
  0 siblings, 1 reply; 3+ messages in thread
From: Eyal Birger @ 2021-11-20 18:23 UTC (permalink / raw)
  To: fw; +Cc: netfilter-devel, shmulik.ladkani, Eyal Birger

In commit 4608fdfc07e1 ("netfilter: conntrack: collect all entries in one cycle"),
conntrack gc was changed to run periodically every 2 minutes.

On systems handling many UDP connections, this leads to bursts of session
termination handling.

As suggested in the original commit, provide the ability to control the gc
interval using a sysctl knob.

Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
---
 Documentation/networking/nf_conntrack-sysctl.rst | 4 ++++
 include/net/netfilter/nf_conntrack.h             | 1 +
 net/netfilter/nf_conntrack_core.c                | 4 +++-
 net/netfilter/nf_conntrack_standalone.c          | 9 +++++++++
 4 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/Documentation/networking/nf_conntrack-sysctl.rst b/Documentation/networking/nf_conntrack-sysctl.rst
index 311128abb768..7aaa5e26ed3f 100644
--- a/Documentation/networking/nf_conntrack-sysctl.rst
+++ b/Documentation/networking/nf_conntrack-sysctl.rst
@@ -207,3 +207,7 @@ nf_flowtable_udp_timeout - INTEGER (seconds)
         Control offload timeout for udp connections.
         UDP connections may be offloaded from nf conntrack to nf flow table.
         Once aged, the connection is returned to nf conntrack with udp pickup timeout.
+
+nf_conntrack_gc_scan_interval - INTEGER (seconds)
+	default 120
+	minimum 1
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index cc663c68ddc4..f4ed812936a8 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -314,6 +314,7 @@ extern struct hlist_nulls_head *nf_conntrack_hash;
 extern unsigned int nf_conntrack_htable_size;
 extern seqcount_spinlock_t nf_conntrack_generation;
 extern unsigned int nf_conntrack_max;
+extern unsigned int nf_conntrack_gc_scan_interval;
 
 /* must be called with rcu read lock held */
 static inline void
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 054ee9d25efe..5fc56751d4ed 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -83,6 +83,8 @@ static DEFINE_MUTEX(nf_conntrack_mutex);
 #define MIN_CHAINLEN	8u
 #define MAX_CHAINLEN	(32u - MIN_CHAINLEN)
 
+__read_mostly unsigned int nf_conntrack_gc_scan_interval = GC_SCAN_INTERVAL;
+EXPORT_SYMBOL_GPL(nf_conntrack_gc_scan_interval);
 static struct conntrack_gc_work conntrack_gc_work;
 
 void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
@@ -1422,7 +1424,7 @@ static void gc_worker(struct work_struct *work)
 {
 	unsigned long end_time = jiffies + GC_SCAN_MAX_DURATION;
 	unsigned int i, hashsz, nf_conntrack_max95 = 0;
-	unsigned long next_run = GC_SCAN_INTERVAL;
+	unsigned long next_run = max(nf_conntrack_gc_scan_interval, HZ);
 	struct conntrack_gc_work *gc_work;
 	gc_work = container_of(work, struct conntrack_gc_work, dwork.work);
 
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 80f675d884b2..436e37df70e5 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -565,6 +565,7 @@ enum nf_ct_sysctl_index {
 #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
 	NF_SYSCTL_CT_TIMESTAMP,
 #endif
+	NF_SYSCTL_CT_GC_SCAN_INTERVAL,
 	NF_SYSCTL_CT_PROTO_TIMEOUT_GENERIC,
 	NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_SYN_SENT,
 	NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_SYN_RECV,
@@ -707,6 +708,13 @@ static struct ctl_table nf_ct_sysctl_table[] = {
 		.extra2 	= SYSCTL_ONE,
 	},
 #endif
+	[NF_SYSCTL_CT_GC_SCAN_INTERVAL] = {
+		.procname	= "nf_conntrack_gc_scan_interval",
+		.data		= &nf_conntrack_gc_scan_interval,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
 	[NF_SYSCTL_CT_PROTO_TIMEOUT_GENERIC] = {
 		.procname	= "nf_conntrack_generic_timeout",
 		.maxlen		= sizeof(unsigned int),
@@ -1123,6 +1131,7 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
 		table[NF_SYSCTL_CT_MAX].mode = 0444;
 		table[NF_SYSCTL_CT_EXPECT_MAX].mode = 0444;
 		table[NF_SYSCTL_CT_BUCKETS].mode = 0444;
+		table[NF_SYSCTL_CT_GC_SCAN_INTERVAL].mode = 0444;
 	}
 
 	cnet->sysctl_header = register_net_sysctl(net, "net/netfilter", table);
-- 
2.32.0


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH nf-next] netfilter: conntrack: configurable conntrack gc scan interval
  2021-11-20 18:23 [PATCH nf-next] netfilter: conntrack: configurable conntrack gc scan interval Eyal Birger
@ 2021-11-20 21:38   ` kernel test robot
  0 siblings, 0 replies; 3+ messages in thread
From: kernel test robot @ 2021-11-20 21:38 UTC (permalink / raw)
  To: Eyal Birger, fw
  Cc: llvm, kbuild-all, netfilter-devel, shmulik.ladkani, Eyal Birger

[-- Attachment #1: Type: text/plain, Size: 5421 bytes --]

Hi Eyal,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on nf-next/master]

url:    https://github.com/0day-ci/linux/commits/Eyal-Birger/netfilter-conntrack-configurable-conntrack-gc-scan-interval/20211121-022522
base:   https://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next.git master
config: hexagon-randconfig-r045-20211121 (attached as .config)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/2e10f30e82a92d09c72cfcd3e659cd2395bf3e3e
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Eyal-Birger/netfilter-conntrack-configurable-conntrack-gc-scan-interval/20211121-022522
        git checkout 2e10f30e82a92d09c72cfcd3e659cd2395bf3e3e
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 ARCH=hexagon 

If you fix the issue, kindly add the following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> net/netfilter/nf_conntrack_core.c:1427:27: warning: comparison of distinct pointer types ('typeof (nf_conntrack_gc_scan_interval) *' (aka 'unsigned int *') and 'typeof (300) *' (aka 'int *')) [-Wcompare-distinct-pointer-types]
           unsigned long next_run = max(nf_conntrack_gc_scan_interval, HZ);
                                    ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/minmax.h:52:19: note: expanded from macro 'max'
   #define max(x, y)       __careful_cmp(x, y, >)
                           ^~~~~~~~~~~~~~~~~~~~~~
   include/linux/minmax.h:36:24: note: expanded from macro '__careful_cmp'
           __builtin_choose_expr(__safe_cmp(x, y), \
                                 ^~~~~~~~~~~~~~~~
   include/linux/minmax.h:26:4: note: expanded from macro '__safe_cmp'
                   (__typecheck(x, y) && __no_side_effects(x, y))
                    ^~~~~~~~~~~~~~~~~
   include/linux/minmax.h:20:28: note: expanded from macro '__typecheck'
           (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1)))
                      ~~~~~~~~~~~~~~ ^  ~~~~~~~~~~~~~~
   1 warning generated.


vim +1427 net/netfilter/nf_conntrack_core.c

  1422	
  1423	static void gc_worker(struct work_struct *work)
  1424	{
  1425		unsigned long end_time = jiffies + GC_SCAN_MAX_DURATION;
  1426		unsigned int i, hashsz, nf_conntrack_max95 = 0;
> 1427		unsigned long next_run = max(nf_conntrack_gc_scan_interval, HZ);
  1428		struct conntrack_gc_work *gc_work;
  1429		gc_work = container_of(work, struct conntrack_gc_work, dwork.work);
  1430	
  1431		i = gc_work->next_bucket;
  1432		if (gc_work->early_drop)
  1433			nf_conntrack_max95 = nf_conntrack_max / 100u * 95u;
  1434	
  1435		do {
  1436			struct nf_conntrack_tuple_hash *h;
  1437			struct hlist_nulls_head *ct_hash;
  1438			struct hlist_nulls_node *n;
  1439			struct nf_conn *tmp;
  1440	
  1441			rcu_read_lock();
  1442	
  1443			nf_conntrack_get_ht(&ct_hash, &hashsz);
  1444			if (i >= hashsz) {
  1445				rcu_read_unlock();
  1446				break;
  1447			}
  1448	
  1449			hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
  1450				struct nf_conntrack_net *cnet;
  1451				struct net *net;
  1452	
  1453				tmp = nf_ct_tuplehash_to_ctrack(h);
  1454	
  1455				if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
  1456					nf_ct_offload_timeout(tmp);
  1457					continue;
  1458				}
  1459	
  1460				if (nf_ct_is_expired(tmp)) {
  1461					nf_ct_gc_expired(tmp);
  1462					continue;
  1463				}
  1464	
  1465				if (nf_conntrack_max95 == 0 || gc_worker_skip_ct(tmp))
  1466					continue;
  1467	
  1468				net = nf_ct_net(tmp);
  1469				cnet = nf_ct_pernet(net);
  1470				if (atomic_read(&cnet->count) < nf_conntrack_max95)
  1471					continue;
  1472	
  1473				/* need to take reference to avoid possible races */
  1474				if (!atomic_inc_not_zero(&tmp->ct_general.use))
  1475					continue;
  1476	
  1477				if (gc_worker_skip_ct(tmp)) {
  1478					nf_ct_put(tmp);
  1479					continue;
  1480				}
  1481	
  1482				if (gc_worker_can_early_drop(tmp))
  1483					nf_ct_kill(tmp);
  1484	
  1485				nf_ct_put(tmp);
  1486			}
  1487	
  1488			/* could check get_nulls_value() here and restart if ct
  1489			 * was moved to another chain.  But given gc is best-effort
  1490			 * we will just continue with next hash slot.
  1491			 */
  1492			rcu_read_unlock();
  1493			cond_resched();
  1494			i++;
  1495	
  1496			if (time_after(jiffies, end_time) && i < hashsz) {
  1497				gc_work->next_bucket = i;
  1498				next_run = 0;
  1499				break;
  1500			}
  1501		} while (i < hashsz);
  1502	
  1503		if (gc_work->exiting)
  1504			return;
  1505	
  1506		/*
  1507		 * Eviction will normally happen from the packet path, and not
  1508		 * from this gc worker.
  1509		 *
  1510		 * This worker is only here to reap expired entries when system went
  1511		 * idle after a busy period.
  1512		 */
  1513		if (next_run) {
  1514			gc_work->early_drop = false;
  1515			gc_work->next_bucket = 0;
  1516		}
  1517		queue_delayed_work(system_power_efficient_wq, &gc_work->dwork, next_run);
  1518	}
  1519	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 25854 bytes --]

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH nf-next] netfilter: conntrack: configurable conntrack gc scan interval
@ 2021-11-20 21:38   ` kernel test robot
  0 siblings, 0 replies; 3+ messages in thread
From: kernel test robot @ 2021-11-20 21:38 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 5569 bytes --]

Hi Eyal,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on nf-next/master]

url:    https://github.com/0day-ci/linux/commits/Eyal-Birger/netfilter-conntrack-configurable-conntrack-gc-scan-interval/20211121-022522
base:   https://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next.git master
config: hexagon-randconfig-r045-20211121 (attached as .config)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/2e10f30e82a92d09c72cfcd3e659cd2395bf3e3e
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Eyal-Birger/netfilter-conntrack-configurable-conntrack-gc-scan-interval/20211121-022522
        git checkout 2e10f30e82a92d09c72cfcd3e659cd2395bf3e3e
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 ARCH=hexagon 

If you fix the issue, kindly add the following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> net/netfilter/nf_conntrack_core.c:1427:27: warning: comparison of distinct pointer types ('typeof (nf_conntrack_gc_scan_interval) *' (aka 'unsigned int *') and 'typeof (300) *' (aka 'int *')) [-Wcompare-distinct-pointer-types]
           unsigned long next_run = max(nf_conntrack_gc_scan_interval, HZ);
                                    ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/minmax.h:52:19: note: expanded from macro 'max'
   #define max(x, y)       __careful_cmp(x, y, >)
                           ^~~~~~~~~~~~~~~~~~~~~~
   include/linux/minmax.h:36:24: note: expanded from macro '__careful_cmp'
           __builtin_choose_expr(__safe_cmp(x, y), \
                                 ^~~~~~~~~~~~~~~~
   include/linux/minmax.h:26:4: note: expanded from macro '__safe_cmp'
                   (__typecheck(x, y) && __no_side_effects(x, y))
                    ^~~~~~~~~~~~~~~~~
   include/linux/minmax.h:20:28: note: expanded from macro '__typecheck'
           (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1)))
                      ~~~~~~~~~~~~~~ ^  ~~~~~~~~~~~~~~
   1 warning generated.


vim +1427 net/netfilter/nf_conntrack_core.c

  1422	
  1423	static void gc_worker(struct work_struct *work)
  1424	{
  1425		unsigned long end_time = jiffies + GC_SCAN_MAX_DURATION;
  1426		unsigned int i, hashsz, nf_conntrack_max95 = 0;
> 1427		unsigned long next_run = max(nf_conntrack_gc_scan_interval, HZ);
  1428		struct conntrack_gc_work *gc_work;
  1429		gc_work = container_of(work, struct conntrack_gc_work, dwork.work);
  1430	
  1431		i = gc_work->next_bucket;
  1432		if (gc_work->early_drop)
  1433			nf_conntrack_max95 = nf_conntrack_max / 100u * 95u;
  1434	
  1435		do {
  1436			struct nf_conntrack_tuple_hash *h;
  1437			struct hlist_nulls_head *ct_hash;
  1438			struct hlist_nulls_node *n;
  1439			struct nf_conn *tmp;
  1440	
  1441			rcu_read_lock();
  1442	
  1443			nf_conntrack_get_ht(&ct_hash, &hashsz);
  1444			if (i >= hashsz) {
  1445				rcu_read_unlock();
  1446				break;
  1447			}
  1448	
  1449			hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
  1450				struct nf_conntrack_net *cnet;
  1451				struct net *net;
  1452	
  1453				tmp = nf_ct_tuplehash_to_ctrack(h);
  1454	
  1455				if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
  1456					nf_ct_offload_timeout(tmp);
  1457					continue;
  1458				}
  1459	
  1460				if (nf_ct_is_expired(tmp)) {
  1461					nf_ct_gc_expired(tmp);
  1462					continue;
  1463				}
  1464	
  1465				if (nf_conntrack_max95 == 0 || gc_worker_skip_ct(tmp))
  1466					continue;
  1467	
  1468				net = nf_ct_net(tmp);
  1469				cnet = nf_ct_pernet(net);
  1470				if (atomic_read(&cnet->count) < nf_conntrack_max95)
  1471					continue;
  1472	
  1473				/* need to take reference to avoid possible races */
  1474				if (!atomic_inc_not_zero(&tmp->ct_general.use))
  1475					continue;
  1476	
  1477				if (gc_worker_skip_ct(tmp)) {
  1478					nf_ct_put(tmp);
  1479					continue;
  1480				}
  1481	
  1482				if (gc_worker_can_early_drop(tmp))
  1483					nf_ct_kill(tmp);
  1484	
  1485				nf_ct_put(tmp);
  1486			}
  1487	
  1488			/* could check get_nulls_value() here and restart if ct
  1489			 * was moved to another chain.  But given gc is best-effort
  1490			 * we will just continue with next hash slot.
  1491			 */
  1492			rcu_read_unlock();
  1493			cond_resched();
  1494			i++;
  1495	
  1496			if (time_after(jiffies, end_time) && i < hashsz) {
  1497				gc_work->next_bucket = i;
  1498				next_run = 0;
  1499				break;
  1500			}
  1501		} while (i < hashsz);
  1502	
  1503		if (gc_work->exiting)
  1504			return;
  1505	
  1506		/*
  1507		 * Eviction will normally happen from the packet path, and not
  1508		 * from this gc worker.
  1509		 *
  1510		 * This worker is only here to reap expired entries when system went
  1511		 * idle after a busy period.
  1512		 */
  1513		if (next_run) {
  1514			gc_work->early_drop = false;
  1515			gc_work->next_bucket = 0;
  1516		}
  1517		queue_delayed_work(system_power_efficient_wq, &gc_work->dwork, next_run);
  1518	}
  1519	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 25854 bytes --]

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2021-11-20 21:39 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-11-20 18:23 [PATCH nf-next] netfilter: conntrack: configurable conntrack gc scan interval Eyal Birger
2021-11-20 21:38 ` kernel test robot
2021-11-20 21:38   ` kernel test robot

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.