bpf.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* pass kernel pointers to the sysctl ->proc_handler method v3
@ 2020-04-24  6:43 Christoph Hellwig
  2020-04-24  6:43 ` [PATCH 1/5] bpf-cgroup: remove unused exports Christoph Hellwig
                   ` (6 more replies)
  0 siblings, 7 replies; 25+ messages in thread
From: Christoph Hellwig @ 2020-04-24  6:43 UTC (permalink / raw)
  To: Kees Cook, Iurii Zaikin
  Cc: Alexei Starovoitov, Daniel Borkmann, linux-kernel, linux-mm,
	linux-fsdevel, netdev, bpf

Hi all,

this series changes the sysctl ->proc_handler methods to take kernel
pointers.  This simplifies some of the pointer handling in the methods
(which could probably be further simplified now), and gets rid of the
set_fs address space overrides used by bpf.

Changes since v2:
 - free the buffer modified by BPF
 - move pid_max and friends to pid.h

Changes since v1:
 - drop a patch merged by Greg
 - don't copy data out on a write
 - fix buffer allocation in bpf

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 1/5] bpf-cgroup: remove unused exports
  2020-04-24  6:43 pass kernel pointers to the sysctl ->proc_handler method v3 Christoph Hellwig
@ 2020-04-24  6:43 ` Christoph Hellwig
  2020-04-27 21:23   ` Daniel Borkmann
  2020-04-24  6:43 ` [PATCH 2/5] mm: remove watermark_boost_factor_sysctl_handler Christoph Hellwig
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 25+ messages in thread
From: Christoph Hellwig @ 2020-04-24  6:43 UTC (permalink / raw)
  To: Kees Cook, Iurii Zaikin
  Cc: Alexei Starovoitov, Daniel Borkmann, linux-kernel, linux-mm,
	linux-fsdevel, netdev, bpf, Andrey Ignatov

Except for a few of the networking hooks called from modular ipv4 or
ipv6 code, all of the hooks are only called from code that is
guaranteed to be built in.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Andrey Ignatov <rdna@fb.com>
---
 kernel/bpf/cgroup.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index cb305e71e7deb..929d9a7263da1 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -1054,7 +1054,6 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
 
 	return !allow;
 }
-EXPORT_SYMBOL(__cgroup_bpf_check_dev_permission);
 
 static const struct bpf_func_proto *
 cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
@@ -1221,7 +1220,6 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
 
 	return ret == 1 ? 0 : -EPERM;
 }
-EXPORT_SYMBOL(__cgroup_bpf_run_filter_sysctl);
 
 #ifdef CONFIG_NET
 static bool __cgroup_bpf_prog_array_is_empty(struct cgroup *cgrp,
@@ -1326,7 +1324,6 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
 		sockopt_free_buf(&ctx);
 	return ret;
 }
-EXPORT_SYMBOL(__cgroup_bpf_run_filter_setsockopt);
 
 int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
 				       int optname, char __user *optval,
@@ -1413,7 +1410,6 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
 	sockopt_free_buf(&ctx);
 	return ret;
 }
-EXPORT_SYMBOL(__cgroup_bpf_run_filter_getsockopt);
 #endif
 
 static ssize_t sysctl_cpy_dir(const struct ctl_dir *dir, char **bufp,
-- 
2.26.1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 2/5] mm: remove watermark_boost_factor_sysctl_handler
  2020-04-24  6:43 pass kernel pointers to the sysctl ->proc_handler method v3 Christoph Hellwig
  2020-04-24  6:43 ` [PATCH 1/5] bpf-cgroup: remove unused exports Christoph Hellwig
@ 2020-04-24  6:43 ` Christoph Hellwig
  2020-05-04 18:41   ` Kees Cook
  2020-04-24  6:43 ` [PATCH 3/5] sysctl: remove all extern declaration from sysctl.c Christoph Hellwig
                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 25+ messages in thread
From: Christoph Hellwig @ 2020-04-24  6:43 UTC (permalink / raw)
  To: Kees Cook, Iurii Zaikin
  Cc: Alexei Starovoitov, Daniel Borkmann, linux-kernel, linux-mm,
	linux-fsdevel, netdev, bpf, David Rientjes

watermark_boost_factor_sysctl_handler is just a pointless wrapper for
proc_dointvec_minmax, so remove it and use proc_dointvec_minmax
directly.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: David Rientjes <rientjes@google.com>
---
 include/linux/mmzone.h |  2 --
 kernel/sysctl.c        |  2 +-
 mm/page_alloc.c        | 12 ------------
 3 files changed, 1 insertion(+), 15 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 1b9de7d220fb7..f37bb8f187fc7 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -911,8 +911,6 @@ static inline int is_highmem(struct zone *zone)
 struct ctl_table;
 int min_free_kbytes_sysctl_handler(struct ctl_table *, int,
 					void __user *, size_t *, loff_t *);
-int watermark_boost_factor_sysctl_handler(struct ctl_table *, int,
-					void __user *, size_t *, loff_t *);
 int watermark_scale_factor_sysctl_handler(struct ctl_table *, int,
 					void __user *, size_t *, loff_t *);
 extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES];
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8a176d8727a3a..99d27acf46465 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1491,7 +1491,7 @@ static struct ctl_table vm_table[] = {
 		.data		= &watermark_boost_factor,
 		.maxlen		= sizeof(watermark_boost_factor),
 		.mode		= 0644,
-		.proc_handler	= watermark_boost_factor_sysctl_handler,
+		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= SYSCTL_ZERO,
 	},
 	{
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 69827d4fa0527..62c1550cd43ec 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -7978,18 +7978,6 @@ int min_free_kbytes_sysctl_handler(struct ctl_table *table, int write,
 	return 0;
 }
 
-int watermark_boost_factor_sysctl_handler(struct ctl_table *table, int write,
-	void __user *buffer, size_t *length, loff_t *ppos)
-{
-	int rc;
-
-	rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
-	if (rc)
-		return rc;
-
-	return 0;
-}
-
 int watermark_scale_factor_sysctl_handler(struct ctl_table *table, int write,
 	void __user *buffer, size_t *length, loff_t *ppos)
 {
-- 
2.26.1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 3/5] sysctl: remove all extern declaration from sysctl.c
  2020-04-24  6:43 pass kernel pointers to the sysctl ->proc_handler method v3 Christoph Hellwig
  2020-04-24  6:43 ` [PATCH 1/5] bpf-cgroup: remove unused exports Christoph Hellwig
  2020-04-24  6:43 ` [PATCH 2/5] mm: remove watermark_boost_factor_sysctl_handler Christoph Hellwig
@ 2020-04-24  6:43 ` Christoph Hellwig
  2020-05-04  1:25   ` Stephen Rothwell
  2020-05-04 18:42   ` Kees Cook
  2020-04-24  6:43 ` [PATCH 4/5] sysctl: avoid forward declarations Christoph Hellwig
                   ` (3 subsequent siblings)
  6 siblings, 2 replies; 25+ messages in thread
From: Christoph Hellwig @ 2020-04-24  6:43 UTC (permalink / raw)
  To: Kees Cook, Iurii Zaikin
  Cc: Alexei Starovoitov, Daniel Borkmann, linux-kernel, linux-mm,
	linux-fsdevel, netdev, bpf

Extern declarations in .c files are bad style and can lead to
mismatches.  Use existing definitions in headers where they exist,
and otherwise move the external declarations to suitable header
files.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/coredump.h |  4 ++++
 include/linux/file.h     |  2 ++
 include/linux/mm.h       |  2 ++
 include/linux/mmzone.h   |  2 ++
 include/linux/pid.h      |  3 +++
 include/linux/sysctl.h   |  8 +++++++
 kernel/sysctl.c          | 45 +++-------------------------------------
 7 files changed, 24 insertions(+), 42 deletions(-)

diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index abf4b4e65dbb9..7a899e83835d5 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -22,4 +22,8 @@ extern void do_coredump(const kernel_siginfo_t *siginfo);
 static inline void do_coredump(const kernel_siginfo_t *siginfo) {}
 #endif
 
+extern int core_uses_pid;
+extern char core_pattern[];
+extern unsigned int core_pipe_limit;
+
 #endif /* _LINUX_COREDUMP_H */
diff --git a/include/linux/file.h b/include/linux/file.h
index 142d102f285e5..122f80084a3ef 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -94,4 +94,6 @@ extern void fd_install(unsigned int fd, struct file *file);
 extern void flush_delayed_fput(void);
 extern void __fput_sync(struct file *);
 
+extern unsigned int sysctl_nr_open_min, sysctl_nr_open_max;
+
 #endif /* __LINUX_FILE_H */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5a323422d783d..9c4e7e76deddc 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3140,5 +3140,7 @@ unsigned long wp_shared_mapping_range(struct address_space *mapping,
 				      pgoff_t first_index, pgoff_t nr);
 #endif
 
+extern int sysctl_nr_trim_pages;
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index f37bb8f187fc7..b2af594ef0f7c 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -909,6 +909,7 @@ static inline int is_highmem(struct zone *zone)
 
 /* These two functions are used to setup the per zone pages min values */
 struct ctl_table;
+
 int min_free_kbytes_sysctl_handler(struct ctl_table *, int,
 					void __user *, size_t *, loff_t *);
 int watermark_scale_factor_sysctl_handler(struct ctl_table *, int,
@@ -925,6 +926,7 @@ int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
 
 extern int numa_zonelist_order_handler(struct ctl_table *, int,
 			void __user *, size_t *, loff_t *);
+extern int percpu_pagelist_fraction;
 extern char numa_zonelist_order[];
 #define NUMA_ZONELIST_ORDER_LEN	16
 
diff --git a/include/linux/pid.h b/include/linux/pid.h
index cc896f0fc4e34..93543cbc0e6b3 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -108,6 +108,9 @@ extern void transfer_pid(struct task_struct *old, struct task_struct *new,
 struct pid_namespace;
 extern struct pid_namespace init_pid_ns;
 
+extern int pid_max;
+extern int pid_max_min, pid_max_max;
+
 /*
  * look up a PID in the hash table. Must be called with the tasklist_lock
  * or rcu_read_lock() held.
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 02fa84493f237..36143ca40b56b 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -207,7 +207,15 @@ void unregister_sysctl_table(struct ctl_table_header * table);
 
 extern int sysctl_init(void);
 
+extern int pwrsw_enabled;
+extern int unaligned_enabled;
+extern int unaligned_dump_stack;
+extern int no_unaligned_warning;
+
 extern struct ctl_table sysctl_mount_point[];
+extern struct ctl_table random_table[];
+extern struct ctl_table firmware_config_table[];
+extern struct ctl_table epoll_table[];
 
 #else /* CONFIG_SYSCTL */
 static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * table)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 99d27acf46465..31b934865ebc3 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -68,6 +68,9 @@
 #include <linux/bpf.h>
 #include <linux/mount.h>
 #include <linux/userfaultfd_k.h>
+#include <linux/coredump.h>
+#include <linux/latencytop.h>
+#include <linux/pid.h>
 
 #include "../lib/kstrtox.h"
 
@@ -103,22 +106,6 @@
 
 #if defined(CONFIG_SYSCTL)
 
-/* External variables not in a header file. */
-extern int suid_dumpable;
-#ifdef CONFIG_COREDUMP
-extern int core_uses_pid;
-extern char core_pattern[];
-extern unsigned int core_pipe_limit;
-#endif
-extern int pid_max;
-extern int pid_max_min, pid_max_max;
-extern int percpu_pagelist_fraction;
-extern int latencytop_enabled;
-extern unsigned int sysctl_nr_open_min, sysctl_nr_open_max;
-#ifndef CONFIG_MMU
-extern int sysctl_nr_trim_pages;
-#endif
-
 /* Constants used for minimum and  maximum */
 #ifdef CONFIG_LOCKUP_DETECTOR
 static int sixty = 60;
@@ -160,24 +147,6 @@ static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
 #ifdef CONFIG_INOTIFY_USER
 #include <linux/inotify.h>
 #endif
-#ifdef CONFIG_SPARC
-#endif
-
-#ifdef CONFIG_PARISC
-extern int pwrsw_enabled;
-#endif
-
-#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
-extern int unaligned_enabled;
-#endif
-
-#ifdef CONFIG_IA64
-extern int unaligned_dump_stack;
-#endif
-
-#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
-extern int no_unaligned_warning;
-#endif
 
 #ifdef CONFIG_PROC_SYSCTL
 
@@ -243,14 +212,6 @@ static struct ctl_table vm_table[];
 static struct ctl_table fs_table[];
 static struct ctl_table debug_table[];
 static struct ctl_table dev_table[];
-extern struct ctl_table random_table[];
-#ifdef CONFIG_EPOLL
-extern struct ctl_table epoll_table[];
-#endif
-
-#ifdef CONFIG_FW_LOADER_USER_HELPER
-extern struct ctl_table firmware_config_table[];
-#endif
 
 #if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
     defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
-- 
2.26.1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 4/5] sysctl: avoid forward declarations
  2020-04-24  6:43 pass kernel pointers to the sysctl ->proc_handler method v3 Christoph Hellwig
                   ` (2 preceding siblings ...)
  2020-04-24  6:43 ` [PATCH 3/5] sysctl: remove all extern declaration from sysctl.c Christoph Hellwig
@ 2020-04-24  6:43 ` Christoph Hellwig
  2020-05-04 18:44   ` Kees Cook
  2020-04-26 15:51 ` pass kernel pointers to the sysctl ->proc_handler method v3 Alexei Starovoitov
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 25+ messages in thread
From: Christoph Hellwig @ 2020-04-24  6:43 UTC (permalink / raw)
  To: Kees Cook, Iurii Zaikin
  Cc: Alexei Starovoitov, Daniel Borkmann, linux-kernel, linux-mm,
	linux-fsdevel, netdev, bpf

Move the sysctl tables to the end of the file to avoid lots of pointless
forward declarations.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 kernel/sysctl.c | 3565 +++++++++++++++++++++++------------------------
 1 file changed, 1764 insertions(+), 1801 deletions(-)

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 31b934865ebc3..511543d238794 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -176,79 +176,13 @@ enum sysctl_writes_mode {
 };
 
 static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
-
-static int proc_do_cad_pid(struct ctl_table *table, int write,
-		  void __user *buffer, size_t *lenp, loff_t *ppos);
-static int proc_taint(struct ctl_table *table, int write,
-			       void __user *buffer, size_t *lenp, loff_t *ppos);
-#ifdef CONFIG_COMPACTION
-static int proc_dointvec_minmax_warn_RT_change(struct ctl_table *table,
-					       int write, void __user *buffer,
-					       size_t *lenp, loff_t *ppos);
-#endif
-#endif
-
-#ifdef CONFIG_PRINTK
-static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
-				void __user *buffer, size_t *lenp, loff_t *ppos);
-#endif
-
-static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
-		void __user *buffer, size_t *lenp, loff_t *ppos);
-#ifdef CONFIG_COREDUMP
-static int proc_dostring_coredump(struct ctl_table *table, int write,
-		void __user *buffer, size_t *lenp, loff_t *ppos);
-#endif
-static int proc_dopipe_max_size(struct ctl_table *table, int write,
-		void __user *buffer, size_t *lenp, loff_t *ppos);
-
-#ifdef CONFIG_MAGIC_SYSRQ
-static int sysrq_sysctl_handler(struct ctl_table *table, int write,
-			void __user *buffer, size_t *lenp, loff_t *ppos);
-#endif
-
-static struct ctl_table kern_table[];
-static struct ctl_table vm_table[];
-static struct ctl_table fs_table[];
-static struct ctl_table debug_table[];
-static struct ctl_table dev_table[];
+#endif /* CONFIG_PROC_SYSCTL */
 
 #if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
     defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
 int sysctl_legacy_va_layout;
 #endif
 
-/* The default sysctl tables: */
-
-static struct ctl_table sysctl_base_table[] = {
-	{
-		.procname	= "kernel",
-		.mode		= 0555,
-		.child		= kern_table,
-	},
-	{
-		.procname	= "vm",
-		.mode		= 0555,
-		.child		= vm_table,
-	},
-	{
-		.procname	= "fs",
-		.mode		= 0555,
-		.child		= fs_table,
-	},
-	{
-		.procname	= "debug",
-		.mode		= 0555,
-		.child		= debug_table,
-	},
-	{
-		.procname	= "dev",
-		.mode		= 0555,
-		.child		= dev_table,
-	},
-	{ }
-};
-
 #ifdef CONFIG_SCHED_DEBUG
 static int min_sched_granularity_ns = 100000;		/* 100 usecs */
 static int max_sched_granularity_ns = NSEC_PER_SEC;	/* 1 second */
@@ -265,1676 +199,12 @@ static int min_extfrag_threshold;
 static int max_extfrag_threshold = 1000;
 #endif
 
-static struct ctl_table kern_table[] = {
-	{
-		.procname	= "sched_child_runs_first",
-		.data		= &sysctl_sched_child_runs_first,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#ifdef CONFIG_SCHED_DEBUG
-	{
-		.procname	= "sched_min_granularity_ns",
-		.data		= &sysctl_sched_min_granularity,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= sched_proc_update_handler,
-		.extra1		= &min_sched_granularity_ns,
-		.extra2		= &max_sched_granularity_ns,
-	},
-	{
-		.procname	= "sched_latency_ns",
-		.data		= &sysctl_sched_latency,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= sched_proc_update_handler,
-		.extra1		= &min_sched_granularity_ns,
-		.extra2		= &max_sched_granularity_ns,
-	},
-	{
-		.procname	= "sched_wakeup_granularity_ns",
-		.data		= &sysctl_sched_wakeup_granularity,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= sched_proc_update_handler,
-		.extra1		= &min_wakeup_granularity_ns,
-		.extra2		= &max_wakeup_granularity_ns,
-	},
-#ifdef CONFIG_SMP
-	{
-		.procname	= "sched_tunable_scaling",
-		.data		= &sysctl_sched_tunable_scaling,
-		.maxlen		= sizeof(enum sched_tunable_scaling),
-		.mode		= 0644,
-		.proc_handler	= sched_proc_update_handler,
-		.extra1		= &min_sched_tunable_scaling,
-		.extra2		= &max_sched_tunable_scaling,
-	},
-	{
-		.procname	= "sched_migration_cost_ns",
-		.data		= &sysctl_sched_migration_cost,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "sched_nr_migrate",
-		.data		= &sysctl_sched_nr_migrate,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#ifdef CONFIG_SCHEDSTATS
-	{
-		.procname	= "sched_schedstats",
-		.data		= NULL,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= sysctl_schedstats,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-#endif /* CONFIG_SCHEDSTATS */
-#endif /* CONFIG_SMP */
-#ifdef CONFIG_NUMA_BALANCING
-	{
-		.procname	= "numa_balancing_scan_delay_ms",
-		.data		= &sysctl_numa_balancing_scan_delay,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "numa_balancing_scan_period_min_ms",
-		.data		= &sysctl_numa_balancing_scan_period_min,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "numa_balancing_scan_period_max_ms",
-		.data		= &sysctl_numa_balancing_scan_period_max,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "numa_balancing_scan_size_mb",
-		.data		= &sysctl_numa_balancing_scan_size,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ONE,
-	},
-	{
-		.procname	= "numa_balancing",
-		.data		= NULL, /* filled in by handler */
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= sysctl_numa_balancing,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-#endif /* CONFIG_NUMA_BALANCING */
-#endif /* CONFIG_SCHED_DEBUG */
-	{
-		.procname	= "sched_rt_period_us",
-		.data		= &sysctl_sched_rt_period,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= sched_rt_handler,
-	},
-	{
-		.procname	= "sched_rt_runtime_us",
-		.data		= &sysctl_sched_rt_runtime,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= sched_rt_handler,
-	},
-	{
-		.procname	= "sched_rr_timeslice_ms",
-		.data		= &sysctl_sched_rr_timeslice,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= sched_rr_handler,
-	},
-#ifdef CONFIG_UCLAMP_TASK
-	{
-		.procname	= "sched_util_clamp_min",
-		.data		= &sysctl_sched_uclamp_util_min,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= sysctl_sched_uclamp_handler,
-	},
-	{
-		.procname	= "sched_util_clamp_max",
-		.data		= &sysctl_sched_uclamp_util_max,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= sysctl_sched_uclamp_handler,
-	},
-#endif
-#ifdef CONFIG_SCHED_AUTOGROUP
-	{
-		.procname	= "sched_autogroup_enabled",
-		.data		= &sysctl_sched_autogroup_enabled,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-#endif
-#ifdef CONFIG_CFS_BANDWIDTH
-	{
-		.procname	= "sched_cfs_bandwidth_slice_us",
-		.data		= &sysctl_sched_cfs_bandwidth_slice,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ONE,
-	},
-#endif
-#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
-	{
-		.procname	= "sched_energy_aware",
-		.data		= &sysctl_sched_energy_aware,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= sched_energy_aware_handler,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-#endif
-#ifdef CONFIG_PROVE_LOCKING
-	{
-		.procname	= "prove_locking",
-		.data		= &prove_locking,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
-#ifdef CONFIG_LOCK_STAT
-	{
-		.procname	= "lock_stat",
-		.data		= &lock_stat,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
-	{
-		.procname	= "panic",
-		.data		= &panic_timeout,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#ifdef CONFIG_COREDUMP
-	{
-		.procname	= "core_uses_pid",
-		.data		= &core_uses_pid,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "core_pattern",
-		.data		= core_pattern,
-		.maxlen		= CORENAME_MAX_SIZE,
-		.mode		= 0644,
-		.proc_handler	= proc_dostring_coredump,
-	},
-	{
-		.procname	= "core_pipe_limit",
-		.data		= &core_pipe_limit,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
-#ifdef CONFIG_PROC_SYSCTL
-	{
-		.procname	= "tainted",
-		.maxlen 	= sizeof(long),
-		.mode		= 0644,
-		.proc_handler	= proc_taint,
-	},
-	{
-		.procname	= "sysctl_writes_strict",
-		.data		= &sysctl_writes_strict,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &neg_one,
-		.extra2		= SYSCTL_ONE,
-	},
-#endif
-#ifdef CONFIG_LATENCYTOP
-	{
-		.procname	= "latencytop",
-		.data		= &latencytop_enabled,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= sysctl_latencytop,
-	},
-#endif
-#ifdef CONFIG_BLK_DEV_INITRD
-	{
-		.procname	= "real-root-dev",
-		.data		= &real_root_dev,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
-	{
-		.procname	= "print-fatal-signals",
-		.data		= &print_fatal_signals,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#ifdef CONFIG_SPARC
-	{
-		.procname	= "reboot-cmd",
-		.data		= reboot_command,
-		.maxlen		= 256,
-		.mode		= 0644,
-		.proc_handler	= proc_dostring,
-	},
-	{
-		.procname	= "stop-a",
-		.data		= &stop_a_enabled,
-		.maxlen		= sizeof (int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "scons-poweroff",
-		.data		= &scons_pwroff,
-		.maxlen		= sizeof (int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
-#ifdef CONFIG_SPARC64
-	{
-		.procname	= "tsb-ratio",
-		.data		= &sysctl_tsb_ratio,
-		.maxlen		= sizeof (int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
-#ifdef CONFIG_PARISC
-	{
-		.procname	= "soft-power",
-		.data		= &pwrsw_enabled,
-		.maxlen		= sizeof (int),
-	 	.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
-#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
-	{
-		.procname	= "unaligned-trap",
-		.data		= &unaligned_enabled,
-		.maxlen		= sizeof (int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
-	{
-		.procname	= "ctrl-alt-del",
-		.data		= &C_A_D,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#ifdef CONFIG_FUNCTION_TRACER
-	{
-		.procname	= "ftrace_enabled",
-		.data		= &ftrace_enabled,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= ftrace_enable_sysctl,
-	},
-#endif
-#ifdef CONFIG_STACK_TRACER
-	{
-		.procname	= "stack_tracer_enabled",
-		.data		= &stack_tracer_enabled,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= stack_trace_sysctl,
-	},
-#endif
-#ifdef CONFIG_TRACING
-	{
-		.procname	= "ftrace_dump_on_oops",
-		.data		= &ftrace_dump_on_oops,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "traceoff_on_warning",
-		.data		= &__disable_trace_on_warning,
-		.maxlen		= sizeof(__disable_trace_on_warning),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "tracepoint_printk",
-		.data		= &tracepoint_printk,
-		.maxlen		= sizeof(tracepoint_printk),
-		.mode		= 0644,
-		.proc_handler	= tracepoint_printk_sysctl,
-	},
-#endif
-#ifdef CONFIG_KEXEC_CORE
-	{
-		.procname	= "kexec_load_disabled",
-		.data		= &kexec_load_disabled,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		/* only handle a transition from default "0" to "1" */
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ONE,
-		.extra2		= SYSCTL_ONE,
-	},
-#endif
-#ifdef CONFIG_MODULES
-	{
-		.procname	= "modprobe",
-		.data		= &modprobe_path,
-		.maxlen		= KMOD_PATH_LEN,
-		.mode		= 0644,
-		.proc_handler	= proc_dostring,
-	},
-	{
-		.procname	= "modules_disabled",
-		.data		= &modules_disabled,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		/* only handle a transition from default "0" to "1" */
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ONE,
-		.extra2		= SYSCTL_ONE,
-	},
-#endif
-#ifdef CONFIG_UEVENT_HELPER
-	{
-		.procname	= "hotplug",
-		.data		= &uevent_helper,
-		.maxlen		= UEVENT_HELPER_PATH_LEN,
-		.mode		= 0644,
-		.proc_handler	= proc_dostring,
-	},
-#endif
-#ifdef CONFIG_CHR_DEV_SG
-	{
-		.procname	= "sg-big-buff",
-		.data		= &sg_big_buff,
-		.maxlen		= sizeof (int),
-		.mode		= 0444,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
-#ifdef CONFIG_BSD_PROCESS_ACCT
-	{
-		.procname	= "acct",
-		.data		= &acct_parm,
-		.maxlen		= 3*sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
-#ifdef CONFIG_MAGIC_SYSRQ
-	{
-		.procname	= "sysrq",
-		.data		= NULL,
-		.maxlen		= sizeof (int),
-		.mode		= 0644,
-		.proc_handler	= sysrq_sysctl_handler,
-	},
-#endif
-#ifdef CONFIG_PROC_SYSCTL
-	{
-		.procname	= "cad_pid",
-		.data		= NULL,
-		.maxlen		= sizeof (int),
-		.mode		= 0600,
-		.proc_handler	= proc_do_cad_pid,
-	},
-#endif
-	{
-		.procname	= "threads-max",
-		.data		= NULL,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= sysctl_max_threads,
-	},
-	{
-		.procname	= "random",
-		.mode		= 0555,
-		.child		= random_table,
-	},
-	{
-		.procname	= "usermodehelper",
-		.mode		= 0555,
-		.child		= usermodehelper_table,
-	},
-#ifdef CONFIG_FW_LOADER_USER_HELPER
-	{
-		.procname	= "firmware_config",
-		.mode		= 0555,
-		.child		= firmware_config_table,
-	},
-#endif
-	{
-		.procname	= "overflowuid",
-		.data		= &overflowuid,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &minolduid,
-		.extra2		= &maxolduid,
-	},
-	{
-		.procname	= "overflowgid",
-		.data		= &overflowgid,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &minolduid,
-		.extra2		= &maxolduid,
-	},
-#ifdef CONFIG_S390
-	{
-		.procname	= "userprocess_debug",
-		.data		= &show_unhandled_signals,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
-	{
-		.procname	= "pid_max",
-		.data		= &pid_max,
-		.maxlen		= sizeof (int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &pid_max_min,
-		.extra2		= &pid_max_max,
-	},
-	{
-		.procname	= "panic_on_oops",
-		.data		= &panic_on_oops,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "panic_print",
-		.data		= &panic_print,
-		.maxlen		= sizeof(unsigned long),
-		.mode		= 0644,
-		.proc_handler	= proc_doulongvec_minmax,
-	},
-#if defined CONFIG_PRINTK
-	{
-		.procname	= "printk",
-		.data		= &console_loglevel,
-		.maxlen		= 4*sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "printk_ratelimit",
-		.data		= &printk_ratelimit_state.interval,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "printk_ratelimit_burst",
-		.data		= &printk_ratelimit_state.burst,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "printk_delay",
-		.data		= &printk_delay_msec,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= &ten_thousand,
-	},
-	{
-		.procname	= "printk_devkmsg",
-		.data		= devkmsg_log_str,
-		.maxlen		= DEVKMSG_STR_MAX_SIZE,
-		.mode		= 0644,
-		.proc_handler	= devkmsg_sysctl_set_loglvl,
-	},
-	{
-		.procname	= "dmesg_restrict",
-		.data		= &dmesg_restrict,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax_sysadmin,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-	{
-		.procname	= "kptr_restrict",
-		.data		= &kptr_restrict,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax_sysadmin,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= &two,
-	},
-#endif
-	{
-		.procname	= "ngroups_max",
-		.data		= &ngroups_max,
-		.maxlen		= sizeof (int),
-		.mode		= 0444,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "cap_last_cap",
-		.data		= (void *)&cap_last_cap,
-		.maxlen		= sizeof(int),
-		.mode		= 0444,
-		.proc_handler	= proc_dointvec,
-	},
-#if defined(CONFIG_LOCKUP_DETECTOR)
-	{
-		.procname       = "watchdog",
-		.data		= &watchdog_user_enabled,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler   = proc_watchdog,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-	{
-		.procname	= "watchdog_thresh",
-		.data		= &watchdog_thresh,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_watchdog_thresh,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= &sixty,
-	},
-	{
-		.procname       = "nmi_watchdog",
-		.data		= &nmi_watchdog_user_enabled,
-		.maxlen		= sizeof(int),
-		.mode		= NMI_WATCHDOG_SYSCTL_PERM,
-		.proc_handler   = proc_nmi_watchdog,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-	{
-		.procname	= "watchdog_cpumask",
-		.data		= &watchdog_cpumask_bits,
-		.maxlen		= NR_CPUS,
-		.mode		= 0644,
-		.proc_handler	= proc_watchdog_cpumask,
-	},
-#ifdef CONFIG_SOFTLOCKUP_DETECTOR
-	{
-		.procname       = "soft_watchdog",
-		.data		= &soft_watchdog_user_enabled,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler   = proc_soft_watchdog,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-	{
-		.procname	= "softlockup_panic",
-		.data		= &softlockup_panic,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-#ifdef CONFIG_SMP
-	{
-		.procname	= "softlockup_all_cpu_backtrace",
-		.data		= &sysctl_softlockup_all_cpu_backtrace,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-#endif /* CONFIG_SMP */
-#endif
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-	{
-		.procname	= "hardlockup_panic",
-		.data		= &hardlockup_panic,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-#ifdef CONFIG_SMP
-	{
-		.procname	= "hardlockup_all_cpu_backtrace",
-		.data		= &sysctl_hardlockup_all_cpu_backtrace,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-#endif /* CONFIG_SMP */
-#endif
-#endif
-
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
-	{
-		.procname       = "unknown_nmi_panic",
-		.data           = &unknown_nmi_panic,
-		.maxlen         = sizeof (int),
-		.mode           = 0644,
-		.proc_handler   = proc_dointvec,
-	},
-#endif
-#if defined(CONFIG_X86)
-	{
-		.procname	= "panic_on_unrecovered_nmi",
-		.data		= &panic_on_unrecovered_nmi,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "panic_on_io_nmi",
-		.data		= &panic_on_io_nmi,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#ifdef CONFIG_DEBUG_STACKOVERFLOW
-	{
-		.procname	= "panic_on_stackoverflow",
-		.data		= &sysctl_panic_on_stackoverflow,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
-	{
-		.procname	= "bootloader_type",
-		.data		= &bootloader_type,
-		.maxlen		= sizeof (int),
-		.mode		= 0444,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "bootloader_version",
-		.data		= &bootloader_version,
-		.maxlen		= sizeof (int),
-		.mode		= 0444,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "io_delay_type",
-		.data		= &io_delay_type,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
-#if defined(CONFIG_MMU)
-	{
-		.procname	= "randomize_va_space",
-		.data		= &randomize_va_space,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
-#if defined(CONFIG_S390) && defined(CONFIG_SMP)
-	{
-		.procname	= "spin_retry",
-		.data		= &spin_retry,
-		.maxlen		= sizeof (int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
-#if	defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
-	{
-		.procname	= "acpi_video_flags",
-		.data		= &acpi_realmode_flags,
-		.maxlen		= sizeof (unsigned long),
-		.mode		= 0644,
-		.proc_handler	= proc_doulongvec_minmax,
-	},
-#endif
-#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
-	{
-		.procname	= "ignore-unaligned-usertrap",
-		.data		= &no_unaligned_warning,
-		.maxlen		= sizeof (int),
-	 	.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
-#ifdef CONFIG_IA64
-	{
-		.procname	= "unaligned-dump-stack",
-		.data		= &unaligned_dump_stack,
-		.maxlen		= sizeof (int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
-#ifdef CONFIG_DETECT_HUNG_TASK
-	{
-		.procname	= "hung_task_panic",
-		.data		= &sysctl_hung_task_panic,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-	{
-		.procname	= "hung_task_check_count",
-		.data		= &sysctl_hung_task_check_count,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-	},
-	{
-		.procname	= "hung_task_timeout_secs",
-		.data		= &sysctl_hung_task_timeout_secs,
-		.maxlen		= sizeof(unsigned long),
-		.mode		= 0644,
-		.proc_handler	= proc_dohung_task_timeout_secs,
-		.extra2		= &hung_task_timeout_max,
-	},
-	{
-		.procname	= "hung_task_check_interval_secs",
-		.data		= &sysctl_hung_task_check_interval_secs,
-		.maxlen		= sizeof(unsigned long),
-		.mode		= 0644,
-		.proc_handler	= proc_dohung_task_timeout_secs,
-		.extra2		= &hung_task_timeout_max,
-	},
-	{
-		.procname	= "hung_task_warnings",
-		.data		= &sysctl_hung_task_warnings,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &neg_one,
-	},
-#endif
-#ifdef CONFIG_RT_MUTEXES
-	{
-		.procname	= "max_lock_depth",
-		.data		= &max_lock_depth,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
-	{
-		.procname	= "poweroff_cmd",
-		.data		= &poweroff_cmd,
-		.maxlen		= POWEROFF_CMD_PATH_LEN,
-		.mode		= 0644,
-		.proc_handler	= proc_dostring,
-	},
-#ifdef CONFIG_KEYS
-	{
-		.procname	= "keys",
-		.mode		= 0555,
-		.child		= key_sysctls,
-	},
-#endif
-#ifdef CONFIG_PERF_EVENTS
-	/*
-	 * User-space scripts rely on the existence of this file
-	 * as a feature check for perf_events being enabled.
-	 *
-	 * So it's an ABI, do not remove!
-	 */
-	{
-		.procname	= "perf_event_paranoid",
-		.data		= &sysctl_perf_event_paranoid,
-		.maxlen		= sizeof(sysctl_perf_event_paranoid),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "perf_event_mlock_kb",
-		.data		= &sysctl_perf_event_mlock,
-		.maxlen		= sizeof(sysctl_perf_event_mlock),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "perf_event_max_sample_rate",
-		.data		= &sysctl_perf_event_sample_rate,
-		.maxlen		= sizeof(sysctl_perf_event_sample_rate),
-		.mode		= 0644,
-		.proc_handler	= perf_proc_update_handler,
-		.extra1		= SYSCTL_ONE,
-	},
-	{
-		.procname	= "perf_cpu_time_max_percent",
-		.data		= &sysctl_perf_cpu_time_max_percent,
-		.maxlen		= sizeof(sysctl_perf_cpu_time_max_percent),
-		.mode		= 0644,
-		.proc_handler	= perf_cpu_time_max_percent_handler,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= &one_hundred,
-	},
-	{
-		.procname	= "perf_event_max_stack",
-		.data		= &sysctl_perf_event_max_stack,
-		.maxlen		= sizeof(sysctl_perf_event_max_stack),
-		.mode		= 0644,
-		.proc_handler	= perf_event_max_stack_handler,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= &six_hundred_forty_kb,
-	},
-	{
-		.procname	= "perf_event_max_contexts_per_stack",
-		.data		= &sysctl_perf_event_max_contexts_per_stack,
-		.maxlen		= sizeof(sysctl_perf_event_max_contexts_per_stack),
-		.mode		= 0644,
-		.proc_handler	= perf_event_max_stack_handler,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= &one_thousand,
-	},
-#endif
-	{
-		.procname	= "panic_on_warn",
-		.data		= &panic_on_warn,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
-	{
-		.procname	= "timer_migration",
-		.data		= &sysctl_timer_migration,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= timer_migration_handler,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-#endif
-#ifdef CONFIG_BPF_SYSCALL
-	{
-		.procname	= "unprivileged_bpf_disabled",
-		.data		= &sysctl_unprivileged_bpf_disabled,
-		.maxlen		= sizeof(sysctl_unprivileged_bpf_disabled),
-		.mode		= 0644,
-		/* only handle a transition from default "0" to "1" */
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ONE,
-		.extra2		= SYSCTL_ONE,
-	},
-	{
-		.procname	= "bpf_stats_enabled",
-		.data		= &bpf_stats_enabled_key.key,
-		.maxlen		= sizeof(bpf_stats_enabled_key),
-		.mode		= 0644,
-		.proc_handler	= proc_do_static_key,
-	},
-#endif
-#if defined(CONFIG_TREE_RCU)
-	{
-		.procname	= "panic_on_rcu_stall",
-		.data		= &sysctl_panic_on_rcu_stall,
-		.maxlen		= sizeof(sysctl_panic_on_rcu_stall),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-#endif
-#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
-	{
-		.procname	= "stack_erasing",
-		.data		= NULL,
-		.maxlen		= sizeof(int),
-		.mode		= 0600,
-		.proc_handler	= stack_erasing_sysctl,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-#endif
-	{ }
-};
-
-static struct ctl_table vm_table[] = {
-	{
-		.procname	= "overcommit_memory",
-		.data		= &sysctl_overcommit_memory,
-		.maxlen		= sizeof(sysctl_overcommit_memory),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= &two,
-	},
-	{
-		.procname	= "panic_on_oom",
-		.data		= &sysctl_panic_on_oom,
-		.maxlen		= sizeof(sysctl_panic_on_oom),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= &two,
-	},
-	{
-		.procname	= "oom_kill_allocating_task",
-		.data		= &sysctl_oom_kill_allocating_task,
-		.maxlen		= sizeof(sysctl_oom_kill_allocating_task),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "oom_dump_tasks",
-		.data		= &sysctl_oom_dump_tasks,
-		.maxlen		= sizeof(sysctl_oom_dump_tasks),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "overcommit_ratio",
-		.data		= &sysctl_overcommit_ratio,
-		.maxlen		= sizeof(sysctl_overcommit_ratio),
-		.mode		= 0644,
-		.proc_handler	= overcommit_ratio_handler,
-	},
-	{
-		.procname	= "overcommit_kbytes",
-		.data		= &sysctl_overcommit_kbytes,
-		.maxlen		= sizeof(sysctl_overcommit_kbytes),
-		.mode		= 0644,
-		.proc_handler	= overcommit_kbytes_handler,
-	},
-	{
-		.procname	= "page-cluster", 
-		.data		= &page_cluster,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-	},
-	{
-		.procname	= "dirty_background_ratio",
-		.data		= &dirty_background_ratio,
-		.maxlen		= sizeof(dirty_background_ratio),
-		.mode		= 0644,
-		.proc_handler	= dirty_background_ratio_handler,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= &one_hundred,
-	},
-	{
-		.procname	= "dirty_background_bytes",
-		.data		= &dirty_background_bytes,
-		.maxlen		= sizeof(dirty_background_bytes),
-		.mode		= 0644,
-		.proc_handler	= dirty_background_bytes_handler,
-		.extra1		= &one_ul,
-	},
-	{
-		.procname	= "dirty_ratio",
-		.data		= &vm_dirty_ratio,
-		.maxlen		= sizeof(vm_dirty_ratio),
-		.mode		= 0644,
-		.proc_handler	= dirty_ratio_handler,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= &one_hundred,
-	},
-	{
-		.procname	= "dirty_bytes",
-		.data		= &vm_dirty_bytes,
-		.maxlen		= sizeof(vm_dirty_bytes),
-		.mode		= 0644,
-		.proc_handler	= dirty_bytes_handler,
-		.extra1		= &dirty_bytes_min,
-	},
-	{
-		.procname	= "dirty_writeback_centisecs",
-		.data		= &dirty_writeback_interval,
-		.maxlen		= sizeof(dirty_writeback_interval),
-		.mode		= 0644,
-		.proc_handler	= dirty_writeback_centisecs_handler,
-	},
-	{
-		.procname	= "dirty_expire_centisecs",
-		.data		= &dirty_expire_interval,
-		.maxlen		= sizeof(dirty_expire_interval),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-	},
-	{
-		.procname	= "dirtytime_expire_seconds",
-		.data		= &dirtytime_expire_interval,
-		.maxlen		= sizeof(dirtytime_expire_interval),
-		.mode		= 0644,
-		.proc_handler	= dirtytime_interval_handler,
-		.extra1		= SYSCTL_ZERO,
-	},
-	{
-		.procname	= "swappiness",
-		.data		= &vm_swappiness,
-		.maxlen		= sizeof(vm_swappiness),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= &one_hundred,
-	},
-#ifdef CONFIG_HUGETLB_PAGE
-	{
-		.procname	= "nr_hugepages",
-		.data		= NULL,
-		.maxlen		= sizeof(unsigned long),
-		.mode		= 0644,
-		.proc_handler	= hugetlb_sysctl_handler,
-	},
-#ifdef CONFIG_NUMA
-	{
-		.procname       = "nr_hugepages_mempolicy",
-		.data           = NULL,
-		.maxlen         = sizeof(unsigned long),
-		.mode           = 0644,
-		.proc_handler   = &hugetlb_mempolicy_sysctl_handler,
-	},
-	{
-		.procname		= "numa_stat",
-		.data			= &sysctl_vm_numa_stat,
-		.maxlen			= sizeof(int),
-		.mode			= 0644,
-		.proc_handler	= sysctl_vm_numa_stat_handler,
-		.extra1			= SYSCTL_ZERO,
-		.extra2			= SYSCTL_ONE,
-	},
-#endif
-	 {
-		.procname	= "hugetlb_shm_group",
-		.data		= &sysctl_hugetlb_shm_group,
-		.maxlen		= sizeof(gid_t),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	 },
-	{
-		.procname	= "nr_overcommit_hugepages",
-		.data		= NULL,
-		.maxlen		= sizeof(unsigned long),
-		.mode		= 0644,
-		.proc_handler	= hugetlb_overcommit_handler,
-	},
-#endif
-	{
-		.procname	= "lowmem_reserve_ratio",
-		.data		= &sysctl_lowmem_reserve_ratio,
-		.maxlen		= sizeof(sysctl_lowmem_reserve_ratio),
-		.mode		= 0644,
-		.proc_handler	= lowmem_reserve_ratio_sysctl_handler,
-	},
-	{
-		.procname	= "drop_caches",
-		.data		= &sysctl_drop_caches,
-		.maxlen		= sizeof(int),
-		.mode		= 0200,
-		.proc_handler	= drop_caches_sysctl_handler,
-		.extra1		= SYSCTL_ONE,
-		.extra2		= &four,
-	},
-#ifdef CONFIG_COMPACTION
-	{
-		.procname	= "compact_memory",
-		.data		= &sysctl_compact_memory,
-		.maxlen		= sizeof(int),
-		.mode		= 0200,
-		.proc_handler	= sysctl_compaction_handler,
-	},
-	{
-		.procname	= "extfrag_threshold",
-		.data		= &sysctl_extfrag_threshold,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_extfrag_threshold,
-		.extra2		= &max_extfrag_threshold,
-	},
-	{
-		.procname	= "compact_unevictable_allowed",
-		.data		= &sysctl_compact_unevictable_allowed,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax_warn_RT_change,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-
-#endif /* CONFIG_COMPACTION */
-	{
-		.procname	= "min_free_kbytes",
-		.data		= &min_free_kbytes,
-		.maxlen		= sizeof(min_free_kbytes),
-		.mode		= 0644,
-		.proc_handler	= min_free_kbytes_sysctl_handler,
-		.extra1		= SYSCTL_ZERO,
-	},
-	{
-		.procname	= "watermark_boost_factor",
-		.data		= &watermark_boost_factor,
-		.maxlen		= sizeof(watermark_boost_factor),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-	},
-	{
-		.procname	= "watermark_scale_factor",
-		.data		= &watermark_scale_factor,
-		.maxlen		= sizeof(watermark_scale_factor),
-		.mode		= 0644,
-		.proc_handler	= watermark_scale_factor_sysctl_handler,
-		.extra1		= SYSCTL_ONE,
-		.extra2		= &one_thousand,
-	},
-	{
-		.procname	= "percpu_pagelist_fraction",
-		.data		= &percpu_pagelist_fraction,
-		.maxlen		= sizeof(percpu_pagelist_fraction),
-		.mode		= 0644,
-		.proc_handler	= percpu_pagelist_fraction_sysctl_handler,
-		.extra1		= SYSCTL_ZERO,
-	},
-#ifdef CONFIG_MMU
-	{
-		.procname	= "max_map_count",
-		.data		= &sysctl_max_map_count,
-		.maxlen		= sizeof(sysctl_max_map_count),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-	},
-#else
-	{
-		.procname	= "nr_trim_pages",
-		.data		= &sysctl_nr_trim_pages,
-		.maxlen		= sizeof(sysctl_nr_trim_pages),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-	},
-#endif
-	{
-		.procname	= "laptop_mode",
-		.data		= &laptop_mode,
-		.maxlen		= sizeof(laptop_mode),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "block_dump",
-		.data		= &block_dump,
-		.maxlen		= sizeof(block_dump),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-		.extra1		= SYSCTL_ZERO,
-	},
-	{
-		.procname	= "vfs_cache_pressure",
-		.data		= &sysctl_vfs_cache_pressure,
-		.maxlen		= sizeof(sysctl_vfs_cache_pressure),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-		.extra1		= SYSCTL_ZERO,
-	},
-#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
-    defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
-	{
-		.procname	= "legacy_va_layout",
-		.data		= &sysctl_legacy_va_layout,
-		.maxlen		= sizeof(sysctl_legacy_va_layout),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-		.extra1		= SYSCTL_ZERO,
-	},
-#endif
-#ifdef CONFIG_NUMA
-	{
-		.procname	= "zone_reclaim_mode",
-		.data		= &node_reclaim_mode,
-		.maxlen		= sizeof(node_reclaim_mode),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-		.extra1		= SYSCTL_ZERO,
-	},
-	{
-		.procname	= "min_unmapped_ratio",
-		.data		= &sysctl_min_unmapped_ratio,
-		.maxlen		= sizeof(sysctl_min_unmapped_ratio),
-		.mode		= 0644,
-		.proc_handler	= sysctl_min_unmapped_ratio_sysctl_handler,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= &one_hundred,
-	},
-	{
-		.procname	= "min_slab_ratio",
-		.data		= &sysctl_min_slab_ratio,
-		.maxlen		= sizeof(sysctl_min_slab_ratio),
-		.mode		= 0644,
-		.proc_handler	= sysctl_min_slab_ratio_sysctl_handler,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= &one_hundred,
-	},
-#endif
-#ifdef CONFIG_SMP
-	{
-		.procname	= "stat_interval",
-		.data		= &sysctl_stat_interval,
-		.maxlen		= sizeof(sysctl_stat_interval),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "stat_refresh",
-		.data		= NULL,
-		.maxlen		= 0,
-		.mode		= 0600,
-		.proc_handler	= vmstat_refresh,
-	},
-#endif
-#ifdef CONFIG_MMU
-	{
-		.procname	= "mmap_min_addr",
-		.data		= &dac_mmap_min_addr,
-		.maxlen		= sizeof(unsigned long),
-		.mode		= 0644,
-		.proc_handler	= mmap_min_addr_handler,
-	},
-#endif
-#ifdef CONFIG_NUMA
-	{
-		.procname	= "numa_zonelist_order",
-		.data		= &numa_zonelist_order,
-		.maxlen		= NUMA_ZONELIST_ORDER_LEN,
-		.mode		= 0644,
-		.proc_handler	= numa_zonelist_order_handler,
-	},
-#endif
-#if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
-   (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
-	{
-		.procname	= "vdso_enabled",
-#ifdef CONFIG_X86_32
-		.data		= &vdso32_enabled,
-		.maxlen		= sizeof(vdso32_enabled),
-#else
-		.data		= &vdso_enabled,
-		.maxlen		= sizeof(vdso_enabled),
-#endif
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-		.extra1		= SYSCTL_ZERO,
-	},
-#endif
-#ifdef CONFIG_HIGHMEM
-	{
-		.procname	= "highmem_is_dirtyable",
-		.data		= &vm_highmem_is_dirtyable,
-		.maxlen		= sizeof(vm_highmem_is_dirtyable),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-#endif
-#ifdef CONFIG_MEMORY_FAILURE
-	{
-		.procname	= "memory_failure_early_kill",
-		.data		= &sysctl_memory_failure_early_kill,
-		.maxlen		= sizeof(sysctl_memory_failure_early_kill),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-	{
-		.procname	= "memory_failure_recovery",
-		.data		= &sysctl_memory_failure_recovery,
-		.maxlen		= sizeof(sysctl_memory_failure_recovery),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-#endif
-	{
-		.procname	= "user_reserve_kbytes",
-		.data		= &sysctl_user_reserve_kbytes,
-		.maxlen		= sizeof(sysctl_user_reserve_kbytes),
-		.mode		= 0644,
-		.proc_handler	= proc_doulongvec_minmax,
-	},
-	{
-		.procname	= "admin_reserve_kbytes",
-		.data		= &sysctl_admin_reserve_kbytes,
-		.maxlen		= sizeof(sysctl_admin_reserve_kbytes),
-		.mode		= 0644,
-		.proc_handler	= proc_doulongvec_minmax,
-	},
-#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
-	{
-		.procname	= "mmap_rnd_bits",
-		.data		= &mmap_rnd_bits,
-		.maxlen		= sizeof(mmap_rnd_bits),
-		.mode		= 0600,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= (void *)&mmap_rnd_bits_min,
-		.extra2		= (void *)&mmap_rnd_bits_max,
-	},
-#endif
-#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
-	{
-		.procname	= "mmap_rnd_compat_bits",
-		.data		= &mmap_rnd_compat_bits,
-		.maxlen		= sizeof(mmap_rnd_compat_bits),
-		.mode		= 0600,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= (void *)&mmap_rnd_compat_bits_min,
-		.extra2		= (void *)&mmap_rnd_compat_bits_max,
-	},
-#endif
-#ifdef CONFIG_USERFAULTFD
-	{
-		.procname	= "unprivileged_userfaultfd",
-		.data		= &sysctl_unprivileged_userfaultfd,
-		.maxlen		= sizeof(sysctl_unprivileged_userfaultfd),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-#endif
-	{ }
-};
-
-static struct ctl_table fs_table[] = {
-	{
-		.procname	= "inode-nr",
-		.data		= &inodes_stat,
-		.maxlen		= 2*sizeof(long),
-		.mode		= 0444,
-		.proc_handler	= proc_nr_inodes,
-	},
-	{
-		.procname	= "inode-state",
-		.data		= &inodes_stat,
-		.maxlen		= 7*sizeof(long),
-		.mode		= 0444,
-		.proc_handler	= proc_nr_inodes,
-	},
-	{
-		.procname	= "file-nr",
-		.data		= &files_stat,
-		.maxlen		= sizeof(files_stat),
-		.mode		= 0444,
-		.proc_handler	= proc_nr_files,
-	},
-	{
-		.procname	= "file-max",
-		.data		= &files_stat.max_files,
-		.maxlen		= sizeof(files_stat.max_files),
-		.mode		= 0644,
-		.proc_handler	= proc_doulongvec_minmax,
-		.extra1		= &zero_ul,
-		.extra2		= &long_max,
-	},
-	{
-		.procname	= "nr_open",
-		.data		= &sysctl_nr_open,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &sysctl_nr_open_min,
-		.extra2		= &sysctl_nr_open_max,
-	},
-	{
-		.procname	= "dentry-state",
-		.data		= &dentry_stat,
-		.maxlen		= 6*sizeof(long),
-		.mode		= 0444,
-		.proc_handler	= proc_nr_dentry,
-	},
-	{
-		.procname	= "overflowuid",
-		.data		= &fs_overflowuid,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &minolduid,
-		.extra2		= &maxolduid,
-	},
-	{
-		.procname	= "overflowgid",
-		.data		= &fs_overflowgid,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &minolduid,
-		.extra2		= &maxolduid,
-	},
-#ifdef CONFIG_FILE_LOCKING
-	{
-		.procname	= "leases-enable",
-		.data		= &leases_enable,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
-#ifdef CONFIG_DNOTIFY
-	{
-		.procname	= "dir-notify-enable",
-		.data		= &dir_notify_enable,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
-#ifdef CONFIG_MMU
-#ifdef CONFIG_FILE_LOCKING
-	{
-		.procname	= "lease-break-time",
-		.data		= &lease_break_time,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
-#ifdef CONFIG_AIO
-	{
-		.procname	= "aio-nr",
-		.data		= &aio_nr,
-		.maxlen		= sizeof(aio_nr),
-		.mode		= 0444,
-		.proc_handler	= proc_doulongvec_minmax,
-	},
-	{
-		.procname	= "aio-max-nr",
-		.data		= &aio_max_nr,
-		.maxlen		= sizeof(aio_max_nr),
-		.mode		= 0644,
-		.proc_handler	= proc_doulongvec_minmax,
-	},
-#endif /* CONFIG_AIO */
-#ifdef CONFIG_INOTIFY_USER
-	{
-		.procname	= "inotify",
-		.mode		= 0555,
-		.child		= inotify_table,
-	},
-#endif	
-#ifdef CONFIG_EPOLL
-	{
-		.procname	= "epoll",
-		.mode		= 0555,
-		.child		= epoll_table,
-	},
-#endif
-#endif
-	{
-		.procname	= "protected_symlinks",
-		.data		= &sysctl_protected_symlinks,
-		.maxlen		= sizeof(int),
-		.mode		= 0600,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-	{
-		.procname	= "protected_hardlinks",
-		.data		= &sysctl_protected_hardlinks,
-		.maxlen		= sizeof(int),
-		.mode		= 0600,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-	{
-		.procname	= "protected_fifos",
-		.data		= &sysctl_protected_fifos,
-		.maxlen		= sizeof(int),
-		.mode		= 0600,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= &two,
-	},
-	{
-		.procname	= "protected_regular",
-		.data		= &sysctl_protected_regular,
-		.maxlen		= sizeof(int),
-		.mode		= 0600,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= &two,
-	},
-	{
-		.procname	= "suid_dumpable",
-		.data		= &suid_dumpable,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax_coredump,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= &two,
-	},
-#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
-	{
-		.procname	= "binfmt_misc",
-		.mode		= 0555,
-		.child		= sysctl_mount_point,
-	},
-#endif
-	{
-		.procname	= "pipe-max-size",
-		.data		= &pipe_max_size,
-		.maxlen		= sizeof(pipe_max_size),
-		.mode		= 0644,
-		.proc_handler	= proc_dopipe_max_size,
-	},
-	{
-		.procname	= "pipe-user-pages-hard",
-		.data		= &pipe_user_pages_hard,
-		.maxlen		= sizeof(pipe_user_pages_hard),
-		.mode		= 0644,
-		.proc_handler	= proc_doulongvec_minmax,
-	},
-	{
-		.procname	= "pipe-user-pages-soft",
-		.data		= &pipe_user_pages_soft,
-		.maxlen		= sizeof(pipe_user_pages_soft),
-		.mode		= 0644,
-		.proc_handler	= proc_doulongvec_minmax,
-	},
-	{
-		.procname	= "mount-max",
-		.data		= &sysctl_mount_max,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ONE,
-	},
-	{ }
-};
-
-static struct ctl_table debug_table[] = {
-#ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
-	{
-		.procname	= "exception-trace",
-		.data		= &show_unhandled_signals,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec
-	},
-#endif
-#if defined(CONFIG_OPTPROBES)
-	{
-		.procname	= "kprobes-optimization",
-		.data		= &sysctl_kprobes_optimization,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_kprobes_optimization_handler,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-#endif
-	{ }
-};
-
-static struct ctl_table dev_table[] = {
-	{ }
-};
-
-int __init sysctl_init(void)
-{
-	struct ctl_table_header *hdr;
-
-	hdr = register_sysctl_table(sysctl_base_table);
-	kmemleak_not_leak(hdr);
-	return 0;
-}
-
-#endif /* CONFIG_SYSCTL */
-
-/*
- * /proc/sys support
- */
-
+#endif /* CONFIG_SYSCTL */
+
+/*
+ * /proc/sys support
+ */
+
 #ifdef CONFIG_PROC_SYSCTL
 
 static int _proc_do_string(char *data, int maxlen, int write,
@@ -3307,95 +1577,1788 @@ int proc_dointvec(struct ctl_table *table, int write,
 	return -ENOSYS;
 }
 
-int proc_douintvec(struct ctl_table *table, int write,
-		  void __user *buffer, size_t *lenp, loff_t *ppos)
+int proc_douintvec(struct ctl_table *table, int write,
+		  void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	return -ENOSYS;
+}
+
+int proc_dointvec_minmax(struct ctl_table *table, int write,
+		    void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	return -ENOSYS;
+}
+
+int proc_douintvec_minmax(struct ctl_table *table, int write,
+			  void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	return -ENOSYS;
+}
+
+int proc_dointvec_jiffies(struct ctl_table *table, int write,
+		    void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	return -ENOSYS;
+}
+
+int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
+		    void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	return -ENOSYS;
+}
+
+int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
+			     void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	return -ENOSYS;
 }
 
-int proc_dointvec_minmax(struct ctl_table *table, int write,
+int proc_doulongvec_minmax(struct ctl_table *table, int write,
 		    void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	return -ENOSYS;
 }
 
-int proc_douintvec_minmax(struct ctl_table *table, int write,
-			  void __user *buffer, size_t *lenp, loff_t *ppos)
+int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
+				      void __user *buffer,
+				      size_t *lenp, loff_t *ppos)
 {
-	return -ENOSYS;
+    return -ENOSYS;
 }
 
-int proc_dointvec_jiffies(struct ctl_table *table, int write,
-		    void __user *buffer, size_t *lenp, loff_t *ppos)
+int proc_do_large_bitmap(struct ctl_table *table, int write,
+			 void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	return -ENOSYS;
 }
 
-int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
-		    void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	return -ENOSYS;
-}
+#endif /* CONFIG_PROC_SYSCTL */
+
+#if defined(CONFIG_SYSCTL)
+int proc_do_static_key(struct ctl_table *table, int write,
+		       void __user *buffer, size_t *lenp,
+		       loff_t *ppos)
+{
+	struct static_key *key = (struct static_key *)table->data;
+	static DEFINE_MUTEX(static_key_mutex);
+	int val, ret;
+	struct ctl_table tmp = {
+		.data   = &val,
+		.maxlen = sizeof(val),
+		.mode   = table->mode,
+		.extra1 = SYSCTL_ZERO,
+		.extra2 = SYSCTL_ONE,
+	};
+
+	if (write && !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	mutex_lock(&static_key_mutex);
+	val = static_key_enabled(key);
+	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+	if (write && !ret) {
+		if (val)
+			static_key_enable(key);
+		else
+			static_key_disable(key);
+	}
+	mutex_unlock(&static_key_mutex);
+	return ret;
+}
+
+static struct ctl_table kern_table[] = {
+	{
+		.procname	= "sched_child_runs_first",
+		.data		= &sysctl_sched_child_runs_first,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#ifdef CONFIG_SCHED_DEBUG
+	{
+		.procname	= "sched_min_granularity_ns",
+		.data		= &sysctl_sched_min_granularity,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= sched_proc_update_handler,
+		.extra1		= &min_sched_granularity_ns,
+		.extra2		= &max_sched_granularity_ns,
+	},
+	{
+		.procname	= "sched_latency_ns",
+		.data		= &sysctl_sched_latency,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= sched_proc_update_handler,
+		.extra1		= &min_sched_granularity_ns,
+		.extra2		= &max_sched_granularity_ns,
+	},
+	{
+		.procname	= "sched_wakeup_granularity_ns",
+		.data		= &sysctl_sched_wakeup_granularity,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= sched_proc_update_handler,
+		.extra1		= &min_wakeup_granularity_ns,
+		.extra2		= &max_wakeup_granularity_ns,
+	},
+#ifdef CONFIG_SMP
+	{
+		.procname	= "sched_tunable_scaling",
+		.data		= &sysctl_sched_tunable_scaling,
+		.maxlen		= sizeof(enum sched_tunable_scaling),
+		.mode		= 0644,
+		.proc_handler	= sched_proc_update_handler,
+		.extra1		= &min_sched_tunable_scaling,
+		.extra2		= &max_sched_tunable_scaling,
+	},
+	{
+		.procname	= "sched_migration_cost_ns",
+		.data		= &sysctl_sched_migration_cost,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "sched_nr_migrate",
+		.data		= &sysctl_sched_nr_migrate,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#ifdef CONFIG_SCHEDSTATS
+	{
+		.procname	= "sched_schedstats",
+		.data		= NULL,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= sysctl_schedstats,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+#endif /* CONFIG_SCHEDSTATS */
+#endif /* CONFIG_SMP */
+#ifdef CONFIG_NUMA_BALANCING
+	{
+		.procname	= "numa_balancing_scan_delay_ms",
+		.data		= &sysctl_numa_balancing_scan_delay,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "numa_balancing_scan_period_min_ms",
+		.data		= &sysctl_numa_balancing_scan_period_min,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "numa_balancing_scan_period_max_ms",
+		.data		= &sysctl_numa_balancing_scan_period_max,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "numa_balancing_scan_size_mb",
+		.data		= &sysctl_numa_balancing_scan_size,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ONE,
+	},
+	{
+		.procname	= "numa_balancing",
+		.data		= NULL, /* filled in by handler */
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= sysctl_numa_balancing,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+#endif /* CONFIG_NUMA_BALANCING */
+#endif /* CONFIG_SCHED_DEBUG */
+	{
+		.procname	= "sched_rt_period_us",
+		.data		= &sysctl_sched_rt_period,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= sched_rt_handler,
+	},
+	{
+		.procname	= "sched_rt_runtime_us",
+		.data		= &sysctl_sched_rt_runtime,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= sched_rt_handler,
+	},
+	{
+		.procname	= "sched_rr_timeslice_ms",
+		.data		= &sysctl_sched_rr_timeslice,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= sched_rr_handler,
+	},
+#ifdef CONFIG_UCLAMP_TASK
+	{
+		.procname	= "sched_util_clamp_min",
+		.data		= &sysctl_sched_uclamp_util_min,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= sysctl_sched_uclamp_handler,
+	},
+	{
+		.procname	= "sched_util_clamp_max",
+		.data		= &sysctl_sched_uclamp_util_max,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= sysctl_sched_uclamp_handler,
+	},
+#endif
+#ifdef CONFIG_SCHED_AUTOGROUP
+	{
+		.procname	= "sched_autogroup_enabled",
+		.data		= &sysctl_sched_autogroup_enabled,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+#endif
+#ifdef CONFIG_CFS_BANDWIDTH
+	{
+		.procname	= "sched_cfs_bandwidth_slice_us",
+		.data		= &sysctl_sched_cfs_bandwidth_slice,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ONE,
+	},
+#endif
+#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
+	{
+		.procname	= "sched_energy_aware",
+		.data		= &sysctl_sched_energy_aware,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= sched_energy_aware_handler,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+#endif
+#ifdef CONFIG_PROVE_LOCKING
+	{
+		.procname	= "prove_locking",
+		.data		= &prove_locking,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
+#ifdef CONFIG_LOCK_STAT
+	{
+		.procname	= "lock_stat",
+		.data		= &lock_stat,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
+	{
+		.procname	= "panic",
+		.data		= &panic_timeout,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#ifdef CONFIG_COREDUMP
+	{
+		.procname	= "core_uses_pid",
+		.data		= &core_uses_pid,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "core_pattern",
+		.data		= core_pattern,
+		.maxlen		= CORENAME_MAX_SIZE,
+		.mode		= 0644,
+		.proc_handler	= proc_dostring_coredump,
+	},
+	{
+		.procname	= "core_pipe_limit",
+		.data		= &core_pipe_limit,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
+#ifdef CONFIG_PROC_SYSCTL
+	{
+		.procname	= "tainted",
+		.maxlen 	= sizeof(long),
+		.mode		= 0644,
+		.proc_handler	= proc_taint,
+	},
+	{
+		.procname	= "sysctl_writes_strict",
+		.data		= &sysctl_writes_strict,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &neg_one,
+		.extra2		= SYSCTL_ONE,
+	},
+#endif
+#ifdef CONFIG_LATENCYTOP
+	{
+		.procname	= "latencytop",
+		.data		= &latencytop_enabled,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= sysctl_latencytop,
+	},
+#endif
+#ifdef CONFIG_BLK_DEV_INITRD
+	{
+		.procname	= "real-root-dev",
+		.data		= &real_root_dev,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
+	{
+		.procname	= "print-fatal-signals",
+		.data		= &print_fatal_signals,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#ifdef CONFIG_SPARC
+	{
+		.procname	= "reboot-cmd",
+		.data		= reboot_command,
+		.maxlen		= 256,
+		.mode		= 0644,
+		.proc_handler	= proc_dostring,
+	},
+	{
+		.procname	= "stop-a",
+		.data		= &stop_a_enabled,
+		.maxlen		= sizeof (int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "scons-poweroff",
+		.data		= &scons_pwroff,
+		.maxlen		= sizeof (int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
+#ifdef CONFIG_SPARC64
+	{
+		.procname	= "tsb-ratio",
+		.data		= &sysctl_tsb_ratio,
+		.maxlen		= sizeof (int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
+#ifdef CONFIG_PARISC
+	{
+		.procname	= "soft-power",
+		.data		= &pwrsw_enabled,
+		.maxlen		= sizeof (int),
+	 	.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
+#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
+	{
+		.procname	= "unaligned-trap",
+		.data		= &unaligned_enabled,
+		.maxlen		= sizeof (int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
+	{
+		.procname	= "ctrl-alt-del",
+		.data		= &C_A_D,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#ifdef CONFIG_FUNCTION_TRACER
+	{
+		.procname	= "ftrace_enabled",
+		.data		= &ftrace_enabled,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= ftrace_enable_sysctl,
+	},
+#endif
+#ifdef CONFIG_STACK_TRACER
+	{
+		.procname	= "stack_tracer_enabled",
+		.data		= &stack_tracer_enabled,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= stack_trace_sysctl,
+	},
+#endif
+#ifdef CONFIG_TRACING
+	{
+		.procname	= "ftrace_dump_on_oops",
+		.data		= &ftrace_dump_on_oops,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "traceoff_on_warning",
+		.data		= &__disable_trace_on_warning,
+		.maxlen		= sizeof(__disable_trace_on_warning),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "tracepoint_printk",
+		.data		= &tracepoint_printk,
+		.maxlen		= sizeof(tracepoint_printk),
+		.mode		= 0644,
+		.proc_handler	= tracepoint_printk_sysctl,
+	},
+#endif
+#ifdef CONFIG_KEXEC_CORE
+	{
+		.procname	= "kexec_load_disabled",
+		.data		= &kexec_load_disabled,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		/* only handle a transition from default "0" to "1" */
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ONE,
+		.extra2		= SYSCTL_ONE,
+	},
+#endif
+#ifdef CONFIG_MODULES
+	{
+		.procname	= "modprobe",
+		.data		= &modprobe_path,
+		.maxlen		= KMOD_PATH_LEN,
+		.mode		= 0644,
+		.proc_handler	= proc_dostring,
+	},
+	{
+		.procname	= "modules_disabled",
+		.data		= &modules_disabled,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		/* only handle a transition from default "0" to "1" */
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ONE,
+		.extra2		= SYSCTL_ONE,
+	},
+#endif
+#ifdef CONFIG_UEVENT_HELPER
+	{
+		.procname	= "hotplug",
+		.data		= &uevent_helper,
+		.maxlen		= UEVENT_HELPER_PATH_LEN,
+		.mode		= 0644,
+		.proc_handler	= proc_dostring,
+	},
+#endif
+#ifdef CONFIG_CHR_DEV_SG
+	{
+		.procname	= "sg-big-buff",
+		.data		= &sg_big_buff,
+		.maxlen		= sizeof (int),
+		.mode		= 0444,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
+#ifdef CONFIG_BSD_PROCESS_ACCT
+	{
+		.procname	= "acct",
+		.data		= &acct_parm,
+		.maxlen		= 3*sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
+#ifdef CONFIG_MAGIC_SYSRQ
+	{
+		.procname	= "sysrq",
+		.data		= NULL,
+		.maxlen		= sizeof (int),
+		.mode		= 0644,
+		.proc_handler	= sysrq_sysctl_handler,
+	},
+#endif
+#ifdef CONFIG_PROC_SYSCTL
+	{
+		.procname	= "cad_pid",
+		.data		= NULL,
+		.maxlen		= sizeof (int),
+		.mode		= 0600,
+		.proc_handler	= proc_do_cad_pid,
+	},
+#endif
+	{
+		.procname	= "threads-max",
+		.data		= NULL,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= sysctl_max_threads,
+	},
+	{
+		.procname	= "random",
+		.mode		= 0555,
+		.child		= random_table,
+	},
+	{
+		.procname	= "usermodehelper",
+		.mode		= 0555,
+		.child		= usermodehelper_table,
+	},
+#ifdef CONFIG_FW_LOADER_USER_HELPER
+	{
+		.procname	= "firmware_config",
+		.mode		= 0555,
+		.child		= firmware_config_table,
+	},
+#endif
+	{
+		.procname	= "overflowuid",
+		.data		= &overflowuid,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &minolduid,
+		.extra2		= &maxolduid,
+	},
+	{
+		.procname	= "overflowgid",
+		.data		= &overflowgid,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &minolduid,
+		.extra2		= &maxolduid,
+	},
+#ifdef CONFIG_S390
+	{
+		.procname	= "userprocess_debug",
+		.data		= &show_unhandled_signals,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
+	{
+		.procname	= "pid_max",
+		.data		= &pid_max,
+		.maxlen		= sizeof (int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &pid_max_min,
+		.extra2		= &pid_max_max,
+	},
+	{
+		.procname	= "panic_on_oops",
+		.data		= &panic_on_oops,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "panic_print",
+		.data		= &panic_print,
+		.maxlen		= sizeof(unsigned long),
+		.mode		= 0644,
+		.proc_handler	= proc_doulongvec_minmax,
+	},
+#if defined CONFIG_PRINTK
+	{
+		.procname	= "printk",
+		.data		= &console_loglevel,
+		.maxlen		= 4*sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "printk_ratelimit",
+		.data		= &printk_ratelimit_state.interval,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	{
+		.procname	= "printk_ratelimit_burst",
+		.data		= &printk_ratelimit_state.burst,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "printk_delay",
+		.data		= &printk_delay_msec,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &ten_thousand,
+	},
+	{
+		.procname	= "printk_devkmsg",
+		.data		= devkmsg_log_str,
+		.maxlen		= DEVKMSG_STR_MAX_SIZE,
+		.mode		= 0644,
+		.proc_handler	= devkmsg_sysctl_set_loglvl,
+	},
+	{
+		.procname	= "dmesg_restrict",
+		.data		= &dmesg_restrict,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax_sysadmin,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+	{
+		.procname	= "kptr_restrict",
+		.data		= &kptr_restrict,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax_sysadmin,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &two,
+	},
+#endif
+	{
+		.procname	= "ngroups_max",
+		.data		= &ngroups_max,
+		.maxlen		= sizeof (int),
+		.mode		= 0444,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "cap_last_cap",
+		.data		= (void *)&cap_last_cap,
+		.maxlen		= sizeof(int),
+		.mode		= 0444,
+		.proc_handler	= proc_dointvec,
+	},
+#if defined(CONFIG_LOCKUP_DETECTOR)
+	{
+		.procname       = "watchdog",
+		.data		= &watchdog_user_enabled,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler   = proc_watchdog,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+	{
+		.procname	= "watchdog_thresh",
+		.data		= &watchdog_thresh,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_watchdog_thresh,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &sixty,
+	},
+	{
+		.procname       = "nmi_watchdog",
+		.data		= &nmi_watchdog_user_enabled,
+		.maxlen		= sizeof(int),
+		.mode		= NMI_WATCHDOG_SYSCTL_PERM,
+		.proc_handler   = proc_nmi_watchdog,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+	{
+		.procname	= "watchdog_cpumask",
+		.data		= &watchdog_cpumask_bits,
+		.maxlen		= NR_CPUS,
+		.mode		= 0644,
+		.proc_handler	= proc_watchdog_cpumask,
+	},
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
+	{
+		.procname       = "soft_watchdog",
+		.data		= &soft_watchdog_user_enabled,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler   = proc_soft_watchdog,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+	{
+		.procname	= "softlockup_panic",
+		.data		= &softlockup_panic,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+#ifdef CONFIG_SMP
+	{
+		.procname	= "softlockup_all_cpu_backtrace",
+		.data		= &sysctl_softlockup_all_cpu_backtrace,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+#endif /* CONFIG_SMP */
+#endif
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
+	{
+		.procname	= "hardlockup_panic",
+		.data		= &hardlockup_panic,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+#ifdef CONFIG_SMP
+	{
+		.procname	= "hardlockup_all_cpu_backtrace",
+		.data		= &sysctl_hardlockup_all_cpu_backtrace,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+#endif /* CONFIG_SMP */
+#endif
+#endif
+
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
+	{
+		.procname       = "unknown_nmi_panic",
+		.data           = &unknown_nmi_panic,
+		.maxlen         = sizeof (int),
+		.mode           = 0644,
+		.proc_handler   = proc_dointvec,
+	},
+#endif
+#if defined(CONFIG_X86)
+	{
+		.procname	= "panic_on_unrecovered_nmi",
+		.data		= &panic_on_unrecovered_nmi,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "panic_on_io_nmi",
+		.data		= &panic_on_io_nmi,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+	{
+		.procname	= "panic_on_stackoverflow",
+		.data		= &sysctl_panic_on_stackoverflow,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
+	{
+		.procname	= "bootloader_type",
+		.data		= &bootloader_type,
+		.maxlen		= sizeof (int),
+		.mode		= 0444,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "bootloader_version",
+		.data		= &bootloader_version,
+		.maxlen		= sizeof (int),
+		.mode		= 0444,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "io_delay_type",
+		.data		= &io_delay_type,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
+#if defined(CONFIG_MMU)
+	{
+		.procname	= "randomize_va_space",
+		.data		= &randomize_va_space,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
+#if defined(CONFIG_S390) && defined(CONFIG_SMP)
+	{
+		.procname	= "spin_retry",
+		.data		= &spin_retry,
+		.maxlen		= sizeof (int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
+#if	defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
+	{
+		.procname	= "acpi_video_flags",
+		.data		= &acpi_realmode_flags,
+		.maxlen		= sizeof (unsigned long),
+		.mode		= 0644,
+		.proc_handler	= proc_doulongvec_minmax,
+	},
+#endif
+#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
+	{
+		.procname	= "ignore-unaligned-usertrap",
+		.data		= &no_unaligned_warning,
+		.maxlen		= sizeof (int),
+	 	.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
+#ifdef CONFIG_IA64
+	{
+		.procname	= "unaligned-dump-stack",
+		.data		= &unaligned_dump_stack,
+		.maxlen		= sizeof (int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
+#ifdef CONFIG_DETECT_HUNG_TASK
+	{
+		.procname	= "hung_task_panic",
+		.data		= &sysctl_hung_task_panic,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+	{
+		.procname	= "hung_task_check_count",
+		.data		= &sysctl_hung_task_check_count,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+	},
+	{
+		.procname	= "hung_task_timeout_secs",
+		.data		= &sysctl_hung_task_timeout_secs,
+		.maxlen		= sizeof(unsigned long),
+		.mode		= 0644,
+		.proc_handler	= proc_dohung_task_timeout_secs,
+		.extra2		= &hung_task_timeout_max,
+	},
+	{
+		.procname	= "hung_task_check_interval_secs",
+		.data		= &sysctl_hung_task_check_interval_secs,
+		.maxlen		= sizeof(unsigned long),
+		.mode		= 0644,
+		.proc_handler	= proc_dohung_task_timeout_secs,
+		.extra2		= &hung_task_timeout_max,
+	},
+	{
+		.procname	= "hung_task_warnings",
+		.data		= &sysctl_hung_task_warnings,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &neg_one,
+	},
+#endif
+#ifdef CONFIG_RT_MUTEXES
+	{
+		.procname	= "max_lock_depth",
+		.data		= &max_lock_depth,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
+	{
+		.procname	= "poweroff_cmd",
+		.data		= &poweroff_cmd,
+		.maxlen		= POWEROFF_CMD_PATH_LEN,
+		.mode		= 0644,
+		.proc_handler	= proc_dostring,
+	},
+#ifdef CONFIG_KEYS
+	{
+		.procname	= "keys",
+		.mode		= 0555,
+		.child		= key_sysctls,
+	},
+#endif
+#ifdef CONFIG_PERF_EVENTS
+	/*
+	 * User-space scripts rely on the existence of this file
+	 * as a feature check for perf_events being enabled.
+	 *
+	 * So it's an ABI, do not remove!
+	 */
+	{
+		.procname	= "perf_event_paranoid",
+		.data		= &sysctl_perf_event_paranoid,
+		.maxlen		= sizeof(sysctl_perf_event_paranoid),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "perf_event_mlock_kb",
+		.data		= &sysctl_perf_event_mlock,
+		.maxlen		= sizeof(sysctl_perf_event_mlock),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "perf_event_max_sample_rate",
+		.data		= &sysctl_perf_event_sample_rate,
+		.maxlen		= sizeof(sysctl_perf_event_sample_rate),
+		.mode		= 0644,
+		.proc_handler	= perf_proc_update_handler,
+		.extra1		= SYSCTL_ONE,
+	},
+	{
+		.procname	= "perf_cpu_time_max_percent",
+		.data		= &sysctl_perf_cpu_time_max_percent,
+		.maxlen		= sizeof(sysctl_perf_cpu_time_max_percent),
+		.mode		= 0644,
+		.proc_handler	= perf_cpu_time_max_percent_handler,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &one_hundred,
+	},
+	{
+		.procname	= "perf_event_max_stack",
+		.data		= &sysctl_perf_event_max_stack,
+		.maxlen		= sizeof(sysctl_perf_event_max_stack),
+		.mode		= 0644,
+		.proc_handler	= perf_event_max_stack_handler,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &six_hundred_forty_kb,
+	},
+	{
+		.procname	= "perf_event_max_contexts_per_stack",
+		.data		= &sysctl_perf_event_max_contexts_per_stack,
+		.maxlen		= sizeof(sysctl_perf_event_max_contexts_per_stack),
+		.mode		= 0644,
+		.proc_handler	= perf_event_max_stack_handler,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &one_thousand,
+	},
+#endif
+	{
+		.procname	= "panic_on_warn",
+		.data		= &panic_on_warn,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
+	{
+		.procname	= "timer_migration",
+		.data		= &sysctl_timer_migration,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= timer_migration_handler,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+#endif
+#ifdef CONFIG_BPF_SYSCALL
+	{
+		.procname	= "unprivileged_bpf_disabled",
+		.data		= &sysctl_unprivileged_bpf_disabled,
+		.maxlen		= sizeof(sysctl_unprivileged_bpf_disabled),
+		.mode		= 0644,
+		/* only handle a transition from default "0" to "1" */
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ONE,
+		.extra2		= SYSCTL_ONE,
+	},
+	{
+		.procname	= "bpf_stats_enabled",
+		.data		= &bpf_stats_enabled_key.key,
+		.maxlen		= sizeof(bpf_stats_enabled_key),
+		.mode		= 0644,
+		.proc_handler	= proc_do_static_key,
+	},
+#endif
+#if defined(CONFIG_TREE_RCU)
+	{
+		.procname	= "panic_on_rcu_stall",
+		.data		= &sysctl_panic_on_rcu_stall,
+		.maxlen		= sizeof(sysctl_panic_on_rcu_stall),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+#endif
+#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
+	{
+		.procname	= "stack_erasing",
+		.data		= NULL,
+		.maxlen		= sizeof(int),
+		.mode		= 0600,
+		.proc_handler	= stack_erasing_sysctl,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+#endif
+	{ }
+};
 
-int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
-			     void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	return -ENOSYS;
-}
+static struct ctl_table vm_table[] = {	/* VM tunables; the list ends with an empty entry */
+	{
+		.procname	= "overcommit_memory",
+		.data		= &sysctl_overcommit_memory,
+		.maxlen		= sizeof(sysctl_overcommit_memory),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &two,
+	},
+	{
+		.procname	= "panic_on_oom",
+		.data		= &sysctl_panic_on_oom,
+		.maxlen		= sizeof(sysctl_panic_on_oom),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &two,
+	},
+	{
+		.procname	= "oom_kill_allocating_task",
+		.data		= &sysctl_oom_kill_allocating_task,
+		.maxlen		= sizeof(sysctl_oom_kill_allocating_task),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "oom_dump_tasks",
+		.data		= &sysctl_oom_dump_tasks,
+		.maxlen		= sizeof(sysctl_oom_dump_tasks),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "overcommit_ratio",
+		.data		= &sysctl_overcommit_ratio,
+		.maxlen		= sizeof(sysctl_overcommit_ratio),
+		.mode		= 0644,
+		.proc_handler	= overcommit_ratio_handler,
+	},
+	{
+		.procname	= "overcommit_kbytes",
+		.data		= &sysctl_overcommit_kbytes,
+		.maxlen		= sizeof(sysctl_overcommit_kbytes),
+		.mode		= 0644,
+		.proc_handler	= overcommit_kbytes_handler,
+	},
+	{
+		.procname	= "page-cluster",
+		.data		= &page_cluster,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+	},
+	{
+		.procname	= "dirty_background_ratio",
+		.data		= &dirty_background_ratio,
+		.maxlen		= sizeof(dirty_background_ratio),
+		.mode		= 0644,
+		.proc_handler	= dirty_background_ratio_handler,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &one_hundred,
+	},
+	{
+		.procname	= "dirty_background_bytes",
+		.data		= &dirty_background_bytes,
+		.maxlen		= sizeof(dirty_background_bytes),
+		.mode		= 0644,
+		.proc_handler	= dirty_background_bytes_handler,
+		.extra1		= &one_ul,
+	},
+	{
+		.procname	= "dirty_ratio",
+		.data		= &vm_dirty_ratio,
+		.maxlen		= sizeof(vm_dirty_ratio),
+		.mode		= 0644,
+		.proc_handler	= dirty_ratio_handler,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &one_hundred,
+	},
+	{
+		.procname	= "dirty_bytes",
+		.data		= &vm_dirty_bytes,
+		.maxlen		= sizeof(vm_dirty_bytes),
+		.mode		= 0644,
+		.proc_handler	= dirty_bytes_handler,
+		.extra1		= &dirty_bytes_min,
+	},
+	{
+		.procname	= "dirty_writeback_centisecs",
+		.data		= &dirty_writeback_interval,
+		.maxlen		= sizeof(dirty_writeback_interval),
+		.mode		= 0644,
+		.proc_handler	= dirty_writeback_centisecs_handler,
+	},
+	{
+		.procname	= "dirty_expire_centisecs",
+		.data		= &dirty_expire_interval,
+		.maxlen		= sizeof(dirty_expire_interval),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+	},
+	{
+		.procname	= "dirtytime_expire_seconds",
+		.data		= &dirtytime_expire_interval,
+		.maxlen		= sizeof(dirtytime_expire_interval),
+		.mode		= 0644,
+		.proc_handler	= dirtytime_interval_handler,
+		.extra1		= SYSCTL_ZERO,
+	},
+	{
+		.procname	= "swappiness",
+		.data		= &vm_swappiness,
+		.maxlen		= sizeof(vm_swappiness),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &one_hundred,
+	},
+#ifdef CONFIG_HUGETLB_PAGE
+	{
+		.procname	= "nr_hugepages",
+		.data		= NULL,
+		.maxlen		= sizeof(unsigned long),
+		.mode		= 0644,
+		.proc_handler	= hugetlb_sysctl_handler,
+	},
+#ifdef CONFIG_NUMA
+	{
+		.procname       = "nr_hugepages_mempolicy",
+		.data           = NULL,
+		.maxlen         = sizeof(unsigned long),
+		.mode           = 0644,
+		.proc_handler   = &hugetlb_mempolicy_sysctl_handler,
+	},
+	{
+		.procname		= "numa_stat",
+		.data			= &sysctl_vm_numa_stat,
+		.maxlen			= sizeof(int),
+		.mode			= 0644,
+		.proc_handler	= sysctl_vm_numa_stat_handler,
+		.extra1			= SYSCTL_ZERO,
+		.extra2			= SYSCTL_ONE,
+	},
+#endif
+	 {
+		.procname	= "hugetlb_shm_group",
+		.data		= &sysctl_hugetlb_shm_group,
+		.maxlen		= sizeof(gid_t),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	 },
+	{
+		.procname	= "nr_overcommit_hugepages",
+		.data		= NULL,
+		.maxlen		= sizeof(unsigned long),
+		.mode		= 0644,
+		.proc_handler	= hugetlb_overcommit_handler,
+	},
+#endif
+	{
+		.procname	= "lowmem_reserve_ratio",
+		.data		= &sysctl_lowmem_reserve_ratio,
+		.maxlen		= sizeof(sysctl_lowmem_reserve_ratio),
+		.mode		= 0644,
+		.proc_handler	= lowmem_reserve_ratio_sysctl_handler,
+	},
+	{
+		.procname	= "drop_caches",
+		.data		= &sysctl_drop_caches,
+		.maxlen		= sizeof(int),
+		.mode		= 0200,	/* write-only */
+		.proc_handler	= drop_caches_sysctl_handler,
+		.extra1		= SYSCTL_ONE,
+		.extra2		= &four,
+	},
+#ifdef CONFIG_COMPACTION
+	{
+		.procname	= "compact_memory",
+		.data		= &sysctl_compact_memory,
+		.maxlen		= sizeof(int),
+		.mode		= 0200,	/* write-only */
+		.proc_handler	= sysctl_compaction_handler,
+	},
+	{
+		.procname	= "extfrag_threshold",
+		.data		= &sysctl_extfrag_threshold,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &min_extfrag_threshold,
+		.extra2		= &max_extfrag_threshold,
+	},
+	{
+		.procname	= "compact_unevictable_allowed",
+		.data		= &sysctl_compact_unevictable_allowed,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax_warn_RT_change,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+
+#endif /* CONFIG_COMPACTION */
+	{
+		.procname	= "min_free_kbytes",
+		.data		= &min_free_kbytes,
+		.maxlen		= sizeof(min_free_kbytes),
+		.mode		= 0644,
+		.proc_handler	= min_free_kbytes_sysctl_handler,
+		.extra1		= SYSCTL_ZERO,
+	},
+	{
+		.procname	= "watermark_boost_factor",
+		.data		= &watermark_boost_factor,
+		.maxlen		= sizeof(watermark_boost_factor),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+	},
+	{
+		.procname	= "watermark_scale_factor",
+		.data		= &watermark_scale_factor,
+		.maxlen		= sizeof(watermark_scale_factor),
+		.mode		= 0644,
+		.proc_handler	= watermark_scale_factor_sysctl_handler,
+		.extra1		= SYSCTL_ONE,
+		.extra2		= &one_thousand,
+	},
+	{
+		.procname	= "percpu_pagelist_fraction",
+		.data		= &percpu_pagelist_fraction,
+		.maxlen		= sizeof(percpu_pagelist_fraction),
+		.mode		= 0644,
+		.proc_handler	= percpu_pagelist_fraction_sysctl_handler,
+		.extra1		= SYSCTL_ZERO,
+	},
+#ifdef CONFIG_MMU
+	{
+		.procname	= "max_map_count",
+		.data		= &sysctl_max_map_count,
+		.maxlen		= sizeof(sysctl_max_map_count),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+	},
+#else
+	{
+		.procname	= "nr_trim_pages",
+		.data		= &sysctl_nr_trim_pages,
+		.maxlen		= sizeof(sysctl_nr_trim_pages),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+	},
+#endif
+	{
+		.procname	= "laptop_mode",
+		.data		= &laptop_mode,
+		.maxlen		= sizeof(laptop_mode),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	{
+		.procname	= "block_dump",
+		.data		= &block_dump,
+		.maxlen		= sizeof(block_dump),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,	/* _minmax so .extra1 is enforced */
+		.extra1		= SYSCTL_ZERO,
+	},
+	{
+		.procname	= "vfs_cache_pressure",
+		.data		= &sysctl_vfs_cache_pressure,
+		.maxlen		= sizeof(sysctl_vfs_cache_pressure),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,	/* _minmax so .extra1 is enforced */
+		.extra1		= SYSCTL_ZERO,
+	},
+#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
+    defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
+	{
+		.procname	= "legacy_va_layout",
+		.data		= &sysctl_legacy_va_layout,
+		.maxlen		= sizeof(sysctl_legacy_va_layout),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,	/* _minmax so .extra1 is enforced */
+		.extra1		= SYSCTL_ZERO,
+	},
+#endif
+#ifdef CONFIG_NUMA
+	{
+		.procname	= "zone_reclaim_mode",
+		.data		= &node_reclaim_mode,
+		.maxlen		= sizeof(node_reclaim_mode),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,	/* _minmax so .extra1 is enforced */
+		.extra1		= SYSCTL_ZERO,
+	},
+	{
+		.procname	= "min_unmapped_ratio",
+		.data		= &sysctl_min_unmapped_ratio,
+		.maxlen		= sizeof(sysctl_min_unmapped_ratio),
+		.mode		= 0644,
+		.proc_handler	= sysctl_min_unmapped_ratio_sysctl_handler,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &one_hundred,
+	},
+	{
+		.procname	= "min_slab_ratio",
+		.data		= &sysctl_min_slab_ratio,
+		.maxlen		= sizeof(sysctl_min_slab_ratio),
+		.mode		= 0644,
+		.proc_handler	= sysctl_min_slab_ratio_sysctl_handler,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &one_hundred,
+	},
+#endif
+#ifdef CONFIG_SMP
+	{
+		.procname	= "stat_interval",
+		.data		= &sysctl_stat_interval,
+		.maxlen		= sizeof(sysctl_stat_interval),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	{
+		.procname	= "stat_refresh",
+		.data		= NULL,
+		.maxlen		= 0,
+		.mode		= 0600,
+		.proc_handler	= vmstat_refresh,
+	},
+#endif
+#ifdef CONFIG_MMU
+	{
+		.procname	= "mmap_min_addr",
+		.data		= &dac_mmap_min_addr,
+		.maxlen		= sizeof(unsigned long),
+		.mode		= 0644,
+		.proc_handler	= mmap_min_addr_handler,
+	},
+#endif
+#ifdef CONFIG_NUMA
+	{
+		.procname	= "numa_zonelist_order",
+		.data		= &numa_zonelist_order,
+		.maxlen		= NUMA_ZONELIST_ORDER_LEN,
+		.mode		= 0644,
+		.proc_handler	= numa_zonelist_order_handler,
+	},
+#endif
+#if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
+   (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
+	{
+		.procname	= "vdso_enabled",
+#ifdef CONFIG_X86_32
+		.data		= &vdso32_enabled,
+		.maxlen		= sizeof(vdso32_enabled),
+#else
+		.data		= &vdso_enabled,
+		.maxlen		= sizeof(vdso_enabled),
+#endif
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,	/* _minmax so .extra1 is enforced */
+		.extra1		= SYSCTL_ZERO,
+	},
+#endif
+#ifdef CONFIG_HIGHMEM
+	{
+		.procname	= "highmem_is_dirtyable",
+		.data		= &vm_highmem_is_dirtyable,
+		.maxlen		= sizeof(vm_highmem_is_dirtyable),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+#endif
+#ifdef CONFIG_MEMORY_FAILURE
+	{
+		.procname	= "memory_failure_early_kill",
+		.data		= &sysctl_memory_failure_early_kill,
+		.maxlen		= sizeof(sysctl_memory_failure_early_kill),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+	{
+		.procname	= "memory_failure_recovery",
+		.data		= &sysctl_memory_failure_recovery,
+		.maxlen		= sizeof(sysctl_memory_failure_recovery),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+#endif
+	{
+		.procname	= "user_reserve_kbytes",
+		.data		= &sysctl_user_reserve_kbytes,
+		.maxlen		= sizeof(sysctl_user_reserve_kbytes),
+		.mode		= 0644,
+		.proc_handler	= proc_doulongvec_minmax,
+	},
+	{
+		.procname	= "admin_reserve_kbytes",
+		.data		= &sysctl_admin_reserve_kbytes,
+		.maxlen		= sizeof(sysctl_admin_reserve_kbytes),
+		.mode		= 0644,
+		.proc_handler	= proc_doulongvec_minmax,
+	},
+#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
+	{
+		.procname	= "mmap_rnd_bits",
+		.data		= &mmap_rnd_bits,
+		.maxlen		= sizeof(mmap_rnd_bits),
+		.mode		= 0600,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= (void *)&mmap_rnd_bits_min,
+		.extra2		= (void *)&mmap_rnd_bits_max,
+	},
+#endif
+#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
+	{
+		.procname	= "mmap_rnd_compat_bits",
+		.data		= &mmap_rnd_compat_bits,
+		.maxlen		= sizeof(mmap_rnd_compat_bits),
+		.mode		= 0600,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= (void *)&mmap_rnd_compat_bits_min,
+		.extra2		= (void *)&mmap_rnd_compat_bits_max,
+	},
+#endif
+#ifdef CONFIG_USERFAULTFD
+	{
+		.procname	= "unprivileged_userfaultfd",
+		.data		= &sysctl_unprivileged_userfaultfd,
+		.maxlen		= sizeof(sysctl_unprivileged_userfaultfd),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+#endif
+	{ }
+};
 
-int proc_doulongvec_minmax(struct ctl_table *table, int write,
-		    void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	return -ENOSYS;
-}
+static struct ctl_table fs_table[] = {
+	{
+		.procname	= "inode-nr",
+		.data		= &inodes_stat,
+		.maxlen		= 2*sizeof(long),
+		.mode		= 0444,
+		.proc_handler	= proc_nr_inodes,
+	},
+	{
+		.procname	= "inode-state",
+		.data		= &inodes_stat,
+		.maxlen		= 7*sizeof(long),
+		.mode		= 0444,
+		.proc_handler	= proc_nr_inodes,
+	},
+	{
+		.procname	= "file-nr",
+		.data		= &files_stat,
+		.maxlen		= sizeof(files_stat),
+		.mode		= 0444,
+		.proc_handler	= proc_nr_files,
+	},
+	{
+		.procname	= "file-max",
+		.data		= &files_stat.max_files,
+		.maxlen		= sizeof(files_stat.max_files),
+		.mode		= 0644,
+		.proc_handler	= proc_doulongvec_minmax,
+		.extra1		= &zero_ul,
+		.extra2		= &long_max,
+	},
+	{
+		.procname	= "nr_open",
+		.data		= &sysctl_nr_open,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &sysctl_nr_open_min,
+		.extra2		= &sysctl_nr_open_max,
+	},
+	{
+		.procname	= "dentry-state",
+		.data		= &dentry_stat,
+		.maxlen		= 6*sizeof(long),
+		.mode		= 0444,
+		.proc_handler	= proc_nr_dentry,
+	},
+	{
+		.procname	= "overflowuid",
+		.data		= &fs_overflowuid,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &minolduid,
+		.extra2		= &maxolduid,
+	},
+	{
+		.procname	= "overflowgid",
+		.data		= &fs_overflowgid,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &minolduid,
+		.extra2		= &maxolduid,
+	},
+#ifdef CONFIG_FILE_LOCKING
+	{
+		.procname	= "leases-enable",
+		.data		= &leases_enable,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
+#ifdef CONFIG_DNOTIFY
+	{
+		.procname	= "dir-notify-enable",
+		.data		= &dir_notify_enable,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
+#ifdef CONFIG_MMU
+#ifdef CONFIG_FILE_LOCKING
+	{
+		.procname	= "lease-break-time",
+		.data		= &lease_break_time,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
+#ifdef CONFIG_AIO
+	{
+		.procname	= "aio-nr",
+		.data		= &aio_nr,
+		.maxlen		= sizeof(aio_nr),
+		.mode		= 0444,
+		.proc_handler	= proc_doulongvec_minmax,
+	},
+	{
+		.procname	= "aio-max-nr",
+		.data		= &aio_max_nr,
+		.maxlen		= sizeof(aio_max_nr),
+		.mode		= 0644,
+		.proc_handler	= proc_doulongvec_minmax,
+	},
+#endif /* CONFIG_AIO */
+#ifdef CONFIG_INOTIFY_USER
+	{
+		.procname	= "inotify",
+		.mode		= 0555,
+		.child		= inotify_table,
+	},
+#endif	
+#ifdef CONFIG_EPOLL
+	{
+		.procname	= "epoll",
+		.mode		= 0555,
+		.child		= epoll_table,
+	},
+#endif
+#endif
+	{
+		.procname	= "protected_symlinks",
+		.data		= &sysctl_protected_symlinks,
+		.maxlen		= sizeof(int),
+		.mode		= 0600,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+	{
+		.procname	= "protected_hardlinks",
+		.data		= &sysctl_protected_hardlinks,
+		.maxlen		= sizeof(int),
+		.mode		= 0600,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+	{
+		.procname	= "protected_fifos",
+		.data		= &sysctl_protected_fifos,
+		.maxlen		= sizeof(int),
+		.mode		= 0600,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &two,
+	},
+	{
+		.procname	= "protected_regular",
+		.data		= &sysctl_protected_regular,
+		.maxlen		= sizeof(int),
+		.mode		= 0600,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &two,
+	},
+	{
+		.procname	= "suid_dumpable",
+		.data		= &suid_dumpable,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax_coredump,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &two,
+	},
+#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
+	{
+		.procname	= "binfmt_misc",
+		.mode		= 0555,
+		.child		= sysctl_mount_point,
+	},
+#endif
+	{
+		.procname	= "pipe-max-size",
+		.data		= &pipe_max_size,
+		.maxlen		= sizeof(pipe_max_size),
+		.mode		= 0644,
+		.proc_handler	= proc_dopipe_max_size,
+	},
+	{
+		.procname	= "pipe-user-pages-hard",
+		.data		= &pipe_user_pages_hard,
+		.maxlen		= sizeof(pipe_user_pages_hard),
+		.mode		= 0644,
+		.proc_handler	= proc_doulongvec_minmax,
+	},
+	{
+		.procname	= "pipe-user-pages-soft",
+		.data		= &pipe_user_pages_soft,
+		.maxlen		= sizeof(pipe_user_pages_soft),
+		.mode		= 0644,
+		.proc_handler	= proc_doulongvec_minmax,
+	},
+	{
+		.procname	= "mount-max",
+		.data		= &sysctl_mount_max,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ONE,
+	},
+	{ }
+};
 
-int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
-				      void __user *buffer,
-				      size_t *lenp, loff_t *ppos)
-{
-    return -ENOSYS;
-}
+static struct ctl_table debug_table[] = {
+#ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
+	{
+		.procname	= "exception-trace",
+		.data		= &show_unhandled_signals,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+#endif
+#if defined(CONFIG_OPTPROBES)
+	{
+		.procname	= "kprobes-optimization",
+		.data		= &sysctl_kprobes_optimization,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_kprobes_optimization_handler,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+#endif
+	{ }
+};
 
-int proc_do_large_bitmap(struct ctl_table *table, int write,
-			 void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	return -ENOSYS;
-}
+static struct ctl_table dev_table[] = {
+	{ }
+};
 
-#endif /* CONFIG_PROC_SYSCTL */
+static struct ctl_table sysctl_base_table[] = {
+	{
+		.procname	= "kernel",
+		.mode		= 0555,
+		.child		= kern_table,
+	},
+	{
+		.procname	= "vm",
+		.mode		= 0555,
+		.child		= vm_table,
+	},
+	{
+		.procname	= "fs",
+		.mode		= 0555,
+		.child		= fs_table,
+	},
+	{
+		.procname	= "debug",
+		.mode		= 0555,
+		.child		= debug_table,
+	},
+	{
+		.procname	= "dev",
+		.mode		= 0555,
+		.child		= dev_table,
+	},
+	{ }
+};
 
-#if defined(CONFIG_SYSCTL)
-int proc_do_static_key(struct ctl_table *table, int write,
-		       void __user *buffer, size_t *lenp,
-		       loff_t *ppos)
+int __init sysctl_init(void)
 {
-	struct static_key *key = (struct static_key *)table->data;
-	static DEFINE_MUTEX(static_key_mutex);
-	int val, ret;
-	struct ctl_table tmp = {
-		.data   = &val,
-		.maxlen = sizeof(val),
-		.mode   = table->mode,
-		.extra1 = SYSCTL_ZERO,
-		.extra2 = SYSCTL_ONE,
-	};
-
-	if (write && !capable(CAP_SYS_ADMIN))
-		return -EPERM;
+	struct ctl_table_header *hdr;
 
-	mutex_lock(&static_key_mutex);
-	val = static_key_enabled(key);
-	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
-	if (write && !ret) {
-		if (val)
-			static_key_enable(key);
-		else
-			static_key_disable(key);
-	}
-	mutex_unlock(&static_key_mutex);
-	return ret;
+	hdr = register_sysctl_table(sysctl_base_table);
+	kmemleak_not_leak(hdr);
+	return 0;
 }
-#endif
+#endif /* CONFIG_SYSCTL */
 /*
  * No sense putting this after each symbol definition, twice,
  * exception granted :-)
-- 
2.26.1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Re: [PATCH 5/5] sysctl: pass kernel pointers to ->proc_handler
       [not found] ` <20200424064338.538313-6-hch@lst.de>
@ 2020-04-24 19:06   ` Andrey Ignatov
  2020-04-27  5:34     ` Christoph Hellwig
  2020-05-04 19:01   ` Kees Cook
  2020-06-04 20:22   ` WARNING: CPU: 1 PID: 52 at mm/page_alloc.c:4826 __alloc_pages_nodemask (Re: [PATCH 5/5] sysctl: pass kernel pointers to ->proc_handler) Vegard Nossum
  2 siblings, 1 reply; 25+ messages in thread
From: Andrey Ignatov @ 2020-04-24 19:06 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Kees Cook, Iurii Zaikin, Alexei Starovoitov, Daniel Borkmann,
	linux-kernel, linux-mm, linux-fsdevel, netdev, bpf

Christoph Hellwig <hch@lst.de> [Thu, 2020-04-23 23:44 -0700]:
> Instead of having all the sysctl handlers deal with user pointers, which
> is rather hairy in terms of the BPF interaction, copy the input to and
> from userspace in common code.  This also means that the strings are
> always NUL-terminated by the common code, making the API a little bit
> safer.
> 
> As most handlers just pass the data through to one of the common handlers,
> a lot of the changes are mechanical.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Acked-by: Andrey Ignatov <rdna@fb.com>

...

> @@ -72,33 +70,21 @@ extern unsigned int sysctl_sched_autogroup_enabled;
>  extern int sysctl_sched_rr_timeslice;
>  extern int sched_rr_timeslice;
>  
> -extern int sched_rr_handler(struct ctl_table *table, int write,
> -		void __user *buffer, size_t *lenp,
> -		loff_t *ppos);
> -
> -extern int sched_rt_handler(struct ctl_table *table, int write,
> -		void __user *buffer, size_t *lenp,
> -		loff_t *ppos);
> -
> -#ifdef CONFIG_UCLAMP_TASK

I decided to skim through the patch one last time to double-check the fix
from the previous iteration and found that this ifdef got lost below.

> -extern int sysctl_sched_uclamp_handler(struct ctl_table *table, int write,
> -				       void __user *buffer, size_t *lenp,
> -				       loff_t *ppos);
> -#endif
> -
> -extern int sysctl_numa_balancing(struct ctl_table *table, int write,
> -				 void __user *buffer, size_t *lenp,
> -				 loff_t *ppos);
> -
> -extern int sysctl_schedstats(struct ctl_table *table, int write,
> -				 void __user *buffer, size_t *lenp,
> -				 loff_t *ppos);
> +int sched_rr_handler(struct ctl_table *table, int write, void *buffer,
> +		size_t *lenp, loff_t *ppos);
> +int sched_rt_handler(struct ctl_table *table, int write, void *buffer,
> +		size_t *lenp, loff_t *ppos);
> +int sysctl_sched_uclamp_handler(struct ctl_table *table, int write,
> +		void *buffer, size_t *lenp, loff_t *ppos);

Here ^^

-- 
Andrey Ignatov

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: pass kernel pointers to the sysctl ->proc_handler method v3
  2020-04-24  6:43 pass kernel pointers to the sysctl ->proc_handler method v3 Christoph Hellwig
                   ` (3 preceding siblings ...)
  2020-04-24  6:43 ` [PATCH 4/5] sysctl: avoid forward declarations Christoph Hellwig
@ 2020-04-26 15:51 ` Alexei Starovoitov
  2020-04-27  5:35   ` Christoph Hellwig
  2020-04-26 15:59 ` Al Viro
       [not found] ` <20200424064338.538313-6-hch@lst.de>
  6 siblings, 1 reply; 25+ messages in thread
From: Alexei Starovoitov @ 2020-04-26 15:51 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Kees Cook, Iurii Zaikin, Alexei Starovoitov, Daniel Borkmann,
	linux-kernel, linux-mm, linux-fsdevel, netdev, bpf

On Fri, Apr 24, 2020 at 08:43:33AM +0200, Christoph Hellwig wrote:
> Hi all,
> 
> this series changes the sysctl ->proc_handler methods to take kernel
> pointers.  This simplifies some of the pointer handling in the methods
> (which could probably be further simplified now), and gets rid of the
> set_fs address space overrides used by bpf.
> 
> Changes since v2:
>  - free the buffer modified by BPF
>  - move pid_max and friends to pid.h
> 
> Changes since v1:
>  - drop a patch merged by Greg
>  - don't copy data out on a write
>  - fix buffer allocation in bpf

The set looks good to me.
Should I take it via bpf-next tree ?

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: pass kernel pointers to the sysctl ->proc_handler method v3
  2020-04-24  6:43 pass kernel pointers to the sysctl ->proc_handler method v3 Christoph Hellwig
                   ` (4 preceding siblings ...)
  2020-04-26 15:51 ` pass kernel pointers to the sysctl ->proc_handler method v3 Alexei Starovoitov
@ 2020-04-26 15:59 ` Al Viro
  2020-04-27  5:36   ` Christoph Hellwig
       [not found] ` <20200424064338.538313-6-hch@lst.de>
  6 siblings, 1 reply; 25+ messages in thread
From: Al Viro @ 2020-04-26 15:59 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Kees Cook, Iurii Zaikin, Alexei Starovoitov, Daniel Borkmann,
	linux-kernel, linux-mm, linux-fsdevel, netdev, bpf

On Fri, Apr 24, 2020 at 08:43:33AM +0200, Christoph Hellwig wrote:
> Hi all,
> 
> this series changes the sysctl ->proc_handler methods to take kernel
> pointers.  This simplifies some of the pointer handling in the methods
> (which could probably be further simplified now), and gets rid of the
> set_fs address space overrides used by bpf.
> 
> Changes since v2:
>  - free the buffer modified by BPF
>  - move pid_max and friends to pid.h
> 
> Changes since v1:
>  - drop a patch merged by Greg
>  - don't copy data out on a write
>  - fix buffer allocation in bpf

OK, I can live with that; further work can live on top of that, anyway.
How are we going to handle that?  I can put it into a never-rebased branch
in vfs.git (#work.sysctl), so that people could pull that.

FWIW, I'm putting together more uaccess stuff (will probably hit -next
tonight or tomorrow); this would fit well there...

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 5/5] sysctl: pass kernel pointers to ->proc_handler
  2020-04-24 19:06   ` [PATCH 5/5] sysctl: pass kernel pointers to ->proc_handler Andrey Ignatov
@ 2020-04-27  5:34     ` Christoph Hellwig
  0 siblings, 0 replies; 25+ messages in thread
From: Christoph Hellwig @ 2020-04-27  5:34 UTC (permalink / raw)
  To: Andrey Ignatov
  Cc: Christoph Hellwig, Kees Cook, Iurii Zaikin, Alexei Starovoitov,
	Daniel Borkmann, linux-kernel, linux-mm, linux-fsdevel, netdev,
	bpf

On Fri, Apr 24, 2020 at 12:06:50PM -0700, Andrey Ignatov wrote:
> > -
> > -#ifdef CONFIG_UCLAMP_TASK
> 
> Decided to skim through the patch one last time to double-check the fix
> from previous iteration and found that this ifdef got lost below.
> 
> > -extern int sysctl_sched_uclamp_handler(struct ctl_table *table, int write,
> > -				       void __user *buffer, size_t *lenp,
> > -				       loff_t *ppos);
> > -#endif

There is no need for ifdefs around prototypes that aren't used.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: pass kernel pointers to the sysctl ->proc_handler method v3
  2020-04-26 15:51 ` pass kernel pointers to the sysctl ->proc_handler method v3 Alexei Starovoitov
@ 2020-04-27  5:35   ` Christoph Hellwig
  0 siblings, 0 replies; 25+ messages in thread
From: Christoph Hellwig @ 2020-04-27  5:35 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Christoph Hellwig, Kees Cook, Iurii Zaikin, Alexei Starovoitov,
	Daniel Borkmann, linux-kernel, linux-mm, linux-fsdevel, netdev,
	bpf

On Sun, Apr 26, 2020 at 08:51:00AM -0700, Alexei Starovoitov wrote:
> The set looks good to me.
> Should I take it via bpf-next tree ?

The first patch is a little unrelated and I think taking it via the
bpf tree sounds fine.   Al volunteered the vfs tree for the actual
sysctl changes, which looks more suitable.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: pass kernel pointers to the sysctl ->proc_handler method v3
  2020-04-26 15:59 ` Al Viro
@ 2020-04-27  5:36   ` Christoph Hellwig
  2020-04-27  7:15     ` Al Viro
  0 siblings, 1 reply; 25+ messages in thread
From: Christoph Hellwig @ 2020-04-27  5:36 UTC (permalink / raw)
  To: Al Viro
  Cc: Christoph Hellwig, Kees Cook, Iurii Zaikin, Alexei Starovoitov,
	Daniel Borkmann, linux-kernel, linux-mm, linux-fsdevel, netdev,
	bpf

On Sun, Apr 26, 2020 at 04:59:58PM +0100, Al Viro wrote:
> OK, I can live with that; further work can live on top of that, anyway.
> How are we going to handle that?  I can put it into never-rebased branch
> in vfs.git (#work.sysctl), so that people could pull that.
> 
> FWIW, I'm putting together more uaccess stuff (will probably hit -next
> tonight or tomorrow); this would fit well there...

Sounds good to me.  The first patch isn't really needed for the series
and could go in through the bpf tree.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: pass kernel pointers to the sysctl ->proc_handler method v3
  2020-04-27  5:36   ` Christoph Hellwig
@ 2020-04-27  7:15     ` Al Viro
  0 siblings, 0 replies; 25+ messages in thread
From: Al Viro @ 2020-04-27  7:15 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Kees Cook, Iurii Zaikin, Alexei Starovoitov, Daniel Borkmann,
	linux-kernel, linux-mm, linux-fsdevel, netdev, bpf

On Mon, Apr 27, 2020 at 07:36:16AM +0200, Christoph Hellwig wrote:
> On Sun, Apr 26, 2020 at 04:59:58PM +0100, Al Viro wrote:
> > OK, I can live with that; further work can live on top of that, anyway.
> > How are we going to handle that?  I can put it into never-rebased branch
> > in vfs.git (#work.sysctl), so that people could pull that.
> > 
> > FWIW, I'm putting together more uaccess stuff (will probably hit -next
> > tonight or tomorrow); this would fit well there...
> 
> Sounds good to me.  The first patch isn't really needed for the series
> and could go in through the bpf tree.

OK, ##2--5 are in #work.sysctl, based at 5.7-rc1

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 1/5] bpf-cgroup: remove unused exports
  2020-04-24  6:43 ` [PATCH 1/5] bpf-cgroup: remove unused exports Christoph Hellwig
@ 2020-04-27 21:23   ` Daniel Borkmann
  0 siblings, 0 replies; 25+ messages in thread
From: Daniel Borkmann @ 2020-04-27 21:23 UTC (permalink / raw)
  To: Christoph Hellwig, Kees Cook, Iurii Zaikin
  Cc: Alexei Starovoitov, linux-kernel, linux-mm, linux-fsdevel,
	netdev, bpf, Andrey Ignatov

On 4/24/20 8:43 AM, Christoph Hellwig wrote:
> Except for a few of the networking hooks called from modular ipv4 or
> ipv6 code, all of the hooks are only called from code that is guaranteed
> to be built-in.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Acked-by: Andrey Ignatov <rdna@fb.com>

Applied this one to bpf-next, thanks!

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 3/5] sysctl: remove all extern declaration from sysctl.c
  2020-04-24  6:43 ` [PATCH 3/5] sysctl: remove all extern declaration from sysctl.c Christoph Hellwig
@ 2020-05-04  1:25   ` Stephen Rothwell
  2020-05-04 18:42   ` Kees Cook
  1 sibling, 0 replies; 25+ messages in thread
From: Stephen Rothwell @ 2020-05-04  1:25 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Kees Cook, Iurii Zaikin, Alexei Starovoitov, Daniel Borkmann,
	linux-kernel, linux-mm, linux-fsdevel, netdev, bpf

[-- Attachment #1: Type: text/plain, Size: 1187 bytes --]

Hi Christoph,

On Fri, 24 Apr 2020 08:43:36 +0200 Christoph Hellwig <hch@lst.de> wrote:
>
> Extern declarations in .c files are bad style and can lead to
> mismatches.  Use existing definitions in headers where they exist,
> and otherwise move the external declarations to suitable header
> files.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  include/linux/coredump.h |  4 ++++
>  include/linux/file.h     |  2 ++
>  include/linux/mm.h       |  2 ++
>  include/linux/mmzone.h   |  2 ++
>  include/linux/pid.h      |  3 +++
>  include/linux/sysctl.h   |  8 +++++++
>  kernel/sysctl.c          | 45 +++-------------------------------------
>  7 files changed, 24 insertions(+), 42 deletions(-)

A couple of suggestions for another patch (since this one is in a
shared branch in Al's tree now):

There is an "extern struct ctl_table random_table[];" in
drivers/char/random.c which is redundant now (in fact always was).

There is already an "extern struct ctl_table epoll_table[];" in
include/linux/poll.h, so kernel/sysctl.c could have included that header
instead of adding a new declaration in include/linux/sysctl.h.

-- 
Cheers,
Stephen Rothwell

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/5] mm: remove watermark_boost_factor_sysctl_handler
  2020-04-24  6:43 ` [PATCH 2/5] mm: remove watermark_boost_factor_sysctl_handler Christoph Hellwig
@ 2020-05-04 18:41   ` Kees Cook
  0 siblings, 0 replies; 25+ messages in thread
From: Kees Cook @ 2020-05-04 18:41 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Iurii Zaikin, Alexei Starovoitov, Daniel Borkmann, linux-kernel,
	linux-mm, linux-fsdevel, netdev, bpf, David Rientjes

On Fri, Apr 24, 2020 at 08:43:35AM +0200, Christoph Hellwig wrote:
> watermark_boost_factor_sysctl_handler is just a pointless wrapper for
> proc_dointvec_minmax, so remove it and use proc_dointvec_minmax
> directly.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Reviewed-by: Kees Cook <keescook@chromium.org>

-Kees

> Acked-by: David Rientjes <rientjes@google.com>
> ---
>  include/linux/mmzone.h |  2 --
>  kernel/sysctl.c        |  2 +-
>  mm/page_alloc.c        | 12 ------------
>  3 files changed, 1 insertion(+), 15 deletions(-)
> 
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index 1b9de7d220fb7..f37bb8f187fc7 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -911,8 +911,6 @@ static inline int is_highmem(struct zone *zone)
>  struct ctl_table;
>  int min_free_kbytes_sysctl_handler(struct ctl_table *, int,
>  					void __user *, size_t *, loff_t *);
> -int watermark_boost_factor_sysctl_handler(struct ctl_table *, int,
> -					void __user *, size_t *, loff_t *);
>  int watermark_scale_factor_sysctl_handler(struct ctl_table *, int,
>  					void __user *, size_t *, loff_t *);
>  extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES];
> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> index 8a176d8727a3a..99d27acf46465 100644
> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> @@ -1491,7 +1491,7 @@ static struct ctl_table vm_table[] = {
>  		.data		= &watermark_boost_factor,
>  		.maxlen		= sizeof(watermark_boost_factor),
>  		.mode		= 0644,
> -		.proc_handler	= watermark_boost_factor_sysctl_handler,
> +		.proc_handler	= proc_dointvec_minmax,
>  		.extra1		= SYSCTL_ZERO,
>  	},
>  	{
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 69827d4fa0527..62c1550cd43ec 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -7978,18 +7978,6 @@ int min_free_kbytes_sysctl_handler(struct ctl_table *table, int write,
>  	return 0;
>  }
>  
> -int watermark_boost_factor_sysctl_handler(struct ctl_table *table, int write,
> -	void __user *buffer, size_t *length, loff_t *ppos)
> -{
> -	int rc;
> -
> -	rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
> -	if (rc)
> -		return rc;
> -
> -	return 0;
> -}
> -
>  int watermark_scale_factor_sysctl_handler(struct ctl_table *table, int write,
>  	void __user *buffer, size_t *length, loff_t *ppos)
>  {
> -- 
> 2.26.1
> 

-- 
Kees Cook

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 3/5] sysctl: remove all extern declaration from sysctl.c
  2020-04-24  6:43 ` [PATCH 3/5] sysctl: remove all extern declaration from sysctl.c Christoph Hellwig
  2020-05-04  1:25   ` Stephen Rothwell
@ 2020-05-04 18:42   ` Kees Cook
  1 sibling, 0 replies; 25+ messages in thread
From: Kees Cook @ 2020-05-04 18:42 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Iurii Zaikin, Alexei Starovoitov, Daniel Borkmann, linux-kernel,
	linux-mm, linux-fsdevel, netdev, bpf

On Fri, Apr 24, 2020 at 08:43:36AM +0200, Christoph Hellwig wrote:
> Extern declarations in .c files are bad style and can lead to
> mismatches.  Use existing definitions in headers where they exist,
> and otherwise move the external declarations to suitable header
> files.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Reviewed-by: Kees Cook <keescook@chromium.org>

-Kees

> ---
>  include/linux/coredump.h |  4 ++++
>  include/linux/file.h     |  2 ++
>  include/linux/mm.h       |  2 ++
>  include/linux/mmzone.h   |  2 ++
>  include/linux/pid.h      |  3 +++
>  include/linux/sysctl.h   |  8 +++++++
>  kernel/sysctl.c          | 45 +++-------------------------------------
>  7 files changed, 24 insertions(+), 42 deletions(-)
> 
> diff --git a/include/linux/coredump.h b/include/linux/coredump.h
> index abf4b4e65dbb9..7a899e83835d5 100644
> --- a/include/linux/coredump.h
> +++ b/include/linux/coredump.h
> @@ -22,4 +22,8 @@ extern void do_coredump(const kernel_siginfo_t *siginfo);
>  static inline void do_coredump(const kernel_siginfo_t *siginfo) {}
>  #endif
>  
> +extern int core_uses_pid;
> +extern char core_pattern[];
> +extern unsigned int core_pipe_limit;
> +
>  #endif /* _LINUX_COREDUMP_H */
> diff --git a/include/linux/file.h b/include/linux/file.h
> index 142d102f285e5..122f80084a3ef 100644
> --- a/include/linux/file.h
> +++ b/include/linux/file.h
> @@ -94,4 +94,6 @@ extern void fd_install(unsigned int fd, struct file *file);
>  extern void flush_delayed_fput(void);
>  extern void __fput_sync(struct file *);
>  
> +extern unsigned int sysctl_nr_open_min, sysctl_nr_open_max;
> +
>  #endif /* __LINUX_FILE_H */
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 5a323422d783d..9c4e7e76deddc 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -3140,5 +3140,7 @@ unsigned long wp_shared_mapping_range(struct address_space *mapping,
>  				      pgoff_t first_index, pgoff_t nr);
>  #endif
>  
> +extern int sysctl_nr_trim_pages;
> +
>  #endif /* __KERNEL__ */
>  #endif /* _LINUX_MM_H */
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index f37bb8f187fc7..b2af594ef0f7c 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -909,6 +909,7 @@ static inline int is_highmem(struct zone *zone)
>  
>  /* These two functions are used to setup the per zone pages min values */
>  struct ctl_table;
> +
>  int min_free_kbytes_sysctl_handler(struct ctl_table *, int,
>  					void __user *, size_t *, loff_t *);
>  int watermark_scale_factor_sysctl_handler(struct ctl_table *, int,
> @@ -925,6 +926,7 @@ int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
>  
>  extern int numa_zonelist_order_handler(struct ctl_table *, int,
>  			void __user *, size_t *, loff_t *);
> +extern int percpu_pagelist_fraction;
>  extern char numa_zonelist_order[];
>  #define NUMA_ZONELIST_ORDER_LEN	16
>  
> diff --git a/include/linux/pid.h b/include/linux/pid.h
> index cc896f0fc4e34..93543cbc0e6b3 100644
> --- a/include/linux/pid.h
> +++ b/include/linux/pid.h
> @@ -108,6 +108,9 @@ extern void transfer_pid(struct task_struct *old, struct task_struct *new,
>  struct pid_namespace;
>  extern struct pid_namespace init_pid_ns;
>  
> +extern int pid_max;
> +extern int pid_max_min, pid_max_max;
> +
>  /*
>   * look up a PID in the hash table. Must be called with the tasklist_lock
>   * or rcu_read_lock() held.
> diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
> index 02fa84493f237..36143ca40b56b 100644
> --- a/include/linux/sysctl.h
> +++ b/include/linux/sysctl.h
> @@ -207,7 +207,15 @@ void unregister_sysctl_table(struct ctl_table_header * table);
>  
>  extern int sysctl_init(void);
>  
> +extern int pwrsw_enabled;
> +extern int unaligned_enabled;
> +extern int unaligned_dump_stack;
> +extern int no_unaligned_warning;
> +
>  extern struct ctl_table sysctl_mount_point[];
> +extern struct ctl_table random_table[];
> +extern struct ctl_table firmware_config_table[];
> +extern struct ctl_table epoll_table[];
>  
>  #else /* CONFIG_SYSCTL */
>  static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * table)
> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> index 99d27acf46465..31b934865ebc3 100644
> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> @@ -68,6 +68,9 @@
>  #include <linux/bpf.h>
>  #include <linux/mount.h>
>  #include <linux/userfaultfd_k.h>
> +#include <linux/coredump.h>
> +#include <linux/latencytop.h>
> +#include <linux/pid.h>
>  
>  #include "../lib/kstrtox.h"
>  
> @@ -103,22 +106,6 @@
>  
>  #if defined(CONFIG_SYSCTL)
>  
> -/* External variables not in a header file. */
> -extern int suid_dumpable;
> -#ifdef CONFIG_COREDUMP
> -extern int core_uses_pid;
> -extern char core_pattern[];
> -extern unsigned int core_pipe_limit;
> -#endif
> -extern int pid_max;
> -extern int pid_max_min, pid_max_max;
> -extern int percpu_pagelist_fraction;
> -extern int latencytop_enabled;
> -extern unsigned int sysctl_nr_open_min, sysctl_nr_open_max;
> -#ifndef CONFIG_MMU
> -extern int sysctl_nr_trim_pages;
> -#endif
> -
>  /* Constants used for minimum and  maximum */
>  #ifdef CONFIG_LOCKUP_DETECTOR
>  static int sixty = 60;
> @@ -160,24 +147,6 @@ static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
>  #ifdef CONFIG_INOTIFY_USER
>  #include <linux/inotify.h>
>  #endif
> -#ifdef CONFIG_SPARC
> -#endif
> -
> -#ifdef CONFIG_PARISC
> -extern int pwrsw_enabled;
> -#endif
> -
> -#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
> -extern int unaligned_enabled;
> -#endif
> -
> -#ifdef CONFIG_IA64
> -extern int unaligned_dump_stack;
> -#endif
> -
> -#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
> -extern int no_unaligned_warning;
> -#endif
>  
>  #ifdef CONFIG_PROC_SYSCTL
>  
> @@ -243,14 +212,6 @@ static struct ctl_table vm_table[];
>  static struct ctl_table fs_table[];
>  static struct ctl_table debug_table[];
>  static struct ctl_table dev_table[];
> -extern struct ctl_table random_table[];
> -#ifdef CONFIG_EPOLL
> -extern struct ctl_table epoll_table[];
> -#endif
> -
> -#ifdef CONFIG_FW_LOADER_USER_HELPER
> -extern struct ctl_table firmware_config_table[];
> -#endif
>  
>  #if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
>      defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
> -- 
> 2.26.1
> 

-- 
Kees Cook

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 4/5] sysctl: avoid forward declarations
  2020-04-24  6:43 ` [PATCH 4/5] sysctl: avoid forward declarations Christoph Hellwig
@ 2020-05-04 18:44   ` Kees Cook
  0 siblings, 0 replies; 25+ messages in thread
From: Kees Cook @ 2020-05-04 18:44 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Iurii Zaikin, Alexei Starovoitov, Daniel Borkmann, linux-kernel,
	linux-mm, linux-fsdevel, netdev, bpf

On Fri, Apr 24, 2020 at 08:43:37AM +0200, Christoph Hellwig wrote:
> Move the sysctl tables to the end of the file to avoid lots of pointless
> forward declarations.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Reviewed-by: Kees Cook <keescook@chromium.org>

-Kees

> ---
>  kernel/sysctl.c | 3565 +++++++++++++++++++++++------------------------
>  1 file changed, 1764 insertions(+), 1801 deletions(-)
> 
> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> index 31b934865ebc3..511543d238794 100644
> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> @@ -176,79 +176,13 @@ enum sysctl_writes_mode {
>  };
>  
>  static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
> -
> -static int proc_do_cad_pid(struct ctl_table *table, int write,
> -		  void __user *buffer, size_t *lenp, loff_t *ppos);
> -static int proc_taint(struct ctl_table *table, int write,
> -			       void __user *buffer, size_t *lenp, loff_t *ppos);
> -#ifdef CONFIG_COMPACTION
> -static int proc_dointvec_minmax_warn_RT_change(struct ctl_table *table,
> -					       int write, void __user *buffer,
> -					       size_t *lenp, loff_t *ppos);
> -#endif
> -#endif
> -
> -#ifdef CONFIG_PRINTK
> -static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
> -				void __user *buffer, size_t *lenp, loff_t *ppos);
> -#endif
> -
> -static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
> -		void __user *buffer, size_t *lenp, loff_t *ppos);
> -#ifdef CONFIG_COREDUMP
> -static int proc_dostring_coredump(struct ctl_table *table, int write,
> -		void __user *buffer, size_t *lenp, loff_t *ppos);
> -#endif
> -static int proc_dopipe_max_size(struct ctl_table *table, int write,
> -		void __user *buffer, size_t *lenp, loff_t *ppos);
> -
> -#ifdef CONFIG_MAGIC_SYSRQ
> -static int sysrq_sysctl_handler(struct ctl_table *table, int write,
> -			void __user *buffer, size_t *lenp, loff_t *ppos);
> -#endif
> -
> -static struct ctl_table kern_table[];
> -static struct ctl_table vm_table[];
> -static struct ctl_table fs_table[];
> -static struct ctl_table debug_table[];
> -static struct ctl_table dev_table[];
> +#endif /* CONFIG_PROC_SYSCTL */
>  
>  #if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
>      defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
>  int sysctl_legacy_va_layout;
>  #endif
>  
> -/* The default sysctl tables: */
> -
> -static struct ctl_table sysctl_base_table[] = {
> -	{
> -		.procname	= "kernel",
> -		.mode		= 0555,
> -		.child		= kern_table,
> -	},
> -	{
> -		.procname	= "vm",
> -		.mode		= 0555,
> -		.child		= vm_table,
> -	},
> -	{
> -		.procname	= "fs",
> -		.mode		= 0555,
> -		.child		= fs_table,
> -	},
> -	{
> -		.procname	= "debug",
> -		.mode		= 0555,
> -		.child		= debug_table,
> -	},
> -	{
> -		.procname	= "dev",
> -		.mode		= 0555,
> -		.child		= dev_table,
> -	},
> -	{ }
> -};
> -
>  #ifdef CONFIG_SCHED_DEBUG
>  static int min_sched_granularity_ns = 100000;		/* 100 usecs */
>  static int max_sched_granularity_ns = NSEC_PER_SEC;	/* 1 second */
> @@ -265,1676 +199,12 @@ static int min_extfrag_threshold;
>  static int max_extfrag_threshold = 1000;
>  #endif
>  
> -static struct ctl_table kern_table[] = {
> -	{
> -		.procname	= "sched_child_runs_first",
> -		.data		= &sysctl_sched_child_runs_first,
> -		.maxlen		= sizeof(unsigned int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	},
> -#ifdef CONFIG_SCHED_DEBUG
> -	{
> -		.procname	= "sched_min_granularity_ns",
> -		.data		= &sysctl_sched_min_granularity,
> -		.maxlen		= sizeof(unsigned int),
> -		.mode		= 0644,
> -		.proc_handler	= sched_proc_update_handler,
> -		.extra1		= &min_sched_granularity_ns,
> -		.extra2		= &max_sched_granularity_ns,
> -	},
> -	{
> -		.procname	= "sched_latency_ns",
> -		.data		= &sysctl_sched_latency,
> -		.maxlen		= sizeof(unsigned int),
> -		.mode		= 0644,
> -		.proc_handler	= sched_proc_update_handler,
> -		.extra1		= &min_sched_granularity_ns,
> -		.extra2		= &max_sched_granularity_ns,
> -	},
> -	{
> -		.procname	= "sched_wakeup_granularity_ns",
> -		.data		= &sysctl_sched_wakeup_granularity,
> -		.maxlen		= sizeof(unsigned int),
> -		.mode		= 0644,
> -		.proc_handler	= sched_proc_update_handler,
> -		.extra1		= &min_wakeup_granularity_ns,
> -		.extra2		= &max_wakeup_granularity_ns,
> -	},
> -#ifdef CONFIG_SMP
> -	{
> -		.procname	= "sched_tunable_scaling",
> -		.data		= &sysctl_sched_tunable_scaling,
> -		.maxlen		= sizeof(enum sched_tunable_scaling),
> -		.mode		= 0644,
> -		.proc_handler	= sched_proc_update_handler,
> -		.extra1		= &min_sched_tunable_scaling,
> -		.extra2		= &max_sched_tunable_scaling,
> -	},
> -	{
> -		.procname	= "sched_migration_cost_ns",
> -		.data		= &sysctl_sched_migration_cost,
> -		.maxlen		= sizeof(unsigned int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	},
> -	{
> -		.procname	= "sched_nr_migrate",
> -		.data		= &sysctl_sched_nr_migrate,
> -		.maxlen		= sizeof(unsigned int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	},
> -#ifdef CONFIG_SCHEDSTATS
> -	{
> -		.procname	= "sched_schedstats",
> -		.data		= NULL,
> -		.maxlen		= sizeof(unsigned int),
> -		.mode		= 0644,
> -		.proc_handler	= sysctl_schedstats,
> -		.extra1		= SYSCTL_ZERO,
> -		.extra2		= SYSCTL_ONE,
> -	},
> -#endif /* CONFIG_SCHEDSTATS */
> -#endif /* CONFIG_SMP */
> -#ifdef CONFIG_NUMA_BALANCING
> -	{
> -		.procname	= "numa_balancing_scan_delay_ms",
> -		.data		= &sysctl_numa_balancing_scan_delay,
> -		.maxlen		= sizeof(unsigned int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	},
> -	{
> -		.procname	= "numa_balancing_scan_period_min_ms",
> -		.data		= &sysctl_numa_balancing_scan_period_min,
> -		.maxlen		= sizeof(unsigned int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	},
> -	{
> -		.procname	= "numa_balancing_scan_period_max_ms",
> -		.data		= &sysctl_numa_balancing_scan_period_max,
> -		.maxlen		= sizeof(unsigned int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	},
> -	{
> -		.procname	= "numa_balancing_scan_size_mb",
> -		.data		= &sysctl_numa_balancing_scan_size,
> -		.maxlen		= sizeof(unsigned int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= SYSCTL_ONE,
> -	},
> -	{
> -		.procname	= "numa_balancing",
> -		.data		= NULL, /* filled in by handler */
> -		.maxlen		= sizeof(unsigned int),
> -		.mode		= 0644,
> -		.proc_handler	= sysctl_numa_balancing,
> -		.extra1		= SYSCTL_ZERO,
> -		.extra2		= SYSCTL_ONE,
> -	},
> -#endif /* CONFIG_NUMA_BALANCING */
> -#endif /* CONFIG_SCHED_DEBUG */
> -	{
> -		.procname	= "sched_rt_period_us",
> -		.data		= &sysctl_sched_rt_period,
> -		.maxlen		= sizeof(unsigned int),
> -		.mode		= 0644,
> -		.proc_handler	= sched_rt_handler,
> -	},
> -	{
> -		.procname	= "sched_rt_runtime_us",
> -		.data		= &sysctl_sched_rt_runtime,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= sched_rt_handler,
> -	},
> -	{
> -		.procname	= "sched_rr_timeslice_ms",
> -		.data		= &sysctl_sched_rr_timeslice,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= sched_rr_handler,
> -	},
> -#ifdef CONFIG_UCLAMP_TASK
> -	{
> -		.procname	= "sched_util_clamp_min",
> -		.data		= &sysctl_sched_uclamp_util_min,
> -		.maxlen		= sizeof(unsigned int),
> -		.mode		= 0644,
> -		.proc_handler	= sysctl_sched_uclamp_handler,
> -	},
> -	{
> -		.procname	= "sched_util_clamp_max",
> -		.data		= &sysctl_sched_uclamp_util_max,
> -		.maxlen		= sizeof(unsigned int),
> -		.mode		= 0644,
> -		.proc_handler	= sysctl_sched_uclamp_handler,
> -	},
> -#endif
> -#ifdef CONFIG_SCHED_AUTOGROUP
> -	{
> -		.procname	= "sched_autogroup_enabled",
> -		.data		= &sysctl_sched_autogroup_enabled,
> -		.maxlen		= sizeof(unsigned int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= SYSCTL_ZERO,
> -		.extra2		= SYSCTL_ONE,
> -	},
> -#endif
> -#ifdef CONFIG_CFS_BANDWIDTH
> -	{
> -		.procname	= "sched_cfs_bandwidth_slice_us",
> -		.data		= &sysctl_sched_cfs_bandwidth_slice,
> -		.maxlen		= sizeof(unsigned int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= SYSCTL_ONE,
> -	},
> -#endif
> -#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
> -	{
> -		.procname	= "sched_energy_aware",
> -		.data		= &sysctl_sched_energy_aware,
> -		.maxlen		= sizeof(unsigned int),
> -		.mode		= 0644,
> -		.proc_handler	= sched_energy_aware_handler,
> -		.extra1		= SYSCTL_ZERO,
> -		.extra2		= SYSCTL_ONE,
> -	},
> -#endif
> -#ifdef CONFIG_PROVE_LOCKING
> -	{
> -		.procname	= "prove_locking",
> -		.data		= &prove_locking,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	},
> -#endif
> -#ifdef CONFIG_LOCK_STAT
> -	{
> -		.procname	= "lock_stat",
> -		.data		= &lock_stat,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	},
> -#endif
> -	{
> -		.procname	= "panic",
> -		.data		= &panic_timeout,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	},
> -#ifdef CONFIG_COREDUMP
> -	{
> -		.procname	= "core_uses_pid",
> -		.data		= &core_uses_pid,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	},
> -	{
> -		.procname	= "core_pattern",
> -		.data		= core_pattern,
> -		.maxlen		= CORENAME_MAX_SIZE,
> -		.mode		= 0644,
> -		.proc_handler	= proc_dostring_coredump,
> -	},
> -	{
> -		.procname	= "core_pipe_limit",
> -		.data		= &core_pipe_limit,
> -		.maxlen		= sizeof(unsigned int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	},
> -#endif
> -#ifdef CONFIG_PROC_SYSCTL
> -	{
> -		.procname	= "tainted",
> -		.maxlen 	= sizeof(long),
> -		.mode		= 0644,
> -		.proc_handler	= proc_taint,
> -	},
> -	{
> -		.procname	= "sysctl_writes_strict",
> -		.data		= &sysctl_writes_strict,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= &neg_one,
> -		.extra2		= SYSCTL_ONE,
> -	},
> -#endif
> -#ifdef CONFIG_LATENCYTOP
> -	{
> -		.procname	= "latencytop",
> -		.data		= &latencytop_enabled,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= sysctl_latencytop,
> -	},
> -#endif
> -#ifdef CONFIG_BLK_DEV_INITRD
> -	{
> -		.procname	= "real-root-dev",
> -		.data		= &real_root_dev,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	},
> -#endif
> -	{
> -		.procname	= "print-fatal-signals",
> -		.data		= &print_fatal_signals,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	},
> -#ifdef CONFIG_SPARC
> -	{
> -		.procname	= "reboot-cmd",
> -		.data		= reboot_command,
> -		.maxlen		= 256,
> -		.mode		= 0644,
> -		.proc_handler	= proc_dostring,
> -	},
> -	{
> -		.procname	= "stop-a",
> -		.data		= &stop_a_enabled,
> -		.maxlen		= sizeof (int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	},
> -	{
> -		.procname	= "scons-poweroff",
> -		.data		= &scons_pwroff,
> -		.maxlen		= sizeof (int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	},
> -#endif
> -#ifdef CONFIG_SPARC64
> -	{
> -		.procname	= "tsb-ratio",
> -		.data		= &sysctl_tsb_ratio,
> -		.maxlen		= sizeof (int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	},
> -#endif
> -#ifdef CONFIG_PARISC
> -	{
> -		.procname	= "soft-power",
> -		.data		= &pwrsw_enabled,
> -		.maxlen		= sizeof (int),
> -	 	.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	},
> -#endif
> -#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
> -	{
> -		.procname	= "unaligned-trap",
> -		.data		= &unaligned_enabled,
> -		.maxlen		= sizeof (int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	},
> -#endif
> -	{
> -		.procname	= "ctrl-alt-del",
> -		.data		= &C_A_D,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	},
> -#ifdef CONFIG_FUNCTION_TRACER
> -	{
> -		.procname	= "ftrace_enabled",
> -		.data		= &ftrace_enabled,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= ftrace_enable_sysctl,
> -	},
> -#endif
> -#ifdef CONFIG_STACK_TRACER
> -	{
> -		.procname	= "stack_tracer_enabled",
> -		.data		= &stack_tracer_enabled,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= stack_trace_sysctl,
> -	},
> -#endif
> -#ifdef CONFIG_TRACING
> -	{
> -		.procname	= "ftrace_dump_on_oops",
> -		.data		= &ftrace_dump_on_oops,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	},
> -	{
> -		.procname	= "traceoff_on_warning",
> -		.data		= &__disable_trace_on_warning,
> -		.maxlen		= sizeof(__disable_trace_on_warning),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	},
> -	{
> -		.procname	= "tracepoint_printk",
> -		.data		= &tracepoint_printk,
> -		.maxlen		= sizeof(tracepoint_printk),
> -		.mode		= 0644,
> -		.proc_handler	= tracepoint_printk_sysctl,
> -	},
> -#endif
> -#ifdef CONFIG_KEXEC_CORE
> -	{
> -		.procname	= "kexec_load_disabled",
> -		.data		= &kexec_load_disabled,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		/* only handle a transition from default "0" to "1" */
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= SYSCTL_ONE,
> -		.extra2		= SYSCTL_ONE,
> -	},
> -#endif
> -#ifdef CONFIG_MODULES
> -	{
> -		.procname	= "modprobe",
> -		.data		= &modprobe_path,
> -		.maxlen		= KMOD_PATH_LEN,
> -		.mode		= 0644,
> -		.proc_handler	= proc_dostring,
> -	},
> -	{
> -		.procname	= "modules_disabled",
> -		.data		= &modules_disabled,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		/* only handle a transition from default "0" to "1" */
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= SYSCTL_ONE,
> -		.extra2		= SYSCTL_ONE,
> -	},
> -#endif
> -#ifdef CONFIG_UEVENT_HELPER
> -	{
> -		.procname	= "hotplug",
> -		.data		= &uevent_helper,
> -		.maxlen		= UEVENT_HELPER_PATH_LEN,
> -		.mode		= 0644,
> -		.proc_handler	= proc_dostring,
> -	},
> -#endif
> -#ifdef CONFIG_CHR_DEV_SG
> -	{
> -		.procname	= "sg-big-buff",
> -		.data		= &sg_big_buff,
> -		.maxlen		= sizeof (int),
> -		.mode		= 0444,
> -		.proc_handler	= proc_dointvec,
> -	},
> -#endif
> -#ifdef CONFIG_BSD_PROCESS_ACCT
> -	{
> -		.procname	= "acct",
> -		.data		= &acct_parm,
> -		.maxlen		= 3*sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	},
> -#endif
> -#ifdef CONFIG_MAGIC_SYSRQ
> -	{
> -		.procname	= "sysrq",
> -		.data		= NULL,
> -		.maxlen		= sizeof (int),
> -		.mode		= 0644,
> -		.proc_handler	= sysrq_sysctl_handler,
> -	},
> -#endif
> -#ifdef CONFIG_PROC_SYSCTL
> -	{
> -		.procname	= "cad_pid",
> -		.data		= NULL,
> -		.maxlen		= sizeof (int),
> -		.mode		= 0600,
> -		.proc_handler	= proc_do_cad_pid,
> -	},
> -#endif
> -	{
> -		.procname	= "threads-max",
> -		.data		= NULL,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= sysctl_max_threads,
> -	},
> -	{
> -		.procname	= "random",
> -		.mode		= 0555,
> -		.child		= random_table,
> -	},
> -	{
> -		.procname	= "usermodehelper",
> -		.mode		= 0555,
> -		.child		= usermodehelper_table,
> -	},
> -#ifdef CONFIG_FW_LOADER_USER_HELPER
> -	{
> -		.procname	= "firmware_config",
> -		.mode		= 0555,
> -		.child		= firmware_config_table,
> -	},
> -#endif
> -	{
> -		.procname	= "overflowuid",
> -		.data		= &overflowuid,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= &minolduid,
> -		.extra2		= &maxolduid,
> -	},
> -	{
> -		.procname	= "overflowgid",
> -		.data		= &overflowgid,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= &minolduid,
> -		.extra2		= &maxolduid,
> -	},
> -#ifdef CONFIG_S390
> -	{
> -		.procname	= "userprocess_debug",
> -		.data		= &show_unhandled_signals,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	},
> -#endif
> -	{
> -		.procname	= "pid_max",
> -		.data		= &pid_max,
> -		.maxlen		= sizeof (int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= &pid_max_min,
> -		.extra2		= &pid_max_max,
> -	},
> -	{
> -		.procname	= "panic_on_oops",
> -		.data		= &panic_on_oops,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	},
> -	{
> -		.procname	= "panic_print",
> -		.data		= &panic_print,
> -		.maxlen		= sizeof(unsigned long),
> -		.mode		= 0644,
> -		.proc_handler	= proc_doulongvec_minmax,
> -	},
> -#if defined CONFIG_PRINTK
> -	{
> -		.procname	= "printk",
> -		.data		= &console_loglevel,
> -		.maxlen		= 4*sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	},
> -	{
> -		.procname	= "printk_ratelimit",
> -		.data		= &printk_ratelimit_state.interval,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_jiffies,
> -	},
> -	{
> -		.procname	= "printk_ratelimit_burst",
> -		.data		= &printk_ratelimit_state.burst,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	},
> -	{
> -		.procname	= "printk_delay",
> -		.data		= &printk_delay_msec,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= SYSCTL_ZERO,
> -		.extra2		= &ten_thousand,
> -	},
> -	{
> -		.procname	= "printk_devkmsg",
> -		.data		= devkmsg_log_str,
> -		.maxlen		= DEVKMSG_STR_MAX_SIZE,
> -		.mode		= 0644,
> -		.proc_handler	= devkmsg_sysctl_set_loglvl,
> -	},
> -	{
> -		.procname	= "dmesg_restrict",
> -		.data		= &dmesg_restrict,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_minmax_sysadmin,
> -		.extra1		= SYSCTL_ZERO,
> -		.extra2		= SYSCTL_ONE,
> -	},
> -	{
> -		.procname	= "kptr_restrict",
> -		.data		= &kptr_restrict,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_minmax_sysadmin,
> -		.extra1		= SYSCTL_ZERO,
> -		.extra2		= &two,
> -	},
> -#endif
> -	{
> -		.procname	= "ngroups_max",
> -		.data		= &ngroups_max,
> -		.maxlen		= sizeof (int),
> -		.mode		= 0444,
> -		.proc_handler	= proc_dointvec,
> -	},
> -	{
> -		.procname	= "cap_last_cap",
> -		.data		= (void *)&cap_last_cap,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0444,
> -		.proc_handler	= proc_dointvec,
> -	},
> -#if defined(CONFIG_LOCKUP_DETECTOR)
> -	{
> -		.procname       = "watchdog",
> -		.data		= &watchdog_user_enabled,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler   = proc_watchdog,
> -		.extra1		= SYSCTL_ZERO,
> -		.extra2		= SYSCTL_ONE,
> -	},
> -	{
> -		.procname	= "watchdog_thresh",
> -		.data		= &watchdog_thresh,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_watchdog_thresh,
> -		.extra1		= SYSCTL_ZERO,
> -		.extra2		= &sixty,
> -	},
> -	{
> -		.procname       = "nmi_watchdog",
> -		.data		= &nmi_watchdog_user_enabled,
> -		.maxlen		= sizeof(int),
> -		.mode		= NMI_WATCHDOG_SYSCTL_PERM,
> -		.proc_handler   = proc_nmi_watchdog,
> -		.extra1		= SYSCTL_ZERO,
> -		.extra2		= SYSCTL_ONE,
> -	},
> -	{
> -		.procname	= "watchdog_cpumask",
> -		.data		= &watchdog_cpumask_bits,
> -		.maxlen		= NR_CPUS,
> -		.mode		= 0644,
> -		.proc_handler	= proc_watchdog_cpumask,
> -	},
> -#ifdef CONFIG_SOFTLOCKUP_DETECTOR
> -	{
> -		.procname       = "soft_watchdog",
> -		.data		= &soft_watchdog_user_enabled,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler   = proc_soft_watchdog,
> -		.extra1		= SYSCTL_ZERO,
> -		.extra2		= SYSCTL_ONE,
> -	},
> -	{
> -		.procname	= "softlockup_panic",
> -		.data		= &softlockup_panic,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= SYSCTL_ZERO,
> -		.extra2		= SYSCTL_ONE,
> -	},
> -#ifdef CONFIG_SMP
> -	{
> -		.procname	= "softlockup_all_cpu_backtrace",
> -		.data		= &sysctl_softlockup_all_cpu_backtrace,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= SYSCTL_ZERO,
> -		.extra2		= SYSCTL_ONE,
> -	},
> -#endif /* CONFIG_SMP */
> -#endif
> -#ifdef CONFIG_HARDLOCKUP_DETECTOR
> -	{
> -		.procname	= "hardlockup_panic",
> -		.data		= &hardlockup_panic,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= SYSCTL_ZERO,
> -		.extra2		= SYSCTL_ONE,
> -	},
> -#ifdef CONFIG_SMP
> -	{
> -		.procname	= "hardlockup_all_cpu_backtrace",
> -		.data		= &sysctl_hardlockup_all_cpu_backtrace,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= SYSCTL_ZERO,
> -		.extra2		= SYSCTL_ONE,
> -	},
> -#endif /* CONFIG_SMP */
> -#endif
> -#endif
> -
> -#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
> -	{
> -		.procname       = "unknown_nmi_panic",
> -		.data           = &unknown_nmi_panic,
> -		.maxlen         = sizeof (int),
> -		.mode           = 0644,
> -		.proc_handler   = proc_dointvec,
> -	},
> -#endif
> -#if defined(CONFIG_X86)
> -	{
> -		.procname	= "panic_on_unrecovered_nmi",
> -		.data		= &panic_on_unrecovered_nmi,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	},
> -	{
> -		.procname	= "panic_on_io_nmi",
> -		.data		= &panic_on_io_nmi,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	},
> -#ifdef CONFIG_DEBUG_STACKOVERFLOW
> -	{
> -		.procname	= "panic_on_stackoverflow",
> -		.data		= &sysctl_panic_on_stackoverflow,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	},
> -#endif
> -	{
> -		.procname	= "bootloader_type",
> -		.data		= &bootloader_type,
> -		.maxlen		= sizeof (int),
> -		.mode		= 0444,
> -		.proc_handler	= proc_dointvec,
> -	},
> -	{
> -		.procname	= "bootloader_version",
> -		.data		= &bootloader_version,
> -		.maxlen		= sizeof (int),
> -		.mode		= 0444,
> -		.proc_handler	= proc_dointvec,
> -	},
> -	{
> -		.procname	= "io_delay_type",
> -		.data		= &io_delay_type,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	},
> -#endif
> -#if defined(CONFIG_MMU)
> -	{
> -		.procname	= "randomize_va_space",
> -		.data		= &randomize_va_space,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	},
> -#endif
> -#if defined(CONFIG_S390) && defined(CONFIG_SMP)
> -	{
> -		.procname	= "spin_retry",
> -		.data		= &spin_retry,
> -		.maxlen		= sizeof (int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	},
> -#endif
> -#if	defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
> -	{
> -		.procname	= "acpi_video_flags",
> -		.data		= &acpi_realmode_flags,
> -		.maxlen		= sizeof (unsigned long),
> -		.mode		= 0644,
> -		.proc_handler	= proc_doulongvec_minmax,
> -	},
> -#endif
> -#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
> -	{
> -		.procname	= "ignore-unaligned-usertrap",
> -		.data		= &no_unaligned_warning,
> -		.maxlen		= sizeof (int),
> -	 	.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	},
> -#endif
> -#ifdef CONFIG_IA64
> -	{
> -		.procname	= "unaligned-dump-stack",
> -		.data		= &unaligned_dump_stack,
> -		.maxlen		= sizeof (int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	},
> -#endif
> -#ifdef CONFIG_DETECT_HUNG_TASK
> -	{
> -		.procname	= "hung_task_panic",
> -		.data		= &sysctl_hung_task_panic,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= SYSCTL_ZERO,
> -		.extra2		= SYSCTL_ONE,
> -	},
> -	{
> -		.procname	= "hung_task_check_count",
> -		.data		= &sysctl_hung_task_check_count,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= SYSCTL_ZERO,
> -	},
> -	{
> -		.procname	= "hung_task_timeout_secs",
> -		.data		= &sysctl_hung_task_timeout_secs,
> -		.maxlen		= sizeof(unsigned long),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dohung_task_timeout_secs,
> -		.extra2		= &hung_task_timeout_max,
> -	},
> -	{
> -		.procname	= "hung_task_check_interval_secs",
> -		.data		= &sysctl_hung_task_check_interval_secs,
> -		.maxlen		= sizeof(unsigned long),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dohung_task_timeout_secs,
> -		.extra2		= &hung_task_timeout_max,
> -	},
> -	{
> -		.procname	= "hung_task_warnings",
> -		.data		= &sysctl_hung_task_warnings,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= &neg_one,
> -	},
> -#endif
> -#ifdef CONFIG_RT_MUTEXES
> -	{
> -		.procname	= "max_lock_depth",
> -		.data		= &max_lock_depth,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	},
> -#endif
> -	{
> -		.procname	= "poweroff_cmd",
> -		.data		= &poweroff_cmd,
> -		.maxlen		= POWEROFF_CMD_PATH_LEN,
> -		.mode		= 0644,
> -		.proc_handler	= proc_dostring,
> -	},
> -#ifdef CONFIG_KEYS
> -	{
> -		.procname	= "keys",
> -		.mode		= 0555,
> -		.child		= key_sysctls,
> -	},
> -#endif
> -#ifdef CONFIG_PERF_EVENTS
> -	/*
> -	 * User-space scripts rely on the existence of this file
> -	 * as a feature check for perf_events being enabled.
> -	 *
> -	 * So it's an ABI, do not remove!
> -	 */
> -	{
> -		.procname	= "perf_event_paranoid",
> -		.data		= &sysctl_perf_event_paranoid,
> -		.maxlen		= sizeof(sysctl_perf_event_paranoid),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	},
> -	{
> -		.procname	= "perf_event_mlock_kb",
> -		.data		= &sysctl_perf_event_mlock,
> -		.maxlen		= sizeof(sysctl_perf_event_mlock),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	},
> -	{
> -		.procname	= "perf_event_max_sample_rate",
> -		.data		= &sysctl_perf_event_sample_rate,
> -		.maxlen		= sizeof(sysctl_perf_event_sample_rate),
> -		.mode		= 0644,
> -		.proc_handler	= perf_proc_update_handler,
> -		.extra1		= SYSCTL_ONE,
> -	},
> -	{
> -		.procname	= "perf_cpu_time_max_percent",
> -		.data		= &sysctl_perf_cpu_time_max_percent,
> -		.maxlen		= sizeof(sysctl_perf_cpu_time_max_percent),
> -		.mode		= 0644,
> -		.proc_handler	= perf_cpu_time_max_percent_handler,
> -		.extra1		= SYSCTL_ZERO,
> -		.extra2		= &one_hundred,
> -	},
> -	{
> -		.procname	= "perf_event_max_stack",
> -		.data		= &sysctl_perf_event_max_stack,
> -		.maxlen		= sizeof(sysctl_perf_event_max_stack),
> -		.mode		= 0644,
> -		.proc_handler	= perf_event_max_stack_handler,
> -		.extra1		= SYSCTL_ZERO,
> -		.extra2		= &six_hundred_forty_kb,
> -	},
> -	{
> -		.procname	= "perf_event_max_contexts_per_stack",
> -		.data		= &sysctl_perf_event_max_contexts_per_stack,
> -		.maxlen		= sizeof(sysctl_perf_event_max_contexts_per_stack),
> -		.mode		= 0644,
> -		.proc_handler	= perf_event_max_stack_handler,
> -		.extra1		= SYSCTL_ZERO,
> -		.extra2		= &one_thousand,
> -	},
> -#endif
> -	{
> -		.procname	= "panic_on_warn",
> -		.data		= &panic_on_warn,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= SYSCTL_ZERO,
> -		.extra2		= SYSCTL_ONE,
> -	},
> -#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
> -	{
> -		.procname	= "timer_migration",
> -		.data		= &sysctl_timer_migration,
> -		.maxlen		= sizeof(unsigned int),
> -		.mode		= 0644,
> -		.proc_handler	= timer_migration_handler,
> -		.extra1		= SYSCTL_ZERO,
> -		.extra2		= SYSCTL_ONE,
> -	},
> -#endif
> -#ifdef CONFIG_BPF_SYSCALL
> -	{
> -		.procname	= "unprivileged_bpf_disabled",
> -		.data		= &sysctl_unprivileged_bpf_disabled,
> -		.maxlen		= sizeof(sysctl_unprivileged_bpf_disabled),
> -		.mode		= 0644,
> -		/* only handle a transition from default "0" to "1" */
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= SYSCTL_ONE,
> -		.extra2		= SYSCTL_ONE,
> -	},
> -	{
> -		.procname	= "bpf_stats_enabled",
> -		.data		= &bpf_stats_enabled_key.key,
> -		.maxlen		= sizeof(bpf_stats_enabled_key),
> -		.mode		= 0644,
> -		.proc_handler	= proc_do_static_key,
> -	},
> -#endif
> -#if defined(CONFIG_TREE_RCU)
> -	{
> -		.procname	= "panic_on_rcu_stall",
> -		.data		= &sysctl_panic_on_rcu_stall,
> -		.maxlen		= sizeof(sysctl_panic_on_rcu_stall),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= SYSCTL_ZERO,
> -		.extra2		= SYSCTL_ONE,
> -	},
> -#endif
> -#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
> -	{
> -		.procname	= "stack_erasing",
> -		.data		= NULL,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0600,
> -		.proc_handler	= stack_erasing_sysctl,
> -		.extra1		= SYSCTL_ZERO,
> -		.extra2		= SYSCTL_ONE,
> -	},
> -#endif
> -	{ }
> -};
> -
> -static struct ctl_table vm_table[] = {
> -	{
> -		.procname	= "overcommit_memory",
> -		.data		= &sysctl_overcommit_memory,
> -		.maxlen		= sizeof(sysctl_overcommit_memory),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= SYSCTL_ZERO,
> -		.extra2		= &two,
> -	},
> -	{
> -		.procname	= "panic_on_oom",
> -		.data		= &sysctl_panic_on_oom,
> -		.maxlen		= sizeof(sysctl_panic_on_oom),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= SYSCTL_ZERO,
> -		.extra2		= &two,
> -	},
> -	{
> -		.procname	= "oom_kill_allocating_task",
> -		.data		= &sysctl_oom_kill_allocating_task,
> -		.maxlen		= sizeof(sysctl_oom_kill_allocating_task),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	},
> -	{
> -		.procname	= "oom_dump_tasks",
> -		.data		= &sysctl_oom_dump_tasks,
> -		.maxlen		= sizeof(sysctl_oom_dump_tasks),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	},
> -	{
> -		.procname	= "overcommit_ratio",
> -		.data		= &sysctl_overcommit_ratio,
> -		.maxlen		= sizeof(sysctl_overcommit_ratio),
> -		.mode		= 0644,
> -		.proc_handler	= overcommit_ratio_handler,
> -	},
> -	{
> -		.procname	= "overcommit_kbytes",
> -		.data		= &sysctl_overcommit_kbytes,
> -		.maxlen		= sizeof(sysctl_overcommit_kbytes),
> -		.mode		= 0644,
> -		.proc_handler	= overcommit_kbytes_handler,
> -	},
> -	{
> -		.procname	= "page-cluster", 
> -		.data		= &page_cluster,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= SYSCTL_ZERO,
> -	},
> -	{
> -		.procname	= "dirty_background_ratio",
> -		.data		= &dirty_background_ratio,
> -		.maxlen		= sizeof(dirty_background_ratio),
> -		.mode		= 0644,
> -		.proc_handler	= dirty_background_ratio_handler,
> -		.extra1		= SYSCTL_ZERO,
> -		.extra2		= &one_hundred,
> -	},
> -	{
> -		.procname	= "dirty_background_bytes",
> -		.data		= &dirty_background_bytes,
> -		.maxlen		= sizeof(dirty_background_bytes),
> -		.mode		= 0644,
> -		.proc_handler	= dirty_background_bytes_handler,
> -		.extra1		= &one_ul,
> -	},
> -	{
> -		.procname	= "dirty_ratio",
> -		.data		= &vm_dirty_ratio,
> -		.maxlen		= sizeof(vm_dirty_ratio),
> -		.mode		= 0644,
> -		.proc_handler	= dirty_ratio_handler,
> -		.extra1		= SYSCTL_ZERO,
> -		.extra2		= &one_hundred,
> -	},
> -	{
> -		.procname	= "dirty_bytes",
> -		.data		= &vm_dirty_bytes,
> -		.maxlen		= sizeof(vm_dirty_bytes),
> -		.mode		= 0644,
> -		.proc_handler	= dirty_bytes_handler,
> -		.extra1		= &dirty_bytes_min,
> -	},
> -	{
> -		.procname	= "dirty_writeback_centisecs",
> -		.data		= &dirty_writeback_interval,
> -		.maxlen		= sizeof(dirty_writeback_interval),
> -		.mode		= 0644,
> -		.proc_handler	= dirty_writeback_centisecs_handler,
> -	},
> -	{
> -		.procname	= "dirty_expire_centisecs",
> -		.data		= &dirty_expire_interval,
> -		.maxlen		= sizeof(dirty_expire_interval),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= SYSCTL_ZERO,
> -	},
> -	{
> -		.procname	= "dirtytime_expire_seconds",
> -		.data		= &dirtytime_expire_interval,
> -		.maxlen		= sizeof(dirtytime_expire_interval),
> -		.mode		= 0644,
> -		.proc_handler	= dirtytime_interval_handler,
> -		.extra1		= SYSCTL_ZERO,
> -	},
> -	{
> -		.procname	= "swappiness",
> -		.data		= &vm_swappiness,
> -		.maxlen		= sizeof(vm_swappiness),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= SYSCTL_ZERO,
> -		.extra2		= &one_hundred,
> -	},
> -#ifdef CONFIG_HUGETLB_PAGE
> -	{
> -		.procname	= "nr_hugepages",
> -		.data		= NULL,
> -		.maxlen		= sizeof(unsigned long),
> -		.mode		= 0644,
> -		.proc_handler	= hugetlb_sysctl_handler,
> -	},
> -#ifdef CONFIG_NUMA
> -	{
> -		.procname       = "nr_hugepages_mempolicy",
> -		.data           = NULL,
> -		.maxlen         = sizeof(unsigned long),
> -		.mode           = 0644,
> -		.proc_handler   = &hugetlb_mempolicy_sysctl_handler,
> -	},
> -	{
> -		.procname		= "numa_stat",
> -		.data			= &sysctl_vm_numa_stat,
> -		.maxlen			= sizeof(int),
> -		.mode			= 0644,
> -		.proc_handler	= sysctl_vm_numa_stat_handler,
> -		.extra1			= SYSCTL_ZERO,
> -		.extra2			= SYSCTL_ONE,
> -	},
> -#endif
> -	 {
> -		.procname	= "hugetlb_shm_group",
> -		.data		= &sysctl_hugetlb_shm_group,
> -		.maxlen		= sizeof(gid_t),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	 },
> -	{
> -		.procname	= "nr_overcommit_hugepages",
> -		.data		= NULL,
> -		.maxlen		= sizeof(unsigned long),
> -		.mode		= 0644,
> -		.proc_handler	= hugetlb_overcommit_handler,
> -	},
> -#endif
> -	{
> -		.procname	= "lowmem_reserve_ratio",
> -		.data		= &sysctl_lowmem_reserve_ratio,
> -		.maxlen		= sizeof(sysctl_lowmem_reserve_ratio),
> -		.mode		= 0644,
> -		.proc_handler	= lowmem_reserve_ratio_sysctl_handler,
> -	},
> -	{
> -		.procname	= "drop_caches",
> -		.data		= &sysctl_drop_caches,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0200,
> -		.proc_handler	= drop_caches_sysctl_handler,
> -		.extra1		= SYSCTL_ONE,
> -		.extra2		= &four,
> -	},
> -#ifdef CONFIG_COMPACTION
> -	{
> -		.procname	= "compact_memory",
> -		.data		= &sysctl_compact_memory,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0200,
> -		.proc_handler	= sysctl_compaction_handler,
> -	},
> -	{
> -		.procname	= "extfrag_threshold",
> -		.data		= &sysctl_extfrag_threshold,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= &min_extfrag_threshold,
> -		.extra2		= &max_extfrag_threshold,
> -	},
> -	{
> -		.procname	= "compact_unevictable_allowed",
> -		.data		= &sysctl_compact_unevictable_allowed,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_minmax_warn_RT_change,
> -		.extra1		= SYSCTL_ZERO,
> -		.extra2		= SYSCTL_ONE,
> -	},
> -
> -#endif /* CONFIG_COMPACTION */
> -	{
> -		.procname	= "min_free_kbytes",
> -		.data		= &min_free_kbytes,
> -		.maxlen		= sizeof(min_free_kbytes),
> -		.mode		= 0644,
> -		.proc_handler	= min_free_kbytes_sysctl_handler,
> -		.extra1		= SYSCTL_ZERO,
> -	},
> -	{
> -		.procname	= "watermark_boost_factor",
> -		.data		= &watermark_boost_factor,
> -		.maxlen		= sizeof(watermark_boost_factor),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= SYSCTL_ZERO,
> -	},
> -	{
> -		.procname	= "watermark_scale_factor",
> -		.data		= &watermark_scale_factor,
> -		.maxlen		= sizeof(watermark_scale_factor),
> -		.mode		= 0644,
> -		.proc_handler	= watermark_scale_factor_sysctl_handler,
> -		.extra1		= SYSCTL_ONE,
> -		.extra2		= &one_thousand,
> -	},
> -	{
> -		.procname	= "percpu_pagelist_fraction",
> -		.data		= &percpu_pagelist_fraction,
> -		.maxlen		= sizeof(percpu_pagelist_fraction),
> -		.mode		= 0644,
> -		.proc_handler	= percpu_pagelist_fraction_sysctl_handler,
> -		.extra1		= SYSCTL_ZERO,
> -	},
> -#ifdef CONFIG_MMU
> -	{
> -		.procname	= "max_map_count",
> -		.data		= &sysctl_max_map_count,
> -		.maxlen		= sizeof(sysctl_max_map_count),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= SYSCTL_ZERO,
> -	},
> -#else
> -	{
> -		.procname	= "nr_trim_pages",
> -		.data		= &sysctl_nr_trim_pages,
> -		.maxlen		= sizeof(sysctl_nr_trim_pages),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= SYSCTL_ZERO,
> -	},
> -#endif
> -	{
> -		.procname	= "laptop_mode",
> -		.data		= &laptop_mode,
> -		.maxlen		= sizeof(laptop_mode),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_jiffies,
> -	},
> -	{
> -		.procname	= "block_dump",
> -		.data		= &block_dump,
> -		.maxlen		= sizeof(block_dump),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -		.extra1		= SYSCTL_ZERO,
> -	},
> -	{
> -		.procname	= "vfs_cache_pressure",
> -		.data		= &sysctl_vfs_cache_pressure,
> -		.maxlen		= sizeof(sysctl_vfs_cache_pressure),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -		.extra1		= SYSCTL_ZERO,
> -	},
> -#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
> -    defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
> -	{
> -		.procname	= "legacy_va_layout",
> -		.data		= &sysctl_legacy_va_layout,
> -		.maxlen		= sizeof(sysctl_legacy_va_layout),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -		.extra1		= SYSCTL_ZERO,
> -	},
> -#endif
> -#ifdef CONFIG_NUMA
> -	{
> -		.procname	= "zone_reclaim_mode",
> -		.data		= &node_reclaim_mode,
> -		.maxlen		= sizeof(node_reclaim_mode),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -		.extra1		= SYSCTL_ZERO,
> -	},
> -	{
> -		.procname	= "min_unmapped_ratio",
> -		.data		= &sysctl_min_unmapped_ratio,
> -		.maxlen		= sizeof(sysctl_min_unmapped_ratio),
> -		.mode		= 0644,
> -		.proc_handler	= sysctl_min_unmapped_ratio_sysctl_handler,
> -		.extra1		= SYSCTL_ZERO,
> -		.extra2		= &one_hundred,
> -	},
> -	{
> -		.procname	= "min_slab_ratio",
> -		.data		= &sysctl_min_slab_ratio,
> -		.maxlen		= sizeof(sysctl_min_slab_ratio),
> -		.mode		= 0644,
> -		.proc_handler	= sysctl_min_slab_ratio_sysctl_handler,
> -		.extra1		= SYSCTL_ZERO,
> -		.extra2		= &one_hundred,
> -	},
> -#endif
> -#ifdef CONFIG_SMP
> -	{
> -		.procname	= "stat_interval",
> -		.data		= &sysctl_stat_interval,
> -		.maxlen		= sizeof(sysctl_stat_interval),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_jiffies,
> -	},
> -	{
> -		.procname	= "stat_refresh",
> -		.data		= NULL,
> -		.maxlen		= 0,
> -		.mode		= 0600,
> -		.proc_handler	= vmstat_refresh,
> -	},
> -#endif
> -#ifdef CONFIG_MMU
> -	{
> -		.procname	= "mmap_min_addr",
> -		.data		= &dac_mmap_min_addr,
> -		.maxlen		= sizeof(unsigned long),
> -		.mode		= 0644,
> -		.proc_handler	= mmap_min_addr_handler,
> -	},
> -#endif
> -#ifdef CONFIG_NUMA
> -	{
> -		.procname	= "numa_zonelist_order",
> -		.data		= &numa_zonelist_order,
> -		.maxlen		= NUMA_ZONELIST_ORDER_LEN,
> -		.mode		= 0644,
> -		.proc_handler	= numa_zonelist_order_handler,
> -	},
> -#endif
> -#if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
> -   (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
> -	{
> -		.procname	= "vdso_enabled",
> -#ifdef CONFIG_X86_32
> -		.data		= &vdso32_enabled,
> -		.maxlen		= sizeof(vdso32_enabled),
> -#else
> -		.data		= &vdso_enabled,
> -		.maxlen		= sizeof(vdso_enabled),
> -#endif
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -		.extra1		= SYSCTL_ZERO,
> -	},
> -#endif
> -#ifdef CONFIG_HIGHMEM
> -	{
> -		.procname	= "highmem_is_dirtyable",
> -		.data		= &vm_highmem_is_dirtyable,
> -		.maxlen		= sizeof(vm_highmem_is_dirtyable),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= SYSCTL_ZERO,
> -		.extra2		= SYSCTL_ONE,
> -	},
> -#endif
> -#ifdef CONFIG_MEMORY_FAILURE
> -	{
> -		.procname	= "memory_failure_early_kill",
> -		.data		= &sysctl_memory_failure_early_kill,
> -		.maxlen		= sizeof(sysctl_memory_failure_early_kill),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= SYSCTL_ZERO,
> -		.extra2		= SYSCTL_ONE,
> -	},
> -	{
> -		.procname	= "memory_failure_recovery",
> -		.data		= &sysctl_memory_failure_recovery,
> -		.maxlen		= sizeof(sysctl_memory_failure_recovery),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= SYSCTL_ZERO,
> -		.extra2		= SYSCTL_ONE,
> -	},
> -#endif
> -	{
> -		.procname	= "user_reserve_kbytes",
> -		.data		= &sysctl_user_reserve_kbytes,
> -		.maxlen		= sizeof(sysctl_user_reserve_kbytes),
> -		.mode		= 0644,
> -		.proc_handler	= proc_doulongvec_minmax,
> -	},
> -	{
> -		.procname	= "admin_reserve_kbytes",
> -		.data		= &sysctl_admin_reserve_kbytes,
> -		.maxlen		= sizeof(sysctl_admin_reserve_kbytes),
> -		.mode		= 0644,
> -		.proc_handler	= proc_doulongvec_minmax,
> -	},
> -#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
> -	{
> -		.procname	= "mmap_rnd_bits",
> -		.data		= &mmap_rnd_bits,
> -		.maxlen		= sizeof(mmap_rnd_bits),
> -		.mode		= 0600,
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= (void *)&mmap_rnd_bits_min,
> -		.extra2		= (void *)&mmap_rnd_bits_max,
> -	},
> -#endif
> -#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
> -	{
> -		.procname	= "mmap_rnd_compat_bits",
> -		.data		= &mmap_rnd_compat_bits,
> -		.maxlen		= sizeof(mmap_rnd_compat_bits),
> -		.mode		= 0600,
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= (void *)&mmap_rnd_compat_bits_min,
> -		.extra2		= (void *)&mmap_rnd_compat_bits_max,
> -	},
> -#endif
> -#ifdef CONFIG_USERFAULTFD
> -	{
> -		.procname	= "unprivileged_userfaultfd",
> -		.data		= &sysctl_unprivileged_userfaultfd,
> -		.maxlen		= sizeof(sysctl_unprivileged_userfaultfd),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= SYSCTL_ZERO,
> -		.extra2		= SYSCTL_ONE,
> -	},
> -#endif
> -	{ }
> -};
> -
> -static struct ctl_table fs_table[] = {
> -	{
> -		.procname	= "inode-nr",
> -		.data		= &inodes_stat,
> -		.maxlen		= 2*sizeof(long),
> -		.mode		= 0444,
> -		.proc_handler	= proc_nr_inodes,
> -	},
> -	{
> -		.procname	= "inode-state",
> -		.data		= &inodes_stat,
> -		.maxlen		= 7*sizeof(long),
> -		.mode		= 0444,
> -		.proc_handler	= proc_nr_inodes,
> -	},
> -	{
> -		.procname	= "file-nr",
> -		.data		= &files_stat,
> -		.maxlen		= sizeof(files_stat),
> -		.mode		= 0444,
> -		.proc_handler	= proc_nr_files,
> -	},
> -	{
> -		.procname	= "file-max",
> -		.data		= &files_stat.max_files,
> -		.maxlen		= sizeof(files_stat.max_files),
> -		.mode		= 0644,
> -		.proc_handler	= proc_doulongvec_minmax,
> -		.extra1		= &zero_ul,
> -		.extra2		= &long_max,
> -	},
> -	{
> -		.procname	= "nr_open",
> -		.data		= &sysctl_nr_open,
> -		.maxlen		= sizeof(unsigned int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= &sysctl_nr_open_min,
> -		.extra2		= &sysctl_nr_open_max,
> -	},
> -	{
> -		.procname	= "dentry-state",
> -		.data		= &dentry_stat,
> -		.maxlen		= 6*sizeof(long),
> -		.mode		= 0444,
> -		.proc_handler	= proc_nr_dentry,
> -	},
> -	{
> -		.procname	= "overflowuid",
> -		.data		= &fs_overflowuid,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= &minolduid,
> -		.extra2		= &maxolduid,
> -	},
> -	{
> -		.procname	= "overflowgid",
> -		.data		= &fs_overflowgid,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= &minolduid,
> -		.extra2		= &maxolduid,
> -	},
> -#ifdef CONFIG_FILE_LOCKING
> -	{
> -		.procname	= "leases-enable",
> -		.data		= &leases_enable,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	},
> -#endif
> -#ifdef CONFIG_DNOTIFY
> -	{
> -		.procname	= "dir-notify-enable",
> -		.data		= &dir_notify_enable,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	},
> -#endif
> -#ifdef CONFIG_MMU
> -#ifdef CONFIG_FILE_LOCKING
> -	{
> -		.procname	= "lease-break-time",
> -		.data		= &lease_break_time,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec,
> -	},
> -#endif
> -#ifdef CONFIG_AIO
> -	{
> -		.procname	= "aio-nr",
> -		.data		= &aio_nr,
> -		.maxlen		= sizeof(aio_nr),
> -		.mode		= 0444,
> -		.proc_handler	= proc_doulongvec_minmax,
> -	},
> -	{
> -		.procname	= "aio-max-nr",
> -		.data		= &aio_max_nr,
> -		.maxlen		= sizeof(aio_max_nr),
> -		.mode		= 0644,
> -		.proc_handler	= proc_doulongvec_minmax,
> -	},
> -#endif /* CONFIG_AIO */
> -#ifdef CONFIG_INOTIFY_USER
> -	{
> -		.procname	= "inotify",
> -		.mode		= 0555,
> -		.child		= inotify_table,
> -	},
> -#endif	
> -#ifdef CONFIG_EPOLL
> -	{
> -		.procname	= "epoll",
> -		.mode		= 0555,
> -		.child		= epoll_table,
> -	},
> -#endif
> -#endif
> -	{
> -		.procname	= "protected_symlinks",
> -		.data		= &sysctl_protected_symlinks,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0600,
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= SYSCTL_ZERO,
> -		.extra2		= SYSCTL_ONE,
> -	},
> -	{
> -		.procname	= "protected_hardlinks",
> -		.data		= &sysctl_protected_hardlinks,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0600,
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= SYSCTL_ZERO,
> -		.extra2		= SYSCTL_ONE,
> -	},
> -	{
> -		.procname	= "protected_fifos",
> -		.data		= &sysctl_protected_fifos,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0600,
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= SYSCTL_ZERO,
> -		.extra2		= &two,
> -	},
> -	{
> -		.procname	= "protected_regular",
> -		.data		= &sysctl_protected_regular,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0600,
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= SYSCTL_ZERO,
> -		.extra2		= &two,
> -	},
> -	{
> -		.procname	= "suid_dumpable",
> -		.data		= &suid_dumpable,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_minmax_coredump,
> -		.extra1		= SYSCTL_ZERO,
> -		.extra2		= &two,
> -	},
> -#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
> -	{
> -		.procname	= "binfmt_misc",
> -		.mode		= 0555,
> -		.child		= sysctl_mount_point,
> -	},
> -#endif
> -	{
> -		.procname	= "pipe-max-size",
> -		.data		= &pipe_max_size,
> -		.maxlen		= sizeof(pipe_max_size),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dopipe_max_size,
> -	},
> -	{
> -		.procname	= "pipe-user-pages-hard",
> -		.data		= &pipe_user_pages_hard,
> -		.maxlen		= sizeof(pipe_user_pages_hard),
> -		.mode		= 0644,
> -		.proc_handler	= proc_doulongvec_minmax,
> -	},
> -	{
> -		.procname	= "pipe-user-pages-soft",
> -		.data		= &pipe_user_pages_soft,
> -		.maxlen		= sizeof(pipe_user_pages_soft),
> -		.mode		= 0644,
> -		.proc_handler	= proc_doulongvec_minmax,
> -	},
> -	{
> -		.procname	= "mount-max",
> -		.data		= &sysctl_mount_max,
> -		.maxlen		= sizeof(unsigned int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec_minmax,
> -		.extra1		= SYSCTL_ONE,
> -	},
> -	{ }
> -};
> -
> -static struct ctl_table debug_table[] = {
> -#ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
> -	{
> -		.procname	= "exception-trace",
> -		.data		= &show_unhandled_signals,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_dointvec
> -	},
> -#endif
> -#if defined(CONFIG_OPTPROBES)
> -	{
> -		.procname	= "kprobes-optimization",
> -		.data		= &sysctl_kprobes_optimization,
> -		.maxlen		= sizeof(int),
> -		.mode		= 0644,
> -		.proc_handler	= proc_kprobes_optimization_handler,
> -		.extra1		= SYSCTL_ZERO,
> -		.extra2		= SYSCTL_ONE,
> -	},
> -#endif
> -	{ }
> -};
> -
> -static struct ctl_table dev_table[] = {
> -	{ }
> -};
> -
> -int __init sysctl_init(void)
> -{
> -	struct ctl_table_header *hdr;
> -
> -	hdr = register_sysctl_table(sysctl_base_table);
> -	kmemleak_not_leak(hdr);
> -	return 0;
> -}
> -
> -#endif /* CONFIG_SYSCTL */
> -
> -/*
> - * /proc/sys support
> - */
> -
> +#endif /* CONFIG_SYSCTL */
> +
> +/*
> + * /proc/sys support
> + */
> +
>  #ifdef CONFIG_PROC_SYSCTL
>  
>  static int _proc_do_string(char *data, int maxlen, int write,
> @@ -3307,95 +1577,1788 @@ int proc_dointvec(struct ctl_table *table, int write,
>  	return -ENOSYS;
>  }
>  
> -int proc_douintvec(struct ctl_table *table, int write,
> -		  void __user *buffer, size_t *lenp, loff_t *ppos)
> +int proc_douintvec(struct ctl_table *table, int write,
> +		  void __user *buffer, size_t *lenp, loff_t *ppos)
> +{
> +	return -ENOSYS;
> +}
> +
> +int proc_dointvec_minmax(struct ctl_table *table, int write,
> +		    void __user *buffer, size_t *lenp, loff_t *ppos)
> +{
> +	return -ENOSYS;
> +}
> +
> +int proc_douintvec_minmax(struct ctl_table *table, int write,
> +			  void __user *buffer, size_t *lenp, loff_t *ppos)
> +{
> +	return -ENOSYS;
> +}
> +
> +int proc_dointvec_jiffies(struct ctl_table *table, int write,
> +		    void __user *buffer, size_t *lenp, loff_t *ppos)
> +{
> +	return -ENOSYS;
> +}
> +
> +int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
> +		    void __user *buffer, size_t *lenp, loff_t *ppos)
> +{
> +	return -ENOSYS;
> +}
> +
> +int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
> +			     void __user *buffer, size_t *lenp, loff_t *ppos)
>  {
>  	return -ENOSYS;
>  }
>  
> -int proc_dointvec_minmax(struct ctl_table *table, int write,
> +int proc_doulongvec_minmax(struct ctl_table *table, int write,
>  		    void __user *buffer, size_t *lenp, loff_t *ppos)
>  {
>  	return -ENOSYS;
>  }
>  
> -int proc_douintvec_minmax(struct ctl_table *table, int write,
> -			  void __user *buffer, size_t *lenp, loff_t *ppos)
> +int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
> +				      void __user *buffer,
> +				      size_t *lenp, loff_t *ppos)
>  {
> -	return -ENOSYS;
> +    return -ENOSYS;
>  }
>  
> -int proc_dointvec_jiffies(struct ctl_table *table, int write,
> -		    void __user *buffer, size_t *lenp, loff_t *ppos)
> +int proc_do_large_bitmap(struct ctl_table *table, int write,
> +			 void __user *buffer, size_t *lenp, loff_t *ppos)
>  {
>  	return -ENOSYS;
>  }
>  
> -int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
> -		    void __user *buffer, size_t *lenp, loff_t *ppos)
> -{
> -	return -ENOSYS;
> -}
> +#endif /* CONFIG_PROC_SYSCTL */
> +
> +#if defined(CONFIG_SYSCTL)
> +int proc_do_static_key(struct ctl_table *table, int write,
> +		       void __user *buffer, size_t *lenp,
> +		       loff_t *ppos)
> +{
> +	struct static_key *key = (struct static_key *)table->data;
> +	static DEFINE_MUTEX(static_key_mutex);
> +	int val, ret;
> +	struct ctl_table tmp = {
> +		.data   = &val,
> +		.maxlen = sizeof(val),
> +		.mode   = table->mode,
> +		.extra1 = SYSCTL_ZERO,
> +		.extra2 = SYSCTL_ONE,
> +	};
> +
> +	if (write && !capable(CAP_SYS_ADMIN))
> +		return -EPERM;
> +
> +	mutex_lock(&static_key_mutex);
> +	val = static_key_enabled(key);
> +	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
> +	if (write && !ret) {
> +		if (val)
> +			static_key_enable(key);
> +		else
> +			static_key_disable(key);
> +	}
> +	mutex_unlock(&static_key_mutex);
> +	return ret;
> +}
> +
> +static struct ctl_table kern_table[] = {
> +	{
> +		.procname	= "sched_child_runs_first",
> +		.data		= &sysctl_sched_child_runs_first,
> +		.maxlen		= sizeof(unsigned int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
> +#ifdef CONFIG_SCHED_DEBUG
> +	{
> +		.procname	= "sched_min_granularity_ns",
> +		.data		= &sysctl_sched_min_granularity,
> +		.maxlen		= sizeof(unsigned int),
> +		.mode		= 0644,
> +		.proc_handler	= sched_proc_update_handler,
> +		.extra1		= &min_sched_granularity_ns,
> +		.extra2		= &max_sched_granularity_ns,
> +	},
> +	{
> +		.procname	= "sched_latency_ns",
> +		.data		= &sysctl_sched_latency,
> +		.maxlen		= sizeof(unsigned int),
> +		.mode		= 0644,
> +		.proc_handler	= sched_proc_update_handler,
> +		.extra1		= &min_sched_granularity_ns,
> +		.extra2		= &max_sched_granularity_ns,
> +	},
> +	{
> +		.procname	= "sched_wakeup_granularity_ns",
> +		.data		= &sysctl_sched_wakeup_granularity,
> +		.maxlen		= sizeof(unsigned int),
> +		.mode		= 0644,
> +		.proc_handler	= sched_proc_update_handler,
> +		.extra1		= &min_wakeup_granularity_ns,
> +		.extra2		= &max_wakeup_granularity_ns,
> +	},
> +#ifdef CONFIG_SMP
> +	{
> +		.procname	= "sched_tunable_scaling",
> +		.data		= &sysctl_sched_tunable_scaling,
> +		.maxlen		= sizeof(enum sched_tunable_scaling),
> +		.mode		= 0644,
> +		.proc_handler	= sched_proc_update_handler,
> +		.extra1		= &min_sched_tunable_scaling,
> +		.extra2		= &max_sched_tunable_scaling,
> +	},
> +	{
> +		.procname	= "sched_migration_cost_ns",
> +		.data		= &sysctl_sched_migration_cost,
> +		.maxlen		= sizeof(unsigned int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
> +	{
> +		.procname	= "sched_nr_migrate",
> +		.data		= &sysctl_sched_nr_migrate,
> +		.maxlen		= sizeof(unsigned int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
> +#ifdef CONFIG_SCHEDSTATS
> +	{
> +		.procname	= "sched_schedstats",
> +		.data		= NULL,
> +		.maxlen		= sizeof(unsigned int),
> +		.mode		= 0644,
> +		.proc_handler	= sysctl_schedstats,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= SYSCTL_ONE,
> +	},
> +#endif /* CONFIG_SCHEDSTATS */
> +#endif /* CONFIG_SMP */
> +#ifdef CONFIG_NUMA_BALANCING
> +	{
> +		.procname	= "numa_balancing_scan_delay_ms",
> +		.data		= &sysctl_numa_balancing_scan_delay,
> +		.maxlen		= sizeof(unsigned int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
> +	{
> +		.procname	= "numa_balancing_scan_period_min_ms",
> +		.data		= &sysctl_numa_balancing_scan_period_min,
> +		.maxlen		= sizeof(unsigned int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
> +	{
> +		.procname	= "numa_balancing_scan_period_max_ms",
> +		.data		= &sysctl_numa_balancing_scan_period_max,
> +		.maxlen		= sizeof(unsigned int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
> +	{
> +		.procname	= "numa_balancing_scan_size_mb",
> +		.data		= &sysctl_numa_balancing_scan_size,
> +		.maxlen		= sizeof(unsigned int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= SYSCTL_ONE,
> +	},
> +	{
> +		.procname	= "numa_balancing",
> +		.data		= NULL, /* filled in by handler */
> +		.maxlen		= sizeof(unsigned int),
> +		.mode		= 0644,
> +		.proc_handler	= sysctl_numa_balancing,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= SYSCTL_ONE,
> +	},
> +#endif /* CONFIG_NUMA_BALANCING */
> +#endif /* CONFIG_SCHED_DEBUG */
> +	{
> +		.procname	= "sched_rt_period_us",
> +		.data		= &sysctl_sched_rt_period,
> +		.maxlen		= sizeof(unsigned int),
> +		.mode		= 0644,
> +		.proc_handler	= sched_rt_handler,
> +	},
> +	{
> +		.procname	= "sched_rt_runtime_us",
> +		.data		= &sysctl_sched_rt_runtime,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= sched_rt_handler,
> +	},
> +	{
> +		.procname	= "sched_rr_timeslice_ms",
> +		.data		= &sysctl_sched_rr_timeslice,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= sched_rr_handler,
> +	},
> +#ifdef CONFIG_UCLAMP_TASK
> +	{
> +		.procname	= "sched_util_clamp_min",
> +		.data		= &sysctl_sched_uclamp_util_min,
> +		.maxlen		= sizeof(unsigned int),
> +		.mode		= 0644,
> +		.proc_handler	= sysctl_sched_uclamp_handler,
> +	},
> +	{
> +		.procname	= "sched_util_clamp_max",
> +		.data		= &sysctl_sched_uclamp_util_max,
> +		.maxlen		= sizeof(unsigned int),
> +		.mode		= 0644,
> +		.proc_handler	= sysctl_sched_uclamp_handler,
> +	},
> +#endif
> +#ifdef CONFIG_SCHED_AUTOGROUP
> +	{
> +		.procname	= "sched_autogroup_enabled",
> +		.data		= &sysctl_sched_autogroup_enabled,
> +		.maxlen		= sizeof(unsigned int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= SYSCTL_ONE,
> +	},
> +#endif
> +#ifdef CONFIG_CFS_BANDWIDTH
> +	{
> +		.procname	= "sched_cfs_bandwidth_slice_us",
> +		.data		= &sysctl_sched_cfs_bandwidth_slice,
> +		.maxlen		= sizeof(unsigned int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= SYSCTL_ONE,
> +	},
> +#endif
> +#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
> +	{
> +		.procname	= "sched_energy_aware",
> +		.data		= &sysctl_sched_energy_aware,
> +		.maxlen		= sizeof(unsigned int),
> +		.mode		= 0644,
> +		.proc_handler	= sched_energy_aware_handler,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= SYSCTL_ONE,
> +	},
> +#endif
> +#ifdef CONFIG_PROVE_LOCKING
> +	{
> +		.procname	= "prove_locking",
> +		.data		= &prove_locking,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
> +#endif
> +#ifdef CONFIG_LOCK_STAT
> +	{
> +		.procname	= "lock_stat",
> +		.data		= &lock_stat,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
> +#endif
> +	{
> +		.procname	= "panic",
> +		.data		= &panic_timeout,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
> +#ifdef CONFIG_COREDUMP
> +	{
> +		.procname	= "core_uses_pid",
> +		.data		= &core_uses_pid,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
> +	{
> +		.procname	= "core_pattern",
> +		.data		= core_pattern,
> +		.maxlen		= CORENAME_MAX_SIZE,
> +		.mode		= 0644,
> +		.proc_handler	= proc_dostring_coredump,
> +	},
> +	{
> +		.procname	= "core_pipe_limit",
> +		.data		= &core_pipe_limit,
> +		.maxlen		= sizeof(unsigned int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
> +#endif
> +#ifdef CONFIG_PROC_SYSCTL
> +	{
> +		.procname	= "tainted",
> +		.maxlen 	= sizeof(long),
> +		.mode		= 0644,
> +		.proc_handler	= proc_taint,
> +	},
> +	{
> +		.procname	= "sysctl_writes_strict",
> +		.data		= &sysctl_writes_strict,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= &neg_one,
> +		.extra2		= SYSCTL_ONE,
> +	},
> +#endif
> +#ifdef CONFIG_LATENCYTOP
> +	{
> +		.procname	= "latencytop",
> +		.data		= &latencytop_enabled,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= sysctl_latencytop,
> +	},
> +#endif
> +#ifdef CONFIG_BLK_DEV_INITRD
> +	{
> +		.procname	= "real-root-dev",
> +		.data		= &real_root_dev,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
> +#endif
> +	{
> +		.procname	= "print-fatal-signals",
> +		.data		= &print_fatal_signals,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
> +#ifdef CONFIG_SPARC
> +	{
> +		.procname	= "reboot-cmd",
> +		.data		= reboot_command,
> +		.maxlen		= 256,
> +		.mode		= 0644,
> +		.proc_handler	= proc_dostring,
> +	},
> +	{
> +		.procname	= "stop-a",
> +		.data		= &stop_a_enabled,
> +		.maxlen		= sizeof (int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
> +	{
> +		.procname	= "scons-poweroff",
> +		.data		= &scons_pwroff,
> +		.maxlen		= sizeof (int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
> +#endif
> +#ifdef CONFIG_SPARC64
> +	{
> +		.procname	= "tsb-ratio",
> +		.data		= &sysctl_tsb_ratio,
> +		.maxlen		= sizeof (int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
> +#endif
> +#ifdef CONFIG_PARISC
> +	{
> +		.procname	= "soft-power",
> +		.data		= &pwrsw_enabled,
> +		.maxlen		= sizeof (int),
> +	 	.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
> +#endif
> +#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
> +	{
> +		.procname	= "unaligned-trap",
> +		.data		= &unaligned_enabled,
> +		.maxlen		= sizeof (int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
> +#endif
> +	{
> +		.procname	= "ctrl-alt-del",
> +		.data		= &C_A_D,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
> +#ifdef CONFIG_FUNCTION_TRACER
> +	{
> +		.procname	= "ftrace_enabled",
> +		.data		= &ftrace_enabled,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= ftrace_enable_sysctl,
> +	},
> +#endif
> +#ifdef CONFIG_STACK_TRACER
> +	{
> +		.procname	= "stack_tracer_enabled",
> +		.data		= &stack_tracer_enabled,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= stack_trace_sysctl,
> +	},
> +#endif
> +#ifdef CONFIG_TRACING
> +	{
> +		.procname	= "ftrace_dump_on_oops",
> +		.data		= &ftrace_dump_on_oops,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
> +	{
> +		.procname	= "traceoff_on_warning",
> +		.data		= &__disable_trace_on_warning,
> +		.maxlen		= sizeof(__disable_trace_on_warning),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
> +	{
> +		.procname	= "tracepoint_printk",
> +		.data		= &tracepoint_printk,
> +		.maxlen		= sizeof(tracepoint_printk),
> +		.mode		= 0644,
> +		.proc_handler	= tracepoint_printk_sysctl,
> +	},
> +#endif
> +#ifdef CONFIG_KEXEC_CORE
> +	{
> +		.procname	= "kexec_load_disabled",
> +		.data		= &kexec_load_disabled,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		/* only handle a transition from default "0" to "1" */
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= SYSCTL_ONE,
> +		.extra2		= SYSCTL_ONE,
> +	},
> +#endif
> +#ifdef CONFIG_MODULES
> +	{
> +		.procname	= "modprobe",
> +		.data		= &modprobe_path,
> +		.maxlen		= KMOD_PATH_LEN,
> +		.mode		= 0644,
> +		.proc_handler	= proc_dostring,
> +	},
> +	{
> +		.procname	= "modules_disabled",
> +		.data		= &modules_disabled,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		/* only handle a transition from default "0" to "1" */
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= SYSCTL_ONE,
> +		.extra2		= SYSCTL_ONE,
> +	},
> +#endif
> +#ifdef CONFIG_UEVENT_HELPER
> +	{
> +		.procname	= "hotplug",
> +		.data		= &uevent_helper,
> +		.maxlen		= UEVENT_HELPER_PATH_LEN,
> +		.mode		= 0644,
> +		.proc_handler	= proc_dostring,
> +	},
> +#endif
> +#ifdef CONFIG_CHR_DEV_SG
> +	{
> +		.procname	= "sg-big-buff",
> +		.data		= &sg_big_buff,
> +		.maxlen		= sizeof (int),
> +		.mode		= 0444,
> +		.proc_handler	= proc_dointvec,
> +	},
> +#endif
> +#ifdef CONFIG_BSD_PROCESS_ACCT
> +	{
> +		.procname	= "acct",
> +		.data		= &acct_parm,
> +		.maxlen		= 3*sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
> +#endif
> +#ifdef CONFIG_MAGIC_SYSRQ
> +	{
> +		.procname	= "sysrq",
> +		.data		= NULL,
> +		.maxlen		= sizeof (int),
> +		.mode		= 0644,
> +		.proc_handler	= sysrq_sysctl_handler,
> +	},
> +#endif
> +#ifdef CONFIG_PROC_SYSCTL
> +	{
> +		.procname	= "cad_pid",
> +		.data		= NULL,
> +		.maxlen		= sizeof (int),
> +		.mode		= 0600,
> +		.proc_handler	= proc_do_cad_pid,
> +	},
> +#endif
> +	{
> +		.procname	= "threads-max",
> +		.data		= NULL,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= sysctl_max_threads,
> +	},
> +	{
> +		.procname	= "random",
> +		.mode		= 0555,
> +		.child		= random_table,
> +	},
> +	{
> +		.procname	= "usermodehelper",
> +		.mode		= 0555,
> +		.child		= usermodehelper_table,
> +	},
> +#ifdef CONFIG_FW_LOADER_USER_HELPER
> +	{
> +		.procname	= "firmware_config",
> +		.mode		= 0555,
> +		.child		= firmware_config_table,
> +	},
> +#endif
> +	{
> +		.procname	= "overflowuid",
> +		.data		= &overflowuid,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= &minolduid,
> +		.extra2		= &maxolduid,
> +	},
> +	{
> +		.procname	= "overflowgid",
> +		.data		= &overflowgid,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= &minolduid,
> +		.extra2		= &maxolduid,
> +	},
> +#ifdef CONFIG_S390
> +	{
> +		.procname	= "userprocess_debug",
> +		.data		= &show_unhandled_signals,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
> +#endif
> +	{
> +		.procname	= "pid_max",
> +		.data		= &pid_max,
> +		.maxlen		= sizeof (int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= &pid_max_min,
> +		.extra2		= &pid_max_max,
> +	},
> +	{
> +		.procname	= "panic_on_oops",
> +		.data		= &panic_on_oops,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
> +	{
> +		.procname	= "panic_print",
> +		.data		= &panic_print,
> +		.maxlen		= sizeof(unsigned long),
> +		.mode		= 0644,
> +		.proc_handler	= proc_doulongvec_minmax,
> +	},
> +#if defined CONFIG_PRINTK
> +	{
> +		.procname	= "printk",
> +		.data		= &console_loglevel,
> +		.maxlen		= 4*sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
> +	{
> +		.procname	= "printk_ratelimit",
> +		.data		= &printk_ratelimit_state.interval,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_jiffies,
> +	},
> +	{
> +		.procname	= "printk_ratelimit_burst",
> +		.data		= &printk_ratelimit_state.burst,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
> +	{
> +		.procname	= "printk_delay",
> +		.data		= &printk_delay_msec,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= &ten_thousand,
> +	},
> +	{
> +		.procname	= "printk_devkmsg",
> +		.data		= devkmsg_log_str,
> +		.maxlen		= DEVKMSG_STR_MAX_SIZE,
> +		.mode		= 0644,
> +		.proc_handler	= devkmsg_sysctl_set_loglvl,
> +	},
> +	{
> +		.procname	= "dmesg_restrict",
> +		.data		= &dmesg_restrict,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax_sysadmin,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= SYSCTL_ONE,
> +	},
> +	{
> +		.procname	= "kptr_restrict",
> +		.data		= &kptr_restrict,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax_sysadmin,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= &two,
> +	},
> +#endif
> +	{
> +		.procname	= "ngroups_max",
> +		.data		= &ngroups_max,
> +		.maxlen		= sizeof (int),
> +		.mode		= 0444,
> +		.proc_handler	= proc_dointvec,
> +	},
> +	{
> +		.procname	= "cap_last_cap",
> +		.data		= (void *)&cap_last_cap,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0444,
> +		.proc_handler	= proc_dointvec,
> +	},
> +#if defined(CONFIG_LOCKUP_DETECTOR)
> +	{
> +		.procname       = "watchdog",
> +		.data		= &watchdog_user_enabled,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler   = proc_watchdog,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= SYSCTL_ONE,
> +	},
> +	{
> +		.procname	= "watchdog_thresh",
> +		.data		= &watchdog_thresh,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_watchdog_thresh,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= &sixty,
> +	},
> +	{
> +		.procname       = "nmi_watchdog",
> +		.data		= &nmi_watchdog_user_enabled,
> +		.maxlen		= sizeof(int),
> +		.mode		= NMI_WATCHDOG_SYSCTL_PERM,
> +		.proc_handler   = proc_nmi_watchdog,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= SYSCTL_ONE,
> +	},
> +	{
> +		.procname	= "watchdog_cpumask",
> +		.data		= &watchdog_cpumask_bits,
> +		.maxlen		= NR_CPUS,
> +		.mode		= 0644,
> +		.proc_handler	= proc_watchdog_cpumask,
> +	},
> +#ifdef CONFIG_SOFTLOCKUP_DETECTOR
> +	{
> +		.procname       = "soft_watchdog",
> +		.data		= &soft_watchdog_user_enabled,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler   = proc_soft_watchdog,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= SYSCTL_ONE,
> +	},
> +	{
> +		.procname	= "softlockup_panic",
> +		.data		= &softlockup_panic,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= SYSCTL_ONE,
> +	},
> +#ifdef CONFIG_SMP
> +	{
> +		.procname	= "softlockup_all_cpu_backtrace",
> +		.data		= &sysctl_softlockup_all_cpu_backtrace,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= SYSCTL_ONE,
> +	},
> +#endif /* CONFIG_SMP */
> +#endif
> +#ifdef CONFIG_HARDLOCKUP_DETECTOR
> +	{
> +		.procname	= "hardlockup_panic",
> +		.data		= &hardlockup_panic,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= SYSCTL_ONE,
> +	},
> +#ifdef CONFIG_SMP
> +	{
> +		.procname	= "hardlockup_all_cpu_backtrace",
> +		.data		= &sysctl_hardlockup_all_cpu_backtrace,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= SYSCTL_ONE,
> +	},
> +#endif /* CONFIG_SMP */
> +#endif
> +#endif
> +
> +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
> +	{
> +		.procname       = "unknown_nmi_panic",
> +		.data           = &unknown_nmi_panic,
> +		.maxlen         = sizeof (int),
> +		.mode           = 0644,
> +		.proc_handler   = proc_dointvec,
> +	},
> +#endif
> +#if defined(CONFIG_X86)
> +	{
> +		.procname	= "panic_on_unrecovered_nmi",
> +		.data		= &panic_on_unrecovered_nmi,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
> +	{
> +		.procname	= "panic_on_io_nmi",
> +		.data		= &panic_on_io_nmi,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
> +#ifdef CONFIG_DEBUG_STACKOVERFLOW
> +	{
> +		.procname	= "panic_on_stackoverflow",
> +		.data		= &sysctl_panic_on_stackoverflow,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
> +#endif
> +	{
> +		.procname	= "bootloader_type",
> +		.data		= &bootloader_type,
> +		.maxlen		= sizeof (int),
> +		.mode		= 0444,
> +		.proc_handler	= proc_dointvec,
> +	},
> +	{
> +		.procname	= "bootloader_version",
> +		.data		= &bootloader_version,
> +		.maxlen		= sizeof (int),
> +		.mode		= 0444,
> +		.proc_handler	= proc_dointvec,
> +	},
> +	{
> +		.procname	= "io_delay_type",
> +		.data		= &io_delay_type,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
> +#endif
> +#if defined(CONFIG_MMU)
> +	{
> +		.procname	= "randomize_va_space",
> +		.data		= &randomize_va_space,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
> +#endif
> +#if defined(CONFIG_S390) && defined(CONFIG_SMP)
> +	{
> +		.procname	= "spin_retry",
> +		.data		= &spin_retry,
> +		.maxlen		= sizeof (int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
> +#endif
> +#if	defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
> +	{
> +		.procname	= "acpi_video_flags",
> +		.data		= &acpi_realmode_flags,
> +		.maxlen		= sizeof (unsigned long),
> +		.mode		= 0644,
> +		.proc_handler	= proc_doulongvec_minmax,
> +	},
> +#endif
> +#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
> +	{
> +		.procname	= "ignore-unaligned-usertrap",
> +		.data		= &no_unaligned_warning,
> +		.maxlen		= sizeof (int),
> +	 	.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
> +#endif
> +#ifdef CONFIG_IA64
> +	{
> +		.procname	= "unaligned-dump-stack",
> +		.data		= &unaligned_dump_stack,
> +		.maxlen		= sizeof (int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
> +#endif
> +#ifdef CONFIG_DETECT_HUNG_TASK
> +	{
> +		.procname	= "hung_task_panic",
> +		.data		= &sysctl_hung_task_panic,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= SYSCTL_ONE,
> +	},
> +	{
> +		.procname	= "hung_task_check_count",
> +		.data		= &sysctl_hung_task_check_count,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= SYSCTL_ZERO,
> +	},
> +	{
> +		.procname	= "hung_task_timeout_secs",
> +		.data		= &sysctl_hung_task_timeout_secs,
> +		.maxlen		= sizeof(unsigned long),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dohung_task_timeout_secs,
> +		.extra2		= &hung_task_timeout_max,
> +	},
> +	{
> +		.procname	= "hung_task_check_interval_secs",
> +		.data		= &sysctl_hung_task_check_interval_secs,
> +		.maxlen		= sizeof(unsigned long),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dohung_task_timeout_secs,
> +		.extra2		= &hung_task_timeout_max,
> +	},
> +	{
> +		.procname	= "hung_task_warnings",
> +		.data		= &sysctl_hung_task_warnings,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= &neg_one,
> +	},
> +#endif
> +#ifdef CONFIG_RT_MUTEXES
> +	{
> +		.procname	= "max_lock_depth",
> +		.data		= &max_lock_depth,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
> +#endif
> +	{
> +		.procname	= "poweroff_cmd",
> +		.data		= &poweroff_cmd,
> +		.maxlen		= POWEROFF_CMD_PATH_LEN,
> +		.mode		= 0644,
> +		.proc_handler	= proc_dostring,
> +	},
> +#ifdef CONFIG_KEYS
> +	{
> +		.procname	= "keys",
> +		.mode		= 0555,
> +		.child		= key_sysctls,
> +	},
> +#endif
> +#ifdef CONFIG_PERF_EVENTS
> +	/*
> +	 * User-space scripts rely on the existence of this file
> +	 * as a feature check for perf_events being enabled.
> +	 *
> +	 * So it's an ABI, do not remove!
> +	 */
> +	{
> +		.procname	= "perf_event_paranoid",
> +		.data		= &sysctl_perf_event_paranoid,
> +		.maxlen		= sizeof(sysctl_perf_event_paranoid),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
> +	{
> +		.procname	= "perf_event_mlock_kb",
> +		.data		= &sysctl_perf_event_mlock,
> +		.maxlen		= sizeof(sysctl_perf_event_mlock),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
> +	{
> +		.procname	= "perf_event_max_sample_rate",
> +		.data		= &sysctl_perf_event_sample_rate,
> +		.maxlen		= sizeof(sysctl_perf_event_sample_rate),
> +		.mode		= 0644,
> +		.proc_handler	= perf_proc_update_handler,
> +		.extra1		= SYSCTL_ONE,
> +	},
> +	{
> +		.procname	= "perf_cpu_time_max_percent",
> +		.data		= &sysctl_perf_cpu_time_max_percent,
> +		.maxlen		= sizeof(sysctl_perf_cpu_time_max_percent),
> +		.mode		= 0644,
> +		.proc_handler	= perf_cpu_time_max_percent_handler,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= &one_hundred,
> +	},
> +	{
> +		.procname	= "perf_event_max_stack",
> +		.data		= &sysctl_perf_event_max_stack,
> +		.maxlen		= sizeof(sysctl_perf_event_max_stack),
> +		.mode		= 0644,
> +		.proc_handler	= perf_event_max_stack_handler,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= &six_hundred_forty_kb,
> +	},
> +	{
> +		.procname	= "perf_event_max_contexts_per_stack",
> +		.data		= &sysctl_perf_event_max_contexts_per_stack,
> +		.maxlen		= sizeof(sysctl_perf_event_max_contexts_per_stack),
> +		.mode		= 0644,
> +		.proc_handler	= perf_event_max_stack_handler,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= &one_thousand,
> +	},
> +#endif
> +	{
> +		.procname	= "panic_on_warn",
> +		.data		= &panic_on_warn,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= SYSCTL_ONE,
> +	},
> +#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
> +	{
> +		.procname	= "timer_migration",
> +		.data		= &sysctl_timer_migration,
> +		.maxlen		= sizeof(unsigned int),
> +		.mode		= 0644,
> +		.proc_handler	= timer_migration_handler,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= SYSCTL_ONE,
> +	},
> +#endif
> +#ifdef CONFIG_BPF_SYSCALL
> +	{
> +		.procname	= "unprivileged_bpf_disabled",
> +		.data		= &sysctl_unprivileged_bpf_disabled,
> +		.maxlen		= sizeof(sysctl_unprivileged_bpf_disabled),
> +		.mode		= 0644,
> +		/* only handle a transition from default "0" to "1" */
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= SYSCTL_ONE,
> +		.extra2		= SYSCTL_ONE,
> +	},
> +	{
> +		.procname	= "bpf_stats_enabled",
> +		.data		= &bpf_stats_enabled_key.key,
> +		.maxlen		= sizeof(bpf_stats_enabled_key),
> +		.mode		= 0644,
> +		.proc_handler	= proc_do_static_key,
> +	},
> +#endif
> +#if defined(CONFIG_TREE_RCU)
> +	{
> +		.procname	= "panic_on_rcu_stall",
> +		.data		= &sysctl_panic_on_rcu_stall,
> +		.maxlen		= sizeof(sysctl_panic_on_rcu_stall),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= SYSCTL_ONE,
> +	},
> +#endif
> +#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
> +	{
> +		.procname	= "stack_erasing",
> +		.data		= NULL,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0600,
> +		.proc_handler	= stack_erasing_sysctl,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= SYSCTL_ONE,
> +	},
> +#endif
> +	{ }
> +};
>  
> -int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
> -			     void __user *buffer, size_t *lenp, loff_t *ppos)
> -{
> -	return -ENOSYS;
> -}
> +static struct ctl_table vm_table[] = {
> +	{
> +		.procname	= "overcommit_memory",
> +		.data		= &sysctl_overcommit_memory,
> +		.maxlen		= sizeof(sysctl_overcommit_memory),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= &two,
> +	},
> +	{
> +		.procname	= "panic_on_oom",
> +		.data		= &sysctl_panic_on_oom,
> +		.maxlen		= sizeof(sysctl_panic_on_oom),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= &two,
> +	},
> +	{
> +		.procname	= "oom_kill_allocating_task",
> +		.data		= &sysctl_oom_kill_allocating_task,
> +		.maxlen		= sizeof(sysctl_oom_kill_allocating_task),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
> +	{
> +		.procname	= "oom_dump_tasks",
> +		.data		= &sysctl_oom_dump_tasks,
> +		.maxlen		= sizeof(sysctl_oom_dump_tasks),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
> +	{
> +		.procname	= "overcommit_ratio",
> +		.data		= &sysctl_overcommit_ratio,
> +		.maxlen		= sizeof(sysctl_overcommit_ratio),
> +		.mode		= 0644,
> +		.proc_handler	= overcommit_ratio_handler,
> +	},
> +	{
> +		.procname	= "overcommit_kbytes",
> +		.data		= &sysctl_overcommit_kbytes,
> +		.maxlen		= sizeof(sysctl_overcommit_kbytes),
> +		.mode		= 0644,
> +		.proc_handler	= overcommit_kbytes_handler,
> +	},
> +	{
> +		.procname	= "page-cluster", 
> +		.data		= &page_cluster,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= SYSCTL_ZERO,
> +	},
> +	{
> +		.procname	= "dirty_background_ratio",
> +		.data		= &dirty_background_ratio,
> +		.maxlen		= sizeof(dirty_background_ratio),
> +		.mode		= 0644,
> +		.proc_handler	= dirty_background_ratio_handler,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= &one_hundred,
> +	},
> +	{
> +		.procname	= "dirty_background_bytes",
> +		.data		= &dirty_background_bytes,
> +		.maxlen		= sizeof(dirty_background_bytes),
> +		.mode		= 0644,
> +		.proc_handler	= dirty_background_bytes_handler,
> +		.extra1		= &one_ul,
> +	},
> +	{
> +		.procname	= "dirty_ratio",
> +		.data		= &vm_dirty_ratio,
> +		.maxlen		= sizeof(vm_dirty_ratio),
> +		.mode		= 0644,
> +		.proc_handler	= dirty_ratio_handler,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= &one_hundred,
> +	},
> +	{
> +		.procname	= "dirty_bytes",
> +		.data		= &vm_dirty_bytes,
> +		.maxlen		= sizeof(vm_dirty_bytes),
> +		.mode		= 0644,
> +		.proc_handler	= dirty_bytes_handler,
> +		.extra1		= &dirty_bytes_min,
> +	},
> +	{
> +		.procname	= "dirty_writeback_centisecs",
> +		.data		= &dirty_writeback_interval,
> +		.maxlen		= sizeof(dirty_writeback_interval),
> +		.mode		= 0644,
> +		.proc_handler	= dirty_writeback_centisecs_handler,
> +	},
> +	{
> +		.procname	= "dirty_expire_centisecs",
> +		.data		= &dirty_expire_interval,
> +		.maxlen		= sizeof(dirty_expire_interval),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= SYSCTL_ZERO,
> +	},
> +	{
> +		.procname	= "dirtytime_expire_seconds",
> +		.data		= &dirtytime_expire_interval,
> +		.maxlen		= sizeof(dirtytime_expire_interval),
> +		.mode		= 0644,
> +		.proc_handler	= dirtytime_interval_handler,
> +		.extra1		= SYSCTL_ZERO,
> +	},
> +	{
> +		.procname	= "swappiness",
> +		.data		= &vm_swappiness,
> +		.maxlen		= sizeof(vm_swappiness),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= &one_hundred,
> +	},
> +#ifdef CONFIG_HUGETLB_PAGE
> +	{
> +		.procname	= "nr_hugepages",
> +		.data		= NULL,
> +		.maxlen		= sizeof(unsigned long),
> +		.mode		= 0644,
> +		.proc_handler	= hugetlb_sysctl_handler,
> +	},
> +#ifdef CONFIG_NUMA
> +	{
> +		.procname       = "nr_hugepages_mempolicy",
> +		.data           = NULL,
> +		.maxlen         = sizeof(unsigned long),
> +		.mode           = 0644,
> +		.proc_handler   = &hugetlb_mempolicy_sysctl_handler,
> +	},
> +	{
> +		.procname		= "numa_stat",
> +		.data			= &sysctl_vm_numa_stat,
> +		.maxlen			= sizeof(int),
> +		.mode			= 0644,
> +		.proc_handler	= sysctl_vm_numa_stat_handler,
> +		.extra1			= SYSCTL_ZERO,
> +		.extra2			= SYSCTL_ONE,
> +	},
> +#endif
> +	 {
> +		.procname	= "hugetlb_shm_group",
> +		.data		= &sysctl_hugetlb_shm_group,
> +		.maxlen		= sizeof(gid_t),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	 },
> +	{
> +		.procname	= "nr_overcommit_hugepages",
> +		.data		= NULL,
> +		.maxlen		= sizeof(unsigned long),
> +		.mode		= 0644,
> +		.proc_handler	= hugetlb_overcommit_handler,
> +	},
> +#endif
> +	{
> +		.procname	= "lowmem_reserve_ratio",
> +		.data		= &sysctl_lowmem_reserve_ratio,
> +		.maxlen		= sizeof(sysctl_lowmem_reserve_ratio),
> +		.mode		= 0644,
> +		.proc_handler	= lowmem_reserve_ratio_sysctl_handler,
> +	},
> +	{
> +		.procname	= "drop_caches",
> +		.data		= &sysctl_drop_caches,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0200,
> +		.proc_handler	= drop_caches_sysctl_handler,
> +		.extra1		= SYSCTL_ONE,
> +		.extra2		= &four,
> +	},
> +#ifdef CONFIG_COMPACTION
> +	{
> +		.procname	= "compact_memory",
> +		.data		= &sysctl_compact_memory,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0200,
> +		.proc_handler	= sysctl_compaction_handler,
> +	},
> +	{
> +		.procname	= "extfrag_threshold",
> +		.data		= &sysctl_extfrag_threshold,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= &min_extfrag_threshold,
> +		.extra2		= &max_extfrag_threshold,
> +	},
> +	{
> +		.procname	= "compact_unevictable_allowed",
> +		.data		= &sysctl_compact_unevictable_allowed,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax_warn_RT_change,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= SYSCTL_ONE,
> +	},
> +
> +#endif /* CONFIG_COMPACTION */
> +	{
> +		.procname	= "min_free_kbytes",
> +		.data		= &min_free_kbytes,
> +		.maxlen		= sizeof(min_free_kbytes),
> +		.mode		= 0644,
> +		.proc_handler	= min_free_kbytes_sysctl_handler,
> +		.extra1		= SYSCTL_ZERO,
> +	},
> +	{
> +		.procname	= "watermark_boost_factor",
> +		.data		= &watermark_boost_factor,
> +		.maxlen		= sizeof(watermark_boost_factor),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= SYSCTL_ZERO,
> +	},
> +	{
> +		.procname	= "watermark_scale_factor",
> +		.data		= &watermark_scale_factor,
> +		.maxlen		= sizeof(watermark_scale_factor),
> +		.mode		= 0644,
> +		.proc_handler	= watermark_scale_factor_sysctl_handler,
> +		.extra1		= SYSCTL_ONE,
> +		.extra2		= &one_thousand,
> +	},
> +	{
> +		.procname	= "percpu_pagelist_fraction",
> +		.data		= &percpu_pagelist_fraction,
> +		.maxlen		= sizeof(percpu_pagelist_fraction),
> +		.mode		= 0644,
> +		.proc_handler	= percpu_pagelist_fraction_sysctl_handler,
> +		.extra1		= SYSCTL_ZERO,
> +	},
> +#ifdef CONFIG_MMU
> +	{
> +		.procname	= "max_map_count",
> +		.data		= &sysctl_max_map_count,
> +		.maxlen		= sizeof(sysctl_max_map_count),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= SYSCTL_ZERO,
> +	},
> +#else
> +	{
> +		.procname	= "nr_trim_pages",
> +		.data		= &sysctl_nr_trim_pages,
> +		.maxlen		= sizeof(sysctl_nr_trim_pages),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= SYSCTL_ZERO,
> +	},
> +#endif
> +	{
> +		.procname	= "laptop_mode",
> +		.data		= &laptop_mode,
> +		.maxlen		= sizeof(laptop_mode),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_jiffies,
> +	},
> +	{
> +		.procname	= "block_dump",
> +		.data		= &block_dump,
> +		.maxlen		= sizeof(block_dump),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +		.extra1		= SYSCTL_ZERO,
> +	},
> +	{
> +		.procname	= "vfs_cache_pressure",
> +		.data		= &sysctl_vfs_cache_pressure,
> +		.maxlen		= sizeof(sysctl_vfs_cache_pressure),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +		.extra1		= SYSCTL_ZERO,
> +	},
> +#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
> +    defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
> +	{
> +		.procname	= "legacy_va_layout",
> +		.data		= &sysctl_legacy_va_layout,
> +		.maxlen		= sizeof(sysctl_legacy_va_layout),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +		.extra1		= SYSCTL_ZERO,
> +	},
> +#endif
> +#ifdef CONFIG_NUMA
> +	{
> +		.procname	= "zone_reclaim_mode",
> +		.data		= &node_reclaim_mode,
> +		.maxlen		= sizeof(node_reclaim_mode),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +		.extra1		= SYSCTL_ZERO,
> +	},
> +	{
> +		.procname	= "min_unmapped_ratio",
> +		.data		= &sysctl_min_unmapped_ratio,
> +		.maxlen		= sizeof(sysctl_min_unmapped_ratio),
> +		.mode		= 0644,
> +		.proc_handler	= sysctl_min_unmapped_ratio_sysctl_handler,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= &one_hundred,
> +	},
> +	{
> +		.procname	= "min_slab_ratio",
> +		.data		= &sysctl_min_slab_ratio,
> +		.maxlen		= sizeof(sysctl_min_slab_ratio),
> +		.mode		= 0644,
> +		.proc_handler	= sysctl_min_slab_ratio_sysctl_handler,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= &one_hundred,
> +	},
> +#endif
> +#ifdef CONFIG_SMP
> +	{
> +		.procname	= "stat_interval",
> +		.data		= &sysctl_stat_interval,
> +		.maxlen		= sizeof(sysctl_stat_interval),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_jiffies,
> +	},
> +	{
> +		.procname	= "stat_refresh",
> +		.data		= NULL,
> +		.maxlen		= 0,
> +		.mode		= 0600,
> +		.proc_handler	= vmstat_refresh,
> +	},
> +#endif
> +#ifdef CONFIG_MMU
> +	{
> +		.procname	= "mmap_min_addr",
> +		.data		= &dac_mmap_min_addr,
> +		.maxlen		= sizeof(unsigned long),
> +		.mode		= 0644,
> +		.proc_handler	= mmap_min_addr_handler,
> +	},
> +#endif
> +#ifdef CONFIG_NUMA
> +	{
> +		.procname	= "numa_zonelist_order",
> +		.data		= &numa_zonelist_order,
> +		.maxlen		= NUMA_ZONELIST_ORDER_LEN,
> +		.mode		= 0644,
> +		.proc_handler	= numa_zonelist_order_handler,
> +	},
> +#endif
> +#if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
> +   (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
> +	{
> +		.procname	= "vdso_enabled",
> +#ifdef CONFIG_X86_32
> +		.data		= &vdso32_enabled,
> +		.maxlen		= sizeof(vdso32_enabled),
> +#else
> +		.data		= &vdso_enabled,
> +		.maxlen		= sizeof(vdso_enabled),
> +#endif
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +		.extra1		= SYSCTL_ZERO,
> +	},
> +#endif
> +#ifdef CONFIG_HIGHMEM
> +	{
> +		.procname	= "highmem_is_dirtyable",
> +		.data		= &vm_highmem_is_dirtyable,
> +		.maxlen		= sizeof(vm_highmem_is_dirtyable),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= SYSCTL_ONE,
> +	},
> +#endif
> +#ifdef CONFIG_MEMORY_FAILURE
> +	{
> +		.procname	= "memory_failure_early_kill",
> +		.data		= &sysctl_memory_failure_early_kill,
> +		.maxlen		= sizeof(sysctl_memory_failure_early_kill),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= SYSCTL_ONE,
> +	},
> +	{
> +		.procname	= "memory_failure_recovery",
> +		.data		= &sysctl_memory_failure_recovery,
> +		.maxlen		= sizeof(sysctl_memory_failure_recovery),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= SYSCTL_ONE,
> +	},
> +#endif
> +	{
> +		.procname	= "user_reserve_kbytes",
> +		.data		= &sysctl_user_reserve_kbytes,
> +		.maxlen		= sizeof(sysctl_user_reserve_kbytes),
> +		.mode		= 0644,
> +		.proc_handler	= proc_doulongvec_minmax,
> +	},
> +	{
> +		.procname	= "admin_reserve_kbytes",
> +		.data		= &sysctl_admin_reserve_kbytes,
> +		.maxlen		= sizeof(sysctl_admin_reserve_kbytes),
> +		.mode		= 0644,
> +		.proc_handler	= proc_doulongvec_minmax,
> +	},
> +#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
> +	{
> +		.procname	= "mmap_rnd_bits",
> +		.data		= &mmap_rnd_bits,
> +		.maxlen		= sizeof(mmap_rnd_bits),
> +		.mode		= 0600,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= (void *)&mmap_rnd_bits_min,
> +		.extra2		= (void *)&mmap_rnd_bits_max,
> +	},
> +#endif
> +#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
> +	{
> +		.procname	= "mmap_rnd_compat_bits",
> +		.data		= &mmap_rnd_compat_bits,
> +		.maxlen		= sizeof(mmap_rnd_compat_bits),
> +		.mode		= 0600,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= (void *)&mmap_rnd_compat_bits_min,
> +		.extra2		= (void *)&mmap_rnd_compat_bits_max,
> +	},
> +#endif
> +#ifdef CONFIG_USERFAULTFD
> +	{
> +		.procname	= "unprivileged_userfaultfd",
> +		.data		= &sysctl_unprivileged_userfaultfd,
> +		.maxlen		= sizeof(sysctl_unprivileged_userfaultfd),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= SYSCTL_ONE,
> +	},
> +#endif
> +	{ }
> +};
>  
> -int proc_doulongvec_minmax(struct ctl_table *table, int write,
> -		    void __user *buffer, size_t *lenp, loff_t *ppos)
> -{
> -	return -ENOSYS;
> -}
> +static struct ctl_table fs_table[] = {
> +	{
> +		.procname	= "inode-nr",
> +		.data		= &inodes_stat,
> +		.maxlen		= 2*sizeof(long),
> +		.mode		= 0444,
> +		.proc_handler	= proc_nr_inodes,
> +	},
> +	{
> +		.procname	= "inode-state",
> +		.data		= &inodes_stat,
> +		.maxlen		= 7*sizeof(long),
> +		.mode		= 0444,
> +		.proc_handler	= proc_nr_inodes,
> +	},
> +	{
> +		.procname	= "file-nr",
> +		.data		= &files_stat,
> +		.maxlen		= sizeof(files_stat),
> +		.mode		= 0444,
> +		.proc_handler	= proc_nr_files,
> +	},
> +	{
> +		.procname	= "file-max",
> +		.data		= &files_stat.max_files,
> +		.maxlen		= sizeof(files_stat.max_files),
> +		.mode		= 0644,
> +		.proc_handler	= proc_doulongvec_minmax,
> +		.extra1		= &zero_ul,
> +		.extra2		= &long_max,
> +	},
> +	{
> +		.procname	= "nr_open",
> +		.data		= &sysctl_nr_open,
> +		.maxlen		= sizeof(unsigned int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= &sysctl_nr_open_min,
> +		.extra2		= &sysctl_nr_open_max,
> +	},
> +	{
> +		.procname	= "dentry-state",
> +		.data		= &dentry_stat,
> +		.maxlen		= 6*sizeof(long),
> +		.mode		= 0444,
> +		.proc_handler	= proc_nr_dentry,
> +	},
> +	{
> +		.procname	= "overflowuid",
> +		.data		= &fs_overflowuid,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= &minolduid,
> +		.extra2		= &maxolduid,
> +	},
> +	{
> +		.procname	= "overflowgid",
> +		.data		= &fs_overflowgid,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= &minolduid,
> +		.extra2		= &maxolduid,
> +	},
> +#ifdef CONFIG_FILE_LOCKING
> +	{
> +		.procname	= "leases-enable",
> +		.data		= &leases_enable,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
> +#endif
> +#ifdef CONFIG_DNOTIFY
> +	{
> +		.procname	= "dir-notify-enable",
> +		.data		= &dir_notify_enable,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
> +#endif
> +#ifdef CONFIG_MMU
> +#ifdef CONFIG_FILE_LOCKING
> +	{
> +		.procname	= "lease-break-time",
> +		.data		= &lease_break_time,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec,
> +	},
> +#endif
> +#ifdef CONFIG_AIO
> +	{
> +		.procname	= "aio-nr",
> +		.data		= &aio_nr,
> +		.maxlen		= sizeof(aio_nr),
> +		.mode		= 0444,
> +		.proc_handler	= proc_doulongvec_minmax,
> +	},
> +	{
> +		.procname	= "aio-max-nr",
> +		.data		= &aio_max_nr,
> +		.maxlen		= sizeof(aio_max_nr),
> +		.mode		= 0644,
> +		.proc_handler	= proc_doulongvec_minmax,
> +	},
> +#endif /* CONFIG_AIO */
> +#ifdef CONFIG_INOTIFY_USER
> +	{
> +		.procname	= "inotify",
> +		.mode		= 0555,
> +		.child		= inotify_table,
> +	},
> +#endif	
> +#ifdef CONFIG_EPOLL
> +	{
> +		.procname	= "epoll",
> +		.mode		= 0555,
> +		.child		= epoll_table,
> +	},
> +#endif
> +#endif
> +	{
> +		.procname	= "protected_symlinks",
> +		.data		= &sysctl_protected_symlinks,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0600,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= SYSCTL_ONE,
> +	},
> +	{
> +		.procname	= "protected_hardlinks",
> +		.data		= &sysctl_protected_hardlinks,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0600,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= SYSCTL_ONE,
> +	},
> +	{
> +		.procname	= "protected_fifos",
> +		.data		= &sysctl_protected_fifos,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0600,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= &two,
> +	},
> +	{
> +		.procname	= "protected_regular",
> +		.data		= &sysctl_protected_regular,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0600,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= &two,
> +	},
> +	{
> +		.procname	= "suid_dumpable",
> +		.data		= &suid_dumpable,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax_coredump,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= &two,
> +	},
> +#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
> +	{
> +		.procname	= "binfmt_misc",
> +		.mode		= 0555,
> +		.child		= sysctl_mount_point,
> +	},
> +#endif
> +	{
> +		.procname	= "pipe-max-size",
> +		.data		= &pipe_max_size,
> +		.maxlen		= sizeof(pipe_max_size),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dopipe_max_size,
> +	},
> +	{
> +		.procname	= "pipe-user-pages-hard",
> +		.data		= &pipe_user_pages_hard,
> +		.maxlen		= sizeof(pipe_user_pages_hard),
> +		.mode		= 0644,
> +		.proc_handler	= proc_doulongvec_minmax,
> +	},
> +	{
> +		.procname	= "pipe-user-pages-soft",
> +		.data		= &pipe_user_pages_soft,
> +		.maxlen		= sizeof(pipe_user_pages_soft),
> +		.mode		= 0644,
> +		.proc_handler	= proc_doulongvec_minmax,
> +	},
> +	{
> +		.procname	= "mount-max",
> +		.data		= &sysctl_mount_max,
> +		.maxlen		= sizeof(unsigned int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= SYSCTL_ONE,
> +	},
> +	{ }
> +};
>  
> -int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
> -				      void __user *buffer,
> -				      size_t *lenp, loff_t *ppos)
> -{
> -    return -ENOSYS;
> -}
> +static struct ctl_table debug_table[] = {
> +#ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
> +	{
> +		.procname	= "exception-trace",
> +		.data		= &show_unhandled_signals,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec
> +	},
> +#endif
> +#if defined(CONFIG_OPTPROBES)
> +	{
> +		.procname	= "kprobes-optimization",
> +		.data		= &sysctl_kprobes_optimization,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_kprobes_optimization_handler,
> +		.extra1		= SYSCTL_ZERO,
> +		.extra2		= SYSCTL_ONE,
> +	},
> +#endif
> +	{ }
> +};
>  
> -int proc_do_large_bitmap(struct ctl_table *table, int write,
> -			 void __user *buffer, size_t *lenp, loff_t *ppos)
> -{
> -	return -ENOSYS;
> -}
> +static struct ctl_table dev_table[] = {
> +	{ }
> +};
>  
> -#endif /* CONFIG_PROC_SYSCTL */
> +static struct ctl_table sysctl_base_table[] = {
> +	{
> +		.procname	= "kernel",
> +		.mode		= 0555,
> +		.child		= kern_table,
> +	},
> +	{
> +		.procname	= "vm",
> +		.mode		= 0555,
> +		.child		= vm_table,
> +	},
> +	{
> +		.procname	= "fs",
> +		.mode		= 0555,
> +		.child		= fs_table,
> +	},
> +	{
> +		.procname	= "debug",
> +		.mode		= 0555,
> +		.child		= debug_table,
> +	},
> +	{
> +		.procname	= "dev",
> +		.mode		= 0555,
> +		.child		= dev_table,
> +	},
> +	{ }
> +};
>  
> -#if defined(CONFIG_SYSCTL)
> -int proc_do_static_key(struct ctl_table *table, int write,
> -		       void __user *buffer, size_t *lenp,
> -		       loff_t *ppos)
> +int __init sysctl_init(void)
>  {
> -	struct static_key *key = (struct static_key *)table->data;
> -	static DEFINE_MUTEX(static_key_mutex);
> -	int val, ret;
> -	struct ctl_table tmp = {
> -		.data   = &val,
> -		.maxlen = sizeof(val),
> -		.mode   = table->mode,
> -		.extra1 = SYSCTL_ZERO,
> -		.extra2 = SYSCTL_ONE,
> -	};
> -
> -	if (write && !capable(CAP_SYS_ADMIN))
> -		return -EPERM;
> +	struct ctl_table_header *hdr;
>  
> -	mutex_lock(&static_key_mutex);
> -	val = static_key_enabled(key);
> -	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
> -	if (write && !ret) {
> -		if (val)
> -			static_key_enable(key);
> -		else
> -			static_key_disable(key);
> -	}
> -	mutex_unlock(&static_key_mutex);
> -	return ret;
> +	hdr = register_sysctl_table(sysctl_base_table);
> +	kmemleak_not_leak(hdr);
> +	return 0;
>  }
> -#endif
> +#endif /* CONFIG_SYSCTL */
>  /*
>   * No sense putting this after each symbol definition, twice,
>   * exception granted :-)
> -- 
> 2.26.1
> 

-- 
Kees Cook

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 5/5] sysctl: pass kernel pointers to ->proc_handler
       [not found] ` <20200424064338.538313-6-hch@lst.de>
  2020-04-24 19:06   ` [PATCH 5/5] sysctl: pass kernel pointers to ->proc_handler Andrey Ignatov
@ 2020-05-04 19:01   ` Kees Cook
  2020-05-05  5:57     ` Christoph Hellwig
  2020-06-04 20:22   ` WARNING: CPU: 1 PID: 52 at mm/page_alloc.c:4826 __alloc_pages_nodemask (Re: [PATCH 5/5] sysctl: pass kernel pointers to ->proc_handler) Vegard Nossum
  2 siblings, 1 reply; 25+ messages in thread
From: Kees Cook @ 2020-05-04 19:01 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Iurii Zaikin, Alexei Starovoitov, Daniel Borkmann, linux-kernel,
	linux-mm, linux-fsdevel, netdev, bpf, Andrey Ignatov

On Fri, Apr 24, 2020 at 08:43:38AM +0200, Christoph Hellwig wrote:
> Instead of having all the sysctl handlers deal with user pointers, which
> is rather hairy in terms of the BPF interaction, copy the input to and
> from  userspace in common code.  This also means that the strings are
> always NUL-terminated by the common code, making the API a little bit
> safer.
> 
> As most handlers just pass through the data to one of the common handlers,
> a lot of the changes are mechanical.

This is a lovely cleanup; thank you!

Tiny notes below...

> diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
> index b6f5d459b087d..df2143e05c571 100644
> --- a/fs/proc/proc_sysctl.c
> +++ b/fs/proc/proc_sysctl.c
> @@ -539,13 +539,13 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
>  	return err;
>  }
>  
> -static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
> +static ssize_t proc_sys_call_handler(struct file *filp, void __user *ubuf,
>  		size_t count, loff_t *ppos, int write)
>  {
>  	struct inode *inode = file_inode(filp);
>  	struct ctl_table_header *head = grab_header(inode);
>  	struct ctl_table *table = PROC_I(inode)->sysctl_entry;
> -	void *new_buf = NULL;
> +	void *kbuf;
>  	ssize_t error;
>  
>  	if (IS_ERR(head))
> @@ -564,27 +564,38 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
>  	if (!table->proc_handler)
>  		goto out;
>  
> -	error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, &count,
> -					   ppos, &new_buf);
> +	if (write) {
> +		kbuf = memdup_user_nul(ubuf, count);
> +		if (IS_ERR(kbuf)) {
> +			error = PTR_ERR(kbuf);
> +			goto out;
> +		}
> +	} else {
> +		error = -ENOMEM;
> +		kbuf = kzalloc(count, GFP_KERNEL);
> +		if (!kbuf)
> +			goto out;
> +	}
> +
> +	error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, &kbuf, &count,
> +					   ppos);
>  	if (error)
> -		goto out;
> +		goto out_free_buf;
>  
>  	/* careful: calling conventions are nasty here */

Is this comment still valid after doing these cleanups?

> -	if (new_buf) {
> -		mm_segment_t old_fs;
> -
> -		old_fs = get_fs();
> -		set_fs(KERNEL_DS);
> -		error = table->proc_handler(table, write, (void __user *)new_buf,
> -					    &count, ppos);
> -		set_fs(old_fs);
> -		kfree(new_buf);
> -	} else {
> -		error = table->proc_handler(table, write, buf, &count, ppos);
> +	error = table->proc_handler(table, write, kbuf, &count, ppos);
> +	if (error)
> +		goto out_free_buf;
> +
> +	if (!write) {
> +		error = -EFAULT;
> +		if (copy_to_user(ubuf, kbuf, count))
> +			goto out_free_buf;
>  	}

Something I noticed here that existed in the original code, but might be
nice to improve while we're here is to make sure that the "count"
returned from proc_handler() cannot grow _larger_, since then we might
expose heap memory beyond the end of the allocation.

I'll send a patch for this...

>  
> -	if (!error)
> -		error = count;
> +	error = count;
> +out_free_buf:
> +	kfree(kbuf);
>  out:
>  	sysctl_head_finish(head);
>  
> [...]
> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> index 511543d238794..e26fe7e8e19d7 100644
> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> [...]
> @@ -682,7 +661,6 @@ static int do_proc_douintvec_w(unsigned int *tbl_data,
>  		left -= proc_skip_spaces(&p);
>  
>  out_free:
> -	kfree(kbuf);
>  	if (err)
>  		return -EINVAL;

This label name isn't accurate any more... *shrug*

-- 
Kees Cook

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 5/5] sysctl: pass kernel pointers to ->proc_handler
  2020-05-04 19:01   ` Kees Cook
@ 2020-05-05  5:57     ` Christoph Hellwig
  0 siblings, 0 replies; 25+ messages in thread
From: Christoph Hellwig @ 2020-05-05  5:57 UTC (permalink / raw)
  To: Kees Cook
  Cc: Christoph Hellwig, Iurii Zaikin, Alexei Starovoitov,
	Daniel Borkmann, linux-kernel, linux-mm, linux-fsdevel, netdev,
	bpf, Andrey Ignatov

On Mon, May 04, 2020 at 12:01:11PM -0700, Kees Cook wrote:
> >  	if (error)
> > -		goto out;
> > +		goto out_free_buf;
> >  
> >  	/* careful: calling conventions are nasty here */
> 
> Is this comment still valid after doing these cleanups?

The comment is pretty old so I decided to keep it.  That being said
I'm not sure it really is very helpful.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* WARNING: CPU: 1 PID: 52 at mm/page_alloc.c:4826 __alloc_pages_nodemask (Re: [PATCH 5/5] sysctl: pass kernel pointers to ->proc_handler)
       [not found] ` <20200424064338.538313-6-hch@lst.de>
  2020-04-24 19:06   ` [PATCH 5/5] sysctl: pass kernel pointers to ->proc_handler Andrey Ignatov
  2020-05-04 19:01   ` Kees Cook
@ 2020-06-04 20:22   ` Vegard Nossum
  2020-06-08  6:51     ` Christoph Hellwig
  2 siblings, 1 reply; 25+ messages in thread
From: Vegard Nossum @ 2020-06-04 20:22 UTC (permalink / raw)
  To: Christoph Hellwig, Kees Cook, Iurii Zaikin
  Cc: Alexei Starovoitov, Daniel Borkmann, linux-kernel, Al Viro, bpf,
	Andrey Ignatov


(Trimmed original Ccs due to outgoing email policy.)

Hi,

On 2020-04-24 08:43, Christoph Hellwig wrote:
> Instead of having all the sysctl handlers deal with user pointers, which
> is rather hairy in terms of the BPF interaction, copy the input to and
> from  userspace in common code.  This also means that the strings are
> always NUL-terminated by the common code, making the API a little bit
> safer.
> 
> As most handlers just pass through the data to one of the common handlers,
> a lot of the changes are mechanical.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Acked-by: Andrey Ignatov <rdna@fb.com>

[snip]
> diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
> index b6f5d459b087d..df2143e05c571 100644
> --- a/fs/proc/proc_sysctl.c
> +++ b/fs/proc/proc_sysctl.c
> @@ -539,13 +539,13 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
>   	return err;
>   }
>   
> -static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
> +static ssize_t proc_sys_call_handler(struct file *filp, void __user *ubuf,
>   		size_t count, loff_t *ppos, int write)
>   {
>   	struct inode *inode = file_inode(filp);
>   	struct ctl_table_header *head = grab_header(inode);
>   	struct ctl_table *table = PROC_I(inode)->sysctl_entry;
> -	void *new_buf = NULL;
> +	void *kbuf;
>   	ssize_t error;
>   
>   	if (IS_ERR(head))
> @@ -564,27 +564,38 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
>   	if (!table->proc_handler)
>   		goto out;
>   
> -	error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, &count,
> -					   ppos, &new_buf);
> +	if (write) {
> +		kbuf = memdup_user_nul(ubuf, count);
> +		if (IS_ERR(kbuf)) {
> +			error = PTR_ERR(kbuf);
> +			goto out;
> +		}
> +	} else {
> +		error = -ENOMEM;
> +		kbuf = kzalloc(count, GFP_KERNEL);
> +		if (!kbuf)
> +			goto out;
> +	}
> +
> +	error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, &kbuf, &count,
> +					   ppos);
>   	if (error)
> -		goto out;
> +		goto out_free_buf;
>   
>   	/* careful: calling conventions are nasty here */
> -	if (new_buf) {
> -		mm_segment_t old_fs;
> -
> -		old_fs = get_fs();
> -		set_fs(KERNEL_DS);
> -		error = table->proc_handler(table, write, (void __user *)new_buf,
> -					    &count, ppos);
> -		set_fs(old_fs);
> -		kfree(new_buf);
> -	} else {
> -		error = table->proc_handler(table, write, buf, &count, ppos);
> +	error = table->proc_handler(table, write, kbuf, &count, ppos);
> +	if (error)
> +		goto out_free_buf;
> +
> +	if (!write) {
> +		error = -EFAULT;
> +		if (copy_to_user(ubuf, kbuf, count))
> +			goto out_free_buf;
>   	}
>   
> -	if (!error)
> -		error = count;
> +	error = count;
> +out_free_buf:
> +	kfree(kbuf);
>   out:
>   	sysctl_head_finish(head);
>   

This commit in recent linus/master
(32927393dc1ccd60fb2bdc05b9e8e88753761469) causes a regression for me:

------------[ cut here ]------------
WARNING: CPU: 1 PID: 52 at mm/page_alloc.c:4826 
__alloc_pages_nodemask+0x1cd/0x2a0
CPU: 1 PID: 52 Comm: init Not tainted 5.7.0+ #218
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
Ubuntu-1.8.2-1ubuntu1 04/01/2014
RIP: 0010:__alloc_pages_nodemask+0x1cd/0x2a0
Code: 0f 85 26 ff ff ff 65 48 8b 04 25 00 7d 01 00 48 05 88 07 00 00 41 
bd 01 00 00 00 48 89 44 24 08 e9 07 ff ff ff 80 e7 20 75 02 <0f> 0b 45 
31 ed eb 98 44 8b 64 24 18 65 8b 05 d0 25 e9 7e 89 c0 48
RSP: 0018:ffffc900000e7de0 EFLAGS: 00010246
RAX: 0000000000000000 RBX: 00000000000400c0 RCX: 0000000000000000
RDX: 0000000000000000 RSI: 0000000000000013 RDI: 0000000000040dc0
RBP: 000000007ffff000 R08: ffffffff820276c0 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: ffffc900000e7f08
R13: 0000000000000013 R14: 0000000000000013 R15: ffffffff81c34ce0
FS:  00000000006cf880(0000) GS:ffff88803ed00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00000000004a1dab CR3: 000000003e012002 CR4: 00000000003606e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
  kmalloc_order+0x16/0x70
  kmalloc_order_trace+0x18/0xa0
  proc_sys_call_handler+0xf7/0x170
  vfs_read+0x98/0x120
  ksys_read+0x5a/0xd0
  do_syscall_64+0x43/0x140
  entry_SYSCALL_64_after_hwframe+0x44/0xa9
RIP: 0033:0x43f910
Code: 01 f0 ff ff 0f 83 e0 57 00 00 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 
1f 44 00 00 83 3d 19 f2 28 00 00 75 14 b8 00 00 00 00 0f 05 <48> 3d 01 
f0 ff ff 0f 83 b4 57 00 00 c3 48 83 ec 08 e8 4a 39 00 00
RSP: 002b:00007fffffffeaa8 EFLAGS: 00000246 ORIG_RAX: 0000000000000000
RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 000000000043f910
RDX: 0000008000000000 RSI: 0000000000000000 RDI: 0000000000000003
RBP: 00007fffffffed10 R08: 0000000000000000 R09: 00000000006cf880
R10: 00000000006cfb50 R11: 0000000000000246 R12: 0000000000401870
R13: 0000000000401900 R14: 0000000000000000 R15: 0000000000000000
---[ end trace 20146069c1ec4970 ]---

It's easy to reproduce by just doing

     read(open("/proc/sys/vm/swappiness", O_RDONLY), 0, 512UL * 1024 * 
1024 * 1024);

or so. Reverting the commit fixes the issue for me.


Vegard

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: WARNING: CPU: 1 PID: 52 at mm/page_alloc.c:4826 __alloc_pages_nodemask (Re: [PATCH 5/5] sysctl: pass kernel pointers to ->proc_handler)
  2020-06-04 20:22   ` WARNING: CPU: 1 PID: 52 at mm/page_alloc.c:4826 __alloc_pages_nodemask (Re: [PATCH 5/5] sysctl: pass kernel pointers to ->proc_handler) Vegard Nossum
@ 2020-06-08  6:51     ` Christoph Hellwig
  2020-06-08  7:45       ` Vegard Nossum
  0 siblings, 1 reply; 25+ messages in thread
From: Christoph Hellwig @ 2020-06-08  6:51 UTC (permalink / raw)
  To: Vegard Nossum
  Cc: Christoph Hellwig, Kees Cook, Iurii Zaikin, Alexei Starovoitov,
	Daniel Borkmann, linux-kernel, Al Viro, bpf, Andrey Ignatov

On Thu, Jun 04, 2020 at 10:22:21PM +0200, Vegard Nossum wrote:
> It's easy to reproduce by just doing
>
>     read(open("/proc/sys/vm/swappiness", O_RDONLY), 0, 512UL * 1024 * 1024 
> * 1024);
>
> or so. Reverting the commit fixes the issue for me.

Yes, doing giant allocations will fail and trace.  We have two options
here that both seem sensible:

 - truncate sysctl calls to some sensible length
 - (optionally) use vmalloc

Is this a real application or just a test case trying to do the
stupidmost possible thing?

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: WARNING: CPU: 1 PID: 52 at mm/page_alloc.c:4826 __alloc_pages_nodemask (Re: [PATCH 5/5] sysctl: pass kernel pointers to ->proc_handler)
  2020-06-08  6:51     ` Christoph Hellwig
@ 2020-06-08  7:45       ` Vegard Nossum
  2020-06-08 13:05         ` Christoph Hellwig
  0 siblings, 1 reply; 25+ messages in thread
From: Vegard Nossum @ 2020-06-08  7:45 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Kees Cook, Iurii Zaikin, Alexei Starovoitov, Daniel Borkmann,
	linux-kernel, Al Viro, bpf, Andrey Ignatov


On 2020-06-08 08:51, Christoph Hellwig wrote:
> On Thu, Jun 04, 2020 at 10:22:21PM +0200, Vegard Nossum wrote:
>> It's easy to reproduce by just doing
>>
>>      read(open("/proc/sys/vm/swappiness", O_RDONLY), 0, 512UL * 1024 * 1024
>> * 1024);
>>
>> or so. Reverting the commit fixes the issue for me.
> 
> Yes, doing giant allocations will fail and trace.  We have two options
> here that both seem sensible:
> 
>   - truncate sysctl calls to some sensible length
>   - (optionally) use vmalloc
> 
> Is this a real application or just a test case trying to do the
> stupidmost possible thing?
> 

Just a test case.

Allowing the kernel to allocate an unbounded amount of memory on behalf
of userspace is an easy DOS.

All the length checks were already in there, e.g.

  static int cmm_timeout_handler(struct ctl_table *ctl, int write,
                               void __user *buffer, size_t *lenp, loff_t 
*ppos)
  {
         char buf[64], *p;
[...]
                 len = min(*lenp, sizeof(buf));
                 if (copy_from_user(buf, buffer, len))
                         return -EFAULT;


Vegard

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: WARNING: CPU: 1 PID: 52 at mm/page_alloc.c:4826 __alloc_pages_nodemask (Re: [PATCH 5/5] sysctl: pass kernel pointers to ->proc_handler)
  2020-06-08  7:45       ` Vegard Nossum
@ 2020-06-08 13:05         ` Christoph Hellwig
  2020-06-08 16:40           ` Alexei Starovoitov
  0 siblings, 1 reply; 25+ messages in thread
From: Christoph Hellwig @ 2020-06-08 13:05 UTC (permalink / raw)
  To: Vegard Nossum
  Cc: Christoph Hellwig, Kees Cook, Iurii Zaikin, Alexei Starovoitov,
	Daniel Borkmann, linux-kernel, Al Viro, bpf, Andrey Ignatov

On Mon, Jun 08, 2020 at 09:45:49AM +0200, Vegard Nossum wrote:
> Just a test case.
>
> Allowing the kernel to allocate an unbounded amount of memory on behalf
> of userspace is an easy DOS.
>
> All the length checks were already in there, e.g.
>
>  static int cmm_timeout_handler(struct ctl_table *ctl, int write,
>                               void __user *buffer, size_t *lenp, loff_t 
> *ppos)
>  {
>         char buf[64], *p;
> [...]
>                 len = min(*lenp, sizeof(buf));
>                 if (copy_from_user(buf, buffer, len))
>                         return -EFAULT;

Doesn't help if we don't know the exact limit yet.  But we can put
some arbitrary but reasonable limit like KMALLOC_MAX_SIZE on the
sysctls and see if this sticks.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: WARNING: CPU: 1 PID: 52 at mm/page_alloc.c:4826 __alloc_pages_nodemask (Re: [PATCH 5/5] sysctl: pass kernel pointers to ->proc_handler)
  2020-06-08 13:05         ` Christoph Hellwig
@ 2020-06-08 16:40           ` Alexei Starovoitov
  2020-06-08 16:49             ` sdf
  0 siblings, 1 reply; 25+ messages in thread
From: Alexei Starovoitov @ 2020-06-08 16:40 UTC (permalink / raw)
  To: Christoph Hellwig, Stanislav Fomichev
  Cc: Vegard Nossum, Kees Cook, Iurii Zaikin, Alexei Starovoitov,
	Daniel Borkmann, LKML, Al Viro, bpf, Andrey Ignatov

On Mon, Jun 8, 2020 at 6:05 AM Christoph Hellwig <hch@lst.de> wrote:
>
> On Mon, Jun 08, 2020 at 09:45:49AM +0200, Vegard Nossum wrote:
> > Just a test case.
> >
> > Allowing the kernel to allocate an unbounded amount of memory on behalf
> > of userspace is an easy DOS.
> >
> > All the length checks were already in there, e.g.
> >
> >  static int cmm_timeout_handler(struct ctl_table *ctl, int write,
> >                               void __user *buffer, size_t *lenp, loff_t
> > *ppos)
> >  {
> >         char buf[64], *p;
> > [...]
> >                 len = min(*lenp, sizeof(buf));
> >                 if (copy_from_user(buf, buffer, len))
> >                         return -EFAULT;
>
> Doesn't help if we don't know the exact limit yet.  But we can put
> some arbitrary but reasonable limit like KMALLOC_MAX_SIZE on the
> sysctls and see if this sticks.

adding Stanislav. I think he's looking into this already.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: WARNING: CPU: 1 PID: 52 at mm/page_alloc.c:4826 __alloc_pages_nodemask (Re: [PATCH 5/5] sysctl: pass kernel pointers to ->proc_handler)
  2020-06-08 16:40           ` Alexei Starovoitov
@ 2020-06-08 16:49             ` sdf
  0 siblings, 0 replies; 25+ messages in thread
From: sdf @ 2020-06-08 16:49 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Christoph Hellwig, Vegard Nossum, Kees Cook, Iurii Zaikin,
	Alexei Starovoitov, Daniel Borkmann, LKML, Al Viro, bpf,
	Andrey Ignatov

On 06/08, Alexei Starovoitov wrote:
> On Mon, Jun 8, 2020 at 6:05 AM Christoph Hellwig <hch@lst.de> wrote:
> >
> > On Mon, Jun 08, 2020 at 09:45:49AM +0200, Vegard Nossum wrote:
> > > Just a test case.
> > >
> > > Allowing the kernel to allocate an unbounded amount of memory on  
> behalf
> > > of userspace is an easy DOS.
> > >
> > > All the length checks were already in there, e.g.
> > >
> > >  static int cmm_timeout_handler(struct ctl_table *ctl, int write,
> > >                               void __user *buffer, size_t *lenp,  
> loff_t
> > > *ppos)
> > >  {
> > >         char buf[64], *p;
> > > [...]
> > >                 len = min(*lenp, sizeof(buf));
> > >                 if (copy_from_user(buf, buffer, len))
> > >                         return -EFAULT;
> >
> > Doesn't help if we don't know the exact limit yet.  But we can put
> > some arbitrary but reasonable limit like KMALLOC_MAX_SIZE on the
> > sysctls and see if this sticks.

> adding Stanislav. I think he's looking into this already.
Yeah, I'm looking at it from the get/setsockopt point of view.
I'm currently trying to bypass allocating a buffer if it's greater
than PAGE_SIZE.
I suppose for sysctls we should try to do something similar?

^ permalink raw reply	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2020-06-08 16:49 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-24  6:43 pass kernel pointers to the sysctl ->proc_handler method v3 Christoph Hellwig
2020-04-24  6:43 ` [PATCH 1/5] bpf-cgroup: remove unused exports Christoph Hellwig
2020-04-27 21:23   ` Daniel Borkmann
2020-04-24  6:43 ` [PATCH 2/5] mm: remove watermark_boost_factor_sysctl_handler Christoph Hellwig
2020-05-04 18:41   ` Kees Cook
2020-04-24  6:43 ` [PATCH 3/5] sysctl: remove all extern declaration from sysctl.c Christoph Hellwig
2020-05-04  1:25   ` Stephen Rothwell
2020-05-04 18:42   ` Kees Cook
2020-04-24  6:43 ` [PATCH 4/5] sysctl: avoid forward declarations Christoph Hellwig
2020-05-04 18:44   ` Kees Cook
2020-04-26 15:51 ` pass kernel pointers to the sysctl ->proc_handler method v3 Alexei Starovoitov
2020-04-27  5:35   ` Christoph Hellwig
2020-04-26 15:59 ` Al Viro
2020-04-27  5:36   ` Christoph Hellwig
2020-04-27  7:15     ` Al Viro
     [not found] ` <20200424064338.538313-6-hch@lst.de>
2020-04-24 19:06   ` [PATCH 5/5] sysctl: pass kernel pointers to ->proc_handler Andrey Ignatov
2020-04-27  5:34     ` Christoph Hellwig
2020-05-04 19:01   ` Kees Cook
2020-05-05  5:57     ` Christoph Hellwig
2020-06-04 20:22   ` WARNING: CPU: 1 PID: 52 at mm/page_alloc.c:4826 __alloc_pages_nodemask (Re: [PATCH 5/5] sysctl: pass kernel pointers to ->proc_handler) Vegard Nossum
2020-06-08  6:51     ` Christoph Hellwig
2020-06-08  7:45       ` Vegard Nossum
2020-06-08 13:05         ` Christoph Hellwig
2020-06-08 16:40           ` Alexei Starovoitov
2020-06-08 16:49             ` sdf

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).