linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] sysctl: Add a group of macro functions to initcall the sysctl table of each feature
@ 2021-12-07  1:13 Xiaoming Ni
  2021-12-07  1:38 ` Andrew Morton
  0 siblings, 1 reply; 20+ messages in thread
From: Xiaoming Ni @ 2021-12-07  1:13 UTC (permalink / raw)
  To: linux-kernel, mcgrof, viro, ebiederm, keescook, jlayton, bfields,
	yzaikin, akpm
  Cc: nixiaoming, wangle6

To avoid duplicated code, add a set of macro functions to initialize the
sysctl table for each feature.

The system initialization process is as follows:

	start_kernel () {
		...
		/* init proc and sysctl base,
		 * proc_root_init()-->proc_sys_init()-->sysctl_init_bases()
		 */
		proc_root_init(); /* init proc and sysctl base */
		...
		arch_call_rest_init();
	}

	arch_call_rest_init()-->rest_init()-->kernel_init()
	kernel_init() {
		...
		kernel_init_freeable(); /* do all initcalls */
		...
		do_sysctl_args(); /* Process the sysctl parameter: sysctl.*= */
	}

	kernel_init_freeable()--->do_basic_setup()-->do_initcalls()
	do_initcalls() {
		for (level = 0; level < ARRAY_SIZE(initcall_levels) - 1; level++) {
			do_initcall_level
	}

The sysctl interface of each subfeature should be registered after
sysctl_init_bases() and before do_sysctl_args(). It seems that the sysctl
interface does not depend on initcall_levels. To prevent the sysctl
interface from being initialized before the feature itself. The
lowest-level late_initcall() is used as the common sysctl interface
registration level.

Signed-off-by: Xiaoming Ni <nixiaoming@huawei.com>
---
 fs/coredump.c          |  7 +------
 fs/dcache.c            |  7 +------
 fs/exec.c              |  8 +-------
 fs/file_table.c        |  7 +------
 fs/inode.c             |  7 +------
 fs/locks.c             |  7 +------
 fs/namei.c             |  8 +-------
 fs/namespace.c         |  7 +------
 include/linux/sysctl.h | 15 +++++++++++++++
 kernel/stackleak.c     |  7 +------
 10 files changed, 24 insertions(+), 56 deletions(-)

diff --git a/fs/coredump.c b/fs/coredump.c
index 570d98398668..8f6c6322651d 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -943,12 +943,7 @@ static struct ctl_table coredump_sysctls[] = {
 	{ }
 };
 
-static int __init init_fs_coredump_sysctls(void)
-{
-	register_sysctl_init("kernel", coredump_sysctls);
-	return 0;
-}
-fs_initcall(init_fs_coredump_sysctls);
+kernel_sysctl_initcall(coredump_sysctls);
 #endif /* CONFIG_SYSCTL */
 
 /*
diff --git a/fs/dcache.c b/fs/dcache.c
index 0eef1102f460..c1570243aaee 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -195,12 +195,7 @@ static struct ctl_table fs_dcache_sysctls[] = {
 	{ }
 };
 
-static int __init init_fs_dcache_sysctls(void)
-{
-	register_sysctl_init("fs", fs_dcache_sysctls);
-	return 0;
-}
-fs_initcall(init_fs_dcache_sysctls);
+fs_sysctl_initcall(fs_dcache_sysctls);
 #endif
 
 /*
diff --git a/fs/exec.c b/fs/exec.c
index cc5ec43df028..3c30e686af79 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -2124,11 +2124,5 @@ static struct ctl_table fs_exec_sysctls[] = {
 	{ }
 };
 
-static int __init init_fs_exec_sysctls(void)
-{
-	register_sysctl_init("fs", fs_exec_sysctls);
-	return 0;
-}
-
-fs_initcall(init_fs_exec_sysctls);
+fs_sysctl_initcall(fs_exec_sysctls);
 #endif /* CONFIG_SYSCTL */
diff --git a/fs/file_table.c b/fs/file_table.c
index 57edef16dce4..52b60e9378a7 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -116,12 +116,7 @@ static struct ctl_table fs_stat_sysctls[] = {
 	{ }
 };
 
-static int __init init_fs_stat_sysctls(void)
-{
-	register_sysctl_init("fs", fs_stat_sysctls);
-	return 0;
-}
-fs_initcall(init_fs_stat_sysctls);
+fs_sysctl_initcall(fs_stat_sysctls);
 #endif
 
 static struct file *__alloc_file(int flags, const struct cred *cred)
diff --git a/fs/inode.c b/fs/inode.c
index bef6ba9b8eb4..cd20d6dd8ffa 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -132,12 +132,7 @@ static struct ctl_table inodes_sysctls[] = {
 	{ }
 };
 
-static int __init init_fs_inode_sysctls(void)
-{
-	register_sysctl_init("fs", inodes_sysctls);
-	return 0;
-}
-early_initcall(init_fs_inode_sysctls);
+fs_sysctl_initcall(inodes_sysctls);
 #endif
 
 static int no_open(struct inode *inode, struct file *file)
diff --git a/fs/locks.c b/fs/locks.c
index 8c6df10cd9ed..d86a74d19ed3 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -113,12 +113,7 @@ static struct ctl_table locks_sysctls[] = {
 	{}
 };
 
-static int __init init_fs_locks_sysctls(void)
-{
-	register_sysctl_init("fs", locks_sysctls);
-	return 0;
-}
-early_initcall(init_fs_locks_sysctls);
+fs_sysctl_initcall(locks_sysctls);
 #endif /* CONFIG_SYSCTL */
 
 /*
diff --git a/fs/namei.c b/fs/namei.c
index 8d4f832f94aa..7eb636796fd4 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1066,13 +1066,7 @@ static struct ctl_table namei_sysctls[] = {
 	{ }
 };
 
-static int __init init_fs_namei_sysctls(void)
-{
-	register_sysctl_init("fs", namei_sysctls);
-	return 0;
-}
-fs_initcall(init_fs_namei_sysctls);
-
+fs_sysctl_initcall(namei_sysctls);
 #endif /* CONFIG_SYSCTL */
 
 /**
diff --git a/fs/namespace.c b/fs/namespace.c
index 647af66f313d..7117214b7a85 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -4626,11 +4626,6 @@ static struct ctl_table fs_namespace_sysctls[] = {
 	{ }
 };
 
-static int __init init_fs_namespace_sysctls(void)
-{
-	register_sysctl_init("fs", fs_namespace_sysctls);
-	return 0;
-}
-fs_initcall(init_fs_namespace_sysctls);
+fs_sysctl_initcall(fs_namespace_sysctls);
 
 #endif /* CONFIG_SYSCTL */
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index acf0805cf3a0..228be2197348 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -231,6 +231,21 @@ extern int sysctl_init_bases(void);
 extern void __register_sysctl_init(const char *path, struct ctl_table *table,
 				 const char *table_name);
 #define register_sysctl_init(path, table) __register_sysctl_init(path, table, #table)
+
+#define sysctl_initcall(path, table) \
+	static int __init init_##table(void) \
+	{ \
+		register_sysctl_init(path, table); \
+		return  0;\
+	} \
+	late_initcall(init_##table)
+
+#define kernel_sysctl_initcall(table) sysctl_initcall("kernel", table)
+#define fs_sysctl_initcall(table) sysctl_initcall("fs", table)
+#define vm_sysctl_initcall(table) sysctl_initcall("vm", table)
+#define debug_sysctl_initcall(table) sysctl_initcall("debug", table)
+#define dev_sysctl_initcall(table) sysctl_initcall("dev", table)
+
 extern struct ctl_table_header *register_sysctl_mount_point(const char *path);
 
 void do_sysctl_args(void);
diff --git a/kernel/stackleak.c b/kernel/stackleak.c
index 66b8af394e58..f084c1c787ba 100644
--- a/kernel/stackleak.c
+++ b/kernel/stackleak.c
@@ -57,12 +57,7 @@ static struct ctl_table stackleak_sysctls[] = {
 	{}
 };
 
-static int __init stackleak_sysctls_init(void)
-{
-	register_sysctl_init("kernel", stackleak_sysctls);
-	return 0;
-}
-late_initcall(stackleak_sysctls_init);
+kernel_sysctl_initcall(stackleak_sysctls);
 #endif /* CONFIG_SYSCTL */
 
 #define skip_erasing()	static_branch_unlikely(&stack_erasing_bypass)
-- 
2.12.3


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH] sysctl: Add a group of macro functions to initcall the sysctl table of each feature
  2021-12-07  1:13 [PATCH] sysctl: Add a group of macro functions to initcall the sysctl table of each feature Xiaoming Ni
@ 2021-12-07  1:38 ` Andrew Morton
  2021-12-07  1:50   ` Joe Perches
                     ` (2 more replies)
  0 siblings, 3 replies; 20+ messages in thread
From: Andrew Morton @ 2021-12-07  1:38 UTC (permalink / raw)
  To: Xiaoming Ni
  Cc: linux-kernel, mcgrof, viro, ebiederm, keescook, jlayton, bfields,
	yzaikin, wangle6, Joe Perches

On Tue, 7 Dec 2021 09:13:20 +0800 Xiaoming Ni <nixiaoming@huawei.com> wrote:

> To avoid duplicated code, add a set of macro functions to initialize the
> sysctl table for each feature.
> 
> The system initialization process is as follows:
> 
> 	start_kernel () {
> 		...
> 		/* init proc and sysctl base,
> 		 * proc_root_init()-->proc_sys_init()-->sysctl_init_bases()
> 		 */
> 		proc_root_init(); /* init proc and sysctl base */
> 		...
> 		arch_call_rest_init();
> 	}
> 
> 	arch_call_rest_init()-->rest_init()-->kernel_init()
> 	kernel_init() {
> 		...
> 		kernel_init_freeable(); /* do all initcalls */
> 		...
> 		do_sysctl_args(); /* Process the sysctl parameter: sysctl.*= */
> 	}
> 
> 	kernel_init_freeable()--->do_basic_setup()-->do_initcalls()
> 	do_initcalls() {
> 		for (level = 0; level < ARRAY_SIZE(initcall_levels) - 1; level++) {
> 			do_initcall_level
> 	}
> 
> The sysctl interface of each subfeature should be registered after
> sysctl_init_bases() and before do_sysctl_args(). It seems that the sysctl
> interface does not depend on initcall_levels. To prevent the sysctl
> interface from being initialized before the feature itself. The
> lowest-level late_initcall() is used as the common sysctl interface
> registration level.

I'm not normally a fan of wrapping commonly-used code sequences into
magical macros, but this one does seem to make sense.

I wonder if it is possible to cook up a checkpatch rule to tell people
to henceforth use the magic macros rather than to open-code things in
the old way.  Sounds hard.

> --- a/fs/coredump.c
> +++ b/fs/coredump.c
> @@ -943,12 +943,7 @@ static struct ctl_table coredump_sysctls[] = {
>  	{ }
>  };
>  
> -static int __init init_fs_coredump_sysctls(void)
> -{
> -	register_sysctl_init("kernel", coredump_sysctls);
> -	return 0;
> -}
> -fs_initcall(init_fs_coredump_sysctls);
> +kernel_sysctl_initcall(coredump_sysctls);

But this and several like it are functional changes.

>  #endif /* CONFIG_SYSCTL */
>
> ...
>
> --- a/fs/inode.c
> +++ b/fs/inode.c
> @@ -132,12 +132,7 @@ static struct ctl_table inodes_sysctls[] = {
>  	{ }
>  };
>  
> -static int __init init_fs_inode_sysctls(void)
> -{
> -	register_sysctl_init("fs", inodes_sysctls);
> -	return 0;
> -}
> -early_initcall(init_fs_inode_sysctls);
> +fs_sysctl_initcall(inodes_sysctls);
>  #endif

Here's another, of many.

Someone made the decision to use early_initcall() here (why?) and this
patch switches it to late_initcall()!  Worrisome.  Each such stealth
conversion should be explained and justified, shouldn't it?



^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] sysctl: Add a group of macro functions to initcall the sysctl table of each feature
  2021-12-07  1:38 ` Andrew Morton
@ 2021-12-07  1:50   ` Joe Perches
  2021-12-07  6:25     ` Xiaoming Ni
  2021-12-07  6:30     ` Julia Lawall
  2021-12-07  3:09   ` Xiaoming Ni
  2021-12-07 20:18   ` Luis Chamberlain
  2 siblings, 2 replies; 20+ messages in thread
From: Joe Perches @ 2021-12-07  1:50 UTC (permalink / raw)
  To: Andrew Morton, Xiaoming Ni, Julia Lawall, cocci
  Cc: linux-kernel, mcgrof, viro, ebiederm, keescook, jlayton, bfields,
	yzaikin, wangle6

On Mon, 2021-12-06 at 17:38 -0800, Andrew Morton wrote:
> On Tue, 7 Dec 2021 09:13:20 +0800 Xiaoming Ni <nixiaoming@huawei.com> wrote:
> 
> > To avoid duplicated code, add a set of macro functions to initialize the
> > sysctl table for each feature.
> > 
> > The system initialization process is as follows:
> > 
> > 	start_kernel () {
> > 		...
> > 		/* init proc and sysctl base,
> > 		 * proc_root_init()-->proc_sys_init()-->sysctl_init_bases()
> > 		 */
> > 		proc_root_init(); /* init proc and sysctl base */
> > 		...
> > 		arch_call_rest_init();
> > 	}
> > 
> > 	arch_call_rest_init()-->rest_init()-->kernel_init()
> > 	kernel_init() {
> > 		...
> > 		kernel_init_freeable(); /* do all initcalls */
> > 		...
> > 		do_sysctl_args(); /* Process the sysctl parameter: sysctl.*= */
> > 	}
> > 
> > 	kernel_init_freeable()--->do_basic_setup()-->do_initcalls()
> > 	do_initcalls() {
> > 		for (level = 0; level < ARRAY_SIZE(initcall_levels) - 1; level++) {
> > 			do_initcall_level
> > 	}
> > 
> > The sysctl interface of each subfeature should be registered after
> > sysctl_init_bases() and before do_sysctl_args(). It seems that the sysctl
> > interface does not depend on initcall_levels. To prevent the sysctl
> > interface from being initialized before the feature itself. The
> > lowest-level late_initcall() is used as the common sysctl interface
> > registration level.
> 
> I'm not normally a fan of wrapping commonly-used code sequences into
> magical macros, but this one does seem to make sense.
> 
> I wonder if it is possible to cook up a checkpatch rule to tell people
> to henceforth use the magic macros rather than to open-code things in
> the old way.  Sounds hard.

Almost impossible for checkpatch.
Likely easier in coccinelle.



^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] sysctl: Add a group of macro functions to initcall the sysctl table of each feature
  2021-12-07  1:38 ` Andrew Morton
  2021-12-07  1:50   ` Joe Perches
@ 2021-12-07  3:09   ` Xiaoming Ni
  2021-12-07 20:18   ` Luis Chamberlain
  2 siblings, 0 replies; 20+ messages in thread
From: Xiaoming Ni @ 2021-12-07  3:09 UTC (permalink / raw)
  To: Andrew Morton
  Cc: linux-kernel, mcgrof, viro, ebiederm, keescook, jlayton, bfields,
	yzaikin, wangle6, Joe Perches

On 2021/12/7 9:38, Andrew Morton wrote:
> On Tue, 7 Dec 2021 09:13:20 +0800 Xiaoming Ni <nixiaoming@huawei.com> wrote:
> 
>> To avoid duplicated code, add a set of macro functions to initialize the
>> sysctl table for each feature.
>>
>> The system initialization process is as follows:
>>
>> 	start_kernel () {
>> 		...
>> 		/* init proc and sysctl base,
>> 		 * proc_root_init()-->proc_sys_init()-->sysctl_init_bases()
>> 		 */
>> 		proc_root_init(); /* init proc and sysctl base */
>> 		...
>> 		arch_call_rest_init();
>> 	}
>>
>> 	arch_call_rest_init()-->rest_init()-->kernel_init()
>> 	kernel_init() {
>> 		...
>> 		kernel_init_freeable(); /* do all initcalls */
>> 		...
>> 		do_sysctl_args(); /* Process the sysctl parameter: sysctl.*= */
>> 	}
>>
>> 	kernel_init_freeable()--->do_basic_setup()-->do_initcalls()
>> 	do_initcalls() {
>> 		for (level = 0; level < ARRAY_SIZE(initcall_levels) - 1; level++) {
>> 			do_initcall_level
>> 	}
>>
>> The sysctl interface of each subfeature should be registered after
>> sysctl_init_bases() and before do_sysctl_args(). It seems that the sysctl
>> interface does not depend on initcall_levels. To prevent the sysctl
>> interface from being initialized before the feature itself. The
>> lowest-level late_initcall() is used as the common sysctl interface
>> registration level.
> 
> I'm not normally a fan of wrapping commonly-used code sequences into
> magical macros, but this one does seem to make sense.
> 
> I wonder if it is possible to cook up a checkpatch rule to tell people
> to henceforth use the magic macros rather than to open-code things in
> the old way.  Sounds hard.
> 
>> --- a/fs/coredump.c
>> +++ b/fs/coredump.c
>> @@ -943,12 +943,7 @@ static struct ctl_table coredump_sysctls[] = {
>>   	{ }
>>   };
>>   
>> -static int __init init_fs_coredump_sysctls(void)
>> -{
>> -	register_sysctl_init("kernel", coredump_sysctls);
>> -	return 0;
>> -}
>> -fs_initcall(init_fs_coredump_sysctls);
>> +kernel_sysctl_initcall(coredump_sysctls);
> 
> But this and several like it are functional changes.
> 
>>   #endif /* CONFIG_SYSCTL */
>>
>> ...
>>
>> --- a/fs/inode.c
>> +++ b/fs/inode.c
>> @@ -132,12 +132,7 @@ static struct ctl_table inodes_sysctls[] = {
>>   	{ }
>>   };
>>   
>> -static int __init init_fs_inode_sysctls(void)
>> -{
>> -	register_sysctl_init("fs", inodes_sysctls);
>> -	return 0;
>> -}
>> -early_initcall(init_fs_inode_sysctls);
>> +fs_sysctl_initcall(inodes_sysctls);
>>   #endif
> 
> Here's another, of many.
> 
> Someone made the decision to use early_initcall() here (why?) and this
> patch switches it to late_initcall()!  Worrisome.  Each such stealth
> conversion should be explained and justified, shouldn't it?
> 

static noinline void __init kernel_init_freeable(void)
{
	...
	do_pre_smp_initcalls(); /* do early_initcall */
	lockup_detector_init();

	smp_init();
	sched_init_smp();

	padata_init();
	page_alloc_init_late();
	/* Initialize page ext after all struct pages are initialized. */
	page_ext_init();

	do_basic_setup();  /* do other initcall */
	...
}

Between do_pre_smp_initcalls() and do_basic_setup(), no sysctl interface 
window is configured. In addition, all sysctl data has initial values. 
Delayed configuration does not affect the behavior after startup.
So I think we can change it to late_initcall().


Thanks
Xiaoming Ni

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] sysctl: Add a group of macro functions to initcall the sysctl table of each feature
  2021-12-07  1:50   ` Joe Perches
@ 2021-12-07  6:25     ` Xiaoming Ni
  2021-12-07  6:30     ` Julia Lawall
  1 sibling, 0 replies; 20+ messages in thread
From: Xiaoming Ni @ 2021-12-07  6:25 UTC (permalink / raw)
  To: Joe Perches, Andrew Morton, Julia Lawall, cocci
  Cc: linux-kernel, mcgrof, viro, ebiederm, keescook, jlayton, bfields,
	yzaikin, wangle6

On 2021/12/7 9:50, Joe Perches wrote:
> On Mon, 2021-12-06 at 17:38 -0800, Andrew Morton wrote:
>> On Tue, 7 Dec 2021 09:13:20 +0800 Xiaoming Ni <nixiaoming@huawei.com> wrote:
>>
>>> To avoid duplicated code, add a set of macro functions to initialize the
>>> sysctl table for each feature.
>>>
>>> The system initialization process is as follows:
>>>
>>> 	start_kernel () {
>>> 		...
>>> 		/* init proc and sysctl base,
>>> 		 * proc_root_init()-->proc_sys_init()-->sysctl_init_bases()
>>> 		 */
>>> 		proc_root_init(); /* init proc and sysctl base */
>>> 		...
>>> 		arch_call_rest_init();
>>> 	}
>>>
>>> 	arch_call_rest_init()-->rest_init()-->kernel_init()
>>> 	kernel_init() {
>>> 		...
>>> 		kernel_init_freeable(); /* do all initcalls */
>>> 		...
>>> 		do_sysctl_args(); /* Process the sysctl parameter: sysctl.*= */
>>> 	}
>>>
>>> 	kernel_init_freeable()--->do_basic_setup()-->do_initcalls()
>>> 	do_initcalls() {
>>> 		for (level = 0; level < ARRAY_SIZE(initcall_levels) - 1; level++) {
>>> 			do_initcall_level
>>> 	}
>>>
>>> The sysctl interface of each subfeature should be registered after
>>> sysctl_init_bases() and before do_sysctl_args(). It seems that the sysctl
>>> interface does not depend on initcall_levels. To prevent the sysctl
>>> interface from being initialized before the feature itself. The
>>> lowest-level late_initcall() is used as the common sysctl interface
>>> registration level.
>>
>> I'm not normally a fan of wrapping commonly-used code sequences into
>> magical macros, but this one does seem to make sense.
>>
>> I wonder if it is possible to cook up a checkpatch rule to tell people
>> to henceforth use the magic macros rather than to open-code things in
>> the old way.  Sounds hard.
> 
> Almost impossible for checkpatch.
> Likely easier in coccinelle.
>

Maybe we can add a rudimentary check to the checkpatch.

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index b01c36a15d9d..26e953ae4cc5 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -7212,6 +7212,12 @@ sub process {
                              "Deprecated use of '$deprecated_api', 
prefer '$new_api' instead\n" . $herecurr);
                 }

+# check register_sysctl_init
+               if ($prevline =~ /{/ && $rawline =~ 
/\sregister_sysctl_init\(\"(kernel|fs|vm|debug|dev)\",\s+(.*)\)\;/) {
+                       WARN("DEPRECATED_API",
+                            "Deprecated use of 
'register_sysctl_init(\"$1\", $2);', prefer '$1_sysctl_initcall($2);' 
instead\n".$herecurr);
+               }
+
  # check for various structs that are normally const (ops, kgdb, 
device_tree)
  # and avoid what seem like struct definitions 'struct foo {'
                 if (defined($const_structs) &&



Thanks
Xiaoming Ni

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH] sysctl: Add a group of macro functions to initcall the sysctl table of each feature
  2021-12-07  1:50   ` Joe Perches
  2021-12-07  6:25     ` Xiaoming Ni
@ 2021-12-07  6:30     ` Julia Lawall
  1 sibling, 0 replies; 20+ messages in thread
From: Julia Lawall @ 2021-12-07  6:30 UTC (permalink / raw)
  To: Joe Perches
  Cc: Andrew Morton, Xiaoming Ni, Julia Lawall, cocci, linux-kernel,
	mcgrof, viro, ebiederm, keescook, jlayton, bfields, yzaikin,
	wangle6



On Mon, 6 Dec 2021, Joe Perches wrote:

> On Mon, 2021-12-06 at 17:38 -0800, Andrew Morton wrote:
> > On Tue, 7 Dec 2021 09:13:20 +0800 Xiaoming Ni <nixiaoming@huawei.com> wrote:
> >
> > > To avoid duplicated code, add a set of macro functions to initialize the
> > > sysctl table for each feature.
> > >
> > > The system initialization process is as follows:
> > >
> > > 	start_kernel () {
> > > 		...
> > > 		/* init proc and sysctl base,
> > > 		 * proc_root_init()-->proc_sys_init()-->sysctl_init_bases()
> > > 		 */
> > > 		proc_root_init(); /* init proc and sysctl base */
> > > 		...
> > > 		arch_call_rest_init();
> > > 	}
> > >
> > > 	arch_call_rest_init()-->rest_init()-->kernel_init()
> > > 	kernel_init() {
> > > 		...
> > > 		kernel_init_freeable(); /* do all initcalls */
> > > 		...
> > > 		do_sysctl_args(); /* Process the sysctl parameter: sysctl.*= */
> > > 	}
> > >
> > > 	kernel_init_freeable()--->do_basic_setup()-->do_initcalls()
> > > 	do_initcalls() {
> > > 		for (level = 0; level < ARRAY_SIZE(initcall_levels) - 1; level++) {
> > > 			do_initcall_level
> > > 	}
> > >
> > > The sysctl interface of each subfeature should be registered after
> > > sysctl_init_bases() and before do_sysctl_args(). It seems that the sysctl
> > > interface does not depend on initcall_levels. To prevent the sysctl
> > > interface from being initialized before the feature itself. The
> > > lowest-level late_initcall() is used as the common sysctl interface
> > > registration level.
> >
> > I'm not normally a fan of wrapping commonly-used code sequences into
> > magical macros, but this one does seem to make sense.
> >
> > I wonder if it is possible to cook up a checkpatch rule to tell people
> > to henceforth use the magic macros rather than to open-code things in
> > the old way.  Sounds hard.
>
> Almost impossible for checkpatch.
> Likely easier in coccinelle.

Is there a typical example of the open coded version?

julia

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] sysctl: Add a group of macro functions to initcall the sysctl table of each feature
  2021-12-07  1:38 ` Andrew Morton
  2021-12-07  1:50   ` Joe Perches
  2021-12-07  3:09   ` Xiaoming Ni
@ 2021-12-07 20:18   ` Luis Chamberlain
  2021-12-07 21:08     ` Eric W. Biederman
  2 siblings, 1 reply; 20+ messages in thread
From: Luis Chamberlain @ 2021-12-07 20:18 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Xiaoming Ni, linux-kernel, viro, ebiederm, keescook, jlayton,
	bfields, yzaikin, wangle6, Joe Perches

On Mon, Dec 06, 2021 at 05:38:42PM -0800, Andrew Morton wrote:
> On Tue, 7 Dec 2021 09:13:20 +0800 Xiaoming Ni <nixiaoming@huawei.com> wrote:
> > --- a/fs/inode.c
> > +++ b/fs/inode.c
> > @@ -132,12 +132,7 @@ static struct ctl_table inodes_sysctls[] = {
> >  	{ }
> >  };
> >  
> > -static int __init init_fs_inode_sysctls(void)
> > -{
> > -	register_sysctl_init("fs", inodes_sysctls);
> > -	return 0;
> > -}
> > -early_initcall(init_fs_inode_sysctls);
> > +fs_sysctl_initcall(inodes_sysctls);
> >  #endif
> 
> Here's another, of many.
> 
> Someone made the decision to use early_initcall() here (why?) and this
> patch switches it to late_initcall()!  Worrisome.  Each such stealth
> conversion should be explained and justified, shouldn't it?

I made the decisions for quite a bit of the ordering and yes I agree
this need *very careful* explanation, specially if we are going to
generalize this.

First and foremost. git grep for sysctl_init_bases and you will see
that the bases for now are initialized on proc_sys_init() and that
gets called on proc_root_init() and that in turn on init/main.c's
start_kernel(). And so this happens *before* the init levels.

The proper care for what goes on top of this needs to take into
consideration the different init levels and that the if a sysctl
is using a directory *on top* of a base, then that sysctl registration
must be registered *after* that directory. The *base* directory for
"fs" is now registered through fs/sysctls.c() on init_fs_sysctls()
using register_sysctl_base(). I made these changes with these names
and requiring the DECLARE_SYSCTL_BASE() so it would be easy for us
to look at where these are declared.

So the next step in order to consider is *link* ordering and that
order is maintained by the Makefile. That is why I put this at the
top of the fs Makfile:

obj-$(CONFIG_SYSCTL)            += sysctls.o 

So any file after this can use early_initcall(), because the base
for "fs" was declared first in link order, and it used early_initcall().
It is fine to have the other stuff that goes on top of the "fs" base
use late_initcall() but that assumes that vetting has been done so that
if a directory on "fs" was created, let's call it "foo", vetting was done
to ensure that things on top of "foo" are registered *after* the "foo"
directory.

We now have done the cleanup for "fs", and we can do what we see fine
for "fs", but we may run into surprises later with the other bases, so
I'd be wary of making assumptions at this point if we can use
late_initcall().

So, as a rule of thumb I'd like to see bases use early_initcall(). The
rest requires manual work and vetting.

So, how about this, we define fs_sysctl_initcall() to use also
early_initcall(), and ask susbsystems to do their vetting so that
the base also gets linked first.

After this, if a directory on top of a base is created we should likely create
a new init level and just bump that to use the next init level. So
something like fs_sysctl_base_initcall_subdir_1() map to core_initcall()
and so on.

That would allow us to easily grep for directory structures easily and
puts some implicit onus of ordering on those folks doing these conversions.
We'd document well the link order stuff for those using the base stuff
too as that is likely only where this will matter most.

  Luis

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] sysctl: Add a group of macro functions to initcall the sysctl table of each feature
  2021-12-07 20:18   ` Luis Chamberlain
@ 2021-12-07 21:08     ` Eric W. Biederman
  2021-12-07 22:39       ` Luis Chamberlain
  0 siblings, 1 reply; 20+ messages in thread
From: Eric W. Biederman @ 2021-12-07 21:08 UTC (permalink / raw)
  To: Luis Chamberlain
  Cc: Andrew Morton, Xiaoming Ni, linux-kernel, viro, keescook,
	jlayton, bfields, yzaikin, wangle6, Joe Perches

Luis Chamberlain <mcgrof@kernel.org> writes:

> On Mon, Dec 06, 2021 at 05:38:42PM -0800, Andrew Morton wrote:
>> On Tue, 7 Dec 2021 09:13:20 +0800 Xiaoming Ni <nixiaoming@huawei.com> wrote:
>> > --- a/fs/inode.c
>> > +++ b/fs/inode.c
>> > @@ -132,12 +132,7 @@ static struct ctl_table inodes_sysctls[] = {
>> >  	{ }
>> >  };
>> >  
>> > -static int __init init_fs_inode_sysctls(void)
>> > -{
>> > -	register_sysctl_init("fs", inodes_sysctls);
>> > -	return 0;
>> > -}
>> > -early_initcall(init_fs_inode_sysctls);
>> > +fs_sysctl_initcall(inodes_sysctls);
>> >  #endif
>> 
>> Here's another, of many.
>> 
>> Someone made the decision to use early_initcall() here (why?) and this
>> patch switches it to late_initcall()!  Worrisome.  Each such stealth
>> conversion should be explained and justified, shouldn't it?
>
> I made the decisions for quite a bit of the ordering and yes I agree
> this need *very careful* explanation, specially if we are going to
> generalize this.
>
> First and foremost. git grep for sysctl_init_bases and you will see
> that the bases for now are initialized on proc_sys_init() and that
> gets called on proc_root_init() and that in turn on init/main.c's
> start_kernel(). And so this happens *before* the init levels.
>
> The proper care for what goes on top of this needs to take into
> consideration the different init levels and that the if a sysctl
> is using a directory *on top* of a base, then that sysctl registration
> must be registered *after* that directory. The *base* directory for
> "fs" is now registered through fs/sysctls.c() on init_fs_sysctls()
> using register_sysctl_base(). I made these changes with these names
> and requiring the DECLARE_SYSCTL_BASE() so it would be easy for us
> to look at where these are declared.
>
> So the next step in order to consider is *link* ordering and that
> order is maintained by the Makefile. That is why I put this at the
> top of the fs Makfile:
>
> obj-$(CONFIG_SYSCTL)            += sysctls.o 
>
> So any file after this can use early_initcall(), because the base
> for "fs" was declared first in link order, and it used early_initcall().
> It is fine to have the other stuff that goes on top of the "fs" base
> use late_initcall() but that assumes that vetting has been done so that
> if a directory on "fs" was created, let's call it "foo", vetting was done
> to ensure that things on top of "foo" are registered *after* the "foo"
> directory.
>
> We now have done the cleanup for "fs", and we can do what we see fine
> for "fs", but we may run into surprises later with the other bases, so
> I'd be wary of making assumptions at this point if we can use
> late_initcall().
>
> So, as a rule of thumb I'd like to see bases use early_initcall(). The
> rest requires manual work and vetting.
>
> So, how about this, we define fs_sysctl_initcall() to use also
> early_initcall(), and ask susbsystems to do their vetting so that
> the base also gets linked first.
>
> After this, if a directory on top of a base is created we should likely create
> a new init level and just bump that to use the next init level. So
> something like fs_sysctl_base_initcall_subdir_1() map to core_initcall()
> and so on.
>
> That would allow us to easily grep for directory structures easily and
> puts some implicit onus of ordering on those folks doing these conversions.
> We'd document well the link order stuff for those using the base stuff
> too as that is likely only where this will matter most.

I am a bit confused at this explanation of things.

Last I looked the implementation of sysctls allocated the directories
independently of the sysctls entries that populated them.

Which should be that as long as there are not conflicts in name of
directory entries, any order should work.

Am I misremembering how the code works?


Eric

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] sysctl: Add a group of macro functions to initcall the sysctl table of each feature
  2021-12-07 21:08     ` Eric W. Biederman
@ 2021-12-07 22:39       ` Luis Chamberlain
  2021-12-08  2:10         ` Xiaoming Ni
  2021-12-13 16:04         ` [PATCH] " Eric W. Biederman
  0 siblings, 2 replies; 20+ messages in thread
From: Luis Chamberlain @ 2021-12-07 22:39 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Andrew Morton, Xiaoming Ni, linux-kernel, viro, keescook,
	jlayton, bfields, yzaikin, wangle6, Joe Perches

On Tue, Dec 07, 2021 at 03:08:03PM -0600, Eric W. Biederman wrote:
> Luis Chamberlain <mcgrof@kernel.org> writes:
> 
> > On Mon, Dec 06, 2021 at 05:38:42PM -0800, Andrew Morton wrote:
> >> On Tue, 7 Dec 2021 09:13:20 +0800 Xiaoming Ni <nixiaoming@huawei.com> wrote:
> >> > --- a/fs/inode.c
> >> > +++ b/fs/inode.c
> >> > @@ -132,12 +132,7 @@ static struct ctl_table inodes_sysctls[] = {
> >> >  	{ }
> >> >  };
> >> >  
> >> > -static int __init init_fs_inode_sysctls(void)
> >> > -{
> >> > -	register_sysctl_init("fs", inodes_sysctls);
> >> > -	return 0;
> >> > -}
> >> > -early_initcall(init_fs_inode_sysctls);
> >> > +fs_sysctl_initcall(inodes_sysctls);
> >> >  #endif
> >> 
> >> Here's another, of many.
> >> 
> >> Someone made the decision to use early_initcall() here (why?) and this
> >> patch switches it to late_initcall()!  Worrisome.  Each such stealth
> >> conversion should be explained and justified, shouldn't it?
> >
> > I made the decisions for quite a bit of the ordering and yes I agree
> > this need *very careful* explanation, specially if we are going to
> > generalize this.
> >
> > First and foremost. git grep for sysctl_init_bases and you will see
> > that the bases for now are initialized on proc_sys_init() and that
> > gets called on proc_root_init() and that in turn on init/main.c's
> > start_kernel(). And so this happens *before* the init levels.
> >
> > The proper care for what goes on top of this needs to take into
> > consideration the different init levels and that the if a sysctl
> > is using a directory *on top* of a base, then that sysctl registration
> > must be registered *after* that directory. The *base* directory for
> > "fs" is now registered through fs/sysctls.c() on init_fs_sysctls()
> > using register_sysctl_base(). I made these changes with these names
> > and requiring the DECLARE_SYSCTL_BASE() so it would be easy for us
> > to look at where these are declared.
> >
> > So the next step in order to consider is *link* ordering and that
> > order is maintained by the Makefile. That is why I put this at the
> > top of the fs Makfile:
> >
> > obj-$(CONFIG_SYSCTL)            += sysctls.o 
> >
> > So any file after this can use early_initcall(), because the base
> > for "fs" was declared first in link order, and it used early_initcall().
> > It is fine to have the other stuff that goes on top of the "fs" base
> > use late_initcall() but that assumes that vetting has been done so that
> > if a directory on "fs" was created, let's call it "foo", vetting was done
> > to ensure that things on top of "foo" are registered *after* the "foo"
> > directory.
> >
> > We now have done the cleanup for "fs", and we can do what we see fine
> > for "fs", but we may run into surprises later with the other bases, so
> > I'd be wary of making assumptions at this point if we can use
> > late_initcall().
> >
> > So, as a rule of thumb I'd like to see bases use early_initcall(). The
> > rest requires manual work and vetting.
> >
> > So, how about this, we define fs_sysctl_initcall() to use also
> > early_initcall(), and ask susbsystems to do their vetting so that
> > the base also gets linked first.
> >
> > After this, if a directory on top of a base is created we should likely create
> > a new init level and just bump that to use the next init level. So
> > something like fs_sysctl_base_initcall_subdir_1() map to core_initcall()
> > and so on.
> >
> > That would allow us to easily grep for directory structures easily and
> > puts some implicit onus of ordering on those folks doing these conversions.
> > We'd document well the link order stuff for those using the base stuff
> > too as that is likely only where this will matter most.
> 
> I am a bit confused at this explanation of things.
> 
> Last I looked the implementation of sysctls allocated the directories
> independently of the sysctls entries that populated them.

With most sysctls being created using the same kernel/sysctl.c file and
structure, yes, this was true. With the changes now on linux-next things
change a bit. The goal is to move sysctls to be registered where they
are actually defined. But the directory that holds them must be
registered first. During the first phase of cleanups now on linux-next
all filesystem "fs" syscls were moved to be delcared in the kernel's
fs/ directory. The last part was to register the base "fs" directory.
For this declareres were added to simplify that and to clarify which
are base directories:

https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?id=ededd3fc701668743087c77ceeeb7490107cc12c

Then, this commit moves the "fs" base to be declared to fs/ as well:

https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?id=d0f885a73ec6e05803ce99f279232b3116061ed8

This used early_initcall() for the base for "fs" and that is
because there are no built-in sysctls for "fs" which need to
be exposed prior to the init levels.

So after this then order is important. If you are using the same
init level, the the next thing which will ensure order is the order
of things being linked, so what order they appear on the Makefile.
And this is why the base move for the "fs" sysctl directory is kept
at the top of fs/Makfile:

obj-$(CONFIG_SYSCTL)		+= sysctls.o

  Luis

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] sysctl: Add a group of macro functions to initcall the sysctl table of each feature
  2021-12-07 22:39       ` Luis Chamberlain
@ 2021-12-08  2:10         ` Xiaoming Ni
  2021-12-08  2:44           ` Luis Chamberlain
  2021-12-13 16:04         ` [PATCH] " Eric W. Biederman
  1 sibling, 1 reply; 20+ messages in thread
From: Xiaoming Ni @ 2021-12-08  2:10 UTC (permalink / raw)
  To: Luis Chamberlain, Eric W. Biederman
  Cc: Andrew Morton, linux-kernel, viro, keescook, jlayton, bfields,
	yzaikin, wangle6, Joe Perches

On 2021/12/8 6:39, Luis Chamberlain wrote:
> On Tue, Dec 07, 2021 at 03:08:03PM -0600, Eric W. Biederman wrote:
>> Luis Chamberlain <mcgrof@kernel.org> writes:
>>
>>> On Mon, Dec 06, 2021 at 05:38:42PM -0800, Andrew Morton wrote:
>>>> On Tue, 7 Dec 2021 09:13:20 +0800 Xiaoming Ni <nixiaoming@huawei.com> wrote:
>>>>> --- a/fs/inode.c
>>>>> +++ b/fs/inode.c
>>>>> @@ -132,12 +132,7 @@ static struct ctl_table inodes_sysctls[] = {
>>>>>   	{ }
>>>>>   };
>>>>>   
>>>>> -static int __init init_fs_inode_sysctls(void)
>>>>> -{
>>>>> -	register_sysctl_init("fs", inodes_sysctls);
>>>>> -	return 0;
>>>>> -}
>>>>> -early_initcall(init_fs_inode_sysctls);
>>>>> +fs_sysctl_initcall(inodes_sysctls);
>>>>>   #endif
>>>>
>>>> Here's another, of many.
>>>>
>>>> Someone made the decision to use early_initcall() here (why?) and this
>>>> patch switches it to late_initcall()!  Worrisome.  Each such stealth
>>>> conversion should be explained and justified, shouldn't it?
>>>
>>> I made the decisions for quite a bit of the ordering and yes I agree
>>> this need *very careful* explanation, specially if we are going to
>>> generalize this.
>>>
>>> First and foremost. git grep for sysctl_init_bases and you will see
>>> that the bases for now are initialized on proc_sys_init() and that
>>> gets called on proc_root_init() and that in turn on init/main.c's
>>> start_kernel(). And so this happens *before* the init levels.
>>>
>>> The proper care for what goes on top of this needs to take into
>>> consideration the different init levels and that the if a sysctl
>>> is using a directory *on top* of a base, then that sysctl registration
>>> must be registered *after* that directory. The *base* directory for
>>> "fs" is now registered through fs/sysctls.c() on init_fs_sysctls()
>>> using register_sysctl_base(). I made these changes with these names
>>> and requiring the DECLARE_SYSCTL_BASE() so it would be easy for us
>>> to look at where these are declared.
>>>
>>> So the next step in order to consider is *link* ordering and that
>>> order is maintained by the Makefile. That is why I put this at the
>>> top of the fs Makfile:
>>>
>>> obj-$(CONFIG_SYSCTL)            += sysctls.o
>>>
>>> So any file after this can use early_initcall(), because the base
>>> for "fs" was declared first in link order, and it used early_initcall().
>>> It is fine to have the other stuff that goes on top of the "fs" base
>>> use late_initcall() but that assumes that vetting has been done so that
>>> if a directory on "fs" was created, let's call it "foo", vetting was done
>>> to ensure that things on top of "foo" are registered *after* the "foo"
>>> directory.
>>>
>>> We now have done the cleanup for "fs", and we can do what we see fine
>>> for "fs", but we may run into surprises later with the other bases, so
>>> I'd be wary of making assumptions at this point if we can use
>>> late_initcall().
>>>
>>> So, as a rule of thumb I'd like to see bases use early_initcall(). The
>>> rest requires manual work and vetting.
>>>
>>> So, how about this, we define fs_sysctl_initcall() to use also
>>> early_initcall(), and ask susbsystems to do their vetting so that
>>> the base also gets linked first.
>>>
>>> After this, if a directory on top of a base is created we should likely create
>>> a new init level and just bump that to use the next init level. So
>>> something like fs_sysctl_base_initcall_subdir_1() map to core_initcall()
>>> and so on.
>>>
>>> That would allow us to easily grep for directory structures easily and
>>> puts some implicit onus of ordering on those folks doing these conversions.
>>> We'd document well the link order stuff for those using the base stuff
>>> too as that is likely only where this will matter most.
>>
>> I am a bit confused at this explanation of things.
>>
>> Last I looked the implementation of sysctls allocated the directories
>> independently of the sysctls entries that populated them.
> 
> With most sysctls being created using the same kernel/sysctl.c file and
> structure, yes, this was true. With the changes now on linux-next things
> change a bit. The goal is to move sysctls to be registered where they
> are actually defined. But the directory that holds them must be
> registered first. During the first phase of cleanups now on linux-next
> all filesystem "fs" syscls were moved to be delcared in the kernel's
> fs/ directory. The last part was to register the base "fs" directory.
> For this declareres were added to simplify that and to clarify which
> are base directories:
> 
> https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?id=ededd3fc701668743087c77ceeeb7490107cc12c
> 
> Then, this commit moves the "fs" base to be declared to fs/ as well:
> 
> https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?id=d0f885a73ec6e05803ce99f279232b3116061ed8
> 
> This used early_initcall() for the base for "fs" and that is
> because there are no built-in sysctls for "fs" which need to
> be exposed prior to the init levels.
> 
> So after this then order is important. If you are using the same
> init level, the the next thing which will ensure order is the order
> of things being linked, so what order they appear on the Makefile.
> And this is why the base move for the "fs" sysctl directory is kept
> at the top of fs/Makfile:
> 
> obj-$(CONFIG_SYSCTL)		+= sysctls.o
> 
>    Luis
> .
> 

Root node of the tree, using "early_initcall":
	Basic framework,  "fs", "kernel", "debug", "vm", "dev", "net"
Fork node. Select initcall_level based on the number of directory levels:
	Registration directory shared by multiple features.
Leaf node, use "late_initcall":
	File Interface

Is this a feasible classification?

Thanks
Xiaoming Ni

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] sysctl: Add a group of macro functions to initcall the sysctl table of each feature
  2021-12-08  2:10         ` Xiaoming Ni
@ 2021-12-08  2:44           ` Luis Chamberlain
  2021-12-08 12:34             ` Xiaoming Ni
  0 siblings, 1 reply; 20+ messages in thread
From: Luis Chamberlain @ 2021-12-08  2:44 UTC (permalink / raw)
  To: Xiaoming Ni
  Cc: Eric W. Biederman, Andrew Morton, linux-kernel, viro, keescook,
	jlayton, bfields, yzaikin, wangle6, Joe Perches

On Wed, Dec 08, 2021 at 10:10:08AM +0800, Xiaoming Ni wrote:
> On 2021/12/8 6:39, Luis Chamberlain wrote:
> > On Tue, Dec 07, 2021 at 03:08:03PM -0600, Eric W. Biederman wrote:
> > > Luis Chamberlain <mcgrof@kernel.org> writes:
> > > 
> > > > On Mon, Dec 06, 2021 at 05:38:42PM -0800, Andrew Morton wrote:
> > > > > On Tue, 7 Dec 2021 09:13:20 +0800 Xiaoming Ni <nixiaoming@huawei.com> wrote:
> > > > > > --- a/fs/inode.c
> > > > > > +++ b/fs/inode.c
> > > > > > @@ -132,12 +132,7 @@ static struct ctl_table inodes_sysctls[] = {
> > > > > >   	{ }
> > > > > >   };
> > > > > > -static int __init init_fs_inode_sysctls(void)
> > > > > > -{
> > > > > > -	register_sysctl_init("fs", inodes_sysctls);
> > > > > > -	return 0;
> > > > > > -}
> > > > > > -early_initcall(init_fs_inode_sysctls);
> > > > > > +fs_sysctl_initcall(inodes_sysctls);
> > > > > >   #endif
> > > > > 
> > > > > Here's another, of many.
> > > > > 
> > > > > Someone made the decision to use early_initcall() here (why?) and this
> > > > > patch switches it to late_initcall()!  Worrisome.  Each such stealth
> > > > > conversion should be explained and justified, shouldn't it?
> > > > 
> > > > I made the decisions for quite a bit of the ordering and yes I agree
> > > > this need *very careful* explanation, specially if we are going to
> > > > generalize this.
> > > > 
> > > > First and foremost. git grep for sysctl_init_bases and you will see
> > > > that the bases for now are initialized on proc_sys_init() and that
> > > > gets called on proc_root_init() and that in turn on init/main.c's
> > > > start_kernel(). And so this happens *before* the init levels.
> > > > 
> > > > The proper care for what goes on top of this needs to take into
> > > > consideration the different init levels and that the if a sysctl
> > > > is using a directory *on top* of a base, then that sysctl registration
> > > > must be registered *after* that directory. The *base* directory for
> > > > "fs" is now registered through fs/sysctls.c() on init_fs_sysctls()
> > > > using register_sysctl_base(). I made these changes with these names
> > > > and requiring the DECLARE_SYSCTL_BASE() so it would be easy for us
> > > > to look at where these are declared.
> > > > 
> > > > So the next step in order to consider is *link* ordering and that
> > > > order is maintained by the Makefile. That is why I put this at the
> > > > top of the fs Makfile:
> > > > 
> > > > obj-$(CONFIG_SYSCTL)            += sysctls.o
> > > > 
> > > > So any file after this can use early_initcall(), because the base
> > > > for "fs" was declared first in link order, and it used early_initcall().
> > > > It is fine to have the other stuff that goes on top of the "fs" base
> > > > use late_initcall() but that assumes that vetting has been done so that
> > > > if a directory on "fs" was created, let's call it "foo", vetting was done
> > > > to ensure that things on top of "foo" are registered *after* the "foo"
> > > > directory.
> > > > 
> > > > We now have done the cleanup for "fs", and we can do what we see fine
> > > > for "fs", but we may run into surprises later with the other bases, so
> > > > I'd be wary of making assumptions at this point if we can use
> > > > late_initcall().
> > > > 
> > > > So, as a rule of thumb I'd like to see bases use early_initcall(). The
> > > > rest requires manual work and vetting.
> > > > 
> > > > So, how about this, we define fs_sysctl_initcall() to use also
> > > > early_initcall(), and ask susbsystems to do their vetting so that
> > > > the base also gets linked first.
> > > > 
> > > > After this, if a directory on top of a base is created we should likely create
> > > > a new init level and just bump that to use the next init level. So
> > > > something like fs_sysctl_base_initcall_subdir_1() map to core_initcall()
> > > > and so on.
> > > > 
> > > > That would allow us to easily grep for directory structures easily and
> > > > puts some implicit onus of ordering on those folks doing these conversions.
> > > > We'd document well the link order stuff for those using the base stuff
> > > > too as that is likely only where this will matter most.
> > > 
> > > I am a bit confused at this explanation of things.
> > > 
> > > Last I looked the implementation of sysctls allocated the directories
> > > independently of the sysctls entries that populated them.
> > 
> > With most sysctls being created using the same kernel/sysctl.c file and
> > structure, yes, this was true. With the changes now on linux-next things
> > change a bit. The goal is to move sysctls to be registered where they
> > are actually defined. But the directory that holds them must be
> > registered first. During the first phase of cleanups now on linux-next
> > all filesystem "fs" syscls were moved to be delcared in the kernel's
> > fs/ directory. The last part was to register the base "fs" directory.
> > For this declareres were added to simplify that and to clarify which
> > are base directories:
> > 
> > https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?id=ededd3fc701668743087c77ceeeb7490107cc12c
> > 
> > Then, this commit moves the "fs" base to be declared to fs/ as well:
> > 
> > https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?id=d0f885a73ec6e05803ce99f279232b3116061ed8
> > 
> > This used early_initcall() for the base for "fs" and that is
> > because there are no built-in sysctls for "fs" which need to
> > be exposed prior to the init levels.
> > 
> > So after this then order is important. If you are using the same
> > init level, the the next thing which will ensure order is the order
> > of things being linked, so what order they appear on the Makefile.
> > And this is why the base move for the "fs" sysctl directory is kept
> > at the top of fs/Makfile:
> > 
> > obj-$(CONFIG_SYSCTL)		+= sysctls.o
> > 
> >    Luis
> > .
> > 
> 
> Root node of the tree, using "early_initcall":
> 	Basic framework,  "fs", "kernel", "debug", "vm", "dev", "net"

register_sysctl_base() and yes these use early_initcall() as-is on
linux-next.

> Fork node. Select initcall_level based on the number of directory levels:
> 	Registration directory shared by multiple features.

Sure.

> Leaf node, use "late_initcall":
> 	File Interface

I am not sure this gives enough guidance. What is the difference between
fork node and a leaf node?

> Is this a feasible classification?

  Luis

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] sysctl: Add a group of macro functions to initcall the sysctl table of each feature
  2021-12-08  2:44           ` Luis Chamberlain
@ 2021-12-08 12:34             ` Xiaoming Ni
  2021-12-08 20:05               ` Luis Chamberlain
  0 siblings, 1 reply; 20+ messages in thread
From: Xiaoming Ni @ 2021-12-08 12:34 UTC (permalink / raw)
  To: Luis Chamberlain
  Cc: Eric W. Biederman, Andrew Morton, linux-kernel, viro, keescook,
	jlayton, bfields, yzaikin, wangle6, Joe Perches

On 2021/12/8 10:44, Luis Chamberlain wrote:
> On Wed, Dec 08, 2021 at 10:10:08AM +0800, Xiaoming Ni wrote:
>> On 2021/12/8 6:39, Luis Chamberlain wrote:
>>> On Tue, Dec 07, 2021 at 03:08:03PM -0600, Eric W. Biederman wrote:
>>>> Luis Chamberlain <mcgrof@kernel.org> writes:
>>>>
>>>>> On Mon, Dec 06, 2021 at 05:38:42PM -0800, Andrew Morton wrote:
>>>>>> On Tue, 7 Dec 2021 09:13:20 +0800 Xiaoming Ni <nixiaoming@huawei.com> wrote:
>>>>>>> --- a/fs/inode.c
>>>>>>> +++ b/fs/inode.c
>>>>>>> @@ -132,12 +132,7 @@ static struct ctl_table inodes_sysctls[] = {
>>>>>>>    	{ }
>>>>>>>    };
>>>>>>> -static int __init init_fs_inode_sysctls(void)
>>>>>>> -{
>>>>>>> -	register_sysctl_init("fs", inodes_sysctls);
>>>>>>> -	return 0;
>>>>>>> -}
>>>>>>> -early_initcall(init_fs_inode_sysctls);
>>>>>>> +fs_sysctl_initcall(inodes_sysctls);
>>>>>>>    #endif
>>>>>>
>>>>>> Here's another, of many.
>>>>>>
>>>>>> Someone made the decision to use early_initcall() here (why?) and this
>>>>>> patch switches it to late_initcall()!  Worrisome.  Each such stealth
>>>>>> conversion should be explained and justified, shouldn't it?
>>>>>
>>>>> I made the decisions for quite a bit of the ordering and yes I agree
>>>>> this need *very careful* explanation, specially if we are going to
>>>>> generalize this.
>>>>>
>>>>> First and foremost. git grep for sysctl_init_bases and you will see
>>>>> that the bases for now are initialized on proc_sys_init() and that
>>>>> gets called on proc_root_init() and that in turn on init/main.c's
>>>>> start_kernel(). And so this happens *before* the init levels.
>>>>>
>>>>> The proper care for what goes on top of this needs to take into
>>>>> consideration the different init levels and that the if a sysctl
>>>>> is using a directory *on top* of a base, then that sysctl registration
>>>>> must be registered *after* that directory. The *base* directory for
>>>>> "fs" is now registered through fs/sysctls.c() on init_fs_sysctls()
>>>>> using register_sysctl_base(). I made these changes with these names
>>>>> and requiring the DECLARE_SYSCTL_BASE() so it would be easy for us
>>>>> to look at where these are declared.
>>>>>
>>>>> So the next step in order to consider is *link* ordering and that
>>>>> order is maintained by the Makefile. That is why I put this at the
>>>>> top of the fs Makfile:
>>>>>
>>>>> obj-$(CONFIG_SYSCTL)            += sysctls.o
>>>>>
>>>>> So any file after this can use early_initcall(), because the base
>>>>> for "fs" was declared first in link order, and it used early_initcall().
>>>>> It is fine to have the other stuff that goes on top of the "fs" base
>>>>> use late_initcall() but that assumes that vetting has been done so that
>>>>> if a directory on "fs" was created, let's call it "foo", vetting was done
>>>>> to ensure that things on top of "foo" are registered *after* the "foo"
>>>>> directory.
>>>>>
>>>>> We now have done the cleanup for "fs", and we can do what we see fine
>>>>> for "fs", but we may run into surprises later with the other bases, so
>>>>> I'd be wary of making assumptions at this point if we can use
>>>>> late_initcall().
>>>>>
>>>>> So, as a rule of thumb I'd like to see bases use early_initcall(). The
>>>>> rest requires manual work and vetting.
>>>>>
>>>>> So, how about this, we define fs_sysctl_initcall() to use also
>>>>> early_initcall(), and ask susbsystems to do their vetting so that
>>>>> the base also gets linked first.
>>>>>
>>>>> After this, if a directory on top of a base is created we should likely create
>>>>> a new init level and just bump that to use the next init level. So
>>>>> something like fs_sysctl_base_initcall_subdir_1() map to core_initcall()
>>>>> and so on.
>>>>>
>>>>> That would allow us to easily grep for directory structures easily and
>>>>> puts some implicit onus of ordering on those folks doing these conversions.
>>>>> We'd document well the link order stuff for those using the base stuff
>>>>> too as that is likely only where this will matter most.
>>>>
>>>> I am a bit confused at this explanation of things.
>>>>
>>>> Last I looked the implementation of sysctls allocated the directories
>>>> independently of the sysctls entries that populated them.
>>>
>>> With most sysctls being created using the same kernel/sysctl.c file and
>>> structure, yes, this was true. With the changes now on linux-next things
>>> change a bit. The goal is to move sysctls to be registered where they
>>> are actually defined. But the directory that holds them must be
>>> registered first. During the first phase of cleanups now on linux-next
>>> all filesystem "fs" syscls were moved to be delcared in the kernel's
>>> fs/ directory. The last part was to register the base "fs" directory.
>>> For this declareres were added to simplify that and to clarify which
>>> are base directories:
>>>
>>> https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?id=ededd3fc701668743087c77ceeeb7490107cc12c
>>>
>>> Then, this commit moves the "fs" base to be declared to fs/ as well:
>>>
>>> https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?id=d0f885a73ec6e05803ce99f279232b3116061ed8
>>>
>>> This used early_initcall() for the base for "fs" and that is
>>> because there are no built-in sysctls for "fs" which need to
>>> be exposed prior to the init levels.
>>>
>>> So after this then order is important. If you are using the same
>>> init level, the the next thing which will ensure order is the order
>>> of things being linked, so what order they appear on the Makefile.
>>> And this is why the base move for the "fs" sysctl directory is kept
>>> at the top of fs/Makfile:
>>>
>>> obj-$(CONFIG_SYSCTL)		+= sysctls.o
>>>
>>>     Luis
>>> .
>>>
>>
>> Root node of the tree, using "early_initcall":
>> 	Basic framework,  "fs", "kernel", "debug", "vm", "dev", "net"
> 
> register_sysctl_base() and yes these use early_initcall() as-is on
> linux-next.
> 
>> Fork node. Select initcall_level based on the number of directory levels:
>> 	Registration directory shared by multiple features.
> 
> Sure.
> 
/proc/sys/kernel/random/
	random_table
	driver/char/random.c
/proc/sys/kernel/usermodehelper/
	usermodehelper_table
	kernel/umh.c
/proc/sys/kernel/firmware_config/
	firmware_config_table
	drivers/base/firmware_loader/fallback_table.c
/proc/sys/kernel/keys/
	key_sysctls
	security/keys/sysctl.c
/proc/sys/fs/inotify/
	inotify_table
	fs/notify/inotify/inotify_user.c
/proc/sys/fs/fanotify/
	fanotify_table
	fs/notify/fanotify/fanotify_user.c
/proc/sys/fs/epoll
	epoll_table
	fs/eventpoll.c

I haven't checked all the sysctl subdirectories, but it seems that many 
are not shared by multiple features.
Most features use the sysctl mechanism simply to create a file interface 
for configuring parameters.
There are few scenarios for creating directories for other features.
There may be tree fork nodes, but only a few.


>> Leaf node, use "late_initcall":
>> 	File Interface
> 
> I am not sure this gives enough guidance. What is the difference between
> fork node and a leaf node?
Leaf node:
a) File, .child = NULL
b) Directory, which is not shared by multiple features, .child != NULL


Thanks
Xiaoming Ni


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] sysctl: Add a group of macro functions to initcall the sysctl table of each feature
  2021-12-08 12:34             ` Xiaoming Ni
@ 2021-12-08 20:05               ` Luis Chamberlain
  2021-12-10  8:58                 ` [PATCH v2] " Xiaoming Ni
  0 siblings, 1 reply; 20+ messages in thread
From: Luis Chamberlain @ 2021-12-08 20:05 UTC (permalink / raw)
  To: Xiaoming Ni
  Cc: Eric W. Biederman, Andrew Morton, linux-kernel, viro, keescook,
	jlayton, bfields, yzaikin, wangle6, Joe Perches

On Wed, Dec 08, 2021 at 08:34:26PM +0800, Xiaoming Ni wrote:
> On 2021/12/8 10:44, Luis Chamberlain wrote:
> > On Wed, Dec 08, 2021 at 10:10:08AM +0800, Xiaoming Ni wrote:
> > > Root node of the tree, using "early_initcall":
> > > 	Basic framework,  "fs", "kernel", "debug", "vm", "dev", "net"
> > 
> > register_sysctl_base() and yes these use early_initcall() as-is on
> > linux-next.
> > 
> > > Fork node. Select initcall_level based on the number of directory levels:
> > > 	Registration directory shared by multiple features.
> > 
> > Sure.
> > 
> /proc/sys/kernel/random/
> 	random_table
> 	driver/char/random.c
> /proc/sys/kernel/usermodehelper/
> 	usermodehelper_table
> 	kernel/umh.c
> /proc/sys/kernel/firmware_config/
> 	firmware_config_table
> 	drivers/base/firmware_loader/fallback_table.c
> /proc/sys/kernel/keys/
> 	key_sysctls
> 	security/keys/sysctl.c
> /proc/sys/fs/inotify/
> 	inotify_table
> 	fs/notify/inotify/inotify_user.c
> /proc/sys/fs/fanotify/
> 	fanotify_table
> 	fs/notify/fanotify/fanotify_user.c
> /proc/sys/fs/epoll
> 	epoll_table
> 	fs/eventpoll.c
> 
> I haven't checked all the sysctl subdirectories, but it seems that many are
> not shared by multiple features.
> Most features use the sysctl mechanism simply to create a file interface for
> configuring parameters.
> There are few scenarios for creating directories for other features.
> There may be tree fork nodes, but only a few.
> 
> 
> > > Leaf node, use "late_initcall":
> > > 	File Interface
> > 
> > I am not sure this gives enough guidance. What is the difference between
> > fork node and a leaf node?
> Leaf node:
> a) File, .child = NULL
> b) Directory, which is not shared by multiple features, .child != NULL

If we are going to use that nomenclature then we should document it as such.
Fine by me.

  Luis

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH v2] sysctl: Add a group of macro functions to initcall the sysctl table of each feature
  2021-12-08 20:05               ` Luis Chamberlain
@ 2021-12-10  8:58                 ` Xiaoming Ni
  2021-12-10 17:20                   ` Luis Chamberlain
  2021-12-13  3:31                   ` [PATCH v3] " Xiaoming Ni
  0 siblings, 2 replies; 20+ messages in thread
From: Xiaoming Ni @ 2021-12-10  8:58 UTC (permalink / raw)
  To: linux-kernel, viro, ebiederm, keescook, jlayton, bfields, mcgrof,
	yzaikin, apw, joe, dwaipayanray1, lukas.bulwahn, julia.lawall,
	akpm
  Cc: nixiaoming, wangle6

To avoid duplicated code, add a set of macro functions to initialize the
sysctl table for each feature.

The system initialization process is as follows:

	start_kernel () {
		...
		/* init proc and sysctl base,
		 * proc_root_init()-->proc_sys_init()-->sysctl_init_bases()
		 */
		proc_root_init(); /* init proc and sysctl base */
		...
		arch_call_rest_init();
	}

	arch_call_rest_init()-->rest_init()-->kernel_init()
	kernel_init() {
		...
		kernel_init_freeable(); /* do all initcalls */
		...
		do_sysctl_args(); /* Process the sysctl parameter: sysctl.*= */
	}

	kernel_init_freeable()--->do_basic_setup()-->do_initcalls()
	do_initcalls() {
		for (level = 0; level < ARRAY_SIZE(initcall_levels) - 1; level++) {
			do_initcall_level
	}

The sysctl interface of each subfeature should be registered after
sysctl_init_bases() and before do_sysctl_args(). It seems that the sysctl
interface does not depend on initcall_levels. To prevent the sysctl
interface from being initialized before the feature itself. The
lowest-level late_initcall() is used as the common sysctl interface
registration level.

Signed-off-by: Xiaoming Ni <nixiaoming@huawei.com>

---
v2:
  Add a simple checkpatch check.
  Add code comment.
v1:
  https://lore.kernel.org/lkml/20211207011320.100102-1-nixiaoming@huawei.com/
---
 fs/coredump.c          |  7 +------
 fs/dcache.c            |  7 +------
 fs/exec.c              |  8 +-------
 fs/file_table.c        |  7 +------
 fs/inode.c             |  7 +------
 fs/locks.c             |  7 +------
 fs/namei.c             |  8 +-------
 fs/namespace.c         |  7 +------
 include/linux/sysctl.h | 19 +++++++++++++++++++
 kernel/stackleak.c     |  7 +------
 scripts/checkpatch.pl  |  6 ++++++
 11 files changed, 34 insertions(+), 56 deletions(-)

diff --git a/fs/coredump.c b/fs/coredump.c
index 570d98398668..8f6c6322651d 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -943,12 +943,7 @@ static struct ctl_table coredump_sysctls[] = {
 	{ }
 };
 
-static int __init init_fs_coredump_sysctls(void)
-{
-	register_sysctl_init("kernel", coredump_sysctls);
-	return 0;
-}
-fs_initcall(init_fs_coredump_sysctls);
+kernel_sysctl_initcall(coredump_sysctls);
 #endif /* CONFIG_SYSCTL */
 
 /*
diff --git a/fs/dcache.c b/fs/dcache.c
index 0eef1102f460..c1570243aaee 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -195,12 +195,7 @@ static struct ctl_table fs_dcache_sysctls[] = {
 	{ }
 };
 
-static int __init init_fs_dcache_sysctls(void)
-{
-	register_sysctl_init("fs", fs_dcache_sysctls);
-	return 0;
-}
-fs_initcall(init_fs_dcache_sysctls);
+fs_sysctl_initcall(fs_dcache_sysctls);
 #endif
 
 /*
diff --git a/fs/exec.c b/fs/exec.c
index cc5ec43df028..3c30e686af79 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -2124,11 +2124,5 @@ static struct ctl_table fs_exec_sysctls[] = {
 	{ }
 };
 
-static int __init init_fs_exec_sysctls(void)
-{
-	register_sysctl_init("fs", fs_exec_sysctls);
-	return 0;
-}
-
-fs_initcall(init_fs_exec_sysctls);
+fs_sysctl_initcall(fs_exec_sysctls);
 #endif /* CONFIG_SYSCTL */
diff --git a/fs/file_table.c b/fs/file_table.c
index 57edef16dce4..52b60e9378a7 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -116,12 +116,7 @@ static struct ctl_table fs_stat_sysctls[] = {
 	{ }
 };
 
-static int __init init_fs_stat_sysctls(void)
-{
-	register_sysctl_init("fs", fs_stat_sysctls);
-	return 0;
-}
-fs_initcall(init_fs_stat_sysctls);
+fs_sysctl_initcall(fs_stat_sysctls);
 #endif
 
 static struct file *__alloc_file(int flags, const struct cred *cred)
diff --git a/fs/inode.c b/fs/inode.c
index bef6ba9b8eb4..cd20d6dd8ffa 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -132,12 +132,7 @@ static struct ctl_table inodes_sysctls[] = {
 	{ }
 };
 
-static int __init init_fs_inode_sysctls(void)
-{
-	register_sysctl_init("fs", inodes_sysctls);
-	return 0;
-}
-early_initcall(init_fs_inode_sysctls);
+fs_sysctl_initcall(inodes_sysctls);
 #endif
 
 static int no_open(struct inode *inode, struct file *file)
diff --git a/fs/locks.c b/fs/locks.c
index 8c6df10cd9ed..d86a74d19ed3 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -113,12 +113,7 @@ static struct ctl_table locks_sysctls[] = {
 	{}
 };
 
-static int __init init_fs_locks_sysctls(void)
-{
-	register_sysctl_init("fs", locks_sysctls);
-	return 0;
-}
-early_initcall(init_fs_locks_sysctls);
+fs_sysctl_initcall(locks_sysctls);
 #endif /* CONFIG_SYSCTL */
 
 /*
diff --git a/fs/namei.c b/fs/namei.c
index 8d4f832f94aa..7eb636796fd4 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1066,13 +1066,7 @@ static struct ctl_table namei_sysctls[] = {
 	{ }
 };
 
-static int __init init_fs_namei_sysctls(void)
-{
-	register_sysctl_init("fs", namei_sysctls);
-	return 0;
-}
-fs_initcall(init_fs_namei_sysctls);
-
+fs_sysctl_initcall(namei_sysctls);
 #endif /* CONFIG_SYSCTL */
 
 /**
diff --git a/fs/namespace.c b/fs/namespace.c
index 647af66f313d..7117214b7a85 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -4626,11 +4626,6 @@ static struct ctl_table fs_namespace_sysctls[] = {
 	{ }
 };
 
-static int __init init_fs_namespace_sysctls(void)
-{
-	register_sysctl_init("fs", fs_namespace_sysctls);
-	return 0;
-}
-fs_initcall(init_fs_namespace_sysctls);
+fs_sysctl_initcall(fs_namespace_sysctls);
 
 #endif /* CONFIG_SYSCTL */
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index acf0805cf3a0..ce33e61a8287 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -231,6 +231,25 @@ extern int sysctl_init_bases(void);
 extern void __register_sysctl_init(const char *path, struct ctl_table *table,
 				 const char *table_name);
 #define register_sysctl_init(path, table) __register_sysctl_init(path, table, #table)
+
+#define sysctl_initcall(path, table) \
+	static int __init init_##table(void) \
+	{ \
+		register_sysctl_init(path, table); \
+		return  0;\
+	} \
+	late_initcall(init_##table)
+
+/*
+ * Use xxx_sysctl_initcall() to initialize your sysctl interface unless you want
+ * to register the sysctl directory and share it with other features.
+ */
+#define kernel_sysctl_initcall(table) sysctl_initcall("kernel", table)
+#define fs_sysctl_initcall(table) sysctl_initcall("fs", table)
+#define vm_sysctl_initcall(table) sysctl_initcall("vm", table)
+#define debug_sysctl_initcall(table) sysctl_initcall("debug", table)
+#define dev_sysctl_initcall(table) sysctl_initcall("dev", table)
+
 extern struct ctl_table_header *register_sysctl_mount_point(const char *path);
 
 void do_sysctl_args(void);
diff --git a/kernel/stackleak.c b/kernel/stackleak.c
index 66b8af394e58..f084c1c787ba 100644
--- a/kernel/stackleak.c
+++ b/kernel/stackleak.c
@@ -57,12 +57,7 @@ static struct ctl_table stackleak_sysctls[] = {
 	{}
 };
 
-static int __init stackleak_sysctls_init(void)
-{
-	register_sysctl_init("kernel", stackleak_sysctls);
-	return 0;
-}
-late_initcall(stackleak_sysctls_init);
+kernel_sysctl_initcall(stackleak_sysctls);
 #endif /* CONFIG_SYSCTL */
 
 #define skip_erasing()	static_branch_unlikely(&stack_erasing_bypass)
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index b01c36a15d9d..e8701d54b458 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -7212,6 +7212,12 @@ sub process {
 			     "Deprecated use of '$deprecated_api', prefer '$new_api' instead\n" . $herecurr);
 		}
 
+# check register_sysctl_init
+		if ($prevline =~ /{/ && $rawline =~ /\sregister_sysctl_init\(\"(kernel|fs|vm|debug|dev)\",\s+(.*)\)\;/) {
+			WARN("DEPRECATED_API",
+			     "Deprecated use of 'register_sysctl_init(\"$1\", $2);', prefer '$1_sysctl_initcall($2);' instead\n".$herecurr);
+		}
+
 # check for various structs that are normally const (ops, kgdb, device_tree)
 # and avoid what seem like struct definitions 'struct foo {'
 		if (defined($const_structs) &&
-- 
2.12.3


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH v2] sysctl: Add a group of macro functions to initcall the sysctl table of each feature
  2021-12-10  8:58                 ` [PATCH v2] " Xiaoming Ni
@ 2021-12-10 17:20                   ` Luis Chamberlain
  2021-12-12  9:58                     ` Xiaoming Ni
  2021-12-13  3:31                   ` [PATCH v3] " Xiaoming Ni
  1 sibling, 1 reply; 20+ messages in thread
From: Luis Chamberlain @ 2021-12-10 17:20 UTC (permalink / raw)
  To: Xiaoming Ni
  Cc: linux-kernel, viro, ebiederm, keescook, jlayton, bfields,
	yzaikin, apw, joe, dwaipayanray1, lukas.bulwahn, julia.lawall,
	akpm, wangle6

On Fri, Dec 10, 2021 at 04:58:49PM +0800, Xiaoming Ni wrote:
> To avoid duplicated code, add a set of macro functions to initialize the
> sysctl table for each feature.
> 
> The system initialization process is as follows:
> 
> 	start_kernel () {
> 		...
> 		/* init proc and sysctl base,
> 		 * proc_root_init()-->proc_sys_init()-->sysctl_init_bases()
> 		 */
> 		proc_root_init(); /* init proc and sysctl base */
> 		...
> 		arch_call_rest_init();
> 	}
> 
> 	arch_call_rest_init()-->rest_init()-->kernel_init()
> 	kernel_init() {
> 		...
> 		kernel_init_freeable(); /* do all initcalls */
> 		...
> 		do_sysctl_args(); /* Process the sysctl parameter: sysctl.*= */
> 	}
> 
> 	kernel_init_freeable()--->do_basic_setup()-->do_initcalls()
> 	do_initcalls() {
> 		for (level = 0; level < ARRAY_SIZE(initcall_levels) - 1; level++) {
> 			do_initcall_level
> 	}

It was nice to have this documented in the commit log, however you
don't provide a developer documentation for this in your changes.
Can you justify through documentation why we can use init levels
with the above information for the sysctl_initcall() macro?

> The sysctl interface of each subfeature should be registered after
> sysctl_init_bases() and before do_sysctl_args().

Indeed.

> It seems 

Seems is poor judgement for a change in the kernel. It is or not.

> that the sysctl
> interface does not depend on initcall_levels. To prevent the sysctl
> interface from being initialized before the feature itself. The
> lowest-level

Lower to me means early, but since we are talking about time, best
to clarify and say the latest init level during kernel bootup.

> late_initcall() is used as the common sysctl interface
> registration level.
> 
> Signed-off-by: Xiaoming Ni <nixiaoming@huawei.com>
> 
> ---
> v2:
>   Add a simple checkpatch check.
>   Add code comment.
> v1:
>   https://lore.kernel.org/lkml/20211207011320.100102-1-nixiaoming@huawei.com/
> ---
>  fs/coredump.c          |  7 +------
>  fs/dcache.c            |  7 +------
>  fs/exec.c              |  8 +-------
>  fs/file_table.c        |  7 +------
>  fs/inode.c             |  7 +------
>  fs/locks.c             |  7 +------
>  fs/namei.c             |  8 +-------
>  fs/namespace.c         |  7 +------
>  include/linux/sysctl.h | 19 +++++++++++++++++++
>  kernel/stackleak.c     |  7 +------
>  scripts/checkpatch.pl  |  6 ++++++
>  11 files changed, 34 insertions(+), 56 deletions(-)
> 
> diff --git a/fs/coredump.c b/fs/coredump.c
> index 570d98398668..8f6c6322651d 100644
> --- a/fs/coredump.c
> +++ b/fs/coredump.c
> @@ -943,12 +943,7 @@ static struct ctl_table coredump_sysctls[] = {
>  	{ }
>  };
>  
> -static int __init init_fs_coredump_sysctls(void)
> -{
> -	register_sysctl_init("kernel", coredump_sysctls);
> -	return 0;
> -}
> -fs_initcall(init_fs_coredump_sysctls);
> +kernel_sysctl_initcall(coredump_sysctls);

Nice.

Yes, although I went with fs_initcall() your documentation above
does give us certainty that this is fine as well. No need to kick
this through earlier.

>  #endif /* CONFIG_SYSCTL */
>  
>  /*
> diff --git a/fs/dcache.c b/fs/dcache.c
> index 0eef1102f460..c1570243aaee 100644
> --- a/fs/dcache.c
> +++ b/fs/dcache.c
> @@ -195,12 +195,7 @@ static struct ctl_table fs_dcache_sysctls[] = {
>  	{ }
>  };
>  
> -static int __init init_fs_dcache_sysctls(void)
> -{
> -	register_sysctl_init("fs", fs_dcache_sysctls);
> -	return 0;
> -}
> -fs_initcall(init_fs_dcache_sysctls);
> +fs_sysctl_initcall(fs_dcache_sysctls);

Seems fine by me using the same logic as above and I like that
you are splitting this by bases. Likewise for the others, this
is looking good.

> diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
> index acf0805cf3a0..ce33e61a8287 100644
> --- a/include/linux/sysctl.h
> +++ b/include/linux/sysctl.h
> @@ -231,6 +231,25 @@ extern int sysctl_init_bases(void);
>  extern void __register_sysctl_init(const char *path, struct ctl_table *table,
>  				 const char *table_name);

Yes please take the time to write some documentation here which can
explain to developers *why* we use the init levels specified.

>  #define register_sysctl_init(path, table) __register_sysctl_init(path, table, #table)
> +
> +#define sysctl_initcall(path, table) \
> +	static int __init init_##table(void) \
> +	{ \
> +		register_sysctl_init(path, table); \
> +		return  0;\
> +	} \
> +	late_initcall(init_##table)
> +
> +/*
> + * Use xxx_sysctl_initcall() to initialize your sysctl interface unless you want
> + * to register the sysctl directory and share it with other features.
> + */
> +#define kernel_sysctl_initcall(table) sysctl_initcall("kernel", table)
> +#define fs_sysctl_initcall(table) sysctl_initcall("fs", table)
> +#define vm_sysctl_initcall(table) sysctl_initcall("vm", table)
> +#define debug_sysctl_initcall(table) sysctl_initcall("debug", table)
> +#define dev_sysctl_initcall(table) sysctl_initcall("dev", table)
> +
>  extern struct ctl_table_header *register_sysctl_mount_point(const char *path);
>  
>  void do_sysctl_args(void);

  Luis

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2] sysctl: Add a group of macro functions to initcall the sysctl table of each feature
  2021-12-10 17:20                   ` Luis Chamberlain
@ 2021-12-12  9:58                     ` Xiaoming Ni
  0 siblings, 0 replies; 20+ messages in thread
From: Xiaoming Ni @ 2021-12-12  9:58 UTC (permalink / raw)
  To: Luis Chamberlain
  Cc: linux-kernel, viro, ebiederm, keescook, jlayton, bfields,
	yzaikin, apw, joe, dwaipayanray1, lukas.bulwahn, julia.lawall,
	akpm, wangle6

On 2021/12/11 1:20, Luis Chamberlain wrote:
> On Fri, Dec 10, 2021 at 04:58:49PM +0800, Xiaoming Ni wrote:
>> To avoid duplicated code, add a set of macro functions to initialize the
>> sysctl table for each feature.
>>
>> The system initialization process is as follows:
>>
>> 	start_kernel () {
>> 		...
>> 		/* init proc and sysctl base,
>> 		 * proc_root_init()-->proc_sys_init()-->sysctl_init_bases()
>> 		 */
>> 		proc_root_init(); /* init proc and sysctl base */
>> 		...
>> 		arch_call_rest_init();
>> 	}
>>
>> 	arch_call_rest_init()-->rest_init()-->kernel_init()
>> 	kernel_init() {
>> 		...
>> 		kernel_init_freeable(); /* do all initcalls */
>> 		...
>> 		do_sysctl_args(); /* Process the sysctl parameter: sysctl.*= */
>> 	}
>>
>> 	kernel_init_freeable()--->do_basic_setup()-->do_initcalls()
>> 	do_initcalls() {
>> 		for (level = 0; level < ARRAY_SIZE(initcall_levels) - 1; level++) {
>> 			do_initcall_level
>> 	}
> 
> It was nice to have this documented in the commit log, however you
> don't provide a developer documentation for this in your changes.
> Can you justify through documentation why we can use init levels
> with the above information for the sysctl_initcall() macro?
> 
>> The sysctl interface of each subfeature should be registered after
>> sysctl_init_bases() and before do_sysctl_args().
> 
> Indeed.
> 
>> It seems
> 
> Seems is poor judgement for a change in the kernel. It is or not.
> 
>> that the sysctl
>> interface does not depend on initcall_levels. To prevent the sysctl
>> interface from being initialized before the feature itself. The
>> lowest-level
> 
> Lower to me means early, but since we are talking about time, best
> to clarify and say the latest init level during kernel bootup.
> 
>> late_initcall() is used as the common sysctl interface
>> registration level.
>>
>> Signed-off-by: Xiaoming Ni <nixiaoming@huawei.com>
>>
>> ---
>> v2:
>>    Add a simple checkpatch check.
>>    Add code comment.
>> v1:
>>    https://lore.kernel.org/lkml/20211207011320.100102-1-nixiaoming@huawei.com/
>> ---
>>   fs/coredump.c          |  7 +------
>>   fs/dcache.c            |  7 +------
>>   fs/exec.c              |  8 +-------
>>   fs/file_table.c        |  7 +------
>>   fs/inode.c             |  7 +------
>>   fs/locks.c             |  7 +------
>>   fs/namei.c             |  8 +-------
>>   fs/namespace.c         |  7 +------
>>   include/linux/sysctl.h | 19 +++++++++++++++++++
>>   kernel/stackleak.c     |  7 +------
>>   scripts/checkpatch.pl  |  6 ++++++
>>   11 files changed, 34 insertions(+), 56 deletions(-)
>>
>> diff --git a/fs/coredump.c b/fs/coredump.c
>> index 570d98398668..8f6c6322651d 100644
>> --- a/fs/coredump.c
>> +++ b/fs/coredump.c
>> @@ -943,12 +943,7 @@ static struct ctl_table coredump_sysctls[] = {
>>   	{ }
>>   };
>>   
>> -static int __init init_fs_coredump_sysctls(void)
>> -{
>> -	register_sysctl_init("kernel", coredump_sysctls);
>> -	return 0;
>> -}
>> -fs_initcall(init_fs_coredump_sysctls);
>> +kernel_sysctl_initcall(coredump_sysctls);
> 
> Nice.
> 
> Yes, although I went with fs_initcall() your documentation above
> does give us certainty that this is fine as well. No need to kick
> this through earlier.
> 
>>   #endif /* CONFIG_SYSCTL */
>>   
>>   /*
>> diff --git a/fs/dcache.c b/fs/dcache.c
>> index 0eef1102f460..c1570243aaee 100644
>> --- a/fs/dcache.c
>> +++ b/fs/dcache.c
>> @@ -195,12 +195,7 @@ static struct ctl_table fs_dcache_sysctls[] = {
>>   	{ }
>>   };
>>   
>> -static int __init init_fs_dcache_sysctls(void)
>> -{
>> -	register_sysctl_init("fs", fs_dcache_sysctls);
>> -	return 0;
>> -}
>> -fs_initcall(init_fs_dcache_sysctls);
>> +fs_sysctl_initcall(fs_dcache_sysctls);
> 
> Seems fine by me using the same logic as above and I like that
> you are splitting this by bases. Likewise for the others, this
> is looking good.
> 
>> diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
>> index acf0805cf3a0..ce33e61a8287 100644
>> --- a/include/linux/sysctl.h
>> +++ b/include/linux/sysctl.h
>> @@ -231,6 +231,25 @@ extern int sysctl_init_bases(void);
>>   extern void __register_sysctl_init(const char *path, struct ctl_table *table,
>>   				 const char *table_name);
> 
> Yes please take the time to write some documentation here which can
> explain to developers *why* we use the init levels specified.
> 
>>   #define register_sysctl_init(path, table) __register_sysctl_init(path, table, #table)
>> +

/** 

  * sysctl_initcall() - register and init sysctl leaf node to path 

  * @path:  path name for sysctl base 

  * @table: This is the sysctl leaf table that needs to be registered to 
the path
  * 

  * Leaf node in the sysctl tree: 

  * a) File, .child = NULL 

  * b) Directory, which is not shared by multiple features, .child != 
NULL
  * 

  * The sysctl interface for each subfeature should be in the after 

  * sysctl_init_bases() and before do_sysctl_args(). 

  * sysctl_init_bases() is executed before early_initcall(). 

  * do_sysctl_args() is executed after late_initcall(). 

  * Therefore, it is safe to add leaves to the sysctl tree using 
late_initcall().
  */

How about that description?

>> +#define sysctl_initcall(path, table) \
>> +	static int __init init_##table(void) \
>> +	{ \
>> +		register_sysctl_init(path, table); \
>> +		return  0;\
>> +	} \
>> +	late_initcall(init_##table)
>> +
>> +/*
>> + * Use xxx_sysctl_initcall() to initialize your sysctl interface unless you want
>> + * to register the sysctl directory and share it with other features.
>> + */
>> +#define kernel_sysctl_initcall(table) sysctl_initcall("kernel", table)
>> +#define fs_sysctl_initcall(table) sysctl_initcall("fs", table)
>> +#define vm_sysctl_initcall(table) sysctl_initcall("vm", table)
>> +#define debug_sysctl_initcall(table) sysctl_initcall("debug", table)
>> +#define dev_sysctl_initcall(table) sysctl_initcall("dev", table)
>> +
>>   extern struct ctl_table_header *register_sysctl_mount_point(const char *path);
>>   
>>   void do_sysctl_args(void);
> 
>    Luis
> .
> 

Thanks
Xiaoming Ni

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH v3] sysctl: Add a group of macro functions to initcall the sysctl table of each feature
  2021-12-10  8:58                 ` [PATCH v2] " Xiaoming Ni
  2021-12-10 17:20                   ` Luis Chamberlain
@ 2021-12-13  3:31                   ` Xiaoming Ni
  2021-12-19 21:32                     ` Luis Chamberlain
  1 sibling, 1 reply; 20+ messages in thread
From: Xiaoming Ni @ 2021-12-13  3:31 UTC (permalink / raw)
  To: linux-kernel, viro, ebiederm, keescook, jlayton, bfields, mcgrof,
	yzaikin, apw, joe, dwaipayanray1, lukas.bulwahn, julia.lawall,
	akpm
  Cc: nixiaoming, wangle6

To avoid duplicated code, add a set of macro functions to initialize the
sysctl table for each feature.

The system initialization process is as follows:

	start_kernel () {
		...
		/* init proc and sysctl base,
		 * proc_root_init()-->proc_sys_init()-->sysctl_init_bases()
		 */
		proc_root_init(); /* init proc and sysctl base */
		...
		arch_call_rest_init();
	}

	arch_call_rest_init()-->rest_init()-->kernel_init()
	kernel_init() {
		...
		kernel_init_freeable(); /* do all initcalls */
		...
		do_sysctl_args(); /* Process the sysctl parameter: sysctl.*= */
	}

	kernel_init_freeable()--->do_basic_setup()-->do_initcalls()
	do_initcalls() {
		for (level = 0; level < ARRAY_SIZE(initcall_levels) - 1; level++) {
			do_initcall_level
	}

The sysctl interface of each subfeature should be registered after
sysctl_init_bases() and before do_sysctl_args().
sysctl_init_bases() is executed before early_initcall().
do_sysctl_args() is executed after late_initcall().
To prevent the sysctl interface from being initialized before the feature
itself. The lowest level of late_initcall() is safe as a normal sysctl
interface registration level.

Signed-off-by: Xiaoming Ni <nixiaoming@huawei.com>
---
v3:
  Adjust the commit information description and add document comments to
  sysctl_initcall().
v2: https://lore.kernel.org/lkml/20211210085849.66169-1-nixiaoming@huawei.com/
  Add a simple checkpatch check.
  Add code comment.
v1: https://lore.kernel.org/lkml/20211207011320.100102-1-nixiaoming@huawei.com/
---
 fs/coredump.c          |  7 +------
 fs/dcache.c            |  7 +------
 fs/exec.c              |  8 +-------
 fs/file_table.c        |  7 +------
 fs/inode.c             |  7 +------
 fs/locks.c             |  7 +------
 fs/namei.c             |  8 +-------
 fs/namespace.c         |  7 +------
 include/linux/sysctl.h | 34 ++++++++++++++++++++++++++++++++++
 kernel/stackleak.c     |  7 +------
 scripts/checkpatch.pl  |  6 ++++++
 11 files changed, 49 insertions(+), 56 deletions(-)

diff --git a/fs/coredump.c b/fs/coredump.c
index 570d98398668..8f6c6322651d 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -943,12 +943,7 @@ static struct ctl_table coredump_sysctls[] = {
 	{ }
 };
 
-static int __init init_fs_coredump_sysctls(void)
-{
-	register_sysctl_init("kernel", coredump_sysctls);
-	return 0;
-}
-fs_initcall(init_fs_coredump_sysctls);
+kernel_sysctl_initcall(coredump_sysctls);
 #endif /* CONFIG_SYSCTL */
 
 /*
diff --git a/fs/dcache.c b/fs/dcache.c
index 0eef1102f460..c1570243aaee 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -195,12 +195,7 @@ static struct ctl_table fs_dcache_sysctls[] = {
 	{ }
 };
 
-static int __init init_fs_dcache_sysctls(void)
-{
-	register_sysctl_init("fs", fs_dcache_sysctls);
-	return 0;
-}
-fs_initcall(init_fs_dcache_sysctls);
+fs_sysctl_initcall(fs_dcache_sysctls);
 #endif
 
 /*
diff --git a/fs/exec.c b/fs/exec.c
index cc5ec43df028..3c30e686af79 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -2124,11 +2124,5 @@ static struct ctl_table fs_exec_sysctls[] = {
 	{ }
 };
 
-static int __init init_fs_exec_sysctls(void)
-{
-	register_sysctl_init("fs", fs_exec_sysctls);
-	return 0;
-}
-
-fs_initcall(init_fs_exec_sysctls);
+fs_sysctl_initcall(fs_exec_sysctls);
 #endif /* CONFIG_SYSCTL */
diff --git a/fs/file_table.c b/fs/file_table.c
index 57edef16dce4..52b60e9378a7 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -116,12 +116,7 @@ static struct ctl_table fs_stat_sysctls[] = {
 	{ }
 };
 
-static int __init init_fs_stat_sysctls(void)
-{
-	register_sysctl_init("fs", fs_stat_sysctls);
-	return 0;
-}
-fs_initcall(init_fs_stat_sysctls);
+fs_sysctl_initcall(fs_stat_sysctls);
 #endif
 
 static struct file *__alloc_file(int flags, const struct cred *cred)
diff --git a/fs/inode.c b/fs/inode.c
index bef6ba9b8eb4..cd20d6dd8ffa 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -132,12 +132,7 @@ static struct ctl_table inodes_sysctls[] = {
 	{ }
 };
 
-static int __init init_fs_inode_sysctls(void)
-{
-	register_sysctl_init("fs", inodes_sysctls);
-	return 0;
-}
-early_initcall(init_fs_inode_sysctls);
+fs_sysctl_initcall(inodes_sysctls);
 #endif
 
 static int no_open(struct inode *inode, struct file *file)
diff --git a/fs/locks.c b/fs/locks.c
index 8c6df10cd9ed..d86a74d19ed3 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -113,12 +113,7 @@ static struct ctl_table locks_sysctls[] = {
 	{}
 };
 
-static int __init init_fs_locks_sysctls(void)
-{
-	register_sysctl_init("fs", locks_sysctls);
-	return 0;
-}
-early_initcall(init_fs_locks_sysctls);
+fs_sysctl_initcall(locks_sysctls);
 #endif /* CONFIG_SYSCTL */
 
 /*
diff --git a/fs/namei.c b/fs/namei.c
index 8d4f832f94aa..7eb636796fd4 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1066,13 +1066,7 @@ static struct ctl_table namei_sysctls[] = {
 	{ }
 };
 
-static int __init init_fs_namei_sysctls(void)
-{
-	register_sysctl_init("fs", namei_sysctls);
-	return 0;
-}
-fs_initcall(init_fs_namei_sysctls);
-
+fs_sysctl_initcall(namei_sysctls);
 #endif /* CONFIG_SYSCTL */
 
 /**
diff --git a/fs/namespace.c b/fs/namespace.c
index 647af66f313d..7117214b7a85 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -4626,11 +4626,6 @@ static struct ctl_table fs_namespace_sysctls[] = {
 	{ }
 };
 
-static int __init init_fs_namespace_sysctls(void)
-{
-	register_sysctl_init("fs", fs_namespace_sysctls);
-	return 0;
-}
-fs_initcall(init_fs_namespace_sysctls);
+fs_sysctl_initcall(fs_namespace_sysctls);
 
 #endif /* CONFIG_SYSCTL */
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index acf0805cf3a0..1c09894d453e 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -231,6 +231,40 @@ extern int sysctl_init_bases(void);
 extern void __register_sysctl_init(const char *path, struct ctl_table *table,
 				 const char *table_name);
 #define register_sysctl_init(path, table) __register_sysctl_init(path, table, #table)
+
+/**
+ * sysctl_initcall() - register and init sysctl leaf node to path
+ * @path: path name for sysctl base
+ * @table: sysctl leaf table to be registered to the path
+ *
+ * Leaf node in the sysctl tree:
+ * a) File, .child = NULL
+ * b) Directory, which is not shared by multiple features, .child != NULL
+ *
+ * The sysctl interface for each subfeature should be after sysctl_init_bases()
+ * and before do_sysctl_args().
+ * sysctl_init_bases() is executed before early_initcall().
+ * do_sysctl_args() is executed after late_initcall().
+ * Therefore, it is safe to add leaves to the sysctl tree using late_initcall().
+ */
+#define sysctl_initcall(path, table) \
+	static int __init init_##table(void) \
+	{ \
+		register_sysctl_init(path, table); \
+		return  0;\
+	} \
+	late_initcall(init_##table)
+
+/*
+ * Use xxx_sysctl_initcall() to initialize your sysctl interface unless you want
+ * to register the sysctl directory and share it with other features.
+ */
+#define kernel_sysctl_initcall(table) sysctl_initcall("kernel", table)
+#define fs_sysctl_initcall(table) sysctl_initcall("fs", table)
+#define vm_sysctl_initcall(table) sysctl_initcall("vm", table)
+#define debug_sysctl_initcall(table) sysctl_initcall("debug", table)
+#define dev_sysctl_initcall(table) sysctl_initcall("dev", table)
+
 extern struct ctl_table_header *register_sysctl_mount_point(const char *path);
 
 void do_sysctl_args(void);
diff --git a/kernel/stackleak.c b/kernel/stackleak.c
index 66b8af394e58..f084c1c787ba 100644
--- a/kernel/stackleak.c
+++ b/kernel/stackleak.c
@@ -57,12 +57,7 @@ static struct ctl_table stackleak_sysctls[] = {
 	{}
 };
 
-static int __init stackleak_sysctls_init(void)
-{
-	register_sysctl_init("kernel", stackleak_sysctls);
-	return 0;
-}
-late_initcall(stackleak_sysctls_init);
+kernel_sysctl_initcall(stackleak_sysctls);
 #endif /* CONFIG_SYSCTL */
 
 #define skip_erasing()	static_branch_unlikely(&stack_erasing_bypass)
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index b01c36a15d9d..e8701d54b458 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -7212,6 +7212,12 @@ sub process {
 			     "Deprecated use of '$deprecated_api', prefer '$new_api' instead\n" . $herecurr);
 		}
 
+# check register_sysctl_init
+		if ($prevline =~ /{/ && $rawline =~ /\sregister_sysctl_init\(\"(kernel|fs|vm|debug|dev)\",\s+(.*)\)\;/) {
+			WARN("DEPRECATED_API",
+			     "Deprecated use of 'register_sysctl_init(\"$1\", $2);', prefer '$1_sysctl_initcall($2);' instead\n".$herecurr);
+		}
+
 # check for various structs that are normally const (ops, kgdb, device_tree)
 # and avoid what seem like struct definitions 'struct foo {'
 		if (defined($const_structs) &&
-- 
2.12.3


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH] sysctl: Add a group of macro functions to initcall the sysctl table of each feature
  2021-12-07 22:39       ` Luis Chamberlain
  2021-12-08  2:10         ` Xiaoming Ni
@ 2021-12-13 16:04         ` Eric W. Biederman
  2021-12-13 21:29           ` Luis Chamberlain
  1 sibling, 1 reply; 20+ messages in thread
From: Eric W. Biederman @ 2021-12-13 16:04 UTC (permalink / raw)
  To: Luis Chamberlain
  Cc: Andrew Morton, Xiaoming Ni, linux-kernel, viro, keescook,
	jlayton, bfields, yzaikin, wangle6, Joe Perches

Luis Chamberlain <mcgrof@kernel.org> writes:

> On Tue, Dec 07, 2021 at 03:08:03PM -0600, Eric W. Biederman wrote:
>> Luis Chamberlain <mcgrof@kernel.org> writes:
>> 
>> > On Mon, Dec 06, 2021 at 05:38:42PM -0800, Andrew Morton wrote:
>> >> On Tue, 7 Dec 2021 09:13:20 +0800 Xiaoming Ni <nixiaoming@huawei.com> wrote:
>> >> > --- a/fs/inode.c
>> >> > +++ b/fs/inode.c
>> >> > @@ -132,12 +132,7 @@ static struct ctl_table inodes_sysctls[] = {
>> >> >  	{ }
>> >> >  };
>> >> >  
>> >> > -static int __init init_fs_inode_sysctls(void)
>> >> > -{
>> >> > -	register_sysctl_init("fs", inodes_sysctls);
>> >> > -	return 0;
>> >> > -}
>> >> > -early_initcall(init_fs_inode_sysctls);
>> >> > +fs_sysctl_initcall(inodes_sysctls);
>> >> >  #endif
>> >> 
>> >> Here's another, of many.
>> >> 
>> >> Someone made the decision to use early_initcall() here (why?) and this
>> >> patch switches it to late_initcall()!  Worrisome.  Each such stealth
>> >> conversion should be explained and justified, shouldn't it?
>> >
>> > I made the decisions for quite a bit of the ordering and yes I agree
>> > this need *very careful* explanation, specially if we are going to
>> > generalize this.
>> >
>> > First and foremost. git grep for sysctl_init_bases and you will see
>> > that the bases for now are initialized on proc_sys_init() and that
>> > gets called on proc_root_init() and that in turn on init/main.c's
>> > start_kernel(). And so this happens *before* the init levels.
>> >
>> > The proper care for what goes on top of this needs to take into
>> > consideration the different init levels and that the if a sysctl
>> > is using a directory *on top* of a base, then that sysctl registration
>> > must be registered *after* that directory. The *base* directory for
>> > "fs" is now registered through fs/sysctls.c() on init_fs_sysctls()
>> > using register_sysctl_base(). I made these changes with these names
>> > and requiring the DECLARE_SYSCTL_BASE() so it would be easy for us
>> > to look at where these are declared.
>> >
>> > So the next step in order to consider is *link* ordering and that
>> > order is maintained by the Makefile. That is why I put this at the
>> > top of the fs Makfile:
>> >
>> > obj-$(CONFIG_SYSCTL)            += sysctls.o 
>> >
>> > So any file after this can use early_initcall(), because the base
>> > for "fs" was declared first in link order, and it used early_initcall().
>> > It is fine to have the other stuff that goes on top of the "fs" base
>> > use late_initcall() but that assumes that vetting has been done so that
>> > if a directory on "fs" was created, let's call it "foo", vetting was done
>> > to ensure that things on top of "foo" are registered *after* the "foo"
>> > directory.
>> >
>> > We now have done the cleanup for "fs", and we can do what we see fine
>> > for "fs", but we may run into surprises later with the other bases, so
>> > I'd be wary of making assumptions at this point if we can use
>> > late_initcall().
>> >
>> > So, as a rule of thumb I'd like to see bases use early_initcall(). The
>> > rest requires manual work and vetting.
>> >
>> > So, how about this, we define fs_sysctl_initcall() to use also
>> > early_initcall(), and ask susbsystems to do their vetting so that
>> > the base also gets linked first.
>> >
>> > After this, if a directory on top of a base is created we should likely create
>> > a new init level and just bump that to use the next init level. So
>> > something like fs_sysctl_base_initcall_subdir_1() map to core_initcall()
>> > and so on.
>> >
>> > That would allow us to easily grep for directory structures easily and
>> > puts some implicit onus of ordering on those folks doing these conversions.
>> > We'd document well the link order stuff for those using the base stuff
>> > too as that is likely only where this will matter most.
>> 
>> I am a bit confused at this explanation of things.
>> 
>> Last I looked the implementation of sysctls allocated the directories
>> independently of the sysctls entries that populated them.
>
> With most sysctls being created using the same kernel/sysctl.c file and
> structure, yes, this was true. With the changes now on linux-next things
> change a bit. The goal is to move sysctls to be registered where they
> are actually defined. But the directory that holds them must be
> registered first.

Why?

If you look at __register_sysctl_table the core registration function
you will see that all directory entries get reduced to simply a path
component.  So they have nothing but their names as input into
the sysctl subsystem.

Further __register_sysctl_table goes in a loop through the path
and does get_subdir to find or create the directory.

Which means that if two places were to register sysctls in
the "fs" directory they would both use the safe "fs" directory
and whoever can first would create it.

Which should mean that worrying about ordering is completely
unnecessary.

What am I missing?


> During the first phase of cleanups now on linux-next
> all filesystem "fs" syscls were moved to be delcared in the kernel's
> fs/ directory. The last part was to register the base "fs" directory.
> For this declareres were added to simplify that and to clarify which
> are base directories:

Unless you are have something playing games with the ctl_table_set
that is all unnecessary.

>
> https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?id=ededd3fc701668743087c77ceeeb7490107cc12c
>
> Then, this commit moves the "fs" base to be declared to fs/ as well:
>
> https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?id=d0f885a73ec6e05803ce99f279232b3116061ed8
>
> This used early_initcall() for the base for "fs" and that is
> because there are no built-in sysctls for "fs" which need to
> be exposed prior to the init levels.
>
> So after this then order is important. If you are using the same
> init level, the the next thing which will ensure order is the order
> of things being linked, so what order they appear on the Makefile.
> And this is why the base move for the "fs" sysctl directory is kept
> at the top of fs/Makfile:

I really think this is doing a lot of work to ensure ordering that
is not necessary.   I did not see anything in your patches that adds
an ordering requirement.

I don't see anything in the existing code that adds an ordering
requirement.

Eric

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] sysctl: Add a group of macro functions to initcall the sysctl table of each feature
  2021-12-13 16:04         ` [PATCH] " Eric W. Biederman
@ 2021-12-13 21:29           ` Luis Chamberlain
  0 siblings, 0 replies; 20+ messages in thread
From: Luis Chamberlain @ 2021-12-13 21:29 UTC (permalink / raw)
  To: Eric W. Biederman, Xiaoming Ni
  Cc: Andrew Morton, linux-kernel, viro, keescook, jlayton, bfields,
	yzaikin, wangle6, Joe Perches

On Mon, Dec 13, 2021 at 10:04:48AM -0600, Eric W. Biederman wrote:
> If you look at __register_sysctl_table the core registration function
> you will see that all directory entries get reduced to simply a path
> component.  So they have nothing but their names as input into
> the sysctl subsystem.
> 
> Further __register_sysctl_table goes in a loop through the path
> and does get_subdir to find or create the directory.
> 
> Which means that if two places were to register sysctls in
> the "fs" directory they would both use the safe "fs" directory
> and whoever can first would create it.
> 
> Which should mean that worrying about ordering is completely
> unnecessary.
> 
> What am I missing?

I was being too cautious, sorry for not having dug enough.
Then this makes things much easier. Thanks!

  Luis

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v3] sysctl: Add a group of macro functions to initcall the sysctl table of each feature
  2021-12-13  3:31                   ` [PATCH v3] " Xiaoming Ni
@ 2021-12-19 21:32                     ` Luis Chamberlain
  0 siblings, 0 replies; 20+ messages in thread
From: Luis Chamberlain @ 2021-12-19 21:32 UTC (permalink / raw)
  To: Xiaoming Ni
  Cc: linux-kernel, viro, ebiederm, keescook, jlayton, bfields,
	yzaikin, apw, joe, dwaipayanray1, lukas.bulwahn, julia.lawall,
	akpm, wangle6

On Mon, Dec 13, 2021 at 11:31:19AM +0800, Xiaoming Ni wrote:
> To avoid duplicated code, add a set of macro functions to initialize the
> sysctl table for each feature.
> 
> The system initialization process is as follows:

See Eric's comments, we don't need to take care here on order as if the
path does not exist, it will be created. So please updates the commit to
to instead reflect that reality as I was just lazy and assumed the worst
and was just being cautious. If you get to add a few tests on
lib/test_sysctl.c and tools/testing/selftests/sysctl/sysctl.sh  to
verify it would be great too.

  Luis

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2021-12-19 21:33 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-12-07  1:13 [PATCH] sysctl: Add a group of macro functions to initcall the sysctl table of each feature Xiaoming Ni
2021-12-07  1:38 ` Andrew Morton
2021-12-07  1:50   ` Joe Perches
2021-12-07  6:25     ` Xiaoming Ni
2021-12-07  6:30     ` Julia Lawall
2021-12-07  3:09   ` Xiaoming Ni
2021-12-07 20:18   ` Luis Chamberlain
2021-12-07 21:08     ` Eric W. Biederman
2021-12-07 22:39       ` Luis Chamberlain
2021-12-08  2:10         ` Xiaoming Ni
2021-12-08  2:44           ` Luis Chamberlain
2021-12-08 12:34             ` Xiaoming Ni
2021-12-08 20:05               ` Luis Chamberlain
2021-12-10  8:58                 ` [PATCH v2] " Xiaoming Ni
2021-12-10 17:20                   ` Luis Chamberlain
2021-12-12  9:58                     ` Xiaoming Ni
2021-12-13  3:31                   ` [PATCH v3] " Xiaoming Ni
2021-12-19 21:32                     ` Luis Chamberlain
2021-12-13 16:04         ` [PATCH] " Eric W. Biederman
2021-12-13 21:29           ` Luis Chamberlain

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).