* [RFC][PATCH] ftrace,objtool: PC32 based __mcount_loc
From: Peter Zijlstra @ 2022-06-17 11:24 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: x86, linux-kernel, Josh Poimboeuf, christophe.leroy,
	naveen.n.rao, mbenes


Hi,

I recently noticed that __mcount_loc is 64bit wide, containing absolute
addresses. Since __mcount_loc is a permanent section (not one we drop
after boot), this bloats the kernel memory usage for no real purpose.

The below patch adds __mcount_loc_32 and objtool support to generate it.
This saves, on an x86_64-defconfig + FTRACE, 23975*4 ~= 94K of permanent
storage.
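
As a minimal sketch of the idea (assuming a signed PC-relative offset, like
the other PREL32-style sections in the kernel), each entry shrinks from an
8-byte absolute address to a 4-byte offset relative to the entry itself, and
the call site is recovered at boot roughly like so:

	/* sketch only; the real decode is in ftrace_process_locs() below */
	static inline unsigned long mcount_loc_32_to_addr(const s32 *p)
	{
		/* the entry stores "call site - &entry"; add the entry address back */
		return (unsigned long)p + *p;
	}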

XXX hobbled sorttable for now
XXX compile tested only

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 Makefile                                |    3 ++
 arch/x86/Kconfig                        |    2 -
 include/asm-generic/vmlinux.lds.h       |    1 
 include/linux/ftrace.h                  |    6 -----
 include/linux/ftrace_types.h            |   21 +++++++++++++++++
 include/linux/module.h                  |    3 +-
 kernel/trace/Kconfig                    |   18 +++++++++++++++
 kernel/trace/ftrace.c                   |   38 +++++++++++++++++++++-----------
 scripts/Makefile.lib                    |    1 
 tools/objtool/builtin-check.c           |    2 +
 tools/objtool/check.c                   |   25 ++++++++++-----------
 tools/objtool/include/objtool/builtin.h |    1 
 12 files changed, 90 insertions(+), 31 deletions(-)

--- a/Makefile
+++ b/Makefile
@@ -858,6 +858,9 @@ endif
 ifdef CONFIG_FTRACE_MCOUNT_USE_OBJTOOL
   CC_FLAGS_USING	+= -DCC_USING_NOP_MCOUNT
 endif
+ifdef CONFIG_FTRACE_MCOUNT32_USE_OBJTOOL
+  CC_FLAGS_USING	+= -DCC_USING_NOP_MCOUNT -DCC_USING_MCOUNT_LOC_32
+endif
 ifdef CONFIG_FTRACE_MCOUNT_USE_RECORDMCOUNT
   ifdef CONFIG_HAVE_C_RECORDMCOUNT
     BUILD_C_RECORDMCOUNT := y
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -190,7 +190,7 @@ config X86
 	select HAVE_CONTEXT_TRACKING_OFFSTACK	if HAVE_CONTEXT_TRACKING
 	select HAVE_C_RECORDMCOUNT
 	select HAVE_OBJTOOL_MCOUNT		if HAVE_OBJTOOL
-	select HAVE_BUILDTIME_MCOUNT_SORT
+	select HAVE_BUILDTIME_MCOUNT_SORT	if !HAVE_OBJTOOL_MCOUNT
 	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_DMA_CONTIGUOUS
 	select HAVE_DYNAMIC_FTRACE
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -172,6 +172,7 @@
 #define MCOUNT_REC()	. = ALIGN(8);				\
 			__start_mcount_loc = .;			\
 			KEEP(*(__mcount_loc))			\
+			KEEP(*(__mcount_loc_32))		\
 			KEEP(*(__patchable_function_entries))	\
 			__stop_mcount_loc = .;			\
 			ftrace_stub_graph = ftrace_stub;	\
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -19,6 +19,7 @@
 #include <linux/types.h>
 #include <linux/init.h>
 #include <linux/fs.h>
+#include <linux/ftrace_types.h>
 
 #include <asm/ftrace.h>
 
@@ -926,11 +927,6 @@ static inline unsigned long get_lock_par
 
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
 extern void ftrace_init(void);
-#ifdef CC_USING_PATCHABLE_FUNCTION_ENTRY
-#define FTRACE_CALLSITE_SECTION	"__patchable_function_entries"
-#else
-#define FTRACE_CALLSITE_SECTION	"__mcount_loc"
-#endif
 #else
 static inline void ftrace_init(void) { }
 #endif
--- /dev/null
+++ b/include/linux/ftrace_types.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _LINUX_FTRACE_TYPES_H
+#define _LINUX_FTRACE_TYPES_H
+
+#ifdef CONFIG_FTRACE_MCOUNT_RECORD
+
+#ifdef CC_USING_PATCHABLE_FUNCTION_ENTRY
+#define FTRACE_CALLSITE_SECTION	"__patchable_function_entries"
+typedef unsigned long mcount_t;
+#elif CC_USING_MCOUNT_LOC_32
+#define FTRACE_CALLSITE_SECTION	"__mcount_loc_32"
+typedef u32 mcount_t;
+#else
+#define FTRACE_CALLSITE_SECTION	"__mcount_loc"
+typedef unsigned long mcount_t;
+#endif
+
+#endif /* CONFIG_FTRACE_MCOUNT_RECORD */
+
+#endif /* _LINUX_FTRACE_TYPES_H */
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -28,6 +28,7 @@
 #include <linux/srcu.h>
 #include <linux/static_call_types.h>
 #include <linux/cfi.h>
+#include <linux/ftrace_types.h>
 
 #include <linux/percpu.h>
 #include <asm/module.h>
@@ -493,7 +494,7 @@ struct module {
 #endif
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
 	unsigned int num_ftrace_callsites;
-	unsigned long *ftrace_callsites;
+	mcount_t *ftrace_callsites;
 #endif
 #ifdef CONFIG_KPROBES
 	void *kprobes_text_start;
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -717,16 +717,33 @@ config FTRACE_MCOUNT_USE_PATCHABLE_FUNCT
 	bool
 	depends on FTRACE_MCOUNT_RECORD
 
+#
+# mcount preference:
+#  * objtool --mcount32
+#  * gcc -mrecord-mcount
+#  * objtool --mcount
+#  * recordmcount
+#
+
+config FTRACE_MCOUNT32_USE_OBJTOOL
+	def_bool y
+	depends on HAVE_OBJTOOL_MCOUNT
+	depends on !FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY
+	depends on FTRACE_MCOUNT_RECORD
+	select OBJTOOL
+
 config FTRACE_MCOUNT_USE_CC
 	def_bool y
 	depends on $(cc-option,-mrecord-mcount)
 	depends on !FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY
+	depends on !FTRACE_MCOUNT32_USE_OBJTOOL
 	depends on FTRACE_MCOUNT_RECORD
 
 config FTRACE_MCOUNT_USE_OBJTOOL
 	def_bool y
 	depends on HAVE_OBJTOOL_MCOUNT
 	depends on !FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY
+	depends on !FTRACE_MCOUNT32_USE_OBJTOOL
 	depends on !FTRACE_MCOUNT_USE_CC
 	depends on FTRACE_MCOUNT_RECORD
 	select OBJTOOL
@@ -735,6 +752,7 @@ config FTRACE_MCOUNT_USE_RECORDMCOUNT
 	def_bool y
 	depends on !FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY
 	depends on !FTRACE_MCOUNT_USE_CC
+	depends on !FTRACE_MCOUNT32_USE_OBJTOOL
 	depends on !FTRACE_MCOUNT_USE_OBJTOOL
 	depends on FTRACE_MCOUNT_RECORD
 
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -6544,18 +6544,26 @@ static __init int ftrace_init_dyn_tracef
 
 static int ftrace_cmp_ips(const void *a, const void *b)
 {
-	const unsigned long *ipa = a;
-	const unsigned long *ipb = b;
+	const mcount_t *_a = a;
+	const mcount_t *_b = b;
 
-	if (*ipa > *ipb)
+#ifdef CC_USING_MCOUNT_LOC_32
+	const unsigned long ipa = (unsigned long)_a + *_a;
+	const unsigned long ipb = (unsigned long)_b + *_b;
+#else
+	const unsigned long ipa = *_a;
+	const unsigned long ipb = *_b;
+#endif
+
+	if (ipa > ipb)
 		return 1;
-	if (*ipa < *ipb)
+	if (ipa < ipb)
 		return -1;
 	return 0;
 }
 
 #ifdef CONFIG_FTRACE_SORT_STARTUP_TEST
-static void test_is_sorted(unsigned long *start, unsigned long count)
+static void test_is_sorted(mcount_t *start, unsigned long count)
 {
 	int i;
 
@@ -6570,23 +6578,23 @@ static void test_is_sorted(unsigned long
 		pr_info("ftrace section at %px sorted properly\n", start);
 }
 #else
-static void test_is_sorted(unsigned long *start, unsigned long count)
+static void test_is_sorted(mcount_t *start, unsigned long count)
 {
 }
 #endif
 
 static int ftrace_process_locs(struct module *mod,
-			       unsigned long *start,
-			       unsigned long *end)
+			       mcount_t *start,
+			       mcount_t *end)
 {
 	struct ftrace_page *start_pg;
 	struct ftrace_page *pg;
 	struct dyn_ftrace *rec;
 	unsigned long count;
-	unsigned long *p;
 	unsigned long addr;
 	unsigned long flags = 0; /* Shut up gcc */
 	int ret = -ENOMEM;
+	mcount_t *p;
 
 	count = end - start;
 
@@ -6637,7 +6645,13 @@ static int ftrace_process_locs(struct mo
 	pg = start_pg;
 	while (p < end) {
 		unsigned long end_offset;
-		addr = ftrace_call_adjust(*p++);
+#ifdef CC_USING_MCOUNT_LOC_32
+		addr = (unsigned long)p + *p;
+		p++;
+#else
+		addr = *p++;
+#endif
+		addr = ftrace_call_adjust(addr);
 		/*
 		 * Some architecture linkers will pad between
 		 * the different mcount_loc sections of different
@@ -7280,8 +7294,8 @@ int __init __weak ftrace_dyn_arch_init(v
 
 void __init ftrace_init(void)
 {
-	extern unsigned long __start_mcount_loc[];
-	extern unsigned long __stop_mcount_loc[];
+	extern mcount_t __start_mcount_loc[];
+	extern mcount_t __stop_mcount_loc[];
 	unsigned long count, flags;
 	int ret;
 
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -234,6 +234,7 @@ objtool_args =								\
 	$(if $(CONFIG_HAVE_NOINSTR_HACK), --hacks=noinstr)		\
 	$(if $(CONFIG_X86_KERNEL_IBT), --ibt)				\
 	$(if $(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL), --mcount)		\
+	$(if $(CONFIG_FTRACE_MCOUNT32_USE_OBJTOOL), --mcount32)		\
 	$(if $(CONFIG_UNWINDER_ORC), --orc)				\
 	$(if $(CONFIG_RETPOLINE), --retpoline)				\
 	$(if $(CONFIG_SLS), --sls)					\
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -65,6 +65,7 @@ const struct option check_options[] = {
 	OPT_CALLBACK_OPTARG('h', "hacks", NULL, NULL, "jump_label,noinstr", "patch toolchain bugs/limitations", parse_hacks),
 	OPT_BOOLEAN('i', "ibt", &opts.ibt, "validate and annotate IBT"),
 	OPT_BOOLEAN('m', "mcount", &opts.mcount, "annotate mcount/fentry calls for ftrace"),
+	OPT_BOOLEAN(0,   "mcount32", &opts.mcount32, "annotate mcount/fentry calls for ftrace"),
 	OPT_BOOLEAN('n', "noinstr", &opts.noinstr, "validate noinstr rules"),
 	OPT_BOOLEAN('o', "orc", &opts.orc, "generate ORC metadata"),
 	OPT_BOOLEAN('r', "retpoline", &opts.retpoline, "validate and annotate retpoline usage"),
@@ -120,6 +121,7 @@ static bool opts_valid(void)
 	    opts.hack_noinstr		||
 	    opts.ibt			||
 	    opts.mcount			||
+	    opts.mcount32		||
 	    opts.noinstr		||
 	    opts.orc			||
 	    opts.retpoline		||
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -803,15 +803,16 @@ static int create_ibt_endbr_seal_section
 
 static int create_mcount_loc_sections(struct objtool_file *file)
 {
-	struct section *sec;
-	unsigned long *loc;
+	const char *secname = opts.mcount32 ? "__mcount_loc_32" : "__mcount_loc";
+	unsigned int entsize = opts.mcount32 ? sizeof(u32) : sizeof(u64);
 	struct instruction *insn;
+	struct section *sec;
 	int idx;
 
-	sec = find_section_by_name(file->elf, "__mcount_loc");
+	sec = find_section_by_name(file->elf, secname);
 	if (sec) {
 		INIT_LIST_HEAD(&file->mcount_loc_list);
-		WARN("file already has __mcount_loc section, skipping");
+		WARN("file already has %s section, skipping", secname);
 		return 0;
 	}
 
@@ -822,19 +823,18 @@ static int create_mcount_loc_sections(st
 	list_for_each_entry(insn, &file->mcount_loc_list, call_node)
 		idx++;
 
-	sec = elf_create_section(file->elf, "__mcount_loc", 0, sizeof(unsigned long), idx);
+	sec = elf_create_section(file->elf, secname, 0, entsize, idx);
 	if (!sec)
 		return -1;
 
 	idx = 0;
 	list_for_each_entry(insn, &file->mcount_loc_list, call_node) {
-
-		loc = (unsigned long *)sec->data->d_buf + idx;
-		memset(loc, 0, sizeof(unsigned long));
+		void *loc = sec->data->d_buf + idx * entsize;
+		memset(loc, 0, entsize);
 
 		if (elf_add_reloc_to_insn(file->elf, sec,
-					  idx * sizeof(unsigned long),
-					  R_X86_64_64,
+					  idx * entsize,
+					  opts.mcount32 ? R_X86_64_PC32 : R_X86_64_64,
 					  insn->sec, insn->offset))
 			return -1;
 
@@ -1174,7 +1174,7 @@ static void annotate_call_site(struct ob
 		return;
 	}
 
-	if (opts.mcount && sym->fentry) {
+	if ((opts.mcount || opts.mcount32) && sym->fentry) {
 		if (sibling)
 			WARN_FUNC("Tail call to __fentry__ !?!?", insn->sec, insn->offset);
 
@@ -3827,6 +3827,7 @@ static int validate_ibt(struct objtool_f
 		    !strcmp(sec->name, "__ex_table")			||
 		    !strcmp(sec->name, "__jump_table")			||
 		    !strcmp(sec->name, "__mcount_loc")			||
+		    !strcmp(sec->name, "__mcount_loc_32")		||
 		    !strcmp(sec->name, "__tracepoints"))
 			continue;
 
@@ -3974,7 +3975,7 @@ int check(struct objtool_file *file)
 		warnings += ret;
 	}
 
-	if (opts.mcount) {
+	if (opts.mcount || opts.mcount32) {
 		ret = create_mcount_loc_sections(file);
 		if (ret < 0)
 			goto out;
--- a/tools/objtool/include/objtool/builtin.h
+++ b/tools/objtool/include/objtool/builtin.h
@@ -16,6 +16,7 @@ struct opts {
 	bool hack_noinstr;
 	bool ibt;
 	bool mcount;
+	bool mcount32;
 	bool noinstr;
 	bool orc;
 	bool retpoline;

* Re: [RFC][PATCH] ftrace,objtool: PC32 based __mcount_loc
From: Mark Rutland @ 2022-06-17 11:40 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Steven Rostedt, x86, linux-kernel, Josh Poimboeuf,
	christophe.leroy, naveen.n.rao, mbenes, Nathan Chancellor,
	Nick Desaulniers, Ard Biesheuvel

On Fri, Jun 17, 2022 at 01:24:53PM +0200, Peter Zijlstra wrote:
> 
> Hi,
> 
> I recently noticed that __mcount_loc is 64bit wide, containing absolute
> addresses. Since __mcount_loc is a permanent section (not one we drop
> after boot), this bloats the kernel memory usage for no real purpose.
> 
> The below patch adds __mcount_loc_32 and objtool support to generate it.
> This saves, on an x86_64-defconfig + FTRACE, 23975*4 ~= 94K of permanent
> storage.

We have a similar issue on arm64, which is exacerbated by needing ABS64
relocations (24 bytes per entry!) adding significant bloat when FTRACE is
enabled.
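
For reference, that 24 bytes per entry is the standard ELF64 RELA record (as
in <elf.h>) emitted for each R_AARCH64_ABS64 relocation, on top of the 8-byte
__mcount_loc slot it targets:

	typedef struct {
		Elf64_Addr	r_offset;	/* 8 bytes: where to apply the relocation */
		Elf64_Xword	r_info;		/* 8 bytes: symbol index + relocation type */
		Elf64_Sxword	r_addend;	/* 8 bytes: addend */
	} Elf64_Rela;				/* 24 bytes per entry */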

It'd be really nice if going forwards compilers could expose an option to
generate PC32/PREL32 entries directly for this.

Mark.

> [ rest of the patch quoted here trimmed; see the original posting above ]
* Re: [RFC][PATCH] ftrace,objtool: PC32 based __mcount_loc
From: Christophe Leroy @ 2022-06-17 11:44 UTC (permalink / raw)
  To: Peter Zijlstra, Steven Rostedt
  Cc: x86, linux-kernel, Josh Poimboeuf, naveen.n.rao, mbenes



On 17/06/2022 at 13:24, Peter Zijlstra wrote:
> 
> Hi,
> 
> I recently noticed that __mcount_loc is 64bit wide, containing absolute
> addresses. Since __mcount_loc is a permanent section (not one we drop
> after boot), this bloats the kernel memory usage for no real purpose.

I guess you mean it is 64-bit wide on 64-bit architectures? Because it 
seems to be defined as 'long', so it will be 32-bit on 32-bit architectures?

Christophe

* Re: [RFC][PATCH] ftrace,objtool: PC32 based __mcount_loc
From: Peter Zijlstra @ 2022-06-17 12:06 UTC (permalink / raw)
  To: Christophe Leroy
  Cc: Steven Rostedt, x86, linux-kernel, Josh Poimboeuf, naveen.n.rao, mbenes

On Fri, Jun 17, 2022 at 11:44:49AM +0000, Christophe Leroy wrote:
> 
> 
> On 17/06/2022 at 13:24, Peter Zijlstra wrote:
> > 
> > Hi,
> > 
> > I recently noticed that __mcount_loc is 64bit wide, containing absolute
> > addresses. Since __mcount_loc is a permanent section (not one we drop
> > after boot), this bloats the kernel memory usage for no real purpose.
> 
> I guess you mean it is 64-bit wide on 64-bit architectures? Because it 
> seems to be defined as 'long', so it will be 32-bit on 32-bit architectures?

Right, it's an absolute address in the native size. Clearly I'm somewhat
64-bit biased :-)

* Re: [RFC][PATCH] ftrace,objtool: PC32 based __mcount_loc
From: Josh Poimboeuf @ 2022-06-17 20:11 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Steven Rostedt, x86, linux-kernel, Josh Poimboeuf,
	christophe.leroy, naveen.n.rao, mbenes

On Fri, Jun 17, 2022 at 01:24:53PM +0200, Peter Zijlstra wrote:
> 
> Hi,
> 
> I recently noticed that __mcount_loc is 64bit wide, containing absolute
> addresses. Since __mcount_loc is a permanent section (not one we drop
> after boot), this bloats the kernel memory usage for no real purpose.
> 
> The below patch adds __mcount_loc_32 and objtool support to generate it.
> This saves, on an x86_64-defconfig + FTRACE, 23975*4 ~= 94K of permanent
> storage.
> 
> XXX hobbled sorttable for now
> XXX compile tested only
> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>

Nice!

Some nits:

- No need for renaming the section, it can still be called
  '__mcount_loc' regardless?

- No need for a new FTRACE_MCOUNT32_USE_OBJTOOL config option or
  '--mcount32' cmdline option, just change the old ones to be pc32?

- change "32" to "PC32": CC_USING_MCOUNT_LOC_PC32

That will shrink this patch down quite a bit.

-- 
Josh

* Re: [RFC][PATCH] ftrace,objtool: PC32 based __mcount_loc
From: Peter Zijlstra @ 2022-06-20  7:35 UTC (permalink / raw)
  To: Josh Poimboeuf
  Cc: Steven Rostedt, x86, linux-kernel, Josh Poimboeuf,
	christophe.leroy, naveen.n.rao, mbenes

On Fri, Jun 17, 2022 at 01:11:42PM -0700, Josh Poimboeuf wrote:
> On Fri, Jun 17, 2022 at 01:24:53PM +0200, Peter Zijlstra wrote:
> > 
> > Hi,
> > 
> > I recently noticed that __mcount_loc is 64bit wide, containing absolute
> > addresses. Since __mcount_loc is a permanent section (not one we drop
> > after boot), this bloats the kernel memory usage for no real purpose.
> > 
> > The below patch adds __mcount_loc_32 and objtool support to generate it.
> > This saves, on an x86_64-defconfig + FTRACE, 23975*4 ~= 94K of permanent
> > storage.
> > 
> > XXX hobbled sorttable for now
> > XXX compile tested only
> > 
> > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> 
> Nice!
> 
> Some nits:
> 
> - No need for renaming the section, it can still be called
>   '__mcount_loc' regardless?

I wanted to avoid mixing them by accidental build funnies, also, it
having a different name makes it easier to check what's what with a
simple readelf.

> - No need for a new FTRACE_MCOUNT32_USE_OBJTOOL config option or
>   '--mcount32' cmdline option, just change the old ones to be pc32?

Right, so I did that because of the pending --mcount patches for Power.
If Christophe is on board with that, sure, can do.

> - change "32" to "PC32": CC_USING_MCOUNT_LOC_PC32

Right.

* Re: [RFC][PATCH] ftrace,objtool: PC32 based __mcount_loc
From: Christophe Leroy @ 2022-06-20  7:42 UTC (permalink / raw)
  To: Peter Zijlstra, Josh Poimboeuf
  Cc: Steven Rostedt, x86, linux-kernel, Josh Poimboeuf, naveen.n.rao, mbenes



On 20/06/2022 at 09:35, Peter Zijlstra wrote:
> On Fri, Jun 17, 2022 at 01:11:42PM -0700, Josh Poimboeuf wrote:
>> On Fri, Jun 17, 2022 at 01:24:53PM +0200, Peter Zijlstra wrote:
>>>
>>> Hi,
>>>
>>> I recently noticed that __mcount_loc is 64bit wide, containing absolute
>>> addresses. Since __mcount_loc is a permanent section (not one we drop
>>> after boot), this bloats the kernel memory usage for no real purpose.
>>>
>>> The below patch adds __mcount_loc_32 and objtool support to generate it.
>>> This saves, on an x86_64-defconfig + FTRACE, 23975*4 ~= 94K of permanent
>>> storage.
>>>
>>> XXX hobbled sorttable for now
>>> XXX compile tested only
>>>
>>> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
>>
>> Nice!
>>
>> Some nits:
>>
>> - No need for renaming the section, it can still be called
>>    '__mcount_loc' regardless?
> 
> I wanted to avoid mixing them by accidental build funnies, also, it
> having a different name makes it easier to check what's what with a
> simple readelf.
> 
>> - No need for a new FTRACE_MCOUNT32_USE_OBJTOOL config option or
>>    '--mcount32' cmdline option, just change the old ones to be pc32?
> 
> Right, so I did that because of the pending --mcount patches for Power.
> If Christophe is on board with that, sure, can do.

Yes, on 32-bit platforms it makes no difference, so let's convert all 
platforms to PC32; it's always easier to have only one solution for all.

* Re: [RFC][PATCH] ftrace,objtool: PC32 based __mcount_loc
From: Ard Biesheuvel @ 2022-06-21 16:27 UTC (permalink / raw)
  To: Mark Rutland
  Cc: Peter Zijlstra, Steven Rostedt, X86 ML,
	Linux Kernel Mailing List, Josh Poimboeuf, Christophe Leroy,
	Naveen N . Rao, Miroslav Benes, Nathan Chancellor,
	Nick Desaulniers

On Fri, 17 Jun 2022 at 13:40, Mark Rutland <mark.rutland@arm.com> wrote:
>
> On Fri, Jun 17, 2022 at 01:24:53PM +0200, Peter Zijlstra wrote:
> >
> > Hi,
> >
> > I recently noticed that __mcount_loc is 64bit wide, containing absolute
> > addresses. Since __mcount_loc is a permanent section (not one we drop
> > after boot), this bloats the kernel memory usage for no real purpose.
> >
> > The below patch adds __mcount_loc_32 and objtool support to generate it.
> > This saves, on an x86_64-defconfig + FTRACE, 23975*4 ~= 94K of permanent
> > storage.
>
> We have a similar issue on arm64, which is exacerbated by needing ABS64
> relocations (24 bytes per entry!) adding significant bloat when FTRACE is
> enabled.
>
> It'd be really nice if going forwards compilers could expose an option to
> generate PC32/PREL32 entries directly for this.
>

As opposed to generating absolute references today? Or as opposed to
having to rely on our own tooling?

* Re: [RFC][PATCH] ftrace,objtool: PC32 based __mcount_loc
From: Steven Rostedt @ 2022-06-22 14:50 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: x86, linux-kernel, Josh Poimboeuf, christophe.leroy,
	naveen.n.rao, mbenes

On Fri, 17 Jun 2022 13:24:53 +0200
Peter Zijlstra <peterz@infradead.org> wrote:

> Hi,
> 
> I recently noticed that __mcount_loc is 64bit wide, containing absolute
> addresses. Since __mcount_loc is a permanent section (not one we drop
> after boot), this bloats the kernel memory usage for no real purpose.

Wait, it's not dropped? Nothing uses it after it is read. It should be
dropped when init data is dropped.

From include/asm-generic/vmlinux.lds.h

/* init and exit section handling */
#define INIT_DATA                                                       \
        KEEP(*(SORT(___kentry+*)))                                      \
        *(.init.data init.data.*)                                       \
        MEM_DISCARD(init.data*)                                         \
        KERNEL_CTORS()                                                  \
        MCOUNT_REC()                                                    \  <<----
        *(.init.rodata .init.rodata.*)                                  \
        FTRACE_EVENTS()                                                 \
        TRACE_SYSCALLS()                                                \
        KPROBE_BLACKLIST()                                              \
        ERROR_INJECT_WHITELIST()                                        \
        MEM_DISCARD(init.rodata)                                        \


So it should be dropped after boot.

-- Steve

* Re: [RFC][PATCH] ftrace,objtool: PC32 based __mcount_loc
From: Steven Rostedt @ 2022-06-22 14:54 UTC (permalink / raw)
  To: Mark Rutland
  Cc: Peter Zijlstra, x86, linux-kernel, Josh Poimboeuf,
	christophe.leroy, naveen.n.rao, mbenes, Nathan Chancellor,
	Nick Desaulniers, Ard Biesheuvel

On Fri, 17 Jun 2022 12:40:00 +0100
Mark Rutland <mark.rutland@arm.com> wrote:

> We have a similar issue on arm64, which is exacerbated by needing ABS64
> relocations (24 bytes per entry!) adding significant bloat when FTRACE is
> enabled.

I have patches that bring the size down quite a bit. The mcount_loc
entries are read into dyn_ftrace records, which have two longs (the
second long is the flags, which only uses 32 bits, but is a long to
keep the record aligned; a 64-bit word followed by a 32-bit word would
just add 32 bits of padding once placed in an array).

The patches turn that into two ints (which brings the size down on
64-bit machines). The list is broken up into blocks, and what I do is
put the top 32 bits of the address at the top of each block and make
sure they are the same for all the entries in that block.
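
Roughly like this -- a sketch from memory, the field names are made up and
the actual patches may differ:

	struct dyn_ftrace_block {
		u32	ip_hi;		/* top 32 bits, shared by the whole block */
		u32	nr_entries;
		struct {
			u32	ip_lo;	/* low 32 bits of the call-site address */
			u32	flags;	/* FTRACE_FL_* bits fit in 32 bits */
		} recs[];
	};

	/* full address of entry i: ((u64)block->ip_hi << 32) | block->recs[i].ip_lo */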

I guess it's time to bring this back alive.

-- Steve

> 
> It'd be really nice if going forwards compilers could expose an option to
> generate PC32/PREL32 entries directly for this.


* Re: [RFC][PATCH] ftrace,objtool: PC32 based __mcount_loc
From: Mark Rutland @ 2022-06-23 14:19 UTC (permalink / raw)
  To: Ard Biesheuvel
  Cc: Peter Zijlstra, Steven Rostedt, X86 ML,
	Linux Kernel Mailing List, Josh Poimboeuf, Christophe Leroy,
	Naveen N . Rao, Miroslav Benes, Nathan Chancellor,
	Nick Desaulniers

On Tue, Jun 21, 2022 at 06:27:39PM +0200, Ard Biesheuvel wrote:
> On Fri, 17 Jun 2022 at 13:40, Mark Rutland <mark.rutland@arm.com> wrote:
> >
> > On Fri, Jun 17, 2022 at 01:24:53PM +0200, Peter Zijlstra wrote:
> > >
> > > Hi,
> > >
> > > I recently noticed that __mcount_loc is 64bit wide, containing absolute
> > > addresses. Since __mcount_loc is a permanent section (not one we drop
> > > after boot), this bloats the kernel memory usage for no real purpose.
> > >
> > > The below patch adds __mcount_loc_32 and objtool support to generate it.
> > > This saves, on an x86_64-defconfig + FTRACE, 23975*4 ~= 94K of permanent
> > > storage.
> >
> > We have a similar issue on arm64, which is exacerbated by needing ABS64
> > relocations (24 bytes per entry!) adding significant bloat when FTRACE is
> > enabled.
> >
> > It'd be really nice if going forwards compilers could expose an option to
> > generate PC32/PREL32 entries directly for this.
> 
> As opposed to generating absolute references today? Or as opposed to
> having to rely on our own tooling?

Both; my preference would be for the compiler to have an option to generate
these entries as relative at compile time, without needing a binary
post-processing step.

That said, from my PoV this is one of the things it seems somewhat reasonable
for objtool to do, as it's arguably akin to the build-time sort and doesn't
require deep knowledge of the control flow, etc.

Thanks,
Mark.

* Re: [RFC][PATCH] ftrace,objtool: PC32 based __mcount_loc
From: Mark Rutland @ 2022-06-23 14:21 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Peter Zijlstra, x86, linux-kernel, Josh Poimboeuf,
	christophe.leroy, naveen.n.rao, mbenes, Nathan Chancellor,
	Nick Desaulniers, Ard Biesheuvel

On Wed, Jun 22, 2022 at 10:54:36AM -0400, Steven Rostedt wrote:
> On Fri, 17 Jun 2022 12:40:00 +0100
> Mark Rutland <mark.rutland@arm.com> wrote:
> 
> > We have a similar issue on arm64, which is exacerbated by needing ABS64
> > relocations (24 bytes per entry!) adding significant bloat when FTRACE is
> > enabled.
> 
> I have patches that bring down the size quite a bit. The mcount loc is
> read into the dyn_rec, which has two longs (the second long is the
> flags that only use 32 bits, but is a long to make it aligned, as a 64
> bit word followed by a 32bit word just added 32 bits of padding to make
> it an array).
> 
> The patches make it into two ints (which bring down the size for 64 bit
> machines). The lists are broken up into blocks, and what I do is put
> the top 32 bits of a word into the top of the block, and make sure that
> they are the same among all the entries in the block.
> 
> I guess its time to bring this back alive.

I don't think that helps? I'm on about the size of the kernel "Image" file, not
the runtime memory footprint.

... unless you mean doing that at compile time?

Mark.

* Re: [RFC][PATCH] ftrace,objtool: PC32 based __mcount_loc
From: Steven Rostedt @ 2022-06-24  0:01 UTC (permalink / raw)
  To: Mark Rutland
  Cc: Peter Zijlstra, x86, linux-kernel, Josh Poimboeuf,
	christophe.leroy, naveen.n.rao, mbenes, Nathan Chancellor,
	Nick Desaulniers, Ard Biesheuvel

On Thu, 23 Jun 2022 15:21:30 +0100
Mark Rutland <mark.rutland@arm.com> wrote:

> I don't think that helps? I'm on about the size of the kernel "Image" file, not
> the runtime memory footprint.
> 
> ... unless you mean doing that at compiler time?

Oh, when Peter said: "This saves, on an x86_64-defconfig + FTRACE,
23975*4 ~= 94K of permanent storage." I was thinking of "storage in
memory". I wasn't thinking of disk space on the host machine.

But sure, this is fine with me. I'll have to take a deeper look at the
patch though.

-- Steve

* Re: [RFC][PATCH] ftrace,objtool: PC32 based __mcount_loc
From: Peter Zijlstra @ 2022-06-24  7:16 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: x86, linux-kernel, Josh Poimboeuf, christophe.leroy,
	naveen.n.rao, mbenes

On Wed, Jun 22, 2022 at 10:50:18AM -0400, Steven Rostedt wrote:
> On Fri, 17 Jun 2022 13:24:53 +0200
> Peter Zijlstra <peterz@infradead.org> wrote:
> 
> > Hi,
> > 
> > I recently noticed that __mcount_loc is 64bit wide, containing absolute
> > addresses. Since __mcount_loc is a permanent section (not one we drop
> > after boot), this bloats the kernel memory usage for no real purpose.
> 
> Wait, it's not dropped? Nothing uses it after it is read. It should be
> dropped when init data is dropped.
> 
> From include/asm-generic/vmlinux.lds.h
> 
> /* init and exit section handling */
> #define INIT_DATA                                                       \
>         KEEP(*(SORT(___kentry+*)))                                      \
>         *(.init.data init.data.*)                                       \
>         MEM_DISCARD(init.data*)                                         \
>         KERNEL_CTORS()                                                  \
>         MCOUNT_REC()                                                    \  <<----
>         *(.init.rodata .init.rodata.*)                                  \
>         FTRACE_EVENTS()                                                 \
>         TRACE_SYSCALLS()                                                \
>         KPROBE_BLACKLIST()                                              \
>         ERROR_INJECT_WHITELIST()                                        \
>         MEM_DISCARD(init.rodata)                                        \
> 
> 
> So it should be dropped after boot.

Urgh, clearly I got lost somewhere along the line. In that case it
doesn't matter nearly as much.
