linux-kbuild.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] x86: enable HAVE_LD_DEAD_CODE_DATA_ELIMINATION
@ 2024-04-22  6:05 Yuntao Liu
  2024-04-22 16:01 ` Masahiro Yamada
                   ` (3 more replies)
  0 siblings, 4 replies; 11+ messages in thread
From: Yuntao Liu @ 2024-04-22  6:05 UTC (permalink / raw)
  To: linux-kernel, linux-kbuild
  Cc: tglx, mingo, bp, dave.hansen, hpa, masahiroy, nathan, nicolas,
	peterz, jpoimboe, leitao, petr.pavlu, richard.weiyang,
	ruanjinjie, ndesaulniers, jgross, liuyuntao12

The current x86 architecture does not yet support the
HAVE_LD_DEAD_CODE_DATA_ELIMINATION feature. x86 is widely used in
embedded scenarios, and enabling this feature would be beneficial for
reducing the size of the kernel image.

In order to make this work, we keep the necessary tables by annotating
them with KEEP, also it requires further changes to linker script to KEEP
some tables and wildcard compiler generated sections into the right place.

Enabling CONFIG_UNWINDER_ORC or CONFIG_MITIGATION_RETPOLINE will enable
the objtool's --orc and --retpoline parameters, which will alter the
layout of the binary file, thereby preventing gc-sections from functioning
properly. Therefore, HAVE_LD_DEAD_CODE_DATA_ELIMINATION should only be
selected when they are not enabled.

Enabling CONFIG_LTO_CLANG or CONFIG_X86_KERNEL_IBT will use vmlinux.o
instead of performing the slow LTO link again. This can also prevent
gc-sections from functioning properly. Therefore, use this optimization
only when CONFIG_LD_DEAD_CODE_DATA_ELIMINATION is not enabled.

The size comparison of zImage is as follows:
x86_64_defconfig   i386_defconfig    tinyconfig
10892288           10826240          607232          no dce
10748928           10719744          529408          dce
1.3%               0.98%             12.8%           shrink

When using smaller config file, there is a significant reduction in the
size of the zImage.
---
 arch/x86/Kconfig              |  1 +
 arch/x86/kernel/vmlinux.lds.S | 24 ++++++++++++------------
 scripts/link-vmlinux.sh       |  2 +-
 3 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index a902680b6537..92dfbc8ee4e7 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -247,6 +247,7 @@ config X86
 	select HAVE_FUNCTION_ERROR_INJECTION
 	select HAVE_KRETPROBES
 	select HAVE_RETHOOK
+	select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if !CONFIG_UNWINDER_ORC && !CONFIG_MITIGATION_RETPOLINE
 	select HAVE_LIVEPATCH			if X86_64
 	select HAVE_MIXED_BREAKPOINTS_REGS
 	select HAVE_MOD_ARCH_SPECIFIC
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 3509afc6a672..aeee2b9b6a6a 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -253,7 +253,7 @@ SECTIONS
 
 	.x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
 		__x86_cpu_dev_start = .;
-		*(.x86_cpu_dev.init)
+		KEEP(*(.x86_cpu_dev.init))
 		__x86_cpu_dev_end = .;
 	}
 
@@ -261,7 +261,7 @@ SECTIONS
 	.x86_intel_mid_dev.init : AT(ADDR(.x86_intel_mid_dev.init) - \
 								LOAD_OFFSET) {
 		__x86_intel_mid_dev_start = .;
-		*(.x86_intel_mid_dev.init)
+		KEEP(*(.x86_intel_mid_dev.init))
 		__x86_intel_mid_dev_end = .;
 	}
 #endif
@@ -275,21 +275,21 @@ SECTIONS
 	. = ALIGN(8);
 	.retpoline_sites : AT(ADDR(.retpoline_sites) - LOAD_OFFSET) {
 		__retpoline_sites = .;
-		*(.retpoline_sites)
+		KEEP(*(.retpoline_sites))
 		__retpoline_sites_end = .;
 	}
 
 	. = ALIGN(8);
 	.return_sites : AT(ADDR(.return_sites) - LOAD_OFFSET) {
 		__return_sites = .;
-		*(.return_sites)
+		KEEP(*(.return_sites))
 		__return_sites_end = .;
 	}
 
 	. = ALIGN(8);
 	.call_sites : AT(ADDR(.call_sites) - LOAD_OFFSET) {
 		__call_sites = .;
-		*(.call_sites)
+		KEEP(*(.call_sites))
 		__call_sites_end = .;
 	}
 #endif
@@ -298,7 +298,7 @@ SECTIONS
 	. = ALIGN(8);
 	.ibt_endbr_seal : AT(ADDR(.ibt_endbr_seal) - LOAD_OFFSET) {
 		__ibt_endbr_seal = .;
-		*(.ibt_endbr_seal)
+		KEEP(*(.ibt_endbr_seal))
 		__ibt_endbr_seal_end = .;
 	}
 #endif
@@ -307,7 +307,7 @@ SECTIONS
 	. = ALIGN(8);
 	.cfi_sites : AT(ADDR(.cfi_sites) - LOAD_OFFSET) {
 		__cfi_sites = .;
-		*(.cfi_sites)
+		KEEP(*(.cfi_sites))
 		__cfi_sites_end = .;
 	}
 #endif
@@ -320,7 +320,7 @@ SECTIONS
 	. = ALIGN(8);
 	.altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
 		__alt_instructions = .;
-		*(.altinstructions)
+		KEEP(*(.altinstructions))
 		__alt_instructions_end = .;
 	}
 
@@ -330,13 +330,13 @@ SECTIONS
 	 * get the address and the length of them to patch the kernel safely.
 	 */
 	.altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
-		*(.altinstr_replacement)
+		KEEP(*(.altinstr_replacement))
 	}
 
 	. = ALIGN(8);
 	.apicdrivers : AT(ADDR(.apicdrivers) - LOAD_OFFSET) {
 		__apicdrivers = .;
-		*(.apicdrivers);
+		KEEP(*(.apicdrivers));
 		__apicdrivers_end = .;
 	}
 
@@ -406,7 +406,7 @@ SECTIONS
 	.brk : AT(ADDR(.brk) - LOAD_OFFSET) {
 		__brk_base = .;
 		. += 64 * 1024;		/* 64k alignment slop space */
-		*(.bss..brk)		/* areas brk users have reserved */
+		KEEP(*(.bss..brk))	/* areas brk users have reserved */
 		__brk_limit = .;
 	}
 
@@ -432,7 +432,7 @@ SECTIONS
 	. = ALIGN(HPAGE_SIZE);
 	.init.scratch : AT(ADDR(.init.scratch) - LOAD_OFFSET) {
 		__init_scratch_begin = .;
-		*(.init.scratch)
+		KEEP(*(.init.scratch))
 		. = ALIGN(HPAGE_SIZE);
 		__init_scratch_end = .;
 	}
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index 7862a8101747..7287b5a9f17d 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -60,7 +60,7 @@ vmlinux_link()
 	# skip output file argument
 	shift
 
-	if is_enabled CONFIG_LTO_CLANG || is_enabled CONFIG_X86_KERNEL_IBT; then
+	if [ is_enabled CONFIG_LTO_CLANG || is_enabled CONFIG_X86_KERNEL_IBT ] && [ ! is_enabled CONFIG_LD_DEAD_CODE_DATA_ELIMINATION ]; then
 		# Use vmlinux.o instead of performing the slow LTO link again.
 		objs=vmlinux.o
 		libs=
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH] x86: enable HAVE_LD_DEAD_CODE_DATA_ELIMINATION
  2024-04-22  6:05 [PATCH] x86: enable HAVE_LD_DEAD_CODE_DATA_ELIMINATION Yuntao Liu
@ 2024-04-22 16:01 ` Masahiro Yamada
  2024-04-23 11:48   ` liuyuntao (F)
  2024-04-22 19:24 ` Nathan Chancellor
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 11+ messages in thread
From: Masahiro Yamada @ 2024-04-22 16:01 UTC (permalink / raw)
  To: Yuntao Liu
  Cc: linux-kernel, linux-kbuild, tglx, mingo, bp, dave.hansen, hpa,
	nathan, nicolas, peterz, jpoimboe, leitao, petr.pavlu,
	richard.weiyang, ruanjinjie, ndesaulniers, jgross

On Mon, Apr 22, 2024 at 3:41 PM Yuntao Liu <liuyuntao12@huawei.com> wrote:
>
> The current x86 architecture does not yet support the
> HAVE_LD_DEAD_CODE_DATA_ELIMINATION feature. x86 is widely used in
> embedded scenarios, and enabling this feature would be beneficial for
> reducing the size of the kernel image.
>
> In order to make this work, we keep the necessary tables by annotating
> them with KEEP, also it requires further changes to linker script to KEEP
> some tables and wildcard compiler generated sections into the right place.
>
> Enabling CONFIG_UNWINDER_ORC or CONFIG_MITIGATION_RETPOLINE will enable
> the objtool's --orc and --retpoline parameters, which will alter the
> layout of the binary file, thereby preventing gc-sections from functioning
> properly. Therefore, HAVE_LD_DEAD_CODE_DATA_ELIMINATION should only be
> selected when they are not enabled.
>
> Enabling CONFIG_LTO_CLANG or CONFIG_X86_KERNEL_IBT will use vmlinux.o
> instead of performing the slow LTO link again. This can also prevent
> gc-sections from functioning properly. Therefore, using this optimization
> when CONFIG_LD_DEAD_CODE_DATA_ELIMINATION is not enabled.
>
> The size comparison of zImage is as follows:
> x86_def_defconfig  i386_defconfig    tinyconfig
> 10892288           10826240          607232          no dce
> 10748928           10719744          529408          dce
> 1.3%               0.98%             12.8%           shrink
>
> When using smaller config file, there is a significant reduction in the
> size of the zImage.
> ---

> diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
> index 7862a8101747..7287b5a9f17d 100755
> --- a/scripts/link-vmlinux.sh
> +++ b/scripts/link-vmlinux.sh
> @@ -60,7 +60,7 @@ vmlinux_link()
>         # skip output file argument
>         shift
>
> -       if is_enabled CONFIG_LTO_CLANG || is_enabled CONFIG_X86_KERNEL_IBT; then
> +       if [ is_enabled CONFIG_LTO_CLANG || is_enabled CONFIG_X86_KERNEL_IBT ] && [ ! is_enabled CONFIG_LD_DEAD_CODE_DATA_ELIMINATION ]; then
>                 # Use vmlinux.o instead of performing the slow LTO link again.
>                 objs=vmlinux.o
>                 libs=
> --


This is wrong.
You should not put is_enabled inside [ ... ]

(is_enabled CONFIG_LTO_CLANG || is_enabled CONFIG_X86_KERNEL_IBT) && !
is_enabled CONFIG_LD_DEAD_CODE_DATA_ELIMINATION

is still weird.


When CONFIG_LTO_CLANG=y and CONFIG_LD_DEAD_CODE_DATA_ELIMINATION=y,
the result of LTO will be discarded.


-- 
Best Regards
Masahiro Yamada

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] x86: enable HAVE_LD_DEAD_CODE_DATA_ELIMINATION
  2024-04-22  6:05 [PATCH] x86: enable HAVE_LD_DEAD_CODE_DATA_ELIMINATION Yuntao Liu
  2024-04-22 16:01 ` Masahiro Yamada
@ 2024-04-22 19:24 ` Nathan Chancellor
  2024-04-23 12:02   ` liuyuntao (F)
  2024-04-23  0:59 ` Wei Yang
  2024-04-24 11:06 ` Alexander Lobakin
  3 siblings, 1 reply; 11+ messages in thread
From: Nathan Chancellor @ 2024-04-22 19:24 UTC (permalink / raw)
  To: Yuntao Liu
  Cc: linux-kernel, linux-kbuild, tglx, mingo, bp, dave.hansen, hpa,
	masahiroy, nicolas, peterz, jpoimboe, leitao, petr.pavlu,
	richard.weiyang, ruanjinjie, ndesaulniers, jgross

Hi Yuntao,

Just a drive by review since I saw this patch via another CC in my
inbox, I would wait for x86 maintainer thoughts before sending a v2.

On Mon, Apr 22, 2024 at 06:05:56AM +0000, Yuntao Liu wrote:
> The current x86 architecture does not yet support the
> HAVE_LD_DEAD_CODE_DATA_ELIMINATION feature. x86 is widely used in
> embedded scenarios, and enabling this feature would be beneficial for
> reducing the size of the kernel image.
> 
> In order to make this work, we keep the necessary tables by annotating
> them with KEEP, also it requires further changes to linker script to KEEP
> some tables and wildcard compiler generated sections into the right place.
> 
> Enabling CONFIG_UNWINDER_ORC or CONFIG_MITIGATION_RETPOLINE will enable
> the objtool's --orc and --retpoline parameters, which will alter the
> layout of the binary file, thereby preventing gc-sections from functioning
> properly. Therefore, HAVE_LD_DEAD_CODE_DATA_ELIMINATION should only be
> selected when they are not enabled.
> 
> Enabling CONFIG_LTO_CLANG or CONFIG_X86_KERNEL_IBT will use vmlinux.o
> instead of performing the slow LTO link again. This can also prevent
> gc-sections from functioning properly. Therefore, using this optimization
> when CONFIG_LD_DEAD_CODE_DATA_ELIMINATION is not enabled.

These two paragraphs indicate to me that this feature will be
unselectable in the vast majority of x86 configurations, why should the
upstream kernel support it in that case?

> The size comparison of zImage is as follows:

                         ^ bzImage?

> x86_def_defconfig  i386_defconfig    tinyconfig
> 10892288           10826240          607232          no dce
> 10748928           10719744          529408          dce
> 1.3%               0.98%             12.8%           shrink
> 
> When using smaller config file, there is a significant reduction in the
> size of the zImage.

Same here.

What toolchain was this tested with? There have been behavior
differences between the GNU and LLVM toolchains that have shown up when
dead code elimination is enabled, such as with 32-bit ARM [1] and RISC-V
[2]. While I am not saying there are any problems here, it would be good
to qualify how well this has been tested and perhaps do some testing
with other toolchains and versions, especially since you are touching
areas guarded by CONFIG_LTO_CLANG. Does the resulting kernel boot and
run properly?

[1]: https://lore.kernel.org/30b01c65-12f2-4ee0-81d5-c7a2da2c36b4@app.fastmail.com/
[2]: https://lore.kernel.org/20230622215327.GA1135447@dev-arch.thelio-3990X/

> ---
>  arch/x86/Kconfig              |  1 +
>  arch/x86/kernel/vmlinux.lds.S | 24 ++++++++++++------------
>  scripts/link-vmlinux.sh       |  2 +-
>  3 files changed, 14 insertions(+), 13 deletions(-)
> 
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index a902680b6537..92dfbc8ee4e7 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -247,6 +247,7 @@ config X86
>  	select HAVE_FUNCTION_ERROR_INJECTION
>  	select HAVE_KRETPROBES
>  	select HAVE_RETHOOK
> +	select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if !CONFIG_UNWINDER_ORC && !CONFIG_MITIGATION_RETPOLINE

This is incorrect, it should be

    select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if !UNWINDER_ORC && !MITIGATION_RETPOLINE

>  	select HAVE_LIVEPATCH			if X86_64
>  	select HAVE_MIXED_BREAKPOINTS_REGS
>  	select HAVE_MOD_ARCH_SPECIFIC

Cheers,
Nathan

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] x86: enable HAVE_LD_DEAD_CODE_DATA_ELIMINATION
  2024-04-22  6:05 [PATCH] x86: enable HAVE_LD_DEAD_CODE_DATA_ELIMINATION Yuntao Liu
  2024-04-22 16:01 ` Masahiro Yamada
  2024-04-22 19:24 ` Nathan Chancellor
@ 2024-04-23  0:59 ` Wei Yang
  2024-04-23 12:10   ` liuyuntao (F)
  2024-04-24 11:06 ` Alexander Lobakin
  3 siblings, 1 reply; 11+ messages in thread
From: Wei Yang @ 2024-04-23  0:59 UTC (permalink / raw)
  To: Yuntao Liu
  Cc: linux-kernel, linux-kbuild, tglx, mingo, bp, dave.hansen, hpa,
	masahiroy, nathan, nicolas, peterz, jpoimboe, leitao, petr.pavlu,
	richard.weiyang, ruanjinjie, ndesaulniers, jgross

Hi, Yuntao

I am not that familiar with this, so I have some questions below.

On Mon, Apr 22, 2024 at 06:05:56AM +0000, Yuntao Liu wrote:
>The current x86 architecture does not yet support the
>HAVE_LD_DEAD_CODE_DATA_ELIMINATION feature. x86 is widely used in
>embedded scenarios, and enabling this feature would be beneficial for
>reducing the size of the kernel image.
>
>In order to make this work, we keep the necessary tables by annotating
>them with KEEP, also it requires further changes to linker script to KEEP
>some tables and wildcard compiler generated sections into the right place.
>
>Enabling CONFIG_UNWINDER_ORC or CONFIG_MITIGATION_RETPOLINE will enable
>the objtool's --orc and --retpoline parameters, which will alter the
>layout of the binary file, thereby preventing gc-sections from functioning
>properly. Therefore, HAVE_LD_DEAD_CODE_DATA_ELIMINATION should only be
>selected when they are not enabled.
>
>Enabling CONFIG_LTO_CLANG or CONFIG_X86_KERNEL_IBT will use vmlinux.o
>instead of performing the slow LTO link again. This can also prevent
>gc-sections from functioning properly. Therefore, using this optimization
>when CONFIG_LD_DEAD_CODE_DATA_ELIMINATION is not enabled.
>
>The size comparison of zImage is as follows:
>x86_def_defconfig  i386_defconfig    tinyconfig
>10892288           10826240          607232          no dce
>10748928           10719744          529408          dce
>1.3%               0.98%             12.8%           shrink
>
>When using smaller config file, there is a significant reduction in the
>size of the zImage.
>---
> arch/x86/Kconfig              |  1 +
> arch/x86/kernel/vmlinux.lds.S | 24 ++++++++++++------------
> scripts/link-vmlinux.sh       |  2 +-
> 3 files changed, 14 insertions(+), 13 deletions(-)
>
>diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
>index a902680b6537..92dfbc8ee4e7 100644
>--- a/arch/x86/Kconfig
>+++ b/arch/x86/Kconfig
>@@ -247,6 +247,7 @@ config X86
> 	select HAVE_FUNCTION_ERROR_INJECTION
> 	select HAVE_KRETPROBES
> 	select HAVE_RETHOOK
>+	select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if !CONFIG_UNWINDER_ORC && !CONFIG_MITIGATION_RETPOLINE

How can we be sure that only these two configs affect the feature?

> 	select HAVE_LIVEPATCH			if X86_64
> 	select HAVE_MIXED_BREAKPOINTS_REGS
> 	select HAVE_MOD_ARCH_SPECIFIC
>diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
>index 3509afc6a672..aeee2b9b6a6a 100644
>--- a/arch/x86/kernel/vmlinux.lds.S
>+++ b/arch/x86/kernel/vmlinux.lds.S
>@@ -253,7 +253,7 @@ SECTIONS
> 
> 	.x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
> 		__x86_cpu_dev_start = .;
>-		*(.x86_cpu_dev.init)
>+		KEEP(*(.x86_cpu_dev.init))

How did you choose the sections to keep?

If my understanding is correct, keeping the sections is for gc-sections. It
sounds like we need to keep all possible sections, otherwise it would misbehave?

> 		__x86_cpu_dev_end = .;
> 	}
> 
>@@ -261,7 +261,7 @@ SECTIONS
> 	.x86_intel_mid_dev.init : AT(ADDR(.x86_intel_mid_dev.init) - \
> 								LOAD_OFFSET) {
> 		__x86_intel_mid_dev_start = .;
>-		*(.x86_intel_mid_dev.init)
>+		KEEP(*(.x86_intel_mid_dev.init))
> 		__x86_intel_mid_dev_end = .;
> 	}
> #endif
>@@ -275,21 +275,21 @@ SECTIONS
> 	. = ALIGN(8);
> 	.retpoline_sites : AT(ADDR(.retpoline_sites) - LOAD_OFFSET) {
> 		__retpoline_sites = .;
>-		*(.retpoline_sites)
>+		KEEP(*(.retpoline_sites))
> 		__retpoline_sites_end = .;
> 	}
> 
> 	. = ALIGN(8);
> 	.return_sites : AT(ADDR(.return_sites) - LOAD_OFFSET) {
> 		__return_sites = .;
>-		*(.return_sites)
>+		KEEP(*(.return_sites))
> 		__return_sites_end = .;
> 	}
> 
> 	. = ALIGN(8);
> 	.call_sites : AT(ADDR(.call_sites) - LOAD_OFFSET) {
> 		__call_sites = .;
>-		*(.call_sites)
>+		KEEP(*(.call_sites))
> 		__call_sites_end = .;
> 	}
> #endif
>@@ -298,7 +298,7 @@ SECTIONS
> 	. = ALIGN(8);
> 	.ibt_endbr_seal : AT(ADDR(.ibt_endbr_seal) - LOAD_OFFSET) {
> 		__ibt_endbr_seal = .;
>-		*(.ibt_endbr_seal)
>+		KEEP(*(.ibt_endbr_seal))
> 		__ibt_endbr_seal_end = .;
> 	}
> #endif
>@@ -307,7 +307,7 @@ SECTIONS
> 	. = ALIGN(8);
> 	.cfi_sites : AT(ADDR(.cfi_sites) - LOAD_OFFSET) {
> 		__cfi_sites = .;
>-		*(.cfi_sites)
>+		KEEP(*(.cfi_sites))
> 		__cfi_sites_end = .;
> 	}
> #endif
>@@ -320,7 +320,7 @@ SECTIONS
> 	. = ALIGN(8);
> 	.altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
> 		__alt_instructions = .;
>-		*(.altinstructions)
>+		KEEP(*(.altinstructions))
> 		__alt_instructions_end = .;
> 	}
> 
>@@ -330,13 +330,13 @@ SECTIONS
> 	 * get the address and the length of them to patch the kernel safely.
> 	 */
> 	.altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
>-		*(.altinstr_replacement)
>+		KEEP(*(.altinstr_replacement))
> 	}
> 
> 	. = ALIGN(8);
> 	.apicdrivers : AT(ADDR(.apicdrivers) - LOAD_OFFSET) {
> 		__apicdrivers = .;
>-		*(.apicdrivers);
>+		KEEP(*(.apicdrivers));
> 		__apicdrivers_end = .;
> 	}
> 
>@@ -406,7 +406,7 @@ SECTIONS
> 	.brk : AT(ADDR(.brk) - LOAD_OFFSET) {
> 		__brk_base = .;
> 		. += 64 * 1024;		/* 64k alignment slop space */
>-		*(.bss..brk)		/* areas brk users have reserved */
>+		KEEP(*(.bss..brk))	/* areas brk users have reserved */
> 		__brk_limit = .;
> 	}
> 
>@@ -432,7 +432,7 @@ SECTIONS
> 	. = ALIGN(HPAGE_SIZE);
> 	.init.scratch : AT(ADDR(.init.scratch) - LOAD_OFFSET) {
> 		__init_scratch_begin = .;
>-		*(.init.scratch)
>+		KEEP(*(.init.scratch))
> 		. = ALIGN(HPAGE_SIZE);
> 		__init_scratch_end = .;
> 	}
>diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
>index 7862a8101747..7287b5a9f17d 100755
>--- a/scripts/link-vmlinux.sh
>+++ b/scripts/link-vmlinux.sh
>@@ -60,7 +60,7 @@ vmlinux_link()
> 	# skip output file argument
> 	shift
> 
>-	if is_enabled CONFIG_LTO_CLANG || is_enabled CONFIG_X86_KERNEL_IBT; then
>+	if [ is_enabled CONFIG_LTO_CLANG || is_enabled CONFIG_X86_KERNEL_IBT ] && [ ! is_enabled CONFIG_LD_DEAD_CODE_DATA_ELIMINATION ]; then
> 		# Use vmlinux.o instead of performing the slow LTO link again.
> 		objs=vmlinux.o
> 		libs=
>-- 
>2.34.1

-- 
Wei Yang
Help you, Help me

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] x86: enable HAVE_LD_DEAD_CODE_DATA_ELIMINATION
  2024-04-22 16:01 ` Masahiro Yamada
@ 2024-04-23 11:48   ` liuyuntao (F)
  0 siblings, 0 replies; 11+ messages in thread
From: liuyuntao (F) @ 2024-04-23 11:48 UTC (permalink / raw)
  To: Masahiro Yamada
  Cc: linux-kernel, linux-kbuild, tglx, mingo, bp, dave.hansen, hpa,
	nathan, nicolas, peterz, jpoimboe, leitao, petr.pavlu,
	richard.weiyang, ruanjinjie, ndesaulniers, jgross



On 2024/4/23 0:01, Masahiro Yamada wrote:
> On Mon, Apr 22, 2024 at 3:41 PM Yuntao Liu <liuyuntao12@huawei.com> wrote:
>>
>> The current x86 architecture does not yet support the
>> HAVE_LD_DEAD_CODE_DATA_ELIMINATION feature. x86 is widely used in
>> embedded scenarios, and enabling this feature would be beneficial for
>> reducing the size of the kernel image.
>>
>> In order to make this work, we keep the necessary tables by annotating
>> them with KEEP, also it requires further changes to linker script to KEEP
>> some tables and wildcard compiler generated sections into the right place.
>>
>> Enabling CONFIG_UNWINDER_ORC or CONFIG_MITIGATION_RETPOLINE will enable
>> the objtool's --orc and --retpoline parameters, which will alter the
>> layout of the binary file, thereby preventing gc-sections from functioning
>> properly. Therefore, HAVE_LD_DEAD_CODE_DATA_ELIMINATION should only be
>> selected when they are not enabled.
>>
>> Enabling CONFIG_LTO_CLANG or CONFIG_X86_KERNEL_IBT will use vmlinux.o
>> instead of performing the slow LTO link again. This can also prevent
>> gc-sections from functioning properly. Therefore, using this optimization
>> when CONFIG_LD_DEAD_CODE_DATA_ELIMINATION is not enabled.
>>
>> The size comparison of zImage is as follows:
>> x86_def_defconfig  i386_defconfig    tinyconfig
>> 10892288           10826240          607232          no dce
>> 10748928           10719744          529408          dce
>> 1.3%               0.98%             12.8%           shrink
>>
>> When using smaller config file, there is a significant reduction in the
>> size of the zImage.
>> ---
> 
>> diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
>> index 7862a8101747..7287b5a9f17d 100755
>> --- a/scripts/link-vmlinux.sh
>> +++ b/scripts/link-vmlinux.sh
>> @@ -60,7 +60,7 @@ vmlinux_link()
>>          # skip output file argument
>>          shift
>>
>> -       if is_enabled CONFIG_LTO_CLANG || is_enabled CONFIG_X86_KERNEL_IBT; then
>> +       if [ is_enabled CONFIG_LTO_CLANG || is_enabled CONFIG_X86_KERNEL_IBT ] && [ ! is_enabled CONFIG_LD_DEAD_CODE_DATA_ELIMINATION ]; then
>>                  # Use vmlinux.o instead of performing the slow LTO link again.
>>                  objs=vmlinux.o
>>                  libs=
>> --
> 
> 
> This is wrong.
> You should not put is_enabled inside [ ... ]
> 

My mistake.

> (is_enabled CONFIG_LTO_CLANG || is_enabled CONFIG_X86_KERNEL_IBT) && !
> is_enabled CONFIG_LD_DEAD_CODE_DATA_ELIMINATION
> 
> is still weird.
> 
> 
> When CONFIG_LTO_CLANG=y and CONFIG_LD_DEAD_CODE_DATA_ELIMINATION=y,
> the result of LTO will be discarded.
>

On arm and risc-v, these two configs can both be enabled without any
issues, so I think it should be the same for x86 as well.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] x86: enable HAVE_LD_DEAD_CODE_DATA_ELIMINATION
  2024-04-22 19:24 ` Nathan Chancellor
@ 2024-04-23 12:02   ` liuyuntao (F)
  0 siblings, 0 replies; 11+ messages in thread
From: liuyuntao (F) @ 2024-04-23 12:02 UTC (permalink / raw)
  To: Nathan Chancellor
  Cc: linux-kernel, linux-kbuild, tglx, mingo, bp, dave.hansen, hpa,
	masahiroy, nicolas, peterz, jpoimboe, leitao, petr.pavlu,
	richard.weiyang, ruanjinjie, ndesaulniers, jgross



On 2024/4/23 3:24, Nathan Chancellor wrote:
> Hi Yuntao,
> 
> Just a drive by review since I saw this patch via another CC in my
> inbox, I would wait for x86 maintainer thoughts before sending a v2.
> 
> On Mon, Apr 22, 2024 at 06:05:56AM +0000, Yuntao Liu wrote:
>> The current x86 architecture does not yet support the
>> HAVE_LD_DEAD_CODE_DATA_ELIMINATION feature. x86 is widely used in
>> embedded scenarios, and enabling this feature would be beneficial for
>> reducing the size of the kernel image.
>>
>> In order to make this work, we keep the necessary tables by annotating
>> them with KEEP, also it requires further changes to linker script to KEEP
>> some tables and wildcard compiler generated sections into the right place.
>>
>> Enabling CONFIG_UNWINDER_ORC or CONFIG_MITIGATION_RETPOLINE will enable
>> the objtool's --orc and --retpoline parameters, which will alter the
>> layout of the binary file, thereby preventing gc-sections from functioning
>> properly. Therefore, HAVE_LD_DEAD_CODE_DATA_ELIMINATION should only be
>> selected when they are not enabled.
>>
>> Enabling CONFIG_LTO_CLANG or CONFIG_X86_KERNEL_IBT will use vmlinux.o
>> instead of performing the slow LTO link again. This can also prevent
>> gc-sections from functioning properly. Therefore, using this optimization
>> when CONFIG_LD_DEAD_CODE_DATA_ELIMINATION is not enabled.
> 
> These two paragraphs indicate to me that this feature will be
> unselectable the vast majority of x86 configurations, why should the
> upstream kernel support it in that case?
> 

This is just to refine the DEAD_CODE_DATA_ELIMINATION feature; it might
also offer users an extra choice in certain situations.

>> The size comparison of zImage is as follows:
> 
>                           ^ bzImage?
> 

Yes, it should be bzImage.

>> x86_def_defconfig  i386_defconfig    tinyconfig
>> 10892288           10826240          607232          no dce
>> 10748928           10719744          529408          dce
>> 1.3%               0.98%             12.8%           shrink
>>
>> When using smaller config file, there is a significant reduction in the
>> size of the zImage.
> 
> Same here.
> 
> What toolchain was this tested with? There have been behavior
> differences between the GNU and LLVM toolchains that have shown up when
> dead code elimination is enabled, such as with 32-bit ARM [1] and RISC-V
> [2]. While I am not saying there are any problems here, it would be good
> to qualify how well this has been tested and perhaps do some testing
> with other toolchains and versions, especially since you are touching
> areas guarded by CONFIG_LTO_CLANG. Does the resulting kernel boot and
> run properly?
> 
> [1]: https://lore.kernel.org/30b01c65-12f2-4ee0-81d5-c7a2da2c36b4@app.fastmail.com/
> [2]: https://lore.kernel.org/20230622215327.GA1135447@dev-arch.thelio-3990X/
> 

I use GNU toolchains, and the kernel boots well with x86_64_defconfig in 
qemu.
Using LLVM toolchains, I came across a link failure:
> ld.lld: error: undefined hidden symbol: __alt_reloc_selftest               
> referenced by alternative.c                                            
>                .thinlto-cache/llvmcache-6140C39409062E0AC950603FE9B6042154C497B6:(.altinstr_replacement+0x30)
I am still struggling with it.

>> ---
>>   arch/x86/Kconfig              |  1 +
>>   arch/x86/kernel/vmlinux.lds.S | 24 ++++++++++++------------
>>   scripts/link-vmlinux.sh       |  2 +-
>>   3 files changed, 14 insertions(+), 13 deletions(-)
>>
>> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
>> index a902680b6537..92dfbc8ee4e7 100644
>> --- a/arch/x86/Kconfig
>> +++ b/arch/x86/Kconfig
>> @@ -247,6 +247,7 @@ config X86
>>   	select HAVE_FUNCTION_ERROR_INJECTION
>>   	select HAVE_KRETPROBES
>>   	select HAVE_RETHOOK
>> +	select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if !CONFIG_UNWINDER_ORC && !CONFIG_MITIGATION_RETPOLINE
> 
> This is incorrect, it should be
> 
>      select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if !UNWINDER_ORC && !MITIGATION_RETPOLINE
> 

It is my mistake.

>>   	select HAVE_LIVEPATCH			if X86_64
>>   	select HAVE_MIXED_BREAKPOINTS_REGS
>>   	select HAVE_MOD_ARCH_SPECIFIC
> 
> Cheers,
> Nathan

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] x86: enable HAVE_LD_DEAD_CODE_DATA_ELIMINATION
  2024-04-23  0:59 ` Wei Yang
@ 2024-04-23 12:10   ` liuyuntao (F)
  0 siblings, 0 replies; 11+ messages in thread
From: liuyuntao (F) @ 2024-04-23 12:10 UTC (permalink / raw)
  To: Wei Yang
  Cc: linux-kernel, linux-kbuild, tglx, mingo, bp, dave.hansen, hpa,
	masahiroy, nathan, nicolas, peterz, jpoimboe, leitao, petr.pavlu,
	ruanjinjie, ndesaulniers, jgross



On 2024/4/23 8:59, Wei Yang wrote:
> Hi, Yuntao
> 
> I am not that familiar with this, so got some questions below.
> 
> On Mon, Apr 22, 2024 at 06:05:56AM +0000, Yuntao Liu wrote:
>> The current x86 architecture does not yet support the
>> HAVE_LD_DEAD_CODE_DATA_ELIMINATION feature. x86 is widely used in
>> embedded scenarios, and enabling this feature would be beneficial for
>> reducing the size of the kernel image.
>>
>> In order to make this work, we keep the necessary tables by annotating
>> them with KEEP, also it requires further changes to linker script to KEEP
>> some tables and wildcard compiler generated sections into the right place.
>>
>> Enabling CONFIG_UNWINDER_ORC or CONFIG_MITIGATION_RETPOLINE will enable
>> the objtool's --orc and --retpoline parameters, which will alter the
>> layout of the binary file, thereby preventing gc-sections from functioning
>> properly. Therefore, HAVE_LD_DEAD_CODE_DATA_ELIMINATION should only be
>> selected when they are not enabled.
>>
>> Enabling CONFIG_LTO_CLANG or CONFIG_X86_KERNEL_IBT will use vmlinux.o
>> instead of performing the slow LTO link again. This can also prevent
>> gc-sections from functioning properly. Therefore, using this optimization
>> when CONFIG_LD_DEAD_CODE_DATA_ELIMINATION is not enabled.
>>
>> The size comparison of zImage is as follows:
>> x86_def_defconfig  i386_defconfig    tinyconfig
>> 10892288           10826240          607232          no dce
>> 10748928           10719744          529408          dce
>> 1.3%               0.98%             12.8%           shrink
>>
>> When using smaller config file, there is a significant reduction in the
>> size of the zImage.
>> ---
>> arch/x86/Kconfig              |  1 +
>> arch/x86/kernel/vmlinux.lds.S | 24 ++++++++++++------------
>> scripts/link-vmlinux.sh       |  2 +-
>> 3 files changed, 14 insertions(+), 13 deletions(-)
>>
>> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
>> index a902680b6537..92dfbc8ee4e7 100644
>> --- a/arch/x86/Kconfig
>> +++ b/arch/x86/Kconfig
>> @@ -247,6 +247,7 @@ config X86
>> 	select HAVE_FUNCTION_ERROR_INJECTION
>> 	select HAVE_KRETPROBES
>> 	select HAVE_RETHOOK
>> +	select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if !CONFIG_UNWINDER_ORC && !CONFIG_MITIGATION_RETPOLINE
> 
> How to make sure only these two config affect the function?
> 

I have tested all the parameters of objtool, and only when these two
configs are disabled can gc-sections take effect.

>> 	select HAVE_LIVEPATCH			if X86_64
>> 	select HAVE_MIXED_BREAKPOINTS_REGS
>> 	select HAVE_MOD_ARCH_SPECIFIC
>> diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
>> index 3509afc6a672..aeee2b9b6a6a 100644
>> --- a/arch/x86/kernel/vmlinux.lds.S
>> +++ b/arch/x86/kernel/vmlinux.lds.S
>> @@ -253,7 +253,7 @@ SECTIONS
>>
>> 	.x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
>> 		__x86_cpu_dev_start = .;
>> -		*(.x86_cpu_dev.init)
>> +		KEEP(*(.x86_cpu_dev.init))
> 
> How you choose the sections to keep?
> 
> If my understanding is correct, keep the sections is for gc-section. Sounds we
> need to keep all possible section, otherwise it would mis-behave?
> 

Nicholas Piggin has mentioned it before.

> FYI, easiest way to check if you forgot to KEEP a linker table is
> to look at `readelf -S vmlinux` differences, and to see what is
> being trimmed, look at nm differences or use --print-gc-sections
> LD option to see what symbols you're trimming. Linker tables,
> boot entry, and exception entry tends to require anchoring.
https://lore.kernel.org/all/20170709031333.29443-1-npiggin@gmail.com/

>> 		__x86_cpu_dev_end = .;
>> 	}
>>
>> @@ -261,7 +261,7 @@ SECTIONS
>> 	.x86_intel_mid_dev.init : AT(ADDR(.x86_intel_mid_dev.init) - \
>> 								LOAD_OFFSET) {
>> 		__x86_intel_mid_dev_start = .;
>> -		*(.x86_intel_mid_dev.init)
>> +		KEEP(*(.x86_intel_mid_dev.init))
>> 		__x86_intel_mid_dev_end = .;
>> 	}
>> #endif
>> @@ -275,21 +275,21 @@ SECTIONS
>> 	. = ALIGN(8);
>> 	.retpoline_sites : AT(ADDR(.retpoline_sites) - LOAD_OFFSET) {
>> 		__retpoline_sites = .;
>> -		*(.retpoline_sites)
>> +		KEEP(*(.retpoline_sites))
>> 		__retpoline_sites_end = .;
>> 	}
>>
>> 	. = ALIGN(8);
>> 	.return_sites : AT(ADDR(.return_sites) - LOAD_OFFSET) {
>> 		__return_sites = .;
>> -		*(.return_sites)
>> +		KEEP(*(.return_sites))
>> 		__return_sites_end = .;
>> 	}
>>
>> 	. = ALIGN(8);
>> 	.call_sites : AT(ADDR(.call_sites) - LOAD_OFFSET) {
>> 		__call_sites = .;
>> -		*(.call_sites)
>> +		KEEP(*(.call_sites))
>> 		__call_sites_end = .;
>> 	}
>> #endif
>> @@ -298,7 +298,7 @@ SECTIONS
>> 	. = ALIGN(8);
>> 	.ibt_endbr_seal : AT(ADDR(.ibt_endbr_seal) - LOAD_OFFSET) {
>> 		__ibt_endbr_seal = .;
>> -		*(.ibt_endbr_seal)
>> +		KEEP(*(.ibt_endbr_seal))
>> 		__ibt_endbr_seal_end = .;
>> 	}
>> #endif
>> @@ -307,7 +307,7 @@ SECTIONS
>> 	. = ALIGN(8);
>> 	.cfi_sites : AT(ADDR(.cfi_sites) - LOAD_OFFSET) {
>> 		__cfi_sites = .;
>> -		*(.cfi_sites)
>> +		KEEP(*(.cfi_sites))
>> 		__cfi_sites_end = .;
>> 	}
>> #endif
>> @@ -320,7 +320,7 @@ SECTIONS
>> 	. = ALIGN(8);
>> 	.altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
>> 		__alt_instructions = .;
>> -		*(.altinstructions)
>> +		KEEP(*(.altinstructions))
>> 		__alt_instructions_end = .;
>> 	}
>>
>> @@ -330,13 +330,13 @@ SECTIONS
>> 	 * get the address and the length of them to patch the kernel safely.
>> 	 */
>> 	.altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
>> -		*(.altinstr_replacement)
>> +		KEEP(*(.altinstr_replacement))
>> 	}
>>
>> 	. = ALIGN(8);
>> 	.apicdrivers : AT(ADDR(.apicdrivers) - LOAD_OFFSET) {
>> 		__apicdrivers = .;
>> -		*(.apicdrivers);
>> +		KEEP(*(.apicdrivers));
>> 		__apicdrivers_end = .;
>> 	}
>>
>> @@ -406,7 +406,7 @@ SECTIONS
>> 	.brk : AT(ADDR(.brk) - LOAD_OFFSET) {
>> 		__brk_base = .;
>> 		. += 64 * 1024;		/* 64k alignment slop space */
>> -		*(.bss..brk)		/* areas brk users have reserved */
>> +		KEEP(*(.bss..brk))	/* areas brk users have reserved */
>> 		__brk_limit = .;
>> 	}
>>
>> @@ -432,7 +432,7 @@ SECTIONS
>> 	. = ALIGN(HPAGE_SIZE);
>> 	.init.scratch : AT(ADDR(.init.scratch) - LOAD_OFFSET) {
>> 		__init_scratch_begin = .;
>> -		*(.init.scratch)
>> +		KEEP(*(.init.scratch))
>> 		. = ALIGN(HPAGE_SIZE);
>> 		__init_scratch_end = .;
>> 	}
>> diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
>> index 7862a8101747..7287b5a9f17d 100755
>> --- a/scripts/link-vmlinux.sh
>> +++ b/scripts/link-vmlinux.sh
>> @@ -60,7 +60,7 @@ vmlinux_link()
>> 	# skip output file argument
>> 	shift
>>
>> -	if is_enabled CONFIG_LTO_CLANG || is_enabled CONFIG_X86_KERNEL_IBT; then
>> +	if [ is_enabled CONFIG_LTO_CLANG || is_enabled CONFIG_X86_KERNEL_IBT ] && [ ! is_enabled CONFIG_LD_DEAD_CODE_DATA_ELIMINATION ]; then
>> 		# Use vmlinux.o instead of performing the slow LTO link again.
>> 		objs=vmlinux.o
>> 		libs=
>> -- 
>> 2.34.1
> 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] x86: enable HAVE_LD_DEAD_CODE_DATA_ELIMINATION
  2024-04-22  6:05 [PATCH] x86: enable HAVE_LD_DEAD_CODE_DATA_ELIMINATION Yuntao Liu
                   ` (2 preceding siblings ...)
  2024-04-23  0:59 ` Wei Yang
@ 2024-04-24 11:06 ` Alexander Lobakin
  2024-04-25  6:37   ` liuyuntao (F)
  3 siblings, 1 reply; 11+ messages in thread
From: Alexander Lobakin @ 2024-04-24 11:06 UTC (permalink / raw)
  To: Yuntao Liu
  Cc: linux-kernel, linux-kbuild, tglx, mingo, bp, dave.hansen, hpa,
	masahiroy, nathan, nicolas, peterz, jpoimboe, leitao, petr.pavlu,
	richard.weiyang, ruanjinjie, ndesaulniers, jgross

From: Yuntao Liu <liuyuntao12@huawei.com>
Date: Mon, 22 Apr 2024 06:05:56 +0000

> The current x86 architecture does not yet support the
> HAVE_LD_DEAD_CODE_DATA_ELIMINATION feature. x86 is widely used in
> embedded scenarios, and enabling this feature would be beneficial for
> reducing the size of the kernel image.
> 
> In order to make this work, we keep the necessary tables by annotating
> them with KEEP, also it requires further changes to linker script to KEEP
> some tables and wildcard compiler generated sections into the right place.
> 
> Enabling CONFIG_UNWINDER_ORC or CONFIG_MITIGATION_RETPOLINE will enable
> the objtool's --orc and --retpoline parameters, which will alter the
> layout of the binary file, thereby preventing gc-sections from functioning
> properly. Therefore, HAVE_LD_DEAD_CODE_DATA_ELIMINATION should only be
> selected when they are not enabled.

Dunno, I have DCE enabled for years on my home kernel, see commit [0]
with both ORC and retpolines enabled, and I didn't have any issues.
vmlinux still shrinks well, even with Clang LTO.

> 
> Enabling CONFIG_LTO_CLANG or CONFIG_X86_KERNEL_IBT will use vmlinux.o
> instead of performing the slow LTO link again. This can also prevent
> gc-sections from functioning properly. Therefore, using this optimization
> when CONFIG_LD_DEAD_CODE_DATA_ELIMINATION is not enabled.
> 
> The size comparison of zImage is as follows:
> x86_def_defconfig  i386_defconfig    tinyconfig
> 10892288           10826240          607232          no dce
> 10748928           10719744          529408          dce
> 1.3%               0.98%             12.8%           shrink
> 
> When using smaller config file, there is a significant reduction in the
> size of the zImage.

[0] https://github.com/solbjorn/linux/commit/25c4953ea73d

Thanks,
Olek

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] x86: enable HAVE_LD_DEAD_CODE_DATA_ELIMINATION
  2024-04-24 11:06 ` Alexander Lobakin
@ 2024-04-25  6:37   ` liuyuntao (F)
  2024-04-25  9:21     ` Alexander Lobakin
  0 siblings, 1 reply; 11+ messages in thread
From: liuyuntao (F) @ 2024-04-25  6:37 UTC (permalink / raw)
  To: Alexander Lobakin
  Cc: linux-kernel, linux-kbuild, tglx, mingo, bp, dave.hansen, hpa,
	masahiroy, nathan, nicolas, peterz, jpoimboe, leitao, petr.pavlu,
	richard.weiyang, ruanjinjie, ndesaulniers, jgross



On 2024/4/24 19:06, Alexander Lobakin wrote:
> From: Yuntao Liu <liuyuntao12@huawei.com>
> Date: Mon, 22 Apr 2024 06:05:56 +0000
> 
>> The current x86 architecture does not yet support the
>> HAVE_LD_DEAD_CODE_DATA_ELIMINATION feature. x86 is widely used in
>> embedded scenarios, and enabling this feature would be beneficial for
>> reducing the size of the kernel image.
>>
>> In order to make this work, we keep the necessary tables by annotating
>> them with KEEP, also it requires further changes to linker script to KEEP
>> some tables and wildcard compiler generated sections into the right place.
>>
>> Enabling CONFIG_UNWINDER_ORC or CONFIG_MITIGATION_RETPOLINE will enable
>> the objtool's --orc and --retpoline parameters, which will alter the
>> layout of the binary file, thereby preventing gc-sections from functioning
>> properly. Therefore, HAVE_LD_DEAD_CODE_DATA_ELIMINATION should only be
>> selected when they are not enabled.
> 
> Dunno, I have DCE enabled for years on my home kernel, see commit [0]
> with both ORC and retpolines enabled, and I didn't have any issues.
> vmlinux still shrinks well, even with Clang LTO.
> 
>>
>> Enabling CONFIG_LTO_CLANG or CONFIG_X86_KERNEL_IBT will use vmlinux.o
>> instead of performing the slow LTO link again. This can also prevent
>> gc-sections from functioning properly. Therefore, using this optimization
>> when CONFIG_LD_DEAD_CODE_DATA_ELIMINATION is not enabled.
>>
>> The size comparison of zImage is as follows:
>> x86_def_defconfig  i386_defconfig    tinyconfig
>> 10892288           10826240          607232          no dce
>> 10748928           10719744          529408          dce
>> 1.3%               0.98%             12.8%           shrink
>>
>> When using smaller config file, there is a significant reduction in the
>> size of the zImage.
> 
> [0] https://github.com/solbjorn/linux/commit/25c4953ea73d
> 
> Thanks,
> Olek

I applied your patch and used the LLVM toolchain to compile the kernel, but
it does not boot on QEMU.
I used the following command:
> qemu-system-x86_64  -smp 2 -m 1024M -nographic -kernel mainline_linux/arch/x86/boot/bzImage -hda rootfs.img -append "root=/dev/sda console=ttyS0 rootfstype=ext4 init=/linuxrc rw"
Have you tested your patch on the latest mainline version?
Thanks.
Yuntao.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] x86: enable HAVE_LD_DEAD_CODE_DATA_ELIMINATION
  2024-04-25  6:37   ` liuyuntao (F)
@ 2024-04-25  9:21     ` Alexander Lobakin
  2024-04-25  9:41       ` liuyuntao (F)
  0 siblings, 1 reply; 11+ messages in thread
From: Alexander Lobakin @ 2024-04-25  9:21 UTC (permalink / raw)
  To: liuyuntao (F)
  Cc: linux-kernel, linux-kbuild, tglx, mingo, bp, dave.hansen, hpa,
	masahiroy, nathan, nicolas, peterz, jpoimboe, leitao, petr.pavlu,
	richard.weiyang, ruanjinjie, ndesaulniers, jgross

From: Liuyuntao (F) <liuyuntao12@huawei.com>
Date: Thu, 25 Apr 2024 14:37:19 +0800

> 
> 
> On 2024/4/24 19:06, Alexander Lobakin wrote:
>> From: Yuntao Liu <liuyuntao12@huawei.com>
>> Date: Mon, 22 Apr 2024 06:05:56 +0000
>>
>>> The current x86 architecture does not yet support the
>>> HAVE_LD_DEAD_CODE_DATA_ELIMINATION feature. x86 is widely used in
>>> embedded scenarios, and enabling this feature would be beneficial for
>>> reducing the size of the kernel image.
>>>
>>> In order to make this work, we keep the necessary tables by annotating
>>> them with KEEP, also it requires further changes to linker script to
>>> KEEP
>>> some tables and wildcard compiler generated sections into the right
>>> place.
>>>
>>> Enabling CONFIG_UNWINDER_ORC or CONFIG_MITIGATION_RETPOLINE will enable
>>> the objtool's --orc and --retpoline parameters, which will alter the
>>> layout of the binary file, thereby preventing gc-sections from
>>> functioning
>>> properly. Therefore, HAVE_LD_DEAD_CODE_DATA_ELIMINATION should only be
>>> selected when they are not enabled.
>>
>> Dunno, I have DCE enabled for years on my home kernel, see commit [0]
>> with both ORC and retpolines enabled, and I didn't have any issues.
>> vmlinux still shrinks well, even with Clang LTO.
>>
>>>
>>> Enabling CONFIG_LTO_CLANG or CONFIG_X86_KERNEL_IBT will use vmlinux.o
>>> instead of performing the slow LTO link again. This can also prevent
>>> gc-sections from functioning properly. Therefore, using this
>>> optimization
>>> when CONFIG_LD_DEAD_CODE_DATA_ELIMINATION is not enabled.
>>>
>>> The size comparison of zImage is as follows:
>>> x86_def_defconfig  i386_defconfig    tinyconfig
>>> 10892288           10826240          607232          no dce
>>> 10748928           10719744          529408          dce
>>> 1.3%               0.98%             12.8%           shrink
>>>
>>> When using smaller config file, there is a significant reduction in the
>>> size of the zImage.
>>
>> [0] https://github.com/solbjorn/linux/commit/25c4953ea73d
>>
>> Thanks,
>> Olek
> 
> I apply your patch, and use LLVM toolchain to compile the kernel, it not
> boot on QEMU.
> I use the following command.
>> qemu-system-x86_64  -smp 2 -m 1024M -nographic -kernel
>> mainline_linux/arch/x86/boot/bzImage -hda rootfs.img -append
>> "root=/dev/sda console=ttyS0 rootfstype=ext4 init=/linuxrc rw"
> Have you tested your patch on the latest mainline version?

Nope, it was a year ago and I haven't touched it since then. Did the
low-level code change a lot?

> Thanks.
> Yuntao.

Thanks,
Olek

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] x86: enable HAVE_LD_DEAD_CODE_DATA_ELIMINATION
  2024-04-25  9:21     ` Alexander Lobakin
@ 2024-04-25  9:41       ` liuyuntao (F)
  0 siblings, 0 replies; 11+ messages in thread
From: liuyuntao (F) @ 2024-04-25  9:41 UTC (permalink / raw)
  To: Alexander Lobakin
  Cc: linux-kernel, linux-kbuild, tglx, mingo, bp, dave.hansen, hpa,
	masahiroy, nathan, nicolas, peterz, jpoimboe, leitao, petr.pavlu,
	richard.weiyang, ruanjinjie, ndesaulniers, jgross



On 2024/4/25 17:21, Alexander Lobakin wrote:
> From: Liuyuntao (F) <liuyuntao12@huawei.com>
> Date: Thu, 25 Apr 2024 14:37:19 +0800
> 
>>
>>
>> On 2024/4/24 19:06, Alexander Lobakin wrote:
>>> From: Yuntao Liu <liuyuntao12@huawei.com>
>>> Date: Mon, 22 Apr 2024 06:05:56 +0000
>>>
>>>> The current x86 architecture does not yet support the
>>>> HAVE_LD_DEAD_CODE_DATA_ELIMINATION feature. x86 is widely used in
>>>> embedded scenarios, and enabling this feature would be beneficial for
>>>> reducing the size of the kernel image.
>>>>
>>>> In order to make this work, we keep the necessary tables by annotating
>>>> them with KEEP, also it requires further changes to linker script to
>>>> KEEP
>>>> some tables and wildcard compiler generated sections into the right
>>>> place.
>>>>
>>>> Enabling CONFIG_UNWINDER_ORC or CONFIG_MITIGATION_RETPOLINE will enable
>>>> the objtool's --orc and --retpoline parameters, which will alter the
>>>> layout of the binary file, thereby preventing gc-sections from
>>>> functioning
>>>> properly. Therefore, HAVE_LD_DEAD_CODE_DATA_ELIMINATION should only be
>>>> selected when they are not enabled.
>>>
>>> Dunno, I have DCE enabled for years on my home kernel, see commit [0]
>>> with both ORC and retpolines enabled, and I didn't have any issues.
>>> vmlinux still shrinks well, even with Clang LTO.
>>>
>>>>
>>>> Enabling CONFIG_LTO_CLANG or CONFIG_X86_KERNEL_IBT will use vmlinux.o
>>>> instead of performing the slow LTO link again. This can also prevent
>>>> gc-sections from functioning properly. Therefore, using this
>>>> optimization
>>>> when CONFIG_LD_DEAD_CODE_DATA_ELIMINATION is not enabled.
>>>>
>>>> The size comparison of zImage is as follows:
>>>> x86_def_defconfig  i386_defconfig    tinyconfig
>>>> 10892288           10826240          607232          no dce
>>>> 10748928           10719744          529408          dce
>>>> 1.3%               0.98%             12.8%           shrink
>>>>
>>>> When using smaller config file, there is a significant reduction in the
>>>> size of the zImage.
>>>
>>> [0] https://github.com/solbjorn/linux/commit/25c4953ea73d
>>>
>>> Thanks,
>>> Olek
>>
>> I apply your patch, and use LLVM toolchain to compile the kernel, it not
>> boot on QEMU.
>> I use the following command.
>>> qemu-system-x86_64  -smp 2 -m 1024M -nographic -kernel
>>> mainline_linux/arch/x86/boot/bzImage -hda rootfs.img -append
>>> "root=/dev/sda console=ttyS0 rootfstype=ext4 init=/linuxrc rw"
>> Have you tested your patch on the latest mainline version?
> 
> Nope, it was a year ago and I haven't touched it since then. Did the
> low-level code change a lot?
> 
No, I'm not yet certain what changes have had an impact on it compared 
to a year ago.

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2024-04-25  9:41 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-22  6:05 [PATCH] x86: enable HAVE_LD_DEAD_CODE_DATA_ELIMINATION Yuntao Liu
2024-04-22 16:01 ` Masahiro Yamada
2024-04-23 11:48   ` liuyuntao (F)
2024-04-22 19:24 ` Nathan Chancellor
2024-04-23 12:02   ` liuyuntao (F)
2024-04-23  0:59 ` Wei Yang
2024-04-23 12:10   ` liuyuntao (F)
2024-04-24 11:06 ` Alexander Lobakin
2024-04-25  6:37   ` liuyuntao (F)
2024-04-25  9:21     ` Alexander Lobakin
2024-04-25  9:41       ` liuyuntao (F)

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).