linux-kbuild.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* Re: [PATCH v8 1/3] riscv: Introduce CONFIG_RELOCATABLE
       [not found] ` <20230215143626.453491-2-alexghiti@rivosinc.com>
@ 2023-02-22 12:29   ` Alexandre Ghiti
  2023-02-24 15:58     ` Björn Töpel
  2023-03-22 13:38     ` Alexandre Ghiti
  0 siblings, 2 replies; 6+ messages in thread
From: Alexandre Ghiti @ 2023-02-22 12:29 UTC (permalink / raw)
  To: Alexandre Ghiti, Michael Ellerman, Nicholas Piggin,
	Christophe Leroy, Paul Walmsley, Palmer Dabbelt, Albert Ou,
	linuxppc-dev, linux-kernel, linux-riscv, nathan, linux-kbuild,
	llvm, ndesaulniers, Björn Töpel

+cc linux-kbuild, llvm, Nathan, Nick

On 2/15/23 15:36, Alexandre Ghiti wrote:
> From: Alexandre Ghiti <alex@ghiti.fr>
>
> This config allows to compile 64b kernel as PIE and to relocate it at
> any virtual address at runtime: this paves the way to KASLR.
> Runtime relocation is possible since relocation metadata are embedded into
> the kernel.
>
> Note that relocating at runtime introduces an overhead even if the
> kernel is loaded at the same address it was linked at and that the compiler
> options are those used in arm64 which uses the same RELA relocation
> format.
>
> Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
> ---
>   arch/riscv/Kconfig              | 14 +++++++++
>   arch/riscv/Makefile             |  7 +++--
>   arch/riscv/kernel/efi-header.S  |  6 ++--
>   arch/riscv/kernel/vmlinux.lds.S | 10 ++++--
>   arch/riscv/mm/Makefile          |  4 +++
>   arch/riscv/mm/init.c            | 54 ++++++++++++++++++++++++++++++++-
>   6 files changed, 87 insertions(+), 8 deletions(-)
>
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index e2b656043abf..e0ee7ce4b2e3 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -544,6 +544,20 @@ config COMPAT
>   
>   	  If you want to execute 32-bit userspace applications, say Y.
>   
> +config RELOCATABLE
> +	bool "Build a relocatable kernel"
> +	depends on MMU && 64BIT && !XIP_KERNEL
> +	help
> +          This builds a kernel as a Position Independent Executable (PIE),
> +          which retains all relocation metadata required to relocate the
> +          kernel binary at runtime to a different virtual address than the
> +          address it was linked at.
> +          Since RISCV uses the RELA relocation format, this requires a
> +          relocation pass at runtime even if the kernel is loaded at the
> +          same address it was linked at.
> +
> +          If unsure, say N.
> +
>   endmenu # "Kernel features"
>   
>   menu "Boot options"
> diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
> index 82153960ac00..97c34136b027 100644
> --- a/arch/riscv/Makefile
> +++ b/arch/riscv/Makefile
> @@ -7,9 +7,12 @@
>   #
>   
>   OBJCOPYFLAGS    := -O binary
> -LDFLAGS_vmlinux :=
> +ifeq ($(CONFIG_RELOCATABLE),y)
> +	LDFLAGS_vmlinux += -shared -Bsymbolic -z notext -z norelro
> +	KBUILD_CFLAGS += -fPIE
> +endif
>   ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
> -	LDFLAGS_vmlinux := --no-relax
> +	LDFLAGS_vmlinux += --no-relax
>   	KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
>   	CC_FLAGS_FTRACE := -fpatchable-function-entry=8
>   endif
> diff --git a/arch/riscv/kernel/efi-header.S b/arch/riscv/kernel/efi-header.S
> index 8e733aa48ba6..f7ee09c4f12d 100644
> --- a/arch/riscv/kernel/efi-header.S
> +++ b/arch/riscv/kernel/efi-header.S
> @@ -33,7 +33,7 @@ optional_header:
>   	.byte	0x02					// MajorLinkerVersion
>   	.byte	0x14					// MinorLinkerVersion
>   	.long	__pecoff_text_end - efi_header_end	// SizeOfCode
> -	.long	__pecoff_data_virt_size			// SizeOfInitializedData
> +	.long	__pecoff_data_virt_end - __pecoff_text_end	// SizeOfInitializedData
>   	.long	0					// SizeOfUninitializedData
>   	.long	__efistub_efi_pe_entry - _start		// AddressOfEntryPoint
>   	.long	efi_header_end - _start			// BaseOfCode
> @@ -91,9 +91,9 @@ section_table:
>   		IMAGE_SCN_MEM_EXECUTE			// Characteristics
>   
>   	.ascii	".data\0\0\0"
> -	.long	__pecoff_data_virt_size			// VirtualSize
> +	.long	__pecoff_data_virt_end - __pecoff_text_end	// VirtualSize
>   	.long	__pecoff_text_end - _start		// VirtualAddress
> -	.long	__pecoff_data_raw_size			// SizeOfRawData
> +	.long	__pecoff_data_raw_end - __pecoff_text_end	// SizeOfRawData
>   	.long	__pecoff_text_end - _start		// PointerToRawData
>   
>   	.long	0					// PointerToRelocations
> diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
> index 4e6c88aa4d87..8be2de3be08c 100644
> --- a/arch/riscv/kernel/vmlinux.lds.S
> +++ b/arch/riscv/kernel/vmlinux.lds.S
> @@ -122,9 +122,15 @@ SECTIONS
>   		*(.sdata*)
>   	}
>   
> +	.rela.dyn : ALIGN(8) {
> +		__rela_dyn_start = .;
> +		*(.rela .rela*)
> +		__rela_dyn_end = .;
> +	}
> +


So I realized those relocations would be better in the init section so 
we can get rid of them at some point. So I tried the following:

diff --git a/arch/riscv/kernel/vmlinux.lds.S 
b/arch/riscv/kernel/vmlinux.lds.S
index 7ac215467fd5..6111023a89ef 100644
--- a/arch/riscv/kernel/vmlinux.lds.S
+++ b/arch/riscv/kernel/vmlinux.lds.S
@@ -93,6 +93,12 @@ SECTIONS
                 *(.rel.dyn*)
         }

+       .rela.dyn : ALIGN(8) {
+               __rela_dyn_start = .;
+               *(.rela .rela*)
+               __rela_dyn_end = .;
+       }
+
         __init_data_end = .;

         . = ALIGN(8);
@@ -119,12 +125,6 @@ SECTIONS
                 *(.sdata*)
         }

-       .rela.dyn : ALIGN(8) {
-               __rela_dyn_start = .;
-               *(.rela .rela*)
-               __rela_dyn_end = .;
-       }
-
  #ifdef CONFIG_EFI
         .pecoff_edata_padding : { BYTE(0); . = 
ALIGN(PECOFF_FILE_ALIGNMENT); }
         __pecoff_data_raw_end = ABSOLUTE(.);


But then all the relocations in vmlinux end up being null:

vmlinux:     file format elf64-littleriscv

$ riscv64-linux-gnu-objdump -R vmlinux

DYNAMIC RELOCATION RECORDS
OFFSET           TYPE              VALUE
0000000000000000 R_RISCV_NONE      *ABS*
0000000000000000 R_RISCV_NONE      *ABS*
....

I also noticed that re-linking vmlinux with the same command right
after works (i.e., the relocations are now valid):

$ riscv64-linux-gnu-objdump -R vmlinux

vmlinux:     file format elf64-littleriscv

DYNAMIC RELOCATION RECORDS
OFFSET           TYPE              VALUE
ffffffff82600718 R_RISCV_RELATIVE  *ABS*-0x000000007d9ff8e8
ffffffff82600720 R_RISCV_RELATIVE  *ABS*-0x000000007d9ff8e8
...

Below is the command used to generate this working vmlinux:

riscv64-unknown-linux-gnu-ld -melf64lriscv -z noexecstack 
--no-warn-rwx-segments -shared -Bsymbolic -z notext -z norelro 
--no-relax --build-id=sha1 --script=./arch/riscv/kernel/vmlinux.lds 
-Map=vmlinux.map -o vmlinux --whole-archive vmlinux.a .vmlinux.export.o 
init/version-timestamp.o --no-whole-archive --start-group 
./drivers/firmware/efi/libstub/lib.a --end-group .tmp_vmlinux.kallsyms3.o

I tried a lot of things, but I struggle to understand this. Does anyone
have any idea? FYI, the same problem happens with LLVM.

Thanks,

Alex


>   #ifdef CONFIG_EFI
>   	.pecoff_edata_padding : { BYTE(0); . = ALIGN(PECOFF_FILE_ALIGNMENT); }
> -	__pecoff_data_raw_size = ABSOLUTE(. - __pecoff_text_end);
> +	__pecoff_data_raw_end = ABSOLUTE(.);
>   #endif
>   
>   	/* End of data section */
> @@ -134,7 +140,7 @@ SECTIONS
>   
>   #ifdef CONFIG_EFI
>   	. = ALIGN(PECOFF_SECTION_ALIGNMENT);
> -	__pecoff_data_virt_size = ABSOLUTE(. - __pecoff_text_end);
> +	__pecoff_data_virt_end = ABSOLUTE(.);
>   #endif
>   	_end = .;
>   
> diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile
> index 2ac177c05352..b85e9e82f082 100644
> --- a/arch/riscv/mm/Makefile
> +++ b/arch/riscv/mm/Makefile
> @@ -1,6 +1,10 @@
>   # SPDX-License-Identifier: GPL-2.0-only
>   
>   CFLAGS_init.o := -mcmodel=medany
> +ifdef CONFIG_RELOCATABLE
> +CFLAGS_init.o += -fno-pie
> +endif
> +
>   ifdef CONFIG_FTRACE
>   CFLAGS_REMOVE_init.o = $(CC_FLAGS_FTRACE)
>   CFLAGS_REMOVE_cacheflush.o = $(CC_FLAGS_FTRACE)
> diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
> index 7f01c2e56efe..3862696c2ac9 100644
> --- a/arch/riscv/mm/init.c
> +++ b/arch/riscv/mm/init.c
> @@ -20,6 +20,9 @@
>   #include <linux/dma-map-ops.h>
>   #include <linux/crash_dump.h>
>   #include <linux/hugetlb.h>
> +#ifdef CONFIG_RELOCATABLE
> +#include <linux/elf.h>
> +#endif
>   
>   #include <asm/fixmap.h>
>   #include <asm/tlbflush.h>
> @@ -146,7 +149,7 @@ static void __init print_vm_layout(void)
>   		print_ml("kasan", KASAN_SHADOW_START, KASAN_SHADOW_END);
>   #endif
>   
> -		print_ml("kernel", (unsigned long)KERNEL_LINK_ADDR,
> +		print_ml("kernel", (unsigned long)kernel_map.virt_addr,
>   			 (unsigned long)ADDRESS_SPACE_END);
>   	}
>   }
> @@ -854,6 +857,44 @@ static __init void set_satp_mode(uintptr_t dtb_pa)
>   #error "setup_vm() is called from head.S before relocate so it should not use absolute addressing."
>   #endif
>   
> +#ifdef CONFIG_RELOCATABLE
> +extern unsigned long __rela_dyn_start, __rela_dyn_end;
> +
> +static void __init relocate_kernel(void)
> +{
> +	Elf64_Rela *rela = (Elf64_Rela *)&__rela_dyn_start;
> +	/*
> +	 * This holds the offset between the linked virtual address and the
> +	 * relocated virtual address.
> +	 */
> +	uintptr_t reloc_offset = kernel_map.virt_addr - KERNEL_LINK_ADDR;
> +	/*
> +	 * This holds the offset between kernel linked virtual address and
> +	 * physical address.
> +	 */
> +	uintptr_t va_kernel_link_pa_offset = KERNEL_LINK_ADDR - kernel_map.phys_addr;
> +
> +	for ( ; rela < (Elf64_Rela *)&__rela_dyn_end; rela++) {
> +		Elf64_Addr addr = (rela->r_offset - va_kernel_link_pa_offset);
> +		Elf64_Addr relocated_addr = rela->r_addend;
> +
> +		if (rela->r_info != R_RISCV_RELATIVE)
> +			continue;
> +
> +		/*
> +		 * Make sure to not relocate vdso symbols like rt_sigreturn
> +		 * which are linked from the address 0 in vmlinux since
> +		 * vdso symbol addresses are actually used as an offset from
> +		 * mm->context.vdso in VDSO_OFFSET macro.
> +		 */
> +		if (relocated_addr >= KERNEL_LINK_ADDR)
> +			relocated_addr += reloc_offset;
> +
> +		*(Elf64_Addr *)addr = relocated_addr;
> +	}
> +}
> +#endif /* CONFIG_RELOCATABLE */
> +
>   #ifdef CONFIG_XIP_KERNEL
>   static void __init create_kernel_page_table(pgd_t *pgdir,
>   					    __always_unused bool early)
> @@ -1039,6 +1080,17 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
>   	BUG_ON((kernel_map.virt_addr + kernel_map.size) > ADDRESS_SPACE_END - SZ_4K);
>   #endif
>   
> +#ifdef CONFIG_RELOCATABLE
> +	/*
> +	 * Early page table uses only one PUD, which makes it possible
> +	 * to map PUD_SIZE aligned on PUD_SIZE: if the relocation offset
> +	 * makes the kernel cross over a PUD_SIZE boundary, raise a bug
> +	 * since a part of the kernel would not get mapped.
> +	 */
> +	BUG_ON(PUD_SIZE - (kernel_map.virt_addr & (PUD_SIZE - 1)) < kernel_map.size);
> +	relocate_kernel();
> +#endif
> +
>   	apply_early_boot_alternatives();
>   	pt_ops_set_early();
>   

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH v8 1/3] riscv: Introduce CONFIG_RELOCATABLE
  2023-02-22 12:29   ` [PATCH v8 1/3] riscv: Introduce CONFIG_RELOCATABLE Alexandre Ghiti
@ 2023-02-24 15:58     ` Björn Töpel
  2023-03-22 18:25       ` Nick Desaulniers
  2023-03-22 13:38     ` Alexandre Ghiti
  1 sibling, 1 reply; 6+ messages in thread
From: Björn Töpel @ 2023-02-24 15:58 UTC (permalink / raw)
  To: Alexandre Ghiti, Alexandre Ghiti, Michael Ellerman,
	Nicholas Piggin, Christophe Leroy, Paul Walmsley, Palmer Dabbelt,
	Albert Ou, linuxppc-dev, linux-kernel, linux-riscv, nathan,
	linux-kbuild, llvm, ndesaulniers

Alexandre Ghiti <alex@ghiti.fr> writes:

> +cc linux-kbuild, llvm, Nathan, Nick
>
> On 2/15/23 15:36, Alexandre Ghiti wrote:
>> From: Alexandre Ghiti <alex@ghiti.fr>
>>
>> This config allows to compile 64b kernel as PIE and to relocate it at
>> any virtual address at runtime: this paves the way to KASLR.
>> Runtime relocation is possible since relocation metadata are embedded into
>> the kernel.
>>
>> Note that relocating at runtime introduces an overhead even if the
>> kernel is loaded at the same address it was linked at and that the compiler
>> options are those used in arm64 which uses the same RELA relocation
>> format.
>>
>> Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
>> ---
>>   arch/riscv/Kconfig              | 14 +++++++++
>>   arch/riscv/Makefile             |  7 +++--
>>   arch/riscv/kernel/efi-header.S  |  6 ++--
>>   arch/riscv/kernel/vmlinux.lds.S | 10 ++++--
>>   arch/riscv/mm/Makefile          |  4 +++
>>   arch/riscv/mm/init.c            | 54 ++++++++++++++++++++++++++++++++-
>>   6 files changed, 87 insertions(+), 8 deletions(-)
>>
>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
>> index e2b656043abf..e0ee7ce4b2e3 100644
>> --- a/arch/riscv/Kconfig
>> +++ b/arch/riscv/Kconfig
>> @@ -544,6 +544,20 @@ config COMPAT
>>   
>>   	  If you want to execute 32-bit userspace applications, say Y.
>>   
>> +config RELOCATABLE
>> +	bool "Build a relocatable kernel"
>> +	depends on MMU && 64BIT && !XIP_KERNEL
>> +	help
>> +          This builds a kernel as a Position Independent Executable (PIE),
>> +          which retains all relocation metadata required to relocate the
>> +          kernel binary at runtime to a different virtual address than the
>> +          address it was linked at.
>> +          Since RISCV uses the RELA relocation format, this requires a
>> +          relocation pass at runtime even if the kernel is loaded at the
>> +          same address it was linked at.
>> +
>> +          If unsure, say N.
>> +
>>   endmenu # "Kernel features"
>>   
>>   menu "Boot options"
>> diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
>> index 82153960ac00..97c34136b027 100644
>> --- a/arch/riscv/Makefile
>> +++ b/arch/riscv/Makefile
>> @@ -7,9 +7,12 @@
>>   #
>>   
>>   OBJCOPYFLAGS    := -O binary
>> -LDFLAGS_vmlinux :=
>> +ifeq ($(CONFIG_RELOCATABLE),y)
>> +	LDFLAGS_vmlinux += -shared -Bsymbolic -z notext -z norelro
>> +	KBUILD_CFLAGS += -fPIE
>> +endif
>>   ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
>> -	LDFLAGS_vmlinux := --no-relax
>> +	LDFLAGS_vmlinux += --no-relax
>>   	KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
>>   	CC_FLAGS_FTRACE := -fpatchable-function-entry=8
>>   endif
>> diff --git a/arch/riscv/kernel/efi-header.S b/arch/riscv/kernel/efi-header.S
>> index 8e733aa48ba6..f7ee09c4f12d 100644
>> --- a/arch/riscv/kernel/efi-header.S
>> +++ b/arch/riscv/kernel/efi-header.S
>> @@ -33,7 +33,7 @@ optional_header:
>>   	.byte	0x02					// MajorLinkerVersion
>>   	.byte	0x14					// MinorLinkerVersion
>>   	.long	__pecoff_text_end - efi_header_end	// SizeOfCode
>> -	.long	__pecoff_data_virt_size			// SizeOfInitializedData
>> +	.long	__pecoff_data_virt_end - __pecoff_text_end	// SizeOfInitializedData
>>   	.long	0					// SizeOfUninitializedData
>>   	.long	__efistub_efi_pe_entry - _start		// AddressOfEntryPoint
>>   	.long	efi_header_end - _start			// BaseOfCode
>> @@ -91,9 +91,9 @@ section_table:
>>   		IMAGE_SCN_MEM_EXECUTE			// Characteristics
>>   
>>   	.ascii	".data\0\0\0"
>> -	.long	__pecoff_data_virt_size			// VirtualSize
>> +	.long	__pecoff_data_virt_end - __pecoff_text_end	// VirtualSize
>>   	.long	__pecoff_text_end - _start		// VirtualAddress
>> -	.long	__pecoff_data_raw_size			// SizeOfRawData
>> +	.long	__pecoff_data_raw_end - __pecoff_text_end	// SizeOfRawData
>>   	.long	__pecoff_text_end - _start		// PointerToRawData
>>   
>>   	.long	0					// PointerToRelocations
>> diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
>> index 4e6c88aa4d87..8be2de3be08c 100644
>> --- a/arch/riscv/kernel/vmlinux.lds.S
>> +++ b/arch/riscv/kernel/vmlinux.lds.S
>> @@ -122,9 +122,15 @@ SECTIONS
>>   		*(.sdata*)
>>   	}
>>   
>> +	.rela.dyn : ALIGN(8) {
>> +		__rela_dyn_start = .;
>> +		*(.rela .rela*)
>> +		__rela_dyn_end = .;
>> +	}
>> +
>
>
> So I realized those relocations would be better in the init section so 
> we can get rid of them at some point. So I tried the following:
>
> diff --git a/arch/riscv/kernel/vmlinux.lds.S 
> b/arch/riscv/kernel/vmlinux.lds.S
> index 7ac215467fd5..6111023a89ef 100644
> --- a/arch/riscv/kernel/vmlinux.lds.S
> +++ b/arch/riscv/kernel/vmlinux.lds.S
> @@ -93,6 +93,12 @@ SECTIONS
>                  *(.rel.dyn*)
>          }
>
> +       .rela.dyn : ALIGN(8) {
> +               __rela_dyn_start = .;
> +               *(.rela .rela*)
> +               __rela_dyn_end = .;
> +       }
> +
>          __init_data_end = .;
>
>          . = ALIGN(8);
> @@ -119,12 +125,6 @@ SECTIONS
>                  *(.sdata*)
>          }
>
> -       .rela.dyn : ALIGN(8) {
> -               __rela_dyn_start = .;
> -               *(.rela .rela*)
> -               __rela_dyn_end = .;
> -       }
> -
>   #ifdef CONFIG_EFI
>          .pecoff_edata_padding : { BYTE(0); . = 
> ALIGN(PECOFF_FILE_ALIGNMENT); }
>          __pecoff_data_raw_end = ABSOLUTE(.);
>
>
> But then all the relocations in vmlinux end up being null:
>
> vmlinux:     file format elf64-littleriscv
>
> $ riscv64-linux-gnu-objdump -R vmlinux
>
> DYNAMIC RELOCATION RECORDS
> OFFSET           TYPE              VALUE
> 0000000000000000 R_RISCV_NONE      *ABS*
> 0000000000000000 R_RISCV_NONE      *ABS*
> ....
>
>   I also noticed that re-linking vmlinux with the same command right 
> after works (ie, the relocations are now valid):
>
> $ riscv64-linux-gnu-objdump -R vmlinux
>
> vmlinux:     file format elf64-littleriscv
>
> DYNAMIC RELOCATION RECORDS
> OFFSET           TYPE              VALUE
> ffffffff82600718 R_RISCV_RELATIVE  *ABS*-0x000000007d9ff8e8
> ffffffff82600720 R_RISCV_RELATIVE  *ABS*-0x000000007d9ff8e8
> ...
>
> Below is the command used to generate this working vmlinux:
>
> riscv64-unknown-linux-gnu-ld -melf64lriscv -z noexecstack 
> --no-warn-rwx-segments -shared -Bsymbolic -z notext -z norelro 
> --no-relax --build-id=sha1 --script=./arch/riscv/kernel/vmlinux.lds 
> -Map=vmlinux.map -o vmlinux --whole-archive vmlinux.a .vmlinux.export.o 
> init/version-timestamp.o --no-whole-archive --start-group 
> ./drivers/firmware/efi/libstub/lib.a --end-group .tmp_vmlinux.kallsyms3.o
>
> I tried a lot of things, but I struggle to understand, does anyone have 
> any idea? FYI, the same problem happens with LLVM.

Don't ask me *why*, but adding --emit-relocs to your linker flags solves
"the NULL .rela.dyn" both for GCC and LLVM.

The downside is that you end up with a bunch of .rela cruft in your
vmlinux.


Björn

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v8 1/3] riscv: Introduce CONFIG_RELOCATABLE
  2023-02-22 12:29   ` [PATCH v8 1/3] riscv: Introduce CONFIG_RELOCATABLE Alexandre Ghiti
  2023-02-24 15:58     ` Björn Töpel
@ 2023-03-22 13:38     ` Alexandre Ghiti
  1 sibling, 0 replies; 6+ messages in thread
From: Alexandre Ghiti @ 2023-03-22 13:38 UTC (permalink / raw)
  To: Alexandre Ghiti, Michael Ellerman, Nicholas Piggin,
	Christophe Leroy, Paul Walmsley, Palmer Dabbelt, Albert Ou,
	linuxppc-dev, linux-kernel, linux-riscv, nathan, linux-kbuild,
	llvm, ndesaulniers, Björn Töpel

@linux-kbuild: Does anyone have an idea how to solve this?

Thanks!

On 2/22/23 13:29, Alexandre Ghiti wrote:
> +cc linux-kbuild, llvm, Nathan, Nick
>
> On 2/15/23 15:36, Alexandre Ghiti wrote:
>> From: Alexandre Ghiti <alex@ghiti.fr>
>>
>> This config allows to compile 64b kernel as PIE and to relocate it at
>> any virtual address at runtime: this paves the way to KASLR.
>> Runtime relocation is possible since relocation metadata are embedded 
>> into
>> the kernel.
>>
>> Note that relocating at runtime introduces an overhead even if the
>> kernel is loaded at the same address it was linked at and that the 
>> compiler
>> options are those used in arm64 which uses the same RELA relocation
>> format.
>>
>> Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
>> ---
>>   arch/riscv/Kconfig              | 14 +++++++++
>>   arch/riscv/Makefile             |  7 +++--
>>   arch/riscv/kernel/efi-header.S  |  6 ++--
>>   arch/riscv/kernel/vmlinux.lds.S | 10 ++++--
>>   arch/riscv/mm/Makefile          |  4 +++
>>   arch/riscv/mm/init.c            | 54 ++++++++++++++++++++++++++++++++-
>>   6 files changed, 87 insertions(+), 8 deletions(-)
>>
>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
>> index e2b656043abf..e0ee7ce4b2e3 100644
>> --- a/arch/riscv/Kconfig
>> +++ b/arch/riscv/Kconfig
>> @@ -544,6 +544,20 @@ config COMPAT
>>           If you want to execute 32-bit userspace applications, say Y.
>>   +config RELOCATABLE
>> +    bool "Build a relocatable kernel"
>> +    depends on MMU && 64BIT && !XIP_KERNEL
>> +    help
>> +          This builds a kernel as a Position Independent Executable 
>> (PIE),
>> +          which retains all relocation metadata required to relocate 
>> the
>> +          kernel binary at runtime to a different virtual address 
>> than the
>> +          address it was linked at.
>> +          Since RISCV uses the RELA relocation format, this requires a
>> +          relocation pass at runtime even if the kernel is loaded at 
>> the
>> +          same address it was linked at.
>> +
>> +          If unsure, say N.
>> +
>>   endmenu # "Kernel features"
>>     menu "Boot options"
>> diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
>> index 82153960ac00..97c34136b027 100644
>> --- a/arch/riscv/Makefile
>> +++ b/arch/riscv/Makefile
>> @@ -7,9 +7,12 @@
>>   #
>>     OBJCOPYFLAGS    := -O binary
>> -LDFLAGS_vmlinux :=
>> +ifeq ($(CONFIG_RELOCATABLE),y)
>> +    LDFLAGS_vmlinux += -shared -Bsymbolic -z notext -z norelro
>> +    KBUILD_CFLAGS += -fPIE
>> +endif
>>   ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
>> -    LDFLAGS_vmlinux := --no-relax
>> +    LDFLAGS_vmlinux += --no-relax
>>       KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
>>       CC_FLAGS_FTRACE := -fpatchable-function-entry=8
>>   endif
>> diff --git a/arch/riscv/kernel/efi-header.S 
>> b/arch/riscv/kernel/efi-header.S
>> index 8e733aa48ba6..f7ee09c4f12d 100644
>> --- a/arch/riscv/kernel/efi-header.S
>> +++ b/arch/riscv/kernel/efi-header.S
>> @@ -33,7 +33,7 @@ optional_header:
>>       .byte    0x02                    // MajorLinkerVersion
>>       .byte    0x14                    // MinorLinkerVersion
>>       .long    __pecoff_text_end - efi_header_end    // SizeOfCode
>> -    .long    __pecoff_data_virt_size            // 
>> SizeOfInitializedData
>> +    .long    __pecoff_data_virt_end - __pecoff_text_end    // 
>> SizeOfInitializedData
>>       .long    0                    // SizeOfUninitializedData
>>       .long    __efistub_efi_pe_entry - _start        // 
>> AddressOfEntryPoint
>>       .long    efi_header_end - _start            // BaseOfCode
>> @@ -91,9 +91,9 @@ section_table:
>>           IMAGE_SCN_MEM_EXECUTE            // Characteristics
>>         .ascii    ".data\0\0\0"
>> -    .long    __pecoff_data_virt_size            // VirtualSize
>> +    .long    __pecoff_data_virt_end - __pecoff_text_end    // 
>> VirtualSize
>>       .long    __pecoff_text_end - _start        // VirtualAddress
>> -    .long    __pecoff_data_raw_size            // SizeOfRawData
>> +    .long    __pecoff_data_raw_end - __pecoff_text_end    // 
>> SizeOfRawData
>>       .long    __pecoff_text_end - _start        // PointerToRawData
>>         .long    0                    // PointerToRelocations
>> diff --git a/arch/riscv/kernel/vmlinux.lds.S 
>> b/arch/riscv/kernel/vmlinux.lds.S
>> index 4e6c88aa4d87..8be2de3be08c 100644
>> --- a/arch/riscv/kernel/vmlinux.lds.S
>> +++ b/arch/riscv/kernel/vmlinux.lds.S
>> @@ -122,9 +122,15 @@ SECTIONS
>>           *(.sdata*)
>>       }
>>   +    .rela.dyn : ALIGN(8) {
>> +        __rela_dyn_start = .;
>> +        *(.rela .rela*)
>> +        __rela_dyn_end = .;
>> +    }
>> +
>
>
> So I realized those relocations would be better in the init section so 
> we can get rid of them at some point. So I tried the following:
>
> diff --git a/arch/riscv/kernel/vmlinux.lds.S 
> b/arch/riscv/kernel/vmlinux.lds.S
> index 7ac215467fd5..6111023a89ef 100644
> --- a/arch/riscv/kernel/vmlinux.lds.S
> +++ b/arch/riscv/kernel/vmlinux.lds.S
> @@ -93,6 +93,12 @@ SECTIONS
>                 *(.rel.dyn*)
>         }
>
> +       .rela.dyn : ALIGN(8) {
> +               __rela_dyn_start = .;
> +               *(.rela .rela*)
> +               __rela_dyn_end = .;
> +       }
> +
>         __init_data_end = .;
>
>         . = ALIGN(8);
> @@ -119,12 +125,6 @@ SECTIONS
>                 *(.sdata*)
>         }
>
> -       .rela.dyn : ALIGN(8) {
> -               __rela_dyn_start = .;
> -               *(.rela .rela*)
> -               __rela_dyn_end = .;
> -       }
> -
>  #ifdef CONFIG_EFI
>         .pecoff_edata_padding : { BYTE(0); . = 
> ALIGN(PECOFF_FILE_ALIGNMENT); }
>         __pecoff_data_raw_end = ABSOLUTE(.);
>
>
> But then all the relocations in vmlinux end up being null:
>
> vmlinux:     file format elf64-littleriscv
>
> $ riscv64-linux-gnu-objdump -R vmlinux
>
> DYNAMIC RELOCATION RECORDS
> OFFSET           TYPE              VALUE
> 0000000000000000 R_RISCV_NONE      *ABS*
> 0000000000000000 R_RISCV_NONE      *ABS*
> ....
>
>  I also noticed that re-linking vmlinux with the same command right 
> after works (ie, the relocations are now valid):
>
> $ riscv64-linux-gnu-objdump -R vmlinux
>
> vmlinux:     file format elf64-littleriscv
>
> DYNAMIC RELOCATION RECORDS
> OFFSET           TYPE              VALUE
> ffffffff82600718 R_RISCV_RELATIVE  *ABS*-0x000000007d9ff8e8
> ffffffff82600720 R_RISCV_RELATIVE  *ABS*-0x000000007d9ff8e8
> ...
>
> Below is the command used to generate this working vmlinux:
>
> riscv64-unknown-linux-gnu-ld -melf64lriscv -z noexecstack 
> --no-warn-rwx-segments -shared -Bsymbolic -z notext -z norelro 
> --no-relax --build-id=sha1 --script=./arch/riscv/kernel/vmlinux.lds 
> -Map=vmlinux.map -o vmlinux --whole-archive vmlinux.a 
> .vmlinux.export.o init/version-timestamp.o --no-whole-archive 
> --start-group ./drivers/firmware/efi/libstub/lib.a --end-group 
> .tmp_vmlinux.kallsyms3.o
>
> I tried a lot of things, but I struggle to understand, does anyone 
> have any idea? FYI, the same problem happens with LLVM.
>
> Thanks,
>
> Alex
>
>
>>   #ifdef CONFIG_EFI
>>       .pecoff_edata_padding : { BYTE(0); . = 
>> ALIGN(PECOFF_FILE_ALIGNMENT); }
>> -    __pecoff_data_raw_size = ABSOLUTE(. - __pecoff_text_end);
>> +    __pecoff_data_raw_end = ABSOLUTE(.);
>>   #endif
>>         /* End of data section */
>> @@ -134,7 +140,7 @@ SECTIONS
>>     #ifdef CONFIG_EFI
>>       . = ALIGN(PECOFF_SECTION_ALIGNMENT);
>> -    __pecoff_data_virt_size = ABSOLUTE(. - __pecoff_text_end);
>> +    __pecoff_data_virt_end = ABSOLUTE(.);
>>   #endif
>>       _end = .;
>>   diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile
>> index 2ac177c05352..b85e9e82f082 100644
>> --- a/arch/riscv/mm/Makefile
>> +++ b/arch/riscv/mm/Makefile
>> @@ -1,6 +1,10 @@
>>   # SPDX-License-Identifier: GPL-2.0-only
>>     CFLAGS_init.o := -mcmodel=medany
>> +ifdef CONFIG_RELOCATABLE
>> +CFLAGS_init.o += -fno-pie
>> +endif
>> +
>>   ifdef CONFIG_FTRACE
>>   CFLAGS_REMOVE_init.o = $(CC_FLAGS_FTRACE)
>>   CFLAGS_REMOVE_cacheflush.o = $(CC_FLAGS_FTRACE)
>> diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
>> index 7f01c2e56efe..3862696c2ac9 100644
>> --- a/arch/riscv/mm/init.c
>> +++ b/arch/riscv/mm/init.c
>> @@ -20,6 +20,9 @@
>>   #include <linux/dma-map-ops.h>
>>   #include <linux/crash_dump.h>
>>   #include <linux/hugetlb.h>
>> +#ifdef CONFIG_RELOCATABLE
>> +#include <linux/elf.h>
>> +#endif
>>     #include <asm/fixmap.h>
>>   #include <asm/tlbflush.h>
>> @@ -146,7 +149,7 @@ static void __init print_vm_layout(void)
>>           print_ml("kasan", KASAN_SHADOW_START, KASAN_SHADOW_END);
>>   #endif
>>   -        print_ml("kernel", (unsigned long)KERNEL_LINK_ADDR,
>> +        print_ml("kernel", (unsigned long)kernel_map.virt_addr,
>>                (unsigned long)ADDRESS_SPACE_END);
>>       }
>>   }
>> @@ -854,6 +857,44 @@ static __init void set_satp_mode(uintptr_t dtb_pa)
>>   #error "setup_vm() is called from head.S before relocate so it 
>> should not use absolute addressing."
>>   #endif
>>   +#ifdef CONFIG_RELOCATABLE
>> +extern unsigned long __rela_dyn_start, __rela_dyn_end;
>> +
>> +static void __init relocate_kernel(void)
>> +{
>> +    Elf64_Rela *rela = (Elf64_Rela *)&__rela_dyn_start;
>> +    /*
>> +     * This holds the offset between the linked virtual address and the
>> +     * relocated virtual address.
>> +     */
>> +    uintptr_t reloc_offset = kernel_map.virt_addr - KERNEL_LINK_ADDR;
>> +    /*
>> +     * This holds the offset between kernel linked virtual address and
>> +     * physical address.
>> +     */
>> +    uintptr_t va_kernel_link_pa_offset = KERNEL_LINK_ADDR - 
>> kernel_map.phys_addr;
>> +
>> +    for ( ; rela < (Elf64_Rela *)&__rela_dyn_end; rela++) {
>> +        Elf64_Addr addr = (rela->r_offset - va_kernel_link_pa_offset);
>> +        Elf64_Addr relocated_addr = rela->r_addend;
>> +
>> +        if (rela->r_info != R_RISCV_RELATIVE)
>> +            continue;
>> +
>> +        /*
>> +         * Make sure to not relocate vdso symbols like rt_sigreturn
>> +         * which are linked from the address 0 in vmlinux since
>> +         * vdso symbol addresses are actually used as an offset from
>> +         * mm->context.vdso in VDSO_OFFSET macro.
>> +         */
>> +        if (relocated_addr >= KERNEL_LINK_ADDR)
>> +            relocated_addr += reloc_offset;
>> +
>> +        *(Elf64_Addr *)addr = relocated_addr;
>> +    }
>> +}
>> +#endif /* CONFIG_RELOCATABLE */
>> +
>>   #ifdef CONFIG_XIP_KERNEL
>>   static void __init create_kernel_page_table(pgd_t *pgdir,
>>                           __always_unused bool early)
>> @@ -1039,6 +1080,17 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
>>       BUG_ON((kernel_map.virt_addr + kernel_map.size) > 
>> ADDRESS_SPACE_END - SZ_4K);
>>   #endif
>>   +#ifdef CONFIG_RELOCATABLE
>> +    /*
>> +     * Early page table uses only one PUD, which makes it possible
>> +     * to map PUD_SIZE aligned on PUD_SIZE: if the relocation offset
>> +     * makes the kernel cross over a PUD_SIZE boundary, raise a bug
>> +     * since a part of the kernel would not get mapped.
>> +     */
>> +    BUG_ON(PUD_SIZE - (kernel_map.virt_addr & (PUD_SIZE - 1)) < 
>> kernel_map.size);
>> +    relocate_kernel();
>> +#endif
>> +
>>       apply_early_boot_alternatives();
>>       pt_ops_set_early();
>
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v8 1/3] riscv: Introduce CONFIG_RELOCATABLE
  2023-02-24 15:58     ` Björn Töpel
@ 2023-03-22 18:25       ` Nick Desaulniers
  2023-03-23 20:01         ` Fangrui Song
  2023-03-24 10:34         ` Alexandre Ghiti
  0 siblings, 2 replies; 6+ messages in thread
From: Nick Desaulniers @ 2023-03-22 18:25 UTC (permalink / raw)
  To: Björn Töpel
  Cc: Alexandre Ghiti, Alexandre Ghiti, Michael Ellerman,
	Nicholas Piggin, Christophe Leroy, Paul Walmsley, Palmer Dabbelt,
	Albert Ou, linuxppc-dev, linux-kernel, linux-riscv, nathan,
	linux-kbuild, llvm

On Fri, Feb 24, 2023 at 7:58 AM Björn Töpel <bjorn@kernel.org> wrote:
>
> Alexandre Ghiti <alex@ghiti.fr> writes:
>
> > +cc linux-kbuild, llvm, Nathan, Nick
> >
> > On 2/15/23 15:36, Alexandre Ghiti wrote:
> >> From: Alexandre Ghiti <alex@ghiti.fr>
> >>
> > I tried a lot of things, but I struggle to understand, does anyone have
> > any idea? FYI, the same problem happens with LLVM.

Off the top of my head, no idea.

(Maybe as a follow up to this series, I wonder if pursuing
ARCH_HAS_RELR for ARCH=riscv is worthwhile?)

>
> Don't ask me *why*, but adding --emit-relocs to your linker flags solves
> "the NULL .rela.dyn" both for GCC and LLVM.
>
> The downside is that you end up with a bunch of .rela cruft in your
> vmlinux.

There was a patch just this week to use $(OBJCOPY) to strip these from
vmlinux (for x86). Looks like x86 uses --emit-relocs for KASLR:
https://lore.kernel.org/lkml/20230320121006.4863-1-petr.pavlu@suse.com/
-- 
Thanks,
~Nick Desaulniers

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v8 1/3] riscv: Introduce CONFIG_RELOCATABLE
  2023-03-22 18:25       ` Nick Desaulniers
@ 2023-03-23 20:01         ` Fangrui Song
  2023-03-24 10:34         ` Alexandre Ghiti
  1 sibling, 0 replies; 6+ messages in thread
From: Fangrui Song @ 2023-03-23 20:01 UTC (permalink / raw)
  To: Nick Desaulniers
  Cc: Björn Töpel, Alexandre Ghiti, Alexandre Ghiti,
	Michael Ellerman, Nicholas Piggin, Christophe Leroy,
	Paul Walmsley, Palmer Dabbelt, Albert Ou, linuxppc-dev,
	linux-kernel, linux-riscv, nathan, linux-kbuild, llvm

On Wed, Mar 22, 2023 at 11:26 AM Nick Desaulniers
<ndesaulniers@google.com> wrote:
>
> On Fri, Feb 24, 2023 at 7:58 AM Björn Töpel <bjorn@kernel.org> wrote:
> >
> > Alexandre Ghiti <alex@ghiti.fr> writes:
> >
> > > +cc linux-kbuild, llvm, Nathan, Nick
> > >
> > > On 2/15/23 15:36, Alexandre Ghiti wrote:
> > >> From: Alexandre Ghiti <alex@ghiti.fr>
> > >>
> > > I tried a lot of things, but I struggle to understand, does anyone have
> > > any idea? FYI, the same problem happens with LLVM.
>
> Off the top of my head, no idea.
>
> (Maybe as a follow up to this series, I wonder if pursuing
> ARCH_HAS_RELR for ARCH=riscv is worthwhile?)

(I had thought about this for my own fun, but the only current
implementation, arch/arm64/kernel/head.S, uses assembly.
Every port needs to write some assembly for the same task, which is a pity.
In FreeBSD rtld, glibc, and musl, the DT_RELR code is target-independent.)


> >
> > Don't ask me *why*, but adding --emit-relocs to your linker flags solves
> > "the NULL .rela.dyn" both for GCC and LLVM.
> >
> > The downside is that you end up with a bunch of .rela cruft in your
> > vmlinux.
>
> There was a patch just this week to use $(OBJCOPY) to strip these from
> vmlinux (for x86). Looks like x86 uses --emit-relocs for KASLR:
> https://lore.kernel.org/lkml/20230320121006.4863-1-petr.pavlu@suse.com/
> --
> Thanks,
> ~Nick Desaulniers
>


-- 
宋方睿

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v8 1/3] riscv: Introduce CONFIG_RELOCATABLE
  2023-03-22 18:25       ` Nick Desaulniers
  2023-03-23 20:01         ` Fangrui Song
@ 2023-03-24 10:34         ` Alexandre Ghiti
  1 sibling, 0 replies; 6+ messages in thread
From: Alexandre Ghiti @ 2023-03-24 10:34 UTC (permalink / raw)
  To: Nick Desaulniers, Björn Töpel
  Cc: Alexandre Ghiti, Michael Ellerman, Nicholas Piggin,
	Christophe Leroy, Paul Walmsley, Palmer Dabbelt, Albert Ou,
	linuxppc-dev, linux-kernel, linux-riscv, nathan, linux-kbuild,
	llvm

Hi Nick,

On 3/22/23 19:25, Nick Desaulniers wrote:
> On Fri, Feb 24, 2023 at 7:58 AM Björn Töpel <bjorn@kernel.org> wrote:
>> Alexandre Ghiti <alex@ghiti.fr> writes:
>>
>>> +cc linux-kbuild, llvm, Nathan, Nick
>>>
>>> On 2/15/23 15:36, Alexandre Ghiti wrote:
>>>> From: Alexandre Ghiti <alex@ghiti.fr>
>>>>
>>> I tried a lot of things, but I struggle to understand, does anyone have
>>> any idea? FYI, the same problem happens with LLVM.
> Off the top of my head, no idea.
>
> (Maybe as a follow up to this series, I wonder if pursuing
> ARCH_HAS_RELR for ARCH=riscv is worthwhile?)


IIUC, the goal of using RELR is to reduce the size of the kernel image.
Right now this is not my priority, but I'll add it to my todo list
because it may be useful to distros.


>
>> Don't ask me *why*, but adding --emit-relocs to your linker flags solves
>> "the NULL .rela.dyn" both for GCC and LLVM.
>>
>> The downside is that you end up with a bunch of .rela cruft in your
>> vmlinux.
> There was a patch just this week to use $(OBJCOPY) to strip these from
> vmlinux (for x86). Looks like x86 uses --emit-relocs for KASLR:
> https://lore.kernel.org/lkml/20230320121006.4863-1-petr.pavlu@suse.com/


That's nice: it would be an interesting intermediate step until we
find the issue here, as I believe it is important to have the relocations
in the init section to save memory.

Thanks for your answer Nick, really appreciated,

Alex



^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2023-03-24 10:34 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20230215143626.453491-1-alexghiti@rivosinc.com>
     [not found] ` <20230215143626.453491-2-alexghiti@rivosinc.com>
2023-02-22 12:29   ` [PATCH v8 1/3] riscv: Introduce CONFIG_RELOCATABLE Alexandre Ghiti
2023-02-24 15:58     ` Björn Töpel
2023-03-22 18:25       ` Nick Desaulniers
2023-03-23 20:01         ` Fangrui Song
2023-03-24 10:34         ` Alexandre Ghiti
2023-03-22 13:38     ` Alexandre Ghiti

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).