All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] arm64: Add support for relocating the kernel with RELR relocations
@ 2019-07-05  8:02 Peter Collingbourne
  2019-07-08 18:02 ` Nick Desaulniers
                   ` (4 more replies)
  0 siblings, 5 replies; 17+ messages in thread
From: Peter Collingbourne @ 2019-07-05  8:02 UTC (permalink / raw)
  To: Catalin Marinas, Will Deacon, Mark Rutland, Ard Biesheuvel
  Cc: clang-built-linux, Peter Collingbourne, linux-arm-kernel

RELR is a relocation packing format for relative relocations.
The format is described in a generic-abi proposal:
https://groups.google.com/d/topic/generic-abi/bX460iggiKg/discussion

The LLD linker can be instructed to pack relocations in the RELR
format by passing the flag --pack-dyn-relocs=relr.

This patch adds a new config option, CONFIG_RELR. Enabling this option
instructs the linker to pack vmlinux's relative relocations in the RELR
format, and causes the kernel to apply the relocations at startup along
with the RELA relocations. RELA relocations still need to be applied
because the linker will emit RELA relative relocations if they are
unrepresentable in the RELR format (i.e. address not a multiple of 2).

Enabling CONFIG_RELR reduces the size of a defconfig kernel image
with CONFIG_RANDOMIZE_BASE by 3.5MB/16% uncompressed, or 550KB/5%
compressed (lz4).

Signed-off-by: Peter Collingbourne <pcc@google.com>
---
 arch/arm64/Kconfig              |  9 +++++
 arch/arm64/Makefile             |  4 ++
 arch/arm64/kernel/head.S        | 70 ++++++++++++++++++++++++++++-----
 arch/arm64/kernel/vmlinux.lds.S |  9 +++++
 4 files changed, 83 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 697ea05107298..f0cd0d2607e70 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1447,6 +1447,15 @@ config RELOCATABLE
 	  relocation pass at runtime even if the kernel is loaded at the
 	  same address it was linked at.
 
+config RELR
+	bool "Use RELR relocation packing"
+	depends on RELOCATABLE && $(ld-option,--pack-dyn-relocs=relr)
+	help
+	  Store the kernel's dynamic relocations in the RELR relocation packing
+	  format. Requires a compatible linker (currently only LLD supports
+	  this feature), as well as compatible NM and OBJCOPY utilities
+	  (llvm-nm and llvm-objcopy are compatible).
+
 config RANDOMIZE_BASE
 	bool "Randomize the address of the kernel image"
 	select ARM64_MODULE_PLTS if MODULES
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index e9d2e578cbe67..16a8636f815c9 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -22,6 +22,10 @@ LDFLAGS_vmlinux		+= -shared -Bsymbolic -z notext -z norelro \
 			$(call ld-option, --no-apply-dynamic-relocs)
 endif
 
+ifeq ($(CONFIG_RELR),y)
+  LDFLAGS_vmlinux += --pack-dyn-relocs=relr
+endif
+
 ifeq ($(CONFIG_ARM64_ERRATUM_843419),y)
   ifeq ($(call ld-option, --fix-cortex-a53-843419),)
 $(warning ld does not support --fix-cortex-a53-843419; kernel may be susceptible to erratum)
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 2cdacd1c141b9..9b27d5e7d8f70 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -102,6 +102,7 @@ pe_header:
 	 *  x23        stext() .. start_kernel()  physical misalignment/KASLR offset
 	 *  x28        __create_page_tables()     callee preserved temp register
 	 *  x19/x20    __primary_switch()         callee preserved temp registers
+	 *  x24        __primary_switch()         current RELR displacement
 	 */
 ENTRY(stext)
 	bl	preserve_boot_args
@@ -824,24 +825,63 @@ __relocate_kernel:
 	 * Iterate over each entry in the relocation table, and apply the
 	 * relocations in place.
 	 */
-	ldr	w9, =__rela_offset		// offset to reloc table
-	ldr	w10, =__rela_size		// size of reloc table
-
 	mov_q	x11, KIMAGE_VADDR		// default virtual offset
 	add	x11, x11, x23			// actual virtual offset
+
+	ldr	w9, =__rela_offset		// offset to reloc table
+	ldr	w10, =__rela_size		// size of reloc table
 	add	x9, x9, x11			// __va(.rela)
 	add	x10, x9, x10			// __va(.rela) + sizeof(.rela)
 
 0:	cmp	x9, x10
 	b.hs	1f
-	ldp	x11, x12, [x9], #24
-	ldr	x13, [x9, #-8]
-	cmp	w12, #R_AARCH64_RELATIVE
+	ldp	x12, x13, [x9], #24
+	ldr	x14, [x9, #-8]
+	cmp	w13, #R_AARCH64_RELATIVE
 	b.ne	0b
-	add	x13, x13, x23			// relocate
-	str	x13, [x11, x23]
+	add	x14, x14, x23			// relocate
+	str	x14, [x12, x23]
 	b	0b
-1:	ret
+
+1:
+#ifdef CONFIG_RELR
+	ldr	w9, =__relr_offset		// offset to reloc table
+	ldr	w10, =__relr_size		// size of reloc table
+	add	x9, x9, x11			// __va(.relr)
+	add	x10, x9, x10			// __va(.relr) + sizeof(.relr)
+
+	sub	x15, x23, x24			// delta from previous offset
+	cbz	x15, 7f				// nothing to do if unchanged
+	mov	x24, x23			// save new offset
+
+2:	cmp	x9, x10
+	b.hs	7f
+	ldr	x11, [x9], #8
+	tbnz	x11, #0, 3f			// branch to handle bitmaps
+	add	x13, x11, x23
+	ldr	x12, [x13]			// relocate address entry
+	add	x12, x12, x15
+	str	x12, [x13], #8			// adjust to start of bitmap
+	b	2b
+
+3:	mov	x14, x13
+4:	lsr	x11, x11, #1
+	cbz	x11, 6f
+	tbz	x11, #0, 5f			// skip bit if not set
+	ldr	x12, [x14]			// relocate bit
+	add	x12, x12, x15
+	str	x12, [x14]
+
+5:	add	x14, x14, #8			// move to next bit's address
+	b	4b
+
+6:	add	x13, x13, #(8 * 63)		// move to next bitmap's address
+	b	2b
+
+7:
+#endif
+	ret
+
 ENDPROC(__relocate_kernel)
 #endif
 
@@ -854,6 +894,18 @@ __primary_switch:
 	adrp	x1, init_pg_dir
 	bl	__enable_mmu
 #ifdef CONFIG_RELOCATABLE
+#ifdef CONFIG_RELR
+	/*
+	 * RELR is similar to REL in that the addends are stored in place in the
+	 * binary. This means that RELR relocations cannot be applied
+	 * idempotently. We use x24 to keep track of the currently applied
+	 * displacement so that we can correctly relocate if __relocate_kernel
+	 * is called twice with non-zero displacements (i.e. if there is both a
+	 * physical misalignment and a KASLR displacement). We start off at 0
+	 * because no displacement has been applied yet.
+	 */
+	mov	x24, #0
+#endif
 	bl	__relocate_kernel
 #ifdef CONFIG_RANDOMIZE_BASE
 	ldr	x8, =__primary_switched
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 7fa0083749078..31716afa30f65 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -200,6 +200,15 @@ SECTIONS
 	__rela_offset	= ABSOLUTE(ADDR(.rela.dyn) - KIMAGE_VADDR);
 	__rela_size	= SIZEOF(.rela.dyn);
 
+#ifdef CONFIG_RELR
+	.relr.dyn : ALIGN(8) {
+		*(.relr.dyn)
+	}
+
+	__relr_offset	= ABSOLUTE(ADDR(.relr.dyn) - KIMAGE_VADDR);
+	__relr_size	= SIZEOF(.relr.dyn);
+#endif
+
 	. = ALIGN(SEGMENT_ALIGN);
 	__initdata_end = .;
 	__init_end = .;
-- 
2.22.0.410.gd8fdbe21b5-goog


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH] arm64: Add support for relocating the kernel with RELR relocations
  2019-07-05  8:02 [PATCH] arm64: Add support for relocating the kernel with RELR relocations Peter Collingbourne
@ 2019-07-08 18:02 ` Nick Desaulniers
  2019-07-09 22:04   ` Peter Collingbourne
  2019-07-10 16:21 ` Will Deacon
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 17+ messages in thread
From: Nick Desaulniers @ 2019-07-08 18:02 UTC (permalink / raw)
  To: Peter Collingbourne
  Cc: Mark Rutland, Ard Biesheuvel, Masahiro Yamada, Catalin Marinas,
	clang-built-linux, Will Deacon, Linux ARM

On Fri, Jul 5, 2019 at 1:03 AM 'Peter Collingbourne' via Clang Built
Linux <clang-built-linux@googlegroups.com> wrote:
>
> RELR is a relocation packing format for relative relocations.
> The format is described in a generic-abi proposal:
> https://groups.google.com/d/topic/generic-abi/bX460iggiKg/discussion

Cool, Rahul reports 9-19% savings for various userspace binaries.
Just curious, but a quick scan makes it seem like this could be
doable for other architectures as well? (maybe a topic for a separate
thread)

>
> The LLD linker can be instructed to pack relocations in the RELR
> format by passing the flag --pack-dyn-relocs=relr.
>
> This patch adds a new config option, CONFIG_RELR. Enabling this option
> instructs the linker to pack vmlinux's relative relocations in the RELR
> format, and causes the kernel to apply the relocations at startup along
> with the RELA relocations. RELA relocations still need to be applied
> because the linker will emit RELA relative relocations if they are
> unrepresentable in the RELR format (i.e. address not a multiple of 2).
>
> Enabling CONFIG_RELR reduces the size of a defconfig kernel image
> with CONFIG_RANDOMIZE_BASE by 3.5MB/16% uncompressed, or 550KB/5%
> compressed (lz4).

Neat! Thanks for the patch; I'll try to get it tested this week to see
if I can reproduce the results and boot test on hardware (I think
llvm-nm has no known issues, I'll need to check llvm-objcopy).

>
> Signed-off-by: Peter Collingbourne <pcc@google.com>
> ---
>  arch/arm64/Kconfig              |  9 +++++
>  arch/arm64/Makefile             |  4 ++
>  arch/arm64/kernel/head.S        | 70 ++++++++++++++++++++++++++++-----
>  arch/arm64/kernel/vmlinux.lds.S |  9 +++++
>  4 files changed, 83 insertions(+), 9 deletions(-)
>
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 697ea05107298..f0cd0d2607e70 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -1447,6 +1447,15 @@ config RELOCATABLE
>           relocation pass at runtime even if the kernel is loaded at the
>           same address it was linked at.
>
> +config RELR
> +       bool "Use RELR relocation packing"
> +       depends on RELOCATABLE && $(ld-option,--pack-dyn-relocs=relr)

Oh, ld-option in Kconfig? +Masahiro

> +       help
> +         Store the kernel's dynamic relocations in the RELR relocation packing
> +         format. Requires a compatible linker (currently only LLD supports
> +         this feature), as well as compatible NM and OBJCOPY utilities
> +         (llvm-nm and llvm-objcopy are compatible).

So sounds like `make LD=ld.lld NM=llvm-nm OBJCOPY=llvm-objcopy` will
be needed to test.  The ld-option check above doesn't seem strong
enough, but maybe it's not easy to feature test NM or OBJCOPY?

> +
>  config RANDOMIZE_BASE
>         bool "Randomize the address of the kernel image"
>         select ARM64_MODULE_PLTS if MODULES
> diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
> index e9d2e578cbe67..16a8636f815c9 100644
> --- a/arch/arm64/Makefile
> +++ b/arch/arm64/Makefile
> @@ -22,6 +22,10 @@ LDFLAGS_vmlinux              += -shared -Bsymbolic -z notext -z norelro \
>                         $(call ld-option, --no-apply-dynamic-relocs)
>  endif
>
> +ifeq ($(CONFIG_RELR),y)
> +  LDFLAGS_vmlinux += --pack-dyn-relocs=relr
> +endif
> +
>  ifeq ($(CONFIG_ARM64_ERRATUM_843419),y)
>    ifeq ($(call ld-option, --fix-cortex-a53-843419),)
>  $(warning ld does not support --fix-cortex-a53-843419; kernel may be susceptible to erratum)
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index 2cdacd1c141b9..9b27d5e7d8f70 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -102,6 +102,7 @@ pe_header:
>          *  x23        stext() .. start_kernel()  physical misalignment/KASLR offset
>          *  x28        __create_page_tables()     callee preserved temp register
>          *  x19/x20    __primary_switch()         callee preserved temp registers
> +        *  x24        __primary_switch()         current RELR displacement
>          */
>  ENTRY(stext)
>         bl      preserve_boot_args
> @@ -824,24 +825,63 @@ __relocate_kernel:
>          * Iterate over each entry in the relocation table, and apply the
>          * relocations in place.
>          */
> -       ldr     w9, =__rela_offset              // offset to reloc table
> -       ldr     w10, =__rela_size               // size of reloc table
> -
>         mov_q   x11, KIMAGE_VADDR               // default virtual offset
>         add     x11, x11, x23                   // actual virtual offset
> +
> +       ldr     w9, =__rela_offset              // offset to reloc table
> +       ldr     w10, =__rela_size               // size of reloc table

Was this reordering intentional?  I don't think w9 or w10 would change
across the mov_q and add above? Or is it just to match the loop update
below?

>         add     x9, x9, x11                     // __va(.rela)
>         add     x10, x9, x10                    // __va(.rela) + sizeof(.rela)
>
>  0:     cmp     x9, x10
>         b.hs    1f
> -       ldp     x11, x12, [x9], #24
> -       ldr     x13, [x9, #-8]
> -       cmp     w12, #R_AARCH64_RELATIVE
> +       ldp     x12, x13, [x9], #24
> +       ldr     x14, [x9, #-8]
> +       cmp     w13, #R_AARCH64_RELATIVE

Can you help me understand this renaming?
x11 -> x12
x13 -> x14
x12 -> x13
but they all get clobbered before use in your added ifdef hunk?

>         b.ne    0b
> -       add     x13, x13, x23                   // relocate
> -       str     x13, [x11, x23]
> +       add     x14, x14, x23                   // relocate
> +       str     x14, [x12, x23]
>         b       0b
> -1:     ret
> +
> +1:
> +#ifdef CONFIG_RELR
> +       ldr     w9, =__relr_offset              // offset to reloc table
> +       ldr     w10, =__relr_size               // size of reloc table

Were these modified since first loaded in the above hunk?  I see the
offsets applied below, but I don't spot any loops back up to `1:` (but
could be missing it).  It also doesn't look like x11 or x10 are
modified below (or above), so this looks like we're rematerializing
values that already exist in those registers, IIUC?  Maybe I'm missing
some side effect of one of the instructions?

> +       add     x9, x9, x11                     // __va(.relr)
> +       add     x10, x9, x10                    // __va(.relr) + sizeof(.relr)
> +
> +       sub     x15, x23, x24                   // delta from previous offset
> +       cbz     x15, 7f                         // nothing to do if unchanged
> +       mov     x24, x23                        // save new offset
> +
> +2:     cmp     x9, x10
> +       b.hs    7f
> +       ldr     x11, [x9], #8
> +       tbnz    x11, #0, 3f                     // branch to handle bitmaps
> +       add     x13, x11, x23
> +       ldr     x12, [x13]                      // relocate address entry
> +       add     x12, x12, x15
> +       str     x12, [x13], #8                  // adjust to start of bitmap
> +       b       2b
> +
> +3:     mov     x14, x13
> +4:     lsr     x11, x11, #1
> +       cbz     x11, 6f
> +       tbz     x11, #0, 5f                     // skip bit if not set
> +       ldr     x12, [x14]                      // relocate bit
> +       add     x12, x12, x15
> +       str     x12, [x14]
> +
> +5:     add     x14, x14, #8                    // move to next bit's address
> +       b       4b
> +
> +6:     add     x13, x13, #(8 * 63)             // move to next bitmap's address

Sorry, what's this constant `#(8 * 63)`?

> +       b       2b
> +
> +7:
> +#endif
> +       ret
> +
>  ENDPROC(__relocate_kernel)
>  #endif
>
> @@ -854,6 +894,18 @@ __primary_switch:
>         adrp    x1, init_pg_dir
>         bl      __enable_mmu
>  #ifdef CONFIG_RELOCATABLE
> +#ifdef CONFIG_RELR
> +       /*
> +        * RELR is similar to REL in that the addends are stored in place in the
> +        * binary. This means that RELR relocations cannot be applied
> +        * idempotently. We use x24 to keep track of the currently applied
> +        * displacement so that we can correctly relocate if __relocate_kernel
> +        * is called twice with non-zero displacements (i.e. if there is both a
> +        * physical misalignment and a KASLR displacement). We start off at 0

Sounds like I should test w/ and w/o CONFIG_RANDOMIZE_BASE enabled?

> +        * because no displacement has been applied yet.
> +        */
> +       mov     x24, #0
> +#endif
>         bl      __relocate_kernel
>  #ifdef CONFIG_RANDOMIZE_BASE
>         ldr     x8, =__primary_switched
> diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
> index 7fa0083749078..31716afa30f65 100644
> --- a/arch/arm64/kernel/vmlinux.lds.S
> +++ b/arch/arm64/kernel/vmlinux.lds.S
> @@ -200,6 +200,15 @@ SECTIONS
>         __rela_offset   = ABSOLUTE(ADDR(.rela.dyn) - KIMAGE_VADDR);
>         __rela_size     = SIZEOF(.rela.dyn);
>
> +#ifdef CONFIG_RELR
> +       .relr.dyn : ALIGN(8) {
> +               *(.relr.dyn)
> +       }
> +
> +       __relr_offset   = ABSOLUTE(ADDR(.relr.dyn) - KIMAGE_VADDR);
> +       __relr_size     = SIZEOF(.relr.dyn);
> +#endif
> +
>         . = ALIGN(SEGMENT_ALIGN);
>         __initdata_end = .;
>         __init_end = .;
> --

-- 
Thanks,
~Nick Desaulniers

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] arm64: Add support for relocating the kernel with RELR relocations
  2019-07-08 18:02 ` Nick Desaulniers
@ 2019-07-09 22:04   ` Peter Collingbourne
  2019-07-09 23:13     ` Nick Desaulniers
  0 siblings, 1 reply; 17+ messages in thread
From: Peter Collingbourne @ 2019-07-09 22:04 UTC (permalink / raw)
  To: Nick Desaulniers
  Cc: Mark Rutland, Ard Biesheuvel, Masahiro Yamada, Catalin Marinas,
	clang-built-linux, Will Deacon, Linux ARM

On Mon, Jul 8, 2019 at 11:02 AM Nick Desaulniers
<ndesaulniers@google.com> wrote:
>
> On Fri, Jul 5, 2019 at 1:03 AM 'Peter Collingbourne' via Clang Built
> Linux <clang-built-linux@googlegroups.com> wrote:
> >
> > RELR is a relocation packing format for relative relocations.
> > The format is described in a generic-abi proposal:
> > https://groups.google.com/d/topic/generic-abi/bX460iggiKg/discussion
>
> Cool, Rahul reports 9-19% savings for various userspace binaries.
> Just curious, but a quick scan makes it seem like this could be
> do-able for other arch's as well? (maybe a topic for a separate
> thread)

Yes, but it would likely involve reimplementing the self-relocation
code in assembly for each architecture so that we can guarantee that
the code does not itself need to be relocated. Probably worth
discussing on a separate thread.

> > The LLD linker can be instructed to pack relocations in the RELR
> > format by passing the flag --pack-dyn-relocs=relr.
> >
> > This patch adds a new config option, CONFIG_RELR. Enabling this option
> > instructs the linker to pack vmlinux's relative relocations in the RELR
> > format, and causes the kernel to apply the relocations at startup along
> > with the RELA relocations. RELA relocations still need to be applied
> > because the linker will emit RELA relative relocations if they are
> > unrepresentable in the RELR format (i.e. address not a multiple of 2).
> >
> > Enabling CONFIG_RELR reduces the size of a defconfig kernel image
> > with CONFIG_RANDOMIZE_BASE by 3.5MB/16% uncompressed, or 550KB/5%
> > compressed (lz4).
>
> Neat! Thanks for the patch; I'll try to get it tested this week to see
> if I can reproduce the results and boot test on hardware (I think
> llvm-nm has no known issues, I'll need to check llvm-objcopy).

Thanks. I've already boot tested it using qemu and I was planning to
test on hikey960 (currently waiting on a part), but more testing would
be useful.

> > Signed-off-by: Peter Collingbourne <pcc@google.com>
> > ---
> >  arch/arm64/Kconfig              |  9 +++++
> >  arch/arm64/Makefile             |  4 ++
> >  arch/arm64/kernel/head.S        | 70 ++++++++++++++++++++++++++++-----
> >  arch/arm64/kernel/vmlinux.lds.S |  9 +++++
> >  4 files changed, 83 insertions(+), 9 deletions(-)
> >
> > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> > index 697ea05107298..f0cd0d2607e70 100644
> > --- a/arch/arm64/Kconfig
> > +++ b/arch/arm64/Kconfig
> > @@ -1447,6 +1447,15 @@ config RELOCATABLE
> >           relocation pass at runtime even if the kernel is loaded at the
> >           same address it was linked at.
> >
> > +config RELR
> > +       bool "Use RELR relocation packing"
> > +       depends on RELOCATABLE && $(ld-option,--pack-dyn-relocs=relr)
>
> Oh, ld-option in Kconfig? +Masahiro
>
> > +       help
> > +         Store the kernel's dynamic relocations in the RELR relocation packing
> > +         format. Requires a compatible linker (currently only LLD supports
> > +         this feature), as well as compatible NM and OBJCOPY utilities
> > +         (llvm-nm and llvm-objcopy are compatible).
>
> So sounds like `make LD=ld.lld NM=llvm-nm OBJCOPY=llvm-objcopy` will
> be needed to test.  The ld-option check above doesn't seem strong
> enough, but maybe it's not easy to feature test NM or OBJCOPY?

Right. Ideally we want to test the property that the tool accepts an
input file with a .relr.dyn section, and this isn't easy without
actually creating such a file. We could test that the tools are
actually the LLVM versions (e.g. by testing the output of $TOOL
--version), but I'm not sure if we want to exclude the possibility
that GNU or other toolchains will add support for this section in the
future.

> > +
> >  config RANDOMIZE_BASE
> >         bool "Randomize the address of the kernel image"
> >         select ARM64_MODULE_PLTS if MODULES
> > diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
> > index e9d2e578cbe67..16a8636f815c9 100644
> > --- a/arch/arm64/Makefile
> > +++ b/arch/arm64/Makefile
> > @@ -22,6 +22,10 @@ LDFLAGS_vmlinux              += -shared -Bsymbolic -z notext -z norelro \
> >                         $(call ld-option, --no-apply-dynamic-relocs)
> >  endif
> >
> > +ifeq ($(CONFIG_RELR),y)
> > +  LDFLAGS_vmlinux += --pack-dyn-relocs=relr
> > +endif
> > +
> >  ifeq ($(CONFIG_ARM64_ERRATUM_843419),y)
> >    ifeq ($(call ld-option, --fix-cortex-a53-843419),)
> >  $(warning ld does not support --fix-cortex-a53-843419; kernel may be susceptible to erratum)
> > diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> > index 2cdacd1c141b9..9b27d5e7d8f70 100644
> > --- a/arch/arm64/kernel/head.S
> > +++ b/arch/arm64/kernel/head.S
> > @@ -102,6 +102,7 @@ pe_header:
> >          *  x23        stext() .. start_kernel()  physical misalignment/KASLR offset
> >          *  x28        __create_page_tables()     callee preserved temp register
> >          *  x19/x20    __primary_switch()         callee preserved temp registers
> > +        *  x24        __primary_switch()         current RELR displacement
> >          */
> >  ENTRY(stext)
> >         bl      preserve_boot_args
> > @@ -824,24 +825,63 @@ __relocate_kernel:
> >          * Iterate over each entry in the relocation table, and apply the
> >          * relocations in place.
> >          */
> > -       ldr     w9, =__rela_offset              // offset to reloc table
> > -       ldr     w10, =__rela_size               // size of reloc table
> > -
> >         mov_q   x11, KIMAGE_VADDR               // default virtual offset
> >         add     x11, x11, x23                   // actual virtual offset
> > +
> > +       ldr     w9, =__rela_offset              // offset to reloc table
> > +       ldr     w10, =__rela_size               // size of reloc table
>
> Was this reordering intentional?  I don't think w9 or w10 would change
> across the mov_q and add above? Or is it just to match the loop update
> below?

Yes, it was intended to group all of the code that deals with RELA
tables together so that it resembles the RELR code.

> >         add     x9, x9, x11                     // __va(.rela)
> >         add     x10, x9, x10                    // __va(.rela) + sizeof(.rela)
> >
> >  0:     cmp     x9, x10
> >         b.hs    1f
> > -       ldp     x11, x12, [x9], #24
> > -       ldr     x13, [x9, #-8]
> > -       cmp     w12, #R_AARCH64_RELATIVE
> > +       ldp     x12, x13, [x9], #24
> > +       ldr     x14, [x9, #-8]
> > +       cmp     w13, #R_AARCH64_RELATIVE
>
> Can you help me understand this renaming?
> x11 -> x12
> x13 -> x14
> x12 -> x13
> but they all get clobbered before use in your added ifdef hunk?

I use the value of x11 before it is clobbered in the instruction with
the comment "// __va(.relr)".

> >         b.ne    0b
> > -       add     x13, x13, x23                   // relocate
> > -       str     x13, [x11, x23]
> > +       add     x14, x14, x23                   // relocate
> > +       str     x14, [x12, x23]
> >         b       0b
> > -1:     ret
> > +
> > +1:
> > +#ifdef CONFIG_RELR
> > +       ldr     w9, =__relr_offset              // offset to reloc table
> > +       ldr     w10, =__relr_size               // size of reloc table
>
> Were these modified since first loaded in the above hunk?  I see the
> offsets applied below, but I don't spot any loops back up to `1:` (but
> could be missing it).  It also doesn't look like x11 or x10 are
> modified below (or above), so this looks like we're rematerializing
> values that already exist in those registers, IIUC?  Maybe I'm missing
> some side effect of one of the instructions?

These instructions refer to __relr_{offset,size} which are different
from __rela_{offset,size} loaded above. They are only loaded once per
function call; the main loop is between labels 2 and 7 below.

You might have missed the implicit increment of x9 by 8 in the "ldr
x11, [x9], #8" instruction below (see also similar instructions
above), which is how we move to the next relocation table entry in the
main loop.

> > +       add     x9, x9, x11                     // __va(.relr)
> > +       add     x10, x9, x10                    // __va(.relr) + sizeof(.relr)
> > +
> > +       sub     x15, x23, x24                   // delta from previous offset
> > +       cbz     x15, 7f                         // nothing to do if unchanged
> > +       mov     x24, x23                        // save new offset
> > +
> > +2:     cmp     x9, x10
> > +       b.hs    7f
> > +       ldr     x11, [x9], #8
> > +       tbnz    x11, #0, 3f                     // branch to handle bitmaps
> > +       add     x13, x11, x23
> > +       ldr     x12, [x13]                      // relocate address entry
> > +       add     x12, x12, x15
> > +       str     x12, [x13], #8                  // adjust to start of bitmap
> > +       b       2b
> > +
> > +3:     mov     x14, x13
> > +4:     lsr     x11, x11, #1
> > +       cbz     x11, 6f
> > +       tbz     x11, #0, 5f                     // skip bit if not set
> > +       ldr     x12, [x14]                      // relocate bit
> > +       add     x12, x12, x15
> > +       str     x12, [x14]
> > +
> > +5:     add     x14, x14, #8                    // move to next bit's address
> > +       b       4b
> > +
> > +6:     add     x13, x13, #(8 * 63)             // move to next bitmap's address
>
> Sorry, what's this constant `#(8 * 63)`?

It is the number of bytes covered by a bitmap entry. 8 is the word
size, and 63 is the number of significant bits in a bitmap entry.

>
> > +       b       2b
> > +
> > +7:
> > +#endif
> > +       ret
> > +
> >  ENDPROC(__relocate_kernel)
> >  #endif
> >
> > @@ -854,6 +894,18 @@ __primary_switch:
> >         adrp    x1, init_pg_dir
> >         bl      __enable_mmu
> >  #ifdef CONFIG_RELOCATABLE
> > +#ifdef CONFIG_RELR
> > +       /*
> > +        * RELR is similar to REL in that the addends are stored in place in the
> > +        * binary. This means that RELR relocations cannot be applied
> > +        * idempotently. We use x24 to keep track of the currently applied
> > +        * displacement so that we can correctly relocate if __relocate_kernel
> > +        * is called twice with non-zero displacements (i.e. if there is both a
> > +        * physical misalignment and a KASLR displacement). We start off at 0
>
> Sounds like I should test w/ and w/o CONFIG_RANDOMIZE_BASE enabled?

Sure, makes sense.

Peter

> > +        * because no displacement has been applied yet.
> > +        */
> > +       mov     x24, #0
> > +#endif
> >         bl      __relocate_kernel
> >  #ifdef CONFIG_RANDOMIZE_BASE
> >         ldr     x8, =__primary_switched
> > diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
> > index 7fa0083749078..31716afa30f65 100644
> > --- a/arch/arm64/kernel/vmlinux.lds.S
> > +++ b/arch/arm64/kernel/vmlinux.lds.S
> > @@ -200,6 +200,15 @@ SECTIONS
> >         __rela_offset   = ABSOLUTE(ADDR(.rela.dyn) - KIMAGE_VADDR);
> >         __rela_size     = SIZEOF(.rela.dyn);
> >
> > +#ifdef CONFIG_RELR
> > +       .relr.dyn : ALIGN(8) {
> > +               *(.relr.dyn)
> > +       }
> > +
> > +       __relr_offset   = ABSOLUTE(ADDR(.relr.dyn) - KIMAGE_VADDR);
> > +       __relr_size     = SIZEOF(.relr.dyn);
> > +#endif
> > +
> >         . = ALIGN(SEGMENT_ALIGN);
> >         __initdata_end = .;
> >         __init_end = .;
> > --
>
> --
> Thanks,
> ~Nick Desaulniers

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] arm64: Add support for relocating the kernel with RELR relocations
  2019-07-09 22:04   ` Peter Collingbourne
@ 2019-07-09 23:13     ` Nick Desaulniers
  2019-07-12 19:40       ` Peter Collingbourne
  0 siblings, 1 reply; 17+ messages in thread
From: Nick Desaulniers @ 2019-07-09 23:13 UTC (permalink / raw)
  To: Peter Collingbourne
  Cc: Mark Rutland, Ard Biesheuvel, Masahiro Yamada, Catalin Marinas,
	clang-built-linux, Will Deacon, Linux ARM

On Tue, Jul 9, 2019 at 3:04 PM Peter Collingbourne <pcc@google.com> wrote:
>
> On Mon, Jul 8, 2019 at 11:02 AM Nick Desaulniers
> <ndesaulniers@google.com> wrote:
> >
> > On Fri, Jul 5, 2019 at 1:03 AM 'Peter Collingbourne' via Clang Built
> > Linux <clang-built-linux@googlegroups.com> wrote:
> > > +config RELR
> > > +       bool "Use RELR relocation packing"
> > > +       depends on RELOCATABLE && $(ld-option,--pack-dyn-relocs=relr)
> >
> > Oh, ld-option in Kconfig? +Masahiro
> >
> > > +       help
> > > +         Store the kernel's dynamic relocations in the RELR relocation packing
> > > +         format. Requires a compatible linker (currently only LLD supports
> > > +         this feature), as well as compatible NM and OBJCOPY utilities
> > > +         (llvm-nm and llvm-objcopy are compatible).
> >
> > So sounds like `make LD=ld.lld NM=llvm-nm OBJCOPY=llvm-objcopy` will
> > be needed to test.  The ld-option check above doesn't seem strong
> > enough, but maybe it's not easy to feature test NM or OBJCOPY?
>
> Right. Ideally we want to test the property that the tool accepts an
> input file with a .relr.dyn section, and this isn't easy without
> actually creating such a file. We could test that the tools are
> actually the LLVM versions (e.g. by testing the output of $TOOL
> --version), but I'm not sure if we want to exclude the possibility
> that GNU or other toolchains will add support for this section in the
> future.

eh, I strongly dislike version checks due to their brittleness.
https://lkml.org/lkml/2019/6/25/1253
Maybe a script like `scripts/cc-can-link.sh` ?

> > >         add     x9, x9, x11                     // __va(.rela)
> > >         add     x10, x9, x10                    // __va(.rela) + sizeof(.rela)
> > >
> > >  0:     cmp     x9, x10
> > >         b.hs    1f
> > > -       ldp     x11, x12, [x9], #24
> > > -       ldr     x13, [x9, #-8]
> > > -       cmp     w12, #R_AARCH64_RELATIVE
> > > +       ldp     x12, x13, [x9], #24
> > > +       ldr     x14, [x9, #-8]
> > > +       cmp     w13, #R_AARCH64_RELATIVE
> >
> > Can you help me understand this renaming?
> > x11 -> x12
> > x13 -> x14
> > x12 -> x13
> > but they all get clobbered before use in your added ifdef hunk?
>
> I use the value of x11 before it is clobbered in the instruction with
> the comment "// __va(.relr)".
>
> > >         b.ne    0b
> > > -       add     x13, x13, x23                   // relocate
> > > -       str     x13, [x11, x23]
> > > +       add     x14, x14, x23                   // relocate
> > > +       str     x14, [x12, x23]
> > >         b       0b
> > > -1:     ret
> > > +
> > > +1:
> > > +#ifdef CONFIG_RELR
> > > +       ldr     w9, =__relr_offset              // offset to reloc table
> > > +       ldr     w10, =__relr_size               // size of reloc table
> >
> > Were these modified since first loaded in the above hunk?  I see the
> > offsets applied below, but I don't spot any loops back up to `1:` (but
> > could be missing it).  It also doesn't look like x11 or x10 are
> > modified below (or above), so this looks like we're rematerializing
> > values that already exist in those registers, IIUC?  Maybe I'm missing
> > some side effect of one of the instructions?
>
> These instructions refer to __relr_{offset,size} which are different
> from __rela_{offset,size} loaded above. They are only loaded once per
> function call; the main loop is between labels 2 and 7 below.

Oh, sorry, yes I missed __relr_ vs __rela_.  Thanks for the clarification.

>
> You might have missed the implicit increment of x9 by 8 in the "ldr
> x11, [x9], #8" instruction below (see also similar instructions
> above), which is how we move to the next relocation table entry in the
> main loop.

Yep, I forgot about the pre vs post increment syntax:
http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.kui0100a/armasm_cihgjhed.htm
https://azeria-labs.com/memory-instructions-load-and-store-part-4/

> > > +6:     add     x13, x13, #(8 * 63)             // move to next bitmap's address
> >
> > Sorry, what's this constant `#(8 * 63)`?
>
> It is the number of bytes covered by a bitmap entry. 8 is the word
> size, and 63 is the number of significant bits in a bitmap entry.

Might be good to add that as a comment inline w/ the code?

Pulling down the patch to test.
-- 
Thanks,
~Nick Desaulniers

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] arm64: Add support for relocating the kernel with RELR relocations
  2019-07-05  8:02 [PATCH] arm64: Add support for relocating the kernel with RELR relocations Peter Collingbourne
  2019-07-08 18:02 ` Nick Desaulniers
@ 2019-07-10 16:21 ` Will Deacon
  2019-07-12 19:40   ` Peter Collingbourne
  2019-07-10 23:14 ` Nick Desaulniers
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 17+ messages in thread
From: Will Deacon @ 2019-07-10 16:21 UTC (permalink / raw)
  To: Peter Collingbourne
  Cc: Mark Rutland, Catalin Marinas, clang-built-linux,
	linux-arm-kernel, Ard Biesheuvel

On Fri, Jul 05, 2019 at 01:02:31AM -0700, Peter Collingbourne wrote:
> RELR is a relocation packing format for relative relocations.
> The format is described in a generic-abi proposal:
> https://groups.google.com/d/topic/generic-abi/bX460iggiKg/discussion
> 
> The LLD linker can be instructed to pack relocations in the RELR
> format by passing the flag --pack-dyn-relocs=relr.
> 
> This patch adds a new config option, CONFIG_RELR. Enabling this option
> instructs the linker to pack vmlinux's relative relocations in the RELR
> format, and causes the kernel to apply the relocations at startup along
> with the RELA relocations. RELA relocations still need to be applied
> because the linker will emit RELA relative relocations if they are
> unrepresentable in the RELR format (i.e. address not a multiple of 2).
> 
> Enabling CONFIG_RELR reduces the size of a defconfig kernel image
> with CONFIG_RANDOMIZE_BASE by 3.5MB/16% uncompressed, or 550KB/5%
> compressed (lz4).
> 
> Signed-off-by: Peter Collingbourne <pcc@google.com>
> ---
>  arch/arm64/Kconfig              |  9 +++++
>  arch/arm64/Makefile             |  4 ++
>  arch/arm64/kernel/head.S        | 70 ++++++++++++++++++++++++++++-----
>  arch/arm64/kernel/vmlinux.lds.S |  9 +++++
>  4 files changed, 83 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 697ea05107298..f0cd0d2607e70 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -1447,6 +1447,15 @@ config RELOCATABLE
>  	  relocation pass at runtime even if the kernel is loaded at the
>  	  same address it was linked at.
>  
> +config RELR
> +	bool "Use RELR relocation packing"
> +	depends on RELOCATABLE && $(ld-option,--pack-dyn-relocs=relr)

Do you know if this will also be supported by binutils and, if so, whether
they've agreed to use the same name for the option?

> +	help
> +	  Store the kernel's dynamic relocations in the RELR relocation packing
> +	  format. Requires a compatible linker (currently only LLD supports
> +	  this feature), as well as compatible NM and OBJCOPY utilities
> +	  (llvm-nm and llvm-objcopy are compatible).
> +
>  config RANDOMIZE_BASE
>  	bool "Randomize the address of the kernel image"
>  	select ARM64_MODULE_PLTS if MODULES
> diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
> index e9d2e578cbe67..16a8636f815c9 100644
> --- a/arch/arm64/Makefile
> +++ b/arch/arm64/Makefile
> @@ -22,6 +22,10 @@ LDFLAGS_vmlinux		+= -shared -Bsymbolic -z notext -z norelro \
>  			$(call ld-option, --no-apply-dynamic-relocs)
>  endif
>  
> +ifeq ($(CONFIG_RELR),y)
> +  LDFLAGS_vmlinux += --pack-dyn-relocs=relr
> +endif
> +
>  ifeq ($(CONFIG_ARM64_ERRATUM_843419),y)
>    ifeq ($(call ld-option, --fix-cortex-a53-843419),)
>  $(warning ld does not support --fix-cortex-a53-843419; kernel may be susceptible to erratum)
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index 2cdacd1c141b9..9b27d5e7d8f70 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -102,6 +102,7 @@ pe_header:
>  	 *  x23        stext() .. start_kernel()  physical misalignment/KASLR offset
>  	 *  x28        __create_page_tables()     callee preserved temp register
>  	 *  x19/x20    __primary_switch()         callee preserved temp registers
> +	 *  x24        __primary_switch()         current RELR displacement

I think the comment is a bit misleading here, since x24 is used by
__relocate_kernel(). Maybe make the middle column say:

	__primary_switch() .. __relocate_kernel()

it's still not ideal, since the latter can be invoked twice, but oh well.

>  	 */
>  ENTRY(stext)
>  	bl	preserve_boot_args
> @@ -824,24 +825,63 @@ __relocate_kernel:
>  	 * Iterate over each entry in the relocation table, and apply the
>  	 * relocations in place.
>  	 */
> -	ldr	w9, =__rela_offset		// offset to reloc table
> -	ldr	w10, =__rela_size		// size of reloc table
> -
>  	mov_q	x11, KIMAGE_VADDR		// default virtual offset
>  	add	x11, x11, x23			// actual virtual offset
> +
> +	ldr	w9, =__rela_offset		// offset to reloc table
> +	ldr	w10, =__rela_size		// size of reloc table

I agree with Nick that I'd prefer to leave these lines alone.

>  	add	x9, x9, x11			// __va(.rela)
>  	add	x10, x9, x10			// __va(.rela) + sizeof(.rela)
>  
>  0:	cmp	x9, x10
>  	b.hs	1f
> -	ldp	x11, x12, [x9], #24
> -	ldr	x13, [x9, #-8]
> -	cmp	w12, #R_AARCH64_RELATIVE
> +	ldp	x12, x13, [x9], #24
> +	ldr	x14, [x9, #-8]
> +	cmp	w13, #R_AARCH64_RELATIVE
>  	b.ne	0b
> -	add	x13, x13, x23			// relocate
> -	str	x13, [x11, x23]
> +	add	x14, x14, x23			// relocate
> +	str	x14, [x12, x23]
>  	b	0b
> -1:	ret

So the reason you're removing this ret is because we'll end up with both a
.relr section *and* .rela section, correct?

> +1:
> +#ifdef CONFIG_RELR
> +	ldr	w9, =__relr_offset		// offset to reloc table
> +	ldr	w10, =__relr_size		// size of reloc table
> +	add	x9, x9, x11			// __va(.relr)
> +	add	x10, x9, x10			// __va(.relr) + sizeof(.relr)
> +
> +	sub	x15, x23, x24			// delta from previous offset
> +	cbz	x15, 7f				// nothing to do if unchanged
> +	mov	x24, x23			// save new offset
> +
> +2:	cmp	x9, x10
> +	b.hs	7f
> +	ldr	x11, [x9], #8
> +	tbnz	x11, #0, 3f			// branch to handle bitmaps

Can we guarantee that x13 has been initialised at this point?

> +	add	x13, x11, x23
> +	ldr	x12, [x13]			// relocate address entry
> +	add	x12, x12, x15
> +	str	x12, [x13], #8			// adjust to start of bitmap
> +	b	2b
> +
> +3:	mov	x14, x13
> +4:	lsr	x11, x11, #1
> +	cbz	x11, 6f
> +	tbz	x11, #0, 5f			// skip bit if not set
> +	ldr	x12, [x14]			// relocate bit
> +	add	x12, x12, x15
> +	str	x12, [x14]
> +
> +5:	add	x14, x14, #8			// move to next bit's address
> +	b	4b
> +
> +6:	add	x13, x13, #(8 * 63)		// move to next bitmap's address
> +	b	2b

This desperately needs a block comment at the top (immediately after the
#ifdef CONFIG_RELR) describing the algorithm and the layout of the .relr
section, please.

Will

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] arm64: Add support for relocating the kernel with RELR relocations
  2019-07-05  8:02 [PATCH] arm64: Add support for relocating the kernel with RELR relocations Peter Collingbourne
  2019-07-08 18:02 ` Nick Desaulniers
  2019-07-10 16:21 ` Will Deacon
@ 2019-07-10 23:14 ` Nick Desaulniers
  2019-07-12 19:40   ` Peter Collingbourne
  2019-07-12 19:33 ` [PATCH v2] arm64: Add support for relocating the kernel with RELR Peter Collingbourne
  2019-07-12 19:38 ` [PATCH v2] arm64: Add support for relocating the kernel with RELR relocations Peter Collingbourne
  4 siblings, 1 reply; 17+ messages in thread
From: Nick Desaulniers @ 2019-07-10 23:14 UTC (permalink / raw)
  To: Peter Collingbourne
  Cc: Mark Rutland, Ard Biesheuvel, Catalin Marinas, clang-built-linux,
	Jordan Rupprecht, Will Deacon, Linux ARM

On Fri, Jul 5, 2019 at 1:03 AM 'Peter Collingbourne' via Clang Built
Linux <clang-built-linux@googlegroups.com> wrote:
>
> RELR is a relocation packing format for relative relocations.
> The format is described in a generic-abi proposal:
> https://groups.google.com/d/topic/generic-abi/bX460iggiKg/discussion
>
> The LLD linker can be instructed to pack relocations in the RELR
> format by passing the flag --pack-dyn-relocs=relr.
>
> This patch adds a new config option, CONFIG_RELR. Enabling this option
> instructs the linker to pack vmlinux's relative relocations in the RELR
> format, and causes the kernel to apply the relocations at startup along
> with the RELA relocations. RELA relocations still need to be applied
> because the linker will emit RELA relative relocations if they are
> unrepresentable in the RELR format (i.e. address not a multiple of 2).
>
> Enabling CONFIG_RELR reduces the size of a defconfig kernel image
> with CONFIG_RANDOMIZE_BASE by 3.5MB/16% uncompressed, or 550KB/5%
> compressed (lz4).
>
> Signed-off-by: Peter Collingbourne <pcc@google.com>

Tested with:
ToT llvm r365583
mainline linux commit 5ad18b2e60b7

pre-patch application:
$ ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- make CC=clang LD=ld.lld
NM=llvm-nm OBJCOPY=llvm-objcopy -j71 defconfig
$ ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- make CC=clang LD=ld.lld
NM=llvm-nm OBJCOPY=llvm-objcopy -j71
$ du -h vmlinux
227M vmlinux
$ du -h arch/arm64/boot/Image
25M arch/arm64/boot/Image
$ du -h arch/arm64/boot/Image.gz
9.0M arch/arm64/boot/Image.gz
$ find . -name \*.ko | xargs du -ch | tail -n 1
437M total
$ qemu-system-aarch64 -kernel arch/arm64/boot/Image.gz -machine virt
-cpu cortex-a57 -nographic --append "console=ttyAMA0 root=/dev/ram0"
-m 2048 -initrd /android1/buildroot/output/images/rootfs.cpio
...
# cat /proc/version
Linux version 5.2.0-00915-g5ad18b2e60b7
(ndesaulniers@ndesaulniers1.mtv.corp.google.com) (clang version 9.0.0
(https://github.com/llvm/llvm-project.git
b1843e130ad9c4269ece5d08718b33566a41d919)) #66 SMP PREEMPT Tue Jul 9
16:50:18 PDT 2019
(qemu) q

post-patch application:
$ ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- make CC=clang LD=ld.lld
NM=llvm-nm OBJCOPY=llvm-objcopy -j71 clean
$ git am /tmp/relr.eml
Applying: arm64: Add support for relocating the kernel with RELR relocations
$ ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- make CC=clang LD=ld.lld
NM=llvm-nm OBJCOPY=llvm-objcopy -j71
...
Use RELR relocation packing (RELR) [N/y/?] (NEW) y
...
$ du -h vmlinux
224M vmlinux
$ du -h arch/arm64/boot/Image
21M arch/arm64/boot/Image
$ du -h arch/arm64/boot/Image.gz
8.7M arch/arm64/boot/Image.gz
$ find . -name \*.ko | xargs du -ch | tail -n 1
424M total
$ qemu-system-aarch64 -kernel arch/arm64/boot/Image.gz -machine virt
-cpu cortex-a57 -nographic --append "console=ttyAMA0 root=/dev/ram0"
-m 2048 -initrd /android1/buildroot/output/images/rootfs.cpio
...
# cat /proc/version
Linux version 5.2.0-00916-g3798f5948201
(ndesaulniers@ndesaulniers1.mtv.corp.google.com) (clang version 9.0.0
(https://github.com/llvm/llvm-project.git
b1843e130ad9c4269ece5d08718b33566a41d919)) #67 SMP PREEMPT Wed Jul 10
09:37:10 PDT 2019
(qemu) q

So for an arm64 defconfig, looks like roughly 1.32% savings on vmlinux
size, 16% savings on Image size, 3.33% savings on compressed image
size (when gzipping), and 2.97% savings in loadable kernel module
size. (this is off the rounded value from `du -h` which isn't very
precise).

Moving on to an actual device kernel (we plan on shipping a device
with an LLD linked kernel soon):

pre-patch application:
$ du -h vmlinux
361M vmlinux
$ du -h arch/arm64/boot/Image
44M arch/arm64/boot/Image
$ du -h arch/arm64/boot/Image.lz4
20M arch/arm64/boot/Image.lz4
$ find . -name \*.ko | xargs du -ch | tail -n 1
37M total

post patch application:
$ du -h vmlinux
359M vmlinux
$ du -h arch/arm64/boot/Image
42M arch/arm64/boot/Image
$ du -h arch/arm64/boot/Image.lz4
19M arch/arm64/boot/Image.lz4
$ find . -name \*.ko | xargs du -ch | tail -n 1
37M total

0.55% savings on vmlinux, 4.5% savings on Image, 5% savings on
compressed Image w/ LZ4 compression, no or very small savings on
kernel modules.  For kernel modules, this could be because of my
rounding with `du -h` or a bug in our kernel version scripts not
applying these relocations to kernel modules.

What should I grep for the relocation type in an ELF object to verify
that RELR relocations exist?
$ llvm-objdump -r *.ko| grep RELR
?
$ llvm-readelf -S vmlinux | grep relr
<doesn't show anything>
Given your patch's change to the linker script, I'd have expected a section
called .relr.dyn to be listed.
Are the tools not interpreting the section name and need to be fixed,
or is your change to the linker script not necessary?
Maybe both of those tools need to be updated to know about such a new
relocation type?
+ Jordan for heads up.

Either way, I've confirmed the claimed 16% reduction in image size for
the defconfig, and 5% savings in compressed images size w/ LZ4.

Finally, I confirmed that this patch boots on a device.  I had to
backport https://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git/commit/?id=e9e08a07385e08f1a7f85c5d1e345c21c9564963
for llvm-objcopy support.

Then I ran 30 boot time measurements with 60s wait intervals in
between test runs:
prepatch:
mean: 3049ms
median: 3281ms
stddev: 330ms

postpatch:
mean: 3091ms
median: 3260ms
stddev: 290ms

While the time taken to decompress the kernel image depends on its size, I
assume that processing these types of relocations, given the volume of
them, takes longer?

Finally, I boot tested mainline with CONFIG_RANDOMIZE_BASE disabled
and this patch applied (it worked).

Tested-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
-- 
Thanks,
~Nick Desaulniers

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH v2] arm64: Add support for relocating the kernel with RELR
  2019-07-05  8:02 [PATCH] arm64: Add support for relocating the kernel with RELR relocations Peter Collingbourne
                   ` (2 preceding siblings ...)
  2019-07-10 23:14 ` Nick Desaulniers
@ 2019-07-12 19:33 ` Peter Collingbourne
  2019-07-12 19:38 ` [PATCH v2] arm64: Add support for relocating the kernel with RELR relocations Peter Collingbourne
  4 siblings, 0 replies; 17+ messages in thread
From: Peter Collingbourne @ 2019-07-12 19:33 UTC (permalink / raw)
  To: Catalin Marinas, Will Deacon, Mark Rutland, Ard Biesheuvel,
	Masahiro Yamada
  Cc: clang-built-linux, linux-arm-kernel

Changes in v2:
- Reverted change to RELA processing
- Added more comments, as requested by Nick and Will
- Added a feature test for NM and OBJCOPY
- Made CONFIG_RELR=y the default if the tools support it


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH v2] arm64: Add support for relocating the kernel with RELR relocations
  2019-07-05  8:02 [PATCH] arm64: Add support for relocating the kernel with RELR relocations Peter Collingbourne
                   ` (3 preceding siblings ...)
  2019-07-12 19:33 ` [PATCH v2] arm64: Add support for relocating the kernel with RELR Peter Collingbourne
@ 2019-07-12 19:38 ` Peter Collingbourne
  2019-07-29 20:00   ` Peter Collingbourne
                     ` (2 more replies)
  4 siblings, 3 replies; 17+ messages in thread
From: Peter Collingbourne @ 2019-07-12 19:38 UTC (permalink / raw)
  To: Catalin Marinas, Will Deacon, Mark Rutland, Ard Biesheuvel,
	Masahiro Yamada
  Cc: clang-built-linux, Nick Desaulniers, Peter Collingbourne,
	linux-arm-kernel

RELR is a relocation packing format for relative relocations.
The format is described in a generic-abi proposal:
https://groups.google.com/d/topic/generic-abi/bX460iggiKg/discussion

The LLD linker can be instructed to pack relocations in the RELR
format by passing the flag --pack-dyn-relocs=relr.

This patch adds a new config option, CONFIG_RELR. Enabling this option
instructs the linker to pack vmlinux's relative relocations in the RELR
format, and causes the kernel to apply the relocations at startup along
with the RELA relocations. RELA relocations still need to be applied
because the linker will emit RELA relative relocations if they are
unrepresentable in the RELR format (i.e. address not a multiple of 2).

Enabling CONFIG_RELR reduces the size of a defconfig kernel image
with CONFIG_RANDOMIZE_BASE by 3.5MB/16% uncompressed, or 550KB/5%
compressed (lz4).

Signed-off-by: Peter Collingbourne <pcc@google.com>
Tested-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
---
Changes in v2:
- Reverted change to RELA processing
- Added more comments, as requested by Nick and Will
- Added a feature test for NM and OBJCOPY
- Made CONFIG_RELR=y the default if the tools support it

 arch/arm64/Kconfig              | 10 ++++
 arch/arm64/Makefile             |  4 ++
 arch/arm64/kernel/head.S        | 96 ++++++++++++++++++++++++++++++---
 arch/arm64/kernel/vmlinux.lds.S |  9 ++++
 init/Kconfig                    |  3 ++
 scripts/tools-support-relr.sh   | 16 ++++++
 6 files changed, 132 insertions(+), 6 deletions(-)
 create mode 100755 scripts/tools-support-relr.sh

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 7442edbcabfc3..cf3907d21d097 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1476,6 +1476,16 @@ config RELOCATABLE
 	  relocation pass at runtime even if the kernel is loaded at the
 	  same address it was linked at.
 
+config RELR
+	bool "Use RELR relocation packing"
+	depends on RELOCATABLE && TOOLS_SUPPORT_RELR
+	default y
+	help
+	  Store the kernel's dynamic relocations in the RELR relocation packing
+	  format. Requires a compatible linker (currently only LLD supports
+	  this feature), as well as compatible NM and OBJCOPY utilities
+	  (llvm-nm and llvm-objcopy are compatible).
+
 config RANDOMIZE_BASE
 	bool "Randomize the address of the kernel image"
 	select ARM64_MODULE_PLTS if MODULES
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index bb1f1dbb34e8f..11f84450c7784 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -22,6 +22,10 @@ LDFLAGS_vmlinux		+= -shared -Bsymbolic -z notext -z norelro \
 			$(call ld-option, --no-apply-dynamic-relocs)
 endif
 
+ifeq ($(CONFIG_RELR),y)
+  LDFLAGS_vmlinux += --pack-dyn-relocs=relr
+endif
+
 ifeq ($(CONFIG_ARM64_ERRATUM_843419),y)
   ifeq ($(call ld-option, --fix-cortex-a53-843419),)
 $(warning ld does not support --fix-cortex-a53-843419; kernel may be susceptible to erratum)
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 2cdacd1c141b9..cc23302e9d95e 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -102,6 +102,8 @@ pe_header:
 	 *  x23        stext() .. start_kernel()  physical misalignment/KASLR offset
 	 *  x28        __create_page_tables()     callee preserved temp register
 	 *  x19/x20    __primary_switch()         callee preserved temp registers
+	 *  x24        __primary_switch() .. relocate_kernel()
+	 *                                        current RELR displacement
 	 */
 ENTRY(stext)
 	bl	preserve_boot_args
@@ -834,14 +836,93 @@ __relocate_kernel:
 
 0:	cmp	x9, x10
 	b.hs	1f
-	ldp	x11, x12, [x9], #24
-	ldr	x13, [x9, #-8]
-	cmp	w12, #R_AARCH64_RELATIVE
+	ldp	x12, x13, [x9], #24
+	ldr	x14, [x9, #-8]
+	cmp	w13, #R_AARCH64_RELATIVE
 	b.ne	0b
-	add	x13, x13, x23			// relocate
-	str	x13, [x11, x23]
+	add	x14, x14, x23			// relocate
+	str	x14, [x12, x23]
 	b	0b
-1:	ret
+
+1:
+#ifdef CONFIG_RELR
+	/*
+	 * Apply RELR relocations.
+	 *
+	 * RELR is a compressed format for storing relative relocations. The
+	 * encoded sequence of entries looks like:
+	 * [ AAAAAAAA BBBBBBB1 BBBBBBB1 ... AAAAAAAA BBBBBB1 ... ]
+	 *
+	 * i.e. start with an address, followed by any number of bitmaps. The
+	 * address entry encodes 1 relocation. The subsequent bitmap entries
+	 * encode up to 63 relocations each, at subsequent offsets following
+	 * the last address entry.
+	 *
+	 * The bitmap entries must have 1 in the least significant bit. The
+	 * assumption here is that an address cannot have 1 in lsb. Odd
+	 * addresses are not supported. Any odd addresses are stored in the RELA
+	 * section, which is handled above.
+	 *
+	 * Excluding the least significant bit in the bitmap, each non-zero
+	 * bit in the bitmap represents a relocation to be applied to
+	 * a corresponding machine word that follows the base address
+	 * word. The second least significant bit represents the machine
+	 * word immediately following the initial address, and each bit
+	 * that follows represents the next word, in linear order. As such,
+	 * a single bitmap can encode up to 63 relocations in a 64-bit object.
+	 *
+	 * In this implementation we store the address of the next RELR table
+	 * entry in x9, the address being relocated by the current address or
+	 * bitmap entry in x13 and the address being relocated by the current
+	 * bit in x14.
+	 *
+	 * Because addends are stored in place in the binary, RELR relocations
+	 * cannot be applied idempotently. We use x24 to keep track of the
+	 * currently applied displacement so that we can correctly relocate if
+	 * __relocate_kernel is called twice with non-zero displacements (i.e.
+	 * if there is both a physical misalignment and a KASLR displacement).
+	 */
+	ldr	w9, =__relr_offset		// offset to reloc table
+	ldr	w10, =__relr_size		// size of reloc table
+	add	x9, x9, x11			// __va(.relr)
+	add	x10, x9, x10			// __va(.relr) + sizeof(.relr)
+
+	sub	x15, x23, x24			// delta from previous offset
+	cbz	x15, 7f				// nothing to do if unchanged
+	mov	x24, x23			// save new offset
+
+2:	cmp	x9, x10
+	b.hs	7f
+	ldr	x11, [x9], #8
+	tbnz	x11, #0, 3f			// branch to handle bitmaps
+	add	x13, x11, x23
+	ldr	x12, [x13]			// relocate address entry
+	add	x12, x12, x15
+	str	x12, [x13], #8			// adjust to start of bitmap
+	b	2b
+
+3:	mov	x14, x13
+4:	lsr	x11, x11, #1
+	cbz	x11, 6f
+	tbz	x11, #0, 5f			// skip bit if not set
+	ldr	x12, [x14]			// relocate bit
+	add	x12, x12, x15
+	str	x12, [x14]
+
+5:	add	x14, x14, #8			// move to next bit's address
+	b	4b
+
+6:	/*
+	 * Move to the next bitmap's address. 8 is the word size, and 63 is the
+	 * number of significant bits in a bitmap entry.
+	 */
+	add	x13, x13, #(8 * 63)
+	b	2b
+
+7:
+#endif
+	ret
+
 ENDPROC(__relocate_kernel)
 #endif
 
@@ -854,6 +935,9 @@ __primary_switch:
 	adrp	x1, init_pg_dir
 	bl	__enable_mmu
 #ifdef CONFIG_RELOCATABLE
+#ifdef CONFIG_RELR
+	mov	x24, #0				// no RELR displacement yet
+#endif
 	bl	__relocate_kernel
 #ifdef CONFIG_RANDOMIZE_BASE
 	ldr	x8, =__primary_switched
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 7fa0083749078..31716afa30f65 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -200,6 +200,15 @@ SECTIONS
 	__rela_offset	= ABSOLUTE(ADDR(.rela.dyn) - KIMAGE_VADDR);
 	__rela_size	= SIZEOF(.rela.dyn);
 
+#ifdef CONFIG_RELR
+	.relr.dyn : ALIGN(8) {
+		*(.relr.dyn)
+	}
+
+	__relr_offset	= ABSOLUTE(ADDR(.relr.dyn) - KIMAGE_VADDR);
+	__relr_size	= SIZEOF(.relr.dyn);
+#endif
+
 	. = ALIGN(SEGMENT_ALIGN);
 	__initdata_end = .;
 	__init_end = .;
diff --git a/init/Kconfig b/init/Kconfig
index 99da78db04405..a148c795fde39 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -30,6 +30,9 @@ config CC_CAN_LINK
 config CC_HAS_ASM_GOTO
 	def_bool $(success,$(srctree)/scripts/gcc-goto.sh $(CC))
 
+config TOOLS_SUPPORT_RELR
+	def_bool $(success,env "CC=$(CC)" "LD=$(LD)" "NM=$(NM)" "OBJCOPY=$(OBJCOPY)" $(srctree)/scripts/tools-support-relr.sh)
+
 config CC_HAS_WARN_MAYBE_UNINITIALIZED
 	def_bool $(cc-option,-Wmaybe-uninitialized)
 	help
diff --git a/scripts/tools-support-relr.sh b/scripts/tools-support-relr.sh
new file mode 100755
index 0000000000000..97a2c844a95e4
--- /dev/null
+++ b/scripts/tools-support-relr.sh
@@ -0,0 +1,16 @@
+#!/bin/sh -eu
+# SPDX-License-Identifier: GPL-2.0
+
+tmp_file=$(mktemp)
+trap "rm -f $tmp_file.o $tmp_file $tmp_file.bin" EXIT
+
+cat << "END" | "$CC" -c -x c - -o $tmp_file.o >/dev/null 2>&1
+void *p = &p;
+END
+"$LD" $tmp_file.o -shared -Bsymbolic --pack-dyn-relocs=relr -o $tmp_file
+
+# Despite printing an error message, GNU nm still exits with exit code 0 if it
+# sees a relr section. So we need to check that nothing is printed to stderr.
+test -z "$("$NM" $tmp_file 2>&1 >/dev/null)"
+
+"$OBJCOPY" -O binary $tmp_file $tmp_file.bin
-- 
2.22.0.510.g264f2c817a-goog


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH] arm64: Add support for relocating the kernel with RELR relocations
  2019-07-09 23:13     ` Nick Desaulniers
@ 2019-07-12 19:40       ` Peter Collingbourne
  0 siblings, 0 replies; 17+ messages in thread
From: Peter Collingbourne @ 2019-07-12 19:40 UTC (permalink / raw)
  To: Nick Desaulniers
  Cc: Mark Rutland, Ard Biesheuvel, Masahiro Yamada, Catalin Marinas,
	clang-built-linux, Will Deacon, Linux ARM

On Tue, Jul 9, 2019 at 4:13 PM Nick Desaulniers <ndesaulniers@google.com> wrote:
>
> On Tue, Jul 9, 2019 at 3:04 PM Peter Collingbourne <pcc@google.com> wrote:
> >
> > On Mon, Jul 8, 2019 at 11:02 AM Nick Desaulniers
> > <ndesaulniers@google.com> wrote:
> > >
> > > On Fri, Jul 5, 2019 at 1:03 AM 'Peter Collingbourne' via Clang Built
> > > Linux <clang-built-linux@googlegroups.com> wrote:
> > > > +config RELR
> > > > +       bool "Use RELR relocation packing"
> > > > +       depends on RELOCATABLE && $(ld-option,--pack-dyn-relocs=relr)
> > >
> > > Oh, ld-option in Kconfig? +Masahiro
> > >
> > > > +       help
> > > > +         Store the kernel's dynamic relocations in the RELR relocation packing
> > > > +         format. Requires a compatible linker (currently only LLD supports
> > > > +         this feature), as well as compatible NM and OBJCOPY utilities
> > > > +         (llvm-nm and llvm-objcopy are compatible).
> > >
> > > So sounds like `make LD=ld.lld NM=llvm-nm OBJCOPY=llvm-objcopy` will
> > > be needed to test.  The ld-option check above doesn't seem strong
> > > enough, but maybe it's not easy to feature test NM or OBJCOPY?
> >
> > Right. Ideally we want to test the property that the tool accepts an
> > input file with a .relr.dyn section, and this isn't easy without
> > actually creating such a file. We could test that the tools are
> > actually the LLVM versions (e.g. by testing the output of $TOOL
> > --version), but I'm not sure if we want to exclude the possibility
> > that GNU or other toolchains will add support for this section in the
> > future.
>
> eh, I strongly dislike version checks due to their brittleness.
> https://lkml.org/lkml/2019/6/25/1253
> Maybe a script like `scripts/cc-can-link.sh` ?

Okay, in v2 I've added a feature test for LD, NM and OBJCOPY.

Now that we can accurately test whether all of the provided tools
support RELR, I propose that we make it the default if the tools
support it, which is what I've done in v2.

> > > >         add     x9, x9, x11                     // __va(.rela)
> > > >         add     x10, x9, x10                    // __va(.rela) + sizeof(.rela)
> > > >
> > > >  0:     cmp     x9, x10
> > > >         b.hs    1f
> > > > -       ldp     x11, x12, [x9], #24
> > > > -       ldr     x13, [x9, #-8]
> > > > -       cmp     w12, #R_AARCH64_RELATIVE
> > > > +       ldp     x12, x13, [x9], #24
> > > > +       ldr     x14, [x9, #-8]
> > > > +       cmp     w13, #R_AARCH64_RELATIVE
> > >
> > > Can you help me understand this renaming?
> > > x11 -> x12
> > > x13 -> x14
> > > x12 -> x13
> > > but they all get clobbered before use in your added ifdef hunk?
> >
> > I use the value of x11 before it is clobbered in the instruction with
> > the comment "// __va(.relr)".
> >
> > > >         b.ne    0b
> > > > -       add     x13, x13, x23                   // relocate
> > > > -       str     x13, [x11, x23]
> > > > +       add     x14, x14, x23                   // relocate
> > > > +       str     x14, [x12, x23]
> > > >         b       0b
> > > > -1:     ret
> > > > +
> > > > +1:
> > > > +#ifdef CONFIG_RELR
> > > > +       ldr     w9, =__relr_offset              // offset to reloc table
> > > > +       ldr     w10, =__relr_size               // size of reloc table
> > >
> > > Were these modified since first loaded in the above hunk?  I see the
> > > offsets applied below, but I don't spot any loops back up to `1:` (but
> > > could be missing it).  It also doesn't look like x11 or x10 are
> > > modified below (or above), so this looks like we're rematerializing
> > > values that already exist in those registers, IIUC?  Maybe I'm missing
> > > some side effect of one of the instructions?
> >
> > These instructions refer to __relr_{offset,size} which are different
> > from __rela_{offset,size} loaded above. They are only loaded once per
> > function call; the main loop is between labels 2 and 7 below.
>
> oh, sorry, yes I missed __relr_ vs __rela__.  Thanks for the clarification.
>
> >
> > You might have missed the implicit increment of x9 by 8 in the "ldr
> > x11, [x9], #8" instruction below (see also similar instructions
> > above), which is how we move to the next relocation table entry in the
> > main loop.
>
> Yep, I forgot about the pre vs post increment syntax:
> http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.kui0100a/armasm_cihgjhed.htm
> https://azeria-labs.com/memory-instructions-load-and-store-part-4/
>
> > > > +6:     add     x13, x13, #(8 * 63)             // move to next bitmap's address
> > >
> > > Sorry, what's this constant `#(8 * 63)`?
> >
> > It is the number of bytes covered by a bitmap entry. 8 is the word
> > size, and 63 is the number of significant bits in a bitmap entry.
>
> Might be good to add that as a comment inline w/ the code?

Done in v2.



Peter

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] arm64: Add support for relocating the kernel with RELR relocations
  2019-07-10 16:21 ` Will Deacon
@ 2019-07-12 19:40   ` Peter Collingbourne
  0 siblings, 0 replies; 17+ messages in thread
From: Peter Collingbourne @ 2019-07-12 19:40 UTC (permalink / raw)
  To: Will Deacon
  Cc: Mark Rutland, Catalin Marinas, clang-built-linux, Linux ARM,
	Ard Biesheuvel

On Wed, Jul 10, 2019 at 9:21 AM Will Deacon <will@kernel.org> wrote:
>
> On Fri, Jul 05, 2019 at 01:02:31AM -0700, Peter Collingbourne wrote:
> > RELR is a relocation packing format for relative relocations.
> > The format is described in a generic-abi proposal:
> > https://groups.google.com/d/topic/generic-abi/bX460iggiKg/discussion
> >
> > The LLD linker can be instructed to pack relocations in the RELR
> > format by passing the flag --pack-dyn-relocs=relr.
> >
> > This patch adds a new config option, CONFIG_RELR. Enabling this option
> > instructs the linker to pack vmlinux's relative relocations in the RELR
> > format, and causes the kernel to apply the relocations at startup along
> > with the RELA relocations. RELA relocations still need to be applied
> > because the linker will emit RELA relative relocations if they are
> > unrepresentable in the RELR format (i.e. address not a multiple of 2).
> >
> > Enabling CONFIG_RELR reduces the size of a defconfig kernel image
> > with CONFIG_RANDOMIZE_BASE by 3.5MB/16% uncompressed, or 550KB/5%
> > compressed (lz4).
> >
> > Signed-off-by: Peter Collingbourne <pcc@google.com>
> > ---
> >  arch/arm64/Kconfig              |  9 +++++
> >  arch/arm64/Makefile             |  4 ++
> >  arch/arm64/kernel/head.S        | 70 ++++++++++++++++++++++++++++-----
> >  arch/arm64/kernel/vmlinux.lds.S |  9 +++++
> >  4 files changed, 83 insertions(+), 9 deletions(-)
> >
> > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> > index 697ea05107298..f0cd0d2607e70 100644
> > --- a/arch/arm64/Kconfig
> > +++ b/arch/arm64/Kconfig
> > @@ -1447,6 +1447,15 @@ config RELOCATABLE
> >         relocation pass at runtime even if the kernel is loaded at the
> >         same address it was linked at.
> >
> > +config RELR
> > +     bool "Use RELR relocation packing"
> > +     depends on RELOCATABLE && $(ld-option,--pack-dyn-relocs=relr)
>
> Do you know if this will also be supported by binutils and, if so, whether
> they've agreed to use the same name for the option?

A number of binutils developers (Cary Coutant, Alan Modra) expressed
support for the format on the generic-abi thread, but I don't know
what the plans of the binutils developers are in terms of
implementation.

> > +     help
> > +       Store the kernel's dynamic relocations in the RELR relocation packing
> > +       format. Requires a compatible linker (currently only LLD supports
> > +       this feature), as well as compatible NM and OBJCOPY utilities
> > +       (llvm-nm and llvm-objcopy are compatible).
> > +
> >  config RANDOMIZE_BASE
> >       bool "Randomize the address of the kernel image"
> >       select ARM64_MODULE_PLTS if MODULES
> > diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
> > index e9d2e578cbe67..16a8636f815c9 100644
> > --- a/arch/arm64/Makefile
> > +++ b/arch/arm64/Makefile
> > @@ -22,6 +22,10 @@ LDFLAGS_vmlinux            += -shared -Bsymbolic -z notext -z norelro \
> >                       $(call ld-option, --no-apply-dynamic-relocs)
> >  endif
> >
> > +ifeq ($(CONFIG_RELR),y)
> > +  LDFLAGS_vmlinux += --pack-dyn-relocs=relr
> > +endif
> > +
> >  ifeq ($(CONFIG_ARM64_ERRATUM_843419),y)
> >    ifeq ($(call ld-option, --fix-cortex-a53-843419),)
> >  $(warning ld does not support --fix-cortex-a53-843419; kernel may be susceptible to erratum)
> > diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> > index 2cdacd1c141b9..9b27d5e7d8f70 100644
> > --- a/arch/arm64/kernel/head.S
> > +++ b/arch/arm64/kernel/head.S
> > @@ -102,6 +102,7 @@ pe_header:
> >        *  x23        stext() .. start_kernel()  physical misalignment/KASLR offset
> >        *  x28        __create_page_tables()     callee preserved temp register
> >        *  x19/x20    __primary_switch()         callee preserved temp registers
> > +      *  x24        __primary_switch()         current RELR displacement
>
> I think the comment is a bit misleading here, since x24 is used by
> __relocate_kernel(). Maybe make the middle column say:
>
>         __primary_switch() .. __relocate_kernel()
>
> it's still not ideal, since the latter can be invoked twice, but oh well.

Done in v2.

> >        */
> >  ENTRY(stext)
> >       bl      preserve_boot_args
> > @@ -824,24 +825,63 @@ __relocate_kernel:
> >        * Iterate over each entry in the relocation table, and apply the
> >        * relocations in place.
> >        */
> > -     ldr     w9, =__rela_offset              // offset to reloc table
> > -     ldr     w10, =__rela_size               // size of reloc table
> > -
> >       mov_q   x11, KIMAGE_VADDR               // default virtual offset
> >       add     x11, x11, x23                   // actual virtual offset
> > +
> > +     ldr     w9, =__rela_offset              // offset to reloc table
> > +     ldr     w10, =__rela_size               // size of reloc table
>
> I agree with Nick that I'd prefer to leave these lines alone.

Okay, I've reverted this part in v2.

> >       add     x9, x9, x11                     // __va(.rela)
> >       add     x10, x9, x10                    // __va(.rela) + sizeof(.rela)
> >
> >  0:   cmp     x9, x10
> >       b.hs    1f
> > -     ldp     x11, x12, [x9], #24
> > -     ldr     x13, [x9, #-8]
> > -     cmp     w12, #R_AARCH64_RELATIVE
> > +     ldp     x12, x13, [x9], #24
> > +     ldr     x14, [x9, #-8]
> > +     cmp     w13, #R_AARCH64_RELATIVE
> >       b.ne    0b
> > -     add     x13, x13, x23                   // relocate
> > -     str     x13, [x11, x23]
> > +     add     x14, x14, x23                   // relocate
> > +     str     x14, [x12, x23]
> >       b       0b
> > -1:   ret
>
> So the reason you're removing this ret is because we'll end up with both a
> .relr section *and* .rela section, correct?

Right. It's likely that the rela section will be empty when
CONFIG_RELR is enabled, but it isn't guaranteed. There are currently
no relocations at odd addresses in arm64 defconfig, but I've seen a
few in at least one of our Android device kernels.

> > +1:
> > +#ifdef CONFIG_RELR
> > +     ldr     w9, =__relr_offset              // offset to reloc table
> > +     ldr     w10, =__relr_size               // size of reloc table
> > +     add     x9, x9, x11                     // __va(.relr)
> > +     add     x10, x9, x10                    // __va(.relr) + sizeof(.relr)
> > +
> > +     sub     x15, x23, x24                   // delta from previous offset
> > +     cbz     x15, 7f                         // nothing to do if unchanged
> > +     mov     x24, x23                        // save new offset
> > +
> > +2:   cmp     x9, x10
> > +     b.hs    7f
> > +     ldr     x11, [x9], #8
> > +     tbnz    x11, #0, 3f                     // branch to handle bitmaps
>
> Can we guarantee that x13 has been initialised at this point?

Yes. x13 will be initialized while processing an address entry, and
the format guarantees that each sequence of bitmap entries will be
preceded with an address entry.

> > +     add     x13, x11, x23
> > +     ldr     x12, [x13]                      // relocate address entry
> > +     add     x12, x12, x15
> > +     str     x12, [x13], #8                  // adjust to start of bitmap
> > +     b       2b
> > +
> > +3:   mov     x14, x13
> > +4:   lsr     x11, x11, #1
> > +     cbz     x11, 6f
> > +     tbz     x11, #0, 5f                     // skip bit if not set
> > +     ldr     x12, [x14]                      // relocate bit
> > +     add     x12, x12, x15
> > +     str     x12, [x14]
> > +
> > +5:   add     x14, x14, #8                    // move to next bit's address
> > +     b       4b
> > +
> > +6:   add     x13, x13, #(8 * 63)             // move to next bitmap's address
> > +     b       2b
>
> This desperately needs a block comment at the top (immediately after the
> #ifdef CONFIG_RELR) describing the algorithm and the layout of the .relr
> section, please.

Done in v2.



Peter

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] arm64: Add support for relocating the kernel with RELR relocations
  2019-07-10 23:14 ` Nick Desaulniers
@ 2019-07-12 19:40   ` Peter Collingbourne
  0 siblings, 0 replies; 17+ messages in thread
From: Peter Collingbourne @ 2019-07-12 19:40 UTC (permalink / raw)
  To: Nick Desaulniers
  Cc: Mark Rutland, Ard Biesheuvel, Catalin Marinas, clang-built-linux,
	Jordan Rupprecht, Will Deacon, Linux ARM

On Wed, Jul 10, 2019 at 4:14 PM Nick Desaulniers
<ndesaulniers@google.com> wrote:
>
> On Fri, Jul 5, 2019 at 1:03 AM 'Peter Collingbourne' via Clang Built
> Linux <clang-built-linux@googlegroups.com> wrote:
> >
> > RELR is a relocation packing format for relative relocations.
> > The format is described in a generic-abi proposal:
> > https://groups.google.com/d/topic/generic-abi/bX460iggiKg/discussion
> >
> > The LLD linker can be instructed to pack relocations in the RELR
> > format by passing the flag --pack-dyn-relocs=relr.
> >
> > This patch adds a new config option, CONFIG_RELR. Enabling this option
> > instructs the linker to pack vmlinux's relative relocations in the RELR
> > format, and causes the kernel to apply the relocations at startup along
> > with the RELA relocations. RELA relocations still need to be applied
> > because the linker will emit RELA relative relocations if they are
> > unrepresentable in the RELR format (i.e. address not a multiple of 2).
> >
> > Enabling CONFIG_RELR reduces the size of a defconfig kernel image
> > with CONFIG_RANDOMIZE_BASE by 3.5MB/16% uncompressed, or 550KB/5%
> > compressed (lz4).
> >
> > Signed-off-by: Peter Collingbourne <pcc@google.com>
>
> Tested with:
> ToT llvm r365583
> mainline linux commit 5ad18b2e60b7
>
> pre-patch application:
> $ ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- make CC=clang LD=ld.lld
> NM=llvm-nm OBJCOPY=llvm-objcopy -j71 defconfig
> $ ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- make CC=clang LD=ld.lld
> NM=llvm-nm OBJCOPY=llvm-objcopy -j71
> $ du -h vmlinux
> 227M vmlinux
> $ du -h arch/arm64/boot/Image
> 25M arch/arm64/boot/Image
> $ du -h arch/arm64/boot/Image.gz
> 9.0M arch/arm64/boot/Image.gz
> $ find . -name \*.ko | xargs du -ch | tail -n 1
> 437M total
> $ qemu-system-aarch64 -kernel arch/arm64/boot/Image.gz -machine virt
> -cpu cortex-a57 -nographic --append "console=ttyAMA0 root=/dev/ram0"
> -m 2048 -initrd /android1/buildroot/output/images/rootfs.cpio
> ...
> # cat /proc/version
> Linux version 5.2.0-00915-g5ad18b2e60b7
> (ndesaulniers@ndesaulniers1.mtv.corp.google.com) (clang version 9.0.0
> (https://github.com/llvm/llvm-project.git
> b1843e130ad9c4269ece5d08718b33566a41d919)) #66 SMP PREEMPT Tue Jul 9
> 16:50:18 PDT 2019
> (qemu) q
>
> post-patch application:
> $ ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- make CC=clang LD=ld.lld
> NM=llvm-nm OBJCOPY=llvm-objcopy -j71 clean
> $ git am /tmp/relr.eml
> Applying: arm64: Add support for relocating the kernel with RELR relocations
> $ ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- make CC=clang LD=ld.lld
> NM=llvm-nm OBJCOPY=llvm-objcopy -j71
> ...
> Use RELR relocation packing (RELR) [N/y/?] (NEW) y
> ...
> $ du -h vmlinux
> 224M vmlinux
> $ du -h arch/arm64/boot/Image
> 21M arch/arm64/boot/Image
> $ du -h arch/arm64/boot/Image.gz
> 8.7M arch/arm64/boot/Image.gz
> $ find . -name \*.ko | xargs du -ch | tail -n 1
> 424M total
> $ qemu-system-aarch64 -kernel arch/arm64/boot/Image.gz -machine virt
> -cpu cortex-a57 -nographic --append "console=ttyAMA0 root=/dev/ram0"
> -m 2048 -initrd /android1/buildroot/output/images/rootfs.cpio
> ...
> # cat /proc/version
> Linux version 5.2.0-00916-g3798f5948201
> (ndesaulniers@ndesaulniers1.mtv.corp.google.com) (clang version 9.0.0
> (https://github.com/llvm/llvm-project.git
> b1843e130ad9c4269ece5d08718b33566a41d919)) #67 SMP PREEMPT Wed Jul 10
> 09:37:10 PDT 2019
> (qemu) q
>
> So for an arm64 defconfig, looks like roughly 1.32% savings on vmlinux
> size, 16% savings on Image size, 3.33% savings on compressed image
> size (when gzipping), and 2.97% savings in loadable kernel module
> size. (this is off the rounded value from `du -h` which isn't very
> precise).
>
> Moving on to an actual device kernel (we plan on shipping a device
> with an LLD linked kernel soon):
>
> pre-patch application:
> $ du -h vmlinux
> 361M vmlinux
> $ du -h arch/arm64/boot/Image
> 44M arch/arm64/boot/Image
> $ du -h arch/arm64/boot/Image.lz4
> 20M arch/arm64/boot/Image.lz4
> $ find . -name \*.ko | xargs du -ch | tail -n 1
> 37M total
>
> post patch application:
> $ du -h vmlinux
> 359M vmlinux
> $ du -h arch/arm64/boot/Image
> 42M arch/arm64/boot/Image
> $ du -h arch/arm64/boot/Image.lz4
> 19M arch/arm64/boot/Image.lz4
> $ find . -name \*.ko | xargs du -ch | tail -n 1
> 37M total
>
> 0.55% savings on vmlinux, 4.5% savings on Image, 5% savings on
> compressed Image w/ LZ4 compression, no or very small savings on
> kernel modules.  For kernel modules, this could be because of my
> rounding with `du -h` or a bug in our kernel version scripts not
> applying these relocations to kernel modules.

Kernel modules are linked separately from the kernel itself, and are
linked as relocatable object files rather than shared objects. The
linker does not support RELR packing for object files, and it's not
really clear whether that's feasible because virtual addresses are not
fixed within object files. So there should be no effect on kernel
modules. (I'm not sure how to explain your observed 2.97% savings
above, then.) If we did switch kernel modules over to being linked as
shared objects, that would allow for RELR relocations in the modules.

> What should I grep for the relocation type in an ELF object to verify
> that RELR relocations exist?
> $ llvm-objdump -r *.ko| grep RELR
> ?

That's expected as mentioned above.

> $ llvm-readelf -S vmlinux } grep relr
> <doesn't show anything>

Assuming that you mean s/}/|/, that's correct. I get:

$ ~/l/ra/bin/llvm-readelf -S vmlinux | grep relr
  [33] .relr.dyn         RELR            ffff0000113ddd98 135dd98
00fb08 08   A  0   0  8

Are you sure that you were looking at the right file?

> I'd have assumed by your patch's change to the linker script a section
> called .relr.dyn to be listed.
> Are the tools not interpreting the section name and need to be fixed,
> or is your change to the linker script not necessary?
> Maybe both of those tools need to be updated to know about such a new
> relocation type?
> + Jordan for heads up.
>
> Either way, I've confirmed the claimed 16% reduction in image size for
> the defconfig, and 5% savings in compressed images size w/ LZ4.
>
> Finally, I confirmed that this patch boots on a device.  I had to
> backport https://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git/commit/?id=e9e08a07385e08f1a7f85c5d1e345c21c9564963
> for llvm-objcopy support.
>
> Then I ran 30 boot time measurements with 60s wait intervals in
> between test runs:
> prepatch:
> mean: 3049ms
> median: 3281ms
> stddev: 330ms
>
> postpatch:
> mean: 3091ms
> median: 3260ms
> stddev: 290ms
>
> While decompressing the kernel image quickly depends on its size, I
> assume processing these types of relocations and the volume of them
> takes longer?

It depends. On the one hand there will be fewer cache misses coming
from the relocation section, but on the other there will be more
misses coming from the relocated data because we now need to load from
it and it probably won't be as compact as the relocation section. I
would expect the overall effect to be an improvement though due to
less time spent decompressing.

I generally look at the median when taking performance measurements
rather than the mean as this ignores outliers, and your median figures
do seem to show an improvement. In my own boot time measurements I
observed a 100 run median boot time decrease of around 10ms, although
this was on qemu and not real hardware.

> Finally, I boot tested mainline with CONFIG_RANDOMIZE_BASE disabled
> and this patch applied (it worked).
>
> Tested-by: Nick Desaulniers <ndesaulniers@google.com>
> Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>

Thanks.


Peter

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2] arm64: Add support for relocating the kernel with RELR relocations
  2019-07-12 19:38 ` [PATCH v2] arm64: Add support for relocating the kernel with RELR relocations Peter Collingbourne
@ 2019-07-29 20:00   ` Peter Collingbourne
  2019-07-31 16:48   ` Will Deacon
  2019-08-01  1:18   ` [PATCH v3] " Peter Collingbourne
  2 siblings, 0 replies; 17+ messages in thread
From: Peter Collingbourne @ 2019-07-29 20:00 UTC (permalink / raw)
  To: Catalin Marinas, Will Deacon, Mark Rutland, Ard Biesheuvel,
	Masahiro Yamada
  Cc: clang-built-linux, Nick Desaulniers, Linux ARM

On Fri, Jul 12, 2019 at 12:39 PM Peter Collingbourne <pcc@google.com> wrote:
>
> RELR is a relocation packing format for relative relocations.
> The format is described in a generic-abi proposal:
> https://groups.google.com/d/topic/generic-abi/bX460iggiKg/discussion
>
> The LLD linker can be instructed to pack relocations in the RELR
> format by passing the flag --pack-dyn-relocs=relr.
>
> This patch adds a new config option, CONFIG_RELR. Enabling this option
> instructs the linker to pack vmlinux's relative relocations in the RELR
> format, and causes the kernel to apply the relocations at startup along
> with the RELA relocations. RELA relocations still need to be applied
> because the linker will emit RELA relative relocations if they are
> unrepresentable in the RELR format (i.e. address not a multiple of 2).
>
> Enabling CONFIG_RELR reduces the size of a defconfig kernel image
> with CONFIG_RANDOMIZE_BASE by 3.5MB/16% uncompressed, or 550KB/5%
> compressed (lz4).
>
> Signed-off-by: Peter Collingbourne <pcc@google.com>
> Tested-by: Nick Desaulniers <ndesaulniers@google.com>
> Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
> ---
> Changes in v2:
> - Reverted change to RELA processing
> - Added more comments, as requested by Nick and Will
> - Added a feature test for NM and OBJCOPY
> - Made CONFIG_RELR=y the default if the tools support it

Ping.

Peter

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2] arm64: Add support for relocating the kernel with RELR relocations
  2019-07-12 19:38 ` [PATCH v2] arm64: Add support for relocating the kernel with RELR relocations Peter Collingbourne
  2019-07-29 20:00   ` Peter Collingbourne
@ 2019-07-31 16:48   ` Will Deacon
  2019-08-01  1:19     ` Peter Collingbourne
  2019-08-01  1:18   ` [PATCH v3] " Peter Collingbourne
  2 siblings, 1 reply; 17+ messages in thread
From: Will Deacon @ 2019-07-31 16:48 UTC (permalink / raw)
  To: Peter Collingbourne
  Cc: Mark Rutland, clang-built-linux, Catalin Marinas, Ard Biesheuvel,
	Nick Desaulniers, Masahiro Yamada, linux-arm-kernel

On Fri, Jul 12, 2019 at 12:38:46PM -0700, Peter Collingbourne wrote:
> RELR is a relocation packing format for relative relocations.
> The format is described in a generic-abi proposal:
> https://groups.google.com/d/topic/generic-abi/bX460iggiKg/discussion
> 
> The LLD linker can be instructed to pack relocations in the RELR
> format by passing the flag --pack-dyn-relocs=relr.
> 
> This patch adds a new config option, CONFIG_RELR. Enabling this option
> instructs the linker to pack vmlinux's relative relocations in the RELR
> format, and causes the kernel to apply the relocations at startup along
> with the RELA relocations. RELA relocations still need to be applied
> because the linker will emit RELA relative relocations if they are
> unrepresentable in the RELR format (i.e. address not a multiple of 2).
> 
> Enabling CONFIG_RELR reduces the size of a defconfig kernel image
> with CONFIG_RANDOMIZE_BASE by 3.5MB/16% uncompressed, or 550KB/5%
> compressed (lz4).
> 
> Signed-off-by: Peter Collingbourne <pcc@google.com>
> Tested-by: Nick Desaulniers <ndesaulniers@google.com>
> Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
> ---
> Changes in v2:
> - Reverted change to RELA processing
> - Added more comments, as requested by Nick and Will
> - Added a feature test for NM and OBJCOPY
> - Made CONFIG_RELR=y the default if the tools support it
> 
>  arch/arm64/Kconfig              | 10 ++++
>  arch/arm64/Makefile             |  4 ++
>  arch/arm64/kernel/head.S        | 96 ++++++++++++++++++++++++++++++---
>  arch/arm64/kernel/vmlinux.lds.S |  9 ++++
>  init/Kconfig                    |  3 ++
>  scripts/tools-support-relr.sh   | 16 ++++++
>  6 files changed, 132 insertions(+), 6 deletions(-)
>  create mode 100755 scripts/tools-support-relr.sh
> 
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 7442edbcabfc3..cf3907d21d097 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -1476,6 +1476,16 @@ config RELOCATABLE
>  	  relocation pass at runtime even if the kernel is loaded at the
>  	  same address it was linked at.
>  
> +config RELR
> +	bool "Use RELR relocation packing"
> +	depends on RELOCATABLE && TOOLS_SUPPORT_RELR
> +	default y
> +	help
> +	  Store the kernel's dynamic relocations in the RELR relocation packing
> +	  format. Requires a compatible linker (currently only LLD supports

Drop "currently" because it will just rot

> +	  this feature), as well as compatible NM and OBJCOPY utilities
> +	  (llvm-nm and llvm-objcopy are compatible).
> +
>  config RANDOMIZE_BASE
>  	bool "Randomize the address of the kernel image"
>  	select ARM64_MODULE_PLTS if MODULES
> diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
> index bb1f1dbb34e8f..11f84450c7784 100644
> --- a/arch/arm64/Makefile
> +++ b/arch/arm64/Makefile
> @@ -22,6 +22,10 @@ LDFLAGS_vmlinux		+= -shared -Bsymbolic -z notext -z norelro \
>  			$(call ld-option, --no-apply-dynamic-relocs)
>  endif
>  
> +ifeq ($(CONFIG_RELR),y)
> +  LDFLAGS_vmlinux += --pack-dyn-relocs=relr
> +endif

RELR isn't arm64-specific, right? So we could put this in the top-level
Makefile and have arm64 select ARCH_HAS_RELR if relocatable, so that other
architectures can easily support this in future.

>  ifeq ($(CONFIG_ARM64_ERRATUM_843419),y)
>    ifeq ($(call ld-option, --fix-cortex-a53-843419),)
>  $(warning ld does not support --fix-cortex-a53-843419; kernel may be susceptible to erratum)
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index 2cdacd1c141b9..cc23302e9d95e 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -102,6 +102,8 @@ pe_header:
>  	 *  x23        stext() .. start_kernel()  physical misalignment/KASLR offset
>  	 *  x28        __create_page_tables()     callee preserved temp register
>  	 *  x19/x20    __primary_switch()         callee preserved temp registers
> +	 *  x24        __primary_switch() .. relocate_kernel()
> +	 *                                        current RELR displacement
>  	 */
>  ENTRY(stext)
>  	bl	preserve_boot_args
> @@ -834,14 +836,93 @@ __relocate_kernel:
>  
>  0:	cmp	x9, x10
>  	b.hs	1f
> -	ldp	x11, x12, [x9], #24
> -	ldr	x13, [x9, #-8]
> -	cmp	w12, #R_AARCH64_RELATIVE
> +	ldp	x12, x13, [x9], #24
> +	ldr	x14, [x9, #-8]
> +	cmp	w13, #R_AARCH64_RELATIVE
>  	b.ne	0b
> -	add	x13, x13, x23			// relocate
> -	str	x13, [x11, x23]
> +	add	x14, x14, x23			// relocate
> +	str	x14, [x12, x23]
>  	b	0b
> -1:	ret
> +
> +1:
> +#ifdef CONFIG_RELR
> +	/*
> +	 * Apply RELR relocations.
> +	 *
> +	 * RELR is a compressed format for storing relative relocations. The
> +	 * encoded sequence of entries looks like:
> +	 * [ AAAAAAAA BBBBBBB1 BBBBBBB1 ... AAAAAAAA BBBBBB1 ... ]

I assume these are treated as an array of u64 types for the purposes of
endianness? (have you tested with a big-endian kernel?).

Will

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH v3] arm64: Add support for relocating the kernel with RELR relocations
  2019-07-12 19:38 ` [PATCH v2] arm64: Add support for relocating the kernel with RELR relocations Peter Collingbourne
  2019-07-29 20:00   ` Peter Collingbourne
  2019-07-31 16:48   ` Will Deacon
@ 2019-08-01  1:18   ` Peter Collingbourne
  2019-08-01 12:05     ` Will Deacon
  2 siblings, 1 reply; 17+ messages in thread
From: Peter Collingbourne @ 2019-08-01  1:18 UTC (permalink / raw)
  To: Catalin Marinas, Will Deacon, Mark Rutland, Ard Biesheuvel,
	Masahiro Yamada
  Cc: clang-built-linux, Nick Desaulniers, Peter Collingbourne,
	linux-arm-kernel

RELR is a relocation packing format for relative relocations.
The format is described in a generic-abi proposal:
https://groups.google.com/d/topic/generic-abi/bX460iggiKg/discussion

The LLD linker can be instructed to pack relocations in the RELR
format by passing the flag --pack-dyn-relocs=relr.

This patch adds a new config option, CONFIG_RELR. Enabling this option
instructs the linker to pack vmlinux's relative relocations in the RELR
format, and causes the kernel to apply the relocations at startup along
with the RELA relocations. RELA relocations still need to be applied
because the linker will emit RELA relative relocations if they are
unrepresentable in the RELR format (i.e. address not a multiple of 2).

Enabling CONFIG_RELR reduces the size of a defconfig kernel image
with CONFIG_RANDOMIZE_BASE by 3.5MB/16% uncompressed, or 550KB/5%
compressed (lz4).

Signed-off-by: Peter Collingbourne <pcc@google.com>
Tested-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
---

Notes:
    Changes in v3:
    - Move Kconfig/Makefile logic to arch-independent location
    - Tweak CONFIG_RELR documentation to remove "currently"
    
    Changes in v2:
    - Reverted change to RELA processing
    - Added more comments, as requested by Nick and Will
    - Added a feature test for NM and OBJCOPY
    - Made CONFIG_RELR=y the default if the tools support it

 Makefile                        |  4 ++
 arch/Kconfig                    | 14 +++++
 arch/arm64/Kconfig              |  1 +
 arch/arm64/kernel/head.S        | 96 ++++++++++++++++++++++++++++++---
 arch/arm64/kernel/vmlinux.lds.S |  9 ++++
 init/Kconfig                    |  3 ++
 scripts/tools-support-relr.sh   | 16 ++++++
 7 files changed, 137 insertions(+), 6 deletions(-)
 create mode 100755 scripts/tools-support-relr.sh

diff --git a/Makefile b/Makefile
index 5ee6f68898693..23ed9dbef12a1 100644
--- a/Makefile
+++ b/Makefile
@@ -912,6 +912,10 @@ ifeq ($(CONFIG_STRIP_ASM_SYMS),y)
 LDFLAGS_vmlinux	+= $(call ld-option, -X,)
 endif
 
+ifeq ($(CONFIG_RELR),y)
+LDFLAGS_vmlinux	+= --pack-dyn-relocs=relr
+endif
+
 # insure the checker run with the right endianness
 CHECKFLAGS += $(if $(CONFIG_CPU_BIG_ENDIAN),-mbig-endian,-mlittle-endian)
 
diff --git a/arch/Kconfig b/arch/Kconfig
index d4c1f0551dfe0..719b27275f86a 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -936,6 +936,20 @@ config LOCK_EVENT_COUNTS
 	  the chance of application behavior change because of timing
 	  differences. The counts are reported via debugfs.
 
+# Select if the architecture has support for applying RELR relocations.
+config ARCH_HAS_RELR
+	bool
+
+config RELR
+	bool "Use RELR relocation packing"
+	depends on ARCH_HAS_RELR && TOOLS_SUPPORT_RELR
+	default y
+	help
+	  Store the kernel's dynamic relocations in the RELR relocation packing
+	  format. Requires a compatible linker (LLD supports this feature), as
+	  well as compatible NM and OBJCOPY utilities (llvm-nm and llvm-objcopy
+	  are compatible).
+
 source "kernel/gcov/Kconfig"
 
 source "scripts/gcc-plugins/Kconfig"
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 399f595ef852e..ac1bc9cc22a26 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1467,6 +1467,7 @@ endif
 
 config RELOCATABLE
 	bool
+	select ARCH_HAS_RELR
 	help
 	  This builds the kernel as a Position Independent Executable (PIE),
 	  which retains all relocation metadata required to relocate the
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 2cdacd1c141b9..cc23302e9d95e 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -102,6 +102,8 @@ pe_header:
 	 *  x23        stext() .. start_kernel()  physical misalignment/KASLR offset
 	 *  x28        __create_page_tables()     callee preserved temp register
 	 *  x19/x20    __primary_switch()         callee preserved temp registers
+	 *  x24        __primary_switch() .. __relocate_kernel()
+	 *                                        current RELR displacement
 	 */
 ENTRY(stext)
 	bl	preserve_boot_args
@@ -834,14 +836,93 @@ __relocate_kernel:
 
 0:	cmp	x9, x10
 	b.hs	1f
-	ldp	x11, x12, [x9], #24
-	ldr	x13, [x9, #-8]
-	cmp	w12, #R_AARCH64_RELATIVE
+	ldp	x12, x13, [x9], #24
+	ldr	x14, [x9, #-8]
+	cmp	w13, #R_AARCH64_RELATIVE
 	b.ne	0b
-	add	x13, x13, x23			// relocate
-	str	x13, [x11, x23]
+	add	x14, x14, x23			// relocate
+	str	x14, [x12, x23]
 	b	0b
-1:	ret
+
+1:
+#ifdef CONFIG_RELR
+	/*
+	 * Apply RELR relocations.
+	 *
+	 * RELR is a compressed format for storing relative relocations. The
+	 * encoded sequence of entries looks like:
+	 * [ AAAAAAAA BBBBBBB1 BBBBBBB1 ... AAAAAAAA BBBBBBB1 ... ]
+	 *
+	 * i.e. start with an address, followed by any number of bitmaps. The
+	 * address entry encodes 1 relocation. The subsequent bitmap entries
+	 * encode up to 63 relocations each, at subsequent offsets following
+	 * the last address entry.
+	 *
+	 * The bitmap entries must have 1 in the least significant bit. The
+	 * assumption here is that an address cannot have 1 in lsb. Odd
+	 * addresses are not supported. Any odd addresses are stored in the RELA
+	 * section, which is handled above.
+	 *
+	 * Excluding the least significant bit in the bitmap, each non-zero
+	 * bit in the bitmap represents a relocation to be applied to
+	 * a corresponding machine word that follows the base address
+	 * word. The second least significant bit represents the machine
+	 * word immediately following the initial address, and each bit
+	 * that follows represents the next word, in linear order. As such,
+	 * a single bitmap can encode up to 63 relocations in a 64-bit object.
+	 *
+	 * In this implementation we store the address of the next RELR table
+	 * entry in x9, the address being relocated by the current address or
+	 * bitmap entry in x13 and the address being relocated by the current
+	 * bit in x14.
+	 *
+	 * Because addends are stored in place in the binary, RELR relocations
+	 * cannot be applied idempotently. We use x24 to keep track of the
+	 * currently applied displacement so that we can correctly relocate if
+	 * __relocate_kernel is called twice with non-zero displacements (i.e.
+	 * if there is both a physical misalignment and a KASLR displacement).
+	 */
+	ldr	w9, =__relr_offset		// offset to reloc table
+	ldr	w10, =__relr_size		// size of reloc table
+	add	x9, x9, x11			// __va(.relr)
+	add	x10, x9, x10			// __va(.relr) + sizeof(.relr)
+
+	sub	x15, x23, x24			// delta from previous offset
+	cbz	x15, 7f				// nothing to do if unchanged
+	mov	x24, x23			// save new offset
+
+2:	cmp	x9, x10
+	b.hs	7f
+	ldr	x11, [x9], #8
+	tbnz	x11, #0, 3f			// branch to handle bitmaps
+	add	x13, x11, x23
+	ldr	x12, [x13]			// relocate address entry
+	add	x12, x12, x15
+	str	x12, [x13], #8			// adjust to start of bitmap
+	b	2b
+
+3:	mov	x14, x13
+4:	lsr	x11, x11, #1
+	cbz	x11, 6f
+	tbz	x11, #0, 5f			// skip bit if not set
+	ldr	x12, [x14]			// relocate bit
+	add	x12, x12, x15
+	str	x12, [x14]
+
+5:	add	x14, x14, #8			// move to next bit's address
+	b	4b
+
+6:	/*
+	 * Move to the next bitmap's address. 8 is the word size, and 63 is the
+	 * number of significant bits in a bitmap entry.
+	 */
+	add	x13, x13, #(8 * 63)
+	b	2b
+
+7:
+#endif
+	ret
+
 ENDPROC(__relocate_kernel)
 #endif
 
@@ -854,6 +935,9 @@ __primary_switch:
 	adrp	x1, init_pg_dir
 	bl	__enable_mmu
 #ifdef CONFIG_RELOCATABLE
+#ifdef CONFIG_RELR
+	mov	x24, #0				// no RELR displacement yet
+#endif
 	bl	__relocate_kernel
 #ifdef CONFIG_RANDOMIZE_BASE
 	ldr	x8, =__primary_switched
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 7fa0083749078..31716afa30f65 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -200,6 +200,15 @@ SECTIONS
 	__rela_offset	= ABSOLUTE(ADDR(.rela.dyn) - KIMAGE_VADDR);
 	__rela_size	= SIZEOF(.rela.dyn);
 
+#ifdef CONFIG_RELR
+	.relr.dyn : ALIGN(8) {
+		*(.relr.dyn)
+	}
+
+	__relr_offset	= ABSOLUTE(ADDR(.relr.dyn) - KIMAGE_VADDR);
+	__relr_size	= SIZEOF(.relr.dyn);
+#endif
+
 	. = ALIGN(SEGMENT_ALIGN);
 	__initdata_end = .;
 	__init_end = .;
diff --git a/init/Kconfig b/init/Kconfig
index bd7d650d4a996..d96127ebc44e0 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -30,6 +30,9 @@ config CC_CAN_LINK
 config CC_HAS_ASM_GOTO
 	def_bool $(success,$(srctree)/scripts/gcc-goto.sh $(CC))
 
+config TOOLS_SUPPORT_RELR
+	def_bool $(success,env "CC=$(CC)" "LD=$(LD)" "NM=$(NM)" "OBJCOPY=$(OBJCOPY)" $(srctree)/scripts/tools-support-relr.sh)
+
 config CC_HAS_WARN_MAYBE_UNINITIALIZED
 	def_bool $(cc-option,-Wmaybe-uninitialized)
 	help
diff --git a/scripts/tools-support-relr.sh b/scripts/tools-support-relr.sh
new file mode 100755
index 0000000000000..97a2c844a95e4
--- /dev/null
+++ b/scripts/tools-support-relr.sh
@@ -0,0 +1,16 @@
+#!/bin/sh -eu
+# SPDX-License-Identifier: GPL-2.0
+
+tmp_file=$(mktemp)
+trap "rm -f $tmp_file.o $tmp_file $tmp_file.bin" EXIT
+
+cat << "END" | "$CC" -c -x c - -o $tmp_file.o >/dev/null 2>&1
+void *p = &p;
+END
+"$LD" $tmp_file.o -shared -Bsymbolic --pack-dyn-relocs=relr -o $tmp_file
+
+# Despite printing an error message, GNU nm still exits with exit code 0 if it
+# sees a relr section. So we need to check that nothing is printed to stderr.
+test -z "$("$NM" $tmp_file 2>&1 >/dev/null)"
+
+"$OBJCOPY" -O binary $tmp_file $tmp_file.bin
-- 
2.22.0.709.g102302147b-goog


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH v2] arm64: Add support for relocating the kernel with RELR relocations
  2019-07-31 16:48   ` Will Deacon
@ 2019-08-01  1:19     ` Peter Collingbourne
  0 siblings, 0 replies; 17+ messages in thread
From: Peter Collingbourne @ 2019-08-01  1:19 UTC (permalink / raw)
  To: Will Deacon
  Cc: Mark Rutland, clang-built-linux, Catalin Marinas, Ard Biesheuvel,
	Nick Desaulniers, Masahiro Yamada, Linux ARM

On Wed, Jul 31, 2019 at 9:48 AM Will Deacon <will@kernel.org> wrote:
>
> On Fri, Jul 12, 2019 at 12:38:46PM -0700, Peter Collingbourne wrote:
> > RELR is a relocation packing format for relative relocations.
> > The format is described in a generic-abi proposal:
> > https://groups.google.com/d/topic/generic-abi/bX460iggiKg/discussion
> >
> > The LLD linker can be instructed to pack relocations in the RELR
> > format by passing the flag --pack-dyn-relocs=relr.
> >
> > This patch adds a new config option, CONFIG_RELR. Enabling this option
> > instructs the linker to pack vmlinux's relative relocations in the RELR
> > format, and causes the kernel to apply the relocations at startup along
> > with the RELA relocations. RELA relocations still need to be applied
> > because the linker will emit RELA relative relocations if they are
> > unrepresentable in the RELR format (i.e. address not a multiple of 2).
> >
> > Enabling CONFIG_RELR reduces the size of a defconfig kernel image
> > with CONFIG_RANDOMIZE_BASE by 3.5MB/16% uncompressed, or 550KB/5%
> > compressed (lz4).
> >
> > Signed-off-by: Peter Collingbourne <pcc@google.com>
> > Tested-by: Nick Desaulniers <ndesaulniers@google.com>
> > Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
> > ---
> > Changes in v2:
> > - Reverted change to RELA processing
> > - Added more comments, as requested by Nick and Will
> > - Added a feature test for NM and OBJCOPY
> > - Made CONFIG_RELR=y the default if the tools support it
> >
> >  arch/arm64/Kconfig              | 10 ++++
> >  arch/arm64/Makefile             |  4 ++
> >  arch/arm64/kernel/head.S        | 96 ++++++++++++++++++++++++++++++---
> >  arch/arm64/kernel/vmlinux.lds.S |  9 ++++
> >  init/Kconfig                    |  3 ++
> >  scripts/tools-support-relr.sh   | 16 ++++++
> >  6 files changed, 132 insertions(+), 6 deletions(-)
> >  create mode 100755 scripts/tools-support-relr.sh
> >
> > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> > index 7442edbcabfc3..cf3907d21d097 100644
> > --- a/arch/arm64/Kconfig
> > +++ b/arch/arm64/Kconfig
> > @@ -1476,6 +1476,16 @@ config RELOCATABLE
> >         relocation pass at runtime even if the kernel is loaded at the
> >         same address it was linked at.
> >
> > +config RELR
> > +     bool "Use RELR relocation packing"
> > +     depends on RELOCATABLE && TOOLS_SUPPORT_RELR
> > +     default y
> > +     help
> > +       Store the kernel's dynamic relocations in the RELR relocation packing
> > +       format. Requires a compatible linker (currently only LLD supports
>
> Drop "currently" because it will just rot

Done in v3.

> > +       this feature), as well as compatible NM and OBJCOPY utilities
> > +       (llvm-nm and llvm-objcopy are compatible).
> > +
> >  config RANDOMIZE_BASE
> >       bool "Randomize the address of the kernel image"
> >       select ARM64_MODULE_PLTS if MODULES
> > diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
> > index bb1f1dbb34e8f..11f84450c7784 100644
> > --- a/arch/arm64/Makefile
> > +++ b/arch/arm64/Makefile
> > @@ -22,6 +22,10 @@ LDFLAGS_vmlinux            += -shared -Bsymbolic -z notext -z norelro \
> >                       $(call ld-option, --no-apply-dynamic-relocs)
> >  endif
> >
> > +ifeq ($(CONFIG_RELR),y)
> > +  LDFLAGS_vmlinux += --pack-dyn-relocs=relr
> > +endif
>
> RELR isn't arm64-specific, right? So we could put this in the top-level
> Makefile and have arm64 select ARCH_HAS_RELR if relocatable, so that other
> architecture can easily support this in future.

Makes sense, done in v3.

> >  ifeq ($(CONFIG_ARM64_ERRATUM_843419),y)
> >    ifeq ($(call ld-option, --fix-cortex-a53-843419),)
> >  $(warning ld does not support --fix-cortex-a53-843419; kernel may be susceptible to erratum)
> > diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> > index 2cdacd1c141b9..cc23302e9d95e 100644
> > --- a/arch/arm64/kernel/head.S
> > +++ b/arch/arm64/kernel/head.S
> > @@ -102,6 +102,8 @@ pe_header:
> >        *  x23        stext() .. start_kernel()  physical misalignment/KASLR offset
> >        *  x28        __create_page_tables()     callee preserved temp register
> >        *  x19/x20    __primary_switch()         callee preserved temp registers
> > +      *  x24        __primary_switch() .. relocate_kernel()
> > +      *                                        current RELR displacement
> >        */
> >  ENTRY(stext)
> >       bl      preserve_boot_args
> > @@ -834,14 +836,93 @@ __relocate_kernel:
> >
> >  0:   cmp     x9, x10
> >       b.hs    1f
> > -     ldp     x11, x12, [x9], #24
> > -     ldr     x13, [x9, #-8]
> > -     cmp     w12, #R_AARCH64_RELATIVE
> > +     ldp     x12, x13, [x9], #24
> > +     ldr     x14, [x9, #-8]
> > +     cmp     w13, #R_AARCH64_RELATIVE
> >       b.ne    0b
> > -     add     x13, x13, x23                   // relocate
> > -     str     x13, [x11, x23]
> > +     add     x14, x14, x23                   // relocate
> > +     str     x14, [x12, x23]
> >       b       0b
> > -1:   ret
> > +
> > +1:
> > +#ifdef CONFIG_RELR
> > +     /*
> > +      * Apply RELR relocations.
> > +      *
> > +      * RELR is a compressed format for storing relative relocations. The
> > +      * encoded sequence of entries looks like:
> > +      * [ AAAAAAAA BBBBBBB1 BBBBBBB1 ... AAAAAAAA BBBBBBB1 ... ]
>
> I assume these are treated as an array of u64 types for the purposes of
> endianness?

Yes, the element type is defined as an integer whose endianness
matches the ELF endianness and whose size matches the ELF bitness.
This may be one of the many things that would need to be adjusted for
a hypothetical ILP32 kernel, however, because ILP32 on aarch64 uses
32-bit ELF according to the latest spec.

> (have you tested with a big-endian kernel?).

Unfortunately I have no easy way to test that because LLD doesn't
currently support big-endian aarch64.

Peter

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v3] arm64: Add support for relocating the kernel with RELR relocations
  2019-08-01  1:18   ` [PATCH v3] " Peter Collingbourne
@ 2019-08-01 12:05     ` Will Deacon
  2019-08-01 17:51       ` Peter Collingbourne
  0 siblings, 1 reply; 17+ messages in thread
From: Will Deacon @ 2019-08-01 12:05 UTC (permalink / raw)
  To: Peter Collingbourne
  Cc: Mark Rutland, clang-built-linux, Catalin Marinas, Ard Biesheuvel,
	Nick Desaulniers, Masahiro Yamada, linux-arm-kernel

On Wed, Jul 31, 2019 at 06:18:42PM -0700, Peter Collingbourne wrote:
> RELR is a relocation packing format for relative relocations.
> The format is described in a generic-abi proposal:
> https://groups.google.com/d/topic/generic-abi/bX460iggiKg/discussion
> 
> The LLD linker can be instructed to pack relocations in the RELR
> format by passing the flag --pack-dyn-relocs=relr.
> 
> This patch adds a new config option, CONFIG_RELR. Enabling this option
> instructs the linker to pack vmlinux's relative relocations in the RELR
> format, and causes the kernel to apply the relocations at startup along
> with the RELA relocations. RELA relocations still need to be applied
> because the linker will emit RELA relative relocations if they are
> unrepresentable in the RELR format (i.e. address not a multiple of 2).
> 
> Enabling CONFIG_RELR reduces the size of a defconfig kernel image
> with CONFIG_RANDOMIZE_BASE by 3.5MB/16% uncompressed, or 550KB/5%
> compressed (lz4).
> 
> Signed-off-by: Peter Collingbourne <pcc@google.com>
> Tested-by: Nick Desaulniers <ndesaulniers@google.com>
> Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
> ---
> 
> Notes:
>     Changes in v3:
>     - Move Kconfig/Makefile logic to arch-independent location
>     - Tweak CONFIG_RELR documentation to remove "currently"

Excellent, thanks. Queued for 5.4.

One more question: is there any benefit to supporting this for loadable
modules as well?

Will

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v3] arm64: Add support for relocating the kernel with RELR relocations
  2019-08-01 12:05     ` Will Deacon
@ 2019-08-01 17:51       ` Peter Collingbourne
  0 siblings, 0 replies; 17+ messages in thread
From: Peter Collingbourne @ 2019-08-01 17:51 UTC (permalink / raw)
  To: Will Deacon
  Cc: Mark Rutland, clang-built-linux, Catalin Marinas, Ard Biesheuvel,
	Nick Desaulniers, Masahiro Yamada, Linux ARM

On Thu, Aug 1, 2019 at 5:05 AM Will Deacon <will@kernel.org> wrote:
>
> On Wed, Jul 31, 2019 at 06:18:42PM -0700, Peter Collingbourne wrote:
> > RELR is a relocation packing format for relative relocations.
> > The format is described in a generic-abi proposal:
> > https://groups.google.com/d/topic/generic-abi/bX460iggiKg/discussion
> >
> > The LLD linker can be instructed to pack relocations in the RELR
> > format by passing the flag --pack-dyn-relocs=relr.
> >
> > This patch adds a new config option, CONFIG_RELR. Enabling this option
> > instructs the linker to pack vmlinux's relative relocations in the RELR
> > format, and causes the kernel to apply the relocations at startup along
> > with the RELA relocations. RELA relocations still need to be applied
> > because the linker will emit RELA relative relocations if they are
> > unrepresentable in the RELR format (i.e. address not a multiple of 2).
> >
> > Enabling CONFIG_RELR reduces the size of a defconfig kernel image
> > with CONFIG_RANDOMIZE_BASE by 3.5MB/16% uncompressed, or 550KB/5%
> > compressed (lz4).
> >
> > Signed-off-by: Peter Collingbourne <pcc@google.com>
> > Tested-by: Nick Desaulniers <ndesaulniers@google.com>
> > Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
> > ---
> >
> > Notes:
> >     Changes in v3:
> >     - Move Kconfig/Makefile logic to arch-independent location
> >     - Tweak CONFIG_RELR documentation to remove "currently"
>
> Excellent, thanks. Queued for 5.4.

Thanks.

> One more question: is there any benefit to supporting this for loadable
> modules as well?

It looks like there would be, but the benefit would be much smaller than
for the kernel itself, and also smaller than the benefit of building
modules as shared objects instead of object files, which, as I
mentioned to Nick, is a prerequisite for the linker to be able to emit
RELR relocations. The kernel appears to proportionally contain many
more RELATIVE relocations than the modules, probably because of
__ksymtab which is almost entirely RELATIVE relocations. Shared
objects should help significantly with binary size because the linker
would then be able to statically resolve relocations between sections,
avoiding the need to store them explicitly in the module, but they
have the downside of requiring PIC which could hurt performance.

As a quick experiment I did:

diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index d3776c945a173..015898faba590 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -65,7 +65,7 @@ endif

 KBUILD_CFLAGS  += -mgeneral-regs-only $(lseinstr) $(brokengasinst)
$(compat_vdso)
 KBUILD_CFLAGS  += -fno-asynchronous-unwind-tables
-KBUILD_CFLAGS  += $(call cc-disable-warning, psabi)
+KBUILD_CFLAGS  += $(call cc-disable-warning, psabi) -fPIC
 KBUILD_AFLAGS  += $(lseinstr) $(brokengasinst) $(compat_vdso)

 KBUILD_CFLAGS  += $(call cc-option,-mabi=lp64)

and built a defconfig kernel. I then did:

$ for i in **/*.ko ; do ld.lld -shared -o ${i}.so $i -Bsymbolic  -N
2>/dev/null ; done
$ for i in **/*.ko ; do ld.lld -shared -o ${i}.relr.so $i -Bsymbolic
-N --pack-dyn-relocs=relr 2>/dev/null ; done

That gave me 494 out of 525 modules that can currently be linked as
shared objects, and 491 that can be linked with RELR (looks like
there's an LLD bug preventing linking the other 3). The binary sizes
are:

$ cat `ls  **/*.ko.relr.so | sed -e 's/\.ko\.relr\.so$/.ko/g'` | wc -c
279730136
$ cat `ls  **/*.ko.relr.so | sed -e 's/\.ko\.relr\.so$/.ko.so/g'` | wc -c
134944384
$ cat **/*.ko.relr.so | wc -c
134617216

So that's roughly >50% size improvement from shared objects and 0.3% from RELR.

Peter

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2019-08-01 17:51 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-07-05  8:02 [PATCH] arm64: Add support for relocating the kernel with RELR relocations Peter Collingbourne
2019-07-08 18:02 ` Nick Desaulniers
2019-07-09 22:04   ` Peter Collingbourne
2019-07-09 23:13     ` Nick Desaulniers
2019-07-12 19:40       ` Peter Collingbourne
2019-07-10 16:21 ` Will Deacon
2019-07-12 19:40   ` Peter Collingbourne
2019-07-10 23:14 ` Nick Desaulniers
2019-07-12 19:40   ` Peter Collingbourne
2019-07-12 19:33 ` [PATCH v2] arm64: Add support for relocating the kernel with RELR Peter Collingbourne
2019-07-12 19:38 ` [PATCH v2] arm64: Add support for relocating the kernel with RELR relocations Peter Collingbourne
2019-07-29 20:00   ` Peter Collingbourne
2019-07-31 16:48   ` Will Deacon
2019-08-01  1:19     ` Peter Collingbourne
2019-08-01  1:18   ` [PATCH v3] " Peter Collingbourne
2019-08-01 12:05     ` Will Deacon
2019-08-01 17:51       ` Peter Collingbourne

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.