* [PATCH 0/3] x86-64: Stack protector and percpu improvements
@ 2021-11-13 12:40 Brian Gerst
  2021-11-13 12:40 ` [PATCH 1/3] x86-64: Use per-cpu stack canary if supported by compiler Brian Gerst
                   ` (2 more replies)
  0 siblings, 3 replies; 11+ messages in thread
From: Brian Gerst @ 2021-11-13 12:40 UTC (permalink / raw)
  To: linux-kernel, x86
  Cc: Andy Lutomirski, Borislav Petkov, Thomas Gleixner,
	H . Peter Anvin, Ingo Molnar, Brian Gerst

Currently, x86-64 uses an unusual per-cpu layout, where the percpu section
is linked at absolute address 0.  The reason behind this is that older GCC
versions placed the stack protector (if enabled) at a fixed offset from the
GS segment base.  Since the GS segment is also used for percpu variables,
this forced the current layout.

GCC 8.1 and later support a configurable location for the stack protector
guard value, which allows the restriction on where the percpu data is
linked to be removed.  Compatibility with older compilers is maintained
until the minimum compiler version is raised.
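
For reference, the difference between the two models boils down to the
following declarations (a sketch mirroring the hunks in patch 1, not new
code):

    /* Fixed model (older GCC): the canary must end up at %gs:40 */
    struct fixed_percpu_data {
            char            gs_base[40];
            unsigned long   stack_canary;
    };
    DECLARE_PER_CPU_FIRST(struct fixed_percpu_data, fixed_percpu_data) __visible;

    /* Per-cpu model (GCC 8.1+): an ordinary per-cpu variable */
    DECLARE_PER_CPU(unsigned long, __stack_chk_guard);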

Brian Gerst (3):
  x86-64: Use per-cpu stack canary if supported by compiler
  x86/relocs: Make absolute percpu relocations conditional
  x86_64: Use relative per-cpu offsets

 arch/x86/Kconfig                      |  9 +++++++++
 arch/x86/Makefile                     | 21 ++++++++++++++-------
 arch/x86/boot/compressed/Makefile     |  3 ++-
 arch/x86/entry/entry_64.S             |  6 +++++-
 arch/x86/include/asm/percpu.h         |  4 ++--
 arch/x86/include/asm/processor.h      | 14 +++++++++++---
 arch/x86/include/asm/stackprotector.h | 17 ++++++-----------
 arch/x86/kernel/asm-offsets_64.c      |  2 +-
 arch/x86/kernel/cpu/common.c          | 14 ++++++++------
 arch/x86/kernel/head_64.S             | 12 ++++++++----
 arch/x86/kernel/setup_percpu.c        |  2 +-
 arch/x86/kernel/vmlinux.lds.S         | 16 +++++++---------
 arch/x86/tools/relocs.c               |  4 ++--
 arch/x86/tools/relocs.h               |  4 ++--
 arch/x86/tools/relocs_common.c        | 11 ++++++++---
 arch/x86/xen/xen-head.S               | 10 ++++------
 init/Kconfig                          |  1 -
 17 files changed, 90 insertions(+), 60 deletions(-)

-- 
2.31.1


* [PATCH 1/3] x86-64: Use per-cpu stack canary if supported by compiler
  2021-11-13 12:40 [PATCH 0/3] x86-64: Stack protector and percpu improvements Brian Gerst
@ 2021-11-13 12:40 ` Brian Gerst
  2021-11-13 12:40 ` [PATCH 2/3] x86/relocs: Make absolute percpu relocations conditional Brian Gerst
  2021-11-13 12:40 ` [PATCH 3/3] x86_64: Use relative per-cpu offsets Brian Gerst
  2 siblings, 0 replies; 11+ messages in thread
From: Brian Gerst @ 2021-11-13 12:40 UTC (permalink / raw)
  To: linux-kernel, x86
  Cc: Andy Lutomirski, Borislav Petkov, Thomas Gleixner,
	H . Peter Anvin, Ingo Molnar, Brian Gerst

If the compiler supports it, use a standard per-cpu variable for the
stack protector instead of the old fixed location.  Keep the fixed
location code for compatibility with older compilers.
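
With a capable compiler the guard location is selected purely through
compiler flags.  A sketch of what the Makefile hunk below adds to
KBUILD_CFLAGS on x86-64 (GCC 8.1 or later is assumed):

    # Read the guard from the per-cpu symbol __stack_chk_guard
    # instead of the hardcoded %gs:40 slot.
    KBUILD_CFLAGS += -mstack-protector-guard-reg=gs \
                     -mstack-protector-guard-symbol=__stack_chk_guard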

Signed-off-by: Brian Gerst <brgerst@gmail.com>
---
 arch/x86/Kconfig                      |  5 +++++
 arch/x86/Makefile                     | 21 ++++++++++++++-------
 arch/x86/entry/entry_64.S             |  6 +++++-
 arch/x86/include/asm/processor.h      | 14 +++++++++++---
 arch/x86/include/asm/stackprotector.h | 17 ++++++-----------
 arch/x86/kernel/asm-offsets_64.c      |  2 +-
 arch/x86/kernel/cpu/common.c          | 14 ++++++++------
 arch/x86/kernel/head_64.S             | 16 ++++++++++++----
 arch/x86/kernel/vmlinux.lds.S         |  4 ++--
 arch/x86/xen/xen-head.S               | 10 ++++------
 10 files changed, 68 insertions(+), 41 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b1d4b481fcdd..8268910e09cd 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -397,6 +397,11 @@ config CC_HAS_SANE_STACKPROTECTOR
 	   the compiler produces broken code or if it does not let us control
 	   the segment on 32-bit kernels.
 
+config STACKPROTECTOR_FIXED
+	bool
+	depends on X86_64 && STACKPROTECTOR
+	default y if !$(cc-option,-mstack-protector-guard-reg=gs)
+
 menu "Processor type and features"
 
 config SMP
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index aab70413ae7a..94aee76a3457 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -79,13 +79,7 @@ ifeq ($(CONFIG_X86_32),y)
         # temporary until string.h is fixed
         KBUILD_CFLAGS += -ffreestanding
 
-	ifeq ($(CONFIG_STACKPROTECTOR),y)
-		ifeq ($(CONFIG_SMP),y)
-			KBUILD_CFLAGS += -mstack-protector-guard-reg=fs -mstack-protector-guard-symbol=__stack_chk_guard
-		else
-			KBUILD_CFLAGS += -mstack-protector-guard=global
-		endif
-	endif
+	percpu_seg := fs
 else
         BITS := 64
         UTS_MACHINE := x86_64
@@ -126,6 +120,19 @@ else
 
         KBUILD_CFLAGS += -mno-red-zone
         KBUILD_CFLAGS += -mcmodel=kernel
+
+	percpu_seg := gs
+endif
+
+ifeq ($(CONFIG_STACKPROTECTOR),y)
+	ifneq ($(CONFIG_STACKPROTECTOR_FIXED),y)
+		ifeq ($(CONFIG_SMP),y)
+			KBUILD_CFLAGS += -mstack-protector-guard-reg=$(percpu_seg) \
+					 -mstack-protector-guard-symbol=__stack_chk_guard
+		else
+			KBUILD_CFLAGS += -mstack-protector-guard=global
+		endif
+	endif
 endif
 
 ifdef CONFIG_X86_X32
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index e38a4cf795d9..2006fde249c2 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -217,6 +217,10 @@ syscall_return_via_sysret:
 	sysretq
 SYM_CODE_END(entry_SYSCALL_64)
 
+#ifdef CONFIG_STACKPROTECTOR_FIXED
+#define __stack_chk_guard fixed_percpu_data + stack_canary_offset
+#endif
+
 /*
  * %rdi: prev task
  * %rsi: next task
@@ -240,7 +244,7 @@ SYM_FUNC_START(__switch_to_asm)
 
 #ifdef CONFIG_STACKPROTECTOR
 	movq	TASK_stack_canary(%rsi), %rbx
-	movq	%rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset
+	movq	%rbx, PER_CPU_VAR(__stack_chk_guard)
 #endif
 
 #ifdef CONFIG_RETPOLINE
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 191878a65c61..44376a15b9d2 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -427,6 +427,8 @@ struct irq_stack {
 DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
 
 #ifdef CONFIG_X86_64
+
+#ifdef CONFIG_STACKPROTECTOR_FIXED
 struct fixed_percpu_data {
 	/*
 	 * GCC hardcodes the stack canary as %gs:40.  Since the
@@ -442,10 +444,19 @@ struct fixed_percpu_data {
 
 DECLARE_PER_CPU_FIRST(struct fixed_percpu_data, fixed_percpu_data) __visible;
 DECLARE_INIT_PER_CPU(fixed_percpu_data);
+#endif /* STACKPROTECTOR_FIXED */
 
 static inline unsigned long cpu_kernelmode_gs_base(int cpu)
 {
+#ifdef CONFIG_STACKPROTECTOR_FIXED
 	return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu);
+#else
+#ifdef CONFIG_SMP
+	return per_cpu_offset(cpu);
+#else
+	return 0;
+#endif
+#endif
 }
 
 DECLARE_PER_CPU(void *, hardirq_stack_ptr);
@@ -455,9 +466,6 @@ extern asmlinkage void ignore_sysret(void);
 /* Save actual FS/GS selectors and bases to current->thread */
 void current_save_fsgs(void);
 #else	/* X86_64 */
-#ifdef CONFIG_STACKPROTECTOR
-DECLARE_PER_CPU(unsigned long, __stack_chk_guard);
-#endif
 DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
 DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr);
 #endif	/* !X86_64 */
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index 24a8d6c4fb18..1c7f0035c8fb 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -37,6 +37,12 @@
 #include <linux/random.h>
 #include <linux/sched.h>
 
+#ifdef CONFIG_STACKPROTECTOR_FIXED
+#define __stack_chk_guard fixed_percpu_data.stack_canary
+#else
+DECLARE_PER_CPU(unsigned long, __stack_chk_guard);
+#endif
+
 /*
  * Initialize the stackprotector canary value.
  *
@@ -53,9 +59,6 @@ static __always_inline void boot_init_stack_canary(void)
 	u64 canary;
 	u64 tsc;
 
-#ifdef CONFIG_X86_64
-	BUILD_BUG_ON(offsetof(struct fixed_percpu_data, stack_canary) != 40);
-#endif
 	/*
 	 * We both use the random pool and the current TSC as a source
 	 * of randomness. The TSC only matters for very early init,
@@ -68,20 +71,12 @@ static __always_inline void boot_init_stack_canary(void)
 	canary &= CANARY_MASK;
 
 	current->stack_canary = canary;
-#ifdef CONFIG_X86_64
-	this_cpu_write(fixed_percpu_data.stack_canary, canary);
-#else
 	this_cpu_write(__stack_chk_guard, canary);
-#endif
 }
 
 static inline void cpu_init_stack_canary(int cpu, struct task_struct *idle)
 {
-#ifdef CONFIG_X86_64
-	per_cpu(fixed_percpu_data.stack_canary, cpu) = idle->stack_canary;
-#else
 	per_cpu(__stack_chk_guard, cpu) = idle->stack_canary;
-#endif
 }
 
 #else	/* STACKPROTECTOR */
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index b14533af7676..e9f4870a5321 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -56,7 +56,7 @@ int main(void)
 
 	BLANK();
 
-#ifdef CONFIG_STACKPROTECTOR
+#ifdef CONFIG_STACKPROTECTOR_FIXED
 	DEFINE(stack_canary_offset, offsetof(struct fixed_percpu_data, stack_canary));
 	BLANK();
 #endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 0083464de5e3..f3dfb5b59530 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1767,10 +1767,6 @@ static __init int setup_clearcpuid(char *arg)
 __setup("clearcpuid=", setup_clearcpuid);
 
 #ifdef CONFIG_X86_64
-DEFINE_PER_CPU_FIRST(struct fixed_percpu_data,
-		     fixed_percpu_data) __aligned(PAGE_SIZE) __visible;
-EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data);
-
 /*
  * The following percpu variables are hot.  Align current_task to
  * cacheline size such that they fall in the same cacheline.
@@ -1840,12 +1836,18 @@ DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) =
 	(unsigned long)&init_thread_union + THREAD_SIZE;
 EXPORT_PER_CPU_SYMBOL(cpu_current_top_of_stack);
 
+#endif	/* CONFIG_X86_64 */
+
 #ifdef CONFIG_STACKPROTECTOR
+#ifdef CONFIG_STACKPROTECTOR_FIXED
+DEFINE_PER_CPU_FIRST(struct fixed_percpu_data,
+		     fixed_percpu_data) __aligned(PAGE_SIZE) __visible;
+EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data);
+#else
 DEFINE_PER_CPU(unsigned long, __stack_chk_guard);
 EXPORT_PER_CPU_SYMBOL(__stack_chk_guard);
 #endif
-
-#endif	/* CONFIG_X86_64 */
+#endif
 
 /*
  * Clear all 6 debug registers:
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index d8b3ebd2bb85..6e396ffb1610 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -198,10 +198,8 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
 	movl %eax,%fs
 	movl %eax,%gs
 
-	/* Set up %gs.
-	 *
-	 * The base of %gs always points to fixed_percpu_data. If the
-	 * stack protector canary is enabled, it is located at %gs:40.
+	/*
+	 * Set up GS base.
 	 * Note that, on SMP, the boot cpu uses init data section until
 	 * the per cpu areas are set up.
 	 */
@@ -337,7 +335,17 @@ SYM_CODE_END(vc_boot_ghcb)
 	__REFDATA
 	.balign	8
 SYM_DATA(initial_code,	.quad x86_64_start_kernel)
+
+#ifdef CONFIG_STACKPROTECTOR_FIXED
 SYM_DATA(initial_gs,	.quad INIT_PER_CPU_VAR(fixed_percpu_data))
+#else
+#ifdef CONFIG_SMP
+SYM_DATA(initial_gs,	.quad __per_cpu_load)
+#else
+SYM_DATA(initial_gs,	.quad 0)
+#endif
+#endif
+
 #ifdef CONFIG_AMD_MEM_ENCRYPT
 SYM_DATA(initial_vc_handler,	.quad handle_vc_boot_ghcb)
 #endif
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 3d6dc12d198f..c475d21d2126 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -481,10 +481,10 @@ SECTIONS
  */
 #define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x) + __per_cpu_load
 INIT_PER_CPU(gdt_page);
-INIT_PER_CPU(fixed_percpu_data);
 INIT_PER_CPU(irq_stack_backing_store);
 
-#ifdef CONFIG_SMP
+#ifdef CONFIG_STACKPROTECTOR_FIXED
+INIT_PER_CPU(fixed_percpu_data);
 . = ASSERT((fixed_percpu_data == 0),
            "fixed_percpu_data is not at start of per-cpu area");
 #endif
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 9e27b86a0c31..54995efd74f7 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -54,16 +54,14 @@ SYM_CODE_START(startup_xen)
 	mov %_ASM_SI, xen_start_info
 	mov initial_stack(%rip), %rsp
 
-	/* Set up %gs.
-	 *
-	 * The base of %gs always points to fixed_percpu_data.  If the
-	 * stack protector canary is enabled, it is located at %gs:40.
+	/*
+	 * Set up GS base.
 	 * Note that, on SMP, the boot cpu uses init data section until
 	 * the per cpu areas are set up.
 	 */
 	movl	$MSR_GS_BASE,%ecx
-	movq	$INIT_PER_CPU_VAR(fixed_percpu_data),%rax
-	cdq
+	movl	initial_gs(%rip),%eax
+	movl	initial_gs+4(%rip),%edx
 	wrmsr
 
 	call xen_start_kernel
-- 
2.31.1


* [PATCH 2/3] x86/relocs: Make absolute percpu relocations conditional
  2021-11-13 12:40 [PATCH 0/3] x86-64: Stack protector and percpu improvements Brian Gerst
  2021-11-13 12:40 ` [PATCH 1/3] x86-64: Use per-cpu stack canary if supported by compiler Brian Gerst
@ 2021-11-13 12:40 ` Brian Gerst
  2021-11-13 12:40 ` [PATCH 3/3] x86_64: Use relative per-cpu offsets Brian Gerst
  2 siblings, 0 replies; 11+ messages in thread
From: Brian Gerst @ 2021-11-13 12:40 UTC (permalink / raw)
  To: linux-kernel, x86
  Cc: Andy Lutomirski, Borislav Petkov, Thomas Gleixner,
	H . Peter Anvin, Ingo Molnar, Brian Gerst

Make the generation of absolute per-cpu relocations optional: add an
--absolute-percpu switch to the relocs tool, and have the build pass it
only when the new CONFIG_X86_ABSOLUTE_PERCPU option (which also selects
KALLSYMS_ABSOLUTE_PERCPU) is enabled.  This prepares for linking the
per-cpu section at a conventional address instead of address 0.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
---
 arch/x86/Kconfig                  |  4 ++++
 arch/x86/boot/compressed/Makefile |  3 ++-
 arch/x86/tools/relocs.c           |  4 ++--
 arch/x86/tools/relocs.h           |  4 ++--
 arch/x86/tools/relocs_common.c    | 11 ++++++++---
 init/Kconfig                      |  1 -
 6 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8268910e09cd..832a6626df72 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -402,6 +402,10 @@ config STACKPROTECTOR_FIXED
 	depends on X86_64 && STACKPROTECTOR
 	default y if !$(cc-option,-mstack-protector-guard-reg=gs)
 
+config X86_ABSOLUTE_PERCPU
+	def_bool X86_64 && SMP
+	select KALLSYMS_ABSOLUTE_PERCPU
+
 menu "Processor type and features"
 
 config SMP
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 431bf7f846c3..ff493f262f2a 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -112,8 +112,9 @@ $(obj)/vmlinux.bin: vmlinux FORCE
 targets += $(patsubst $(obj)/%,%,$(vmlinux-objs-y)) vmlinux.bin.all vmlinux.relocs
 
 CMD_RELOCS = arch/x86/tools/relocs
+relocs-flags-$(CONFIG_X86_ABSOLUTE_PERCPU) += --absolute-percpu
 quiet_cmd_relocs = RELOCS  $@
-      cmd_relocs = $(CMD_RELOCS) $< > $@;$(CMD_RELOCS) --abs-relocs $<
+      cmd_relocs = $(CMD_RELOCS) $(relocs-flags-y) $< > $@;$(CMD_RELOCS) --abs-relocs $<
 $(obj)/vmlinux.relocs: vmlinux FORCE
 	$(call if_changed,relocs)
 
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index 3f5d39768287..bad375444cee 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -1166,7 +1166,7 @@ static void print_reloc_info(void)
 
 void process(FILE *fp, int use_real_mode, int as_text,
 	     int show_absolute_syms, int show_absolute_relocs,
-	     int show_reloc_info)
+	     int show_reloc_info, int absolute_percpu)
 {
 	regex_init(use_real_mode);
 	read_ehdr(fp);
@@ -1174,7 +1174,7 @@ void process(FILE *fp, int use_real_mode, int as_text,
 	read_strtabs(fp);
 	read_symtabs(fp);
 	read_relocs(fp);
-	if (ELF_BITS == 64)
+	if (ELF_BITS == 64 && absolute_percpu)
 		percpu_init();
 	if (show_absolute_syms) {
 		print_absolute_symbols();
diff --git a/arch/x86/tools/relocs.h b/arch/x86/tools/relocs.h
index 4c49c82446eb..dcf323f6102f 100644
--- a/arch/x86/tools/relocs.h
+++ b/arch/x86/tools/relocs.h
@@ -32,8 +32,8 @@ enum symtype {
 
 void process_32(FILE *fp, int use_real_mode, int as_text,
 		int show_absolute_syms, int show_absolute_relocs,
-		int show_reloc_info);
+		int show_reloc_info, int absolute_percpu);
 void process_64(FILE *fp, int use_real_mode, int as_text,
 		int show_absolute_syms, int show_absolute_relocs,
-		int show_reloc_info);
+		int show_reloc_info, int absolute_percpu);
 #endif /* RELOCS_H */
diff --git a/arch/x86/tools/relocs_common.c b/arch/x86/tools/relocs_common.c
index 6634352a20bc..aa5fdea1e87a 100644
--- a/arch/x86/tools/relocs_common.c
+++ b/arch/x86/tools/relocs_common.c
@@ -19,7 +19,7 @@ static void usage(void)
 int main(int argc, char **argv)
 {
 	int show_absolute_syms, show_absolute_relocs, show_reloc_info;
-	int as_text, use_real_mode;
+	int as_text, use_real_mode, absolute_percpu;
 	const char *fname;
 	FILE *fp;
 	int i;
@@ -30,6 +30,7 @@ int main(int argc, char **argv)
 	show_reloc_info = 0;
 	as_text = 0;
 	use_real_mode = 0;
+	absolute_percpu = 0;
 	fname = NULL;
 	for (i = 1; i < argc; i++) {
 		char *arg = argv[i];
@@ -54,6 +55,10 @@ int main(int argc, char **argv)
 				use_real_mode = 1;
 				continue;
 			}
+			if (strcmp(arg, "--absolute-percpu") == 0) {
+				absolute_percpu = 1;
+				continue;
+			}
 		}
 		else if (!fname) {
 			fname = arg;
@@ -75,11 +80,11 @@ int main(int argc, char **argv)
 	if (e_ident[EI_CLASS] == ELFCLASS64)
 		process_64(fp, use_real_mode, as_text,
 			   show_absolute_syms, show_absolute_relocs,
-			   show_reloc_info);
+			   show_reloc_info, absolute_percpu);
 	else
 		process_32(fp, use_real_mode, as_text,
 			   show_absolute_syms, show_absolute_relocs,
-			   show_reloc_info);
+			   show_reloc_info, absolute_percpu);
 	fclose(fp);
 	return 0;
 }
diff --git a/init/Kconfig b/init/Kconfig
index 11f8a845f259..f8da0e5b7663 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1716,7 +1716,6 @@ config KALLSYMS_ALL
 config KALLSYMS_ABSOLUTE_PERCPU
 	bool
 	depends on KALLSYMS
-	default X86_64 && SMP
 
 config KALLSYMS_BASE_RELATIVE
 	bool
-- 
2.31.1


* [PATCH 3/3] x86_64: Use relative per-cpu offsets
  2021-11-13 12:40 [PATCH 0/3] x86-64: Stack protector and percpu improvements Brian Gerst
  2021-11-13 12:40 ` [PATCH 1/3] x86-64: Use per-cpu stack canary if supported by compiler Brian Gerst
  2021-11-13 12:40 ` [PATCH 2/3] x86/relocs: Make absolute percpu relocations conditional Brian Gerst
@ 2021-11-13 12:40 ` Brian Gerst
  2021-11-14  1:18   ` Andy Lutomirski
  2 siblings, 1 reply; 11+ messages in thread
From: Brian Gerst @ 2021-11-13 12:40 UTC (permalink / raw)
  To: linux-kernel, x86
  Cc: Andy Lutomirski, Borislav Petkov, Thomas Gleixner,
	H . Peter Anvin, Ingo Molnar, Brian Gerst

The per-cpu section is currently linked at virtual address 0, because
older compilers hardcoded the stack protector canary value at a fixed
offset from the start of the GS segment.  Use a standard relative offset
as the GS base when the stack protector is disabled, or a newer compiler
is used that supports a configurable location for the stack canary.
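
To make the effect concrete: without the fixed layout, the kernel GS base
for a CPU is simply that CPU's per-cpu offset (0 on UP), as already wired
up by patch 1.  A sketch of that helper, reproduced from the patch 1 hunk
in processor.h:

    static inline unsigned long cpu_kernelmode_gs_base(int cpu)
    {
    #ifdef CONFIG_SMP
            return per_cpu_offset(cpu);
    #else
            return 0;
    #endif
    }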

Signed-off-by: Brian Gerst <brgerst@gmail.com>
---
 arch/x86/Kconfig               |  2 +-
 arch/x86/include/asm/percpu.h  |  4 ++--
 arch/x86/kernel/head_64.S      |  4 ----
 arch/x86/kernel/setup_percpu.c |  2 +-
 arch/x86/kernel/vmlinux.lds.S  | 14 ++++++--------
 5 files changed, 10 insertions(+), 16 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 832a6626df72..fae7724505bd 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -403,7 +403,7 @@ config STACKPROTECTOR_FIXED
 	default y if !$(cc-option,-mstack-protector-guard-reg=gs)
 
 config X86_ABSOLUTE_PERCPU
-	def_bool X86_64 && SMP
+	def_bool STACKPROTECTOR_FIXED && SMP
 	select KALLSYMS_ABSOLUTE_PERCPU
 
 menu "Processor type and features"
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index a3c33b79fb86..8294781bb483 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -16,7 +16,7 @@
 #define PER_CPU_VAR(var)	var
 #endif	/* SMP */
 
-#ifdef CONFIG_X86_64_SMP
+#ifdef CONFIG_X86_ABSOLUTE_PERCPU
 #define INIT_PER_CPU_VAR(var)  init_per_cpu__##var
 #else
 #define INIT_PER_CPU_VAR(var)  var
@@ -59,7 +59,7 @@
 #define DECLARE_INIT_PER_CPU(var) \
        extern typeof(var) init_per_cpu_var(var)
 
-#ifdef CONFIG_X86_64_SMP
+#ifdef CONFIG_X86_ABSOLUTE_PERCPU
 #define init_per_cpu_var(var)  init_per_cpu__##var
 #else
 #define init_per_cpu_var(var)  var
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 6e396ffb1610..c1b6209a01ca 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -339,12 +339,8 @@ SYM_DATA(initial_code,	.quad x86_64_start_kernel)
 #ifdef CONFIG_STACKPROTECTOR_FIXED
 SYM_DATA(initial_gs,	.quad INIT_PER_CPU_VAR(fixed_percpu_data))
 #else
-#ifdef CONFIG_SMP
-SYM_DATA(initial_gs,	.quad __per_cpu_load)
-#else
 SYM_DATA(initial_gs,	.quad 0)
 #endif
-#endif
 
 #ifdef CONFIG_AMD_MEM_ENCRYPT
 SYM_DATA(initial_vc_handler,	.quad handle_vc_boot_ghcb)
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 5afd98559193..4c0020a6ced9 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -26,7 +26,7 @@
 DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number);
 EXPORT_PER_CPU_SYMBOL(cpu_number);
 
-#ifdef CONFIG_X86_64
+#ifdef CONFIG_X86_ABSOLUTE_PERCPU
 #define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load)
 #else
 #define BOOT_PERCPU_OFFSET 0
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index c475d21d2126..18e1deb9fa52 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -102,10 +102,10 @@ jiffies = jiffies_64;
 PHDRS {
 	text PT_LOAD FLAGS(5);          /* R_E */
 	data PT_LOAD FLAGS(6);          /* RW_ */
-#ifdef CONFIG_X86_64
-#ifdef CONFIG_SMP
+#ifdef CONFIG_X86_ABSOLUTE_PERCPU
 	percpu PT_LOAD FLAGS(6);        /* RW_ */
 #endif
+#ifdef CONFIG_X86_64
 	init PT_LOAD FLAGS(7);          /* RWE */
 #endif
 	note PT_NOTE FLAGS(0);          /* ___ */
@@ -215,7 +215,7 @@ SECTIONS
 		__init_begin = .; /* paired with __init_end */
 	}
 
-#if defined(CONFIG_X86_64) && defined(CONFIG_SMP)
+#ifdef CONFIG_X86_ABSOLUTE_PERCPU
 	/*
 	 * percpu offsets are zero-based on SMP.  PERCPU_VADDR() changes the
 	 * output PHDR, so the next output section - .init.text - should
@@ -339,7 +339,7 @@ SECTIONS
 		EXIT_DATA
 	}
 
-#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP)
+#ifndef CONFIG_X86_ABSOLUTE_PERCPU
 	PERCPU_SECTION(INTERNODE_CACHE_BYTES)
 #endif
 
@@ -474,7 +474,7 @@ SECTIONS
 . = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
 	   "kernel image bigger than KERNEL_IMAGE_SIZE");
 
-#ifdef CONFIG_X86_64
+#ifdef CONFIG_X86_ABSOLUTE_PERCPU
 /*
  * Per-cpu symbols which need to be offset from __per_cpu_load
  * for the boot processor.
@@ -483,13 +483,11 @@ SECTIONS
 INIT_PER_CPU(gdt_page);
 INIT_PER_CPU(irq_stack_backing_store);
 
-#ifdef CONFIG_STACKPROTECTOR_FIXED
 INIT_PER_CPU(fixed_percpu_data);
 . = ASSERT((fixed_percpu_data == 0),
            "fixed_percpu_data is not at start of per-cpu area");
-#endif
 
-#endif /* CONFIG_X86_64 */
+#endif /* CONFIG_X86_ABSOLUTE_PERCPU */
 
 #ifdef CONFIG_KEXEC_CORE
 #include <asm/kexec.h>
-- 
2.31.1


* Re: [PATCH 3/3] x86_64: Use relative per-cpu offsets
  2021-11-13 12:40 ` [PATCH 3/3] x86_64: Use relative per-cpu offsets Brian Gerst
@ 2021-11-14  1:18   ` Andy Lutomirski
  2021-11-14  4:24     ` H. Peter Anvin
  2021-11-14  4:54     ` Brian Gerst
  0 siblings, 2 replies; 11+ messages in thread
From: Andy Lutomirski @ 2021-11-14  1:18 UTC (permalink / raw)
  To: Brian Gerst, Linux Kernel Mailing List, the arch/x86 maintainers
  Cc: Borislav Petkov, Thomas Gleixner, H. Peter Anvin, Ingo Molnar



On Sat, Nov 13, 2021, at 4:40 AM, Brian Gerst wrote:
> The per-cpu section is currently linked at virtual address 0, because
> older compilers hardcoded the stack protector canary value at a fixed
> offset from the start of the GS segment.  Use a standard relative offset
> as the GS base when the stack protector is disabled, or a newer compiler
> is used that supports a configurable location for the stack canary.

Can you explain the benefit?  Also, I think we should consider dropping support for the fixed model like we did on x86_32.

>
> Signed-off-by: Brian Gerst <brgerst@gmail.com>
> ---
>  arch/x86/Kconfig               |  2 +-
>  arch/x86/include/asm/percpu.h  |  4 ++--
>  arch/x86/kernel/head_64.S      |  4 ----
>  arch/x86/kernel/setup_percpu.c |  2 +-
>  arch/x86/kernel/vmlinux.lds.S  | 14 ++++++--------
>  5 files changed, 10 insertions(+), 16 deletions(-)
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 832a6626df72..fae7724505bd 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -403,7 +403,7 @@ config STACKPROTECTOR_FIXED
>  	default y if !$(cc-option,-mstack-protector-guard-reg=gs)
> 
>  config X86_ABSOLUTE_PERCPU
> -	def_bool X86_64 && SMP
> +	def_bool STACKPROTECTOR_FIXED && SMP
>  	select KALLSYMS_ABSOLUTE_PERCPU
> 
>  menu "Processor type and features"
> diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
> index a3c33b79fb86..8294781bb483 100644
> --- a/arch/x86/include/asm/percpu.h
> +++ b/arch/x86/include/asm/percpu.h
> @@ -16,7 +16,7 @@
>  #define PER_CPU_VAR(var)	var
>  #endif	/* SMP */
> 
> -#ifdef CONFIG_X86_64_SMP
> +#ifdef CONFIG_X86_ABSOLUTE_PERCPU
>  #define INIT_PER_CPU_VAR(var)  init_per_cpu__##var
>  #else
>  #define INIT_PER_CPU_VAR(var)  var
> @@ -59,7 +59,7 @@
>  #define DECLARE_INIT_PER_CPU(var) \
>         extern typeof(var) init_per_cpu_var(var)
> 
> -#ifdef CONFIG_X86_64_SMP
> +#ifdef CONFIG_X86_ABSOLUTE_PERCPU
>  #define init_per_cpu_var(var)  init_per_cpu__##var
>  #else
>  #define init_per_cpu_var(var)  var
> diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
> index 6e396ffb1610..c1b6209a01ca 100644
> --- a/arch/x86/kernel/head_64.S
> +++ b/arch/x86/kernel/head_64.S
> @@ -339,12 +339,8 @@ SYM_DATA(initial_code,	.quad x86_64_start_kernel)
>  #ifdef CONFIG_STACKPROTECTOR_FIXED
>  SYM_DATA(initial_gs,	.quad INIT_PER_CPU_VAR(fixed_percpu_data))
>  #else
> -#ifdef CONFIG_SMP
> -SYM_DATA(initial_gs,	.quad __per_cpu_load)
> -#else
>  SYM_DATA(initial_gs,	.quad 0)
>  #endif
> -#endif
> 
>  #ifdef CONFIG_AMD_MEM_ENCRYPT
>  SYM_DATA(initial_vc_handler,	.quad handle_vc_boot_ghcb)
> diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
> index 5afd98559193..4c0020a6ced9 100644
> --- a/arch/x86/kernel/setup_percpu.c
> +++ b/arch/x86/kernel/setup_percpu.c
> @@ -26,7 +26,7 @@
>  DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number);
>  EXPORT_PER_CPU_SYMBOL(cpu_number);
> 
> -#ifdef CONFIG_X86_64
> +#ifdef CONFIG_X86_ABSOLUTE_PERCPU
>  #define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load)
>  #else
>  #define BOOT_PERCPU_OFFSET 0
> diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
> index c475d21d2126..18e1deb9fa52 100644
> --- a/arch/x86/kernel/vmlinux.lds.S
> +++ b/arch/x86/kernel/vmlinux.lds.S
> @@ -102,10 +102,10 @@ jiffies = jiffies_64;
>  PHDRS {
>  	text PT_LOAD FLAGS(5);          /* R_E */
>  	data PT_LOAD FLAGS(6);          /* RW_ */
> -#ifdef CONFIG_X86_64
> -#ifdef CONFIG_SMP
> +#ifdef CONFIG_X86_ABSOLUTE_PERCPU
>  	percpu PT_LOAD FLAGS(6);        /* RW_ */
>  #endif
> +#ifdef CONFIG_X86_64
>  	init PT_LOAD FLAGS(7);          /* RWE */
>  #endif
>  	note PT_NOTE FLAGS(0);          /* ___ */
> @@ -215,7 +215,7 @@ SECTIONS
>  		__init_begin = .; /* paired with __init_end */
>  	}
> 
> -#if defined(CONFIG_X86_64) && defined(CONFIG_SMP)
> +#ifdef CONFIG_X86_ABSOLUTE_PERCPU
>  	/*
>  	 * percpu offsets are zero-based on SMP.  PERCPU_VADDR() changes the
>  	 * output PHDR, so the next output section - .init.text - should
> @@ -339,7 +339,7 @@ SECTIONS
>  		EXIT_DATA
>  	}
> 
> -#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP)
> +#ifndef CONFIG_X86_ABSOLUTE_PERCPU
>  	PERCPU_SECTION(INTERNODE_CACHE_BYTES)
>  #endif
> 
> @@ -474,7 +474,7 @@ SECTIONS
>  . = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
>  	   "kernel image bigger than KERNEL_IMAGE_SIZE");
> 
> -#ifdef CONFIG_X86_64
> +#ifdef CONFIG_X86_ABSOLUTE_PERCPU
>  /*
>   * Per-cpu symbols which need to be offset from __per_cpu_load
>   * for the boot processor.
> @@ -483,13 +483,11 @@ SECTIONS
>  INIT_PER_CPU(gdt_page);
>  INIT_PER_CPU(irq_stack_backing_store);
> 
> -#ifdef CONFIG_STACKPROTECTOR_FIXED
>  INIT_PER_CPU(fixed_percpu_data);
>  . = ASSERT((fixed_percpu_data == 0),
>             "fixed_percpu_data is not at start of per-cpu area");
> -#endif
> 
> -#endif /* CONFIG_X86_64 */
> +#endif /* CONFIG_X86_ABSOLUTE_PERCPU */
> 
>  #ifdef CONFIG_KEXEC_CORE
>  #include <asm/kexec.h>
> -- 
> 2.31.1

* Re: [PATCH 3/3] x86_64: Use relative per-cpu offsets
  2021-11-14  1:18   ` Andy Lutomirski
@ 2021-11-14  4:24     ` H. Peter Anvin
  2021-11-14  4:54     ` Brian Gerst
  1 sibling, 0 replies; 11+ messages in thread
From: H. Peter Anvin @ 2021-11-14  4:24 UTC (permalink / raw)
  To: Andy Lutomirski, Brian Gerst, Linux Kernel Mailing List,
	the arch/x86 maintainers
  Cc: Borislav Petkov, Thomas Gleixner, Ingo Molnar

On 11/13/21 17:18, Andy Lutomirski wrote:
> 
> 
> On Sat, Nov 13, 2021, at 4:40 AM, Brian Gerst wrote:
>> The per-cpu section is currently linked at virtual address 0, because
>> older compilers hardcoded the stack protector canary value at a fixed
>> offset from the start of the GS segment.  Use a standard relative offset
>> as the GS base when the stack protector is disabled, or a newer compiler
>> is used that supports a configurable location for the stack canary.
> 
> Can you explain the benefit?  Also, I think we should consider dropping support for the fixed model like we did on x86_32.
> 

It would seem that UNLESS the fixed model is dropped, the benefit is 
probably negative.

	-hpa


* Re: [PATCH 3/3] x86_64: Use relative per-cpu offsets
  2021-11-14  1:18   ` Andy Lutomirski
  2021-11-14  4:24     ` H. Peter Anvin
@ 2021-11-14  4:54     ` Brian Gerst
  2021-11-14 11:03       ` Peter Zijlstra
  1 sibling, 1 reply; 11+ messages in thread
From: Brian Gerst @ 2021-11-14  4:54 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Linux Kernel Mailing List, the arch/x86 maintainers,
	Borislav Petkov, Thomas Gleixner, H. Peter Anvin, Ingo Molnar

On Sat, Nov 13, 2021 at 8:18 PM Andy Lutomirski <luto@kernel.org> wrote:
>
>
>
> On Sat, Nov 13, 2021, at 4:40 AM, Brian Gerst wrote:
> > The per-cpu section is currently linked at virtual address 0, because
> > older compilers hardcoded the stack protector canary value at a fixed
> > offset from the start of the GS segment.  Use a standard relative offset
> > as the GS base when the stack protector is disabled, or a newer compiler
> > is used that supports a configurable location for the stack canary.
>
> Can you explain the benefit?  Also, I think we should consider dropping support for the fixed model like we did on x86_32.

This patch probably makes more sense if we drop the fixed model, as
that gets rid of a lot of code that works around having to link the
percpu section differently.  I can respin this patchset to remove the
fixed model if it is agreed to require GCC 8.1 or later for stack
protector support.  The big question is whether any actively supported
distributions still use an older compiler.

--
Brian Gerst

* Re: [PATCH 3/3] x86_64: Use relative per-cpu offsets
  2021-11-14  4:54     ` Brian Gerst
@ 2021-11-14 11:03       ` Peter Zijlstra
  2021-11-14 18:29         ` Brian Gerst
  0 siblings, 1 reply; 11+ messages in thread
From: Peter Zijlstra @ 2021-11-14 11:03 UTC (permalink / raw)
  To: Brian Gerst
  Cc: Andy Lutomirski, Linux Kernel Mailing List,
	the arch/x86 maintainers, Borislav Petkov, Thomas Gleixner,
	H. Peter Anvin, Ingo Molnar

On Sat, Nov 13, 2021 at 11:54:19PM -0500, Brian Gerst wrote:
> On Sat, Nov 13, 2021 at 8:18 PM Andy Lutomirski <luto@kernel.org> wrote:
> >
> >
> >
> > On Sat, Nov 13, 2021, at 4:40 AM, Brian Gerst wrote:
> > > The per-cpu section is currently linked at virtual address 0, because
> > > older compilers hardcoded the stack protector canary value at a fixed
> > > offset from the start of the GS segment.  Use a standard relative offset
> > > as the GS base when the stack protector is disabled, or a newer compiler
> > > is used that supports a configurable location for the stack canary.
> >
> > Can you explain the benefit?  Also, I think we should consider dropping support for the fixed model like we did on x86_32.
> 
> This patch probably makes more sense if we drop the fixed model, as
> > that gets rid of a lot of code that works around having to link the
> percpu section differently.

Can someone spell out these benefits please? To me having per-cpu start
at 0 makes perfect sense, how does not having that make things better?

* Re: [PATCH 3/3] x86_64: Use relative per-cpu offsets
  2021-11-14 11:03       ` Peter Zijlstra
@ 2021-11-14 18:29         ` Brian Gerst
  2021-11-15 18:12           ` Andy Lutomirski
  2021-11-15 20:44           ` Peter Zijlstra
  0 siblings, 2 replies; 11+ messages in thread
From: Brian Gerst @ 2021-11-14 18:29 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Andy Lutomirski, Linux Kernel Mailing List,
	the arch/x86 maintainers, Borislav Petkov, Thomas Gleixner,
	H. Peter Anvin, Ingo Molnar

On Sun, Nov 14, 2021 at 6:03 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Sat, Nov 13, 2021 at 11:54:19PM -0500, Brian Gerst wrote:
> > On Sat, Nov 13, 2021 at 8:18 PM Andy Lutomirski <luto@kernel.org> wrote:
> > >
> > >
> > >
> > > On Sat, Nov 13, 2021, at 4:40 AM, Brian Gerst wrote:
> > > > The per-cpu section is currently linked at virtual address 0, because
> > > > older compilers hardcoded the stack protector canary value at a fixed
> > > > offset from the start of the GS segment.  Use a standard relative offset
> > > > as the GS base when the stack protector is disabled, or a newer compiler
> > > > is used that supports a configurable location for the stack canary.
> > >
> > > Can you explain the benefit?  Also, I think we should consider dropping support for the fixed model like we did on x86_32.
> >
> > This patch probably makes more sense if we drop the fixed model, as
> > that gets rid of a lot of code that works around having to link the
> > percpu section differently.
>
> Can someone spell out these benefits please? To me having per-cpu start
> at 0 makes perfect sense, how does not having that make things better?

The best reason is that the percpu section is currently not subject to
KASLR.  It actually needs extra support to counter the effects of
relocation.  There have also been a number of linker bugs over the
years that have had to be worked around.

If we were to decide to drop the fixed stack protector the diffstat
would look something like:

 arch/x86/Makefile                         |  19 ++--
 arch/x86/boot/compressed/misc.c           |  12 ---
 arch/x86/entry/entry_64.S                 |   2 +-
 arch/x86/include/asm/percpu.h             |  22 -----
 arch/x86/include/asm/processor.h          |  24 ++---
 arch/x86/include/asm/stackprotector.h     |  13 +--
 arch/x86/kernel/asm-offsets_64.c          |   6 --
 arch/x86/kernel/cpu/common.c              |   8 +-
 arch/x86/kernel/head_64.S                 |  11 ++-
 arch/x86/kernel/irq_64.c                  |   1 -
 arch/x86/kernel/vmlinux.lds.S             |  33 -------
 arch/x86/tools/relocs.c                   | 143 +-----------------------------
 arch/x86/xen/xen-head.S                   |  10 +--
 scripts/gcc-x86_64-has-stack-protector.sh |   2 +-
 14 files changed, 35 insertions(+), 271 deletions(-)

--
Brian Gerst

* Re: [PATCH 3/3] x86_64: Use relative per-cpu offsets
  2021-11-14 18:29         ` Brian Gerst
@ 2021-11-15 18:12           ` Andy Lutomirski
  2021-11-15 20:44           ` Peter Zijlstra
  1 sibling, 0 replies; 11+ messages in thread
From: Andy Lutomirski @ 2021-11-15 18:12 UTC (permalink / raw)
  To: Brian Gerst, Peter Zijlstra (Intel)
  Cc: Linux Kernel Mailing List, the arch/x86 maintainers,
	Borislav Petkov, Thomas Gleixner, H. Peter Anvin, Ingo Molnar



On Sun, Nov 14, 2021, at 10:29 AM, Brian Gerst wrote:
> On Sun, Nov 14, 2021 at 6:03 AM Peter Zijlstra <peterz@infradead.org> wrote:
>>
>> On Sat, Nov 13, 2021 at 11:54:19PM -0500, Brian Gerst wrote:
>> > On Sat, Nov 13, 2021 at 8:18 PM Andy Lutomirski <luto@kernel.org> wrote:
>> > >
>> > >
>> > >
>> > > On Sat, Nov 13, 2021, at 4:40 AM, Brian Gerst wrote:
>> > > > The per-cpu section is currently linked at virtual address 0, because
>> > > > older compilers hardcoded the stack protector canary value at a fixed
>> > > > offset from the start of the GS segment.  Use a standard relative offset
>> > > > as the GS base when the stack protector is disabled, or a newer compiler
>> > > > is used that supports a configurable location for the stack canary.
>> > >
>> > > Can you explain the benefit?  Also, I think we should consider dropping support for the fixed model like we did on x86_32.
>> >
>> > This patch probably makes more sense if we drop the fixed model, as
>> > that gets rid of a lot of code that works around having to link the
>> > percpu section differently.
>>
>> Can someone spell out these benefits please? To me having per-cpu start
>> at 0 makes perfect sense, how does not having that make things better?
>
> The best reason is that the percpu section is currently not subject to
> KASLR.  It actually needs extra support to counter the effects of
> relocation.  There have also been a number of linker bugs over the
> years that have had to be worked around.
>
> If we were to decide to drop the fixed stack protector the diffstat
> would look something like:
>
>  arch/x86/Makefile                         |  19 ++--
>  arch/x86/boot/compressed/misc.c           |  12 ---
>  arch/x86/entry/entry_64.S                 |   2 +-
>  arch/x86/include/asm/percpu.h             |  22 -----
>  arch/x86/include/asm/processor.h          |  24 ++---
>  arch/x86/include/asm/stackprotector.h     |  13 +--
>  arch/x86/kernel/asm-offsets_64.c          |   6 --
>  arch/x86/kernel/cpu/common.c              |   8 +-
>  arch/x86/kernel/head_64.S                 |  11 ++-
>  arch/x86/kernel/irq_64.c                  |   1 -
>  arch/x86/kernel/vmlinux.lds.S             |  33 -------
>  arch/x86/tools/relocs.c                   | 143 +-----------------------------
>  arch/x86/xen/xen-head.S                   |  10 +--
>  scripts/gcc-x86_64-has-stack-protector.sh |   2 +-
>  14 files changed, 35 insertions(+), 271 deletions(-)
>

Ack.  We did this for 32-bit and got few complaints. Let’s finish the job.

> --
> Brian Gerst

* Re: [PATCH 3/3] x86_64: Use relative per-cpu offsets
  2021-11-14 18:29         ` Brian Gerst
  2021-11-15 18:12           ` Andy Lutomirski
@ 2021-11-15 20:44           ` Peter Zijlstra
  1 sibling, 0 replies; 11+ messages in thread
From: Peter Zijlstra @ 2021-11-15 20:44 UTC (permalink / raw)
  To: Brian Gerst
  Cc: Andy Lutomirski, Linux Kernel Mailing List,
	the arch/x86 maintainers, Borislav Petkov, Thomas Gleixner,
	H. Peter Anvin, Ingo Molnar

On Sun, Nov 14, 2021 at 01:29:46PM -0500, Brian Gerst wrote:
> On Sun, Nov 14, 2021 at 6:03 AM Peter Zijlstra <peterz@infradead.org> wrote:
> >
> > On Sat, Nov 13, 2021 at 11:54:19PM -0500, Brian Gerst wrote:
> > > On Sat, Nov 13, 2021 at 8:18 PM Andy Lutomirski <luto@kernel.org> wrote:
> > > >
> > > >
> > > >
> > > > On Sat, Nov 13, 2021, at 4:40 AM, Brian Gerst wrote:
> > > > > The per-cpu section is currently linked at virtual address 0, because
> > > > > older compilers hardcoded the stack protector canary value at a fixed
> > > > > offset from the start of the GS segment.  Use a standard relative offset
> > > > > as the GS base when the stack protector is disabled, or a newer compiler
> > > > > is used that supports a configurable location for the stack canary.
> > > >
> > > > Can you explain the benefit?  Also, I think we should consider dropping support for the fixed model like we did on x86_32.
> > >
> > > This patch probably makes more sense if we drop the fixed model, as
> > > that gets rid of a lot of code that works around having to link the
> > > percpu section differently.
> >
> > Can someone spell out these benefits please? To me having per-cpu start
> > at 0 makes perfect sense, how does not having that make things better?
> 
> The best reason is that the percpu section is currently not subject to
> KASLR.  It actually needs extra support to counter the effects of
> relocation.  There have also been a number of linker bugs over the
> years that have had to be worked around.

I'm confused... having the variables 0-offset is related to KASLR how?
The dynamic placement of per-cpu thunks and their base address in %GS
gives plenty of opportunity to move them around at boot time, no?

> If we were to decide to drop the fixed stack protector the diffstat
> would look something like:

Dropping the fixed stack protector seems fine to me; I just don't see
why we should move away from 0-offset.
