linux-hardening.vger.kernel.org archive mirror
* [PATCH v6 0/8] ARM vmap'ed and IRQ stacks roundup
@ 2022-01-25  9:14 Ard Biesheuvel
  2022-01-25  9:14 ` [PATCH v6 1/8] ARM: mm: switch to swapper_pg_dir early for vmap'ed stack Ard Biesheuvel
                   ` (7 more replies)
  0 siblings, 8 replies; 10+ messages in thread
From: Ard Biesheuvel @ 2022-01-25  9:14 UTC (permalink / raw)
  To: linux, linux-arm-kernel
  Cc: linux-hardening, Ard Biesheuvel, Arnd Bergmann, Kees Cook,
	Keith Packard, Linus Walleij, Nick Desaulniers, Marc Zyngier

This v6 series is a followup to [0], and presents the proposed changes
as a set of delta patches against the v4 series that was already taken
into rmk/devel-stable and subsequently dropped from v5.17 when it turned
out that there were unresolved issues related to suspend/resume.

These patches apply directly onto rmk/devel-stable, which means there is
an unresolved conflict with the changes that landed in v5.17-rc1 via the
ARM tree; this will need to be resolved by the maintainer.

Code can be found under the arm-vmap-stacks-v6 tag at
git://git.kernel.org/pub/scm/linux/kernel/git/ardb/linux.git

[0] https://lore.kernel.org/linux-arm-kernel/20220124174744.1054712-1-ardb@kernel.org/

Cc: Russell King <linux@armlinux.org.uk>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Kees Cook <keescook@chromium.org>
Cc: Keith Packard <keithpac@amazon.com>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Marc Zyngier <maz@kernel.org>

Ard Biesheuvel (8):
  ARM: mm: switch to swapper_pg_dir early for vmap'ed stack
  ARM: assembler: define a Kconfig symbol for group relocation support
  ARM: smp: elide HWCAP_TLS checks or __entry_task updates on SMP+v6
  ARM: entry: avoid clobbering R9 in IRQ handler
  ARM: mm: make vmalloc_seq handling SMP safe
  ARM: iop: make iop_handle_irq() static
  ARM: drop pointless SMP check on secondary startup path
  ARM: make get_current() and __my_cpu_offset() __always_inline

 arch/arm/Kconfig                   | 13 +++++++++-
 arch/arm/include/asm/assembler.h   |  8 +++----
 arch/arm/include/asm/current.h     | 10 ++++----
 arch/arm/include/asm/mmu.h         |  2 +-
 arch/arm/include/asm/mmu_context.h | 22 +++++++++++++++--
 arch/arm/include/asm/page.h        |  3 +--
 arch/arm/include/asm/percpu.h      |  6 ++---
 arch/arm/include/asm/switch_to.h   |  4 ++--
 arch/arm/include/asm/tls.h         | 22 ++++++++++++-----
 arch/arm/kernel/entry-armv.S       |  9 ++++---
 arch/arm/kernel/entry-header.S     | 17 ++++++-------
 arch/arm/kernel/head.S             |  7 ++++++
 arch/arm/kernel/module.c           |  7 +++++-
 arch/arm/kernel/sleep.S            |  7 ++++++
 arch/arm/kernel/smp.c              |  5 ----
 arch/arm/kernel/traps.c            | 25 ++++++--------------
 arch/arm/mach-iop32x/irq.c         |  2 +-
 arch/arm/mm/context.c              |  3 +--
 arch/arm/mm/ioremap.c              | 18 ++++++++------
 19 files changed, 115 insertions(+), 75 deletions(-)

-- 
2.30.2


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH v6 1/8] ARM: mm: switch to swapper_pg_dir early for vmap'ed stack
  2022-01-25  9:14 [PATCH v6 0/8] ARM vmap'ed and IRQ stacks roundup Ard Biesheuvel
@ 2022-01-25  9:14 ` Ard Biesheuvel
  2022-01-25  9:14 ` [PATCH v6 2/8] ARM: assembler: define a Kconfig symbol for group relocation support Ard Biesheuvel
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 10+ messages in thread
From: Ard Biesheuvel @ 2022-01-25  9:14 UTC (permalink / raw)
  To: linux, linux-arm-kernel
  Cc: linux-hardening, Ard Biesheuvel, Arnd Bergmann, Kees Cook,
	Keith Packard, Linus Walleij, Nick Desaulniers, Marc Zyngier

When onlining a CPU, switch to swapper_pg_dir as soon as possible so
that it is guaranteed that the vmap'ed stack is mapped before it is
used.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm/Kconfig        | 2 +-
 arch/arm/kernel/head.S  | 7 +++++++
 arch/arm/kernel/sleep.S | 7 +++++++
 3 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index c32b79453ddf..359a3b85c8b3 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -128,7 +128,7 @@ config ARM
 	select RTC_LIB
 	select SYS_SUPPORTS_APM_EMULATION
 	select THREAD_INFO_IN_TASK
-	select HAVE_ARCH_VMAP_STACK if MMU && (!LD_IS_LLD || LLD_VERSION >= 140000) && !PM_SLEEP_SMP
+	select HAVE_ARCH_VMAP_STACK if MMU && (!LD_IS_LLD || LLD_VERSION >= 140000)
 	select TRACE_IRQFLAGS_SUPPORT if !CPU_V7M
 	# Above selects are sorted alphabetically; please add new ones
 	# according to that.  Thanks.
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index c04dd94630c7..500612d3da2e 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -424,6 +424,13 @@ ENDPROC(secondary_startup)
 ENDPROC(secondary_startup_arm)
 
 ENTRY(__secondary_switched)
+#if defined(CONFIG_VMAP_STACK) && !defined(CONFIG_ARM_LPAE)
+	@ Before using the vmap'ed stack, we have to switch to swapper_pg_dir
+	@ as the ID map does not cover the vmalloc region.
+	mrc	p15, 0, ip, c2, c0, 1	@ read TTBR1
+	mcr	p15, 0, ip, c2, c0, 0	@ set TTBR0
+	instr_sync
+#endif
 	adr_l	r7, secondary_data + 12		@ get secondary_data.stack
 	ldr	sp, [r7]
 	ldr	r0, [r7, #4]			@ get secondary_data.task
diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S
index f909baf17912..a86a1d4f3461 100644
--- a/arch/arm/kernel/sleep.S
+++ b/arch/arm/kernel/sleep.S
@@ -119,6 +119,13 @@ ENTRY(cpu_resume_mmu)
 ENDPROC(cpu_resume_mmu)
 	.popsection
 cpu_resume_after_mmu:
+#if defined(CONFIG_VMAP_STACK) && !defined(CONFIG_ARM_LPAE)
+	@ Before using the vmap'ed stack, we have to switch to swapper_pg_dir
+	@ as the ID map does not cover the vmalloc region.
+	mrc	p15, 0, ip, c2, c0, 1	@ read TTBR1
+	mcr	p15, 0, ip, c2, c0, 0	@ set TTBR0
+	instr_sync
+#endif
 	bl	cpu_init		@ restore the und/abt/irq banked regs
 	mov	r0, #0			@ return zero on success
 	ldmfd	sp!, {r4 - r11, pc}
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH v6 2/8] ARM: assembler: define a Kconfig symbol for group relocation support
  2022-01-25  9:14 [PATCH v6 0/8] ARM vmap'ed and IRQ stacks roundup Ard Biesheuvel
  2022-01-25  9:14 ` [PATCH v6 1/8] ARM: mm: switch to swapper_pg_dir early for vmap'ed stack Ard Biesheuvel
@ 2022-01-25  9:14 ` Ard Biesheuvel
  2022-01-25  9:14 ` [PATCH v6 3/8] ARM: smp: elide HWCAP_TLS checks or __entry_task updates on SMP+v6 Ard Biesheuvel
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 10+ messages in thread
From: Ard Biesheuvel @ 2022-01-25  9:14 UTC (permalink / raw)
  To: linux, linux-arm-kernel
  Cc: linux-hardening, Ard Biesheuvel, Arnd Bergmann, Kees Cook,
	Keith Packard, Linus Walleij, Nick Desaulniers, Marc Zyngier

Nathan reports that the group relocations go out of range in pathological
cases such as allyesconfig kernels, which have little chance of actually
booting but are still used for validation.

So add a Kconfig symbol for this feature, and make it depend on
!COMPILE_TEST.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm/Kconfig                 | 13 ++++++++++++-
 arch/arm/include/asm/assembler.h |  8 ++++----
 arch/arm/include/asm/current.h   |  8 ++++----
 arch/arm/include/asm/percpu.h    |  4 ++--
 arch/arm/kernel/module.c         |  7 ++++++-
 5 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 359a3b85c8b3..70ab8d807032 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -128,7 +128,7 @@ config ARM
 	select RTC_LIB
 	select SYS_SUPPORTS_APM_EMULATION
 	select THREAD_INFO_IN_TASK
-	select HAVE_ARCH_VMAP_STACK if MMU && (!LD_IS_LLD || LLD_VERSION >= 140000)
+	select HAVE_ARCH_VMAP_STACK if MMU && ARM_HAS_GROUP_RELOCS
 	select TRACE_IRQFLAGS_SUPPORT if !CPU_V7M
 	# Above selects are sorted alphabetically; please add new ones
 	# according to that.  Thanks.
@@ -140,6 +140,17 @@ config ARM
 	  Europe.  There is an ARM Linux project with a web page at
 	  <http://www.arm.linux.org.uk/>.
 
+config ARM_HAS_GROUP_RELOCS
+	def_bool y
+	depends on !LD_IS_LLD || LLD_VERSION >= 140000
+	depends on !COMPILE_TEST
+	help
+	  Whether or not to use R_ARM_ALU_PC_Gn or R_ARM_LDR_PC_Gn group
+	  relocations, which have been around for a long time, but were not
+	  supported in LLD until version 14. The combined range is -/+ 256 MiB,
+	  which is usually sufficient, but not for allyesconfig, so we disable
+	  this feature when doing compile testing.
+
 config ARM_HAS_SG_CHAIN
 	bool
 
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 59d7b9e81934..9998718a49ca 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -656,8 +656,8 @@ THUMB(	orr	\reg , \reg , #PSR_T_BIT	)
 
 	.macro		__ldst_va, op, reg, tmp, sym, cond
 #if __LINUX_ARM_ARCH__ >= 7 || \
-    (defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS)) || \
-    (defined(CONFIG_LD_IS_LLD) && CONFIG_LLD_VERSION < 140000)
+    !defined(CONFIG_ARM_HAS_GROUP_RELOCS) || \
+    (defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS))
 	mov_l		\tmp, \sym, \cond
 	\op\cond	\reg, [\tmp]
 #else
@@ -716,8 +716,8 @@ THUMB(	orr	\reg , \reg , #PSR_T_BIT	)
 	 */
 	.macro		ldr_this_cpu, rd:req, sym:req, t1:req, t2:req
 #if __LINUX_ARM_ARCH__ >= 7 || \
-    (defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS)) || \
-    (defined(CONFIG_LD_IS_LLD) && CONFIG_LLD_VERSION < 140000)
+    !defined(CONFIG_ARM_HAS_GROUP_RELOCS) || \
+    (defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS))
 	this_cpu_offset	\t1
 	mov_l		\t2, \sym
 	ldr		\rd, [\t1, \t2]
diff --git a/arch/arm/include/asm/current.h b/arch/arm/include/asm/current.h
index 2f9d79214b25..131a89bbec6b 100644
--- a/arch/arm/include/asm/current.h
+++ b/arch/arm/include/asm/current.h
@@ -37,8 +37,8 @@ static inline __attribute_const__ struct task_struct *get_current(void)
 #ifdef CONFIG_CPU_V6
 	    "1:							\n\t"
 	    "	.subsection 1					\n\t"
-#if !(defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS)) && \
-    !(defined(CONFIG_LD_IS_LLD) && CONFIG_LLD_VERSION < 140000)
+#if defined(CONFIG_ARM_HAS_GROUP_RELOCS) && \
+    !(defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS))
 	    "2: " LOAD_SYM_ARMV6(%0, __current) "		\n\t"
 	    "	b	1b					\n\t"
 #else
@@ -55,8 +55,8 @@ static inline __attribute_const__ struct task_struct *get_current(void)
 #endif
 	    : "=r"(cur));
 #elif __LINUX_ARM_ARCH__>= 7 || \
-      (defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS)) || \
-      (defined(CONFIG_LD_IS_LLD) && CONFIG_LLD_VERSION < 140000)
+      !defined(CONFIG_ARM_HAS_GROUP_RELOCS) || \
+      (defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS))
 	cur = __current;
 #else
 	asm(LOAD_SYM_ARMV6(%0, __current) : "=r"(cur));
diff --git a/arch/arm/include/asm/percpu.h b/arch/arm/include/asm/percpu.h
index 28961d60877d..a09034ae45a1 100644
--- a/arch/arm/include/asm/percpu.h
+++ b/arch/arm/include/asm/percpu.h
@@ -38,8 +38,8 @@ static inline unsigned long __my_cpu_offset(void)
 #ifdef CONFIG_CPU_V6
 	    "1:							\n\t"
 	    "	.subsection 1					\n\t"
-#if !(defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS)) && \
-    !(defined(CONFIG_LD_IS_LLD) && CONFIG_LLD_VERSION < 140000)
+#if defined(CONFIG_ARM_HAS_GROUP_RELOCS) && \
+    !(defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS))
 	    "2: " LOAD_SYM_ARMV6(%0, __per_cpu_offset) "	\n\t"
 	    "	b	1b					\n\t"
 #else
diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
index 4d33a7acf617..549abcedf795 100644
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@@ -68,6 +68,7 @@ bool module_exit_section(const char *name)
 		strstarts(name, ".ARM.exidx.exit");
 }
 
+#ifdef CONFIG_ARM_HAS_GROUP_RELOCS
 /*
  * This implements the partitioning algorithm for group relocations as
  * documented in the ARM AArch32 ELF psABI (IHI 0044).
@@ -103,6 +104,7 @@ static u32 get_group_rem(u32 group, u32 *offset)
 	} while (group--);
 	return shift;
 }
+#endif
 
 int
 apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
@@ -118,7 +120,9 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
 		unsigned long loc;
 		Elf32_Sym *sym;
 		const char *symname;
+#ifdef CONFIG_ARM_HAS_GROUP_RELOCS
 		u32 shift, group = 1;
+#endif
 		s32 offset;
 		u32 tmp;
 #ifdef CONFIG_THUMB2_KERNEL
@@ -249,6 +253,7 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
 			*(u32 *)loc = __opcode_to_mem_arm(tmp);
 			break;
 
+#ifdef CONFIG_ARM_HAS_GROUP_RELOCS
 		case R_ARM_ALU_PC_G0_NC:
 			group = 0;
 			fallthrough;
@@ -296,7 +301,7 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
 			}
 			*(u32 *)loc = __opcode_to_mem_arm((tmp & ~0xfff) | offset);
 			break;
-
+#endif
 #ifdef CONFIG_THUMB2_KERNEL
 		case R_ARM_THM_CALL:
 		case R_ARM_THM_JUMP24:
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 10+ messages in thread
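
For a rough picture of the partitioning this patch is gating, here is a
hedged userspace sketch (not the kernel's get_group_rem() implementation;
next_group() and the variable names are made up for the example): each
"group" peels off an 8-bit field placed at an even bit position, i.e. the
shape an ARM ALU modified immediate can encode, and two such chunks plus
a 12-bit LDR offset give the +/-256 MiB combined range mentioned in the
Kconfig help text above.

#include <stdint.h>
#include <stdio.h>

static uint32_t next_group(uint32_t *residual)
{
	uint32_t val = *residual;
	unsigned int msb, shift;
	uint32_t chunk;

	if (!val)
		return 0;
	msb = 31 - __builtin_clz(val);            /* highest set bit */
	shift = msb >= 7 ? (msb - 6) & ~1u : 0;   /* even position covering msb */
	chunk = val & (0xffu << shift);           /* take the topmost 8-bit field */
	*residual = val - chunk;
	return chunk;
}

int main(void)
{
	uint32_t rem = 0x01234567;

	for (int g = 0; rem; g++) {
		uint32_t chunk = next_group(&rem);
		printf("G%d = %#010x, residual = %#010x\n",
		       g, (unsigned)chunk, (unsigned)rem);
	}
	return 0;
}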

* [PATCH v6 3/8] ARM: smp: elide HWCAP_TLS checks or __entry_task updates on SMP+v6
  2022-01-25  9:14 [PATCH v6 0/8] ARM vmap'ed and IRQ stacks roundup Ard Biesheuvel
  2022-01-25  9:14 ` [PATCH v6 1/8] ARM: mm: switch to swapper_pg_dir early for vmap'ed stack Ard Biesheuvel
  2022-01-25  9:14 ` [PATCH v6 2/8] ARM: assembler: define a Kconfig symbol for group relocation support Ard Biesheuvel
@ 2022-01-25  9:14 ` Ard Biesheuvel
  2022-01-25  9:14 ` [PATCH v6 4/8] ARM: entry: avoid clobbering R9 in IRQ handler Ard Biesheuvel
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 10+ messages in thread
From: Ard Biesheuvel @ 2022-01-25  9:14 UTC (permalink / raw)
  To: linux, linux-arm-kernel
  Cc: linux-hardening, Ard Biesheuvel, Arnd Bergmann, Kees Cook,
	Keith Packard, Linus Walleij, Nick Desaulniers, Marc Zyngier

Use the SMP_ON_UP patching framework to elide HWCAP_TLS tests from the
context switch and return to userspace code paths, as SMP systems are
guaranteed to have this h/w capability.

At the same time, omit the update of __entry_task if the system is
detected to be UP at runtime, as in that case, the value is never used.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm/include/asm/switch_to.h |  4 ++--
 arch/arm/include/asm/tls.h       | 22 ++++++++++++++------
 arch/arm/kernel/entry-header.S   | 17 +++++++--------
 3 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/arch/arm/include/asm/switch_to.h b/arch/arm/include/asm/switch_to.h
index a482c99934ff..f67ae946a3c6 100644
--- a/arch/arm/include/asm/switch_to.h
+++ b/arch/arm/include/asm/switch_to.h
@@ -3,6 +3,7 @@
 #define __ASM_ARM_SWITCH_TO_H
 
 #include <linux/thread_info.h>
+#include <asm/smp_plat.h>
 
 /*
  * For v7 SMP cores running a preemptible kernel we may be pre-empted
@@ -40,8 +41,7 @@ static inline void set_ti_cpu(struct task_struct *p)
 do {									\
 	__complete_pending_tlbi();					\
 	set_ti_cpu(next);						\
-	if (IS_ENABLED(CONFIG_CURRENT_POINTER_IN_TPIDRURO) ||		\
-	    IS_ENABLED(CONFIG_SMP))					\
+	if (IS_ENABLED(CONFIG_CURRENT_POINTER_IN_TPIDRURO) || is_smp())	\
 		__this_cpu_write(__entry_task, next);			\
 	last = __switch_to(prev,task_thread_info(prev), task_thread_info(next));	\
 } while (0)
diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
index d712c170c095..3dcd0f71a0da 100644
--- a/arch/arm/include/asm/tls.h
+++ b/arch/arm/include/asm/tls.h
@@ -18,22 +18,32 @@
 	.endm
 
 	.macro switch_tls_v6, base, tp, tpuser, tmp1, tmp2
+#ifdef CONFIG_SMP
+ALT_SMP(nop)
+ALT_UP_B(.L0_\@)
+	.subsection 1
+#endif
+.L0_\@:
 	ldr_va	\tmp1, elf_hwcap
 	mov	\tmp2, #0xffff0fff
 	tst	\tmp1, #HWCAP_TLS		@ hardware TLS available?
 	streq	\tp, [\tmp2, #-15]		@ set TLS value at 0xffff0ff0
-	mrcne	p15, 0, \tmp2, c13, c0, 2	@ get the user r/w register
-#ifndef CONFIG_SMP
-	mcrne	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
+	beq	.L2_\@
+	mcr	p15, 0, \tp, c13, c0, 3		@ yes, set TLS register
+#ifdef CONFIG_SMP
+	b	.L1_\@
+	.previous
 #endif
-	mcrne	p15, 0, \tpuser, c13, c0, 2	@ set user r/w register
-	strne	\tmp2, [\base, #TI_TP_VALUE + 4] @ save it
+.L1_\@: switch_tls_v6k \base, \tp, \tpuser, \tmp1, \tmp2
+.L2_\@:
 	.endm
 
 	.macro switch_tls_software, base, tp, tpuser, tmp1, tmp2
 	mov	\tmp1, #0xffff0fff
 	str	\tp, [\tmp1, #-15]		@ set TLS value at 0xffff0ff0
 	.endm
+#else
+#include <asm/smp_plat.h>
 #endif
 
 #ifdef CONFIG_TLS_REG_EMUL
@@ -44,7 +54,7 @@
 #elif defined(CONFIG_CPU_V6)
 #define tls_emu		0
 #define has_tls_reg		(elf_hwcap & HWCAP_TLS)
-#define defer_tls_reg_update	IS_ENABLED(CONFIG_SMP)
+#define defer_tls_reg_update	is_smp()
 #define switch_tls	switch_tls_v6
 #elif defined(CONFIG_CPU_32v6K)
 #define tls_emu		0
diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S
index cb82ff5adec1..9a1dc142f782 100644
--- a/arch/arm/kernel/entry-header.S
+++ b/arch/arm/kernel/entry-header.S
@@ -292,21 +292,18 @@
 
 
 	.macro	restore_user_regs, fast = 0, offset = 0
-#if defined(CONFIG_CPU_32v6K) || defined(CONFIG_SMP)
-#if defined(CONFIG_CPU_V6) && defined(CONFIG_SMP)
-ALT_SMP(b	.L1_\@	)
-ALT_UP( nop		)
-	ldr_va	r1, elf_hwcap
-	tst	r1, #HWCAP_TLS			@ hardware TLS available?
-	beq	.L2_\@
-.L1_\@:
+#if defined(CONFIG_CPU_32v6K) && \
+    (!defined(CONFIG_CPU_V6) || defined(CONFIG_SMP))
+#ifdef CONFIG_CPU_V6
+ALT_SMP(nop)
+ALT_UP_B(.L1_\@)
 #endif
 	@ The TLS register update is deferred until return to user space so we
 	@ can use it for other things while running in the kernel
-	get_thread_info r1
+	mrc	p15, 0, r1, c13, c0, 3		@ get current_thread_info pointer
 	ldr	r1, [r1, #TI_TP_VALUE]
 	mcr	p15, 0, r1, c13, c0, 3		@ set TLS register
-.L2_\@:
+.L1_\@:
 #endif
 
 	uaccess_enable r1, isb=0
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 10+ messages in thread
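
As a minimal illustration of the distinction the patch above relies on,
here is a hedged userspace sketch with made-up names, not kernel code:
BUILT_WITH_SMP and running_on_smp_hw stand in for IS_ENABLED(CONFIG_SMP)
and is_smp() respectively.

#include <stdbool.h>
#include <stdio.h>

#define BUILT_WITH_SMP 1              /* compile-time: kernel built with SMP support */
static bool running_on_smp_hw;        /* runtime: what the booted hardware reports   */

static void context_switch_hook(void)
{
	if (BUILT_WITH_SMP)           /* old check: always true on an SMP build */
		puts("compile-time check: would always update __entry_task");

	if (running_on_smp_hw)        /* new check: skip the work on UP hardware */
		puts("runtime check: update __entry_task");
	else
		puts("runtime check: skip it, the value is never read on UP");
}

int main(void)
{
	running_on_smp_hw = false;    /* e.g. an SMP kernel booted on a UP machine */
	context_switch_hook();
	return 0;
}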

* [PATCH v6 4/8] ARM: entry: avoid clobbering R9 in IRQ handler
  2022-01-25  9:14 [PATCH v6 0/8] ARM vmap'ed and IRQ stacks roundup Ard Biesheuvel
                   ` (2 preceding siblings ...)
  2022-01-25  9:14 ` [PATCH v6 3/8] ARM: smp: elide HWCAP_TLS checks or __entry_task updates on SMP+v6 Ard Biesheuvel
@ 2022-01-25  9:14 ` Ard Biesheuvel
  2022-01-25  9:14 ` [PATCH v6 5/8] ARM: mm: make vmalloc_seq handling SMP safe Ard Biesheuvel
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 10+ messages in thread
From: Ard Biesheuvel @ 2022-01-25  9:14 UTC (permalink / raw)
  To: linux, linux-arm-kernel
  Cc: linux-hardening, Ard Biesheuvel, Arnd Bergmann, Kees Cook,
	Keith Packard, Linus Walleij, Nick Desaulniers, Marc Zyngier

Avoid using R9 in the IRQ handler code, as the entry code uses it for
tsk, and expects it to remain untouched between the IRQ entry and exit
code.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm/kernel/entry-armv.S | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index b58bda51e4b8..038aabb6578f 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -38,11 +38,10 @@
 #ifdef CONFIG_UNWINDER_ARM
 	mov	fpreg, sp		@ Preserve original SP
 #else
-	mov	r8, fp			@ Preserve original FP
-	mov	r9, sp			@ Preserve original SP
+	mov	r7, fp			@ Preserve original FP
+	mov	r8, sp			@ Preserve original SP
 #endif
 	ldr_this_cpu sp, irq_stack_ptr, r2, r3
-
 	.if	\from_user == 0
 UNWIND(	.setfp	fpreg, sp		)
 	@
@@ -82,8 +81,8 @@ UNWIND(	.setfp	fpreg, sp		)
 #ifdef CONFIG_UNWINDER_ARM
 	mov	sp, fpreg		@ Restore original SP
 #else
-	mov	fp, r8			@ Restore original FP
-	mov	sp, r9			@ Restore original SP
+	mov	fp, r7			@ Restore original FP
+	mov	sp, r8			@ Restore original SP
 #endif // CONFIG_UNWINDER_ARM
 #endif // CONFIG_IRQSTACKS
 	.endm
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH v6 5/8] ARM: mm: make vmalloc_seq handling SMP safe
  2022-01-25  9:14 [PATCH v6 0/8] ARM vmap'ed and IRQ stacks roundup Ard Biesheuvel
                   ` (3 preceding siblings ...)
  2022-01-25  9:14 ` [PATCH v6 4/8] ARM: entry: avoid clobbering R9 in IRQ handler Ard Biesheuvel
@ 2022-01-25  9:14 ` Ard Biesheuvel
  2022-01-25  9:14 ` [PATCH v6 6/8] ARM: iop: make iop_handle_irq() static Ard Biesheuvel
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 10+ messages in thread
From: Ard Biesheuvel @ 2022-01-25  9:14 UTC (permalink / raw)
  To: linux, linux-arm-kernel
  Cc: linux-hardening, Ard Biesheuvel, Arnd Bergmann, Kees Cook,
	Keith Packard, Linus Walleij, Nick Desaulniers, Marc Zyngier

Rework the vmalloc_seq handling so it can be used safely under SMP. We
now rely on it to ensure that vmap'ed stacks are mapped by the active mm
before switching to a task, which means that changes to the page tables
must be visible to other CPUs by the time they observe a change in the
sequence count.

Since LPAE needs none of this, fold a check against it into the
vmalloc_seq counter check after breaking that check out into a separate
static inline helper.

Given that vmap'ed stacks are now also supported on !SMP configurations,
drop the WARN() that could now fire spuriously.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm/include/asm/mmu.h         |  2 +-
 arch/arm/include/asm/mmu_context.h | 22 +++++++++++++++--
 arch/arm/include/asm/page.h        |  3 +--
 arch/arm/kernel/traps.c            | 25 ++++++--------------
 arch/arm/mm/context.c              |  3 +--
 arch/arm/mm/ioremap.c              | 18 ++++++++------
 6 files changed, 41 insertions(+), 32 deletions(-)

diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h
index 1592a4264488..e049723840d3 100644
--- a/arch/arm/include/asm/mmu.h
+++ b/arch/arm/include/asm/mmu.h
@@ -10,7 +10,7 @@ typedef struct {
 #else
 	int		switch_pending;
 #endif
-	unsigned int	vmalloc_seq;
+	atomic_t	vmalloc_seq;
 	unsigned long	sigpage;
 #ifdef CONFIG_VDSO
 	unsigned long	vdso;
diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h
index 84e58956fcab..db2cb06aa8cf 100644
--- a/arch/arm/include/asm/mmu_context.h
+++ b/arch/arm/include/asm/mmu_context.h
@@ -23,6 +23,16 @@
 
 void __check_vmalloc_seq(struct mm_struct *mm);
 
+#ifdef CONFIG_MMU
+static inline void check_vmalloc_seq(struct mm_struct *mm)
+{
+	if (!IS_ENABLED(CONFIG_ARM_LPAE) &&
+	    unlikely(atomic_read(&mm->context.vmalloc_seq) !=
+		     atomic_read(&init_mm.context.vmalloc_seq)))
+		__check_vmalloc_seq(mm);
+}
+#endif
+
 #ifdef CONFIG_CPU_HAS_ASID
 
 void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk);
@@ -52,8 +62,7 @@ static inline void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm,
 static inline void check_and_switch_context(struct mm_struct *mm,
 					    struct task_struct *tsk)
 {
-	if (unlikely(mm->context.vmalloc_seq != init_mm.context.vmalloc_seq))
-		__check_vmalloc_seq(mm);
+	check_vmalloc_seq(mm);
 
 	if (irqs_disabled())
 		/*
@@ -129,6 +138,15 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
 #endif
 }
 
+#ifdef CONFIG_VMAP_STACK
+static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+{
+	if (mm != &init_mm)
+		check_vmalloc_seq(mm);
+}
+#define enter_lazy_tlb enter_lazy_tlb
+#endif
+
 #include <asm-generic/mmu_context.h>
 
 #endif
diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h
index 7b871ed99ccf..5fcc8a600e36 100644
--- a/arch/arm/include/asm/page.h
+++ b/arch/arm/include/asm/page.h
@@ -147,11 +147,10 @@ extern void copy_page(void *to, const void *from);
 #include <asm/pgtable-3level-types.h>
 #else
 #include <asm/pgtable-2level-types.h>
-#endif
-
 #ifdef CONFIG_VMAP_STACK
 #define ARCH_PAGE_TABLE_SYNC_MASK	PGTBL_PMD_MODIFIED
 #endif
+#endif
 
 #endif /* CONFIG_MMU */
 
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 3f38357efc46..08612032aefe 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -885,6 +885,7 @@ asmlinkage void handle_bad_stack(struct pt_regs *regs)
 	die("kernel stack overflow", regs, 0);
 }
 
+#ifndef CONFIG_ARM_LPAE
 /*
  * Normally, we rely on the logic in do_translation_fault() to update stale PMD
  * entries covering the vmalloc space in a task's page tables when it first
@@ -895,26 +896,14 @@ asmlinkage void handle_bad_stack(struct pt_regs *regs)
  * So we need to ensure that these PMD entries are up to date *before* the MM
  * switch. As we already have some logic in the MM switch path that takes care
  * of this, let's trigger it by bumping the counter every time the core vmalloc
- * code modifies a PMD entry in the vmalloc region.
+ * code modifies a PMD entry in the vmalloc region. Use release semantics on
+ * the store so that other CPUs observing the counter's new value are
+ * guaranteed to see the updated page table entries as well.
  */
 void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
 {
-	if (start > VMALLOC_END || end < VMALLOC_START)
-		return;
-
-	/*
-	 * This hooks into the core vmalloc code to receive notifications of
-	 * any PMD level changes that have been made to the kernel page tables.
-	 * This means it should only be triggered once for every MiB worth of
-	 * vmalloc space, given that we don't support huge vmalloc/vmap on ARM,
-	 * and that kernel PMD level table entries are rarely (if ever)
-	 * updated.
-	 *
-	 * This means that the counter is going to max out at ~250 for the
-	 * typical case. If it overflows, something entirely unexpected has
-	 * occurred so let's throw a warning if that happens.
-	 */
-	WARN_ON(++init_mm.context.vmalloc_seq == UINT_MAX);
+	if (start < VMALLOC_END && end > VMALLOC_START)
+		atomic_inc_return_release(&init_mm.context.vmalloc_seq);
 }
-
+#endif
 #endif
diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c
index 48091870db89..4204ffa2d104 100644
--- a/arch/arm/mm/context.c
+++ b/arch/arm/mm/context.c
@@ -240,8 +240,7 @@ void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk)
 	unsigned int cpu = smp_processor_id();
 	u64 asid;
 
-	if (unlikely(mm->context.vmalloc_seq != init_mm.context.vmalloc_seq))
-		__check_vmalloc_seq(mm);
+	check_vmalloc_seq(mm);
 
 	/*
 	 * We cannot update the pgd and the ASID atomicly with classic
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index 6e830b9418c9..8963c8c63471 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -117,16 +117,21 @@ EXPORT_SYMBOL(ioremap_page);
 
 void __check_vmalloc_seq(struct mm_struct *mm)
 {
-	unsigned int seq;
+	int seq;
 
 	do {
-		seq = init_mm.context.vmalloc_seq;
+		seq = atomic_read(&init_mm.context.vmalloc_seq);
 		memcpy(pgd_offset(mm, VMALLOC_START),
 		       pgd_offset_k(VMALLOC_START),
 		       sizeof(pgd_t) * (pgd_index(VMALLOC_END) -
 					pgd_index(VMALLOC_START)));
-		mm->context.vmalloc_seq = seq;
-	} while (seq != init_mm.context.vmalloc_seq);
+		/*
+		 * Use a store-release so that other CPUs that observe the
+		 * counter's new value are guaranteed to see the results of the
+		 * memcpy as well.
+		 */
+		atomic_set_release(&mm->context.vmalloc_seq, seq);
+	} while (seq != atomic_read(&init_mm.context.vmalloc_seq));
 }
 
 #if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE)
@@ -157,7 +162,7 @@ static void unmap_area_sections(unsigned long virt, unsigned long size)
 			 * Note: this is still racy on SMP machines.
 			 */
 			pmd_clear(pmdp);
-			init_mm.context.vmalloc_seq++;
+			atomic_inc_return_release(&init_mm.context.vmalloc_seq);
 
 			/*
 			 * Free the page table, if there was one.
@@ -174,8 +179,7 @@ static void unmap_area_sections(unsigned long virt, unsigned long size)
 	 * Ensure that the active_mm is up to date - we want to
 	 * catch any use-after-iounmap cases.
 	 */
-	if (current->active_mm->context.vmalloc_seq != init_mm.context.vmalloc_seq)
-		__check_vmalloc_seq(current->active_mm);
+	check_vmalloc_seq(current->active_mm);
 
 	flush_tlb_kernel_range(virt, end);
 }
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 10+ messages in thread
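
The ordering argument in the commit message can be illustrated generically;
this is a hedged sketch using C11 atomics rather than the kernel's atomic_t
helpers, and all names below are made up for the example: the writer updates
the shared state first and then bumps the counter with release semantics, so
any reader that observes the new counter value also observes the update.

#include <stdatomic.h>
#include <stdbool.h>

static int published_state;     /* stands in for the synced vmalloc PGD entries */
static atomic_int seq;          /* stands in for init_mm's vmalloc_seq          */

static void writer_update(int new_state)
{
	published_state = new_state;                               /* update first */
	atomic_fetch_add_explicit(&seq, 1, memory_order_release);  /* then publish */
}

static bool reader_check(int *last_seen, int *out_state)
{
	int s = atomic_load_explicit(&seq, memory_order_acquire);

	if (s == *last_seen)
		return false;           /* nothing new was published             */
	*out_state = published_state;   /* guaranteed to see the writer's update */
	*last_seen = s;
	return true;
}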

* [PATCH v6 6/8] ARM: iop: make iop_handle_irq() static
  2022-01-25  9:14 [PATCH v6 0/8] ARM vmap'ed and IRQ stacks roundup Ard Biesheuvel
                   ` (4 preceding siblings ...)
  2022-01-25  9:14 ` [PATCH v6 5/8] ARM: mm: make vmalloc_seq handling SMP safe Ard Biesheuvel
@ 2022-01-25  9:14 ` Ard Biesheuvel
  2022-01-25  9:14 ` [PATCH v6 7/8] ARM: drop pointless SMP check on secondary startup path Ard Biesheuvel
  2022-01-25  9:14 ` [PATCH v6 8/8] ARM: make get_current() and __my_cpu_offset() __always_inline Ard Biesheuvel
  7 siblings, 0 replies; 10+ messages in thread
From: Ard Biesheuvel @ 2022-01-25  9:14 UTC (permalink / raw)
  To: linux, linux-arm-kernel
  Cc: linux-hardening, Ard Biesheuvel, Arnd Bergmann, Kees Cook,
	Keith Packard, Linus Walleij, Nick Desaulniers, Marc Zyngier

The build bots complain about iop_handle_irq() not being declared, so
let's make it static instead.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm/mach-iop32x/irq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/mach-iop32x/irq.c b/arch/arm/mach-iop32x/irq.c
index b820839eaae8..6dca7e97d81f 100644
--- a/arch/arm/mach-iop32x/irq.c
+++ b/arch/arm/mach-iop32x/irq.c
@@ -59,7 +59,7 @@ struct irq_chip ext_chip = {
 	.irq_unmask	= iop32x_irq_unmask,
 };
 
-void iop_handle_irq(struct pt_regs *regs)
+static void iop_handle_irq(struct pt_regs *regs)
 {
 	u32 mask;
 
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH v6 7/8] ARM: drop pointless SMP check on secondary startup path
  2022-01-25  9:14 [PATCH v6 0/8] ARM vmap'ed and IRQ stacks roundup Ard Biesheuvel
                   ` (5 preceding siblings ...)
  2022-01-25  9:14 ` [PATCH v6 6/8] ARM: iop: make iop_handle_irq() static Ard Biesheuvel
@ 2022-01-25  9:14 ` Ard Biesheuvel
  2022-01-25  9:14 ` [PATCH v6 8/8] ARM: make get_current() and __my_cpu_offset() __always_inline Ard Biesheuvel
  7 siblings, 0 replies; 10+ messages in thread
From: Ard Biesheuvel @ 2022-01-25  9:14 UTC (permalink / raw)
  To: linux, linux-arm-kernel
  Cc: linux-hardening, Ard Biesheuvel, Arnd Bergmann, Kees Cook,
	Keith Packard, Linus Walleij, Nick Desaulniers, Marc Zyngier

Only SMP systems use the secondary startup path by definition, so there
is no need for SMP conditionals there.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm/kernel/smp.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 951559e5bea3..e34efa96cea1 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -405,11 +405,6 @@ static void smp_store_cpu_info(unsigned int cpuid)
 
 static void set_current(struct task_struct *cur)
 {
-	if (!IS_ENABLED(CONFIG_CURRENT_POINTER_IN_TPIDRURO) && !is_smp()) {
-		__current = cur;
-		return;
-	}
-
 	/* Set TPIDRURO */
 	asm("mcr p15, 0, %0, c13, c0, 3" :: "r"(cur) : "memory");
 }
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH v6 8/8] ARM: make get_current() and __my_cpu_offset() __always_inline
  2022-01-25  9:14 [PATCH v6 0/8] ARM vmap'ed and IRQ stacks roundup Ard Biesheuvel
                   ` (6 preceding siblings ...)
  2022-01-25  9:14 ` [PATCH v6 7/8] ARM: drop pointless SMP check on secondary startup path Ard Biesheuvel
@ 2022-01-25  9:14 ` Ard Biesheuvel
  2022-01-25 20:48   ` Nick Desaulniers
  7 siblings, 1 reply; 10+ messages in thread
From: Ard Biesheuvel @ 2022-01-25  9:14 UTC (permalink / raw)
  To: linux, linux-arm-kernel
  Cc: linux-hardening, Ard Biesheuvel, Arnd Bergmann, Kees Cook,
	Keith Packard, Linus Walleij, Nick Desaulniers, Marc Zyngier

The get_current() and __my_cpu_offset() accessors evaluate to only a
single instruction emitted inline, but due to the size of the asm string
that is created for SMP+v6 configurations, the compiler assumes
otherwise, and may emit the functions out of line instead.

So use __always_inline to avoid this.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm/include/asm/current.h | 2 +-
 arch/arm/include/asm/percpu.h  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/include/asm/current.h b/arch/arm/include/asm/current.h
index 131a89bbec6b..1e1178bf176d 100644
--- a/arch/arm/include/asm/current.h
+++ b/arch/arm/include/asm/current.h
@@ -14,7 +14,7 @@ struct task_struct;
 
 extern struct task_struct *__current;
 
-static inline __attribute_const__ struct task_struct *get_current(void)
+static __always_inline __attribute_const__ struct task_struct *get_current(void)
 {
 	struct task_struct *cur;
 
diff --git a/arch/arm/include/asm/percpu.h b/arch/arm/include/asm/percpu.h
index a09034ae45a1..7545c87c251f 100644
--- a/arch/arm/include/asm/percpu.h
+++ b/arch/arm/include/asm/percpu.h
@@ -25,7 +25,7 @@ static inline void set_my_cpu_offset(unsigned long off)
 	asm volatile("mcr p15, 0, %0, c13, c0, 4" : : "r" (off) : "memory");
 }
 
-static inline unsigned long __my_cpu_offset(void)
+static __always_inline unsigned long __my_cpu_offset(void)
 {
 	unsigned long off;
 
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH v6 8/8] ARM: make get_current() and __my_cpu_offset() __always_inline
  2022-01-25  9:14 ` [PATCH v6 8/8] ARM: make get_current() and __my_cpu_offset() __always_inline Ard Biesheuvel
@ 2022-01-25 20:48   ` Nick Desaulniers
  0 siblings, 0 replies; 10+ messages in thread
From: Nick Desaulniers @ 2022-01-25 20:48 UTC (permalink / raw)
  To: Ard Biesheuvel
  Cc: linux, linux-arm-kernel, linux-hardening, Arnd Bergmann,
	Kees Cook, Keith Packard, Linus Walleij, Marc Zyngier

On Tue, Jan 25, 2022 at 1:15 AM Ard Biesheuvel <ardb@kernel.org> wrote:
>
> The get_current() and __my_cpu_offset() accessors evaluate to only a
> single instruction emitted inline, but due to the size of the asm string
> that is created for SMP+v6 configurations, the compiler assumes
> otherwise, and may emit the functions out of line instead.
>
> So use __always_inline to avoid this.
>
> Signed-off-by: Ard Biesheuvel <ardb@kernel.org>

Alternatively, you could use the inline qualifier on the asm stmt.
i.e. `asm inline ("my asm string")`. Only supported since gcc-8.3+ and
all kernel-supported versions of clang though. See `asm_inline` in
include/linux/compiler_types.h.
Either way,
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>

> ---
>  arch/arm/include/asm/current.h | 2 +-
>  arch/arm/include/asm/percpu.h  | 2 +-
>  2 files changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/arch/arm/include/asm/current.h b/arch/arm/include/asm/current.h
> index 131a89bbec6b..1e1178bf176d 100644
> --- a/arch/arm/include/asm/current.h
> +++ b/arch/arm/include/asm/current.h
> @@ -14,7 +14,7 @@ struct task_struct;
>
>  extern struct task_struct *__current;
>
> -static inline __attribute_const__ struct task_struct *get_current(void)
> +static __always_inline __attribute_const__ struct task_struct *get_current(void)
>  {
>         struct task_struct *cur;
>
> diff --git a/arch/arm/include/asm/percpu.h b/arch/arm/include/asm/percpu.h
> index a09034ae45a1..7545c87c251f 100644
> --- a/arch/arm/include/asm/percpu.h
> +++ b/arch/arm/include/asm/percpu.h
> @@ -25,7 +25,7 @@ static inline void set_my_cpu_offset(unsigned long off)
>         asm volatile("mcr p15, 0, %0, c13, c0, 4" : : "r" (off) : "memory");
>  }
>
> -static inline unsigned long __my_cpu_offset(void)
> +static __always_inline unsigned long __my_cpu_offset(void)
>  {
>         unsigned long off;
>
> --
> 2.30.2
>


-- 
Thanks,
~Nick Desaulniers

^ permalink raw reply	[flat|nested] 10+ messages in thread
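
For reference, a hedged sketch of the alternative Nick describes, assuming
GCC 8.3+ or Clang; the function name is made up and this is not the code
from the patch (in-tree code would normally use the asm_inline wrapper from
include/linux/compiler_types.h rather than spelling out `asm inline`):

static inline unsigned long read_cpu_offset_example(void)
{
	unsigned long off;

	/*
	 * "asm inline" tells the compiler to treat the statement as minimum
	 * size for its inlining heuristics, no matter how long the string is.
	 */
	asm inline ("mrc p15, 0, %0, c13, c0, 4" : "=r" (off));
	return off;
}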

end of thread, other threads:[~2022-01-25 20:49 UTC | newest]

Thread overview: 10+ messages
2022-01-25  9:14 [PATCH v6 0/8] ARM vmap'ed and IRQ stacks roundup Ard Biesheuvel
2022-01-25  9:14 ` [PATCH v6 1/8] ARM: mm: switch to swapper_pg_dir early for vmap'ed stack Ard Biesheuvel
2022-01-25  9:14 ` [PATCH v6 2/8] ARM: assembler: define a Kconfig symbol for group relocation support Ard Biesheuvel
2022-01-25  9:14 ` [PATCH v6 3/8] ARM: smp: elide HWCAP_TLS checks or __entry_task updates on SMP+v6 Ard Biesheuvel
2022-01-25  9:14 ` [PATCH v6 4/8] ARM: entry: avoid clobbering R9 in IRQ handler Ard Biesheuvel
2022-01-25  9:14 ` [PATCH v6 5/8] ARM: mm: make vmalloc_seq handling SMP safe Ard Biesheuvel
2022-01-25  9:14 ` [PATCH v6 6/8] ARM: iop: make iop_handle_irq() static Ard Biesheuvel
2022-01-25  9:14 ` [PATCH v6 7/8] ARM: drop pointless SMP check on secondary startup path Ard Biesheuvel
2022-01-25  9:14 ` [PATCH v6 8/8] ARM: make get_current() and __my_cpu_offset() __always_inline Ard Biesheuvel
2022-01-25 20:48   ` Nick Desaulniers
