From: Ard Biesheuvel <ardb@kernel.org>
To: linux-arm-kernel@lists.infradead.org, linux@armlinux.org.uk
Cc: Ard Biesheuvel <ardb@kernel.org>,
	Nicolas Pitre <nico@fluxnic.net>, Arnd Bergmann <arnd@arndb.de>,
	Kees Cook <keescook@chromium.org>,
	Keith Packard <keithpac@amazon.com>,
	Linus Walleij <linus.walleij@linaro.org>,
	Nick Desaulniers <ndesaulniers@google.com>,
	Tony Lindgren <tony@atomide.com>, Marc Zyngier <maz@kernel.org>,
	Vladimir Murzin <vladimir.murzin@arm.com>,
	Jesse Taube <mr.bossman075@gmail.com>
Subject: [PATCH v3 10/14] ARM: percpu: add SMP_ON_UP support
Date: Fri,  3 Dec 2021 11:08:59 +0100
Message-ID: <20211203100903.334206-11-ardb@kernel.org>
In-Reply-To: <20211203100903.334206-1-ardb@kernel.org>

Permit the use of the TPIDRPRW system register for carrying the per-CPU
offset in generic SMP configurations that also target non-SMP capable
ARMv6 cores. This uses the SMP_ON_UP code patching framework to turn all
TPIDRPRW accesses into reads/writes of entry #0 in the __per_cpu_offset
array.
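
To illustrate the net effect (a sketch only, not part of the patch;
cpu_has_smp_extensions() is a made-up predicate standing in for the
boot-time SMP_ON_UP decision), the accessor behaves as in the
following C sketch:

	extern unsigned long __per_cpu_offset[];

	static inline unsigned long my_cpu_offset(void)
	{
		unsigned long off;

		if (cpu_has_smp_extensions())	/* ALT_SMP: real TPIDRPRW */
			asm("mrc p15, 0, %0, c13, c0, 4" : "=r" (off));
		else				/* ALT_UP: patched fallback */
			off = __per_cpu_offset[0];

		return off;
	}

except that the choice is made once at boot by patching the
instruction stream, not by testing a flag on every access.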

While at it, switch over some existing direct TPIDRPRW accesses in asm
code to invocations of a new helper that is patched in the same way when
necessary.
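
For example, in the irq_handler hunk from entry-armv.S below, the
open-coded per-CPU load

	mov_l	r2, irq_stack_ptr	@ Take base address
	mrc	p15, 0, r3, c13, c0, 4	@ Get CPU offset
	...
	ldr	sp, [r2, r3]		@ Load SP from per-CPU var

collapses into a single

	ldr_this_cpu sp, irq_stack_ptr, r2, r3

which expands to the patchable this_cpu_offset sequence where needed.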

Note that CPU_V6+SMP without SMP_ON_UP results in a kernel that does not
boot on v6 CPUs without SMP extensions, so add this dependency to
Kconfig as well.

Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Nicolas Pitre <nico@fluxnic.net>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm/include/asm/assembler.h | 59 +++++++++++++++++++-
 arch/arm/include/asm/insn.h      | 24 ++++++++
 arch/arm/include/asm/percpu.h    | 25 ++++++++-
 arch/arm/kernel/entry-armv.S     | 16 +-----
 arch/arm/kernel/sleep.S          |  4 +-
 arch/arm/mm/Kconfig              |  1 +
 6 files changed, 107 insertions(+), 22 deletions(-)

diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 2095638b7140..f9b3dd0e9ef5 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -220,9 +220,7 @@ THUMB(	fpreg	.req	r7	)
 
 	.macro	reload_current, t1:req, t2:req
 #ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO
-	adr_l	\t1, __entry_task		@ get __entry_task base address
-	mrc	p15, 0, \t2, c13, c0, 4		@ get per-CPU offset
-	ldr	\t1, [\t1, \t2]			@ load variable
+	ldr_this_cpu \t1, __entry_task, \t1, \t2
 	mcr	p15, 0, \t1, c13, c0, 3		@ store in TPIDRURO
 #endif
 	.endm
@@ -312,6 +310,26 @@ THUMB(	fpreg	.req	r7	)
 #define ALT_UP_B(label) b label
 #endif
 
+	/*
+	 * this_cpu_offset - load the per-CPU offset of this CPU into
+	 * 		     register 'rd'
+	 */
+	.macro		this_cpu_offset, rd:req
+#ifdef CONFIG_SMP
+ALT_SMP(mrc		p15, 0, \rd, c13, c0, 4)
+#ifdef CONFIG_CPU_V6
+ALT_UP_B(.L1_\@)
+.L0_\@:
+	.subsection	1
+.L1_\@: ldr_va		\rd, __per_cpu_offset
+	b		.L0_\@
+	.previous
+#endif
+#else
+	mov		\rd, #0
+#endif
+	.endm
+
 /*
  * Instruction barrier
  */
@@ -648,6 +666,41 @@ THUMB(	orr	\reg , \reg , #PSR_T_BIT	)
 	__ldst_va	str, \rn, \tmp, \sym, \cond
 	.endm
 
+	/*
+	 * ldr_this_cpu_armv6 - Load a 32-bit word from the per-CPU variable 'sym',
+	 *			without using a temp register. Supported in ARM mode
+	 *			only.
+	 */
+	.macro		ldr_this_cpu_armv6, rd:req, sym:req
+	this_cpu_offset	\rd
+	.globl		\sym
+	.reloc		.L0_\@, R_ARM_ALU_PC_G0_NC, \sym
+	.reloc		.L1_\@, R_ARM_ALU_PC_G1_NC, \sym
+	.reloc		.L2_\@, R_ARM_LDR_PC_G2, \sym
+	add		\rd, \rd, pc
+.L0_\@: sub		\rd, \rd, #4
+.L1_\@: sub		\rd, \rd, #0
+.L2_\@: ldr		\rd, [\rd, #4]
+	.endm
+
+	/*
+	 * ldr_this_cpu - Load a 32-bit word from the per-CPU variable 'sym'
+	 *		  into register 'rd', which may be the stack pointer,
+	 *		  using 't1' and 't2' as general temp registers. These
+	 *		  are permitted to overlap with 'rd' if != sp
+	 */
+	.macro		ldr_this_cpu, rd:req, sym:req, t1:req, t2:req
+#if __LINUX_ARM_ARCH__ >= 7 || \
+    (defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS)) || \
+    (defined(CONFIG_LD_IS_LLD) && CONFIG_LLD_VERSION < 140000)
+	this_cpu_offset	\t1
+	mov_l		\t2, \sym
+	ldr		\rd, [\t1, \t2]
+#else
+	ldr_this_cpu_armv6 \rd, \sym
+#endif
+	.endm
+
 	/*
 	 * rev_l - byte-swap a 32-bit value
 	 *
diff --git a/arch/arm/include/asm/insn.h b/arch/arm/include/asm/insn.h
index 5475cbf9fb6b..a160ed3ea427 100644
--- a/arch/arm/include/asm/insn.h
+++ b/arch/arm/include/asm/insn.h
@@ -2,6 +2,30 @@
 #ifndef __ASM_ARM_INSN_H
 #define __ASM_ARM_INSN_H
 
+#include <linux/types.h>
+
+/*
+ * Avoid a literal load by emitting a sequence of ADD/LDR instructions with the
+ * appropriate relocations. The combined sequence has a range of -/+ 256 MiB,
+ * which should be sufficient for the core kernel as well as modules loaded
+ * into the module region. (Not supported by LLD before release 14)
+ */
+#if !(defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS)) && \
+    !(defined(CONFIG_LD_IS_LLD) && CONFIG_LLD_VERSION < 140000)
+#define LOAD_SYM_ARMV6(reg, sym)					\
+	"	.globl	" #sym "				\n\t"	\
+	"	.reloc	10f, R_ARM_ALU_PC_G0_NC, " #sym "	\n\t"	\
+	"	.reloc	11f, R_ARM_ALU_PC_G1_NC, " #sym "	\n\t"	\
+	"	.reloc	12f, R_ARM_LDR_PC_G2, " #sym "		\n\t"	\
+	"10:	sub	" #reg ", pc, #8			\n\t"	\
+	"11:	sub	" #reg ", " #reg ", #4			\n\t"	\
+	"12:	ldr	" #reg ", [" #reg ", #0]		\n\t"
+#else
+#define LOAD_SYM_ARMV6(reg, sym)					\
+	"	ldr	" #reg ", =" #sym "			\n\t"	\
+	"	ldr	" #reg ", [" #reg "]			\n\t"
+#endif
+
 static inline unsigned long
 arm_gen_nop(void)
 {
diff --git a/arch/arm/include/asm/percpu.h b/arch/arm/include/asm/percpu.h
index e2fcb3cfd3de..a4a0d38d016a 100644
--- a/arch/arm/include/asm/percpu.h
+++ b/arch/arm/include/asm/percpu.h
@@ -5,15 +5,22 @@
 #ifndef _ASM_ARM_PERCPU_H_
 #define _ASM_ARM_PERCPU_H_
 
+#include <asm/insn.h>
+
 register unsigned long current_stack_pointer asm ("sp");
 
 /*
  * Same as asm-generic/percpu.h, except that we store the per cpu offset
  * in the TPIDRPRW. TPIDRPRW only exists on V6K and V7
  */
-#if defined(CONFIG_SMP) && !defined(CONFIG_CPU_V6)
+#ifdef CONFIG_SMP
 static inline void set_my_cpu_offset(unsigned long off)
 {
+	extern unsigned int smp_on_up;
+
+	if (IS_ENABLED(CONFIG_CPU_V6) && !smp_on_up)
+		return;
+
 	/* Set TPIDRPRW */
 	asm volatile("mcr p15, 0, %0, c13, c0, 4" : : "r" (off) : "memory");
 }
@@ -27,8 +34,20 @@ static inline unsigned long __my_cpu_offset(void)
 	 * We want to allow caching the value, so avoid using volatile and
 	 * instead use a fake stack read to hazard against barrier().
 	 */
-	asm("mrc p15, 0, %0, c13, c0, 4" : "=r" (off)
-		: "Q" (*(const unsigned long *)current_stack_pointer));
+	asm("0:	mrc p15, 0, %0, c13, c0, 4			\n\t"
+#ifdef CONFIG_CPU_V6
+	    "1:							\n\t"
+	    "	.subsection 1					\n\t"
+	    "2: " LOAD_SYM_ARMV6(%0, __per_cpu_offset) "	\n\t"
+	    "	b	1b					\n\t"
+	    "	.previous					\n\t"
+	    "	.pushsection \".alt.smp.init\", \"a\"		\n\t"
+	    "	.long	0b - .					\n\t"
+	    "	b	. + (2b - 0b)				\n\t"
+	    "	.popsection					\n\t"
+#endif
+	     : "=r" (off)
+	     : "Q" (*(const unsigned long *)current_stack_pointer));
 
 	return off;
 }
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 7f7ac963445c..43d917f0d9a9 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -35,15 +35,14 @@
 	.macro	irq_handler, from_user:req
 	mov	r0, sp
 #ifdef CONFIG_IRQSTACKS
-	mov_l	r2, irq_stack_ptr	@ Take base address
-	mrc	p15, 0, r3, c13, c0, 4	@ Get CPU offset
 #ifdef CONFIG_UNWINDER_ARM
 	mov	fpreg, sp		@ Preserve original SP
 #else
 	mov	r8, fp			@ Preserve original FP
 	mov	r9, sp			@ Preserve original SP
 #endif
-	ldr	sp, [r2, r3]		@ Load SP from per-CPU var
+	ldr_this_cpu sp, irq_stack_ptr, r2, r3
+
 	.if	\from_user == 0
 UNWIND(	.setfp	fpreg, sp		)
 	@
@@ -876,16 +875,7 @@ __bad_stack:
 THUMB(	bx	pc		)
 THUMB(	nop			)
 THUMB(	.arm			)
-	mrc	p15, 0, ip, c13, c0, 4		@ Get per-CPU offset
-
-	.globl	overflow_stack_ptr
-	.reloc	0f, R_ARM_ALU_PC_G0_NC, overflow_stack_ptr
-	.reloc	1f, R_ARM_ALU_PC_G1_NC, overflow_stack_ptr
-	.reloc	2f, R_ARM_LDR_PC_G2, overflow_stack_ptr
-	add	ip, ip, pc
-0:	add	ip, ip, #-4
-1:	add	ip, ip, #0
-2:	ldr	ip, [ip, #4]
+	ldr_this_cpu_armv6 ip, overflow_stack_ptr
 
 	str	sp, [ip, #-4]!			@ Preserve original SP value
 	mov	sp, ip				@ Switch to overflow stack
diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S
index 803b51e5cba0..f909baf17912 100644
--- a/arch/arm/kernel/sleep.S
+++ b/arch/arm/kernel/sleep.S
@@ -71,9 +71,7 @@ ENTRY(__cpu_suspend)
 	@ Run the suspend code from the overflow stack so we don't have to rely
 	@ on vmalloc-to-phys conversions anywhere in the arch suspend code.
 	@ The original SP value captured in R5 will be restored on the way out.
-	mov_l	r6, overflow_stack_ptr	@ Base pointer
-	mrc	p15, 0, r7, c13, c0, 4	@ Get per-CPU offset
-	ldr	sp, [r6, r7]		@ Address of this CPU's overflow stack
+	ldr_this_cpu sp, overflow_stack_ptr, r6, r7
 #endif
 	add	r4, r4, #12		@ Space for pgd, virt sp, phys resume fn
 	sub	sp, sp, r4		@ allocate CPU state on stack
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 58afba346729..a91ff22c6c2e 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -386,6 +386,7 @@ config CPU_V6
 	select CPU_PABRT_V6
 	select CPU_THUMB_CAPABLE
 	select CPU_TLB_V6 if MMU
+	select SMP_ON_UP if SMP
 
 # ARMv6k
 config CPU_V6K
-- 
2.30.2



Thread overview: 18+ messages
2021-12-03 10:08 [PATCH v3 00/14] ARM: enable IRQ stacks and vmap'ed stacks for UP Ard Biesheuvel
2021-12-03 10:08 ` [PATCH v3 01/14] ARM: riscpc: use GENERIC_IRQ_MULTI_HANDLER Ard Biesheuvel
2021-12-03 10:08 ` [PATCH v3 02/14] ARM: footbridge: " Ard Biesheuvel
2021-12-03 10:08 ` [PATCH v3 03/14] ARM: iop32x: offset IRQ numbers by 1 Ard Biesheuvel
2021-12-03 10:08 ` [PATCH v3 04/14] ARM: iop32x: use GENERIC_IRQ_MULTI_HANDLER Ard Biesheuvel
2021-12-03 10:08 ` [PATCH v3 05/14] ARM: remove old-style irq entry Ard Biesheuvel
2021-12-03 10:08 ` [PATCH v3 06/14] irqchip: nvic: Use GENERIC_IRQ_MULTI_HANDLER Ard Biesheuvel
2021-12-03 10:08 ` [PATCH v3 07/14] ARM: entry: preserve thread_info pointer in switch_to Ard Biesheuvel
2021-12-03 10:08 ` [PATCH v3 08/14] ARM: module: implement support for PC-relative group relocations Ard Biesheuvel
2021-12-03 10:08 ` [PATCH v3 09/14] ARM: assembler: add optimized ldr/str macros to load variables from memory Ard Biesheuvel
2021-12-03 10:08 ` Ard Biesheuvel [this message]
2021-12-03 10:09 ` [PATCH v3 11/14] ARM: use TLS register for 'current' on !SMP as well Ard Biesheuvel
2021-12-03 10:09 ` [PATCH v3 12/14] ARM: smp: defer TPIDRURO update for SMP v6 configurations too Ard Biesheuvel
2021-12-03 10:09 ` [PATCH v3 13/14] ARM: implement THREAD_INFO_IN_TASK for uniprocessor systems Ard Biesheuvel
2021-12-03 10:09 ` [PATCH v3 14/14] ARM: v7m: enable support for IRQ stacks Ard Biesheuvel
2021-12-03 10:46 ` [PATCH v3 00/14] ARM: enable IRQ stacks and vmap'ed stacks for UP Vladimir Murzin
2021-12-03 12:53 ` Marc Zyngier
2021-12-05  0:38   ` Linus Walleij
