From: Ard Biesheuvel <ardb@kernel.org>
To: linux-arm-kernel@lists.infradead.org, linux@armlinux.org.uk
Cc: Ard Biesheuvel <ardb@kernel.org>,
Nicolas Pitre <nico@fluxnic.net>, Arnd Bergmann <arnd@arndb.de>,
Kees Cook <keescook@chromium.org>,
Keith Packard <keithpac@amazon.com>,
Linus Walleij <linus.walleij@linaro.org>,
Nick Desaulniers <ndesaulniers@google.com>,
Tony Lindgren <tony@atomide.com>, Marc Zyngier <maz@kernel.org>,
Vladimir Murzin <vladimir.murzin@arm.com>,
Jesse Taube <mr.bossman075@gmail.com>
Subject: [PATCH v3 10/14] ARM: percpu: add SMP_ON_UP support
Date: Fri, 3 Dec 2021 11:08:59 +0100 [thread overview]
Message-ID: <20211203100903.334206-11-ardb@kernel.org> (raw)
In-Reply-To: <20211203100903.334206-1-ardb@kernel.org>
Permit the use of the TPIDRPRW system register for carrying the per-CPU
offset in generic SMP configurations that also target non-SMP-capable
ARMv6 cores. This uses the SMP_ON_UP code patching framework to turn all
TPIDRPRW reads into loads of entry #0 in the __per_cpu_offset array; the
TPIDRPRW write in set_my_cpu_offset() is guarded by a runtime check of
smp_on_up instead.
While at it, switch over some existing direct TPIDRPRW accesses in asm
code to invocations of a new helper that is patched in the same way when
necessary.
Note that CPU_V6+SMP without SMP_ON_UP results in a kernel that does not
boot on v6 CPUs without SMP extensions, so add this dependency to
Kconfig as well.
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Nicolas Pitre <nico@fluxnic.net>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
arch/arm/include/asm/assembler.h | 59 +++++++++++++++++++-
arch/arm/include/asm/insn.h | 24 ++++++++
arch/arm/include/asm/percpu.h | 25 ++++++++-
arch/arm/kernel/entry-armv.S | 16 +-----
arch/arm/kernel/sleep.S | 4 +-
arch/arm/mm/Kconfig | 1 +
6 files changed, 107 insertions(+), 22 deletions(-)
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 2095638b7140..f9b3dd0e9ef5 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -220,9 +220,7 @@ THUMB( fpreg .req r7 )
.macro reload_current, t1:req, t2:req
#ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO
- adr_l \t1, __entry_task @ get __entry_task base address
- mrc p15, 0, \t2, c13, c0, 4 @ get per-CPU offset
- ldr \t1, [\t1, \t2] @ load variable
+ ldr_this_cpu \t1, __entry_task, \t1, \t2
mcr p15, 0, \t1, c13, c0, 3 @ store in TPIDRURO
#endif
.endm
@@ -312,6 +310,26 @@ THUMB( fpreg .req r7 )
#define ALT_UP_B(label) b label
#endif
+ /*
+ * this_cpu_offset - load the per-CPU offset of this CPU into register
+ * 'rd': 0 on !SMP; the ALT_UP path reads __per_cpu_offset
+ */
+ .macro this_cpu_offset, rd:req
+#ifdef CONFIG_SMP
+ALT_SMP(mrc p15, 0, \rd, c13, c0, 4)
+#ifdef CONFIG_CPU_V6
+ALT_UP_B(.L1_\@)
+.L0_\@:
+ .subsection 1
+.L1_\@: ldr_va \rd, __per_cpu_offset
+ b .L0_\@
+ .previous
+#endif
+#else
+ mov \rd, #0
+#endif
+ .endm
+
/*
* Instruction barrier
*/
@@ -648,6 +666,41 @@ THUMB( orr \reg , \reg , #PSR_T_BIT )
__ldst_va str, \rn, \tmp, \sym, \cond
.endm
+ /*
+ * ldr_this_cpu_armv6 - Load a 32-bit word from the per-CPU variable 'sym'
+ * into register 'rd', without using a temp register.
+ * Supported in ARM mode only.
+ */
+ .macro ldr_this_cpu_armv6, rd:req, sym:req
+ this_cpu_offset \rd
+ .globl \sym
+ .reloc .L0_\@, R_ARM_ALU_PC_G0_NC, \sym
+ .reloc .L1_\@, R_ARM_ALU_PC_G1_NC, \sym
+ .reloc .L2_\@, R_ARM_LDR_PC_G2, \sym
+ add \rd, \rd, pc
+.L0_\@: sub \rd, \rd, #4
+.L1_\@: sub \rd, \rd, #0
+.L2_\@: ldr \rd, [\rd, #4]
+ .endm
+
+ /*
+ * ldr_this_cpu - Load a 32-bit word from the per-CPU variable 'sym'
+ * into register 'rd', which may be the stack pointer,
+ * using 't1' and 't2' as temp registers (unused on the
+ * ARMv6 path); they may overlap with 'rd' if != sp
+ */
+ .macro ldr_this_cpu, rd:req, sym:req, t1:req, t2:req
+#if __LINUX_ARM_ARCH__ >= 7 || \
+ (defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS)) || \
+ (defined(CONFIG_LD_IS_LLD) && CONFIG_LLD_VERSION < 140000)
+ this_cpu_offset \t1
+ mov_l \t2, \sym
+ ldr \rd, [\t1, \t2]
+#else
+ ldr_this_cpu_armv6 \rd, \sym
+#endif
+ .endm
+
/*
* rev_l - byte-swap a 32-bit value
*
diff --git a/arch/arm/include/asm/insn.h b/arch/arm/include/asm/insn.h
index 5475cbf9fb6b..a160ed3ea427 100644
--- a/arch/arm/include/asm/insn.h
+++ b/arch/arm/include/asm/insn.h
@@ -2,6 +2,30 @@
#ifndef __ASM_ARM_INSN_H
#define __ASM_ARM_INSN_H
+#include <linux/types.h>
+
+/*
+ * Avoid a literal load by emitting a SUB/SUB/LDR sequence with the appropriate
+ * relocations; it has a range of -/+ 256 MiB, which should suffice for the core
+ * kernel and for modules loaded into the module region. Falls back to a literal
+ * load for LLD before release 14 and for CONFIG_ARM_MODULE_PLTS modules.
+ */
+#if !(defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS)) && \
+ !(defined(CONFIG_LD_IS_LLD) && CONFIG_LLD_VERSION < 140000)
+#define LOAD_SYM_ARMV6(reg, sym) \
+ " .globl " #sym " \n\t" \
+ " .reloc 10f, R_ARM_ALU_PC_G0_NC, " #sym " \n\t" \
+ " .reloc 11f, R_ARM_ALU_PC_G1_NC, " #sym " \n\t" \
+ " .reloc 12f, R_ARM_LDR_PC_G2, " #sym " \n\t" \
+ "10: sub " #reg ", pc, #8 \n\t" \
+ "11: sub " #reg ", " #reg ", #4 \n\t" \
+ "12: ldr " #reg ", [" #reg ", #0] \n\t"
+#else
+#define LOAD_SYM_ARMV6(reg, sym) \
+ " ldr " #reg ", =" #sym " \n\t" \
+ " ldr " #reg ", [" #reg "] \n\t"
+#endif
+
static inline unsigned long
arm_gen_nop(void)
{
diff --git a/arch/arm/include/asm/percpu.h b/arch/arm/include/asm/percpu.h
index e2fcb3cfd3de..a4a0d38d016a 100644
--- a/arch/arm/include/asm/percpu.h
+++ b/arch/arm/include/asm/percpu.h
@@ -5,15 +5,22 @@
#ifndef _ASM_ARM_PERCPU_H_
#define _ASM_ARM_PERCPU_H_
+#include <asm/insn.h>
+
register unsigned long current_stack_pointer asm ("sp");
/*
* Same as asm-generic/percpu.h, except that we store the per cpu offset
* in the TPIDRPRW. TPIDRPRW only exists on V6K and V7
*/
-#if defined(CONFIG_SMP) && !defined(CONFIG_CPU_V6)
+#ifdef CONFIG_SMP
static inline void set_my_cpu_offset(unsigned long off)
{
+ extern unsigned int smp_on_up;
+
+ if (IS_ENABLED(CONFIG_CPU_V6) && !smp_on_up)
+ return;
+
/* Set TPIDRPRW */
asm volatile("mcr p15, 0, %0, c13, c0, 4" : : "r" (off) : "memory");
}
@@ -27,8 +34,20 @@ static inline unsigned long __my_cpu_offset(void)
* We want to allow caching the value, so avoid using volatile and
* instead use a fake stack read to hazard against barrier().
*/
- asm("mrc p15, 0, %0, c13, c0, 4" : "=r" (off)
- : "Q" (*(const unsigned long *)current_stack_pointer));
+ asm("0: mrc p15, 0, %0, c13, c0, 4 \n\t"
+#ifdef CONFIG_CPU_V6
+ "1: \n\t"
+ " .subsection 1 \n\t"
+ "2: " LOAD_SYM_ARMV6(%0, __per_cpu_offset) " \n\t"
+ " b 1b \n\t"
+ " .previous \n\t"
+ " .pushsection \".alt.smp.init\", \"a\" \n\t"
+ " .long 0b - . \n\t"
+ " b . + (2b - 0b) \n\t"
+ " .popsection \n\t"
+#endif
+ : "=r" (off)
+ : "Q" (*(const unsigned long *)current_stack_pointer));
return off;
}
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 7f7ac963445c..43d917f0d9a9 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -35,15 +35,14 @@
.macro irq_handler, from_user:req
mov r0, sp
#ifdef CONFIG_IRQSTACKS
- mov_l r2, irq_stack_ptr @ Take base address
- mrc p15, 0, r3, c13, c0, 4 @ Get CPU offset
#ifdef CONFIG_UNWINDER_ARM
mov fpreg, sp @ Preserve original SP
#else
mov r8, fp @ Preserve original FP
mov r9, sp @ Preserve original SP
#endif
- ldr sp, [r2, r3] @ Load SP from per-CPU var
+ ldr_this_cpu sp, irq_stack_ptr, r2, r3
+
.if \from_user == 0
UNWIND( .setfp fpreg, sp )
@
@@ -876,16 +875,7 @@ __bad_stack:
THUMB( bx pc )
THUMB( nop )
THUMB( .arm )
- mrc p15, 0, ip, c13, c0, 4 @ Get per-CPU offset
-
- .globl overflow_stack_ptr
- .reloc 0f, R_ARM_ALU_PC_G0_NC, overflow_stack_ptr
- .reloc 1f, R_ARM_ALU_PC_G1_NC, overflow_stack_ptr
- .reloc 2f, R_ARM_LDR_PC_G2, overflow_stack_ptr
- add ip, ip, pc
-0: add ip, ip, #-4
-1: add ip, ip, #0
-2: ldr ip, [ip, #4]
+ ldr_this_cpu_armv6 ip, overflow_stack_ptr
str sp, [ip, #-4]! @ Preserve original SP value
mov sp, ip @ Switch to overflow stack
diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S
index 803b51e5cba0..f909baf17912 100644
--- a/arch/arm/kernel/sleep.S
+++ b/arch/arm/kernel/sleep.S
@@ -71,9 +71,7 @@ ENTRY(__cpu_suspend)
@ Run the suspend code from the overflow stack so we don't have to rely
@ on vmalloc-to-phys conversions anywhere in the arch suspend code.
@ The original SP value captured in R5 will be restored on the way out.
- mov_l r6, overflow_stack_ptr @ Base pointer
- mrc p15, 0, r7, c13, c0, 4 @ Get per-CPU offset
- ldr sp, [r6, r7] @ Address of this CPU's overflow stack
+ ldr_this_cpu sp, overflow_stack_ptr, r6, r7
#endif
add r4, r4, #12 @ Space for pgd, virt sp, phys resume fn
sub sp, sp, r4 @ allocate CPU state on stack
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 58afba346729..a91ff22c6c2e 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -386,6 +386,7 @@ config CPU_V6
select CPU_PABRT_V6
select CPU_THUMB_CAPABLE
select CPU_TLB_V6 if MMU
+ select SMP_ON_UP if SMP
# ARMv6k
config CPU_V6K
--
2.30.2
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
next prev parent reply other threads:[~2021-12-03 10:14 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-12-03 10:08 [PATCH v3 00/14] ARM: enable IRQ stacks and vmap'ed stacks for UP Ard Biesheuvel
2021-12-03 10:08 ` [PATCH v3 01/14] ARM: riscpc: use GENERIC_IRQ_MULTI_HANDLER Ard Biesheuvel
2021-12-03 10:08 ` [PATCH v3 02/14] ARM: footbridge: " Ard Biesheuvel
2021-12-03 10:08 ` [PATCH v3 03/14] ARM: iop32x: offset IRQ numbers by 1 Ard Biesheuvel
2021-12-03 10:08 ` [PATCH v3 04/14] ARM: iop32x: use GENERIC_IRQ_MULTI_HANDLER Ard Biesheuvel
2021-12-03 10:08 ` [PATCH v3 05/14] ARM: remove old-style irq entry Ard Biesheuvel
2021-12-03 10:08 ` [PATCH v3 06/14] irqchip: nvic: Use GENERIC_IRQ_MULTI_HANDLER Ard Biesheuvel
2021-12-03 10:08 ` [PATCH v3 07/14] ARM: entry: preserve thread_info pointer in switch_to Ard Biesheuvel
2021-12-03 10:08 ` [PATCH v3 08/14] ARM: module: implement support for PC-relative group relocations Ard Biesheuvel
2021-12-03 10:08 ` [PATCH v3 09/14] ARM: assembler: add optimized ldr/str macros to load variables from memory Ard Biesheuvel
2021-12-03 10:08 ` Ard Biesheuvel [this message]
2021-12-03 10:09 ` [PATCH v3 11/14] ARM: use TLS register for 'current' on !SMP as well Ard Biesheuvel
2021-12-03 10:09 ` [PATCH v3 12/14] ARM: smp: defer TPIDRURO update for SMP v6 configurations too Ard Biesheuvel
2021-12-03 10:09 ` [PATCH v3 13/14] ARM: implement THREAD_INFO_IN_TASK for uniprocessor systems Ard Biesheuvel
2021-12-03 10:09 ` [PATCH v3 14/14] ARM: v7m: enable support for IRQ stacks Ard Biesheuvel
2021-12-03 10:46 ` [PATCH v3 00/14] ARM: enable IRQ stacks and vmap'ed stacks for UP Vladimir Murzin
2021-12-03 12:53 ` Marc Zyngier
2021-12-05 0:38 ` Linus Walleij
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20211203100903.334206-11-ardb@kernel.org \
--to=ardb@kernel.org \
--cc=arnd@arndb.de \
--cc=keescook@chromium.org \
--cc=keithpac@amazon.com \
--cc=linus.walleij@linaro.org \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux@armlinux.org.uk \
--cc=maz@kernel.org \
--cc=mr.bossman075@gmail.com \
--cc=ndesaulniers@google.com \
--cc=nico@fluxnic.net \
--cc=tony@atomide.com \
--cc=vladimir.murzin@arm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.