linux-arm-kernel.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] ARM: avoid literal references in inline assembly
@ 2021-12-22 10:49 Ard Biesheuvel
  2021-12-22 19:13 ` Nathan Chancellor
  0 siblings, 1 reply; 2+ messages in thread
From: Ard Biesheuvel @ 2021-12-22 10:49 UTC (permalink / raw)
  To: linux-arm-kernel; +Cc: arnd, linux, Ard Biesheuvel, Nathan Chancellor

Nathan reports that the new get_current() and per-CPU offset accessors
may cause problems at build time due to the use of a literal to hold the
address of the respective variables. This is due to the fact that LLD
before v14 does not support the PC-relative group relocations that are
normally used for this, and the fallback relies on literals but does not
emit the literal pools explictly using the .ltorg directive.

./arch/arm/include/asm/current.h:53:6: error: out of range pc-relative fixup value
        asm(LOAD_SYM_ARMV6(%0, __current) : "=r"(cur));
            ^
./arch/arm/include/asm/insn.h:25:2: note: expanded from macro 'LOAD_SYM_ARMV6'
        "       ldr     " #reg ", =" #sym "                     \n\t"   \
        ^
<inline asm>:1:3: note: instantiated into assembly here
                ldr     r0, =__current
                ^

Since emitting a literal pool in this particular case is not possible,
let's avoid the LOAD_SYM_ARMV6() entirely, and use the ordinary C
assigment instead.

As it turns out, there are other such cases, and here, using .ltorg to
emit the literal pool within range of the LDR instruction would be
possible due to the presence of an unconditional branch right after it.
Unfortunately, putting .ltorg directives in subsections appears to
confuse the Clang inline assembler, resulting in similar errors even
though the .ltorg is most definitely within range.

So let's fix this by emitting the literal explicitly, and not rely on
the assembler to figure this out. This means we have move the fallback
out of the LOAD_SYM_ARMV6() macro and into the callers.

Fixes: 9c46929e7989 ("ARM: implement THREAD_INFO_IN_TASK for uniprocessor systems")
Reported-by: Nathan Chancellor <natechancellor@gmail.com>
Link: https://github.com/ClangBuiltLinux/linux/issues/1551
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
Marked as v2 as it supersedes [0], which did not fully mitigate the
issue.

[0] https://lore.kernel.org/all/20211220225217.458335-1-ardb@kernel.org/

 arch/arm/include/asm/current.h | 13 +++++++++++--
 arch/arm/include/asm/insn.h    |  7 -------
 arch/arm/include/asm/percpu.h  |  8 ++++++++
 3 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/arch/arm/include/asm/current.h b/arch/arm/include/asm/current.h
index 69ecf4c6c725..2f9d79214b25 100644
--- a/arch/arm/include/asm/current.h
+++ b/arch/arm/include/asm/current.h
@@ -32,27 +32,36 @@ static inline __attribute_const__ struct task_struct *get_current(void)
 	 * https://github.com/ClangBuiltLinux/linux/issues/1485
 	 */
 	cur = __builtin_thread_pointer();
 #elif defined(CONFIG_CURRENT_POINTER_IN_TPIDRURO) || defined(CONFIG_SMP)
 	asm("0:	mrc p15, 0, %0, c13, c0, 3			\n\t"
 #ifdef CONFIG_CPU_V6
 	    "1:							\n\t"
 	    "	.subsection 1					\n\t"
+#if !(defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS)) && \
+    !(defined(CONFIG_LD_IS_LLD) && CONFIG_LLD_VERSION < 140000)
 	    "2: " LOAD_SYM_ARMV6(%0, __current) "		\n\t"
 	    "	b	1b					\n\t"
+#else
+	    "2:	ldr	%0, 3f					\n\t"
+	    "	ldr	%0, [%0]				\n\t"
+	    "	b	1b					\n\t"
+	    "3:	.long	__current				\n\t"
+#endif
 	    "	.previous					\n\t"
 	    "	.pushsection \".alt.smp.init\", \"a\"		\n\t"
 	    "	.long	0b - .					\n\t"
 	    "	b	. + (2b - 0b)				\n\t"
 	    "	.popsection					\n\t"
 #endif
 	    : "=r"(cur));
-#elif __LINUX_ARM_ARCH__>=7 || \
-      (defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS))
+#elif __LINUX_ARM_ARCH__>= 7 || \
+      (defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS)) || \
+      (defined(CONFIG_LD_IS_LLD) && CONFIG_LLD_VERSION < 140000)
 	cur = __current;
 #else
 	asm(LOAD_SYM_ARMV6(%0, __current) : "=r"(cur));
 #endif
 	return cur;
 }
 
 #define current get_current()
diff --git a/arch/arm/include/asm/insn.h b/arch/arm/include/asm/insn.h
index a160ed3ea427..faf3d1c28368 100644
--- a/arch/arm/include/asm/insn.h
+++ b/arch/arm/include/asm/insn.h
@@ -5,31 +5,24 @@
 #include <linux/types.h>
 
 /*
  * Avoid a literal load by emitting a sequence of ADD/LDR instructions with the
  * appropriate relocations. The combined sequence has a range of -/+ 256 MiB,
  * which should be sufficient for the core kernel as well as modules loaded
  * into the module region. (Not supported by LLD before release 14)
  */
-#if !(defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS)) && \
-    !(defined(CONFIG_LD_IS_LLD) && CONFIG_LLD_VERSION < 140000)
 #define LOAD_SYM_ARMV6(reg, sym)					\
 	"	.globl	" #sym "				\n\t"	\
 	"	.reloc	10f, R_ARM_ALU_PC_G0_NC, " #sym "	\n\t"	\
 	"	.reloc	11f, R_ARM_ALU_PC_G1_NC, " #sym "	\n\t"	\
 	"	.reloc	12f, R_ARM_LDR_PC_G2, " #sym "		\n\t"	\
 	"10:	sub	" #reg ", pc, #8			\n\t"	\
 	"11:	sub	" #reg ", " #reg ", #4			\n\t"	\
 	"12:	ldr	" #reg ", [" #reg ", #0]		\n\t"
-#else
-#define LOAD_SYM_ARMV6(reg, sym)					\
-	"	ldr	" #reg ", =" #sym "			\n\t"	\
-	"	ldr	" #reg ", [" #reg "]			\n\t"
-#endif
 
 static inline unsigned long
 arm_gen_nop(void)
 {
 #ifdef CONFIG_THUMB2_KERNEL
 	return 0xf3af8000; /* nop.w */
 #else
 	return 0xe1a00000; /* mov r0, r0 */
diff --git a/arch/arm/include/asm/percpu.h b/arch/arm/include/asm/percpu.h
index a4a0d38d016a..28961d60877d 100644
--- a/arch/arm/include/asm/percpu.h
+++ b/arch/arm/include/asm/percpu.h
@@ -33,18 +33,26 @@ static inline unsigned long __my_cpu_offset(void)
 	 * Read TPIDRPRW.
 	 * We want to allow caching the value, so avoid using volatile and
 	 * instead use a fake stack read to hazard against barrier().
 	 */
 	asm("0:	mrc p15, 0, %0, c13, c0, 4			\n\t"
 #ifdef CONFIG_CPU_V6
 	    "1:							\n\t"
 	    "	.subsection 1					\n\t"
+#if !(defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS)) && \
+    !(defined(CONFIG_LD_IS_LLD) && CONFIG_LLD_VERSION < 140000)
 	    "2: " LOAD_SYM_ARMV6(%0, __per_cpu_offset) "	\n\t"
 	    "	b	1b					\n\t"
+#else
+	    "2: ldr	%0, 3f					\n\t"
+	    "	ldr	%0, [%0]				\n\t"
+	    "	b	1b					\n\t"
+	    "3:	.long	__per_cpu_offset			\n\t"
+#endif
 	    "	.previous					\n\t"
 	    "	.pushsection \".alt.smp.init\", \"a\"		\n\t"
 	    "	.long	0b - .					\n\t"
 	    "	b	. + (2b - 0b)				\n\t"
 	    "	.popsection					\n\t"
 #endif
 	     : "=r" (off)
 	     : "Q" (*(const unsigned long *)current_stack_pointer));
-- 
2.30.2


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH v2] ARM: avoid literal references in inline assembly
  2021-12-22 10:49 [PATCH v2] ARM: avoid literal references in inline assembly Ard Biesheuvel
@ 2021-12-22 19:13 ` Nathan Chancellor
  0 siblings, 0 replies; 2+ messages in thread
From: Nathan Chancellor @ 2021-12-22 19:13 UTC (permalink / raw)
  To: Ard Biesheuvel; +Cc: linux-arm-kernel, arnd, linux, nathan

On Wed, Dec 22, 2021 at 11:49:39AM +0100, Ard Biesheuvel wrote:
> Nathan reports that the new get_current() and per-CPU offset accessors
> may cause problems at build time due to the use of a literal to hold the
> address of the respective variables. This is due to the fact that LLD
> before v14 does not support the PC-relative group relocations that are
> normally used for this, and the fallback relies on literals but does not
> emit the literal pools explictly using the .ltorg directive.
> 
> ./arch/arm/include/asm/current.h:53:6: error: out of range pc-relative fixup value
>         asm(LOAD_SYM_ARMV6(%0, __current) : "=r"(cur));
>             ^
> ./arch/arm/include/asm/insn.h:25:2: note: expanded from macro 'LOAD_SYM_ARMV6'
>         "       ldr     " #reg ", =" #sym "                     \n\t"   \
>         ^
> <inline asm>:1:3: note: instantiated into assembly here
>                 ldr     r0, =__current
>                 ^
> 
> Since emitting a literal pool in this particular case is not possible,
> let's avoid the LOAD_SYM_ARMV6() entirely, and use the ordinary C
> assigment instead.
> 
> As it turns out, there are other such cases, and here, using .ltorg to
> emit the literal pool within range of the LDR instruction would be
> possible due to the presence of an unconditional branch right after it.
> Unfortunately, putting .ltorg directives in subsections appears to
> confuse the Clang inline assembler, resulting in similar errors even
> though the .ltorg is most definitely within range.
> 
> So let's fix this by emitting the literal explicitly, and not rely on
> the assembler to figure this out. This means we have move the fallback
> out of the LOAD_SYM_ARMV6() macro and into the callers.
> 
> Fixes: 9c46929e7989 ("ARM: implement THREAD_INFO_IN_TASK for uniprocessor systems")
> Reported-by: Nathan Chancellor <natechancellor@gmail.com>
> Link: https://github.com/ClangBuiltLinux/linux/issues/1551
> Signed-off-by: Ard Biesheuvel <ardb@kernel.org>

I build tested this with LLVM 12, 13, and 14 and boot tested a few
configs in QEMU, nothing screamed.

Tested-by: Nathan Chancellor <nathan@kernel.org>

> ---
> Marked as v2 as it supersedes [0], which did not fully mitigate the
> issue.
> 
> [0] https://lore.kernel.org/all/20211220225217.458335-1-ardb@kernel.org/
> 
>  arch/arm/include/asm/current.h | 13 +++++++++++--
>  arch/arm/include/asm/insn.h    |  7 -------
>  arch/arm/include/asm/percpu.h  |  8 ++++++++
>  3 files changed, 19 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/arm/include/asm/current.h b/arch/arm/include/asm/current.h
> index 69ecf4c6c725..2f9d79214b25 100644
> --- a/arch/arm/include/asm/current.h
> +++ b/arch/arm/include/asm/current.h
> @@ -32,27 +32,36 @@ static inline __attribute_const__ struct task_struct *get_current(void)
>  	 * https://github.com/ClangBuiltLinux/linux/issues/1485
>  	 */
>  	cur = __builtin_thread_pointer();
>  #elif defined(CONFIG_CURRENT_POINTER_IN_TPIDRURO) || defined(CONFIG_SMP)
>  	asm("0:	mrc p15, 0, %0, c13, c0, 3			\n\t"
>  #ifdef CONFIG_CPU_V6
>  	    "1:							\n\t"
>  	    "	.subsection 1					\n\t"
> +#if !(defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS)) && \
> +    !(defined(CONFIG_LD_IS_LLD) && CONFIG_LLD_VERSION < 140000)
>  	    "2: " LOAD_SYM_ARMV6(%0, __current) "		\n\t"
>  	    "	b	1b					\n\t"
> +#else
> +	    "2:	ldr	%0, 3f					\n\t"
> +	    "	ldr	%0, [%0]				\n\t"
> +	    "	b	1b					\n\t"
> +	    "3:	.long	__current				\n\t"
> +#endif
>  	    "	.previous					\n\t"
>  	    "	.pushsection \".alt.smp.init\", \"a\"		\n\t"
>  	    "	.long	0b - .					\n\t"
>  	    "	b	. + (2b - 0b)				\n\t"
>  	    "	.popsection					\n\t"
>  #endif
>  	    : "=r"(cur));
> -#elif __LINUX_ARM_ARCH__>=7 || \
> -      (defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS))
> +#elif __LINUX_ARM_ARCH__>= 7 || \
> +      (defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS)) || \
> +      (defined(CONFIG_LD_IS_LLD) && CONFIG_LLD_VERSION < 140000)
>  	cur = __current;
>  #else
>  	asm(LOAD_SYM_ARMV6(%0, __current) : "=r"(cur));
>  #endif
>  	return cur;
>  }
>  
>  #define current get_current()
> diff --git a/arch/arm/include/asm/insn.h b/arch/arm/include/asm/insn.h
> index a160ed3ea427..faf3d1c28368 100644
> --- a/arch/arm/include/asm/insn.h
> +++ b/arch/arm/include/asm/insn.h
> @@ -5,31 +5,24 @@
>  #include <linux/types.h>
>  
>  /*
>   * Avoid a literal load by emitting a sequence of ADD/LDR instructions with the
>   * appropriate relocations. The combined sequence has a range of -/+ 256 MiB,
>   * which should be sufficient for the core kernel as well as modules loaded
>   * into the module region. (Not supported by LLD before release 14)
>   */
> -#if !(defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS)) && \
> -    !(defined(CONFIG_LD_IS_LLD) && CONFIG_LLD_VERSION < 140000)
>  #define LOAD_SYM_ARMV6(reg, sym)					\
>  	"	.globl	" #sym "				\n\t"	\
>  	"	.reloc	10f, R_ARM_ALU_PC_G0_NC, " #sym "	\n\t"	\
>  	"	.reloc	11f, R_ARM_ALU_PC_G1_NC, " #sym "	\n\t"	\
>  	"	.reloc	12f, R_ARM_LDR_PC_G2, " #sym "		\n\t"	\
>  	"10:	sub	" #reg ", pc, #8			\n\t"	\
>  	"11:	sub	" #reg ", " #reg ", #4			\n\t"	\
>  	"12:	ldr	" #reg ", [" #reg ", #0]		\n\t"
> -#else
> -#define LOAD_SYM_ARMV6(reg, sym)					\
> -	"	ldr	" #reg ", =" #sym "			\n\t"	\
> -	"	ldr	" #reg ", [" #reg "]			\n\t"
> -#endif
>  
>  static inline unsigned long
>  arm_gen_nop(void)
>  {
>  #ifdef CONFIG_THUMB2_KERNEL
>  	return 0xf3af8000; /* nop.w */
>  #else
>  	return 0xe1a00000; /* mov r0, r0 */
> diff --git a/arch/arm/include/asm/percpu.h b/arch/arm/include/asm/percpu.h
> index a4a0d38d016a..28961d60877d 100644
> --- a/arch/arm/include/asm/percpu.h
> +++ b/arch/arm/include/asm/percpu.h
> @@ -33,18 +33,26 @@ static inline unsigned long __my_cpu_offset(void)
>  	 * Read TPIDRPRW.
>  	 * We want to allow caching the value, so avoid using volatile and
>  	 * instead use a fake stack read to hazard against barrier().
>  	 */
>  	asm("0:	mrc p15, 0, %0, c13, c0, 4			\n\t"
>  #ifdef CONFIG_CPU_V6
>  	    "1:							\n\t"
>  	    "	.subsection 1					\n\t"
> +#if !(defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS)) && \
> +    !(defined(CONFIG_LD_IS_LLD) && CONFIG_LLD_VERSION < 140000)
>  	    "2: " LOAD_SYM_ARMV6(%0, __per_cpu_offset) "	\n\t"
>  	    "	b	1b					\n\t"
> +#else
> +	    "2: ldr	%0, 3f					\n\t"
> +	    "	ldr	%0, [%0]				\n\t"
> +	    "	b	1b					\n\t"
> +	    "3:	.long	__per_cpu_offset			\n\t"
> +#endif
>  	    "	.previous					\n\t"
>  	    "	.pushsection \".alt.smp.init\", \"a\"		\n\t"
>  	    "	.long	0b - .					\n\t"
>  	    "	b	. + (2b - 0b)				\n\t"
>  	    "	.popsection					\n\t"
>  #endif
>  	     : "=r" (off)
>  	     : "Q" (*(const unsigned long *)current_stack_pointer));
> -- 
> 2.30.2
> 
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
> 

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2021-12-22 19:14 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-12-22 10:49 [PATCH v2] ARM: avoid literal references in inline assembly Ard Biesheuvel
2021-12-22 19:13 ` Nathan Chancellor

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).