From: Will Deacon <will.deacon@arm.com>
To: linux-arm-kernel@lists.infradead.org
Subject: [PATCH v5sub2 1/8] arm64: add support for module PLTs
Date: Thu, 25 Feb 2016 18:29:03 +0000
Message-ID: <20160225182902.GA29259@arm.com>
In-Reply-To: <CAKv+Gu_Bv0CsMrCR=fiowZjtmmepjdLi7RX94wv_oDdrbjr3iw@mail.gmail.com>

On Thu, Feb 25, 2016 at 06:31:04PM +0100, Ard Biesheuvel wrote:
> On 25 February 2016 at 17:56, Will Deacon <will.deacon@arm.com> wrote:
> > the plt will do:
> >
> > get_addr_of_callee_into_x16_and_clobber_x17_or_something
> > br callee
> >
> > then the callee will be compiled with all those weird options, but *not*
> > the ones specifying x16 and x17. That means it can happily use those guys
> > as scratch, because the caller will take care of them.
> >
> 
> Yes, that makes sense. But if you don't relax that restriction, you
> only need the alternative __LL_SC_CALL for modules.
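
To recap the mechanics: the module PLT entries added earlier in this
series materialise the callee's address in x16 and may clobber x17 along
the way, so a veneer looks roughly like this (a sketch of the shape and
register usage only, not the exact mov/movk sequence the loader emits):

	movz	x16, #0x...., lsl #32	// immediate chunks of the
	movk	x16, #0x...., lsl #16	// callee's runtime address
	movk	x16, #0x....
	br	x16			// x17 may also be used as scratch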

Totally untested patch below, but I'll give it a whirl tomorrow.

It looks a bit mental.
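
The shape of the trick: __LL_SC_CALL now expands to three instructions
rather than a bare bl, so each call into the out-of-line atomics preserves
the IP registers across a possible PLT detour, and the LSE alternatives
are re-padded so both sides of every ARM64_LSE_ATOMIC_INSN() pair keep the
same length. For a void operation such as atomic_add() that works out as
below (symbol name per __LL_SC_PREFIX; a sketch, not objdump output):

	// LL/SC alternative, as emitted by the new __LL_SC_CALL:
	stp	x16, x17, [sp, #-16]!	// save the IP regs a PLT may clobber
	bl	__ll_sc_atomic_add	// may be redirected via a module PLT
	ldp	x16, x17, [sp], #16	// restore them on return

	// LSE alternative, padded with two nops to match:
	nop
	nop
	stadd	w0, [x1]

The *_return variants and the cmpxchg helpers, whose LSE side was already
longer, can drop some or all of their old nop padding instead.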

Will

--->8

diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index 197e06afbf71..5b595b32ab40 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -33,6 +33,8 @@ static inline void atomic_andnot(int i, atomic_t *v)
 	register atomic_t *x1 asm ("x1") = v;
 
 	asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(andnot),
+	"	nop\n"
+	"	nop\n"
 	"	stclr	%w[i], %[v]\n")
 	: [i] "+r" (w0), [v] "+Q" (v->counter)
 	: "r" (x1)
@@ -45,6 +47,8 @@ static inline void atomic_or(int i, atomic_t *v)
 	register atomic_t *x1 asm ("x1") = v;
 
 	asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(or),
+	"	nop\n"
+	"	nop\n"
 	"	stset	%w[i], %[v]\n")
 	: [i] "+r" (w0), [v] "+Q" (v->counter)
 	: "r" (x1)
@@ -57,6 +61,8 @@ static inline void atomic_xor(int i, atomic_t *v)
 	register atomic_t *x1 asm ("x1") = v;
 
 	asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(xor),
+	"	nop\n"
+	"	nop\n"
 	"	steor	%w[i], %[v]\n")
 	: [i] "+r" (w0), [v] "+Q" (v->counter)
 	: "r" (x1)
@@ -69,6 +75,8 @@ static inline void atomic_add(int i, atomic_t *v)
 	register atomic_t *x1 asm ("x1") = v;
 
 	asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(add),
+	"	nop\n"
+	"	nop\n"
 	"	stadd	%w[i], %[v]\n")
 	: [i] "+r" (w0), [v] "+Q" (v->counter)
 	: "r" (x1)
@@ -83,9 +91,9 @@ static inline int atomic_add_return##name(int i, atomic_t *v)		\
 									\
 	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
 	/* LL/SC */							\
-	"	nop\n"							\
 	__LL_SC_ATOMIC(add_return##name),				\
 	/* LSE atomics */						\
+	"	nop\n"							\
 	"	ldadd" #mb "	%w[i], w30, %[v]\n"			\
 	"	add	%w[i], %w[i], w30")				\
 	: [i] "+r" (w0), [v] "+Q" (v->counter)				\
@@ -109,9 +117,9 @@ static inline void atomic_and(int i, atomic_t *v)
 
 	asm volatile(ARM64_LSE_ATOMIC_INSN(
 	/* LL/SC */
-	"	nop\n"
 	__LL_SC_ATOMIC(and),
 	/* LSE atomics */
+	"	nop\n"
 	"	mvn	%w[i], %w[i]\n"
 	"	stclr	%w[i], %[v]")
 	: [i] "+r" (w0), [v] "+Q" (v->counter)
@@ -126,9 +134,9 @@ static inline void atomic_sub(int i, atomic_t *v)
 
 	asm volatile(ARM64_LSE_ATOMIC_INSN(
 	/* LL/SC */
-	"	nop\n"
 	__LL_SC_ATOMIC(sub),
 	/* LSE atomics */
+	"	nop\n"
 	"	neg	%w[i], %w[i]\n"
 	"	stadd	%w[i], %[v]")
 	: [i] "+r" (w0), [v] "+Q" (v->counter)
@@ -144,9 +152,7 @@ static inline int atomic_sub_return##name(int i, atomic_t *v)		\
 									\
 	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
 	/* LL/SC */							\
-	"	nop\n"							\
-	__LL_SC_ATOMIC(sub_return##name)				\
-	"	nop",							\
+	__LL_SC_ATOMIC(sub_return##name),				\
 	/* LSE atomics */						\
 	"	neg	%w[i], %w[i]\n"					\
 	"	ldadd" #mb "	%w[i], w30, %[v]\n"			\
@@ -174,6 +180,8 @@ static inline void atomic64_andnot(long i, atomic64_t *v)
 	register atomic64_t *x1 asm ("x1") = v;
 
 	asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(andnot),
+	"	nop\n"
+	"	nop\n"
 	"	stclr	%[i], %[v]\n")
 	: [i] "+r" (x0), [v] "+Q" (v->counter)
 	: "r" (x1)
@@ -186,6 +194,8 @@ static inline void atomic64_or(long i, atomic64_t *v)
 	register atomic64_t *x1 asm ("x1") = v;
 
 	asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(or),
+	"	nop\n"
+	"	nop\n"
 	"	stset	%[i], %[v]\n")
 	: [i] "+r" (x0), [v] "+Q" (v->counter)
 	: "r" (x1)
@@ -198,6 +208,8 @@ static inline void atomic64_xor(long i, atomic64_t *v)
 	register atomic64_t *x1 asm ("x1") = v;
 
 	asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(xor),
+	"	nop\n"
+	"	nop\n"
 	"	steor	%[i], %[v]\n")
 	: [i] "+r" (x0), [v] "+Q" (v->counter)
 	: "r" (x1)
@@ -210,6 +222,8 @@ static inline void atomic64_add(long i, atomic64_t *v)
 	register atomic64_t *x1 asm ("x1") = v;
 
 	asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(add),
+	"	nop\n"
+	"	nop\n"
 	"	stadd	%[i], %[v]\n")
 	: [i] "+r" (x0), [v] "+Q" (v->counter)
 	: "r" (x1)
@@ -224,9 +238,9 @@ static inline long atomic64_add_return##name(long i, atomic64_t *v)	\
 									\
 	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
 	/* LL/SC */							\
-	"	nop\n"							\
 	__LL_SC_ATOMIC64(add_return##name),				\
 	/* LSE atomics */						\
+	"	nop\n"							\
 	"	ldadd" #mb "	%[i], x30, %[v]\n"			\
 	"	add	%[i], %[i], x30")				\
 	: [i] "+r" (x0), [v] "+Q" (v->counter)				\
@@ -250,9 +264,9 @@ static inline void atomic64_and(long i, atomic64_t *v)
 
 	asm volatile(ARM64_LSE_ATOMIC_INSN(
 	/* LL/SC */
-	"	nop\n"
 	__LL_SC_ATOMIC64(and),
 	/* LSE atomics */
+	"	nop\n"
 	"	mvn	%[i], %[i]\n"
 	"	stclr	%[i], %[v]")
 	: [i] "+r" (x0), [v] "+Q" (v->counter)
@@ -267,9 +281,9 @@ static inline void atomic64_sub(long i, atomic64_t *v)
 
 	asm volatile(ARM64_LSE_ATOMIC_INSN(
 	/* LL/SC */
-	"	nop\n"
 	__LL_SC_ATOMIC64(sub),
 	/* LSE atomics */
+	"	nop\n"
 	"	neg	%[i], %[i]\n"
 	"	stadd	%[i], %[v]")
 	: [i] "+r" (x0), [v] "+Q" (v->counter)
@@ -285,9 +299,7 @@ static inline long atomic64_sub_return##name(long i, atomic64_t *v)	\
 									\
 	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
 	/* LL/SC */							\
-	"	nop\n"							\
-	__LL_SC_ATOMIC64(sub_return##name)				\
-	"	nop",							\
+	__LL_SC_ATOMIC64(sub_return##name),				\
 	/* LSE atomics */						\
 	"	neg	%[i], %[i]\n"					\
 	"	ldadd" #mb "	%[i], x30, %[v]\n"			\
@@ -312,12 +324,10 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
 
 	asm volatile(ARM64_LSE_ATOMIC_INSN(
 	/* LL/SC */
-	"	nop\n"
 	__LL_SC_ATOMIC64(dec_if_positive)
 	"	nop\n"
 	"	nop\n"
 	"	nop\n"
-	"	nop\n"
 	"	nop",
 	/* LSE atomics */
 	"1:	ldr	x30, %[v]\n"
@@ -350,9 +360,7 @@ static inline unsigned long __cmpxchg_case_##name(volatile void *ptr,	\
 									\
 	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
 	/* LL/SC */							\
-	"	nop\n"							\
-		__LL_SC_CMPXCHG(name)					\
-	"	nop",							\
+		__LL_SC_CMPXCHG(name),					\
 	/* LSE atomics */						\
 	"	mov	" #w "30, %" #w "[old]\n"			\
 	"	cas" #mb #sz "\t" #w "30, %" #w "[new], %[v]\n"		\
@@ -404,8 +412,6 @@ static inline long __cmpxchg_double##name(unsigned long old1,		\
 	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
 	/* LL/SC */							\
 	"	nop\n"							\
-	"	nop\n"							\
-	"	nop\n"							\
 	__LL_SC_CMPXCHG_DBL(name),					\
 	/* LSE atomics */						\
 	"	casp" #mb "\t%[old1], %[old2], %[new1], %[new2], %[v]\n"\
diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h
index 3de42d68611d..8252dc6b3046 100644
--- a/arch/arm64/include/asm/lse.h
+++ b/arch/arm64/include/asm/lse.h
@@ -25,7 +25,10 @@ __asm__(".arch_extension	lse");
 #define __LL_SC_EXPORT(x)	EXPORT_SYMBOL(__LL_SC_PREFIX(x))
 
 /* Macro for constructing calls to out-of-line ll/sc atomics */
-#define __LL_SC_CALL(op)	"bl\t" __stringify(__LL_SC_PREFIX(op)) "\n"
+#define __LL_SC_CALL(op)						\
+	"stp	x16, x17, [sp, #-16]!\n"				\
+	"bl\t" __stringify(__LL_SC_PREFIX(op)) "\n"			\
+	"ldp	x16, x17, [sp], #16\n"
 
 /* In-line patching at runtime */
 #define ARM64_LSE_ATOMIC_INSN(llsc, lse)				\
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
index 1a811ecf71da..c86b7909ef31 100644
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -4,15 +4,16 @@ lib-y		:= bitops.o clear_user.o delay.o copy_from_user.o	\
 		   memcmp.o strcmp.o strncmp.o strlen.o strnlen.o	\
 		   strchr.o strrchr.o
 
-# Tell the compiler to treat all general purpose registers as
-# callee-saved, which allows for efficient runtime patching of the bl
-# instruction in the caller with an atomic instruction when supported by
-# the CPU. Result and argument registers are handled correctly, based on
-# the function prototype.
+# Tell the compiler to treat all general purpose registers (with the
+# exception of the IP registers, which are already handled by the caller
+# in case of a PLT) as callee-saved, which allows for efficient runtime
+# patching of the bl instruction in the caller with an atomic instruction
+# when supported by the CPU. Result and argument registers are handled
+# correctly, based on the function prototype.
 lib-$(CONFIG_ARM64_LSE_ATOMICS) += atomic_ll_sc.o
 CFLAGS_atomic_ll_sc.o	:= -fcall-used-x0 -ffixed-x1 -ffixed-x2		\
 		   -ffixed-x3 -ffixed-x4 -ffixed-x5 -ffixed-x6		\
 		   -ffixed-x7 -fcall-saved-x8 -fcall-saved-x9		\
 		   -fcall-saved-x10 -fcall-saved-x11 -fcall-saved-x12	\
 		   -fcall-saved-x13 -fcall-saved-x14 -fcall-saved-x15	\
-		   -fcall-saved-x16 -fcall-saved-x17 -fcall-saved-x18
+		   -fcall-saved-x18
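
One consequence worth spelling out: with -fcall-saved-x16 and
-fcall-saved-x17 gone from CFLAGS_atomic_ll_sc.o, the compiler is free to
use x16/x17 in the out-of-line routines as ordinary caller-saved scratch
again, which is safe because every call site now spills them via
__LL_SC_CALL. For a value-returning op such as atomic_sub_return_relaxed(),
both alternatives then land on exactly three instructions with no padding
(symbol name per __LL_SC_PREFIX; a sketch under those assumptions):

	// LL/SC alternative:
	stp	x16, x17, [sp, #-16]!
	bl	__ll_sc_atomic_sub_return_relaxed
	ldp	x16, x17, [sp], #16

	// LSE alternative:
	neg	w0, w0			// negate i so the ldadd subtracts
	ldadd	w0, w30, [x1]		// old value comes back in w30
	add	w0, w0, w30		// new value = old - i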
