* [PATCH v6 1/3] arm64: mte: make the per-task SCTLR_EL1 field usable elsewhere
@ 2020-12-30  6:59 ` Peter Collingbourne
From: Peter Collingbourne @ 2020-12-30  6:59 UTC (permalink / raw)
  To: Catalin Marinas, Evgenii Stepanov, Kostya Serebryany,
	Vincenzo Frascino, Dave Martin, Szabolcs Nagy, Florian Weimer
  Cc: Peter Collingbourne, Linux ARM, Kevin Brodsky, Andrey Konovalov,
	Will Deacon, linux-api, libc-alpha

In an upcoming change we are going to introduce per-task SCTLR_EL1
bits for PAC. Move the existing per-task SCTLR_EL1 field out of the
MTE-specific code so that it can be shared by the PAC and MTE code
paths, and so that the task switching code becomes more efficient.
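
In condensed form, the pattern this patch implements looks roughly
like the following (a sketch assembled from the diff below, not a
complete listing):

  /* processor.h: one per-task copy of the user-controllable bits */
  #ifdef CONFIG_ARM64_NEED_SCTLR_USER
          u64                     sctlr_user;
  #endif

  /* process.c: update the per-task copy, then the register itself */
  void set_task_sctlr_el1(u64 sctlr)
  {
          preempt_disable();
          current->thread.sctlr_user = sctlr;
          update_sctlr_el1(sctlr);        /* sysreg write + ISB */
          preempt_enable();
  }

  /* __switch_to(): skip the expensive sysreg access if unchanged */
  if (prev->thread.sctlr_user != next->thread.sctlr_user)
          update_sctlr_el1(next->thread.sctlr_user);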

Signed-off-by: Peter Collingbourne <pcc@google.com>
Link: https://linux-review.googlesource.com/id/Ic65fac78a7926168fa68f9e8da591c9e04ff7278
---
 arch/arm64/Kconfig                 |  4 +++
 arch/arm64/include/asm/mte.h       |  4 ---
 arch/arm64/include/asm/processor.h | 10 ++++++-
 arch/arm64/kernel/mte.c            | 47 ++++++------------------------
 arch/arm64/kernel/process.c        | 34 +++++++++++++++++----
 5 files changed, 50 insertions(+), 49 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 05e17351e4f3..82e38d1ca012 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -348,6 +348,9 @@ config KASAN_SHADOW_OFFSET
 	default 0xeffffff800000000 if ARM64_VA_BITS_36 && KASAN_SW_TAGS
 	default 0xffffffffffffffff
 
+config ARM64_NEED_SCTLR_USER
+	bool
+
 source "arch/arm64/Kconfig.platforms"
 
 menu "Kernel Features"
@@ -1653,6 +1656,7 @@ config ARM64_MTE
 	# Required for tag checking in the uaccess routines
 	depends on ARM64_PAN
 	select ARCH_USES_HIGH_VMA_FLAGS
+	select ARM64_NEED_SCTLR_USER
 	help
 	  Memory Tagging (part of the ARMv8.5 Extensions) provides
 	  architectural support for run-time, always-on detection of
diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index d02aff9f493d..4e807969e767 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -42,7 +42,6 @@ void mte_free_tag_storage(char *storage);
 void mte_sync_tags(pte_t *ptep, pte_t pte);
 void mte_copy_page_tags(void *kto, const void *kfrom);
 void flush_mte_state(void);
-void mte_thread_switch(struct task_struct *next);
 void mte_suspend_exit(void);
 long set_mte_ctrl(struct task_struct *task, unsigned long arg);
 long get_mte_ctrl(struct task_struct *task);
@@ -65,9 +64,6 @@ static inline void mte_copy_page_tags(void *kto, const void *kfrom)
 static inline void flush_mte_state(void)
 {
 }
-static inline void mte_thread_switch(struct task_struct *next)
-{
-}
 static inline void mte_suspend_exit(void)
 {
 }
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index ca2cd75d3286..3287d8888b24 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -151,11 +151,15 @@ struct thread_struct {
 	struct ptrauth_keys_kernel	keys_kernel;
 #endif
 #ifdef CONFIG_ARM64_MTE
-	u64			sctlr_tcf0;
 	u64			gcr_user_excl;
 #endif
+#ifdef CONFIG_ARM64_NEED_SCTLR_USER
+	u64			sctlr_user;
+#endif
 };
 
+#define SCTLR_USER_MASK SCTLR_EL1_TCF0_MASK
+
 static inline void arch_thread_struct_whitelist(unsigned long *offset,
 						unsigned long *size)
 {
@@ -247,6 +251,10 @@ extern void release_thread(struct task_struct *);
 
 unsigned long get_wchan(struct task_struct *p);
 
+#ifdef CONFIG_ARM64_NEED_SCTLR_USER
+void set_task_sctlr_el1(u64 sctlr);
+#endif
+
 /* Thread switching */
 extern struct task_struct *cpu_switch_to(struct task_struct *prev,
 					 struct task_struct *next);
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index dc9ada64feed..48e8a75288a0 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -158,26 +158,6 @@ void mte_enable_kernel(void)
 	isb();
 }
 
-static void update_sctlr_el1_tcf0(u64 tcf0)
-{
-	/* ISB required for the kernel uaccess routines */
-	sysreg_clear_set(sctlr_el1, SCTLR_EL1_TCF0_MASK, tcf0);
-	isb();
-}
-
-static void set_sctlr_el1_tcf0(u64 tcf0)
-{
-	/*
-	 * mte_thread_switch() checks current->thread.sctlr_tcf0 as an
-	 * optimisation. Disable preemption so that it does not see
-	 * the variable update before the SCTLR_EL1.TCF0 one.
-	 */
-	preempt_disable();
-	current->thread.sctlr_tcf0 = tcf0;
-	update_sctlr_el1_tcf0(tcf0);
-	preempt_enable();
-}
-
 static void update_gcr_el1_excl(u64 excl)
 {
 
@@ -210,21 +190,12 @@ void flush_mte_state(void)
 	write_sysreg_s(0, SYS_TFSRE0_EL1);
 	clear_thread_flag(TIF_MTE_ASYNC_FAULT);
 	/* disable tag checking */
-	set_sctlr_el1_tcf0(SCTLR_EL1_TCF0_NONE);
+	set_task_sctlr_el1((current->thread.sctlr_user & ~SCTLR_EL1_TCF0_MASK) |
+			   SCTLR_EL1_TCF0_NONE);
 	/* reset tag generation mask */
 	set_gcr_el1_excl(SYS_GCR_EL1_EXCL_MASK);
 }
 
-void mte_thread_switch(struct task_struct *next)
-{
-	if (!system_supports_mte())
-		return;
-
-	/* avoid expensive SCTLR_EL1 accesses if no change */
-	if (current->thread.sctlr_tcf0 != next->thread.sctlr_tcf0)
-		update_sctlr_el1_tcf0(next->thread.sctlr_tcf0);
-}
-
 void mte_suspend_exit(void)
 {
 	if (!system_supports_mte())
@@ -235,7 +206,7 @@ void mte_suspend_exit(void)
 
 long set_mte_ctrl(struct task_struct *task, unsigned long arg)
 {
-	u64 tcf0;
+	u64 sctlr = task->thread.sctlr_user & ~SCTLR_EL1_TCF0_MASK;
 	u64 gcr_excl = ~((arg & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT) &
 		       SYS_GCR_EL1_EXCL_MASK;
 
@@ -244,23 +215,23 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg)
 
 	switch (arg & PR_MTE_TCF_MASK) {
 	case PR_MTE_TCF_NONE:
-		tcf0 = SCTLR_EL1_TCF0_NONE;
+		sctlr |= SCTLR_EL1_TCF0_NONE;
 		break;
 	case PR_MTE_TCF_SYNC:
-		tcf0 = SCTLR_EL1_TCF0_SYNC;
+		sctlr |= SCTLR_EL1_TCF0_SYNC;
 		break;
 	case PR_MTE_TCF_ASYNC:
-		tcf0 = SCTLR_EL1_TCF0_ASYNC;
+		sctlr |= SCTLR_EL1_TCF0_ASYNC;
 		break;
 	default:
 		return -EINVAL;
 	}
 
 	if (task != current) {
-		task->thread.sctlr_tcf0 = tcf0;
+		task->thread.sctlr_user = sctlr;
 		task->thread.gcr_user_excl = gcr_excl;
 	} else {
-		set_sctlr_el1_tcf0(tcf0);
+		set_task_sctlr_el1(sctlr);
 		set_gcr_el1_excl(gcr_excl);
 	}
 
@@ -277,7 +248,7 @@ long get_mte_ctrl(struct task_struct *task)
 
 	ret = incl << PR_MTE_TAG_SHIFT;
 
-	switch (task->thread.sctlr_tcf0) {
+	switch (task->thread.sctlr_user & SCTLR_EL1_TCF0_MASK) {
 	case SCTLR_EL1_TCF0_NONE:
 		ret |= PR_MTE_TCF_NONE;
 		break;
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 6616486a58fe..2c4b8194adae 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -529,6 +529,29 @@ static void erratum_1418040_thread_switch(struct task_struct *prev,
 	write_sysreg(val, cntkctl_el1);
 }
 
+#ifdef CONFIG_ARM64_NEED_SCTLR_USER
+static void update_sctlr_el1(u64 sctlr)
+{
+	sysreg_clear_set(sctlr_el1, SCTLR_USER_MASK, sctlr);
+
+	/* ISB required for the kernel uaccess routines when setting TCF0. */
+	isb();
+}
+
+void set_task_sctlr_el1(u64 sctlr)
+{
+	/*
+	 * __switch_to() checks current->thread.sctlr_user as an
+	 * optimisation. Disable preemption so that it does not see
+	 * the variable update before the SCTLR_EL1 one.
+	 */
+	preempt_disable();
+	current->thread.sctlr_user = sctlr;
+	update_sctlr_el1(sctlr);
+	preempt_enable();
+}
+#endif  /* CONFIG_ARM64_NEED_SCTLR_USER */
+
 /*
  * Thread switching.
  */
@@ -553,12 +576,11 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
 	 */
 	dsb(ish);
 
-	/*
-	 * MTE thread switching must happen after the DSB above to ensure that
-	 * any asynchronous tag check faults have been logged in the TFSR*_EL1
-	 * registers.
-	 */
-	mte_thread_switch(next);
+#ifdef CONFIG_ARM64_NEED_SCTLR_USER
+	/* avoid expensive SCTLR_EL1 accesses if no change */
+	if (prev->thread.sctlr_user != next->thread.sctlr_user)
+		update_sctlr_el1(next->thread.sctlr_user);
+#endif
 
 	/* the actual thread switch */
 	last = cpu_switch_to(prev, next);
-- 
2.29.2.729.g45daf8777d-goog


* [PATCH v6 2/3] arm64: Introduce prctl(PR_PAC_{SET,GET}_ENABLED_KEYS)
@ 2020-12-30  6:59   ` Peter Collingbourne
From: Peter Collingbourne @ 2020-12-30  6:59 UTC (permalink / raw)
  To: Catalin Marinas, Evgenii Stepanov, Kostya Serebryany,
	Vincenzo Frascino, Dave Martin, Szabolcs Nagy, Florian Weimer
  Cc: Peter Collingbourne, Linux ARM, Kevin Brodsky, Andrey Konovalov,
	Will Deacon, linux-api, libc-alpha

This change introduces a prctl that allows the user program to control
which PAC keys are enabled in a particular task. The main reason
why this is useful is to enable a userspace ABI that uses PAC to
sign and authenticate function pointers and other pointers exposed
outside of the function, while still allowing binaries conforming
to the ABI to interoperate with legacy binaries that do not sign or
authenticate pointers.

The idea is that a dynamic loader or early startup code would issue
this prctl very early after establishing that a process may load legacy
binaries, but before executing any PAC instructions.
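
As an example, a dynamic loader that has determined it must support
legacy binaries might make a call along these lines (a userspace
sketch; the PR_PAC_* constants are the ones this patch adds to
include/uapi/linux/prctl.h, and the calls succeed only on a kernel
with this patch and address authentication support):

  #include <stdio.h>
  #include <sys/prctl.h>
  #include <linux/prctl.h>        /* PR_PAC_* from patched headers */

  static void keep_only_ib_key(void)
  {
          /* arg2: keys affected; arg3: of those, keys left enabled */
          if (prctl(PR_PAC_SET_ENABLED_KEYS,
                    PR_PAC_APIAKEY | PR_PAC_APIBKEY |
                    PR_PAC_APDAKEY | PR_PAC_APDBKEY,
                    PR_PAC_APIBKEY, 0, 0) != 0)
                  perror("PR_PAC_SET_ENABLED_KEYS");

          /* read back the bitmask of currently enabled keys */
          printf("enabled keys: %#x\n",
                 (unsigned int)prctl(PR_PAC_GET_ENABLED_KEYS,
                                     0, 0, 0, 0));
  }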

This change adds a small amount of overhead to kernel entry and exit
due to additional required instruction sequences.

On a DragonBoard 845c (Cortex-A75) with the powersave governor, the
overhead of similar instruction sequences was measured as 4.9ns when
simulating the common case where IA is left enabled, or 43.7ns when
simulating the uncommon case where IA is disabled. These numbers can
be seen as a worst case: a more realistic scenario would use a
better-performing governor and a newer chip that, unlike Cortex-A75,
actually supports PAC and would be expected to be faster.

On an Apple M1 under a hypervisor, the overhead of the entry/exit
instruction sequences introduced by this patch was measured as 0.3ns
in the case where IA is left enabled, and 33.0ns in the case where
IA is disabled.

Signed-off-by: Peter Collingbourne <pcc@google.com>
Reviewed-by: Dave Martin <Dave.Martin@arm.com>
Link: https://linux-review.googlesource.com/id/Ibc41a5e6a76b275efbaa126b31119dc197b927a5
---
v6:
- rebased onto 5.11rc1
- changed the PR_PAC_{SET,GET}_ENABLED_KEYS values as a new prctl
  was introduced in 5.11

v5:
- rebased onto the kasan series
- fix some compat checks to check the correct task
- add some new perf measurements

v4:
- split the patch in two
- move the PAC || MTE check into a config option and make code
  conditional on it
- rename ptrauth_prctl_* -> ptrauth_*
- add some notes on fork/exec/clone behavior and performance to
  the doc
- change -1 to sizeof(long) in the ptrace code
- improve some comments
- add a WARN_ON to arg_to_enxx_mask

v3:
- fix some style nits
- move kernel entry ISB to after setting EnIA
- rename sctlr -> sctlr_user
- remove init_sctlr

v2:
- added prctl(PR_PAC_GET_ENABLED_KEYS)
- added ptrace APIs for getting and setting the set of enabled
  keys
- optimized the instruction sequence for kernel entry/exit
- rebased on top of MTE series

 .../arm64/pointer-authentication.rst          | 34 ++++++++++
 arch/arm64/Kconfig                            |  1 +
 arch/arm64/include/asm/mte.h                  |  4 +-
 arch/arm64/include/asm/pointer_auth.h         | 25 +++++++-
 arch/arm64/include/asm/processor.h            |  9 ++-
 arch/arm64/include/asm/sysreg.h               |  4 +-
 arch/arm64/kernel/asm-offsets.c               |  3 +
 arch/arm64/kernel/entry.S                     | 39 +++++++++++-
 arch/arm64/kernel/mte.c                       |  2 +-
 arch/arm64/kernel/pointer_auth.c              | 62 +++++++++++++++++++
 arch/arm64/kernel/process.c                   | 10 ++-
 arch/arm64/kernel/ptrace.c                    | 41 ++++++++++++
 include/uapi/linux/elf.h                      |  1 +
 include/uapi/linux/prctl.h                    |  4 ++
 kernel/sys.c                                  | 16 +++++
 15 files changed, 242 insertions(+), 13 deletions(-)

diff --git a/Documentation/arm64/pointer-authentication.rst b/Documentation/arm64/pointer-authentication.rst
index 30b2ab06526b..f127666ea3a8 100644
--- a/Documentation/arm64/pointer-authentication.rst
+++ b/Documentation/arm64/pointer-authentication.rst
@@ -107,3 +107,37 @@ filter out the Pointer Authentication system key registers from
 KVM_GET/SET_REG_* ioctls and mask those features from cpufeature ID
 register. Any attempt to use the Pointer Authentication instructions will
 result in an UNDEFINED exception being injected into the guest.
+
+
+Enabling and disabling keys
+---------------------------
+
+The prctl PR_PAC_SET_ENABLED_KEYS allows the user program to control which
+PAC keys are enabled in a particular task. It takes two arguments, the
+first being a bitmask of PR_PAC_APIAKEY, PR_PAC_APIBKEY, PR_PAC_APDAKEY
+and PR_PAC_APDBKEY specifying which keys shall be affected by this prctl,
+and the second being a bitmask of the same bits specifying whether the key
+should be enabled or disabled. For example::
+
+  prctl(PR_PAC_SET_ENABLED_KEYS,
+        PR_PAC_APIAKEY | PR_PAC_APIBKEY | PR_PAC_APDAKEY | PR_PAC_APDBKEY,
+        PR_PAC_APIBKEY, 0, 0);
+
+disables all keys except the IB key.
+
+The main reason why this is useful is to enable a userspace ABI that uses PAC
+instructions to sign and authenticate function pointers and other pointers
+exposed outside of the function, while still allowing binaries conforming to
+the ABI to interoperate with legacy binaries that do not sign or authenticate
+pointers.
+
+The idea is that a dynamic loader or early startup code would issue this
+prctl very early after establishing that a process may load legacy binaries,
+but before executing any PAC instructions.
+
+For compatibility with previous kernel versions, processes start up with IA,
+IB, DA and DB enabled, and are reset to this state on exec(). Processes created
+via fork() and clone() inherit the key enabled state from the calling process.
+
+It is recommended to avoid disabling the IA key, as this has higher performance
+overhead than disabling any of the other keys.
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 82e38d1ca012..fb705ef10c05 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1485,6 +1485,7 @@ config ARM64_PTR_AUTH
 	depends on LD_IS_LLD || LD_VERSION >= 233010000 || (CC_IS_GCC && GCC_VERSION < 90100)
 	depends on !CC_IS_CLANG || AS_HAS_CFI_NEGATE_RA_STATE
 	depends on (!FUNCTION_GRAPH_TRACER || DYNAMIC_FTRACE_WITH_REGS)
+	select ARM64_NEED_SCTLR_USER
 	help
 	  Pointer authentication (part of the ARMv8.3 Extensions) provides
 	  instructions for signing and authenticating pointers against secret
diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index 4e807969e767..05ac68938a90 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -41,7 +41,7 @@ void mte_free_tag_storage(char *storage);
 
 void mte_sync_tags(pte_t *ptep, pte_t pte);
 void mte_copy_page_tags(void *kto, const void *kfrom);
-void flush_mte_state(void);
+void mte_thread_init_user(void);
 void mte_suspend_exit(void);
 long set_mte_ctrl(struct task_struct *task, unsigned long arg);
 long get_mte_ctrl(struct task_struct *task);
@@ -61,7 +61,7 @@ static inline void mte_sync_tags(pte_t *ptep, pte_t pte)
 static inline void mte_copy_page_tags(void *kto, const void *kfrom)
 {
 }
-static inline void flush_mte_state(void)
+static inline void mte_thread_init_user(void)
 {
 }
 static inline void mte_suspend_exit(void)
diff --git a/arch/arm64/include/asm/pointer_auth.h b/arch/arm64/include/asm/pointer_auth.h
index c6b4f0603024..1a85e25d98ba 100644
--- a/arch/arm64/include/asm/pointer_auth.h
+++ b/arch/arm64/include/asm/pointer_auth.h
@@ -3,6 +3,7 @@
 #define __ASM_POINTER_AUTH_H
 
 #include <linux/bitops.h>
+#include <linux/prctl.h>
 #include <linux/random.h>
 
 #include <asm/cpufeature.h>
@@ -71,13 +72,26 @@ static __always_inline void ptrauth_keys_switch_kernel(struct ptrauth_keys_kerne
 
 extern int ptrauth_prctl_reset_keys(struct task_struct *tsk, unsigned long arg);
 
+extern int ptrauth_set_enabled_keys(struct task_struct *tsk, unsigned long keys,
+				    unsigned long enabled);
+extern int ptrauth_get_enabled_keys(struct task_struct *tsk);
+
 static inline unsigned long ptrauth_strip_insn_pac(unsigned long ptr)
 {
 	return ptrauth_clear_pac(ptr);
 }
 
-#define ptrauth_thread_init_user(tsk)					\
-	ptrauth_keys_init_user(&(tsk)->thread.keys_user)
+#define ptrauth_thread_init_user()                                             \
+	do {                                                                   \
+		ptrauth_keys_init_user(&current->thread.keys_user);            \
+									       \
+		/* enable all keys */                                          \
+		if (system_supports_address_auth())                            \
+			set_task_sctlr_el1(current->thread.sctlr_user |        \
+					   SCTLR_ELx_ENIA | SCTLR_ELx_ENIB |   \
+					   SCTLR_ELx_ENDA | SCTLR_ELx_ENDB);   \
+	} while (0)
+
 #define ptrauth_thread_init_kernel(tsk)					\
 	ptrauth_keys_init_kernel(&(tsk)->thread.keys_kernel)
 #define ptrauth_thread_switch_kernel(tsk)				\
@@ -85,10 +99,15 @@ static inline unsigned long ptrauth_strip_insn_pac(unsigned long ptr)
 
 #else /* CONFIG_ARM64_PTR_AUTH */
 #define ptrauth_prctl_reset_keys(tsk, arg)	(-EINVAL)
+#define ptrauth_set_enabled_keys(tsk, keys, enabled)	(-EINVAL)
+#define ptrauth_get_enabled_keys(tsk)	(-EINVAL)
 #define ptrauth_strip_insn_pac(lr)	(lr)
-#define ptrauth_thread_init_user(tsk)
+#define ptrauth_thread_init_user()
 #define ptrauth_thread_init_kernel(tsk)
 #define ptrauth_thread_switch_kernel(tsk)
 #endif /* CONFIG_ARM64_PTR_AUTH */
 
+#define PR_PAC_ENABLED_KEYS_MASK                                               \
+	(PR_PAC_APIAKEY | PR_PAC_APIBKEY | PR_PAC_APDAKEY | PR_PAC_APDBKEY)
+
 #endif /* __ASM_POINTER_AUTH_H */
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 3287d8888b24..ff2616401cc2 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -158,7 +158,9 @@ struct thread_struct {
 #endif
 };
 
-#define SCTLR_USER_MASK SCTLR_EL1_TCF0_MASK
+#define SCTLR_USER_MASK                                                        \
+	(SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | SCTLR_ELx_ENDA | SCTLR_ELx_ENDB |   \
+	 SCTLR_EL1_TCF0_MASK)
 
 static inline void arch_thread_struct_whitelist(unsigned long *offset,
 						unsigned long *size)
@@ -309,6 +311,11 @@ extern void __init minsigstksz_setup(void);
 /* PR_PAC_RESET_KEYS prctl */
 #define PAC_RESET_KEYS(tsk, arg)	ptrauth_prctl_reset_keys(tsk, arg)
 
+/* PR_PAC_{SET,GET}_ENABLED_KEYS prctl */
+#define PAC_SET_ENABLED_KEYS(tsk, keys, enabled)				\
+	ptrauth_set_enabled_keys(tsk, keys, enabled)
+#define PAC_GET_ENABLED_KEYS(tsk) ptrauth_get_enabled_keys(tsk)
+
 #ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
 /* PR_{SET,GET}_TAGGED_ADDR_CTRL prctl */
 long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg);
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 8b5e7e5c3cc8..4255c99bce41 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -559,8 +559,10 @@
 #define SCTLR_ELx_TCF_ASYNC	(UL(0x2) << SCTLR_ELx_TCF_SHIFT)
 #define SCTLR_ELx_TCF_MASK	(UL(0x3) << SCTLR_ELx_TCF_SHIFT)
 
+#define SCTLR_ELx_ENIA_SHIFT	31
+
 #define SCTLR_ELx_ITFSB	(BIT(37))
-#define SCTLR_ELx_ENIA	(BIT(31))
+#define SCTLR_ELx_ENIA	(BIT(SCTLR_ELx_ENIA_SHIFT))
 #define SCTLR_ELx_ENIB	(BIT(30))
 #define SCTLR_ELx_ENDA	(BIT(27))
 #define SCTLR_ELx_EE    (BIT(25))
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index f42fd9e33981..dc0907a0240c 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -43,6 +43,9 @@ int main(void)
 #endif
   BLANK();
   DEFINE(THREAD_CPU_CONTEXT,	offsetof(struct task_struct, thread.cpu_context));
+#ifdef CONFIG_ARM64_NEED_SCTLR_USER
+  DEFINE(THREAD_SCTLR_USER,	offsetof(struct task_struct, thread.sctlr_user));
+#endif
 #ifdef CONFIG_ARM64_PTR_AUTH
   DEFINE(THREAD_KEYS_USER,	offsetof(struct task_struct, thread.keys_user));
   DEFINE(THREAD_KEYS_KERNEL,	offsetof(struct task_struct, thread.keys_kernel));
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 2a93fa5f4e49..33037121fd0d 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -247,7 +247,24 @@ alternative_else_nop_endif
 	check_mte_async_tcf x19, x22
 	apply_ssbd 1, x22, x23
 
-	ptrauth_keys_install_kernel tsk, x20, x22, x23
+	ptrauth_keys_install_kernel_nosync tsk, x20, x22, x23
+
+#ifdef CONFIG_ARM64_PTR_AUTH
+alternative_if ARM64_HAS_ADDRESS_AUTH
+	/*
+	 * Enable IA for in-kernel PAC if the task had it disabled. Although
+	 * this could be implemented with an unconditional MRS which would avoid
+	 * a load, this was measured to be slower on Cortex-A75 and Cortex-A76.
+	 */
+	ldr	x0, [tsk, THREAD_SCTLR_USER]
+	tbnz	x0, SCTLR_ELx_ENIA_SHIFT, 1f
+	mrs	x0, sctlr_el1
+	orr	x0, x0, SCTLR_ELx_ENIA
+	msr	sctlr_el1, x0
+1:
+	isb
+alternative_else_nop_endif
+#endif
 
 	mte_set_kernel_gcr x22, x23
 
@@ -351,9 +368,27 @@ alternative_else_nop_endif
 3:
 	scs_save tsk, x0
 
-	/* No kernel C function calls after this as user keys are set. */
+	/*
+	 * No kernel C function calls after this as user keys are set and IA may
+	 * be disabled.
+	 */
 	ptrauth_keys_install_user tsk, x0, x1, x2
 
+#ifdef CONFIG_ARM64_PTR_AUTH
+alternative_if ARM64_HAS_ADDRESS_AUTH
+	/*
+	 * IA was enabled for in-kernel PAC. Disable it now if needed.
+	 * All other per-task SCTLR bits were updated on task switch.
+	 */
+	ldr	x0, [tsk, THREAD_SCTLR_USER]
+	tbnz	x0, SCTLR_ELx_ENIA_SHIFT, 1f
+	mrs	x0, sctlr_el1
+	bic	x0, x0, SCTLR_ELx_ENIA
+	msr	sctlr_el1, x0
+1:
+alternative_else_nop_endif
+#endif
+
 	mte_set_user_gcr tsk, x0, x1
 
 	apply_ssbd 0, x0, x1
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 48e8a75288a0..057ce3bbcf7c 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -180,7 +180,7 @@ static void set_gcr_el1_excl(u64 excl)
 	 */
 }
 
-void flush_mte_state(void)
+void mte_thread_init_user(void)
 {
 	if (!system_supports_mte())
 		return;
diff --git a/arch/arm64/kernel/pointer_auth.c b/arch/arm64/kernel/pointer_auth.c
index adb955fd9bdd..f03e5bfe4490 100644
--- a/arch/arm64/kernel/pointer_auth.c
+++ b/arch/arm64/kernel/pointer_auth.c
@@ -46,3 +46,65 @@ int ptrauth_prctl_reset_keys(struct task_struct *tsk, unsigned long arg)
 
 	return 0;
 }
+
+static u64 arg_to_enxx_mask(unsigned long arg)
+{
+	u64 sctlr_enxx_mask = 0;
+
+	WARN_ON(arg & ~PR_PAC_ENABLED_KEYS_MASK);
+	if (arg & PR_PAC_APIAKEY)
+		sctlr_enxx_mask |= SCTLR_ELx_ENIA;
+	if (arg & PR_PAC_APIBKEY)
+		sctlr_enxx_mask |= SCTLR_ELx_ENIB;
+	if (arg & PR_PAC_APDAKEY)
+		sctlr_enxx_mask |= SCTLR_ELx_ENDA;
+	if (arg & PR_PAC_APDBKEY)
+		sctlr_enxx_mask |= SCTLR_ELx_ENDB;
+	return sctlr_enxx_mask;
+}
+
+int ptrauth_set_enabled_keys(struct task_struct *tsk, unsigned long keys,
+			     unsigned long enabled)
+{
+	u64 sctlr = tsk->thread.sctlr_user;
+
+	if (!system_supports_address_auth())
+		return -EINVAL;
+
+	if (is_compat_thread(task_thread_info(tsk)))
+		return -EINVAL;
+
+	if ((keys & ~PR_PAC_ENABLED_KEYS_MASK) || (enabled & ~keys))
+		return -EINVAL;
+
+	sctlr &= ~arg_to_enxx_mask(keys);
+	sctlr |= arg_to_enxx_mask(enabled);
+	if (tsk == current)
+		set_task_sctlr_el1(sctlr);
+	else
+		tsk->thread.sctlr_user = sctlr;
+
+	return 0;
+}
+
+int ptrauth_get_enabled_keys(struct task_struct *tsk)
+{
+	int retval = 0;
+
+	if (!system_supports_address_auth())
+		return -EINVAL;
+
+	if (is_compat_thread(task_thread_info(tsk)))
+		return -EINVAL;
+
+	if (tsk->thread.sctlr_user & SCTLR_ELx_ENIA)
+		retval |= PR_PAC_APIAKEY;
+	if (tsk->thread.sctlr_user & SCTLR_ELx_ENIB)
+		retval |= PR_PAC_APIBKEY;
+	if (tsk->thread.sctlr_user & SCTLR_ELx_ENDA)
+		retval |= PR_PAC_APDAKEY;
+	if (tsk->thread.sctlr_user & SCTLR_ELx_ENDB)
+		retval |= PR_PAC_APDBKEY;
+
+	return retval;
+}
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 2c4b8194adae..a6b0edc67e41 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -339,7 +339,6 @@ void flush_thread(void)
 	tls_thread_flush();
 	flush_ptrace_hw_breakpoint(current);
 	flush_tagged_addr_state();
-	flush_mte_state();
 }
 
 void release_thread(struct task_struct *dead_task)
@@ -532,7 +531,11 @@ static void erratum_1418040_thread_switch(struct task_struct *prev,
 #ifdef CONFIG_ARM64_NEED_SCTLR_USER
 static void update_sctlr_el1(u64 sctlr)
 {
-	sysreg_clear_set(sctlr_el1, SCTLR_USER_MASK, sctlr);
+	/*
+	 * EnIA must not be cleared while in the kernel as this is necessary for
+	 * in-kernel PAC. It will be cleared on kernel exit if needed.
+	 */
+	sysreg_clear_set(sctlr_el1, SCTLR_USER_MASK & ~SCTLR_ELx_ENIA, sctlr);
 
 	/* ISB required for the kernel uaccess routines when setting TCF0. */
 	isb();
@@ -630,7 +633,8 @@ void arch_setup_new_exec(void)
 {
 	current->mm->context.flags = is_compat_task() ? MMCF_AARCH32 : 0;
 
-	ptrauth_thread_init_user(current);
+	ptrauth_thread_init_user();
+	mte_thread_init_user();
 
 	if (task_spec_ssb_noexec(current)) {
 		arch_prctl_spec_ctrl_set(current, PR_SPEC_STORE_BYPASS,
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 8ac487c84e37..c2c82b7bbf8f 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -908,6 +908,38 @@ static int pac_mask_get(struct task_struct *target,
 	return membuf_write(&to, &uregs, sizeof(uregs));
 }
 
+static int pac_enabled_keys_get(struct task_struct *target,
+				const struct user_regset *regset,
+				struct membuf to)
+{
+	long enabled_keys = ptrauth_get_enabled_keys(target);
+
+	if (IS_ERR_VALUE(enabled_keys))
+		return enabled_keys;
+
+	return membuf_write(&to, &enabled_keys, sizeof(enabled_keys));
+}
+
+static int pac_enabled_keys_set(struct task_struct *target,
+				const struct user_regset *regset,
+				unsigned int pos, unsigned int count,
+				const void *kbuf, const void __user *ubuf)
+{
+	int ret;
+	long enabled_keys = ptrauth_get_enabled_keys(target);
+
+	if (IS_ERR_VALUE(enabled_keys))
+		return enabled_keys;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &enabled_keys, 0,
+				 sizeof(long));
+	if (ret)
+		return ret;
+
+	return ptrauth_set_enabled_keys(target, PR_PAC_ENABLED_KEYS_MASK,
+					enabled_keys);
+}
+
 #ifdef CONFIG_CHECKPOINT_RESTORE
 static __uint128_t pac_key_to_user(const struct ptrauth_key *key)
 {
@@ -1073,6 +1105,7 @@ enum aarch64_regset {
 #endif
 #ifdef CONFIG_ARM64_PTR_AUTH
 	REGSET_PAC_MASK,
+	REGSET_PAC_ENABLED_KEYS,
 #ifdef CONFIG_CHECKPOINT_RESTORE
 	REGSET_PACA_KEYS,
 	REGSET_PACG_KEYS,
@@ -1159,6 +1192,14 @@ static const struct user_regset aarch64_regsets[] = {
 		.regset_get = pac_mask_get,
 		/* this cannot be set dynamically */
 	},
+	[REGSET_PAC_ENABLED_KEYS] = {
+		.core_note_type = NT_ARM_PAC_ENABLED_KEYS,
+		.n = 1,
+		.size = sizeof(long),
+		.align = sizeof(long),
+		.regset_get = pac_enabled_keys_get,
+		.set = pac_enabled_keys_set,
+	},
 #ifdef CONFIG_CHECKPOINT_RESTORE
 	[REGSET_PACA_KEYS] = {
 		.core_note_type = NT_ARM_PACA_KEYS,
diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index 30f68b42eeb5..61bf4774b8f2 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -426,6 +426,7 @@ typedef struct elf64_shdr {
 #define NT_ARM_PACA_KEYS	0x407	/* ARM pointer authentication address keys */
 #define NT_ARM_PACG_KEYS	0x408	/* ARM pointer authentication generic key */
 #define NT_ARM_TAGGED_ADDR_CTRL	0x409	/* arm64 tagged address control (prctl()) */
+#define NT_ARM_PAC_ENABLED_KEYS	0x40a	/* arm64 ptr auth enabled keys (prctl()) */
 #define NT_ARC_V2	0x600		/* ARCv2 accumulator/extra registers */
 #define NT_VMCOREDD	0x700		/* Vmcore Device Dump Note */
 #define NT_MIPS_DSP	0x800		/* MIPS DSP ASE registers */
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 90deb41c8a34..affc96453319 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -252,4 +252,8 @@ struct prctl_mm_map {
 # define PR_SYS_DISPATCH_OFF		0
 # define PR_SYS_DISPATCH_ON		1
 
+/* Set/get enabled arm64 pointer authentication keys */
+#define PR_PAC_SET_ENABLED_KEYS		60
+#define PR_PAC_GET_ENABLED_KEYS		61
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/sys.c b/kernel/sys.c
index 51f00fe20e4d..bb439f72cb8c 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -120,6 +120,12 @@
 #ifndef PAC_RESET_KEYS
 # define PAC_RESET_KEYS(a, b)	(-EINVAL)
 #endif
+#ifndef PAC_SET_ENABLED_KEYS
+# define PAC_SET_ENABLED_KEYS(a, b, c)	(-EINVAL)
+#endif
+#ifndef PAC_GET_ENABLED_KEYS
+# define PAC_GET_ENABLED_KEYS(a)	(-EINVAL)
+#endif
 #ifndef SET_TAGGED_ADDR_CTRL
 # define SET_TAGGED_ADDR_CTRL(a)	(-EINVAL)
 #endif
@@ -2498,6 +2504,16 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 			return -EINVAL;
 		error = PAC_RESET_KEYS(me, arg2);
 		break;
+	case PR_PAC_SET_ENABLED_KEYS:
+		if (arg4 || arg5)
+			return -EINVAL;
+		error = PAC_SET_ENABLED_KEYS(me, arg2, arg3);
+		break;
+	case PR_PAC_GET_ENABLED_KEYS:
+		if (arg2 || arg3 || arg4 || arg5)
+			return -EINVAL;
+		error = PAC_GET_ENABLED_KEYS(me);
+		break;
 	case PR_SET_TAGGED_ADDR_CTRL:
 		if (arg3 || arg4 || arg5)
 			return -EINVAL;
-- 
2.29.2.729.g45daf8777d-goog


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH v6 2/3] arm64: Introduce prctl(PR_PAC_{SET,GET}_ENABLED_KEYS)
@ 2020-12-30  6:59   ` Peter Collingbourne
  0 siblings, 0 replies; 22+ messages in thread
From: Peter Collingbourne @ 2020-12-30  6:59 UTC (permalink / raw)
  To: Catalin Marinas, Evgenii Stepanov, Kostya Serebryany,
	Vincenzo Frascino, Dave Martin, Szabolcs Nagy, Florian Weimer
  Cc: libc-alpha, Peter Collingbourne, Andrey Konovalov, Kevin Brodsky,
	linux-api, Will Deacon, Linux ARM

This change introduces a prctl that allows the user program to control
which PAC keys are enabled in a particular task. The main reason
why this is useful is to enable a userspace ABI that uses PAC to
sign and authenticate function pointers and other pointers exposed
outside of the function, while still allowing binaries conforming
to the ABI to interoperate with legacy binaries that do not sign or
authenticate pointers.

The idea is that a dynamic loader or early startup code would issue
this prctl very early after establishing that a process may load legacy
binaries, but before executing any PAC instructions.

This change adds a small amount of overhead to kernel entry and exit
due to additional required instruction sequences.

On a DragonBoard 845c (Cortex-A75) with the powersave governor, the
overhead of similar instruction sequences was measured as 4.9ns when
simulating the common case where IA is left enabled, or 43.7ns when
simulating the uncommon case where IA is disabled. These numbers can
be seen as the worst case scenario, since in more realistic scenarios
a better performing governor would be used and a newer chip would be
used that would support PAC unlike Cortex-A75 and would be expected
to be faster than Cortex-A75.

On an Apple M1 under a hypervisor, the overhead of the entry/exit
instruction sequences introduced by this patch was measured as 0.3ns
in the case where IA is left enabled, and 33.0ns in the case where
IA is disabled.

Signed-off-by: Peter Collingbourne <pcc@google.com>
Reviewed-by: Dave Martin <Dave.Martin@arm.com>
Link: https://linux-review.googlesource.com/id/Ibc41a5e6a76b275efbaa126b31119dc197b927a5
---
v6:
- rebased onto 5.11rc1
- changed the PR_PAC_{SET,GET}_ENABLED_KEYS values as a new prctl
  was introduced in 5.11

v5:
- rebased onto the kasan series
- fix some compat checks to check the correct task
- add some new perf measurements

v4:
- split the patch in two
- move the PAC || MTE into a config and make code conditional
  on it
- rename ptrauth_prctl_* -> ptrauth_*
- add some notes on fork/exec/clone behavior and performance to
  the doc
- change -1 to sizeof(long) in the ptrace code
- improve some comments
- add a WARN_ON to arg_to_enxx_mask

v3:
- fix some style nits
- move kernel entry ISB to after setting EnIA
- rename sctlr -> sctlr_user
- remove init_sctlr

v2:
- added prctl(PR_PAC_GET_ENABLED_KEYS)
- added ptrace APIs for getting and setting the set of enabled
  keys
- optimized the instruction sequence for kernel entry/exit
- rebased on top of MTE series

 .../arm64/pointer-authentication.rst          | 34 ++++++++++
 arch/arm64/Kconfig                            |  1 +
 arch/arm64/include/asm/mte.h                  |  4 +-
 arch/arm64/include/asm/pointer_auth.h         | 25 +++++++-
 arch/arm64/include/asm/processor.h            |  9 ++-
 arch/arm64/include/asm/sysreg.h               |  4 +-
 arch/arm64/kernel/asm-offsets.c               |  3 +
 arch/arm64/kernel/entry.S                     | 39 +++++++++++-
 arch/arm64/kernel/mte.c                       |  2 +-
 arch/arm64/kernel/pointer_auth.c              | 62 +++++++++++++++++++
 arch/arm64/kernel/process.c                   | 10 ++-
 arch/arm64/kernel/ptrace.c                    | 41 ++++++++++++
 include/uapi/linux/elf.h                      |  1 +
 include/uapi/linux/prctl.h                    |  4 ++
 kernel/sys.c                                  | 16 +++++
 15 files changed, 242 insertions(+), 13 deletions(-)

diff --git a/Documentation/arm64/pointer-authentication.rst b/Documentation/arm64/pointer-authentication.rst
index 30b2ab06526b..f127666ea3a8 100644
--- a/Documentation/arm64/pointer-authentication.rst
+++ b/Documentation/arm64/pointer-authentication.rst
@@ -107,3 +107,37 @@ filter out the Pointer Authentication system key registers from
 KVM_GET/SET_REG_* ioctls and mask those features from cpufeature ID
 register. Any attempt to use the Pointer Authentication instructions will
 result in an UNDEFINED exception being injected into the guest.
+
+
+Enabling and disabling keys
+---------------------------
+
+The prctl PR_PAC_SET_ENABLED_KEYS allows the user program to control which
+PAC keys are enabled in a particular task. It takes two arguments, the
+first being a bitmask of PR_PAC_APIAKEY, PR_PAC_APIBKEY, PR_PAC_APDAKEY
+and PR_PAC_APDBKEY specifying which keys shall be affected by this prctl,
+and the second being a bitmask of the same bits specifying whether the key
+should be enabled or disabled. For example::
+
+  prctl(PR_PAC_SET_ENABLED_KEYS,
+        PR_PAC_APIAKEY | PR_PAC_APIBKEY | PR_PAC_APDAKEY | PR_PAC_APDBKEY,
+        PR_PAC_APIBKEY, 0, 0);
+
+disables all keys except the IB key.
+
+The main reason why this is useful is to enable a userspace ABI that uses PAC
+instructions to sign and authenticate function pointers and other pointers
+exposed outside of the function, while still allowing binaries conforming to
+the ABI to interoperate with legacy binaries that do not sign or authenticate
+pointers.
+
+The idea is that a dynamic loader or early startup code would issue this
+prctl very early after establishing that a process may load legacy binaries,
+but before executing any PAC instructions.
+
+For compatibility with previous kernel versions, processes start up with IA,
+IB, DA and DB enabled, and are reset to this state on exec(). Processes created
+via fork() and clone() inherit the key enabled state from the calling process.
+
+It is recommended to avoid disabling the IA key, as this has higher performance
+overhead than disabling any of the other keys.
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 82e38d1ca012..fb705ef10c05 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1485,6 +1485,7 @@ config ARM64_PTR_AUTH
 	depends on LD_IS_LLD || LD_VERSION >= 233010000 || (CC_IS_GCC && GCC_VERSION < 90100)
 	depends on !CC_IS_CLANG || AS_HAS_CFI_NEGATE_RA_STATE
 	depends on (!FUNCTION_GRAPH_TRACER || DYNAMIC_FTRACE_WITH_REGS)
+	select ARM64_NEED_SCTLR_USER
 	help
 	  Pointer authentication (part of the ARMv8.3 Extensions) provides
 	  instructions for signing and authenticating pointers against secret
diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index 4e807969e767..05ac68938a90 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -41,7 +41,7 @@ void mte_free_tag_storage(char *storage);
 
 void mte_sync_tags(pte_t *ptep, pte_t pte);
 void mte_copy_page_tags(void *kto, const void *kfrom);
-void flush_mte_state(void);
+void mte_thread_init_user(void);
 void mte_suspend_exit(void);
 long set_mte_ctrl(struct task_struct *task, unsigned long arg);
 long get_mte_ctrl(struct task_struct *task);
@@ -61,7 +61,7 @@ static inline void mte_sync_tags(pte_t *ptep, pte_t pte)
 static inline void mte_copy_page_tags(void *kto, const void *kfrom)
 {
 }
-static inline void flush_mte_state(void)
+static inline void mte_thread_init_user(void)
 {
 }
 static inline void mte_suspend_exit(void)
diff --git a/arch/arm64/include/asm/pointer_auth.h b/arch/arm64/include/asm/pointer_auth.h
index c6b4f0603024..1a85e25d98ba 100644
--- a/arch/arm64/include/asm/pointer_auth.h
+++ b/arch/arm64/include/asm/pointer_auth.h
@@ -3,6 +3,7 @@
 #define __ASM_POINTER_AUTH_H
 
 #include <linux/bitops.h>
+#include <linux/prctl.h>
 #include <linux/random.h>
 
 #include <asm/cpufeature.h>
@@ -71,13 +72,26 @@ static __always_inline void ptrauth_keys_switch_kernel(struct ptrauth_keys_kerne
 
 extern int ptrauth_prctl_reset_keys(struct task_struct *tsk, unsigned long arg);
 
+extern int ptrauth_set_enabled_keys(struct task_struct *tsk, unsigned long keys,
+				    unsigned long enabled);
+extern int ptrauth_get_enabled_keys(struct task_struct *tsk);
+
 static inline unsigned long ptrauth_strip_insn_pac(unsigned long ptr)
 {
 	return ptrauth_clear_pac(ptr);
 }
 
-#define ptrauth_thread_init_user(tsk)					\
-	ptrauth_keys_init_user(&(tsk)->thread.keys_user)
+#define ptrauth_thread_init_user()                                             \
+	do {                                                                   \
+		ptrauth_keys_init_user(&current->thread.keys_user);            \
+									       \
+		/* enable all keys */                                          \
+		if (system_supports_address_auth())                            \
+			set_task_sctlr_el1(current->thread.sctlr_user |        \
+					   SCTLR_ELx_ENIA | SCTLR_ELx_ENIB |   \
+					   SCTLR_ELx_ENDA | SCTLR_ELx_ENDB);   \
+	} while (0)
+
 #define ptrauth_thread_init_kernel(tsk)					\
 	ptrauth_keys_init_kernel(&(tsk)->thread.keys_kernel)
 #define ptrauth_thread_switch_kernel(tsk)				\
@@ -85,10 +99,15 @@ static inline unsigned long ptrauth_strip_insn_pac(unsigned long ptr)
 
 #else /* CONFIG_ARM64_PTR_AUTH */
 #define ptrauth_prctl_reset_keys(tsk, arg)	(-EINVAL)
+#define ptrauth_set_enabled_keys(tsk, keys, enabled)	(-EINVAL)
+#define ptrauth_get_enabled_keys(tsk)	(-EINVAL)
 #define ptrauth_strip_insn_pac(lr)	(lr)
-#define ptrauth_thread_init_user(tsk)
+#define ptrauth_thread_init_user()
 #define ptrauth_thread_init_kernel(tsk)
 #define ptrauth_thread_switch_kernel(tsk)
 #endif /* CONFIG_ARM64_PTR_AUTH */
 
+#define PR_PAC_ENABLED_KEYS_MASK                                               \
+	(PR_PAC_APIAKEY | PR_PAC_APIBKEY | PR_PAC_APDAKEY | PR_PAC_APDBKEY)
+
 #endif /* __ASM_POINTER_AUTH_H */
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 3287d8888b24..ff2616401cc2 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -158,7 +158,9 @@ struct thread_struct {
 #endif
 };
 
-#define SCTLR_USER_MASK SCTLR_EL1_TCF0_MASK
+#define SCTLR_USER_MASK                                                        \
+	(SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | SCTLR_ELx_ENDA | SCTLR_ELx_ENDB |   \
+	 SCTLR_EL1_TCF0_MASK)
 
 static inline void arch_thread_struct_whitelist(unsigned long *offset,
 						unsigned long *size)
@@ -309,6 +311,11 @@ extern void __init minsigstksz_setup(void);
 /* PR_PAC_RESET_KEYS prctl */
 #define PAC_RESET_KEYS(tsk, arg)	ptrauth_prctl_reset_keys(tsk, arg)
 
+/* PR_PAC_{SET,GET}_ENABLED_KEYS prctl */
+#define PAC_SET_ENABLED_KEYS(tsk, keys, enabled)				\
+	ptrauth_set_enabled_keys(tsk, keys, enabled)
+#define PAC_GET_ENABLED_KEYS(tsk) ptrauth_get_enabled_keys(tsk)
+
 #ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
 /* PR_{SET,GET}_TAGGED_ADDR_CTRL prctl */
 long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg);
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 8b5e7e5c3cc8..4255c99bce41 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -559,8 +559,10 @@
 #define SCTLR_ELx_TCF_ASYNC	(UL(0x2) << SCTLR_ELx_TCF_SHIFT)
 #define SCTLR_ELx_TCF_MASK	(UL(0x3) << SCTLR_ELx_TCF_SHIFT)
 
+#define SCTLR_ELx_ENIA_SHIFT	31
+
 #define SCTLR_ELx_ITFSB	(BIT(37))
-#define SCTLR_ELx_ENIA	(BIT(31))
+#define SCTLR_ELx_ENIA	(BIT(SCTLR_ELx_ENIA_SHIFT))
 #define SCTLR_ELx_ENIB	(BIT(30))
 #define SCTLR_ELx_ENDA	(BIT(27))
 #define SCTLR_ELx_EE    (BIT(25))
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index f42fd9e33981..dc0907a0240c 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -43,6 +43,9 @@ int main(void)
 #endif
   BLANK();
   DEFINE(THREAD_CPU_CONTEXT,	offsetof(struct task_struct, thread.cpu_context));
+#ifdef CONFIG_ARM64_NEED_SCTLR_USER
+  DEFINE(THREAD_SCTLR_USER,	offsetof(struct task_struct, thread.sctlr_user));
+#endif
 #ifdef CONFIG_ARM64_PTR_AUTH
   DEFINE(THREAD_KEYS_USER,	offsetof(struct task_struct, thread.keys_user));
   DEFINE(THREAD_KEYS_KERNEL,	offsetof(struct task_struct, thread.keys_kernel));
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 2a93fa5f4e49..33037121fd0d 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -247,7 +247,24 @@ alternative_else_nop_endif
 	check_mte_async_tcf x19, x22
 	apply_ssbd 1, x22, x23
 
-	ptrauth_keys_install_kernel tsk, x20, x22, x23
+	ptrauth_keys_install_kernel_nosync tsk, x20, x22, x23
+
+#ifdef CONFIG_ARM64_PTR_AUTH
+alternative_if ARM64_HAS_ADDRESS_AUTH
+	/*
+	 * Enable IA for in-kernel PAC if the task had it disabled. Although
+	 * this could be implemented with an unconditional MRS which would avoid
+	 * a load, this was measured to be slower on Cortex-A75 and Cortex-A76.
+	 */
+	ldr	x0, [tsk, THREAD_SCTLR_USER]
+	tbnz	x0, SCTLR_ELx_ENIA_SHIFT, 1f
+	mrs	x0, sctlr_el1
+	orr	x0, x0, SCTLR_ELx_ENIA
+	msr	sctlr_el1, x0
+1:
+	isb
+alternative_else_nop_endif
+#endif
 
 	mte_set_kernel_gcr x22, x23
 
@@ -351,9 +368,27 @@ alternative_else_nop_endif
 3:
 	scs_save tsk, x0
 
-	/* No kernel C function calls after this as user keys are set. */
+	/*
+	 * No kernel C function calls after this as user keys are set and IA may
+	 * be disabled.
+	 */
 	ptrauth_keys_install_user tsk, x0, x1, x2
 
+#ifdef CONFIG_ARM64_PTR_AUTH
+alternative_if ARM64_HAS_ADDRESS_AUTH
+	/*
+	 * IA was enabled for in-kernel PAC. Disable it now if needed.
+	 * All other per-task SCTLR bits were updated on task switch.
+	 */
+	ldr	x0, [tsk, THREAD_SCTLR_USER]
+	tbnz	x0, SCTLR_ELx_ENIA_SHIFT, 1f
+	mrs	x0, sctlr_el1
+	bic	x0, x0, SCTLR_ELx_ENIA
+	msr	sctlr_el1, x0
+1:
+alternative_else_nop_endif
+#endif
+
 	mte_set_user_gcr tsk, x0, x1
 
 	apply_ssbd 0, x0, x1
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 48e8a75288a0..057ce3bbcf7c 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -180,7 +180,7 @@ static void set_gcr_el1_excl(u64 excl)
 	 */
 }
 
-void flush_mte_state(void)
+void mte_thread_init_user(void)
 {
 	if (!system_supports_mte())
 		return;
diff --git a/arch/arm64/kernel/pointer_auth.c b/arch/arm64/kernel/pointer_auth.c
index adb955fd9bdd..f03e5bfe4490 100644
--- a/arch/arm64/kernel/pointer_auth.c
+++ b/arch/arm64/kernel/pointer_auth.c
@@ -46,3 +46,65 @@ int ptrauth_prctl_reset_keys(struct task_struct *tsk, unsigned long arg)
 
 	return 0;
 }
+
+static u64 arg_to_enxx_mask(unsigned long arg)
+{
+	u64 sctlr_enxx_mask = 0;
+
+	WARN_ON(arg & ~PR_PAC_ENABLED_KEYS_MASK);
+	if (arg & PR_PAC_APIAKEY)
+		sctlr_enxx_mask |= SCTLR_ELx_ENIA;
+	if (arg & PR_PAC_APIBKEY)
+		sctlr_enxx_mask |= SCTLR_ELx_ENIB;
+	if (arg & PR_PAC_APDAKEY)
+		sctlr_enxx_mask |= SCTLR_ELx_ENDA;
+	if (arg & PR_PAC_APDBKEY)
+		sctlr_enxx_mask |= SCTLR_ELx_ENDB;
+	return sctlr_enxx_mask;
+}
+
+int ptrauth_set_enabled_keys(struct task_struct *tsk, unsigned long keys,
+			     unsigned long enabled)
+{
+	u64 sctlr = tsk->thread.sctlr_user;
+
+	if (!system_supports_address_auth())
+		return -EINVAL;
+
+	if (is_compat_thread(task_thread_info(tsk)))
+		return -EINVAL;
+
+	if ((keys & ~PR_PAC_ENABLED_KEYS_MASK) || (enabled & ~keys))
+		return -EINVAL;
+
+	sctlr &= ~arg_to_enxx_mask(keys);
+	sctlr |= arg_to_enxx_mask(enabled);
+	if (tsk == current)
+		set_task_sctlr_el1(sctlr);
+	else
+		tsk->thread.sctlr_user = sctlr;
+
+	return 0;
+}
+
+int ptrauth_get_enabled_keys(struct task_struct *tsk)
+{
+	int retval = 0;
+
+	if (!system_supports_address_auth())
+		return -EINVAL;
+
+	if (is_compat_thread(task_thread_info(tsk)))
+		return -EINVAL;
+
+	if (tsk->thread.sctlr_user & SCTLR_ELx_ENIA)
+		retval |= PR_PAC_APIAKEY;
+	if (tsk->thread.sctlr_user & SCTLR_ELx_ENIB)
+		retval |= PR_PAC_APIBKEY;
+	if (tsk->thread.sctlr_user & SCTLR_ELx_ENDA)
+		retval |= PR_PAC_APDAKEY;
+	if (tsk->thread.sctlr_user & SCTLR_ELx_ENDB)
+		retval |= PR_PAC_APDBKEY;
+
+	return retval;
+}
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 2c4b8194adae..a6b0edc67e41 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -339,7 +339,6 @@ void flush_thread(void)
 	tls_thread_flush();
 	flush_ptrace_hw_breakpoint(current);
 	flush_tagged_addr_state();
-	flush_mte_state();
 }
 
 void release_thread(struct task_struct *dead_task)
@@ -532,7 +531,11 @@ static void erratum_1418040_thread_switch(struct task_struct *prev,
 #ifdef CONFIG_ARM64_NEED_SCTLR_USER
 static void update_sctlr_el1(u64 sctlr)
 {
-	sysreg_clear_set(sctlr_el1, SCTLR_USER_MASK, sctlr);
+	/*
+	 * EnIA must not be cleared while in the kernel as this is necessary for
+	 * in-kernel PAC. It will be cleared on kernel exit if needed.
+	 */
+	sysreg_clear_set(sctlr_el1, SCTLR_USER_MASK & ~SCTLR_ELx_ENIA, sctlr);
 
 	/* ISB required for the kernel uaccess routines when setting TCF0. */
 	isb();
@@ -630,7 +633,8 @@ void arch_setup_new_exec(void)
 {
 	current->mm->context.flags = is_compat_task() ? MMCF_AARCH32 : 0;
 
-	ptrauth_thread_init_user(current);
+	ptrauth_thread_init_user();
+	mte_thread_init_user();
 
 	if (task_spec_ssb_noexec(current)) {
 		arch_prctl_spec_ctrl_set(current, PR_SPEC_STORE_BYPASS,
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 8ac487c84e37..c2c82b7bbf8f 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -908,6 +908,38 @@ static int pac_mask_get(struct task_struct *target,
 	return membuf_write(&to, &uregs, sizeof(uregs));
 }
 
+static int pac_enabled_keys_get(struct task_struct *target,
+				const struct user_regset *regset,
+				struct membuf to)
+{
+	long enabled_keys = ptrauth_get_enabled_keys(target);
+
+	if (IS_ERR_VALUE(enabled_keys))
+		return enabled_keys;
+
+	return membuf_write(&to, &enabled_keys, sizeof(enabled_keys));
+}
+
+static int pac_enabled_keys_set(struct task_struct *target,
+				const struct user_regset *regset,
+				unsigned int pos, unsigned int count,
+				const void *kbuf, const void __user *ubuf)
+{
+	int ret;
+	long enabled_keys = ptrauth_get_enabled_keys(target);
+
+	if (IS_ERR_VALUE(enabled_keys))
+		return enabled_keys;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &enabled_keys, 0,
+				 sizeof(long));
+	if (ret)
+		return ret;
+
+	return ptrauth_set_enabled_keys(target, PR_PAC_ENABLED_KEYS_MASK,
+					enabled_keys);
+}
+
 #ifdef CONFIG_CHECKPOINT_RESTORE
 static __uint128_t pac_key_to_user(const struct ptrauth_key *key)
 {
@@ -1073,6 +1105,7 @@ enum aarch64_regset {
 #endif
 #ifdef CONFIG_ARM64_PTR_AUTH
 	REGSET_PAC_MASK,
+	REGSET_PAC_ENABLED_KEYS,
 #ifdef CONFIG_CHECKPOINT_RESTORE
 	REGSET_PACA_KEYS,
 	REGSET_PACG_KEYS,
@@ -1159,6 +1192,14 @@ static const struct user_regset aarch64_regsets[] = {
 		.regset_get = pac_mask_get,
 		/* this cannot be set dynamically */
 	},
+	[REGSET_PAC_ENABLED_KEYS] = {
+		.core_note_type = NT_ARM_PAC_ENABLED_KEYS,
+		.n = 1,
+		.size = sizeof(long),
+		.align = sizeof(long),
+		.regset_get = pac_enabled_keys_get,
+		.set = pac_enabled_keys_set,
+	},
 #ifdef CONFIG_CHECKPOINT_RESTORE
 	[REGSET_PACA_KEYS] = {
 		.core_note_type = NT_ARM_PACA_KEYS,
diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index 30f68b42eeb5..61bf4774b8f2 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -426,6 +426,7 @@ typedef struct elf64_shdr {
 #define NT_ARM_PACA_KEYS	0x407	/* ARM pointer authentication address keys */
 #define NT_ARM_PACG_KEYS	0x408	/* ARM pointer authentication generic key */
 #define NT_ARM_TAGGED_ADDR_CTRL	0x409	/* arm64 tagged address control (prctl()) */
+#define NT_ARM_PAC_ENABLED_KEYS	0x40a	/* arm64 ptr auth enabled keys (prctl()) */
 #define NT_ARC_V2	0x600		/* ARCv2 accumulator/extra registers */
 #define NT_VMCOREDD	0x700		/* Vmcore Device Dump Note */
 #define NT_MIPS_DSP	0x800		/* MIPS DSP ASE registers */
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 90deb41c8a34..affc96453319 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -252,4 +252,8 @@ struct prctl_mm_map {
 # define PR_SYS_DISPATCH_OFF		0
 # define PR_SYS_DISPATCH_ON		1
 
+/* Set/get enabled arm64 pointer authentication keys */
+#define PR_PAC_SET_ENABLED_KEYS		60
+#define PR_PAC_GET_ENABLED_KEYS		61
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/sys.c b/kernel/sys.c
index 51f00fe20e4d..bb439f72cb8c 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -120,6 +120,12 @@
 #ifndef PAC_RESET_KEYS
 # define PAC_RESET_KEYS(a, b)	(-EINVAL)
 #endif
+#ifndef PAC_SET_ENABLED_KEYS
+# define PAC_SET_ENABLED_KEYS(a, b, c)	(-EINVAL)
+#endif
+#ifndef PAC_GET_ENABLED_KEYS
+# define PAC_GET_ENABLED_KEYS(a)	(-EINVAL)
+#endif
 #ifndef SET_TAGGED_ADDR_CTRL
 # define SET_TAGGED_ADDR_CTRL(a)	(-EINVAL)
 #endif
@@ -2498,6 +2504,16 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 			return -EINVAL;
 		error = PAC_RESET_KEYS(me, arg2);
 		break;
+	case PR_PAC_SET_ENABLED_KEYS:
+		if (arg4 || arg5)
+			return -EINVAL;
+		error = PAC_SET_ENABLED_KEYS(me, arg2, arg3);
+		break;
+	case PR_PAC_GET_ENABLED_KEYS:
+		if (arg2 || arg3 || arg4 || arg5)
+			return -EINVAL;
+		error = PAC_GET_ENABLED_KEYS(me);
+		break;
 	case PR_SET_TAGGED_ADDR_CTRL:
 		if (arg3 || arg4 || arg5)
 			return -EINVAL;
-- 
2.29.2.729.g45daf8777d-goog
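
As a usage sketch, user code would drive the new interface roughly as
follows. This is not part of the patch: it assumes a kernel with this
series applied, and it defines the prctl constants locally in case the
installed uapi headers predate them.

#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_PAC_SET_ENABLED_KEYS
# define PR_PAC_SET_ENABLED_KEYS	60
# define PR_PAC_GET_ENABLED_KEYS	61
#endif
#ifndef PR_PAC_APIAKEY
# define PR_PAC_APIAKEY			(1UL << 0)
#endif

int main(void)
{
	long keys;

	/*
	 * Disable IA for this task, e.g. from a dynamic loader that has
	 * detected a legacy binary, before any PAC instructions execute.
	 * arg2 selects the keys to affect; arg3 is the subset to enable.
	 */
	if (prctl(PR_PAC_SET_ENABLED_KEYS, PR_PAC_APIAKEY, 0, 0, 0))
		perror("PR_PAC_SET_ENABLED_KEYS");

	/* Read back the mask of currently enabled keys. */
	keys = prctl(PR_PAC_GET_ENABLED_KEYS, 0, 0, 0, 0);
	if (keys < 0)
		perror("PR_PAC_GET_ENABLED_KEYS");
	else
		printf("enabled keys: %#lx\n", keys);

	return 0;
}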


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH v6 3/3] arm64: pac: Optimize kernel entry/exit key installation code paths
  2020-12-30  6:59 ` Peter Collingbourne
@ 2020-12-30  6:59   ` Peter Collingbourne
  -1 siblings, 0 replies; 22+ messages in thread
From: Peter Collingbourne @ 2020-12-30  6:59 UTC (permalink / raw)
  To: Catalin Marinas, Evgenii Stepanov, Kostya Serebryany,
	Vincenzo Frascino, Dave Martin, Szabolcs Nagy, Florian Weimer
  Cc: Peter Collingbourne, Linux ARM, Kevin Brodsky, Andrey Konovalov,
	Will Deacon, linux-api, libc-alpha

The kernel does not use any keys besides IA, so we don't need to
install IB/DA/DB/GA on kernel exit if we arrange to install them
on task switch instead, which we can expect to happen an order of
magnitude less often.

Furthermore, we can avoid installing the user IA in the case where
the user task has IA disabled, and can just leave the kernel IA
installed. This also lets us avoid installing IA on kernel entry.

On an Apple M1 under a hypervisor, the overhead of kernel entry/exit
has been measured to be reduced by 15.6ns in the case where IA is
enabled, and 31.9ns in the case where IA is disabled.

Signed-off-by: Peter Collingbourne <pcc@google.com>
Link: https://linux-review.googlesource.com/id/Ieddf6b580d23c9e0bed45a822dabe72d2ffc9a8e
---
 arch/arm64/include/asm/asm_pointer_auth.h | 20 +------------
 arch/arm64/include/asm/pointer_auth.h     | 36 ++++++++++++++++++-----
 arch/arm64/kernel/asm-offsets.c           |  4 ---
 arch/arm64/kernel/entry.S                 | 33 ++++++++++++---------
 arch/arm64/kernel/pointer_auth.c          |  1 +
 arch/arm64/kernel/process.c               |  1 +
 arch/arm64/kernel/suspend.c               |  3 +-
 7 files changed, 53 insertions(+), 45 deletions(-)

diff --git a/arch/arm64/include/asm/asm_pointer_auth.h b/arch/arm64/include/asm/asm_pointer_auth.h
index 52dead2a8640..8ca2dc0661ee 100644
--- a/arch/arm64/include/asm/asm_pointer_auth.h
+++ b/arch/arm64/include/asm/asm_pointer_auth.h
@@ -13,30 +13,12 @@
  * so use the base value of ldp as thread.keys_user and offset as
  * thread.keys_user.ap*.
  */
-	.macro ptrauth_keys_install_user tsk, tmp1, tmp2, tmp3
+	.macro __ptrauth_keys_install_user tsk, tmp1, tmp2, tmp3
 	mov	\tmp1, #THREAD_KEYS_USER
 	add	\tmp1, \tsk, \tmp1
-alternative_if_not ARM64_HAS_ADDRESS_AUTH
-	b	.Laddr_auth_skip_\@
-alternative_else_nop_endif
 	ldp	\tmp2, \tmp3, [\tmp1, #PTRAUTH_USER_KEY_APIA]
 	msr_s	SYS_APIAKEYLO_EL1, \tmp2
 	msr_s	SYS_APIAKEYHI_EL1, \tmp3
-	ldp	\tmp2, \tmp3, [\tmp1, #PTRAUTH_USER_KEY_APIB]
-	msr_s	SYS_APIBKEYLO_EL1, \tmp2
-	msr_s	SYS_APIBKEYHI_EL1, \tmp3
-	ldp	\tmp2, \tmp3, [\tmp1, #PTRAUTH_USER_KEY_APDA]
-	msr_s	SYS_APDAKEYLO_EL1, \tmp2
-	msr_s	SYS_APDAKEYHI_EL1, \tmp3
-	ldp	\tmp2, \tmp3, [\tmp1, #PTRAUTH_USER_KEY_APDB]
-	msr_s	SYS_APDBKEYLO_EL1, \tmp2
-	msr_s	SYS_APDBKEYHI_EL1, \tmp3
-.Laddr_auth_skip_\@:
-alternative_if ARM64_HAS_GENERIC_AUTH
-	ldp	\tmp2, \tmp3, [\tmp1, #PTRAUTH_USER_KEY_APGA]
-	msr_s	SYS_APGAKEYLO_EL1, \tmp2
-	msr_s	SYS_APGAKEYHI_EL1, \tmp3
-alternative_else_nop_endif
 	.endm
 
 	.macro __ptrauth_keys_install_kernel_nosync tsk, tmp1, tmp2, tmp3
diff --git a/arch/arm64/include/asm/pointer_auth.h b/arch/arm64/include/asm/pointer_auth.h
index 1a85e25d98ba..3e40fc776ea3 100644
--- a/arch/arm64/include/asm/pointer_auth.h
+++ b/arch/arm64/include/asm/pointer_auth.h
@@ -35,6 +35,25 @@ struct ptrauth_keys_kernel {
 	struct ptrauth_key apia;
 };
 
+#define __ptrauth_key_install_nosync(k, v)			\
+do {								\
+	struct ptrauth_key __pki_v = (v);			\
+	write_sysreg_s(__pki_v.lo, SYS_ ## k ## KEYLO_EL1);	\
+	write_sysreg_s(__pki_v.hi, SYS_ ## k ## KEYHI_EL1);	\
+} while (0)
+
+static inline void ptrauth_keys_install_user(struct ptrauth_keys_user *keys)
+{
+	if (system_supports_address_auth()) {
+		__ptrauth_key_install_nosync(APIB, keys->apib);
+		__ptrauth_key_install_nosync(APDA, keys->apda);
+		__ptrauth_key_install_nosync(APDB, keys->apdb);
+	}
+
+	if (system_supports_generic_auth())
+		__ptrauth_key_install_nosync(APGA, keys->apga);
+}
+
 static inline void ptrauth_keys_init_user(struct ptrauth_keys_user *keys)
 {
 	if (system_supports_address_auth()) {
@@ -46,14 +65,9 @@ static inline void ptrauth_keys_init_user(struct ptrauth_keys_user *keys)
 
 	if (system_supports_generic_auth())
 		get_random_bytes(&keys->apga, sizeof(keys->apga));
-}
 
-#define __ptrauth_key_install_nosync(k, v)			\
-do {								\
-	struct ptrauth_key __pki_v = (v);			\
-	write_sysreg_s(__pki_v.lo, SYS_ ## k ## KEYLO_EL1);	\
-	write_sysreg_s(__pki_v.hi, SYS_ ## k ## KEYHI_EL1);	\
-} while (0)
+	ptrauth_keys_install_user(keys);
+}
 
 static __always_inline void ptrauth_keys_init_kernel(struct ptrauth_keys_kernel *keys)
 {
@@ -81,6 +95,9 @@ static inline unsigned long ptrauth_strip_insn_pac(unsigned long ptr)
 	return ptrauth_clear_pac(ptr);
 }
 
+#define ptrauth_suspend_exit()                                                 \
+	ptrauth_keys_install_user(&current->thread.keys_user)
+
 #define ptrauth_thread_init_user()                                             \
 	do {                                                                   \
 		ptrauth_keys_init_user(&current->thread.keys_user);            \
@@ -92,6 +109,9 @@ static inline unsigned long ptrauth_strip_insn_pac(unsigned long ptr)
 					   SCTLR_ELx_ENDA | SCTLR_ELx_ENDB);   \
 	} while (0)
 
+#define ptrauth_thread_switch_user(tsk)                                        \
+	ptrauth_keys_install_user(&(tsk)->thread.keys_user)
+
 #define ptrauth_thread_init_kernel(tsk)					\
 	ptrauth_keys_init_kernel(&(tsk)->thread.keys_kernel)
 #define ptrauth_thread_switch_kernel(tsk)				\
@@ -102,8 +122,10 @@ static inline unsigned long ptrauth_strip_insn_pac(unsigned long ptr)
 #define ptrauth_set_enabled_keys(tsk, keys, enabled)	(-EINVAL)
 #define ptrauth_get_enabled_keys(tsk)	(-EINVAL)
 #define ptrauth_strip_insn_pac(lr)	(lr)
+#define ptrauth_suspend_exit()
 #define ptrauth_thread_init_user()
 #define ptrauth_thread_init_kernel(tsk)
+#define ptrauth_thread_switch_user(tsk)
 #define ptrauth_thread_switch_kernel(tsk)
 #endif /* CONFIG_ARM64_PTR_AUTH */
 
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index dc0907a0240c..2bc21a71abcf 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -147,10 +147,6 @@ int main(void)
 #endif
 #ifdef CONFIG_ARM64_PTR_AUTH
   DEFINE(PTRAUTH_USER_KEY_APIA,		offsetof(struct ptrauth_keys_user, apia));
-  DEFINE(PTRAUTH_USER_KEY_APIB,		offsetof(struct ptrauth_keys_user, apib));
-  DEFINE(PTRAUTH_USER_KEY_APDA,		offsetof(struct ptrauth_keys_user, apda));
-  DEFINE(PTRAUTH_USER_KEY_APDB,		offsetof(struct ptrauth_keys_user, apdb));
-  DEFINE(PTRAUTH_USER_KEY_APGA,		offsetof(struct ptrauth_keys_user, apga));
   DEFINE(PTRAUTH_KERNEL_KEY_APIA,	offsetof(struct ptrauth_keys_kernel, apia));
   BLANK();
 #endif
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 33037121fd0d..933db3f30f7a 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -247,21 +247,26 @@ alternative_else_nop_endif
 	check_mte_async_tcf x19, x22
 	apply_ssbd 1, x22, x23
 
-	ptrauth_keys_install_kernel_nosync tsk, x20, x22, x23
-
 #ifdef CONFIG_ARM64_PTR_AUTH
 alternative_if ARM64_HAS_ADDRESS_AUTH
 	/*
 	 * Enable IA for in-kernel PAC if the task had it disabled. Although
 	 * this could be implemented with an unconditional MRS which would avoid
 	 * a load, this was measured to be slower on Cortex-A75 and Cortex-A76.
+	 *
+	 * Install the kernel IA key only if IA was enabled in the task. If IA
+	 * was disabled on kernel exit then we would have left the kernel IA
+	 * installed so there is no need to install it again.
 	 */
 	ldr	x0, [tsk, THREAD_SCTLR_USER]
-	tbnz	x0, SCTLR_ELx_ENIA_SHIFT, 1f
+	tbz	x0, SCTLR_ELx_ENIA_SHIFT, 1f
+	__ptrauth_keys_install_kernel_nosync tsk, x20, x22, x23
+	b	2f
+1:
 	mrs	x0, sctlr_el1
 	orr	x0, x0, SCTLR_ELx_ENIA
 	msr	sctlr_el1, x0
-1:
+2:
 	isb
 alternative_else_nop_endif
 #endif
@@ -368,24 +373,24 @@ alternative_else_nop_endif
 3:
 	scs_save tsk, x0
 
-	/*
-	 * No kernel C function calls after this as user keys are set and IA may
-	 * be disabled.
-	 */
-	ptrauth_keys_install_user tsk, x0, x1, x2
-
 #ifdef CONFIG_ARM64_PTR_AUTH
 alternative_if ARM64_HAS_ADDRESS_AUTH
 	/*
-	 * IA was enabled for in-kernel PAC. Disable it now if needed.
-	 * All other per-task SCTLR bits were updated on task switch.
+	 * IA was enabled for in-kernel PAC. Disable it now if needed, or
+	 * alternatively install the user's IA. All other per-task keys and
+	 * SCTLR bits were updated on task switch.
+	 *
+	 * No kernel C function calls after this.
 	 */
 	ldr	x0, [tsk, THREAD_SCTLR_USER]
-	tbnz	x0, SCTLR_ELx_ENIA_SHIFT, 1f
+	tbz	x0, SCTLR_ELx_ENIA_SHIFT, 1f
+	__ptrauth_keys_install_user tsk, x0, x1, x2
+	b	2f
+1:
 	mrs	x0, sctlr_el1
 	bic	x0, x0, SCTLR_ELx_ENIA
 	msr	sctlr_el1, x0
-1:
+2:
 alternative_else_nop_endif
 #endif
 
diff --git a/arch/arm64/kernel/pointer_auth.c b/arch/arm64/kernel/pointer_auth.c
index f03e5bfe4490..60901ab0a7fe 100644
--- a/arch/arm64/kernel/pointer_auth.c
+++ b/arch/arm64/kernel/pointer_auth.c
@@ -43,6 +43,7 @@ int ptrauth_prctl_reset_keys(struct task_struct *tsk, unsigned long arg)
 		get_random_bytes(&keys->apdb, sizeof(keys->apdb));
 	if (arg & PR_PAC_APGAKEY)
 		get_random_bytes(&keys->apga, sizeof(keys->apga));
+	ptrauth_keys_install_user(keys);
 
 	return 0;
 }
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index a6b0edc67e41..c978876ce07f 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -570,6 +570,7 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
 	entry_task_switch(next);
 	ssbs_thread_switch(next);
 	erratum_1418040_thread_switch(prev, next);
+	ptrauth_thread_switch_user(next);
 
 	/*
 	 * Complete any pending TLB or cache maintenance on this CPU in case
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index a67b37a7a47e..c5a6614a41f9 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -74,8 +74,9 @@ void notrace __cpu_suspend_exit(void)
 	 */
 	spectre_v4_enable_mitigation(NULL);
 
-	/* Restore additional MTE-specific configuration */
+	/* Restore additional feature-specific configuration */
 	mte_suspend_exit();
+	ptrauth_suspend_exit();
 }
 
 /*
-- 
2.29.2.729.g45daf8777d-goog
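
For readers less used to the entry.S idioms, the IA handling added
above corresponds roughly to this C-like sketch. It is illustrative
only: install_key_ia() is a made-up stand-in for the MSR
APIAKEY{LO,HI}_EL1 sequence, and the real logic must stay in assembly
since no kernel C calls are possible on the exit path once the user
keys are installed.

/* Sketch of the PAC logic on kernel entry (after the pt_regs save). */
static void kernel_entry_pac(struct task_struct *tsk)
{
	if (tsk->thread.sctlr_user & SCTLR_ELx_ENIA) {
		/* User IA is live: swap in the kernel IA key. */
		install_key_ia(&tsk->thread.keys_kernel.apia);
	} else {
		/*
		 * The kernel IA key was left installed on the last exit,
		 * so just re-enable EnIA for in-kernel PAC.
		 */
		sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_ENIA);
	}
	isb();
}

/* Sketch of the PAC logic on kernel exit (just before ERET). */
static void kernel_exit_pac(struct task_struct *tsk)
{
	if (tsk->thread.sctlr_user & SCTLR_ELx_ENIA)
		install_key_ia(&tsk->thread.keys_user.apia);
	else
		sysreg_clear_set(sctlr_el1, SCTLR_ELx_ENIA, 0);
	/* No ISB needed: the ERET back to userspace is synchronizing. */
}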


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH v6 2/3] arm64: Introduce prctl(PR_PAC_{SET,GET}_ENABLED_KEYS)
  2020-12-30  6:59   ` Peter Collingbourne
@ 2021-01-26 12:49     ` Will Deacon
  -1 siblings, 0 replies; 22+ messages in thread
From: Will Deacon @ 2021-01-26 12:49 UTC (permalink / raw)
  To: Peter Collingbourne
  Cc: Catalin Marinas, Evgenii Stepanov, Kostya Serebryany,
	Vincenzo Frascino, Dave Martin, Szabolcs Nagy, Florian Weimer,
	Linux ARM, Kevin Brodsky, Andrey Konovalov, linux-api,
	libc-alpha

On Tue, Dec 29, 2020 at 10:59:14PM -0800, Peter Collingbourne wrote:
> This change introduces a prctl that allows the user program to control
> which PAC keys are enabled in a particular task. The main reason
> why this is useful is to enable a userspace ABI that uses PAC to
> sign and authenticate function pointers and other pointers exposed
> outside of the function, while still allowing binaries conforming
> to the ABI to interoperate with legacy binaries that do not sign or
> authenticate pointers.
> 
> The idea is that a dynamic loader or early startup code would issue
> this prctl very early after establishing that a process may load legacy
> binaries, but before executing any PAC instructions.
> 
> This change adds a small amount of overhead to kernel entry and exit
> due to additional required instruction sequences.
> 
> On a DragonBoard 845c (Cortex-A75) with the powersave governor, the
> overhead of similar instruction sequences was measured as 4.9ns when
> simulating the common case where IA is left enabled, or 43.7ns when
> simulating the uncommon case where IA is disabled. These numbers can
> be seen as the worst case scenario, since in more realistic scenarios
> a better performing governor would be used and a newer chip would be
> used that would support PAC unlike Cortex-A75 and would be expected
> to be faster than Cortex-A75.
> 
> On an Apple M1 under a hypervisor, the overhead of the entry/exit
> instruction sequences introduced by this patch was measured as 0.3ns
> in the case where IA is left enabled, and 33.0ns in the case where
> IA is disabled.
> 
> Signed-off-by: Peter Collingbourne <pcc@google.com>
> Reviewed-by: Dave Martin <Dave.Martin@arm.com>
> Link: https://linux-review.googlesource.com/id/Ibc41a5e6a76b275efbaa126b31119dc197b927a5

[...]

> diff --git a/arch/arm64/kernel/pointer_auth.c b/arch/arm64/kernel/pointer_auth.c
> index adb955fd9bdd..f03e5bfe4490 100644
> --- a/arch/arm64/kernel/pointer_auth.c
> +++ b/arch/arm64/kernel/pointer_auth.c
> @@ -46,3 +46,65 @@ int ptrauth_prctl_reset_keys(struct task_struct *tsk, unsigned long arg)
>  
>  	return 0;
>  }
> +
> +static u64 arg_to_enxx_mask(unsigned long arg)
> +{
> +	u64 sctlr_enxx_mask = 0;
> +
> +	WARN_ON(arg & ~PR_PAC_ENABLED_KEYS_MASK);
> +	if (arg & PR_PAC_APIAKEY)
> +		sctlr_enxx_mask |= SCTLR_ELx_ENIA;
> +	if (arg & PR_PAC_APIBKEY)
> +		sctlr_enxx_mask |= SCTLR_ELx_ENIB;
> +	if (arg & PR_PAC_APDAKEY)
> +		sctlr_enxx_mask |= SCTLR_ELx_ENDA;
> +	if (arg & PR_PAC_APDBKEY)
> +		sctlr_enxx_mask |= SCTLR_ELx_ENDB;
> +	return sctlr_enxx_mask;
> +}
> +
> +int ptrauth_set_enabled_keys(struct task_struct *tsk, unsigned long keys,
> +			     unsigned long enabled)
> +{
> +	u64 sctlr = tsk->thread.sctlr_user;
> +
> +	if (!system_supports_address_auth())
> +		return -EINVAL;
> +
> +	if (is_compat_thread(task_thread_info(tsk)))
> +		return -EINVAL;
> +
> +	if ((keys & ~PR_PAC_ENABLED_KEYS_MASK) || (enabled & ~keys))
> +		return -EINVAL;
> +
> +	sctlr &= ~arg_to_enxx_mask(keys);
> +	sctlr |= arg_to_enxx_mask(enabled);
> +	if (tsk == current)
> +		set_task_sctlr_el1(sctlr);
> +	else
> +		tsk->thread.sctlr_user = sctlr;

Who synchronizes all these modifications to 'sctlr_user'? Seems like it gets
hit by two independent prctl()s as well as ptrace.

Will

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v6 1/3] arm64: mte: make the per-task SCTLR_EL1 field usable elsewhere
  2020-12-30  6:59 ` Peter Collingbourne
@ 2021-01-26 12:49   ` Will Deacon
  -1 siblings, 0 replies; 22+ messages in thread
From: Will Deacon @ 2021-01-26 12:49 UTC (permalink / raw)
  To: Peter Collingbourne
  Cc: Catalin Marinas, Evgenii Stepanov, Kostya Serebryany,
	Vincenzo Frascino, Dave Martin, Szabolcs Nagy, Florian Weimer,
	Linux ARM, Kevin Brodsky, Andrey Konovalov, linux-api,
	libc-alpha

On Tue, Dec 29, 2020 at 10:59:13PM -0800, Peter Collingbourne wrote:
> In an upcoming change we are going to introduce per-task SCTLR_EL1
> bits for PAC. Move the existing per-task SCTLR_EL1 field out of the
> MTE-specific code so that we will be able to use it from both the
> PAC and MTE code paths and make the task switching code more efficient.
> 
> Signed-off-by: Peter Collingbourne <pcc@google.com>
> Link: https://linux-review.googlesource.com/id/Ic65fac78a7926168fa68f9e8da591c9e04ff7278
> ---
>  arch/arm64/Kconfig                 |  4 +++
>  arch/arm64/include/asm/mte.h       |  4 ---
>  arch/arm64/include/asm/processor.h | 10 ++++++-
>  arch/arm64/kernel/mte.c            | 47 ++++++------------------------
>  arch/arm64/kernel/process.c        | 34 +++++++++++++++++----
>  5 files changed, 50 insertions(+), 49 deletions(-)
> 
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 05e17351e4f3..82e38d1ca012 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -348,6 +348,9 @@ config KASAN_SHADOW_OFFSET
>  	default 0xeffffff800000000 if ARM64_VA_BITS_36 && KASAN_SW_TAGS
>  	default 0xffffffffffffffff
>  
> +config ARM64_NEED_SCTLR_USER
> +	bool
> +
>  source "arch/arm64/Kconfig.platforms"
>  
>  menu "Kernel Features"
> @@ -1653,6 +1656,7 @@ config ARM64_MTE
>  	# Required for tag checking in the uaccess routines
>  	depends on ARM64_PAN
>  	select ARCH_USES_HIGH_VMA_FLAGS
> +	select ARM64_NEED_SCTLR_USER

I'm not sure I'd bother with this; is it the end of the world if we do this
unconditionally?

Will

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v6 3/3] arm64: pac: Optimize kernel entry/exit key installation code paths
  2020-12-30  6:59   ` Peter Collingbourne
@ 2021-01-26 13:09     ` Will Deacon
  -1 siblings, 0 replies; 22+ messages in thread
From: Will Deacon @ 2021-01-26 13:09 UTC (permalink / raw)
  To: Peter Collingbourne
  Cc: Catalin Marinas, Evgenii Stepanov, Kostya Serebryany,
	Vincenzo Frascino, Dave Martin, Szabolcs Nagy, Florian Weimer,
	Linux ARM, Kevin Brodsky, Andrey Konovalov, linux-api,
	libc-alpha

On Tue, Dec 29, 2020 at 10:59:15PM -0800, Peter Collingbourne wrote:
> The kernel does not use any keys besides IA so we don't need to
> install IB/DA/DB/GA on kernel exit if we arrange to install them
> on task switch instead, which we can expect to happen an order of
> magnitude less often.
> 
> Furthermore we can avoid installing the user IA in the case where the
> user task has IA disabled and just leave the kernel IA installed. This
> also lets us avoid needing to install IA on kernel entry.

I've got to be honest, this makes me nervous in case there is a way for
userspace to recover the kernel key even though EnIA is clear. Currently,
EnIA doesn't affect XPAC* and PACGA instructions, and the architecture
clearly expects us to be switching these things:

  | Note
  | Keys are not banked by Exception level. Arm expects software to switch the
  | keys between Exception levels, typically by swapping the values with zero
  | so that the current key values are not present in memory

But then:

> On an Apple M1 under a hypervisor, the overhead of kernel entry/exit
> has been measured to be reduced by 15.6ns in the case where IA is
> enabled, and 31.9ns in the case where IA is disabled.

That's a good improvement, so this feels like it's worth doing. I suppose all we
can do is keep an eye on the architecture in case any future extensions mean
the approach taken here is dangerous.

Will

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v6 1/3] arm64: mte: make the per-task SCTLR_EL1 field usable elsewhere
  2021-01-26 12:49   ` Will Deacon
@ 2021-02-12  4:47     ` Peter Collingbourne
  -1 siblings, 0 replies; 22+ messages in thread
From: Peter Collingbourne @ 2021-02-12  4:47 UTC (permalink / raw)
  To: Will Deacon
  Cc: Catalin Marinas, Evgenii Stepanov, Kostya Serebryany,
	Vincenzo Frascino, Dave Martin, Szabolcs Nagy, Florian Weimer,
	Linux ARM, Kevin Brodsky, Andrey Konovalov, Linux API,
	libc-alpha

On Tue, Jan 26, 2021 at 4:50 AM Will Deacon <will@kernel.org> wrote:
>
> On Tue, Dec 29, 2020 at 10:59:13PM -0800, Peter Collingbourne wrote:
> > In an upcoming change we are going to introduce per-task SCTLR_EL1
> > bits for PAC. Move the existing per-task SCTLR_EL1 field out of the
> > MTE-specific code so that we will be able to use it from both the
> > PAC and MTE code paths and make the task switching code more efficient.
> >
> > Signed-off-by: Peter Collingbourne <pcc@google.com>
> > Link: https://linux-review.googlesource.com/id/Ic65fac78a7926168fa68f9e8da591c9e04ff7278
> > ---
> >  arch/arm64/Kconfig                 |  4 +++
> >  arch/arm64/include/asm/mte.h       |  4 ---
> >  arch/arm64/include/asm/processor.h | 10 ++++++-
> >  arch/arm64/kernel/mte.c            | 47 ++++++------------------------
> >  arch/arm64/kernel/process.c        | 34 +++++++++++++++++----
> >  5 files changed, 50 insertions(+), 49 deletions(-)
> >
> > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> > index 05e17351e4f3..82e38d1ca012 100644
> > --- a/arch/arm64/Kconfig
> > +++ b/arch/arm64/Kconfig
> > @@ -348,6 +348,9 @@ config KASAN_SHADOW_OFFSET
> >       default 0xeffffff800000000 if ARM64_VA_BITS_36 && KASAN_SW_TAGS
> >       default 0xffffffffffffffff
> >
> > +config ARM64_NEED_SCTLR_USER
> > +     bool
> > +
> >  source "arch/arm64/Kconfig.platforms"
> >
> >  menu "Kernel Features"
> > @@ -1653,6 +1656,7 @@ config ARM64_MTE
> >       # Required for tag checking in the uaccess routines
> >       depends on ARM64_PAN
> >       select ARCH_USES_HIGH_VMA_FLAGS
> > +     select ARM64_NEED_SCTLR_USER
>
> I'm not sure I'd bother with this; is it the end of the world if we do this
> unconditionally?

I think I'd be fine with doing it unconditionally. If both PAC and MTE
are disabled then I believe that the only additional code that we
would end up executing is the comparison between prev and next
sctlr_user on task switch in process.c (which would always yield
false) and since task switch is much less common than entry/exit it
seems acceptable to me.

Peter

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v6 2/3] arm64: Introduce prctl(PR_PAC_{SET,GET}_ENABLED_KEYS)
  2021-01-26 12:49     ` Will Deacon
@ 2021-02-12  4:52       ` Peter Collingbourne
  -1 siblings, 0 replies; 22+ messages in thread
From: Peter Collingbourne @ 2021-02-12  4:52 UTC (permalink / raw)
  To: Will Deacon
  Cc: Catalin Marinas, Evgenii Stepanov, Kostya Serebryany,
	Vincenzo Frascino, Dave Martin, Szabolcs Nagy, Florian Weimer,
	Linux ARM, Kevin Brodsky, Andrey Konovalov, Linux API,
	libc-alpha

On Tue, Jan 26, 2021 at 4:49 AM Will Deacon <will@kernel.org> wrote:
>
> On Tue, Dec 29, 2020 at 10:59:14PM -0800, Peter Collingbourne wrote:
> > This change introduces a prctl that allows the user program to control
> > which PAC keys are enabled in a particular task. The main reason
> > why this is useful is to enable a userspace ABI that uses PAC to
> > sign and authenticate function pointers and other pointers exposed
> > outside of the function, while still allowing binaries conforming
> > to the ABI to interoperate with legacy binaries that do not sign or
> > authenticate pointers.
> >
> > The idea is that a dynamic loader or early startup code would issue
> > this prctl very early after establishing that a process may load legacy
> > binaries, but before executing any PAC instructions.
> >
> > This change adds a small amount of overhead to kernel entry and exit
> > due to additional required instruction sequences.
> >
> > On a DragonBoard 845c (Cortex-A75) with the powersave governor, the
> > overhead of similar instruction sequences was measured as 4.9ns when
> > simulating the common case where IA is left enabled, or 43.7ns when
> > simulating the uncommon case where IA is disabled. These numbers can
> > be seen as the worst case scenario, since in more realistic scenarios
> > a better performing governor would be used and a newer chip would be
> > used that would support PAC unlike Cortex-A75 and would be expected
> > to be faster than Cortex-A75.
> >
> > On an Apple M1 under a hypervisor, the overhead of the entry/exit
> > instruction sequences introduced by this patch was measured as 0.3ns
> > in the case where IA is left enabled, and 33.0ns in the case where
> > IA is disabled.
> >
> > Signed-off-by: Peter Collingbourne <pcc@google.com>
> > Reviewed-by: Dave Martin <Dave.Martin@arm.com>
> > Link: https://linux-review.googlesource.com/id/Ibc41a5e6a76b275efbaa126b31119dc197b927a5
>
> [...]
>
> > diff --git a/arch/arm64/kernel/pointer_auth.c b/arch/arm64/kernel/pointer_auth.c
> > index adb955fd9bdd..f03e5bfe4490 100644
> > --- a/arch/arm64/kernel/pointer_auth.c
> > +++ b/arch/arm64/kernel/pointer_auth.c
> > @@ -46,3 +46,65 @@ int ptrauth_prctl_reset_keys(struct task_struct *tsk, unsigned long arg)
> >
> >       return 0;
> >  }
> > +
> > +static u64 arg_to_enxx_mask(unsigned long arg)
> > +{
> > +     u64 sctlr_enxx_mask = 0;
> > +
> > +     WARN_ON(arg & ~PR_PAC_ENABLED_KEYS_MASK);
> > +     if (arg & PR_PAC_APIAKEY)
> > +             sctlr_enxx_mask |= SCTLR_ELx_ENIA;
> > +     if (arg & PR_PAC_APIBKEY)
> > +             sctlr_enxx_mask |= SCTLR_ELx_ENIB;
> > +     if (arg & PR_PAC_APDAKEY)
> > +             sctlr_enxx_mask |= SCTLR_ELx_ENDA;
> > +     if (arg & PR_PAC_APDBKEY)
> > +             sctlr_enxx_mask |= SCTLR_ELx_ENDB;
> > +     return sctlr_enxx_mask;
> > +}
> > +
> > +int ptrauth_set_enabled_keys(struct task_struct *tsk, unsigned long keys,
> > +                          unsigned long enabled)
> > +{
> > +     u64 sctlr = tsk->thread.sctlr_user;
> > +
> > +     if (!system_supports_address_auth())
> > +             return -EINVAL;
> > +
> > +     if (is_compat_thread(task_thread_info(tsk)))
> > +             return -EINVAL;
> > +
> > +     if ((keys & ~PR_PAC_ENABLED_KEYS_MASK) || (enabled & ~keys))
> > +             return -EINVAL;
> > +
> > +     sctlr &= ~arg_to_enxx_mask(keys);
> > +     sctlr |= arg_to_enxx_mask(enabled);
> > +     if (tsk == current)
> > +             set_task_sctlr_el1(sctlr);
> > +     else
> > +             tsk->thread.sctlr_user = sctlr;
>
> Who synchronizes all these modifications to 'sctlr_user'? Seems like it gets
> hit by two independent prctl()s as well as ptrace.

The prctls can only affect the current task, so I believe that they
are naturally synchronized (since a task cannot run on multiple CPUs
at once as far as I'm aware). As for ptrace, I believe the tracee must
be stopped in order for the tracer to issue PTRACE_SETREGS (so it
wouldn't be able to issue prctls of its own) and a process can only
have one tracer at a time, so there is again natural synchronization.
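
As an illustration of the tracer side, a debugger attached to a stopped
tracee could read and write the mask via the NT_ARM_PAC_ENABLED_KEYS
regset added in patch 2/3 along these lines (a sketch with abbreviated
error handling; the fallback define covers elf.h headers that predate
this series):

#include <elf.h>
#include <stdint.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/uio.h>

#ifndef NT_ARM_PAC_ENABLED_KEYS
# define NT_ARM_PAC_ENABLED_KEYS 0x40a
#endif

/* Read the stopped tracee's enabled-keys mask; returns -1 on error. */
static long get_enabled_keys(pid_t pid)
{
	long mask;
	struct iovec iov = { .iov_base = &mask, .iov_len = sizeof(mask) };

	if (ptrace(PTRACE_GETREGSET, pid,
		   (void *)(uintptr_t)NT_ARM_PAC_ENABLED_KEYS, &iov))
		return -1;
	return mask;
}

/* Write a new enabled-keys mask into the stopped tracee. */
static int set_enabled_keys(pid_t pid, long mask)
{
	struct iovec iov = { .iov_base = &mask, .iov_len = sizeof(mask) };

	return ptrace(PTRACE_SETREGSET, pid,
		      (void *)(uintptr_t)NT_ARM_PAC_ENABLED_KEYS, &iov);
}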

Peter

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v6 3/3] arm64: pac: Optimize kernel entry/exit key installation code paths
  2021-01-26 13:09     ` Will Deacon
@ 2021-02-12  5:01       ` Peter Collingbourne
  -1 siblings, 0 replies; 22+ messages in thread
From: Peter Collingbourne @ 2021-02-12  5:01 UTC (permalink / raw)
  To: Will Deacon
  Cc: Catalin Marinas, Evgenii Stepanov, Kostya Serebryany,
	Vincenzo Frascino, Dave Martin, Szabolcs Nagy, Florian Weimer,
	Linux ARM, Kevin Brodsky, Andrey Konovalov, Linux API,
	libc-alpha

On Tue, Jan 26, 2021 at 5:09 AM Will Deacon <will@kernel.org> wrote:
>
> On Tue, Dec 29, 2020 at 10:59:15PM -0800, Peter Collingbourne wrote:
> > The kernel does not use any keys besides IA so we don't need to
> > install IB/DA/DB/GA on kernel exit if we arrange to install them
> > on task switch instead, which we can expect to happen an order of
> > magnitude less often.
> >
> > Furthermore we can avoid installing the user IA in the case where the
> > user task has IA disabled and just leave the kernel IA installed. This
> > also lets us avoid needing to install IA on kernel entry.
>
> I've got to be honest, this makes me nervous in case there is a way for
> userspace to recover the kernel key even though EnIA is clear. Currently,
> EnIA doesn't affect XPAC* and PACGA instructions, and the architecture

For GA I would expect it to be controlled by a hypothetical EnGA, not
by EnIA (and I'm a bit surprised that there isn't an EnGA; doesn't it
mean that a userspace program running under an unaware kernel or
hypervisor may sign things using the GA from potentially another
hypervisor guest?)

> clearly expects us to be switching these things:
>
>   | Note
>   | Keys are not banked by Exception level. Arm expects software to switch the
>   | keys between Exception levels, typically by swapping the values with zero
>   | so that the current key values are not present in memo
>
> But then:
>
> > On an Apple M1 under a hypervisor, the overhead of kernel entry/exit
> > has been measured to be reduced by 15.6ns in the case where IA is
> > enabled, and 31.9ns in the case where IA is disabled.
>
> That's a good improvement, so this feels like its worth doing. I suppose all we
> can do is keep an eye on the architecture in case any future extensions mean
> the approach taken here is dangerous.

Right. I think it makes sense for any future extensions not to expose
a key in any way if the corresponding key enable bit is clear (to
avoid the potential problem that I highlighted with GA above) unless
the operating system has explicitly opted into such behavior, e.g. by
setting a separate bit in SCTLR_EL1.

Peter
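
To make the quoted commit message concrete, the deferred key
installation could look roughly like the sketch below. install_user_key()
stands in for whatever MSR-writing helper the final patch uses (the
kernel already has __ptrauth_key_install() in asm/pointer_auth.h), and
the field names are illustrative:

/* Sketch only, not the actual patch: install the user keys that the
 * kernel itself never uses (IB, DA, DB, GA) on task switch instead of
 * on every kernel exit. */
static void ptrauth_switch_user_keys(struct task_struct *next)
{
	struct ptrauth_keys_user *keys = &next->thread.keys_user;

	install_user_key(APIB, keys->apib);
	install_user_key(APDA, keys->apda);
	install_user_key(APDB, keys->apdb);
	install_user_key(APGA, keys->apga);
	isb();	/* the new keys must be in place before returning to user */
}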

* Re: [PATCH v6 3/3] arm64: pac: Optimize kernel entry/exit key installation code paths
  2021-02-12  5:01       ` Peter Collingbourne
@ 2021-02-12 11:01         ` James Morse
  -1 siblings, 0 replies; 22+ messages in thread
From: James Morse @ 2021-02-12 11:01 UTC (permalink / raw)
  To: Peter Collingbourne, Will Deacon
  Cc: libc-alpha, Szabolcs Nagy, Catalin Marinas, Kevin Brodsky,
	Linux API, Kostya Serebryany, Florian Weimer, Linux ARM,
	Andrey Konovalov, Vincenzo Frascino, Dave Martin,
	Evgenii Stepanov

Hi Peter,

On 12/02/2021 05:01, Peter Collingbourne wrote:
> On Tue, Jan 26, 2021 at 5:09 AM Will Deacon <will@kernel.org> wrote:
>>
>> On Tue, Dec 29, 2020 at 10:59:15PM -0800, Peter Collingbourne wrote:
>>> The kernel does not use any keys besides IA so we don't need to
>>> install IB/DA/DB/GA on kernel exit if we arrange to install them
>>> on task switch instead, which we can expect to happen an order of
>>> magnitude less often.
>>>
>>> Furthermore we can avoid installing the user IA in the case where the
>>> user task has IA disabled and just leave the kernel IA installed. This
>>> also lets us avoid needing to install IA on kernel entry.
>>
>> I've got to be honest, this makes me nervous in case there is a way for
>> userspace to recover the kernel key even though EnIA is clear. Currently,
>> EnIA doesn't affect XPAC* and PACGA instructions, and the architecture

> For GA I would expect it to be controlled by a hypothetical EnGA, not
> by EnIA (and I'm a bit surprised that there isn't an EnGA;

PACGA is undefined if the CPU doesn't implement PAC, whereas PACIASP is a NOP if the CPU
doesn't implement PAC.

I think the reason for the SCTLR_ELx controls is to make unaware systems transform the
instructions that were hints back into hints (e.g. the AddPACIA pseudocode). This is
needed on mismatched big-little systems; otherwise processes can't be migrated between them.

For the non-hint instructions, user-space needs to test the hwcap/id-register-emulation to
know it can use these instructions, and the compiler shouldn't output them unconditionally.
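
For example, a sketch of that test using the existing arm64 hwcaps
(HWCAP_PACA for the address keys, HWCAP_PACG for the generic key):

#include <sys/auxv.h>
#include <asm/hwcap.h>	/* HWCAP_PACA, HWCAP_PACG */

/* Only use the non-hint PAC instructions (e.g. PACGA) after checking
 * the hwcaps; the hint-space ones execute as NOPs without PAC. */
static int have_pacga(void)
{
	return !!(getauxval(AT_HWCAP) & HWCAP_PACG);
}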


> doesn't it
> mean that a userspace program running under an unaware kernel or
> hypervisor may sign things using the GA from potentially another
> hypervisor guest?)

The hypervisor controls all this with HCR_EL2.API, which also traps PACGA et al.
For the hypervisor, it's all or nothing.
If the hypervisor is emulating a machine without PAC, it can emulate an undefined
exception regardless of whether the CPU supports PAC or not.

Does this match your reading?


Thanks,

James

* Re: [PATCH v6 3/3] arm64: pac: Optimize kernel entry/exit key installation code paths
  2021-02-12 11:01         ` James Morse
@ 2021-02-12 18:20           ` Peter Collingbourne
  -1 siblings, 0 replies; 22+ messages in thread
From: Peter Collingbourne @ 2021-02-12 18:20 UTC (permalink / raw)
  To: James Morse
  Cc: Will Deacon, libc-alpha, Szabolcs Nagy, Catalin Marinas,
	Kevin Brodsky, Linux API, Kostya Serebryany, Florian Weimer,
	Linux ARM, Andrey Konovalov, Vincenzo Frascino, Dave Martin,
	Evgenii Stepanov

On Fri, Feb 12, 2021 at 3:01 AM James Morse <james.morse@arm.com> wrote:
>
> Hi Peter,
>
> On 12/02/2021 05:01, Peter Collingbourne wrote:
> > On Tue, Jan 26, 2021 at 5:09 AM Will Deacon <will@kernel.org> wrote:
> >>
> >> On Tue, Dec 29, 2020 at 10:59:15PM -0800, Peter Collingbourne wrote:
> >>> The kernel does not use any keys besides IA so we don't need to
> >>> install IB/DA/DB/GA on kernel exit if we arrange to install them
> >>> on task switch instead, which we can expect to happen an order of
> >>> magnitude less often.
> >>>
> >>> Furthermore we can avoid installing the user IA in the case where the
> >>> user task has IA disabled and just leave the kernel IA installed. This
> >>> also lets us avoid needing to install IA on kernel entry.
> >>
> >> I've got to be honest, this makes me nervous in case there is a way for
> >> userspace to recover the kernel key even though EnIA is clear. Currently,
> >> EnIA doesn't affect XPAC* and PACGA instructions, and the architecture
>
> > For GA I would expect it to be controlled by a hypothetical EnGA, not
> > by EnIA (and I'm a bit surprised that there isn't an EnGA;
>
> PACGA is undefined if the CPU doesn't implement PAC, whereas PACIASP is a NOP if the CPU
> doesn't implement PAC.
>
> I think the reason for the SCTLR_ELx controls is to make unaware systems transform the
> instructions that were hints back into hints (e.g. the AddPACIA pseudocode). This is
> needed on mismatched big-little systems; otherwise processes can't be migrated between them.

It's needed for more than that; see the history of my
PR_PAC_SET_ENABLED_KEYS patch, in particular [1].

> For the non-hint instructions, user-space needs to test the hwcap/id-register-emulation to
> know it can use these instructions, and the compiler shouldn't output them unconditionally.

Right, unless the target is known to support them.

> > doesn't it
> > mean that a userspace program running under an unaware kernel or
> > hypervisor may sign things using the GA from potentially another
> > hypervisor guest?)
>
> The hypervisor controls all this with HCR_EL2.API, which also traps PACGA et al.
> For the hypervisor, it's all or nothing.
> If the hypervisor is emulating a machine without PAC, it can emulate an undefined
> exception regardless of whether the CPU supports PAC or not.
>
> Does this match your reading?

I think that's right. So an unaware hypervisor would set API to 0 and
none of the guests would be able to use the authentication
instructions. Since it looks like API=0 would make the hint-space
instructions trap as well, I would imagine that a hypervisor would
need to emulate them as no-ops if it's emulating a machine without
PAC.

Peter

[1] https://www.spinics.net/lists/arm-kernel/msg830889.html
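
A sketch of what that no-op emulation might look like, with a
hypothetical trap handler (the vcpu structure and handler name are
illustrative, not an existing hypervisor API):

struct vcpu_regs { unsigned long pc; };
struct vcpu { struct vcpu_regs regs; };

/* A PAC-unaware hypervisor running with HCR_EL2.API == 0 traps the
 * hint-space PAC instructions and can simply retire them as NOPs. */
static int handle_pauth_hint_trap(struct vcpu *vcpu)
{
	vcpu->regs.pc += 4;	/* every A64 instruction is 4 bytes */
	return 0;		/* resume the guest */
}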

Thread overview: 22+ messages
2020-12-30  6:59 [PATCH v6 1/3] arm64: mte: make the per-task SCTLR_EL1 field usable elsewhere Peter Collingbourne
2020-12-30  6:59 ` [PATCH v6 2/3] arm64: Introduce prctl(PR_PAC_{SET,GET}_ENABLED_KEYS) Peter Collingbourne
2021-01-26 12:49   ` Will Deacon
2021-02-12  4:52     ` Peter Collingbourne
2020-12-30  6:59 ` [PATCH v6 3/3] arm64: pac: Optimize kernel entry/exit key installation code paths Peter Collingbourne
2021-01-26 13:09   ` Will Deacon
2021-02-12  5:01     ` Peter Collingbourne
2021-02-12 11:01       ` James Morse
2021-02-12 18:20         ` Peter Collingbourne
2021-01-26 12:49 ` [PATCH v6 1/3] arm64: mte: make the per-task SCTLR_EL1 field usable elsewhere Will Deacon
2021-02-12  4:47   ` Peter Collingbourne
