linux-arm-kernel.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] arm64: mte: switch GCR_EL1 on task switch rather than entry/exit
@ 2021-07-02 19:45 Peter Collingbourne
  2021-07-04 15:37 ` Andrey Konovalov
  2021-07-05 12:52 ` Catalin Marinas
  0 siblings, 2 replies; 4+ messages in thread
From: Peter Collingbourne @ 2021-07-02 19:45 UTC (permalink / raw)
  To: Catalin Marinas, Vincenzo Frascino, Will Deacon, Andrey Konovalov
  Cc: Peter Collingbourne, Evgenii Stepanov, Szabolcs Nagy,
	Tejas Belagod, linux-arm-kernel

Accessing GCR_EL1 and issuing an ISB can be expensive on some
microarchitectures. To avoid taking this performance hit on every
kernel entry/exit, switch GCR_EL1 on task switch rather than
entry/exit. This is essentially a revert of commit bad1e1c663e0
("arm64: mte: switch GCR_EL1 in kernel entry and exit").

This requires changing how we generate random tags for HW tag-based
KASAN, since at this point IRG would use the user's exclusion mask,
which may not be suitable for kernel use. In this patch I chose to take
the modulus of CNTVCT_EL0, however alternative approaches are possible.

Signed-off-by: Peter Collingbourne <pcc@google.com>
Link: https://linux-review.googlesource.com/id/I560a190a74176ca4cc5191dad08f77f6b1577c75
---
v2:
- rebase onto v9 of the tag checking mode preference series

 arch/arm64/include/asm/mte-kasan.h | 15 ++++++---
 arch/arm64/include/asm/mte.h       |  2 --
 arch/arm64/kernel/entry.S          | 41 -----------------------
 arch/arm64/kernel/mte.c            | 54 +++++++++++-------------------
 4 files changed, 29 insertions(+), 83 deletions(-)

diff --git a/arch/arm64/include/asm/mte-kasan.h b/arch/arm64/include/asm/mte-kasan.h
index ddd4d17cf9a0..e9b3c1bdbba3 100644
--- a/arch/arm64/include/asm/mte-kasan.h
+++ b/arch/arm64/include/asm/mte-kasan.h
@@ -13,6 +13,8 @@
 
 #ifdef CONFIG_ARM64_MTE
 
+extern u64 mte_tag_mod;
+
 /*
  * These functions are meant to be only used from KASAN runtime through
  * the arch_*() interface defined in asm/memory.h.
@@ -37,15 +39,18 @@ static inline u8 mte_get_mem_tag(void *addr)
 	return mte_get_ptr_tag(addr);
 }
 
-/* Generate a random tag. */
+/*
+ * Generate a random tag. We can't use IRG because the user's GCR_EL1 is still
+ * installed for performance reasons. Instead, take the modulus of the
+ * architected timer which should be random enough for our purposes.
+ */
 static inline u8 mte_get_random_tag(void)
 {
-	void *addr;
+	u64 cntvct;
 
-	asm(__MTE_PREAMBLE "irg %0, %0"
-		: "=r" (addr));
+	asm("mrs %0, cntvct_el0" : "=r"(cntvct));
 
-	return mte_get_ptr_tag(addr);
+	return 0xF0 | (cntvct % mte_tag_mod);
 }
 
 /*
diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index bc88a1ced0d7..412b94efcb11 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -16,8 +16,6 @@
 
 #include <asm/pgtable-types.h>
 
-extern u64 gcr_kernel_excl;
-
 void mte_clear_page_tags(void *addr);
 unsigned long mte_copy_tags_from_user(void *to, const void __user *from,
 				      unsigned long n);
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index ce59280355c5..c95bfe145639 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -175,43 +175,6 @@ alternative_else_nop_endif
 #endif
 	.endm
 
-	.macro mte_set_gcr, tmp, tmp2
-#ifdef CONFIG_ARM64_MTE
-	/*
-	 * Calculate and set the exclude mask preserving
-	 * the RRND (bit[16]) setting.
-	 */
-	mrs_s	\tmp2, SYS_GCR_EL1
-	bfxil	\tmp2, \tmp, #MTE_CTRL_GCR_USER_EXCL_SHIFT, #16
-	msr_s	SYS_GCR_EL1, \tmp2
-#endif
-	.endm
-
-	.macro mte_set_kernel_gcr, tmp, tmp2
-#ifdef CONFIG_KASAN_HW_TAGS
-alternative_if_not ARM64_MTE
-	b	1f
-alternative_else_nop_endif
-	ldr_l	\tmp, gcr_kernel_excl
-
-	mte_set_gcr \tmp, \tmp2
-	isb
-1:
-#endif
-	.endm
-
-	.macro mte_set_user_gcr, tsk, tmp, tmp2
-#ifdef CONFIG_ARM64_MTE
-alternative_if_not ARM64_MTE
-	b	1f
-alternative_else_nop_endif
-	ldr	\tmp, [\tsk, #THREAD_MTE_CTRL]
-
-	mte_set_gcr \tmp, \tmp2
-1:
-#endif
-	.endm
-
 	.macro	kernel_entry, el, regsize = 64
 	.if	\regsize == 32
 	mov	w0, w0				// zero upper 32 bits of x0
@@ -273,8 +236,6 @@ alternative_if ARM64_HAS_ADDRESS_AUTH
 alternative_else_nop_endif
 #endif
 
-	mte_set_kernel_gcr x22, x23
-
 	scs_load tsk, x20
 	.else
 	add	x21, sp, #PT_REGS_SIZE
@@ -398,8 +359,6 @@ alternative_if ARM64_HAS_ADDRESS_AUTH
 alternative_else_nop_endif
 #endif
 
-	mte_set_user_gcr tsk, x0, x1
-
 	apply_ssbd 0, x0, x1
 	.endif
 
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 48f218e070cc..b8d3e0b20702 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -23,7 +23,7 @@
 #include <asm/ptrace.h>
 #include <asm/sysreg.h>
 
-u64 gcr_kernel_excl __ro_after_init;
+u64 mte_tag_mod __ro_after_init;
 
 static bool report_fault_once = true;
 
@@ -98,22 +98,7 @@ int memcmp_pages(struct page *page1, struct page *page2)
 
 void mte_init_tags(u64 max_tag)
 {
-	static bool gcr_kernel_excl_initialized;
-
-	if (!gcr_kernel_excl_initialized) {
-		/*
-		 * The format of the tags in KASAN is 0xFF and in MTE is 0xF.
-		 * This conversion extracts an MTE tag from a KASAN tag.
-		 */
-		u64 incl = GENMASK(FIELD_GET(MTE_TAG_MASK >> MTE_TAG_SHIFT,
-					     max_tag), 0);
-
-		gcr_kernel_excl = ~incl & SYS_GCR_EL1_EXCL_MASK;
-		gcr_kernel_excl_initialized = true;
-	}
-
-	/* Enable the kernel exclude mask for random tags generation. */
-	write_sysreg_s(SYS_GCR_EL1_RRND | gcr_kernel_excl, SYS_GCR_EL1);
+	mte_tag_mod = (max_tag & 0xF) + 1;
 }
 
 static inline void __mte_enable_kernel(const char *mode, unsigned long tcf)
@@ -188,19 +173,7 @@ void mte_check_tfsr_el1(void)
 }
 #endif
 
-static void update_gcr_el1_excl(u64 excl)
-{
-
-	/*
-	 * Note that the mask controlled by the user via prctl() is an
-	 * include while GCR_EL1 accepts an exclude mask.
-	 * No need for ISB since this only affects EL0 currently, implicit
-	 * with ERET.
-	 */
-	sysreg_clear_set_s(SYS_GCR_EL1, SYS_GCR_EL1_EXCL_MASK, excl);
-}
-
-static void mte_update_sctlr_user(struct task_struct *task)
+static void mte_sync_ctrl(struct task_struct *task)
 {
 	/*
 	 * This can only be called on the current or next task since the CPU
@@ -219,6 +192,17 @@ static void mte_update_sctlr_user(struct task_struct *task)
 	else if (resolved_mte_tcf & MTE_CTRL_TCF_SYNC)
 		sctlr |= SCTLR_EL1_TCF0_SYNC;
 	task->thread.sctlr_user = sctlr;
+
+	/*
+	 * Note that the mask controlled by the user via prctl() is an
+	 * include while GCR_EL1 accepts an exclude mask.
+	 * No need for ISB since this only affects EL0 currently, implicit
+	 * with ERET.
+	 */
+	sysreg_clear_set_s(SYS_GCR_EL1, SYS_GCR_EL1_EXCL_MASK,
+			   (mte_ctrl & MTE_CTRL_GCR_USER_EXCL_MASK) >>
+				   MTE_CTRL_GCR_USER_EXCL_SHIFT);
+
 	preempt_enable();
 }
 
@@ -233,13 +217,13 @@ void mte_thread_init_user(void)
 	clear_thread_flag(TIF_MTE_ASYNC_FAULT);
 	/* disable tag checking and reset tag generation mask */
 	current->thread.mte_ctrl = MTE_CTRL_GCR_USER_EXCL_MASK;
-	mte_update_sctlr_user(current);
+	mte_sync_ctrl(current);
 	set_task_sctlr_el1(current->thread.sctlr_user);
 }
 
 void mte_thread_switch(struct task_struct *next)
 {
-	mte_update_sctlr_user(next);
+	mte_sync_ctrl(next);
 
 	/*
 	 * Check if an async tag exception occurred at EL1.
@@ -273,7 +257,7 @@ void mte_suspend_exit(void)
 	if (!system_supports_mte())
 		return;
 
-	update_gcr_el1_excl(gcr_kernel_excl);
+	mte_sync_ctrl(current);
 }
 
 long set_mte_ctrl(struct task_struct *task, unsigned long arg)
@@ -291,7 +275,7 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg)
 
 	task->thread.mte_ctrl = mte_ctrl;
 	if (task == current) {
-		mte_update_sctlr_user(task);
+		mte_sync_ctrl(task);
 		set_task_sctlr_el1(task->thread.sctlr_user);
 	}
 
@@ -467,7 +451,7 @@ static ssize_t mte_tcf_preferred_show(struct device *dev,
 
 static void sync_sctlr(void *arg)
 {
-	mte_update_sctlr_user(current);
+	mte_sync_ctrl(current);
 	set_task_sctlr_el1(current->thread.sctlr_user);
 }
 
-- 
2.32.0.93.g670b81a890-goog


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] arm64: mte: switch GCR_EL1 on task switch rather than entry/exit
  2021-07-02 19:45 [PATCH v2] arm64: mte: switch GCR_EL1 on task switch rather than entry/exit Peter Collingbourne
@ 2021-07-04 15:37 ` Andrey Konovalov
  2021-07-05 12:52 ` Catalin Marinas
  1 sibling, 0 replies; 4+ messages in thread
From: Andrey Konovalov @ 2021-07-04 15:37 UTC (permalink / raw)
  To: Peter Collingbourne
  Cc: Catalin Marinas, Vincenzo Frascino, Will Deacon,
	Evgenii Stepanov, Szabolcs Nagy, Tejas Belagod, Linux ARM

On Fri, Jul 2, 2021 at 9:45 PM Peter Collingbourne <pcc@google.com> wrote:
>
> Accessing GCR_EL1 and issuing an ISB can be expensive on some
> microarchitectures. To avoid taking this performance hit on every
> kernel entry/exit, switch GCR_EL1 on task switch rather than
> entry/exit. This is essentially a revert of commit bad1e1c663e0
> ("arm64: mte: switch GCR_EL1 in kernel entry and exit").
>
> This requires changing how we generate random tags for HW tag-based
> KASAN, since at this point IRG would use the user's exclusion mask,
> which may not be suitable for kernel use. In this patch I chose to take
> the modulus of CNTVCT_EL0, however alternative approaches are possible.
>
> Signed-off-by: Peter Collingbourne <pcc@google.com>
> Link: https://linux-review.googlesource.com/id/I560a190a74176ca4cc5191dad08f77f6b1577c75
> ---
> v2:
> - rebase onto v9 of the tag checking mode preference series
>
>  arch/arm64/include/asm/mte-kasan.h | 15 ++++++---
>  arch/arm64/include/asm/mte.h       |  2 --
>  arch/arm64/kernel/entry.S          | 41 -----------------------
>  arch/arm64/kernel/mte.c            | 54 +++++++++++-------------------
>  4 files changed, 29 insertions(+), 83 deletions(-)
>
> diff --git a/arch/arm64/include/asm/mte-kasan.h b/arch/arm64/include/asm/mte-kasan.h
> index ddd4d17cf9a0..e9b3c1bdbba3 100644
> --- a/arch/arm64/include/asm/mte-kasan.h
> +++ b/arch/arm64/include/asm/mte-kasan.h
> @@ -13,6 +13,8 @@
>
>  #ifdef CONFIG_ARM64_MTE
>
> +extern u64 mte_tag_mod;
> +
>  /*
>   * These functions are meant to be only used from KASAN runtime through
>   * the arch_*() interface defined in asm/memory.h.
> @@ -37,15 +39,18 @@ static inline u8 mte_get_mem_tag(void *addr)
>         return mte_get_ptr_tag(addr);
>  }
>
> -/* Generate a random tag. */
> +/*
> + * Generate a random tag. We can't use IRG because the user's GCR_EL1 is still
> + * installed for performance reasons. Instead, take the modulus of the
> + * architected timer which should be random enough for our purposes.

It's unfortunate that we can't use the instruction that was
specifically designed for random tag generation.

> + */
>  static inline u8 mte_get_random_tag(void)
>  {
> -       void *addr;
> +       u64 cntvct;
>
> -       asm(__MTE_PREAMBLE "irg %0, %0"
> -               : "=r" (addr));
> +       asm("mrs %0, cntvct_el0" : "=r"(cntvct));
>
> -       return mte_get_ptr_tag(addr);
> +       return 0xF0 | (cntvct % mte_tag_mod);
>  }
>
>  /*
> diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
> index bc88a1ced0d7..412b94efcb11 100644
> --- a/arch/arm64/include/asm/mte.h
> +++ b/arch/arm64/include/asm/mte.h
> @@ -16,8 +16,6 @@
>
>  #include <asm/pgtable-types.h>
>
> -extern u64 gcr_kernel_excl;
> -
>  void mte_clear_page_tags(void *addr);
>  unsigned long mte_copy_tags_from_user(void *to, const void __user *from,
>                                       unsigned long n);
> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
> index ce59280355c5..c95bfe145639 100644
> --- a/arch/arm64/kernel/entry.S
> +++ b/arch/arm64/kernel/entry.S
> @@ -175,43 +175,6 @@ alternative_else_nop_endif
>  #endif
>         .endm
>
> -       .macro mte_set_gcr, tmp, tmp2
> -#ifdef CONFIG_ARM64_MTE
> -       /*
> -        * Calculate and set the exclude mask preserving
> -        * the RRND (bit[16]) setting.
> -        */
> -       mrs_s   \tmp2, SYS_GCR_EL1
> -       bfxil   \tmp2, \tmp, #MTE_CTRL_GCR_USER_EXCL_SHIFT, #16
> -       msr_s   SYS_GCR_EL1, \tmp2
> -#endif
> -       .endm
> -
> -       .macro mte_set_kernel_gcr, tmp, tmp2
> -#ifdef CONFIG_KASAN_HW_TAGS
> -alternative_if_not ARM64_MTE
> -       b       1f
> -alternative_else_nop_endif
> -       ldr_l   \tmp, gcr_kernel_excl
> -
> -       mte_set_gcr \tmp, \tmp2
> -       isb
> -1:
> -#endif
> -       .endm
> -
> -       .macro mte_set_user_gcr, tsk, tmp, tmp2
> -#ifdef CONFIG_ARM64_MTE
> -alternative_if_not ARM64_MTE
> -       b       1f
> -alternative_else_nop_endif
> -       ldr     \tmp, [\tsk, #THREAD_MTE_CTRL]
> -
> -       mte_set_gcr \tmp, \tmp2
> -1:
> -#endif
> -       .endm
> -
>         .macro  kernel_entry, el, regsize = 64
>         .if     \regsize == 32
>         mov     w0, w0                          // zero upper 32 bits of x0
> @@ -273,8 +236,6 @@ alternative_if ARM64_HAS_ADDRESS_AUTH
>  alternative_else_nop_endif
>  #endif
>
> -       mte_set_kernel_gcr x22, x23
> -
>         scs_load tsk, x20
>         .else
>         add     x21, sp, #PT_REGS_SIZE
> @@ -398,8 +359,6 @@ alternative_if ARM64_HAS_ADDRESS_AUTH
>  alternative_else_nop_endif
>  #endif
>
> -       mte_set_user_gcr tsk, x0, x1
> -
>         apply_ssbd 0, x0, x1
>         .endif
>
> diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
> index 48f218e070cc..b8d3e0b20702 100644
> --- a/arch/arm64/kernel/mte.c
> +++ b/arch/arm64/kernel/mte.c
> @@ -23,7 +23,7 @@
>  #include <asm/ptrace.h>
>  #include <asm/sysreg.h>
>
> -u64 gcr_kernel_excl __ro_after_init;
> +u64 mte_tag_mod __ro_after_init;
>
>  static bool report_fault_once = true;
>
> @@ -98,22 +98,7 @@ int memcmp_pages(struct page *page1, struct page *page2)
>
>  void mte_init_tags(u64 max_tag)
>  {
> -       static bool gcr_kernel_excl_initialized;
> -
> -       if (!gcr_kernel_excl_initialized) {
> -               /*
> -                * The format of the tags in KASAN is 0xFF and in MTE is 0xF.
> -                * This conversion extracts an MTE tag from a KASAN tag.
> -                */
> -               u64 incl = GENMASK(FIELD_GET(MTE_TAG_MASK >> MTE_TAG_SHIFT,
> -                                            max_tag), 0);
> -
> -               gcr_kernel_excl = ~incl & SYS_GCR_EL1_EXCL_MASK;
> -               gcr_kernel_excl_initialized = true;
> -       }
> -
> -       /* Enable the kernel exclude mask for random tags generation. */
> -       write_sysreg_s(SYS_GCR_EL1_RRND | gcr_kernel_excl, SYS_GCR_EL1);
> +       mte_tag_mod = (max_tag & 0xF) + 1;
>  }
>
>  static inline void __mte_enable_kernel(const char *mode, unsigned long tcf)
> @@ -188,19 +173,7 @@ void mte_check_tfsr_el1(void)
>  }
>  #endif
>
> -static void update_gcr_el1_excl(u64 excl)
> -{
> -
> -       /*
> -        * Note that the mask controlled by the user via prctl() is an
> -        * include while GCR_EL1 accepts an exclude mask.
> -        * No need for ISB since this only affects EL0 currently, implicit
> -        * with ERET.
> -        */
> -       sysreg_clear_set_s(SYS_GCR_EL1, SYS_GCR_EL1_EXCL_MASK, excl);
> -}
> -
> -static void mte_update_sctlr_user(struct task_struct *task)
> +static void mte_sync_ctrl(struct task_struct *task)
>  {
>         /*
>          * This can only be called on the current or next task since the CPU
> @@ -219,6 +192,17 @@ static void mte_update_sctlr_user(struct task_struct *task)
>         else if (resolved_mte_tcf & MTE_CTRL_TCF_SYNC)
>                 sctlr |= SCTLR_EL1_TCF0_SYNC;
>         task->thread.sctlr_user = sctlr;
> +
> +       /*
> +        * Note that the mask controlled by the user via prctl() is an
> +        * include while GCR_EL1 accepts an exclude mask.
> +        * No need for ISB since this only affects EL0 currently, implicit
> +        * with ERET.
> +        */
> +       sysreg_clear_set_s(SYS_GCR_EL1, SYS_GCR_EL1_EXCL_MASK,
> +                          (mte_ctrl & MTE_CTRL_GCR_USER_EXCL_MASK) >>
> +                                  MTE_CTRL_GCR_USER_EXCL_SHIFT);
> +
>         preempt_enable();
>  }
>
> @@ -233,13 +217,13 @@ void mte_thread_init_user(void)
>         clear_thread_flag(TIF_MTE_ASYNC_FAULT);
>         /* disable tag checking and reset tag generation mask */
>         current->thread.mte_ctrl = MTE_CTRL_GCR_USER_EXCL_MASK;
> -       mte_update_sctlr_user(current);
> +       mte_sync_ctrl(current);
>         set_task_sctlr_el1(current->thread.sctlr_user);
>  }
>
>  void mte_thread_switch(struct task_struct *next)
>  {
> -       mte_update_sctlr_user(next);
> +       mte_sync_ctrl(next);
>
>         /*
>          * Check if an async tag exception occurred at EL1.
> @@ -273,7 +257,7 @@ void mte_suspend_exit(void)
>         if (!system_supports_mte())
>                 return;
>
> -       update_gcr_el1_excl(gcr_kernel_excl);
> +       mte_sync_ctrl(current);
>  }
>
>  long set_mte_ctrl(struct task_struct *task, unsigned long arg)
> @@ -291,7 +275,7 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg)
>
>         task->thread.mte_ctrl = mte_ctrl;
>         if (task == current) {
> -               mte_update_sctlr_user(task);
> +               mte_sync_ctrl(task);
>                 set_task_sctlr_el1(task->thread.sctlr_user);
>         }
>
> @@ -467,7 +451,7 @@ static ssize_t mte_tcf_preferred_show(struct device *dev,
>
>  static void sync_sctlr(void *arg)
>  {
> -       mte_update_sctlr_user(current);
> +       mte_sync_ctrl(current);
>         set_task_sctlr_el1(current->thread.sctlr_user);
>  }
>
> --
> 2.32.0.93.g670b81a890-goog
>

Acked-by: Andrey Konovalov <andreyknvl@gmail.com>

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] arm64: mte: switch GCR_EL1 on task switch rather than entry/exit
  2021-07-02 19:45 [PATCH v2] arm64: mte: switch GCR_EL1 on task switch rather than entry/exit Peter Collingbourne
  2021-07-04 15:37 ` Andrey Konovalov
@ 2021-07-05 12:52 ` Catalin Marinas
  2021-07-09  1:50   ` Peter Collingbourne
  1 sibling, 1 reply; 4+ messages in thread
From: Catalin Marinas @ 2021-07-05 12:52 UTC (permalink / raw)
  To: Peter Collingbourne
  Cc: Vincenzo Frascino, Will Deacon, Andrey Konovalov,
	Evgenii Stepanov, Szabolcs Nagy, Tejas Belagod, linux-arm-kernel

On Fri, Jul 02, 2021 at 12:45:18PM -0700, Peter Collingbourne wrote:
> Accessing GCR_EL1 and issuing an ISB can be expensive on some
> microarchitectures. To avoid taking this performance hit on every
> kernel entry/exit, switch GCR_EL1 on task switch rather than
> entry/exit. This is essentially a revert of commit bad1e1c663e0
> ("arm64: mte: switch GCR_EL1 in kernel entry and exit").

As per the discussion in v1, we can avoid an ISB, though we are still
left with the GCR_EL1 access. I'm surprised that access to a non
self-synchronising register is that expensive but I suspect the
benchmark is just timing a dummy syscall. I'm not asking for numbers but
I'd like to make sure we don't optimise for unrealistic use-cases. Is
something like a geekbench score affected for example?

While we can get rid of the IRG in the kernel, at some point we may want
to use ADDG as generated by the compiler. That too is affected by the
GCR_EL1.Exclude mask.

> This requires changing how we generate random tags for HW tag-based
> KASAN, since at this point IRG would use the user's exclusion mask,
> which may not be suitable for kernel use. In this patch I chose to take
> the modulus of CNTVCT_EL0, however alternative approaches are possible.

So a few successive mte_get_mem_tag() will give the same result if the
counter hasn't changed. Even if ARMv8.6 requires a 1GHz timer frequency,
I think an implementation is allowed to count in bigger increments.

I'm inclined to NAK this patch on the grounds that we may need a
specific GCR_EL1 configuration for the kernel. Feedback to the
microarchitects: make access to this register faster.

-- 
Catalin

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] arm64: mte: switch GCR_EL1 on task switch rather than entry/exit
  2021-07-05 12:52 ` Catalin Marinas
@ 2021-07-09  1:50   ` Peter Collingbourne
  0 siblings, 0 replies; 4+ messages in thread
From: Peter Collingbourne @ 2021-07-09  1:50 UTC (permalink / raw)
  To: Catalin Marinas
  Cc: Vincenzo Frascino, Will Deacon, Andrey Konovalov,
	Evgenii Stepanov, Szabolcs Nagy, Tejas Belagod, Linux ARM

On Mon, Jul 5, 2021 at 5:52 AM Catalin Marinas <catalin.marinas@arm.com> wrote:
>
> On Fri, Jul 02, 2021 at 12:45:18PM -0700, Peter Collingbourne wrote:
> > Accessing GCR_EL1 and issuing an ISB can be expensive on some
> > microarchitectures. To avoid taking this performance hit on every
> > kernel entry/exit, switch GCR_EL1 on task switch rather than
> > entry/exit. This is essentially a revert of commit bad1e1c663e0
> > ("arm64: mte: switch GCR_EL1 in kernel entry and exit").
>
> As per the discussion in v1, we can avoid an ISB, though we are still
> left with the GCR_EL1 access. I'm surprised that access to a non
> self-synchronising register is that expensive but I suspect the
> benchmark is just timing a dummy syscall. I'm not asking for numbers but
> I'd like to make sure we don't optimise for unrealistic use-cases. Is
> something like a geekbench score affected for example?

FWIW, I was using this test program:
https://patchwork.kernel.org/project/linux-arm-kernel/patch/20200801011152.39838-1-pcc@google.com/#23572981

Since it's an invalid syscall it's a good way to measure the effect of
changes to entry/exit in isolation, but it does mean that we need to
be careful when also making changes elsewhere in the kernel, as will
become apparent in a moment.

> While we can get rid of the IRG in the kernel, at some point we may want
> to use ADDG as generated by the compiler. That too is affected by the
> GCR_EL1.Exclude mask.
>
> > This requires changing how we generate random tags for HW tag-based
> > KASAN, since at this point IRG would use the user's exclusion mask,
> > which may not be suitable for kernel use. In this patch I chose to take
> > the modulus of CNTVCT_EL0, however alternative approaches are possible.
>
> So a few successive mte_get_mem_tag() will give the same result if the
> counter hasn't changed. Even if ARMv8.6 requires a 1GHz timer frequency,
> I think an implementation is allowed to count in bigger increments.

Yes, I observed that Apple M1 for example counts in increments of 16.
Taking the modulus of the timer would happen to work as long as the
increment is small enough (since it would mean that the timer would
likely have incremented by the time we need to make another
allocation) and a power of 2 (to ensure that we permute through all of
the possible tag values), which I would expect to be the case on most
microarchitectures.

However, I developed an in-kernel allocator microbenchmark which
revealed a more important issue with this patch, which is that on most
cores switching from IRG to reading the timer costs more than the
performance improvement from switching from the single ISB patch to
the GCR on task switch patch. Which means that if KASAN is enabled, a
single allocation would wipe out the performance improvement from
avoiding touching GCR on entry/exit. I also tried a number of
alternative approaches and they were also too expensive. So now I am
less inclined to push for an approach that avoids touching GCR on
entry/exit.

> BTW, can you also modify mte_set_kernel_gcr to only do a write to the
> GCR_EL1 register rather than a read-modify-write?

Yes, this helps a bit. In v3 I now do this as well as single ISB.

Peter

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-07-09  1:52 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-02 19:45 [PATCH v2] arm64: mte: switch GCR_EL1 on task switch rather than entry/exit Peter Collingbourne
2021-07-04 15:37 ` Andrey Konovalov
2021-07-05 12:52 ` Catalin Marinas
2021-07-09  1:50   ` Peter Collingbourne

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).