linux-arm-kernel.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
* Re: [PATCH v7 18/29] arm64: mte: Allow user control of the tag check mode via prctl()
       [not found] ` <20200715170844.30064-19-catalin.marinas@arm.com>
@ 2020-07-20 15:30   ` Kevin Brodsky
  2020-07-20 17:00     ` Dave Martin
  2020-07-22 11:09     ` Catalin Marinas
  2020-08-04 19:34   ` Kevin Brodsky
  1 sibling, 2 replies; 23+ messages in thread
From: Kevin Brodsky @ 2020-07-20 15:30 UTC (permalink / raw)
  To: Catalin Marinas, linux-arm-kernel
  Cc: linux-arch, Szabolcs Nagy, Andrey Konovalov, Peter Collingbourne,
	linux-mm, Andrew Morton, Vincenzo Frascino, Will Deacon,
	Dave P Martin

On 15/07/2020 18:08, Catalin Marinas wrote:
> By default, even if PROT_MTE is set on a memory range, there is no tag
> check fault reporting (SIGSEGV). Introduce a set of option to the
> exiting prctl(PR_SET_TAGGED_ADDR_CTRL) to allow user control of the tag
> check fault mode:
>
>    PR_MTE_TCF_NONE  - no reporting (default)
>    PR_MTE_TCF_SYNC  - synchronous tag check fault reporting
>    PR_MTE_TCF_ASYNC - asynchronous tag check fault reporting
>
> These options translate into the corresponding SCTLR_EL1.TCF0 bitfield,
> context-switched by the kernel. Note that uaccess done by the kernel is
> not checked and cannot be configured by the user.
>
> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Will Deacon <will@kernel.org>
> ---
>
> Notes:
>      v3:
>      - Use SCTLR_EL1_TCF0_NONE instead of 0 for consistency.
>      - Move mte_thread_switch() in this patch from an earlier one. In
>        addition, it is called after the dsb() in __switch_to() so that any
>        asynchronous tag check faults have been registered in the TFSR_EL1
>        registers (to be added with the in-kernel MTE support.
>      
>      v2:
>      - Handle SCTLR_EL1_TCF0_NONE explicitly for consistency with PR_MTE_TCF_NONE.
>      - Fix SCTLR_EL1 register setting in flush_mte_state() (thanks to Peter
>        Collingbourne).
>      - Added ISB to update_sctlr_el1_tcf0() since, with the latest
>        architecture update/fix, the TCF0 field is used by the uaccess
>        routines.
>
>   arch/arm64/include/asm/mte.h       | 14 ++++++
>   arch/arm64/include/asm/processor.h |  3 ++
>   arch/arm64/kernel/mte.c            | 77 ++++++++++++++++++++++++++++++
>   arch/arm64/kernel/process.c        | 26 ++++++++--
>   include/uapi/linux/prctl.h         |  6 +++
>   5 files changed, 123 insertions(+), 3 deletions(-)
>
> diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
> index b2577eee62c2..df2efbc9f8f1 100644
> --- a/arch/arm64/include/asm/mte.h
> +++ b/arch/arm64/include/asm/mte.h
> @@ -21,6 +21,9 @@ void mte_clear_page_tags(void *addr);
>   void mte_sync_tags(pte_t *ptep, pte_t pte);
>   void mte_copy_page_tags(void *kto, const void *kfrom);
>   void flush_mte_state(void);
> +void mte_thread_switch(struct task_struct *next);
> +long set_mte_ctrl(unsigned long arg);
> +long get_mte_ctrl(void);
>   
>   #else
>   
> @@ -36,6 +39,17 @@ static inline void mte_copy_page_tags(void *kto, const void *kfrom)
>   static inline void flush_mte_state(void)
>   {
>   }
> +static inline void mte_thread_switch(struct task_struct *next)
> +{
> +}
> +static inline long set_mte_ctrl(unsigned long arg)
> +{
> +	return 0;
> +}
> +static inline long get_mte_ctrl(void)
> +{
> +	return 0;
> +}
>   
>   #endif
>   
> diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
> index 240fe5e5b720..80e7f0573309 100644
> --- a/arch/arm64/include/asm/processor.h
> +++ b/arch/arm64/include/asm/processor.h
> @@ -151,6 +151,9 @@ struct thread_struct {
>   	struct ptrauth_keys_user	keys_user;
>   	struct ptrauth_keys_kernel	keys_kernel;
>   #endif
> +#ifdef CONFIG_ARM64_MTE
> +	u64			sctlr_tcf0;
> +#endif
>   };
>   
>   static inline void arch_thread_struct_whitelist(unsigned long *offset,
> diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
> index 5f54fd140610..375483a1f573 100644
> --- a/arch/arm64/kernel/mte.c
> +++ b/arch/arm64/kernel/mte.c
> @@ -5,6 +5,8 @@
>   
>   #include <linux/bitops.h>
>   #include <linux/mm.h>
> +#include <linux/prctl.h>
> +#include <linux/sched.h>
>   #include <linux/string.h>
>   #include <linux/thread_info.h>
>   
> @@ -49,6 +51,26 @@ int memcmp_pages(struct page *page1, struct page *page2)
>   	return ret;
>   }
>   
> +static void update_sctlr_el1_tcf0(u64 tcf0)
> +{
> +	/* ISB required for the kernel uaccess routines */
> +	sysreg_clear_set(sctlr_el1, SCTLR_EL1_TCF0_MASK, tcf0);
> +	isb();
> +}
> +
> +static void set_sctlr_el1_tcf0(u64 tcf0)
> +{
> +	/*
> +	 * mte_thread_switch() checks current->thread.sctlr_tcf0 as an
> +	 * optimisation. Disable preemption so that it does not see
> +	 * the variable update before the SCTLR_EL1.TCF0 one.
> +	 */
> +	preempt_disable();
> +	current->thread.sctlr_tcf0 = tcf0;
> +	update_sctlr_el1_tcf0(tcf0);
> +	preempt_enable();
> +}
> +
>   void flush_mte_state(void)
>   {
>   	if (!system_supports_mte())
> @@ -58,4 +80,59 @@ void flush_mte_state(void)
>   	dsb(ish);
>   	write_sysreg_s(0, SYS_TFSRE0_EL1);
>   	clear_thread_flag(TIF_MTE_ASYNC_FAULT);
> +	/* disable tag checking */
> +	set_sctlr_el1_tcf0(SCTLR_EL1_TCF0_NONE);
> +}
> +
> +void mte_thread_switch(struct task_struct *next)
> +{
> +	if (!system_supports_mte())
> +		return;
> +
> +	/* avoid expensive SCTLR_EL1 accesses if no change */
> +	if (current->thread.sctlr_tcf0 != next->thread.sctlr_tcf0)

I think this could be improved by checking whether `next` is a kernel thread, in 
which case thread.sctlr_tcf0 is 0 but there is no point in setting SCTLR_EL1.TCF0, 
since there should not be any access via TTBR0.

Kevin

> +		update_sctlr_el1_tcf0(next->thread.sctlr_tcf0);
> +}
> +
> +long set_mte_ctrl(unsigned long arg)
> +{
> +	u64 tcf0;
> +
> +	if (!system_supports_mte())
> +		return 0;
> +
> +	switch (arg & PR_MTE_TCF_MASK) {
> +	case PR_MTE_TCF_NONE:
> +		tcf0 = SCTLR_EL1_TCF0_NONE;
> +		break;
> +	case PR_MTE_TCF_SYNC:
> +		tcf0 = SCTLR_EL1_TCF0_SYNC;
> +		break;
> +	case PR_MTE_TCF_ASYNC:
> +		tcf0 = SCTLR_EL1_TCF0_ASYNC;
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	set_sctlr_el1_tcf0(tcf0);
> +
> +	return 0;
> +}
> +
> +long get_mte_ctrl(void)
> +{
> +	if (!system_supports_mte())
> +		return 0;
> +
> +	switch (current->thread.sctlr_tcf0) {
> +	case SCTLR_EL1_TCF0_NONE:
> +		return PR_MTE_TCF_NONE;
> +	case SCTLR_EL1_TCF0_SYNC:
> +		return PR_MTE_TCF_SYNC;
> +	case SCTLR_EL1_TCF0_ASYNC:
> +		return PR_MTE_TCF_ASYNC;
> +	}
> +
> +	return 0;
>   }
> diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
> index 695705d1f8e5..d19ce8053a03 100644
> --- a/arch/arm64/kernel/process.c
> +++ b/arch/arm64/kernel/process.c
> @@ -544,6 +544,13 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
>   	 */
>   	dsb(ish);
>   
> +	/*
> +	 * MTE thread switching must happen after the DSB above to ensure that
> +	 * any asynchronous tag check faults have been logged in the TFSR*_EL1
> +	 * registers.
> +	 */
> +	mte_thread_switch(next);
> +
>   	/* the actual thread switch */
>   	last = cpu_switch_to(prev, next);
>   
> @@ -603,9 +610,15 @@ static unsigned int tagged_addr_disabled;
>   
>   long set_tagged_addr_ctrl(unsigned long arg)
>   {
> +	unsigned long valid_mask = PR_TAGGED_ADDR_ENABLE;
> +
>   	if (is_compat_task())
>   		return -EINVAL;
> -	if (arg & ~PR_TAGGED_ADDR_ENABLE)
> +
> +	if (system_supports_mte())
> +		valid_mask |= PR_MTE_TCF_MASK;
> +
> +	if (arg & ~valid_mask)
>   		return -EINVAL;
>   
>   	/*
> @@ -615,6 +628,9 @@ long set_tagged_addr_ctrl(unsigned long arg)
>   	if (arg & PR_TAGGED_ADDR_ENABLE && tagged_addr_disabled)
>   		return -EINVAL;
>   
> +	if (set_mte_ctrl(arg) != 0)
> +		return -EINVAL;
> +
>   	update_thread_flag(TIF_TAGGED_ADDR, arg & PR_TAGGED_ADDR_ENABLE);
>   
>   	return 0;
> @@ -622,13 +638,17 @@ long set_tagged_addr_ctrl(unsigned long arg)
>   
>   long get_tagged_addr_ctrl(void)
>   {
> +	long ret = 0;
> +
>   	if (is_compat_task())
>   		return -EINVAL;
>   
>   	if (test_thread_flag(TIF_TAGGED_ADDR))
> -		return PR_TAGGED_ADDR_ENABLE;
> +		ret = PR_TAGGED_ADDR_ENABLE;
>   
> -	return 0;
> +	ret |= get_mte_ctrl();
> +
> +	return ret;
>   }
>   
>   /*
> diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
> index 07b4f8131e36..2390ab324afa 100644
> --- a/include/uapi/linux/prctl.h
> +++ b/include/uapi/linux/prctl.h
> @@ -233,6 +233,12 @@ struct prctl_mm_map {
>   #define PR_SET_TAGGED_ADDR_CTRL		55
>   #define PR_GET_TAGGED_ADDR_CTRL		56
>   # define PR_TAGGED_ADDR_ENABLE		(1UL << 0)
> +/* MTE tag check fault modes */
> +# define PR_MTE_TCF_SHIFT		1
> +# define PR_MTE_TCF_NONE		(0UL << PR_MTE_TCF_SHIFT)
> +# define PR_MTE_TCF_SYNC		(1UL << PR_MTE_TCF_SHIFT)
> +# define PR_MTE_TCF_ASYNC		(2UL << PR_MTE_TCF_SHIFT)
> +# define PR_MTE_TCF_MASK		(3UL << PR_MTE_TCF_SHIFT)
>   
>   /* Control reclaim behavior when allocating memory */
>   #define PR_SET_IO_FLUSHER		57


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v7 18/29] arm64: mte: Allow user control of the tag check mode via prctl()
  2020-07-20 15:30   ` [PATCH v7 18/29] arm64: mte: Allow user control of the tag check mode via prctl() Kevin Brodsky
@ 2020-07-20 17:00     ` Dave Martin
  2020-07-22 10:28       ` Catalin Marinas
  2020-07-23 19:33       ` Kevin Brodsky
  2020-07-22 11:09     ` Catalin Marinas
  1 sibling, 2 replies; 23+ messages in thread
From: Dave Martin @ 2020-07-20 17:00 UTC (permalink / raw)
  To: Kevin Brodsky
  Cc: linux-arch, Will Deacon, Szabolcs Nagy, Catalin Marinas,
	linux-mm, Andrey Konovalov, Andrew Morton, Vincenzo Frascino,
	Peter Collingbourne, linux-arm-kernel

On Mon, Jul 20, 2020 at 04:30:35PM +0100, Kevin Brodsky wrote:
> On 15/07/2020 18:08, Catalin Marinas wrote:
> >By default, even if PROT_MTE is set on a memory range, there is no tag
> >check fault reporting (SIGSEGV). Introduce a set of option to the
> >exiting prctl(PR_SET_TAGGED_ADDR_CTRL) to allow user control of the tag
> >check fault mode:
> >
> >   PR_MTE_TCF_NONE  - no reporting (default)
> >   PR_MTE_TCF_SYNC  - synchronous tag check fault reporting
> >   PR_MTE_TCF_ASYNC - asynchronous tag check fault reporting
> >
> >These options translate into the corresponding SCTLR_EL1.TCF0 bitfield,
> >context-switched by the kernel. Note that uaccess done by the kernel is
> >not checked and cannot be configured by the user.
> >
> >Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
> >Cc: Will Deacon <will@kernel.org>
> >---
> >
> >Notes:
> >     v3:
> >     - Use SCTLR_EL1_TCF0_NONE instead of 0 for consistency.
> >     - Move mte_thread_switch() in this patch from an earlier one. In
> >       addition, it is called after the dsb() in __switch_to() so that any
> >       asynchronous tag check faults have been registered in the TFSR_EL1
> >       registers (to be added with the in-kernel MTE support.
> >     v2:
> >     - Handle SCTLR_EL1_TCF0_NONE explicitly for consistency with PR_MTE_TCF_NONE.
> >     - Fix SCTLR_EL1 register setting in flush_mte_state() (thanks to Peter
> >       Collingbourne).
> >     - Added ISB to update_sctlr_el1_tcf0() since, with the latest
> >       architecture update/fix, the TCF0 field is used by the uaccess
> >       routines.

[...]

> >diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c

[...]

> >+void mte_thread_switch(struct task_struct *next)
> >+{
> >+	if (!system_supports_mte())
> >+		return;
> >+
> >+	/* avoid expensive SCTLR_EL1 accesses if no change */
> >+	if (current->thread.sctlr_tcf0 != next->thread.sctlr_tcf0)
> 
> I think this could be improved by checking whether `next` is a kernel
> thread, in which case thread.sctlr_tcf0 is 0 but there is no point in
> setting SCTLR_EL1.TCF0, since there should not be any access via TTBR0.

Out of interest, do we have a nice way of testing for a kernel thread
now?

I remember fpsimd_thread_switch() used to check for task->mm, but we
seem to have got rid of that at some point.  set_mm() can defeat this,
and anyway the heavy lifting for FPSIMD is now deferred until returning
to userspace.

Cheers
---Dave

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v7 18/29] arm64: mte: Allow user control of the tag check mode via prctl()
  2020-07-20 17:00     ` Dave Martin
@ 2020-07-22 10:28       ` Catalin Marinas
  2020-07-23 19:33       ` Kevin Brodsky
  1 sibling, 0 replies; 23+ messages in thread
From: Catalin Marinas @ 2020-07-22 10:28 UTC (permalink / raw)
  To: Dave Martin
  Cc: linux-arch, Will Deacon, Szabolcs Nagy, Andrey Konovalov,
	Kevin Brodsky, linux-mm, Andrew Morton, Vincenzo Frascino,
	Peter Collingbourne, linux-arm-kernel

On Mon, Jul 20, 2020 at 06:00:50PM +0100, Dave P Martin wrote:
> On Mon, Jul 20, 2020 at 04:30:35PM +0100, Kevin Brodsky wrote:
> > On 15/07/2020 18:08, Catalin Marinas wrote:
> > >+void mte_thread_switch(struct task_struct *next)
> > >+{
> > >+	if (!system_supports_mte())
> > >+		return;
> > >+
> > >+	/* avoid expensive SCTLR_EL1 accesses if no change */
> > >+	if (current->thread.sctlr_tcf0 != next->thread.sctlr_tcf0)
> > 
> > I think this could be improved by checking whether `next` is a kernel
> > thread, in which case thread.sctlr_tcf0 is 0 but there is no point in
> > setting SCTLR_EL1.TCF0, since there should not be any access via TTBR0.
> 
> Out of interest, do we have a nice way of testing for a kernel thread
> now?
> 
> I remember fpsimd_thread_switch() used to check for task->mm, but we
> seem to have got rid of that at some point.  set_mm() can defeat this,
> and anyway the heavy lifting for FPSIMD is now deferred until returning
> to userspace.

I think a better way is (current->flags & PF_KTHREAD). kthread_use_mm()
indeed changes current->mm.

-- 
Catalin

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v7 18/29] arm64: mte: Allow user control of the tag check mode via prctl()
  2020-07-20 15:30   ` [PATCH v7 18/29] arm64: mte: Allow user control of the tag check mode via prctl() Kevin Brodsky
  2020-07-20 17:00     ` Dave Martin
@ 2020-07-22 11:09     ` Catalin Marinas
  1 sibling, 0 replies; 23+ messages in thread
From: Catalin Marinas @ 2020-07-22 11:09 UTC (permalink / raw)
  To: Kevin Brodsky
  Cc: linux-arch, Szabolcs Nagy, Andrey Konovalov, Peter Collingbourne,
	linux-mm, Andrew Morton, Vincenzo Frascino, Will Deacon,
	Dave P Martin, linux-arm-kernel

On Mon, Jul 20, 2020 at 04:30:35PM +0100, Kevin Brodsky wrote:
> On 15/07/2020 18:08, Catalin Marinas wrote:
> > +void mte_thread_switch(struct task_struct *next)
> > +{
> > +	if (!system_supports_mte())
> > +		return;
> > +
> > +	/* avoid expensive SCTLR_EL1 accesses if no change */
> > +	if (current->thread.sctlr_tcf0 != next->thread.sctlr_tcf0)
> 
> I think this could be improved by checking whether `next` is a kernel
> thread, in which case thread.sctlr_tcf0 is 0 but there is no point in
> setting SCTLR_EL1.TCF0, since there should not be any access via TTBR0.

It's not about kernel or user thread here. kthread_use_mm() (just
use_mm() in older kernels) would set an mm on a kernel thread,
temporarily making it behave as a user one. Since the sctlr_tcf0 is per
thread, not per mm, we need to switch to the default TCF0 for kthreads
so that user accesses (if use_mm() is called) don't generate any tag
check faults. Note that switch_mm() does not touch TCF0.

If we did allow a global, per-mm TCF0 setting, such kthreads could only
handle synchronous faults and no SIGSEGV generated (as we do with
copy_{from,to}_user() for normal threads).

If we want to revisit per-thread vs per-mm TCF0 setting, now is the
time.

-- 
Catalin

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v7 18/29] arm64: mte: Allow user control of the tag check mode via prctl()
  2020-07-20 17:00     ` Dave Martin
  2020-07-22 10:28       ` Catalin Marinas
@ 2020-07-23 19:33       ` Kevin Brodsky
  1 sibling, 0 replies; 23+ messages in thread
From: Kevin Brodsky @ 2020-07-23 19:33 UTC (permalink / raw)
  To: Dave Martin
  Cc: linux-arch, Will Deacon, Szabolcs Nagy, Catalin Marinas,
	linux-mm, Andrey Konovalov, Andrew Morton, Vincenzo Frascino,
	Peter Collingbourne, linux-arm-kernel

On 20/07/2020 18:00, Dave Martin wrote:
> On Mon, Jul 20, 2020 at 04:30:35PM +0100, Kevin Brodsky wrote:
>> On 15/07/2020 18:08, Catalin Marinas wrote:
>>> By default, even if PROT_MTE is set on a memory range, there is no tag
>>> check fault reporting (SIGSEGV). Introduce a set of option to the
>>> exiting prctl(PR_SET_TAGGED_ADDR_CTRL) to allow user control of the tag
>>> check fault mode:
>>>
>>>    PR_MTE_TCF_NONE  - no reporting (default)
>>>    PR_MTE_TCF_SYNC  - synchronous tag check fault reporting
>>>    PR_MTE_TCF_ASYNC - asynchronous tag check fault reporting
>>>
>>> These options translate into the corresponding SCTLR_EL1.TCF0 bitfield,
>>> context-switched by the kernel. Note that uaccess done by the kernel is
>>> not checked and cannot be configured by the user.
>>>
>>> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
>>> Cc: Will Deacon <will@kernel.org>
>>> ---
>>>
>>> Notes:
>>>      v3:
>>>      - Use SCTLR_EL1_TCF0_NONE instead of 0 for consistency.
>>>      - Move mte_thread_switch() in this patch from an earlier one. In
>>>        addition, it is called after the dsb() in __switch_to() so that any
>>>        asynchronous tag check faults have been registered in the TFSR_EL1
>>>        registers (to be added with the in-kernel MTE support.
>>>      v2:
>>>      - Handle SCTLR_EL1_TCF0_NONE explicitly for consistency with PR_MTE_TCF_NONE.
>>>      - Fix SCTLR_EL1 register setting in flush_mte_state() (thanks to Peter
>>>        Collingbourne).
>>>      - Added ISB to update_sctlr_el1_tcf0() since, with the latest
>>>        architecture update/fix, the TCF0 field is used by the uaccess
>>>        routines.
> [...]
>
>>> diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
> [...]
>
>>> +void mte_thread_switch(struct task_struct *next)
>>> +{
>>> +	if (!system_supports_mte())
>>> +		return;
>>> +
>>> +	/* avoid expensive SCTLR_EL1 accesses if no change */
>>> +	if (current->thread.sctlr_tcf0 != next->thread.sctlr_tcf0)
>> I think this could be improved by checking whether `next` is a kernel
>> thread, in which case thread.sctlr_tcf0 is 0 but there is no point in
>> setting SCTLR_EL1.TCF0, since there should not be any access via TTBR0.
> Out of interest, do we have a nice way of testing for a kernel thread
> now?

Isn't it as simple as checking if PF_KTHREAD is set in tsk->flags? At least this is 
what ssbs_thread_switch() does.

Kevin

> I remember fpsimd_thread_switch() used to check for task->mm, but we
> seem to have got rid of that at some point.  set_mm() can defeat this,
> and anyway the heavy lifting for FPSIMD is now deferred until returning
> to userspace.
>
> Cheers
> ---Dave


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v7 29/29] arm64: mte: Add Memory Tagging Extension documentation
       [not found] ` <20200715170844.30064-30-catalin.marinas@arm.com>
@ 2020-07-27 16:36   ` Szabolcs Nagy
  2020-07-28 11:08     ` Dave Martin
  0 siblings, 1 reply; 23+ messages in thread
From: Szabolcs Nagy @ 2020-07-27 16:36 UTC (permalink / raw)
  To: Catalin Marinas
  Cc: linux-arch, Peter Collingbourne, Andrey Konovalov, Kevin Brodsky,
	linux-mm, Andrew Morton, Vincenzo Frascino, Will Deacon,
	Dave P Martin, linux-arm-kernel

The 07/15/2020 18:08, Catalin Marinas wrote:
> From: Vincenzo Frascino <vincenzo.frascino@arm.com>
> 
> Memory Tagging Extension (part of the ARMv8.5 Extensions) provides
> a mechanism to detect the sources of memory related errors which
> may be vulnerable to exploitation, including bounds violations,
> use-after-free, use-after-return, use-out-of-scope and use before
> initialization errors.
> 
> Add Memory Tagging Extension documentation for the arm64 linux
> kernel support.
> 
> Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
> Co-developed-by: Catalin Marinas <catalin.marinas@arm.com>
> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
> Acked-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
> Cc: Will Deacon <will@kernel.org>
> ---
> 
> Notes:
>     v7:
>     - Add information on ptrace() regset access (NT_ARM_TAGGED_ADDR_CTRL).
>     
>     v4:
>     - Document behaviour of madvise(MADV_DONTNEED/MADV_FREE).
>     - Document the initial process state on fork/execve.
>     - Clarify when the kernel uaccess checks the tags.
>     - Minor updates to the example code.
>     - A few other minor clean-ups following review.
>     
>     v3:
>     - Modify the uaccess checking conditions: only when the sync mode is
>       selected by the user. In async mode, the kernel uaccesses are not
>       checked.
>     - Clarify that an include mask of 0 (exclude mask 0xffff) results in
>       always generating tag 0.
>     - Document the ptrace() interface.
>     
>     v2:
>     - Documented the uaccess kernel tag checking mode.
>     - Removed the BTI definitions from cpu-feature-registers.rst.
>     - Removed the paragraph stating that MTE depends on the tagged address
>       ABI (while the Kconfig entry does, there is no requirement for the
>       user to enable both).
>     - Changed the GCR_EL1.Exclude handling description following the change
>       in the prctl() interface (include vs exclude mask).
>     - Updated the example code.
> 
>  Documentation/arm64/cpu-feature-registers.rst |   2 +
>  Documentation/arm64/elf_hwcaps.rst            |   4 +
>  Documentation/arm64/index.rst                 |   1 +
>  .../arm64/memory-tagging-extension.rst        | 305 ++++++++++++++++++
>  4 files changed, 312 insertions(+)
>  create mode 100644 Documentation/arm64/memory-tagging-extension.rst
> 
> diff --git a/Documentation/arm64/cpu-feature-registers.rst b/Documentation/arm64/cpu-feature-registers.rst
...
> +Tag Check Faults
> +----------------
> +
> +When ``PROT_MTE`` is enabled on an address range and a mismatch between
> +the logical and allocation tags occurs on access, there are three
> +configurable behaviours:
> +
> +- *Ignore* - This is the default mode. The CPU (and kernel) ignores the
> +  tag check fault.
> +
> +- *Synchronous* - The kernel raises a ``SIGSEGV`` synchronously, with
> +  ``.si_code = SEGV_MTESERR`` and ``.si_addr = <fault-address>``. The
> +  memory access is not performed. If ``SIGSEGV`` is ignored or blocked
> +  by the offending thread, the containing process is terminated with a
> +  ``coredump``.
> +
> +- *Asynchronous* - The kernel raises a ``SIGSEGV``, in the offending
> +  thread, asynchronously following one or multiple tag check faults,
> +  with ``.si_code = SEGV_MTEAERR`` and ``.si_addr = 0`` (the faulting
> +  address is unknown).
> +
> +The user can select the above modes, per thread, using the
> +``prctl(PR_SET_TAGGED_ADDR_CTRL, flags, 0, 0, 0)`` system call where
> +``flags`` contain one of the following values in the ``PR_MTE_TCF_MASK``
> +bit-field:
> +
> +- ``PR_MTE_TCF_NONE``  - *Ignore* tag check faults
> +- ``PR_MTE_TCF_SYNC``  - *Synchronous* tag check fault mode
> +- ``PR_MTE_TCF_ASYNC`` - *Asynchronous* tag check fault mode
> +
> +The current tag check fault mode can be read using the
> +``prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0)`` system call.

we discussed the need for per process prctl off list, i will
try to summarize the requirement here:

- it cannot be guaranteed in general that a library initializer
or first call into a library happens when the process is still
single threaded.

- user code currently has no way to call prctl in all threads of
a process and even within the c runtime doing so is problematic
(it has to signal all threads, which requires a reserved signal
and dealing with exiting threads and signal masks, such mechanism
can break qemu user and various other userspace tooling).

- we don't yet have defined contract in userspace about how user
code may enable mte (i.e. use the prctl call), but it seems that
there will be use cases for it: LD_PRELOADing malloc for heap
tagging is one such case, but any library or custom allocator
that wants to use mte will have this issue: when it enables mte
it wants to enable it for all threads in the process. (or at
least all threads managed by the c runtime).

- even if user code is not allowed to call the prctl directly,
i.e. the prctl settings are owned by the libc, there will be
cases when the settings have to be changed in a multithreaded
process (e.g. dlopening a library that requires a particular
mte state).

a solution is to introduce a flag like SECCOMP_FILTER_FLAG_TSYNC
that means the prctl is for all threads in the process not just
for the current one. however the exact semantics is not obvious
if there are inconsistent settings in different threads or user
code tries to use the prctl concurrently: first checking then
setting the mte state via separate prctl calls is racy. but if
the userspace contract for enabling mte limits who and when can
call the prctl then i think the simple sync flag approach works.

(the sync flag should apply to all prctl settings: tagged addr
syscall abi, mte check fault mode, irg tag excludes. ideally it
would work for getting the process wide state and it would fail
in case of inconsistent settings.)

we may need to document some memory ordering details when
memory accesses in other threads are affected, but i think
that can be something simple that leaves it unspecified
what happens with memory accesses that are not synchrnized
with the prctl call.

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v7 29/29] arm64: mte: Add Memory Tagging Extension documentation
  2020-07-27 16:36   ` [PATCH v7 29/29] arm64: mte: Add Memory Tagging Extension documentation Szabolcs Nagy
@ 2020-07-28 11:08     ` Dave Martin
  2020-07-28 14:53       ` Szabolcs Nagy
  0 siblings, 1 reply; 23+ messages in thread
From: Dave Martin @ 2020-07-28 11:08 UTC (permalink / raw)
  To: Szabolcs Nagy
  Cc: linux-arch, Will Deacon, Catalin Marinas, Kevin Brodsky,
	linux-mm, Andrey Konovalov, Andrew Morton, Vincenzo Frascino,
	Peter Collingbourne, linux-arm-kernel

On Mon, Jul 27, 2020 at 05:36:35PM +0100, Szabolcs Nagy wrote:
> The 07/15/2020 18:08, Catalin Marinas wrote:
> > From: Vincenzo Frascino <vincenzo.frascino@arm.com>
> > 
> > Memory Tagging Extension (part of the ARMv8.5 Extensions) provides
> > a mechanism to detect the sources of memory related errors which
> > may be vulnerable to exploitation, including bounds violations,
> > use-after-free, use-after-return, use-out-of-scope and use before
> > initialization errors.
> > 
> > Add Memory Tagging Extension documentation for the arm64 linux
> > kernel support.
> > 
> > Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
> > Co-developed-by: Catalin Marinas <catalin.marinas@arm.com>
> > Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
> > Acked-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
> > Cc: Will Deacon <will@kernel.org>
> > ---
> > 
> > Notes:
> >     v7:
> >     - Add information on ptrace() regset access (NT_ARM_TAGGED_ADDR_CTRL).
> >     
> >     v4:
> >     - Document behaviour of madvise(MADV_DONTNEED/MADV_FREE).
> >     - Document the initial process state on fork/execve.
> >     - Clarify when the kernel uaccess checks the tags.
> >     - Minor updates to the example code.
> >     - A few other minor clean-ups following review.
> >     
> >     v3:
> >     - Modify the uaccess checking conditions: only when the sync mode is
> >       selected by the user. In async mode, the kernel uaccesses are not
> >       checked.
> >     - Clarify that an include mask of 0 (exclude mask 0xffff) results in
> >       always generating tag 0.
> >     - Document the ptrace() interface.
> >     
> >     v2:
> >     - Documented the uaccess kernel tag checking mode.
> >     - Removed the BTI definitions from cpu-feature-registers.rst.
> >     - Removed the paragraph stating that MTE depends on the tagged address
> >       ABI (while the Kconfig entry does, there is no requirement for the
> >       user to enable both).
> >     - Changed the GCR_EL1.Exclude handling description following the change
> >       in the prctl() interface (include vs exclude mask).
> >     - Updated the example code.
> > 
> >  Documentation/arm64/cpu-feature-registers.rst |   2 +
> >  Documentation/arm64/elf_hwcaps.rst            |   4 +
> >  Documentation/arm64/index.rst                 |   1 +
> >  .../arm64/memory-tagging-extension.rst        | 305 ++++++++++++++++++
> >  4 files changed, 312 insertions(+)
> >  create mode 100644 Documentation/arm64/memory-tagging-extension.rst
> > 
> > diff --git a/Documentation/arm64/cpu-feature-registers.rst b/Documentation/arm64/cpu-feature-registers.rst
> ...
> > +Tag Check Faults
> > +----------------
> > +
> > +When ``PROT_MTE`` is enabled on an address range and a mismatch between
> > +the logical and allocation tags occurs on access, there are three
> > +configurable behaviours:
> > +
> > +- *Ignore* - This is the default mode. The CPU (and kernel) ignores the
> > +  tag check fault.
> > +
> > +- *Synchronous* - The kernel raises a ``SIGSEGV`` synchronously, with
> > +  ``.si_code = SEGV_MTESERR`` and ``.si_addr = <fault-address>``. The
> > +  memory access is not performed. If ``SIGSEGV`` is ignored or blocked
> > +  by the offending thread, the containing process is terminated with a
> > +  ``coredump``.
> > +
> > +- *Asynchronous* - The kernel raises a ``SIGSEGV``, in the offending
> > +  thread, asynchronously following one or multiple tag check faults,
> > +  with ``.si_code = SEGV_MTEAERR`` and ``.si_addr = 0`` (the faulting
> > +  address is unknown).
> > +
> > +The user can select the above modes, per thread, using the
> > +``prctl(PR_SET_TAGGED_ADDR_CTRL, flags, 0, 0, 0)`` system call where
> > +``flags`` contain one of the following values in the ``PR_MTE_TCF_MASK``
> > +bit-field:
> > +
> > +- ``PR_MTE_TCF_NONE``  - *Ignore* tag check faults
> > +- ``PR_MTE_TCF_SYNC``  - *Synchronous* tag check fault mode
> > +- ``PR_MTE_TCF_ASYNC`` - *Asynchronous* tag check fault mode
> > +
> > +The current tag check fault mode can be read using the
> > +``prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0)`` system call.
> 
> we discussed the need for per process prctl off list, i will
> try to summarize the requirement here:
> 
> - it cannot be guaranteed in general that a library initializer
> or first call into a library happens when the process is still
> single threaded.
> 
> - user code currently has no way to call prctl in all threads of
> a process and even within the c runtime doing so is problematic
> (it has to signal all threads, which requires a reserved signal
> and dealing with exiting threads and signal masks, such mechanism
> can break qemu user and various other userspace tooling).

When working on the SVE support, I came to the conclusion that this
kind of thing would normally either be done by the runtime itself, or in
close cooperation with the runtime.  However, for SVE it never makes
sense for one thread to asynchronously change the vector length of
another thread -- that's different from the MTE situation.

> - we don't yet have defined contract in userspace about how user
> code may enable mte (i.e. use the prctl call), but it seems that
> there will be use cases for it: LD_PRELOADing malloc for heap
> tagging is one such case, but any library or custom allocator
> that wants to use mte will have this issue: when it enables mte
> it wants to enable it for all threads in the process. (or at
> least all threads managed by the c runtime).

What are the situations where we anticipate a need to twiddle MTE in
multiple threads simultaneously, other than during process startup?

> - even if user code is not allowed to call the prctl directly,
> i.e. the prctl settings are owned by the libc, there will be
> cases when the settings have to be changed in a multithreaded
> process (e.g. dlopening a library that requires a particular
> mte state).

Could be avoided by refusing to dlopen a library that is incompatible
with the current process.

dlopen()ing a library that doesn't support tagged addresses, in a
process that does use tagged addresses, seems undesirable even if tag
checking is currently turned off.


> a solution is to introduce a flag like SECCOMP_FILTER_FLAG_TSYNC
> that means the prctl is for all threads in the process not just
> for the current one. however the exact semantics is not obvious
> if there are inconsistent settings in different threads or user
> code tries to use the prctl concurrently: first checking then
> setting the mte state via separate prctl calls is racy. but if
> the userspace contract for enabling mte limits who and when can
> call the prctl then i think the simple sync flag approach works.
> 
> (the sync flag should apply to all prctl settings: tagged addr
> syscall abi, mte check fault mode, irg tag excludes. ideally it
> would work for getting the process wide state and it would fail
> in case of inconsistent settings.)

If going down this route, perhaps we could have sets of settings:
so for each setting we have a process-wide value and a per-thread
value, with defines rules about how they combine.

Since MTE is a debugging feature, we might be able to be less aggressive
about synchronisation than in the SECCOMP case.

> we may need to document some memory ordering details when
> memory accesses in other threads are affected, but i think
> that can be something simple that leaves it unspecified
> what happens with memory accesses that are not synchrnized
> with the prctl call.

Hmmm...

Cheers
---Dave

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v7 29/29] arm64: mte: Add Memory Tagging Extension documentation
  2020-07-28 11:08     ` Dave Martin
@ 2020-07-28 14:53       ` Szabolcs Nagy
  2020-07-28 19:59         ` Catalin Marinas
  0 siblings, 1 reply; 23+ messages in thread
From: Szabolcs Nagy @ 2020-07-28 14:53 UTC (permalink / raw)
  To: Dave Martin
  Cc: linux-arch, nd, Will Deacon, Catalin Marinas, Kevin Brodsky,
	linux-mm, Andrey Konovalov, Andrew Morton, Vincenzo Frascino,
	Peter Collingbourne, linux-arm-kernel

The 07/28/2020 12:08, Dave Martin wrote:
> On Mon, Jul 27, 2020 at 05:36:35PM +0100, Szabolcs Nagy wrote:
> > The 07/15/2020 18:08, Catalin Marinas wrote:
> > > +The user can select the above modes, per thread, using the
> > > +``prctl(PR_SET_TAGGED_ADDR_CTRL, flags, 0, 0, 0)`` system call where
> > > +``flags`` contain one of the following values in the ``PR_MTE_TCF_MASK``
> > > +bit-field:
> > > +
> > > +- ``PR_MTE_TCF_NONE``  - *Ignore* tag check faults
> > > +- ``PR_MTE_TCF_SYNC``  - *Synchronous* tag check fault mode
> > > +- ``PR_MTE_TCF_ASYNC`` - *Asynchronous* tag check fault mode
> > > +
> > > +The current tag check fault mode can be read using the
> > > +``prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0)`` system call.
> > 
> > we discussed the need for per process prctl off list, i will
> > try to summarize the requirement here:
> > 
> > - it cannot be guaranteed in general that a library initializer
> > or first call into a library happens when the process is still
> > single threaded.
> > 
> > - user code currently has no way to call prctl in all threads of
> > a process and even within the c runtime doing so is problematic
> > (it has to signal all threads, which requires a reserved signal
> > and dealing with exiting threads and signal masks, such mechanism
> > can break qemu user and various other userspace tooling).
> 
> When working on the SVE support, I came to the conclusion that this
> kind of thing would normally either be done by the runtime itself, or in
> close cooperation with the runtime.  However, for SVE it never makes
> sense for one thread to asynchronously change the vector length of
> another thread -- that's different from the MTE situation.

currently there is libc mechanism to do some operation
in all threads (e.g. for set*id) but this is fragile
and not something that can be exposed to user code.
(on the kernel side it should be much simpler to do)

> > - we don't yet have defined contract in userspace about how user
> > code may enable mte (i.e. use the prctl call), but it seems that
> > there will be use cases for it: LD_PRELOADing malloc for heap
> > tagging is one such case, but any library or custom allocator
> > that wants to use mte will have this issue: when it enables mte
> > it wants to enable it for all threads in the process. (or at
> > least all threads managed by the c runtime).
> 
> What are the situations where we anticipate a need to twiddle MTE in
> multiple threads simultaneously, other than during process startup?
> 
> > - even if user code is not allowed to call the prctl directly,
> > i.e. the prctl settings are owned by the libc, there will be
> > cases when the settings have to be changed in a multithreaded
> > process (e.g. dlopening a library that requires a particular
> > mte state).
> 
> Could be avoided by refusing to dlopen a library that is incompatible
> with the current process.
> 
> dlopen()ing a library that doesn't support tagged addresses, in a
> process that does use tagged addresses, seems undesirable even if tag
> checking is currently turned off.

yes but it can go the other way too:

at startup the libc does not enable tag checks
for performance reasons, but at dlopen time a
library is detected to use mte (e.g. stack
tagging or custom allocator).

then libc or the dlopened library has to ensure
that checks are enabled in all threads. (in case
of stack tagging the libc has to mark existing
stacks with PROT_MTE too, there is mechanism for
this in glibc to deal with dlopened libraries
that require executable stack and only reject
the dlopen if this cannot be performed.)

another usecase is that the libc is mte-safe
(it accepts tagged pointers and memory in its
interfaces), but it does not enable mte (this
will be the case with glibc 2.32) and user
libraries have to enable mte to use it (custom
allocator or malloc interposition are examples).

and i think this is necessary if userpsace wants
to turn async tag check into sync tag check at
runtime when a failure is detected.

> > a solution is to introduce a flag like SECCOMP_FILTER_FLAG_TSYNC
> > that means the prctl is for all threads in the process not just
> > for the current one. however the exact semantics is not obvious
> > if there are inconsistent settings in different threads or user
> > code tries to use the prctl concurrently: first checking then
> > setting the mte state via separate prctl calls is racy. but if
> > the userspace contract for enabling mte limits who and when can
> > call the prctl then i think the simple sync flag approach works.
> > 
> > (the sync flag should apply to all prctl settings: tagged addr
> > syscall abi, mte check fault mode, irg tag excludes. ideally it
> > would work for getting the process wide state and it would fail
> > in case of inconsistent settings.)
> 
> If going down this route, perhaps we could have sets of settings:
> so for each setting we have a process-wide value and a per-thread
> value, with defines rules about how they combine.
> 
> Since MTE is a debugging feature, we might be able to be less aggressive
> about synchronisation than in the SECCOMP case.

separate process-wide and per-thread value
works for me and i expect most uses will
be process wide settings.

i don't think mte is less of a security
feature than seccomp.

if linux does not want to add a per process
setting then only libc will be able to opt-in
to mte and only at very early in the startup
process (before executing any user code that
may start threads). this is not out of question,
but i think it limits the usage and deployment
options.

> > we may need to document some memory ordering details when
> > memory accesses in other threads are affected, but i think
> > that can be something simple that leaves it unspecified
> > what happens with memory accesses that are not synchrnized
> > with the prctl call.
> 
> Hmmm...

e.g. it may be enough if the spec only works if
there is no PROT_MTE memory mapped yet, and no
tagged addresses are present in the multi-threaded
process when the prctl is called.


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v7 29/29] arm64: mte: Add Memory Tagging Extension documentation
  2020-07-28 14:53       ` Szabolcs Nagy
@ 2020-07-28 19:59         ` Catalin Marinas
  2020-08-03 12:43           ` Szabolcs Nagy
  0 siblings, 1 reply; 23+ messages in thread
From: Catalin Marinas @ 2020-07-28 19:59 UTC (permalink / raw)
  To: Szabolcs Nagy
  Cc: linux-arch, nd, Will Deacon, Andrey Konovalov, Kevin Brodsky,
	linux-mm, Andrew Morton, Vincenzo Frascino, Peter Collingbourne,
	Dave Martin, linux-arm-kernel

On Tue, Jul 28, 2020 at 03:53:51PM +0100, Szabolcs Nagy wrote:
> The 07/28/2020 12:08, Dave Martin wrote:
> > On Mon, Jul 27, 2020 at 05:36:35PM +0100, Szabolcs Nagy wrote:
> > > a solution is to introduce a flag like SECCOMP_FILTER_FLAG_TSYNC
> > > that means the prctl is for all threads in the process not just
> > > for the current one. however the exact semantics is not obvious
> > > if there are inconsistent settings in different threads or user
> > > code tries to use the prctl concurrently: first checking then
> > > setting the mte state via separate prctl calls is racy. but if
> > > the userspace contract for enabling mte limits who and when can
> > > call the prctl then i think the simple sync flag approach works.
> > > 
> > > (the sync flag should apply to all prctl settings: tagged addr
> > > syscall abi, mte check fault mode, irg tag excludes. ideally it
> > > would work for getting the process wide state and it would fail
> > > in case of inconsistent settings.)
> > 
> > If going down this route, perhaps we could have sets of settings:
> > so for each setting we have a process-wide value and a per-thread
> > value, with defines rules about how they combine.
> > 
> > Since MTE is a debugging feature, we might be able to be less aggressive
> > about synchronisation than in the SECCOMP case.
> 
> separate process-wide and per-thread value
> works for me and i expect most uses will
> be process wide settings.

The problem with the thread synchronisation is, unlike SECCOMP, that we
need to update the SCTLR_EL1.TCF0 field across all the CPUs that may run
threads of the current process. I haven't convinced myself that this is
race-free without heavy locking. If we go for some heavy mechanism like
stop_machine(), that opens the kernel to DoS attacks from user. Still
investigating if something like membarrier() would be sufficient.

SECCOMP gets away with this as it only needs to set some variable
without IPI'ing the other CPUs.

> i don't think mte is less of a security
> feature than seccomp.

Well, MTE is probabilistic, SECCOMP seems to be more precise ;).

> if linux does not want to add a per process
> setting then only libc will be able to opt-in
> to mte and only at very early in the startup
> process (before executing any user code that
> may start threads). this is not out of question,
> but i think it limits the usage and deployment
> options.

There is also the risk that we try to be too flexible at this stage
without a real use-case.

-- 
Catalin

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v7 29/29] arm64: mte: Add Memory Tagging Extension documentation
  2020-07-28 19:59         ` Catalin Marinas
@ 2020-08-03 12:43           ` Szabolcs Nagy
  2020-08-07 15:19             ` Catalin Marinas
  0 siblings, 1 reply; 23+ messages in thread
From: Szabolcs Nagy @ 2020-08-03 12:43 UTC (permalink / raw)
  To: Catalin Marinas
  Cc: linux-arch, nd, Will Deacon, Andrey Konovalov, Kevin Brodsky,
	linux-mm, Andrew Morton, Vincenzo Frascino, Peter Collingbourne,
	Dave Martin, linux-arm-kernel

The 07/28/2020 20:59, Catalin Marinas wrote:
> On Tue, Jul 28, 2020 at 03:53:51PM +0100, Szabolcs Nagy wrote:
> > if linux does not want to add a per process
> > setting then only libc will be able to opt-in
> > to mte and only at very early in the startup
> > process (before executing any user code that
> > may start threads). this is not out of question,
> > but i think it limits the usage and deployment
> > options.
> 
> There is also the risk that we try to be too flexible at this stage
> without a real use-case.

i don't know how mte will be turned on in libc.

if we can always turn sync tag checks on early
whenever mte is available then i think there is
no issue.

but if we have to make the decision later for
compatibility or performance reasons then per
thread setting is problematic.

use of the prctl outside of libc is very limited
if it's per thread only:

- application code may use it in a (elf specific)
  pre-initialization function, but that's a bit
  obscure (not exposed in c) and it is reasonable
  for an application to enable mte checks after
  it registered a signal handler for mte faults.
  (and at that point it may be multi-threaded).

- library code normally initializes per thread
  state on the first call into the library from a
  given thread, but with mte, as soon as memory /
  pointers are tagged in one thread, all threads
  are affected: not performing checks in other
  threads is less secure (may be ok) and it means
  incompatible syscall abi (not ok). so at least
  PR_TAGGED_ADDR_ENABLE should have process wide
  setting for this usage.

but i guess it is fine to design the mechanism
for these in a later linux version, until then
such usage will be unreliable (will depend on
how early threads are created).

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v7 18/29] arm64: mte: Allow user control of the tag check mode via prctl()
       [not found] ` <20200715170844.30064-19-catalin.marinas@arm.com>
  2020-07-20 15:30   ` [PATCH v7 18/29] arm64: mte: Allow user control of the tag check mode via prctl() Kevin Brodsky
@ 2020-08-04 19:34   ` Kevin Brodsky
  2020-08-05  9:24     ` Catalin Marinas
  1 sibling, 1 reply; 23+ messages in thread
From: Kevin Brodsky @ 2020-08-04 19:34 UTC (permalink / raw)
  To: Catalin Marinas, linux-arm-kernel
  Cc: linux-arch, Szabolcs Nagy, Andrey Konovalov, Peter Collingbourne,
	linux-mm, Andrew Morton, Vincenzo Frascino, Will Deacon,
	Dave P Martin

On 15/07/2020 18:08, Catalin Marinas wrote:
> By default, even if PROT_MTE is set on a memory range, there is no tag
> check fault reporting (SIGSEGV). Introduce a set of option to the
> exiting prctl(PR_SET_TAGGED_ADDR_CTRL) to allow user control of the tag
> check fault mode:
>
>    PR_MTE_TCF_NONE  - no reporting (default)
>    PR_MTE_TCF_SYNC  - synchronous tag check fault reporting
>    PR_MTE_TCF_ASYNC - asynchronous tag check fault reporting
>
> These options translate into the corresponding SCTLR_EL1.TCF0 bitfield,
> context-switched by the kernel. Note that uaccess done by the kernel is
> not checked and cannot be configured by the user.

The last sentence is outdated, it should probably say that uaccess is only checked in 
in synchronous mode.

Kevin

> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Will Deacon <will@kernel.org>
> ---
>
> Notes:
>      v3:
>      - Use SCTLR_EL1_TCF0_NONE instead of 0 for consistency.
>      - Move mte_thread_switch() in this patch from an earlier one. In
>        addition, it is called after the dsb() in __switch_to() so that any
>        asynchronous tag check faults have been registered in the TFSR_EL1
>        registers (to be added with the in-kernel MTE support.
>      
>      v2:
>      - Handle SCTLR_EL1_TCF0_NONE explicitly for consistency with PR_MTE_TCF_NONE.
>      - Fix SCTLR_EL1 register setting in flush_mte_state() (thanks to Peter
>        Collingbourne).
>      - Added ISB to update_sctlr_el1_tcf0() since, with the latest
>        architecture update/fix, the TCF0 field is used by the uaccess
>        routines.
>
>   arch/arm64/include/asm/mte.h       | 14 ++++++
>   arch/arm64/include/asm/processor.h |  3 ++
>   arch/arm64/kernel/mte.c            | 77 ++++++++++++++++++++++++++++++
>   arch/arm64/kernel/process.c        | 26 ++++++++--
>   include/uapi/linux/prctl.h         |  6 +++
>   5 files changed, 123 insertions(+), 3 deletions(-)
>
> diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
> index b2577eee62c2..df2efbc9f8f1 100644
> --- a/arch/arm64/include/asm/mte.h
> +++ b/arch/arm64/include/asm/mte.h
> @@ -21,6 +21,9 @@ void mte_clear_page_tags(void *addr);
>   void mte_sync_tags(pte_t *ptep, pte_t pte);
>   void mte_copy_page_tags(void *kto, const void *kfrom);
>   void flush_mte_state(void);
> +void mte_thread_switch(struct task_struct *next);
> +long set_mte_ctrl(unsigned long arg);
> +long get_mte_ctrl(void);
>   
>   #else
>   
> @@ -36,6 +39,17 @@ static inline void mte_copy_page_tags(void *kto, const void *kfrom)
>   static inline void flush_mte_state(void)
>   {
>   }
> +static inline void mte_thread_switch(struct task_struct *next)
> +{
> +}
> +static inline long set_mte_ctrl(unsigned long arg)
> +{
> +	return 0;
> +}
> +static inline long get_mte_ctrl(void)
> +{
> +	return 0;
> +}
>   
>   #endif
>   
> diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
> index 240fe5e5b720..80e7f0573309 100644
> --- a/arch/arm64/include/asm/processor.h
> +++ b/arch/arm64/include/asm/processor.h
> @@ -151,6 +151,9 @@ struct thread_struct {
>   	struct ptrauth_keys_user	keys_user;
>   	struct ptrauth_keys_kernel	keys_kernel;
>   #endif
> +#ifdef CONFIG_ARM64_MTE
> +	u64			sctlr_tcf0;
> +#endif
>   };
>   
>   static inline void arch_thread_struct_whitelist(unsigned long *offset,
> diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
> index 5f54fd140610..375483a1f573 100644
> --- a/arch/arm64/kernel/mte.c
> +++ b/arch/arm64/kernel/mte.c
> @@ -5,6 +5,8 @@
>   
>   #include <linux/bitops.h>
>   #include <linux/mm.h>
> +#include <linux/prctl.h>
> +#include <linux/sched.h>
>   #include <linux/string.h>
>   #include <linux/thread_info.h>
>   
> @@ -49,6 +51,26 @@ int memcmp_pages(struct page *page1, struct page *page2)
>   	return ret;
>   }
>   
> +static void update_sctlr_el1_tcf0(u64 tcf0)
> +{
> +	/* ISB required for the kernel uaccess routines */
> +	sysreg_clear_set(sctlr_el1, SCTLR_EL1_TCF0_MASK, tcf0);
> +	isb();
> +}
> +
> +static void set_sctlr_el1_tcf0(u64 tcf0)
> +{
> +	/*
> +	 * mte_thread_switch() checks current->thread.sctlr_tcf0 as an
> +	 * optimisation. Disable preemption so that it does not see
> +	 * the variable update before the SCTLR_EL1.TCF0 one.
> +	 */
> +	preempt_disable();
> +	current->thread.sctlr_tcf0 = tcf0;
> +	update_sctlr_el1_tcf0(tcf0);
> +	preempt_enable();
> +}
> +
>   void flush_mte_state(void)
>   {
>   	if (!system_supports_mte())
> @@ -58,4 +80,59 @@ void flush_mte_state(void)
>   	dsb(ish);
>   	write_sysreg_s(0, SYS_TFSRE0_EL1);
>   	clear_thread_flag(TIF_MTE_ASYNC_FAULT);
> +	/* disable tag checking */
> +	set_sctlr_el1_tcf0(SCTLR_EL1_TCF0_NONE);
> +}
> +
> +void mte_thread_switch(struct task_struct *next)
> +{
> +	if (!system_supports_mte())
> +		return;
> +
> +	/* avoid expensive SCTLR_EL1 accesses if no change */
> +	if (current->thread.sctlr_tcf0 != next->thread.sctlr_tcf0)
> +		update_sctlr_el1_tcf0(next->thread.sctlr_tcf0);
> +}
> +
> +long set_mte_ctrl(unsigned long arg)
> +{
> +	u64 tcf0;
> +
> +	if (!system_supports_mte())
> +		return 0;
> +
> +	switch (arg & PR_MTE_TCF_MASK) {
> +	case PR_MTE_TCF_NONE:
> +		tcf0 = SCTLR_EL1_TCF0_NONE;
> +		break;
> +	case PR_MTE_TCF_SYNC:
> +		tcf0 = SCTLR_EL1_TCF0_SYNC;
> +		break;
> +	case PR_MTE_TCF_ASYNC:
> +		tcf0 = SCTLR_EL1_TCF0_ASYNC;
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	set_sctlr_el1_tcf0(tcf0);
> +
> +	return 0;
> +}
> +
> +long get_mte_ctrl(void)
> +{
> +	if (!system_supports_mte())
> +		return 0;
> +
> +	switch (current->thread.sctlr_tcf0) {
> +	case SCTLR_EL1_TCF0_NONE:
> +		return PR_MTE_TCF_NONE;
> +	case SCTLR_EL1_TCF0_SYNC:
> +		return PR_MTE_TCF_SYNC;
> +	case SCTLR_EL1_TCF0_ASYNC:
> +		return PR_MTE_TCF_ASYNC;
> +	}
> +
> +	return 0;
>   }
> diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
> index 695705d1f8e5..d19ce8053a03 100644
> --- a/arch/arm64/kernel/process.c
> +++ b/arch/arm64/kernel/process.c
> @@ -544,6 +544,13 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
>   	 */
>   	dsb(ish);
>   
> +	/*
> +	 * MTE thread switching must happen after the DSB above to ensure that
> +	 * any asynchronous tag check faults have been logged in the TFSR*_EL1
> +	 * registers.
> +	 */
> +	mte_thread_switch(next);
> +
>   	/* the actual thread switch */
>   	last = cpu_switch_to(prev, next);
>   
> @@ -603,9 +610,15 @@ static unsigned int tagged_addr_disabled;
>   
>   long set_tagged_addr_ctrl(unsigned long arg)
>   {
> +	unsigned long valid_mask = PR_TAGGED_ADDR_ENABLE;
> +
>   	if (is_compat_task())
>   		return -EINVAL;
> -	if (arg & ~PR_TAGGED_ADDR_ENABLE)
> +
> +	if (system_supports_mte())
> +		valid_mask |= PR_MTE_TCF_MASK;
> +
> +	if (arg & ~valid_mask)
>   		return -EINVAL;
>   
>   	/*
> @@ -615,6 +628,9 @@ long set_tagged_addr_ctrl(unsigned long arg)
>   	if (arg & PR_TAGGED_ADDR_ENABLE && tagged_addr_disabled)
>   		return -EINVAL;
>   
> +	if (set_mte_ctrl(arg) != 0)
> +		return -EINVAL;
> +
>   	update_thread_flag(TIF_TAGGED_ADDR, arg & PR_TAGGED_ADDR_ENABLE);
>   
>   	return 0;
> @@ -622,13 +638,17 @@ long set_tagged_addr_ctrl(unsigned long arg)
>   
>   long get_tagged_addr_ctrl(void)
>   {
> +	long ret = 0;
> +
>   	if (is_compat_task())
>   		return -EINVAL;
>   
>   	if (test_thread_flag(TIF_TAGGED_ADDR))
> -		return PR_TAGGED_ADDR_ENABLE;
> +		ret = PR_TAGGED_ADDR_ENABLE;
>   
> -	return 0;
> +	ret |= get_mte_ctrl();
> +
> +	return ret;
>   }
>   
>   /*
> diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
> index 07b4f8131e36..2390ab324afa 100644
> --- a/include/uapi/linux/prctl.h
> +++ b/include/uapi/linux/prctl.h
> @@ -233,6 +233,12 @@ struct prctl_mm_map {
>   #define PR_SET_TAGGED_ADDR_CTRL		55
>   #define PR_GET_TAGGED_ADDR_CTRL		56
>   # define PR_TAGGED_ADDR_ENABLE		(1UL << 0)
> +/* MTE tag check fault modes */
> +# define PR_MTE_TCF_SHIFT		1
> +# define PR_MTE_TCF_NONE		(0UL << PR_MTE_TCF_SHIFT)
> +# define PR_MTE_TCF_SYNC		(1UL << PR_MTE_TCF_SHIFT)
> +# define PR_MTE_TCF_ASYNC		(2UL << PR_MTE_TCF_SHIFT)
> +# define PR_MTE_TCF_MASK		(3UL << PR_MTE_TCF_SHIFT)
>   
>   /* Control reclaim behavior when allocating memory */
>   #define PR_SET_IO_FLUSHER		57


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v7 18/29] arm64: mte: Allow user control of the tag check mode via prctl()
  2020-08-04 19:34   ` Kevin Brodsky
@ 2020-08-05  9:24     ` Catalin Marinas
  0 siblings, 0 replies; 23+ messages in thread
From: Catalin Marinas @ 2020-08-05  9:24 UTC (permalink / raw)
  To: Kevin Brodsky
  Cc: linux-arch, Szabolcs Nagy, Andrey Konovalov, Peter Collingbourne,
	linux-mm, Andrew Morton, Vincenzo Frascino, Will Deacon,
	Dave P Martin, linux-arm-kernel

On Tue, Aug 04, 2020 at 08:34:42PM +0100, Kevin Brodsky wrote:
> On 15/07/2020 18:08, Catalin Marinas wrote:
> > By default, even if PROT_MTE is set on a memory range, there is no tag
> > check fault reporting (SIGSEGV). Introduce a set of option to the
> > exiting prctl(PR_SET_TAGGED_ADDR_CTRL) to allow user control of the tag
> > check fault mode:
> > 
> >    PR_MTE_TCF_NONE  - no reporting (default)
> >    PR_MTE_TCF_SYNC  - synchronous tag check fault reporting
> >    PR_MTE_TCF_ASYNC - asynchronous tag check fault reporting
> > 
> > These options translate into the corresponding SCTLR_EL1.TCF0 bitfield,
> > context-switched by the kernel. Note that uaccess done by the kernel is
> > not checked and cannot be configured by the user.
> 
> The last sentence is outdated, it should probably say that uaccess is only
> checked in in synchronous mode.

Thanks, I forgot about the commit log. The documentation was updated to:

**Note**: Kernel accesses to the user address space (e.g. ``read()``
system call) are not checked if the user thread tag checking mode is
``PR_MTE_TCF_NONE`` or ``PR_MTE_TCF_ASYNC``. If the tag checking mode is
``PR_MTE_TCF_SYNC``, the kernel makes a best effort to check its user
address accesses, however it cannot always guarantee it.

-- 
Catalin

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v7 29/29] arm64: mte: Add Memory Tagging Extension documentation
  2020-08-03 12:43           ` Szabolcs Nagy
@ 2020-08-07 15:19             ` Catalin Marinas
  2020-08-10 14:13               ` Szabolcs Nagy
  0 siblings, 1 reply; 23+ messages in thread
From: Catalin Marinas @ 2020-08-07 15:19 UTC (permalink / raw)
  To: Szabolcs Nagy
  Cc: linux-arch, nd, Will Deacon, Andrey Konovalov, Kevin Brodsky,
	linux-mm, Andrew Morton, Vincenzo Frascino, Peter Collingbourne,
	Dave Martin, linux-arm-kernel

Hi Szabolcs,

On Mon, Aug 03, 2020 at 01:43:10PM +0100, Szabolcs Nagy wrote:
> The 07/28/2020 20:59, Catalin Marinas wrote:
> > On Tue, Jul 28, 2020 at 03:53:51PM +0100, Szabolcs Nagy wrote:
> > > if linux does not want to add a per process setting then only libc
> > > will be able to opt-in to mte and only at very early in the
> > > startup process (before executing any user code that may start
> > > threads). this is not out of question, but i think it limits the
> > > usage and deployment options.
> > 
> > There is also the risk that we try to be too flexible at this stage
> > without a real use-case.
> 
> i don't know how mte will be turned on in libc.
> 
> if we can always turn sync tag checks on early whenever mte is
> available then i think there is no issue.
> 
> but if we have to make the decision later for compatibility or
> performance reasons then per thread setting is problematic.

At least for libc, I'm not sure how you could even turn MTE on at
run-time. The heap allocations would have to be mapped with PROT_MTE as
we can't easily change them (well, you could mprotect(), assuming the
user doesn't use tagged pointers on them).

There is a case to switch tag checking from asynchronous to synchronous
at run-time based on a signal but that's rather specific to Android
where zygote controls the signal handler. I don't think you can do this
with libc. Even on Android, since the async fault signal is delivered
per thread, it probably does this lazily (alternatively, it could issue
a SIGUSRx to the other threads for synchronisation).

> use of the prctl outside of libc is very limited if it's per thread
> only:

In the non-Android context, I think the prctl() for MTE control should
be restricted to the libc. You can control the mode prior to the process
being started using environment variables. I really don't see how the
libc could handle the changing of the MTE behaviour at run-time without
itself handling signals.

> - application code may use it in a (elf specific) pre-initialization
>   function, but that's a bit obscure (not exposed in c) and it is
>   reasonable for an application to enable mte checks after it
>   registered a signal handler for mte faults. (and at that point it
>   may be multi-threaded).

Since the app can install signal handlers, it can also deal with
notifying other threads with a SIGUSRx, assuming that it decided this
after multiple threads were created. If it does this while
single-threaded, subsequent threads would inherit the first one.

The only use-case I see for doing this in the kernel is if the code
requiring an MTE behaviour change cannot install signal handlers. More
on this below.

> - library code normally initializes per thread state on the first call
>   into the library from a given thread, but with mte, as soon as
>   memory / pointers are tagged in one thread, all threads are
>   affected: not performing checks in other threads is less secure (may
>   be ok) and it means incompatible syscall abi (not ok). so at least
>   PR_TAGGED_ADDR_ENABLE should have process wide setting for this
>   usage.

My assumption with MTE is that the libc will initialise it when the
library is loaded (something __attribute__((constructor))) and it's
still in single-threaded mode. Does it wait until the first malloc()
call? Also, is there such thing as a per-thread initialiser for a
dynamic library (not sure it can be implemented in practice though)?

The PR_TAGGED_ADDR_ENABLE synchronisation at least doesn't require IPIs
to other CPUs to change the hardware state. However, it can still race
with thread creation or a prctl() on another thread, not sure what we
can define here, especially as it depends on the kernel internals: e.g.
thread creation copies some data structures of the calling thread but at
the same time another thread wants to change such structures for all
threads of that process. The ordering of events here looks pretty
fragile.

Maybe with another global status (per process) which takes priority over
the per thread one would be easier. But such priority is not temporal
(i.e. whoever called prctl() last) but pretty strict: once a global
control was requested, it will remain global no matter what subsequent
threads request (or we can do it the other way around).

> but i guess it is fine to design the mechanism for these in a later
> linux version, until then such usage will be unreliable (will depend
> on how early threads are created).

Until we have a real use-case, I'd not complicate the matters further.
For example, I'm still not sure how realistic it is for an application
to load a new heap allocator after some threads were created. Even the
glibc support, I don't think it needs this.

Could an LD_PRELOADED library be initialised after threads were created
(I guess it could if another preloaded library created threads)? Even if
it does, do we have an example or it's rather theoretical.

If this becomes an essential use-case, we can look at adding a new flag
for prctl() which would set the option globally, with the caveats
mentioned above. It doesn't need to be in the initial ABI (and the
PR_TAGGED_ADDR_ENABLE is already upstream).

Thanks.

-- 
Catalin

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v7 29/29] arm64: mte: Add Memory Tagging Extension documentation
  2020-08-07 15:19             ` Catalin Marinas
@ 2020-08-10 14:13               ` Szabolcs Nagy
  2020-08-11 17:20                 ` Catalin Marinas
  0 siblings, 1 reply; 23+ messages in thread
From: Szabolcs Nagy @ 2020-08-10 14:13 UTC (permalink / raw)
  To: Catalin Marinas
  Cc: linux-arch, nd, Will Deacon, Andrey Konovalov, Kevin Brodsky,
	linux-mm, Andrew Morton, Vincenzo Frascino, Peter Collingbourne,
	Dave Martin, linux-arm-kernel

The 08/07/2020 16:19, Catalin Marinas wrote:
> On Mon, Aug 03, 2020 at 01:43:10PM +0100, Szabolcs Nagy wrote:
> > if we can always turn sync tag checks on early whenever mte is
> > available then i think there is no issue.
> > 
> > but if we have to make the decision later for compatibility or
> > performance reasons then per thread setting is problematic.
> 
> At least for libc, I'm not sure how you could even turn MTE on at
> run-time. The heap allocations would have to be mapped with PROT_MTE as
> we can't easily change them (well, you could mprotect(), assuming the
> user doesn't use tagged pointers on them).

e.g. dlopen of library with stack tagging.
(libc can mark stacks with PROT_MTE at that time)

or just turn on sync tag checks later when using
heap tagging.

> 
> There is a case to switch tag checking from asynchronous to synchronous
> at run-time based on a signal but that's rather specific to Android
> where zygote controls the signal handler. I don't think you can do this
> with libc. Even on Android, since the async fault signal is delivered
> per thread, it probably does this lazily (alternatively, it could issue
> a SIGUSRx to the other threads for synchronisation).

i think what that zygote is doing is a valid use-case but
in a normal linux setup the application owns the signal
handlers so the tag check switch has to be done by the
application. the libc can expose some api for it, so in
principle it's enough if the libc can do the runtime
switch, but we dont plan to add new libc apis for mte.

> > use of the prctl outside of libc is very limited if it's per thread
> > only:
> 
> In the non-Android context, I think the prctl() for MTE control should
> be restricted to the libc. You can control the mode prior to the process
> being started using environment variables. I really don't see how the
> libc could handle the changing of the MTE behaviour at run-time without
> itself handling signals.
> 
> > - application code may use it in a (elf specific) pre-initialization
> >   function, but that's a bit obscure (not exposed in c) and it is
> >   reasonable for an application to enable mte checks after it
> >   registered a signal handler for mte faults. (and at that point it
> >   may be multi-threaded).
> 
> Since the app can install signal handlers, it can also deal with
> notifying other threads with a SIGUSRx, assuming that it decided this
> after multiple threads were created. If it does this while
> single-threaded, subsequent threads would inherit the first one.

the application does not know what libraries create what
threads in the background, i dont think there is a way to
send signals to each thread (e.g. /proc/self/task cannot
be read atomically with respect to thread creation/exit).

the libc controls thread creation and exit so it can have
a list of threads it can notify, but an application cannot
do this. (libc could provide an api so applications can
do some per thread operation, but a libc would not do this
happily: currently there are locks around thread creation
and exit that are only needed for this "signal all threads"
mechanism which makes it hard to expose to users)

one way applications sometimes work this around is to
self re-exec. but that's a big hammer and not entirely
reliable (e.g. the exe may not be available on the
filesystem any more or the commandline args may need
to be preserved but they are clobbered, or some complex
application state needs to be recreated etc)

> 
> The only use-case I see for doing this in the kernel is if the code
> requiring an MTE behaviour change cannot install signal handlers. More
> on this below.
> 
> > - library code normally initializes per thread state on the first call
> >   into the library from a given thread, but with mte, as soon as
> >   memory / pointers are tagged in one thread, all threads are
> >   affected: not performing checks in other threads is less secure (may
> >   be ok) and it means incompatible syscall abi (not ok). so at least
> >   PR_TAGGED_ADDR_ENABLE should have process wide setting for this
> >   usage.
> 
> My assumption with MTE is that the libc will initialise it when the
> library is loaded (something __attribute__((constructor))) and it's
> still in single-threaded mode. Does it wait until the first malloc()
> call? Also, is there such thing as a per-thread initialiser for a
> dynamic library (not sure it can be implemented in practice though)?

there is no per thread initializer in an elf module.
(tls state is usually initialized lazily in threads
when necessary.)

malloc calls can happen before the ctors of an LD_PRELOAD
library and threads can be created before both.
glibc runs ldpreload ctors after other library ctors.

custom allocator can be of course dlopened.
(i'd expect several language runtimes to have their
own allocator and support dlopening the runtime)

> 
> The PR_TAGGED_ADDR_ENABLE synchronisation at least doesn't require IPIs
> to other CPUs to change the hardware state. However, it can still race
> with thread creation or a prctl() on another thread, not sure what we
> can define here, especially as it depends on the kernel internals: e.g.
> thread creation copies some data structures of the calling thread but at
> the same time another thread wants to change such structures for all
> threads of that process. The ordering of events here looks pretty
> fragile.
> 
> Maybe with another global status (per process) which takes priority over
> the per thread one would be easier. But such priority is not temporal
> (i.e. whoever called prctl() last) but pretty strict: once a global
> control was requested, it will remain global no matter what subsequent
> threads request (or we can do it the other way around).

i see.

> > but i guess it is fine to design the mechanism for these in a later
> > linux version, until then such usage will be unreliable (will depend
> > on how early threads are created).
> 
> Until we have a real use-case, I'd not complicate the matters further.
> For example, I'm still not sure how realistic it is for an application
> to load a new heap allocator after some threads were created. Even the
> glibc support, I don't think it needs this.
> 
> Could an LD_PRELOADED library be initialised after threads were created
> (I guess it could if another preloaded library created threads)? Even if
> it does, do we have an example or it's rather theoretical.

i believe this happens e.g. in applications built
with tsan. (the thread sanitizer creates a
background thread early which i think does not
call malloc itself but may want to access
malloced memory, but i don't have a setup with
tsan support to test this)

> 
> If this becomes an essential use-case, we can look at adding a new flag
> for prctl() which would set the option globally, with the caveats
> mentioned above. It doesn't need to be in the initial ABI (and the
> PR_TAGGED_ADDR_ENABLE is already upstream).
> 
> Thanks.
> 
> -- 
> Catalin

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v7 29/29] arm64: mte: Add Memory Tagging Extension documentation
  2020-08-10 14:13               ` Szabolcs Nagy
@ 2020-08-11 17:20                 ` Catalin Marinas
  2020-08-12 12:45                   ` Szabolcs Nagy
  0 siblings, 1 reply; 23+ messages in thread
From: Catalin Marinas @ 2020-08-11 17:20 UTC (permalink / raw)
  To: Szabolcs Nagy
  Cc: linux-arch, nd, Will Deacon, Andrey Konovalov, Kevin Brodsky,
	linux-mm, Andrew Morton, Vincenzo Frascino, Peter Collingbourne,
	Dave Martin, linux-arm-kernel

On Mon, Aug 10, 2020 at 03:13:09PM +0100, Szabolcs Nagy wrote:
> The 08/07/2020 16:19, Catalin Marinas wrote:
> > On Mon, Aug 03, 2020 at 01:43:10PM +0100, Szabolcs Nagy wrote:
> > > if we can always turn sync tag checks on early whenever mte is
> > > available then i think there is no issue.
> > > 
> > > but if we have to make the decision later for compatibility or
> > > performance reasons then per thread setting is problematic.
> > 
> > At least for libc, I'm not sure how you could even turn MTE on at
> > run-time. The heap allocations would have to be mapped with PROT_MTE as
> > we can't easily change them (well, you could mprotect(), assuming the
> > user doesn't use tagged pointers on them).
> 
> e.g. dlopen of library with stack tagging. (libc can mark stacks with
> PROT_MTE at that time)

If we allow such mixed object support with stack tagging enabled at
dlopen, PROT_MTE would need to be turned on for each thread stack. This
wouldn't require synchronisation, only knowing where the thread stacks
are, but you'd need to make sure threads don't call into the new library
until the stacks have been mprotect'ed. Doing this midway through a
function execution may corrupt the tags.

So I'm not sure how safe any of this is without explicit user
synchronisation (i.e. don't call into the library until all threads have
been updated). Even changing options like GCR_EL1.Excl across multiple
threads may have unwanted effects. See this comment from Peter, the
difference being that instead of an explicit prctl() call on the current
stack, another thread would do it:

https://lore.kernel.org/linux-arch/CAMn1gO5rhOG1W+nVe103v=smvARcFFp_Ct9XqH2Ca4BUMfpDdg@mail.gmail.com/

> or just turn on sync tag checks later when using heap tagging.

I wonder whether setting the synchronous tag check mode by default would
improve this aspect. This would not have any effect until PROT_MTE is
used. If software wants some better performance they can explicitly opt
in to asynchronous mode or disable tag checking after some SIGSEGV +
reporting (this shouldn't exclude the environment variables you
currently use for controlling the tag check mode).

Also, if there are saner defaults for the user GCR_EL1.Excl (currently
all masked), we should decide them now.

If stack tagging will come with some ELF information, we could make the
default tag checking and GCR_EL1.Excl choices based on that, otherwise
maybe we should revisit the default configuration the kernel sets for
the user in the absence of any other information.

> > There is a case to switch tag checking from asynchronous to synchronous
> > at run-time based on a signal but that's rather specific to Android
> > where zygote controls the signal handler. I don't think you can do this
> > with libc. Even on Android, since the async fault signal is delivered
> > per thread, it probably does this lazily (alternatively, it could issue
> > a SIGUSRx to the other threads for synchronisation).
> 
> i think what that zygote is doing is a valid use-case but
> in a normal linux setup the application owns the signal
> handlers so the tag check switch has to be done by the
> application. the libc can expose some api for it, so in
> principle it's enough if the libc can do the runtime
> switch, but we dont plan to add new libc apis for mte.

Due to the synchronisation aspect especially regarding the stack
tagging, I'm not sure the kernel alone can safely do this.

Changing the tagged address syscall ABI across multiple threads should
be safer (well, at least the relaxing part). But if we don't solve the
other aspects I mentioned above, I don't think there is much point in
only doing it for this.

> > > - library code normally initializes per thread state on the first call
> > >   into the library from a given thread, but with mte, as soon as
> > >   memory / pointers are tagged in one thread, all threads are
> > >   affected: not performing checks in other threads is less secure (may
> > >   be ok) and it means incompatible syscall abi (not ok). so at least
> > >   PR_TAGGED_ADDR_ENABLE should have process wide setting for this
> > >   usage.
> > 
> > My assumption with MTE is that the libc will initialise it when the
> > library is loaded (something __attribute__((constructor))) and it's
> > still in single-threaded mode. Does it wait until the first malloc()
> > call? Also, is there such thing as a per-thread initialiser for a
> > dynamic library (not sure it can be implemented in practice though)?
> 
> there is no per thread initializer in an elf module.
> (tls state is usually initialized lazily in threads
> when necessary.)
> 
> malloc calls can happen before the ctors of an LD_PRELOAD
> library and threads can be created before both.
> glibc runs ldpreload ctors after other library ctors.

In the presence of stack tagging, I think any subsequent MTE config
change across all threads is unsafe, irrespective of whether it's done
by the kernel or user via SIGUSRx. I think the best we can do here is
start with more appropriate defaults or enable them based on an ELF note
before the application is started. The dynamic loader would not have to
do anything extra here.

If we ignore stack tagging, the global configuration change may be
achievable. I think for the MTE bits, this could be done lazily by the
libc (e.g. on malloc()/free() call). The tag checking won't happen
before such calls unless we change the kernel defaults. There is still
the tagged address ABI enabling, could this be done lazily on syscall by
the libc? If not, the kernel could synchronise (force) this on syscall
entry from each thread based on some global prctl() bit.

-- 
Catalin

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v7 29/29] arm64: mte: Add Memory Tagging Extension documentation
  2020-08-11 17:20                 ` Catalin Marinas
@ 2020-08-12 12:45                   ` Szabolcs Nagy
  2020-08-19  9:54                     ` Catalin Marinas
  0 siblings, 1 reply; 23+ messages in thread
From: Szabolcs Nagy @ 2020-08-12 12:45 UTC (permalink / raw)
  To: Catalin Marinas
  Cc: linux-arch, nd, Will Deacon, Andrey Konovalov, Kevin Brodsky,
	linux-mm, Andrew Morton, Vincenzo Frascino, Peter Collingbourne,
	Dave Martin, linux-arm-kernel

The 08/11/2020 18:20, Catalin Marinas wrote:
> On Mon, Aug 10, 2020 at 03:13:09PM +0100, Szabolcs Nagy wrote:
> > The 08/07/2020 16:19, Catalin Marinas wrote:
> > > On Mon, Aug 03, 2020 at 01:43:10PM +0100, Szabolcs Nagy wrote:
> > > > if we can always turn sync tag checks on early whenever mte is
> > > > available then i think there is no issue.
> > > > 
> > > > but if we have to make the decision later for compatibility or
> > > > performance reasons then per thread setting is problematic.
> > > 
> > > At least for libc, I'm not sure how you could even turn MTE on at
> > > run-time. The heap allocations would have to be mapped with PROT_MTE as
> > > we can't easily change them (well, you could mprotect(), assuming the
> > > user doesn't use tagged pointers on them).
> > 
> > e.g. dlopen of library with stack tagging. (libc can mark stacks with
> > PROT_MTE at that time)
> 
> If we allow such mixed object support with stack tagging enabled at
> dlopen, PROT_MTE would need to be turned on for each thread stack. This
> wouldn't require synchronisation, only knowing where the thread stacks
> are, but you'd need to make sure threads don't call into the new library
> until the stacks have been mprotect'ed. Doing this midway through a
> function execution may corrupt the tags.
> 
> So I'm not sure how safe any of this is without explicit user
> synchronisation (i.e. don't call into the library until all threads have
> been updated). Even changing options like GCR_EL1.Excl across multiple
> threads may have unwanted effects. See this comment from Peter, the
> difference being that instead of an explicit prctl() call on the current
> stack, another thread would do it:
> 
> https://lore.kernel.org/linux-arch/CAMn1gO5rhOG1W+nVe103v=smvARcFFp_Ct9XqH2Ca4BUMfpDdg@mail.gmail.com/

there is no midway problem: the libc (ld.so) would do
the PROT_MTE at dlopen time based on some elf marking
(which can be handled before relocation processing,
so before library code can run, the midway problem
happens when a library, e.g libc, wants to turn on
stack tagging on itself).

the libc already does this when a library is loaded
that requires executable stack (it marks stacks as
PROT_EXEC at dlopen time or fails the dlopen if that
is not possible, this does not require running code
in other threads, only synchronization with thread
creation and exit. but changing the check mode for
mte needs per thread code execution.).

i'm not entirely sure if this is a good idea, but i
expect stack tagging not to be used in the libc
(because libc needs to run on all hw and we don't yet
have a backward compatible stack tagging solution),
so stack tagging should work when only some elf modules
in a process are built with it, which implies that
enabling it at dlopen time should work otherwise
it will not be very useful.

> > or just turn on sync tag checks later when using heap tagging.
> 
> I wonder whether setting the synchronous tag check mode by default would
> improve this aspect. This would not have any effect until PROT_MTE is
> used. If software wants some better performance they can explicitly opt
> in to asynchronous mode or disable tag checking after some SIGSEGV +
> reporting (this shouldn't exclude the environment variables you
> currently use for controlling the tag check mode).
> 
> Also, if there are saner defaults for the user GCR_EL1.Excl (currently
> all masked), we should decide them now.
> 
> If stack tagging will come with some ELF information, we could make the
> default tag checking and GCR_EL1.Excl choices based on that, otherwise
> maybe we should revisit the default configuration the kernel sets for
> the user in the absence of any other information.

do tag checks have overhead if PROT_MTE is not used?
i'd expect some checks are still done at memory access.
(and the tagged address syscall abi has to be in use.)

turning sync tag checks on early would enable the
most of the interesting usecases (only PROT_MTE has
to be handled at runtime not the prctls. however i
don't yet know how userspace will deal with compat
issues, i.e. it may not be valid to unconditionally
turn tag checks on early).

> > > > - library code normally initializes per thread state on the first call
> > > >   into the library from a given thread, but with mte, as soon as
> > > >   memory / pointers are tagged in one thread, all threads are
> > > >   affected: not performing checks in other threads is less secure (may
> > > >   be ok) and it means incompatible syscall abi (not ok). so at least
> > > >   PR_TAGGED_ADDR_ENABLE should have process wide setting for this
> > > >   usage.
> > > 
> > > My assumption with MTE is that the libc will initialise it when the
> > > library is loaded (something __attribute__((constructor))) and it's
> > > still in single-threaded mode. Does it wait until the first malloc()
> > > call? Also, is there such thing as a per-thread initialiser for a
> > > dynamic library (not sure it can be implemented in practice though)?
> > 
> > there is no per thread initializer in an elf module.
> > (tls state is usually initialized lazily in threads
> > when necessary.)
> > 
> > malloc calls can happen before the ctors of an LD_PRELOAD
> > library and threads can be created before both.
> > glibc runs ldpreload ctors after other library ctors.
> 
> In the presence of stack tagging, I think any subsequent MTE config
> change across all threads is unsafe, irrespective of whether it's done
> by the kernel or user via SIGUSRx. I think the best we can do here is
> start with more appropriate defaults or enable them based on an ELF note
> before the application is started. The dynamic loader would not have to
> do anything extra here.
> 
> If we ignore stack tagging, the global configuration change may be
> achievable. I think for the MTE bits, this could be done lazily by the
> libc (e.g. on malloc()/free() call). The tag checking won't happen
> before such calls unless we change the kernel defaults. There is still
> the tagged address ABI enabling, could this be done lazily on syscall by
> the libc? If not, the kernel could synchronise (force) this on syscall
> entry from each thread based on some global prctl() bit.

i think the interesting use-cases are all about
changing mte settings before mte is in use in any
way but after there are multiple threads.
(the async -> sync mode change on tag faults is
i think less interesting to the gnu linux world.)

i guess lazy syscall abi switch works, but it is
ugly: raw syscall usage will be problematic and
doing checks before calling into the vdso might
have unwanted overhead.

based on the discussion it seems we should design
the userspace abis so that per process prctl is
not required and then see how far we get.

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v7 22/29] arm64: mte: ptrace: Add PTRACE_{PEEK,POKE}MTETAGS support
       [not found] ` <20200715170844.30064-23-catalin.marinas@arm.com>
@ 2020-08-13 14:01   ` Luis Machado
  2020-08-22 10:56     ` Catalin Marinas
  0 siblings, 1 reply; 23+ messages in thread
From: Luis Machado @ 2020-08-13 14:01 UTC (permalink / raw)
  To: Catalin Marinas, linux-arm-kernel
  Cc: linux-arch, Omair Javaid, Szabolcs Nagy, Andrey Konovalov,
	Kevin Brodsky, Peter Collingbourne, linux-mm, Alan Hayward,
	Andrew Morton, Vincenzo Frascino, Will Deacon, Dave P Martin

Hi Catalin,

There has been changes to GDB's/LLDB's side to incorporate a tag type 
into the peek/poke requests. This is an attempt to anticipate required 
support for other tag types, like CHERI's tags. Those are somewhat 
different from MTE's tags.

The core file design for storing tags, which is in progress and 
currently in my court, is also taking into account other types of tags.

Given the above, should we consider passing a type to the kernel ptrace 
requests as well?

Also, since the ptrace requests would have to handle different types of 
tags, should we rename PEEKMTETAGS/POKEMTETAGS to PEEKTAGS/POKETAGS 
instead and make those requests generic?

Regards,
Luis

On 7/15/20 2:08 PM, Catalin Marinas wrote:
> Add support for bulk setting/getting of the MTE tags in a tracee's
> address space at 'addr' in the ptrace() syscall prototype. 'data' points
> to a struct iovec in the tracer's address space with iov_base
> representing the address of a tracer's buffer of length iov_len. The
> tags to be copied to/from the tracer's buffer are stored as one tag per
> byte.
> 
> On successfully copying at least one tag, ptrace() returns 0 and updates
> the tracer's iov_len with the number of tags copied. In case of error,
> either -EIO or -EFAULT is returned, trying to follow the ptrace() man
> page.
> 
> Note that the tag copying functions are not performance critical,
> therefore they lack optimisations found in typical memory copy routines.
> 
> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Will Deacon <will@kernel.org>
> Cc: Alan Hayward <Alan.Hayward@arm.com>
> Cc: Luis Machado <luis.machado@linaro.org>
> Cc: Omair Javaid <omair.javaid@linaro.org>
> ---
> 
> Notes:
>      v4:
>      - Following the change to only clear the tags in a page if it is mapped
>        to user with PROT_MTE, ptrace() now will refuse to access tags in
>        pages not previously mapped with PROT_MTE (PG_mte_tagged set). This is
>        primarily to avoid leaking uninitialised tags to user via ptrace().
>      - Fix SYM_FUNC_END argument typo.
>      - Rename MTE_ALLOC_* to MTE_GRANULE_*.
>      - Use uao_user_alternative for the user access in case we ever want to
>        call mte_copy_tags_* with a kernel buffer. It also matches the other
>        uaccess routines in the kernel.
>      - Simplify arch_ptrace() slightly.
>      - Reorder down_write_killable() with access_ok() in
>        __access_remote_tags().
>      - Handle copy length 0 in mte_copy_tags_{to,from}_user().
>      - Use put_user() instead of __put_user().
>      
>      New in v3.
> 
>   arch/arm64/include/asm/mte.h         |  17 ++++
>   arch/arm64/include/uapi/asm/ptrace.h |   3 +
>   arch/arm64/kernel/mte.c              | 139 +++++++++++++++++++++++++++
>   arch/arm64/kernel/ptrace.c           |   7 ++
>   arch/arm64/lib/mte.S                 |  53 ++++++++++
>   5 files changed, 219 insertions(+)
> 
> diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
> index 1a919905295b..7ea0c0e526d1 100644
> --- a/arch/arm64/include/asm/mte.h
> +++ b/arch/arm64/include/asm/mte.h
> @@ -5,6 +5,11 @@
>   #ifndef __ASM_MTE_H
>   #define __ASM_MTE_H
>   
> +#define MTE_GRANULE_SIZE	UL(16)
> +#define MTE_GRANULE_MASK	(~(MTE_GRANULE_SIZE - 1))
> +#define MTE_TAG_SHIFT		56
> +#define MTE_TAG_SIZE		4
> +
>   #ifndef __ASSEMBLY__
>   
>   #include <linux/page-flags.h>
> @@ -12,6 +17,10 @@
>   #include <asm/pgtable-types.h>
>   
>   void mte_clear_page_tags(void *addr);
> +unsigned long mte_copy_tags_from_user(void *to, const void __user *from,
> +				      unsigned long n);
> +unsigned long mte_copy_tags_to_user(void __user *to, void *from,
> +				    unsigned long n);
>   
>   #ifdef CONFIG_ARM64_MTE
>   
> @@ -25,6 +34,8 @@ void mte_thread_switch(struct task_struct *next);
>   void mte_suspend_exit(void);
>   long set_mte_ctrl(struct task_struct *task, unsigned long arg);
>   long get_mte_ctrl(struct task_struct *task);
> +int mte_ptrace_copy_tags(struct task_struct *child, long request,
> +			 unsigned long addr, unsigned long data);
>   
>   #else
>   
> @@ -54,6 +65,12 @@ static inline long get_mte_ctrl(struct task_struct *task)
>   {
>   	return 0;
>   }
> +static inline int mte_ptrace_copy_tags(struct task_struct *child,
> +				       long request, unsigned long addr,
> +				       unsigned long data)
> +{
> +	return -EIO;
> +}
>   
>   #endif
>   
> diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
> index 06413d9f2341..758ae984ff97 100644
> --- a/arch/arm64/include/uapi/asm/ptrace.h
> +++ b/arch/arm64/include/uapi/asm/ptrace.h
> @@ -76,6 +76,9 @@
>   /* syscall emulation path in ptrace */
>   #define PTRACE_SYSEMU		  31
>   #define PTRACE_SYSEMU_SINGLESTEP  32
> +/* MTE allocation tag access */
> +#define PTRACE_PEEKMTETAGS	  33
> +#define PTRACE_POKEMTETAGS	  34
>   
>   #ifndef __ASSEMBLY__
>   
> diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
> index e80c49af74af..0a8b90afe9d7 100644
> --- a/arch/arm64/kernel/mte.c
> +++ b/arch/arm64/kernel/mte.c
> @@ -4,14 +4,18 @@
>    */
>   
>   #include <linux/bitops.h>
> +#include <linux/kernel.h>
>   #include <linux/mm.h>
>   #include <linux/prctl.h>
>   #include <linux/sched.h>
> +#include <linux/sched/mm.h>
>   #include <linux/string.h>
>   #include <linux/thread_info.h>
> +#include <linux/uio.h>
>   
>   #include <asm/cpufeature.h>
>   #include <asm/mte.h>
> +#include <asm/ptrace.h>
>   #include <asm/sysreg.h>
>   
>   void mte_sync_tags(pte_t *ptep, pte_t pte)
> @@ -179,3 +183,138 @@ long get_mte_ctrl(struct task_struct *task)
>   
>   	return ret;
>   }
> +
> +/*
> + * Access MTE tags in another process' address space as given in mm. Update
> + * the number of tags copied. Return 0 if any tags copied, error otherwise.
> + * Inspired by __access_remote_vm().
> + */
> +static int __access_remote_tags(struct task_struct *tsk, struct mm_struct *mm,
> +				unsigned long addr, struct iovec *kiov,
> +				unsigned int gup_flags)
> +{
> +	struct vm_area_struct *vma;
> +	void __user *buf = kiov->iov_base;
> +	size_t len = kiov->iov_len;
> +	int ret;
> +	int write = gup_flags & FOLL_WRITE;
> +
> +	if (!access_ok(buf, len))
> +		return -EFAULT;
> +
> +	if (mmap_read_lock_killable(mm))
> +		return -EIO;
> +
> +	while (len) {
> +		unsigned long tags, offset;
> +		void *maddr;
> +		struct page *page = NULL;
> +
> +		ret = get_user_pages_remote(tsk, mm, addr, 1, gup_flags,
> +					    &page, &vma, NULL);
> +		if (ret <= 0)
> +			break;
> +
> +		/*
> +		 * Only copy tags if the page has been mapped as PROT_MTE
> +		 * (PG_mte_tagged set). Otherwise the tags are not valid and
> +		 * not accessible to user. Moreover, an mprotect(PROT_MTE)
> +		 * would cause the existing tags to be cleared if the page
> +		 * was never mapped with PROT_MTE.
> +		 */
> +		if (!test_bit(PG_mte_tagged, &page->flags)) {
> +			ret = -EOPNOTSUPP;
> +			put_page(page);
> +			break;
> +		}
> +
> +		/* limit access to the end of the page */
> +		offset = offset_in_page(addr);
> +		tags = min(len, (PAGE_SIZE - offset) / MTE_GRANULE_SIZE);
> +
> +		maddr = page_address(page);
> +		if (write) {
> +			tags = mte_copy_tags_from_user(maddr + offset, buf, tags);
> +			set_page_dirty_lock(page);
> +		} else {
> +			tags = mte_copy_tags_to_user(buf, maddr + offset, tags);
> +		}
> +		put_page(page);
> +
> +		/* error accessing the tracer's buffer */
> +		if (!tags)
> +			break;
> +
> +		len -= tags;
> +		buf += tags;
> +		addr += tags * MTE_GRANULE_SIZE;
> +	}
> +	mmap_read_unlock(mm);
> +
> +	/* return an error if no tags copied */
> +	kiov->iov_len = buf - kiov->iov_base;
> +	if (!kiov->iov_len) {
> +		/* check for error accessing the tracee's address space */
> +		if (ret <= 0)
> +			return -EIO;
> +		else
> +			return -EFAULT;
> +	}
> +
> +	return 0;
> +}
> +
> +/*
> + * Copy MTE tags in another process' address space at 'addr' to/from tracer's
> + * iovec buffer. Return 0 on success. Inspired by ptrace_access_vm().
> + */
> +static int access_remote_tags(struct task_struct *tsk, unsigned long addr,
> +			      struct iovec *kiov, unsigned int gup_flags)
> +{
> +	struct mm_struct *mm;
> +	int ret;
> +
> +	mm = get_task_mm(tsk);
> +	if (!mm)
> +		return -EPERM;
> +
> +	if (!tsk->ptrace || (current != tsk->parent) ||
> +	    ((get_dumpable(mm) != SUID_DUMP_USER) &&
> +	     !ptracer_capable(tsk, mm->user_ns))) {
> +		mmput(mm);
> +		return -EPERM;
> +	}
> +
> +	ret = __access_remote_tags(tsk, mm, addr, kiov, gup_flags);
> +	mmput(mm);
> +
> +	return ret;
> +}
> +
> +int mte_ptrace_copy_tags(struct task_struct *child, long request,
> +			 unsigned long addr, unsigned long data)
> +{
> +	int ret;
> +	struct iovec kiov;
> +	struct iovec __user *uiov = (void __user *)data;
> +	unsigned int gup_flags = FOLL_FORCE;
> +
> +	if (!system_supports_mte())
> +		return -EIO;
> +
> +	if (get_user(kiov.iov_base, &uiov->iov_base) ||
> +	    get_user(kiov.iov_len, &uiov->iov_len))
> +		return -EFAULT;
> +
> +	if (request == PTRACE_POKEMTETAGS)
> +		gup_flags |= FOLL_WRITE;
> +
> +	/* align addr to the MTE tag granule */
> +	addr &= MTE_GRANULE_MASK;
> +
> +	ret = access_remote_tags(child, addr, &kiov, gup_flags);
> +	if (!ret)
> +		ret = put_user(kiov.iov_len, &uiov->iov_len);
> +
> +	return ret;
> +}
> diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
> index 4582014dda25..653a03598c75 100644
> --- a/arch/arm64/kernel/ptrace.c
> +++ b/arch/arm64/kernel/ptrace.c
> @@ -34,6 +34,7 @@
>   #include <asm/cpufeature.h>
>   #include <asm/debug-monitors.h>
>   #include <asm/fpsimd.h>
> +#include <asm/mte.h>
>   #include <asm/pointer_auth.h>
>   #include <asm/stacktrace.h>
>   #include <asm/syscall.h>
> @@ -1796,6 +1797,12 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task)
>   long arch_ptrace(struct task_struct *child, long request,
>   		 unsigned long addr, unsigned long data)
>   {
> +	switch (request) {
> +	case PTRACE_PEEKMTETAGS:
> +	case PTRACE_POKEMTETAGS:
> +		return mte_ptrace_copy_tags(child, request, addr, data);
> +	}
> +
>   	return ptrace_request(child, request, addr, data);
>   }
>   
> diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S
> index 3c3d0edbbca3..434f81d9a180 100644
> --- a/arch/arm64/lib/mte.S
> +++ b/arch/arm64/lib/mte.S
> @@ -4,7 +4,9 @@
>    */
>   #include <linux/linkage.h>
>   
> +#include <asm/alternative.h>
>   #include <asm/assembler.h>
> +#include <asm/mte.h>
>   #include <asm/page.h>
>   #include <asm/sysreg.h>
>   
> @@ -51,3 +53,54 @@ SYM_FUNC_START(mte_copy_page_tags)
>   	b.ne	1b
>   	ret
>   SYM_FUNC_END(mte_copy_page_tags)
> +
> +/*
> + * Read tags from a user buffer (one tag per byte) and set the corresponding
> + * tags at the given kernel address. Used by PTRACE_POKEMTETAGS.
> + *   x0 - kernel address (to)
> + *   x1 - user buffer (from)
> + *   x2 - number of tags/bytes (n)
> + * Returns:
> + *   x0 - number of tags read/set
> + */
> +SYM_FUNC_START(mte_copy_tags_from_user)
> +	mov	x3, x1
> +	cbz	x2, 2f
> +1:
> +	uao_user_alternative 2f, ldrb, ldtrb, w4, x1, 0
> +	lsl	x4, x4, #MTE_TAG_SHIFT
> +	stg	x4, [x0], #MTE_GRANULE_SIZE
> +	add	x1, x1, #1
> +	subs	x2, x2, #1
> +	b.ne	1b
> +
> +	// exception handling and function return
> +2:	sub	x0, x1, x3		// update the number of tags set
> +	ret
> +SYM_FUNC_END(mte_copy_tags_from_user)
> +
> +/*
> + * Get the tags from a kernel address range and write the tag values to the
> + * given user buffer (one tag per byte). Used by PTRACE_PEEKMTETAGS.
> + *   x0 - user buffer (to)
> + *   x1 - kernel address (from)
> + *   x2 - number of tags/bytes (n)
> + * Returns:
> + *   x0 - number of tags read/set
> + */
> +SYM_FUNC_START(mte_copy_tags_to_user)
> +	mov	x3, x0
> +	cbz	x2, 2f
> +1:
> +	ldg	x4, [x1]
> +	ubfx	x4, x4, #MTE_TAG_SHIFT, #MTE_TAG_SIZE
> +	uao_user_alternative 2f, strb, sttrb, w4, x0, 0
> +	add	x0, x0, #1
> +	add	x1, x1, #MTE_GRANULE_SIZE
> +	subs	x2, x2, #1
> +	b.ne	1b
> +
> +	// exception handling and function return
> +2:	sub	x0, x0, x3		// update the number of tags copied
> +	ret
> +SYM_FUNC_END(mte_copy_tags_to_user)
> 

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v7 29/29] arm64: mte: Add Memory Tagging Extension documentation
  2020-08-12 12:45                   ` Szabolcs Nagy
@ 2020-08-19  9:54                     ` Catalin Marinas
  2020-08-20 16:43                       ` Szabolcs Nagy
  0 siblings, 1 reply; 23+ messages in thread
From: Catalin Marinas @ 2020-08-19  9:54 UTC (permalink / raw)
  To: Szabolcs Nagy
  Cc: linux-arch, nd, Will Deacon, Andrey Konovalov, Kevin Brodsky,
	linux-mm, Andrew Morton, Vincenzo Frascino, Peter Collingbourne,
	Dave Martin, linux-arm-kernel

On Wed, Aug 12, 2020 at 01:45:21PM +0100, Szabolcs Nagy wrote:
> On 08/11/2020 18:20, Catalin Marinas wrote:
> > If we allow such mixed object support with stack tagging enabled at
> > dlopen, PROT_MTE would need to be turned on for each thread stack. This
> > wouldn't require synchronisation, only knowing where the thread stacks
> > are, but you'd need to make sure threads don't call into the new library
> > until the stacks have been mprotect'ed. Doing this midway through a
> > function execution may corrupt the tags.
> > 
> > So I'm not sure how safe any of this is without explicit user
> > synchronisation (i.e. don't call into the library until all threads have
> > been updated). Even changing options like GCR_EL1.Excl across multiple
> > threads may have unwanted effects. See this comment from Peter, the
> > difference being that instead of an explicit prctl() call on the current
> > stack, another thread would do it:
> > 
> > https://lore.kernel.org/linux-arch/CAMn1gO5rhOG1W+nVe103v=smvARcFFp_Ct9XqH2Ca4BUMfpDdg@mail.gmail.com/
> 
> there is no midway problem: the libc (ld.so) would do the PROT_MTE at
> dlopen time based on some elf marking (which can be handled before
> relocation processing, so before library code can run, the midway
> problem happens when a library, e.g libc, wants to turn on stack
> tagging on itself).

OK, that makes sense, you can't call into the new object until the
relocations have been resolved.

> the libc already does this when a library is loaded that requires
> executable stack (it marks stacks as PROT_EXEC at dlopen time or fails
> the dlopen if that is not possible, this does not require running code
> in other threads, only synchronization with thread creation and exit.
> but changing the check mode for mte needs per thread code execution.).
> 
> i'm not entirely sure if this is a good idea, but i expect stack
> tagging not to be used in the libc (because libc needs to run on all
> hw and we don't yet have a backward compatible stack tagging
> solution),

In theory, you could have two libc deployed in your distro and ldd gets
smarter to pick the right one. I still hope we'd find a compromise with
stack tagging and single binary.

> so stack tagging should work when only some elf modules in a process
> are built with it, which implies that enabling it at dlopen time
> should work otherwise it will not be very useful.

There is still the small risk of an old object using tagged pointers to
the stack. Since the stack would be shared between such objects, turning
PROT_MTE on would cause issues. Hopefully such problems are minor and
not really a concern for the kernel.

> do tag checks have overhead if PROT_MTE is not used? i'd expect some
> checks are still done at memory access. (and the tagged address
> syscall abi has to be in use.)

My understanding from talking to hardware engineers is that there won't
be an overhead if PROT_MTE is not used, no tags being fetched or
checked. But I can't guarantee until we get real silicon.

> turning sync tag checks on early would enable the most of the
> interesting usecases (only PROT_MTE has to be handled at runtime not
> the prctls. however i don't yet know how userspace will deal with
> compat issues, i.e. it may not be valid to unconditionally turn tag
> checks on early).

If we change the defaults so that no prctl() is required for the
standard use-case, it would solve most of the common deployment issues:

1. Tagged address ABI default on when HWCAP2_MTE is present
2. Synchronous TCF by default
3. GCR_EL1.Excl allows all tags except 0 by default

Any other configuration diverging from the above is considered
specialist deployment and will have to issue the prctl() on a per-thread
basis.

Compat issues in user-space will be dealt with via environment
variables but pretty much on/off rather than fine-grained tag checking
mode. So for glibc, you'd have only _MTAG=0 or 1 and the only effect is
using PROT_MTE + tagged pointers or no-PROT_MTE + tag 0.

> > In the presence of stack tagging, I think any subsequent MTE config
> > change across all threads is unsafe, irrespective of whether it's done
> > by the kernel or user via SIGUSRx. I think the best we can do here is
> > start with more appropriate defaults or enable them based on an ELF note
> > before the application is started. The dynamic loader would not have to
> > do anything extra here.
> > 
> > If we ignore stack tagging, the global configuration change may be
> > achievable. I think for the MTE bits, this could be done lazily by the
> > libc (e.g. on malloc()/free() call). The tag checking won't happen
> > before such calls unless we change the kernel defaults. There is still
> > the tagged address ABI enabling, could this be done lazily on syscall by
> > the libc? If not, the kernel could synchronise (force) this on syscall
> > entry from each thread based on some global prctl() bit.
> 
> i think the interesting use-cases are all about changing mte settings
> before mte is in use in any way but after there are multiple threads.
> (the async -> sync mode change on tag faults is i think less
> interesting to the gnu linux world.)

So let's consider async/sync/no-check specialist uses and glibc would
not have to handle them. I don't think async mode is useful on its own
unless you have a way to turn on sync mode at run-time for more precise
error identification (well, hoping that it will happen again).

> i guess lazy syscall abi switch works, but it is ugly: raw syscall
> usage will be problematic and doing checks before calling into the
> vdso might have unwanted overhead.

This lazy ABI switch could be handled by the kernel, though I wonder
whether we should just relax it permanently when HWCAP2_MTE is present.

-- 
Catalin

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v7 29/29] arm64: mte: Add Memory Tagging Extension documentation
  2020-08-19  9:54                     ` Catalin Marinas
@ 2020-08-20 16:43                       ` Szabolcs Nagy
  2020-08-20 17:27                         ` Paul Eggert
  2020-08-22 11:28                         ` Catalin Marinas
  0 siblings, 2 replies; 23+ messages in thread
From: Szabolcs Nagy @ 2020-08-20 16:43 UTC (permalink / raw)
  To: Catalin Marinas
  Cc: linux-arch, Richard Earnshaw, nd, libc-alpha, Will Deacon,
	Andrey Konovalov, Kevin Brodsky, linux-mm, Matthew Malcomson,
	Andrew Morton, Vincenzo Frascino, Peter Collingbourne,
	Dave Martin, linux-arm-kernel

adding libc-alpha on cc, they might have
additional input about compat issue handling:

the discussion is about enabling tag checks,
which cannot be done reasonably at runtime when
a process is already multi-threaded so it has to
be decided early either in the libc or in the
kernel. such early decision might have backward
compat issues (we dont yet know if any allocator
wants to use tagging opportunistically later for
debugging or if there will be code loaded later
that is incompatible with tag checks).

The 08/19/2020 10:54, Catalin Marinas wrote:
> On Wed, Aug 12, 2020 at 01:45:21PM +0100, Szabolcs Nagy wrote:
> > On 08/11/2020 18:20, Catalin Marinas wrote:
> > > If we allow such mixed object support with stack tagging enabled at
> > > dlopen, PROT_MTE would need to be turned on for each thread stack. This
> > > wouldn't require synchronisation, only knowing where the thread stacks
> > > are, but you'd need to make sure threads don't call into the new library
> > > until the stacks have been mprotect'ed. Doing this midway through a
> > > function execution may corrupt the tags.
...
> > there is no midway problem: the libc (ld.so) would do the PROT_MTE at
> > dlopen time based on some elf marking (which can be handled before
> > relocation processing, so before library code can run, the midway
> > problem happens when a library, e.g libc, wants to turn on stack
> > tagging on itself).
> 
> OK, that makes sense, you can't call into the new object until the
> relocations have been resolved.
> 
> > the libc already does this when a library is loaded that requires
> > executable stack (it marks stacks as PROT_EXEC at dlopen time or fails
> > the dlopen if that is not possible, this does not require running code
> > in other threads, only synchronization with thread creation and exit.
> > but changing the check mode for mte needs per thread code execution.).
> > 
> > i'm not entirely sure if this is a good idea, but i expect stack
> > tagging not to be used in the libc (because libc needs to run on all
> > hw and we don't yet have a backward compatible stack tagging
> > solution),
> 
> In theory, you could have two libc deployed in your distro and ldd gets
> smarter to pick the right one. I still hope we'd find a compromise with
> stack tagging and single binary.

distros don't like the two libc solution, i
think we will look at backward compat stack
tagging support, but we might end up not
having any stack tagging in libc, only in
user binaries (used for debugging).

> > so stack tagging should work when only some elf modules in a process
> > are built with it, which implies that enabling it at dlopen time
> > should work otherwise it will not be very useful.
> 
> There is still the small risk of an old object using tagged pointers to
> the stack. Since the stack would be shared between such objects, turning
> PROT_MTE on would cause issues. Hopefully such problems are minor and
> not really a concern for the kernel.
> 
> > do tag checks have overhead if PROT_MTE is not used? i'd expect some
> > checks are still done at memory access. (and the tagged address
> > syscall abi has to be in use.)
> 
> My understanding from talking to hardware engineers is that there won't
> be an overhead if PROT_MTE is not used, no tags being fetched or
> checked. But I can't guarantee until we get real silicon.
> 
> > turning sync tag checks on early would enable the most of the
> > interesting usecases (only PROT_MTE has to be handled at runtime not
> > the prctls. however i don't yet know how userspace will deal with
> > compat issues, i.e. it may not be valid to unconditionally turn tag
> > checks on early).
> 
> If we change the defaults so that no prctl() is required for the
> standard use-case, it would solve most of the common deployment issues:
> 
> 1. Tagged address ABI default on when HWCAP2_MTE is present
> 2. Synchronous TCF by default
> 3. GCR_EL1.Excl allows all tags except 0 by default
> 
> Any other configuration diverging from the above is considered
> specialist deployment and will have to issue the prctl() on a per-thread
> basis.
> 
> Compat issues in user-space will be dealt with via environment
> variables but pretty much on/off rather than fine-grained tag checking
> mode. So for glibc, you'd have only _MTAG=0 or 1 and the only effect is
> using PROT_MTE + tagged pointers or no-PROT_MTE + tag 0.

enabling mte checks by default would be
nice and simple (a libc can support tagging
allocators without any change assuming its
code is mte safe which is true e.g. for the
latest glibc release and for musl libc).

the compat issue with this is existing code
using pointer top bits which i assume faults
when dereferenced with the mte checks enabled.
(although this should be very rare since
top byte ignore on deref is aarch64 specific.)

i see two options:

- don't care about top bit compat issues:
  change the default in the kernel as you
  described (so checks are enabled and users
  only need PROT_MTE mapping if they want to
  use taggging).

- care about top bit issues:
  leave the kernel abi as in the patch set
  and do the mte setup early in the libc.
  add elf markings to new binaries that
  they are mte compatible and libc can use
  that marking for the mte setup. dlopening
  incompatible libraries will fail. the
  issue with this is that we have no idea
  how to add the marking and the marking
  prevents mte use with existing binaries
  (and eg. ldpreload malloc would require
  an updated libc).

for me it's hard to figure out which is
the right direction for mte.

> > > In the presence of stack tagging, I think any subsequent MTE config
> > > change across all threads is unsafe, irrespective of whether it's done
> > > by the kernel or user via SIGUSRx. I think the best we can do here is
> > > start with more appropriate defaults or enable them based on an ELF note
> > > before the application is started. The dynamic loader would not have to
> > > do anything extra here.
> > > 
> > > If we ignore stack tagging, the global configuration change may be
> > > achievable. I think for the MTE bits, this could be done lazily by the
> > > libc (e.g. on malloc()/free() call). The tag checking won't happen
> > > before such calls unless we change the kernel defaults. There is still
> > > the tagged address ABI enabling, could this be done lazily on syscall by
> > > the libc? If not, the kernel could synchronise (force) this on syscall
> > > entry from each thread based on some global prctl() bit.
> > 
> > i think the interesting use-cases are all about changing mte settings
> > before mte is in use in any way but after there are multiple threads.
> > (the async -> sync mode change on tag faults is i think less
> > interesting to the gnu linux world.)
> 
> So let's consider async/sync/no-check specialist uses and glibc would
> not have to handle them. I don't think async mode is useful on its own
> unless you have a way to turn on sync mode at run-time for more precise
> error identification (well, hoping that it will happen again).
> 
> > i guess lazy syscall abi switch works, but it is ugly: raw syscall
> > usage will be problematic and doing checks before calling into the
> > vdso might have unwanted overhead.
> 
> This lazy ABI switch could be handled by the kernel, though I wonder
> whether we should just relax it permanently when HWCAP2_MTE is present.

yeah i don't immediately see a problem with that.
but ideally there would be an escape hatch
(a way to opt out from the change).

thanks


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v7 29/29] arm64: mte: Add Memory Tagging Extension documentation
  2020-08-20 16:43                       ` Szabolcs Nagy
@ 2020-08-20 17:27                         ` Paul Eggert
  2020-08-22 11:31                           ` Catalin Marinas
  2020-08-22 11:28                         ` Catalin Marinas
  1 sibling, 1 reply; 23+ messages in thread
From: Paul Eggert @ 2020-08-20 17:27 UTC (permalink / raw)
  To: Szabolcs Nagy, Catalin Marinas
  Cc: linux-arch, Richard Earnshaw, Vincenzo Frascino, libc-alpha,
	Peter Collingbourne, Andrey Konovalov, Kevin Brodsky, linux-mm,
	Matthew Malcomson, Andrew Morton, nd, Will Deacon, Dave Martin,
	linux-arm-kernel

On 8/20/20 9:43 AM, Szabolcs Nagy wrote:
> the compat issue with this is existing code
> using pointer top bits which i assume faults
> when dereferenced with the mte checks enabled.
> (although this should be very rare since
> top byte ignore on deref is aarch64 specific.)

Does anyone know of significant aarch64-specific application code that depends 
on top byte ignore? I would think it's so rare (nonexistent?) as to not be worth 
worrying about.

Even in the bad old days when Emacs used pointer top bits for typechecking, it 
carefully removed those bits before dereferencing. Any other reasonably-portable 
application would have to do the same of course.

This whole thing reminds me of the ancient IBM S/360 mainframes that were 
documented to ignore the top 8 bits of 32-bit addresses merely because a single 
model (the IBM 360/30, circa 1965) was so underpowered that it couldn't quickly 
check that the top bits were zero. This has caused countless software hassles 
over the years. Even today, the IBM z-Series hardware and software still 
supports 24-bit addressing mode because of that early-1960s design mistake. See:

Mashey JR. The Long Road to 64 Bits. ACM Queue. 2006-10-10. 
https://queue.acm.org/detail.cfm?id=1165766

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v7 22/29] arm64: mte: ptrace: Add PTRACE_{PEEK,POKE}MTETAGS support
  2020-08-13 14:01   ` [PATCH v7 22/29] arm64: mte: ptrace: Add PTRACE_{PEEK,POKE}MTETAGS support Luis Machado
@ 2020-08-22 10:56     ` Catalin Marinas
  0 siblings, 0 replies; 23+ messages in thread
From: Catalin Marinas @ 2020-08-22 10:56 UTC (permalink / raw)
  To: Luis Machado
  Cc: linux-arch, Omair Javaid, Szabolcs Nagy, Andrey Konovalov,
	Kevin Brodsky, Peter Collingbourne, linux-mm, Alan Hayward,
	Andrew Morton, Vincenzo Frascino, Will Deacon, Dave P Martin,
	linux-arm-kernel

Hi Luis,

On Thu, Aug 13, 2020 at 11:01:06AM -0300, Luis Machado wrote:
> There has been changes to GDB's/LLDB's side to incorporate a tag type into
> the peek/poke requests. This is an attempt to anticipate required support
> for other tag types, like CHERI's tags. Those are somewhat different from
> MTE's tags.

Please note that Morello (the ARM's CHERI implementation) won't go into
mainline Linux for the time being. It's a development board to
experiment with CHERI and the architecture may eventually turn out
slightly different. Also note that the current Morello hardware doesn't
support MTE.

The tags are indeed different from the MTE ones, though both are just
additional metadata associated with a set of bytes in memory. It happens
that a tag in both cases corresponds to a 16-byte memory range.

> The core file design for storing tags, which is in progress and currently in
> my court, is also taking into account other types of tags.

It makes sense for the core file.

> Given the above, should we consider passing a type to the kernel ptrace
> requests as well?
> 
> Also, since the ptrace requests would have to handle different types of
> tags, should we rename PEEKMTETAGS/POKEMTETAGS to PEEKTAGS/POKETAGS instead
> and make those requests generic?

I'm not sure how we could pass a type since ptrace() only takes a single
argument for the request. We could use a different structure than iovec
and encode a type in a field in the new structure but I'd rather keep
the generic struct iovec. So basically the "MTE" part in PEEKMTETAGS is
the type.

Internally, the kernel implementation will probably translate the
request into a common function call with a tag type but for the
user-visible ptrace() interface, I don't see what benefits it would
bring. If you have a better suggestion on how to encode the type, I'm
open to discuss it.

-- 
Catalin

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v7 29/29] arm64: mte: Add Memory Tagging Extension documentation
  2020-08-20 16:43                       ` Szabolcs Nagy
  2020-08-20 17:27                         ` Paul Eggert
@ 2020-08-22 11:28                         ` Catalin Marinas
  1 sibling, 0 replies; 23+ messages in thread
From: Catalin Marinas @ 2020-08-22 11:28 UTC (permalink / raw)
  To: Szabolcs Nagy
  Cc: linux-arch, Richard Earnshaw, nd, libc-alpha, Will Deacon,
	Andrey Konovalov, Kevin Brodsky, linux-mm, Matthew Malcomson,
	Andrew Morton, Vincenzo Frascino, Peter Collingbourne,
	Dave Martin, linux-arm-kernel

On Thu, Aug 20, 2020 at 05:43:15PM +0100, Szabolcs Nagy wrote:
> The 08/19/2020 10:54, Catalin Marinas wrote:
> > On Wed, Aug 12, 2020 at 01:45:21PM +0100, Szabolcs Nagy wrote:
> > > On 08/11/2020 18:20, Catalin Marinas wrote:
> > > turning sync tag checks on early would enable the most of the
> > > interesting usecases (only PROT_MTE has to be handled at runtime not
> > > the prctls. however i don't yet know how userspace will deal with
> > > compat issues, i.e. it may not be valid to unconditionally turn tag
> > > checks on early).
> > 
> > If we change the defaults so that no prctl() is required for the
> > standard use-case, it would solve most of the common deployment issues:
> > 
> > 1. Tagged address ABI default on when HWCAP2_MTE is present
> > 2. Synchronous TCF by default
> > 3. GCR_EL1.Excl allows all tags except 0 by default
> > 
> > Any other configuration diverging from the above is considered
> > specialist deployment and will have to issue the prctl() on a per-thread
> > basis.
> > 
> > Compat issues in user-space will be dealt with via environment
> > variables but pretty much on/off rather than fine-grained tag checking
> > mode. So for glibc, you'd have only _MTAG=0 or 1 and the only effect is
> > using PROT_MTE + tagged pointers or no-PROT_MTE + tag 0.
> 
> enabling mte checks by default would be nice and simple (a libc can
> support tagging allocators without any change assuming its code is mte
> safe which is true e.g. for the latest glibc release and for musl
> libc).

While talking to the Android folk, it occurred to me that the default
tag checking mode doesn't even need to be decided by the kernel. The
dynamic loader can set the desired tag check mode and the tagged address
ABI based on environment variables (_MTAG_ENABLE=x) and do a prctl()
before any threads have been created. Subsequent malloc() calls or
dlopen() can mmap/mprotect different memory regions to PROT_MTE and all
threads will be affected equally.

The only configuration a heap allocator may want to change is the tag
exclude mask (GCR_EL1.Excl) but even this can, by convention, be
configured by the dynamic loader.

> the compat issue with this is existing code using pointer top bits
> which i assume faults when dereferenced with the mte checks enabled.
> (although this should be very rare since top byte ignore on deref is
> aarch64 specific.)

They'd fault only if they dereference PROT_MTE memory and the tag check
mode is async or sync.

> i see two options:
> 
> - don't care about top bit compat issues:
>   change the default in the kernel as you described (so checks are
>   enabled and users only need PROT_MTE mapping if they want to use
>   taggging).

As I said above, suggested by the Google guys, this default choice can
be left with the dynamic loader before any threads are started.

> - care about top bit issues:
>   leave the kernel abi as in the patch set and do the mte setup early
>   in the libc. add elf markings to new binaries that they are mte
>   compatible and libc can use that marking for the mte setup.
>   dlopening incompatible libraries will fail. the issue with this is
>   that we have no idea how to add the marking and the marking prevents
>   mte use with existing binaries (and eg. ldpreload malloc would
>   require an updated libc).

Maybe a third option (which leaves the kernel ABI as is):

If the ELF markings only control the PROT_MTE regions (stack or heap),
we can configure the tag checking mode and tagged address ABI early
through environment variables (_MTAG_ENABLE). If you have a problematic
binary, just set _MTAG_ENABLE=0 and a dlopen, even if loading an
MTE-capable object, would not map the stack with PROT_MTE. Heap
allocators could also ignore _MTAG_ENABLE since PROT_MTE doesn't have an
effect if no tag checking is in place. This way we can probably mix
objects as long as we have a control.

So, in summary, I think we can get away with only issuing the prctl() in
the dynamic loader before any threads start and using PROT_MTE later at
run-time, multi-threaded, as needed by malloc(), dlopen etc.

-- 
Catalin

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v7 29/29] arm64: mte: Add Memory Tagging Extension documentation
  2020-08-20 17:27                         ` Paul Eggert
@ 2020-08-22 11:31                           ` Catalin Marinas
  0 siblings, 0 replies; 23+ messages in thread
From: Catalin Marinas @ 2020-08-22 11:31 UTC (permalink / raw)
  To: Paul Eggert
  Cc: linux-arch, Richard Earnshaw, Vincenzo Frascino, libc-alpha,
	Szabolcs Nagy, Andrey Konovalov, Kevin Brodsky,
	Peter Collingbourne, linux-mm, Matthew Malcomson, Andrew Morton,
	nd, Will Deacon, Dave Martin, linux-arm-kernel

On Thu, Aug 20, 2020 at 10:27:43AM -0700, Paul Eggert wrote:
> On 8/20/20 9:43 AM, Szabolcs Nagy wrote:
> > the compat issue with this is existing code
> > using pointer top bits which i assume faults
> > when dereferenced with the mte checks enabled.
> > (although this should be very rare since
> > top byte ignore on deref is aarch64 specific.)
> 
> Does anyone know of significant aarch64-specific application code that
> depends on top byte ignore? I would think it's so rare (nonexistent?) as to
> not be worth worrying about.

Apart from the LLVM hwasan feature, I'm not aware of code relying on the
top byte ignore. There were discussions in the past to use it with some
JITs but I'm not sure they ever materialised.

I think the Mozilla JS engine uses (used?) additional bits on top of a
pointer but they are masked out before the access.

> Even in the bad old days when Emacs used pointer top bits for typechecking,
> it carefully removed those bits before dereferencing. Any other
> reasonably-portable application would have to do the same of course.

I agree.

-- 
Catalin

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2020-08-22 11:33 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20200715170844.30064-1-catalin.marinas@arm.com>
     [not found] ` <20200715170844.30064-30-catalin.marinas@arm.com>
2020-07-27 16:36   ` [PATCH v7 29/29] arm64: mte: Add Memory Tagging Extension documentation Szabolcs Nagy
2020-07-28 11:08     ` Dave Martin
2020-07-28 14:53       ` Szabolcs Nagy
2020-07-28 19:59         ` Catalin Marinas
2020-08-03 12:43           ` Szabolcs Nagy
2020-08-07 15:19             ` Catalin Marinas
2020-08-10 14:13               ` Szabolcs Nagy
2020-08-11 17:20                 ` Catalin Marinas
2020-08-12 12:45                   ` Szabolcs Nagy
2020-08-19  9:54                     ` Catalin Marinas
2020-08-20 16:43                       ` Szabolcs Nagy
2020-08-20 17:27                         ` Paul Eggert
2020-08-22 11:31                           ` Catalin Marinas
2020-08-22 11:28                         ` Catalin Marinas
     [not found] ` <20200715170844.30064-19-catalin.marinas@arm.com>
2020-07-20 15:30   ` [PATCH v7 18/29] arm64: mte: Allow user control of the tag check mode via prctl() Kevin Brodsky
2020-07-20 17:00     ` Dave Martin
2020-07-22 10:28       ` Catalin Marinas
2020-07-23 19:33       ` Kevin Brodsky
2020-07-22 11:09     ` Catalin Marinas
2020-08-04 19:34   ` Kevin Brodsky
2020-08-05  9:24     ` Catalin Marinas
     [not found] ` <20200715170844.30064-23-catalin.marinas@arm.com>
2020-08-13 14:01   ` [PATCH v7 22/29] arm64: mte: ptrace: Add PTRACE_{PEEK,POKE}MTETAGS support Luis Machado
2020-08-22 10:56     ` Catalin Marinas

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).