All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Alex Bennée" <alex.bennee@linaro.org>
To: Dave Martin <Dave.Martin@arm.com>
Cc: linux-arm-kernel@lists.infradead.org,
	Catalin Marinas <catalin.marinas@arm.com>,
	Will Deacon <will.deacon@arm.com>,
	Ard Biesheuvel <ard.biesheuvel@linaro.org>,
	Szabolcs Nagy <szabolcs.nagy@arm.com>,
	Okamoto Takayuki <tokamoto@jp.fujitsu.com>,
	kvmarm@lists.cs.columbia.edu, libc-alpha@sourceware.org,
	linux-arch@vger.kernel.org
Subject: Re: [PATCH v5 16/30] arm64/sve: Backend logic for setting the vector length
Date: Fri, 10 Nov 2017 10:27:36 +0000	[thread overview]
Message-ID: <8760aiqvhj.fsf@linaro.org> (raw)
In-Reply-To: <1509465082-30427-17-git-send-email-Dave.Martin@arm.com>


Dave Martin <Dave.Martin@arm.com> writes:

> This patch implements the core logic for changing a task's vector
> length on request from userspace.  This will be used by the ptrace
> and prctl frontends that are implemented in later patches.
>
> The SVE architecture permits, but does not require, implementations
> to support vector lengths that are not a power of two.  To handle
> this, logic is added to check a requested vector length against a
> possibly sparse bitmap of available vector lengths at runtime, so
> that the best supported value can be chosen.
>
> Signed-off-by: Dave Martin <Dave.Martin@arm.com>
> Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Alex Bennée <alex.bennee@linaro.org>

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>

> ---
>  arch/arm64/include/asm/fpsimd.h |   8 +++
>  arch/arm64/kernel/fpsimd.c      | 137 +++++++++++++++++++++++++++++++++++++++-
>  include/uapi/linux/prctl.h      |   5 ++
>  3 files changed, 149 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
> index 9bbd74c..86f550c 100644
> --- a/arch/arm64/include/asm/fpsimd.h
> +++ b/arch/arm64/include/asm/fpsimd.h
> @@ -20,6 +20,7 @@
>
>  #ifndef __ASSEMBLY__
>
> +#include <linux/cache.h>
>  #include <linux/stddef.h>
>
>  /*
> @@ -70,17 +71,24 @@ extern void fpsimd_update_current_state(struct fpsimd_state *state);
>
>  extern void fpsimd_flush_task_state(struct task_struct *target);
>
> +/* Maximum VL that SVE VL-agnostic software can transparently support */
> +#define SVE_VL_ARCH_MAX 0x100
> +
>  extern void sve_save_state(void *state, u32 *pfpsr);
>  extern void sve_load_state(void const *state, u32 const *pfpsr,
>  			   unsigned long vq_minus_1);
>  extern unsigned int sve_get_vl(void);
>
> +extern int __ro_after_init sve_max_vl;
> +
>  #ifdef CONFIG_ARM64_SVE
>
>  extern size_t sve_state_size(struct task_struct const *task);
>
>  extern void sve_alloc(struct task_struct *task);
>  extern void fpsimd_release_task(struct task_struct *task);
> +extern int sve_set_vector_length(struct task_struct *task,
> +				 unsigned long vl, unsigned long flags);
>
>  #else /* ! CONFIG_ARM64_SVE */
>
> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> index e0b5ef5..1ceb069 100644
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -17,8 +17,10 @@
>   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
>   */
>
> +#include <linux/bitmap.h>
>  #include <linux/bottom_half.h>
>  #include <linux/bug.h>
> +#include <linux/cache.h>
>  #include <linux/compat.h>
>  #include <linux/cpu.h>
>  #include <linux/cpu_pm.h>
> @@ -28,6 +30,7 @@
>  #include <linux/init.h>
>  #include <linux/percpu.h>
>  #include <linux/preempt.h>
> +#include <linux/prctl.h>
>  #include <linux/ptrace.h>
>  #include <linux/sched/signal.h>
>  #include <linux/signal.h>
> @@ -113,6 +116,20 @@ static DEFINE_PER_CPU(struct fpsimd_state *, fpsimd_last_state);
>  /* Default VL for tasks that don't set it explicitly: */
>  static int sve_default_vl = SVE_VL_MIN;
>
> +#ifdef CONFIG_ARM64_SVE
> +
> +/* Maximum supported vector length across all CPUs (initially poisoned) */
> +int __ro_after_init sve_max_vl = -1;
> +/* Set of available vector lengths, as vq_to_bit(vq): */
> +static DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
> +
> +#else /* ! CONFIG_ARM64_SVE */
> +
> +/* Dummy declaration for code that will be optimised out: */
> +extern DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
> +
> +#endif /* ! CONFIG_ARM64_SVE */
> +
>  /*
>   * Call __sve_free() directly only if you know task can't be scheduled
>   * or preempted.
> @@ -270,6 +287,50 @@ static void task_fpsimd_save(void)
>  	}
>  }
>
> +/*
> + * Helpers to translate bit indices in sve_vq_map to VQ values (and
> + * vice versa).  This allows find_next_bit() to be used to find the
> + * _maximum_ VQ not exceeding a certain value.
> + */
> +
> +static unsigned int vq_to_bit(unsigned int vq)
> +{
> +	return SVE_VQ_MAX - vq;
> +}
> +
> +static unsigned int bit_to_vq(unsigned int bit)
> +{
> +	if (WARN_ON(bit >= SVE_VQ_MAX))
> +		bit = SVE_VQ_MAX - 1;
> +
> +	return SVE_VQ_MAX - bit;
> +}
> +
> +/*
> + * All vector length selection from userspace comes through here.
> + * We're on a slow path, so some sanity-checks are included.
> + * If things go wrong there's a bug somewhere, but try to fall back to a
> + * safe choice.
> + */
> +static unsigned int find_supported_vector_length(unsigned int vl)
> +{
> +	int bit;
> +	int max_vl = sve_max_vl;
> +
> +	if (WARN_ON(!sve_vl_valid(vl)))
> +		vl = SVE_VL_MIN;
> +
> +	if (WARN_ON(!sve_vl_valid(max_vl)))
> +		max_vl = SVE_VL_MIN;
> +
> +	if (vl > max_vl)
> +		vl = max_vl;
> +
> +	bit = find_next_bit(sve_vq_map, SVE_VQ_MAX,
> +			    vq_to_bit(sve_vq_from_vl(vl)));
> +	return sve_vl_from_vq(bit_to_vq(bit));
> +}
> +
>  #define ZREG(sve_state, vq, n) ((char *)(sve_state) +		\
>  	(SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET))
>
> @@ -364,6 +425,76 @@ void sve_alloc(struct task_struct *task)
>  	BUG_ON(!task->thread.sve_state);
>  }
>
> +int sve_set_vector_length(struct task_struct *task,
> +			  unsigned long vl, unsigned long flags)
> +{
> +	if (flags & ~(unsigned long)(PR_SVE_VL_INHERIT |
> +				     PR_SVE_SET_VL_ONEXEC))
> +		return -EINVAL;
> +
> +	if (!sve_vl_valid(vl))
> +		return -EINVAL;
> +
> +	/*
> +	 * Clamp to the maximum vector length that VL-agnostic SVE code can
> +	 * work with.  A flag may be assigned in the future to allow setting
> +	 * of larger vector lengths without confusing older software.
> +	 */
> +	if (vl > SVE_VL_ARCH_MAX)
> +		vl = SVE_VL_ARCH_MAX;
> +
> +	vl = find_supported_vector_length(vl);
> +
> +	if (flags & (PR_SVE_VL_INHERIT |
> +		     PR_SVE_SET_VL_ONEXEC))
> +		task->thread.sve_vl_onexec = vl;
> +	else
> +		/* Reset VL to system default on next exec: */
> +		task->thread.sve_vl_onexec = 0;
> +
> +	/* Only actually set the VL if not deferred: */
> +	if (flags & PR_SVE_SET_VL_ONEXEC)
> +		goto out;
> +
> +	if (vl == task->thread.sve_vl)
> +		goto out;
> +
> +	/*
> +	 * To ensure the FPSIMD bits of the SVE vector registers are preserved,
> +	 * write any live register state back to task_struct, and convert to a
> +	 * non-SVE thread.
> +	 */
> +	if (task == current) {
> +		local_bh_disable();
> +
> +		task_fpsimd_save();
> +		set_thread_flag(TIF_FOREIGN_FPSTATE);
> +	}
> +
> +	fpsimd_flush_task_state(task);
> +	if (test_and_clear_tsk_thread_flag(task, TIF_SVE))
> +		sve_to_fpsimd(task);
> +
> +	if (task == current)
> +		local_bh_enable();
> +
> +	/*
> +	 * Force reallocation of task SVE state to the correct size
> +	 * on next use:
> +	 */
> +	sve_free(task);
> +
> +	task->thread.sve_vl = vl;
> +
> +out:
> +	if (flags & PR_SVE_VL_INHERIT)
> +		set_tsk_thread_flag(task, TIF_SVE_VL_INHERIT);
> +	else
> +		clear_tsk_thread_flag(task, TIF_SVE_VL_INHERIT);
> +
> +	return 0;
> +}
> +
>  /*
>   * Called from the put_task_struct() path, which cannot get here
>   * unless dead_task is really dead and not schedulable.
> @@ -480,7 +611,7 @@ void fpsimd_thread_switch(struct task_struct *next)
>
>  void fpsimd_flush_thread(void)
>  {
> -	int vl;
> +	int vl, supported_vl;
>
>  	if (!system_supports_fpsimd())
>  		return;
> @@ -508,6 +639,10 @@ void fpsimd_flush_thread(void)
>  		if (WARN_ON(!sve_vl_valid(vl)))
>  			vl = SVE_VL_MIN;
>
> +		supported_vl = find_supported_vector_length(vl);
> +		if (WARN_ON(supported_vl != vl))
> +			vl = supported_vl;
> +
>  		current->thread.sve_vl = vl;
>
>  		/*
> diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
> index a8d0759..1b64901 100644
> --- a/include/uapi/linux/prctl.h
> +++ b/include/uapi/linux/prctl.h
> @@ -197,4 +197,9 @@ struct prctl_mm_map {
>  # define PR_CAP_AMBIENT_LOWER		3
>  # define PR_CAP_AMBIENT_CLEAR_ALL	4
>
> +/* arm64 Scalable Vector Extension controls */
> +# define PR_SVE_SET_VL_ONEXEC		(1 << 18) /* defer effect until exec */
> +# define PR_SVE_VL_LEN_MASK		0xffff
> +# define PR_SVE_VL_INHERIT		(1 << 17) /* inherit across exec */
> +
>  #endif /* _LINUX_PRCTL_H */

WARNING: multiple messages have this Message-ID (diff)
From: "Alex Bennée" <alex.bennee@linaro.org>
To: Dave Martin <Dave.Martin@arm.com>
Cc: linux-arm-kernel@lists.infradead.org,
	Catalin Marinas <catalin.marinas@arm.com>,
	Will Deacon <will.deacon@arm.com>,
	Ard Biesheuvel <ard.biesheuvel@linaro.org>,
	Szabolcs Nagy <szabolcs.nagy@arm.com>,
	Okamoto Takayuki <tokamoto@jp.fujitsu.com>,
	kvmarm@lists.cs.columbia.edu, libc-alpha@sourceware.org,
	linux-arch@vger.kernel.org
Subject: Re: [PATCH v5 16/30] arm64/sve: Backend logic for setting the vector length
Date: Fri, 10 Nov 2017 10:27:36 +0000	[thread overview]
Message-ID: <8760aiqvhj.fsf@linaro.org> (raw)
Message-ID: <20171110102736.P3nVQ05itQymRnIVnh_ci-ruSIhv1jEeKb6PsR6eTw8@z> (raw)
In-Reply-To: <1509465082-30427-17-git-send-email-Dave.Martin@arm.com>


Dave Martin <Dave.Martin@arm.com> writes:

> This patch implements the core logic for changing a task's vector
> length on request from userspace.  This will be used by the ptrace
> and prctl frontends that are implemented in later patches.
>
> The SVE architecture permits, but does not require, implementations
> to support vector lengths that are not a power of two.  To handle
> this, logic is added to check a requested vector length against a
> possibly sparse bitmap of available vector lengths at runtime, so
> that the best supported value can be chosen.
>
> Signed-off-by: Dave Martin <Dave.Martin@arm.com>
> Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Alex Bennée <alex.bennee@linaro.org>

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>

> ---
>  arch/arm64/include/asm/fpsimd.h |   8 +++
>  arch/arm64/kernel/fpsimd.c      | 137 +++++++++++++++++++++++++++++++++++++++-
>  include/uapi/linux/prctl.h      |   5 ++
>  3 files changed, 149 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
> index 9bbd74c..86f550c 100644
> --- a/arch/arm64/include/asm/fpsimd.h
> +++ b/arch/arm64/include/asm/fpsimd.h
> @@ -20,6 +20,7 @@
>
>  #ifndef __ASSEMBLY__
>
> +#include <linux/cache.h>
>  #include <linux/stddef.h>
>
>  /*
> @@ -70,17 +71,24 @@ extern void fpsimd_update_current_state(struct fpsimd_state *state);
>
>  extern void fpsimd_flush_task_state(struct task_struct *target);
>
> +/* Maximum VL that SVE VL-agnostic software can transparently support */
> +#define SVE_VL_ARCH_MAX 0x100
> +
>  extern void sve_save_state(void *state, u32 *pfpsr);
>  extern void sve_load_state(void const *state, u32 const *pfpsr,
>  			   unsigned long vq_minus_1);
>  extern unsigned int sve_get_vl(void);
>
> +extern int __ro_after_init sve_max_vl;
> +
>  #ifdef CONFIG_ARM64_SVE
>
>  extern size_t sve_state_size(struct task_struct const *task);
>
>  extern void sve_alloc(struct task_struct *task);
>  extern void fpsimd_release_task(struct task_struct *task);
> +extern int sve_set_vector_length(struct task_struct *task,
> +				 unsigned long vl, unsigned long flags);
>
>  #else /* ! CONFIG_ARM64_SVE */
>
> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> index e0b5ef5..1ceb069 100644
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -17,8 +17,10 @@
>   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
>   */
>
> +#include <linux/bitmap.h>
>  #include <linux/bottom_half.h>
>  #include <linux/bug.h>
> +#include <linux/cache.h>
>  #include <linux/compat.h>
>  #include <linux/cpu.h>
>  #include <linux/cpu_pm.h>
> @@ -28,6 +30,7 @@
>  #include <linux/init.h>
>  #include <linux/percpu.h>
>  #include <linux/preempt.h>
> +#include <linux/prctl.h>
>  #include <linux/ptrace.h>
>  #include <linux/sched/signal.h>
>  #include <linux/signal.h>
> @@ -113,6 +116,20 @@ static DEFINE_PER_CPU(struct fpsimd_state *, fpsimd_last_state);
>  /* Default VL for tasks that don't set it explicitly: */
>  static int sve_default_vl = SVE_VL_MIN;
>
> +#ifdef CONFIG_ARM64_SVE
> +
> +/* Maximum supported vector length across all CPUs (initially poisoned) */
> +int __ro_after_init sve_max_vl = -1;
> +/* Set of available vector lengths, as vq_to_bit(vq): */
> +static DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
> +
> +#else /* ! CONFIG_ARM64_SVE */
> +
> +/* Dummy declaration for code that will be optimised out: */
> +extern DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
> +
> +#endif /* ! CONFIG_ARM64_SVE */
> +
>  /*
>   * Call __sve_free() directly only if you know task can't be scheduled
>   * or preempted.
> @@ -270,6 +287,50 @@ static void task_fpsimd_save(void)
>  	}
>  }
>
> +/*
> + * Helpers to translate bit indices in sve_vq_map to VQ values (and
> + * vice versa).  This allows find_next_bit() to be used to find the
> + * _maximum_ VQ not exceeding a certain value.
> + */
> +
> +static unsigned int vq_to_bit(unsigned int vq)
> +{
> +	return SVE_VQ_MAX - vq;
> +}
> +
> +static unsigned int bit_to_vq(unsigned int bit)
> +{
> +	if (WARN_ON(bit >= SVE_VQ_MAX))
> +		bit = SVE_VQ_MAX - 1;
> +
> +	return SVE_VQ_MAX - bit;
> +}
> +
> +/*
> + * All vector length selection from userspace comes through here.
> + * We're on a slow path, so some sanity-checks are included.
> + * If things go wrong there's a bug somewhere, but try to fall back to a
> + * safe choice.
> + */
> +static unsigned int find_supported_vector_length(unsigned int vl)
> +{
> +	int bit;
> +	int max_vl = sve_max_vl;
> +
> +	if (WARN_ON(!sve_vl_valid(vl)))
> +		vl = SVE_VL_MIN;
> +
> +	if (WARN_ON(!sve_vl_valid(max_vl)))
> +		max_vl = SVE_VL_MIN;
> +
> +	if (vl > max_vl)
> +		vl = max_vl;
> +
> +	bit = find_next_bit(sve_vq_map, SVE_VQ_MAX,
> +			    vq_to_bit(sve_vq_from_vl(vl)));
> +	return sve_vl_from_vq(bit_to_vq(bit));
> +}
> +
>  #define ZREG(sve_state, vq, n) ((char *)(sve_state) +		\
>  	(SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET))
>
> @@ -364,6 +425,76 @@ void sve_alloc(struct task_struct *task)
>  	BUG_ON(!task->thread.sve_state);
>  }
>
> +int sve_set_vector_length(struct task_struct *task,
> +			  unsigned long vl, unsigned long flags)
> +{
> +	if (flags & ~(unsigned long)(PR_SVE_VL_INHERIT |
> +				     PR_SVE_SET_VL_ONEXEC))
> +		return -EINVAL;
> +
> +	if (!sve_vl_valid(vl))
> +		return -EINVAL;
> +
> +	/*
> +	 * Clamp to the maximum vector length that VL-agnostic SVE code can
> +	 * work with.  A flag may be assigned in the future to allow setting
> +	 * of larger vector lengths without confusing older software.
> +	 */
> +	if (vl > SVE_VL_ARCH_MAX)
> +		vl = SVE_VL_ARCH_MAX;
> +
> +	vl = find_supported_vector_length(vl);
> +
> +	if (flags & (PR_SVE_VL_INHERIT |
> +		     PR_SVE_SET_VL_ONEXEC))
> +		task->thread.sve_vl_onexec = vl;
> +	else
> +		/* Reset VL to system default on next exec: */
> +		task->thread.sve_vl_onexec = 0;
> +
> +	/* Only actually set the VL if not deferred: */
> +	if (flags & PR_SVE_SET_VL_ONEXEC)
> +		goto out;
> +
> +	if (vl == task->thread.sve_vl)
> +		goto out;
> +
> +	/*
> +	 * To ensure the FPSIMD bits of the SVE vector registers are preserved,
> +	 * write any live register state back to task_struct, and convert to a
> +	 * non-SVE thread.
> +	 */
> +	if (task == current) {
> +		local_bh_disable();
> +
> +		task_fpsimd_save();
> +		set_thread_flag(TIF_FOREIGN_FPSTATE);
> +	}
> +
> +	fpsimd_flush_task_state(task);
> +	if (test_and_clear_tsk_thread_flag(task, TIF_SVE))
> +		sve_to_fpsimd(task);
> +
> +	if (task == current)
> +		local_bh_enable();
> +
> +	/*
> +	 * Force reallocation of task SVE state to the correct size
> +	 * on next use:
> +	 */
> +	sve_free(task);
> +
> +	task->thread.sve_vl = vl;
> +
> +out:
> +	if (flags & PR_SVE_VL_INHERIT)
> +		set_tsk_thread_flag(task, TIF_SVE_VL_INHERIT);
> +	else
> +		clear_tsk_thread_flag(task, TIF_SVE_VL_INHERIT);
> +
> +	return 0;
> +}
> +
>  /*
>   * Called from the put_task_struct() path, which cannot get here
>   * unless dead_task is really dead and not schedulable.
> @@ -480,7 +611,7 @@ void fpsimd_thread_switch(struct task_struct *next)
>
>  void fpsimd_flush_thread(void)
>  {
> -	int vl;
> +	int vl, supported_vl;
>
>  	if (!system_supports_fpsimd())
>  		return;
> @@ -508,6 +639,10 @@ void fpsimd_flush_thread(void)
>  		if (WARN_ON(!sve_vl_valid(vl)))
>  			vl = SVE_VL_MIN;
>
> +		supported_vl = find_supported_vector_length(vl);
> +		if (WARN_ON(supported_vl != vl))
> +			vl = supported_vl;
> +
>  		current->thread.sve_vl = vl;
>
>  		/*
> diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
> index a8d0759..1b64901 100644
> --- a/include/uapi/linux/prctl.h
> +++ b/include/uapi/linux/prctl.h
> @@ -197,4 +197,9 @@ struct prctl_mm_map {
>  # define PR_CAP_AMBIENT_LOWER		3
>  # define PR_CAP_AMBIENT_CLEAR_ALL	4
>
> +/* arm64 Scalable Vector Extension controls */
> +# define PR_SVE_SET_VL_ONEXEC		(1 << 18) /* defer effect until exec */
> +# define PR_SVE_VL_LEN_MASK		0xffff
> +# define PR_SVE_VL_INHERIT		(1 << 17) /* inherit across exec */
> +
>  #endif /* _LINUX_PRCTL_H */


--
Alex Bennée

WARNING: multiple messages have this Message-ID (diff)
From: alex.bennee@linaro.org (Alex Bennée)
To: linux-arm-kernel@lists.infradead.org
Subject: [PATCH v5 16/30] arm64/sve: Backend logic for setting the vector length
Date: Fri, 10 Nov 2017 10:27:36 +0000	[thread overview]
Message-ID: <8760aiqvhj.fsf@linaro.org> (raw)
In-Reply-To: <1509465082-30427-17-git-send-email-Dave.Martin@arm.com>


Dave Martin <Dave.Martin@arm.com> writes:

> This patch implements the core logic for changing a task's vector
> length on request from userspace.  This will be used by the ptrace
> and prctl frontends that are implemented in later patches.
>
> The SVE architecture permits, but does not require, implementations
> to support vector lengths that are not a power of two.  To handle
> this, logic is added to check a requested vector length against a
> possibly sparse bitmap of available vector lengths at runtime, so
> that the best supported value can be chosen.
>
> Signed-off-by: Dave Martin <Dave.Martin@arm.com>
> Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Alex Benn?e <alex.bennee@linaro.org>

Reviewed-by: Alex Benn?e <alex.bennee@linaro.org>

> ---
>  arch/arm64/include/asm/fpsimd.h |   8 +++
>  arch/arm64/kernel/fpsimd.c      | 137 +++++++++++++++++++++++++++++++++++++++-
>  include/uapi/linux/prctl.h      |   5 ++
>  3 files changed, 149 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
> index 9bbd74c..86f550c 100644
> --- a/arch/arm64/include/asm/fpsimd.h
> +++ b/arch/arm64/include/asm/fpsimd.h
> @@ -20,6 +20,7 @@
>
>  #ifndef __ASSEMBLY__
>
> +#include <linux/cache.h>
>  #include <linux/stddef.h>
>
>  /*
> @@ -70,17 +71,24 @@ extern void fpsimd_update_current_state(struct fpsimd_state *state);
>
>  extern void fpsimd_flush_task_state(struct task_struct *target);
>
> +/* Maximum VL that SVE VL-agnostic software can transparently support */
> +#define SVE_VL_ARCH_MAX 0x100
> +
>  extern void sve_save_state(void *state, u32 *pfpsr);
>  extern void sve_load_state(void const *state, u32 const *pfpsr,
>  			   unsigned long vq_minus_1);
>  extern unsigned int sve_get_vl(void);
>
> +extern int __ro_after_init sve_max_vl;
> +
>  #ifdef CONFIG_ARM64_SVE
>
>  extern size_t sve_state_size(struct task_struct const *task);
>
>  extern void sve_alloc(struct task_struct *task);
>  extern void fpsimd_release_task(struct task_struct *task);
> +extern int sve_set_vector_length(struct task_struct *task,
> +				 unsigned long vl, unsigned long flags);
>
>  #else /* ! CONFIG_ARM64_SVE */
>
> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> index e0b5ef5..1ceb069 100644
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -17,8 +17,10 @@
>   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
>   */
>
> +#include <linux/bitmap.h>
>  #include <linux/bottom_half.h>
>  #include <linux/bug.h>
> +#include <linux/cache.h>
>  #include <linux/compat.h>
>  #include <linux/cpu.h>
>  #include <linux/cpu_pm.h>
> @@ -28,6 +30,7 @@
>  #include <linux/init.h>
>  #include <linux/percpu.h>
>  #include <linux/preempt.h>
> +#include <linux/prctl.h>
>  #include <linux/ptrace.h>
>  #include <linux/sched/signal.h>
>  #include <linux/signal.h>
> @@ -113,6 +116,20 @@ static DEFINE_PER_CPU(struct fpsimd_state *, fpsimd_last_state);
>  /* Default VL for tasks that don't set it explicitly: */
>  static int sve_default_vl = SVE_VL_MIN;
>
> +#ifdef CONFIG_ARM64_SVE
> +
> +/* Maximum supported vector length across all CPUs (initially poisoned) */
> +int __ro_after_init sve_max_vl = -1;
> +/* Set of available vector lengths, as vq_to_bit(vq): */
> +static DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
> +
> +#else /* ! CONFIG_ARM64_SVE */
> +
> +/* Dummy declaration for code that will be optimised out: */
> +extern DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
> +
> +#endif /* ! CONFIG_ARM64_SVE */
> +
>  /*
>   * Call __sve_free() directly only if you know task can't be scheduled
>   * or preempted.
> @@ -270,6 +287,50 @@ static void task_fpsimd_save(void)
>  	}
>  }
>
> +/*
> + * Helpers to translate bit indices in sve_vq_map to VQ values (and
> + * vice versa).  This allows find_next_bit() to be used to find the
> + * _maximum_ VQ not exceeding a certain value.
> + */
> +
> +static unsigned int vq_to_bit(unsigned int vq)
> +{
> +	return SVE_VQ_MAX - vq;
> +}
> +
> +static unsigned int bit_to_vq(unsigned int bit)
> +{
> +	if (WARN_ON(bit >= SVE_VQ_MAX))
> +		bit = SVE_VQ_MAX - 1;
> +
> +	return SVE_VQ_MAX - bit;
> +}
> +
> +/*
> + * All vector length selection from userspace comes through here.
> + * We're on a slow path, so some sanity-checks are included.
> + * If things go wrong there's a bug somewhere, but try to fall back to a
> + * safe choice.
> + */
> +static unsigned int find_supported_vector_length(unsigned int vl)
> +{
> +	int bit;
> +	int max_vl = sve_max_vl;
> +
> +	if (WARN_ON(!sve_vl_valid(vl)))
> +		vl = SVE_VL_MIN;
> +
> +	if (WARN_ON(!sve_vl_valid(max_vl)))
> +		max_vl = SVE_VL_MIN;
> +
> +	if (vl > max_vl)
> +		vl = max_vl;
> +
> +	bit = find_next_bit(sve_vq_map, SVE_VQ_MAX,
> +			    vq_to_bit(sve_vq_from_vl(vl)));
> +	return sve_vl_from_vq(bit_to_vq(bit));
> +}
> +
>  #define ZREG(sve_state, vq, n) ((char *)(sve_state) +		\
>  	(SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET))
>
> @@ -364,6 +425,76 @@ void sve_alloc(struct task_struct *task)
>  	BUG_ON(!task->thread.sve_state);
>  }
>
> +int sve_set_vector_length(struct task_struct *task,
> +			  unsigned long vl, unsigned long flags)
> +{
> +	if (flags & ~(unsigned long)(PR_SVE_VL_INHERIT |
> +				     PR_SVE_SET_VL_ONEXEC))
> +		return -EINVAL;
> +
> +	if (!sve_vl_valid(vl))
> +		return -EINVAL;
> +
> +	/*
> +	 * Clamp to the maximum vector length that VL-agnostic SVE code can
> +	 * work with.  A flag may be assigned in the future to allow setting
> +	 * of larger vector lengths without confusing older software.
> +	 */
> +	if (vl > SVE_VL_ARCH_MAX)
> +		vl = SVE_VL_ARCH_MAX;
> +
> +	vl = find_supported_vector_length(vl);
> +
> +	if (flags & (PR_SVE_VL_INHERIT |
> +		     PR_SVE_SET_VL_ONEXEC))
> +		task->thread.sve_vl_onexec = vl;
> +	else
> +		/* Reset VL to system default on next exec: */
> +		task->thread.sve_vl_onexec = 0;
> +
> +	/* Only actually set the VL if not deferred: */
> +	if (flags & PR_SVE_SET_VL_ONEXEC)
> +		goto out;
> +
> +	if (vl == task->thread.sve_vl)
> +		goto out;
> +
> +	/*
> +	 * To ensure the FPSIMD bits of the SVE vector registers are preserved,
> +	 * write any live register state back to task_struct, and convert to a
> +	 * non-SVE thread.
> +	 */
> +	if (task == current) {
> +		local_bh_disable();
> +
> +		task_fpsimd_save();
> +		set_thread_flag(TIF_FOREIGN_FPSTATE);
> +	}
> +
> +	fpsimd_flush_task_state(task);
> +	if (test_and_clear_tsk_thread_flag(task, TIF_SVE))
> +		sve_to_fpsimd(task);
> +
> +	if (task == current)
> +		local_bh_enable();
> +
> +	/*
> +	 * Force reallocation of task SVE state to the correct size
> +	 * on next use:
> +	 */
> +	sve_free(task);
> +
> +	task->thread.sve_vl = vl;
> +
> +out:
> +	if (flags & PR_SVE_VL_INHERIT)
> +		set_tsk_thread_flag(task, TIF_SVE_VL_INHERIT);
> +	else
> +		clear_tsk_thread_flag(task, TIF_SVE_VL_INHERIT);
> +
> +	return 0;
> +}
> +
>  /*
>   * Called from the put_task_struct() path, which cannot get here
>   * unless dead_task is really dead and not schedulable.
> @@ -480,7 +611,7 @@ void fpsimd_thread_switch(struct task_struct *next)
>
>  void fpsimd_flush_thread(void)
>  {
> -	int vl;
> +	int vl, supported_vl;
>
>  	if (!system_supports_fpsimd())
>  		return;
> @@ -508,6 +639,10 @@ void fpsimd_flush_thread(void)
>  		if (WARN_ON(!sve_vl_valid(vl)))
>  			vl = SVE_VL_MIN;
>
> +		supported_vl = find_supported_vector_length(vl);
> +		if (WARN_ON(supported_vl != vl))
> +			vl = supported_vl;
> +
>  		current->thread.sve_vl = vl;
>
>  		/*
> diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
> index a8d0759..1b64901 100644
> --- a/include/uapi/linux/prctl.h
> +++ b/include/uapi/linux/prctl.h
> @@ -197,4 +197,9 @@ struct prctl_mm_map {
>  # define PR_CAP_AMBIENT_LOWER		3
>  # define PR_CAP_AMBIENT_CLEAR_ALL	4
>
> +/* arm64 Scalable Vector Extension controls */
> +# define PR_SVE_SET_VL_ONEXEC		(1 << 18) /* defer effect until exec */
> +# define PR_SVE_VL_LEN_MASK		0xffff
> +# define PR_SVE_VL_INHERIT		(1 << 17) /* inherit across exec */
> +
>  #endif /* _LINUX_PRCTL_H */


--
Alex Benn?e

  reply	other threads:[~2017-11-10 10:27 UTC|newest]

Thread overview: 174+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-10-31 15:50 [PATCH v5 00/30] ARM Scalable Vector Extension (SVE) Dave Martin
2017-10-31 15:50 ` Dave Martin
2017-10-31 15:50 ` Dave Martin
2017-10-31 15:50 ` [PATCH v5 01/30] regset: Add support for dynamically sized regsets Dave Martin
2017-10-31 15:50   ` Dave Martin
2017-11-01 11:42   ` Catalin Marinas
2017-11-01 11:42     ` Catalin Marinas
2017-11-01 13:16     ` Dave Martin
2017-11-01 13:16       ` Dave Martin
2017-11-08 11:50       ` Alex Bennée
2017-11-08 11:50         ` Alex Bennée
2017-11-08 11:50         ` Alex Bennée
2017-10-31 15:50 ` [PATCH v5 02/30] arm64: fpsimd: Correctly annotate exception helpers called from asm Dave Martin
2017-10-31 15:50   ` Dave Martin
2017-10-31 15:50   ` Dave Martin
2017-11-01 11:42   ` Catalin Marinas
2017-11-01 11:42     ` Catalin Marinas
2017-10-31 15:50 ` [PATCH v5 03/30] arm64: signal: Verify extra data is user-readable in sys_rt_sigreturn Dave Martin
2017-10-31 15:50   ` Dave Martin
2017-10-31 15:50   ` Dave Martin
2017-11-01 11:43   ` Catalin Marinas
2017-11-01 11:43     ` Catalin Marinas
2017-10-31 15:50 ` [PATCH v5 04/30] arm64: KVM: Hide unsupported AArch64 CPU features from guests Dave Martin
2017-10-31 15:50   ` Dave Martin
2017-11-01  4:47   ` Christoffer Dall
2017-11-01  4:47     ` Christoffer Dall
2017-11-01 10:26     ` Dave Martin
2017-11-01 10:26       ` Dave Martin
2017-11-02  8:15       ` Christoffer Dall
2017-11-02  8:15         ` Christoffer Dall
2017-11-02  9:20         ` Dave Martin
2017-11-02  9:20           ` Dave Martin
2017-11-02 11:01         ` Dave Martin
2017-11-02 11:01           ` Dave Martin
2017-11-02 19:18           ` Christoffer Dall
2017-11-02 19:18             ` Christoffer Dall
2017-10-31 15:50 ` [PATCH v5 05/30] arm64: efi: Add missing Kconfig dependency on KERNEL_MODE_NEON Dave Martin
2017-10-31 15:50   ` Dave Martin
2017-10-31 15:50   ` Dave Martin
2017-10-31 15:50 ` [PATCH v5 06/30] arm64: Port deprecated instruction emulation to new sysctl interface Dave Martin
2017-10-31 15:50   ` Dave Martin
2017-10-31 15:50 ` [PATCH v5 07/30] arm64: fpsimd: Simplify uses of {set,clear}_ti_thread_flag() Dave Martin
2017-10-31 15:50   ` [PATCH v5 07/30] arm64: fpsimd: Simplify uses of {set, clear}_ti_thread_flag() Dave Martin
2017-10-31 15:51 ` [PATCH v5 08/30] arm64/sve: System register and exception syndrome definitions Dave Martin
2017-10-31 15:51   ` Dave Martin
2017-10-31 15:51 ` [PATCH v5 09/30] arm64/sve: Low-level SVE architectural state manipulation functions Dave Martin
2017-10-31 15:51   ` Dave Martin
2017-10-31 15:51 ` [PATCH v5 10/30] arm64/sve: Kconfig update and conditional compilation support Dave Martin
2017-10-31 15:51   ` Dave Martin
2017-10-31 15:51 ` [PATCH v5 11/30] arm64/sve: Signal frame and context structure definition Dave Martin
2017-10-31 15:51   ` Dave Martin
2017-11-08 16:34   ` Alex Bennée
2017-11-08 16:34     ` Alex Bennée
2017-11-08 16:34     ` Alex Bennée
2017-10-31 15:51 ` [PATCH v5 12/30] arm64/sve: Low-level CPU setup Dave Martin
2017-10-31 15:51   ` Dave Martin
2017-11-08 16:37   ` Alex Bennée
2017-11-08 16:37     ` Alex Bennée
2017-11-08 16:37     ` Alex Bennée
2017-10-31 15:51 ` [PATCH v5 13/30] arm64/sve: Core task context handling Dave Martin
2017-10-31 15:51   ` Dave Martin
2017-10-31 15:51   ` Dave Martin
2017-11-09 17:16   ` Alex Bennée
2017-11-09 17:16     ` Alex Bennée
2017-11-09 17:16     ` Alex Bennée
2017-11-09 17:56     ` Dave Martin
2017-11-09 17:56       ` Dave Martin
2017-11-09 18:06       ` Alex Bennée
2017-11-09 18:06         ` Alex Bennée
2017-11-09 18:06         ` Alex Bennée
2017-10-31 15:51 ` [PATCH v5 14/30] arm64/sve: Support vector length resetting for new processes Dave Martin
2017-10-31 15:51   ` Dave Martin
2017-10-31 15:51 ` [PATCH v5 15/30] arm64/sve: Signal handling support Dave Martin
2017-10-31 15:51   ` Dave Martin
2017-10-31 15:51   ` Dave Martin
2017-11-01 14:33   ` Catalin Marinas
2017-11-01 14:33     ` Catalin Marinas
2017-11-07 13:22   ` Alex Bennée
2017-11-07 13:22     ` Alex Bennée
2017-11-07 13:22     ` Alex Bennée
2017-11-08 16:11     ` Dave Martin
2017-11-08 16:11       ` Dave Martin
2017-12-06 19:56   ` Kees Cook
2017-12-06 19:56     ` Kees Cook
2017-12-07 10:49     ` Will Deacon
2017-12-07 10:49       ` Will Deacon
2017-12-07 12:03       ` Dave Martin
2017-12-07 12:03         ` Dave Martin
2017-12-07 18:50       ` Kees Cook
2017-12-07 18:50         ` Kees Cook
2017-12-11 14:07         ` Will Deacon
2017-12-11 14:07           ` Will Deacon
2017-12-11 19:23           ` Kees Cook
2017-12-11 19:23             ` Kees Cook
2017-12-12 10:40             ` Will Deacon
2017-12-12 10:40               ` Will Deacon
2017-12-12 11:11               ` Dave Martin
2017-12-12 11:11                 ` Dave Martin
2017-12-12 19:36                 ` Kees Cook
2017-12-12 19:36                   ` Kees Cook
2017-12-12 19:36                   ` Kees Cook
2017-10-31 15:51 ` [PATCH v5 16/30] arm64/sve: Backend logic for setting the vector length Dave Martin
2017-10-31 15:51   ` Dave Martin
2017-10-31 15:51   ` Dave Martin
2017-11-10 10:27   ` Alex Bennée [this message]
2017-11-10 10:27     ` Alex Bennée
2017-11-10 10:27     ` Alex Bennée
2017-10-31 15:51 ` [PATCH v5 17/30] arm64: cpufeature: Move sys_caps_initialised declarations Dave Martin
2017-10-31 15:51   ` Dave Martin
2017-10-31 15:51 ` [PATCH v5 18/30] arm64/sve: Probe SVE capabilities and usable vector lengths Dave Martin
2017-10-31 15:51   ` Dave Martin
2017-10-31 15:51   ` Dave Martin
2017-10-31 15:51 ` [PATCH v5 19/30] arm64/sve: Preserve SVE registers around kernel-mode NEON use Dave Martin
2017-10-31 15:51   ` Dave Martin
2017-10-31 15:51 ` [PATCH v5 20/30] arm64/sve: Preserve SVE registers around EFI runtime service calls Dave Martin
2017-10-31 15:51   ` Dave Martin
2017-10-31 15:51 ` [PATCH v5 21/30] arm64/sve: ptrace and ELF coredump support Dave Martin
2017-10-31 15:51   ` Dave Martin
2017-10-31 15:51 ` [PATCH v5 22/30] arm64/sve: Add prctl controls for userspace vector length management Dave Martin
2017-10-31 15:51   ` Dave Martin
2017-10-31 15:51   ` Dave Martin
2017-10-31 15:51 ` [PATCH v5 23/30] arm64/sve: Add sysctl to set the default vector length for new processes Dave Martin
2017-10-31 15:51   ` Dave Martin
2017-10-31 15:51   ` Dave Martin
2017-10-31 15:51 ` [PATCH v5 24/30] arm64/sve: KVM: Prevent guests from using SVE Dave Martin
2017-10-31 15:51   ` Dave Martin
2017-10-31 15:51   ` Dave Martin
2017-10-31 15:51 ` [PATCH v5 25/30] arm64/sve: KVM: Treat guest SVE use as undefined instruction execution Dave Martin
2017-10-31 15:51   ` Dave Martin
2017-10-31 15:51 ` [PATCH v5 26/30] arm64/sve: KVM: Hide SVE from CPU features exposed to guests Dave Martin
2017-10-31 15:51   ` Dave Martin
2017-10-31 15:51 ` [PATCH v5 27/30] arm64/sve: Detect SVE and activate runtime support Dave Martin
2017-10-31 15:51   ` Dave Martin
2017-10-31 15:51   ` Dave Martin
2017-10-31 15:51 ` [RFC PATCH v5 29/30] arm64: signal: Report signal frame size to userspace via auxv Dave Martin
2017-10-31 15:51   ` Dave Martin
2017-10-31 15:51   ` Dave Martin
2017-10-31 15:51 ` [RFC PATCH v5 30/30] arm64/sve: signal: Include SVE when computing AT_MINSIGSTKSZ Dave Martin
2017-10-31 15:51   ` Dave Martin
2017-10-31 15:51   ` Dave Martin
     [not found] ` <1509465082-30427-1-git-send-email-Dave.Martin-5wv7dgnIgG8@public.gmane.org>
2017-10-31 15:51   ` [PATCH v5 28/30] arm64/sve: Add documentation Dave Martin
2017-10-31 15:51     ` Dave Martin
2017-10-31 15:51     ` Dave Martin
2017-11-02 16:32   ` [PATCH v5 00/30] ARM Scalable Vector Extension (SVE) Will Deacon
2017-11-02 16:32     ` Will Deacon
2017-11-02 16:32     ` Will Deacon
     [not found]     ` <20171102163248.GB595-5wv7dgnIgG8@public.gmane.org>
2017-11-02 17:04       ` Dave P Martin
2017-11-02 17:04         ` Dave P Martin
2017-11-02 17:04         ` Dave P Martin
2017-11-29 15:04 ` Alex Bennée
2017-11-29 15:04   ` Alex Bennée
2017-11-29 15:04   ` Alex Bennée
     [not found]   ` <877eu9dt3n.fsf-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org>
2017-11-29 15:21     ` Will Deacon
2017-11-29 15:21       ` Will Deacon
2017-11-29 15:21       ` Will Deacon
     [not found]       ` <20171129152140.GD10650-5wv7dgnIgG8@public.gmane.org>
2017-11-29 15:37         ` Dave Martin
2017-11-29 15:37           ` Dave Martin
2017-11-29 15:37           ` Dave Martin
2018-01-08 14:49 ` Yury Norov
2018-01-08 14:49   ` Yury Norov
2018-01-08 14:49   ` Yury Norov
2018-01-09 16:51   ` Yury Norov
2018-01-09 16:51     ` Yury Norov
2018-01-09 16:51     ` Yury Norov
2018-01-15 17:22     ` Dave Martin
2018-01-15 17:22       ` Dave Martin
2018-01-15 17:22       ` Dave Martin
     [not found]       ` <20180115172201.GW22781-M5GwZQ6tE7x5pKCnmE3YQBJ8xKzm50AiAL8bYrjMMd8@public.gmane.org>
2018-01-16 10:11         ` Yury Norov
2018-01-16 10:11           ` Yury Norov
2018-01-16 16:05           ` Dave Martin
2018-01-16 16:05             ` Dave Martin
2018-01-15 16:55   ` Dave Martin
2018-01-15 16:55     ` Dave Martin
2018-01-15 16:55     ` Dave Martin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=8760aiqvhj.fsf@linaro.org \
    --to=alex.bennee@linaro.org \
    --cc=Dave.Martin@arm.com \
    --cc=ard.biesheuvel@linaro.org \
    --cc=catalin.marinas@arm.com \
    --cc=kvmarm@lists.cs.columbia.edu \
    --cc=libc-alpha@sourceware.org \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=szabolcs.nagy@arm.com \
    --cc=tokamoto@jp.fujitsu.com \
    --cc=will.deacon@arm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.