* [RFC PATCH 01/29] arm64: signal: Refactor sigcontext parsing in rt_sigreturn
2016-11-25 19:38 [RFC PATCH 00/29] arm64: Scalable Vector Extension core support Dave Martin
@ 2016-11-25 19:38 ` Dave Martin
2016-11-25 19:38 ` [RFC PATCH 02/29] arm64: signal: factor frame layout and population into separate passes Dave Martin
` (29 subsequent siblings)
30 siblings, 0 replies; 61+ messages in thread
From: Dave Martin @ 2016-11-25 19:38 UTC (permalink / raw)
To: linux-arm-kernel
Currently, rt_sigreturn does very limited checking on the
sigcontext coming from userspace.
Future additions of extra dynamic sigcontext data will increase the
potential for surprises. Also, it is not clear whether the
sigcontext extension records are supposed to occur in a particular
order.
This patch factors out the sigcontext parsing into a separate
function, and adds extra checks to validate the well-formedness of
the sigcontext structure.
To help with this, an abstraction for the signal frame layout is
also added, using offsets to track the location of different
records in the frame. Although trivial, this provides a base to
extend upon in order to track more complex layouts.
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
---
arch/arm64/kernel/signal.c | 121 +++++++++++++++++++++++++++++++++++++--------
1 file changed, 101 insertions(+), 20 deletions(-)
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 404dd67..4f8dbe0 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -19,9 +19,11 @@
#include <linux/compat.h>
#include <linux/errno.h>
+#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/personality.h>
#include <linux/freezer.h>
+#include <linux/stddef.h>
#include <linux/uaccess.h>
#include <linux/tracehook.h>
#include <linux/ratelimit.h>
@@ -45,6 +47,10 @@ struct rt_sigframe {
u64 lr;
};
+struct rt_sigframe_user_layout {
+ struct rt_sigframe __user *sigframe;
+};
+
static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
{
struct fpsimd_state *fpsimd = &current->thread.fpsimd_state;
@@ -92,12 +98,86 @@ static int restore_fpsimd_context(struct fpsimd_context __user *ctx)
return err ? -EFAULT : 0;
}
+struct user_ctxs {
+ struct fpsimd_context __user *fpsimd;
+};
+
+static int parse_user_sigframe(struct user_ctxs *user,
+ struct rt_sigframe __user *sf)
+{
+ struct sigcontext __user *sc = &sf->uc.uc_mcontext;
+ struct _aarch64_ctx __user *head =
+ (struct _aarch64_ctx __user *)&sc->__reserved;
+ size_t offset = 0;
+
+ user->fpsimd = NULL;
+
+ while (1) {
+ int err;
+ u32 magic, size;
+
+ head = (struct _aarch64_ctx __user *)&sc->__reserved[offset];
+ if (!IS_ALIGNED((unsigned long)head, 16))
+ goto invalid;
+
+ err = 0;
+ __get_user_error(magic, &head->magic, err);
+ __get_user_error(size, &head->size, err);
+ if (err)
+ return err;
+
+ switch (magic) {
+ case 0:
+ if (size)
+ goto invalid;
+
+ goto done;
+
+ case FPSIMD_MAGIC:
+ if (user->fpsimd)
+ goto invalid;
+
+ if (offset > sizeof(sc->__reserved) -
+ sizeof(*user->fpsimd) ||
+ size < sizeof(*user->fpsimd))
+ goto invalid;
+
+ user->fpsimd = (struct fpsimd_context __user *)head;
+ break;
+
+ case ESR_MAGIC:
+ /* ignore */
+ break;
+
+ default:
+ goto invalid;
+ }
+
+ if (size < sizeof(*head))
+ goto invalid;
+
+ if (size > sizeof(sc->__reserved) - (sizeof(*head) + offset))
+ goto invalid;
+
+ offset += size;
+ }
+
+done:
+ if (!user->fpsimd)
+ goto invalid;
+
+ return 0;
+
+invalid:
+ return -EINVAL;
+}
+
static int restore_sigframe(struct pt_regs *regs,
struct rt_sigframe __user *sf)
{
sigset_t set;
int i, err;
- void *aux = sf->uc.uc_mcontext.__reserved;
+ struct user_ctxs user;
err = __copy_from_user(&set, &sf->uc.uc_sigmask, sizeof(set));
if (err == 0)
@@ -116,12 +196,11 @@ static int restore_sigframe(struct pt_regs *regs,
regs->syscallno = ~0UL;
err |= !valid_user_regs(&regs->user_regs, current);
+ if (err == 0)
+ err = parse_user_sigframe(&user, sf);
- if (err == 0) {
- struct fpsimd_context *fpsimd_ctx =
- container_of(aux, struct fpsimd_context, head);
- err |= restore_fpsimd_context(fpsimd_ctx);
- }
+ if (err == 0)
+ err = restore_fpsimd_context(user.fpsimd);
return err;
}
@@ -162,10 +241,11 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
return 0;
}
-static int setup_sigframe(struct rt_sigframe __user *sf,
+static int setup_sigframe(struct rt_sigframe_user_layout *user,
struct pt_regs *regs, sigset_t *set)
{
int i, err = 0;
+ struct rt_sigframe __user *sf = user->sigframe;
void *aux = sf->uc.uc_mcontext.__reserved;
struct _aarch64_ctx *end;
@@ -209,33 +289,32 @@ static int setup_sigframe(struct rt_sigframe __user *sf,
return err;
}
-static struct rt_sigframe __user *get_sigframe(struct ksignal *ksig,
- struct pt_regs *regs)
+static int get_sigframe(struct rt_sigframe_user_layout *user,
+ struct ksignal *ksig, struct pt_regs *regs)
{
unsigned long sp, sp_top;
- struct rt_sigframe __user *frame;
sp = sp_top = sigsp(regs->sp, ksig);
sp = (sp - sizeof(struct rt_sigframe)) & ~15;
- frame = (struct rt_sigframe __user *)sp;
+ user->sigframe = (struct rt_sigframe __user *)sp;
/*
* Check that we can actually write to the signal frame.
*/
- if (!access_ok(VERIFY_WRITE, frame, sp_top - sp))
- frame = NULL;
+ if (!access_ok(VERIFY_WRITE, user->sigframe, sp_top - sp))
+ return -EFAULT;
- return frame;
+ return 0;
}
static void setup_return(struct pt_regs *regs, struct k_sigaction *ka,
- void __user *frame, int usig)
+ struct rt_sigframe_user_layout *user, int usig)
{
__sigrestore_t sigtramp;
regs->regs[0] = usig;
- regs->sp = (unsigned long)frame;
+ regs->sp = (unsigned long)user->sigframe;
regs->regs[29] = regs->sp + offsetof(struct rt_sigframe, fp);
regs->pc = (unsigned long)ka->sa.sa_handler;
@@ -250,20 +329,22 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka,
static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,
struct pt_regs *regs)
{
+ struct rt_sigframe_user_layout user;
struct rt_sigframe __user *frame;
int err = 0;
- frame = get_sigframe(ksig, regs);
- if (!frame)
+ if (get_sigframe(&user, ksig, regs))
return 1;
+ frame = user.sigframe;
+
__put_user_error(0, &frame->uc.uc_flags, err);
__put_user_error(NULL, &frame->uc.uc_link, err);
err |= __save_altstack(&frame->uc.uc_stack, regs->sp);
- err |= setup_sigframe(frame, regs, set);
+ err |= setup_sigframe(&user, regs, set);
if (err == 0) {
- setup_return(regs, &ksig->ka, frame, usig);
+ setup_return(regs, &ksig->ka, &user, usig);
if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
err |= copy_siginfo_to_user(&frame->info, &ksig->info);
regs->regs[1] = (unsigned long)&frame->info;
--
2.1.4
^ permalink raw reply related [flat|nested] 61+ messages in thread
* [RFC PATCH 02/29] arm64: signal: factor frame layout and population into separate passes
2016-11-25 19:38 [RFC PATCH 00/29] arm64: Scalable Vector Extension core support Dave Martin
2016-11-25 19:38 ` [RFC PATCH 01/29] arm64: signal: Refactor sigcontext parsing in rt_sigreturn Dave Martin
@ 2016-11-25 19:38 ` Dave Martin
2016-11-25 19:38 ` [RFC PATCH 03/29] arm64: signal: factor out signal frame record allocation Dave Martin
` (28 subsequent siblings)
30 siblings, 0 replies; 61+ messages in thread
From: Dave Martin @ 2016-11-25 19:38 UTC (permalink / raw)
To: linux-arm-kernel
In preparation for expanding the signal frame, this patch refactors
the signal frame setup code in setup_sigframe() into two separate
passes.
The first pass, setup_sigframe_layout(), determines the size of the
signal frame and its internal layout, including the presence and
location of optional records. The resulting knowledge is used to
allocate and locate the user stack space required for the signal
frame and to determine which optional records to include.
The second pass, setup_sigframe(), is called once the stack frame
is allocated in order to populate it with the necessary context
information.
This change has no effect on the signal ABI, but will make it
easier to expand the signal frame in future patches.
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
---
arch/arm64/kernel/signal.c | 112 +++++++++++++++++++++++++++++++++++----------
1 file changed, 88 insertions(+), 24 deletions(-)
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 4f8dbe0..fc08371 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -25,6 +25,7 @@
#include <linux/freezer.h>
#include <linux/stddef.h>
#include <linux/uaccess.h>
+#include <linux/string.h>
#include <linux/tracehook.h>
#include <linux/ratelimit.h>
@@ -49,8 +50,39 @@ struct rt_sigframe {
struct rt_sigframe_user_layout {
struct rt_sigframe __user *sigframe;
+
+ unsigned long size; /* size of allocated sigframe data */
+ unsigned long limit; /* largest allowed size */
+
+ unsigned long fpsimd_offset;
+ unsigned long esr_offset;
+ unsigned long end_offset;
};
+static void init_user_layout(struct rt_sigframe_user_layout *user)
+{
+ memset(user, 0, sizeof(*user));
+ user->size = offsetof(struct rt_sigframe, uc.uc_mcontext.__reserved);
+
+ user->limit = user->size +
+ sizeof(user->sigframe->uc.uc_mcontext.__reserved) -
+ round_up(sizeof(struct _aarch64_ctx), 16);
+ /* ^ reserve space for terminator */
+}
+
+static size_t sigframe_size(struct rt_sigframe_user_layout const *user)
+{
+ return round_up(max(user->size, sizeof(struct rt_sigframe)), 16);
+}
+
+static void __user *apply_user_offset(
+ struct rt_sigframe_user_layout const *user, unsigned long offset)
+{
+ char __user *base = (char __user *)user->sigframe;
+
+ return base + offset;
+}
+
static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
{
struct fpsimd_state *fpsimd = &current->thread.fpsimd_state;
@@ -106,26 +138,35 @@ static int parse_user_sigframe(struct user_ctxs *user,
struct rt_sigframe __user *sf)
{
struct sigcontext __user *sc = &sf->uc.uc_mcontext;
- struct _aarch64_ctx __user *head =
- (struct _aarch64_ctx __user *)&sc->__reserved;
+ struct _aarch64_ctx __user *head;
+ char __user *base = (char __user *)&sc->__reserved;
size_t offset = 0;
+ size_t limit = sizeof(sc->__reserved);
user->fpsimd = NULL;
+ if (!IS_ALIGNED((unsigned long)base, 16))
+ goto invalid;
+
while (1) {
- int err;
+ int err = 0;
u32 magic, size;
- head = (struct _aarch64_ctx __user *)&sc->__reserved[offset];
- if (!IS_ALIGNED((unsigned long)head, 16))
+ if (limit - offset < sizeof(*head))
goto invalid;
- err = 0;
+ if (!IS_ALIGNED(offset, 16))
+ goto invalid;
+
+ head = (struct _aarch64_ctx __user *)(base + offset);
__get_user_error(magic, &head->magic, err);
__get_user_error(size, &head->size, err);
if (err)
return err;
+ if (limit - offset < size)
+ goto invalid;
+
switch (magic) {
case 0:
if (size)
@@ -137,9 +178,7 @@ static int parse_user_sigframe(struct user_ctxs *user,
if (user->fpsimd)
goto invalid;
- if (offset > sizeof(sc->__reserved) -
- sizeof(*user->fpsimd) ||
- size < sizeof(*user->fpsimd))
+ if (size < sizeof(*user->fpsimd))
goto invalid;
user->fpsimd = (struct fpsimd_context __user *)head;
@@ -156,7 +195,7 @@ static int parse_user_sigframe(struct user_ctxs *user,
if (size < sizeof(*head))
goto invalid;
- if (size > sizeof(sc->__reserved) - (sizeof(*head) + offset))
+ if (limit - offset < size)
goto invalid;
offset += size;
@@ -241,13 +280,30 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
return 0;
}
+/* Determine the layout of optional records in the signal frame */
+static int setup_sigframe_layout(struct rt_sigframe_user_layout *user)
+{
+ user->fpsimd_offset = user->size;
+ user->size += round_up(sizeof(struct fpsimd_context), 16);
+
+ /* fault information, if valid */
+ if (current->thread.fault_code) {
+ user->esr_offset = user->size;
+ user->size += round_up(sizeof(struct esr_context), 16);
+ }
+
+ /* set the "end" magic */
+ user->end_offset = user->size;
+
+ return 0;
+}
+
+
static int setup_sigframe(struct rt_sigframe_user_layout *user,
struct pt_regs *regs, sigset_t *set)
{
int i, err = 0;
struct rt_sigframe __user *sf = user->sigframe;
- void *aux = sf->uc.uc_mcontext.__reserved;
- struct _aarch64_ctx *end;
/* set up the stack frame for unwinding */
__put_user_error(regs->regs[29], &sf->fp, err);
@@ -265,26 +321,29 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user,
err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(*set));
if (err == 0) {
- struct fpsimd_context *fpsimd_ctx =
- container_of(aux, struct fpsimd_context, head);
+ struct fpsimd_context __user *fpsimd_ctx =
+ apply_user_offset(user, user->fpsimd_offset);
err |= preserve_fpsimd_context(fpsimd_ctx);
- aux += sizeof(*fpsimd_ctx);
}
/* fault information, if valid */
- if (current->thread.fault_code) {
- struct esr_context *esr_ctx =
- container_of(aux, struct esr_context, head);
+ if (err == 0 && user->esr_offset) {
+ struct esr_context __user *esr_ctx =
+ apply_user_offset(user, user->esr_offset);
+
__put_user_error(ESR_MAGIC, &esr_ctx->head.magic, err);
__put_user_error(sizeof(*esr_ctx), &esr_ctx->head.size, err);
__put_user_error(current->thread.fault_code, &esr_ctx->esr, err);
- aux += sizeof(*esr_ctx);
}
/* set the "end" magic */
- end = aux;
- __put_user_error(0, &end->magic, err);
- __put_user_error(0, &end->size, err);
+ if (err == 0) {
+ struct _aarch64_ctx __user *end =
+ apply_user_offset(user, user->end_offset);
+
+ __put_user_error(0, &end->magic, err);
+ __put_user_error(0, &end->size, err);
+ }
return err;
}
@@ -293,10 +352,15 @@ static int get_sigframe(struct rt_sigframe_user_layout *user,
struct ksignal *ksig, struct pt_regs *regs)
{
unsigned long sp, sp_top;
+ int err;
- sp = sp_top = sigsp(regs->sp, ksig);
+ init_user_layout(user);
+ err = setup_sigframe_layout(user);
+ if (err)
+ return err;
- sp = (sp - sizeof(struct rt_sigframe)) & ~15;
+ sp = sp_top = sigsp(regs->sp, ksig);
+ sp = (sp & ~15) - sigframe_size(user);
user->sigframe = (struct rt_sigframe __user *)sp;
/*
--
2.1.4
^ permalink raw reply related [flat|nested] 61+ messages in thread
* [RFC PATCH 03/29] arm64: signal: factor out signal frame record allocation
2016-11-25 19:38 [RFC PATCH 00/29] arm64: Scalable Vector Extension core support Dave Martin
2016-11-25 19:38 ` [RFC PATCH 01/29] arm64: signal: Refactor sigcontext parsing in rt_sigreturn Dave Martin
2016-11-25 19:38 ` [RFC PATCH 02/29] arm64: signal: factor frame layout and population into separate passes Dave Martin
@ 2016-11-25 19:38 ` Dave Martin
2016-11-25 19:38 ` [RFC PATCH 04/29] arm64: signal: Allocate extra sigcontext space as needed Dave Martin
` (27 subsequent siblings)
30 siblings, 0 replies; 61+ messages in thread
From: Dave Martin @ 2016-11-25 19:38 UTC (permalink / raw)
To: linux-arm-kernel
Factor out the allocator for signal frame optional records into a
separate function, to ensure consistency and facilitate later
expansion of the signal frame.
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
---
arch/arm64/kernel/signal.c | 43 ++++++++++++++++++++++++++++++++++++-------
1 file changed, 36 insertions(+), 7 deletions(-)
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index fc08371..653b614 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -75,6 +75,22 @@ static size_t sigframe_size(struct rt_sigframe_user_layout const *user)
return round_up(max(user->size, sizeof(struct rt_sigframe)), 16);
}
+/*
+ * Allocate space for an optional record of <size> bytes in the user
+ * signal frame. The offset from the signal frame base address to the
+ * allocated block is assigned to *offset.
+ */
+static int sigframe_alloc(struct rt_sigframe_user_layout *user,
+ unsigned long *offset, size_t size)
+{
+ size_t padded_size = round_up(size, 16);
+
+ *offset = user->size;
+ user->size += padded_size;
+
+ return 0;
+}
+
static void __user *apply_user_offset(
struct rt_sigframe_user_layout const *user, unsigned long offset)
{
@@ -283,19 +299,32 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
/* Determine the layout of optional records in the signal frame */
static int setup_sigframe_layout(struct rt_sigframe_user_layout *user)
{
- user->fpsimd_offset = user->size;
- user->size += round_up(sizeof(struct fpsimd_context), 16);
+ int err;
+
+ err = sigframe_alloc(user, &user->fpsimd_offset,
+ sizeof(struct fpsimd_context));
+ if (err)
+ return err;
/* fault information, if valid */
if (current->thread.fault_code) {
- user->esr_offset = user->size;
- user->size += round_up(sizeof(struct esr_context), 16);
+ err = sigframe_alloc(user, &user->esr_offset,
+ sizeof(struct esr_context));
+ if (err)
+ return err;
}
- /* set the "end" magic */
- user->end_offset = user->size;
+ /*
+ * Allocate space for the terminator record.
+ * HACK: here we undo the reservation of space for the end record.
+ * This bodge should be replaced with a cleaner approach later on.
+ */
+ user->limit = offsetof(struct rt_sigframe, uc.uc_mcontext.__reserved) +
+ sizeof(user->sigframe->uc.uc_mcontext.__reserved);
- return 0;
+ err = sigframe_alloc(user, &user->end_offset,
+ sizeof(struct _aarch64_ctx));
+ return err;
}
--
2.1.4
^ permalink raw reply related [flat|nested] 61+ messages in thread
* [RFC PATCH 04/29] arm64: signal: Allocate extra sigcontext space as needed
2016-11-25 19:38 [RFC PATCH 00/29] arm64: Scalable Vector Extension core support Dave Martin
` (2 preceding siblings ...)
2016-11-25 19:38 ` [RFC PATCH 03/29] arm64: signal: factor out signal frame record allocation Dave Martin
@ 2016-11-25 19:38 ` Dave Martin
2016-11-25 19:38 ` [RFC PATCH 05/29] arm64: signal: Parse extra_context during sigreturn Dave Martin
` (26 subsequent siblings)
30 siblings, 0 replies; 61+ messages in thread
From: Dave Martin @ 2016-11-25 19:38 UTC (permalink / raw)
To: linux-arm-kernel
This patch modifies the context block allocator to create an
extra_context expansion block as necessary, and adds the necessary
code to populate, parse and decode this block.
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
---
arch/arm64/include/uapi/asm/sigcontext.h | 27 ++++++++
arch/arm64/kernel/signal.c | 112 +++++++++++++++++++++++++------
2 files changed, 120 insertions(+), 19 deletions(-)
diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h
index ee469be..1af8437 100644
--- a/arch/arm64/include/uapi/asm/sigcontext.h
+++ b/arch/arm64/include/uapi/asm/sigcontext.h
@@ -61,4 +61,31 @@ struct esr_context {
__u64 esr;
};
+/*
+ * Pointer to extra space for additional structures that don't fit in
+ * sigcontext.__reserved[]. Note:
+ *
+ * 1) fpsimd_context, esr_context and extra_context must be placed in
+ * sigcontext.__reserved[] if present. They cannot be placed in the
+ * extra space. Any other record can be placed either in the extra
+ * space or in sigcontext.__reserved[].
+ *
+ * 2) There must not be more than one extra_context.
+ *
+ * 3) If extra_context is present, it must be followed immediately in
+ * sigcontext.__reserved[] by the terminating null _aarch64_ctx (i.e.,
+ * extra_context must be the last record in sigcontext.__reserved[]
+ * except for the terminator).
+ *
+ * 4) The extra space must itself be terminated with a null
+ * _aarch64_ctx.
+ */
+#define EXTRA_MAGIC 0x45585401
+
+struct extra_context {
+ struct _aarch64_ctx head;
+ void *data; /* 16-byte aligned pointer to the extra space */
+ __u32 size; /* size in bytes of the extra space */
+};
+
#endif /* _UAPI__ASM_SIGCONTEXT_H */
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 653b614..ea3f6bf 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -25,6 +25,7 @@
#include <linux/freezer.h>
#include <linux/stddef.h>
#include <linux/uaccess.h>
+#include <linux/sizes.h>
#include <linux/string.h>
#include <linux/tracehook.h>
#include <linux/ratelimit.h>
@@ -56,18 +57,22 @@ struct rt_sigframe_user_layout {
unsigned long fpsimd_offset;
unsigned long esr_offset;
+ unsigned long extra_offset;
unsigned long end_offset;
};
static void init_user_layout(struct rt_sigframe_user_layout *user)
{
+ const size_t __reserved_size =
+ sizeof(user->sigframe->uc.uc_mcontext.__reserved);
+ const size_t terminator_size =
+ round_up(sizeof(struct _aarch64_ctx), 16);
+
memset(user, 0, sizeof(*user));
user->size = offsetof(struct rt_sigframe, uc.uc_mcontext.__reserved);
-
- user->limit = user->size +
- sizeof(user->sigframe->uc.uc_mcontext.__reserved) -
- round_up(sizeof(struct _aarch64_ctx), 16);
- /* ^ reserve space for terminator */
+ user->limit = user->size + (__reserved_size - terminator_size -
+ sizeof(struct extra_context));
+ /* Reserve space for extension and terminator ^ */
}
static size_t sigframe_size(struct rt_sigframe_user_layout const *user)
@@ -75,6 +80,49 @@ static size_t sigframe_size(struct rt_sigframe_user_layout const *user)
return round_up(max(user->size, sizeof(struct rt_sigframe)), 16);
}
+/* Sanity limit on the maximum size of signal frame we'll try to generate. */
+/* This is NOT ABI. */
+#define SIGFRAME_MAXSZ SZ_64K
+
+static int __sigframe_alloc(struct rt_sigframe_user_layout *user,
+ unsigned long *offset, size_t size, bool extend)
+{
+ size_t padded_size = round_up(size, 16);
+
+ if (padded_size > user->limit - user->size &&
+ !user->extra_offset &&
+ extend) {
+ int ret;
+
+ ret = __sigframe_alloc(user, &user->extra_offset,
+ sizeof(struct extra_context), false);
+ if (ret)
+ return ret;
+
+ /*
+ * Further allocations must go after the fixed-size
+ * part of the signal frame:
+ */
+ user->size = round_up(sizeof(struct rt_sigframe), 16);
+
+ /*
+ * Allow expansion up to SIGFRAME_MAXSZ, ensuring space for
+ * the terminator:
+ */
+ user->limit = SIGFRAME_MAXSZ -
+ round_up(sizeof(struct _aarch64_ctx), 16);
+ }
+
+ /* Still not enough space? Bad luck! */
+ if (padded_size > user->limit - user->size)
+ return -ENOMEM;
+
+ *offset = user->size;
+ user->size += padded_size;
+
+ return 0;
+}
+
/*
* Allocate space for an optional record of <size> bytes in the user
* signal frame. The offset from the signal frame base address to the
@@ -83,11 +131,26 @@ static size_t sigframe_size(struct rt_sigframe_user_layout const *user)
static int sigframe_alloc(struct rt_sigframe_user_layout *user,
unsigned long *offset, size_t size)
{
- size_t padded_size = round_up(size, 16);
+ return __sigframe_alloc(user, offset, size, true);
+}
- *offset = user->size;
- user->size += padded_size;
+/* Allocate the null terminator record and prevent further allocations */
+static int sigframe_alloc_end(struct rt_sigframe_user_layout *user)
+{
+ int ret;
+ const size_t terminator_size =
+ round_up(sizeof(struct _aarch64_ctx), 16);
+
+ /* Un-reserve the space reserved for the terminator: */
+ user->limit += terminator_size;
+
+ ret = sigframe_alloc(user, &user->end_offset,
+ sizeof(struct _aarch64_ctx));
+ if (ret)
+ return ret;
+ /* Prevent further allocation: */
+ user->limit = user->size;
return 0;
}
@@ -314,17 +377,7 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user)
return err;
}
- /*
- * Allocate space for the terminator record.
- * HACK: here we undo the reservation of space for the end record.
- * This bodge should be replaced with a cleaner approach later on.
- */
- user->limit = offsetof(struct rt_sigframe, uc.uc_mcontext.__reserved) +
- sizeof(user->sigframe->uc.uc_mcontext.__reserved);
-
- err = sigframe_alloc(user, &user->end_offset,
- sizeof(struct _aarch64_ctx));
- return err;
+ return sigframe_alloc_end(user);
}
@@ -365,6 +418,27 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user,
__put_user_error(current->thread.fault_code, &esr_ctx->esr, err);
}
+ if (err == 0 && user->extra_offset) {
+ struct extra_context __user *extra =
+ apply_user_offset(user, user->extra_offset);
+ struct _aarch64_ctx __user *end =
+ (struct _aarch64_ctx __user *)((char __user *)extra +
+ round_up(sizeof(*extra), 16));
+ void __user *extra_data = apply_user_offset(user,
+ round_up(sizeof(struct rt_sigframe), 16));
+ u32 extra_size = round_up(user->size, 16) -
+ round_up(sizeof(struct rt_sigframe), 16);
+
+ __put_user_error(EXTRA_MAGIC, &extra->head.magic, err);
+ __put_user_error(sizeof(*extra), &extra->head.size, err);
+ __put_user_error(extra_data, &extra->data, err);
+ __put_user_error(extra_size, &extra->size, err);
+
+ /* Add the terminator */
+ __put_user_error(0, &end->magic, err);
+ __put_user_error(0, &end->size, err);
+ }
+
/* set the "end" magic */
if (err == 0) {
struct _aarch64_ctx __user *end =
--
2.1.4
^ permalink raw reply related [flat|nested] 61+ messages in thread
* [RFC PATCH 05/29] arm64: signal: Parse extra_context during sigreturn
2016-11-25 19:38 [RFC PATCH 00/29] arm64: Scalable Vector Extension core support Dave Martin
` (3 preceding siblings ...)
2016-11-25 19:38 ` [RFC PATCH 04/29] arm64: signal: Allocate extra sigcontext space as needed Dave Martin
@ 2016-11-25 19:38 ` Dave Martin
2016-11-25 19:38 ` [RFC PATCH 06/29] arm64: efi: Add missing Kconfig dependency on KERNEL_MODE_NEON Dave Martin
` (25 subsequent siblings)
30 siblings, 0 replies; 61+ messages in thread
From: Dave Martin @ 2016-11-25 19:38 UTC (permalink / raw)
To: linux-arm-kernel
If extra_context is present, parse it.
To avoid abuse by userspace, this patch attempts to ensure:
* that no more than one extra_context is accepted;
* that the extra_context is a sensible size;
* that the extra context data is properly aligned.
This patch relies on the user accessors in order to ensure that the
user-supplied extra context data pointer is an honest userspace
address.
Other than that, the kernel doesn't care specially whether the
pointer supplied is sensible (e.g., not garbage, doesn't overlap
sigcontext.__reserved[], etc.) since this cannot harm the kernel.
More checks may be added later in order to aid debugging of
botched sigreturns from userspace.
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
---
arch/arm64/kernel/signal.c | 40 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 40 insertions(+)
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index ea3f6bf..c7175a3 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -221,6 +221,7 @@ static int parse_user_sigframe(struct user_ctxs *user,
char __user *base = (char __user *)&sc->__reserved;
size_t offset = 0;
size_t limit = sizeof(sc->__reserved);
+ bool have_extra_context = false;
user->fpsimd = NULL;
@@ -230,6 +231,9 @@ static int parse_user_sigframe(struct user_ctxs *user,
while (1) {
int err = 0;
u32 magic, size;
+ struct extra_context const __user *extra;
+ void __user *extra_data;
+ u32 extra_size;
if (limit - offset < sizeof(*head))
goto invalid;
@@ -267,6 +271,42 @@ static int parse_user_sigframe(struct user_ctxs *user,
/* ignore */
break;
+ case EXTRA_MAGIC:
+ if (have_extra_context)
+ goto invalid;
+
+ if (size < sizeof(*extra))
+ goto invalid;
+
+ extra = (struct extra_context const __user *)head;
+ __get_user_error(extra_data, &extra->data, err);
+ __get_user_error(extra_size, &extra->size, err);
+ if (err)
+ return err;
+
+ /* Prevent looping/repeated parsing of extra_context */
+ have_extra_context = true;
+
+ /*
+ * Rely on the __user accessors to reject bogus
+ * pointers.
+ */
+ base = extra_data;
+ if (!IS_ALIGNED((unsigned long)base, 16))
+ goto invalid;
+
+ /* Reject "unreasonably large" frames: */
+ limit = extra_size;
+ if (limit > SIGFRAME_MAXSZ - sizeof(sc->__reserved))
+ goto invalid;
+
+ /*
+ * Ignore trailing terminator in __reserved[]
+ * and start parsing extra_data:
+ */
+ offset = 0;
+ continue;
+
default:
goto invalid;
}
--
2.1.4
^ permalink raw reply related [flat|nested] 61+ messages in thread
* [RFC PATCH 06/29] arm64: efi: Add missing Kconfig dependency on KERNEL_MODE_NEON
2016-11-25 19:38 [RFC PATCH 00/29] arm64: Scalable Vector Extension core support Dave Martin
` (4 preceding siblings ...)
2016-11-25 19:38 ` [RFC PATCH 05/29] arm64: signal: Parse extra_context during sigreturn Dave Martin
@ 2016-11-25 19:38 ` Dave Martin
2016-11-25 20:25 ` Ard Biesheuvel
2016-11-25 19:38 ` [RFC PATCH 07/29] arm64/sve: Allow kernel-mode NEON to be disabled in Kconfig Dave Martin
` (24 subsequent siblings)
30 siblings, 1 reply; 61+ messages in thread
From: Dave Martin @ 2016-11-25 19:38 UTC (permalink / raw)
To: linux-arm-kernel
The EFI runtime services ABI permits calls to EFI to clobber
certain FPSIMD/NEON registers, as per the AArch64 procedure call
standard.
Saving/restoring the clobbered registers around such calls needs
KERNEL_MODE_NEON, but the dependency is missing from Kconfig.
This patch adds the missing dependency.
This will aid bisection of the patches implementing support for the
ARM Scalable Vector Extension (SVE).
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
---
arch/arm64/Kconfig | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 969ef88..d008bb6 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -963,6 +963,7 @@ config EFI_STUB
config EFI
bool "UEFI runtime support"
depends on OF && !CPU_BIG_ENDIAN
+ depends on KERNEL_MODE_NEON
select LIBFDT
select UCS2_STRING
select EFI_PARAMS_FROM_FDT
--
2.1.4
^ permalink raw reply related [flat|nested] 61+ messages in thread
* [RFC PATCH 06/29] arm64: efi: Add missing Kconfig dependency on KERNEL_MODE_NEON
2016-11-25 19:38 ` [RFC PATCH 06/29] arm64: efi: Add missing Kconfig dependency on KERNEL_MODE_NEON Dave Martin
@ 2016-11-25 20:25 ` Ard Biesheuvel
0 siblings, 0 replies; 61+ messages in thread
From: Ard Biesheuvel @ 2016-11-25 20:25 UTC (permalink / raw)
To: linux-arm-kernel
On 25 November 2016 at 19:38, Dave Martin <Dave.Martin@arm.com> wrote:
> The EFI runtime services ABI permits calls to EFI to clobber
> certain FPSIMD/NEON registers, as per the AArch64 procedure call
> standard.
>
> Saving/restoring the clobbered registers around such calls needs
> KERNEL_MODE_NEON, but the dependency is missing from Kconfig.
>
> This patch adds the missing dependency.
>
> This will aid bisection of the patches implementing support for the
> ARM Scalable Vector Extension (SVE).
>
> Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> ---
> arch/arm64/Kconfig | 1 +
> 1 file changed, 1 insertion(+)
>
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 969ef88..d008bb6 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -963,6 +963,7 @@ config EFI_STUB
> config EFI
> bool "UEFI runtime support"
> depends on OF && !CPU_BIG_ENDIAN
> + depends on KERNEL_MODE_NEON
> select LIBFDT
> select UCS2_STRING
> select EFI_PARAMS_FROM_FDT
> --
> 2.1.4
>
^ permalink raw reply [flat|nested] 61+ messages in thread
* [RFC PATCH 07/29] arm64/sve: Allow kernel-mode NEON to be disabled in Kconfig
2016-11-25 19:38 [RFC PATCH 00/29] arm64: Scalable Vector Extension core support Dave Martin
` (5 preceding siblings ...)
2016-11-25 19:38 ` [RFC PATCH 06/29] arm64: efi: Add missing Kconfig dependency on KERNEL_MODE_NEON Dave Martin
@ 2016-11-25 19:38 ` Dave Martin
2016-11-25 19:38 ` [RFC PATCH 08/29] arm64/sve: Low-level save/restore code Dave Martin
` (23 subsequent siblings)
30 siblings, 0 replies; 61+ messages in thread
From: Dave Martin @ 2016-11-25 19:38 UTC (permalink / raw)
To: linux-arm-kernel
Currently, support for kernel-mode NEON alongside the Scalable
Vector Extension doesn't work, so allow KERNEL_MODE_NEON to be
disabled.
This is only needed for bisectability of the SVE patches and will
be removed later.
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
---
arch/arm64/Kconfig | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index d008bb6..1bdcaf1 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -223,9 +223,6 @@ config SWIOTLB
config IOMMU_HELPER
def_bool SWIOTLB
-config KERNEL_MODE_NEON
- def_bool y
-
config FIX_EARLYCON_MEM
def_bool y
@@ -268,6 +265,10 @@ endmenu
menu "Kernel Features"
+config KERNEL_MODE_NEON
+ bool "Support NEON/FPSIMD code in the kernel"
+ default y
+
menu "ARM errata workarounds via the alternatives framework"
config ARM64_ERRATUM_826319
--
2.1.4
^ permalink raw reply related [flat|nested] 61+ messages in thread
* [RFC PATCH 08/29] arm64/sve: Low-level save/restore code
2016-11-25 19:38 [RFC PATCH 00/29] arm64: Scalable Vector Extension core support Dave Martin
` (6 preceding siblings ...)
2016-11-25 19:38 ` [RFC PATCH 07/29] arm64/sve: Allow kernel-mode NEON to be disabled in Kconfig Dave Martin
@ 2016-11-25 19:38 ` Dave Martin
2016-11-25 19:38 ` [RFC PATCH 09/29] arm64/sve: Boot-time feature detection and reporting Dave Martin
` (22 subsequent siblings)
30 siblings, 0 replies; 61+ messages in thread
From: Dave Martin @ 2016-11-25 19:38 UTC (permalink / raw)
To: linux-arm-kernel
This patch adds low-level save/restore for the Scalable Vector
Extension.
This is helper code only, and is not used for anything yet.
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
---
arch/arm64/Kconfig | 12 +++
arch/arm64/include/asm/fpsimd.h | 3 +
arch/arm64/include/asm/fpsimdmacros.h | 145 ++++++++++++++++++++++++++++++++++
arch/arm64/kernel/entry-fpsimd.S | 17 ++++
4 files changed, 177 insertions(+)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 1bdcaf1..cd6c846 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -876,6 +876,18 @@ config ARM64_UAO
endmenu
+config ARM64_SVE
+ bool "ARM Scalable Vector Extension support"
+ default y
+ depends on !KERNEL_MODE_NEON # until it works with SVE
+ help
+ The Scalable Vector Extension (SVE) is an extension to the AArch64
+ execution state which complements and extends the SIMD functionality
+ of the base architecture to support much larger vectors and to enable
+ additional vectorisation opportunities.
+
+ To enable use of this extension on CPUs that implement it, say Y.
+
config ARM64_MODULE_CMODEL_LARGE
bool
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 50f559f..92f45ee 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -81,6 +81,9 @@ extern void fpsimd_save_partial_state(struct fpsimd_partial_state *state,
u32 num_regs);
extern void fpsimd_load_partial_state(struct fpsimd_partial_state *state);
+extern void sve_save_state(void *state, u32 *pfpsr);
+extern void sve_load_state(void const *state, u32 const *pfpsr);
+
#endif
#endif
diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
index a2daf12..e2bb032 100644
--- a/arch/arm64/include/asm/fpsimdmacros.h
+++ b/arch/arm64/include/asm/fpsimdmacros.h
@@ -131,3 +131,148 @@
ldp q0, q1, [\state, #-16 * 0 - 16]
0:
.endm
+
+.macro _check_reg nr
+ .if (\nr) < 0 || (\nr) > 31
+ .error "Bad register number \nr."
+ .endif
+.endm
+
+.macro _check_zreg znr
+ .if (\znr) < 0 || (\znr) > 31
+ .error "Bad Scalable Vector Extension vector register number \znr."
+ .endif
+.endm
+
+.macro _check_preg pnr
+ .if (\pnr) < 0 || (\pnr) > 15
+ .error "Bad Scalable Vector Extension predicate register number \pnr."
+ .endif
+.endm
+
+.macro _check_num n, min, max
+ .if (\n) < (\min) || (\n) > (\max)
+ .error "Number \n out of range [\min,\max]"
+ .endif
+.endm
+
+.macro _zstrv znt, nspb, ioff=0
+ _check_zreg \znt
+ _check_reg \nspb
+ _check_num (\ioff), -0x100, 0xff
+ .inst 0xe5804000 \
+ | (\znt) \
+ | ((\nspb) << 5) \
+ | (((\ioff) & 7) << 10) \
+ | (((\ioff) & 0x1f8) << 13)
+.endm
+
+.macro _zldrv znt, nspb, ioff=0
+ _check_zreg \znt
+ _check_reg \nspb
+ _check_num (\ioff), -0x100, 0xff
+ .inst 0x85804000 \
+ | (\znt) \
+ | ((\nspb) << 5) \
+ | (((\ioff) & 7) << 10) \
+ | (((\ioff) & 0x1f8) << 13)
+.endm
+
+.macro _zstrp pnt, nspb, ioff=0
+ _check_preg \pnt
+ _check_reg \nspb
+ _check_num (\ioff), -0x100, 0xff
+ .inst 0xe5800000 \
+ | (\pnt) \
+ | ((\nspb) << 5) \
+ | (((\ioff) & 7) << 10) \
+ | (((\ioff) & 0x1f8) << 13)
+.endm
+
+.macro _zldrp pnt, nspb, ioff=0
+ _check_preg \pnt
+ _check_reg \nspb
+ _check_num (\ioff), -0x100, 0xff
+ .inst 0x85800000 \
+ | (\pnt) \
+ | ((\nspb) << 5) \
+ | (((\ioff) & 7) << 10) \
+ | (((\ioff) & 0x1f8) << 13)
+.endm
+
+.macro _zrdvl nspd, is1
+ _check_reg \nspd
+ _check_num (\is1), -0x20, 0x1f
+ .inst 0x04bf5000 \
+ | (\nspd) \
+ | (((\is1) & 0x3f) << 5)
+.endm
+
+.macro _zrdffr pnd
+ _check_preg \pnd
+ .inst 0x2519f000 \
+ | (\pnd)
+.endm
+
+.macro _zwrffr pnd
+ _check_preg \pnd
+ .inst 0x25289000 \
+ | ((\pnd) << 5)
+.endm
+
+.macro for from, to, insn
+ .if (\from) >= (\to)
+ \insn (\from)
+ .exitm
+ .endif
+
+ for \from, ((\from) + (\to)) / 2, \insn
+ for ((\from) + (\to)) / 2 + 1, \to, \insn
+.endm
+
+.macro sve_save nb, xpfpsr, ntmp
+ .macro savez n
+ _zstrv \n, \nb, (\n) - 34
+ .endm
+
+ .macro savep n
+ _zstrp \n, \nb, (\n) - 16
+ .endm
+
+ for 0, 31, savez
+ for 0, 15, savep
+ _zrdffr 0
+ _zstrp 0, \nb
+ _zldrp 0, \nb, -16
+
+ mrs x\ntmp, fpsr
+ str w\ntmp, [\xpfpsr]
+ mrs x\ntmp, fpcr
+ str w\ntmp, [\xpfpsr, #4]
+
+ .purgem savez
+ .purgem savep
+.endm
+
+.macro sve_load nb, xpfpsr, ntmp
+ .macro loadz n
+ _zldrv \n, \nb, (\n) - 34
+ .endm
+
+ .macro loadp n
+ _zldrp \n, \nb, (\n) - 16
+ .endm
+
+ for 0, 31, loadz
+ _zldrp 0, \nb
+ _zwrffr 0
+ for 0, 15, loadp
+
+ ldr w\ntmp, [\xpfpsr]
+ msr fpsr, x\ntmp
+ ldr w\ntmp, [\xpfpsr, #4]
+ msr fpcr, x\ntmp
+
+ .purgem loadz
+ .purgem loadp
+.endm
diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
index c44a82f..5dcec55 100644
--- a/arch/arm64/kernel/entry-fpsimd.S
+++ b/arch/arm64/kernel/entry-fpsimd.S
@@ -65,3 +65,20 @@ ENTRY(fpsimd_load_partial_state)
ENDPROC(fpsimd_load_partial_state)
#endif
+
+#ifdef CONFIG_ARM64_SVE
+ENTRY(sve_save_state)
+ sve_save 0, x1, 2
+ ret
+ENDPROC(sve_save_state)
+
+ENTRY(sve_load_state)
+ sve_load 0, x1, 2
+ ret
+ENDPROC(sve_load_state)
+
+ENTRY(sve_get_vl)
+ _zrdvl 0, 1
+ ret
+ENDPROC(sve_get_vl)
+#endif /* CONFIG_ARM64_SVE */
--
2.1.4
^ permalink raw reply related [flat|nested] 61+ messages in thread
* [RFC PATCH 09/29] arm64/sve: Boot-time feature detection and reporting
2016-11-25 19:38 [RFC PATCH 00/29] arm64: Scalable Vector Extension core support Dave Martin
` (7 preceding siblings ...)
2016-11-25 19:38 ` [RFC PATCH 08/29] arm64/sve: Low-level save/restore code Dave Martin
@ 2016-11-25 19:38 ` Dave Martin
2016-11-25 19:38 ` [RFC PATCH 10/29] arm64/sve: Boot-time feature enablement Dave Martin
` (21 subsequent siblings)
30 siblings, 0 replies; 61+ messages in thread
From: Dave Martin @ 2016-11-25 19:38 UTC (permalink / raw)
To: linux-arm-kernel
This patch adds feature detection for the ARM Scalable Vector
Extension, and adds basic informative feature reporting via
/proc/cpuinfo.
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
---
arch/arm64/include/asm/sysreg.h | 1 +
arch/arm64/include/uapi/asm/hwcap.h | 1 +
arch/arm64/kernel/cpufeature.c | 3 +++
arch/arm64/kernel/cpuinfo.c | 1 +
4 files changed, 6 insertions(+)
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 6c80b36..ccce9ad 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -113,6 +113,7 @@
#define ID_AA64ISAR0_AES_SHIFT 4
/* id_aa64pfr0 */
+#define ID_AA64PFR0_SVE_SHIFT 32
#define ID_AA64PFR0_GIC_SHIFT 24
#define ID_AA64PFR0_ASIMD_SHIFT 20
#define ID_AA64PFR0_FP_SHIFT 16
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index a739287..f0de828 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -30,5 +30,6 @@
#define HWCAP_ATOMICS (1 << 8)
#define HWCAP_FPHP (1 << 9)
#define HWCAP_ASIMDHP (1 << 10)
+#define HWCAP_SVE (1 << 11)
#endif /* _UAPI__ASM_HWCAP_H */
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index c02504e..5126288 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -856,6 +856,9 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP),
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD),
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_ASIMDHP),
+#ifdef CONFIG_ARM64_SVE
+ HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SVE),
+#endif
{},
};
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index b3d5b3e..53e9fe2 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -63,6 +63,7 @@ static const char *const hwcap_str[] = {
"atomics",
"fphp",
"asimdhp",
+ "sve",
NULL
};
--
2.1.4
^ permalink raw reply related [flat|nested] 61+ messages in thread
* [RFC PATCH 10/29] arm64/sve: Boot-time feature enablement
2016-11-25 19:38 [RFC PATCH 00/29] arm64: Scalable Vector Extension core support Dave Martin
` (8 preceding siblings ...)
2016-11-25 19:38 ` [RFC PATCH 09/29] arm64/sve: Boot-time feature detection and reporting Dave Martin
@ 2016-11-25 19:38 ` Dave Martin
2016-11-25 19:38 ` [RFC PATCH 11/29] arm64/sve: Expand task_struct for Scalable Vector Extension state Dave Martin
` (20 subsequent siblings)
30 siblings, 0 replies; 61+ messages in thread
From: Dave Martin @ 2016-11-25 19:38 UTC (permalink / raw)
To: linux-arm-kernel
This patch enables Scalable Vector Extension access for the kernel
on boot.
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
---
arch/arm64/include/asm/kvm_arm.h | 1 +
arch/arm64/include/asm/sysreg.h | 10 ++++++++++
arch/arm64/kernel/head.S | 16 +++++++++++++++-
arch/arm64/mm/proc.S | 22 +++++++++++++++++++++-
4 files changed, 47 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 2a2752b..ae7afb2 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -185,6 +185,7 @@
#define CPTR_EL2_TCPAC (1 << 31)
#define CPTR_EL2_TTA (1 << 20)
#define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT)
+#define CPTR_EL2_TZ (1 << 8)
#define CPTR_EL2_DEFAULT 0x000033ff
/* Hyp Debug Configuration Register bits */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index ccce9ad..09a44b3 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -302,4 +302,14 @@ static inline void config_sctlr_el1(u32 clear, u32 set)
#endif
+#define ZIDR_EL1 sys_reg(3, 0, 0, 0, 7)
+#define ZCR_EL1 sys_reg(3, 0, 1, 2, 0)
+#define ZCR_EL2 sys_reg(3, 4, 1, 2, 0)
+
+#define ZCR_EL1_LEN_MASK 0x1ff
+
+#define CPACR_EL1_ZEN_EL1EN (1 << 16)
+#define CPACR_EL1_ZEN_EL0EN (1 << 17)
+#define CPACR_EL1_ZEN (CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN)
+
#endif /* __ASM_SYSREG_H */
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 332e331..ae4448f 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -572,9 +572,23 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems
/* Coprocessor traps. */
mov x0, #0x33ff
+
+ /* SVE register access */
+ mrs x1, id_aa64pfr0_el1
+ ubfx x1, x1, #ID_AA64PFR0_SVE_SHIFT, #4
+ cbz x1, 4f
+
+ bic x0, x0, #CPTR_EL2_TZ // Disable SVE traps to EL2
msr cptr_el2, x0 // Disable copro. traps to EL2
-1:
+ isb
+
+ mrs_s x1, ZIDR_EL1 // Scalable Vector Extension:
+ and x1, x1, #ZCR_EL1_LEN_MASK // Enable full vector length
+ msr_s ZCR_EL2, x1 // for EL1.
+ b 1f
+4: msr cptr_el2, x0 // Disable copro. traps to EL2
+1:
#ifdef CONFIG_COMPAT
msr hstr_el2, xzr // Disable CP15 traps to EL2
#endif
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 352c73b..1da8160 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -27,6 +27,7 @@
#include <asm/pgtable-hwdef.h>
#include <asm/cpufeature.h>
#include <asm/alternative.h>
+#include <asm/sysreg.h>
#ifdef CONFIG_ARM64_64K_PAGES
#define TCR_TG_FLAGS TCR_TG0_64K | TCR_TG1_64K
@@ -184,12 +185,31 @@ ENTRY(__cpu_setup)
dsb nsh
mov x0, #3 << 20
+
+ /* SVE */
+ mrs x5, id_aa64pfr0_el1
+ ubfx x5, x5, #ID_AA64PFR0_SVE_SHIFT, #4
+ cbz x5, 1f
+
+ bic x0, x0, #CPACR_EL1_ZEN
+ orr x0, x0, #CPACR_EL1_ZEN_EL1EN // SVE: trap for EL0, not EL1
msr cpacr_el1, x0 // Enable FP/ASIMD
- mov x0, #1 << 12 // Reset mdscr_el1 and disable
+ isb
+
+ mrs_s x5, ZIDR_EL1 // SVE: Enable full vector len
+ and x5, x5, #ZCR_EL1_LEN_MASK // initially
+ msr_s ZCR_EL1, x5
+
+ b 2f
+
+1: msr cpacr_el1, x0 // Enable FP/ASIMD
+
+2: mov x0, #1 << 12 // Reset mdscr_el1 and disable
msr mdscr_el1, x0 // access to the DCC from EL0
isb // Unmask debug exceptions now,
enable_dbg // since this is per-cpu
reset_pmuserenr_el0 x0 // Disable PMU access from EL0
+
/*
* Memory region attributes for LPAE:
*
--
2.1.4
^ permalink raw reply related [flat|nested] 61+ messages in thread
* [RFC PATCH 11/29] arm64/sve: Expand task_struct for Scalable Vector Extension state
2016-11-25 19:38 [RFC PATCH 00/29] arm64: Scalable Vector Extension core support Dave Martin
` (9 preceding siblings ...)
2016-11-25 19:38 ` [RFC PATCH 10/29] arm64/sve: Boot-time feature enablement Dave Martin
@ 2016-11-25 19:38 ` Dave Martin
2016-11-25 19:39 ` [RFC PATCH 12/29] arm64/sve: Save/restore SVE state on context switch paths Dave Martin
` (19 subsequent siblings)
30 siblings, 0 replies; 61+ messages in thread
From: Dave Martin @ 2016-11-25 19:38 UTC (permalink / raw)
To: linux-arm-kernel
This patch expands task_struct to accommodate the Scalable Vector
Extension state.
The extra space is not used for anything yet.
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
---
arch/arm64/Kconfig | 1 +
arch/arm64/include/asm/fpsimd.h | 12 +++++++
arch/arm64/kernel/fpsimd.c | 71 ++++++++++++++++++++++++++++++++++++++++-
arch/arm64/kernel/process.c | 2 +-
arch/arm64/kernel/setup.c | 3 ++
5 files changed, 87 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index cd6c846..e8d04dd 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -19,6 +19,7 @@ config ARM64
select ARCH_SUPPORTS_NUMA_BALANCING
select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
select ARCH_WANT_FRAME_POINTERS
+ select ARCH_WANTS_DYNAMIC_TASK_STRUCT
select ARCH_HAS_UBSAN_SANITIZE_ALL
select ARM_AMBA
select ARM_ARCH_TIMER
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 92f45ee..1c41259 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -51,6 +51,15 @@ struct fpsimd_partial_state {
__uint128_t vregs[32];
};
+/*
+ * Scalable Vector Extension state structure template.
+ * The layout is vector length dependent, with vector length = vl * 16 bytes.
+ */
+#define fpsimd_sve_state(vl) { \
+ __uint128_t zregs[32][vl]; \
+ u16 pregs[16][vl]; \
+ u16 ffr[vl]; \
+}
#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/* Masks for extracting the FPSR and FPCR from the FPSCR */
@@ -81,8 +90,11 @@ extern void fpsimd_save_partial_state(struct fpsimd_partial_state *state,
u32 num_regs);
extern void fpsimd_load_partial_state(struct fpsimd_partial_state *state);
+extern void __init fpsimd_init_task_struct_size(void);
+
extern void sve_save_state(void *state, u32 *pfpsr);
extern void sve_load_state(void const *state, u32 const *pfpsr);
+extern unsigned int sve_get_vl(void);
#endif
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 394c61d..05eca45 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -26,6 +26,7 @@
#include <linux/hardirq.h>
#include <asm/fpsimd.h>
+#include <asm/cpufeature.h>
#include <asm/cputype.h>
#define FPEXC_IOF (1 << 0)
@@ -125,6 +126,47 @@ void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
send_sig_info(SIGFPE, &info, current);
}
+#ifdef CONFIG_ARM64_SVE
+
+static void *__task_sve_state(struct task_struct *task)
+{
+ return (char *)task + ALIGN(sizeof(*task), 16);
+}
+
+static void *__task_pffr(struct task_struct *task)
+{
+ unsigned int vl = sve_get_vl();
+
+ BUG_ON(vl % 16);
+ return (char *)__task_sve_state(task) + 34 * vl;
+}
+
+#else /* !CONFIG_ARM64_SVE */
+
+/* Turn any non-optimised out attempts to use these into a link error: */
+extern void *__task_sve_state(struct task_struct *task);
+extern void *__task_pffr(struct task_struct *task);
+
+#endif /* !CONFIG_ARM64_SVE */
+
+static void task_fpsimd_load(struct task_struct *task)
+{
+ if (IS_ENABLED(CONFIG_ARM64_SVE) && (elf_hwcap & HWCAP_SVE))
+ sve_load_state(__task_pffr(task),
+ &task->thread.fpsimd_state.fpsr);
+ else
+ fpsimd_load_state(&task->thread.fpsimd_state);
+}
+
+static void task_fpsimd_save(struct task_struct *task)
+{
+ if (IS_ENABLED(CONFIG_ARM64_SVE) && (elf_hwcap & HWCAP_SVE))
+ sve_save_state(__task_pffr(task),
+ &task->thread.fpsimd_state.fpsr);
+ else
+ fpsimd_save_state(&task->thread.fpsimd_state);
+}
+
void fpsimd_thread_switch(struct task_struct *next)
{
/*
@@ -157,8 +199,20 @@ void fpsimd_thread_switch(struct task_struct *next)
void fpsimd_flush_thread(void)
{
- memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
fpsimd_flush_task_state(current);
+
+ memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
+
+ if (IS_ENABLED(CONFIG_ARM64_SVE) && (elf_hwcap & HWCAP_SVE)) {
+ BUG_ON((char *)__task_sve_state(current) < (char *)current);
+ BUG_ON(arch_task_struct_size <
+ ((char *)__task_sve_state(current) - (char *)current));
+
+ memset(__task_sve_state(current), 0,
+ arch_task_struct_size -
+ ((char *)__task_sve_state(current) - (char *)current));
+ }
+
set_thread_flag(TIF_FOREIGN_FPSTATE);
}
@@ -315,6 +369,21 @@ static inline void fpsimd_hotplug_init(void)
static inline void fpsimd_hotplug_init(void) { }
#endif
+void __init fpsimd_init_task_struct_size(void)
+{
+ arch_task_struct_size = sizeof(struct task_struct);
+
+ if (IS_ENABLED(CONFIG_ARM64_SVE) &&
+ ((read_cpuid(ID_AA64PFR0_EL1) >> ID_AA64PFR0_SVE_SHIFT)
+ & 0xf) == 1) {
+ arch_task_struct_size = sizeof(struct task_struct) +
+ 35 * sve_get_vl();
+
+ pr_info("SVE: enabled with maximum %u bits per vector\n",
+ sve_get_vl() * 8);
+ }
+}
+
/*
* FP/SIMD support code initialisation.
*/
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 01753cd..7e19c3c 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -242,7 +242,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
if (current->mm)
fpsimd_preserve_current_state();
- *dst = *src;
+ memcpy(dst, src, arch_task_struct_size);
return 0;
}
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index f534f49..f0f551e 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -234,6 +234,9 @@ void __init setup_arch(char **cmdline_p)
pr_info("Boot CPU: AArch64 Processor [%08x]\n", read_cpuid_id());
sprintf(init_utsname()->machine, UTS_MACHINE);
+
+ fpsimd_init_task_struct_size();
+
init_mm.start_code = (unsigned long) _text;
init_mm.end_code = (unsigned long) _etext;
init_mm.end_data = (unsigned long) _edata;
--
2.1.4
^ permalink raw reply related [flat|nested] 61+ messages in thread
* [RFC PATCH 12/29] arm64/sve: Save/restore SVE state on context switch paths
2016-11-25 19:38 [RFC PATCH 00/29] arm64: Scalable Vector Extension core support Dave Martin
` (10 preceding siblings ...)
2016-11-25 19:38 ` [RFC PATCH 11/29] arm64/sve: Expand task_struct for Scalable Vector Extension state Dave Martin
@ 2016-11-25 19:39 ` Dave Martin
2016-11-25 19:39 ` [RFC PATCH 13/29] arm64/sve: Basic support for KERNEL_MODE_NEON Dave Martin
` (18 subsequent siblings)
30 siblings, 0 replies; 61+ messages in thread
From: Dave Martin @ 2016-11-25 19:39 UTC (permalink / raw)
To: linux-arm-kernel
This patch implements basic handling of the Scalable Vector
Extension state on the primary context switch paths.
This does *not* (correctly) handle the signal path, and doesn't do
save/restore for SVE-only accesses that don't affect the FPSIMD
state (i.e., FFR).
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
---
arch/arm64/kernel/fpsimd.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 05eca45..81cfdb5 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -175,7 +175,7 @@ void fpsimd_thread_switch(struct task_struct *next)
* 'current'.
*/
if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE))
- fpsimd_save_state(&current->thread.fpsimd_state);
+ task_fpsimd_save(current);
if (next->mm) {
/*
@@ -224,7 +224,7 @@ void fpsimd_preserve_current_state(void)
{
preempt_disable();
if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
- fpsimd_save_state(&current->thread.fpsimd_state);
+ task_fpsimd_save(current);
preempt_enable();
}
@@ -239,7 +239,7 @@ void fpsimd_restore_current_state(void)
if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
struct fpsimd_state *st = &current->thread.fpsimd_state;
- fpsimd_load_state(st);
+ task_fpsimd_load(current);
this_cpu_write(fpsimd_last_state, st);
st->cpu = smp_processor_id();
}
@@ -325,7 +325,7 @@ static int fpsimd_cpu_pm_notifier(struct notifier_block *self,
switch (cmd) {
case CPU_PM_ENTER:
if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE))
- fpsimd_save_state(&current->thread.fpsimd_state);
+ task_fpsimd_save(current);
this_cpu_write(fpsimd_last_state, NULL);
break;
case CPU_PM_EXIT:
--
2.1.4
^ permalink raw reply related [flat|nested] 61+ messages in thread
* [RFC PATCH 13/29] arm64/sve: Basic support for KERNEL_MODE_NEON
2016-11-25 19:38 [RFC PATCH 00/29] arm64: Scalable Vector Extension core support Dave Martin
` (11 preceding siblings ...)
2016-11-25 19:39 ` [RFC PATCH 12/29] arm64/sve: Save/restore SVE state on context switch paths Dave Martin
@ 2016-11-25 19:39 ` Dave Martin
2016-11-25 20:45 ` Ard Biesheuvel
2016-11-25 19:39 ` [RFC PATCH 14/29] Revert "arm64/sve: Allow kernel-mode NEON to be disabled in Kconfig" Dave Martin
` (17 subsequent siblings)
30 siblings, 1 reply; 61+ messages in thread
From: Dave Martin @ 2016-11-25 19:39 UTC (permalink / raw)
To: linux-arm-kernel
In order to enable CONFIG_KERNEL_MODE_NEON and things that rely on
it to be configured together with Scalable Vector Extension support
in the same kernel, this patch implements basic support for
saving/restoring the SVE state around kernel_neon_begin()...
kernel_neon_end().
This patch is not optimal and will generally save more state than
necessary, more often than necessary. Further optimisations can be
implemented in future patches.
This patch is not intended to allow general-purpose _SVE_ code to
execute in the kernel safely. That functionality may also follow
in later patches.
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
---
arch/arm64/Kconfig | 1 -
arch/arm64/kernel/fpsimd.c | 22 ++++++++++++++++++----
2 files changed, 18 insertions(+), 5 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index e8d04dd..7266761 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -880,7 +880,6 @@ endmenu
config ARM64_SVE
bool "ARM Scalable Vector Extension support"
default y
- depends on !KERNEL_MODE_NEON # until it works with SVE
help
The Scalable Vector Extension (SVE) is an extension to the AArch64
execution state which complements and extends the SIMD functionality
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 81cfdb5..cb947dd 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -282,11 +282,26 @@ static DEFINE_PER_CPU(struct fpsimd_partial_state, softirq_fpsimdstate);
*/
void kernel_neon_begin_partial(u32 num_regs)
{
+ preempt_disable();
+
+ /*
+ * For now, we have no special storage for SVE registers in
+ * interrupt context, so always save the userland SVE state
+ * if there is any, even for interrupts.
+ */
+ if (IS_ENABLED(CONFIG_ARM64_SVE) && (elf_hwcap & HWCAP_SVE) &&
+ current->mm &&
+ !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE)) {
+ fpsimd_save_state(&current->thread.fpsimd_state);
+ this_cpu_write(fpsimd_last_state, NULL);
+ }
+
if (in_interrupt()) {
struct fpsimd_partial_state *s = this_cpu_ptr(
in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
-
BUG_ON(num_regs > 32);
+
+ /* Save partial state for interrupted kernel-mode NEON code: */
fpsimd_save_partial_state(s, roundup(num_regs, 2));
} else {
/*
@@ -295,7 +310,6 @@ void kernel_neon_begin_partial(u32 num_regs)
* that there is no longer userland FPSIMD state in the
* registers.
*/
- preempt_disable();
if (current->mm &&
!test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
fpsimd_save_state(&current->thread.fpsimd_state);
@@ -310,9 +324,9 @@ void kernel_neon_end(void)
struct fpsimd_partial_state *s = this_cpu_ptr(
in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
fpsimd_load_partial_state(s);
- } else {
- preempt_enable();
}
+
+ preempt_enable();
}
EXPORT_SYMBOL(kernel_neon_end);
--
2.1.4
^ permalink raw reply related [flat|nested] 61+ messages in thread
* [RFC PATCH 13/29] arm64/sve: Basic support for KERNEL_MODE_NEON
2016-11-25 19:39 ` [RFC PATCH 13/29] arm64/sve: Basic support for KERNEL_MODE_NEON Dave Martin
@ 2016-11-25 20:45 ` Ard Biesheuvel
2016-11-26 11:30 ` Catalin Marinas
0 siblings, 1 reply; 61+ messages in thread
From: Ard Biesheuvel @ 2016-11-25 20:45 UTC (permalink / raw)
To: linux-arm-kernel
On 25 November 2016 at 19:39, Dave Martin <Dave.Martin@arm.com> wrote:
> In order to enable CONFIG_KERNEL_MODE_NEON and things that rely on
> it to be configured together with Scalable Vector Extension support
> in the same kernel, this patch implements basic support for
> saving/restoring the SVE state around kernel_neon_begin()...
> kernel_neon_end().
>
> This patch is not optimal and will generally save more state than
> necessary, more often than necessary. Further optimisations can be
> implemented in future patches.
>
> This patch is not intended to allow general-purpose _SVE_ code to
> execute in the kernel safely. That functionality may also follow
> in later patches.
>
> Signed-off-by: Dave Martin <Dave.Martin@arm.com>
> ---
> arch/arm64/Kconfig | 1 -
> arch/arm64/kernel/fpsimd.c | 22 ++++++++++++++++++----
> 2 files changed, 18 insertions(+), 5 deletions(-)
>
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index e8d04dd..7266761 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -880,7 +880,6 @@ endmenu
> config ARM64_SVE
> bool "ARM Scalable Vector Extension support"
> default y
> - depends on !KERNEL_MODE_NEON # until it works with SVE
> help
> The Scalable Vector Extension (SVE) is an extension to the AArch64
> execution state which complements and extends the SIMD functionality
> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> index 81cfdb5..cb947dd 100644
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -282,11 +282,26 @@ static DEFINE_PER_CPU(struct fpsimd_partial_state, softirq_fpsimdstate);
> */
> void kernel_neon_begin_partial(u32 num_regs)
> {
> + preempt_disable();
> +
> + /*
> + * For now, we have no special storage for SVE registers in
> + * interrupt context, so always save the userland SVE state
> + * if there is any, even for interrupts.
> + */
> + if (IS_ENABLED(CONFIG_ARM64_SVE) && (elf_hwcap & HWCAP_SVE) &&
> + current->mm &&
> + !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE)) {
> + fpsimd_save_state(&current->thread.fpsimd_state);
> + this_cpu_write(fpsimd_last_state, NULL);
> + }
> +
I am having trouble understanding why we need all of this if we don't
support SVE in the kernel. Could you elaborate?
> if (in_interrupt()) {
> struct fpsimd_partial_state *s = this_cpu_ptr(
> in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
> -
> BUG_ON(num_regs > 32);
> +
> + /* Save partial state for interrupted kernel-mode NEON code: */
> fpsimd_save_partial_state(s, roundup(num_regs, 2));
> } else {
> /*
> @@ -295,7 +310,6 @@ void kernel_neon_begin_partial(u32 num_regs)
> * that there is no longer userland FPSIMD state in the
> * registers.
> */
> - preempt_disable();
> if (current->mm &&
> !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
> > fpsimd_save_state(&current->thread.fpsimd_state);
> @@ -310,9 +324,9 @@ void kernel_neon_end(void)
> struct fpsimd_partial_state *s = this_cpu_ptr(
> in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
> fpsimd_load_partial_state(s);
> - } else {
> - preempt_enable();
> }
> +
> + preempt_enable();
> }
> EXPORT_SYMBOL(kernel_neon_end);
>
> --
> 2.1.4
>
^ permalink raw reply [flat|nested] 61+ messages in thread
* [RFC PATCH 13/29] arm64/sve: Basic support for KERNEL_MODE_NEON
2016-11-25 20:45 ` Ard Biesheuvel
@ 2016-11-26 11:30 ` Catalin Marinas
2016-11-28 11:47 ` Dave Martin
0 siblings, 1 reply; 61+ messages in thread
From: Catalin Marinas @ 2016-11-26 11:30 UTC (permalink / raw)
To: linux-arm-kernel
On Fri, Nov 25, 2016 at 08:45:02PM +0000, Ard Biesheuvel wrote:
> On 25 November 2016 at 19:39, Dave Martin <Dave.Martin@arm.com> wrote:
> > In order to enable CONFIG_KERNEL_MODE_NEON and things that rely on
> > it to be configured together with Scalable Vector Extension support
> > in the same kernel, this patch implements basic support for
> > saving/restoring the SVE state around kernel_neon_begin()...
> > kernel_neon_end().
> >
> > This patch is not optimal and will generally save more state than
> > necessary, more often than necessary. Further optimisations can be
> > implemented in future patches.
> >
> > This patch is not intended to allow general-purpose _SVE_ code to
> > execute in the kernel safely. That functionality may also follow
> > in later patches.
> >
> > Signed-off-by: Dave Martin <Dave.Martin@arm.com>
> > ---
> > arch/arm64/Kconfig | 1 -
> > arch/arm64/kernel/fpsimd.c | 22 ++++++++++++++++++----
> > 2 files changed, 18 insertions(+), 5 deletions(-)
> >
> > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> > index e8d04dd..7266761 100644
> > --- a/arch/arm64/Kconfig
> > +++ b/arch/arm64/Kconfig
> > @@ -880,7 +880,6 @@ endmenu
> > config ARM64_SVE
> > bool "ARM Scalable Vector Extension support"
> > default y
> > - depends on !KERNEL_MODE_NEON # until it works with SVE
> > help
> > The Scalable Vector Extension (SVE) is an extension to the AArch64
> > execution state which complements and extends the SIMD functionality
> > diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> > index 81cfdb5..cb947dd 100644
> > --- a/arch/arm64/kernel/fpsimd.c
> > +++ b/arch/arm64/kernel/fpsimd.c
> > @@ -282,11 +282,26 @@ static DEFINE_PER_CPU(struct fpsimd_partial_state, softirq_fpsimdstate);
> > */
> > void kernel_neon_begin_partial(u32 num_regs)
> > {
> > + preempt_disable();
> > +
> > + /*
> > + * For now, we have no special storage for SVE registers in
> > + * interrupt context, so always save the userland SVE state
> > + * if there is any, even for interrupts.
> > + */
> > + if (IS_ENABLED(CONFIG_ARM64_SVE) && (elf_hwcap & HWCAP_SVE) &&
> > + current->mm &&
> > + !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE)) {
> > > + fpsimd_save_state(&current->thread.fpsimd_state);
> > + this_cpu_write(fpsimd_last_state, NULL);
> > + }
> > +
>
> I am having trouble understanding why we need all of this if we don't
> support SVE in the kernel. Could you elaborate?
Dave knows all the details but a reason is that touching a Neon register
zeros the upper SVE state in the same vector register. So we can't
safely save/restore just the Neon part without corrupting the SVE state.
--
Catalin
^ permalink raw reply [flat|nested] 61+ messages in thread
* [RFC PATCH 13/29] arm64/sve: Basic support for KERNEL_MODE_NEON
2016-11-26 11:30 ` Catalin Marinas
@ 2016-11-28 11:47 ` Dave Martin
2016-11-28 12:06 ` Catalin Marinas
0 siblings, 1 reply; 61+ messages in thread
From: Dave Martin @ 2016-11-28 11:47 UTC (permalink / raw)
To: linux-arm-kernel
On Sat, Nov 26, 2016 at 11:30:42AM +0000, Catalin Marinas wrote:
> On Fri, Nov 25, 2016 at 08:45:02PM +0000, Ard Biesheuvel wrote:
> > On 25 November 2016 at 19:39, Dave Martin <Dave.Martin@arm.com> wrote:
> > > In order to enable CONFIG_KERNEL_MODE_NEON and things that rely on
> > > it to be configured together with Scalable Vector Extension support
> > > in the same kernel, this patch implements basic support for
> > > saving/restoring the SVE state around kernel_neon_begin()...
> > > kernel_neon_end().
> > >
> > > This patch is not optimal and will generally save more state than
> > > necessary, more often than necessary. Further optimisations can be
> > > implemented in future patches.
> > >
> > > This patch is not intended to allow general-purpose _SVE_ code to
> > > execute in the kernel safely. That functionality may also follow
> > > in later patches.
> > >
> > > Signed-off-by: Dave Martin <Dave.Martin@arm.com>
> > > ---
> > > arch/arm64/Kconfig | 1 -
> > > arch/arm64/kernel/fpsimd.c | 22 ++++++++++++++++++----
> > > 2 files changed, 18 insertions(+), 5 deletions(-)
> > >
> > > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> > > index e8d04dd..7266761 100644
> > > --- a/arch/arm64/Kconfig
> > > +++ b/arch/arm64/Kconfig
> > > @@ -880,7 +880,6 @@ endmenu
> > > config ARM64_SVE
> > > bool "ARM Scalable Vector Extension support"
> > > default y
> > > - depends on !KERNEL_MODE_NEON # until it works with SVE
> > > help
> > > The Scalable Vector Extension (SVE) is an extension to the AArch64
> > > execution state which complements and extends the SIMD functionality
> > > diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> > > index 81cfdb5..cb947dd 100644
> > > --- a/arch/arm64/kernel/fpsimd.c
> > > +++ b/arch/arm64/kernel/fpsimd.c
> > > @@ -282,11 +282,26 @@ static DEFINE_PER_CPU(struct fpsimd_partial_state, softirq_fpsimdstate);
> > > */
> > > void kernel_neon_begin_partial(u32 num_regs)
> > > {
> > > + preempt_disable();
> > > +
> > > + /*
> > > + * For now, we have no special storage for SVE registers in
> > > + * interrupt context, so always save the userland SVE state
> > > + * if there is any, even for interrupts.
> > > + */
> > > + if (IS_ENABLED(CONFIG_ARM64_SVE) && (elf_hwcap & HWCAP_SVE) &&
> > > + current->mm &&
> > > + !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE)) {
> > > + fpsimd_save_state(&current->thread.fpsimd_state);
> > > + this_cpu_write(fpsimd_last_state, NULL);
> > > + }
> > > +
> >
> > I am having trouble understanding why we need all of this if we don't
> > support SVE in the kernel. Could you elaborate?
>
> Dave knows all the details but a reason is that touching a Neon register
> zeros the upper SVE state in the same vector register. So we can't
> safely save/restore just the Neon part without corrupting the SVE state.
This is right -- this also means that EFI services can trash the upper
bits of an SVE vector register (as a side-effect of FPSIMD/NEON usage).
It's overkill to save/restore absolutely everything -- I ignore num_regs
for example -- but I wanted to keep things as simple as possible
initially.
Cheers
---Dave
^ permalink raw reply [flat|nested] 61+ messages in thread
* [RFC PATCH 13/29] arm64/sve: Basic support for KERNEL_MODE_NEON
2016-11-28 11:47 ` Dave Martin
@ 2016-11-28 12:06 ` Catalin Marinas
2016-11-28 12:29 ` Dave Martin
0 siblings, 1 reply; 61+ messages in thread
From: Catalin Marinas @ 2016-11-28 12:06 UTC (permalink / raw)
To: linux-arm-kernel
On Mon, Nov 28, 2016 at 11:47:26AM +0000, Dave P Martin wrote:
> On Sat, Nov 26, 2016 at 11:30:42AM +0000, Catalin Marinas wrote:
> > On Fri, Nov 25, 2016 at 08:45:02PM +0000, Ard Biesheuvel wrote:
> > > On 25 November 2016 at 19:39, Dave Martin <Dave.Martin@arm.com> wrote:
> > > > --- a/arch/arm64/kernel/fpsimd.c
> > > > +++ b/arch/arm64/kernel/fpsimd.c
> > > > @@ -282,11 +282,26 @@ static DEFINE_PER_CPU(struct fpsimd_partial_state, softirq_fpsimdstate);
> > > > */
> > > > void kernel_neon_begin_partial(u32 num_regs)
> > > > {
> > > > + preempt_disable();
> > > > +
> > > > + /*
> > > > + * For now, we have no special storage for SVE registers in
> > > > + * interrupt context, so always save the userland SVE state
> > > > + * if there is any, even for interrupts.
> > > > + */
> > > > + if (IS_ENABLED(CONFIG_ARM64_SVE) && (elf_hwcap & HWCAP_SVE) &&
> > > > + current->mm &&
> > > > + !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE)) {
> > > > + fpsimd_save_state(&current->thread.fpsimd_state);
> > > > + this_cpu_write(fpsimd_last_state, NULL);
> > > > + }
> > > > +
> > >
> > > I am having trouble understanding why we need all of this if we don't
> > > support SVE in the kernel. Could you elaborate?
> >
> > Dave knows all the details but a reason is that touching a Neon register
> > zeros the upper SVE state in the same vector register. So we can't
> > safely save/restore just the Neon part without corrupting the SVE state.
>
> This is right -- this also means that EFI services can trash the upper
> bits of an SVE vector register (as a side-effect of FPSIMD/NEON usage).
>
> It's overkill to save/restore absolutely everything -- I ignore num_regs
> for example -- but I wanted to keep things as simple as possible
> initially.
Without looking at your patches in detail, could we mandate in the ABI
that the SVE state is lost on the user/kernel syscall boundary? I guess
even for the PCS, the SVE state is caller-saved, so there shouldn't be
an additional cost to user. On interrupts, however, we'd have to
preserve the SVE state but if we do this on entry/exit points, the
kernel_neon_*() functions would not have to deal with any SVE state (and
even ignore it completely if in interrupt).
BTW, we will need an SVE ABI document in Documentation/arm64/ to specify
the requirements for syscall and sigcontext modifications.
--
Catalin
^ permalink raw reply [flat|nested] 61+ messages in thread
* [RFC PATCH 13/29] arm64/sve: Basic support for KERNEL_MODE_NEON
2016-11-28 12:06 ` Catalin Marinas
@ 2016-11-28 12:29 ` Dave Martin
2016-12-06 15:36 ` Ard Biesheuvel
0 siblings, 1 reply; 61+ messages in thread
From: Dave Martin @ 2016-11-28 12:29 UTC (permalink / raw)
To: linux-arm-kernel
On Mon, Nov 28, 2016 at 12:06:24PM +0000, Catalin Marinas wrote:
> On Mon, Nov 28, 2016 at 11:47:26AM +0000, Dave P Martin wrote:
> > On Sat, Nov 26, 2016 at 11:30:42AM +0000, Catalin Marinas wrote:
> > > On Fri, Nov 25, 2016 at 08:45:02PM +0000, Ard Biesheuvel wrote:
> > > > On 25 November 2016 at 19:39, Dave Martin <Dave.Martin@arm.com> wrote:
> > > > > --- a/arch/arm64/kernel/fpsimd.c
> > > > > +++ b/arch/arm64/kernel/fpsimd.c
> > > > > @@ -282,11 +282,26 @@ static DEFINE_PER_CPU(struct fpsimd_partial_state, softirq_fpsimdstate);
> > > > > */
> > > > > void kernel_neon_begin_partial(u32 num_regs)
> > > > > {
> > > > > + preempt_disable();
> > > > > +
> > > > > + /*
> > > > > + * For now, we have no special storage for SVE registers in
> > > > > + * interrupt context, so always save the userland SVE state
> > > > > + * if there is any, even for interrupts.
> > > > > + */
> > > > > + if (IS_ENABLED(CONFIG_ARM64_SVE) && (elf_hwcap & HWCAP_SVE) &&
> > > > > + current->mm &&
> > > > > + !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE)) {
> > > > > + fpsimd_save_state(&current->thread.fpsimd_state);
> > > > > + this_cpu_write(fpsimd_last_state, NULL);
> > > > > + }
> > > > > +
> > > >
> > > > I am having trouble understanding why we need all of this if we don't
> > > > support SVE in the kernel. Could you elaborate?
> > >
> > > Dave knows all the details but a reason is that touching a Neon register
> > > zeros the upper SVE state in the same vector register. So we can't
> > > safely save/restore just the Neon part without corrupting the SVE state.
> >
> > This is right -- this also means that EFI services can trash the upper
> > bits of an SVE vector register (as a side-effect of FPSIMD/NEON usage).
> >
> > It's overkill to save/restore absolutely everything -- I ignore num_regs
> > for example -- but I wanted to keep things as simple as possible
> > initially.
>
> Without looking at your patches in detail, could we mandate in the ABI
> that the SVE state is lost on the user/kernel syscall boundary? I guess
> even for the PCS, the SVE state is caller-saved, so there shouldn't be
> an additional cost to user. On interrupts, however, we'd have to
> preserve the SVE state but if we do this on entry/exit points, the
> kernel_neon_*() functions would not have to deal with any SVE state (and
> even ignore it completely if in interrupt).
See [RFC PATCH 24/29] arm64/sve: Discard SVE state on system call.
Currently, kernel_neon_begin_partial() doesn't take advantage of this --
discarding the state is deferred until sched-out, and anyway I don't
check for TIF_SVE in kernel_neon_begin_partial(). There's definitely
some room for improvement.
> the requirements for syscall and sigcontext modifications.
Agreed -- I wanted to get this series posted so skipped that for now,
but I plan to include a Documentation patch alongside the final series.
Cheers
---Dave
^ permalink raw reply [flat|nested] 61+ messages in thread
* [RFC PATCH 13/29] arm64/sve: Basic support for KERNEL_MODE_NEON
2016-11-28 12:29 ` Dave Martin
@ 2016-12-06 15:36 ` Ard Biesheuvel
0 siblings, 0 replies; 61+ messages in thread
From: Ard Biesheuvel @ 2016-12-06 15:36 UTC (permalink / raw)
To: linux-arm-kernel
On 28 November 2016 at 12:29, Dave Martin <Dave.Martin@arm.com> wrote:
> On Mon, Nov 28, 2016 at 12:06:24PM +0000, Catalin Marinas wrote:
>> On Mon, Nov 28, 2016 at 11:47:26AM +0000, Dave P Martin wrote:
>> > On Sat, Nov 26, 2016 at 11:30:42AM +0000, Catalin Marinas wrote:
>> > > On Fri, Nov 25, 2016 at 08:45:02PM +0000, Ard Biesheuvel wrote:
>> > > > On 25 November 2016 at 19:39, Dave Martin <Dave.Martin@arm.com> wrote:
>> > > > > --- a/arch/arm64/kernel/fpsimd.c
>> > > > > +++ b/arch/arm64/kernel/fpsimd.c
>> > > > > @@ -282,11 +282,26 @@ static DEFINE_PER_CPU(struct fpsimd_partial_state, softirq_fpsimdstate);
>> > > > > */
>> > > > > void kernel_neon_begin_partial(u32 num_regs)
>> > > > > {
>> > > > > + preempt_disable();
>> > > > > +
>> > > > > + /*
>> > > > > + * For now, we have no special storage for SVE registers in
>> > > > > + * interrupt context, so always save the userland SVE state
>> > > > > + * if there is any, even for interrupts.
>> > > > > + */
>> > > > > + if (IS_ENABLED(CONFIG_ARM64_SVE) && (elf_hwcap & HWCAP_SVE) &&
>> > > > > + current->mm &&
>> > > > > + !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE)) {
>> > > > > + fpsimd_save_state(&current->thread.fpsimd_state);
>> > > > > + this_cpu_write(fpsimd_last_state, NULL);
>> > > > > + }
>> > > > > +
>> > > >
>> > > > I am having trouble understanding why we need all of this if we don't
>> > > > support SVE in the kernel. Could you elaborate?
>> > >
>> > > Dave knows all the details but a reason is that touching a Neon register
>> > > zeros the upper SVE state in the same vector register. So we can't
>> > > safely save/restore just the Neon part without corrupting the SVE state.
>> >
>> > This is right -- this also means that EFI services can trash the upper
>> > bits of an SVE vector register (as a side-effect of FPSIMD/NEON usage).
>> >
>> > It's overkill to save/restore absolutely everything -- I ignore num_regs
>> > for example -- but I wanted to keep things as simple as possible
>> > initially.
>>
Actually, I think we could simplify this even further by always
preserving the userland state, instead of having two copies of the
statements above. The reason is that stacking and unstacking, as we do
for softirq/hardirq context, is only required if we happen to be
interrupting a thread while it is executing in the kernel *and* using
the NEON, in all other cases we can simply preserve the userland
context, and let the exit code take care of restoring the state upon
exit to userland (unless we're interrupting kernel mode NEON executing
in softirq context from an interrupt handler).
I will send out a separate RFC with a proposal to optimize this, which
I think will remove the need for this patch entirely.
^ permalink raw reply [flat|nested] 61+ messages in thread
* [RFC PATCH 14/29] Revert "arm64/sve: Allow kernel-mode NEON to be disabled in Kconfig"
2016-11-25 19:38 [RFC PATCH 00/29] arm64: Scalable Vector Extension core support Dave Martin
` (12 preceding siblings ...)
2016-11-25 19:39 ` [RFC PATCH 13/29] arm64/sve: Basic support for KERNEL_MODE_NEON Dave Martin
@ 2016-11-25 19:39 ` Dave Martin
2016-11-25 19:39 ` [RFC PATCH 15/29] arm64/sve: Restore working FPSIMD save/restore around signals Dave Martin
` (16 subsequent siblings)
30 siblings, 0 replies; 61+ messages in thread
From: Dave Martin @ 2016-11-25 19:39 UTC (permalink / raw)
To: linux-arm-kernel
Now that KERNEL_MODE_NEON works for SVE, we can just default it
back to y.
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
---
arch/arm64/Kconfig | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 7266761..bf9915cb 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -224,6 +224,9 @@ config SWIOTLB
config IOMMU_HELPER
def_bool SWIOTLB
+config KERNEL_MODE_NEON
+ def_bool y
+
config FIX_EARLYCON_MEM
def_bool y
@@ -266,10 +269,6 @@ endmenu
menu "Kernel Features"
-config KERNEL_MODE_NEON
- bool "Support NEON/FPSIMD code in the kernel"
- default y
-
menu "ARM errata workarounds via the alternatives framework"
config ARM64_ERRATUM_826319
--
2.1.4
^ permalink raw reply related [flat|nested] 61+ messages in thread
* [RFC PATCH 15/29] arm64/sve: Restore working FPSIMD save/restore around signals
2016-11-25 19:38 [RFC PATCH 00/29] arm64: Scalable Vector Extension core support Dave Martin
` (13 preceding siblings ...)
2016-11-25 19:39 ` [RFC PATCH 14/29] Revert "arm64/sve: Allow kernel-mode NEON to be disabled in Kconfig" Dave Martin
@ 2016-11-25 19:39 ` Dave Martin
2016-11-25 19:39 ` [RFC PATCH 16/29] arm64/sve: signal: Add SVE state record to sigcontext Dave Martin
` (15 subsequent siblings)
30 siblings, 0 replies; 61+ messages in thread
From: Dave Martin @ 2016-11-25 19:39 UTC (permalink / raw)
To: linux-arm-kernel
Because fpsimd_state and the SVE state are not magically
synchronised in the task_struct, stale FPSIMD data may be saved on
signal handler entry, and restored data may be lost on sigreturn.
This patch converts between SVE and FPSIMD views around the signal,
restoring working FPSIMD save/restore.
This will not save/restore the SVE state properly, but it should
restore a working FPSIMD ABI.
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
---
arch/arm64/include/asm/fpsimd.h | 1 +
arch/arm64/kernel/fpsimd.c | 92 ++++++++++++++++++++++++++++++++++++++++-
arch/arm64/kernel/signal.c | 2 +-
arch/arm64/kernel/signal32.c | 2 +-
4 files changed, 94 insertions(+), 3 deletions(-)
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 1c41259..aa82b38 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -80,6 +80,7 @@ extern void fpsimd_load_state(struct fpsimd_state *state);
extern void fpsimd_thread_switch(struct task_struct *next);
extern void fpsimd_flush_thread(void);
+extern void fpsimd_signal_preserve_current_state(void);
extern void fpsimd_preserve_current_state(void);
extern void fpsimd_restore_current_state(void);
extern void fpsimd_update_current_state(struct fpsimd_state *state);
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index cb947dd..9a90921 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -228,6 +228,52 @@ void fpsimd_preserve_current_state(void)
preempt_enable();
}
+#ifdef CONFIG_ARM64_SVE
+
+/* Helpers to sync task FPSIMD and SVE register views */
+
+static void __task_sve_to_fpsimd(struct task_struct *task, unsigned int vq)
+{
+ struct sve_struct fpsimd_sve_state(vq) *sst =
+ __task_sve_state(task);
+ struct fpsimd_state *fst = &task->thread.fpsimd_state;
+ unsigned int i;
+
+ for (i = 0; i < 32; ++i)
+ fst->vregs[i] = sst->zregs[i][0];
+}
+
+static void task_sve_to_fpsimd(struct task_struct *task)
+{
+ unsigned int vl = sve_get_vl();
+ unsigned int vq;
+
+ if (!(elf_hwcap & HWCAP_SVE))
+ return;
+
+ BUG_ON(vl % 16);
+ vq = vl / 16;
+ BUG_ON(vq < 1 || vq > 16);
+
+ __task_sve_to_fpsimd(task, vq);
+}
+
+#else /* ! CONFIG_ARM64_SVE */
+
+static void task_sve_to_fpsimd(struct task_struct *task __always_unused) { }
+
+#endif /* ! CONFIG_ARM64_SVE */
+
+
+void fpsimd_signal_preserve_current_state(void)
+{
+ WARN_ONCE(elf_hwcap & HWCAP_SVE,
+ "SVE state save/restore around signals doesn't work properly, expect userspace corruption!\n");
+
+ fpsimd_preserve_current_state();
+ task_sve_to_fpsimd(current);
+}
+
/*
* Load the userland FPSIMD state of 'current' from memory, but only if the
* FPSIMD state already held in the registers is /not/ the most recent FPSIMD
@@ -246,6 +292,43 @@ void fpsimd_restore_current_state(void)
preempt_enable();
}
+
+#ifdef CONFIG_ARM64_SVE
+
+static void __task_fpsimd_to_sve(struct task_struct *task, unsigned int vq)
+{
+ struct sve_struct fpsimd_sve_state(vq) *sst =
+ __task_sve_state(task);
+ struct fpsimd_state *fst = &task->thread.fpsimd_state;
+ unsigned int i;
+
+ memset(sst, 0, sizeof(*sst));
+ for (i = 0; i < 32; ++i)
+ sst->zregs[i][0] = fst->vregs[i];
+}
+
+static void task_fpsimd_to_sve(struct task_struct *task)
+{
+ unsigned int vl = sve_get_vl();
+ unsigned int vq;
+
+ if (!(elf_hwcap & HWCAP_SVE))
+ return;
+
+ BUG_ON(vl % 16);
+ vq = vl / 16;
+ BUG_ON(vq < 1 || vq > 16);
+
+ __task_fpsimd_to_sve(task, vq);
+}
+
+#else /* ! CONFIG_ARM64_SVE */
+
+/* Turn any non-optimised out attempts to use this into a link error: */
+extern void task_fpsimd_to_sve(struct task_struct *task);
+
+#endif /* ! CONFIG_ARM64_SVE */
+
/*
* Load an updated userland FPSIMD state for 'current' from memory and set the
* flag that indicates that the FPSIMD register contents are the most recent
@@ -254,13 +337,20 @@ void fpsimd_restore_current_state(void)
void fpsimd_update_current_state(struct fpsimd_state *state)
{
preempt_disable();
- fpsimd_load_state(state);
+
+ if (IS_ENABLED(CONFIG_ARM64_SVE)) {
+ current->thread.fpsimd_state = *state;
+ task_fpsimd_to_sve(current);
+ }
+ task_fpsimd_load(current);
+
if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
struct fpsimd_state *st = &current->thread.fpsimd_state;
this_cpu_write(fpsimd_last_state, st);
st->cpu = smp_processor_id();
}
+
preempt_enable();
}
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index c7175a3..1e430b4 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -168,7 +168,7 @@ static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
int err;
/* dump the hardware registers to the fpsimd_state structure */
- fpsimd_preserve_current_state();
+ fpsimd_signal_preserve_current_state();
/* copy the FP and status/control registers */
err = __copy_to_user(ctx->vregs, fpsimd->vregs, sizeof(fpsimd->vregs));
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index b7063de..08ca0dd 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -244,7 +244,7 @@ static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame)
* Note that this also saves V16-31, which aren't visible
* in AArch32.
*/
- fpsimd_preserve_current_state();
+ fpsimd_signal_preserve_current_state();
/* Place structure header on the stack */
__put_user_error(magic, &frame->magic, err);
--
2.1.4
^ permalink raw reply related [flat|nested] 61+ messages in thread
* [RFC PATCH 16/29] arm64/sve: signal: Add SVE state record to sigcontext
2016-11-25 19:38 [RFC PATCH 00/29] arm64: Scalable Vector Extension core support Dave Martin
` (14 preceding siblings ...)
2016-11-25 19:39 ` [RFC PATCH 15/29] arm64/sve: Restore working FPSIMD save/restore around signals Dave Martin
@ 2016-11-25 19:39 ` Dave Martin
2016-11-25 19:39 ` [RFC PATCH 17/29] arm64/sve: signal: Dump Scalable Vector Extension registers to user stack Dave Martin
` (14 subsequent siblings)
30 siblings, 0 replies; 61+ messages in thread
From: Dave Martin @ 2016-11-25 19:39 UTC (permalink / raw)
To: linux-arm-kernel
This patch adds a record to sigcontext that will contain the SVE
state.
Subsequent patches will implement the actual register dumping.
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
---
arch/arm64/include/uapi/asm/sigcontext.h | 86 ++++++++++++++++++++++++++++++++
arch/arm64/kernel/signal.c | 62 +++++++++++++++++++++++
2 files changed, 148 insertions(+)
diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h
index 1af8437..11c915d 100644
--- a/arch/arm64/include/uapi/asm/sigcontext.h
+++ b/arch/arm64/include/uapi/asm/sigcontext.h
@@ -88,4 +88,90 @@ struct extra_context {
__u32 size; /* size in bytes of the extra space */
};
+#define SVE_MAGIC 0x53564501
+
+struct sve_context {
+ struct _aarch64_ctx head;
+ __u16 vl;
+ __u16 __reserved[3];
+};
+
+/*
+ * The SVE architecture leaves space for future expansion of the
+ * vector length beyond its initial architectural limit of 2048 bits
+ * (16 quadwords).
+ */
+#define SVE_VQ_MIN 1
+#define SVE_VQ_MAX 0x200
+
+#define SVE_VL_MIN (SVE_VQ_MIN * 0x10)
+#define SVE_VL_MAX (SVE_VQ_MAX * 0x10)
+
+#define SVE_NUM_ZREGS 32
+#define SVE_NUM_PREGS 16
+
+#define sve_vl_valid(vl) \
+ ((vl) % 0x10 == 0 && (vl) >= SVE_VL_MIN && (vl) <= SVE_VL_MAX)
+#define sve_vq_from_vl(vl) ((vl) / 0x10)
+
+/*
+ * The total size of meaningful data in the SVE context in bytes,
+ * including the header, is given by SVE_SIG_CONTEXT_SIZE(vq).
+ *
+ * Note: for all these macros, the "vq" argument denotes the SVE
+ * vector length in quadwords (i.e., units of 128 bits).
+ *
+ * The correct way to obtain vq is to use sve_vq_from_vl(vl). The
+ * result is valid if and only if sve_vl_valid(vl) is true. This is
+ * guaranteed for a struct sve_context written by the kernel.
+ *
+ *
+ * Additional macros describe the contents and layout of the payload.
+ * For each, SVE_SIG_x_OFFSET(args) is the start offset relative to
+ * the start of struct sve_context, and SVE_SIG_x_SIZE(args) is the
+ * size in bytes:
+ *
+ * x type description
+ * - ---- -----------
+ * REGS the entire SVE context
+ *
+ * ZREGS __uint128_t[SVE_NUM_ZREGS][vq] all Z-registers
+ * ZREG __uint128_t[vq] individual Z-register Zn
+ *
+ * PREGS uint16_t[SVE_NUM_PREGS][vq] all P-registers
+ * PREG uint16_t[vq] individual P-register Pn
+ *
+ * FFR uint16_t[vq] first-fault status register
+ *
+ * Additional data might be appended in the future.
+ */
+
+#define SVE_SIG_ZREG_SIZE(vq) ((__u32)(vq) * 16)
+#define SVE_SIG_PREG_SIZE(vq) ((__u32)(vq) * 2)
+#define SVE_SIG_FFR_SIZE(vq) SVE_SIG_PREG_SIZE(vq)
+
+#define SVE_SIG_REGS_OFFSET ((sizeof(struct sve_context) + 15) / 16 * 16)
+
+#define SVE_SIG_ZREGS_OFFSET SVE_SIG_REGS_OFFSET
+#define SVE_SIG_ZREG_OFFSET(vq, n) \
+ (SVE_SIG_ZREGS_OFFSET + SVE_SIG_ZREG_SIZE(vq) * (n))
+#define SVE_SIG_ZREGS_SIZE(vq) \
+ (SVE_SIG_ZREG_OFFSET(vq, SVE_NUM_ZREGS) - SVE_SIG_ZREGS_OFFSET)
+
+#define SVE_SIG_PREGS_OFFSET(vq) \
+ (SVE_SIG_ZREGS_OFFSET + SVE_SIG_ZREGS_SIZE(vq))
+#define SVE_SIG_PREG_OFFSET(vq, n) \
+ (SVE_SIG_PREGS_OFFSET(vq) + SVE_SIG_PREG_SIZE(vq) * (n))
+#define SVE_SIG_PREGS_SIZE(vq) \
+ (SVE_SIG_PREG_OFFSET(vq, SVE_NUM_PREGS) - SVE_SIG_PREGS_OFFSET(vq))
+
+#define SVE_SIG_FFR_OFFSET(vq) \
+ (SVE_SIG_PREGS_OFFSET(vq) + SVE_SIG_PREGS_SIZE(vq))
+
+#define SVE_SIG_REGS_SIZE(vq) \
+ (SVE_SIG_FFR_OFFSET(vq) + SVE_SIG_FFR_SIZE(vq) - SVE_SIG_REGS_OFFSET)
+
+#define SVE_SIG_CONTEXT_SIZE(vq) (SVE_SIG_REGS_OFFSET + SVE_SIG_REGS_SIZE(vq))
+
+
#endif /* _UAPI__ASM_SIGCONTEXT_H */
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 1e430b4..7418237 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -57,6 +57,7 @@ struct rt_sigframe_user_layout {
unsigned long fpsimd_offset;
unsigned long esr_offset;
+ unsigned long sve_offset;
unsigned long extra_offset;
unsigned long end_offset;
};
@@ -209,8 +210,39 @@ static int restore_fpsimd_context(struct fpsimd_context __user *ctx)
return err ? -EFAULT : 0;
}
+
+#ifdef CONFIG_ARM64_SVE
+
+static int preserve_sve_context(struct sve_context __user *ctx)
+{
+ int err = 0;
+ u16 reserved[ARRAY_SIZE(ctx->__reserved)];
+ unsigned int vl = sve_get_vl();
+ unsigned int vq = sve_vq_from_vl(vl);
+
+ memset(reserved, 0, sizeof(reserved));
+
+ __put_user_error(SVE_MAGIC, &ctx->head.magic, err);
+ __put_user_error(round_up(SVE_SIG_CONTEXT_SIZE(vq), 16),
+ &ctx->head.size, err);
+ __put_user_error(vl, &ctx->vl, err);
+ BUILD_BUG_ON(sizeof(ctx->__reserved) != sizeof(reserved));
+ err |= copy_to_user(&ctx->__reserved, reserved, sizeof(reserved));
+
+ return err ? -EFAULT : 0;
+}
+
+#else /* ! CONFIG_ARM64_SVE */
+
+/* Turn any non-optimised out attempt to use this into a link error: */
+extern int preserve_sve_context(void __user *ctx);
+
+#endif /* ! CONFIG_ARM64_SVE */
+
+
struct user_ctxs {
struct fpsimd_context __user *fpsimd;
+ struct sve_context __user *sve;
};
static int parse_user_sigframe(struct user_ctxs *user,
@@ -224,6 +256,7 @@ static int parse_user_sigframe(struct user_ctxs *user,
bool have_extra_context = false;
user->fpsimd = NULL;
+ user->sve = NULL;
if (!IS_ALIGNED((unsigned long)base, 16))
goto invalid;
@@ -271,6 +304,19 @@ static int parse_user_sigframe(struct user_ctxs *user,
/* ignore */
break;
+ case SVE_MAGIC:
+ if (!IS_ENABLED(CONFIG_ARM64_SVE))
+ goto invalid;
+
+ if (user->sve)
+ goto invalid;
+
+ if (size < sizeof(*user->sve))
+ goto invalid;
+
+ user->sve = (struct sve_context __user *)head;
+ break;
+
case EXTRA_MAGIC:
if (have_extra_context)
goto invalid;
@@ -417,6 +463,15 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user)
return err;
}
+ if (IS_ENABLED(CONFIG_ARM64_SVE) && (elf_hwcap & HWCAP_SVE)) {
+ unsigned int vq = sve_vq_from_vl(sve_get_vl());
+
+ err = sigframe_alloc(user, &user->sve_offset,
+ SVE_SIG_CONTEXT_SIZE(vq));
+ if (err)
+ return err;
+ }
+
return sigframe_alloc_end(user);
}
@@ -458,6 +513,13 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user,
__put_user_error(current->thread.fault_code, &esr_ctx->esr, err);
}
+ /* Scalable Vector Extension state, if present */
+ if (IS_ENABLED(CONFIG_ARM64_SVE) && err == 0 && user->sve_offset) {
+ struct sve_context __user *sve_ctx =
+ apply_user_offset(user, user->sve_offset);
+ err |= preserve_sve_context(sve_ctx);
+ }
+
if (err == 0 && user->extra_offset) {
struct extra_context __user *extra =
apply_user_offset(user, user->extra_offset);
--
2.1.4
^ permalink raw reply related [flat|nested] 61+ messages in thread
* [RFC PATCH 17/29] arm64/sve: signal: Dump Scalable Vector Extension registers to user stack
2016-11-25 19:38 [RFC PATCH 00/29] arm64: Scalable Vector Extension core support Dave Martin
` (15 preceding siblings ...)
2016-11-25 19:39 ` [RFC PATCH 16/29] arm64/sve: signal: Add SVE state record to sigcontext Dave Martin
@ 2016-11-25 19:39 ` Dave Martin
2016-11-25 19:39 ` [RFC PATCH 18/29] arm64/sve: signal: Restore FPSIMD/SVE state in rt_sigreturn Dave Martin
` (13 subsequent siblings)
30 siblings, 0 replies; 61+ messages in thread
From: Dave Martin @ 2016-11-25 19:39 UTC (permalink / raw)
To: linux-arm-kernel
This patch populates the sve_regs() area reserved on the user stack
with the actual register context.
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
---
arch/arm64/include/asm/fpsimd.h | 1 +
arch/arm64/kernel/fpsimd.c | 5 ++---
arch/arm64/kernel/signal.c | 8 ++++++++
3 files changed, 11 insertions(+), 3 deletions(-)
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index aa82b38..e39066a 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -93,6 +93,7 @@ extern void fpsimd_load_partial_state(struct fpsimd_partial_state *state);
extern void __init fpsimd_init_task_struct_size(void);
+extern void *__task_sve_state(struct task_struct *task);
extern void sve_save_state(void *state, u32 *pfpsr);
extern void sve_load_state(void const *state, u32 const *pfpsr);
extern unsigned int sve_get_vl(void);
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 9a90921..4ef2e37 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -128,7 +128,7 @@ void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
#ifdef CONFIG_ARM64_SVE
-static void *__task_sve_state(struct task_struct *task)
+void *__task_sve_state(struct task_struct *task)
{
return (char *)task + ALIGN(sizeof(*task), 16);
}
@@ -143,8 +143,7 @@ static void *__task_pffr(struct task_struct *task)
#else /* !CONFIG_ARM64_SVE */
-/* Turn any non-optimised out attempts to use these into a link error: */
-extern void *__task_sve_state(struct task_struct *task);
+/* Turn any non-optimised out attempts to use this into a link error: */
extern void *__task_pffr(struct task_struct *task);
#endif /* !CONFIG_ARM64_SVE */
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 7418237..038e7338 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -229,6 +229,14 @@ static int preserve_sve_context(struct sve_context __user *ctx)
BUILD_BUG_ON(sizeof(ctx->__reserved) != sizeof(reserved));
err |= copy_to_user(&ctx->__reserved, reserved, sizeof(reserved));
+ /*
+ * This assumes that the SVE state has already been saved to
+ * the task struct by calling preserve_fpsimd_context().
+ */
+ err |= copy_to_user((char __user *)ctx + SVE_SIG_REGS_OFFSET,
+ __task_sve_state(current),
+ SVE_SIG_REGS_SIZE(vq));
+
return err ? -EFAULT : 0;
}
--
2.1.4
^ permalink raw reply related [flat|nested] 61+ messages in thread
* [RFC PATCH 18/29] arm64/sve: signal: Restore FPSIMD/SVE state in rt_sigreturn
2016-11-25 19:38 [RFC PATCH 00/29] arm64: Scalable Vector Extension core support Dave Martin
` (16 preceding siblings ...)
2016-11-25 19:39 ` [RFC PATCH 17/29] arm64/sve: signal: Dump Scalable Vector Extension registers to user stack Dave Martin
@ 2016-11-25 19:39 ` Dave Martin
2016-11-25 19:39 ` [RFC PATCH 19/29] arm64/sve: Avoid corruption when replacing the SVE state Dave Martin
` (12 subsequent siblings)
30 siblings, 0 replies; 61+ messages in thread
From: Dave Martin @ 2016-11-25 19:39 UTC (permalink / raw)
To: linux-arm-kernel
This patch adds the missing logic to restore the SVE state in
rt_sigreturn.
Because the FPSIMD and SVE state alias, this code replaces the
existing fpsimd restore code when there is SVE state to restore.
For Zn[127:0], the saved FPSIMD state in Vn takes precedence.
Since __task_fpsimd_to_sve() is used to merge the FPSIMD and SVE
state back together, and only for this purpose, we don't want it to
zero out the SVE state -- hence delete the memset() from there.
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
---
arch/arm64/kernel/fpsimd.c | 4 ---
arch/arm64/kernel/signal.c | 87 ++++++++++++++++++++++++++++++++++++++++------
2 files changed, 76 insertions(+), 15 deletions(-)
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 4ef2e37..b1a8d3e 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -266,9 +266,6 @@ static void task_sve_to_fpsimd(struct task_struct *task __always_unused) { }
void fpsimd_signal_preserve_current_state(void)
{
- WARN_ONCE(elf_hwcap & HWCAP_SVE,
- "SVE state save/restore around signals doesn't work properly, expect userspace corruption!\n");
-
fpsimd_preserve_current_state();
task_sve_to_fpsimd(current);
}
@@ -301,7 +298,6 @@ static void __task_fpsimd_to_sve(struct task_struct *task, unsigned int vq)
struct fpsimd_state *fst = &task->thread.fpsimd_state;
unsigned int i;
- memset(sst, 0, sizeof(*sst));
for (i = 0; i < 32; ++i)
sst->zregs[i][0] = fst->vregs[i];
}
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 038e7338..2697d09 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -211,6 +211,11 @@ static int restore_fpsimd_context(struct fpsimd_context __user *ctx)
}
+struct user_ctxs {
+ struct fpsimd_context __user *fpsimd;
+ struct sve_context __user *sve;
+};
+
#ifdef CONFIG_ARM64_SVE
static int preserve_sve_context(struct sve_context __user *ctx)
@@ -240,19 +245,68 @@ static int preserve_sve_context(struct sve_context __user *ctx)
return err ? -EFAULT : 0;
}
+static int __restore_sve_fpsimd_context(struct user_ctxs *user,
+ unsigned int vl, unsigned int vq)
+{
+ int err;
+ struct fpsimd_sve_state(vq) *task_sve_regs =
+ __task_sve_state(current);
+ struct fpsimd_state fpsimd;
+
+ if (vl != sve_get_vl())
+ return -EINVAL;
+
+ BUG_ON(SVE_SIG_REGS_SIZE(vq) > sizeof(*task_sve_regs));
+ BUG_ON(round_up(SVE_SIG_REGS_SIZE(vq), 16) < sizeof(*task_sve_regs));
+ BUG_ON(SVE_SIG_FFR_OFFSET(vq) - SVE_SIG_REGS_OFFSET !=
+ (char *)&task_sve_regs->ffr - (char *)task_sve_regs);
+ err = __copy_from_user(task_sve_regs,
+ (char __user const *)user->sve +
+ SVE_SIG_REGS_OFFSET,
+ SVE_SIG_REGS_SIZE(vq));
+ if (err)
+ return err;
+
+ /* copy the FP and status/control registers */
+ /* restore_sigframe() already checked that user->fpsimd != NULL. */
+ err = __copy_from_user(fpsimd.vregs, user->fpsimd->vregs,
+ sizeof(fpsimd.vregs));
+ __get_user_error(fpsimd.fpsr, &user->fpsimd->fpsr, err);
+ __get_user_error(fpsimd.fpcr, &user->fpsimd->fpcr, err);
+
+ /* load the hardware registers from the fpsimd_state structure */
+ if (!err)
+ fpsimd_update_current_state(&fpsimd);
+
+ return err;
+}
+
+static int restore_sve_fpsimd_context(struct user_ctxs *user)
+{
+ int err;
+ u16 vl, vq;
+
+ err = __get_user(vl, &user->sve->vl);
+ if (err)
+ return err;
+
+ if (!sve_vl_valid(vl))
+ return -EINVAL;
+
+ vq = sve_vq_from_vl(vl);
+
+ return __restore_sve_fpsimd_context(user, vl, vq);
+}
+
#else /* ! CONFIG_ARM64_SVE */
-/* Turn any non-optimised out attempt to use this into a link error: */
+/* Turn any non-optimised out attempts to use these into a link error: */
extern int preserve_sve_context(void __user *ctx);
+extern int restore_sve_fpsimd_context(struct user_ctxs *user);
#endif /* ! CONFIG_ARM64_SVE */
-struct user_ctxs {
- struct fpsimd_context __user *fpsimd;
- struct sve_context __user *sve;
-};
-
static int parse_user_sigframe(struct user_ctxs *user,
struct rt_sigframe __user *sf)
{
@@ -316,6 +370,9 @@ static int parse_user_sigframe(struct user_ctxs *user,
if (!IS_ENABLED(CONFIG_ARM64_SVE))
goto invalid;
+ if (!(elf_hwcap & HWCAP_SVE))
+ goto invalid;
+
if (user->sve)
goto invalid;
@@ -375,9 +432,6 @@ static int parse_user_sigframe(struct user_ctxs *user,
}
done:
- if (!user->fpsimd)
- goto invalid;
-
return 0;
invalid:
@@ -411,8 +465,19 @@ static int restore_sigframe(struct pt_regs *regs,
if (err == 0)
err = parse_user_sigframe(&user, sf);
- if (err == 0)
- err = restore_fpsimd_context(user.fpsimd);
+ if (err == 0) {
+ if (!user.fpsimd)
+ return -EINVAL;
+
+ if (user.sve) {
+ if (!IS_ENABLED(CONFIG_ARM64_SVE) ||
+ !(elf_hwcap & HWCAP_SVE))
+ return -EINVAL;
+
+ err = restore_sve_fpsimd_context(&user);
+ } else
+ err = restore_fpsimd_context(user.fpsimd);
+ }
return err;
}
--
2.1.4
^ permalink raw reply related [flat|nested] 61+ messages in thread
* [RFC PATCH 19/29] arm64/sve: Avoid corruption when replacing the SVE state
2016-11-25 19:38 [RFC PATCH 00/29] arm64: Scalable Vector Extension core support Dave Martin
` (17 preceding siblings ...)
2016-11-25 19:39 ` [RFC PATCH 18/29] arm64/sve: signal: Restore FPSIMD/SVE state in rt_sigreturn Dave Martin
@ 2016-11-25 19:39 ` Dave Martin
2016-11-25 19:39 ` [RFC PATCH 20/29] arm64/sve: traps: Add descriptive string for SVE exceptions Dave Martin
` (11 subsequent siblings)
30 siblings, 0 replies; 61+ messages in thread
From: Dave Martin @ 2016-11-25 19:39 UTC (permalink / raw)
To: linux-arm-kernel
If preemption occurs during replacement of the whole SVE state,
as occurs during execve() or rt_sigreturn(), then some or all of
the new state for the thread can be lost, due to erroneous saving
of the pre-existing state over the new data.
This patch disables preemption around the affected operations to
avoid this failure mode.
This should be reexamined later if the impact on preemption latency
proves to be excessive.
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
---
arch/arm64/kernel/fpsimd.c | 4 ++++
arch/arm64/kernel/signal.c | 9 ++++++++-
2 files changed, 12 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index b1a8d3e..cda079e 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -198,6 +198,8 @@ void fpsimd_thread_switch(struct task_struct *next)
void fpsimd_flush_thread(void)
{
+ preempt_disable();
+
fpsimd_flush_task_state(current);
memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
@@ -213,6 +215,8 @@ void fpsimd_flush_thread(void)
}
set_thread_flag(TIF_FOREIGN_FPSTATE);
+
+ preempt_enable();
}
/*
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 2697d09..129b016 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -256,6 +256,10 @@ static int __restore_sve_fpsimd_context(struct user_ctxs *user,
if (vl != sve_get_vl())
return -EINVAL;
+ preempt_disable();
+
+ set_thread_flag(TIF_FOREIGN_FPSTATE);
+
BUG_ON(SVE_SIG_REGS_SIZE(vq) > sizeof(*task_sve_regs));
BUG_ON(round_up(SVE_SIG_REGS_SIZE(vq), 16) < sizeof(*task_sve_regs));
BUG_ON(SVE_SIG_FFR_OFFSET(vq) - SVE_SIG_REGS_OFFSET !=
@@ -265,7 +269,7 @@ static int __restore_sve_fpsimd_context(struct user_ctxs *user,
SVE_SIG_REGS_OFFSET,
SVE_SIG_REGS_SIZE(vq));
if (err)
- return err;
+ goto out_preempt;
/* copy the FP and status/control registers */
/* restore_sigframe() already checked that user->fpsimd != NULL. */
@@ -278,6 +282,9 @@ static int __restore_sve_fpsimd_context(struct user_ctxs *user,
if (!err)
fpsimd_update_current_state(&fpsimd);
+out_preempt:
+ preempt_enable();
+
return err;
}
--
2.1.4
^ permalink raw reply related [flat|nested] 61+ messages in thread
* [RFC PATCH 20/29] arm64/sve: traps: Add descriptive string for SVE exceptions
2016-11-25 19:38 [RFC PATCH 00/29] arm64: Scalable Vector Extension core support Dave Martin
` (18 preceding siblings ...)
2016-11-25 19:39 ` [RFC PATCH 19/29] arm64/sve: Avoid corruption when replacing the SVE state Dave Martin
@ 2016-11-25 19:39 ` Dave Martin
2016-11-25 19:39 ` [RFC PATCH 21/29] arm64/sve: Enable SVE on demand for userspace Dave Martin
` (10 subsequent siblings)
30 siblings, 0 replies; 61+ messages in thread
From: Dave Martin @ 2016-11-25 19:39 UTC (permalink / raw)
To: linux-arm-kernel
In preparation for SVE trapping in userspace, let's print something
relevant instead of "UNRECOGNIZED EC" when an unhandled SVE
exception occurs.
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
---
arch/arm64/include/asm/esr.h | 3 ++-
arch/arm64/kernel/traps.c | 1 +
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index d14c478..87729f3 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -42,7 +42,8 @@
#define ESR_ELx_EC_HVC64 (0x16)
#define ESR_ELx_EC_SMC64 (0x17)
#define ESR_ELx_EC_SYS64 (0x18)
-/* Unallocated EC: 0x19 - 0x1E */
+#define ESR_ELx_EC_SVE (0x19)
+/* Unallocated EC: 0x1A - 0x1E */
#define ESR_ELx_EC_IMP_DEF (0x1f)
#define ESR_ELx_EC_IABT_LOW (0x20)
#define ESR_ELx_EC_IABT_CUR (0x21)
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index c9986b3..aaab1dd 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -569,6 +569,7 @@ static const char *esr_class_str[] = {
[ESR_ELx_EC_HVC64] = "HVC (AArch64)",
[ESR_ELx_EC_SMC64] = "SMC (AArch64)",
[ESR_ELx_EC_SYS64] = "MSR/MRS (AArch64)",
+ [ESR_ELx_EC_SVE] = "SVE",
[ESR_ELx_EC_IMP_DEF] = "EL3 IMP DEF",
[ESR_ELx_EC_IABT_LOW] = "IABT (lower EL)",
[ESR_ELx_EC_IABT_CUR] = "IABT (current EL)",
--
2.1.4
^ permalink raw reply related [flat|nested] 61+ messages in thread
* [RFC PATCH 21/29] arm64/sve: Enable SVE on demand for userspace
2016-11-25 19:38 [RFC PATCH 00/29] arm64: Scalable Vector Extension core support Dave Martin
` (19 preceding siblings ...)
2016-11-25 19:39 ` [RFC PATCH 20/29] arm64/sve: traps: Add descriptive string for SVE exceptions Dave Martin
@ 2016-11-25 19:39 ` Dave Martin
2016-11-25 19:39 ` [RFC PATCH 22/29] arm64/sve: Implement FPSIMD-only context for tasks not using SVE Dave Martin
` (9 subsequent siblings)
30 siblings, 0 replies; 61+ messages in thread
From: Dave Martin @ 2016-11-25 19:39 UTC (permalink / raw)
To: linux-arm-kernel
This patch tracks whether a task has ever attempted to use the
Scalable Vector Extension. If and only if SVE is in use by a task,
it will be enabled for userspace when scheduling the task in. For
other tasks, SVE is disabled when scheduling in.
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
---
arch/arm64/include/asm/thread_info.h | 1 +
arch/arm64/kernel/entry.S | 18 +++++++++++++++++-
arch/arm64/kernel/fpsimd.c | 30 ++++++++++++++++++++++++++++++
3 files changed, 48 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index e9ea5a6..2deac86 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -117,6 +117,7 @@ static inline struct thread_info *current_thread_info(void)
#define TIF_SYSCALL_AUDIT 9
#define TIF_SYSCALL_TRACEPOINT 10
#define TIF_SECCOMP 11
+#define TIF_SVE 17 /* Scalable Vector Extension in use */
#define TIF_MEMDIE 18 /* is terminating due to OOM killer */
#define TIF_FREEZE 19
#define TIF_RESTORE_SIGMASK 20
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 223d54a..fe20560 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -465,6 +465,10 @@ el0_sync:
b.eq el0_ia
cmp x24, #ESR_ELx_EC_FP_ASIMD // FP/ASIMD access
b.eq el0_fpsimd_acc
+#ifdef CONFIG_ARM64_SVE
+ cmp x24, #ESR_ELx_EC_SVE // SVE access
+ b.eq el0_sve_acc
+#endif
cmp x24, #ESR_ELx_EC_FP_EXC64 // FP/ASIMD exception
b.eq el0_fpsimd_exc
cmp x24, #ESR_ELx_EC_SYS64 // configurable trap
@@ -563,9 +567,21 @@ el0_fpsimd_acc:
mov x1, sp
bl do_fpsimd_acc
b ret_to_user
+#ifdef CONFIG_ARM64_SVE
+ /*
+ * Scalable Vector Extension access
+ */
+el0_sve_acc:
+ enable_dbg
+ ct_user_exit
+ mov x0, x25
+ mov x1, sp
+ bl do_sve_acc
+ b ret_to_user
+#endif
el0_fpsimd_exc:
/*
- * Floating Point or Advanced SIMD exception
+ * Floating Point, Advanced SIMD or SVE exception
*/
enable_dbg
ct_user_exit
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index cda079e..40566a9 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -99,6 +99,20 @@ void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
WARN_ON(1);
}
+#ifdef CONFIG_ARM64_SVE
+void do_sve_acc(unsigned int esr, struct pt_regs *regs)
+{
+ unsigned long tmp;
+
+ if (test_and_set_thread_flag(TIF_SVE))
+ BUG();
+
+ asm ("mrs %0, cpacr_el1" : "=r" (tmp));
+ asm volatile ("msr cpacr_el1, %0" :: "r" (tmp | (1 << 17)));
+ /* Serialised by exception return to user */
+}
+#endif
+
/*
* Raise a SIGFPE for the current process.
*/
@@ -283,11 +297,27 @@ void fpsimd_restore_current_state(void)
{
preempt_disable();
if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
+ unsigned long tmp;
+ unsigned long flags;
+
struct fpsimd_state *st = &current->thread.fpsimd_state;
task_fpsimd_load(current);
this_cpu_write(fpsimd_last_state, st);
st->cpu = smp_processor_id();
+
+ if (IS_ENABLED(CONFIG_ARM64_SVE)) {
+ /*
+ * Flip SVE enable for userspace if it doesn't
+ * match the current_task.
+ */
+ asm ("mrs %0, cpacr_el1" : "=r" (tmp));
+ flags = current_thread_info()->flags;
+ if ((tmp ^ (unsigned long)flags) & (1 << 17)) {
+ tmp ^= 1 << 17;
+ asm volatile ("msr cpacr_el1, %0" :: "r" (tmp));
+ }
+ }
}
preempt_enable();
}
--
2.1.4
^ permalink raw reply related [flat|nested] 61+ messages in thread
* [RFC PATCH 22/29] arm64/sve: Implement FPSIMD-only context for tasks not using SVE
2016-11-25 19:38 [RFC PATCH 00/29] arm64: Scalable Vector Extension core support Dave Martin
` (20 preceding siblings ...)
2016-11-25 19:39 ` [RFC PATCH 21/29] arm64/sve: Enable SVE on demand for userspace Dave Martin
@ 2016-11-25 19:39 ` Dave Martin
2016-11-25 19:39 ` [RFC PATCH 23/29] arm64/sve: Move ZEN handling to the common task_fpsimd_load() path Dave Martin
` (8 subsequent siblings)
30 siblings, 0 replies; 61+ messages in thread
From: Dave Martin @ 2016-11-25 19:39 UTC (permalink / raw)
To: linux-arm-kernel
To reduce unnecessary context switch overhead, we don't need to
switch the whole SVE state for tasks that are not using it.
This patch restores the FPSIMD-only behaviour for tasks that have
never used SVE.
Note that coredumps and ptrace may see FPSIMD/SVE out of sync at
present -- this will be fixed later.
SVE state is saved on signal delivery only for tasks that have
used SVE. However, it should be possible to add SVE state on
return from a signal handler when the task didn't have any SVE
state previously. The caller may need to add its own SVE record
to the signal frame in this case.
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
---
arch/arm64/kernel/fpsimd.c | 34 +++++++++++++++++++++++-----------
arch/arm64/kernel/signal.c | 5 ++++-
2 files changed, 27 insertions(+), 12 deletions(-)
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 40566a9..cad86e5 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -100,6 +100,9 @@ void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
}
#ifdef CONFIG_ARM64_SVE
+
+static void task_fpsimd_to_sve(struct task_struct *task);
+
void do_sve_acc(unsigned int esr, struct pt_regs *regs)
{
unsigned long tmp;
@@ -107,11 +110,16 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs)
if (test_and_set_thread_flag(TIF_SVE))
BUG();
+ BUG_ON(is_compat_task());
+
+ task_fpsimd_to_sve(current);
+
asm ("mrs %0, cpacr_el1" : "=r" (tmp));
asm volatile ("msr cpacr_el1, %0" :: "r" (tmp | (1 << 17)));
/* Serialised by exception return to user */
}
-#endif
+
+#endif /* CONFIG_ARM64_SVE */
/*
* Raise a SIGFPE for the current process.
@@ -164,7 +172,8 @@ extern void *__task_pffr(struct task_struct *task);
static void task_fpsimd_load(struct task_struct *task)
{
- if (IS_ENABLED(CONFIG_ARM64_SVE) && (elf_hwcap & HWCAP_SVE))
+ if (IS_ENABLED(CONFIG_ARM64_SVE) &&
+ test_tsk_thread_flag(task, TIF_SVE))
sve_load_state(__task_pffr(task),
&task->thread.fpsimd_state.fpsr);
else
@@ -173,7 +182,8 @@ static void task_fpsimd_load(struct task_struct *task)
static void task_fpsimd_save(struct task_struct *task)
{
- if (IS_ENABLED(CONFIG_ARM64_SVE) && (elf_hwcap & HWCAP_SVE))
+ if (IS_ENABLED(CONFIG_ARM64_SVE) &&
+ test_tsk_thread_flag(task, TIF_SVE))
sve_save_state(__task_pffr(task),
&task->thread.fpsimd_state.fpsr);
else
@@ -202,11 +212,9 @@ void fpsimd_thread_switch(struct task_struct *next)
if (__this_cpu_read(fpsimd_last_state) == st
&& st->cpu == smp_processor_id())
- clear_ti_thread_flag(task_thread_info(next),
- TIF_FOREIGN_FPSTATE);
+ clear_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
else
- set_ti_thread_flag(task_thread_info(next),
- TIF_FOREIGN_FPSTATE);
+ set_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
}
}
@@ -285,7 +293,8 @@ static void task_sve_to_fpsimd(struct task_struct *task __always_unused) { }
void fpsimd_signal_preserve_current_state(void)
{
fpsimd_preserve_current_state();
- task_sve_to_fpsimd(current);
+ if (test_thread_flag(TIF_SVE))
+ task_sve_to_fpsimd(current);
}
/*
@@ -367,7 +376,7 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
{
preempt_disable();
- if (IS_ENABLED(CONFIG_ARM64_SVE)) {
+ if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE)) {
current->thread.fpsimd_state = *state;
task_fpsimd_to_sve(current);
}
@@ -408,8 +417,8 @@ void kernel_neon_begin_partial(u32 num_regs)
* interrupt context, so always save the userland SVE state
* if there is any, even for interrupts.
*/
- if (IS_ENABLED(CONFIG_ARM64_SVE) && (elf_hwcap & HWCAP_SVE) &&
- current->mm &&
+ if (IS_ENABLED(CONFIG_ARM64_SVE) &&
+ test_thread_flag(TIF_SVE) && current->mm &&
!test_and_set_thread_flag(TIF_FOREIGN_FPSTATE)) {
fpsimd_save_state(&current->thread.fpsimd_state);
this_cpu_write(fpsimd_last_state, NULL);
@@ -532,6 +541,9 @@ static int __init fpsimd_init(void)
if (!(elf_hwcap & HWCAP_ASIMD))
pr_notice("Advanced SIMD is not implemented\n");
+ if (!(elf_hwcap & HWCAP_SVE))
+ pr_info("Scalable Vector Extension available\n");
+
return 0;
}
late_initcall(fpsimd_init);
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 129b016..2528ec1 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -259,6 +259,7 @@ static int __restore_sve_fpsimd_context(struct user_ctxs *user,
preempt_disable();
set_thread_flag(TIF_FOREIGN_FPSTATE);
+ set_thread_flag(TIF_SVE);
BUG_ON(SVE_SIG_REGS_SIZE(vq) > sizeof(*task_sve_regs));
BUG_ON(round_up(SVE_SIG_REGS_SIZE(vq), 16) < sizeof(*task_sve_regs));
@@ -543,9 +544,11 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user)
return err;
}
- if (IS_ENABLED(CONFIG_ARM64_SVE) && (elf_hwcap & HWCAP_SVE)) {
+ if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE)) {
unsigned int vq = sve_vq_from_vl(sve_get_vl());
+ BUG_ON(!(elf_hwcap & HWCAP_SVE));
+
err = sigframe_alloc(user, &user->sve_offset,
SVE_SIG_CONTEXT_SIZE(vq));
if (err)
--
2.1.4
^ permalink raw reply related [flat|nested] 61+ messages in thread
* [RFC PATCH 23/29] arm64/sve: Move ZEN handling to the common task_fpsimd_load() path
2016-11-25 19:38 [RFC PATCH 00/29] arm64: Scalable Vector Extension core support Dave Martin
` (21 preceding siblings ...)
2016-11-25 19:39 ` [RFC PATCH 22/29] arm64/sve: Implement FPSIMD-only context for tasks not using SVE Dave Martin
@ 2016-11-25 19:39 ` Dave Martin
2016-11-25 19:39 ` [RFC PATCH 24/29] arm64/sve: Discard SVE state on system call Dave Martin
` (7 subsequent siblings)
30 siblings, 0 replies; 61+ messages in thread
From: Dave Martin @ 2016-11-25 19:39 UTC (permalink / raw)
To: linux-arm-kernel
Currently, ZEN is handled only in fpsimd_restore_current_state(),
which is not sufficient since it applies only in certain
situations.
Since all the relevant paths call task_fpsimd_load(), this patch
moves the ZEN handling there.
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
---
arch/arm64/include/asm/thread_info.h | 1 +
arch/arm64/kernel/fpsimd.c | 48 +++++++++++++++++++-----------------
2 files changed, 27 insertions(+), 22 deletions(-)
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 2deac86..6819d08 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -133,6 +133,7 @@ static inline struct thread_info *current_thread_info(void)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
+#define _TIF_SVE (1 << TIF_SVE)
#define _TIF_32BIT (1 << TIF_32BIT)
#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index cad86e5..5834f81 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -102,21 +102,24 @@ void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
#ifdef CONFIG_ARM64_SVE
static void task_fpsimd_to_sve(struct task_struct *task);
+static void task_fpsimd_load(struct task_struct *task);
void do_sve_acc(unsigned int esr, struct pt_regs *regs)
{
- unsigned long tmp;
+ if (test_and_set_thread_flag(TIF_SVE)) {
+ unsigned long tmp;
+
+ asm ("mrs %0, cpacr_el1" : "=r" (tmp));
- if (test_and_set_thread_flag(TIF_SVE))
+ printk(KERN_INFO "%s: Strange, ZEN=%u\n",
+ __func__, (unsigned int)((tmp >> 16) & 3));
BUG();
+ }
BUG_ON(is_compat_task());
task_fpsimd_to_sve(current);
-
- asm ("mrs %0, cpacr_el1" : "=r" (tmp));
- asm volatile ("msr cpacr_el1, %0" :: "r" (tmp | (1 << 17)));
- /* Serialised by exception return to user */
+ task_fpsimd_load(current);
}
#endif /* CONFIG_ARM64_SVE */
@@ -178,6 +181,23 @@ static void task_fpsimd_load(struct task_struct *task)
&task->thread.fpsimd_state.fpsr);
else
fpsimd_load_state(&task->thread.fpsimd_state);
+
+ /*
+ * Flip SVE enable for userspace if it doesn't match the
+ * current_task.
+ */
+ if (IS_ENABLED(CONFIG_ARM64_SVE) && (elf_hwcap & HWCAP_SVE)) {
+ unsigned int tmp, flags;
+
+ asm ("mrs %0, cpacr_el1" : "=r" (tmp));
+ flags = task_thread_info(task)->flags;
+ BUILD_BUG_ON(_TIF_SVE != CPACR_EL1_ZEN_EL0EN);
+ if ((tmp ^ (unsigned long)flags) & _TIF_SVE) {
+ tmp ^= _TIF_SVE;
+ asm volatile ("msr cpacr_el1, %0" :: "r" (tmp));
+ /* Serialised by exception return to user */
+ }
+ }
}
static void task_fpsimd_save(struct task_struct *task)
@@ -306,27 +326,11 @@ void fpsimd_restore_current_state(void)
{
preempt_disable();
if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
- unsigned long tmp;
- unsigned long flags;
-
struct fpsimd_state *st = &current->thread.fpsimd_state;
task_fpsimd_load(current);
this_cpu_write(fpsimd_last_state, st);
st->cpu = smp_processor_id();
-
- if (IS_ENABLED(CONFIG_ARM64_SVE)) {
- /*
- * Flip SVE enable for userspace if it doesn't
- * match the current_task.
- */
- asm ("mrs %0, cpacr_el1" : "=r" (tmp));
- flags = current_thread_info()->flags;
- if ((tmp ^ (unsigned long)flags) & (1 << 17)) {
- tmp ^= 1 << 17;
- asm volatile ("msr cpacr_el1, %0" :: "r" (tmp));
- }
- }
}
preempt_enable();
}
--
2.1.4
^ permalink raw reply related [flat|nested] 61+ messages in thread
* [RFC PATCH 24/29] arm64/sve: Discard SVE state on system call
2016-11-25 19:38 [RFC PATCH 00/29] arm64: Scalable Vector Extension core support Dave Martin
` (22 preceding siblings ...)
2016-11-25 19:39 ` [RFC PATCH 23/29] arm64/sve: Move ZEN handling to the common task_fpsimd_load() path Dave Martin
@ 2016-11-25 19:39 ` Dave Martin
2016-11-25 19:39 ` [RFC PATCH 25/29] arm64/sve: Avoid preempt_disable() during sigreturn Dave Martin
` (6 subsequent siblings)
30 siblings, 0 replies; 61+ messages in thread
From: Dave Martin @ 2016-11-25 19:39 UTC (permalink / raw)
To: linux-arm-kernel
The base procedure call standard for the Scalable Vector Extension
defines all of the SVE programmer's model state (Z0-31, P0-15, FFR)
as caller-save, except for that subset of the state that aliases
FPSIMD state.
System calls from userspace will almost always be made through C
library wrappers -- as a consequence of the PCS there will thus
rarely if ever be any live SVE state at syscall entry in practice.
This gives us an opportunity to make SVE explicitly caller-save
around SVC and so stop carrying around the SVE state for tasks that
use SVE only occasionally (say, by calling a library).
Note that FPSIMD state will still be preserved around SVC.
As a crude heuristic to avoid pathological cases where a thread
that uses SVE frequently has to fault back into the kernel again to
re-enable SVE after a syscall, we switch the thread back to
FPSIMD-only context tracking only if the context is actually
switched out before returning to userspace.
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
---
arch/arm64/kernel/fpsimd.c | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 5834f81..2e1056e 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -203,6 +203,23 @@ static void task_fpsimd_load(struct task_struct *task)
static void task_fpsimd_save(struct task_struct *task)
{
if (IS_ENABLED(CONFIG_ARM64_SVE) &&
+ task_pt_regs(task)->syscallno != ~0UL &&
+ test_tsk_thread_flag(task, TIF_SVE)) {
+ unsigned long tmp;
+
+ clear_tsk_thread_flag(task, TIF_SVE);
+
+ /* Trap if the task tries to use SVE again: */
+ asm volatile (
+ "mrs %[tmp], cpacr_el1\n\t"
+ "bic %[tmp], %[tmp], %[mask]\n\t"
+ "msr cpacr_el1, %[tmp]"
+ : [tmp] "=r" (tmp)
+ : [mask] "i" (CPACR_EL1_ZEN_EL0EN)
+ );
+ }
+
+ if (IS_ENABLED(CONFIG_ARM64_SVE) &&
test_tsk_thread_flag(task, TIF_SVE))
sve_save_state(__task_pffr(task),
&task->thread.fpsimd_state.fpsr);
--
2.1.4
^ permalink raw reply related [flat|nested] 61+ messages in thread
* [RFC PATCH 25/29] arm64/sve: Avoid preempt_disable() during sigreturn
2016-11-25 19:38 [RFC PATCH 00/29] arm64: Scalable Vector Extension core support Dave Martin
` (23 preceding siblings ...)
2016-11-25 19:39 ` [RFC PATCH 24/29] arm64/sve: Discard SVE state on system call Dave Martin
@ 2016-11-25 19:39 ` Dave Martin
2016-11-25 19:39 ` [RFC PATCH 26/29] arm64/sve: Avoid stale user register state after SVE access exception Dave Martin
` (5 subsequent siblings)
30 siblings, 0 replies; 61+ messages in thread
From: Dave Martin @ 2016-11-25 19:39 UTC (permalink / raw)
To: linux-arm-kernel
Currently the sigreturn implementation for SVE relies on
preempt_disable() to avoid an intervening context switch from
corrupting the SVE state in the task_struct.
Unfortunately, __get_user() and friends are not safe under
preempt_disable().
As an alternative, this patch removes preempt_disable() and sets
TIF_FOREIGN_FPSTATE instead: this will inform the context switch
code that the current CPU registers don't contain the SVE/FPSIMD
state of the current task, preventing writeback to the task_struct
during context switch.
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
---
arch/arm64/kernel/signal.c | 12 +++++-------
1 file changed, 5 insertions(+), 7 deletions(-)
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 2528ec1..942d66f 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -256,10 +256,8 @@ static int __restore_sve_fpsimd_context(struct user_ctxs *user,
if (vl != sve_get_vl())
return -EINVAL;
- preempt_disable();
-
set_thread_flag(TIF_FOREIGN_FPSTATE);
- set_thread_flag(TIF_SVE);
+ barrier();
BUG_ON(SVE_SIG_REGS_SIZE(vq) > sizeof(*task_sve_regs));
BUG_ON(round_up(SVE_SIG_REGS_SIZE(vq), 16) < sizeof(*task_sve_regs));
@@ -270,7 +268,7 @@ static int __restore_sve_fpsimd_context(struct user_ctxs *user,
SVE_SIG_REGS_OFFSET,
SVE_SIG_REGS_SIZE(vq));
if (err)
- goto out_preempt;
+ return err;
/* copy the FP and status/control registers */
/* restore_sigframe() already checked that user->fpsimd != NULL. */
@@ -279,13 +277,13 @@ static int __restore_sve_fpsimd_context(struct user_ctxs *user,
__get_user_error(fpsimd.fpsr, &user->fpsimd->fpsr, err);
__get_user_error(fpsimd.fpcr, &user->fpsimd->fpcr, err);
+ barrier();
+ set_thread_flag(TIF_SVE);
+
/* load the hardware registers from the fpsimd_state structure */
if (!err)
fpsimd_update_current_state(&fpsimd);
-out_preempt:
- preempt_enable();
-
return err;
}
--
2.1.4
^ permalink raw reply related [flat|nested] 61+ messages in thread
* [RFC PATCH 26/29] arm64/sve: Avoid stale user register state after SVE access exception
2016-11-25 19:38 [RFC PATCH 00/29] arm64: Scalable Vector Extension core support Dave Martin
` (24 preceding siblings ...)
2016-11-25 19:39 ` [RFC PATCH 25/29] arm64/sve: Avoid preempt_disable() during sigreturn Dave Martin
@ 2016-11-25 19:39 ` Dave Martin
2016-11-25 19:39 ` [RFC PATCH 27/29] arm64/sve: ptrace support Dave Martin
` (4 subsequent siblings)
30 siblings, 0 replies; 61+ messages in thread
From: Dave Martin @ 2016-11-25 19:39 UTC (permalink / raw)
To: linux-arm-kernel
Currently, when an SVE access exception is taken from userspace,
the FPSIMD state in the task_struct is converted to SVE state and
reloaded as SVE state.
Unfortunately, since we've been executing in userspace there's no
guarantee that the FPSIMD state saved will actually be up to date
with respect to the registers, so updates may be lost.
This patch saves the FPSIMD state back to the task_struct first, to
ensure that the task_struct copy of the data is up to date.
Also, the CPACR handling logic is removed since task_fpsimd_load()
handles it anyway.
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
---
arch/arm64/kernel/fpsimd.c | 16 ++++++----------
1 file changed, 6 insertions(+), 10 deletions(-)
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 2e1056e..1750301 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -103,22 +103,18 @@ void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
static void task_fpsimd_to_sve(struct task_struct *task);
static void task_fpsimd_load(struct task_struct *task);
+static void task_fpsimd_save(struct task_struct *task);
void do_sve_acc(unsigned int esr, struct pt_regs *regs)
{
- if (test_and_set_thread_flag(TIF_SVE)) {
- unsigned long tmp;
-
- asm ("mrs %0, cpacr_el1" : "=r" (tmp));
-
- printk(KERN_INFO "%s: Strange, ZEN=%u\n",
- __func__, (unsigned int)((tmp >> 16) & 3));
- BUG();
- }
-
BUG_ON(is_compat_task());
+ task_fpsimd_save(current);
+
task_fpsimd_to_sve(current);
+ if (test_and_set_thread_flag(TIF_SVE))
+ BUG(); /* We shouldn't trap if SVE was already enabled! */
+
task_fpsimd_load(current);
}
--
2.1.4
^ permalink raw reply related [flat|nested] 61+ messages in thread
* [RFC PATCH 27/29] arm64/sve: ptrace support
2016-11-25 19:38 [RFC PATCH 00/29] arm64: Scalable Vector Extension core support Dave Martin
` (25 preceding siblings ...)
2016-11-25 19:39 ` [RFC PATCH 26/29] arm64/sve: Avoid stale user register state after SVE access exception Dave Martin
@ 2016-11-25 19:39 ` Dave Martin
2016-11-25 19:39 ` [RFC PATCH 28/29] arm64: KVM: Treat SVE use by guests as undefined instruction execution Dave Martin
` (3 subsequent siblings)
30 siblings, 0 replies; 61+ messages in thread
From: Dave Martin @ 2016-11-25 19:39 UTC (permalink / raw)
To: linux-arm-kernel
From: Alan Hayward <alan.hayward@arm.com>
This patch adds support for accessing a task's SVE registers via
ptrace.
Some additional helpers are added in order to support the SVE/
FPSIMD register view synchronisation operations that are required
in order to make the NT_PRFPREG and NT_ARM_SVE regsets interact
correctly.
fpr_set()/fpr_get() are refactored into backend/frontend functions,
so that the core can be reused by sve_set()/sve_get() for the case
where no SVE registers are stored for a thread.
Signed-off-by: Alan Hayward <alan.hayward@arm.com>
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
---
arch/arm64/include/asm/fpsimd.h | 20 +++
arch/arm64/include/uapi/asm/ptrace.h | 125 +++++++++++++++
arch/arm64/include/uapi/asm/sigcontext.h | 4 +
arch/arm64/kernel/fpsimd.c | 42 +++++
arch/arm64/kernel/ptrace.c | 254 ++++++++++++++++++++++++++++++-
include/uapi/linux/elf.h | 1 +
6 files changed, 440 insertions(+), 6 deletions(-)
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index e39066a..88bcf69 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -35,6 +35,10 @@ struct fpsimd_state {
__uint128_t vregs[32];
u32 fpsr;
u32 fpcr;
+ /*
+ * For ptrace compatibility, pad to next 128-bit
+ * boundary here if extending this struct.
+ */
};
};
/* the id of the last cpu to have restored this state */
@@ -98,6 +102,22 @@ extern void sve_save_state(void *state, u32 *pfpsr);
extern void sve_load_state(void const *state, u32 const *pfpsr);
extern unsigned int sve_get_vl(void);
+/*
+ * FPSIMD/SVE synchronisation helpers for ptrace:
+ * For use on stopped tasks only
+ */
+
+extern void fpsimd_sync_to_sve(struct task_struct *task);
+
+#ifdef CONFIG_ARM64_SVE
+extern void fpsimd_sync_to_fpsimd(struct task_struct *task);
+extern void fpsimd_sync_from_fpsimd_zeropad(struct task_struct *task);
+#else /* !CONFIG_ARM64_SVE */
+static void __maybe_unused fpsimd_sync_to_fpsimd(struct task_struct *task) { }
+static void __maybe_unused fpsimd_sync_from_fpsimd_zeropad(
+ struct task_struct *task) { }
+#endif /* !CONFIG_ARM64_SVE */
+
#endif
#endif
diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
index b5c3933..48b57a0 100644
--- a/arch/arm64/include/uapi/asm/ptrace.h
+++ b/arch/arm64/include/uapi/asm/ptrace.h
@@ -22,6 +22,7 @@
#include <linux/types.h>
#include <asm/hwcap.h>
+#include <asm/sigcontext.h>
/*
@@ -77,6 +78,7 @@ struct user_fpsimd_state {
__uint128_t vregs[32];
__u32 fpsr;
__u32 fpcr;
+ /* Pad to next 128-bit boundary here if extending this struct */
};
struct user_hwdebug_state {
@@ -89,6 +91,129 @@ struct user_hwdebug_state {
} dbg_regs[16];
};
+/* SVE/FP/SIMD state (NT_ARM_SVE) */
+
+struct user_sve_header {
+ __u32 size; /* total meaningful regset content in bytes */
+ __u32 max_size; /* maximum possible size for this thread */
+ __u16 vl; /* current vector length */
+ __u16 max_vl; /* maximum possible vector length */
+ __u16 flags;
+ __u16 __reserved;
+};
+
+/* Definitions for user_sve_header.flags: */
+#define SVE_PT_REGS_MASK (1 << 0)
+
+#define SVE_PT_REGS_FPSIMD 0
+#define SVE_PT_REGS_SVE SVE_PT_REGS_MASK
+
+
+/*
+ * The remainder of the SVE state follows struct user_sve_header. The
+ * total size of the SVE state (including header) depends on the
+ * metadata in the header: SVE_PT_SIZE(vq, flags) gives the total size
+ * of the state in bytes, including the header.
+ *
+ * Refer to <asm/sigcontext.h> for details of how to pass the correct
+ * "vq" argument to these macros.
+ */
+
+/* Offset from the start of struct user_sve_header to the register data */
+#define SVE_PT_REGS_OFFSET ((sizeof(struct sve_context) + 15) / 16 * 16)
+
+/*
+ * The register data content and layout depends on the value of the
+ * flags field.
+ */
+
+/*
+ * (flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD case:
+ *
+ * The payload starts at offset SVE_PT_FPSIMD_OFFSET, and is of type
+ * struct user_fpsimd_state. Additional data might be appended in the
+ * future: use SVE_PT_FPSIMD_SIZE(vq, flags) to compute the total size.
+ * SVE_PT_FPSIMD_SIZE(vq, flags) will never be less than
+ * sizeof(struct user_fpsimd_state).
+ */
+
+#define SVE_PT_FPSIMD_OFFSET SVE_PT_REGS_OFFSET
+
+#define SVE_PT_FPSIMD_SIZE(vq, flags) (sizeof(struct user_fpsimd_state))
+
+/*
+ * (flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE case:
+ *
+ * The payload starts at offset SVE_PT_SVE_OFFSET, and is of size
+ * SVE_PT_SVE_SIZE(vq, flags).
+ *
+ * Additional macros describe the contents and layout of the payload.
+ * For each, SVE_PT_SVE_x_OFFSET(args) is the start offset relative to
+ * the start of struct user_sve_header, and SVE_PT_SVE_x_SIZE(args) is
+ * the size in bytes:
+ *
+ * x type description
+ * - ---- -----------
+ * ZREGS \
+ * ZREG |
+ * PREGS | refer to <asm/sigcontext.h>
+ * PREG |
+ * FFR /
+ *
+ * FPSR uint32_t FPSR
+ * FPCR uint32_t FPCR
+ *
+ * Additional data might be appended in the future.
+ */
+
+#define SVE_PT_SVE_ZREG_SIZE(vq) SVE_SIG_ZREG_SIZE(vq)
+#define SVE_PT_SVE_PREG_SIZE(vq) SVE_SIG_PREG_SIZE(vq)
+#define SVE_PT_SVE_FFR_SIZE(vq) SVE_SIG_FFR_SIZE(vq)
+#define SVE_PT_SVE_FPSR_SIZE sizeof(__u32)
+#define SVE_PT_SVE_FPCR_SIZE sizeof(__u32)
+
+#define __SVE_SIG_TO_PT(offset) \
+ ((offset) - SVE_SIG_REGS_OFFSET + SVE_PT_REGS_OFFSET)
+
+#define SVE_PT_SVE_OFFSET SVE_PT_REGS_OFFSET
+
+#define SVE_PT_SVE_ZREGS_OFFSET \
+ __SVE_SIG_TO_PT(SVE_SIG_ZREGS_OFFSET)
+#define SVE_PT_SVE_ZREG_OFFSET(vq, n) \
+ __SVE_SIG_TO_PT(SVE_SIG_ZREG_OFFSET(vq, n))
+#define SVE_PT_SVE_ZREGS_SIZE(vq) \
+ (SVE_PT_SVE_ZREG_OFFSET(vq, SVE_NUM_ZREGS) - SVE_PT_SVE_ZREGS_OFFSET)
+
+#define SVE_PT_SVE_PREGS_OFFSET(vq) \
+ __SVE_SIG_TO_PT(SVE_SIG_PREGS_OFFSET(vq))
+#define SVE_PT_SVE_PREG_OFFSET(vq, n) \
+ __SVE_SIG_TO_PT(SVE_SIG_PREG_OFFSET(vq, n))
+#define SVE_PT_SVE_PREGS_SIZE(vq) \
+ (SVE_PT_SVE_PREG_OFFSET(vq, SVE_NUM_PREGS) - \
+ SVE_PT_SVE_PREGS_OFFSET(vq))
+
+#define SVE_PT_SVE_FFR_OFFSET(vq) \
+ __SVE_SIG_TO_PT(SVE_SIG_FFR_OFFSET(vq))
+
+#define SVE_PT_SVE_FPSR_OFFSET(vq) \
+ ((SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq) + 15) / 16 * 16)
+#define SVE_PT_SVE_FPCR_OFFSET(vq) \
+ (SVE_PT_SVE_FPSR_OFFSET(vq) + SVE_PT_SVE_FPSR_SIZE)
+
+/*
+ * Any future extension appended after FPCR must be aligned to the next
+ * 128-bit boundary.
+ */
+
+#define SVE_PT_SVE_SIZE(vq, flags) \
+ ((SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE - \
+ SVE_PT_SVE_OFFSET + 15) / 16 * 16)
+
+#define SVE_PT_SIZE(vq, flags) \
+ (((flags) & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE ? \
+ SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, flags) \
+ : SVE_PT_FPSIMD_OFFSET + SVE_PT_FPSIMD_SIZE(vq, flags))
+
#endif /* __ASSEMBLY__ */
#endif /* _UAPI__ASM_PTRACE_H */
diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h
index 11c915d..91e55de 100644
--- a/arch/arm64/include/uapi/asm/sigcontext.h
+++ b/arch/arm64/include/uapi/asm/sigcontext.h
@@ -16,6 +16,8 @@
#ifndef _UAPI__ASM_SIGCONTEXT_H
#define _UAPI__ASM_SIGCONTEXT_H
+#ifndef __ASSEMBLY__
+
#include <linux/types.h>
/*
@@ -96,6 +98,8 @@ struct sve_context {
__u16 __reserved[3];
};
+#endif /* !__ASSEMBLY__ */
+
/*
* The SVE architecture leaves space for future expansion of the
* vector length beyond its initial architectural limit of 2048 bits
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 1750301..6a5e725 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -417,6 +417,48 @@ void fpsimd_flush_task_state(struct task_struct *t)
t->thread.fpsimd_state.cpu = NR_CPUS;
}
+#ifdef CONFIG_ARM64_SVE
+
+/* FPSIMD/SVE synchronisation helpers for ptrace */
+
+void fpsimd_sync_to_sve(struct task_struct *task)
+{
+ if (!test_tsk_thread_flag(task, TIF_SVE))
+ task_fpsimd_to_sve(task);
+}
+
+void fpsimd_sync_to_fpsimd(struct task_struct *task)
+{
+ if (test_tsk_thread_flag(task, TIF_SVE))
+ task_sve_to_fpsimd(task);
+}
+
+static void __fpsimd_sync_from_fpsimd_zeropad(struct task_struct *task,
+ unsigned int vq)
+{
+ struct sve_struct fpsimd_sve_state(vq) *sst =
+ __task_sve_state(task);
+ struct fpsimd_state *fst = &task->thread.fpsimd_state;
+ unsigned int i;
+
+ if (!test_tsk_thread_flag(task, TIF_SVE))
+ return;
+
+ memset(sst->zregs, 0, sizeof(sst->zregs));
+
+ for (i = 0; i < 32; ++i)
+ sst->zregs[i][0] = fst->vregs[i];
+}
+
+void fpsimd_sync_from_fpsimd_zeropad(struct task_struct *task)
+{
+ unsigned int vl = sve_get_vl();
+
+ __fpsimd_sync_from_fpsimd_zeropad(task, sve_vq_from_vl(vl));
+}
+
+#endif /* CONFIG_ARM64_SVE */
+
#ifdef CONFIG_KERNEL_MODE_NEON
static DEFINE_PER_CPU(struct fpsimd_partial_state, hardirq_fpsimdstate);
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index e0c81da..bdd2ad3 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -30,7 +30,9 @@
#include <linux/seccomp.h>
#include <linux/security.h>
#include <linux/init.h>
+#include <linux/sched.h>
#include <linux/signal.h>
+#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
@@ -611,13 +613,46 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset,
/*
* TODO: update fp accessors for lazy context switching (sync/flush hwstate)
*/
+static int __fpr_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf, unsigned int start_pos)
+{
+ struct user_fpsimd_state *uregs;
+
+ fpsimd_sync_to_fpsimd(target);
+
+ uregs = &target->thread.fpsimd_state.user_fpsimd;
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs,
+ start_pos, start_pos + sizeof(*uregs));
+}
+
static int fpr_get(struct task_struct *target, const struct user_regset *regset,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf)
{
- struct user_fpsimd_state *uregs;
- uregs = &target->thread.fpsimd_state.user_fpsimd;
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, 0, -1);
+ return __fpr_get(target, regset, pos, count, kbuf, ubuf, 0);
+}
+
+static int __fpr_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf,
+ unsigned int start_pos)
+{
+ int ret;
+ struct user_fpsimd_state newstate;
+
+ fpsimd_sync_to_fpsimd(target);
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newstate,
+ start_pos, start_pos + sizeof(newstate));
+ if (ret)
+ return ret;
+
+ target->thread.fpsimd_state.user_fpsimd = newstate;
+
+ return ret;
}
static int fpr_set(struct task_struct *target, const struct user_regset *regset,
@@ -625,14 +660,14 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset,
const void *kbuf, const void __user *ubuf)
{
int ret;
- struct user_fpsimd_state newstate;
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newstate, 0, -1);
+ ret = __fpr_set(target, regset, pos, count, kbuf, ubuf, 0);
if (ret)
return ret;
- target->thread.fpsimd_state.user_fpsimd = newstate;
+ fpsimd_sync_from_fpsimd_zeropad(target);
fpsimd_flush_task_state(target);
+
return ret;
}
@@ -685,6 +720,204 @@ static int system_call_set(struct task_struct *target,
return ret;
}
+#ifdef CONFIG_ARM64_SVE
+
+static int sve_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ int ret;
+ struct user_sve_header header;
+ unsigned int vq;
+ unsigned long start, end;
+
+ /* Header */
+ memset(&header, 0, sizeof(header));
+
+ header.vl = sve_get_vl();
+
+ BUG_ON(!sve_vl_valid(header.vl));
+ vq = sve_vq_from_vl(header.vl);
+
+ /* Until runtime or per-task vector length changing is supported: */
+ header.max_vl = header.vl;
+
+ header.flags = test_tsk_thread_flag(target, TIF_SVE) ?
+ SVE_PT_REGS_SVE : SVE_PT_REGS_FPSIMD;
+
+ header.size = SVE_PT_SIZE(vq, header.flags);
+ header.max_size = SVE_PT_SIZE(vq, SVE_PT_REGS_SVE);
+
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &header,
+ 0, sizeof(header));
+ if (ret)
+ return ret;
+
+ /* Registers: FPSIMD-only case */
+
+ BUILD_BUG_ON(SVE_PT_FPSIMD_OFFSET != sizeof(header));
+
+ if ((header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD)
+ return __fpr_get(target, regset, pos, count, kbuf, ubuf,
+ SVE_PT_FPSIMD_OFFSET);
+
+ /* Otherwise: full SVE case */
+
+ BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header));
+
+ start = SVE_PT_SVE_OFFSET;
+ end = SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq);
+
+ BUG_ON((char *)__task_sve_state(target) < (char *)target);
+ BUG_ON(end < start);
+ BUG_ON(arch_task_struct_size < end - start);
+ BUG_ON((char *)__task_sve_state(target) - (char *)target >
+ arch_task_struct_size - (end - start));
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ __task_sve_state(target),
+ start, end);
+ if (ret)
+ return ret;
+
+ start = end;
+ end = SVE_PT_SVE_FPSR_OFFSET(vq);
+
+ BUG_ON(end < start);
+ ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+ start, end);
+ if (ret)
+ return ret;
+
+ start = end;
+ end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE;
+
+ BUG_ON((char *)(&target->thread.fpsimd_state.fpcr + 1) <
+ (char *)&target->thread.fpsimd_state.fpsr);
+ BUG_ON(end < start);
+ BUG_ON((char *)(&target->thread.fpsimd_state.fpcr + 1) -
+ (char *)&target->thread.fpsimd_state.fpsr !=
+ end - start);
+
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.fpsimd_state.fpsr,
+ start, end);
+ if (ret)
+ return ret;
+
+ start = end;
+ end = (SVE_PT_SIZE(SVE_VQ_MAX, SVE_PT_REGS_SVE) + 15) / 16 * 16;
+ BUG_ON(end < start);
+
+ return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+ start, end);
+}
+
+static int sve_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret;
+ struct user_sve_header header;
+ unsigned int vq;
+ unsigned long start, end;
+
+ /* Header */
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &header,
+ 0, sizeof(header));
+ if (ret)
+ goto out;
+
+ if (header.vl != sve_get_vl())
+ return -EINVAL;
+
+ BUG_ON(!sve_vl_valid(header.vl));
+ vq = sve_vq_from_vl(header.vl);
+
+ if (header.flags & ~SVE_PT_REGS_MASK)
+ return -EINVAL;
+
+ /* Registers: FPSIMD-only case */
+
+ BUILD_BUG_ON(SVE_PT_FPSIMD_OFFSET != sizeof(header));
+
+ if ((header.flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD) {
+ ret = __fpr_set(target, regset, pos, count, kbuf, ubuf,
+ SVE_PT_FPSIMD_OFFSET);
+ clear_tsk_thread_flag(target, TIF_SVE);
+ goto out;
+ }
+
+ /* Otherwise: full SVE case */
+
+ fpsimd_sync_to_sve(target);
+ set_tsk_thread_flag(target, TIF_SVE);
+
+ BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header));
+
+ start = SVE_PT_SVE_OFFSET;
+ end = SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq);
+
+ BUG_ON((char *)__task_sve_state(target) < (char *)target);
+ BUG_ON(end < start);
+ BUG_ON(arch_task_struct_size < end - start);
+ BUG_ON((char *)__task_sve_state(target) - (char *)target >
+ arch_task_struct_size - (end - start));
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ __task_sve_state(target),
+ start, end);
+ if (ret)
+ goto out;
+
+ start = end;
+ end = SVE_PT_SVE_FPSR_OFFSET(vq);
+
+ BUG_ON(end < start);
+ ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+ start, end);
+ if (ret)
+ goto out;
+
+ start = end;
+ end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE;
+
+ BUG_ON((char *)(&target->thread.fpsimd_state.fpcr + 1) <
+ (char *)&target->thread.fpsimd_state.fpsr);
+ BUG_ON(end < start);
+ BUG_ON((char *)(&target->thread.fpsimd_state.fpcr + 1) -
+ (char *)&target->thread.fpsimd_state.fpsr !=
+ end - start);
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.fpsimd_state.fpsr,
+ start, end);
+
+out:
+ fpsimd_flush_task_state(target);
+ return ret;
+}
+
+#else /* !CONFIG_ARM64_SVE */
+
+static int sve_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ return -EINVAL;
+}
+
+static int sve_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ return -EINVAL;
+}
+
+#endif /* !CONFIG_ARM64_SVE */
+
enum aarch64_regset {
REGSET_GPR,
REGSET_FPR,
@@ -694,6 +927,7 @@ enum aarch64_regset {
REGSET_HW_WATCH,
#endif
REGSET_SYSTEM_CALL,
+ REGSET_SVE,
};
static const struct user_regset aarch64_regsets[] = {
@@ -751,6 +985,14 @@ static const struct user_regset aarch64_regsets[] = {
.get = system_call_get,
.set = system_call_set,
},
+ [REGSET_SVE] = { /* Scalable Vector Extension */
+ .core_note_type = NT_ARM_SVE,
+ .n = (SVE_PT_SIZE(SVE_VQ_MAX, SVE_PT_REGS_SVE) + 15) / 16,
+ .size = 16,
+ .align = 16,
+ .get = sve_get,
+ .set = sve_set,
+ },
};
static const struct user_regset_view user_aarch64_view = {
diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index b59ee07..23c6585 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -414,6 +414,7 @@ typedef struct elf64_shdr {
#define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */
#define NT_ARM_HW_WATCH 0x403 /* ARM hardware watchpoint registers */
#define NT_ARM_SYSTEM_CALL 0x404 /* ARM system call number */
+#define NT_ARM_SVE 0x405 /* ARM Scalable Vector Extension registers */
#define NT_METAG_CBUF 0x500 /* Metag catch buffer registers */
#define NT_METAG_RPIPE 0x501 /* Metag read pipeline state */
#define NT_METAG_TLS 0x502 /* Metag TLS pointer */
--
2.1.4
^ permalink raw reply related [flat|nested] 61+ messages in thread
* [RFC PATCH 28/29] arm64: KVM: Treat SVE use by guests as undefined instruction execution
2016-11-25 19:38 [RFC PATCH 00/29] arm64: Scalable Vector Extension core support Dave Martin
` (26 preceding siblings ...)
2016-11-25 19:39 ` [RFC PATCH 27/29] arm64/sve: ptrace support Dave Martin
@ 2016-11-25 19:39 ` Dave Martin
2016-11-25 19:39 ` [RFC PATCH 29/29] arm64/sve: Limit vector length to 512 bits by default Dave Martin
` (2 subsequent siblings)
30 siblings, 0 replies; 61+ messages in thread
From: Dave Martin @ 2016-11-25 19:39 UTC (permalink / raw)
To: linux-arm-kernel
We don't currently support context-switching of Scalable Vector
Extension context between vcpus, and the SVE access exception is
thus left masked by default at EL2 when running a vcpu.
However, there's nothing to stop a guest trying to use SVE. If it
does, we'll get an SVE access exception to EL2 which will cause KVM
to panic since this exception isn't yet recognised.
This patch adds knowledge to KVM about the SVE access exception,
translating it into an undefined instruction exception injected to
the vcpu.
This prevents a malicious guest from panicking the host by
attempted SVE use.
SVE-enabled guests will still not work properly for now, but they
won't take the host down.
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
---
arch/arm64/kvm/handle_exit.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index a204adf..f43b6d0 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -125,6 +125,14 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run)
return ret;
}
+static int handle_sve(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ /* Until SVE is supported for guests: */
+ kvm_inject_undefined(vcpu);
+
+ return 1;
+}
+
static exit_handle_fn arm_exit_handlers[] = {
[ESR_ELx_EC_WFx] = kvm_handle_wfx,
[ESR_ELx_EC_CP15_32] = kvm_handle_cp15_32,
@@ -137,6 +145,7 @@ static exit_handle_fn arm_exit_handlers[] = {
[ESR_ELx_EC_HVC64] = handle_hvc,
[ESR_ELx_EC_SMC64] = handle_smc,
[ESR_ELx_EC_SYS64] = kvm_handle_sys_reg,
+ [ESR_ELx_EC_SVE] = handle_sve,
[ESR_ELx_EC_IABT_LOW] = kvm_handle_guest_abort,
[ESR_ELx_EC_DABT_LOW] = kvm_handle_guest_abort,
[ESR_ELx_EC_SOFTSTP_LOW]= kvm_handle_guest_debug,
--
2.1.4
^ permalink raw reply related [flat|nested] 61+ messages in thread
* [RFC PATCH 29/29] arm64/sve: Limit vector length to 512 bits by default
2016-11-25 19:38 [RFC PATCH 00/29] arm64: Scalable Vector Extension core support Dave Martin
` (27 preceding siblings ...)
2016-11-25 19:39 ` [RFC PATCH 28/29] arm64: KVM: Treat SVE use by guests as undefined instruction execution Dave Martin
@ 2016-11-25 19:39 ` Dave Martin
2016-11-30 9:56 ` [RFC PATCH 00/29] arm64: Scalable Vector Extension core support Yao Qi
2016-11-30 10:08 ` Florian Weimer
30 siblings, 0 replies; 61+ messages in thread
From: Dave Martin @ 2016-11-25 19:39 UTC (permalink / raw)
To: linux-arm-kernel
As a transitional workaround for userspace incompatibilities caused
by enlargement of the signal frame, this patch adds a new config
option CONFIG_ARM64_SVE_FULL_VECTOR_LENGTH, which defaults to n.
Unless this option is set to y, the vector length for SVE will be
limited to 512 bits. This leaves a bit of free space for other
architecture extensions just in case we need it.
This option will be removed and replaced with a user/kernel control
interface in the future.
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
---
arch/arm64/Kconfig | 35 +++++++++++++++++++++++++++++++++++
arch/arm64/mm/proc.S | 5 +++++
2 files changed, 40 insertions(+)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index bf9915cb..7cd9812 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -887,6 +887,41 @@ config ARM64_SVE
To enable use of this extension on CPUs that implement it, say Y.
+if ARM64_SVE
+
+config ARM64_SVE_FULL_VECTOR_LENGTH
+ bool "Enable full hardware vector length for userspace"
+ default n
+ help
+ SVE vector lengths greater than 512 bits impact the size of signal
+ frames and therefore the size requirements for any userspace stack
+ onto which a signal may be delivered. Using larger vector lengths
+ may therefore cause problems for some software. For this reason, the
+ kernel currently limits the maximum vector length for userspace
+ software to 512 bits by default.
+
+ Enabling this option removes the limit, so that the full vector
+ length implemented by the hardware is made available to userspace.
+
+ Be aware: in general, software that (a) does not use SVE (including
+ via libraries), or (b) does not handle signals, or (c) uses default
+ process/thread stack sizes and does not use sigaltstack(2) should be
+ unaffected by enabling larger vectors. Software that does not meet
+ these criteria or that relies on certain legacy uses of the ucontext
+ API may be affected however.
+
+ This is a transitional compatibility option only and will be replaced
+ by a userspace ABI extension in the future. Do not assume that this
+ option will be available with compatible effect in future Linux
+ releases.
+
+ If you are developing software that uses SVE and understand the
+ implications, you can consider saying Y here.
+
+ If unsure, say N.
+
+endif
+
config ARM64_MODULE_CMODEL_LARGE
bool
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 1da8160..a2839e6 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -198,6 +198,11 @@ ENTRY(__cpu_setup)
mrs_s x5, ZIDR_EL1 // SVE: Enable full vector len
and x5, x5, #ZCR_EL1_LEN_MASK // initially
+#ifndef CONFIG_ARM64_SVE_FULL_VECTOR_LENGTH
+ mov x6, #(512 / 128 - 1) // Clamp VL to 512 bits
+ cmp x5, x6
+ csel x5, x5, x6, lo
+#endif
msr_s ZCR_EL1, x5
b 2f
--
2.1.4
^ permalink raw reply related [flat|nested] 61+ messages in thread
* [RFC PATCH 00/29] arm64: Scalable Vector Extension core support
2016-11-25 19:38 [RFC PATCH 00/29] arm64: Scalable Vector Extension core support Dave Martin
` (28 preceding siblings ...)
2016-11-25 19:39 ` [RFC PATCH 29/29] arm64/sve: Limit vector length to 512 bits by default Dave Martin
@ 2016-11-30 9:56 ` Yao Qi
2016-11-30 12:06 ` Dave Martin
2016-11-30 10:08 ` Florian Weimer
30 siblings, 1 reply; 61+ messages in thread
From: Yao Qi @ 2016-11-30 9:56 UTC (permalink / raw)
To: linux-arm-kernel
Hi, Dave,
On Fri, Nov 25, 2016 at 7:38 PM, Dave Martin <Dave.Martin@arm.com> wrote:
> * No independent SVE vector length configuration per thread. This is
> planned, but will follow as a separate add-on series.
If I read "independent SVE vector length configuration per thread"
correctly, SVE vector length can be different in each thread, so the
size of vector registers is different too. In GDB, we describe registers
by "target description" which is per process, not per thread.
--
Yao (??)
^ permalink raw reply [flat|nested] 61+ messages in thread
* [RFC PATCH 00/29] arm64: Scalable Vector Extension core support
2016-11-30 9:56 ` [RFC PATCH 00/29] arm64: Scalable Vector Extension core support Yao Qi
@ 2016-11-30 12:06 ` Dave Martin
2016-11-30 12:22 ` Szabolcs Nagy
` (3 more replies)
0 siblings, 4 replies; 61+ messages in thread
From: Dave Martin @ 2016-11-30 12:06 UTC (permalink / raw)
To: linux-arm-kernel
On Wed, Nov 30, 2016 at 11:08:50AM +0100, Florian Weimer wrote:
> On 11/25/2016 08:38 PM, Dave Martin wrote:
> >The Scalable Vector Extension (SVE) [1] is an extension to AArch64 which
> >adds extra SIMD functionality and supports much larger vectors.
> >
> >This series implements core Linux support for SVE.
> >
> >Recipents not copied on the whole series can find the rest of the
> >patches in the linux-arm-kernel archives [2].
> >
> >
> >The first 5 patches "arm64: signal: ..." factor out the allocation and
> >placement of state information in the signal frame. The first three
> >are prerequisites for the SVE support patches.
> >
> >Patches 04-05 implement expansion of the signal frame, and may remain
> >controversial due to ABI break issues:
> >
> > * Discussion is needed on how userspace should detect/negotiate signal
> > frame size in order for this expansion mechanism to be workable.
>
> I'm leaning towards a simple increase in the glibc headers (despite the ABI
> risk), plus a personality flag to disable really wide vector registers in
> case this causes problems with old binaries.
I'm concerned here that there may be no sensible fixed size for the
signal frame. We would make it ridiculously large in order to minimise
the chance of hitting this problem again -- but then it would be
ridiculously large, which is a potential problem for massively threaded
workloads.
Or we could be more conservative, but risk a re-run of similar ABI
breaks in the future.
A personality flag may also discourage use of larger vectors, even
though the vast majority of software will work fine with them.
> A more elaborate mechanism will likely introduce more bugs than it makes
> existing applications working, due to its complexity.
Yes, I was a bit concerned about this when I tried to sketch something
out.
[...]
> > * No independent SVE vector length configuration per thread. This is
> > planned, but will follow as a separate add-on series.
>
> Per-thread register widths will likely make coroutine switching (setcontext)
> and C++ resumable functions/executors quite challenging.
>
> Can you detail your plans in this area?
>
> Thanks,
> Florian
I'll also respond to Yao's question here, since it's closely related:
On Wed, Nov 30, 2016 at 09:56:14AM +0000, Yao Qi wrote:
[...]
> If I read "independent SVE vector length configuration per thread"
> correctly, SVE vector length can be different in each thread, so the
> size of vector registers is different too. In GDB, we describe registers
> by "target description" which is per process, not per thread.
>
> --
> Yao (??)
So, my key goal is to support _per-process_ vector length control.
From the kernel perspective, it is easiest to achieve this by providing
per-thread control since that is the unit that context switching acts
on.
How useful it really is to have threads with different VLs in the same
process is an open question. It's theoretically useful for runtime
environments, which may want to dispatch code optimised for different
VLs -- changing the VL on-the-fly within a single thread is not
something I want to encourage, due to overhead and ABI issues, but
switching between threads of different VLs would be more manageable.
However, I expect mixing different VLs within a single process to be
very much a special case -- it's not something I'd expect to work with
general-purpose code.
Since the need for independent VLs per thread is not proven, we could
* forbid it -- i.e., only a thread-group leader with no children is
permitted to change the VL, which is then inherited by any child threads
that are subsequently created
* permit it only if a special flag is specified when requesting the VL
change
* permit it and rely on userspace to be sensible -- easiest option for
the kernel.
For setcontext/setjmp, we don't save/restore any SVE state due to the
caller-save status of SVE, and I would not consider it necessary to
save/restore VL itself because of the no-change-on-the-fly policy for
this.
I'm not familiar with resumable functions/executors -- are these in
the C++ standards yet (not that that would cause me to be familiar
with them... ;) Any implementation of coroutines (i.e.,
cooperative switching) is likely to fall under the "setcontext"
argument above.
Thoughts?
---Dave
^ permalink raw reply [flat|nested] 61+ messages in thread
* [RFC PATCH 00/29] arm64: Scalable Vector Extension core support
2016-11-30 12:06 ` Dave Martin
@ 2016-11-30 12:22 ` Szabolcs Nagy
2016-11-30 14:10 ` Dave Martin
2016-11-30 12:38 ` Florian Weimer
` (2 subsequent siblings)
3 siblings, 1 reply; 61+ messages in thread
From: Szabolcs Nagy @ 2016-11-30 12:22 UTC (permalink / raw)
To: linux-arm-kernel
On 30/11/16 12:06, Dave Martin wrote:
> For setcontext/setjmp, we don't save/restore any SVE state due to the
> caller-save status of SVE, and I would not consider it necessary to
> save/restore VL itself because of the no-change-on-the-fly policy for
> this.
the problem is not changing VL within a thread,
but that setcontext can resume a context of a
different thread which had different VL and there
might be SVE regs spilled on the stack according
to that.
(i consider this usage undefined, but at least
the gccgo runtime does this)
^ permalink raw reply [flat|nested] 61+ messages in thread
* [RFC PATCH 00/29] arm64: Scalable Vector Extension core support
2016-11-30 12:22 ` Szabolcs Nagy
@ 2016-11-30 14:10 ` Dave Martin
0 siblings, 0 replies; 61+ messages in thread
From: Dave Martin @ 2016-11-30 14:10 UTC (permalink / raw)
To: linux-arm-kernel
On Wed, Nov 30, 2016 at 12:22:32PM +0000, Szabolcs Nagy wrote:
> On 30/11/16 12:06, Dave Martin wrote:
> > For setcontext/setjmp, we don't save/restore any SVE state due to the
> > caller-save status of SVE, and I would not consider it necessary to
> > save/restore VL itself because of the no-change-on-the-fly policy for
> > this.
>
> the problem is not changing VL within a thread,
> but that setcontext can resume a context of a
> different thread which had different VL and there
> might be SVE regs spilled on the stack according
> to that.
>
> (i consider this usage undefined, but at least
> the gccgo runtime does this)
Understood -- which is part of the reason for the argument that although
the kernel may permit different threads to have different VLs, whether
this actually works usefully also depends on your userspace runtime
environment.
This again leads me to the conclusion that the request to create threads
with different VLs within a single process should be explicit, in order
to avoid accidents.
Cheers
---Dave
^ permalink raw reply [flat|nested] 61+ messages in thread
* [RFC PATCH 00/29] arm64: Scalable Vector Extension core support
2016-11-30 12:06 ` Dave Martin
2016-11-30 12:22 ` Szabolcs Nagy
@ 2016-11-30 12:38 ` Florian Weimer
2016-11-30 13:56 ` Dave Martin
2016-12-02 11:48 ` Dave Martin
2016-12-02 21:56 ` Yao Qi
2016-12-05 22:42 ` Torvald Riegel
3 siblings, 2 replies; 61+ messages in thread
From: Florian Weimer @ 2016-11-30 12:38 UTC (permalink / raw)
To: linux-arm-kernel
On 11/30/2016 01:06 PM, Dave Martin wrote:
> I'm concerned here that there may be no sensible fixed size for the
> signal frame. We would make it ridiculously large in order to minimise
> the chance of hitting this problem again -- but then it would be
> ridiculously large, which is a potential problem for massively threaded
> workloads.
What's ridiculously large?
We could add a system call to get the right stack size. But as it
depends on VL, I'm not sure what it looks like. Particularly if you
need determine the stack size before creating a thread that uses a
specific VL setting.
> For setcontext/setjmp, we don't save/restore any SVE state due to the
> caller-save status of SVE, and I would not consider it necessary to
> save/restore VL itself because of the no-change-on-the-fly policy for
> this.
Okay, so we'd potentially set it on thread creation only? That might
not be too bad.
I really want to avoid a repeat of the setxid fiasco, where we need to
run code on all threads to get something that approximates the
POSIX-mandated behavior (process attribute) from what the kernel
provides (thread/task attribute).
> I'm not familiar with resumable functions/executors -- are these in
> the C++ standards yet (not that that would cause me to be familiar
> with them... ;) Any implementation of coroutines (i.e.,
> cooperative switching) is likely to fall under the "setcontext"
> argument above.
There are different ways to implement coroutines. Stack switching (like
setcontext) is obviously impacted by non-uniform register sizes. But
even the most conservative variant, rather similar to switch-based
emulation you sometimes see in C coroutine implementations, might have
trouble restoring the state if it just cannot restore the saved state
due to register size reductions.
Thanks,
Florian
^ permalink raw reply [flat|nested] 61+ messages in thread
* [RFC PATCH 00/29] arm64: Scalable Vector Extension core support
2016-11-30 12:38 ` Florian Weimer
@ 2016-11-30 13:56 ` Dave Martin
2016-12-01 9:21 ` Florian Weimer
2016-12-02 11:48 ` Dave Martin
1 sibling, 1 reply; 61+ messages in thread
From: Dave Martin @ 2016-11-30 13:56 UTC (permalink / raw)
To: linux-arm-kernel
On Wed, Nov 30, 2016 at 01:38:28PM +0100, Florian Weimer wrote:
> On 11/30/2016 01:06 PM, Dave Martin wrote:
>
> >I'm concerned here that there may be no sensible fixed size for the
> >signal frame. We would make it ridiculously large in order to minimise
> >the chance of hitting this problem again -- but then it would be
> >ridiculously large, which is a potential problem for massively threaded
> >workloads.
>
> What's ridiculously large?
The SVE architecture permits VLs up to 2048 bits per vector initially --
but it makes space for future architecture revisions to expand up to
65536 bits per vector, which would result in a signal frame > 270 KB.
It's far from certain we'll ever see such large vectors, but it's hard
to know where to draw the line.
> We could add a system call to get the right stack size. But as it depends
> on VL, I'm not sure what it looks like. Particularly if you need to determine
> the stack size before creating a thread that uses a specific VL setting.
I think that the most likely time to set the VL is libc startup or ld.so
startup -- so really a process considers the VL fixed, and a
hypothetical getsigstksz() function would return a constant value
depending on the VL that was set.
I'd expect that only specialised code such as libc/ld.so itself or fancy
runtimes would need to cope with the need to synchronise stack
allocation with VL setting.
The initial stack after exec is determined by RLIMIT_STACK -- we can
expect that to be easily large enough for the initial thread, under any
remotely normal scenario.
> >For setcontext/setjmp, we don't save/restore any SVE state due to the
> >caller-save status of SVE, and I would not consider it necessary to
> >save/restore VL itself because of the no-change-on-the-fly policy for
> >this.
>
> Okay, so we'd potentially set it on thread creation only? That might not be
> too bad.
Basically, yes. A runtime _could_ set it at other times, and my view
is that the kernel shouldn't arbitrarily forbid this -- but it's up to
userspace to determine when it's safe to do it, ensure that there's no
VL-dependent data live in memory, and to arrange to reallocate stacks
or pre-arrange that allocations were already big enough etc.
> I really want to avoid a repeat of the setxid fiasco, where we need to run
> code on all threads to get something that approximates the POSIX-mandated
> behavior (process attribute) from what the kernel provides (thread/task
> attribute).
Yeah, that would suck.
However, for the proposed ABI there is no illusion to preserve here,
since the VL is proposed as a per-thread property everywhere, and this
is outside the scope of POSIX.
If we do have distinct "set process VL" and "set thread VL" interfaces,
then my view is that the former should fail if there are already
multiple threads, rather than just setting the VL of a single thread or
(worse) asynchronously changing the VL of threads other than the
caller...
> >I'm not familiar with resumable functions/executors -- are these in
> >the C++ standards yet (not that that would cause me to be familiar
> >with them... ;) Any implementation of coroutines (i.e.,
> >cooperative switching) is likely to fall under the "setcontext"
> >argument above.
>
> There are different ways to implement coroutines. Stack switching (like
> setcontext) is obviously impacted by non-uniform register sizes. But even
> the most conservative variant, rather similar to switch-based emulation you
> sometimes see in C coroutine implementations, might have trouble restoring
> the state if it just cannot restore the saved state due to register size
> reductions.
Which is not a problem if the variably-sized state is not part of the
switched context?
Because the SVE procedure call standard determines that the SVE
registers are caller-save, they are not live at any external function
boundary -- so in cooperative switching it is useless to save/restore
this state unless the coroutine framework is defined to have a special
procedure call standard.
Similarly, my view is that we don't attempt to magically save and
restore VL itself either. Code that changes VL after startup would be
expected to be aware of and deal with the consequences itself.
Cheers
---Dave
^ permalink raw reply [flat|nested] 61+ messages in thread
* [RFC PATCH 00/29] arm64: Scalable Vector Extension core support
2016-11-30 13:56 ` Dave Martin
@ 2016-12-01 9:21 ` Florian Weimer
2016-12-01 10:30 ` Dave Martin
0 siblings, 1 reply; 61+ messages in thread
From: Florian Weimer @ 2016-12-01 9:21 UTC (permalink / raw)
To: linux-arm-kernel
On 11/30/2016 02:56 PM, Dave Martin wrote:
> If we do have distinct "set process VL" and "set thread VL" interfaces,
> then my view is that the former should fail if there are already
> multiple threads, rather than just setting the VL of a single thread or
> (worse) asynchronously changing the VL of threads other than the
> caller...
Yes, looks feasible to me.
>>> I'm not familiar with resumable functions/executors -- are these in
>>> the C++ standards yet (not that that would cause me to be familiar
>>> with them... ;) Any implementation of coroutines (i.e.,
>>> cooperative switching) is likely to fall under the "setcontext"
>>> argument above.
>>
>> There are different ways to implement coroutines. Stack switching (like
>> setcontext) is obviously impacted by non-uniform register sizes. But even
>> the most conservative variant, rather similar to switch-based emulation you
>> sometimes see in C coroutine implementations, might have trouble restoring
>> the state if it just cannot restore the saved state due to register size
>> reductions.
>
> Which is not a problem if the variably-sized state is not part of the
> switched context?
The VL value is implicitly thread-local data, and the encoded state may
have an implicit dependency on it, although it does not contain vector
registers as such.
> Because the SVE procedure call standard determines that the SVE
> registers are caller-save,
By the way, how is this implemented? Some of them overlap existing
callee-saved registers.
> they are not live at any external function
> boundary -- so in cooperative switching it is useless to save/restore
> this state unless the coroutine framework is defined to have a special
> procedure call standard.
It can use the standard calling convention, but it may have selected a
particular implementation based on the VL value before suspension.
Florian
^ permalink raw reply [flat|nested] 61+ messages in thread
* [RFC PATCH 00/29] arm64: Scalable Vector Extension core support
2016-12-01 9:21 ` Florian Weimer
@ 2016-12-01 10:30 ` Dave Martin
2016-12-01 12:19 ` Dave Martin
2016-12-05 10:44 ` Florian Weimer
0 siblings, 2 replies; 61+ messages in thread
From: Dave Martin @ 2016-12-01 10:30 UTC (permalink / raw)
To: linux-arm-kernel
On Thu, Dec 01, 2016 at 10:21:03AM +0100, Florian Weimer wrote:
> On 11/30/2016 02:56 PM, Dave Martin wrote:
>
> >If we do have distinct "set process VL" and "set thread VL" interfaces,
> >then my view is that the former should fail if there are already
> >multiple threads, rather than just setting the VL of a single thread or
> >(worse) asynchronously changing the VL of threads other than the
> >caller...
>
> Yes, looks feasible to me.
OK, I'll try to hack up something along these lines.
> >>>I'm not familiar with resumable functions/executors -- are these in
> >>>the C++ standards yet (not that that would cause me to be familiar
> >>>with them... ;) Any implementation of coroutines (i.e.,
> >>>cooperative switching) is likely to fall under the "setcontext"
> >>>argument above.
> >>
> >>There are different ways to implement coroutines. Stack switching (like
> >>setcontext) is obviously impacted by non-uniform register sizes. But even
> >>the most conservative variant, rather similar to switch-based emulation you
> >>sometimes see in C coroutine implementations, might have trouble restoring
> >>the state if it just cannot restore the saved state due to register size
> >>reductions.
> >
> >Which is not a problem if the variably-sized state is not part of the
> >switched context?
>
> The VL value is implicitly thread-local data, and the encoded state may have
> an implicit dependency on it, although it does not contain vector registers
> as such.
This doesn't sound like an absolute requirement to me.
If we presume that the SVE registers never need to get saved or
restored, what stops the context data format being VL-independent?
The setcontext()/getcontext() implementation for example will not change
at all for SVE.
> >Because the SVE procedure call standard determines that the SVE
> >registers are caller-save,
>
> By the way, how is this implemented? Some of them overlap existing
> callee-saved registers.
Basically, all the *new* state is caller-save.
The Neon/FPSIMD regs V8-V15 are callee-save, so in the SVE view
Zn[bits 127:0] is callee-save for all n = 8..15.
> >they are not live at any external function
> >boundary -- so in cooperative switching it is useless to save/restore
> >this state unless the coroutine framework is defined to have a special
> >procedure call standard.
>
> It can use the standard calling convention, but it may have selected a
> particular implementation based on the VL value before suspension.
If the save/restore logic doesn't touch SVE, why would its
implementation be VL-dependent?
Cheers
---Dave
^ permalink raw reply [flat|nested] 61+ messages in thread
* [RFC PATCH 00/29] arm64: Scalable Vector Extension core support
2016-12-01 10:30 ` Dave Martin
@ 2016-12-01 12:19 ` Dave Martin
2016-12-05 10:44 ` Florian Weimer
1 sibling, 0 replies; 61+ messages in thread
From: Dave Martin @ 2016-12-01 12:19 UTC (permalink / raw)
To: linux-arm-kernel
On Thu, Dec 01, 2016 at 10:30:51AM +0000, Dave Martin wrote:
[...]
> Basically, all the *new* state is caller-save.
>
> The Neon/FPSIMD regs V8-V15 are callee-save, so in the SVE view
> Zn[bits 127:0] is callee-save for all n = 8..15.
Ramana is right -- the current procedure call standard (ARM IHI 0055B)
only requires the bottom _64_ bits of V8-V15 to be preserved (not all
128 bits as I stated).
[...]
Cheers
---Dave
^ permalink raw reply [flat|nested] 61+ messages in thread
* [RFC PATCH 00/29] arm64: Scalable Vector Extension core support
2016-12-01 10:30 ` Dave Martin
2016-12-01 12:19 ` Dave Martin
@ 2016-12-05 10:44 ` Florian Weimer
2016-12-05 11:07 ` Szabolcs Nagy
2016-12-05 15:04 ` Dave Martin
1 sibling, 2 replies; 61+ messages in thread
From: Florian Weimer @ 2016-12-05 10:44 UTC (permalink / raw)
To: linux-arm-kernel
On 12/01/2016 11:30 AM, Dave Martin wrote:
>> The VL value is implicitly thread-local data, and the encoded state may have
>> an implicit dependency on it, although it does not contain vector registers
>> as such.
>
> This doesn't sound like an absolute requirement to me.
>
> If we presume that the SVE registers never need to get saved or
> restored, what stops the context data format being VL-independent?
I'm concerned the suspended computation has code which has been selected
to fit a particular VL value.
> If the save/restore logic doesn't touch SVE, why would its
> implementation be VL-dependent?
Because it has been optimized for a certain vector length?
>>> Because the SVE procedure call standard determines that the SVE
>>> registers are caller-save,
>>
>> By the way, how is this implemented? Some of them overlap existing
>> callee-saved registers.
>
> Basically, all the *new* state is caller-save.
>
> The Neon/FPSIMD regs V8-V15 are callee-save, so in the SVE view
> Zn[bits 127:0] is callee-save for all n = 8..15.
Are the extension parts of registers v8 to v15 used for argument passing?
If not, we should be able to use the existing dynamic linker trampoline.
Thanks,
Florian
^ permalink raw reply [flat|nested] 61+ messages in thread
* [RFC PATCH 00/29] arm64: Scalable Vector Extension core support
2016-12-05 10:44 ` Florian Weimer
@ 2016-12-05 11:07 ` Szabolcs Nagy
2016-12-05 15:04 ` Dave Martin
1 sibling, 0 replies; 61+ messages in thread
From: Szabolcs Nagy @ 2016-12-05 11:07 UTC (permalink / raw)
To: linux-arm-kernel
On 05/12/16 10:44, Florian Weimer wrote:
>>> By the way, how is this implemented? Some of them overlap existing
>>> callee-saved registers.
>>
>> Basically, all the *new* state is caller-save.
>>
>> The Neon/FPSIMD regs V8-V15 are callee-save, so in the SVE view
>> Zn[bits 127:0] is callee-save for all n = 8..15.
>
> Are the extension parts of registers v8 to v15 used for argument passing?
>
> If not, we should be able to use the existing dynamic linker trampoline.
>
if sve arguments are passed to a function then it
has special call abi (which is probably not yet
documented), this call abi requires that such a
call does not go through plt to avoid requiring
sve aware libc.
same for tls access: the top part of sve regs have
to be saved by the caller before accessing tls so
the tlsdesc entry does not have to save them.
so current trampolines should be fine.
^ permalink raw reply [flat|nested] 61+ messages in thread
* [RFC PATCH 00/29] arm64: Scalable Vector Extension core support
2016-12-05 10:44 ` Florian Weimer
2016-12-05 11:07 ` Szabolcs Nagy
@ 2016-12-05 15:04 ` Dave Martin
1 sibling, 0 replies; 61+ messages in thread
From: Dave Martin @ 2016-12-05 15:04 UTC (permalink / raw)
To: linux-arm-kernel
On Mon, Dec 05, 2016 at 11:44:38AM +0100, Florian Weimer wrote:
> On 12/01/2016 11:30 AM, Dave Martin wrote:
>
> >>The VL value is implicitly thread-local data, and the encoded state may have
> >>an implicit dependency on it, although it does not contain vector registers
> >>as such.
> >
> >This doesn't sound like an absolute requirement to me.
> >
> >If we presume that the SVE registers never need to get saved or
> >restored, what stops the context data format being VL-independent?
>
> I'm concerned the suspended computation has code which has been selected to
> fit a particular VL value.
>
> > If the save/restore logic doesn't touch SVE, why would its
> > implementation be VL-dependent?
>
> Because it has been optimized for a certain vector length?
I'll respond to these via Szabolcs' reply.
> >>>Because the SVE procedure call standard determines that the SVE
> >>>registers are caller-save,
> >>
> >>By the way, how is this implemented? Some of them overlap existing
> >>callee-saved registers.
> >
> >Basically, all the *new* state is caller-save.
> >
> >The Neon/FPSIMD regs V8-V15 are callee-save, so in the SVE view
> >Zn[bits 127:0] is callee-save for all n = 8..15.
>
> Are the extension parts of registers v8 to v15 used for argument passing?
No -- the idea is to be directly compatible with the existing PCS.
> If not, we should be able to use the existing dynamic linker trampoline.
Yes, I believe so.
Cheers
---Dave
^ permalink raw reply [flat|nested] 61+ messages in thread
* [RFC PATCH 00/29] arm64: Scalable Vector Extension core support
2016-11-30 12:38 ` Florian Weimer
2016-11-30 13:56 ` Dave Martin
@ 2016-12-02 11:48 ` Dave Martin
2016-12-02 16:34 ` Florian Weimer
1 sibling, 1 reply; 61+ messages in thread
From: Dave Martin @ 2016-12-02 11:48 UTC (permalink / raw)
To: linux-arm-kernel
On Wed, Nov 30, 2016 at 01:38:28PM +0100, Florian Weimer wrote:
[...]
> We could add a system call to get the right stack size. But as it depends
> on VL, I'm not sure what it looks like. Particularly if you need to determine
> the stack size before creating a thread that uses a specific VL setting.
I missed this point previously -- apologies for that.
What would you think of:
set_vl(vl_for_new_thread);
minsigstksz = get_minsigstksz();
set_vl(my_vl);
This avoids get_minsigstksz() requiring parameters -- which is mainly a
concern because the parameters tomorrow might be different from the
parameters today.
If it is possible to create the new thread without any SVE-dependent code,
then we could
set_vl(vl_for_new_thread);
new_thread_stack = malloc(get_minsigstksz());
new_thread = create_thread(..., new_thread_stack);
set_vl(my_vl);
which has the nice property that the new thread directly inherits the
configuration that was used for get_minsigstksz().
However, it would be necessary to prevent GCC from moving any code
across these statements -- in particular, SVE code that accesses VL-
dependent data spilled on the stack is liable to go wrong if reordered
with the above. So the sequence would need to go in an external
function (or a single asm...)
Failing that, we could maybe define some extensible struct to
get_minsigstksz().
Thoughts?
Cheers
---Dave
^ permalink raw reply [flat|nested] 61+ messages in thread
* [RFC PATCH 00/29] arm64: Scalable Vector Extension core support
2016-12-02 11:48 ` Dave Martin
@ 2016-12-02 16:34 ` Florian Weimer
2016-12-02 16:59 ` Joseph Myers
0 siblings, 1 reply; 61+ messages in thread
From: Florian Weimer @ 2016-12-02 16:34 UTC (permalink / raw)
To: linux-arm-kernel
On 12/02/2016 12:48 PM, Dave Martin wrote:
> On Wed, Nov 30, 2016 at 01:38:28PM +0100, Florian Weimer wrote:
>
> [...]
>
>> We could add a system call to get the right stack size. But as it depends
>> on VL, I'm not sure what it looks like. Particularly if you need determine
>> the stack size before creating a thread that uses a specific VL setting.
>
> I missed this point previously -- apologies for that.
>
> What would you think of:
>
> set_vl(vl_for_new_thread);
> minsigstksz = get_minsigstksz();
> set_vl(my_vl);
>
> This avoids get_minsigstksz() requiring parameters -- which is mainly a
> concern because the parameters tomorrow might be different from the
> parameters today.
>
> If it is possible to create the new thread without any SVE-dependent code,
> then we could
>
> set_vl(vl_for_new_thread);
> new_thread_stack = malloc(get_minsigstksz());
> new_thread = create_thread(..., new_thread_stack);
> set_vl(my_vl);
>
> which has the nice property that the new thread directly inherits the
> configuration that was used for get_minsigstksz().
Because all SVE registers are caller-saved, it's acceptable to
temporarily reduce the VL value, I think. So this should work.
One complication is that both the kernel and the libc need to reserve
stack space, so the kernel-returned value and the one which has to be
used in reality will be different.
> However, it would be necessary to prevent GCC from moving any code
> across these statements -- in particular, SVE code that accesses VL-
> dependent data spilled on the stack is liable to go wrong if reordered
> with the above. So the sequence would need to go in an external
> function (or a single asm...)
I would talk to GCC folks -- we have similar issues with changing the FPU
rounding mode, I assume.
Thanks,
Florian
^ permalink raw reply [flat|nested] 61+ messages in thread
* [RFC PATCH 00/29] arm64: Scalable Vector Extension core support
2016-12-02 16:34 ` Florian Weimer
@ 2016-12-02 16:59 ` Joseph Myers
2016-12-02 18:21 ` Dave Martin
0 siblings, 1 reply; 61+ messages in thread
From: Joseph Myers @ 2016-12-02 16:59 UTC (permalink / raw)
To: linux-arm-kernel
On Fri, 2 Dec 2016, Florian Weimer wrote:
> > However, it would be necessary to prevent GCC from moving any code
> > across these statements -- in particular, SVE code that accesses VL-
> > dependent data spilled on the stack is liable to go wrong if reordered
> > with the above. So the sequence would need to go in an external
> > function (or a single asm...)
>
> I would talk to GCC folks -- we have similar issues with changing the FPU
> rounding mode, I assume.
In general, GCC doesn't track the implicit uses of thread-local state
involved in floating-point exceptions and rounding modes, and so doesn't
avoid moving code across manipulations of such state; there are various
open bugs in this area (though many of the open bugs are for local rather
than global issues with code generation or local optimizations not
respecting exceptions and rounding modes, which are easier to fix). Hence
glibc using various macros such as math_opt_barrier and math_force_eval
which use asms to prevent such motion.
I'm not familiar enough with the optimizers to judge the right way to
address such issues with implicit use of thread-local state. And I
haven't thought much yet about how to implement TS 18661-1 constant
rounding modes, which would involve the compiler implicitly inserting
rounding modes changes, though I think it would be fairly straightforward
given underlying support for avoiding inappropriate code motion.
--
Joseph S. Myers
joseph at codesourcery.com
^ permalink raw reply [flat|nested] 61+ messages in thread
* [RFC PATCH 00/29] arm64: Scalable Vector Extension core support
2016-12-02 16:59 ` Joseph Myers
@ 2016-12-02 18:21 ` Dave Martin
2016-12-02 21:56 ` Joseph Myers
0 siblings, 1 reply; 61+ messages in thread
From: Dave Martin @ 2016-12-02 18:21 UTC (permalink / raw)
To: linux-arm-kernel
On Fri, Dec 02, 2016 at 04:59:27PM +0000, Joseph Myers wrote:
> On Fri, 2 Dec 2016, Florian Weimer wrote:
>
> > > However, it would be necessary to prevent GCC from moving any code
> > > across these statements -- in particular, SVE code that accesses VL-
> > > dependent data spilled on the stack is liable to go wrong if reordered
> > > with the above. So the sequence would need to go in an external
> > > function (or a single asm...)
> >
> > I would talk to GCC folks -- we have similar issues with changing the FPU
> > rounding mode, I assume.
>
> In general, GCC doesn't track the implicit uses of thread-local state
> involved in floating-point exceptions and rounding modes, and so doesn't
> avoid moving code across manipulations of such state; there are various
> open bugs in this area (though many of the open bugs are for local rather
> than global issues with code generation or local optimizations not
> respecting exceptions and rounding modes, which are easier to fix). Hence
> glibc using various macros such as math_opt_barrier and math_force_eval
> which use asms to prevent such motion.
Presumably the C language specs specify that fenv manipulations cannot
be reordered with respect to evaluation of floating-point expressions?
Sanity would seem to require this, though I've not dug into the specs
myself yet.
This doesn't get us off the hook for prctl() -- the C specs can only
define constraints on reordering for things that appear in the C spec.
prctl() is just an external function call in this context, and doesn't
enjoy the same guarantees.
> I'm not familiar enough with the optimizers to judge the right way to
> address such issues with implicit use of thread-local state. And I
> haven't thought much yet about how to implement TS 18661-1 constant
> rounding modes, which would involve the compiler implicitly inserting
> rounding modes changes, though I think it would be fairly straightforward
> given underlying support for avoiding inappropriate code motion.
My concern is that the compiler has no clue about what code motions are
appropriate or not with respect to a system call, beyond what applies
to a system call in general (i.e., asm volatile ( ::: "memory" ) for
GCC).
?
Cheers
---Dave
^ permalink raw reply [flat|nested] 61+ messages in thread
* [RFC PATCH 00/29] arm64: Scalable Vector Extension core support
2016-12-02 18:21 ` Dave Martin
@ 2016-12-02 21:56 ` Joseph Myers
0 siblings, 0 replies; 61+ messages in thread
From: Joseph Myers @ 2016-12-02 21:56 UTC (permalink / raw)
To: linux-arm-kernel
On Fri, 2 Dec 2016, Dave Martin wrote:
> Presumably the C language specs specify that fenv manipulations cannot
> be reordered with respect to evaluation or floating-point expressions?
Yes (in the context of #pragma STDC FENV_ACCESS ON). And you need to
presume that an arbitrary function call might manipulate the environment
unless you know it doesn't.
--
Joseph S. Myers
joseph at codesourcery.com
^ permalink raw reply [flat|nested] 61+ messages in thread
* [RFC PATCH 00/29] arm64: Scalable Vector Extension core support
2016-11-30 12:06 ` Dave Martin
2016-11-30 12:22 ` Szabolcs Nagy
2016-11-30 12:38 ` Florian Weimer
@ 2016-12-02 21:56 ` Yao Qi
2016-12-05 15:12 ` Dave Martin
2016-12-05 22:42 ` Torvald Riegel
3 siblings, 1 reply; 61+ messages in thread
From: Yao Qi @ 2016-12-02 21:56 UTC (permalink / raw)
To: linux-arm-kernel
On 16-11-30 12:06:54, Dave Martin wrote:
> So, my key goal is to support _per-process_ vector length control.
>
> From the kernel perspective, it is easiest to achieve this by providing
> per-thread control since that is the unit that context switching acts
> on.
>
Hi, Dave,
Thanks for the explanation.
> How useful it really is to have threads with different VLs in the same
> process is an open question. It's theoretically useful for runtime
> environments, which may want to dispatch code optimised for different
> VLs -- changing the VL on-the-fly within a single thread is not
> something I want to encourage, due to overhead and ABI issues, but
> switching between threads of different VLs would be more manageable.
This is a weird programming model.
> However, I expect mixing different VLs within a single process to be
> very much a special case -- it's not something I'd expect to work with
> general-purpose code.
>
> Since the need for independent VLs per thread is not proven, we could
>
> * forbid it -- i.e., only a thread-group leader with no children is
> permitted to change the VL, which is then inherited by any child threads
> that are subsequently created
>
> * permit it only if a special flag is specified when requesting the VL
> change
>
> * permit it and rely on userspace to be sensible -- easiest option for
> the kernel.
Both the first and the third one are reasonable to me, but the first one
fit well in existing GDB design. I don't know how useful it is to have
per-thread VL, there may be some workloads can be implemented that way.
GDB needs some changes to support "per-thread" target description.
--
Yao
^ permalink raw reply [flat|nested] 61+ messages in thread
* [RFC PATCH 00/29] arm64: Scalable Vector Extension core support
2016-12-02 21:56 ` Yao Qi
@ 2016-12-05 15:12 ` Dave Martin
0 siblings, 0 replies; 61+ messages in thread
From: Dave Martin @ 2016-12-05 15:12 UTC (permalink / raw)
To: linux-arm-kernel
On Fri, Dec 02, 2016 at 09:56:46PM +0000, Yao Qi wrote:
> On 16-11-30 12:06:54, Dave Martin wrote:
> > So, my key goal is to support _per-process_ vector length control.
> >
> > From the kernel perspective, it is easiest to achieve this by providing
> > per-thread control since that is the unit that context switching acts
> > on.
> >
>
> Hi, Dave,
> Thanks for the explanation.
>
> > How useful it really is to have threads with different VLs in the same
> > process is an open question. It's theoretically useful for runtime
> > environments, which may want to dispatch code optimised for different
> > VLs -- changing the VL on-the-fly within a single thread is not
> > something I want to encourage, due to overhead and ABI issues, but
> > switching between threads of different VLs would be more manageable.
>
> This is a weird programming model.
I may not have explained that very well.
What I meant is, you have two threads communicating with one another,
say. Providing that they don't exchange data using a VL-dependent
representation, it should not matter that the two threads are running
with different VLs.
This may make sense if a particular piece of work was optimised for a
particular VL: you can pick a worker thread with the correct VL and
dispatch the job there for best performance.
I wouldn't expect this ability to be exploited except by specialised
frameworks.
> > However, I expect mixing different VLs within a single process to be
> > very much a special case -- it's not something I'd expect to work with
> > general-purpose code.
> >
> > Since the need for independent VLs per thread is not proven, we could
> >
> > * forbid it -- i.e., only a thread-group leader with no children is
> > permitted to change the VL, which is then inherited by any child threads
> > that are subsequently created
> >
> > * permit it only if a special flag is specified when requesting the VL
> > change
> >
> > * permit it and rely on userspace to be sensible -- easiest option for
> > the kernel.
>
> Both the first and the third one are reasonable to me, but the first one
> fit well in existing GDB design. I don't know how useful it is to have
> per-thread VL, there may be some workloads can be implemented that way.
> GDB needs some changes to support "per-thread" target description.
OK -- I'll implement for per-thread for now, but this can be clarified
later.
Cheers
---Dave
^ permalink raw reply [flat|nested] 61+ messages in thread
* [RFC PATCH 00/29] arm64: Scalable Vector Extension core support
2016-11-30 12:06 ` Dave Martin
` (2 preceding siblings ...)
2016-12-02 21:56 ` Yao Qi
@ 2016-12-05 22:42 ` Torvald Riegel
2016-12-06 14:46 ` Dave Martin
3 siblings, 1 reply; 61+ messages in thread
From: Torvald Riegel @ 2016-12-05 22:42 UTC (permalink / raw)
To: linux-arm-kernel
On Wed, 2016-11-30 at 12:06 +0000, Dave Martin wrote:
> So, my key goal is to support _per-process_ vector length control.
>
> From the kernel perspective, it is easiest to achieve this by providing
> per-thread control since that is the unit that context switching acts
> on.
>
> How useful it really is to have threads with different VLs in the same
> process is an open question. It's theoretically useful for runtime
> environments, which may want to dispatch code optimised for different
> VLs
What would be the primary use case(s)? Vectorization of short vectors
(e.g., if you have an array of structs or something like that)?
> -- changing the VL on-the-fly within a single thread is not
> something I want to encourage, due to overhead and ABI issues, but
> switching between threads of different VLs would be more manageable.
So if on-the-fly switching is probably not useful, that would mean we
need special threads for the use cases. Is that a realistic assumption
for the use cases? Or do you primarily want to keep it possible to do
this, regardless of whether there are real use cases now?
I suppose allowing for a per-thread setting of VL could also be added as
a feature in the future without breaking existing code.
> For setcontext/setjmp, we don't save/restore any SVE state due to the
> caller-save status of SVE, and I would not consider it necessary to
> save/restore VL itself because of the no-change-on-the-fly policy for
> this.
Thus, you would basically consider VL changes or per-thread VL as in the
realm of compilation internals? So, the specific size for a particular
piece of code would not be part of an ABI?
> I'm not familiar with resumable functions/executors -- are these in
> the C++ standards yet (not that that would cause me to be familiar
> with them... ;) Any implementation of coroutines (i.e.,
> cooperative switching) is likely to fall under the "setcontext"
> argument above.
These are not part of the C++ standard yet, but will appear in TSes.
There are various features for which implementations would be assumed to
use one OS thread for several tasks, coroutines, etc. Some of them
switch between these tasks or coroutines while these are running,
whereas the ones that will be in C++17 only run more than one parallel task
on the same OS thread, but one after the other (like in a thread pool).
However, if we are careful not to expose VL or make promises about it,
this may just end up being a detail similar to, say, register
allocation, which isn't exposed beyond the internals of a particular
compiler either.
Exposing it as a feature the user can set without messing with the
implementation would introduce additional thread-specific state, as
Florian said. This might not be a show-stopper by itself, but the more
thread-specific state we have the more an implementation has to take
care of or switch, and the higher the runtime costs are. C++17 already
makes weaker promises for TLS for parallel tasks, so that
implementations don't have to run TLS constructors or destructors just
because a small parallel task was executed.
^ permalink raw reply [flat|nested] 61+ messages in thread
* [RFC PATCH 00/29] arm64: Scalable Vector Extension core support
2016-12-05 22:42 ` Torvald Riegel
@ 2016-12-06 14:46 ` Dave Martin
0 siblings, 0 replies; 61+ messages in thread
From: Dave Martin @ 2016-12-06 14:46 UTC (permalink / raw)
To: linux-arm-kernel
On Mon, Dec 05, 2016 at 11:42:19PM +0100, Torvald Riegel wrote:
Hi there,
> On Wed, 2016-11-30 at 12:06 +0000, Dave Martin wrote:
> > So, my key goal is to support _per-process_ vector length control.
> >
> > From the kernel perspective, it is easiest to achieve this by providing
> > per-thread control since that is the unit that context switching acts
> > on.
> >
> > How useful it really is to have threads with different VLs in the same
> > process is an open question. It's theoretically useful for runtime
> > environments, which may want to dispatch code optimised for different
> > VLs
>
> What would be the primary use case(s)? Vectorization of short vectors
> (eg, if having an array of structs or sth like that)?
I'm not sure exactly what you're asking here.
SVE supports a regular SIMD-type computational model, along with
scalable vectors and features for speculative vectorisation of loops
whose iteration count is not statically known (or, possibly not known
even at loop entry at runtime). It's intended as a compiler target, so
any algorithm that involves iterative computation may get some benefit
-- though the amount of benefit, and how the benefit scales with vector
length, will depend on the algorithm in question.
So some algorithms may get more benefit from large VLs than others.
For jobs where performance tends to saturate at a shorter VL, it may
make sense to get the compiler to compile for the shorter VL -- this
may enable the same binary code to perform more optimally on a wider
range of hardware, but that may also mean you want to run that job with
the VL it was compiled for instead of what the hardware
supports.
In high-assurance scenarios, you might also want to restrict a
particular job to run at the VL that you validated for.
> > -- changing the VL on-the-fly within a single thread is not
> > something I want to encourage, due to overhead and ABI issues, but
> > switching between threads of different VLs would be more manageable.
>
> So if on-the-fly switching is probably not useful, that would mean we
> need special threads for the use cases. Is that a realistic assumption
> for the use cases? Or do you primarily want to keep it possible to do
> this, regardless of whether there are real use cases now?
> I suppose allowing for a per-thread setting of VL could also be added as
> a feature in the future without breaking existing code.
Per-thread VL use cases are hypothetical for now.
It's easy to support per-thread VLs in the kernel, but we could deny it
initially and wait for someone to come along with a concrete use case.
> > For setcontext/setjmp, we don't save/restore any SVE state due to the
> > caller-save status of SVE, and I would not consider it necessary to
> > save/restore VL itself because of the no-change-on-the-fly policy for
> > this.
>
> Thus, you would basically consider VL changes or per-thread VL as in the
> realm of compilation internals? So, the specific size for a particular
> piece of code would not be part of an ABI?
Basically yes. For most people, this would be hidden in libc/ld.so/some
framework. This goes for most prctl()s -- random user code shouldn't
normally touch them unless it knows what it's doing.
> > I'm not familiar with resumable functions/executors -- are these in
> > the C++ standards yet (not that that would cause me to be familiar
> > with them... ;) Any implementation of coroutines (i.e.,
> > cooperative switching) is likely to fall under the "setcontext"
> > argument above.
>
> These are not part of the C++ standard yet, but will appear in TSes.
> There are various features for which implementations would be assumed to
> use one OS thread for several tasks, coroutines, etc. Some of them
> switch between these tasks or coroutines while these are running,
Is the switching ever preemptive? If not, then these features are
unlikely to be a concern for SVE. It's preemptive switching that would
require the saving of extra SVE state (which is why we need to care for
signals).
> whereas the ones that will be in C++17 only run more than one parallel
> task on the same OS thread, but one after the other (like in a thread pool).
If jobs are only run to completion before yielding, that again isn't a
concern for SVE.
> However, if we are careful not to expose VL or make promises about it,
> this may just end up being a detail similar to, say, register
> allocation, which isn't exposed beyond the internals of a particular
> compiler either.
> Exposing it as a feature the user can set without messing with the
> implementation would introduce additional thread-specific state, as
> Florian said. This might not be a show-stopper by itself, but the more
> thread-specific state we have the more an implementation has to take
> care of or switch, and the higher the runtime costs are. C++17 already
> makes weaker promises for TLS for parallel tasks, so that
> implementations don't have to run TLS constructors or destructors just
> because a small parallel task was executed.
There's a difference between a feature that is exposed by the kernel, and
a feature endorsed by the language / runtime.
For example, random code can enable seccomp via prctl(PR_SET_SECCOMP)
-- this may make most of libc unsafe to use, because under strict
seccomp most syscalls simply kill the thread. libc doesn't pretend to
support this out of the box, but this feature is also not needlessly
denied to user code that knows what it's doing.
I tend to put setting the VL into this category: it is safe, and
useful or even necessary to change the VL in some situations, but
userspace is responsible for managing this for itself. The kernel
doesn't have enough information to make these decisions unilaterally.
Cheers
---Dave
^ permalink raw reply [flat|nested] 61+ messages in thread
* [RFC PATCH 00/29] arm64: Scalable Vector Extension core support
2016-11-25 19:38 [RFC PATCH 00/29] arm64: Scalable Vector Extension core support Dave Martin
` (29 preceding siblings ...)
2016-11-30 9:56 ` [RFC PATCH 00/29] arm64: Scalable Vector Extension core support Yao Qi
@ 2016-11-30 10:08 ` Florian Weimer
2016-11-30 11:05 ` Szabolcs Nagy
30 siblings, 1 reply; 61+ messages in thread
From: Florian Weimer @ 2016-11-30 10:08 UTC (permalink / raw)
To: linux-arm-kernel
On 11/25/2016 08:38 PM, Dave Martin wrote:
> The Scalable Vector Extension (SVE) [1] is an extension to AArch64 which
> adds extra SIMD functionality and supports much larger vectors.
>
> This series implements core Linux support for SVE.
>
> Recipients not copied on the whole series can find the rest of the
> patches in the linux-arm-kernel archives [2].
>
>
> The first 5 patches "arm64: signal: ..." factor out the allocation and
> placement of state information in the signal frame. The first three
> are prerequisites for the SVE support patches.
>
> Patches 04-05 implement expansion of the signal frame, and may remain
> controversial due to ABI break issues:
>
> * Discussion is needed on how userspace should detect/negotiate signal
> frame size in order for this expansion mechanism to be workable.
I'm leaning towards a simple increase in the glibc headers (despite the
ABI risk), plus a personality flag to disable really wide vector
registers in case this causes problems with old binaries.
A more elaborate mechanism will likely introduce more bugs than it will
make existing applications work, due to its complexity.
> The remaining patches implement initial SVE support for Linux, with the
> following limitations:
>
> * No KVM/virtualisation support for guests.
>
> * No independent SVE vector length configuration per thread. This is
> planned, but will follow as a separate add-on series.
Per-thread register widths will likely make coroutine switching
(setcontext) and C++ resumable functions/executors quite challenging.
Can you detail your plans in this area?
Thanks,
Florian
^ permalink raw reply [flat|nested] 61+ messages in thread
* [RFC PATCH 00/29] arm64: Scalable Vector Extension core support
2016-11-30 10:08 ` Florian Weimer
@ 2016-11-30 11:05 ` Szabolcs Nagy
2016-11-30 14:06 ` Dave Martin
0 siblings, 1 reply; 61+ messages in thread
From: Szabolcs Nagy @ 2016-11-30 11:05 UTC (permalink / raw)
To: linux-arm-kernel
On 30/11/16 10:08, Florian Weimer wrote:
> On 11/25/2016 08:38 PM, Dave Martin wrote:
>> The Scalable Vector Extension (SVE) [1] is an extension to AArch64 which
>> adds extra SIMD functionality and supports much larger vectors.
>>
>> This series implements core Linux support for SVE.
>>
>> Recipients not copied on the whole series can find the rest of the
>> patches in the linux-arm-kernel archives [2].
>>
>>
>> The first 5 patches "arm64: signal: ..." factor out the allocation and
>> placement of state information in the signal frame. The first three
>> are prerequisites for the SVE support patches.
>>
>> Patches 04-05 implement expansion of the signal frame, and may remain
>> controversial due to ABI break issues:
>>
>> * Discussion is needed on how userspace should detect/negotiate signal
>> frame size in order for this expansion mechanism to be workable.
>
> I'm leaning towards a simple increase in the glibc headers (despite the ABI risk), plus a personality flag to
> disable really wide vector registers in case this causes problems with old binaries.
>
if the kernel does not increase the size and libc
does not add size checks then old binaries would
work with new libc just fine..
but that's non-conforming, posix requires the check.
if the kernel increases the size then it has to be
changed in bionic and musl as well and old binaries
may break.
> A more elaborate mechanism will likely introduce more bugs than it makes existing applications working, due to
> its complexity.
>
>> The remaining patches implement initial SVE support for Linux, with the
>> following limitations:
>>
>> * No KVM/virtualisation support for guests.
>>
>> * No independent SVE vector length configuration per thread. This is
>> planned, but will follow as a separate add-on series.
>
> Per-thread register widths will likely make coroutine switching (setcontext) and C++ resumable
> functions/executors quite challenging.
>
i'd assume it's undefined to context switch to a different
thread or to resume a function on a different thread
(because the implementation can cache thread local state
on the stack: e.g. errno pointer).. of course this does
not stop ppl from doing it, but the practice is questionable.
> Can you detail your plans in this area?
>
> Thanks,
> Florian
^ permalink raw reply [flat|nested] 61+ messages in thread
* [RFC PATCH 00/29] arm64: Scalable Vector Extension core support
2016-11-30 11:05 ` Szabolcs Nagy
@ 2016-11-30 14:06 ` Dave Martin
0 siblings, 0 replies; 61+ messages in thread
From: Dave Martin @ 2016-11-30 14:06 UTC (permalink / raw)
To: linux-arm-kernel
On Wed, Nov 30, 2016 at 11:05:41AM +0000, Szabolcs Nagy wrote:
> On 30/11/16 10:08, Florian Weimer wrote:
> > On 11/25/2016 08:38 PM, Dave Martin wrote:
[...]
> >> * Discussion is needed on how userspace should detect/negotiate signal
> >> frame size in order for this expansion mechanism to be workable.
> >
> > I'm leaning towards a simple increase in the glibc headers (despite the ABI risk), plus a personality flag to
> > disable really wide vector registers in case this causes problems with old binaries.
> >
>
> if the kernel does not increase the size and libc
> does not add size checks then old binaries would
> work with new libc just fine..
> but that's non-conforming, posix requires the check.
>
> if the kernel increases the size then it has to be
> changed in bionic and musl as well and old binaries
> may break.
Or we need a personality flag or similar to distinguish the two cases.
[...]
> > A more elaborate mechanism will likely introduce more bugs than it makes existing applications working, due to
> > its complexity.
> >
> >> The remaining patches implement initial SVE support for Linux, with the
> >> following limitations:
> >>
> >> * No KVM/virtualisation support for guests.
> >>
> >> * No independent SVE vector length configuration per thread. This is
> >> planned, but will follow as a separate add-on series.
> >
> > Per-thread register widths will likely make coroutine switching (setcontext) and C++ resumable
> > functions/executors quite challenging.
> >
>
> i'd assume it's undefined to context switch to a different
> thread or to resume a function on a different thread
> (because the implementation can cache thread local state
> on the stack: e.g. errno pointer).. of course this does
> not stop ppl from doing it, but the practice is questionable.
I don't have a view on this.
Cheers
---Dave
^ permalink raw reply [flat|nested] 61+ messages in thread