All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Maydell <peter.maydell@linaro.org>
To: qemu-devel@nongnu.org
Subject: [Qemu-devel] [PULL 08/33] target/arm: Handle SVE vector length changes in system mode
Date: Mon,  8 Oct 2018 14:59:39 +0100	[thread overview]
Message-ID: <20181008140004.12612-9-peter.maydell@linaro.org> (raw)
In-Reply-To: <20181008140004.12612-1-peter.maydell@linaro.org>

From: Richard Henderson <richard.henderson@linaro.org>

SVE vector length can change when changing EL, or when writing
to one of the ZCR_ELn registers.

For correctness, our implementation requires that predicate bits
that are inaccessible are never set.  Which means noticing length
changes and zeroing the appropriate register bits.

Tested-by: Laurent Desnogues <laurent.desnogues@gmail.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20181005175350.30752-5-richard.henderson@linaro.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/cpu.h       |   4 ++
 target/arm/cpu64.c     |  42 -------------
 target/arm/helper.c    | 133 +++++++++++++++++++++++++++++++++++++----
 target/arm/op_helper.c |   1 +
 4 files changed, 125 insertions(+), 55 deletions(-)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 65c0fa0a659..a4ee83dc770 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -910,6 +910,10 @@ int arm_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs,
 int aarch64_cpu_gdb_read_register(CPUState *cpu, uint8_t *buf, int reg);
 int aarch64_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
 void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq);
+void aarch64_sve_change_el(CPUARMState *env, int old_el, int new_el);
+#else
+static inline void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq) { }
+static inline void aarch64_sve_change_el(CPUARMState *env, int o, int n) { }
 #endif
 
 target_ulong do_arm_semihosting(CPUARMState *env);
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 800bff780e2..db71504cb5c 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -410,45 +410,3 @@ static void aarch64_cpu_register_types(void)
 }
 
 type_init(aarch64_cpu_register_types)
-
-/* The manual says that when SVE is enabled and VQ is widened the
- * implementation is allowed to zero the previously inaccessible
- * portion of the registers.  The corollary to that is that when
- * SVE is enabled and VQ is narrowed we are also allowed to zero
- * the now inaccessible portion of the registers.
- *
- * The intent of this is that no predicate bit beyond VQ is ever set.
- * Which means that some operations on predicate registers themselves
- * may operate on full uint64_t or even unrolled across the maximum
- * uint64_t[4].  Performing 4 bits of host arithmetic unconditionally
- * may well be cheaper than conditionals to restrict the operation
- * to the relevant portion of a uint16_t[16].
- *
- * TODO: Need to call this for changes to the real system registers
- * and EL state changes.
- */
-void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq)
-{
-    int i, j;
-    uint64_t pmask;
-
-    assert(vq >= 1 && vq <= ARM_MAX_VQ);
-    assert(vq <= arm_env_get_cpu(env)->sve_max_vq);
-
-    /* Zap the high bits of the zregs.  */
-    for (i = 0; i < 32; i++) {
-        memset(&env->vfp.zregs[i].d[2 * vq], 0, 16 * (ARM_MAX_VQ - vq));
-    }
-
-    /* Zap the high bits of the pregs and ffr.  */
-    pmask = 0;
-    if (vq & 3) {
-        pmask = ~(-1ULL << (16 * (vq & 3)));
-    }
-    for (j = vq / 4; j < ARM_MAX_VQ / 4; j++) {
-        for (i = 0; i < 17; ++i) {
-            env->vfp.pregs[i].p[j] &= pmask;
-        }
-        pmask = 0;
-    }
-}
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 35458ad4a76..72f7f5cfec2 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -4461,11 +4461,44 @@ static int sve_exception_el(CPUARMState *env, int el)
     return 0;
 }
 
+/*
+ * Given that SVE is enabled, return the vector length for EL.
+ */
+static uint32_t sve_zcr_len_for_el(CPUARMState *env, int el)
+{
+    ARMCPU *cpu = arm_env_get_cpu(env);
+    uint32_t zcr_len = cpu->sve_max_vq - 1;
+
+    if (el <= 1) {
+        zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[1]);
+    }
+    if (el < 2 && arm_feature(env, ARM_FEATURE_EL2)) {
+        zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[2]);
+    }
+    if (el < 3 && arm_feature(env, ARM_FEATURE_EL3)) {
+        zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[3]);
+    }
+    return zcr_len;
+}
+
 static void zcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
                       uint64_t value)
 {
+    int cur_el = arm_current_el(env);
+    int old_len = sve_zcr_len_for_el(env, cur_el);
+    int new_len;
+
     /* Bits other than [3:0] are RAZ/WI.  */
     raw_write(env, ri, value & 0xf);
+
+    /*
+     * Because we arrived here, we know both FP and SVE are enabled;
+     * otherwise we would have trapped access to the ZCR_ELn register.
+     */
+    new_len = sve_zcr_len_for_el(env, cur_el);
+    if (new_len < old_len) {
+        aarch64_sve_narrow_vq(env, new_len + 1);
+    }
 }
 
 static const ARMCPRegInfo zcr_el1_reginfo = {
@@ -8304,8 +8337,11 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs)
     unsigned int new_el = env->exception.target_el;
     target_ulong addr = env->cp15.vbar_el[new_el];
     unsigned int new_mode = aarch64_pstate_mode(new_el, true);
+    unsigned int cur_el = arm_current_el(env);
 
-    if (arm_current_el(env) < new_el) {
+    aarch64_sve_change_el(env, cur_el, new_el);
+
+    if (cur_el < new_el) {
         /* Entry vector offset depends on whether the implemented EL
          * immediately lower than the target level is using AArch32 or AArch64
          */
@@ -12597,18 +12633,7 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
             if (sve_el != 0 && fp_el == 0) {
                 zcr_len = 0;
             } else {
-                ARMCPU *cpu = arm_env_get_cpu(env);
-
-                zcr_len = cpu->sve_max_vq - 1;
-                if (current_el <= 1) {
-                    zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[1]);
-                }
-                if (current_el < 2 && arm_feature(env, ARM_FEATURE_EL2)) {
-                    zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[2]);
-                }
-                if (current_el < 3 && arm_feature(env, ARM_FEATURE_EL3)) {
-                    zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[3]);
-                }
+                zcr_len = sve_zcr_len_for_el(env, current_el);
             }
             flags |= sve_el << ARM_TBFLAG_SVEEXC_EL_SHIFT;
             flags |= zcr_len << ARM_TBFLAG_ZCR_LEN_SHIFT;
@@ -12664,3 +12689,85 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
     *pflags = flags;
     *cs_base = 0;
 }
+
+#ifdef TARGET_AARCH64
+/*
+ * The manual says that when SVE is enabled and VQ is widened the
+ * implementation is allowed to zero the previously inaccessible
+ * portion of the registers.  The corollary to that is that when
+ * SVE is enabled and VQ is narrowed we are also allowed to zero
+ * the now inaccessible portion of the registers.
+ *
+ * The intent of this is that no predicate bit beyond VQ is ever set.
+ * Which means that some operations on predicate registers themselves
+ * may operate on full uint64_t or even unrolled across the maximum
+ * uint64_t[4].  Performing 4 bits of host arithmetic unconditionally
+ * may well be cheaper than conditionals to restrict the operation
+ * to the relevant portion of a uint16_t[16].
+ */
+void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq)
+{
+    int i, j;
+    uint64_t pmask;
+
+    assert(vq >= 1 && vq <= ARM_MAX_VQ);
+    assert(vq <= arm_env_get_cpu(env)->sve_max_vq);
+
+    /* Zap the high bits of the zregs.  */
+    for (i = 0; i < 32; i++) {
+        memset(&env->vfp.zregs[i].d[2 * vq], 0, 16 * (ARM_MAX_VQ - vq));
+    }
+
+    /* Zap the high bits of the pregs and ffr.  */
+    pmask = 0;
+    if (vq & 3) {
+        pmask = ~(-1ULL << (16 * (vq & 3)));
+    }
+    for (j = vq / 4; j < ARM_MAX_VQ / 4; j++) {
+        for (i = 0; i < 17; ++i) {
+            env->vfp.pregs[i].p[j] &= pmask;
+        }
+        pmask = 0;
+    }
+}
+
+/*
+ * Notice a change in SVE vector size when changing EL.
+ */
+void aarch64_sve_change_el(CPUARMState *env, int old_el, int new_el)
+{
+    int old_len, new_len;
+
+    /* Nothing to do if no SVE.  */
+    if (!arm_feature(env, ARM_FEATURE_SVE)) {
+        return;
+    }
+
+    /* Nothing to do if FP is disabled in either EL.  */
+    if (fp_exception_el(env, old_el) || fp_exception_el(env, new_el)) {
+        return;
+    }
+
+    /*
+     * DDI0584A.d sec 3.2: "If SVE instructions are disabled or trapped
+     * at ELx, or not available because the EL is in AArch32 state, then
+     * for all purposes other than a direct read, the ZCR_ELx.LEN field
+     * has an effective value of 0".
+     *
+     * Consider EL2 (aa64, vq=4) -> EL0 (aa32) -> EL1 (aa64, vq=0).
+     * If we ignore aa32 state, we would fail to see the vq4->vq0 transition
+     * from EL2->EL1.  Thus we go ahead and narrow when entering aa32 so that
+     * we already have the correct register contents when encountering the
+     * vq0->vq0 transition between EL0->EL1.
+     */
+    old_len = (arm_el_is_aa64(env, old_el) && !sve_exception_el(env, old_el)
+               ? sve_zcr_len_for_el(env, old_el) : 0);
+    new_len = (arm_el_is_aa64(env, new_el) && !sve_exception_el(env, new_el)
+               ? sve_zcr_len_for_el(env, new_el) : 0);
+
+    /* When changing vector length, clear inaccessible state.  */
+    if (new_len < old_len) {
+        aarch64_sve_narrow_vq(env, new_len + 1);
+    }
+}
+#endif
diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c
index 952b8d122b7..430c50a9f99 100644
--- a/target/arm/op_helper.c
+++ b/target/arm/op_helper.c
@@ -1082,6 +1082,7 @@ void HELPER(exception_return)(CPUARMState *env)
                       "AArch64 EL%d PC 0x%" PRIx64 "\n",
                       cur_el, new_el, env->pc);
     }
+    aarch64_sve_change_el(env, cur_el, new_el);
 
     qemu_mutex_lock_iothread();
     arm_call_el_change_hook(arm_env_get_cpu(env));
-- 
2.19.0

  parent reply	other threads:[~2018-10-08 14:01 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-10-08 13:59 [Qemu-devel] [PULL 00/33] target-arm queue Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 01/33] target/arm: fix code comments error Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 02/33] virt: Suppress external aborts on virt-2.10 and earlier Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 03/33] target/arm: Correct condition for v8M callee stack push Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 04/33] target/arm: Don't read r4 from v8M exception stackframe twice Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 05/33] target/arm: Define ID_AA64ZFR0_EL1 Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 06/33] target/arm: Adjust sve_exception_el Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 07/33] target/arm: Pass in current_el to fp and sve_exception_el Peter Maydell
2018-10-08 13:59 ` Peter Maydell [this message]
2018-10-08 13:59 ` [Qemu-devel] [PULL 09/33] target/arm: Adjust aarch64_cpu_dump_state for system mode SVE Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 10/33] target/arm: Clear unused predicate bits for LD1RQ Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 11/33] target/arm: Rewrite helper_sve_ld1*_r using pages Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 12/33] target/arm: Rewrite helper_sve_ld[234]*_r Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 13/33] target/arm: Rewrite helper_sve_st[1234]*_r Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 14/33] target/arm: Split contiguous loads for endianness Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 15/33] target/arm: Split contiguous stores " Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 16/33] target/arm: Rewrite vector gather loads Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 17/33] target/arm: Rewrite vector gather stores Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 18/33] target/arm: Rewrite vector gather first-fault loads Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 19/33] target/arm: Pass TCGMemOpIdx to sve memory helpers Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 20/33] target/arm: Define new TBFLAG for v8M stack checking Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 21/33] target/arm: Define new EXCP type for v8M stack overflows Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 22/33] target/arm: Move v7m_using_psp() to internals.h Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 23/33] target/arm: Add v8M stack checks on ADD/SUB/MOV of SP Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 24/33] target/arm: Add some comments in Thumb decode Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 25/33] target/arm: Add v8M stack checks on exception entry Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 26/33] target/arm: Add v8M stack limit checks on NS function calls Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 27/33] target/arm: Add v8M stack checks for LDRD/STRD (imm) Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 28/33] target/arm: Add v8M stack checks for Thumb2 LDM/STM Peter Maydell
2018-10-08 14:00 ` [Qemu-devel] [PULL 29/33] target/arm: Add v8M stack checks for T32 load/store single Peter Maydell
2018-10-08 14:00 ` [Qemu-devel] [PULL 30/33] target/arm: Add v8M stack checks for Thumb push/pop Peter Maydell
2018-10-08 14:00 ` [Qemu-devel] [PULL 31/33] target/arm: Add v8M stack checks for VLDM/VSTM Peter Maydell
2018-10-08 14:00 ` [Qemu-devel] [PULL 32/33] target/arm: Add v8M stack checks for MSR to SP_NS Peter Maydell
2018-10-08 14:00 ` [Qemu-devel] [PULL 33/33] hw/display/bcm2835_fb: Silence Coverity warning about multiply overflow Peter Maydell
2018-10-08 14:46 ` [Qemu-devel] [PULL 00/33] target-arm queue Peter Maydell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181008140004.12612-9-peter.maydell@linaro.org \
    --to=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.