All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 0/7] target/arm: SME prep patches
@ 2022-05-17  5:48 Richard Henderson
  2022-05-17  5:48 ` [PATCH v2 1/7] target/arm: Enable FEAT_HCX for -cpu max Richard Henderson
                   ` (7 more replies)
  0 siblings, 8 replies; 16+ messages in thread
From: Richard Henderson @ 2022-05-17  5:48 UTC (permalink / raw)
  To: qemu-devel; +Cc: qemu-arm

Changes for v2:
  * Fixed the errors PMM noticed in patch 2.
  * Cleanups for SVE vector length selection.
  * Cleanups for SVE exception el selection.
  * Add el_is_in_host -- I'm not sure this one will really help,
    but it's certainly appearing more and more in the pseudocode.


r~


Richard Henderson (7):
  target/arm: Enable FEAT_HCX for -cpu max
  target/arm: Use FIELD definitions for CPACR, CPTR_ELx
  target/arm: Do not use aarch64_sve_zcr_get_valid_len in reset
  target/arm: Merge aarch64_sve_zcr_get_valid_len into caller
  target/arm: Use uint32_t instead of bitmap for sve vq's
  target/arm: Remove fp checks from sve_exception_el
  target/arm: Add el_is_in_host

 target/arm/cpu.h       |  70 +++++++++++++--
 target/arm/internals.h |  18 ++--
 target/arm/kvm_arm.h   |   7 +-
 hw/arm/boot.c          |   2 +-
 target/arm/cpu.c       |  14 +--
 target/arm/cpu64.c     | 118 ++++++++++++------------
 target/arm/helper.c    | 198 +++++++++++++++++++++++++----------------
 target/arm/kvm64.c     |  36 ++------
 8 files changed, 264 insertions(+), 199 deletions(-)

-- 
2.34.1



^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v2 1/7] target/arm: Enable FEAT_HCX for -cpu max
  2022-05-17  5:48 [PATCH v2 0/7] target/arm: SME prep patches Richard Henderson
@ 2022-05-17  5:48 ` Richard Henderson
  2022-05-17  5:48 ` [PATCH v2 2/7] target/arm: Use FIELD definitions for CPACR, CPTR_ELx Richard Henderson
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 16+ messages in thread
From: Richard Henderson @ 2022-05-17  5:48 UTC (permalink / raw)
  To: qemu-devel; +Cc: qemu-arm, Peter Maydell

This feature adds a new register, HCRX_EL2, which controls
many of the newer AArch64 features.  So far the register is
effectively RES0, because none of the new features are done.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/cpu.h    | 20 ++++++++++++++++++
 target/arm/cpu64.c  |  1 +
 target/arm/helper.c | 50 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 71 insertions(+)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 18ca61e8e2..b35b117fe7 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -362,6 +362,7 @@ typedef struct CPUArchState {
         uint32_t pmsav5_data_ap; /* PMSAv5 MPU data access permissions */
         uint32_t pmsav5_insn_ap; /* PMSAv5 MPU insn access permissions */
         uint64_t hcr_el2; /* Hypervisor configuration register */
+        uint64_t hcrx_el2; /* Extended Hypervisor configuration register */
         uint64_t scr_el3; /* Secure configuration register.  */
         union { /* Fault status registers.  */
             struct {
@@ -1543,6 +1544,19 @@ static inline void xpsr_write(CPUARMState *env, uint32_t val, uint32_t mask)
 #define HCR_TWEDEN    (1ULL << 59)
 #define HCR_TWEDEL    MAKE_64BIT_MASK(60, 4)
 
+#define HCRX_ENAS0    (1ULL << 0)
+#define HCRX_ENALS    (1ULL << 1)
+#define HCRX_ENASR    (1ULL << 2)
+#define HCRX_FNXS     (1ULL << 3)
+#define HCRX_FGTNXS   (1ULL << 4)
+#define HCRX_SMPME    (1ULL << 5)
+#define HCRX_TALLINT  (1ULL << 6)
+#define HCRX_VINMI    (1ULL << 7)
+#define HCRX_VFNMI    (1ULL << 8)
+#define HCRX_CMOW     (1ULL << 9)
+#define HCRX_MCE2     (1ULL << 10)
+#define HCRX_MSCEN    (1ULL << 11)
+
 #define HPFAR_NS      (1ULL << 63)
 
 #define SCR_NS                (1U << 0)
@@ -2310,6 +2324,7 @@ static inline bool arm_is_el2_enabled(CPUARMState *env)
  * Not included here is HCR_RW.
  */
 uint64_t arm_hcr_el2_eff(CPUARMState *env);
+uint64_t arm_hcrx_el2_eff(CPUARMState *env);
 
 /* Return true if the specified exception level is running in AArch64 state. */
 static inline bool arm_el_is_aa64(CPUARMState *env, int el)
@@ -3931,6 +3946,11 @@ static inline bool isar_feature_aa64_ats1e1(const ARMISARegisters *id)
     return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, PAN) >= 2;
 }
 
+static inline bool isar_feature_aa64_hcx(const ARMISARegisters *id)
+{
+    return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, HCX) != 0;
+}
+
 static inline bool isar_feature_aa64_uao(const ARMISARegisters *id)
 {
     return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, UAO) != 0;
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 04427e073f..4ab1dcf2ef 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -910,6 +910,7 @@ static void aarch64_max_initfn(Object *obj)
     t = FIELD_DP64(t, ID_AA64MMFR1, LO, 1);       /* FEAT_LOR */
     t = FIELD_DP64(t, ID_AA64MMFR1, PAN, 2);      /* FEAT_PAN2 */
     t = FIELD_DP64(t, ID_AA64MMFR1, XNX, 1);      /* FEAT_XNX */
+    t = FIELD_DP64(t, ID_AA64MMFR1, HCX, 1);      /* FEAT_HCX */
     cpu->isar.id_aa64mmfr1 = t;
 
     t = cpu->isar.id_aa64mmfr2;
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 432bd81919..93ab552346 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -5278,6 +5278,52 @@ uint64_t arm_hcr_el2_eff(CPUARMState *env)
     return ret;
 }
 
+static void hcrx_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                       uint64_t value)
+{
+    uint64_t valid_mask = 0;
+
+    /* No features adding bits to HCRX are implemented. */
+
+    /* Clear RES0 bits.  */
+    env->cp15.hcrx_el2 = value & valid_mask;
+}
+
+static CPAccessResult access_hxen(CPUARMState *env, const ARMCPRegInfo *ri,
+                                  bool isread)
+{
+    if (arm_current_el(env) < 3
+        && arm_feature(env, ARM_FEATURE_EL3)
+        && !(env->cp15.scr_el3 & SCR_HXEN)) {
+        return CP_ACCESS_TRAP_EL3;
+    }
+    return CP_ACCESS_OK;
+}
+
+static const ARMCPRegInfo hcrx_el2_reginfo = {
+    .name = "HCRX_EL2", .state = ARM_CP_STATE_AA64,
+    .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 2, .opc2 = 2,
+    .access = PL2_RW, .writefn = hcrx_write, .accessfn = access_hxen,
+    .fieldoffset = offsetof(CPUARMState, cp15.hcrx_el2),
+};
+
+/* Return the effective value of HCRX_EL2.  */
+uint64_t arm_hcrx_el2_eff(CPUARMState *env)
+{
+    /*
+     * The bits in this register behave as 0 for all purposes other than
+     * direct reads of the register if:
+     *   - EL2 is not enabled in the current security state,
+     *   - SCR_EL3.HXEn is 0.
+     */
+    if (!arm_is_el2_enabled(env)
+        || (arm_feature(env, ARM_FEATURE_EL3)
+            && !(env->cp15.scr_el3 & SCR_HXEN))) {
+        return 0;
+    }
+    return env->cp15.hcrx_el2;
+}
+
 static void cptr_el2_write(CPUARMState *env, const ARMCPRegInfo *ri,
                            uint64_t value)
 {
@@ -8384,6 +8430,10 @@ void register_cp_regs_for_features(ARMCPU *cpu)
         define_arm_cp_regs(cpu, zcr_reginfo);
     }
 
+    if (cpu_isar_feature(aa64_hcx, cpu)) {
+        define_one_arm_cp_reg(cpu, &hcrx_el2_reginfo);
+    }
+
 #ifdef TARGET_AARCH64
     if (cpu_isar_feature(aa64_pauth, cpu)) {
         define_arm_cp_regs(cpu, pauth_reginfo);
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v2 2/7] target/arm: Use FIELD definitions for CPACR, CPTR_ELx
  2022-05-17  5:48 [PATCH v2 0/7] target/arm: SME prep patches Richard Henderson
  2022-05-17  5:48 ` [PATCH v2 1/7] target/arm: Enable FEAT_HCX for -cpu max Richard Henderson
@ 2022-05-17  5:48 ` Richard Henderson
  2022-05-17  5:48 ` [PATCH v2 3/7] target/arm: Do not use aarch64_sve_zcr_get_valid_len in reset Richard Henderson
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 16+ messages in thread
From: Richard Henderson @ 2022-05-17  5:48 UTC (permalink / raw)
  To: qemu-devel; +Cc: qemu-arm, Peter Maydell

We had a few CPTR_* bits defined, but missed quite a few.
Complete all of the fields up to ARMv9.2.
Use FIELD_EX64 instead of manual extract32.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/cpu.h    | 44 +++++++++++++++++++++++++++++++-----
 hw/arm/boot.c       |  2 +-
 target/arm/cpu.c    | 11 ++++++---
 target/arm/helper.c | 54 ++++++++++++++++++++++-----------------------
 4 files changed, 75 insertions(+), 36 deletions(-)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index b35b117fe7..d23b7b3ad4 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -1259,11 +1259,45 @@ void pmu_init(ARMCPU *cpu);
 #define SCTLR_SPINTMASK (1ULL << 62) /* FEAT_NMI */
 #define SCTLR_TIDCP   (1ULL << 63) /* FEAT_TIDCP1 */
 
-#define CPTR_TCPAC    (1U << 31)
-#define CPTR_TTA      (1U << 20)
-#define CPTR_TFP      (1U << 10)
-#define CPTR_TZ       (1U << 8)   /* CPTR_EL2 */
-#define CPTR_EZ       (1U << 8)   /* CPTR_EL3 */
+/* Bit definitions for CPACR (AArch32 only) */
+FIELD(CPACR, CP10, 20, 2)
+FIELD(CPACR, CP11, 22, 2)
+FIELD(CPACR, TRCDIS, 28, 1)    /* matches CPACR_EL1.TTA */
+FIELD(CPACR, D32DIS, 30, 1)    /* up to v7; RAZ in v8 */
+FIELD(CPACR, ASEDIS, 31, 1)
+
+/* Bit definitions for CPACR_EL1 (AArch64 only) */
+FIELD(CPACR_EL1, ZEN, 16, 2)
+FIELD(CPACR_EL1, FPEN, 20, 2)
+FIELD(CPACR_EL1, SMEN, 24, 2)
+FIELD(CPACR_EL1, TTA, 28, 1)   /* matches CPACR.TRCDIS */
+
+/* Bit definitions for HCPTR (AArch32 only) */
+FIELD(HCPTR, TCP10, 10, 1)
+FIELD(HCPTR, TCP11, 11, 1)
+FIELD(HCPTR, TASE, 15, 1)
+FIELD(HCPTR, TTA, 20, 1)
+FIELD(HCPTR, TAM, 30, 1)       /* matches CPTR_EL2.TAM */
+FIELD(HCPTR, TCPAC, 31, 1)     /* matches CPTR_EL2.TCPAC */
+
+/* Bit definitions for CPTR_EL2 (AArch64 only) */
+FIELD(CPTR_EL2, TZ, 8, 1)      /* !E2H */
+FIELD(CPTR_EL2, TFP, 10, 1)    /* !E2H, matches HCPTR.TCP10 */
+FIELD(CPTR_EL2, TSM, 12, 1)    /* !E2H */
+FIELD(CPTR_EL2, ZEN, 16, 2)    /* E2H */
+FIELD(CPTR_EL2, FPEN, 20, 2)   /* E2H */
+FIELD(CPTR_EL2, SMEN, 24, 2)   /* E2H */
+FIELD(CPTR_EL2, TTA, 28, 1)
+FIELD(CPTR_EL2, TAM, 30, 1)    /* matches HCPTR.TAM */
+FIELD(CPTR_EL2, TCPAC, 31, 1)  /* matches HCPTR.TCPAC */
+
+/* Bit definitions for CPTR_EL3 (AArch64 only) */
+FIELD(CPTR_EL3, EZ, 8, 1)
+FIELD(CPTR_EL3, TFP, 10, 1)
+FIELD(CPTR_EL3, ESM, 12, 1)
+FIELD(CPTR_EL3, TTA, 20, 1)
+FIELD(CPTR_EL3, TAM, 30, 1)
+FIELD(CPTR_EL3, TCPAC, 31, 1)
 
 #define MDCR_EPMAD    (1U << 21)
 #define MDCR_EDAD     (1U << 20)
diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index a47f38dfc9..a8de33fd64 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -761,7 +761,7 @@ static void do_cpu_reset(void *opaque)
                         env->cp15.scr_el3 |= SCR_ATA;
                     }
                     if (cpu_isar_feature(aa64_sve, cpu)) {
-                        env->cp15.cptr_el[3] |= CPTR_EZ;
+                        env->cp15.cptr_el[3] |= R_CPTR_EL3_EZ_MASK;
                     }
                     /* AArch64 kernels never boot in secure mode */
                     assert(!info->secure_boot);
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 029f644768..d2bd74c2ed 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -201,9 +201,11 @@ static void arm_cpu_reset(DeviceState *dev)
         /* Trap on btype=3 for PACIxSP. */
         env->cp15.sctlr_el[1] |= SCTLR_BT0;
         /* and to the FP/Neon instructions */
-        env->cp15.cpacr_el1 = deposit64(env->cp15.cpacr_el1, 20, 2, 3);
+        env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
+                                         CPACR_EL1, FPEN, 3);
         /* and to the SVE instructions */
-        env->cp15.cpacr_el1 = deposit64(env->cp15.cpacr_el1, 16, 2, 3);
+        env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
+                                         CPACR_EL1, ZEN, 3);
         /* with reasonable vector length */
         if (cpu_isar_feature(aa64_sve, cpu)) {
             env->vfp.zcr_el[1] =
@@ -252,7 +254,10 @@ static void arm_cpu_reset(DeviceState *dev)
     } else {
 #if defined(CONFIG_USER_ONLY)
         /* Userspace expects access to cp10 and cp11 for FP/Neon */
-        env->cp15.cpacr_el1 = deposit64(env->cp15.cpacr_el1, 20, 4, 0xf);
+        env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
+                                         CPACR, CP10, 3);
+        env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
+                                         CPACR, CP11, 3);
 #endif
     }
 
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 93ab552346..5fd64b742a 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -767,11 +767,14 @@ static void cpacr_write(CPUARMState *env, const ARMCPRegInfo *ri,
          */
         if (cpu_isar_feature(aa32_vfp_simd, env_archcpu(env))) {
             /* VFP coprocessor: cp10 & cp11 [23:20] */
-            mask |= (1 << 31) | (1 << 30) | (0xf << 20);
+            mask |= R_CPACR_ASEDIS_MASK |
+                    R_CPACR_D32DIS_MASK |
+                    R_CPACR_CP11_MASK |
+                    R_CPACR_CP10_MASK;
 
             if (!arm_feature(env, ARM_FEATURE_NEON)) {
                 /* ASEDIS [31] bit is RAO/WI */
-                value |= (1 << 31);
+                value |= R_CPACR_ASEDIS_MASK;
             }
 
             /* VFPv3 and upwards with NEON implement 32 double precision
@@ -779,7 +782,7 @@ static void cpacr_write(CPUARMState *env, const ARMCPRegInfo *ri,
              */
             if (!cpu_isar_feature(aa32_simd_r32, env_archcpu(env))) {
                 /* D32DIS [30] is RAO/WI if D16-31 are not implemented. */
-                value |= (1 << 30);
+                value |= R_CPACR_D32DIS_MASK;
             }
         }
         value &= mask;
@@ -791,8 +794,8 @@ static void cpacr_write(CPUARMState *env, const ARMCPRegInfo *ri,
      */
     if (arm_feature(env, ARM_FEATURE_EL3) && !arm_el_is_aa64(env, 3) &&
         !arm_is_secure(env) && !extract32(env->cp15.nsacr, 10, 1)) {
-        value &= ~(0xf << 20);
-        value |= env->cp15.cpacr_el1 & (0xf << 20);
+        mask = R_CPACR_CP11_MASK | R_CPACR_CP10_MASK;
+        value = (value & ~mask) | (env->cp15.cpacr_el1 & mask);
     }
 
     env->cp15.cpacr_el1 = value;
@@ -808,7 +811,7 @@ static uint64_t cpacr_read(CPUARMState *env, const ARMCPRegInfo *ri)
 
     if (arm_feature(env, ARM_FEATURE_EL3) && !arm_el_is_aa64(env, 3) &&
         !arm_is_secure(env) && !extract32(env->cp15.nsacr, 10, 1)) {
-        value &= ~(0xf << 20);
+        value = ~(R_CPACR_CP11_MASK | R_CPACR_CP10_MASK);
     }
     return value;
 }
@@ -828,11 +831,11 @@ static CPAccessResult cpacr_access(CPUARMState *env, const ARMCPRegInfo *ri,
     if (arm_feature(env, ARM_FEATURE_V8)) {
         /* Check if CPACR accesses are to be trapped to EL2 */
         if (arm_current_el(env) == 1 && arm_is_el2_enabled(env) &&
-            (env->cp15.cptr_el[2] & CPTR_TCPAC)) {
+            FIELD_EX64(env->cp15.cptr_el[2], CPTR_EL2, TCPAC)) {
             return CP_ACCESS_TRAP_EL2;
         /* Check if CPACR accesses are to be trapped to EL3 */
         } else if (arm_current_el(env) < 3 &&
-                   (env->cp15.cptr_el[3] & CPTR_TCPAC)) {
+                   FIELD_EX64(env->cp15.cptr_el[3], CPTR_EL3, TCPAC)) {
             return CP_ACCESS_TRAP_EL3;
         }
     }
@@ -844,7 +847,8 @@ static CPAccessResult cptr_access(CPUARMState *env, const ARMCPRegInfo *ri,
                                   bool isread)
 {
     /* Check if CPTR accesses are set to trap to EL3 */
-    if (arm_current_el(env) == 2 && (env->cp15.cptr_el[3] & CPTR_TCPAC)) {
+    if (arm_current_el(env) == 2 &&
+        FIELD_EX64(env->cp15.cptr_el[3], CPTR_EL3, TCPAC)) {
         return CP_ACCESS_TRAP_EL3;
     }
 
@@ -5333,8 +5337,8 @@ static void cptr_el2_write(CPUARMState *env, const ARMCPRegInfo *ri,
      */
     if (arm_feature(env, ARM_FEATURE_EL3) && !arm_el_is_aa64(env, 3) &&
         !arm_is_secure(env) && !extract32(env->cp15.nsacr, 10, 1)) {
-        value &= ~(0x3 << 10);
-        value |= env->cp15.cptr_el[2] & (0x3 << 10);
+        uint64_t mask = R_HCPTR_TCP11_MASK | R_HCPTR_TCP10_MASK;
+        value = (value & ~mask) | (env->cp15.cptr_el[2] & mask);
     }
     env->cp15.cptr_el[2] = value;
 }
@@ -5349,7 +5353,7 @@ static uint64_t cptr_el2_read(CPUARMState *env, const ARMCPRegInfo *ri)
 
     if (arm_feature(env, ARM_FEATURE_EL3) && !arm_el_is_aa64(env, 3) &&
         !arm_is_secure(env) && !extract32(env->cp15.nsacr, 10, 1)) {
-        value |= 0x3 << 10;
+        value |= R_HCPTR_TCP11_MASK | R_HCPTR_TCP10_MASK;
     }
     return value;
 }
@@ -6144,8 +6148,7 @@ int sve_exception_el(CPUARMState *env, int el)
     uint64_t hcr_el2 = arm_hcr_el2_eff(env);
 
     if (el <= 1 && (hcr_el2 & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE)) {
-        /* Check CPACR.ZEN.  */
-        switch (extract32(env->cp15.cpacr_el1, 16, 2)) {
+        switch (FIELD_EX64(env->cp15.cpacr_el1, CPACR_EL1, ZEN)) {
         case 1:
             if (el != 0) {
                 break;
@@ -6158,7 +6161,7 @@ int sve_exception_el(CPUARMState *env, int el)
         }
 
         /* Check CPACR.FPEN.  */
-        switch (extract32(env->cp15.cpacr_el1, 20, 2)) {
+        switch (FIELD_EX64(env->cp15.cpacr_el1, CPACR_EL1, FPEN)) {
         case 1:
             if (el != 0) {
                 break;
@@ -6175,8 +6178,7 @@ int sve_exception_el(CPUARMState *env, int el)
      */
     if (el <= 2) {
         if (hcr_el2 & HCR_E2H) {
-            /* Check CPTR_EL2.ZEN.  */
-            switch (extract32(env->cp15.cptr_el[2], 16, 2)) {
+            switch (FIELD_EX64(env->cp15.cptr_el[2], CPTR_EL2, ZEN)) {
             case 1:
                 if (el != 0 || !(hcr_el2 & HCR_TGE)) {
                     break;
@@ -6187,8 +6189,7 @@ int sve_exception_el(CPUARMState *env, int el)
                 return 2;
             }
 
-            /* Check CPTR_EL2.FPEN.  */
-            switch (extract32(env->cp15.cptr_el[2], 20, 2)) {
+            switch (FIELD_EX32(env->cp15.cptr_el[2], CPTR_EL2, FPEN)) {
             case 1:
                 if (el == 2 || !(hcr_el2 & HCR_TGE)) {
                     break;
@@ -6199,10 +6200,10 @@ int sve_exception_el(CPUARMState *env, int el)
                 return 0;
             }
         } else if (arm_is_el2_enabled(env)) {
-            if (env->cp15.cptr_el[2] & CPTR_TZ) {
+            if (FIELD_EX64(env->cp15.cptr_el[2], CPTR_EL2, TZ)) {
                 return 2;
             }
-            if (env->cp15.cptr_el[2] & CPTR_TFP) {
+            if (FIELD_EX64(env->cp15.cptr_el[2], CPTR_EL2, TFP)) {
                 return 0;
             }
         }
@@ -6210,7 +6211,7 @@ int sve_exception_el(CPUARMState *env, int el)
 
     /* CPTR_EL3.  Since EZ is negative we must check for EL3.  */
     if (arm_feature(env, ARM_FEATURE_EL3)
-        && !(env->cp15.cptr_el[3] & CPTR_EZ)) {
+        && !FIELD_EX64(env->cp15.cptr_el[3], CPTR_EL3, EZ)) {
         return 3;
     }
 #endif
@@ -13266,7 +13267,7 @@ int fp_exception_el(CPUARMState *env, int cur_el)
      * This register is ignored if E2H+TGE are both set.
      */
     if ((hcr_el2 & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE)) {
-        int fpen = extract32(env->cp15.cpacr_el1, 20, 2);
+        int fpen = FIELD_EX64(env->cp15.cpacr_el1, CPACR_EL1, FPEN);
 
         switch (fpen) {
         case 0:
@@ -13312,8 +13313,7 @@ int fp_exception_el(CPUARMState *env, int cur_el)
      */
     if (cur_el <= 2) {
         if (hcr_el2 & HCR_E2H) {
-            /* Check CPTR_EL2.FPEN.  */
-            switch (extract32(env->cp15.cptr_el[2], 20, 2)) {
+            switch (FIELD_EX64(env->cp15.cptr_el[2], CPTR_EL2, FPEN)) {
             case 1:
                 if (cur_el != 0 || !(hcr_el2 & HCR_TGE)) {
                     break;
@@ -13324,14 +13324,14 @@ int fp_exception_el(CPUARMState *env, int cur_el)
                 return 2;
             }
         } else if (arm_is_el2_enabled(env)) {
-            if (env->cp15.cptr_el[2] & CPTR_TFP) {
+            if (FIELD_EX64(env->cp15.cptr_el[2], CPTR_EL2, TFP)) {
                 return 2;
             }
         }
     }
 
     /* CPTR_EL3 : present in v8 */
-    if (env->cp15.cptr_el[3] & CPTR_TFP) {
+    if (FIELD_EX64(env->cp15.cptr_el[3], CPTR_EL3, TFP)) {
         /* Trap all FP ops to EL3 */
         return 3;
     }
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v2 3/7] target/arm: Do not use aarch64_sve_zcr_get_valid_len in reset
  2022-05-17  5:48 [PATCH v2 0/7] target/arm: SME prep patches Richard Henderson
  2022-05-17  5:48 ` [PATCH v2 1/7] target/arm: Enable FEAT_HCX for -cpu max Richard Henderson
  2022-05-17  5:48 ` [PATCH v2 2/7] target/arm: Use FIELD definitions for CPACR, CPTR_ELx Richard Henderson
@ 2022-05-17  5:48 ` Richard Henderson
  2022-05-19 10:40   ` Peter Maydell
  2022-05-17  5:48 ` [PATCH v2 4/7] target/arm: Merge aarch64_sve_zcr_get_valid_len into caller Richard Henderson
                   ` (4 subsequent siblings)
  7 siblings, 1 reply; 16+ messages in thread
From: Richard Henderson @ 2022-05-17  5:48 UTC (permalink / raw)
  To: qemu-devel; +Cc: qemu-arm

We don't need to constrain the value set in zcr_el[1],
because it will be done by sve_zcr_len_for_el.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/cpu.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index d2bd74c2ed..0621944167 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -208,8 +208,7 @@ static void arm_cpu_reset(DeviceState *dev)
                                          CPACR_EL1, ZEN, 3);
         /* with reasonable vector length */
         if (cpu_isar_feature(aa64_sve, cpu)) {
-            env->vfp.zcr_el[1] =
-                aarch64_sve_zcr_get_valid_len(cpu, cpu->sve_default_vq - 1);
+            env->vfp.zcr_el[1] = cpu->sve_default_vq - 1;
         }
         /*
          * Enable 48-bit address space (TODO: take reserved_va into account).
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v2 4/7] target/arm: Merge aarch64_sve_zcr_get_valid_len into caller
  2022-05-17  5:48 [PATCH v2 0/7] target/arm: SME prep patches Richard Henderson
                   ` (2 preceding siblings ...)
  2022-05-17  5:48 ` [PATCH v2 3/7] target/arm: Do not use aarch64_sve_zcr_get_valid_len in reset Richard Henderson
@ 2022-05-17  5:48 ` Richard Henderson
  2022-05-17  5:48 ` [PATCH v2 5/7] target/arm: Use uint32_t instead of bitmap for sve vq's Richard Henderson
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 16+ messages in thread
From: Richard Henderson @ 2022-05-17  5:48 UTC (permalink / raw)
  To: qemu-devel; +Cc: qemu-arm

This function is used only once, and will need modification
for Streaming SVE mode.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/internals.h | 11 -----------
 target/arm/helper.c    | 30 +++++++++++-------------------
 2 files changed, 11 insertions(+), 30 deletions(-)

diff --git a/target/arm/internals.h b/target/arm/internals.h
index 6ca0e95746..36ff843cef 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -189,17 +189,6 @@ void arm_translate_init(void);
 void arm_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb);
 #endif /* CONFIG_TCG */
 
-/**
- * aarch64_sve_zcr_get_valid_len:
- * @cpu: cpu context
- * @start_len: maximum len to consider
- *
- * Return the maximum supported sve vector length <= @start_len.
- * Note that both @start_len and the return value are in units
- * of ZCR_ELx.LEN, so the vector bit length is (x + 1) * 128.
- */
-uint32_t aarch64_sve_zcr_get_valid_len(ARMCPU *cpu, uint32_t start_len);
-
 enum arm_fprounding {
     FPROUNDING_TIEEVEN,
     FPROUNDING_POSINF,
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 5fd64b742a..0308920357 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -6218,40 +6218,32 @@ int sve_exception_el(CPUARMState *env, int el)
     return 0;
 }
 
-uint32_t aarch64_sve_zcr_get_valid_len(ARMCPU *cpu, uint32_t start_len)
-{
-    uint32_t end_len;
-
-    start_len = MIN(start_len, ARM_MAX_VQ - 1);
-    end_len = start_len;
-
-    if (!test_bit(start_len, cpu->sve_vq_map)) {
-        end_len = find_last_bit(cpu->sve_vq_map, start_len);
-        assert(end_len < start_len);
-    }
-    return end_len;
-}
-
 /*
  * Given that SVE is enabled, return the vector length for EL.
  */
 uint32_t sve_zcr_len_for_el(CPUARMState *env, int el)
 {
     ARMCPU *cpu = env_archcpu(env);
-    uint32_t zcr_len = cpu->sve_max_vq - 1;
+    uint32_t len = cpu->sve_max_vq - 1;
+    uint32_t end_len;
 
     if (el <= 1 &&
         (arm_hcr_el2_eff(env) & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE)) {
-        zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[1]);
+        len = MIN(len, 0xf & (uint32_t)env->vfp.zcr_el[1]);
     }
     if (el <= 2 && arm_feature(env, ARM_FEATURE_EL2)) {
-        zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[2]);
+        len = MIN(len, 0xf & (uint32_t)env->vfp.zcr_el[2]);
     }
     if (arm_feature(env, ARM_FEATURE_EL3)) {
-        zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[3]);
+        len = MIN(len, 0xf & (uint32_t)env->vfp.zcr_el[3]);
     }
 
-    return aarch64_sve_zcr_get_valid_len(cpu, zcr_len);
+    end_len = len;
+    if (!test_bit(len, cpu->sve_vq_map)) {
+        end_len = find_last_bit(cpu->sve_vq_map, len);
+        assert(end_len < len);
+    }
+    return end_len;
 }
 
 static void zcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v2 5/7] target/arm: Use uint32_t instead of bitmap for sve vq's
  2022-05-17  5:48 [PATCH v2 0/7] target/arm: SME prep patches Richard Henderson
                   ` (3 preceding siblings ...)
  2022-05-17  5:48 ` [PATCH v2 4/7] target/arm: Merge aarch64_sve_zcr_get_valid_len into caller Richard Henderson
@ 2022-05-17  5:48 ` Richard Henderson
  2022-05-19 12:10   ` Peter Maydell
  2022-05-17  5:48 ` [PATCH v2 6/7] target/arm: Remove fp checks from sve_exception_el Richard Henderson
                   ` (2 subsequent siblings)
  7 siblings, 1 reply; 16+ messages in thread
From: Richard Henderson @ 2022-05-17  5:48 UTC (permalink / raw)
  To: qemu-devel; +Cc: qemu-arm

The bitmap need only hold 15 bits; bitmap is over-complicated.
We can simplify operations quite a bit with plain logical ops.

The introduction of SVE_VQ_POW2_MAP eliminates the need for
looping in order to search for powers of two.  Simply perform
the logical ops and use count leading or trailing zeros as
required to find the result.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/cpu.h       |   6 +--
 target/arm/internals.h |   5 ++
 target/arm/kvm_arm.h   |   7 ++-
 target/arm/cpu64.c     | 117 ++++++++++++++++++++---------------------
 target/arm/helper.c    |   9 +---
 target/arm/kvm64.c     |  36 +++----------
 6 files changed, 75 insertions(+), 105 deletions(-)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index d23b7b3ad4..db7e51bf67 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -1039,9 +1039,9 @@ struct ArchCPU {
      * Bits set in sve_vq_supported represent valid vector lengths for
      * the CPU type.
      */
-    DECLARE_BITMAP(sve_vq_map, ARM_MAX_VQ);
-    DECLARE_BITMAP(sve_vq_init, ARM_MAX_VQ);
-    DECLARE_BITMAP(sve_vq_supported, ARM_MAX_VQ);
+    uint32_t sve_vq_map;
+    uint32_t sve_vq_init;
+    uint32_t sve_vq_supported;
 
     /* Generic timer counter frequency, in Hz */
     uint64_t gt_cntfrq_hz;
diff --git a/target/arm/internals.h b/target/arm/internals.h
index 36ff843cef..4165d49570 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -1312,4 +1312,9 @@ void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu);
 
 void aa32_max_features(ARMCPU *cpu);
 
+/* Powers of 2 for sve_vq_map et al. */
+#define SVE_VQ_POW2_MAP                                 \
+    ((1 << (1 - 1)) | (1 << (2 - 1)) |                  \
+     (1 << (4 - 1)) | (1 << (8 - 1)) | (1 << (16 - 1)))
+
 #endif
diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h
index b7f78b5215..99017b635c 100644
--- a/target/arm/kvm_arm.h
+++ b/target/arm/kvm_arm.h
@@ -239,13 +239,12 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf);
 /**
  * kvm_arm_sve_get_vls:
  * @cs: CPUState
- * @map: bitmap to fill in
  *
  * Get all the SVE vector lengths supported by the KVM host, setting
  * the bits corresponding to their length in quadwords minus one
- * (vq - 1) in @map up to ARM_MAX_VQ.
+ * (vq - 1) up to ARM_MAX_VQ.  Return the resulting map.
  */
-void kvm_arm_sve_get_vls(CPUState *cs, unsigned long *map);
+uint32_t kvm_arm_sve_get_vls(CPUState *cs);
 
 /**
  * kvm_arm_set_cpu_features_from_host:
@@ -439,7 +438,7 @@ static inline void kvm_arm_steal_time_finalize(ARMCPU *cpu, Error **errp)
     g_assert_not_reached();
 }
 
-static inline void kvm_arm_sve_get_vls(CPUState *cs, unsigned long *map)
+static inline uint32_t kvm_arm_sve_get_vls(CPUState *cs)
 {
     g_assert_not_reached();
 }
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 4ab1dcf2ef..a020933975 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -341,8 +341,11 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
      * any of the above.  Finally, if SVE is not disabled, then at least one
      * vector length must be enabled.
      */
-    DECLARE_BITMAP(tmp, ARM_MAX_VQ);
-    uint32_t vq, max_vq = 0;
+    uint32_t vq_map = cpu->sve_vq_map;
+    uint32_t vq_init = cpu->sve_vq_init;
+    uint32_t vq_supported;
+    uint32_t vq_mask = 0;
+    uint32_t tmp, vq, max_vq = 0;
 
     /*
      * CPU models specify a set of supported vector lengths which are
@@ -350,10 +353,16 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
      * in the supported bitmap results in an error.  When KVM is enabled we
      * fetch the supported bitmap from the host.
      */
-    if (kvm_enabled() && kvm_arm_sve_supported()) {
-        kvm_arm_sve_get_vls(CPU(cpu), cpu->sve_vq_supported);
-    } else if (kvm_enabled()) {
-        assert(!cpu_isar_feature(aa64_sve, cpu));
+    if (kvm_enabled()) {
+        if (kvm_arm_sve_supported()) {
+            cpu->sve_vq_supported = kvm_arm_sve_get_vls(CPU(cpu));
+            vq_supported = cpu->sve_vq_supported;
+        } else {
+            assert(!cpu_isar_feature(aa64_sve, cpu));
+            vq_supported = 0;
+        }
+    } else {
+        vq_supported = cpu->sve_vq_supported;
     }
 
     /*
@@ -361,8 +370,9 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
      * From the properties, sve_vq_map<N> implies sve_vq_init<N>.
      * Check first for any sve<N> enabled.
      */
-    if (!bitmap_empty(cpu->sve_vq_map, ARM_MAX_VQ)) {
-        max_vq = find_last_bit(cpu->sve_vq_map, ARM_MAX_VQ) + 1;
+    if (vq_map != 0) {
+        max_vq = 32 - clz32(vq_map);
+        vq_mask = MAKE_64BIT_MASK(0, max_vq);
 
         if (cpu->sve_max_vq && max_vq > cpu->sve_max_vq) {
             error_setg(errp, "cannot enable sve%d", max_vq * 128);
@@ -378,15 +388,10 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
              * For KVM we have to automatically enable all supported unitialized
              * lengths, even when the smaller lengths are not all powers-of-two.
              */
-            bitmap_andnot(tmp, cpu->sve_vq_supported, cpu->sve_vq_init, max_vq);
-            bitmap_or(cpu->sve_vq_map, cpu->sve_vq_map, tmp, max_vq);
+            vq_map |= vq_supported & ~vq_init & vq_mask;
         } else {
             /* Propagate enabled bits down through required powers-of-two. */
-            for (vq = pow2floor(max_vq); vq >= 1; vq >>= 1) {
-                if (!test_bit(vq - 1, cpu->sve_vq_init)) {
-                    set_bit(vq - 1, cpu->sve_vq_map);
-                }
-            }
+            vq_map |= SVE_VQ_POW2_MAP & ~vq_init & vq_mask;
         }
     } else if (cpu->sve_max_vq == 0) {
         /*
@@ -399,25 +404,18 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
 
         if (kvm_enabled()) {
             /* Disabling a supported length disables all larger lengths. */
-            for (vq = 1; vq <= ARM_MAX_VQ; ++vq) {
-                if (test_bit(vq - 1, cpu->sve_vq_init) &&
-                    test_bit(vq - 1, cpu->sve_vq_supported)) {
-                    break;
-                }
-            }
+            tmp = vq_init & vq_supported;
         } else {
             /* Disabling a power-of-two disables all larger lengths. */
-            for (vq = 1; vq <= ARM_MAX_VQ; vq <<= 1) {
-                if (test_bit(vq - 1, cpu->sve_vq_init)) {
-                    break;
-                }
-            }
+            tmp = vq_init & SVE_VQ_POW2_MAP;
         }
+        vq = ctz32(tmp) + 1;
 
         max_vq = vq <= ARM_MAX_VQ ? vq - 1 : ARM_MAX_VQ;
-        bitmap_andnot(cpu->sve_vq_map, cpu->sve_vq_supported,
-                      cpu->sve_vq_init, max_vq);
-        if (max_vq == 0 || bitmap_empty(cpu->sve_vq_map, max_vq)) {
+        vq_mask = MAKE_64BIT_MASK(0, max_vq);
+        vq_map = vq_supported & ~vq_init & vq_mask;
+
+        if (max_vq == 0 || vq_map == 0) {
             error_setg(errp, "cannot disable sve%d", vq * 128);
             error_append_hint(errp, "Disabling sve%d results in all "
                               "vector lengths being disabled.\n",
@@ -427,7 +425,8 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
             return;
         }
 
-        max_vq = find_last_bit(cpu->sve_vq_map, max_vq) + 1;
+        max_vq = 32 - clz32(vq_map);
+        vq_mask = MAKE_64BIT_MASK(0, max_vq);
     }
 
     /*
@@ -437,9 +436,9 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
      */
     if (cpu->sve_max_vq != 0) {
         max_vq = cpu->sve_max_vq;
+        vq_mask = MAKE_64BIT_MASK(0, max_vq);
 
-        if (!test_bit(max_vq - 1, cpu->sve_vq_map) &&
-            test_bit(max_vq - 1, cpu->sve_vq_init)) {
+        if (vq_init & ~vq_map & (1 << (max_vq - 1))) {
             error_setg(errp, "cannot disable sve%d", max_vq * 128);
             error_append_hint(errp, "The maximum vector length must be "
                               "enabled, sve-max-vq=%d (%d bits)\n",
@@ -448,8 +447,7 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
         }
 
         /* Set all bits not explicitly set within sve-max-vq. */
-        bitmap_complement(tmp, cpu->sve_vq_init, max_vq);
-        bitmap_or(cpu->sve_vq_map, cpu->sve_vq_map, tmp, max_vq);
+        vq_map |= ~vq_init & vq_mask;
     }
 
     /*
@@ -458,13 +456,14 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
      * are clear, just in case anybody looks.
      */
     assert(max_vq != 0);
-    bitmap_clear(cpu->sve_vq_map, max_vq, ARM_MAX_VQ - max_vq);
+    assert(vq_mask != 0);
+    vq_map &= vq_mask;
 
     /* Ensure the set of lengths matches what is supported. */
-    bitmap_xor(tmp, cpu->sve_vq_map, cpu->sve_vq_supported, max_vq);
-    if (!bitmap_empty(tmp, max_vq)) {
-        vq = find_last_bit(tmp, max_vq) + 1;
-        if (test_bit(vq - 1, cpu->sve_vq_map)) {
+    tmp = vq_map ^ (vq_supported & vq_mask);
+    if (tmp) {
+        vq = 32 - clz32(tmp);
+        if (vq_map & (1 << (vq - 1))) {
             if (cpu->sve_max_vq) {
                 error_setg(errp, "cannot set sve-max-vq=%d", cpu->sve_max_vq);
                 error_append_hint(errp, "This CPU does not support "
@@ -488,15 +487,15 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
                 return;
             } else {
                 /* Ensure all required powers-of-two are enabled. */
-                for (vq = pow2floor(max_vq); vq >= 1; vq >>= 1) {
-                    if (!test_bit(vq - 1, cpu->sve_vq_map)) {
-                        error_setg(errp, "cannot disable sve%d", vq * 128);
-                        error_append_hint(errp, "sve%d is required as it "
-                                          "is a power-of-two length smaller "
-                                          "than the maximum, sve%d\n",
-                                          vq * 128, max_vq * 128);
-                        return;
-                    }
+                tmp = SVE_VQ_POW2_MAP & vq_mask & ~vq_map;
+                if (tmp) {
+                    vq = 32 - clz32(tmp);
+                    error_setg(errp, "cannot disable sve%d", vq * 128);
+                    error_append_hint(errp, "sve%d is required as it "
+                                      "is a power-of-two length smaller "
+                                      "than the maximum, sve%d\n",
+                                      vq * 128, max_vq * 128);
+                    return;
                 }
             }
         }
@@ -516,6 +515,7 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
 
     /* From now on sve_max_vq is the actual maximum supported length. */
     cpu->sve_max_vq = max_vq;
+    cpu->sve_vq_map = vq_map;
 }
 
 static void cpu_max_get_sve_max_vq(Object *obj, Visitor *v, const char *name,
@@ -576,7 +576,7 @@ static void cpu_arm_get_sve_vq(Object *obj, Visitor *v, const char *name,
     if (!cpu_isar_feature(aa64_sve, cpu)) {
         value = false;
     } else {
-        value = test_bit(vq - 1, cpu->sve_vq_map);
+        value = (cpu->sve_vq_map >> (vq - 1)) & 1;
     }
     visit_type_bool(v, name, &value, errp);
 }
@@ -598,12 +598,8 @@ static void cpu_arm_set_sve_vq(Object *obj, Visitor *v, const char *name,
         return;
     }
 
-    if (value) {
-        set_bit(vq - 1, cpu->sve_vq_map);
-    } else {
-        clear_bit(vq - 1, cpu->sve_vq_map);
-    }
-    set_bit(vq - 1, cpu->sve_vq_init);
+    cpu->sve_vq_map = deposit32(cpu->sve_vq_map, vq - 1, 1, value);
+    cpu->sve_vq_init |= 1 << (vq - 1);
 }
 
 static bool cpu_arm_get_sve(Object *obj, Error **errp)
@@ -952,7 +948,7 @@ static void aarch64_max_initfn(Object *obj)
     cpu->dcz_blocksize = 7; /*  512 bytes */
 #endif
 
-    bitmap_fill(cpu->sve_vq_supported, ARM_MAX_VQ);
+    cpu->sve_vq_supported = MAKE_64BIT_MASK(0, ARM_MAX_VQ);
 
     aarch64_add_pauth_properties(obj);
     aarch64_add_sve_properties(obj);
@@ -998,12 +994,11 @@ static void aarch64_a64fx_initfn(Object *obj)
     cpu->gic_vpribits = 5;
     cpu->gic_vprebits = 5;
 
-    /* Suppport of A64FX's vector length are 128,256 and 512bit only */
+    /* Suppport of A64FX's vector length are 128, 256 and 512bit only */
     aarch64_add_sve_properties(obj);
-    bitmap_zero(cpu->sve_vq_supported, ARM_MAX_VQ);
-    set_bit(0, cpu->sve_vq_supported); /* 128bit */
-    set_bit(1, cpu->sve_vq_supported); /* 256bit */
-    set_bit(3, cpu->sve_vq_supported); /* 512bit */
+    cpu->sve_vq_supported = (1 << 0)  /* 128bit */
+                          | (1 << 1)  /* 256bit */
+                          | (1 << 3); /* 512bit */
 
     /* TODO:  Add A64FX specific HPC extension registers */
 }
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 0308920357..edeab4e473 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -6225,7 +6225,6 @@ uint32_t sve_zcr_len_for_el(CPUARMState *env, int el)
 {
     ARMCPU *cpu = env_archcpu(env);
     uint32_t len = cpu->sve_max_vq - 1;
-    uint32_t end_len;
 
     if (el <= 1 &&
         (arm_hcr_el2_eff(env) & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE)) {
@@ -6238,12 +6237,8 @@ uint32_t sve_zcr_len_for_el(CPUARMState *env, int el)
         len = MIN(len, 0xf & (uint32_t)env->vfp.zcr_el[3]);
     }
 
-    end_len = len;
-    if (!test_bit(len, cpu->sve_vq_map)) {
-        end_len = find_last_bit(cpu->sve_vq_map, len);
-        assert(end_len < len);
-    }
-    return end_len;
+    len = 31 - clz32(cpu->sve_vq_map & MAKE_64BIT_MASK(0, len + 1));
+    return len;
 }
 
 static void zcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
index b8cfaf5782..16cf01acb5 100644
--- a/target/arm/kvm64.c
+++ b/target/arm/kvm64.c
@@ -748,15 +748,13 @@ bool kvm_arm_steal_time_supported(void)
 
 QEMU_BUILD_BUG_ON(KVM_ARM64_SVE_VQ_MIN != 1);
 
-void kvm_arm_sve_get_vls(CPUState *cs, unsigned long *map)
+uint32_t kvm_arm_sve_get_vls(CPUState *cs)
 {
     /* Only call this function if kvm_arm_sve_supported() returns true. */
     static uint64_t vls[KVM_ARM64_SVE_VLS_WORDS];
     static bool probed;
     uint32_t vq = 0;
-    int i, j;
-
-    bitmap_zero(map, ARM_MAX_VQ);
+    int i;
 
     /*
      * KVM ensures all host CPUs support the same set of vector lengths.
@@ -797,46 +795,24 @@ void kvm_arm_sve_get_vls(CPUState *cs, unsigned long *map)
         if (vq > ARM_MAX_VQ) {
             warn_report("KVM supports vector lengths larger than "
                         "QEMU can enable");
+            vls[0] &= MAKE_64BIT_MASK(0, ARM_MAX_VQ);
         }
     }
 
-    for (i = 0; i < KVM_ARM64_SVE_VLS_WORDS; ++i) {
-        if (!vls[i]) {
-            continue;
-        }
-        for (j = 1; j <= 64; ++j) {
-            vq = j + i * 64;
-            if (vq > ARM_MAX_VQ) {
-                return;
-            }
-            if (vls[i] & (1UL << (j - 1))) {
-                set_bit(vq - 1, map);
-            }
-        }
-    }
+    return vls[0];
 }
 
 static int kvm_arm_sve_set_vls(CPUState *cs)
 {
-    uint64_t vls[KVM_ARM64_SVE_VLS_WORDS] = {0};
+    ARMCPU *cpu = ARM_CPU(cs);
+    uint64_t vls[KVM_ARM64_SVE_VLS_WORDS] = { cpu->sve_vq_map };
     struct kvm_one_reg reg = {
         .id = KVM_REG_ARM64_SVE_VLS,
         .addr = (uint64_t)&vls[0],
     };
-    ARMCPU *cpu = ARM_CPU(cs);
-    uint32_t vq;
-    int i, j;
 
     assert(cpu->sve_max_vq <= KVM_ARM64_SVE_VQ_MAX);
 
-    for (vq = 1; vq <= cpu->sve_max_vq; ++vq) {
-        if (test_bit(vq - 1, cpu->sve_vq_map)) {
-            i = (vq - 1) / 64;
-            j = (vq - 1) % 64;
-            vls[i] |= 1UL << j;
-        }
-    }
-
     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 }
 
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v2 6/7] target/arm: Remove fp checks from sve_exception_el
  2022-05-17  5:48 [PATCH v2 0/7] target/arm: SME prep patches Richard Henderson
                   ` (4 preceding siblings ...)
  2022-05-17  5:48 ` [PATCH v2 5/7] target/arm: Use uint32_t instead of bitmap for sve vq's Richard Henderson
@ 2022-05-17  5:48 ` Richard Henderson
  2022-05-19 11:36   ` Peter Maydell
  2022-05-17  5:48 ` [PATCH v2 7/7] target/arm: Add el_is_in_host Richard Henderson
  2022-05-19 12:11 ` [PATCH v2 0/7] target/arm: SME prep patches Peter Maydell
  7 siblings, 1 reply; 16+ messages in thread
From: Richard Henderson @ 2022-05-17  5:48 UTC (permalink / raw)
  To: qemu-devel; +Cc: qemu-arm

Instead of checking these bits in fp_exception_el and
also in sve_exception_el, document that we must compare
the results.  The only place where we have not already
checked that FP EL is zero is in rebuild_hflags_a64.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/helper.c | 52 ++++++++++++++-------------------------------
 1 file changed, 16 insertions(+), 36 deletions(-)

diff --git a/target/arm/helper.c b/target/arm/helper.c
index edeab4e473..05baa73986 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -6136,10 +6136,12 @@ static const ARMCPRegInfo minimal_ras_reginfo[] = {
       .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.vsesr_el2) },
 };
 
-/* Return the exception level to which exceptions should be taken
- * via SVEAccessTrap.  If an exception should be routed through
- * AArch64.AdvSIMDFPAccessTrap, return 0; fp_exception_el should
- * take care of raising that exception.
+/*
+ * Return the exception level to which exceptions should be taken
+ * via SVEAccessTrap.  This excludes the check for whether the exception
+ * should be routed through AArch64.AdvSIMDFPAccessTrap.  That can easily
+ * be found by testing 0 < fp_exception_el <= sve_exception_el.
+ *
  * C.f. the ARM pseudocode function CheckSVEEnabled.
  */
 int sve_exception_el(CPUARMState *env, int el)
@@ -6159,18 +6161,6 @@ int sve_exception_el(CPUARMState *env, int el)
             /* route_to_el2 */
             return hcr_el2 & HCR_TGE ? 2 : 1;
         }
-
-        /* Check CPACR.FPEN.  */
-        switch (FIELD_EX64(env->cp15.cpacr_el1, CPACR_EL1, FPEN)) {
-        case 1:
-            if (el != 0) {
-                break;
-            }
-            /* fall through */
-        case 0:
-        case 2:
-            return 0;
-        }
     }
 
     /*
@@ -6188,24 +6178,10 @@ int sve_exception_el(CPUARMState *env, int el)
             case 2:
                 return 2;
             }
-
-            switch (FIELD_EX32(env->cp15.cptr_el[2], CPTR_EL2, FPEN)) {
-            case 1:
-                if (el == 2 || !(hcr_el2 & HCR_TGE)) {
-                    break;
-                }
-                /* fall through */
-            case 0:
-            case 2:
-                return 0;
-            }
         } else if (arm_is_el2_enabled(env)) {
             if (FIELD_EX64(env->cp15.cptr_el[2], CPTR_EL2, TZ)) {
                 return 2;
             }
-            if (FIELD_EX64(env->cp15.cptr_el[2], CPTR_EL2, TFP)) {
-                return 0;
-            }
         }
     }
 
@@ -13541,15 +13517,19 @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
 
     if (cpu_isar_feature(aa64_sve, env_archcpu(env))) {
         int sve_el = sve_exception_el(env, el);
-        uint32_t zcr_len;
+        uint32_t zcr_len = 0;
 
         /*
-         * If SVE is disabled, but FP is enabled,
-         * then the effective len is 0.
+         * If either FP or SVE are disabled, translator does not need len.
+         * If SVE EL >= FP EL, FP exception has precedence, and translator
+         * does not need SVE EL.  Save potential re-translations by forcing
+         * the unneeded data to zero.
          */
-        if (sve_el != 0 && fp_el == 0) {
-            zcr_len = 0;
-        } else {
+        if (fp_el != 0) {
+            if (sve_el >= fp_el) {
+                sve_el = 0;
+            }
+        } else if (sve_el == 0) {
             zcr_len = sve_zcr_len_for_el(env, el);
         }
         DP_TBFLAG_A64(flags, SVEEXC_EL, sve_el);
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v2 7/7] target/arm: Add el_is_in_host
  2022-05-17  5:48 [PATCH v2 0/7] target/arm: SME prep patches Richard Henderson
                   ` (5 preceding siblings ...)
  2022-05-17  5:48 ` [PATCH v2 6/7] target/arm: Remove fp checks from sve_exception_el Richard Henderson
@ 2022-05-17  5:48 ` Richard Henderson
  2022-05-19 11:39   ` Peter Maydell
  2022-05-19 12:11 ` [PATCH v2 0/7] target/arm: SME prep patches Peter Maydell
  7 siblings, 1 reply; 16+ messages in thread
From: Richard Henderson @ 2022-05-17  5:48 UTC (permalink / raw)
  To: qemu-devel; +Cc: qemu-arm

This (newish) ARM pseudocode function is easier to work with
than open-coded tests for HCR_E2H etc.  Use of the function
will be staged into the code base in parts.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/internals.h |  2 ++
 target/arm/helper.c    | 23 +++++++++++++++++++++++
 2 files changed, 25 insertions(+)

diff --git a/target/arm/internals.h b/target/arm/internals.h
index 4165d49570..58392c8246 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -1310,6 +1310,8 @@ static inline void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu) { }
 void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu);
 #endif
 
+bool el_is_in_host(CPUARMState *env, int el);
+
 void aa32_max_features(ARMCPU *cpu);
 
 /* Powers of 2 for sve_vq_map et al. */
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 05baa73986..d082a1cf18 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -5282,6 +5282,29 @@ uint64_t arm_hcr_el2_eff(CPUARMState *env)
     return ret;
 }
 
+/*
+ * Corresponds to ARM pseudocode function ELIsInHost().
+ */
+bool el_is_in_host(CPUARMState *env, int el)
+{
+    uint64_t mask;
+    /*
+     * Since we only care about E2H and TGE, we can skip arm_hcr_el2_eff.
+     * Perform the simplest bit tests first, and validate EL2 afterward.
+     */
+    if (el & 1) {
+        return false; /* EL1 or EL3 */
+    }
+
+    mask = el ? HCR_E2H : HCR_E2H | HCR_TGE;
+    if ((env->cp15.hcr_el2 & mask) != mask) {
+        return false;
+    }
+
+    /* TGE and/or E2H set: double check those bits are currently legal. */
+    return arm_is_el2_enabled(env) && arm_el_is_aa64(env, 2);
+}
+
 static void hcrx_write(CPUARMState *env, const ARMCPRegInfo *ri,
                        uint64_t value)
 {
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 3/7] target/arm: Do not use aarch64_sve_zcr_get_valid_len in reset
  2022-05-17  5:48 ` [PATCH v2 3/7] target/arm: Do not use aarch64_sve_zcr_get_valid_len in reset Richard Henderson
@ 2022-05-19 10:40   ` Peter Maydell
  2022-05-19 16:33     ` Richard Henderson
  0 siblings, 1 reply; 16+ messages in thread
From: Peter Maydell @ 2022-05-19 10:40 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, qemu-arm

On Tue, 17 May 2022 at 07:00, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> We don't need to constrain the value set in zcr_el[1],
> because it will be done by sve_zcr_len_for_el.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/arm/cpu.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/target/arm/cpu.c b/target/arm/cpu.c
> index d2bd74c2ed..0621944167 100644
> --- a/target/arm/cpu.c
> +++ b/target/arm/cpu.c
> @@ -208,8 +208,7 @@ static void arm_cpu_reset(DeviceState *dev)
>                                           CPACR_EL1, ZEN, 3);
>          /* with reasonable vector length */
>          if (cpu_isar_feature(aa64_sve, cpu)) {
> -            env->vfp.zcr_el[1] =
> -                aarch64_sve_zcr_get_valid_len(cpu, cpu->sve_default_vq - 1);
> +            env->vfp.zcr_el[1] = cpu->sve_default_vq - 1;
>          }

Not all the code that looks at the sve vector length
goes through sve_zcr_len_for_el(), though. In particular,
this is setting up ZCR_EL1 for usermode, and all
the code under linux-user/ that wants to know the vector
length does it with "env->vfp.zcr_el[1] & 0xf".

More generally, it seems to me less confusing for debug
purposes if we set zcr_el[1] on reset to a valid value,
not to some invalid value that we're relying on being
coerced to something else at point of use.

Incidentally, do_prctl_set_vl() also sets zcr_el[1] and
it doesn't call aarch64_sve_zcr_get_valid_len(). Should it,
or is it doing an equivalent check anyway?

-- PMM


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 6/7] target/arm: Remove fp checks from sve_exception_el
  2022-05-17  5:48 ` [PATCH v2 6/7] target/arm: Remove fp checks from sve_exception_el Richard Henderson
@ 2022-05-19 11:36   ` Peter Maydell
  2022-05-19 15:01     ` Richard Henderson
  0 siblings, 1 reply; 16+ messages in thread
From: Peter Maydell @ 2022-05-19 11:36 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, qemu-arm

On Tue, 17 May 2022 at 07:00, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> Instead of checking these bits in fp_exception_el and
> also in sve_exception_el, document that we must compare
> the results.  The only place where we have not already
> checked that FP EL is zero is in rebuild_hflags_a64.

aarch64_cpu_dump_state() calls sve_exception_el() and doesn't
check against the FP exception EL.

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/arm/helper.c | 52 ++++++++++++++-------------------------------
>  1 file changed, 16 insertions(+), 36 deletions(-)
>
> diff --git a/target/arm/helper.c b/target/arm/helper.c
> index edeab4e473..05baa73986 100644
> --- a/target/arm/helper.c
> +++ b/target/arm/helper.c
> @@ -6136,10 +6136,12 @@ static const ARMCPRegInfo minimal_ras_reginfo[] = {
>        .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.vsesr_el2) },
>  };
>
> -/* Return the exception level to which exceptions should be taken
> - * via SVEAccessTrap.  If an exception should be routed through
> - * AArch64.AdvSIMDFPAccessTrap, return 0; fp_exception_el should
> - * take care of raising that exception.
> +/*
> + * Return the exception level to which exceptions should be taken
> + * via SVEAccessTrap.  This excludes the check for whether the exception
> + * should be routed through AArch64.AdvSIMDFPAccessTrap.  That can easily
> + * be found by testing 0 < fp_exception_el <= sve_exception_el.
> + *
>   * C.f. the ARM pseudocode function CheckSVEEnabled.

We should probably note that the pseudocode does *not* separate
out the FP trap checks, but has them all in the same function.

>   */

>          /*
> -         * If SVE is disabled, but FP is enabled,
> -         * then the effective len is 0.
> +         * If either FP or SVE are disabled, translator does not need len.
> +         * If SVE EL >= FP EL, FP exception has precedence, and translator
> +         * does not need SVE EL.  Save potential re-translations by forcing
> +         * the unneeded data to zero.
>           */

These comments say that if SVE EL == FP EL then the FP exception
has precedence, but looking at the pseudocode CheckNormalSVEEnabled()
it seems to be the other way around:  eg if CPACR_EL1 has
"disabled at EL0" for both the .ZEN bits and the .FPEN bits
then the SVEAccessTrap() is handled first.

thanks
-- PMM


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 7/7] target/arm: Add el_is_in_host
  2022-05-17  5:48 ` [PATCH v2 7/7] target/arm: Add el_is_in_host Richard Henderson
@ 2022-05-19 11:39   ` Peter Maydell
  2022-05-19 15:04     ` Richard Henderson
  0 siblings, 1 reply; 16+ messages in thread
From: Peter Maydell @ 2022-05-19 11:39 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, qemu-arm

On Tue, 17 May 2022 at 07:05, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> This (newish) ARM pseudocode function is easier to work with
> than open-coded tests for HCR_E2H etc.  Use of the function
> will be staged into the code base in parts.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/arm/internals.h |  2 ++
>  target/arm/helper.c    | 23 +++++++++++++++++++++++
>  2 files changed, 25 insertions(+)
> +/*
> + * Corresponds to ARM pseudocode function ELIsInHost().
> + */
> +bool el_is_in_host(CPUARMState *env, int el)
> +{
> +    uint64_t mask;
> +    /*
> +     * Since we only care about E2H and TGE, we can skip arm_hcr_el2_eff.
> +     * Perform the simplest bit tests first, and validate EL2 afterward.
> +     */
> +    if (el & 1) {
> +        return false; /* EL1 or EL3 */
> +    }
> +
> +    mask = el ? HCR_E2H : HCR_E2H | HCR_TGE;
> +    if ((env->cp15.hcr_el2 & mask) != mask) {
> +        return false;
> +    }
> +
> +    /* TGE and/or E2H set: double check those bits are currently legal. */
> +    return arm_is_el2_enabled(env) && arm_el_is_aa64(env, 2);
> +}

What about the HaveVirtHostExt() check ?

Otherwise, looks like it matches the pseudocode, but I'd
rather wait until we have some uses of the function before
I think too hard about it.

thanks
-- PMM


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 5/7] target/arm: Use uint32_t instead of bitmap for sve vq's
  2022-05-17  5:48 ` [PATCH v2 5/7] target/arm: Use uint32_t instead of bitmap for sve vq's Richard Henderson
@ 2022-05-19 12:10   ` Peter Maydell
  0 siblings, 0 replies; 16+ messages in thread
From: Peter Maydell @ 2022-05-19 12:10 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, qemu-arm

On Tue, 17 May 2022 at 06:59, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> The bitmap need only hold 15 bits; bitmap is over-complicated.
> We can simplify operations quite a bit with plain logical ops.
>
> The introduction of SVE_VQ_POW2_MAP eliminates the need for
> looping in order to search for powers of two.  Simply perform
> the logical ops and use count leading or trailing zeros as
> required to find the result.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---

>  static void cpu_max_get_sve_max_vq(Object *obj, Visitor *v, const char *name,
> @@ -576,7 +576,7 @@ static void cpu_arm_get_sve_vq(Object *obj, Visitor *v, const char *name,
>      if (!cpu_isar_feature(aa64_sve, cpu)) {
>          value = false;
>      } else {
> -        value = test_bit(vq - 1, cpu->sve_vq_map);
> +        value = (cpu->sve_vq_map >> (vq - 1)) & 1;

Use extract32() here, since you use deposit32() in the _set_ function?

> @@ -998,12 +994,11 @@ static void aarch64_a64fx_initfn(Object *obj)
>      cpu->gic_vpribits = 5;
>      cpu->gic_vprebits = 5;
>
> -    /* Suppport of A64FX's vector length are 128,256 and 512bit only */
> +    /* Suppport of A64FX's vector length are 128, 256 and 512bit only */

If we're going to tweak this comment text we might as well fix
all of the typo/grammar nits:

/* The A64FX supports only 128, 256 and 512 bit vector lengths */

>      aarch64_add_sve_properties(obj);
> -    bitmap_zero(cpu->sve_vq_supported, ARM_MAX_VQ);
> -    set_bit(0, cpu->sve_vq_supported); /* 128bit */
> -    set_bit(1, cpu->sve_vq_supported); /* 256bit */
> -    set_bit(3, cpu->sve_vq_supported); /* 512bit */
> +    cpu->sve_vq_supported = (1 << 0)  /* 128bit */
> +                          | (1 << 1)  /* 256bit */
> +                          | (1 << 3); /* 512bit */

Otherwise
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

thanks
-- PMM


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 0/7] target/arm: SME prep patches
  2022-05-17  5:48 [PATCH v2 0/7] target/arm: SME prep patches Richard Henderson
                   ` (6 preceding siblings ...)
  2022-05-17  5:48 ` [PATCH v2 7/7] target/arm: Add el_is_in_host Richard Henderson
@ 2022-05-19 12:11 ` Peter Maydell
  7 siblings, 0 replies; 16+ messages in thread
From: Peter Maydell @ 2022-05-19 12:11 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, qemu-arm

On Tue, 17 May 2022 at 06:49, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> Changes for v2:
>   * Fixed the errors PMM noticed in patch 2.
>   * Cleanups for SVE vector length selection.
>   * Cleanups for SVE exception el selection.
>   * Add el_is_in_host -- I'm not sure this one will really help,
>     but it's certainly appearing more and more in the pseudocode.
>
>

I've applied the first 2 patches to target-arm.next.

thanks
-- PMM


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 6/7] target/arm: Remove fp checks from sve_exception_el
  2022-05-19 11:36   ` Peter Maydell
@ 2022-05-19 15:01     ` Richard Henderson
  0 siblings, 0 replies; 16+ messages in thread
From: Richard Henderson @ 2022-05-19 15:01 UTC (permalink / raw)
  To: Peter Maydell; +Cc: qemu-devel, qemu-arm

On 5/19/22 04:36, Peter Maydell wrote:
> On Tue, 17 May 2022 at 07:00, Richard Henderson
> <richard.henderson@linaro.org> wrote:
>>
>> Instead of checking these bits in fp_exception_el and
>> also in sve_exception_el, document that we must compare
>> the results.  The only place where we have not already
>> checked that FP EL is zero is in rebuild_hflags_a64.
> 
> aarch64_cpu_dump_state() calls sve_exception_el() and doesn't
> check against the FP exception EL.

Yes it does, just 6 lines above -- fp_exc == 0.

>> -/* Return the exception level to which exceptions should be taken
>> - * via SVEAccessTrap.  If an exception should be routed through
>> - * AArch64.AdvSIMDFPAccessTrap, return 0; fp_exception_el should
>> - * take care of raising that exception.
>> +/*
>> + * Return the exception level to which exceptions should be taken
>> + * via SVEAccessTrap.  This excludes the check for whether the exception
>> + * should be routed through AArch64.AdvSIMDFPAccessTrap.  That can easily
>> + * be found by testing 0 < fp_exception_el <= sve_exception_el.
>> + *
>>    * C.f. the ARM pseudocode function CheckSVEEnabled.
> 
> We should probably note that the pseudocode does *not* separate
> out the FP trap checks, but has them all in the same function.

Sure.

>>           /*
>> -         * If SVE is disabled, but FP is enabled,
>> -         * then the effective len is 0.
>> +         * If either FP or SVE are disabled, translator does not need len.
>> +         * If SVE EL >= FP EL, FP exception has precedence, and translator
>> +         * does not need SVE EL.  Save potential re-translations by forcing
>> +         * the unneeded data to zero.
>>            */
> 
> These comments say that if SVE EL == FP EL then the FP exception
> has precedence, but looking at the pseudocode CheckNormalSVEEnabled()
> it seems to be the other way around:  eg if CPACR_EL1 has
> "disabled at EL0" for both the .ZEN bits and the .FPEN bits
> then the SVEAccessTrap() is handled first.
Yep.


r~


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 7/7] target/arm: Add el_is_in_host
  2022-05-19 11:39   ` Peter Maydell
@ 2022-05-19 15:04     ` Richard Henderson
  0 siblings, 0 replies; 16+ messages in thread
From: Richard Henderson @ 2022-05-19 15:04 UTC (permalink / raw)
  To: Peter Maydell; +Cc: qemu-devel, qemu-arm

On 5/19/22 04:39, Peter Maydell wrote:
>> +    mask = el ? HCR_E2H : HCR_E2H | HCR_TGE;
>> +    if ((env->cp15.hcr_el2 & mask) != mask) {
>> +        return false;
>> +    }
>> +
>> +    /* TGE and/or E2H set: double check those bits are currently legal. */
>> +    return arm_is_el2_enabled(env) && arm_el_is_aa64(env, 2);
>> +}
> 
> What about the HaveVirtHostExt() check ?

Handled by do_hcr_write, in not letting E2H be set.
Will add a comment.


r~


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v2 3/7] target/arm: Do not use aarch64_sve_zcr_get_valid_len in reset
  2022-05-19 10:40   ` Peter Maydell
@ 2022-05-19 16:33     ` Richard Henderson
  0 siblings, 0 replies; 16+ messages in thread
From: Richard Henderson @ 2022-05-19 16:33 UTC (permalink / raw)
  To: Peter Maydell; +Cc: qemu-devel, qemu-arm

On 5/19/22 03:40, Peter Maydell wrote:
> Not all the code that looks at the sve vector length
> goes through sve_zcr_len_for_el(), though. In particular,
> this is setting up ZCR_EL1 for usermode, and all
> the code under linux-user/ that wants to know the vector
> length does it with "env->vfp.zcr_el[1] & 0xf".

Oops, yes.  Linux-user should be checking ZCR_LEN from env->hflags.

> Incidentally, do_prctl_set_vl() also sets zcr_el[1] and
> it doesn't call aarch64_sve_zcr_get_valid_len(). Should it,
> or is it doing an equivalent check anyway?

I think this got missed when we introduced the set of valid lengths -- it's still assuming 
all lengths less than maximum are valid.

I'll add a couple of cleanup patches for this.

r~



^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2022-05-19 16:36 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-17  5:48 [PATCH v2 0/7] target/arm: SME prep patches Richard Henderson
2022-05-17  5:48 ` [PATCH v2 1/7] target/arm: Enable FEAT_HCX for -cpu max Richard Henderson
2022-05-17  5:48 ` [PATCH v2 2/7] target/arm: Use FIELD definitions for CPACR, CPTR_ELx Richard Henderson
2022-05-17  5:48 ` [PATCH v2 3/7] target/arm: Do not use aarch64_sve_zcr_get_valid_len in reset Richard Henderson
2022-05-19 10:40   ` Peter Maydell
2022-05-19 16:33     ` Richard Henderson
2022-05-17  5:48 ` [PATCH v2 4/7] target/arm: Merge aarch64_sve_zcr_get_valid_len into caller Richard Henderson
2022-05-17  5:48 ` [PATCH v2 5/7] target/arm: Use uint32_t instead of bitmap for sve vq's Richard Henderson
2022-05-19 12:10   ` Peter Maydell
2022-05-17  5:48 ` [PATCH v2 6/7] target/arm: Remove fp checks from sve_exception_el Richard Henderson
2022-05-19 11:36   ` Peter Maydell
2022-05-19 15:01     ` Richard Henderson
2022-05-17  5:48 ` [PATCH v2 7/7] target/arm: Add el_is_in_host Richard Henderson
2022-05-19 11:39   ` Peter Maydell
2022-05-19 15:04     ` Richard Henderson
2022-05-19 12:11 ` [PATCH v2 0/7] target/arm: SME prep patches Peter Maydell

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.