All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Maydell <peter.maydell@linaro.org>
To: qemu-devel@nongnu.org
Subject: [Qemu-devel] [PULL 19/33] target/arm: Pass TCGMemOpIdx to sve memory helpers
Date: Mon,  8 Oct 2018 14:59:50 +0100	[thread overview]
Message-ID: <20181008140004.12612-20-peter.maydell@linaro.org> (raw)
In-Reply-To: <20181008140004.12612-1-peter.maydell@linaro.org>

From: Richard Henderson <richard.henderson@linaro.org>

There is quite a lot of code required to compute cpu_mmu_index,
or even put together the full TCGMemOpIdx.  This can easily be
done at translation time.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Tested-by: Laurent Desnogues <laurent.desnogues@gmail.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20181005175350.30752-16-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/internals.h     |   5 ++
 target/arm/sve_helper.c    | 138 +++++++++++++++++++------------------
 target/arm/translate-sve.c |  67 +++++++++++-------
 3 files changed, 121 insertions(+), 89 deletions(-)

diff --git a/target/arm/internals.h b/target/arm/internals.h
index dc9357766c9..24c0444c8d2 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -796,4 +796,9 @@ static inline uint32_t arm_debug_exception_fsr(CPUARMState *env)
     }
 }
 
+/* Note make_memop_idx reserves 4 bits for mmu_idx, and MO_BSWAP is bit 3.
+ * Thus a TCGMemOpIdx, without any MO_ALIGN bits, fits in 8 bits.
+ */
+#define MEMOPIDX_SHIFT  8
+
 #endif
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index 7756c0b0989..8cbc6516ab5 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -19,6 +19,7 @@
 
 #include "qemu/osdep.h"
 #include "cpu.h"
+#include "internals.h"
 #include "exec/exec-all.h"
 #include "exec/cpu_ldst.h"
 #include "exec/helper-proto.h"
@@ -3990,7 +3991,7 @@ typedef intptr_t sve_ld1_host_fn(void *vd, void *vg, void *host,
  * The controlling predicate is known to be true.
  */
 typedef void sve_ld1_tlb_fn(CPUARMState *env, void *vd, intptr_t reg_off,
-                            target_ulong vaddr, int mmu_idx, uintptr_t ra);
+                            target_ulong vaddr, TCGMemOpIdx oi, uintptr_t ra);
 typedef sve_ld1_tlb_fn sve_st1_tlb_fn;
 
 /*
@@ -4017,16 +4018,15 @@ static intptr_t sve_##NAME##_host(void *vd, void *vg, void *host,           \
 #ifdef CONFIG_SOFTMMU
 #define DO_LD_TLB(NAME, H, TYPEE, TYPEM, HOST, MOEND, TLB) \
 static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off,  \
-                             target_ulong addr, int mmu_idx, uintptr_t ra)  \
+                             target_ulong addr, TCGMemOpIdx oi, uintptr_t ra)  \
 {                                                                           \
-    TCGMemOpIdx oi = make_memop_idx(ctz32(sizeof(TYPEM)) | MOEND, mmu_idx); \
     TYPEM val = TLB(env, addr, oi, ra);                                     \
     *(TYPEE *)(vd + H(reg_off)) = val;                                      \
 }
 #else
 #define DO_LD_TLB(NAME, H, TYPEE, TYPEM, HOST, MOEND, TLB)                  \
 static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off,  \
-                             target_ulong addr, int mmu_idx, uintptr_t ra)  \
+                             target_ulong addr, TCGMemOpIdx oi, uintptr_t ra)  \
 {                                                                           \
     TYPEM val = HOST(g2h(addr));                                            \
     *(TYPEE *)(vd + H(reg_off)) = val;                                      \
@@ -4154,11 +4154,13 @@ static void sve_ld1_r(CPUARMState *env, void *vg, const target_ulong addr,
                       sve_ld1_host_fn *host_fn,
                       sve_ld1_tlb_fn *tlb_fn)
 {
-    void *vd = &env->vfp.zregs[simd_data(desc)];
+    const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+    const int mmu_idx = get_mmuidx(oi);
+    const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
+    void *vd = &env->vfp.zregs[rd];
     const int diffsz = esz - msz;
     const intptr_t reg_max = simd_oprsz(desc);
     const intptr_t mem_max = reg_max >> diffsz;
-    const int mmu_idx = cpu_mmu_index(env, false);
     ARMVectorReg scratch;
     void *host;
     intptr_t split, reg_off, mem_off;
@@ -4232,7 +4234,7 @@ static void sve_ld1_r(CPUARMState *env, void *vg, const target_ulong addr,
          * on I/O memory, it may succeed but not bring in the TLB entry.
          * But even then we have still made forward progress.
          */
-        tlb_fn(env, &scratch, reg_off, addr + mem_off, mmu_idx, retaddr);
+        tlb_fn(env, &scratch, reg_off, addr + mem_off, oi, retaddr);
         reg_off += 1 << esz;
     }
 #endif
@@ -4293,9 +4295,9 @@ static void sve_ld2_r(CPUARMState *env, void *vg, target_ulong addr,
                       uint32_t desc, int size, uintptr_t ra,
                       sve_ld1_tlb_fn *tlb_fn)
 {
-    const int mmu_idx = cpu_mmu_index(env, false);
+    const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+    const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
     intptr_t i, oprsz = simd_oprsz(desc);
-    unsigned rd = simd_data(desc);
     ARMVectorReg scratch[2] = { };
 
     set_helper_retaddr(ra);
@@ -4303,8 +4305,8 @@ static void sve_ld2_r(CPUARMState *env, void *vg, target_ulong addr,
         uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
         do {
             if (pg & 1) {
-                tlb_fn(env, &scratch[0], i, addr, mmu_idx, ra);
-                tlb_fn(env, &scratch[1], i, addr + size, mmu_idx, ra);
+                tlb_fn(env, &scratch[0], i, addr, oi, ra);
+                tlb_fn(env, &scratch[1], i, addr + size, oi, ra);
             }
             i += size, pg >>= size;
             addr += 2 * size;
@@ -4321,9 +4323,9 @@ static void sve_ld3_r(CPUARMState *env, void *vg, target_ulong addr,
                       uint32_t desc, int size, uintptr_t ra,
                       sve_ld1_tlb_fn *tlb_fn)
 {
-    const int mmu_idx = cpu_mmu_index(env, false);
+    const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+    const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
     intptr_t i, oprsz = simd_oprsz(desc);
-    unsigned rd = simd_data(desc);
     ARMVectorReg scratch[3] = { };
 
     set_helper_retaddr(ra);
@@ -4331,9 +4333,9 @@ static void sve_ld3_r(CPUARMState *env, void *vg, target_ulong addr,
         uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
         do {
             if (pg & 1) {
-                tlb_fn(env, &scratch[0], i, addr, mmu_idx, ra);
-                tlb_fn(env, &scratch[1], i, addr + size, mmu_idx, ra);
-                tlb_fn(env, &scratch[2], i, addr + 2 * size, mmu_idx, ra);
+                tlb_fn(env, &scratch[0], i, addr, oi, ra);
+                tlb_fn(env, &scratch[1], i, addr + size, oi, ra);
+                tlb_fn(env, &scratch[2], i, addr + 2 * size, oi, ra);
             }
             i += size, pg >>= size;
             addr += 3 * size;
@@ -4351,9 +4353,9 @@ static void sve_ld4_r(CPUARMState *env, void *vg, target_ulong addr,
                       uint32_t desc, int size, uintptr_t ra,
                       sve_ld1_tlb_fn *tlb_fn)
 {
-    const int mmu_idx = cpu_mmu_index(env, false);
+    const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+    const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
     intptr_t i, oprsz = simd_oprsz(desc);
-    unsigned rd = simd_data(desc);
     ARMVectorReg scratch[4] = { };
 
     set_helper_retaddr(ra);
@@ -4361,10 +4363,10 @@ static void sve_ld4_r(CPUARMState *env, void *vg, target_ulong addr,
         uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
         do {
             if (pg & 1) {
-                tlb_fn(env, &scratch[0], i, addr, mmu_idx, ra);
-                tlb_fn(env, &scratch[1], i, addr + size, mmu_idx, ra);
-                tlb_fn(env, &scratch[2], i, addr + 2 * size, mmu_idx, ra);
-                tlb_fn(env, &scratch[3], i, addr + 3 * size, mmu_idx, ra);
+                tlb_fn(env, &scratch[0], i, addr, oi, ra);
+                tlb_fn(env, &scratch[1], i, addr + size, oi, ra);
+                tlb_fn(env, &scratch[2], i, addr + 2 * size, oi, ra);
+                tlb_fn(env, &scratch[3], i, addr + 3 * size, oi, ra);
             }
             i += size, pg >>= size;
             addr += 4 * size;
@@ -4459,11 +4461,13 @@ static void sve_ldff1_r(CPUARMState *env, void *vg, const target_ulong addr,
                         sve_ld1_host_fn *host_fn,
                         sve_ld1_tlb_fn *tlb_fn)
 {
-    void *vd = &env->vfp.zregs[simd_data(desc)];
+    const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+    const int mmu_idx = get_mmuidx(oi);
+    const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
+    void *vd = &env->vfp.zregs[rd];
     const int diffsz = esz - msz;
     const intptr_t reg_max = simd_oprsz(desc);
     const intptr_t mem_max = reg_max >> diffsz;
-    const int mmu_idx = cpu_mmu_index(env, false);
     intptr_t split, reg_off, mem_off;
     void *host;
 
@@ -4515,7 +4519,7 @@ static void sve_ldff1_r(CPUARMState *env, void *vg, const target_ulong addr,
      * Perform one normal read, which will fault or not.
      * But it is likely to bring the page into the tlb.
      */
-    tlb_fn(env, vd, reg_off, addr + mem_off, mmu_idx, retaddr);
+    tlb_fn(env, vd, reg_off, addr + mem_off, oi, retaddr);
 
     /* After any fault, zero any leading predicated false elts.  */
     swap_memzero(vd, reg_off);
@@ -4544,7 +4548,8 @@ static void sve_ldnf1_r(CPUARMState *env, void *vg, const target_ulong addr,
                         uint32_t desc, const int esz, const int msz,
                         sve_ld1_host_fn *host_fn)
 {
-    void *vd = &env->vfp.zregs[simd_data(desc)];
+    const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
+    void *vd = &env->vfp.zregs[rd];
     const int diffsz = esz - msz;
     const intptr_t reg_max = simd_oprsz(desc);
     const intptr_t mem_max = reg_max >> diffsz;
@@ -4677,15 +4682,14 @@ DO_LDFF1_LDNF1_2(dd,  3, 3)
 #ifdef CONFIG_SOFTMMU
 #define DO_ST_TLB(NAME, H, TYPEM, HOST, MOEND, TLB) \
 static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off,  \
-                             target_ulong addr, int mmu_idx, uintptr_t ra)  \
+                             target_ulong addr, TCGMemOpIdx oi, uintptr_t ra) \
 {                                                                           \
-    TCGMemOpIdx oi = make_memop_idx(ctz32(sizeof(TYPEM)) | MOEND, mmu_idx); \
     TLB(env, addr, *(TYPEM *)(vd + H(reg_off)), oi, ra);                    \
 }
 #else
 #define DO_ST_TLB(NAME, H, TYPEM, HOST, MOEND, TLB) \
 static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off,  \
-                             target_ulong addr, int mmu_idx, uintptr_t ra)  \
+                             target_ulong addr, TCGMemOpIdx oi, uintptr_t ra) \
 {                                                                           \
     HOST(g2h(addr), *(TYPEM *)(vd + H(reg_off)));                           \
 }
@@ -4724,9 +4728,9 @@ static void sve_st1_r(CPUARMState *env, void *vg, target_ulong addr,
                       const int esize, const int msize,
                       sve_st1_tlb_fn *tlb_fn)
 {
-    const int mmu_idx = cpu_mmu_index(env, false);
+    const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+    const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
     intptr_t i, oprsz = simd_oprsz(desc);
-    unsigned rd = simd_data(desc);
     void *vd = &env->vfp.zregs[rd];
 
     set_helper_retaddr(ra);
@@ -4734,7 +4738,7 @@ static void sve_st1_r(CPUARMState *env, void *vg, target_ulong addr,
         uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
         do {
             if (pg & 1) {
-                tlb_fn(env, vd, i, addr, mmu_idx, ra);
+                tlb_fn(env, vd, i, addr, oi, ra);
             }
             i += esize, pg >>= esize;
             addr += msize;
@@ -4748,9 +4752,9 @@ static void sve_st2_r(CPUARMState *env, void *vg, target_ulong addr,
                       const int esize, const int msize,
                       sve_st1_tlb_fn *tlb_fn)
 {
-    const int mmu_idx = cpu_mmu_index(env, false);
+    const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+    const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
     intptr_t i, oprsz = simd_oprsz(desc);
-    unsigned rd = simd_data(desc);
     void *d1 = &env->vfp.zregs[rd];
     void *d2 = &env->vfp.zregs[(rd + 1) & 31];
 
@@ -4759,8 +4763,8 @@ static void sve_st2_r(CPUARMState *env, void *vg, target_ulong addr,
         uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
         do {
             if (pg & 1) {
-                tlb_fn(env, d1, i, addr, mmu_idx, ra);
-                tlb_fn(env, d2, i, addr + msize, mmu_idx, ra);
+                tlb_fn(env, d1, i, addr, oi, ra);
+                tlb_fn(env, d2, i, addr + msize, oi, ra);
             }
             i += esize, pg >>= esize;
             addr += 2 * msize;
@@ -4774,9 +4778,9 @@ static void sve_st3_r(CPUARMState *env, void *vg, target_ulong addr,
                       const int esize, const int msize,
                       sve_st1_tlb_fn *tlb_fn)
 {
-    const int mmu_idx = cpu_mmu_index(env, false);
+    const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+    const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
     intptr_t i, oprsz = simd_oprsz(desc);
-    unsigned rd = simd_data(desc);
     void *d1 = &env->vfp.zregs[rd];
     void *d2 = &env->vfp.zregs[(rd + 1) & 31];
     void *d3 = &env->vfp.zregs[(rd + 2) & 31];
@@ -4786,9 +4790,9 @@ static void sve_st3_r(CPUARMState *env, void *vg, target_ulong addr,
         uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
         do {
             if (pg & 1) {
-                tlb_fn(env, d1, i, addr, mmu_idx, ra);
-                tlb_fn(env, d2, i, addr + msize, mmu_idx, ra);
-                tlb_fn(env, d3, i, addr + 2 * msize, mmu_idx, ra);
+                tlb_fn(env, d1, i, addr, oi, ra);
+                tlb_fn(env, d2, i, addr + msize, oi, ra);
+                tlb_fn(env, d3, i, addr + 2 * msize, oi, ra);
             }
             i += esize, pg >>= esize;
             addr += 3 * msize;
@@ -4802,9 +4806,9 @@ static void sve_st4_r(CPUARMState *env, void *vg, target_ulong addr,
                       const int esize, const int msize,
                       sve_st1_tlb_fn *tlb_fn)
 {
-    const int mmu_idx = cpu_mmu_index(env, false);
+    const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+    const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
     intptr_t i, oprsz = simd_oprsz(desc);
-    unsigned rd = simd_data(desc);
     void *d1 = &env->vfp.zregs[rd];
     void *d2 = &env->vfp.zregs[(rd + 1) & 31];
     void *d3 = &env->vfp.zregs[(rd + 2) & 31];
@@ -4815,10 +4819,10 @@ static void sve_st4_r(CPUARMState *env, void *vg, target_ulong addr,
         uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
         do {
             if (pg & 1) {
-                tlb_fn(env, d1, i, addr, mmu_idx, ra);
-                tlb_fn(env, d2, i, addr + msize, mmu_idx, ra);
-                tlb_fn(env, d3, i, addr + 2 * msize, mmu_idx, ra);
-                tlb_fn(env, d4, i, addr + 3 * msize, mmu_idx, ra);
+                tlb_fn(env, d1, i, addr, oi, ra);
+                tlb_fn(env, d2, i, addr + msize, oi, ra);
+                tlb_fn(env, d3, i, addr + 2 * msize, oi, ra);
+                tlb_fn(env, d4, i, addr + 3 * msize, oi, ra);
             }
             i += esize, pg >>= esize;
             addr += 4 * msize;
@@ -4916,9 +4920,9 @@ static void sve_ld1_zs(CPUARMState *env, void *vd, void *vg, void *vm,
                        target_ulong base, uint32_t desc, uintptr_t ra,
                        zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn)
 {
-    const int mmu_idx = cpu_mmu_index(env, false);
+    const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+    const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
     intptr_t i, oprsz = simd_oprsz(desc);
-    unsigned scale = simd_data(desc);
     ARMVectorReg scratch = { };
 
     set_helper_retaddr(ra);
@@ -4927,7 +4931,7 @@ static void sve_ld1_zs(CPUARMState *env, void *vd, void *vg, void *vm,
         do {
             if (likely(pg & 1)) {
                 target_ulong off = off_fn(vm, i);
-                tlb_fn(env, &scratch, i, base + (off << scale), mmu_idx, ra);
+                tlb_fn(env, &scratch, i, base + (off << scale), oi, ra);
             }
             i += 4, pg >>= 4;
         } while (i & 15);
@@ -4942,9 +4946,9 @@ static void sve_ld1_zd(CPUARMState *env, void *vd, void *vg, void *vm,
                        target_ulong base, uint32_t desc, uintptr_t ra,
                        zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn)
 {
-    const int mmu_idx = cpu_mmu_index(env, false);
+    const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+    const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
     intptr_t i, oprsz = simd_oprsz(desc) / 8;
-    unsigned scale = simd_data(desc);
     ARMVectorReg scratch = { };
 
     set_helper_retaddr(ra);
@@ -4952,7 +4956,7 @@ static void sve_ld1_zd(CPUARMState *env, void *vd, void *vg, void *vm,
         uint8_t pg = *(uint8_t *)(vg + H1(i));
         if (likely(pg & 1)) {
             target_ulong off = off_fn(vm, i * 8);
-            tlb_fn(env, &scratch, i * 8, base + (off << scale), mmu_idx, ra);
+            tlb_fn(env, &scratch, i * 8, base + (off << scale), oi, ra);
         }
     }
     set_helper_retaddr(0);
@@ -5058,7 +5062,7 @@ typedef bool sve_ld1_nf_fn(CPUARMState *env, void *vd, intptr_t reg_off,
 #ifdef CONFIG_SOFTMMU
 #define DO_LD_NF(NAME, H, TYPEE, TYPEM, HOST) \
 static bool sve_ld##NAME##_nf(CPUARMState *env, void *vd, intptr_t reg_off, \
-                            target_ulong addr, int mmu_idx)                 \
+                              target_ulong addr, int mmu_idx)               \
 {                                                                           \
     target_ulong next_page = -(addr | TARGET_PAGE_MASK);                    \
     if (likely(next_page - addr >= sizeof(TYPEM))) {                        \
@@ -5117,9 +5121,10 @@ static inline void sve_ldff1_zs(CPUARMState *env, void *vd, void *vg, void *vm,
                                 zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn,
                                 sve_ld1_nf_fn *nonfault_fn)
 {
-    const int mmu_idx = cpu_mmu_index(env, false);
+    const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+    const int mmu_idx = get_mmuidx(oi);
+    const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
     intptr_t reg_off, reg_max = simd_oprsz(desc);
-    unsigned scale = simd_data(desc);
     target_ulong addr;
 
     /* Skip to the first true predicate.  */
@@ -5129,7 +5134,7 @@ static inline void sve_ldff1_zs(CPUARMState *env, void *vd, void *vg, void *vm,
         set_helper_retaddr(ra);
         addr = off_fn(vm, reg_off);
         addr = base + (addr << scale);
-        tlb_fn(env, vd, reg_off, addr, mmu_idx, ra);
+        tlb_fn(env, vd, reg_off, addr, oi, ra);
 
         /* The rest of the reads will be non-faulting.  */
         set_helper_retaddr(0);
@@ -5158,9 +5163,10 @@ static inline void sve_ldff1_zd(CPUARMState *env, void *vd, void *vg, void *vm,
                                 zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn,
                                 sve_ld1_nf_fn *nonfault_fn)
 {
-    const int mmu_idx = cpu_mmu_index(env, false);
+    const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+    const int mmu_idx = get_mmuidx(oi);
+    const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
     intptr_t reg_off, reg_max = simd_oprsz(desc);
-    unsigned scale = simd_data(desc);
     target_ulong addr;
 
     /* Skip to the first true predicate.  */
@@ -5170,7 +5176,7 @@ static inline void sve_ldff1_zd(CPUARMState *env, void *vd, void *vg, void *vm,
         set_helper_retaddr(ra);
         addr = off_fn(vm, reg_off);
         addr = base + (addr << scale);
-        tlb_fn(env, vd, reg_off, addr, mmu_idx, ra);
+        tlb_fn(env, vd, reg_off, addr, oi, ra);
 
         /* The rest of the reads will be non-faulting.  */
         set_helper_retaddr(0);
@@ -5282,9 +5288,9 @@ static void sve_st1_zs(CPUARMState *env, void *vd, void *vg, void *vm,
                        target_ulong base, uint32_t desc, uintptr_t ra,
                        zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn)
 {
-    const int mmu_idx = cpu_mmu_index(env, false);
+    const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+    const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
     intptr_t i, oprsz = simd_oprsz(desc);
-    unsigned scale = simd_data(desc);
 
     set_helper_retaddr(ra);
     for (i = 0; i < oprsz; ) {
@@ -5292,7 +5298,7 @@ static void sve_st1_zs(CPUARMState *env, void *vd, void *vg, void *vm,
         do {
             if (likely(pg & 1)) {
                 target_ulong off = off_fn(vm, i);
-                tlb_fn(env, vd, i, base + (off << scale), mmu_idx, ra);
+                tlb_fn(env, vd, i, base + (off << scale), oi, ra);
             }
             i += 4, pg >>= 4;
         } while (i & 15);
@@ -5304,16 +5310,16 @@ static void sve_st1_zd(CPUARMState *env, void *vd, void *vg, void *vm,
                        target_ulong base, uint32_t desc, uintptr_t ra,
                        zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn)
 {
-    const int mmu_idx = cpu_mmu_index(env, false);
+    const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
+    const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
     intptr_t i, oprsz = simd_oprsz(desc) / 8;
-    unsigned scale = simd_data(desc);
 
     set_helper_retaddr(ra);
     for (i = 0; i < oprsz; i++) {
         uint8_t pg = *(uint8_t *)(vg + H1(i));
         if (likely(pg & 1)) {
             target_ulong off = off_fn(vm, i * 8);
-            tlb_fn(env, vd, i * 8, base + (off << scale), mmu_idx, ra);
+            tlb_fn(env, vd, i * 8, base + (off << scale), oi, ra);
         }
     }
     set_helper_retaddr(0);
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index 888a968ddc5..fe7aebdc19f 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -4600,25 +4600,34 @@ static const uint8_t dtype_esz[16] = {
     3, 2, 1, 3
 };
 
+static TCGMemOpIdx sve_memopidx(DisasContext *s, int dtype)
+{
+    return make_memop_idx(s->be_data | dtype_mop[dtype], get_mem_index(s));
+}
+
 static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
-                       gen_helper_gvec_mem *fn)
+                       int dtype, gen_helper_gvec_mem *fn)
 {
     unsigned vsz = vec_full_reg_size(s);
     TCGv_ptr t_pg;
-    TCGv_i32 desc;
+    TCGv_i32 t_desc;
+    int desc;
 
     /* For e.g. LD4, there are not enough arguments to pass all 4
      * registers as pointers, so encode the regno into the data field.
      * For consistency, do this even for LD1.
      */
-    desc = tcg_const_i32(simd_desc(vsz, vsz, zt));
+    desc = sve_memopidx(s, dtype);
+    desc |= zt << MEMOPIDX_SHIFT;
+    desc = simd_desc(vsz, vsz, desc);
+    t_desc = tcg_const_i32(desc);
     t_pg = tcg_temp_new_ptr();
 
     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
-    fn(cpu_env, t_pg, addr, desc);
+    fn(cpu_env, t_pg, addr, t_desc);
 
     tcg_temp_free_ptr(t_pg);
-    tcg_temp_free_i32(desc);
+    tcg_temp_free_i32(t_desc);
 }
 
 static void do_ld_zpa(DisasContext *s, int zt, int pg,
@@ -4681,7 +4690,7 @@ static void do_ld_zpa(DisasContext *s, int zt, int pg,
      * accessible via the instruction encoding.
      */
     assert(fn != NULL);
-    do_mem_zpa(s, zt, pg, addr, fn);
+    do_mem_zpa(s, zt, pg, addr, dtype, fn);
 }
 
 static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
@@ -4763,7 +4772,8 @@ static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
         TCGv_i64 addr = new_tmp_a64(s);
         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
-        do_mem_zpa(s, a->rd, a->pg, addr, fns[s->be_data == MO_BE][a->dtype]);
+        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype,
+                   fns[s->be_data == MO_BE][a->dtype]);
     }
     return true;
 }
@@ -4821,7 +4831,8 @@ static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
         TCGv_i64 addr = new_tmp_a64(s);
 
         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
-        do_mem_zpa(s, a->rd, a->pg, addr, fns[s->be_data == MO_BE][a->dtype]);
+        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype,
+                   fns[s->be_data == MO_BE][a->dtype]);
     }
     return true;
 }
@@ -4836,11 +4847,14 @@ static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
     };
     unsigned vsz = vec_full_reg_size(s);
     TCGv_ptr t_pg;
-    TCGv_i32 desc;
-    int poff;
+    TCGv_i32 t_desc;
+    int desc, poff;
 
     /* Load the first quadword using the normal predicated load helpers.  */
-    desc = tcg_const_i32(simd_desc(16, 16, zt));
+    desc = sve_memopidx(s, msz_dtype(msz));
+    desc |= zt << MEMOPIDX_SHIFT;
+    desc = simd_desc(16, 16, desc);
+    t_desc = tcg_const_i32(desc);
 
     poff = pred_full_reg_offset(s, pg);
     if (vsz > 16) {
@@ -4864,10 +4878,10 @@ static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
     t_pg = tcg_temp_new_ptr();
     tcg_gen_addi_ptr(t_pg, cpu_env, poff);
 
-    fns[s->be_data == MO_BE][msz](cpu_env, t_pg, addr, desc);
+    fns[s->be_data == MO_BE][msz](cpu_env, t_pg, addr, t_desc);
 
     tcg_temp_free_ptr(t_pg);
-    tcg_temp_free_i32(desc);
+    tcg_temp_free_i32(t_desc);
 
     /* Replicate that first quadword.  */
     if (vsz > 16) {
@@ -5019,7 +5033,7 @@ static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
         fn = fn_multiple[be][nreg - 1][msz];
     }
     assert(fn != NULL);
-    do_mem_zpa(s, zt, pg, addr, fn);
+    do_mem_zpa(s, zt, pg, addr, msz_dtype(msz), fn);
 }
 
 static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a, uint32_t insn)
@@ -5057,24 +5071,31 @@ static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a, uint32_t insn)
  *** SVE gather loads / scatter stores
  */
 
-static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, int scale,
-                       TCGv_i64 scalar, gen_helper_gvec_mem_scatter *fn)
+static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
+                       int scale, TCGv_i64 scalar, int msz,
+                       gen_helper_gvec_mem_scatter *fn)
 {
     unsigned vsz = vec_full_reg_size(s);
-    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, scale));
     TCGv_ptr t_zm = tcg_temp_new_ptr();
     TCGv_ptr t_pg = tcg_temp_new_ptr();
     TCGv_ptr t_zt = tcg_temp_new_ptr();
+    TCGv_i32 t_desc;
+    int desc;
+
+    desc = sve_memopidx(s, msz_dtype(msz));
+    desc |= scale << MEMOPIDX_SHIFT;
+    desc = simd_desc(vsz, vsz, desc);
+    t_desc = tcg_const_i32(desc);
 
     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
     tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
     tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
-    fn(cpu_env, t_zt, t_pg, t_zm, scalar, desc);
+    fn(cpu_env, t_zt, t_pg, t_zm, scalar, t_desc);
 
     tcg_temp_free_ptr(t_zt);
     tcg_temp_free_ptr(t_zm);
     tcg_temp_free_ptr(t_pg);
-    tcg_temp_free_i32(desc);
+    tcg_temp_free_i32(t_desc);
 }
 
 /* Indexed by [be][ff][xs][u][msz].  */
@@ -5263,7 +5284,7 @@ static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a, uint32_t insn)
     assert(fn != NULL);
 
     do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
-               cpu_reg_sp(s, a->rn), fn);
+               cpu_reg_sp(s, a->rn), a->msz, fn);
     return true;
 }
 
@@ -5294,7 +5315,7 @@ static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a, uint32_t insn)
      * by loading the immediate into the scalar parameter.
      */
     imm = tcg_const_i64(a->imm << a->msz);
-    do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, fn);
+    do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn);
     tcg_temp_free_i64(imm);
     return true;
 }
@@ -5369,7 +5390,7 @@ static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a, uint32_t insn)
         g_assert_not_reached();
     }
     do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
-               cpu_reg_sp(s, a->rn), fn);
+               cpu_reg_sp(s, a->rn), a->msz, fn);
     return true;
 }
 
@@ -5400,7 +5421,7 @@ static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a, uint32_t insn)
      * by loading the immediate into the scalar parameter.
      */
     imm = tcg_const_i64(a->imm << a->msz);
-    do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, fn);
+    do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn);
     tcg_temp_free_i64(imm);
     return true;
 }
-- 
2.19.0

  parent reply	other threads:[~2018-10-08 14:00 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-10-08 13:59 [Qemu-devel] [PULL 00/33] target-arm queue Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 01/33] target/arm: fix code comments error Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 02/33] virt: Suppress external aborts on virt-2.10 and earlier Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 03/33] target/arm: Correct condition for v8M callee stack push Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 04/33] target/arm: Don't read r4 from v8M exception stackframe twice Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 05/33] target/arm: Define ID_AA64ZFR0_EL1 Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 06/33] target/arm: Adjust sve_exception_el Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 07/33] target/arm: Pass in current_el to fp and sve_exception_el Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 08/33] target/arm: Handle SVE vector length changes in system mode Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 09/33] target/arm: Adjust aarch64_cpu_dump_state for system mode SVE Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 10/33] target/arm: Clear unused predicate bits for LD1RQ Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 11/33] target/arm: Rewrite helper_sve_ld1*_r using pages Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 12/33] target/arm: Rewrite helper_sve_ld[234]*_r Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 13/33] target/arm: Rewrite helper_sve_st[1234]*_r Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 14/33] target/arm: Split contiguous loads for endianness Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 15/33] target/arm: Split contiguous stores " Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 16/33] target/arm: Rewrite vector gather loads Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 17/33] target/arm: Rewrite vector gather stores Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 18/33] target/arm: Rewrite vector gather first-fault loads Peter Maydell
2018-10-08 13:59 ` Peter Maydell [this message]
2018-10-08 13:59 ` [Qemu-devel] [PULL 20/33] target/arm: Define new TBFLAG for v8M stack checking Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 21/33] target/arm: Define new EXCP type for v8M stack overflows Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 22/33] target/arm: Move v7m_using_psp() to internals.h Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 23/33] target/arm: Add v8M stack checks on ADD/SUB/MOV of SP Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 24/33] target/arm: Add some comments in Thumb decode Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 25/33] target/arm: Add v8M stack checks on exception entry Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 26/33] target/arm: Add v8M stack limit checks on NS function calls Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 27/33] target/arm: Add v8M stack checks for LDRD/STRD (imm) Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 28/33] target/arm: Add v8M stack checks for Thumb2 LDM/STM Peter Maydell
2018-10-08 14:00 ` [Qemu-devel] [PULL 29/33] target/arm: Add v8M stack checks for T32 load/store single Peter Maydell
2018-10-08 14:00 ` [Qemu-devel] [PULL 30/33] target/arm: Add v8M stack checks for Thumb push/pop Peter Maydell
2018-10-08 14:00 ` [Qemu-devel] [PULL 31/33] target/arm: Add v8M stack checks for VLDM/VSTM Peter Maydell
2018-10-08 14:00 ` [Qemu-devel] [PULL 32/33] target/arm: Add v8M stack checks for MSR to SP_NS Peter Maydell
2018-10-08 14:00 ` [Qemu-devel] [PULL 33/33] hw/display/bcm2835_fb: Silence Coverity warning about multiply overflow Peter Maydell
2018-10-08 14:46 ` [Qemu-devel] [PULL 00/33] target-arm queue Peter Maydell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181008140004.12612-20-peter.maydell@linaro.org \
    --to=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.