All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Maydell <peter.maydell@linaro.org>
To: qemu-devel@nongnu.org
Subject: [Qemu-devel] [PULL 12/33] target/arm: Rewrite helper_sve_ld[234]*_r
Date: Mon,  8 Oct 2018 14:59:43 +0100	[thread overview]
Message-ID: <20181008140004.12612-13-peter.maydell@linaro.org> (raw)
In-Reply-To: <20181008140004.12612-1-peter.maydell@linaro.org>

From: Richard Henderson <richard.henderson@linaro.org>

Use the same *_tlb primitives as we use for ld1.

For linux-user, this hoists the set of helper_retaddr.  For softmmu,
hoists the computation of the current mmu_idx outside the loop,
fixes the endianness problem, and moves the main loop out of a
macro and into an inlined function.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Tested-by: Laurent Desnogues <laurent.desnogues@gmail.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20181005175350.30752-9-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/sve_helper.c | 210 ++++++++++++++++++++++------------------
 1 file changed, 117 insertions(+), 93 deletions(-)

diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index d628978431c..f712b382f8b 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -4285,109 +4285,133 @@ DO_LD1_2(ld1dd,  3, 3)
 #undef DO_LD1_1
 #undef DO_LD1_2
 
-#define DO_LD2(NAME, FN, TYPEE, TYPEM, H)                  \
-void HELPER(NAME)(CPUARMState *env, void *vg,              \
-                  target_ulong addr, uint32_t desc)        \
-{                                                          \
-    intptr_t i, oprsz = simd_oprsz(desc);                  \
-    intptr_t ra = GETPC();                                 \
-    unsigned rd = simd_data(desc);                         \
-    void *d1 = &env->vfp.zregs[rd];                        \
-    void *d2 = &env->vfp.zregs[(rd + 1) & 31];             \
-    for (i = 0; i < oprsz; ) {                             \
-        uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));    \
-        do {                                               \
-            TYPEM m1 = 0, m2 = 0;                          \
-            if (pg & 1) {                                  \
-                m1 = FN(env, addr, ra);                    \
-                m2 = FN(env, addr + sizeof(TYPEM), ra);    \
-            }                                              \
-            *(TYPEE *)(d1 + H(i)) = m1;                    \
-            *(TYPEE *)(d2 + H(i)) = m2;                    \
-            i += sizeof(TYPEE), pg >>= sizeof(TYPEE);      \
-            addr += 2 * sizeof(TYPEM);                     \
-        } while (i & 15);                                  \
-    }                                                      \
+/*
+ * Common helpers for all contiguous 2,3,4-register predicated loads.
+ */
+static void sve_ld2_r(CPUARMState *env, void *vg, target_ulong addr,
+                      uint32_t desc, int size, uintptr_t ra,
+                      sve_ld1_tlb_fn *tlb_fn)
+{
+    const int mmu_idx = cpu_mmu_index(env, false);
+    intptr_t i, oprsz = simd_oprsz(desc);
+    unsigned rd = simd_data(desc);
+    ARMVectorReg scratch[2] = { };
+
+    set_helper_retaddr(ra);
+    for (i = 0; i < oprsz; ) {
+        uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
+        do {
+            if (pg & 1) {
+                tlb_fn(env, &scratch[0], i, addr, mmu_idx, ra);
+                tlb_fn(env, &scratch[1], i, addr + size, mmu_idx, ra);
+            }
+            i += size, pg >>= size;
+            addr += 2 * size;
+        } while (i & 15);
+    }
+    set_helper_retaddr(0);
+
+    /* Wait until all exceptions have been raised to write back.  */
+    memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz);
+    memcpy(&env->vfp.zregs[(rd + 1) & 31], &scratch[1], oprsz);
 }
 
-#define DO_LD3(NAME, FN, TYPEE, TYPEM, H)                  \
-void HELPER(NAME)(CPUARMState *env, void *vg,              \
-                  target_ulong addr, uint32_t desc)        \
-{                                                          \
-    intptr_t i, oprsz = simd_oprsz(desc);                  \
-    intptr_t ra = GETPC();                                 \
-    unsigned rd = simd_data(desc);                         \
-    void *d1 = &env->vfp.zregs[rd];                        \
-    void *d2 = &env->vfp.zregs[(rd + 1) & 31];             \
-    void *d3 = &env->vfp.zregs[(rd + 2) & 31];             \
-    for (i = 0; i < oprsz; ) {                             \
-        uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));    \
-        do {                                               \
-            TYPEM m1 = 0, m2 = 0, m3 = 0;                  \
-            if (pg & 1) {                                  \
-                m1 = FN(env, addr, ra);                    \
-                m2 = FN(env, addr + sizeof(TYPEM), ra);    \
-                m3 = FN(env, addr + 2 * sizeof(TYPEM), ra); \
-            }                                              \
-            *(TYPEE *)(d1 + H(i)) = m1;                    \
-            *(TYPEE *)(d2 + H(i)) = m2;                    \
-            *(TYPEE *)(d3 + H(i)) = m3;                    \
-            i += sizeof(TYPEE), pg >>= sizeof(TYPEE);      \
-            addr += 3 * sizeof(TYPEM);                     \
-        } while (i & 15);                                  \
-    }                                                      \
+static void sve_ld3_r(CPUARMState *env, void *vg, target_ulong addr,
+                      uint32_t desc, int size, uintptr_t ra,
+                      sve_ld1_tlb_fn *tlb_fn)
+{
+    const int mmu_idx = cpu_mmu_index(env, false);
+    intptr_t i, oprsz = simd_oprsz(desc);
+    unsigned rd = simd_data(desc);
+    ARMVectorReg scratch[3] = { };
+
+    set_helper_retaddr(ra);
+    for (i = 0; i < oprsz; ) {
+        uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
+        do {
+            if (pg & 1) {
+                tlb_fn(env, &scratch[0], i, addr, mmu_idx, ra);
+                tlb_fn(env, &scratch[1], i, addr + size, mmu_idx, ra);
+                tlb_fn(env, &scratch[2], i, addr + 2 * size, mmu_idx, ra);
+            }
+            i += size, pg >>= size;
+            addr += 3 * size;
+        } while (i & 15);
+    }
+    set_helper_retaddr(0);
+
+    /* Wait until all exceptions have been raised to write back.  */
+    memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz);
+    memcpy(&env->vfp.zregs[(rd + 1) & 31], &scratch[1], oprsz);
+    memcpy(&env->vfp.zregs[(rd + 2) & 31], &scratch[2], oprsz);
 }
 
-#define DO_LD4(NAME, FN, TYPEE, TYPEM, H)                  \
-void HELPER(NAME)(CPUARMState *env, void *vg,              \
-                  target_ulong addr, uint32_t desc)        \
-{                                                          \
-    intptr_t i, oprsz = simd_oprsz(desc);                  \
-    intptr_t ra = GETPC();                                 \
-    unsigned rd = simd_data(desc);                         \
-    void *d1 = &env->vfp.zregs[rd];                        \
-    void *d2 = &env->vfp.zregs[(rd + 1) & 31];             \
-    void *d3 = &env->vfp.zregs[(rd + 2) & 31];             \
-    void *d4 = &env->vfp.zregs[(rd + 3) & 31];             \
-    for (i = 0; i < oprsz; ) {                             \
-        uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));    \
-        do {                                               \
-            TYPEM m1 = 0, m2 = 0, m3 = 0, m4 = 0;          \
-            if (pg & 1) {                                  \
-                m1 = FN(env, addr, ra);                    \
-                m2 = FN(env, addr + sizeof(TYPEM), ra);    \
-                m3 = FN(env, addr + 2 * sizeof(TYPEM), ra); \
-                m4 = FN(env, addr + 3 * sizeof(TYPEM), ra); \
-            }                                              \
-            *(TYPEE *)(d1 + H(i)) = m1;                    \
-            *(TYPEE *)(d2 + H(i)) = m2;                    \
-            *(TYPEE *)(d3 + H(i)) = m3;                    \
-            *(TYPEE *)(d4 + H(i)) = m4;                    \
-            i += sizeof(TYPEE), pg >>= sizeof(TYPEE);      \
-            addr += 4 * sizeof(TYPEM);                     \
-        } while (i & 15);                                  \
-    }                                                      \
+static void sve_ld4_r(CPUARMState *env, void *vg, target_ulong addr,
+                      uint32_t desc, int size, uintptr_t ra,
+                      sve_ld1_tlb_fn *tlb_fn)
+{
+    const int mmu_idx = cpu_mmu_index(env, false);
+    intptr_t i, oprsz = simd_oprsz(desc);
+    unsigned rd = simd_data(desc);
+    ARMVectorReg scratch[4] = { };
+
+    set_helper_retaddr(ra);
+    for (i = 0; i < oprsz; ) {
+        uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
+        do {
+            if (pg & 1) {
+                tlb_fn(env, &scratch[0], i, addr, mmu_idx, ra);
+                tlb_fn(env, &scratch[1], i, addr + size, mmu_idx, ra);
+                tlb_fn(env, &scratch[2], i, addr + 2 * size, mmu_idx, ra);
+                tlb_fn(env, &scratch[3], i, addr + 3 * size, mmu_idx, ra);
+            }
+            i += size, pg >>= size;
+            addr += 4 * size;
+        } while (i & 15);
+    }
+    set_helper_retaddr(0);
+
+    /* Wait until all exceptions have been raised to write back.  */
+    memcpy(&env->vfp.zregs[rd], &scratch[0], oprsz);
+    memcpy(&env->vfp.zregs[(rd + 1) & 31], &scratch[1], oprsz);
+    memcpy(&env->vfp.zregs[(rd + 2) & 31], &scratch[2], oprsz);
+    memcpy(&env->vfp.zregs[(rd + 3) & 31], &scratch[3], oprsz);
 }
 
-DO_LD2(sve_ld2bb_r, cpu_ldub_data_ra, uint8_t, uint8_t, H1)
-DO_LD3(sve_ld3bb_r, cpu_ldub_data_ra, uint8_t, uint8_t, H1)
-DO_LD4(sve_ld4bb_r, cpu_ldub_data_ra, uint8_t, uint8_t, H1)
+#define DO_LDN_1(N) \
+void __attribute__((flatten)) HELPER(sve_ld##N##bb_r)               \
+    (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc)  \
+{                                                                   \
+    sve_ld##N##_r(env, vg, addr, desc, 1, GETPC(), sve_ld1bb_tlb);  \
+}
 
-DO_LD2(sve_ld2hh_r, cpu_lduw_data_ra, uint16_t, uint16_t, H1_2)
-DO_LD3(sve_ld3hh_r, cpu_lduw_data_ra, uint16_t, uint16_t, H1_2)
-DO_LD4(sve_ld4hh_r, cpu_lduw_data_ra, uint16_t, uint16_t, H1_2)
+#define DO_LDN_2(N, SUFF, SIZE)                                       \
+void __attribute__((flatten)) HELPER(sve_ld##N##SUFF##_r)             \
+    (CPUARMState *env, void *vg, target_ulong addr, uint32_t desc)    \
+{                                                                     \
+    sve_ld##N##_r(env, vg, addr, desc, SIZE, GETPC(),                 \
+                  arm_cpu_data_is_big_endian(env)                     \
+                  ? sve_ld1##SUFF##_be_tlb : sve_ld1##SUFF##_le_tlb); \
+}
 
-DO_LD2(sve_ld2ss_r, cpu_ldl_data_ra, uint32_t, uint32_t, H1_4)
-DO_LD3(sve_ld3ss_r, cpu_ldl_data_ra, uint32_t, uint32_t, H1_4)
-DO_LD4(sve_ld4ss_r, cpu_ldl_data_ra, uint32_t, uint32_t, H1_4)
+DO_LDN_1(2)
+DO_LDN_1(3)
+DO_LDN_1(4)
 
-DO_LD2(sve_ld2dd_r, cpu_ldq_data_ra, uint64_t, uint64_t, )
-DO_LD3(sve_ld3dd_r, cpu_ldq_data_ra, uint64_t, uint64_t, )
-DO_LD4(sve_ld4dd_r, cpu_ldq_data_ra, uint64_t, uint64_t, )
+DO_LDN_2(2, hh, 2)
+DO_LDN_2(3, hh, 2)
+DO_LDN_2(4, hh, 2)
 
-#undef DO_LD2
-#undef DO_LD3
-#undef DO_LD4
+DO_LDN_2(2, ss, 4)
+DO_LDN_2(3, ss, 4)
+DO_LDN_2(4, ss, 4)
+
+DO_LDN_2(2, dd, 8)
+DO_LDN_2(3, dd, 8)
+DO_LDN_2(4, dd, 8)
+
+#undef DO_LDN_1
+#undef DO_LDN_2
 
 /*
  * Load contiguous data, first-fault and no-fault.
-- 
2.19.0

  parent reply	other threads:[~2018-10-08 14:00 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-10-08 13:59 [Qemu-devel] [PULL 00/33] target-arm queue Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 01/33] target/arm: fix code comments error Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 02/33] virt: Suppress external aborts on virt-2.10 and earlier Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 03/33] target/arm: Correct condition for v8M callee stack push Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 04/33] target/arm: Don't read r4 from v8M exception stackframe twice Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 05/33] target/arm: Define ID_AA64ZFR0_EL1 Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 06/33] target/arm: Adjust sve_exception_el Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 07/33] target/arm: Pass in current_el to fp and sve_exception_el Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 08/33] target/arm: Handle SVE vector length changes in system mode Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 09/33] target/arm: Adjust aarch64_cpu_dump_state for system mode SVE Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 10/33] target/arm: Clear unused predicate bits for LD1RQ Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 11/33] target/arm: Rewrite helper_sve_ld1*_r using pages Peter Maydell
2018-10-08 13:59 ` Peter Maydell [this message]
2018-10-08 13:59 ` [Qemu-devel] [PULL 13/33] target/arm: Rewrite helper_sve_st[1234]*_r Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 14/33] target/arm: Split contiguous loads for endianness Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 15/33] target/arm: Split contiguous stores " Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 16/33] target/arm: Rewrite vector gather loads Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 17/33] target/arm: Rewrite vector gather stores Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 18/33] target/arm: Rewrite vector gather first-fault loads Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 19/33] target/arm: Pass TCGMemOpIdx to sve memory helpers Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 20/33] target/arm: Define new TBFLAG for v8M stack checking Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 21/33] target/arm: Define new EXCP type for v8M stack overflows Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 22/33] target/arm: Move v7m_using_psp() to internals.h Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 23/33] target/arm: Add v8M stack checks on ADD/SUB/MOV of SP Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 24/33] target/arm: Add some comments in Thumb decode Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 25/33] target/arm: Add v8M stack checks on exception entry Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 26/33] target/arm: Add v8M stack limit checks on NS function calls Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 27/33] target/arm: Add v8M stack checks for LDRD/STRD (imm) Peter Maydell
2018-10-08 13:59 ` [Qemu-devel] [PULL 28/33] target/arm: Add v8M stack checks for Thumb2 LDM/STM Peter Maydell
2018-10-08 14:00 ` [Qemu-devel] [PULL 29/33] target/arm: Add v8M stack checks for T32 load/store single Peter Maydell
2018-10-08 14:00 ` [Qemu-devel] [PULL 30/33] target/arm: Add v8M stack checks for Thumb push/pop Peter Maydell
2018-10-08 14:00 ` [Qemu-devel] [PULL 31/33] target/arm: Add v8M stack checks for VLDM/VSTM Peter Maydell
2018-10-08 14:00 ` [Qemu-devel] [PULL 32/33] target/arm: Add v8M stack checks for MSR to SP_NS Peter Maydell
2018-10-08 14:00 ` [Qemu-devel] [PULL 33/33] hw/display/bcm2835_fb: Silence Coverity warning about multiply overflow Peter Maydell
2018-10-08 14:46 ` [Qemu-devel] [PULL 00/33] target-arm queue Peter Maydell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181008140004.12612-13-peter.maydell@linaro.org \
    --to=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.