All of lore.kernel.org
 help / color / mirror / Atom feed
From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Cc: peter.maydell@linaro.org, qemu-arm@nongnu.org, steplong@quicinc.com
Subject: [PATCH v7 28/42] target/arm: Use mte_checkN for sve unpredicated loads
Date: Tue,  2 Jun 2020 18:13:03 -0700	[thread overview]
Message-ID: <20200603011317.473934-29-richard.henderson@linaro.org> (raw)
In-Reply-To: <20200603011317.473934-1-richard.henderson@linaro.org>

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/helper-sve.h    |  2 +
 target/arm/sve_helper.c    | 70 +++++++++++++++++++++++++++-
 target/arm/translate-sve.c | 93 ++++++++++++++++----------------------
 3 files changed, 109 insertions(+), 56 deletions(-)

diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
index 7a200755ac..550ae7f7e6 100644
--- a/target/arm/helper-sve.h
+++ b/target/arm/helper-sve.h
@@ -1138,6 +1138,8 @@ DEF_HELPER_FLAGS_5(sve_ftmad_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_5(sve_ftmad_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_5(sve_ftmad_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
 
+DEF_HELPER_FLAGS_4(sve_ldr, TCG_CALL_NO_WG, void, env, ptr, tl, int)
+
 DEF_HELPER_FLAGS_4(sve_ld1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 DEF_HELPER_FLAGS_4(sve_ld2bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 DEF_HELPER_FLAGS_4(sve_ld3bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index e590db6637..04340cbe0c 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -3935,7 +3935,7 @@ void HELPER(sve_fcmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
  */
 
 /*
- * Load one element into @vd + @reg_off from @host.
+ * Load one element into @vd + @reg_off, from @host.
  * The controlling predicate is known to be true.
  */
 typedef void sve_ldst1_host_fn(void *vd, intptr_t reg_off, void *host);
@@ -4129,6 +4129,74 @@ static bool sve_probe_page(SVEHostPage *info, bool nofault,
     return true;
 }
 
+/*
+ * Load contiguous data, unpredicated.
+ *
+ * Note that unpredicated load/store of vector/predicate registers
+ * are defined as a stream of bytes, which equates to little-endian
+ * operations on larger quantities.
+ *
+ * Note any MTE check is already handled.
+ */
+
+void HELPER(sve_ldr)(CPUARMState *env, void *vd, target_ulong addr, int size)
+{
+    int mmu_idx = cpu_mmu_index(env, false);
+    int in_page = -((int)addr | TARGET_PAGE_MASK);
+    uintptr_t ra = GETPC();
+    uint64_t val;
+    int i;
+
+    /* Small loads are expanded inline. */
+    tcg_debug_assert(size > 2 * 8);
+
+    /* Bulk copy the data from memory to the register. */
+    if (likely(size <= in_page)) {
+        void *host = probe_read(env, addr, size, mmu_idx, ra);
+
+        if (unlikely(!host)) {
+            goto mmio;
+        }
+        memcpy(vd, host, size);
+    } else {
+        void *h1 = probe_read(env, addr, in_page, mmu_idx, ra);
+        void *h2 = probe_read(env, addr + in_page, size - in_page, mmu_idx, ra);
+
+        if (unlikely(!h1 || !h2)) {
+            goto mmio;
+        }
+        memcpy(vd, h1, in_page);
+        memcpy(vd + in_page, h2, size - in_page);
+    }
+
+    /* Predicate load length may be any multiple of 2; ensure high bits 0. */
+    if (unlikely(size & 7)) {
+        memset(vd + size, 0, 8 - (size & 7));
+    }
+
+    /*
+     * The memcpy and memset above kept the bytes in memory order.
+     * The in-register format has uint64_t in host order, so for
+     * big-endian host we need to bswap.
+     */
+    for (i = 0; i < size; i += 8) {
+        le64_to_cpus(vd + i);
+    }
+    return;
+
+ mmio:
+    for (i = 0; i + 8 <= size; i += 8) {
+        val = cpu_ldq_le_data_ra(env, addr + i, ra);
+        *(uint64_t *)(vd + i) = val;
+    }
+
+    /* Predicate load length may be any multiple of 2. */
+    if (unlikely(i != size)) {
+        val = cpu_ldq_le_data_ra(env, addr + size - 8, ra);
+        val >>= (size - i) * 8;
+        *(uint64_t *)(vd + i + 8) = val;
+    }
+}
 
 /*
  * Analyse contiguous data, protected by a governing predicate.
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index ac7b3119e5..35b979fa13 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -4332,8 +4332,13 @@ static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a)
  *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
  */
 
-/* Subroutine loading a vector register at VOFS of LEN bytes.
+/*
+ * Subroutine loading a vector register at VOFS of LEN bytes.
  * The load should begin at the address Rn + IMM.
+ *
+ * Note that unpredicated load/store of vector/predicate registers
+ * are defined as a stream of bytes, which equates to little-endian
+ * operations on larger quantities.
  */
 
 static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
@@ -4342,81 +4347,59 @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
     int len_remain = len % 8;
     int nparts = len / 8 + ctpop8(len_remain);
     int midx = get_mem_index(s);
-    TCGv_i64 addr, t0, t1;
+    TCGv_i64 dirty_addr, clean_addr, t0, t1;
+    int i;
 
-    addr = tcg_temp_new_i64();
-    t0 = tcg_temp_new_i64();
+    dirty_addr = read_cpu_reg_sp(s, rn, true);
+    tcg_gen_addi_i64(dirty_addr, dirty_addr, imm);
 
-    /* Note that unpredicated load/store of vector/predicate registers
-     * are defined as a stream of bytes, which equates to little-endian
-     * operations on larger quantities.  There is no nice way to force
-     * a little-endian load for aarch64_be-linux-user out of line.
-     *
-     * Attempt to keep code expansion to a minimum by limiting the
-     * amount of unrolling done.
-     */
-    if (nparts <= 4) {
-        int i;
+    clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8);
 
-        for (i = 0; i < len_align; i += 8) {
-            tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
-            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
-            tcg_gen_st_i64(t0, cpu_env, vofs + i);
-        }
-    } else {
-        TCGLabel *loop = gen_new_label();
-        TCGv_ptr tp, i = tcg_const_local_ptr(0);
+    /* Limit tcg code expansion by doing large loads out of line. */
+    if (nparts > 4) {
+        TCGv_ptr t_rd = tcg_temp_new_ptr();
+        TCGv_i32 t_len = tcg_const_i32(len);
 
-        gen_set_label(loop);
-
-        /* Minimize the number of local temps that must be re-read from
-         * the stack each iteration.  Instead, re-compute values other
-         * than the loop counter.
-         */
-        tp = tcg_temp_new_ptr();
-        tcg_gen_addi_ptr(tp, i, imm);
-        tcg_gen_extu_ptr_i64(addr, tp);
-        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
-
-        tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
-
-        tcg_gen_add_ptr(tp, cpu_env, i);
-        tcg_gen_addi_ptr(i, i, 8);
-        tcg_gen_st_i64(t0, tp, vofs);
-        tcg_temp_free_ptr(tp);
-
-        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
-        tcg_temp_free_ptr(i);
+        tcg_gen_addi_ptr(t_rd, cpu_env, vofs);
+        gen_helper_sve_ldr(cpu_env, t_rd, clean_addr, t_len);
+        tcg_temp_free_ptr(t_rd);
+        tcg_temp_free_i32(t_len);
+        return;
     }
 
-    /* Predicate register loads can be any multiple of 2.
-     * Note that we still store the entire 64-bit unit into cpu_env.
+    t0 = tcg_temp_new_i64();
+    for (i = 0; i < len_align; i += 8) {
+        tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEQ);
+        tcg_gen_st_i64(t0, cpu_env, vofs + i);
+        tcg_gen_addi_i64(clean_addr, clean_addr, 8);
+    }
+
+    /*
+     * Predicate register loads can be any multiple of 2.
+     * Note that we still store the entire 64-bit unit into cpu_env
+     * so that the high bits are zeroed.
      */
     if (len_remain) {
-        tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
-
         switch (len_remain) {
         case 2:
-        case 4:
-        case 8:
-            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
+            tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUW);
+            break;
+        case 4:
+            tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL);
             break;
-
         case 6:
             t1 = tcg_temp_new_i64();
-            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
-            tcg_gen_addi_i64(addr, addr, 4);
-            tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
+            tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL);
+            tcg_gen_addi_i64(clean_addr, clean_addr, 4);
+            tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW);
             tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
             tcg_temp_free_i64(t1);
             break;
-
         default:
             g_assert_not_reached();
         }
         tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
     }
-    tcg_temp_free_i64(addr);
     tcg_temp_free_i64(t0);
 }
 
-- 
2.25.1



  parent reply	other threads:[~2020-06-03  1:24 UTC|newest]

Thread overview: 96+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-06-03  1:12 [PATCH v7 00/42] target/arm: Implement ARMv8.5-MemTag, system mode Richard Henderson
2020-06-03  1:12 ` [PATCH v7 01/42] target/arm: Add isar tests for mte Richard Henderson
2020-06-18 10:50   ` Peter Maydell
2020-06-03  1:12 ` [PATCH v7 02/42] target/arm: Improve masking of SCR RES0 bits Richard Henderson
2020-06-18 10:50   ` Peter Maydell
2020-06-03  1:12 ` [PATCH v7 03/42] target/arm: Add support for MTE to SCTLR_ELx Richard Henderson
2020-06-18 10:52   ` Peter Maydell
2020-06-18 18:08     ` Richard Henderson
2020-06-18 18:44       ` Peter Maydell
2020-06-03  1:12 ` [PATCH v7 04/42] target/arm: Add support for MTE to HCR_EL2 and SCR_EL3 Richard Henderson
2020-06-18 11:02   ` Peter Maydell
2020-06-03  1:12 ` [PATCH v7 05/42] target/arm: Rename DISAS_UPDATE to DISAS_UPDATE_EXIT Richard Henderson
2020-06-18 11:03   ` Peter Maydell
2020-06-03  1:12 ` [PATCH v7 06/42] target/arm: Add DISAS_UPDATE_NOCHAIN Richard Henderson
2020-06-18 11:14   ` Peter Maydell
2020-06-03  1:12 ` [PATCH v7 07/42] target/arm: Add MTE system registers Richard Henderson
2020-06-18 11:29   ` Peter Maydell
2020-06-03  1:12 ` [PATCH v7 08/42] target/arm: Add MTE bits to tb_flags Richard Henderson
2020-06-18 11:37   ` Peter Maydell
2020-06-03  1:12 ` [PATCH v7 09/42] target/arm: Implement the IRG instruction Richard Henderson
2020-06-18 11:48   ` Peter Maydell
2020-06-03  1:12 ` [PATCH v7 10/42] target/arm: Implement the ADDG, SUBG instructions Richard Henderson
2020-06-18 13:17   ` Peter Maydell
2020-06-18 16:12     ` Richard Henderson
2020-06-18 16:16       ` Peter Maydell
2020-06-03  1:12 ` [PATCH v7 11/42] target/arm: Implement the GMI instruction Richard Henderson
2020-06-18 13:19   ` Peter Maydell
2020-06-03  1:12 ` [PATCH v7 12/42] target/arm: Implement the SUBP instruction Richard Henderson
2020-06-03  1:12 ` [PATCH v7 13/42] target/arm: Define arm_cpu_do_unaligned_access for user-only Richard Henderson
2020-06-18 13:31   ` Peter Maydell
2020-06-18 17:03     ` Richard Henderson
2020-06-18 17:45       ` Peter Maydell
2020-06-18 21:01       ` Richard Henderson
2020-06-03  1:12 ` [PATCH v7 14/42] target/arm: Add helper_probe_access Richard Henderson
2020-06-18 13:33   ` Peter Maydell
2020-06-18 19:19     ` Richard Henderson
2020-06-03  1:12 ` [PATCH v7 15/42] target/arm: Implement LDG, STG, ST2G instructions Richard Henderson
2020-06-18 13:56   ` Peter Maydell
2020-06-18 17:09     ` Richard Henderson
2020-06-03  1:12 ` [PATCH v7 16/42] target/arm: Implement the STGP instruction Richard Henderson
2020-06-03  1:12 ` [PATCH v7 17/42] target/arm: Restrict the values of DCZID.BS under TCG Richard Henderson
2020-06-18 14:07   ` Peter Maydell
2020-06-03  1:12 ` [PATCH v7 18/42] target/arm: Simplify DC_ZVA Richard Henderson
2020-06-18 14:22   ` Peter Maydell
2020-06-03  1:12 ` [PATCH v7 19/42] target/arm: Implement the LDGM, STGM, STZGM instructions Richard Henderson
2020-06-19 11:04   ` Peter Maydell
2020-06-03  1:12 ` [PATCH v7 20/42] target/arm: Implement the access tag cache flushes Richard Henderson
2020-06-18 16:28   ` Peter Maydell
2020-06-03  1:12 ` [PATCH v7 21/42] target/arm: Move regime_el to internals.h Richard Henderson
2020-06-18 16:29   ` Peter Maydell
2020-06-03  1:12 ` [PATCH v7 22/42] target/arm: Move regime_tcr " Richard Henderson
2020-06-18 16:30   ` Peter Maydell
2020-06-03  1:12 ` [PATCH v7 23/42] target/arm: Add gen_mte_check1 Richard Henderson
2020-06-18 16:34   ` Peter Maydell
2020-06-03  1:12 ` [PATCH v7 24/42] target/arm: Add gen_mte_checkN Richard Henderson
2020-06-18 16:36   ` Peter Maydell
2020-06-03  1:13 ` [PATCH v7 25/42] target/arm: Implement helper_mte_check1 Richard Henderson
2020-06-18 16:37   ` Peter Maydell
2020-06-18 17:32     ` Richard Henderson
2020-06-19 13:44   ` Peter Maydell
2020-06-19 17:07     ` Richard Henderson
2020-06-03  1:13 ` [PATCH v7 26/42] target/arm: Implement helper_mte_checkN Richard Henderson
2020-06-19 13:52   ` Peter Maydell
2020-06-03  1:13 ` [PATCH v7 27/42] target/arm: Add helper_mte_check_zva Richard Henderson
2020-06-19 13:55   ` Peter Maydell
2020-06-03  1:13 ` Richard Henderson [this message]
2020-06-19 13:58   ` [PATCH v7 28/42] target/arm: Use mte_checkN for sve unpredicated loads Peter Maydell
2020-06-19 19:26     ` Richard Henderson
2020-06-03  1:13 ` [PATCH v7 29/42] target/arm: Use mte_checkN for sve unpredicated stores Richard Henderson
2020-06-19 14:01   ` Peter Maydell
2020-06-03  1:13 ` [PATCH v7 30/42] target/arm: Use mte_check1 for sve LD1R Richard Henderson
2020-06-19 14:03   ` Peter Maydell
2020-06-19 19:40     ` Richard Henderson
2020-06-03  1:13 ` [PATCH v7 31/42] target/arm: Add mte helpers for sve scalar + int loads Richard Henderson
2020-06-19 14:06   ` Peter Maydell
2020-06-19 19:45     ` Richard Henderson
2020-06-03  1:13 ` [PATCH v7 32/42] target/arm: Add mte helpers for sve scalar + int stores Richard Henderson
2020-06-03  1:13 ` [PATCH v7 33/42] target/arm: Add mte helpers for sve scalar + int ff/nf loads Richard Henderson
2020-06-03  1:13 ` [PATCH v7 34/42] target/arm: Handle TBI for sve scalar + int memory ops Richard Henderson
2020-06-19 14:07   ` Peter Maydell
2020-06-03  1:13 ` [PATCH v7 35/42] target/arm: Add mte helpers for sve scatter/gather " Richard Henderson
2020-06-03  1:13 ` [PATCH v7 36/42] target/arm: Complete TBI clearing for user-only for SVE Richard Henderson
2020-06-03  1:13 ` [PATCH v7 37/42] target/arm: Implement data cache set allocation tags Richard Henderson
2020-06-19 14:11   ` Peter Maydell
2020-06-03  1:13 ` [PATCH v7 38/42] target/arm: Set PSTATE.TCO on exception entry Richard Henderson
2020-06-03  1:13 ` [PATCH v7 39/42] target/arm: Enable MTE Richard Henderson
2020-06-18 16:39   ` Peter Maydell
2020-06-18 17:35     ` Richard Henderson
2020-06-03  1:13 ` [PATCH v7 40/42] target/arm: Cache the Tagged bit for a page in MemTxAttrs Richard Henderson
2020-06-19 14:29   ` Peter Maydell
2020-06-03  1:13 ` [PATCH v7 41/42] target/arm: Create tagged ram when MTE is enabled Richard Henderson
2020-06-19 14:37   ` Peter Maydell
2020-06-03  1:13 ` [PATCH v7 42/42] target/arm: Add allocation tag storage for system mode Richard Henderson
2020-06-03  2:15 ` [PATCH v7 00/42] target/arm: Implement ARMv8.5-MemTag, " no-reply
2020-06-03  4:07   ` Richard Henderson
2020-06-19 14:38 ` Peter Maydell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200603011317.473934-29-richard.henderson@linaro.org \
    --to=richard.henderson@linaro.org \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-arm@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=steplong@quicinc.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.