All of lore.kernel.org
 help / color / mirror / Atom feed
From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Cc: peter.maydell@linaro.org, qemu-arm@nongnu.org, steplong@quicinc.com
Subject: [PATCH v7 29/42] target/arm: Use mte_checkN for sve unpredicated stores
Date: Tue,  2 Jun 2020 18:13:04 -0700	[thread overview]
Message-ID: <20200603011317.473934-30-richard.henderson@linaro.org> (raw)
In-Reply-To: <20200603011317.473934-1-richard.henderson@linaro.org>

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/helper-sve.h    |  1 +
 target/arm/sve_helper.c    | 63 ++++++++++++++++++++++++++-
 target/arm/translate-sve.c | 88 ++++++++++++++------------------------
 3 files changed, 94 insertions(+), 58 deletions(-)

diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
index 550ae7f7e6..197cd72999 100644
--- a/target/arm/helper-sve.h
+++ b/target/arm/helper-sve.h
@@ -1139,6 +1139,7 @@ DEF_HELPER_FLAGS_5(sve_ftmad_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_5(sve_ftmad_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
 
 DEF_HELPER_FLAGS_4(sve_ldr, TCG_CALL_NO_WG, void, env, ptr, tl, int)
+DEF_HELPER_FLAGS_4(sve_str, TCG_CALL_NO_WG, void, env, ptr, tl, int)
 
 DEF_HELPER_FLAGS_4(sve_ld1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 DEF_HELPER_FLAGS_4(sve_ld2bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index 04340cbe0c..507fba37c9 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -4130,7 +4130,7 @@ static bool sve_probe_page(SVEHostPage *info, bool nofault,
 }
 
 /*
- * Load contiguous data, unpredicated.
+ * Load/store contiguous data, unpredicated.
  *
  * Note that unpredicated load/store of vector/predicate registers
  * are defined as a stream of bytes, which equates to little-endian
@@ -4198,6 +4198,67 @@ void HELPER(sve_ldr)(CPUARMState *env, void *vd, target_ulong addr, int size)
     }
 }
 
+void HELPER(sve_str)(CPUARMState *env, void *vd, target_ulong addr, int size)
+{
+    int mem_idx = cpu_mmu_index(env, false);
+    int in_page = -((int)addr | TARGET_PAGE_MASK);
+    uintptr_t ra = GETPC();
+    uint64_t val;
+    void *host;
+    int i;
+
+    /* Small stores are expanded inline. */
+    tcg_debug_assert(size > 2 * 8);
+
+    if (likely(size <= in_page)) {
+        host = probe_write(env, addr, size, mem_idx, ra);
+        if (likely(host != NULL)) {
+            for (i = 0; i + 8 <= size; i += 8) {
+                stq_le_p(host + i, *(uint64_t *)(vd + i));
+            }
+
+            /* Predicate load length may be any multiple of 2. */
+            if (unlikely(i != size)) {
+                val = *(uint64_t *)(vd + i);
+                if (size & 4) {
+                    stl_le_p(host + i, val);
+                    i += 4;
+                    val >>= 32;
+                }
+                if (size & 2) {
+                    stw_le_p(host + i, val);
+                }
+            }
+            return;
+        }
+    } else {
+        (void)probe_write(env, addr, in_page, mem_idx, ra);
+        (void)probe_write(env, addr + in_page, size - in_page, mem_idx, ra);
+    }
+
+    /*
+     * Note there is no endian-specific target store function, so to handle
+     * aarch64_be-linux-user we need to bswap the big-endian store.
+     */
+    for (i = 0; i + 8 <= size; i += 8) {
+        val = *(uint64_t *)(vd + i);
+        cpu_stq_le_data_ra(env, addr + i, val, ra);
+    }
+
+    /* Predicate load length may be any multiple of 2. */
+    if (unlikely(i != size)) {
+        val = *(uint64_t *)(vd + i);
+        if (size & 4) {
+            cpu_stl_le_data_ra(env, addr + i, val, ra);
+            i += 4;
+            val >>= 32;
+        }
+        if (size & 2) {
+            cpu_stw_le_data_ra(env, addr + i, val, ra);
+        }
+    }
+}
+
 /*
  * Analyse contiguous data, protected by a governing predicate.
  */
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index 35b979fa13..e515646db2 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -4410,78 +4410,52 @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
     int len_remain = len % 8;
     int nparts = len / 8 + ctpop8(len_remain);
     int midx = get_mem_index(s);
-    TCGv_i64 addr, t0;
+    TCGv_i64 dirty_addr, clean_addr, t0;
+    int i;
+
+    dirty_addr = read_cpu_reg_sp(s, rn, true);
+    tcg_gen_addi_i64(dirty_addr, dirty_addr, imm);
+
+    clean_addr = gen_mte_checkN(s, dirty_addr, true, rn != 31, len, MO_8);
+
+    /* Limit tcg code expansion by doing large loads out of line. */
+    if (nparts > 4) {
+        TCGv_ptr t_rd = tcg_temp_new_ptr();
+        TCGv_i32 t_len = tcg_const_i32(len);
+
+        tcg_gen_addi_ptr(t_rd, cpu_env, vofs);
+        gen_helper_sve_str(cpu_env, t_rd, clean_addr, t_len);
+        tcg_temp_free_ptr(t_rd);
+        tcg_temp_free_i32(t_len);
+        return;
+    }
 
-    addr = tcg_temp_new_i64();
     t0 = tcg_temp_new_i64();
-
-    /* Note that unpredicated load/store of vector/predicate registers
-     * are defined as a stream of bytes, which equates to little-endian
-     * operations on larger quantities.  There is no nice way to force
-     * a little-endian store for aarch64_be-linux-user out of line.
-     *
-     * Attempt to keep code expansion to a minimum by limiting the
-     * amount of unrolling done.
-     */
-    if (nparts <= 4) {
-        int i;
-
-        for (i = 0; i < len_align; i += 8) {
-            tcg_gen_ld_i64(t0, cpu_env, vofs + i);
-            tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
-            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
-        }
-    } else {
-        TCGLabel *loop = gen_new_label();
-        TCGv_ptr t2, i = tcg_const_local_ptr(0);
-
-        gen_set_label(loop);
-
-        t2 = tcg_temp_new_ptr();
-        tcg_gen_add_ptr(t2, cpu_env, i);
-        tcg_gen_ld_i64(t0, t2, vofs);
-
-        /* Minimize the number of local temps that must be re-read from
-         * the stack each iteration.  Instead, re-compute values other
-         * than the loop counter.
-         */
-        tcg_gen_addi_ptr(t2, i, imm);
-        tcg_gen_extu_ptr_i64(addr, t2);
-        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
-        tcg_temp_free_ptr(t2);
-
-        tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
-
-        tcg_gen_addi_ptr(i, i, 8);
-
-        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
-        tcg_temp_free_ptr(i);
+    for (i = 0; i < len_align; i += 8) {
+        tcg_gen_ld_i64(t0, cpu_env, vofs + i);
+        tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEQ);
+        tcg_gen_addi_i64(clean_addr, clean_addr, 8);
     }
 
     /* Predicate register stores can be any multiple of 2.  */
     if (len_remain) {
         tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
-        tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
-
         switch (len_remain) {
-        case 2:
-        case 4:
-        case 8:
-            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
-            break;
-
         case 6:
-            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUL);
-            tcg_gen_addi_i64(addr, addr, 4);
+            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL);
+            tcg_gen_addi_i64(clean_addr, clean_addr, 4);
             tcg_gen_shri_i64(t0, t0, 32);
-            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUW);
+            /* fall through */
+        case 2:
+            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW);
+            break;
+        case 4:
+            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL);
             break;
-
         default:
             g_assert_not_reached();
         }
     }
-    tcg_temp_free_i64(addr);
     tcg_temp_free_i64(t0);
 }
 
-- 
2.25.1



  parent reply	other threads:[~2020-06-03  1:27 UTC|newest]

Thread overview: 96+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-06-03  1:12 [PATCH v7 00/42] target/arm: Implement ARMv8.5-MemTag, system mode Richard Henderson
2020-06-03  1:12 ` [PATCH v7 01/42] target/arm: Add isar tests for mte Richard Henderson
2020-06-18 10:50   ` Peter Maydell
2020-06-03  1:12 ` [PATCH v7 02/42] target/arm: Improve masking of SCR RES0 bits Richard Henderson
2020-06-18 10:50   ` Peter Maydell
2020-06-03  1:12 ` [PATCH v7 03/42] target/arm: Add support for MTE to SCTLR_ELx Richard Henderson
2020-06-18 10:52   ` Peter Maydell
2020-06-18 18:08     ` Richard Henderson
2020-06-18 18:44       ` Peter Maydell
2020-06-03  1:12 ` [PATCH v7 04/42] target/arm: Add support for MTE to HCR_EL2 and SCR_EL3 Richard Henderson
2020-06-18 11:02   ` Peter Maydell
2020-06-03  1:12 ` [PATCH v7 05/42] target/arm: Rename DISAS_UPDATE to DISAS_UPDATE_EXIT Richard Henderson
2020-06-18 11:03   ` Peter Maydell
2020-06-03  1:12 ` [PATCH v7 06/42] target/arm: Add DISAS_UPDATE_NOCHAIN Richard Henderson
2020-06-18 11:14   ` Peter Maydell
2020-06-03  1:12 ` [PATCH v7 07/42] target/arm: Add MTE system registers Richard Henderson
2020-06-18 11:29   ` Peter Maydell
2020-06-03  1:12 ` [PATCH v7 08/42] target/arm: Add MTE bits to tb_flags Richard Henderson
2020-06-18 11:37   ` Peter Maydell
2020-06-03  1:12 ` [PATCH v7 09/42] target/arm: Implement the IRG instruction Richard Henderson
2020-06-18 11:48   ` Peter Maydell
2020-06-03  1:12 ` [PATCH v7 10/42] target/arm: Implement the ADDG, SUBG instructions Richard Henderson
2020-06-18 13:17   ` Peter Maydell
2020-06-18 16:12     ` Richard Henderson
2020-06-18 16:16       ` Peter Maydell
2020-06-03  1:12 ` [PATCH v7 11/42] target/arm: Implement the GMI instruction Richard Henderson
2020-06-18 13:19   ` Peter Maydell
2020-06-03  1:12 ` [PATCH v7 12/42] target/arm: Implement the SUBP instruction Richard Henderson
2020-06-03  1:12 ` [PATCH v7 13/42] target/arm: Define arm_cpu_do_unaligned_access for user-only Richard Henderson
2020-06-18 13:31   ` Peter Maydell
2020-06-18 17:03     ` Richard Henderson
2020-06-18 17:45       ` Peter Maydell
2020-06-18 21:01       ` Richard Henderson
2020-06-03  1:12 ` [PATCH v7 14/42] target/arm: Add helper_probe_access Richard Henderson
2020-06-18 13:33   ` Peter Maydell
2020-06-18 19:19     ` Richard Henderson
2020-06-03  1:12 ` [PATCH v7 15/42] target/arm: Implement LDG, STG, ST2G instructions Richard Henderson
2020-06-18 13:56   ` Peter Maydell
2020-06-18 17:09     ` Richard Henderson
2020-06-03  1:12 ` [PATCH v7 16/42] target/arm: Implement the STGP instruction Richard Henderson
2020-06-03  1:12 ` [PATCH v7 17/42] target/arm: Restrict the values of DCZID.BS under TCG Richard Henderson
2020-06-18 14:07   ` Peter Maydell
2020-06-03  1:12 ` [PATCH v7 18/42] target/arm: Simplify DC_ZVA Richard Henderson
2020-06-18 14:22   ` Peter Maydell
2020-06-03  1:12 ` [PATCH v7 19/42] target/arm: Implement the LDGM, STGM, STZGM instructions Richard Henderson
2020-06-19 11:04   ` Peter Maydell
2020-06-03  1:12 ` [PATCH v7 20/42] target/arm: Implement the access tag cache flushes Richard Henderson
2020-06-18 16:28   ` Peter Maydell
2020-06-03  1:12 ` [PATCH v7 21/42] target/arm: Move regime_el to internals.h Richard Henderson
2020-06-18 16:29   ` Peter Maydell
2020-06-03  1:12 ` [PATCH v7 22/42] target/arm: Move regime_tcr " Richard Henderson
2020-06-18 16:30   ` Peter Maydell
2020-06-03  1:12 ` [PATCH v7 23/42] target/arm: Add gen_mte_check1 Richard Henderson
2020-06-18 16:34   ` Peter Maydell
2020-06-03  1:12 ` [PATCH v7 24/42] target/arm: Add gen_mte_checkN Richard Henderson
2020-06-18 16:36   ` Peter Maydell
2020-06-03  1:13 ` [PATCH v7 25/42] target/arm: Implement helper_mte_check1 Richard Henderson
2020-06-18 16:37   ` Peter Maydell
2020-06-18 17:32     ` Richard Henderson
2020-06-19 13:44   ` Peter Maydell
2020-06-19 17:07     ` Richard Henderson
2020-06-03  1:13 ` [PATCH v7 26/42] target/arm: Implement helper_mte_checkN Richard Henderson
2020-06-19 13:52   ` Peter Maydell
2020-06-03  1:13 ` [PATCH v7 27/42] target/arm: Add helper_mte_check_zva Richard Henderson
2020-06-19 13:55   ` Peter Maydell
2020-06-03  1:13 ` [PATCH v7 28/42] target/arm: Use mte_checkN for sve unpredicated loads Richard Henderson
2020-06-19 13:58   ` Peter Maydell
2020-06-19 19:26     ` Richard Henderson
2020-06-03  1:13 ` Richard Henderson [this message]
2020-06-19 14:01   ` [PATCH v7 29/42] target/arm: Use mte_checkN for sve unpredicated stores Peter Maydell
2020-06-03  1:13 ` [PATCH v7 30/42] target/arm: Use mte_check1 for sve LD1R Richard Henderson
2020-06-19 14:03   ` Peter Maydell
2020-06-19 19:40     ` Richard Henderson
2020-06-03  1:13 ` [PATCH v7 31/42] target/arm: Add mte helpers for sve scalar + int loads Richard Henderson
2020-06-19 14:06   ` Peter Maydell
2020-06-19 19:45     ` Richard Henderson
2020-06-03  1:13 ` [PATCH v7 32/42] target/arm: Add mte helpers for sve scalar + int stores Richard Henderson
2020-06-03  1:13 ` [PATCH v7 33/42] target/arm: Add mte helpers for sve scalar + int ff/nf loads Richard Henderson
2020-06-03  1:13 ` [PATCH v7 34/42] target/arm: Handle TBI for sve scalar + int memory ops Richard Henderson
2020-06-19 14:07   ` Peter Maydell
2020-06-03  1:13 ` [PATCH v7 35/42] target/arm: Add mte helpers for sve scatter/gather " Richard Henderson
2020-06-03  1:13 ` [PATCH v7 36/42] target/arm: Complete TBI clearing for user-only for SVE Richard Henderson
2020-06-03  1:13 ` [PATCH v7 37/42] target/arm: Implement data cache set allocation tags Richard Henderson
2020-06-19 14:11   ` Peter Maydell
2020-06-03  1:13 ` [PATCH v7 38/42] target/arm: Set PSTATE.TCO on exception entry Richard Henderson
2020-06-03  1:13 ` [PATCH v7 39/42] target/arm: Enable MTE Richard Henderson
2020-06-18 16:39   ` Peter Maydell
2020-06-18 17:35     ` Richard Henderson
2020-06-03  1:13 ` [PATCH v7 40/42] target/arm: Cache the Tagged bit for a page in MemTxAttrs Richard Henderson
2020-06-19 14:29   ` Peter Maydell
2020-06-03  1:13 ` [PATCH v7 41/42] target/arm: Create tagged ram when MTE is enabled Richard Henderson
2020-06-19 14:37   ` Peter Maydell
2020-06-03  1:13 ` [PATCH v7 42/42] target/arm: Add allocation tag storage for system mode Richard Henderson
2020-06-03  2:15 ` [PATCH v7 00/42] target/arm: Implement ARMv8.5-MemTag, " no-reply
2020-06-03  4:07   ` Richard Henderson
2020-06-19 14:38 ` Peter Maydell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200603011317.473934-30-richard.henderson@linaro.org \
    --to=richard.henderson@linaro.org \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-arm@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=steplong@quicinc.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.