All of lore.kernel.org
 help / color / mirror / Atom feed
From: Yongbok Kim <yongbok.kim@imgtec.com>
To: qemu-devel@nongnu.org
Cc: peter.maydell@linaro.org, leon.alrae@imgtec.com,
	afaerber@suse.de, rth@twiddle.net
Subject: [Qemu-devel] [PATCH v3 2/2] target-mips: Misaligned memory accesses for MSA
Date: Wed, 13 May 2015 16:37:37 +0100	[thread overview]
Message-ID: <1431531457-17127-3-git-send-email-yongbok.kim@imgtec.com> (raw)
In-Reply-To: <1431531457-17127-1-git-send-email-yongbok.kim@imgtec.com>

MIPS SIMD Architecture (MSA) vector loads and stores require misalignment
support. An MSA memory access should work as an atomic operation. Therefore,
the validity of all addresses of an access has to be checked if it spans
two pages.

For R5 with MSA, there is no clear way to distinguish which instructions
support misaligned accesses and which do not, so a work-around using
byte-by-byte accesses and endianness corrections has been introduced.

Separate the helper functions for each data format, as the format is known
at translation time.
Use mmu_idx from cpu_mmu_index() instead of calculating it from hflags.

Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
---
 target-mips/helper.h    |   10 +++-
 target-mips/op_helper.c |  142 ++++++++++++++++++++++++++--------------------
 target-mips/translate.c |   23 +++++--
 3 files changed, 104 insertions(+), 71 deletions(-)

diff --git a/target-mips/helper.h b/target-mips/helper.h
index 3bd0b02..c532276 100644
--- a/target-mips/helper.h
+++ b/target-mips/helper.h
@@ -931,5 +931,11 @@ DEF_HELPER_4(msa_ftint_u_df, void, env, i32, i32, i32)
 DEF_HELPER_4(msa_ffint_s_df, void, env, i32, i32, i32)
 DEF_HELPER_4(msa_ffint_u_df, void, env, i32, i32, i32)
 
-DEF_HELPER_5(msa_ld_df, void, env, i32, i32, i32, s32)
-DEF_HELPER_5(msa_st_df, void, env, i32, i32, i32, s32)
+#define MSALDST_PROTO(type)                                 \
+DEF_HELPER_4(msa_ld_ ## type, void, env, i32, i32, s32)     \
+DEF_HELPER_4(msa_st_ ## type, void, env, i32, i32, s32)
+MSALDST_PROTO(b)
+MSALDST_PROTO(h)
+MSALDST_PROTO(w)
+MSALDST_PROTO(d)
+#undef MSALDST_PROTO
diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c
index 58f02cf..6b49162 100644
--- a/target-mips/op_helper.c
+++ b/target-mips/op_helper.c
@@ -3565,72 +3565,90 @@ FOP_CONDN_S(sne,  (float32_lt(fst1, fst0, &env->active_fpu.fp_status)
 /* Element-by-element access macros */
 #define DF_ELEMENTS(df) (MSA_WRLEN / DF_BITS(df))
 
-void helper_msa_ld_df(CPUMIPSState *env, uint32_t df, uint32_t wd, uint32_t rs,
-                     int32_t s10)
+static inline void ensure_atomic_msa_block_access(CPUMIPSState *env,
+                                                  target_ulong addr,
+                                                  int rw,
+                                                  int mmu_idx)
 {
-    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
-    target_ulong addr = env->active_tc.gpr[rs] + (s10 << df);
-    int i;
+#if !defined(CONFIG_USER_ONLY)
+#define MSA_PAGESPAN(x) (unlikely((((x) & ~TARGET_PAGE_MASK)                \
+                                   + MSA_WRLEN/8 - 1) >= TARGET_PAGE_SIZE))
+    CPUState *cs = CPU(mips_env_get_cpu(env));
+    target_ulong page_addr;
 
-    switch (df) {
-    case DF_BYTE:
-        for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
-            pwd->b[i] = do_lbu(env, addr + (i << DF_BYTE),
-                                env->hflags & MIPS_HFLAG_KSU);
-        }
-        break;
-    case DF_HALF:
-        for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
-            pwd->h[i] = do_lhu(env, addr + (i << DF_HALF),
-                                env->hflags & MIPS_HFLAG_KSU);
-        }
-        break;
-    case DF_WORD:
-        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
-            pwd->w[i] = do_lw(env, addr + (i << DF_WORD),
-                                env->hflags & MIPS_HFLAG_KSU);
-        }
-        break;
-    case DF_DOUBLE:
-        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
-            pwd->d[i] = do_ld(env, addr + (i << DF_DOUBLE),
-                                env->hflags & MIPS_HFLAG_KSU);
-        }
-        break;
+    if (MSA_PAGESPAN(addr)) {
+        /* first page */
+        tlb_fill(cs, addr, rw, mmu_idx, 0);
+        /* second page */
+        page_addr = (addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
+        tlb_fill(cs, page_addr, rw, mmu_idx, 0);
     }
+#endif
 }
 
-void helper_msa_st_df(CPUMIPSState *env, uint32_t df, uint32_t wd, uint32_t rs,
-                     int32_t s10)
-{
-    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
-    target_ulong addr = env->active_tc.gpr[rs] + (s10 << df);
-    int i;
+#define CORRECT_VECTOR_ENDIANNESS(DF, TYPE, SWAP)                           \
+static void correct_vector_endianness_ ## TYPE(wr_t *pwd, wr_t *pws)        \
+{                                                                           \
+    int i;                                                                  \
+    for (i = 0; i < DF_ELEMENTS(DF); i++) {                                 \
+        pwd->TYPE[i] = SWAP(pws->TYPE[i]);                                  \
+    }                                                                       \
+}
+CORRECT_VECTOR_ENDIANNESS(DF_BYTE,   b, /* assign */)
+CORRECT_VECTOR_ENDIANNESS(DF_HALF,   h, tswap16)
+CORRECT_VECTOR_ENDIANNESS(DF_WORD,   w, tswap32)
+CORRECT_VECTOR_ENDIANNESS(DF_DOUBLE, d, tswap64)
+
+#define MSA_LD_DF(DF, TYPE, LD_INSN)                                    \
+void helper_msa_ld_ ## TYPE(CPUMIPSState *env, uint32_t wd, uint32_t rs,\
+                     int32_t s10)                                       \
+{                                                                       \
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
+    target_ulong addr = env->active_tc.gpr[rs] + (s10 << DF);           \
+    int i;                                                              \
+    int mmu_idx = cpu_mmu_index(env);                                   \
+    ensure_atomic_msa_block_access(env, addr, MMU_DATA_LOAD, mmu_idx);  \
+    if (unlikely(addr & ((1 << DF) - 1))) {                             \
+        /* work-around for misaligned accesses */                       \
+        for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                    \
+            pwd->b[i] = do_lbu(env, addr + (i << DF_BYTE), mmu_idx);    \
+        }                                                               \
+        correct_vector_endianness_ ## TYPE(pwd, pwd);                   \
+    } else {                                                            \
+        for (i = 0; i < DF_ELEMENTS(DF); i++) {                         \
+            pwd->TYPE[i] = LD_INSN(env, addr + (i << DF), mmu_idx);     \
+        }                                                               \
+    }                                                                   \
+}
+MSA_LD_DF(DF_BYTE,   b, do_lbu)
+MSA_LD_DF(DF_HALF,   h, do_lhu)
+MSA_LD_DF(DF_WORD,   w, do_lw)
+MSA_LD_DF(DF_DOUBLE, d, do_ld)
 
-    switch (df) {
-    case DF_BYTE:
-        for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
-            do_sb(env, addr + (i << DF_BYTE), pwd->b[i],
-                    env->hflags & MIPS_HFLAG_KSU);
-        }
-        break;
-    case DF_HALF:
-        for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
-            do_sh(env, addr + (i << DF_HALF), pwd->h[i],
-                    env->hflags & MIPS_HFLAG_KSU);
-        }
-        break;
-    case DF_WORD:
-        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
-            do_sw(env, addr + (i << DF_WORD), pwd->w[i],
-                    env->hflags & MIPS_HFLAG_KSU);
-        }
-        break;
-    case DF_DOUBLE:
-        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
-            do_sd(env, addr + (i << DF_DOUBLE), pwd->d[i],
-                    env->hflags & MIPS_HFLAG_KSU);
-        }
-        break;
-    }
+#define MSA_ST_DF(DF, TYPE, ST_INSN)                                    \
+void helper_msa_st_ ## TYPE(CPUMIPSState *env, uint32_t wd, uint32_t rs,\
+                     int32_t s10)                                       \
+{                                                                       \
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
+    target_ulong addr = env->active_tc.gpr[rs] + (s10 << DF);           \
+    int i;                                                              \
+    int mmu_idx = cpu_mmu_index(env);                                   \
+    ensure_atomic_msa_block_access(env, addr, MMU_DATA_STORE, mmu_idx); \
+    if (unlikely(addr & ((1 << DF) - 1))) {                             \
+        /* work-around for misaligned accesses */                       \
+        wr_t wx;                                                        \
+        correct_vector_endianness_ ## TYPE(&wx, pwd);                   \
+        for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                    \
+            do_sb(env, addr + (i << DF_BYTE), wx.b[i], mmu_idx);        \
+        }                                                               \
+    } else {                                                            \
+        for (i = 0; i < DF_ELEMENTS(DF); i++) {                         \
+            ST_INSN(env, addr + (i << DF), pwd->TYPE[i], mmu_idx);      \
+        }                                                               \
+    }                                                                   \
 }
+MSA_ST_DF(DF_BYTE,   b, do_sb)
+MSA_ST_DF(DF_HALF,   h, do_sh)
+MSA_ST_DF(DF_WORD,   w, do_sw)
+MSA_ST_DF(DF_DOUBLE, d, do_sd)
+
diff --git a/target-mips/translate.c b/target-mips/translate.c
index fd063a2..59c0aeb 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -18402,32 +18402,41 @@ static void gen_msa(CPUMIPSState *env, DisasContext *ctx)
             int32_t s10 = sextract32(ctx->opcode, 16, 10);
             uint8_t rs = (ctx->opcode >> 11) & 0x1f;
             uint8_t wd = (ctx->opcode >> 6) & 0x1f;
-            uint8_t df = (ctx->opcode >> 0) & 0x3;
 
-            TCGv_i32 tdf = tcg_const_i32(df);
             TCGv_i32 twd = tcg_const_i32(wd);
             TCGv_i32 trs = tcg_const_i32(rs);
             TCGv_i32 ts10 = tcg_const_i32(s10);
 
+            save_cpu_state(ctx, 1);
+
             switch (MASK_MSA_MINOR(opcode)) {
             case OPC_LD_B:
+                gen_helper_msa_ld_b(cpu_env, twd, trs, ts10);
+                break;
             case OPC_LD_H:
+                gen_helper_msa_ld_h(cpu_env, twd, trs, ts10);
+                break;
             case OPC_LD_W:
+                gen_helper_msa_ld_w(cpu_env, twd, trs, ts10);
+                break;
             case OPC_LD_D:
-                save_cpu_state(ctx, 1);
-                gen_helper_msa_ld_df(cpu_env, tdf, twd, trs, ts10);
+                gen_helper_msa_ld_d(cpu_env, twd, trs, ts10);
                 break;
             case OPC_ST_B:
+                gen_helper_msa_st_b(cpu_env, twd, trs, ts10);
+                break;
             case OPC_ST_H:
+                gen_helper_msa_st_h(cpu_env, twd, trs, ts10);
+                break;
             case OPC_ST_W:
+                gen_helper_msa_st_w(cpu_env, twd, trs, ts10);
+                break;
             case OPC_ST_D:
-                save_cpu_state(ctx, 1);
-                gen_helper_msa_st_df(cpu_env, tdf, twd, trs, ts10);
+                gen_helper_msa_st_d(cpu_env, twd, trs, ts10);
                 break;
             }
 
             tcg_temp_free_i32(twd);
-            tcg_temp_free_i32(tdf);
             tcg_temp_free_i32(trs);
             tcg_temp_free_i32(ts10);
         }
-- 
1.7.5.4

  parent reply	other threads:[~2015-05-13 15:38 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-05-13 15:37 [Qemu-devel] [PATCH v3 0/2] target-mips: Add support for misaligned accesses Yongbok Kim
2015-05-13 15:37 ` [Qemu-devel] [PATCH v3 1/2] target-mips: Misaligned memory accesses for R6 Yongbok Kim
2015-05-13 15:37 ` Yongbok Kim [this message]
2015-05-13 19:28   ` [Qemu-devel] [PATCH v3 2/2] target-mips: Misaligned memory accesses for MSA Richard Henderson
2015-05-13 19:56     ` Maciej W. Rozycki
2015-05-13 19:58       ` Richard Henderson
2015-05-13 20:59         ` Leon Alrae
2015-05-13 21:21           ` Maciej W. Rozycki
2015-05-13 21:36             ` Richard Henderson
2015-05-13 22:54               ` Maciej W. Rozycki
2015-05-14  8:51                 ` Leon Alrae
2015-05-14 11:22                   ` Maciej W. Rozycki
2015-05-13 21:31           ` Richard Henderson
2015-05-14  9:00     ` Yongbok Kim
2015-05-14  9:46       ` Yongbok Kim
2015-05-14 18:44         ` Richard Henderson
2015-05-14  9:50     ` Leon Alrae
2015-05-14 15:27       ` Richard Henderson
2015-05-14 19:12         ` Richard Henderson
2015-05-15 12:09           ` Leon Alrae
2015-05-15 13:43             ` Richard Henderson
2015-05-15 14:04               ` Leon Alrae

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1431531457-17127-3-git-send-email-yongbok.kim@imgtec.com \
    --to=yongbok.kim@imgtec.com \
    --cc=afaerber@suse.de \
    --cc=leon.alrae@imgtec.com \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=rth@twiddle.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.